aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorSteven J. Magnani <steve@digidescorp.com>2012-10-04 20:14:44 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-05 14:05:09 -0400
commit21b6633d516c4f5d03ec02ede6374e320191003f (patch)
tree1117879875a138230c3fb2dea435026012270141 /fs
parent4b63709861e431e73f0be6b83f420fdd8fc518f5 (diff)
fat (exportfs): move NFS support code
Under memory pressure, the system may evict dentries from cache. When the FAT driver receives an NFS request involving an evicted dentry, it is unable to reconnect it to the filesystem root. This causes the request to fail, often with ENOENT.

This is partially due to ineffectiveness of the current FAT NFS implementation, and partially due to an unimplemented fh_to_parent method. The latter can cause file accesses to fail on shares exported with subtree_check.

This patch set provides the FAT driver with the ability to reconnect dentries. NFS file handle generation and lookups are simplified and made congruent with ext2.

Testing has involved a memory-starved virtual machine running 3.5-rc5 that exports a ~2 GB vfat filesystem containing a kernel tree (~770 MB, ~40000 files, 9 levels). Both 'cp -r' and 'ls -lR' operations were performed from a client, some overlapping, some consecutive. Exports with 'subtree_check' and 'no_subtree_check' have been tested.

Note that while this patch set improves FAT's NFS support, it does not eliminate ESTALE errors completely. The following should be considered for NFS clients that are sensitive to ESTALE:

* Mounting with lookupcache=none. Unfortunately this can degrade performance severely, particularly for deep filesystems.
* Incorporating VFS patches to retry ESTALE failures on the client side, such as https://lkml.org/lkml/2012/6/29/381
* Handling ESTALE errors in client application code.

This patch: Move NFS-related code into its own C file. No functional changes.

Signed-off-by: Steven J. Magnani <steve@digidescorp.com>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/fat/Makefile2
-rw-r--r--fs/fat/fat.h22
-rw-r--r--fs/fat/inode.c130
-rw-r--r--fs/fat/nfs.c151
4 files changed, 174 insertions, 131 deletions
diff --git a/fs/fat/Makefile b/fs/fat/Makefile
index e06190322c1c..964b634f6667 100644
--- a/fs/fat/Makefile
+++ b/fs/fat/Makefile
@@ -6,6 +6,6 @@ obj-$(CONFIG_FAT_FS) += fat.o
6obj-$(CONFIG_VFAT_FS) += vfat.o 6obj-$(CONFIG_VFAT_FS) += vfat.o
7obj-$(CONFIG_MSDOS_FS) += msdos.o 7obj-$(CONFIG_MSDOS_FS) += msdos.o
8 8
9fat-y := cache.o dir.o fatent.o file.o inode.o misc.o 9fat-y := cache.o dir.o fatent.o file.o inode.o misc.o nfs.o
10vfat-y := namei_vfat.o 10vfat-y := namei_vfat.o
11msdos-y := namei_msdos.o 11msdos-y := namei_msdos.o
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 7d8e0dcac5d5..fb95939ff870 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -341,6 +341,20 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent,
341 341
342extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, 342extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
343 struct inode *i2); 343 struct inode *i2);
344static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
345 struct inode *inode)
346{
347 loff_t i_pos; /* copy of the inode's i_pos, handed back to the caller */
348#if BITS_PER_LONG == 32
349 spin_lock(&sbi->inode_hash_lock); /* taken on 32-bit builds only; presumably a loff_t read is not atomic there - TODO confirm */
350#endif
351 i_pos = MSDOS_I(inode)->i_pos;
352#if BITS_PER_LONG == 32
353 spin_unlock(&sbi->inode_hash_lock);
354#endif
355 return i_pos;
356}
357
344/* fat/misc.c */ 358/* fat/misc.c */
345extern __printf(3, 4) __cold 359extern __printf(3, 4) __cold
346void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...); 360void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...);
@@ -366,6 +380,14 @@ extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
366int fat_cache_init(void); 380int fat_cache_init(void);
367void fat_cache_destroy(void); 381void fat_cache_destroy(void);
368 382
383/* fat/nfs.c */
384struct fid;
385extern int fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
386 struct inode *parent);
387extern struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid,
388 int fh_len, int fh_type);
389extern struct dentry *fat_get_parent(struct dentry *child_dir);
390
369/* helper for printk */ 391/* helper for printk */
370typedef unsigned long long llu; 392typedef unsigned long long llu;
371 393
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 4e5a6ac54ebd..169f6ebddf96 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -562,20 +562,6 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
562 return 0; 562 return 0;
563} 563}
564 564
565static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
566 struct inode *inode)
567{
568 loff_t i_pos;
569#if BITS_PER_LONG == 32
570 spin_lock(&sbi->inode_hash_lock);
571#endif
572 i_pos = MSDOS_I(inode)->i_pos;
573#if BITS_PER_LONG == 32
574 spin_unlock(&sbi->inode_hash_lock);
575#endif
576 return i_pos;
577}
578
579static int __fat_write_inode(struct inode *inode, int wait) 565static int __fat_write_inode(struct inode *inode, int wait)
580{ 566{
581 struct super_block *sb = inode->i_sb; 567 struct super_block *sb = inode->i_sb;
@@ -668,122 +654,6 @@ static const struct super_operations fat_sops = {
668 .show_options = fat_show_options, 654 .show_options = fat_show_options,
669}; 655};
670 656
671/*
672 * a FAT file handle with fhtype 3 is
673 * 0/ i_ino - for fast, reliable lookup if still in the cache
674 * 1/ i_generation - to see if i_ino is still valid
675 * bit 0 == 0 iff directory
676 * 2/ i_pos(8-39) - if ino has changed, but still in cache
677 * 3/ i_pos(4-7)|i_logstart - to semi-verify inode found at i_pos
678 * 4/ i_pos(0-3)|parent->i_logstart - maybe used to hunt for the file on disc
679 *
680 * Hack for NFSv2: Maximum FAT entry number is 28bits and maximum
681 * i_pos is 40bits (blocknr(32) + dir offset(8)), so two 4bits
682 * of i_logstart is used to store the directory entry offset.
683 */
684
685static struct dentry *fat_fh_to_dentry(struct super_block *sb,
686 struct fid *fid, int fh_len, int fh_type)
687{
688 struct inode *inode = NULL;
689 u32 *fh = fid->raw;
690
691 if (fh_len < 5 || fh_type != 3)
692 return NULL;
693
694 inode = ilookup(sb, fh[0]);
695 if (!inode || inode->i_generation != fh[1]) {
696 if (inode)
697 iput(inode);
698 inode = NULL;
699 }
700 if (!inode) {
701 loff_t i_pos;
702 int i_logstart = fh[3] & 0x0fffffff;
703
704 i_pos = (loff_t)fh[2] << 8;
705 i_pos |= ((fh[3] >> 24) & 0xf0) | (fh[4] >> 28);
706
707 /* try 2 - see if i_pos is in F-d-c
708 * require i_logstart to be the same
709 * Will fail if you truncate and then re-write
710 */
711
712 inode = fat_iget(sb, i_pos);
713 if (inode && MSDOS_I(inode)->i_logstart != i_logstart) {
714 iput(inode);
715 inode = NULL;
716 }
717 }
718
719 /*
720 * For now, do nothing if the inode is not found.
721 *
722 * What we could do is:
723 *
724 * - follow the file starting at fh[4], and record the ".." entry,
725 * and the name of the fh[2] entry.
726 * - then follow the ".." file finding the next step up.
727 *
728 * This way we build a path to the root of the tree. If this works, we
729 * lookup the path and so get this inode into the cache. Finally try
730 * the fat_iget lookup again. If that fails, then we are totally out
731 * of luck. But all that is for another day
732 */
733 return d_obtain_alias(inode);
734}
735
736static int
737fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent)
738{
739 int len = *lenp;
740 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
741 loff_t i_pos;
742
743 if (len < 5) {
744 *lenp = 5;
745 return 255; /* no room */
746 }
747
748 i_pos = fat_i_pos_read(sbi, inode);
749 *lenp = 5;
750 fh[0] = inode->i_ino;
751 fh[1] = inode->i_generation;
752 fh[2] = i_pos >> 8;
753 fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart;
754 fh[4] = (i_pos & 0x0f) << 28;
755 if (parent)
756 fh[4] |= MSDOS_I(parent)->i_logstart;
757 return 3;
758}
759
760static struct dentry *fat_get_parent(struct dentry *child)
761{
762 struct super_block *sb = child->d_sb;
763 struct buffer_head *bh;
764 struct msdos_dir_entry *de;
765 loff_t i_pos;
766 struct dentry *parent;
767 struct inode *inode;
768 int err;
769
770 lock_super(sb);
771
772 err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos);
773 if (err) {
774 parent = ERR_PTR(err);
775 goto out;
776 }
777 inode = fat_build_inode(sb, de, i_pos);
778 brelse(bh);
779
780 parent = d_obtain_alias(inode);
781out:
782 unlock_super(sb);
783
784 return parent;
785}
786
787static const struct export_operations fat_export_ops = { 657static const struct export_operations fat_export_ops = {
788 .encode_fh = fat_encode_fh, 658 .encode_fh = fat_encode_fh,
789 .fh_to_dentry = fat_fh_to_dentry, 659 .fh_to_dentry = fat_fh_to_dentry,
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c
new file mode 100644
index 000000000000..21609a1e9355
--- /dev/null
+++ b/fs/fat/nfs.c
@@ -0,0 +1,151 @@
1/* fs/fat/nfs.c
2 *
3 * This software is licensed under the terms of the GNU General Public
4 * License version 2, as published by the Free Software Foundation, and
5 * may be copied, distributed, and modified under those terms.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include <linux/exportfs.h>
15#include "fat.h"
16
17/*
18 * A FAT file handle with fhtype 3 is five 32-bit words:
19 * 0/ i_ino - for fast, reliable lookup if still in the cache
20 * 1/ i_generation - to see if i_ino is still valid
21 * bit 0 == 0 iff directory
22 * 2/ i_pos(8-39) - if ino has changed, but still in cache
23 * 3/ i_pos(4-7)|i_logstart - to semi-verify inode found at i_pos
24 * 4/ i_pos(0-3)|parent->i_logstart - maybe used to hunt for the file on disc
25 *
26 * Hack for NFSv2: the maximum FAT entry number is 28 bits and the maximum
27 * i_pos is 40 bits (blocknr(32) + dir offset(8)), so the spare top 4 bits of
28 * words 3 and 4 are used to store the directory entry offset (i_pos bits 0-7).
29 */
30
31int
32fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent)
33{
34 int len = *lenp;
35 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
36 loff_t i_pos;
37
38 if (len < 5) {
39 *lenp = 5; /* report the number of words a handle needs */
40 return 255; /* no room */
41 }
42
43 i_pos = fat_i_pos_read(sbi, inode);
44 *lenp = 5;
45 fh[0] = inode->i_ino;
46 fh[1] = inode->i_generation;
47 fh[2] = i_pos >> 8;
48 fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart; /* i_pos bits 4-7 land in bits 28-31 */
49 fh[4] = (i_pos & 0x0f) << 28; /* i_pos bits 0-3 land in bits 28-31 */
50 if (parent) /* parent is optional; without it word 4 carries only the i_pos bits */
51 fh[4] |= MSDOS_I(parent)->i_logstart;
52 return 3; /* fhtype 3, the layout described above */
53}
54
55static int fat_is_valid_fh(int fh_len, int fh_type)
56{
57 return ((fh_len >= 5) && (fh_type == 3)); /* non-zero only for at least the 5 words, and exactly the type 3, that fat_encode_fh() emits */
58}
59
60/*
61 * Map an NFS file handle to a corresponding dentry.
62 * The dentry may or may not be connected to the filesystem root.
63 */
64struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid,
65 int fh_len, int fh_type)
66{
67 struct inode *inode = NULL;
68 u32 *fh = fid->raw;
69 loff_t i_pos;
70 unsigned long i_ino;
71 __u32 i_generation;
72 int i_logstart;
73
74 if (!fat_is_valid_fh(fh_len, fh_type))
75 return NULL; /* too short or not fhtype 3: nothing to decode */
76
77 i_ino = fh[0];
78 i_generation = fh[1];
79 i_logstart = fh[3] & 0x0fffffff; /* low 28 bits; the top 4 bits carry part of i_pos */
80
81 /* Try i_ino lookup first - fastest and most reliable */
82 inode = ilookup(sb, i_ino);
83 if (inode && (inode->i_generation != i_generation)) { /* found, but the generation differs from the handle's: drop it */
84 iput(inode);
85 inode = NULL;
86 }
87 if (!inode) {
88 i_pos = (loff_t)fh[2] << 8; /* i_pos bits 8-39 */
89 i_pos |= ((fh[3] >> 24) & 0xf0) | (fh[4] >> 28); /* i_pos bits 4-7 and 0-3 */
90
91 /* try 2 - look the inode up by i_pos via fat_iget()
92 * and require i_logstart to be the same.
93 * Will fail if you truncate and then re-write
94 */
95
96 inode = fat_iget(sb, i_pos);
97 if (inode && MSDOS_I(inode)->i_logstart != i_logstart) {
98 iput(inode);
99 inode = NULL;
100 }
101 }
102
103 /*
104 * For now, do nothing if the inode is not found.
105 *
106 * What we could do is:
107 *
108 * - follow the file starting at fh[4], and record the ".." entry,
109 * and the name of the fh[2] entry.
110 * - then follow the ".." file finding the next step up.
111 *
112 * This way we build a path to the root of the tree. If this works, we
113 * lookup the path and so get this inode into the cache. Finally try
114 * the fat_iget lookup again. If that fails, then we are totally out
115 * of luck. But all that is for another day.
116 */
117 return d_obtain_alias(inode); /* inode may still be NULL here if both lookups failed */
118}
119
120/*
121 * Find the parent for a directory that is not currently connected to
122 * the filesystem root, by reading the directory's ".." entry.
123 *
124 * On entry, the caller holds child_dir->d_inode->i_mutex.
125 */
126struct dentry *fat_get_parent(struct dentry *child_dir)
127{
128 struct super_block *sb = child_dir->d_sb;
129 struct buffer_head *bh = NULL; /* starts NULL so the error path can share the brelse() below */
130 struct msdos_dir_entry *de;
131 loff_t i_pos;
132 struct dentry *parent;
133 struct inode *inode;
134 int err;
135
136 lock_super(sb);
137
138 err = fat_get_dotdot_entry(child_dir->d_inode, &bh, &de, &i_pos);
139 if (err) {
140 parent = ERR_PTR(err); /* hand the lookup error back as an error pointer */
141 goto out;
142 }
143 inode = fat_build_inode(sb, de, i_pos);
144
145 parent = d_obtain_alias(inode);
146out:
147 brelse(bh); /* single release point for both the success and the error path */
148 unlock_super(sb);
149
150 return parent;
151}