diff options
author | Benjamin Marzinski <bmarzins@redhat.com> | 2007-09-18 14:33:18 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-10-10 03:56:29 -0400 |
commit | 7a9f53b3c1875bef22ad4588e818bc046ef183da (patch) | |
tree | 0b23b5b8a667d0d57c695c47f6b4eb6d50c2d661 | |
parent | de986e859a29097fb9211b052d86a9a2c868f6cd (diff) |
[GFS2] Alternate gfs2_iget to avoid looking up inodes being freed
There is a possible deadlock between two processes on the same node, where one
process is deleting an inode, and another process is looking for allocated but
unused inodes to delete in order to create more space.
process A does an iput() on inode X, and it's i_count drops to 0. This causes
iput_final() to be called, which puts an inode into state I_FREEING at
generic_delete_inode(). There no point between when iput_final() is called, and
when I_FREEING is set where GFS2 could acquire any glocks. Once I_FREEING is
set, no other process on that node can successfully look up that inode until
the delete finishes.
process B locks the the resource group for the same inode in get_local_rgrp(),
which is called by gfs2_inplace_reserve_i()
process A tries to lock the resource group for the inode in
gfs2_dinode_dealloc(), but it's already locked by process B
process B waits in find_inode for the inode to have the I_FREEING state cleared.
Deadlock.
This patch solves the problem by adding an alternative to gfs2_iget(),
gfs2_iget_skip(), that simply skips any inodes that are in the I_FREEING
state.o The alternate test function is just like the original one, except that
it fails if the inode is being freed, and sets a skipped flag. The alternate
set function is just like the original, except that it fails if the skipped
flag is set. Only try_rgrp_unlink() calls gfs2_iget_skip() instead of
gfs2_iget().
Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r-- | fs/gfs2/dir.c | 2 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 58 | ||||
-rw-r--r-- | fs/gfs2/inode.h | 3 | ||||
-rw-r--r-- | fs/gfs2/ops_export.c | 2 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 2 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 2 |
6 files changed, 60 insertions, 9 deletions
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 08c6dd0c6713..9949bb746a52 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -1502,7 +1502,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) | |||
1502 | inode = gfs2_inode_lookup(dir->i_sb, | 1502 | inode = gfs2_inode_lookup(dir->i_sb, |
1503 | be16_to_cpu(dent->de_type), | 1503 | be16_to_cpu(dent->de_type), |
1504 | be64_to_cpu(dent->de_inum.no_addr), | 1504 | be64_to_cpu(dent->de_inum.no_addr), |
1505 | be64_to_cpu(dent->de_inum.no_formal_ino)); | 1505 | be64_to_cpu(dent->de_inum.no_formal_ino), 0); |
1506 | brelse(bh); | 1506 | brelse(bh); |
1507 | return inode; | 1507 | return inode; |
1508 | } | 1508 | } |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 013f00b90cc2..5f6dc32946cd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -77,6 +77,49 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) | |||
77 | return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); | 77 | return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); |
78 | } | 78 | } |
79 | 79 | ||
80 | struct gfs2_skip_data { | ||
81 | u64 no_addr; | ||
82 | int skipped; | ||
83 | }; | ||
84 | |||
85 | static int iget_skip_test(struct inode *inode, void *opaque) | ||
86 | { | ||
87 | struct gfs2_inode *ip = GFS2_I(inode); | ||
88 | struct gfs2_skip_data *data = opaque; | ||
89 | |||
90 | if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){ | ||
91 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ | ||
92 | data->skipped = 1; | ||
93 | return 0; | ||
94 | } | ||
95 | return 1; | ||
96 | } | ||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | static int iget_skip_set(struct inode *inode, void *opaque) | ||
101 | { | ||
102 | struct gfs2_inode *ip = GFS2_I(inode); | ||
103 | struct gfs2_skip_data *data = opaque; | ||
104 | |||
105 | if (data->skipped) | ||
106 | return 1; | ||
107 | inode->i_ino = (unsigned long)(data->no_addr); | ||
108 | ip->i_no_addr = data->no_addr; | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | static struct inode *gfs2_iget_skip(struct super_block *sb, | ||
113 | u64 no_addr) | ||
114 | { | ||
115 | struct gfs2_skip_data data; | ||
116 | unsigned long hash = (unsigned long)no_addr; | ||
117 | |||
118 | data.no_addr = no_addr; | ||
119 | data.skipped = 0; | ||
120 | return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); | ||
121 | } | ||
122 | |||
80 | /** | 123 | /** |
81 | * GFS2 lookup code fills in vfs inode contents based on info obtained | 124 | * GFS2 lookup code fills in vfs inode contents based on info obtained |
82 | * from directory entry inside gfs2_inode_lookup(). This has caused issues | 125 | * from directory entry inside gfs2_inode_lookup(). This has caused issues |
@@ -112,6 +155,7 @@ void gfs2_set_iop(struct inode *inode) | |||
112 | * @sb: The super block | 155 | * @sb: The super block |
113 | * @no_addr: The inode number | 156 | * @no_addr: The inode number |
114 | * @type: The type of the inode | 157 | * @type: The type of the inode |
158 | * @skip_freeing: set this not return an inode if it is currently being freed. | ||
115 | * | 159 | * |
116 | * Returns: A VFS inode, or an error | 160 | * Returns: A VFS inode, or an error |
117 | */ | 161 | */ |
@@ -119,13 +163,19 @@ void gfs2_set_iop(struct inode *inode) | |||
119 | struct inode *gfs2_inode_lookup(struct super_block *sb, | 163 | struct inode *gfs2_inode_lookup(struct super_block *sb, |
120 | unsigned int type, | 164 | unsigned int type, |
121 | u64 no_addr, | 165 | u64 no_addr, |
122 | u64 no_formal_ino) | 166 | u64 no_formal_ino, int skip_freeing) |
123 | { | 167 | { |
124 | struct inode *inode = gfs2_iget(sb, no_addr); | 168 | struct inode *inode; |
125 | struct gfs2_inode *ip = GFS2_I(inode); | 169 | struct gfs2_inode *ip; |
126 | struct gfs2_glock *io_gl; | 170 | struct gfs2_glock *io_gl; |
127 | int error; | 171 | int error; |
128 | 172 | ||
173 | if (skip_freeing) | ||
174 | inode = gfs2_iget_skip(sb, no_addr); | ||
175 | else | ||
176 | inode = gfs2_iget(sb, no_addr); | ||
177 | ip = GFS2_I(inode); | ||
178 | |||
129 | if (!inode) | 179 | if (!inode) |
130 | return ERR_PTR(-ENOBUFS); | 180 | return ERR_PTR(-ENOBUFS); |
131 | 181 | ||
@@ -949,7 +999,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
949 | 999 | ||
950 | inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), | 1000 | inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), |
951 | inum.no_addr, | 1001 | inum.no_addr, |
952 | inum.no_formal_ino); | 1002 | inum.no_formal_ino, 0); |
953 | if (IS_ERR(inode)) | 1003 | if (IS_ERR(inode)) |
954 | goto fail_gunlock2; | 1004 | goto fail_gunlock2; |
955 | 1005 | ||
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 4517ac82c01c..351ac87ab384 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -49,7 +49,8 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, | |||
49 | void gfs2_inode_attr_in(struct gfs2_inode *ip); | 49 | void gfs2_inode_attr_in(struct gfs2_inode *ip); |
50 | void gfs2_set_iop(struct inode *inode); | 50 | void gfs2_set_iop(struct inode *inode); |
51 | struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 51 | struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
52 | u64 no_addr, u64 no_formal_ino); | 52 | u64 no_addr, u64 no_formal_ino, |
53 | int skip_freeing); | ||
53 | struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); | 54 | struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); |
54 | 55 | ||
55 | int gfs2_inode_refresh(struct gfs2_inode *ip); | 56 | int gfs2_inode_refresh(struct gfs2_inode *ip); |
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index b8312edee0e4..e2d1347796a9 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c | |||
@@ -237,7 +237,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) | |||
237 | 237 | ||
238 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, | 238 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, |
239 | inum->no_addr, | 239 | inum->no_addr, |
240 | 0); | 240 | 0, 0); |
241 | if (!inode) | 241 | if (!inode) |
242 | goto fail; | 242 | goto fail; |
243 | if (IS_ERR(inode)) { | 243 | if (IS_ERR(inode)) { |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1ac9afa58c65..17de58e83d92 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -230,7 +230,7 @@ fail: | |||
230 | static inline struct inode *gfs2_lookup_root(struct super_block *sb, | 230 | static inline struct inode *gfs2_lookup_root(struct super_block *sb, |
231 | u64 no_addr) | 231 | u64 no_addr) |
232 | { | 232 | { |
233 | return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); | 233 | return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); |
234 | } | 234 | } |
235 | 235 | ||
236 | static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | 236 | static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 2d7f7ea0c9a8..708c287e1d0e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -884,7 +884,7 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) | |||
884 | continue; | 884 | continue; |
885 | *last_unlinked = no_addr; | 885 | *last_unlinked = no_addr; |
886 | inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, | 886 | inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, |
887 | no_addr, -1); | 887 | no_addr, -1, 1); |
888 | if (!IS_ERR(inode)) | 888 | if (!IS_ERR(inode)) |
889 | return inode; | 889 | return inode; |
890 | } | 890 | } |