diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-04 20:36:46 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-04 20:36:46 -0500 |
commit | c51797d25d84833804e9e6950c8c1abbe6c73808 (patch) | |
tree | 4b43585feca6e7d76cd67f51799de199ccb43d77 /fs | |
parent | 2cdcb2b5b5d6d7c1bdefbc1a63187d47fd809408 (diff) | |
parent | 44248affb5d82d32cdb58a92a94ce191d4ddee60 (diff) |
Merge tag 'for-linus-20160304' of git://git.infradead.org/linux-mtd
Pull jffs2 fixes from David Woodhouse:
"This contains two important JFFS2 fixes marked for stable:
- a lock ordering problem between the page lock and the internal
f->sem mutex, which was causing occasional deadlocks in garbage
collection
- a scan failure causing moved directories to sometimes end up
appearing to have hard links.
There are also a couple of trivial MAINTAINERS file updates"
* tag 'for-linus-20160304' of git://git.infradead.org/linux-mtd:
MAINTAINERS: add maintainer entry for FREESCALE GPMI NAND driver
Fix directory hardlinks from deleted directories
jffs2: Fix page lock / f->sem deadlock
Revert "jffs2: Fix lock acquisition order bug in jffs2_write_begin"
MAINTAINERS: update Han's email
Diffstat (limited to 'fs')
-rw-r--r-- | fs/jffs2/README.Locking | 5 | ||||
-rw-r--r-- | fs/jffs2/build.c | 75 | ||||
-rw-r--r-- | fs/jffs2/file.c | 39 | ||||
-rw-r--r-- | fs/jffs2/gc.c | 17 | ||||
-rw-r--r-- | fs/jffs2/nodelist.h | 6 |
5 files changed, 91 insertions, 51 deletions
diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking index 3ea36554107f..8918ac905a3b 100644 --- a/fs/jffs2/README.Locking +++ b/fs/jffs2/README.Locking | |||
@@ -2,10 +2,6 @@ | |||
2 | JFFS2 LOCKING DOCUMENTATION | 2 | JFFS2 LOCKING DOCUMENTATION |
3 | --------------------------- | 3 | --------------------------- |
4 | 4 | ||
5 | At least theoretically, JFFS2 does not require the Big Kernel Lock | ||
6 | (BKL), which was always helpfully obtained for it by Linux 2.4 VFS | ||
7 | code. It has its own locking, as described below. | ||
8 | |||
9 | This document attempts to describe the existing locking rules for | 5 | This document attempts to describe the existing locking rules for |
10 | JFFS2. It is not expected to remain perfectly up to date, but ought to | 6 | JFFS2. It is not expected to remain perfectly up to date, but ought to |
11 | be fairly close. | 7 | be fairly close. |
@@ -69,6 +65,7 @@ Ordering constraints: | |||
69 | any f->sem held. | 65 | any f->sem held. |
70 | 2. Never attempt to lock two file mutexes in one thread. | 66 | 2. Never attempt to lock two file mutexes in one thread. |
71 | No ordering rules have been made for doing so. | 67 | No ordering rules have been made for doing so. |
68 | 3. Never lock a page cache page with f->sem held. | ||
72 | 69 | ||
73 | 70 | ||
74 | erase_completion_lock spinlock | 71 | erase_completion_lock spinlock |
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index 0ae91ad6df2d..b288c8ae1236 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c | |||
@@ -50,7 +50,8 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c) | |||
50 | 50 | ||
51 | 51 | ||
52 | static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, | 52 | static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, |
53 | struct jffs2_inode_cache *ic) | 53 | struct jffs2_inode_cache *ic, |
54 | int *dir_hardlinks) | ||
54 | { | 55 | { |
55 | struct jffs2_full_dirent *fd; | 56 | struct jffs2_full_dirent *fd; |
56 | 57 | ||
@@ -69,19 +70,21 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, | |||
69 | dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n", | 70 | dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n", |
70 | fd->name, fd->ino, ic->ino); | 71 | fd->name, fd->ino, ic->ino); |
71 | jffs2_mark_node_obsolete(c, fd->raw); | 72 | jffs2_mark_node_obsolete(c, fd->raw); |
73 | /* Clear the ic/raw union so it doesn't cause problems later. */ | ||
74 | fd->ic = NULL; | ||
72 | continue; | 75 | continue; |
73 | } | 76 | } |
74 | 77 | ||
78 | /* From this point, fd->raw is no longer used so we can set fd->ic */ | ||
79 | fd->ic = child_ic; | ||
80 | child_ic->pino_nlink++; | ||
81 | /* If we appear (at this stage) to have hard-linked directories, | ||
82 | * set a flag to trigger a scan later */ | ||
75 | if (fd->type == DT_DIR) { | 83 | if (fd->type == DT_DIR) { |
76 | if (child_ic->pino_nlink) { | 84 | child_ic->flags |= INO_FLAGS_IS_DIR; |
77 | JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", | 85 | if (child_ic->pino_nlink > 1) |
78 | fd->name, fd->ino, ic->ino); | 86 | *dir_hardlinks = 1; |
79 | /* TODO: What do we do about it? */ | 87 | } |
80 | } else { | ||
81 | child_ic->pino_nlink = ic->ino; | ||
82 | } | ||
83 | } else | ||
84 | child_ic->pino_nlink++; | ||
85 | 88 | ||
86 | dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); | 89 | dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); |
87 | /* Can't free scan_dents so far. We might need them in pass 2 */ | 90 | /* Can't free scan_dents so far. We might need them in pass 2 */ |
@@ -95,8 +98,7 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, | |||
95 | */ | 98 | */ |
96 | static int jffs2_build_filesystem(struct jffs2_sb_info *c) | 99 | static int jffs2_build_filesystem(struct jffs2_sb_info *c) |
97 | { | 100 | { |
98 | int ret; | 101 | int ret, i, dir_hardlinks = 0; |
99 | int i; | ||
100 | struct jffs2_inode_cache *ic; | 102 | struct jffs2_inode_cache *ic; |
101 | struct jffs2_full_dirent *fd; | 103 | struct jffs2_full_dirent *fd; |
102 | struct jffs2_full_dirent *dead_fds = NULL; | 104 | struct jffs2_full_dirent *dead_fds = NULL; |
@@ -120,7 +122,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) | |||
120 | /* Now scan the directory tree, increasing nlink according to every dirent found. */ | 122 | /* Now scan the directory tree, increasing nlink according to every dirent found. */ |
121 | for_each_inode(i, c, ic) { | 123 | for_each_inode(i, c, ic) { |
122 | if (ic->scan_dents) { | 124 | if (ic->scan_dents) { |
123 | jffs2_build_inode_pass1(c, ic); | 125 | jffs2_build_inode_pass1(c, ic, &dir_hardlinks); |
124 | cond_resched(); | 126 | cond_resched(); |
125 | } | 127 | } |
126 | } | 128 | } |
@@ -156,6 +158,20 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) | |||
156 | } | 158 | } |
157 | 159 | ||
158 | dbg_fsbuild("pass 2a complete\n"); | 160 | dbg_fsbuild("pass 2a complete\n"); |
161 | |||
162 | if (dir_hardlinks) { | ||
163 | /* If we detected directory hardlinks earlier, *hopefully* | ||
164 | * they are gone now because some of the links were from | ||
165 | * dead directories which still had some old dirents lying | ||
166 | * around and not yet garbage-collected, but which have | ||
167 | * been discarded above. So clear the pino_nlink field | ||
168 | * in each directory, so that the final scan below can | ||
169 | * print appropriate warnings. */ | ||
170 | for_each_inode(i, c, ic) { | ||
171 | if (ic->flags & INO_FLAGS_IS_DIR) | ||
172 | ic->pino_nlink = 0; | ||
173 | } | ||
174 | } | ||
159 | dbg_fsbuild("freeing temporary data structures\n"); | 175 | dbg_fsbuild("freeing temporary data structures\n"); |
160 | 176 | ||
161 | /* Finally, we can scan again and free the dirent structs */ | 177 | /* Finally, we can scan again and free the dirent structs */ |
@@ -163,6 +179,33 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) | |||
163 | while(ic->scan_dents) { | 179 | while(ic->scan_dents) { |
164 | fd = ic->scan_dents; | 180 | fd = ic->scan_dents; |
165 | ic->scan_dents = fd->next; | 181 | ic->scan_dents = fd->next; |
182 | /* We do use the pino_nlink field to count nlink of | ||
183 | * directories during fs build, so set it to the | ||
184 | * parent ino# now. Now that there's hopefully only | ||
185 | * one. */ | ||
186 | if (fd->type == DT_DIR) { | ||
187 | if (!fd->ic) { | ||
188 | /* We'll have complained about it and marked the coresponding | ||
189 | raw node obsolete already. Just skip it. */ | ||
190 | continue; | ||
191 | } | ||
192 | |||
193 | /* We *have* to have set this in jffs2_build_inode_pass1() */ | ||
194 | BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR)); | ||
195 | |||
196 | /* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks | ||
197 | * is set. Otherwise, we know this should never trigger anyway, so | ||
198 | * we don't do the check. And ic->pino_nlink still contains the nlink | ||
199 | * value (which is 1). */ | ||
200 | if (dir_hardlinks && fd->ic->pino_nlink) { | ||
201 | JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n", | ||
202 | fd->name, fd->ino, ic->ino, fd->ic->pino_nlink); | ||
203 | /* Should we unlink it from its previous parent? */ | ||
204 | } | ||
205 | |||
206 | /* For directories, ic->pino_nlink holds that parent inode # */ | ||
207 | fd->ic->pino_nlink = ic->ino; | ||
208 | } | ||
166 | jffs2_free_full_dirent(fd); | 209 | jffs2_free_full_dirent(fd); |
167 | } | 210 | } |
168 | ic->scan_dents = NULL; | 211 | ic->scan_dents = NULL; |
@@ -241,11 +284,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c, | |||
241 | 284 | ||
242 | /* Reduce nlink of the child. If it's now zero, stick it on the | 285 | /* Reduce nlink of the child. If it's now zero, stick it on the |
243 | dead_fds list to be cleaned up later. Else just free the fd */ | 286 | dead_fds list to be cleaned up later. Else just free the fd */ |
244 | 287 | child_ic->pino_nlink--; | |
245 | if (fd->type == DT_DIR) | ||
246 | child_ic->pino_nlink = 0; | ||
247 | else | ||
248 | child_ic->pino_nlink--; | ||
249 | 288 | ||
250 | if (!child_ic->pino_nlink) { | 289 | if (!child_ic->pino_nlink) { |
251 | dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", | 290 | dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", |
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index c5ac5944bc1b..cad86bac3453 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c | |||
@@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
137 | struct page *pg; | 137 | struct page *pg; |
138 | struct inode *inode = mapping->host; | 138 | struct inode *inode = mapping->host; |
139 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); | 139 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); |
140 | struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); | ||
141 | struct jffs2_raw_inode ri; | ||
142 | uint32_t alloc_len = 0; | ||
143 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 140 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
144 | uint32_t pageofs = index << PAGE_CACHE_SHIFT; | 141 | uint32_t pageofs = index << PAGE_CACHE_SHIFT; |
145 | int ret = 0; | 142 | int ret = 0; |
146 | 143 | ||
147 | jffs2_dbg(1, "%s()\n", __func__); | ||
148 | |||
149 | if (pageofs > inode->i_size) { | ||
150 | ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, | ||
151 | ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); | ||
152 | if (ret) | ||
153 | return ret; | ||
154 | } | ||
155 | |||
156 | mutex_lock(&f->sem); | ||
157 | pg = grab_cache_page_write_begin(mapping, index, flags); | 144 | pg = grab_cache_page_write_begin(mapping, index, flags); |
158 | if (!pg) { | 145 | if (!pg) |
159 | if (alloc_len) | ||
160 | jffs2_complete_reservation(c); | ||
161 | mutex_unlock(&f->sem); | ||
162 | return -ENOMEM; | 146 | return -ENOMEM; |
163 | } | ||
164 | *pagep = pg; | 147 | *pagep = pg; |
165 | 148 | ||
166 | if (alloc_len) { | 149 | jffs2_dbg(1, "%s()\n", __func__); |
150 | |||
151 | if (pageofs > inode->i_size) { | ||
167 | /* Make new hole frag from old EOF to new page */ | 152 | /* Make new hole frag from old EOF to new page */ |
153 | struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); | ||
154 | struct jffs2_raw_inode ri; | ||
168 | struct jffs2_full_dnode *fn; | 155 | struct jffs2_full_dnode *fn; |
156 | uint32_t alloc_len; | ||
169 | 157 | ||
170 | jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", | 158 | jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", |
171 | (unsigned int)inode->i_size, pageofs); | 159 | (unsigned int)inode->i_size, pageofs); |
172 | 160 | ||
161 | ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, | ||
162 | ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); | ||
163 | if (ret) | ||
164 | goto out_page; | ||
165 | |||
166 | mutex_lock(&f->sem); | ||
173 | memset(&ri, 0, sizeof(ri)); | 167 | memset(&ri, 0, sizeof(ri)); |
174 | 168 | ||
175 | ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); | 169 | ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); |
@@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
196 | if (IS_ERR(fn)) { | 190 | if (IS_ERR(fn)) { |
197 | ret = PTR_ERR(fn); | 191 | ret = PTR_ERR(fn); |
198 | jffs2_complete_reservation(c); | 192 | jffs2_complete_reservation(c); |
193 | mutex_unlock(&f->sem); | ||
199 | goto out_page; | 194 | goto out_page; |
200 | } | 195 | } |
201 | ret = jffs2_add_full_dnode_to_inode(c, f, fn); | 196 | ret = jffs2_add_full_dnode_to_inode(c, f, fn); |
@@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
210 | jffs2_mark_node_obsolete(c, fn->raw); | 205 | jffs2_mark_node_obsolete(c, fn->raw); |
211 | jffs2_free_full_dnode(fn); | 206 | jffs2_free_full_dnode(fn); |
212 | jffs2_complete_reservation(c); | 207 | jffs2_complete_reservation(c); |
208 | mutex_unlock(&f->sem); | ||
213 | goto out_page; | 209 | goto out_page; |
214 | } | 210 | } |
215 | jffs2_complete_reservation(c); | 211 | jffs2_complete_reservation(c); |
216 | inode->i_size = pageofs; | 212 | inode->i_size = pageofs; |
213 | mutex_unlock(&f->sem); | ||
217 | } | 214 | } |
218 | 215 | ||
219 | /* | 216 | /* |
@@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
222 | * case of a short-copy. | 219 | * case of a short-copy. |
223 | */ | 220 | */ |
224 | if (!PageUptodate(pg)) { | 221 | if (!PageUptodate(pg)) { |
222 | mutex_lock(&f->sem); | ||
225 | ret = jffs2_do_readpage_nolock(inode, pg); | 223 | ret = jffs2_do_readpage_nolock(inode, pg); |
224 | mutex_unlock(&f->sem); | ||
226 | if (ret) | 225 | if (ret) |
227 | goto out_page; | 226 | goto out_page; |
228 | } | 227 | } |
229 | mutex_unlock(&f->sem); | ||
230 | jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); | 228 | jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); |
231 | return ret; | 229 | return ret; |
232 | 230 | ||
233 | out_page: | 231 | out_page: |
234 | unlock_page(pg); | 232 | unlock_page(pg); |
235 | page_cache_release(pg); | 233 | page_cache_release(pg); |
236 | mutex_unlock(&f->sem); | ||
237 | return ret; | 234 | return ret; |
238 | } | 235 | } |
239 | 236 | ||
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index 5a2dec2b064c..95d5880a63ee 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c | |||
@@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era | |||
1296 | BUG_ON(start > orig_start); | 1296 | BUG_ON(start > orig_start); |
1297 | } | 1297 | } |
1298 | 1298 | ||
1299 | /* First, use readpage() to read the appropriate page into the page cache */ | 1299 | /* The rules state that we must obtain the page lock *before* f->sem, so |
1300 | /* Q: What happens if we actually try to GC the _same_ page for which commit_write() | 1300 | * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's |
1301 | * triggered garbage collection in the first place? | 1301 | * actually going to *change* so we're safe; we only allow reading. |
1302 | * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the | 1302 | * |
1303 | * page OK. We'll actually write it out again in commit_write, which is a little | 1303 | * It is important to note that jffs2_write_begin() will ensure that its |
1304 | * suboptimal, but at least we're correct. | 1304 | * page is marked Uptodate before allocating space. That means that if we |
1305 | */ | 1305 | * end up here trying to GC the *same* page that jffs2_write_begin() is |
1306 | * trying to write out, read_cache_page() will not deadlock. */ | ||
1307 | mutex_unlock(&f->sem); | ||
1306 | pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg); | 1308 | pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg); |
1309 | mutex_lock(&f->sem); | ||
1307 | 1310 | ||
1308 | if (IS_ERR(pg_ptr)) { | 1311 | if (IS_ERR(pg_ptr)) { |
1309 | pr_warn("read_cache_page() returned error: %ld\n", | 1312 | pr_warn("read_cache_page() returned error: %ld\n", |
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index fa35ff79ab35..0637271f3770 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h | |||
@@ -194,6 +194,7 @@ struct jffs2_inode_cache { | |||
194 | #define INO_STATE_CLEARING 6 /* In clear_inode() */ | 194 | #define INO_STATE_CLEARING 6 /* In clear_inode() */ |
195 | 195 | ||
196 | #define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */ | 196 | #define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */ |
197 | #define INO_FLAGS_IS_DIR 0x02 /* is a directory */ | ||
197 | 198 | ||
198 | #define RAWNODE_CLASS_INODE_CACHE 0 | 199 | #define RAWNODE_CLASS_INODE_CACHE 0 |
199 | #define RAWNODE_CLASS_XATTR_DATUM 1 | 200 | #define RAWNODE_CLASS_XATTR_DATUM 1 |
@@ -249,7 +250,10 @@ struct jffs2_readinode_info | |||
249 | 250 | ||
250 | struct jffs2_full_dirent | 251 | struct jffs2_full_dirent |
251 | { | 252 | { |
252 | struct jffs2_raw_node_ref *raw; | 253 | union { |
254 | struct jffs2_raw_node_ref *raw; | ||
255 | struct jffs2_inode_cache *ic; /* Just during part of build */ | ||
256 | }; | ||
253 | struct jffs2_full_dirent *next; | 257 | struct jffs2_full_dirent *next; |
254 | uint32_t version; | 258 | uint32_t version; |
255 | uint32_t ino; /* == zero for unlink */ | 259 | uint32_t ino; /* == zero for unlink */ |