diff options
Diffstat (limited to 'fs')
286 files changed, 13782 insertions, 4810 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index 74e0723e90bc..795233702a4e 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig | |||
@@ -8,3 +8,12 @@ config 9P_FS | |||
8 | See <http://v9fs.sf.net> for more information. | 8 | See <http://v9fs.sf.net> for more information. |
9 | 9 | ||
10 | If unsure, say N. | 10 | If unsure, say N. |
11 | |||
12 | config 9P_FSCACHE | ||
13 | bool "Enable 9P client caching support (EXPERIMENTAL)" | ||
14 | depends on EXPERIMENTAL | ||
15 | depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y | ||
16 | help | ||
17 | Choose Y here to enable persistent, read-only local | ||
18 | caching support for 9p clients using FS-Cache | ||
19 | |||
diff --git a/fs/9p/Makefile b/fs/9p/Makefile index bc7f0d1551e6..1a940ec7af61 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile | |||
@@ -8,5 +8,6 @@ obj-$(CONFIG_9P_FS) := 9p.o | |||
8 | vfs_dir.o \ | 8 | vfs_dir.o \ |
9 | vfs_dentry.o \ | 9 | vfs_dentry.o \ |
10 | v9fs.o \ | 10 | v9fs.o \ |
11 | fid.o \ | 11 | fid.o |
12 | 12 | ||
13 | 9p-$(CONFIG_9P_FSCACHE) += cache.o | ||
diff --git a/fs/9p/cache.c b/fs/9p/cache.c new file mode 100644 index 000000000000..51c94e26a346 --- /dev/null +++ b/fs/9p/cache.c | |||
@@ -0,0 +1,474 @@ | |||
1 | /* | ||
2 | * V9FS cache definitions. | ||
3 | * | ||
4 | * Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to: | ||
17 | * Free Software Foundation | ||
18 | * 51 Franklin Street, Fifth Floor | ||
19 | * Boston, MA 02111-1301 USA | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/jiffies.h> | ||
24 | #include <linux/file.h> | ||
25 | #include <linux/stat.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/fs.h> | ||
28 | #include <net/9p/9p.h> | ||
29 | |||
30 | #include "v9fs.h" | ||
31 | #include "cache.h" | ||
32 | |||
33 | #define CACHETAG_LEN 11 | ||
34 | |||
35 | struct kmem_cache *vcookie_cache; | ||
36 | |||
37 | struct fscache_netfs v9fs_cache_netfs = { | ||
38 | .name = "9p", | ||
39 | .version = 0, | ||
40 | }; | ||
41 | |||
42 | static void init_once(void *foo) | ||
43 | { | ||
44 | struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo; | ||
45 | vcookie->fscache = NULL; | ||
46 | vcookie->qid = NULL; | ||
47 | inode_init_once(&vcookie->inode); | ||
48 | } | ||
49 | |||
50 | /** | ||
51 | * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain | ||
52 | * vcookie to inode mapping | ||
53 | * | ||
54 | * Returns 0 on success. | ||
55 | */ | ||
56 | |||
57 | static int v9fs_init_vcookiecache(void) | ||
58 | { | ||
59 | vcookie_cache = kmem_cache_create("vcookie_cache", | ||
60 | sizeof(struct v9fs_cookie), | ||
61 | 0, (SLAB_RECLAIM_ACCOUNT| | ||
62 | SLAB_MEM_SPREAD), | ||
63 | init_once); | ||
64 | if (!vcookie_cache) | ||
65 | return -ENOMEM; | ||
66 | |||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | /** | ||
71 | * v9fs_destroy_vcookiecache - destroy the cache of vcookies | ||
72 | * | ||
73 | */ | ||
74 | |||
75 | static void v9fs_destroy_vcookiecache(void) | ||
76 | { | ||
77 | kmem_cache_destroy(vcookie_cache); | ||
78 | } | ||
79 | |||
80 | int __v9fs_cache_register(void) | ||
81 | { | ||
82 | int ret; | ||
83 | ret = v9fs_init_vcookiecache(); | ||
84 | if (ret < 0) | ||
85 | return ret; | ||
86 | |||
87 | return fscache_register_netfs(&v9fs_cache_netfs); | ||
88 | } | ||
89 | |||
90 | void __v9fs_cache_unregister(void) | ||
91 | { | ||
92 | v9fs_destroy_vcookiecache(); | ||
93 | fscache_unregister_netfs(&v9fs_cache_netfs); | ||
94 | } | ||
95 | |||
96 | /** | ||
97 | * v9fs_random_cachetag - Generate a random tag to be associated | ||
98 | * with a new cache session. | ||
99 | * | ||
100 | * The value of jiffies is used for a fairly randomly cache tag. | ||
101 | */ | ||
102 | |||
103 | static | ||
104 | int v9fs_random_cachetag(struct v9fs_session_info *v9ses) | ||
105 | { | ||
106 | v9ses->cachetag = kmalloc(CACHETAG_LEN, GFP_KERNEL); | ||
107 | if (!v9ses->cachetag) | ||
108 | return -ENOMEM; | ||
109 | |||
110 | return scnprintf(v9ses->cachetag, CACHETAG_LEN, "%lu", jiffies); | ||
111 | } | ||
112 | |||
113 | static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data, | ||
114 | void *buffer, uint16_t bufmax) | ||
115 | { | ||
116 | struct v9fs_session_info *v9ses; | ||
117 | uint16_t klen = 0; | ||
118 | |||
119 | v9ses = (struct v9fs_session_info *)cookie_netfs_data; | ||
120 | P9_DPRINTK(P9_DEBUG_FSC, "session %p buf %p size %u", v9ses, | ||
121 | buffer, bufmax); | ||
122 | |||
123 | if (v9ses->cachetag) | ||
124 | klen = strlen(v9ses->cachetag); | ||
125 | |||
126 | if (klen > bufmax) | ||
127 | return 0; | ||
128 | |||
129 | memcpy(buffer, v9ses->cachetag, klen); | ||
130 | P9_DPRINTK(P9_DEBUG_FSC, "cache session tag %s", v9ses->cachetag); | ||
131 | return klen; | ||
132 | } | ||
133 | |||
134 | const struct fscache_cookie_def v9fs_cache_session_index_def = { | ||
135 | .name = "9P.session", | ||
136 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
137 | .get_key = v9fs_cache_session_get_key, | ||
138 | }; | ||
139 | |||
140 | void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) | ||
141 | { | ||
142 | /* If no cache session tag was specified, we generate a random one. */ | ||
143 | if (!v9ses->cachetag) | ||
144 | v9fs_random_cachetag(v9ses); | ||
145 | |||
146 | v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index, | ||
147 | &v9fs_cache_session_index_def, | ||
148 | v9ses); | ||
149 | P9_DPRINTK(P9_DEBUG_FSC, "session %p get cookie %p", v9ses, | ||
150 | v9ses->fscache); | ||
151 | } | ||
152 | |||
153 | void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses) | ||
154 | { | ||
155 | P9_DPRINTK(P9_DEBUG_FSC, "session %p put cookie %p", v9ses, | ||
156 | v9ses->fscache); | ||
157 | fscache_relinquish_cookie(v9ses->fscache, 0); | ||
158 | v9ses->fscache = NULL; | ||
159 | } | ||
160 | |||
161 | |||
162 | static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, | ||
163 | void *buffer, uint16_t bufmax) | ||
164 | { | ||
165 | const struct v9fs_cookie *vcookie = cookie_netfs_data; | ||
166 | memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path)); | ||
167 | |||
168 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode, | ||
169 | vcookie->qid->path); | ||
170 | return sizeof(vcookie->qid->path); | ||
171 | } | ||
172 | |||
173 | static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, | ||
174 | uint64_t *size) | ||
175 | { | ||
176 | const struct v9fs_cookie *vcookie = cookie_netfs_data; | ||
177 | *size = i_size_read(&vcookie->inode); | ||
178 | |||
179 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode, | ||
180 | *size); | ||
181 | } | ||
182 | |||
183 | static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, | ||
184 | void *buffer, uint16_t buflen) | ||
185 | { | ||
186 | const struct v9fs_cookie *vcookie = cookie_netfs_data; | ||
187 | memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version)); | ||
188 | |||
189 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode, | ||
190 | vcookie->qid->version); | ||
191 | return sizeof(vcookie->qid->version); | ||
192 | } | ||
193 | |||
194 | static enum | ||
195 | fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data, | ||
196 | const void *buffer, | ||
197 | uint16_t buflen) | ||
198 | { | ||
199 | const struct v9fs_cookie *vcookie = cookie_netfs_data; | ||
200 | |||
201 | if (buflen != sizeof(vcookie->qid->version)) | ||
202 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
203 | |||
204 | if (memcmp(buffer, &vcookie->qid->version, | ||
205 | sizeof(vcookie->qid->version))) | ||
206 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
207 | |||
208 | return FSCACHE_CHECKAUX_OKAY; | ||
209 | } | ||
210 | |||
211 | static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data) | ||
212 | { | ||
213 | struct v9fs_cookie *vcookie = cookie_netfs_data; | ||
214 | struct pagevec pvec; | ||
215 | pgoff_t first; | ||
216 | int loop, nr_pages; | ||
217 | |||
218 | pagevec_init(&pvec, 0); | ||
219 | first = 0; | ||
220 | |||
221 | for (;;) { | ||
222 | nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping, | ||
223 | first, | ||
224 | PAGEVEC_SIZE - pagevec_count(&pvec)); | ||
225 | if (!nr_pages) | ||
226 | break; | ||
227 | |||
228 | for (loop = 0; loop < nr_pages; loop++) | ||
229 | ClearPageFsCache(pvec.pages[loop]); | ||
230 | |||
231 | first = pvec.pages[nr_pages - 1]->index + 1; | ||
232 | |||
233 | pvec.nr = nr_pages; | ||
234 | pagevec_release(&pvec); | ||
235 | cond_resched(); | ||
236 | } | ||
237 | } | ||
238 | |||
239 | const struct fscache_cookie_def v9fs_cache_inode_index_def = { | ||
240 | .name = "9p.inode", | ||
241 | .type = FSCACHE_COOKIE_TYPE_DATAFILE, | ||
242 | .get_key = v9fs_cache_inode_get_key, | ||
243 | .get_attr = v9fs_cache_inode_get_attr, | ||
244 | .get_aux = v9fs_cache_inode_get_aux, | ||
245 | .check_aux = v9fs_cache_inode_check_aux, | ||
246 | .now_uncached = v9fs_cache_inode_now_uncached, | ||
247 | }; | ||
248 | |||
249 | void v9fs_cache_inode_get_cookie(struct inode *inode) | ||
250 | { | ||
251 | struct v9fs_cookie *vcookie; | ||
252 | struct v9fs_session_info *v9ses; | ||
253 | |||
254 | if (!S_ISREG(inode->i_mode)) | ||
255 | return; | ||
256 | |||
257 | vcookie = v9fs_inode2cookie(inode); | ||
258 | if (vcookie->fscache) | ||
259 | return; | ||
260 | |||
261 | v9ses = v9fs_inode2v9ses(inode); | ||
262 | vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, | ||
263 | &v9fs_cache_inode_index_def, | ||
264 | vcookie); | ||
265 | |||
266 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode, | ||
267 | vcookie->fscache); | ||
268 | } | ||
269 | |||
270 | void v9fs_cache_inode_put_cookie(struct inode *inode) | ||
271 | { | ||
272 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | ||
273 | |||
274 | if (!vcookie->fscache) | ||
275 | return; | ||
276 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode, | ||
277 | vcookie->fscache); | ||
278 | |||
279 | fscache_relinquish_cookie(vcookie->fscache, 0); | ||
280 | vcookie->fscache = NULL; | ||
281 | } | ||
282 | |||
283 | void v9fs_cache_inode_flush_cookie(struct inode *inode) | ||
284 | { | ||
285 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | ||
286 | |||
287 | if (!vcookie->fscache) | ||
288 | return; | ||
289 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode, | ||
290 | vcookie->fscache); | ||
291 | |||
292 | fscache_relinquish_cookie(vcookie->fscache, 1); | ||
293 | vcookie->fscache = NULL; | ||
294 | } | ||
295 | |||
296 | void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp) | ||
297 | { | ||
298 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | ||
299 | struct p9_fid *fid; | ||
300 | |||
301 | if (!vcookie->fscache) | ||
302 | return; | ||
303 | |||
304 | spin_lock(&vcookie->lock); | ||
305 | fid = filp->private_data; | ||
306 | if ((filp->f_flags & O_ACCMODE) != O_RDONLY) | ||
307 | v9fs_cache_inode_flush_cookie(inode); | ||
308 | else | ||
309 | v9fs_cache_inode_get_cookie(inode); | ||
310 | |||
311 | spin_unlock(&vcookie->lock); | ||
312 | } | ||
313 | |||
314 | void v9fs_cache_inode_reset_cookie(struct inode *inode) | ||
315 | { | ||
316 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | ||
317 | struct v9fs_session_info *v9ses; | ||
318 | struct fscache_cookie *old; | ||
319 | |||
320 | if (!vcookie->fscache) | ||
321 | return; | ||
322 | |||
323 | old = vcookie->fscache; | ||
324 | |||
325 | spin_lock(&vcookie->lock); | ||
326 | fscache_relinquish_cookie(vcookie->fscache, 1); | ||
327 | |||
328 | v9ses = v9fs_inode2v9ses(inode); | ||
329 | vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, | ||
330 | &v9fs_cache_inode_index_def, | ||
331 | vcookie); | ||
332 | |||
333 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p", | ||
334 | inode, old, vcookie->fscache); | ||
335 | |||
336 | spin_unlock(&vcookie->lock); | ||
337 | } | ||
338 | |||
339 | int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) | ||
340 | { | ||
341 | struct inode *inode = page->mapping->host; | ||
342 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | ||
343 | |||
344 | BUG_ON(!vcookie->fscache); | ||
345 | |||
346 | if (PageFsCache(page)) { | ||
347 | if (fscache_check_page_write(vcookie->fscache, page)) { | ||
348 | if (!(gfp & __GFP_WAIT)) | ||
349 | return 0; | ||
350 | fscache_wait_on_page_write(vcookie->fscache, page); | ||
351 | } | ||
352 | |||
353 | fscache_uncache_page(vcookie->fscache, page); | ||
354 | ClearPageFsCache(page); | ||
355 | } | ||
356 | |||
357 | return 1; | ||
358 | } | ||
359 | |||
360 | void __v9fs_fscache_invalidate_page(struct page *page) | ||
361 | { | ||
362 | struct inode *inode = page->mapping->host; | ||
363 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | ||
364 | |||
365 | BUG_ON(!vcookie->fscache); | ||
366 | |||
367 | if (PageFsCache(page)) { | ||
368 | fscache_wait_on_page_write(vcookie->fscache, page); | ||
369 | BUG_ON(!PageLocked(page)); | ||
370 | fscache_uncache_page(vcookie->fscache, page); | ||
371 | ClearPageFsCache(page); | ||
372 | } | ||
373 | } | ||
374 | |||
375 | static void v9fs_vfs_readpage_complete(struct page *page, void *data, | ||
376 | int error) | ||
377 | { | ||
378 | if (!error) | ||
379 | SetPageUptodate(page); | ||
380 | |||
381 | unlock_page(page); | ||
382 | } | ||
383 | |||
384 | /** | ||
385 | * __v9fs_readpage_from_fscache - read a page from cache | ||
386 | * | ||
387 | * Returns 0 if the pages are in cache and a BIO is submitted, | ||
388 | * 1 if the pages are not in cache and -error otherwise. | ||
389 | */ | ||
390 | |||
391 | int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) | ||
392 | { | ||
393 | int ret; | ||
394 | const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | ||
395 | |||
396 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); | ||
397 | if (!vcookie->fscache) | ||
398 | return -ENOBUFS; | ||
399 | |||
400 | ret = fscache_read_or_alloc_page(vcookie->fscache, | ||
401 | page, | ||
402 | v9fs_vfs_readpage_complete, | ||
403 | NULL, | ||
404 | GFP_KERNEL); | ||
405 | switch (ret) { | ||
406 | case -ENOBUFS: | ||
407 | case -ENODATA: | ||
408 | P9_DPRINTK(P9_DEBUG_FSC, "page/inode not in cache %d", ret); | ||
409 | return 1; | ||
410 | case 0: | ||
411 | P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted"); | ||
412 | return ret; | ||
413 | default: | ||
414 | P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret); | ||
415 | return ret; | ||
416 | } | ||
417 | } | ||
418 | |||
419 | /** | ||
420 | * __v9fs_readpages_from_fscache - read multiple pages from cache | ||
421 | * | ||
422 | * Returns 0 if the pages are in cache and a BIO is submitted, | ||
423 | * 1 if the pages are not in cache and -error otherwise. | ||
424 | */ | ||
425 | |||
426 | int __v9fs_readpages_from_fscache(struct inode *inode, | ||
427 | struct address_space *mapping, | ||
428 | struct list_head *pages, | ||
429 | unsigned *nr_pages) | ||
430 | { | ||
431 | int ret; | ||
432 | const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | ||
433 | |||
434 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages); | ||
435 | if (!vcookie->fscache) | ||
436 | return -ENOBUFS; | ||
437 | |||
438 | ret = fscache_read_or_alloc_pages(vcookie->fscache, | ||
439 | mapping, pages, nr_pages, | ||
440 | v9fs_vfs_readpage_complete, | ||
441 | NULL, | ||
442 | mapping_gfp_mask(mapping)); | ||
443 | switch (ret) { | ||
444 | case -ENOBUFS: | ||
445 | case -ENODATA: | ||
446 | P9_DPRINTK(P9_DEBUG_FSC, "pages/inodes not in cache %d", ret); | ||
447 | return 1; | ||
448 | case 0: | ||
449 | BUG_ON(!list_empty(pages)); | ||
450 | BUG_ON(*nr_pages != 0); | ||
451 | P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted"); | ||
452 | return ret; | ||
453 | default: | ||
454 | P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret); | ||
455 | return ret; | ||
456 | } | ||
457 | } | ||
458 | |||
459 | /** | ||
460 | * __v9fs_readpage_to_fscache - write a page to the cache | ||
461 | * | ||
462 | */ | ||
463 | |||
464 | void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) | ||
465 | { | ||
466 | int ret; | ||
467 | const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | ||
468 | |||
469 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); | ||
470 | ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL); | ||
471 | P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret); | ||
472 | if (ret != 0) | ||
473 | v9fs_uncache_page(inode, page); | ||
474 | } | ||
diff --git a/fs/9p/cache.h b/fs/9p/cache.h new file mode 100644 index 000000000000..a94192bfaee8 --- /dev/null +++ b/fs/9p/cache.h | |||
@@ -0,0 +1,176 @@ | |||
1 | /* | ||
2 | * V9FS cache definitions. | ||
3 | * | ||
4 | * Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to: | ||
17 | * Free Software Foundation | ||
18 | * 51 Franklin Street, Fifth Floor | ||
19 | * Boston, MA 02111-1301 USA | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #ifndef _9P_CACHE_H | ||
24 | #ifdef CONFIG_9P_FSCACHE | ||
25 | #include <linux/fscache.h> | ||
26 | #include <linux/spinlock.h> | ||
27 | |||
28 | extern struct kmem_cache *vcookie_cache; | ||
29 | |||
30 | struct v9fs_cookie { | ||
31 | spinlock_t lock; | ||
32 | struct inode inode; | ||
33 | struct fscache_cookie *fscache; | ||
34 | struct p9_qid *qid; | ||
35 | }; | ||
36 | |||
37 | static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode) | ||
38 | { | ||
39 | return container_of(inode, struct v9fs_cookie, inode); | ||
40 | } | ||
41 | |||
42 | extern struct fscache_netfs v9fs_cache_netfs; | ||
43 | extern const struct fscache_cookie_def v9fs_cache_session_index_def; | ||
44 | extern const struct fscache_cookie_def v9fs_cache_inode_index_def; | ||
45 | |||
46 | extern void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses); | ||
47 | extern void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses); | ||
48 | |||
49 | extern void v9fs_cache_inode_get_cookie(struct inode *inode); | ||
50 | extern void v9fs_cache_inode_put_cookie(struct inode *inode); | ||
51 | extern void v9fs_cache_inode_flush_cookie(struct inode *inode); | ||
52 | extern void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp); | ||
53 | extern void v9fs_cache_inode_reset_cookie(struct inode *inode); | ||
54 | |||
55 | extern int __v9fs_cache_register(void); | ||
56 | extern void __v9fs_cache_unregister(void); | ||
57 | |||
58 | extern int __v9fs_fscache_release_page(struct page *page, gfp_t gfp); | ||
59 | extern void __v9fs_fscache_invalidate_page(struct page *page); | ||
60 | extern int __v9fs_readpage_from_fscache(struct inode *inode, | ||
61 | struct page *page); | ||
62 | extern int __v9fs_readpages_from_fscache(struct inode *inode, | ||
63 | struct address_space *mapping, | ||
64 | struct list_head *pages, | ||
65 | unsigned *nr_pages); | ||
66 | extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page); | ||
67 | |||
68 | |||
69 | /** | ||
70 | * v9fs_cache_register - Register v9fs file system with the cache | ||
71 | */ | ||
72 | static inline int v9fs_cache_register(void) | ||
73 | { | ||
74 | return __v9fs_cache_register(); | ||
75 | } | ||
76 | |||
77 | /** | ||
78 | * v9fs_cache_unregister - Unregister v9fs from the cache | ||
79 | */ | ||
80 | static inline void v9fs_cache_unregister(void) | ||
81 | { | ||
82 | __v9fs_cache_unregister(); | ||
83 | } | ||
84 | |||
85 | static inline int v9fs_fscache_release_page(struct page *page, | ||
86 | gfp_t gfp) | ||
87 | { | ||
88 | return __v9fs_fscache_release_page(page, gfp); | ||
89 | } | ||
90 | |||
91 | static inline void v9fs_fscache_invalidate_page(struct page *page) | ||
92 | { | ||
93 | __v9fs_fscache_invalidate_page(page); | ||
94 | } | ||
95 | |||
96 | static inline int v9fs_readpage_from_fscache(struct inode *inode, | ||
97 | struct page *page) | ||
98 | { | ||
99 | return __v9fs_readpage_from_fscache(inode, page); | ||
100 | } | ||
101 | |||
102 | static inline int v9fs_readpages_from_fscache(struct inode *inode, | ||
103 | struct address_space *mapping, | ||
104 | struct list_head *pages, | ||
105 | unsigned *nr_pages) | ||
106 | { | ||
107 | return __v9fs_readpages_from_fscache(inode, mapping, pages, | ||
108 | nr_pages); | ||
109 | } | ||
110 | |||
111 | static inline void v9fs_readpage_to_fscache(struct inode *inode, | ||
112 | struct page *page) | ||
113 | { | ||
114 | if (PageFsCache(page)) | ||
115 | __v9fs_readpage_to_fscache(inode, page); | ||
116 | } | ||
117 | |||
118 | static inline void v9fs_uncache_page(struct inode *inode, struct page *page) | ||
119 | { | ||
120 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | ||
121 | fscache_uncache_page(vcookie->fscache, page); | ||
122 | BUG_ON(PageFsCache(page)); | ||
123 | } | ||
124 | |||
125 | static inline void v9fs_vcookie_set_qid(struct inode *inode, | ||
126 | struct p9_qid *qid) | ||
127 | { | ||
128 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | ||
129 | spin_lock(&vcookie->lock); | ||
130 | vcookie->qid = qid; | ||
131 | spin_unlock(&vcookie->lock); | ||
132 | } | ||
133 | |||
134 | #else /* CONFIG_9P_FSCACHE */ | ||
135 | |||
136 | static inline int v9fs_cache_register(void) | ||
137 | { | ||
138 | return 1; | ||
139 | } | ||
140 | |||
141 | static inline void v9fs_cache_unregister(void) {} | ||
142 | |||
143 | static inline int v9fs_fscache_release_page(struct page *page, | ||
144 | gfp_t gfp) { | ||
145 | return 1; | ||
146 | } | ||
147 | |||
148 | static inline void v9fs_fscache_invalidate_page(struct page *page) {} | ||
149 | |||
150 | static inline int v9fs_readpage_from_fscache(struct inode *inode, | ||
151 | struct page *page) | ||
152 | { | ||
153 | return -ENOBUFS; | ||
154 | } | ||
155 | |||
156 | static inline int v9fs_readpages_from_fscache(struct inode *inode, | ||
157 | struct address_space *mapping, | ||
158 | struct list_head *pages, | ||
159 | unsigned *nr_pages) | ||
160 | { | ||
161 | return -ENOBUFS; | ||
162 | } | ||
163 | |||
164 | static inline void v9fs_readpage_to_fscache(struct inode *inode, | ||
165 | struct page *page) | ||
166 | {} | ||
167 | |||
168 | static inline void v9fs_uncache_page(struct inode *inode, struct page *page) | ||
169 | {} | ||
170 | |||
171 | static inline void v9fs_vcookie_set_qid(struct inode *inode, | ||
172 | struct p9_qid *qid) | ||
173 | {} | ||
174 | |||
175 | #endif /* CONFIG_9P_FSCACHE */ | ||
176 | #endif /* _9P_CACHE_H */ | ||
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index f7003cfac63d..cf62b05e296a 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -34,21 +34,25 @@ | |||
34 | #include <net/9p/transport.h> | 34 | #include <net/9p/transport.h> |
35 | #include "v9fs.h" | 35 | #include "v9fs.h" |
36 | #include "v9fs_vfs.h" | 36 | #include "v9fs_vfs.h" |
37 | #include "cache.h" | ||
38 | |||
39 | static DEFINE_SPINLOCK(v9fs_sessionlist_lock); | ||
40 | static LIST_HEAD(v9fs_sessionlist); | ||
37 | 41 | ||
38 | /* | 42 | /* |
39 | * Option Parsing (code inspired by NFS code) | 43 | * Option Parsing (code inspired by NFS code) |
40 | * NOTE: each transport will parse its own options | 44 | * NOTE: each transport will parse its own options |
41 | */ | 45 | */ |
42 | 46 | ||
43 | enum { | 47 | enum { |
44 | /* Options that take integer arguments */ | 48 | /* Options that take integer arguments */ |
45 | Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid, | 49 | Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid, |
46 | /* String options */ | 50 | /* String options */ |
47 | Opt_uname, Opt_remotename, Opt_trans, | 51 | Opt_uname, Opt_remotename, Opt_trans, Opt_cache, Opt_cachetag, |
48 | /* Options that take no arguments */ | 52 | /* Options that take no arguments */ |
49 | Opt_nodevmap, | 53 | Opt_nodevmap, |
50 | /* Cache options */ | 54 | /* Cache options */ |
51 | Opt_cache_loose, | 55 | Opt_cache_loose, Opt_fscache, |
52 | /* Access options */ | 56 | /* Access options */ |
53 | Opt_access, | 57 | Opt_access, |
54 | /* Error token */ | 58 | /* Error token */ |
@@ -63,8 +67,10 @@ static const match_table_t tokens = { | |||
63 | {Opt_uname, "uname=%s"}, | 67 | {Opt_uname, "uname=%s"}, |
64 | {Opt_remotename, "aname=%s"}, | 68 | {Opt_remotename, "aname=%s"}, |
65 | {Opt_nodevmap, "nodevmap"}, | 69 | {Opt_nodevmap, "nodevmap"}, |
66 | {Opt_cache_loose, "cache=loose"}, | 70 | {Opt_cache, "cache=%s"}, |
67 | {Opt_cache_loose, "loose"}, | 71 | {Opt_cache_loose, "loose"}, |
72 | {Opt_fscache, "fscache"}, | ||
73 | {Opt_cachetag, "cachetag=%s"}, | ||
68 | {Opt_access, "access=%s"}, | 74 | {Opt_access, "access=%s"}, |
69 | {Opt_err, NULL} | 75 | {Opt_err, NULL} |
70 | }; | 76 | }; |
@@ -89,16 +95,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
89 | v9ses->afid = ~0; | 95 | v9ses->afid = ~0; |
90 | v9ses->debug = 0; | 96 | v9ses->debug = 0; |
91 | v9ses->cache = 0; | 97 | v9ses->cache = 0; |
98 | #ifdef CONFIG_9P_FSCACHE | ||
99 | v9ses->cachetag = NULL; | ||
100 | #endif | ||
92 | 101 | ||
93 | if (!opts) | 102 | if (!opts) |
94 | return 0; | 103 | return 0; |
95 | 104 | ||
96 | options = kstrdup(opts, GFP_KERNEL); | 105 | options = kstrdup(opts, GFP_KERNEL); |
97 | if (!options) { | 106 | if (!options) |
98 | P9_DPRINTK(P9_DEBUG_ERROR, | 107 | goto fail_option_alloc; |
99 | "failed to allocate copy of option string\n"); | ||
100 | return -ENOMEM; | ||
101 | } | ||
102 | 108 | ||
103 | while ((p = strsep(&options, ",")) != NULL) { | 109 | while ((p = strsep(&options, ",")) != NULL) { |
104 | int token; | 110 | int token; |
@@ -143,16 +149,33 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
143 | case Opt_cache_loose: | 149 | case Opt_cache_loose: |
144 | v9ses->cache = CACHE_LOOSE; | 150 | v9ses->cache = CACHE_LOOSE; |
145 | break; | 151 | break; |
152 | case Opt_fscache: | ||
153 | v9ses->cache = CACHE_FSCACHE; | ||
154 | break; | ||
155 | case Opt_cachetag: | ||
156 | #ifdef CONFIG_9P_FSCACHE | ||
157 | v9ses->cachetag = match_strdup(&args[0]); | ||
158 | #endif | ||
159 | break; | ||
160 | case Opt_cache: | ||
161 | s = match_strdup(&args[0]); | ||
162 | if (!s) | ||
163 | goto fail_option_alloc; | ||
164 | |||
165 | if (strcmp(s, "loose") == 0) | ||
166 | v9ses->cache = CACHE_LOOSE; | ||
167 | else if (strcmp(s, "fscache") == 0) | ||
168 | v9ses->cache = CACHE_FSCACHE; | ||
169 | else | ||
170 | v9ses->cache = CACHE_NONE; | ||
171 | kfree(s); | ||
172 | break; | ||
146 | 173 | ||
147 | case Opt_access: | 174 | case Opt_access: |
148 | s = match_strdup(&args[0]); | 175 | s = match_strdup(&args[0]); |
149 | if (!s) { | 176 | if (!s) |
150 | P9_DPRINTK(P9_DEBUG_ERROR, | 177 | goto fail_option_alloc; |
151 | "failed to allocate copy" | 178 | |
152 | " of option argument\n"); | ||
153 | ret = -ENOMEM; | ||
154 | break; | ||
155 | } | ||
156 | v9ses->flags &= ~V9FS_ACCESS_MASK; | 179 | v9ses->flags &= ~V9FS_ACCESS_MASK; |
157 | if (strcmp(s, "user") == 0) | 180 | if (strcmp(s, "user") == 0) |
158 | v9ses->flags |= V9FS_ACCESS_USER; | 181 | v9ses->flags |= V9FS_ACCESS_USER; |
@@ -173,6 +196,11 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
173 | } | 196 | } |
174 | kfree(options); | 197 | kfree(options); |
175 | return ret; | 198 | return ret; |
199 | |||
200 | fail_option_alloc: | ||
201 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
202 | "failed to allocate copy of option argument\n"); | ||
203 | return -ENOMEM; | ||
176 | } | 204 | } |
177 | 205 | ||
178 | /** | 206 | /** |
@@ -200,6 +228,10 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
200 | return ERR_PTR(-ENOMEM); | 228 | return ERR_PTR(-ENOMEM); |
201 | } | 229 | } |
202 | 230 | ||
231 | spin_lock(&v9fs_sessionlist_lock); | ||
232 | list_add(&v9ses->slist, &v9fs_sessionlist); | ||
233 | spin_unlock(&v9fs_sessionlist_lock); | ||
234 | |||
203 | v9ses->flags = V9FS_EXTENDED | V9FS_ACCESS_USER; | 235 | v9ses->flags = V9FS_EXTENDED | V9FS_ACCESS_USER; |
204 | strcpy(v9ses->uname, V9FS_DEFUSER); | 236 | strcpy(v9ses->uname, V9FS_DEFUSER); |
205 | strcpy(v9ses->aname, V9FS_DEFANAME); | 237 | strcpy(v9ses->aname, V9FS_DEFANAME); |
@@ -249,6 +281,11 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
249 | else | 281 | else |
250 | fid->uid = ~0; | 282 | fid->uid = ~0; |
251 | 283 | ||
284 | #ifdef CONFIG_9P_FSCACHE | ||
285 | /* register the session for caching */ | ||
286 | v9fs_cache_session_get_cookie(v9ses); | ||
287 | #endif | ||
288 | |||
252 | return fid; | 289 | return fid; |
253 | 290 | ||
254 | error: | 291 | error: |
@@ -268,8 +305,18 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) | |||
268 | v9ses->clnt = NULL; | 305 | v9ses->clnt = NULL; |
269 | } | 306 | } |
270 | 307 | ||
308 | #ifdef CONFIG_9P_FSCACHE | ||
309 | if (v9ses->fscache) { | ||
310 | v9fs_cache_session_put_cookie(v9ses); | ||
311 | kfree(v9ses->cachetag); | ||
312 | } | ||
313 | #endif | ||
271 | __putname(v9ses->uname); | 314 | __putname(v9ses->uname); |
272 | __putname(v9ses->aname); | 315 | __putname(v9ses->aname); |
316 | |||
317 | spin_lock(&v9fs_sessionlist_lock); | ||
318 | list_del(&v9ses->slist); | ||
319 | spin_unlock(&v9fs_sessionlist_lock); | ||
273 | } | 320 | } |
274 | 321 | ||
275 | /** | 322 | /** |
@@ -286,25 +333,132 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses) { | |||
286 | 333 | ||
287 | extern int v9fs_error_init(void); | 334 | extern int v9fs_error_init(void); |
288 | 335 | ||
336 | static struct kobject *v9fs_kobj; | ||
337 | |||
338 | #ifdef CONFIG_9P_FSCACHE | ||
289 | /** | 339 | /** |
290 | * v9fs_init - Initialize module | 340 | * caches_show - list caches associated with a session |
341 | * | ||
342 | * Returns the size of buffer written. | ||
343 | */ | ||
344 | |||
345 | static ssize_t caches_show(struct kobject *kobj, | ||
346 | struct kobj_attribute *attr, | ||
347 | char *buf) | ||
348 | { | ||
349 | ssize_t n = 0, count = 0, limit = PAGE_SIZE; | ||
350 | struct v9fs_session_info *v9ses; | ||
351 | |||
352 | spin_lock(&v9fs_sessionlist_lock); | ||
353 | list_for_each_entry(v9ses, &v9fs_sessionlist, slist) { | ||
354 | if (v9ses->cachetag) { | ||
355 | n = snprintf(buf, limit, "%s\n", v9ses->cachetag); | ||
356 | if (n < 0) { | ||
357 | count = n; | ||
358 | break; | ||
359 | } | ||
360 | |||
361 | count += n; | ||
362 | limit -= n; | ||
363 | } | ||
364 | } | ||
365 | |||
366 | spin_unlock(&v9fs_sessionlist_lock); | ||
367 | return count; | ||
368 | } | ||
369 | |||
370 | static struct kobj_attribute v9fs_attr_cache = __ATTR_RO(caches); | ||
371 | #endif /* CONFIG_9P_FSCACHE */ | ||
372 | |||
373 | static struct attribute *v9fs_attrs[] = { | ||
374 | #ifdef CONFIG_9P_FSCACHE | ||
375 | &v9fs_attr_cache.attr, | ||
376 | #endif | ||
377 | NULL, | ||
378 | }; | ||
379 | |||
380 | static struct attribute_group v9fs_attr_group = { | ||
381 | .attrs = v9fs_attrs, | ||
382 | }; | ||
383 | |||
384 | /** | ||
385 | * v9fs_sysfs_init - Initialize the v9fs sysfs interface | ||
386 | * | ||
387 | */ | ||
388 | |||
389 | static int v9fs_sysfs_init(void) | ||
390 | { | ||
391 | v9fs_kobj = kobject_create_and_add("9p", fs_kobj); | ||
392 | if (!v9fs_kobj) | ||
393 | return -ENOMEM; | ||
394 | |||
395 | if (sysfs_create_group(v9fs_kobj, &v9fs_attr_group)) { | ||
396 | kobject_put(v9fs_kobj); | ||
397 | return -ENOMEM; | ||
398 | } | ||
399 | |||
400 | return 0; | ||
401 | } | ||
402 | |||
403 | /** | ||
404 | * v9fs_sysfs_cleanup - Unregister the v9fs sysfs interface | ||
405 | * | ||
406 | */ | ||
407 | |||
408 | static void v9fs_sysfs_cleanup(void) | ||
409 | { | ||
410 | sysfs_remove_group(v9fs_kobj, &v9fs_attr_group); | ||
411 | kobject_put(v9fs_kobj); | ||
412 | } | ||
413 | |||
414 | /** | ||
415 | * init_v9fs - Initialize module | ||
291 | * | 416 | * |
292 | */ | 417 | */ |
293 | 418 | ||
294 | static int __init init_v9fs(void) | 419 | static int __init init_v9fs(void) |
295 | { | 420 | { |
421 | int err; | ||
296 | printk(KERN_INFO "Installing v9fs 9p2000 file system support\n"); | 422 | printk(KERN_INFO "Installing v9fs 9p2000 file system support\n"); |
297 | /* TODO: Setup list of registered trasnport modules */ | 423 | /* TODO: Setup list of registered trasnport modules */ |
298 | return register_filesystem(&v9fs_fs_type); | 424 | err = register_filesystem(&v9fs_fs_type); |
425 | if (err < 0) { | ||
426 | printk(KERN_ERR "Failed to register filesystem\n"); | ||
427 | return err; | ||
428 | } | ||
429 | |||
430 | err = v9fs_cache_register(); | ||
431 | if (err < 0) { | ||
432 | printk(KERN_ERR "Failed to register v9fs for caching\n"); | ||
433 | goto out_fs_unreg; | ||
434 | } | ||
435 | |||
436 | err = v9fs_sysfs_init(); | ||
437 | if (err < 0) { | ||
438 | printk(KERN_ERR "Failed to register with sysfs\n"); | ||
439 | goto out_sysfs_cleanup; | ||
440 | } | ||
441 | |||
442 | return 0; | ||
443 | |||
444 | out_sysfs_cleanup: | ||
445 | v9fs_sysfs_cleanup(); | ||
446 | |||
447 | out_fs_unreg: | ||
448 | unregister_filesystem(&v9fs_fs_type); | ||
449 | |||
450 | return err; | ||
299 | } | 451 | } |
300 | 452 | ||
301 | /** | 453 | /** |
302 | * v9fs_init - shutdown module | 454 | * exit_v9fs - shutdown module |
303 | * | 455 | * |
304 | */ | 456 | */ |
305 | 457 | ||
306 | static void __exit exit_v9fs(void) | 458 | static void __exit exit_v9fs(void) |
307 | { | 459 | { |
460 | v9fs_sysfs_cleanup(); | ||
461 | v9fs_cache_unregister(); | ||
308 | unregister_filesystem(&v9fs_fs_type); | 462 | unregister_filesystem(&v9fs_fs_type); |
309 | } | 463 | } |
310 | 464 | ||
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 38762bf102a9..019f4ccb70c1 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h | |||
@@ -51,6 +51,7 @@ enum p9_session_flags { | |||
51 | enum p9_cache_modes { | 51 | enum p9_cache_modes { |
52 | CACHE_NONE, | 52 | CACHE_NONE, |
53 | CACHE_LOOSE, | 53 | CACHE_LOOSE, |
54 | CACHE_FSCACHE, | ||
54 | }; | 55 | }; |
55 | 56 | ||
56 | /** | 57 | /** |
@@ -60,6 +61,8 @@ enum p9_cache_modes { | |||
60 | * @debug: debug level | 61 | * @debug: debug level |
61 | * @afid: authentication handle | 62 | * @afid: authentication handle |
62 | * @cache: cache mode of type &p9_cache_modes | 63 | * @cache: cache mode of type &p9_cache_modes |
64 | * @cachetag: the tag of the cache associated with this session | ||
65 | * @fscache: session cookie associated with FS-Cache | ||
63 | * @options: copy of options string given by user | 66 | * @options: copy of options string given by user |
64 | * @uname: string user name to mount hierarchy as | 67 | * @uname: string user name to mount hierarchy as |
65 | * @aname: mount specifier for remote hierarchy | 68 | * @aname: mount specifier for remote hierarchy |
@@ -68,7 +71,7 @@ enum p9_cache_modes { | |||
68 | * @dfltgid: default numeric groupid to mount hierarchy as | 71 | * @dfltgid: default numeric groupid to mount hierarchy as |
69 | * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy | 72 | * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy |
70 | * @clnt: reference to 9P network client instantiated for this session | 73 | * @clnt: reference to 9P network client instantiated for this session |
71 | * @debugfs_dir: reference to debugfs_dir which can be used for add'l debug | 74 | * @slist: reference to list of registered 9p sessions |
72 | * | 75 | * |
73 | * This structure holds state for each session instance established during | 76 | * This structure holds state for each session instance established during |
74 | * a sys_mount() . | 77 | * a sys_mount() . |
@@ -84,6 +87,10 @@ struct v9fs_session_info { | |||
84 | unsigned short debug; | 87 | unsigned short debug; |
85 | unsigned int afid; | 88 | unsigned int afid; |
86 | unsigned int cache; | 89 | unsigned int cache; |
90 | #ifdef CONFIG_9P_FSCACHE | ||
91 | char *cachetag; | ||
92 | struct fscache_cookie *fscache; | ||
93 | #endif | ||
87 | 94 | ||
88 | char *uname; /* user name to mount as */ | 95 | char *uname; /* user name to mount as */ |
89 | char *aname; /* name of remote hierarchy being mounted */ | 96 | char *aname; /* name of remote hierarchy being mounted */ |
@@ -92,11 +99,9 @@ struct v9fs_session_info { | |||
92 | unsigned int dfltgid; /* default gid for legacy support */ | 99 | unsigned int dfltgid; /* default gid for legacy support */ |
93 | u32 uid; /* if ACCESS_SINGLE, the uid that has access */ | 100 | u32 uid; /* if ACCESS_SINGLE, the uid that has access */ |
94 | struct p9_client *clnt; /* 9p client */ | 101 | struct p9_client *clnt; /* 9p client */ |
95 | struct dentry *debugfs_dir; | 102 | struct list_head slist; /* list of sessions registered with v9fs */ |
96 | }; | 103 | }; |
97 | 104 | ||
98 | extern struct dentry *v9fs_debugfs_root; | ||
99 | |||
100 | struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, | 105 | struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, |
101 | char *); | 106 | char *); |
102 | void v9fs_session_close(struct v9fs_session_info *v9ses); | 107 | void v9fs_session_close(struct v9fs_session_info *v9ses); |
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index f0c7de78e205..3a7560e35865 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h | |||
@@ -44,7 +44,13 @@ extern const struct file_operations v9fs_dir_operations; | |||
44 | extern const struct dentry_operations v9fs_dentry_operations; | 44 | extern const struct dentry_operations v9fs_dentry_operations; |
45 | extern const struct dentry_operations v9fs_cached_dentry_operations; | 45 | extern const struct dentry_operations v9fs_cached_dentry_operations; |
46 | 46 | ||
47 | #ifdef CONFIG_9P_FSCACHE | ||
48 | struct inode *v9fs_alloc_inode(struct super_block *sb); | ||
49 | void v9fs_destroy_inode(struct inode *inode); | ||
50 | #endif | ||
51 | |||
47 | struct inode *v9fs_get_inode(struct super_block *sb, int mode); | 52 | struct inode *v9fs_get_inode(struct super_block *sb, int mode); |
53 | void v9fs_clear_inode(struct inode *inode); | ||
48 | ino_t v9fs_qid2ino(struct p9_qid *qid); | 54 | ino_t v9fs_qid2ino(struct p9_qid *qid); |
49 | void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); | 55 | void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); |
50 | int v9fs_dir_release(struct inode *inode, struct file *filp); | 56 | int v9fs_dir_release(struct inode *inode, struct file *filp); |
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 92828281a30b..90e38449f4b3 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c | |||
@@ -38,6 +38,7 @@ | |||
38 | 38 | ||
39 | #include "v9fs.h" | 39 | #include "v9fs.h" |
40 | #include "v9fs_vfs.h" | 40 | #include "v9fs_vfs.h" |
41 | #include "cache.h" | ||
41 | 42 | ||
42 | /** | 43 | /** |
43 | * v9fs_vfs_readpage - read an entire page in from 9P | 44 | * v9fs_vfs_readpage - read an entire page in from 9P |
@@ -52,18 +53,31 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page) | |||
52 | int retval; | 53 | int retval; |
53 | loff_t offset; | 54 | loff_t offset; |
54 | char *buffer; | 55 | char *buffer; |
56 | struct inode *inode; | ||
55 | 57 | ||
58 | inode = page->mapping->host; | ||
56 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); | 59 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); |
60 | |||
61 | BUG_ON(!PageLocked(page)); | ||
62 | |||
63 | retval = v9fs_readpage_from_fscache(inode, page); | ||
64 | if (retval == 0) | ||
65 | return retval; | ||
66 | |||
57 | buffer = kmap(page); | 67 | buffer = kmap(page); |
58 | offset = page_offset(page); | 68 | offset = page_offset(page); |
59 | 69 | ||
60 | retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset); | 70 | retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset); |
61 | if (retval < 0) | 71 | if (retval < 0) { |
72 | v9fs_uncache_page(inode, page); | ||
62 | goto done; | 73 | goto done; |
74 | } | ||
63 | 75 | ||
64 | memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval); | 76 | memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval); |
65 | flush_dcache_page(page); | 77 | flush_dcache_page(page); |
66 | SetPageUptodate(page); | 78 | SetPageUptodate(page); |
79 | |||
80 | v9fs_readpage_to_fscache(inode, page); | ||
67 | retval = 0; | 81 | retval = 0; |
68 | 82 | ||
69 | done: | 83 | done: |
@@ -72,6 +86,78 @@ done: | |||
72 | return retval; | 86 | return retval; |
73 | } | 87 | } |
74 | 88 | ||
89 | /** | ||
90 | * v9fs_vfs_readpages - read a set of pages from 9P | ||
91 | * | ||
92 | * @filp: file being read | ||
93 | * @mapping: the address space | ||
94 | * @pages: list of pages to read | ||
95 | * @nr_pages: count of pages to read | ||
96 | * | ||
97 | */ | ||
98 | |||
99 | static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping, | ||
100 | struct list_head *pages, unsigned nr_pages) | ||
101 | { | ||
102 | int ret = 0; | ||
103 | struct inode *inode; | ||
104 | |||
105 | inode = mapping->host; | ||
106 | P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp); | ||
107 | |||
108 | ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages); | ||
109 | if (ret == 0) | ||
110 | return ret; | ||
111 | |||
112 | ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp); | ||
113 | P9_DPRINTK(P9_DEBUG_VFS, " = %d\n", ret); | ||
114 | return ret; | ||
115 | } | ||
116 | |||
117 | /** | ||
118 | * v9fs_release_page - release the private state associated with a page | ||
119 | * | ||
120 | * Returns 1 if the page can be released, false otherwise. | ||
121 | */ | ||
122 | |||
123 | static int v9fs_release_page(struct page *page, gfp_t gfp) | ||
124 | { | ||
125 | if (PagePrivate(page)) | ||
126 | return 0; | ||
127 | |||
128 | return v9fs_fscache_release_page(page, gfp); | ||
129 | } | ||
130 | |||
131 | /** | ||
132 | * v9fs_invalidate_page - Invalidate a page completely or partially | ||
133 | * | ||
134 | * @page: structure to page | ||
135 | * @offset: offset in the page | ||
136 | */ | ||
137 | |||
138 | static void v9fs_invalidate_page(struct page *page, unsigned long offset) | ||
139 | { | ||
140 | if (offset == 0) | ||
141 | v9fs_fscache_invalidate_page(page); | ||
142 | } | ||
143 | |||
144 | /** | ||
145 | * v9fs_launder_page - Writeback a dirty page | ||
146 | * Since the writes go directly to the server, we simply return a 0 | ||
147 | * here to indicate success. | ||
148 | * | ||
149 | * Returns 0 on success. | ||
150 | */ | ||
151 | |||
152 | static int v9fs_launder_page(struct page *page) | ||
153 | { | ||
154 | return 0; | ||
155 | } | ||
156 | |||
75 | const struct address_space_operations v9fs_addr_operations = { | 157 | const struct address_space_operations v9fs_addr_operations = { |
76 | .readpage = v9fs_vfs_readpage, | 158 | .readpage = v9fs_vfs_readpage, |
159 | .readpages = v9fs_vfs_readpages, | ||
160 | .releasepage = v9fs_release_page, | ||
161 | .invalidatepage = v9fs_invalidate_page, | ||
162 | .launder_page = v9fs_launder_page, | ||
77 | }; | 163 | }; |
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 68bf2af6c389..3902bf43a088 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/string.h> | 32 | #include <linux/string.h> |
33 | #include <linux/inet.h> | 33 | #include <linux/inet.h> |
34 | #include <linux/list.h> | 34 | #include <linux/list.h> |
35 | #include <linux/pagemap.h> | ||
35 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
36 | #include <linux/idr.h> | 37 | #include <linux/idr.h> |
37 | #include <net/9p/9p.h> | 38 | #include <net/9p/9p.h> |
@@ -40,6 +41,7 @@ | |||
40 | #include "v9fs.h" | 41 | #include "v9fs.h" |
41 | #include "v9fs_vfs.h" | 42 | #include "v9fs_vfs.h" |
42 | #include "fid.h" | 43 | #include "fid.h" |
44 | #include "cache.h" | ||
43 | 45 | ||
44 | static const struct file_operations v9fs_cached_file_operations; | 46 | static const struct file_operations v9fs_cached_file_operations; |
45 | 47 | ||
@@ -72,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) | |||
72 | return err; | 74 | return err; |
73 | } | 75 | } |
74 | if (omode & P9_OTRUNC) { | 76 | if (omode & P9_OTRUNC) { |
75 | inode->i_size = 0; | 77 | i_size_write(inode, 0); |
76 | inode->i_blocks = 0; | 78 | inode->i_blocks = 0; |
77 | } | 79 | } |
78 | if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses))) | 80 | if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses))) |
@@ -85,6 +87,10 @@ int v9fs_file_open(struct inode *inode, struct file *file) | |||
85 | /* enable cached file options */ | 87 | /* enable cached file options */ |
86 | if(file->f_op == &v9fs_file_operations) | 88 | if(file->f_op == &v9fs_file_operations) |
87 | file->f_op = &v9fs_cached_file_operations; | 89 | file->f_op = &v9fs_cached_file_operations; |
90 | |||
91 | #ifdef CONFIG_9P_FSCACHE | ||
92 | v9fs_cache_inode_set_cookie(inode, file); | ||
93 | #endif | ||
88 | } | 94 | } |
89 | 95 | ||
90 | return 0; | 96 | return 0; |
@@ -210,6 +216,7 @@ v9fs_file_write(struct file *filp, const char __user * data, | |||
210 | struct p9_client *clnt; | 216 | struct p9_client *clnt; |
211 | struct inode *inode = filp->f_path.dentry->d_inode; | 217 | struct inode *inode = filp->f_path.dentry->d_inode; |
212 | int origin = *offset; | 218 | int origin = *offset; |
219 | unsigned long pg_start, pg_end; | ||
213 | 220 | ||
214 | P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, | 221 | P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, |
215 | (int)count, (int)*offset); | 222 | (int)count, (int)*offset); |
@@ -225,7 +232,7 @@ v9fs_file_write(struct file *filp, const char __user * data, | |||
225 | if (count < rsize) | 232 | if (count < rsize) |
226 | rsize = count; | 233 | rsize = count; |
227 | 234 | ||
228 | n = p9_client_write(fid, NULL, data+total, *offset+total, | 235 | n = p9_client_write(fid, NULL, data+total, origin+total, |
229 | rsize); | 236 | rsize); |
230 | if (n <= 0) | 237 | if (n <= 0) |
231 | break; | 238 | break; |
@@ -234,14 +241,14 @@ v9fs_file_write(struct file *filp, const char __user * data, | |||
234 | } while (count > 0); | 241 | } while (count > 0); |
235 | 242 | ||
236 | if (total > 0) { | 243 | if (total > 0) { |
237 | invalidate_inode_pages2_range(inode->i_mapping, origin, | 244 | pg_start = origin >> PAGE_CACHE_SHIFT; |
238 | origin+total); | 245 | pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT; |
246 | if (inode->i_mapping && inode->i_mapping->nrpages) | ||
247 | invalidate_inode_pages2_range(inode->i_mapping, | ||
248 | pg_start, pg_end); | ||
239 | *offset += total; | 249 | *offset += total; |
240 | } | 250 | i_size_write(inode, i_size_read(inode) + total); |
241 | 251 | inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; | |
242 | if (*offset > inode->i_size) { | ||
243 | inode->i_size = *offset; | ||
244 | inode->i_blocks = (inode->i_size + 512 - 1) >> 9; | ||
245 | } | 252 | } |
246 | 253 | ||
247 | if (n < 0) | 254 | if (n < 0) |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 06a223d50a81..5947628aefef 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "v9fs.h" | 40 | #include "v9fs.h" |
41 | #include "v9fs_vfs.h" | 41 | #include "v9fs_vfs.h" |
42 | #include "fid.h" | 42 | #include "fid.h" |
43 | #include "cache.h" | ||
43 | 44 | ||
44 | static const struct inode_operations v9fs_dir_inode_operations; | 45 | static const struct inode_operations v9fs_dir_inode_operations; |
45 | static const struct inode_operations v9fs_dir_inode_operations_ext; | 46 | static const struct inode_operations v9fs_dir_inode_operations_ext; |
@@ -197,6 +198,39 @@ v9fs_blank_wstat(struct p9_wstat *wstat) | |||
197 | wstat->extension = NULL; | 198 | wstat->extension = NULL; |
198 | } | 199 | } |
199 | 200 | ||
201 | #ifdef CONFIG_9P_FSCACHE | ||
202 | /** | ||
203 | * v9fs_alloc_inode - helper function to allocate an inode | ||
204 | * This callback is executed before setting up the inode so that we | ||
205 | * can associate a vcookie with each inode. | ||
206 | * | ||
207 | */ | ||
208 | |||
209 | struct inode *v9fs_alloc_inode(struct super_block *sb) | ||
210 | { | ||
211 | struct v9fs_cookie *vcookie; | ||
212 | vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache, | ||
213 | GFP_KERNEL); | ||
214 | if (!vcookie) | ||
215 | return NULL; | ||
216 | |||
217 | vcookie->fscache = NULL; | ||
218 | vcookie->qid = NULL; | ||
219 | spin_lock_init(&vcookie->lock); | ||
220 | return &vcookie->inode; | ||
221 | } | ||
222 | |||
223 | /** | ||
224 | * v9fs_destroy_inode - destroy an inode | ||
225 | * | ||
226 | */ | ||
227 | |||
228 | void v9fs_destroy_inode(struct inode *inode) | ||
229 | { | ||
230 | kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); | ||
231 | } | ||
232 | #endif | ||
233 | |||
200 | /** | 234 | /** |
201 | * v9fs_get_inode - helper function to setup an inode | 235 | * v9fs_get_inode - helper function to setup an inode |
202 | * @sb: superblock | 236 | * @sb: superblock |
@@ -326,6 +360,21 @@ error: | |||
326 | } | 360 | } |
327 | */ | 361 | */ |
328 | 362 | ||
363 | |||
364 | /** | ||
365 | * v9fs_clear_inode - release an inode | ||
366 | * @inode: inode to release | ||
367 | * | ||
368 | */ | ||
369 | void v9fs_clear_inode(struct inode *inode) | ||
370 | { | ||
371 | filemap_fdatawrite(inode->i_mapping); | ||
372 | |||
373 | #ifdef CONFIG_9P_FSCACHE | ||
374 | v9fs_cache_inode_put_cookie(inode); | ||
375 | #endif | ||
376 | } | ||
377 | |||
329 | /** | 378 | /** |
330 | * v9fs_inode_from_fid - populate an inode by issuing a attribute request | 379 | * v9fs_inode_from_fid - populate an inode by issuing a attribute request |
331 | * @v9ses: session information | 380 | * @v9ses: session information |
@@ -356,8 +405,14 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, | |||
356 | 405 | ||
357 | v9fs_stat2inode(st, ret, sb); | 406 | v9fs_stat2inode(st, ret, sb); |
358 | ret->i_ino = v9fs_qid2ino(&st->qid); | 407 | ret->i_ino = v9fs_qid2ino(&st->qid); |
408 | |||
409 | #ifdef CONFIG_9P_FSCACHE | ||
410 | v9fs_vcookie_set_qid(ret, &st->qid); | ||
411 | v9fs_cache_inode_get_cookie(ret); | ||
412 | #endif | ||
359 | p9stat_free(st); | 413 | p9stat_free(st); |
360 | kfree(st); | 414 | kfree(st); |
415 | |||
361 | return ret; | 416 | return ret; |
362 | 417 | ||
363 | error: | 418 | error: |
@@ -751,7 +806,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
751 | P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); | 806 | P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); |
752 | err = -EPERM; | 807 | err = -EPERM; |
753 | v9ses = v9fs_inode2v9ses(dentry->d_inode); | 808 | v9ses = v9fs_inode2v9ses(dentry->d_inode); |
754 | if (v9ses->cache == CACHE_LOOSE) | 809 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) |
755 | return simple_getattr(mnt, dentry, stat); | 810 | return simple_getattr(mnt, dentry, stat); |
756 | 811 | ||
757 | fid = v9fs_fid_lookup(dentry); | 812 | fid = v9fs_fid_lookup(dentry); |
@@ -872,10 +927,10 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, | |||
872 | } else | 927 | } else |
873 | inode->i_rdev = 0; | 928 | inode->i_rdev = 0; |
874 | 929 | ||
875 | inode->i_size = stat->length; | 930 | i_size_write(inode, stat->length); |
876 | 931 | ||
877 | /* not real number of blocks, but 512 byte ones ... */ | 932 | /* not real number of blocks, but 512 byte ones ... */ |
878 | inode->i_blocks = (inode->i_size + 512 - 1) >> 9; | 933 | inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; |
879 | } | 934 | } |
880 | 935 | ||
881 | /** | 936 | /** |
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 8961f1a8f668..14a86448572c 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c | |||
@@ -44,21 +44,9 @@ | |||
44 | #include "v9fs_vfs.h" | 44 | #include "v9fs_vfs.h" |
45 | #include "fid.h" | 45 | #include "fid.h" |
46 | 46 | ||
47 | static void v9fs_clear_inode(struct inode *); | ||
48 | static const struct super_operations v9fs_super_ops; | 47 | static const struct super_operations v9fs_super_ops; |
49 | 48 | ||
50 | /** | 49 | /** |
51 | * v9fs_clear_inode - release an inode | ||
52 | * @inode: inode to release | ||
53 | * | ||
54 | */ | ||
55 | |||
56 | static void v9fs_clear_inode(struct inode *inode) | ||
57 | { | ||
58 | filemap_fdatawrite(inode->i_mapping); | ||
59 | } | ||
60 | |||
61 | /** | ||
62 | * v9fs_set_super - set the superblock | 50 | * v9fs_set_super - set the superblock |
63 | * @s: super block | 51 | * @s: super block |
64 | * @data: file system specific data | 52 | * @data: file system specific data |
@@ -220,6 +208,10 @@ v9fs_umount_begin(struct super_block *sb) | |||
220 | } | 208 | } |
221 | 209 | ||
222 | static const struct super_operations v9fs_super_ops = { | 210 | static const struct super_operations v9fs_super_ops = { |
211 | #ifdef CONFIG_9P_FSCACHE | ||
212 | .alloc_inode = v9fs_alloc_inode, | ||
213 | .destroy_inode = v9fs_destroy_inode, | ||
214 | #endif | ||
223 | .statfs = simple_statfs, | 215 | .statfs = simple_statfs, |
224 | .clear_inode = v9fs_clear_inode, | 216 | .clear_inode = v9fs_clear_inode, |
225 | .show_options = generic_show_options, | 217 | .show_options = generic_show_options, |
diff --git a/fs/Kconfig b/fs/Kconfig index 455aa207e67e..d4bf8caad8d0 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -109,6 +109,7 @@ source "fs/sysfs/Kconfig" | |||
109 | 109 | ||
110 | config TMPFS | 110 | config TMPFS |
111 | bool "Virtual memory file system support (former shm fs)" | 111 | bool "Virtual memory file system support (former shm fs)" |
112 | depends on SHMEM | ||
112 | help | 113 | help |
113 | Tmpfs is a file system which keeps all files in virtual memory. | 114 | Tmpfs is a file system which keeps all files in virtual memory. |
114 | 115 | ||
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 798cb071d132..3f57ce4bee5d 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c | |||
@@ -19,9 +19,6 @@ static int | |||
19 | adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, | 19 | adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, |
20 | int create) | 20 | int create) |
21 | { | 21 | { |
22 | if (block < 0) | ||
23 | goto abort_negative; | ||
24 | |||
25 | if (!create) { | 22 | if (!create) { |
26 | if (block >= inode->i_blocks) | 23 | if (block >= inode->i_blocks) |
27 | goto abort_toobig; | 24 | goto abort_toobig; |
@@ -34,10 +31,6 @@ adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, | |||
34 | /* don't support allocation of blocks yet */ | 31 | /* don't support allocation of blocks yet */ |
35 | return -EIO; | 32 | return -EIO; |
36 | 33 | ||
37 | abort_negative: | ||
38 | adfs_error(inode->i_sb, "block %d < 0", block); | ||
39 | return -EIO; | ||
40 | |||
41 | abort_toobig: | 34 | abort_toobig: |
42 | return 0; | 35 | return 0; |
43 | } | 36 | } |
diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 3ff8bdd18fb3..0931bc1325eb 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c | |||
@@ -21,7 +21,7 @@ static void afs_fl_release_private(struct file_lock *fl); | |||
21 | static struct workqueue_struct *afs_lock_manager; | 21 | static struct workqueue_struct *afs_lock_manager; |
22 | static DEFINE_MUTEX(afs_lock_manager_mutex); | 22 | static DEFINE_MUTEX(afs_lock_manager_mutex); |
23 | 23 | ||
24 | static struct file_lock_operations afs_lock_ops = { | 24 | static const struct file_lock_operations afs_lock_ops = { |
25 | .fl_copy_lock = afs_fl_copy_lock, | 25 | .fl_copy_lock = afs_fl_copy_lock, |
26 | .fl_release_private = afs_fl_release_private, | 26 | .fl_release_private = afs_fl_release_private, |
27 | }; | 27 | }; |
diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 8630615e57fe..852739d262a9 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c | |||
@@ -28,7 +28,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v); | |||
28 | static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, | 28 | static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, |
29 | size_t size, loff_t *_pos); | 29 | size_t size, loff_t *_pos); |
30 | 30 | ||
31 | static struct seq_operations afs_proc_cells_ops = { | 31 | static const struct seq_operations afs_proc_cells_ops = { |
32 | .start = afs_proc_cells_start, | 32 | .start = afs_proc_cells_start, |
33 | .next = afs_proc_cells_next, | 33 | .next = afs_proc_cells_next, |
34 | .stop = afs_proc_cells_stop, | 34 | .stop = afs_proc_cells_stop, |
@@ -70,7 +70,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v, | |||
70 | static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v); | 70 | static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v); |
71 | static int afs_proc_cell_volumes_show(struct seq_file *m, void *v); | 71 | static int afs_proc_cell_volumes_show(struct seq_file *m, void *v); |
72 | 72 | ||
73 | static struct seq_operations afs_proc_cell_volumes_ops = { | 73 | static const struct seq_operations afs_proc_cell_volumes_ops = { |
74 | .start = afs_proc_cell_volumes_start, | 74 | .start = afs_proc_cell_volumes_start, |
75 | .next = afs_proc_cell_volumes_next, | 75 | .next = afs_proc_cell_volumes_next, |
76 | .stop = afs_proc_cell_volumes_stop, | 76 | .stop = afs_proc_cell_volumes_stop, |
@@ -95,7 +95,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v, | |||
95 | static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v); | 95 | static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v); |
96 | static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v); | 96 | static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v); |
97 | 97 | ||
98 | static struct seq_operations afs_proc_cell_vlservers_ops = { | 98 | static const struct seq_operations afs_proc_cell_vlservers_ops = { |
99 | .start = afs_proc_cell_vlservers_start, | 99 | .start = afs_proc_cell_vlservers_start, |
100 | .next = afs_proc_cell_vlservers_next, | 100 | .next = afs_proc_cell_vlservers_next, |
101 | .stop = afs_proc_cell_vlservers_stop, | 101 | .stop = afs_proc_cell_vlservers_stop, |
@@ -119,7 +119,7 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, | |||
119 | static void afs_proc_cell_servers_stop(struct seq_file *p, void *v); | 119 | static void afs_proc_cell_servers_stop(struct seq_file *p, void *v); |
120 | static int afs_proc_cell_servers_show(struct seq_file *m, void *v); | 120 | static int afs_proc_cell_servers_show(struct seq_file *m, void *v); |
121 | 121 | ||
122 | static struct seq_operations afs_proc_cell_servers_ops = { | 122 | static const struct seq_operations afs_proc_cell_servers_ops = { |
123 | .start = afs_proc_cell_servers_start, | 123 | .start = afs_proc_cell_servers_start, |
124 | .next = afs_proc_cell_servers_next, | 124 | .next = afs_proc_cell_servers_next, |
125 | .stop = afs_proc_cell_servers_stop, | 125 | .stop = afs_proc_cell_servers_stop, |
diff --git a/fs/afs/write.c b/fs/afs/write.c index c2e7a7ff0080..c63a3c8beb73 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c | |||
@@ -712,7 +712,6 @@ int afs_writeback_all(struct afs_vnode *vnode) | |||
712 | .bdi = mapping->backing_dev_info, | 712 | .bdi = mapping->backing_dev_info, |
713 | .sync_mode = WB_SYNC_ALL, | 713 | .sync_mode = WB_SYNC_ALL, |
714 | .nr_to_write = LONG_MAX, | 714 | .nr_to_write = LONG_MAX, |
715 | .for_writepages = 1, | ||
716 | .range_cyclic = 1, | 715 | .range_cyclic = 1, |
717 | }; | 716 | }; |
718 | int ret; | 717 | int ret; |
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/file.h> | 24 | #include <linux/file.h> |
25 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
26 | #include <linux/mman.h> | 26 | #include <linux/mman.h> |
27 | #include <linux/mmu_context.h> | ||
27 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
28 | #include <linux/timer.h> | 29 | #include <linux/timer.h> |
29 | #include <linux/aio.h> | 30 | #include <linux/aio.h> |
@@ -34,7 +35,6 @@ | |||
34 | 35 | ||
35 | #include <asm/kmap_types.h> | 36 | #include <asm/kmap_types.h> |
36 | #include <asm/uaccess.h> | 37 | #include <asm/uaccess.h> |
37 | #include <asm/mmu_context.h> | ||
38 | 38 | ||
39 | #if DEBUG > 1 | 39 | #if DEBUG > 1 |
40 | #define dprintk printk | 40 | #define dprintk printk |
@@ -78,6 +78,7 @@ static int __init aio_setup(void) | |||
78 | 78 | ||
79 | return 0; | 79 | return 0; |
80 | } | 80 | } |
81 | __initcall(aio_setup); | ||
81 | 82 | ||
82 | static void aio_free_ring(struct kioctx *ctx) | 83 | static void aio_free_ring(struct kioctx *ctx) |
83 | { | 84 | { |
@@ -380,6 +381,7 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb) | |||
380 | __set_current_state(TASK_RUNNING); | 381 | __set_current_state(TASK_RUNNING); |
381 | return iocb->ki_user_data; | 382 | return iocb->ki_user_data; |
382 | } | 383 | } |
384 | EXPORT_SYMBOL(wait_on_sync_kiocb); | ||
383 | 385 | ||
384 | /* exit_aio: called when the last user of mm goes away. At this point, | 386 | /* exit_aio: called when the last user of mm goes away. At this point, |
385 | * there is no way for any new requests to be submited or any of the | 387 | * there is no way for any new requests to be submited or any of the |
@@ -573,6 +575,7 @@ int aio_put_req(struct kiocb *req) | |||
573 | spin_unlock_irq(&ctx->ctx_lock); | 575 | spin_unlock_irq(&ctx->ctx_lock); |
574 | return ret; | 576 | return ret; |
575 | } | 577 | } |
578 | EXPORT_SYMBOL(aio_put_req); | ||
576 | 579 | ||
577 | static struct kioctx *lookup_ioctx(unsigned long ctx_id) | 580 | static struct kioctx *lookup_ioctx(unsigned long ctx_id) |
578 | { | 581 | { |
@@ -595,51 +598,6 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) | |||
595 | } | 598 | } |
596 | 599 | ||
597 | /* | 600 | /* |
598 | * use_mm | ||
599 | * Makes the calling kernel thread take on the specified | ||
600 | * mm context. | ||
601 | * Called by the retry thread execute retries within the | ||
602 | * iocb issuer's mm context, so that copy_from/to_user | ||
603 | * operations work seamlessly for aio. | ||
604 | * (Note: this routine is intended to be called only | ||
605 | * from a kernel thread context) | ||
606 | */ | ||
607 | static void use_mm(struct mm_struct *mm) | ||
608 | { | ||
609 | struct mm_struct *active_mm; | ||
610 | struct task_struct *tsk = current; | ||
611 | |||
612 | task_lock(tsk); | ||
613 | active_mm = tsk->active_mm; | ||
614 | atomic_inc(&mm->mm_count); | ||
615 | tsk->mm = mm; | ||
616 | tsk->active_mm = mm; | ||
617 | switch_mm(active_mm, mm, tsk); | ||
618 | task_unlock(tsk); | ||
619 | |||
620 | mmdrop(active_mm); | ||
621 | } | ||
622 | |||
623 | /* | ||
624 | * unuse_mm | ||
625 | * Reverses the effect of use_mm, i.e. releases the | ||
626 | * specified mm context which was earlier taken on | ||
627 | * by the calling kernel thread | ||
628 | * (Note: this routine is intended to be called only | ||
629 | * from a kernel thread context) | ||
630 | */ | ||
631 | static void unuse_mm(struct mm_struct *mm) | ||
632 | { | ||
633 | struct task_struct *tsk = current; | ||
634 | |||
635 | task_lock(tsk); | ||
636 | tsk->mm = NULL; | ||
637 | /* active_mm is still 'mm' */ | ||
638 | enter_lazy_tlb(mm, tsk); | ||
639 | task_unlock(tsk); | ||
640 | } | ||
641 | |||
642 | /* | ||
643 | * Queue up a kiocb to be retried. Assumes that the kiocb | 601 | * Queue up a kiocb to be retried. Assumes that the kiocb |
644 | * has already been marked as kicked, and places it on | 602 | * has already been marked as kicked, and places it on |
645 | * the retry run list for the corresponding ioctx, if it | 603 | * the retry run list for the corresponding ioctx, if it |
@@ -1037,6 +995,7 @@ put_rq: | |||
1037 | spin_unlock_irqrestore(&ctx->ctx_lock, flags); | 995 | spin_unlock_irqrestore(&ctx->ctx_lock, flags); |
1038 | return ret; | 996 | return ret; |
1039 | } | 997 | } |
998 | EXPORT_SYMBOL(aio_complete); | ||
1040 | 999 | ||
1041 | /* aio_read_evt | 1000 | /* aio_read_evt |
1042 | * Pull an event off of the ioctx's event ring. Returns the number of | 1001 | * Pull an event off of the ioctx's event ring. Returns the number of |
@@ -1825,9 +1784,3 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, | |||
1825 | asmlinkage_protect(5, ret, ctx_id, min_nr, nr, events, timeout); | 1784 | asmlinkage_protect(5, ret, ctx_id, min_nr, nr, events, timeout); |
1826 | return ret; | 1785 | return ret; |
1827 | } | 1786 | } |
1828 | |||
1829 | __initcall(aio_setup); | ||
1830 | |||
1831 | EXPORT_SYMBOL(aio_complete); | ||
1832 | EXPORT_SYMBOL(aio_put_req); | ||
1833 | EXPORT_SYMBOL(wait_on_sync_kiocb); | ||
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 47d4a01c5393..d11c51fc2a3f 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c | |||
@@ -77,28 +77,24 @@ static const struct address_space_operations anon_aops = { | |||
77 | * | 77 | * |
78 | * Creates a new file by hooking it on a single inode. This is useful for files | 78 | * Creates a new file by hooking it on a single inode. This is useful for files |
79 | * that do not need to have a full-fledged inode in order to operate correctly. | 79 | * that do not need to have a full-fledged inode in order to operate correctly. |
80 | * All the files created with anon_inode_getfd() will share a single inode, | 80 | * All the files created with anon_inode_getfile() will share a single inode, |
81 | * hence saving memory and avoiding code duplication for the file/inode/dentry | 81 | * hence saving memory and avoiding code duplication for the file/inode/dentry |
82 | * setup. Returns new descriptor or -error. | 82 | * setup. Returns the newly created file* or an error pointer. |
83 | */ | 83 | */ |
84 | int anon_inode_getfd(const char *name, const struct file_operations *fops, | 84 | struct file *anon_inode_getfile(const char *name, |
85 | void *priv, int flags) | 85 | const struct file_operations *fops, |
86 | void *priv, int flags) | ||
86 | { | 87 | { |
87 | struct qstr this; | 88 | struct qstr this; |
88 | struct dentry *dentry; | 89 | struct dentry *dentry; |
89 | struct file *file; | 90 | struct file *file; |
90 | int error, fd; | 91 | int error; |
91 | 92 | ||
92 | if (IS_ERR(anon_inode_inode)) | 93 | if (IS_ERR(anon_inode_inode)) |
93 | return -ENODEV; | 94 | return ERR_PTR(-ENODEV); |
94 | 95 | ||
95 | if (fops->owner && !try_module_get(fops->owner)) | 96 | if (fops->owner && !try_module_get(fops->owner)) |
96 | return -ENOENT; | 97 | return ERR_PTR(-ENOENT); |
97 | |||
98 | error = get_unused_fd_flags(flags); | ||
99 | if (error < 0) | ||
100 | goto err_module; | ||
101 | fd = error; | ||
102 | 98 | ||
103 | /* | 99 | /* |
104 | * Link the inode to a directory entry by creating a unique name | 100 | * Link the inode to a directory entry by creating a unique name |
@@ -110,7 +106,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, | |||
110 | this.hash = 0; | 106 | this.hash = 0; |
111 | dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); | 107 | dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); |
112 | if (!dentry) | 108 | if (!dentry) |
113 | goto err_put_unused_fd; | 109 | goto err_module; |
114 | 110 | ||
115 | /* | 111 | /* |
116 | * We know the anon_inode inode count is always greater than zero, | 112 | * We know the anon_inode inode count is always greater than zero, |
@@ -136,16 +132,54 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, | |||
136 | file->f_version = 0; | 132 | file->f_version = 0; |
137 | file->private_data = priv; | 133 | file->private_data = priv; |
138 | 134 | ||
135 | return file; | ||
136 | |||
137 | err_dput: | ||
138 | dput(dentry); | ||
139 | err_module: | ||
140 | module_put(fops->owner); | ||
141 | return ERR_PTR(error); | ||
142 | } | ||
143 | EXPORT_SYMBOL_GPL(anon_inode_getfile); | ||
144 | |||
145 | /** | ||
146 | * anon_inode_getfd - creates a new file instance by hooking it up to an | ||
147 | * anonymous inode, and a dentry that describe the "class" | ||
148 | * of the file | ||
149 | * | ||
150 | * @name: [in] name of the "class" of the new file | ||
151 | * @fops: [in] file operations for the new file | ||
152 | * @priv: [in] private data for the new file (will be file's private_data) | ||
153 | * @flags: [in] flags | ||
154 | * | ||
155 | * Creates a new file by hooking it on a single inode. This is useful for files | ||
156 | * that do not need to have a full-fledged inode in order to operate correctly. | ||
157 | * All the files created with anon_inode_getfd() will share a single inode, | ||
158 | * hence saving memory and avoiding code duplication for the file/inode/dentry | ||
159 | * setup. Returns new descriptor or an error code. | ||
160 | */ | ||
161 | int anon_inode_getfd(const char *name, const struct file_operations *fops, | ||
162 | void *priv, int flags) | ||
163 | { | ||
164 | int error, fd; | ||
165 | struct file *file; | ||
166 | |||
167 | error = get_unused_fd_flags(flags); | ||
168 | if (error < 0) | ||
169 | return error; | ||
170 | fd = error; | ||
171 | |||
172 | file = anon_inode_getfile(name, fops, priv, flags); | ||
173 | if (IS_ERR(file)) { | ||
174 | error = PTR_ERR(file); | ||
175 | goto err_put_unused_fd; | ||
176 | } | ||
139 | fd_install(fd, file); | 177 | fd_install(fd, file); |
140 | 178 | ||
141 | return fd; | 179 | return fd; |
142 | 180 | ||
143 | err_dput: | ||
144 | dput(dentry); | ||
145 | err_put_unused_fd: | 181 | err_put_unused_fd: |
146 | put_unused_fd(fd); | 182 | put_unused_fd(fd); |
147 | err_module: | ||
148 | module_put(fops->owner); | ||
149 | return error; | 183 | return error; |
150 | } | 184 | } |
151 | EXPORT_SYMBOL_GPL(anon_inode_getfd); | 185 | EXPORT_SYMBOL_GPL(anon_inode_getfd); |
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index 2316e944a109..e947915109e5 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c | |||
@@ -90,7 +90,7 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, | |||
90 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); | 90 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); |
91 | continue; | 91 | continue; |
92 | } | 92 | } |
93 | while (d_mountpoint(path.dentry) && follow_down(&path)); | 93 | while (d_mountpoint(path.dentry) && follow_down(&path)) |
94 | ; | 94 | ; |
95 | umount_ok = may_umount(path.mnt); | 95 | umount_ok = may_umount(path.mnt); |
96 | path_put(&path); | 96 | path_put(&path); |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 615d5496fe0f..dd376c124e71 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -842,7 +842,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) | |||
842 | sb->s_magic = BEFS_SUPER_MAGIC; | 842 | sb->s_magic = BEFS_SUPER_MAGIC; |
843 | /* Set real blocksize of fs */ | 843 | /* Set real blocksize of fs */ |
844 | sb_set_blocksize(sb, (ulong) befs_sb->block_size); | 844 | sb_set_blocksize(sb, (ulong) befs_sb->block_size); |
845 | sb->s_op = (struct super_operations *) &befs_sops; | 845 | sb->s_op = &befs_sops; |
846 | root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir))); | 846 | root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir))); |
847 | if (IS_ERR(root)) { | 847 | if (IS_ERR(root)) { |
848 | ret = PTR_ERR(root); | 848 | ret = PTR_ERR(root); |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7c1e65d54872..b9b3bb51b1e4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -1280,9 +1280,6 @@ static int writenote(struct memelfnote *men, struct file *file, | |||
1280 | #define DUMP_WRITE(addr, nr) \ | 1280 | #define DUMP_WRITE(addr, nr) \ |
1281 | if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ | 1281 | if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ |
1282 | goto end_coredump; | 1282 | goto end_coredump; |
1283 | #define DUMP_SEEK(off) \ | ||
1284 | if (!dump_seek(file, (off))) \ | ||
1285 | goto end_coredump; | ||
1286 | 1283 | ||
1287 | static void fill_elf_header(struct elfhdr *elf, int segs, | 1284 | static void fill_elf_header(struct elfhdr *elf, int segs, |
1288 | u16 machine, u32 flags, u8 osabi) | 1285 | u16 machine, u32 flags, u8 osabi) |
@@ -1714,42 +1711,52 @@ struct elf_note_info { | |||
1714 | int numnote; | 1711 | int numnote; |
1715 | }; | 1712 | }; |
1716 | 1713 | ||
1717 | static int fill_note_info(struct elfhdr *elf, int phdrs, | 1714 | static int elf_note_info_init(struct elf_note_info *info) |
1718 | struct elf_note_info *info, | ||
1719 | long signr, struct pt_regs *regs) | ||
1720 | { | 1715 | { |
1721 | #define NUM_NOTES 6 | 1716 | memset(info, 0, sizeof(*info)); |
1722 | struct list_head *t; | ||
1723 | |||
1724 | info->notes = NULL; | ||
1725 | info->prstatus = NULL; | ||
1726 | info->psinfo = NULL; | ||
1727 | info->fpu = NULL; | ||
1728 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1729 | info->xfpu = NULL; | ||
1730 | #endif | ||
1731 | INIT_LIST_HEAD(&info->thread_list); | 1717 | INIT_LIST_HEAD(&info->thread_list); |
1732 | 1718 | ||
1733 | info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), | 1719 | /* Allocate space for six ELF notes */ |
1734 | GFP_KERNEL); | 1720 | info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL); |
1735 | if (!info->notes) | 1721 | if (!info->notes) |
1736 | return 0; | 1722 | return 0; |
1737 | info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); | 1723 | info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); |
1738 | if (!info->psinfo) | 1724 | if (!info->psinfo) |
1739 | return 0; | 1725 | goto notes_free; |
1740 | info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); | 1726 | info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); |
1741 | if (!info->prstatus) | 1727 | if (!info->prstatus) |
1742 | return 0; | 1728 | goto psinfo_free; |
1743 | info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); | 1729 | info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); |
1744 | if (!info->fpu) | 1730 | if (!info->fpu) |
1745 | return 0; | 1731 | goto prstatus_free; |
1746 | #ifdef ELF_CORE_COPY_XFPREGS | 1732 | #ifdef ELF_CORE_COPY_XFPREGS |
1747 | info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); | 1733 | info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); |
1748 | if (!info->xfpu) | 1734 | if (!info->xfpu) |
1749 | return 0; | 1735 | goto fpu_free; |
1736 | #endif | ||
1737 | return 1; | ||
1738 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1739 | fpu_free: | ||
1740 | kfree(info->fpu); | ||
1750 | #endif | 1741 | #endif |
1742 | prstatus_free: | ||
1743 | kfree(info->prstatus); | ||
1744 | psinfo_free: | ||
1745 | kfree(info->psinfo); | ||
1746 | notes_free: | ||
1747 | kfree(info->notes); | ||
1748 | return 0; | ||
1749 | } | ||
1750 | |||
1751 | static int fill_note_info(struct elfhdr *elf, int phdrs, | ||
1752 | struct elf_note_info *info, | ||
1753 | long signr, struct pt_regs *regs) | ||
1754 | { | ||
1755 | struct list_head *t; | ||
1756 | |||
1757 | if (!elf_note_info_init(info)) | ||
1758 | return 0; | ||
1751 | 1759 | ||
1752 | info->thread_status_size = 0; | ||
1753 | if (signr) { | 1760 | if (signr) { |
1754 | struct core_thread *ct; | 1761 | struct core_thread *ct; |
1755 | struct elf_thread_status *ets; | 1762 | struct elf_thread_status *ets; |
@@ -1809,8 +1816,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, | |||
1809 | #endif | 1816 | #endif |
1810 | 1817 | ||
1811 | return 1; | 1818 | return 1; |
1812 | |||
1813 | #undef NUM_NOTES | ||
1814 | } | 1819 | } |
1815 | 1820 | ||
1816 | static size_t get_note_info_size(struct elf_note_info *info) | 1821 | static size_t get_note_info_size(struct elf_note_info *info) |
@@ -2016,7 +2021,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un | |||
2016 | goto end_coredump; | 2021 | goto end_coredump; |
2017 | 2022 | ||
2018 | /* Align to page */ | 2023 | /* Align to page */ |
2019 | DUMP_SEEK(dataoff - foffset); | 2024 | if (!dump_seek(file, dataoff - foffset)) |
2025 | goto end_coredump; | ||
2020 | 2026 | ||
2021 | for (vma = first_vma(current, gate_vma); vma != NULL; | 2027 | for (vma = first_vma(current, gate_vma); vma != NULL; |
2022 | vma = next_vma(vma, gate_vma)) { | 2028 | vma = next_vma(vma, gate_vma)) { |
@@ -2027,33 +2033,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un | |||
2027 | 2033 | ||
2028 | for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { | 2034 | for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { |
2029 | struct page *page; | 2035 | struct page *page; |
2030 | struct vm_area_struct *tmp_vma; | 2036 | int stop; |
2031 | 2037 | ||
2032 | if (get_user_pages(current, current->mm, addr, 1, 0, 1, | 2038 | page = get_dump_page(addr); |
2033 | &page, &tmp_vma) <= 0) { | 2039 | if (page) { |
2034 | DUMP_SEEK(PAGE_SIZE); | 2040 | void *kaddr = kmap(page); |
2035 | } else { | 2041 | stop = ((size += PAGE_SIZE) > limit) || |
2036 | if (page == ZERO_PAGE(0)) { | 2042 | !dump_write(file, kaddr, PAGE_SIZE); |
2037 | if (!dump_seek(file, PAGE_SIZE)) { | 2043 | kunmap(page); |
2038 | page_cache_release(page); | ||
2039 | goto end_coredump; | ||
2040 | } | ||
2041 | } else { | ||
2042 | void *kaddr; | ||
2043 | flush_cache_page(tmp_vma, addr, | ||
2044 | page_to_pfn(page)); | ||
2045 | kaddr = kmap(page); | ||
2046 | if ((size += PAGE_SIZE) > limit || | ||
2047 | !dump_write(file, kaddr, | ||
2048 | PAGE_SIZE)) { | ||
2049 | kunmap(page); | ||
2050 | page_cache_release(page); | ||
2051 | goto end_coredump; | ||
2052 | } | ||
2053 | kunmap(page); | ||
2054 | } | ||
2055 | page_cache_release(page); | 2044 | page_cache_release(page); |
2056 | } | 2045 | } else |
2046 | stop = !dump_seek(file, PAGE_SIZE); | ||
2047 | if (stop) | ||
2048 | goto end_coredump; | ||
2057 | } | 2049 | } |
2058 | } | 2050 | } |
2059 | 2051 | ||
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 20fbeced472b..38502c67987c 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -283,20 +283,23 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, | |||
283 | } | 283 | } |
284 | 284 | ||
285 | stack_size = exec_params.stack_size; | 285 | stack_size = exec_params.stack_size; |
286 | if (stack_size < interp_params.stack_size) | ||
287 | stack_size = interp_params.stack_size; | ||
288 | |||
289 | if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) | 286 | if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) |
290 | executable_stack = EXSTACK_ENABLE_X; | 287 | executable_stack = EXSTACK_ENABLE_X; |
291 | else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) | 288 | else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) |
292 | executable_stack = EXSTACK_DISABLE_X; | 289 | executable_stack = EXSTACK_DISABLE_X; |
293 | else if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) | ||
294 | executable_stack = EXSTACK_ENABLE_X; | ||
295 | else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) | ||
296 | executable_stack = EXSTACK_DISABLE_X; | ||
297 | else | 290 | else |
298 | executable_stack = EXSTACK_DEFAULT; | 291 | executable_stack = EXSTACK_DEFAULT; |
299 | 292 | ||
293 | if (stack_size == 0) { | ||
294 | stack_size = interp_params.stack_size; | ||
295 | if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) | ||
296 | executable_stack = EXSTACK_ENABLE_X; | ||
297 | else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) | ||
298 | executable_stack = EXSTACK_DISABLE_X; | ||
299 | else | ||
300 | executable_stack = EXSTACK_DEFAULT; | ||
301 | } | ||
302 | |||
300 | retval = -ENOEXEC; | 303 | retval = -ENOEXEC; |
301 | if (stack_size == 0) | 304 | if (stack_size == 0) |
302 | goto error; | 305 | goto error; |
@@ -1325,9 +1328,6 @@ static int writenote(struct memelfnote *men, struct file *file) | |||
1325 | #define DUMP_WRITE(addr, nr) \ | 1328 | #define DUMP_WRITE(addr, nr) \ |
1326 | if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ | 1329 | if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ |
1327 | goto end_coredump; | 1330 | goto end_coredump; |
1328 | #define DUMP_SEEK(off) \ | ||
1329 | if (!dump_seek(file, (off))) \ | ||
1330 | goto end_coredump; | ||
1331 | 1331 | ||
1332 | static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) | 1332 | static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) |
1333 | { | 1333 | { |
@@ -1518,6 +1518,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size, | |||
1518 | unsigned long *limit, unsigned long mm_flags) | 1518 | unsigned long *limit, unsigned long mm_flags) |
1519 | { | 1519 | { |
1520 | struct vm_area_struct *vma; | 1520 | struct vm_area_struct *vma; |
1521 | int err = 0; | ||
1521 | 1522 | ||
1522 | for (vma = current->mm->mmap; vma; vma = vma->vm_next) { | 1523 | for (vma = current->mm->mmap; vma; vma = vma->vm_next) { |
1523 | unsigned long addr; | 1524 | unsigned long addr; |
@@ -1525,43 +1526,26 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size, | |||
1525 | if (!maydump(vma, mm_flags)) | 1526 | if (!maydump(vma, mm_flags)) |
1526 | continue; | 1527 | continue; |
1527 | 1528 | ||
1528 | for (addr = vma->vm_start; | 1529 | for (addr = vma->vm_start; addr < vma->vm_end; |
1529 | addr < vma->vm_end; | 1530 | addr += PAGE_SIZE) { |
1530 | addr += PAGE_SIZE | 1531 | struct page *page = get_dump_page(addr); |
1531 | ) { | 1532 | if (page) { |
1532 | struct vm_area_struct *vma; | 1533 | void *kaddr = kmap(page); |
1533 | struct page *page; | 1534 | *size += PAGE_SIZE; |
1534 | 1535 | if (*size > *limit) | |
1535 | if (get_user_pages(current, current->mm, addr, 1, 0, 1, | 1536 | err = -EFBIG; |
1536 | &page, &vma) <= 0) { | 1537 | else if (!dump_write(file, kaddr, PAGE_SIZE)) |
1537 | DUMP_SEEK(file->f_pos + PAGE_SIZE); | 1538 | err = -EIO; |
1538 | } | ||
1539 | else if (page == ZERO_PAGE(0)) { | ||
1540 | page_cache_release(page); | ||
1541 | DUMP_SEEK(file->f_pos + PAGE_SIZE); | ||
1542 | } | ||
1543 | else { | ||
1544 | void *kaddr; | ||
1545 | |||
1546 | flush_cache_page(vma, addr, page_to_pfn(page)); | ||
1547 | kaddr = kmap(page); | ||
1548 | if ((*size += PAGE_SIZE) > *limit || | ||
1549 | !dump_write(file, kaddr, PAGE_SIZE) | ||
1550 | ) { | ||
1551 | kunmap(page); | ||
1552 | page_cache_release(page); | ||
1553 | return -EIO; | ||
1554 | } | ||
1555 | kunmap(page); | 1539 | kunmap(page); |
1556 | page_cache_release(page); | 1540 | page_cache_release(page); |
1557 | } | 1541 | } else if (!dump_seek(file, file->f_pos + PAGE_SIZE)) |
1542 | err = -EFBIG; | ||
1543 | if (err) | ||
1544 | goto out; | ||
1558 | } | 1545 | } |
1559 | } | 1546 | } |
1560 | 1547 | out: | |
1561 | return 0; | 1548 | return err; |
1562 | |||
1563 | end_coredump: | ||
1564 | return -EFBIG; | ||
1565 | } | 1549 | } |
1566 | #endif | 1550 | #endif |
1567 | 1551 | ||
@@ -1802,7 +1786,8 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, | |||
1802 | goto end_coredump; | 1786 | goto end_coredump; |
1803 | } | 1787 | } |
1804 | 1788 | ||
1805 | DUMP_SEEK(dataoff); | 1789 | if (!dump_seek(file, dataoff)) |
1790 | goto end_coredump; | ||
1806 | 1791 | ||
1807 | if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0) | 1792 | if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0) |
1808 | goto end_coredump; | 1793 | goto end_coredump; |
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index e92f229e3c6e..a2796651e756 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -278,8 +278,6 @@ static int decompress_exec( | |||
278 | ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); | 278 | ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); |
279 | if (ret <= 0) | 279 | if (ret <= 0) |
280 | break; | 280 | break; |
281 | if (ret >= (unsigned long) -4096) | ||
282 | break; | ||
283 | len -= ret; | 281 | len -= ret; |
284 | 282 | ||
285 | strm.next_in = buf; | 283 | strm.next_in = buf; |
@@ -335,7 +333,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp) | |||
335 | "(%d != %d)", (unsigned) r, curid, id); | 333 | "(%d != %d)", (unsigned) r, curid, id); |
336 | goto failed; | 334 | goto failed; |
337 | } else if ( ! p->lib_list[id].loaded && | 335 | } else if ( ! p->lib_list[id].loaded && |
338 | load_flat_shared_library(id, p) > (unsigned long) -4096) { | 336 | IS_ERR_VALUE(load_flat_shared_library(id, p))) { |
339 | printk("BINFMT_FLAT: failed to load library %d", id); | 337 | printk("BINFMT_FLAT: failed to load library %d", id); |
340 | goto failed; | 338 | goto failed; |
341 | } | 339 | } |
@@ -545,7 +543,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
545 | textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, | 543 | textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, |
546 | MAP_PRIVATE|MAP_EXECUTABLE, 0); | 544 | MAP_PRIVATE|MAP_EXECUTABLE, 0); |
547 | up_write(¤t->mm->mmap_sem); | 545 | up_write(¤t->mm->mmap_sem); |
548 | if (!textpos || textpos >= (unsigned long) -4096) { | 546 | if (!textpos || IS_ERR_VALUE(textpos)) { |
549 | if (!textpos) | 547 | if (!textpos) |
550 | textpos = (unsigned long) -ENOMEM; | 548 | textpos = (unsigned long) -ENOMEM; |
551 | printk("Unable to mmap process text, errno %d\n", (int)-textpos); | 549 | printk("Unable to mmap process text, errno %d\n", (int)-textpos); |
@@ -560,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
560 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); | 558 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); |
561 | up_write(¤t->mm->mmap_sem); | 559 | up_write(¤t->mm->mmap_sem); |
562 | 560 | ||
563 | if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) { | 561 | if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) { |
564 | if (!realdatastart) | 562 | if (!realdatastart) |
565 | realdatastart = (unsigned long) -ENOMEM; | 563 | realdatastart = (unsigned long) -ENOMEM; |
566 | printk("Unable to allocate RAM for process data, errno %d\n", | 564 | printk("Unable to allocate RAM for process data, errno %d\n", |
@@ -587,7 +585,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
587 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, | 585 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, |
588 | data_len + (relocs * sizeof(unsigned long)), &fpos); | 586 | data_len + (relocs * sizeof(unsigned long)), &fpos); |
589 | } | 587 | } |
590 | if (result >= (unsigned long)-4096) { | 588 | if (IS_ERR_VALUE(result)) { |
591 | printk("Unable to read data+bss, errno %d\n", (int)-result); | 589 | printk("Unable to read data+bss, errno %d\n", (int)-result); |
592 | do_munmap(current->mm, textpos, text_len); | 590 | do_munmap(current->mm, textpos, text_len); |
593 | do_munmap(current->mm, realdatastart, data_len + extra); | 591 | do_munmap(current->mm, realdatastart, data_len + extra); |
@@ -607,7 +605,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
607 | PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); | 605 | PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); |
608 | up_write(¤t->mm->mmap_sem); | 606 | up_write(¤t->mm->mmap_sem); |
609 | 607 | ||
610 | if (!textpos || textpos >= (unsigned long) -4096) { | 608 | if (!textpos || IS_ERR_VALUE(textpos)) { |
611 | if (!textpos) | 609 | if (!textpos) |
612 | textpos = (unsigned long) -ENOMEM; | 610 | textpos = (unsigned long) -ENOMEM; |
613 | printk("Unable to allocate RAM for process text/data, errno %d\n", | 611 | printk("Unable to allocate RAM for process text/data, errno %d\n", |
@@ -641,7 +639,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
641 | fpos = 0; | 639 | fpos = 0; |
642 | result = bprm->file->f_op->read(bprm->file, | 640 | result = bprm->file->f_op->read(bprm->file, |
643 | (char *) textpos, text_len, &fpos); | 641 | (char *) textpos, text_len, &fpos); |
644 | if (result < (unsigned long) -4096) | 642 | if (!IS_ERR_VALUE(result)) |
645 | result = decompress_exec(bprm, text_len, (char *) datapos, | 643 | result = decompress_exec(bprm, text_len, (char *) datapos, |
646 | data_len + (relocs * sizeof(unsigned long)), 0); | 644 | data_len + (relocs * sizeof(unsigned long)), 0); |
647 | } | 645 | } |
@@ -651,13 +649,13 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
651 | fpos = 0; | 649 | fpos = 0; |
652 | result = bprm->file->f_op->read(bprm->file, | 650 | result = bprm->file->f_op->read(bprm->file, |
653 | (char *) textpos, text_len, &fpos); | 651 | (char *) textpos, text_len, &fpos); |
654 | if (result < (unsigned long) -4096) { | 652 | if (!IS_ERR_VALUE(result)) { |
655 | fpos = ntohl(hdr->data_start); | 653 | fpos = ntohl(hdr->data_start); |
656 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, | 654 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, |
657 | data_len + (relocs * sizeof(unsigned long)), &fpos); | 655 | data_len + (relocs * sizeof(unsigned long)), &fpos); |
658 | } | 656 | } |
659 | } | 657 | } |
660 | if (result >= (unsigned long)-4096) { | 658 | if (IS_ERR_VALUE(result)) { |
661 | printk("Unable to read code+data+bss, errno %d\n",(int)-result); | 659 | printk("Unable to read code+data+bss, errno %d\n",(int)-result); |
662 | do_munmap(current->mm, textpos, text_len + data_len + extra + | 660 | do_munmap(current->mm, textpos, text_len + data_len + extra + |
663 | MAX_SHARED_LIBS * sizeof(unsigned long)); | 661 | MAX_SHARED_LIBS * sizeof(unsigned long)); |
@@ -835,7 +833,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs) | |||
835 | 833 | ||
836 | res = prepare_binprm(&bprm); | 834 | res = prepare_binprm(&bprm); |
837 | 835 | ||
838 | if (res <= (unsigned long)-4096) | 836 | if (!IS_ERR_VALUE(res)) |
839 | res = load_flat_file(&bprm, libs, id, NULL); | 837 | res = load_flat_file(&bprm, libs, id, NULL); |
840 | 838 | ||
841 | abort_creds(bprm.cred); | 839 | abort_creds(bprm.cred); |
@@ -880,7 +878,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
880 | stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ | 878 | stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ |
881 | 879 | ||
882 | res = load_flat_file(bprm, &libinfo, 0, &stack_len); | 880 | res = load_flat_file(bprm, &libinfo, 0, &stack_len); |
883 | if (res > (unsigned long)-4096) | 881 | if (IS_ERR_VALUE(res)) |
884 | return res; | 882 | return res; |
885 | 883 | ||
886 | /* Update data segment pointers for all libraries */ | 884 | /* Update data segment pointers for all libraries */ |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 3581a4e53942..5d1ed50bd46c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -420,7 +420,6 @@ static void bdev_destroy_inode(struct inode *inode) | |||
420 | { | 420 | { |
421 | struct bdev_inode *bdi = BDEV_I(inode); | 421 | struct bdev_inode *bdi = BDEV_I(inode); |
422 | 422 | ||
423 | bdi->bdev.bd_inode_backing_dev_info = NULL; | ||
424 | kmem_cache_free(bdev_cachep, bdi); | 423 | kmem_cache_free(bdev_cachep, bdi); |
425 | } | 424 | } |
426 | 425 | ||
@@ -1115,7 +1114,7 @@ EXPORT_SYMBOL(revalidate_disk); | |||
1115 | int check_disk_change(struct block_device *bdev) | 1114 | int check_disk_change(struct block_device *bdev) |
1116 | { | 1115 | { |
1117 | struct gendisk *disk = bdev->bd_disk; | 1116 | struct gendisk *disk = bdev->bd_disk; |
1118 | struct block_device_operations * bdops = disk->fops; | 1117 | const struct block_device_operations *bdops = disk->fops; |
1119 | 1118 | ||
1120 | if (!bdops->media_changed) | 1119 | if (!bdops->media_changed) |
1121 | return 0; | 1120 | return 0; |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 15831d5c7367..6c4173146bb7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -772,7 +772,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) | |||
772 | } | 772 | } |
773 | } | 773 | } |
774 | 774 | ||
775 | static struct address_space_operations btree_aops = { | 775 | static const struct address_space_operations btree_aops = { |
776 | .readpage = btree_readpage, | 776 | .readpage = btree_readpage, |
777 | .writepage = btree_writepage, | 777 | .writepage = btree_writepage, |
778 | .writepages = btree_writepages, | 778 | .writepages = btree_writepages, |
@@ -1600,6 +1600,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1600 | 1600 | ||
1601 | sb->s_blocksize = 4096; | 1601 | sb->s_blocksize = 4096; |
1602 | sb->s_blocksize_bits = blksize_bits(4096); | 1602 | sb->s_blocksize_bits = blksize_bits(4096); |
1603 | sb->s_bdi = &fs_info->bdi; | ||
1603 | 1604 | ||
1604 | /* | 1605 | /* |
1605 | * we set the i_size on the btree inode to the max possible int. | 1606 | * we set the i_size on the btree inode to the max possible int. |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dd86050190fc..d154a3f365d5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -55,13 +55,13 @@ struct btrfs_iget_args { | |||
55 | struct btrfs_root *root; | 55 | struct btrfs_root *root; |
56 | }; | 56 | }; |
57 | 57 | ||
58 | static struct inode_operations btrfs_dir_inode_operations; | 58 | static const struct inode_operations btrfs_dir_inode_operations; |
59 | static struct inode_operations btrfs_symlink_inode_operations; | 59 | static const struct inode_operations btrfs_symlink_inode_operations; |
60 | static struct inode_operations btrfs_dir_ro_inode_operations; | 60 | static const struct inode_operations btrfs_dir_ro_inode_operations; |
61 | static struct inode_operations btrfs_special_inode_operations; | 61 | static const struct inode_operations btrfs_special_inode_operations; |
62 | static struct inode_operations btrfs_file_inode_operations; | 62 | static const struct inode_operations btrfs_file_inode_operations; |
63 | static struct address_space_operations btrfs_aops; | 63 | static const struct address_space_operations btrfs_aops; |
64 | static struct address_space_operations btrfs_symlink_aops; | 64 | static const struct address_space_operations btrfs_symlink_aops; |
65 | static struct file_operations btrfs_dir_file_operations; | 65 | static struct file_operations btrfs_dir_file_operations; |
66 | static struct extent_io_ops btrfs_extent_io_ops; | 66 | static struct extent_io_ops btrfs_extent_io_ops; |
67 | 67 | ||
@@ -5201,7 +5201,7 @@ static int btrfs_permission(struct inode *inode, int mask) | |||
5201 | return generic_permission(inode, mask, btrfs_check_acl); | 5201 | return generic_permission(inode, mask, btrfs_check_acl); |
5202 | } | 5202 | } |
5203 | 5203 | ||
5204 | static struct inode_operations btrfs_dir_inode_operations = { | 5204 | static const struct inode_operations btrfs_dir_inode_operations = { |
5205 | .getattr = btrfs_getattr, | 5205 | .getattr = btrfs_getattr, |
5206 | .lookup = btrfs_lookup, | 5206 | .lookup = btrfs_lookup, |
5207 | .create = btrfs_create, | 5207 | .create = btrfs_create, |
@@ -5219,7 +5219,7 @@ static struct inode_operations btrfs_dir_inode_operations = { | |||
5219 | .removexattr = btrfs_removexattr, | 5219 | .removexattr = btrfs_removexattr, |
5220 | .permission = btrfs_permission, | 5220 | .permission = btrfs_permission, |
5221 | }; | 5221 | }; |
5222 | static struct inode_operations btrfs_dir_ro_inode_operations = { | 5222 | static const struct inode_operations btrfs_dir_ro_inode_operations = { |
5223 | .lookup = btrfs_lookup, | 5223 | .lookup = btrfs_lookup, |
5224 | .permission = btrfs_permission, | 5224 | .permission = btrfs_permission, |
5225 | }; | 5225 | }; |
@@ -5259,7 +5259,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
5259 | * | 5259 | * |
5260 | * For now we're avoiding this by dropping bmap. | 5260 | * For now we're avoiding this by dropping bmap. |
5261 | */ | 5261 | */ |
5262 | static struct address_space_operations btrfs_aops = { | 5262 | static const struct address_space_operations btrfs_aops = { |
5263 | .readpage = btrfs_readpage, | 5263 | .readpage = btrfs_readpage, |
5264 | .writepage = btrfs_writepage, | 5264 | .writepage = btrfs_writepage, |
5265 | .writepages = btrfs_writepages, | 5265 | .writepages = btrfs_writepages, |
@@ -5272,14 +5272,14 @@ static struct address_space_operations btrfs_aops = { | |||
5272 | .error_remove_page = generic_error_remove_page, | 5272 | .error_remove_page = generic_error_remove_page, |
5273 | }; | 5273 | }; |
5274 | 5274 | ||
5275 | static struct address_space_operations btrfs_symlink_aops = { | 5275 | static const struct address_space_operations btrfs_symlink_aops = { |
5276 | .readpage = btrfs_readpage, | 5276 | .readpage = btrfs_readpage, |
5277 | .writepage = btrfs_writepage, | 5277 | .writepage = btrfs_writepage, |
5278 | .invalidatepage = btrfs_invalidatepage, | 5278 | .invalidatepage = btrfs_invalidatepage, |
5279 | .releasepage = btrfs_releasepage, | 5279 | .releasepage = btrfs_releasepage, |
5280 | }; | 5280 | }; |
5281 | 5281 | ||
5282 | static struct inode_operations btrfs_file_inode_operations = { | 5282 | static const struct inode_operations btrfs_file_inode_operations = { |
5283 | .truncate = btrfs_truncate, | 5283 | .truncate = btrfs_truncate, |
5284 | .getattr = btrfs_getattr, | 5284 | .getattr = btrfs_getattr, |
5285 | .setattr = btrfs_setattr, | 5285 | .setattr = btrfs_setattr, |
@@ -5291,7 +5291,7 @@ static struct inode_operations btrfs_file_inode_operations = { | |||
5291 | .fallocate = btrfs_fallocate, | 5291 | .fallocate = btrfs_fallocate, |
5292 | .fiemap = btrfs_fiemap, | 5292 | .fiemap = btrfs_fiemap, |
5293 | }; | 5293 | }; |
5294 | static struct inode_operations btrfs_special_inode_operations = { | 5294 | static const struct inode_operations btrfs_special_inode_operations = { |
5295 | .getattr = btrfs_getattr, | 5295 | .getattr = btrfs_getattr, |
5296 | .setattr = btrfs_setattr, | 5296 | .setattr = btrfs_setattr, |
5297 | .permission = btrfs_permission, | 5297 | .permission = btrfs_permission, |
@@ -5300,7 +5300,7 @@ static struct inode_operations btrfs_special_inode_operations = { | |||
5300 | .listxattr = btrfs_listxattr, | 5300 | .listxattr = btrfs_listxattr, |
5301 | .removexattr = btrfs_removexattr, | 5301 | .removexattr = btrfs_removexattr, |
5302 | }; | 5302 | }; |
5303 | static struct inode_operations btrfs_symlink_inode_operations = { | 5303 | static const struct inode_operations btrfs_symlink_inode_operations = { |
5304 | .readlink = generic_readlink, | 5304 | .readlink = generic_readlink, |
5305 | .follow_link = page_follow_link_light, | 5305 | .follow_link = page_follow_link_light, |
5306 | .put_link = page_put_link, | 5306 | .put_link = page_put_link, |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index d6f0806c682f..7b2f401e604e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -740,7 +740,6 @@ int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | |||
740 | .nr_to_write = mapping->nrpages * 2, | 740 | .nr_to_write = mapping->nrpages * 2, |
741 | .range_start = start, | 741 | .range_start = start, |
742 | .range_end = end, | 742 | .range_end = end, |
743 | .for_writepages = 1, | ||
744 | }; | 743 | }; |
745 | return btrfs_writepages(mapping, &wbc); | 744 | return btrfs_writepages(mapping, &wbc); |
746 | } | 745 | } |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6d6d06cb6dfc..2db17cd66fc5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -51,7 +51,7 @@ | |||
51 | #include "export.h" | 51 | #include "export.h" |
52 | #include "compression.h" | 52 | #include "compression.h" |
53 | 53 | ||
54 | static struct super_operations btrfs_super_ops; | 54 | static const struct super_operations btrfs_super_ops; |
55 | 55 | ||
56 | static void btrfs_put_super(struct super_block *sb) | 56 | static void btrfs_put_super(struct super_block *sb) |
57 | { | 57 | { |
@@ -675,7 +675,7 @@ static int btrfs_unfreeze(struct super_block *sb) | |||
675 | return 0; | 675 | return 0; |
676 | } | 676 | } |
677 | 677 | ||
678 | static struct super_operations btrfs_super_ops = { | 678 | static const struct super_operations btrfs_super_ops = { |
679 | .delete_inode = btrfs_delete_inode, | 679 | .delete_inode = btrfs_delete_inode, |
680 | .put_super = btrfs_put_super, | 680 | .put_super = btrfs_put_super, |
681 | .sync_fs = btrfs_sync_fs, | 681 | .sync_fs = btrfs_sync_fs, |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d91b0de7c502..30c0d45c1b5e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -2605,7 +2605,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2605 | extent); | 2605 | extent); |
2606 | cs = btrfs_file_extent_offset(src, extent); | 2606 | cs = btrfs_file_extent_offset(src, extent); |
2607 | cl = btrfs_file_extent_num_bytes(src, | 2607 | cl = btrfs_file_extent_num_bytes(src, |
2608 | extent);; | 2608 | extent); |
2609 | if (btrfs_file_extent_compression(src, | 2609 | if (btrfs_file_extent_compression(src, |
2610 | extent)) { | 2610 | extent)) { |
2611 | cs = 0; | 2611 | cs = 0; |
diff --git a/fs/buffer.c b/fs/buffer.c index 90a98865b0cc..209f7f15f5f8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -52,6 +52,7 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) | |||
52 | bh->b_end_io = handler; | 52 | bh->b_end_io = handler; |
53 | bh->b_private = private; | 53 | bh->b_private = private; |
54 | } | 54 | } |
55 | EXPORT_SYMBOL(init_buffer); | ||
55 | 56 | ||
56 | static int sync_buffer(void *word) | 57 | static int sync_buffer(void *word) |
57 | { | 58 | { |
@@ -80,6 +81,7 @@ void unlock_buffer(struct buffer_head *bh) | |||
80 | smp_mb__after_clear_bit(); | 81 | smp_mb__after_clear_bit(); |
81 | wake_up_bit(&bh->b_state, BH_Lock); | 82 | wake_up_bit(&bh->b_state, BH_Lock); |
82 | } | 83 | } |
84 | EXPORT_SYMBOL(unlock_buffer); | ||
83 | 85 | ||
84 | /* | 86 | /* |
85 | * Block until a buffer comes unlocked. This doesn't stop it | 87 | * Block until a buffer comes unlocked. This doesn't stop it |
@@ -90,6 +92,7 @@ void __wait_on_buffer(struct buffer_head * bh) | |||
90 | { | 92 | { |
91 | wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); | 93 | wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); |
92 | } | 94 | } |
95 | EXPORT_SYMBOL(__wait_on_buffer); | ||
93 | 96 | ||
94 | static void | 97 | static void |
95 | __clear_page_buffers(struct page *page) | 98 | __clear_page_buffers(struct page *page) |
@@ -144,6 +147,7 @@ void end_buffer_read_sync(struct buffer_head *bh, int uptodate) | |||
144 | __end_buffer_read_notouch(bh, uptodate); | 147 | __end_buffer_read_notouch(bh, uptodate); |
145 | put_bh(bh); | 148 | put_bh(bh); |
146 | } | 149 | } |
150 | EXPORT_SYMBOL(end_buffer_read_sync); | ||
147 | 151 | ||
148 | void end_buffer_write_sync(struct buffer_head *bh, int uptodate) | 152 | void end_buffer_write_sync(struct buffer_head *bh, int uptodate) |
149 | { | 153 | { |
@@ -164,6 +168,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) | |||
164 | unlock_buffer(bh); | 168 | unlock_buffer(bh); |
165 | put_bh(bh); | 169 | put_bh(bh); |
166 | } | 170 | } |
171 | EXPORT_SYMBOL(end_buffer_write_sync); | ||
167 | 172 | ||
168 | /* | 173 | /* |
169 | * Various filesystems appear to want __find_get_block to be non-blocking. | 174 | * Various filesystems appear to want __find_get_block to be non-blocking. |
@@ -272,6 +277,7 @@ void invalidate_bdev(struct block_device *bdev) | |||
272 | invalidate_bh_lrus(); | 277 | invalidate_bh_lrus(); |
273 | invalidate_mapping_pages(mapping, 0, -1); | 278 | invalidate_mapping_pages(mapping, 0, -1); |
274 | } | 279 | } |
280 | EXPORT_SYMBOL(invalidate_bdev); | ||
275 | 281 | ||
276 | /* | 282 | /* |
277 | * Kick pdflush then try to free up some ZONE_NORMAL memory. | 283 | * Kick pdflush then try to free up some ZONE_NORMAL memory. |
@@ -410,6 +416,7 @@ still_busy: | |||
410 | local_irq_restore(flags); | 416 | local_irq_restore(flags); |
411 | return; | 417 | return; |
412 | } | 418 | } |
419 | EXPORT_SYMBOL(end_buffer_async_write); | ||
413 | 420 | ||
414 | /* | 421 | /* |
415 | * If a page's buffers are under async readin (end_buffer_async_read | 422 | * If a page's buffers are under async readin (end_buffer_async_read |
@@ -438,8 +445,8 @@ static void mark_buffer_async_read(struct buffer_head *bh) | |||
438 | set_buffer_async_read(bh); | 445 | set_buffer_async_read(bh); |
439 | } | 446 | } |
440 | 447 | ||
441 | void mark_buffer_async_write_endio(struct buffer_head *bh, | 448 | static void mark_buffer_async_write_endio(struct buffer_head *bh, |
442 | bh_end_io_t *handler) | 449 | bh_end_io_t *handler) |
443 | { | 450 | { |
444 | bh->b_end_io = handler; | 451 | bh->b_end_io = handler; |
445 | set_buffer_async_write(bh); | 452 | set_buffer_async_write(bh); |
@@ -553,7 +560,7 @@ repeat: | |||
553 | return err; | 560 | return err; |
554 | } | 561 | } |
555 | 562 | ||
556 | void do_thaw_all(struct work_struct *work) | 563 | static void do_thaw_all(struct work_struct *work) |
557 | { | 564 | { |
558 | struct super_block *sb; | 565 | struct super_block *sb; |
559 | char b[BDEVNAME_SIZE]; | 566 | char b[BDEVNAME_SIZE]; |
@@ -1172,6 +1179,7 @@ void mark_buffer_dirty(struct buffer_head *bh) | |||
1172 | } | 1179 | } |
1173 | } | 1180 | } |
1174 | } | 1181 | } |
1182 | EXPORT_SYMBOL(mark_buffer_dirty); | ||
1175 | 1183 | ||
1176 | /* | 1184 | /* |
1177 | * Decrement a buffer_head's reference count. If all buffers against a page | 1185 | * Decrement a buffer_head's reference count. If all buffers against a page |
@@ -1188,6 +1196,7 @@ void __brelse(struct buffer_head * buf) | |||
1188 | } | 1196 | } |
1189 | WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n"); | 1197 | WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n"); |
1190 | } | 1198 | } |
1199 | EXPORT_SYMBOL(__brelse); | ||
1191 | 1200 | ||
1192 | /* | 1201 | /* |
1193 | * bforget() is like brelse(), except it discards any | 1202 | * bforget() is like brelse(), except it discards any |
@@ -1206,6 +1215,7 @@ void __bforget(struct buffer_head *bh) | |||
1206 | } | 1215 | } |
1207 | __brelse(bh); | 1216 | __brelse(bh); |
1208 | } | 1217 | } |
1218 | EXPORT_SYMBOL(__bforget); | ||
1209 | 1219 | ||
1210 | static struct buffer_head *__bread_slow(struct buffer_head *bh) | 1220 | static struct buffer_head *__bread_slow(struct buffer_head *bh) |
1211 | { | 1221 | { |
@@ -2218,6 +2228,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block) | |||
2218 | } | 2228 | } |
2219 | return 0; | 2229 | return 0; |
2220 | } | 2230 | } |
2231 | EXPORT_SYMBOL(block_read_full_page); | ||
2221 | 2232 | ||
2222 | /* utility function for filesystems that need to do work on expanding | 2233 | /* utility function for filesystems that need to do work on expanding |
2223 | * truncates. Uses filesystem pagecache writes to allow the filesystem to | 2234 | * truncates. Uses filesystem pagecache writes to allow the filesystem to |
@@ -2252,6 +2263,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size) | |||
2252 | out: | 2263 | out: |
2253 | return err; | 2264 | return err; |
2254 | } | 2265 | } |
2266 | EXPORT_SYMBOL(generic_cont_expand_simple); | ||
2255 | 2267 | ||
2256 | static int cont_expand_zero(struct file *file, struct address_space *mapping, | 2268 | static int cont_expand_zero(struct file *file, struct address_space *mapping, |
2257 | loff_t pos, loff_t *bytes) | 2269 | loff_t pos, loff_t *bytes) |
@@ -2352,6 +2364,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping, | |||
2352 | out: | 2364 | out: |
2353 | return err; | 2365 | return err; |
2354 | } | 2366 | } |
2367 | EXPORT_SYMBOL(cont_write_begin); | ||
2355 | 2368 | ||
2356 | int block_prepare_write(struct page *page, unsigned from, unsigned to, | 2369 | int block_prepare_write(struct page *page, unsigned from, unsigned to, |
2357 | get_block_t *get_block) | 2370 | get_block_t *get_block) |
@@ -2362,6 +2375,7 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to, | |||
2362 | ClearPageUptodate(page); | 2375 | ClearPageUptodate(page); |
2363 | return err; | 2376 | return err; |
2364 | } | 2377 | } |
2378 | EXPORT_SYMBOL(block_prepare_write); | ||
2365 | 2379 | ||
2366 | int block_commit_write(struct page *page, unsigned from, unsigned to) | 2380 | int block_commit_write(struct page *page, unsigned from, unsigned to) |
2367 | { | 2381 | { |
@@ -2369,6 +2383,7 @@ int block_commit_write(struct page *page, unsigned from, unsigned to) | |||
2369 | __block_commit_write(inode,page,from,to); | 2383 | __block_commit_write(inode,page,from,to); |
2370 | return 0; | 2384 | return 0; |
2371 | } | 2385 | } |
2386 | EXPORT_SYMBOL(block_commit_write); | ||
2372 | 2387 | ||
2373 | /* | 2388 | /* |
2374 | * block_page_mkwrite() is not allowed to change the file size as it gets | 2389 | * block_page_mkwrite() is not allowed to change the file size as it gets |
@@ -2426,6 +2441,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
2426 | out: | 2441 | out: |
2427 | return ret; | 2442 | return ret; |
2428 | } | 2443 | } |
2444 | EXPORT_SYMBOL(block_page_mkwrite); | ||
2429 | 2445 | ||
2430 | /* | 2446 | /* |
2431 | * nobh_write_begin()'s prereads are special: the buffer_heads are freed | 2447 | * nobh_write_begin()'s prereads are special: the buffer_heads are freed |
@@ -2849,6 +2865,7 @@ unlock: | |||
2849 | out: | 2865 | out: |
2850 | return err; | 2866 | return err; |
2851 | } | 2867 | } |
2868 | EXPORT_SYMBOL(block_truncate_page); | ||
2852 | 2869 | ||
2853 | /* | 2870 | /* |
2854 | * The generic ->writepage function for buffer-backed address_spaces | 2871 | * The generic ->writepage function for buffer-backed address_spaces |
@@ -2890,6 +2907,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block, | |||
2890 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); | 2907 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); |
2891 | return __block_write_full_page(inode, page, get_block, wbc, handler); | 2908 | return __block_write_full_page(inode, page, get_block, wbc, handler); |
2892 | } | 2909 | } |
2910 | EXPORT_SYMBOL(block_write_full_page_endio); | ||
2893 | 2911 | ||
2894 | /* | 2912 | /* |
2895 | * The generic ->writepage function for buffer-backed address_spaces | 2913 | * The generic ->writepage function for buffer-backed address_spaces |
@@ -2900,7 +2918,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, | |||
2900 | return block_write_full_page_endio(page, get_block, wbc, | 2918 | return block_write_full_page_endio(page, get_block, wbc, |
2901 | end_buffer_async_write); | 2919 | end_buffer_async_write); |
2902 | } | 2920 | } |
2903 | 2921 | EXPORT_SYMBOL(block_write_full_page); | |
2904 | 2922 | ||
2905 | sector_t generic_block_bmap(struct address_space *mapping, sector_t block, | 2923 | sector_t generic_block_bmap(struct address_space *mapping, sector_t block, |
2906 | get_block_t *get_block) | 2924 | get_block_t *get_block) |
@@ -2913,6 +2931,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, | |||
2913 | get_block(inode, block, &tmp, 0); | 2931 | get_block(inode, block, &tmp, 0); |
2914 | return tmp.b_blocknr; | 2932 | return tmp.b_blocknr; |
2915 | } | 2933 | } |
2934 | EXPORT_SYMBOL(generic_block_bmap); | ||
2916 | 2935 | ||
2917 | static void end_bio_bh_io_sync(struct bio *bio, int err) | 2936 | static void end_bio_bh_io_sync(struct bio *bio, int err) |
2918 | { | 2937 | { |
@@ -2982,6 +3001,7 @@ int submit_bh(int rw, struct buffer_head * bh) | |||
2982 | bio_put(bio); | 3001 | bio_put(bio); |
2983 | return ret; | 3002 | return ret; |
2984 | } | 3003 | } |
3004 | EXPORT_SYMBOL(submit_bh); | ||
2985 | 3005 | ||
2986 | /** | 3006 | /** |
2987 | * ll_rw_block: low-level access to block devices (DEPRECATED) | 3007 | * ll_rw_block: low-level access to block devices (DEPRECATED) |
@@ -3043,6 +3063,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) | |||
3043 | unlock_buffer(bh); | 3063 | unlock_buffer(bh); |
3044 | } | 3064 | } |
3045 | } | 3065 | } |
3066 | EXPORT_SYMBOL(ll_rw_block); | ||
3046 | 3067 | ||
3047 | /* | 3068 | /* |
3048 | * For a data-integrity writeout, we need to wait upon any in-progress I/O | 3069 | * For a data-integrity writeout, we need to wait upon any in-progress I/O |
@@ -3071,6 +3092,7 @@ int sync_dirty_buffer(struct buffer_head *bh) | |||
3071 | } | 3092 | } |
3072 | return ret; | 3093 | return ret; |
3073 | } | 3094 | } |
3095 | EXPORT_SYMBOL(sync_dirty_buffer); | ||
3074 | 3096 | ||
3075 | /* | 3097 | /* |
3076 | * try_to_free_buffers() checks if all the buffers on this particular page | 3098 | * try_to_free_buffers() checks if all the buffers on this particular page |
@@ -3185,6 +3207,7 @@ void block_sync_page(struct page *page) | |||
3185 | if (mapping) | 3207 | if (mapping) |
3186 | blk_run_backing_dev(mapping->backing_dev_info, page); | 3208 | blk_run_backing_dev(mapping->backing_dev_info, page); |
3187 | } | 3209 | } |
3210 | EXPORT_SYMBOL(block_sync_page); | ||
3188 | 3211 | ||
3189 | /* | 3212 | /* |
3190 | * There are no bdflush tunables left. But distributions are | 3213 | * There are no bdflush tunables left. But distributions are |
@@ -3361,29 +3384,3 @@ void __init buffer_init(void) | |||
3361 | max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head)); | 3384 | max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head)); |
3362 | hotcpu_notifier(buffer_cpu_notify, 0); | 3385 | hotcpu_notifier(buffer_cpu_notify, 0); |
3363 | } | 3386 | } |
3364 | |||
3365 | EXPORT_SYMBOL(__bforget); | ||
3366 | EXPORT_SYMBOL(__brelse); | ||
3367 | EXPORT_SYMBOL(__wait_on_buffer); | ||
3368 | EXPORT_SYMBOL(block_commit_write); | ||
3369 | EXPORT_SYMBOL(block_prepare_write); | ||
3370 | EXPORT_SYMBOL(block_page_mkwrite); | ||
3371 | EXPORT_SYMBOL(block_read_full_page); | ||
3372 | EXPORT_SYMBOL(block_sync_page); | ||
3373 | EXPORT_SYMBOL(block_truncate_page); | ||
3374 | EXPORT_SYMBOL(block_write_full_page); | ||
3375 | EXPORT_SYMBOL(block_write_full_page_endio); | ||
3376 | EXPORT_SYMBOL(cont_write_begin); | ||
3377 | EXPORT_SYMBOL(end_buffer_read_sync); | ||
3378 | EXPORT_SYMBOL(end_buffer_write_sync); | ||
3379 | EXPORT_SYMBOL(end_buffer_async_write); | ||
3380 | EXPORT_SYMBOL(file_fsync); | ||
3381 | EXPORT_SYMBOL(generic_block_bmap); | ||
3382 | EXPORT_SYMBOL(generic_cont_expand_simple); | ||
3383 | EXPORT_SYMBOL(init_buffer); | ||
3384 | EXPORT_SYMBOL(invalidate_bdev); | ||
3385 | EXPORT_SYMBOL(ll_rw_block); | ||
3386 | EXPORT_SYMBOL(mark_buffer_dirty); | ||
3387 | EXPORT_SYMBOL(submit_bh); | ||
3388 | EXPORT_SYMBOL(sync_dirty_buffer); | ||
3389 | EXPORT_SYMBOL(unlock_buffer); | ||
diff --git a/fs/char_dev.c b/fs/char_dev.c index 3cbc57f932d2..d6db933df2b2 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -264,7 +264,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, | |||
264 | { | 264 | { |
265 | struct char_device_struct *cd; | 265 | struct char_device_struct *cd; |
266 | struct cdev *cdev; | 266 | struct cdev *cdev; |
267 | char *s; | ||
268 | int err = -ENOMEM; | 267 | int err = -ENOMEM; |
269 | 268 | ||
270 | cd = __register_chrdev_region(major, baseminor, count, name); | 269 | cd = __register_chrdev_region(major, baseminor, count, name); |
@@ -278,8 +277,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, | |||
278 | cdev->owner = fops->owner; | 277 | cdev->owner = fops->owner; |
279 | cdev->ops = fops; | 278 | cdev->ops = fops; |
280 | kobject_set_name(&cdev->kobj, "%s", name); | 279 | kobject_set_name(&cdev->kobj, "%s", name); |
281 | for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) | ||
282 | *s = '!'; | ||
283 | 280 | ||
284 | err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); | 281 | err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); |
285 | if (err) | 282 | if (err) |
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 606912d8f2a8..fea9e898c4ba 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c | |||
@@ -142,7 +142,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata, | |||
142 | rc = dns_resolve_server_name_to_ip(*devname, &srvIP); | 142 | rc = dns_resolve_server_name_to_ip(*devname, &srvIP); |
143 | if (rc != 0) { | 143 | if (rc != 0) { |
144 | cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d", | 144 | cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d", |
145 | __func__, *devname, rc));; | 145 | __func__, *devname, rc)); |
146 | goto compose_mount_options_err; | 146 | goto compose_mount_options_err; |
147 | } | 147 | } |
148 | /* md_len = strlen(...) + 12 for 'sep+prefixpath=' | 148 | /* md_len = strlen(...) + 12 for 'sep+prefixpath=' |
@@ -385,7 +385,7 @@ out_err: | |||
385 | goto out; | 385 | goto out; |
386 | } | 386 | } |
387 | 387 | ||
388 | struct inode_operations cifs_dfs_referral_inode_operations = { | 388 | const struct inode_operations cifs_dfs_referral_inode_operations = { |
389 | .follow_link = cifs_dfs_follow_mountpoint, | 389 | .follow_link = cifs_dfs_follow_mountpoint, |
390 | }; | 390 | }; |
391 | 391 | ||
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3610e9958b4c..d79ce2e95c23 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -50,7 +50,7 @@ | |||
50 | #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ | 50 | #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ |
51 | 51 | ||
52 | #ifdef CONFIG_CIFS_QUOTA | 52 | #ifdef CONFIG_CIFS_QUOTA |
53 | static struct quotactl_ops cifs_quotactl_ops; | 53 | static const struct quotactl_ops cifs_quotactl_ops; |
54 | #endif /* QUOTA */ | 54 | #endif /* QUOTA */ |
55 | 55 | ||
56 | int cifsFYI = 0; | 56 | int cifsFYI = 0; |
@@ -517,7 +517,7 @@ int cifs_xstate_get(struct super_block *sb, struct fs_quota_stat *qstats) | |||
517 | return rc; | 517 | return rc; |
518 | } | 518 | } |
519 | 519 | ||
520 | static struct quotactl_ops cifs_quotactl_ops = { | 520 | static const struct quotactl_ops cifs_quotactl_ops = { |
521 | .set_xquota = cifs_xquota_set, | 521 | .set_xquota = cifs_xquota_set, |
522 | .get_xquota = cifs_xquota_get, | 522 | .get_xquota = cifs_xquota_get, |
523 | .set_xstate = cifs_xstate_set, | 523 | .set_xstate = cifs_xstate_set, |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 094325e3f714..ac2b24c192f8 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -67,7 +67,7 @@ extern int cifs_setattr(struct dentry *, struct iattr *); | |||
67 | 67 | ||
68 | extern const struct inode_operations cifs_file_inode_ops; | 68 | extern const struct inode_operations cifs_file_inode_ops; |
69 | extern const struct inode_operations cifs_symlink_inode_ops; | 69 | extern const struct inode_operations cifs_symlink_inode_ops; |
70 | extern struct inode_operations cifs_dfs_referral_inode_operations; | 70 | extern const struct inode_operations cifs_dfs_referral_inode_operations; |
71 | 71 | ||
72 | 72 | ||
73 | /* Functions related to files and directories */ | 73 | /* Functions related to files and directories */ |
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index 8ccd5ed81d9c..d99860a33890 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _CODA_INT_ | 2 | #define _CODA_INT_ |
3 | 3 | ||
4 | struct dentry; | 4 | struct dentry; |
5 | struct file; | ||
5 | 6 | ||
6 | extern struct file_system_type coda_fs_type; | 7 | extern struct file_system_type coda_fs_type; |
7 | extern unsigned long coda_timeout; | 8 | extern unsigned long coda_timeout; |
diff --git a/fs/compat.c b/fs/compat.c index 6d6f98fe64a0..3aa48834a222 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -100,13 +100,6 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, st | |||
100 | get_compat_timespec(&tv[1], &t[1])) | 100 | get_compat_timespec(&tv[1], &t[1])) |
101 | return -EFAULT; | 101 | return -EFAULT; |
102 | 102 | ||
103 | if ((tv[0].tv_nsec == UTIME_OMIT || tv[0].tv_nsec == UTIME_NOW) | ||
104 | && tv[0].tv_sec != 0) | ||
105 | return -EINVAL; | ||
106 | if ((tv[1].tv_nsec == UTIME_OMIT || tv[1].tv_nsec == UTIME_NOW) | ||
107 | && tv[1].tv_sec != 0) | ||
108 | return -EINVAL; | ||
109 | |||
110 | if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) | 103 | if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) |
111 | return 0; | 104 | return 0; |
112 | } | 105 | } |
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 75efb028974b..d5f8c96964be 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -18,14 +18,13 @@ | |||
18 | #include <linux/mount.h> | 18 | #include <linux/mount.h> |
19 | #include <linux/tty.h> | 19 | #include <linux/tty.h> |
20 | #include <linux/mutex.h> | 20 | #include <linux/mutex.h> |
21 | #include <linux/magic.h> | ||
21 | #include <linux/idr.h> | 22 | #include <linux/idr.h> |
22 | #include <linux/devpts_fs.h> | 23 | #include <linux/devpts_fs.h> |
23 | #include <linux/parser.h> | 24 | #include <linux/parser.h> |
24 | #include <linux/fsnotify.h> | 25 | #include <linux/fsnotify.h> |
25 | #include <linux/seq_file.h> | 26 | #include <linux/seq_file.h> |
26 | 27 | ||
27 | #define DEVPTS_SUPER_MAGIC 0x1cd1 | ||
28 | |||
29 | #define DEVPTS_DEFAULT_MODE 0600 | 28 | #define DEVPTS_DEFAULT_MODE 0600 |
30 | /* | 29 | /* |
31 | * ptmx is a new node in /dev/pts and will be unused in legacy (single- | 30 | * ptmx is a new node in /dev/pts and will be unused in legacy (single- |
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 1d1d27442235..1c8bb8c3a82e 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c | |||
@@ -386,9 +386,9 @@ static int table_seq_show(struct seq_file *seq, void *iter_ptr) | |||
386 | return rv; | 386 | return rv; |
387 | } | 387 | } |
388 | 388 | ||
389 | static struct seq_operations format1_seq_ops; | 389 | static const struct seq_operations format1_seq_ops; |
390 | static struct seq_operations format2_seq_ops; | 390 | static const struct seq_operations format2_seq_ops; |
391 | static struct seq_operations format3_seq_ops; | 391 | static const struct seq_operations format3_seq_ops; |
392 | 392 | ||
393 | static void *table_seq_start(struct seq_file *seq, loff_t *pos) | 393 | static void *table_seq_start(struct seq_file *seq, loff_t *pos) |
394 | { | 394 | { |
@@ -534,21 +534,21 @@ static void table_seq_stop(struct seq_file *seq, void *iter_ptr) | |||
534 | } | 534 | } |
535 | } | 535 | } |
536 | 536 | ||
537 | static struct seq_operations format1_seq_ops = { | 537 | static const struct seq_operations format1_seq_ops = { |
538 | .start = table_seq_start, | 538 | .start = table_seq_start, |
539 | .next = table_seq_next, | 539 | .next = table_seq_next, |
540 | .stop = table_seq_stop, | 540 | .stop = table_seq_stop, |
541 | .show = table_seq_show, | 541 | .show = table_seq_show, |
542 | }; | 542 | }; |
543 | 543 | ||
544 | static struct seq_operations format2_seq_ops = { | 544 | static const struct seq_operations format2_seq_ops = { |
545 | .start = table_seq_start, | 545 | .start = table_seq_start, |
546 | .next = table_seq_next, | 546 | .next = table_seq_next, |
547 | .stop = table_seq_stop, | 547 | .stop = table_seq_stop, |
548 | .show = table_seq_show, | 548 | .show = table_seq_show, |
549 | }; | 549 | }; |
550 | 550 | ||
551 | static struct seq_operations format3_seq_ops = { | 551 | static const struct seq_operations format3_seq_ops = { |
552 | .start = table_seq_start, | 552 | .start = table_seq_start, |
553 | .next = table_seq_next, | 553 | .next = table_seq_next, |
554 | .stop = table_seq_stop, | 554 | .stop = table_seq_stop, |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 618a60f03886..240cef14fe58 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -106,6 +106,7 @@ struct connection { | |||
106 | #define CF_CONNECT_PENDING 3 | 106 | #define CF_CONNECT_PENDING 3 |
107 | #define CF_INIT_PENDING 4 | 107 | #define CF_INIT_PENDING 4 |
108 | #define CF_IS_OTHERCON 5 | 108 | #define CF_IS_OTHERCON 5 |
109 | #define CF_CLOSE 6 | ||
109 | struct list_head writequeue; /* List of outgoing writequeue_entries */ | 110 | struct list_head writequeue; /* List of outgoing writequeue_entries */ |
110 | spinlock_t writequeue_lock; | 111 | spinlock_t writequeue_lock; |
111 | int (*rx_action) (struct connection *); /* What to do when active */ | 112 | int (*rx_action) (struct connection *); /* What to do when active */ |
@@ -299,6 +300,8 @@ static void lowcomms_write_space(struct sock *sk) | |||
299 | 300 | ||
300 | static inline void lowcomms_connect_sock(struct connection *con) | 301 | static inline void lowcomms_connect_sock(struct connection *con) |
301 | { | 302 | { |
303 | if (test_bit(CF_CLOSE, &con->flags)) | ||
304 | return; | ||
302 | if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) | 305 | if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) |
303 | queue_work(send_workqueue, &con->swork); | 306 | queue_work(send_workqueue, &con->swork); |
304 | } | 307 | } |
@@ -926,10 +929,8 @@ static void tcp_connect_to_sock(struct connection *con) | |||
926 | goto out_err; | 929 | goto out_err; |
927 | 930 | ||
928 | memset(&saddr, 0, sizeof(saddr)); | 931 | memset(&saddr, 0, sizeof(saddr)); |
929 | if (dlm_nodeid_to_addr(con->nodeid, &saddr)) { | 932 | if (dlm_nodeid_to_addr(con->nodeid, &saddr)) |
930 | sock_release(sock); | ||
931 | goto out_err; | 933 | goto out_err; |
932 | } | ||
933 | 934 | ||
934 | sock->sk->sk_user_data = con; | 935 | sock->sk->sk_user_data = con; |
935 | con->rx_action = receive_from_sock; | 936 | con->rx_action = receive_from_sock; |
@@ -1284,7 +1285,6 @@ out: | |||
1284 | static void send_to_sock(struct connection *con) | 1285 | static void send_to_sock(struct connection *con) |
1285 | { | 1286 | { |
1286 | int ret = 0; | 1287 | int ret = 0; |
1287 | ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int); | ||
1288 | const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; | 1288 | const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; |
1289 | struct writequeue_entry *e; | 1289 | struct writequeue_entry *e; |
1290 | int len, offset; | 1290 | int len, offset; |
@@ -1293,8 +1293,6 @@ static void send_to_sock(struct connection *con) | |||
1293 | if (con->sock == NULL) | 1293 | if (con->sock == NULL) |
1294 | goto out_connect; | 1294 | goto out_connect; |
1295 | 1295 | ||
1296 | sendpage = con->sock->ops->sendpage; | ||
1297 | |||
1298 | spin_lock(&con->writequeue_lock); | 1296 | spin_lock(&con->writequeue_lock); |
1299 | for (;;) { | 1297 | for (;;) { |
1300 | e = list_entry(con->writequeue.next, struct writequeue_entry, | 1298 | e = list_entry(con->writequeue.next, struct writequeue_entry, |
@@ -1309,8 +1307,8 @@ static void send_to_sock(struct connection *con) | |||
1309 | 1307 | ||
1310 | ret = 0; | 1308 | ret = 0; |
1311 | if (len) { | 1309 | if (len) { |
1312 | ret = sendpage(con->sock, e->page, offset, len, | 1310 | ret = kernel_sendpage(con->sock, e->page, offset, len, |
1313 | msg_flags); | 1311 | msg_flags); |
1314 | if (ret == -EAGAIN || ret == 0) { | 1312 | if (ret == -EAGAIN || ret == 0) { |
1315 | cond_resched(); | 1313 | cond_resched(); |
1316 | goto out; | 1314 | goto out; |
@@ -1370,6 +1368,13 @@ int dlm_lowcomms_close(int nodeid) | |||
1370 | log_print("closing connection to node %d", nodeid); | 1368 | log_print("closing connection to node %d", nodeid); |
1371 | con = nodeid2con(nodeid, 0); | 1369 | con = nodeid2con(nodeid, 0); |
1372 | if (con) { | 1370 | if (con) { |
1371 | clear_bit(CF_CONNECT_PENDING, &con->flags); | ||
1372 | clear_bit(CF_WRITE_PENDING, &con->flags); | ||
1373 | set_bit(CF_CLOSE, &con->flags); | ||
1374 | if (cancel_work_sync(&con->swork)) | ||
1375 | log_print("canceled swork for node %d", nodeid); | ||
1376 | if (cancel_work_sync(&con->rwork)) | ||
1377 | log_print("canceled rwork for node %d", nodeid); | ||
1373 | clean_one_writequeue(con); | 1378 | clean_one_writequeue(con); |
1374 | close_connection(con, true); | 1379 | close_connection(con, true); |
1375 | } | 1380 | } |
@@ -1395,9 +1400,10 @@ static void process_send_sockets(struct work_struct *work) | |||
1395 | 1400 | ||
1396 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | 1401 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { |
1397 | con->connect_action(con); | 1402 | con->connect_action(con); |
1403 | set_bit(CF_WRITE_PENDING, &con->flags); | ||
1398 | } | 1404 | } |
1399 | clear_bit(CF_WRITE_PENDING, &con->flags); | 1405 | if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags)) |
1400 | send_to_sock(con); | 1406 | send_to_sock(con); |
1401 | } | 1407 | } |
1402 | 1408 | ||
1403 | 1409 | ||
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index a2edb7913447..31f4b0e6d72c 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -63,9 +63,9 @@ static void drop_slab(void) | |||
63 | } | 63 | } |
64 | 64 | ||
65 | int drop_caches_sysctl_handler(ctl_table *table, int write, | 65 | int drop_caches_sysctl_handler(ctl_table *table, int write, |
66 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 66 | void __user *buffer, size_t *length, loff_t *ppos) |
67 | { | 67 | { |
68 | proc_dointvec_minmax(table, write, file, buffer, length, ppos); | 68 | proc_dointvec_minmax(table, write, buffer, length, ppos); |
69 | if (write) { | 69 | if (write) { |
70 | if (sysctl_drop_caches & 1) | 70 | if (sysctl_drop_caches & 1) |
71 | drop_pagecache(); | 71 | drop_pagecache(); |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 00b30a2d5466..542f625312f3 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -582,7 +582,7 @@ extern const struct inode_operations ecryptfs_dir_iops; | |||
582 | extern const struct inode_operations ecryptfs_symlink_iops; | 582 | extern const struct inode_operations ecryptfs_symlink_iops; |
583 | extern const struct super_operations ecryptfs_sops; | 583 | extern const struct super_operations ecryptfs_sops; |
584 | extern const struct dentry_operations ecryptfs_dops; | 584 | extern const struct dentry_operations ecryptfs_dops; |
585 | extern struct address_space_operations ecryptfs_aops; | 585 | extern const struct address_space_operations ecryptfs_aops; |
586 | extern int ecryptfs_verbosity; | 586 | extern int ecryptfs_verbosity; |
587 | extern unsigned int ecryptfs_message_buf_len; | 587 | extern unsigned int ecryptfs_message_buf_len; |
588 | extern signed long ecryptfs_message_wait_timeout; | 588 | extern signed long ecryptfs_message_wait_timeout; |
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 5c6bab9786e3..05772aeaa8f4 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
@@ -545,7 +545,7 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) | |||
545 | return rc; | 545 | return rc; |
546 | } | 546 | } |
547 | 547 | ||
548 | struct address_space_operations ecryptfs_aops = { | 548 | const struct address_space_operations ecryptfs_aops = { |
549 | .writepage = ecryptfs_writepage, | 549 | .writepage = ecryptfs_writepage, |
550 | .readpage = ecryptfs_readpage, | 550 | .readpage = ecryptfs_readpage, |
551 | .write_begin = ecryptfs_write_begin, | 551 | .write_begin = ecryptfs_write_begin, |
diff --git a/fs/eventfd.c b/fs/eventfd.c index 31d12de83a2a..8b47e4200e65 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c | |||
@@ -68,11 +68,16 @@ int eventfd_signal(struct eventfd_ctx *ctx, int n) | |||
68 | } | 68 | } |
69 | EXPORT_SYMBOL_GPL(eventfd_signal); | 69 | EXPORT_SYMBOL_GPL(eventfd_signal); |
70 | 70 | ||
71 | static void eventfd_free_ctx(struct eventfd_ctx *ctx) | ||
72 | { | ||
73 | kfree(ctx); | ||
74 | } | ||
75 | |||
71 | static void eventfd_free(struct kref *kref) | 76 | static void eventfd_free(struct kref *kref) |
72 | { | 77 | { |
73 | struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); | 78 | struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); |
74 | 79 | ||
75 | kfree(ctx); | 80 | eventfd_free_ctx(ctx); |
76 | } | 81 | } |
77 | 82 | ||
78 | /** | 83 | /** |
@@ -298,9 +303,23 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) | |||
298 | } | 303 | } |
299 | EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); | 304 | EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); |
300 | 305 | ||
301 | SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) | 306 | /** |
307 | * eventfd_file_create - Creates an eventfd file pointer. | ||
308 | * @count: Initial eventfd counter value. | ||
309 | * @flags: Flags for the eventfd file. | ||
310 | * | ||
311 | * This function creates an eventfd file pointer, w/out installing it into | ||
312 | * the fd table. This is useful when the eventfd file is used during the | ||
313 | * initialization of data structures that require extra setup after the eventfd | ||
314 | * creation. So the eventfd creation is split into the file pointer creation | ||
315 | * phase, and the file descriptor installation phase. | ||
316 | * In this way races with userspace closing the newly installed file descriptor | ||
317 | * can be avoided. | ||
318 | * Returns an eventfd file pointer, or a proper error pointer. | ||
319 | */ | ||
320 | struct file *eventfd_file_create(unsigned int count, int flags) | ||
302 | { | 321 | { |
303 | int fd; | 322 | struct file *file; |
304 | struct eventfd_ctx *ctx; | 323 | struct eventfd_ctx *ctx; |
305 | 324 | ||
306 | /* Check the EFD_* constants for consistency. */ | 325 | /* Check the EFD_* constants for consistency. */ |
@@ -308,26 +327,48 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) | |||
308 | BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); | 327 | BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); |
309 | 328 | ||
310 | if (flags & ~EFD_FLAGS_SET) | 329 | if (flags & ~EFD_FLAGS_SET) |
311 | return -EINVAL; | 330 | return ERR_PTR(-EINVAL); |
312 | 331 | ||
313 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | 332 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); |
314 | if (!ctx) | 333 | if (!ctx) |
315 | return -ENOMEM; | 334 | return ERR_PTR(-ENOMEM); |
316 | 335 | ||
317 | kref_init(&ctx->kref); | 336 | kref_init(&ctx->kref); |
318 | init_waitqueue_head(&ctx->wqh); | 337 | init_waitqueue_head(&ctx->wqh); |
319 | ctx->count = count; | 338 | ctx->count = count; |
320 | ctx->flags = flags; | 339 | ctx->flags = flags; |
321 | 340 | ||
322 | /* | 341 | file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, |
323 | * When we call this, the initialization must be complete, since | 342 | flags & EFD_SHARED_FCNTL_FLAGS); |
324 | * anon_inode_getfd() will install the fd. | 343 | if (IS_ERR(file)) |
325 | */ | 344 | eventfd_free_ctx(ctx); |
326 | fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, | 345 | |
327 | flags & EFD_SHARED_FCNTL_FLAGS); | 346 | return file; |
328 | if (fd < 0) | 347 | } |
329 | kfree(ctx); | 348 | |
349 | SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) | ||
350 | { | ||
351 | int fd, error; | ||
352 | struct file *file; | ||
353 | |||
354 | error = get_unused_fd_flags(flags & EFD_SHARED_FCNTL_FLAGS); | ||
355 | if (error < 0) | ||
356 | return error; | ||
357 | fd = error; | ||
358 | |||
359 | file = eventfd_file_create(count, flags); | ||
360 | if (IS_ERR(file)) { | ||
361 | error = PTR_ERR(file); | ||
362 | goto err_put_unused_fd; | ||
363 | } | ||
364 | fd_install(fd, file); | ||
365 | |||
330 | return fd; | 366 | return fd; |
367 | |||
368 | err_put_unused_fd: | ||
369 | put_unused_fd(fd); | ||
370 | |||
371 | return error; | ||
331 | } | 372 | } |
332 | 373 | ||
333 | SYSCALL_DEFINE1(eventfd, unsigned int, count) | 374 | SYSCALL_DEFINE1(eventfd, unsigned int, count) |
@@ -33,7 +33,7 @@ | |||
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
35 | #include <linux/pagemap.h> | 35 | #include <linux/pagemap.h> |
36 | #include <linux/perf_counter.h> | 36 | #include <linux/perf_event.h> |
37 | #include <linux/highmem.h> | 37 | #include <linux/highmem.h> |
38 | #include <linux/spinlock.h> | 38 | #include <linux/spinlock.h> |
39 | #include <linux/key.h> | 39 | #include <linux/key.h> |
@@ -55,6 +55,7 @@ | |||
55 | #include <linux/kmod.h> | 55 | #include <linux/kmod.h> |
56 | #include <linux/fsnotify.h> | 56 | #include <linux/fsnotify.h> |
57 | #include <linux/fs_struct.h> | 57 | #include <linux/fs_struct.h> |
58 | #include <linux/pipe_fs_i.h> | ||
58 | 59 | ||
59 | #include <asm/uaccess.h> | 60 | #include <asm/uaccess.h> |
60 | #include <asm/mmu_context.h> | 61 | #include <asm/mmu_context.h> |
@@ -63,6 +64,7 @@ | |||
63 | 64 | ||
64 | int core_uses_pid; | 65 | int core_uses_pid; |
65 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | 66 | char core_pattern[CORENAME_MAX_SIZE] = "core"; |
67 | unsigned int core_pipe_limit; | ||
66 | int suid_dumpable = 0; | 68 | int suid_dumpable = 0; |
67 | 69 | ||
68 | /* The maximal length of core_pattern is also specified in sysctl.c */ | 70 | /* The maximal length of core_pattern is also specified in sysctl.c */ |
@@ -845,6 +847,9 @@ static int de_thread(struct task_struct *tsk) | |||
845 | sig->notify_count = 0; | 847 | sig->notify_count = 0; |
846 | 848 | ||
847 | no_thread_group: | 849 | no_thread_group: |
850 | if (current->mm) | ||
851 | setmax_mm_hiwater_rss(&sig->maxrss, current->mm); | ||
852 | |||
848 | exit_itimers(sig); | 853 | exit_itimers(sig); |
849 | flush_itimer_signals(); | 854 | flush_itimer_signals(); |
850 | 855 | ||
@@ -923,7 +928,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) | |||
923 | task_lock(tsk); | 928 | task_lock(tsk); |
924 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); | 929 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); |
925 | task_unlock(tsk); | 930 | task_unlock(tsk); |
926 | perf_counter_comm(tsk); | 931 | perf_event_comm(tsk); |
927 | } | 932 | } |
928 | 933 | ||
929 | int flush_old_exec(struct linux_binprm * bprm) | 934 | int flush_old_exec(struct linux_binprm * bprm) |
@@ -997,7 +1002,7 @@ int flush_old_exec(struct linux_binprm * bprm) | |||
997 | * security domain: | 1002 | * security domain: |
998 | */ | 1003 | */ |
999 | if (!get_dumpable(current->mm)) | 1004 | if (!get_dumpable(current->mm)) |
1000 | perf_counter_exit_task(current); | 1005 | perf_event_exit_task(current); |
1001 | 1006 | ||
1002 | /* An exec changes our domain. We are no longer part of the thread | 1007 | /* An exec changes our domain. We are no longer part of the thread |
1003 | group */ | 1008 | group */ |
@@ -1354,6 +1359,8 @@ int do_execve(char * filename, | |||
1354 | if (retval < 0) | 1359 | if (retval < 0) |
1355 | goto out; | 1360 | goto out; |
1356 | 1361 | ||
1362 | current->stack_start = current->mm->start_stack; | ||
1363 | |||
1357 | /* execve succeeded */ | 1364 | /* execve succeeded */ |
1358 | current->fs->in_exec = 0; | 1365 | current->fs->in_exec = 0; |
1359 | current->in_execve = 0; | 1366 | current->in_execve = 0; |
@@ -1388,18 +1395,16 @@ out_ret: | |||
1388 | return retval; | 1395 | return retval; |
1389 | } | 1396 | } |
1390 | 1397 | ||
1391 | int set_binfmt(struct linux_binfmt *new) | 1398 | void set_binfmt(struct linux_binfmt *new) |
1392 | { | 1399 | { |
1393 | struct linux_binfmt *old = current->binfmt; | 1400 | struct mm_struct *mm = current->mm; |
1394 | 1401 | ||
1395 | if (new) { | 1402 | if (mm->binfmt) |
1396 | if (!try_module_get(new->module)) | 1403 | module_put(mm->binfmt->module); |
1397 | return -1; | 1404 | |
1398 | } | 1405 | mm->binfmt = new; |
1399 | current->binfmt = new; | 1406 | if (new) |
1400 | if (old) | 1407 | __module_get(new->module); |
1401 | module_put(old->module); | ||
1402 | return 0; | ||
1403 | } | 1408 | } |
1404 | 1409 | ||
1405 | EXPORT_SYMBOL(set_binfmt); | 1410 | EXPORT_SYMBOL(set_binfmt); |
@@ -1723,6 +1728,29 @@ int get_dumpable(struct mm_struct *mm) | |||
1723 | return (ret >= 2) ? 2 : ret; | 1728 | return (ret >= 2) ? 2 : ret; |
1724 | } | 1729 | } |
1725 | 1730 | ||
1731 | static void wait_for_dump_helpers(struct file *file) | ||
1732 | { | ||
1733 | struct pipe_inode_info *pipe; | ||
1734 | |||
1735 | pipe = file->f_path.dentry->d_inode->i_pipe; | ||
1736 | |||
1737 | pipe_lock(pipe); | ||
1738 | pipe->readers++; | ||
1739 | pipe->writers--; | ||
1740 | |||
1741 | while ((pipe->readers > 1) && (!signal_pending(current))) { | ||
1742 | wake_up_interruptible_sync(&pipe->wait); | ||
1743 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
1744 | pipe_wait(pipe); | ||
1745 | } | ||
1746 | |||
1747 | pipe->readers--; | ||
1748 | pipe->writers++; | ||
1749 | pipe_unlock(pipe); | ||
1750 | |||
1751 | } | ||
1752 | |||
1753 | |||
1726 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) | 1754 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) |
1727 | { | 1755 | { |
1728 | struct core_state core_state; | 1756 | struct core_state core_state; |
@@ -1739,11 +1767,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1739 | unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; | 1767 | unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; |
1740 | char **helper_argv = NULL; | 1768 | char **helper_argv = NULL; |
1741 | int helper_argc = 0; | 1769 | int helper_argc = 0; |
1742 | char *delimit; | 1770 | int dump_count = 0; |
1771 | static atomic_t core_dump_count = ATOMIC_INIT(0); | ||
1743 | 1772 | ||
1744 | audit_core_dumps(signr); | 1773 | audit_core_dumps(signr); |
1745 | 1774 | ||
1746 | binfmt = current->binfmt; | 1775 | binfmt = mm->binfmt; |
1747 | if (!binfmt || !binfmt->core_dump) | 1776 | if (!binfmt || !binfmt->core_dump) |
1748 | goto fail; | 1777 | goto fail; |
1749 | 1778 | ||
@@ -1794,54 +1823,63 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1794 | lock_kernel(); | 1823 | lock_kernel(); |
1795 | ispipe = format_corename(corename, signr); | 1824 | ispipe = format_corename(corename, signr); |
1796 | unlock_kernel(); | 1825 | unlock_kernel(); |
1797 | /* | 1826 | |
1798 | * Don't bother to check the RLIMIT_CORE value if core_pattern points | ||
1799 | * to a pipe. Since we're not writing directly to the filesystem | ||
1800 | * RLIMIT_CORE doesn't really apply, as no actual core file will be | ||
1801 | * created unless the pipe reader choses to write out the core file | ||
1802 | * at which point file size limits and permissions will be imposed | ||
1803 | * as it does with any other process | ||
1804 | */ | ||
1805 | if ((!ispipe) && (core_limit < binfmt->min_coredump)) | 1827 | if ((!ispipe) && (core_limit < binfmt->min_coredump)) |
1806 | goto fail_unlock; | 1828 | goto fail_unlock; |
1807 | 1829 | ||
1808 | if (ispipe) { | 1830 | if (ispipe) { |
1831 | if (core_limit == 0) { | ||
1832 | /* | ||
1833 | * Normally core limits are irrelevant to pipes, since | ||
1834 | * we're not writing to the file system, but we use | ||
1835 | * core_limit of 0 here as a speacial value. Any | ||
1836 | * non-zero limit gets set to RLIM_INFINITY below, but | ||
1837 | * a limit of 0 skips the dump. This is a consistent | ||
1838 | * way to catch recursive crashes. We can still crash | ||
1839 | * if the core_pattern binary sets RLIM_CORE = !0 | ||
1840 | * but it runs as root, and can do lots of stupid things | ||
1841 | * Note that we use task_tgid_vnr here to grab the pid | ||
1842 | * of the process group leader. That way we get the | ||
1843 | * right pid if a thread in a multi-threaded | ||
1844 | * core_pattern process dies. | ||
1845 | */ | ||
1846 | printk(KERN_WARNING | ||
1847 | "Process %d(%s) has RLIMIT_CORE set to 0\n", | ||
1848 | task_tgid_vnr(current), current->comm); | ||
1849 | printk(KERN_WARNING "Aborting core\n"); | ||
1850 | goto fail_unlock; | ||
1851 | } | ||
1852 | |||
1853 | dump_count = atomic_inc_return(&core_dump_count); | ||
1854 | if (core_pipe_limit && (core_pipe_limit < dump_count)) { | ||
1855 | printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", | ||
1856 | task_tgid_vnr(current), current->comm); | ||
1857 | printk(KERN_WARNING "Skipping core dump\n"); | ||
1858 | goto fail_dropcount; | ||
1859 | } | ||
1860 | |||
1809 | helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); | 1861 | helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); |
1810 | if (!helper_argv) { | 1862 | if (!helper_argv) { |
1811 | printk(KERN_WARNING "%s failed to allocate memory\n", | 1863 | printk(KERN_WARNING "%s failed to allocate memory\n", |
1812 | __func__); | 1864 | __func__); |
1813 | goto fail_unlock; | 1865 | goto fail_dropcount; |
1814 | } | ||
1815 | /* Terminate the string before the first option */ | ||
1816 | delimit = strchr(corename, ' '); | ||
1817 | if (delimit) | ||
1818 | *delimit = '\0'; | ||
1819 | delimit = strrchr(helper_argv[0], '/'); | ||
1820 | if (delimit) | ||
1821 | delimit++; | ||
1822 | else | ||
1823 | delimit = helper_argv[0]; | ||
1824 | if (!strcmp(delimit, current->comm)) { | ||
1825 | printk(KERN_NOTICE "Recursive core dump detected, " | ||
1826 | "aborting\n"); | ||
1827 | goto fail_unlock; | ||
1828 | } | 1866 | } |
1829 | 1867 | ||
1830 | core_limit = RLIM_INFINITY; | 1868 | core_limit = RLIM_INFINITY; |
1831 | 1869 | ||
1832 | /* SIGPIPE can happen, but it's just never processed */ | 1870 | /* SIGPIPE can happen, but it's just never processed */ |
1833 | if (call_usermodehelper_pipe(corename+1, helper_argv, NULL, | 1871 | if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, |
1834 | &file)) { | 1872 | &file)) { |
1835 | printk(KERN_INFO "Core dump to %s pipe failed\n", | 1873 | printk(KERN_INFO "Core dump to %s pipe failed\n", |
1836 | corename); | 1874 | corename); |
1837 | goto fail_unlock; | 1875 | goto fail_dropcount; |
1838 | } | 1876 | } |
1839 | } else | 1877 | } else |
1840 | file = filp_open(corename, | 1878 | file = filp_open(corename, |
1841 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, | 1879 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, |
1842 | 0600); | 1880 | 0600); |
1843 | if (IS_ERR(file)) | 1881 | if (IS_ERR(file)) |
1844 | goto fail_unlock; | 1882 | goto fail_dropcount; |
1845 | inode = file->f_path.dentry->d_inode; | 1883 | inode = file->f_path.dentry->d_inode; |
1846 | if (inode->i_nlink > 1) | 1884 | if (inode->i_nlink > 1) |
1847 | goto close_fail; /* multiple links - don't dump */ | 1885 | goto close_fail; /* multiple links - don't dump */ |
@@ -1870,7 +1908,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1870 | if (retval) | 1908 | if (retval) |
1871 | current->signal->group_exit_code |= 0x80; | 1909 | current->signal->group_exit_code |= 0x80; |
1872 | close_fail: | 1910 | close_fail: |
1911 | if (ispipe && core_pipe_limit) | ||
1912 | wait_for_dump_helpers(file); | ||
1873 | filp_close(file, NULL); | 1913 | filp_close(file, NULL); |
1914 | fail_dropcount: | ||
1915 | if (dump_count) | ||
1916 | atomic_dec(&core_dump_count); | ||
1874 | fail_unlock: | 1917 | fail_unlock: |
1875 | if (helper_argv) | 1918 | if (helper_argv) |
1876 | argv_free(helper_argv); | 1919 | argv_free(helper_argv); |
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 23701f289e98..dd7175ce5606 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c | |||
@@ -70,7 +70,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str | |||
70 | if (PTR_ERR(inode) == -ESTALE) { | 70 | if (PTR_ERR(inode) == -ESTALE) { |
71 | ext2_error(dir->i_sb, __func__, | 71 | ext2_error(dir->i_sb, __func__, |
72 | "deleted inode referenced: %lu", | 72 | "deleted inode referenced: %lu", |
73 | ino); | 73 | (unsigned long) ino); |
74 | return ERR_PTR(-EIO); | 74 | return ERR_PTR(-EIO); |
75 | } else { | 75 | } else { |
76 | return ERR_CAST(inode); | 76 | return ERR_CAST(inode); |
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c index b72b85884223..c18fbf3e4068 100644 --- a/fs/ext2/xip.c +++ b/fs/ext2/xip.c | |||
@@ -20,7 +20,7 @@ __inode_direct_access(struct inode *inode, sector_t block, | |||
20 | void **kaddr, unsigned long *pfn) | 20 | void **kaddr, unsigned long *pfn) |
21 | { | 21 | { |
22 | struct block_device *bdev = inode->i_sb->s_bdev; | 22 | struct block_device *bdev = inode->i_sb->s_bdev; |
23 | struct block_device_operations *ops = bdev->bd_disk->fops; | 23 | const struct block_device_operations *ops = bdev->bd_disk->fops; |
24 | sector_t sector; | 24 | sector_t sector; |
25 | 25 | ||
26 | sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */ | 26 | sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */ |
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index d33634119e17..451d166bbe93 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c | |||
@@ -23,6 +23,7 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/time.h> | 25 | #include <linux/time.h> |
26 | #include <linux/blkdev.h> | ||
26 | #include <linux/fs.h> | 27 | #include <linux/fs.h> |
27 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
28 | #include <linux/writeback.h> | 29 | #include <linux/writeback.h> |
@@ -73,7 +74,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
73 | } | 74 | } |
74 | 75 | ||
75 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 76 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
76 | goto out; | 77 | goto flush; |
77 | 78 | ||
78 | /* | 79 | /* |
79 | * The VFS has written the file data. If the inode is unaltered | 80 | * The VFS has written the file data. If the inode is unaltered |
@@ -85,7 +86,16 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
85 | .nr_to_write = 0, /* sys_fsync did this */ | 86 | .nr_to_write = 0, /* sys_fsync did this */ |
86 | }; | 87 | }; |
87 | ret = sync_inode(inode, &wbc); | 88 | ret = sync_inode(inode, &wbc); |
89 | goto out; | ||
88 | } | 90 | } |
91 | flush: | ||
92 | /* | ||
93 | * In case we didn't commit a transaction, we have to flush | ||
94 | * disk caches manually so that data really is on persistent | ||
95 | * storage | ||
96 | */ | ||
97 | if (test_opt(inode->i_sb, BARRIER)) | ||
98 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | ||
89 | out: | 99 | out: |
90 | return ret; | 100 | return ret; |
91 | } | 101 | } |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 953b430f92e3..acf1b1423327 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -172,10 +172,21 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | |||
172 | * so before we call here everything must be consistently dirtied against | 172 | * so before we call here everything must be consistently dirtied against |
173 | * this transaction. | 173 | * this transaction. |
174 | */ | 174 | */ |
175 | static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) | 175 | static int truncate_restart_transaction(handle_t *handle, struct inode *inode) |
176 | { | 176 | { |
177 | int ret; | ||
178 | |||
177 | jbd_debug(2, "restarting handle %p\n", handle); | 179 | jbd_debug(2, "restarting handle %p\n", handle); |
178 | return ext3_journal_restart(handle, blocks_for_truncate(inode)); | 180 | /* |
181 | * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle | ||
182 | * At this moment, get_block can be called only for blocks inside | ||
183 | * i_size since page cache has been already dropped and writes are | ||
184 | * blocked by i_mutex. So we can safely drop the truncate_mutex. | ||
185 | */ | ||
186 | mutex_unlock(&EXT3_I(inode)->truncate_mutex); | ||
187 | ret = ext3_journal_restart(handle, blocks_for_truncate(inode)); | ||
188 | mutex_lock(&EXT3_I(inode)->truncate_mutex); | ||
189 | return ret; | ||
179 | } | 190 | } |
180 | 191 | ||
181 | /* | 192 | /* |
@@ -2075,7 +2086,7 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode, | |||
2075 | ext3_journal_dirty_metadata(handle, bh); | 2086 | ext3_journal_dirty_metadata(handle, bh); |
2076 | } | 2087 | } |
2077 | ext3_mark_inode_dirty(handle, inode); | 2088 | ext3_mark_inode_dirty(handle, inode); |
2078 | ext3_journal_test_restart(handle, inode); | 2089 | truncate_restart_transaction(handle, inode); |
2079 | if (bh) { | 2090 | if (bh) { |
2080 | BUFFER_TRACE(bh, "retaking write access"); | 2091 | BUFFER_TRACE(bh, "retaking write access"); |
2081 | ext3_journal_get_write_access(handle, bh); | 2092 | ext3_journal_get_write_access(handle, bh); |
@@ -2285,7 +2296,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, | |||
2285 | return; | 2296 | return; |
2286 | if (try_to_extend_transaction(handle, inode)) { | 2297 | if (try_to_extend_transaction(handle, inode)) { |
2287 | ext3_mark_inode_dirty(handle, inode); | 2298 | ext3_mark_inode_dirty(handle, inode); |
2288 | ext3_journal_test_restart(handle, inode); | 2299 | truncate_restart_transaction(handle, inode); |
2289 | } | 2300 | } |
2290 | 2301 | ||
2291 | ext3_free_blocks(handle, inode, nr, 1); | 2302 | ext3_free_blocks(handle, inode, nr, 1); |
@@ -2895,6 +2906,10 @@ static int ext3_do_update_inode(handle_t *handle, | |||
2895 | struct buffer_head *bh = iloc->bh; | 2906 | struct buffer_head *bh = iloc->bh; |
2896 | int err = 0, rc, block; | 2907 | int err = 0, rc, block; |
2897 | 2908 | ||
2909 | again: | ||
2910 | /* we can't allow multiple procs in here at once, its a bit racey */ | ||
2911 | lock_buffer(bh); | ||
2912 | |||
2898 | /* For fields not not tracking in the in-memory inode, | 2913 | /* For fields not not tracking in the in-memory inode, |
2899 | * initialise them to zero for new inodes. */ | 2914 | * initialise them to zero for new inodes. */ |
2900 | if (ei->i_state & EXT3_STATE_NEW) | 2915 | if (ei->i_state & EXT3_STATE_NEW) |
@@ -2954,16 +2969,20 @@ static int ext3_do_update_inode(handle_t *handle, | |||
2954 | /* If this is the first large file | 2969 | /* If this is the first large file |
2955 | * created, add a flag to the superblock. | 2970 | * created, add a flag to the superblock. |
2956 | */ | 2971 | */ |
2972 | unlock_buffer(bh); | ||
2957 | err = ext3_journal_get_write_access(handle, | 2973 | err = ext3_journal_get_write_access(handle, |
2958 | EXT3_SB(sb)->s_sbh); | 2974 | EXT3_SB(sb)->s_sbh); |
2959 | if (err) | 2975 | if (err) |
2960 | goto out_brelse; | 2976 | goto out_brelse; |
2977 | |||
2961 | ext3_update_dynamic_rev(sb); | 2978 | ext3_update_dynamic_rev(sb); |
2962 | EXT3_SET_RO_COMPAT_FEATURE(sb, | 2979 | EXT3_SET_RO_COMPAT_FEATURE(sb, |
2963 | EXT3_FEATURE_RO_COMPAT_LARGE_FILE); | 2980 | EXT3_FEATURE_RO_COMPAT_LARGE_FILE); |
2964 | handle->h_sync = 1; | 2981 | handle->h_sync = 1; |
2965 | err = ext3_journal_dirty_metadata(handle, | 2982 | err = ext3_journal_dirty_metadata(handle, |
2966 | EXT3_SB(sb)->s_sbh); | 2983 | EXT3_SB(sb)->s_sbh); |
2984 | /* get our lock and start over */ | ||
2985 | goto again; | ||
2967 | } | 2986 | } |
2968 | } | 2987 | } |
2969 | } | 2988 | } |
@@ -2986,6 +3005,7 @@ static int ext3_do_update_inode(handle_t *handle, | |||
2986 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 3005 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); |
2987 | 3006 | ||
2988 | BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); | 3007 | BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); |
3008 | unlock_buffer(bh); | ||
2989 | rc = ext3_journal_dirty_metadata(handle, bh); | 3009 | rc = ext3_journal_dirty_metadata(handle, bh); |
2990 | if (!err) | 3010 | if (!err) |
2991 | err = rc; | 3011 | err = rc; |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index a8d80a7f1105..72743d360509 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -720,7 +720,7 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, | |||
720 | static ssize_t ext3_quota_write(struct super_block *sb, int type, | 720 | static ssize_t ext3_quota_write(struct super_block *sb, int type, |
721 | const char *data, size_t len, loff_t off); | 721 | const char *data, size_t len, loff_t off); |
722 | 722 | ||
723 | static struct dquot_operations ext3_quota_operations = { | 723 | static const struct dquot_operations ext3_quota_operations = { |
724 | .initialize = dquot_initialize, | 724 | .initialize = dquot_initialize, |
725 | .drop = dquot_drop, | 725 | .drop = dquot_drop, |
726 | .alloc_space = dquot_alloc_space, | 726 | .alloc_space = dquot_alloc_space, |
@@ -737,7 +737,7 @@ static struct dquot_operations ext3_quota_operations = { | |||
737 | .destroy_dquot = dquot_destroy, | 737 | .destroy_dquot = dquot_destroy, |
738 | }; | 738 | }; |
739 | 739 | ||
740 | static struct quotactl_ops ext3_qctl_operations = { | 740 | static const struct quotactl_ops ext3_qctl_operations = { |
741 | .quota_on = ext3_quota_on, | 741 | .quota_on = ext3_quota_on, |
742 | .quota_off = vfs_quota_off, | 742 | .quota_off = vfs_quota_off, |
743 | .quota_sync = vfs_quota_sync, | 743 | .quota_sync = vfs_quota_sync, |
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 418b6f3b0ae8..d5c0ea2e8f2d 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig | |||
@@ -37,7 +37,7 @@ config EXT4DEV_COMPAT | |||
37 | 37 | ||
38 | To enable backwards compatibility so that systems that are | 38 | To enable backwards compatibility so that systems that are |
39 | still expecting to mount ext4 filesystems using ext4dev, | 39 | still expecting to mount ext4 filesystems using ext4dev, |
40 | chose Y here. This feature will go away by 2.6.31, so | 40 | choose Y here. This feature will go away by 2.6.31, so |
41 | please arrange to get your userspace programs fixed! | 41 | please arrange to get your userspace programs fixed! |
42 | 42 | ||
43 | config EXT4_FS_XATTR | 43 | config EXT4_FS_XATTR |
@@ -77,3 +77,12 @@ config EXT4_FS_SECURITY | |||
77 | 77 | ||
78 | If you are not using a security module that requires using | 78 | If you are not using a security module that requires using |
79 | extended attributes for file security labels, say N. | 79 | extended attributes for file security labels, say N. |
80 | |||
81 | config EXT4_DEBUG | ||
82 | bool "EXT4 debugging support" | ||
83 | depends on EXT4_FS | ||
84 | help | ||
85 | Enables run-time debugging support for the ext4 filesystem. | ||
86 | |||
87 | If you select Y here, then you will be able to turn on debugging | ||
88 | with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug" | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e2126d70dff5..1d0418980f8d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -478,7 +478,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
478 | * new bitmap information | 478 | * new bitmap information |
479 | */ | 479 | */ |
480 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); | 480 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); |
481 | ext4_mb_update_group_info(grp, blocks_freed); | 481 | grp->bb_free += blocks_freed; |
482 | up_write(&grp->alloc_sem); | 482 | up_write(&grp->alloc_sem); |
483 | 483 | ||
484 | /* We dirtied the bitmap block */ | 484 | /* We dirtied the bitmap block */ |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9714db393efe..e227eea23f05 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -67,27 +67,29 @@ typedef unsigned int ext4_group_t; | |||
67 | 67 | ||
68 | 68 | ||
69 | /* prefer goal again. length */ | 69 | /* prefer goal again. length */ |
70 | #define EXT4_MB_HINT_MERGE 1 | 70 | #define EXT4_MB_HINT_MERGE 0x0001 |
71 | /* blocks already reserved */ | 71 | /* blocks already reserved */ |
72 | #define EXT4_MB_HINT_RESERVED 2 | 72 | #define EXT4_MB_HINT_RESERVED 0x0002 |
73 | /* metadata is being allocated */ | 73 | /* metadata is being allocated */ |
74 | #define EXT4_MB_HINT_METADATA 4 | 74 | #define EXT4_MB_HINT_METADATA 0x0004 |
75 | /* first blocks in the file */ | 75 | /* first blocks in the file */ |
76 | #define EXT4_MB_HINT_FIRST 8 | 76 | #define EXT4_MB_HINT_FIRST 0x0008 |
77 | /* search for the best chunk */ | 77 | /* search for the best chunk */ |
78 | #define EXT4_MB_HINT_BEST 16 | 78 | #define EXT4_MB_HINT_BEST 0x0010 |
79 | /* data is being allocated */ | 79 | /* data is being allocated */ |
80 | #define EXT4_MB_HINT_DATA 32 | 80 | #define EXT4_MB_HINT_DATA 0x0020 |
81 | /* don't preallocate (for tails) */ | 81 | /* don't preallocate (for tails) */ |
82 | #define EXT4_MB_HINT_NOPREALLOC 64 | 82 | #define EXT4_MB_HINT_NOPREALLOC 0x0040 |
83 | /* allocate for locality group */ | 83 | /* allocate for locality group */ |
84 | #define EXT4_MB_HINT_GROUP_ALLOC 128 | 84 | #define EXT4_MB_HINT_GROUP_ALLOC 0x0080 |
85 | /* allocate goal blocks or none */ | 85 | /* allocate goal blocks or none */ |
86 | #define EXT4_MB_HINT_GOAL_ONLY 256 | 86 | #define EXT4_MB_HINT_GOAL_ONLY 0x0100 |
87 | /* goal is meaningful */ | 87 | /* goal is meaningful */ |
88 | #define EXT4_MB_HINT_TRY_GOAL 512 | 88 | #define EXT4_MB_HINT_TRY_GOAL 0x0200 |
89 | /* blocks already pre-reserved by delayed allocation */ | 89 | /* blocks already pre-reserved by delayed allocation */ |
90 | #define EXT4_MB_DELALLOC_RESERVED 1024 | 90 | #define EXT4_MB_DELALLOC_RESERVED 0x0400 |
91 | /* We are doing stream allocation */ | ||
92 | #define EXT4_MB_STREAM_ALLOC 0x0800 | ||
91 | 93 | ||
92 | 94 | ||
93 | struct ext4_allocation_request { | 95 | struct ext4_allocation_request { |
@@ -112,6 +114,21 @@ struct ext4_allocation_request { | |||
112 | }; | 114 | }; |
113 | 115 | ||
114 | /* | 116 | /* |
117 | * For delayed allocation tracking | ||
118 | */ | ||
119 | struct mpage_da_data { | ||
120 | struct inode *inode; | ||
121 | sector_t b_blocknr; /* start block number of extent */ | ||
122 | size_t b_size; /* size of extent */ | ||
123 | unsigned long b_state; /* state of the extent */ | ||
124 | unsigned long first_page, next_page; /* extent of pages */ | ||
125 | struct writeback_control *wbc; | ||
126 | int io_done; | ||
127 | int pages_written; | ||
128 | int retval; | ||
129 | }; | ||
130 | |||
131 | /* | ||
115 | * Special inodes numbers | 132 | * Special inodes numbers |
116 | */ | 133 | */ |
117 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ | 134 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ |
@@ -251,7 +268,6 @@ struct flex_groups { | |||
251 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ | 268 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
252 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ | 269 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ |
253 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ | 270 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ |
254 | #define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */ | ||
255 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 271 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
256 | 272 | ||
257 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ | 273 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ |
@@ -289,6 +305,7 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) | |||
289 | #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ | 305 | #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ |
290 | #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ | 306 | #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ |
291 | #define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ | 307 | #define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ |
308 | #define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */ | ||
292 | 309 | ||
293 | /* Used to pass group descriptor data when online resize is done */ | 310 | /* Used to pass group descriptor data when online resize is done */ |
294 | struct ext4_new_group_input { | 311 | struct ext4_new_group_input { |
@@ -386,6 +403,9 @@ struct ext4_mount_options { | |||
386 | #endif | 403 | #endif |
387 | }; | 404 | }; |
388 | 405 | ||
406 | /* Max physical block we can addres w/o extents */ | ||
407 | #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF | ||
408 | |||
389 | /* | 409 | /* |
390 | * Structure of an inode on the disk | 410 | * Structure of an inode on the disk |
391 | */ | 411 | */ |
@@ -456,7 +476,6 @@ struct move_extent { | |||
456 | __u64 len; /* block length to be moved */ | 476 | __u64 len; /* block length to be moved */ |
457 | __u64 moved_len; /* moved block length */ | 477 | __u64 moved_len; /* moved block length */ |
458 | }; | 478 | }; |
459 | #define MAX_DEFRAG_SIZE ((1UL<<31) - 1) | ||
460 | 479 | ||
461 | #define EXT4_EPOCH_BITS 2 | 480 | #define EXT4_EPOCH_BITS 2 |
462 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) | 481 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) |
@@ -694,7 +713,6 @@ struct ext4_inode_info { | |||
694 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ | 713 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ |
695 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ | 714 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ |
696 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ | 715 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ |
697 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | ||
698 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 716 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
699 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 717 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
700 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 718 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
@@ -841,6 +859,7 @@ struct ext4_sb_info { | |||
841 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ | 859 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ |
842 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ | 860 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ |
843 | ext4_group_t s_groups_count; /* Number of groups in the fs */ | 861 | ext4_group_t s_groups_count; /* Number of groups in the fs */ |
862 | ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ | ||
844 | unsigned long s_overhead_last; /* Last calculated overhead */ | 863 | unsigned long s_overhead_last; /* Last calculated overhead */ |
845 | unsigned long s_blocks_last; /* Last seen block count */ | 864 | unsigned long s_blocks_last; /* Last seen block count */ |
846 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | 865 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ |
@@ -950,6 +969,7 @@ struct ext4_sb_info { | |||
950 | atomic_t s_mb_lost_chunks; | 969 | atomic_t s_mb_lost_chunks; |
951 | atomic_t s_mb_preallocated; | 970 | atomic_t s_mb_preallocated; |
952 | atomic_t s_mb_discarded; | 971 | atomic_t s_mb_discarded; |
972 | atomic_t s_lock_busy; | ||
953 | 973 | ||
954 | /* locality groups */ | 974 | /* locality groups */ |
955 | struct ext4_locality_group *s_locality_groups; | 975 | struct ext4_locality_group *s_locality_groups; |
@@ -1340,8 +1360,6 @@ extern void ext4_mb_free_blocks(handle_t *, struct inode *, | |||
1340 | ext4_fsblk_t, unsigned long, int, unsigned long *); | 1360 | ext4_fsblk_t, unsigned long, int, unsigned long *); |
1341 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1361 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1342 | ext4_group_t i, struct ext4_group_desc *desc); | 1362 | ext4_group_t i, struct ext4_group_desc *desc); |
1343 | extern void ext4_mb_update_group_info(struct ext4_group_info *grp, | ||
1344 | ext4_grpblk_t add); | ||
1345 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); | 1363 | extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); |
1346 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, | 1364 | extern void ext4_mb_put_buddy_cache_lock(struct super_block *, |
1347 | ext4_group_t, int); | 1365 | ext4_group_t, int); |
@@ -1367,6 +1385,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); | |||
1367 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1385 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
1368 | extern int ext4_can_truncate(struct inode *inode); | 1386 | extern int ext4_can_truncate(struct inode *inode); |
1369 | extern void ext4_truncate(struct inode *); | 1387 | extern void ext4_truncate(struct inode *); |
1388 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); | ||
1370 | extern void ext4_set_inode_flags(struct inode *); | 1389 | extern void ext4_set_inode_flags(struct inode *); |
1371 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1390 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
1372 | extern int ext4_alloc_da_blocks(struct inode *inode); | 1391 | extern int ext4_alloc_da_blocks(struct inode *inode); |
@@ -1575,15 +1594,18 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | |||
1575 | struct ext4_group_info { | 1594 | struct ext4_group_info { |
1576 | unsigned long bb_state; | 1595 | unsigned long bb_state; |
1577 | struct rb_root bb_free_root; | 1596 | struct rb_root bb_free_root; |
1578 | unsigned short bb_first_free; | 1597 | ext4_grpblk_t bb_first_free; /* first free block */ |
1579 | unsigned short bb_free; | 1598 | ext4_grpblk_t bb_free; /* total free blocks */ |
1580 | unsigned short bb_fragments; | 1599 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ |
1581 | struct list_head bb_prealloc_list; | 1600 | struct list_head bb_prealloc_list; |
1582 | #ifdef DOUBLE_CHECK | 1601 | #ifdef DOUBLE_CHECK |
1583 | void *bb_bitmap; | 1602 | void *bb_bitmap; |
1584 | #endif | 1603 | #endif |
1585 | struct rw_semaphore alloc_sem; | 1604 | struct rw_semaphore alloc_sem; |
1586 | unsigned short bb_counters[]; | 1605 | ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block |
1606 | * regions, index is order. | ||
1607 | * bb_counters[3] = 5 means | ||
1608 | * 5 free 8-block regions. */ | ||
1587 | }; | 1609 | }; |
1588 | 1610 | ||
1589 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | 1611 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 |
@@ -1591,15 +1613,42 @@ struct ext4_group_info { | |||
1591 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | 1613 | #define EXT4_MB_GRP_NEED_INIT(grp) \ |
1592 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | 1614 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) |
1593 | 1615 | ||
1616 | #define EXT4_MAX_CONTENTION 8 | ||
1617 | #define EXT4_CONTENTION_THRESHOLD 2 | ||
1618 | |||
1594 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, | 1619 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, |
1595 | ext4_group_t group) | 1620 | ext4_group_t group) |
1596 | { | 1621 | { |
1597 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); | 1622 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); |
1598 | } | 1623 | } |
1599 | 1624 | ||
1625 | /* | ||
1626 | * Returns true if the filesystem is busy enough that attempts to | ||
1627 | * access the block group locks has run into contention. | ||
1628 | */ | ||
1629 | static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi) | ||
1630 | { | ||
1631 | return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD); | ||
1632 | } | ||
1633 | |||
1600 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 1634 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
1601 | { | 1635 | { |
1602 | spin_lock(ext4_group_lock_ptr(sb, group)); | 1636 | spinlock_t *lock = ext4_group_lock_ptr(sb, group); |
1637 | if (spin_trylock(lock)) | ||
1638 | /* | ||
1639 | * We're able to grab the lock right away, so drop the | ||
1640 | * lock contention counter. | ||
1641 | */ | ||
1642 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0); | ||
1643 | else { | ||
1644 | /* | ||
1645 | * The lock is busy, so bump the contention counter, | ||
1646 | * and then wait on the spin lock. | ||
1647 | */ | ||
1648 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1, | ||
1649 | EXT4_MAX_CONTENTION); | ||
1650 | spin_lock(lock); | ||
1651 | } | ||
1603 | } | 1652 | } |
1604 | 1653 | ||
1605 | static inline void ext4_unlock_group(struct super_block *sb, | 1654 | static inline void ext4_unlock_group(struct super_block *sb, |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 20a84105a10b..61652f1d15e6 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -43,8 +43,7 @@ | |||
43 | #define CHECK_BINSEARCH__ | 43 | #define CHECK_BINSEARCH__ |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * If EXT_DEBUG is defined you can use the 'extdebug' mount option | 46 | * Turn on EXT_DEBUG to get lots of info about extents operations. |
47 | * to get lots of info about what's going on. | ||
48 | */ | 47 | */ |
49 | #define EXT_DEBUG__ | 48 | #define EXT_DEBUG__ |
50 | #ifdef EXT_DEBUG | 49 | #ifdef EXT_DEBUG |
@@ -138,6 +137,7 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | |||
138 | #define EXT_BREAK 1 | 137 | #define EXT_BREAK 1 |
139 | #define EXT_REPEAT 2 | 138 | #define EXT_REPEAT 2 |
140 | 139 | ||
140 | /* Maximum logical block in a file; ext4_extent's ee_block is __le32 */ | ||
141 | #define EXT_MAX_BLOCK 0xffffffff | 141 | #define EXT_MAX_BLOCK 0xffffffff |
142 | 142 | ||
143 | /* | 143 | /* |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index eb27fd0f2ee8..6a9409920dee 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -44,7 +44,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle, | |||
44 | handle, err); | 44 | handle, err); |
45 | } | 45 | } |
46 | else | 46 | else |
47 | brelse(bh); | 47 | bforget(bh); |
48 | return err; | 48 | return err; |
49 | } | 49 | } |
50 | 50 | ||
@@ -60,7 +60,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle, | |||
60 | handle, err); | 60 | handle, err); |
61 | } | 61 | } |
62 | else | 62 | else |
63 | brelse(bh); | 63 | bforget(bh); |
64 | return err; | 64 | return err; |
65 | } | 65 | } |
66 | 66 | ||
@@ -89,7 +89,10 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | |||
89 | ext4_journal_abort_handle(where, __func__, bh, | 89 | ext4_journal_abort_handle(where, __func__, bh, |
90 | handle, err); | 90 | handle, err); |
91 | } else { | 91 | } else { |
92 | mark_buffer_dirty(bh); | 92 | if (inode && bh) |
93 | mark_buffer_dirty_inode(bh, inode); | ||
94 | else | ||
95 | mark_buffer_dirty(bh); | ||
93 | if (inode && inode_needs_sync(inode)) { | 96 | if (inode && inode_needs_sync(inode)) { |
94 | sync_dirty_buffer(bh); | 97 | sync_dirty_buffer(bh); |
95 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | 98 | if (buffer_req(bh) && !buffer_uptodate(bh)) { |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 73ebfb44ad75..7a3832577923 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -93,7 +93,9 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) | |||
93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); | 93 | ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); |
94 | } | 94 | } |
95 | 95 | ||
96 | static int ext4_ext_journal_restart(handle_t *handle, int needed) | 96 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
97 | struct inode *inode, | ||
98 | int needed) | ||
97 | { | 99 | { |
98 | int err; | 100 | int err; |
99 | 101 | ||
@@ -104,7 +106,14 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed) | |||
104 | err = ext4_journal_extend(handle, needed); | 106 | err = ext4_journal_extend(handle, needed); |
105 | if (err <= 0) | 107 | if (err <= 0) |
106 | return err; | 108 | return err; |
107 | return ext4_journal_restart(handle, needed); | 109 | err = ext4_truncate_restart_trans(handle, inode, needed); |
110 | /* | ||
111 | * We have dropped i_data_sem so someone might have cached again | ||
112 | * an extent we are going to truncate. | ||
113 | */ | ||
114 | ext4_ext_invalidate_cache(inode); | ||
115 | |||
116 | return err; | ||
108 | } | 117 | } |
109 | 118 | ||
110 | /* | 119 | /* |
@@ -220,57 +229,65 @@ ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, | |||
220 | return newblock; | 229 | return newblock; |
221 | } | 230 | } |
222 | 231 | ||
223 | static int ext4_ext_space_block(struct inode *inode) | 232 | static inline int ext4_ext_space_block(struct inode *inode, int check) |
224 | { | 233 | { |
225 | int size; | 234 | int size; |
226 | 235 | ||
227 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 236 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
228 | / sizeof(struct ext4_extent); | 237 | / sizeof(struct ext4_extent); |
238 | if (!check) { | ||
229 | #ifdef AGGRESSIVE_TEST | 239 | #ifdef AGGRESSIVE_TEST |
230 | if (size > 6) | 240 | if (size > 6) |
231 | size = 6; | 241 | size = 6; |
232 | #endif | 242 | #endif |
243 | } | ||
233 | return size; | 244 | return size; |
234 | } | 245 | } |
235 | 246 | ||
236 | static int ext4_ext_space_block_idx(struct inode *inode) | 247 | static inline int ext4_ext_space_block_idx(struct inode *inode, int check) |
237 | { | 248 | { |
238 | int size; | 249 | int size; |
239 | 250 | ||
240 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) | 251 | size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) |
241 | / sizeof(struct ext4_extent_idx); | 252 | / sizeof(struct ext4_extent_idx); |
253 | if (!check) { | ||
242 | #ifdef AGGRESSIVE_TEST | 254 | #ifdef AGGRESSIVE_TEST |
243 | if (size > 5) | 255 | if (size > 5) |
244 | size = 5; | 256 | size = 5; |
245 | #endif | 257 | #endif |
258 | } | ||
246 | return size; | 259 | return size; |
247 | } | 260 | } |
248 | 261 | ||
249 | static int ext4_ext_space_root(struct inode *inode) | 262 | static inline int ext4_ext_space_root(struct inode *inode, int check) |
250 | { | 263 | { |
251 | int size; | 264 | int size; |
252 | 265 | ||
253 | size = sizeof(EXT4_I(inode)->i_data); | 266 | size = sizeof(EXT4_I(inode)->i_data); |
254 | size -= sizeof(struct ext4_extent_header); | 267 | size -= sizeof(struct ext4_extent_header); |
255 | size /= sizeof(struct ext4_extent); | 268 | size /= sizeof(struct ext4_extent); |
269 | if (!check) { | ||
256 | #ifdef AGGRESSIVE_TEST | 270 | #ifdef AGGRESSIVE_TEST |
257 | if (size > 3) | 271 | if (size > 3) |
258 | size = 3; | 272 | size = 3; |
259 | #endif | 273 | #endif |
274 | } | ||
260 | return size; | 275 | return size; |
261 | } | 276 | } |
262 | 277 | ||
263 | static int ext4_ext_space_root_idx(struct inode *inode) | 278 | static inline int ext4_ext_space_root_idx(struct inode *inode, int check) |
264 | { | 279 | { |
265 | int size; | 280 | int size; |
266 | 281 | ||
267 | size = sizeof(EXT4_I(inode)->i_data); | 282 | size = sizeof(EXT4_I(inode)->i_data); |
268 | size -= sizeof(struct ext4_extent_header); | 283 | size -= sizeof(struct ext4_extent_header); |
269 | size /= sizeof(struct ext4_extent_idx); | 284 | size /= sizeof(struct ext4_extent_idx); |
285 | if (!check) { | ||
270 | #ifdef AGGRESSIVE_TEST | 286 | #ifdef AGGRESSIVE_TEST |
271 | if (size > 4) | 287 | if (size > 4) |
272 | size = 4; | 288 | size = 4; |
273 | #endif | 289 | #endif |
290 | } | ||
274 | return size; | 291 | return size; |
275 | } | 292 | } |
276 | 293 | ||
@@ -284,9 +301,9 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) | |||
284 | int lcap, icap, rcap, leafs, idxs, num; | 301 | int lcap, icap, rcap, leafs, idxs, num; |
285 | int newextents = blocks; | 302 | int newextents = blocks; |
286 | 303 | ||
287 | rcap = ext4_ext_space_root_idx(inode); | 304 | rcap = ext4_ext_space_root_idx(inode, 0); |
288 | lcap = ext4_ext_space_block(inode); | 305 | lcap = ext4_ext_space_block(inode, 0); |
289 | icap = ext4_ext_space_block_idx(inode); | 306 | icap = ext4_ext_space_block_idx(inode, 0); |
290 | 307 | ||
291 | /* number of new leaf blocks needed */ | 308 | /* number of new leaf blocks needed */ |
292 | num = leafs = (newextents + lcap - 1) / lcap; | 309 | num = leafs = (newextents + lcap - 1) / lcap; |
@@ -311,14 +328,14 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
311 | 328 | ||
312 | if (depth == ext_depth(inode)) { | 329 | if (depth == ext_depth(inode)) { |
313 | if (depth == 0) | 330 | if (depth == 0) |
314 | max = ext4_ext_space_root(inode); | 331 | max = ext4_ext_space_root(inode, 1); |
315 | else | 332 | else |
316 | max = ext4_ext_space_root_idx(inode); | 333 | max = ext4_ext_space_root_idx(inode, 1); |
317 | } else { | 334 | } else { |
318 | if (depth == 0) | 335 | if (depth == 0) |
319 | max = ext4_ext_space_block(inode); | 336 | max = ext4_ext_space_block(inode, 1); |
320 | else | 337 | else |
321 | max = ext4_ext_space_block_idx(inode); | 338 | max = ext4_ext_space_block_idx(inode, 1); |
322 | } | 339 | } |
323 | 340 | ||
324 | return max; | 341 | return max; |
@@ -437,8 +454,9 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) | |||
437 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), | 454 | ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), |
438 | idx_pblock(path->p_idx)); | 455 | idx_pblock(path->p_idx)); |
439 | } else if (path->p_ext) { | 456 | } else if (path->p_ext) { |
440 | ext_debug(" %d:%d:%llu ", | 457 | ext_debug(" %d:[%d]%d:%llu ", |
441 | le32_to_cpu(path->p_ext->ee_block), | 458 | le32_to_cpu(path->p_ext->ee_block), |
459 | ext4_ext_is_uninitialized(path->p_ext), | ||
442 | ext4_ext_get_actual_len(path->p_ext), | 460 | ext4_ext_get_actual_len(path->p_ext), |
443 | ext_pblock(path->p_ext)); | 461 | ext_pblock(path->p_ext)); |
444 | } else | 462 | } else |
@@ -460,8 +478,11 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
460 | eh = path[depth].p_hdr; | 478 | eh = path[depth].p_hdr; |
461 | ex = EXT_FIRST_EXTENT(eh); | 479 | ex = EXT_FIRST_EXTENT(eh); |
462 | 480 | ||
481 | ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); | ||
482 | |||
463 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { | 483 | for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { |
464 | ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), | 484 | ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), |
485 | ext4_ext_is_uninitialized(ex), | ||
465 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 486 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); |
466 | } | 487 | } |
467 | ext_debug("\n"); | 488 | ext_debug("\n"); |
@@ -580,9 +601,10 @@ ext4_ext_binsearch(struct inode *inode, | |||
580 | } | 601 | } |
581 | 602 | ||
582 | path->p_ext = l - 1; | 603 | path->p_ext = l - 1; |
583 | ext_debug(" -> %d:%llu:%d ", | 604 | ext_debug(" -> %d:%llu:[%d]%d ", |
584 | le32_to_cpu(path->p_ext->ee_block), | 605 | le32_to_cpu(path->p_ext->ee_block), |
585 | ext_pblock(path->p_ext), | 606 | ext_pblock(path->p_ext), |
607 | ext4_ext_is_uninitialized(path->p_ext), | ||
586 | ext4_ext_get_actual_len(path->p_ext)); | 608 | ext4_ext_get_actual_len(path->p_ext)); |
587 | 609 | ||
588 | #ifdef CHECK_BINSEARCH | 610 | #ifdef CHECK_BINSEARCH |
@@ -612,7 +634,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) | |||
612 | eh->eh_depth = 0; | 634 | eh->eh_depth = 0; |
613 | eh->eh_entries = 0; | 635 | eh->eh_entries = 0; |
614 | eh->eh_magic = EXT4_EXT_MAGIC; | 636 | eh->eh_magic = EXT4_EXT_MAGIC; |
615 | eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode)); | 637 | eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); |
616 | ext4_mark_inode_dirty(handle, inode); | 638 | ext4_mark_inode_dirty(handle, inode); |
617 | ext4_ext_invalidate_cache(inode); | 639 | ext4_ext_invalidate_cache(inode); |
618 | return 0; | 640 | return 0; |
@@ -837,7 +859,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
837 | 859 | ||
838 | neh = ext_block_hdr(bh); | 860 | neh = ext_block_hdr(bh); |
839 | neh->eh_entries = 0; | 861 | neh->eh_entries = 0; |
840 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); | 862 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); |
841 | neh->eh_magic = EXT4_EXT_MAGIC; | 863 | neh->eh_magic = EXT4_EXT_MAGIC; |
842 | neh->eh_depth = 0; | 864 | neh->eh_depth = 0; |
843 | ex = EXT_FIRST_EXTENT(neh); | 865 | ex = EXT_FIRST_EXTENT(neh); |
@@ -850,9 +872,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
850 | path[depth].p_ext++; | 872 | path[depth].p_ext++; |
851 | while (path[depth].p_ext <= | 873 | while (path[depth].p_ext <= |
852 | EXT_MAX_EXTENT(path[depth].p_hdr)) { | 874 | EXT_MAX_EXTENT(path[depth].p_hdr)) { |
853 | ext_debug("move %d:%llu:%d in new leaf %llu\n", | 875 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", |
854 | le32_to_cpu(path[depth].p_ext->ee_block), | 876 | le32_to_cpu(path[depth].p_ext->ee_block), |
855 | ext_pblock(path[depth].p_ext), | 877 | ext_pblock(path[depth].p_ext), |
878 | ext4_ext_is_uninitialized(path[depth].p_ext), | ||
856 | ext4_ext_get_actual_len(path[depth].p_ext), | 879 | ext4_ext_get_actual_len(path[depth].p_ext), |
857 | newblock); | 880 | newblock); |
858 | /*memmove(ex++, path[depth].p_ext++, | 881 | /*memmove(ex++, path[depth].p_ext++, |
@@ -912,7 +935,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
912 | neh = ext_block_hdr(bh); | 935 | neh = ext_block_hdr(bh); |
913 | neh->eh_entries = cpu_to_le16(1); | 936 | neh->eh_entries = cpu_to_le16(1); |
914 | neh->eh_magic = EXT4_EXT_MAGIC; | 937 | neh->eh_magic = EXT4_EXT_MAGIC; |
915 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); | 938 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); |
916 | neh->eh_depth = cpu_to_le16(depth - i); | 939 | neh->eh_depth = cpu_to_le16(depth - i); |
917 | fidx = EXT_FIRST_INDEX(neh); | 940 | fidx = EXT_FIRST_INDEX(neh); |
918 | fidx->ei_block = border; | 941 | fidx->ei_block = border; |
@@ -1037,9 +1060,9 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1037 | /* old root could have indexes or leaves | 1060 | /* old root could have indexes or leaves |
1038 | * so calculate e_max right way */ | 1061 | * so calculate e_max right way */ |
1039 | if (ext_depth(inode)) | 1062 | if (ext_depth(inode)) |
1040 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); | 1063 | neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); |
1041 | else | 1064 | else |
1042 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); | 1065 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); |
1043 | neh->eh_magic = EXT4_EXT_MAGIC; | 1066 | neh->eh_magic = EXT4_EXT_MAGIC; |
1044 | set_buffer_uptodate(bh); | 1067 | set_buffer_uptodate(bh); |
1045 | unlock_buffer(bh); | 1068 | unlock_buffer(bh); |
@@ -1054,7 +1077,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1054 | goto out; | 1077 | goto out; |
1055 | 1078 | ||
1056 | curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; | 1079 | curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; |
1057 | curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode)); | 1080 | curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); |
1058 | curp->p_hdr->eh_entries = cpu_to_le16(1); | 1081 | curp->p_hdr->eh_entries = cpu_to_le16(1); |
1059 | curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); | 1082 | curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); |
1060 | 1083 | ||
@@ -1580,9 +1603,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1580 | 1603 | ||
1581 | /* try to insert block into found extent and return */ | 1604 | /* try to insert block into found extent and return */ |
1582 | if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { | 1605 | if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { |
1583 | ext_debug("append %d block to %d:%d (from %llu)\n", | 1606 | ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", |
1607 | ext4_ext_is_uninitialized(newext), | ||
1584 | ext4_ext_get_actual_len(newext), | 1608 | ext4_ext_get_actual_len(newext), |
1585 | le32_to_cpu(ex->ee_block), | 1609 | le32_to_cpu(ex->ee_block), |
1610 | ext4_ext_is_uninitialized(ex), | ||
1586 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); | 1611 | ext4_ext_get_actual_len(ex), ext_pblock(ex)); |
1587 | err = ext4_ext_get_access(handle, inode, path + depth); | 1612 | err = ext4_ext_get_access(handle, inode, path + depth); |
1588 | if (err) | 1613 | if (err) |
@@ -1651,9 +1676,10 @@ has_space: | |||
1651 | 1676 | ||
1652 | if (!nearex) { | 1677 | if (!nearex) { |
1653 | /* there is no extent in this leaf, create first one */ | 1678 | /* there is no extent in this leaf, create first one */ |
1654 | ext_debug("first extent in the leaf: %d:%llu:%d\n", | 1679 | ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", |
1655 | le32_to_cpu(newext->ee_block), | 1680 | le32_to_cpu(newext->ee_block), |
1656 | ext_pblock(newext), | 1681 | ext_pblock(newext), |
1682 | ext4_ext_is_uninitialized(newext), | ||
1657 | ext4_ext_get_actual_len(newext)); | 1683 | ext4_ext_get_actual_len(newext)); |
1658 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); | 1684 | path[depth].p_ext = EXT_FIRST_EXTENT(eh); |
1659 | } else if (le32_to_cpu(newext->ee_block) | 1685 | } else if (le32_to_cpu(newext->ee_block) |
@@ -1663,10 +1689,11 @@ has_space: | |||
1663 | len = EXT_MAX_EXTENT(eh) - nearex; | 1689 | len = EXT_MAX_EXTENT(eh) - nearex; |
1664 | len = (len - 1) * sizeof(struct ext4_extent); | 1690 | len = (len - 1) * sizeof(struct ext4_extent); |
1665 | len = len < 0 ? 0 : len; | 1691 | len = len < 0 ? 0 : len; |
1666 | ext_debug("insert %d:%llu:%d after: nearest 0x%p, " | 1692 | ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " |
1667 | "move %d from 0x%p to 0x%p\n", | 1693 | "move %d from 0x%p to 0x%p\n", |
1668 | le32_to_cpu(newext->ee_block), | 1694 | le32_to_cpu(newext->ee_block), |
1669 | ext_pblock(newext), | 1695 | ext_pblock(newext), |
1696 | ext4_ext_is_uninitialized(newext), | ||
1670 | ext4_ext_get_actual_len(newext), | 1697 | ext4_ext_get_actual_len(newext), |
1671 | nearex, len, nearex + 1, nearex + 2); | 1698 | nearex, len, nearex + 1, nearex + 2); |
1672 | memmove(nearex + 2, nearex + 1, len); | 1699 | memmove(nearex + 2, nearex + 1, len); |
@@ -1676,10 +1703,11 @@ has_space: | |||
1676 | BUG_ON(newext->ee_block == nearex->ee_block); | 1703 | BUG_ON(newext->ee_block == nearex->ee_block); |
1677 | len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); | 1704 | len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); |
1678 | len = len < 0 ? 0 : len; | 1705 | len = len < 0 ? 0 : len; |
1679 | ext_debug("insert %d:%llu:%d before: nearest 0x%p, " | 1706 | ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " |
1680 | "move %d from 0x%p to 0x%p\n", | 1707 | "move %d from 0x%p to 0x%p\n", |
1681 | le32_to_cpu(newext->ee_block), | 1708 | le32_to_cpu(newext->ee_block), |
1682 | ext_pblock(newext), | 1709 | ext_pblock(newext), |
1710 | ext4_ext_is_uninitialized(newext), | ||
1683 | ext4_ext_get_actual_len(newext), | 1711 | ext4_ext_get_actual_len(newext), |
1684 | nearex, len, nearex + 1, nearex + 2); | 1712 | nearex, len, nearex + 1, nearex + 2); |
1685 | memmove(nearex + 1, nearex, len); | 1713 | memmove(nearex + 1, nearex, len); |
@@ -2094,7 +2122,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2094 | else | 2122 | else |
2095 | uninitialized = 0; | 2123 | uninitialized = 0; |
2096 | 2124 | ||
2097 | ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); | 2125 | ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, |
2126 | uninitialized, ex_ee_len); | ||
2098 | path[depth].p_ext = ex; | 2127 | path[depth].p_ext = ex; |
2099 | 2128 | ||
2100 | a = ex_ee_block > start ? ex_ee_block : start; | 2129 | a = ex_ee_block > start ? ex_ee_block : start; |
@@ -2138,7 +2167,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2138 | } | 2167 | } |
2139 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 2168 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); |
2140 | 2169 | ||
2141 | err = ext4_ext_journal_restart(handle, credits); | 2170 | err = ext4_ext_truncate_extend_restart(handle, inode, credits); |
2142 | if (err) | 2171 | if (err) |
2143 | goto out; | 2172 | goto out; |
2144 | 2173 | ||
@@ -2327,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2327 | if (err == 0) { | 2356 | if (err == 0) { |
2328 | ext_inode_hdr(inode)->eh_depth = 0; | 2357 | ext_inode_hdr(inode)->eh_depth = 0; |
2329 | ext_inode_hdr(inode)->eh_max = | 2358 | ext_inode_hdr(inode)->eh_max = |
2330 | cpu_to_le16(ext4_ext_space_root(inode)); | 2359 | cpu_to_le16(ext4_ext_space_root(inode, 0)); |
2331 | err = ext4_ext_dirty(handle, inode, path); | 2360 | err = ext4_ext_dirty(handle, inode, path); |
2332 | } | 2361 | } |
2333 | } | 2362 | } |
@@ -2743,6 +2772,7 @@ insert: | |||
2743 | } else if (err) | 2772 | } else if (err) |
2744 | goto fix_extent_len; | 2773 | goto fix_extent_len; |
2745 | out: | 2774 | out: |
2775 | ext4_ext_show_leaf(inode, path); | ||
2746 | return err ? err : allocated; | 2776 | return err ? err : allocated; |
2747 | 2777 | ||
2748 | fix_extent_len: | 2778 | fix_extent_len: |
@@ -2786,7 +2816,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2786 | struct ext4_allocation_request ar; | 2816 | struct ext4_allocation_request ar; |
2787 | 2817 | ||
2788 | __clear_bit(BH_New, &bh_result->b_state); | 2818 | __clear_bit(BH_New, &bh_result->b_state); |
2789 | ext_debug("blocks %u/%u requested for inode %u\n", | 2819 | ext_debug("blocks %u/%u requested for inode %lu\n", |
2790 | iblock, max_blocks, inode->i_ino); | 2820 | iblock, max_blocks, inode->i_ino); |
2791 | 2821 | ||
2792 | /* check in cache */ | 2822 | /* check in cache */ |
@@ -2849,7 +2879,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2849 | newblock = iblock - ee_block + ee_start; | 2879 | newblock = iblock - ee_block + ee_start; |
2850 | /* number of remaining blocks in the extent */ | 2880 | /* number of remaining blocks in the extent */ |
2851 | allocated = ee_len - (iblock - ee_block); | 2881 | allocated = ee_len - (iblock - ee_block); |
2852 | ext_debug("%u fit into %lu:%d -> %llu\n", iblock, | 2882 | ext_debug("%u fit into %u:%d -> %llu\n", iblock, |
2853 | ee_block, ee_len, newblock); | 2883 | ee_block, ee_len, newblock); |
2854 | 2884 | ||
2855 | /* Do not put uninitialized extent in the cache */ | 2885 | /* Do not put uninitialized extent in the cache */ |
@@ -2950,7 +2980,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2950 | newblock = ext4_mb_new_blocks(handle, &ar, &err); | 2980 | newblock = ext4_mb_new_blocks(handle, &ar, &err); |
2951 | if (!newblock) | 2981 | if (!newblock) |
2952 | goto out2; | 2982 | goto out2; |
2953 | ext_debug("allocate new block: goal %llu, found %llu/%lu\n", | 2983 | ext_debug("allocate new block: goal %llu, found %llu/%u\n", |
2954 | ar.goal, newblock, allocated); | 2984 | ar.goal, newblock, allocated); |
2955 | 2985 | ||
2956 | /* try to insert new extent into found leaf and return */ | 2986 | /* try to insert new extent into found leaf and return */ |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 83cf6415f599..07475740b512 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -50,7 +50,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
50 | { | 50 | { |
51 | struct inode *inode = dentry->d_inode; | 51 | struct inode *inode = dentry->d_inode; |
52 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 52 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
53 | int ret = 0; | 53 | int err, ret = 0; |
54 | 54 | ||
55 | J_ASSERT(ext4_journal_current_handle() == NULL); | 55 | J_ASSERT(ext4_journal_current_handle() == NULL); |
56 | 56 | ||
@@ -79,6 +79,9 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
79 | goto out; | 79 | goto out; |
80 | } | 80 | } |
81 | 81 | ||
82 | if (!journal) | ||
83 | ret = sync_mapping_buffers(inode->i_mapping); | ||
84 | |||
82 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 85 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
83 | goto out; | 86 | goto out; |
84 | 87 | ||
@@ -91,10 +94,12 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
91 | .sync_mode = WB_SYNC_ALL, | 94 | .sync_mode = WB_SYNC_ALL, |
92 | .nr_to_write = 0, /* sys_fsync did this */ | 95 | .nr_to_write = 0, /* sys_fsync did this */ |
93 | }; | 96 | }; |
94 | ret = sync_inode(inode, &wbc); | 97 | err = sync_inode(inode, &wbc); |
95 | if (journal && (journal->j_flags & JBD2_BARRIER)) | 98 | if (ret == 0) |
96 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 99 | ret = err; |
97 | } | 100 | } |
98 | out: | 101 | out: |
102 | if (journal && (journal->j_flags & JBD2_BARRIER)) | ||
103 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | ||
99 | return ret; | 104 | return ret; |
100 | } | 105 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 29e6dc7299b8..f3624ead4f6c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -1189,7 +1189,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) | |||
1189 | 1189 | ||
1190 | x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); | 1190 | x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); |
1191 | printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", | 1191 | printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", |
1192 | i, ext4_free_inodes_count(sb, gdp), x); | 1192 | (unsigned long) i, ext4_free_inodes_count(sb, gdp), x); |
1193 | bitmap_count += x; | 1193 | bitmap_count += x; |
1194 | } | 1194 | } |
1195 | brelse(bitmap_bh); | 1195 | brelse(bitmap_bh); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 349dd6b4da47..064746fad581 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -192,11 +192,24 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | |||
192 | * so before we call here everything must be consistently dirtied against | 192 | * so before we call here everything must be consistently dirtied against |
193 | * this transaction. | 193 | * this transaction. |
194 | */ | 194 | */ |
195 | static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) | 195 | int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, |
196 | int nblocks) | ||
196 | { | 197 | { |
198 | int ret; | ||
199 | |||
200 | /* | ||
201 | * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this | ||
202 | * moment, get_block can be called only for blocks inside i_size since | ||
203 | * page cache has been already dropped and writes are blocked by | ||
204 | * i_mutex. So we can safely drop the i_data_sem here. | ||
205 | */ | ||
197 | BUG_ON(EXT4_JOURNAL(inode) == NULL); | 206 | BUG_ON(EXT4_JOURNAL(inode) == NULL); |
198 | jbd_debug(2, "restarting handle %p\n", handle); | 207 | jbd_debug(2, "restarting handle %p\n", handle); |
199 | return ext4_journal_restart(handle, blocks_for_truncate(inode)); | 208 | up_write(&EXT4_I(inode)->i_data_sem); |
209 | ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); | ||
210 | down_write(&EXT4_I(inode)->i_data_sem); | ||
211 | |||
212 | return ret; | ||
200 | } | 213 | } |
201 | 214 | ||
202 | /* | 215 | /* |
@@ -341,9 +354,7 @@ static int ext4_block_to_path(struct inode *inode, | |||
341 | int n = 0; | 354 | int n = 0; |
342 | int final = 0; | 355 | int final = 0; |
343 | 356 | ||
344 | if (i_block < 0) { | 357 | if (i_block < direct_blocks) { |
345 | ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); | ||
346 | } else if (i_block < direct_blocks) { | ||
347 | offsets[n++] = i_block; | 358 | offsets[n++] = i_block; |
348 | final = direct_blocks; | 359 | final = direct_blocks; |
349 | } else if ((i_block -= direct_blocks) < indirect_blocks) { | 360 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
@@ -551,15 +562,21 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
551 | * | 562 | * |
552 | * Normally this function find the preferred place for block allocation, | 563 | * Normally this function find the preferred place for block allocation, |
553 | * returns it. | 564 | * returns it. |
565 | * Because this is only used for non-extent files, we limit the block nr | ||
566 | * to 32 bits. | ||
554 | */ | 567 | */ |
555 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 568 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
556 | Indirect *partial) | 569 | Indirect *partial) |
557 | { | 570 | { |
571 | ext4_fsblk_t goal; | ||
572 | |||
558 | /* | 573 | /* |
559 | * XXX need to get goal block from mballoc's data structures | 574 | * XXX need to get goal block from mballoc's data structures |
560 | */ | 575 | */ |
561 | 576 | ||
562 | return ext4_find_near(inode, partial); | 577 | goal = ext4_find_near(inode, partial); |
578 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
579 | return goal; | ||
563 | } | 580 | } |
564 | 581 | ||
565 | /** | 582 | /** |
@@ -640,6 +657,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
640 | if (*err) | 657 | if (*err) |
641 | goto failed_out; | 658 | goto failed_out; |
642 | 659 | ||
660 | BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); | ||
661 | |||
643 | target -= count; | 662 | target -= count; |
644 | /* allocate blocks for indirect blocks */ | 663 | /* allocate blocks for indirect blocks */ |
645 | while (index < indirect_blks && count) { | 664 | while (index < indirect_blks && count) { |
@@ -674,6 +693,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
674 | ar.flags = EXT4_MB_HINT_DATA; | 693 | ar.flags = EXT4_MB_HINT_DATA; |
675 | 694 | ||
676 | current_block = ext4_mb_new_blocks(handle, &ar, err); | 695 | current_block = ext4_mb_new_blocks(handle, &ar, err); |
696 | BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); | ||
677 | 697 | ||
678 | if (*err && (target == blks)) { | 698 | if (*err && (target == blks)) { |
679 | /* | 699 | /* |
@@ -762,8 +782,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
762 | BUFFER_TRACE(bh, "call get_create_access"); | 782 | BUFFER_TRACE(bh, "call get_create_access"); |
763 | err = ext4_journal_get_create_access(handle, bh); | 783 | err = ext4_journal_get_create_access(handle, bh); |
764 | if (err) { | 784 | if (err) { |
785 | /* Don't brelse(bh) here; it's done in | ||
786 | * ext4_journal_forget() below */ | ||
765 | unlock_buffer(bh); | 787 | unlock_buffer(bh); |
766 | brelse(bh); | ||
767 | goto failed; | 788 | goto failed; |
768 | } | 789 | } |
769 | 790 | ||
@@ -1109,16 +1130,15 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1109 | ext4_discard_preallocations(inode); | 1130 | ext4_discard_preallocations(inode); |
1110 | } | 1131 | } |
1111 | 1132 | ||
1112 | static int check_block_validity(struct inode *inode, sector_t logical, | 1133 | static int check_block_validity(struct inode *inode, const char *msg, |
1113 | sector_t phys, int len) | 1134 | sector_t logical, sector_t phys, int len) |
1114 | { | 1135 | { |
1115 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1136 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { |
1116 | ext4_error(inode->i_sb, "check_block_validity", | 1137 | ext4_error(inode->i_sb, msg, |
1117 | "inode #%lu logical block %llu mapped to %llu " | 1138 | "inode #%lu logical block %llu mapped to %llu " |
1118 | "(size %d)", inode->i_ino, | 1139 | "(size %d)", inode->i_ino, |
1119 | (unsigned long long) logical, | 1140 | (unsigned long long) logical, |
1120 | (unsigned long long) phys, len); | 1141 | (unsigned long long) phys, len); |
1121 | WARN_ON(1); | ||
1122 | return -EIO; | 1142 | return -EIO; |
1123 | } | 1143 | } |
1124 | return 0; | 1144 | return 0; |
@@ -1170,8 +1190,8 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1170 | up_read((&EXT4_I(inode)->i_data_sem)); | 1190 | up_read((&EXT4_I(inode)->i_data_sem)); |
1171 | 1191 | ||
1172 | if (retval > 0 && buffer_mapped(bh)) { | 1192 | if (retval > 0 && buffer_mapped(bh)) { |
1173 | int ret = check_block_validity(inode, block, | 1193 | int ret = check_block_validity(inode, "file system corruption", |
1174 | bh->b_blocknr, retval); | 1194 | block, bh->b_blocknr, retval); |
1175 | if (ret != 0) | 1195 | if (ret != 0) |
1176 | return ret; | 1196 | return ret; |
1177 | } | 1197 | } |
@@ -1235,8 +1255,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1235 | * i_data's format changing. Force the migrate | 1255 | * i_data's format changing. Force the migrate |
1236 | * to fail by clearing migrate flags | 1256 | * to fail by clearing migrate flags |
1237 | */ | 1257 | */ |
1238 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & | 1258 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; |
1239 | ~EXT4_EXT_MIGRATE; | ||
1240 | } | 1259 | } |
1241 | } | 1260 | } |
1242 | 1261 | ||
@@ -1252,8 +1271,9 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1252 | 1271 | ||
1253 | up_write((&EXT4_I(inode)->i_data_sem)); | 1272 | up_write((&EXT4_I(inode)->i_data_sem)); |
1254 | if (retval > 0 && buffer_mapped(bh)) { | 1273 | if (retval > 0 && buffer_mapped(bh)) { |
1255 | int ret = check_block_validity(inode, block, | 1274 | int ret = check_block_validity(inode, "file system " |
1256 | bh->b_blocknr, retval); | 1275 | "corruption after allocation", |
1276 | block, bh->b_blocknr, retval); | ||
1257 | if (ret != 0) | 1277 | if (ret != 0) |
1258 | return ret; | 1278 | return ret; |
1259 | } | 1279 | } |
@@ -1863,18 +1883,6 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1863 | * Delayed allocation stuff | 1883 | * Delayed allocation stuff |
1864 | */ | 1884 | */ |
1865 | 1885 | ||
1866 | struct mpage_da_data { | ||
1867 | struct inode *inode; | ||
1868 | sector_t b_blocknr; /* start block number of extent */ | ||
1869 | size_t b_size; /* size of extent */ | ||
1870 | unsigned long b_state; /* state of the extent */ | ||
1871 | unsigned long first_page, next_page; /* extent of pages */ | ||
1872 | struct writeback_control *wbc; | ||
1873 | int io_done; | ||
1874 | int pages_written; | ||
1875 | int retval; | ||
1876 | }; | ||
1877 | |||
1878 | /* | 1886 | /* |
1879 | * mpage_da_submit_io - walks through extent of pages and try to write | 1887 | * mpage_da_submit_io - walks through extent of pages and try to write |
1880 | * them with writepage() call back | 1888 | * them with writepage() call back |
@@ -2329,7 +2337,7 @@ static int __mpage_da_writepage(struct page *page, | |||
2329 | /* | 2337 | /* |
2330 | * Rest of the page in the page_vec | 2338 | * Rest of the page in the page_vec |
2331 | * redirty then and skip then. We will | 2339 | * redirty then and skip then. We will |
2332 | * try to to write them again after | 2340 | * try to write them again after |
2333 | * starting a new transaction | 2341 | * starting a new transaction |
2334 | */ | 2342 | */ |
2335 | redirty_page_for_writepage(wbc, page); | 2343 | redirty_page_for_writepage(wbc, page); |
@@ -2737,6 +2745,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2737 | long pages_skipped; | 2745 | long pages_skipped; |
2738 | int range_cyclic, cycled = 1, io_done = 0; | 2746 | int range_cyclic, cycled = 1, io_done = 0; |
2739 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2747 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
2748 | loff_t range_start = wbc->range_start; | ||
2740 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2749 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2741 | 2750 | ||
2742 | trace_ext4_da_writepages(inode, wbc); | 2751 | trace_ext4_da_writepages(inode, wbc); |
@@ -2850,6 +2859,7 @@ retry: | |||
2850 | mpd.io_done = 1; | 2859 | mpd.io_done = 1; |
2851 | ret = MPAGE_DA_EXTENT_TAIL; | 2860 | ret = MPAGE_DA_EXTENT_TAIL; |
2852 | } | 2861 | } |
2862 | trace_ext4_da_write_pages(inode, &mpd); | ||
2853 | wbc->nr_to_write -= mpd.pages_written; | 2863 | wbc->nr_to_write -= mpd.pages_written; |
2854 | 2864 | ||
2855 | ext4_journal_stop(handle); | 2865 | ext4_journal_stop(handle); |
@@ -2905,6 +2915,7 @@ out_writepages: | |||
2905 | if (!no_nrwrite_index_update) | 2915 | if (!no_nrwrite_index_update) |
2906 | wbc->no_nrwrite_index_update = 0; | 2916 | wbc->no_nrwrite_index_update = 0; |
2907 | wbc->nr_to_write -= nr_to_writebump; | 2917 | wbc->nr_to_write -= nr_to_writebump; |
2918 | wbc->range_start = range_start; | ||
2908 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 2919 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
2909 | return ret; | 2920 | return ret; |
2910 | } | 2921 | } |
@@ -3117,6 +3128,8 @@ out: | |||
3117 | */ | 3128 | */ |
3118 | int ext4_alloc_da_blocks(struct inode *inode) | 3129 | int ext4_alloc_da_blocks(struct inode *inode) |
3119 | { | 3130 | { |
3131 | trace_ext4_alloc_da_blocks(inode); | ||
3132 | |||
3120 | if (!EXT4_I(inode)->i_reserved_data_blocks && | 3133 | if (!EXT4_I(inode)->i_reserved_data_blocks && |
3121 | !EXT4_I(inode)->i_reserved_meta_blocks) | 3134 | !EXT4_I(inode)->i_reserved_meta_blocks) |
3122 | return 0; | 3135 | return 0; |
@@ -3663,7 +3676,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
3663 | ext4_handle_dirty_metadata(handle, inode, bh); | 3676 | ext4_handle_dirty_metadata(handle, inode, bh); |
3664 | } | 3677 | } |
3665 | ext4_mark_inode_dirty(handle, inode); | 3678 | ext4_mark_inode_dirty(handle, inode); |
3666 | ext4_journal_test_restart(handle, inode); | 3679 | ext4_truncate_restart_trans(handle, inode, |
3680 | blocks_for_truncate(inode)); | ||
3667 | if (bh) { | 3681 | if (bh) { |
3668 | BUFFER_TRACE(bh, "retaking write access"); | 3682 | BUFFER_TRACE(bh, "retaking write access"); |
3669 | ext4_journal_get_write_access(handle, bh); | 3683 | ext4_journal_get_write_access(handle, bh); |
@@ -3874,7 +3888,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
3874 | return; | 3888 | return; |
3875 | if (try_to_extend_transaction(handle, inode)) { | 3889 | if (try_to_extend_transaction(handle, inode)) { |
3876 | ext4_mark_inode_dirty(handle, inode); | 3890 | ext4_mark_inode_dirty(handle, inode); |
3877 | ext4_journal_test_restart(handle, inode); | 3891 | ext4_truncate_restart_trans(handle, inode, |
3892 | blocks_for_truncate(inode)); | ||
3878 | } | 3893 | } |
3879 | 3894 | ||
3880 | ext4_free_blocks(handle, inode, nr, 1, 1); | 3895 | ext4_free_blocks(handle, inode, nr, 1, 1); |
@@ -3962,8 +3977,7 @@ void ext4_truncate(struct inode *inode) | |||
3962 | if (!ext4_can_truncate(inode)) | 3977 | if (!ext4_can_truncate(inode)) |
3963 | return; | 3978 | return; |
3964 | 3979 | ||
3965 | if (ei->i_disksize && inode->i_size == 0 && | 3980 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
3966 | !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | ||
3967 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 3981 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; |
3968 | 3982 | ||
3969 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 3983 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
@@ -4537,7 +4551,8 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
4537 | */ | 4551 | */ |
4538 | static int ext4_do_update_inode(handle_t *handle, | 4552 | static int ext4_do_update_inode(handle_t *handle, |
4539 | struct inode *inode, | 4553 | struct inode *inode, |
4540 | struct ext4_iloc *iloc) | 4554 | struct ext4_iloc *iloc, |
4555 | int do_sync) | ||
4541 | { | 4556 | { |
4542 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); | 4557 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); |
4543 | struct ext4_inode_info *ei = EXT4_I(inode); | 4558 | struct ext4_inode_info *ei = EXT4_I(inode); |
@@ -4585,8 +4600,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4585 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) | 4600 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) |
4586 | goto out_brelse; | 4601 | goto out_brelse; |
4587 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 4602 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
4588 | /* clear the migrate flag in the raw_inode */ | 4603 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); |
4589 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE); | ||
4590 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 4604 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
4591 | cpu_to_le32(EXT4_OS_HURD)) | 4605 | cpu_to_le32(EXT4_OS_HURD)) |
4592 | raw_inode->i_file_acl_high = | 4606 | raw_inode->i_file_acl_high = |
@@ -4639,10 +4653,22 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4639 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 4653 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); |
4640 | } | 4654 | } |
4641 | 4655 | ||
4642 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4656 | /* |
4643 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | 4657 | * If we're not using a journal and we were called from |
4644 | if (!err) | 4658 | * ext4_write_inode() to sync the inode (making do_sync true), |
4645 | err = rc; | 4659 | * we can just use sync_dirty_buffer() directly to do our dirty |
4660 | * work. Testing s_journal here is a bit redundant but it's | ||
4661 | * worth it to avoid potential future trouble. | ||
4662 | */ | ||
4663 | if (EXT4_SB(inode->i_sb)->s_journal == NULL && do_sync) { | ||
4664 | BUFFER_TRACE(bh, "call sync_dirty_buffer"); | ||
4665 | sync_dirty_buffer(bh); | ||
4666 | } else { | ||
4667 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
4668 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | ||
4669 | if (!err) | ||
4670 | err = rc; | ||
4671 | } | ||
4646 | ei->i_state &= ~EXT4_STATE_NEW; | 4672 | ei->i_state &= ~EXT4_STATE_NEW; |
4647 | 4673 | ||
4648 | out_brelse: | 4674 | out_brelse: |
@@ -4688,19 +4714,32 @@ out_brelse: | |||
4688 | */ | 4714 | */ |
4689 | int ext4_write_inode(struct inode *inode, int wait) | 4715 | int ext4_write_inode(struct inode *inode, int wait) |
4690 | { | 4716 | { |
4717 | int err; | ||
4718 | |||
4691 | if (current->flags & PF_MEMALLOC) | 4719 | if (current->flags & PF_MEMALLOC) |
4692 | return 0; | 4720 | return 0; |
4693 | 4721 | ||
4694 | if (ext4_journal_current_handle()) { | 4722 | if (EXT4_SB(inode->i_sb)->s_journal) { |
4695 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); | 4723 | if (ext4_journal_current_handle()) { |
4696 | dump_stack(); | 4724 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); |
4697 | return -EIO; | 4725 | dump_stack(); |
4698 | } | 4726 | return -EIO; |
4727 | } | ||
4699 | 4728 | ||
4700 | if (!wait) | 4729 | if (!wait) |
4701 | return 0; | 4730 | return 0; |
4731 | |||
4732 | err = ext4_force_commit(inode->i_sb); | ||
4733 | } else { | ||
4734 | struct ext4_iloc iloc; | ||
4702 | 4735 | ||
4703 | return ext4_force_commit(inode->i_sb); | 4736 | err = ext4_get_inode_loc(inode, &iloc); |
4737 | if (err) | ||
4738 | return err; | ||
4739 | err = ext4_do_update_inode(EXT4_NOJOURNAL_HANDLE, | ||
4740 | inode, &iloc, wait); | ||
4741 | } | ||
4742 | return err; | ||
4704 | } | 4743 | } |
4705 | 4744 | ||
4706 | /* | 4745 | /* |
@@ -4994,7 +5033,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, | |||
4994 | get_bh(iloc->bh); | 5033 | get_bh(iloc->bh); |
4995 | 5034 | ||
4996 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ | 5035 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ |
4997 | err = ext4_do_update_inode(handle, inode, iloc); | 5036 | err = ext4_do_update_inode(handle, inode, iloc, 0); |
4998 | put_bh(iloc->bh); | 5037 | put_bh(iloc->bh); |
4999 | return err; | 5038 | return err; |
5000 | } | 5039 | } |
@@ -5285,12 +5324,21 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5285 | else | 5324 | else |
5286 | len = PAGE_CACHE_SIZE; | 5325 | len = PAGE_CACHE_SIZE; |
5287 | 5326 | ||
5327 | lock_page(page); | ||
5328 | /* | ||
5329 | * return if we have all the buffers mapped. This avoid | ||
5330 | * the need to call write_begin/write_end which does a | ||
5331 | * journal_start/journal_stop which can block and take | ||
5332 | * long time | ||
5333 | */ | ||
5288 | if (page_has_buffers(page)) { | 5334 | if (page_has_buffers(page)) { |
5289 | /* return if we have all the buffers mapped */ | ||
5290 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 5335 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, |
5291 | ext4_bh_unmapped)) | 5336 | ext4_bh_unmapped)) { |
5337 | unlock_page(page); | ||
5292 | goto out_unlock; | 5338 | goto out_unlock; |
5339 | } | ||
5293 | } | 5340 | } |
5341 | unlock_page(page); | ||
5294 | /* | 5342 | /* |
5295 | * OK, we need to fill the hole... Do write_begin write_end | 5343 | * OK, we need to fill the hole... Do write_begin write_end |
5296 | * to do block allocation/reservation.We are not holding | 5344 | * to do block allocation/reservation.We are not holding |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7050a9cd04a4..c1cdf613e725 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -243,10 +243,9 @@ setversion_out: | |||
243 | me.donor_start, me.len, &me.moved_len); | 243 | me.donor_start, me.len, &me.moved_len); |
244 | fput(donor_filp); | 244 | fput(donor_filp); |
245 | 245 | ||
246 | if (!err) | 246 | if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) |
247 | if (copy_to_user((struct move_extent *)arg, | 247 | return -EFAULT; |
248 | &me, sizeof(me))) | 248 | |
249 | return -EFAULT; | ||
250 | return err; | 249 | return err; |
251 | } | 250 | } |
252 | 251 | ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cd258463e2a9..e9c61896d605 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -22,6 +22,7 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include "mballoc.h" | 24 | #include "mballoc.h" |
25 | #include <linux/debugfs.h> | ||
25 | #include <trace/events/ext4.h> | 26 | #include <trace/events/ext4.h> |
26 | 27 | ||
27 | /* | 28 | /* |
@@ -622,13 +623,13 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
622 | 623 | ||
623 | /* FIXME!! need more doc */ | 624 | /* FIXME!! need more doc */ |
624 | static void ext4_mb_mark_free_simple(struct super_block *sb, | 625 | static void ext4_mb_mark_free_simple(struct super_block *sb, |
625 | void *buddy, unsigned first, int len, | 626 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, |
626 | struct ext4_group_info *grp) | 627 | struct ext4_group_info *grp) |
627 | { | 628 | { |
628 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 629 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
629 | unsigned short min; | 630 | ext4_grpblk_t min; |
630 | unsigned short max; | 631 | ext4_grpblk_t max; |
631 | unsigned short chunk; | 632 | ext4_grpblk_t chunk; |
632 | unsigned short border; | 633 | unsigned short border; |
633 | 634 | ||
634 | BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); | 635 | BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); |
@@ -662,10 +663,10 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
662 | void *buddy, void *bitmap, ext4_group_t group) | 663 | void *buddy, void *bitmap, ext4_group_t group) |
663 | { | 664 | { |
664 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 665 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
665 | unsigned short max = EXT4_BLOCKS_PER_GROUP(sb); | 666 | ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb); |
666 | unsigned short i = 0; | 667 | ext4_grpblk_t i = 0; |
667 | unsigned short first; | 668 | ext4_grpblk_t first; |
668 | unsigned short len; | 669 | ext4_grpblk_t len; |
669 | unsigned free = 0; | 670 | unsigned free = 0; |
670 | unsigned fragments = 0; | 671 | unsigned fragments = 0; |
671 | unsigned long long period = get_cycles(); | 672 | unsigned long long period = get_cycles(); |
@@ -743,7 +744,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
743 | char *data; | 744 | char *data; |
744 | char *bitmap; | 745 | char *bitmap; |
745 | 746 | ||
746 | mb_debug("init page %lu\n", page->index); | 747 | mb_debug(1, "init page %lu\n", page->index); |
747 | 748 | ||
748 | inode = page->mapping->host; | 749 | inode = page->mapping->host; |
749 | sb = inode->i_sb; | 750 | sb = inode->i_sb; |
@@ -822,7 +823,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
822 | set_bitmap_uptodate(bh[i]); | 823 | set_bitmap_uptodate(bh[i]); |
823 | bh[i]->b_end_io = end_buffer_read_sync; | 824 | bh[i]->b_end_io = end_buffer_read_sync; |
824 | submit_bh(READ, bh[i]); | 825 | submit_bh(READ, bh[i]); |
825 | mb_debug("read bitmap for group %u\n", first_group + i); | 826 | mb_debug(1, "read bitmap for group %u\n", first_group + i); |
826 | } | 827 | } |
827 | 828 | ||
828 | /* wait for I/O completion */ | 829 | /* wait for I/O completion */ |
@@ -862,12 +863,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
862 | if ((first_block + i) & 1) { | 863 | if ((first_block + i) & 1) { |
863 | /* this is block of buddy */ | 864 | /* this is block of buddy */ |
864 | BUG_ON(incore == NULL); | 865 | BUG_ON(incore == NULL); |
865 | mb_debug("put buddy for group %u in page %lu/%x\n", | 866 | mb_debug(1, "put buddy for group %u in page %lu/%x\n", |
866 | group, page->index, i * blocksize); | 867 | group, page->index, i * blocksize); |
867 | grinfo = ext4_get_group_info(sb, group); | 868 | grinfo = ext4_get_group_info(sb, group); |
868 | grinfo->bb_fragments = 0; | 869 | grinfo->bb_fragments = 0; |
869 | memset(grinfo->bb_counters, 0, | 870 | memset(grinfo->bb_counters, 0, |
870 | sizeof(unsigned short)*(sb->s_blocksize_bits+2)); | 871 | sizeof(*grinfo->bb_counters) * |
872 | (sb->s_blocksize_bits+2)); | ||
871 | /* | 873 | /* |
872 | * incore got set to the group block bitmap below | 874 | * incore got set to the group block bitmap below |
873 | */ | 875 | */ |
@@ -878,7 +880,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
878 | } else { | 880 | } else { |
879 | /* this is block of bitmap */ | 881 | /* this is block of bitmap */ |
880 | BUG_ON(incore != NULL); | 882 | BUG_ON(incore != NULL); |
881 | mb_debug("put bitmap for group %u in page %lu/%x\n", | 883 | mb_debug(1, "put bitmap for group %u in page %lu/%x\n", |
882 | group, page->index, i * blocksize); | 884 | group, page->index, i * blocksize); |
883 | 885 | ||
884 | /* see comments in ext4_mb_put_pa() */ | 886 | /* see comments in ext4_mb_put_pa() */ |
@@ -908,6 +910,100 @@ out: | |||
908 | return err; | 910 | return err; |
909 | } | 911 | } |
910 | 912 | ||
913 | static noinline_for_stack | ||
914 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | ||
915 | { | ||
916 | |||
917 | int ret = 0; | ||
918 | void *bitmap; | ||
919 | int blocks_per_page; | ||
920 | int block, pnum, poff; | ||
921 | int num_grp_locked = 0; | ||
922 | struct ext4_group_info *this_grp; | ||
923 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
924 | struct inode *inode = sbi->s_buddy_cache; | ||
925 | struct page *page = NULL, *bitmap_page = NULL; | ||
926 | |||
927 | mb_debug(1, "init group %u\n", group); | ||
928 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
929 | this_grp = ext4_get_group_info(sb, group); | ||
930 | /* | ||
931 | * This ensures that we don't reinit the buddy cache | ||
932 | * page which map to the group from which we are already | ||
933 | * allocating. If we are looking at the buddy cache we would | ||
934 | * have taken a reference using ext4_mb_load_buddy and that | ||
935 | * would have taken the alloc_sem lock. | ||
936 | */ | ||
937 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | ||
938 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | ||
939 | /* | ||
940 | * somebody initialized the group | ||
941 | * return without doing anything | ||
942 | */ | ||
943 | ret = 0; | ||
944 | goto err; | ||
945 | } | ||
946 | /* | ||
947 | * the buddy cache inode stores the block bitmap | ||
948 | * and buddy information in consecutive blocks. | ||
949 | * So for each group we need two blocks. | ||
950 | */ | ||
951 | block = group * 2; | ||
952 | pnum = block / blocks_per_page; | ||
953 | poff = block % blocks_per_page; | ||
954 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
955 | if (page) { | ||
956 | BUG_ON(page->mapping != inode->i_mapping); | ||
957 | ret = ext4_mb_init_cache(page, NULL); | ||
958 | if (ret) { | ||
959 | unlock_page(page); | ||
960 | goto err; | ||
961 | } | ||
962 | unlock_page(page); | ||
963 | } | ||
964 | if (page == NULL || !PageUptodate(page)) { | ||
965 | ret = -EIO; | ||
966 | goto err; | ||
967 | } | ||
968 | mark_page_accessed(page); | ||
969 | bitmap_page = page; | ||
970 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
971 | |||
972 | /* init buddy cache */ | ||
973 | block++; | ||
974 | pnum = block / blocks_per_page; | ||
975 | poff = block % blocks_per_page; | ||
976 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
977 | if (page == bitmap_page) { | ||
978 | /* | ||
979 | * If both the bitmap and buddy are in | ||
980 | * the same page we don't need to force | ||
981 | * init the buddy | ||
982 | */ | ||
983 | unlock_page(page); | ||
984 | } else if (page) { | ||
985 | BUG_ON(page->mapping != inode->i_mapping); | ||
986 | ret = ext4_mb_init_cache(page, bitmap); | ||
987 | if (ret) { | ||
988 | unlock_page(page); | ||
989 | goto err; | ||
990 | } | ||
991 | unlock_page(page); | ||
992 | } | ||
993 | if (page == NULL || !PageUptodate(page)) { | ||
994 | ret = -EIO; | ||
995 | goto err; | ||
996 | } | ||
997 | mark_page_accessed(page); | ||
998 | err: | ||
999 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | ||
1000 | if (bitmap_page) | ||
1001 | page_cache_release(bitmap_page); | ||
1002 | if (page) | ||
1003 | page_cache_release(page); | ||
1004 | return ret; | ||
1005 | } | ||
1006 | |||
911 | static noinline_for_stack int | 1007 | static noinline_for_stack int |
912 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | 1008 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
913 | struct ext4_buddy *e4b) | 1009 | struct ext4_buddy *e4b) |
@@ -922,7 +1018,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
922 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1018 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
923 | struct inode *inode = sbi->s_buddy_cache; | 1019 | struct inode *inode = sbi->s_buddy_cache; |
924 | 1020 | ||
925 | mb_debug("load group %u\n", group); | 1021 | mb_debug(1, "load group %u\n", group); |
926 | 1022 | ||
927 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 1023 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
928 | grp = ext4_get_group_info(sb, group); | 1024 | grp = ext4_get_group_info(sb, group); |
@@ -941,8 +1037,26 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
941 | * groups mapped by the page is blocked | 1037 | * groups mapped by the page is blocked |
942 | * till we are done with allocation | 1038 | * till we are done with allocation |
943 | */ | 1039 | */ |
1040 | repeat_load_buddy: | ||
944 | down_read(e4b->alloc_semp); | 1041 | down_read(e4b->alloc_semp); |
945 | 1042 | ||
1043 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | ||
1044 | /* we need to check for group need init flag | ||
1045 | * with alloc_semp held so that we can be sure | ||
1046 | * that new blocks didn't get added to the group | ||
1047 | * when we are loading the buddy cache | ||
1048 | */ | ||
1049 | up_read(e4b->alloc_semp); | ||
1050 | /* | ||
1051 | * we need full data about the group | ||
1052 | * to make a good selection | ||
1053 | */ | ||
1054 | ret = ext4_mb_init_group(sb, group); | ||
1055 | if (ret) | ||
1056 | return ret; | ||
1057 | goto repeat_load_buddy; | ||
1058 | } | ||
1059 | |||
946 | /* | 1060 | /* |
947 | * the buddy cache inode stores the block bitmap | 1061 | * the buddy cache inode stores the block bitmap |
948 | * and buddy information in consecutive blocks. | 1062 | * and buddy information in consecutive blocks. |
@@ -1360,7 +1474,7 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, | |||
1360 | ac->alloc_semp = e4b->alloc_semp; | 1474 | ac->alloc_semp = e4b->alloc_semp; |
1361 | e4b->alloc_semp = NULL; | 1475 | e4b->alloc_semp = NULL; |
1362 | /* store last allocated for subsequent stream allocation */ | 1476 | /* store last allocated for subsequent stream allocation */ |
1363 | if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { | 1477 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
1364 | spin_lock(&sbi->s_md_lock); | 1478 | spin_lock(&sbi->s_md_lock); |
1365 | sbi->s_mb_last_group = ac->ac_f_ex.fe_group; | 1479 | sbi->s_mb_last_group = ac->ac_f_ex.fe_group; |
1366 | sbi->s_mb_last_start = ac->ac_f_ex.fe_start; | 1480 | sbi->s_mb_last_start = ac->ac_f_ex.fe_start; |
@@ -1837,97 +1951,6 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | |||
1837 | 1951 | ||
1838 | } | 1952 | } |
1839 | 1953 | ||
1840 | static noinline_for_stack | ||
1841 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | ||
1842 | { | ||
1843 | |||
1844 | int ret; | ||
1845 | void *bitmap; | ||
1846 | int blocks_per_page; | ||
1847 | int block, pnum, poff; | ||
1848 | int num_grp_locked = 0; | ||
1849 | struct ext4_group_info *this_grp; | ||
1850 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1851 | struct inode *inode = sbi->s_buddy_cache; | ||
1852 | struct page *page = NULL, *bitmap_page = NULL; | ||
1853 | |||
1854 | mb_debug("init group %lu\n", group); | ||
1855 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1856 | this_grp = ext4_get_group_info(sb, group); | ||
1857 | /* | ||
1858 | * This ensures we don't add group | ||
1859 | * to this buddy cache via resize | ||
1860 | */ | ||
1861 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | ||
1862 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | ||
1863 | /* | ||
1864 | * somebody initialized the group | ||
1865 | * return without doing anything | ||
1866 | */ | ||
1867 | ret = 0; | ||
1868 | goto err; | ||
1869 | } | ||
1870 | /* | ||
1871 | * the buddy cache inode stores the block bitmap | ||
1872 | * and buddy information in consecutive blocks. | ||
1873 | * So for each group we need two blocks. | ||
1874 | */ | ||
1875 | block = group * 2; | ||
1876 | pnum = block / blocks_per_page; | ||
1877 | poff = block % blocks_per_page; | ||
1878 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1879 | if (page) { | ||
1880 | BUG_ON(page->mapping != inode->i_mapping); | ||
1881 | ret = ext4_mb_init_cache(page, NULL); | ||
1882 | if (ret) { | ||
1883 | unlock_page(page); | ||
1884 | goto err; | ||
1885 | } | ||
1886 | unlock_page(page); | ||
1887 | } | ||
1888 | if (page == NULL || !PageUptodate(page)) { | ||
1889 | ret = -EIO; | ||
1890 | goto err; | ||
1891 | } | ||
1892 | mark_page_accessed(page); | ||
1893 | bitmap_page = page; | ||
1894 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1895 | |||
1896 | /* init buddy cache */ | ||
1897 | block++; | ||
1898 | pnum = block / blocks_per_page; | ||
1899 | poff = block % blocks_per_page; | ||
1900 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1901 | if (page == bitmap_page) { | ||
1902 | /* | ||
1903 | * If both the bitmap and buddy are in | ||
1904 | * the same page we don't need to force | ||
1905 | * init the buddy | ||
1906 | */ | ||
1907 | unlock_page(page); | ||
1908 | } else if (page) { | ||
1909 | BUG_ON(page->mapping != inode->i_mapping); | ||
1910 | ret = ext4_mb_init_cache(page, bitmap); | ||
1911 | if (ret) { | ||
1912 | unlock_page(page); | ||
1913 | goto err; | ||
1914 | } | ||
1915 | unlock_page(page); | ||
1916 | } | ||
1917 | if (page == NULL || !PageUptodate(page)) { | ||
1918 | ret = -EIO; | ||
1919 | goto err; | ||
1920 | } | ||
1921 | mark_page_accessed(page); | ||
1922 | err: | ||
1923 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | ||
1924 | if (bitmap_page) | ||
1925 | page_cache_release(bitmap_page); | ||
1926 | if (page) | ||
1927 | page_cache_release(page); | ||
1928 | return ret; | ||
1929 | } | ||
1930 | |||
1931 | static noinline_for_stack int | 1954 | static noinline_for_stack int |
1932 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 1955 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1933 | { | 1956 | { |
@@ -1938,11 +1961,14 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1938 | struct ext4_sb_info *sbi; | 1961 | struct ext4_sb_info *sbi; |
1939 | struct super_block *sb; | 1962 | struct super_block *sb; |
1940 | struct ext4_buddy e4b; | 1963 | struct ext4_buddy e4b; |
1941 | loff_t size, isize; | ||
1942 | 1964 | ||
1943 | sb = ac->ac_sb; | 1965 | sb = ac->ac_sb; |
1944 | sbi = EXT4_SB(sb); | 1966 | sbi = EXT4_SB(sb); |
1945 | ngroups = ext4_get_groups_count(sb); | 1967 | ngroups = ext4_get_groups_count(sb); |
1968 | /* non-extent files are limited to low blocks/groups */ | ||
1969 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL)) | ||
1970 | ngroups = sbi->s_blockfile_groups; | ||
1971 | |||
1946 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); | 1972 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); |
1947 | 1973 | ||
1948 | /* first, try the goal */ | 1974 | /* first, try the goal */ |
@@ -1974,20 +2000,16 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1974 | } | 2000 | } |
1975 | 2001 | ||
1976 | bsbits = ac->ac_sb->s_blocksize_bits; | 2002 | bsbits = ac->ac_sb->s_blocksize_bits; |
1977 | /* if stream allocation is enabled, use global goal */ | ||
1978 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | ||
1979 | isize = i_size_read(ac->ac_inode) >> bsbits; | ||
1980 | if (size < isize) | ||
1981 | size = isize; | ||
1982 | 2003 | ||
1983 | if (size < sbi->s_mb_stream_request && | 2004 | /* if stream allocation is enabled, use global goal */ |
1984 | (ac->ac_flags & EXT4_MB_HINT_DATA)) { | 2005 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
1985 | /* TBD: may be hot point */ | 2006 | /* TBD: may be hot point */ |
1986 | spin_lock(&sbi->s_md_lock); | 2007 | spin_lock(&sbi->s_md_lock); |
1987 | ac->ac_g_ex.fe_group = sbi->s_mb_last_group; | 2008 | ac->ac_g_ex.fe_group = sbi->s_mb_last_group; |
1988 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; | 2009 | ac->ac_g_ex.fe_start = sbi->s_mb_last_start; |
1989 | spin_unlock(&sbi->s_md_lock); | 2010 | spin_unlock(&sbi->s_md_lock); |
1990 | } | 2011 | } |
2012 | |||
1991 | /* Let's just scan groups to find more-less suitable blocks */ | 2013 | /* Let's just scan groups to find more-less suitable blocks */ |
1992 | cr = ac->ac_2order ? 0 : 1; | 2014 | cr = ac->ac_2order ? 0 : 1; |
1993 | /* | 2015 | /* |
@@ -2015,27 +2037,6 @@ repeat: | |||
2015 | if (grp->bb_free == 0) | 2037 | if (grp->bb_free == 0) |
2016 | continue; | 2038 | continue; |
2017 | 2039 | ||
2018 | /* | ||
2019 | * if the group is already init we check whether it is | ||
2020 | * a good group and if not we don't load the buddy | ||
2021 | */ | ||
2022 | if (EXT4_MB_GRP_NEED_INIT(grp)) { | ||
2023 | /* | ||
2024 | * we need full data about the group | ||
2025 | * to make a good selection | ||
2026 | */ | ||
2027 | err = ext4_mb_init_group(sb, group); | ||
2028 | if (err) | ||
2029 | goto out; | ||
2030 | } | ||
2031 | |||
2032 | /* | ||
2033 | * If the particular group doesn't satisfy our | ||
2034 | * criteria we continue with the next group | ||
2035 | */ | ||
2036 | if (!ext4_mb_good_group(ac, group, cr)) | ||
2037 | continue; | ||
2038 | |||
2039 | err = ext4_mb_load_buddy(sb, group, &e4b); | 2040 | err = ext4_mb_load_buddy(sb, group, &e4b); |
2040 | if (err) | 2041 | if (err) |
2041 | goto out; | 2042 | goto out; |
@@ -2156,7 +2157,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | |||
2156 | 2157 | ||
2157 | if (v == SEQ_START_TOKEN) { | 2158 | if (v == SEQ_START_TOKEN) { |
2158 | seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " | 2159 | seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " |
2159 | "%-5s %-2s %-5s %-5s %-5s %-6s\n", | 2160 | "%-5s %-2s %-6s %-5s %-5s %-6s\n", |
2160 | "pid", "inode", "original", "goal", "result", "found", | 2161 | "pid", "inode", "original", "goal", "result", "found", |
2161 | "grps", "cr", "flags", "merge", "tail", "broken"); | 2162 | "grps", "cr", "flags", "merge", "tail", "broken"); |
2162 | return 0; | 2163 | return 0; |
@@ -2164,7 +2165,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | |||
2164 | 2165 | ||
2165 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { | 2166 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { |
2166 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " | 2167 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " |
2167 | "%-5u %-5s %-5u %-6u\n"; | 2168 | "0x%04x %-5s %-5u %-6u\n"; |
2168 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, | 2169 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, |
2169 | hs->result.fe_start, hs->result.fe_len, | 2170 | hs->result.fe_start, hs->result.fe_len, |
2170 | hs->result.fe_logical); | 2171 | hs->result.fe_logical); |
@@ -2205,7 +2206,7 @@ static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v) | |||
2205 | { | 2206 | { |
2206 | } | 2207 | } |
2207 | 2208 | ||
2208 | static struct seq_operations ext4_mb_seq_history_ops = { | 2209 | static const struct seq_operations ext4_mb_seq_history_ops = { |
2209 | .start = ext4_mb_seq_history_start, | 2210 | .start = ext4_mb_seq_history_start, |
2210 | .next = ext4_mb_seq_history_next, | 2211 | .next = ext4_mb_seq_history_next, |
2211 | .stop = ext4_mb_seq_history_stop, | 2212 | .stop = ext4_mb_seq_history_stop, |
@@ -2287,7 +2288,7 @@ static ssize_t ext4_mb_seq_history_write(struct file *file, | |||
2287 | return count; | 2288 | return count; |
2288 | } | 2289 | } |
2289 | 2290 | ||
2290 | static struct file_operations ext4_mb_seq_history_fops = { | 2291 | static const struct file_operations ext4_mb_seq_history_fops = { |
2291 | .owner = THIS_MODULE, | 2292 | .owner = THIS_MODULE, |
2292 | .open = ext4_mb_seq_history_open, | 2293 | .open = ext4_mb_seq_history_open, |
2293 | .read = seq_read, | 2294 | .read = seq_read, |
@@ -2328,7 +2329,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) | |||
2328 | struct ext4_buddy e4b; | 2329 | struct ext4_buddy e4b; |
2329 | struct sg { | 2330 | struct sg { |
2330 | struct ext4_group_info info; | 2331 | struct ext4_group_info info; |
2331 | unsigned short counters[16]; | 2332 | ext4_grpblk_t counters[16]; |
2332 | } sg; | 2333 | } sg; |
2333 | 2334 | ||
2334 | group--; | 2335 | group--; |
@@ -2366,7 +2367,7 @@ static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v) | |||
2366 | { | 2367 | { |
2367 | } | 2368 | } |
2368 | 2369 | ||
2369 | static struct seq_operations ext4_mb_seq_groups_ops = { | 2370 | static const struct seq_operations ext4_mb_seq_groups_ops = { |
2370 | .start = ext4_mb_seq_groups_start, | 2371 | .start = ext4_mb_seq_groups_start, |
2371 | .next = ext4_mb_seq_groups_next, | 2372 | .next = ext4_mb_seq_groups_next, |
2372 | .stop = ext4_mb_seq_groups_stop, | 2373 | .stop = ext4_mb_seq_groups_stop, |
@@ -2387,7 +2388,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) | |||
2387 | 2388 | ||
2388 | } | 2389 | } |
2389 | 2390 | ||
2390 | static struct file_operations ext4_mb_seq_groups_fops = { | 2391 | static const struct file_operations ext4_mb_seq_groups_fops = { |
2391 | .owner = THIS_MODULE, | 2392 | .owner = THIS_MODULE, |
2392 | .open = ext4_mb_seq_groups_open, | 2393 | .open = ext4_mb_seq_groups_open, |
2393 | .read = seq_read, | 2394 | .read = seq_read, |
@@ -2532,7 +2533,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2532 | 2533 | ||
2533 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2534 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2534 | init_rwsem(&meta_group_info[i]->alloc_sem); | 2535 | init_rwsem(&meta_group_info[i]->alloc_sem); |
2535 | meta_group_info[i]->bb_free_root.rb_node = NULL;; | 2536 | meta_group_info[i]->bb_free_root.rb_node = NULL; |
2536 | 2537 | ||
2537 | #ifdef DOUBLE_CHECK | 2538 | #ifdef DOUBLE_CHECK |
2538 | { | 2539 | { |
@@ -2558,26 +2559,15 @@ exit_meta_group_info: | |||
2558 | return -ENOMEM; | 2559 | return -ENOMEM; |
2559 | } /* ext4_mb_add_groupinfo */ | 2560 | } /* ext4_mb_add_groupinfo */ |
2560 | 2561 | ||
2561 | /* | ||
2562 | * Update an existing group. | ||
2563 | * This function is used for online resize | ||
2564 | */ | ||
2565 | void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) | ||
2566 | { | ||
2567 | grp->bb_free += add; | ||
2568 | } | ||
2569 | |||
2570 | static int ext4_mb_init_backend(struct super_block *sb) | 2562 | static int ext4_mb_init_backend(struct super_block *sb) |
2571 | { | 2563 | { |
2572 | ext4_group_t ngroups = ext4_get_groups_count(sb); | 2564 | ext4_group_t ngroups = ext4_get_groups_count(sb); |
2573 | ext4_group_t i; | 2565 | ext4_group_t i; |
2574 | int metalen; | ||
2575 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2566 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2576 | struct ext4_super_block *es = sbi->s_es; | 2567 | struct ext4_super_block *es = sbi->s_es; |
2577 | int num_meta_group_infos; | 2568 | int num_meta_group_infos; |
2578 | int num_meta_group_infos_max; | 2569 | int num_meta_group_infos_max; |
2579 | int array_size; | 2570 | int array_size; |
2580 | struct ext4_group_info **meta_group_info; | ||
2581 | struct ext4_group_desc *desc; | 2571 | struct ext4_group_desc *desc; |
2582 | 2572 | ||
2583 | /* This is the number of blocks used by GDT */ | 2573 | /* This is the number of blocks used by GDT */ |
@@ -2622,22 +2612,6 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2622 | goto err_freesgi; | 2612 | goto err_freesgi; |
2623 | } | 2613 | } |
2624 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; | 2614 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; |
2625 | |||
2626 | metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb); | ||
2627 | for (i = 0; i < num_meta_group_infos; i++) { | ||
2628 | if ((i + 1) == num_meta_group_infos) | ||
2629 | metalen = sizeof(*meta_group_info) * | ||
2630 | (ngroups - | ||
2631 | (i << EXT4_DESC_PER_BLOCK_BITS(sb))); | ||
2632 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | ||
2633 | if (meta_group_info == NULL) { | ||
2634 | printk(KERN_ERR "EXT4-fs: can't allocate mem for a " | ||
2635 | "buddy group\n"); | ||
2636 | goto err_freemeta; | ||
2637 | } | ||
2638 | sbi->s_group_info[i] = meta_group_info; | ||
2639 | } | ||
2640 | |||
2641 | for (i = 0; i < ngroups; i++) { | 2615 | for (i = 0; i < ngroups; i++) { |
2642 | desc = ext4_get_group_desc(sb, i, NULL); | 2616 | desc = ext4_get_group_desc(sb, i, NULL); |
2643 | if (desc == NULL) { | 2617 | if (desc == NULL) { |
@@ -2655,7 +2629,6 @@ err_freebuddy: | |||
2655 | while (i-- > 0) | 2629 | while (i-- > 0) |
2656 | kfree(ext4_get_group_info(sb, i)); | 2630 | kfree(ext4_get_group_info(sb, i)); |
2657 | i = num_meta_group_infos; | 2631 | i = num_meta_group_infos; |
2658 | err_freemeta: | ||
2659 | while (i-- > 0) | 2632 | while (i-- > 0) |
2660 | kfree(sbi->s_group_info[i]); | 2633 | kfree(sbi->s_group_info[i]); |
2661 | iput(sbi->s_buddy_cache); | 2634 | iput(sbi->s_buddy_cache); |
@@ -2672,14 +2645,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2672 | unsigned max; | 2645 | unsigned max; |
2673 | int ret; | 2646 | int ret; |
2674 | 2647 | ||
2675 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); | 2648 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); |
2676 | 2649 | ||
2677 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2650 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2678 | if (sbi->s_mb_offsets == NULL) { | 2651 | if (sbi->s_mb_offsets == NULL) { |
2679 | return -ENOMEM; | 2652 | return -ENOMEM; |
2680 | } | 2653 | } |
2681 | 2654 | ||
2682 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); | 2655 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); |
2683 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2656 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2684 | if (sbi->s_mb_maxs == NULL) { | 2657 | if (sbi->s_mb_maxs == NULL) { |
2685 | kfree(sbi->s_mb_offsets); | 2658 | kfree(sbi->s_mb_offsets); |
@@ -2758,7 +2731,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
2758 | kmem_cache_free(ext4_pspace_cachep, pa); | 2731 | kmem_cache_free(ext4_pspace_cachep, pa); |
2759 | } | 2732 | } |
2760 | if (count) | 2733 | if (count) |
2761 | mb_debug("mballoc: %u PAs left\n", count); | 2734 | mb_debug(1, "mballoc: %u PAs left\n", count); |
2762 | 2735 | ||
2763 | } | 2736 | } |
2764 | 2737 | ||
@@ -2839,7 +2812,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2839 | list_for_each_safe(l, ltmp, &txn->t_private_list) { | 2812 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
2840 | entry = list_entry(l, struct ext4_free_data, list); | 2813 | entry = list_entry(l, struct ext4_free_data, list); |
2841 | 2814 | ||
2842 | mb_debug("gonna free %u blocks in group %u (0x%p):", | 2815 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
2843 | entry->count, entry->group, entry); | 2816 | entry->count, entry->group, entry); |
2844 | 2817 | ||
2845 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2818 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
@@ -2874,9 +2847,43 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2874 | ext4_mb_release_desc(&e4b); | 2847 | ext4_mb_release_desc(&e4b); |
2875 | } | 2848 | } |
2876 | 2849 | ||
2877 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2850 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); |
2851 | } | ||
2852 | |||
2853 | #ifdef CONFIG_EXT4_DEBUG | ||
2854 | u8 mb_enable_debug __read_mostly; | ||
2855 | |||
2856 | static struct dentry *debugfs_dir; | ||
2857 | static struct dentry *debugfs_debug; | ||
2858 | |||
2859 | static void __init ext4_create_debugfs_entry(void) | ||
2860 | { | ||
2861 | debugfs_dir = debugfs_create_dir("ext4", NULL); | ||
2862 | if (debugfs_dir) | ||
2863 | debugfs_debug = debugfs_create_u8("mballoc-debug", | ||
2864 | S_IRUGO | S_IWUSR, | ||
2865 | debugfs_dir, | ||
2866 | &mb_enable_debug); | ||
2867 | } | ||
2868 | |||
2869 | static void ext4_remove_debugfs_entry(void) | ||
2870 | { | ||
2871 | debugfs_remove(debugfs_debug); | ||
2872 | debugfs_remove(debugfs_dir); | ||
2878 | } | 2873 | } |
2879 | 2874 | ||
2875 | #else | ||
2876 | |||
2877 | static void __init ext4_create_debugfs_entry(void) | ||
2878 | { | ||
2879 | } | ||
2880 | |||
2881 | static void ext4_remove_debugfs_entry(void) | ||
2882 | { | ||
2883 | } | ||
2884 | |||
2885 | #endif | ||
2886 | |||
2880 | int __init init_ext4_mballoc(void) | 2887 | int __init init_ext4_mballoc(void) |
2881 | { | 2888 | { |
2882 | ext4_pspace_cachep = | 2889 | ext4_pspace_cachep = |
@@ -2904,6 +2911,7 @@ int __init init_ext4_mballoc(void) | |||
2904 | kmem_cache_destroy(ext4_ac_cachep); | 2911 | kmem_cache_destroy(ext4_ac_cachep); |
2905 | return -ENOMEM; | 2912 | return -ENOMEM; |
2906 | } | 2913 | } |
2914 | ext4_create_debugfs_entry(); | ||
2907 | return 0; | 2915 | return 0; |
2908 | } | 2916 | } |
2909 | 2917 | ||
@@ -2917,6 +2925,7 @@ void exit_ext4_mballoc(void) | |||
2917 | kmem_cache_destroy(ext4_pspace_cachep); | 2925 | kmem_cache_destroy(ext4_pspace_cachep); |
2918 | kmem_cache_destroy(ext4_ac_cachep); | 2926 | kmem_cache_destroy(ext4_ac_cachep); |
2919 | kmem_cache_destroy(ext4_free_ext_cachep); | 2927 | kmem_cache_destroy(ext4_free_ext_cachep); |
2928 | ext4_remove_debugfs_entry(); | ||
2920 | } | 2929 | } |
2921 | 2930 | ||
2922 | 2931 | ||
@@ -3061,7 +3070,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) | |||
3061 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; | 3070 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; |
3062 | else | 3071 | else |
3063 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; | 3072 | ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; |
3064 | mb_debug("#%u: goal %u blocks for locality group\n", | 3073 | mb_debug(1, "#%u: goal %u blocks for locality group\n", |
3065 | current->pid, ac->ac_g_ex.fe_len); | 3074 | current->pid, ac->ac_g_ex.fe_len); |
3066 | } | 3075 | } |
3067 | 3076 | ||
@@ -3180,23 +3189,18 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3180 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || | 3189 | BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || |
3181 | ac->ac_o_ex.fe_logical < pa->pa_lstart)); | 3190 | ac->ac_o_ex.fe_logical < pa->pa_lstart)); |
3182 | 3191 | ||
3183 | /* skip PA normalized request doesn't overlap with */ | 3192 | /* skip PAs this normalized request doesn't overlap with */ |
3184 | if (pa->pa_lstart >= end) { | 3193 | if (pa->pa_lstart >= end || pa_end <= start) { |
3185 | spin_unlock(&pa->pa_lock); | ||
3186 | continue; | ||
3187 | } | ||
3188 | if (pa_end <= start) { | ||
3189 | spin_unlock(&pa->pa_lock); | 3194 | spin_unlock(&pa->pa_lock); |
3190 | continue; | 3195 | continue; |
3191 | } | 3196 | } |
3192 | BUG_ON(pa->pa_lstart <= start && pa_end >= end); | 3197 | BUG_ON(pa->pa_lstart <= start && pa_end >= end); |
3193 | 3198 | ||
3199 | /* adjust start or end to be adjacent to this pa */ | ||
3194 | if (pa_end <= ac->ac_o_ex.fe_logical) { | 3200 | if (pa_end <= ac->ac_o_ex.fe_logical) { |
3195 | BUG_ON(pa_end < start); | 3201 | BUG_ON(pa_end < start); |
3196 | start = pa_end; | 3202 | start = pa_end; |
3197 | } | 3203 | } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { |
3198 | |||
3199 | if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { | ||
3200 | BUG_ON(pa->pa_lstart > end); | 3204 | BUG_ON(pa->pa_lstart > end); |
3201 | end = pa->pa_lstart; | 3205 | end = pa->pa_lstart; |
3202 | } | 3206 | } |
@@ -3251,7 +3255,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3251 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; | 3255 | ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; |
3252 | } | 3256 | } |
3253 | 3257 | ||
3254 | mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size, | 3258 | mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size, |
3255 | (unsigned) orig_size, (unsigned) start); | 3259 | (unsigned) orig_size, (unsigned) start); |
3256 | } | 3260 | } |
3257 | 3261 | ||
@@ -3300,7 +3304,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, | |||
3300 | BUG_ON(pa->pa_free < len); | 3304 | BUG_ON(pa->pa_free < len); |
3301 | pa->pa_free -= len; | 3305 | pa->pa_free -= len; |
3302 | 3306 | ||
3303 | mb_debug("use %llu/%u from inode pa %p\n", start, len, pa); | 3307 | mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa); |
3304 | } | 3308 | } |
3305 | 3309 | ||
3306 | /* | 3310 | /* |
@@ -3324,7 +3328,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | |||
3324 | * in on-disk bitmap -- see ext4_mb_release_context() | 3328 | * in on-disk bitmap -- see ext4_mb_release_context() |
3325 | * Other CPUs are prevented from allocating from this pa by lg_mutex | 3329 | * Other CPUs are prevented from allocating from this pa by lg_mutex |
3326 | */ | 3330 | */ |
3327 | mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); | 3331 | mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); |
3328 | } | 3332 | } |
3329 | 3333 | ||
3330 | /* | 3334 | /* |
@@ -3382,6 +3386,11 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3382 | ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) | 3386 | ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) |
3383 | continue; | 3387 | continue; |
3384 | 3388 | ||
3389 | /* non-extent files can't have physical blocks past 2^32 */ | ||
3390 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) && | ||
3391 | pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) | ||
3392 | continue; | ||
3393 | |||
3385 | /* found preallocated blocks, use them */ | 3394 | /* found preallocated blocks, use them */ |
3386 | spin_lock(&pa->pa_lock); | 3395 | spin_lock(&pa->pa_lock); |
3387 | if (pa->pa_deleted == 0 && pa->pa_free) { | 3396 | if (pa->pa_deleted == 0 && pa->pa_free) { |
@@ -3503,7 +3512,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
3503 | preallocated += len; | 3512 | preallocated += len; |
3504 | count++; | 3513 | count++; |
3505 | } | 3514 | } |
3506 | mb_debug("prellocated %u for group %u\n", preallocated, group); | 3515 | mb_debug(1, "prellocated %u for group %u\n", preallocated, group); |
3507 | } | 3516 | } |
3508 | 3517 | ||
3509 | static void ext4_mb_pa_callback(struct rcu_head *head) | 3518 | static void ext4_mb_pa_callback(struct rcu_head *head) |
@@ -3638,7 +3647,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | |||
3638 | pa->pa_deleted = 0; | 3647 | pa->pa_deleted = 0; |
3639 | pa->pa_type = MB_INODE_PA; | 3648 | pa->pa_type = MB_INODE_PA; |
3640 | 3649 | ||
3641 | mb_debug("new inode pa %p: %llu/%u for %u\n", pa, | 3650 | mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa, |
3642 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3651 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
3643 | trace_ext4_mb_new_inode_pa(ac, pa); | 3652 | trace_ext4_mb_new_inode_pa(ac, pa); |
3644 | 3653 | ||
@@ -3698,7 +3707,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) | |||
3698 | pa->pa_deleted = 0; | 3707 | pa->pa_deleted = 0; |
3699 | pa->pa_type = MB_GROUP_PA; | 3708 | pa->pa_type = MB_GROUP_PA; |
3700 | 3709 | ||
3701 | mb_debug("new group pa %p: %llu/%u for %u\n", pa, | 3710 | mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa, |
3702 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3711 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
3703 | trace_ext4_mb_new_group_pa(ac, pa); | 3712 | trace_ext4_mb_new_group_pa(ac, pa); |
3704 | 3713 | ||
@@ -3777,7 +3786,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3777 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); | 3786 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); |
3778 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + | 3787 | start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + |
3779 | le32_to_cpu(sbi->s_es->s_first_data_block); | 3788 | le32_to_cpu(sbi->s_es->s_first_data_block); |
3780 | mb_debug(" free preallocated %u/%u in group %u\n", | 3789 | mb_debug(1, " free preallocated %u/%u in group %u\n", |
3781 | (unsigned) start, (unsigned) next - bit, | 3790 | (unsigned) start, (unsigned) next - bit, |
3782 | (unsigned) group); | 3791 | (unsigned) group); |
3783 | free += next - bit; | 3792 | free += next - bit; |
@@ -3868,7 +3877,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3868 | int busy = 0; | 3877 | int busy = 0; |
3869 | int free = 0; | 3878 | int free = 0; |
3870 | 3879 | ||
3871 | mb_debug("discard preallocation for group %u\n", group); | 3880 | mb_debug(1, "discard preallocation for group %u\n", group); |
3872 | 3881 | ||
3873 | if (list_empty(&grp->bb_prealloc_list)) | 3882 | if (list_empty(&grp->bb_prealloc_list)) |
3874 | return 0; | 3883 | return 0; |
@@ -3992,7 +4001,7 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3992 | return; | 4001 | return; |
3993 | } | 4002 | } |
3994 | 4003 | ||
3995 | mb_debug("discard preallocation for inode %lu\n", inode->i_ino); | 4004 | mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino); |
3996 | trace_ext4_discard_preallocations(inode); | 4005 | trace_ext4_discard_preallocations(inode); |
3997 | 4006 | ||
3998 | INIT_LIST_HEAD(&list); | 4007 | INIT_LIST_HEAD(&list); |
@@ -4097,7 +4106,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode, | |||
4097 | { | 4106 | { |
4098 | BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); | 4107 | BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); |
4099 | } | 4108 | } |
4100 | #ifdef MB_DEBUG | 4109 | #ifdef CONFIG_EXT4_DEBUG |
4101 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | 4110 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) |
4102 | { | 4111 | { |
4103 | struct super_block *sb = ac->ac_sb; | 4112 | struct super_block *sb = ac->ac_sb; |
@@ -4139,14 +4148,14 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
4139 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, | 4148 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, |
4140 | NULL, &start); | 4149 | NULL, &start); |
4141 | spin_unlock(&pa->pa_lock); | 4150 | spin_unlock(&pa->pa_lock); |
4142 | printk(KERN_ERR "PA:%lu:%d:%u \n", i, | 4151 | printk(KERN_ERR "PA:%u:%d:%u \n", i, |
4143 | start, pa->pa_len); | 4152 | start, pa->pa_len); |
4144 | } | 4153 | } |
4145 | ext4_unlock_group(sb, i); | 4154 | ext4_unlock_group(sb, i); |
4146 | 4155 | ||
4147 | if (grp->bb_free == 0) | 4156 | if (grp->bb_free == 0) |
4148 | continue; | 4157 | continue; |
4149 | printk(KERN_ERR "%lu: %d/%d \n", | 4158 | printk(KERN_ERR "%u: %d/%d \n", |
4150 | i, grp->bb_free, grp->bb_fragments); | 4159 | i, grp->bb_free, grp->bb_fragments); |
4151 | } | 4160 | } |
4152 | printk(KERN_ERR "\n"); | 4161 | printk(KERN_ERR "\n"); |
@@ -4174,16 +4183,26 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
4174 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | 4183 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
4175 | return; | 4184 | return; |
4176 | 4185 | ||
4186 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | ||
4187 | return; | ||
4188 | |||
4177 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; | 4189 | size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; |
4178 | isize = i_size_read(ac->ac_inode) >> bsbits; | 4190 | isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) |
4191 | >> bsbits; | ||
4179 | size = max(size, isize); | 4192 | size = max(size, isize); |
4180 | 4193 | ||
4181 | /* don't use group allocation for large files */ | 4194 | if ((size == isize) && |
4182 | if (size >= sbi->s_mb_stream_request) | 4195 | !ext4_fs_is_busy(sbi) && |
4196 | (atomic_read(&ac->ac_inode->i_writecount) == 0)) { | ||
4197 | ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; | ||
4183 | return; | 4198 | return; |
4199 | } | ||
4184 | 4200 | ||
4185 | if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) | 4201 | /* don't use group allocation for large files */ |
4202 | if (size >= sbi->s_mb_stream_request) { | ||
4203 | ac->ac_flags |= EXT4_MB_STREAM_ALLOC; | ||
4186 | return; | 4204 | return; |
4205 | } | ||
4187 | 4206 | ||
4188 | BUG_ON(ac->ac_lg != NULL); | 4207 | BUG_ON(ac->ac_lg != NULL); |
4189 | /* | 4208 | /* |
@@ -4246,7 +4265,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
4246 | * locality group. this is a policy, actually */ | 4265 | * locality group. this is a policy, actually */ |
4247 | ext4_mb_group_or_file(ac); | 4266 | ext4_mb_group_or_file(ac); |
4248 | 4267 | ||
4249 | mb_debug("init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " | 4268 | mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " |
4250 | "left: %u/%u, right %u/%u to %swritable\n", | 4269 | "left: %u/%u, right %u/%u to %swritable\n", |
4251 | (unsigned) ar->len, (unsigned) ar->logical, | 4270 | (unsigned) ar->len, (unsigned) ar->logical, |
4252 | (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, | 4271 | (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, |
@@ -4268,7 +4287,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4268 | struct ext4_prealloc_space *pa, *tmp; | 4287 | struct ext4_prealloc_space *pa, *tmp; |
4269 | struct ext4_allocation_context *ac; | 4288 | struct ext4_allocation_context *ac; |
4270 | 4289 | ||
4271 | mb_debug("discard locality group preallocation\n"); | 4290 | mb_debug(1, "discard locality group preallocation\n"); |
4272 | 4291 | ||
4273 | INIT_LIST_HEAD(&discard_list); | 4292 | INIT_LIST_HEAD(&discard_list); |
4274 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4293 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c96bb19f58f9..188d3d709b24 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -37,11 +37,19 @@ | |||
37 | 37 | ||
38 | /* | 38 | /* |
39 | */ | 39 | */ |
40 | #define MB_DEBUG__ | 40 | #ifdef CONFIG_EXT4_DEBUG |
41 | #ifdef MB_DEBUG | 41 | extern u8 mb_enable_debug; |
42 | #define mb_debug(fmt, a...) printk(fmt, ##a) | 42 | |
43 | #define mb_debug(n, fmt, a...) \ | ||
44 | do { \ | ||
45 | if ((n) <= mb_enable_debug) { \ | ||
46 | printk(KERN_DEBUG "(%s, %d): %s: ", \ | ||
47 | __FILE__, __LINE__, __func__); \ | ||
48 | printk(fmt, ## a); \ | ||
49 | } \ | ||
50 | } while (0) | ||
43 | #else | 51 | #else |
44 | #define mb_debug(fmt, a...) | 52 | #define mb_debug(n, fmt, a...) |
45 | #endif | 53 | #endif |
46 | 54 | ||
47 | /* | 55 | /* |
@@ -128,8 +136,8 @@ struct ext4_prealloc_space { | |||
128 | unsigned pa_deleted; | 136 | unsigned pa_deleted; |
129 | ext4_fsblk_t pa_pstart; /* phys. block */ | 137 | ext4_fsblk_t pa_pstart; /* phys. block */ |
130 | ext4_lblk_t pa_lstart; /* log. block */ | 138 | ext4_lblk_t pa_lstart; /* log. block */ |
131 | unsigned short pa_len; /* len of preallocated chunk */ | 139 | ext4_grpblk_t pa_len; /* len of preallocated chunk */ |
132 | unsigned short pa_free; /* how many blocks are free */ | 140 | ext4_grpblk_t pa_free; /* how many blocks are free */ |
133 | unsigned short pa_type; /* pa type. inode or group */ | 141 | unsigned short pa_type; /* pa type. inode or group */ |
134 | spinlock_t *pa_obj_lock; | 142 | spinlock_t *pa_obj_lock; |
135 | struct inode *pa_inode; /* hack, for history only */ | 143 | struct inode *pa_inode; /* hack, for history only */ |
@@ -144,7 +152,7 @@ struct ext4_free_extent { | |||
144 | ext4_lblk_t fe_logical; | 152 | ext4_lblk_t fe_logical; |
145 | ext4_grpblk_t fe_start; | 153 | ext4_grpblk_t fe_start; |
146 | ext4_group_t fe_group; | 154 | ext4_group_t fe_group; |
147 | int fe_len; | 155 | ext4_grpblk_t fe_len; |
148 | }; | 156 | }; |
149 | 157 | ||
150 | /* | 158 | /* |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 313a50b39741..bf519f239ae6 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -353,17 +353,16 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
353 | 353 | ||
354 | down_write(&EXT4_I(inode)->i_data_sem); | 354 | down_write(&EXT4_I(inode)->i_data_sem); |
355 | /* | 355 | /* |
356 | * if EXT4_EXT_MIGRATE is cleared a block allocation | 356 | * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation |
357 | * happened after we started the migrate. We need to | 357 | * happened after we started the migrate. We need to |
358 | * fail the migrate | 358 | * fail the migrate |
359 | */ | 359 | */ |
360 | if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) { | 360 | if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) { |
361 | retval = -EAGAIN; | 361 | retval = -EAGAIN; |
362 | up_write(&EXT4_I(inode)->i_data_sem); | 362 | up_write(&EXT4_I(inode)->i_data_sem); |
363 | goto err_out; | 363 | goto err_out; |
364 | } else | 364 | } else |
365 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & | 365 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; |
366 | ~EXT4_EXT_MIGRATE; | ||
367 | /* | 366 | /* |
368 | * We have the extent map build with the tmp inode. | 367 | * We have the extent map build with the tmp inode. |
369 | * Now copy the i_data across | 368 | * Now copy the i_data across |
@@ -517,14 +516,15 @@ int ext4_ext_migrate(struct inode *inode) | |||
517 | * when we add extents we extent the journal | 516 | * when we add extents we extent the journal |
518 | */ | 517 | */ |
519 | /* | 518 | /* |
520 | * Even though we take i_mutex we can still cause block allocation | 519 | * Even though we take i_mutex we can still cause block |
521 | * via mmap write to holes. If we have allocated new blocks we fail | 520 | * allocation via mmap write to holes. If we have allocated |
522 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. | 521 | * new blocks we fail migrate. New block allocation will |
523 | * The flag is updated with i_data_sem held to prevent racing with | 522 | * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated |
524 | * block allocation. | 523 | * with i_data_sem held to prevent racing with block |
524 | * allocation. | ||
525 | */ | 525 | */ |
526 | down_read((&EXT4_I(inode)->i_data_sem)); | 526 | down_read((&EXT4_I(inode)->i_data_sem)); |
527 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE; | 527 | EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE; |
528 | up_read((&EXT4_I(inode)->i_data_sem)); | 528 | up_read((&EXT4_I(inode)->i_data_sem)); |
529 | 529 | ||
530 | handle = ext4_journal_start(inode, 1); | 530 | handle = ext4_journal_start(inode, 1); |
@@ -618,7 +618,7 @@ err_out: | |||
618 | tmp_inode->i_nlink = 0; | 618 | tmp_inode->i_nlink = 0; |
619 | 619 | ||
620 | ext4_journal_stop(handle); | 620 | ext4_journal_stop(handle); |
621 | 621 | unlock_new_inode(tmp_inode); | |
622 | iput(tmp_inode); | 622 | iput(tmp_inode); |
623 | 623 | ||
624 | return retval; | 624 | return retval; |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index bbf2dd9404dc..c07a2915e40b 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -19,14 +19,31 @@ | |||
19 | #include "ext4_extents.h" | 19 | #include "ext4_extents.h" |
20 | #include "ext4.h" | 20 | #include "ext4.h" |
21 | 21 | ||
22 | #define get_ext_path(path, inode, block, ret) \ | 22 | /** |
23 | do { \ | 23 | * get_ext_path - Find an extent path for designated logical block number. |
24 | path = ext4_ext_find_extent(inode, block, path); \ | 24 | * |
25 | if (IS_ERR(path)) { \ | 25 | * @inode: an inode which is searched |
26 | ret = PTR_ERR(path); \ | 26 | * @lblock: logical block number to find an extent path |
27 | path = NULL; \ | 27 | * @path: pointer to an extent path pointer (for output) |
28 | } \ | 28 | * |
29 | } while (0) | 29 | * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value |
30 | * on failure. | ||
31 | */ | ||
32 | static inline int | ||
33 | get_ext_path(struct inode *inode, ext4_lblk_t lblock, | ||
34 | struct ext4_ext_path **path) | ||
35 | { | ||
36 | int ret = 0; | ||
37 | |||
38 | *path = ext4_ext_find_extent(inode, lblock, *path); | ||
39 | if (IS_ERR(*path)) { | ||
40 | ret = PTR_ERR(*path); | ||
41 | *path = NULL; | ||
42 | } else if ((*path)[ext_depth(inode)].p_ext == NULL) | ||
43 | ret = -ENODATA; | ||
44 | |||
45 | return ret; | ||
46 | } | ||
30 | 47 | ||
31 | /** | 48 | /** |
32 | * copy_extent_status - Copy the extent's initialization status | 49 | * copy_extent_status - Copy the extent's initialization status |
@@ -113,6 +130,31 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
113 | } | 130 | } |
114 | 131 | ||
115 | /** | 132 | /** |
133 | * mext_check_null_inode - NULL check for two inodes | ||
134 | * | ||
135 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. | ||
136 | */ | ||
137 | static int | ||
138 | mext_check_null_inode(struct inode *inode1, struct inode *inode2, | ||
139 | const char *function) | ||
140 | { | ||
141 | int ret = 0; | ||
142 | |||
143 | if (inode1 == NULL) { | ||
144 | ext4_error(inode2->i_sb, function, | ||
145 | "Both inodes should not be NULL: " | ||
146 | "inode1 NULL inode2 %lu", inode2->i_ino); | ||
147 | ret = -EIO; | ||
148 | } else if (inode2 == NULL) { | ||
149 | ext4_error(inode1->i_sb, function, | ||
150 | "Both inodes should not be NULL: " | ||
151 | "inode1 %lu inode2 NULL", inode1->i_ino); | ||
152 | ret = -EIO; | ||
153 | } | ||
154 | return ret; | ||
155 | } | ||
156 | |||
157 | /** | ||
116 | * mext_double_down_read - Acquire two inodes' read semaphore | 158 | * mext_double_down_read - Acquire two inodes' read semaphore |
117 | * | 159 | * |
118 | * @orig_inode: original inode structure | 160 | * @orig_inode: original inode structure |
@@ -124,8 +166,6 @@ mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) | |||
124 | { | 166 | { |
125 | struct inode *first = orig_inode, *second = donor_inode; | 167 | struct inode *first = orig_inode, *second = donor_inode; |
126 | 168 | ||
127 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
128 | |||
129 | /* | 169 | /* |
130 | * Use the inode number to provide the stable locking order instead | 170 | * Use the inode number to provide the stable locking order instead |
131 | * of its address, because the C language doesn't guarantee you can | 171 | * of its address, because the C language doesn't guarantee you can |
@@ -152,8 +192,6 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | |||
152 | { | 192 | { |
153 | struct inode *first = orig_inode, *second = donor_inode; | 193 | struct inode *first = orig_inode, *second = donor_inode; |
154 | 194 | ||
155 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
156 | |||
157 | /* | 195 | /* |
158 | * Use the inode number to provide the stable locking order instead | 196 | * Use the inode number to provide the stable locking order instead |
159 | * of its address, because the C language doesn't guarantee you can | 197 | * of its address, because the C language doesn't guarantee you can |
@@ -178,8 +216,6 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | |||
178 | static void | 216 | static void |
179 | mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) | 217 | mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) |
180 | { | 218 | { |
181 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
182 | |||
183 | up_read(&EXT4_I(orig_inode)->i_data_sem); | 219 | up_read(&EXT4_I(orig_inode)->i_data_sem); |
184 | up_read(&EXT4_I(donor_inode)->i_data_sem); | 220 | up_read(&EXT4_I(donor_inode)->i_data_sem); |
185 | } | 221 | } |
@@ -194,8 +230,6 @@ mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) | |||
194 | static void | 230 | static void |
195 | mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) | 231 | mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) |
196 | { | 232 | { |
197 | BUG_ON(orig_inode == NULL || donor_inode == NULL); | ||
198 | |||
199 | up_write(&EXT4_I(orig_inode)->i_data_sem); | 233 | up_write(&EXT4_I(orig_inode)->i_data_sem); |
200 | up_write(&EXT4_I(donor_inode)->i_data_sem); | 234 | up_write(&EXT4_I(donor_inode)->i_data_sem); |
201 | } | 235 | } |
@@ -283,8 +317,8 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
283 | } | 317 | } |
284 | 318 | ||
285 | if (new_flag) { | 319 | if (new_flag) { |
286 | get_ext_path(orig_path, orig_inode, eblock, err); | 320 | err = get_ext_path(orig_inode, eblock, &orig_path); |
287 | if (orig_path == NULL) | 321 | if (err) |
288 | goto out; | 322 | goto out; |
289 | 323 | ||
290 | if (ext4_ext_insert_extent(handle, orig_inode, | 324 | if (ext4_ext_insert_extent(handle, orig_inode, |
@@ -293,9 +327,9 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
293 | } | 327 | } |
294 | 328 | ||
295 | if (end_flag) { | 329 | if (end_flag) { |
296 | get_ext_path(orig_path, orig_inode, | 330 | err = get_ext_path(orig_inode, |
297 | le32_to_cpu(end_ext->ee_block) - 1, err); | 331 | le32_to_cpu(end_ext->ee_block) - 1, &orig_path); |
298 | if (orig_path == NULL) | 332 | if (err) |
299 | goto out; | 333 | goto out; |
300 | 334 | ||
301 | if (ext4_ext_insert_extent(handle, orig_inode, | 335 | if (ext4_ext_insert_extent(handle, orig_inode, |
@@ -519,7 +553,15 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
519 | * oext |-----------| | 553 | * oext |-----------| |
520 | * new_ext |-------| | 554 | * new_ext |-------| |
521 | */ | 555 | */ |
522 | BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end); | 556 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { |
557 | ext4_error(orig_inode->i_sb, __func__, | ||
558 | "new_ext_end(%u) should be less than or equal to " | ||
559 | "oext->ee_block(%u) + oext_alen(%d) - 1", | ||
560 | new_ext_end, le32_to_cpu(oext->ee_block), | ||
561 | oext_alen); | ||
562 | ret = -EIO; | ||
563 | goto out; | ||
564 | } | ||
523 | 565 | ||
524 | /* | 566 | /* |
525 | * Case: new_ext is smaller than original extent | 567 | * Case: new_ext is smaller than original extent |
@@ -543,6 +585,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
543 | 585 | ||
544 | ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, | 586 | ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, |
545 | o_end, &start_ext, &new_ext, &end_ext); | 587 | o_end, &start_ext, &new_ext, &end_ext); |
588 | out: | ||
546 | return ret; | 589 | return ret; |
547 | } | 590 | } |
548 | 591 | ||
@@ -554,8 +597,10 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
554 | * @orig_off: block offset of original inode | 597 | * @orig_off: block offset of original inode |
555 | * @donor_off: block offset of donor inode | 598 | * @donor_off: block offset of donor inode |
556 | * @max_count: the maximun length of extents | 599 | * @max_count: the maximun length of extents |
600 | * | ||
601 | * Return 0 on success, or a negative error value on failure. | ||
557 | */ | 602 | */ |
558 | static void | 603 | static int |
559 | mext_calc_swap_extents(struct ext4_extent *tmp_dext, | 604 | mext_calc_swap_extents(struct ext4_extent *tmp_dext, |
560 | struct ext4_extent *tmp_oext, | 605 | struct ext4_extent *tmp_oext, |
561 | ext4_lblk_t orig_off, ext4_lblk_t donor_off, | 606 | ext4_lblk_t orig_off, ext4_lblk_t donor_off, |
@@ -564,6 +609,19 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
564 | ext4_lblk_t diff, orig_diff; | 609 | ext4_lblk_t diff, orig_diff; |
565 | struct ext4_extent dext_old, oext_old; | 610 | struct ext4_extent dext_old, oext_old; |
566 | 611 | ||
612 | BUG_ON(orig_off != donor_off); | ||
613 | |||
614 | /* original and donor extents have to cover the same block offset */ | ||
615 | if (orig_off < le32_to_cpu(tmp_oext->ee_block) || | ||
616 | le32_to_cpu(tmp_oext->ee_block) + | ||
617 | ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off) | ||
618 | return -ENODATA; | ||
619 | |||
620 | if (orig_off < le32_to_cpu(tmp_dext->ee_block) || | ||
621 | le32_to_cpu(tmp_dext->ee_block) + | ||
622 | ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off) | ||
623 | return -ENODATA; | ||
624 | |||
567 | dext_old = *tmp_dext; | 625 | dext_old = *tmp_dext; |
568 | oext_old = *tmp_oext; | 626 | oext_old = *tmp_oext; |
569 | 627 | ||
@@ -591,6 +649,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
591 | 649 | ||
592 | copy_extent_status(&oext_old, tmp_dext); | 650 | copy_extent_status(&oext_old, tmp_dext); |
593 | copy_extent_status(&dext_old, tmp_oext); | 651 | copy_extent_status(&dext_old, tmp_oext); |
652 | |||
653 | return 0; | ||
594 | } | 654 | } |
595 | 655 | ||
596 | /** | 656 | /** |
@@ -631,13 +691,13 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
631 | mext_double_down_write(orig_inode, donor_inode); | 691 | mext_double_down_write(orig_inode, donor_inode); |
632 | 692 | ||
633 | /* Get the original extent for the block "orig_off" */ | 693 | /* Get the original extent for the block "orig_off" */ |
634 | get_ext_path(orig_path, orig_inode, orig_off, err); | 694 | err = get_ext_path(orig_inode, orig_off, &orig_path); |
635 | if (orig_path == NULL) | 695 | if (err) |
636 | goto out; | 696 | goto out; |
637 | 697 | ||
638 | /* Get the donor extent for the head */ | 698 | /* Get the donor extent for the head */ |
639 | get_ext_path(donor_path, donor_inode, donor_off, err); | 699 | err = get_ext_path(donor_inode, donor_off, &donor_path); |
640 | if (donor_path == NULL) | 700 | if (err) |
641 | goto out; | 701 | goto out; |
642 | depth = ext_depth(orig_inode); | 702 | depth = ext_depth(orig_inode); |
643 | oext = orig_path[depth].p_ext; | 703 | oext = orig_path[depth].p_ext; |
@@ -647,13 +707,28 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
647 | dext = donor_path[depth].p_ext; | 707 | dext = donor_path[depth].p_ext; |
648 | tmp_dext = *dext; | 708 | tmp_dext = *dext; |
649 | 709 | ||
650 | mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 710 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
651 | donor_off, count); | 711 | donor_off, count); |
712 | if (err) | ||
713 | goto out; | ||
652 | 714 | ||
653 | /* Loop for the donor extents */ | 715 | /* Loop for the donor extents */ |
654 | while (1) { | 716 | while (1) { |
655 | /* The extent for donor must be found. */ | 717 | /* The extent for donor must be found. */ |
656 | BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block)); | 718 | if (!dext) { |
719 | ext4_error(donor_inode->i_sb, __func__, | ||
720 | "The extent for donor must be found"); | ||
721 | err = -EIO; | ||
722 | goto out; | ||
723 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { | ||
724 | ext4_error(donor_inode->i_sb, __func__, | ||
725 | "Donor offset(%u) and the first block of donor " | ||
726 | "extent(%u) should be equal", | ||
727 | donor_off, | ||
728 | le32_to_cpu(tmp_dext.ee_block)); | ||
729 | err = -EIO; | ||
730 | goto out; | ||
731 | } | ||
657 | 732 | ||
658 | /* Set donor extent to orig extent */ | 733 | /* Set donor extent to orig extent */ |
659 | err = mext_leaf_block(handle, orig_inode, | 734 | err = mext_leaf_block(handle, orig_inode, |
@@ -678,8 +753,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
678 | 753 | ||
679 | if (orig_path) | 754 | if (orig_path) |
680 | ext4_ext_drop_refs(orig_path); | 755 | ext4_ext_drop_refs(orig_path); |
681 | get_ext_path(orig_path, orig_inode, orig_off, err); | 756 | err = get_ext_path(orig_inode, orig_off, &orig_path); |
682 | if (orig_path == NULL) | 757 | if (err) |
683 | goto out; | 758 | goto out; |
684 | depth = ext_depth(orig_inode); | 759 | depth = ext_depth(orig_inode); |
685 | oext = orig_path[depth].p_ext; | 760 | oext = orig_path[depth].p_ext; |
@@ -692,9 +767,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
692 | 767 | ||
693 | if (donor_path) | 768 | if (donor_path) |
694 | ext4_ext_drop_refs(donor_path); | 769 | ext4_ext_drop_refs(donor_path); |
695 | get_ext_path(donor_path, donor_inode, | 770 | err = get_ext_path(donor_inode, donor_off, &donor_path); |
696 | donor_off, err); | 771 | if (err) |
697 | if (donor_path == NULL) | ||
698 | goto out; | 772 | goto out; |
699 | depth = ext_depth(donor_inode); | 773 | depth = ext_depth(donor_inode); |
700 | dext = donor_path[depth].p_ext; | 774 | dext = donor_path[depth].p_ext; |
@@ -705,9 +779,10 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
705 | } | 779 | } |
706 | tmp_dext = *dext; | 780 | tmp_dext = *dext; |
707 | 781 | ||
708 | mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 782 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
709 | donor_off, | 783 | donor_off, count - replaced_count); |
710 | count - replaced_count); | 784 | if (err) |
785 | goto out; | ||
711 | } | 786 | } |
712 | 787 | ||
713 | out: | 788 | out: |
@@ -740,7 +815,7 @@ out: | |||
740 | * on success, or a negative error value on failure. | 815 | * on success, or a negative error value on failure. |
741 | */ | 816 | */ |
742 | static int | 817 | static int |
743 | move_extent_par_page(struct file *o_filp, struct inode *donor_inode, | 818 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, |
744 | pgoff_t orig_page_offset, int data_offset_in_page, | 819 | pgoff_t orig_page_offset, int data_offset_in_page, |
745 | int block_len_in_page, int uninit) | 820 | int block_len_in_page, int uninit) |
746 | { | 821 | { |
@@ -871,6 +946,7 @@ out: | |||
871 | if (PageLocked(page)) | 946 | if (PageLocked(page)) |
872 | unlock_page(page); | 947 | unlock_page(page); |
873 | page_cache_release(page); | 948 | page_cache_release(page); |
949 | ext4_journal_stop(handle); | ||
874 | } | 950 | } |
875 | out2: | 951 | out2: |
876 | ext4_journal_stop(handle); | 952 | ext4_journal_stop(handle); |
@@ -897,6 +973,10 @@ mext_check_arguments(struct inode *orig_inode, | |||
897 | struct inode *donor_inode, __u64 orig_start, | 973 | struct inode *donor_inode, __u64 orig_start, |
898 | __u64 donor_start, __u64 *len, __u64 moved_len) | 974 | __u64 donor_start, __u64 *len, __u64 moved_len) |
899 | { | 975 | { |
976 | ext4_lblk_t orig_blocks, donor_blocks; | ||
977 | unsigned int blkbits = orig_inode->i_blkbits; | ||
978 | unsigned int blocksize = 1 << blkbits; | ||
979 | |||
900 | /* Regular file check */ | 980 | /* Regular file check */ |
901 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { | 981 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { |
902 | ext4_debug("ext4 move extent: The argument files should be " | 982 | ext4_debug("ext4 move extent: The argument files should be " |
@@ -960,54 +1040,58 @@ mext_check_arguments(struct inode *orig_inode, | |||
960 | return -EINVAL; | 1040 | return -EINVAL; |
961 | } | 1041 | } |
962 | 1042 | ||
963 | if ((orig_start > MAX_DEFRAG_SIZE) || | 1043 | if ((orig_start > EXT_MAX_BLOCK) || |
964 | (donor_start > MAX_DEFRAG_SIZE) || | 1044 | (donor_start > EXT_MAX_BLOCK) || |
965 | (*len > MAX_DEFRAG_SIZE) || | 1045 | (*len > EXT_MAX_BLOCK) || |
966 | (orig_start + *len > MAX_DEFRAG_SIZE)) { | 1046 | (orig_start + *len > EXT_MAX_BLOCK)) { |
967 | ext4_debug("ext4 move extent: Can't handle over [%lu] blocks " | 1047 | ext4_debug("ext4 move extent: Can't handle over [%u] blocks " |
968 | "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE, | 1048 | "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK, |
969 | orig_inode->i_ino, donor_inode->i_ino); | 1049 | orig_inode->i_ino, donor_inode->i_ino); |
970 | return -EINVAL; | 1050 | return -EINVAL; |
971 | } | 1051 | } |
972 | 1052 | ||
973 | if (orig_inode->i_size > donor_inode->i_size) { | 1053 | if (orig_inode->i_size > donor_inode->i_size) { |
974 | if (orig_start >= donor_inode->i_size) { | 1054 | donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits; |
1055 | /* TODO: eliminate this artificial restriction */ | ||
1056 | if (orig_start >= donor_blocks) { | ||
975 | ext4_debug("ext4 move extent: orig start offset " | 1057 | ext4_debug("ext4 move extent: orig start offset " |
976 | "[%llu] should be less than donor file size " | 1058 | "[%llu] should be less than donor file blocks " |
977 | "[%lld] [ino:orig %lu, donor_inode %lu]\n", | 1059 | "[%u] [ino:orig %lu, donor %lu]\n", |
978 | orig_start, donor_inode->i_size, | 1060 | orig_start, donor_blocks, |
979 | orig_inode->i_ino, donor_inode->i_ino); | 1061 | orig_inode->i_ino, donor_inode->i_ino); |
980 | return -EINVAL; | 1062 | return -EINVAL; |
981 | } | 1063 | } |
982 | 1064 | ||
983 | if (orig_start + *len > donor_inode->i_size) { | 1065 | /* TODO: eliminate this artificial restriction */ |
1066 | if (orig_start + *len > donor_blocks) { | ||
984 | ext4_debug("ext4 move extent: End offset [%llu] should " | 1067 | ext4_debug("ext4 move extent: End offset [%llu] should " |
985 | "be less than donor file size [%lld]." | 1068 | "be less than donor file blocks [%u]." |
986 | "So adjust length from %llu to %lld " | 1069 | "So adjust length from %llu to %llu " |
987 | "[ino:orig %lu, donor %lu]\n", | 1070 | "[ino:orig %lu, donor %lu]\n", |
988 | orig_start + *len, donor_inode->i_size, | 1071 | orig_start + *len, donor_blocks, |
989 | *len, donor_inode->i_size - orig_start, | 1072 | *len, donor_blocks - orig_start, |
990 | orig_inode->i_ino, donor_inode->i_ino); | 1073 | orig_inode->i_ino, donor_inode->i_ino); |
991 | *len = donor_inode->i_size - orig_start; | 1074 | *len = donor_blocks - orig_start; |
992 | } | 1075 | } |
993 | } else { | 1076 | } else { |
994 | if (orig_start >= orig_inode->i_size) { | 1077 | orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits; |
1078 | if (orig_start >= orig_blocks) { | ||
995 | ext4_debug("ext4 move extent: start offset [%llu] " | 1079 | ext4_debug("ext4 move extent: start offset [%llu] " |
996 | "should be less than original file size " | 1080 | "should be less than original file blocks " |
997 | "[%lld] [inode:orig %lu, donor %lu]\n", | 1081 | "[%u] [ino:orig %lu, donor %lu]\n", |
998 | orig_start, orig_inode->i_size, | 1082 | orig_start, orig_blocks, |
999 | orig_inode->i_ino, donor_inode->i_ino); | 1083 | orig_inode->i_ino, donor_inode->i_ino); |
1000 | return -EINVAL; | 1084 | return -EINVAL; |
1001 | } | 1085 | } |
1002 | 1086 | ||
1003 | if (orig_start + *len > orig_inode->i_size) { | 1087 | if (orig_start + *len > orig_blocks) { |
1004 | ext4_debug("ext4 move extent: Adjust length " | 1088 | ext4_debug("ext4 move extent: Adjust length " |
1005 | "from %llu to %lld. Because it should be " | 1089 | "from %llu to %llu. Because it should be " |
1006 | "less than original file size " | 1090 | "less than original file blocks " |
1007 | "[ino:orig %lu, donor %lu]\n", | 1091 | "[ino:orig %lu, donor %lu]\n", |
1008 | *len, orig_inode->i_size - orig_start, | 1092 | *len, orig_blocks - orig_start, |
1009 | orig_inode->i_ino, donor_inode->i_ino); | 1093 | orig_inode->i_ino, donor_inode->i_ino); |
1010 | *len = orig_inode->i_size - orig_start; | 1094 | *len = orig_blocks - orig_start; |
1011 | } | 1095 | } |
1012 | } | 1096 | } |
1013 | 1097 | ||
@@ -1027,18 +1111,23 @@ mext_check_arguments(struct inode *orig_inode, | |||
1027 | * @inode1: the inode structure | 1111 | * @inode1: the inode structure |
1028 | * @inode2: the inode structure | 1112 | * @inode2: the inode structure |
1029 | * | 1113 | * |
1030 | * Lock two inodes' i_mutex by i_ino order. This function is moved from | 1114 | * Lock two inodes' i_mutex by i_ino order. |
1031 | * fs/inode.c. | 1115 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. |
1032 | */ | 1116 | */ |
1033 | static void | 1117 | static int |
1034 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | 1118 | mext_inode_double_lock(struct inode *inode1, struct inode *inode2) |
1035 | { | 1119 | { |
1036 | if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { | 1120 | int ret = 0; |
1037 | if (inode1) | 1121 | |
1038 | mutex_lock(&inode1->i_mutex); | 1122 | BUG_ON(inode1 == NULL && inode2 == NULL); |
1039 | else if (inode2) | 1123 | |
1040 | mutex_lock(&inode2->i_mutex); | 1124 | ret = mext_check_null_inode(inode1, inode2, __func__); |
1041 | return; | 1125 | if (ret < 0) |
1126 | goto out; | ||
1127 | |||
1128 | if (inode1 == inode2) { | ||
1129 | mutex_lock(&inode1->i_mutex); | ||
1130 | goto out; | ||
1042 | } | 1131 | } |
1043 | 1132 | ||
1044 | if (inode1->i_ino < inode2->i_ino) { | 1133 | if (inode1->i_ino < inode2->i_ino) { |
@@ -1048,6 +1137,9 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | |||
1048 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | 1137 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); |
1049 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | 1138 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); |
1050 | } | 1139 | } |
1140 | |||
1141 | out: | ||
1142 | return ret; | ||
1051 | } | 1143 | } |
1052 | 1144 | ||
1053 | /** | 1145 | /** |
@@ -1056,17 +1148,28 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | |||
1056 | * @inode1: the inode that is released first | 1148 | * @inode1: the inode that is released first |
1057 | * @inode2: the inode that is released second | 1149 | * @inode2: the inode that is released second |
1058 | * | 1150 | * |
1059 | * This function is moved from fs/inode.c. | 1151 | * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. |
1060 | */ | 1152 | */ |
1061 | 1153 | ||
1062 | static void | 1154 | static int |
1063 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) | 1155 | mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) |
1064 | { | 1156 | { |
1157 | int ret = 0; | ||
1158 | |||
1159 | BUG_ON(inode1 == NULL && inode2 == NULL); | ||
1160 | |||
1161 | ret = mext_check_null_inode(inode1, inode2, __func__); | ||
1162 | if (ret < 0) | ||
1163 | goto out; | ||
1164 | |||
1065 | if (inode1) | 1165 | if (inode1) |
1066 | mutex_unlock(&inode1->i_mutex); | 1166 | mutex_unlock(&inode1->i_mutex); |
1067 | 1167 | ||
1068 | if (inode2 && inode2 != inode1) | 1168 | if (inode2 && inode2 != inode1) |
1069 | mutex_unlock(&inode2->i_mutex); | 1169 | mutex_unlock(&inode2->i_mutex); |
1170 | |||
1171 | out: | ||
1172 | return ret; | ||
1070 | } | 1173 | } |
1071 | 1174 | ||
1072 | /** | 1175 | /** |
@@ -1123,70 +1226,76 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1123 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; | 1226 | ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; |
1124 | ext4_lblk_t rest_blocks; | 1227 | ext4_lblk_t rest_blocks; |
1125 | pgoff_t orig_page_offset = 0, seq_end_page; | 1228 | pgoff_t orig_page_offset = 0, seq_end_page; |
1126 | int ret, depth, last_extent = 0; | 1229 | int ret1, ret2, depth, last_extent = 0; |
1127 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 1230 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
1128 | int data_offset_in_page; | 1231 | int data_offset_in_page; |
1129 | int block_len_in_page; | 1232 | int block_len_in_page; |
1130 | int uninit; | 1233 | int uninit; |
1131 | 1234 | ||
1132 | /* protect orig and donor against a truncate */ | 1235 | /* protect orig and donor against a truncate */ |
1133 | mext_inode_double_lock(orig_inode, donor_inode); | 1236 | ret1 = mext_inode_double_lock(orig_inode, donor_inode); |
1237 | if (ret1 < 0) | ||
1238 | return ret1; | ||
1134 | 1239 | ||
1135 | mext_double_down_read(orig_inode, donor_inode); | 1240 | mext_double_down_read(orig_inode, donor_inode); |
1136 | /* Check the filesystem environment whether move_extent can be done */ | 1241 | /* Check the filesystem environment whether move_extent can be done */ |
1137 | ret = mext_check_arguments(orig_inode, donor_inode, orig_start, | 1242 | ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, |
1138 | donor_start, &len, *moved_len); | 1243 | donor_start, &len, *moved_len); |
1139 | mext_double_up_read(orig_inode, donor_inode); | 1244 | mext_double_up_read(orig_inode, donor_inode); |
1140 | if (ret) | 1245 | if (ret1) |
1141 | goto out2; | 1246 | goto out; |
1142 | 1247 | ||
1143 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; | 1248 | file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; |
1144 | block_end = block_start + len - 1; | 1249 | block_end = block_start + len - 1; |
1145 | if (file_end < block_end) | 1250 | if (file_end < block_end) |
1146 | len -= block_end - file_end; | 1251 | len -= block_end - file_end; |
1147 | 1252 | ||
1148 | get_ext_path(orig_path, orig_inode, block_start, ret); | 1253 | ret1 = get_ext_path(orig_inode, block_start, &orig_path); |
1149 | if (orig_path == NULL) | 1254 | if (ret1) |
1150 | goto out2; | 1255 | goto out; |
1151 | 1256 | ||
1152 | /* Get path structure to check the hole */ | 1257 | /* Get path structure to check the hole */ |
1153 | get_ext_path(holecheck_path, orig_inode, block_start, ret); | 1258 | ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); |
1154 | if (holecheck_path == NULL) | 1259 | if (ret1) |
1155 | goto out; | 1260 | goto out; |
1156 | 1261 | ||
1157 | depth = ext_depth(orig_inode); | 1262 | depth = ext_depth(orig_inode); |
1158 | ext_cur = holecheck_path[depth].p_ext; | 1263 | ext_cur = holecheck_path[depth].p_ext; |
1159 | if (ext_cur == NULL) { | ||
1160 | ret = -EINVAL; | ||
1161 | goto out; | ||
1162 | } | ||
1163 | 1264 | ||
1164 | /* | 1265 | /* |
1165 | * Get proper extent whose ee_block is beyond block_start | 1266 | * Get proper starting location of block replacement if block_start was |
1166 | * if block_start was within the hole. | 1267 | * within the hole. |
1167 | */ | 1268 | */ |
1168 | if (le32_to_cpu(ext_cur->ee_block) + | 1269 | if (le32_to_cpu(ext_cur->ee_block) + |
1169 | ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { | 1270 | ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { |
1271 | /* | ||
1272 | * The hole exists between extents or the tail of | ||
1273 | * original file. | ||
1274 | */ | ||
1170 | last_extent = mext_next_extent(orig_inode, | 1275 | last_extent = mext_next_extent(orig_inode, |
1171 | holecheck_path, &ext_cur); | 1276 | holecheck_path, &ext_cur); |
1172 | if (last_extent < 0) { | 1277 | if (last_extent < 0) { |
1173 | ret = last_extent; | 1278 | ret1 = last_extent; |
1174 | goto out; | 1279 | goto out; |
1175 | } | 1280 | } |
1176 | last_extent = mext_next_extent(orig_inode, orig_path, | 1281 | last_extent = mext_next_extent(orig_inode, orig_path, |
1177 | &ext_dummy); | 1282 | &ext_dummy); |
1178 | if (last_extent < 0) { | 1283 | if (last_extent < 0) { |
1179 | ret = last_extent; | 1284 | ret1 = last_extent; |
1180 | goto out; | 1285 | goto out; |
1181 | } | 1286 | } |
1182 | } | 1287 | seq_start = le32_to_cpu(ext_cur->ee_block); |
1183 | seq_start = block_start; | 1288 | } else if (le32_to_cpu(ext_cur->ee_block) > block_start) |
1289 | /* The hole exists at the beginning of original file. */ | ||
1290 | seq_start = le32_to_cpu(ext_cur->ee_block); | ||
1291 | else | ||
1292 | seq_start = block_start; | ||
1184 | 1293 | ||
1185 | /* No blocks within the specified range. */ | 1294 | /* No blocks within the specified range. */ |
1186 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { | 1295 | if (le32_to_cpu(ext_cur->ee_block) > block_end) { |
1187 | ext4_debug("ext4 move extent: The specified range of file " | 1296 | ext4_debug("ext4 move extent: The specified range of file " |
1188 | "may be the hole\n"); | 1297 | "may be the hole\n"); |
1189 | ret = -EINVAL; | 1298 | ret1 = -EINVAL; |
1190 | goto out; | 1299 | goto out; |
1191 | } | 1300 | } |
1192 | 1301 | ||
@@ -1206,7 +1315,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1206 | last_extent = mext_next_extent(orig_inode, holecheck_path, | 1315 | last_extent = mext_next_extent(orig_inode, holecheck_path, |
1207 | &ext_cur); | 1316 | &ext_cur); |
1208 | if (last_extent < 0) { | 1317 | if (last_extent < 0) { |
1209 | ret = last_extent; | 1318 | ret1 = last_extent; |
1210 | break; | 1319 | break; |
1211 | } | 1320 | } |
1212 | add_blocks = ext4_ext_get_actual_len(ext_cur); | 1321 | add_blocks = ext4_ext_get_actual_len(ext_cur); |
@@ -1258,16 +1367,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1258 | while (orig_page_offset <= seq_end_page) { | 1367 | while (orig_page_offset <= seq_end_page) { |
1259 | 1368 | ||
1260 | /* Swap original branches with new branches */ | 1369 | /* Swap original branches with new branches */ |
1261 | ret = move_extent_par_page(o_filp, donor_inode, | 1370 | ret1 = move_extent_per_page(o_filp, donor_inode, |
1262 | orig_page_offset, | 1371 | orig_page_offset, |
1263 | data_offset_in_page, | 1372 | data_offset_in_page, |
1264 | block_len_in_page, uninit); | 1373 | block_len_in_page, uninit); |
1265 | if (ret < 0) | 1374 | if (ret1 < 0) |
1266 | goto out; | 1375 | goto out; |
1267 | orig_page_offset++; | 1376 | orig_page_offset++; |
1268 | /* Count how many blocks we have exchanged */ | 1377 | /* Count how many blocks we have exchanged */ |
1269 | *moved_len += block_len_in_page; | 1378 | *moved_len += block_len_in_page; |
1270 | BUG_ON(*moved_len > len); | 1379 | if (*moved_len > len) { |
1380 | ext4_error(orig_inode->i_sb, __func__, | ||
1381 | "We replaced blocks too much! " | ||
1382 | "sum of replaced: %llu requested: %llu", | ||
1383 | *moved_len, len); | ||
1384 | ret1 = -EIO; | ||
1385 | goto out; | ||
1386 | } | ||
1271 | 1387 | ||
1272 | data_offset_in_page = 0; | 1388 | data_offset_in_page = 0; |
1273 | rest_blocks -= block_len_in_page; | 1389 | rest_blocks -= block_len_in_page; |
@@ -1280,17 +1396,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1280 | /* Decrease buffer counter */ | 1396 | /* Decrease buffer counter */ |
1281 | if (holecheck_path) | 1397 | if (holecheck_path) |
1282 | ext4_ext_drop_refs(holecheck_path); | 1398 | ext4_ext_drop_refs(holecheck_path); |
1283 | get_ext_path(holecheck_path, orig_inode, | 1399 | ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); |
1284 | seq_start, ret); | 1400 | if (ret1) |
1285 | if (holecheck_path == NULL) | ||
1286 | break; | 1401 | break; |
1287 | depth = holecheck_path->p_depth; | 1402 | depth = holecheck_path->p_depth; |
1288 | 1403 | ||
1289 | /* Decrease buffer counter */ | 1404 | /* Decrease buffer counter */ |
1290 | if (orig_path) | 1405 | if (orig_path) |
1291 | ext4_ext_drop_refs(orig_path); | 1406 | ext4_ext_drop_refs(orig_path); |
1292 | get_ext_path(orig_path, orig_inode, seq_start, ret); | 1407 | ret1 = get_ext_path(orig_inode, seq_start, &orig_path); |
1293 | if (orig_path == NULL) | 1408 | if (ret1) |
1294 | break; | 1409 | break; |
1295 | 1410 | ||
1296 | ext_cur = holecheck_path[depth].p_ext; | 1411 | ext_cur = holecheck_path[depth].p_ext; |
@@ -1307,14 +1422,13 @@ out: | |||
1307 | ext4_ext_drop_refs(holecheck_path); | 1422 | ext4_ext_drop_refs(holecheck_path); |
1308 | kfree(holecheck_path); | 1423 | kfree(holecheck_path); |
1309 | } | 1424 | } |
1310 | out2: | ||
1311 | mext_inode_double_unlock(orig_inode, donor_inode); | ||
1312 | 1425 | ||
1313 | if (ret) | 1426 | ret2 = mext_inode_double_unlock(orig_inode, donor_inode); |
1314 | return ret; | ||
1315 | 1427 | ||
1316 | /* All of the specified blocks must be exchanged in succeed */ | 1428 | if (ret1) |
1317 | BUG_ON(*moved_len != len); | 1429 | return ret1; |
1430 | else if (ret2) | ||
1431 | return ret2; | ||
1318 | 1432 | ||
1319 | return 0; | 1433 | return 0; |
1320 | } | 1434 | } |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 114abe5d2c1d..42f81d285cd5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1518,8 +1518,12 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1518 | return retval; | 1518 | return retval; |
1519 | 1519 | ||
1520 | if (blocks == 1 && !dx_fallback && | 1520 | if (blocks == 1 && !dx_fallback && |
1521 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) | 1521 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { |
1522 | return make_indexed_dir(handle, dentry, inode, bh); | 1522 | retval = make_indexed_dir(handle, dentry, inode, bh); |
1523 | if (retval == -ENOSPC) | ||
1524 | brelse(bh); | ||
1525 | return retval; | ||
1526 | } | ||
1523 | brelse(bh); | 1527 | brelse(bh); |
1524 | } | 1528 | } |
1525 | bh = ext4_append(handle, dir, &block, &retval); | 1529 | bh = ext4_append(handle, dir, &block, &retval); |
@@ -1528,7 +1532,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1528 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1532 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1529 | de->inode = 0; | 1533 | de->inode = 0; |
1530 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); | 1534 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); |
1531 | return add_dirent_to_buf(handle, dentry, inode, de, bh); | 1535 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1536 | if (retval == -ENOSPC) | ||
1537 | brelse(bh); | ||
1538 | return retval; | ||
1532 | } | 1539 | } |
1533 | 1540 | ||
1534 | /* | 1541 | /* |
@@ -1590,9 +1597,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1590 | goto cleanup; | 1597 | goto cleanup; |
1591 | node2 = (struct dx_node *)(bh2->b_data); | 1598 | node2 = (struct dx_node *)(bh2->b_data); |
1592 | entries2 = node2->entries; | 1599 | entries2 = node2->entries; |
1600 | memset(&node2->fake, 0, sizeof(struct fake_dirent)); | ||
1593 | node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, | 1601 | node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, |
1594 | sb->s_blocksize); | 1602 | sb->s_blocksize); |
1595 | node2->fake.inode = 0; | ||
1596 | BUFFER_TRACE(frame->bh, "get_write_access"); | 1603 | BUFFER_TRACE(frame->bh, "get_write_access"); |
1597 | err = ext4_journal_get_write_access(handle, frame->bh); | 1604 | err = ext4_journal_get_write_access(handle, frame->bh); |
1598 | if (err) | 1605 | if (err) |
@@ -1657,7 +1664,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1657 | if (!de) | 1664 | if (!de) |
1658 | goto cleanup; | 1665 | goto cleanup; |
1659 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1666 | err = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1660 | bh = NULL; | 1667 | if (err != -ENOSPC) |
1668 | bh = NULL; | ||
1661 | goto cleanup; | 1669 | goto cleanup; |
1662 | 1670 | ||
1663 | journal_error: | 1671 | journal_error: |
@@ -2310,7 +2318,7 @@ static int ext4_link(struct dentry *old_dentry, | |||
2310 | struct inode *inode = old_dentry->d_inode; | 2318 | struct inode *inode = old_dentry->d_inode; |
2311 | int err, retries = 0; | 2319 | int err, retries = 0; |
2312 | 2320 | ||
2313 | if (EXT4_DIR_LINK_MAX(inode)) | 2321 | if (inode->i_nlink >= EXT4_LINK_MAX) |
2314 | return -EMLINK; | 2322 | return -EMLINK; |
2315 | 2323 | ||
2316 | /* | 2324 | /* |
@@ -2413,7 +2421,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2413 | goto end_rename; | 2421 | goto end_rename; |
2414 | retval = -EMLINK; | 2422 | retval = -EMLINK; |
2415 | if (!new_inode && new_dir != old_dir && | 2423 | if (!new_inode && new_dir != old_dir && |
2416 | new_dir->i_nlink >= EXT4_LINK_MAX) | 2424 | EXT4_DIR_LINK_MAX(new_dir)) |
2417 | goto end_rename; | 2425 | goto end_rename; |
2418 | } | 2426 | } |
2419 | if (!new_bh) { | 2427 | if (!new_bh) { |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 68b0351fc647..3cfc343c41b5 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -746,7 +746,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
746 | struct inode *inode = NULL; | 746 | struct inode *inode = NULL; |
747 | handle_t *handle; | 747 | handle_t *handle; |
748 | int gdb_off, gdb_num; | 748 | int gdb_off, gdb_num; |
749 | int num_grp_locked = 0; | ||
750 | int err, err2; | 749 | int err, err2; |
751 | 750 | ||
752 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | 751 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); |
@@ -856,7 +855,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
856 | * using the new disk blocks. | 855 | * using the new disk blocks. |
857 | */ | 856 | */ |
858 | 857 | ||
859 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group); | ||
860 | /* Update group descriptor block for new group */ | 858 | /* Update group descriptor block for new group */ |
861 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + | 859 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + |
862 | gdb_off * EXT4_DESC_SIZE(sb)); | 860 | gdb_off * EXT4_DESC_SIZE(sb)); |
@@ -875,10 +873,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
875 | * descriptor | 873 | * descriptor |
876 | */ | 874 | */ |
877 | err = ext4_mb_add_groupinfo(sb, input->group, gdp); | 875 | err = ext4_mb_add_groupinfo(sb, input->group, gdp); |
878 | if (err) { | 876 | if (err) |
879 | ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); | ||
880 | goto exit_journal; | 877 | goto exit_journal; |
881 | } | ||
882 | 878 | ||
883 | /* | 879 | /* |
884 | * Make the new blocks and inodes valid next. We do this before | 880 | * Make the new blocks and inodes valid next. We do this before |
@@ -920,7 +916,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
920 | 916 | ||
921 | /* Update the global fs size fields */ | 917 | /* Update the global fs size fields */ |
922 | sbi->s_groups_count++; | 918 | sbi->s_groups_count++; |
923 | ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); | ||
924 | 919 | ||
925 | ext4_handle_dirty_metadata(handle, NULL, primary); | 920 | ext4_handle_dirty_metadata(handle, NULL, primary); |
926 | 921 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8f4f079e6b9a..df539ba27779 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include "ext4_jbd2.h" | 45 | #include "ext4_jbd2.h" |
46 | #include "xattr.h" | 46 | #include "xattr.h" |
47 | #include "acl.h" | 47 | #include "acl.h" |
48 | #include "mballoc.h" | ||
48 | 49 | ||
49 | #define CREATE_TRACE_POINTS | 50 | #define CREATE_TRACE_POINTS |
50 | #include <trace/events/ext4.h> | 51 | #include <trace/events/ext4.h> |
@@ -344,7 +345,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno, | |||
344 | errstr = "Out of memory"; | 345 | errstr = "Out of memory"; |
345 | break; | 346 | break; |
346 | case -EROFS: | 347 | case -EROFS: |
347 | if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) | 348 | if (!sb || (EXT4_SB(sb)->s_journal && |
349 | EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) | ||
348 | errstr = "Journal has aborted"; | 350 | errstr = "Journal has aborted"; |
349 | else | 351 | else |
350 | errstr = "Readonly filesystem"; | 352 | errstr = "Readonly filesystem"; |
@@ -962,7 +964,7 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | |||
962 | static ssize_t ext4_quota_write(struct super_block *sb, int type, | 964 | static ssize_t ext4_quota_write(struct super_block *sb, int type, |
963 | const char *data, size_t len, loff_t off); | 965 | const char *data, size_t len, loff_t off); |
964 | 966 | ||
965 | static struct dquot_operations ext4_quota_operations = { | 967 | static const struct dquot_operations ext4_quota_operations = { |
966 | .initialize = dquot_initialize, | 968 | .initialize = dquot_initialize, |
967 | .drop = dquot_drop, | 969 | .drop = dquot_drop, |
968 | .alloc_space = dquot_alloc_space, | 970 | .alloc_space = dquot_alloc_space, |
@@ -983,7 +985,7 @@ static struct dquot_operations ext4_quota_operations = { | |||
983 | .destroy_dquot = dquot_destroy, | 985 | .destroy_dquot = dquot_destroy, |
984 | }; | 986 | }; |
985 | 987 | ||
986 | static struct quotactl_ops ext4_qctl_operations = { | 988 | static const struct quotactl_ops ext4_qctl_operations = { |
987 | .quota_on = ext4_quota_on, | 989 | .quota_on = ext4_quota_on, |
988 | .quota_off = vfs_quota_off, | 990 | .quota_off = vfs_quota_off, |
989 | .quota_sync = vfs_quota_sync, | 991 | .quota_sync = vfs_quota_sync, |
@@ -1279,11 +1281,9 @@ static int parse_options(char *options, struct super_block *sb, | |||
1279 | *journal_devnum = option; | 1281 | *journal_devnum = option; |
1280 | break; | 1282 | break; |
1281 | case Opt_journal_checksum: | 1283 | case Opt_journal_checksum: |
1282 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | 1284 | break; /* Kept for backwards compatibility */ |
1283 | break; | ||
1284 | case Opt_journal_async_commit: | 1285 | case Opt_journal_async_commit: |
1285 | set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); | 1286 | set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); |
1286 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | ||
1287 | break; | 1287 | break; |
1288 | case Opt_noload: | 1288 | case Opt_noload: |
1289 | set_opt(sbi->s_mount_opt, NOLOAD); | 1289 | set_opt(sbi->s_mount_opt, NOLOAD); |
@@ -1695,12 +1695,12 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1695 | gdp = ext4_get_group_desc(sb, i, NULL); | 1695 | gdp = ext4_get_group_desc(sb, i, NULL); |
1696 | 1696 | ||
1697 | flex_group = ext4_flex_group(sbi, i); | 1697 | flex_group = ext4_flex_group(sbi, i); |
1698 | atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, | 1698 | atomic_add(ext4_free_inodes_count(sb, gdp), |
1699 | ext4_free_inodes_count(sb, gdp)); | 1699 | &sbi->s_flex_groups[flex_group].free_inodes); |
1700 | atomic_set(&sbi->s_flex_groups[flex_group].free_blocks, | 1700 | atomic_add(ext4_free_blks_count(sb, gdp), |
1701 | ext4_free_blks_count(sb, gdp)); | 1701 | &sbi->s_flex_groups[flex_group].free_blocks); |
1702 | atomic_set(&sbi->s_flex_groups[flex_group].used_dirs, | 1702 | atomic_add(ext4_used_dirs_count(sb, gdp), |
1703 | ext4_used_dirs_count(sb, gdp)); | 1703 | &sbi->s_flex_groups[flex_group].used_dirs); |
1704 | } | 1704 | } |
1705 | 1705 | ||
1706 | return 1; | 1706 | return 1; |
@@ -2253,6 +2253,49 @@ static struct kobj_type ext4_ktype = { | |||
2253 | .release = ext4_sb_release, | 2253 | .release = ext4_sb_release, |
2254 | }; | 2254 | }; |
2255 | 2255 | ||
2256 | /* | ||
2257 | * Check whether this filesystem can be mounted based on | ||
2258 | * the features present and the RDONLY/RDWR mount requested. | ||
2259 | * Returns 1 if this filesystem can be mounted as requested, | ||
2260 | * 0 if it cannot be. | ||
2261 | */ | ||
2262 | static int ext4_feature_set_ok(struct super_block *sb, int readonly) | ||
2263 | { | ||
2264 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { | ||
2265 | ext4_msg(sb, KERN_ERR, | ||
2266 | "Couldn't mount because of " | ||
2267 | "unsupported optional features (%x)", | ||
2268 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & | ||
2269 | ~EXT4_FEATURE_INCOMPAT_SUPP)); | ||
2270 | return 0; | ||
2271 | } | ||
2272 | |||
2273 | if (readonly) | ||
2274 | return 1; | ||
2275 | |||
2276 | /* Check that feature set is OK for a read-write mount */ | ||
2277 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { | ||
2278 | ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " | ||
2279 | "unsupported optional features (%x)", | ||
2280 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & | ||
2281 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
2282 | return 0; | ||
2283 | } | ||
2284 | /* | ||
2285 | * Large file size enabled file system can only be mounted | ||
2286 | * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF | ||
2287 | */ | ||
2288 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | ||
2289 | if (sizeof(blkcnt_t) < sizeof(u64)) { | ||
2290 | ext4_msg(sb, KERN_ERR, "Filesystem with huge files " | ||
2291 | "cannot be mounted RDWR without " | ||
2292 | "CONFIG_LBDAF"); | ||
2293 | return 0; | ||
2294 | } | ||
2295 | } | ||
2296 | return 1; | ||
2297 | } | ||
2298 | |||
2256 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 2299 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2257 | __releases(kernel_lock) | 2300 | __releases(kernel_lock) |
2258 | __acquires(kernel_lock) | 2301 | __acquires(kernel_lock) |
@@ -2274,7 +2317,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2274 | unsigned int db_count; | 2317 | unsigned int db_count; |
2275 | unsigned int i; | 2318 | unsigned int i; |
2276 | int needs_recovery, has_huge_files; | 2319 | int needs_recovery, has_huge_files; |
2277 | int features; | ||
2278 | __u64 blocks_count; | 2320 | __u64 blocks_count; |
2279 | int err; | 2321 | int err; |
2280 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 2322 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
@@ -2401,39 +2443,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2401 | * previously didn't change the revision level when setting the flags, | 2443 | * previously didn't change the revision level when setting the flags, |
2402 | * so there is a chance incompat flags are set on a rev 0 filesystem. | 2444 | * so there is a chance incompat flags are set on a rev 0 filesystem. |
2403 | */ | 2445 | */ |
2404 | features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); | 2446 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) |
2405 | if (features) { | ||
2406 | ext4_msg(sb, KERN_ERR, | ||
2407 | "Couldn't mount because of " | ||
2408 | "unsupported optional features (%x)", | ||
2409 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & | ||
2410 | ~EXT4_FEATURE_INCOMPAT_SUPP)); | ||
2411 | goto failed_mount; | ||
2412 | } | ||
2413 | features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); | ||
2414 | if (!(sb->s_flags & MS_RDONLY) && features) { | ||
2415 | ext4_msg(sb, KERN_ERR, | ||
2416 | "Couldn't mount RDWR because of " | ||
2417 | "unsupported optional features (%x)", | ||
2418 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & | ||
2419 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
2420 | goto failed_mount; | 2447 | goto failed_mount; |
2421 | } | 2448 | |
2422 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
2423 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
2424 | if (has_huge_files) { | ||
2425 | /* | ||
2426 | * Large file size enabled file system can only be | ||
2427 | * mount if kernel is build with CONFIG_LBDAF | ||
2428 | */ | ||
2429 | if (sizeof(root->i_blocks) < sizeof(u64) && | ||
2430 | !(sb->s_flags & MS_RDONLY)) { | ||
2431 | ext4_msg(sb, KERN_ERR, "Filesystem with huge " | ||
2432 | "files cannot be mounted read-write " | ||
2433 | "without CONFIG_LBDAF"); | ||
2434 | goto failed_mount; | ||
2435 | } | ||
2436 | } | ||
2437 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | 2449 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); |
2438 | 2450 | ||
2439 | if (blocksize < EXT4_MIN_BLOCK_SIZE || | 2451 | if (blocksize < EXT4_MIN_BLOCK_SIZE || |
@@ -2469,6 +2481,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2469 | } | 2481 | } |
2470 | } | 2482 | } |
2471 | 2483 | ||
2484 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
2485 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
2472 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, | 2486 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, |
2473 | has_huge_files); | 2487 | has_huge_files); |
2474 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); | 2488 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); |
@@ -2549,12 +2563,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2549 | goto failed_mount; | 2563 | goto failed_mount; |
2550 | } | 2564 | } |
2551 | 2565 | ||
2552 | if (ext4_blocks_count(es) > | 2566 | /* |
2553 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { | 2567 | * Test whether we have more sectors than will fit in sector_t, |
2568 | * and whether the max offset is addressable by the page cache. | ||
2569 | */ | ||
2570 | if ((ext4_blocks_count(es) > | ||
2571 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || | ||
2572 | (ext4_blocks_count(es) > | ||
2573 | (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { | ||
2554 | ext4_msg(sb, KERN_ERR, "filesystem" | 2574 | ext4_msg(sb, KERN_ERR, "filesystem" |
2555 | " too large to mount safely"); | 2575 | " too large to mount safely on this system"); |
2556 | if (sizeof(sector_t) < 8) | 2576 | if (sizeof(sector_t) < 8) |
2557 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 2577 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
2578 | ret = -EFBIG; | ||
2558 | goto failed_mount; | 2579 | goto failed_mount; |
2559 | } | 2580 | } |
2560 | 2581 | ||
@@ -2595,6 +2616,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2595 | goto failed_mount; | 2616 | goto failed_mount; |
2596 | } | 2617 | } |
2597 | sbi->s_groups_count = blocks_count; | 2618 | sbi->s_groups_count = blocks_count; |
2619 | sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, | ||
2620 | (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); | ||
2598 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / | 2621 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / |
2599 | EXT4_DESC_PER_BLOCK(sb); | 2622 | EXT4_DESC_PER_BLOCK(sb); |
2600 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), | 2623 | sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), |
@@ -2729,20 +2752,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2729 | goto failed_mount4; | 2752 | goto failed_mount4; |
2730 | } | 2753 | } |
2731 | 2754 | ||
2732 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | 2755 | jbd2_journal_set_features(sbi->s_journal, |
2733 | jbd2_journal_set_features(sbi->s_journal, | 2756 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); |
2734 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | 2757 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) |
2758 | jbd2_journal_set_features(sbi->s_journal, 0, 0, | ||
2735 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 2759 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
2736 | } else if (test_opt(sb, JOURNAL_CHECKSUM)) { | 2760 | else |
2737 | jbd2_journal_set_features(sbi->s_journal, | ||
2738 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); | ||
2739 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | 2761 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, |
2740 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 2762 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
2741 | } else { | ||
2742 | jbd2_journal_clear_features(sbi->s_journal, | ||
2743 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
2744 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
2745 | } | ||
2746 | 2763 | ||
2747 | /* We have now updated the journal if required, so we can | 2764 | /* We have now updated the journal if required, so we can |
2748 | * validate the data journaling mode. */ | 2765 | * validate the data journaling mode. */ |
@@ -3208,7 +3225,18 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
3208 | clear_buffer_write_io_error(sbh); | 3225 | clear_buffer_write_io_error(sbh); |
3209 | set_buffer_uptodate(sbh); | 3226 | set_buffer_uptodate(sbh); |
3210 | } | 3227 | } |
3211 | es->s_wtime = cpu_to_le32(get_seconds()); | 3228 | /* |
3229 | * If the file system is mounted read-only, don't update the | ||
3230 | * superblock write time. This avoids updating the superblock | ||
3231 | * write time when we are mounting the root file system | ||
3232 | * read/only but we need to replay the journal; at that point, | ||
3233 | * for people who are east of GMT and who make their clock | ||
3234 | * tick in localtime for Windows bug-for-bug compatibility, | ||
3235 | * the clock is set in the future, and this will cause e2fsck | ||
3236 | * to complain and force a full file system check. | ||
3237 | */ | ||
3238 | if (!(sb->s_flags & MS_RDONLY)) | ||
3239 | es->s_wtime = cpu_to_le32(get_seconds()); | ||
3212 | es->s_kbytes_written = | 3240 | es->s_kbytes_written = |
3213 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + | 3241 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + |
3214 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 3242 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
@@ -3477,18 +3505,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3477 | if (sbi->s_journal) | 3505 | if (sbi->s_journal) |
3478 | ext4_mark_recovery_complete(sb, es); | 3506 | ext4_mark_recovery_complete(sb, es); |
3479 | } else { | 3507 | } else { |
3480 | int ret; | 3508 | /* Make sure we can mount this feature set readwrite */ |
3481 | if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3509 | if (!ext4_feature_set_ok(sb, 0)) { |
3482 | ~EXT4_FEATURE_RO_COMPAT_SUPP))) { | ||
3483 | ext4_msg(sb, KERN_WARNING, "couldn't " | ||
3484 | "remount RDWR because of unsupported " | ||
3485 | "optional features (%x)", | ||
3486 | (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & | ||
3487 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | ||
3488 | err = -EROFS; | 3510 | err = -EROFS; |
3489 | goto restore_opts; | 3511 | goto restore_opts; |
3490 | } | 3512 | } |
3491 | |||
3492 | /* | 3513 | /* |
3493 | * Make sure the group descriptor checksums | 3514 | * Make sure the group descriptor checksums |
3494 | * are sane. If they aren't, refuse to remount r/w. | 3515 | * are sane. If they aren't, refuse to remount r/w. |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 62b31c246994..fed5b01d7a8d 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -810,12 +810,23 @@ inserted: | |||
810 | get_bh(new_bh); | 810 | get_bh(new_bh); |
811 | } else { | 811 | } else { |
812 | /* We need to allocate a new block */ | 812 | /* We need to allocate a new block */ |
813 | ext4_fsblk_t goal = ext4_group_first_block_no(sb, | 813 | ext4_fsblk_t goal, block; |
814 | |||
815 | goal = ext4_group_first_block_no(sb, | ||
814 | EXT4_I(inode)->i_block_group); | 816 | EXT4_I(inode)->i_block_group); |
815 | ext4_fsblk_t block = ext4_new_meta_blocks(handle, inode, | 817 | |
818 | /* non-extent files can't have physical blocks past 2^32 */ | ||
819 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
820 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
821 | |||
822 | block = ext4_new_meta_blocks(handle, inode, | ||
816 | goal, NULL, &error); | 823 | goal, NULL, &error); |
817 | if (error) | 824 | if (error) |
818 | goto cleanup; | 825 | goto cleanup; |
826 | |||
827 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
828 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); | ||
829 | |||
819 | ea_idebug(inode, "creating block %d", block); | 830 | ea_idebug(inode, "creating block %d", block); |
820 | 831 | ||
821 | new_bh = sb_getblk(sb, block); | 832 | new_bh = sb_getblk(sb, block); |
diff --git a/fs/fcntl.c b/fs/fcntl.c index ae413086db97..fc089f2f7f56 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp) | |||
263 | return pid; | 263 | return pid; |
264 | } | 264 | } |
265 | 265 | ||
266 | static int f_setown_ex(struct file *filp, unsigned long arg) | ||
267 | { | ||
268 | struct f_owner_ex * __user owner_p = (void * __user)arg; | ||
269 | struct f_owner_ex owner; | ||
270 | struct pid *pid; | ||
271 | int type; | ||
272 | int ret; | ||
273 | |||
274 | ret = copy_from_user(&owner, owner_p, sizeof(owner)); | ||
275 | if (ret) | ||
276 | return ret; | ||
277 | |||
278 | switch (owner.type) { | ||
279 | case F_OWNER_TID: | ||
280 | type = PIDTYPE_MAX; | ||
281 | break; | ||
282 | |||
283 | case F_OWNER_PID: | ||
284 | type = PIDTYPE_PID; | ||
285 | break; | ||
286 | |||
287 | case F_OWNER_GID: | ||
288 | type = PIDTYPE_PGID; | ||
289 | break; | ||
290 | |||
291 | default: | ||
292 | return -EINVAL; | ||
293 | } | ||
294 | |||
295 | rcu_read_lock(); | ||
296 | pid = find_vpid(owner.pid); | ||
297 | if (owner.pid && !pid) | ||
298 | ret = -ESRCH; | ||
299 | else | ||
300 | ret = __f_setown(filp, pid, type, 1); | ||
301 | rcu_read_unlock(); | ||
302 | |||
303 | return ret; | ||
304 | } | ||
305 | |||
306 | static int f_getown_ex(struct file *filp, unsigned long arg) | ||
307 | { | ||
308 | struct f_owner_ex * __user owner_p = (void * __user)arg; | ||
309 | struct f_owner_ex owner; | ||
310 | int ret = 0; | ||
311 | |||
312 | read_lock(&filp->f_owner.lock); | ||
313 | owner.pid = pid_vnr(filp->f_owner.pid); | ||
314 | switch (filp->f_owner.pid_type) { | ||
315 | case PIDTYPE_MAX: | ||
316 | owner.type = F_OWNER_TID; | ||
317 | break; | ||
318 | |||
319 | case PIDTYPE_PID: | ||
320 | owner.type = F_OWNER_PID; | ||
321 | break; | ||
322 | |||
323 | case PIDTYPE_PGID: | ||
324 | owner.type = F_OWNER_GID; | ||
325 | break; | ||
326 | |||
327 | default: | ||
328 | WARN_ON(1); | ||
329 | ret = -EINVAL; | ||
330 | break; | ||
331 | } | ||
332 | read_unlock(&filp->f_owner.lock); | ||
333 | |||
334 | if (!ret) | ||
335 | ret = copy_to_user(owner_p, &owner, sizeof(owner)); | ||
336 | return ret; | ||
337 | } | ||
338 | |||
266 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | 339 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, |
267 | struct file *filp) | 340 | struct file *filp) |
268 | { | 341 | { |
@@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
313 | case F_SETOWN: | 386 | case F_SETOWN: |
314 | err = f_setown(filp, arg, 1); | 387 | err = f_setown(filp, arg, 1); |
315 | break; | 388 | break; |
389 | case F_GETOWN_EX: | ||
390 | err = f_getown_ex(filp, arg); | ||
391 | break; | ||
392 | case F_SETOWN_EX: | ||
393 | err = f_setown_ex(filp, arg); | ||
394 | break; | ||
316 | case F_GETSIG: | 395 | case F_GETSIG: |
317 | err = filp->f_owner.signum; | 396 | err = filp->f_owner.signum; |
318 | break; | 397 | break; |
@@ -428,8 +507,7 @@ static inline int sigio_perm(struct task_struct *p, | |||
428 | 507 | ||
429 | static void send_sigio_to_task(struct task_struct *p, | 508 | static void send_sigio_to_task(struct task_struct *p, |
430 | struct fown_struct *fown, | 509 | struct fown_struct *fown, |
431 | int fd, | 510 | int fd, int reason, int group) |
432 | int reason) | ||
433 | { | 511 | { |
434 | /* | 512 | /* |
435 | * F_SETSIG can change ->signum lockless in parallel, make | 513 | * F_SETSIG can change ->signum lockless in parallel, make |
@@ -461,11 +539,11 @@ static void send_sigio_to_task(struct task_struct *p, | |||
461 | else | 539 | else |
462 | si.si_band = band_table[reason - POLL_IN]; | 540 | si.si_band = band_table[reason - POLL_IN]; |
463 | si.si_fd = fd; | 541 | si.si_fd = fd; |
464 | if (!group_send_sig_info(signum, &si, p)) | 542 | if (!do_send_sig_info(signum, &si, p, group)) |
465 | break; | 543 | break; |
466 | /* fall-through: fall back on the old plain SIGIO signal */ | 544 | /* fall-through: fall back on the old plain SIGIO signal */ |
467 | case 0: | 545 | case 0: |
468 | group_send_sig_info(SIGIO, SEND_SIG_PRIV, p); | 546 | do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group); |
469 | } | 547 | } |
470 | } | 548 | } |
471 | 549 | ||
@@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown, int fd, int band) | |||
474 | struct task_struct *p; | 552 | struct task_struct *p; |
475 | enum pid_type type; | 553 | enum pid_type type; |
476 | struct pid *pid; | 554 | struct pid *pid; |
555 | int group = 1; | ||
477 | 556 | ||
478 | read_lock(&fown->lock); | 557 | read_lock(&fown->lock); |
558 | |||
479 | type = fown->pid_type; | 559 | type = fown->pid_type; |
560 | if (type == PIDTYPE_MAX) { | ||
561 | group = 0; | ||
562 | type = PIDTYPE_PID; | ||
563 | } | ||
564 | |||
480 | pid = fown->pid; | 565 | pid = fown->pid; |
481 | if (!pid) | 566 | if (!pid) |
482 | goto out_unlock_fown; | 567 | goto out_unlock_fown; |
483 | 568 | ||
484 | read_lock(&tasklist_lock); | 569 | read_lock(&tasklist_lock); |
485 | do_each_pid_task(pid, type, p) { | 570 | do_each_pid_task(pid, type, p) { |
486 | send_sigio_to_task(p, fown, fd, band); | 571 | send_sigio_to_task(p, fown, fd, band, group); |
487 | } while_each_pid_task(pid, type, p); | 572 | } while_each_pid_task(pid, type, p); |
488 | read_unlock(&tasklist_lock); | 573 | read_unlock(&tasklist_lock); |
489 | out_unlock_fown: | 574 | out_unlock_fown: |
@@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown, int fd, int band) | |||
491 | } | 576 | } |
492 | 577 | ||
493 | static void send_sigurg_to_task(struct task_struct *p, | 578 | static void send_sigurg_to_task(struct task_struct *p, |
494 | struct fown_struct *fown) | 579 | struct fown_struct *fown, int group) |
495 | { | 580 | { |
496 | if (sigio_perm(p, fown, SIGURG)) | 581 | if (sigio_perm(p, fown, SIGURG)) |
497 | group_send_sig_info(SIGURG, SEND_SIG_PRIV, p); | 582 | do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group); |
498 | } | 583 | } |
499 | 584 | ||
500 | int send_sigurg(struct fown_struct *fown) | 585 | int send_sigurg(struct fown_struct *fown) |
@@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown) | |||
502 | struct task_struct *p; | 587 | struct task_struct *p; |
503 | enum pid_type type; | 588 | enum pid_type type; |
504 | struct pid *pid; | 589 | struct pid *pid; |
590 | int group = 1; | ||
505 | int ret = 0; | 591 | int ret = 0; |
506 | 592 | ||
507 | read_lock(&fown->lock); | 593 | read_lock(&fown->lock); |
594 | |||
508 | type = fown->pid_type; | 595 | type = fown->pid_type; |
596 | if (type == PIDTYPE_MAX) { | ||
597 | group = 0; | ||
598 | type = PIDTYPE_PID; | ||
599 | } | ||
600 | |||
509 | pid = fown->pid; | 601 | pid = fown->pid; |
510 | if (!pid) | 602 | if (!pid) |
511 | goto out_unlock_fown; | 603 | goto out_unlock_fown; |
@@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown) | |||
514 | 606 | ||
515 | read_lock(&tasklist_lock); | 607 | read_lock(&tasklist_lock); |
516 | do_each_pid_task(pid, type, p) { | 608 | do_each_pid_task(pid, type, p) { |
517 | send_sigurg_to_task(p, fown); | 609 | send_sigurg_to_task(p, fown, group); |
518 | } while_each_pid_task(pid, type, p); | 610 | } while_each_pid_task(pid, type, p); |
519 | read_unlock(&tasklist_lock); | 611 | read_unlock(&tasklist_lock); |
520 | out_unlock_fown: | 612 | out_unlock_fown: |
diff --git a/fs/file_table.c b/fs/file_table.c index 334ce39881f8..8eb44042e009 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(get_max_files); | |||
74 | * Handle nr_files sysctl | 74 | * Handle nr_files sysctl |
75 | */ | 75 | */ |
76 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) | 76 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) |
77 | int proc_nr_files(ctl_table *table, int write, struct file *filp, | 77 | int proc_nr_files(ctl_table *table, int write, |
78 | void __user *buffer, size_t *lenp, loff_t *ppos) | 78 | void __user *buffer, size_t *lenp, loff_t *ppos) |
79 | { | 79 | { |
80 | files_stat.nr_files = get_nr_files(); | 80 | files_stat.nr_files = get_nr_files(); |
81 | return proc_dointvec(table, write, filp, buffer, lenp, ppos); | 81 | return proc_dointvec(table, write, buffer, lenp, ppos); |
82 | } | 82 | } |
83 | #else | 83 | #else |
84 | int proc_nr_files(ctl_table *table, int write, struct file *filp, | 84 | int proc_nr_files(ctl_table *table, int write, |
85 | void __user *buffer, size_t *lenp, loff_t *ppos) | 85 | void __user *buffer, size_t *lenp, loff_t *ppos) |
86 | { | 86 | { |
87 | return -ENOSYS; | 87 | return -ENOSYS; |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 628235cf44b5..8e1e5e19d21e 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -35,21 +35,29 @@ | |||
35 | int nr_pdflush_threads; | 35 | int nr_pdflush_threads; |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * Passed into wb_writeback(), essentially a subset of writeback_control | ||
39 | */ | ||
40 | struct wb_writeback_args { | ||
41 | long nr_pages; | ||
42 | struct super_block *sb; | ||
43 | enum writeback_sync_modes sync_mode; | ||
44 | int for_kupdate; | ||
45 | int range_cyclic; | ||
46 | }; | ||
47 | |||
48 | /* | ||
38 | * Work items for the bdi_writeback threads | 49 | * Work items for the bdi_writeback threads |
39 | */ | 50 | */ |
40 | struct bdi_work { | 51 | struct bdi_work { |
41 | struct list_head list; | 52 | struct list_head list; /* pending work list */ |
42 | struct list_head wait_list; | 53 | struct rcu_head rcu_head; /* for RCU free/clear of work */ |
43 | struct rcu_head rcu_head; | ||
44 | 54 | ||
45 | unsigned long seen; | 55 | unsigned long seen; /* threads that have seen this work */ |
46 | atomic_t pending; | 56 | atomic_t pending; /* number of threads still to do work */ |
47 | 57 | ||
48 | struct super_block *sb; | 58 | struct wb_writeback_args args; /* writeback arguments */ |
49 | unsigned long nr_pages; | ||
50 | enum writeback_sync_modes sync_mode; | ||
51 | 59 | ||
52 | unsigned long state; | 60 | unsigned long state; /* flag bits, see WS_* */ |
53 | }; | 61 | }; |
54 | 62 | ||
55 | enum { | 63 | enum { |
@@ -66,22 +74,13 @@ static inline bool bdi_work_on_stack(struct bdi_work *work) | |||
66 | } | 74 | } |
67 | 75 | ||
68 | static inline void bdi_work_init(struct bdi_work *work, | 76 | static inline void bdi_work_init(struct bdi_work *work, |
69 | struct writeback_control *wbc) | 77 | struct wb_writeback_args *args) |
70 | { | 78 | { |
71 | INIT_RCU_HEAD(&work->rcu_head); | 79 | INIT_RCU_HEAD(&work->rcu_head); |
72 | work->sb = wbc->sb; | 80 | work->args = *args; |
73 | work->nr_pages = wbc->nr_to_write; | ||
74 | work->sync_mode = wbc->sync_mode; | ||
75 | work->state = WS_USED; | 81 | work->state = WS_USED; |
76 | } | 82 | } |
77 | 83 | ||
78 | static inline void bdi_work_init_on_stack(struct bdi_work *work, | ||
79 | struct writeback_control *wbc) | ||
80 | { | ||
81 | bdi_work_init(work, wbc); | ||
82 | work->state |= WS_ONSTACK; | ||
83 | } | ||
84 | |||
85 | /** | 84 | /** |
86 | * writeback_in_progress - determine whether there is writeback in progress | 85 | * writeback_in_progress - determine whether there is writeback in progress |
87 | * @bdi: the device's backing_dev_info structure. | 86 | * @bdi: the device's backing_dev_info structure. |
@@ -98,6 +97,11 @@ static void bdi_work_clear(struct bdi_work *work) | |||
98 | { | 97 | { |
99 | clear_bit(WS_USED_B, &work->state); | 98 | clear_bit(WS_USED_B, &work->state); |
100 | smp_mb__after_clear_bit(); | 99 | smp_mb__after_clear_bit(); |
100 | /* | ||
101 | * work can have disappeared at this point. bit waitq functions | ||
102 | * should be able to tolerate this, provided bdi_sched_wait does | ||
103 | * not dereference it's pointer argument. | ||
104 | */ | ||
101 | wake_up_bit(&work->state, WS_USED_B); | 105 | wake_up_bit(&work->state, WS_USED_B); |
102 | } | 106 | } |
103 | 107 | ||
@@ -113,7 +117,8 @@ static void bdi_work_free(struct rcu_head *head) | |||
113 | 117 | ||
114 | static void wb_work_complete(struct bdi_work *work) | 118 | static void wb_work_complete(struct bdi_work *work) |
115 | { | 119 | { |
116 | const enum writeback_sync_modes sync_mode = work->sync_mode; | 120 | const enum writeback_sync_modes sync_mode = work->args.sync_mode; |
121 | int onstack = bdi_work_on_stack(work); | ||
117 | 122 | ||
118 | /* | 123 | /* |
119 | * For allocated work, we can clear the done/seen bit right here. | 124 | * For allocated work, we can clear the done/seen bit right here. |
@@ -121,9 +126,9 @@ static void wb_work_complete(struct bdi_work *work) | |||
121 | * to after the RCU grace period, since the stack could be invalidated | 126 | * to after the RCU grace period, since the stack could be invalidated |
122 | * as soon as bdi_work_clear() has done the wakeup. | 127 | * as soon as bdi_work_clear() has done the wakeup. |
123 | */ | 128 | */ |
124 | if (!bdi_work_on_stack(work)) | 129 | if (!onstack) |
125 | bdi_work_clear(work); | 130 | bdi_work_clear(work); |
126 | if (sync_mode == WB_SYNC_NONE || bdi_work_on_stack(work)) | 131 | if (sync_mode == WB_SYNC_NONE || onstack) |
127 | call_rcu(&work->rcu_head, bdi_work_free); | 132 | call_rcu(&work->rcu_head, bdi_work_free); |
128 | } | 133 | } |
129 | 134 | ||
@@ -146,21 +151,19 @@ static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) | |||
146 | 151 | ||
147 | static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) | 152 | static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) |
148 | { | 153 | { |
149 | if (work) { | 154 | work->seen = bdi->wb_mask; |
150 | work->seen = bdi->wb_mask; | 155 | BUG_ON(!work->seen); |
151 | BUG_ON(!work->seen); | 156 | atomic_set(&work->pending, bdi->wb_cnt); |
152 | atomic_set(&work->pending, bdi->wb_cnt); | 157 | BUG_ON(!bdi->wb_cnt); |
153 | BUG_ON(!bdi->wb_cnt); | ||
154 | |||
155 | /* | ||
156 | * Make sure stores are seen before it appears on the list | ||
157 | */ | ||
158 | smp_mb(); | ||
159 | 158 | ||
160 | spin_lock(&bdi->wb_lock); | 159 | /* |
161 | list_add_tail_rcu(&work->list, &bdi->work_list); | 160 | * list_add_tail_rcu() contains the necessary barriers to |
162 | spin_unlock(&bdi->wb_lock); | 161 | * make sure the above stores are seen before the item is |
163 | } | 162 | * noticed on the list |
163 | */ | ||
164 | spin_lock(&bdi->wb_lock); | ||
165 | list_add_tail_rcu(&work->list, &bdi->work_list); | ||
166 | spin_unlock(&bdi->wb_lock); | ||
164 | 167 | ||
165 | /* | 168 | /* |
166 | * If the default thread isn't there, make sure we add it. When | 169 | * If the default thread isn't there, make sure we add it. When |
@@ -171,15 +174,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) | |||
171 | else { | 174 | else { |
172 | struct bdi_writeback *wb = &bdi->wb; | 175 | struct bdi_writeback *wb = &bdi->wb; |
173 | 176 | ||
174 | /* | 177 | if (wb->task) |
175 | * If we failed allocating the bdi work item, wake up the wb | ||
176 | * thread always. As a safety precaution, it'll flush out | ||
177 | * everything | ||
178 | */ | ||
179 | if (!wb_has_dirty_io(wb)) { | ||
180 | if (work) | ||
181 | wb_clear_pending(wb, work); | ||
182 | } else if (wb->task) | ||
183 | wake_up_process(wb->task); | 178 | wake_up_process(wb->task); |
184 | } | 179 | } |
185 | } | 180 | } |
@@ -194,48 +189,75 @@ static void bdi_wait_on_work_clear(struct bdi_work *work) | |||
194 | TASK_UNINTERRUPTIBLE); | 189 | TASK_UNINTERRUPTIBLE); |
195 | } | 190 | } |
196 | 191 | ||
197 | static struct bdi_work *bdi_alloc_work(struct writeback_control *wbc) | 192 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, |
193 | struct wb_writeback_args *args) | ||
198 | { | 194 | { |
199 | struct bdi_work *work; | 195 | struct bdi_work *work; |
200 | 196 | ||
197 | /* | ||
198 | * This is WB_SYNC_NONE writeback, so if allocation fails just | ||
199 | * wakeup the thread for old dirty data writeback | ||
200 | */ | ||
201 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | 201 | work = kmalloc(sizeof(*work), GFP_ATOMIC); |
202 | if (work) | 202 | if (work) { |
203 | bdi_work_init(work, wbc); | 203 | bdi_work_init(work, args); |
204 | bdi_queue_work(bdi, work); | ||
205 | } else { | ||
206 | struct bdi_writeback *wb = &bdi->wb; | ||
204 | 207 | ||
205 | return work; | 208 | if (wb->task) |
209 | wake_up_process(wb->task); | ||
210 | } | ||
206 | } | 211 | } |
207 | 212 | ||
208 | void bdi_start_writeback(struct writeback_control *wbc) | 213 | /** |
214 | * bdi_sync_writeback - start and wait for writeback | ||
215 | * @bdi: the backing device to write from | ||
216 | * @sb: write inodes from this super_block | ||
217 | * | ||
218 | * Description: | ||
219 | * This does WB_SYNC_ALL data integrity writeback and waits for the | ||
220 | * IO to complete. Callers must hold the sb s_umount semaphore for | ||
221 | * reading, to avoid having the super disappear before we are done. | ||
222 | */ | ||
223 | static void bdi_sync_writeback(struct backing_dev_info *bdi, | ||
224 | struct super_block *sb) | ||
209 | { | 225 | { |
210 | const bool must_wait = wbc->sync_mode == WB_SYNC_ALL; | 226 | struct wb_writeback_args args = { |
211 | struct bdi_work work_stack, *work = NULL; | 227 | .sb = sb, |
228 | .sync_mode = WB_SYNC_ALL, | ||
229 | .nr_pages = LONG_MAX, | ||
230 | .range_cyclic = 0, | ||
231 | }; | ||
232 | struct bdi_work work; | ||
212 | 233 | ||
213 | if (!must_wait) | 234 | bdi_work_init(&work, &args); |
214 | work = bdi_alloc_work(wbc); | 235 | work.state |= WS_ONSTACK; |
215 | 236 | ||
216 | if (!work) { | 237 | bdi_queue_work(bdi, &work); |
217 | work = &work_stack; | 238 | bdi_wait_on_work_clear(&work); |
218 | bdi_work_init_on_stack(work, wbc); | 239 | } |
219 | } | ||
220 | 240 | ||
221 | bdi_queue_work(wbc->bdi, work); | 241 | /** |
242 | * bdi_start_writeback - start writeback | ||
243 | * @bdi: the backing device to write from | ||
244 | * @nr_pages: the number of pages to write | ||
245 | * | ||
246 | * Description: | ||
247 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only | ||
248 | * started when this function returns, we make no guarentees on | ||
249 | * completion. Caller need not hold sb s_umount semaphore. | ||
250 | * | ||
251 | */ | ||
252 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) | ||
253 | { | ||
254 | struct wb_writeback_args args = { | ||
255 | .sync_mode = WB_SYNC_NONE, | ||
256 | .nr_pages = nr_pages, | ||
257 | .range_cyclic = 1, | ||
258 | }; | ||
222 | 259 | ||
223 | /* | 260 | bdi_alloc_queue_work(bdi, &args); |
224 | * If the sync mode is WB_SYNC_ALL, block waiting for the work to | ||
225 | * complete. If not, we only need to wait for the work to be started, | ||
226 | * if we allocated it on-stack. We use the same mechanism, if the | ||
227 | * wait bit is set in the bdi_work struct, then threads will not | ||
228 | * clear pending until after they are done. | ||
229 | * | ||
230 | * Note that work == &work_stack if must_wait is true, so we don't | ||
231 | * need to do call_rcu() here ever, since the completion path will | ||
232 | * have done that for us. | ||
233 | */ | ||
234 | if (must_wait || work == &work_stack) { | ||
235 | bdi_wait_on_work_clear(work); | ||
236 | if (work != &work_stack) | ||
237 | call_rcu(&work->rcu_head, bdi_work_free); | ||
238 | } | ||
239 | } | 261 | } |
240 | 262 | ||
241 | /* | 263 | /* |
@@ -671,17 +693,16 @@ static inline bool over_bground_thresh(void) | |||
671 | * older_than_this takes precedence over nr_to_write. So we'll only write back | 693 | * older_than_this takes precedence over nr_to_write. So we'll only write back |
672 | * all dirty pages if they are all attached to "old" mappings. | 694 | * all dirty pages if they are all attached to "old" mappings. |
673 | */ | 695 | */ |
674 | static long wb_writeback(struct bdi_writeback *wb, long nr_pages, | 696 | static long wb_writeback(struct bdi_writeback *wb, |
675 | struct super_block *sb, | 697 | struct wb_writeback_args *args) |
676 | enum writeback_sync_modes sync_mode, int for_kupdate) | ||
677 | { | 698 | { |
678 | struct writeback_control wbc = { | 699 | struct writeback_control wbc = { |
679 | .bdi = wb->bdi, | 700 | .bdi = wb->bdi, |
680 | .sb = sb, | 701 | .sb = args->sb, |
681 | .sync_mode = sync_mode, | 702 | .sync_mode = args->sync_mode, |
682 | .older_than_this = NULL, | 703 | .older_than_this = NULL, |
683 | .for_kupdate = for_kupdate, | 704 | .for_kupdate = args->for_kupdate, |
684 | .range_cyclic = 1, | 705 | .range_cyclic = args->range_cyclic, |
685 | }; | 706 | }; |
686 | unsigned long oldest_jif; | 707 | unsigned long oldest_jif; |
687 | long wrote = 0; | 708 | long wrote = 0; |
@@ -691,13 +712,18 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages, | |||
691 | oldest_jif = jiffies - | 712 | oldest_jif = jiffies - |
692 | msecs_to_jiffies(dirty_expire_interval * 10); | 713 | msecs_to_jiffies(dirty_expire_interval * 10); |
693 | } | 714 | } |
715 | if (!wbc.range_cyclic) { | ||
716 | wbc.range_start = 0; | ||
717 | wbc.range_end = LLONG_MAX; | ||
718 | } | ||
694 | 719 | ||
695 | for (;;) { | 720 | for (;;) { |
696 | /* | 721 | /* |
697 | * Don't flush anything for non-integrity writeback where | 722 | * Don't flush anything for non-integrity writeback where |
698 | * no nr_pages was given | 723 | * no nr_pages was given |
699 | */ | 724 | */ |
700 | if (!for_kupdate && nr_pages <= 0 && sync_mode == WB_SYNC_NONE) | 725 | if (!args->for_kupdate && args->nr_pages <= 0 && |
726 | args->sync_mode == WB_SYNC_NONE) | ||
701 | break; | 727 | break; |
702 | 728 | ||
703 | /* | 729 | /* |
@@ -705,7 +731,8 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages, | |||
705 | * periodic background writeout and we are below the | 731 | * periodic background writeout and we are below the |
706 | * background dirty threshold, don't do anything | 732 | * background dirty threshold, don't do anything |
707 | */ | 733 | */ |
708 | if (for_kupdate && nr_pages <= 0 && !over_bground_thresh()) | 734 | if (args->for_kupdate && args->nr_pages <= 0 && |
735 | !over_bground_thresh()) | ||
709 | break; | 736 | break; |
710 | 737 | ||
711 | wbc.more_io = 0; | 738 | wbc.more_io = 0; |
@@ -713,7 +740,7 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages, | |||
713 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | 740 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; |
714 | wbc.pages_skipped = 0; | 741 | wbc.pages_skipped = 0; |
715 | writeback_inodes_wb(wb, &wbc); | 742 | writeback_inodes_wb(wb, &wbc); |
716 | nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 743 | args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
717 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 744 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
718 | 745 | ||
719 | /* | 746 | /* |
@@ -731,7 +758,11 @@ static long wb_writeback(struct bdi_writeback *wb, long nr_pages, | |||
731 | 758 | ||
732 | /* | 759 | /* |
733 | * Return the next bdi_work struct that hasn't been processed by this | 760 | * Return the next bdi_work struct that hasn't been processed by this |
734 | * wb thread yet | 761 | * wb thread yet. ->seen is initially set for each thread that exists |
762 | * for this device, when a thread first notices a piece of work it | ||
763 | * clears its bit. Depending on writeback type, the thread will notify | ||
764 | * completion on either receiving the work (WB_SYNC_NONE) or after | ||
765 | * it is done (WB_SYNC_ALL). | ||
735 | */ | 766 | */ |
736 | static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, | 767 | static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, |
737 | struct bdi_writeback *wb) | 768 | struct bdi_writeback *wb) |
@@ -741,8 +772,9 @@ static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, | |||
741 | rcu_read_lock(); | 772 | rcu_read_lock(); |
742 | 773 | ||
743 | list_for_each_entry_rcu(work, &bdi->work_list, list) { | 774 | list_for_each_entry_rcu(work, &bdi->work_list, list) { |
744 | if (!test_and_clear_bit(wb->nr, &work->seen)) | 775 | if (!test_bit(wb->nr, &work->seen)) |
745 | continue; | 776 | continue; |
777 | clear_bit(wb->nr, &work->seen); | ||
746 | 778 | ||
747 | ret = work; | 779 | ret = work; |
748 | break; | 780 | break; |
@@ -767,8 +799,16 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
767 | global_page_state(NR_UNSTABLE_NFS) + | 799 | global_page_state(NR_UNSTABLE_NFS) + |
768 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 800 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
769 | 801 | ||
770 | if (nr_pages) | 802 | if (nr_pages) { |
771 | return wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1); | 803 | struct wb_writeback_args args = { |
804 | .nr_pages = nr_pages, | ||
805 | .sync_mode = WB_SYNC_NONE, | ||
806 | .for_kupdate = 1, | ||
807 | .range_cyclic = 1, | ||
808 | }; | ||
809 | |||
810 | return wb_writeback(wb, &args); | ||
811 | } | ||
772 | 812 | ||
773 | return 0; | 813 | return 0; |
774 | } | 814 | } |
@@ -780,35 +820,31 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
780 | { | 820 | { |
781 | struct backing_dev_info *bdi = wb->bdi; | 821 | struct backing_dev_info *bdi = wb->bdi; |
782 | struct bdi_work *work; | 822 | struct bdi_work *work; |
783 | long nr_pages, wrote = 0; | 823 | long wrote = 0; |
784 | 824 | ||
785 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 825 | while ((work = get_next_work_item(bdi, wb)) != NULL) { |
786 | enum writeback_sync_modes sync_mode; | 826 | struct wb_writeback_args args = work->args; |
787 | |||
788 | nr_pages = work->nr_pages; | ||
789 | 827 | ||
790 | /* | 828 | /* |
791 | * Override sync mode, in case we must wait for completion | 829 | * Override sync mode, in case we must wait for completion |
792 | */ | 830 | */ |
793 | if (force_wait) | 831 | if (force_wait) |
794 | work->sync_mode = sync_mode = WB_SYNC_ALL; | 832 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; |
795 | else | ||
796 | sync_mode = work->sync_mode; | ||
797 | 833 | ||
798 | /* | 834 | /* |
799 | * If this isn't a data integrity operation, just notify | 835 | * If this isn't a data integrity operation, just notify |
800 | * that we have seen this work and we are now starting it. | 836 | * that we have seen this work and we are now starting it. |
801 | */ | 837 | */ |
802 | if (sync_mode == WB_SYNC_NONE) | 838 | if (args.sync_mode == WB_SYNC_NONE) |
803 | wb_clear_pending(wb, work); | 839 | wb_clear_pending(wb, work); |
804 | 840 | ||
805 | wrote += wb_writeback(wb, nr_pages, work->sb, sync_mode, 0); | 841 | wrote += wb_writeback(wb, &args); |
806 | 842 | ||
807 | /* | 843 | /* |
808 | * This is a data integrity writeback, so only do the | 844 | * This is a data integrity writeback, so only do the |
809 | * notification when we have completed the work. | 845 | * notification when we have completed the work. |
810 | */ | 846 | */ |
811 | if (sync_mode == WB_SYNC_ALL) | 847 | if (args.sync_mode == WB_SYNC_ALL) |
812 | wb_clear_pending(wb, work); | 848 | wb_clear_pending(wb, work); |
813 | } | 849 | } |
814 | 850 | ||
@@ -849,8 +885,7 @@ int bdi_writeback_task(struct bdi_writeback *wb) | |||
849 | } | 885 | } |
850 | 886 | ||
851 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); | 887 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); |
852 | set_current_state(TASK_INTERRUPTIBLE); | 888 | schedule_timeout_interruptible(wait_jiffies); |
853 | schedule_timeout(wait_jiffies); | ||
854 | try_to_freeze(); | 889 | try_to_freeze(); |
855 | } | 890 | } |
856 | 891 | ||
@@ -858,67 +893,28 @@ int bdi_writeback_task(struct bdi_writeback *wb) | |||
858 | } | 893 | } |
859 | 894 | ||
860 | /* | 895 | /* |
861 | * Schedule writeback for all backing devices. Expensive! If this is a data | 896 | * Schedule writeback for all backing devices. This does WB_SYNC_NONE |
862 | * integrity operation, writeback will be complete when this returns. If | 897 | * writeback; for integrity writeback see bdi_sync_writeback(). |
863 | * we are simply called for WB_SYNC_NONE, then writeback will merely be | ||
864 | * scheduled to run. | ||
865 | */ | 898 | */ |
866 | static void bdi_writeback_all(struct writeback_control *wbc) | 899 | static void bdi_writeback_all(struct super_block *sb, long nr_pages) |
867 | { | 900 | { |
868 | const bool must_wait = wbc->sync_mode == WB_SYNC_ALL; | 901 | struct wb_writeback_args args = { |
902 | .sb = sb, | ||
903 | .nr_pages = nr_pages, | ||
904 | .sync_mode = WB_SYNC_NONE, | ||
905 | }; | ||
869 | struct backing_dev_info *bdi; | 906 | struct backing_dev_info *bdi; |
870 | struct bdi_work *work; | ||
871 | LIST_HEAD(list); | ||
872 | |||
873 | restart: | ||
874 | spin_lock(&bdi_lock); | ||
875 | 907 | ||
876 | list_for_each_entry(bdi, &bdi_list, bdi_list) { | 908 | rcu_read_lock(); |
877 | struct bdi_work *work; | ||
878 | 909 | ||
910 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { | ||
879 | if (!bdi_has_dirty_io(bdi)) | 911 | if (!bdi_has_dirty_io(bdi)) |
880 | continue; | 912 | continue; |
881 | 913 | ||
882 | /* | 914 | bdi_alloc_queue_work(bdi, &args); |
883 | * If work allocation fails, do the writes inline. We drop | ||
884 | * the lock and restart the list writeout. This should be OK, | ||
885 | * since this happens rarely and because the writeout should | ||
886 | * eventually make more free memory available. | ||
887 | */ | ||
888 | work = bdi_alloc_work(wbc); | ||
889 | if (!work) { | ||
890 | struct writeback_control __wbc; | ||
891 | |||
892 | /* | ||
893 | * Not a data integrity writeout, just continue | ||
894 | */ | ||
895 | if (!must_wait) | ||
896 | continue; | ||
897 | |||
898 | spin_unlock(&bdi_lock); | ||
899 | __wbc = *wbc; | ||
900 | __wbc.bdi = bdi; | ||
901 | writeback_inodes_wbc(&__wbc); | ||
902 | goto restart; | ||
903 | } | ||
904 | if (must_wait) | ||
905 | list_add_tail(&work->wait_list, &list); | ||
906 | |||
907 | bdi_queue_work(bdi, work); | ||
908 | } | 915 | } |
909 | 916 | ||
910 | spin_unlock(&bdi_lock); | 917 | rcu_read_unlock(); |
911 | |||
912 | /* | ||
913 | * If this is for WB_SYNC_ALL, wait for pending work to complete | ||
914 | * before returning. | ||
915 | */ | ||
916 | while (!list_empty(&list)) { | ||
917 | work = list_entry(list.next, struct bdi_work, wait_list); | ||
918 | list_del(&work->wait_list); | ||
919 | bdi_wait_on_work_clear(work); | ||
920 | call_rcu(&work->rcu_head, bdi_work_free); | ||
921 | } | ||
922 | } | 918 | } |
923 | 919 | ||
924 | /* | 920 | /* |
@@ -927,17 +923,10 @@ restart: | |||
927 | */ | 923 | */ |
928 | void wakeup_flusher_threads(long nr_pages) | 924 | void wakeup_flusher_threads(long nr_pages) |
929 | { | 925 | { |
930 | struct writeback_control wbc = { | ||
931 | .sync_mode = WB_SYNC_NONE, | ||
932 | .older_than_this = NULL, | ||
933 | .range_cyclic = 1, | ||
934 | }; | ||
935 | |||
936 | if (nr_pages == 0) | 926 | if (nr_pages == 0) |
937 | nr_pages = global_page_state(NR_FILE_DIRTY) + | 927 | nr_pages = global_page_state(NR_FILE_DIRTY) + |
938 | global_page_state(NR_UNSTABLE_NFS); | 928 | global_page_state(NR_UNSTABLE_NFS); |
939 | wbc.nr_to_write = nr_pages; | 929 | bdi_writeback_all(NULL, nr_pages); |
940 | bdi_writeback_all(&wbc); | ||
941 | } | 930 | } |
942 | 931 | ||
943 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) | 932 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) |
@@ -1084,7 +1073,7 @@ EXPORT_SYMBOL(__mark_inode_dirty); | |||
1084 | * on the writer throttling path, and we get decent balancing between many | 1073 | * on the writer throttling path, and we get decent balancing between many |
1085 | * throttled threads: we don't want them all piling up on inode_sync_wait. | 1074 | * throttled threads: we don't want them all piling up on inode_sync_wait. |
1086 | */ | 1075 | */ |
1087 | static void wait_sb_inodes(struct writeback_control *wbc) | 1076 | static void wait_sb_inodes(struct super_block *sb) |
1088 | { | 1077 | { |
1089 | struct inode *inode, *old_inode = NULL; | 1078 | struct inode *inode, *old_inode = NULL; |
1090 | 1079 | ||
@@ -1092,7 +1081,7 @@ static void wait_sb_inodes(struct writeback_control *wbc) | |||
1092 | * We need to be protected against the filesystem going from | 1081 | * We need to be protected against the filesystem going from |
1093 | * r/o to r/w or vice versa. | 1082 | * r/o to r/w or vice versa. |
1094 | */ | 1083 | */ |
1095 | WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount)); | 1084 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
1096 | 1085 | ||
1097 | spin_lock(&inode_lock); | 1086 | spin_lock(&inode_lock); |
1098 | 1087 | ||
@@ -1103,7 +1092,7 @@ static void wait_sb_inodes(struct writeback_control *wbc) | |||
1103 | * In which case, the inode may not be on the dirty list, but | 1092 | * In which case, the inode may not be on the dirty list, but |
1104 | * we still have to wait for that writeout. | 1093 | * we still have to wait for that writeout. |
1105 | */ | 1094 | */ |
1106 | list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) { | 1095 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
1107 | struct address_space *mapping; | 1096 | struct address_space *mapping; |
1108 | 1097 | ||
1109 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | 1098 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) |
@@ -1143,14 +1132,8 @@ static void wait_sb_inodes(struct writeback_control *wbc) | |||
1143 | * for IO completion of submitted IO. The number of pages submitted is | 1132 | * for IO completion of submitted IO. The number of pages submitted is |
1144 | * returned. | 1133 | * returned. |
1145 | */ | 1134 | */ |
1146 | long writeback_inodes_sb(struct super_block *sb) | 1135 | void writeback_inodes_sb(struct super_block *sb) |
1147 | { | 1136 | { |
1148 | struct writeback_control wbc = { | ||
1149 | .sb = sb, | ||
1150 | .sync_mode = WB_SYNC_NONE, | ||
1151 | .range_start = 0, | ||
1152 | .range_end = LLONG_MAX, | ||
1153 | }; | ||
1154 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 1137 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); |
1155 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | 1138 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); |
1156 | long nr_to_write; | 1139 | long nr_to_write; |
@@ -1158,9 +1141,7 @@ long writeback_inodes_sb(struct super_block *sb) | |||
1158 | nr_to_write = nr_dirty + nr_unstable + | 1141 | nr_to_write = nr_dirty + nr_unstable + |
1159 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 1142 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
1160 | 1143 | ||
1161 | wbc.nr_to_write = nr_to_write; | 1144 | bdi_writeback_all(sb, nr_to_write); |
1162 | bdi_writeback_all(&wbc); | ||
1163 | return nr_to_write - wbc.nr_to_write; | ||
1164 | } | 1145 | } |
1165 | EXPORT_SYMBOL(writeback_inodes_sb); | 1146 | EXPORT_SYMBOL(writeback_inodes_sb); |
1166 | 1147 | ||
@@ -1171,20 +1152,10 @@ EXPORT_SYMBOL(writeback_inodes_sb); | |||
1171 | * This function writes and waits on any dirty inode belonging to this | 1152 | * This function writes and waits on any dirty inode belonging to this |
1172 | * super_block. The number of pages synced is returned. | 1153 | * super_block. The number of pages synced is returned. |
1173 | */ | 1154 | */ |
1174 | long sync_inodes_sb(struct super_block *sb) | 1155 | void sync_inodes_sb(struct super_block *sb) |
1175 | { | 1156 | { |
1176 | struct writeback_control wbc = { | 1157 | bdi_sync_writeback(sb->s_bdi, sb); |
1177 | .sb = sb, | 1158 | wait_sb_inodes(sb); |
1178 | .sync_mode = WB_SYNC_ALL, | ||
1179 | .range_start = 0, | ||
1180 | .range_end = LLONG_MAX, | ||
1181 | }; | ||
1182 | long nr_to_write = LONG_MAX; /* doesn't actually matter */ | ||
1183 | |||
1184 | wbc.nr_to_write = nr_to_write; | ||
1185 | bdi_writeback_all(&wbc); | ||
1186 | wait_sb_inodes(&wbc); | ||
1187 | return nr_to_write - wbc.nr_to_write; | ||
1188 | } | 1159 | } |
1189 | EXPORT_SYMBOL(sync_inodes_sb); | 1160 | EXPORT_SYMBOL(sync_inodes_sb); |
1190 | 1161 | ||
diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 99c99dfb0373..3773fd63d2f9 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c | |||
@@ -61,6 +61,121 @@ static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf, | |||
61 | return simple_read_from_buffer(buf, len, ppos, tmp, size); | 61 | return simple_read_from_buffer(buf, len, ppos, tmp, size); |
62 | } | 62 | } |
63 | 63 | ||
64 | static ssize_t fuse_conn_limit_read(struct file *file, char __user *buf, | ||
65 | size_t len, loff_t *ppos, unsigned val) | ||
66 | { | ||
67 | char tmp[32]; | ||
68 | size_t size = sprintf(tmp, "%u\n", val); | ||
69 | |||
70 | return simple_read_from_buffer(buf, len, ppos, tmp, size); | ||
71 | } | ||
72 | |||
73 | static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf, | ||
74 | size_t count, loff_t *ppos, unsigned *val, | ||
75 | unsigned global_limit) | ||
76 | { | ||
77 | unsigned long t; | ||
78 | char tmp[32]; | ||
79 | unsigned limit = (1 << 16) - 1; | ||
80 | int err; | ||
81 | |||
82 | if (*ppos || count >= sizeof(tmp) - 1) | ||
83 | return -EINVAL; | ||
84 | |||
85 | if (copy_from_user(tmp, buf, count)) | ||
86 | return -EINVAL; | ||
87 | |||
88 | tmp[count] = '\0'; | ||
89 | |||
90 | err = strict_strtoul(tmp, 0, &t); | ||
91 | if (err) | ||
92 | return err; | ||
93 | |||
94 | if (!capable(CAP_SYS_ADMIN)) | ||
95 | limit = min(limit, global_limit); | ||
96 | |||
97 | if (t > limit) | ||
98 | return -EINVAL; | ||
99 | |||
100 | *val = t; | ||
101 | |||
102 | return count; | ||
103 | } | ||
104 | |||
105 | static ssize_t fuse_conn_max_background_read(struct file *file, | ||
106 | char __user *buf, size_t len, | ||
107 | loff_t *ppos) | ||
108 | { | ||
109 | struct fuse_conn *fc; | ||
110 | unsigned val; | ||
111 | |||
112 | fc = fuse_ctl_file_conn_get(file); | ||
113 | if (!fc) | ||
114 | return 0; | ||
115 | |||
116 | val = fc->max_background; | ||
117 | fuse_conn_put(fc); | ||
118 | |||
119 | return fuse_conn_limit_read(file, buf, len, ppos, val); | ||
120 | } | ||
121 | |||
122 | static ssize_t fuse_conn_max_background_write(struct file *file, | ||
123 | const char __user *buf, | ||
124 | size_t count, loff_t *ppos) | ||
125 | { | ||
126 | unsigned val; | ||
127 | ssize_t ret; | ||
128 | |||
129 | ret = fuse_conn_limit_write(file, buf, count, ppos, &val, | ||
130 | max_user_bgreq); | ||
131 | if (ret > 0) { | ||
132 | struct fuse_conn *fc = fuse_ctl_file_conn_get(file); | ||
133 | if (fc) { | ||
134 | fc->max_background = val; | ||
135 | fuse_conn_put(fc); | ||
136 | } | ||
137 | } | ||
138 | |||
139 | return ret; | ||
140 | } | ||
141 | |||
142 | static ssize_t fuse_conn_congestion_threshold_read(struct file *file, | ||
143 | char __user *buf, size_t len, | ||
144 | loff_t *ppos) | ||
145 | { | ||
146 | struct fuse_conn *fc; | ||
147 | unsigned val; | ||
148 | |||
149 | fc = fuse_ctl_file_conn_get(file); | ||
150 | if (!fc) | ||
151 | return 0; | ||
152 | |||
153 | val = fc->congestion_threshold; | ||
154 | fuse_conn_put(fc); | ||
155 | |||
156 | return fuse_conn_limit_read(file, buf, len, ppos, val); | ||
157 | } | ||
158 | |||
159 | static ssize_t fuse_conn_congestion_threshold_write(struct file *file, | ||
160 | const char __user *buf, | ||
161 | size_t count, loff_t *ppos) | ||
162 | { | ||
163 | unsigned val; | ||
164 | ssize_t ret; | ||
165 | |||
166 | ret = fuse_conn_limit_write(file, buf, count, ppos, &val, | ||
167 | max_user_congthresh); | ||
168 | if (ret > 0) { | ||
169 | struct fuse_conn *fc = fuse_ctl_file_conn_get(file); | ||
170 | if (fc) { | ||
171 | fc->congestion_threshold = val; | ||
172 | fuse_conn_put(fc); | ||
173 | } | ||
174 | } | ||
175 | |||
176 | return ret; | ||
177 | } | ||
178 | |||
64 | static const struct file_operations fuse_ctl_abort_ops = { | 179 | static const struct file_operations fuse_ctl_abort_ops = { |
65 | .open = nonseekable_open, | 180 | .open = nonseekable_open, |
66 | .write = fuse_conn_abort_write, | 181 | .write = fuse_conn_abort_write, |
@@ -71,6 +186,18 @@ static const struct file_operations fuse_ctl_waiting_ops = { | |||
71 | .read = fuse_conn_waiting_read, | 186 | .read = fuse_conn_waiting_read, |
72 | }; | 187 | }; |
73 | 188 | ||
189 | static const struct file_operations fuse_conn_max_background_ops = { | ||
190 | .open = nonseekable_open, | ||
191 | .read = fuse_conn_max_background_read, | ||
192 | .write = fuse_conn_max_background_write, | ||
193 | }; | ||
194 | |||
195 | static const struct file_operations fuse_conn_congestion_threshold_ops = { | ||
196 | .open = nonseekable_open, | ||
197 | .read = fuse_conn_congestion_threshold_read, | ||
198 | .write = fuse_conn_congestion_threshold_write, | ||
199 | }; | ||
200 | |||
74 | static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, | 201 | static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, |
75 | struct fuse_conn *fc, | 202 | struct fuse_conn *fc, |
76 | const char *name, | 203 | const char *name, |
@@ -127,9 +254,14 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) | |||
127 | goto err; | 254 | goto err; |
128 | 255 | ||
129 | if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, | 256 | if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, |
130 | NULL, &fuse_ctl_waiting_ops) || | 257 | NULL, &fuse_ctl_waiting_ops) || |
131 | !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, | 258 | !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, |
132 | NULL, &fuse_ctl_abort_ops)) | 259 | NULL, &fuse_ctl_abort_ops) || |
260 | !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600, | ||
261 | 1, NULL, &fuse_conn_max_background_ops) || | ||
262 | !fuse_ctl_add_dentry(parent, fc, "congestion_threshold", | ||
263 | S_IFREG | 0600, 1, NULL, | ||
264 | &fuse_conn_congestion_threshold_ops)) | ||
133 | goto err; | 265 | goto err; |
134 | 266 | ||
135 | return 0; | 267 | return 0; |
@@ -156,7 +288,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) | |||
156 | d_drop(dentry); | 288 | d_drop(dentry); |
157 | dput(dentry); | 289 | dput(dentry); |
158 | } | 290 | } |
159 | fuse_control_sb->s_root->d_inode->i_nlink--; | 291 | drop_nlink(fuse_control_sb->s_root->d_inode); |
160 | } | 292 | } |
161 | 293 | ||
162 | static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) | 294 | static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6484eb75acd6..51d9e33d634f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -250,7 +250,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) | |||
250 | 250 | ||
251 | static void flush_bg_queue(struct fuse_conn *fc) | 251 | static void flush_bg_queue(struct fuse_conn *fc) |
252 | { | 252 | { |
253 | while (fc->active_background < FUSE_MAX_BACKGROUND && | 253 | while (fc->active_background < fc->max_background && |
254 | !list_empty(&fc->bg_queue)) { | 254 | !list_empty(&fc->bg_queue)) { |
255 | struct fuse_req *req; | 255 | struct fuse_req *req; |
256 | 256 | ||
@@ -280,11 +280,11 @@ __releases(&fc->lock) | |||
280 | list_del(&req->intr_entry); | 280 | list_del(&req->intr_entry); |
281 | req->state = FUSE_REQ_FINISHED; | 281 | req->state = FUSE_REQ_FINISHED; |
282 | if (req->background) { | 282 | if (req->background) { |
283 | if (fc->num_background == FUSE_MAX_BACKGROUND) { | 283 | if (fc->num_background == fc->max_background) { |
284 | fc->blocked = 0; | 284 | fc->blocked = 0; |
285 | wake_up_all(&fc->blocked_waitq); | 285 | wake_up_all(&fc->blocked_waitq); |
286 | } | 286 | } |
287 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD && | 287 | if (fc->num_background == fc->congestion_threshold && |
288 | fc->connected && fc->bdi_initialized) { | 288 | fc->connected && fc->bdi_initialized) { |
289 | clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); | 289 | clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); |
290 | clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); | 290 | clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); |
@@ -410,9 +410,9 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, | |||
410 | { | 410 | { |
411 | req->background = 1; | 411 | req->background = 1; |
412 | fc->num_background++; | 412 | fc->num_background++; |
413 | if (fc->num_background == FUSE_MAX_BACKGROUND) | 413 | if (fc->num_background == fc->max_background) |
414 | fc->blocked = 1; | 414 | fc->blocked = 1; |
415 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD && | 415 | if (fc->num_background == fc->congestion_threshold && |
416 | fc->bdi_initialized) { | 416 | fc->bdi_initialized) { |
417 | set_bdi_congested(&fc->bdi, BLK_RW_SYNC); | 417 | set_bdi_congested(&fc->bdi, BLK_RW_SYNC); |
418 | set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); | 418 | set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 52b641fc0faf..fc9c79feb5f7 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -25,12 +25,6 @@ | |||
25 | /** Max number of pages that can be used in a single read request */ | 25 | /** Max number of pages that can be used in a single read request */ |
26 | #define FUSE_MAX_PAGES_PER_REQ 32 | 26 | #define FUSE_MAX_PAGES_PER_REQ 32 |
27 | 27 | ||
28 | /** Maximum number of outstanding background requests */ | ||
29 | #define FUSE_MAX_BACKGROUND 12 | ||
30 | |||
31 | /** Congestion starts at 75% of maximum */ | ||
32 | #define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) | ||
33 | |||
34 | /** Bias for fi->writectr, meaning new writepages must not be sent */ | 28 | /** Bias for fi->writectr, meaning new writepages must not be sent */ |
35 | #define FUSE_NOWRITE INT_MIN | 29 | #define FUSE_NOWRITE INT_MIN |
36 | 30 | ||
@@ -38,7 +32,7 @@ | |||
38 | #define FUSE_NAME_MAX 1024 | 32 | #define FUSE_NAME_MAX 1024 |
39 | 33 | ||
40 | /** Number of dentries for each connection in the control filesystem */ | 34 | /** Number of dentries for each connection in the control filesystem */ |
41 | #define FUSE_CTL_NUM_DENTRIES 3 | 35 | #define FUSE_CTL_NUM_DENTRIES 5 |
42 | 36 | ||
43 | /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem | 37 | /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem |
44 | module will check permissions based on the file mode. Otherwise no | 38 | module will check permissions based on the file mode. Otherwise no |
@@ -55,6 +49,10 @@ extern struct list_head fuse_conn_list; | |||
55 | /** Global mutex protecting fuse_conn_list and the control filesystem */ | 49 | /** Global mutex protecting fuse_conn_list and the control filesystem */ |
56 | extern struct mutex fuse_mutex; | 50 | extern struct mutex fuse_mutex; |
57 | 51 | ||
52 | /** Module parameters */ | ||
53 | extern unsigned max_user_bgreq; | ||
54 | extern unsigned max_user_congthresh; | ||
55 | |||
58 | /** FUSE inode */ | 56 | /** FUSE inode */ |
59 | struct fuse_inode { | 57 | struct fuse_inode { |
60 | /** Inode data */ | 58 | /** Inode data */ |
@@ -349,6 +347,12 @@ struct fuse_conn { | |||
349 | /** rbtree of fuse_files waiting for poll events indexed by ph */ | 347 | /** rbtree of fuse_files waiting for poll events indexed by ph */ |
350 | struct rb_root polled_files; | 348 | struct rb_root polled_files; |
351 | 349 | ||
350 | /** Maximum number of outstanding background requests */ | ||
351 | unsigned max_background; | ||
352 | |||
353 | /** Number of background requests at which congestion starts */ | ||
354 | unsigned congestion_threshold; | ||
355 | |||
352 | /** Number of requests currently in the background */ | 356 | /** Number of requests currently in the background */ |
353 | unsigned num_background; | 357 | unsigned num_background; |
354 | 358 | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4567db6f9430..6da947daabda 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/seq_file.h> | 14 | #include <linux/seq_file.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/moduleparam.h> | ||
17 | #include <linux/parser.h> | 18 | #include <linux/parser.h> |
18 | #include <linux/statfs.h> | 19 | #include <linux/statfs.h> |
19 | #include <linux/random.h> | 20 | #include <linux/random.h> |
@@ -28,10 +29,34 @@ static struct kmem_cache *fuse_inode_cachep; | |||
28 | struct list_head fuse_conn_list; | 29 | struct list_head fuse_conn_list; |
29 | DEFINE_MUTEX(fuse_mutex); | 30 | DEFINE_MUTEX(fuse_mutex); |
30 | 31 | ||
32 | static int set_global_limit(const char *val, struct kernel_param *kp); | ||
33 | |||
34 | unsigned max_user_bgreq; | ||
35 | module_param_call(max_user_bgreq, set_global_limit, param_get_uint, | ||
36 | &max_user_bgreq, 0644); | ||
37 | __MODULE_PARM_TYPE(max_user_bgreq, "uint"); | ||
38 | MODULE_PARM_DESC(max_user_bgreq, | ||
39 | "Global limit for the maximum number of backgrounded requests an " | ||
40 | "unprivileged user can set"); | ||
41 | |||
42 | unsigned max_user_congthresh; | ||
43 | module_param_call(max_user_congthresh, set_global_limit, param_get_uint, | ||
44 | &max_user_congthresh, 0644); | ||
45 | __MODULE_PARM_TYPE(max_user_congthresh, "uint"); | ||
46 | MODULE_PARM_DESC(max_user_congthresh, | ||
47 | "Global limit for the maximum congestion threshold an " | ||
48 | "unprivileged user can set"); | ||
49 | |||
31 | #define FUSE_SUPER_MAGIC 0x65735546 | 50 | #define FUSE_SUPER_MAGIC 0x65735546 |
32 | 51 | ||
33 | #define FUSE_DEFAULT_BLKSIZE 512 | 52 | #define FUSE_DEFAULT_BLKSIZE 512 |
34 | 53 | ||
54 | /** Maximum number of outstanding background requests */ | ||
55 | #define FUSE_DEFAULT_MAX_BACKGROUND 12 | ||
56 | |||
57 | /** Congestion starts at 75% of maximum */ | ||
58 | #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) | ||
59 | |||
35 | struct fuse_mount_data { | 60 | struct fuse_mount_data { |
36 | int fd; | 61 | int fd; |
37 | unsigned rootmode; | 62 | unsigned rootmode; |
@@ -517,6 +542,8 @@ void fuse_conn_init(struct fuse_conn *fc) | |||
517 | INIT_LIST_HEAD(&fc->bg_queue); | 542 | INIT_LIST_HEAD(&fc->bg_queue); |
518 | INIT_LIST_HEAD(&fc->entry); | 543 | INIT_LIST_HEAD(&fc->entry); |
519 | atomic_set(&fc->num_waiting, 0); | 544 | atomic_set(&fc->num_waiting, 0); |
545 | fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; | ||
546 | fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; | ||
520 | fc->khctr = 0; | 547 | fc->khctr = 0; |
521 | fc->polled_files = RB_ROOT; | 548 | fc->polled_files = RB_ROOT; |
522 | fc->reqctr = 0; | 549 | fc->reqctr = 0; |
@@ -727,6 +754,54 @@ static const struct super_operations fuse_super_operations = { | |||
727 | .show_options = fuse_show_options, | 754 | .show_options = fuse_show_options, |
728 | }; | 755 | }; |
729 | 756 | ||
757 | static void sanitize_global_limit(unsigned *limit) | ||
758 | { | ||
759 | if (*limit == 0) | ||
760 | *limit = ((num_physpages << PAGE_SHIFT) >> 13) / | ||
761 | sizeof(struct fuse_req); | ||
762 | |||
763 | if (*limit >= 1 << 16) | ||
764 | *limit = (1 << 16) - 1; | ||
765 | } | ||
766 | |||
767 | static int set_global_limit(const char *val, struct kernel_param *kp) | ||
768 | { | ||
769 | int rv; | ||
770 | |||
771 | rv = param_set_uint(val, kp); | ||
772 | if (rv) | ||
773 | return rv; | ||
774 | |||
775 | sanitize_global_limit((unsigned *)kp->arg); | ||
776 | |||
777 | return 0; | ||
778 | } | ||
779 | |||
780 | static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) | ||
781 | { | ||
782 | int cap_sys_admin = capable(CAP_SYS_ADMIN); | ||
783 | |||
784 | if (arg->minor < 13) | ||
785 | return; | ||
786 | |||
787 | sanitize_global_limit(&max_user_bgreq); | ||
788 | sanitize_global_limit(&max_user_congthresh); | ||
789 | |||
790 | if (arg->max_background) { | ||
791 | fc->max_background = arg->max_background; | ||
792 | |||
793 | if (!cap_sys_admin && fc->max_background > max_user_bgreq) | ||
794 | fc->max_background = max_user_bgreq; | ||
795 | } | ||
796 | if (arg->congestion_threshold) { | ||
797 | fc->congestion_threshold = arg->congestion_threshold; | ||
798 | |||
799 | if (!cap_sys_admin && | ||
800 | fc->congestion_threshold > max_user_congthresh) | ||
801 | fc->congestion_threshold = max_user_congthresh; | ||
802 | } | ||
803 | } | ||
804 | |||
730 | static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | 805 | static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) |
731 | { | 806 | { |
732 | struct fuse_init_out *arg = &req->misc.init_out; | 807 | struct fuse_init_out *arg = &req->misc.init_out; |
@@ -736,6 +811,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
736 | else { | 811 | else { |
737 | unsigned long ra_pages; | 812 | unsigned long ra_pages; |
738 | 813 | ||
814 | process_init_limits(fc, arg); | ||
815 | |||
739 | if (arg->minor >= 6) { | 816 | if (arg->minor >= 6) { |
740 | ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; | 817 | ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; |
741 | if (arg->flags & FUSE_ASYNC_READ) | 818 | if (arg->flags & FUSE_ASYNC_READ) |
@@ -894,6 +971,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
894 | if (err) | 971 | if (err) |
895 | goto err_put_conn; | 972 | goto err_put_conn; |
896 | 973 | ||
974 | sb->s_bdi = &fc->bdi; | ||
975 | |||
897 | /* Handle umasking inside the fuse code */ | 976 | /* Handle umasking inside the fuse code */ |
898 | if (sb->s_flags & MS_POSIXACL) | 977 | if (sb->s_flags & MS_POSIXACL) |
899 | fc->dont_mask = 1; | 978 | fc->dont_mask = 1; |
@@ -1148,6 +1227,9 @@ static int __init fuse_init(void) | |||
1148 | if (res) | 1227 | if (res) |
1149 | goto err_sysfs_cleanup; | 1228 | goto err_sysfs_cleanup; |
1150 | 1229 | ||
1230 | sanitize_global_limit(&max_user_bgreq); | ||
1231 | sanitize_global_limit(&max_user_congthresh); | ||
1232 | |||
1151 | return 0; | 1233 | return 0; |
1152 | 1234 | ||
1153 | err_sysfs_cleanup: | 1235 | err_sysfs_cleanup: |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index c3ac18054057..247436c10deb 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/completion.h> | 12 | #include <linux/completion.h> |
13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> |
14 | #include <linux/namei.h> | 14 | #include <linux/namei.h> |
15 | #include <linux/utsname.h> | ||
16 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
17 | #include <linux/xattr.h> | 16 | #include <linux/xattr.h> |
18 | #include <linux/posix_acl.h> | 17 | #include <linux/posix_acl.h> |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 28c590b7c9da..8f1cfb02a6cb 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -179,7 +179,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) | |||
179 | * always aligned to a 64 bit boundary. | 179 | * always aligned to a 64 bit boundary. |
180 | * | 180 | * |
181 | * The size of the buffer is in bytes, but it is assumed that it is | 181 | * The size of the buffer is in bytes, but it is assumed that it is |
182 | * always ok to to read a complete multiple of 64 bits at the end | 182 | * always ok to read a complete multiple of 64 bits at the end |
183 | * of the block in case the end is not aligned to a natural boundary. | 183 | * of the block in case the end is not aligned to a natural boundary. |
184 | * | 184 | * |
185 | * Return: the block number (bitmap buffer scope) that was found | 185 | * Return: the block number (bitmap buffer scope) that was found |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a93b885311d8..133335479c24 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -31,12 +31,10 @@ | |||
31 | #include <linux/statfs.h> | 31 | #include <linux/statfs.h> |
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/ima.h> | 33 | #include <linux/ima.h> |
34 | #include <linux/magic.h> | ||
34 | 35 | ||
35 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
36 | 37 | ||
37 | /* some random number */ | ||
38 | #define HUGETLBFS_MAGIC 0x958458f6 | ||
39 | |||
40 | static const struct super_operations hugetlbfs_ops; | 38 | static const struct super_operations hugetlbfs_ops; |
41 | static const struct address_space_operations hugetlbfs_aops; | 39 | static const struct address_space_operations hugetlbfs_aops; |
42 | const struct file_operations hugetlbfs_file_operations; | 40 | const struct file_operations hugetlbfs_file_operations; |
@@ -507,6 +505,13 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, | |||
507 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 505 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
508 | INIT_LIST_HEAD(&inode->i_mapping->private_list); | 506 | INIT_LIST_HEAD(&inode->i_mapping->private_list); |
509 | info = HUGETLBFS_I(inode); | 507 | info = HUGETLBFS_I(inode); |
508 | /* | ||
509 | * The policy is initialized here even if we are creating a | ||
510 | * private inode because initialization simply creates an | ||
510 | * empty rb tree and calls spin_lock_init(); later when we | ||
512 | * call mpol_free_shared_policy() it will just return because | ||
513 | * the rb tree will still be empty. | ||
514 | */ | ||
510 | mpol_shared_policy_init(&info->policy, NULL); | 515 | mpol_shared_policy_init(&info->policy, NULL); |
511 | switch (mode & S_IFMT) { | 516 | switch (mode & S_IFMT) { |
512 | default: | 517 | default: |
@@ -937,7 +942,7 @@ static int can_do_hugetlb_shm(void) | |||
937 | } | 942 | } |
938 | 943 | ||
939 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, | 944 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, |
940 | struct user_struct **user) | 945 | struct user_struct **user, int creat_flags) |
941 | { | 946 | { |
942 | int error = -ENOMEM; | 947 | int error = -ENOMEM; |
943 | struct file *file; | 948 | struct file *file; |
@@ -949,7 +954,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, | |||
949 | if (!hugetlbfs_vfsmount) | 954 | if (!hugetlbfs_vfsmount) |
950 | return ERR_PTR(-ENOENT); | 955 | return ERR_PTR(-ENOENT); |
951 | 956 | ||
952 | if (!can_do_hugetlb_shm()) { | 957 | if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { |
953 | *user = current_user(); | 958 | *user = current_user(); |
954 | if (user_shm_lock(size, *user)) { | 959 | if (user_shm_lock(size, *user)) { |
955 | WARN_ONCE(1, | 960 | WARN_ONCE(1, |
diff --git a/fs/inode.c b/fs/inode.c index ae7b67e48661..76582b06ab97 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/backing-dev.h> | 15 | #include <linux/backing-dev.h> |
16 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
17 | #include <linux/rwsem.h> | ||
17 | #include <linux/hash.h> | 18 | #include <linux/hash.h> |
18 | #include <linux/swap.h> | 19 | #include <linux/swap.h> |
19 | #include <linux/security.h> | 20 | #include <linux/security.h> |
@@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly; | |||
87 | DEFINE_SPINLOCK(inode_lock); | 88 | DEFINE_SPINLOCK(inode_lock); |
88 | 89 | ||
89 | /* | 90 | /* |
90 | * iprune_mutex provides exclusion between the kswapd or try_to_free_pages | 91 | * iprune_sem provides exclusion between the kswapd or try_to_free_pages |
91 | * icache shrinking path, and the umount path. Without this exclusion, | 92 | * icache shrinking path, and the umount path. Without this exclusion, |
92 | * by the time prune_icache calls iput for the inode whose pages it has | 93 | * by the time prune_icache calls iput for the inode whose pages it has |
93 | * been invalidating, or by the time it calls clear_inode & destroy_inode | 94 | * been invalidating, or by the time it calls clear_inode & destroy_inode |
94 | * from its final dispose_list, the struct super_block they refer to | 95 | * from its final dispose_list, the struct super_block they refer to |
95 | * (for inode->i_sb->s_op) may already have been freed and reused. | 96 | * (for inode->i_sb->s_op) may already have been freed and reused. |
97 | * | ||
98 | * We make this an rwsem because the fastpath is icache shrinking. In | ||
99 | * some cases a filesystem may be doing a significant amount of work in | ||
100 | * its inode reclaim code, so this should improve parallelism. | ||
96 | */ | 101 | */ |
97 | static DEFINE_MUTEX(iprune_mutex); | 102 | static DECLARE_RWSEM(iprune_sem); |
98 | 103 | ||
99 | /* | 104 | /* |
100 | * Statistics gathering.. | 105 | * Statistics gathering.. |
@@ -123,7 +128,7 @@ static void wake_up_inode(struct inode *inode) | |||
123 | int inode_init_always(struct super_block *sb, struct inode *inode) | 128 | int inode_init_always(struct super_block *sb, struct inode *inode) |
124 | { | 129 | { |
125 | static const struct address_space_operations empty_aops; | 130 | static const struct address_space_operations empty_aops; |
126 | static struct inode_operations empty_iops; | 131 | static const struct inode_operations empty_iops; |
127 | static const struct file_operations empty_fops; | 132 | static const struct file_operations empty_fops; |
128 | struct address_space *const mapping = &inode->i_data; | 133 | struct address_space *const mapping = &inode->i_data; |
129 | 134 | ||
@@ -182,9 +187,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
182 | if (sb->s_bdev) { | 187 | if (sb->s_bdev) { |
183 | struct backing_dev_info *bdi; | 188 | struct backing_dev_info *bdi; |
184 | 189 | ||
185 | bdi = sb->s_bdev->bd_inode_backing_dev_info; | 190 | bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; |
186 | if (!bdi) | ||
187 | bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; | ||
188 | mapping->backing_dev_info = bdi; | 191 | mapping->backing_dev_info = bdi; |
189 | } | 192 | } |
190 | inode->i_private = NULL; | 193 | inode->i_private = NULL; |
@@ -383,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) | |||
383 | /* | 386 | /* |
384 | * We can reschedule here without worrying about the list's | 387 | * We can reschedule here without worrying about the list's |
385 | * consistency because the per-sb list of inodes must not | 388 | * consistency because the per-sb list of inodes must not |
386 | * change during umount anymore, and because iprune_mutex keeps | 389 | * change during umount anymore, and because iprune_sem keeps |
387 | * shrink_icache_memory() away. | 390 | * shrink_icache_memory() away. |
388 | */ | 391 | */ |
389 | cond_resched_lock(&inode_lock); | 392 | cond_resched_lock(&inode_lock); |
@@ -422,7 +425,7 @@ int invalidate_inodes(struct super_block *sb) | |||
422 | int busy; | 425 | int busy; |
423 | LIST_HEAD(throw_away); | 426 | LIST_HEAD(throw_away); |
424 | 427 | ||
425 | mutex_lock(&iprune_mutex); | 428 | down_write(&iprune_sem); |
426 | spin_lock(&inode_lock); | 429 | spin_lock(&inode_lock); |
427 | inotify_unmount_inodes(&sb->s_inodes); | 430 | inotify_unmount_inodes(&sb->s_inodes); |
428 | fsnotify_unmount_inodes(&sb->s_inodes); | 431 | fsnotify_unmount_inodes(&sb->s_inodes); |
@@ -430,7 +433,7 @@ int invalidate_inodes(struct super_block *sb) | |||
430 | spin_unlock(&inode_lock); | 433 | spin_unlock(&inode_lock); |
431 | 434 | ||
432 | dispose_list(&throw_away); | 435 | dispose_list(&throw_away); |
433 | mutex_unlock(&iprune_mutex); | 436 | up_write(&iprune_sem); |
434 | 437 | ||
435 | return busy; | 438 | return busy; |
436 | } | 439 | } |
@@ -469,7 +472,7 @@ static void prune_icache(int nr_to_scan) | |||
469 | int nr_scanned; | 472 | int nr_scanned; |
470 | unsigned long reap = 0; | 473 | unsigned long reap = 0; |
471 | 474 | ||
472 | mutex_lock(&iprune_mutex); | 475 | down_read(&iprune_sem); |
473 | spin_lock(&inode_lock); | 476 | spin_lock(&inode_lock); |
474 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { | 477 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { |
475 | struct inode *inode; | 478 | struct inode *inode; |
@@ -511,7 +514,7 @@ static void prune_icache(int nr_to_scan) | |||
511 | spin_unlock(&inode_lock); | 514 | spin_unlock(&inode_lock); |
512 | 515 | ||
513 | dispose_list(&freeable); | 516 | dispose_list(&freeable); |
514 | mutex_unlock(&iprune_mutex); | 517 | up_read(&iprune_sem); |
515 | } | 518 | } |
516 | 519 | ||
517 | /* | 520 | /* |
@@ -697,13 +700,15 @@ void unlock_new_inode(struct inode *inode) | |||
697 | } | 700 | } |
698 | #endif | 701 | #endif |
699 | /* | 702 | /* |
700 | * This is special! We do not need the spinlock | 703 | * This is special! We do not need the spinlock when clearing I_LOCK, |
701 | * when clearing I_LOCK, because we're guaranteed | 704 | * because we're guaranteed that nobody else tries to do anything about |
702 | * that nobody else tries to do anything about the | 705 | * the state of the inode when it is locked, as we just created it (so |
703 | * state of the inode when it is locked, as we | 706 | * there can be no old holders that haven't tested I_LOCK). |
704 | * just created it (so there can be no old holders | 707 | * However we must emit the memory barrier so that other CPUs reliably |
705 | * that haven't tested I_LOCK). | 708 | * see the clearing of I_LOCK after the other inode initialisation has |
709 | * completed. | ||
706 | */ | 710 | */ |
711 | smp_mb(); | ||
707 | WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); | 712 | WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); |
708 | inode->i_state &= ~(I_LOCK|I_NEW); | 713 | inode->i_state &= ~(I_LOCK|I_NEW); |
709 | wake_up_inode(inode); | 714 | wake_up_inode(inode); |
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 61f32f3868cd..b0435dd0654d 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c | |||
@@ -456,7 +456,7 @@ int cleanup_journal_tail(journal_t *journal) | |||
456 | { | 456 | { |
457 | transaction_t * transaction; | 457 | transaction_t * transaction; |
458 | tid_t first_tid; | 458 | tid_t first_tid; |
459 | unsigned long blocknr, freed; | 459 | unsigned int blocknr, freed; |
460 | 460 | ||
461 | if (is_journal_aborted(journal)) | 461 | if (is_journal_aborted(journal)) |
462 | return 1; | 462 | return 1; |
@@ -502,8 +502,8 @@ int cleanup_journal_tail(journal_t *journal) | |||
502 | freed = freed + journal->j_last - journal->j_first; | 502 | freed = freed + journal->j_last - journal->j_first; |
503 | 503 | ||
504 | jbd_debug(1, | 504 | jbd_debug(1, |
505 | "Cleaning journal tail from %d to %d (offset %lu), " | 505 | "Cleaning journal tail from %d to %d (offset %u), " |
506 | "freeing %lu\n", | 506 | "freeing %u\n", |
507 | journal->j_tail_sequence, first_tid, blocknr, freed); | 507 | journal->j_tail_sequence, first_tid, blocknr, freed); |
508 | 508 | ||
509 | journal->j_free += freed; | 509 | journal->j_free += freed; |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 618e21c0b7a3..4bd882548c45 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -308,7 +308,7 @@ void journal_commit_transaction(journal_t *journal) | |||
308 | int bufs; | 308 | int bufs; |
309 | int flags; | 309 | int flags; |
310 | int err; | 310 | int err; |
311 | unsigned long blocknr; | 311 | unsigned int blocknr; |
312 | ktime_t start_time; | 312 | ktime_t start_time; |
313 | u64 commit_time; | 313 | u64 commit_time; |
314 | char *tagp = NULL; | 314 | char *tagp = NULL; |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f96f85092d1c..bd3c073b485d 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -276,7 +276,7 @@ static void journal_kill_thread(journal_t *journal) | |||
276 | int journal_write_metadata_buffer(transaction_t *transaction, | 276 | int journal_write_metadata_buffer(transaction_t *transaction, |
277 | struct journal_head *jh_in, | 277 | struct journal_head *jh_in, |
278 | struct journal_head **jh_out, | 278 | struct journal_head **jh_out, |
279 | unsigned long blocknr) | 279 | unsigned int blocknr) |
280 | { | 280 | { |
281 | int need_copy_out = 0; | 281 | int need_copy_out = 0; |
282 | int done_copy_out = 0; | 282 | int done_copy_out = 0; |
@@ -567,9 +567,9 @@ int log_wait_commit(journal_t *journal, tid_t tid) | |||
567 | * Log buffer allocation routines: | 567 | * Log buffer allocation routines: |
568 | */ | 568 | */ |
569 | 569 | ||
570 | int journal_next_log_block(journal_t *journal, unsigned long *retp) | 570 | int journal_next_log_block(journal_t *journal, unsigned int *retp) |
571 | { | 571 | { |
572 | unsigned long blocknr; | 572 | unsigned int blocknr; |
573 | 573 | ||
574 | spin_lock(&journal->j_state_lock); | 574 | spin_lock(&journal->j_state_lock); |
575 | J_ASSERT(journal->j_free > 1); | 575 | J_ASSERT(journal->j_free > 1); |
@@ -590,11 +590,11 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp) | |||
590 | * this is a no-op. If needed, we can use j_blk_offset - everything is | 590 | * this is a no-op. If needed, we can use j_blk_offset - everything is |
591 | * ready. | 591 | * ready. |
592 | */ | 592 | */ |
593 | int journal_bmap(journal_t *journal, unsigned long blocknr, | 593 | int journal_bmap(journal_t *journal, unsigned int blocknr, |
594 | unsigned long *retp) | 594 | unsigned int *retp) |
595 | { | 595 | { |
596 | int err = 0; | 596 | int err = 0; |
597 | unsigned long ret; | 597 | unsigned int ret; |
598 | 598 | ||
599 | if (journal->j_inode) { | 599 | if (journal->j_inode) { |
600 | ret = bmap(journal->j_inode, blocknr); | 600 | ret = bmap(journal->j_inode, blocknr); |
@@ -604,7 +604,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr, | |||
604 | char b[BDEVNAME_SIZE]; | 604 | char b[BDEVNAME_SIZE]; |
605 | 605 | ||
606 | printk(KERN_ALERT "%s: journal block not found " | 606 | printk(KERN_ALERT "%s: journal block not found " |
607 | "at offset %lu on %s\n", | 607 | "at offset %u on %s\n", |
608 | __func__, | 608 | __func__, |
609 | blocknr, | 609 | blocknr, |
610 | bdevname(journal->j_dev, b)); | 610 | bdevname(journal->j_dev, b)); |
@@ -630,7 +630,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr, | |||
630 | struct journal_head *journal_get_descriptor_buffer(journal_t *journal) | 630 | struct journal_head *journal_get_descriptor_buffer(journal_t *journal) |
631 | { | 631 | { |
632 | struct buffer_head *bh; | 632 | struct buffer_head *bh; |
633 | unsigned long blocknr; | 633 | unsigned int blocknr; |
634 | int err; | 634 | int err; |
635 | 635 | ||
636 | err = journal_next_log_block(journal, &blocknr); | 636 | err = journal_next_log_block(journal, &blocknr); |
@@ -774,7 +774,7 @@ journal_t * journal_init_inode (struct inode *inode) | |||
774 | journal_t *journal = journal_init_common(); | 774 | journal_t *journal = journal_init_common(); |
775 | int err; | 775 | int err; |
776 | int n; | 776 | int n; |
777 | unsigned long blocknr; | 777 | unsigned int blocknr; |
778 | 778 | ||
779 | if (!journal) | 779 | if (!journal) |
780 | return NULL; | 780 | return NULL; |
@@ -846,12 +846,12 @@ static void journal_fail_superblock (journal_t *journal) | |||
846 | static int journal_reset(journal_t *journal) | 846 | static int journal_reset(journal_t *journal) |
847 | { | 847 | { |
848 | journal_superblock_t *sb = journal->j_superblock; | 848 | journal_superblock_t *sb = journal->j_superblock; |
849 | unsigned long first, last; | 849 | unsigned int first, last; |
850 | 850 | ||
851 | first = be32_to_cpu(sb->s_first); | 851 | first = be32_to_cpu(sb->s_first); |
852 | last = be32_to_cpu(sb->s_maxlen); | 852 | last = be32_to_cpu(sb->s_maxlen); |
853 | if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) { | 853 | if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) { |
854 | printk(KERN_ERR "JBD: Journal too short (blocks %lu-%lu).\n", | 854 | printk(KERN_ERR "JBD: Journal too short (blocks %u-%u).\n", |
855 | first, last); | 855 | first, last); |
856 | journal_fail_superblock(journal); | 856 | journal_fail_superblock(journal); |
857 | return -EINVAL; | 857 | return -EINVAL; |
@@ -885,7 +885,7 @@ static int journal_reset(journal_t *journal) | |||
885 | **/ | 885 | **/ |
886 | int journal_create(journal_t *journal) | 886 | int journal_create(journal_t *journal) |
887 | { | 887 | { |
888 | unsigned long blocknr; | 888 | unsigned int blocknr; |
889 | struct buffer_head *bh; | 889 | struct buffer_head *bh; |
890 | journal_superblock_t *sb; | 890 | journal_superblock_t *sb; |
891 | int i, err; | 891 | int i, err; |
@@ -969,14 +969,14 @@ void journal_update_superblock(journal_t *journal, int wait) | |||
969 | if (sb->s_start == 0 && journal->j_tail_sequence == | 969 | if (sb->s_start == 0 && journal->j_tail_sequence == |
970 | journal->j_transaction_sequence) { | 970 | journal->j_transaction_sequence) { |
971 | jbd_debug(1,"JBD: Skipping superblock update on recovered sb " | 971 | jbd_debug(1,"JBD: Skipping superblock update on recovered sb " |
972 | "(start %ld, seq %d, errno %d)\n", | 972 | "(start %u, seq %d, errno %d)\n", |
973 | journal->j_tail, journal->j_tail_sequence, | 973 | journal->j_tail, journal->j_tail_sequence, |
974 | journal->j_errno); | 974 | journal->j_errno); |
975 | goto out; | 975 | goto out; |
976 | } | 976 | } |
977 | 977 | ||
978 | spin_lock(&journal->j_state_lock); | 978 | spin_lock(&journal->j_state_lock); |
979 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", | 979 | jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n", |
980 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 980 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); |
981 | 981 | ||
982 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); | 982 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); |
@@ -1371,7 +1371,7 @@ int journal_flush(journal_t *journal) | |||
1371 | { | 1371 | { |
1372 | int err = 0; | 1372 | int err = 0; |
1373 | transaction_t *transaction = NULL; | 1373 | transaction_t *transaction = NULL; |
1374 | unsigned long old_tail; | 1374 | unsigned int old_tail; |
1375 | 1375 | ||
1376 | spin_lock(&journal->j_state_lock); | 1376 | spin_lock(&journal->j_state_lock); |
1377 | 1377 | ||
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index db5e982c5ddf..cb1a49ae605e 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c | |||
@@ -70,7 +70,7 @@ static int do_readahead(journal_t *journal, unsigned int start) | |||
70 | { | 70 | { |
71 | int err; | 71 | int err; |
72 | unsigned int max, nbufs, next; | 72 | unsigned int max, nbufs, next; |
73 | unsigned long blocknr; | 73 | unsigned int blocknr; |
74 | struct buffer_head *bh; | 74 | struct buffer_head *bh; |
75 | 75 | ||
76 | struct buffer_head * bufs[MAXBUF]; | 76 | struct buffer_head * bufs[MAXBUF]; |
@@ -132,7 +132,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, | |||
132 | unsigned int offset) | 132 | unsigned int offset) |
133 | { | 133 | { |
134 | int err; | 134 | int err; |
135 | unsigned long blocknr; | 135 | unsigned int blocknr; |
136 | struct buffer_head *bh; | 136 | struct buffer_head *bh; |
137 | 137 | ||
138 | *bhp = NULL; | 138 | *bhp = NULL; |
@@ -314,7 +314,7 @@ static int do_one_pass(journal_t *journal, | |||
314 | struct recovery_info *info, enum passtype pass) | 314 | struct recovery_info *info, enum passtype pass) |
315 | { | 315 | { |
316 | unsigned int first_commit_ID, next_commit_ID; | 316 | unsigned int first_commit_ID, next_commit_ID; |
317 | unsigned long next_log_block; | 317 | unsigned int next_log_block; |
318 | int err, success = 0; | 318 | int err, success = 0; |
319 | journal_superblock_t * sb; | 319 | journal_superblock_t * sb; |
320 | journal_header_t * tmp; | 320 | journal_header_t * tmp; |
@@ -367,14 +367,14 @@ static int do_one_pass(journal_t *journal, | |||
367 | if (tid_geq(next_commit_ID, info->end_transaction)) | 367 | if (tid_geq(next_commit_ID, info->end_transaction)) |
368 | break; | 368 | break; |
369 | 369 | ||
370 | jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", | 370 | jbd_debug(2, "Scanning for sequence ID %u at %u/%u\n", |
371 | next_commit_ID, next_log_block, journal->j_last); | 371 | next_commit_ID, next_log_block, journal->j_last); |
372 | 372 | ||
373 | /* Skip over each chunk of the transaction looking | 373 | /* Skip over each chunk of the transaction looking |
374 | * either the next descriptor block or the final commit | 374 | * either the next descriptor block or the final commit |
375 | * record. */ | 375 | * record. */ |
376 | 376 | ||
377 | jbd_debug(3, "JBD: checking block %ld\n", next_log_block); | 377 | jbd_debug(3, "JBD: checking block %u\n", next_log_block); |
378 | err = jread(&bh, journal, next_log_block); | 378 | err = jread(&bh, journal, next_log_block); |
379 | if (err) | 379 | if (err) |
380 | goto failed; | 380 | goto failed; |
@@ -429,7 +429,7 @@ static int do_one_pass(journal_t *journal, | |||
429 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 429 | tagp = &bh->b_data[sizeof(journal_header_t)]; |
430 | while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) | 430 | while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) |
431 | <= journal->j_blocksize) { | 431 | <= journal->j_blocksize) { |
432 | unsigned long io_block; | 432 | unsigned int io_block; |
433 | 433 | ||
434 | tag = (journal_block_tag_t *) tagp; | 434 | tag = (journal_block_tag_t *) tagp; |
435 | flags = be32_to_cpu(tag->t_flags); | 435 | flags = be32_to_cpu(tag->t_flags); |
@@ -443,10 +443,10 @@ static int do_one_pass(journal_t *journal, | |||
443 | success = err; | 443 | success = err; |
444 | printk (KERN_ERR | 444 | printk (KERN_ERR |
445 | "JBD: IO error %d recovering " | 445 | "JBD: IO error %d recovering " |
446 | "block %ld in log\n", | 446 | "block %u in log\n", |
447 | err, io_block); | 447 | err, io_block); |
448 | } else { | 448 | } else { |
449 | unsigned long blocknr; | 449 | unsigned int blocknr; |
450 | 450 | ||
451 | J_ASSERT(obh != NULL); | 451 | J_ASSERT(obh != NULL); |
452 | blocknr = be32_to_cpu(tag->t_blocknr); | 452 | blocknr = be32_to_cpu(tag->t_blocknr); |
@@ -581,7 +581,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, | |||
581 | max = be32_to_cpu(header->r_count); | 581 | max = be32_to_cpu(header->r_count); |
582 | 582 | ||
583 | while (offset < max) { | 583 | while (offset < max) { |
584 | unsigned long blocknr; | 584 | unsigned int blocknr; |
585 | int err; | 585 | int err; |
586 | 586 | ||
587 | blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); | 587 | blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); |
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index da6cd9bdaabc..ad717328343a 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c | |||
@@ -101,7 +101,7 @@ struct jbd_revoke_record_s | |||
101 | { | 101 | { |
102 | struct list_head hash; | 102 | struct list_head hash; |
103 | tid_t sequence; /* Used for recovery only */ | 103 | tid_t sequence; /* Used for recovery only */ |
104 | unsigned long blocknr; | 104 | unsigned int blocknr; |
105 | }; | 105 | }; |
106 | 106 | ||
107 | 107 | ||
@@ -126,7 +126,7 @@ static void flush_descriptor(journal_t *, struct journal_head *, int, int); | |||
126 | /* Utility functions to maintain the revoke table */ | 126 | /* Utility functions to maintain the revoke table */ |
127 | 127 | ||
128 | /* Borrowed from buffer.c: this is a tried and tested block hash function */ | 128 | /* Borrowed from buffer.c: this is a tried and tested block hash function */ |
129 | static inline int hash(journal_t *journal, unsigned long block) | 129 | static inline int hash(journal_t *journal, unsigned int block) |
130 | { | 130 | { |
131 | struct jbd_revoke_table_s *table = journal->j_revoke; | 131 | struct jbd_revoke_table_s *table = journal->j_revoke; |
132 | int hash_shift = table->hash_shift; | 132 | int hash_shift = table->hash_shift; |
@@ -136,7 +136,7 @@ static inline int hash(journal_t *journal, unsigned long block) | |||
136 | (block << (hash_shift - 12))) & (table->hash_size - 1); | 136 | (block << (hash_shift - 12))) & (table->hash_size - 1); |
137 | } | 137 | } |
138 | 138 | ||
139 | static int insert_revoke_hash(journal_t *journal, unsigned long blocknr, | 139 | static int insert_revoke_hash(journal_t *journal, unsigned int blocknr, |
140 | tid_t seq) | 140 | tid_t seq) |
141 | { | 141 | { |
142 | struct list_head *hash_list; | 142 | struct list_head *hash_list; |
@@ -166,7 +166,7 @@ oom: | |||
166 | /* Find a revoke record in the journal's hash table. */ | 166 | /* Find a revoke record in the journal's hash table. */ |
167 | 167 | ||
168 | static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, | 168 | static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, |
169 | unsigned long blocknr) | 169 | unsigned int blocknr) |
170 | { | 170 | { |
171 | struct list_head *hash_list; | 171 | struct list_head *hash_list; |
172 | struct jbd_revoke_record_s *record; | 172 | struct jbd_revoke_record_s *record; |
@@ -332,7 +332,7 @@ void journal_destroy_revoke(journal_t *journal) | |||
332 | * by one. | 332 | * by one. |
333 | */ | 333 | */ |
334 | 334 | ||
335 | int journal_revoke(handle_t *handle, unsigned long blocknr, | 335 | int journal_revoke(handle_t *handle, unsigned int blocknr, |
336 | struct buffer_head *bh_in) | 336 | struct buffer_head *bh_in) |
337 | { | 337 | { |
338 | struct buffer_head *bh = NULL; | 338 | struct buffer_head *bh = NULL; |
@@ -401,7 +401,7 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, | |||
401 | } | 401 | } |
402 | } | 402 | } |
403 | 403 | ||
404 | jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in); | 404 | jbd_debug(2, "insert revoke for block %u, bh_in=%p\n", blocknr, bh_in); |
405 | err = insert_revoke_hash(journal, blocknr, | 405 | err = insert_revoke_hash(journal, blocknr, |
406 | handle->h_transaction->t_tid); | 406 | handle->h_transaction->t_tid); |
407 | BUFFER_TRACE(bh_in, "exit"); | 407 | BUFFER_TRACE(bh_in, "exit"); |
@@ -644,7 +644,7 @@ static void flush_descriptor(journal_t *journal, | |||
644 | */ | 644 | */ |
645 | 645 | ||
646 | int journal_set_revoke(journal_t *journal, | 646 | int journal_set_revoke(journal_t *journal, |
647 | unsigned long blocknr, | 647 | unsigned int blocknr, |
648 | tid_t sequence) | 648 | tid_t sequence) |
649 | { | 649 | { |
650 | struct jbd_revoke_record_s *record; | 650 | struct jbd_revoke_record_s *record; |
@@ -668,7 +668,7 @@ int journal_set_revoke(journal_t *journal, | |||
668 | */ | 668 | */ |
669 | 669 | ||
670 | int journal_test_revoke(journal_t *journal, | 670 | int journal_test_revoke(journal_t *journal, |
671 | unsigned long blocknr, | 671 | unsigned int blocknr, |
672 | tid_t sequence) | 672 | tid_t sequence) |
673 | { | 673 | { |
674 | struct jbd_revoke_record_s *record; | 674 | struct jbd_revoke_record_s *record; |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index c03ac11f74be..006f9ad838a2 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -56,7 +56,8 @@ get_transaction(journal_t *journal, transaction_t *transaction) | |||
56 | spin_lock_init(&transaction->t_handle_lock); | 56 | spin_lock_init(&transaction->t_handle_lock); |
57 | 57 | ||
58 | /* Set up the commit timer for the new transaction. */ | 58 | /* Set up the commit timer for the new transaction. */ |
59 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 59 | journal->j_commit_timer.expires = |
60 | round_jiffies_up(transaction->t_expires); | ||
60 | add_timer(&journal->j_commit_timer); | 61 | add_timer(&journal->j_commit_timer); |
61 | 62 | ||
62 | J_ASSERT(journal->j_running_transaction == NULL); | 63 | J_ASSERT(journal->j_running_transaction == NULL); |
@@ -228,6 +229,8 @@ repeat_locked: | |||
228 | __log_space_left(journal)); | 229 | __log_space_left(journal)); |
229 | spin_unlock(&transaction->t_handle_lock); | 230 | spin_unlock(&transaction->t_handle_lock); |
230 | spin_unlock(&journal->j_state_lock); | 231 | spin_unlock(&journal->j_state_lock); |
232 | |||
233 | lock_map_acquire(&handle->h_lockdep_map); | ||
231 | out: | 234 | out: |
232 | if (unlikely(new_transaction)) /* It's usually NULL */ | 235 | if (unlikely(new_transaction)) /* It's usually NULL */ |
233 | kfree(new_transaction); | 236 | kfree(new_transaction); |
@@ -292,9 +295,6 @@ handle_t *journal_start(journal_t *journal, int nblocks) | |||
292 | handle = ERR_PTR(err); | 295 | handle = ERR_PTR(err); |
293 | goto out; | 296 | goto out; |
294 | } | 297 | } |
295 | |||
296 | lock_map_acquire(&handle->h_lockdep_map); | ||
297 | |||
298 | out: | 298 | out: |
299 | return handle; | 299 | return handle; |
300 | } | 300 | } |
@@ -416,6 +416,7 @@ int journal_restart(handle_t *handle, int nblocks) | |||
416 | __log_start_commit(journal, transaction->t_tid); | 416 | __log_start_commit(journal, transaction->t_tid); |
417 | spin_unlock(&journal->j_state_lock); | 417 | spin_unlock(&journal->j_state_lock); |
418 | 418 | ||
419 | lock_map_release(&handle->h_lockdep_map); | ||
419 | handle->h_buffer_credits = nblocks; | 420 | handle->h_buffer_credits = nblocks; |
420 | ret = start_this_handle(journal, handle); | 421 | ret = start_this_handle(journal, handle); |
421 | return ret; | 422 | return ret; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7b4088b2364d..26d991ddc1e6 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/writeback.h> | 25 | #include <linux/writeback.h> |
26 | #include <linux/backing-dev.h> | 26 | #include <linux/backing-dev.h> |
27 | #include <linux/bio.h> | 27 | #include <linux/bio.h> |
28 | #include <linux/blkdev.h> | ||
28 | #include <trace/events/jbd2.h> | 29 | #include <trace/events/jbd2.h> |
29 | 30 | ||
30 | /* | 31 | /* |
@@ -133,8 +134,8 @@ static int journal_submit_commit_record(journal_t *journal, | |||
133 | bh->b_end_io = journal_end_buffer_io_sync; | 134 | bh->b_end_io = journal_end_buffer_io_sync; |
134 | 135 | ||
135 | if (journal->j_flags & JBD2_BARRIER && | 136 | if (journal->j_flags & JBD2_BARRIER && |
136 | !JBD2_HAS_INCOMPAT_FEATURE(journal, | 137 | !JBD2_HAS_INCOMPAT_FEATURE(journal, |
137 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 138 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
138 | set_buffer_ordered(bh); | 139 | set_buffer_ordered(bh); |
139 | barrier_done = 1; | 140 | barrier_done = 1; |
140 | } | 141 | } |
@@ -220,7 +221,6 @@ static int journal_submit_inode_data_buffers(struct address_space *mapping) | |||
220 | .nr_to_write = mapping->nrpages * 2, | 221 | .nr_to_write = mapping->nrpages * 2, |
221 | .range_start = 0, | 222 | .range_start = 0, |
222 | .range_end = i_size_read(mapping->host), | 223 | .range_end = i_size_read(mapping->host), |
223 | .for_writepages = 1, | ||
224 | }; | 224 | }; |
225 | 225 | ||
226 | ret = generic_writepages(mapping, &wbc); | 226 | ret = generic_writepages(mapping, &wbc); |
@@ -707,11 +707,13 @@ start_journal_io: | |||
707 | /* Done it all: now write the commit record asynchronously. */ | 707 | /* Done it all: now write the commit record asynchronously. */ |
708 | 708 | ||
709 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 709 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, |
710 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 710 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
711 | err = journal_submit_commit_record(journal, commit_transaction, | 711 | err = journal_submit_commit_record(journal, commit_transaction, |
712 | &cbh, crc32_sum); | 712 | &cbh, crc32_sum); |
713 | if (err) | 713 | if (err) |
714 | __jbd2_journal_abort_hard(journal); | 714 | __jbd2_journal_abort_hard(journal); |
715 | if (journal->j_flags & JBD2_BARRIER) | ||
716 | blkdev_issue_flush(journal->j_dev, NULL); | ||
715 | } | 717 | } |
716 | 718 | ||
717 | /* | 719 | /* |
@@ -834,7 +836,7 @@ wait_for_iobuf: | |||
834 | jbd_debug(3, "JBD: commit phase 5\n"); | 836 | jbd_debug(3, "JBD: commit phase 5\n"); |
835 | 837 | ||
836 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 838 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
837 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 839 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
838 | err = journal_submit_commit_record(journal, commit_transaction, | 840 | err = journal_submit_commit_record(journal, commit_transaction, |
839 | &cbh, crc32_sum); | 841 | &cbh, crc32_sum); |
840 | if (err) | 842 | if (err) |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e378cb383979..53b86e16e5fe 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -768,7 +768,7 @@ static void jbd2_seq_history_stop(struct seq_file *seq, void *v) | |||
768 | { | 768 | { |
769 | } | 769 | } |
770 | 770 | ||
771 | static struct seq_operations jbd2_seq_history_ops = { | 771 | static const struct seq_operations jbd2_seq_history_ops = { |
772 | .start = jbd2_seq_history_start, | 772 | .start = jbd2_seq_history_start, |
773 | .next = jbd2_seq_history_next, | 773 | .next = jbd2_seq_history_next, |
774 | .stop = jbd2_seq_history_stop, | 774 | .stop = jbd2_seq_history_stop, |
@@ -872,7 +872,7 @@ static void jbd2_seq_info_stop(struct seq_file *seq, void *v) | |||
872 | { | 872 | { |
873 | } | 873 | } |
874 | 874 | ||
875 | static struct seq_operations jbd2_seq_info_ops = { | 875 | static const struct seq_operations jbd2_seq_info_ops = { |
876 | .start = jbd2_seq_info_start, | 876 | .start = jbd2_seq_info_start, |
877 | .next = jbd2_seq_info_next, | 877 | .next = jbd2_seq_info_next, |
878 | .stop = jbd2_seq_info_stop, | 878 | .stop = jbd2_seq_info_stop, |
@@ -1187,6 +1187,12 @@ static int journal_reset(journal_t *journal) | |||
1187 | 1187 | ||
1188 | first = be32_to_cpu(sb->s_first); | 1188 | first = be32_to_cpu(sb->s_first); |
1189 | last = be32_to_cpu(sb->s_maxlen); | 1189 | last = be32_to_cpu(sb->s_maxlen); |
1190 | if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { | ||
1191 | printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n", | ||
1192 | first, last); | ||
1193 | journal_fail_superblock(journal); | ||
1194 | return -EINVAL; | ||
1195 | } | ||
1190 | 1196 | ||
1191 | journal->j_first = first; | 1197 | journal->j_first = first; |
1192 | journal->j_last = last; | 1198 | journal->j_last = last; |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 6213ac728f30..a0512700542f 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -57,7 +57,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
57 | INIT_LIST_HEAD(&transaction->t_private_list); | 57 | INIT_LIST_HEAD(&transaction->t_private_list); |
58 | 58 | ||
59 | /* Set up the commit timer for the new transaction. */ | 59 | /* Set up the commit timer for the new transaction. */ |
60 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 60 | journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires); |
61 | add_timer(&journal->j_commit_timer); | 61 | add_timer(&journal->j_commit_timer); |
62 | 62 | ||
63 | J_ASSERT(journal->j_running_transaction == NULL); | 63 | J_ASSERT(journal->j_running_transaction == NULL); |
@@ -238,6 +238,8 @@ repeat_locked: | |||
238 | __jbd2_log_space_left(journal)); | 238 | __jbd2_log_space_left(journal)); |
239 | spin_unlock(&transaction->t_handle_lock); | 239 | spin_unlock(&transaction->t_handle_lock); |
240 | spin_unlock(&journal->j_state_lock); | 240 | spin_unlock(&journal->j_state_lock); |
241 | |||
242 | lock_map_acquire(&handle->h_lockdep_map); | ||
241 | out: | 243 | out: |
242 | if (unlikely(new_transaction)) /* It's usually NULL */ | 244 | if (unlikely(new_transaction)) /* It's usually NULL */ |
243 | kfree(new_transaction); | 245 | kfree(new_transaction); |
@@ -303,8 +305,6 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | |||
303 | handle = ERR_PTR(err); | 305 | handle = ERR_PTR(err); |
304 | goto out; | 306 | goto out; |
305 | } | 307 | } |
306 | |||
307 | lock_map_acquire(&handle->h_lockdep_map); | ||
308 | out: | 308 | out: |
309 | return handle; | 309 | return handle; |
310 | } | 310 | } |
@@ -426,6 +426,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks) | |||
426 | __jbd2_log_start_commit(journal, transaction->t_tid); | 426 | __jbd2_log_start_commit(journal, transaction->t_tid); |
427 | spin_unlock(&journal->j_state_lock); | 427 | spin_unlock(&journal->j_state_lock); |
428 | 428 | ||
429 | lock_map_release(&handle->h_lockdep_map); | ||
429 | handle->h_buffer_credits = nblocks; | 430 | handle->h_buffer_credits = nblocks; |
430 | ret = start_this_handle(journal, handle); | 431 | ret = start_this_handle(journal, handle); |
431 | return ret; | 432 | return ret; |
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index e9580104b6ba..3ff50da94789 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/completion.h> | 15 | #include <linux/completion.h> |
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/freezer.h> | 17 | #include <linux/freezer.h> |
18 | #include <linux/kthread.h> | ||
18 | #include "nodelist.h" | 19 | #include "nodelist.h" |
19 | 20 | ||
20 | 21 | ||
@@ -31,7 +32,7 @@ void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c) | |||
31 | /* This must only ever be called when no GC thread is currently running */ | 32 | /* This must only ever be called when no GC thread is currently running */ |
32 | int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c) | 33 | int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c) |
33 | { | 34 | { |
34 | pid_t pid; | 35 | struct task_struct *tsk; |
35 | int ret = 0; | 36 | int ret = 0; |
36 | 37 | ||
37 | BUG_ON(c->gc_task); | 38 | BUG_ON(c->gc_task); |
@@ -39,15 +40,16 @@ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c) | |||
39 | init_completion(&c->gc_thread_start); | 40 | init_completion(&c->gc_thread_start); |
40 | init_completion(&c->gc_thread_exit); | 41 | init_completion(&c->gc_thread_exit); |
41 | 42 | ||
42 | pid = kernel_thread(jffs2_garbage_collect_thread, c, CLONE_FS|CLONE_FILES); | 43 | tsk = kthread_run(jffs2_garbage_collect_thread, c, "jffs2_gcd_mtd%d", c->mtd->index); |
43 | if (pid < 0) { | 44 | if (IS_ERR(tsk)) { |
44 | printk(KERN_WARNING "fork failed for JFFS2 garbage collect thread: %d\n", -pid); | 45 | printk(KERN_WARNING "fork failed for JFFS2 garbage collect thread: %ld\n", -PTR_ERR(tsk)); |
45 | complete(&c->gc_thread_exit); | 46 | complete(&c->gc_thread_exit); |
46 | ret = pid; | 47 | ret = PTR_ERR(tsk); |
47 | } else { | 48 | } else { |
48 | /* Wait for it... */ | 49 | /* Wait for it... */ |
49 | D1(printk(KERN_DEBUG "JFFS2: Garbage collect thread is pid %d\n", pid)); | 50 | D1(printk(KERN_DEBUG "JFFS2: Garbage collect thread is pid %d\n", tsk->pid)); |
50 | wait_for_completion(&c->gc_thread_start); | 51 | wait_for_completion(&c->gc_thread_start); |
52 | ret = tsk->pid; | ||
51 | } | 53 | } |
52 | 54 | ||
53 | return ret; | 55 | return ret; |
@@ -71,7 +73,6 @@ static int jffs2_garbage_collect_thread(void *_c) | |||
71 | { | 73 | { |
72 | struct jffs2_sb_info *c = _c; | 74 | struct jffs2_sb_info *c = _c; |
73 | 75 | ||
74 | daemonize("jffs2_gcd_mtd%d", c->mtd->index); | ||
75 | allow_signal(SIGKILL); | 76 | allow_signal(SIGKILL); |
76 | allow_signal(SIGSTOP); | 77 | allow_signal(SIGSTOP); |
77 | allow_signal(SIGCONT); | 78 | allow_signal(SIGCONT); |
@@ -107,6 +108,11 @@ static int jffs2_garbage_collect_thread(void *_c) | |||
107 | * the GC thread get there first. */ | 108 | * the GC thread get there first. */ |
108 | schedule_timeout_interruptible(msecs_to_jiffies(50)); | 109 | schedule_timeout_interruptible(msecs_to_jiffies(50)); |
109 | 110 | ||
111 | if (kthread_should_stop()) { | ||
112 | D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread(): kthread_stop() called.\n")); | ||
113 | goto die; | ||
114 | } | ||
115 | |||
110 | /* Put_super will send a SIGKILL and then wait on the sem. | 116 | /* Put_super will send a SIGKILL and then wait on the sem. |
111 | */ | 117 | */ |
112 | while (signal_pending(current) || freezing(current)) { | 118 | while (signal_pending(current) || freezing(current)) { |
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index 9eff2bdae8a7..c082868910f2 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c | |||
@@ -39,13 +39,13 @@ int __init jffs2_create_slab_caches(void) | |||
39 | 39 | ||
40 | raw_dirent_slab = kmem_cache_create("jffs2_raw_dirent", | 40 | raw_dirent_slab = kmem_cache_create("jffs2_raw_dirent", |
41 | sizeof(struct jffs2_raw_dirent), | 41 | sizeof(struct jffs2_raw_dirent), |
42 | 0, 0, NULL); | 42 | 0, SLAB_HWCACHE_ALIGN, NULL); |
43 | if (!raw_dirent_slab) | 43 | if (!raw_dirent_slab) |
44 | goto err; | 44 | goto err; |
45 | 45 | ||
46 | raw_inode_slab = kmem_cache_create("jffs2_raw_inode", | 46 | raw_inode_slab = kmem_cache_create("jffs2_raw_inode", |
47 | sizeof(struct jffs2_raw_inode), | 47 | sizeof(struct jffs2_raw_inode), |
48 | 0, 0, NULL); | 48 | 0, SLAB_HWCACHE_ALIGN, NULL); |
49 | if (!raw_inode_slab) | 49 | if (!raw_inode_slab) |
50 | goto err; | 50 | goto err; |
51 | 51 | ||
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 0035c021395a..9a80e8e595d0 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -123,7 +123,7 @@ static struct dentry *jffs2_get_parent(struct dentry *child) | |||
123 | return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino)); | 123 | return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino)); |
124 | } | 124 | } |
125 | 125 | ||
126 | static struct export_operations jffs2_export_ops = { | 126 | static const struct export_operations jffs2_export_ops = { |
127 | .get_parent = jffs2_get_parent, | 127 | .get_parent = jffs2_get_parent, |
128 | .fh_to_dentry = jffs2_fh_to_dentry, | 128 | .fh_to_dentry = jffs2_fh_to_dentry, |
129 | .fh_to_parent = jffs2_fh_to_parent, | 129 | .fh_to_parent = jffs2_fh_to_parent, |
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 1f3b0fc0d351..fc9032dc8862 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c | |||
@@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) | |||
166 | */ | 166 | */ |
167 | if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) | 167 | if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) |
168 | continue; | 168 | continue; |
169 | if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) | 169 | if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) |
170 | continue; | 170 | continue; |
171 | if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) | 171 | if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) |
172 | continue; | 172 | continue; |
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 4336adba952a..c81249fef11f 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
@@ -458,7 +458,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl) | |||
458 | nlm_put_lockowner(fl->fl_u.nfs_fl.owner); | 458 | nlm_put_lockowner(fl->fl_u.nfs_fl.owner); |
459 | } | 459 | } |
460 | 460 | ||
461 | static struct file_lock_operations nlmclnt_lock_ops = { | 461 | static const struct file_lock_operations nlmclnt_lock_ops = { |
462 | .fl_copy_lock = nlmclnt_locks_copy_lock, | 462 | .fl_copy_lock = nlmclnt_locks_copy_lock, |
463 | .fl_release_private = nlmclnt_locks_release_private, | 463 | .fl_release_private = nlmclnt_locks_release_private, |
464 | }; | 464 | }; |
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 7cb076ac6b45..4600c2037b8b 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
@@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) | |||
111 | */ | 111 | */ |
112 | chain = &nlm_hosts[nlm_hash_address(ni->sap)]; | 112 | chain = &nlm_hosts[nlm_hash_address(ni->sap)]; |
113 | hlist_for_each_entry(host, pos, chain, h_hash) { | 113 | hlist_for_each_entry(host, pos, chain, h_hash) { |
114 | if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) | 114 | if (!rpc_cmp_addr(nlm_addr(host), ni->sap)) |
115 | continue; | 115 | continue; |
116 | 116 | ||
117 | /* See if we have an NSM handle for this client */ | 117 | /* See if we have an NSM handle for this client */ |
@@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) | |||
125 | if (host->h_server != ni->server) | 125 | if (host->h_server != ni->server) |
126 | continue; | 126 | continue; |
127 | if (ni->server && | 127 | if (ni->server && |
128 | !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) | 128 | !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) |
129 | continue; | 129 | continue; |
130 | 130 | ||
131 | /* Move to head of hash chain. */ | 131 | /* Move to head of hash chain. */ |
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 30c933188dd7..f956651d0f65 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap) | |||
209 | struct nsm_handle *nsm; | 209 | struct nsm_handle *nsm; |
210 | 210 | ||
211 | list_for_each_entry(nsm, &nsm_handles, sm_link) | 211 | list_for_each_entry(nsm, &nsm_handles, sm_link) |
212 | if (nlm_cmp_addr(nsm_addr(nsm), sap)) | 212 | if (rpc_cmp_addr(nsm_addr(nsm), sap)) |
213 | return nsm; | 213 | return nsm; |
214 | return NULL; | 214 | return NULL; |
215 | } | 215 | } |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e577a78d7bac..d1001790fa9a 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -705,7 +705,7 @@ static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2) | |||
705 | return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; | 705 | return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; |
706 | } | 706 | } |
707 | 707 | ||
708 | struct lock_manager_operations nlmsvc_lock_operations = { | 708 | const struct lock_manager_operations nlmsvc_lock_operations = { |
709 | .fl_compare_owner = nlmsvc_same_owner, | 709 | .fl_compare_owner = nlmsvc_same_owner, |
710 | .fl_notify = nlmsvc_notify_blocked, | 710 | .fl_notify = nlmsvc_notify_blocked, |
711 | .fl_grant = nlmsvc_grant_deferred, | 711 | .fl_grant = nlmsvc_grant_deferred, |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 9e4d6aab611b..ad478da7ca63 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); | |||
417 | static int | 417 | static int |
418 | nlmsvc_match_ip(void *datap, struct nlm_host *host) | 418 | nlmsvc_match_ip(void *datap, struct nlm_host *host) |
419 | { | 419 | { |
420 | return nlm_cmp_addr(nlm_srcaddr(host), datap); | 420 | return rpc_cmp_addr(nlm_srcaddr(host), datap); |
421 | } | 421 | } |
422 | 422 | ||
423 | /** | 423 | /** |
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 0336f2beacde..b583ab0a4cbb 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c | |||
@@ -8,7 +8,6 @@ | |||
8 | 8 | ||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | #include <linux/utsname.h> | ||
12 | #include <linux/nfs.h> | 11 | #include <linux/nfs.h> |
13 | 12 | ||
14 | #include <linux/sunrpc/xdr.h> | 13 | #include <linux/sunrpc/xdr.h> |
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index e1d528653192..ad9dbbc9145d 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c | |||
@@ -9,7 +9,6 @@ | |||
9 | 9 | ||
10 | #include <linux/types.h> | 10 | #include <linux/types.h> |
11 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
12 | #include <linux/utsname.h> | ||
13 | #include <linux/nfs.h> | 12 | #include <linux/nfs.h> |
14 | 13 | ||
15 | #include <linux/sunrpc/xdr.h> | 14 | #include <linux/sunrpc/xdr.h> |
diff --git a/fs/locks.c b/fs/locks.c index 19ee18a6829b..a8794f233bc9 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -434,7 +434,7 @@ static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try) | |||
434 | return fl->fl_file == try->fl_file; | 434 | return fl->fl_file == try->fl_file; |
435 | } | 435 | } |
436 | 436 | ||
437 | static struct lock_manager_operations lease_manager_ops = { | 437 | static const struct lock_manager_operations lease_manager_ops = { |
438 | .fl_break = lease_break_callback, | 438 | .fl_break = lease_break_callback, |
439 | .fl_release_private = lease_release_private_callback, | 439 | .fl_release_private = lease_release_private_callback, |
440 | .fl_mylease = lease_mylease_callback, | 440 | .fl_mylease = lease_mylease_callback, |
diff --git a/fs/minix/dir.c b/fs/minix/dir.c index d407e7a0b6fe..6198731d7fcd 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c | |||
@@ -308,14 +308,18 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page) | |||
308 | struct inode *inode = (struct inode*)mapping->host; | 308 | struct inode *inode = (struct inode*)mapping->host; |
309 | char *kaddr = page_address(page); | 309 | char *kaddr = page_address(page); |
310 | loff_t pos = page_offset(page) + (char*)de - kaddr; | 310 | loff_t pos = page_offset(page) + (char*)de - kaddr; |
311 | unsigned len = minix_sb(inode->i_sb)->s_dirsize; | 311 | struct minix_sb_info *sbi = minix_sb(inode->i_sb); |
312 | unsigned len = sbi->s_dirsize; | ||
312 | int err; | 313 | int err; |
313 | 314 | ||
314 | lock_page(page); | 315 | lock_page(page); |
315 | err = __minix_write_begin(NULL, mapping, pos, len, | 316 | err = __minix_write_begin(NULL, mapping, pos, len, |
316 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | 317 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); |
317 | if (err == 0) { | 318 | if (err == 0) { |
318 | de->inode = 0; | 319 | if (sbi->s_version == MINIX_V3) |
320 | ((minix3_dirent *) de)->inode = 0; | ||
321 | else | ||
322 | de->inode = 0; | ||
319 | err = dir_commit_chunk(page, pos, len); | 323 | err = dir_commit_chunk(page, pos, len); |
320 | } else { | 324 | } else { |
321 | unlock_page(page); | 325 | unlock_page(page); |
@@ -440,7 +444,10 @@ void minix_set_link(struct minix_dir_entry *de, struct page *page, | |||
440 | err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize, | 444 | err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize, |
441 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | 445 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); |
442 | if (err == 0) { | 446 | if (err == 0) { |
443 | de->inode = inode->i_ino; | 447 | if (sbi->s_version == MINIX_V3) |
448 | ((minix3_dirent *) de)->inode = inode->i_ino; | ||
449 | else | ||
450 | de->inode = inode->i_ino; | ||
444 | err = dir_commit_chunk(page, pos, sbi->s_dirsize); | 451 | err = dir_commit_chunk(page, pos, sbi->s_dirsize); |
445 | } else { | 452 | } else { |
446 | unlock_page(page); | 453 | unlock_page(page); |
@@ -470,7 +477,14 @@ ino_t minix_inode_by_name(struct dentry *dentry) | |||
470 | ino_t res = 0; | 477 | ino_t res = 0; |
471 | 478 | ||
472 | if (de) { | 479 | if (de) { |
473 | res = de->inode; | 480 | struct address_space *mapping = page->mapping; |
481 | struct inode *inode = mapping->host; | ||
482 | struct minix_sb_info *sbi = minix_sb(inode->i_sb); | ||
483 | |||
484 | if (sbi->s_version == MINIX_V3) | ||
485 | res = ((minix3_dirent *) de)->inode; | ||
486 | else | ||
487 | res = de->inode; | ||
474 | dir_put_page(page); | 488 | dir_put_page(page); |
475 | } | 489 | } |
476 | return res; | 490 | return res; |
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 9c590722d87e..b8b5b30d53f0 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
@@ -1241,7 +1241,7 @@ ncp_date_unix2dos(int unix_date, __le16 *time, __le16 *date) | |||
1241 | month = 2; | 1241 | month = 2; |
1242 | } else { | 1242 | } else { |
1243 | nl_day = (year & 3) || day <= 59 ? day : day - 1; | 1243 | nl_day = (year & 3) || day <= 59 ? day : day - 1; |
1244 | for (month = 0; month < 12; month++) | 1244 | for (month = 1; month < 12; month++) |
1245 | if (day_n[month] > nl_day) | 1245 | if (day_n[month] > nl_day) |
1246 | break; | 1246 | break; |
1247 | } | 1247 | } |
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index fa038df63ac8..53a7ed7eb9c6 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c | |||
@@ -442,7 +442,7 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp, | |||
442 | if (dentry) { | 442 | if (dentry) { |
443 | struct inode* s_inode = dentry->d_inode; | 443 | struct inode* s_inode = dentry->d_inode; |
444 | 444 | ||
445 | if (inode) { | 445 | if (s_inode) { |
446 | NCP_FINFO(s_inode)->volNumber = vnum; | 446 | NCP_FINFO(s_inode)->volNumber = vnum; |
447 | NCP_FINFO(s_inode)->dirEntNum = de; | 447 | NCP_FINFO(s_inode)->dirEntNum = de; |
448 | NCP_FINFO(s_inode)->DosDirNum = dosde; | 448 | NCP_FINFO(s_inode)->DosDirNum = dosde; |
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index e5a2dac5f715..76b0aa0f73bf 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
@@ -222,7 +222,7 @@ static unsigned decode_sessionid(struct xdr_stream *xdr, | |||
222 | 222 | ||
223 | p = read_buf(xdr, len); | 223 | p = read_buf(xdr, len); |
224 | if (unlikely(p == NULL)) | 224 | if (unlikely(p == NULL)) |
225 | return htonl(NFS4ERR_RESOURCE);; | 225 | return htonl(NFS4ERR_RESOURCE); |
226 | 226 | ||
227 | memcpy(sid->data, p, len); | 227 | memcpy(sid->data, p, len); |
228 | return 0; | 228 | return 0; |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e350bd6a2334..63976c0ccc25 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -648,8 +648,6 @@ static int nfs_start_lockd(struct nfs_server *server) | |||
648 | .hostname = clp->cl_hostname, | 648 | .hostname = clp->cl_hostname, |
649 | .address = (struct sockaddr *)&clp->cl_addr, | 649 | .address = (struct sockaddr *)&clp->cl_addr, |
650 | .addrlen = clp->cl_addrlen, | 650 | .addrlen = clp->cl_addrlen, |
651 | .protocol = server->flags & NFS_MOUNT_TCP ? | ||
652 | IPPROTO_TCP : IPPROTO_UDP, | ||
653 | .nfs_version = clp->rpc_ops->version, | 651 | .nfs_version = clp->rpc_ops->version, |
654 | .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? | 652 | .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? |
655 | 1 : 0, | 653 | 1 : 0, |
@@ -660,6 +658,14 @@ static int nfs_start_lockd(struct nfs_server *server) | |||
660 | if (server->flags & NFS_MOUNT_NONLM) | 658 | if (server->flags & NFS_MOUNT_NONLM) |
661 | return 0; | 659 | return 0; |
662 | 660 | ||
661 | switch (clp->cl_proto) { | ||
662 | default: | ||
663 | nlm_init.protocol = IPPROTO_TCP; | ||
664 | break; | ||
665 | case XPRT_TRANSPORT_UDP: | ||
666 | nlm_init.protocol = IPPROTO_UDP; | ||
667 | } | ||
668 | |||
663 | host = nlmclnt_init(&nlm_init); | 669 | host = nlmclnt_init(&nlm_init); |
664 | if (IS_ERR(host)) | 670 | if (IS_ERR(host)) |
665 | return PTR_ERR(host); | 671 | return PTR_ERR(host); |
@@ -787,7 +793,7 @@ static int nfs_init_server(struct nfs_server *server, | |||
787 | dprintk("--> nfs_init_server()\n"); | 793 | dprintk("--> nfs_init_server()\n"); |
788 | 794 | ||
789 | #ifdef CONFIG_NFS_V3 | 795 | #ifdef CONFIG_NFS_V3 |
790 | if (data->flags & NFS_MOUNT_VER3) | 796 | if (data->version == 3) |
791 | cl_init.rpc_ops = &nfs_v3_clientops; | 797 | cl_init.rpc_ops = &nfs_v3_clientops; |
792 | #endif | 798 | #endif |
793 | 799 | ||
@@ -933,10 +939,6 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str | |||
933 | goto out_error; | 939 | goto out_error; |
934 | 940 | ||
935 | nfs_server_set_fsinfo(server, &fsinfo); | 941 | nfs_server_set_fsinfo(server, &fsinfo); |
936 | error = bdi_init(&server->backing_dev_info); | ||
937 | if (error) | ||
938 | goto out_error; | ||
939 | |||
940 | 942 | ||
941 | /* Get some general file system info */ | 943 | /* Get some general file system info */ |
942 | if (server->namelen == 0) { | 944 | if (server->namelen == 0) { |
@@ -968,6 +970,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve | |||
968 | target->acdirmin = source->acdirmin; | 970 | target->acdirmin = source->acdirmin; |
969 | target->acdirmax = source->acdirmax; | 971 | target->acdirmax = source->acdirmax; |
970 | target->caps = source->caps; | 972 | target->caps = source->caps; |
973 | target->options = source->options; | ||
971 | } | 974 | } |
972 | 975 | ||
973 | /* | 976 | /* |
@@ -995,6 +998,12 @@ static struct nfs_server *nfs_alloc_server(void) | |||
995 | return NULL; | 998 | return NULL; |
996 | } | 999 | } |
997 | 1000 | ||
1001 | if (bdi_init(&server->backing_dev_info)) { | ||
1002 | nfs_free_iostats(server->io_stats); | ||
1003 | kfree(server); | ||
1004 | return NULL; | ||
1005 | } | ||
1006 | |||
998 | return server; | 1007 | return server; |
999 | } | 1008 | } |
1000 | 1009 | ||
@@ -1529,7 +1538,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); | |||
1529 | static void nfs_server_list_stop(struct seq_file *p, void *v); | 1538 | static void nfs_server_list_stop(struct seq_file *p, void *v); |
1530 | static int nfs_server_list_show(struct seq_file *m, void *v); | 1539 | static int nfs_server_list_show(struct seq_file *m, void *v); |
1531 | 1540 | ||
1532 | static struct seq_operations nfs_server_list_ops = { | 1541 | static const struct seq_operations nfs_server_list_ops = { |
1533 | .start = nfs_server_list_start, | 1542 | .start = nfs_server_list_start, |
1534 | .next = nfs_server_list_next, | 1543 | .next = nfs_server_list_next, |
1535 | .stop = nfs_server_list_stop, | 1544 | .stop = nfs_server_list_stop, |
@@ -1550,7 +1559,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos); | |||
1550 | static void nfs_volume_list_stop(struct seq_file *p, void *v); | 1559 | static void nfs_volume_list_stop(struct seq_file *p, void *v); |
1551 | static int nfs_volume_list_show(struct seq_file *m, void *v); | 1560 | static int nfs_volume_list_show(struct seq_file *m, void *v); |
1552 | 1561 | ||
1553 | static struct seq_operations nfs_volume_list_ops = { | 1562 | static const struct seq_operations nfs_volume_list_ops = { |
1554 | .start = nfs_volume_list_start, | 1563 | .start = nfs_volume_list_start, |
1555 | .next = nfs_volume_list_next, | 1564 | .next = nfs_volume_list_next, |
1556 | .stop = nfs_volume_list_stop, | 1565 | .stop = nfs_volume_list_stop, |
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 379be678cb7e..70fad69eb959 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c | |||
@@ -58,17 +58,34 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp) | |||
58 | /* | 58 | /* |
59 | * Get the cache cookie for an NFS superblock. We have to handle | 59 | * Get the cache cookie for an NFS superblock. We have to handle |
60 | * uniquification here because the cache doesn't do it for us. | 60 | * uniquification here because the cache doesn't do it for us. |
61 | * | ||
62 | * The default uniquifier is just an empty string, but it may be overridden | ||
63 | * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent | ||
64 | * superblock across an automount point of some nature. | ||
61 | */ | 65 | */ |
62 | void nfs_fscache_get_super_cookie(struct super_block *sb, | 66 | void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, |
63 | struct nfs_parsed_mount_data *data) | 67 | struct nfs_clone_mount *mntdata) |
64 | { | 68 | { |
65 | struct nfs_fscache_key *key, *xkey; | 69 | struct nfs_fscache_key *key, *xkey; |
66 | struct nfs_server *nfss = NFS_SB(sb); | 70 | struct nfs_server *nfss = NFS_SB(sb); |
67 | struct rb_node **p, *parent; | 71 | struct rb_node **p, *parent; |
68 | const char *uniq = data->fscache_uniq ?: ""; | ||
69 | int diff, ulen; | 72 | int diff, ulen; |
70 | 73 | ||
71 | ulen = strlen(uniq); | 74 | if (uniq) { |
75 | ulen = strlen(uniq); | ||
76 | } else if (mntdata) { | ||
77 | struct nfs_server *mnt_s = NFS_SB(mntdata->sb); | ||
78 | if (mnt_s->fscache_key) { | ||
79 | uniq = mnt_s->fscache_key->key.uniquifier; | ||
80 | ulen = mnt_s->fscache_key->key.uniq_len; | ||
81 | } | ||
82 | } | ||
83 | |||
84 | if (!uniq) { | ||
85 | uniq = ""; | ||
86 | ulen = 1; | ||
87 | } | ||
88 | |||
72 | key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL); | 89 | key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL); |
73 | if (!key) | 90 | if (!key) |
74 | return; | 91 | return; |
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 6e809bb0ff08..b9c572d0679f 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h | |||
@@ -74,7 +74,8 @@ extern void nfs_fscache_get_client_cookie(struct nfs_client *); | |||
74 | extern void nfs_fscache_release_client_cookie(struct nfs_client *); | 74 | extern void nfs_fscache_release_client_cookie(struct nfs_client *); |
75 | 75 | ||
76 | extern void nfs_fscache_get_super_cookie(struct super_block *, | 76 | extern void nfs_fscache_get_super_cookie(struct super_block *, |
77 | struct nfs_parsed_mount_data *); | 77 | const char *, |
78 | struct nfs_clone_mount *); | ||
78 | extern void nfs_fscache_release_super_cookie(struct super_block *); | 79 | extern void nfs_fscache_release_super_cookie(struct super_block *); |
79 | 80 | ||
80 | extern void nfs_fscache_init_inode_cookie(struct inode *); | 81 | extern void nfs_fscache_init_inode_cookie(struct inode *); |
@@ -173,7 +174,8 @@ static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} | |||
173 | 174 | ||
174 | static inline void nfs_fscache_get_super_cookie( | 175 | static inline void nfs_fscache_get_super_cookie( |
175 | struct super_block *sb, | 176 | struct super_block *sb, |
176 | struct nfs_parsed_mount_data *data) | 177 | const char *uniq, |
178 | struct nfs_clone_mount *mntdata) | ||
177 | { | 179 | { |
178 | } | 180 | } |
179 | static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} | 181 | static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} |
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index c862c9340f9a..5e078b222b4e 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/time.h> | 13 | #include <linux/time.h> |
14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/utsname.h> | ||
17 | #include <linux/errno.h> | 16 | #include <linux/errno.h> |
18 | #include <linux/string.h> | 17 | #include <linux/string.h> |
19 | #include <linux/in.h> | 18 | #include <linux/in.h> |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ee6a13f05443..3f8881d1a050 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -7,7 +7,6 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/utsname.h> | ||
11 | #include <linux/errno.h> | 10 | #include <linux/errno.h> |
12 | #include <linux/string.h> | 11 | #include <linux/string.h> |
13 | #include <linux/sunrpc/clnt.h> | 12 | #include <linux/sunrpc/clnt.h> |
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 35869a4921f1..5fe5492fbd29 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -10,7 +10,6 @@ | |||
10 | #include <linux/time.h> | 10 | #include <linux/time.h> |
11 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/utsname.h> | ||
14 | #include <linux/errno.h> | 13 | #include <linux/errno.h> |
15 | #include <linux/string.h> | 14 | #include <linux/string.h> |
16 | #include <linux/in.h> | 15 | #include <linux/in.h> |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index be6544aef41f..ed7c269e2514 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -36,7 +36,6 @@ | |||
36 | */ | 36 | */ |
37 | 37 | ||
38 | #include <linux/mm.h> | 38 | #include <linux/mm.h> |
39 | #include <linux/utsname.h> | ||
40 | #include <linux/delay.h> | 39 | #include <linux/delay.h> |
41 | #include <linux/errno.h> | 40 | #include <linux/errno.h> |
42 | #include <linux/string.h> | 41 | #include <linux/string.h> |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1434080aefeb..2ef4fecf3984 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -638,7 +638,7 @@ static void nfs4_fl_release_lock(struct file_lock *fl) | |||
638 | nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner); | 638 | nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner); |
639 | } | 639 | } |
640 | 640 | ||
641 | static struct file_lock_operations nfs4_fl_lock_ops = { | 641 | static const struct file_lock_operations nfs4_fl_lock_ops = { |
642 | .fl_copy_lock = nfs4_fl_copy_lock, | 642 | .fl_copy_lock = nfs4_fl_copy_lock, |
643 | .fl_release_private = nfs4_fl_release_lock, | 643 | .fl_release_private = nfs4_fl_release_lock, |
644 | }; | 644 | }; |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cfc30d362f94..83ad47cbdd8a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -39,7 +39,6 @@ | |||
39 | #include <linux/time.h> | 39 | #include <linux/time.h> |
40 | #include <linux/mm.h> | 40 | #include <linux/mm.h> |
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/utsname.h> | ||
43 | #include <linux/errno.h> | 42 | #include <linux/errno.h> |
44 | #include <linux/string.h> | 43 | #include <linux/string.h> |
45 | #include <linux/in.h> | 44 | #include <linux/in.h> |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 7be72d90d49d..ef583854d8d0 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -32,7 +32,6 @@ | |||
32 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
33 | #include <linux/time.h> | 33 | #include <linux/time.h> |
34 | #include <linux/mm.h> | 34 | #include <linux/mm.h> |
35 | #include <linux/utsname.h> | ||
36 | #include <linux/errno.h> | 35 | #include <linux/errno.h> |
37 | #include <linux/string.h> | 36 | #include <linux/string.h> |
38 | #include <linux/in.h> | 37 | #include <linux/in.h> |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 867f70504531..810770f96816 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -728,6 +728,27 @@ static void nfs_umount_begin(struct super_block *sb) | |||
728 | unlock_kernel(); | 728 | unlock_kernel(); |
729 | } | 729 | } |
730 | 730 | ||
731 | static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(int flags) | ||
732 | { | ||
733 | struct nfs_parsed_mount_data *data; | ||
734 | |||
735 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
736 | if (data) { | ||
737 | data->flags = flags; | ||
738 | data->rsize = NFS_MAX_FILE_IO_SIZE; | ||
739 | data->wsize = NFS_MAX_FILE_IO_SIZE; | ||
740 | data->acregmin = NFS_DEF_ACREGMIN; | ||
741 | data->acregmax = NFS_DEF_ACREGMAX; | ||
742 | data->acdirmin = NFS_DEF_ACDIRMIN; | ||
743 | data->acdirmax = NFS_DEF_ACDIRMAX; | ||
744 | data->nfs_server.port = NFS_UNSPEC_PORT; | ||
745 | data->auth_flavors[0] = RPC_AUTH_UNIX; | ||
746 | data->auth_flavor_len = 1; | ||
747 | data->minorversion = 0; | ||
748 | } | ||
749 | return data; | ||
750 | } | ||
751 | |||
731 | /* | 752 | /* |
732 | * Sanity-check a server address provided by the mount command. | 753 | * Sanity-check a server address provided by the mount command. |
733 | * | 754 | * |
@@ -1430,10 +1451,13 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, | |||
1430 | int status; | 1451 | int status; |
1431 | 1452 | ||
1432 | if (args->mount_server.version == 0) { | 1453 | if (args->mount_server.version == 0) { |
1433 | if (args->flags & NFS_MOUNT_VER3) | 1454 | switch (args->version) { |
1434 | args->mount_server.version = NFS_MNT3_VERSION; | 1455 | default: |
1435 | else | 1456 | args->mount_server.version = NFS_MNT3_VERSION; |
1436 | args->mount_server.version = NFS_MNT_VERSION; | 1457 | break; |
1458 | case 2: | ||
1459 | args->mount_server.version = NFS_MNT_VERSION; | ||
1460 | } | ||
1437 | } | 1461 | } |
1438 | request.version = args->mount_server.version; | 1462 | request.version = args->mount_server.version; |
1439 | 1463 | ||
@@ -1634,20 +1658,6 @@ static int nfs_validate_mount_data(void *options, | |||
1634 | if (data == NULL) | 1658 | if (data == NULL) |
1635 | goto out_no_data; | 1659 | goto out_no_data; |
1636 | 1660 | ||
1637 | args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP); | ||
1638 | args->rsize = NFS_MAX_FILE_IO_SIZE; | ||
1639 | args->wsize = NFS_MAX_FILE_IO_SIZE; | ||
1640 | args->acregmin = NFS_DEF_ACREGMIN; | ||
1641 | args->acregmax = NFS_DEF_ACREGMAX; | ||
1642 | args->acdirmin = NFS_DEF_ACDIRMIN; | ||
1643 | args->acdirmax = NFS_DEF_ACDIRMAX; | ||
1644 | args->mount_server.port = NFS_UNSPEC_PORT; | ||
1645 | args->nfs_server.port = NFS_UNSPEC_PORT; | ||
1646 | args->nfs_server.protocol = XPRT_TRANSPORT_TCP; | ||
1647 | args->auth_flavors[0] = RPC_AUTH_UNIX; | ||
1648 | args->auth_flavor_len = 1; | ||
1649 | args->minorversion = 0; | ||
1650 | |||
1651 | switch (data->version) { | 1661 | switch (data->version) { |
1652 | case 1: | 1662 | case 1: |
1653 | data->namlen = 0; | 1663 | data->namlen = 0; |
@@ -1778,7 +1788,7 @@ static int nfs_validate_mount_data(void *options, | |||
1778 | } | 1788 | } |
1779 | 1789 | ||
1780 | #ifndef CONFIG_NFS_V3 | 1790 | #ifndef CONFIG_NFS_V3 |
1781 | if (args->flags & NFS_MOUNT_VER3) | 1791 | if (args->version == 3) |
1782 | goto out_v3_not_compiled; | 1792 | goto out_v3_not_compiled; |
1783 | #endif /* !CONFIG_NFS_V3 */ | 1793 | #endif /* !CONFIG_NFS_V3 */ |
1784 | 1794 | ||
@@ -1918,6 +1928,8 @@ static inline void nfs_initialise_sb(struct super_block *sb) | |||
1918 | if (server->flags & NFS_MOUNT_NOAC) | 1928 | if (server->flags & NFS_MOUNT_NOAC) |
1919 | sb->s_flags |= MS_SYNCHRONOUS; | 1929 | sb->s_flags |= MS_SYNCHRONOUS; |
1920 | 1930 | ||
1931 | sb->s_bdi = &server->backing_dev_info; | ||
1932 | |||
1921 | nfs_super_set_maxbytes(sb, server->maxfilesize); | 1933 | nfs_super_set_maxbytes(sb, server->maxfilesize); |
1922 | } | 1934 | } |
1923 | 1935 | ||
@@ -1934,7 +1946,7 @@ static void nfs_fill_super(struct super_block *sb, | |||
1934 | if (data->bsize) | 1946 | if (data->bsize) |
1935 | sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); | 1947 | sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); |
1936 | 1948 | ||
1937 | if (server->flags & NFS_MOUNT_VER3) { | 1949 | if (server->nfs_client->rpc_ops->version == 3) { |
1938 | /* The VFS shouldn't apply the umask to mode bits. We will do | 1950 | /* The VFS shouldn't apply the umask to mode bits. We will do |
1939 | * so ourselves when necessary. | 1951 | * so ourselves when necessary. |
1940 | */ | 1952 | */ |
@@ -1958,7 +1970,7 @@ static void nfs_clone_super(struct super_block *sb, | |||
1958 | sb->s_blocksize = old_sb->s_blocksize; | 1970 | sb->s_blocksize = old_sb->s_blocksize; |
1959 | sb->s_maxbytes = old_sb->s_maxbytes; | 1971 | sb->s_maxbytes = old_sb->s_maxbytes; |
1960 | 1972 | ||
1961 | if (server->flags & NFS_MOUNT_VER3) { | 1973 | if (server->nfs_client->rpc_ops->version == 3) { |
1962 | /* The VFS shouldn't apply the umask to mode bits. We will do | 1974 | /* The VFS shouldn't apply the umask to mode bits. We will do |
1963 | * so ourselves when necessary. | 1975 | * so ourselves when necessary. |
1964 | */ | 1976 | */ |
@@ -2092,7 +2104,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
2092 | }; | 2104 | }; |
2093 | int error = -ENOMEM; | 2105 | int error = -ENOMEM; |
2094 | 2106 | ||
2095 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 2107 | data = nfs_alloc_parsed_mount_data(NFS_MOUNT_VER3 | NFS_MOUNT_TCP); |
2096 | mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); | 2108 | mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); |
2097 | if (data == NULL || mntfh == NULL) | 2109 | if (data == NULL || mntfh == NULL) |
2098 | goto out_free_fh; | 2110 | goto out_free_fh; |
@@ -2142,7 +2154,8 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
2142 | if (!s->s_root) { | 2154 | if (!s->s_root) { |
2143 | /* initial superblock/root creation */ | 2155 | /* initial superblock/root creation */ |
2144 | nfs_fill_super(s, data); | 2156 | nfs_fill_super(s, data); |
2145 | nfs_fscache_get_super_cookie(s, data); | 2157 | nfs_fscache_get_super_cookie( |
2158 | s, data ? data->fscache_uniq : NULL, NULL); | ||
2146 | } | 2159 | } |
2147 | 2160 | ||
2148 | mntroot = nfs_get_root(s, mntfh); | 2161 | mntroot = nfs_get_root(s, mntfh); |
@@ -2188,8 +2201,8 @@ static void nfs_kill_super(struct super_block *s) | |||
2188 | { | 2201 | { |
2189 | struct nfs_server *server = NFS_SB(s); | 2202 | struct nfs_server *server = NFS_SB(s); |
2190 | 2203 | ||
2191 | bdi_unregister(&server->backing_dev_info); | ||
2192 | kill_anon_super(s); | 2204 | kill_anon_super(s); |
2205 | bdi_unregister(&server->backing_dev_info); | ||
2193 | nfs_fscache_release_super_cookie(s); | 2206 | nfs_fscache_release_super_cookie(s); |
2194 | nfs_free_server(server); | 2207 | nfs_free_server(server); |
2195 | } | 2208 | } |
@@ -2243,6 +2256,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, | |||
2243 | if (!s->s_root) { | 2256 | if (!s->s_root) { |
2244 | /* initial superblock/root creation */ | 2257 | /* initial superblock/root creation */ |
2245 | nfs_clone_super(s, data->sb); | 2258 | nfs_clone_super(s, data->sb); |
2259 | nfs_fscache_get_super_cookie(s, NULL, data); | ||
2246 | } | 2260 | } |
2247 | 2261 | ||
2248 | mntroot = nfs_get_root(s, data->fh); | 2262 | mntroot = nfs_get_root(s, data->fh); |
@@ -2360,18 +2374,7 @@ static int nfs4_validate_mount_data(void *options, | |||
2360 | if (data == NULL) | 2374 | if (data == NULL) |
2361 | goto out_no_data; | 2375 | goto out_no_data; |
2362 | 2376 | ||
2363 | args->rsize = NFS_MAX_FILE_IO_SIZE; | ||
2364 | args->wsize = NFS_MAX_FILE_IO_SIZE; | ||
2365 | args->acregmin = NFS_DEF_ACREGMIN; | ||
2366 | args->acregmax = NFS_DEF_ACREGMAX; | ||
2367 | args->acdirmin = NFS_DEF_ACDIRMIN; | ||
2368 | args->acdirmax = NFS_DEF_ACDIRMAX; | ||
2369 | args->nfs_server.port = NFS_UNSPEC_PORT; | ||
2370 | args->auth_flavors[0] = RPC_AUTH_UNIX; | ||
2371 | args->auth_flavor_len = 1; | ||
2372 | args->version = 4; | 2377 | args->version = 4; |
2373 | args->minorversion = 0; | ||
2374 | |||
2375 | switch (data->version) { | 2378 | switch (data->version) { |
2376 | case 1: | 2379 | case 1: |
2377 | if (data->host_addrlen > sizeof(args->nfs_server.address)) | 2380 | if (data->host_addrlen > sizeof(args->nfs_server.address)) |
@@ -2506,7 +2509,8 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type, | |||
2506 | if (!s->s_root) { | 2509 | if (!s->s_root) { |
2507 | /* initial superblock/root creation */ | 2510 | /* initial superblock/root creation */ |
2508 | nfs4_fill_super(s); | 2511 | nfs4_fill_super(s); |
2509 | nfs_fscache_get_super_cookie(s, data); | 2512 | nfs_fscache_get_super_cookie( |
2513 | s, data ? data->fscache_uniq : NULL, NULL); | ||
2510 | } | 2514 | } |
2511 | 2515 | ||
2512 | mntroot = nfs4_get_root(s, mntfh); | 2516 | mntroot = nfs4_get_root(s, mntfh); |
@@ -2654,7 +2658,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, | |||
2654 | struct nfs_parsed_mount_data *data; | 2658 | struct nfs_parsed_mount_data *data; |
2655 | int error = -ENOMEM; | 2659 | int error = -ENOMEM; |
2656 | 2660 | ||
2657 | data = kzalloc(sizeof(*data), GFP_KERNEL); | 2661 | data = nfs_alloc_parsed_mount_data(0); |
2658 | if (data == NULL) | 2662 | if (data == NULL) |
2659 | goto out_free_data; | 2663 | goto out_free_data; |
2660 | 2664 | ||
@@ -2739,6 +2743,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, | |||
2739 | if (!s->s_root) { | 2743 | if (!s->s_root) { |
2740 | /* initial superblock/root creation */ | 2744 | /* initial superblock/root creation */ |
2741 | nfs4_clone_super(s, data->sb); | 2745 | nfs4_clone_super(s, data->sb); |
2746 | nfs_fscache_get_super_cookie(s, NULL, data); | ||
2742 | } | 2747 | } |
2743 | 2748 | ||
2744 | mntroot = nfs4_get_root(s, data->fh); | 2749 | mntroot = nfs4_get_root(s, data->fh); |
@@ -2820,6 +2825,7 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, | |||
2820 | if (!s->s_root) { | 2825 | if (!s->s_root) { |
2821 | /* initial superblock/root creation */ | 2826 | /* initial superblock/root creation */ |
2822 | nfs4_fill_super(s); | 2827 | nfs4_fill_super(s); |
2828 | nfs_fscache_get_super_cookie(s, NULL, data); | ||
2823 | } | 2829 | } |
2824 | 2830 | ||
2825 | mntroot = nfs4_get_root(s, &mntfh); | 2831 | mntroot = nfs4_get_root(s, &mntfh); |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 120acadc6a84..53eb26c16b50 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1490,7 +1490,6 @@ static int nfs_write_mapping(struct address_space *mapping, int how) | |||
1490 | .nr_to_write = LONG_MAX, | 1490 | .nr_to_write = LONG_MAX, |
1491 | .range_start = 0, | 1491 | .range_start = 0, |
1492 | .range_end = LLONG_MAX, | 1492 | .range_end = LLONG_MAX, |
1493 | .for_writepages = 1, | ||
1494 | }; | 1493 | }; |
1495 | 1494 | ||
1496 | return __nfs_write_mapping(mapping, &wbc, how); | 1495 | return __nfs_write_mapping(mapping, &wbc, how); |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index d9462643155c..c1c9e035d4a4 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -1341,6 +1341,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) | |||
1341 | if (rv) | 1341 | if (rv) |
1342 | goto out; | 1342 | goto out; |
1343 | rv = check_nfsd_access(exp, rqstp); | 1343 | rv = check_nfsd_access(exp, rqstp); |
1344 | if (rv) | ||
1345 | fh_put(fhp); | ||
1344 | out: | 1346 | out: |
1345 | exp_put(exp); | 1347 | exp_put(exp); |
1346 | return rv; | 1348 | return rv; |
@@ -1515,7 +1517,7 @@ static int e_show(struct seq_file *m, void *p) | |||
1515 | return svc_export_show(m, &svc_export_cache, cp); | 1517 | return svc_export_show(m, &svc_export_cache, cp); |
1516 | } | 1518 | } |
1517 | 1519 | ||
1518 | struct seq_operations nfs_exports_op = { | 1520 | const struct seq_operations nfs_exports_op = { |
1519 | .start = e_start, | 1521 | .start = e_start, |
1520 | .next = e_next, | 1522 | .next = e_next, |
1521 | .stop = e_stop, | 1523 | .stop = e_stop, |
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 01d4ec1c88e0..edf926e1062f 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c | |||
@@ -814,17 +814,6 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, | |||
814 | return p; | 814 | return p; |
815 | } | 815 | } |
816 | 816 | ||
817 | static __be32 * | ||
818 | encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, | ||
819 | struct svc_fh *fhp) | ||
820 | { | ||
821 | p = encode_post_op_attr(cd->rqstp, p, fhp); | ||
822 | *p++ = xdr_one; /* yes, a file handle follows */ | ||
823 | p = encode_fh(p, fhp); | ||
824 | fh_put(fhp); | ||
825 | return p; | ||
826 | } | ||
827 | |||
828 | static int | 817 | static int |
829 | compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, | 818 | compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, |
830 | const char *name, int namlen) | 819 | const char *name, int namlen) |
@@ -836,29 +825,54 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, | |||
836 | dparent = cd->fh.fh_dentry; | 825 | dparent = cd->fh.fh_dentry; |
837 | exp = cd->fh.fh_export; | 826 | exp = cd->fh.fh_export; |
838 | 827 | ||
839 | fh_init(fhp, NFS3_FHSIZE); | ||
840 | if (isdotent(name, namlen)) { | 828 | if (isdotent(name, namlen)) { |
841 | if (namlen == 2) { | 829 | if (namlen == 2) { |
842 | dchild = dget_parent(dparent); | 830 | dchild = dget_parent(dparent); |
843 | if (dchild == dparent) { | 831 | if (dchild == dparent) { |
844 | /* filesystem root - cannot return filehandle for ".." */ | 832 | /* filesystem root - cannot return filehandle for ".." */ |
845 | dput(dchild); | 833 | dput(dchild); |
846 | return 1; | 834 | return -ENOENT; |
847 | } | 835 | } |
848 | } else | 836 | } else |
849 | dchild = dget(dparent); | 837 | dchild = dget(dparent); |
850 | } else | 838 | } else |
851 | dchild = lookup_one_len(name, dparent, namlen); | 839 | dchild = lookup_one_len(name, dparent, namlen); |
852 | if (IS_ERR(dchild)) | 840 | if (IS_ERR(dchild)) |
853 | return 1; | 841 | return -ENOENT; |
854 | if (d_mountpoint(dchild) || | 842 | rv = -ENOENT; |
855 | fh_compose(fhp, exp, dchild, &cd->fh) != 0 || | 843 | if (d_mountpoint(dchild)) |
856 | !dchild->d_inode) | 844 | goto out; |
857 | rv = 1; | 845 | rv = fh_compose(fhp, exp, dchild, &cd->fh); |
846 | if (rv) | ||
847 | goto out; | ||
848 | if (!dchild->d_inode) | ||
849 | goto out; | ||
850 | rv = 0; | ||
851 | out: | ||
858 | dput(dchild); | 852 | dput(dchild); |
859 | return rv; | 853 | return rv; |
860 | } | 854 | } |
861 | 855 | ||
856 | __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) | ||
857 | { | ||
858 | struct svc_fh fh; | ||
859 | int err; | ||
860 | |||
861 | fh_init(&fh, NFS3_FHSIZE); | ||
862 | err = compose_entry_fh(cd, &fh, name, namlen); | ||
863 | if (err) { | ||
864 | *p++ = 0; | ||
865 | *p++ = 0; | ||
866 | goto out; | ||
867 | } | ||
868 | p = encode_post_op_attr(cd->rqstp, p, &fh); | ||
869 | *p++ = xdr_one; /* yes, a file handle follows */ | ||
870 | p = encode_fh(p, &fh); | ||
871 | out: | ||
872 | fh_put(&fh); | ||
873 | return p; | ||
874 | } | ||
875 | |||
862 | /* | 876 | /* |
863 | * Encode a directory entry. This one works for both normal readdir | 877 | * Encode a directory entry. This one works for both normal readdir |
864 | * and readdirplus. | 878 | * and readdirplus. |
@@ -929,16 +943,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, | |||
929 | 943 | ||
930 | p = encode_entry_baggage(cd, p, name, namlen, ino); | 944 | p = encode_entry_baggage(cd, p, name, namlen, ino); |
931 | 945 | ||
932 | /* throw in readdirplus baggage */ | 946 | if (plus) |
933 | if (plus) { | 947 | p = encode_entryplus_baggage(cd, p, name, namlen); |
934 | struct svc_fh fh; | ||
935 | |||
936 | if (compose_entry_fh(cd, &fh, name, namlen) > 0) { | ||
937 | *p++ = 0; | ||
938 | *p++ = 0; | ||
939 | } else | ||
940 | p = encode_entryplus_baggage(cd, p, &fh); | ||
941 | } | ||
942 | num_entry_words = p - cd->buffer; | 948 | num_entry_words = p - cd->buffer; |
943 | } else if (cd->rqstp->rq_respages[pn+1] != NULL) { | 949 | } else if (cd->rqstp->rq_respages[pn+1] != NULL) { |
944 | /* temporarily encode entry into next page, then move back to | 950 | /* temporarily encode entry into next page, then move back to |
@@ -951,17 +957,8 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, | |||
951 | 957 | ||
952 | p1 = encode_entry_baggage(cd, p1, name, namlen, ino); | 958 | p1 = encode_entry_baggage(cd, p1, name, namlen, ino); |
953 | 959 | ||
954 | /* throw in readdirplus baggage */ | 960 | if (plus) |
955 | if (plus) { | 961 | p = encode_entryplus_baggage(cd, p1, name, namlen); |
956 | struct svc_fh fh; | ||
957 | |||
958 | if (compose_entry_fh(cd, &fh, name, namlen) > 0) { | ||
959 | /* zero out the filehandle */ | ||
960 | *p1++ = 0; | ||
961 | *p1++ = 0; | ||
962 | } else | ||
963 | p1 = encode_entryplus_baggage(cd, p1, &fh); | ||
964 | } | ||
965 | 962 | ||
966 | /* determine entry word length and lengths to go in pages */ | 963 | /* determine entry word length and lengths to go in pages */ |
967 | num_entry_words = p1 - tmp; | 964 | num_entry_words = p1 - tmp; |
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 54b8b4140c8f..725d02f210e2 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
@@ -321,7 +321,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, | |||
321 | deny = ~pas.group & pas.other; | 321 | deny = ~pas.group & pas.other; |
322 | if (deny) { | 322 | if (deny) { |
323 | ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; | 323 | ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; |
324 | ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; | 324 | ace->flag = eflag; |
325 | ace->access_mask = deny_mask_from_posix(deny, flags); | 325 | ace->access_mask = deny_mask_from_posix(deny, flags); |
326 | ace->whotype = NFS4_ACL_WHO_GROUP; | 326 | ace->whotype = NFS4_ACL_WHO_GROUP; |
327 | ace++; | 327 | ace++; |
@@ -335,7 +335,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, | |||
335 | if (deny) { | 335 | if (deny) { |
336 | ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; | 336 | ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; |
337 | ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; | 337 | ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; |
338 | ace->access_mask = mask_from_posix(deny, flags); | 338 | ace->access_mask = deny_mask_from_posix(deny, flags); |
339 | ace->whotype = NFS4_ACL_WHO_NAMED; | 339 | ace->whotype = NFS4_ACL_WHO_NAMED; |
340 | ace->who = pa->e_id; | 340 | ace->who = pa->e_id; |
341 | ace++; | 341 | ace++; |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 3fd23f7aceca..24e8d78f8dde 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -43,25 +43,30 @@ | |||
43 | #include <linux/sunrpc/xdr.h> | 43 | #include <linux/sunrpc/xdr.h> |
44 | #include <linux/sunrpc/svc.h> | 44 | #include <linux/sunrpc/svc.h> |
45 | #include <linux/sunrpc/clnt.h> | 45 | #include <linux/sunrpc/clnt.h> |
46 | #include <linux/sunrpc/svcsock.h> | ||
46 | #include <linux/nfsd/nfsd.h> | 47 | #include <linux/nfsd/nfsd.h> |
47 | #include <linux/nfsd/state.h> | 48 | #include <linux/nfsd/state.h> |
48 | #include <linux/sunrpc/sched.h> | 49 | #include <linux/sunrpc/sched.h> |
49 | #include <linux/nfs4.h> | 50 | #include <linux/nfs4.h> |
51 | #include <linux/sunrpc/xprtsock.h> | ||
50 | 52 | ||
51 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 53 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
52 | 54 | ||
53 | #define NFSPROC4_CB_NULL 0 | 55 | #define NFSPROC4_CB_NULL 0 |
54 | #define NFSPROC4_CB_COMPOUND 1 | 56 | #define NFSPROC4_CB_COMPOUND 1 |
57 | #define NFS4_STATEID_SIZE 16 | ||
55 | 58 | ||
56 | /* Index of predefined Linux callback client operations */ | 59 | /* Index of predefined Linux callback client operations */ |
57 | 60 | ||
58 | enum { | 61 | enum { |
59 | NFSPROC4_CLNT_CB_NULL = 0, | 62 | NFSPROC4_CLNT_CB_NULL = 0, |
60 | NFSPROC4_CLNT_CB_RECALL, | 63 | NFSPROC4_CLNT_CB_RECALL, |
64 | NFSPROC4_CLNT_CB_SEQUENCE, | ||
61 | }; | 65 | }; |
62 | 66 | ||
63 | enum nfs_cb_opnum4 { | 67 | enum nfs_cb_opnum4 { |
64 | OP_CB_RECALL = 4, | 68 | OP_CB_RECALL = 4, |
69 | OP_CB_SEQUENCE = 11, | ||
65 | }; | 70 | }; |
66 | 71 | ||
67 | #define NFS4_MAXTAGLEN 20 | 72 | #define NFS4_MAXTAGLEN 20 |
@@ -70,17 +75,29 @@ enum nfs_cb_opnum4 { | |||
70 | #define NFS4_dec_cb_null_sz 0 | 75 | #define NFS4_dec_cb_null_sz 0 |
71 | #define cb_compound_enc_hdr_sz 4 | 76 | #define cb_compound_enc_hdr_sz 4 |
72 | #define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) | 77 | #define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) |
78 | #define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) | ||
79 | #define cb_sequence_enc_sz (sessionid_sz + 4 + \ | ||
80 | 1 /* no referring calls list yet */) | ||
81 | #define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) | ||
82 | |||
73 | #define op_enc_sz 1 | 83 | #define op_enc_sz 1 |
74 | #define op_dec_sz 2 | 84 | #define op_dec_sz 2 |
75 | #define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) | 85 | #define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) |
76 | #define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) | 86 | #define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) |
77 | #define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ | 87 | #define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ |
88 | cb_sequence_enc_sz + \ | ||
78 | 1 + enc_stateid_sz + \ | 89 | 1 + enc_stateid_sz + \ |
79 | enc_nfs4_fh_sz) | 90 | enc_nfs4_fh_sz) |
80 | 91 | ||
81 | #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ | 92 | #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ |
93 | cb_sequence_dec_sz + \ | ||
82 | op_dec_sz) | 94 | op_dec_sz) |
83 | 95 | ||
96 | struct nfs4_rpc_args { | ||
97 | void *args_op; | ||
98 | struct nfsd4_cb_sequence args_seq; | ||
99 | }; | ||
100 | |||
84 | /* | 101 | /* |
85 | * Generic encode routines from fs/nfs/nfs4xdr.c | 102 | * Generic encode routines from fs/nfs/nfs4xdr.c |
86 | */ | 103 | */ |
@@ -137,11 +154,13 @@ xdr_error: \ | |||
137 | } while (0) | 154 | } while (0) |
138 | 155 | ||
139 | struct nfs4_cb_compound_hdr { | 156 | struct nfs4_cb_compound_hdr { |
140 | int status; | 157 | /* args */ |
141 | u32 ident; | 158 | u32 ident; /* minorversion 0 only */ |
142 | u32 nops; | 159 | u32 nops; |
143 | __be32 *nops_p; | 160 | __be32 *nops_p; |
144 | u32 minorversion; | 161 | u32 minorversion; |
162 | /* res */ | ||
163 | int status; | ||
145 | u32 taglen; | 164 | u32 taglen; |
146 | char *tag; | 165 | char *tag; |
147 | }; | 166 | }; |
@@ -238,6 +257,27 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, | |||
238 | hdr->nops++; | 257 | hdr->nops++; |
239 | } | 258 | } |
240 | 259 | ||
260 | static void | ||
261 | encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, | ||
262 | struct nfs4_cb_compound_hdr *hdr) | ||
263 | { | ||
264 | __be32 *p; | ||
265 | |||
266 | if (hdr->minorversion == 0) | ||
267 | return; | ||
268 | |||
269 | RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); | ||
270 | |||
271 | WRITE32(OP_CB_SEQUENCE); | ||
272 | WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); | ||
273 | WRITE32(args->cbs_clp->cl_cb_seq_nr); | ||
274 | WRITE32(0); /* slotid, always 0 */ | ||
275 | WRITE32(0); /* highest slotid always 0 */ | ||
276 | WRITE32(0); /* cachethis always 0 */ | ||
277 | WRITE32(0); /* FIXME: support referring_call_lists */ | ||
278 | hdr->nops++; | ||
279 | } | ||
280 | |||
241 | static int | 281 | static int |
242 | nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) | 282 | nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) |
243 | { | 283 | { |
@@ -249,15 +289,19 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) | |||
249 | } | 289 | } |
250 | 290 | ||
251 | static int | 291 | static int |
252 | nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args) | 292 | nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, |
293 | struct nfs4_rpc_args *rpc_args) | ||
253 | { | 294 | { |
254 | struct xdr_stream xdr; | 295 | struct xdr_stream xdr; |
296 | struct nfs4_delegation *args = rpc_args->args_op; | ||
255 | struct nfs4_cb_compound_hdr hdr = { | 297 | struct nfs4_cb_compound_hdr hdr = { |
256 | .ident = args->dl_ident, | 298 | .ident = args->dl_ident, |
299 | .minorversion = rpc_args->args_seq.cbs_minorversion, | ||
257 | }; | 300 | }; |
258 | 301 | ||
259 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | 302 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); |
260 | encode_cb_compound_hdr(&xdr, &hdr); | 303 | encode_cb_compound_hdr(&xdr, &hdr); |
304 | encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr); | ||
261 | encode_cb_recall(&xdr, args, &hdr); | 305 | encode_cb_recall(&xdr, args, &hdr); |
262 | encode_cb_nops(&hdr); | 306 | encode_cb_nops(&hdr); |
263 | return 0; | 307 | return 0; |
@@ -299,6 +343,57 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) | |||
299 | return 0; | 343 | return 0; |
300 | } | 344 | } |
301 | 345 | ||
346 | /* | ||
347 | * Our current back channel implmentation supports a single backchannel | ||
348 | * with a single slot. | ||
349 | */ | ||
350 | static int | ||
351 | decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, | ||
352 | struct rpc_rqst *rqstp) | ||
353 | { | ||
354 | struct nfs4_sessionid id; | ||
355 | int status; | ||
356 | u32 dummy; | ||
357 | __be32 *p; | ||
358 | |||
359 | if (res->cbs_minorversion == 0) | ||
360 | return 0; | ||
361 | |||
362 | status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); | ||
363 | if (status) | ||
364 | return status; | ||
365 | |||
366 | /* | ||
367 | * If the server returns different values for sessionID, slotID or | ||
368 | * sequence number, the server is looney tunes. | ||
369 | */ | ||
370 | status = -ESERVERFAULT; | ||
371 | |||
372 | READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); | ||
373 | memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); | ||
374 | p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); | ||
375 | if (memcmp(id.data, res->cbs_clp->cl_sessionid.data, | ||
376 | NFS4_MAX_SESSIONID_LEN)) { | ||
377 | dprintk("%s Invalid session id\n", __func__); | ||
378 | goto out; | ||
379 | } | ||
380 | READ32(dummy); | ||
381 | if (dummy != res->cbs_clp->cl_cb_seq_nr) { | ||
382 | dprintk("%s Invalid sequence number\n", __func__); | ||
383 | goto out; | ||
384 | } | ||
385 | READ32(dummy); /* slotid must be 0 */ | ||
386 | if (dummy != 0) { | ||
387 | dprintk("%s Invalid slotid\n", __func__); | ||
388 | goto out; | ||
389 | } | ||
390 | /* FIXME: process highest slotid and target highest slotid */ | ||
391 | status = 0; | ||
392 | out: | ||
393 | return status; | ||
394 | } | ||
395 | |||
396 | |||
302 | static int | 397 | static int |
303 | nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) | 398 | nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) |
304 | { | 399 | { |
@@ -306,7 +401,8 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) | |||
306 | } | 401 | } |
307 | 402 | ||
308 | static int | 403 | static int |
309 | nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p) | 404 | nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, |
405 | struct nfsd4_cb_sequence *seq) | ||
310 | { | 406 | { |
311 | struct xdr_stream xdr; | 407 | struct xdr_stream xdr; |
312 | struct nfs4_cb_compound_hdr hdr; | 408 | struct nfs4_cb_compound_hdr hdr; |
@@ -316,6 +412,11 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p) | |||
316 | status = decode_cb_compound_hdr(&xdr, &hdr); | 412 | status = decode_cb_compound_hdr(&xdr, &hdr); |
317 | if (status) | 413 | if (status) |
318 | goto out; | 414 | goto out; |
415 | if (seq) { | ||
416 | status = decode_cb_sequence(&xdr, seq, rqstp); | ||
417 | if (status) | ||
418 | goto out; | ||
419 | } | ||
319 | status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); | 420 | status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); |
320 | out: | 421 | out: |
321 | return status; | 422 | return status; |
@@ -377,16 +478,15 @@ static int max_cb_time(void) | |||
377 | 478 | ||
378 | int setup_callback_client(struct nfs4_client *clp) | 479 | int setup_callback_client(struct nfs4_client *clp) |
379 | { | 480 | { |
380 | struct sockaddr_in addr; | ||
381 | struct nfs4_cb_conn *cb = &clp->cl_cb_conn; | 481 | struct nfs4_cb_conn *cb = &clp->cl_cb_conn; |
382 | struct rpc_timeout timeparms = { | 482 | struct rpc_timeout timeparms = { |
383 | .to_initval = max_cb_time(), | 483 | .to_initval = max_cb_time(), |
384 | .to_retries = 0, | 484 | .to_retries = 0, |
385 | }; | 485 | }; |
386 | struct rpc_create_args args = { | 486 | struct rpc_create_args args = { |
387 | .protocol = IPPROTO_TCP, | 487 | .protocol = XPRT_TRANSPORT_TCP, |
388 | .address = (struct sockaddr *)&addr, | 488 | .address = (struct sockaddr *) &cb->cb_addr, |
389 | .addrsize = sizeof(addr), | 489 | .addrsize = cb->cb_addrlen, |
390 | .timeout = &timeparms, | 490 | .timeout = &timeparms, |
391 | .program = &cb_program, | 491 | .program = &cb_program, |
392 | .prognumber = cb->cb_prog, | 492 | .prognumber = cb->cb_prog, |
@@ -399,13 +499,10 @@ int setup_callback_client(struct nfs4_client *clp) | |||
399 | 499 | ||
400 | if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) | 500 | if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) |
401 | return -EINVAL; | 501 | return -EINVAL; |
402 | 502 | if (cb->cb_minorversion) { | |
403 | /* Initialize address */ | 503 | args.bc_xprt = clp->cl_cb_xprt; |
404 | memset(&addr, 0, sizeof(addr)); | 504 | args.protocol = XPRT_TRANSPORT_BC_TCP; |
405 | addr.sin_family = AF_INET; | 505 | } |
406 | addr.sin_port = htons(cb->cb_port); | ||
407 | addr.sin_addr.s_addr = htonl(cb->cb_addr); | ||
408 | |||
409 | /* Create RPC client */ | 506 | /* Create RPC client */ |
410 | client = rpc_create(&args); | 507 | client = rpc_create(&args); |
411 | if (IS_ERR(client)) { | 508 | if (IS_ERR(client)) { |
@@ -439,42 +536,29 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { | |||
439 | .rpc_call_done = nfsd4_cb_probe_done, | 536 | .rpc_call_done = nfsd4_cb_probe_done, |
440 | }; | 537 | }; |
441 | 538 | ||
442 | static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb) | 539 | static struct rpc_cred *callback_cred; |
443 | { | ||
444 | struct auth_cred acred = { | ||
445 | .machine_cred = 1 | ||
446 | }; | ||
447 | 540 | ||
448 | /* | 541 | int set_callback_cred(void) |
449 | * Note in the gss case this doesn't actually have to wait for a | 542 | { |
450 | * gss upcall (or any calls to the client); this just creates a | 543 | callback_cred = rpc_lookup_machine_cred(); |
451 | * non-uptodate cred which the rpc state machine will fill in with | 544 | if (!callback_cred) |
452 | * a refresh_upcall later. | 545 | return -ENOMEM; |
453 | */ | 546 | return 0; |
454 | return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred, | ||
455 | RPCAUTH_LOOKUP_NEW); | ||
456 | } | 547 | } |
457 | 548 | ||
549 | |||
458 | void do_probe_callback(struct nfs4_client *clp) | 550 | void do_probe_callback(struct nfs4_client *clp) |
459 | { | 551 | { |
460 | struct nfs4_cb_conn *cb = &clp->cl_cb_conn; | 552 | struct nfs4_cb_conn *cb = &clp->cl_cb_conn; |
461 | struct rpc_message msg = { | 553 | struct rpc_message msg = { |
462 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | 554 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], |
463 | .rpc_argp = clp, | 555 | .rpc_argp = clp, |
556 | .rpc_cred = callback_cred | ||
464 | }; | 557 | }; |
465 | struct rpc_cred *cred; | ||
466 | int status; | 558 | int status; |
467 | 559 | ||
468 | cred = lookup_cb_cred(cb); | ||
469 | if (IS_ERR(cred)) { | ||
470 | status = PTR_ERR(cred); | ||
471 | goto out; | ||
472 | } | ||
473 | cb->cb_cred = cred; | ||
474 | msg.rpc_cred = cb->cb_cred; | ||
475 | status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT, | 560 | status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT, |
476 | &nfsd4_cb_probe_ops, (void *)clp); | 561 | &nfsd4_cb_probe_ops, (void *)clp); |
477 | out: | ||
478 | if (status) { | 562 | if (status) { |
479 | warn_no_callback_path(clp, status); | 563 | warn_no_callback_path(clp, status); |
480 | put_nfs4_client(clp); | 564 | put_nfs4_client(clp); |
@@ -503,11 +587,95 @@ nfsd4_probe_callback(struct nfs4_client *clp) | |||
503 | do_probe_callback(clp); | 587 | do_probe_callback(clp); |
504 | } | 588 | } |
505 | 589 | ||
590 | /* | ||
591 | * There's currently a single callback channel slot. | ||
592 | * If the slot is available, then mark it busy. Otherwise, set the | ||
593 | * thread for sleeping on the callback RPC wait queue. | ||
594 | */ | ||
595 | static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, | ||
596 | struct rpc_task *task) | ||
597 | { | ||
598 | struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; | ||
599 | u32 *ptr = (u32 *)clp->cl_sessionid.data; | ||
600 | int status = 0; | ||
601 | |||
602 | dprintk("%s: %u:%u:%u:%u\n", __func__, | ||
603 | ptr[0], ptr[1], ptr[2], ptr[3]); | ||
604 | |||
605 | if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { | ||
606 | rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); | ||
607 | dprintk("%s slot is busy\n", __func__); | ||
608 | status = -EAGAIN; | ||
609 | goto out; | ||
610 | } | ||
611 | |||
612 | /* | ||
613 | * We'll need the clp during XDR encoding and decoding, | ||
614 | * and the sequence during decoding to verify the reply | ||
615 | */ | ||
616 | args->args_seq.cbs_clp = clp; | ||
617 | task->tk_msg.rpc_resp = &args->args_seq; | ||
618 | |||
619 | out: | ||
620 | dprintk("%s status=%d\n", __func__, status); | ||
621 | return status; | ||
622 | } | ||
623 | |||
624 | /* | ||
625 | * TODO: cb_sequence should support referring call lists, cachethis, multiple | ||
626 | * slots, and mark callback channel down on communication errors. | ||
627 | */ | ||
628 | static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) | ||
629 | { | ||
630 | struct nfs4_delegation *dp = calldata; | ||
631 | struct nfs4_client *clp = dp->dl_client; | ||
632 | struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; | ||
633 | u32 minorversion = clp->cl_cb_conn.cb_minorversion; | ||
634 | int status = 0; | ||
635 | |||
636 | args->args_seq.cbs_minorversion = minorversion; | ||
637 | if (minorversion) { | ||
638 | status = nfsd41_cb_setup_sequence(clp, task); | ||
639 | if (status) { | ||
640 | if (status != -EAGAIN) { | ||
641 | /* terminate rpc task */ | ||
642 | task->tk_status = status; | ||
643 | task->tk_action = NULL; | ||
644 | } | ||
645 | return; | ||
646 | } | ||
647 | } | ||
648 | rpc_call_start(task); | ||
649 | } | ||
650 | |||
651 | static void nfsd4_cb_done(struct rpc_task *task, void *calldata) | ||
652 | { | ||
653 | struct nfs4_delegation *dp = calldata; | ||
654 | struct nfs4_client *clp = dp->dl_client; | ||
655 | |||
656 | dprintk("%s: minorversion=%d\n", __func__, | ||
657 | clp->cl_cb_conn.cb_minorversion); | ||
658 | |||
659 | if (clp->cl_cb_conn.cb_minorversion) { | ||
660 | /* No need for lock, access serialized in nfsd4_cb_prepare */ | ||
661 | ++clp->cl_cb_seq_nr; | ||
662 | clear_bit(0, &clp->cl_cb_slot_busy); | ||
663 | rpc_wake_up_next(&clp->cl_cb_waitq); | ||
664 | dprintk("%s: freed slot, new seqid=%d\n", __func__, | ||
665 | clp->cl_cb_seq_nr); | ||
666 | |||
667 | /* We're done looking into the sequence information */ | ||
668 | task->tk_msg.rpc_resp = NULL; | ||
669 | } | ||
670 | } | ||
671 | |||
506 | static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | 672 | static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) |
507 | { | 673 | { |
508 | struct nfs4_delegation *dp = calldata; | 674 | struct nfs4_delegation *dp = calldata; |
509 | struct nfs4_client *clp = dp->dl_client; | 675 | struct nfs4_client *clp = dp->dl_client; |
510 | 676 | ||
677 | nfsd4_cb_done(task, calldata); | ||
678 | |||
511 | switch (task->tk_status) { | 679 | switch (task->tk_status) { |
512 | case -EIO: | 680 | case -EIO: |
513 | /* Network partition? */ | 681 | /* Network partition? */ |
@@ -520,16 +688,19 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | |||
520 | break; | 688 | break; |
521 | default: | 689 | default: |
522 | /* success, or error we can't handle */ | 690 | /* success, or error we can't handle */ |
523 | return; | 691 | goto done; |
524 | } | 692 | } |
525 | if (dp->dl_retries--) { | 693 | if (dp->dl_retries--) { |
526 | rpc_delay(task, 2*HZ); | 694 | rpc_delay(task, 2*HZ); |
527 | task->tk_status = 0; | 695 | task->tk_status = 0; |
528 | rpc_restart_call(task); | 696 | rpc_restart_call(task); |
697 | return; | ||
529 | } else { | 698 | } else { |
530 | atomic_set(&clp->cl_cb_conn.cb_set, 0); | 699 | atomic_set(&clp->cl_cb_conn.cb_set, 0); |
531 | warn_no_callback_path(clp, task->tk_status); | 700 | warn_no_callback_path(clp, task->tk_status); |
532 | } | 701 | } |
702 | done: | ||
703 | kfree(task->tk_msg.rpc_argp); | ||
533 | } | 704 | } |
534 | 705 | ||
535 | static void nfsd4_cb_recall_release(void *calldata) | 706 | static void nfsd4_cb_recall_release(void *calldata) |
@@ -542,6 +713,7 @@ static void nfsd4_cb_recall_release(void *calldata) | |||
542 | } | 713 | } |
543 | 714 | ||
544 | static const struct rpc_call_ops nfsd4_cb_recall_ops = { | 715 | static const struct rpc_call_ops nfsd4_cb_recall_ops = { |
716 | .rpc_call_prepare = nfsd4_cb_prepare, | ||
545 | .rpc_call_done = nfsd4_cb_recall_done, | 717 | .rpc_call_done = nfsd4_cb_recall_done, |
546 | .rpc_release = nfsd4_cb_recall_release, | 718 | .rpc_release = nfsd4_cb_recall_release, |
547 | }; | 719 | }; |
@@ -554,17 +726,24 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) | |||
554 | { | 726 | { |
555 | struct nfs4_client *clp = dp->dl_client; | 727 | struct nfs4_client *clp = dp->dl_client; |
556 | struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; | 728 | struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; |
729 | struct nfs4_rpc_args *args; | ||
557 | struct rpc_message msg = { | 730 | struct rpc_message msg = { |
558 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], | 731 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], |
559 | .rpc_argp = dp, | 732 | .rpc_cred = callback_cred |
560 | .rpc_cred = clp->cl_cb_conn.cb_cred | ||
561 | }; | 733 | }; |
562 | int status; | 734 | int status = -ENOMEM; |
563 | 735 | ||
736 | args = kzalloc(sizeof(*args), GFP_KERNEL); | ||
737 | if (!args) | ||
738 | goto out; | ||
739 | args->args_op = dp; | ||
740 | msg.rpc_argp = args; | ||
564 | dp->dl_retries = 1; | 741 | dp->dl_retries = 1; |
565 | status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, | 742 | status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, |
566 | &nfsd4_cb_recall_ops, dp); | 743 | &nfsd4_cb_recall_ops, dp); |
744 | out: | ||
567 | if (status) { | 745 | if (status) { |
746 | kfree(args); | ||
568 | put_nfs4_client(clp); | 747 | put_nfs4_client(clp); |
569 | nfs4_put_delegation(dp); | 748 | nfs4_put_delegation(dp); |
570 | } | 749 | } |
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index cdfa86fa1471..ba2c199592fd 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -38,7 +38,6 @@ | |||
38 | #include <linux/init.h> | 38 | #include <linux/init.h> |
39 | 39 | ||
40 | #include <linux/mm.h> | 40 | #include <linux/mm.h> |
41 | #include <linux/utsname.h> | ||
42 | #include <linux/errno.h> | 41 | #include <linux/errno.h> |
43 | #include <linux/string.h> | 42 | #include <linux/string.h> |
44 | #include <linux/sunrpc/clnt.h> | 43 | #include <linux/sunrpc/clnt.h> |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 7c8801769a3c..bebc0c2e1b0a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -68,7 +68,6 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
68 | u32 *bmval, u32 *writable) | 68 | u32 *bmval, u32 *writable) |
69 | { | 69 | { |
70 | struct dentry *dentry = cstate->current_fh.fh_dentry; | 70 | struct dentry *dentry = cstate->current_fh.fh_dentry; |
71 | struct svc_export *exp = cstate->current_fh.fh_export; | ||
72 | 71 | ||
73 | /* | 72 | /* |
74 | * Check about attributes are supported by the NFSv4 server or not. | 73 | * Check about attributes are supported by the NFSv4 server or not. |
@@ -80,17 +79,13 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
80 | return nfserr_attrnotsupp; | 79 | return nfserr_attrnotsupp; |
81 | 80 | ||
82 | /* | 81 | /* |
83 | * Check FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported | 82 | * Check FATTR4_WORD0_ACL can be supported |
84 | * in current environment or not. | 83 | * in current environment or not. |
85 | */ | 84 | */ |
86 | if (bmval[0] & FATTR4_WORD0_ACL) { | 85 | if (bmval[0] & FATTR4_WORD0_ACL) { |
87 | if (!IS_POSIXACL(dentry->d_inode)) | 86 | if (!IS_POSIXACL(dentry->d_inode)) |
88 | return nfserr_attrnotsupp; | 87 | return nfserr_attrnotsupp; |
89 | } | 88 | } |
90 | if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) { | ||
91 | if (exp->ex_fslocs.locations == NULL) | ||
92 | return nfserr_attrnotsupp; | ||
93 | } | ||
94 | 89 | ||
95 | /* | 90 | /* |
96 | * According to spec, read-only attributes return ERR_INVAL. | 91 | * According to spec, read-only attributes return ERR_INVAL. |
@@ -123,6 +118,35 @@ nfsd4_check_open_attributes(struct svc_rqst *rqstp, | |||
123 | return status; | 118 | return status; |
124 | } | 119 | } |
125 | 120 | ||
121 | static int | ||
122 | is_create_with_attrs(struct nfsd4_open *open) | ||
123 | { | ||
124 | return open->op_create == NFS4_OPEN_CREATE | ||
125 | && (open->op_createmode == NFS4_CREATE_UNCHECKED | ||
126 | || open->op_createmode == NFS4_CREATE_GUARDED | ||
127 | || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1); | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * if error occurs when setting the acl, just clear the acl bit | ||
132 | * in the returned attr bitmap. | ||
133 | */ | ||
134 | static void | ||
135 | do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, | ||
136 | struct nfs4_acl *acl, u32 *bmval) | ||
137 | { | ||
138 | __be32 status; | ||
139 | |||
140 | status = nfsd4_set_nfs4_acl(rqstp, fhp, acl); | ||
141 | if (status) | ||
142 | /* | ||
143 | * We should probably fail the whole open at this point, | ||
144 | * but we've already created the file, so it's too late; | ||
145 | * So this seems the least of evils: | ||
146 | */ | ||
147 | bmval[0] &= ~FATTR4_WORD0_ACL; | ||
148 | } | ||
149 | |||
126 | static inline void | 150 | static inline void |
127 | fh_dup2(struct svc_fh *dst, struct svc_fh *src) | 151 | fh_dup2(struct svc_fh *dst, struct svc_fh *src) |
128 | { | 152 | { |
@@ -206,6 +230,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o | |||
206 | if (status) | 230 | if (status) |
207 | goto out; | 231 | goto out; |
208 | 232 | ||
233 | if (is_create_with_attrs(open) && open->op_acl != NULL) | ||
234 | do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval); | ||
235 | |||
209 | set_change_info(&open->op_cinfo, current_fh); | 236 | set_change_info(&open->op_cinfo, current_fh); |
210 | fh_dup2(current_fh, &resfh); | 237 | fh_dup2(current_fh, &resfh); |
211 | 238 | ||
@@ -536,12 +563,17 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
536 | status = nfserr_badtype; | 563 | status = nfserr_badtype; |
537 | } | 564 | } |
538 | 565 | ||
539 | if (!status) { | 566 | if (status) |
540 | fh_unlock(&cstate->current_fh); | 567 | goto out; |
541 | set_change_info(&create->cr_cinfo, &cstate->current_fh); | 568 | |
542 | fh_dup2(&cstate->current_fh, &resfh); | 569 | if (create->cr_acl != NULL) |
543 | } | 570 | do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, |
571 | create->cr_bmval); | ||
544 | 572 | ||
573 | fh_unlock(&cstate->current_fh); | ||
574 | set_change_info(&create->cr_cinfo, &cstate->current_fh); | ||
575 | fh_dup2(&cstate->current_fh, &resfh); | ||
576 | out: | ||
545 | fh_put(&resfh); | 577 | fh_put(&resfh); |
546 | return status; | 578 | return status; |
547 | } | 579 | } |
@@ -947,34 +979,6 @@ static struct nfsd4_operation nfsd4_ops[]; | |||
947 | static const char *nfsd4_op_name(unsigned opnum); | 979 | static const char *nfsd4_op_name(unsigned opnum); |
948 | 980 | ||
949 | /* | 981 | /* |
950 | * This is a replay of a compound for which no cache entry pages | ||
951 | * were used. Encode the sequence operation, and if cachethis is FALSE | ||
952 | * encode the uncache rep error on the next operation. | ||
953 | */ | ||
954 | static __be32 | ||
955 | nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args, | ||
956 | struct nfsd4_compoundres *resp) | ||
957 | { | ||
958 | struct nfsd4_op *op; | ||
959 | |||
960 | dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__, | ||
961 | resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis); | ||
962 | |||
963 | /* Encode the replayed sequence operation */ | ||
964 | BUG_ON(resp->opcnt != 1); | ||
965 | op = &args->ops[resp->opcnt - 1]; | ||
966 | nfsd4_encode_operation(resp, op); | ||
967 | |||
968 | /*return nfserr_retry_uncached_rep in next operation. */ | ||
969 | if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) { | ||
970 | op = &args->ops[resp->opcnt++]; | ||
971 | op->status = nfserr_retry_uncached_rep; | ||
972 | nfsd4_encode_operation(resp, op); | ||
973 | } | ||
974 | return op->status; | ||
975 | } | ||
976 | |||
977 | /* | ||
978 | * Enforce NFSv4.1 COMPOUND ordering rules. | 982 | * Enforce NFSv4.1 COMPOUND ordering rules. |
979 | * | 983 | * |
980 | * TODO: | 984 | * TODO: |
@@ -1083,13 +1087,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
1083 | BUG_ON(op->status == nfs_ok); | 1087 | BUG_ON(op->status == nfs_ok); |
1084 | 1088 | ||
1085 | encode_op: | 1089 | encode_op: |
1086 | /* Only from SEQUENCE or CREATE_SESSION */ | 1090 | /* Only from SEQUENCE */ |
1087 | if (resp->cstate.status == nfserr_replay_cache) { | 1091 | if (resp->cstate.status == nfserr_replay_cache) { |
1088 | dprintk("%s NFS4.1 replay from cache\n", __func__); | 1092 | dprintk("%s NFS4.1 replay from cache\n", __func__); |
1089 | if (nfsd4_not_cached(resp)) | 1093 | status = op->status; |
1090 | status = nfsd4_enc_uncached_replay(args, resp); | ||
1091 | else | ||
1092 | status = op->status; | ||
1093 | goto out; | 1094 | goto out; |
1094 | } | 1095 | } |
1095 | if (op->status == nfserr_replay_me) { | 1096 | if (op->status == nfserr_replay_me) { |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 980a216a48c8..2153f9bdbebd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <linux/lockd/bind.h> | 55 | #include <linux/lockd/bind.h> |
56 | #include <linux/module.h> | 56 | #include <linux/module.h> |
57 | #include <linux/sunrpc/svcauth_gss.h> | 57 | #include <linux/sunrpc/svcauth_gss.h> |
58 | #include <linux/sunrpc/clnt.h> | ||
58 | 59 | ||
59 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 60 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
60 | 61 | ||
@@ -413,36 +414,65 @@ gen_sessionid(struct nfsd4_session *ses) | |||
413 | } | 414 | } |
414 | 415 | ||
415 | /* | 416 | /* |
416 | * Give the client the number of slots it requests bound by | 417 | * The protocol defines ca_maxresponssize_cached to include the size of |
417 | * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. | 418 | * the rpc header, but all we need to cache is the data starting after |
419 | * the end of the initial SEQUENCE operation--the rest we regenerate | ||
420 | * each time. Therefore we can advertise a ca_maxresponssize_cached | ||
421 | * value that is the number of bytes in our cache plus a few additional | ||
422 | * bytes. In order to stay on the safe side, and not promise more than | ||
423 | * we can cache, those additional bytes must be the minimum possible: 24 | ||
424 | * bytes of rpc header (xid through accept state, with AUTH_NULL | ||
425 | * verifier), 12 for the compound header (with zero-length tag), and 44 | ||
426 | * for the SEQUENCE op response: | ||
427 | */ | ||
428 | #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) | ||
429 | |||
430 | /* | ||
431 | * Give the client the number of ca_maxresponsesize_cached slots it | ||
432 | * requests, of size bounded by NFSD_SLOT_CACHE_SIZE, | ||
433 | * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more | ||
434 | * than NFSD_MAX_SLOTS_PER_SESSION. | ||
418 | * | 435 | * |
419 | * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we | 436 | * If we run out of reserved DRC memory we should (up to a point) |
420 | * should (up to a point) re-negotiate active sessions and reduce their | 437 | * re-negotiate active sessions and reduce their slot usage to make |
421 | * slot usage to make rooom for new connections. For now we just fail the | 438 | * rooom for new connections. For now we just fail the create session. |
422 | * create session. | ||
423 | */ | 439 | */ |
424 | static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) | 440 | static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) |
425 | { | 441 | { |
426 | int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; | 442 | int mem, size = fchan->maxresp_cached; |
427 | 443 | ||
428 | if (fchan->maxreqs < 1) | 444 | if (fchan->maxreqs < 1) |
429 | return nfserr_inval; | 445 | return nfserr_inval; |
430 | else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) | ||
431 | fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; | ||
432 | 446 | ||
433 | spin_lock(&nfsd_serv->sv_lock); | 447 | if (size < NFSD_MIN_HDR_SEQ_SZ) |
434 | if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) | 448 | size = NFSD_MIN_HDR_SEQ_SZ; |
435 | np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; | 449 | size -= NFSD_MIN_HDR_SEQ_SZ; |
436 | nfsd_serv->sv_drc_pages_used += np; | 450 | if (size > NFSD_SLOT_CACHE_SIZE) |
437 | spin_unlock(&nfsd_serv->sv_lock); | 451 | size = NFSD_SLOT_CACHE_SIZE; |
452 | |||
453 | /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */ | ||
454 | mem = fchan->maxreqs * size; | ||
455 | if (mem > NFSD_MAX_MEM_PER_SESSION) { | ||
456 | fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size; | ||
457 | if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) | ||
458 | fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; | ||
459 | mem = fchan->maxreqs * size; | ||
460 | } | ||
438 | 461 | ||
439 | if (np <= 0) { | 462 | spin_lock(&nfsd_drc_lock); |
440 | status = nfserr_resource; | 463 | /* bound the total session drc memory ussage */ |
441 | fchan->maxreqs = 0; | 464 | if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) { |
442 | } else | 465 | fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size; |
443 | fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; | 466 | mem = fchan->maxreqs * size; |
467 | } | ||
468 | nfsd_drc_mem_used += mem; | ||
469 | spin_unlock(&nfsd_drc_lock); | ||
444 | 470 | ||
445 | return status; | 471 | if (fchan->maxreqs == 0) |
472 | return nfserr_serverfault; | ||
473 | |||
474 | fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; | ||
475 | return 0; | ||
446 | } | 476 | } |
447 | 477 | ||
448 | /* | 478 | /* |
@@ -466,36 +496,41 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, | |||
466 | fchan->maxresp_sz = maxcount; | 496 | fchan->maxresp_sz = maxcount; |
467 | session_fchan->maxresp_sz = fchan->maxresp_sz; | 497 | session_fchan->maxresp_sz = fchan->maxresp_sz; |
468 | 498 | ||
469 | /* Set the max response cached size our default which is | ||
470 | * a multiple of PAGE_SIZE and small */ | ||
471 | session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; | ||
472 | fchan->maxresp_cached = session_fchan->maxresp_cached; | ||
473 | |||
474 | /* Use the client's maxops if possible */ | 499 | /* Use the client's maxops if possible */ |
475 | if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) | 500 | if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) |
476 | fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; | 501 | fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; |
477 | session_fchan->maxops = fchan->maxops; | 502 | session_fchan->maxops = fchan->maxops; |
478 | 503 | ||
479 | /* try to use the client requested number of slots */ | ||
480 | if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) | ||
481 | fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; | ||
482 | |||
483 | /* FIXME: Error means no more DRC pages so the server should | 504 | /* FIXME: Error means no more DRC pages so the server should |
484 | * recover pages from existing sessions. For now fail session | 505 | * recover pages from existing sessions. For now fail session |
485 | * creation. | 506 | * creation. |
486 | */ | 507 | */ |
487 | status = set_forechannel_maxreqs(fchan); | 508 | status = set_forechannel_drc_size(fchan); |
488 | 509 | ||
510 | session_fchan->maxresp_cached = fchan->maxresp_cached; | ||
489 | session_fchan->maxreqs = fchan->maxreqs; | 511 | session_fchan->maxreqs = fchan->maxreqs; |
512 | |||
513 | dprintk("%s status %d\n", __func__, status); | ||
490 | return status; | 514 | return status; |
491 | } | 515 | } |
492 | 516 | ||
517 | static void | ||
518 | free_session_slots(struct nfsd4_session *ses) | ||
519 | { | ||
520 | int i; | ||
521 | |||
522 | for (i = 0; i < ses->se_fchannel.maxreqs; i++) | ||
523 | kfree(ses->se_slots[i]); | ||
524 | } | ||
525 | |||
493 | static int | 526 | static int |
494 | alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, | 527 | alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, |
495 | struct nfsd4_create_session *cses) | 528 | struct nfsd4_create_session *cses) |
496 | { | 529 | { |
497 | struct nfsd4_session *new, tmp; | 530 | struct nfsd4_session *new, tmp; |
498 | int idx, status = nfserr_resource, slotsize; | 531 | struct nfsd4_slot *sp; |
532 | int idx, slotsize, cachesize, i; | ||
533 | int status; | ||
499 | 534 | ||
500 | memset(&tmp, 0, sizeof(tmp)); | 535 | memset(&tmp, 0, sizeof(tmp)); |
501 | 536 | ||
@@ -506,14 +541,27 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, | |||
506 | if (status) | 541 | if (status) |
507 | goto out; | 542 | goto out; |
508 | 543 | ||
509 | /* allocate struct nfsd4_session and slot table in one piece */ | 544 | BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) |
510 | slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot); | 545 | + sizeof(struct nfsd4_session) > PAGE_SIZE); |
546 | |||
547 | status = nfserr_serverfault; | ||
548 | /* allocate struct nfsd4_session and slot table pointers in one piece */ | ||
549 | slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *); | ||
511 | new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); | 550 | new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); |
512 | if (!new) | 551 | if (!new) |
513 | goto out; | 552 | goto out; |
514 | 553 | ||
515 | memcpy(new, &tmp, sizeof(*new)); | 554 | memcpy(new, &tmp, sizeof(*new)); |
516 | 555 | ||
556 | /* allocate each struct nfsd4_slot and data cache in one piece */ | ||
557 | cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; | ||
558 | for (i = 0; i < new->se_fchannel.maxreqs; i++) { | ||
559 | sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); | ||
560 | if (!sp) | ||
561 | goto out_free; | ||
562 | new->se_slots[i] = sp; | ||
563 | } | ||
564 | |||
517 | new->se_client = clp; | 565 | new->se_client = clp; |
518 | gen_sessionid(new); | 566 | gen_sessionid(new); |
519 | idx = hash_sessionid(&new->se_sessionid); | 567 | idx = hash_sessionid(&new->se_sessionid); |
@@ -530,6 +578,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, | |||
530 | status = nfs_ok; | 578 | status = nfs_ok; |
531 | out: | 579 | out: |
532 | return status; | 580 | return status; |
581 | out_free: | ||
582 | free_session_slots(new); | ||
583 | kfree(new); | ||
584 | goto out; | ||
533 | } | 585 | } |
534 | 586 | ||
535 | /* caller must hold sessionid_lock */ | 587 | /* caller must hold sessionid_lock */ |
@@ -572,19 +624,16 @@ release_session(struct nfsd4_session *ses) | |||
572 | nfsd4_put_session(ses); | 624 | nfsd4_put_session(ses); |
573 | } | 625 | } |
574 | 626 | ||
575 | static void nfsd4_release_respages(struct page **respages, short resused); | ||
576 | |||
577 | void | 627 | void |
578 | free_session(struct kref *kref) | 628 | free_session(struct kref *kref) |
579 | { | 629 | { |
580 | struct nfsd4_session *ses; | 630 | struct nfsd4_session *ses; |
581 | int i; | ||
582 | 631 | ||
583 | ses = container_of(kref, struct nfsd4_session, se_ref); | 632 | ses = container_of(kref, struct nfsd4_session, se_ref); |
584 | for (i = 0; i < ses->se_fchannel.maxreqs; i++) { | 633 | spin_lock(&nfsd_drc_lock); |
585 | struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; | 634 | nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; |
586 | nfsd4_release_respages(e->ce_respages, e->ce_resused); | 635 | spin_unlock(&nfsd_drc_lock); |
587 | } | 636 | free_session_slots(ses); |
588 | kfree(ses); | 637 | kfree(ses); |
589 | } | 638 | } |
590 | 639 | ||
@@ -647,18 +696,14 @@ shutdown_callback_client(struct nfs4_client *clp) | |||
647 | clp->cl_cb_conn.cb_client = NULL; | 696 | clp->cl_cb_conn.cb_client = NULL; |
648 | rpc_shutdown_client(clnt); | 697 | rpc_shutdown_client(clnt); |
649 | } | 698 | } |
650 | if (clp->cl_cb_conn.cb_cred) { | ||
651 | put_rpccred(clp->cl_cb_conn.cb_cred); | ||
652 | clp->cl_cb_conn.cb_cred = NULL; | ||
653 | } | ||
654 | } | 699 | } |
655 | 700 | ||
656 | static inline void | 701 | static inline void |
657 | free_client(struct nfs4_client *clp) | 702 | free_client(struct nfs4_client *clp) |
658 | { | 703 | { |
659 | shutdown_callback_client(clp); | 704 | shutdown_callback_client(clp); |
660 | nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, | 705 | if (clp->cl_cb_xprt) |
661 | clp->cl_slot.sl_cache_entry.ce_resused); | 706 | svc_xprt_put(clp->cl_cb_xprt); |
662 | if (clp->cl_cred.cr_group_info) | 707 | if (clp->cl_cred.cr_group_info) |
663 | put_group_info(clp->cl_cred.cr_group_info); | 708 | put_group_info(clp->cl_cred.cr_group_info); |
664 | kfree(clp->cl_principal); | 709 | kfree(clp->cl_principal); |
@@ -714,25 +759,6 @@ expire_client(struct nfs4_client *clp) | |||
714 | put_nfs4_client(clp); | 759 | put_nfs4_client(clp); |
715 | } | 760 | } |
716 | 761 | ||
717 | static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) | ||
718 | { | ||
719 | struct nfs4_client *clp; | ||
720 | |||
721 | clp = alloc_client(name); | ||
722 | if (clp == NULL) | ||
723 | return NULL; | ||
724 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); | ||
725 | atomic_set(&clp->cl_count, 1); | ||
726 | atomic_set(&clp->cl_cb_conn.cb_set, 0); | ||
727 | INIT_LIST_HEAD(&clp->cl_idhash); | ||
728 | INIT_LIST_HEAD(&clp->cl_strhash); | ||
729 | INIT_LIST_HEAD(&clp->cl_openowners); | ||
730 | INIT_LIST_HEAD(&clp->cl_delegations); | ||
731 | INIT_LIST_HEAD(&clp->cl_sessions); | ||
732 | INIT_LIST_HEAD(&clp->cl_lru); | ||
733 | return clp; | ||
734 | } | ||
735 | |||
736 | static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) | 762 | static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) |
737 | { | 763 | { |
738 | memcpy(target->cl_verifier.data, source->data, | 764 | memcpy(target->cl_verifier.data, source->data, |
@@ -795,6 +821,46 @@ static void gen_confirm(struct nfs4_client *clp) | |||
795 | *p++ = i++; | 821 | *p++ = i++; |
796 | } | 822 | } |
797 | 823 | ||
824 | static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, | ||
825 | struct svc_rqst *rqstp, nfs4_verifier *verf) | ||
826 | { | ||
827 | struct nfs4_client *clp; | ||
828 | struct sockaddr *sa = svc_addr(rqstp); | ||
829 | char *princ; | ||
830 | |||
831 | clp = alloc_client(name); | ||
832 | if (clp == NULL) | ||
833 | return NULL; | ||
834 | |||
835 | princ = svc_gss_principal(rqstp); | ||
836 | if (princ) { | ||
837 | clp->cl_principal = kstrdup(princ, GFP_KERNEL); | ||
838 | if (clp->cl_principal == NULL) { | ||
839 | free_client(clp); | ||
840 | return NULL; | ||
841 | } | ||
842 | } | ||
843 | |||
844 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); | ||
845 | atomic_set(&clp->cl_count, 1); | ||
846 | atomic_set(&clp->cl_cb_conn.cb_set, 0); | ||
847 | INIT_LIST_HEAD(&clp->cl_idhash); | ||
848 | INIT_LIST_HEAD(&clp->cl_strhash); | ||
849 | INIT_LIST_HEAD(&clp->cl_openowners); | ||
850 | INIT_LIST_HEAD(&clp->cl_delegations); | ||
851 | INIT_LIST_HEAD(&clp->cl_sessions); | ||
852 | INIT_LIST_HEAD(&clp->cl_lru); | ||
853 | clear_bit(0, &clp->cl_cb_slot_busy); | ||
854 | rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); | ||
855 | copy_verf(clp, verf); | ||
856 | rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); | ||
857 | clp->cl_flavor = rqstp->rq_flavor; | ||
858 | copy_cred(&clp->cl_cred, &rqstp->rq_cred); | ||
859 | gen_confirm(clp); | ||
860 | |||
861 | return clp; | ||
862 | } | ||
863 | |||
798 | static int check_name(struct xdr_netobj name) | 864 | static int check_name(struct xdr_netobj name) |
799 | { | 865 | { |
800 | if (name.len == 0) | 866 | if (name.len == 0) |
@@ -902,93 +968,40 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, | |||
902 | return NULL; | 968 | return NULL; |
903 | } | 969 | } |
904 | 970 | ||
905 | /* a helper function for parse_callback */ | ||
906 | static int | ||
907 | parse_octet(unsigned int *lenp, char **addrp) | ||
908 | { | ||
909 | unsigned int len = *lenp; | ||
910 | char *p = *addrp; | ||
911 | int n = -1; | ||
912 | char c; | ||
913 | |||
914 | for (;;) { | ||
915 | if (!len) | ||
916 | break; | ||
917 | len--; | ||
918 | c = *p++; | ||
919 | if (c == '.') | ||
920 | break; | ||
921 | if ((c < '0') || (c > '9')) { | ||
922 | n = -1; | ||
923 | break; | ||
924 | } | ||
925 | if (n < 0) | ||
926 | n = 0; | ||
927 | n = (n * 10) + (c - '0'); | ||
928 | if (n > 255) { | ||
929 | n = -1; | ||
930 | break; | ||
931 | } | ||
932 | } | ||
933 | *lenp = len; | ||
934 | *addrp = p; | ||
935 | return n; | ||
936 | } | ||
937 | |||
938 | /* parse and set the setclientid ipv4 callback address */ | ||
939 | static int | ||
940 | parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) | ||
941 | { | ||
942 | int temp = 0; | ||
943 | u32 cbaddr = 0; | ||
944 | u16 cbport = 0; | ||
945 | u32 addrlen = addr_len; | ||
946 | char *addr = addr_val; | ||
947 | int i, shift; | ||
948 | |||
949 | /* ipaddress */ | ||
950 | shift = 24; | ||
951 | for(i = 4; i > 0 ; i--) { | ||
952 | if ((temp = parse_octet(&addrlen, &addr)) < 0) { | ||
953 | return 0; | ||
954 | } | ||
955 | cbaddr |= (temp << shift); | ||
956 | if (shift > 0) | ||
957 | shift -= 8; | ||
958 | } | ||
959 | *cbaddrp = cbaddr; | ||
960 | |||
961 | /* port */ | ||
962 | shift = 8; | ||
963 | for(i = 2; i > 0 ; i--) { | ||
964 | if ((temp = parse_octet(&addrlen, &addr)) < 0) { | ||
965 | return 0; | ||
966 | } | ||
967 | cbport |= (temp << shift); | ||
968 | if (shift > 0) | ||
969 | shift -= 8; | ||
970 | } | ||
971 | *cbportp = cbport; | ||
972 | return 1; | ||
973 | } | ||
974 | |||
975 | static void | 971 | static void |
976 | gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) | 972 | gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) |
977 | { | 973 | { |
978 | struct nfs4_cb_conn *cb = &clp->cl_cb_conn; | 974 | struct nfs4_cb_conn *cb = &clp->cl_cb_conn; |
979 | 975 | unsigned short expected_family; | |
980 | /* Currently, we only support tcp for the callback channel */ | 976 | |
981 | if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) | 977 | /* Currently, we only support tcp and tcp6 for the callback channel */ |
978 | if (se->se_callback_netid_len == 3 && | ||
979 | !memcmp(se->se_callback_netid_val, "tcp", 3)) | ||
980 | expected_family = AF_INET; | ||
981 | else if (se->se_callback_netid_len == 4 && | ||
982 | !memcmp(se->se_callback_netid_val, "tcp6", 4)) | ||
983 | expected_family = AF_INET6; | ||
984 | else | ||
982 | goto out_err; | 985 | goto out_err; |
983 | 986 | ||
984 | if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, | 987 | cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, |
985 | &cb->cb_addr, &cb->cb_port))) | 988 | se->se_callback_addr_len, |
989 | (struct sockaddr *) &cb->cb_addr, | ||
990 | sizeof(cb->cb_addr)); | ||
991 | |||
992 | if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) | ||
986 | goto out_err; | 993 | goto out_err; |
994 | |||
995 | if (cb->cb_addr.ss_family == AF_INET6) | ||
996 | ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; | ||
997 | |||
987 | cb->cb_minorversion = 0; | 998 | cb->cb_minorversion = 0; |
988 | cb->cb_prog = se->se_callback_prog; | 999 | cb->cb_prog = se->se_callback_prog; |
989 | cb->cb_ident = se->se_callback_ident; | 1000 | cb->cb_ident = se->se_callback_ident; |
990 | return; | 1001 | return; |
991 | out_err: | 1002 | out_err: |
1003 | cb->cb_addr.ss_family = AF_UNSPEC; | ||
1004 | cb->cb_addrlen = 0; | ||
992 | dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " | 1005 | dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " |
993 | "will not receive delegations\n", | 1006 | "will not receive delegations\n", |
994 | clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); | 1007 | clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); |
@@ -996,175 +1009,87 @@ out_err: | |||
996 | return; | 1009 | return; |
997 | } | 1010 | } |
998 | 1011 | ||
999 | void | ||
1000 | nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) | ||
1001 | { | ||
1002 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
1003 | |||
1004 | resp->cstate.statp = statp; | ||
1005 | } | ||
1006 | |||
1007 | /* | 1012 | /* |
1008 | * Dereference the result pages. | 1013 | * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size. |
1009 | */ | 1014 | */ |
1010 | static void | 1015 | void |
1011 | nfsd4_release_respages(struct page **respages, short resused) | 1016 | nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) |
1012 | { | 1017 | { |
1013 | int i; | 1018 | struct nfsd4_slot *slot = resp->cstate.slot; |
1019 | unsigned int base; | ||
1014 | 1020 | ||
1015 | dprintk("--> %s\n", __func__); | 1021 | dprintk("--> %s slot %p\n", __func__, slot); |
1016 | for (i = 0; i < resused; i++) { | ||
1017 | if (!respages[i]) | ||
1018 | continue; | ||
1019 | put_page(respages[i]); | ||
1020 | respages[i] = NULL; | ||
1021 | } | ||
1022 | } | ||
1023 | 1022 | ||
1024 | static void | 1023 | slot->sl_opcnt = resp->opcnt; |
1025 | nfsd4_copy_pages(struct page **topages, struct page **frompages, short count) | 1024 | slot->sl_status = resp->cstate.status; |
1026 | { | ||
1027 | int i; | ||
1028 | 1025 | ||
1029 | for (i = 0; i < count; i++) { | 1026 | if (nfsd4_not_cached(resp)) { |
1030 | topages[i] = frompages[i]; | 1027 | slot->sl_datalen = 0; |
1031 | if (!topages[i]) | 1028 | return; |
1032 | continue; | ||
1033 | get_page(topages[i]); | ||
1034 | } | 1029 | } |
1030 | slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap; | ||
1031 | base = (char *)resp->cstate.datap - | ||
1032 | (char *)resp->xbuf->head[0].iov_base; | ||
1033 | if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data, | ||
1034 | slot->sl_datalen)) | ||
1035 | WARN("%s: sessions DRC could not cache compound\n", __func__); | ||
1036 | return; | ||
1035 | } | 1037 | } |
1036 | 1038 | ||
1037 | /* | 1039 | /* |
1038 | * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous | 1040 | * Encode the replay sequence operation from the slot values. |
1039 | * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total | 1041 | * If cachethis is FALSE encode the uncached rep error on the next |
1040 | * length of the XDR response is less than se_fmaxresp_cached | 1042 | * operation which sets resp->p and increments resp->opcnt for |
1041 | * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a | 1043 | * nfs4svc_encode_compoundres. |
1042 | * of the reply (e.g. readdir). | ||
1043 | * | 1044 | * |
1044 | * Store the base and length of the rq_req.head[0] page | ||
1045 | * of the NFSv4.1 data, just past the rpc header. | ||
1046 | */ | 1045 | */ |
1047 | void | 1046 | static __be32 |
1048 | nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) | 1047 | nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, |
1048 | struct nfsd4_compoundres *resp) | ||
1049 | { | 1049 | { |
1050 | struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; | 1050 | struct nfsd4_op *op; |
1051 | struct svc_rqst *rqstp = resp->rqstp; | 1051 | struct nfsd4_slot *slot = resp->cstate.slot; |
1052 | struct nfsd4_compoundargs *args = rqstp->rq_argp; | ||
1053 | struct nfsd4_op *op = &args->ops[resp->opcnt]; | ||
1054 | struct kvec *resv = &rqstp->rq_res.head[0]; | ||
1055 | |||
1056 | dprintk("--> %s entry %p\n", __func__, entry); | ||
1057 | |||
1058 | /* Don't cache a failed OP_SEQUENCE. */ | ||
1059 | if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) | ||
1060 | return; | ||
1061 | 1052 | ||
1062 | nfsd4_release_respages(entry->ce_respages, entry->ce_resused); | 1053 | dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__, |
1063 | entry->ce_opcnt = resp->opcnt; | 1054 | resp->opcnt, resp->cstate.slot->sl_cachethis); |
1064 | entry->ce_status = resp->cstate.status; | ||
1065 | 1055 | ||
1066 | /* | 1056 | /* Encode the replayed sequence operation */ |
1067 | * Don't need a page to cache just the sequence operation - the slot | 1057 | op = &args->ops[resp->opcnt - 1]; |
1068 | * does this for us! | 1058 | nfsd4_encode_operation(resp, op); |
1069 | */ | ||
1070 | 1059 | ||
1071 | if (nfsd4_not_cached(resp)) { | 1060 | /* Return nfserr_retry_uncached_rep in next operation. */ |
1072 | entry->ce_resused = 0; | 1061 | if (args->opcnt > 1 && slot->sl_cachethis == 0) { |
1073 | entry->ce_rpchdrlen = 0; | 1062 | op = &args->ops[resp->opcnt++]; |
1074 | dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__, | 1063 | op->status = nfserr_retry_uncached_rep; |
1075 | resp->cstate.slot->sl_cache_entry.ce_cachethis); | 1064 | nfsd4_encode_operation(resp, op); |
1076 | return; | ||
1077 | } | ||
1078 | entry->ce_resused = rqstp->rq_resused; | ||
1079 | if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1) | ||
1080 | entry->ce_resused = NFSD_PAGES_PER_SLOT + 1; | ||
1081 | nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages, | ||
1082 | entry->ce_resused); | ||
1083 | entry->ce_datav.iov_base = resp->cstate.statp; | ||
1084 | entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp - | ||
1085 | (char *)page_address(rqstp->rq_respages[0])); | ||
1086 | /* Current request rpc header length*/ | ||
1087 | entry->ce_rpchdrlen = (char *)resp->cstate.statp - | ||
1088 | (char *)page_address(rqstp->rq_respages[0]); | ||
1089 | } | ||
1090 | |||
1091 | /* | ||
1092 | * We keep the rpc header, but take the nfs reply from the replycache. | ||
1093 | */ | ||
1094 | static int | ||
1095 | nfsd41_copy_replay_data(struct nfsd4_compoundres *resp, | ||
1096 | struct nfsd4_cache_entry *entry) | ||
1097 | { | ||
1098 | struct svc_rqst *rqstp = resp->rqstp; | ||
1099 | struct kvec *resv = &resp->rqstp->rq_res.head[0]; | ||
1100 | int len; | ||
1101 | |||
1102 | /* Current request rpc header length*/ | ||
1103 | len = (char *)resp->cstate.statp - | ||
1104 | (char *)page_address(rqstp->rq_respages[0]); | ||
1105 | if (entry->ce_datav.iov_len + len > PAGE_SIZE) { | ||
1106 | dprintk("%s v41 cached reply too large (%Zd).\n", __func__, | ||
1107 | entry->ce_datav.iov_len); | ||
1108 | return 0; | ||
1109 | } | 1065 | } |
1110 | /* copy the cached reply nfsd data past the current rpc header */ | 1066 | return op->status; |
1111 | memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base, | ||
1112 | entry->ce_datav.iov_len); | ||
1113 | resv->iov_len = len + entry->ce_datav.iov_len; | ||
1114 | return 1; | ||
1115 | } | 1067 | } |
1116 | 1068 | ||
1117 | /* | 1069 | /* |
1118 | * Keep the first page of the replay. Copy the NFSv4.1 data from the first | 1070 | * The sequence operation is not cached because we can use the slot and |
1119 | * cached page. Replace any futher replay pages from the cache. | 1071 | * session values. |
1120 | */ | 1072 | */ |
1121 | __be32 | 1073 | __be32 |
1122 | nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, | 1074 | nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, |
1123 | struct nfsd4_sequence *seq) | 1075 | struct nfsd4_sequence *seq) |
1124 | { | 1076 | { |
1125 | struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; | 1077 | struct nfsd4_slot *slot = resp->cstate.slot; |
1126 | __be32 status; | 1078 | __be32 status; |
1127 | 1079 | ||
1128 | dprintk("--> %s entry %p\n", __func__, entry); | 1080 | dprintk("--> %s slot %p\n", __func__, slot); |
1129 | |||
1130 | /* | ||
1131 | * If this is just the sequence operation, we did not keep | ||
1132 | * a page in the cache entry because we can just use the | ||
1133 | * slot info stored in struct nfsd4_sequence that was checked | ||
1134 | * against the slot in nfsd4_sequence(). | ||
1135 | * | ||
1136 | * This occurs when seq->cachethis is FALSE, or when the client | ||
1137 | * session inactivity timer fires and a solo sequence operation | ||
1138 | * is sent (lease renewal). | ||
1139 | */ | ||
1140 | if (seq && nfsd4_not_cached(resp)) { | ||
1141 | seq->maxslots = resp->cstate.session->se_fchannel.maxreqs; | ||
1142 | return nfs_ok; | ||
1143 | } | ||
1144 | |||
1145 | if (!nfsd41_copy_replay_data(resp, entry)) { | ||
1146 | /* | ||
1147 | * Not enough room to use the replay rpc header, send the | ||
1148 | * cached header. Release all the allocated result pages. | ||
1149 | */ | ||
1150 | svc_free_res_pages(resp->rqstp); | ||
1151 | nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages, | ||
1152 | entry->ce_resused); | ||
1153 | } else { | ||
1154 | /* Release all but the first allocated result page */ | ||
1155 | 1081 | ||
1156 | resp->rqstp->rq_resused--; | 1082 | /* Either returns 0 or nfserr_retry_uncached */ |
1157 | svc_free_res_pages(resp->rqstp); | 1083 | status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); |
1084 | if (status == nfserr_retry_uncached_rep) | ||
1085 | return status; | ||
1158 | 1086 | ||
1159 | nfsd4_copy_pages(&resp->rqstp->rq_respages[1], | 1087 | /* The sequence operation has been encoded, cstate->datap set. */ |
1160 | &entry->ce_respages[1], | 1088 | memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); |
1161 | entry->ce_resused - 1); | ||
1162 | } | ||
1163 | 1089 | ||
1164 | resp->rqstp->rq_resused = entry->ce_resused; | 1090 | resp->opcnt = slot->sl_opcnt; |
1165 | resp->opcnt = entry->ce_opcnt; | 1091 | resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); |
1166 | resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen; | 1092 | status = slot->sl_status; |
1167 | status = entry->ce_status; | ||
1168 | 1093 | ||
1169 | return status; | 1094 | return status; |
1170 | } | 1095 | } |
@@ -1194,13 +1119,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, | |||
1194 | int status; | 1119 | int status; |
1195 | unsigned int strhashval; | 1120 | unsigned int strhashval; |
1196 | char dname[HEXDIR_LEN]; | 1121 | char dname[HEXDIR_LEN]; |
1122 | char addr_str[INET6_ADDRSTRLEN]; | ||
1197 | nfs4_verifier verf = exid->verifier; | 1123 | nfs4_verifier verf = exid->verifier; |
1198 | u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; | 1124 | struct sockaddr *sa = svc_addr(rqstp); |
1199 | 1125 | ||
1126 | rpc_ntop(sa, addr_str, sizeof(addr_str)); | ||
1200 | dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " | 1127 | dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " |
1201 | " ip_addr=%u flags %x, spa_how %d\n", | 1128 | "ip_addr=%s flags %x, spa_how %d\n", |
1202 | __func__, rqstp, exid, exid->clname.len, exid->clname.data, | 1129 | __func__, rqstp, exid, exid->clname.len, exid->clname.data, |
1203 | ip_addr, exid->flags, exid->spa_how); | 1130 | addr_str, exid->flags, exid->spa_how); |
1204 | 1131 | ||
1205 | if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) | 1132 | if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) |
1206 | return nfserr_inval; | 1133 | return nfserr_inval; |
@@ -1281,28 +1208,23 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, | |||
1281 | 1208 | ||
1282 | out_new: | 1209 | out_new: |
1283 | /* Normal case */ | 1210 | /* Normal case */ |
1284 | new = create_client(exid->clname, dname); | 1211 | new = create_client(exid->clname, dname, rqstp, &verf); |
1285 | if (new == NULL) { | 1212 | if (new == NULL) { |
1286 | status = nfserr_resource; | 1213 | status = nfserr_serverfault; |
1287 | goto out; | 1214 | goto out; |
1288 | } | 1215 | } |
1289 | 1216 | ||
1290 | copy_verf(new, &verf); | ||
1291 | copy_cred(&new->cl_cred, &rqstp->rq_cred); | ||
1292 | new->cl_addr = ip_addr; | ||
1293 | gen_clid(new); | 1217 | gen_clid(new); |
1294 | gen_confirm(new); | ||
1295 | add_to_unconfirmed(new, strhashval); | 1218 | add_to_unconfirmed(new, strhashval); |
1296 | out_copy: | 1219 | out_copy: |
1297 | exid->clientid.cl_boot = new->cl_clientid.cl_boot; | 1220 | exid->clientid.cl_boot = new->cl_clientid.cl_boot; |
1298 | exid->clientid.cl_id = new->cl_clientid.cl_id; | 1221 | exid->clientid.cl_id = new->cl_clientid.cl_id; |
1299 | 1222 | ||
1300 | new->cl_slot.sl_seqid = 0; | ||
1301 | exid->seqid = 1; | 1223 | exid->seqid = 1; |
1302 | nfsd4_set_ex_flags(new, exid); | 1224 | nfsd4_set_ex_flags(new, exid); |
1303 | 1225 | ||
1304 | dprintk("nfsd4_exchange_id seqid %d flags %x\n", | 1226 | dprintk("nfsd4_exchange_id seqid %d flags %x\n", |
1305 | new->cl_slot.sl_seqid, new->cl_exchange_flags); | 1227 | new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); |
1306 | status = nfs_ok; | 1228 | status = nfs_ok; |
1307 | 1229 | ||
1308 | out: | 1230 | out: |
@@ -1313,40 +1235,60 @@ error: | |||
1313 | } | 1235 | } |
1314 | 1236 | ||
1315 | static int | 1237 | static int |
1316 | check_slot_seqid(u32 seqid, struct nfsd4_slot *slot) | 1238 | check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) |
1317 | { | 1239 | { |
1318 | dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid, | 1240 | dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid, |
1319 | slot->sl_seqid); | 1241 | slot_seqid); |
1320 | 1242 | ||
1321 | /* The slot is in use, and no response has been sent. */ | 1243 | /* The slot is in use, and no response has been sent. */ |
1322 | if (slot->sl_inuse) { | 1244 | if (slot_inuse) { |
1323 | if (seqid == slot->sl_seqid) | 1245 | if (seqid == slot_seqid) |
1324 | return nfserr_jukebox; | 1246 | return nfserr_jukebox; |
1325 | else | 1247 | else |
1326 | return nfserr_seq_misordered; | 1248 | return nfserr_seq_misordered; |
1327 | } | 1249 | } |
1328 | /* Normal */ | 1250 | /* Normal */ |
1329 | if (likely(seqid == slot->sl_seqid + 1)) | 1251 | if (likely(seqid == slot_seqid + 1)) |
1330 | return nfs_ok; | 1252 | return nfs_ok; |
1331 | /* Replay */ | 1253 | /* Replay */ |
1332 | if (seqid == slot->sl_seqid) | 1254 | if (seqid == slot_seqid) |
1333 | return nfserr_replay_cache; | 1255 | return nfserr_replay_cache; |
1334 | /* Wraparound */ | 1256 | /* Wraparound */ |
1335 | if (seqid == 1 && (slot->sl_seqid + 1) == 0) | 1257 | if (seqid == 1 && (slot_seqid + 1) == 0) |
1336 | return nfs_ok; | 1258 | return nfs_ok; |
1337 | /* Misordered replay or misordered new request */ | 1259 | /* Misordered replay or misordered new request */ |
1338 | return nfserr_seq_misordered; | 1260 | return nfserr_seq_misordered; |
1339 | } | 1261 | } |
1340 | 1262 | ||
1263 | /* | ||
1264 | * Cache the create session result into the create session single DRC | ||
1265 | * slot cache by saving the xdr structure. sl_seqid has been set. | ||
1266 | * Do this for solo or embedded create session operations. | ||
1267 | */ | ||
1268 | static void | ||
1269 | nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, | ||
1270 | struct nfsd4_clid_slot *slot, int nfserr) | ||
1271 | { | ||
1272 | slot->sl_status = nfserr; | ||
1273 | memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); | ||
1274 | } | ||
1275 | |||
1276 | static __be32 | ||
1277 | nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, | ||
1278 | struct nfsd4_clid_slot *slot) | ||
1279 | { | ||
1280 | memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses)); | ||
1281 | return slot->sl_status; | ||
1282 | } | ||
1283 | |||
1341 | __be32 | 1284 | __be32 |
1342 | nfsd4_create_session(struct svc_rqst *rqstp, | 1285 | nfsd4_create_session(struct svc_rqst *rqstp, |
1343 | struct nfsd4_compound_state *cstate, | 1286 | struct nfsd4_compound_state *cstate, |
1344 | struct nfsd4_create_session *cr_ses) | 1287 | struct nfsd4_create_session *cr_ses) |
1345 | { | 1288 | { |
1346 | u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; | 1289 | struct sockaddr *sa = svc_addr(rqstp); |
1347 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
1348 | struct nfs4_client *conf, *unconf; | 1290 | struct nfs4_client *conf, *unconf; |
1349 | struct nfsd4_slot *slot = NULL; | 1291 | struct nfsd4_clid_slot *cs_slot = NULL; |
1350 | int status = 0; | 1292 | int status = 0; |
1351 | 1293 | ||
1352 | nfs4_lock_state(); | 1294 | nfs4_lock_state(); |
@@ -1354,40 +1296,38 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1354 | conf = find_confirmed_client(&cr_ses->clientid); | 1296 | conf = find_confirmed_client(&cr_ses->clientid); |
1355 | 1297 | ||
1356 | if (conf) { | 1298 | if (conf) { |
1357 | slot = &conf->cl_slot; | 1299 | cs_slot = &conf->cl_cs_slot; |
1358 | status = check_slot_seqid(cr_ses->seqid, slot); | 1300 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); |
1359 | if (status == nfserr_replay_cache) { | 1301 | if (status == nfserr_replay_cache) { |
1360 | dprintk("Got a create_session replay! seqid= %d\n", | 1302 | dprintk("Got a create_session replay! seqid= %d\n", |
1361 | slot->sl_seqid); | 1303 | cs_slot->sl_seqid); |
1362 | cstate->slot = slot; | ||
1363 | cstate->status = status; | ||
1364 | /* Return the cached reply status */ | 1304 | /* Return the cached reply status */ |
1365 | status = nfsd4_replay_cache_entry(resp, NULL); | 1305 | status = nfsd4_replay_create_session(cr_ses, cs_slot); |
1366 | goto out; | 1306 | goto out; |
1367 | } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { | 1307 | } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { |
1368 | status = nfserr_seq_misordered; | 1308 | status = nfserr_seq_misordered; |
1369 | dprintk("Sequence misordered!\n"); | 1309 | dprintk("Sequence misordered!\n"); |
1370 | dprintk("Expected seqid= %d but got seqid= %d\n", | 1310 | dprintk("Expected seqid= %d but got seqid= %d\n", |
1371 | slot->sl_seqid, cr_ses->seqid); | 1311 | cs_slot->sl_seqid, cr_ses->seqid); |
1372 | goto out; | 1312 | goto out; |
1373 | } | 1313 | } |
1374 | conf->cl_slot.sl_seqid++; | 1314 | cs_slot->sl_seqid++; |
1375 | } else if (unconf) { | 1315 | } else if (unconf) { |
1376 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || | 1316 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || |
1377 | (ip_addr != unconf->cl_addr)) { | 1317 | !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { |
1378 | status = nfserr_clid_inuse; | 1318 | status = nfserr_clid_inuse; |
1379 | goto out; | 1319 | goto out; |
1380 | } | 1320 | } |
1381 | 1321 | ||
1382 | slot = &unconf->cl_slot; | 1322 | cs_slot = &unconf->cl_cs_slot; |
1383 | status = check_slot_seqid(cr_ses->seqid, slot); | 1323 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); |
1384 | if (status) { | 1324 | if (status) { |
1385 | /* an unconfirmed replay returns misordered */ | 1325 | /* an unconfirmed replay returns misordered */ |
1386 | status = nfserr_seq_misordered; | 1326 | status = nfserr_seq_misordered; |
1387 | goto out; | 1327 | goto out_cache; |
1388 | } | 1328 | } |
1389 | 1329 | ||
1390 | slot->sl_seqid++; /* from 0 to 1 */ | 1330 | cs_slot->sl_seqid++; /* from 0 to 1 */ |
1391 | move_to_confirmed(unconf); | 1331 | move_to_confirmed(unconf); |
1392 | 1332 | ||
1393 | /* | 1333 | /* |
@@ -1396,6 +1336,19 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1396 | cr_ses->flags &= ~SESSION4_PERSIST; | 1336 | cr_ses->flags &= ~SESSION4_PERSIST; |
1397 | cr_ses->flags &= ~SESSION4_RDMA; | 1337 | cr_ses->flags &= ~SESSION4_RDMA; |
1398 | 1338 | ||
1339 | if (cr_ses->flags & SESSION4_BACK_CHAN) { | ||
1340 | unconf->cl_cb_xprt = rqstp->rq_xprt; | ||
1341 | svc_xprt_get(unconf->cl_cb_xprt); | ||
1342 | rpc_copy_addr( | ||
1343 | (struct sockaddr *)&unconf->cl_cb_conn.cb_addr, | ||
1344 | sa); | ||
1345 | unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa); | ||
1346 | unconf->cl_cb_conn.cb_minorversion = | ||
1347 | cstate->minorversion; | ||
1348 | unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog; | ||
1349 | unconf->cl_cb_seq_nr = 1; | ||
1350 | nfsd4_probe_callback(unconf); | ||
1351 | } | ||
1399 | conf = unconf; | 1352 | conf = unconf; |
1400 | } else { | 1353 | } else { |
1401 | status = nfserr_stale_clientid; | 1354 | status = nfserr_stale_clientid; |
@@ -1408,12 +1361,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1408 | 1361 | ||
1409 | memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, | 1362 | memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, |
1410 | NFS4_MAX_SESSIONID_LEN); | 1363 | NFS4_MAX_SESSIONID_LEN); |
1411 | cr_ses->seqid = slot->sl_seqid; | 1364 | cr_ses->seqid = cs_slot->sl_seqid; |
1412 | 1365 | ||
1413 | slot->sl_inuse = true; | 1366 | out_cache: |
1414 | cstate->slot = slot; | 1367 | /* cache solo and embedded create sessions under the state lock */ |
1415 | /* Ensure a page is used for the cache */ | 1368 | nfsd4_cache_create_session(cr_ses, cs_slot, status); |
1416 | slot->sl_cache_entry.ce_cachethis = 1; | ||
1417 | out: | 1369 | out: |
1418 | nfs4_unlock_state(); | 1370 | nfs4_unlock_state(); |
1419 | dprintk("%s returns %d\n", __func__, ntohl(status)); | 1371 | dprintk("%s returns %d\n", __func__, ntohl(status)); |
@@ -1478,18 +1430,23 @@ nfsd4_sequence(struct svc_rqst *rqstp, | |||
1478 | if (seq->slotid >= session->se_fchannel.maxreqs) | 1430 | if (seq->slotid >= session->se_fchannel.maxreqs) |
1479 | goto out; | 1431 | goto out; |
1480 | 1432 | ||
1481 | slot = &session->se_slots[seq->slotid]; | 1433 | slot = session->se_slots[seq->slotid]; |
1482 | dprintk("%s: slotid %d\n", __func__, seq->slotid); | 1434 | dprintk("%s: slotid %d\n", __func__, seq->slotid); |
1483 | 1435 | ||
1484 | status = check_slot_seqid(seq->seqid, slot); | 1436 | /* We do not negotiate the number of slots yet, so set the |
1437 | * maxslots to the session maxreqs which is used to encode | ||
1438 | * sr_highest_slotid and the sr_target_slot id to maxslots */ | ||
1439 | seq->maxslots = session->se_fchannel.maxreqs; | ||
1440 | |||
1441 | status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse); | ||
1485 | if (status == nfserr_replay_cache) { | 1442 | if (status == nfserr_replay_cache) { |
1486 | cstate->slot = slot; | 1443 | cstate->slot = slot; |
1487 | cstate->session = session; | 1444 | cstate->session = session; |
1488 | /* Return the cached reply status and set cstate->status | 1445 | /* Return the cached reply status and set cstate->status |
1489 | * for nfsd4_svc_encode_compoundres processing */ | 1446 | * for nfsd4_proc_compound processing */ |
1490 | status = nfsd4_replay_cache_entry(resp, seq); | 1447 | status = nfsd4_replay_cache_entry(resp, seq); |
1491 | cstate->status = nfserr_replay_cache; | 1448 | cstate->status = nfserr_replay_cache; |
1492 | goto replay_cache; | 1449 | goto out; |
1493 | } | 1450 | } |
1494 | if (status) | 1451 | if (status) |
1495 | goto out; | 1452 | goto out; |
@@ -1497,23 +1454,23 @@ nfsd4_sequence(struct svc_rqst *rqstp, | |||
1497 | /* Success! bump slot seqid */ | 1454 | /* Success! bump slot seqid */ |
1498 | slot->sl_inuse = true; | 1455 | slot->sl_inuse = true; |
1499 | slot->sl_seqid = seq->seqid; | 1456 | slot->sl_seqid = seq->seqid; |
1500 | slot->sl_cache_entry.ce_cachethis = seq->cachethis; | 1457 | slot->sl_cachethis = seq->cachethis; |
1501 | /* Always set the cache entry cachethis for solo sequence */ | ||
1502 | if (nfsd4_is_solo_sequence(resp)) | ||
1503 | slot->sl_cache_entry.ce_cachethis = 1; | ||
1504 | 1458 | ||
1505 | cstate->slot = slot; | 1459 | cstate->slot = slot; |
1506 | cstate->session = session; | 1460 | cstate->session = session; |
1507 | 1461 | ||
1508 | replay_cache: | 1462 | /* Hold a session reference until done processing the compound: |
1509 | /* Renew the clientid on success and on replay. | ||
1510 | * Hold a session reference until done processing the compound: | ||
1511 | * nfsd4_put_session called only if the cstate slot is set. | 1463 | * nfsd4_put_session called only if the cstate slot is set. |
1512 | */ | 1464 | */ |
1513 | renew_client(session->se_client); | ||
1514 | nfsd4_get_session(session); | 1465 | nfsd4_get_session(session); |
1515 | out: | 1466 | out: |
1516 | spin_unlock(&sessionid_lock); | 1467 | spin_unlock(&sessionid_lock); |
1468 | /* Renew the clientid on success and on replay */ | ||
1469 | if (cstate->session) { | ||
1470 | nfs4_lock_state(); | ||
1471 | renew_client(session->se_client); | ||
1472 | nfs4_unlock_state(); | ||
1473 | } | ||
1517 | dprintk("%s: return %d\n", __func__, ntohl(status)); | 1474 | dprintk("%s: return %d\n", __func__, ntohl(status)); |
1518 | return status; | 1475 | return status; |
1519 | } | 1476 | } |
@@ -1522,7 +1479,7 @@ __be32 | |||
1522 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 1479 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
1523 | struct nfsd4_setclientid *setclid) | 1480 | struct nfsd4_setclientid *setclid) |
1524 | { | 1481 | { |
1525 | struct sockaddr_in *sin = svc_addr_in(rqstp); | 1482 | struct sockaddr *sa = svc_addr(rqstp); |
1526 | struct xdr_netobj clname = { | 1483 | struct xdr_netobj clname = { |
1527 | .len = setclid->se_namelen, | 1484 | .len = setclid->se_namelen, |
1528 | .data = setclid->se_name, | 1485 | .data = setclid->se_name, |
@@ -1531,7 +1488,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1531 | unsigned int strhashval; | 1488 | unsigned int strhashval; |
1532 | struct nfs4_client *conf, *unconf, *new; | 1489 | struct nfs4_client *conf, *unconf, *new; |
1533 | __be32 status; | 1490 | __be32 status; |
1534 | char *princ; | ||
1535 | char dname[HEXDIR_LEN]; | 1491 | char dname[HEXDIR_LEN]; |
1536 | 1492 | ||
1537 | if (!check_name(clname)) | 1493 | if (!check_name(clname)) |
@@ -1554,8 +1510,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1554 | /* RFC 3530 14.2.33 CASE 0: */ | 1510 | /* RFC 3530 14.2.33 CASE 0: */ |
1555 | status = nfserr_clid_inuse; | 1511 | status = nfserr_clid_inuse; |
1556 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { | 1512 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { |
1557 | dprintk("NFSD: setclientid: string in use by client" | 1513 | char addr_str[INET6_ADDRSTRLEN]; |
1558 | " at %pI4\n", &conf->cl_addr); | 1514 | rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, |
1515 | sizeof(addr_str)); | ||
1516 | dprintk("NFSD: setclientid: string in use by client " | ||
1517 | "at %s\n", addr_str); | ||
1559 | goto out; | 1518 | goto out; |
1560 | } | 1519 | } |
1561 | } | 1520 | } |
@@ -1573,7 +1532,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1573 | */ | 1532 | */ |
1574 | if (unconf) | 1533 | if (unconf) |
1575 | expire_client(unconf); | 1534 | expire_client(unconf); |
1576 | new = create_client(clname, dname); | 1535 | new = create_client(clname, dname, rqstp, &clverifier); |
1577 | if (new == NULL) | 1536 | if (new == NULL) |
1578 | goto out; | 1537 | goto out; |
1579 | gen_clid(new); | 1538 | gen_clid(new); |
@@ -1590,7 +1549,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1590 | */ | 1549 | */ |
1591 | expire_client(unconf); | 1550 | expire_client(unconf); |
1592 | } | 1551 | } |
1593 | new = create_client(clname, dname); | 1552 | new = create_client(clname, dname, rqstp, &clverifier); |
1594 | if (new == NULL) | 1553 | if (new == NULL) |
1595 | goto out; | 1554 | goto out; |
1596 | copy_clid(new, conf); | 1555 | copy_clid(new, conf); |
@@ -1600,7 +1559,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1600 | * probable client reboot; state will be removed if | 1559 | * probable client reboot; state will be removed if |
1601 | * confirmed. | 1560 | * confirmed. |
1602 | */ | 1561 | */ |
1603 | new = create_client(clname, dname); | 1562 | new = create_client(clname, dname, rqstp, &clverifier); |
1604 | if (new == NULL) | 1563 | if (new == NULL) |
1605 | goto out; | 1564 | goto out; |
1606 | gen_clid(new); | 1565 | gen_clid(new); |
@@ -1611,25 +1570,12 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1611 | * confirmed. | 1570 | * confirmed. |
1612 | */ | 1571 | */ |
1613 | expire_client(unconf); | 1572 | expire_client(unconf); |
1614 | new = create_client(clname, dname); | 1573 | new = create_client(clname, dname, rqstp, &clverifier); |
1615 | if (new == NULL) | 1574 | if (new == NULL) |
1616 | goto out; | 1575 | goto out; |
1617 | gen_clid(new); | 1576 | gen_clid(new); |
1618 | } | 1577 | } |
1619 | copy_verf(new, &clverifier); | 1578 | gen_callback(new, setclid, rpc_get_scope_id(sa)); |
1620 | new->cl_addr = sin->sin_addr.s_addr; | ||
1621 | new->cl_flavor = rqstp->rq_flavor; | ||
1622 | princ = svc_gss_principal(rqstp); | ||
1623 | if (princ) { | ||
1624 | new->cl_principal = kstrdup(princ, GFP_KERNEL); | ||
1625 | if (new->cl_principal == NULL) { | ||
1626 | free_client(new); | ||
1627 | goto out; | ||
1628 | } | ||
1629 | } | ||
1630 | copy_cred(&new->cl_cred, &rqstp->rq_cred); | ||
1631 | gen_confirm(new); | ||
1632 | gen_callback(new, setclid); | ||
1633 | add_to_unconfirmed(new, strhashval); | 1579 | add_to_unconfirmed(new, strhashval); |
1634 | setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; | 1580 | setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; |
1635 | setclid->se_clientid.cl_id = new->cl_clientid.cl_id; | 1581 | setclid->se_clientid.cl_id = new->cl_clientid.cl_id; |
@@ -1651,7 +1597,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
1651 | struct nfsd4_compound_state *cstate, | 1597 | struct nfsd4_compound_state *cstate, |
1652 | struct nfsd4_setclientid_confirm *setclientid_confirm) | 1598 | struct nfsd4_setclientid_confirm *setclientid_confirm) |
1653 | { | 1599 | { |
1654 | struct sockaddr_in *sin = svc_addr_in(rqstp); | 1600 | struct sockaddr *sa = svc_addr(rqstp); |
1655 | struct nfs4_client *conf, *unconf; | 1601 | struct nfs4_client *conf, *unconf; |
1656 | nfs4_verifier confirm = setclientid_confirm->sc_confirm; | 1602 | nfs4_verifier confirm = setclientid_confirm->sc_confirm; |
1657 | clientid_t * clid = &setclientid_confirm->sc_clientid; | 1603 | clientid_t * clid = &setclientid_confirm->sc_clientid; |
@@ -1670,9 +1616,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
1670 | unconf = find_unconfirmed_client(clid); | 1616 | unconf = find_unconfirmed_client(clid); |
1671 | 1617 | ||
1672 | status = nfserr_clid_inuse; | 1618 | status = nfserr_clid_inuse; |
1673 | if (conf && conf->cl_addr != sin->sin_addr.s_addr) | 1619 | if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) |
1674 | goto out; | 1620 | goto out; |
1675 | if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) | 1621 | if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) |
1676 | goto out; | 1622 | goto out; |
1677 | 1623 | ||
1678 | /* | 1624 | /* |
@@ -2163,7 +2109,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) | |||
2163 | return -EAGAIN; | 2109 | return -EAGAIN; |
2164 | } | 2110 | } |
2165 | 2111 | ||
2166 | static struct lock_manager_operations nfsd_lease_mng_ops = { | 2112 | static const struct lock_manager_operations nfsd_lease_mng_ops = { |
2167 | .fl_break = nfsd_break_deleg_cb, | 2113 | .fl_break = nfsd_break_deleg_cb, |
2168 | .fl_release_private = nfsd_release_deleg_cb, | 2114 | .fl_release_private = nfsd_release_deleg_cb, |
2169 | .fl_copy_lock = nfsd_copy_lock_deleg_cb, | 2115 | .fl_copy_lock = nfsd_copy_lock_deleg_cb, |
@@ -3368,7 +3314,7 @@ nfs4_transform_lock_offset(struct file_lock *lock) | |||
3368 | 3314 | ||
3369 | /* Hack!: For now, we're defining this just so we can use a pointer to it | 3315 | /* Hack!: For now, we're defining this just so we can use a pointer to it |
3370 | * as a unique cookie to identify our (NFSv4's) posix locks. */ | 3316 | * as a unique cookie to identify our (NFSv4's) posix locks. */ |
3371 | static struct lock_manager_operations nfsd_posix_mng_ops = { | 3317 | static const struct lock_manager_operations nfsd_posix_mng_ops = { |
3372 | }; | 3318 | }; |
3373 | 3319 | ||
3374 | static inline void | 3320 | static inline void |
@@ -4072,7 +4018,7 @@ set_max_delegations(void) | |||
4072 | 4018 | ||
4073 | /* initialization to perform when the nfsd service is started: */ | 4019 | /* initialization to perform when the nfsd service is started: */ |
4074 | 4020 | ||
4075 | static void | 4021 | static int |
4076 | __nfs4_state_start(void) | 4022 | __nfs4_state_start(void) |
4077 | { | 4023 | { |
4078 | unsigned long grace_time; | 4024 | unsigned long grace_time; |
@@ -4084,19 +4030,26 @@ __nfs4_state_start(void) | |||
4084 | printk(KERN_INFO "NFSD: starting %ld-second grace period\n", | 4030 | printk(KERN_INFO "NFSD: starting %ld-second grace period\n", |
4085 | grace_time/HZ); | 4031 | grace_time/HZ); |
4086 | laundry_wq = create_singlethread_workqueue("nfsd4"); | 4032 | laundry_wq = create_singlethread_workqueue("nfsd4"); |
4033 | if (laundry_wq == NULL) | ||
4034 | return -ENOMEM; | ||
4087 | queue_delayed_work(laundry_wq, &laundromat_work, grace_time); | 4035 | queue_delayed_work(laundry_wq, &laundromat_work, grace_time); |
4088 | set_max_delegations(); | 4036 | set_max_delegations(); |
4037 | return set_callback_cred(); | ||
4089 | } | 4038 | } |
4090 | 4039 | ||
4091 | void | 4040 | int |
4092 | nfs4_state_start(void) | 4041 | nfs4_state_start(void) |
4093 | { | 4042 | { |
4043 | int ret; | ||
4044 | |||
4094 | if (nfs4_init) | 4045 | if (nfs4_init) |
4095 | return; | 4046 | return 0; |
4096 | nfsd4_load_reboot_recovery_data(); | 4047 | nfsd4_load_reboot_recovery_data(); |
4097 | __nfs4_state_start(); | 4048 | ret = __nfs4_state_start(); |
4049 | if (ret) | ||
4050 | return ret; | ||
4098 | nfs4_init = 1; | 4051 | nfs4_init = 1; |
4099 | return; | 4052 | return 0; |
4100 | } | 4053 | } |
4101 | 4054 | ||
4102 | time_t | 4055 | time_t |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2dcc7feaa6ff..0fbd50cee1f6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -1599,7 +1599,8 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, | |||
1599 | static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat) | 1599 | static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat) |
1600 | { | 1600 | { |
1601 | struct svc_fh tmp_fh; | 1601 | struct svc_fh tmp_fh; |
1602 | char *path, *rootpath; | 1602 | char *path = NULL, *rootpath; |
1603 | size_t rootlen; | ||
1603 | 1604 | ||
1604 | fh_init(&tmp_fh, NFS4_FHSIZE); | 1605 | fh_init(&tmp_fh, NFS4_FHSIZE); |
1605 | *stat = exp_pseudoroot(rqstp, &tmp_fh); | 1606 | *stat = exp_pseudoroot(rqstp, &tmp_fh); |
@@ -1609,14 +1610,18 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 * | |||
1609 | 1610 | ||
1610 | path = exp->ex_pathname; | 1611 | path = exp->ex_pathname; |
1611 | 1612 | ||
1612 | if (strncmp(path, rootpath, strlen(rootpath))) { | 1613 | rootlen = strlen(rootpath); |
1614 | if (strncmp(path, rootpath, rootlen)) { | ||
1613 | dprintk("nfsd: fs_locations failed;" | 1615 | dprintk("nfsd: fs_locations failed;" |
1614 | "%s is not contained in %s\n", path, rootpath); | 1616 | "%s is not contained in %s\n", path, rootpath); |
1615 | *stat = nfserr_notsupp; | 1617 | *stat = nfserr_notsupp; |
1616 | return NULL; | 1618 | path = NULL; |
1619 | goto out; | ||
1617 | } | 1620 | } |
1618 | 1621 | path += rootlen; | |
1619 | return path + strlen(rootpath); | 1622 | out: |
1623 | fh_put(&tmp_fh); | ||
1624 | return path; | ||
1620 | } | 1625 | } |
1621 | 1626 | ||
1622 | /* | 1627 | /* |
@@ -1793,11 +1798,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1793 | goto out_nfserr; | 1798 | goto out_nfserr; |
1794 | } | 1799 | } |
1795 | } | 1800 | } |
1796 | if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { | ||
1797 | if (exp->ex_fslocs.locations == NULL) { | ||
1798 | bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS; | ||
1799 | } | ||
1800 | } | ||
1801 | if ((buflen -= 16) < 0) | 1801 | if ((buflen -= 16) < 0) |
1802 | goto out_resource; | 1802 | goto out_resource; |
1803 | 1803 | ||
@@ -1825,8 +1825,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1825 | goto out_resource; | 1825 | goto out_resource; |
1826 | if (!aclsupport) | 1826 | if (!aclsupport) |
1827 | word0 &= ~FATTR4_WORD0_ACL; | 1827 | word0 &= ~FATTR4_WORD0_ACL; |
1828 | if (!exp->ex_fslocs.locations) | ||
1829 | word0 &= ~FATTR4_WORD0_FS_LOCATIONS; | ||
1830 | if (!word2) { | 1828 | if (!word2) { |
1831 | WRITE32(2); | 1829 | WRITE32(2); |
1832 | WRITE32(word0); | 1830 | WRITE32(word0); |
@@ -3064,6 +3062,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, | |||
3064 | WRITE32(0); | 3062 | WRITE32(0); |
3065 | 3063 | ||
3066 | ADJUST_ARGS(); | 3064 | ADJUST_ARGS(); |
3065 | resp->cstate.datap = p; /* DRC cache data pointer */ | ||
3067 | return 0; | 3066 | return 0; |
3068 | } | 3067 | } |
3069 | 3068 | ||
@@ -3166,7 +3165,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) | |||
3166 | return status; | 3165 | return status; |
3167 | 3166 | ||
3168 | session = resp->cstate.session; | 3167 | session = resp->cstate.session; |
3169 | if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0) | 3168 | if (session == NULL || slot->sl_cachethis == 0) |
3170 | return status; | 3169 | return status; |
3171 | 3170 | ||
3172 | if (resp->opcnt >= args->opcnt) | 3171 | if (resp->opcnt >= args->opcnt) |
@@ -3291,6 +3290,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo | |||
3291 | /* | 3290 | /* |
3292 | * All that remains is to write the tag and operation count... | 3291 | * All that remains is to write the tag and operation count... |
3293 | */ | 3292 | */ |
3293 | struct nfsd4_compound_state *cs = &resp->cstate; | ||
3294 | struct kvec *iov; | 3294 | struct kvec *iov; |
3295 | p = resp->tagp; | 3295 | p = resp->tagp; |
3296 | *p++ = htonl(resp->taglen); | 3296 | *p++ = htonl(resp->taglen); |
@@ -3304,17 +3304,11 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo | |||
3304 | iov = &rqstp->rq_res.head[0]; | 3304 | iov = &rqstp->rq_res.head[0]; |
3305 | iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; | 3305 | iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; |
3306 | BUG_ON(iov->iov_len > PAGE_SIZE); | 3306 | BUG_ON(iov->iov_len > PAGE_SIZE); |
3307 | if (nfsd4_has_session(&resp->cstate)) { | 3307 | if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) { |
3308 | if (resp->cstate.status == nfserr_replay_cache && | 3308 | nfsd4_store_cache_entry(resp); |
3309 | !nfsd4_not_cached(resp)) { | 3309 | dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); |
3310 | iov->iov_len = resp->cstate.iovlen; | 3310 | resp->cstate.slot->sl_inuse = false; |
3311 | } else { | 3311 | nfsd4_put_session(resp->cstate.session); |
3312 | nfsd4_store_cache_entry(resp); | ||
3313 | dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); | ||
3314 | resp->cstate.slot->sl_inuse = 0; | ||
3315 | } | ||
3316 | if (resp->cstate.session) | ||
3317 | nfsd4_put_session(resp->cstate.session); | ||
3318 | } | 3312 | } |
3319 | return 1; | 3313 | return 1; |
3320 | } | 3314 | } |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7e906c5b7671..00388d2a3c99 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -174,12 +174,13 @@ static const struct file_operations exports_operations = { | |||
174 | }; | 174 | }; |
175 | 175 | ||
176 | extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); | 176 | extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); |
177 | extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); | ||
177 | 178 | ||
178 | static struct file_operations pool_stats_operations = { | 179 | static struct file_operations pool_stats_operations = { |
179 | .open = nfsd_pool_stats_open, | 180 | .open = nfsd_pool_stats_open, |
180 | .read = seq_read, | 181 | .read = seq_read, |
181 | .llseek = seq_lseek, | 182 | .llseek = seq_lseek, |
182 | .release = seq_release, | 183 | .release = nfsd_pool_stats_release, |
183 | .owner = THIS_MODULE, | 184 | .owner = THIS_MODULE, |
184 | }; | 185 | }; |
185 | 186 | ||
@@ -776,10 +777,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) | |||
776 | size -= len; | 777 | size -= len; |
777 | mesg += len; | 778 | mesg += len; |
778 | } | 779 | } |
779 | 780 | rv = mesg - buf; | |
780 | mutex_unlock(&nfsd_mutex); | ||
781 | return (mesg-buf); | ||
782 | |||
783 | out_free: | 781 | out_free: |
784 | kfree(nthreads); | 782 | kfree(nthreads); |
785 | mutex_unlock(&nfsd_mutex); | 783 | mutex_unlock(&nfsd_mutex); |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 8847f3fbfc1e..01965b2f3a76 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -397,44 +397,51 @@ static inline void _fh_update_old(struct dentry *dentry, | |||
397 | fh->ofh_dirino = 0; | 397 | fh->ofh_dirino = 0; |
398 | } | 398 | } |
399 | 399 | ||
400 | __be32 | 400 | static bool is_root_export(struct svc_export *exp) |
401 | fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, | ||
402 | struct svc_fh *ref_fh) | ||
403 | { | 401 | { |
404 | /* ref_fh is a reference file handle. | 402 | return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root; |
405 | * if it is non-null and for the same filesystem, then we should compose | 403 | } |
406 | * a filehandle which is of the same version, where possible. | ||
407 | * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca | ||
408 | * Then create a 32byte filehandle using nfs_fhbase_old | ||
409 | * | ||
410 | */ | ||
411 | 404 | ||
412 | u8 version; | 405 | static struct super_block *exp_sb(struct svc_export *exp) |
413 | u8 fsid_type = 0; | 406 | { |
414 | struct inode * inode = dentry->d_inode; | 407 | return exp->ex_path.dentry->d_inode->i_sb; |
415 | struct dentry *parent = dentry->d_parent; | 408 | } |
416 | __u32 *datap; | ||
417 | dev_t ex_dev = exp->ex_path.dentry->d_inode->i_sb->s_dev; | ||
418 | int root_export = (exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root); | ||
419 | 409 | ||
420 | dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", | 410 | static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) |
421 | MAJOR(ex_dev), MINOR(ex_dev), | 411 | { |
422 | (long) exp->ex_path.dentry->d_inode->i_ino, | 412 | switch (fsid_type) { |
423 | parent->d_name.name, dentry->d_name.name, | 413 | case FSID_DEV: |
424 | (inode ? inode->i_ino : 0)); | 414 | if (!old_valid_dev(exp_sb(exp)->s_dev)) |
415 | return 0; | ||
416 | /* FALL THROUGH */ | ||
417 | case FSID_MAJOR_MINOR: | ||
418 | case FSID_ENCODE_DEV: | ||
419 | return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV; | ||
420 | case FSID_NUM: | ||
421 | return exp->ex_flags & NFSEXP_FSID; | ||
422 | case FSID_UUID8: | ||
423 | case FSID_UUID16: | ||
424 | if (!is_root_export(exp)) | ||
425 | return 0; | ||
426 | /* fall through */ | ||
427 | case FSID_UUID4_INUM: | ||
428 | case FSID_UUID16_INUM: | ||
429 | return exp->ex_uuid != NULL; | ||
430 | } | ||
431 | return 1; | ||
432 | } | ||
425 | 433 | ||
426 | /* Choose filehandle version and fsid type based on | 434 | |
427 | * the reference filehandle (if it is in the same export) | 435 | static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh) |
428 | * or the export options. | 436 | { |
429 | */ | 437 | u8 version; |
430 | retry: | 438 | u8 fsid_type; |
439 | retry: | ||
431 | version = 1; | 440 | version = 1; |
432 | if (ref_fh && ref_fh->fh_export == exp) { | 441 | if (ref_fh && ref_fh->fh_export == exp) { |
433 | version = ref_fh->fh_handle.fh_version; | 442 | version = ref_fh->fh_handle.fh_version; |
434 | fsid_type = ref_fh->fh_handle.fh_fsid_type; | 443 | fsid_type = ref_fh->fh_handle.fh_fsid_type; |
435 | 444 | ||
436 | if (ref_fh == fhp) | ||
437 | fh_put(ref_fh); | ||
438 | ref_fh = NULL; | 445 | ref_fh = NULL; |
439 | 446 | ||
440 | switch (version) { | 447 | switch (version) { |
@@ -447,58 +454,66 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, | |||
447 | goto retry; | 454 | goto retry; |
448 | } | 455 | } |
449 | 456 | ||
450 | /* Need to check that this type works for this | 457 | /* |
451 | * export point. As the fsid -> filesystem mapping | 458 | * As the fsid -> filesystem mapping was guided by |
452 | * was guided by user-space, there is no guarantee | 459 | * user-space, there is no guarantee that the filesystem |
453 | * that the filesystem actually supports that fsid | 460 | * actually supports that fsid type. If it doesn't we |
454 | * type. If it doesn't we loop around again without | 461 | * loop around again without ref_fh set. |
455 | * ref_fh set. | ||
456 | */ | 462 | */ |
457 | switch(fsid_type) { | 463 | if (!fsid_type_ok_for_exp(fsid_type, exp)) |
458 | case FSID_DEV: | 464 | goto retry; |
459 | if (!old_valid_dev(ex_dev)) | ||
460 | goto retry; | ||
461 | /* FALL THROUGH */ | ||
462 | case FSID_MAJOR_MINOR: | ||
463 | case FSID_ENCODE_DEV: | ||
464 | if (!(exp->ex_path.dentry->d_inode->i_sb->s_type->fs_flags | ||
465 | & FS_REQUIRES_DEV)) | ||
466 | goto retry; | ||
467 | break; | ||
468 | case FSID_NUM: | ||
469 | if (! (exp->ex_flags & NFSEXP_FSID)) | ||
470 | goto retry; | ||
471 | break; | ||
472 | case FSID_UUID8: | ||
473 | case FSID_UUID16: | ||
474 | if (!root_export) | ||
475 | goto retry; | ||
476 | /* fall through */ | ||
477 | case FSID_UUID4_INUM: | ||
478 | case FSID_UUID16_INUM: | ||
479 | if (exp->ex_uuid == NULL) | ||
480 | goto retry; | ||
481 | break; | ||
482 | } | ||
483 | } else if (exp->ex_flags & NFSEXP_FSID) { | 465 | } else if (exp->ex_flags & NFSEXP_FSID) { |
484 | fsid_type = FSID_NUM; | 466 | fsid_type = FSID_NUM; |
485 | } else if (exp->ex_uuid) { | 467 | } else if (exp->ex_uuid) { |
486 | if (fhp->fh_maxsize >= 64) { | 468 | if (fhp->fh_maxsize >= 64) { |
487 | if (root_export) | 469 | if (is_root_export(exp)) |
488 | fsid_type = FSID_UUID16; | 470 | fsid_type = FSID_UUID16; |
489 | else | 471 | else |
490 | fsid_type = FSID_UUID16_INUM; | 472 | fsid_type = FSID_UUID16_INUM; |
491 | } else { | 473 | } else { |
492 | if (root_export) | 474 | if (is_root_export(exp)) |
493 | fsid_type = FSID_UUID8; | 475 | fsid_type = FSID_UUID8; |
494 | else | 476 | else |
495 | fsid_type = FSID_UUID4_INUM; | 477 | fsid_type = FSID_UUID4_INUM; |
496 | } | 478 | } |
497 | } else if (!old_valid_dev(ex_dev)) | 479 | } else if (!old_valid_dev(exp_sb(exp)->s_dev)) |
498 | /* for newer device numbers, we must use a newer fsid format */ | 480 | /* for newer device numbers, we must use a newer fsid format */ |
499 | fsid_type = FSID_ENCODE_DEV; | 481 | fsid_type = FSID_ENCODE_DEV; |
500 | else | 482 | else |
501 | fsid_type = FSID_DEV; | 483 | fsid_type = FSID_DEV; |
484 | fhp->fh_handle.fh_version = version; | ||
485 | if (version) | ||
486 | fhp->fh_handle.fh_fsid_type = fsid_type; | ||
487 | } | ||
488 | |||
489 | __be32 | ||
490 | fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, | ||
491 | struct svc_fh *ref_fh) | ||
492 | { | ||
493 | /* ref_fh is a reference file handle. | ||
494 | * if it is non-null and for the same filesystem, then we should compose | ||
495 | * a filehandle which is of the same version, where possible. | ||
496 | * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca | ||
497 | * Then create a 32byte filehandle using nfs_fhbase_old | ||
498 | * | ||
499 | */ | ||
500 | |||
501 | struct inode * inode = dentry->d_inode; | ||
502 | struct dentry *parent = dentry->d_parent; | ||
503 | __u32 *datap; | ||
504 | dev_t ex_dev = exp_sb(exp)->s_dev; | ||
505 | |||
506 | dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", | ||
507 | MAJOR(ex_dev), MINOR(ex_dev), | ||
508 | (long) exp->ex_path.dentry->d_inode->i_ino, | ||
509 | parent->d_name.name, dentry->d_name.name, | ||
510 | (inode ? inode->i_ino : 0)); | ||
511 | |||
512 | /* Choose filehandle version and fsid type based on | ||
513 | * the reference filehandle (if it is in the same export) | ||
514 | * or the export options. | ||
515 | */ | ||
516 | set_version_and_fsid_type(fhp, exp, ref_fh); | ||
502 | 517 | ||
503 | if (ref_fh == fhp) | 518 | if (ref_fh == fhp) |
504 | fh_put(ref_fh); | 519 | fh_put(ref_fh); |
@@ -516,7 +531,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, | |||
516 | fhp->fh_export = exp; | 531 | fhp->fh_export = exp; |
517 | cache_get(&exp->h); | 532 | cache_get(&exp->h); |
518 | 533 | ||
519 | if (version == 0xca) { | 534 | if (fhp->fh_handle.fh_version == 0xca) { |
520 | /* old style filehandle please */ | 535 | /* old style filehandle please */ |
521 | memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); | 536 | memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); |
522 | fhp->fh_handle.fh_size = NFS_FHSIZE; | 537 | fhp->fh_handle.fh_size = NFS_FHSIZE; |
@@ -530,22 +545,22 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, | |||
530 | _fh_update_old(dentry, exp, &fhp->fh_handle); | 545 | _fh_update_old(dentry, exp, &fhp->fh_handle); |
531 | } else { | 546 | } else { |
532 | int len; | 547 | int len; |
533 | fhp->fh_handle.fh_version = 1; | ||
534 | fhp->fh_handle.fh_auth_type = 0; | 548 | fhp->fh_handle.fh_auth_type = 0; |
535 | datap = fhp->fh_handle.fh_auth+0; | 549 | datap = fhp->fh_handle.fh_auth+0; |
536 | fhp->fh_handle.fh_fsid_type = fsid_type; | 550 | mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev, |
537 | mk_fsid(fsid_type, datap, ex_dev, | ||
538 | exp->ex_path.dentry->d_inode->i_ino, | 551 | exp->ex_path.dentry->d_inode->i_ino, |
539 | exp->ex_fsid, exp->ex_uuid); | 552 | exp->ex_fsid, exp->ex_uuid); |
540 | 553 | ||
541 | len = key_len(fsid_type); | 554 | len = key_len(fhp->fh_handle.fh_fsid_type); |
542 | datap += len/4; | 555 | datap += len/4; |
543 | fhp->fh_handle.fh_size = 4 + len; | 556 | fhp->fh_handle.fh_size = 4 + len; |
544 | 557 | ||
545 | if (inode) | 558 | if (inode) |
546 | _fh_update(fhp, exp, dentry); | 559 | _fh_update(fhp, exp, dentry); |
547 | if (fhp->fh_handle.fh_fileid_type == 255) | 560 | if (fhp->fh_handle.fh_fileid_type == 255) { |
561 | fh_put(fhp); | ||
548 | return nfserr_opnotsupp; | 562 | return nfserr_opnotsupp; |
563 | } | ||
549 | } | 564 | } |
550 | 565 | ||
551 | return 0; | 566 | return 0; |
@@ -639,8 +654,7 @@ enum fsid_source fsid_source(struct svc_fh *fhp) | |||
639 | case FSID_DEV: | 654 | case FSID_DEV: |
640 | case FSID_ENCODE_DEV: | 655 | case FSID_ENCODE_DEV: |
641 | case FSID_MAJOR_MINOR: | 656 | case FSID_MAJOR_MINOR: |
642 | if (fhp->fh_export->ex_path.dentry->d_inode->i_sb->s_type->fs_flags | 657 | if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV) |
643 | & FS_REQUIRES_DEV) | ||
644 | return FSIDSOURCE_DEV; | 658 | return FSIDSOURCE_DEV; |
645 | break; | 659 | break; |
646 | case FSID_NUM: | 660 | case FSID_NUM: |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 24d58adfe5fd..67ea83eedd43 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/nfsd/syscall.h> | 34 | #include <linux/nfsd/syscall.h> |
35 | #include <linux/lockd/bind.h> | 35 | #include <linux/lockd/bind.h> |
36 | #include <linux/nfsacl.h> | 36 | #include <linux/nfsacl.h> |
37 | #include <linux/seq_file.h> | ||
37 | 38 | ||
38 | #define NFSDDBG_FACILITY NFSDDBG_SVC | 39 | #define NFSDDBG_FACILITY NFSDDBG_SVC |
39 | 40 | ||
@@ -66,6 +67,16 @@ struct timeval nfssvc_boot; | |||
66 | DEFINE_MUTEX(nfsd_mutex); | 67 | DEFINE_MUTEX(nfsd_mutex); |
67 | struct svc_serv *nfsd_serv; | 68 | struct svc_serv *nfsd_serv; |
68 | 69 | ||
70 | /* | ||
71 | * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. | ||
72 | * nfsd_drc_max_pages limits the total amount of memory available for | ||
73 | * version 4.1 DRC caches. | ||
74 | * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. | ||
75 | */ | ||
76 | spinlock_t nfsd_drc_lock; | ||
77 | unsigned int nfsd_drc_max_mem; | ||
78 | unsigned int nfsd_drc_mem_used; | ||
79 | |||
69 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) | 80 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) |
70 | static struct svc_stat nfsd_acl_svcstats; | 81 | static struct svc_stat nfsd_acl_svcstats; |
71 | static struct svc_version * nfsd_acl_version[] = { | 82 | static struct svc_version * nfsd_acl_version[] = { |
@@ -235,13 +246,12 @@ void nfsd_reset_versions(void) | |||
235 | */ | 246 | */ |
236 | static void set_max_drc(void) | 247 | static void set_max_drc(void) |
237 | { | 248 | { |
238 | /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ | 249 | #define NFSD_DRC_SIZE_SHIFT 10 |
239 | #define NFSD_DRC_SIZE_SHIFT 7 | 250 | nfsd_drc_max_mem = (nr_free_buffer_pages() |
240 | nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() | 251 | >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; |
241 | >> NFSD_DRC_SIZE_SHIFT; | 252 | nfsd_drc_mem_used = 0; |
242 | nfsd_serv->sv_drc_pages_used = 0; | 253 | spin_lock_init(&nfsd_drc_lock); |
243 | dprintk("%s svc_drc_max_pages %u\n", __func__, | 254 | dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); |
244 | nfsd_serv->sv_drc_max_pages); | ||
245 | } | 255 | } |
246 | 256 | ||
247 | int nfsd_create_serv(void) | 257 | int nfsd_create_serv(void) |
@@ -401,7 +411,9 @@ nfsd_svc(unsigned short port, int nrservs) | |||
401 | error = nfsd_racache_init(2*nrservs); | 411 | error = nfsd_racache_init(2*nrservs); |
402 | if (error<0) | 412 | if (error<0) |
403 | goto out; | 413 | goto out; |
404 | nfs4_state_start(); | 414 | error = nfs4_state_start(); |
415 | if (error) | ||
416 | goto out; | ||
405 | 417 | ||
406 | nfsd_reset_versions(); | 418 | nfsd_reset_versions(); |
407 | 419 | ||
@@ -569,10 +581,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) | |||
569 | + rqstp->rq_res.head[0].iov_len; | 581 | + rqstp->rq_res.head[0].iov_len; |
570 | rqstp->rq_res.head[0].iov_len += sizeof(__be32); | 582 | rqstp->rq_res.head[0].iov_len += sizeof(__be32); |
571 | 583 | ||
572 | /* NFSv4.1 DRC requires statp */ | ||
573 | if (rqstp->rq_vers == 4) | ||
574 | nfsd4_set_statp(rqstp, statp); | ||
575 | |||
576 | /* Now call the procedure handler, and encode NFS status. */ | 584 | /* Now call the procedure handler, and encode NFS status. */ |
577 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); | 585 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); |
578 | nfserr = map_new_errors(rqstp->rq_vers, nfserr); | 586 | nfserr = map_new_errors(rqstp->rq_vers, nfserr); |
@@ -607,7 +615,25 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) | |||
607 | 615 | ||
608 | int nfsd_pool_stats_open(struct inode *inode, struct file *file) | 616 | int nfsd_pool_stats_open(struct inode *inode, struct file *file) |
609 | { | 617 | { |
610 | if (nfsd_serv == NULL) | 618 | int ret; |
619 | mutex_lock(&nfsd_mutex); | ||
620 | if (nfsd_serv == NULL) { | ||
621 | mutex_unlock(&nfsd_mutex); | ||
611 | return -ENODEV; | 622 | return -ENODEV; |
612 | return svc_pool_stats_open(nfsd_serv, file); | 623 | } |
624 | /* bump up the psudo refcount while traversing */ | ||
625 | svc_get(nfsd_serv); | ||
626 | ret = svc_pool_stats_open(nfsd_serv, file); | ||
627 | mutex_unlock(&nfsd_mutex); | ||
628 | return ret; | ||
629 | } | ||
630 | |||
631 | int nfsd_pool_stats_release(struct inode *inode, struct file *file) | ||
632 | { | ||
633 | int ret = seq_release(inode, file); | ||
634 | mutex_lock(&nfsd_mutex); | ||
635 | /* this function really, really should have been called svc_put() */ | ||
636 | svc_destroy(nfsd_serv); | ||
637 | mutex_unlock(&nfsd_mutex); | ||
638 | return ret; | ||
613 | } | 639 | } |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8fa09bfbcba7..a293f0273263 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -89,6 +89,12 @@ struct raparm_hbucket { | |||
89 | #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) | 89 | #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) |
90 | static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; | 90 | static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; |
91 | 91 | ||
92 | static inline int | ||
93 | nfsd_v4client(struct svc_rqst *rq) | ||
94 | { | ||
95 | return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4; | ||
96 | } | ||
97 | |||
92 | /* | 98 | /* |
93 | * Called from nfsd_lookup and encode_dirent. Check if we have crossed | 99 | * Called from nfsd_lookup and encode_dirent. Check if we have crossed |
94 | * a mount point. | 100 | * a mount point. |
@@ -115,7 +121,8 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | |||
115 | path_put(&path); | 121 | path_put(&path); |
116 | goto out; | 122 | goto out; |
117 | } | 123 | } |
118 | if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { | 124 | if (nfsd_v4client(rqstp) || |
125 | (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { | ||
119 | /* successfully crossed mount point */ | 126 | /* successfully crossed mount point */ |
120 | /* | 127 | /* |
121 | * This is subtle: path.dentry is *not* on path.mnt | 128 | * This is subtle: path.dentry is *not* on path.mnt |
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index c668bca579c1..6a2711f4c321 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c | |||
@@ -46,7 +46,7 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc) | |||
46 | INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); | 46 | INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); |
47 | } | 47 | } |
48 | 48 | ||
49 | static struct address_space_operations def_btnode_aops = { | 49 | static const struct address_space_operations def_btnode_aops = { |
50 | .sync_page = block_sync_page, | 50 | .sync_page = block_sync_page, |
51 | }; | 51 | }; |
52 | 52 | ||
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 6bd84a0d8238..fc8278c77cdd 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c | |||
@@ -151,7 +151,7 @@ struct file_operations nilfs_file_operations = { | |||
151 | .splice_read = generic_file_splice_read, | 151 | .splice_read = generic_file_splice_read, |
152 | }; | 152 | }; |
153 | 153 | ||
154 | struct inode_operations nilfs_file_inode_operations = { | 154 | const struct inode_operations nilfs_file_inode_operations = { |
155 | .truncate = nilfs_truncate, | 155 | .truncate = nilfs_truncate, |
156 | .setattr = nilfs_setattr, | 156 | .setattr = nilfs_setattr, |
157 | .permission = nilfs_permission, | 157 | .permission = nilfs_permission, |
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 1b3c2bb20da9..e6de0a27ab5d 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c | |||
@@ -52,7 +52,7 @@ | |||
52 | #include "dat.h" | 52 | #include "dat.h" |
53 | #include "ifile.h" | 53 | #include "ifile.h" |
54 | 54 | ||
55 | static struct address_space_operations def_gcinode_aops = { | 55 | static const struct address_space_operations def_gcinode_aops = { |
56 | .sync_page = block_sync_page, | 56 | .sync_page = block_sync_page, |
57 | }; | 57 | }; |
58 | 58 | ||
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 807e584b163d..2d2c501deb54 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -238,7 +238,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
238 | return size; | 238 | return size; |
239 | } | 239 | } |
240 | 240 | ||
241 | struct address_space_operations nilfs_aops = { | 241 | const struct address_space_operations nilfs_aops = { |
242 | .writepage = nilfs_writepage, | 242 | .writepage = nilfs_writepage, |
243 | .readpage = nilfs_readpage, | 243 | .readpage = nilfs_readpage, |
244 | .sync_page = block_sync_page, | 244 | .sync_page = block_sync_page, |
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 156bf6091a96..b18c4998f8d0 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c | |||
@@ -427,12 +427,12 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) | |||
427 | } | 427 | } |
428 | 428 | ||
429 | 429 | ||
430 | static struct address_space_operations def_mdt_aops = { | 430 | static const struct address_space_operations def_mdt_aops = { |
431 | .writepage = nilfs_mdt_write_page, | 431 | .writepage = nilfs_mdt_write_page, |
432 | .sync_page = block_sync_page, | 432 | .sync_page = block_sync_page, |
433 | }; | 433 | }; |
434 | 434 | ||
435 | static struct inode_operations def_mdt_iops; | 435 | static const struct inode_operations def_mdt_iops; |
436 | static struct file_operations def_mdt_fops; | 436 | static struct file_operations def_mdt_fops; |
437 | 437 | ||
438 | /* | 438 | /* |
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index df70dadb336f..ed02e886fa79 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c | |||
@@ -448,7 +448,7 @@ out: | |||
448 | return err; | 448 | return err; |
449 | } | 449 | } |
450 | 450 | ||
451 | struct inode_operations nilfs_dir_inode_operations = { | 451 | const struct inode_operations nilfs_dir_inode_operations = { |
452 | .create = nilfs_create, | 452 | .create = nilfs_create, |
453 | .lookup = nilfs_lookup, | 453 | .lookup = nilfs_lookup, |
454 | .link = nilfs_link, | 454 | .link = nilfs_link, |
@@ -462,12 +462,12 @@ struct inode_operations nilfs_dir_inode_operations = { | |||
462 | .permission = nilfs_permission, | 462 | .permission = nilfs_permission, |
463 | }; | 463 | }; |
464 | 464 | ||
465 | struct inode_operations nilfs_special_inode_operations = { | 465 | const struct inode_operations nilfs_special_inode_operations = { |
466 | .setattr = nilfs_setattr, | 466 | .setattr = nilfs_setattr, |
467 | .permission = nilfs_permission, | 467 | .permission = nilfs_permission, |
468 | }; | 468 | }; |
469 | 469 | ||
470 | struct inode_operations nilfs_symlink_inode_operations = { | 470 | const struct inode_operations nilfs_symlink_inode_operations = { |
471 | .readlink = generic_readlink, | 471 | .readlink = generic_readlink, |
472 | .follow_link = page_follow_link_light, | 472 | .follow_link = page_follow_link_light, |
473 | .put_link = page_put_link, | 473 | .put_link = page_put_link, |
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 724c63766e82..bad7368782d0 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h | |||
@@ -295,12 +295,12 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *); | |||
295 | * Inodes and files operations | 295 | * Inodes and files operations |
296 | */ | 296 | */ |
297 | extern struct file_operations nilfs_dir_operations; | 297 | extern struct file_operations nilfs_dir_operations; |
298 | extern struct inode_operations nilfs_file_inode_operations; | 298 | extern const struct inode_operations nilfs_file_inode_operations; |
299 | extern struct file_operations nilfs_file_operations; | 299 | extern struct file_operations nilfs_file_operations; |
300 | extern struct address_space_operations nilfs_aops; | 300 | extern const struct address_space_operations nilfs_aops; |
301 | extern struct inode_operations nilfs_dir_inode_operations; | 301 | extern const struct inode_operations nilfs_dir_inode_operations; |
302 | extern struct inode_operations nilfs_special_inode_operations; | 302 | extern const struct inode_operations nilfs_special_inode_operations; |
303 | extern struct inode_operations nilfs_symlink_inode_operations; | 303 | extern const struct inode_operations nilfs_symlink_inode_operations; |
304 | 304 | ||
305 | /* | 305 | /* |
306 | * filesystem type | 306 | * filesystem type |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 55f3d6b60732..644e66727dd0 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -504,7 +504,7 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
504 | return 0; | 504 | return 0; |
505 | } | 505 | } |
506 | 506 | ||
507 | static struct super_operations nilfs_sops = { | 507 | static const struct super_operations nilfs_sops = { |
508 | .alloc_inode = nilfs_alloc_inode, | 508 | .alloc_inode = nilfs_alloc_inode, |
509 | .destroy_inode = nilfs_destroy_inode, | 509 | .destroy_inode = nilfs_destroy_inode, |
510 | .dirty_inode = nilfs_dirty_inode, | 510 | .dirty_inode = nilfs_dirty_inode, |
@@ -560,7 +560,7 @@ nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, | |||
560 | nilfs_nfs_get_inode); | 560 | nilfs_nfs_get_inode); |
561 | } | 561 | } |
562 | 562 | ||
563 | static struct export_operations nilfs_export_ops = { | 563 | static const struct export_operations nilfs_export_ops = { |
564 | .fh_to_dentry = nilfs_fh_to_dentry, | 564 | .fh_to_dentry = nilfs_fh_to_dentry, |
565 | .fh_to_parent = nilfs_fh_to_parent, | 565 | .fh_to_parent = nilfs_fh_to_parent, |
566 | .get_parent = nilfs_get_parent, | 566 | .get_parent = nilfs_get_parent, |
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index d4168e269c5d..ad391a8c3e7e 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -591,9 +591,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | |||
591 | 591 | ||
592 | nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); | 592 | nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); |
593 | 593 | ||
594 | bdi = nilfs->ns_bdev->bd_inode_backing_dev_info; | 594 | bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; |
595 | if (!bdi) | ||
596 | bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; | ||
597 | nilfs->ns_bdi = bdi ? : &default_backing_dev_info; | 595 | nilfs->ns_bdi = bdi ? : &default_backing_dev_info; |
598 | 596 | ||
599 | /* Finding last segment */ | 597 | /* Finding last segment */ |
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 4350d4993b18..663c0e341f8b 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -2146,46 +2146,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2146 | } | 2146 | } |
2147 | 2147 | ||
2148 | /** | 2148 | /** |
2149 | * ntfs_file_writev - | ||
2150 | * | ||
2151 | * Basically the same as generic_file_writev() except that it ends up calling | ||
2152 | * ntfs_file_aio_write_nolock() instead of __generic_file_aio_write_nolock(). | ||
2153 | */ | ||
2154 | static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov, | ||
2155 | unsigned long nr_segs, loff_t *ppos) | ||
2156 | { | ||
2157 | struct address_space *mapping = file->f_mapping; | ||
2158 | struct inode *inode = mapping->host; | ||
2159 | struct kiocb kiocb; | ||
2160 | ssize_t ret; | ||
2161 | |||
2162 | mutex_lock(&inode->i_mutex); | ||
2163 | init_sync_kiocb(&kiocb, file); | ||
2164 | ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); | ||
2165 | if (ret == -EIOCBQUEUED) | ||
2166 | ret = wait_on_sync_kiocb(&kiocb); | ||
2167 | mutex_unlock(&inode->i_mutex); | ||
2168 | if (ret > 0) { | ||
2169 | int err = generic_write_sync(file, *ppos - ret, ret); | ||
2170 | if (err < 0) | ||
2171 | ret = err; | ||
2172 | } | ||
2173 | return ret; | ||
2174 | } | ||
2175 | |||
2176 | /** | ||
2177 | * ntfs_file_write - simple wrapper for ntfs_file_writev() | ||
2178 | */ | ||
2179 | static ssize_t ntfs_file_write(struct file *file, const char __user *buf, | ||
2180 | size_t count, loff_t *ppos) | ||
2181 | { | ||
2182 | struct iovec local_iov = { .iov_base = (void __user *)buf, | ||
2183 | .iov_len = count }; | ||
2184 | |||
2185 | return ntfs_file_writev(file, &local_iov, 1, ppos); | ||
2186 | } | ||
2187 | |||
2188 | /** | ||
2189 | * ntfs_file_fsync - sync a file to disk | 2149 | * ntfs_file_fsync - sync a file to disk |
2190 | * @filp: file to be synced | 2150 | * @filp: file to be synced |
2191 | * @dentry: dentry describing the file to sync | 2151 | * @dentry: dentry describing the file to sync |
@@ -2247,7 +2207,7 @@ const struct file_operations ntfs_file_ops = { | |||
2247 | .read = do_sync_read, /* Read from file. */ | 2207 | .read = do_sync_read, /* Read from file. */ |
2248 | .aio_read = generic_file_aio_read, /* Async read from file. */ | 2208 | .aio_read = generic_file_aio_read, /* Async read from file. */ |
2249 | #ifdef NTFS_RW | 2209 | #ifdef NTFS_RW |
2250 | .write = ntfs_file_write, /* Write to file. */ | 2210 | .write = do_sync_write, /* Write to file. */ |
2251 | .aio_write = ntfs_file_aio_write, /* Async write to file. */ | 2211 | .aio_write = ntfs_file_aio_write, /* Async write to file. */ |
2252 | /*.release = ,*/ /* Last file is closed. See | 2212 | /*.release = ,*/ /* Last file is closed. See |
2253 | fs/ext2/file.c:: | 2213 | fs/ext2/file.c:: |
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index 50931b1ce4b9..8b2549f672bf 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h | |||
@@ -829,7 +829,7 @@ enum { | |||
829 | /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the | 829 | /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the |
830 | F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, | 830 | F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, |
831 | F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask | 831 | F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask |
832 | is used to to obtain all flags that are valid for setting. */ | 832 | is used to obtain all flags that are valid for setting. */ |
833 | /* | 833 | /* |
834 | * The flag FILE_ATTR_DUP_FILENAME_INDEX_PRESENT is present in all | 834 | * The flag FILE_ATTR_DUP_FILENAME_INDEX_PRESENT is present in all |
835 | * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION | 835 | * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION |
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index cd0be3f5c3cd..a44b14cbceeb 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h | |||
@@ -47,7 +47,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask) | |||
47 | return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM); | 47 | return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM); |
48 | /* return (void *)__get_free_page(gfp_mask); */ | 48 | /* return (void *)__get_free_page(gfp_mask); */ |
49 | } | 49 | } |
50 | if (likely(size >> PAGE_SHIFT < num_physpages)) | 50 | if (likely((size >> PAGE_SHIFT) < totalram_pages)) |
51 | return __vmalloc(size, gfp_mask, PAGE_KERNEL); | 51 | return __vmalloc(size, gfp_mask, PAGE_KERNEL); |
52 | return NULL; | 52 | return NULL; |
53 | } | 53 | } |
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 01596079dd63..31f25ce32c97 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
@@ -28,6 +28,7 @@ ocfs2-objs := \ | |||
28 | locks.o \ | 28 | locks.o \ |
29 | mmap.o \ | 29 | mmap.o \ |
30 | namei.o \ | 30 | namei.o \ |
31 | refcounttree.o \ | ||
31 | resize.o \ | 32 | resize.o \ |
32 | slot_map.o \ | 33 | slot_map.o \ |
33 | suballoc.o \ | 34 | suballoc.o \ |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index ab513ddaeff2..38a42f5d59ff 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -49,10 +49,21 @@ | |||
49 | #include "super.h" | 49 | #include "super.h" |
50 | #include "uptodate.h" | 50 | #include "uptodate.h" |
51 | #include "xattr.h" | 51 | #include "xattr.h" |
52 | #include "refcounttree.h" | ||
52 | 53 | ||
53 | #include "buffer_head_io.h" | 54 | #include "buffer_head_io.h" |
54 | 55 | ||
56 | enum ocfs2_contig_type { | ||
57 | CONTIG_NONE = 0, | ||
58 | CONTIG_LEFT, | ||
59 | CONTIG_RIGHT, | ||
60 | CONTIG_LEFTRIGHT, | ||
61 | }; | ||
55 | 62 | ||
63 | static enum ocfs2_contig_type | ||
64 | ocfs2_extent_rec_contig(struct super_block *sb, | ||
65 | struct ocfs2_extent_rec *ext, | ||
66 | struct ocfs2_extent_rec *insert_rec); | ||
56 | /* | 67 | /* |
57 | * Operations for a specific extent tree type. | 68 | * Operations for a specific extent tree type. |
58 | * | 69 | * |
@@ -79,18 +90,30 @@ struct ocfs2_extent_tree_operations { | |||
79 | * that value. new_clusters is the delta, and must be | 90 | * that value. new_clusters is the delta, and must be |
80 | * added to the total. Required. | 91 | * added to the total. Required. |
81 | */ | 92 | */ |
82 | void (*eo_update_clusters)(struct inode *inode, | 93 | void (*eo_update_clusters)(struct ocfs2_extent_tree *et, |
83 | struct ocfs2_extent_tree *et, | ||
84 | u32 new_clusters); | 94 | u32 new_clusters); |
85 | 95 | ||
86 | /* | 96 | /* |
97 | * If this extent tree is supported by an extent map, insert | ||
98 | * a record into the map. | ||
99 | */ | ||
100 | void (*eo_extent_map_insert)(struct ocfs2_extent_tree *et, | ||
101 | struct ocfs2_extent_rec *rec); | ||
102 | |||
103 | /* | ||
104 | * If this extent tree is supported by an extent map, truncate the | ||
105 | * map to clusters, | ||
106 | */ | ||
107 | void (*eo_extent_map_truncate)(struct ocfs2_extent_tree *et, | ||
108 | u32 clusters); | ||
109 | |||
110 | /* | ||
87 | * If ->eo_insert_check() exists, it is called before rec is | 111 | * If ->eo_insert_check() exists, it is called before rec is |
88 | * inserted into the extent tree. It is optional. | 112 | * inserted into the extent tree. It is optional. |
89 | */ | 113 | */ |
90 | int (*eo_insert_check)(struct inode *inode, | 114 | int (*eo_insert_check)(struct ocfs2_extent_tree *et, |
91 | struct ocfs2_extent_tree *et, | ||
92 | struct ocfs2_extent_rec *rec); | 115 | struct ocfs2_extent_rec *rec); |
93 | int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et); | 116 | int (*eo_sanity_check)(struct ocfs2_extent_tree *et); |
94 | 117 | ||
95 | /* | 118 | /* |
96 | * -------------------------------------------------------------- | 119 | * -------------------------------------------------------------- |
@@ -109,8 +132,17 @@ struct ocfs2_extent_tree_operations { | |||
109 | * it exists. If it does not, et->et_max_leaf_clusters is set | 132 | * it exists. If it does not, et->et_max_leaf_clusters is set |
110 | * to 0 (unlimited). Optional. | 133 | * to 0 (unlimited). Optional. |
111 | */ | 134 | */ |
112 | void (*eo_fill_max_leaf_clusters)(struct inode *inode, | 135 | void (*eo_fill_max_leaf_clusters)(struct ocfs2_extent_tree *et); |
113 | struct ocfs2_extent_tree *et); | 136 | |
137 | /* | ||
138 | * ->eo_extent_contig test whether the 2 ocfs2_extent_rec | ||
139 | * are contiguous or not. Optional. Don't need to set it if use | ||
140 | * ocfs2_extent_rec as the tree leaf. | ||
141 | */ | ||
142 | enum ocfs2_contig_type | ||
143 | (*eo_extent_contig)(struct ocfs2_extent_tree *et, | ||
144 | struct ocfs2_extent_rec *ext, | ||
145 | struct ocfs2_extent_rec *insert_rec); | ||
114 | }; | 146 | }; |
115 | 147 | ||
116 | 148 | ||
@@ -121,19 +153,22 @@ struct ocfs2_extent_tree_operations { | |||
121 | static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et); | 153 | static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et); |
122 | static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et, | 154 | static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et, |
123 | u64 blkno); | 155 | u64 blkno); |
124 | static void ocfs2_dinode_update_clusters(struct inode *inode, | 156 | static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et, |
125 | struct ocfs2_extent_tree *et, | ||
126 | u32 clusters); | 157 | u32 clusters); |
127 | static int ocfs2_dinode_insert_check(struct inode *inode, | 158 | static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et, |
128 | struct ocfs2_extent_tree *et, | 159 | struct ocfs2_extent_rec *rec); |
160 | static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et, | ||
161 | u32 clusters); | ||
162 | static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et, | ||
129 | struct ocfs2_extent_rec *rec); | 163 | struct ocfs2_extent_rec *rec); |
130 | static int ocfs2_dinode_sanity_check(struct inode *inode, | 164 | static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et); |
131 | struct ocfs2_extent_tree *et); | ||
132 | static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); | 165 | static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); |
133 | static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { | 166 | static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { |
134 | .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, | 167 | .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, |
135 | .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, | 168 | .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, |
136 | .eo_update_clusters = ocfs2_dinode_update_clusters, | 169 | .eo_update_clusters = ocfs2_dinode_update_clusters, |
170 | .eo_extent_map_insert = ocfs2_dinode_extent_map_insert, | ||
171 | .eo_extent_map_truncate = ocfs2_dinode_extent_map_truncate, | ||
137 | .eo_insert_check = ocfs2_dinode_insert_check, | 172 | .eo_insert_check = ocfs2_dinode_insert_check, |
138 | .eo_sanity_check = ocfs2_dinode_sanity_check, | 173 | .eo_sanity_check = ocfs2_dinode_sanity_check, |
139 | .eo_fill_root_el = ocfs2_dinode_fill_root_el, | 174 | .eo_fill_root_el = ocfs2_dinode_fill_root_el, |
@@ -156,40 +191,53 @@ static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et) | |||
156 | return le64_to_cpu(di->i_last_eb_blk); | 191 | return le64_to_cpu(di->i_last_eb_blk); |
157 | } | 192 | } |
158 | 193 | ||
159 | static void ocfs2_dinode_update_clusters(struct inode *inode, | 194 | static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et, |
160 | struct ocfs2_extent_tree *et, | ||
161 | u32 clusters) | 195 | u32 clusters) |
162 | { | 196 | { |
197 | struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci); | ||
163 | struct ocfs2_dinode *di = et->et_object; | 198 | struct ocfs2_dinode *di = et->et_object; |
164 | 199 | ||
165 | le32_add_cpu(&di->i_clusters, clusters); | 200 | le32_add_cpu(&di->i_clusters, clusters); |
166 | spin_lock(&OCFS2_I(inode)->ip_lock); | 201 | spin_lock(&oi->ip_lock); |
167 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); | 202 | oi->ip_clusters = le32_to_cpu(di->i_clusters); |
168 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 203 | spin_unlock(&oi->ip_lock); |
169 | } | 204 | } |
170 | 205 | ||
171 | static int ocfs2_dinode_insert_check(struct inode *inode, | 206 | static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et, |
172 | struct ocfs2_extent_tree *et, | 207 | struct ocfs2_extent_rec *rec) |
208 | { | ||
209 | struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode; | ||
210 | |||
211 | ocfs2_extent_map_insert_rec(inode, rec); | ||
212 | } | ||
213 | |||
214 | static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et, | ||
215 | u32 clusters) | ||
216 | { | ||
217 | struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode; | ||
218 | |||
219 | ocfs2_extent_map_trunc(inode, clusters); | ||
220 | } | ||
221 | |||
222 | static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et, | ||
173 | struct ocfs2_extent_rec *rec) | 223 | struct ocfs2_extent_rec *rec) |
174 | { | 224 | { |
175 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 225 | struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci); |
226 | struct ocfs2_super *osb = OCFS2_SB(oi->vfs_inode.i_sb); | ||
176 | 227 | ||
177 | BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); | 228 | BUG_ON(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL); |
178 | mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && | 229 | mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && |
179 | (OCFS2_I(inode)->ip_clusters != | 230 | (oi->ip_clusters != le32_to_cpu(rec->e_cpos)), |
180 | le32_to_cpu(rec->e_cpos)), | ||
181 | "Device %s, asking for sparse allocation: inode %llu, " | 231 | "Device %s, asking for sparse allocation: inode %llu, " |
182 | "cpos %u, clusters %u\n", | 232 | "cpos %u, clusters %u\n", |
183 | osb->dev_str, | 233 | osb->dev_str, |
184 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 234 | (unsigned long long)oi->ip_blkno, |
185 | rec->e_cpos, | 235 | rec->e_cpos, oi->ip_clusters); |
186 | OCFS2_I(inode)->ip_clusters); | ||
187 | 236 | ||
188 | return 0; | 237 | return 0; |
189 | } | 238 | } |
190 | 239 | ||
191 | static int ocfs2_dinode_sanity_check(struct inode *inode, | 240 | static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et) |
192 | struct ocfs2_extent_tree *et) | ||
193 | { | 241 | { |
194 | struct ocfs2_dinode *di = et->et_object; | 242 | struct ocfs2_dinode *di = et->et_object; |
195 | 243 | ||
@@ -229,8 +277,7 @@ static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et) | |||
229 | return le64_to_cpu(vb->vb_xv->xr_last_eb_blk); | 277 | return le64_to_cpu(vb->vb_xv->xr_last_eb_blk); |
230 | } | 278 | } |
231 | 279 | ||
232 | static void ocfs2_xattr_value_update_clusters(struct inode *inode, | 280 | static void ocfs2_xattr_value_update_clusters(struct ocfs2_extent_tree *et, |
233 | struct ocfs2_extent_tree *et, | ||
234 | u32 clusters) | 281 | u32 clusters) |
235 | { | 282 | { |
236 | struct ocfs2_xattr_value_buf *vb = et->et_object; | 283 | struct ocfs2_xattr_value_buf *vb = et->et_object; |
@@ -252,12 +299,11 @@ static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et) | |||
252 | et->et_root_el = &xb->xb_attrs.xb_root.xt_list; | 299 | et->et_root_el = &xb->xb_attrs.xb_root.xt_list; |
253 | } | 300 | } |
254 | 301 | ||
255 | static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode, | 302 | static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct ocfs2_extent_tree *et) |
256 | struct ocfs2_extent_tree *et) | ||
257 | { | 303 | { |
304 | struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci); | ||
258 | et->et_max_leaf_clusters = | 305 | et->et_max_leaf_clusters = |
259 | ocfs2_clusters_for_bytes(inode->i_sb, | 306 | ocfs2_clusters_for_bytes(sb, OCFS2_MAX_XATTR_TREE_LEAF_SIZE); |
260 | OCFS2_MAX_XATTR_TREE_LEAF_SIZE); | ||
261 | } | 307 | } |
262 | 308 | ||
263 | static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et, | 309 | static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et, |
@@ -277,8 +323,7 @@ static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et) | |||
277 | return le64_to_cpu(xt->xt_last_eb_blk); | 323 | return le64_to_cpu(xt->xt_last_eb_blk); |
278 | } | 324 | } |
279 | 325 | ||
280 | static void ocfs2_xattr_tree_update_clusters(struct inode *inode, | 326 | static void ocfs2_xattr_tree_update_clusters(struct ocfs2_extent_tree *et, |
281 | struct ocfs2_extent_tree *et, | ||
282 | u32 clusters) | 327 | u32 clusters) |
283 | { | 328 | { |
284 | struct ocfs2_xattr_block *xb = et->et_object; | 329 | struct ocfs2_xattr_block *xb = et->et_object; |
@@ -309,8 +354,7 @@ static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et) | |||
309 | return le64_to_cpu(dx_root->dr_last_eb_blk); | 354 | return le64_to_cpu(dx_root->dr_last_eb_blk); |
310 | } | 355 | } |
311 | 356 | ||
312 | static void ocfs2_dx_root_update_clusters(struct inode *inode, | 357 | static void ocfs2_dx_root_update_clusters(struct ocfs2_extent_tree *et, |
313 | struct ocfs2_extent_tree *et, | ||
314 | u32 clusters) | 358 | u32 clusters) |
315 | { | 359 | { |
316 | struct ocfs2_dx_root_block *dx_root = et->et_object; | 360 | struct ocfs2_dx_root_block *dx_root = et->et_object; |
@@ -318,8 +362,7 @@ static void ocfs2_dx_root_update_clusters(struct inode *inode, | |||
318 | le32_add_cpu(&dx_root->dr_clusters, clusters); | 362 | le32_add_cpu(&dx_root->dr_clusters, clusters); |
319 | } | 363 | } |
320 | 364 | ||
321 | static int ocfs2_dx_root_sanity_check(struct inode *inode, | 365 | static int ocfs2_dx_root_sanity_check(struct ocfs2_extent_tree *et) |
322 | struct ocfs2_extent_tree *et) | ||
323 | { | 366 | { |
324 | struct ocfs2_dx_root_block *dx_root = et->et_object; | 367 | struct ocfs2_dx_root_block *dx_root = et->et_object; |
325 | 368 | ||
@@ -343,8 +386,54 @@ static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = { | |||
343 | .eo_fill_root_el = ocfs2_dx_root_fill_root_el, | 386 | .eo_fill_root_el = ocfs2_dx_root_fill_root_el, |
344 | }; | 387 | }; |
345 | 388 | ||
389 | static void ocfs2_refcount_tree_fill_root_el(struct ocfs2_extent_tree *et) | ||
390 | { | ||
391 | struct ocfs2_refcount_block *rb = et->et_object; | ||
392 | |||
393 | et->et_root_el = &rb->rf_list; | ||
394 | } | ||
395 | |||
396 | static void ocfs2_refcount_tree_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
397 | u64 blkno) | ||
398 | { | ||
399 | struct ocfs2_refcount_block *rb = et->et_object; | ||
400 | |||
401 | rb->rf_last_eb_blk = cpu_to_le64(blkno); | ||
402 | } | ||
403 | |||
404 | static u64 ocfs2_refcount_tree_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
405 | { | ||
406 | struct ocfs2_refcount_block *rb = et->et_object; | ||
407 | |||
408 | return le64_to_cpu(rb->rf_last_eb_blk); | ||
409 | } | ||
410 | |||
411 | static void ocfs2_refcount_tree_update_clusters(struct ocfs2_extent_tree *et, | ||
412 | u32 clusters) | ||
413 | { | ||
414 | struct ocfs2_refcount_block *rb = et->et_object; | ||
415 | |||
416 | le32_add_cpu(&rb->rf_clusters, clusters); | ||
417 | } | ||
418 | |||
419 | static enum ocfs2_contig_type | ||
420 | ocfs2_refcount_tree_extent_contig(struct ocfs2_extent_tree *et, | ||
421 | struct ocfs2_extent_rec *ext, | ||
422 | struct ocfs2_extent_rec *insert_rec) | ||
423 | { | ||
424 | return CONTIG_NONE; | ||
425 | } | ||
426 | |||
427 | static struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = { | ||
428 | .eo_set_last_eb_blk = ocfs2_refcount_tree_set_last_eb_blk, | ||
429 | .eo_get_last_eb_blk = ocfs2_refcount_tree_get_last_eb_blk, | ||
430 | .eo_update_clusters = ocfs2_refcount_tree_update_clusters, | ||
431 | .eo_fill_root_el = ocfs2_refcount_tree_fill_root_el, | ||
432 | .eo_extent_contig = ocfs2_refcount_tree_extent_contig, | ||
433 | }; | ||
434 | |||
346 | static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, | 435 | static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, |
347 | struct inode *inode, | 436 | struct ocfs2_caching_info *ci, |
348 | struct buffer_head *bh, | 437 | struct buffer_head *bh, |
349 | ocfs2_journal_access_func access, | 438 | ocfs2_journal_access_func access, |
350 | void *obj, | 439 | void *obj, |
@@ -352,6 +441,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, | |||
352 | { | 441 | { |
353 | et->et_ops = ops; | 442 | et->et_ops = ops; |
354 | et->et_root_bh = bh; | 443 | et->et_root_bh = bh; |
444 | et->et_ci = ci; | ||
355 | et->et_root_journal_access = access; | 445 | et->et_root_journal_access = access; |
356 | if (!obj) | 446 | if (!obj) |
357 | obj = (void *)bh->b_data; | 447 | obj = (void *)bh->b_data; |
@@ -361,41 +451,49 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, | |||
361 | if (!et->et_ops->eo_fill_max_leaf_clusters) | 451 | if (!et->et_ops->eo_fill_max_leaf_clusters) |
362 | et->et_max_leaf_clusters = 0; | 452 | et->et_max_leaf_clusters = 0; |
363 | else | 453 | else |
364 | et->et_ops->eo_fill_max_leaf_clusters(inode, et); | 454 | et->et_ops->eo_fill_max_leaf_clusters(et); |
365 | } | 455 | } |
366 | 456 | ||
367 | void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, | 457 | void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, |
368 | struct inode *inode, | 458 | struct ocfs2_caching_info *ci, |
369 | struct buffer_head *bh) | 459 | struct buffer_head *bh) |
370 | { | 460 | { |
371 | __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_di, | 461 | __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_di, |
372 | NULL, &ocfs2_dinode_et_ops); | 462 | NULL, &ocfs2_dinode_et_ops); |
373 | } | 463 | } |
374 | 464 | ||
375 | void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, | 465 | void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, |
376 | struct inode *inode, | 466 | struct ocfs2_caching_info *ci, |
377 | struct buffer_head *bh) | 467 | struct buffer_head *bh) |
378 | { | 468 | { |
379 | __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_xb, | 469 | __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_xb, |
380 | NULL, &ocfs2_xattr_tree_et_ops); | 470 | NULL, &ocfs2_xattr_tree_et_ops); |
381 | } | 471 | } |
382 | 472 | ||
383 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, | 473 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, |
384 | struct inode *inode, | 474 | struct ocfs2_caching_info *ci, |
385 | struct ocfs2_xattr_value_buf *vb) | 475 | struct ocfs2_xattr_value_buf *vb) |
386 | { | 476 | { |
387 | __ocfs2_init_extent_tree(et, inode, vb->vb_bh, vb->vb_access, vb, | 477 | __ocfs2_init_extent_tree(et, ci, vb->vb_bh, vb->vb_access, vb, |
388 | &ocfs2_xattr_value_et_ops); | 478 | &ocfs2_xattr_value_et_ops); |
389 | } | 479 | } |
390 | 480 | ||
391 | void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, | 481 | void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, |
392 | struct inode *inode, | 482 | struct ocfs2_caching_info *ci, |
393 | struct buffer_head *bh) | 483 | struct buffer_head *bh) |
394 | { | 484 | { |
395 | __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_dr, | 485 | __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_dr, |
396 | NULL, &ocfs2_dx_root_et_ops); | 486 | NULL, &ocfs2_dx_root_et_ops); |
397 | } | 487 | } |
398 | 488 | ||
489 | void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et, | ||
490 | struct ocfs2_caching_info *ci, | ||
491 | struct buffer_head *bh) | ||
492 | { | ||
493 | __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_rb, | ||
494 | NULL, &ocfs2_refcount_tree_et_ops); | ||
495 | } | ||
496 | |||
399 | static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, | 497 | static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, |
400 | u64 new_last_eb_blk) | 498 | u64 new_last_eb_blk) |
401 | { | 499 | { |
@@ -407,78 +505,71 @@ static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et) | |||
407 | return et->et_ops->eo_get_last_eb_blk(et); | 505 | return et->et_ops->eo_get_last_eb_blk(et); |
408 | } | 506 | } |
409 | 507 | ||
410 | static inline void ocfs2_et_update_clusters(struct inode *inode, | 508 | static inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et, |
411 | struct ocfs2_extent_tree *et, | ||
412 | u32 clusters) | 509 | u32 clusters) |
413 | { | 510 | { |
414 | et->et_ops->eo_update_clusters(inode, et, clusters); | 511 | et->et_ops->eo_update_clusters(et, clusters); |
512 | } | ||
513 | |||
514 | static inline void ocfs2_et_extent_map_insert(struct ocfs2_extent_tree *et, | ||
515 | struct ocfs2_extent_rec *rec) | ||
516 | { | ||
517 | if (et->et_ops->eo_extent_map_insert) | ||
518 | et->et_ops->eo_extent_map_insert(et, rec); | ||
519 | } | ||
520 | |||
521 | static inline void ocfs2_et_extent_map_truncate(struct ocfs2_extent_tree *et, | ||
522 | u32 clusters) | ||
523 | { | ||
524 | if (et->et_ops->eo_extent_map_truncate) | ||
525 | et->et_ops->eo_extent_map_truncate(et, clusters); | ||
415 | } | 526 | } |
416 | 527 | ||
417 | static inline int ocfs2_et_root_journal_access(handle_t *handle, | 528 | static inline int ocfs2_et_root_journal_access(handle_t *handle, |
418 | struct inode *inode, | ||
419 | struct ocfs2_extent_tree *et, | 529 | struct ocfs2_extent_tree *et, |
420 | int type) | 530 | int type) |
421 | { | 531 | { |
422 | return et->et_root_journal_access(handle, inode, et->et_root_bh, | 532 | return et->et_root_journal_access(handle, et->et_ci, et->et_root_bh, |
423 | type); | 533 | type); |
424 | } | 534 | } |
425 | 535 | ||
426 | static inline int ocfs2_et_insert_check(struct inode *inode, | 536 | static inline enum ocfs2_contig_type |
427 | struct ocfs2_extent_tree *et, | 537 | ocfs2_et_extent_contig(struct ocfs2_extent_tree *et, |
538 | struct ocfs2_extent_rec *rec, | ||
539 | struct ocfs2_extent_rec *insert_rec) | ||
540 | { | ||
541 | if (et->et_ops->eo_extent_contig) | ||
542 | return et->et_ops->eo_extent_contig(et, rec, insert_rec); | ||
543 | |||
544 | return ocfs2_extent_rec_contig( | ||
545 | ocfs2_metadata_cache_get_super(et->et_ci), | ||
546 | rec, insert_rec); | ||
547 | } | ||
548 | |||
549 | static inline int ocfs2_et_insert_check(struct ocfs2_extent_tree *et, | ||
428 | struct ocfs2_extent_rec *rec) | 550 | struct ocfs2_extent_rec *rec) |
429 | { | 551 | { |
430 | int ret = 0; | 552 | int ret = 0; |
431 | 553 | ||
432 | if (et->et_ops->eo_insert_check) | 554 | if (et->et_ops->eo_insert_check) |
433 | ret = et->et_ops->eo_insert_check(inode, et, rec); | 555 | ret = et->et_ops->eo_insert_check(et, rec); |
434 | return ret; | 556 | return ret; |
435 | } | 557 | } |
436 | 558 | ||
437 | static inline int ocfs2_et_sanity_check(struct inode *inode, | 559 | static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et) |
438 | struct ocfs2_extent_tree *et) | ||
439 | { | 560 | { |
440 | int ret = 0; | 561 | int ret = 0; |
441 | 562 | ||
442 | if (et->et_ops->eo_sanity_check) | 563 | if (et->et_ops->eo_sanity_check) |
443 | ret = et->et_ops->eo_sanity_check(inode, et); | 564 | ret = et->et_ops->eo_sanity_check(et); |
444 | return ret; | 565 | return ret; |
445 | } | 566 | } |
446 | 567 | ||
447 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); | 568 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); |
448 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, | 569 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, |
449 | struct ocfs2_extent_block *eb); | 570 | struct ocfs2_extent_block *eb); |
450 | 571 | static void ocfs2_adjust_rightmost_records(handle_t *handle, | |
451 | /* | 572 | struct ocfs2_extent_tree *et, |
452 | * Structures which describe a path through a btree, and functions to | ||
453 | * manipulate them. | ||
454 | * | ||
455 | * The idea here is to be as generic as possible with the tree | ||
456 | * manipulation code. | ||
457 | */ | ||
458 | struct ocfs2_path_item { | ||
459 | struct buffer_head *bh; | ||
460 | struct ocfs2_extent_list *el; | ||
461 | }; | ||
462 | |||
463 | #define OCFS2_MAX_PATH_DEPTH 5 | ||
464 | |||
465 | struct ocfs2_path { | ||
466 | int p_tree_depth; | ||
467 | ocfs2_journal_access_func p_root_access; | ||
468 | struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH]; | ||
469 | }; | ||
470 | |||
471 | #define path_root_bh(_path) ((_path)->p_node[0].bh) | ||
472 | #define path_root_el(_path) ((_path)->p_node[0].el) | ||
473 | #define path_root_access(_path)((_path)->p_root_access) | ||
474 | #define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh) | ||
475 | #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) | ||
476 | #define path_num_items(_path) ((_path)->p_tree_depth + 1) | ||
477 | |||
478 | static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, | ||
479 | u32 cpos); | ||
480 | static void ocfs2_adjust_rightmost_records(struct inode *inode, | ||
481 | handle_t *handle, | ||
482 | struct ocfs2_path *path, | 573 | struct ocfs2_path *path, |
483 | struct ocfs2_extent_rec *insert_rec); | 574 | struct ocfs2_extent_rec *insert_rec); |
484 | /* | 575 | /* |
@@ -486,7 +577,7 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode, | |||
486 | * to build another path. Generally, this involves freeing the buffer | 577 | * to build another path. Generally, this involves freeing the buffer |
487 | * heads. | 578 | * heads. |
488 | */ | 579 | */ |
489 | static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root) | 580 | void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root) |
490 | { | 581 | { |
491 | int i, start = 0, depth = 0; | 582 | int i, start = 0, depth = 0; |
492 | struct ocfs2_path_item *node; | 583 | struct ocfs2_path_item *node; |
@@ -515,7 +606,7 @@ static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root) | |||
515 | path->p_tree_depth = depth; | 606 | path->p_tree_depth = depth; |
516 | } | 607 | } |
517 | 608 | ||
518 | static void ocfs2_free_path(struct ocfs2_path *path) | 609 | void ocfs2_free_path(struct ocfs2_path *path) |
519 | { | 610 | { |
520 | if (path) { | 611 | if (path) { |
521 | ocfs2_reinit_path(path, 0); | 612 | ocfs2_reinit_path(path, 0); |
@@ -613,13 +704,13 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, | |||
613 | return path; | 704 | return path; |
614 | } | 705 | } |
615 | 706 | ||
616 | static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path) | 707 | struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path) |
617 | { | 708 | { |
618 | return ocfs2_new_path(path_root_bh(path), path_root_el(path), | 709 | return ocfs2_new_path(path_root_bh(path), path_root_el(path), |
619 | path_root_access(path)); | 710 | path_root_access(path)); |
620 | } | 711 | } |
621 | 712 | ||
622 | static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et) | 713 | struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et) |
623 | { | 714 | { |
624 | return ocfs2_new_path(et->et_root_bh, et->et_root_el, | 715 | return ocfs2_new_path(et->et_root_bh, et->et_root_el, |
625 | et->et_root_journal_access); | 716 | et->et_root_journal_access); |
@@ -632,10 +723,10 @@ static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et) | |||
632 | * I don't like the way this function's name looks next to | 723 | * I don't like the way this function's name looks next to |
633 | * ocfs2_journal_access_path(), but I don't have a better one. | 724 | * ocfs2_journal_access_path(), but I don't have a better one. |
634 | */ | 725 | */ |
635 | static int ocfs2_path_bh_journal_access(handle_t *handle, | 726 | int ocfs2_path_bh_journal_access(handle_t *handle, |
636 | struct inode *inode, | 727 | struct ocfs2_caching_info *ci, |
637 | struct ocfs2_path *path, | 728 | struct ocfs2_path *path, |
638 | int idx) | 729 | int idx) |
639 | { | 730 | { |
640 | ocfs2_journal_access_func access = path_root_access(path); | 731 | ocfs2_journal_access_func access = path_root_access(path); |
641 | 732 | ||
@@ -645,15 +736,16 @@ static int ocfs2_path_bh_journal_access(handle_t *handle, | |||
645 | if (idx) | 736 | if (idx) |
646 | access = ocfs2_journal_access_eb; | 737 | access = ocfs2_journal_access_eb; |
647 | 738 | ||
648 | return access(handle, inode, path->p_node[idx].bh, | 739 | return access(handle, ci, path->p_node[idx].bh, |
649 | OCFS2_JOURNAL_ACCESS_WRITE); | 740 | OCFS2_JOURNAL_ACCESS_WRITE); |
650 | } | 741 | } |
651 | 742 | ||
652 | /* | 743 | /* |
653 | * Convenience function to journal all components in a path. | 744 | * Convenience function to journal all components in a path. |
654 | */ | 745 | */ |
655 | static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, | 746 | int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, |
656 | struct ocfs2_path *path) | 747 | handle_t *handle, |
748 | struct ocfs2_path *path) | ||
657 | { | 749 | { |
658 | int i, ret = 0; | 750 | int i, ret = 0; |
659 | 751 | ||
@@ -661,7 +753,7 @@ static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, | |||
661 | goto out; | 753 | goto out; |
662 | 754 | ||
663 | for(i = 0; i < path_num_items(path); i++) { | 755 | for(i = 0; i < path_num_items(path); i++) { |
664 | ret = ocfs2_path_bh_journal_access(handle, inode, path, i); | 756 | ret = ocfs2_path_bh_journal_access(handle, ci, path, i); |
665 | if (ret < 0) { | 757 | if (ret < 0) { |
666 | mlog_errno(ret); | 758 | mlog_errno(ret); |
667 | goto out; | 759 | goto out; |
@@ -702,17 +794,9 @@ int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster) | |||
702 | return ret; | 794 | return ret; |
703 | } | 795 | } |
704 | 796 | ||
705 | enum ocfs2_contig_type { | ||
706 | CONTIG_NONE = 0, | ||
707 | CONTIG_LEFT, | ||
708 | CONTIG_RIGHT, | ||
709 | CONTIG_LEFTRIGHT, | ||
710 | }; | ||
711 | |||
712 | |||
713 | /* | 797 | /* |
714 | * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and | 798 | * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and |
715 | * ocfs2_extent_contig only work properly against leaf nodes! | 799 | * ocfs2_extent_rec_contig only work properly against leaf nodes! |
716 | */ | 800 | */ |
717 | static int ocfs2_block_extent_contig(struct super_block *sb, | 801 | static int ocfs2_block_extent_contig(struct super_block *sb, |
718 | struct ocfs2_extent_rec *ext, | 802 | struct ocfs2_extent_rec *ext, |
@@ -738,9 +822,9 @@ static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left, | |||
738 | } | 822 | } |
739 | 823 | ||
740 | static enum ocfs2_contig_type | 824 | static enum ocfs2_contig_type |
741 | ocfs2_extent_contig(struct inode *inode, | 825 | ocfs2_extent_rec_contig(struct super_block *sb, |
742 | struct ocfs2_extent_rec *ext, | 826 | struct ocfs2_extent_rec *ext, |
743 | struct ocfs2_extent_rec *insert_rec) | 827 | struct ocfs2_extent_rec *insert_rec) |
744 | { | 828 | { |
745 | u64 blkno = le64_to_cpu(insert_rec->e_blkno); | 829 | u64 blkno = le64_to_cpu(insert_rec->e_blkno); |
746 | 830 | ||
@@ -753,12 +837,12 @@ static enum ocfs2_contig_type | |||
753 | return CONTIG_NONE; | 837 | return CONTIG_NONE; |
754 | 838 | ||
755 | if (ocfs2_extents_adjacent(ext, insert_rec) && | 839 | if (ocfs2_extents_adjacent(ext, insert_rec) && |
756 | ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) | 840 | ocfs2_block_extent_contig(sb, ext, blkno)) |
757 | return CONTIG_RIGHT; | 841 | return CONTIG_RIGHT; |
758 | 842 | ||
759 | blkno = le64_to_cpu(ext->e_blkno); | 843 | blkno = le64_to_cpu(ext->e_blkno); |
760 | if (ocfs2_extents_adjacent(insert_rec, ext) && | 844 | if (ocfs2_extents_adjacent(insert_rec, ext) && |
761 | ocfs2_block_extent_contig(inode->i_sb, insert_rec, blkno)) | 845 | ocfs2_block_extent_contig(sb, insert_rec, blkno)) |
762 | return CONTIG_LEFT; | 846 | return CONTIG_LEFT; |
763 | 847 | ||
764 | return CONTIG_NONE; | 848 | return CONTIG_NONE; |
@@ -853,13 +937,13 @@ static int ocfs2_validate_extent_block(struct super_block *sb, | |||
853 | return 0; | 937 | return 0; |
854 | } | 938 | } |
855 | 939 | ||
856 | int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, | 940 | int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno, |
857 | struct buffer_head **bh) | 941 | struct buffer_head **bh) |
858 | { | 942 | { |
859 | int rc; | 943 | int rc; |
860 | struct buffer_head *tmp = *bh; | 944 | struct buffer_head *tmp = *bh; |
861 | 945 | ||
862 | rc = ocfs2_read_block(inode, eb_blkno, &tmp, | 946 | rc = ocfs2_read_block(ci, eb_blkno, &tmp, |
863 | ocfs2_validate_extent_block); | 947 | ocfs2_validate_extent_block); |
864 | 948 | ||
865 | /* If ocfs2_read_block() got us a new bh, pass it up. */ | 949 | /* If ocfs2_read_block() got us a new bh, pass it up. */ |
@@ -874,7 +958,6 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, | |||
874 | * How many free extents have we got before we need more meta data? | 958 | * How many free extents have we got before we need more meta data? |
875 | */ | 959 | */ |
876 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 960 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
877 | struct inode *inode, | ||
878 | struct ocfs2_extent_tree *et) | 961 | struct ocfs2_extent_tree *et) |
879 | { | 962 | { |
880 | int retval; | 963 | int retval; |
@@ -889,7 +972,8 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb, | |||
889 | last_eb_blk = ocfs2_et_get_last_eb_blk(et); | 972 | last_eb_blk = ocfs2_et_get_last_eb_blk(et); |
890 | 973 | ||
891 | if (last_eb_blk) { | 974 | if (last_eb_blk) { |
892 | retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); | 975 | retval = ocfs2_read_extent_block(et->et_ci, last_eb_blk, |
976 | &eb_bh); | ||
893 | if (retval < 0) { | 977 | if (retval < 0) { |
894 | mlog_errno(retval); | 978 | mlog_errno(retval); |
895 | goto bail; | 979 | goto bail; |
@@ -913,9 +997,8 @@ bail: | |||
913 | * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and | 997 | * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and |
914 | * l_count for you | 998 | * l_count for you |
915 | */ | 999 | */ |
916 | static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, | 1000 | static int ocfs2_create_new_meta_bhs(handle_t *handle, |
917 | handle_t *handle, | 1001 | struct ocfs2_extent_tree *et, |
918 | struct inode *inode, | ||
919 | int wanted, | 1002 | int wanted, |
920 | struct ocfs2_alloc_context *meta_ac, | 1003 | struct ocfs2_alloc_context *meta_ac, |
921 | struct buffer_head *bhs[]) | 1004 | struct buffer_head *bhs[]) |
@@ -924,6 +1007,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, | |||
924 | u16 suballoc_bit_start; | 1007 | u16 suballoc_bit_start; |
925 | u32 num_got; | 1008 | u32 num_got; |
926 | u64 first_blkno; | 1009 | u64 first_blkno; |
1010 | struct ocfs2_super *osb = | ||
1011 | OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci)); | ||
927 | struct ocfs2_extent_block *eb; | 1012 | struct ocfs2_extent_block *eb; |
928 | 1013 | ||
929 | mlog_entry_void(); | 1014 | mlog_entry_void(); |
@@ -949,9 +1034,10 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, | |||
949 | mlog_errno(status); | 1034 | mlog_errno(status); |
950 | goto bail; | 1035 | goto bail; |
951 | } | 1036 | } |
952 | ocfs2_set_new_buffer_uptodate(inode, bhs[i]); | 1037 | ocfs2_set_new_buffer_uptodate(et->et_ci, bhs[i]); |
953 | 1038 | ||
954 | status = ocfs2_journal_access_eb(handle, inode, bhs[i], | 1039 | status = ocfs2_journal_access_eb(handle, et->et_ci, |
1040 | bhs[i], | ||
955 | OCFS2_JOURNAL_ACCESS_CREATE); | 1041 | OCFS2_JOURNAL_ACCESS_CREATE); |
956 | if (status < 0) { | 1042 | if (status < 0) { |
957 | mlog_errno(status); | 1043 | mlog_errno(status); |
@@ -1023,7 +1109,6 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el) | |||
1023 | * extent block's rightmost record. | 1109 | * extent block's rightmost record. |
1024 | */ | 1110 | */ |
1025 | static int ocfs2_adjust_rightmost_branch(handle_t *handle, | 1111 | static int ocfs2_adjust_rightmost_branch(handle_t *handle, |
1026 | struct inode *inode, | ||
1027 | struct ocfs2_extent_tree *et) | 1112 | struct ocfs2_extent_tree *et) |
1028 | { | 1113 | { |
1029 | int status; | 1114 | int status; |
@@ -1037,7 +1122,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle, | |||
1037 | return status; | 1122 | return status; |
1038 | } | 1123 | } |
1039 | 1124 | ||
1040 | status = ocfs2_find_path(inode, path, UINT_MAX); | 1125 | status = ocfs2_find_path(et->et_ci, path, UINT_MAX); |
1041 | if (status < 0) { | 1126 | if (status < 0) { |
1042 | mlog_errno(status); | 1127 | mlog_errno(status); |
1043 | goto out; | 1128 | goto out; |
@@ -1050,7 +1135,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle, | |||
1050 | goto out; | 1135 | goto out; |
1051 | } | 1136 | } |
1052 | 1137 | ||
1053 | status = ocfs2_journal_access_path(inode, handle, path); | 1138 | status = ocfs2_journal_access_path(et->et_ci, handle, path); |
1054 | if (status < 0) { | 1139 | if (status < 0) { |
1055 | mlog_errno(status); | 1140 | mlog_errno(status); |
1056 | goto out; | 1141 | goto out; |
@@ -1059,7 +1144,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle, | |||
1059 | el = path_leaf_el(path); | 1144 | el = path_leaf_el(path); |
1060 | rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1]; | 1145 | rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1]; |
1061 | 1146 | ||
1062 | ocfs2_adjust_rightmost_records(inode, handle, path, rec); | 1147 | ocfs2_adjust_rightmost_records(handle, et, path, rec); |
1063 | 1148 | ||
1064 | out: | 1149 | out: |
1065 | ocfs2_free_path(path); | 1150 | ocfs2_free_path(path); |
@@ -1068,7 +1153,7 @@ out: | |||
1068 | 1153 | ||
1069 | /* | 1154 | /* |
1070 | * Add an entire tree branch to our inode. eb_bh is the extent block | 1155 | * Add an entire tree branch to our inode. eb_bh is the extent block |
1071 | * to start at, if we don't want to start the branch at the dinode | 1156 | * to start at, if we don't want to start the branch at the root |
1072 | * structure. | 1157 | * structure. |
1073 | * | 1158 | * |
1074 | * last_eb_bh is required as we have to update it's next_leaf pointer | 1159 | * last_eb_bh is required as we have to update it's next_leaf pointer |
@@ -1077,9 +1162,7 @@ out: | |||
1077 | * the new branch will be 'empty' in the sense that every block will | 1162 | * the new branch will be 'empty' in the sense that every block will |
1078 | * contain a single record with cluster count == 0. | 1163 | * contain a single record with cluster count == 0. |
1079 | */ | 1164 | */ |
1080 | static int ocfs2_add_branch(struct ocfs2_super *osb, | 1165 | static int ocfs2_add_branch(handle_t *handle, |
1081 | handle_t *handle, | ||
1082 | struct inode *inode, | ||
1083 | struct ocfs2_extent_tree *et, | 1166 | struct ocfs2_extent_tree *et, |
1084 | struct buffer_head *eb_bh, | 1167 | struct buffer_head *eb_bh, |
1085 | struct buffer_head **last_eb_bh, | 1168 | struct buffer_head **last_eb_bh, |
@@ -1123,7 +1206,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
1123 | if (root_end > new_cpos) { | 1206 | if (root_end > new_cpos) { |
1124 | mlog(0, "adjust the cluster end from %u to %u\n", | 1207 | mlog(0, "adjust the cluster end from %u to %u\n", |
1125 | root_end, new_cpos); | 1208 | root_end, new_cpos); |
1126 | status = ocfs2_adjust_rightmost_branch(handle, inode, et); | 1209 | status = ocfs2_adjust_rightmost_branch(handle, et); |
1127 | if (status) { | 1210 | if (status) { |
1128 | mlog_errno(status); | 1211 | mlog_errno(status); |
1129 | goto bail; | 1212 | goto bail; |
@@ -1139,7 +1222,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
1139 | goto bail; | 1222 | goto bail; |
1140 | } | 1223 | } |
1141 | 1224 | ||
1142 | status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks, | 1225 | status = ocfs2_create_new_meta_bhs(handle, et, new_blocks, |
1143 | meta_ac, new_eb_bhs); | 1226 | meta_ac, new_eb_bhs); |
1144 | if (status < 0) { | 1227 | if (status < 0) { |
1145 | mlog_errno(status); | 1228 | mlog_errno(status); |
@@ -1161,7 +1244,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
1161 | BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); | 1244 | BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); |
1162 | eb_el = &eb->h_list; | 1245 | eb_el = &eb->h_list; |
1163 | 1246 | ||
1164 | status = ocfs2_journal_access_eb(handle, inode, bh, | 1247 | status = ocfs2_journal_access_eb(handle, et->et_ci, bh, |
1165 | OCFS2_JOURNAL_ACCESS_CREATE); | 1248 | OCFS2_JOURNAL_ACCESS_CREATE); |
1166 | if (status < 0) { | 1249 | if (status < 0) { |
1167 | mlog_errno(status); | 1250 | mlog_errno(status); |
@@ -1201,20 +1284,20 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
1201 | * journal_dirty erroring as it won't unless we've aborted the | 1284 | * journal_dirty erroring as it won't unless we've aborted the |
1202 | * handle (in which case we would never be here) so reserving | 1285 | * handle (in which case we would never be here) so reserving |
1203 | * the write with journal_access is all we need to do. */ | 1286 | * the write with journal_access is all we need to do. */ |
1204 | status = ocfs2_journal_access_eb(handle, inode, *last_eb_bh, | 1287 | status = ocfs2_journal_access_eb(handle, et->et_ci, *last_eb_bh, |
1205 | OCFS2_JOURNAL_ACCESS_WRITE); | 1288 | OCFS2_JOURNAL_ACCESS_WRITE); |
1206 | if (status < 0) { | 1289 | if (status < 0) { |
1207 | mlog_errno(status); | 1290 | mlog_errno(status); |
1208 | goto bail; | 1291 | goto bail; |
1209 | } | 1292 | } |
1210 | status = ocfs2_et_root_journal_access(handle, inode, et, | 1293 | status = ocfs2_et_root_journal_access(handle, et, |
1211 | OCFS2_JOURNAL_ACCESS_WRITE); | 1294 | OCFS2_JOURNAL_ACCESS_WRITE); |
1212 | if (status < 0) { | 1295 | if (status < 0) { |
1213 | mlog_errno(status); | 1296 | mlog_errno(status); |
1214 | goto bail; | 1297 | goto bail; |
1215 | } | 1298 | } |
1216 | if (eb_bh) { | 1299 | if (eb_bh) { |
1217 | status = ocfs2_journal_access_eb(handle, inode, eb_bh, | 1300 | status = ocfs2_journal_access_eb(handle, et->et_ci, eb_bh, |
1218 | OCFS2_JOURNAL_ACCESS_WRITE); | 1301 | OCFS2_JOURNAL_ACCESS_WRITE); |
1219 | if (status < 0) { | 1302 | if (status < 0) { |
1220 | mlog_errno(status); | 1303 | mlog_errno(status); |
@@ -1274,9 +1357,7 @@ bail: | |||
1274 | * returns back the new extent block so you can add a branch to it | 1357 | * returns back the new extent block so you can add a branch to it |
1275 | * after this call. | 1358 | * after this call. |
1276 | */ | 1359 | */ |
1277 | static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | 1360 | static int ocfs2_shift_tree_depth(handle_t *handle, |
1278 | handle_t *handle, | ||
1279 | struct inode *inode, | ||
1280 | struct ocfs2_extent_tree *et, | 1361 | struct ocfs2_extent_tree *et, |
1281 | struct ocfs2_alloc_context *meta_ac, | 1362 | struct ocfs2_alloc_context *meta_ac, |
1282 | struct buffer_head **ret_new_eb_bh) | 1363 | struct buffer_head **ret_new_eb_bh) |
@@ -1290,7 +1371,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
1290 | 1371 | ||
1291 | mlog_entry_void(); | 1372 | mlog_entry_void(); |
1292 | 1373 | ||
1293 | status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac, | 1374 | status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac, |
1294 | &new_eb_bh); | 1375 | &new_eb_bh); |
1295 | if (status < 0) { | 1376 | if (status < 0) { |
1296 | mlog_errno(status); | 1377 | mlog_errno(status); |
@@ -1304,7 +1385,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
1304 | eb_el = &eb->h_list; | 1385 | eb_el = &eb->h_list; |
1305 | root_el = et->et_root_el; | 1386 | root_el = et->et_root_el; |
1306 | 1387 | ||
1307 | status = ocfs2_journal_access_eb(handle, inode, new_eb_bh, | 1388 | status = ocfs2_journal_access_eb(handle, et->et_ci, new_eb_bh, |
1308 | OCFS2_JOURNAL_ACCESS_CREATE); | 1389 | OCFS2_JOURNAL_ACCESS_CREATE); |
1309 | if (status < 0) { | 1390 | if (status < 0) { |
1310 | mlog_errno(status); | 1391 | mlog_errno(status); |
@@ -1323,7 +1404,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
1323 | goto bail; | 1404 | goto bail; |
1324 | } | 1405 | } |
1325 | 1406 | ||
1326 | status = ocfs2_et_root_journal_access(handle, inode, et, | 1407 | status = ocfs2_et_root_journal_access(handle, et, |
1327 | OCFS2_JOURNAL_ACCESS_WRITE); | 1408 | OCFS2_JOURNAL_ACCESS_WRITE); |
1328 | if (status < 0) { | 1409 | if (status < 0) { |
1329 | mlog_errno(status); | 1410 | mlog_errno(status); |
@@ -1379,9 +1460,7 @@ bail: | |||
1379 | * | 1460 | * |
1380 | * return status < 0 indicates an error. | 1461 | * return status < 0 indicates an error. |
1381 | */ | 1462 | */ |
1382 | static int ocfs2_find_branch_target(struct ocfs2_super *osb, | 1463 | static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et, |
1383 | struct inode *inode, | ||
1384 | struct ocfs2_extent_tree *et, | ||
1385 | struct buffer_head **target_bh) | 1464 | struct buffer_head **target_bh) |
1386 | { | 1465 | { |
1387 | int status = 0, i; | 1466 | int status = 0, i; |
@@ -1399,19 +1478,21 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
1399 | 1478 | ||
1400 | while(le16_to_cpu(el->l_tree_depth) > 1) { | 1479 | while(le16_to_cpu(el->l_tree_depth) > 1) { |
1401 | if (le16_to_cpu(el->l_next_free_rec) == 0) { | 1480 | if (le16_to_cpu(el->l_next_free_rec) == 0) { |
1402 | ocfs2_error(inode->i_sb, "Dinode %llu has empty " | 1481 | ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), |
1482 | "Owner %llu has empty " | ||
1403 | "extent list (next_free_rec == 0)", | 1483 | "extent list (next_free_rec == 0)", |
1404 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 1484 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); |
1405 | status = -EIO; | 1485 | status = -EIO; |
1406 | goto bail; | 1486 | goto bail; |
1407 | } | 1487 | } |
1408 | i = le16_to_cpu(el->l_next_free_rec) - 1; | 1488 | i = le16_to_cpu(el->l_next_free_rec) - 1; |
1409 | blkno = le64_to_cpu(el->l_recs[i].e_blkno); | 1489 | blkno = le64_to_cpu(el->l_recs[i].e_blkno); |
1410 | if (!blkno) { | 1490 | if (!blkno) { |
1411 | ocfs2_error(inode->i_sb, "Dinode %llu has extent " | 1491 | ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), |
1492 | "Owner %llu has extent " | ||
1412 | "list where extent # %d has no physical " | 1493 | "list where extent # %d has no physical " |
1413 | "block start", | 1494 | "block start", |
1414 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i); | 1495 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i); |
1415 | status = -EIO; | 1496 | status = -EIO; |
1416 | goto bail; | 1497 | goto bail; |
1417 | } | 1498 | } |
@@ -1419,7 +1500,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
1419 | brelse(bh); | 1500 | brelse(bh); |
1420 | bh = NULL; | 1501 | bh = NULL; |
1421 | 1502 | ||
1422 | status = ocfs2_read_extent_block(inode, blkno, &bh); | 1503 | status = ocfs2_read_extent_block(et->et_ci, blkno, &bh); |
1423 | if (status < 0) { | 1504 | if (status < 0) { |
1424 | mlog_errno(status); | 1505 | mlog_errno(status); |
1425 | goto bail; | 1506 | goto bail; |
@@ -1460,20 +1541,18 @@ bail: | |||
1460 | * | 1541 | * |
1461 | * *last_eb_bh will be updated by ocfs2_add_branch(). | 1542 | * *last_eb_bh will be updated by ocfs2_add_branch(). |
1462 | */ | 1543 | */ |
1463 | static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | 1544 | static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et, |
1464 | struct ocfs2_extent_tree *et, int *final_depth, | 1545 | int *final_depth, struct buffer_head **last_eb_bh, |
1465 | struct buffer_head **last_eb_bh, | ||
1466 | struct ocfs2_alloc_context *meta_ac) | 1546 | struct ocfs2_alloc_context *meta_ac) |
1467 | { | 1547 | { |
1468 | int ret, shift; | 1548 | int ret, shift; |
1469 | struct ocfs2_extent_list *el = et->et_root_el; | 1549 | struct ocfs2_extent_list *el = et->et_root_el; |
1470 | int depth = le16_to_cpu(el->l_tree_depth); | 1550 | int depth = le16_to_cpu(el->l_tree_depth); |
1471 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1472 | struct buffer_head *bh = NULL; | 1551 | struct buffer_head *bh = NULL; |
1473 | 1552 | ||
1474 | BUG_ON(meta_ac == NULL); | 1553 | BUG_ON(meta_ac == NULL); |
1475 | 1554 | ||
1476 | shift = ocfs2_find_branch_target(osb, inode, et, &bh); | 1555 | shift = ocfs2_find_branch_target(et, &bh); |
1477 | if (shift < 0) { | 1556 | if (shift < 0) { |
1478 | ret = shift; | 1557 | ret = shift; |
1479 | mlog_errno(ret); | 1558 | mlog_errno(ret); |
@@ -1490,8 +1569,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | |||
1490 | /* ocfs2_shift_tree_depth will return us a buffer with | 1569 | /* ocfs2_shift_tree_depth will return us a buffer with |
1491 | * the new extent block (so we can pass that to | 1570 | * the new extent block (so we can pass that to |
1492 | * ocfs2_add_branch). */ | 1571 | * ocfs2_add_branch). */ |
1493 | ret = ocfs2_shift_tree_depth(osb, handle, inode, et, | 1572 | ret = ocfs2_shift_tree_depth(handle, et, meta_ac, &bh); |
1494 | meta_ac, &bh); | ||
1495 | if (ret < 0) { | 1573 | if (ret < 0) { |
1496 | mlog_errno(ret); | 1574 | mlog_errno(ret); |
1497 | goto out; | 1575 | goto out; |
@@ -1517,7 +1595,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | |||
1517 | /* call ocfs2_add_branch to add the final part of the tree with | 1595 | /* call ocfs2_add_branch to add the final part of the tree with |
1518 | * the new data. */ | 1596 | * the new data. */ |
1519 | mlog(0, "add branch. bh = %p\n", bh); | 1597 | mlog(0, "add branch. bh = %p\n", bh); |
1520 | ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh, | 1598 | ret = ocfs2_add_branch(handle, et, bh, last_eb_bh, |
1521 | meta_ac); | 1599 | meta_ac); |
1522 | if (ret < 0) { | 1600 | if (ret < 0) { |
1523 | mlog_errno(ret); | 1601 | mlog_errno(ret); |
@@ -1687,7 +1765,7 @@ set_and_inc: | |||
1687 | * | 1765 | * |
1688 | * The array index of the subtree root is passed back. | 1766 | * The array index of the subtree root is passed back. |
1689 | */ | 1767 | */ |
1690 | static int ocfs2_find_subtree_root(struct inode *inode, | 1768 | static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, |
1691 | struct ocfs2_path *left, | 1769 | struct ocfs2_path *left, |
1692 | struct ocfs2_path *right) | 1770 | struct ocfs2_path *right) |
1693 | { | 1771 | { |
@@ -1705,10 +1783,10 @@ static int ocfs2_find_subtree_root(struct inode *inode, | |||
1705 | * The caller didn't pass two adjacent paths. | 1783 | * The caller didn't pass two adjacent paths. |
1706 | */ | 1784 | */ |
1707 | mlog_bug_on_msg(i > left->p_tree_depth, | 1785 | mlog_bug_on_msg(i > left->p_tree_depth, |
1708 | "Inode %lu, left depth %u, right depth %u\n" | 1786 | "Owner %llu, left depth %u, right depth %u\n" |
1709 | "left leaf blk %llu, right leaf blk %llu\n", | 1787 | "left leaf blk %llu, right leaf blk %llu\n", |
1710 | inode->i_ino, left->p_tree_depth, | 1788 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), |
1711 | right->p_tree_depth, | 1789 | left->p_tree_depth, right->p_tree_depth, |
1712 | (unsigned long long)path_leaf_bh(left)->b_blocknr, | 1790 | (unsigned long long)path_leaf_bh(left)->b_blocknr, |
1713 | (unsigned long long)path_leaf_bh(right)->b_blocknr); | 1791 | (unsigned long long)path_leaf_bh(right)->b_blocknr); |
1714 | } while (left->p_node[i].bh->b_blocknr == | 1792 | } while (left->p_node[i].bh->b_blocknr == |
@@ -1725,7 +1803,7 @@ typedef void (path_insert_t)(void *, struct buffer_head *); | |||
1725 | * This code can be called with a cpos larger than the tree, in which | 1803 | * This code can be called with a cpos larger than the tree, in which |
1726 | * case it will return the rightmost path. | 1804 | * case it will return the rightmost path. |
1727 | */ | 1805 | */ |
1728 | static int __ocfs2_find_path(struct inode *inode, | 1806 | static int __ocfs2_find_path(struct ocfs2_caching_info *ci, |
1729 | struct ocfs2_extent_list *root_el, u32 cpos, | 1807 | struct ocfs2_extent_list *root_el, u32 cpos, |
1730 | path_insert_t *func, void *data) | 1808 | path_insert_t *func, void *data) |
1731 | { | 1809 | { |
@@ -1736,15 +1814,14 @@ static int __ocfs2_find_path(struct inode *inode, | |||
1736 | struct ocfs2_extent_block *eb; | 1814 | struct ocfs2_extent_block *eb; |
1737 | struct ocfs2_extent_list *el; | 1815 | struct ocfs2_extent_list *el; |
1738 | struct ocfs2_extent_rec *rec; | 1816 | struct ocfs2_extent_rec *rec; |
1739 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1740 | 1817 | ||
1741 | el = root_el; | 1818 | el = root_el; |
1742 | while (el->l_tree_depth) { | 1819 | while (el->l_tree_depth) { |
1743 | if (le16_to_cpu(el->l_next_free_rec) == 0) { | 1820 | if (le16_to_cpu(el->l_next_free_rec) == 0) { |
1744 | ocfs2_error(inode->i_sb, | 1821 | ocfs2_error(ocfs2_metadata_cache_get_super(ci), |
1745 | "Inode %llu has empty extent list at " | 1822 | "Owner %llu has empty extent list at " |
1746 | "depth %u\n", | 1823 | "depth %u\n", |
1747 | (unsigned long long)oi->ip_blkno, | 1824 | (unsigned long long)ocfs2_metadata_cache_owner(ci), |
1748 | le16_to_cpu(el->l_tree_depth)); | 1825 | le16_to_cpu(el->l_tree_depth)); |
1749 | ret = -EROFS; | 1826 | ret = -EROFS; |
1750 | goto out; | 1827 | goto out; |
@@ -1767,10 +1844,10 @@ static int __ocfs2_find_path(struct inode *inode, | |||
1767 | 1844 | ||
1768 | blkno = le64_to_cpu(el->l_recs[i].e_blkno); | 1845 | blkno = le64_to_cpu(el->l_recs[i].e_blkno); |
1769 | if (blkno == 0) { | 1846 | if (blkno == 0) { |
1770 | ocfs2_error(inode->i_sb, | 1847 | ocfs2_error(ocfs2_metadata_cache_get_super(ci), |
1771 | "Inode %llu has bad blkno in extent list " | 1848 | "Owner %llu has bad blkno in extent list " |
1772 | "at depth %u (index %d)\n", | 1849 | "at depth %u (index %d)\n", |
1773 | (unsigned long long)oi->ip_blkno, | 1850 | (unsigned long long)ocfs2_metadata_cache_owner(ci), |
1774 | le16_to_cpu(el->l_tree_depth), i); | 1851 | le16_to_cpu(el->l_tree_depth), i); |
1775 | ret = -EROFS; | 1852 | ret = -EROFS; |
1776 | goto out; | 1853 | goto out; |
@@ -1778,7 +1855,7 @@ static int __ocfs2_find_path(struct inode *inode, | |||
1778 | 1855 | ||
1779 | brelse(bh); | 1856 | brelse(bh); |
1780 | bh = NULL; | 1857 | bh = NULL; |
1781 | ret = ocfs2_read_extent_block(inode, blkno, &bh); | 1858 | ret = ocfs2_read_extent_block(ci, blkno, &bh); |
1782 | if (ret) { | 1859 | if (ret) { |
1783 | mlog_errno(ret); | 1860 | mlog_errno(ret); |
1784 | goto out; | 1861 | goto out; |
@@ -1789,10 +1866,10 @@ static int __ocfs2_find_path(struct inode *inode, | |||
1789 | 1866 | ||
1790 | if (le16_to_cpu(el->l_next_free_rec) > | 1867 | if (le16_to_cpu(el->l_next_free_rec) > |
1791 | le16_to_cpu(el->l_count)) { | 1868 | le16_to_cpu(el->l_count)) { |
1792 | ocfs2_error(inode->i_sb, | 1869 | ocfs2_error(ocfs2_metadata_cache_get_super(ci), |
1793 | "Inode %llu has bad count in extent list " | 1870 | "Owner %llu has bad count in extent list " |
1794 | "at block %llu (next free=%u, count=%u)\n", | 1871 | "at block %llu (next free=%u, count=%u)\n", |
1795 | (unsigned long long)oi->ip_blkno, | 1872 | (unsigned long long)ocfs2_metadata_cache_owner(ci), |
1796 | (unsigned long long)bh->b_blocknr, | 1873 | (unsigned long long)bh->b_blocknr, |
1797 | le16_to_cpu(el->l_next_free_rec), | 1874 | le16_to_cpu(el->l_next_free_rec), |
1798 | le16_to_cpu(el->l_count)); | 1875 | le16_to_cpu(el->l_count)); |
@@ -1836,14 +1913,14 @@ static void find_path_ins(void *data, struct buffer_head *bh) | |||
1836 | ocfs2_path_insert_eb(fp->path, fp->index, bh); | 1913 | ocfs2_path_insert_eb(fp->path, fp->index, bh); |
1837 | fp->index++; | 1914 | fp->index++; |
1838 | } | 1915 | } |
1839 | static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, | 1916 | int ocfs2_find_path(struct ocfs2_caching_info *ci, |
1840 | u32 cpos) | 1917 | struct ocfs2_path *path, u32 cpos) |
1841 | { | 1918 | { |
1842 | struct find_path_data data; | 1919 | struct find_path_data data; |
1843 | 1920 | ||
1844 | data.index = 1; | 1921 | data.index = 1; |
1845 | data.path = path; | 1922 | data.path = path; |
1846 | return __ocfs2_find_path(inode, path_root_el(path), cpos, | 1923 | return __ocfs2_find_path(ci, path_root_el(path), cpos, |
1847 | find_path_ins, &data); | 1924 | find_path_ins, &data); |
1848 | } | 1925 | } |
1849 | 1926 | ||
@@ -1868,13 +1945,14 @@ static void find_leaf_ins(void *data, struct buffer_head *bh) | |||
1868 | * | 1945 | * |
1869 | * This function doesn't handle non btree extent lists. | 1946 | * This function doesn't handle non btree extent lists. |
1870 | */ | 1947 | */ |
1871 | int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, | 1948 | int ocfs2_find_leaf(struct ocfs2_caching_info *ci, |
1872 | u32 cpos, struct buffer_head **leaf_bh) | 1949 | struct ocfs2_extent_list *root_el, u32 cpos, |
1950 | struct buffer_head **leaf_bh) | ||
1873 | { | 1951 | { |
1874 | int ret; | 1952 | int ret; |
1875 | struct buffer_head *bh = NULL; | 1953 | struct buffer_head *bh = NULL; |
1876 | 1954 | ||
1877 | ret = __ocfs2_find_path(inode, root_el, cpos, find_leaf_ins, &bh); | 1955 | ret = __ocfs2_find_path(ci, root_el, cpos, find_leaf_ins, &bh); |
1878 | if (ret) { | 1956 | if (ret) { |
1879 | mlog_errno(ret); | 1957 | mlog_errno(ret); |
1880 | goto out; | 1958 | goto out; |
@@ -1980,7 +2058,7 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, | |||
1980 | * - When we've adjusted the last extent record in the left path leaf and the | 2058 | * - When we've adjusted the last extent record in the left path leaf and the |
1981 | * 1st extent record in the right path leaf during cross extent block merge. | 2059 | * 1st extent record in the right path leaf during cross extent block merge. |
1982 | */ | 2060 | */ |
1983 | static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, | 2061 | static void ocfs2_complete_edge_insert(handle_t *handle, |
1984 | struct ocfs2_path *left_path, | 2062 | struct ocfs2_path *left_path, |
1985 | struct ocfs2_path *right_path, | 2063 | struct ocfs2_path *right_path, |
1986 | int subtree_index) | 2064 | int subtree_index) |
@@ -2058,8 +2136,8 @@ static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, | |||
2058 | mlog_errno(ret); | 2136 | mlog_errno(ret); |
2059 | } | 2137 | } |
2060 | 2138 | ||
2061 | static int ocfs2_rotate_subtree_right(struct inode *inode, | 2139 | static int ocfs2_rotate_subtree_right(handle_t *handle, |
2062 | handle_t *handle, | 2140 | struct ocfs2_extent_tree *et, |
2063 | struct ocfs2_path *left_path, | 2141 | struct ocfs2_path *left_path, |
2064 | struct ocfs2_path *right_path, | 2142 | struct ocfs2_path *right_path, |
2065 | int subtree_index) | 2143 | int subtree_index) |
@@ -2075,10 +2153,10 @@ static int ocfs2_rotate_subtree_right(struct inode *inode, | |||
2075 | left_el = path_leaf_el(left_path); | 2153 | left_el = path_leaf_el(left_path); |
2076 | 2154 | ||
2077 | if (left_el->l_next_free_rec != left_el->l_count) { | 2155 | if (left_el->l_next_free_rec != left_el->l_count) { |
2078 | ocfs2_error(inode->i_sb, | 2156 | ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), |
2079 | "Inode %llu has non-full interior leaf node %llu" | 2157 | "Inode %llu has non-full interior leaf node %llu" |
2080 | "(next free = %u)", | 2158 | "(next free = %u)", |
2081 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2159 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), |
2082 | (unsigned long long)left_leaf_bh->b_blocknr, | 2160 | (unsigned long long)left_leaf_bh->b_blocknr, |
2083 | le16_to_cpu(left_el->l_next_free_rec)); | 2161 | le16_to_cpu(left_el->l_next_free_rec)); |
2084 | return -EROFS; | 2162 | return -EROFS; |
@@ -2094,7 +2172,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode, | |||
2094 | root_bh = left_path->p_node[subtree_index].bh; | 2172 | root_bh = left_path->p_node[subtree_index].bh; |
2095 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); | 2173 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); |
2096 | 2174 | ||
2097 | ret = ocfs2_path_bh_journal_access(handle, inode, right_path, | 2175 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path, |
2098 | subtree_index); | 2176 | subtree_index); |
2099 | if (ret) { | 2177 | if (ret) { |
2100 | mlog_errno(ret); | 2178 | mlog_errno(ret); |
@@ -2102,14 +2180,14 @@ static int ocfs2_rotate_subtree_right(struct inode *inode, | |||
2102 | } | 2180 | } |
2103 | 2181 | ||
2104 | for(i = subtree_index + 1; i < path_num_items(right_path); i++) { | 2182 | for(i = subtree_index + 1; i < path_num_items(right_path); i++) { |
2105 | ret = ocfs2_path_bh_journal_access(handle, inode, | 2183 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, |
2106 | right_path, i); | 2184 | right_path, i); |
2107 | if (ret) { | 2185 | if (ret) { |
2108 | mlog_errno(ret); | 2186 | mlog_errno(ret); |
2109 | goto out; | 2187 | goto out; |
2110 | } | 2188 | } |
2111 | 2189 | ||
2112 | ret = ocfs2_path_bh_journal_access(handle, inode, | 2190 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, |
2113 | left_path, i); | 2191 | left_path, i); |
2114 | if (ret) { | 2192 | if (ret) { |
2115 | mlog_errno(ret); | 2193 | mlog_errno(ret); |
@@ -2123,7 +2201,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode, | |||
2123 | /* This is a code error, not a disk corruption. */ | 2201 | /* This is a code error, not a disk corruption. */ |
2124 | mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails " | 2202 | mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails " |
2125 | "because rightmost leaf block %llu is empty\n", | 2203 | "because rightmost leaf block %llu is empty\n", |
2126 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2204 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), |
2127 | (unsigned long long)right_leaf_bh->b_blocknr); | 2205 | (unsigned long long)right_leaf_bh->b_blocknr); |
2128 | 2206 | ||
2129 | ocfs2_create_empty_extent(right_el); | 2207 | ocfs2_create_empty_extent(right_el); |
@@ -2157,8 +2235,8 @@ static int ocfs2_rotate_subtree_right(struct inode *inode, | |||
2157 | goto out; | 2235 | goto out; |
2158 | } | 2236 | } |
2159 | 2237 | ||
2160 | ocfs2_complete_edge_insert(inode, handle, left_path, right_path, | 2238 | ocfs2_complete_edge_insert(handle, left_path, right_path, |
2161 | subtree_index); | 2239 | subtree_index); |
2162 | 2240 | ||
2163 | out: | 2241 | out: |
2164 | return ret; | 2242 | return ret; |
@@ -2248,10 +2326,18 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, | |||
2248 | int op_credits, | 2326 | int op_credits, |
2249 | struct ocfs2_path *path) | 2327 | struct ocfs2_path *path) |
2250 | { | 2328 | { |
2329 | int ret; | ||
2251 | int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; | 2330 | int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; |
2252 | 2331 | ||
2253 | if (handle->h_buffer_credits < credits) | 2332 | if (handle->h_buffer_credits < credits) { |
2254 | return ocfs2_extend_trans(handle, credits); | 2333 | ret = ocfs2_extend_trans(handle, |
2334 | credits - handle->h_buffer_credits); | ||
2335 | if (ret) | ||
2336 | return ret; | ||
2337 | |||
2338 | if (unlikely(handle->h_buffer_credits < credits)) | ||
2339 | return ocfs2_extend_trans(handle, credits); | ||
2340 | } | ||
2255 | 2341 | ||
2256 | return 0; | 2342 | return 0; |
2257 | } | 2343 | } |
@@ -2321,8 +2407,8 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos) | |||
2321 | * *ret_left_path will contain a valid path which can be passed to | 2407 | * *ret_left_path will contain a valid path which can be passed to |
2322 | * ocfs2_insert_path(). | 2408 | * ocfs2_insert_path(). |
2323 | */ | 2409 | */ |
2324 | static int ocfs2_rotate_tree_right(struct inode *inode, | 2410 | static int ocfs2_rotate_tree_right(handle_t *handle, |
2325 | handle_t *handle, | 2411 | struct ocfs2_extent_tree *et, |
2326 | enum ocfs2_split_type split, | 2412 | enum ocfs2_split_type split, |
2327 | u32 insert_cpos, | 2413 | u32 insert_cpos, |
2328 | struct ocfs2_path *right_path, | 2414 | struct ocfs2_path *right_path, |
@@ -2331,6 +2417,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
2331 | int ret, start, orig_credits = handle->h_buffer_credits; | 2417 | int ret, start, orig_credits = handle->h_buffer_credits; |
2332 | u32 cpos; | 2418 | u32 cpos; |
2333 | struct ocfs2_path *left_path = NULL; | 2419 | struct ocfs2_path *left_path = NULL; |
2420 | struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci); | ||
2334 | 2421 | ||
2335 | *ret_left_path = NULL; | 2422 | *ret_left_path = NULL; |
2336 | 2423 | ||
@@ -2341,7 +2428,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
2341 | goto out; | 2428 | goto out; |
2342 | } | 2429 | } |
2343 | 2430 | ||
2344 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, &cpos); | 2431 | ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos); |
2345 | if (ret) { | 2432 | if (ret) { |
2346 | mlog_errno(ret); | 2433 | mlog_errno(ret); |
2347 | goto out; | 2434 | goto out; |
@@ -2379,7 +2466,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
2379 | mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n", | 2466 | mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n", |
2380 | insert_cpos, cpos); | 2467 | insert_cpos, cpos); |
2381 | 2468 | ||
2382 | ret = ocfs2_find_path(inode, left_path, cpos); | 2469 | ret = ocfs2_find_path(et->et_ci, left_path, cpos); |
2383 | if (ret) { | 2470 | if (ret) { |
2384 | mlog_errno(ret); | 2471 | mlog_errno(ret); |
2385 | goto out; | 2472 | goto out; |
@@ -2387,10 +2474,11 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
2387 | 2474 | ||
2388 | mlog_bug_on_msg(path_leaf_bh(left_path) == | 2475 | mlog_bug_on_msg(path_leaf_bh(left_path) == |
2389 | path_leaf_bh(right_path), | 2476 | path_leaf_bh(right_path), |
2390 | "Inode %lu: error during insert of %u " | 2477 | "Owner %llu: error during insert of %u " |
2391 | "(left path cpos %u) results in two identical " | 2478 | "(left path cpos %u) results in two identical " |
2392 | "paths ending at %llu\n", | 2479 | "paths ending at %llu\n", |
2393 | inode->i_ino, insert_cpos, cpos, | 2480 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), |
2481 | insert_cpos, cpos, | ||
2394 | (unsigned long long) | 2482 | (unsigned long long) |
2395 | path_leaf_bh(left_path)->b_blocknr); | 2483 | path_leaf_bh(left_path)->b_blocknr); |
2396 | 2484 | ||
@@ -2416,7 +2504,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
2416 | goto out_ret_path; | 2504 | goto out_ret_path; |
2417 | } | 2505 | } |
2418 | 2506 | ||
2419 | start = ocfs2_find_subtree_root(inode, left_path, right_path); | 2507 | start = ocfs2_find_subtree_root(et, left_path, right_path); |
2420 | 2508 | ||
2421 | mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", | 2509 | mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", |
2422 | start, | 2510 | start, |
@@ -2430,7 +2518,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
2430 | goto out; | 2518 | goto out; |
2431 | } | 2519 | } |
2432 | 2520 | ||
2433 | ret = ocfs2_rotate_subtree_right(inode, handle, left_path, | 2521 | ret = ocfs2_rotate_subtree_right(handle, et, left_path, |
2434 | right_path, start); | 2522 | right_path, start); |
2435 | if (ret) { | 2523 | if (ret) { |
2436 | mlog_errno(ret); | 2524 | mlog_errno(ret); |
@@ -2462,8 +2550,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode, | |||
2462 | */ | 2550 | */ |
2463 | ocfs2_mv_path(right_path, left_path); | 2551 | ocfs2_mv_path(right_path, left_path); |
2464 | 2552 | ||
2465 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, | 2553 | ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos); |
2466 | &cpos); | ||
2467 | if (ret) { | 2554 | if (ret) { |
2468 | mlog_errno(ret); | 2555 | mlog_errno(ret); |
2469 | goto out; | 2556 | goto out; |
@@ -2477,7 +2564,8 @@ out_ret_path: | |||
2477 | return ret; | 2564 | return ret; |
2478 | } | 2565 | } |
2479 | 2566 | ||
2480 | static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, | 2567 | static int ocfs2_update_edge_lengths(handle_t *handle, |
2568 | struct ocfs2_extent_tree *et, | ||
2481 | int subtree_index, struct ocfs2_path *path) | 2569 | int subtree_index, struct ocfs2_path *path) |
2482 | { | 2570 | { |
2483 | int i, idx, ret; | 2571 | int i, idx, ret; |
@@ -2502,7 +2590,7 @@ static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, | |||
2502 | goto out; | 2590 | goto out; |
2503 | } | 2591 | } |
2504 | 2592 | ||
2505 | ret = ocfs2_journal_access_path(inode, handle, path); | 2593 | ret = ocfs2_journal_access_path(et->et_ci, handle, path); |
2506 | if (ret) { | 2594 | if (ret) { |
2507 | mlog_errno(ret); | 2595 | mlog_errno(ret); |
2508 | goto out; | 2596 | goto out; |
@@ -2532,7 +2620,8 @@ out: | |||
2532 | return ret; | 2620 | return ret; |
2533 | } | 2621 | } |
2534 | 2622 | ||
2535 | static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, | 2623 | static void ocfs2_unlink_path(handle_t *handle, |
2624 | struct ocfs2_extent_tree *et, | ||
2536 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 2625 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2537 | struct ocfs2_path *path, int unlink_start) | 2626 | struct ocfs2_path *path, int unlink_start) |
2538 | { | 2627 | { |
@@ -2554,12 +2643,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, | |||
2554 | mlog(ML_ERROR, | 2643 | mlog(ML_ERROR, |
2555 | "Inode %llu, attempted to remove extent block " | 2644 | "Inode %llu, attempted to remove extent block " |
2556 | "%llu with %u records\n", | 2645 | "%llu with %u records\n", |
2557 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2646 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), |
2558 | (unsigned long long)le64_to_cpu(eb->h_blkno), | 2647 | (unsigned long long)le64_to_cpu(eb->h_blkno), |
2559 | le16_to_cpu(el->l_next_free_rec)); | 2648 | le16_to_cpu(el->l_next_free_rec)); |
2560 | 2649 | ||
2561 | ocfs2_journal_dirty(handle, bh); | 2650 | ocfs2_journal_dirty(handle, bh); |
2562 | ocfs2_remove_from_cache(inode, bh); | 2651 | ocfs2_remove_from_cache(et->et_ci, bh); |
2563 | continue; | 2652 | continue; |
2564 | } | 2653 | } |
2565 | 2654 | ||
@@ -2572,11 +2661,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, | |||
2572 | if (ret) | 2661 | if (ret) |
2573 | mlog_errno(ret); | 2662 | mlog_errno(ret); |
2574 | 2663 | ||
2575 | ocfs2_remove_from_cache(inode, bh); | 2664 | ocfs2_remove_from_cache(et->et_ci, bh); |
2576 | } | 2665 | } |
2577 | } | 2666 | } |
2578 | 2667 | ||
2579 | static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle, | 2668 | static void ocfs2_unlink_subtree(handle_t *handle, |
2669 | struct ocfs2_extent_tree *et, | ||
2580 | struct ocfs2_path *left_path, | 2670 | struct ocfs2_path *left_path, |
2581 | struct ocfs2_path *right_path, | 2671 | struct ocfs2_path *right_path, |
2582 | int subtree_index, | 2672 | int subtree_index, |
@@ -2607,17 +2697,17 @@ static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle, | |||
2607 | ocfs2_journal_dirty(handle, root_bh); | 2697 | ocfs2_journal_dirty(handle, root_bh); |
2608 | ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); | 2698 | ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); |
2609 | 2699 | ||
2610 | ocfs2_unlink_path(inode, handle, dealloc, right_path, | 2700 | ocfs2_unlink_path(handle, et, dealloc, right_path, |
2611 | subtree_index + 1); | 2701 | subtree_index + 1); |
2612 | } | 2702 | } |
2613 | 2703 | ||
2614 | static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | 2704 | static int ocfs2_rotate_subtree_left(handle_t *handle, |
2705 | struct ocfs2_extent_tree *et, | ||
2615 | struct ocfs2_path *left_path, | 2706 | struct ocfs2_path *left_path, |
2616 | struct ocfs2_path *right_path, | 2707 | struct ocfs2_path *right_path, |
2617 | int subtree_index, | 2708 | int subtree_index, |
2618 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 2709 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2619 | int *deleted, | 2710 | int *deleted) |
2620 | struct ocfs2_extent_tree *et) | ||
2621 | { | 2711 | { |
2622 | int ret, i, del_right_subtree = 0, right_has_empty = 0; | 2712 | int ret, i, del_right_subtree = 0, right_has_empty = 0; |
2623 | struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path); | 2713 | struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path); |
@@ -2653,7 +2743,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2653 | return -EAGAIN; | 2743 | return -EAGAIN; |
2654 | 2744 | ||
2655 | if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) { | 2745 | if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) { |
2656 | ret = ocfs2_journal_access_eb(handle, inode, | 2746 | ret = ocfs2_journal_access_eb(handle, et->et_ci, |
2657 | path_leaf_bh(right_path), | 2747 | path_leaf_bh(right_path), |
2658 | OCFS2_JOURNAL_ACCESS_WRITE); | 2748 | OCFS2_JOURNAL_ACCESS_WRITE); |
2659 | if (ret) { | 2749 | if (ret) { |
@@ -2672,7 +2762,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2672 | * We have to update i_last_eb_blk during the meta | 2762 | * We have to update i_last_eb_blk during the meta |
2673 | * data delete. | 2763 | * data delete. |
2674 | */ | 2764 | */ |
2675 | ret = ocfs2_et_root_journal_access(handle, inode, et, | 2765 | ret = ocfs2_et_root_journal_access(handle, et, |
2676 | OCFS2_JOURNAL_ACCESS_WRITE); | 2766 | OCFS2_JOURNAL_ACCESS_WRITE); |
2677 | if (ret) { | 2767 | if (ret) { |
2678 | mlog_errno(ret); | 2768 | mlog_errno(ret); |
@@ -2688,7 +2778,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2688 | */ | 2778 | */ |
2689 | BUG_ON(right_has_empty && !del_right_subtree); | 2779 | BUG_ON(right_has_empty && !del_right_subtree); |
2690 | 2780 | ||
2691 | ret = ocfs2_path_bh_journal_access(handle, inode, right_path, | 2781 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path, |
2692 | subtree_index); | 2782 | subtree_index); |
2693 | if (ret) { | 2783 | if (ret) { |
2694 | mlog_errno(ret); | 2784 | mlog_errno(ret); |
@@ -2696,14 +2786,14 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2696 | } | 2786 | } |
2697 | 2787 | ||
2698 | for(i = subtree_index + 1; i < path_num_items(right_path); i++) { | 2788 | for(i = subtree_index + 1; i < path_num_items(right_path); i++) { |
2699 | ret = ocfs2_path_bh_journal_access(handle, inode, | 2789 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, |
2700 | right_path, i); | 2790 | right_path, i); |
2701 | if (ret) { | 2791 | if (ret) { |
2702 | mlog_errno(ret); | 2792 | mlog_errno(ret); |
2703 | goto out; | 2793 | goto out; |
2704 | } | 2794 | } |
2705 | 2795 | ||
2706 | ret = ocfs2_path_bh_journal_access(handle, inode, | 2796 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, |
2707 | left_path, i); | 2797 | left_path, i); |
2708 | if (ret) { | 2798 | if (ret) { |
2709 | mlog_errno(ret); | 2799 | mlog_errno(ret); |
@@ -2740,9 +2830,9 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2740 | mlog_errno(ret); | 2830 | mlog_errno(ret); |
2741 | 2831 | ||
2742 | if (del_right_subtree) { | 2832 | if (del_right_subtree) { |
2743 | ocfs2_unlink_subtree(inode, handle, left_path, right_path, | 2833 | ocfs2_unlink_subtree(handle, et, left_path, right_path, |
2744 | subtree_index, dealloc); | 2834 | subtree_index, dealloc); |
2745 | ret = ocfs2_update_edge_lengths(inode, handle, subtree_index, | 2835 | ret = ocfs2_update_edge_lengths(handle, et, subtree_index, |
2746 | left_path); | 2836 | left_path); |
2747 | if (ret) { | 2837 | if (ret) { |
2748 | mlog_errno(ret); | 2838 | mlog_errno(ret); |
@@ -2766,7 +2856,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
2766 | 2856 | ||
2767 | *deleted = 1; | 2857 | *deleted = 1; |
2768 | } else | 2858 | } else |
2769 | ocfs2_complete_edge_insert(inode, handle, left_path, right_path, | 2859 | ocfs2_complete_edge_insert(handle, left_path, right_path, |
2770 | subtree_index); | 2860 | subtree_index); |
2771 | 2861 | ||
2772 | out: | 2862 | out: |
@@ -2852,8 +2942,8 @@ out: | |||
2852 | return ret; | 2942 | return ret; |
2853 | } | 2943 | } |
2854 | 2944 | ||
2855 | static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, | 2945 | static int ocfs2_rotate_rightmost_leaf_left(handle_t *handle, |
2856 | handle_t *handle, | 2946 | struct ocfs2_extent_tree *et, |
2857 | struct ocfs2_path *path) | 2947 | struct ocfs2_path *path) |
2858 | { | 2948 | { |
2859 | int ret; | 2949 | int ret; |
@@ -2863,7 +2953,7 @@ static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, | |||
2863 | if (!ocfs2_is_empty_extent(&el->l_recs[0])) | 2953 | if (!ocfs2_is_empty_extent(&el->l_recs[0])) |
2864 | return 0; | 2954 | return 0; |
2865 | 2955 | ||
2866 | ret = ocfs2_path_bh_journal_access(handle, inode, path, | 2956 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path, |
2867 | path_num_items(path) - 1); | 2957 | path_num_items(path) - 1); |
2868 | if (ret) { | 2958 | if (ret) { |
2869 | mlog_errno(ret); | 2959 | mlog_errno(ret); |
@@ -2880,24 +2970,24 @@ out: | |||
2880 | return ret; | 2970 | return ret; |
2881 | } | 2971 | } |
2882 | 2972 | ||
2883 | static int __ocfs2_rotate_tree_left(struct inode *inode, | 2973 | static int __ocfs2_rotate_tree_left(handle_t *handle, |
2884 | handle_t *handle, int orig_credits, | 2974 | struct ocfs2_extent_tree *et, |
2975 | int orig_credits, | ||
2885 | struct ocfs2_path *path, | 2976 | struct ocfs2_path *path, |
2886 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 2977 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
2887 | struct ocfs2_path **empty_extent_path, | 2978 | struct ocfs2_path **empty_extent_path) |
2888 | struct ocfs2_extent_tree *et) | ||
2889 | { | 2979 | { |
2890 | int ret, subtree_root, deleted; | 2980 | int ret, subtree_root, deleted; |
2891 | u32 right_cpos; | 2981 | u32 right_cpos; |
2892 | struct ocfs2_path *left_path = NULL; | 2982 | struct ocfs2_path *left_path = NULL; |
2893 | struct ocfs2_path *right_path = NULL; | 2983 | struct ocfs2_path *right_path = NULL; |
2984 | struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci); | ||
2894 | 2985 | ||
2895 | BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0]))); | 2986 | BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0]))); |
2896 | 2987 | ||
2897 | *empty_extent_path = NULL; | 2988 | *empty_extent_path = NULL; |
2898 | 2989 | ||
2899 | ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path, | 2990 | ret = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); |
2900 | &right_cpos); | ||
2901 | if (ret) { | 2991 | if (ret) { |
2902 | mlog_errno(ret); | 2992 | mlog_errno(ret); |
2903 | goto out; | 2993 | goto out; |
@@ -2920,13 +3010,13 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, | |||
2920 | } | 3010 | } |
2921 | 3011 | ||
2922 | while (right_cpos) { | 3012 | while (right_cpos) { |
2923 | ret = ocfs2_find_path(inode, right_path, right_cpos); | 3013 | ret = ocfs2_find_path(et->et_ci, right_path, right_cpos); |
2924 | if (ret) { | 3014 | if (ret) { |
2925 | mlog_errno(ret); | 3015 | mlog_errno(ret); |
2926 | goto out; | 3016 | goto out; |
2927 | } | 3017 | } |
2928 | 3018 | ||
2929 | subtree_root = ocfs2_find_subtree_root(inode, left_path, | 3019 | subtree_root = ocfs2_find_subtree_root(et, left_path, |
2930 | right_path); | 3020 | right_path); |
2931 | 3021 | ||
2932 | mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", | 3022 | mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", |
@@ -2946,16 +3036,16 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, | |||
2946 | * Caller might still want to make changes to the | 3036 | * Caller might still want to make changes to the |
2947 | * tree root, so re-add it to the journal here. | 3037 | * tree root, so re-add it to the journal here. |
2948 | */ | 3038 | */ |
2949 | ret = ocfs2_path_bh_journal_access(handle, inode, | 3039 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, |
2950 | left_path, 0); | 3040 | left_path, 0); |
2951 | if (ret) { | 3041 | if (ret) { |
2952 | mlog_errno(ret); | 3042 | mlog_errno(ret); |
2953 | goto out; | 3043 | goto out; |
2954 | } | 3044 | } |
2955 | 3045 | ||
2956 | ret = ocfs2_rotate_subtree_left(inode, handle, left_path, | 3046 | ret = ocfs2_rotate_subtree_left(handle, et, left_path, |
2957 | right_path, subtree_root, | 3047 | right_path, subtree_root, |
2958 | dealloc, &deleted, et); | 3048 | dealloc, &deleted); |
2959 | if (ret == -EAGAIN) { | 3049 | if (ret == -EAGAIN) { |
2960 | /* | 3050 | /* |
2961 | * The rotation has to temporarily stop due to | 3051 | * The rotation has to temporarily stop due to |
@@ -2982,7 +3072,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, | |||
2982 | 3072 | ||
2983 | ocfs2_mv_path(left_path, right_path); | 3073 | ocfs2_mv_path(left_path, right_path); |
2984 | 3074 | ||
2985 | ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, | 3075 | ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, |
2986 | &right_cpos); | 3076 | &right_cpos); |
2987 | if (ret) { | 3077 | if (ret) { |
2988 | mlog_errno(ret); | 3078 | mlog_errno(ret); |
@@ -2997,10 +3087,10 @@ out: | |||
2997 | return ret; | 3087 | return ret; |
2998 | } | 3088 | } |
2999 | 3089 | ||
3000 | static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | 3090 | static int ocfs2_remove_rightmost_path(handle_t *handle, |
3091 | struct ocfs2_extent_tree *et, | ||
3001 | struct ocfs2_path *path, | 3092 | struct ocfs2_path *path, |
3002 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3093 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
3003 | struct ocfs2_extent_tree *et) | ||
3004 | { | 3094 | { |
3005 | int ret, subtree_index; | 3095 | int ret, subtree_index; |
3006 | u32 cpos; | 3096 | u32 cpos; |
@@ -3009,7 +3099,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | |||
3009 | struct ocfs2_extent_list *el; | 3099 | struct ocfs2_extent_list *el; |
3010 | 3100 | ||
3011 | 3101 | ||
3012 | ret = ocfs2_et_sanity_check(inode, et); | 3102 | ret = ocfs2_et_sanity_check(et); |
3013 | if (ret) | 3103 | if (ret) |
3014 | goto out; | 3104 | goto out; |
3015 | /* | 3105 | /* |
@@ -3024,13 +3114,14 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | |||
3024 | goto out; | 3114 | goto out; |
3025 | } | 3115 | } |
3026 | 3116 | ||
3027 | ret = ocfs2_journal_access_path(inode, handle, path); | 3117 | ret = ocfs2_journal_access_path(et->et_ci, handle, path); |
3028 | if (ret) { | 3118 | if (ret) { |
3029 | mlog_errno(ret); | 3119 | mlog_errno(ret); |
3030 | goto out; | 3120 | goto out; |
3031 | } | 3121 | } |
3032 | 3122 | ||
3033 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); | 3123 | ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci), |
3124 | path, &cpos); | ||
3034 | if (ret) { | 3125 | if (ret) { |
3035 | mlog_errno(ret); | 3126 | mlog_errno(ret); |
3036 | goto out; | 3127 | goto out; |
@@ -3048,23 +3139,23 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | |||
3048 | goto out; | 3139 | goto out; |
3049 | } | 3140 | } |
3050 | 3141 | ||
3051 | ret = ocfs2_find_path(inode, left_path, cpos); | 3142 | ret = ocfs2_find_path(et->et_ci, left_path, cpos); |
3052 | if (ret) { | 3143 | if (ret) { |
3053 | mlog_errno(ret); | 3144 | mlog_errno(ret); |
3054 | goto out; | 3145 | goto out; |
3055 | } | 3146 | } |
3056 | 3147 | ||
3057 | ret = ocfs2_journal_access_path(inode, handle, left_path); | 3148 | ret = ocfs2_journal_access_path(et->et_ci, handle, left_path); |
3058 | if (ret) { | 3149 | if (ret) { |
3059 | mlog_errno(ret); | 3150 | mlog_errno(ret); |
3060 | goto out; | 3151 | goto out; |
3061 | } | 3152 | } |
3062 | 3153 | ||
3063 | subtree_index = ocfs2_find_subtree_root(inode, left_path, path); | 3154 | subtree_index = ocfs2_find_subtree_root(et, left_path, path); |
3064 | 3155 | ||
3065 | ocfs2_unlink_subtree(inode, handle, left_path, path, | 3156 | ocfs2_unlink_subtree(handle, et, left_path, path, |
3066 | subtree_index, dealloc); | 3157 | subtree_index, dealloc); |
3067 | ret = ocfs2_update_edge_lengths(inode, handle, subtree_index, | 3158 | ret = ocfs2_update_edge_lengths(handle, et, subtree_index, |
3068 | left_path); | 3159 | left_path); |
3069 | if (ret) { | 3160 | if (ret) { |
3070 | mlog_errno(ret); | 3161 | mlog_errno(ret); |
@@ -3078,10 +3169,10 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | |||
3078 | * 'path' is also the leftmost path which | 3169 | * 'path' is also the leftmost path which |
3079 | * means it must be the only one. This gets | 3170 | * means it must be the only one. This gets |
3080 | * handled differently because we want to | 3171 | * handled differently because we want to |
3081 | * revert the inode back to having extents | 3172 | * revert the root back to having extents |
3082 | * in-line. | 3173 | * in-line. |
3083 | */ | 3174 | */ |
3084 | ocfs2_unlink_path(inode, handle, dealloc, path, 1); | 3175 | ocfs2_unlink_path(handle, et, dealloc, path, 1); |
3085 | 3176 | ||
3086 | el = et->et_root_el; | 3177 | el = et->et_root_el; |
3087 | el->l_tree_depth = 0; | 3178 | el->l_tree_depth = 0; |
@@ -3114,10 +3205,10 @@ out: | |||
3114 | * the rightmost tree leaf record is removed so the caller is | 3205 | * the rightmost tree leaf record is removed so the caller is |
3115 | * responsible for detecting and correcting that. | 3206 | * responsible for detecting and correcting that. |
3116 | */ | 3207 | */ |
3117 | static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, | 3208 | static int ocfs2_rotate_tree_left(handle_t *handle, |
3209 | struct ocfs2_extent_tree *et, | ||
3118 | struct ocfs2_path *path, | 3210 | struct ocfs2_path *path, |
3119 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3211 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
3120 | struct ocfs2_extent_tree *et) | ||
3121 | { | 3212 | { |
3122 | int ret, orig_credits = handle->h_buffer_credits; | 3213 | int ret, orig_credits = handle->h_buffer_credits; |
3123 | struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; | 3214 | struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; |
@@ -3134,8 +3225,7 @@ rightmost_no_delete: | |||
3134 | * Inline extents. This is trivially handled, so do | 3225 | * Inline extents. This is trivially handled, so do |
3135 | * it up front. | 3226 | * it up front. |
3136 | */ | 3227 | */ |
3137 | ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, | 3228 | ret = ocfs2_rotate_rightmost_leaf_left(handle, et, path); |
3138 | path); | ||
3139 | if (ret) | 3229 | if (ret) |
3140 | mlog_errno(ret); | 3230 | mlog_errno(ret); |
3141 | goto out; | 3231 | goto out; |
@@ -3151,7 +3241,7 @@ rightmost_no_delete: | |||
3151 | * | 3241 | * |
3152 | * 1) is handled via ocfs2_rotate_rightmost_leaf_left() | 3242 | * 1) is handled via ocfs2_rotate_rightmost_leaf_left() |
3153 | * 2a) we need the left branch so that we can update it with the unlink | 3243 | * 2a) we need the left branch so that we can update it with the unlink |
3154 | * 2b) we need to bring the inode back to inline extents. | 3244 | * 2b) we need to bring the root back to inline extents. |
3155 | */ | 3245 | */ |
3156 | 3246 | ||
3157 | eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; | 3247 | eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; |
@@ -3167,9 +3257,9 @@ rightmost_no_delete: | |||
3167 | 3257 | ||
3168 | if (le16_to_cpu(el->l_next_free_rec) == 0) { | 3258 | if (le16_to_cpu(el->l_next_free_rec) == 0) { |
3169 | ret = -EIO; | 3259 | ret = -EIO; |
3170 | ocfs2_error(inode->i_sb, | 3260 | ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), |
3171 | "Inode %llu has empty extent block at %llu", | 3261 | "Owner %llu has empty extent block at %llu", |
3172 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 3262 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), |
3173 | (unsigned long long)le64_to_cpu(eb->h_blkno)); | 3263 | (unsigned long long)le64_to_cpu(eb->h_blkno)); |
3174 | goto out; | 3264 | goto out; |
3175 | } | 3265 | } |
@@ -3183,8 +3273,8 @@ rightmost_no_delete: | |||
3183 | * nonempty list. | 3273 | * nonempty list. |
3184 | */ | 3274 | */ |
3185 | 3275 | ||
3186 | ret = ocfs2_remove_rightmost_path(inode, handle, path, | 3276 | ret = ocfs2_remove_rightmost_path(handle, et, path, |
3187 | dealloc, et); | 3277 | dealloc); |
3188 | if (ret) | 3278 | if (ret) |
3189 | mlog_errno(ret); | 3279 | mlog_errno(ret); |
3190 | goto out; | 3280 | goto out; |
@@ -3195,8 +3285,8 @@ rightmost_no_delete: | |||
3195 | * and restarting from there. | 3285 | * and restarting from there. |
3196 | */ | 3286 | */ |
3197 | try_rotate: | 3287 | try_rotate: |
3198 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, | 3288 | ret = __ocfs2_rotate_tree_left(handle, et, orig_credits, path, |
3199 | dealloc, &restart_path, et); | 3289 | dealloc, &restart_path); |
3200 | if (ret && ret != -EAGAIN) { | 3290 | if (ret && ret != -EAGAIN) { |
3201 | mlog_errno(ret); | 3291 | mlog_errno(ret); |
3202 | goto out; | 3292 | goto out; |
@@ -3206,9 +3296,9 @@ try_rotate: | |||
3206 | tmp_path = restart_path; | 3296 | tmp_path = restart_path; |
3207 | restart_path = NULL; | 3297 | restart_path = NULL; |
3208 | 3298 | ||
3209 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, | 3299 | ret = __ocfs2_rotate_tree_left(handle, et, orig_credits, |
3210 | tmp_path, dealloc, | 3300 | tmp_path, dealloc, |
3211 | &restart_path, et); | 3301 | &restart_path); |
3212 | if (ret && ret != -EAGAIN) { | 3302 | if (ret && ret != -EAGAIN) { |
3213 | mlog_errno(ret); | 3303 | mlog_errno(ret); |
3214 | goto out; | 3304 | goto out; |
@@ -3259,7 +3349,7 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, | |||
3259 | } | 3349 | } |
3260 | } | 3350 | } |
3261 | 3351 | ||
3262 | static int ocfs2_get_right_path(struct inode *inode, | 3352 | static int ocfs2_get_right_path(struct ocfs2_extent_tree *et, |
3263 | struct ocfs2_path *left_path, | 3353 | struct ocfs2_path *left_path, |
3264 | struct ocfs2_path **ret_right_path) | 3354 | struct ocfs2_path **ret_right_path) |
3265 | { | 3355 | { |
@@ -3276,8 +3366,8 @@ static int ocfs2_get_right_path(struct inode *inode, | |||
3276 | left_el = path_leaf_el(left_path); | 3366 | left_el = path_leaf_el(left_path); |
3277 | BUG_ON(left_el->l_next_free_rec != left_el->l_count); | 3367 | BUG_ON(left_el->l_next_free_rec != left_el->l_count); |
3278 | 3368 | ||
3279 | ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, | 3369 | ret = ocfs2_find_cpos_for_right_leaf(ocfs2_metadata_cache_get_super(et->et_ci), |
3280 | &right_cpos); | 3370 | left_path, &right_cpos); |
3281 | if (ret) { | 3371 | if (ret) { |
3282 | mlog_errno(ret); | 3372 | mlog_errno(ret); |
3283 | goto out; | 3373 | goto out; |
@@ -3293,7 +3383,7 @@ static int ocfs2_get_right_path(struct inode *inode, | |||
3293 | goto out; | 3383 | goto out; |
3294 | } | 3384 | } |
3295 | 3385 | ||
3296 | ret = ocfs2_find_path(inode, right_path, right_cpos); | 3386 | ret = ocfs2_find_path(et->et_ci, right_path, right_cpos); |
3297 | if (ret) { | 3387 | if (ret) { |
3298 | mlog_errno(ret); | 3388 | mlog_errno(ret); |
3299 | goto out; | 3389 | goto out; |
@@ -3313,9 +3403,9 @@ out: | |||
3313 | * For index == l_count - 1, the "next" means the 1st extent rec of the | 3403 | * For index == l_count - 1, the "next" means the 1st extent rec of the |
3314 | * next extent block. | 3404 | * next extent block. |
3315 | */ | 3405 | */ |
3316 | static int ocfs2_merge_rec_right(struct inode *inode, | 3406 | static int ocfs2_merge_rec_right(struct ocfs2_path *left_path, |
3317 | struct ocfs2_path *left_path, | ||
3318 | handle_t *handle, | 3407 | handle_t *handle, |
3408 | struct ocfs2_extent_tree *et, | ||
3319 | struct ocfs2_extent_rec *split_rec, | 3409 | struct ocfs2_extent_rec *split_rec, |
3320 | int index) | 3410 | int index) |
3321 | { | 3411 | { |
@@ -3336,7 +3426,7 @@ static int ocfs2_merge_rec_right(struct inode *inode, | |||
3336 | if (index == le16_to_cpu(el->l_next_free_rec) - 1 && | 3426 | if (index == le16_to_cpu(el->l_next_free_rec) - 1 && |
3337 | le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { | 3427 | le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { |
3338 | /* we meet with a cross extent block merge. */ | 3428 | /* we meet with a cross extent block merge. */ |
3339 | ret = ocfs2_get_right_path(inode, left_path, &right_path); | 3429 | ret = ocfs2_get_right_path(et, left_path, &right_path); |
3340 | if (ret) { | 3430 | if (ret) { |
3341 | mlog_errno(ret); | 3431 | mlog_errno(ret); |
3342 | goto out; | 3432 | goto out; |
@@ -3355,8 +3445,8 @@ static int ocfs2_merge_rec_right(struct inode *inode, | |||
3355 | le16_to_cpu(left_rec->e_leaf_clusters) != | 3445 | le16_to_cpu(left_rec->e_leaf_clusters) != |
3356 | le32_to_cpu(right_rec->e_cpos)); | 3446 | le32_to_cpu(right_rec->e_cpos)); |
3357 | 3447 | ||
3358 | subtree_index = ocfs2_find_subtree_root(inode, | 3448 | subtree_index = ocfs2_find_subtree_root(et, left_path, |
3359 | left_path, right_path); | 3449 | right_path); |
3360 | 3450 | ||
3361 | ret = ocfs2_extend_rotate_transaction(handle, subtree_index, | 3451 | ret = ocfs2_extend_rotate_transaction(handle, subtree_index, |
3362 | handle->h_buffer_credits, | 3452 | handle->h_buffer_credits, |
@@ -3369,7 +3459,7 @@ static int ocfs2_merge_rec_right(struct inode *inode, | |||
3369 | root_bh = left_path->p_node[subtree_index].bh; | 3459 | root_bh = left_path->p_node[subtree_index].bh; |
3370 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); | 3460 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); |
3371 | 3461 | ||
3372 | ret = ocfs2_path_bh_journal_access(handle, inode, right_path, | 3462 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path, |
3373 | subtree_index); | 3463 | subtree_index); |
3374 | if (ret) { | 3464 | if (ret) { |
3375 | mlog_errno(ret); | 3465 | mlog_errno(ret); |
@@ -3378,14 +3468,14 @@ static int ocfs2_merge_rec_right(struct inode *inode, | |||
3378 | 3468 | ||
3379 | for (i = subtree_index + 1; | 3469 | for (i = subtree_index + 1; |
3380 | i < path_num_items(right_path); i++) { | 3470 | i < path_num_items(right_path); i++) { |
3381 | ret = ocfs2_path_bh_journal_access(handle, inode, | 3471 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, |
3382 | right_path, i); | 3472 | right_path, i); |
3383 | if (ret) { | 3473 | if (ret) { |
3384 | mlog_errno(ret); | 3474 | mlog_errno(ret); |
3385 | goto out; | 3475 | goto out; |
3386 | } | 3476 | } |
3387 | 3477 | ||
3388 | ret = ocfs2_path_bh_journal_access(handle, inode, | 3478 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, |
3389 | left_path, i); | 3479 | left_path, i); |
3390 | if (ret) { | 3480 | if (ret) { |
3391 | mlog_errno(ret); | 3481 | mlog_errno(ret); |
@@ -3398,7 +3488,7 @@ static int ocfs2_merge_rec_right(struct inode *inode, | |||
3398 | right_rec = &el->l_recs[index + 1]; | 3488 | right_rec = &el->l_recs[index + 1]; |
3399 | } | 3489 | } |
3400 | 3490 | ||
3401 | ret = ocfs2_path_bh_journal_access(handle, inode, left_path, | 3491 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, left_path, |
3402 | path_num_items(left_path) - 1); | 3492 | path_num_items(left_path) - 1); |
3403 | if (ret) { | 3493 | if (ret) { |
3404 | mlog_errno(ret); | 3494 | mlog_errno(ret); |
@@ -3409,7 +3499,8 @@ static int ocfs2_merge_rec_right(struct inode *inode, | |||
3409 | 3499 | ||
3410 | le32_add_cpu(&right_rec->e_cpos, -split_clusters); | 3500 | le32_add_cpu(&right_rec->e_cpos, -split_clusters); |
3411 | le64_add_cpu(&right_rec->e_blkno, | 3501 | le64_add_cpu(&right_rec->e_blkno, |
3412 | -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); | 3502 | -ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci), |
3503 | split_clusters)); | ||
3413 | le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters); | 3504 | le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters); |
3414 | 3505 | ||
3415 | ocfs2_cleanup_merge(el, index); | 3506 | ocfs2_cleanup_merge(el, index); |
@@ -3423,8 +3514,8 @@ static int ocfs2_merge_rec_right(struct inode *inode, | |||
3423 | if (ret) | 3514 | if (ret) |
3424 | mlog_errno(ret); | 3515 | mlog_errno(ret); |
3425 | 3516 | ||
3426 | ocfs2_complete_edge_insert(inode, handle, left_path, | 3517 | ocfs2_complete_edge_insert(handle, left_path, right_path, |
3427 | right_path, subtree_index); | 3518 | subtree_index); |
3428 | } | 3519 | } |
3429 | out: | 3520 | out: |
3430 | if (right_path) | 3521 | if (right_path) |
@@ -3432,7 +3523,7 @@ out: | |||
3432 | return ret; | 3523 | return ret; |
3433 | } | 3524 | } |
3434 | 3525 | ||
3435 | static int ocfs2_get_left_path(struct inode *inode, | 3526 | static int ocfs2_get_left_path(struct ocfs2_extent_tree *et, |
3436 | struct ocfs2_path *right_path, | 3527 | struct ocfs2_path *right_path, |
3437 | struct ocfs2_path **ret_left_path) | 3528 | struct ocfs2_path **ret_left_path) |
3438 | { | 3529 | { |
@@ -3445,7 +3536,7 @@ static int ocfs2_get_left_path(struct inode *inode, | |||
3445 | /* This function shouldn't be called for non-trees. */ | 3536 | /* This function shouldn't be called for non-trees. */ |
3446 | BUG_ON(right_path->p_tree_depth == 0); | 3537 | BUG_ON(right_path->p_tree_depth == 0); |
3447 | 3538 | ||
3448 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, | 3539 | ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci), |
3449 | right_path, &left_cpos); | 3540 | right_path, &left_cpos); |
3450 | if (ret) { | 3541 | if (ret) { |
3451 | mlog_errno(ret); | 3542 | mlog_errno(ret); |
@@ -3462,7 +3553,7 @@ static int ocfs2_get_left_path(struct inode *inode, | |||
3462 | goto out; | 3553 | goto out; |
3463 | } | 3554 | } |
3464 | 3555 | ||
3465 | ret = ocfs2_find_path(inode, left_path, left_cpos); | 3556 | ret = ocfs2_find_path(et->et_ci, left_path, left_cpos); |
3466 | if (ret) { | 3557 | if (ret) { |
3467 | mlog_errno(ret); | 3558 | mlog_errno(ret); |
3468 | goto out; | 3559 | goto out; |
@@ -3485,12 +3576,11 @@ out: | |||
3485 | * remove the rightmost leaf extent block in the right_path and change | 3576 | * remove the rightmost leaf extent block in the right_path and change |
3486 | * the right path to indicate the new rightmost path. | 3577 | * the right path to indicate the new rightmost path. |
3487 | */ | 3578 | */ |
3488 | static int ocfs2_merge_rec_left(struct inode *inode, | 3579 | static int ocfs2_merge_rec_left(struct ocfs2_path *right_path, |
3489 | struct ocfs2_path *right_path, | ||
3490 | handle_t *handle, | 3580 | handle_t *handle, |
3581 | struct ocfs2_extent_tree *et, | ||
3491 | struct ocfs2_extent_rec *split_rec, | 3582 | struct ocfs2_extent_rec *split_rec, |
3492 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3583 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
3493 | struct ocfs2_extent_tree *et, | ||
3494 | int index) | 3584 | int index) |
3495 | { | 3585 | { |
3496 | int ret, i, subtree_index = 0, has_empty_extent = 0; | 3586 | int ret, i, subtree_index = 0, has_empty_extent = 0; |
@@ -3508,7 +3598,7 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
3508 | right_rec = &el->l_recs[index]; | 3598 | right_rec = &el->l_recs[index]; |
3509 | if (index == 0) { | 3599 | if (index == 0) { |
3510 | /* we meet with a cross extent block merge. */ | 3600 | /* we meet with a cross extent block merge. */ |
3511 | ret = ocfs2_get_left_path(inode, right_path, &left_path); | 3601 | ret = ocfs2_get_left_path(et, right_path, &left_path); |
3512 | if (ret) { | 3602 | if (ret) { |
3513 | mlog_errno(ret); | 3603 | mlog_errno(ret); |
3514 | goto out; | 3604 | goto out; |
@@ -3524,8 +3614,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
3524 | le16_to_cpu(left_rec->e_leaf_clusters) != | 3614 | le16_to_cpu(left_rec->e_leaf_clusters) != |
3525 | le32_to_cpu(split_rec->e_cpos)); | 3615 | le32_to_cpu(split_rec->e_cpos)); |
3526 | 3616 | ||
3527 | subtree_index = ocfs2_find_subtree_root(inode, | 3617 | subtree_index = ocfs2_find_subtree_root(et, left_path, |
3528 | left_path, right_path); | 3618 | right_path); |
3529 | 3619 | ||
3530 | ret = ocfs2_extend_rotate_transaction(handle, subtree_index, | 3620 | ret = ocfs2_extend_rotate_transaction(handle, subtree_index, |
3531 | handle->h_buffer_credits, | 3621 | handle->h_buffer_credits, |
@@ -3538,7 +3628,7 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
3538 | root_bh = left_path->p_node[subtree_index].bh; | 3628 | root_bh = left_path->p_node[subtree_index].bh; |
3539 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); | 3629 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); |
3540 | 3630 | ||
3541 | ret = ocfs2_path_bh_journal_access(handle, inode, right_path, | 3631 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path, |
3542 | subtree_index); | 3632 | subtree_index); |
3543 | if (ret) { | 3633 | if (ret) { |
3544 | mlog_errno(ret); | 3634 | mlog_errno(ret); |
@@ -3547,14 +3637,14 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
3547 | 3637 | ||
3548 | for (i = subtree_index + 1; | 3638 | for (i = subtree_index + 1; |
3549 | i < path_num_items(right_path); i++) { | 3639 | i < path_num_items(right_path); i++) { |
3550 | ret = ocfs2_path_bh_journal_access(handle, inode, | 3640 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, |
3551 | right_path, i); | 3641 | right_path, i); |
3552 | if (ret) { | 3642 | if (ret) { |
3553 | mlog_errno(ret); | 3643 | mlog_errno(ret); |
3554 | goto out; | 3644 | goto out; |
3555 | } | 3645 | } |
3556 | 3646 | ||
3557 | ret = ocfs2_path_bh_journal_access(handle, inode, | 3647 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, |
3558 | left_path, i); | 3648 | left_path, i); |
3559 | if (ret) { | 3649 | if (ret) { |
3560 | mlog_errno(ret); | 3650 | mlog_errno(ret); |
@@ -3567,7 +3657,7 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
3567 | has_empty_extent = 1; | 3657 | has_empty_extent = 1; |
3568 | } | 3658 | } |
3569 | 3659 | ||
3570 | ret = ocfs2_path_bh_journal_access(handle, inode, right_path, | 3660 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path, |
3571 | path_num_items(right_path) - 1); | 3661 | path_num_items(right_path) - 1); |
3572 | if (ret) { | 3662 | if (ret) { |
3573 | mlog_errno(ret); | 3663 | mlog_errno(ret); |
@@ -3586,7 +3676,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
3586 | 3676 | ||
3587 | le32_add_cpu(&right_rec->e_cpos, split_clusters); | 3677 | le32_add_cpu(&right_rec->e_cpos, split_clusters); |
3588 | le64_add_cpu(&right_rec->e_blkno, | 3678 | le64_add_cpu(&right_rec->e_blkno, |
3589 | ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); | 3679 | ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci), |
3680 | split_clusters)); | ||
3590 | le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters); | 3681 | le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters); |
3591 | 3682 | ||
3592 | ocfs2_cleanup_merge(el, index); | 3683 | ocfs2_cleanup_merge(el, index); |
@@ -3608,9 +3699,9 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
3608 | if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && | 3699 | if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && |
3609 | le16_to_cpu(el->l_next_free_rec) == 1) { | 3700 | le16_to_cpu(el->l_next_free_rec) == 1) { |
3610 | 3701 | ||
3611 | ret = ocfs2_remove_rightmost_path(inode, handle, | 3702 | ret = ocfs2_remove_rightmost_path(handle, et, |
3612 | right_path, | 3703 | right_path, |
3613 | dealloc, et); | 3704 | dealloc); |
3614 | if (ret) { | 3705 | if (ret) { |
3615 | mlog_errno(ret); | 3706 | mlog_errno(ret); |
3616 | goto out; | 3707 | goto out; |
@@ -3622,7 +3713,7 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
3622 | ocfs2_mv_path(right_path, left_path); | 3713 | ocfs2_mv_path(right_path, left_path); |
3623 | left_path = NULL; | 3714 | left_path = NULL; |
3624 | } else | 3715 | } else |
3625 | ocfs2_complete_edge_insert(inode, handle, left_path, | 3716 | ocfs2_complete_edge_insert(handle, left_path, |
3626 | right_path, subtree_index); | 3717 | right_path, subtree_index); |
3627 | } | 3718 | } |
3628 | out: | 3719 | out: |
@@ -3631,15 +3722,13 @@ out: | |||
3631 | return ret; | 3722 | return ret; |
3632 | } | 3723 | } |
3633 | 3724 | ||
3634 | static int ocfs2_try_to_merge_extent(struct inode *inode, | 3725 | static int ocfs2_try_to_merge_extent(handle_t *handle, |
3635 | handle_t *handle, | 3726 | struct ocfs2_extent_tree *et, |
3636 | struct ocfs2_path *path, | 3727 | struct ocfs2_path *path, |
3637 | int split_index, | 3728 | int split_index, |
3638 | struct ocfs2_extent_rec *split_rec, | 3729 | struct ocfs2_extent_rec *split_rec, |
3639 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3730 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
3640 | struct ocfs2_merge_ctxt *ctxt, | 3731 | struct ocfs2_merge_ctxt *ctxt) |
3641 | struct ocfs2_extent_tree *et) | ||
3642 | |||
3643 | { | 3732 | { |
3644 | int ret = 0; | 3733 | int ret = 0; |
3645 | struct ocfs2_extent_list *el = path_leaf_el(path); | 3734 | struct ocfs2_extent_list *el = path_leaf_el(path); |
@@ -3655,8 +3744,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3655 | * extents - having more than one in a leaf is | 3744 | * extents - having more than one in a leaf is |
3656 | * illegal. | 3745 | * illegal. |
3657 | */ | 3746 | */ |
3658 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3747 | ret = ocfs2_rotate_tree_left(handle, et, path, dealloc); |
3659 | dealloc, et); | ||
3660 | if (ret) { | 3748 | if (ret) { |
3661 | mlog_errno(ret); | 3749 | mlog_errno(ret); |
3662 | goto out; | 3750 | goto out; |
@@ -3685,8 +3773,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3685 | * prevoius extent block. It is more efficient and easier | 3773 | * prevoius extent block. It is more efficient and easier |
3686 | * if we do merge_right first and merge_left later. | 3774 | * if we do merge_right first and merge_left later. |
3687 | */ | 3775 | */ |
3688 | ret = ocfs2_merge_rec_right(inode, path, | 3776 | ret = ocfs2_merge_rec_right(path, handle, et, split_rec, |
3689 | handle, split_rec, | ||
3690 | split_index); | 3777 | split_index); |
3691 | if (ret) { | 3778 | if (ret) { |
3692 | mlog_errno(ret); | 3779 | mlog_errno(ret); |
@@ -3699,8 +3786,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3699 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | 3786 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); |
3700 | 3787 | ||
3701 | /* The merge left us with an empty extent, remove it. */ | 3788 | /* The merge left us with an empty extent, remove it. */ |
3702 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3789 | ret = ocfs2_rotate_tree_left(handle, et, path, dealloc); |
3703 | dealloc, et); | ||
3704 | if (ret) { | 3790 | if (ret) { |
3705 | mlog_errno(ret); | 3791 | mlog_errno(ret); |
3706 | goto out; | 3792 | goto out; |
@@ -3712,18 +3798,15 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3712 | * Note that we don't pass split_rec here on purpose - | 3798 | * Note that we don't pass split_rec here on purpose - |
3713 | * we've merged it into the rec already. | 3799 | * we've merged it into the rec already. |
3714 | */ | 3800 | */ |
3715 | ret = ocfs2_merge_rec_left(inode, path, | 3801 | ret = ocfs2_merge_rec_left(path, handle, et, rec, |
3716 | handle, rec, | 3802 | dealloc, split_index); |
3717 | dealloc, et, | ||
3718 | split_index); | ||
3719 | 3803 | ||
3720 | if (ret) { | 3804 | if (ret) { |
3721 | mlog_errno(ret); | 3805 | mlog_errno(ret); |
3722 | goto out; | 3806 | goto out; |
3723 | } | 3807 | } |
3724 | 3808 | ||
3725 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3809 | ret = ocfs2_rotate_tree_left(handle, et, path, dealloc); |
3726 | dealloc, et); | ||
3727 | /* | 3810 | /* |
3728 | * Error from this last rotate is not critical, so | 3811 | * Error from this last rotate is not critical, so |
3729 | * print but don't bubble it up. | 3812 | * print but don't bubble it up. |
@@ -3740,19 +3823,16 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3740 | * the record on the left (hence the left merge). | 3823 | * the record on the left (hence the left merge). |
3741 | */ | 3824 | */ |
3742 | if (ctxt->c_contig_type == CONTIG_RIGHT) { | 3825 | if (ctxt->c_contig_type == CONTIG_RIGHT) { |
3743 | ret = ocfs2_merge_rec_left(inode, | 3826 | ret = ocfs2_merge_rec_left(path, handle, et, |
3744 | path, | 3827 | split_rec, dealloc, |
3745 | handle, split_rec, | ||
3746 | dealloc, et, | ||
3747 | split_index); | 3828 | split_index); |
3748 | if (ret) { | 3829 | if (ret) { |
3749 | mlog_errno(ret); | 3830 | mlog_errno(ret); |
3750 | goto out; | 3831 | goto out; |
3751 | } | 3832 | } |
3752 | } else { | 3833 | } else { |
3753 | ret = ocfs2_merge_rec_right(inode, | 3834 | ret = ocfs2_merge_rec_right(path, handle, |
3754 | path, | 3835 | et, split_rec, |
3755 | handle, split_rec, | ||
3756 | split_index); | 3836 | split_index); |
3757 | if (ret) { | 3837 | if (ret) { |
3758 | mlog_errno(ret); | 3838 | mlog_errno(ret); |
@@ -3765,8 +3845,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
3765 | * The merge may have left an empty extent in | 3845 | * The merge may have left an empty extent in |
3766 | * our leaf. Try to rotate it away. | 3846 | * our leaf. Try to rotate it away. |
3767 | */ | 3847 | */ |
3768 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3848 | ret = ocfs2_rotate_tree_left(handle, et, path, |
3769 | dealloc, et); | 3849 | dealloc); |
3770 | if (ret) | 3850 | if (ret) |
3771 | mlog_errno(ret); | 3851 | mlog_errno(ret); |
3772 | ret = 0; | 3852 | ret = 0; |
@@ -3812,10 +3892,10 @@ static void ocfs2_subtract_from_rec(struct super_block *sb, | |||
3812 | * list. If this leaf is part of an allocation tree, it is assumed | 3892 | * list. If this leaf is part of an allocation tree, it is assumed |
3813 | * that the tree above has been prepared. | 3893 | * that the tree above has been prepared. |
3814 | */ | 3894 | */ |
3815 | static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, | 3895 | static void ocfs2_insert_at_leaf(struct ocfs2_extent_tree *et, |
3896 | struct ocfs2_extent_rec *insert_rec, | ||
3816 | struct ocfs2_extent_list *el, | 3897 | struct ocfs2_extent_list *el, |
3817 | struct ocfs2_insert_type *insert, | 3898 | struct ocfs2_insert_type *insert) |
3818 | struct inode *inode) | ||
3819 | { | 3899 | { |
3820 | int i = insert->ins_contig_index; | 3900 | int i = insert->ins_contig_index; |
3821 | unsigned int range; | 3901 | unsigned int range; |
@@ -3827,7 +3907,8 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, | |||
3827 | i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos)); | 3907 | i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos)); |
3828 | BUG_ON(i == -1); | 3908 | BUG_ON(i == -1); |
3829 | rec = &el->l_recs[i]; | 3909 | rec = &el->l_recs[i]; |
3830 | ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec, | 3910 | ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci), |
3911 | insert->ins_split, rec, | ||
3831 | insert_rec); | 3912 | insert_rec); |
3832 | goto rotate; | 3913 | goto rotate; |
3833 | } | 3914 | } |
@@ -3869,10 +3950,10 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, | |||
3869 | 3950 | ||
3870 | mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= | 3951 | mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= |
3871 | le16_to_cpu(el->l_count), | 3952 | le16_to_cpu(el->l_count), |
3872 | "inode %lu, depth %u, count %u, next free %u, " | 3953 | "owner %llu, depth %u, count %u, next free %u, " |
3873 | "rec.cpos %u, rec.clusters %u, " | 3954 | "rec.cpos %u, rec.clusters %u, " |
3874 | "insert.cpos %u, insert.clusters %u\n", | 3955 | "insert.cpos %u, insert.clusters %u\n", |
3875 | inode->i_ino, | 3956 | ocfs2_metadata_cache_owner(et->et_ci), |
3876 | le16_to_cpu(el->l_tree_depth), | 3957 | le16_to_cpu(el->l_tree_depth), |
3877 | le16_to_cpu(el->l_count), | 3958 | le16_to_cpu(el->l_count), |
3878 | le16_to_cpu(el->l_next_free_rec), | 3959 | le16_to_cpu(el->l_next_free_rec), |
@@ -3900,8 +3981,8 @@ rotate: | |||
3900 | ocfs2_rotate_leaf(el, insert_rec); | 3981 | ocfs2_rotate_leaf(el, insert_rec); |
3901 | } | 3982 | } |
3902 | 3983 | ||
3903 | static void ocfs2_adjust_rightmost_records(struct inode *inode, | 3984 | static void ocfs2_adjust_rightmost_records(handle_t *handle, |
3904 | handle_t *handle, | 3985 | struct ocfs2_extent_tree *et, |
3905 | struct ocfs2_path *path, | 3986 | struct ocfs2_path *path, |
3906 | struct ocfs2_extent_rec *insert_rec) | 3987 | struct ocfs2_extent_rec *insert_rec) |
3907 | { | 3988 | { |
@@ -3919,9 +4000,9 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode, | |||
3919 | 4000 | ||
3920 | next_free = le16_to_cpu(el->l_next_free_rec); | 4001 | next_free = le16_to_cpu(el->l_next_free_rec); |
3921 | if (next_free == 0) { | 4002 | if (next_free == 0) { |
3922 | ocfs2_error(inode->i_sb, | 4003 | ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), |
3923 | "Dinode %llu has a bad extent list", | 4004 | "Owner %llu has a bad extent list", |
3924 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 4005 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); |
3925 | ret = -EIO; | 4006 | ret = -EIO; |
3926 | return; | 4007 | return; |
3927 | } | 4008 | } |
@@ -3941,7 +4022,8 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode, | |||
3941 | } | 4022 | } |
3942 | } | 4023 | } |
3943 | 4024 | ||
3944 | static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, | 4025 | static int ocfs2_append_rec_to_path(handle_t *handle, |
4026 | struct ocfs2_extent_tree *et, | ||
3945 | struct ocfs2_extent_rec *insert_rec, | 4027 | struct ocfs2_extent_rec *insert_rec, |
3946 | struct ocfs2_path *right_path, | 4028 | struct ocfs2_path *right_path, |
3947 | struct ocfs2_path **ret_left_path) | 4029 | struct ocfs2_path **ret_left_path) |
@@ -3969,8 +4051,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, | |||
3969 | (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) { | 4051 | (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) { |
3970 | u32 left_cpos; | 4052 | u32 left_cpos; |
3971 | 4053 | ||
3972 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, | 4054 | ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci), |
3973 | &left_cpos); | 4055 | right_path, &left_cpos); |
3974 | if (ret) { | 4056 | if (ret) { |
3975 | mlog_errno(ret); | 4057 | mlog_errno(ret); |
3976 | goto out; | 4058 | goto out; |
@@ -3992,7 +4074,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, | |||
3992 | goto out; | 4074 | goto out; |
3993 | } | 4075 | } |
3994 | 4076 | ||
3995 | ret = ocfs2_find_path(inode, left_path, left_cpos); | 4077 | ret = ocfs2_find_path(et->et_ci, left_path, |
4078 | left_cpos); | ||
3996 | if (ret) { | 4079 | if (ret) { |
3997 | mlog_errno(ret); | 4080 | mlog_errno(ret); |
3998 | goto out; | 4081 | goto out; |
@@ -4005,13 +4088,13 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, | |||
4005 | } | 4088 | } |
4006 | } | 4089 | } |
4007 | 4090 | ||
4008 | ret = ocfs2_journal_access_path(inode, handle, right_path); | 4091 | ret = ocfs2_journal_access_path(et->et_ci, handle, right_path); |
4009 | if (ret) { | 4092 | if (ret) { |
4010 | mlog_errno(ret); | 4093 | mlog_errno(ret); |
4011 | goto out; | 4094 | goto out; |
4012 | } | 4095 | } |
4013 | 4096 | ||
4014 | ocfs2_adjust_rightmost_records(inode, handle, right_path, insert_rec); | 4097 | ocfs2_adjust_rightmost_records(handle, et, right_path, insert_rec); |
4015 | 4098 | ||
4016 | *ret_left_path = left_path; | 4099 | *ret_left_path = left_path; |
4017 | ret = 0; | 4100 | ret = 0; |
@@ -4022,7 +4105,7 @@ out: | |||
4022 | return ret; | 4105 | return ret; |
4023 | } | 4106 | } |
4024 | 4107 | ||
4025 | static void ocfs2_split_record(struct inode *inode, | 4108 | static void ocfs2_split_record(struct ocfs2_extent_tree *et, |
4026 | struct ocfs2_path *left_path, | 4109 | struct ocfs2_path *left_path, |
4027 | struct ocfs2_path *right_path, | 4110 | struct ocfs2_path *right_path, |
4028 | struct ocfs2_extent_rec *split_rec, | 4111 | struct ocfs2_extent_rec *split_rec, |
@@ -4095,7 +4178,8 @@ static void ocfs2_split_record(struct inode *inode, | |||
4095 | } | 4178 | } |
4096 | 4179 | ||
4097 | rec = &el->l_recs[index]; | 4180 | rec = &el->l_recs[index]; |
4098 | ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec); | 4181 | ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci), |
4182 | split, rec, split_rec); | ||
4099 | ocfs2_rotate_leaf(insert_el, split_rec); | 4183 | ocfs2_rotate_leaf(insert_el, split_rec); |
4100 | } | 4184 | } |
4101 | 4185 | ||
@@ -4107,8 +4191,8 @@ static void ocfs2_split_record(struct inode *inode, | |||
4107 | * in. left_path should only be passed in if we need to update that | 4191 | * in. left_path should only be passed in if we need to update that |
4108 | * portion of the tree after an edge insert. | 4192 | * portion of the tree after an edge insert. |
4109 | */ | 4193 | */ |
4110 | static int ocfs2_insert_path(struct inode *inode, | 4194 | static int ocfs2_insert_path(handle_t *handle, |
4111 | handle_t *handle, | 4195 | struct ocfs2_extent_tree *et, |
4112 | struct ocfs2_path *left_path, | 4196 | struct ocfs2_path *left_path, |
4113 | struct ocfs2_path *right_path, | 4197 | struct ocfs2_path *right_path, |
4114 | struct ocfs2_extent_rec *insert_rec, | 4198 | struct ocfs2_extent_rec *insert_rec, |
@@ -4134,7 +4218,7 @@ static int ocfs2_insert_path(struct inode *inode, | |||
4134 | goto out; | 4218 | goto out; |
4135 | } | 4219 | } |
4136 | 4220 | ||
4137 | ret = ocfs2_journal_access_path(inode, handle, left_path); | 4221 | ret = ocfs2_journal_access_path(et->et_ci, handle, left_path); |
4138 | if (ret < 0) { | 4222 | if (ret < 0) { |
4139 | mlog_errno(ret); | 4223 | mlog_errno(ret); |
4140 | goto out; | 4224 | goto out; |
@@ -4145,7 +4229,7 @@ static int ocfs2_insert_path(struct inode *inode, | |||
4145 | * Pass both paths to the journal. The majority of inserts | 4229 | * Pass both paths to the journal. The majority of inserts |
4146 | * will be touching all components anyway. | 4230 | * will be touching all components anyway. |
4147 | */ | 4231 | */ |
4148 | ret = ocfs2_journal_access_path(inode, handle, right_path); | 4232 | ret = ocfs2_journal_access_path(et->et_ci, handle, right_path); |
4149 | if (ret < 0) { | 4233 | if (ret < 0) { |
4150 | mlog_errno(ret); | 4234 | mlog_errno(ret); |
4151 | goto out; | 4235 | goto out; |
@@ -4157,7 +4241,7 @@ static int ocfs2_insert_path(struct inode *inode, | |||
4157 | * of splits, but it's easier to just let one separate | 4241 | * of splits, but it's easier to just let one separate |
4158 | * function sort it all out. | 4242 | * function sort it all out. |
4159 | */ | 4243 | */ |
4160 | ocfs2_split_record(inode, left_path, right_path, | 4244 | ocfs2_split_record(et, left_path, right_path, |
4161 | insert_rec, insert->ins_split); | 4245 | insert_rec, insert->ins_split); |
4162 | 4246 | ||
4163 | /* | 4247 | /* |
@@ -4171,8 +4255,8 @@ static int ocfs2_insert_path(struct inode *inode, | |||
4171 | if (ret) | 4255 | if (ret) |
4172 | mlog_errno(ret); | 4256 | mlog_errno(ret); |
4173 | } else | 4257 | } else |
4174 | ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path), | 4258 | ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path), |
4175 | insert, inode); | 4259 | insert); |
4176 | 4260 | ||
4177 | ret = ocfs2_journal_dirty(handle, leaf_bh); | 4261 | ret = ocfs2_journal_dirty(handle, leaf_bh); |
4178 | if (ret) | 4262 | if (ret) |
@@ -4185,10 +4269,10 @@ static int ocfs2_insert_path(struct inode *inode, | |||
4185 | * | 4269 | * |
4186 | * XXX: Should we extend the transaction here? | 4270 | * XXX: Should we extend the transaction here? |
4187 | */ | 4271 | */ |
4188 | subtree_index = ocfs2_find_subtree_root(inode, left_path, | 4272 | subtree_index = ocfs2_find_subtree_root(et, left_path, |
4189 | right_path); | 4273 | right_path); |
4190 | ocfs2_complete_edge_insert(inode, handle, left_path, | 4274 | ocfs2_complete_edge_insert(handle, left_path, right_path, |
4191 | right_path, subtree_index); | 4275 | subtree_index); |
4192 | } | 4276 | } |
4193 | 4277 | ||
4194 | ret = 0; | 4278 | ret = 0; |
@@ -4196,8 +4280,7 @@ out: | |||
4196 | return ret; | 4280 | return ret; |
4197 | } | 4281 | } |
4198 | 4282 | ||
4199 | static int ocfs2_do_insert_extent(struct inode *inode, | 4283 | static int ocfs2_do_insert_extent(handle_t *handle, |
4200 | handle_t *handle, | ||
4201 | struct ocfs2_extent_tree *et, | 4284 | struct ocfs2_extent_tree *et, |
4202 | struct ocfs2_extent_rec *insert_rec, | 4285 | struct ocfs2_extent_rec *insert_rec, |
4203 | struct ocfs2_insert_type *type) | 4286 | struct ocfs2_insert_type *type) |
@@ -4210,7 +4293,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
4210 | 4293 | ||
4211 | el = et->et_root_el; | 4294 | el = et->et_root_el; |
4212 | 4295 | ||
4213 | ret = ocfs2_et_root_journal_access(handle, inode, et, | 4296 | ret = ocfs2_et_root_journal_access(handle, et, |
4214 | OCFS2_JOURNAL_ACCESS_WRITE); | 4297 | OCFS2_JOURNAL_ACCESS_WRITE); |
4215 | if (ret) { | 4298 | if (ret) { |
4216 | mlog_errno(ret); | 4299 | mlog_errno(ret); |
@@ -4218,7 +4301,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
4218 | } | 4301 | } |
4219 | 4302 | ||
4220 | if (le16_to_cpu(el->l_tree_depth) == 0) { | 4303 | if (le16_to_cpu(el->l_tree_depth) == 0) { |
4221 | ocfs2_insert_at_leaf(insert_rec, el, type, inode); | 4304 | ocfs2_insert_at_leaf(et, insert_rec, el, type); |
4222 | goto out_update_clusters; | 4305 | goto out_update_clusters; |
4223 | } | 4306 | } |
4224 | 4307 | ||
@@ -4241,7 +4324,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
4241 | cpos = UINT_MAX; | 4324 | cpos = UINT_MAX; |
4242 | } | 4325 | } |
4243 | 4326 | ||
4244 | ret = ocfs2_find_path(inode, right_path, cpos); | 4327 | ret = ocfs2_find_path(et->et_ci, right_path, cpos); |
4245 | if (ret) { | 4328 | if (ret) { |
4246 | mlog_errno(ret); | 4329 | mlog_errno(ret); |
4247 | goto out; | 4330 | goto out; |
@@ -4260,7 +4343,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
4260 | * can wind up skipping both of these two special cases... | 4343 | * can wind up skipping both of these two special cases... |
4261 | */ | 4344 | */ |
4262 | if (rotate) { | 4345 | if (rotate) { |
4263 | ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split, | 4346 | ret = ocfs2_rotate_tree_right(handle, et, type->ins_split, |
4264 | le32_to_cpu(insert_rec->e_cpos), | 4347 | le32_to_cpu(insert_rec->e_cpos), |
4265 | right_path, &left_path); | 4348 | right_path, &left_path); |
4266 | if (ret) { | 4349 | if (ret) { |
@@ -4272,7 +4355,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
4272 | * ocfs2_rotate_tree_right() might have extended the | 4355 | * ocfs2_rotate_tree_right() might have extended the |
4273 | * transaction without re-journaling our tree root. | 4356 | * transaction without re-journaling our tree root. |
4274 | */ | 4357 | */ |
4275 | ret = ocfs2_et_root_journal_access(handle, inode, et, | 4358 | ret = ocfs2_et_root_journal_access(handle, et, |
4276 | OCFS2_JOURNAL_ACCESS_WRITE); | 4359 | OCFS2_JOURNAL_ACCESS_WRITE); |
4277 | if (ret) { | 4360 | if (ret) { |
4278 | mlog_errno(ret); | 4361 | mlog_errno(ret); |
@@ -4280,7 +4363,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
4280 | } | 4363 | } |
4281 | } else if (type->ins_appending == APPEND_TAIL | 4364 | } else if (type->ins_appending == APPEND_TAIL |
4282 | && type->ins_contig != CONTIG_LEFT) { | 4365 | && type->ins_contig != CONTIG_LEFT) { |
4283 | ret = ocfs2_append_rec_to_path(inode, handle, insert_rec, | 4366 | ret = ocfs2_append_rec_to_path(handle, et, insert_rec, |
4284 | right_path, &left_path); | 4367 | right_path, &left_path); |
4285 | if (ret) { | 4368 | if (ret) { |
4286 | mlog_errno(ret); | 4369 | mlog_errno(ret); |
@@ -4288,7 +4371,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
4288 | } | 4371 | } |
4289 | } | 4372 | } |
4290 | 4373 | ||
4291 | ret = ocfs2_insert_path(inode, handle, left_path, right_path, | 4374 | ret = ocfs2_insert_path(handle, et, left_path, right_path, |
4292 | insert_rec, type); | 4375 | insert_rec, type); |
4293 | if (ret) { | 4376 | if (ret) { |
4294 | mlog_errno(ret); | 4377 | mlog_errno(ret); |
@@ -4297,7 +4380,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
4297 | 4380 | ||
4298 | out_update_clusters: | 4381 | out_update_clusters: |
4299 | if (type->ins_split == SPLIT_NONE) | 4382 | if (type->ins_split == SPLIT_NONE) |
4300 | ocfs2_et_update_clusters(inode, et, | 4383 | ocfs2_et_update_clusters(et, |
4301 | le16_to_cpu(insert_rec->e_leaf_clusters)); | 4384 | le16_to_cpu(insert_rec->e_leaf_clusters)); |
4302 | 4385 | ||
4303 | ret = ocfs2_journal_dirty(handle, et->et_root_bh); | 4386 | ret = ocfs2_journal_dirty(handle, et->et_root_bh); |
@@ -4312,7 +4395,8 @@ out: | |||
4312 | } | 4395 | } |
4313 | 4396 | ||
4314 | static enum ocfs2_contig_type | 4397 | static enum ocfs2_contig_type |
4315 | ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, | 4398 | ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et, |
4399 | struct ocfs2_path *path, | ||
4316 | struct ocfs2_extent_list *el, int index, | 4400 | struct ocfs2_extent_list *el, int index, |
4317 | struct ocfs2_extent_rec *split_rec) | 4401 | struct ocfs2_extent_rec *split_rec) |
4318 | { | 4402 | { |
@@ -4324,12 +4408,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, | |||
4324 | struct ocfs2_path *left_path = NULL, *right_path = NULL; | 4408 | struct ocfs2_path *left_path = NULL, *right_path = NULL; |
4325 | struct buffer_head *bh; | 4409 | struct buffer_head *bh; |
4326 | struct ocfs2_extent_block *eb; | 4410 | struct ocfs2_extent_block *eb; |
4411 | struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci); | ||
4327 | 4412 | ||
4328 | if (index > 0) { | 4413 | if (index > 0) { |
4329 | rec = &el->l_recs[index - 1]; | 4414 | rec = &el->l_recs[index - 1]; |
4330 | } else if (path->p_tree_depth > 0) { | 4415 | } else if (path->p_tree_depth > 0) { |
4331 | status = ocfs2_find_cpos_for_left_leaf(inode->i_sb, | 4416 | status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); |
4332 | path, &left_cpos); | ||
4333 | if (status) | 4417 | if (status) |
4334 | goto out; | 4418 | goto out; |
4335 | 4419 | ||
@@ -4338,7 +4422,8 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, | |||
4338 | if (!left_path) | 4422 | if (!left_path) |
4339 | goto out; | 4423 | goto out; |
4340 | 4424 | ||
4341 | status = ocfs2_find_path(inode, left_path, left_cpos); | 4425 | status = ocfs2_find_path(et->et_ci, left_path, |
4426 | left_cpos); | ||
4342 | if (status) | 4427 | if (status) |
4343 | goto out; | 4428 | goto out; |
4344 | 4429 | ||
@@ -4348,7 +4433,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, | |||
4348 | le16_to_cpu(new_el->l_count)) { | 4433 | le16_to_cpu(new_el->l_count)) { |
4349 | bh = path_leaf_bh(left_path); | 4434 | bh = path_leaf_bh(left_path); |
4350 | eb = (struct ocfs2_extent_block *)bh->b_data; | 4435 | eb = (struct ocfs2_extent_block *)bh->b_data; |
4351 | ocfs2_error(inode->i_sb, | 4436 | ocfs2_error(sb, |
4352 | "Extent block #%llu has an " | 4437 | "Extent block #%llu has an " |
4353 | "invalid l_next_free_rec of " | 4438 | "invalid l_next_free_rec of " |
4354 | "%d. It should have " | 4439 | "%d. It should have " |
@@ -4373,7 +4458,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, | |||
4373 | if (split_rec->e_cpos == el->l_recs[index].e_cpos) | 4458 | if (split_rec->e_cpos == el->l_recs[index].e_cpos) |
4374 | ret = CONTIG_RIGHT; | 4459 | ret = CONTIG_RIGHT; |
4375 | } else { | 4460 | } else { |
4376 | ret = ocfs2_extent_contig(inode, rec, split_rec); | 4461 | ret = ocfs2_et_extent_contig(et, rec, split_rec); |
4377 | } | 4462 | } |
4378 | } | 4463 | } |
4379 | 4464 | ||
@@ -4382,8 +4467,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, | |||
4382 | rec = &el->l_recs[index + 1]; | 4467 | rec = &el->l_recs[index + 1]; |
4383 | else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) && | 4468 | else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) && |
4384 | path->p_tree_depth > 0) { | 4469 | path->p_tree_depth > 0) { |
4385 | status = ocfs2_find_cpos_for_right_leaf(inode->i_sb, | 4470 | status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); |
4386 | path, &right_cpos); | ||
4387 | if (status) | 4471 | if (status) |
4388 | goto out; | 4472 | goto out; |
4389 | 4473 | ||
@@ -4394,7 +4478,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, | |||
4394 | if (!right_path) | 4478 | if (!right_path) |
4395 | goto out; | 4479 | goto out; |
4396 | 4480 | ||
4397 | status = ocfs2_find_path(inode, right_path, right_cpos); | 4481 | status = ocfs2_find_path(et->et_ci, right_path, right_cpos); |
4398 | if (status) | 4482 | if (status) |
4399 | goto out; | 4483 | goto out; |
4400 | 4484 | ||
@@ -4404,7 +4488,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, | |||
4404 | if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { | 4488 | if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { |
4405 | bh = path_leaf_bh(right_path); | 4489 | bh = path_leaf_bh(right_path); |
4406 | eb = (struct ocfs2_extent_block *)bh->b_data; | 4490 | eb = (struct ocfs2_extent_block *)bh->b_data; |
4407 | ocfs2_error(inode->i_sb, | 4491 | ocfs2_error(sb, |
4408 | "Extent block #%llu has an " | 4492 | "Extent block #%llu has an " |
4409 | "invalid l_next_free_rec of %d", | 4493 | "invalid l_next_free_rec of %d", |
4410 | (unsigned long long)le64_to_cpu(eb->h_blkno), | 4494 | (unsigned long long)le64_to_cpu(eb->h_blkno), |
@@ -4419,7 +4503,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, | |||
4419 | if (rec) { | 4503 | if (rec) { |
4420 | enum ocfs2_contig_type contig_type; | 4504 | enum ocfs2_contig_type contig_type; |
4421 | 4505 | ||
4422 | contig_type = ocfs2_extent_contig(inode, rec, split_rec); | 4506 | contig_type = ocfs2_et_extent_contig(et, rec, split_rec); |
4423 | 4507 | ||
4424 | if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) | 4508 | if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) |
4425 | ret = CONTIG_LEFTRIGHT; | 4509 | ret = CONTIG_LEFTRIGHT; |
@@ -4436,11 +4520,10 @@ out: | |||
4436 | return ret; | 4520 | return ret; |
4437 | } | 4521 | } |
4438 | 4522 | ||
4439 | static void ocfs2_figure_contig_type(struct inode *inode, | 4523 | static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et, |
4440 | struct ocfs2_insert_type *insert, | 4524 | struct ocfs2_insert_type *insert, |
4441 | struct ocfs2_extent_list *el, | 4525 | struct ocfs2_extent_list *el, |
4442 | struct ocfs2_extent_rec *insert_rec, | 4526 | struct ocfs2_extent_rec *insert_rec) |
4443 | struct ocfs2_extent_tree *et) | ||
4444 | { | 4527 | { |
4445 | int i; | 4528 | int i; |
4446 | enum ocfs2_contig_type contig_type = CONTIG_NONE; | 4529 | enum ocfs2_contig_type contig_type = CONTIG_NONE; |
@@ -4448,8 +4531,8 @@ static void ocfs2_figure_contig_type(struct inode *inode, | |||
4448 | BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); | 4531 | BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); |
4449 | 4532 | ||
4450 | for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { | 4533 | for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { |
4451 | contig_type = ocfs2_extent_contig(inode, &el->l_recs[i], | 4534 | contig_type = ocfs2_et_extent_contig(et, &el->l_recs[i], |
4452 | insert_rec); | 4535 | insert_rec); |
4453 | if (contig_type != CONTIG_NONE) { | 4536 | if (contig_type != CONTIG_NONE) { |
4454 | insert->ins_contig_index = i; | 4537 | insert->ins_contig_index = i; |
4455 | break; | 4538 | break; |
@@ -4530,8 +4613,7 @@ set_tail_append: | |||
4530 | * All of the information is stored on the ocfs2_insert_type | 4613 | * All of the information is stored on the ocfs2_insert_type |
4531 | * structure. | 4614 | * structure. |
4532 | */ | 4615 | */ |
4533 | static int ocfs2_figure_insert_type(struct inode *inode, | 4616 | static int ocfs2_figure_insert_type(struct ocfs2_extent_tree *et, |
4534 | struct ocfs2_extent_tree *et, | ||
4535 | struct buffer_head **last_eb_bh, | 4617 | struct buffer_head **last_eb_bh, |
4536 | struct ocfs2_extent_rec *insert_rec, | 4618 | struct ocfs2_extent_rec *insert_rec, |
4537 | int *free_records, | 4619 | int *free_records, |
@@ -4555,7 +4637,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4555 | * ocfs2_figure_insert_type() and ocfs2_add_branch() | 4637 | * ocfs2_figure_insert_type() and ocfs2_add_branch() |
4556 | * may want it later. | 4638 | * may want it later. |
4557 | */ | 4639 | */ |
4558 | ret = ocfs2_read_extent_block(inode, | 4640 | ret = ocfs2_read_extent_block(et->et_ci, |
4559 | ocfs2_et_get_last_eb_blk(et), | 4641 | ocfs2_et_get_last_eb_blk(et), |
4560 | &bh); | 4642 | &bh); |
4561 | if (ret) { | 4643 | if (ret) { |
@@ -4578,7 +4660,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4578 | le16_to_cpu(el->l_next_free_rec); | 4660 | le16_to_cpu(el->l_next_free_rec); |
4579 | 4661 | ||
4580 | if (!insert->ins_tree_depth) { | 4662 | if (!insert->ins_tree_depth) { |
4581 | ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); | 4663 | ocfs2_figure_contig_type(et, insert, el, insert_rec); |
4582 | ocfs2_figure_appending_type(insert, el, insert_rec); | 4664 | ocfs2_figure_appending_type(insert, el, insert_rec); |
4583 | return 0; | 4665 | return 0; |
4584 | } | 4666 | } |
@@ -4596,7 +4678,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4596 | * us the rightmost tree path. This is accounted for below in | 4678 | * us the rightmost tree path. This is accounted for below in |
4597 | * the appending code. | 4679 | * the appending code. |
4598 | */ | 4680 | */ |
4599 | ret = ocfs2_find_path(inode, path, le32_to_cpu(insert_rec->e_cpos)); | 4681 | ret = ocfs2_find_path(et->et_ci, path, le32_to_cpu(insert_rec->e_cpos)); |
4600 | if (ret) { | 4682 | if (ret) { |
4601 | mlog_errno(ret); | 4683 | mlog_errno(ret); |
4602 | goto out; | 4684 | goto out; |
@@ -4612,7 +4694,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
4612 | * into two types of appends: simple record append, or a | 4694 | * into two types of appends: simple record append, or a |
4613 | * rotate inside the tail leaf. | 4695 | * rotate inside the tail leaf. |
4614 | */ | 4696 | */ |
4615 | ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); | 4697 | ocfs2_figure_contig_type(et, insert, el, insert_rec); |
4616 | 4698 | ||
4617 | /* | 4699 | /* |
4618 | * The insert code isn't quite ready to deal with all cases of | 4700 | * The insert code isn't quite ready to deal with all cases of |
@@ -4657,13 +4739,11 @@ out: | |||
4657 | } | 4739 | } |
4658 | 4740 | ||
4659 | /* | 4741 | /* |
4660 | * Insert an extent into an inode btree. | 4742 | * Insert an extent into a btree. |
4661 | * | 4743 | * |
4662 | * The caller needs to update fe->i_clusters | 4744 | * The caller needs to update the owning btree's cluster count. |
4663 | */ | 4745 | */ |
4664 | int ocfs2_insert_extent(struct ocfs2_super *osb, | 4746 | int ocfs2_insert_extent(handle_t *handle, |
4665 | handle_t *handle, | ||
4666 | struct inode *inode, | ||
4667 | struct ocfs2_extent_tree *et, | 4747 | struct ocfs2_extent_tree *et, |
4668 | u32 cpos, | 4748 | u32 cpos, |
4669 | u64 start_blk, | 4749 | u64 start_blk, |
@@ -4677,21 +4757,22 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
4677 | struct ocfs2_insert_type insert = {0, }; | 4757 | struct ocfs2_insert_type insert = {0, }; |
4678 | struct ocfs2_extent_rec rec; | 4758 | struct ocfs2_extent_rec rec; |
4679 | 4759 | ||
4680 | mlog(0, "add %u clusters at position %u to inode %llu\n", | 4760 | mlog(0, "add %u clusters at position %u to owner %llu\n", |
4681 | new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); | 4761 | new_clusters, cpos, |
4762 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); | ||
4682 | 4763 | ||
4683 | memset(&rec, 0, sizeof(rec)); | 4764 | memset(&rec, 0, sizeof(rec)); |
4684 | rec.e_cpos = cpu_to_le32(cpos); | 4765 | rec.e_cpos = cpu_to_le32(cpos); |
4685 | rec.e_blkno = cpu_to_le64(start_blk); | 4766 | rec.e_blkno = cpu_to_le64(start_blk); |
4686 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); | 4767 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); |
4687 | rec.e_flags = flags; | 4768 | rec.e_flags = flags; |
4688 | status = ocfs2_et_insert_check(inode, et, &rec); | 4769 | status = ocfs2_et_insert_check(et, &rec); |
4689 | if (status) { | 4770 | if (status) { |
4690 | mlog_errno(status); | 4771 | mlog_errno(status); |
4691 | goto bail; | 4772 | goto bail; |
4692 | } | 4773 | } |
4693 | 4774 | ||
4694 | status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec, | 4775 | status = ocfs2_figure_insert_type(et, &last_eb_bh, &rec, |
4695 | &free_records, &insert); | 4776 | &free_records, &insert); |
4696 | if (status < 0) { | 4777 | if (status < 0) { |
4697 | mlog_errno(status); | 4778 | mlog_errno(status); |
@@ -4705,7 +4786,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
4705 | free_records, insert.ins_tree_depth); | 4786 | free_records, insert.ins_tree_depth); |
4706 | 4787 | ||
4707 | if (insert.ins_contig == CONTIG_NONE && free_records == 0) { | 4788 | if (insert.ins_contig == CONTIG_NONE && free_records == 0) { |
4708 | status = ocfs2_grow_tree(inode, handle, et, | 4789 | status = ocfs2_grow_tree(handle, et, |
4709 | &insert.ins_tree_depth, &last_eb_bh, | 4790 | &insert.ins_tree_depth, &last_eb_bh, |
4710 | meta_ac); | 4791 | meta_ac); |
4711 | if (status) { | 4792 | if (status) { |
@@ -4715,11 +4796,11 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
4715 | } | 4796 | } |
4716 | 4797 | ||
4717 | /* Finally, we can add clusters. This might rotate the tree for us. */ | 4798 | /* Finally, we can add clusters. This might rotate the tree for us. */ |
4718 | status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert); | 4799 | status = ocfs2_do_insert_extent(handle, et, &rec, &insert); |
4719 | if (status < 0) | 4800 | if (status < 0) |
4720 | mlog_errno(status); | 4801 | mlog_errno(status); |
4721 | else if (et->et_ops == &ocfs2_dinode_et_ops) | 4802 | else |
4722 | ocfs2_extent_map_insert_rec(inode, &rec); | 4803 | ocfs2_et_extent_map_insert(et, &rec); |
4723 | 4804 | ||
4724 | bail: | 4805 | bail: |
4725 | brelse(last_eb_bh); | 4806 | brelse(last_eb_bh); |
@@ -4735,13 +4816,11 @@ bail: | |||
4735 | * it is not limited to the file storage. Any extent tree can use this | 4816 | * it is not limited to the file storage. Any extent tree can use this |
4736 | * function if it implements the proper ocfs2_extent_tree. | 4817 | * function if it implements the proper ocfs2_extent_tree. |
4737 | */ | 4818 | */ |
4738 | int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, | 4819 | int ocfs2_add_clusters_in_btree(handle_t *handle, |
4739 | struct inode *inode, | 4820 | struct ocfs2_extent_tree *et, |
4740 | u32 *logical_offset, | 4821 | u32 *logical_offset, |
4741 | u32 clusters_to_add, | 4822 | u32 clusters_to_add, |
4742 | int mark_unwritten, | 4823 | int mark_unwritten, |
4743 | struct ocfs2_extent_tree *et, | ||
4744 | handle_t *handle, | ||
4745 | struct ocfs2_alloc_context *data_ac, | 4824 | struct ocfs2_alloc_context *data_ac, |
4746 | struct ocfs2_alloc_context *meta_ac, | 4825 | struct ocfs2_alloc_context *meta_ac, |
4747 | enum ocfs2_alloc_restarted *reason_ret) | 4826 | enum ocfs2_alloc_restarted *reason_ret) |
@@ -4752,13 +4831,15 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, | |||
4752 | u32 bit_off, num_bits; | 4831 | u32 bit_off, num_bits; |
4753 | u64 block; | 4832 | u64 block; |
4754 | u8 flags = 0; | 4833 | u8 flags = 0; |
4834 | struct ocfs2_super *osb = | ||
4835 | OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci)); | ||
4755 | 4836 | ||
4756 | BUG_ON(!clusters_to_add); | 4837 | BUG_ON(!clusters_to_add); |
4757 | 4838 | ||
4758 | if (mark_unwritten) | 4839 | if (mark_unwritten) |
4759 | flags = OCFS2_EXT_UNWRITTEN; | 4840 | flags = OCFS2_EXT_UNWRITTEN; |
4760 | 4841 | ||
4761 | free_extents = ocfs2_num_free_extents(osb, inode, et); | 4842 | free_extents = ocfs2_num_free_extents(osb, et); |
4762 | if (free_extents < 0) { | 4843 | if (free_extents < 0) { |
4763 | status = free_extents; | 4844 | status = free_extents; |
4764 | mlog_errno(status); | 4845 | mlog_errno(status); |
@@ -4795,7 +4876,7 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, | |||
4795 | BUG_ON(num_bits > clusters_to_add); | 4876 | BUG_ON(num_bits > clusters_to_add); |
4796 | 4877 | ||
4797 | /* reserve our write early -- insert_extent may update the tree root */ | 4878 | /* reserve our write early -- insert_extent may update the tree root */ |
4798 | status = ocfs2_et_root_journal_access(handle, inode, et, | 4879 | status = ocfs2_et_root_journal_access(handle, et, |
4799 | OCFS2_JOURNAL_ACCESS_WRITE); | 4880 | OCFS2_JOURNAL_ACCESS_WRITE); |
4800 | if (status < 0) { | 4881 | if (status < 0) { |
4801 | mlog_errno(status); | 4882 | mlog_errno(status); |
@@ -4803,10 +4884,10 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, | |||
4803 | } | 4884 | } |
4804 | 4885 | ||
4805 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); | 4886 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); |
4806 | mlog(0, "Allocating %u clusters at block %u for inode %llu\n", | 4887 | mlog(0, "Allocating %u clusters at block %u for owner %llu\n", |
4807 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | 4888 | num_bits, bit_off, |
4808 | status = ocfs2_insert_extent(osb, handle, inode, et, | 4889 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci)); |
4809 | *logical_offset, block, | 4890 | status = ocfs2_insert_extent(handle, et, *logical_offset, block, |
4810 | num_bits, flags, meta_ac); | 4891 | num_bits, flags, meta_ac); |
4811 | if (status < 0) { | 4892 | if (status < 0) { |
4812 | mlog_errno(status); | 4893 | mlog_errno(status); |
@@ -4856,10 +4937,9 @@ static void ocfs2_make_right_split_rec(struct super_block *sb, | |||
4856 | split_rec->e_flags = rec->e_flags; | 4937 | split_rec->e_flags = rec->e_flags; |
4857 | } | 4938 | } |
4858 | 4939 | ||
4859 | static int ocfs2_split_and_insert(struct inode *inode, | 4940 | static int ocfs2_split_and_insert(handle_t *handle, |
4860 | handle_t *handle, | ||
4861 | struct ocfs2_path *path, | ||
4862 | struct ocfs2_extent_tree *et, | 4941 | struct ocfs2_extent_tree *et, |
4942 | struct ocfs2_path *path, | ||
4863 | struct buffer_head **last_eb_bh, | 4943 | struct buffer_head **last_eb_bh, |
4864 | int split_index, | 4944 | int split_index, |
4865 | struct ocfs2_extent_rec *orig_split_rec, | 4945 | struct ocfs2_extent_rec *orig_split_rec, |
@@ -4892,7 +4972,7 @@ leftright: | |||
4892 | 4972 | ||
4893 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == | 4973 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == |
4894 | le16_to_cpu(rightmost_el->l_count)) { | 4974 | le16_to_cpu(rightmost_el->l_count)) { |
4895 | ret = ocfs2_grow_tree(inode, handle, et, | 4975 | ret = ocfs2_grow_tree(handle, et, |
4896 | &depth, last_eb_bh, meta_ac); | 4976 | &depth, last_eb_bh, meta_ac); |
4897 | if (ret) { | 4977 | if (ret) { |
4898 | mlog_errno(ret); | 4978 | mlog_errno(ret); |
@@ -4921,8 +5001,8 @@ leftright: | |||
4921 | */ | 5001 | */ |
4922 | insert.ins_split = SPLIT_RIGHT; | 5002 | insert.ins_split = SPLIT_RIGHT; |
4923 | 5003 | ||
4924 | ocfs2_make_right_split_rec(inode->i_sb, &tmprec, insert_range, | 5004 | ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci), |
4925 | &rec); | 5005 | &tmprec, insert_range, &rec); |
4926 | 5006 | ||
4927 | split_rec = tmprec; | 5007 | split_rec = tmprec; |
4928 | 5008 | ||
@@ -4930,7 +5010,7 @@ leftright: | |||
4930 | do_leftright = 1; | 5010 | do_leftright = 1; |
4931 | } | 5011 | } |
4932 | 5012 | ||
4933 | ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); | 5013 | ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert); |
4934 | if (ret) { | 5014 | if (ret) { |
4935 | mlog_errno(ret); | 5015 | mlog_errno(ret); |
4936 | goto out; | 5016 | goto out; |
@@ -4946,7 +5026,7 @@ leftright: | |||
4946 | ocfs2_reinit_path(path, 1); | 5026 | ocfs2_reinit_path(path, 1); |
4947 | 5027 | ||
4948 | cpos = le32_to_cpu(split_rec.e_cpos); | 5028 | cpos = le32_to_cpu(split_rec.e_cpos); |
4949 | ret = ocfs2_find_path(inode, path, cpos); | 5029 | ret = ocfs2_find_path(et->et_ci, path, cpos); |
4950 | if (ret) { | 5030 | if (ret) { |
4951 | mlog_errno(ret); | 5031 | mlog_errno(ret); |
4952 | goto out; | 5032 | goto out; |
@@ -4961,8 +5041,8 @@ out: | |||
4961 | return ret; | 5041 | return ret; |
4962 | } | 5042 | } |
4963 | 5043 | ||
4964 | static int ocfs2_replace_extent_rec(struct inode *inode, | 5044 | static int ocfs2_replace_extent_rec(handle_t *handle, |
4965 | handle_t *handle, | 5045 | struct ocfs2_extent_tree *et, |
4966 | struct ocfs2_path *path, | 5046 | struct ocfs2_path *path, |
4967 | struct ocfs2_extent_list *el, | 5047 | struct ocfs2_extent_list *el, |
4968 | int split_index, | 5048 | int split_index, |
@@ -4970,7 +5050,7 @@ static int ocfs2_replace_extent_rec(struct inode *inode, | |||
4970 | { | 5050 | { |
4971 | int ret; | 5051 | int ret; |
4972 | 5052 | ||
4973 | ret = ocfs2_path_bh_journal_access(handle, inode, path, | 5053 | ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path, |
4974 | path_num_items(path) - 1); | 5054 | path_num_items(path) - 1); |
4975 | if (ret) { | 5055 | if (ret) { |
4976 | mlog_errno(ret); | 5056 | mlog_errno(ret); |
@@ -4985,9 +5065,8 @@ out: | |||
4985 | } | 5065 | } |
4986 | 5066 | ||
4987 | /* | 5067 | /* |
4988 | * Mark part or all of the extent record at split_index in the leaf | 5068 | * Split part or all of the extent record at split_index in the leaf |
4989 | * pointed to by path as written. This removes the unwritten | 5069 | * pointed to by path. Merge with the contiguous extent record if needed. |
4990 | * extent flag. | ||
4991 | * | 5070 | * |
4992 | * Care is taken to handle contiguousness so as to not grow the tree. | 5071 | * Care is taken to handle contiguousness so as to not grow the tree. |
4993 | * | 5072 | * |
@@ -5004,14 +5083,13 @@ out: | |||
5004 | * have been brought into cache (and pinned via the journal), so the | 5083 | * have been brought into cache (and pinned via the journal), so the |
5005 | * extra overhead is not expressed in terms of disk reads. | 5084 | * extra overhead is not expressed in terms of disk reads. |
5006 | */ | 5085 | */ |
5007 | static int __ocfs2_mark_extent_written(struct inode *inode, | 5086 | int ocfs2_split_extent(handle_t *handle, |
5008 | struct ocfs2_extent_tree *et, | 5087 | struct ocfs2_extent_tree *et, |
5009 | handle_t *handle, | 5088 | struct ocfs2_path *path, |
5010 | struct ocfs2_path *path, | 5089 | int split_index, |
5011 | int split_index, | 5090 | struct ocfs2_extent_rec *split_rec, |
5012 | struct ocfs2_extent_rec *split_rec, | 5091 | struct ocfs2_alloc_context *meta_ac, |
5013 | struct ocfs2_alloc_context *meta_ac, | 5092 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
5014 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
5015 | { | 5093 | { |
5016 | int ret = 0; | 5094 | int ret = 0; |
5017 | struct ocfs2_extent_list *el = path_leaf_el(path); | 5095 | struct ocfs2_extent_list *el = path_leaf_el(path); |
@@ -5020,12 +5098,6 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
5020 | struct ocfs2_merge_ctxt ctxt; | 5098 | struct ocfs2_merge_ctxt ctxt; |
5021 | struct ocfs2_extent_list *rightmost_el; | 5099 | struct ocfs2_extent_list *rightmost_el; |
5022 | 5100 | ||
5023 | if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) { | ||
5024 | ret = -EIO; | ||
5025 | mlog_errno(ret); | ||
5026 | goto out; | ||
5027 | } | ||
5028 | |||
5029 | if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) || | 5101 | if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) || |
5030 | ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) < | 5102 | ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) < |
5031 | (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) { | 5103 | (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) { |
@@ -5034,19 +5106,19 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
5034 | goto out; | 5106 | goto out; |
5035 | } | 5107 | } |
5036 | 5108 | ||
5037 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el, | 5109 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(et, path, el, |
5038 | split_index, | 5110 | split_index, |
5039 | split_rec); | 5111 | split_rec); |
5040 | 5112 | ||
5041 | /* | 5113 | /* |
5042 | * The core merge / split code wants to know how much room is | 5114 | * The core merge / split code wants to know how much room is |
5043 | * left in this inodes allocation tree, so we pass the | 5115 | * left in this allocation tree, so we pass the |
5044 | * rightmost extent list. | 5116 | * rightmost extent list. |
5045 | */ | 5117 | */ |
5046 | if (path->p_tree_depth) { | 5118 | if (path->p_tree_depth) { |
5047 | struct ocfs2_extent_block *eb; | 5119 | struct ocfs2_extent_block *eb; |
5048 | 5120 | ||
5049 | ret = ocfs2_read_extent_block(inode, | 5121 | ret = ocfs2_read_extent_block(et->et_ci, |
5050 | ocfs2_et_get_last_eb_blk(et), | 5122 | ocfs2_et_get_last_eb_blk(et), |
5051 | &last_eb_bh); | 5123 | &last_eb_bh); |
5052 | if (ret) { | 5124 | if (ret) { |
@@ -5073,19 +5145,18 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
5073 | 5145 | ||
5074 | if (ctxt.c_contig_type == CONTIG_NONE) { | 5146 | if (ctxt.c_contig_type == CONTIG_NONE) { |
5075 | if (ctxt.c_split_covers_rec) | 5147 | if (ctxt.c_split_covers_rec) |
5076 | ret = ocfs2_replace_extent_rec(inode, handle, | 5148 | ret = ocfs2_replace_extent_rec(handle, et, path, el, |
5077 | path, el, | ||
5078 | split_index, split_rec); | 5149 | split_index, split_rec); |
5079 | else | 5150 | else |
5080 | ret = ocfs2_split_and_insert(inode, handle, path, et, | 5151 | ret = ocfs2_split_and_insert(handle, et, path, |
5081 | &last_eb_bh, split_index, | 5152 | &last_eb_bh, split_index, |
5082 | split_rec, meta_ac); | 5153 | split_rec, meta_ac); |
5083 | if (ret) | 5154 | if (ret) |
5084 | mlog_errno(ret); | 5155 | mlog_errno(ret); |
5085 | } else { | 5156 | } else { |
5086 | ret = ocfs2_try_to_merge_extent(inode, handle, path, | 5157 | ret = ocfs2_try_to_merge_extent(handle, et, path, |
5087 | split_index, split_rec, | 5158 | split_index, split_rec, |
5088 | dealloc, &ctxt, et); | 5159 | dealloc, &ctxt); |
5089 | if (ret) | 5160 | if (ret) |
5090 | mlog_errno(ret); | 5161 | mlog_errno(ret); |
5091 | } | 5162 | } |
@@ -5096,46 +5167,31 @@ out: | |||
5096 | } | 5167 | } |
5097 | 5168 | ||
5098 | /* | 5169 | /* |
5099 | * Mark the already-existing extent at cpos as written for len clusters. | 5170 | * Change the flags of the already-existing extent at cpos for len clusters. |
5171 | * | ||
5172 | * new_flags: the flags we want to set. | ||
5173 | * clear_flags: the flags we want to clear. | ||
5174 | * phys: the new physical offset we want this new extent starts from. | ||
5100 | * | 5175 | * |
5101 | * If the existing extent is larger than the request, initiate a | 5176 | * If the existing extent is larger than the request, initiate a |
5102 | * split. An attempt will be made at merging with adjacent extents. | 5177 | * split. An attempt will be made at merging with adjacent extents. |
5103 | * | 5178 | * |
5104 | * The caller is responsible for passing down meta_ac if we'll need it. | 5179 | * The caller is responsible for passing down meta_ac if we'll need it. |
5105 | */ | 5180 | */ |
5106 | int ocfs2_mark_extent_written(struct inode *inode, | 5181 | int ocfs2_change_extent_flag(handle_t *handle, |
5107 | struct ocfs2_extent_tree *et, | 5182 | struct ocfs2_extent_tree *et, |
5108 | handle_t *handle, u32 cpos, u32 len, u32 phys, | 5183 | u32 cpos, u32 len, u32 phys, |
5109 | struct ocfs2_alloc_context *meta_ac, | 5184 | struct ocfs2_alloc_context *meta_ac, |
5110 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 5185 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
5186 | int new_flags, int clear_flags) | ||
5111 | { | 5187 | { |
5112 | int ret, index; | 5188 | int ret, index; |
5113 | u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys); | 5189 | struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci); |
5190 | u64 start_blkno = ocfs2_clusters_to_blocks(sb, phys); | ||
5114 | struct ocfs2_extent_rec split_rec; | 5191 | struct ocfs2_extent_rec split_rec; |
5115 | struct ocfs2_path *left_path = NULL; | 5192 | struct ocfs2_path *left_path = NULL; |
5116 | struct ocfs2_extent_list *el; | 5193 | struct ocfs2_extent_list *el; |
5117 | 5194 | struct ocfs2_extent_rec *rec; | |
5118 | mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n", | ||
5119 | inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno); | ||
5120 | |||
5121 | if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) { | ||
5122 | ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents " | ||
5123 | "that are being written to, but the feature bit " | ||
5124 | "is not set in the super block.", | ||
5125 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
5126 | ret = -EROFS; | ||
5127 | goto out; | ||
5128 | } | ||
5129 | |||
5130 | /* | ||
5131 | * XXX: This should be fixed up so that we just re-insert the | ||
5132 | * next extent records. | ||
5133 | * | ||
5134 | * XXX: This is a hack on the extent tree, maybe it should be | ||
5135 | * an op? | ||
5136 | */ | ||
5137 | if (et->et_ops == &ocfs2_dinode_et_ops) | ||
5138 | ocfs2_extent_map_trunc(inode, 0); | ||
5139 | 5195 | ||
5140 | left_path = ocfs2_new_path_from_et(et); | 5196 | left_path = ocfs2_new_path_from_et(et); |
5141 | if (!left_path) { | 5197 | if (!left_path) { |
@@ -5144,7 +5200,7 @@ int ocfs2_mark_extent_written(struct inode *inode, | |||
5144 | goto out; | 5200 | goto out; |
5145 | } | 5201 | } |
5146 | 5202 | ||
5147 | ret = ocfs2_find_path(inode, left_path, cpos); | 5203 | ret = ocfs2_find_path(et->et_ci, left_path, cpos); |
5148 | if (ret) { | 5204 | if (ret) { |
5149 | mlog_errno(ret); | 5205 | mlog_errno(ret); |
5150 | goto out; | 5206 | goto out; |
@@ -5153,34 +5209,102 @@ int ocfs2_mark_extent_written(struct inode *inode, | |||
5153 | 5209 | ||
5154 | index = ocfs2_search_extent_list(el, cpos); | 5210 | index = ocfs2_search_extent_list(el, cpos); |
5155 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | 5211 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { |
5156 | ocfs2_error(inode->i_sb, | 5212 | ocfs2_error(sb, |
5157 | "Inode %llu has an extent at cpos %u which can no " | 5213 | "Owner %llu has an extent at cpos %u which can no " |
5158 | "longer be found.\n", | 5214 | "longer be found.\n", |
5159 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); | 5215 | (unsigned long long) |
5216 | ocfs2_metadata_cache_owner(et->et_ci), cpos); | ||
5160 | ret = -EROFS; | 5217 | ret = -EROFS; |
5161 | goto out; | 5218 | goto out; |
5162 | } | 5219 | } |
5163 | 5220 | ||
5221 | ret = -EIO; | ||
5222 | rec = &el->l_recs[index]; | ||
5223 | if (new_flags && (rec->e_flags & new_flags)) { | ||
5224 | mlog(ML_ERROR, "Owner %llu tried to set %d flags on an " | ||
5225 | "extent that already had them", | ||
5226 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), | ||
5227 | new_flags); | ||
5228 | goto out; | ||
5229 | } | ||
5230 | |||
5231 | if (clear_flags && !(rec->e_flags & clear_flags)) { | ||
5232 | mlog(ML_ERROR, "Owner %llu tried to clear %d flags on an " | ||
5233 | "extent that didn't have them", | ||
5234 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), | ||
5235 | clear_flags); | ||
5236 | goto out; | ||
5237 | } | ||
5238 | |||
5164 | memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec)); | 5239 | memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec)); |
5165 | split_rec.e_cpos = cpu_to_le32(cpos); | 5240 | split_rec.e_cpos = cpu_to_le32(cpos); |
5166 | split_rec.e_leaf_clusters = cpu_to_le16(len); | 5241 | split_rec.e_leaf_clusters = cpu_to_le16(len); |
5167 | split_rec.e_blkno = cpu_to_le64(start_blkno); | 5242 | split_rec.e_blkno = cpu_to_le64(start_blkno); |
5168 | split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; | 5243 | split_rec.e_flags = rec->e_flags; |
5169 | split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; | 5244 | if (new_flags) |
5170 | 5245 | split_rec.e_flags |= new_flags; | |
5171 | ret = __ocfs2_mark_extent_written(inode, et, handle, left_path, | 5246 | if (clear_flags) |
5172 | index, &split_rec, meta_ac, | 5247 | split_rec.e_flags &= ~clear_flags; |
5173 | dealloc); | 5248 | |
5249 | ret = ocfs2_split_extent(handle, et, left_path, | ||
5250 | index, &split_rec, meta_ac, | ||
5251 | dealloc); | ||
5174 | if (ret) | 5252 | if (ret) |
5175 | mlog_errno(ret); | 5253 | mlog_errno(ret); |
5176 | 5254 | ||
5177 | out: | 5255 | out: |
5178 | ocfs2_free_path(left_path); | 5256 | ocfs2_free_path(left_path); |
5179 | return ret; | 5257 | return ret; |
5258 | |||
5180 | } | 5259 | } |
5181 | 5260 | ||
5182 | static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, | 5261 | /* |
5183 | handle_t *handle, struct ocfs2_path *path, | 5262 | * Mark the already-existing extent at cpos as written for len clusters. |
5263 | * This removes the unwritten extent flag. | ||
5264 | * | ||
5265 | * If the existing extent is larger than the request, initiate a | ||
5266 | * split. An attempt will be made at merging with adjacent extents. | ||
5267 | * | ||
5268 | * The caller is responsible for passing down meta_ac if we'll need it. | ||
5269 | */ | ||
5270 | int ocfs2_mark_extent_written(struct inode *inode, | ||
5271 | struct ocfs2_extent_tree *et, | ||
5272 | handle_t *handle, u32 cpos, u32 len, u32 phys, | ||
5273 | struct ocfs2_alloc_context *meta_ac, | ||
5274 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
5275 | { | ||
5276 | int ret; | ||
5277 | |||
5278 | mlog(0, "Inode %lu cpos %u, len %u, phys clusters %u\n", | ||
5279 | inode->i_ino, cpos, len, phys); | ||
5280 | |||
5281 | if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) { | ||
5282 | ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents " | ||
5283 | "that are being written to, but the feature bit " | ||
5284 | "is not set in the super block.", | ||
5285 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
5286 | ret = -EROFS; | ||
5287 | goto out; | ||
5288 | } | ||
5289 | |||
5290 | /* | ||
5291 | * XXX: This should be fixed up so that we just re-insert the | ||
5292 | * next extent records. | ||
5293 | */ | ||
5294 | ocfs2_et_extent_map_truncate(et, 0); | ||
5295 | |||
5296 | ret = ocfs2_change_extent_flag(handle, et, cpos, | ||
5297 | len, phys, meta_ac, dealloc, | ||
5298 | 0, OCFS2_EXT_UNWRITTEN); | ||
5299 | if (ret) | ||
5300 | mlog_errno(ret); | ||
5301 | |||
5302 | out: | ||
5303 | return ret; | ||
5304 | } | ||
5305 | |||
5306 | static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et, | ||
5307 | struct ocfs2_path *path, | ||
5184 | int index, u32 new_range, | 5308 | int index, u32 new_range, |
5185 | struct ocfs2_alloc_context *meta_ac) | 5309 | struct ocfs2_alloc_context *meta_ac) |
5186 | { | 5310 | { |
@@ -5197,11 +5321,12 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, | |||
5197 | */ | 5321 | */ |
5198 | el = path_leaf_el(path); | 5322 | el = path_leaf_el(path); |
5199 | rec = &el->l_recs[index]; | 5323 | rec = &el->l_recs[index]; |
5200 | ocfs2_make_right_split_rec(inode->i_sb, &split_rec, new_range, rec); | 5324 | ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci), |
5325 | &split_rec, new_range, rec); | ||
5201 | 5326 | ||
5202 | depth = path->p_tree_depth; | 5327 | depth = path->p_tree_depth; |
5203 | if (depth > 0) { | 5328 | if (depth > 0) { |
5204 | ret = ocfs2_read_extent_block(inode, | 5329 | ret = ocfs2_read_extent_block(et->et_ci, |
5205 | ocfs2_et_get_last_eb_blk(et), | 5330 | ocfs2_et_get_last_eb_blk(et), |
5206 | &last_eb_bh); | 5331 | &last_eb_bh); |
5207 | if (ret < 0) { | 5332 | if (ret < 0) { |
@@ -5224,7 +5349,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, | |||
5224 | 5349 | ||
5225 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == | 5350 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == |
5226 | le16_to_cpu(rightmost_el->l_count)) { | 5351 | le16_to_cpu(rightmost_el->l_count)) { |
5227 | ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh, | 5352 | ret = ocfs2_grow_tree(handle, et, &depth, &last_eb_bh, |
5228 | meta_ac); | 5353 | meta_ac); |
5229 | if (ret) { | 5354 | if (ret) { |
5230 | mlog_errno(ret); | 5355 | mlog_errno(ret); |
@@ -5238,7 +5363,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, | |||
5238 | insert.ins_split = SPLIT_RIGHT; | 5363 | insert.ins_split = SPLIT_RIGHT; |
5239 | insert.ins_tree_depth = depth; | 5364 | insert.ins_tree_depth = depth; |
5240 | 5365 | ||
5241 | ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); | 5366 | ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert); |
5242 | if (ret) | 5367 | if (ret) |
5243 | mlog_errno(ret); | 5368 | mlog_errno(ret); |
5244 | 5369 | ||
@@ -5247,23 +5372,23 @@ out: | |||
5247 | return ret; | 5372 | return ret; |
5248 | } | 5373 | } |
5249 | 5374 | ||
5250 | static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | 5375 | static int ocfs2_truncate_rec(handle_t *handle, |
5376 | struct ocfs2_extent_tree *et, | ||
5251 | struct ocfs2_path *path, int index, | 5377 | struct ocfs2_path *path, int index, |
5252 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 5378 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
5253 | u32 cpos, u32 len, | 5379 | u32 cpos, u32 len) |
5254 | struct ocfs2_extent_tree *et) | ||
5255 | { | 5380 | { |
5256 | int ret; | 5381 | int ret; |
5257 | u32 left_cpos, rec_range, trunc_range; | 5382 | u32 left_cpos, rec_range, trunc_range; |
5258 | int wants_rotate = 0, is_rightmost_tree_rec = 0; | 5383 | int wants_rotate = 0, is_rightmost_tree_rec = 0; |
5259 | struct super_block *sb = inode->i_sb; | 5384 | struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci); |
5260 | struct ocfs2_path *left_path = NULL; | 5385 | struct ocfs2_path *left_path = NULL; |
5261 | struct ocfs2_extent_list *el = path_leaf_el(path); | 5386 | struct ocfs2_extent_list *el = path_leaf_el(path); |
5262 | struct ocfs2_extent_rec *rec; | 5387 | struct ocfs2_extent_rec *rec; |
5263 | struct ocfs2_extent_block *eb; | 5388 | struct ocfs2_extent_block *eb; |
5264 | 5389 | ||
5265 | if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { | 5390 | if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { |
5266 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); | 5391 | ret = ocfs2_rotate_tree_left(handle, et, path, dealloc); |
5267 | if (ret) { | 5392 | if (ret) { |
5268 | mlog_errno(ret); | 5393 | mlog_errno(ret); |
5269 | goto out; | 5394 | goto out; |
@@ -5295,14 +5420,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
5295 | * by this leaf and the one to it's left. | 5420 | * by this leaf and the one to it's left. |
5296 | * | 5421 | * |
5297 | * There are two cases we can skip: | 5422 | * There are two cases we can skip: |
5298 | * 1) Path is the leftmost one in our inode tree. | 5423 | * 1) Path is the leftmost one in our btree. |
5299 | * 2) The leaf is rightmost and will be empty after | 5424 | * 2) The leaf is rightmost and will be empty after |
5300 | * we remove the extent record - the rotate code | 5425 | * we remove the extent record - the rotate code |
5301 | * knows how to update the newly formed edge. | 5426 | * knows how to update the newly formed edge. |
5302 | */ | 5427 | */ |
5303 | 5428 | ||
5304 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, | 5429 | ret = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); |
5305 | &left_cpos); | ||
5306 | if (ret) { | 5430 | if (ret) { |
5307 | mlog_errno(ret); | 5431 | mlog_errno(ret); |
5308 | goto out; | 5432 | goto out; |
@@ -5316,7 +5440,8 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
5316 | goto out; | 5440 | goto out; |
5317 | } | 5441 | } |
5318 | 5442 | ||
5319 | ret = ocfs2_find_path(inode, left_path, left_cpos); | 5443 | ret = ocfs2_find_path(et->et_ci, left_path, |
5444 | left_cpos); | ||
5320 | if (ret) { | 5445 | if (ret) { |
5321 | mlog_errno(ret); | 5446 | mlog_errno(ret); |
5322 | goto out; | 5447 | goto out; |
@@ -5332,13 +5457,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
5332 | goto out; | 5457 | goto out; |
5333 | } | 5458 | } |
5334 | 5459 | ||
5335 | ret = ocfs2_journal_access_path(inode, handle, path); | 5460 | ret = ocfs2_journal_access_path(et->et_ci, handle, path); |
5336 | if (ret) { | 5461 | if (ret) { |
5337 | mlog_errno(ret); | 5462 | mlog_errno(ret); |
5338 | goto out; | 5463 | goto out; |
5339 | } | 5464 | } |
5340 | 5465 | ||
5341 | ret = ocfs2_journal_access_path(inode, handle, left_path); | 5466 | ret = ocfs2_journal_access_path(et->et_ci, handle, left_path); |
5342 | if (ret) { | 5467 | if (ret) { |
5343 | mlog_errno(ret); | 5468 | mlog_errno(ret); |
5344 | goto out; | 5469 | goto out; |
@@ -5361,7 +5486,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
5361 | * be deleted by the rotate code. | 5486 | * be deleted by the rotate code. |
5362 | */ | 5487 | */ |
5363 | rec = &el->l_recs[next_free - 1]; | 5488 | rec = &el->l_recs[next_free - 1]; |
5364 | ocfs2_adjust_rightmost_records(inode, handle, path, | 5489 | ocfs2_adjust_rightmost_records(handle, et, path, |
5365 | rec); | 5490 | rec); |
5366 | } | 5491 | } |
5367 | } else if (le32_to_cpu(rec->e_cpos) == cpos) { | 5492 | } else if (le32_to_cpu(rec->e_cpos) == cpos) { |
@@ -5373,11 +5498,12 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
5373 | /* Remove rightmost portion of the record */ | 5498 | /* Remove rightmost portion of the record */ |
5374 | le16_add_cpu(&rec->e_leaf_clusters, -len); | 5499 | le16_add_cpu(&rec->e_leaf_clusters, -len); |
5375 | if (is_rightmost_tree_rec) | 5500 | if (is_rightmost_tree_rec) |
5376 | ocfs2_adjust_rightmost_records(inode, handle, path, rec); | 5501 | ocfs2_adjust_rightmost_records(handle, et, path, rec); |
5377 | } else { | 5502 | } else { |
5378 | /* Caller should have trapped this. */ | 5503 | /* Caller should have trapped this. */ |
5379 | mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) " | 5504 | mlog(ML_ERROR, "Owner %llu: Invalid record truncate: (%u, %u) " |
5380 | "(%u, %u)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, | 5505 | "(%u, %u)\n", |
5506 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), | ||
5381 | le32_to_cpu(rec->e_cpos), | 5507 | le32_to_cpu(rec->e_cpos), |
5382 | le16_to_cpu(rec->e_leaf_clusters), cpos, len); | 5508 | le16_to_cpu(rec->e_leaf_clusters), cpos, len); |
5383 | BUG(); | 5509 | BUG(); |
@@ -5386,14 +5512,14 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
5386 | if (left_path) { | 5512 | if (left_path) { |
5387 | int subtree_index; | 5513 | int subtree_index; |
5388 | 5514 | ||
5389 | subtree_index = ocfs2_find_subtree_root(inode, left_path, path); | 5515 | subtree_index = ocfs2_find_subtree_root(et, left_path, path); |
5390 | ocfs2_complete_edge_insert(inode, handle, left_path, path, | 5516 | ocfs2_complete_edge_insert(handle, left_path, path, |
5391 | subtree_index); | 5517 | subtree_index); |
5392 | } | 5518 | } |
5393 | 5519 | ||
5394 | ocfs2_journal_dirty(handle, path_leaf_bh(path)); | 5520 | ocfs2_journal_dirty(handle, path_leaf_bh(path)); |
5395 | 5521 | ||
5396 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); | 5522 | ret = ocfs2_rotate_tree_left(handle, et, path, dealloc); |
5397 | if (ret) { | 5523 | if (ret) { |
5398 | mlog_errno(ret); | 5524 | mlog_errno(ret); |
5399 | goto out; | 5525 | goto out; |
@@ -5404,9 +5530,9 @@ out: | |||
5404 | return ret; | 5530 | return ret; |
5405 | } | 5531 | } |
5406 | 5532 | ||
5407 | int ocfs2_remove_extent(struct inode *inode, | 5533 | int ocfs2_remove_extent(handle_t *handle, |
5408 | struct ocfs2_extent_tree *et, | 5534 | struct ocfs2_extent_tree *et, |
5409 | u32 cpos, u32 len, handle_t *handle, | 5535 | u32 cpos, u32 len, |
5410 | struct ocfs2_alloc_context *meta_ac, | 5536 | struct ocfs2_alloc_context *meta_ac, |
5411 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 5537 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
5412 | { | 5538 | { |
@@ -5416,7 +5542,11 @@ int ocfs2_remove_extent(struct inode *inode, | |||
5416 | struct ocfs2_extent_list *el; | 5542 | struct ocfs2_extent_list *el; |
5417 | struct ocfs2_path *path = NULL; | 5543 | struct ocfs2_path *path = NULL; |
5418 | 5544 | ||
5419 | ocfs2_extent_map_trunc(inode, 0); | 5545 | /* |
5546 | * XXX: Why are we truncating to 0 instead of wherever this | ||
5547 | * affects us? | ||
5548 | */ | ||
5549 | ocfs2_et_extent_map_truncate(et, 0); | ||
5420 | 5550 | ||
5421 | path = ocfs2_new_path_from_et(et); | 5551 | path = ocfs2_new_path_from_et(et); |
5422 | if (!path) { | 5552 | if (!path) { |
@@ -5425,7 +5555,7 @@ int ocfs2_remove_extent(struct inode *inode, | |||
5425 | goto out; | 5555 | goto out; |
5426 | } | 5556 | } |
5427 | 5557 | ||
5428 | ret = ocfs2_find_path(inode, path, cpos); | 5558 | ret = ocfs2_find_path(et->et_ci, path, cpos); |
5429 | if (ret) { | 5559 | if (ret) { |
5430 | mlog_errno(ret); | 5560 | mlog_errno(ret); |
5431 | goto out; | 5561 | goto out; |
@@ -5434,10 +5564,11 @@ int ocfs2_remove_extent(struct inode *inode, | |||
5434 | el = path_leaf_el(path); | 5564 | el = path_leaf_el(path); |
5435 | index = ocfs2_search_extent_list(el, cpos); | 5565 | index = ocfs2_search_extent_list(el, cpos); |
5436 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | 5566 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { |
5437 | ocfs2_error(inode->i_sb, | 5567 | ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), |
5438 | "Inode %llu has an extent at cpos %u which can no " | 5568 | "Owner %llu has an extent at cpos %u which can no " |
5439 | "longer be found.\n", | 5569 | "longer be found.\n", |
5440 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); | 5570 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), |
5571 | cpos); | ||
5441 | ret = -EROFS; | 5572 | ret = -EROFS; |
5442 | goto out; | 5573 | goto out; |
5443 | } | 5574 | } |
@@ -5464,20 +5595,21 @@ int ocfs2_remove_extent(struct inode *inode, | |||
5464 | 5595 | ||
5465 | BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range); | 5596 | BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range); |
5466 | 5597 | ||
5467 | mlog(0, "Inode %llu, remove (cpos %u, len %u). Existing index %d " | 5598 | mlog(0, "Owner %llu, remove (cpos %u, len %u). Existing index %d " |
5468 | "(cpos %u, len %u)\n", | 5599 | "(cpos %u, len %u)\n", |
5469 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, len, index, | 5600 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), |
5601 | cpos, len, index, | ||
5470 | le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec)); | 5602 | le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec)); |
5471 | 5603 | ||
5472 | if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { | 5604 | if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { |
5473 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | 5605 | ret = ocfs2_truncate_rec(handle, et, path, index, dealloc, |
5474 | cpos, len, et); | 5606 | cpos, len); |
5475 | if (ret) { | 5607 | if (ret) { |
5476 | mlog_errno(ret); | 5608 | mlog_errno(ret); |
5477 | goto out; | 5609 | goto out; |
5478 | } | 5610 | } |
5479 | } else { | 5611 | } else { |
5480 | ret = ocfs2_split_tree(inode, et, handle, path, index, | 5612 | ret = ocfs2_split_tree(handle, et, path, index, |
5481 | trunc_range, meta_ac); | 5613 | trunc_range, meta_ac); |
5482 | if (ret) { | 5614 | if (ret) { |
5483 | mlog_errno(ret); | 5615 | mlog_errno(ret); |
@@ -5490,7 +5622,7 @@ int ocfs2_remove_extent(struct inode *inode, | |||
5490 | */ | 5622 | */ |
5491 | ocfs2_reinit_path(path, 1); | 5623 | ocfs2_reinit_path(path, 1); |
5492 | 5624 | ||
5493 | ret = ocfs2_find_path(inode, path, cpos); | 5625 | ret = ocfs2_find_path(et->et_ci, path, cpos); |
5494 | if (ret) { | 5626 | if (ret) { |
5495 | mlog_errno(ret); | 5627 | mlog_errno(ret); |
5496 | goto out; | 5628 | goto out; |
@@ -5499,9 +5631,9 @@ int ocfs2_remove_extent(struct inode *inode, | |||
5499 | el = path_leaf_el(path); | 5631 | el = path_leaf_el(path); |
5500 | index = ocfs2_search_extent_list(el, cpos); | 5632 | index = ocfs2_search_extent_list(el, cpos); |
5501 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | 5633 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { |
5502 | ocfs2_error(inode->i_sb, | 5634 | ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), |
5503 | "Inode %llu: split at cpos %u lost record.", | 5635 | "Owner %llu: split at cpos %u lost record.", |
5504 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 5636 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), |
5505 | cpos); | 5637 | cpos); |
5506 | ret = -EROFS; | 5638 | ret = -EROFS; |
5507 | goto out; | 5639 | goto out; |
@@ -5515,18 +5647,18 @@ int ocfs2_remove_extent(struct inode *inode, | |||
5515 | rec_range = le32_to_cpu(rec->e_cpos) + | 5647 | rec_range = le32_to_cpu(rec->e_cpos) + |
5516 | ocfs2_rec_clusters(el, rec); | 5648 | ocfs2_rec_clusters(el, rec); |
5517 | if (rec_range != trunc_range) { | 5649 | if (rec_range != trunc_range) { |
5518 | ocfs2_error(inode->i_sb, | 5650 | ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), |
5519 | "Inode %llu: error after split at cpos %u" | 5651 | "Owner %llu: error after split at cpos %u" |
5520 | "trunc len %u, existing record is (%u,%u)", | 5652 | "trunc len %u, existing record is (%u,%u)", |
5521 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 5653 | (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), |
5522 | cpos, len, le32_to_cpu(rec->e_cpos), | 5654 | cpos, len, le32_to_cpu(rec->e_cpos), |
5523 | ocfs2_rec_clusters(el, rec)); | 5655 | ocfs2_rec_clusters(el, rec)); |
5524 | ret = -EROFS; | 5656 | ret = -EROFS; |
5525 | goto out; | 5657 | goto out; |
5526 | } | 5658 | } |
5527 | 5659 | ||
5528 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | 5660 | ret = ocfs2_truncate_rec(handle, et, path, index, dealloc, |
5529 | cpos, len, et); | 5661 | cpos, len); |
5530 | if (ret) { | 5662 | if (ret) { |
5531 | mlog_errno(ret); | 5663 | mlog_errno(ret); |
5532 | goto out; | 5664 | goto out; |
@@ -5573,7 +5705,7 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
5573 | goto out; | 5705 | goto out; |
5574 | } | 5706 | } |
5575 | 5707 | ||
5576 | ret = ocfs2_et_root_journal_access(handle, inode, et, | 5708 | ret = ocfs2_et_root_journal_access(handle, et, |
5577 | OCFS2_JOURNAL_ACCESS_WRITE); | 5709 | OCFS2_JOURNAL_ACCESS_WRITE); |
5578 | if (ret) { | 5710 | if (ret) { |
5579 | mlog_errno(ret); | 5711 | mlog_errno(ret); |
@@ -5583,14 +5715,13 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
5583 | vfs_dq_free_space_nodirty(inode, | 5715 | vfs_dq_free_space_nodirty(inode, |
5584 | ocfs2_clusters_to_bytes(inode->i_sb, len)); | 5716 | ocfs2_clusters_to_bytes(inode->i_sb, len)); |
5585 | 5717 | ||
5586 | ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac, | 5718 | ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc); |
5587 | dealloc); | ||
5588 | if (ret) { | 5719 | if (ret) { |
5589 | mlog_errno(ret); | 5720 | mlog_errno(ret); |
5590 | goto out_commit; | 5721 | goto out_commit; |
5591 | } | 5722 | } |
5592 | 5723 | ||
5593 | ocfs2_et_update_clusters(inode, et, -len); | 5724 | ocfs2_et_update_clusters(et, -len); |
5594 | 5725 | ||
5595 | ret = ocfs2_journal_dirty(handle, et->et_root_bh); | 5726 | ret = ocfs2_journal_dirty(handle, et->et_root_bh); |
5596 | if (ret) { | 5727 | if (ret) { |
@@ -5690,7 +5821,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb, | |||
5690 | goto bail; | 5821 | goto bail; |
5691 | } | 5822 | } |
5692 | 5823 | ||
5693 | status = ocfs2_journal_access_di(handle, tl_inode, tl_bh, | 5824 | status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh, |
5694 | OCFS2_JOURNAL_ACCESS_WRITE); | 5825 | OCFS2_JOURNAL_ACCESS_WRITE); |
5695 | if (status < 0) { | 5826 | if (status < 0) { |
5696 | mlog_errno(status); | 5827 | mlog_errno(status); |
@@ -5752,7 +5883,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, | |||
5752 | while (i >= 0) { | 5883 | while (i >= 0) { |
5753 | /* Caller has given us at least enough credits to | 5884 | /* Caller has given us at least enough credits to |
5754 | * update the truncate log dinode */ | 5885 | * update the truncate log dinode */ |
5755 | status = ocfs2_journal_access_di(handle, tl_inode, tl_bh, | 5886 | status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh, |
5756 | OCFS2_JOURNAL_ACCESS_WRITE); | 5887 | OCFS2_JOURNAL_ACCESS_WRITE); |
5757 | if (status < 0) { | 5888 | if (status < 0) { |
5758 | mlog_errno(status); | 5889 | mlog_errno(status); |
@@ -6010,7 +6141,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, | |||
6010 | tl->tl_used = 0; | 6141 | tl->tl_used = 0; |
6011 | 6142 | ||
6012 | ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check); | 6143 | ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check); |
6013 | status = ocfs2_write_block(osb, tl_bh, tl_inode); | 6144 | status = ocfs2_write_block(osb, tl_bh, INODE_CACHE(tl_inode)); |
6014 | if (status < 0) { | 6145 | if (status < 0) { |
6015 | mlog_errno(status); | 6146 | mlog_errno(status); |
6016 | goto bail; | 6147 | goto bail; |
@@ -6400,9 +6531,9 @@ ocfs2_find_per_slot_free_list(int type, | |||
6400 | return fl; | 6531 | return fl; |
6401 | } | 6532 | } |
6402 | 6533 | ||
6403 | static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, | 6534 | int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, |
6404 | int type, int slot, u64 blkno, | 6535 | int type, int slot, u64 blkno, |
6405 | unsigned int bit) | 6536 | unsigned int bit) |
6406 | { | 6537 | { |
6407 | int ret; | 6538 | int ret; |
6408 | struct ocfs2_per_slot_free_list *fl; | 6539 | struct ocfs2_per_slot_free_list *fl; |
@@ -6518,7 +6649,7 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode, | |||
6518 | goto out; | 6649 | goto out; |
6519 | } | 6650 | } |
6520 | 6651 | ||
6521 | ret = ocfs2_find_leaf(inode, path_root_el(path), cpos, &bh); | 6652 | ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh); |
6522 | if (ret) { | 6653 | if (ret) { |
6523 | mlog_errno(ret); | 6654 | mlog_errno(ret); |
6524 | goto out; | 6655 | goto out; |
@@ -6551,7 +6682,7 @@ out: | |||
6551 | */ | 6682 | */ |
6552 | static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, | 6683 | static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, |
6553 | handle_t *handle, struct ocfs2_truncate_context *tc, | 6684 | handle_t *handle, struct ocfs2_truncate_context *tc, |
6554 | u32 clusters_to_del, u64 *delete_start) | 6685 | u32 clusters_to_del, u64 *delete_start, u8 *flags) |
6555 | { | 6686 | { |
6556 | int ret, i, index = path->p_tree_depth; | 6687 | int ret, i, index = path->p_tree_depth; |
6557 | u32 new_edge = 0; | 6688 | u32 new_edge = 0; |
@@ -6561,6 +6692,7 @@ static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, | |||
6561 | struct ocfs2_extent_rec *rec; | 6692 | struct ocfs2_extent_rec *rec; |
6562 | 6693 | ||
6563 | *delete_start = 0; | 6694 | *delete_start = 0; |
6695 | *flags = 0; | ||
6564 | 6696 | ||
6565 | while (index >= 0) { | 6697 | while (index >= 0) { |
6566 | bh = path->p_node[index].bh; | 6698 | bh = path->p_node[index].bh; |
@@ -6648,6 +6780,7 @@ find_tail_record: | |||
6648 | *delete_start = le64_to_cpu(rec->e_blkno) | 6780 | *delete_start = le64_to_cpu(rec->e_blkno) |
6649 | + ocfs2_clusters_to_blocks(inode->i_sb, | 6781 | + ocfs2_clusters_to_blocks(inode->i_sb, |
6650 | le16_to_cpu(rec->e_leaf_clusters)); | 6782 | le16_to_cpu(rec->e_leaf_clusters)); |
6783 | *flags = rec->e_flags; | ||
6651 | 6784 | ||
6652 | /* | 6785 | /* |
6653 | * If it's now empty, remove this record. | 6786 | * If it's now empty, remove this record. |
@@ -6719,7 +6852,7 @@ delete: | |||
6719 | 6852 | ||
6720 | mlog(0, "deleting this extent block.\n"); | 6853 | mlog(0, "deleting this extent block.\n"); |
6721 | 6854 | ||
6722 | ocfs2_remove_from_cache(inode, bh); | 6855 | ocfs2_remove_from_cache(INODE_CACHE(inode), bh); |
6723 | 6856 | ||
6724 | BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0])); | 6857 | BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0])); |
6725 | BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); | 6858 | BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); |
@@ -6747,7 +6880,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
6747 | struct buffer_head *fe_bh, | 6880 | struct buffer_head *fe_bh, |
6748 | handle_t *handle, | 6881 | handle_t *handle, |
6749 | struct ocfs2_truncate_context *tc, | 6882 | struct ocfs2_truncate_context *tc, |
6750 | struct ocfs2_path *path) | 6883 | struct ocfs2_path *path, |
6884 | struct ocfs2_alloc_context *meta_ac) | ||
6751 | { | 6885 | { |
6752 | int status; | 6886 | int status; |
6753 | struct ocfs2_dinode *fe; | 6887 | struct ocfs2_dinode *fe; |
@@ -6755,6 +6889,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
6755 | struct ocfs2_extent_list *el; | 6889 | struct ocfs2_extent_list *el; |
6756 | struct buffer_head *last_eb_bh = NULL; | 6890 | struct buffer_head *last_eb_bh = NULL; |
6757 | u64 delete_blk = 0; | 6891 | u64 delete_blk = 0; |
6892 | u8 rec_flags; | ||
6758 | 6893 | ||
6759 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 6894 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
6760 | 6895 | ||
@@ -6769,14 +6904,14 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
6769 | * Each component will be touched, so we might as well journal | 6904 | * Each component will be touched, so we might as well journal |
6770 | * here to avoid having to handle errors later. | 6905 | * here to avoid having to handle errors later. |
6771 | */ | 6906 | */ |
6772 | status = ocfs2_journal_access_path(inode, handle, path); | 6907 | status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path); |
6773 | if (status < 0) { | 6908 | if (status < 0) { |
6774 | mlog_errno(status); | 6909 | mlog_errno(status); |
6775 | goto bail; | 6910 | goto bail; |
6776 | } | 6911 | } |
6777 | 6912 | ||
6778 | if (last_eb_bh) { | 6913 | if (last_eb_bh) { |
6779 | status = ocfs2_journal_access_eb(handle, inode, last_eb_bh, | 6914 | status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh, |
6780 | OCFS2_JOURNAL_ACCESS_WRITE); | 6915 | OCFS2_JOURNAL_ACCESS_WRITE); |
6781 | if (status < 0) { | 6916 | if (status < 0) { |
6782 | mlog_errno(status); | 6917 | mlog_errno(status); |
@@ -6810,7 +6945,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
6810 | inode->i_blocks = ocfs2_inode_sector_count(inode); | 6945 | inode->i_blocks = ocfs2_inode_sector_count(inode); |
6811 | 6946 | ||
6812 | status = ocfs2_trim_tree(inode, path, handle, tc, | 6947 | status = ocfs2_trim_tree(inode, path, handle, tc, |
6813 | clusters_to_del, &delete_blk); | 6948 | clusters_to_del, &delete_blk, &rec_flags); |
6814 | if (status) { | 6949 | if (status) { |
6815 | mlog_errno(status); | 6950 | mlog_errno(status); |
6816 | goto bail; | 6951 | goto bail; |
@@ -6842,8 +6977,16 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
6842 | } | 6977 | } |
6843 | 6978 | ||
6844 | if (delete_blk) { | 6979 | if (delete_blk) { |
6845 | status = ocfs2_truncate_log_append(osb, handle, delete_blk, | 6980 | if (rec_flags & OCFS2_EXT_REFCOUNTED) |
6846 | clusters_to_del); | 6981 | status = ocfs2_decrease_refcount(inode, handle, |
6982 | ocfs2_blocks_to_clusters(osb->sb, | ||
6983 | delete_blk), | ||
6984 | clusters_to_del, meta_ac, | ||
6985 | &tc->tc_dealloc, 1); | ||
6986 | else | ||
6987 | status = ocfs2_truncate_log_append(osb, handle, | ||
6988 | delete_blk, | ||
6989 | clusters_to_del); | ||
6847 | if (status < 0) { | 6990 | if (status < 0) { |
6848 | mlog_errno(status); | 6991 | mlog_errno(status); |
6849 | goto bail; | 6992 | goto bail; |
@@ -6863,9 +7006,9 @@ static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) | |||
6863 | return 0; | 7006 | return 0; |
6864 | } | 7007 | } |
6865 | 7008 | ||
6866 | static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, | 7009 | void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, |
6867 | unsigned int from, unsigned int to, | 7010 | unsigned int from, unsigned int to, |
6868 | struct page *page, int zero, u64 *phys) | 7011 | struct page *page, int zero, u64 *phys) |
6869 | { | 7012 | { |
6870 | int ret, partial = 0; | 7013 | int ret, partial = 0; |
6871 | 7014 | ||
@@ -6933,20 +7076,16 @@ out: | |||
6933 | ocfs2_unlock_and_free_pages(pages, numpages); | 7076 | ocfs2_unlock_and_free_pages(pages, numpages); |
6934 | } | 7077 | } |
6935 | 7078 | ||
6936 | static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, | 7079 | int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end, |
6937 | struct page **pages, int *num) | 7080 | struct page **pages, int *num) |
6938 | { | 7081 | { |
6939 | int numpages, ret = 0; | 7082 | int numpages, ret = 0; |
6940 | struct super_block *sb = inode->i_sb; | ||
6941 | struct address_space *mapping = inode->i_mapping; | 7083 | struct address_space *mapping = inode->i_mapping; |
6942 | unsigned long index; | 7084 | unsigned long index; |
6943 | loff_t last_page_bytes; | 7085 | loff_t last_page_bytes; |
6944 | 7086 | ||
6945 | BUG_ON(start > end); | 7087 | BUG_ON(start > end); |
6946 | 7088 | ||
6947 | BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits != | ||
6948 | (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits); | ||
6949 | |||
6950 | numpages = 0; | 7089 | numpages = 0; |
6951 | last_page_bytes = PAGE_ALIGN(end); | 7090 | last_page_bytes = PAGE_ALIGN(end); |
6952 | index = start >> PAGE_CACHE_SHIFT; | 7091 | index = start >> PAGE_CACHE_SHIFT; |
@@ -6974,6 +7113,17 @@ out: | |||
6974 | return ret; | 7113 | return ret; |
6975 | } | 7114 | } |
6976 | 7115 | ||
7116 | static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, | ||
7117 | struct page **pages, int *num) | ||
7118 | { | ||
7119 | struct super_block *sb = inode->i_sb; | ||
7120 | |||
7121 | BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits != | ||
7122 | (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits); | ||
7123 | |||
7124 | return ocfs2_grab_pages(inode, start, end, pages, num); | ||
7125 | } | ||
7126 | |||
6977 | /* | 7127 | /* |
6978 | * Zero the area past i_size but still within an allocated | 7128 | * Zero the area past i_size but still within an allocated |
6979 | * cluster. This avoids exposing nonzero data on subsequent file | 7129 | * cluster. This avoids exposing nonzero data on subsequent file |
@@ -7138,7 +7288,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
7138 | goto out_unlock; | 7288 | goto out_unlock; |
7139 | } | 7289 | } |
7140 | 7290 | ||
7141 | ret = ocfs2_journal_access_di(handle, inode, di_bh, | 7291 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, |
7142 | OCFS2_JOURNAL_ACCESS_WRITE); | 7292 | OCFS2_JOURNAL_ACCESS_WRITE); |
7143 | if (ret) { | 7293 | if (ret) { |
7144 | mlog_errno(ret); | 7294 | mlog_errno(ret); |
@@ -7218,9 +7368,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
7218 | * this proves to be false, we could always re-build | 7368 | * this proves to be false, we could always re-build |
7219 | * the in-inode data from our pages. | 7369 | * the in-inode data from our pages. |
7220 | */ | 7370 | */ |
7221 | ocfs2_init_dinode_extent_tree(&et, inode, di_bh); | 7371 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); |
7222 | ret = ocfs2_insert_extent(osb, handle, inode, &et, | 7372 | ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL); |
7223 | 0, block, 1, 0, NULL); | ||
7224 | if (ret) { | 7373 | if (ret) { |
7225 | mlog_errno(ret); | 7374 | mlog_errno(ret); |
7226 | goto out_commit; | 7375 | goto out_commit; |
@@ -7262,11 +7411,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, | |||
7262 | { | 7411 | { |
7263 | int status, i, credits, tl_sem = 0; | 7412 | int status, i, credits, tl_sem = 0; |
7264 | u32 clusters_to_del, new_highest_cpos, range; | 7413 | u32 clusters_to_del, new_highest_cpos, range; |
7414 | u64 blkno = 0; | ||
7265 | struct ocfs2_extent_list *el; | 7415 | struct ocfs2_extent_list *el; |
7266 | handle_t *handle = NULL; | 7416 | handle_t *handle = NULL; |
7267 | struct inode *tl_inode = osb->osb_tl_inode; | 7417 | struct inode *tl_inode = osb->osb_tl_inode; |
7268 | struct ocfs2_path *path = NULL; | 7418 | struct ocfs2_path *path = NULL; |
7269 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; | 7419 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; |
7420 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
7421 | struct ocfs2_refcount_tree *ref_tree = NULL; | ||
7270 | 7422 | ||
7271 | mlog_entry_void(); | 7423 | mlog_entry_void(); |
7272 | 7424 | ||
@@ -7292,10 +7444,12 @@ start: | |||
7292 | goto bail; | 7444 | goto bail; |
7293 | } | 7445 | } |
7294 | 7446 | ||
7447 | credits = 0; | ||
7448 | |||
7295 | /* | 7449 | /* |
7296 | * Truncate always works against the rightmost tree branch. | 7450 | * Truncate always works against the rightmost tree branch. |
7297 | */ | 7451 | */ |
7298 | status = ocfs2_find_path(inode, path, UINT_MAX); | 7452 | status = ocfs2_find_path(INODE_CACHE(inode), path, UINT_MAX); |
7299 | if (status) { | 7453 | if (status) { |
7300 | mlog_errno(status); | 7454 | mlog_errno(status); |
7301 | goto bail; | 7455 | goto bail; |
@@ -7332,10 +7486,15 @@ start: | |||
7332 | clusters_to_del = 0; | 7486 | clusters_to_del = 0; |
7333 | } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { | 7487 | } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { |
7334 | clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); | 7488 | clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); |
7489 | blkno = le64_to_cpu(el->l_recs[i].e_blkno); | ||
7335 | } else if (range > new_highest_cpos) { | 7490 | } else if (range > new_highest_cpos) { |
7336 | clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + | 7491 | clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + |
7337 | le32_to_cpu(el->l_recs[i].e_cpos)) - | 7492 | le32_to_cpu(el->l_recs[i].e_cpos)) - |
7338 | new_highest_cpos; | 7493 | new_highest_cpos; |
7494 | blkno = le64_to_cpu(el->l_recs[i].e_blkno) + | ||
7495 | ocfs2_clusters_to_blocks(inode->i_sb, | ||
7496 | ocfs2_rec_clusters(el, &el->l_recs[i]) - | ||
7497 | clusters_to_del); | ||
7339 | } else { | 7498 | } else { |
7340 | status = 0; | 7499 | status = 0; |
7341 | goto bail; | 7500 | goto bail; |
@@ -7344,6 +7503,29 @@ start: | |||
7344 | mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", | 7503 | mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", |
7345 | clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); | 7504 | clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); |
7346 | 7505 | ||
7506 | if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) { | ||
7507 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & | ||
7508 | OCFS2_HAS_REFCOUNT_FL)); | ||
7509 | |||
7510 | status = ocfs2_lock_refcount_tree(osb, | ||
7511 | le64_to_cpu(di->i_refcount_loc), | ||
7512 | 1, &ref_tree, NULL); | ||
7513 | if (status) { | ||
7514 | mlog_errno(status); | ||
7515 | goto bail; | ||
7516 | } | ||
7517 | |||
7518 | status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh, | ||
7519 | blkno, | ||
7520 | clusters_to_del, | ||
7521 | &credits, | ||
7522 | &meta_ac); | ||
7523 | if (status < 0) { | ||
7524 | mlog_errno(status); | ||
7525 | goto bail; | ||
7526 | } | ||
7527 | } | ||
7528 | |||
7347 | mutex_lock(&tl_inode->i_mutex); | 7529 | mutex_lock(&tl_inode->i_mutex); |
7348 | tl_sem = 1; | 7530 | tl_sem = 1; |
7349 | /* ocfs2_truncate_log_needs_flush guarantees us at least one | 7531 | /* ocfs2_truncate_log_needs_flush guarantees us at least one |
@@ -7357,7 +7539,7 @@ start: | |||
7357 | } | 7539 | } |
7358 | } | 7540 | } |
7359 | 7541 | ||
7360 | credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, | 7542 | credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, |
7361 | (struct ocfs2_dinode *)fe_bh->b_data, | 7543 | (struct ocfs2_dinode *)fe_bh->b_data, |
7362 | el); | 7544 | el); |
7363 | handle = ocfs2_start_trans(osb, credits); | 7545 | handle = ocfs2_start_trans(osb, credits); |
@@ -7369,7 +7551,7 @@ start: | |||
7369 | } | 7551 | } |
7370 | 7552 | ||
7371 | status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, | 7553 | status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, |
7372 | tc, path); | 7554 | tc, path, meta_ac); |
7373 | if (status < 0) { | 7555 | if (status < 0) { |
7374 | mlog_errno(status); | 7556 | mlog_errno(status); |
7375 | goto bail; | 7557 | goto bail; |
@@ -7383,6 +7565,16 @@ start: | |||
7383 | 7565 | ||
7384 | ocfs2_reinit_path(path, 1); | 7566 | ocfs2_reinit_path(path, 1); |
7385 | 7567 | ||
7568 | if (meta_ac) { | ||
7569 | ocfs2_free_alloc_context(meta_ac); | ||
7570 | meta_ac = NULL; | ||
7571 | } | ||
7572 | |||
7573 | if (ref_tree) { | ||
7574 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
7575 | ref_tree = NULL; | ||
7576 | } | ||
7577 | |||
7386 | /* | 7578 | /* |
7387 | * The check above will catch the case where we've truncated | 7579 | * The check above will catch the case where we've truncated |
7388 | * away all allocation. | 7580 | * away all allocation. |
@@ -7399,6 +7591,12 @@ bail: | |||
7399 | if (handle) | 7591 | if (handle) |
7400 | ocfs2_commit_trans(osb, handle); | 7592 | ocfs2_commit_trans(osb, handle); |
7401 | 7593 | ||
7594 | if (meta_ac) | ||
7595 | ocfs2_free_alloc_context(meta_ac); | ||
7596 | |||
7597 | if (ref_tree) | ||
7598 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
7599 | |||
7402 | ocfs2_run_deallocs(osb, &tc->tc_dealloc); | 7600 | ocfs2_run_deallocs(osb, &tc->tc_dealloc); |
7403 | 7601 | ||
7404 | ocfs2_free_path(path); | 7602 | ocfs2_free_path(path); |
@@ -7445,7 +7643,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, | |||
7445 | ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); | 7643 | ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); |
7446 | 7644 | ||
7447 | if (fe->id2.i_list.l_tree_depth) { | 7645 | if (fe->id2.i_list.l_tree_depth) { |
7448 | status = ocfs2_read_extent_block(inode, | 7646 | status = ocfs2_read_extent_block(INODE_CACHE(inode), |
7449 | le64_to_cpu(fe->i_last_eb_blk), | 7647 | le64_to_cpu(fe->i_last_eb_blk), |
7450 | &last_eb_bh); | 7648 | &last_eb_bh); |
7451 | if (status < 0) { | 7649 | if (status < 0) { |
@@ -7507,7 +7705,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, | |||
7507 | goto out; | 7705 | goto out; |
7508 | } | 7706 | } |
7509 | 7707 | ||
7510 | ret = ocfs2_journal_access_di(handle, inode, di_bh, | 7708 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, |
7511 | OCFS2_JOURNAL_ACCESS_WRITE); | 7709 | OCFS2_JOURNAL_ACCESS_WRITE); |
7512 | if (ret) { | 7710 | if (ret) { |
7513 | mlog_errno(ret); | 7711 | mlog_errno(ret); |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 353254ba29e1..9c122d574464 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -45,7 +45,8 @@ | |||
45 | * | 45 | * |
46 | * ocfs2_extent_tree contains info for the root of the b-tree, it must have a | 46 | * ocfs2_extent_tree contains info for the root of the b-tree, it must have a |
47 | * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree | 47 | * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree |
48 | * functions. With metadata ecc, we now call different journal_access | 48 | * functions. It needs the ocfs2_caching_info structure associated with |
49 | * I/O on the tree. With metadata ecc, we now call different journal_access | ||
49 | * functions for each type of metadata, so it must have the | 50 | * functions for each type of metadata, so it must have the |
50 | * root_journal_access function. | 51 | * root_journal_access function. |
51 | * ocfs2_extent_tree_operations abstract the normal operations we do for | 52 | * ocfs2_extent_tree_operations abstract the normal operations we do for |
@@ -56,6 +57,7 @@ struct ocfs2_extent_tree { | |||
56 | struct ocfs2_extent_tree_operations *et_ops; | 57 | struct ocfs2_extent_tree_operations *et_ops; |
57 | struct buffer_head *et_root_bh; | 58 | struct buffer_head *et_root_bh; |
58 | struct ocfs2_extent_list *et_root_el; | 59 | struct ocfs2_extent_list *et_root_el; |
60 | struct ocfs2_caching_info *et_ci; | ||
59 | ocfs2_journal_access_func et_root_journal_access; | 61 | ocfs2_journal_access_func et_root_journal_access; |
60 | void *et_object; | 62 | void *et_object; |
61 | unsigned int et_max_leaf_clusters; | 63 | unsigned int et_max_leaf_clusters; |
@@ -66,31 +68,32 @@ struct ocfs2_extent_tree { | |||
66 | * specified object buffer. | 68 | * specified object buffer. |
67 | */ | 69 | */ |
68 | void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, | 70 | void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, |
69 | struct inode *inode, | 71 | struct ocfs2_caching_info *ci, |
70 | struct buffer_head *bh); | 72 | struct buffer_head *bh); |
71 | void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, | 73 | void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, |
72 | struct inode *inode, | 74 | struct ocfs2_caching_info *ci, |
73 | struct buffer_head *bh); | 75 | struct buffer_head *bh); |
74 | struct ocfs2_xattr_value_buf; | 76 | struct ocfs2_xattr_value_buf; |
75 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, | 77 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, |
76 | struct inode *inode, | 78 | struct ocfs2_caching_info *ci, |
77 | struct ocfs2_xattr_value_buf *vb); | 79 | struct ocfs2_xattr_value_buf *vb); |
78 | void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, | 80 | void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, |
79 | struct inode *inode, | 81 | struct ocfs2_caching_info *ci, |
80 | struct buffer_head *bh); | 82 | struct buffer_head *bh); |
83 | void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et, | ||
84 | struct ocfs2_caching_info *ci, | ||
85 | struct buffer_head *bh); | ||
81 | 86 | ||
82 | /* | 87 | /* |
83 | * Read an extent block into *bh. If *bh is NULL, a bh will be | 88 | * Read an extent block into *bh. If *bh is NULL, a bh will be |
84 | * allocated. This is a cached read. The extent block will be validated | 89 | * allocated. This is a cached read. The extent block will be validated |
85 | * with ocfs2_validate_extent_block(). | 90 | * with ocfs2_validate_extent_block(). |
86 | */ | 91 | */ |
87 | int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, | 92 | int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno, |
88 | struct buffer_head **bh); | 93 | struct buffer_head **bh); |
89 | 94 | ||
90 | struct ocfs2_alloc_context; | 95 | struct ocfs2_alloc_context; |
91 | int ocfs2_insert_extent(struct ocfs2_super *osb, | 96 | int ocfs2_insert_extent(handle_t *handle, |
92 | handle_t *handle, | ||
93 | struct inode *inode, | ||
94 | struct ocfs2_extent_tree *et, | 97 | struct ocfs2_extent_tree *et, |
95 | u32 cpos, | 98 | u32 cpos, |
96 | u64 start_blk, | 99 | u64 start_blk, |
@@ -103,25 +106,36 @@ enum ocfs2_alloc_restarted { | |||
103 | RESTART_TRANS, | 106 | RESTART_TRANS, |
104 | RESTART_META | 107 | RESTART_META |
105 | }; | 108 | }; |
106 | int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, | 109 | int ocfs2_add_clusters_in_btree(handle_t *handle, |
107 | struct inode *inode, | 110 | struct ocfs2_extent_tree *et, |
108 | u32 *logical_offset, | 111 | u32 *logical_offset, |
109 | u32 clusters_to_add, | 112 | u32 clusters_to_add, |
110 | int mark_unwritten, | 113 | int mark_unwritten, |
111 | struct ocfs2_extent_tree *et, | ||
112 | handle_t *handle, | ||
113 | struct ocfs2_alloc_context *data_ac, | 114 | struct ocfs2_alloc_context *data_ac, |
114 | struct ocfs2_alloc_context *meta_ac, | 115 | struct ocfs2_alloc_context *meta_ac, |
115 | enum ocfs2_alloc_restarted *reason_ret); | 116 | enum ocfs2_alloc_restarted *reason_ret); |
116 | struct ocfs2_cached_dealloc_ctxt; | 117 | struct ocfs2_cached_dealloc_ctxt; |
118 | struct ocfs2_path; | ||
119 | int ocfs2_split_extent(handle_t *handle, | ||
120 | struct ocfs2_extent_tree *et, | ||
121 | struct ocfs2_path *path, | ||
122 | int split_index, | ||
123 | struct ocfs2_extent_rec *split_rec, | ||
124 | struct ocfs2_alloc_context *meta_ac, | ||
125 | struct ocfs2_cached_dealloc_ctxt *dealloc); | ||
117 | int ocfs2_mark_extent_written(struct inode *inode, | 126 | int ocfs2_mark_extent_written(struct inode *inode, |
118 | struct ocfs2_extent_tree *et, | 127 | struct ocfs2_extent_tree *et, |
119 | handle_t *handle, u32 cpos, u32 len, u32 phys, | 128 | handle_t *handle, u32 cpos, u32 len, u32 phys, |
120 | struct ocfs2_alloc_context *meta_ac, | 129 | struct ocfs2_alloc_context *meta_ac, |
121 | struct ocfs2_cached_dealloc_ctxt *dealloc); | 130 | struct ocfs2_cached_dealloc_ctxt *dealloc); |
122 | int ocfs2_remove_extent(struct inode *inode, | 131 | int ocfs2_change_extent_flag(handle_t *handle, |
123 | struct ocfs2_extent_tree *et, | 132 | struct ocfs2_extent_tree *et, |
124 | u32 cpos, u32 len, handle_t *handle, | 133 | u32 cpos, u32 len, u32 phys, |
134 | struct ocfs2_alloc_context *meta_ac, | ||
135 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
136 | int new_flags, int clear_flags); | ||
137 | int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et, | ||
138 | u32 cpos, u32 len, | ||
125 | struct ocfs2_alloc_context *meta_ac, | 139 | struct ocfs2_alloc_context *meta_ac, |
126 | struct ocfs2_cached_dealloc_ctxt *dealloc); | 140 | struct ocfs2_cached_dealloc_ctxt *dealloc); |
127 | int ocfs2_remove_btree_range(struct inode *inode, | 141 | int ocfs2_remove_btree_range(struct inode *inode, |
@@ -130,7 +144,6 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
130 | struct ocfs2_cached_dealloc_ctxt *dealloc); | 144 | struct ocfs2_cached_dealloc_ctxt *dealloc); |
131 | 145 | ||
132 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 146 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
133 | struct inode *inode, | ||
134 | struct ocfs2_extent_tree *et); | 147 | struct ocfs2_extent_tree *et); |
135 | 148 | ||
136 | /* | 149 | /* |
@@ -195,6 +208,9 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) | |||
195 | } | 208 | } |
196 | int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, | 209 | int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, |
197 | u64 blkno, unsigned int bit); | 210 | u64 blkno, unsigned int bit); |
211 | int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, | ||
212 | int type, int slot, u64 blkno, | ||
213 | unsigned int bit); | ||
198 | static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c) | 214 | static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c) |
199 | { | 215 | { |
200 | return c->c_global_allocator != NULL; | 216 | return c->c_global_allocator != NULL; |
@@ -222,8 +238,9 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, | |||
222 | int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, | 238 | int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, |
223 | unsigned int start, unsigned int end, int trunc); | 239 | unsigned int start, unsigned int end, int trunc); |
224 | 240 | ||
225 | int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, | 241 | int ocfs2_find_leaf(struct ocfs2_caching_info *ci, |
226 | u32 cpos, struct buffer_head **leaf_bh); | 242 | struct ocfs2_extent_list *root_el, u32 cpos, |
243 | struct buffer_head **leaf_bh); | ||
227 | int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); | 244 | int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); |
228 | 245 | ||
229 | /* | 246 | /* |
@@ -254,4 +271,50 @@ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) | |||
254 | return !rec->e_leaf_clusters; | 271 | return !rec->e_leaf_clusters; |
255 | } | 272 | } |
256 | 273 | ||
274 | int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end, | ||
275 | struct page **pages, int *num); | ||
276 | void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, | ||
277 | unsigned int from, unsigned int to, | ||
278 | struct page *page, int zero, u64 *phys); | ||
279 | /* | ||
280 | * Structures which describe a path through a btree, and functions to | ||
281 | * manipulate them. | ||
282 | * | ||
283 | * The idea here is to be as generic as possible with the tree | ||
284 | * manipulation code. | ||
285 | */ | ||
286 | struct ocfs2_path_item { | ||
287 | struct buffer_head *bh; | ||
288 | struct ocfs2_extent_list *el; | ||
289 | }; | ||
290 | |||
291 | #define OCFS2_MAX_PATH_DEPTH 5 | ||
292 | |||
293 | struct ocfs2_path { | ||
294 | int p_tree_depth; | ||
295 | ocfs2_journal_access_func p_root_access; | ||
296 | struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH]; | ||
297 | }; | ||
298 | |||
299 | #define path_root_bh(_path) ((_path)->p_node[0].bh) | ||
300 | #define path_root_el(_path) ((_path)->p_node[0].el) | ||
301 | #define path_root_access(_path)((_path)->p_root_access) | ||
302 | #define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh) | ||
303 | #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) | ||
304 | #define path_num_items(_path) ((_path)->p_tree_depth + 1) | ||
305 | |||
306 | void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root); | ||
307 | void ocfs2_free_path(struct ocfs2_path *path); | ||
308 | int ocfs2_find_path(struct ocfs2_caching_info *ci, | ||
309 | struct ocfs2_path *path, | ||
310 | u32 cpos); | ||
311 | struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path); | ||
312 | struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et); | ||
313 | int ocfs2_path_bh_journal_access(handle_t *handle, | ||
314 | struct ocfs2_caching_info *ci, | ||
315 | struct ocfs2_path *path, | ||
316 | int idx); | ||
317 | int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, | ||
318 | handle_t *handle, | ||
319 | struct ocfs2_path *path); | ||
257 | #endif /* OCFS2_ALLOC_H */ | 320 | #endif /* OCFS2_ALLOC_H */ |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 747f15eefd82..deb2b132ae5e 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include "suballoc.h" | 44 | #include "suballoc.h" |
45 | #include "super.h" | 45 | #include "super.h" |
46 | #include "symlink.h" | 46 | #include "symlink.h" |
47 | #include "refcounttree.h" | ||
47 | 48 | ||
48 | #include "buffer_head_io.h" | 49 | #include "buffer_head_io.h" |
49 | 50 | ||
@@ -126,8 +127,8 @@ bail: | |||
126 | return err; | 127 | return err; |
127 | } | 128 | } |
128 | 129 | ||
129 | static int ocfs2_get_block(struct inode *inode, sector_t iblock, | 130 | int ocfs2_get_block(struct inode *inode, sector_t iblock, |
130 | struct buffer_head *bh_result, int create) | 131 | struct buffer_head *bh_result, int create) |
131 | { | 132 | { |
132 | int err = 0; | 133 | int err = 0; |
133 | unsigned int ext_flags; | 134 | unsigned int ext_flags; |
@@ -590,6 +591,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
590 | goto bail; | 591 | goto bail; |
591 | } | 592 | } |
592 | 593 | ||
594 | /* We should already CoW the refcounted extent. */ | ||
595 | BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); | ||
593 | /* | 596 | /* |
594 | * get_more_blocks() expects us to describe a hole by clearing | 597 | * get_more_blocks() expects us to describe a hole by clearing |
595 | * the mapped bit on bh_result(). | 598 | * the mapped bit on bh_result(). |
@@ -687,6 +690,10 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
687 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 690 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
688 | return 0; | 691 | return 0; |
689 | 692 | ||
693 | /* Fallback to buffered I/O if we are appending. */ | ||
694 | if (i_size_read(inode) <= offset) | ||
695 | return 0; | ||
696 | |||
690 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 697 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, |
691 | inode->i_sb->s_bdev, iov, offset, | 698 | inode->i_sb->s_bdev, iov, offset, |
692 | nr_segs, | 699 | nr_segs, |
@@ -1259,7 +1266,8 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1259 | goto out; | 1266 | goto out; |
1260 | } | 1267 | } |
1261 | } else if (unwritten) { | 1268 | } else if (unwritten) { |
1262 | ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); | 1269 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), |
1270 | wc->w_di_bh); | ||
1263 | ret = ocfs2_mark_extent_written(inode, &et, | 1271 | ret = ocfs2_mark_extent_written(inode, &et, |
1264 | wc->w_handle, cpos, 1, phys, | 1272 | wc->w_handle, cpos, 1, phys, |
1265 | meta_ac, &wc->w_dealloc); | 1273 | meta_ac, &wc->w_dealloc); |
@@ -1448,6 +1456,9 @@ static int ocfs2_populate_write_desc(struct inode *inode, | |||
1448 | goto out; | 1456 | goto out; |
1449 | } | 1457 | } |
1450 | 1458 | ||
1459 | /* We should already CoW the refcountd extent. */ | ||
1460 | BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); | ||
1461 | |||
1451 | /* | 1462 | /* |
1452 | * Assume worst case - that we're writing in | 1463 | * Assume worst case - that we're writing in |
1453 | * the middle of the extent. | 1464 | * the middle of the extent. |
@@ -1528,7 +1539,7 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, | |||
1528 | goto out; | 1539 | goto out; |
1529 | } | 1540 | } |
1530 | 1541 | ||
1531 | ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh, | 1542 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, |
1532 | OCFS2_JOURNAL_ACCESS_WRITE); | 1543 | OCFS2_JOURNAL_ACCESS_WRITE); |
1533 | if (ret) { | 1544 | if (ret) { |
1534 | ocfs2_commit_trans(osb, handle); | 1545 | ocfs2_commit_trans(osb, handle); |
@@ -1699,6 +1710,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1699 | goto out; | 1710 | goto out; |
1700 | } | 1711 | } |
1701 | 1712 | ||
1713 | ret = ocfs2_check_range_for_refcount(inode, pos, len); | ||
1714 | if (ret < 0) { | ||
1715 | mlog_errno(ret); | ||
1716 | goto out; | ||
1717 | } else if (ret == 1) { | ||
1718 | ret = ocfs2_refcount_cow(inode, di_bh, | ||
1719 | wc->w_cpos, wc->w_clen, UINT_MAX); | ||
1720 | if (ret) { | ||
1721 | mlog_errno(ret); | ||
1722 | goto out; | ||
1723 | } | ||
1724 | } | ||
1725 | |||
1702 | ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, | 1726 | ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, |
1703 | &extents_to_split); | 1727 | &extents_to_split); |
1704 | if (ret) { | 1728 | if (ret) { |
@@ -1726,7 +1750,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1726 | (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), | 1750 | (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), |
1727 | clusters_to_alloc, extents_to_split); | 1751 | clusters_to_alloc, extents_to_split); |
1728 | 1752 | ||
1729 | ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); | 1753 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), |
1754 | wc->w_di_bh); | ||
1730 | ret = ocfs2_lock_allocators(inode, &et, | 1755 | ret = ocfs2_lock_allocators(inode, &et, |
1731 | clusters_to_alloc, extents_to_split, | 1756 | clusters_to_alloc, extents_to_split, |
1732 | &data_ac, &meta_ac); | 1757 | &data_ac, &meta_ac); |
@@ -1773,7 +1798,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1773 | * We don't want this to fail in ocfs2_write_end(), so do it | 1798 | * We don't want this to fail in ocfs2_write_end(), so do it |
1774 | * here. | 1799 | * here. |
1775 | */ | 1800 | */ |
1776 | ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh, | 1801 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, |
1777 | OCFS2_JOURNAL_ACCESS_WRITE); | 1802 | OCFS2_JOURNAL_ACCESS_WRITE); |
1778 | if (ret) { | 1803 | if (ret) { |
1779 | mlog_errno(ret); | 1804 | mlog_errno(ret); |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 503e49232e11..c48e93ffc513 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
@@ -57,6 +57,8 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, | |||
57 | struct buffer_head *di_bh); | 57 | struct buffer_head *di_bh); |
58 | int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size); | 58 | int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size); |
59 | 59 | ||
60 | int ocfs2_get_block(struct inode *inode, sector_t iblock, | ||
61 | struct buffer_head *bh_result, int create); | ||
60 | /* all ocfs2_dio_end_io()'s fault */ | 62 | /* all ocfs2_dio_end_io()'s fault */ |
61 | #define ocfs2_iocb_is_rw_locked(iocb) \ | 63 | #define ocfs2_iocb_is_rw_locked(iocb) \ |
62 | test_bit(0, (unsigned long *)&iocb->private) | 64 | test_bit(0, (unsigned long *)&iocb->private) |
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 15c8e6deee2e..d43d34a1dd31 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
@@ -52,12 +52,12 @@ enum ocfs2_state_bits { | |||
52 | BUFFER_FNS(NeedsValidate, needs_validate); | 52 | BUFFER_FNS(NeedsValidate, needs_validate); |
53 | 53 | ||
54 | int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | 54 | int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, |
55 | struct inode *inode) | 55 | struct ocfs2_caching_info *ci) |
56 | { | 56 | { |
57 | int ret = 0; | 57 | int ret = 0; |
58 | 58 | ||
59 | mlog_entry("(bh->b_blocknr = %llu, inode=%p)\n", | 59 | mlog_entry("(bh->b_blocknr = %llu, ci=%p)\n", |
60 | (unsigned long long)bh->b_blocknr, inode); | 60 | (unsigned long long)bh->b_blocknr, ci); |
61 | 61 | ||
62 | BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO); | 62 | BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO); |
63 | BUG_ON(buffer_jbd(bh)); | 63 | BUG_ON(buffer_jbd(bh)); |
@@ -70,7 +70,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | |||
70 | goto out; | 70 | goto out; |
71 | } | 71 | } |
72 | 72 | ||
73 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); | 73 | ocfs2_metadata_cache_io_lock(ci); |
74 | 74 | ||
75 | lock_buffer(bh); | 75 | lock_buffer(bh); |
76 | set_buffer_uptodate(bh); | 76 | set_buffer_uptodate(bh); |
@@ -85,7 +85,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | |||
85 | wait_on_buffer(bh); | 85 | wait_on_buffer(bh); |
86 | 86 | ||
87 | if (buffer_uptodate(bh)) { | 87 | if (buffer_uptodate(bh)) { |
88 | ocfs2_set_buffer_uptodate(inode, bh); | 88 | ocfs2_set_buffer_uptodate(ci, bh); |
89 | } else { | 89 | } else { |
90 | /* We don't need to remove the clustered uptodate | 90 | /* We don't need to remove the clustered uptodate |
91 | * information for this bh as it's not marked locally | 91 | * information for this bh as it's not marked locally |
@@ -94,7 +94,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | |||
94 | put_bh(bh); | 94 | put_bh(bh); |
95 | } | 95 | } |
96 | 96 | ||
97 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | 97 | ocfs2_metadata_cache_io_unlock(ci); |
98 | out: | 98 | out: |
99 | mlog_exit(ret); | 99 | mlog_exit(ret); |
100 | return ret; | 100 | return ret; |
@@ -177,7 +177,7 @@ bail: | |||
177 | return status; | 177 | return status; |
178 | } | 178 | } |
179 | 179 | ||
180 | int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | 180 | int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, |
181 | struct buffer_head *bhs[], int flags, | 181 | struct buffer_head *bhs[], int flags, |
182 | int (*validate)(struct super_block *sb, | 182 | int (*validate)(struct super_block *sb, |
183 | struct buffer_head *bh)) | 183 | struct buffer_head *bh)) |
@@ -185,11 +185,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | |||
185 | int status = 0; | 185 | int status = 0; |
186 | int i, ignore_cache = 0; | 186 | int i, ignore_cache = 0; |
187 | struct buffer_head *bh; | 187 | struct buffer_head *bh; |
188 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | ||
188 | 189 | ||
189 | mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n", | 190 | mlog_entry("(ci=%p, block=(%llu), nr=(%d), flags=%d)\n", |
190 | inode, (unsigned long long)block, nr, flags); | 191 | ci, (unsigned long long)block, nr, flags); |
191 | 192 | ||
192 | BUG_ON(!inode); | 193 | BUG_ON(!ci); |
193 | BUG_ON((flags & OCFS2_BH_READAHEAD) && | 194 | BUG_ON((flags & OCFS2_BH_READAHEAD) && |
194 | (flags & OCFS2_BH_IGNORE_CACHE)); | 195 | (flags & OCFS2_BH_IGNORE_CACHE)); |
195 | 196 | ||
@@ -212,12 +213,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | |||
212 | goto bail; | 213 | goto bail; |
213 | } | 214 | } |
214 | 215 | ||
215 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); | 216 | ocfs2_metadata_cache_io_lock(ci); |
216 | for (i = 0 ; i < nr ; i++) { | 217 | for (i = 0 ; i < nr ; i++) { |
217 | if (bhs[i] == NULL) { | 218 | if (bhs[i] == NULL) { |
218 | bhs[i] = sb_getblk(inode->i_sb, block++); | 219 | bhs[i] = sb_getblk(sb, block++); |
219 | if (bhs[i] == NULL) { | 220 | if (bhs[i] == NULL) { |
220 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | 221 | ocfs2_metadata_cache_io_unlock(ci); |
221 | status = -EIO; | 222 | status = -EIO; |
222 | mlog_errno(status); | 223 | mlog_errno(status); |
223 | goto bail; | 224 | goto bail; |
@@ -250,11 +251,11 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | |||
250 | * before our is-it-in-flight check. | 251 | * before our is-it-in-flight check. |
251 | */ | 252 | */ |
252 | 253 | ||
253 | if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) { | 254 | if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) { |
254 | mlog(ML_UPTODATE, | 255 | mlog(ML_UPTODATE, |
255 | "bh (%llu), inode %llu not uptodate\n", | 256 | "bh (%llu), owner %llu not uptodate\n", |
256 | (unsigned long long)bh->b_blocknr, | 257 | (unsigned long long)bh->b_blocknr, |
257 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 258 | (unsigned long long)ocfs2_metadata_cache_owner(ci)); |
258 | /* We're using ignore_cache here to say | 259 | /* We're using ignore_cache here to say |
259 | * "go to disk" */ | 260 | * "go to disk" */ |
260 | ignore_cache = 1; | 261 | ignore_cache = 1; |
@@ -283,7 +284,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | |||
283 | * previously submitted request than we are | 284 | * previously submitted request than we are |
284 | * done here. */ | 285 | * done here. */ |
285 | if ((flags & OCFS2_BH_READAHEAD) | 286 | if ((flags & OCFS2_BH_READAHEAD) |
286 | && ocfs2_buffer_read_ahead(inode, bh)) | 287 | && ocfs2_buffer_read_ahead(ci, bh)) |
287 | continue; | 288 | continue; |
288 | 289 | ||
289 | lock_buffer(bh); | 290 | lock_buffer(bh); |
@@ -305,7 +306,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | |||
305 | * buffer lock. */ | 306 | * buffer lock. */ |
306 | if (!(flags & OCFS2_BH_IGNORE_CACHE) | 307 | if (!(flags & OCFS2_BH_IGNORE_CACHE) |
307 | && !(flags & OCFS2_BH_READAHEAD) | 308 | && !(flags & OCFS2_BH_READAHEAD) |
308 | && ocfs2_buffer_uptodate(inode, bh)) { | 309 | && ocfs2_buffer_uptodate(ci, bh)) { |
309 | unlock_buffer(bh); | 310 | unlock_buffer(bh); |
310 | continue; | 311 | continue; |
311 | } | 312 | } |
@@ -327,7 +328,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | |||
327 | 328 | ||
328 | if (!(flags & OCFS2_BH_READAHEAD)) { | 329 | if (!(flags & OCFS2_BH_READAHEAD)) { |
329 | /* We know this can't have changed as we hold the | 330 | /* We know this can't have changed as we hold the |
330 | * inode sem. Avoid doing any work on the bh if the | 331 | * owner sem. Avoid doing any work on the bh if the |
331 | * journal has it. */ | 332 | * journal has it. */ |
332 | if (!buffer_jbd(bh)) | 333 | if (!buffer_jbd(bh)) |
333 | wait_on_buffer(bh); | 334 | wait_on_buffer(bh); |
@@ -351,7 +352,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | |||
351 | * that better not have changed */ | 352 | * that better not have changed */ |
352 | BUG_ON(buffer_jbd(bh)); | 353 | BUG_ON(buffer_jbd(bh)); |
353 | clear_buffer_needs_validate(bh); | 354 | clear_buffer_needs_validate(bh); |
354 | status = validate(inode->i_sb, bh); | 355 | status = validate(sb, bh); |
355 | if (status) { | 356 | if (status) { |
356 | put_bh(bh); | 357 | put_bh(bh); |
357 | bhs[i] = NULL; | 358 | bhs[i] = NULL; |
@@ -363,9 +364,9 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | |||
363 | /* Always set the buffer in the cache, even if it was | 364 | /* Always set the buffer in the cache, even if it was |
364 | * a forced read, or read-ahead which hasn't yet | 365 | * a forced read, or read-ahead which hasn't yet |
365 | * completed. */ | 366 | * completed. */ |
366 | ocfs2_set_buffer_uptodate(inode, bh); | 367 | ocfs2_set_buffer_uptodate(ci, bh); |
367 | } | 368 | } |
368 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | 369 | ocfs2_metadata_cache_io_unlock(ci); |
369 | 370 | ||
370 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", | 371 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", |
371 | (unsigned long long)block, nr, | 372 | (unsigned long long)block, nr, |
@@ -399,7 +400,7 @@ static void ocfs2_check_super_or_backup(struct super_block *sb, | |||
399 | 400 | ||
400 | /* | 401 | /* |
401 | * Write super block and backups doesn't need to collaborate with journal, | 402 | * Write super block and backups doesn't need to collaborate with journal, |
402 | * so we don't need to lock ip_io_mutex and inode doesn't need to bea passed | 403 | * so we don't need to lock ip_io_mutex and ci doesn't need to bea passed |
403 | * into this function. | 404 | * into this function. |
404 | */ | 405 | */ |
405 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | 406 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, |
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index c75d682dadd8..b97bcc6dde7c 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h | |||
@@ -33,7 +33,7 @@ void ocfs2_end_buffer_io_sync(struct buffer_head *bh, | |||
33 | 33 | ||
34 | int ocfs2_write_block(struct ocfs2_super *osb, | 34 | int ocfs2_write_block(struct ocfs2_super *osb, |
35 | struct buffer_head *bh, | 35 | struct buffer_head *bh, |
36 | struct inode *inode); | 36 | struct ocfs2_caching_info *ci); |
37 | int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, | 37 | int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, |
38 | unsigned int nr, struct buffer_head *bhs[]); | 38 | unsigned int nr, struct buffer_head *bhs[]); |
39 | 39 | ||
@@ -44,7 +44,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, | |||
44 | * be set even for a READAHEAD call, as it marks the buffer for later | 44 | * be set even for a READAHEAD call, as it marks the buffer for later |
45 | * validation. | 45 | * validation. |
46 | */ | 46 | */ |
47 | int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | 47 | int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, |
48 | struct buffer_head *bhs[], int flags, | 48 | struct buffer_head *bhs[], int flags, |
49 | int (*validate)(struct super_block *sb, | 49 | int (*validate)(struct super_block *sb, |
50 | struct buffer_head *bh)); | 50 | struct buffer_head *bh)); |
@@ -55,7 +55,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | |||
55 | #define OCFS2_BH_IGNORE_CACHE 1 | 55 | #define OCFS2_BH_IGNORE_CACHE 1 |
56 | #define OCFS2_BH_READAHEAD 8 | 56 | #define OCFS2_BH_READAHEAD 8 |
57 | 57 | ||
58 | static inline int ocfs2_read_block(struct inode *inode, u64 off, | 58 | static inline int ocfs2_read_block(struct ocfs2_caching_info *ci, u64 off, |
59 | struct buffer_head **bh, | 59 | struct buffer_head **bh, |
60 | int (*validate)(struct super_block *sb, | 60 | int (*validate)(struct super_block *sb, |
61 | struct buffer_head *bh)) | 61 | struct buffer_head *bh)) |
@@ -68,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off, | |||
68 | goto bail; | 68 | goto bail; |
69 | } | 69 | } |
70 | 70 | ||
71 | status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate); | 71 | status = ocfs2_read_blocks(ci, off, 1, bh, 0, validate); |
72 | 72 | ||
73 | bail: | 73 | bail: |
74 | return status; | 74 | return status; |
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index 96df5416993e..1cd2934de615 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c | |||
@@ -111,6 +111,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { | |||
111 | define_mask(EXPORT), | 111 | define_mask(EXPORT), |
112 | define_mask(XATTR), | 112 | define_mask(XATTR), |
113 | define_mask(QUOTA), | 113 | define_mask(QUOTA), |
114 | define_mask(REFCOUNT), | ||
114 | define_mask(ERROR), | 115 | define_mask(ERROR), |
115 | define_mask(NOTICE), | 116 | define_mask(NOTICE), |
116 | define_mask(KTHREAD), | 117 | define_mask(KTHREAD), |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 696c32e50716..9b4d11726cf2 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
@@ -113,6 +113,7 @@ | |||
113 | #define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ | 113 | #define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ |
114 | #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ | 114 | #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ |
115 | #define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ | 115 | #define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ |
116 | #define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */ | ||
116 | /* bits that are infrequently given and frequently matched in the high word */ | 117 | /* bits that are infrequently given and frequently matched in the high word */ |
117 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ | 118 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ |
118 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ | 119 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index f8424874fa07..cfb2be708abe 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c | |||
@@ -163,7 +163,7 @@ static void nst_seq_stop(struct seq_file *seq, void *v) | |||
163 | { | 163 | { |
164 | } | 164 | } |
165 | 165 | ||
166 | static struct seq_operations nst_seq_ops = { | 166 | static const struct seq_operations nst_seq_ops = { |
167 | .start = nst_seq_start, | 167 | .start = nst_seq_start, |
168 | .next = nst_seq_next, | 168 | .next = nst_seq_next, |
169 | .stop = nst_seq_stop, | 169 | .stop = nst_seq_stop, |
@@ -344,7 +344,7 @@ static void sc_seq_stop(struct seq_file *seq, void *v) | |||
344 | { | 344 | { |
345 | } | 345 | } |
346 | 346 | ||
347 | static struct seq_operations sc_seq_ops = { | 347 | static const struct seq_operations sc_seq_ops = { |
348 | .start = sc_seq_start, | 348 | .start = sc_seq_start, |
349 | .next = sc_seq_next, | 349 | .next = sc_seq_next, |
350 | .stop = sc_seq_stop, | 350 | .stop = sc_seq_stop, |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index b358f3bf896d..28c3ec238796 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -176,7 +176,7 @@ static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle, | |||
176 | struct ocfs2_dx_root_block *dx_root; | 176 | struct ocfs2_dx_root_block *dx_root; |
177 | struct ocfs2_dir_block_trailer *trailer; | 177 | struct ocfs2_dir_block_trailer *trailer; |
178 | 178 | ||
179 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | 179 | ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh, |
180 | OCFS2_JOURNAL_ACCESS_WRITE); | 180 | OCFS2_JOURNAL_ACCESS_WRITE); |
181 | if (ret) { | 181 | if (ret) { |
182 | mlog_errno(ret); | 182 | mlog_errno(ret); |
@@ -564,7 +564,8 @@ static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys, | |||
564 | int ret; | 564 | int ret; |
565 | struct buffer_head *tmp = *bh; | 565 | struct buffer_head *tmp = *bh; |
566 | 566 | ||
567 | ret = ocfs2_read_block(dir, phys, &tmp, ocfs2_validate_dir_block); | 567 | ret = ocfs2_read_block(INODE_CACHE(dir), phys, &tmp, |
568 | ocfs2_validate_dir_block); | ||
568 | if (ret) { | 569 | if (ret) { |
569 | mlog_errno(ret); | 570 | mlog_errno(ret); |
570 | goto out; | 571 | goto out; |
@@ -622,7 +623,8 @@ static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di, | |||
622 | u64 blkno = le64_to_cpu(di->i_dx_root); | 623 | u64 blkno = le64_to_cpu(di->i_dx_root); |
623 | struct buffer_head *tmp = *dx_root_bh; | 624 | struct buffer_head *tmp = *dx_root_bh; |
624 | 625 | ||
625 | ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_root); | 626 | ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp, |
627 | ocfs2_validate_dx_root); | ||
626 | 628 | ||
627 | /* If ocfs2_read_block() got us a new bh, pass it up. */ | 629 | /* If ocfs2_read_block() got us a new bh, pass it up. */ |
628 | if (!ret && !*dx_root_bh) | 630 | if (!ret && !*dx_root_bh) |
@@ -662,7 +664,8 @@ static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno, | |||
662 | int ret; | 664 | int ret; |
663 | struct buffer_head *tmp = *dx_leaf_bh; | 665 | struct buffer_head *tmp = *dx_leaf_bh; |
664 | 666 | ||
665 | ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_leaf); | 667 | ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp, |
668 | ocfs2_validate_dx_leaf); | ||
666 | 669 | ||
667 | /* If ocfs2_read_block() got us a new bh, pass it up. */ | 670 | /* If ocfs2_read_block() got us a new bh, pass it up. */ |
668 | if (!ret && !*dx_leaf_bh) | 671 | if (!ret && !*dx_leaf_bh) |
@@ -680,7 +683,7 @@ static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num, | |||
680 | { | 683 | { |
681 | int ret; | 684 | int ret; |
682 | 685 | ||
683 | ret = ocfs2_read_blocks(dir, start, num, dx_leaf_bhs, 0, | 686 | ret = ocfs2_read_blocks(INODE_CACHE(dir), start, num, dx_leaf_bhs, 0, |
684 | ocfs2_validate_dx_leaf); | 687 | ocfs2_validate_dx_leaf); |
685 | if (ret) | 688 | if (ret) |
686 | mlog_errno(ret); | 689 | mlog_errno(ret); |
@@ -802,7 +805,8 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode, | |||
802 | struct ocfs2_extent_rec *rec = NULL; | 805 | struct ocfs2_extent_rec *rec = NULL; |
803 | 806 | ||
804 | if (el->l_tree_depth) { | 807 | if (el->l_tree_depth) { |
805 | ret = ocfs2_find_leaf(inode, el, major_hash, &eb_bh); | 808 | ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash, |
809 | &eb_bh); | ||
806 | if (ret) { | 810 | if (ret) { |
807 | mlog_errno(ret); | 811 | mlog_errno(ret); |
808 | goto out; | 812 | goto out; |
@@ -1133,7 +1137,8 @@ int ocfs2_update_entry(struct inode *dir, handle_t *handle, | |||
1133 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 1137 | if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
1134 | access = ocfs2_journal_access_di; | 1138 | access = ocfs2_journal_access_di; |
1135 | 1139 | ||
1136 | ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE); | 1140 | ret = access(handle, INODE_CACHE(dir), de_bh, |
1141 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1137 | if (ret) { | 1142 | if (ret) { |
1138 | mlog_errno(ret); | 1143 | mlog_errno(ret); |
1139 | goto out; | 1144 | goto out; |
@@ -1176,7 +1181,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, | |||
1176 | goto bail; | 1181 | goto bail; |
1177 | } | 1182 | } |
1178 | if (de == de_del) { | 1183 | if (de == de_del) { |
1179 | status = access(handle, dir, bh, | 1184 | status = access(handle, INODE_CACHE(dir), bh, |
1180 | OCFS2_JOURNAL_ACCESS_WRITE); | 1185 | OCFS2_JOURNAL_ACCESS_WRITE); |
1181 | if (status < 0) { | 1186 | if (status < 0) { |
1182 | status = -EIO; | 1187 | status = -EIO; |
@@ -1326,7 +1331,7 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, | |||
1326 | * the entry count needs to be updated. Also, we might be | 1331 | * the entry count needs to be updated. Also, we might be |
1327 | * adding to the start of the free list. | 1332 | * adding to the start of the free list. |
1328 | */ | 1333 | */ |
1329 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | 1334 | ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh, |
1330 | OCFS2_JOURNAL_ACCESS_WRITE); | 1335 | OCFS2_JOURNAL_ACCESS_WRITE); |
1331 | if (ret) { | 1336 | if (ret) { |
1332 | mlog_errno(ret); | 1337 | mlog_errno(ret); |
@@ -1334,7 +1339,7 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir, | |||
1334 | } | 1339 | } |
1335 | 1340 | ||
1336 | if (!ocfs2_dx_root_inline(dx_root)) { | 1341 | if (!ocfs2_dx_root_inline(dx_root)) { |
1337 | ret = ocfs2_journal_access_dl(handle, dir, | 1342 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), |
1338 | lookup->dl_dx_leaf_bh, | 1343 | lookup->dl_dx_leaf_bh, |
1339 | OCFS2_JOURNAL_ACCESS_WRITE); | 1344 | OCFS2_JOURNAL_ACCESS_WRITE); |
1340 | if (ret) { | 1345 | if (ret) { |
@@ -1493,7 +1498,7 @@ static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle, | |||
1493 | int ret; | 1498 | int ret; |
1494 | struct ocfs2_dx_leaf *dx_leaf; | 1499 | struct ocfs2_dx_leaf *dx_leaf; |
1495 | 1500 | ||
1496 | ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, | 1501 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh, |
1497 | OCFS2_JOURNAL_ACCESS_WRITE); | 1502 | OCFS2_JOURNAL_ACCESS_WRITE); |
1498 | if (ret) { | 1503 | if (ret) { |
1499 | mlog_errno(ret); | 1504 | mlog_errno(ret); |
@@ -1523,7 +1528,7 @@ static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle, | |||
1523 | struct ocfs2_dx_root_block *dx_root; | 1528 | struct ocfs2_dx_root_block *dx_root; |
1524 | struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; | 1529 | struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; |
1525 | 1530 | ||
1526 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | 1531 | ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh, |
1527 | OCFS2_JOURNAL_ACCESS_WRITE); | 1532 | OCFS2_JOURNAL_ACCESS_WRITE); |
1528 | if (ret) { | 1533 | if (ret) { |
1529 | mlog_errno(ret); | 1534 | mlog_errno(ret); |
@@ -1645,11 +1650,13 @@ int __ocfs2_add_entry(handle_t *handle, | |||
1645 | */ | 1650 | */ |
1646 | if (ocfs2_free_list_at_root(lookup)) { | 1651 | if (ocfs2_free_list_at_root(lookup)) { |
1647 | bh = lookup->dl_dx_root_bh; | 1652 | bh = lookup->dl_dx_root_bh; |
1648 | retval = ocfs2_journal_access_dr(handle, dir, bh, | 1653 | retval = ocfs2_journal_access_dr(handle, |
1654 | INODE_CACHE(dir), bh, | ||
1649 | OCFS2_JOURNAL_ACCESS_WRITE); | 1655 | OCFS2_JOURNAL_ACCESS_WRITE); |
1650 | } else { | 1656 | } else { |
1651 | bh = lookup->dl_prev_leaf_bh; | 1657 | bh = lookup->dl_prev_leaf_bh; |
1652 | retval = ocfs2_journal_access_db(handle, dir, bh, | 1658 | retval = ocfs2_journal_access_db(handle, |
1659 | INODE_CACHE(dir), bh, | ||
1653 | OCFS2_JOURNAL_ACCESS_WRITE); | 1660 | OCFS2_JOURNAL_ACCESS_WRITE); |
1654 | } | 1661 | } |
1655 | if (retval) { | 1662 | if (retval) { |
@@ -1700,11 +1707,13 @@ int __ocfs2_add_entry(handle_t *handle, | |||
1700 | } | 1707 | } |
1701 | 1708 | ||
1702 | if (insert_bh == parent_fe_bh) | 1709 | if (insert_bh == parent_fe_bh) |
1703 | status = ocfs2_journal_access_di(handle, dir, | 1710 | status = ocfs2_journal_access_di(handle, |
1711 | INODE_CACHE(dir), | ||
1704 | insert_bh, | 1712 | insert_bh, |
1705 | OCFS2_JOURNAL_ACCESS_WRITE); | 1713 | OCFS2_JOURNAL_ACCESS_WRITE); |
1706 | else { | 1714 | else { |
1707 | status = ocfs2_journal_access_db(handle, dir, | 1715 | status = ocfs2_journal_access_db(handle, |
1716 | INODE_CACHE(dir), | ||
1708 | insert_bh, | 1717 | insert_bh, |
1709 | OCFS2_JOURNAL_ACCESS_WRITE); | 1718 | OCFS2_JOURNAL_ACCESS_WRITE); |
1710 | 1719 | ||
@@ -2280,7 +2289,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb, | |||
2280 | struct ocfs2_inline_data *data = &di->id2.i_data; | 2289 | struct ocfs2_inline_data *data = &di->id2.i_data; |
2281 | unsigned int size = le16_to_cpu(data->id_count); | 2290 | unsigned int size = le16_to_cpu(data->id_count); |
2282 | 2291 | ||
2283 | ret = ocfs2_journal_access_di(handle, inode, di_bh, | 2292 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, |
2284 | OCFS2_JOURNAL_ACCESS_WRITE); | 2293 | OCFS2_JOURNAL_ACCESS_WRITE); |
2285 | if (ret) { | 2294 | if (ret) { |
2286 | mlog_errno(ret); | 2295 | mlog_errno(ret); |
@@ -2332,9 +2341,9 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
2332 | goto bail; | 2341 | goto bail; |
2333 | } | 2342 | } |
2334 | 2343 | ||
2335 | ocfs2_set_new_buffer_uptodate(inode, new_bh); | 2344 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); |
2336 | 2345 | ||
2337 | status = ocfs2_journal_access_db(handle, inode, new_bh, | 2346 | status = ocfs2_journal_access_db(handle, INODE_CACHE(inode), new_bh, |
2338 | OCFS2_JOURNAL_ACCESS_CREATE); | 2347 | OCFS2_JOURNAL_ACCESS_CREATE); |
2339 | if (status < 0) { | 2348 | if (status < 0) { |
2340 | mlog_errno(status); | 2349 | mlog_errno(status); |
@@ -2418,9 +2427,9 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, | |||
2418 | ret = -EIO; | 2427 | ret = -EIO; |
2419 | goto out; | 2428 | goto out; |
2420 | } | 2429 | } |
2421 | ocfs2_set_new_buffer_uptodate(dir, dx_root_bh); | 2430 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh); |
2422 | 2431 | ||
2423 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | 2432 | ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh, |
2424 | OCFS2_JOURNAL_ACCESS_CREATE); | 2433 | OCFS2_JOURNAL_ACCESS_CREATE); |
2425 | if (ret < 0) { | 2434 | if (ret < 0) { |
2426 | mlog_errno(ret); | 2435 | mlog_errno(ret); |
@@ -2454,7 +2463,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, | |||
2454 | if (ret) | 2463 | if (ret) |
2455 | mlog_errno(ret); | 2464 | mlog_errno(ret); |
2456 | 2465 | ||
2457 | ret = ocfs2_journal_access_di(handle, dir, di_bh, | 2466 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh, |
2458 | OCFS2_JOURNAL_ACCESS_CREATE); | 2467 | OCFS2_JOURNAL_ACCESS_CREATE); |
2459 | if (ret) { | 2468 | if (ret) { |
2460 | mlog_errno(ret); | 2469 | mlog_errno(ret); |
@@ -2495,9 +2504,9 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb, | |||
2495 | } | 2504 | } |
2496 | dx_leaves[i] = bh; | 2505 | dx_leaves[i] = bh; |
2497 | 2506 | ||
2498 | ocfs2_set_new_buffer_uptodate(dir, bh); | 2507 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), bh); |
2499 | 2508 | ||
2500 | ret = ocfs2_journal_access_dl(handle, dir, bh, | 2509 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), bh, |
2501 | OCFS2_JOURNAL_ACCESS_CREATE); | 2510 | OCFS2_JOURNAL_ACCESS_CREATE); |
2502 | if (ret < 0) { | 2511 | if (ret < 0) { |
2503 | mlog_errno(ret); | 2512 | mlog_errno(ret); |
@@ -2582,7 +2591,6 @@ static int ocfs2_dx_dir_new_cluster(struct inode *dir, | |||
2582 | { | 2591 | { |
2583 | int ret; | 2592 | int ret; |
2584 | u64 phys_blkno; | 2593 | u64 phys_blkno; |
2585 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2586 | 2594 | ||
2587 | ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves, | 2595 | ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves, |
2588 | num_dx_leaves, &phys_blkno); | 2596 | num_dx_leaves, &phys_blkno); |
@@ -2591,7 +2599,7 @@ static int ocfs2_dx_dir_new_cluster(struct inode *dir, | |||
2591 | goto out; | 2599 | goto out; |
2592 | } | 2600 | } |
2593 | 2601 | ||
2594 | ret = ocfs2_insert_extent(osb, handle, dir, et, cpos, phys_blkno, 1, 0, | 2602 | ret = ocfs2_insert_extent(handle, et, cpos, phys_blkno, 1, 0, |
2595 | meta_ac); | 2603 | meta_ac); |
2596 | if (ret) | 2604 | if (ret) |
2597 | mlog_errno(ret); | 2605 | mlog_errno(ret); |
@@ -2895,7 +2903,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
2895 | struct ocfs2_extent_tree dx_et; | 2903 | struct ocfs2_extent_tree dx_et; |
2896 | int did_quota = 0, bytes_allocated = 0; | 2904 | int did_quota = 0, bytes_allocated = 0; |
2897 | 2905 | ||
2898 | ocfs2_init_dinode_extent_tree(&et, dir, di_bh); | 2906 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir), di_bh); |
2899 | 2907 | ||
2900 | alloc = ocfs2_clusters_for_bytes(sb, bytes); | 2908 | alloc = ocfs2_clusters_for_bytes(sb, bytes); |
2901 | dx_alloc = 0; | 2909 | dx_alloc = 0; |
@@ -3005,9 +3013,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
3005 | goto out_commit; | 3013 | goto out_commit; |
3006 | } | 3014 | } |
3007 | 3015 | ||
3008 | ocfs2_set_new_buffer_uptodate(dir, dirdata_bh); | 3016 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dirdata_bh); |
3009 | 3017 | ||
3010 | ret = ocfs2_journal_access_db(handle, dir, dirdata_bh, | 3018 | ret = ocfs2_journal_access_db(handle, INODE_CACHE(dir), dirdata_bh, |
3011 | OCFS2_JOURNAL_ACCESS_CREATE); | 3019 | OCFS2_JOURNAL_ACCESS_CREATE); |
3012 | if (ret) { | 3020 | if (ret) { |
3013 | mlog_errno(ret); | 3021 | mlog_errno(ret); |
@@ -3060,7 +3068,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
3060 | * We let the later dirent insert modify c/mtime - to the user | 3068 | * We let the later dirent insert modify c/mtime - to the user |
3061 | * the data hasn't changed. | 3069 | * the data hasn't changed. |
3062 | */ | 3070 | */ |
3063 | ret = ocfs2_journal_access_di(handle, dir, di_bh, | 3071 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh, |
3064 | OCFS2_JOURNAL_ACCESS_CREATE); | 3072 | OCFS2_JOURNAL_ACCESS_CREATE); |
3065 | if (ret) { | 3073 | if (ret) { |
3066 | mlog_errno(ret); | 3074 | mlog_errno(ret); |
@@ -3085,7 +3093,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
3085 | * This should never fail as our extent list is empty and all | 3093 | * This should never fail as our extent list is empty and all |
3086 | * related blocks have been journaled already. | 3094 | * related blocks have been journaled already. |
3087 | */ | 3095 | */ |
3088 | ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len, | 3096 | ret = ocfs2_insert_extent(handle, &et, 0, blkno, len, |
3089 | 0, NULL); | 3097 | 0, NULL); |
3090 | if (ret) { | 3098 | if (ret) { |
3091 | mlog_errno(ret); | 3099 | mlog_errno(ret); |
@@ -3117,8 +3125,10 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
3117 | ocfs2_dx_dir_index_root_block(dir, dx_root_bh, | 3125 | ocfs2_dx_dir_index_root_block(dir, dx_root_bh, |
3118 | dirdata_bh); | 3126 | dirdata_bh); |
3119 | } else { | 3127 | } else { |
3120 | ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh); | 3128 | ocfs2_init_dx_root_extent_tree(&dx_et, |
3121 | ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0, | 3129 | INODE_CACHE(dir), |
3130 | dx_root_bh); | ||
3131 | ret = ocfs2_insert_extent(handle, &dx_et, 0, | ||
3122 | dx_insert_blkno, 1, 0, NULL); | 3132 | dx_insert_blkno, 1, 0, NULL); |
3123 | if (ret) | 3133 | if (ret) |
3124 | mlog_errno(ret); | 3134 | mlog_errno(ret); |
@@ -3138,7 +3148,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
3138 | } | 3148 | } |
3139 | blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); | 3149 | blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); |
3140 | 3150 | ||
3141 | ret = ocfs2_insert_extent(osb, handle, dir, &et, 1, | 3151 | ret = ocfs2_insert_extent(handle, &et, 1, |
3142 | blkno, len, 0, NULL); | 3152 | blkno, len, 0, NULL); |
3143 | if (ret) { | 3153 | if (ret) { |
3144 | mlog_errno(ret); | 3154 | mlog_errno(ret); |
@@ -3337,8 +3347,9 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
3337 | spin_lock(&OCFS2_I(dir)->ip_lock); | 3347 | spin_lock(&OCFS2_I(dir)->ip_lock); |
3338 | if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { | 3348 | if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { |
3339 | spin_unlock(&OCFS2_I(dir)->ip_lock); | 3349 | spin_unlock(&OCFS2_I(dir)->ip_lock); |
3340 | ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh); | 3350 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir), |
3341 | num_free_extents = ocfs2_num_free_extents(osb, dir, &et); | 3351 | parent_fe_bh); |
3352 | num_free_extents = ocfs2_num_free_extents(osb, &et); | ||
3342 | if (num_free_extents < 0) { | 3353 | if (num_free_extents < 0) { |
3343 | status = num_free_extents; | 3354 | status = num_free_extents; |
3344 | mlog_errno(status); | 3355 | mlog_errno(status); |
@@ -3387,9 +3398,9 @@ do_extend: | |||
3387 | goto bail; | 3398 | goto bail; |
3388 | } | 3399 | } |
3389 | 3400 | ||
3390 | ocfs2_set_new_buffer_uptodate(dir, new_bh); | 3401 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), new_bh); |
3391 | 3402 | ||
3392 | status = ocfs2_journal_access_db(handle, dir, new_bh, | 3403 | status = ocfs2_journal_access_db(handle, INODE_CACHE(dir), new_bh, |
3393 | OCFS2_JOURNAL_ACCESS_CREATE); | 3404 | OCFS2_JOURNAL_ACCESS_CREATE); |
3394 | if (status < 0) { | 3405 | if (status < 0) { |
3395 | mlog_errno(status); | 3406 | mlog_errno(status); |
@@ -3829,7 +3840,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | |||
3829 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | 3840 | (unsigned long long)OCFS2_I(dir)->ip_blkno, |
3830 | (unsigned long long)leaf_blkno, insert_hash); | 3841 | (unsigned long long)leaf_blkno, insert_hash); |
3831 | 3842 | ||
3832 | ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); | 3843 | ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh); |
3833 | 3844 | ||
3834 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; | 3845 | dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; |
3835 | /* | 3846 | /* |
@@ -3885,7 +3896,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | |||
3885 | } | 3896 | } |
3886 | did_quota = 1; | 3897 | did_quota = 1; |
3887 | 3898 | ||
3888 | ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, | 3899 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh, |
3889 | OCFS2_JOURNAL_ACCESS_WRITE); | 3900 | OCFS2_JOURNAL_ACCESS_WRITE); |
3890 | if (ret) { | 3901 | if (ret) { |
3891 | mlog_errno(ret); | 3902 | mlog_errno(ret); |
@@ -3949,7 +3960,8 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | |||
3949 | } | 3960 | } |
3950 | 3961 | ||
3951 | for (i = 0; i < num_dx_leaves; i++) { | 3962 | for (i = 0; i < num_dx_leaves; i++) { |
3952 | ret = ocfs2_journal_access_dl(handle, dir, orig_dx_leaves[i], | 3963 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), |
3964 | orig_dx_leaves[i], | ||
3953 | OCFS2_JOURNAL_ACCESS_WRITE); | 3965 | OCFS2_JOURNAL_ACCESS_WRITE); |
3954 | if (ret) { | 3966 | if (ret) { |
3955 | mlog_errno(ret); | 3967 | mlog_errno(ret); |
@@ -4165,7 +4177,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir, | |||
4165 | * failure to add the dx_root_bh to the journal won't result | 4177 | * failure to add the dx_root_bh to the journal won't result |
4166 | * us losing clusters. | 4178 | * us losing clusters. |
4167 | */ | 4179 | */ |
4168 | ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, | 4180 | ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh, |
4169 | OCFS2_JOURNAL_ACCESS_WRITE); | 4181 | OCFS2_JOURNAL_ACCESS_WRITE); |
4170 | if (ret) { | 4182 | if (ret) { |
4171 | mlog_errno(ret); | 4183 | mlog_errno(ret); |
@@ -4207,9 +4219,8 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir, | |||
4207 | 4219 | ||
4208 | /* This should never fail considering we start with an empty | 4220 | /* This should never fail considering we start with an empty |
4209 | * dx_root. */ | 4221 | * dx_root. */ |
4210 | ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); | 4222 | ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh); |
4211 | ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, | 4223 | ret = ocfs2_insert_extent(handle, &et, 0, insert_blkno, 1, 0, NULL); |
4212 | insert_blkno, 1, 0, NULL); | ||
4213 | if (ret) | 4224 | if (ret) |
4214 | mlog_errno(ret); | 4225 | mlog_errno(ret); |
4215 | did_quota = 0; | 4226 | did_quota = 0; |
@@ -4469,7 +4480,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir, | |||
4469 | goto out_unlock; | 4480 | goto out_unlock; |
4470 | } | 4481 | } |
4471 | 4482 | ||
4472 | ret = ocfs2_journal_access_di(handle, dir, di_bh, | 4483 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh, |
4473 | OCFS2_JOURNAL_ACCESS_WRITE); | 4484 | OCFS2_JOURNAL_ACCESS_WRITE); |
4474 | if (ret) { | 4485 | if (ret) { |
4475 | mlog_errno(ret); | 4486 | mlog_errno(ret); |
@@ -4532,7 +4543,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh) | |||
4532 | if (ocfs2_dx_root_inline(dx_root)) | 4543 | if (ocfs2_dx_root_inline(dx_root)) |
4533 | goto remove_index; | 4544 | goto remove_index; |
4534 | 4545 | ||
4535 | ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); | 4546 | ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh); |
4536 | 4547 | ||
4537 | /* XXX: What if dr_clusters is too large? */ | 4548 | /* XXX: What if dr_clusters is too large? */ |
4538 | while (le32_to_cpu(dx_root->dr_clusters)) { | 4549 | while (le32_to_cpu(dx_root->dr_clusters)) { |
@@ -4565,7 +4576,7 @@ remove_index: | |||
4565 | goto out; | 4576 | goto out; |
4566 | } | 4577 | } |
4567 | 4578 | ||
4568 | ocfs2_remove_from_cache(dir, dx_root_bh); | 4579 | ocfs2_remove_from_cache(INODE_CACHE(dir), dx_root_bh); |
4569 | out: | 4580 | out: |
4570 | ocfs2_schedule_truncate_log_flush(osb, 1); | 4581 | ocfs2_schedule_truncate_log_flush(osb, 1); |
4571 | ocfs2_run_deallocs(osb, &dealloc); | 4582 | ocfs2_run_deallocs(osb, &dealloc); |
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 81eff8e58322..01cf8cc3d286 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/highmem.h> | 32 | #include <linux/highmem.h> |
33 | #include <linux/utsname.h> | ||
34 | #include <linux/init.h> | 33 | #include <linux/init.h> |
35 | #include <linux/sysctl.h> | 34 | #include <linux/sysctl.h> |
36 | #include <linux/random.h> | 35 | #include <linux/random.h> |
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 75997b4deaf3..ca96bce50e18 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/highmem.h> | 32 | #include <linux/highmem.h> |
33 | #include <linux/utsname.h> | ||
34 | #include <linux/init.h> | 33 | #include <linux/init.h> |
35 | #include <linux/sysctl.h> | 34 | #include <linux/sysctl.h> |
36 | #include <linux/random.h> | 35 | #include <linux/random.h> |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index df52f706f669..ca46002ec10e 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
30 | #include <linux/utsname.h> | ||
31 | #include <linux/sysctl.h> | 30 | #include <linux/sysctl.h> |
32 | #include <linux/spinlock.h> | 31 | #include <linux/spinlock.h> |
33 | #include <linux/debugfs.h> | 32 | #include <linux/debugfs.h> |
@@ -683,7 +682,7 @@ static int lockres_seq_show(struct seq_file *s, void *v) | |||
683 | return 0; | 682 | return 0; |
684 | } | 683 | } |
685 | 684 | ||
686 | static struct seq_operations debug_lockres_ops = { | 685 | static const struct seq_operations debug_lockres_ops = { |
687 | .start = lockres_seq_start, | 686 | .start = lockres_seq_start, |
688 | .stop = lockres_seq_stop, | 687 | .stop = lockres_seq_stop, |
689 | .next = lockres_seq_next, | 688 | .next = lockres_seq_next, |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 4d9e6b288dd8..0334000676d3 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> |
31 | #include <linux/utsname.h> | ||
32 | #include <linux/init.h> | 31 | #include <linux/init.h> |
33 | #include <linux/spinlock.h> | 32 | #include <linux/spinlock.h> |
34 | #include <linux/delay.h> | 33 | #include <linux/delay.h> |
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 83a9f2972ac8..437698e9465f 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/highmem.h> | 32 | #include <linux/highmem.h> |
33 | #include <linux/utsname.h> | ||
34 | #include <linux/init.h> | 33 | #include <linux/init.h> |
35 | #include <linux/sysctl.h> | 34 | #include <linux/sysctl.h> |
36 | #include <linux/random.h> | 35 | #include <linux/random.h> |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index f8b653fcd4dd..83bcaf266b35 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/highmem.h> | 32 | #include <linux/highmem.h> |
33 | #include <linux/utsname.h> | ||
34 | #include <linux/init.h> | 33 | #include <linux/init.h> |
35 | #include <linux/sysctl.h> | 34 | #include <linux/sysctl.h> |
36 | #include <linux/random.h> | 35 | #include <linux/random.h> |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 43e6e3280569..d9fa3d22e17c 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/highmem.h> | 32 | #include <linux/highmem.h> |
33 | #include <linux/utsname.h> | ||
34 | #include <linux/init.h> | 33 | #include <linux/init.h> |
35 | #include <linux/sysctl.h> | 34 | #include <linux/sysctl.h> |
36 | #include <linux/random.h> | 35 | #include <linux/random.h> |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d490b66ad9d7..52ec020ea78b 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/highmem.h> | 32 | #include <linux/highmem.h> |
33 | #include <linux/utsname.h> | ||
34 | #include <linux/init.h> | 33 | #include <linux/init.h> |
35 | #include <linux/sysctl.h> | 34 | #include <linux/sysctl.h> |
36 | #include <linux/random.h> | 35 | #include <linux/random.h> |
@@ -212,14 +211,18 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, | |||
212 | spin_lock(&dlm->spinlock); | 211 | spin_lock(&dlm->spinlock); |
213 | } | 212 | } |
214 | 213 | ||
214 | spin_lock(&res->spinlock); | ||
215 | if (!list_empty(&res->purge)) { | 215 | if (!list_empty(&res->purge)) { |
216 | mlog(0, "removing lockres %.*s:%p from purgelist, " | 216 | mlog(0, "removing lockres %.*s:%p from purgelist, " |
217 | "master = %d\n", res->lockname.len, res->lockname.name, | 217 | "master = %d\n", res->lockname.len, res->lockname.name, |
218 | res, master); | 218 | res, master); |
219 | list_del_init(&res->purge); | 219 | list_del_init(&res->purge); |
220 | spin_unlock(&res->spinlock); | ||
220 | dlm_lockres_put(res); | 221 | dlm_lockres_put(res); |
221 | dlm->purge_count--; | 222 | dlm->purge_count--; |
222 | } | 223 | } else |
224 | spin_unlock(&res->spinlock); | ||
225 | |||
223 | __dlm_unhash_lockres(res); | 226 | __dlm_unhash_lockres(res); |
224 | 227 | ||
225 | /* lockres is not in the hash now. drop the flag and wake up | 228 | /* lockres is not in the hash now. drop the flag and wake up |
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 756f5b0998e0..00f53b2aea76 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | #include <linux/highmem.h> | 32 | #include <linux/highmem.h> |
33 | #include <linux/utsname.h> | ||
34 | #include <linux/init.h> | 33 | #include <linux/init.h> |
35 | #include <linux/sysctl.h> | 34 | #include <linux/sysctl.h> |
36 | #include <linux/random.h> | 35 | #include <linux/random.h> |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 110bb57c46ab..0d38d67194cb 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include "super.h" | 53 | #include "super.h" |
54 | #include "uptodate.h" | 54 | #include "uptodate.h" |
55 | #include "quota.h" | 55 | #include "quota.h" |
56 | #include "refcounttree.h" | ||
56 | 57 | ||
57 | #include "buffer_head_io.h" | 58 | #include "buffer_head_io.h" |
58 | 59 | ||
@@ -110,6 +111,11 @@ static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | |||
110 | 111 | ||
111 | static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); | 112 | static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); |
112 | 113 | ||
114 | static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, | ||
115 | int new_level); | ||
116 | static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, | ||
117 | int blocking); | ||
118 | |||
113 | #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) | 119 | #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) |
114 | 120 | ||
115 | /* This aids in debugging situations where a bad LVB might be involved. */ | 121 | /* This aids in debugging situations where a bad LVB might be involved. */ |
@@ -278,6 +284,12 @@ static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { | |||
278 | .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, | 284 | .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, |
279 | }; | 285 | }; |
280 | 286 | ||
287 | static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { | ||
288 | .check_downconvert = ocfs2_check_refcount_downconvert, | ||
289 | .downconvert_worker = ocfs2_refcount_convert_worker, | ||
290 | .flags = 0, | ||
291 | }; | ||
292 | |||
281 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 293 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
282 | { | 294 | { |
283 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 295 | return lockres->l_type == OCFS2_LOCK_TYPE_META || |
@@ -306,6 +318,12 @@ static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_re | |||
306 | return (struct ocfs2_mem_dqinfo *)lockres->l_priv; | 318 | return (struct ocfs2_mem_dqinfo *)lockres->l_priv; |
307 | } | 319 | } |
308 | 320 | ||
321 | static inline struct ocfs2_refcount_tree * | ||
322 | ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res) | ||
323 | { | ||
324 | return container_of(res, struct ocfs2_refcount_tree, rf_lockres); | ||
325 | } | ||
326 | |||
309 | static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) | 327 | static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) |
310 | { | 328 | { |
311 | if (lockres->l_ops->get_osb) | 329 | if (lockres->l_ops->get_osb) |
@@ -693,6 +711,17 @@ void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, | |||
693 | info); | 711 | info); |
694 | } | 712 | } |
695 | 713 | ||
714 | void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, | ||
715 | struct ocfs2_super *osb, u64 ref_blkno, | ||
716 | unsigned int generation) | ||
717 | { | ||
718 | ocfs2_lock_res_init_once(lockres); | ||
719 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, | ||
720 | generation, lockres->l_name); | ||
721 | ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, | ||
722 | &ocfs2_refcount_block_lops, osb); | ||
723 | } | ||
724 | |||
696 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 725 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) |
697 | { | 726 | { |
698 | mlog_entry_void(); | 727 | mlog_entry_void(); |
@@ -1548,8 +1577,10 @@ int ocfs2_rw_lock(struct inode *inode, int write) | |||
1548 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1577 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
1549 | write ? "EXMODE" : "PRMODE"); | 1578 | write ? "EXMODE" : "PRMODE"); |
1550 | 1579 | ||
1551 | if (ocfs2_mount_local(osb)) | 1580 | if (ocfs2_mount_local(osb)) { |
1581 | mlog_exit(0); | ||
1552 | return 0; | 1582 | return 0; |
1583 | } | ||
1553 | 1584 | ||
1554 | lockres = &OCFS2_I(inode)->ip_rw_lockres; | 1585 | lockres = &OCFS2_I(inode)->ip_rw_lockres; |
1555 | 1586 | ||
@@ -2127,7 +2158,7 @@ static int ocfs2_inode_lock_update(struct inode *inode, | |||
2127 | 2158 | ||
2128 | /* This will discard any caching information we might have had | 2159 | /* This will discard any caching information we might have had |
2129 | * for the inode metadata. */ | 2160 | * for the inode metadata. */ |
2130 | ocfs2_metadata_cache_purge(inode); | 2161 | ocfs2_metadata_cache_purge(INODE_CACHE(inode)); |
2131 | 2162 | ||
2132 | ocfs2_extent_map_trunc(inode, 0); | 2163 | ocfs2_extent_map_trunc(inode, 0); |
2133 | 2164 | ||
@@ -3009,6 +3040,7 @@ static void ocfs2_unlock_ast(void *opaque, int error) | |||
3009 | "unlock_action %d\n", error, lockres->l_name, | 3040 | "unlock_action %d\n", error, lockres->l_name, |
3010 | lockres->l_unlock_action); | 3041 | lockres->l_unlock_action); |
3011 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3042 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
3043 | mlog_exit_void(); | ||
3012 | return; | 3044 | return; |
3013 | } | 3045 | } |
3014 | 3046 | ||
@@ -3495,11 +3527,11 @@ out: | |||
3495 | return UNBLOCK_CONTINUE; | 3527 | return UNBLOCK_CONTINUE; |
3496 | } | 3528 | } |
3497 | 3529 | ||
3498 | static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, | 3530 | static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, |
3499 | int new_level) | 3531 | struct ocfs2_lock_res *lockres, |
3532 | int new_level) | ||
3500 | { | 3533 | { |
3501 | struct inode *inode = ocfs2_lock_res_inode(lockres); | 3534 | int checkpointed = ocfs2_ci_fully_checkpointed(ci); |
3502 | int checkpointed = ocfs2_inode_fully_checkpointed(inode); | ||
3503 | 3535 | ||
3504 | BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); | 3536 | BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); |
3505 | BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); | 3537 | BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); |
@@ -3507,10 +3539,18 @@ static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, | |||
3507 | if (checkpointed) | 3539 | if (checkpointed) |
3508 | return 1; | 3540 | return 1; |
3509 | 3541 | ||
3510 | ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); | 3542 | ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); |
3511 | return 0; | 3543 | return 0; |
3512 | } | 3544 | } |
3513 | 3545 | ||
3546 | static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, | ||
3547 | int new_level) | ||
3548 | { | ||
3549 | struct inode *inode = ocfs2_lock_res_inode(lockres); | ||
3550 | |||
3551 | return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level); | ||
3552 | } | ||
3553 | |||
3514 | static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) | 3554 | static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) |
3515 | { | 3555 | { |
3516 | struct inode *inode = ocfs2_lock_res_inode(lockres); | 3556 | struct inode *inode = ocfs2_lock_res_inode(lockres); |
@@ -3640,6 +3680,26 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | |||
3640 | return UNBLOCK_CONTINUE_POST; | 3680 | return UNBLOCK_CONTINUE_POST; |
3641 | } | 3681 | } |
3642 | 3682 | ||
3683 | static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, | ||
3684 | int new_level) | ||
3685 | { | ||
3686 | struct ocfs2_refcount_tree *tree = | ||
3687 | ocfs2_lock_res_refcount_tree(lockres); | ||
3688 | |||
3689 | return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); | ||
3690 | } | ||
3691 | |||
3692 | static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, | ||
3693 | int blocking) | ||
3694 | { | ||
3695 | struct ocfs2_refcount_tree *tree = | ||
3696 | ocfs2_lock_res_refcount_tree(lockres); | ||
3697 | |||
3698 | ocfs2_metadata_cache_purge(&tree->rf_ci); | ||
3699 | |||
3700 | return UNBLOCK_CONTINUE; | ||
3701 | } | ||
3702 | |||
3643 | static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) | 3703 | static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) |
3644 | { | 3704 | { |
3645 | struct ocfs2_qinfo_lvb *lvb; | 3705 | struct ocfs2_qinfo_lvb *lvb; |
@@ -3752,6 +3812,37 @@ bail: | |||
3752 | return status; | 3812 | return status; |
3753 | } | 3813 | } |
3754 | 3814 | ||
3815 | int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) | ||
3816 | { | ||
3817 | int status; | ||
3818 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | ||
3819 | struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; | ||
3820 | struct ocfs2_super *osb = lockres->l_priv; | ||
3821 | |||
3822 | |||
3823 | if (ocfs2_is_hard_readonly(osb)) | ||
3824 | return -EROFS; | ||
3825 | |||
3826 | if (ocfs2_mount_local(osb)) | ||
3827 | return 0; | ||
3828 | |||
3829 | status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); | ||
3830 | if (status < 0) | ||
3831 | mlog_errno(status); | ||
3832 | |||
3833 | return status; | ||
3834 | } | ||
3835 | |||
3836 | void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) | ||
3837 | { | ||
3838 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; | ||
3839 | struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; | ||
3840 | struct ocfs2_super *osb = lockres->l_priv; | ||
3841 | |||
3842 | if (!ocfs2_mount_local(osb)) | ||
3843 | ocfs2_cluster_unlock(osb, lockres, level); | ||
3844 | } | ||
3845 | |||
3755 | /* | 3846 | /* |
3756 | * This is the filesystem locking protocol. It provides the lock handling | 3847 | * This is the filesystem locking protocol. It provides the lock handling |
3757 | * hooks for the underlying DLM. It has a maximum version number. | 3848 | * hooks for the underlying DLM. It has a maximum version number. |
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 7553836931de..d1ce48e1b3d6 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -101,6 +101,9 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | |||
101 | struct ocfs2_mem_dqinfo; | 101 | struct ocfs2_mem_dqinfo; |
102 | void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, | 102 | void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, |
103 | struct ocfs2_mem_dqinfo *info); | 103 | struct ocfs2_mem_dqinfo *info); |
104 | void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, | ||
105 | struct ocfs2_super *osb, u64 ref_blkno, | ||
106 | unsigned int generation); | ||
104 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); | 107 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res); |
105 | int ocfs2_create_new_inode_locks(struct inode *inode); | 108 | int ocfs2_create_new_inode_locks(struct inode *inode); |
106 | int ocfs2_drop_inode_locks(struct inode *inode); | 109 | int ocfs2_drop_inode_locks(struct inode *inode); |
@@ -148,6 +151,9 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock); | |||
148 | void ocfs2_file_unlock(struct file *file); | 151 | void ocfs2_file_unlock(struct file *file); |
149 | int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex); | 152 | int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex); |
150 | void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex); | 153 | void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex); |
154 | struct ocfs2_refcount_tree; | ||
155 | int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex); | ||
156 | void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex); | ||
151 | 157 | ||
152 | 158 | ||
153 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); | 159 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); |
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index f2bb1a04d253..843db64e9d4a 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
@@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode, | |||
293 | struct ocfs2_extent_block *eb; | 293 | struct ocfs2_extent_block *eb; |
294 | struct ocfs2_extent_list *el; | 294 | struct ocfs2_extent_list *el; |
295 | 295 | ||
296 | ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); | 296 | ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh); |
297 | if (ret) { | 297 | if (ret) { |
298 | mlog_errno(ret); | 298 | mlog_errno(ret); |
299 | goto out; | 299 | goto out; |
@@ -353,11 +353,11 @@ static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el, | |||
353 | * eb_bh is NULL. Otherwise, eb_bh should point to the extent block | 353 | * eb_bh is NULL. Otherwise, eb_bh should point to the extent block |
354 | * containing el. | 354 | * containing el. |
355 | */ | 355 | */ |
356 | static int ocfs2_figure_hole_clusters(struct inode *inode, | 356 | int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci, |
357 | struct ocfs2_extent_list *el, | 357 | struct ocfs2_extent_list *el, |
358 | struct buffer_head *eb_bh, | 358 | struct buffer_head *eb_bh, |
359 | u32 v_cluster, | 359 | u32 v_cluster, |
360 | u32 *num_clusters) | 360 | u32 *num_clusters) |
361 | { | 361 | { |
362 | int ret, i; | 362 | int ret, i; |
363 | struct buffer_head *next_eb_bh = NULL; | 363 | struct buffer_head *next_eb_bh = NULL; |
@@ -375,7 +375,7 @@ static int ocfs2_figure_hole_clusters(struct inode *inode, | |||
375 | if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) | 375 | if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) |
376 | goto no_more_extents; | 376 | goto no_more_extents; |
377 | 377 | ||
378 | ret = ocfs2_read_extent_block(inode, | 378 | ret = ocfs2_read_extent_block(ci, |
379 | le64_to_cpu(eb->h_next_leaf_blk), | 379 | le64_to_cpu(eb->h_next_leaf_blk), |
380 | &next_eb_bh); | 380 | &next_eb_bh); |
381 | if (ret) { | 381 | if (ret) { |
@@ -428,7 +428,8 @@ static int ocfs2_get_clusters_nocache(struct inode *inode, | |||
428 | tree_height = le16_to_cpu(el->l_tree_depth); | 428 | tree_height = le16_to_cpu(el->l_tree_depth); |
429 | 429 | ||
430 | if (tree_height > 0) { | 430 | if (tree_height > 0) { |
431 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | 431 | ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster, |
432 | &eb_bh); | ||
432 | if (ret) { | 433 | if (ret) { |
433 | mlog_errno(ret); | 434 | mlog_errno(ret); |
434 | goto out; | 435 | goto out; |
@@ -455,7 +456,8 @@ static int ocfs2_get_clusters_nocache(struct inode *inode, | |||
455 | * field. | 456 | * field. |
456 | */ | 457 | */ |
457 | if (hole_len) { | 458 | if (hole_len) { |
458 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, | 459 | ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode), |
460 | el, eb_bh, | ||
459 | v_cluster, &len); | 461 | v_cluster, &len); |
460 | if (ret) { | 462 | if (ret) { |
461 | mlog_errno(ret); | 463 | mlog_errno(ret); |
@@ -539,7 +541,8 @@ static void ocfs2_relative_extent_offsets(struct super_block *sb, | |||
539 | 541 | ||
540 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, | 542 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, |
541 | u32 *p_cluster, u32 *num_clusters, | 543 | u32 *p_cluster, u32 *num_clusters, |
542 | struct ocfs2_extent_list *el) | 544 | struct ocfs2_extent_list *el, |
545 | unsigned int *extent_flags) | ||
543 | { | 546 | { |
544 | int ret = 0, i; | 547 | int ret = 0, i; |
545 | struct buffer_head *eb_bh = NULL; | 548 | struct buffer_head *eb_bh = NULL; |
@@ -548,7 +551,8 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, | |||
548 | u32 coff; | 551 | u32 coff; |
549 | 552 | ||
550 | if (el->l_tree_depth) { | 553 | if (el->l_tree_depth) { |
551 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | 554 | ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster, |
555 | &eb_bh); | ||
552 | if (ret) { | 556 | if (ret) { |
553 | mlog_errno(ret); | 557 | mlog_errno(ret); |
554 | goto out; | 558 | goto out; |
@@ -590,6 +594,9 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, | |||
590 | *p_cluster = *p_cluster + coff; | 594 | *p_cluster = *p_cluster + coff; |
591 | if (num_clusters) | 595 | if (num_clusters) |
592 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; | 596 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; |
597 | |||
598 | if (extent_flags) | ||
599 | *extent_flags = rec->e_flags; | ||
593 | } | 600 | } |
594 | out: | 601 | out: |
595 | if (eb_bh) | 602 | if (eb_bh) |
@@ -862,8 +869,8 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, | |||
862 | BUG_ON(bhs[done + i]->b_blocknr != (p_block + i)); | 869 | BUG_ON(bhs[done + i]->b_blocknr != (p_block + i)); |
863 | } | 870 | } |
864 | 871 | ||
865 | rc = ocfs2_read_blocks(inode, p_block, count, bhs + done, | 872 | rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count, |
866 | flags, validate); | 873 | bhs + done, flags, validate); |
867 | if (rc) { | 874 | if (rc) { |
868 | mlog_errno(rc); | 875 | mlog_errno(rc); |
869 | break; | 876 | break; |
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index b7dd9731b462..e79d41c2c909 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h | |||
@@ -55,12 +55,18 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
55 | 55 | ||
56 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, | 56 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, |
57 | u32 *p_cluster, u32 *num_clusters, | 57 | u32 *p_cluster, u32 *num_clusters, |
58 | struct ocfs2_extent_list *el); | 58 | struct ocfs2_extent_list *el, |
59 | unsigned int *extent_flags); | ||
59 | 60 | ||
60 | int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, | 61 | int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, |
61 | struct buffer_head *bhs[], int flags, | 62 | struct buffer_head *bhs[], int flags, |
62 | int (*validate)(struct super_block *sb, | 63 | int (*validate)(struct super_block *sb, |
63 | struct buffer_head *bh)); | 64 | struct buffer_head *bh)); |
65 | int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci, | ||
66 | struct ocfs2_extent_list *el, | ||
67 | struct buffer_head *eb_bh, | ||
68 | u32 v_cluster, | ||
69 | u32 *num_clusters); | ||
64 | static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block, | 70 | static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block, |
65 | struct buffer_head **bh, | 71 | struct buffer_head **bh, |
66 | int (*validate)(struct super_block *sb, | 72 | int (*validate)(struct super_block *sb, |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 221c5e98957b..89fc8ee1f5a5 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -59,6 +59,7 @@ | |||
59 | #include "xattr.h" | 59 | #include "xattr.h" |
60 | #include "acl.h" | 60 | #include "acl.h" |
61 | #include "quota.h" | 61 | #include "quota.h" |
62 | #include "refcounttree.h" | ||
62 | 63 | ||
63 | #include "buffer_head_io.h" | 64 | #include "buffer_head_io.h" |
64 | 65 | ||
@@ -259,7 +260,7 @@ int ocfs2_update_inode_atime(struct inode *inode, | |||
259 | goto out; | 260 | goto out; |
260 | } | 261 | } |
261 | 262 | ||
262 | ret = ocfs2_journal_access_di(handle, inode, bh, | 263 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh, |
263 | OCFS2_JOURNAL_ACCESS_WRITE); | 264 | OCFS2_JOURNAL_ACCESS_WRITE); |
264 | if (ret) { | 265 | if (ret) { |
265 | mlog_errno(ret); | 266 | mlog_errno(ret); |
@@ -334,6 +335,39 @@ out: | |||
334 | return ret; | 335 | return ret; |
335 | } | 336 | } |
336 | 337 | ||
338 | static int ocfs2_cow_file_pos(struct inode *inode, | ||
339 | struct buffer_head *fe_bh, | ||
340 | u64 offset) | ||
341 | { | ||
342 | int status; | ||
343 | u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
344 | unsigned int num_clusters = 0; | ||
345 | unsigned int ext_flags = 0; | ||
346 | |||
347 | /* | ||
348 | * If the new offset is aligned to the range of the cluster, there is | ||
349 | * no space for ocfs2_zero_range_for_truncate to fill, so no need to | ||
350 | * CoW either. | ||
351 | */ | ||
352 | if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0) | ||
353 | return 0; | ||
354 | |||
355 | status = ocfs2_get_clusters(inode, cpos, &phys, | ||
356 | &num_clusters, &ext_flags); | ||
357 | if (status) { | ||
358 | mlog_errno(status); | ||
359 | goto out; | ||
360 | } | ||
361 | |||
362 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) | ||
363 | goto out; | ||
364 | |||
365 | return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); | ||
366 | |||
367 | out: | ||
368 | return status; | ||
369 | } | ||
370 | |||
337 | static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | 371 | static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, |
338 | struct inode *inode, | 372 | struct inode *inode, |
339 | struct buffer_head *fe_bh, | 373 | struct buffer_head *fe_bh, |
@@ -346,6 +380,17 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | |||
346 | 380 | ||
347 | mlog_entry_void(); | 381 | mlog_entry_void(); |
348 | 382 | ||
383 | /* | ||
384 | * We need to CoW the cluster contains the offset if it is reflinked | ||
385 | * since we will call ocfs2_zero_range_for_truncate later which will | ||
386 | * write "0" from offset to the end of the cluster. | ||
387 | */ | ||
388 | status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size); | ||
389 | if (status) { | ||
390 | mlog_errno(status); | ||
391 | return status; | ||
392 | } | ||
393 | |||
349 | /* TODO: This needs to actually orphan the inode in this | 394 | /* TODO: This needs to actually orphan the inode in this |
350 | * transaction. */ | 395 | * transaction. */ |
351 | 396 | ||
@@ -356,7 +401,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | |||
356 | goto out; | 401 | goto out; |
357 | } | 402 | } |
358 | 403 | ||
359 | status = ocfs2_journal_access_di(handle, inode, fe_bh, | 404 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh, |
360 | OCFS2_JOURNAL_ACCESS_WRITE); | 405 | OCFS2_JOURNAL_ACCESS_WRITE); |
361 | if (status < 0) { | 406 | if (status < 0) { |
362 | mlog_errno(status); | 407 | mlog_errno(status); |
@@ -486,6 +531,8 @@ bail_unlock_sem: | |||
486 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 531 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
487 | 532 | ||
488 | bail: | 533 | bail: |
534 | if (!status && OCFS2_I(inode)->ip_clusters == 0) | ||
535 | status = ocfs2_try_remove_refcount_tree(inode, di_bh); | ||
489 | 536 | ||
490 | mlog_exit(status); | 537 | mlog_exit(status); |
491 | return status; | 538 | return status; |
@@ -515,11 +562,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb, | |||
515 | int ret; | 562 | int ret; |
516 | struct ocfs2_extent_tree et; | 563 | struct ocfs2_extent_tree et; |
517 | 564 | ||
518 | ocfs2_init_dinode_extent_tree(&et, inode, fe_bh); | 565 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh); |
519 | ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset, | 566 | ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset, |
520 | clusters_to_add, mark_unwritten, | 567 | clusters_to_add, mark_unwritten, |
521 | &et, handle, | 568 | data_ac, meta_ac, reason_ret); |
522 | data_ac, meta_ac, reason_ret); | ||
523 | 569 | ||
524 | return ret; | 570 | return ret; |
525 | } | 571 | } |
@@ -564,7 +610,7 @@ restart_all: | |||
564 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 610 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
565 | (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters), | 611 | (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters), |
566 | clusters_to_add); | 612 | clusters_to_add); |
567 | ocfs2_init_dinode_extent_tree(&et, inode, bh); | 613 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh); |
568 | status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, | 614 | status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, |
569 | &data_ac, &meta_ac); | 615 | &data_ac, &meta_ac); |
570 | if (status) { | 616 | if (status) { |
@@ -593,7 +639,7 @@ restarted_transaction: | |||
593 | /* reserve a write to the file entry early on - that we if we | 639 | /* reserve a write to the file entry early on - that we if we |
594 | * run out of credits in the allocation path, we can still | 640 | * run out of credits in the allocation path, we can still |
595 | * update i_size. */ | 641 | * update i_size. */ |
596 | status = ocfs2_journal_access_di(handle, inode, bh, | 642 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh, |
597 | OCFS2_JOURNAL_ACCESS_WRITE); | 643 | OCFS2_JOURNAL_ACCESS_WRITE); |
598 | if (status < 0) { | 644 | if (status < 0) { |
599 | mlog_errno(status); | 645 | mlog_errno(status); |
@@ -1131,7 +1177,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode, | |||
1131 | goto out; | 1177 | goto out; |
1132 | } | 1178 | } |
1133 | 1179 | ||
1134 | ret = ocfs2_journal_access_di(handle, inode, bh, | 1180 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh, |
1135 | OCFS2_JOURNAL_ACCESS_WRITE); | 1181 | OCFS2_JOURNAL_ACCESS_WRITE); |
1136 | if (ret < 0) { | 1182 | if (ret < 0) { |
1137 | mlog_errno(ret); | 1183 | mlog_errno(ret); |
@@ -1395,7 +1441,7 @@ static int ocfs2_remove_inode_range(struct inode *inode, | |||
1395 | struct address_space *mapping = inode->i_mapping; | 1441 | struct address_space *mapping = inode->i_mapping; |
1396 | struct ocfs2_extent_tree et; | 1442 | struct ocfs2_extent_tree et; |
1397 | 1443 | ||
1398 | ocfs2_init_dinode_extent_tree(&et, inode, di_bh); | 1444 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); |
1399 | ocfs2_init_dealloc_ctxt(&dealloc); | 1445 | ocfs2_init_dealloc_ctxt(&dealloc); |
1400 | 1446 | ||
1401 | if (byte_len == 0) | 1447 | if (byte_len == 0) |
@@ -1657,6 +1703,70 @@ static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset, | |||
1657 | OCFS2_IOC_RESVSP64, &sr, change_size); | 1703 | OCFS2_IOC_RESVSP64, &sr, change_size); |
1658 | } | 1704 | } |
1659 | 1705 | ||
1706 | int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos, | ||
1707 | size_t count) | ||
1708 | { | ||
1709 | int ret = 0; | ||
1710 | unsigned int extent_flags; | ||
1711 | u32 cpos, clusters, extent_len, phys_cpos; | ||
1712 | struct super_block *sb = inode->i_sb; | ||
1713 | |||
1714 | if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) || | ||
1715 | !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) | ||
1716 | return 0; | ||
1717 | |||
1718 | cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits; | ||
1719 | clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos; | ||
1720 | |||
1721 | while (clusters) { | ||
1722 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len, | ||
1723 | &extent_flags); | ||
1724 | if (ret < 0) { | ||
1725 | mlog_errno(ret); | ||
1726 | goto out; | ||
1727 | } | ||
1728 | |||
1729 | if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) { | ||
1730 | ret = 1; | ||
1731 | break; | ||
1732 | } | ||
1733 | |||
1734 | if (extent_len > clusters) | ||
1735 | extent_len = clusters; | ||
1736 | |||
1737 | clusters -= extent_len; | ||
1738 | cpos += extent_len; | ||
1739 | } | ||
1740 | out: | ||
1741 | return ret; | ||
1742 | } | ||
1743 | |||
1744 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | ||
1745 | loff_t pos, size_t count, | ||
1746 | int *meta_level) | ||
1747 | { | ||
1748 | int ret; | ||
1749 | struct buffer_head *di_bh = NULL; | ||
1750 | u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
1751 | u32 clusters = | ||
1752 | ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos; | ||
1753 | |||
1754 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
1755 | if (ret) { | ||
1756 | mlog_errno(ret); | ||
1757 | goto out; | ||
1758 | } | ||
1759 | |||
1760 | *meta_level = 1; | ||
1761 | |||
1762 | ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); | ||
1763 | if (ret) | ||
1764 | mlog_errno(ret); | ||
1765 | out: | ||
1766 | brelse(di_bh); | ||
1767 | return ret; | ||
1768 | } | ||
1769 | |||
1660 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | 1770 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, |
1661 | loff_t *ppos, | 1771 | loff_t *ppos, |
1662 | size_t count, | 1772 | size_t count, |
@@ -1713,6 +1823,22 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
1713 | 1823 | ||
1714 | end = saved_pos + count; | 1824 | end = saved_pos + count; |
1715 | 1825 | ||
1826 | ret = ocfs2_check_range_for_refcount(inode, saved_pos, count); | ||
1827 | if (ret == 1) { | ||
1828 | ocfs2_inode_unlock(inode, meta_level); | ||
1829 | meta_level = -1; | ||
1830 | |||
1831 | ret = ocfs2_prepare_inode_for_refcount(inode, | ||
1832 | saved_pos, | ||
1833 | count, | ||
1834 | &meta_level); | ||
1835 | } | ||
1836 | |||
1837 | if (ret < 0) { | ||
1838 | mlog_errno(ret); | ||
1839 | goto out_unlock; | ||
1840 | } | ||
1841 | |||
1716 | /* | 1842 | /* |
1717 | * Skip the O_DIRECT checks if we don't need | 1843 | * Skip the O_DIRECT checks if we don't need |
1718 | * them. | 1844 | * them. |
@@ -1759,7 +1885,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
1759 | *ppos = saved_pos; | 1885 | *ppos = saved_pos; |
1760 | 1886 | ||
1761 | out_unlock: | 1887 | out_unlock: |
1762 | ocfs2_inode_unlock(inode, meta_level); | 1888 | if (meta_level >= 0) |
1889 | ocfs2_inode_unlock(inode, meta_level); | ||
1763 | 1890 | ||
1764 | out: | 1891 | out: |
1765 | return ret; | 1892 | return ret; |
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 172f9fbc9fc7..d66cf4f7c70e 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
@@ -69,4 +69,6 @@ int ocfs2_update_inode_atime(struct inode *inode, | |||
69 | int ocfs2_change_file_space(struct file *file, unsigned int cmd, | 69 | int ocfs2_change_file_space(struct file *file, unsigned int cmd, |
70 | struct ocfs2_space_resv *sr); | 70 | struct ocfs2_space_resv *sr); |
71 | 71 | ||
72 | int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos, | ||
73 | size_t count); | ||
72 | #endif /* OCFS2_FILE_H */ | 74 | #endif /* OCFS2_FILE_H */ |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 4dc8890ba316..0297fb8982b8 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include "sysfile.h" | 53 | #include "sysfile.h" |
54 | #include "uptodate.h" | 54 | #include "uptodate.h" |
55 | #include "xattr.h" | 55 | #include "xattr.h" |
56 | #include "refcounttree.h" | ||
56 | 57 | ||
57 | #include "buffer_head_io.h" | 58 | #include "buffer_head_io.h" |
58 | 59 | ||
@@ -562,7 +563,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, | |||
562 | goto out; | 563 | goto out; |
563 | } | 564 | } |
564 | 565 | ||
565 | status = ocfs2_journal_access_di(handle, inode, fe_bh, | 566 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), |
567 | fe_bh, | ||
566 | OCFS2_JOURNAL_ACCESS_WRITE); | 568 | OCFS2_JOURNAL_ACCESS_WRITE); |
567 | if (status < 0) { | 569 | if (status < 0) { |
568 | mlog_errno(status); | 570 | mlog_errno(status); |
@@ -646,7 +648,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
646 | } | 648 | } |
647 | 649 | ||
648 | /* set the inodes dtime */ | 650 | /* set the inodes dtime */ |
649 | status = ocfs2_journal_access_di(handle, inode, di_bh, | 651 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, |
650 | OCFS2_JOURNAL_ACCESS_WRITE); | 652 | OCFS2_JOURNAL_ACCESS_WRITE); |
651 | if (status < 0) { | 653 | if (status < 0) { |
652 | mlog_errno(status); | 654 | mlog_errno(status); |
@@ -662,7 +664,7 @@ static int ocfs2_remove_inode(struct inode *inode, | |||
662 | goto bail_commit; | 664 | goto bail_commit; |
663 | } | 665 | } |
664 | 666 | ||
665 | ocfs2_remove_from_cache(inode, di_bh); | 667 | ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh); |
666 | vfs_dq_free_inode(inode); | 668 | vfs_dq_free_inode(inode); |
667 | 669 | ||
668 | status = ocfs2_free_dinode(handle, inode_alloc_inode, | 670 | status = ocfs2_free_dinode(handle, inode_alloc_inode, |
@@ -781,6 +783,12 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
781 | goto bail_unlock_dir; | 783 | goto bail_unlock_dir; |
782 | } | 784 | } |
783 | 785 | ||
786 | status = ocfs2_remove_refcount_tree(inode, di_bh); | ||
787 | if (status < 0) { | ||
788 | mlog_errno(status); | ||
789 | goto bail_unlock_dir; | ||
790 | } | ||
791 | |||
784 | status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, | 792 | status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, |
785 | orphan_dir_bh); | 793 | orphan_dir_bh); |
786 | if (status < 0) | 794 | if (status < 0) |
@@ -1112,13 +1120,14 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1112 | ocfs2_lock_res_free(&oi->ip_inode_lockres); | 1120 | ocfs2_lock_res_free(&oi->ip_inode_lockres); |
1113 | ocfs2_lock_res_free(&oi->ip_open_lockres); | 1121 | ocfs2_lock_res_free(&oi->ip_open_lockres); |
1114 | 1122 | ||
1115 | ocfs2_metadata_cache_purge(inode); | 1123 | ocfs2_metadata_cache_exit(INODE_CACHE(inode)); |
1116 | 1124 | ||
1117 | mlog_bug_on_msg(oi->ip_metadata_cache.ci_num_cached, | 1125 | mlog_bug_on_msg(INODE_CACHE(inode)->ci_num_cached, |
1118 | "Clear inode of %llu, inode has %u cache items\n", | 1126 | "Clear inode of %llu, inode has %u cache items\n", |
1119 | (unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached); | 1127 | (unsigned long long)oi->ip_blkno, |
1128 | INODE_CACHE(inode)->ci_num_cached); | ||
1120 | 1129 | ||
1121 | mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE), | 1130 | mlog_bug_on_msg(!(INODE_CACHE(inode)->ci_flags & OCFS2_CACHE_FL_INLINE), |
1122 | "Clear inode of %llu, inode has a bad flag\n", | 1131 | "Clear inode of %llu, inode has a bad flag\n", |
1123 | (unsigned long long)oi->ip_blkno); | 1132 | (unsigned long long)oi->ip_blkno); |
1124 | 1133 | ||
@@ -1145,9 +1154,7 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1145 | (unsigned long long)oi->ip_blkno, oi->ip_open_count); | 1154 | (unsigned long long)oi->ip_blkno, oi->ip_open_count); |
1146 | 1155 | ||
1147 | /* Clear all other flags. */ | 1156 | /* Clear all other flags. */ |
1148 | oi->ip_flags = OCFS2_INODE_CACHE_INLINE; | 1157 | oi->ip_flags = 0; |
1149 | oi->ip_created_trans = 0; | ||
1150 | oi->ip_last_trans = 0; | ||
1151 | oi->ip_dir_start_lookup = 0; | 1158 | oi->ip_dir_start_lookup = 0; |
1152 | oi->ip_blkno = 0ULL; | 1159 | oi->ip_blkno = 0ULL; |
1153 | 1160 | ||
@@ -1239,7 +1246,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle, | |||
1239 | mlog_entry("(inode %llu)\n", | 1246 | mlog_entry("(inode %llu)\n", |
1240 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 1247 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
1241 | 1248 | ||
1242 | status = ocfs2_journal_access_di(handle, inode, bh, | 1249 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh, |
1243 | OCFS2_JOURNAL_ACCESS_WRITE); | 1250 | OCFS2_JOURNAL_ACCESS_WRITE); |
1244 | if (status < 0) { | 1251 | if (status < 0) { |
1245 | mlog_errno(status); | 1252 | mlog_errno(status); |
@@ -1380,8 +1387,8 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, | |||
1380 | int rc; | 1387 | int rc; |
1381 | struct buffer_head *tmp = *bh; | 1388 | struct buffer_head *tmp = *bh; |
1382 | 1389 | ||
1383 | rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp, | 1390 | rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno, |
1384 | flags, ocfs2_validate_inode_block); | 1391 | 1, &tmp, flags, ocfs2_validate_inode_block); |
1385 | 1392 | ||
1386 | /* If ocfs2_read_blocks() got us a new bh, pass it up. */ | 1393 | /* If ocfs2_read_blocks() got us a new bh, pass it up. */ |
1387 | if (!rc && !*bh) | 1394 | if (!rc && !*bh) |
@@ -1394,3 +1401,56 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh) | |||
1394 | { | 1401 | { |
1395 | return ocfs2_read_inode_block_full(inode, bh, 0); | 1402 | return ocfs2_read_inode_block_full(inode, bh, 0); |
1396 | } | 1403 | } |
1404 | |||
1405 | |||
1406 | static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci) | ||
1407 | { | ||
1408 | struct ocfs2_inode_info *oi = cache_info_to_inode(ci); | ||
1409 | |||
1410 | return oi->ip_blkno; | ||
1411 | } | ||
1412 | |||
1413 | static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info *ci) | ||
1414 | { | ||
1415 | struct ocfs2_inode_info *oi = cache_info_to_inode(ci); | ||
1416 | |||
1417 | return oi->vfs_inode.i_sb; | ||
1418 | } | ||
1419 | |||
1420 | static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci) | ||
1421 | { | ||
1422 | struct ocfs2_inode_info *oi = cache_info_to_inode(ci); | ||
1423 | |||
1424 | spin_lock(&oi->ip_lock); | ||
1425 | } | ||
1426 | |||
1427 | static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci) | ||
1428 | { | ||
1429 | struct ocfs2_inode_info *oi = cache_info_to_inode(ci); | ||
1430 | |||
1431 | spin_unlock(&oi->ip_lock); | ||
1432 | } | ||
1433 | |||
1434 | static void ocfs2_inode_cache_io_lock(struct ocfs2_caching_info *ci) | ||
1435 | { | ||
1436 | struct ocfs2_inode_info *oi = cache_info_to_inode(ci); | ||
1437 | |||
1438 | mutex_lock(&oi->ip_io_mutex); | ||
1439 | } | ||
1440 | |||
1441 | static void ocfs2_inode_cache_io_unlock(struct ocfs2_caching_info *ci) | ||
1442 | { | ||
1443 | struct ocfs2_inode_info *oi = cache_info_to_inode(ci); | ||
1444 | |||
1445 | mutex_unlock(&oi->ip_io_mutex); | ||
1446 | } | ||
1447 | |||
1448 | const struct ocfs2_caching_operations ocfs2_inode_caching_ops = { | ||
1449 | .co_owner = ocfs2_inode_cache_owner, | ||
1450 | .co_get_super = ocfs2_inode_cache_get_super, | ||
1451 | .co_cache_lock = ocfs2_inode_cache_lock, | ||
1452 | .co_cache_unlock = ocfs2_inode_cache_unlock, | ||
1453 | .co_io_lock = ocfs2_inode_cache_io_lock, | ||
1454 | .co_io_unlock = ocfs2_inode_cache_io_unlock, | ||
1455 | }; | ||
1456 | |||
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index ea71525aad41..ba4fe07b293c 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -60,12 +60,6 @@ struct ocfs2_inode_info | |||
60 | 60 | ||
61 | u32 ip_dir_start_lookup; | 61 | u32 ip_dir_start_lookup; |
62 | 62 | ||
63 | /* next two are protected by trans_inc_lock */ | ||
64 | /* which transaction were we created on? Zero if none. */ | ||
65 | unsigned long ip_created_trans; | ||
66 | /* last transaction we were a part of. */ | ||
67 | unsigned long ip_last_trans; | ||
68 | |||
69 | struct ocfs2_caching_info ip_metadata_cache; | 63 | struct ocfs2_caching_info ip_metadata_cache; |
70 | 64 | ||
71 | struct ocfs2_extent_map ip_extent_map; | 65 | struct ocfs2_extent_map ip_extent_map; |
@@ -106,8 +100,6 @@ struct ocfs2_inode_info | |||
106 | #define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 | 100 | #define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 |
107 | /* Does someone have the file open O_DIRECT */ | 101 | /* Does someone have the file open O_DIRECT */ |
108 | #define OCFS2_INODE_OPEN_DIRECT 0x00000040 | 102 | #define OCFS2_INODE_OPEN_DIRECT 0x00000040 |
109 | /* Indicates that the metadata cache should be used as an array. */ | ||
110 | #define OCFS2_INODE_CACHE_INLINE 0x00000080 | ||
111 | 103 | ||
112 | static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) | 104 | static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) |
113 | { | 105 | { |
@@ -120,6 +112,12 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) | |||
120 | extern struct kmem_cache *ocfs2_inode_cache; | 112 | extern struct kmem_cache *ocfs2_inode_cache; |
121 | 113 | ||
122 | extern const struct address_space_operations ocfs2_aops; | 114 | extern const struct address_space_operations ocfs2_aops; |
115 | extern const struct ocfs2_caching_operations ocfs2_inode_caching_ops; | ||
116 | |||
117 | static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode) | ||
118 | { | ||
119 | return &OCFS2_I(inode)->ip_metadata_cache; | ||
120 | } | ||
123 | 121 | ||
124 | void ocfs2_clear_inode(struct inode *inode); | 122 | void ocfs2_clear_inode(struct inode *inode); |
125 | void ocfs2_delete_inode(struct inode *inode); | 123 | void ocfs2_delete_inode(struct inode *inode); |
@@ -172,4 +170,10 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh); | |||
172 | /* The same, but can be passed OCFS2_BH_* flags */ | 170 | /* The same, but can be passed OCFS2_BH_* flags */ |
173 | int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, | 171 | int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, |
174 | int flags); | 172 | int flags); |
173 | |||
174 | static inline struct ocfs2_inode_info *cache_info_to_inode(struct ocfs2_caching_info *ci) | ||
175 | { | ||
176 | return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache); | ||
177 | } | ||
178 | |||
175 | #endif /* OCFS2_INODE_H */ | 179 | #endif /* OCFS2_INODE_H */ |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 467b413bec21..31fbb0619510 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include "ocfs2_fs.h" | 21 | #include "ocfs2_fs.h" |
22 | #include "ioctl.h" | 22 | #include "ioctl.h" |
23 | #include "resize.h" | 23 | #include "resize.h" |
24 | #include "refcounttree.h" | ||
24 | 25 | ||
25 | #include <linux/ext2_fs.h> | 26 | #include <linux/ext2_fs.h> |
26 | 27 | ||
@@ -115,6 +116,9 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
115 | int status; | 116 | int status; |
116 | struct ocfs2_space_resv sr; | 117 | struct ocfs2_space_resv sr; |
117 | struct ocfs2_new_group_input input; | 118 | struct ocfs2_new_group_input input; |
119 | struct reflink_arguments args; | ||
120 | const char *old_path, *new_path; | ||
121 | bool preserve; | ||
118 | 122 | ||
119 | switch (cmd) { | 123 | switch (cmd) { |
120 | case OCFS2_IOC_GETFLAGS: | 124 | case OCFS2_IOC_GETFLAGS: |
@@ -160,6 +164,15 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
160 | return -EFAULT; | 164 | return -EFAULT; |
161 | 165 | ||
162 | return ocfs2_group_add(inode, &input); | 166 | return ocfs2_group_add(inode, &input); |
167 | case OCFS2_IOC_REFLINK: | ||
168 | if (copy_from_user(&args, (struct reflink_arguments *)arg, | ||
169 | sizeof(args))) | ||
170 | return -EFAULT; | ||
171 | old_path = (const char *)(unsigned long)args.old_path; | ||
172 | new_path = (const char *)(unsigned long)args.new_path; | ||
173 | preserve = (args.preserve != 0); | ||
174 | |||
175 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); | ||
163 | default: | 176 | default: |
164 | return -ENOTTY; | 177 | return -ENOTTY; |
165 | } | 178 | } |
@@ -182,6 +195,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
182 | case OCFS2_IOC_GROUP_EXTEND: | 195 | case OCFS2_IOC_GROUP_EXTEND: |
183 | case OCFS2_IOC_GROUP_ADD: | 196 | case OCFS2_IOC_GROUP_ADD: |
184 | case OCFS2_IOC_GROUP_ADD64: | 197 | case OCFS2_IOC_GROUP_ADD64: |
198 | case OCFS2_IOC_REFLINK: | ||
185 | break; | 199 | break; |
186 | default: | 200 | default: |
187 | return -ENOIOCTLCMD; | 201 | return -ENOIOCTLCMD; |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index c48b93ac6b65..54c16b66327e 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include "slot_map.h" | 48 | #include "slot_map.h" |
49 | #include "super.h" | 49 | #include "super.h" |
50 | #include "sysfile.h" | 50 | #include "sysfile.h" |
51 | #include "uptodate.h" | ||
51 | #include "quota.h" | 52 | #include "quota.h" |
52 | 53 | ||
53 | #include "buffer_head_io.h" | 54 | #include "buffer_head_io.h" |
@@ -554,6 +555,14 @@ static struct ocfs2_triggers eb_triggers = { | |||
554 | .ot_offset = offsetof(struct ocfs2_extent_block, h_check), | 555 | .ot_offset = offsetof(struct ocfs2_extent_block, h_check), |
555 | }; | 556 | }; |
556 | 557 | ||
558 | static struct ocfs2_triggers rb_triggers = { | ||
559 | .ot_triggers = { | ||
560 | .t_commit = ocfs2_commit_trigger, | ||
561 | .t_abort = ocfs2_abort_trigger, | ||
562 | }, | ||
563 | .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), | ||
564 | }; | ||
565 | |||
557 | static struct ocfs2_triggers gd_triggers = { | 566 | static struct ocfs2_triggers gd_triggers = { |
558 | .ot_triggers = { | 567 | .ot_triggers = { |
559 | .t_commit = ocfs2_commit_trigger, | 568 | .t_commit = ocfs2_commit_trigger, |
@@ -601,14 +610,16 @@ static struct ocfs2_triggers dl_triggers = { | |||
601 | }; | 610 | }; |
602 | 611 | ||
603 | static int __ocfs2_journal_access(handle_t *handle, | 612 | static int __ocfs2_journal_access(handle_t *handle, |
604 | struct inode *inode, | 613 | struct ocfs2_caching_info *ci, |
605 | struct buffer_head *bh, | 614 | struct buffer_head *bh, |
606 | struct ocfs2_triggers *triggers, | 615 | struct ocfs2_triggers *triggers, |
607 | int type) | 616 | int type) |
608 | { | 617 | { |
609 | int status; | 618 | int status; |
619 | struct ocfs2_super *osb = | ||
620 | OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); | ||
610 | 621 | ||
611 | BUG_ON(!inode); | 622 | BUG_ON(!ci || !ci->ci_ops); |
612 | BUG_ON(!handle); | 623 | BUG_ON(!handle); |
613 | BUG_ON(!bh); | 624 | BUG_ON(!bh); |
614 | 625 | ||
@@ -627,15 +638,15 @@ static int __ocfs2_journal_access(handle_t *handle, | |||
627 | BUG(); | 638 | BUG(); |
628 | } | 639 | } |
629 | 640 | ||
630 | /* Set the current transaction information on the inode so | 641 | /* Set the current transaction information on the ci so |
631 | * that the locking code knows whether it can drop it's locks | 642 | * that the locking code knows whether it can drop it's locks |
632 | * on this inode or not. We're protected from the commit | 643 | * on this ci or not. We're protected from the commit |
633 | * thread updating the current transaction id until | 644 | * thread updating the current transaction id until |
634 | * ocfs2_commit_trans() because ocfs2_start_trans() took | 645 | * ocfs2_commit_trans() because ocfs2_start_trans() took |
635 | * j_trans_barrier for us. */ | 646 | * j_trans_barrier for us. */ |
636 | ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode); | 647 | ocfs2_set_ci_lock_trans(osb->journal, ci); |
637 | 648 | ||
638 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); | 649 | ocfs2_metadata_cache_io_lock(ci); |
639 | switch (type) { | 650 | switch (type) { |
640 | case OCFS2_JOURNAL_ACCESS_CREATE: | 651 | case OCFS2_JOURNAL_ACCESS_CREATE: |
641 | case OCFS2_JOURNAL_ACCESS_WRITE: | 652 | case OCFS2_JOURNAL_ACCESS_WRITE: |
@@ -650,9 +661,9 @@ static int __ocfs2_journal_access(handle_t *handle, | |||
650 | status = -EINVAL; | 661 | status = -EINVAL; |
651 | mlog(ML_ERROR, "Uknown access type!\n"); | 662 | mlog(ML_ERROR, "Uknown access type!\n"); |
652 | } | 663 | } |
653 | if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers) | 664 | if (!status && ocfs2_meta_ecc(osb) && triggers) |
654 | jbd2_journal_set_triggers(bh, &triggers->ot_triggers); | 665 | jbd2_journal_set_triggers(bh, &triggers->ot_triggers); |
655 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | 666 | ocfs2_metadata_cache_io_unlock(ci); |
656 | 667 | ||
657 | if (status < 0) | 668 | if (status < 0) |
658 | mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", | 669 | mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", |
@@ -662,66 +673,65 @@ static int __ocfs2_journal_access(handle_t *handle, | |||
662 | return status; | 673 | return status; |
663 | } | 674 | } |
664 | 675 | ||
665 | int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, | 676 | int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci, |
666 | struct buffer_head *bh, int type) | 677 | struct buffer_head *bh, int type) |
667 | { | 678 | { |
668 | return __ocfs2_journal_access(handle, inode, bh, &di_triggers, | 679 | return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type); |
669 | type); | ||
670 | } | 680 | } |
671 | 681 | ||
672 | int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, | 682 | int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci, |
673 | struct buffer_head *bh, int type) | 683 | struct buffer_head *bh, int type) |
674 | { | 684 | { |
675 | return __ocfs2_journal_access(handle, inode, bh, &eb_triggers, | 685 | return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type); |
676 | type); | ||
677 | } | 686 | } |
678 | 687 | ||
679 | int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, | 688 | int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci, |
680 | struct buffer_head *bh, int type) | 689 | struct buffer_head *bh, int type) |
681 | { | 690 | { |
682 | return __ocfs2_journal_access(handle, inode, bh, &gd_triggers, | 691 | return __ocfs2_journal_access(handle, ci, bh, &rb_triggers, |
683 | type); | 692 | type); |
684 | } | 693 | } |
685 | 694 | ||
686 | int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, | 695 | int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci, |
687 | struct buffer_head *bh, int type) | 696 | struct buffer_head *bh, int type) |
688 | { | 697 | { |
689 | return __ocfs2_journal_access(handle, inode, bh, &db_triggers, | 698 | return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type); |
690 | type); | ||
691 | } | 699 | } |
692 | 700 | ||
693 | int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, | 701 | int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci, |
694 | struct buffer_head *bh, int type) | 702 | struct buffer_head *bh, int type) |
695 | { | 703 | { |
696 | return __ocfs2_journal_access(handle, inode, bh, &xb_triggers, | 704 | return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type); |
697 | type); | ||
698 | } | 705 | } |
699 | 706 | ||
700 | int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, | 707 | int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci, |
701 | struct buffer_head *bh, int type) | 708 | struct buffer_head *bh, int type) |
702 | { | 709 | { |
703 | return __ocfs2_journal_access(handle, inode, bh, &dq_triggers, | 710 | return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type); |
704 | type); | ||
705 | } | 711 | } |
706 | 712 | ||
707 | int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode, | 713 | int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci, |
708 | struct buffer_head *bh, int type) | 714 | struct buffer_head *bh, int type) |
709 | { | 715 | { |
710 | return __ocfs2_journal_access(handle, inode, bh, &dr_triggers, | 716 | return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type); |
711 | type); | ||
712 | } | 717 | } |
713 | 718 | ||
714 | int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode, | 719 | int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci, |
715 | struct buffer_head *bh, int type) | 720 | struct buffer_head *bh, int type) |
716 | { | 721 | { |
717 | return __ocfs2_journal_access(handle, inode, bh, &dl_triggers, | 722 | return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type); |
718 | type); | 723 | } |
724 | |||
725 | int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci, | ||
726 | struct buffer_head *bh, int type) | ||
727 | { | ||
728 | return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type); | ||
719 | } | 729 | } |
720 | 730 | ||
721 | int ocfs2_journal_access(handle_t *handle, struct inode *inode, | 731 | int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci, |
722 | struct buffer_head *bh, int type) | 732 | struct buffer_head *bh, int type) |
723 | { | 733 | { |
724 | return __ocfs2_journal_access(handle, inode, bh, NULL, type); | 734 | return __ocfs2_journal_access(handle, ci, bh, NULL, type); |
725 | } | 735 | } |
726 | 736 | ||
727 | int ocfs2_journal_dirty(handle_t *handle, | 737 | int ocfs2_journal_dirty(handle_t *handle, |
@@ -898,7 +908,7 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, | |||
898 | ocfs2_bump_recovery_generation(fe); | 908 | ocfs2_bump_recovery_generation(fe); |
899 | 909 | ||
900 | ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); | 910 | ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); |
901 | status = ocfs2_write_block(osb, bh, journal->j_inode); | 911 | status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode)); |
902 | if (status < 0) | 912 | if (status < 0) |
903 | mlog_errno(status); | 913 | mlog_errno(status); |
904 | 914 | ||
@@ -1642,7 +1652,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1642 | ocfs2_get_recovery_generation(fe); | 1652 | ocfs2_get_recovery_generation(fe); |
1643 | 1653 | ||
1644 | ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); | 1654 | ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); |
1645 | status = ocfs2_write_block(osb, bh, inode); | 1655 | status = ocfs2_write_block(osb, bh, INODE_CACHE(inode)); |
1646 | if (status < 0) | 1656 | if (status < 0) |
1647 | mlog_errno(status); | 1657 | mlog_errno(status); |
1648 | 1658 | ||
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 2c3222aec622..3f74e09b0d80 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -90,56 +90,66 @@ static inline unsigned long ocfs2_inc_trans_id(struct ocfs2_journal *j) | |||
90 | return old_id; | 90 | return old_id; |
91 | } | 91 | } |
92 | 92 | ||
93 | static inline void ocfs2_set_inode_lock_trans(struct ocfs2_journal *journal, | 93 | static inline void ocfs2_set_ci_lock_trans(struct ocfs2_journal *journal, |
94 | struct inode *inode) | 94 | struct ocfs2_caching_info *ci) |
95 | { | 95 | { |
96 | spin_lock(&trans_inc_lock); | 96 | spin_lock(&trans_inc_lock); |
97 | OCFS2_I(inode)->ip_last_trans = journal->j_trans_id; | 97 | ci->ci_last_trans = journal->j_trans_id; |
98 | spin_unlock(&trans_inc_lock); | 98 | spin_unlock(&trans_inc_lock); |
99 | } | 99 | } |
100 | 100 | ||
101 | /* Used to figure out whether it's safe to drop a metadata lock on an | 101 | /* Used to figure out whether it's safe to drop a metadata lock on an |
102 | * inode. Returns true if all the inodes changes have been | 102 | * cached object. Returns true if all the object's changes have been |
103 | * checkpointed to disk. You should be holding the spinlock on the | 103 | * checkpointed to disk. You should be holding the spinlock on the |
104 | * metadata lock while calling this to be sure that nobody can take | 104 | * metadata lock while calling this to be sure that nobody can take |
105 | * the lock and put it on another transaction. */ | 105 | * the lock and put it on another transaction. */ |
106 | static inline int ocfs2_inode_fully_checkpointed(struct inode *inode) | 106 | static inline int ocfs2_ci_fully_checkpointed(struct ocfs2_caching_info *ci) |
107 | { | 107 | { |
108 | int ret; | 108 | int ret; |
109 | struct ocfs2_journal *journal = OCFS2_SB(inode->i_sb)->journal; | 109 | struct ocfs2_journal *journal = |
110 | OCFS2_SB(ocfs2_metadata_cache_get_super(ci))->journal; | ||
110 | 111 | ||
111 | spin_lock(&trans_inc_lock); | 112 | spin_lock(&trans_inc_lock); |
112 | ret = time_after(journal->j_trans_id, OCFS2_I(inode)->ip_last_trans); | 113 | ret = time_after(journal->j_trans_id, ci->ci_last_trans); |
113 | spin_unlock(&trans_inc_lock); | 114 | spin_unlock(&trans_inc_lock); |
114 | return ret; | 115 | return ret; |
115 | } | 116 | } |
116 | 117 | ||
117 | /* convenience function to check if an inode is still new (has never | 118 | /* convenience function to check if an object backed by struct |
118 | * hit disk) Will do you a favor and set created_trans = 0 when you've | 119 | * ocfs2_caching_info is still new (has never hit disk) Will do you a |
119 | * been checkpointed. returns '1' if the inode is still new. */ | 120 | * favor and set created_trans = 0 when you've |
120 | static inline int ocfs2_inode_is_new(struct inode *inode) | 121 | * been checkpointed. returns '1' if the ci is still new. */ |
122 | static inline int ocfs2_ci_is_new(struct ocfs2_caching_info *ci) | ||
121 | { | 123 | { |
122 | int ret; | 124 | int ret; |
125 | struct ocfs2_journal *journal = | ||
126 | OCFS2_SB(ocfs2_metadata_cache_get_super(ci))->journal; | ||
123 | 127 | ||
128 | spin_lock(&trans_inc_lock); | ||
129 | ret = !(time_after(journal->j_trans_id, ci->ci_created_trans)); | ||
130 | if (!ret) | ||
131 | ci->ci_created_trans = 0; | ||
132 | spin_unlock(&trans_inc_lock); | ||
133 | return ret; | ||
134 | } | ||
135 | |||
136 | /* Wrapper for inodes so we can check system files */ | ||
137 | static inline int ocfs2_inode_is_new(struct inode *inode) | ||
138 | { | ||
124 | /* System files are never "new" as they're written out by | 139 | /* System files are never "new" as they're written out by |
125 | * mkfs. This helps us early during mount, before we have the | 140 | * mkfs. This helps us early during mount, before we have the |
126 | * journal open and j_trans_id could be junk. */ | 141 | * journal open and j_trans_id could be junk. */ |
127 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) | 142 | if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) |
128 | return 0; | 143 | return 0; |
129 | spin_lock(&trans_inc_lock); | 144 | |
130 | ret = !(time_after(OCFS2_SB(inode->i_sb)->journal->j_trans_id, | 145 | return ocfs2_ci_is_new(INODE_CACHE(inode)); |
131 | OCFS2_I(inode)->ip_created_trans)); | ||
132 | if (!ret) | ||
133 | OCFS2_I(inode)->ip_created_trans = 0; | ||
134 | spin_unlock(&trans_inc_lock); | ||
135 | return ret; | ||
136 | } | 146 | } |
137 | 147 | ||
138 | static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, | 148 | static inline void ocfs2_ci_set_new(struct ocfs2_super *osb, |
139 | struct inode *inode) | 149 | struct ocfs2_caching_info *ci) |
140 | { | 150 | { |
141 | spin_lock(&trans_inc_lock); | 151 | spin_lock(&trans_inc_lock); |
142 | OCFS2_I(inode)->ip_created_trans = osb->journal->j_trans_id; | 152 | ci->ci_created_trans = osb->journal->j_trans_id; |
143 | spin_unlock(&trans_inc_lock); | 153 | spin_unlock(&trans_inc_lock); |
144 | } | 154 | } |
145 | 155 | ||
@@ -200,7 +210,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode) | |||
200 | if (ocfs2_mount_local(osb)) | 210 | if (ocfs2_mount_local(osb)) |
201 | return; | 211 | return; |
202 | 212 | ||
203 | if (!ocfs2_inode_fully_checkpointed(inode)) { | 213 | if (!ocfs2_ci_fully_checkpointed(INODE_CACHE(inode))) { |
204 | /* WARNING: This only kicks off a single | 214 | /* WARNING: This only kicks off a single |
205 | * checkpoint. If someone races you and adds more | 215 | * checkpoint. If someone races you and adds more |
206 | * metadata to the journal, you won't know, and will | 216 | * metadata to the journal, you won't know, and will |
@@ -210,7 +220,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode) | |||
210 | ocfs2_start_checkpoint(osb); | 220 | ocfs2_start_checkpoint(osb); |
211 | 221 | ||
212 | wait_event(osb->journal->j_checkpointed, | 222 | wait_event(osb->journal->j_checkpointed, |
213 | ocfs2_inode_fully_checkpointed(inode)); | 223 | ocfs2_ci_fully_checkpointed(INODE_CACHE(inode))); |
214 | } | 224 | } |
215 | } | 225 | } |
216 | 226 | ||
@@ -266,31 +276,34 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks); | |||
266 | 276 | ||
267 | 277 | ||
268 | /* ocfs2_inode */ | 278 | /* ocfs2_inode */ |
269 | int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, | 279 | int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci, |
270 | struct buffer_head *bh, int type); | 280 | struct buffer_head *bh, int type); |
271 | /* ocfs2_extent_block */ | 281 | /* ocfs2_extent_block */ |
272 | int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, | 282 | int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci, |
283 | struct buffer_head *bh, int type); | ||
284 | /* ocfs2_refcount_block */ | ||
285 | int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci, | ||
273 | struct buffer_head *bh, int type); | 286 | struct buffer_head *bh, int type); |
274 | /* ocfs2_group_desc */ | 287 | /* ocfs2_group_desc */ |
275 | int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, | 288 | int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci, |
276 | struct buffer_head *bh, int type); | 289 | struct buffer_head *bh, int type); |
277 | /* ocfs2_xattr_block */ | 290 | /* ocfs2_xattr_block */ |
278 | int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, | 291 | int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci, |
279 | struct buffer_head *bh, int type); | 292 | struct buffer_head *bh, int type); |
280 | /* quota blocks */ | 293 | /* quota blocks */ |
281 | int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, | 294 | int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci, |
282 | struct buffer_head *bh, int type); | 295 | struct buffer_head *bh, int type); |
283 | /* dirblock */ | 296 | /* dirblock */ |
284 | int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, | 297 | int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci, |
285 | struct buffer_head *bh, int type); | 298 | struct buffer_head *bh, int type); |
286 | /* ocfs2_dx_root_block */ | 299 | /* ocfs2_dx_root_block */ |
287 | int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode, | 300 | int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci, |
288 | struct buffer_head *bh, int type); | 301 | struct buffer_head *bh, int type); |
289 | /* ocfs2_dx_leaf */ | 302 | /* ocfs2_dx_leaf */ |
290 | int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode, | 303 | int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci, |
291 | struct buffer_head *bh, int type); | 304 | struct buffer_head *bh, int type); |
292 | /* Anything that has no ecc */ | 305 | /* Anything that has no ecc */ |
293 | int ocfs2_journal_access(handle_t *handle, struct inode *inode, | 306 | int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci, |
294 | struct buffer_head *bh, int type); | 307 | struct buffer_head *bh, int type); |
295 | 308 | ||
296 | /* | 309 | /* |
@@ -477,6 +490,23 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb) | |||
477 | return credits; | 490 | return credits; |
478 | } | 491 | } |
479 | 492 | ||
493 | /* inode update, new refcount block and its allocation credits. */ | ||
494 | #define OCFS2_REFCOUNT_TREE_CREATE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1 \ | ||
495 | + OCFS2_SUBALLOC_ALLOC) | ||
496 | |||
497 | /* inode and the refcount block update. */ | ||
498 | #define OCFS2_REFCOUNT_TREE_SET_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | ||
499 | |||
500 | /* | ||
501 | * inode and the refcount block update. | ||
502 | * It doesn't include the credits for sub alloc change. | ||
503 | * So if we need to free the bit, OCFS2_SUBALLOC_FREE needs to be added. | ||
504 | */ | ||
505 | #define OCFS2_REFCOUNT_TREE_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | ||
506 | |||
507 | /* 2 metadata alloc, 2 new blocks and root refcount block */ | ||
508 | #define OCFS2_EXPAND_REFCOUNT_TREE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + 3) | ||
509 | |||
480 | /* | 510 | /* |
481 | * Please note that the caller must make sure that root_el is the root | 511 | * Please note that the caller must make sure that root_el is the root |
482 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise | 512 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index bac7e6abaf47..ac10f83edb95 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -297,8 +297,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) | |||
297 | } | 297 | } |
298 | memcpy(alloc_copy, alloc, bh->b_size); | 298 | memcpy(alloc_copy, alloc, bh->b_size); |
299 | 299 | ||
300 | status = ocfs2_journal_access_di(handle, local_alloc_inode, bh, | 300 | status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), |
301 | OCFS2_JOURNAL_ACCESS_WRITE); | 301 | bh, OCFS2_JOURNAL_ACCESS_WRITE); |
302 | if (status < 0) { | 302 | if (status < 0) { |
303 | mlog_errno(status); | 303 | mlog_errno(status); |
304 | goto out_commit; | 304 | goto out_commit; |
@@ -392,7 +392,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, | |||
392 | ocfs2_clear_local_alloc(alloc); | 392 | ocfs2_clear_local_alloc(alloc); |
393 | 393 | ||
394 | ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); | 394 | ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); |
395 | status = ocfs2_write_block(osb, alloc_bh, inode); | 395 | status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode)); |
396 | if (status < 0) | 396 | if (status < 0) |
397 | mlog_errno(status); | 397 | mlog_errno(status); |
398 | 398 | ||
@@ -678,7 +678,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, | |||
678 | * delete bits from it! */ | 678 | * delete bits from it! */ |
679 | *num_bits = bits_wanted; | 679 | *num_bits = bits_wanted; |
680 | 680 | ||
681 | status = ocfs2_journal_access_di(handle, local_alloc_inode, | 681 | status = ocfs2_journal_access_di(handle, |
682 | INODE_CACHE(local_alloc_inode), | ||
682 | osb->local_alloc_bh, | 683 | osb->local_alloc_bh, |
683 | OCFS2_JOURNAL_ACCESS_WRITE); | 684 | OCFS2_JOURNAL_ACCESS_WRITE); |
684 | if (status < 0) { | 685 | if (status < 0) { |
@@ -1156,7 +1157,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | |||
1156 | } | 1157 | } |
1157 | memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); | 1158 | memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); |
1158 | 1159 | ||
1159 | status = ocfs2_journal_access_di(handle, local_alloc_inode, | 1160 | status = ocfs2_journal_access_di(handle, |
1161 | INODE_CACHE(local_alloc_inode), | ||
1160 | osb->local_alloc_bh, | 1162 | osb->local_alloc_bh, |
1161 | OCFS2_JOURNAL_ACCESS_WRITE); | 1163 | OCFS2_JOURNAL_ACCESS_WRITE); |
1162 | if (status < 0) { | 1164 | if (status < 0) { |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 8601f934010b..f010b22b1c44 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -69,7 +69,6 @@ | |||
69 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, | 69 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, |
70 | struct inode *dir, | 70 | struct inode *dir, |
71 | struct inode *inode, | 71 | struct inode *inode, |
72 | struct dentry *dentry, | ||
73 | dev_t dev, | 72 | dev_t dev, |
74 | struct buffer_head **new_fe_bh, | 73 | struct buffer_head **new_fe_bh, |
75 | struct buffer_head *parent_fe_bh, | 74 | struct buffer_head *parent_fe_bh, |
@@ -78,7 +77,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
78 | 77 | ||
79 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | 78 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, |
80 | struct inode **ret_orphan_dir, | 79 | struct inode **ret_orphan_dir, |
81 | struct inode *inode, | 80 | u64 blkno, |
82 | char *name, | 81 | char *name, |
83 | struct ocfs2_dir_lookup_result *lookup); | 82 | struct ocfs2_dir_lookup_result *lookup); |
84 | 83 | ||
@@ -358,8 +357,12 @@ static int ocfs2_mknod(struct inode *dir, | |||
358 | } | 357 | } |
359 | did_quota_inode = 1; | 358 | did_quota_inode = 1; |
360 | 359 | ||
360 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, | ||
361 | inode->i_mode, (unsigned long)dev, dentry->d_name.len, | ||
362 | dentry->d_name.name); | ||
363 | |||
361 | /* do the real work now. */ | 364 | /* do the real work now. */ |
362 | status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev, | 365 | status = ocfs2_mknod_locked(osb, dir, inode, dev, |
363 | &new_fe_bh, parent_fe_bh, handle, | 366 | &new_fe_bh, parent_fe_bh, handle, |
364 | inode_ac); | 367 | inode_ac); |
365 | if (status < 0) { | 368 | if (status < 0) { |
@@ -375,7 +378,8 @@ static int ocfs2_mknod(struct inode *dir, | |||
375 | goto leave; | 378 | goto leave; |
376 | } | 379 | } |
377 | 380 | ||
378 | status = ocfs2_journal_access_di(handle, dir, parent_fe_bh, | 381 | status = ocfs2_journal_access_di(handle, INODE_CACHE(dir), |
382 | parent_fe_bh, | ||
379 | OCFS2_JOURNAL_ACCESS_WRITE); | 383 | OCFS2_JOURNAL_ACCESS_WRITE); |
380 | if (status < 0) { | 384 | if (status < 0) { |
381 | mlog_errno(status); | 385 | mlog_errno(status); |
@@ -465,7 +469,6 @@ leave: | |||
465 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, | 469 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, |
466 | struct inode *dir, | 470 | struct inode *dir, |
467 | struct inode *inode, | 471 | struct inode *inode, |
468 | struct dentry *dentry, | ||
469 | dev_t dev, | 472 | dev_t dev, |
470 | struct buffer_head **new_fe_bh, | 473 | struct buffer_head **new_fe_bh, |
471 | struct buffer_head *parent_fe_bh, | 474 | struct buffer_head *parent_fe_bh, |
@@ -479,10 +482,6 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
479 | u16 suballoc_bit; | 482 | u16 suballoc_bit; |
480 | u16 feat; | 483 | u16 feat; |
481 | 484 | ||
482 | mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, | ||
483 | inode->i_mode, (unsigned long)dev, dentry->d_name.len, | ||
484 | dentry->d_name.name); | ||
485 | |||
486 | *new_fe_bh = NULL; | 485 | *new_fe_bh = NULL; |
487 | 486 | ||
488 | status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh, | 487 | status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh, |
@@ -507,9 +506,10 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
507 | mlog_errno(status); | 506 | mlog_errno(status); |
508 | goto leave; | 507 | goto leave; |
509 | } | 508 | } |
510 | ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh); | 509 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), *new_fe_bh); |
511 | 510 | ||
512 | status = ocfs2_journal_access_di(handle, inode, *new_fe_bh, | 511 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), |
512 | *new_fe_bh, | ||
513 | OCFS2_JOURNAL_ACCESS_CREATE); | 513 | OCFS2_JOURNAL_ACCESS_CREATE); |
514 | if (status < 0) { | 514 | if (status < 0) { |
515 | mlog_errno(status); | 515 | mlog_errno(status); |
@@ -565,7 +565,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
565 | } | 565 | } |
566 | 566 | ||
567 | ocfs2_populate_inode(inode, fe, 1); | 567 | ocfs2_populate_inode(inode, fe, 1); |
568 | ocfs2_inode_set_new(osb, inode); | 568 | ocfs2_ci_set_new(osb, INODE_CACHE(inode)); |
569 | if (!ocfs2_mount_local(osb)) { | 569 | if (!ocfs2_mount_local(osb)) { |
570 | status = ocfs2_create_new_inode_locks(inode); | 570 | status = ocfs2_create_new_inode_locks(inode); |
571 | if (status < 0) | 571 | if (status < 0) |
@@ -682,7 +682,7 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
682 | goto out_unlock_inode; | 682 | goto out_unlock_inode; |
683 | } | 683 | } |
684 | 684 | ||
685 | err = ocfs2_journal_access_di(handle, inode, fe_bh, | 685 | err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh, |
686 | OCFS2_JOURNAL_ACCESS_WRITE); | 686 | OCFS2_JOURNAL_ACCESS_WRITE); |
687 | if (err < 0) { | 687 | if (err < 0) { |
688 | mlog_errno(err); | 688 | mlog_errno(err); |
@@ -850,7 +850,8 @@ static int ocfs2_unlink(struct inode *dir, | |||
850 | } | 850 | } |
851 | 851 | ||
852 | if (inode_is_unlinkable(inode)) { | 852 | if (inode_is_unlinkable(inode)) { |
853 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode, | 853 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, |
854 | OCFS2_I(inode)->ip_blkno, | ||
854 | orphan_name, &orphan_insert); | 855 | orphan_name, &orphan_insert); |
855 | if (status < 0) { | 856 | if (status < 0) { |
856 | mlog_errno(status); | 857 | mlog_errno(status); |
@@ -866,7 +867,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
866 | goto leave; | 867 | goto leave; |
867 | } | 868 | } |
868 | 869 | ||
869 | status = ocfs2_journal_access_di(handle, inode, fe_bh, | 870 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh, |
870 | OCFS2_JOURNAL_ACCESS_WRITE); | 871 | OCFS2_JOURNAL_ACCESS_WRITE); |
871 | if (status < 0) { | 872 | if (status < 0) { |
872 | mlog_errno(status); | 873 | mlog_errno(status); |
@@ -1241,9 +1242,8 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1241 | 1242 | ||
1242 | if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { | 1243 | if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { |
1243 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | 1244 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, |
1244 | new_inode, | 1245 | OCFS2_I(new_inode)->ip_blkno, |
1245 | orphan_name, | 1246 | orphan_name, &orphan_insert); |
1246 | &orphan_insert); | ||
1247 | if (status < 0) { | 1247 | if (status < 0) { |
1248 | mlog_errno(status); | 1248 | mlog_errno(status); |
1249 | goto bail; | 1249 | goto bail; |
@@ -1284,7 +1284,8 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1284 | goto bail; | 1284 | goto bail; |
1285 | } | 1285 | } |
1286 | } | 1286 | } |
1287 | status = ocfs2_journal_access_di(handle, new_inode, newfe_bh, | 1287 | status = ocfs2_journal_access_di(handle, INODE_CACHE(new_inode), |
1288 | newfe_bh, | ||
1288 | OCFS2_JOURNAL_ACCESS_WRITE); | 1289 | OCFS2_JOURNAL_ACCESS_WRITE); |
1289 | if (status < 0) { | 1290 | if (status < 0) { |
1290 | mlog_errno(status); | 1291 | mlog_errno(status); |
@@ -1331,7 +1332,8 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1331 | old_inode->i_ctime = CURRENT_TIME; | 1332 | old_inode->i_ctime = CURRENT_TIME; |
1332 | mark_inode_dirty(old_inode); | 1333 | mark_inode_dirty(old_inode); |
1333 | 1334 | ||
1334 | status = ocfs2_journal_access_di(handle, old_inode, old_inode_bh, | 1335 | status = ocfs2_journal_access_di(handle, INODE_CACHE(old_inode), |
1336 | old_inode_bh, | ||
1335 | OCFS2_JOURNAL_ACCESS_WRITE); | 1337 | OCFS2_JOURNAL_ACCESS_WRITE); |
1336 | if (status >= 0) { | 1338 | if (status >= 0) { |
1337 | old_di = (struct ocfs2_dinode *) old_inode_bh->b_data; | 1339 | old_di = (struct ocfs2_dinode *) old_inode_bh->b_data; |
@@ -1407,9 +1409,10 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1407 | (int)old_dir_nlink, old_dir->i_nlink); | 1409 | (int)old_dir_nlink, old_dir->i_nlink); |
1408 | } else { | 1410 | } else { |
1409 | struct ocfs2_dinode *fe; | 1411 | struct ocfs2_dinode *fe; |
1410 | status = ocfs2_journal_access_di(handle, old_dir, | 1412 | status = ocfs2_journal_access_di(handle, |
1411 | old_dir_bh, | 1413 | INODE_CACHE(old_dir), |
1412 | OCFS2_JOURNAL_ACCESS_WRITE); | 1414 | old_dir_bh, |
1415 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1413 | fe = (struct ocfs2_dinode *) old_dir_bh->b_data; | 1416 | fe = (struct ocfs2_dinode *) old_dir_bh->b_data; |
1414 | ocfs2_set_links_count(fe, old_dir->i_nlink); | 1417 | ocfs2_set_links_count(fe, old_dir->i_nlink); |
1415 | status = ocfs2_journal_dirty(handle, old_dir_bh); | 1418 | status = ocfs2_journal_dirty(handle, old_dir_bh); |
@@ -1527,9 +1530,11 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb, | |||
1527 | mlog_errno(status); | 1530 | mlog_errno(status); |
1528 | goto bail; | 1531 | goto bail; |
1529 | } | 1532 | } |
1530 | ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]); | 1533 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), |
1534 | bhs[virtual]); | ||
1531 | 1535 | ||
1532 | status = ocfs2_journal_access(handle, inode, bhs[virtual], | 1536 | status = ocfs2_journal_access(handle, INODE_CACHE(inode), |
1537 | bhs[virtual], | ||
1533 | OCFS2_JOURNAL_ACCESS_CREATE); | 1538 | OCFS2_JOURNAL_ACCESS_CREATE); |
1534 | if (status < 0) { | 1539 | if (status < 0) { |
1535 | mlog_errno(status); | 1540 | mlog_errno(status); |
@@ -1692,7 +1697,11 @@ static int ocfs2_symlink(struct inode *dir, | |||
1692 | } | 1697 | } |
1693 | did_quota_inode = 1; | 1698 | did_quota_inode = 1; |
1694 | 1699 | ||
1695 | status = ocfs2_mknod_locked(osb, dir, inode, dentry, | 1700 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, |
1701 | inode->i_mode, dentry->d_name.len, | ||
1702 | dentry->d_name.name); | ||
1703 | |||
1704 | status = ocfs2_mknod_locked(osb, dir, inode, | ||
1696 | 0, &new_fe_bh, parent_fe_bh, handle, | 1705 | 0, &new_fe_bh, parent_fe_bh, handle, |
1697 | inode_ac); | 1706 | inode_ac); |
1698 | if (status < 0) { | 1707 | if (status < 0) { |
@@ -1842,7 +1851,7 @@ bail: | |||
1842 | 1851 | ||
1843 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | 1852 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, |
1844 | struct inode **ret_orphan_dir, | 1853 | struct inode **ret_orphan_dir, |
1845 | struct inode *inode, | 1854 | u64 blkno, |
1846 | char *name, | 1855 | char *name, |
1847 | struct ocfs2_dir_lookup_result *lookup) | 1856 | struct ocfs2_dir_lookup_result *lookup) |
1848 | { | 1857 | { |
@@ -1850,7 +1859,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | |||
1850 | struct buffer_head *orphan_dir_bh = NULL; | 1859 | struct buffer_head *orphan_dir_bh = NULL; |
1851 | int status = 0; | 1860 | int status = 0; |
1852 | 1861 | ||
1853 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); | 1862 | status = ocfs2_blkno_stringify(blkno, name); |
1854 | if (status < 0) { | 1863 | if (status < 0) { |
1855 | mlog_errno(status); | 1864 | mlog_errno(status); |
1856 | return status; | 1865 | return status; |
@@ -1917,7 +1926,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
1917 | goto leave; | 1926 | goto leave; |
1918 | } | 1927 | } |
1919 | 1928 | ||
1920 | status = ocfs2_journal_access_di(handle, orphan_dir_inode, orphan_dir_bh, | 1929 | status = ocfs2_journal_access_di(handle, |
1930 | INODE_CACHE(orphan_dir_inode), | ||
1931 | orphan_dir_bh, | ||
1921 | OCFS2_JOURNAL_ACCESS_WRITE); | 1932 | OCFS2_JOURNAL_ACCESS_WRITE); |
1922 | if (status < 0) { | 1933 | if (status < 0) { |
1923 | mlog_errno(status); | 1934 | mlog_errno(status); |
@@ -2002,7 +2013,9 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
2002 | goto leave; | 2013 | goto leave; |
2003 | } | 2014 | } |
2004 | 2015 | ||
2005 | status = ocfs2_journal_access_di(handle,orphan_dir_inode, orphan_dir_bh, | 2016 | status = ocfs2_journal_access_di(handle, |
2017 | INODE_CACHE(orphan_dir_inode), | ||
2018 | orphan_dir_bh, | ||
2006 | OCFS2_JOURNAL_ACCESS_WRITE); | 2019 | OCFS2_JOURNAL_ACCESS_WRITE); |
2007 | if (status < 0) { | 2020 | if (status < 0) { |
2008 | mlog_errno(status); | 2021 | mlog_errno(status); |
@@ -2028,6 +2041,274 @@ leave: | |||
2028 | return status; | 2041 | return status; |
2029 | } | 2042 | } |
2030 | 2043 | ||
2044 | int ocfs2_create_inode_in_orphan(struct inode *dir, | ||
2045 | int mode, | ||
2046 | struct inode **new_inode) | ||
2047 | { | ||
2048 | int status, did_quota_inode = 0; | ||
2049 | struct inode *inode = NULL; | ||
2050 | struct inode *orphan_dir = NULL; | ||
2051 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2052 | struct ocfs2_dinode *di = NULL; | ||
2053 | handle_t *handle = NULL; | ||
2054 | char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; | ||
2055 | struct buffer_head *parent_di_bh = NULL; | ||
2056 | struct buffer_head *new_di_bh = NULL; | ||
2057 | struct ocfs2_alloc_context *inode_ac = NULL; | ||
2058 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; | ||
2059 | |||
2060 | status = ocfs2_inode_lock(dir, &parent_di_bh, 1); | ||
2061 | if (status < 0) { | ||
2062 | if (status != -ENOENT) | ||
2063 | mlog_errno(status); | ||
2064 | return status; | ||
2065 | } | ||
2066 | |||
2067 | /* | ||
2068 | * We give the orphan dir the root blkno to fake an orphan name, | ||
2069 | * and allocate enough space for our insertion. | ||
2070 | */ | ||
2071 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | ||
2072 | osb->root_blkno, | ||
2073 | orphan_name, &orphan_insert); | ||
2074 | if (status < 0) { | ||
2075 | mlog_errno(status); | ||
2076 | goto leave; | ||
2077 | } | ||
2078 | |||
2079 | /* reserve an inode spot */ | ||
2080 | status = ocfs2_reserve_new_inode(osb, &inode_ac); | ||
2081 | if (status < 0) { | ||
2082 | if (status != -ENOSPC) | ||
2083 | mlog_errno(status); | ||
2084 | goto leave; | ||
2085 | } | ||
2086 | |||
2087 | inode = ocfs2_get_init_inode(dir, mode); | ||
2088 | if (!inode) { | ||
2089 | status = -ENOMEM; | ||
2090 | mlog_errno(status); | ||
2091 | goto leave; | ||
2092 | } | ||
2093 | |||
2094 | handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, 0, 0)); | ||
2095 | if (IS_ERR(handle)) { | ||
2096 | status = PTR_ERR(handle); | ||
2097 | handle = NULL; | ||
2098 | mlog_errno(status); | ||
2099 | goto leave; | ||
2100 | } | ||
2101 | |||
2102 | /* We don't use standard VFS wrapper because we don't want vfs_dq_init | ||
2103 | * to be called. */ | ||
2104 | if (sb_any_quota_active(osb->sb) && | ||
2105 | osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { | ||
2106 | status = -EDQUOT; | ||
2107 | goto leave; | ||
2108 | } | ||
2109 | did_quota_inode = 1; | ||
2110 | |||
2111 | /* do the real work now. */ | ||
2112 | status = ocfs2_mknod_locked(osb, dir, inode, | ||
2113 | 0, &new_di_bh, parent_di_bh, handle, | ||
2114 | inode_ac); | ||
2115 | if (status < 0) { | ||
2116 | mlog_errno(status); | ||
2117 | goto leave; | ||
2118 | } | ||
2119 | |||
2120 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name); | ||
2121 | if (status < 0) { | ||
2122 | mlog_errno(status); | ||
2123 | goto leave; | ||
2124 | } | ||
2125 | |||
2126 | di = (struct ocfs2_dinode *)new_di_bh->b_data; | ||
2127 | status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name, | ||
2128 | &orphan_insert, orphan_dir); | ||
2129 | if (status < 0) { | ||
2130 | mlog_errno(status); | ||
2131 | goto leave; | ||
2132 | } | ||
2133 | |||
2134 | /* get open lock so that only nodes can't remove it from orphan dir. */ | ||
2135 | status = ocfs2_open_lock(inode); | ||
2136 | if (status < 0) | ||
2137 | mlog_errno(status); | ||
2138 | |||
2139 | leave: | ||
2140 | if (status < 0 && did_quota_inode) | ||
2141 | vfs_dq_free_inode(inode); | ||
2142 | if (handle) | ||
2143 | ocfs2_commit_trans(osb, handle); | ||
2144 | |||
2145 | if (orphan_dir) { | ||
2146 | /* This was locked for us in ocfs2_prepare_orphan_dir() */ | ||
2147 | ocfs2_inode_unlock(orphan_dir, 1); | ||
2148 | mutex_unlock(&orphan_dir->i_mutex); | ||
2149 | iput(orphan_dir); | ||
2150 | } | ||
2151 | |||
2152 | if (status == -ENOSPC) | ||
2153 | mlog(0, "Disk is full\n"); | ||
2154 | |||
2155 | if ((status < 0) && inode) { | ||
2156 | clear_nlink(inode); | ||
2157 | iput(inode); | ||
2158 | } | ||
2159 | |||
2160 | if (inode_ac) | ||
2161 | ocfs2_free_alloc_context(inode_ac); | ||
2162 | |||
2163 | brelse(new_di_bh); | ||
2164 | |||
2165 | if (!status) | ||
2166 | *new_inode = inode; | ||
2167 | |||
2168 | ocfs2_free_dir_lookup_result(&orphan_insert); | ||
2169 | |||
2170 | ocfs2_inode_unlock(dir, 1); | ||
2171 | brelse(parent_di_bh); | ||
2172 | return status; | ||
2173 | } | ||
2174 | |||
2175 | int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | ||
2176 | struct inode *inode, | ||
2177 | struct dentry *dentry) | ||
2178 | { | ||
2179 | int status = 0; | ||
2180 | struct buffer_head *parent_di_bh = NULL; | ||
2181 | handle_t *handle = NULL; | ||
2182 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2183 | struct ocfs2_dinode *dir_di, *di; | ||
2184 | struct inode *orphan_dir_inode = NULL; | ||
2185 | struct buffer_head *orphan_dir_bh = NULL; | ||
2186 | struct buffer_head *di_bh = NULL; | ||
2187 | struct ocfs2_dir_lookup_result lookup = { NULL, }; | ||
2188 | |||
2189 | mlog_entry("(0x%p, 0x%p, %.*s')\n", dir, dentry, | ||
2190 | dentry->d_name.len, dentry->d_name.name); | ||
2191 | |||
2192 | status = ocfs2_inode_lock(dir, &parent_di_bh, 1); | ||
2193 | if (status < 0) { | ||
2194 | if (status != -ENOENT) | ||
2195 | mlog_errno(status); | ||
2196 | return status; | ||
2197 | } | ||
2198 | |||
2199 | dir_di = (struct ocfs2_dinode *) parent_di_bh->b_data; | ||
2200 | if (!dir_di->i_links_count) { | ||
2201 | /* can't make a file in a deleted directory. */ | ||
2202 | status = -ENOENT; | ||
2203 | goto leave; | ||
2204 | } | ||
2205 | |||
2206 | status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name, | ||
2207 | dentry->d_name.len); | ||
2208 | if (status) | ||
2209 | goto leave; | ||
2210 | |||
2211 | /* get a spot inside the dir. */ | ||
2212 | status = ocfs2_prepare_dir_for_insert(osb, dir, parent_di_bh, | ||
2213 | dentry->d_name.name, | ||
2214 | dentry->d_name.len, &lookup); | ||
2215 | if (status < 0) { | ||
2216 | mlog_errno(status); | ||
2217 | goto leave; | ||
2218 | } | ||
2219 | |||
2220 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | ||
2221 | ORPHAN_DIR_SYSTEM_INODE, | ||
2222 | osb->slot_num); | ||
2223 | if (!orphan_dir_inode) { | ||
2224 | status = -EEXIST; | ||
2225 | mlog_errno(status); | ||
2226 | goto leave; | ||
2227 | } | ||
2228 | |||
2229 | mutex_lock(&orphan_dir_inode->i_mutex); | ||
2230 | |||
2231 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); | ||
2232 | if (status < 0) { | ||
2233 | mlog_errno(status); | ||
2234 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
2235 | iput(orphan_dir_inode); | ||
2236 | goto leave; | ||
2237 | } | ||
2238 | |||
2239 | status = ocfs2_read_inode_block(inode, &di_bh); | ||
2240 | if (status < 0) { | ||
2241 | mlog_errno(status); | ||
2242 | goto orphan_unlock; | ||
2243 | } | ||
2244 | |||
2245 | handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb)); | ||
2246 | if (IS_ERR(handle)) { | ||
2247 | status = PTR_ERR(handle); | ||
2248 | handle = NULL; | ||
2249 | mlog_errno(status); | ||
2250 | goto orphan_unlock; | ||
2251 | } | ||
2252 | |||
2253 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), | ||
2254 | di_bh, OCFS2_JOURNAL_ACCESS_WRITE); | ||
2255 | if (status < 0) { | ||
2256 | mlog_errno(status); | ||
2257 | goto out_commit; | ||
2258 | } | ||
2259 | |||
2260 | status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, | ||
2261 | orphan_dir_bh); | ||
2262 | if (status < 0) { | ||
2263 | mlog_errno(status); | ||
2264 | goto out_commit; | ||
2265 | } | ||
2266 | |||
2267 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
2268 | le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); | ||
2269 | di->i_orphaned_slot = 0; | ||
2270 | ocfs2_journal_dirty(handle, di_bh); | ||
2271 | |||
2272 | status = ocfs2_add_entry(handle, dentry, inode, | ||
2273 | OCFS2_I(inode)->ip_blkno, parent_di_bh, | ||
2274 | &lookup); | ||
2275 | if (status < 0) { | ||
2276 | mlog_errno(status); | ||
2277 | goto out_commit; | ||
2278 | } | ||
2279 | |||
2280 | status = ocfs2_dentry_attach_lock(dentry, inode, | ||
2281 | OCFS2_I(dir)->ip_blkno); | ||
2282 | if (status) { | ||
2283 | mlog_errno(status); | ||
2284 | goto out_commit; | ||
2285 | } | ||
2286 | |||
2287 | insert_inode_hash(inode); | ||
2288 | dentry->d_op = &ocfs2_dentry_ops; | ||
2289 | d_instantiate(dentry, inode); | ||
2290 | status = 0; | ||
2291 | out_commit: | ||
2292 | ocfs2_commit_trans(osb, handle); | ||
2293 | orphan_unlock: | ||
2294 | ocfs2_inode_unlock(orphan_dir_inode, 1); | ||
2295 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
2296 | iput(orphan_dir_inode); | ||
2297 | leave: | ||
2298 | |||
2299 | ocfs2_inode_unlock(dir, 1); | ||
2300 | |||
2301 | brelse(di_bh); | ||
2302 | brelse(parent_di_bh); | ||
2303 | brelse(orphan_dir_bh); | ||
2304 | |||
2305 | ocfs2_free_dir_lookup_result(&lookup); | ||
2306 | |||
2307 | mlog_exit(status); | ||
2308 | |||
2309 | return status; | ||
2310 | } | ||
2311 | |||
2031 | const struct inode_operations ocfs2_dir_iops = { | 2312 | const struct inode_operations ocfs2_dir_iops = { |
2032 | .create = ocfs2_create, | 2313 | .create = ocfs2_create, |
2033 | .lookup = ocfs2_lookup, | 2314 | .lookup = ocfs2_lookup, |
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h index 688aef64c879..e5d059d4f115 100644 --- a/fs/ocfs2/namei.h +++ b/fs/ocfs2/namei.h | |||
@@ -35,5 +35,11 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
35 | struct inode *orphan_dir_inode, | 35 | struct inode *orphan_dir_inode, |
36 | struct inode *inode, | 36 | struct inode *inode, |
37 | struct buffer_head *orphan_dir_bh); | 37 | struct buffer_head *orphan_dir_bh); |
38 | int ocfs2_create_inode_in_orphan(struct inode *dir, | ||
39 | int mode, | ||
40 | struct inode **new_inode); | ||
41 | int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | ||
42 | struct inode *new_inode, | ||
43 | struct dentry *new_dentry); | ||
38 | 44 | ||
39 | #endif /* OCFS2_NAMEI_H */ | 45 | #endif /* OCFS2_NAMEI_H */ |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 39e1d5a39505..eae404602424 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -51,20 +51,51 @@ | |||
51 | /* For struct ocfs2_blockcheck_stats */ | 51 | /* For struct ocfs2_blockcheck_stats */ |
52 | #include "blockcheck.h" | 52 | #include "blockcheck.h" |
53 | 53 | ||
54 | |||
55 | /* Caching of metadata buffers */ | ||
56 | |||
54 | /* Most user visible OCFS2 inodes will have very few pieces of | 57 | /* Most user visible OCFS2 inodes will have very few pieces of |
55 | * metadata, but larger files (including bitmaps, etc) must be taken | 58 | * metadata, but larger files (including bitmaps, etc) must be taken |
56 | * into account when designing an access scheme. We allow a small | 59 | * into account when designing an access scheme. We allow a small |
57 | * amount of inlined blocks to be stored on an array and grow the | 60 | * amount of inlined blocks to be stored on an array and grow the |
58 | * structure into a rb tree when necessary. */ | 61 | * structure into a rb tree when necessary. */ |
59 | #define OCFS2_INODE_MAX_CACHE_ARRAY 2 | 62 | #define OCFS2_CACHE_INFO_MAX_ARRAY 2 |
63 | |||
64 | /* Flags for ocfs2_caching_info */ | ||
65 | |||
66 | enum ocfs2_caching_info_flags { | ||
67 | /* Indicates that the metadata cache is using the inline array */ | ||
68 | OCFS2_CACHE_FL_INLINE = 1<<1, | ||
69 | }; | ||
60 | 70 | ||
71 | struct ocfs2_caching_operations; | ||
61 | struct ocfs2_caching_info { | 72 | struct ocfs2_caching_info { |
73 | /* | ||
74 | * The parent structure provides the locks, but because the | ||
75 | * parent structure can differ, it provides locking operations | ||
76 | * to struct ocfs2_caching_info. | ||
77 | */ | ||
78 | const struct ocfs2_caching_operations *ci_ops; | ||
79 | |||
80 | /* next two are protected by trans_inc_lock */ | ||
81 | /* which transaction were we created on? Zero if none. */ | ||
82 | unsigned long ci_created_trans; | ||
83 | /* last transaction we were a part of. */ | ||
84 | unsigned long ci_last_trans; | ||
85 | |||
86 | /* Cache structures */ | ||
87 | unsigned int ci_flags; | ||
62 | unsigned int ci_num_cached; | 88 | unsigned int ci_num_cached; |
63 | union { | 89 | union { |
64 | sector_t ci_array[OCFS2_INODE_MAX_CACHE_ARRAY]; | 90 | sector_t ci_array[OCFS2_CACHE_INFO_MAX_ARRAY]; |
65 | struct rb_root ci_tree; | 91 | struct rb_root ci_tree; |
66 | } ci_cache; | 92 | } ci_cache; |
67 | }; | 93 | }; |
94 | /* | ||
95 | * Need this prototype here instead of in uptodate.h because journal.h | ||
96 | * uses it. | ||
97 | */ | ||
98 | struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci); | ||
68 | 99 | ||
69 | /* this limits us to 256 nodes | 100 | /* this limits us to 256 nodes |
70 | * if we need more, we can do a kmalloc for the map */ | 101 | * if we need more, we can do a kmalloc for the map */ |
@@ -377,12 +408,17 @@ struct ocfs2_super | |||
377 | 408 | ||
378 | /* the group we used to allocate inodes. */ | 409 | /* the group we used to allocate inodes. */ |
379 | u64 osb_inode_alloc_group; | 410 | u64 osb_inode_alloc_group; |
411 | |||
412 | /* rb tree root for refcount lock. */ | ||
413 | struct rb_root osb_rf_lock_tree; | ||
414 | struct ocfs2_refcount_tree *osb_ref_tree_lru; | ||
380 | }; | 415 | }; |
381 | 416 | ||
382 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) | 417 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) |
383 | 418 | ||
384 | /* Useful typedef for passing around journal access functions */ | 419 | /* Useful typedef for passing around journal access functions */ |
385 | typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct inode *inode, | 420 | typedef int (*ocfs2_journal_access_func)(handle_t *handle, |
421 | struct ocfs2_caching_info *ci, | ||
386 | struct buffer_head *bh, int type); | 422 | struct buffer_head *bh, int type); |
387 | 423 | ||
388 | static inline int ocfs2_should_order_data(struct inode *inode) | 424 | static inline int ocfs2_should_order_data(struct inode *inode) |
@@ -480,6 +516,13 @@ static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n) | |||
480 | ocfs2_set_links_count(di, links); | 516 | ocfs2_set_links_count(di, links); |
481 | } | 517 | } |
482 | 518 | ||
519 | static inline int ocfs2_refcount_tree(struct ocfs2_super *osb) | ||
520 | { | ||
521 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) | ||
522 | return 1; | ||
523 | return 0; | ||
524 | } | ||
525 | |||
483 | /* set / clear functions because cluster events can make these happen | 526 | /* set / clear functions because cluster events can make these happen |
484 | * in parallel so we want the transitions to be atomic. this also | 527 | * in parallel so we want the transitions to be atomic. this also |
485 | * means that any future flags osb_flags must be protected by spinlock | 528 | * means that any future flags osb_flags must be protected by spinlock |
@@ -578,6 +621,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) | |||
578 | #define OCFS2_IS_VALID_DX_LEAF(ptr) \ | 621 | #define OCFS2_IS_VALID_DX_LEAF(ptr) \ |
579 | (!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE)) | 622 | (!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE)) |
580 | 623 | ||
624 | #define OCFS2_IS_VALID_REFCOUNT_BLOCK(ptr) \ | ||
625 | (!strcmp((ptr)->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE)) | ||
626 | |||
581 | static inline unsigned long ino_from_blkno(struct super_block *sb, | 627 | static inline unsigned long ino_from_blkno(struct super_block *sb, |
582 | u64 blkno) | 628 | u64 blkno) |
583 | { | 629 | { |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 7ab6e9e5e77c..e9431e4a5e7c 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -68,6 +68,7 @@ | |||
68 | #define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1" | 68 | #define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1" |
69 | #define OCFS2_DX_ROOT_SIGNATURE "DXDIR01" | 69 | #define OCFS2_DX_ROOT_SIGNATURE "DXDIR01" |
70 | #define OCFS2_DX_LEAF_SIGNATURE "DXLEAF1" | 70 | #define OCFS2_DX_LEAF_SIGNATURE "DXLEAF1" |
71 | #define OCFS2_REFCOUNT_BLOCK_SIGNATURE "REFCNT1" | ||
71 | 72 | ||
72 | /* Compatibility flags */ | 73 | /* Compatibility flags */ |
73 | #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ | 74 | #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ |
@@ -98,7 +99,8 @@ | |||
98 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ | 99 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ |
99 | | OCFS2_FEATURE_INCOMPAT_XATTR \ | 100 | | OCFS2_FEATURE_INCOMPAT_XATTR \ |
100 | | OCFS2_FEATURE_INCOMPAT_META_ECC \ | 101 | | OCFS2_FEATURE_INCOMPAT_META_ECC \ |
101 | | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) | 102 | | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ |
103 | | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) | ||
102 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | 104 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ |
103 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | 105 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ |
104 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) | 106 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) |
@@ -160,6 +162,9 @@ | |||
160 | /* Metadata checksum and error correction */ | 162 | /* Metadata checksum and error correction */ |
161 | #define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800 | 163 | #define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800 |
162 | 164 | ||
165 | /* Refcount tree support */ | ||
166 | #define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000 | ||
167 | |||
163 | /* | 168 | /* |
164 | * backup superblock flag is used to indicate that this volume | 169 | * backup superblock flag is used to indicate that this volume |
165 | * has backup superblocks. | 170 | * has backup superblocks. |
@@ -223,6 +228,7 @@ | |||
223 | #define OCFS2_HAS_XATTR_FL (0x0002) | 228 | #define OCFS2_HAS_XATTR_FL (0x0002) |
224 | #define OCFS2_INLINE_XATTR_FL (0x0004) | 229 | #define OCFS2_INLINE_XATTR_FL (0x0004) |
225 | #define OCFS2_INDEXED_DIR_FL (0x0008) | 230 | #define OCFS2_INDEXED_DIR_FL (0x0008) |
231 | #define OCFS2_HAS_REFCOUNT_FL (0x0010) | ||
226 | 232 | ||
227 | /* Inode attributes, keep in sync with EXT2 */ | 233 | /* Inode attributes, keep in sync with EXT2 */ |
228 | #define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ | 234 | #define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ |
@@ -241,8 +247,11 @@ | |||
241 | /* | 247 | /* |
242 | * Extent record flags (e_node.leaf.flags) | 248 | * Extent record flags (e_node.leaf.flags) |
243 | */ | 249 | */ |
244 | #define OCFS2_EXT_UNWRITTEN (0x01) /* Extent is allocated but | 250 | #define OCFS2_EXT_UNWRITTEN (0x01) /* Extent is allocated but |
245 | * unwritten */ | 251 | * unwritten */ |
252 | #define OCFS2_EXT_REFCOUNTED (0x02) /* Extent is reference | ||
253 | * counted in an associated | ||
254 | * refcount tree */ | ||
246 | 255 | ||
247 | /* | 256 | /* |
248 | * ioctl commands | 257 | * ioctl commands |
@@ -292,6 +301,15 @@ struct ocfs2_new_group_input { | |||
292 | #define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input) | 301 | #define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input) |
293 | #define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input) | 302 | #define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input) |
294 | 303 | ||
304 | /* Used to pass 2 file names to reflink. */ | ||
305 | struct reflink_arguments { | ||
306 | __u64 old_path; | ||
307 | __u64 new_path; | ||
308 | __u64 preserve; | ||
309 | }; | ||
310 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) | ||
311 | |||
312 | |||
295 | /* | 313 | /* |
296 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) | 314 | * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) |
297 | */ | 315 | */ |
@@ -717,7 +735,8 @@ struct ocfs2_dinode { | |||
717 | __le64 i_xattr_loc; | 735 | __le64 i_xattr_loc; |
718 | /*80*/ struct ocfs2_block_check i_check; /* Error checking */ | 736 | /*80*/ struct ocfs2_block_check i_check; /* Error checking */ |
719 | /*88*/ __le64 i_dx_root; /* Pointer to dir index root block */ | 737 | /*88*/ __le64 i_dx_root; /* Pointer to dir index root block */ |
720 | __le64 i_reserved2[5]; | 738 | /*90*/ __le64 i_refcount_loc; |
739 | __le64 i_reserved2[4]; | ||
721 | /*B8*/ union { | 740 | /*B8*/ union { |
722 | __le64 i_pad1; /* Generic way to refer to this | 741 | __le64 i_pad1; /* Generic way to refer to this |
723 | 64bit union */ | 742 | 64bit union */ |
@@ -901,6 +920,60 @@ struct ocfs2_group_desc | |||
901 | /*40*/ __u8 bg_bitmap[0]; | 920 | /*40*/ __u8 bg_bitmap[0]; |
902 | }; | 921 | }; |
903 | 922 | ||
923 | struct ocfs2_refcount_rec { | ||
924 | /*00*/ __le64 r_cpos; /* Physical offset, in clusters */ | ||
925 | __le32 r_clusters; /* Clusters covered by this extent */ | ||
926 | __le32 r_refcount; /* Reference count of this extent */ | ||
927 | /*10*/ | ||
928 | }; | ||
929 | #define OCFS2_32BIT_POS_MASK (0xffffffffULL) | ||
930 | |||
931 | #define OCFS2_REFCOUNT_LEAF_FL (0x00000001) | ||
932 | #define OCFS2_REFCOUNT_TREE_FL (0x00000002) | ||
933 | |||
934 | struct ocfs2_refcount_list { | ||
935 | /*00*/ __le16 rl_count; /* Maximum number of entries possible | ||
936 | in rl_records */ | ||
937 | __le16 rl_used; /* Current number of used records */ | ||
938 | __le32 rl_reserved2; | ||
939 | __le64 rl_reserved1; /* Pad to sizeof(ocfs2_refcount_record) */ | ||
940 | /*10*/ struct ocfs2_refcount_rec rl_recs[0]; /* Refcount records */ | ||
941 | }; | ||
942 | |||
943 | |||
944 | struct ocfs2_refcount_block { | ||
945 | /*00*/ __u8 rf_signature[8]; /* Signature for verification */ | ||
946 | __le16 rf_suballoc_slot; /* Slot suballocator this block | ||
947 | belongs to */ | ||
948 | __le16 rf_suballoc_bit; /* Bit offset in suballocator | ||
949 | block group */ | ||
950 | __le32 rf_fs_generation; /* Must match superblock */ | ||
951 | /*10*/ __le64 rf_blkno; /* Offset on disk, in blocks */ | ||
952 | __le64 rf_parent; /* Parent block, only valid if | ||
953 | OCFS2_REFCOUNT_LEAF_FL is set in | ||
954 | rf_flags */ | ||
955 | /*20*/ struct ocfs2_block_check rf_check; /* Error checking */ | ||
956 | __le64 rf_last_eb_blk; /* Pointer to last extent block */ | ||
957 | /*30*/ __le32 rf_count; /* Number of inodes sharing this | ||
958 | refcount tree */ | ||
959 | __le32 rf_flags; /* See the flags above */ | ||
960 | __le32 rf_clusters; /* clusters covered by refcount tree. */ | ||
961 | __le32 rf_cpos; /* cluster offset in refcount tree.*/ | ||
962 | /*40*/ __le32 rf_generation; /* generation number. all be the same | ||
963 | * for the same refcount tree. */ | ||
964 | __le32 rf_reserved0; | ||
965 | __le64 rf_reserved1[7]; | ||
966 | /*80*/ union { | ||
967 | struct ocfs2_refcount_list rf_records; /* List of refcount | ||
968 | records */ | ||
969 | struct ocfs2_extent_list rf_list; /* Extent record list, | ||
970 | only valid if | ||
971 | OCFS2_REFCOUNT_TREE_FL | ||
972 | is set in rf_flags */ | ||
973 | }; | ||
974 | /* Actual on-disk size is one block */ | ||
975 | }; | ||
976 | |||
904 | /* | 977 | /* |
905 | * On disk extended attribute structure for OCFS2. | 978 | * On disk extended attribute structure for OCFS2. |
906 | */ | 979 | */ |
@@ -1312,6 +1385,32 @@ static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb) | |||
1312 | 1385 | ||
1313 | return size / sizeof(struct ocfs2_extent_rec); | 1386 | return size / sizeof(struct ocfs2_extent_rec); |
1314 | } | 1387 | } |
1388 | |||
1389 | static inline u16 ocfs2_extent_recs_per_rb(struct super_block *sb) | ||
1390 | { | ||
1391 | int size; | ||
1392 | |||
1393 | size = sb->s_blocksize - | ||
1394 | offsetof(struct ocfs2_refcount_block, rf_list.l_recs); | ||
1395 | |||
1396 | return size / sizeof(struct ocfs2_extent_rec); | ||
1397 | } | ||
1398 | |||
1399 | static inline u16 ocfs2_refcount_recs_per_rb(struct super_block *sb) | ||
1400 | { | ||
1401 | int size; | ||
1402 | |||
1403 | size = sb->s_blocksize - | ||
1404 | offsetof(struct ocfs2_refcount_block, rf_records.rl_recs); | ||
1405 | |||
1406 | return size / sizeof(struct ocfs2_refcount_rec); | ||
1407 | } | ||
1408 | |||
1409 | static inline u32 | ||
1410 | ocfs2_get_ref_rec_low_cpos(const struct ocfs2_refcount_rec *rec) | ||
1411 | { | ||
1412 | return le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK; | ||
1413 | } | ||
1315 | #else | 1414 | #else |
1316 | static inline int ocfs2_fast_symlink_chars(int blocksize) | 1415 | static inline int ocfs2_fast_symlink_chars(int blocksize) |
1317 | { | 1416 | { |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index c212cf5a2bdf..d277aabf5dfb 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
@@ -49,6 +49,7 @@ enum ocfs2_lock_type { | |||
49 | OCFS2_LOCK_TYPE_QINFO, | 49 | OCFS2_LOCK_TYPE_QINFO, |
50 | OCFS2_LOCK_TYPE_NFS_SYNC, | 50 | OCFS2_LOCK_TYPE_NFS_SYNC, |
51 | OCFS2_LOCK_TYPE_ORPHAN_SCAN, | 51 | OCFS2_LOCK_TYPE_ORPHAN_SCAN, |
52 | OCFS2_LOCK_TYPE_REFCOUNT, | ||
52 | OCFS2_NUM_LOCK_TYPES | 53 | OCFS2_NUM_LOCK_TYPES |
53 | }; | 54 | }; |
54 | 55 | ||
@@ -89,6 +90,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) | |||
89 | case OCFS2_LOCK_TYPE_ORPHAN_SCAN: | 90 | case OCFS2_LOCK_TYPE_ORPHAN_SCAN: |
90 | c = 'P'; | 91 | c = 'P'; |
91 | break; | 92 | break; |
93 | case OCFS2_LOCK_TYPE_REFCOUNT: | ||
94 | c = 'T'; | ||
95 | break; | ||
92 | default: | 96 | default: |
93 | c = '\0'; | 97 | c = '\0'; |
94 | } | 98 | } |
@@ -110,6 +114,7 @@ static char *ocfs2_lock_type_strings[] = { | |||
110 | [OCFS2_LOCK_TYPE_QINFO] = "Quota", | 114 | [OCFS2_LOCK_TYPE_QINFO] = "Quota", |
111 | [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync", | 115 | [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync", |
112 | [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", | 116 | [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", |
117 | [OCFS2_LOCK_TYPE_REFCOUNT] = "Refcount", | ||
113 | }; | 118 | }; |
114 | 119 | ||
115 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | 120 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) |
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index 3fb96fcd4c81..e5df9d170b0c 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h | |||
@@ -109,7 +109,7 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); | |||
109 | int ocfs2_read_quota_block(struct inode *inode, u64 v_block, | 109 | int ocfs2_read_quota_block(struct inode *inode, u64 v_block, |
110 | struct buffer_head **bh); | 110 | struct buffer_head **bh); |
111 | 111 | ||
112 | extern struct dquot_operations ocfs2_quota_operations; | 112 | extern const struct dquot_operations ocfs2_quota_operations; |
113 | extern struct quota_format_type ocfs2_quota_format; | 113 | extern struct quota_format_type ocfs2_quota_format; |
114 | 114 | ||
115 | int ocfs2_quota_setup(void); | 115 | int ocfs2_quota_setup(void); |
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 44f2a5e1d042..b437dc0c4cad 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -154,7 +154,7 @@ static int ocfs2_get_quota_block(struct inode *inode, int block, | |||
154 | err = -EIO; | 154 | err = -EIO; |
155 | mlog_errno(err); | 155 | mlog_errno(err); |
156 | } | 156 | } |
157 | return err;; | 157 | return err; |
158 | } | 158 | } |
159 | 159 | ||
160 | /* Read data from global quotafile - avoid pagecache and such because we cannot | 160 | /* Read data from global quotafile - avoid pagecache and such because we cannot |
@@ -253,8 +253,9 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, | |||
253 | flush_dcache_page(bh->b_page); | 253 | flush_dcache_page(bh->b_page); |
254 | set_buffer_uptodate(bh); | 254 | set_buffer_uptodate(bh); |
255 | unlock_buffer(bh); | 255 | unlock_buffer(bh); |
256 | ocfs2_set_buffer_uptodate(gqinode, bh); | 256 | ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh); |
257 | err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type); | 257 | err = ocfs2_journal_access_dq(handle, INODE_CACHE(gqinode), bh, |
258 | ja_type); | ||
258 | if (err < 0) { | 259 | if (err < 0) { |
259 | brelse(bh); | 260 | brelse(bh); |
260 | goto out; | 261 | goto out; |
@@ -849,7 +850,7 @@ static void ocfs2_destroy_dquot(struct dquot *dquot) | |||
849 | kmem_cache_free(ocfs2_dquot_cachep, dquot); | 850 | kmem_cache_free(ocfs2_dquot_cachep, dquot); |
850 | } | 851 | } |
851 | 852 | ||
852 | struct dquot_operations ocfs2_quota_operations = { | 853 | const struct dquot_operations ocfs2_quota_operations = { |
853 | .initialize = dquot_initialize, | 854 | .initialize = dquot_initialize, |
854 | .drop = dquot_drop, | 855 | .drop = dquot_drop, |
855 | .alloc_space = dquot_alloc_space, | 856 | .alloc_space = dquot_alloc_space, |
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index bdb09cb6e1fe..1a2c50a759fa 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c | |||
@@ -108,7 +108,7 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh, | |||
108 | mlog_errno(status); | 108 | mlog_errno(status); |
109 | return status; | 109 | return status; |
110 | } | 110 | } |
111 | status = ocfs2_journal_access_dq(handle, inode, bh, | 111 | status = ocfs2_journal_access_dq(handle, INODE_CACHE(inode), bh, |
112 | OCFS2_JOURNAL_ACCESS_WRITE); | 112 | OCFS2_JOURNAL_ACCESS_WRITE); |
113 | if (status < 0) { | 113 | if (status < 0) { |
114 | mlog_errno(status); | 114 | mlog_errno(status); |
@@ -510,7 +510,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, | |||
510 | goto out_commit; | 510 | goto out_commit; |
511 | } | 511 | } |
512 | /* Release local quota file entry */ | 512 | /* Release local quota file entry */ |
513 | status = ocfs2_journal_access_dq(handle, lqinode, | 513 | status = ocfs2_journal_access_dq(handle, |
514 | INODE_CACHE(lqinode), | ||
514 | qbh, OCFS2_JOURNAL_ACCESS_WRITE); | 515 | qbh, OCFS2_JOURNAL_ACCESS_WRITE); |
515 | if (status < 0) { | 516 | if (status < 0) { |
516 | mlog_errno(status); | 517 | mlog_errno(status); |
@@ -619,7 +620,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, | |||
619 | mlog_errno(status); | 620 | mlog_errno(status); |
620 | goto out_bh; | 621 | goto out_bh; |
621 | } | 622 | } |
622 | status = ocfs2_journal_access_dq(handle, lqinode, bh, | 623 | status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), |
624 | bh, | ||
623 | OCFS2_JOURNAL_ACCESS_WRITE); | 625 | OCFS2_JOURNAL_ACCESS_WRITE); |
624 | if (status < 0) { | 626 | if (status < 0) { |
625 | mlog_errno(status); | 627 | mlog_errno(status); |
@@ -993,8 +995,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( | |||
993 | goto out_trans; | 995 | goto out_trans; |
994 | } | 996 | } |
995 | dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; | 997 | dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; |
996 | ocfs2_set_new_buffer_uptodate(lqinode, bh); | 998 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh); |
997 | status = ocfs2_journal_access_dq(handle, lqinode, bh, | 999 | status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), bh, |
998 | OCFS2_JOURNAL_ACCESS_CREATE); | 1000 | OCFS2_JOURNAL_ACCESS_CREATE); |
999 | if (status < 0) { | 1001 | if (status < 0) { |
1000 | mlog_errno(status); | 1002 | mlog_errno(status); |
@@ -1027,8 +1029,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( | |||
1027 | mlog_errno(status); | 1029 | mlog_errno(status); |
1028 | goto out_trans; | 1030 | goto out_trans; |
1029 | } | 1031 | } |
1030 | ocfs2_set_new_buffer_uptodate(lqinode, dbh); | 1032 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), dbh); |
1031 | status = ocfs2_journal_access_dq(handle, lqinode, dbh, | 1033 | status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), dbh, |
1032 | OCFS2_JOURNAL_ACCESS_CREATE); | 1034 | OCFS2_JOURNAL_ACCESS_CREATE); |
1033 | if (status < 0) { | 1035 | if (status < 0) { |
1034 | mlog_errno(status); | 1036 | mlog_errno(status); |
@@ -1131,7 +1133,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( | |||
1131 | mlog_errno(status); | 1133 | mlog_errno(status); |
1132 | goto out; | 1134 | goto out; |
1133 | } | 1135 | } |
1134 | ocfs2_set_new_buffer_uptodate(lqinode, bh); | 1136 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh); |
1135 | 1137 | ||
1136 | /* Local quota info, chunk header and the new block we initialize */ | 1138 | /* Local quota info, chunk header and the new block we initialize */ |
1137 | handle = ocfs2_start_trans(OCFS2_SB(sb), | 1139 | handle = ocfs2_start_trans(OCFS2_SB(sb), |
@@ -1143,7 +1145,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( | |||
1143 | goto out; | 1145 | goto out; |
1144 | } | 1146 | } |
1145 | /* Zero created block */ | 1147 | /* Zero created block */ |
1146 | status = ocfs2_journal_access_dq(handle, lqinode, bh, | 1148 | status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), bh, |
1147 | OCFS2_JOURNAL_ACCESS_CREATE); | 1149 | OCFS2_JOURNAL_ACCESS_CREATE); |
1148 | if (status < 0) { | 1150 | if (status < 0) { |
1149 | mlog_errno(status); | 1151 | mlog_errno(status); |
@@ -1158,7 +1160,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( | |||
1158 | goto out_trans; | 1160 | goto out_trans; |
1159 | } | 1161 | } |
1160 | /* Update chunk header */ | 1162 | /* Update chunk header */ |
1161 | status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, | 1163 | status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), |
1164 | chunk->qc_headerbh, | ||
1162 | OCFS2_JOURNAL_ACCESS_WRITE); | 1165 | OCFS2_JOURNAL_ACCESS_WRITE); |
1163 | if (status < 0) { | 1166 | if (status < 0) { |
1164 | mlog_errno(status); | 1167 | mlog_errno(status); |
@@ -1292,7 +1295,8 @@ static int ocfs2_local_release_dquot(struct dquot *dquot) | |||
1292 | goto out; | 1295 | goto out; |
1293 | } | 1296 | } |
1294 | 1297 | ||
1295 | status = ocfs2_journal_access_dq(handle, sb_dqopt(sb)->files[type], | 1298 | status = ocfs2_journal_access_dq(handle, |
1299 | INODE_CACHE(sb_dqopt(sb)->files[type]), | ||
1296 | od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE); | 1300 | od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE); |
1297 | if (status < 0) { | 1301 | if (status < 0) { |
1298 | mlog_errno(status); | 1302 | mlog_errno(status); |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c new file mode 100644 index 000000000000..60287fc56bcb --- /dev/null +++ b/fs/ocfs2/refcounttree.c | |||
@@ -0,0 +1,4313 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * refcounttree.c | ||
5 | * | ||
6 | * Copyright (C) 2009 Oracle. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public | ||
10 | * License version 2 as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * General Public License for more details. | ||
16 | */ | ||
17 | |||
18 | #include <linux/sort.h> | ||
19 | #define MLOG_MASK_PREFIX ML_REFCOUNT | ||
20 | #include <cluster/masklog.h> | ||
21 | #include "ocfs2.h" | ||
22 | #include "inode.h" | ||
23 | #include "alloc.h" | ||
24 | #include "suballoc.h" | ||
25 | #include "journal.h" | ||
26 | #include "uptodate.h" | ||
27 | #include "super.h" | ||
28 | #include "buffer_head_io.h" | ||
29 | #include "blockcheck.h" | ||
30 | #include "refcounttree.h" | ||
31 | #include "sysfile.h" | ||
32 | #include "dlmglue.h" | ||
33 | #include "extent_map.h" | ||
34 | #include "aops.h" | ||
35 | #include "xattr.h" | ||
36 | #include "namei.h" | ||
37 | |||
38 | #include <linux/bio.h> | ||
39 | #include <linux/blkdev.h> | ||
40 | #include <linux/gfp.h> | ||
41 | #include <linux/slab.h> | ||
42 | #include <linux/writeback.h> | ||
43 | #include <linux/pagevec.h> | ||
44 | #include <linux/swap.h> | ||
45 | #include <linux/security.h> | ||
46 | #include <linux/fsnotify.h> | ||
47 | #include <linux/quotaops.h> | ||
48 | #include <linux/namei.h> | ||
49 | #include <linux/mount.h> | ||
50 | |||
51 | struct ocfs2_cow_context { | ||
52 | struct inode *inode; | ||
53 | u32 cow_start; | ||
54 | u32 cow_len; | ||
55 | struct ocfs2_extent_tree data_et; | ||
56 | struct ocfs2_refcount_tree *ref_tree; | ||
57 | struct buffer_head *ref_root_bh; | ||
58 | struct ocfs2_alloc_context *meta_ac; | ||
59 | struct ocfs2_alloc_context *data_ac; | ||
60 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
61 | void *cow_object; | ||
62 | struct ocfs2_post_refcount *post_refcount; | ||
63 | int extra_credits; | ||
64 | int (*get_clusters)(struct ocfs2_cow_context *context, | ||
65 | u32 v_cluster, u32 *p_cluster, | ||
66 | u32 *num_clusters, | ||
67 | unsigned int *extent_flags); | ||
68 | int (*cow_duplicate_clusters)(handle_t *handle, | ||
69 | struct ocfs2_cow_context *context, | ||
70 | u32 cpos, u32 old_cluster, | ||
71 | u32 new_cluster, u32 new_len); | ||
72 | }; | ||
73 | |||
74 | static inline struct ocfs2_refcount_tree * | ||
75 | cache_info_to_refcount(struct ocfs2_caching_info *ci) | ||
76 | { | ||
77 | return container_of(ci, struct ocfs2_refcount_tree, rf_ci); | ||
78 | } | ||
79 | |||
80 | static int ocfs2_validate_refcount_block(struct super_block *sb, | ||
81 | struct buffer_head *bh) | ||
82 | { | ||
83 | int rc; | ||
84 | struct ocfs2_refcount_block *rb = | ||
85 | (struct ocfs2_refcount_block *)bh->b_data; | ||
86 | |||
87 | mlog(0, "Validating refcount block %llu\n", | ||
88 | (unsigned long long)bh->b_blocknr); | ||
89 | |||
90 | BUG_ON(!buffer_uptodate(bh)); | ||
91 | |||
92 | /* | ||
93 | * If the ecc fails, we return the error but otherwise | ||
94 | * leave the filesystem running. We know any error is | ||
95 | * local to this block. | ||
96 | */ | ||
97 | rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check); | ||
98 | if (rc) { | ||
99 | mlog(ML_ERROR, "Checksum failed for refcount block %llu\n", | ||
100 | (unsigned long long)bh->b_blocknr); | ||
101 | return rc; | ||
102 | } | ||
103 | |||
104 | |||
105 | if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) { | ||
106 | ocfs2_error(sb, | ||
107 | "Refcount block #%llu has bad signature %.*s", | ||
108 | (unsigned long long)bh->b_blocknr, 7, | ||
109 | rb->rf_signature); | ||
110 | return -EINVAL; | ||
111 | } | ||
112 | |||
113 | if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) { | ||
114 | ocfs2_error(sb, | ||
115 | "Refcount block #%llu has an invalid rf_blkno " | ||
116 | "of %llu", | ||
117 | (unsigned long long)bh->b_blocknr, | ||
118 | (unsigned long long)le64_to_cpu(rb->rf_blkno)); | ||
119 | return -EINVAL; | ||
120 | } | ||
121 | |||
122 | if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) { | ||
123 | ocfs2_error(sb, | ||
124 | "Refcount block #%llu has an invalid " | ||
125 | "rf_fs_generation of #%u", | ||
126 | (unsigned long long)bh->b_blocknr, | ||
127 | le32_to_cpu(rb->rf_fs_generation)); | ||
128 | return -EINVAL; | ||
129 | } | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci, | ||
135 | u64 rb_blkno, | ||
136 | struct buffer_head **bh) | ||
137 | { | ||
138 | int rc; | ||
139 | struct buffer_head *tmp = *bh; | ||
140 | |||
141 | rc = ocfs2_read_block(ci, rb_blkno, &tmp, | ||
142 | ocfs2_validate_refcount_block); | ||
143 | |||
144 | /* If ocfs2_read_block() got us a new bh, pass it up. */ | ||
145 | if (!rc && !*bh) | ||
146 | *bh = tmp; | ||
147 | |||
148 | return rc; | ||
149 | } | ||
150 | |||
151 | static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci) | ||
152 | { | ||
153 | struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); | ||
154 | |||
155 | return rf->rf_blkno; | ||
156 | } | ||
157 | |||
158 | static struct super_block * | ||
159 | ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci) | ||
160 | { | ||
161 | struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); | ||
162 | |||
163 | return rf->rf_sb; | ||
164 | } | ||
165 | |||
166 | static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci) | ||
167 | { | ||
168 | struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); | ||
169 | |||
170 | spin_lock(&rf->rf_lock); | ||
171 | } | ||
172 | |||
173 | static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci) | ||
174 | { | ||
175 | struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); | ||
176 | |||
177 | spin_unlock(&rf->rf_lock); | ||
178 | } | ||
179 | |||
180 | static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci) | ||
181 | { | ||
182 | struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); | ||
183 | |||
184 | mutex_lock(&rf->rf_io_mutex); | ||
185 | } | ||
186 | |||
187 | static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci) | ||
188 | { | ||
189 | struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci); | ||
190 | |||
191 | mutex_unlock(&rf->rf_io_mutex); | ||
192 | } | ||
193 | |||
/*
 * Caching callbacks wired into the generic ocfs2 metadata cache so a
 * refcount tree can cache its disk blocks independently of any inode.
 */
static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
	.co_owner		= ocfs2_refcount_cache_owner,
	.co_get_super		= ocfs2_refcount_cache_get_super,
	.co_cache_lock		= ocfs2_refcount_cache_lock,
	.co_cache_unlock	= ocfs2_refcount_cache_unlock,
	.co_io_lock		= ocfs2_refcount_cache_io_lock,
	.co_io_unlock		= ocfs2_refcount_cache_io_unlock,
};
202 | |||
203 | static struct ocfs2_refcount_tree * | ||
204 | ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno) | ||
205 | { | ||
206 | struct rb_node *n = osb->osb_rf_lock_tree.rb_node; | ||
207 | struct ocfs2_refcount_tree *tree = NULL; | ||
208 | |||
209 | while (n) { | ||
210 | tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node); | ||
211 | |||
212 | if (blkno < tree->rf_blkno) | ||
213 | n = n->rb_left; | ||
214 | else if (blkno > tree->rf_blkno) | ||
215 | n = n->rb_right; | ||
216 | else | ||
217 | return tree; | ||
218 | } | ||
219 | |||
220 | return NULL; | ||
221 | } | ||
222 | |||
223 | /* osb_lock is already locked. */ | ||
224 | static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb, | ||
225 | struct ocfs2_refcount_tree *new) | ||
226 | { | ||
227 | u64 rf_blkno = new->rf_blkno; | ||
228 | struct rb_node *parent = NULL; | ||
229 | struct rb_node **p = &osb->osb_rf_lock_tree.rb_node; | ||
230 | struct ocfs2_refcount_tree *tmp; | ||
231 | |||
232 | while (*p) { | ||
233 | parent = *p; | ||
234 | |||
235 | tmp = rb_entry(parent, struct ocfs2_refcount_tree, | ||
236 | rf_node); | ||
237 | |||
238 | if (rf_blkno < tmp->rf_blkno) | ||
239 | p = &(*p)->rb_left; | ||
240 | else if (rf_blkno > tmp->rf_blkno) | ||
241 | p = &(*p)->rb_right; | ||
242 | else { | ||
243 | /* This should never happen! */ | ||
244 | mlog(ML_ERROR, "Duplicate refcount block %llu found!\n", | ||
245 | (unsigned long long)rf_blkno); | ||
246 | BUG(); | ||
247 | } | ||
248 | } | ||
249 | |||
250 | rb_link_node(&new->rf_node, parent, p); | ||
251 | rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree); | ||
252 | } | ||
253 | |||
/*
 * Tear down and free a refcount tree object.  Order matters: the
 * metadata cache must be drained before the cluster lock resource is
 * dropped and freed, and only then can the memory go away.
 */
static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
{
	ocfs2_metadata_cache_exit(&tree->rf_ci);
	ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
	ocfs2_lock_res_free(&tree->rf_lockres);
	kfree(tree);
}
261 | |||
262 | static inline void | ||
263 | ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb, | ||
264 | struct ocfs2_refcount_tree *tree) | ||
265 | { | ||
266 | rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree); | ||
267 | if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree) | ||
268 | osb->osb_ref_tree_lru = NULL; | ||
269 | } | ||
270 | |||
/* Locked wrapper: remove @tree from the rb-tree under osb->osb_lock. */
static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
					struct ocfs2_refcount_tree *tree)
{
	spin_lock(&osb->osb_lock);
	ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
	spin_unlock(&osb->osb_lock);
}
278 | |||
/*
 * kref release callback: invoked when the last reference to a refcount
 * tree is dropped via ocfs2_refcount_tree_put().  Frees the tree.
 */
void ocfs2_kref_remove_refcount_tree(struct kref *kref)
{
	struct ocfs2_refcount_tree *tree =
		container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);

	ocfs2_free_refcount_tree(tree);
}
286 | |||
/* Take an additional reference on @tree. */
static inline void
ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
{
	kref_get(&tree->rf_getcnt);
}
292 | |||
/* Drop a reference on @tree; the last put frees it via the kref callback. */
static inline void
ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
{
	kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
}
298 | |||
299 | static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new, | ||
300 | struct super_block *sb) | ||
301 | { | ||
302 | ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops); | ||
303 | mutex_init(&new->rf_io_mutex); | ||
304 | new->rf_sb = sb; | ||
305 | spin_lock_init(&new->rf_lock); | ||
306 | } | ||
307 | |||
/*
 * Initialize the rw-semaphore and cluster lock resource for @new.
 * Kept separate from ocfs2_init_refcount_tree_ci() because the lock
 * resource needs the on-disk generation, which must be read first.
 */
static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
					struct ocfs2_refcount_tree *new,
					u64 rf_blkno, u32 generation)
{
	init_rwsem(&new->rf_sem);
	ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
				     rf_blkno, generation);
}
316 | |||
317 | static struct ocfs2_refcount_tree* | ||
318 | ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno) | ||
319 | { | ||
320 | struct ocfs2_refcount_tree *new; | ||
321 | |||
322 | new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS); | ||
323 | if (!new) | ||
324 | return NULL; | ||
325 | |||
326 | new->rf_blkno = rf_blkno; | ||
327 | kref_init(&new->rf_getcnt); | ||
328 | ocfs2_init_refcount_tree_ci(new, osb->sb); | ||
329 | |||
330 | return new; | ||
331 | } | ||
332 | |||
/*
 * Find (or create and insert) the in-memory refcount tree for root
 * block @rf_blkno, returning it in *ret_tree.
 *
 * Fast path: under osb_lock, check the one-entry LRU and then the
 * rb-tree.  Slow path: drop osb_lock, allocate a new tree, read the
 * root block for its generation, then re-take osb_lock and re-check the
 * rb-tree — another thread may have inserted the tree while the lock
 * was dropped, in which case our new copy is freed.
 */
static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
				   struct ocfs2_refcount_tree **ret_tree)
{
	int ret = 0;
	struct ocfs2_refcount_tree *tree, *new = NULL;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_block *ref_rb;

	spin_lock(&osb->osb_lock);
	if (osb->osb_ref_tree_lru &&
	    osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
		tree = osb->osb_ref_tree_lru;
	else
		tree = ocfs2_find_refcount_tree(osb, rf_blkno);
	if (tree)
		goto out;

	spin_unlock(&osb->osb_lock);

	new = ocfs2_allocate_refcount_tree(osb, rf_blkno);
	if (!new) {
		ret = -ENOMEM;
		mlog_errno(ret);
		return ret;
	}
	/*
	 * We need the generation to create the refcount tree lock and since
	 * it isn't changed during the tree modification, we are safe here to
	 * read without protection.
	 * We also have to purge the cache after we create the lock since the
	 * refcount block may have the stale data. It can only be trusted when
	 * we hold the refcount lock.
	 */
	ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		ocfs2_metadata_cache_exit(&new->rf_ci);
		kfree(new);
		return ret;
	}

	ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
	new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
	ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
				      new->rf_generation);
	ocfs2_metadata_cache_purge(&new->rf_ci);

	spin_lock(&osb->osb_lock);
	/* Re-check: we raced against other inserters while unlocked. */
	tree = ocfs2_find_refcount_tree(osb, rf_blkno);
	if (tree)
		goto out;

	ocfs2_insert_refcount_tree(osb, new);

	tree = new;
	new = NULL;

out:
	*ret_tree = tree;

	/* Remember the most recently used tree for the fast path above. */
	osb->osb_ref_tree_lru = tree;

	spin_unlock(&osb->osb_lock);

	/* We lost the insertion race; drop our unused copy. */
	if (new)
		ocfs2_free_refcount_tree(new);

	brelse(ref_root_bh);
	return ret;
}
403 | |||
404 | static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno) | ||
405 | { | ||
406 | int ret; | ||
407 | struct buffer_head *di_bh = NULL; | ||
408 | struct ocfs2_dinode *di; | ||
409 | |||
410 | ret = ocfs2_read_inode_block(inode, &di_bh); | ||
411 | if (ret) { | ||
412 | mlog_errno(ret); | ||
413 | goto out; | ||
414 | } | ||
415 | |||
416 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); | ||
417 | |||
418 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
419 | *ref_blkno = le64_to_cpu(di->i_refcount_loc); | ||
420 | brelse(di_bh); | ||
421 | out: | ||
422 | return ret; | ||
423 | } | ||
424 | |||
425 | static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb, | ||
426 | struct ocfs2_refcount_tree *tree, int rw) | ||
427 | { | ||
428 | int ret; | ||
429 | |||
430 | ret = ocfs2_refcount_lock(tree, rw); | ||
431 | if (ret) { | ||
432 | mlog_errno(ret); | ||
433 | goto out; | ||
434 | } | ||
435 | |||
436 | if (rw) | ||
437 | down_write(&tree->rf_sem); | ||
438 | else | ||
439 | down_read(&tree->rf_sem); | ||
440 | |||
441 | out: | ||
442 | return ret; | ||
443 | } | ||
444 | |||
/*
 * Lock the refcount tree pointed by ref_blkno and return the tree.
 * In most case, we lock the tree and read the refcount block.
 * So read it here if the caller really needs it.
 *
 * If the tree has been re-created by other node, it will free the
 * old one and re-create it.
 */
int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
			     u64 ref_blkno, int rw,
			     struct ocfs2_refcount_tree **ret_tree,
			     struct buffer_head **ref_bh)
{
	int ret, delete_tree = 0;
	struct ocfs2_refcount_tree *tree = NULL;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_block *rb;

again:
	ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* Pin the tree across the lock/read sequence below. */
	ocfs2_refcount_tree_get(tree);

	ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
	if (ret) {
		mlog_errno(ret);
		ocfs2_refcount_tree_put(tree);
		goto out;
	}

	/* Read the root block now that we hold the cluster lock. */
	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
					&ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		ocfs2_unlock_refcount_tree(osb, tree, rw);
		ocfs2_refcount_tree_put(tree);
		goto out;
	}

	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
	/*
	 * If the refcount block has been freed and re-created, we may need
	 * to recreate the refcount tree also.
	 *
	 * Here we just remove the tree from the rb-tree, and the last
	 * kref holder will unlock and delete this refcount_tree.
	 * Then we goto "again" and ocfs2_get_refcount_tree will create
	 * the new refcount tree for us.
	 */
	if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
		if (!tree->rf_removed) {
			ocfs2_erase_refcount_tree_from_list(osb, tree);
			tree->rf_removed = 1;
			delete_tree = 1;
		}

		ocfs2_unlock_refcount_tree(osb, tree, rw);
		/*
		 * We get an extra reference when we create the refcount
		 * tree, so another put will destroy it.
		 */
		if (delete_tree)
			ocfs2_refcount_tree_put(tree);
		brelse(ref_root_bh);
		ref_root_bh = NULL;
		goto again;
	}

	*ret_tree = tree;
	if (ref_bh) {
		/* Transfer bh ownership to the caller. */
		*ref_bh = ref_root_bh;
		ref_root_bh = NULL;
	}
out:
	brelse(ref_root_bh);
	return ret;
}
526 | |||
527 | int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw, | ||
528 | struct ocfs2_refcount_tree **ret_tree, | ||
529 | struct buffer_head **ref_bh) | ||
530 | { | ||
531 | int ret; | ||
532 | u64 ref_blkno; | ||
533 | |||
534 | ret = ocfs2_get_refcount_block(inode, &ref_blkno); | ||
535 | if (ret) { | ||
536 | mlog_errno(ret); | ||
537 | return ret; | ||
538 | } | ||
539 | |||
540 | return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, | ||
541 | rw, ret_tree, ref_bh); | ||
542 | } | ||
543 | |||
/*
 * Undo ocfs2_lock_refcount_tree(): release the local rw-semaphore,
 * then the cluster lock, then the reference taken at lock time.
 */
void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
				struct ocfs2_refcount_tree *tree, int rw)
{
	if (rw)
		up_write(&tree->rf_sem);
	else
		up_read(&tree->rf_sem);

	ocfs2_refcount_unlock(tree, rw);
	ocfs2_refcount_tree_put(tree);
}
555 | |||
556 | void ocfs2_purge_refcount_trees(struct ocfs2_super *osb) | ||
557 | { | ||
558 | struct rb_node *node; | ||
559 | struct ocfs2_refcount_tree *tree; | ||
560 | struct rb_root *root = &osb->osb_rf_lock_tree; | ||
561 | |||
562 | while ((node = rb_last(root)) != NULL) { | ||
563 | tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node); | ||
564 | |||
565 | mlog(0, "Purge tree %llu\n", | ||
566 | (unsigned long long) tree->rf_blkno); | ||
567 | |||
568 | rb_erase(&tree->rf_node, root); | ||
569 | ocfs2_free_refcount_tree(tree); | ||
570 | } | ||
571 | } | ||
572 | |||
573 | /* | ||
574 | * Create a refcount tree for an inode. | ||
575 | * We take for granted that the inode is already locked. | ||
576 | */ | ||
577 | static int ocfs2_create_refcount_tree(struct inode *inode, | ||
578 | struct buffer_head *di_bh) | ||
579 | { | ||
580 | int ret; | ||
581 | handle_t *handle = NULL; | ||
582 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
583 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
584 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
585 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
586 | struct buffer_head *new_bh = NULL; | ||
587 | struct ocfs2_refcount_block *rb; | ||
588 | struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL; | ||
589 | u16 suballoc_bit_start; | ||
590 | u32 num_got; | ||
591 | u64 first_blkno; | ||
592 | |||
593 | BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); | ||
594 | |||
595 | mlog(0, "create tree for inode %lu\n", inode->i_ino); | ||
596 | |||
597 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); | ||
598 | if (ret) { | ||
599 | mlog_errno(ret); | ||
600 | goto out; | ||
601 | } | ||
602 | |||
603 | handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS); | ||
604 | if (IS_ERR(handle)) { | ||
605 | ret = PTR_ERR(handle); | ||
606 | mlog_errno(ret); | ||
607 | goto out; | ||
608 | } | ||
609 | |||
610 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
611 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
612 | if (ret) { | ||
613 | mlog_errno(ret); | ||
614 | goto out_commit; | ||
615 | } | ||
616 | |||
617 | ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, | ||
618 | &suballoc_bit_start, &num_got, | ||
619 | &first_blkno); | ||
620 | if (ret) { | ||
621 | mlog_errno(ret); | ||
622 | goto out_commit; | ||
623 | } | ||
624 | |||
625 | new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno); | ||
626 | if (!new_tree) { | ||
627 | ret = -ENOMEM; | ||
628 | mlog_errno(ret); | ||
629 | goto out_commit; | ||
630 | } | ||
631 | |||
632 | new_bh = sb_getblk(inode->i_sb, first_blkno); | ||
633 | ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh); | ||
634 | |||
635 | ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh, | ||
636 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
637 | if (ret) { | ||
638 | mlog_errno(ret); | ||
639 | goto out_commit; | ||
640 | } | ||
641 | |||
642 | /* Initialize ocfs2_refcount_block. */ | ||
643 | rb = (struct ocfs2_refcount_block *)new_bh->b_data; | ||
644 | memset(rb, 0, inode->i_sb->s_blocksize); | ||
645 | strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); | ||
646 | rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num); | ||
647 | rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); | ||
648 | rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); | ||
649 | rb->rf_blkno = cpu_to_le64(first_blkno); | ||
650 | rb->rf_count = cpu_to_le32(1); | ||
651 | rb->rf_records.rl_count = | ||
652 | cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb)); | ||
653 | spin_lock(&osb->osb_lock); | ||
654 | rb->rf_generation = osb->s_next_generation++; | ||
655 | spin_unlock(&osb->osb_lock); | ||
656 | |||
657 | ocfs2_journal_dirty(handle, new_bh); | ||
658 | |||
659 | spin_lock(&oi->ip_lock); | ||
660 | oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL; | ||
661 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); | ||
662 | di->i_refcount_loc = cpu_to_le64(first_blkno); | ||
663 | spin_unlock(&oi->ip_lock); | ||
664 | |||
665 | mlog(0, "created tree for inode %lu, refblock %llu\n", | ||
666 | inode->i_ino, (unsigned long long)first_blkno); | ||
667 | |||
668 | ocfs2_journal_dirty(handle, di_bh); | ||
669 | |||
670 | /* | ||
671 | * We have to init the tree lock here since it will use | ||
672 | * the generation number to create it. | ||
673 | */ | ||
674 | new_tree->rf_generation = le32_to_cpu(rb->rf_generation); | ||
675 | ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno, | ||
676 | new_tree->rf_generation); | ||
677 | |||
678 | spin_lock(&osb->osb_lock); | ||
679 | tree = ocfs2_find_refcount_tree(osb, first_blkno); | ||
680 | |||
681 | /* | ||
682 | * We've just created a new refcount tree in this block. If | ||
683 | * we found a refcount tree on the ocfs2_super, it must be | ||
684 | * one we just deleted. We free the old tree before | ||
685 | * inserting the new tree. | ||
686 | */ | ||
687 | BUG_ON(tree && tree->rf_generation == new_tree->rf_generation); | ||
688 | if (tree) | ||
689 | ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree); | ||
690 | ocfs2_insert_refcount_tree(osb, new_tree); | ||
691 | spin_unlock(&osb->osb_lock); | ||
692 | new_tree = NULL; | ||
693 | if (tree) | ||
694 | ocfs2_refcount_tree_put(tree); | ||
695 | |||
696 | out_commit: | ||
697 | ocfs2_commit_trans(osb, handle); | ||
698 | |||
699 | out: | ||
700 | if (new_tree) { | ||
701 | ocfs2_metadata_cache_exit(&new_tree->rf_ci); | ||
702 | kfree(new_tree); | ||
703 | } | ||
704 | |||
705 | brelse(new_bh); | ||
706 | if (meta_ac) | ||
707 | ocfs2_free_alloc_context(meta_ac); | ||
708 | |||
709 | return ret; | ||
710 | } | ||
711 | |||
/*
 * Attach an EXISTING refcount tree (rooted at @refcount_loc) to
 * @inode: bump the root block's rf_count and point the dinode at it.
 * Used when two inodes come to share one tree (e.g. reflink).
 * The inode must not already have a refcount tree.
 */
static int ocfs2_set_refcount_tree(struct inode *inode,
				   struct buffer_head *di_bh,
				   u64 refcount_loc)
{
	int ret;
	handle_t *handle = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_block *rb;
	struct ocfs2_refcount_tree *ref_tree;

	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);

	/* Exclusive-lock the tree; also reads the root block for us. */
	ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
				       &ref_tree, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* One more inode now references this tree. */
	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
	le32_add_cpu(&rb->rf_count, 1);

	ocfs2_journal_dirty(handle, ref_root_bh);

	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	di->i_refcount_loc = cpu_to_le64(refcount_loc);
	spin_unlock(&oi->ip_lock);
	ocfs2_journal_dirty(handle, di_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	brelse(ref_root_bh);

	return ret;
}
775 | |||
/*
 * Detach @inode from its refcount tree: drop the root block's
 * rf_count and clear the dinode's pointer/flag.  If this inode was
 * the last user (rf_count drops to 0), the root block is also freed
 * back to the extent allocator and the in-memory tree is unhooked.
 * Returns 0 when the inode has no refcount tree attached.
 */
int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
{
	int ret, delete_tree = 0;
	handle_t *handle = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_refcount_block *rb;
	struct inode *alloc_inode = NULL;
	struct buffer_head *alloc_bh = NULL;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_refcount_tree *ref_tree;
	int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS;
	u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
	u16 bit = 0;

	if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
		return 0;

	BUG_ON(!ref_blkno);
	ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	rb = (struct ocfs2_refcount_block *)blk_bh->b_data;

	/*
	 * If we are the last user, we need to free the block.
	 * So lock the allocator ahead.
	 */
	if (le32_to_cpu(rb->rf_count) == 1) {
		blk = le64_to_cpu(rb->rf_blkno);
		bit = le16_to_cpu(rb->rf_suballoc_bit);
		bg_blkno = ocfs2_which_suballoc_group(blk, bit);

		alloc_inode = ocfs2_get_system_file_inode(osb,
					EXTENT_ALLOC_SYSTEM_INODE,
					le16_to_cpu(rb->rf_suballoc_slot));
		if (!alloc_inode) {
			ret = -ENOMEM;
			mlog_errno(ret);
			goto out;
		}
		mutex_lock(&alloc_inode->i_mutex);

		ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
		if (ret) {
			mlog_errno(ret);
			goto out_mutex;
		}

		/* Freeing the suballocator bit costs extra credits. */
		credits += OCFS2_SUBALLOC_FREE;
	}

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Clear the inode's tree pointer and flag. */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	di->i_refcount_loc = 0;
	spin_unlock(&oi->ip_lock);
	ocfs2_journal_dirty(handle, di_bh);

	le32_add_cpu(&rb->rf_count , -1);
	ocfs2_journal_dirty(handle, blk_bh);

	/* Last user gone: release the root block itself. */
	if (!rb->rf_count) {
		delete_tree = 1;
		ocfs2_erase_refcount_tree_from_list(osb, ref_tree);
		ret = ocfs2_free_suballoc_bits(handle, alloc_inode,
					       alloc_bh, bit, bg_blkno, 1);
		if (ret)
			mlog_errno(ret);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);
out_unlock:
	if (alloc_inode) {
		ocfs2_inode_unlock(alloc_inode, 1);
		brelse(alloc_bh);
	}
out_mutex:
	if (alloc_inode) {
		mutex_unlock(&alloc_inode->i_mutex);
		iput(alloc_inode);
	}
out:
	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	/* Drop the creation-time reference; last put frees the tree. */
	if (delete_tree)
		ocfs2_refcount_tree_put(ref_tree);
	brelse(blk_bh);

	return ret;
}
892 | |||
/*
 * Scan the record list of one refcount block for the record covering
 * @cpos.  If a record contains @cpos, copy it to *ret_rec; otherwise
 * synthesize a "hole" record starting at @cpos with r_refcount == 0,
 * clipped so it ends at the next real record (or after @len clusters).
 * *index receives the position where @cpos sits/would sit in the list.
 */
static void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci,
					  struct buffer_head *ref_leaf_bh,
					  u64 cpos, unsigned int len,
					  struct ocfs2_refcount_rec *ret_rec,
					  int *index)
{
	int i = 0;
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_rec *rec = NULL;

	for (; i < le16_to_cpu(rb->rf_records.rl_used); i++) {
		rec = &rb->rf_records.rl_recs[i];

		if (le64_to_cpu(rec->r_cpos) +
		    le32_to_cpu(rec->r_clusters) <= cpos)
			continue;
		else if (le64_to_cpu(rec->r_cpos) > cpos)
			break;

		/* ok, cpos falls in this rec. Just return. */
		if (ret_rec)
			*ret_rec = *rec;
		goto out;
	}

	if (ret_rec) {
		/* We meet with a hole here, so fake the rec. */
		ret_rec->r_cpos = cpu_to_le64(cpos);
		ret_rec->r_refcount = 0;
		/*
		 * NOTE: rec here is the record the loop broke on (the
		 * first one starting beyond cpos); use it to clip the
		 * fake record so it does not overlap a real one.
		 */
		if (i < le16_to_cpu(rb->rf_records.rl_used) &&
		    le64_to_cpu(rec->r_cpos) < cpos + len)
			ret_rec->r_clusters =
				cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos);
		else
			ret_rec->r_clusters = cpu_to_le32(len);
	}

out:
	*index = i;
}
934 | |||
/*
 * Try to remove refcount tree. The mechanism is:
 * 1) Check whether i_clusters == 0, if no, exit.
 * 2) check whether we have i_xattr_loc in dinode. if yes, exit.
 * 3) Check whether we have inline xattr stored outside, if yes, exit.
 * 4) Remove the tree.
 *
 * Best-effort: always returns 0; a failed removal is only logged.
 */
int ocfs2_try_remove_refcount_tree(struct inode *inode,
				   struct buffer_head *di_bh)
{
	int ret;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&oi->ip_xattr_sem);
	down_write(&oi->ip_alloc_sem);

	/* Still has data clusters that might be refcounted. */
	if (oi->ip_clusters)
		goto out;

	/* External xattr tree may hold refcounted clusters. */
	if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) && di->i_xattr_loc)
		goto out;

	/* Inline xattrs whose values live outside the dinode. */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL &&
	    ocfs2_has_inline_xattr_value_outside(inode, di))
		goto out;

	ret = ocfs2_remove_refcount_tree(inode, di_bh);
	if (ret)
		mlog_errno(ret);
out:
	up_write(&oi->ip_alloc_sem);
	up_write(&oi->ip_xattr_sem);
	return 0;
}
970 | |||
/*
 * Given a cpos and len, try to find the refcount record which contains cpos.
 * 1. If cpos can be found in one refcount record, return the record.
 * 2. If cpos can't be found, return a fake record which start from cpos
 *    and end at a small value between cpos+len and start of the next record.
 *    This fake record has r_refcount = 0.
 *
 * Also returns (via *ret_bh) the leaf refcount block holding the list
 * that was searched; the caller owns the extra bh reference.
 */
static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
				  struct buffer_head *ref_root_bh,
				  u64 cpos, unsigned int len,
				  struct ocfs2_refcount_rec *ret_rec,
				  int *index,
				  struct buffer_head **ret_bh)
{
	int ret = 0, i, found;
	u32 low_cpos;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *tmp, *rec = NULL;
	struct ocfs2_extent_block *eb;
	struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct ocfs2_refcount_block *rb =
			(struct ocfs2_refcount_block *)ref_root_bh->b_data;

	/* Root holds the record list directly (no extent tree yet). */
	if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) {
		ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len,
					      ret_rec, index);
		*ret_bh = ref_root_bh;
		get_bh(ref_root_bh);
		return 0;
	}

	el = &rb->rf_list;
	low_cpos = cpos & OCFS2_32BIT_POS_MASK;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(sb,
			"refcount tree %llu has non zero tree "
			"depth in leaf btree tree block %llu\n",
			(unsigned long long)ocfs2_metadata_cache_owner(ci),
			(unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	/* Walk records right-to-left for the last one starting <= low_cpos. */
	found = 0;
	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
		rec = &el->l_recs[i];

		if (le32_to_cpu(rec->e_cpos) <= low_cpos) {
			found = 1;
			break;
		}
	}

	/* adjust len when we have ocfs2_extent_rec after it. */
	if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) {
		tmp = &el->l_recs[i+1];

		if (le32_to_cpu(tmp->e_cpos) < cpos + len)
			len = le32_to_cpu(tmp->e_cpos) - cpos;
	}

	/*
	 * NOTE(review): if l_next_free_rec were 0, rec would still be
	 * NULL here and the read below would deref it — presumably a
	 * refcount tree with the TREE_FL flag always has at least one
	 * extent record; confirm against the on-disk format.
	 */
	ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
					&ref_leaf_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len,
				      ret_rec, index);
	*ret_bh = ref_leaf_bh;
out:
	brelse(eb_bh);
	return ret;
}
1059 | |||
/* How a refcount record is mergeable with its neighbors in the list. */
enum ocfs2_ref_rec_contig {
	REF_CONTIG_NONE = 0,		/* no mergeable neighbor */
	REF_CONTIG_LEFT,		/* mergeable with the previous rec */
	REF_CONTIG_RIGHT,		/* mergeable with the next rec */
	REF_CONTIG_LEFTRIGHT,		/* mergeable with both neighbors */
};
1066 | |||
1067 | static enum ocfs2_ref_rec_contig | ||
1068 | ocfs2_refcount_rec_adjacent(struct ocfs2_refcount_block *rb, | ||
1069 | int index) | ||
1070 | { | ||
1071 | if ((rb->rf_records.rl_recs[index].r_refcount == | ||
1072 | rb->rf_records.rl_recs[index + 1].r_refcount) && | ||
1073 | (le64_to_cpu(rb->rf_records.rl_recs[index].r_cpos) + | ||
1074 | le32_to_cpu(rb->rf_records.rl_recs[index].r_clusters) == | ||
1075 | le64_to_cpu(rb->rf_records.rl_recs[index + 1].r_cpos))) | ||
1076 | return REF_CONTIG_RIGHT; | ||
1077 | |||
1078 | return REF_CONTIG_NONE; | ||
1079 | } | ||
1080 | |||
1081 | static enum ocfs2_ref_rec_contig | ||
1082 | ocfs2_refcount_rec_contig(struct ocfs2_refcount_block *rb, | ||
1083 | int index) | ||
1084 | { | ||
1085 | enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE; | ||
1086 | |||
1087 | if (index < le16_to_cpu(rb->rf_records.rl_used) - 1) | ||
1088 | ret = ocfs2_refcount_rec_adjacent(rb, index); | ||
1089 | |||
1090 | if (index > 0) { | ||
1091 | enum ocfs2_ref_rec_contig tmp; | ||
1092 | |||
1093 | tmp = ocfs2_refcount_rec_adjacent(rb, index - 1); | ||
1094 | |||
1095 | if (tmp == REF_CONTIG_RIGHT) { | ||
1096 | if (ret == REF_CONTIG_RIGHT) | ||
1097 | ret = REF_CONTIG_LEFTRIGHT; | ||
1098 | else | ||
1099 | ret = REF_CONTIG_LEFT; | ||
1100 | } | ||
1101 | } | ||
1102 | |||
1103 | return ret; | ||
1104 | } | ||
1105 | |||
/*
 * Merge record @index+1 into record @index (they must carry equal
 * refcounts): grow @index by the neighbor's cluster count, shift the
 * remaining records left by one, zero the freed tail slot and shrink
 * rl_used.
 */
static void ocfs2_rotate_refcount_rec_left(struct ocfs2_refcount_block *rb,
					   int index)
{
	BUG_ON(rb->rf_records.rl_recs[index].r_refcount !=
	       rb->rf_records.rl_recs[index+1].r_refcount);

	le32_add_cpu(&rb->rf_records.rl_recs[index].r_clusters,
		     le32_to_cpu(rb->rf_records.rl_recs[index+1].r_clusters));

	/* Only shift if records exist beyond index+1. */
	if (index < le16_to_cpu(rb->rf_records.rl_used) - 2)
		memmove(&rb->rf_records.rl_recs[index + 1],
			&rb->rf_records.rl_recs[index + 2],
			sizeof(struct ocfs2_refcount_rec) *
			(le16_to_cpu(rb->rf_records.rl_used) - index - 2));

	memset(&rb->rf_records.rl_recs[le16_to_cpu(rb->rf_records.rl_used) - 1],
	       0, sizeof(struct ocfs2_refcount_rec));
	le16_add_cpu(&rb->rf_records.rl_used, -1);
}
1125 | |||
1126 | /* | ||
1127 | * Merge the refcount rec if we are contiguous with the adjacent recs. | ||
1128 | */ | ||
1129 | static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb, | ||
1130 | int index) | ||
1131 | { | ||
1132 | enum ocfs2_ref_rec_contig contig = | ||
1133 | ocfs2_refcount_rec_contig(rb, index); | ||
1134 | |||
1135 | if (contig == REF_CONTIG_NONE) | ||
1136 | return; | ||
1137 | |||
1138 | if (contig == REF_CONTIG_LEFT || contig == REF_CONTIG_LEFTRIGHT) { | ||
1139 | BUG_ON(index == 0); | ||
1140 | index--; | ||
1141 | } | ||
1142 | |||
1143 | ocfs2_rotate_refcount_rec_left(rb, index); | ||
1144 | |||
1145 | if (contig == REF_CONTIG_LEFTRIGHT) | ||
1146 | ocfs2_rotate_refcount_rec_left(rb, index); | ||
1147 | } | ||
1148 | |||
/*
 * Change the refcount indexed by "index" in ref_bh.
 * If refcount reaches 0, remove it.
 *
 * @merge non-zero allows the changed record to be coalesced with its
 * neighbours; @change is the signed delta applied to r_refcount.
 * Returns 0 on success or a negative error from the journal layer.
 */
static int ocfs2_change_refcount_rec(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_leaf_bh,
				     int index, int merge, int change)
{
	int ret;
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_list *rl = &rb->rf_records;
	struct ocfs2_refcount_rec *rec = &rl->rl_recs[index];

	/* Get journal write access before touching the block. */
	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	mlog(0, "change index %d, old count %u, change %d\n", index,
	     le32_to_cpu(rec->r_refcount), change);
	le32_add_cpu(&rec->r_refcount, change);

	if (!rec->r_refcount) {
		/*
		 * Refcount hit zero: delete the record by shifting any
		 * later records down and zeroing the freed tail slot.
		 */
		if (index != le16_to_cpu(rl->rl_used) - 1) {
			memmove(rec, rec + 1,
				(le16_to_cpu(rl->rl_used) - index - 1) *
				sizeof(struct ocfs2_refcount_rec));
			memset(&rl->rl_recs[le16_to_cpu(rl->rl_used) - 1],
			       0, sizeof(struct ocfs2_refcount_rec));
		}

		le16_add_cpu(&rl->rl_used, -1);
	} else if (merge)
		/* Try to coalesce the changed record with its neighbours. */
		ocfs2_refcount_rec_merge(rb, index);

	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
	if (ret)
		mlog_errno(ret);
out:
	return ret;
}
1194 | |||
/*
 * Convert an inline refcount root (records stored directly in the root
 * block) into a b-tree root with a single newly allocated leaf block.
 *
 * The old root's contents are copied wholesale into the new leaf, then
 * the root is re-initialized as an extent list pointing at that leaf.
 * On success *ref_leaf_bh holds a reference to the new leaf's buffer
 * (caller must brelse() it).
 */
static int ocfs2_expand_inline_ref_root(handle_t *handle,
					struct ocfs2_caching_info *ci,
					struct buffer_head *ref_root_bh,
					struct buffer_head **ref_leaf_bh,
					struct ocfs2_alloc_context *meta_ac)
{
	int ret;
	u16 suballoc_bit_start;
	u32 num_got;
	u64 blkno;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct buffer_head *new_bh = NULL;
	struct ocfs2_refcount_block *new_rb;
	struct ocfs2_refcount_block *root_rb =
		(struct ocfs2_refcount_block *)ref_root_bh->b_data;

	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Allocate one metadata block for the new leaf. */
	ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
				   &suballoc_bit_start, &num_got,
				   &blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	new_bh = sb_getblk(sb, blkno);
	if (new_bh == NULL) {
		ret = -EIO;
		mlog_errno(ret);
		goto out;
	}
	ocfs2_set_new_buffer_uptodate(ci, new_bh);

	ret = ocfs2_journal_access_rb(handle, ci, new_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Initialize ocfs2_refcount_block.
	 * It should contain the same information as the old root.
	 * so just memcpy it and change the corresponding field.
	 */
	memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize);

	new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
	new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
	new_rb->rf_blkno = cpu_to_le64(blkno);
	new_rb->rf_cpos = cpu_to_le32(0);
	new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
	new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
	ocfs2_journal_dirty(handle, new_bh);

	/* Now change the root. */
	memset(&root_rb->rf_list, 0, sb->s_blocksize -
	       offsetof(struct ocfs2_refcount_block, rf_list));
	root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb));
	root_rb->rf_clusters = cpu_to_le32(1);
	/* Single extent rec covering one cluster, pointing at the leaf. */
	root_rb->rf_list.l_next_free_rec = cpu_to_le16(1);
	root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
	root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
	root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL);

	ocfs2_journal_dirty(handle, ref_root_bh);

	mlog(0, "new leaf block %llu, used %u\n", (unsigned long long)blkno,
	     le16_to_cpu(new_rb->rf_records.rl_used));

	/* Hand the new leaf's reference to the caller. */
	*ref_leaf_bh = new_bh;
	new_bh = NULL;
out:
	brelse(new_bh);
	return ret;
}
1278 | |||
1279 | static int ocfs2_refcount_rec_no_intersect(struct ocfs2_refcount_rec *prev, | ||
1280 | struct ocfs2_refcount_rec *next) | ||
1281 | { | ||
1282 | if (ocfs2_get_ref_rec_low_cpos(prev) + le32_to_cpu(prev->r_clusters) <= | ||
1283 | ocfs2_get_ref_rec_low_cpos(next)) | ||
1284 | return 1; | ||
1285 | |||
1286 | return 0; | ||
1287 | } | ||
1288 | |||
1289 | static int cmp_refcount_rec_by_low_cpos(const void *a, const void *b) | ||
1290 | { | ||
1291 | const struct ocfs2_refcount_rec *l = a, *r = b; | ||
1292 | u32 l_cpos = ocfs2_get_ref_rec_low_cpos(l); | ||
1293 | u32 r_cpos = ocfs2_get_ref_rec_low_cpos(r); | ||
1294 | |||
1295 | if (l_cpos > r_cpos) | ||
1296 | return 1; | ||
1297 | if (l_cpos < r_cpos) | ||
1298 | return -1; | ||
1299 | return 0; | ||
1300 | } | ||
1301 | |||
1302 | static int cmp_refcount_rec_by_cpos(const void *a, const void *b) | ||
1303 | { | ||
1304 | const struct ocfs2_refcount_rec *l = a, *r = b; | ||
1305 | u64 l_cpos = le64_to_cpu(l->r_cpos); | ||
1306 | u64 r_cpos = le64_to_cpu(r->r_cpos); | ||
1307 | |||
1308 | if (l_cpos > r_cpos) | ||
1309 | return 1; | ||
1310 | if (l_cpos < r_cpos) | ||
1311 | return -1; | ||
1312 | return 0; | ||
1313 | } | ||
1314 | |||
1315 | static void swap_refcount_rec(void *a, void *b, int size) | ||
1316 | { | ||
1317 | struct ocfs2_refcount_rec *l = a, *r = b, tmp; | ||
1318 | |||
1319 | tmp = *(struct ocfs2_refcount_rec *)l; | ||
1320 | *(struct ocfs2_refcount_rec *)l = | ||
1321 | *(struct ocfs2_refcount_rec *)r; | ||
1322 | *(struct ocfs2_refcount_rec *)r = tmp; | ||
1323 | } | ||
1324 | |||
/*
 * The refcount cpos are ordered by their 64bit cpos,
 * But we will use the low 32 bit to be the e_cpos in the b-tree.
 * So we need to make sure that this pos isn't intersected with others.
 *
 * Note: The refcount block is already sorted by their low 32 bit cpos,
 * So just try the middle pos first, and we will exit when we find
 * the good position.
 *
 * On success, *split_index is the index of the first record for the
 * right-hand half and *split_pos its low-32-bit cpos.  Returns -ENOSPC
 * when every candidate boundary intersects.
 */
static int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl,
					 u32 *split_pos, int *split_index)
{
	int num_used = le16_to_cpu(rl->rl_used);
	int delta, middle = num_used / 2;

	/* Walk outward from the middle, one candidate each side per pass. */
	for (delta = 0; delta < middle; delta++) {
		/* Let's check delta earlier than middle */
		if (ocfs2_refcount_rec_no_intersect(
					&rl->rl_recs[middle - delta - 1],
					&rl->rl_recs[middle - delta])) {
			*split_index = middle - delta;
			break;
		}

		/* For even counts, don't walk off the end */
		if ((middle + delta + 1) == num_used)
			continue;

		/* Now try delta past middle */
		if (ocfs2_refcount_rec_no_intersect(
					&rl->rl_recs[middle + delta],
					&rl->rl_recs[middle + delta + 1])) {
			*split_index = middle + delta + 1;
			break;
		}
	}

	/* Loop ran to completion without finding a clean boundary. */
	if (delta >= middle)
		return -ENOSPC;

	*split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]);
	return 0;
}
1368 | |||
1369 | static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, | ||
1370 | struct buffer_head *new_bh, | ||
1371 | u32 *split_cpos) | ||
1372 | { | ||
1373 | int split_index = 0, num_moved, ret; | ||
1374 | u32 cpos = 0; | ||
1375 | struct ocfs2_refcount_block *rb = | ||
1376 | (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; | ||
1377 | struct ocfs2_refcount_list *rl = &rb->rf_records; | ||
1378 | struct ocfs2_refcount_block *new_rb = | ||
1379 | (struct ocfs2_refcount_block *)new_bh->b_data; | ||
1380 | struct ocfs2_refcount_list *new_rl = &new_rb->rf_records; | ||
1381 | |||
1382 | mlog(0, "split old leaf refcount block %llu, count = %u, used = %u\n", | ||
1383 | (unsigned long long)ref_leaf_bh->b_blocknr, | ||
1384 | le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used)); | ||
1385 | |||
1386 | /* | ||
1387 | * XXX: Improvement later. | ||
1388 | * If we know all the high 32 bit cpos is the same, no need to sort. | ||
1389 | * | ||
1390 | * In order to make the whole process safe, we do: | ||
1391 | * 1. sort the entries by their low 32 bit cpos first so that we can | ||
1392 | * find the split cpos easily. | ||
1393 | * 2. call ocfs2_insert_extent to insert the new refcount block. | ||
1394 | * 3. move the refcount rec to the new block. | ||
1395 | * 4. sort the entries by their 64 bit cpos. | ||
1396 | * 5. dirty the new_rb and rb. | ||
1397 | */ | ||
1398 | sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), | ||
1399 | sizeof(struct ocfs2_refcount_rec), | ||
1400 | cmp_refcount_rec_by_low_cpos, swap_refcount_rec); | ||
1401 | |||
1402 | ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index); | ||
1403 | if (ret) { | ||
1404 | mlog_errno(ret); | ||
1405 | return ret; | ||
1406 | } | ||
1407 | |||
1408 | new_rb->rf_cpos = cpu_to_le32(cpos); | ||
1409 | |||
1410 | /* move refcount records starting from split_index to the new block. */ | ||
1411 | num_moved = le16_to_cpu(rl->rl_used) - split_index; | ||
1412 | memcpy(new_rl->rl_recs, &rl->rl_recs[split_index], | ||
1413 | num_moved * sizeof(struct ocfs2_refcount_rec)); | ||
1414 | |||
1415 | /*ok, remove the entries we just moved over to the other block. */ | ||
1416 | memset(&rl->rl_recs[split_index], 0, | ||
1417 | num_moved * sizeof(struct ocfs2_refcount_rec)); | ||
1418 | |||
1419 | /* change old and new rl_used accordingly. */ | ||
1420 | le16_add_cpu(&rl->rl_used, -num_moved); | ||
1421 | new_rl->rl_used = cpu_to_le32(num_moved); | ||
1422 | |||
1423 | sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), | ||
1424 | sizeof(struct ocfs2_refcount_rec), | ||
1425 | cmp_refcount_rec_by_cpos, swap_refcount_rec); | ||
1426 | |||
1427 | sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used), | ||
1428 | sizeof(struct ocfs2_refcount_rec), | ||
1429 | cmp_refcount_rec_by_cpos, swap_refcount_rec); | ||
1430 | |||
1431 | *split_cpos = cpos; | ||
1432 | return 0; | ||
1433 | } | ||
1434 | |||
/*
 * Grow the refcount tree by one leaf: allocate and initialize a new
 * refcount block, move half of ref_leaf_bh's records into it, and
 * insert it into the tree rooted at ref_root_bh.
 *
 * The root must already be a b-tree (OCFS2_REFCOUNT_TREE_FL); callers
 * go through ocfs2_expand_inline_ref_root() first otherwise.
 */
static int ocfs2_new_leaf_refcount_block(handle_t *handle,
					 struct ocfs2_caching_info *ci,
					 struct buffer_head *ref_root_bh,
					 struct buffer_head *ref_leaf_bh,
					 struct ocfs2_alloc_context *meta_ac)
{
	int ret;
	u16 suballoc_bit_start;
	u32 num_got, new_cpos;
	u64 blkno;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct ocfs2_refcount_block *root_rb =
		(struct ocfs2_refcount_block *)ref_root_bh->b_data;
	struct buffer_head *new_bh = NULL;
	struct ocfs2_refcount_block *new_rb;
	struct ocfs2_extent_tree ref_et;

	BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL));

	/* Journal both blocks we are about to modify. */
	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Allocate one metadata block for the new leaf. */
	ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
				   &suballoc_bit_start, &num_got,
				   &blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	new_bh = sb_getblk(sb, blkno);
	if (new_bh == NULL) {
		ret = -EIO;
		mlog_errno(ret);
		goto out;
	}
	ocfs2_set_new_buffer_uptodate(ci, new_bh);

	ret = ocfs2_journal_access_rb(handle, ci, new_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Initialize ocfs2_refcount_block. */
	new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
	memset(new_rb, 0, sb->s_blocksize);
	strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
	new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
	new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
	new_rb->rf_blkno = cpu_to_le64(blkno);
	new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
	new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
	new_rb->rf_records.rl_count =
		cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
	new_rb->rf_generation = root_rb->rf_generation;

	/* Split the old leaf's records between it and the new leaf. */
	ret = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, new_bh, &new_cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_journal_dirty(handle, ref_leaf_bh);
	ocfs2_journal_dirty(handle, new_bh);

	ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh);

	mlog(0, "insert new leaf block %llu at %u\n",
	     (unsigned long long)new_bh->b_blocknr, new_cpos);

	/* Insert the new leaf block with the specific offset cpos. */
	ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr,
				  1, 0, meta_ac);
	if (ret)
		mlog_errno(ret);

out:
	brelse(new_bh);
	return ret;
}
1529 | |||
1530 | static int ocfs2_expand_refcount_tree(handle_t *handle, | ||
1531 | struct ocfs2_caching_info *ci, | ||
1532 | struct buffer_head *ref_root_bh, | ||
1533 | struct buffer_head *ref_leaf_bh, | ||
1534 | struct ocfs2_alloc_context *meta_ac) | ||
1535 | { | ||
1536 | int ret; | ||
1537 | struct buffer_head *expand_bh = NULL; | ||
1538 | |||
1539 | if (ref_root_bh == ref_leaf_bh) { | ||
1540 | /* | ||
1541 | * the old root bh hasn't been expanded to a b-tree, | ||
1542 | * so expand it first. | ||
1543 | */ | ||
1544 | ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh, | ||
1545 | &expand_bh, meta_ac); | ||
1546 | if (ret) { | ||
1547 | mlog_errno(ret); | ||
1548 | goto out; | ||
1549 | } | ||
1550 | } else { | ||
1551 | expand_bh = ref_leaf_bh; | ||
1552 | get_bh(expand_bh); | ||
1553 | } | ||
1554 | |||
1555 | |||
1556 | /* Now add a new refcount block into the tree.*/ | ||
1557 | ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh, | ||
1558 | expand_bh, meta_ac); | ||
1559 | if (ret) | ||
1560 | mlog_errno(ret); | ||
1561 | out: | ||
1562 | brelse(expand_bh); | ||
1563 | return ret; | ||
1564 | } | ||
1565 | |||
/*
 * Adjust the extent rec in b-tree representing ref_leaf_bh.
 *
 * Only called when we have inserted a new refcount rec at index 0
 * which means ocfs2_extent_rec.e_cpos may need some change.
 *
 * Both the extent rec's e_cpos and the leaf's rf_cpos are lowered to
 * the new record's (low-32-bit) start so lookups keep working.
 */
static int ocfs2_adjust_refcount_rec(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_root_bh,
				     struct buffer_head *ref_leaf_bh,
				     struct ocfs2_refcount_rec *rec)
{
	int ret = 0, i;
	u32 new_cpos, old_cpos;
	struct ocfs2_path *path = NULL;
	struct ocfs2_extent_tree et;
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)ref_root_bh->b_data;
	struct ocfs2_extent_list *el;

	/* Inline root: there is no extent rec to adjust. */
	if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL))
		goto out;

	rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	old_cpos = le32_to_cpu(rb->rf_cpos);
	new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
	/* Only needed when the new record starts before the leaf's key. */
	if (old_cpos <= new_cpos)
		goto out;

	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);

	path = ocfs2_new_path_from_et(&et);
	if (!path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_path(ci, path, old_cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * 2 more credits, one for the leaf refcount block, one for
	 * the extent block contains the extent rec.
	 */
	ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path),
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* change the leaf extent block first. */
	el = path_leaf_el(path);

	/* Locate the extent rec keyed by the leaf's old cpos. */
	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++)
		if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos)
			break;

	BUG_ON(i == le16_to_cpu(el->l_next_free_rec));

	el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);

	/* change the r_cpos in the leaf block. */
	rb->rf_cpos = cpu_to_le32(new_cpos);

	ocfs2_journal_dirty(handle, path_leaf_bh(path));
	ocfs2_journal_dirty(handle, ref_leaf_bh);

out:
	ocfs2_free_path(path);
	return ret;
}
1655 | |||
/*
 * Insert *rec into the leaf ref_leaf_bh at position @index, expanding
 * the tree first when the leaf is full (in which case the target leaf
 * is re-looked-up, since the record's range may have moved).
 *
 * @merge non-zero allows the inserted record to be coalesced with its
 * neighbours; insertion at index 0 may require adjusting the leaf's
 * b-tree key via ocfs2_adjust_refcount_rec().
 */
static int ocfs2_insert_refcount_rec(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_root_bh,
				     struct buffer_head *ref_leaf_bh,
				     struct ocfs2_refcount_rec *rec,
				     int index, int merge,
				     struct ocfs2_alloc_context *meta_ac)
{
	int ret;
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_list *rf_list = &rb->rf_records;
	struct buffer_head *new_bh = NULL;

	BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);

	/* Raw __le16 equality compare - endian-safe without conversion. */
	if (rf_list->rl_used == rf_list->rl_count) {
		u64 cpos = le64_to_cpu(rec->r_cpos);
		u32 len = le32_to_cpu(rec->r_clusters);

		/* Leaf is full: grow the tree, then re-find our leaf. */
		ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
						 ref_leaf_bh, meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
					     cpos, len, NULL, &index,
					     &new_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Retarget all local pointers at the (possibly new) leaf. */
		ref_leaf_bh = new_bh;
		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
		rf_list = &rb->rf_records;
	}

	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Open a slot at @index by shifting later records up. */
	if (index < le16_to_cpu(rf_list->rl_used))
		memmove(&rf_list->rl_recs[index + 1],
			&rf_list->rl_recs[index],
			(le16_to_cpu(rf_list->rl_used) - index) *
			 sizeof(struct ocfs2_refcount_rec));

	mlog(0, "insert refcount record start %llu, len %u, count %u "
	     "to leaf block %llu at index %d\n",
	     (unsigned long long)le64_to_cpu(rec->r_cpos),
	     le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount),
	     (unsigned long long)ref_leaf_bh->b_blocknr, index);

	rf_list->rl_recs[index] = *rec;

	le16_add_cpu(&rf_list->rl_used, 1);

	if (merge)
		ocfs2_refcount_rec_merge(rb, index);

	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* A new first record may lower the leaf's b-tree key. */
	if (index == 0) {
		ret = ocfs2_adjust_refcount_rec(handle, ci,
						ref_root_bh,
						ref_leaf_bh, rec);
		if (ret)
			mlog_errno(ret);
	}
out:
	brelse(new_bh);
	return ret;
}
1739 | |||
1740 | /* | ||
1741 | * Split the refcount_rec indexed by "index" in ref_leaf_bh. | ||
1742 | * This is much simple than our b-tree code. | ||
1743 | * split_rec is the new refcount rec we want to insert. | ||
1744 | * If split_rec->r_refcount > 0, we are changing the refcount(in case we | ||
1745 | * increase refcount or decrease a refcount to non-zero). | ||
1746 | * If split_rec->r_refcount == 0, we are punching a hole in current refcount | ||
1747 | * rec( in case we decrease a refcount to zero). | ||
1748 | */ | ||
1749 | static int ocfs2_split_refcount_rec(handle_t *handle, | ||
1750 | struct ocfs2_caching_info *ci, | ||
1751 | struct buffer_head *ref_root_bh, | ||
1752 | struct buffer_head *ref_leaf_bh, | ||
1753 | struct ocfs2_refcount_rec *split_rec, | ||
1754 | int index, int merge, | ||
1755 | struct ocfs2_alloc_context *meta_ac, | ||
1756 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
1757 | { | ||
1758 | int ret, recs_need; | ||
1759 | u32 len; | ||
1760 | struct ocfs2_refcount_block *rb = | ||
1761 | (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; | ||
1762 | struct ocfs2_refcount_list *rf_list = &rb->rf_records; | ||
1763 | struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index]; | ||
1764 | struct ocfs2_refcount_rec *tail_rec = NULL; | ||
1765 | struct buffer_head *new_bh = NULL; | ||
1766 | |||
1767 | BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL); | ||
1768 | |||
1769 | mlog(0, "original r_pos %llu, cluster %u, split %llu, cluster %u\n", | ||
1770 | le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters), | ||
1771 | le64_to_cpu(split_rec->r_cpos), | ||
1772 | le32_to_cpu(split_rec->r_clusters)); | ||
1773 | |||
1774 | /* | ||
1775 | * If we just need to split the header or tail clusters, | ||
1776 | * no more recs are needed, just split is OK. | ||
1777 | * Otherwise we at least need one new recs. | ||
1778 | */ | ||
1779 | if (!split_rec->r_refcount && | ||
1780 | (split_rec->r_cpos == orig_rec->r_cpos || | ||
1781 | le64_to_cpu(split_rec->r_cpos) + | ||
1782 | le32_to_cpu(split_rec->r_clusters) == | ||
1783 | le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters))) | ||
1784 | recs_need = 0; | ||
1785 | else | ||
1786 | recs_need = 1; | ||
1787 | |||
1788 | /* | ||
1789 | * We need one more rec if we split in the middle and the new rec have | ||
1790 | * some refcount in it. | ||
1791 | */ | ||
1792 | if (split_rec->r_refcount && | ||
1793 | (split_rec->r_cpos != orig_rec->r_cpos && | ||
1794 | le64_to_cpu(split_rec->r_cpos) + | ||
1795 | le32_to_cpu(split_rec->r_clusters) != | ||
1796 | le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters))) | ||
1797 | recs_need++; | ||
1798 | |||
1799 | /* If the leaf block don't have enough record, expand it. */ | ||
1800 | if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) { | ||
1801 | struct ocfs2_refcount_rec tmp_rec; | ||
1802 | u64 cpos = le64_to_cpu(orig_rec->r_cpos); | ||
1803 | len = le32_to_cpu(orig_rec->r_clusters); | ||
1804 | ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh, | ||
1805 | ref_leaf_bh, meta_ac); | ||
1806 | if (ret) { | ||
1807 | mlog_errno(ret); | ||
1808 | goto out; | ||
1809 | } | ||
1810 | |||
1811 | /* | ||
1812 | * We have to re-get it since now cpos may be moved to | ||
1813 | * another leaf block. | ||
1814 | */ | ||
1815 | ret = ocfs2_get_refcount_rec(ci, ref_root_bh, | ||
1816 | cpos, len, &tmp_rec, &index, | ||
1817 | &new_bh); | ||
1818 | if (ret) { | ||
1819 | mlog_errno(ret); | ||
1820 | goto out; | ||
1821 | } | ||
1822 | |||
1823 | ref_leaf_bh = new_bh; | ||
1824 | rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; | ||
1825 | rf_list = &rb->rf_records; | ||
1826 | orig_rec = &rf_list->rl_recs[index]; | ||
1827 | } | ||
1828 | |||
1829 | ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh, | ||
1830 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1831 | if (ret) { | ||
1832 | mlog_errno(ret); | ||
1833 | goto out; | ||
1834 | } | ||
1835 | |||
1836 | /* | ||
1837 | * We have calculated out how many new records we need and store | ||
1838 | * in recs_need, so spare enough space first by moving the records | ||
1839 | * after "index" to the end. | ||
1840 | */ | ||
1841 | if (index != le16_to_cpu(rf_list->rl_used) - 1) | ||
1842 | memmove(&rf_list->rl_recs[index + 1 + recs_need], | ||
1843 | &rf_list->rl_recs[index + 1], | ||
1844 | (le16_to_cpu(rf_list->rl_used) - index - 1) * | ||
1845 | sizeof(struct ocfs2_refcount_rec)); | ||
1846 | |||
1847 | len = (le64_to_cpu(orig_rec->r_cpos) + | ||
1848 | le32_to_cpu(orig_rec->r_clusters)) - | ||
1849 | (le64_to_cpu(split_rec->r_cpos) + | ||
1850 | le32_to_cpu(split_rec->r_clusters)); | ||
1851 | |||
1852 | /* | ||
1853 | * If we have "len", the we will split in the tail and move it | ||
1854 | * to the end of the space we have just spared. | ||
1855 | */ | ||
1856 | if (len) { | ||
1857 | tail_rec = &rf_list->rl_recs[index + recs_need]; | ||
1858 | |||
1859 | memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec)); | ||
1860 | le64_add_cpu(&tail_rec->r_cpos, | ||
1861 | le32_to_cpu(tail_rec->r_clusters) - len); | ||
1862 | tail_rec->r_clusters = le32_to_cpu(len); | ||
1863 | } | ||
1864 | |||
1865 | /* | ||
1866 | * If the split pos isn't the same as the original one, we need to | ||
1867 | * split in the head. | ||
1868 | * | ||
1869 | * Note: We have the chance that split_rec.r_refcount = 0, | ||
1870 | * recs_need = 0 and len > 0, which means we just cut the head from | ||
1871 | * the orig_rec and in that case we have done some modification in | ||
1872 | * orig_rec above, so the check for r_cpos is faked. | ||
1873 | */ | ||
1874 | if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) { | ||
1875 | len = le64_to_cpu(split_rec->r_cpos) - | ||
1876 | le64_to_cpu(orig_rec->r_cpos); | ||
1877 | orig_rec->r_clusters = cpu_to_le32(len); | ||
1878 | index++; | ||
1879 | } | ||
1880 | |||
1881 | le16_add_cpu(&rf_list->rl_used, recs_need); | ||
1882 | |||
1883 | if (split_rec->r_refcount) { | ||
1884 | rf_list->rl_recs[index] = *split_rec; | ||
1885 | mlog(0, "insert refcount record start %llu, len %u, count %u " | ||
1886 | "to leaf block %llu at index %d\n", | ||
1887 | (unsigned long long)le64_to_cpu(split_rec->r_cpos), | ||
1888 | le32_to_cpu(split_rec->r_clusters), | ||
1889 | le32_to_cpu(split_rec->r_refcount), | ||
1890 | (unsigned long long)ref_leaf_bh->b_blocknr, index); | ||
1891 | |||
1892 | if (merge) | ||
1893 | ocfs2_refcount_rec_merge(rb, index); | ||
1894 | } | ||
1895 | |||
1896 | ret = ocfs2_journal_dirty(handle, ref_leaf_bh); | ||
1897 | if (ret) | ||
1898 | mlog_errno(ret); | ||
1899 | |||
1900 | out: | ||
1901 | brelse(new_bh); | ||
1902 | return ret; | ||
1903 | } | ||
1904 | |||
/*
 * Raise the refcount by one for every cluster in [cpos, cpos + len),
 * walking the range one refcount record at a time.  Each step either
 * bumps an exactly-matching record, inserts a new record into a hole,
 * or splits a partially-overlapping record.
 */
static int __ocfs2_increase_refcount(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_root_bh,
				     u64 cpos, u32 len, int merge,
				     struct ocfs2_alloc_context *meta_ac,
				     struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret = 0, index;
	struct buffer_head *ref_leaf_bh = NULL;
	struct ocfs2_refcount_rec rec;
	unsigned int set_len = 0;

	mlog(0, "Tree owner %llu, add refcount start %llu, len %u\n",
	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
	     (unsigned long long)cpos, len);

	while (len) {
		/* Find the record (or hole) covering the current cpos. */
		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
					     cpos, len, &rec, &index,
					     &ref_leaf_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		set_len = le32_to_cpu(rec.r_clusters);

		/*
		 * Here we may meet with 3 situations:
		 *
		 * 1. If we find an already existing record, and the length
		 *    is the same, cool, we just need to increase the r_refcount
		 *    and it is OK.
		 * 2. If we find a hole, just insert it with r_refcount = 1.
		 * 3. If we are in the middle of one extent record, split
		 *    it.
		 */
		if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos &&
		    set_len <= len) {
			mlog(0, "increase refcount rec, start %llu, len %u, "
			     "count %u\n", (unsigned long long)cpos, set_len,
			     le32_to_cpu(rec.r_refcount));
			ret = ocfs2_change_refcount_rec(handle, ci,
							ref_leaf_bh, index,
							merge, 1);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		} else if (!rec.r_refcount) {
			/* A hole: the new record starts with refcount 1. */
			rec.r_refcount = cpu_to_le32(1);

			mlog(0, "insert refcount rec, start %llu, len %u\n",
			     (unsigned long long)le64_to_cpu(rec.r_cpos),
			     set_len);
			ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh,
							ref_leaf_bh,
							&rec, index,
							merge, meta_ac);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		} else {
			/* Partial overlap: clamp to our range and split. */
			set_len = min((u64)(cpos + len),
				      le64_to_cpu(rec.r_cpos) + set_len) - cpos;
			rec.r_cpos = cpu_to_le64(cpos);
			rec.r_clusters = cpu_to_le32(set_len);
			le32_add_cpu(&rec.r_refcount, 1);

			mlog(0, "split refcount rec, start %llu, "
			     "len %u, count %u\n",
			     (unsigned long long)le64_to_cpu(rec.r_cpos),
			     set_len, le32_to_cpu(rec.r_refcount));
			ret = ocfs2_split_refcount_rec(handle, ci,
						       ref_root_bh, ref_leaf_bh,
						       &rec, index, merge,
						       meta_ac, dealloc);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}

		/* Advance past the portion handled this iteration. */
		cpos += set_len;
		len -= set_len;
		brelse(ref_leaf_bh);
		ref_leaf_bh = NULL;
	}

out:
	brelse(ref_leaf_bh);
	return ret;
}
1999 | |||
/*
 * Tear down an empty refcount leaf block: remove its extent from the
 * refcount b-tree, queue the block itself for deallocation, and shrink
 * the root's cluster count — collapsing the root back into a plain
 * record block once no leaf extents remain.
 *
 * The caller passes a started handle; on success the dirtied root is
 * journaled, while the leaf's actual freeing is deferred to @dealloc.
 */
static int ocfs2_remove_refcount_extent(handle_t *handle,
				struct ocfs2_caching_info *ci,
				struct buffer_head *ref_root_bh,
				struct buffer_head *ref_leaf_bh,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_extent_tree et;

	/* Only a leaf with no live refcount records may be removed. */
	BUG_ON(rb->rf_records.rl_used);

	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
	ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos),
				  1, meta_ac, dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_remove_from_cache(ci, ref_leaf_bh);

	/*
	 * add the freed block to the dealloc so that it will be freed
	 * when we run dealloc.
	 */
	ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
					le16_to_cpu(rb->rf_suballoc_slot),
					le64_to_cpu(rb->rf_blkno),
					le16_to_cpu(rb->rf_suballoc_bit));
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* From here on, work on the tree root instead of the gone leaf. */
	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;

	le32_add_cpu(&rb->rf_clusters, -1);

	/*
	 * check whether we need to restore the root refcount block if
	 * there is no leaf extent block at all.
	 */
	if (!rb->rf_list.l_next_free_rec) {
		BUG_ON(rb->rf_clusters);

		mlog(0, "reset refcount tree root %llu to be a record block.\n",
		     (unsigned long long)ref_root_bh->b_blocknr);

		rb->rf_flags = 0;
		rb->rf_parent = 0;
		rb->rf_cpos = 0;
		memset(&rb->rf_records, 0, sb->s_blocksize -
		       offsetof(struct ocfs2_refcount_block, rf_records));
		rb->rf_records.rl_count =
			cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
	}

	ocfs2_journal_dirty(handle, ref_root_bh);

out:
	return ret;
}
2073 | |||
2074 | int ocfs2_increase_refcount(handle_t *handle, | ||
2075 | struct ocfs2_caching_info *ci, | ||
2076 | struct buffer_head *ref_root_bh, | ||
2077 | u64 cpos, u32 len, | ||
2078 | struct ocfs2_alloc_context *meta_ac, | ||
2079 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
2080 | { | ||
2081 | return __ocfs2_increase_refcount(handle, ci, ref_root_bh, | ||
2082 | cpos, len, 1, | ||
2083 | meta_ac, dealloc); | ||
2084 | } | ||
2085 | |||
/*
 * Decrement by one the refcount of [cpos, cpos + len) inside the record
 * at @index of leaf block @ref_leaf_bh.  If the range covers the whole
 * record the count is changed in place (with neighbour merging enabled);
 * otherwise the record is split so only the requested range is affected.
 * A leaf left with no records is removed from the tree entirely.
 */
static int ocfs2_decrease_refcount_rec(handle_t *handle,
				struct ocfs2_caching_info *ci,
				struct buffer_head *ref_root_bh,
				struct buffer_head *ref_leaf_bh,
				int index, u64 cpos, unsigned int len,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret;
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index];

	/* The caller must pass a range fully contained in this record. */
	BUG_ON(cpos < le64_to_cpu(rec->r_cpos));
	BUG_ON(cpos + len >
	       le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters));

	if (cpos == le64_to_cpu(rec->r_cpos) &&
	    len == le32_to_cpu(rec->r_clusters))
		ret = ocfs2_change_refcount_rec(handle, ci,
						ref_leaf_bh, index, 1, -1);
	else {
		/* Partial overlap: split off a copy covering just the range. */
		struct ocfs2_refcount_rec split = *rec;
		split.r_cpos = cpu_to_le64(cpos);
		split.r_clusters = cpu_to_le32(len);

		le32_add_cpu(&split.r_refcount, -1);

		mlog(0, "split refcount rec, start %llu, "
		     "len %u, count %u, original start %llu, len %u\n",
		     (unsigned long long)le64_to_cpu(split.r_cpos),
		     len, le32_to_cpu(split.r_refcount),
		     (unsigned long long)le64_to_cpu(rec->r_cpos),
		     le32_to_cpu(rec->r_clusters));
		ret = ocfs2_split_refcount_rec(handle, ci,
					       ref_root_bh, ref_leaf_bh,
					       &split, index, 1,
					       meta_ac, dealloc);
	}

	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Remove the leaf refcount block if it contains no refcount record. */
	if (!rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) {
		ret = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh,
						   ref_leaf_bh, meta_ac,
						   dealloc);
		if (ret)
			mlog_errno(ret);
	}

out:
	return ret;
}
2143 | |||
/*
 * Walk the refcount records covering [cpos, cpos + len) and decrement
 * each by one.  With @delete set, any range whose refcount drops from 1
 * to 0 is queued on @dealloc so the underlying clusters get freed later;
 * with @delete clear, hitting a record with refcount 1 is a bug — we are
 * only releasing one of several shared references.
 */
static int __ocfs2_decrease_refcount(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_root_bh,
				     u64 cpos, u32 len,
				     struct ocfs2_alloc_context *meta_ac,
				     struct ocfs2_cached_dealloc_ctxt *dealloc,
				     int delete)
{
	int ret = 0, index = 0;
	struct ocfs2_refcount_rec rec;
	unsigned int r_count = 0, r_len;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct buffer_head *ref_leaf_bh = NULL;

	mlog(0, "Tree owner %llu, decrease refcount start %llu, "
	     "len %u, delete %u\n",
	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
	     (unsigned long long)cpos, len, delete);

	while (len) {
		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
					     cpos, len, &rec, &index,
					     &ref_leaf_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		r_count = le32_to_cpu(rec.r_refcount);
		BUG_ON(r_count == 0);
		if (!delete)
			BUG_ON(r_count > 1);

		/* Clamp this pass to the part of the record we overlap. */
		r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) +
			    le32_to_cpu(rec.r_clusters)) - cpos;

		ret = ocfs2_decrease_refcount_rec(handle, ci, ref_root_bh,
						  ref_leaf_bh, index,
						  cpos, r_len,
						  meta_ac, dealloc);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Refcount hit zero: schedule the clusters for freeing. */
		if (le32_to_cpu(rec.r_refcount) == 1 && delete) {
			ret = ocfs2_cache_cluster_dealloc(dealloc,
					  ocfs2_clusters_to_blocks(sb, cpos),
					  r_len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}

		cpos += r_len;
		len -= r_len;
		brelse(ref_leaf_bh);
		ref_leaf_bh = NULL;
	}

out:
	brelse(ref_leaf_bh);
	return ret;
}
2209 | |||
2210 | /* Caller must hold refcount tree lock. */ | ||
2211 | int ocfs2_decrease_refcount(struct inode *inode, | ||
2212 | handle_t *handle, u32 cpos, u32 len, | ||
2213 | struct ocfs2_alloc_context *meta_ac, | ||
2214 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
2215 | int delete) | ||
2216 | { | ||
2217 | int ret; | ||
2218 | u64 ref_blkno; | ||
2219 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
2220 | struct buffer_head *ref_root_bh = NULL; | ||
2221 | struct ocfs2_refcount_tree *tree; | ||
2222 | |||
2223 | BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); | ||
2224 | |||
2225 | ret = ocfs2_get_refcount_block(inode, &ref_blkno); | ||
2226 | if (ret) { | ||
2227 | mlog_errno(ret); | ||
2228 | goto out; | ||
2229 | } | ||
2230 | |||
2231 | ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, &tree); | ||
2232 | if (ret) { | ||
2233 | mlog_errno(ret); | ||
2234 | goto out; | ||
2235 | } | ||
2236 | |||
2237 | ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno, | ||
2238 | &ref_root_bh); | ||
2239 | if (ret) { | ||
2240 | mlog_errno(ret); | ||
2241 | goto out; | ||
2242 | } | ||
2243 | |||
2244 | ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh, | ||
2245 | cpos, len, meta_ac, dealloc, delete); | ||
2246 | if (ret) | ||
2247 | mlog_errno(ret); | ||
2248 | out: | ||
2249 | brelse(ref_root_bh); | ||
2250 | return ret; | ||
2251 | } | ||
2252 | |||
2253 | /* | ||
2254 | * Mark the already-existing extent at cpos as refcounted for len clusters. | ||
2255 | * This adds the refcount extent flag. | ||
2256 | * | ||
2257 | * If the existing extent is larger than the request, initiate a | ||
2258 | * split. An attempt will be made at merging with adjacent extents. | ||
2259 | * | ||
2260 | * The caller is responsible for passing down meta_ac if we'll need it. | ||
2261 | */ | ||
2262 | static int ocfs2_mark_extent_refcounted(struct inode *inode, | ||
2263 | struct ocfs2_extent_tree *et, | ||
2264 | handle_t *handle, u32 cpos, | ||
2265 | u32 len, u32 phys, | ||
2266 | struct ocfs2_alloc_context *meta_ac, | ||
2267 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
2268 | { | ||
2269 | int ret; | ||
2270 | |||
2271 | mlog(0, "Inode %lu refcount tree cpos %u, len %u, phys cluster %u\n", | ||
2272 | inode->i_ino, cpos, len, phys); | ||
2273 | |||
2274 | if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { | ||
2275 | ocfs2_error(inode->i_sb, "Inode %lu want to use refcount " | ||
2276 | "tree, but the feature bit is not set in the " | ||
2277 | "super block.", inode->i_ino); | ||
2278 | ret = -EROFS; | ||
2279 | goto out; | ||
2280 | } | ||
2281 | |||
2282 | ret = ocfs2_change_extent_flag(handle, et, cpos, | ||
2283 | len, phys, meta_ac, dealloc, | ||
2284 | OCFS2_EXT_REFCOUNTED, 0); | ||
2285 | if (ret) | ||
2286 | mlog_errno(ret); | ||
2287 | |||
2288 | out: | ||
2289 | return ret; | ||
2290 | } | ||
2291 | |||
2292 | /* | ||
2293 | * Given some contiguous physical clusters, calculate what we need | ||
2294 | * for modifying their refcount. | ||
2295 | */ | ||
2296 | static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, | ||
2297 | struct ocfs2_caching_info *ci, | ||
2298 | struct buffer_head *ref_root_bh, | ||
2299 | u64 start_cpos, | ||
2300 | u32 clusters, | ||
2301 | int *meta_add, | ||
2302 | int *credits) | ||
2303 | { | ||
2304 | int ret = 0, index, ref_blocks = 0, recs_add = 0; | ||
2305 | u64 cpos = start_cpos; | ||
2306 | struct ocfs2_refcount_block *rb; | ||
2307 | struct ocfs2_refcount_rec rec; | ||
2308 | struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL; | ||
2309 | u32 len; | ||
2310 | |||
2311 | mlog(0, "start_cpos %llu, clusters %u\n", | ||
2312 | (unsigned long long)start_cpos, clusters); | ||
2313 | while (clusters) { | ||
2314 | ret = ocfs2_get_refcount_rec(ci, ref_root_bh, | ||
2315 | cpos, clusters, &rec, | ||
2316 | &index, &ref_leaf_bh); | ||
2317 | if (ret) { | ||
2318 | mlog_errno(ret); | ||
2319 | goto out; | ||
2320 | } | ||
2321 | |||
2322 | if (ref_leaf_bh != prev_bh) { | ||
2323 | /* | ||
2324 | * Now we encounter a new leaf block, so calculate | ||
2325 | * whether we need to extend the old leaf. | ||
2326 | */ | ||
2327 | if (prev_bh) { | ||
2328 | rb = (struct ocfs2_refcount_block *) | ||
2329 | prev_bh->b_data; | ||
2330 | |||
2331 | if (le64_to_cpu(rb->rf_records.rl_used) + | ||
2332 | recs_add > | ||
2333 | le16_to_cpu(rb->rf_records.rl_count)) | ||
2334 | ref_blocks++; | ||
2335 | } | ||
2336 | |||
2337 | recs_add = 0; | ||
2338 | *credits += 1; | ||
2339 | brelse(prev_bh); | ||
2340 | prev_bh = ref_leaf_bh; | ||
2341 | get_bh(prev_bh); | ||
2342 | } | ||
2343 | |||
2344 | rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; | ||
2345 | |||
2346 | mlog(0, "recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu," | ||
2347 | "rec->r_clusters %u, rec->r_refcount %u, index %d\n", | ||
2348 | recs_add, (unsigned long long)cpos, clusters, | ||
2349 | (unsigned long long)le64_to_cpu(rec.r_cpos), | ||
2350 | le32_to_cpu(rec.r_clusters), | ||
2351 | le32_to_cpu(rec.r_refcount), index); | ||
2352 | |||
2353 | len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + | ||
2354 | le32_to_cpu(rec.r_clusters)) - cpos; | ||
2355 | /* | ||
2356 | * If the refcount rec already exist, cool. We just need | ||
2357 | * to check whether there is a split. Otherwise we just need | ||
2358 | * to increase the refcount. | ||
2359 | * If we will insert one, increases recs_add. | ||
2360 | * | ||
2361 | * We record all the records which will be inserted to the | ||
2362 | * same refcount block, so that we can tell exactly whether | ||
2363 | * we need a new refcount block or not. | ||
2364 | */ | ||
2365 | if (rec.r_refcount) { | ||
2366 | /* Check whether we need a split at the beginning. */ | ||
2367 | if (cpos == start_cpos && | ||
2368 | cpos != le64_to_cpu(rec.r_cpos)) | ||
2369 | recs_add++; | ||
2370 | |||
2371 | /* Check whether we need a split in the end. */ | ||
2372 | if (cpos + clusters < le64_to_cpu(rec.r_cpos) + | ||
2373 | le32_to_cpu(rec.r_clusters)) | ||
2374 | recs_add++; | ||
2375 | } else | ||
2376 | recs_add++; | ||
2377 | |||
2378 | brelse(ref_leaf_bh); | ||
2379 | ref_leaf_bh = NULL; | ||
2380 | clusters -= len; | ||
2381 | cpos += len; | ||
2382 | } | ||
2383 | |||
2384 | if (prev_bh) { | ||
2385 | rb = (struct ocfs2_refcount_block *)prev_bh->b_data; | ||
2386 | |||
2387 | if (le64_to_cpu(rb->rf_records.rl_used) + recs_add > | ||
2388 | le16_to_cpu(rb->rf_records.rl_count)) | ||
2389 | ref_blocks++; | ||
2390 | |||
2391 | *credits += 1; | ||
2392 | } | ||
2393 | |||
2394 | if (!ref_blocks) | ||
2395 | goto out; | ||
2396 | |||
2397 | mlog(0, "we need ref_blocks %d\n", ref_blocks); | ||
2398 | *meta_add += ref_blocks; | ||
2399 | *credits += ref_blocks; | ||
2400 | |||
2401 | /* | ||
2402 | * So we may need ref_blocks to insert into the tree. | ||
2403 | * That also means we need to change the b-tree and add that number | ||
2404 | * of records since we never merge them. | ||
2405 | * We need one more block for expansion since the new created leaf | ||
2406 | * block is also full and needs split. | ||
2407 | */ | ||
2408 | rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; | ||
2409 | if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) { | ||
2410 | struct ocfs2_extent_tree et; | ||
2411 | |||
2412 | ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); | ||
2413 | *meta_add += ocfs2_extend_meta_needed(et.et_root_el); | ||
2414 | *credits += ocfs2_calc_extend_credits(sb, | ||
2415 | et.et_root_el, | ||
2416 | ref_blocks); | ||
2417 | } else { | ||
2418 | *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; | ||
2419 | *meta_add += 1; | ||
2420 | } | ||
2421 | |||
2422 | out: | ||
2423 | brelse(ref_leaf_bh); | ||
2424 | brelse(prev_bh); | ||
2425 | return ret; | ||
2426 | } | ||
2427 | |||
/*
 * For refcount tree, we will decrease some contiguous clusters
 * refcount count, so just go through it to see how many blocks
 * we gonna touch and whether we need to create new blocks.
 *
 * Normally the refcount blocks storing these refcounts should be
 * contiguous also, so that we can get the number easily.
 * As for meta_ac, we will at most add split 2 refcount record and
 * 2 more refcount block, so just check it in a rough way.
 *
 * Caller must hold refcount tree lock.
 */
int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
					  struct buffer_head *di_bh,
					  u64 phys_blkno,
					  u32 clusters,
					  int *credits,
					  struct ocfs2_alloc_context **meta_ac)
{
	int ret, ref_blocks = 0;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_tree *tree;
	/* Physical start cluster of the range being released. */
	u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);

	/* Refuse if the volume does not advertise refcount tree support. */
	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
			    "tree, but the feature bit is not set in the "
			    "super block.", inode->i_ino);
		ret = -EROFS;
		goto out;
	}

	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));

	ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
				      le64_to_cpu(di->i_refcount_loc), &tree);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_refcount_block(&tree->rf_ci,
					le64_to_cpu(di->i_refcount_loc),
					&ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
					       &tree->rf_ci,
					       ref_root_bh,
					       start_cpos, clusters,
					       &ref_blocks, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	mlog(0, "reserve new metadata %d, credits = %d\n",
	     ref_blocks, *credits);

	/* Only reserve metadata if new refcount blocks may be needed. */
	if (ref_blocks) {
		ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
							ref_blocks, meta_ac);
		if (ret)
			mlog_errno(ret);
	}

out:
	brelse(ref_root_bh);
	return ret;
}
2503 | |||
/* Largest chunk we try to CoW at a time: extents are broken on 1MB
 * boundaries so the resulting extent tree gives good contiguous I/O. */
#define MAX_CONTIG_BYTES 1048576

/* Number of clusters in one maximal (MAX_CONTIG_BYTES) CoW chunk. */
static inline unsigned int ocfs2_cow_contig_clusters(struct super_block *sb)
{
	return ocfs2_clusters_for_bytes(sb, MAX_CONTIG_BYTES);
}
2510 | |||
/*
 * Mask that rounds a cluster count down to a multiple of the CoW contig
 * size.  NOTE(review): only meaningful if ocfs2_cow_contig_clusters() is
 * a power of two — confirm that cluster sizes guarantee this.
 */
static inline unsigned int ocfs2_cow_contig_mask(struct super_block *sb)
{
	return ~(ocfs2_cow_contig_clusters(sb) - 1);
}
2515 | |||
/*
 * Given an extent that starts at 'start' and an I/O that starts at 'cpos',
 * find an offset (start + (n * contig_clusters)) that is closest to cpos
 * while still being less than or equal to it.
 *
 * The goal is to break the extent at a multiple of contig_clusters.
 */
static inline unsigned int ocfs2_cow_align_start(struct super_block *sb,
						 unsigned int start,
						 unsigned int cpos)
{
	/* cpos must lie inside (or after) the extent that begins at start. */
	BUG_ON(start > cpos);

	return start + ((cpos - start) & ocfs2_cow_contig_mask(sb));
}
2531 | |||
2532 | /* | ||
2533 | * Given a cluster count of len, pad it out so that it is a multiple | ||
2534 | * of contig_clusters. | ||
2535 | */ | ||
2536 | static inline unsigned int ocfs2_cow_align_length(struct super_block *sb, | ||
2537 | unsigned int len) | ||
2538 | { | ||
2539 | unsigned int padded = | ||
2540 | (len + (ocfs2_cow_contig_clusters(sb) - 1)) & | ||
2541 | ocfs2_cow_contig_mask(sb); | ||
2542 | |||
2543 | /* Did we wrap? */ | ||
2544 | if (padded < len) | ||
2545 | padded = UINT_MAX; | ||
2546 | |||
2547 | return padded; | ||
2548 | } | ||
2549 | |||
2550 | /* | ||
2551 | * Calculate out the start and number of virtual clusters we need to to CoW. | ||
2552 | * | ||
2553 | * cpos is vitual start cluster position we want to do CoW in a | ||
2554 | * file and write_len is the cluster length. | ||
2555 | * max_cpos is the place where we want to stop CoW intentionally. | ||
2556 | * | ||
2557 | * Normal we will start CoW from the beginning of extent record cotaining cpos. | ||
2558 | * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we | ||
2559 | * get good I/O from the resulting extent tree. | ||
2560 | */ | ||
2561 | static int ocfs2_refcount_cal_cow_clusters(struct inode *inode, | ||
2562 | struct ocfs2_extent_list *el, | ||
2563 | u32 cpos, | ||
2564 | u32 write_len, | ||
2565 | u32 max_cpos, | ||
2566 | u32 *cow_start, | ||
2567 | u32 *cow_len) | ||
2568 | { | ||
2569 | int ret = 0; | ||
2570 | int tree_height = le16_to_cpu(el->l_tree_depth), i; | ||
2571 | struct buffer_head *eb_bh = NULL; | ||
2572 | struct ocfs2_extent_block *eb = NULL; | ||
2573 | struct ocfs2_extent_rec *rec; | ||
2574 | unsigned int want_clusters, rec_end = 0; | ||
2575 | int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb); | ||
2576 | int leaf_clusters; | ||
2577 | |||
2578 | BUG_ON(cpos + write_len > max_cpos); | ||
2579 | |||
2580 | if (tree_height > 0) { | ||
2581 | ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh); | ||
2582 | if (ret) { | ||
2583 | mlog_errno(ret); | ||
2584 | goto out; | ||
2585 | } | ||
2586 | |||
2587 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
2588 | el = &eb->h_list; | ||
2589 | |||
2590 | if (el->l_tree_depth) { | ||
2591 | ocfs2_error(inode->i_sb, | ||
2592 | "Inode %lu has non zero tree depth in " | ||
2593 | "leaf block %llu\n", inode->i_ino, | ||
2594 | (unsigned long long)eb_bh->b_blocknr); | ||
2595 | ret = -EROFS; | ||
2596 | goto out; | ||
2597 | } | ||
2598 | } | ||
2599 | |||
2600 | *cow_len = 0; | ||
2601 | for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { | ||
2602 | rec = &el->l_recs[i]; | ||
2603 | |||
2604 | if (ocfs2_is_empty_extent(rec)) { | ||
2605 | mlog_bug_on_msg(i != 0, "Inode %lu has empty record in " | ||
2606 | "index %d\n", inode->i_ino, i); | ||
2607 | continue; | ||
2608 | } | ||
2609 | |||
2610 | if (le32_to_cpu(rec->e_cpos) + | ||
2611 | le16_to_cpu(rec->e_leaf_clusters) <= cpos) | ||
2612 | continue; | ||
2613 | |||
2614 | if (*cow_len == 0) { | ||
2615 | /* | ||
2616 | * We should find a refcounted record in the | ||
2617 | * first pass. | ||
2618 | */ | ||
2619 | BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED)); | ||
2620 | *cow_start = le32_to_cpu(rec->e_cpos); | ||
2621 | } | ||
2622 | |||
2623 | /* | ||
2624 | * If we encounter a hole, a non-refcounted record or | ||
2625 | * pass the max_cpos, stop the search. | ||
2626 | */ | ||
2627 | if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) || | ||
2628 | (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)) || | ||
2629 | (max_cpos <= le32_to_cpu(rec->e_cpos))) | ||
2630 | break; | ||
2631 | |||
2632 | leaf_clusters = le16_to_cpu(rec->e_leaf_clusters); | ||
2633 | rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters; | ||
2634 | if (rec_end > max_cpos) { | ||
2635 | rec_end = max_cpos; | ||
2636 | leaf_clusters = rec_end - le32_to_cpu(rec->e_cpos); | ||
2637 | } | ||
2638 | |||
2639 | /* | ||
2640 | * How many clusters do we actually need from | ||
2641 | * this extent? First we see how many we actually | ||
2642 | * need to complete the write. If that's smaller | ||
2643 | * than contig_clusters, we try for contig_clusters. | ||
2644 | */ | ||
2645 | if (!*cow_len) | ||
2646 | want_clusters = write_len; | ||
2647 | else | ||
2648 | want_clusters = (cpos + write_len) - | ||
2649 | (*cow_start + *cow_len); | ||
2650 | if (want_clusters < contig_clusters) | ||
2651 | want_clusters = contig_clusters; | ||
2652 | |||
2653 | /* | ||
2654 | * If the write does not cover the whole extent, we | ||
2655 | * need to calculate how we're going to split the extent. | ||
2656 | * We try to do it on contig_clusters boundaries. | ||
2657 | * | ||
2658 | * Any extent smaller than contig_clusters will be | ||
2659 | * CoWed in its entirety. | ||
2660 | */ | ||
2661 | if (leaf_clusters <= contig_clusters) | ||
2662 | *cow_len += leaf_clusters; | ||
2663 | else if (*cow_len || (*cow_start == cpos)) { | ||
2664 | /* | ||
2665 | * This extent needs to be CoW'd from its | ||
2666 | * beginning, so all we have to do is compute | ||
2667 | * how many clusters to grab. We align | ||
2668 | * want_clusters to the edge of contig_clusters | ||
2669 | * to get better I/O. | ||
2670 | */ | ||
2671 | want_clusters = ocfs2_cow_align_length(inode->i_sb, | ||
2672 | want_clusters); | ||
2673 | |||
2674 | if (leaf_clusters < want_clusters) | ||
2675 | *cow_len += leaf_clusters; | ||
2676 | else | ||
2677 | *cow_len += want_clusters; | ||
2678 | } else if ((*cow_start + contig_clusters) >= | ||
2679 | (cpos + write_len)) { | ||
2680 | /* | ||
2681 | * Breaking off contig_clusters at the front | ||
2682 | * of the extent will cover our write. That's | ||
2683 | * easy. | ||
2684 | */ | ||
2685 | *cow_len = contig_clusters; | ||
2686 | } else if ((rec_end - cpos) <= contig_clusters) { | ||
2687 | /* | ||
2688 | * Breaking off contig_clusters at the tail of | ||
2689 | * this extent will cover cpos. | ||
2690 | */ | ||
2691 | *cow_start = rec_end - contig_clusters; | ||
2692 | *cow_len = contig_clusters; | ||
2693 | } else if ((rec_end - cpos) <= want_clusters) { | ||
2694 | /* | ||
2695 | * While we can't fit the entire write in this | ||
2696 | * extent, we know that the write goes from cpos | ||
2697 | * to the end of the extent. Break that off. | ||
2698 | * We try to break it at some multiple of | ||
2699 | * contig_clusters from the front of the extent. | ||
2700 | * Failing that (ie, cpos is within | ||
2701 | * contig_clusters of the front), we'll CoW the | ||
2702 | * entire extent. | ||
2703 | */ | ||
2704 | *cow_start = ocfs2_cow_align_start(inode->i_sb, | ||
2705 | *cow_start, cpos); | ||
2706 | *cow_len = rec_end - *cow_start; | ||
2707 | } else { | ||
2708 | /* | ||
2709 | * Ok, the entire write lives in the middle of | ||
2710 | * this extent. Let's try to slice the extent up | ||
2711 | * nicely. Optimally, our CoW region starts at | ||
2712 | * m*contig_clusters from the beginning of the | ||
2713 | * extent and goes for n*contig_clusters, | ||
2714 | * covering the entire write. | ||
2715 | */ | ||
2716 | *cow_start = ocfs2_cow_align_start(inode->i_sb, | ||
2717 | *cow_start, cpos); | ||
2718 | |||
2719 | want_clusters = (cpos + write_len) - *cow_start; | ||
2720 | want_clusters = ocfs2_cow_align_length(inode->i_sb, | ||
2721 | want_clusters); | ||
2722 | if (*cow_start + want_clusters <= rec_end) | ||
2723 | *cow_len = want_clusters; | ||
2724 | else | ||
2725 | *cow_len = rec_end - *cow_start; | ||
2726 | } | ||
2727 | |||
2728 | /* Have we covered our entire write yet? */ | ||
2729 | if ((*cow_start + *cow_len) >= (cpos + write_len)) | ||
2730 | break; | ||
2731 | |||
2732 | /* | ||
2733 | * If we reach the end of the extent block and don't get enough | ||
2734 | * clusters, continue with the next extent block if possible. | ||
2735 | */ | ||
2736 | if (i + 1 == le16_to_cpu(el->l_next_free_rec) && | ||
2737 | eb && eb->h_next_leaf_blk) { | ||
2738 | brelse(eb_bh); | ||
2739 | eb_bh = NULL; | ||
2740 | |||
2741 | ret = ocfs2_read_extent_block(INODE_CACHE(inode), | ||
2742 | le64_to_cpu(eb->h_next_leaf_blk), | ||
2743 | &eb_bh); | ||
2744 | if (ret) { | ||
2745 | mlog_errno(ret); | ||
2746 | goto out; | ||
2747 | } | ||
2748 | |||
2749 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
2750 | el = &eb->h_list; | ||
2751 | i = -1; | ||
2752 | } | ||
2753 | } | ||
2754 | |||
2755 | out: | ||
2756 | brelse(eb_bh); | ||
2757 | return ret; | ||
2758 | } | ||
2759 | |||
/*
 * Prepare meta_ac, data_ac and calculate credits when we want to add some
 * num_clusters in data_tree "et" and change the refcount for the old
 * clusters(starting from p_cluster) in the refcount tree.
 *
 * Note:
 * 1. since we may split the old tree, so we at most will need num_clusters + 2
 * more new leaf records.
 * 2. In some case, we may not need to reserve new clusters(e.g, reflink), so
 * just give data_ac = NULL.
 *
 * NOTE(review): the error path frees *meta_ac when it is non-NULL, so
 * callers are expected to pass in *meta_ac == NULL — verify at call sites.
 */
static int ocfs2_lock_refcount_allocators(struct super_block *sb,
					  u32 p_cluster, u32 num_clusters,
					  struct ocfs2_extent_tree *et,
					  struct ocfs2_caching_info *ref_ci,
					  struct buffer_head *ref_root_bh,
					  struct ocfs2_alloc_context **meta_ac,
					  struct ocfs2_alloc_context **data_ac,
					  int *credits)
{
	int ret = 0, meta_add = 0;
	int num_free_extents = ocfs2_num_free_extents(OCFS2_SB(sb), et);

	if (num_free_extents < 0) {
		ret = num_free_extents;
		mlog_errno(ret);
		goto out;
	}

	/* Worst case: the insert splits the tree, needing num_clusters + 2
	 * new leaf records. */
	if (num_free_extents < num_clusters + 2)
		meta_add =
			ocfs2_extend_meta_needed(et->et_root_el);

	*credits += ocfs2_calc_extend_credits(sb, et->et_root_el,
					      num_clusters + 2);

	ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh,
					       p_cluster, num_clusters,
					       &meta_add, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	mlog(0, "reserve new metadata %d, clusters %u, credits = %d\n",
	     meta_add, num_clusters, *credits);
	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add,
						meta_ac);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* data_ac == NULL means the caller doesn't need new data clusters. */
	if (data_ac) {
		ret = ocfs2_reserve_clusters(OCFS2_SB(sb), num_clusters,
					     data_ac);
		if (ret)
			mlog_errno(ret);
	}

out:
	if (ret) {
		/* Release any metadata reservation made before the failure. */
		if (*meta_ac) {
			ocfs2_free_alloc_context(*meta_ac);
			*meta_ac = NULL;
		}
	}

	return ret;
}
2830 | |||
/*
 * walk_page_buffers() callback used while CoWing a page: clear the
 * buffer's mapped state so the subsequent remap targets the newly
 * allocated clusters rather than the old ones.
 */
static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
{
	/* A page being CoWed must not carry dirty buffers at this point. */
	BUG_ON(buffer_dirty(bh));

	clear_buffer_mapped(bh);

	return 0;
}
2839 | |||
2840 | static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | ||
2841 | struct ocfs2_cow_context *context, | ||
2842 | u32 cpos, u32 old_cluster, | ||
2843 | u32 new_cluster, u32 new_len) | ||
2844 | { | ||
2845 | int ret = 0, partial; | ||
2846 | struct ocfs2_caching_info *ci = context->data_et.et_ci; | ||
2847 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | ||
2848 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | ||
2849 | struct page *page; | ||
2850 | pgoff_t page_index; | ||
2851 | unsigned int from, to; | ||
2852 | loff_t offset, end, map_end; | ||
2853 | struct address_space *mapping = context->inode->i_mapping; | ||
2854 | |||
2855 | mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, | ||
2856 | new_cluster, new_len, cpos); | ||
2857 | |||
2858 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | ||
2859 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); | ||
2860 | |||
2861 | while (offset < end) { | ||
2862 | page_index = offset >> PAGE_CACHE_SHIFT; | ||
2863 | map_end = (page_index + 1) << PAGE_CACHE_SHIFT; | ||
2864 | if (map_end > end) | ||
2865 | map_end = end; | ||
2866 | |||
2867 | /* from, to is the offset within the page. */ | ||
2868 | from = offset & (PAGE_CACHE_SIZE - 1); | ||
2869 | to = PAGE_CACHE_SIZE; | ||
2870 | if (map_end & (PAGE_CACHE_SIZE - 1)) | ||
2871 | to = map_end & (PAGE_CACHE_SIZE - 1); | ||
2872 | |||
2873 | page = grab_cache_page(mapping, page_index); | ||
2874 | |||
2875 | /* This page can't be dirtied before we CoW it out. */ | ||
2876 | BUG_ON(PageDirty(page)); | ||
2877 | |||
2878 | if (!PageUptodate(page)) { | ||
2879 | ret = block_read_full_page(page, ocfs2_get_block); | ||
2880 | if (ret) { | ||
2881 | mlog_errno(ret); | ||
2882 | goto unlock; | ||
2883 | } | ||
2884 | lock_page(page); | ||
2885 | } | ||
2886 | |||
2887 | if (page_has_buffers(page)) { | ||
2888 | ret = walk_page_buffers(handle, page_buffers(page), | ||
2889 | from, to, &partial, | ||
2890 | ocfs2_clear_cow_buffer); | ||
2891 | if (ret) { | ||
2892 | mlog_errno(ret); | ||
2893 | goto unlock; | ||
2894 | } | ||
2895 | } | ||
2896 | |||
2897 | ocfs2_map_and_dirty_page(context->inode, | ||
2898 | handle, from, to, | ||
2899 | page, 0, &new_block); | ||
2900 | mark_page_accessed(page); | ||
2901 | unlock: | ||
2902 | unlock_page(page); | ||
2903 | page_cache_release(page); | ||
2904 | page = NULL; | ||
2905 | offset = map_end; | ||
2906 | if (ret) | ||
2907 | break; | ||
2908 | } | ||
2909 | |||
2910 | return ret; | ||
2911 | } | ||
2912 | |||
/*
 * CoW-copy new_len clusters from old_cluster to new_cluster through the
 * buffer cache, block by block and fully under jbd control: each new
 * block is declared with JOURNAL_ACCESS_CREATE, filled with a copy of
 * the old block, and marked journal-dirty.  Used where the data itself
 * is journalled (xattr value trees) rather than going through the page
 * cache.  cpos is unused; it is part of the shared callback signature.
 *
 * Returns 0 on success or -errno on the first failing block.
 */
static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
					   struct ocfs2_cow_context *context,
					   u32 cpos, u32 old_cluster,
					   u32 new_cluster, u32 new_len)
{
	int ret = 0;
	struct super_block *sb = context->inode->i_sb;
	struct ocfs2_caching_info *ci = context->data_et.et_ci;
	int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
	u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster);
	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
	struct ocfs2_super *osb = OCFS2_SB(sb);
	struct buffer_head *old_bh = NULL;
	struct buffer_head *new_bh = NULL;

	mlog(0, "old_cluster %u, new %u, len %u\n", old_cluster,
	     new_cluster, new_len);

	for (i = 0; i < blocks; i++, old_block++, new_block++) {
		new_bh = sb_getblk(osb->sb, new_block);
		if (new_bh == NULL) {
			/* NOTE(review): -ENOMEM would describe a
			 * sb_getblk() failure more precisely. */
			ret = -EIO;
			mlog_errno(ret);
			break;
		}

		/* The new block has no valid on-disk contents yet; mark
		 * it uptodate in the metadata cache before filling it. */
		ocfs2_set_new_buffer_uptodate(ci, new_bh);

		ret = ocfs2_read_block(ci, old_block, &old_bh, NULL);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = ocfs2_journal_access(handle, ci, new_bh,
					   OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
		ret = ocfs2_journal_dirty(handle, new_bh);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		brelse(new_bh);
		brelse(old_bh);
		new_bh = NULL;
		old_bh = NULL;
	}

	/* Drop the pair held by a partially completed iteration. */
	brelse(new_bh);
	brelse(old_bh);
	return ret;
}
2971 | |||
/*
 * Rewrite the extent record covering [cpos, cpos + len) so that it
 * points at p_cluster with the OCFS2_EXT_REFCOUNTED flag cleared,
 * splitting the existing record via ocfs2_split_extent() if it is
 * larger than the requested range.  meta_ac supplies any metadata
 * blocks a split may need; freed leftovers go into dealloc.
 */
static int ocfs2_clear_ext_refcount(handle_t *handle,
				    struct ocfs2_extent_tree *et,
				    u32 cpos, u32 p_cluster, u32 len,
				    unsigned int ext_flags,
				    struct ocfs2_alloc_context *meta_ac,
				    struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret, index;
	struct ocfs2_extent_rec replace_rec;
	struct ocfs2_path *path = NULL;
	struct ocfs2_extent_list *el;
	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
	u64 ino = ocfs2_metadata_cache_owner(et->et_ci);

	mlog(0, "inode %llu cpos %u, len %u, p_cluster %u, ext_flags %u\n",
	     (unsigned long long)ino, cpos, len, p_cluster, ext_flags);

	/* Build the replacement record: same range, new physical start,
	 * refcount flag removed. */
	memset(&replace_rec, 0, sizeof(replace_rec));
	replace_rec.e_cpos = cpu_to_le32(cpos);
	replace_rec.e_leaf_clusters = cpu_to_le16(len);
	replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(sb,
						   p_cluster));
	replace_rec.e_flags = ext_flags;
	replace_rec.e_flags &= ~OCFS2_EXT_REFCOUNTED;

	path = ocfs2_new_path_from_et(et);
	if (!path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_path(et->et_ci, path, cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	el = path_leaf_el(path);

	index = ocfs2_search_extent_list(el, cpos);
	if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
		/* The record we just looked up has vanished: on-disk
		 * corruption; take the filesystem read-only. */
		ocfs2_error(sb,
			    "Inode %llu has an extent at cpos %u which can no "
			    "longer be found.\n",
			    (unsigned long long)ino, cpos);
		ret = -EROFS;
		goto out;
	}

	ret = ocfs2_split_extent(handle, et, path, index,
				 &replace_rec, meta_ac, dealloc);
	if (ret)
		mlog_errno(ret);

out:
	ocfs2_free_path(path);
	return ret;
}
3031 | |||
/*
 * Replace the physical clusters backing [cpos, cpos + len): copy the
 * old data into the freshly allocated clusters at 'new' (via the
 * context's duplicate callback), unless the extent is unwritten and
 * has no data to copy, then rewrite the extent record to point at
 * 'new' with the refcount flag cleared.
 */
static int ocfs2_replace_clusters(handle_t *handle,
				  struct ocfs2_cow_context *context,
				  u32 cpos, u32 old,
				  u32 new, u32 len,
				  unsigned int ext_flags)
{
	int ret;
	struct ocfs2_caching_info *ci = context->data_et.et_ci;
	u64 ino = ocfs2_metadata_cache_owner(ci);

	mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n",
	     (unsigned long long)ino, cpos, old, new, len, ext_flags);

	/* If the old clusters are unwritten, there is no data to duplicate. */
	if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
		ret = context->cow_duplicate_clusters(handle, context, cpos,
						      old, new, len);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	ret = ocfs2_clear_ext_refcount(handle, &context->data_et,
				       cpos, new, len, ext_flags,
				       context->meta_ac, &context->dealloc);
	if (ret)
		mlog_errno(ret);
out:
	return ret;
}
3063 | |||
3064 | static int ocfs2_cow_sync_writeback(struct super_block *sb, | ||
3065 | struct ocfs2_cow_context *context, | ||
3066 | u32 cpos, u32 num_clusters) | ||
3067 | { | ||
3068 | int ret = 0; | ||
3069 | loff_t offset, end, map_end; | ||
3070 | pgoff_t page_index; | ||
3071 | struct page *page; | ||
3072 | |||
3073 | if (ocfs2_should_order_data(context->inode)) | ||
3074 | return 0; | ||
3075 | |||
3076 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | ||
3077 | end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); | ||
3078 | |||
3079 | ret = filemap_fdatawrite_range(context->inode->i_mapping, | ||
3080 | offset, end - 1); | ||
3081 | if (ret < 0) { | ||
3082 | mlog_errno(ret); | ||
3083 | return ret; | ||
3084 | } | ||
3085 | |||
3086 | while (offset < end) { | ||
3087 | page_index = offset >> PAGE_CACHE_SHIFT; | ||
3088 | map_end = (page_index + 1) << PAGE_CACHE_SHIFT; | ||
3089 | if (map_end > end) | ||
3090 | map_end = end; | ||
3091 | |||
3092 | page = grab_cache_page(context->inode->i_mapping, page_index); | ||
3093 | BUG_ON(!page); | ||
3094 | |||
3095 | wait_on_page_writeback(page); | ||
3096 | if (PageError(page)) { | ||
3097 | ret = -EIO; | ||
3098 | mlog_errno(ret); | ||
3099 | } else | ||
3100 | mark_page_accessed(page); | ||
3101 | |||
3102 | unlock_page(page); | ||
3103 | page_cache_release(page); | ||
3104 | page = NULL; | ||
3105 | offset = map_end; | ||
3106 | if (ret) | ||
3107 | break; | ||
3108 | } | ||
3109 | |||
3110 | return ret; | ||
3111 | } | ||
3112 | |||
3113 | static int ocfs2_di_get_clusters(struct ocfs2_cow_context *context, | ||
3114 | u32 v_cluster, u32 *p_cluster, | ||
3115 | u32 *num_clusters, | ||
3116 | unsigned int *extent_flags) | ||
3117 | { | ||
3118 | return ocfs2_get_clusters(context->inode, v_cluster, p_cluster, | ||
3119 | num_clusters, extent_flags); | ||
3120 | } | ||
3121 | |||
3122 | static int ocfs2_make_clusters_writable(struct super_block *sb, | ||
3123 | struct ocfs2_cow_context *context, | ||
3124 | u32 cpos, u32 p_cluster, | ||
3125 | u32 num_clusters, unsigned int e_flags) | ||
3126 | { | ||
3127 | int ret, delete, index, credits = 0; | ||
3128 | u32 new_bit, new_len; | ||
3129 | unsigned int set_len; | ||
3130 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
3131 | handle_t *handle; | ||
3132 | struct buffer_head *ref_leaf_bh = NULL; | ||
3133 | struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci; | ||
3134 | struct ocfs2_refcount_rec rec; | ||
3135 | |||
3136 | mlog(0, "cpos %u, p_cluster %u, num_clusters %u, e_flags %u\n", | ||
3137 | cpos, p_cluster, num_clusters, e_flags); | ||
3138 | |||
3139 | ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters, | ||
3140 | &context->data_et, | ||
3141 | ref_ci, | ||
3142 | context->ref_root_bh, | ||
3143 | &context->meta_ac, | ||
3144 | &context->data_ac, &credits); | ||
3145 | if (ret) { | ||
3146 | mlog_errno(ret); | ||
3147 | return ret; | ||
3148 | } | ||
3149 | |||
3150 | if (context->post_refcount) | ||
3151 | credits += context->post_refcount->credits; | ||
3152 | |||
3153 | credits += context->extra_credits; | ||
3154 | handle = ocfs2_start_trans(osb, credits); | ||
3155 | if (IS_ERR(handle)) { | ||
3156 | ret = PTR_ERR(handle); | ||
3157 | mlog_errno(ret); | ||
3158 | goto out; | ||
3159 | } | ||
3160 | |||
3161 | while (num_clusters) { | ||
3162 | ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh, | ||
3163 | p_cluster, num_clusters, | ||
3164 | &rec, &index, &ref_leaf_bh); | ||
3165 | if (ret) { | ||
3166 | mlog_errno(ret); | ||
3167 | goto out_commit; | ||
3168 | } | ||
3169 | |||
3170 | BUG_ON(!rec.r_refcount); | ||
3171 | set_len = min((u64)p_cluster + num_clusters, | ||
3172 | le64_to_cpu(rec.r_cpos) + | ||
3173 | le32_to_cpu(rec.r_clusters)) - p_cluster; | ||
3174 | |||
3175 | /* | ||
3176 | * There are many different situation here. | ||
3177 | * 1. If refcount == 1, remove the flag and don't COW. | ||
3178 | * 2. If refcount > 1, allocate clusters. | ||
3179 | * Here we may not allocate r_len once at a time, so continue | ||
3180 | * until we reach num_clusters. | ||
3181 | */ | ||
3182 | if (le32_to_cpu(rec.r_refcount) == 1) { | ||
3183 | delete = 0; | ||
3184 | ret = ocfs2_clear_ext_refcount(handle, | ||
3185 | &context->data_et, | ||
3186 | cpos, p_cluster, | ||
3187 | set_len, e_flags, | ||
3188 | context->meta_ac, | ||
3189 | &context->dealloc); | ||
3190 | if (ret) { | ||
3191 | mlog_errno(ret); | ||
3192 | goto out_commit; | ||
3193 | } | ||
3194 | } else { | ||
3195 | delete = 1; | ||
3196 | |||
3197 | ret = __ocfs2_claim_clusters(osb, handle, | ||
3198 | context->data_ac, | ||
3199 | 1, set_len, | ||
3200 | &new_bit, &new_len); | ||
3201 | if (ret) { | ||
3202 | mlog_errno(ret); | ||
3203 | goto out_commit; | ||
3204 | } | ||
3205 | |||
3206 | ret = ocfs2_replace_clusters(handle, context, | ||
3207 | cpos, p_cluster, new_bit, | ||
3208 | new_len, e_flags); | ||
3209 | if (ret) { | ||
3210 | mlog_errno(ret); | ||
3211 | goto out_commit; | ||
3212 | } | ||
3213 | set_len = new_len; | ||
3214 | } | ||
3215 | |||
3216 | ret = __ocfs2_decrease_refcount(handle, ref_ci, | ||
3217 | context->ref_root_bh, | ||
3218 | p_cluster, set_len, | ||
3219 | context->meta_ac, | ||
3220 | &context->dealloc, delete); | ||
3221 | if (ret) { | ||
3222 | mlog_errno(ret); | ||
3223 | goto out_commit; | ||
3224 | } | ||
3225 | |||
3226 | cpos += set_len; | ||
3227 | p_cluster += set_len; | ||
3228 | num_clusters -= set_len; | ||
3229 | brelse(ref_leaf_bh); | ||
3230 | ref_leaf_bh = NULL; | ||
3231 | } | ||
3232 | |||
3233 | /* handle any post_cow action. */ | ||
3234 | if (context->post_refcount && context->post_refcount->func) { | ||
3235 | ret = context->post_refcount->func(context->inode, handle, | ||
3236 | context->post_refcount->para); | ||
3237 | if (ret) { | ||
3238 | mlog_errno(ret); | ||
3239 | goto out_commit; | ||
3240 | } | ||
3241 | } | ||
3242 | |||
3243 | /* | ||
3244 | * Here we should write the new page out first if we are | ||
3245 | * in write-back mode. | ||
3246 | */ | ||
3247 | if (context->get_clusters == ocfs2_di_get_clusters) { | ||
3248 | ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); | ||
3249 | if (ret) | ||
3250 | mlog_errno(ret); | ||
3251 | } | ||
3252 | |||
3253 | out_commit: | ||
3254 | ocfs2_commit_trans(osb, handle); | ||
3255 | |||
3256 | out: | ||
3257 | if (context->data_ac) { | ||
3258 | ocfs2_free_alloc_context(context->data_ac); | ||
3259 | context->data_ac = NULL; | ||
3260 | } | ||
3261 | if (context->meta_ac) { | ||
3262 | ocfs2_free_alloc_context(context->meta_ac); | ||
3263 | context->meta_ac = NULL; | ||
3264 | } | ||
3265 | brelse(ref_leaf_bh); | ||
3266 | |||
3267 | return ret; | ||
3268 | } | ||
3269 | |||
/*
 * Drive the CoW of [cow_start, cow_start + cow_len): walk the range
 * extent by extent via the context's get_clusters callback and make
 * each refcounted piece writable.  Freed clusters collected in the
 * dealloc context are released at the end.
 */
static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
{
	int ret = 0;
	struct inode *inode = context->inode;
	u32 cow_start = context->cow_start, cow_len = context->cow_len;
	u32 p_cluster, num_clusters;
	unsigned int ext_flags;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	/* Refusing to touch refcounted extents on a filesystem that
	 * doesn't advertise the feature: flag corruption. */
	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
		ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
			    "tree, but the feature bit is not set in the "
			    "super block.", inode->i_ino);
		return -EROFS;
	}

	ocfs2_init_dealloc_ctxt(&context->dealloc);

	while (cow_len) {
		ret = context->get_clusters(context, cow_start, &p_cluster,
					    &num_clusters, &ext_flags);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/* The CoW range was computed from refcounted extents only. */
		BUG_ON(!(ext_flags & OCFS2_EXT_REFCOUNTED));

		if (cow_len < num_clusters)
			num_clusters = cow_len;

		ret = ocfs2_make_clusters_writable(inode->i_sb, context,
						   cow_start, p_cluster,
						   num_clusters, ext_flags);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		cow_len -= num_clusters;
		cow_start += num_clusters;
	}

	if (ocfs2_dealloc_has_cluster(&context->dealloc)) {
		ocfs2_schedule_truncate_log_flush(osb, 1);
		ocfs2_run_deallocs(osb, &context->dealloc);
	}

	return ret;
}
3320 | |||
3321 | /* | ||
3322 | * Starting at cpos, try to CoW write_len clusters. Don't CoW | ||
3323 | * past max_cpos. This will stop when it runs into a hole or an | ||
3324 | * unrefcounted extent. | ||
3325 | */ | ||
static int ocfs2_refcount_cow_hunk(struct inode *inode,
				   struct buffer_head *di_bh,
				   u32 cpos, u32 write_len, u32 max_cpos)
{
	int ret;
	u32 cow_start = 0, cow_len = 0;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_tree *ref_tree;
	struct ocfs2_cow_context *context = NULL;

	/* Caller guarantees the inode already has a refcount tree. */
	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));

	/* Expand [cpos, cpos + write_len) to a good CoW boundary,
	 * bounded by max_cpos. */
	ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list,
					      cpos, write_len, max_cpos,
					      &cow_start, &cow_len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, "
	     "cow_len %u\n", inode->i_ino,
	     cpos, write_len, cow_start, cow_len);

	BUG_ON(cow_len == 0);

	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
	if (!context) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
				       1, &ref_tree, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Regular-file CoW: dinode extent tree, page-cache duplication. */
	context->inode = inode;
	context->cow_start = cow_start;
	context->cow_len = cow_len;
	context->ref_tree = ref_tree;
	context->ref_root_bh = ref_root_bh;
	context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
	context->get_clusters = ocfs2_di_get_clusters;

	ocfs2_init_dinode_extent_tree(&context->data_et,
				      INODE_CACHE(inode), di_bh);

	ret = ocfs2_replace_cow(context);
	if (ret)
		mlog_errno(ret);

	/*
	 * truncate the extent map here since no matter whether we meet with
	 * any error during the action, we shouldn't trust cached extent map
	 * any more.
	 */
	ocfs2_extent_map_trunc(inode, cow_start);

	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	brelse(ref_root_bh);
out:
	kfree(context);
	return ret;
}
3397 | |||
3398 | /* | ||
3399 | * CoW any and all clusters between cpos and cpos+write_len. | ||
3400 | * Don't CoW past max_cpos. If this returns successfully, all | ||
3401 | * clusters between cpos and cpos+write_len are safe to modify. | ||
3402 | */ | ||
3403 | int ocfs2_refcount_cow(struct inode *inode, | ||
3404 | struct buffer_head *di_bh, | ||
3405 | u32 cpos, u32 write_len, u32 max_cpos) | ||
3406 | { | ||
3407 | int ret = 0; | ||
3408 | u32 p_cluster, num_clusters; | ||
3409 | unsigned int ext_flags; | ||
3410 | |||
3411 | while (write_len) { | ||
3412 | ret = ocfs2_get_clusters(inode, cpos, &p_cluster, | ||
3413 | &num_clusters, &ext_flags); | ||
3414 | if (ret) { | ||
3415 | mlog_errno(ret); | ||
3416 | break; | ||
3417 | } | ||
3418 | |||
3419 | if (write_len < num_clusters) | ||
3420 | num_clusters = write_len; | ||
3421 | |||
3422 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { | ||
3423 | ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, | ||
3424 | num_clusters, max_cpos); | ||
3425 | if (ret) { | ||
3426 | mlog_errno(ret); | ||
3427 | break; | ||
3428 | } | ||
3429 | } | ||
3430 | |||
3431 | write_len -= num_clusters; | ||
3432 | cpos += num_clusters; | ||
3433 | } | ||
3434 | |||
3435 | return ret; | ||
3436 | } | ||
3437 | |||
3438 | static int ocfs2_xattr_value_get_clusters(struct ocfs2_cow_context *context, | ||
3439 | u32 v_cluster, u32 *p_cluster, | ||
3440 | u32 *num_clusters, | ||
3441 | unsigned int *extent_flags) | ||
3442 | { | ||
3443 | struct inode *inode = context->inode; | ||
3444 | struct ocfs2_xattr_value_root *xv = context->cow_object; | ||
3445 | |||
3446 | return ocfs2_xattr_get_clusters(inode, v_cluster, p_cluster, | ||
3447 | num_clusters, &xv->xr_list, | ||
3448 | extent_flags); | ||
3449 | } | ||
3450 | |||
3451 | /* | ||
3452 | * Given a xattr value root, calculate the most meta/credits we need for | ||
3453 | * refcount tree change if we truncate it to 0. | ||
3454 | */ | ||
3455 | int ocfs2_refcounted_xattr_delete_need(struct inode *inode, | ||
3456 | struct ocfs2_caching_info *ref_ci, | ||
3457 | struct buffer_head *ref_root_bh, | ||
3458 | struct ocfs2_xattr_value_root *xv, | ||
3459 | int *meta_add, int *credits) | ||
3460 | { | ||
3461 | int ret = 0, index, ref_blocks = 0; | ||
3462 | u32 p_cluster, num_clusters; | ||
3463 | u32 cpos = 0, clusters = le32_to_cpu(xv->xr_clusters); | ||
3464 | struct ocfs2_refcount_block *rb; | ||
3465 | struct ocfs2_refcount_rec rec; | ||
3466 | struct buffer_head *ref_leaf_bh = NULL; | ||
3467 | |||
3468 | while (cpos < clusters) { | ||
3469 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, | ||
3470 | &num_clusters, &xv->xr_list, | ||
3471 | NULL); | ||
3472 | if (ret) { | ||
3473 | mlog_errno(ret); | ||
3474 | goto out; | ||
3475 | } | ||
3476 | |||
3477 | cpos += num_clusters; | ||
3478 | |||
3479 | while (num_clusters) { | ||
3480 | ret = ocfs2_get_refcount_rec(ref_ci, ref_root_bh, | ||
3481 | p_cluster, num_clusters, | ||
3482 | &rec, &index, | ||
3483 | &ref_leaf_bh); | ||
3484 | if (ret) { | ||
3485 | mlog_errno(ret); | ||
3486 | goto out; | ||
3487 | } | ||
3488 | |||
3489 | BUG_ON(!rec.r_refcount); | ||
3490 | |||
3491 | rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; | ||
3492 | |||
3493 | /* | ||
3494 | * We really don't know whether the other clusters is in | ||
3495 | * this refcount block or not, so just take the worst | ||
3496 | * case that all the clusters are in this block and each | ||
3497 | * one will split a refcount rec, so totally we need | ||
3498 | * clusters * 2 new refcount rec. | ||
3499 | */ | ||
3500 | if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 > | ||
3501 | le16_to_cpu(rb->rf_records.rl_count)) | ||
3502 | ref_blocks++; | ||
3503 | |||
3504 | *credits += 1; | ||
3505 | brelse(ref_leaf_bh); | ||
3506 | ref_leaf_bh = NULL; | ||
3507 | |||
3508 | if (num_clusters <= le32_to_cpu(rec.r_clusters)) | ||
3509 | break; | ||
3510 | else | ||
3511 | num_clusters -= le32_to_cpu(rec.r_clusters); | ||
3512 | p_cluster += num_clusters; | ||
3513 | } | ||
3514 | } | ||
3515 | |||
3516 | *meta_add += ref_blocks; | ||
3517 | if (!ref_blocks) | ||
3518 | goto out; | ||
3519 | |||
3520 | rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; | ||
3521 | if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) | ||
3522 | *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; | ||
3523 | else { | ||
3524 | struct ocfs2_extent_tree et; | ||
3525 | |||
3526 | ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh); | ||
3527 | *credits += ocfs2_calc_extend_credits(inode->i_sb, | ||
3528 | et.et_root_el, | ||
3529 | ref_blocks); | ||
3530 | } | ||
3531 | |||
3532 | out: | ||
3533 | brelse(ref_leaf_bh); | ||
3534 | return ret; | ||
3535 | } | ||
3536 | |||
3537 | /* | ||
3538 | * Do CoW for xattr. | ||
3539 | */ | ||
3540 | int ocfs2_refcount_cow_xattr(struct inode *inode, | ||
3541 | struct ocfs2_dinode *di, | ||
3542 | struct ocfs2_xattr_value_buf *vb, | ||
3543 | struct ocfs2_refcount_tree *ref_tree, | ||
3544 | struct buffer_head *ref_root_bh, | ||
3545 | u32 cpos, u32 write_len, | ||
3546 | struct ocfs2_post_refcount *post) | ||
3547 | { | ||
3548 | int ret; | ||
3549 | struct ocfs2_xattr_value_root *xv = vb->vb_xv; | ||
3550 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
3551 | struct ocfs2_cow_context *context = NULL; | ||
3552 | u32 cow_start, cow_len; | ||
3553 | |||
3554 | BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); | ||
3555 | |||
3556 | ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list, | ||
3557 | cpos, write_len, UINT_MAX, | ||
3558 | &cow_start, &cow_len); | ||
3559 | if (ret) { | ||
3560 | mlog_errno(ret); | ||
3561 | goto out; | ||
3562 | } | ||
3563 | |||
3564 | BUG_ON(cow_len == 0); | ||
3565 | |||
3566 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); | ||
3567 | if (!context) { | ||
3568 | ret = -ENOMEM; | ||
3569 | mlog_errno(ret); | ||
3570 | goto out; | ||
3571 | } | ||
3572 | |||
3573 | context->inode = inode; | ||
3574 | context->cow_start = cow_start; | ||
3575 | context->cow_len = cow_len; | ||
3576 | context->ref_tree = ref_tree; | ||
3577 | context->ref_root_bh = ref_root_bh;; | ||
3578 | context->cow_object = xv; | ||
3579 | |||
3580 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd; | ||
3581 | /* We need the extra credits for duplicate_clusters by jbd. */ | ||
3582 | context->extra_credits = | ||
3583 | ocfs2_clusters_to_blocks(inode->i_sb, 1) * cow_len; | ||
3584 | context->get_clusters = ocfs2_xattr_value_get_clusters; | ||
3585 | context->post_refcount = post; | ||
3586 | |||
3587 | ocfs2_init_xattr_value_extent_tree(&context->data_et, | ||
3588 | INODE_CACHE(inode), vb); | ||
3589 | |||
3590 | ret = ocfs2_replace_cow(context); | ||
3591 | if (ret) | ||
3592 | mlog_errno(ret); | ||
3593 | |||
3594 | out: | ||
3595 | kfree(context); | ||
3596 | return ret; | ||
3597 | } | ||
3598 | |||
3599 | /* | ||
3600 | * Insert a new extent into refcount tree and mark a extent rec | ||
3601 | * as refcounted in the dinode tree. | ||
3602 | */ | ||
int ocfs2_add_refcount_flag(struct inode *inode,
			    struct ocfs2_extent_tree *data_et,
			    struct ocfs2_caching_info *ref_ci,
			    struct buffer_head *ref_root_bh,
			    u32 cpos, u32 p_cluster, u32 num_clusters,
			    struct ocfs2_cached_dealloc_ctxt *dealloc,
			    struct ocfs2_post_refcount *post)
{
	int ret;
	handle_t *handle;
	int credits = 1, ref_blocks = 0;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_alloc_context *meta_ac = NULL;

	/* Estimate metadata blocks / credits for the refcount insert. */
	ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
					       ref_ci, ref_root_bh,
					       p_cluster, num_clusters,
					       &ref_blocks, &credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	mlog(0, "reserve new metadata %d, credits = %d\n",
	     ref_blocks, credits);

	if (ref_blocks) {
		ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
							ref_blocks, &meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/* The caller's post action runs inside this transaction too. */
	if (post)
		credits += post->credits;

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/* Flag the extent record, then bump the refcount to match. */
	ret = ocfs2_mark_extent_refcounted(inode, data_et, handle,
					   cpos, num_clusters, p_cluster,
					   meta_ac, dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
					p_cluster, num_clusters, 0,
					meta_ac, dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	if (post && post->func) {
		ret = post->func(inode, handle, post->para);
		if (ret)
			mlog_errno(ret);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);
	return ret;
}
3677 | |||
/*
 * Bump the inode's ctime to now and record it in the on-disk dinode,
 * in its own small transaction.  Used after data was implicitly
 * changed by attaching refcount flags.
 */
static int ocfs2_change_ctime(struct inode *inode,
			      struct buffer_head *di_bh)
{
	int ret;
	handle_t *handle;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;

	handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb),
				   OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/* Declare the dinode buffer to the journal before modifying it. */
	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	inode->i_ctime = CURRENT_TIME;
	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);

	ocfs2_journal_dirty(handle, di_bh);

out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	return ret;
}
3711 | |||
3712 | static int ocfs2_attach_refcount_tree(struct inode *inode, | ||
3713 | struct buffer_head *di_bh) | ||
3714 | { | ||
3715 | int ret, data_changed = 0; | ||
3716 | struct buffer_head *ref_root_bh = NULL; | ||
3717 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
3718 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
3719 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
3720 | struct ocfs2_refcount_tree *ref_tree; | ||
3721 | unsigned int ext_flags; | ||
3722 | loff_t size; | ||
3723 | u32 cpos, num_clusters, clusters, p_cluster; | ||
3724 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
3725 | struct ocfs2_extent_tree di_et; | ||
3726 | |||
3727 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
3728 | |||
3729 | if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) { | ||
3730 | ret = ocfs2_create_refcount_tree(inode, di_bh); | ||
3731 | if (ret) { | ||
3732 | mlog_errno(ret); | ||
3733 | goto out; | ||
3734 | } | ||
3735 | } | ||
3736 | |||
3737 | BUG_ON(!di->i_refcount_loc); | ||
3738 | ret = ocfs2_lock_refcount_tree(osb, | ||
3739 | le64_to_cpu(di->i_refcount_loc), 1, | ||
3740 | &ref_tree, &ref_root_bh); | ||
3741 | if (ret) { | ||
3742 | mlog_errno(ret); | ||
3743 | goto out; | ||
3744 | } | ||
3745 | |||
3746 | ocfs2_init_dinode_extent_tree(&di_et, INODE_CACHE(inode), di_bh); | ||
3747 | |||
3748 | size = i_size_read(inode); | ||
3749 | clusters = ocfs2_clusters_for_bytes(inode->i_sb, size); | ||
3750 | |||
3751 | cpos = 0; | ||
3752 | while (cpos < clusters) { | ||
3753 | ret = ocfs2_get_clusters(inode, cpos, &p_cluster, | ||
3754 | &num_clusters, &ext_flags); | ||
3755 | |||
3756 | if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) { | ||
3757 | ret = ocfs2_add_refcount_flag(inode, &di_et, | ||
3758 | &ref_tree->rf_ci, | ||
3759 | ref_root_bh, cpos, | ||
3760 | p_cluster, num_clusters, | ||
3761 | &dealloc, NULL); | ||
3762 | if (ret) { | ||
3763 | mlog_errno(ret); | ||
3764 | goto unlock; | ||
3765 | } | ||
3766 | |||
3767 | data_changed = 1; | ||
3768 | } | ||
3769 | cpos += num_clusters; | ||
3770 | } | ||
3771 | |||
3772 | if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) { | ||
3773 | ret = ocfs2_xattr_attach_refcount_tree(inode, di_bh, | ||
3774 | &ref_tree->rf_ci, | ||
3775 | ref_root_bh, | ||
3776 | &dealloc); | ||
3777 | if (ret) { | ||
3778 | mlog_errno(ret); | ||
3779 | goto unlock; | ||
3780 | } | ||
3781 | } | ||
3782 | |||
3783 | if (data_changed) { | ||
3784 | ret = ocfs2_change_ctime(inode, di_bh); | ||
3785 | if (ret) | ||
3786 | mlog_errno(ret); | ||
3787 | } | ||
3788 | |||
3789 | unlock: | ||
3790 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
3791 | brelse(ref_root_bh); | ||
3792 | |||
3793 | if (!ret && ocfs2_dealloc_has_cluster(&dealloc)) { | ||
3794 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
3795 | ocfs2_run_deallocs(osb, &dealloc); | ||
3796 | } | ||
3797 | out: | ||
3798 | /* | ||
3799 | * Empty the extent map so that we may get the right extent | ||
3800 | * record from the disk. | ||
3801 | */ | ||
3802 | ocfs2_extent_map_trunc(inode, 0); | ||
3803 | |||
3804 | return ret; | ||
3805 | } | ||
3806 | |||
3807 | static int ocfs2_add_refcounted_extent(struct inode *inode, | ||
3808 | struct ocfs2_extent_tree *et, | ||
3809 | struct ocfs2_caching_info *ref_ci, | ||
3810 | struct buffer_head *ref_root_bh, | ||
3811 | u32 cpos, u32 p_cluster, u32 num_clusters, | ||
3812 | unsigned int ext_flags, | ||
3813 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
3814 | { | ||
3815 | int ret; | ||
3816 | handle_t *handle; | ||
3817 | int credits = 0; | ||
3818 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
3819 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
3820 | |||
3821 | ret = ocfs2_lock_refcount_allocators(inode->i_sb, | ||
3822 | p_cluster, num_clusters, | ||
3823 | et, ref_ci, | ||
3824 | ref_root_bh, &meta_ac, | ||
3825 | NULL, &credits); | ||
3826 | if (ret) { | ||
3827 | mlog_errno(ret); | ||
3828 | goto out; | ||
3829 | } | ||
3830 | |||
3831 | handle = ocfs2_start_trans(osb, credits); | ||
3832 | if (IS_ERR(handle)) { | ||
3833 | ret = PTR_ERR(handle); | ||
3834 | mlog_errno(ret); | ||
3835 | goto out; | ||
3836 | } | ||
3837 | |||
3838 | ret = ocfs2_insert_extent(handle, et, cpos, | ||
3839 | cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, | ||
3840 | p_cluster)), | ||
3841 | num_clusters, ext_flags, meta_ac); | ||
3842 | if (ret) { | ||
3843 | mlog_errno(ret); | ||
3844 | goto out_commit; | ||
3845 | } | ||
3846 | |||
3847 | ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh, | ||
3848 | p_cluster, num_clusters, | ||
3849 | meta_ac, dealloc); | ||
3850 | if (ret) | ||
3851 | mlog_errno(ret); | ||
3852 | |||
3853 | out_commit: | ||
3854 | ocfs2_commit_trans(osb, handle); | ||
3855 | out: | ||
3856 | if (meta_ac) | ||
3857 | ocfs2_free_alloc_context(meta_ac); | ||
3858 | return ret; | ||
3859 | } | ||
3860 | |||
3861 | static int ocfs2_duplicate_extent_list(struct inode *s_inode, | ||
3862 | struct inode *t_inode, | ||
3863 | struct buffer_head *t_bh, | ||
3864 | struct ocfs2_caching_info *ref_ci, | ||
3865 | struct buffer_head *ref_root_bh, | ||
3866 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
3867 | { | ||
3868 | int ret = 0; | ||
3869 | u32 p_cluster, num_clusters, clusters, cpos; | ||
3870 | loff_t size; | ||
3871 | unsigned int ext_flags; | ||
3872 | struct ocfs2_extent_tree et; | ||
3873 | |||
3874 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(t_inode), t_bh); | ||
3875 | |||
3876 | size = i_size_read(s_inode); | ||
3877 | clusters = ocfs2_clusters_for_bytes(s_inode->i_sb, size); | ||
3878 | |||
3879 | cpos = 0; | ||
3880 | while (cpos < clusters) { | ||
3881 | ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster, | ||
3882 | &num_clusters, &ext_flags); | ||
3883 | |||
3884 | if (p_cluster) { | ||
3885 | ret = ocfs2_add_refcounted_extent(t_inode, &et, | ||
3886 | ref_ci, ref_root_bh, | ||
3887 | cpos, p_cluster, | ||
3888 | num_clusters, | ||
3889 | ext_flags, | ||
3890 | dealloc); | ||
3891 | if (ret) { | ||
3892 | mlog_errno(ret); | ||
3893 | goto out; | ||
3894 | } | ||
3895 | } | ||
3896 | |||
3897 | cpos += num_clusters; | ||
3898 | } | ||
3899 | |||
3900 | out: | ||
3901 | return ret; | ||
3902 | } | ||
3903 | |||
3904 | /* | ||
3905 | * change the new file's attributes to the src. | ||
3906 | * | ||
3907 | * reflink creates a snapshot of a file, that means the attributes | ||
3908 | * must be identical except for three exceptions - nlink, ino, and ctime. | ||
3909 | */ | ||
3910 | static int ocfs2_complete_reflink(struct inode *s_inode, | ||
3911 | struct buffer_head *s_bh, | ||
3912 | struct inode *t_inode, | ||
3913 | struct buffer_head *t_bh, | ||
3914 | bool preserve) | ||
3915 | { | ||
3916 | int ret; | ||
3917 | handle_t *handle; | ||
3918 | struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data; | ||
3919 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)t_bh->b_data; | ||
3920 | loff_t size = i_size_read(s_inode); | ||
3921 | |||
3922 | handle = ocfs2_start_trans(OCFS2_SB(t_inode->i_sb), | ||
3923 | OCFS2_INODE_UPDATE_CREDITS); | ||
3924 | if (IS_ERR(handle)) { | ||
3925 | ret = PTR_ERR(handle); | ||
3926 | mlog_errno(ret); | ||
3927 | return ret; | ||
3928 | } | ||
3929 | |||
3930 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh, | ||
3931 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
3932 | if (ret) { | ||
3933 | mlog_errno(ret); | ||
3934 | goto out_commit; | ||
3935 | } | ||
3936 | |||
3937 | spin_lock(&OCFS2_I(t_inode)->ip_lock); | ||
3938 | OCFS2_I(t_inode)->ip_clusters = OCFS2_I(s_inode)->ip_clusters; | ||
3939 | OCFS2_I(t_inode)->ip_attr = OCFS2_I(s_inode)->ip_attr; | ||
3940 | OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; | ||
3941 | spin_unlock(&OCFS2_I(t_inode)->ip_lock); | ||
3942 | i_size_write(t_inode, size); | ||
3943 | |||
3944 | di->i_xattr_inline_size = s_di->i_xattr_inline_size; | ||
3945 | di->i_clusters = s_di->i_clusters; | ||
3946 | di->i_size = s_di->i_size; | ||
3947 | di->i_dyn_features = s_di->i_dyn_features; | ||
3948 | di->i_attr = s_di->i_attr; | ||
3949 | |||
3950 | if (preserve) { | ||
3951 | di->i_uid = s_di->i_uid; | ||
3952 | di->i_gid = s_di->i_gid; | ||
3953 | di->i_mode = s_di->i_mode; | ||
3954 | |||
3955 | /* | ||
3956 | * update time. | ||
3957 | * we want mtime to appear identical to the source and | ||
3958 | * update ctime. | ||
3959 | */ | ||
3960 | t_inode->i_ctime = CURRENT_TIME; | ||
3961 | |||
3962 | di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec); | ||
3963 | di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec); | ||
3964 | |||
3965 | t_inode->i_mtime = s_inode->i_mtime; | ||
3966 | di->i_mtime = s_di->i_mtime; | ||
3967 | di->i_mtime_nsec = s_di->i_mtime_nsec; | ||
3968 | } | ||
3969 | |||
3970 | ocfs2_journal_dirty(handle, t_bh); | ||
3971 | |||
3972 | out_commit: | ||
3973 | ocfs2_commit_trans(OCFS2_SB(t_inode->i_sb), handle); | ||
3974 | return ret; | ||
3975 | } | ||
3976 | |||
3977 | static int ocfs2_create_reflink_node(struct inode *s_inode, | ||
3978 | struct buffer_head *s_bh, | ||
3979 | struct inode *t_inode, | ||
3980 | struct buffer_head *t_bh, | ||
3981 | bool preserve) | ||
3982 | { | ||
3983 | int ret; | ||
3984 | struct buffer_head *ref_root_bh = NULL; | ||
3985 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
3986 | struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb); | ||
3987 | struct ocfs2_refcount_block *rb; | ||
3988 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data; | ||
3989 | struct ocfs2_refcount_tree *ref_tree; | ||
3990 | |||
3991 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
3992 | |||
3993 | ret = ocfs2_set_refcount_tree(t_inode, t_bh, | ||
3994 | le64_to_cpu(di->i_refcount_loc)); | ||
3995 | if (ret) { | ||
3996 | mlog_errno(ret); | ||
3997 | goto out; | ||
3998 | } | ||
3999 | |||
4000 | ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), | ||
4001 | 1, &ref_tree, &ref_root_bh); | ||
4002 | if (ret) { | ||
4003 | mlog_errno(ret); | ||
4004 | goto out; | ||
4005 | } | ||
4006 | rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; | ||
4007 | |||
4008 | ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh, | ||
4009 | &ref_tree->rf_ci, ref_root_bh, | ||
4010 | &dealloc); | ||
4011 | if (ret) { | ||
4012 | mlog_errno(ret); | ||
4013 | goto out_unlock_refcount; | ||
4014 | } | ||
4015 | |||
4016 | ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh, preserve); | ||
4017 | if (ret) | ||
4018 | mlog_errno(ret); | ||
4019 | |||
4020 | out_unlock_refcount: | ||
4021 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
4022 | brelse(ref_root_bh); | ||
4023 | out: | ||
4024 | if (ocfs2_dealloc_has_cluster(&dealloc)) { | ||
4025 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
4026 | ocfs2_run_deallocs(osb, &dealloc); | ||
4027 | } | ||
4028 | |||
4029 | return ret; | ||
4030 | } | ||
4031 | |||
4032 | static int __ocfs2_reflink(struct dentry *old_dentry, | ||
4033 | struct buffer_head *old_bh, | ||
4034 | struct inode *new_inode, | ||
4035 | bool preserve) | ||
4036 | { | ||
4037 | int ret; | ||
4038 | struct inode *inode = old_dentry->d_inode; | ||
4039 | struct buffer_head *new_bh = NULL; | ||
4040 | |||
4041 | ret = filemap_fdatawrite(inode->i_mapping); | ||
4042 | if (ret) { | ||
4043 | mlog_errno(ret); | ||
4044 | goto out; | ||
4045 | } | ||
4046 | |||
4047 | ret = ocfs2_attach_refcount_tree(inode, old_bh); | ||
4048 | if (ret) { | ||
4049 | mlog_errno(ret); | ||
4050 | goto out; | ||
4051 | } | ||
4052 | |||
4053 | mutex_lock(&new_inode->i_mutex); | ||
4054 | ret = ocfs2_inode_lock(new_inode, &new_bh, 1); | ||
4055 | if (ret) { | ||
4056 | mlog_errno(ret); | ||
4057 | goto out_unlock; | ||
4058 | } | ||
4059 | |||
4060 | ret = ocfs2_create_reflink_node(inode, old_bh, | ||
4061 | new_inode, new_bh, preserve); | ||
4062 | if (ret) { | ||
4063 | mlog_errno(ret); | ||
4064 | goto inode_unlock; | ||
4065 | } | ||
4066 | |||
4067 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) { | ||
4068 | ret = ocfs2_reflink_xattrs(inode, old_bh, | ||
4069 | new_inode, new_bh, | ||
4070 | preserve); | ||
4071 | if (ret) | ||
4072 | mlog_errno(ret); | ||
4073 | } | ||
4074 | inode_unlock: | ||
4075 | ocfs2_inode_unlock(new_inode, 1); | ||
4076 | brelse(new_bh); | ||
4077 | out_unlock: | ||
4078 | mutex_unlock(&new_inode->i_mutex); | ||
4079 | out: | ||
4080 | if (!ret) { | ||
4081 | ret = filemap_fdatawait(inode->i_mapping); | ||
4082 | if (ret) | ||
4083 | mlog_errno(ret); | ||
4084 | } | ||
4085 | return ret; | ||
4086 | } | ||
4087 | |||
4088 | static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, | ||
4089 | struct dentry *new_dentry, bool preserve) | ||
4090 | { | ||
4091 | int error; | ||
4092 | struct inode *inode = old_dentry->d_inode; | ||
4093 | struct buffer_head *old_bh = NULL; | ||
4094 | struct inode *new_orphan_inode = NULL; | ||
4095 | |||
4096 | if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) | ||
4097 | return -EOPNOTSUPP; | ||
4098 | |||
4099 | error = ocfs2_create_inode_in_orphan(dir, inode->i_mode, | ||
4100 | &new_orphan_inode); | ||
4101 | if (error) { | ||
4102 | mlog_errno(error); | ||
4103 | goto out; | ||
4104 | } | ||
4105 | |||
4106 | error = ocfs2_inode_lock(inode, &old_bh, 1); | ||
4107 | if (error) { | ||
4108 | mlog_errno(error); | ||
4109 | goto out; | ||
4110 | } | ||
4111 | |||
4112 | down_write(&OCFS2_I(inode)->ip_xattr_sem); | ||
4113 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
4114 | error = __ocfs2_reflink(old_dentry, old_bh, | ||
4115 | new_orphan_inode, preserve); | ||
4116 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
4117 | up_write(&OCFS2_I(inode)->ip_xattr_sem); | ||
4118 | |||
4119 | ocfs2_inode_unlock(inode, 1); | ||
4120 | brelse(old_bh); | ||
4121 | |||
4122 | if (error) { | ||
4123 | mlog_errno(error); | ||
4124 | goto out; | ||
4125 | } | ||
4126 | |||
4127 | /* If the security isn't preserved, we need to re-initialize them. */ | ||
4128 | if (!preserve) { | ||
4129 | error = ocfs2_init_security_and_acl(dir, new_orphan_inode); | ||
4130 | if (error) | ||
4131 | mlog_errno(error); | ||
4132 | } | ||
4133 | out: | ||
4134 | if (!error) { | ||
4135 | error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, | ||
4136 | new_dentry); | ||
4137 | if (error) | ||
4138 | mlog_errno(error); | ||
4139 | } | ||
4140 | |||
4141 | if (new_orphan_inode) { | ||
4142 | /* | ||
4143 | * We need to open_unlock the inode no matter whether we | ||
4144 | * succeed or not, so that other nodes can delete it later. | ||
4145 | */ | ||
4146 | ocfs2_open_unlock(new_orphan_inode); | ||
4147 | if (error) | ||
4148 | iput(new_orphan_inode); | ||
4149 | } | ||
4150 | |||
4151 | return error; | ||
4152 | } | ||
4153 | |||
4154 | /* | ||
4155 | * Below here are the bits used by OCFS2_IOC_REFLINK() to fake | ||
4156 | * sys_reflink(). This will go away when vfs_reflink() exists in | ||
4157 | * fs/namei.c. | ||
4158 | */ | ||
4159 | |||
4160 | /* copied from may_create in VFS. */ | ||
4161 | static inline int ocfs2_may_create(struct inode *dir, struct dentry *child) | ||
4162 | { | ||
4163 | if (child->d_inode) | ||
4164 | return -EEXIST; | ||
4165 | if (IS_DEADDIR(dir)) | ||
4166 | return -ENOENT; | ||
4167 | return inode_permission(dir, MAY_WRITE | MAY_EXEC); | ||
4168 | } | ||
4169 | |||
4170 | /* copied from user_path_parent. */ | ||
4171 | static int ocfs2_user_path_parent(const char __user *path, | ||
4172 | struct nameidata *nd, char **name) | ||
4173 | { | ||
4174 | char *s = getname(path); | ||
4175 | int error; | ||
4176 | |||
4177 | if (IS_ERR(s)) | ||
4178 | return PTR_ERR(s); | ||
4179 | |||
4180 | error = path_lookup(s, LOOKUP_PARENT, nd); | ||
4181 | if (error) | ||
4182 | putname(s); | ||
4183 | else | ||
4184 | *name = s; | ||
4185 | |||
4186 | return error; | ||
4187 | } | ||
4188 | |||
4189 | /** | ||
4190 | * ocfs2_vfs_reflink - Create a reference-counted link | ||
4191 | * | ||
4192 | * @old_dentry: source dentry + inode | ||
4193 | * @dir: directory to create the target | ||
4194 | * @new_dentry: target dentry | ||
4195 | * @preserve: if true, preserve all file attributes | ||
4196 | */ | ||
4197 | int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, | ||
4198 | struct dentry *new_dentry, bool preserve) | ||
4199 | { | ||
4200 | struct inode *inode = old_dentry->d_inode; | ||
4201 | int error; | ||
4202 | |||
4203 | if (!inode) | ||
4204 | return -ENOENT; | ||
4205 | |||
4206 | error = ocfs2_may_create(dir, new_dentry); | ||
4207 | if (error) | ||
4208 | return error; | ||
4209 | |||
4210 | if (dir->i_sb != inode->i_sb) | ||
4211 | return -EXDEV; | ||
4212 | |||
4213 | /* | ||
4214 | * A reflink to an append-only or immutable file cannot be created. | ||
4215 | */ | ||
4216 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
4217 | return -EPERM; | ||
4218 | |||
4219 | /* Only regular files can be reflinked. */ | ||
4220 | if (!S_ISREG(inode->i_mode)) | ||
4221 | return -EPERM; | ||
4222 | |||
4223 | /* | ||
4224 | * If the caller wants to preserve ownership, they require the | ||
4225 | * rights to do so. | ||
4226 | */ | ||
4227 | if (preserve) { | ||
4228 | if ((current_fsuid() != inode->i_uid) && !capable(CAP_CHOWN)) | ||
4229 | return -EPERM; | ||
4230 | if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN)) | ||
4231 | return -EPERM; | ||
4232 | } | ||
4233 | |||
4234 | /* | ||
4235 | * If the caller is modifying any aspect of the attributes, they | ||
4236 | * are not creating a snapshot. They need read permission on the | ||
4237 | * file. | ||
4238 | */ | ||
4239 | if (!preserve) { | ||
4240 | error = inode_permission(inode, MAY_READ); | ||
4241 | if (error) | ||
4242 | return error; | ||
4243 | } | ||
4244 | |||
4245 | mutex_lock(&inode->i_mutex); | ||
4246 | vfs_dq_init(dir); | ||
4247 | error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve); | ||
4248 | mutex_unlock(&inode->i_mutex); | ||
4249 | if (!error) | ||
4250 | fsnotify_create(dir, new_dentry); | ||
4251 | return error; | ||
4252 | } | ||
4253 | /* | ||
4254 | * Most codes are copied from sys_linkat. | ||
4255 | */ | ||
4256 | int ocfs2_reflink_ioctl(struct inode *inode, | ||
4257 | const char __user *oldname, | ||
4258 | const char __user *newname, | ||
4259 | bool preserve) | ||
4260 | { | ||
4261 | struct dentry *new_dentry; | ||
4262 | struct nameidata nd; | ||
4263 | struct path old_path; | ||
4264 | int error; | ||
4265 | char *to = NULL; | ||
4266 | |||
4267 | if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) | ||
4268 | return -EOPNOTSUPP; | ||
4269 | |||
4270 | error = user_path_at(AT_FDCWD, oldname, 0, &old_path); | ||
4271 | if (error) { | ||
4272 | mlog_errno(error); | ||
4273 | return error; | ||
4274 | } | ||
4275 | |||
4276 | error = ocfs2_user_path_parent(newname, &nd, &to); | ||
4277 | if (error) { | ||
4278 | mlog_errno(error); | ||
4279 | goto out; | ||
4280 | } | ||
4281 | |||
4282 | error = -EXDEV; | ||
4283 | if (old_path.mnt != nd.path.mnt) | ||
4284 | goto out_release; | ||
4285 | new_dentry = lookup_create(&nd, 0); | ||
4286 | error = PTR_ERR(new_dentry); | ||
4287 | if (IS_ERR(new_dentry)) { | ||
4288 | mlog_errno(error); | ||
4289 | goto out_unlock; | ||
4290 | } | ||
4291 | |||
4292 | error = mnt_want_write(nd.path.mnt); | ||
4293 | if (error) { | ||
4294 | mlog_errno(error); | ||
4295 | goto out_dput; | ||
4296 | } | ||
4297 | |||
4298 | error = ocfs2_vfs_reflink(old_path.dentry, | ||
4299 | nd.path.dentry->d_inode, | ||
4300 | new_dentry, preserve); | ||
4301 | mnt_drop_write(nd.path.mnt); | ||
4302 | out_dput: | ||
4303 | dput(new_dentry); | ||
4304 | out_unlock: | ||
4305 | mutex_unlock(&nd.path.dentry->d_inode->i_mutex); | ||
4306 | out_release: | ||
4307 | path_put(&nd.path); | ||
4308 | putname(to); | ||
4309 | out: | ||
4310 | path_put(&old_path); | ||
4311 | |||
4312 | return error; | ||
4313 | } | ||
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h new file mode 100644 index 000000000000..c1d19b1d3ecc --- /dev/null +++ b/fs/ocfs2/refcounttree.h | |||
@@ -0,0 +1,106 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * refcounttree.h | ||
5 | * | ||
6 | * Copyright (C) 2009 Oracle. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public | ||
10 | * License version 2 as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * General Public License for more details. | ||
16 | */ | ||
17 | #ifndef OCFS2_REFCOUNTTREE_H | ||
18 | #define OCFS2_REFCOUNTTREE_H | ||
19 | |||
20 | struct ocfs2_refcount_tree { | ||
21 | struct rb_node rf_node; | ||
22 | u64 rf_blkno; | ||
23 | u32 rf_generation; | ||
24 | struct rw_semaphore rf_sem; | ||
25 | struct ocfs2_lock_res rf_lockres; | ||
26 | struct kref rf_getcnt; | ||
27 | int rf_removed; | ||
28 | |||
29 | /* the following 4 fields are used by caching_info. */ | ||
30 | struct ocfs2_caching_info rf_ci; | ||
31 | spinlock_t rf_lock; | ||
32 | struct mutex rf_io_mutex; | ||
33 | struct super_block *rf_sb; | ||
34 | }; | ||
35 | |||
36 | void ocfs2_purge_refcount_trees(struct ocfs2_super *osb); | ||
37 | int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, u64 ref_blkno, int rw, | ||
38 | struct ocfs2_refcount_tree **tree, | ||
39 | struct buffer_head **ref_bh); | ||
40 | void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, | ||
41 | struct ocfs2_refcount_tree *tree, | ||
42 | int rw); | ||
43 | |||
44 | int ocfs2_decrease_refcount(struct inode *inode, | ||
45 | handle_t *handle, u32 cpos, u32 len, | ||
46 | struct ocfs2_alloc_context *meta_ac, | ||
47 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
48 | int delete); | ||
49 | int ocfs2_prepare_refcount_change_for_del(struct inode *inode, | ||
50 | struct buffer_head *di_bh, | ||
51 | u64 phys_blkno, | ||
52 | u32 clusters, | ||
53 | int *credits, | ||
54 | struct ocfs2_alloc_context **meta_ac); | ||
55 | int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, | ||
56 | u32 cpos, u32 write_len, u32 max_cpos); | ||
57 | |||
58 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, | ||
59 | handle_t *handle, | ||
60 | void *para); | ||
61 | /* | ||
62 | * Some refcount caller need to do more work after we modify the data b-tree | ||
63 | * during refcount operation(including CoW and add refcount flag), and make the | ||
64 | * transaction complete. So it must give us this structure so that we can do it | ||
65 | * within our transaction. | ||
66 | * | ||
67 | */ | ||
68 | struct ocfs2_post_refcount { | ||
69 | int credits; /* credits it need for journal. */ | ||
70 | ocfs2_post_refcount_func *func; /* real function. */ | ||
71 | void *para; | ||
72 | }; | ||
73 | |||
74 | int ocfs2_refcounted_xattr_delete_need(struct inode *inode, | ||
75 | struct ocfs2_caching_info *ref_ci, | ||
76 | struct buffer_head *ref_root_bh, | ||
77 | struct ocfs2_xattr_value_root *xv, | ||
78 | int *meta_add, int *credits); | ||
79 | int ocfs2_refcount_cow_xattr(struct inode *inode, | ||
80 | struct ocfs2_dinode *di, | ||
81 | struct ocfs2_xattr_value_buf *vb, | ||
82 | struct ocfs2_refcount_tree *ref_tree, | ||
83 | struct buffer_head *ref_root_bh, | ||
84 | u32 cpos, u32 write_len, | ||
85 | struct ocfs2_post_refcount *post); | ||
86 | int ocfs2_add_refcount_flag(struct inode *inode, | ||
87 | struct ocfs2_extent_tree *data_et, | ||
88 | struct ocfs2_caching_info *ref_ci, | ||
89 | struct buffer_head *ref_root_bh, | ||
90 | u32 cpos, u32 p_cluster, u32 num_clusters, | ||
91 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
92 | struct ocfs2_post_refcount *post); | ||
93 | int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh); | ||
94 | int ocfs2_try_remove_refcount_tree(struct inode *inode, | ||
95 | struct buffer_head *di_bh); | ||
96 | int ocfs2_increase_refcount(handle_t *handle, | ||
97 | struct ocfs2_caching_info *ci, | ||
98 | struct buffer_head *ref_root_bh, | ||
99 | u64 cpos, u32 len, | ||
100 | struct ocfs2_alloc_context *meta_ac, | ||
101 | struct ocfs2_cached_dealloc_ctxt *dealloc); | ||
102 | int ocfs2_reflink_ioctl(struct inode *inode, | ||
103 | const char __user *oldname, | ||
104 | const char __user *newname, | ||
105 | bool preserve); | ||
106 | #endif /* OCFS2_REFCOUNTTREE_H */ | ||
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 424adaa5f900..3c3d673a4d20 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c | |||
@@ -106,8 +106,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, | |||
106 | mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", | 106 | mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", |
107 | new_clusters, first_new_cluster); | 107 | new_clusters, first_new_cluster); |
108 | 108 | ||
109 | ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh, | 109 | ret = ocfs2_journal_access_gd(handle, INODE_CACHE(bm_inode), |
110 | OCFS2_JOURNAL_ACCESS_WRITE); | 110 | group_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
111 | if (ret < 0) { | 111 | if (ret < 0) { |
112 | mlog_errno(ret); | 112 | mlog_errno(ret); |
113 | goto out; | 113 | goto out; |
@@ -141,7 +141,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, | |||
141 | } | 141 | } |
142 | 142 | ||
143 | /* update the inode accordingly. */ | 143 | /* update the inode accordingly. */ |
144 | ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh, | 144 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh, |
145 | OCFS2_JOURNAL_ACCESS_WRITE); | 145 | OCFS2_JOURNAL_ACCESS_WRITE); |
146 | if (ret < 0) { | 146 | if (ret < 0) { |
147 | mlog_errno(ret); | 147 | mlog_errno(ret); |
@@ -514,7 +514,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) | |||
514 | goto out_unlock; | 514 | goto out_unlock; |
515 | } | 515 | } |
516 | 516 | ||
517 | ocfs2_set_new_buffer_uptodate(inode, group_bh); | 517 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), group_bh); |
518 | 518 | ||
519 | ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); | 519 | ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); |
520 | if (ret) { | 520 | if (ret) { |
@@ -536,8 +536,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) | |||
536 | cl = &fe->id2.i_chain; | 536 | cl = &fe->id2.i_chain; |
537 | cr = &cl->cl_recs[input->chain]; | 537 | cr = &cl->cl_recs[input->chain]; |
538 | 538 | ||
539 | ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh, | 539 | ret = ocfs2_journal_access_gd(handle, INODE_CACHE(main_bm_inode), |
540 | OCFS2_JOURNAL_ACCESS_WRITE); | 540 | group_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
541 | if (ret < 0) { | 541 | if (ret < 0) { |
542 | mlog_errno(ret); | 542 | mlog_errno(ret); |
543 | goto out_commit; | 543 | goto out_commit; |
@@ -552,8 +552,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) | |||
552 | goto out_commit; | 552 | goto out_commit; |
553 | } | 553 | } |
554 | 554 | ||
555 | ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh, | 555 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode), |
556 | OCFS2_JOURNAL_ACCESS_WRITE); | 556 | main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
557 | if (ret < 0) { | 557 | if (ret < 0) { |
558 | mlog_errno(ret); | 558 | mlog_errno(ret); |
559 | goto out_commit; | 559 | goto out_commit; |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 40661e7824e9..bfbd7e9e949f 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb) | |||
150 | * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If | 150 | * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If |
151 | * this is not true, the read of -1 (UINT64_MAX) will fail. | 151 | * this is not true, the read of -1 (UINT64_MAX) will fail. |
152 | */ | 152 | */ |
153 | ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh, | 153 | ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks, |
154 | OCFS2_BH_IGNORE_CACHE, NULL); | 154 | si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL); |
155 | if (ret == 0) { | 155 | if (ret == 0) { |
156 | spin_lock(&osb->osb_lock); | 156 | spin_lock(&osb->osb_lock); |
157 | ocfs2_update_slot_info(si); | 157 | ocfs2_update_slot_info(si); |
@@ -213,7 +213,7 @@ static int ocfs2_update_disk_slot(struct ocfs2_super *osb, | |||
213 | ocfs2_update_disk_slot_old(si, slot_num, &bh); | 213 | ocfs2_update_disk_slot_old(si, slot_num, &bh); |
214 | spin_unlock(&osb->osb_lock); | 214 | spin_unlock(&osb->osb_lock); |
215 | 215 | ||
216 | status = ocfs2_write_block(osb, bh, si->si_inode); | 216 | status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode)); |
217 | if (status < 0) | 217 | if (status < 0) |
218 | mlog_errno(status); | 218 | mlog_errno(status); |
219 | 219 | ||
@@ -404,8 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, | |||
404 | (unsigned long long)blkno); | 404 | (unsigned long long)blkno); |
405 | 405 | ||
406 | bh = NULL; /* Acquire a fresh bh */ | 406 | bh = NULL; /* Acquire a fresh bh */ |
407 | status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh, | 407 | status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno, |
408 | OCFS2_BH_IGNORE_CACHE, NULL); | 408 | 1, &bh, OCFS2_BH_IGNORE_CACHE, NULL); |
409 | if (status < 0) { | 409 | if (status < 0) { |
410 | mlog_errno(status); | 410 | mlog_errno(status); |
411 | goto bail; | 411 | goto bail; |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 73a16d4666dc..c30b644d9572 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -310,7 +310,7 @@ int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, | |||
310 | int rc; | 310 | int rc; |
311 | struct buffer_head *tmp = *bh; | 311 | struct buffer_head *tmp = *bh; |
312 | 312 | ||
313 | rc = ocfs2_read_block(inode, gd_blkno, &tmp, | 313 | rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &tmp, |
314 | ocfs2_validate_group_descriptor); | 314 | ocfs2_validate_group_descriptor); |
315 | if (rc) | 315 | if (rc) |
316 | goto out; | 316 | goto out; |
@@ -352,7 +352,7 @@ static int ocfs2_block_group_fill(handle_t *handle, | |||
352 | } | 352 | } |
353 | 353 | ||
354 | status = ocfs2_journal_access_gd(handle, | 354 | status = ocfs2_journal_access_gd(handle, |
355 | alloc_inode, | 355 | INODE_CACHE(alloc_inode), |
356 | bg_bh, | 356 | bg_bh, |
357 | OCFS2_JOURNAL_ACCESS_CREATE); | 357 | OCFS2_JOURNAL_ACCESS_CREATE); |
358 | if (status < 0) { | 358 | if (status < 0) { |
@@ -476,7 +476,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
476 | mlog_errno(status); | 476 | mlog_errno(status); |
477 | goto bail; | 477 | goto bail; |
478 | } | 478 | } |
479 | ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh); | 479 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh); |
480 | 480 | ||
481 | status = ocfs2_block_group_fill(handle, | 481 | status = ocfs2_block_group_fill(handle, |
482 | alloc_inode, | 482 | alloc_inode, |
@@ -491,7 +491,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
491 | 491 | ||
492 | bg = (struct ocfs2_group_desc *) bg_bh->b_data; | 492 | bg = (struct ocfs2_group_desc *) bg_bh->b_data; |
493 | 493 | ||
494 | status = ocfs2_journal_access_di(handle, alloc_inode, | 494 | status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), |
495 | bh, OCFS2_JOURNAL_ACCESS_WRITE); | 495 | bh, OCFS2_JOURNAL_ACCESS_WRITE); |
496 | if (status < 0) { | 496 | if (status < 0) { |
497 | mlog_errno(status); | 497 | mlog_errno(status); |
@@ -1033,7 +1033,7 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, | |||
1033 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; | 1033 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; |
1034 | 1034 | ||
1035 | status = ocfs2_journal_access_gd(handle, | 1035 | status = ocfs2_journal_access_gd(handle, |
1036 | alloc_inode, | 1036 | INODE_CACHE(alloc_inode), |
1037 | group_bh, | 1037 | group_bh, |
1038 | journal_type); | 1038 | journal_type); |
1039 | if (status < 0) { | 1039 | if (status < 0) { |
@@ -1106,7 +1106,8 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
1106 | bg_ptr = le64_to_cpu(bg->bg_next_group); | 1106 | bg_ptr = le64_to_cpu(bg->bg_next_group); |
1107 | prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); | 1107 | prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); |
1108 | 1108 | ||
1109 | status = ocfs2_journal_access_gd(handle, alloc_inode, prev_bg_bh, | 1109 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), |
1110 | prev_bg_bh, | ||
1110 | OCFS2_JOURNAL_ACCESS_WRITE); | 1111 | OCFS2_JOURNAL_ACCESS_WRITE); |
1111 | if (status < 0) { | 1112 | if (status < 0) { |
1112 | mlog_errno(status); | 1113 | mlog_errno(status); |
@@ -1121,8 +1122,8 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
1121 | goto out_rollback; | 1122 | goto out_rollback; |
1122 | } | 1123 | } |
1123 | 1124 | ||
1124 | status = ocfs2_journal_access_gd(handle, alloc_inode, bg_bh, | 1125 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), |
1125 | OCFS2_JOURNAL_ACCESS_WRITE); | 1126 | bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
1126 | if (status < 0) { | 1127 | if (status < 0) { |
1127 | mlog_errno(status); | 1128 | mlog_errno(status); |
1128 | goto out_rollback; | 1129 | goto out_rollback; |
@@ -1136,8 +1137,8 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
1136 | goto out_rollback; | 1137 | goto out_rollback; |
1137 | } | 1138 | } |
1138 | 1139 | ||
1139 | status = ocfs2_journal_access_di(handle, alloc_inode, fe_bh, | 1140 | status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), |
1140 | OCFS2_JOURNAL_ACCESS_WRITE); | 1141 | fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
1141 | if (status < 0) { | 1142 | if (status < 0) { |
1142 | mlog_errno(status); | 1143 | mlog_errno(status); |
1143 | goto out_rollback; | 1144 | goto out_rollback; |
@@ -1288,7 +1289,7 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode, | |||
1288 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | 1289 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; |
1289 | struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; | 1290 | struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; |
1290 | 1291 | ||
1291 | ret = ocfs2_journal_access_di(handle, inode, di_bh, | 1292 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, |
1292 | OCFS2_JOURNAL_ACCESS_WRITE); | 1293 | OCFS2_JOURNAL_ACCESS_WRITE); |
1293 | if (ret < 0) { | 1294 | if (ret < 0) { |
1294 | mlog_errno(ret); | 1295 | mlog_errno(ret); |
@@ -1461,7 +1462,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1461 | /* Ok, claim our bits now: set the info on dinode, chainlist | 1462 | /* Ok, claim our bits now: set the info on dinode, chainlist |
1462 | * and then the group */ | 1463 | * and then the group */ |
1463 | status = ocfs2_journal_access_di(handle, | 1464 | status = ocfs2_journal_access_di(handle, |
1464 | alloc_inode, | 1465 | INODE_CACHE(alloc_inode), |
1465 | ac->ac_bh, | 1466 | ac->ac_bh, |
1466 | OCFS2_JOURNAL_ACCESS_WRITE); | 1467 | OCFS2_JOURNAL_ACCESS_WRITE); |
1467 | if (status < 0) { | 1468 | if (status < 0) { |
@@ -1907,8 +1908,8 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, | |||
1907 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | 1908 | if (ocfs2_is_cluster_bitmap(alloc_inode)) |
1908 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; | 1909 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; |
1909 | 1910 | ||
1910 | status = ocfs2_journal_access_gd(handle, alloc_inode, group_bh, | 1911 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), |
1911 | journal_type); | 1912 | group_bh, journal_type); |
1912 | if (status < 0) { | 1913 | if (status < 0) { |
1913 | mlog_errno(status); | 1914 | mlog_errno(status); |
1914 | goto bail; | 1915 | goto bail; |
@@ -1993,8 +1994,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
1993 | goto bail; | 1994 | goto bail; |
1994 | } | 1995 | } |
1995 | 1996 | ||
1996 | status = ocfs2_journal_access_di(handle, alloc_inode, alloc_bh, | 1997 | status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), |
1997 | OCFS2_JOURNAL_ACCESS_WRITE); | 1998 | alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
1998 | if (status < 0) { | 1999 | if (status < 0) { |
1999 | mlog_errno(status); | 2000 | mlog_errno(status); |
2000 | goto bail; | 2001 | goto bail; |
@@ -2151,7 +2152,7 @@ int ocfs2_lock_allocators(struct inode *inode, | |||
2151 | 2152 | ||
2152 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | 2153 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); |
2153 | 2154 | ||
2154 | num_free_extents = ocfs2_num_free_extents(osb, inode, et); | 2155 | num_free_extents = ocfs2_num_free_extents(osb, et); |
2155 | if (num_free_extents < 0) { | 2156 | if (num_free_extents < 0) { |
2156 | ret = num_free_extents; | 2157 | ret = num_free_extents; |
2157 | mlog_errno(ret); | 2158 | mlog_errno(ret); |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index a3f8871d21fd..4cc3c890a2cd 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> |
31 | #include <linux/utsname.h> | ||
32 | #include <linux/init.h> | 31 | #include <linux/init.h> |
33 | #include <linux/random.h> | 32 | #include <linux/random.h> |
34 | #include <linux/statfs.h> | 33 | #include <linux/statfs.h> |
@@ -69,6 +68,7 @@ | |||
69 | #include "ver.h" | 68 | #include "ver.h" |
70 | #include "xattr.h" | 69 | #include "xattr.h" |
71 | #include "quota.h" | 70 | #include "quota.h" |
71 | #include "refcounttree.h" | ||
72 | 72 | ||
73 | #include "buffer_head_io.h" | 73 | #include "buffer_head_io.h" |
74 | 74 | ||
@@ -965,7 +965,7 @@ static int ocfs2_quota_off(struct super_block *sb, int type, int remount) | |||
965 | return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED); | 965 | return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED); |
966 | } | 966 | } |
967 | 967 | ||
968 | static struct quotactl_ops ocfs2_quotactl_ops = { | 968 | static const struct quotactl_ops ocfs2_quotactl_ops = { |
969 | .quota_on = ocfs2_quota_on, | 969 | .quota_on = ocfs2_quota_on, |
970 | .quota_off = ocfs2_quota_off, | 970 | .quota_off = ocfs2_quota_off, |
971 | .quota_sync = vfs_quota_sync, | 971 | .quota_sync = vfs_quota_sync, |
@@ -1668,8 +1668,6 @@ static void ocfs2_inode_init_once(void *data) | |||
1668 | spin_lock_init(&oi->ip_lock); | 1668 | spin_lock_init(&oi->ip_lock); |
1669 | ocfs2_extent_map_init(&oi->vfs_inode); | 1669 | ocfs2_extent_map_init(&oi->vfs_inode); |
1670 | INIT_LIST_HEAD(&oi->ip_io_markers); | 1670 | INIT_LIST_HEAD(&oi->ip_io_markers); |
1671 | oi->ip_created_trans = 0; | ||
1672 | oi->ip_last_trans = 0; | ||
1673 | oi->ip_dir_start_lookup = 0; | 1671 | oi->ip_dir_start_lookup = 0; |
1674 | 1672 | ||
1675 | init_rwsem(&oi->ip_alloc_sem); | 1673 | init_rwsem(&oi->ip_alloc_sem); |
@@ -1683,7 +1681,8 @@ static void ocfs2_inode_init_once(void *data) | |||
1683 | ocfs2_lock_res_init_once(&oi->ip_inode_lockres); | 1681 | ocfs2_lock_res_init_once(&oi->ip_inode_lockres); |
1684 | ocfs2_lock_res_init_once(&oi->ip_open_lockres); | 1682 | ocfs2_lock_res_init_once(&oi->ip_open_lockres); |
1685 | 1683 | ||
1686 | ocfs2_metadata_cache_init(&oi->vfs_inode); | 1684 | ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode), |
1685 | &ocfs2_inode_caching_ops); | ||
1687 | 1686 | ||
1688 | inode_init_once(&oi->vfs_inode); | 1687 | inode_init_once(&oi->vfs_inode); |
1689 | } | 1688 | } |
@@ -1859,6 +1858,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1859 | 1858 | ||
1860 | ocfs2_sync_blockdev(sb); | 1859 | ocfs2_sync_blockdev(sb); |
1861 | 1860 | ||
1861 | ocfs2_purge_refcount_trees(osb); | ||
1862 | |||
1862 | /* No cluster connection means we've failed during mount, so skip | 1863 | /* No cluster connection means we've failed during mount, so skip |
1863 | * all the steps which depended on that to complete. */ | 1864 | * all the steps which depended on that to complete. */ |
1864 | if (osb->cconn) { | 1865 | if (osb->cconn) { |
@@ -2065,6 +2066,8 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2065 | goto bail; | 2066 | goto bail; |
2066 | } | 2067 | } |
2067 | 2068 | ||
2069 | osb->osb_rf_lock_tree = RB_ROOT; | ||
2070 | |||
2068 | osb->s_feature_compat = | 2071 | osb->s_feature_compat = |
2069 | le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); | 2072 | le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); |
2070 | osb->s_feature_ro_compat = | 2073 | osb->s_feature_ro_compat = |
@@ -2490,7 +2493,8 @@ void __ocfs2_abort(struct super_block* sb, | |||
2490 | /* Force a panic(). This stinks, but it's better than letting | 2493 | /* Force a panic(). This stinks, but it's better than letting |
2491 | * things continue without having a proper hard readonly | 2494 | * things continue without having a proper hard readonly |
2492 | * here. */ | 2495 | * here. */ |
2493 | OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; | 2496 | if (!ocfs2_mount_local(OCFS2_SB(sb))) |
2497 | OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; | ||
2494 | ocfs2_handle_error(sb); | 2498 | ocfs2_handle_error(sb); |
2495 | } | 2499 | } |
2496 | 2500 | ||
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 579dd1b1110f..e3421030a69f 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c | |||
@@ -38,7 +38,6 @@ | |||
38 | #include <linux/types.h> | 38 | #include <linux/types.h> |
39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
40 | #include <linux/pagemap.h> | 40 | #include <linux/pagemap.h> |
41 | #include <linux/utsname.h> | ||
42 | #include <linux/namei.h> | 41 | #include <linux/namei.h> |
43 | 42 | ||
44 | #define MLOG_MASK_PREFIX ML_NAMEI | 43 | #define MLOG_MASK_PREFIX ML_NAMEI |
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 187b99ff0368..b6284f235d2f 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c | |||
@@ -75,15 +75,77 @@ struct ocfs2_meta_cache_item { | |||
75 | 75 | ||
76 | static struct kmem_cache *ocfs2_uptodate_cachep = NULL; | 76 | static struct kmem_cache *ocfs2_uptodate_cachep = NULL; |
77 | 77 | ||
78 | void ocfs2_metadata_cache_init(struct inode *inode) | 78 | u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci) |
79 | { | 79 | { |
80 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 80 | BUG_ON(!ci || !ci->ci_ops); |
81 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; | ||
82 | 81 | ||
83 | oi->ip_flags |= OCFS2_INODE_CACHE_INLINE; | 82 | return ci->ci_ops->co_owner(ci); |
83 | } | ||
84 | |||
85 | struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci) | ||
86 | { | ||
87 | BUG_ON(!ci || !ci->ci_ops); | ||
88 | |||
89 | return ci->ci_ops->co_get_super(ci); | ||
90 | } | ||
91 | |||
92 | static void ocfs2_metadata_cache_lock(struct ocfs2_caching_info *ci) | ||
93 | { | ||
94 | BUG_ON(!ci || !ci->ci_ops); | ||
95 | |||
96 | ci->ci_ops->co_cache_lock(ci); | ||
97 | } | ||
98 | |||
99 | static void ocfs2_metadata_cache_unlock(struct ocfs2_caching_info *ci) | ||
100 | { | ||
101 | BUG_ON(!ci || !ci->ci_ops); | ||
102 | |||
103 | ci->ci_ops->co_cache_unlock(ci); | ||
104 | } | ||
105 | |||
106 | void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci) | ||
107 | { | ||
108 | BUG_ON(!ci || !ci->ci_ops); | ||
109 | |||
110 | ci->ci_ops->co_io_lock(ci); | ||
111 | } | ||
112 | |||
113 | void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci) | ||
114 | { | ||
115 | BUG_ON(!ci || !ci->ci_ops); | ||
116 | |||
117 | ci->ci_ops->co_io_unlock(ci); | ||
118 | } | ||
119 | |||
120 | |||
121 | static void ocfs2_metadata_cache_reset(struct ocfs2_caching_info *ci, | ||
122 | int clear) | ||
123 | { | ||
124 | ci->ci_flags |= OCFS2_CACHE_FL_INLINE; | ||
84 | ci->ci_num_cached = 0; | 125 | ci->ci_num_cached = 0; |
126 | |||
127 | if (clear) { | ||
128 | ci->ci_created_trans = 0; | ||
129 | ci->ci_last_trans = 0; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci, | ||
134 | const struct ocfs2_caching_operations *ops) | ||
135 | { | ||
136 | BUG_ON(!ops); | ||
137 | |||
138 | ci->ci_ops = ops; | ||
139 | ocfs2_metadata_cache_reset(ci, 1); | ||
85 | } | 140 | } |
86 | 141 | ||
142 | void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci) | ||
143 | { | ||
144 | ocfs2_metadata_cache_purge(ci); | ||
145 | ocfs2_metadata_cache_reset(ci, 1); | ||
146 | } | ||
147 | |||
148 | |||
87 | /* No lock taken here as 'root' is not expected to be visible to other | 149 | /* No lock taken here as 'root' is not expected to be visible to other |
88 | * processes. */ | 150 | * processes. */ |
89 | static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root) | 151 | static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root) |
@@ -112,19 +174,20 @@ static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root) | |||
112 | * This function is a few more lines longer than necessary due to some | 174 | * This function is a few more lines longer than necessary due to some |
113 | * accounting done here, but I think it's worth tracking down those | 175 | * accounting done here, but I think it's worth tracking down those |
114 | * bugs sooner -- Mark */ | 176 | * bugs sooner -- Mark */ |
115 | void ocfs2_metadata_cache_purge(struct inode *inode) | 177 | void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci) |
116 | { | 178 | { |
117 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
118 | unsigned int tree, to_purge, purged; | 179 | unsigned int tree, to_purge, purged; |
119 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; | ||
120 | struct rb_root root = RB_ROOT; | 180 | struct rb_root root = RB_ROOT; |
121 | 181 | ||
122 | spin_lock(&oi->ip_lock); | 182 | BUG_ON(!ci || !ci->ci_ops); |
123 | tree = !(oi->ip_flags & OCFS2_INODE_CACHE_INLINE); | 183 | |
184 | ocfs2_metadata_cache_lock(ci); | ||
185 | tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE); | ||
124 | to_purge = ci->ci_num_cached; | 186 | to_purge = ci->ci_num_cached; |
125 | 187 | ||
126 | mlog(0, "Purge %u %s items from Inode %llu\n", to_purge, | 188 | mlog(0, "Purge %u %s items from Owner %llu\n", to_purge, |
127 | tree ? "array" : "tree", (unsigned long long)oi->ip_blkno); | 189 | tree ? "array" : "tree", |
190 | (unsigned long long)ocfs2_metadata_cache_owner(ci)); | ||
128 | 191 | ||
129 | /* If we're a tree, save off the root so that we can safely | 192 | /* If we're a tree, save off the root so that we can safely |
130 | * initialize the cache. We do the work to free tree members | 193 | * initialize the cache. We do the work to free tree members |
@@ -132,16 +195,17 @@ void ocfs2_metadata_cache_purge(struct inode *inode) | |||
132 | if (tree) | 195 | if (tree) |
133 | root = ci->ci_cache.ci_tree; | 196 | root = ci->ci_cache.ci_tree; |
134 | 197 | ||
135 | ocfs2_metadata_cache_init(inode); | 198 | ocfs2_metadata_cache_reset(ci, 0); |
136 | spin_unlock(&oi->ip_lock); | 199 | ocfs2_metadata_cache_unlock(ci); |
137 | 200 | ||
138 | purged = ocfs2_purge_copied_metadata_tree(&root); | 201 | purged = ocfs2_purge_copied_metadata_tree(&root); |
139 | /* If possible, track the number wiped so that we can more | 202 | /* If possible, track the number wiped so that we can more |
140 | * easily detect counting errors. Unfortunately, this is only | 203 | * easily detect counting errors. Unfortunately, this is only |
141 | * meaningful for trees. */ | 204 | * meaningful for trees. */ |
142 | if (tree && purged != to_purge) | 205 | if (tree && purged != to_purge) |
143 | mlog(ML_ERROR, "Inode %llu, count = %u, purged = %u\n", | 206 | mlog(ML_ERROR, "Owner %llu, count = %u, purged = %u\n", |
144 | (unsigned long long)oi->ip_blkno, to_purge, purged); | 207 | (unsigned long long)ocfs2_metadata_cache_owner(ci), |
208 | to_purge, purged); | ||
145 | } | 209 | } |
146 | 210 | ||
147 | /* Returns the index in the cache array, -1 if not found. | 211 | /* Returns the index in the cache array, -1 if not found. |
@@ -182,27 +246,25 @@ ocfs2_search_cache_tree(struct ocfs2_caching_info *ci, | |||
182 | return NULL; | 246 | return NULL; |
183 | } | 247 | } |
184 | 248 | ||
185 | static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi, | 249 | static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci, |
186 | struct buffer_head *bh) | 250 | struct buffer_head *bh) |
187 | { | 251 | { |
188 | int index = -1; | 252 | int index = -1; |
189 | struct ocfs2_meta_cache_item *item = NULL; | 253 | struct ocfs2_meta_cache_item *item = NULL; |
190 | 254 | ||
191 | spin_lock(&oi->ip_lock); | 255 | ocfs2_metadata_cache_lock(ci); |
192 | 256 | ||
193 | mlog(0, "Inode %llu, query block %llu (inline = %u)\n", | 257 | mlog(0, "Owner %llu, query block %llu (inline = %u)\n", |
194 | (unsigned long long)oi->ip_blkno, | 258 | (unsigned long long)ocfs2_metadata_cache_owner(ci), |
195 | (unsigned long long) bh->b_blocknr, | 259 | (unsigned long long) bh->b_blocknr, |
196 | !!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE)); | 260 | !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE)); |
197 | 261 | ||
198 | if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) | 262 | if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) |
199 | index = ocfs2_search_cache_array(&oi->ip_metadata_cache, | 263 | index = ocfs2_search_cache_array(ci, bh->b_blocknr); |
200 | bh->b_blocknr); | ||
201 | else | 264 | else |
202 | item = ocfs2_search_cache_tree(&oi->ip_metadata_cache, | 265 | item = ocfs2_search_cache_tree(ci, bh->b_blocknr); |
203 | bh->b_blocknr); | ||
204 | 266 | ||
205 | spin_unlock(&oi->ip_lock); | 267 | ocfs2_metadata_cache_unlock(ci); |
206 | 268 | ||
207 | mlog(0, "index = %d, item = %p\n", index, item); | 269 | mlog(0, "index = %d, item = %p\n", index, item); |
208 | 270 | ||
@@ -214,7 +276,7 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi, | |||
214 | * | 276 | * |
215 | * This can be called under lock_buffer() | 277 | * This can be called under lock_buffer() |
216 | */ | 278 | */ |
217 | int ocfs2_buffer_uptodate(struct inode *inode, | 279 | int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci, |
218 | struct buffer_head *bh) | 280 | struct buffer_head *bh) |
219 | { | 281 | { |
220 | /* Doesn't matter if the bh is in our cache or not -- if it's | 282 | /* Doesn't matter if the bh is in our cache or not -- if it's |
@@ -230,24 +292,24 @@ int ocfs2_buffer_uptodate(struct inode *inode, | |||
230 | 292 | ||
231 | /* Ok, locally the buffer is marked as up to date, now search | 293 | /* Ok, locally the buffer is marked as up to date, now search |
232 | * our cache to see if we can trust that. */ | 294 | * our cache to see if we can trust that. */ |
233 | return ocfs2_buffer_cached(OCFS2_I(inode), bh); | 295 | return ocfs2_buffer_cached(ci, bh); |
234 | } | 296 | } |
235 | 297 | ||
236 | /* | 298 | /* |
237 | * Determine whether a buffer is currently out on a read-ahead request. | 299 | * Determine whether a buffer is currently out on a read-ahead request. |
238 | * ip_io_sem should be held to serialize submitters with the logic here. | 300 | * ci_io_sem should be held to serialize submitters with the logic here. |
239 | */ | 301 | */ |
240 | int ocfs2_buffer_read_ahead(struct inode *inode, | 302 | int ocfs2_buffer_read_ahead(struct ocfs2_caching_info *ci, |
241 | struct buffer_head *bh) | 303 | struct buffer_head *bh) |
242 | { | 304 | { |
243 | return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh); | 305 | return buffer_locked(bh) && ocfs2_buffer_cached(ci, bh); |
244 | } | 306 | } |
245 | 307 | ||
246 | /* Requires ip_lock */ | 308 | /* Requires ip_lock */ |
247 | static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci, | 309 | static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci, |
248 | sector_t block) | 310 | sector_t block) |
249 | { | 311 | { |
250 | BUG_ON(ci->ci_num_cached >= OCFS2_INODE_MAX_CACHE_ARRAY); | 312 | BUG_ON(ci->ci_num_cached >= OCFS2_CACHE_INFO_MAX_ARRAY); |
251 | 313 | ||
252 | mlog(0, "block %llu takes position %u\n", (unsigned long long) block, | 314 | mlog(0, "block %llu takes position %u\n", (unsigned long long) block, |
253 | ci->ci_num_cached); | 315 | ci->ci_num_cached); |
@@ -292,66 +354,64 @@ static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci, | |||
292 | ci->ci_num_cached++; | 354 | ci->ci_num_cached++; |
293 | } | 355 | } |
294 | 356 | ||
295 | static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi, | 357 | /* co_cache_lock() must be held */ |
296 | struct ocfs2_caching_info *ci) | 358 | static inline int ocfs2_insert_can_use_array(struct ocfs2_caching_info *ci) |
297 | { | 359 | { |
298 | assert_spin_locked(&oi->ip_lock); | 360 | return (ci->ci_flags & OCFS2_CACHE_FL_INLINE) && |
299 | 361 | (ci->ci_num_cached < OCFS2_CACHE_INFO_MAX_ARRAY); | |
300 | return (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) && | ||
301 | (ci->ci_num_cached < OCFS2_INODE_MAX_CACHE_ARRAY); | ||
302 | } | 362 | } |
303 | 363 | ||
304 | /* tree should be exactly OCFS2_INODE_MAX_CACHE_ARRAY wide. NULL the | 364 | /* tree should be exactly OCFS2_CACHE_INFO_MAX_ARRAY wide. NULL the |
305 | * pointers in tree after we use them - this allows caller to detect | 365 | * pointers in tree after we use them - this allows caller to detect |
306 | * when to free in case of error. */ | 366 | * when to free in case of error. |
307 | static void ocfs2_expand_cache(struct ocfs2_inode_info *oi, | 367 | * |
368 | * The co_cache_lock() must be held. */ | ||
369 | static void ocfs2_expand_cache(struct ocfs2_caching_info *ci, | ||
308 | struct ocfs2_meta_cache_item **tree) | 370 | struct ocfs2_meta_cache_item **tree) |
309 | { | 371 | { |
310 | int i; | 372 | int i; |
311 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; | ||
312 | 373 | ||
313 | mlog_bug_on_msg(ci->ci_num_cached != OCFS2_INODE_MAX_CACHE_ARRAY, | 374 | mlog_bug_on_msg(ci->ci_num_cached != OCFS2_CACHE_INFO_MAX_ARRAY, |
314 | "Inode %llu, num cached = %u, should be %u\n", | 375 | "Owner %llu, num cached = %u, should be %u\n", |
315 | (unsigned long long)oi->ip_blkno, ci->ci_num_cached, | 376 | (unsigned long long)ocfs2_metadata_cache_owner(ci), |
316 | OCFS2_INODE_MAX_CACHE_ARRAY); | 377 | ci->ci_num_cached, OCFS2_CACHE_INFO_MAX_ARRAY); |
317 | mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE), | 378 | mlog_bug_on_msg(!(ci->ci_flags & OCFS2_CACHE_FL_INLINE), |
318 | "Inode %llu not marked as inline anymore!\n", | 379 | "Owner %llu not marked as inline anymore!\n", |
319 | (unsigned long long)oi->ip_blkno); | 380 | (unsigned long long)ocfs2_metadata_cache_owner(ci)); |
320 | assert_spin_locked(&oi->ip_lock); | ||
321 | 381 | ||
322 | /* Be careful to initialize the tree members *first* because | 382 | /* Be careful to initialize the tree members *first* because |
323 | * once the ci_tree is used, the array is junk... */ | 383 | * once the ci_tree is used, the array is junk... */ |
324 | for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) | 384 | for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) |
325 | tree[i]->c_block = ci->ci_cache.ci_array[i]; | 385 | tree[i]->c_block = ci->ci_cache.ci_array[i]; |
326 | 386 | ||
327 | oi->ip_flags &= ~OCFS2_INODE_CACHE_INLINE; | 387 | ci->ci_flags &= ~OCFS2_CACHE_FL_INLINE; |
328 | ci->ci_cache.ci_tree = RB_ROOT; | 388 | ci->ci_cache.ci_tree = RB_ROOT; |
329 | /* this will be set again by __ocfs2_insert_cache_tree */ | 389 | /* this will be set again by __ocfs2_insert_cache_tree */ |
330 | ci->ci_num_cached = 0; | 390 | ci->ci_num_cached = 0; |
331 | 391 | ||
332 | for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { | 392 | for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) { |
333 | __ocfs2_insert_cache_tree(ci, tree[i]); | 393 | __ocfs2_insert_cache_tree(ci, tree[i]); |
334 | tree[i] = NULL; | 394 | tree[i] = NULL; |
335 | } | 395 | } |
336 | 396 | ||
337 | mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n", | 397 | mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n", |
338 | (unsigned long long)oi->ip_blkno, oi->ip_flags, ci->ci_num_cached); | 398 | (unsigned long long)ocfs2_metadata_cache_owner(ci), |
399 | ci->ci_flags, ci->ci_num_cached); | ||
339 | } | 400 | } |
340 | 401 | ||
341 | /* Slow path function - memory allocation is necessary. See the | 402 | /* Slow path function - memory allocation is necessary. See the |
342 | * comment above ocfs2_set_buffer_uptodate for more information. */ | 403 | * comment above ocfs2_set_buffer_uptodate for more information. */ |
343 | static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, | 404 | static void __ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci, |
344 | sector_t block, | 405 | sector_t block, |
345 | int expand_tree) | 406 | int expand_tree) |
346 | { | 407 | { |
347 | int i; | 408 | int i; |
348 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; | ||
349 | struct ocfs2_meta_cache_item *new = NULL; | 409 | struct ocfs2_meta_cache_item *new = NULL; |
350 | struct ocfs2_meta_cache_item *tree[OCFS2_INODE_MAX_CACHE_ARRAY] = | 410 | struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] = |
351 | { NULL, }; | 411 | { NULL, }; |
352 | 412 | ||
353 | mlog(0, "Inode %llu, block %llu, expand = %d\n", | 413 | mlog(0, "Owner %llu, block %llu, expand = %d\n", |
354 | (unsigned long long)oi->ip_blkno, | 414 | (unsigned long long)ocfs2_metadata_cache_owner(ci), |
355 | (unsigned long long)block, expand_tree); | 415 | (unsigned long long)block, expand_tree); |
356 | 416 | ||
357 | new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS); | 417 | new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS); |
@@ -364,7 +424,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, | |||
364 | if (expand_tree) { | 424 | if (expand_tree) { |
365 | /* Do *not* allocate an array here - the removal code | 425 | /* Do *not* allocate an array here - the removal code |
366 | * has no way of tracking that. */ | 426 | * has no way of tracking that. */ |
367 | for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { | 427 | for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) { |
368 | tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep, | 428 | tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep, |
369 | GFP_NOFS); | 429 | GFP_NOFS); |
370 | if (!tree[i]) { | 430 | if (!tree[i]) { |
@@ -376,21 +436,21 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, | |||
376 | } | 436 | } |
377 | } | 437 | } |
378 | 438 | ||
379 | spin_lock(&oi->ip_lock); | 439 | ocfs2_metadata_cache_lock(ci); |
380 | if (ocfs2_insert_can_use_array(oi, ci)) { | 440 | if (ocfs2_insert_can_use_array(ci)) { |
381 | mlog(0, "Someone cleared the tree underneath us\n"); | 441 | mlog(0, "Someone cleared the tree underneath us\n"); |
382 | /* Ok, items were removed from the cache in between | 442 | /* Ok, items were removed from the cache in between |
383 | * locks. Detect this and revert back to the fast path */ | 443 | * locks. Detect this and revert back to the fast path */ |
384 | ocfs2_append_cache_array(ci, block); | 444 | ocfs2_append_cache_array(ci, block); |
385 | spin_unlock(&oi->ip_lock); | 445 | ocfs2_metadata_cache_unlock(ci); |
386 | goto out_free; | 446 | goto out_free; |
387 | } | 447 | } |
388 | 448 | ||
389 | if (expand_tree) | 449 | if (expand_tree) |
390 | ocfs2_expand_cache(oi, tree); | 450 | ocfs2_expand_cache(ci, tree); |
391 | 451 | ||
392 | __ocfs2_insert_cache_tree(ci, new); | 452 | __ocfs2_insert_cache_tree(ci, new); |
393 | spin_unlock(&oi->ip_lock); | 453 | ocfs2_metadata_cache_unlock(ci); |
394 | 454 | ||
395 | new = NULL; | 455 | new = NULL; |
396 | out_free: | 456 | out_free: |
@@ -400,14 +460,14 @@ out_free: | |||
400 | /* If these were used, then ocfs2_expand_cache re-set them to | 460 | /* If these were used, then ocfs2_expand_cache re-set them to |
401 | * NULL for us. */ | 461 | * NULL for us. */ |
402 | if (tree[0]) { | 462 | if (tree[0]) { |
403 | for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) | 463 | for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) |
404 | if (tree[i]) | 464 | if (tree[i]) |
405 | kmem_cache_free(ocfs2_uptodate_cachep, | 465 | kmem_cache_free(ocfs2_uptodate_cachep, |
406 | tree[i]); | 466 | tree[i]); |
407 | } | 467 | } |
408 | } | 468 | } |
409 | 469 | ||
410 | /* Item insertion is guarded by ip_io_mutex, so the insertion path takes | 470 | /* Item insertion is guarded by co_io_lock(), so the insertion path takes |
411 | * advantage of this by not rechecking for a duplicate insert during | 471 | * advantage of this by not rechecking for a duplicate insert during |
412 | * the slow case. Additionally, if the cache needs to be bumped up to | 472 | * the slow case. Additionally, if the cache needs to be bumped up to |
413 | * a tree, the code will not recheck after acquiring the lock -- | 473 | * a tree, the code will not recheck after acquiring the lock -- |
@@ -425,59 +485,55 @@ out_free: | |||
425 | * Readahead buffers can be passed in here before the I/O request is | 485 | * Readahead buffers can be passed in here before the I/O request is |
426 | * completed. | 486 | * completed. |
427 | */ | 487 | */ |
428 | void ocfs2_set_buffer_uptodate(struct inode *inode, | 488 | void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci, |
429 | struct buffer_head *bh) | 489 | struct buffer_head *bh) |
430 | { | 490 | { |
431 | int expand; | 491 | int expand; |
432 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
433 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; | ||
434 | 492 | ||
435 | /* The block may very well exist in our cache already, so avoid | 493 | /* The block may very well exist in our cache already, so avoid |
436 | * doing any more work in that case. */ | 494 | * doing any more work in that case. */ |
437 | if (ocfs2_buffer_cached(oi, bh)) | 495 | if (ocfs2_buffer_cached(ci, bh)) |
438 | return; | 496 | return; |
439 | 497 | ||
440 | mlog(0, "Inode %llu, inserting block %llu\n", | 498 | mlog(0, "Owner %llu, inserting block %llu\n", |
441 | (unsigned long long)oi->ip_blkno, | 499 | (unsigned long long)ocfs2_metadata_cache_owner(ci), |
442 | (unsigned long long)bh->b_blocknr); | 500 | (unsigned long long)bh->b_blocknr); |
443 | 501 | ||
444 | /* No need to recheck under spinlock - insertion is guarded by | 502 | /* No need to recheck under spinlock - insertion is guarded by |
445 | * ip_io_mutex */ | 503 | * co_io_lock() */ |
446 | spin_lock(&oi->ip_lock); | 504 | ocfs2_metadata_cache_lock(ci); |
447 | if (ocfs2_insert_can_use_array(oi, ci)) { | 505 | if (ocfs2_insert_can_use_array(ci)) { |
448 | /* Fast case - it's an array and there's a free | 506 | /* Fast case - it's an array and there's a free |
449 | * spot. */ | 507 | * spot. */ |
450 | ocfs2_append_cache_array(ci, bh->b_blocknr); | 508 | ocfs2_append_cache_array(ci, bh->b_blocknr); |
451 | spin_unlock(&oi->ip_lock); | 509 | ocfs2_metadata_cache_unlock(ci); |
452 | return; | 510 | return; |
453 | } | 511 | } |
454 | 512 | ||
455 | expand = 0; | 513 | expand = 0; |
456 | if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) { | 514 | if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) { |
457 | /* We need to bump things up to a tree. */ | 515 | /* We need to bump things up to a tree. */ |
458 | expand = 1; | 516 | expand = 1; |
459 | } | 517 | } |
460 | spin_unlock(&oi->ip_lock); | 518 | ocfs2_metadata_cache_unlock(ci); |
461 | 519 | ||
462 | __ocfs2_set_buffer_uptodate(oi, bh->b_blocknr, expand); | 520 | __ocfs2_set_buffer_uptodate(ci, bh->b_blocknr, expand); |
463 | } | 521 | } |
464 | 522 | ||
465 | /* Called against a newly allocated buffer. Most likely nobody should | 523 | /* Called against a newly allocated buffer. Most likely nobody should |
466 | * be able to read this sort of metadata while it's still being | 524 | * be able to read this sort of metadata while it's still being |
467 | * allocated, but this is careful to take ip_io_mutex anyway. */ | 525 | * allocated, but this is careful to take co_io_lock() anyway. */ |
468 | void ocfs2_set_new_buffer_uptodate(struct inode *inode, | 526 | void ocfs2_set_new_buffer_uptodate(struct ocfs2_caching_info *ci, |
469 | struct buffer_head *bh) | 527 | struct buffer_head *bh) |
470 | { | 528 | { |
471 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
472 | |||
473 | /* This should definitely *not* exist in our cache */ | 529 | /* This should definitely *not* exist in our cache */ |
474 | BUG_ON(ocfs2_buffer_cached(oi, bh)); | 530 | BUG_ON(ocfs2_buffer_cached(ci, bh)); |
475 | 531 | ||
476 | set_buffer_uptodate(bh); | 532 | set_buffer_uptodate(bh); |
477 | 533 | ||
478 | mutex_lock(&oi->ip_io_mutex); | 534 | ocfs2_metadata_cache_io_lock(ci); |
479 | ocfs2_set_buffer_uptodate(inode, bh); | 535 | ocfs2_set_buffer_uptodate(ci, bh); |
480 | mutex_unlock(&oi->ip_io_mutex); | 536 | ocfs2_metadata_cache_io_unlock(ci); |
481 | } | 537 | } |
482 | 538 | ||
483 | /* Requires ip_lock. */ | 539 | /* Requires ip_lock. */ |
@@ -487,7 +543,7 @@ static void ocfs2_remove_metadata_array(struct ocfs2_caching_info *ci, | |||
487 | sector_t *array = ci->ci_cache.ci_array; | 543 | sector_t *array = ci->ci_cache.ci_array; |
488 | int bytes; | 544 | int bytes; |
489 | 545 | ||
490 | BUG_ON(index < 0 || index >= OCFS2_INODE_MAX_CACHE_ARRAY); | 546 | BUG_ON(index < 0 || index >= OCFS2_CACHE_INFO_MAX_ARRAY); |
491 | BUG_ON(index >= ci->ci_num_cached); | 547 | BUG_ON(index >= ci->ci_num_cached); |
492 | BUG_ON(!ci->ci_num_cached); | 548 | BUG_ON(!ci->ci_num_cached); |
493 | 549 | ||
@@ -515,21 +571,19 @@ static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci, | |||
515 | ci->ci_num_cached--; | 571 | ci->ci_num_cached--; |
516 | } | 572 | } |
517 | 573 | ||
518 | static void ocfs2_remove_block_from_cache(struct inode *inode, | 574 | static void ocfs2_remove_block_from_cache(struct ocfs2_caching_info *ci, |
519 | sector_t block) | 575 | sector_t block) |
520 | { | 576 | { |
521 | int index; | 577 | int index; |
522 | struct ocfs2_meta_cache_item *item = NULL; | 578 | struct ocfs2_meta_cache_item *item = NULL; |
523 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
524 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; | ||
525 | 579 | ||
526 | spin_lock(&oi->ip_lock); | 580 | ocfs2_metadata_cache_lock(ci); |
527 | mlog(0, "Inode %llu, remove %llu, items = %u, array = %u\n", | 581 | mlog(0, "Owner %llu, remove %llu, items = %u, array = %u\n", |
528 | (unsigned long long)oi->ip_blkno, | 582 | (unsigned long long)ocfs2_metadata_cache_owner(ci), |
529 | (unsigned long long) block, ci->ci_num_cached, | 583 | (unsigned long long) block, ci->ci_num_cached, |
530 | oi->ip_flags & OCFS2_INODE_CACHE_INLINE); | 584 | ci->ci_flags & OCFS2_CACHE_FL_INLINE); |
531 | 585 | ||
532 | if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) { | 586 | if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) { |
533 | index = ocfs2_search_cache_array(ci, block); | 587 | index = ocfs2_search_cache_array(ci, block); |
534 | if (index != -1) | 588 | if (index != -1) |
535 | ocfs2_remove_metadata_array(ci, index); | 589 | ocfs2_remove_metadata_array(ci, index); |
@@ -538,7 +592,7 @@ static void ocfs2_remove_block_from_cache(struct inode *inode, | |||
538 | if (item) | 592 | if (item) |
539 | ocfs2_remove_metadata_tree(ci, item); | 593 | ocfs2_remove_metadata_tree(ci, item); |
540 | } | 594 | } |
541 | spin_unlock(&oi->ip_lock); | 595 | ocfs2_metadata_cache_unlock(ci); |
542 | 596 | ||
543 | if (item) | 597 | if (item) |
544 | kmem_cache_free(ocfs2_uptodate_cachep, item); | 598 | kmem_cache_free(ocfs2_uptodate_cachep, item); |
@@ -549,23 +603,24 @@ static void ocfs2_remove_block_from_cache(struct inode *inode, | |||
549 | * bother reverting things to an inlined array in the case of a remove | 603 | * bother reverting things to an inlined array in the case of a remove |
550 | * which moves us back under the limit. | 604 | * which moves us back under the limit. |
551 | */ | 605 | */ |
552 | void ocfs2_remove_from_cache(struct inode *inode, | 606 | void ocfs2_remove_from_cache(struct ocfs2_caching_info *ci, |
553 | struct buffer_head *bh) | 607 | struct buffer_head *bh) |
554 | { | 608 | { |
555 | sector_t block = bh->b_blocknr; | 609 | sector_t block = bh->b_blocknr; |
556 | 610 | ||
557 | ocfs2_remove_block_from_cache(inode, block); | 611 | ocfs2_remove_block_from_cache(ci, block); |
558 | } | 612 | } |
559 | 613 | ||
560 | /* Called when we remove xattr clusters from an inode. */ | 614 | /* Called when we remove xattr clusters from an inode. */ |
561 | void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, | 615 | void ocfs2_remove_xattr_clusters_from_cache(struct ocfs2_caching_info *ci, |
562 | sector_t block, | 616 | sector_t block, |
563 | u32 c_len) | 617 | u32 c_len) |
564 | { | 618 | { |
565 | unsigned int i, b_len = ocfs2_clusters_to_blocks(inode->i_sb, 1) * c_len; | 619 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); |
620 | unsigned int i, b_len = ocfs2_clusters_to_blocks(sb, 1) * c_len; | ||
566 | 621 | ||
567 | for (i = 0; i < b_len; i++, block++) | 622 | for (i = 0; i < b_len; i++, block++) |
568 | ocfs2_remove_block_from_cache(inode, block); | 623 | ocfs2_remove_block_from_cache(ci, block); |
569 | } | 624 | } |
570 | 625 | ||
571 | int __init init_ocfs2_uptodate_cache(void) | 626 | int __init init_ocfs2_uptodate_cache(void) |
@@ -577,7 +632,7 @@ int __init init_ocfs2_uptodate_cache(void) | |||
577 | return -ENOMEM; | 632 | return -ENOMEM; |
578 | 633 | ||
579 | mlog(0, "%u inlined cache items per inode.\n", | 634 | mlog(0, "%u inlined cache items per inode.\n", |
580 | OCFS2_INODE_MAX_CACHE_ARRAY); | 635 | OCFS2_CACHE_INFO_MAX_ARRAY); |
581 | 636 | ||
582 | return 0; | 637 | return 0; |
583 | } | 638 | } |
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h index 531b4b3a0c47..0d826fe2da0d 100644 --- a/fs/ocfs2/uptodate.h +++ b/fs/ocfs2/uptodate.h | |||
@@ -26,24 +26,59 @@ | |||
26 | #ifndef OCFS2_UPTODATE_H | 26 | #ifndef OCFS2_UPTODATE_H |
27 | #define OCFS2_UPTODATE_H | 27 | #define OCFS2_UPTODATE_H |
28 | 28 | ||
29 | /* | ||
30 | * The caching code relies on locking provided by the user of | ||
31 | * struct ocfs2_caching_info. These operations connect that up. | ||
32 | */ | ||
33 | struct ocfs2_caching_operations { | ||
34 | /* | ||
35 | * A u64 representing the owning structure. Usually this | ||
36 | * is the block number (i_blkno or whatnot). This is used so | ||
37 | * that caching log messages can identify the owning structure. | ||
38 | */ | ||
39 | u64 (*co_owner)(struct ocfs2_caching_info *ci); | ||
40 | |||
41 | /* The superblock is needed during I/O. */ | ||
42 | struct super_block *(*co_get_super)(struct ocfs2_caching_info *ci); | ||
43 | /* | ||
44 | * Lock and unlock the caching data. These will not sleep, and | ||
45 | * should probably be spinlocks. | ||
46 | */ | ||
47 | void (*co_cache_lock)(struct ocfs2_caching_info *ci); | ||
48 | void (*co_cache_unlock)(struct ocfs2_caching_info *ci); | ||
49 | |||
50 | /* | ||
51 | * Lock and unlock for disk I/O. These will sleep, and should | ||
52 | * be mutexes. | ||
53 | */ | ||
54 | void (*co_io_lock)(struct ocfs2_caching_info *ci); | ||
55 | void (*co_io_unlock)(struct ocfs2_caching_info *ci); | ||
56 | }; | ||
57 | |||
29 | int __init init_ocfs2_uptodate_cache(void); | 58 | int __init init_ocfs2_uptodate_cache(void); |
30 | void exit_ocfs2_uptodate_cache(void); | 59 | void exit_ocfs2_uptodate_cache(void); |
31 | 60 | ||
32 | void ocfs2_metadata_cache_init(struct inode *inode); | 61 | void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci, |
33 | void ocfs2_metadata_cache_purge(struct inode *inode); | 62 | const struct ocfs2_caching_operations *ops); |
63 | void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci); | ||
64 | void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci); | ||
65 | |||
66 | u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci); | ||
67 | void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci); | ||
68 | void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci); | ||
34 | 69 | ||
35 | int ocfs2_buffer_uptodate(struct inode *inode, | 70 | int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci, |
36 | struct buffer_head *bh); | 71 | struct buffer_head *bh); |
37 | void ocfs2_set_buffer_uptodate(struct inode *inode, | 72 | void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci, |
38 | struct buffer_head *bh); | 73 | struct buffer_head *bh); |
39 | void ocfs2_set_new_buffer_uptodate(struct inode *inode, | 74 | void ocfs2_set_new_buffer_uptodate(struct ocfs2_caching_info *ci, |
40 | struct buffer_head *bh); | 75 | struct buffer_head *bh); |
41 | void ocfs2_remove_from_cache(struct inode *inode, | 76 | void ocfs2_remove_from_cache(struct ocfs2_caching_info *ci, |
42 | struct buffer_head *bh); | 77 | struct buffer_head *bh); |
43 | void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, | 78 | void ocfs2_remove_xattr_clusters_from_cache(struct ocfs2_caching_info *ci, |
44 | sector_t block, | 79 | sector_t block, |
45 | u32 c_len); | 80 | u32 c_len); |
46 | int ocfs2_buffer_read_ahead(struct inode *inode, | 81 | int ocfs2_buffer_read_ahead(struct ocfs2_caching_info *ci, |
47 | struct buffer_head *bh); | 82 | struct buffer_head *bh); |
48 | 83 | ||
49 | #endif /* OCFS2_UPTODATE_H */ | 84 | #endif /* OCFS2_UPTODATE_H */ |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d1a27cda984f..fe3419068df2 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -55,7 +55,8 @@ | |||
55 | #include "buffer_head_io.h" | 55 | #include "buffer_head_io.h" |
56 | #include "super.h" | 56 | #include "super.h" |
57 | #include "xattr.h" | 57 | #include "xattr.h" |
58 | 58 | #include "refcounttree.h" | |
59 | #include "acl.h" | ||
59 | 60 | ||
60 | struct ocfs2_xattr_def_value_root { | 61 | struct ocfs2_xattr_def_value_root { |
61 | struct ocfs2_xattr_value_root xv; | 62 | struct ocfs2_xattr_value_root xv; |
@@ -140,7 +141,7 @@ struct ocfs2_xattr_search { | |||
140 | int not_found; | 141 | int not_found; |
141 | }; | 142 | }; |
142 | 143 | ||
143 | static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, | 144 | static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, |
144 | struct ocfs2_xattr_header *xh, | 145 | struct ocfs2_xattr_header *xh, |
145 | int index, | 146 | int index, |
146 | int *block_off, | 147 | int *block_off, |
@@ -157,7 +158,7 @@ static int ocfs2_xattr_index_block_find(struct inode *inode, | |||
157 | struct ocfs2_xattr_search *xs); | 158 | struct ocfs2_xattr_search *xs); |
158 | 159 | ||
159 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | 160 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, |
160 | struct ocfs2_xattr_tree_root *xt, | 161 | struct buffer_head *blk_bh, |
161 | char *buffer, | 162 | char *buffer, |
162 | size_t buffer_size); | 163 | size_t buffer_size); |
163 | 164 | ||
@@ -170,12 +171,42 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode, | |||
170 | struct ocfs2_xattr_search *xs, | 171 | struct ocfs2_xattr_search *xs, |
171 | struct ocfs2_xattr_set_ctxt *ctxt); | 172 | struct ocfs2_xattr_set_ctxt *ctxt); |
172 | 173 | ||
173 | static int ocfs2_delete_xattr_index_block(struct inode *inode, | 174 | typedef int (xattr_tree_rec_func)(struct inode *inode, |
174 | struct buffer_head *xb_bh); | 175 | struct buffer_head *root_bh, |
176 | u64 blkno, u32 cpos, u32 len, void *para); | ||
177 | static int ocfs2_iterate_xattr_index_block(struct inode *inode, | ||
178 | struct buffer_head *root_bh, | ||
179 | xattr_tree_rec_func *rec_func, | ||
180 | void *para); | ||
181 | static int ocfs2_delete_xattr_in_bucket(struct inode *inode, | ||
182 | struct ocfs2_xattr_bucket *bucket, | ||
183 | void *para); | ||
184 | static int ocfs2_rm_xattr_cluster(struct inode *inode, | ||
185 | struct buffer_head *root_bh, | ||
186 | u64 blkno, | ||
187 | u32 cpos, | ||
188 | u32 len, | ||
189 | void *para); | ||
190 | |||
175 | static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, | 191 | static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, |
176 | u64 src_blk, u64 last_blk, u64 to_blk, | 192 | u64 src_blk, u64 last_blk, u64 to_blk, |
177 | unsigned int start_bucket, | 193 | unsigned int start_bucket, |
178 | u32 *first_hash); | 194 | u32 *first_hash); |
195 | static int ocfs2_prepare_refcount_xattr(struct inode *inode, | ||
196 | struct ocfs2_dinode *di, | ||
197 | struct ocfs2_xattr_info *xi, | ||
198 | struct ocfs2_xattr_search *xis, | ||
199 | struct ocfs2_xattr_search *xbs, | ||
200 | struct ocfs2_refcount_tree **ref_tree, | ||
201 | int *meta_need, | ||
202 | int *credits); | ||
203 | static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, | ||
204 | struct ocfs2_xattr_bucket *bucket, | ||
205 | int offset, | ||
206 | struct ocfs2_xattr_value_root **xv, | ||
207 | struct buffer_head **bh); | ||
208 | static int ocfs2_xattr_security_set(struct inode *inode, const char *name, | ||
209 | const void *value, size_t size, int flags); | ||
179 | 210 | ||
180 | static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) | 211 | static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) |
181 | { | 212 | { |
@@ -254,9 +285,9 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, | |||
254 | break; | 285 | break; |
255 | } | 286 | } |
256 | 287 | ||
257 | if (!ocfs2_buffer_uptodate(bucket->bu_inode, | 288 | if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), |
258 | bucket->bu_bhs[i])) | 289 | bucket->bu_bhs[i])) |
259 | ocfs2_set_new_buffer_uptodate(bucket->bu_inode, | 290 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), |
260 | bucket->bu_bhs[i]); | 291 | bucket->bu_bhs[i]); |
261 | } | 292 | } |
262 | 293 | ||
@@ -271,7 +302,7 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, | |||
271 | { | 302 | { |
272 | int rc; | 303 | int rc; |
273 | 304 | ||
274 | rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno, | 305 | rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno, |
275 | bucket->bu_blocks, bucket->bu_bhs, 0, | 306 | bucket->bu_blocks, bucket->bu_bhs, 0, |
276 | NULL); | 307 | NULL); |
277 | if (!rc) { | 308 | if (!rc) { |
@@ -297,7 +328,8 @@ static int ocfs2_xattr_bucket_journal_access(handle_t *handle, | |||
297 | int i, rc = 0; | 328 | int i, rc = 0; |
298 | 329 | ||
299 | for (i = 0; i < bucket->bu_blocks; i++) { | 330 | for (i = 0; i < bucket->bu_blocks; i++) { |
300 | rc = ocfs2_journal_access(handle, bucket->bu_inode, | 331 | rc = ocfs2_journal_access(handle, |
332 | INODE_CACHE(bucket->bu_inode), | ||
301 | bucket->bu_bhs[i], type); | 333 | bucket->bu_bhs[i], type); |
302 | if (rc) { | 334 | if (rc) { |
303 | mlog_errno(rc); | 335 | mlog_errno(rc); |
@@ -399,7 +431,7 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, | |||
399 | int rc; | 431 | int rc; |
400 | struct buffer_head *tmp = *bh; | 432 | struct buffer_head *tmp = *bh; |
401 | 433 | ||
402 | rc = ocfs2_read_block(inode, xb_blkno, &tmp, | 434 | rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp, |
403 | ocfs2_validate_xattr_block); | 435 | ocfs2_validate_xattr_block); |
404 | 436 | ||
405 | /* If ocfs2_read_block() got us a new bh, pass it up. */ | 437 | /* If ocfs2_read_block() got us a new bh, pass it up. */ |
@@ -596,15 +628,14 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, | |||
596 | int status = 0; | 628 | int status = 0; |
597 | handle_t *handle = ctxt->handle; | 629 | handle_t *handle = ctxt->handle; |
598 | enum ocfs2_alloc_restarted why; | 630 | enum ocfs2_alloc_restarted why; |
599 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
600 | u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); | 631 | u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); |
601 | struct ocfs2_extent_tree et; | 632 | struct ocfs2_extent_tree et; |
602 | 633 | ||
603 | mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); | 634 | mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); |
604 | 635 | ||
605 | ocfs2_init_xattr_value_extent_tree(&et, inode, vb); | 636 | ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); |
606 | 637 | ||
607 | status = vb->vb_access(handle, inode, vb->vb_bh, | 638 | status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, |
608 | OCFS2_JOURNAL_ACCESS_WRITE); | 639 | OCFS2_JOURNAL_ACCESS_WRITE); |
609 | if (status < 0) { | 640 | if (status < 0) { |
610 | mlog_errno(status); | 641 | mlog_errno(status); |
@@ -612,13 +643,11 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, | |||
612 | } | 643 | } |
613 | 644 | ||
614 | prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); | 645 | prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); |
615 | status = ocfs2_add_clusters_in_btree(osb, | 646 | status = ocfs2_add_clusters_in_btree(handle, |
616 | inode, | 647 | &et, |
617 | &logical_start, | 648 | &logical_start, |
618 | clusters_to_add, | 649 | clusters_to_add, |
619 | 0, | 650 | 0, |
620 | &et, | ||
621 | handle, | ||
622 | ctxt->data_ac, | 651 | ctxt->data_ac, |
623 | ctxt->meta_ac, | 652 | ctxt->meta_ac, |
624 | &why); | 653 | &why); |
@@ -649,6 +678,7 @@ leave: | |||
649 | static int __ocfs2_remove_xattr_range(struct inode *inode, | 678 | static int __ocfs2_remove_xattr_range(struct inode *inode, |
650 | struct ocfs2_xattr_value_buf *vb, | 679 | struct ocfs2_xattr_value_buf *vb, |
651 | u32 cpos, u32 phys_cpos, u32 len, | 680 | u32 cpos, u32 phys_cpos, u32 len, |
681 | unsigned int ext_flags, | ||
652 | struct ocfs2_xattr_set_ctxt *ctxt) | 682 | struct ocfs2_xattr_set_ctxt *ctxt) |
653 | { | 683 | { |
654 | int ret; | 684 | int ret; |
@@ -656,16 +686,16 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, | |||
656 | handle_t *handle = ctxt->handle; | 686 | handle_t *handle = ctxt->handle; |
657 | struct ocfs2_extent_tree et; | 687 | struct ocfs2_extent_tree et; |
658 | 688 | ||
659 | ocfs2_init_xattr_value_extent_tree(&et, inode, vb); | 689 | ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); |
660 | 690 | ||
661 | ret = vb->vb_access(handle, inode, vb->vb_bh, | 691 | ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, |
662 | OCFS2_JOURNAL_ACCESS_WRITE); | 692 | OCFS2_JOURNAL_ACCESS_WRITE); |
663 | if (ret) { | 693 | if (ret) { |
664 | mlog_errno(ret); | 694 | mlog_errno(ret); |
665 | goto out; | 695 | goto out; |
666 | } | 696 | } |
667 | 697 | ||
668 | ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac, | 698 | ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac, |
669 | &ctxt->dealloc); | 699 | &ctxt->dealloc); |
670 | if (ret) { | 700 | if (ret) { |
671 | mlog_errno(ret); | 701 | mlog_errno(ret); |
@@ -680,7 +710,14 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, | |||
680 | goto out; | 710 | goto out; |
681 | } | 711 | } |
682 | 712 | ||
683 | ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len); | 713 | if (ext_flags & OCFS2_EXT_REFCOUNTED) |
714 | ret = ocfs2_decrease_refcount(inode, handle, | ||
715 | ocfs2_blocks_to_clusters(inode->i_sb, | ||
716 | phys_blkno), | ||
717 | len, ctxt->meta_ac, &ctxt->dealloc, 1); | ||
718 | else | ||
719 | ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, | ||
720 | phys_blkno, len); | ||
684 | if (ret) | 721 | if (ret) |
685 | mlog_errno(ret); | 722 | mlog_errno(ret); |
686 | 723 | ||
@@ -695,6 +732,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, | |||
695 | struct ocfs2_xattr_set_ctxt *ctxt) | 732 | struct ocfs2_xattr_set_ctxt *ctxt) |
696 | { | 733 | { |
697 | int ret = 0; | 734 | int ret = 0; |
735 | unsigned int ext_flags; | ||
698 | u32 trunc_len, cpos, phys_cpos, alloc_size; | 736 | u32 trunc_len, cpos, phys_cpos, alloc_size; |
699 | u64 block; | 737 | u64 block; |
700 | 738 | ||
@@ -706,7 +744,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, | |||
706 | while (trunc_len) { | 744 | while (trunc_len) { |
707 | ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, | 745 | ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, |
708 | &alloc_size, | 746 | &alloc_size, |
709 | &vb->vb_xv->xr_list); | 747 | &vb->vb_xv->xr_list, &ext_flags); |
710 | if (ret) { | 748 | if (ret) { |
711 | mlog_errno(ret); | 749 | mlog_errno(ret); |
712 | goto out; | 750 | goto out; |
@@ -717,15 +755,15 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, | |||
717 | 755 | ||
718 | ret = __ocfs2_remove_xattr_range(inode, vb, cpos, | 756 | ret = __ocfs2_remove_xattr_range(inode, vb, cpos, |
719 | phys_cpos, alloc_size, | 757 | phys_cpos, alloc_size, |
720 | ctxt); | 758 | ext_flags, ctxt); |
721 | if (ret) { | 759 | if (ret) { |
722 | mlog_errno(ret); | 760 | mlog_errno(ret); |
723 | goto out; | 761 | goto out; |
724 | } | 762 | } |
725 | 763 | ||
726 | block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | 764 | block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); |
727 | ocfs2_remove_xattr_clusters_from_cache(inode, block, | 765 | ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), |
728 | alloc_size); | 766 | block, alloc_size); |
729 | cpos += alloc_size; | 767 | cpos += alloc_size; |
730 | trunc_len -= alloc_size; | 768 | trunc_len -= alloc_size; |
731 | } | 769 | } |
@@ -810,6 +848,23 @@ static int ocfs2_xattr_list_entries(struct inode *inode, | |||
810 | return result; | 848 | return result; |
811 | } | 849 | } |
812 | 850 | ||
851 | int ocfs2_has_inline_xattr_value_outside(struct inode *inode, | ||
852 | struct ocfs2_dinode *di) | ||
853 | { | ||
854 | struct ocfs2_xattr_header *xh; | ||
855 | int i; | ||
856 | |||
857 | xh = (struct ocfs2_xattr_header *) | ||
858 | ((void *)di + inode->i_sb->s_blocksize - | ||
859 | le16_to_cpu(di->i_xattr_inline_size)); | ||
860 | |||
861 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) | ||
862 | if (!ocfs2_xattr_is_local(&xh->xh_entries[i])) | ||
863 | return 1; | ||
864 | |||
865 | return 0; | ||
866 | } | ||
867 | |||
813 | static int ocfs2_xattr_ibody_list(struct inode *inode, | 868 | static int ocfs2_xattr_ibody_list(struct inode *inode, |
814 | struct ocfs2_dinode *di, | 869 | struct ocfs2_dinode *di, |
815 | char *buffer, | 870 | char *buffer, |
@@ -855,11 +910,9 @@ static int ocfs2_xattr_block_list(struct inode *inode, | |||
855 | struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; | 910 | struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; |
856 | ret = ocfs2_xattr_list_entries(inode, header, | 911 | ret = ocfs2_xattr_list_entries(inode, header, |
857 | buffer, buffer_size); | 912 | buffer, buffer_size); |
858 | } else { | 913 | } else |
859 | struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | 914 | ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh, |
860 | ret = ocfs2_xattr_tree_list_index_block(inode, xt, | ||
861 | buffer, buffer_size); | 915 | buffer, buffer_size); |
862 | } | ||
863 | 916 | ||
864 | brelse(blk_bh); | 917 | brelse(blk_bh); |
865 | 918 | ||
@@ -961,7 +1014,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode, | |||
961 | cpos = 0; | 1014 | cpos = 0; |
962 | while (cpos < clusters) { | 1015 | while (cpos < clusters) { |
963 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, | 1016 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, |
964 | &num_clusters, el); | 1017 | &num_clusters, el, NULL); |
965 | if (ret) { | 1018 | if (ret) { |
966 | mlog_errno(ret); | 1019 | mlog_errno(ret); |
967 | goto out; | 1020 | goto out; |
@@ -970,7 +1023,8 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode, | |||
970 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); | 1023 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); |
971 | /* Copy ocfs2_xattr_value */ | 1024 | /* Copy ocfs2_xattr_value */ |
972 | for (i = 0; i < num_clusters * bpc; i++, blkno++) { | 1025 | for (i = 0; i < num_clusters * bpc; i++, blkno++) { |
973 | ret = ocfs2_read_block(inode, blkno, &bh, NULL); | 1026 | ret = ocfs2_read_block(INODE_CACHE(inode), blkno, |
1027 | &bh, NULL); | ||
974 | if (ret) { | 1028 | if (ret) { |
975 | mlog_errno(ret); | 1029 | mlog_errno(ret); |
976 | goto out; | 1030 | goto out; |
@@ -1085,7 +1139,7 @@ static int ocfs2_xattr_block_get(struct inode *inode, | |||
1085 | i = xs->here - xs->header->xh_entries; | 1139 | i = xs->here - xs->header->xh_entries; |
1086 | 1140 | ||
1087 | if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { | 1141 | if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { |
1088 | ret = ocfs2_xattr_bucket_get_name_value(inode, | 1142 | ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, |
1089 | bucket_xh(xs->bucket), | 1143 | bucket_xh(xs->bucket), |
1090 | i, | 1144 | i, |
1091 | &block_off, | 1145 | &block_off, |
@@ -1183,7 +1237,7 @@ static int ocfs2_xattr_get(struct inode *inode, | |||
1183 | 1237 | ||
1184 | static int __ocfs2_xattr_set_value_outside(struct inode *inode, | 1238 | static int __ocfs2_xattr_set_value_outside(struct inode *inode, |
1185 | handle_t *handle, | 1239 | handle_t *handle, |
1186 | struct ocfs2_xattr_value_root *xv, | 1240 | struct ocfs2_xattr_value_buf *vb, |
1187 | const void *value, | 1241 | const void *value, |
1188 | int value_len) | 1242 | int value_len) |
1189 | { | 1243 | { |
@@ -1194,28 +1248,34 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, | |||
1194 | u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); | 1248 | u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); |
1195 | u64 blkno; | 1249 | u64 blkno; |
1196 | struct buffer_head *bh = NULL; | 1250 | struct buffer_head *bh = NULL; |
1251 | unsigned int ext_flags; | ||
1252 | struct ocfs2_xattr_value_root *xv = vb->vb_xv; | ||
1197 | 1253 | ||
1198 | BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); | 1254 | BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); |
1199 | 1255 | ||
1200 | while (cpos < clusters) { | 1256 | while (cpos < clusters) { |
1201 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, | 1257 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, |
1202 | &num_clusters, &xv->xr_list); | 1258 | &num_clusters, &xv->xr_list, |
1259 | &ext_flags); | ||
1203 | if (ret) { | 1260 | if (ret) { |
1204 | mlog_errno(ret); | 1261 | mlog_errno(ret); |
1205 | goto out; | 1262 | goto out; |
1206 | } | 1263 | } |
1207 | 1264 | ||
1265 | BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED); | ||
1266 | |||
1208 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); | 1267 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); |
1209 | 1268 | ||
1210 | for (i = 0; i < num_clusters * bpc; i++, blkno++) { | 1269 | for (i = 0; i < num_clusters * bpc; i++, blkno++) { |
1211 | ret = ocfs2_read_block(inode, blkno, &bh, NULL); | 1270 | ret = ocfs2_read_block(INODE_CACHE(inode), blkno, |
1271 | &bh, NULL); | ||
1212 | if (ret) { | 1272 | if (ret) { |
1213 | mlog_errno(ret); | 1273 | mlog_errno(ret); |
1214 | goto out; | 1274 | goto out; |
1215 | } | 1275 | } |
1216 | 1276 | ||
1217 | ret = ocfs2_journal_access(handle, | 1277 | ret = ocfs2_journal_access(handle, |
1218 | inode, | 1278 | INODE_CACHE(inode), |
1219 | bh, | 1279 | bh, |
1220 | OCFS2_JOURNAL_ACCESS_WRITE); | 1280 | OCFS2_JOURNAL_ACCESS_WRITE); |
1221 | if (ret < 0) { | 1281 | if (ret < 0) { |
@@ -1266,7 +1326,7 @@ static int ocfs2_xattr_cleanup(struct inode *inode, | |||
1266 | void *val = xs->base + offs; | 1326 | void *val = xs->base + offs; |
1267 | size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | 1327 | size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; |
1268 | 1328 | ||
1269 | ret = vb->vb_access(handle, inode, vb->vb_bh, | 1329 | ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, |
1270 | OCFS2_JOURNAL_ACCESS_WRITE); | 1330 | OCFS2_JOURNAL_ACCESS_WRITE); |
1271 | if (ret) { | 1331 | if (ret) { |
1272 | mlog_errno(ret); | 1332 | mlog_errno(ret); |
@@ -1294,7 +1354,7 @@ static int ocfs2_xattr_update_entry(struct inode *inode, | |||
1294 | { | 1354 | { |
1295 | int ret; | 1355 | int ret; |
1296 | 1356 | ||
1297 | ret = vb->vb_access(handle, inode, vb->vb_bh, | 1357 | ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, |
1298 | OCFS2_JOURNAL_ACCESS_WRITE); | 1358 | OCFS2_JOURNAL_ACCESS_WRITE); |
1299 | if (ret) { | 1359 | if (ret) { |
1300 | mlog_errno(ret); | 1360 | mlog_errno(ret); |
@@ -1355,7 +1415,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode, | |||
1355 | mlog_errno(ret); | 1415 | mlog_errno(ret); |
1356 | return ret; | 1416 | return ret; |
1357 | } | 1417 | } |
1358 | ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb->vb_xv, | 1418 | ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb, |
1359 | xi->value, xi->value_len); | 1419 | xi->value, xi->value_len); |
1360 | if (ret < 0) | 1420 | if (ret < 0) |
1361 | mlog_errno(ret); | 1421 | mlog_errno(ret); |
@@ -1594,7 +1654,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, | |||
1594 | 1654 | ||
1595 | ret = __ocfs2_xattr_set_value_outside(inode, | 1655 | ret = __ocfs2_xattr_set_value_outside(inode, |
1596 | handle, | 1656 | handle, |
1597 | vb.vb_xv, | 1657 | &vb, |
1598 | xi->value, | 1658 | xi->value, |
1599 | xi->value_len); | 1659 | xi->value_len); |
1600 | if (ret < 0) | 1660 | if (ret < 0) |
@@ -1615,7 +1675,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, | |||
1615 | } | 1675 | } |
1616 | } | 1676 | } |
1617 | 1677 | ||
1618 | ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh, | 1678 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh, |
1619 | OCFS2_JOURNAL_ACCESS_WRITE); | 1679 | OCFS2_JOURNAL_ACCESS_WRITE); |
1620 | if (ret) { | 1680 | if (ret) { |
1621 | mlog_errno(ret); | 1681 | mlog_errno(ret); |
@@ -1623,7 +1683,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, | |||
1623 | } | 1683 | } |
1624 | 1684 | ||
1625 | if (!(flag & OCFS2_INLINE_XATTR_FL)) { | 1685 | if (!(flag & OCFS2_INLINE_XATTR_FL)) { |
1626 | ret = vb.vb_access(handle, inode, vb.vb_bh, | 1686 | ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh, |
1627 | OCFS2_JOURNAL_ACCESS_WRITE); | 1687 | OCFS2_JOURNAL_ACCESS_WRITE); |
1628 | if (ret) { | 1688 | if (ret) { |
1629 | mlog_errno(ret); | 1689 | mlog_errno(ret); |
@@ -1700,51 +1760,112 @@ out: | |||
1700 | return ret; | 1760 | return ret; |
1701 | } | 1761 | } |
1702 | 1762 | ||
1763 | /* | ||
1764 | * In xattr remove, if it is stored outside and refcounted, we may have | ||
1765 | * the chance to split the refcount tree. So need the allocators. | ||
1766 | */ | ||
1767 | static int ocfs2_lock_xattr_remove_allocators(struct inode *inode, | ||
1768 | struct ocfs2_xattr_value_root *xv, | ||
1769 | struct ocfs2_caching_info *ref_ci, | ||
1770 | struct buffer_head *ref_root_bh, | ||
1771 | struct ocfs2_alloc_context **meta_ac, | ||
1772 | int *ref_credits) | ||
1773 | { | ||
1774 | int ret, meta_add = 0; | ||
1775 | u32 p_cluster, num_clusters; | ||
1776 | unsigned int ext_flags; | ||
1777 | |||
1778 | *ref_credits = 0; | ||
1779 | ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, | ||
1780 | &num_clusters, | ||
1781 | &xv->xr_list, | ||
1782 | &ext_flags); | ||
1783 | if (ret) { | ||
1784 | mlog_errno(ret); | ||
1785 | goto out; | ||
1786 | } | ||
1787 | |||
1788 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) | ||
1789 | goto out; | ||
1790 | |||
1791 | ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci, | ||
1792 | ref_root_bh, xv, | ||
1793 | &meta_add, ref_credits); | ||
1794 | if (ret) { | ||
1795 | mlog_errno(ret); | ||
1796 | goto out; | ||
1797 | } | ||
1798 | |||
1799 | ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), | ||
1800 | meta_add, meta_ac); | ||
1801 | if (ret) | ||
1802 | mlog_errno(ret); | ||
1803 | |||
1804 | out: | ||
1805 | return ret; | ||
1806 | } | ||
1807 | |||
1703 | static int ocfs2_remove_value_outside(struct inode*inode, | 1808 | static int ocfs2_remove_value_outside(struct inode*inode, |
1704 | struct ocfs2_xattr_value_buf *vb, | 1809 | struct ocfs2_xattr_value_buf *vb, |
1705 | struct ocfs2_xattr_header *header) | 1810 | struct ocfs2_xattr_header *header, |
1811 | struct ocfs2_caching_info *ref_ci, | ||
1812 | struct buffer_head *ref_root_bh) | ||
1706 | { | 1813 | { |
1707 | int ret = 0, i; | 1814 | int ret = 0, i, ref_credits; |
1708 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1815 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1709 | struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; | 1816 | struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; |
1817 | void *val; | ||
1710 | 1818 | ||
1711 | ocfs2_init_dealloc_ctxt(&ctxt.dealloc); | 1819 | ocfs2_init_dealloc_ctxt(&ctxt.dealloc); |
1712 | 1820 | ||
1713 | ctxt.handle = ocfs2_start_trans(osb, | ||
1714 | ocfs2_remove_extent_credits(osb->sb)); | ||
1715 | if (IS_ERR(ctxt.handle)) { | ||
1716 | ret = PTR_ERR(ctxt.handle); | ||
1717 | mlog_errno(ret); | ||
1718 | goto out; | ||
1719 | } | ||
1720 | |||
1721 | for (i = 0; i < le16_to_cpu(header->xh_count); i++) { | 1821 | for (i = 0; i < le16_to_cpu(header->xh_count); i++) { |
1722 | struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; | 1822 | struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; |
1723 | 1823 | ||
1724 | if (!ocfs2_xattr_is_local(entry)) { | 1824 | if (ocfs2_xattr_is_local(entry)) |
1725 | void *val; | 1825 | continue; |
1726 | 1826 | ||
1727 | val = (void *)header + | 1827 | val = (void *)header + |
1728 | le16_to_cpu(entry->xe_name_offset); | 1828 | le16_to_cpu(entry->xe_name_offset); |
1729 | vb->vb_xv = (struct ocfs2_xattr_value_root *) | 1829 | vb->vb_xv = (struct ocfs2_xattr_value_root *) |
1730 | (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); | 1830 | (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); |
1731 | ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); | 1831 | |
1732 | if (ret < 0) { | 1832 | ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv, |
1733 | mlog_errno(ret); | 1833 | ref_ci, ref_root_bh, |
1734 | break; | 1834 | &ctxt.meta_ac, |
1735 | } | 1835 | &ref_credits); |
1836 | |||
1837 | ctxt.handle = ocfs2_start_trans(osb, ref_credits + | ||
1838 | ocfs2_remove_extent_credits(osb->sb)); | ||
1839 | if (IS_ERR(ctxt.handle)) { | ||
1840 | ret = PTR_ERR(ctxt.handle); | ||
1841 | mlog_errno(ret); | ||
1842 | break; | ||
1843 | } | ||
1844 | |||
1845 | ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); | ||
1846 | if (ret < 0) { | ||
1847 | mlog_errno(ret); | ||
1848 | break; | ||
1849 | } | ||
1850 | |||
1851 | ocfs2_commit_trans(osb, ctxt.handle); | ||
1852 | if (ctxt.meta_ac) { | ||
1853 | ocfs2_free_alloc_context(ctxt.meta_ac); | ||
1854 | ctxt.meta_ac = NULL; | ||
1736 | } | 1855 | } |
1737 | } | 1856 | } |
1738 | 1857 | ||
1739 | ocfs2_commit_trans(osb, ctxt.handle); | 1858 | if (ctxt.meta_ac) |
1859 | ocfs2_free_alloc_context(ctxt.meta_ac); | ||
1740 | ocfs2_schedule_truncate_log_flush(osb, 1); | 1860 | ocfs2_schedule_truncate_log_flush(osb, 1); |
1741 | ocfs2_run_deallocs(osb, &ctxt.dealloc); | 1861 | ocfs2_run_deallocs(osb, &ctxt.dealloc); |
1742 | out: | ||
1743 | return ret; | 1862 | return ret; |
1744 | } | 1863 | } |
1745 | 1864 | ||
1746 | static int ocfs2_xattr_ibody_remove(struct inode *inode, | 1865 | static int ocfs2_xattr_ibody_remove(struct inode *inode, |
1747 | struct buffer_head *di_bh) | 1866 | struct buffer_head *di_bh, |
1867 | struct ocfs2_caching_info *ref_ci, | ||
1868 | struct buffer_head *ref_root_bh) | ||
1748 | { | 1869 | { |
1749 | 1870 | ||
1750 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 1871 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
@@ -1759,13 +1880,21 @@ static int ocfs2_xattr_ibody_remove(struct inode *inode, | |||
1759 | ((void *)di + inode->i_sb->s_blocksize - | 1880 | ((void *)di + inode->i_sb->s_blocksize - |
1760 | le16_to_cpu(di->i_xattr_inline_size)); | 1881 | le16_to_cpu(di->i_xattr_inline_size)); |
1761 | 1882 | ||
1762 | ret = ocfs2_remove_value_outside(inode, &vb, header); | 1883 | ret = ocfs2_remove_value_outside(inode, &vb, header, |
1884 | ref_ci, ref_root_bh); | ||
1763 | 1885 | ||
1764 | return ret; | 1886 | return ret; |
1765 | } | 1887 | } |
1766 | 1888 | ||
1889 | struct ocfs2_rm_xattr_bucket_para { | ||
1890 | struct ocfs2_caching_info *ref_ci; | ||
1891 | struct buffer_head *ref_root_bh; | ||
1892 | }; | ||
1893 | |||
1767 | static int ocfs2_xattr_block_remove(struct inode *inode, | 1894 | static int ocfs2_xattr_block_remove(struct inode *inode, |
1768 | struct buffer_head *blk_bh) | 1895 | struct buffer_head *blk_bh, |
1896 | struct ocfs2_caching_info *ref_ci, | ||
1897 | struct buffer_head *ref_root_bh) | ||
1769 | { | 1898 | { |
1770 | struct ocfs2_xattr_block *xb; | 1899 | struct ocfs2_xattr_block *xb; |
1771 | int ret = 0; | 1900 | int ret = 0; |
@@ -1773,19 +1902,29 @@ static int ocfs2_xattr_block_remove(struct inode *inode, | |||
1773 | .vb_bh = blk_bh, | 1902 | .vb_bh = blk_bh, |
1774 | .vb_access = ocfs2_journal_access_xb, | 1903 | .vb_access = ocfs2_journal_access_xb, |
1775 | }; | 1904 | }; |
1905 | struct ocfs2_rm_xattr_bucket_para args = { | ||
1906 | .ref_ci = ref_ci, | ||
1907 | .ref_root_bh = ref_root_bh, | ||
1908 | }; | ||
1776 | 1909 | ||
1777 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | 1910 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; |
1778 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | 1911 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { |
1779 | struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); | 1912 | struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); |
1780 | ret = ocfs2_remove_value_outside(inode, &vb, header); | 1913 | ret = ocfs2_remove_value_outside(inode, &vb, header, |
1914 | ref_ci, ref_root_bh); | ||
1781 | } else | 1915 | } else |
1782 | ret = ocfs2_delete_xattr_index_block(inode, blk_bh); | 1916 | ret = ocfs2_iterate_xattr_index_block(inode, |
1917 | blk_bh, | ||
1918 | ocfs2_rm_xattr_cluster, | ||
1919 | &args); | ||
1783 | 1920 | ||
1784 | return ret; | 1921 | return ret; |
1785 | } | 1922 | } |
1786 | 1923 | ||
1787 | static int ocfs2_xattr_free_block(struct inode *inode, | 1924 | static int ocfs2_xattr_free_block(struct inode *inode, |
1788 | u64 block) | 1925 | u64 block, |
1926 | struct ocfs2_caching_info *ref_ci, | ||
1927 | struct buffer_head *ref_root_bh) | ||
1789 | { | 1928 | { |
1790 | struct inode *xb_alloc_inode; | 1929 | struct inode *xb_alloc_inode; |
1791 | struct buffer_head *xb_alloc_bh = NULL; | 1930 | struct buffer_head *xb_alloc_bh = NULL; |
@@ -1803,7 +1942,7 @@ static int ocfs2_xattr_free_block(struct inode *inode, | |||
1803 | goto out; | 1942 | goto out; |
1804 | } | 1943 | } |
1805 | 1944 | ||
1806 | ret = ocfs2_xattr_block_remove(inode, blk_bh); | 1945 | ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh); |
1807 | if (ret < 0) { | 1946 | if (ret < 0) { |
1808 | mlog_errno(ret); | 1947 | mlog_errno(ret); |
1809 | goto out; | 1948 | goto out; |
@@ -1863,6 +2002,9 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) | |||
1863 | { | 2002 | { |
1864 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 2003 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1865 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 2004 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
2005 | struct ocfs2_refcount_tree *ref_tree = NULL; | ||
2006 | struct buffer_head *ref_root_bh = NULL; | ||
2007 | struct ocfs2_caching_info *ref_ci = NULL; | ||
1866 | handle_t *handle; | 2008 | handle_t *handle; |
1867 | int ret; | 2009 | int ret; |
1868 | 2010 | ||
@@ -1872,8 +2014,21 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) | |||
1872 | if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) | 2014 | if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) |
1873 | return 0; | 2015 | return 0; |
1874 | 2016 | ||
2017 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) { | ||
2018 | ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), | ||
2019 | le64_to_cpu(di->i_refcount_loc), | ||
2020 | 1, &ref_tree, &ref_root_bh); | ||
2021 | if (ret) { | ||
2022 | mlog_errno(ret); | ||
2023 | goto out; | ||
2024 | } | ||
2025 | ref_ci = &ref_tree->rf_ci; | ||
2026 | |||
2027 | } | ||
2028 | |||
1875 | if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { | 2029 | if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { |
1876 | ret = ocfs2_xattr_ibody_remove(inode, di_bh); | 2030 | ret = ocfs2_xattr_ibody_remove(inode, di_bh, |
2031 | ref_ci, ref_root_bh); | ||
1877 | if (ret < 0) { | 2032 | if (ret < 0) { |
1878 | mlog_errno(ret); | 2033 | mlog_errno(ret); |
1879 | goto out; | 2034 | goto out; |
@@ -1882,7 +2037,8 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) | |||
1882 | 2037 | ||
1883 | if (di->i_xattr_loc) { | 2038 | if (di->i_xattr_loc) { |
1884 | ret = ocfs2_xattr_free_block(inode, | 2039 | ret = ocfs2_xattr_free_block(inode, |
1885 | le64_to_cpu(di->i_xattr_loc)); | 2040 | le64_to_cpu(di->i_xattr_loc), |
2041 | ref_ci, ref_root_bh); | ||
1886 | if (ret < 0) { | 2042 | if (ret < 0) { |
1887 | mlog_errno(ret); | 2043 | mlog_errno(ret); |
1888 | goto out; | 2044 | goto out; |
@@ -1896,7 +2052,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) | |||
1896 | mlog_errno(ret); | 2052 | mlog_errno(ret); |
1897 | goto out; | 2053 | goto out; |
1898 | } | 2054 | } |
1899 | ret = ocfs2_journal_access_di(handle, inode, di_bh, | 2055 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, |
1900 | OCFS2_JOURNAL_ACCESS_WRITE); | 2056 | OCFS2_JOURNAL_ACCESS_WRITE); |
1901 | if (ret) { | 2057 | if (ret) { |
1902 | mlog_errno(ret); | 2058 | mlog_errno(ret); |
@@ -1916,6 +2072,9 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) | |||
1916 | out_commit: | 2072 | out_commit: |
1917 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | 2073 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); |
1918 | out: | 2074 | out: |
2075 | if (ref_tree) | ||
2076 | ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1); | ||
2077 | brelse(ref_root_bh); | ||
1919 | return ret; | 2078 | return ret; |
1920 | } | 2079 | } |
1921 | 2080 | ||
@@ -2083,6 +2242,84 @@ cleanup: | |||
2083 | return ret; | 2242 | return ret; |
2084 | } | 2243 | } |
2085 | 2244 | ||
2245 | static int ocfs2_create_xattr_block(handle_t *handle, | ||
2246 | struct inode *inode, | ||
2247 | struct buffer_head *inode_bh, | ||
2248 | struct ocfs2_alloc_context *meta_ac, | ||
2249 | struct buffer_head **ret_bh, | ||
2250 | int indexed) | ||
2251 | { | ||
2252 | int ret; | ||
2253 | u16 suballoc_bit_start; | ||
2254 | u32 num_got; | ||
2255 | u64 first_blkno; | ||
2256 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; | ||
2257 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
2258 | struct buffer_head *new_bh = NULL; | ||
2259 | struct ocfs2_xattr_block *xblk; | ||
2260 | |||
2261 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh, | ||
2262 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2263 | if (ret < 0) { | ||
2264 | mlog_errno(ret); | ||
2265 | goto end; | ||
2266 | } | ||
2267 | |||
2268 | ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, | ||
2269 | &suballoc_bit_start, &num_got, | ||
2270 | &first_blkno); | ||
2271 | if (ret < 0) { | ||
2272 | mlog_errno(ret); | ||
2273 | goto end; | ||
2274 | } | ||
2275 | |||
2276 | new_bh = sb_getblk(inode->i_sb, first_blkno); | ||
2277 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); | ||
2278 | |||
2279 | ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), | ||
2280 | new_bh, | ||
2281 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2282 | if (ret < 0) { | ||
2283 | mlog_errno(ret); | ||
2284 | goto end; | ||
2285 | } | ||
2286 | |||
2287 | /* Initialize ocfs2_xattr_block */ | ||
2288 | xblk = (struct ocfs2_xattr_block *)new_bh->b_data; | ||
2289 | memset(xblk, 0, inode->i_sb->s_blocksize); | ||
2290 | strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); | ||
2291 | xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num); | ||
2292 | xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); | ||
2293 | xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation); | ||
2294 | xblk->xb_blkno = cpu_to_le64(first_blkno); | ||
2295 | |||
2296 | if (indexed) { | ||
2297 | struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; | ||
2298 | xr->xt_clusters = cpu_to_le32(1); | ||
2299 | xr->xt_last_eb_blk = 0; | ||
2300 | xr->xt_list.l_tree_depth = 0; | ||
2301 | xr->xt_list.l_count = cpu_to_le16( | ||
2302 | ocfs2_xattr_recs_per_xb(inode->i_sb)); | ||
2303 | xr->xt_list.l_next_free_rec = cpu_to_le16(1); | ||
2304 | xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED); | ||
2305 | } | ||
2306 | |||
2307 | ret = ocfs2_journal_dirty(handle, new_bh); | ||
2308 | if (ret < 0) { | ||
2309 | mlog_errno(ret); | ||
2310 | goto end; | ||
2311 | } | ||
2312 | di->i_xattr_loc = cpu_to_le64(first_blkno); | ||
2313 | ocfs2_journal_dirty(handle, inode_bh); | ||
2314 | |||
2315 | *ret_bh = new_bh; | ||
2316 | new_bh = NULL; | ||
2317 | |||
2318 | end: | ||
2319 | brelse(new_bh); | ||
2320 | return ret; | ||
2321 | } | ||
2322 | |||
2086 | /* | 2323 | /* |
2087 | * ocfs2_xattr_block_set() | 2324 | * ocfs2_xattr_block_set() |
2088 | * | 2325 | * |
@@ -2095,63 +2332,24 @@ static int ocfs2_xattr_block_set(struct inode *inode, | |||
2095 | struct ocfs2_xattr_set_ctxt *ctxt) | 2332 | struct ocfs2_xattr_set_ctxt *ctxt) |
2096 | { | 2333 | { |
2097 | struct buffer_head *new_bh = NULL; | 2334 | struct buffer_head *new_bh = NULL; |
2098 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
2099 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
2100 | handle_t *handle = ctxt->handle; | 2335 | handle_t *handle = ctxt->handle; |
2101 | struct ocfs2_xattr_block *xblk = NULL; | 2336 | struct ocfs2_xattr_block *xblk = NULL; |
2102 | u16 suballoc_bit_start; | ||
2103 | u32 num_got; | ||
2104 | u64 first_blkno; | ||
2105 | int ret; | 2337 | int ret; |
2106 | 2338 | ||
2107 | if (!xs->xattr_bh) { | 2339 | if (!xs->xattr_bh) { |
2108 | ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh, | 2340 | ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh, |
2109 | OCFS2_JOURNAL_ACCESS_CREATE); | 2341 | ctxt->meta_ac, &new_bh, 0); |
2110 | if (ret < 0) { | 2342 | if (ret) { |
2111 | mlog_errno(ret); | ||
2112 | goto end; | ||
2113 | } | ||
2114 | |||
2115 | ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1, | ||
2116 | &suballoc_bit_start, &num_got, | ||
2117 | &first_blkno); | ||
2118 | if (ret < 0) { | ||
2119 | mlog_errno(ret); | ||
2120 | goto end; | ||
2121 | } | ||
2122 | |||
2123 | new_bh = sb_getblk(inode->i_sb, first_blkno); | ||
2124 | ocfs2_set_new_buffer_uptodate(inode, new_bh); | ||
2125 | |||
2126 | ret = ocfs2_journal_access_xb(handle, inode, new_bh, | ||
2127 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
2128 | if (ret < 0) { | ||
2129 | mlog_errno(ret); | 2343 | mlog_errno(ret); |
2130 | goto end; | 2344 | goto end; |
2131 | } | 2345 | } |
2132 | 2346 | ||
2133 | /* Initialize ocfs2_xattr_block */ | ||
2134 | xs->xattr_bh = new_bh; | 2347 | xs->xattr_bh = new_bh; |
2135 | xblk = (struct ocfs2_xattr_block *)new_bh->b_data; | 2348 | xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; |
2136 | memset(xblk, 0, inode->i_sb->s_blocksize); | ||
2137 | strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); | ||
2138 | xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num); | ||
2139 | xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); | ||
2140 | xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation); | ||
2141 | xblk->xb_blkno = cpu_to_le64(first_blkno); | ||
2142 | |||
2143 | xs->header = &xblk->xb_attrs.xb_header; | 2349 | xs->header = &xblk->xb_attrs.xb_header; |
2144 | xs->base = (void *)xs->header; | 2350 | xs->base = (void *)xs->header; |
2145 | xs->end = (void *)xblk + inode->i_sb->s_blocksize; | 2351 | xs->end = (void *)xblk + inode->i_sb->s_blocksize; |
2146 | xs->here = xs->header->xh_entries; | 2352 | xs->here = xs->header->xh_entries; |
2147 | |||
2148 | ret = ocfs2_journal_dirty(handle, new_bh); | ||
2149 | if (ret < 0) { | ||
2150 | mlog_errno(ret); | ||
2151 | goto end; | ||
2152 | } | ||
2153 | di->i_xattr_loc = cpu_to_le64(first_blkno); | ||
2154 | ocfs2_journal_dirty(handle, xs->inode_bh); | ||
2155 | } else | 2353 | } else |
2156 | xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; | 2354 | xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; |
2157 | 2355 | ||
@@ -2273,7 +2471,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, | |||
2273 | old_in_xb = 1; | 2471 | old_in_xb = 1; |
2274 | 2472 | ||
2275 | if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { | 2473 | if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { |
2276 | ret = ocfs2_xattr_bucket_get_name_value(inode, | 2474 | ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, |
2277 | bucket_xh(xbs->bucket), | 2475 | bucket_xh(xbs->bucket), |
2278 | i, &block_off, | 2476 | i, &block_off, |
2279 | &name_offset); | 2477 | &name_offset); |
@@ -2428,6 +2626,7 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode, | |||
2428 | struct ocfs2_xattr_search *xis, | 2626 | struct ocfs2_xattr_search *xis, |
2429 | struct ocfs2_xattr_search *xbs, | 2627 | struct ocfs2_xattr_search *xbs, |
2430 | struct ocfs2_xattr_set_ctxt *ctxt, | 2628 | struct ocfs2_xattr_set_ctxt *ctxt, |
2629 | int extra_meta, | ||
2431 | int *credits) | 2630 | int *credits) |
2432 | { | 2631 | { |
2433 | int clusters_add, meta_add, ret; | 2632 | int clusters_add, meta_add, ret; |
@@ -2444,6 +2643,7 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode, | |||
2444 | return ret; | 2643 | return ret; |
2445 | } | 2644 | } |
2446 | 2645 | ||
2646 | meta_add += extra_meta; | ||
2447 | mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, " | 2647 | mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, " |
2448 | "credits = %d\n", xi->name, meta_add, clusters_add, *credits); | 2648 | "credits = %d\n", xi->name, meta_add, clusters_add, *credits); |
2449 | 2649 | ||
@@ -2598,7 +2798,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode, | |||
2598 | 2798 | ||
2599 | if (!ret) { | 2799 | if (!ret) { |
2600 | /* Update inode ctime. */ | 2800 | /* Update inode ctime. */ |
2601 | ret = ocfs2_journal_access_di(ctxt->handle, inode, | 2801 | ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), |
2602 | xis->inode_bh, | 2802 | xis->inode_bh, |
2603 | OCFS2_JOURNAL_ACCESS_WRITE); | 2803 | OCFS2_JOURNAL_ACCESS_WRITE); |
2604 | if (ret) { | 2804 | if (ret) { |
@@ -2711,10 +2911,11 @@ int ocfs2_xattr_set(struct inode *inode, | |||
2711 | { | 2911 | { |
2712 | struct buffer_head *di_bh = NULL; | 2912 | struct buffer_head *di_bh = NULL; |
2713 | struct ocfs2_dinode *di; | 2913 | struct ocfs2_dinode *di; |
2714 | int ret, credits; | 2914 | int ret, credits, ref_meta = 0, ref_credits = 0; |
2715 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2915 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2716 | struct inode *tl_inode = osb->osb_tl_inode; | 2916 | struct inode *tl_inode = osb->osb_tl_inode; |
2717 | struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; | 2917 | struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; |
2918 | struct ocfs2_refcount_tree *ref_tree = NULL; | ||
2718 | 2919 | ||
2719 | struct ocfs2_xattr_info xi = { | 2920 | struct ocfs2_xattr_info xi = { |
2720 | .name_index = name_index, | 2921 | .name_index = name_index, |
@@ -2779,6 +2980,17 @@ int ocfs2_xattr_set(struct inode *inode, | |||
2779 | goto cleanup; | 2980 | goto cleanup; |
2780 | } | 2981 | } |
2781 | 2982 | ||
2983 | /* Check whether the value is refcounted and do some prepartion. */ | ||
2984 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && | ||
2985 | (!xis.not_found || !xbs.not_found)) { | ||
2986 | ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, | ||
2987 | &xis, &xbs, &ref_tree, | ||
2988 | &ref_meta, &ref_credits); | ||
2989 | if (ret) { | ||
2990 | mlog_errno(ret); | ||
2991 | goto cleanup; | ||
2992 | } | ||
2993 | } | ||
2782 | 2994 | ||
2783 | mutex_lock(&tl_inode->i_mutex); | 2995 | mutex_lock(&tl_inode->i_mutex); |
2784 | 2996 | ||
@@ -2793,7 +3005,7 @@ int ocfs2_xattr_set(struct inode *inode, | |||
2793 | mutex_unlock(&tl_inode->i_mutex); | 3005 | mutex_unlock(&tl_inode->i_mutex); |
2794 | 3006 | ||
2795 | ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, | 3007 | ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, |
2796 | &xbs, &ctxt, &credits); | 3008 | &xbs, &ctxt, ref_meta, &credits); |
2797 | if (ret) { | 3009 | if (ret) { |
2798 | mlog_errno(ret); | 3010 | mlog_errno(ret); |
2799 | goto cleanup; | 3011 | goto cleanup; |
@@ -2801,7 +3013,7 @@ int ocfs2_xattr_set(struct inode *inode, | |||
2801 | 3013 | ||
2802 | /* we need to update inode's ctime field, so add credit for it. */ | 3014 | /* we need to update inode's ctime field, so add credit for it. */ |
2803 | credits += OCFS2_INODE_UPDATE_CREDITS; | 3015 | credits += OCFS2_INODE_UPDATE_CREDITS; |
2804 | ctxt.handle = ocfs2_start_trans(osb, credits); | 3016 | ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); |
2805 | if (IS_ERR(ctxt.handle)) { | 3017 | if (IS_ERR(ctxt.handle)) { |
2806 | ret = PTR_ERR(ctxt.handle); | 3018 | ret = PTR_ERR(ctxt.handle); |
2807 | mlog_errno(ret); | 3019 | mlog_errno(ret); |
@@ -2819,8 +3031,16 @@ int ocfs2_xattr_set(struct inode *inode, | |||
2819 | if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) | 3031 | if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) |
2820 | ocfs2_schedule_truncate_log_flush(osb, 1); | 3032 | ocfs2_schedule_truncate_log_flush(osb, 1); |
2821 | ocfs2_run_deallocs(osb, &ctxt.dealloc); | 3033 | ocfs2_run_deallocs(osb, &ctxt.dealloc); |
3034 | |||
2822 | cleanup: | 3035 | cleanup: |
3036 | if (ref_tree) | ||
3037 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
2823 | up_write(&OCFS2_I(inode)->ip_xattr_sem); | 3038 | up_write(&OCFS2_I(inode)->ip_xattr_sem); |
3039 | if (!value && !ret) { | ||
3040 | ret = ocfs2_try_remove_refcount_tree(inode, di_bh); | ||
3041 | if (ret) | ||
3042 | mlog_errno(ret); | ||
3043 | } | ||
2824 | ocfs2_inode_unlock(inode, 1); | 3044 | ocfs2_inode_unlock(inode, 1); |
2825 | cleanup_nolock: | 3045 | cleanup_nolock: |
2826 | brelse(di_bh); | 3046 | brelse(di_bh); |
@@ -2849,7 +3069,8 @@ static int ocfs2_xattr_get_rec(struct inode *inode, | |||
2849 | u64 e_blkno = 0; | 3069 | u64 e_blkno = 0; |
2850 | 3070 | ||
2851 | if (el->l_tree_depth) { | 3071 | if (el->l_tree_depth) { |
2852 | ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh); | 3072 | ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash, |
3073 | &eb_bh); | ||
2853 | if (ret) { | 3074 | if (ret) { |
2854 | mlog_errno(ret); | 3075 | mlog_errno(ret); |
2855 | goto out; | 3076 | goto out; |
@@ -2931,7 +3152,7 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode, | |||
2931 | if (cmp) | 3152 | if (cmp) |
2932 | continue; | 3153 | continue; |
2933 | 3154 | ||
2934 | ret = ocfs2_xattr_bucket_get_name_value(inode, | 3155 | ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, |
2935 | xh, | 3156 | xh, |
2936 | i, | 3157 | i, |
2937 | &block_off, | 3158 | &block_off, |
@@ -3175,7 +3396,7 @@ struct ocfs2_xattr_tree_list { | |||
3175 | size_t result; | 3396 | size_t result; |
3176 | }; | 3397 | }; |
3177 | 3398 | ||
3178 | static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, | 3399 | static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb, |
3179 | struct ocfs2_xattr_header *xh, | 3400 | struct ocfs2_xattr_header *xh, |
3180 | int index, | 3401 | int index, |
3181 | int *block_off, | 3402 | int *block_off, |
@@ -3188,8 +3409,8 @@ static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, | |||
3188 | 3409 | ||
3189 | name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); | 3410 | name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); |
3190 | 3411 | ||
3191 | *block_off = name_offset >> inode->i_sb->s_blocksize_bits; | 3412 | *block_off = name_offset >> sb->s_blocksize_bits; |
3192 | *new_offset = name_offset % inode->i_sb->s_blocksize; | 3413 | *new_offset = name_offset % sb->s_blocksize; |
3193 | 3414 | ||
3194 | return 0; | 3415 | return 0; |
3195 | } | 3416 | } |
@@ -3209,7 +3430,7 @@ static int ocfs2_list_xattr_bucket(struct inode *inode, | |||
3209 | prefix = ocfs2_xattr_prefix(type); | 3430 | prefix = ocfs2_xattr_prefix(type); |
3210 | 3431 | ||
3211 | if (prefix) { | 3432 | if (prefix) { |
3212 | ret = ocfs2_xattr_bucket_get_name_value(inode, | 3433 | ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, |
3213 | bucket_xh(bucket), | 3434 | bucket_xh(bucket), |
3214 | i, | 3435 | i, |
3215 | &block_off, | 3436 | &block_off, |
@@ -3232,22 +3453,19 @@ static int ocfs2_list_xattr_bucket(struct inode *inode, | |||
3232 | return ret; | 3453 | return ret; |
3233 | } | 3454 | } |
3234 | 3455 | ||
3235 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | 3456 | static int ocfs2_iterate_xattr_index_block(struct inode *inode, |
3236 | struct ocfs2_xattr_tree_root *xt, | 3457 | struct buffer_head *blk_bh, |
3237 | char *buffer, | 3458 | xattr_tree_rec_func *rec_func, |
3238 | size_t buffer_size) | 3459 | void *para) |
3239 | { | 3460 | { |
3240 | struct ocfs2_extent_list *el = &xt->xt_list; | 3461 | struct ocfs2_xattr_block *xb = |
3462 | (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
3463 | struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; | ||
3241 | int ret = 0; | 3464 | int ret = 0; |
3242 | u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; | 3465 | u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; |
3243 | u64 p_blkno = 0; | 3466 | u64 p_blkno = 0; |
3244 | struct ocfs2_xattr_tree_list xl = { | ||
3245 | .buffer = buffer, | ||
3246 | .buffer_size = buffer_size, | ||
3247 | .result = 0, | ||
3248 | }; | ||
3249 | 3467 | ||
3250 | if (le16_to_cpu(el->l_next_free_rec) == 0) | 3468 | if (!el->l_next_free_rec || !rec_func) |
3251 | return 0; | 3469 | return 0; |
3252 | 3470 | ||
3253 | while (name_hash > 0) { | 3471 | while (name_hash > 0) { |
@@ -3255,16 +3473,15 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | |||
3255 | &e_cpos, &num_clusters, el); | 3473 | &e_cpos, &num_clusters, el); |
3256 | if (ret) { | 3474 | if (ret) { |
3257 | mlog_errno(ret); | 3475 | mlog_errno(ret); |
3258 | goto out; | 3476 | break; |
3259 | } | 3477 | } |
3260 | 3478 | ||
3261 | ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, | 3479 | ret = rec_func(inode, blk_bh, p_blkno, e_cpos, |
3262 | ocfs2_list_xattr_bucket, | 3480 | num_clusters, para); |
3263 | &xl); | ||
3264 | if (ret) { | 3481 | if (ret) { |
3265 | if (ret != -ERANGE) | 3482 | if (ret != -ERANGE) |
3266 | mlog_errno(ret); | 3483 | mlog_errno(ret); |
3267 | goto out; | 3484 | break; |
3268 | } | 3485 | } |
3269 | 3486 | ||
3270 | if (e_cpos == 0) | 3487 | if (e_cpos == 0) |
@@ -3273,6 +3490,37 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | |||
3273 | name_hash = e_cpos - 1; | 3490 | name_hash = e_cpos - 1; |
3274 | } | 3491 | } |
3275 | 3492 | ||
3493 | return ret; | ||
3494 | |||
3495 | } | ||
3496 | |||
3497 | static int ocfs2_list_xattr_tree_rec(struct inode *inode, | ||
3498 | struct buffer_head *root_bh, | ||
3499 | u64 blkno, u32 cpos, u32 len, void *para) | ||
3500 | { | ||
3501 | return ocfs2_iterate_xattr_buckets(inode, blkno, len, | ||
3502 | ocfs2_list_xattr_bucket, para); | ||
3503 | } | ||
3504 | |||
3505 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | ||
3506 | struct buffer_head *blk_bh, | ||
3507 | char *buffer, | ||
3508 | size_t buffer_size) | ||
3509 | { | ||
3510 | int ret; | ||
3511 | struct ocfs2_xattr_tree_list xl = { | ||
3512 | .buffer = buffer, | ||
3513 | .buffer_size = buffer_size, | ||
3514 | .result = 0, | ||
3515 | }; | ||
3516 | |||
3517 | ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, | ||
3518 | ocfs2_list_xattr_tree_rec, &xl); | ||
3519 | if (ret) { | ||
3520 | mlog_errno(ret); | ||
3521 | goto out; | ||
3522 | } | ||
3523 | |||
3276 | ret = xl.result; | 3524 | ret = xl.result; |
3277 | out: | 3525 | out: |
3278 | return ret; | 3526 | return ret; |
@@ -3426,7 +3674,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, | |||
3426 | */ | 3674 | */ |
3427 | down_write(&oi->ip_alloc_sem); | 3675 | down_write(&oi->ip_alloc_sem); |
3428 | 3676 | ||
3429 | ret = ocfs2_journal_access_xb(handle, inode, xb_bh, | 3677 | ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh, |
3430 | OCFS2_JOURNAL_ACCESS_WRITE); | 3678 | OCFS2_JOURNAL_ACCESS_WRITE); |
3431 | if (ret) { | 3679 | if (ret) { |
3432 | mlog_errno(ret); | 3680 | mlog_errno(ret); |
@@ -4263,9 +4511,9 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, | |||
4263 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 4511 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
4264 | prev_cpos, (unsigned long long)bucket_blkno(first)); | 4512 | prev_cpos, (unsigned long long)bucket_blkno(first)); |
4265 | 4513 | ||
4266 | ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); | 4514 | ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); |
4267 | 4515 | ||
4268 | ret = ocfs2_journal_access_xb(handle, inode, root_bh, | 4516 | ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, |
4269 | OCFS2_JOURNAL_ACCESS_WRITE); | 4517 | OCFS2_JOURNAL_ACCESS_WRITE); |
4270 | if (ret < 0) { | 4518 | if (ret < 0) { |
4271 | mlog_errno(ret); | 4519 | mlog_errno(ret); |
@@ -4319,7 +4567,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, | |||
4319 | 4567 | ||
4320 | mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", | 4568 | mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", |
4321 | num_bits, (unsigned long long)block, v_start); | 4569 | num_bits, (unsigned long long)block, v_start); |
4322 | ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, | 4570 | ret = ocfs2_insert_extent(handle, &et, v_start, block, |
4323 | num_bits, 0, ctxt->meta_ac); | 4571 | num_bits, 0, ctxt->meta_ac); |
4324 | if (ret < 0) { | 4572 | if (ret < 0) { |
4325 | mlog_errno(ret); | 4573 | mlog_errno(ret); |
@@ -4798,10 +5046,13 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, | |||
4798 | struct ocfs2_xattr_entry *xe = xs->here; | 5046 | struct ocfs2_xattr_entry *xe = xs->here; |
4799 | struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket); | 5047 | struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket); |
4800 | void *base; | 5048 | void *base; |
5049 | struct ocfs2_xattr_value_buf vb = { | ||
5050 | .vb_access = ocfs2_journal_access, | ||
5051 | }; | ||
4801 | 5052 | ||
4802 | BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe)); | 5053 | BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe)); |
4803 | 5054 | ||
4804 | ret = ocfs2_xattr_bucket_get_name_value(inode, xh, | 5055 | ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh, |
4805 | xe - xh->xh_entries, | 5056 | xe - xh->xh_entries, |
4806 | &block_off, | 5057 | &block_off, |
4807 | &offset); | 5058 | &offset); |
@@ -4814,8 +5065,10 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, | |||
4814 | xv = (struct ocfs2_xattr_value_root *)(base + offset + | 5065 | xv = (struct ocfs2_xattr_value_root *)(base + offset + |
4815 | OCFS2_XATTR_SIZE(xe->xe_name_len)); | 5066 | OCFS2_XATTR_SIZE(xe->xe_name_len)); |
4816 | 5067 | ||
5068 | vb.vb_xv = xv; | ||
5069 | vb.vb_bh = xs->bucket->bu_bhs[block_off]; | ||
4817 | ret = __ocfs2_xattr_set_value_outside(inode, handle, | 5070 | ret = __ocfs2_xattr_set_value_outside(inode, handle, |
4818 | xv, val, value_len); | 5071 | &vb, val, value_len); |
4819 | if (ret) | 5072 | if (ret) |
4820 | mlog_errno(ret); | 5073 | mlog_errno(ret); |
4821 | out: | 5074 | out: |
@@ -4826,7 +5079,8 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode, | |||
4826 | struct buffer_head *root_bh, | 5079 | struct buffer_head *root_bh, |
4827 | u64 blkno, | 5080 | u64 blkno, |
4828 | u32 cpos, | 5081 | u32 cpos, |
4829 | u32 len) | 5082 | u32 len, |
5083 | void *para) | ||
4830 | { | 5084 | { |
4831 | int ret; | 5085 | int ret; |
4832 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 5086 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
@@ -4838,14 +5092,22 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode, | |||
4838 | struct ocfs2_cached_dealloc_ctxt dealloc; | 5092 | struct ocfs2_cached_dealloc_ctxt dealloc; |
4839 | struct ocfs2_extent_tree et; | 5093 | struct ocfs2_extent_tree et; |
4840 | 5094 | ||
4841 | ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); | 5095 | ret = ocfs2_iterate_xattr_buckets(inode, blkno, len, |
5096 | ocfs2_delete_xattr_in_bucket, para); | ||
5097 | if (ret) { | ||
5098 | mlog_errno(ret); | ||
5099 | return ret; | ||
5100 | } | ||
5101 | |||
5102 | ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh); | ||
4842 | 5103 | ||
4843 | ocfs2_init_dealloc_ctxt(&dealloc); | 5104 | ocfs2_init_dealloc_ctxt(&dealloc); |
4844 | 5105 | ||
4845 | mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n", | 5106 | mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n", |
4846 | cpos, len, (unsigned long long)blkno); | 5107 | cpos, len, (unsigned long long)blkno); |
4847 | 5108 | ||
4848 | ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len); | 5109 | ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno, |
5110 | len); | ||
4849 | 5111 | ||
4850 | ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); | 5112 | ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); |
4851 | if (ret) { | 5113 | if (ret) { |
@@ -4870,14 +5132,14 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode, | |||
4870 | goto out; | 5132 | goto out; |
4871 | } | 5133 | } |
4872 | 5134 | ||
4873 | ret = ocfs2_journal_access_xb(handle, inode, root_bh, | 5135 | ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh, |
4874 | OCFS2_JOURNAL_ACCESS_WRITE); | 5136 | OCFS2_JOURNAL_ACCESS_WRITE); |
4875 | if (ret) { | 5137 | if (ret) { |
4876 | mlog_errno(ret); | 5138 | mlog_errno(ret); |
4877 | goto out_commit; | 5139 | goto out_commit; |
4878 | } | 5140 | } |
4879 | 5141 | ||
4880 | ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, | 5142 | ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac, |
4881 | &dealloc); | 5143 | &dealloc); |
4882 | if (ret) { | 5144 | if (ret) { |
4883 | mlog_errno(ret); | 5145 | mlog_errno(ret); |
@@ -5220,7 +5482,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, | |||
5220 | struct ocfs2_xattr_bucket *bucket, | 5482 | struct ocfs2_xattr_bucket *bucket, |
5221 | void *para) | 5483 | void *para) |
5222 | { | 5484 | { |
5223 | int ret = 0; | 5485 | int ret = 0, ref_credits; |
5224 | struct ocfs2_xattr_header *xh = bucket_xh(bucket); | 5486 | struct ocfs2_xattr_header *xh = bucket_xh(bucket); |
5225 | u16 i; | 5487 | u16 i; |
5226 | struct ocfs2_xattr_entry *xe; | 5488 | struct ocfs2_xattr_entry *xe; |
@@ -5228,7 +5490,9 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, | |||
5228 | struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; | 5490 | struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; |
5229 | int credits = ocfs2_remove_extent_credits(osb->sb) + | 5491 | int credits = ocfs2_remove_extent_credits(osb->sb) + |
5230 | ocfs2_blocks_per_xattr_bucket(inode->i_sb); | 5492 | ocfs2_blocks_per_xattr_bucket(inode->i_sb); |
5231 | 5493 | struct ocfs2_xattr_value_root *xv; | |
5494 | struct ocfs2_rm_xattr_bucket_para *args = | ||
5495 | (struct ocfs2_rm_xattr_bucket_para *)para; | ||
5232 | 5496 | ||
5233 | ocfs2_init_dealloc_ctxt(&ctxt.dealloc); | 5497 | ocfs2_init_dealloc_ctxt(&ctxt.dealloc); |
5234 | 5498 | ||
@@ -5237,7 +5501,16 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, | |||
5237 | if (ocfs2_xattr_is_local(xe)) | 5501 | if (ocfs2_xattr_is_local(xe)) |
5238 | continue; | 5502 | continue; |
5239 | 5503 | ||
5240 | ctxt.handle = ocfs2_start_trans(osb, credits); | 5504 | ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, |
5505 | i, &xv, NULL); | ||
5506 | |||
5507 | ret = ocfs2_lock_xattr_remove_allocators(inode, xv, | ||
5508 | args->ref_ci, | ||
5509 | args->ref_root_bh, | ||
5510 | &ctxt.meta_ac, | ||
5511 | &ref_credits); | ||
5512 | |||
5513 | ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits); | ||
5241 | if (IS_ERR(ctxt.handle)) { | 5514 | if (IS_ERR(ctxt.handle)) { |
5242 | ret = PTR_ERR(ctxt.handle); | 5515 | ret = PTR_ERR(ctxt.handle); |
5243 | mlog_errno(ret); | 5516 | mlog_errno(ret); |
@@ -5248,57 +5521,1439 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, | |||
5248 | i, 0, &ctxt); | 5521 | i, 0, &ctxt); |
5249 | 5522 | ||
5250 | ocfs2_commit_trans(osb, ctxt.handle); | 5523 | ocfs2_commit_trans(osb, ctxt.handle); |
5524 | if (ctxt.meta_ac) { | ||
5525 | ocfs2_free_alloc_context(ctxt.meta_ac); | ||
5526 | ctxt.meta_ac = NULL; | ||
5527 | } | ||
5251 | if (ret) { | 5528 | if (ret) { |
5252 | mlog_errno(ret); | 5529 | mlog_errno(ret); |
5253 | break; | 5530 | break; |
5254 | } | 5531 | } |
5255 | } | 5532 | } |
5256 | 5533 | ||
5534 | if (ctxt.meta_ac) | ||
5535 | ocfs2_free_alloc_context(ctxt.meta_ac); | ||
5257 | ocfs2_schedule_truncate_log_flush(osb, 1); | 5536 | ocfs2_schedule_truncate_log_flush(osb, 1); |
5258 | ocfs2_run_deallocs(osb, &ctxt.dealloc); | 5537 | ocfs2_run_deallocs(osb, &ctxt.dealloc); |
5259 | return ret; | 5538 | return ret; |
5260 | } | 5539 | } |
5261 | 5540 | ||
5262 | static int ocfs2_delete_xattr_index_block(struct inode *inode, | 5541 | /* |
5263 | struct buffer_head *xb_bh) | 5542 | * Whenever we modify a xattr value root in the bucket(e.g, CoW |
5543 | * or change the extent record flag), we need to recalculate | ||
5544 | * the metaecc for the whole bucket. So it is done here. | ||
5545 | * | ||
5546 | * Note: | ||
5547 | * We have to give the extra credits for the caller. | ||
5548 | */ | ||
5549 | static int ocfs2_xattr_bucket_post_refcount(struct inode *inode, | ||
5550 | handle_t *handle, | ||
5551 | void *para) | ||
5552 | { | ||
5553 | int ret; | ||
5554 | struct ocfs2_xattr_bucket *bucket = | ||
5555 | (struct ocfs2_xattr_bucket *)para; | ||
5556 | |||
5557 | ret = ocfs2_xattr_bucket_journal_access(handle, bucket, | ||
5558 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
5559 | if (ret) { | ||
5560 | mlog_errno(ret); | ||
5561 | return ret; | ||
5562 | } | ||
5563 | |||
5564 | ocfs2_xattr_bucket_journal_dirty(handle, bucket); | ||
5565 | |||
5566 | return 0; | ||
5567 | } | ||
5568 | |||
5569 | /* | ||
5570 | * Special action we need if the xattr value is refcounted. | ||
5571 | * | ||
5572 | * 1. If the xattr is refcounted, lock the tree. | ||
5573 | * 2. CoW the xattr if we are setting the new value and the value | ||
5574 | * will be stored outside. | ||
5575 | * 3. In other case, decrease_refcount will work for us, so just | ||
5576 | * lock the refcount tree, calculate the meta and credits is OK. | ||
5577 | * | ||
5578 | * We have to do CoW before ocfs2_init_xattr_set_ctxt since | ||
5579 | * currently CoW is a completed transaction, while this function | ||
5580 | * will also lock the allocators and let us deadlock. So we will | ||
5581 | * CoW the whole xattr value. | ||
5582 | */ | ||
5583 | static int ocfs2_prepare_refcount_xattr(struct inode *inode, | ||
5584 | struct ocfs2_dinode *di, | ||
5585 | struct ocfs2_xattr_info *xi, | ||
5586 | struct ocfs2_xattr_search *xis, | ||
5587 | struct ocfs2_xattr_search *xbs, | ||
5588 | struct ocfs2_refcount_tree **ref_tree, | ||
5589 | int *meta_add, | ||
5590 | int *credits) | ||
5264 | { | 5591 | { |
5265 | struct ocfs2_xattr_block *xb = | ||
5266 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
5267 | struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; | ||
5268 | int ret = 0; | 5592 | int ret = 0; |
5269 | u32 name_hash = UINT_MAX, e_cpos, num_clusters; | 5593 | struct ocfs2_xattr_block *xb; |
5270 | u64 p_blkno; | 5594 | struct ocfs2_xattr_entry *xe; |
5595 | char *base; | ||
5596 | u32 p_cluster, num_clusters; | ||
5597 | unsigned int ext_flags; | ||
5598 | int name_offset, name_len; | ||
5599 | struct ocfs2_xattr_value_buf vb; | ||
5600 | struct ocfs2_xattr_bucket *bucket = NULL; | ||
5601 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
5602 | struct ocfs2_post_refcount refcount; | ||
5603 | struct ocfs2_post_refcount *p = NULL; | ||
5604 | struct buffer_head *ref_root_bh = NULL; | ||
5271 | 5605 | ||
5272 | if (le16_to_cpu(el->l_next_free_rec) == 0) | 5606 | if (!xis->not_found) { |
5273 | return 0; | 5607 | xe = xis->here; |
5608 | name_offset = le16_to_cpu(xe->xe_name_offset); | ||
5609 | name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
5610 | base = xis->base; | ||
5611 | vb.vb_bh = xis->inode_bh; | ||
5612 | vb.vb_access = ocfs2_journal_access_di; | ||
5613 | } else { | ||
5614 | int i, block_off = 0; | ||
5615 | xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; | ||
5616 | xe = xbs->here; | ||
5617 | name_offset = le16_to_cpu(xe->xe_name_offset); | ||
5618 | name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
5619 | i = xbs->here - xbs->header->xh_entries; | ||
5274 | 5620 | ||
5275 | while (name_hash > 0) { | 5621 | if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { |
5276 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, | 5622 | ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, |
5277 | &e_cpos, &num_clusters, el); | 5623 | bucket_xh(xbs->bucket), |
5624 | i, &block_off, | ||
5625 | &name_offset); | ||
5626 | if (ret) { | ||
5627 | mlog_errno(ret); | ||
5628 | goto out; | ||
5629 | } | ||
5630 | base = bucket_block(xbs->bucket, block_off); | ||
5631 | vb.vb_bh = xbs->bucket->bu_bhs[block_off]; | ||
5632 | vb.vb_access = ocfs2_journal_access; | ||
5633 | |||
5634 | if (ocfs2_meta_ecc(osb)) { | ||
5635 | /*create parameters for ocfs2_post_refcount. */ | ||
5636 | bucket = xbs->bucket; | ||
5637 | refcount.credits = bucket->bu_blocks; | ||
5638 | refcount.para = bucket; | ||
5639 | refcount.func = | ||
5640 | ocfs2_xattr_bucket_post_refcount; | ||
5641 | p = &refcount; | ||
5642 | } | ||
5643 | } else { | ||
5644 | base = xbs->base; | ||
5645 | vb.vb_bh = xbs->xattr_bh; | ||
5646 | vb.vb_access = ocfs2_journal_access_xb; | ||
5647 | } | ||
5648 | } | ||
5649 | |||
5650 | if (ocfs2_xattr_is_local(xe)) | ||
5651 | goto out; | ||
5652 | |||
5653 | vb.vb_xv = (struct ocfs2_xattr_value_root *) | ||
5654 | (base + name_offset + name_len); | ||
5655 | |||
5656 | ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster, | ||
5657 | &num_clusters, &vb.vb_xv->xr_list, | ||
5658 | &ext_flags); | ||
5659 | if (ret) { | ||
5660 | mlog_errno(ret); | ||
5661 | goto out; | ||
5662 | } | ||
5663 | |||
5664 | /* | ||
5665 | * We just need to check the 1st extent record, since we always | ||
5666 | * CoW the whole xattr. So there shouldn't be a xattr with | ||
5667 | * some REFCOUNT extent recs after the 1st one. | ||
5668 | */ | ||
5669 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) | ||
5670 | goto out; | ||
5671 | |||
5672 | ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), | ||
5673 | 1, ref_tree, &ref_root_bh); | ||
5674 | if (ret) { | ||
5675 | mlog_errno(ret); | ||
5676 | goto out; | ||
5677 | } | ||
5678 | |||
5679 | /* | ||
5680 | * If we are deleting the xattr or the new size will be stored inside, | ||
5681 | * cool, leave it there, the xattr truncate process will remove them | ||
5682 | * for us(it still needs the refcount tree lock and the meta, credits). | ||
5683 | * And the worse case is that every cluster truncate will split the | ||
5684 | * refcount tree, and make the original extent become 3. So we will need | ||
5685 | * 2 * cluster more extent recs at most. | ||
5686 | */ | ||
5687 | if (!xi->value || xi->value_len <= OCFS2_XATTR_INLINE_SIZE) { | ||
5688 | |||
5689 | ret = ocfs2_refcounted_xattr_delete_need(inode, | ||
5690 | &(*ref_tree)->rf_ci, | ||
5691 | ref_root_bh, vb.vb_xv, | ||
5692 | meta_add, credits); | ||
5693 | if (ret) | ||
5694 | mlog_errno(ret); | ||
5695 | goto out; | ||
5696 | } | ||
5697 | |||
5698 | ret = ocfs2_refcount_cow_xattr(inode, di, &vb, | ||
5699 | *ref_tree, ref_root_bh, 0, | ||
5700 | le32_to_cpu(vb.vb_xv->xr_clusters), p); | ||
5701 | if (ret) | ||
5702 | mlog_errno(ret); | ||
5703 | |||
5704 | out: | ||
5705 | brelse(ref_root_bh); | ||
5706 | return ret; | ||
5707 | } | ||
5708 | |||
5709 | /* | ||
5710 | * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root. | ||
5711 | * The physical clusters will be added to refcount tree. | ||
5712 | */ | ||
5713 | static int ocfs2_xattr_value_attach_refcount(struct inode *inode, | ||
5714 | struct ocfs2_xattr_value_root *xv, | ||
5715 | struct ocfs2_extent_tree *value_et, | ||
5716 | struct ocfs2_caching_info *ref_ci, | ||
5717 | struct buffer_head *ref_root_bh, | ||
5718 | struct ocfs2_cached_dealloc_ctxt *dealloc, | ||
5719 | struct ocfs2_post_refcount *refcount) | ||
5720 | { | ||
5721 | int ret = 0; | ||
5722 | u32 clusters = le32_to_cpu(xv->xr_clusters); | ||
5723 | u32 cpos, p_cluster, num_clusters; | ||
5724 | struct ocfs2_extent_list *el = &xv->xr_list; | ||
5725 | unsigned int ext_flags; | ||
5726 | |||
5727 | cpos = 0; | ||
5728 | while (cpos < clusters) { | ||
5729 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, | ||
5730 | &num_clusters, el, &ext_flags); | ||
5731 | |||
5732 | cpos += num_clusters; | ||
5733 | if ((ext_flags & OCFS2_EXT_REFCOUNTED)) | ||
5734 | continue; | ||
5735 | |||
5736 | BUG_ON(!p_cluster); | ||
5737 | |||
5738 | ret = ocfs2_add_refcount_flag(inode, value_et, | ||
5739 | ref_ci, ref_root_bh, | ||
5740 | cpos - num_clusters, | ||
5741 | p_cluster, num_clusters, | ||
5742 | dealloc, refcount); | ||
5743 | if (ret) { | ||
5744 | mlog_errno(ret); | ||
5745 | break; | ||
5746 | } | ||
5747 | } | ||
5748 | |||
5749 | return ret; | ||
5750 | } | ||
5751 | |||
5752 | /* | ||
5753 | * Given a normal ocfs2_xattr_header, refcount all the entries which | ||
5754 | * have value stored outside. | ||
5755 | * Used for xattrs stored in inode and ocfs2_xattr_block. | ||
5756 | */ | ||
5757 | static int ocfs2_xattr_attach_refcount_normal(struct inode *inode, | ||
5758 | struct ocfs2_xattr_value_buf *vb, | ||
5759 | struct ocfs2_xattr_header *header, | ||
5760 | struct ocfs2_caching_info *ref_ci, | ||
5761 | struct buffer_head *ref_root_bh, | ||
5762 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
5763 | { | ||
5764 | |||
5765 | struct ocfs2_xattr_entry *xe; | ||
5766 | struct ocfs2_xattr_value_root *xv; | ||
5767 | struct ocfs2_extent_tree et; | ||
5768 | int i, ret = 0; | ||
5769 | |||
5770 | for (i = 0; i < le16_to_cpu(header->xh_count); i++) { | ||
5771 | xe = &header->xh_entries[i]; | ||
5772 | |||
5773 | if (ocfs2_xattr_is_local(xe)) | ||
5774 | continue; | ||
5775 | |||
5776 | xv = (struct ocfs2_xattr_value_root *)((void *)header + | ||
5777 | le16_to_cpu(xe->xe_name_offset) + | ||
5778 | OCFS2_XATTR_SIZE(xe->xe_name_len)); | ||
5779 | |||
5780 | vb->vb_xv = xv; | ||
5781 | ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); | ||
5782 | |||
5783 | ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et, | ||
5784 | ref_ci, ref_root_bh, | ||
5785 | dealloc, NULL); | ||
5786 | if (ret) { | ||
5787 | mlog_errno(ret); | ||
5788 | break; | ||
5789 | } | ||
5790 | } | ||
5791 | |||
5792 | return ret; | ||
5793 | } | ||
5794 | |||
5795 | static int ocfs2_xattr_inline_attach_refcount(struct inode *inode, | ||
5796 | struct buffer_head *fe_bh, | ||
5797 | struct ocfs2_caching_info *ref_ci, | ||
5798 | struct buffer_head *ref_root_bh, | ||
5799 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
5800 | { | ||
5801 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; | ||
5802 | struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *) | ||
5803 | (fe_bh->b_data + inode->i_sb->s_blocksize - | ||
5804 | le16_to_cpu(di->i_xattr_inline_size)); | ||
5805 | struct ocfs2_xattr_value_buf vb = { | ||
5806 | .vb_bh = fe_bh, | ||
5807 | .vb_access = ocfs2_journal_access_di, | ||
5808 | }; | ||
5809 | |||
5810 | return ocfs2_xattr_attach_refcount_normal(inode, &vb, header, | ||
5811 | ref_ci, ref_root_bh, dealloc); | ||
5812 | } | ||
5813 | |||
5814 | struct ocfs2_xattr_tree_value_refcount_para { | ||
5815 | struct ocfs2_caching_info *ref_ci; | ||
5816 | struct buffer_head *ref_root_bh; | ||
5817 | struct ocfs2_cached_dealloc_ctxt *dealloc; | ||
5818 | }; | ||
5819 | |||
5820 | static int ocfs2_get_xattr_tree_value_root(struct super_block *sb, | ||
5821 | struct ocfs2_xattr_bucket *bucket, | ||
5822 | int offset, | ||
5823 | struct ocfs2_xattr_value_root **xv, | ||
5824 | struct buffer_head **bh) | ||
5825 | { | ||
5826 | int ret, block_off, name_offset; | ||
5827 | struct ocfs2_xattr_header *xh = bucket_xh(bucket); | ||
5828 | struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; | ||
5829 | void *base; | ||
5830 | |||
5831 | ret = ocfs2_xattr_bucket_get_name_value(sb, | ||
5832 | bucket_xh(bucket), | ||
5833 | offset, | ||
5834 | &block_off, | ||
5835 | &name_offset); | ||
5836 | if (ret) { | ||
5837 | mlog_errno(ret); | ||
5838 | goto out; | ||
5839 | } | ||
5840 | |||
5841 | base = bucket_block(bucket, block_off); | ||
5842 | |||
5843 | *xv = (struct ocfs2_xattr_value_root *)(base + name_offset + | ||
5844 | OCFS2_XATTR_SIZE(xe->xe_name_len)); | ||
5845 | |||
5846 | if (bh) | ||
5847 | *bh = bucket->bu_bhs[block_off]; | ||
5848 | out: | ||
5849 | return ret; | ||
5850 | } | ||
5851 | |||
5852 | /* | ||
5853 | * For a given xattr bucket, refcount all the entries which | ||
5854 | * have value stored outside. | ||
5855 | */ | ||
5856 | static int ocfs2_xattr_bucket_value_refcount(struct inode *inode, | ||
5857 | struct ocfs2_xattr_bucket *bucket, | ||
5858 | void *para) | ||
5859 | { | ||
5860 | int i, ret = 0; | ||
5861 | struct ocfs2_extent_tree et; | ||
5862 | struct ocfs2_xattr_tree_value_refcount_para *ref = | ||
5863 | (struct ocfs2_xattr_tree_value_refcount_para *)para; | ||
5864 | struct ocfs2_xattr_header *xh = | ||
5865 | (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; | ||
5866 | struct ocfs2_xattr_entry *xe; | ||
5867 | struct ocfs2_xattr_value_buf vb = { | ||
5868 | .vb_access = ocfs2_journal_access, | ||
5869 | }; | ||
5870 | struct ocfs2_post_refcount refcount = { | ||
5871 | .credits = bucket->bu_blocks, | ||
5872 | .para = bucket, | ||
5873 | .func = ocfs2_xattr_bucket_post_refcount, | ||
5874 | }; | ||
5875 | struct ocfs2_post_refcount *p = NULL; | ||
5876 | |||
5877 | /* We only need post_refcount if we support metaecc. */ | ||
5878 | if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb))) | ||
5879 | p = &refcount; | ||
5880 | |||
5881 | mlog(0, "refcount bucket %llu, count = %u\n", | ||
5882 | (unsigned long long)bucket_blkno(bucket), | ||
5883 | le16_to_cpu(xh->xh_count)); | ||
5884 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | ||
5885 | xe = &xh->xh_entries[i]; | ||
5886 | |||
5887 | if (ocfs2_xattr_is_local(xe)) | ||
5888 | continue; | ||
5889 | |||
5890 | ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i, | ||
5891 | &vb.vb_xv, &vb.vb_bh); | ||
5892 | if (ret) { | ||
5893 | mlog_errno(ret); | ||
5894 | break; | ||
5895 | } | ||
5896 | |||
5897 | ocfs2_init_xattr_value_extent_tree(&et, | ||
5898 | INODE_CACHE(inode), &vb); | ||
5899 | |||
5900 | ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv, | ||
5901 | &et, ref->ref_ci, | ||
5902 | ref->ref_root_bh, | ||
5903 | ref->dealloc, p); | ||
5904 | if (ret) { | ||
5905 | mlog_errno(ret); | ||
5906 | break; | ||
5907 | } | ||
5908 | } | ||
5909 | |||
5910 | return ret; | ||
5911 | |||
5912 | } | ||
5913 | |||
5914 | static int ocfs2_refcount_xattr_tree_rec(struct inode *inode, | ||
5915 | struct buffer_head *root_bh, | ||
5916 | u64 blkno, u32 cpos, u32 len, void *para) | ||
5917 | { | ||
5918 | return ocfs2_iterate_xattr_buckets(inode, blkno, len, | ||
5919 | ocfs2_xattr_bucket_value_refcount, | ||
5920 | para); | ||
5921 | } | ||
5922 | |||
5923 | static int ocfs2_xattr_block_attach_refcount(struct inode *inode, | ||
5924 | struct buffer_head *blk_bh, | ||
5925 | struct ocfs2_caching_info *ref_ci, | ||
5926 | struct buffer_head *ref_root_bh, | ||
5927 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
5928 | { | ||
5929 | int ret = 0; | ||
5930 | struct ocfs2_xattr_block *xb = | ||
5931 | (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
5932 | |||
5933 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
5934 | struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; | ||
5935 | struct ocfs2_xattr_value_buf vb = { | ||
5936 | .vb_bh = blk_bh, | ||
5937 | .vb_access = ocfs2_journal_access_xb, | ||
5938 | }; | ||
5939 | |||
5940 | ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header, | ||
5941 | ref_ci, ref_root_bh, | ||
5942 | dealloc); | ||
5943 | } else { | ||
5944 | struct ocfs2_xattr_tree_value_refcount_para para = { | ||
5945 | .ref_ci = ref_ci, | ||
5946 | .ref_root_bh = ref_root_bh, | ||
5947 | .dealloc = dealloc, | ||
5948 | }; | ||
5949 | |||
5950 | ret = ocfs2_iterate_xattr_index_block(inode, blk_bh, | ||
5951 | ocfs2_refcount_xattr_tree_rec, | ||
5952 | ¶); | ||
5953 | } | ||
5954 | |||
5955 | return ret; | ||
5956 | } | ||
5957 | |||
5958 | int ocfs2_xattr_attach_refcount_tree(struct inode *inode, | ||
5959 | struct buffer_head *fe_bh, | ||
5960 | struct ocfs2_caching_info *ref_ci, | ||
5961 | struct buffer_head *ref_root_bh, | ||
5962 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
5963 | { | ||
5964 | int ret = 0; | ||
5965 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
5966 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; | ||
5967 | struct buffer_head *blk_bh = NULL; | ||
5968 | |||
5969 | if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { | ||
5970 | ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh, | ||
5971 | ref_ci, ref_root_bh, | ||
5972 | dealloc); | ||
5278 | if (ret) { | 5973 | if (ret) { |
5279 | mlog_errno(ret); | 5974 | mlog_errno(ret); |
5280 | goto out; | 5975 | goto out; |
5281 | } | 5976 | } |
5977 | } | ||
5978 | |||
5979 | if (!di->i_xattr_loc) | ||
5980 | goto out; | ||
5981 | |||
5982 | ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), | ||
5983 | &blk_bh); | ||
5984 | if (ret < 0) { | ||
5985 | mlog_errno(ret); | ||
5986 | goto out; | ||
5987 | } | ||
5988 | |||
5989 | ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci, | ||
5990 | ref_root_bh, dealloc); | ||
5991 | if (ret) | ||
5992 | mlog_errno(ret); | ||
5993 | |||
5994 | brelse(blk_bh); | ||
5995 | out: | ||
5996 | |||
5997 | return ret; | ||
5998 | } | ||
5999 | |||
6000 | typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe); | ||
6001 | /* | ||
6002 | * Store the information we need in xattr reflink. | ||
6003 | * old_bh and new_bh are inode bh for the old and new inode. | ||
6004 | */ | ||
6005 | struct ocfs2_xattr_reflink { | ||
6006 | struct inode *old_inode; | ||
6007 | struct inode *new_inode; | ||
6008 | struct buffer_head *old_bh; | ||
6009 | struct buffer_head *new_bh; | ||
6010 | struct ocfs2_caching_info *ref_ci; | ||
6011 | struct buffer_head *ref_root_bh; | ||
6012 | struct ocfs2_cached_dealloc_ctxt *dealloc; | ||
6013 | should_xattr_reflinked *xattr_reflinked; | ||
6014 | }; | ||
6015 | |||
6016 | /* | ||
6017 | * Given a xattr header and xe offset, | ||
6018 | * return the proper xv and the corresponding bh. | ||
6019 | * xattr in inode, block and xattr tree have different implementaions. | ||
6020 | */ | ||
6021 | typedef int (get_xattr_value_root)(struct super_block *sb, | ||
6022 | struct buffer_head *bh, | ||
6023 | struct ocfs2_xattr_header *xh, | ||
6024 | int offset, | ||
6025 | struct ocfs2_xattr_value_root **xv, | ||
6026 | struct buffer_head **ret_bh, | ||
6027 | void *para); | ||
6028 | |||
6029 | /* | ||
6030 | * Calculate all the xattr value root metadata stored in this xattr header and | ||
6031 | * credits we need if we create them from the scratch. | ||
6032 | * We use get_xattr_value_root so that all types of xattr container can use it. | ||
6033 | */ | ||
6034 | static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, | ||
6035 | struct buffer_head *bh, | ||
6036 | struct ocfs2_xattr_header *xh, | ||
6037 | int *metas, int *credits, | ||
6038 | int *num_recs, | ||
6039 | get_xattr_value_root *func, | ||
6040 | void *para) | ||
6041 | { | ||
6042 | int i, ret = 0; | ||
6043 | struct ocfs2_xattr_value_root *xv; | ||
6044 | struct ocfs2_xattr_entry *xe; | ||
6045 | |||
6046 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | ||
6047 | xe = &xh->xh_entries[i]; | ||
6048 | if (ocfs2_xattr_is_local(xe)) | ||
6049 | continue; | ||
6050 | |||
6051 | ret = func(sb, bh, xh, i, &xv, NULL, para); | ||
6052 | if (ret) { | ||
6053 | mlog_errno(ret); | ||
6054 | break; | ||
6055 | } | ||
6056 | |||
6057 | *metas += le16_to_cpu(xv->xr_list.l_tree_depth) * | ||
6058 | le16_to_cpu(xv->xr_list.l_next_free_rec); | ||
6059 | |||
6060 | *credits += ocfs2_calc_extend_credits(sb, | ||
6061 | &def_xv.xv.xr_list, | ||
6062 | le32_to_cpu(xv->xr_clusters)); | ||
6063 | |||
6064 | /* | ||
6065 | * If the value is a tree with depth > 1, We don't go deep | ||
6066 | * to the extent block, so just calculate a maximum record num. | ||
6067 | */ | ||
6068 | if (!xv->xr_list.l_tree_depth) | ||
6069 | *num_recs += xv->xr_list.l_next_free_rec; | ||
6070 | else | ||
6071 | *num_recs += ocfs2_clusters_for_bytes(sb, | ||
6072 | XATTR_SIZE_MAX); | ||
6073 | } | ||
6074 | |||
6075 | return ret; | ||
6076 | } | ||
6077 | |||
6078 | /* Used by xattr inode and block to return the right xv and buffer_head. */ | ||
6079 | static int ocfs2_get_xattr_value_root(struct super_block *sb, | ||
6080 | struct buffer_head *bh, | ||
6081 | struct ocfs2_xattr_header *xh, | ||
6082 | int offset, | ||
6083 | struct ocfs2_xattr_value_root **xv, | ||
6084 | struct buffer_head **ret_bh, | ||
6085 | void *para) | ||
6086 | { | ||
6087 | struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset]; | ||
6088 | |||
6089 | *xv = (struct ocfs2_xattr_value_root *)((void *)xh + | ||
6090 | le16_to_cpu(xe->xe_name_offset) + | ||
6091 | OCFS2_XATTR_SIZE(xe->xe_name_len)); | ||
6092 | |||
6093 | if (ret_bh) | ||
6094 | *ret_bh = bh; | ||
6095 | |||
6096 | return 0; | ||
6097 | } | ||
6098 | |||
6099 | /* | ||
6100 | * Lock the meta_ac and caculate how much credits we need for reflink xattrs. | ||
6101 | * It is only used for inline xattr and xattr block. | ||
6102 | */ | ||
6103 | static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, | ||
6104 | struct ocfs2_xattr_header *xh, | ||
6105 | struct buffer_head *ref_root_bh, | ||
6106 | int *credits, | ||
6107 | struct ocfs2_alloc_context **meta_ac) | ||
6108 | { | ||
6109 | int ret, meta_add = 0, num_recs = 0; | ||
6110 | struct ocfs2_refcount_block *rb = | ||
6111 | (struct ocfs2_refcount_block *)ref_root_bh->b_data; | ||
6112 | |||
6113 | *credits = 0; | ||
6114 | |||
6115 | ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh, | ||
6116 | &meta_add, credits, &num_recs, | ||
6117 | ocfs2_get_xattr_value_root, | ||
6118 | NULL); | ||
6119 | if (ret) { | ||
6120 | mlog_errno(ret); | ||
6121 | goto out; | ||
6122 | } | ||
6123 | |||
6124 | /* | ||
6125 | * We need to add/modify num_recs in refcount tree, so just calculate | ||
6126 | * an approximate number we need for refcount tree change. | ||
6127 | * Sometimes we need to split the tree, and after split, half recs | ||
6128 | * will be moved to the new block, and a new block can only provide | ||
6129 | * half number of recs. So we multiple new blocks by 2. | ||
6130 | */ | ||
6131 | num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2; | ||
6132 | meta_add += num_recs; | ||
6133 | *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; | ||
6134 | if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) | ||
6135 | *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * | ||
6136 | le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; | ||
6137 | else | ||
6138 | *credits += 1; | ||
6139 | |||
6140 | ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac); | ||
6141 | if (ret) | ||
6142 | mlog_errno(ret); | ||
6143 | |||
6144 | out: | ||
6145 | return ret; | ||
6146 | } | ||
5282 | 6147 | ||
5283 | ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, | 6148 | /* |
5284 | ocfs2_delete_xattr_in_bucket, | 6149 | * Given a xattr header, reflink all the xattrs in this container. |
5285 | NULL); | 6150 | * It can be used for inode, block and bucket. |
6151 | * | ||
6152 | * NOTE: | ||
6153 | * Before we call this function, the caller has memcpy the xattr in | ||
6154 | * old_xh to the new_xh. | ||
6155 | * | ||
6156 | * If args.xattr_reflinked is set, call it to decide whether the xe should | ||
6157 | * be reflinked or not. If not, remove it from the new xattr header. | ||
6158 | */ | ||
6159 | static int ocfs2_reflink_xattr_header(handle_t *handle, | ||
6160 | struct ocfs2_xattr_reflink *args, | ||
6161 | struct buffer_head *old_bh, | ||
6162 | struct ocfs2_xattr_header *xh, | ||
6163 | struct buffer_head *new_bh, | ||
6164 | struct ocfs2_xattr_header *new_xh, | ||
6165 | struct ocfs2_xattr_value_buf *vb, | ||
6166 | struct ocfs2_alloc_context *meta_ac, | ||
6167 | get_xattr_value_root *func, | ||
6168 | void *para) | ||
6169 | { | ||
6170 | int ret = 0, i, j; | ||
6171 | struct super_block *sb = args->old_inode->i_sb; | ||
6172 | struct buffer_head *value_bh; | ||
6173 | struct ocfs2_xattr_entry *xe, *last; | ||
6174 | struct ocfs2_xattr_value_root *xv, *new_xv; | ||
6175 | struct ocfs2_extent_tree data_et; | ||
6176 | u32 clusters, cpos, p_cluster, num_clusters; | ||
6177 | unsigned int ext_flags = 0; | ||
6178 | |||
6179 | mlog(0, "reflink xattr in container %llu, count = %u\n", | ||
6180 | (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count)); | ||
6181 | |||
6182 | last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)]; | ||
6183 | for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) { | ||
6184 | xe = &xh->xh_entries[i]; | ||
6185 | |||
6186 | if (args->xattr_reflinked && !args->xattr_reflinked(xe)) { | ||
6187 | xe = &new_xh->xh_entries[j]; | ||
6188 | |||
6189 | le16_add_cpu(&new_xh->xh_count, -1); | ||
6190 | if (new_xh->xh_count) { | ||
6191 | memmove(xe, xe + 1, | ||
6192 | (void *)last - (void *)xe); | ||
6193 | memset(last, 0, | ||
6194 | sizeof(struct ocfs2_xattr_entry)); | ||
6195 | } | ||
6196 | |||
6197 | /* | ||
6198 | * We don't want j to increase in the next round since | ||
6199 | * it is already moved ahead. | ||
6200 | */ | ||
6201 | j--; | ||
6202 | continue; | ||
6203 | } | ||
6204 | |||
6205 | if (ocfs2_xattr_is_local(xe)) | ||
6206 | continue; | ||
6207 | |||
6208 | ret = func(sb, old_bh, xh, i, &xv, NULL, para); | ||
6209 | if (ret) { | ||
6210 | mlog_errno(ret); | ||
6211 | break; | ||
6212 | } | ||
6213 | |||
6214 | ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para); | ||
6215 | if (ret) { | ||
6216 | mlog_errno(ret); | ||
6217 | break; | ||
6218 | } | ||
6219 | |||
6220 | /* | ||
6221 | * For the xattr which has l_tree_depth = 0, all the extent | ||
6222 | * recs have already be copied to the new xh with the | ||
6223 | * propriate OCFS2_EXT_REFCOUNTED flag we just need to | ||
6224 | * increase the refount count int the refcount tree. | ||
6225 | * | ||
6226 | * For the xattr which has l_tree_depth > 0, we need | ||
6227 | * to initialize it to the empty default value root, | ||
6228 | * and then insert the extents one by one. | ||
6229 | */ | ||
6230 | if (xv->xr_list.l_tree_depth) { | ||
6231 | memcpy(new_xv, &def_xv, sizeof(def_xv)); | ||
6232 | vb->vb_xv = new_xv; | ||
6233 | vb->vb_bh = value_bh; | ||
6234 | ocfs2_init_xattr_value_extent_tree(&data_et, | ||
6235 | INODE_CACHE(args->new_inode), vb); | ||
6236 | } | ||
6237 | |||
6238 | clusters = le32_to_cpu(xv->xr_clusters); | ||
6239 | cpos = 0; | ||
6240 | while (cpos < clusters) { | ||
6241 | ret = ocfs2_xattr_get_clusters(args->old_inode, | ||
6242 | cpos, | ||
6243 | &p_cluster, | ||
6244 | &num_clusters, | ||
6245 | &xv->xr_list, | ||
6246 | &ext_flags); | ||
6247 | if (ret) { | ||
6248 | mlog_errno(ret); | ||
6249 | goto out; | ||
6250 | } | ||
6251 | |||
6252 | BUG_ON(!p_cluster); | ||
6253 | |||
6254 | if (xv->xr_list.l_tree_depth) { | ||
6255 | ret = ocfs2_insert_extent(handle, | ||
6256 | &data_et, cpos, | ||
6257 | ocfs2_clusters_to_blocks( | ||
6258 | args->old_inode->i_sb, | ||
6259 | p_cluster), | ||
6260 | num_clusters, ext_flags, | ||
6261 | meta_ac); | ||
6262 | if (ret) { | ||
6263 | mlog_errno(ret); | ||
6264 | goto out; | ||
6265 | } | ||
6266 | } | ||
6267 | |||
6268 | ret = ocfs2_increase_refcount(handle, args->ref_ci, | ||
6269 | args->ref_root_bh, | ||
6270 | p_cluster, num_clusters, | ||
6271 | meta_ac, args->dealloc); | ||
6272 | if (ret) { | ||
6273 | mlog_errno(ret); | ||
6274 | goto out; | ||
6275 | } | ||
6276 | |||
6277 | cpos += num_clusters; | ||
6278 | } | ||
6279 | } | ||
6280 | |||
6281 | out: | ||
6282 | return ret; | ||
6283 | } | ||
6284 | |||
6285 | static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) | ||
6286 | { | ||
6287 | int ret = 0, credits = 0; | ||
6288 | handle_t *handle; | ||
6289 | struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb); | ||
6290 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data; | ||
6291 | int inline_size = le16_to_cpu(di->i_xattr_inline_size); | ||
6292 | int header_off = osb->sb->s_blocksize - inline_size; | ||
6293 | struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *) | ||
6294 | (args->old_bh->b_data + header_off); | ||
6295 | struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *) | ||
6296 | (args->new_bh->b_data + header_off); | ||
6297 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
6298 | struct ocfs2_inode_info *new_oi; | ||
6299 | struct ocfs2_dinode *new_di; | ||
6300 | struct ocfs2_xattr_value_buf vb = { | ||
6301 | .vb_bh = args->new_bh, | ||
6302 | .vb_access = ocfs2_journal_access_di, | ||
6303 | }; | ||
6304 | |||
6305 | ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, | ||
6306 | &credits, &meta_ac); | ||
6307 | if (ret) { | ||
6308 | mlog_errno(ret); | ||
6309 | goto out; | ||
6310 | } | ||
6311 | |||
6312 | handle = ocfs2_start_trans(osb, credits); | ||
6313 | if (IS_ERR(handle)) { | ||
6314 | ret = PTR_ERR(handle); | ||
6315 | mlog_errno(ret); | ||
6316 | goto out; | ||
6317 | } | ||
6318 | |||
6319 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode), | ||
6320 | args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE); | ||
6321 | if (ret) { | ||
6322 | mlog_errno(ret); | ||
6323 | goto out_commit; | ||
6324 | } | ||
6325 | |||
6326 | memcpy(args->new_bh->b_data + header_off, | ||
6327 | args->old_bh->b_data + header_off, inline_size); | ||
6328 | |||
6329 | new_di = (struct ocfs2_dinode *)args->new_bh->b_data; | ||
6330 | new_di->i_xattr_inline_size = cpu_to_le16(inline_size); | ||
6331 | |||
6332 | ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh, | ||
6333 | args->new_bh, new_xh, &vb, meta_ac, | ||
6334 | ocfs2_get_xattr_value_root, NULL); | ||
6335 | if (ret) { | ||
6336 | mlog_errno(ret); | ||
6337 | goto out_commit; | ||
6338 | } | ||
6339 | |||
6340 | new_oi = OCFS2_I(args->new_inode); | ||
6341 | spin_lock(&new_oi->ip_lock); | ||
6342 | new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; | ||
6343 | new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); | ||
6344 | spin_unlock(&new_oi->ip_lock); | ||
6345 | |||
6346 | ocfs2_journal_dirty(handle, args->new_bh); | ||
6347 | |||
6348 | out_commit: | ||
6349 | ocfs2_commit_trans(osb, handle); | ||
6350 | |||
6351 | out: | ||
6352 | if (meta_ac) | ||
6353 | ocfs2_free_alloc_context(meta_ac); | ||
6354 | return ret; | ||
6355 | } | ||
6356 | |||
6357 | static int ocfs2_create_empty_xattr_block(struct inode *inode, | ||
6358 | struct buffer_head *fe_bh, | ||
6359 | struct buffer_head **ret_bh, | ||
6360 | int indexed) | ||
6361 | { | ||
6362 | int ret; | ||
6363 | handle_t *handle; | ||
6364 | struct ocfs2_alloc_context *meta_ac; | ||
6365 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
6366 | |||
6367 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); | ||
6368 | if (ret < 0) { | ||
6369 | mlog_errno(ret); | ||
6370 | return ret; | ||
6371 | } | ||
6372 | |||
6373 | handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS); | ||
6374 | if (IS_ERR(handle)) { | ||
6375 | ret = PTR_ERR(handle); | ||
6376 | mlog_errno(ret); | ||
6377 | goto out; | ||
6378 | } | ||
6379 | |||
6380 | mlog(0, "create new xattr block for inode %llu, index = %d\n", | ||
6381 | (unsigned long long)fe_bh->b_blocknr, indexed); | ||
6382 | ret = ocfs2_create_xattr_block(handle, inode, fe_bh, | ||
6383 | meta_ac, ret_bh, indexed); | ||
6384 | if (ret) | ||
6385 | mlog_errno(ret); | ||
6386 | |||
6387 | ocfs2_commit_trans(osb, handle); | ||
6388 | out: | ||
6389 | ocfs2_free_alloc_context(meta_ac); | ||
6390 | return ret; | ||
6391 | } | ||
6392 | |||
6393 | static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args, | ||
6394 | struct buffer_head *blk_bh, | ||
6395 | struct buffer_head *new_blk_bh) | ||
6396 | { | ||
6397 | int ret = 0, credits = 0; | ||
6398 | handle_t *handle; | ||
6399 | struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode); | ||
6400 | struct ocfs2_dinode *new_di; | ||
6401 | struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb); | ||
6402 | int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); | ||
6403 | struct ocfs2_xattr_block *xb = | ||
6404 | (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
6405 | struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header; | ||
6406 | struct ocfs2_xattr_block *new_xb = | ||
6407 | (struct ocfs2_xattr_block *)new_blk_bh->b_data; | ||
6408 | struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header; | ||
6409 | struct ocfs2_alloc_context *meta_ac; | ||
6410 | struct ocfs2_xattr_value_buf vb = { | ||
6411 | .vb_bh = new_blk_bh, | ||
6412 | .vb_access = ocfs2_journal_access_xb, | ||
6413 | }; | ||
6414 | |||
6415 | ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh, | ||
6416 | &credits, &meta_ac); | ||
6417 | if (ret) { | ||
6418 | mlog_errno(ret); | ||
6419 | return ret; | ||
6420 | } | ||
6421 | |||
6422 | /* One more credits in case we need to add xattr flags in new inode. */ | ||
6423 | handle = ocfs2_start_trans(osb, credits + 1); | ||
6424 | if (IS_ERR(handle)) { | ||
6425 | ret = PTR_ERR(handle); | ||
6426 | mlog_errno(ret); | ||
6427 | goto out; | ||
6428 | } | ||
6429 | |||
6430 | if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { | ||
6431 | ret = ocfs2_journal_access_di(handle, | ||
6432 | INODE_CACHE(args->new_inode), | ||
6433 | args->new_bh, | ||
6434 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
6435 | if (ret) { | ||
6436 | mlog_errno(ret); | ||
6437 | goto out_commit; | ||
6438 | } | ||
6439 | } | ||
6440 | |||
6441 | ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode), | ||
6442 | new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE); | ||
6443 | if (ret) { | ||
6444 | mlog_errno(ret); | ||
6445 | goto out_commit; | ||
6446 | } | ||
6447 | |||
6448 | memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off, | ||
6449 | osb->sb->s_blocksize - header_off); | ||
6450 | |||
6451 | ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh, | ||
6452 | new_blk_bh, new_xh, &vb, meta_ac, | ||
6453 | ocfs2_get_xattr_value_root, NULL); | ||
6454 | if (ret) { | ||
6455 | mlog_errno(ret); | ||
6456 | goto out_commit; | ||
6457 | } | ||
6458 | |||
6459 | ocfs2_journal_dirty(handle, new_blk_bh); | ||
6460 | |||
6461 | if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) { | ||
6462 | new_di = (struct ocfs2_dinode *)args->new_bh->b_data; | ||
6463 | spin_lock(&new_oi->ip_lock); | ||
6464 | new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL; | ||
6465 | new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); | ||
6466 | spin_unlock(&new_oi->ip_lock); | ||
6467 | |||
6468 | ocfs2_journal_dirty(handle, args->new_bh); | ||
6469 | } | ||
6470 | |||
6471 | out_commit: | ||
6472 | ocfs2_commit_trans(osb, handle); | ||
6473 | |||
6474 | out: | ||
6475 | ocfs2_free_alloc_context(meta_ac); | ||
6476 | return ret; | ||
6477 | } | ||
6478 | |||
6479 | struct ocfs2_reflink_xattr_tree_args { | ||
6480 | struct ocfs2_xattr_reflink *reflink; | ||
6481 | struct buffer_head *old_blk_bh; | ||
6482 | struct buffer_head *new_blk_bh; | ||
6483 | struct ocfs2_xattr_bucket *old_bucket; | ||
6484 | struct ocfs2_xattr_bucket *new_bucket; | ||
6485 | }; | ||
6486 | |||
6487 | /* | ||
6488 | * NOTE: | ||
6489 | * We have to handle the case that both old bucket and new bucket | ||
6490 | * will call this function to get the right ret_bh. | ||
6491 | * So The caller must give us the right bh. | ||
6492 | */ | ||
6493 | static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb, | ||
6494 | struct buffer_head *bh, | ||
6495 | struct ocfs2_xattr_header *xh, | ||
6496 | int offset, | ||
6497 | struct ocfs2_xattr_value_root **xv, | ||
6498 | struct buffer_head **ret_bh, | ||
6499 | void *para) | ||
6500 | { | ||
6501 | struct ocfs2_reflink_xattr_tree_args *args = | ||
6502 | (struct ocfs2_reflink_xattr_tree_args *)para; | ||
6503 | struct ocfs2_xattr_bucket *bucket; | ||
6504 | |||
6505 | if (bh == args->old_bucket->bu_bhs[0]) | ||
6506 | bucket = args->old_bucket; | ||
6507 | else | ||
6508 | bucket = args->new_bucket; | ||
6509 | |||
6510 | return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, | ||
6511 | xv, ret_bh); | ||
6512 | } | ||
6513 | |||
6514 | struct ocfs2_value_tree_metas { | ||
6515 | int num_metas; | ||
6516 | int credits; | ||
6517 | int num_recs; | ||
6518 | }; | ||
6519 | |||
6520 | static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb, | ||
6521 | struct buffer_head *bh, | ||
6522 | struct ocfs2_xattr_header *xh, | ||
6523 | int offset, | ||
6524 | struct ocfs2_xattr_value_root **xv, | ||
6525 | struct buffer_head **ret_bh, | ||
6526 | void *para) | ||
6527 | { | ||
6528 | struct ocfs2_xattr_bucket *bucket = | ||
6529 | (struct ocfs2_xattr_bucket *)para; | ||
6530 | |||
6531 | return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, | ||
6532 | xv, ret_bh); | ||
6533 | } | ||
6534 | |||
6535 | static int ocfs2_calc_value_tree_metas(struct inode *inode, | ||
6536 | struct ocfs2_xattr_bucket *bucket, | ||
6537 | void *para) | ||
6538 | { | ||
6539 | struct ocfs2_value_tree_metas *metas = | ||
6540 | (struct ocfs2_value_tree_metas *)para; | ||
6541 | struct ocfs2_xattr_header *xh = | ||
6542 | (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data; | ||
6543 | |||
6544 | /* Add the credits for this bucket first. */ | ||
6545 | metas->credits += bucket->bu_blocks; | ||
6546 | return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0], | ||
6547 | xh, &metas->num_metas, | ||
6548 | &metas->credits, &metas->num_recs, | ||
6549 | ocfs2_value_tree_metas_in_bucket, | ||
6550 | bucket); | ||
6551 | } | ||
6552 | |||
6553 | /* | ||
6554 | * Given a xattr extent rec starting from blkno and having len clusters, | ||
6555 | * iterate all the buckets calculate how much metadata we need for reflinking | ||
6556 | * all the ocfs2_xattr_value_root and lock the allocators accordingly. | ||
6557 | */ | ||
6558 | static int ocfs2_lock_reflink_xattr_rec_allocators( | ||
6559 | struct ocfs2_reflink_xattr_tree_args *args, | ||
6560 | struct ocfs2_extent_tree *xt_et, | ||
6561 | u64 blkno, u32 len, int *credits, | ||
6562 | struct ocfs2_alloc_context **meta_ac, | ||
6563 | struct ocfs2_alloc_context **data_ac) | ||
6564 | { | ||
6565 | int ret, num_free_extents; | ||
6566 | struct ocfs2_value_tree_metas metas; | ||
6567 | struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb); | ||
6568 | struct ocfs2_refcount_block *rb; | ||
6569 | |||
6570 | memset(&metas, 0, sizeof(metas)); | ||
6571 | |||
6572 | ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len, | ||
6573 | ocfs2_calc_value_tree_metas, &metas); | ||
6574 | if (ret) { | ||
6575 | mlog_errno(ret); | ||
6576 | goto out; | ||
6577 | } | ||
6578 | |||
6579 | *credits = metas.credits; | ||
6580 | |||
6581 | /* | ||
6582 | * Calculate we need for refcount tree change. | ||
6583 | * | ||
6584 | * We need to add/modify num_recs in refcount tree, so just calculate | ||
6585 | * an approximate number we need for refcount tree change. | ||
6586 | * Sometimes we need to split the tree, and after split, half recs | ||
6587 | * will be moved to the new block, and a new block can only provide | ||
6588 | * half number of recs. So we multiple new blocks by 2. | ||
6589 | * In the end, we have to add credits for modifying the already | ||
6590 | * existed refcount block. | ||
6591 | */ | ||
6592 | rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data; | ||
6593 | metas.num_recs = | ||
6594 | (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) / | ||
6595 | ocfs2_refcount_recs_per_rb(osb->sb) * 2; | ||
6596 | metas.num_metas += metas.num_recs; | ||
6597 | *credits += metas.num_recs + | ||
6598 | metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; | ||
6599 | if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) | ||
6600 | *credits += le16_to_cpu(rb->rf_list.l_tree_depth) * | ||
6601 | le16_to_cpu(rb->rf_list.l_next_free_rec) + 1; | ||
6602 | else | ||
6603 | *credits += 1; | ||
6604 | |||
6605 | /* count in the xattr tree change. */ | ||
6606 | num_free_extents = ocfs2_num_free_extents(osb, xt_et); | ||
6607 | if (num_free_extents < 0) { | ||
6608 | ret = num_free_extents; | ||
6609 | mlog_errno(ret); | ||
6610 | goto out; | ||
6611 | } | ||
6612 | |||
6613 | if (num_free_extents < len) | ||
6614 | metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); | ||
6615 | |||
6616 | *credits += ocfs2_calc_extend_credits(osb->sb, | ||
6617 | xt_et->et_root_el, len); | ||
6618 | |||
6619 | if (metas.num_metas) { | ||
6620 | ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, | ||
6621 | meta_ac); | ||
5286 | if (ret) { | 6622 | if (ret) { |
5287 | mlog_errno(ret); | 6623 | mlog_errno(ret); |
5288 | goto out; | 6624 | goto out; |
5289 | } | 6625 | } |
6626 | } | ||
5290 | 6627 | ||
5291 | ret = ocfs2_rm_xattr_cluster(inode, xb_bh, | 6628 | if (len) { |
5292 | p_blkno, e_cpos, num_clusters); | 6629 | ret = ocfs2_reserve_clusters(osb, len, data_ac); |
6630 | if (ret) | ||
6631 | mlog_errno(ret); | ||
6632 | } | ||
6633 | out: | ||
6634 | if (ret) { | ||
6635 | if (*meta_ac) { | ||
6636 | ocfs2_free_alloc_context(*meta_ac); | ||
6637 | meta_ac = NULL; | ||
6638 | } | ||
6639 | } | ||
6640 | |||
6641 | return ret; | ||
6642 | } | ||
6643 | |||
6644 | static int ocfs2_reflink_xattr_buckets(handle_t *handle, | ||
6645 | u64 blkno, u64 new_blkno, u32 clusters, | ||
6646 | struct ocfs2_alloc_context *meta_ac, | ||
6647 | struct ocfs2_alloc_context *data_ac, | ||
6648 | struct ocfs2_reflink_xattr_tree_args *args) | ||
6649 | { | ||
6650 | int i, j, ret = 0; | ||
6651 | struct super_block *sb = args->reflink->old_inode->i_sb; | ||
6652 | u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb)); | ||
6653 | u32 num_buckets = clusters * bpc; | ||
6654 | int bpb = args->old_bucket->bu_blocks; | ||
6655 | struct ocfs2_xattr_value_buf vb = { | ||
6656 | .vb_access = ocfs2_journal_access, | ||
6657 | }; | ||
6658 | |||
6659 | for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) { | ||
6660 | ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno); | ||
5293 | if (ret) { | 6661 | if (ret) { |
5294 | mlog_errno(ret); | 6662 | mlog_errno(ret); |
5295 | break; | 6663 | break; |
5296 | } | 6664 | } |
5297 | 6665 | ||
5298 | if (e_cpos == 0) | 6666 | ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno); |
6667 | if (ret) { | ||
6668 | mlog_errno(ret); | ||
5299 | break; | 6669 | break; |
6670 | } | ||
5300 | 6671 | ||
5301 | name_hash = e_cpos - 1; | 6672 | /* |
6673 | * The real bucket num in this series of blocks is stored | ||
6674 | * in the 1st bucket. | ||
6675 | */ | ||
6676 | if (i == 0) | ||
6677 | num_buckets = le16_to_cpu( | ||
6678 | bucket_xh(args->old_bucket)->xh_num_buckets); | ||
6679 | |||
6680 | ret = ocfs2_xattr_bucket_journal_access(handle, | ||
6681 | args->new_bucket, | ||
6682 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
6683 | if (ret) { | ||
6684 | mlog_errno(ret); | ||
6685 | break; | ||
6686 | } | ||
6687 | |||
6688 | for (j = 0; j < bpb; j++) | ||
6689 | memcpy(bucket_block(args->new_bucket, j), | ||
6690 | bucket_block(args->old_bucket, j), | ||
6691 | sb->s_blocksize); | ||
6692 | |||
6693 | ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); | ||
6694 | |||
6695 | ret = ocfs2_reflink_xattr_header(handle, args->reflink, | ||
6696 | args->old_bucket->bu_bhs[0], | ||
6697 | bucket_xh(args->old_bucket), | ||
6698 | args->new_bucket->bu_bhs[0], | ||
6699 | bucket_xh(args->new_bucket), | ||
6700 | &vb, meta_ac, | ||
6701 | ocfs2_get_reflink_xattr_value_root, | ||
6702 | args); | ||
6703 | if (ret) { | ||
6704 | mlog_errno(ret); | ||
6705 | break; | ||
6706 | } | ||
6707 | |||
6708 | /* | ||
6709 | * Re-access and dirty the bucket to calculate metaecc. | ||
6710 | * Because we may extend the transaction in reflink_xattr_header | ||
6711 | * which will let the already accessed block gone. | ||
6712 | */ | ||
6713 | ret = ocfs2_xattr_bucket_journal_access(handle, | ||
6714 | args->new_bucket, | ||
6715 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
6716 | if (ret) { | ||
6717 | mlog_errno(ret); | ||
6718 | break; | ||
6719 | } | ||
6720 | |||
6721 | ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); | ||
6722 | ocfs2_xattr_bucket_relse(args->old_bucket); | ||
6723 | ocfs2_xattr_bucket_relse(args->new_bucket); | ||
6724 | } | ||
6725 | |||
6726 | ocfs2_xattr_bucket_relse(args->old_bucket); | ||
6727 | ocfs2_xattr_bucket_relse(args->new_bucket); | ||
6728 | return ret; | ||
6729 | } | ||
6730 | /* | ||
6731 | * Create the same xattr extent record in the new inode's xattr tree. | ||
6732 | */ | ||
6733 | static int ocfs2_reflink_xattr_rec(struct inode *inode, | ||
6734 | struct buffer_head *root_bh, | ||
6735 | u64 blkno, | ||
6736 | u32 cpos, | ||
6737 | u32 len, | ||
6738 | void *para) | ||
6739 | { | ||
6740 | int ret, credits = 0; | ||
6741 | u32 p_cluster, num_clusters; | ||
6742 | u64 new_blkno; | ||
6743 | handle_t *handle; | ||
6744 | struct ocfs2_reflink_xattr_tree_args *args = | ||
6745 | (struct ocfs2_reflink_xattr_tree_args *)para; | ||
6746 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
6747 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
6748 | struct ocfs2_alloc_context *data_ac = NULL; | ||
6749 | struct ocfs2_extent_tree et; | ||
6750 | |||
6751 | ocfs2_init_xattr_tree_extent_tree(&et, | ||
6752 | INODE_CACHE(args->reflink->new_inode), | ||
6753 | args->new_blk_bh); | ||
6754 | |||
6755 | ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno, | ||
6756 | len, &credits, | ||
6757 | &meta_ac, &data_ac); | ||
6758 | if (ret) { | ||
6759 | mlog_errno(ret); | ||
6760 | goto out; | ||
6761 | } | ||
6762 | |||
6763 | handle = ocfs2_start_trans(osb, credits); | ||
6764 | if (IS_ERR(handle)) { | ||
6765 | ret = PTR_ERR(handle); | ||
6766 | mlog_errno(ret); | ||
6767 | goto out; | ||
6768 | } | ||
6769 | |||
6770 | ret = ocfs2_claim_clusters(osb, handle, data_ac, | ||
6771 | len, &p_cluster, &num_clusters); | ||
6772 | if (ret) { | ||
6773 | mlog_errno(ret); | ||
6774 | goto out_commit; | ||
6775 | } | ||
6776 | |||
6777 | new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster); | ||
6778 | |||
6779 | mlog(0, "reflink xattr buckets %llu to %llu, len %u\n", | ||
6780 | (unsigned long long)blkno, (unsigned long long)new_blkno, len); | ||
6781 | ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len, | ||
6782 | meta_ac, data_ac, args); | ||
6783 | if (ret) { | ||
6784 | mlog_errno(ret); | ||
6785 | goto out_commit; | ||
6786 | } | ||
6787 | |||
6788 | mlog(0, "insert new xattr extent rec start %llu len %u to %u\n", | ||
6789 | (unsigned long long)new_blkno, len, cpos); | ||
6790 | ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno, | ||
6791 | len, 0, meta_ac); | ||
6792 | if (ret) | ||
6793 | mlog_errno(ret); | ||
6794 | |||
6795 | out_commit: | ||
6796 | ocfs2_commit_trans(osb, handle); | ||
6797 | |||
6798 | out: | ||
6799 | if (meta_ac) | ||
6800 | ocfs2_free_alloc_context(meta_ac); | ||
6801 | if (data_ac) | ||
6802 | ocfs2_free_alloc_context(data_ac); | ||
6803 | return ret; | ||
6804 | } | ||
6805 | |||
6806 | /* | ||
6807 | * Create reflinked xattr buckets. | ||
6808 | * We will add bucket one by one, and refcount all the xattrs in the bucket | ||
6809 | * if they are stored outside. | ||
6810 | */ | ||
6811 | static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args, | ||
6812 | struct buffer_head *blk_bh, | ||
6813 | struct buffer_head *new_blk_bh) | ||
6814 | { | ||
6815 | int ret; | ||
6816 | struct ocfs2_reflink_xattr_tree_args para; | ||
6817 | |||
6818 | memset(¶, 0, sizeof(para)); | ||
6819 | para.reflink = args; | ||
6820 | para.old_blk_bh = blk_bh; | ||
6821 | para.new_blk_bh = new_blk_bh; | ||
6822 | |||
6823 | para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode); | ||
6824 | if (!para.old_bucket) { | ||
6825 | mlog_errno(-ENOMEM); | ||
6826 | return -ENOMEM; | ||
6827 | } | ||
6828 | |||
6829 | para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode); | ||
6830 | if (!para.new_bucket) { | ||
6831 | ret = -ENOMEM; | ||
6832 | mlog_errno(ret); | ||
6833 | goto out; | ||
6834 | } | ||
6835 | |||
6836 | ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh, | ||
6837 | ocfs2_reflink_xattr_rec, | ||
6838 | ¶); | ||
6839 | if (ret) | ||
6840 | mlog_errno(ret); | ||
6841 | |||
6842 | out: | ||
6843 | ocfs2_xattr_bucket_free(para.old_bucket); | ||
6844 | ocfs2_xattr_bucket_free(para.new_bucket); | ||
6845 | return ret; | ||
6846 | } | ||
6847 | |||
6848 | static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, | ||
6849 | struct buffer_head *blk_bh) | ||
6850 | { | ||
6851 | int ret, indexed = 0; | ||
6852 | struct buffer_head *new_blk_bh = NULL; | ||
6853 | struct ocfs2_xattr_block *xb = | ||
6854 | (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
6855 | |||
6856 | |||
6857 | if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) | ||
6858 | indexed = 1; | ||
6859 | |||
6860 | ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh, | ||
6861 | &new_blk_bh, indexed); | ||
6862 | if (ret) { | ||
6863 | mlog_errno(ret); | ||
6864 | goto out; | ||
6865 | } | ||
6866 | |||
6867 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) | ||
6868 | ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); | ||
6869 | else | ||
6870 | ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); | ||
6871 | if (ret) | ||
6872 | mlog_errno(ret); | ||
6873 | |||
6874 | out: | ||
6875 | brelse(new_blk_bh); | ||
6876 | return ret; | ||
6877 | } | ||
6878 | |||
6879 | static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe) | ||
6880 | { | ||
6881 | int type = ocfs2_xattr_get_type(xe); | ||
6882 | |||
6883 | return type != OCFS2_XATTR_INDEX_SECURITY && | ||
6884 | type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS && | ||
6885 | type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; | ||
6886 | } | ||
6887 | |||
6888 | int ocfs2_reflink_xattrs(struct inode *old_inode, | ||
6889 | struct buffer_head *old_bh, | ||
6890 | struct inode *new_inode, | ||
6891 | struct buffer_head *new_bh, | ||
6892 | bool preserve_security) | ||
6893 | { | ||
6894 | int ret; | ||
6895 | struct ocfs2_xattr_reflink args; | ||
6896 | struct ocfs2_inode_info *oi = OCFS2_I(old_inode); | ||
6897 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data; | ||
6898 | struct buffer_head *blk_bh = NULL; | ||
6899 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
6900 | struct ocfs2_refcount_tree *ref_tree; | ||
6901 | struct buffer_head *ref_root_bh = NULL; | ||
6902 | |||
6903 | ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb), | ||
6904 | le64_to_cpu(di->i_refcount_loc), | ||
6905 | 1, &ref_tree, &ref_root_bh); | ||
6906 | if (ret) { | ||
6907 | mlog_errno(ret); | ||
6908 | goto out; | ||
6909 | } | ||
6910 | |||
6911 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
6912 | |||
6913 | args.old_inode = old_inode; | ||
6914 | args.new_inode = new_inode; | ||
6915 | args.old_bh = old_bh; | ||
6916 | args.new_bh = new_bh; | ||
6917 | args.ref_ci = &ref_tree->rf_ci; | ||
6918 | args.ref_root_bh = ref_root_bh; | ||
6919 | args.dealloc = &dealloc; | ||
6920 | if (preserve_security) | ||
6921 | args.xattr_reflinked = NULL; | ||
6922 | else | ||
6923 | args.xattr_reflinked = ocfs2_reflink_xattr_no_security; | ||
6924 | |||
6925 | if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { | ||
6926 | ret = ocfs2_reflink_xattr_inline(&args); | ||
6927 | if (ret) { | ||
6928 | mlog_errno(ret); | ||
6929 | goto out_unlock; | ||
6930 | } | ||
6931 | } | ||
6932 | |||
6933 | if (!di->i_xattr_loc) | ||
6934 | goto out_unlock; | ||
6935 | |||
6936 | ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc), | ||
6937 | &blk_bh); | ||
6938 | if (ret < 0) { | ||
6939 | mlog_errno(ret); | ||
6940 | goto out_unlock; | ||
6941 | } | ||
6942 | |||
6943 | ret = ocfs2_reflink_xattr_in_block(&args, blk_bh); | ||
6944 | if (ret) | ||
6945 | mlog_errno(ret); | ||
6946 | |||
6947 | brelse(blk_bh); | ||
6948 | |||
6949 | out_unlock: | ||
6950 | ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb), | ||
6951 | ref_tree, 1); | ||
6952 | brelse(ref_root_bh); | ||
6953 | |||
6954 | if (ocfs2_dealloc_has_cluster(&dealloc)) { | ||
6955 | ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1); | ||
6956 | ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc); | ||
5302 | } | 6957 | } |
5303 | 6958 | ||
5304 | out: | 6959 | out: |
@@ -5306,6 +6961,51 @@ out: | |||
5306 | } | 6961 | } |
5307 | 6962 | ||
5308 | /* | 6963 | /* |
6964 | * Initialize security and acl for a already created inode. | ||
6965 | * Used for reflink a non-preserve-security file. | ||
6966 | * | ||
6967 | * It uses common api like ocfs2_xattr_set, so the caller | ||
6968 | * must not hold any lock expect i_mutex. | ||
6969 | */ | ||
6970 | int ocfs2_init_security_and_acl(struct inode *dir, | ||
6971 | struct inode *inode) | ||
6972 | { | ||
6973 | int ret = 0; | ||
6974 | struct buffer_head *dir_bh = NULL; | ||
6975 | struct ocfs2_security_xattr_info si = { | ||
6976 | .enable = 1, | ||
6977 | }; | ||
6978 | |||
6979 | ret = ocfs2_init_security_get(inode, dir, &si); | ||
6980 | if (!ret) { | ||
6981 | ret = ocfs2_xattr_security_set(inode, si.name, | ||
6982 | si.value, si.value_len, | ||
6983 | XATTR_CREATE); | ||
6984 | if (ret) { | ||
6985 | mlog_errno(ret); | ||
6986 | goto leave; | ||
6987 | } | ||
6988 | } else if (ret != -EOPNOTSUPP) { | ||
6989 | mlog_errno(ret); | ||
6990 | goto leave; | ||
6991 | } | ||
6992 | |||
6993 | ret = ocfs2_inode_lock(dir, &dir_bh, 0); | ||
6994 | if (ret) { | ||
6995 | mlog_errno(ret); | ||
6996 | goto leave; | ||
6997 | } | ||
6998 | |||
6999 | ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); | ||
7000 | if (ret) | ||
7001 | mlog_errno(ret); | ||
7002 | |||
7003 | ocfs2_inode_unlock(dir, 0); | ||
7004 | brelse(dir_bh); | ||
7005 | leave: | ||
7006 | return ret; | ||
7007 | } | ||
7008 | /* | ||
5309 | * 'security' attributes support | 7009 | * 'security' attributes support |
5310 | */ | 7010 | */ |
5311 | static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, | 7011 | static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, |
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 1ca7e9a1b7bc..08e36389f56d 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h | |||
@@ -55,6 +55,8 @@ int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *, | |||
55 | int, const char *, const void *, size_t, int, | 55 | int, const char *, const void *, size_t, int, |
56 | struct ocfs2_alloc_context *, | 56 | struct ocfs2_alloc_context *, |
57 | struct ocfs2_alloc_context *); | 57 | struct ocfs2_alloc_context *); |
58 | int ocfs2_has_inline_xattr_value_outside(struct inode *inode, | ||
59 | struct ocfs2_dinode *di); | ||
58 | int ocfs2_xattr_remove(struct inode *, struct buffer_head *); | 60 | int ocfs2_xattr_remove(struct inode *, struct buffer_head *); |
59 | int ocfs2_init_security_get(struct inode *, struct inode *, | 61 | int ocfs2_init_security_get(struct inode *, struct inode *, |
60 | struct ocfs2_security_xattr_info *); | 62 | struct ocfs2_security_xattr_info *); |
@@ -83,5 +85,16 @@ struct ocfs2_xattr_value_buf { | |||
83 | struct ocfs2_xattr_value_root *vb_xv; | 85 | struct ocfs2_xattr_value_root *vb_xv; |
84 | }; | 86 | }; |
85 | 87 | ||
86 | 88 | int ocfs2_xattr_attach_refcount_tree(struct inode *inode, | |
89 | struct buffer_head *fe_bh, | ||
90 | struct ocfs2_caching_info *ref_ci, | ||
91 | struct buffer_head *ref_root_bh, | ||
92 | struct ocfs2_cached_dealloc_ctxt *dealloc); | ||
93 | int ocfs2_reflink_xattrs(struct inode *old_inode, | ||
94 | struct buffer_head *old_bh, | ||
95 | struct inode *new_inode, | ||
96 | struct buffer_head *new_bh, | ||
97 | bool preserve_security); | ||
98 | int ocfs2_init_security_and_acl(struct inode *dir, | ||
99 | struct inode *inode); | ||
87 | #endif /* OCFS2_XATTR_H */ | 100 | #endif /* OCFS2_XATTR_H */ |
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index c7275cfbdcfb..3680bae335b5 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c | |||
@@ -489,7 +489,7 @@ out: | |||
489 | return ret; | 489 | return ret; |
490 | } | 490 | } |
491 | 491 | ||
492 | struct inode_operations omfs_dir_inops = { | 492 | const struct inode_operations omfs_dir_inops = { |
493 | .lookup = omfs_lookup, | 493 | .lookup = omfs_lookup, |
494 | .mkdir = omfs_mkdir, | 494 | .mkdir = omfs_mkdir, |
495 | .rename = omfs_rename, | 495 | .rename = omfs_rename, |
diff --git a/fs/omfs/file.c b/fs/omfs/file.c index d17e774eaf45..4845fbb18e6e 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c | |||
@@ -333,11 +333,11 @@ struct file_operations omfs_file_operations = { | |||
333 | .splice_read = generic_file_splice_read, | 333 | .splice_read = generic_file_splice_read, |
334 | }; | 334 | }; |
335 | 335 | ||
336 | struct inode_operations omfs_file_inops = { | 336 | const struct inode_operations omfs_file_inops = { |
337 | .truncate = omfs_truncate | 337 | .truncate = omfs_truncate |
338 | }; | 338 | }; |
339 | 339 | ||
340 | struct address_space_operations omfs_aops = { | 340 | const struct address_space_operations omfs_aops = { |
341 | .readpage = omfs_readpage, | 341 | .readpage = omfs_readpage, |
342 | .readpages = omfs_readpages, | 342 | .readpages = omfs_readpages, |
343 | .writepage = omfs_writepage, | 343 | .writepage = omfs_writepage, |
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 379ae5fb4411..f3b7c1541f3a 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c | |||
@@ -278,7 +278,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
278 | return 0; | 278 | return 0; |
279 | } | 279 | } |
280 | 280 | ||
281 | static struct super_operations omfs_sops = { | 281 | static const struct super_operations omfs_sops = { |
282 | .write_inode = omfs_write_inode, | 282 | .write_inode = omfs_write_inode, |
283 | .delete_inode = omfs_delete_inode, | 283 | .delete_inode = omfs_delete_inode, |
284 | .put_super = omfs_put_super, | 284 | .put_super = omfs_put_super, |
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h index 2bc0f0670406..df71039945ac 100644 --- a/fs/omfs/omfs.h +++ b/fs/omfs/omfs.h | |||
@@ -45,15 +45,15 @@ extern int omfs_clear_range(struct super_block *sb, u64 block, int count); | |||
45 | 45 | ||
46 | /* dir.c */ | 46 | /* dir.c */ |
47 | extern struct file_operations omfs_dir_operations; | 47 | extern struct file_operations omfs_dir_operations; |
48 | extern struct inode_operations omfs_dir_inops; | 48 | extern const struct inode_operations omfs_dir_inops; |
49 | extern int omfs_make_empty(struct inode *inode, struct super_block *sb); | 49 | extern int omfs_make_empty(struct inode *inode, struct super_block *sb); |
50 | extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header, | 50 | extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header, |
51 | u64 fsblock); | 51 | u64 fsblock); |
52 | 52 | ||
53 | /* file.c */ | 53 | /* file.c */ |
54 | extern struct file_operations omfs_file_operations; | 54 | extern struct file_operations omfs_file_operations; |
55 | extern struct inode_operations omfs_file_inops; | 55 | extern const struct inode_operations omfs_file_inops; |
56 | extern struct address_space_operations omfs_aops; | 56 | extern const struct address_space_operations omfs_aops; |
57 | extern void omfs_make_empty_table(struct buffer_head *bh, int offset); | 57 | extern void omfs_make_empty_table(struct buffer_head *bh, int offset); |
58 | extern int omfs_shrink_inode(struct inode *inode); | 58 | extern int omfs_shrink_inode(struct inode *inode); |
59 | 59 | ||
@@ -290,10 +290,9 @@ out: | |||
290 | return error; | 290 | return error; |
291 | } | 291 | } |
292 | 292 | ||
293 | SYSCALL_DEFINE2(truncate, const char __user *, path, unsigned long, length) | 293 | SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) |
294 | { | 294 | { |
295 | /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */ | 295 | return do_sys_truncate(path, length); |
296 | return do_sys_truncate(path, (long)length); | ||
297 | } | 296 | } |
298 | 297 | ||
299 | static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) | 298 | static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 619ba99dfe39..7b685e10cbad 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -312,7 +312,7 @@ static struct attribute_group part_attr_group = { | |||
312 | .attrs = part_attrs, | 312 | .attrs = part_attrs, |
313 | }; | 313 | }; |
314 | 314 | ||
315 | static struct attribute_group *part_attr_groups[] = { | 315 | static const struct attribute_group *part_attr_groups[] = { |
316 | &part_attr_group, | 316 | &part_attr_group, |
317 | #ifdef CONFIG_BLK_DEV_IO_TRACE | 317 | #ifdef CONFIG_BLK_DEV_IO_TRACE |
318 | &blk_trace_attr_group, | 318 | &blk_trace_attr_group, |
@@ -581,7 +581,7 @@ try_scan: | |||
581 | } | 581 | } |
582 | 582 | ||
583 | if (from + size > get_capacity(disk)) { | 583 | if (from + size > get_capacity(disk)) { |
584 | struct block_device_operations *bdops = disk->fops; | 584 | const struct block_device_operations *bdops = disk->fops; |
585 | unsigned long long capacity; | 585 | unsigned long long capacity; |
586 | 586 | ||
587 | printk(KERN_WARNING | 587 | printk(KERN_WARNING |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 725a650bbbb8..0c6bc602e6c4 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -82,6 +82,7 @@ | |||
82 | #include <linux/pid_namespace.h> | 82 | #include <linux/pid_namespace.h> |
83 | #include <linux/ptrace.h> | 83 | #include <linux/ptrace.h> |
84 | #include <linux/tracehook.h> | 84 | #include <linux/tracehook.h> |
85 | #include <linux/swapops.h> | ||
85 | 86 | ||
86 | #include <asm/pgtable.h> | 87 | #include <asm/pgtable.h> |
87 | #include <asm/processor.h> | 88 | #include <asm/processor.h> |
@@ -321,6 +322,87 @@ static inline void task_context_switch_counts(struct seq_file *m, | |||
321 | p->nivcsw); | 322 | p->nivcsw); |
322 | } | 323 | } |
323 | 324 | ||
325 | struct stack_stats { | ||
326 | struct vm_area_struct *vma; | ||
327 | unsigned long startpage; | ||
328 | unsigned long usage; | ||
329 | }; | ||
330 | |||
331 | static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr, | ||
332 | unsigned long end, struct mm_walk *walk) | ||
333 | { | ||
334 | struct stack_stats *ss = walk->private; | ||
335 | struct vm_area_struct *vma = ss->vma; | ||
336 | pte_t *pte, ptent; | ||
337 | spinlock_t *ptl; | ||
338 | int ret = 0; | ||
339 | |||
340 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | ||
341 | for (; addr != end; pte++, addr += PAGE_SIZE) { | ||
342 | ptent = *pte; | ||
343 | |||
344 | #ifdef CONFIG_STACK_GROWSUP | ||
345 | if (pte_present(ptent) || is_swap_pte(ptent)) | ||
346 | ss->usage = addr - ss->startpage + PAGE_SIZE; | ||
347 | #else | ||
348 | if (pte_present(ptent) || is_swap_pte(ptent)) { | ||
349 | ss->usage = ss->startpage - addr + PAGE_SIZE; | ||
350 | pte++; | ||
351 | ret = 1; | ||
352 | break; | ||
353 | } | ||
354 | #endif | ||
355 | } | ||
356 | pte_unmap_unlock(pte - 1, ptl); | ||
357 | cond_resched(); | ||
358 | return ret; | ||
359 | } | ||
360 | |||
361 | static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma, | ||
362 | struct task_struct *task) | ||
363 | { | ||
364 | struct stack_stats ss; | ||
365 | struct mm_walk stack_walk = { | ||
366 | .pmd_entry = stack_usage_pte_range, | ||
367 | .mm = vma->vm_mm, | ||
368 | .private = &ss, | ||
369 | }; | ||
370 | |||
371 | if (!vma->vm_mm || is_vm_hugetlb_page(vma)) | ||
372 | return 0; | ||
373 | |||
374 | ss.vma = vma; | ||
375 | ss.startpage = task->stack_start & PAGE_MASK; | ||
376 | ss.usage = 0; | ||
377 | |||
378 | #ifdef CONFIG_STACK_GROWSUP | ||
379 | walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end, | ||
380 | &stack_walk); | ||
381 | #else | ||
382 | walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE, | ||
383 | &stack_walk); | ||
384 | #endif | ||
385 | return ss.usage; | ||
386 | } | ||
387 | |||
388 | static inline void task_show_stack_usage(struct seq_file *m, | ||
389 | struct task_struct *task) | ||
390 | { | ||
391 | struct vm_area_struct *vma; | ||
392 | struct mm_struct *mm = get_task_mm(task); | ||
393 | |||
394 | if (mm) { | ||
395 | down_read(&mm->mmap_sem); | ||
396 | vma = find_vma(mm, task->stack_start); | ||
397 | if (vma) | ||
398 | seq_printf(m, "Stack usage:\t%lu kB\n", | ||
399 | get_stack_usage_in_bytes(vma, task) >> 10); | ||
400 | |||
401 | up_read(&mm->mmap_sem); | ||
402 | mmput(mm); | ||
403 | } | ||
404 | } | ||
405 | |||
324 | int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | 406 | int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, |
325 | struct pid *pid, struct task_struct *task) | 407 | struct pid *pid, struct task_struct *task) |
326 | { | 408 | { |
@@ -340,6 +422,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | |||
340 | task_show_regs(m, task); | 422 | task_show_regs(m, task); |
341 | #endif | 423 | #endif |
342 | task_context_switch_counts(m, task); | 424 | task_context_switch_counts(m, task); |
425 | task_show_stack_usage(m, task); | ||
343 | return 0; | 426 | return 0; |
344 | } | 427 | } |
345 | 428 | ||
@@ -481,7 +564,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
481 | rsslim, | 564 | rsslim, |
482 | mm ? mm->start_code : 0, | 565 | mm ? mm->start_code : 0, |
483 | mm ? mm->end_code : 0, | 566 | mm ? mm->end_code : 0, |
484 | (permitted && mm) ? mm->start_stack : 0, | 567 | (permitted) ? task->stack_start : 0, |
485 | esp, | 568 | esp, |
486 | eip, | 569 | eip, |
487 | /* The signal information here is obsolete. | 570 | /* The signal information here is obsolete. |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 6f742f6658a9..837469a96598 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -447,7 +447,7 @@ static int proc_oom_score(struct task_struct *task, char *buffer) | |||
447 | 447 | ||
448 | do_posix_clock_monotonic_gettime(&uptime); | 448 | do_posix_clock_monotonic_gettime(&uptime); |
449 | read_lock(&tasklist_lock); | 449 | read_lock(&tasklist_lock); |
450 | points = badness(task, uptime.tv_sec); | 450 | points = badness(task->group_leader, uptime.tv_sec); |
451 | read_unlock(&tasklist_lock); | 451 | read_unlock(&tasklist_lock); |
452 | return sprintf(buffer, "%lu\n", points); | 452 | return sprintf(buffer, "%lu\n", points); |
453 | } | 453 | } |
@@ -458,7 +458,7 @@ struct limit_names { | |||
458 | }; | 458 | }; |
459 | 459 | ||
460 | static const struct limit_names lnames[RLIM_NLIMITS] = { | 460 | static const struct limit_names lnames[RLIM_NLIMITS] = { |
461 | [RLIMIT_CPU] = {"Max cpu time", "ms"}, | 461 | [RLIMIT_CPU] = {"Max cpu time", "seconds"}, |
462 | [RLIMIT_FSIZE] = {"Max file size", "bytes"}, | 462 | [RLIMIT_FSIZE] = {"Max file size", "bytes"}, |
463 | [RLIMIT_DATA] = {"Max data size", "bytes"}, | 463 | [RLIMIT_DATA] = {"Max data size", "bytes"}, |
464 | [RLIMIT_STACK] = {"Max stack size", "bytes"}, | 464 | [RLIMIT_STACK] = {"Max stack size", "bytes"}, |
@@ -999,11 +999,17 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, | |||
999 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 999 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
1000 | char buffer[PROC_NUMBUF]; | 1000 | char buffer[PROC_NUMBUF]; |
1001 | size_t len; | 1001 | size_t len; |
1002 | int oom_adjust; | 1002 | int oom_adjust = OOM_DISABLE; |
1003 | unsigned long flags; | ||
1003 | 1004 | ||
1004 | if (!task) | 1005 | if (!task) |
1005 | return -ESRCH; | 1006 | return -ESRCH; |
1006 | oom_adjust = task->oomkilladj; | 1007 | |
1008 | if (lock_task_sighand(task, &flags)) { | ||
1009 | oom_adjust = task->signal->oom_adj; | ||
1010 | unlock_task_sighand(task, &flags); | ||
1011 | } | ||
1012 | |||
1007 | put_task_struct(task); | 1013 | put_task_struct(task); |
1008 | 1014 | ||
1009 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | 1015 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); |
@@ -1015,32 +1021,44 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
1015 | size_t count, loff_t *ppos) | 1021 | size_t count, loff_t *ppos) |
1016 | { | 1022 | { |
1017 | struct task_struct *task; | 1023 | struct task_struct *task; |
1018 | char buffer[PROC_NUMBUF], *end; | 1024 | char buffer[PROC_NUMBUF]; |
1019 | int oom_adjust; | 1025 | long oom_adjust; |
1026 | unsigned long flags; | ||
1027 | int err; | ||
1020 | 1028 | ||
1021 | memset(buffer, 0, sizeof(buffer)); | 1029 | memset(buffer, 0, sizeof(buffer)); |
1022 | if (count > sizeof(buffer) - 1) | 1030 | if (count > sizeof(buffer) - 1) |
1023 | count = sizeof(buffer) - 1; | 1031 | count = sizeof(buffer) - 1; |
1024 | if (copy_from_user(buffer, buf, count)) | 1032 | if (copy_from_user(buffer, buf, count)) |
1025 | return -EFAULT; | 1033 | return -EFAULT; |
1026 | oom_adjust = simple_strtol(buffer, &end, 0); | 1034 | |
1035 | err = strict_strtol(strstrip(buffer), 0, &oom_adjust); | ||
1036 | if (err) | ||
1037 | return -EINVAL; | ||
1027 | if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && | 1038 | if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && |
1028 | oom_adjust != OOM_DISABLE) | 1039 | oom_adjust != OOM_DISABLE) |
1029 | return -EINVAL; | 1040 | return -EINVAL; |
1030 | if (*end == '\n') | 1041 | |
1031 | end++; | ||
1032 | task = get_proc_task(file->f_path.dentry->d_inode); | 1042 | task = get_proc_task(file->f_path.dentry->d_inode); |
1033 | if (!task) | 1043 | if (!task) |
1034 | return -ESRCH; | 1044 | return -ESRCH; |
1035 | if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { | 1045 | if (!lock_task_sighand(task, &flags)) { |
1046 | put_task_struct(task); | ||
1047 | return -ESRCH; | ||
1048 | } | ||
1049 | |||
1050 | if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { | ||
1051 | unlock_task_sighand(task, &flags); | ||
1036 | put_task_struct(task); | 1052 | put_task_struct(task); |
1037 | return -EACCES; | 1053 | return -EACCES; |
1038 | } | 1054 | } |
1039 | task->oomkilladj = oom_adjust; | 1055 | |
1056 | task->signal->oom_adj = oom_adjust; | ||
1057 | |||
1058 | unlock_task_sighand(task, &flags); | ||
1040 | put_task_struct(task); | 1059 | put_task_struct(task); |
1041 | if (end - buffer == 0) | 1060 | |
1042 | return -EIO; | 1061 | return count; |
1043 | return end - buffer; | ||
1044 | } | 1062 | } |
1045 | 1063 | ||
1046 | static const struct file_operations proc_oom_adjust_operations = { | 1064 | static const struct file_operations proc_oom_adjust_operations = { |
@@ -1169,17 +1187,16 @@ static ssize_t proc_fault_inject_write(struct file * file, | |||
1169 | count = sizeof(buffer) - 1; | 1187 | count = sizeof(buffer) - 1; |
1170 | if (copy_from_user(buffer, buf, count)) | 1188 | if (copy_from_user(buffer, buf, count)) |
1171 | return -EFAULT; | 1189 | return -EFAULT; |
1172 | make_it_fail = simple_strtol(buffer, &end, 0); | 1190 | make_it_fail = simple_strtol(strstrip(buffer), &end, 0); |
1173 | if (*end == '\n') | 1191 | if (*end) |
1174 | end++; | 1192 | return -EINVAL; |
1175 | task = get_proc_task(file->f_dentry->d_inode); | 1193 | task = get_proc_task(file->f_dentry->d_inode); |
1176 | if (!task) | 1194 | if (!task) |
1177 | return -ESRCH; | 1195 | return -ESRCH; |
1178 | task->make_it_fail = make_it_fail; | 1196 | task->make_it_fail = make_it_fail; |
1179 | put_task_struct(task); | 1197 | put_task_struct(task); |
1180 | if (end - buffer == 0) | 1198 | |
1181 | return -EIO; | 1199 | return count; |
1182 | return end - buffer; | ||
1183 | } | 1200 | } |
1184 | 1201 | ||
1185 | static const struct file_operations proc_fault_inject_operations = { | 1202 | static const struct file_operations proc_fault_inject_operations = { |
@@ -2586,9 +2603,6 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) | |||
2586 | dput(dentry); | 2603 | dput(dentry); |
2587 | } | 2604 | } |
2588 | 2605 | ||
2589 | if (tgid == 0) | ||
2590 | goto out; | ||
2591 | |||
2592 | name.name = buf; | 2606 | name.name = buf; |
2593 | name.len = snprintf(buf, sizeof(buf), "%d", tgid); | 2607 | name.len = snprintf(buf, sizeof(buf), "%d", tgid); |
2594 | leader = d_hash_and_lookup(mnt->mnt_root, &name); | 2608 | leader = d_hash_and_lookup(mnt->mnt_root, &name); |
@@ -2645,17 +2659,16 @@ out: | |||
2645 | void proc_flush_task(struct task_struct *task) | 2659 | void proc_flush_task(struct task_struct *task) |
2646 | { | 2660 | { |
2647 | int i; | 2661 | int i; |
2648 | struct pid *pid, *tgid = NULL; | 2662 | struct pid *pid, *tgid; |
2649 | struct upid *upid; | 2663 | struct upid *upid; |
2650 | 2664 | ||
2651 | pid = task_pid(task); | 2665 | pid = task_pid(task); |
2652 | if (thread_group_leader(task)) | 2666 | tgid = task_tgid(task); |
2653 | tgid = task_tgid(task); | ||
2654 | 2667 | ||
2655 | for (i = 0; i <= pid->level; i++) { | 2668 | for (i = 0; i <= pid->level; i++) { |
2656 | upid = &pid->numbers[i]; | 2669 | upid = &pid->numbers[i]; |
2657 | proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, | 2670 | proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, |
2658 | tgid ? tgid->numbers[i].nr : 0); | 2671 | tgid->numbers[i].nr); |
2659 | } | 2672 | } |
2660 | 2673 | ||
2661 | upid = &pid->numbers[pid->level]; | 2674 | upid = &pid->numbers[pid->level]; |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 59b43a068872..56013371f9f3 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -17,9 +17,15 @@ | |||
17 | #include <linux/elfcore.h> | 17 | #include <linux/elfcore.h> |
18 | #include <linux/vmalloc.h> | 18 | #include <linux/vmalloc.h> |
19 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
20 | #include <linux/bootmem.h> | ||
20 | #include <linux/init.h> | 21 | #include <linux/init.h> |
21 | #include <asm/uaccess.h> | 22 | #include <asm/uaccess.h> |
22 | #include <asm/io.h> | 23 | #include <asm/io.h> |
24 | #include <linux/list.h> | ||
25 | #include <linux/ioport.h> | ||
26 | #include <linux/mm.h> | ||
27 | #include <linux/memory.h> | ||
28 | #include <asm/sections.h> | ||
23 | 29 | ||
24 | #define CORE_STR "CORE" | 30 | #define CORE_STR "CORE" |
25 | 31 | ||
@@ -29,17 +35,6 @@ | |||
29 | 35 | ||
30 | static struct proc_dir_entry *proc_root_kcore; | 36 | static struct proc_dir_entry *proc_root_kcore; |
31 | 37 | ||
32 | static int open_kcore(struct inode * inode, struct file * filp) | ||
33 | { | ||
34 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | ||
35 | } | ||
36 | |||
37 | static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *); | ||
38 | |||
39 | static const struct file_operations proc_kcore_operations = { | ||
40 | .read = read_kcore, | ||
41 | .open = open_kcore, | ||
42 | }; | ||
43 | 38 | ||
44 | #ifndef kc_vaddr_to_offset | 39 | #ifndef kc_vaddr_to_offset |
45 | #define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET) | 40 | #define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET) |
@@ -57,18 +52,19 @@ struct memelfnote | |||
57 | void *data; | 52 | void *data; |
58 | }; | 53 | }; |
59 | 54 | ||
60 | static struct kcore_list *kclist; | 55 | static LIST_HEAD(kclist_head); |
61 | static DEFINE_RWLOCK(kclist_lock); | 56 | static DEFINE_RWLOCK(kclist_lock); |
57 | static int kcore_need_update = 1; | ||
62 | 58 | ||
63 | void | 59 | void |
64 | kclist_add(struct kcore_list *new, void *addr, size_t size) | 60 | kclist_add(struct kcore_list *new, void *addr, size_t size, int type) |
65 | { | 61 | { |
66 | new->addr = (unsigned long)addr; | 62 | new->addr = (unsigned long)addr; |
67 | new->size = size; | 63 | new->size = size; |
64 | new->type = type; | ||
68 | 65 | ||
69 | write_lock(&kclist_lock); | 66 | write_lock(&kclist_lock); |
70 | new->next = kclist; | 67 | list_add_tail(&new->list, &kclist_head); |
71 | kclist = new; | ||
72 | write_unlock(&kclist_lock); | 68 | write_unlock(&kclist_lock); |
73 | } | 69 | } |
74 | 70 | ||
@@ -80,7 +76,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) | |||
80 | *nphdr = 1; /* PT_NOTE */ | 76 | *nphdr = 1; /* PT_NOTE */ |
81 | size = 0; | 77 | size = 0; |
82 | 78 | ||
83 | for (m=kclist; m; m=m->next) { | 79 | list_for_each_entry(m, &kclist_head, list) { |
84 | try = kc_vaddr_to_offset((size_t)m->addr + m->size); | 80 | try = kc_vaddr_to_offset((size_t)m->addr + m->size); |
85 | if (try > size) | 81 | if (try > size) |
86 | size = try; | 82 | size = try; |
@@ -97,6 +93,177 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) | |||
97 | return size + *elf_buflen; | 93 | return size + *elf_buflen; |
98 | } | 94 | } |
99 | 95 | ||
96 | static void free_kclist_ents(struct list_head *head) | ||
97 | { | ||
98 | struct kcore_list *tmp, *pos; | ||
99 | |||
100 | list_for_each_entry_safe(pos, tmp, head, list) { | ||
101 | list_del(&pos->list); | ||
102 | kfree(pos); | ||
103 | } | ||
104 | } | ||
105 | /* | ||
106 | * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list. | ||
107 | */ | ||
108 | static void __kcore_update_ram(struct list_head *list) | ||
109 | { | ||
110 | int nphdr; | ||
111 | size_t size; | ||
112 | struct kcore_list *tmp, *pos; | ||
113 | LIST_HEAD(garbage); | ||
114 | |||
115 | write_lock(&kclist_lock); | ||
116 | if (kcore_need_update) { | ||
117 | list_for_each_entry_safe(pos, tmp, &kclist_head, list) { | ||
118 | if (pos->type == KCORE_RAM | ||
119 | || pos->type == KCORE_VMEMMAP) | ||
120 | list_move(&pos->list, &garbage); | ||
121 | } | ||
122 | list_splice_tail(list, &kclist_head); | ||
123 | } else | ||
124 | list_splice(list, &garbage); | ||
125 | kcore_need_update = 0; | ||
126 | proc_root_kcore->size = get_kcore_size(&nphdr, &size); | ||
127 | write_unlock(&kclist_lock); | ||
128 | |||
129 | free_kclist_ents(&garbage); | ||
130 | } | ||
131 | |||
132 | |||
133 | #ifdef CONFIG_HIGHMEM | ||
134 | /* | ||
135 | * If no highmem, we can assume [0...max_low_pfn) continuous range of memory | ||
136 | * because memory hole is not as big as !HIGHMEM case. | ||
137 | * (HIGHMEM is special because part of memory is _invisible_ from the kernel.) | ||
138 | */ | ||
139 | static int kcore_update_ram(void) | ||
140 | { | ||
141 | LIST_HEAD(head); | ||
142 | struct kcore_list *ent; | ||
143 | int ret = 0; | ||
144 | |||
145 | ent = kmalloc(sizeof(*ent), GFP_KERNEL); | ||
146 | if (!ent) | ||
147 | return -ENOMEM; | ||
148 | ent->addr = (unsigned long)__va(0); | ||
149 | ent->size = max_low_pfn << PAGE_SHIFT; | ||
150 | ent->type = KCORE_RAM; | ||
151 | list_add(&ent->list, &head); | ||
152 | __kcore_update_ram(&head); | ||
153 | return ret; | ||
154 | } | ||
155 | |||
156 | #else /* !CONFIG_HIGHMEM */ | ||
157 | |||
158 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | ||
159 | /* calculate vmemmap's address from given system ram pfn and register it */ | ||
160 | int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | ||
161 | { | ||
162 | unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; | ||
163 | unsigned long nr_pages = ent->size >> PAGE_SHIFT; | ||
164 | unsigned long start, end; | ||
165 | struct kcore_list *vmm, *tmp; | ||
166 | |||
167 | |||
168 | start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK; | ||
169 | end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1; | ||
170 | end = ALIGN(end, PAGE_SIZE); | ||
171 | /* overlap check (because we have to align page */ | ||
172 | list_for_each_entry(tmp, head, list) { | ||
173 | if (tmp->type != KCORE_VMEMMAP) | ||
174 | continue; | ||
175 | if (start < tmp->addr + tmp->size) | ||
176 | if (end > tmp->addr) | ||
177 | end = tmp->addr; | ||
178 | } | ||
179 | if (start < end) { | ||
180 | vmm = kmalloc(sizeof(*vmm), GFP_KERNEL); | ||
181 | if (!vmm) | ||
182 | return 0; | ||
183 | vmm->addr = start; | ||
184 | vmm->size = end - start; | ||
185 | vmm->type = KCORE_VMEMMAP; | ||
186 | list_add_tail(&vmm->list, head); | ||
187 | } | ||
188 | return 1; | ||
189 | |||
190 | } | ||
191 | #else | ||
192 | int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | ||
193 | { | ||
194 | return 1; | ||
195 | } | ||
196 | |||
197 | #endif | ||
198 | |||
199 | static int | ||
200 | kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg) | ||
201 | { | ||
202 | struct list_head *head = (struct list_head *)arg; | ||
203 | struct kcore_list *ent; | ||
204 | |||
205 | ent = kmalloc(sizeof(*ent), GFP_KERNEL); | ||
206 | if (!ent) | ||
207 | return -ENOMEM; | ||
208 | ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT)); | ||
209 | ent->size = nr_pages << PAGE_SHIFT; | ||
210 | |||
211 | /* Sanity check: Can happen in 32bit arch...maybe */ | ||
212 | if (ent->addr < (unsigned long) __va(0)) | ||
213 | goto free_out; | ||
214 | |||
215 | /* cut not-mapped area. ....from ppc-32 code. */ | ||
216 | if (ULONG_MAX - ent->addr < ent->size) | ||
217 | ent->size = ULONG_MAX - ent->addr; | ||
218 | |||
219 | /* cut when vmalloc() area is higher than direct-map area */ | ||
220 | if (VMALLOC_START > (unsigned long)__va(0)) { | ||
221 | if (ent->addr > VMALLOC_START) | ||
222 | goto free_out; | ||
223 | if (VMALLOC_START - ent->addr < ent->size) | ||
224 | ent->size = VMALLOC_START - ent->addr; | ||
225 | } | ||
226 | |||
227 | ent->type = KCORE_RAM; | ||
228 | list_add_tail(&ent->list, head); | ||
229 | |||
230 | if (!get_sparsemem_vmemmap_info(ent, head)) { | ||
231 | list_del(&ent->list); | ||
232 | goto free_out; | ||
233 | } | ||
234 | |||
235 | return 0; | ||
236 | free_out: | ||
237 | kfree(ent); | ||
238 | return 1; | ||
239 | } | ||
240 | |||
241 | static int kcore_update_ram(void) | ||
242 | { | ||
243 | int nid, ret; | ||
244 | unsigned long end_pfn; | ||
245 | LIST_HEAD(head); | ||
246 | |||
247 | /* Not inialized....update now */ | ||
248 | /* find out "max pfn" */ | ||
249 | end_pfn = 0; | ||
250 | for_each_node_state(nid, N_HIGH_MEMORY) { | ||
251 | unsigned long node_end; | ||
252 | node_end = NODE_DATA(nid)->node_start_pfn + | ||
253 | NODE_DATA(nid)->node_spanned_pages; | ||
254 | if (end_pfn < node_end) | ||
255 | end_pfn = node_end; | ||
256 | } | ||
257 | /* scan 0 to max_pfn */ | ||
258 | ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private); | ||
259 | if (ret) { | ||
260 | free_kclist_ents(&head); | ||
261 | return -ENOMEM; | ||
262 | } | ||
263 | __kcore_update_ram(&head); | ||
264 | return ret; | ||
265 | } | ||
266 | #endif /* CONFIG_HIGHMEM */ | ||
100 | 267 | ||
101 | /*****************************************************************************/ | 268 | /*****************************************************************************/ |
102 | /* | 269 | /* |
@@ -192,7 +359,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) | |||
192 | nhdr->p_align = 0; | 359 | nhdr->p_align = 0; |
193 | 360 | ||
194 | /* setup ELF PT_LOAD program header for every area */ | 361 | /* setup ELF PT_LOAD program header for every area */ |
195 | for (m=kclist; m; m=m->next) { | 362 | list_for_each_entry(m, &kclist_head, list) { |
196 | phdr = (struct elf_phdr *) bufp; | 363 | phdr = (struct elf_phdr *) bufp; |
197 | bufp += sizeof(struct elf_phdr); | 364 | bufp += sizeof(struct elf_phdr); |
198 | offset += sizeof(struct elf_phdr); | 365 | offset += sizeof(struct elf_phdr); |
@@ -265,7 +432,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) | |||
265 | unsigned long start; | 432 | unsigned long start; |
266 | 433 | ||
267 | read_lock(&kclist_lock); | 434 | read_lock(&kclist_lock); |
268 | proc_root_kcore->size = size = get_kcore_size(&nphdr, &elf_buflen); | 435 | size = get_kcore_size(&nphdr, &elf_buflen); |
436 | |||
269 | if (buflen == 0 || *fpos >= size) { | 437 | if (buflen == 0 || *fpos >= size) { |
270 | read_unlock(&kclist_lock); | 438 | read_unlock(&kclist_lock); |
271 | return 0; | 439 | return 0; |
@@ -317,7 +485,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) | |||
317 | struct kcore_list *m; | 485 | struct kcore_list *m; |
318 | 486 | ||
319 | read_lock(&kclist_lock); | 487 | read_lock(&kclist_lock); |
320 | for (m=kclist; m; m=m->next) { | 488 | list_for_each_entry(m, &kclist_head, list) { |
321 | if (start >= m->addr && start < (m->addr+m->size)) | 489 | if (start >= m->addr && start < (m->addr+m->size)) |
322 | break; | 490 | break; |
323 | } | 491 | } |
@@ -326,45 +494,14 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) | |||
326 | if (m == NULL) { | 494 | if (m == NULL) { |
327 | if (clear_user(buffer, tsz)) | 495 | if (clear_user(buffer, tsz)) |
328 | return -EFAULT; | 496 | return -EFAULT; |
329 | } else if (is_vmalloc_addr((void *)start)) { | 497 | } else if (is_vmalloc_or_module_addr((void *)start)) { |
330 | char * elf_buf; | 498 | char * elf_buf; |
331 | struct vm_struct *m; | ||
332 | unsigned long curstart = start; | ||
333 | unsigned long cursize = tsz; | ||
334 | 499 | ||
335 | elf_buf = kzalloc(tsz, GFP_KERNEL); | 500 | elf_buf = kzalloc(tsz, GFP_KERNEL); |
336 | if (!elf_buf) | 501 | if (!elf_buf) |
337 | return -ENOMEM; | 502 | return -ENOMEM; |
338 | 503 | vread(elf_buf, (char *)start, tsz); | |
339 | read_lock(&vmlist_lock); | 504 | /* we have to zero-fill user buffer even if no read */ |
340 | for (m=vmlist; m && cursize; m=m->next) { | ||
341 | unsigned long vmstart; | ||
342 | unsigned long vmsize; | ||
343 | unsigned long msize = m->size - PAGE_SIZE; | ||
344 | |||
345 | if (((unsigned long)m->addr + msize) < | ||
346 | curstart) | ||
347 | continue; | ||
348 | if ((unsigned long)m->addr > (curstart + | ||
349 | cursize)) | ||
350 | break; | ||
351 | vmstart = (curstart < (unsigned long)m->addr ? | ||
352 | (unsigned long)m->addr : curstart); | ||
353 | if (((unsigned long)m->addr + msize) > | ||
354 | (curstart + cursize)) | ||
355 | vmsize = curstart + cursize - vmstart; | ||
356 | else | ||
357 | vmsize = (unsigned long)m->addr + | ||
358 | msize - vmstart; | ||
359 | curstart = vmstart + vmsize; | ||
360 | cursize -= vmsize; | ||
361 | /* don't dump ioremap'd stuff! (TA) */ | ||
362 | if (m->flags & VM_IOREMAP) | ||
363 | continue; | ||
364 | memcpy(elf_buf + (vmstart - start), | ||
365 | (char *)vmstart, vmsize); | ||
366 | } | ||
367 | read_unlock(&vmlist_lock); | ||
368 | if (copy_to_user(buffer, elf_buf, tsz)) { | 505 | if (copy_to_user(buffer, elf_buf, tsz)) { |
369 | kfree(elf_buf); | 506 | kfree(elf_buf); |
370 | return -EFAULT; | 507 | return -EFAULT; |
@@ -402,12 +539,96 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) | |||
402 | return acc; | 539 | return acc; |
403 | } | 540 | } |
404 | 541 | ||
542 | |||
543 | static int open_kcore(struct inode *inode, struct file *filp) | ||
544 | { | ||
545 | if (!capable(CAP_SYS_RAWIO)) | ||
546 | return -EPERM; | ||
547 | if (kcore_need_update) | ||
548 | kcore_update_ram(); | ||
549 | if (i_size_read(inode) != proc_root_kcore->size) { | ||
550 | mutex_lock(&inode->i_mutex); | ||
551 | i_size_write(inode, proc_root_kcore->size); | ||
552 | mutex_unlock(&inode->i_mutex); | ||
553 | } | ||
554 | return 0; | ||
555 | } | ||
556 | |||
557 | |||
558 | static const struct file_operations proc_kcore_operations = { | ||
559 | .read = read_kcore, | ||
560 | .open = open_kcore, | ||
561 | }; | ||
562 | |||
563 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
564 | /* just remember that we have to update kcore */ | ||
565 | static int __meminit kcore_callback(struct notifier_block *self, | ||
566 | unsigned long action, void *arg) | ||
567 | { | ||
568 | switch (action) { | ||
569 | case MEM_ONLINE: | ||
570 | case MEM_OFFLINE: | ||
571 | write_lock(&kclist_lock); | ||
572 | kcore_need_update = 1; | ||
573 | write_unlock(&kclist_lock); | ||
574 | } | ||
575 | return NOTIFY_OK; | ||
576 | } | ||
577 | #endif | ||
578 | |||
579 | |||
580 | static struct kcore_list kcore_vmalloc; | ||
581 | |||
582 | #ifdef CONFIG_ARCH_PROC_KCORE_TEXT | ||
583 | static struct kcore_list kcore_text; | ||
584 | /* | ||
585 | * If defined, special segment is used for mapping kernel text instead of | ||
586 | * direct-map area. We need to create special TEXT section. | ||
587 | */ | ||
588 | static void __init proc_kcore_text_init(void) | ||
589 | { | ||
590 | kclist_add(&kcore_text, _stext, _end - _stext, KCORE_TEXT); | ||
591 | } | ||
592 | #else | ||
593 | static void __init proc_kcore_text_init(void) | ||
594 | { | ||
595 | } | ||
596 | #endif | ||
597 | |||
598 | #if defined(CONFIG_MODULES) && defined(MODULES_VADDR) | ||
599 | /* | ||
600 | * MODULES_VADDR has no intersection with VMALLOC_ADDR. | ||
601 | */ | ||
602 | struct kcore_list kcore_modules; | ||
603 | static void __init add_modules_range(void) | ||
604 | { | ||
605 | kclist_add(&kcore_modules, (void *)MODULES_VADDR, | ||
606 | MODULES_END - MODULES_VADDR, KCORE_VMALLOC); | ||
607 | } | ||
608 | #else | ||
609 | static void __init add_modules_range(void) | ||
610 | { | ||
611 | } | ||
612 | #endif | ||
613 | |||
405 | static int __init proc_kcore_init(void) | 614 | static int __init proc_kcore_init(void) |
406 | { | 615 | { |
407 | proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); | 616 | proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, |
408 | if (proc_root_kcore) | 617 | &proc_kcore_operations); |
409 | proc_root_kcore->size = | 618 | if (!proc_root_kcore) { |
410 | (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; | 619 | printk(KERN_ERR "couldn't create /proc/kcore\n"); |
620 | return 0; /* Always returns 0. */ | ||
621 | } | ||
622 | /* Store text area if it's special */ | ||
623 | proc_kcore_text_init(); | ||
624 | /* Store vmalloc area */ | ||
625 | kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, | ||
626 | VMALLOC_END - VMALLOC_START, KCORE_VMALLOC); | ||
627 | add_modules_range(); | ||
628 | /* Store direct-map area from physical memory map */ | ||
629 | kcore_update_ram(); | ||
630 | hotplug_memory_notifier(kcore_callback, 0); | ||
631 | |||
411 | return 0; | 632 | return 0; |
412 | } | 633 | } |
413 | module_init(proc_kcore_init); | 634 | module_init(proc_kcore_init); |
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 78faedcb0a8d..c7bff4f603ff 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -81,9 +81,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
81 | "Writeback: %8lu kB\n" | 81 | "Writeback: %8lu kB\n" |
82 | "AnonPages: %8lu kB\n" | 82 | "AnonPages: %8lu kB\n" |
83 | "Mapped: %8lu kB\n" | 83 | "Mapped: %8lu kB\n" |
84 | "Shmem: %8lu kB\n" | ||
84 | "Slab: %8lu kB\n" | 85 | "Slab: %8lu kB\n" |
85 | "SReclaimable: %8lu kB\n" | 86 | "SReclaimable: %8lu kB\n" |
86 | "SUnreclaim: %8lu kB\n" | 87 | "SUnreclaim: %8lu kB\n" |
88 | "KernelStack: %8lu kB\n" | ||
87 | "PageTables: %8lu kB\n" | 89 | "PageTables: %8lu kB\n" |
88 | #ifdef CONFIG_QUICKLIST | 90 | #ifdef CONFIG_QUICKLIST |
89 | "Quicklists: %8lu kB\n" | 91 | "Quicklists: %8lu kB\n" |
@@ -128,10 +130,12 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
128 | K(global_page_state(NR_WRITEBACK)), | 130 | K(global_page_state(NR_WRITEBACK)), |
129 | K(global_page_state(NR_ANON_PAGES)), | 131 | K(global_page_state(NR_ANON_PAGES)), |
130 | K(global_page_state(NR_FILE_MAPPED)), | 132 | K(global_page_state(NR_FILE_MAPPED)), |
133 | K(global_page_state(NR_SHMEM)), | ||
131 | K(global_page_state(NR_SLAB_RECLAIMABLE) + | 134 | K(global_page_state(NR_SLAB_RECLAIMABLE) + |
132 | global_page_state(NR_SLAB_UNRECLAIMABLE)), | 135 | global_page_state(NR_SLAB_UNRECLAIMABLE)), |
133 | K(global_page_state(NR_SLAB_RECLAIMABLE)), | 136 | K(global_page_state(NR_SLAB_RECLAIMABLE)), |
134 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), | 137 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), |
138 | global_page_state(NR_KERNEL_STACK) * THREAD_SIZE / 1024, | ||
135 | K(global_page_state(NR_PAGETABLE)), | 139 | K(global_page_state(NR_PAGETABLE)), |
136 | #ifdef CONFIG_QUICKLIST | 140 | #ifdef CONFIG_QUICKLIST |
137 | K(quicklist_total_size()), | 141 | K(quicklist_total_size()), |
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 7e14d1a04001..9fe7d7ebe115 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c | |||
@@ -109,7 +109,7 @@ static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos) | |||
109 | return rb_next((struct rb_node *) v); | 109 | return rb_next((struct rb_node *) v); |
110 | } | 110 | } |
111 | 111 | ||
112 | static struct seq_operations proc_nommu_region_list_seqop = { | 112 | static const struct seq_operations proc_nommu_region_list_seqop = { |
113 | .start = nommu_region_list_start, | 113 | .start = nommu_region_list_start, |
114 | .next = nommu_region_list_next, | 114 | .next = nommu_region_list_next, |
115 | .stop = nommu_region_list_stop, | 115 | .stop = nommu_region_list_stop, |
diff --git a/fs/proc/page.c b/fs/proc/page.c index 2707c6c7a20f..2281c2cbfe2b 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include <linux/compiler.h> | 2 | #include <linux/compiler.h> |
3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
5 | #include <linux/ksm.h> | ||
5 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
6 | #include <linux/mmzone.h> | 7 | #include <linux/mmzone.h> |
7 | #include <linux/proc_fs.h> | 8 | #include <linux/proc_fs.h> |
@@ -95,6 +96,8 @@ static const struct file_operations proc_kpagecount_operations = { | |||
95 | #define KPF_UNEVICTABLE 18 | 96 | #define KPF_UNEVICTABLE 18 |
96 | #define KPF_NOPAGE 20 | 97 | #define KPF_NOPAGE 20 |
97 | 98 | ||
99 | #define KPF_KSM 21 | ||
100 | |||
98 | /* kernel hacking assistances | 101 | /* kernel hacking assistances |
99 | * WARNING: subject to change, never rely on them! | 102 | * WARNING: subject to change, never rely on them! |
100 | */ | 103 | */ |
@@ -137,6 +140,8 @@ static u64 get_uflags(struct page *page) | |||
137 | u |= 1 << KPF_MMAP; | 140 | u |= 1 << KPF_MMAP; |
138 | if (PageAnon(page)) | 141 | if (PageAnon(page)) |
139 | u |= 1 << KPF_ANON; | 142 | u |= 1 << KPF_ANON; |
143 | if (PageKsm(page)) | ||
144 | u |= 1 << KPF_KSM; | ||
140 | 145 | ||
141 | /* | 146 | /* |
142 | * compound pages: export both head/tail info | 147 | * compound pages: export both head/tail info |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9b1e4e9a16bf..f667e8aeabdf 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, | |||
153 | 153 | ||
154 | /* careful: calling conventions are nasty here */ | 154 | /* careful: calling conventions are nasty here */ |
155 | res = count; | 155 | res = count; |
156 | error = table->proc_handler(table, write, filp, buf, &res, ppos); | 156 | error = table->proc_handler(table, write, buf, &res, ppos); |
157 | if (!error) | 157 | if (!error) |
158 | error = res; | 158 | error = res; |
159 | out: | 159 | out: |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 9bd8be1d235c..2a1bef9203c6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -243,6 +243,25 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
243 | } else if (vma->vm_start <= mm->start_stack && | 243 | } else if (vma->vm_start <= mm->start_stack && |
244 | vma->vm_end >= mm->start_stack) { | 244 | vma->vm_end >= mm->start_stack) { |
245 | name = "[stack]"; | 245 | name = "[stack]"; |
246 | } else { | ||
247 | unsigned long stack_start; | ||
248 | struct proc_maps_private *pmp; | ||
249 | |||
250 | pmp = m->private; | ||
251 | stack_start = pmp->task->stack_start; | ||
252 | |||
253 | if (vma->vm_start <= stack_start && | ||
254 | vma->vm_end >= stack_start) { | ||
255 | pad_len_spaces(m, len); | ||
256 | seq_printf(m, | ||
257 | "[threadstack:%08lx]", | ||
258 | #ifdef CONFIG_STACK_GROWSUP | ||
259 | vma->vm_end - stack_start | ||
260 | #else | ||
261 | stack_start - vma->vm_start | ||
262 | #endif | ||
263 | ); | ||
264 | } | ||
246 | } | 265 | } |
247 | } else { | 266 | } else { |
248 | name = "[vdso]"; | 267 | name = "[vdso]"; |
@@ -465,23 +484,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
465 | return 0; | 484 | return 0; |
466 | } | 485 | } |
467 | 486 | ||
487 | #define CLEAR_REFS_ALL 1 | ||
488 | #define CLEAR_REFS_ANON 2 | ||
489 | #define CLEAR_REFS_MAPPED 3 | ||
490 | |||
468 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | 491 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
469 | size_t count, loff_t *ppos) | 492 | size_t count, loff_t *ppos) |
470 | { | 493 | { |
471 | struct task_struct *task; | 494 | struct task_struct *task; |
472 | char buffer[PROC_NUMBUF], *end; | 495 | char buffer[PROC_NUMBUF]; |
473 | struct mm_struct *mm; | 496 | struct mm_struct *mm; |
474 | struct vm_area_struct *vma; | 497 | struct vm_area_struct *vma; |
498 | long type; | ||
475 | 499 | ||
476 | memset(buffer, 0, sizeof(buffer)); | 500 | memset(buffer, 0, sizeof(buffer)); |
477 | if (count > sizeof(buffer) - 1) | 501 | if (count > sizeof(buffer) - 1) |
478 | count = sizeof(buffer) - 1; | 502 | count = sizeof(buffer) - 1; |
479 | if (copy_from_user(buffer, buf, count)) | 503 | if (copy_from_user(buffer, buf, count)) |
480 | return -EFAULT; | 504 | return -EFAULT; |
481 | if (!simple_strtol(buffer, &end, 0)) | 505 | if (strict_strtol(strstrip(buffer), 10, &type)) |
506 | return -EINVAL; | ||
507 | if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) | ||
482 | return -EINVAL; | 508 | return -EINVAL; |
483 | if (*end == '\n') | ||
484 | end++; | ||
485 | task = get_proc_task(file->f_path.dentry->d_inode); | 509 | task = get_proc_task(file->f_path.dentry->d_inode); |
486 | if (!task) | 510 | if (!task) |
487 | return -ESRCH; | 511 | return -ESRCH; |
@@ -494,18 +518,31 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
494 | down_read(&mm->mmap_sem); | 518 | down_read(&mm->mmap_sem); |
495 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 519 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
496 | clear_refs_walk.private = vma; | 520 | clear_refs_walk.private = vma; |
497 | if (!is_vm_hugetlb_page(vma)) | 521 | if (is_vm_hugetlb_page(vma)) |
498 | walk_page_range(vma->vm_start, vma->vm_end, | 522 | continue; |
499 | &clear_refs_walk); | 523 | /* |
524 | * Writing 1 to /proc/pid/clear_refs affects all pages. | ||
525 | * | ||
526 | * Writing 2 to /proc/pid/clear_refs only affects | ||
527 | * Anonymous pages. | ||
528 | * | ||
529 | * Writing 3 to /proc/pid/clear_refs only affects file | ||
530 | * mapped pages. | ||
531 | */ | ||
532 | if (type == CLEAR_REFS_ANON && vma->vm_file) | ||
533 | continue; | ||
534 | if (type == CLEAR_REFS_MAPPED && !vma->vm_file) | ||
535 | continue; | ||
536 | walk_page_range(vma->vm_start, vma->vm_end, | ||
537 | &clear_refs_walk); | ||
500 | } | 538 | } |
501 | flush_tlb_mm(mm); | 539 | flush_tlb_mm(mm); |
502 | up_read(&mm->mmap_sem); | 540 | up_read(&mm->mmap_sem); |
503 | mmput(mm); | 541 | mmput(mm); |
504 | } | 542 | } |
505 | put_task_struct(task); | 543 | put_task_struct(task); |
506 | if (end - buffer == 0) | 544 | |
507 | return -EIO; | 545 | return count; |
508 | return end - buffer; | ||
509 | } | 546 | } |
510 | 547 | ||
511 | const struct file_operations proc_clear_refs_operations = { | 548 | const struct file_operations proc_clear_refs_operations = { |
diff --git a/fs/qnx4/Kconfig b/fs/qnx4/Kconfig index be8e0e1445b6..5f6089994042 100644 --- a/fs/qnx4/Kconfig +++ b/fs/qnx4/Kconfig | |||
@@ -6,20 +6,9 @@ config QNX4FS_FS | |||
6 | QNX 4 and QNX 6 (the latter is also called QNX RTP). | 6 | QNX 4 and QNX 6 (the latter is also called QNX RTP). |
7 | Further information is available at <http://www.qnx.com/>. | 7 | Further information is available at <http://www.qnx.com/>. |
8 | Say Y if you intend to mount QNX hard disks or floppies. | 8 | Say Y if you intend to mount QNX hard disks or floppies. |
9 | Unless you say Y to "QNX4FS read-write support" below, you will | ||
10 | only be able to read these file systems. | ||
11 | 9 | ||
12 | To compile this file system support as a module, choose M here: the | 10 | To compile this file system support as a module, choose M here: the |
13 | module will be called qnx4. | 11 | module will be called qnx4. |
14 | 12 | ||
15 | If you don't know whether you need it, then you don't need it: | 13 | If you don't know whether you need it, then you don't need it: |
16 | answer N. | 14 | answer N. |
17 | |||
18 | config QNX4FS_RW | ||
19 | bool "QNX4FS write support (DANGEROUS)" | ||
20 | depends on QNX4FS_FS && EXPERIMENTAL && BROKEN | ||
21 | help | ||
22 | Say Y if you want to test write support for QNX4 file systems. | ||
23 | |||
24 | It's currently broken, so for now: | ||
25 | answer N. | ||
diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile index e4d408cc5473..4a283b3f87f8 100644 --- a/fs/qnx4/Makefile +++ b/fs/qnx4/Makefile | |||
@@ -4,4 +4,4 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_QNX4FS_FS) += qnx4.o | 5 | obj-$(CONFIG_QNX4FS_FS) += qnx4.o |
6 | 6 | ||
7 | qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o | 7 | qnx4-objs := inode.o dir.o namei.o bitmap.o |
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c index e1cd061a25f7..0afba069d567 100644 --- a/fs/qnx4/bitmap.c +++ b/fs/qnx4/bitmap.c | |||
@@ -78,84 +78,3 @@ unsigned long qnx4_count_free_blocks(struct super_block *sb) | |||
78 | 78 | ||
79 | return total_free; | 79 | return total_free; |
80 | } | 80 | } |
81 | |||
82 | #ifdef CONFIG_QNX4FS_RW | ||
83 | |||
84 | int qnx4_is_free(struct super_block *sb, long block) | ||
85 | { | ||
86 | int start = le32_to_cpu(qnx4_sb(sb)->BitMap->di_first_xtnt.xtnt_blk) - 1; | ||
87 | int size = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size); | ||
88 | struct buffer_head *bh; | ||
89 | const char *g; | ||
90 | int ret = -EIO; | ||
91 | |||
92 | start += block / (QNX4_BLOCK_SIZE * 8); | ||
93 | QNX4DEBUG(("qnx4: is_free requesting block [%lu], bitmap in block [%lu]\n", | ||
94 | (unsigned long) block, (unsigned long) start)); | ||
95 | (void) size; /* CHECKME */ | ||
96 | bh = sb_bread(sb, start); | ||
97 | if (bh == NULL) { | ||
98 | return -EIO; | ||
99 | } | ||
100 | g = bh->b_data + (block % QNX4_BLOCK_SIZE); | ||
101 | if (((*g) & (1 << (block % 8))) == 0) { | ||
102 | QNX4DEBUG(("qnx4: is_free -> block is free\n")); | ||
103 | ret = 1; | ||
104 | } else { | ||
105 | QNX4DEBUG(("qnx4: is_free -> block is busy\n")); | ||
106 | ret = 0; | ||
107 | } | ||
108 | brelse(bh); | ||
109 | |||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | int qnx4_set_bitmap(struct super_block *sb, long block, int busy) | ||
114 | { | ||
115 | int start = le32_to_cpu(qnx4_sb(sb)->BitMap->di_first_xtnt.xtnt_blk) - 1; | ||
116 | int size = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size); | ||
117 | struct buffer_head *bh; | ||
118 | char *g; | ||
119 | |||
120 | start += block / (QNX4_BLOCK_SIZE * 8); | ||
121 | QNX4DEBUG(("qnx4: set_bitmap requesting block [%lu], bitmap in block [%lu]\n", | ||
122 | (unsigned long) block, (unsigned long) start)); | ||
123 | (void) size; /* CHECKME */ | ||
124 | bh = sb_bread(sb, start); | ||
125 | if (bh == NULL) { | ||
126 | return -EIO; | ||
127 | } | ||
128 | g = bh->b_data + (block % QNX4_BLOCK_SIZE); | ||
129 | if (busy == 0) { | ||
130 | (*g) &= ~(1 << (block % 8)); | ||
131 | } else { | ||
132 | (*g) |= (1 << (block % 8)); | ||
133 | } | ||
134 | mark_buffer_dirty(bh); | ||
135 | brelse(bh); | ||
136 | |||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | static void qnx4_clear_inode(struct inode *inode) | ||
141 | { | ||
142 | struct qnx4_inode_entry *qnx4_ino = qnx4_raw_inode(inode); | ||
143 | /* What for? */ | ||
144 | memset(qnx4_ino->di_fname, 0, sizeof qnx4_ino->di_fname); | ||
145 | qnx4_ino->di_size = 0; | ||
146 | qnx4_ino->di_num_xtnts = 0; | ||
147 | qnx4_ino->di_mode = 0; | ||
148 | qnx4_ino->di_status = 0; | ||
149 | } | ||
150 | |||
151 | void qnx4_free_inode(struct inode *inode) | ||
152 | { | ||
153 | if (inode->i_ino < 1) { | ||
154 | printk("free_inode: inode 0 or nonexistent inode\n"); | ||
155 | return; | ||
156 | } | ||
157 | qnx4_clear_inode(inode); | ||
158 | clear_inode(inode); | ||
159 | } | ||
160 | |||
161 | #endif | ||
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 003c68f3238b..86cc39cb1398 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c | |||
@@ -85,9 +85,4 @@ const struct file_operations qnx4_dir_operations = | |||
85 | const struct inode_operations qnx4_dir_inode_operations = | 85 | const struct inode_operations qnx4_dir_inode_operations = |
86 | { | 86 | { |
87 | .lookup = qnx4_lookup, | 87 | .lookup = qnx4_lookup, |
88 | #ifdef CONFIG_QNX4FS_RW | ||
89 | .create = qnx4_create, | ||
90 | .unlink = qnx4_unlink, | ||
91 | .rmdir = qnx4_rmdir, | ||
92 | #endif | ||
93 | }; | 88 | }; |
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c deleted file mode 100644 index 09b170ac936c..000000000000 --- a/fs/qnx4/file.c +++ /dev/null | |||
@@ -1,40 +0,0 @@ | |||
1 | /* | ||
2 | * QNX4 file system, Linux implementation. | ||
3 | * | ||
4 | * Version : 0.2.1 | ||
5 | * | ||
6 | * Using parts of the xiafs filesystem. | ||
7 | * | ||
8 | * History : | ||
9 | * | ||
10 | * 25-05-1998 by Richard Frowijn : first release. | ||
11 | * 21-06-1998 by Frank Denis : wrote qnx4_readpage to use generic_file_read. | ||
12 | * 27-06-1998 by Frank Denis : file overwriting. | ||
13 | */ | ||
14 | |||
15 | #include "qnx4.h" | ||
16 | |||
17 | /* | ||
18 | * We have mostly NULL's here: the current defaults are ok for | ||
19 | * the qnx4 filesystem. | ||
20 | */ | ||
21 | const struct file_operations qnx4_file_operations = | ||
22 | { | ||
23 | .llseek = generic_file_llseek, | ||
24 | .read = do_sync_read, | ||
25 | .aio_read = generic_file_aio_read, | ||
26 | .mmap = generic_file_mmap, | ||
27 | .splice_read = generic_file_splice_read, | ||
28 | #ifdef CONFIG_QNX4FS_RW | ||
29 | .write = do_sync_write, | ||
30 | .aio_write = generic_file_aio_write, | ||
31 | .fsync = simple_fsync, | ||
32 | #endif | ||
33 | }; | ||
34 | |||
35 | const struct inode_operations qnx4_file_inode_operations = | ||
36 | { | ||
37 | #ifdef CONFIG_QNX4FS_RW | ||
38 | .truncate = qnx4_truncate, | ||
39 | #endif | ||
40 | }; | ||
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 681df5fcd161..d2cd1798d8c4 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -28,73 +28,6 @@ | |||
28 | 28 | ||
29 | static const struct super_operations qnx4_sops; | 29 | static const struct super_operations qnx4_sops; |
30 | 30 | ||
31 | #ifdef CONFIG_QNX4FS_RW | ||
32 | |||
33 | static void qnx4_delete_inode(struct inode *inode) | ||
34 | { | ||
35 | QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); | ||
36 | truncate_inode_pages(&inode->i_data, 0); | ||
37 | inode->i_size = 0; | ||
38 | qnx4_truncate(inode); | ||
39 | lock_kernel(); | ||
40 | qnx4_free_inode(inode); | ||
41 | unlock_kernel(); | ||
42 | } | ||
43 | |||
44 | static int qnx4_write_inode(struct inode *inode, int do_sync) | ||
45 | { | ||
46 | struct qnx4_inode_entry *raw_inode; | ||
47 | int block, ino; | ||
48 | struct buffer_head *bh; | ||
49 | ino = inode->i_ino; | ||
50 | |||
51 | QNX4DEBUG(("qnx4: write inode 1.\n")); | ||
52 | if (inode->i_nlink == 0) { | ||
53 | return 0; | ||
54 | } | ||
55 | if (!ino) { | ||
56 | printk("qnx4: bad inode number on dev %s: %d is out of range\n", | ||
57 | inode->i_sb->s_id, ino); | ||
58 | return -EIO; | ||
59 | } | ||
60 | QNX4DEBUG(("qnx4: write inode 2.\n")); | ||
61 | block = ino / QNX4_INODES_PER_BLOCK; | ||
62 | lock_kernel(); | ||
63 | if (!(bh = sb_bread(inode->i_sb, block))) { | ||
64 | printk("qnx4: major problem: unable to read inode from dev " | ||
65 | "%s\n", inode->i_sb->s_id); | ||
66 | unlock_kernel(); | ||
67 | return -EIO; | ||
68 | } | ||
69 | raw_inode = ((struct qnx4_inode_entry *) bh->b_data) + | ||
70 | (ino % QNX4_INODES_PER_BLOCK); | ||
71 | raw_inode->di_mode = cpu_to_le16(inode->i_mode); | ||
72 | raw_inode->di_uid = cpu_to_le16(fs_high2lowuid(inode->i_uid)); | ||
73 | raw_inode->di_gid = cpu_to_le16(fs_high2lowgid(inode->i_gid)); | ||
74 | raw_inode->di_nlink = cpu_to_le16(inode->i_nlink); | ||
75 | raw_inode->di_size = cpu_to_le32(inode->i_size); | ||
76 | raw_inode->di_mtime = cpu_to_le32(inode->i_mtime.tv_sec); | ||
77 | raw_inode->di_atime = cpu_to_le32(inode->i_atime.tv_sec); | ||
78 | raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec); | ||
79 | raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks); | ||
80 | mark_buffer_dirty(bh); | ||
81 | if (do_sync) { | ||
82 | sync_dirty_buffer(bh); | ||
83 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | ||
84 | printk("qnx4: IO error syncing inode [%s:%08x]\n", | ||
85 | inode->i_sb->s_id, ino); | ||
86 | brelse(bh); | ||
87 | unlock_kernel(); | ||
88 | return -EIO; | ||
89 | } | ||
90 | } | ||
91 | brelse(bh); | ||
92 | unlock_kernel(); | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | #endif | ||
97 | |||
98 | static void qnx4_put_super(struct super_block *sb); | 31 | static void qnx4_put_super(struct super_block *sb); |
99 | static struct inode *qnx4_alloc_inode(struct super_block *sb); | 32 | static struct inode *qnx4_alloc_inode(struct super_block *sb); |
100 | static void qnx4_destroy_inode(struct inode *inode); | 33 | static void qnx4_destroy_inode(struct inode *inode); |
@@ -108,10 +41,6 @@ static const struct super_operations qnx4_sops = | |||
108 | .put_super = qnx4_put_super, | 41 | .put_super = qnx4_put_super, |
109 | .statfs = qnx4_statfs, | 42 | .statfs = qnx4_statfs, |
110 | .remount_fs = qnx4_remount, | 43 | .remount_fs = qnx4_remount, |
111 | #ifdef CONFIG_QNX4FS_RW | ||
112 | .write_inode = qnx4_write_inode, | ||
113 | .delete_inode = qnx4_delete_inode, | ||
114 | #endif | ||
115 | }; | 44 | }; |
116 | 45 | ||
117 | static int qnx4_remount(struct super_block *sb, int *flags, char *data) | 46 | static int qnx4_remount(struct super_block *sb, int *flags, char *data) |
@@ -120,15 +49,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data) | |||
120 | 49 | ||
121 | qs = qnx4_sb(sb); | 50 | qs = qnx4_sb(sb); |
122 | qs->Version = QNX4_VERSION; | 51 | qs->Version = QNX4_VERSION; |
123 | #ifndef CONFIG_QNX4FS_RW | ||
124 | *flags |= MS_RDONLY; | 52 | *flags |= MS_RDONLY; |
125 | #endif | ||
126 | if (*flags & MS_RDONLY) { | ||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | mark_buffer_dirty(qs->sb_buf); | ||
131 | |||
132 | return 0; | 53 | return 0; |
133 | } | 54 | } |
134 | 55 | ||
@@ -354,9 +275,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) | |||
354 | } | 275 | } |
355 | s->s_op = &qnx4_sops; | 276 | s->s_op = &qnx4_sops; |
356 | s->s_magic = QNX4_SUPER_MAGIC; | 277 | s->s_magic = QNX4_SUPER_MAGIC; |
357 | #ifndef CONFIG_QNX4FS_RW | ||
358 | s->s_flags |= MS_RDONLY; /* Yup, read-only yet */ | 278 | s->s_flags |= MS_RDONLY; /* Yup, read-only yet */ |
359 | #endif | ||
360 | qnx4_sb(s)->sb_buf = bh; | 279 | qnx4_sb(s)->sb_buf = bh; |
361 | qnx4_sb(s)->sb = (struct qnx4_super_block *) bh->b_data; | 280 | qnx4_sb(s)->sb = (struct qnx4_super_block *) bh->b_data; |
362 | 281 | ||
@@ -489,8 +408,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino) | |||
489 | 408 | ||
490 | memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE); | 409 | memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE); |
491 | if (S_ISREG(inode->i_mode)) { | 410 | if (S_ISREG(inode->i_mode)) { |
492 | inode->i_op = &qnx4_file_inode_operations; | 411 | inode->i_fop = &generic_ro_fops; |
493 | inode->i_fop = &qnx4_file_operations; | ||
494 | inode->i_mapping->a_ops = &qnx4_aops; | 412 | inode->i_mapping->a_ops = &qnx4_aops; |
495 | qnx4_i(inode)->mmu_private = inode->i_size; | 413 | qnx4_i(inode)->mmu_private = inode->i_size; |
496 | } else if (S_ISDIR(inode->i_mode)) { | 414 | } else if (S_ISDIR(inode->i_mode)) { |
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 5972ed214937..ae1e7edbacd6 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c | |||
@@ -134,108 +134,3 @@ out: | |||
134 | 134 | ||
135 | return NULL; | 135 | return NULL; |
136 | } | 136 | } |
137 | |||
138 | #ifdef CONFIG_QNX4FS_RW | ||
139 | int qnx4_create(struct inode *dir, struct dentry *dentry, int mode, | ||
140 | struct nameidata *nd) | ||
141 | { | ||
142 | QNX4DEBUG(("qnx4: qnx4_create\n")); | ||
143 | if (dir == NULL) { | ||
144 | return -ENOENT; | ||
145 | } | ||
146 | return -ENOSPC; | ||
147 | } | ||
148 | |||
149 | int qnx4_rmdir(struct inode *dir, struct dentry *dentry) | ||
150 | { | ||
151 | struct buffer_head *bh; | ||
152 | struct qnx4_inode_entry *de; | ||
153 | struct inode *inode; | ||
154 | int retval; | ||
155 | int ino; | ||
156 | |||
157 | QNX4DEBUG(("qnx4: qnx4_rmdir [%s]\n", dentry->d_name.name)); | ||
158 | lock_kernel(); | ||
159 | bh = qnx4_find_entry(dentry->d_name.len, dir, dentry->d_name.name, | ||
160 | &de, &ino); | ||
161 | if (bh == NULL) { | ||
162 | unlock_kernel(); | ||
163 | return -ENOENT; | ||
164 | } | ||
165 | inode = dentry->d_inode; | ||
166 | if (inode->i_ino != ino) { | ||
167 | retval = -EIO; | ||
168 | goto end_rmdir; | ||
169 | } | ||
170 | #if 0 | ||
171 | if (!empty_dir(inode)) { | ||
172 | retval = -ENOTEMPTY; | ||
173 | goto end_rmdir; | ||
174 | } | ||
175 | #endif | ||
176 | if (inode->i_nlink != 2) { | ||
177 | QNX4DEBUG(("empty directory has nlink!=2 (%d)\n", inode->i_nlink)); | ||
178 | } | ||
179 | QNX4DEBUG(("qnx4: deleting directory\n")); | ||
180 | de->di_status = 0; | ||
181 | memset(de->di_fname, 0, sizeof de->di_fname); | ||
182 | de->di_mode = 0; | ||
183 | mark_buffer_dirty_inode(bh, dir); | ||
184 | clear_nlink(inode); | ||
185 | mark_inode_dirty(inode); | ||
186 | inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; | ||
187 | inode_dec_link_count(dir); | ||
188 | retval = 0; | ||
189 | |||
190 | end_rmdir: | ||
191 | brelse(bh); | ||
192 | |||
193 | unlock_kernel(); | ||
194 | return retval; | ||
195 | } | ||
196 | |||
197 | int qnx4_unlink(struct inode *dir, struct dentry *dentry) | ||
198 | { | ||
199 | struct buffer_head *bh; | ||
200 | struct qnx4_inode_entry *de; | ||
201 | struct inode *inode; | ||
202 | int retval; | ||
203 | int ino; | ||
204 | |||
205 | QNX4DEBUG(("qnx4: qnx4_unlink [%s]\n", dentry->d_name.name)); | ||
206 | lock_kernel(); | ||
207 | bh = qnx4_find_entry(dentry->d_name.len, dir, dentry->d_name.name, | ||
208 | &de, &ino); | ||
209 | if (bh == NULL) { | ||
210 | unlock_kernel(); | ||
211 | return -ENOENT; | ||
212 | } | ||
213 | inode = dentry->d_inode; | ||
214 | if (inode->i_ino != ino) { | ||
215 | retval = -EIO; | ||
216 | goto end_unlink; | ||
217 | } | ||
218 | retval = -EPERM; | ||
219 | if (!inode->i_nlink) { | ||
220 | QNX4DEBUG(("Deleting nonexistent file (%s:%lu), %d\n", | ||
221 | inode->i_sb->s_id, | ||
222 | inode->i_ino, inode->i_nlink)); | ||
223 | inode->i_nlink = 1; | ||
224 | } | ||
225 | de->di_status = 0; | ||
226 | memset(de->di_fname, 0, sizeof de->di_fname); | ||
227 | de->di_mode = 0; | ||
228 | mark_buffer_dirty_inode(bh, dir); | ||
229 | dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; | ||
230 | mark_inode_dirty(dir); | ||
231 | inode->i_ctime = dir->i_ctime; | ||
232 | inode_dec_link_count(inode); | ||
233 | retval = 0; | ||
234 | |||
235 | end_unlink: | ||
236 | unlock_kernel(); | ||
237 | brelse(bh); | ||
238 | |||
239 | return retval; | ||
240 | } | ||
241 | #endif | ||
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h index 9efc089454f6..33a60858203b 100644 --- a/fs/qnx4/qnx4.h +++ b/fs/qnx4/qnx4.h | |||
@@ -29,17 +29,9 @@ extern unsigned long qnx4_block_map(struct inode *inode, long iblock); | |||
29 | 29 | ||
30 | extern struct buffer_head *qnx4_bread(struct inode *, int, int); | 30 | extern struct buffer_head *qnx4_bread(struct inode *, int, int); |
31 | 31 | ||
32 | extern const struct inode_operations qnx4_file_inode_operations; | ||
33 | extern const struct inode_operations qnx4_dir_inode_operations; | 32 | extern const struct inode_operations qnx4_dir_inode_operations; |
34 | extern const struct file_operations qnx4_file_operations; | ||
35 | extern const struct file_operations qnx4_dir_operations; | 33 | extern const struct file_operations qnx4_dir_operations; |
36 | extern int qnx4_is_free(struct super_block *sb, long block); | 34 | extern int qnx4_is_free(struct super_block *sb, long block); |
37 | extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); | ||
38 | extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); | ||
39 | extern void qnx4_truncate(struct inode *inode); | ||
40 | extern void qnx4_free_inode(struct inode *inode); | ||
41 | extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); | ||
42 | extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); | ||
43 | 35 | ||
44 | static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) | 36 | static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) |
45 | { | 37 | { |
diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c deleted file mode 100644 index d94d9ee241fe..000000000000 --- a/fs/qnx4/truncate.c +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | /* | ||
2 | * QNX4 file system, Linux implementation. | ||
3 | * | ||
4 | * Version : 0.1 | ||
5 | * | ||
6 | * Using parts of the xiafs filesystem. | ||
7 | * | ||
8 | * History : | ||
9 | * | ||
10 | * 30-06-1998 by Frank DENIS : ugly filler. | ||
11 | */ | ||
12 | |||
13 | #include <linux/smp_lock.h> | ||
14 | #include "qnx4.h" | ||
15 | |||
16 | #ifdef CONFIG_QNX4FS_RW | ||
17 | |||
18 | void qnx4_truncate(struct inode *inode) | ||
19 | { | ||
20 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | ||
21 | S_ISLNK(inode->i_mode))) { | ||
22 | return; | ||
23 | } | ||
24 | lock_kernel(); | ||
25 | if (!(S_ISDIR(inode->i_mode))) { | ||
26 | /* TODO */ | ||
27 | } | ||
28 | QNX4DEBUG(("qnx4: qnx4_truncate called\n")); | ||
29 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; | ||
30 | mark_inode_dirty(inode); | ||
31 | unlock_kernel(); | ||
32 | } | ||
33 | |||
34 | #endif | ||
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 38f7bd559f35..39b49c42a7ed 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -1839,7 +1839,7 @@ EXPORT_SYMBOL(dquot_commit_info); | |||
1839 | /* | 1839 | /* |
1840 | * Definitions of diskquota operations. | 1840 | * Definitions of diskquota operations. |
1841 | */ | 1841 | */ |
1842 | struct dquot_operations dquot_operations = { | 1842 | const struct dquot_operations dquot_operations = { |
1843 | .initialize = dquot_initialize, | 1843 | .initialize = dquot_initialize, |
1844 | .drop = dquot_drop, | 1844 | .drop = dquot_drop, |
1845 | .alloc_space = dquot_alloc_space, | 1845 | .alloc_space = dquot_alloc_space, |
@@ -2461,7 +2461,7 @@ out: | |||
2461 | } | 2461 | } |
2462 | EXPORT_SYMBOL(vfs_set_dqinfo); | 2462 | EXPORT_SYMBOL(vfs_set_dqinfo); |
2463 | 2463 | ||
2464 | struct quotactl_ops vfs_quotactl_ops = { | 2464 | const struct quotactl_ops vfs_quotactl_ops = { |
2465 | .quota_on = vfs_quota_on, | 2465 | .quota_on = vfs_quota_on, |
2466 | .quota_off = vfs_quota_off, | 2466 | .quota_off = vfs_quota_off, |
2467 | .quota_sync = vfs_quota_sync, | 2467 | .quota_sync = vfs_quota_sync, |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a7f0110fca4c..a6090aa1a7c1 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -34,12 +34,10 @@ | |||
34 | #include <linux/ramfs.h> | 34 | #include <linux/ramfs.h> |
35 | #include <linux/sched.h> | 35 | #include <linux/sched.h> |
36 | #include <linux/parser.h> | 36 | #include <linux/parser.h> |
37 | #include <linux/magic.h> | ||
37 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
38 | #include "internal.h" | 39 | #include "internal.h" |
39 | 40 | ||
40 | /* some random number */ | ||
41 | #define RAMFS_MAGIC 0x858458f6 | ||
42 | |||
43 | #define RAMFS_DEFAULT_MODE 0755 | 41 | #define RAMFS_DEFAULT_MODE 0755 |
44 | 42 | ||
45 | static const struct super_operations ramfs_ops; | 43 | static const struct super_operations ramfs_ops; |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7adea74d6a8a..f0ad05f38022 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -612,7 +612,7 @@ static int reiserfs_mark_dquot_dirty(struct dquot *); | |||
612 | static int reiserfs_write_info(struct super_block *, int); | 612 | static int reiserfs_write_info(struct super_block *, int); |
613 | static int reiserfs_quota_on(struct super_block *, int, int, char *, int); | 613 | static int reiserfs_quota_on(struct super_block *, int, int, char *, int); |
614 | 614 | ||
615 | static struct dquot_operations reiserfs_quota_operations = { | 615 | static const struct dquot_operations reiserfs_quota_operations = { |
616 | .initialize = dquot_initialize, | 616 | .initialize = dquot_initialize, |
617 | .drop = dquot_drop, | 617 | .drop = dquot_drop, |
618 | .alloc_space = dquot_alloc_space, | 618 | .alloc_space = dquot_alloc_space, |
@@ -629,7 +629,7 @@ static struct dquot_operations reiserfs_quota_operations = { | |||
629 | .destroy_dquot = dquot_destroy, | 629 | .destroy_dquot = dquot_destroy, |
630 | }; | 630 | }; |
631 | 631 | ||
632 | static struct quotactl_ops reiserfs_qctl_operations = { | 632 | static const struct quotactl_ops reiserfs_qctl_operations = { |
633 | .quota_on = reiserfs_quota_on, | 633 | .quota_on = reiserfs_quota_on, |
634 | .quota_off = vfs_quota_off, | 634 | .quota_off = vfs_quota_off, |
635 | .quota_sync = vfs_quota_sync, | 635 | .quota_sync = vfs_quota_sync, |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 4ab3c03d8f95..c117fa80d1e9 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -284,7 +284,7 @@ static const struct file_operations romfs_dir_operations = { | |||
284 | .readdir = romfs_readdir, | 284 | .readdir = romfs_readdir, |
285 | }; | 285 | }; |
286 | 286 | ||
287 | static struct inode_operations romfs_dir_inode_operations = { | 287 | static const struct inode_operations romfs_dir_inode_operations = { |
288 | .lookup = romfs_lookup, | 288 | .lookup = romfs_lookup, |
289 | }; | 289 | }; |
290 | 290 | ||
@@ -528,7 +528,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) | |||
528 | pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; | 528 | pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; |
529 | 529 | ||
530 | root = romfs_iget(sb, pos); | 530 | root = romfs_iget(sb, pos); |
531 | if (!root) | 531 | if (IS_ERR(root)) |
532 | goto error; | 532 | goto error; |
533 | 533 | ||
534 | sb->s_root = d_alloc_root(root); | 534 | sb->s_root = d_alloc_root(root); |
diff --git a/fs/select.c b/fs/select.c index 8084834e123e..a201fc370223 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -41,22 +41,28 @@ | |||
41 | * better solutions.. | 41 | * better solutions.. |
42 | */ | 42 | */ |
43 | 43 | ||
44 | #define MAX_SLACK (100 * NSEC_PER_MSEC) | ||
45 | |||
44 | static long __estimate_accuracy(struct timespec *tv) | 46 | static long __estimate_accuracy(struct timespec *tv) |
45 | { | 47 | { |
46 | long slack; | 48 | long slack; |
47 | int divfactor = 1000; | 49 | int divfactor = 1000; |
48 | 50 | ||
51 | if (tv->tv_sec < 0) | ||
52 | return 0; | ||
53 | |||
49 | if (task_nice(current) > 0) | 54 | if (task_nice(current) > 0) |
50 | divfactor = divfactor / 5; | 55 | divfactor = divfactor / 5; |
51 | 56 | ||
57 | if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor)) | ||
58 | return MAX_SLACK; | ||
59 | |||
52 | slack = tv->tv_nsec / divfactor; | 60 | slack = tv->tv_nsec / divfactor; |
53 | slack += tv->tv_sec * (NSEC_PER_SEC/divfactor); | 61 | slack += tv->tv_sec * (NSEC_PER_SEC/divfactor); |
54 | 62 | ||
55 | if (slack > 100 * NSEC_PER_MSEC) | 63 | if (slack > MAX_SLACK) |
56 | slack = 100 * NSEC_PER_MSEC; | 64 | return MAX_SLACK; |
57 | 65 | ||
58 | if (slack < 0) | ||
59 | slack = 0; | ||
60 | return slack; | 66 | return slack; |
61 | } | 67 | } |
62 | 68 | ||
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index 9468168b9af5..71c29b6670b4 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c | |||
@@ -509,7 +509,7 @@ date_unix2dos(struct smb_sb_info *server, | |||
509 | month = 2; | 509 | month = 2; |
510 | } else { | 510 | } else { |
511 | nl_day = (year & 3) || day <= 59 ? day : day - 1; | 511 | nl_day = (year & 3) || day <= 59 ? day : day - 1; |
512 | for (month = 0; month < 12; month++) | 512 | for (month = 1; month < 12; month++) |
513 | if (day_n[month] > nl_day) | 513 | if (day_n[month] > nl_day) |
514 | break; | 514 | break; |
515 | } | 515 | } |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index cb5fc57e370b..6c197ef53add 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -44,7 +44,7 @@ | |||
44 | #include "squashfs.h" | 44 | #include "squashfs.h" |
45 | 45 | ||
46 | static struct file_system_type squashfs_fs_type; | 46 | static struct file_system_type squashfs_fs_type; |
47 | static struct super_operations squashfs_super_ops; | 47 | static const struct super_operations squashfs_super_ops; |
48 | 48 | ||
49 | static int supported_squashfs_filesystem(short major, short minor, short comp) | 49 | static int supported_squashfs_filesystem(short major, short minor, short comp) |
50 | { | 50 | { |
@@ -444,7 +444,7 @@ static struct file_system_type squashfs_fs_type = { | |||
444 | .fs_flags = FS_REQUIRES_DEV | 444 | .fs_flags = FS_REQUIRES_DEV |
445 | }; | 445 | }; |
446 | 446 | ||
447 | static struct super_operations squashfs_super_ops = { | 447 | static const struct super_operations squashfs_super_ops = { |
448 | .alloc_inode = squashfs_alloc_inode, | 448 | .alloc_inode = squashfs_alloc_inode, |
449 | .destroy_inode = squashfs_destroy_inode, | 449 | .destroy_inode = squashfs_destroy_inode, |
450 | .statfs = squashfs_statfs, | 450 | .statfs = squashfs_statfs, |
diff --git a/fs/super.c b/fs/super.c index 9cda337ddae2..0e7207b9815c 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -54,7 +54,7 @@ DEFINE_SPINLOCK(sb_lock); | |||
54 | static struct super_block *alloc_super(struct file_system_type *type) | 54 | static struct super_block *alloc_super(struct file_system_type *type) |
55 | { | 55 | { |
56 | struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); | 56 | struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); |
57 | static struct super_operations default_op; | 57 | static const struct super_operations default_op; |
58 | 58 | ||
59 | if (s) { | 59 | if (s) { |
60 | if (security_sb_alloc(s)) { | 60 | if (security_sb_alloc(s)) { |
@@ -707,6 +707,12 @@ static int set_bdev_super(struct super_block *s, void *data) | |||
707 | { | 707 | { |
708 | s->s_bdev = data; | 708 | s->s_bdev = data; |
709 | s->s_dev = s->s_bdev->bd_dev; | 709 | s->s_dev = s->s_bdev->bd_dev; |
710 | |||
711 | /* | ||
712 | * We set the bdi here to the queue backing, file systems can | ||
713 | * overwrite this in ->fill_super() | ||
714 | */ | ||
715 | s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; | ||
710 | return 0; | 716 | return 0; |
711 | } | 717 | } |
712 | 718 | ||
@@ -27,6 +27,13 @@ | |||
27 | */ | 27 | */ |
28 | static int __sync_filesystem(struct super_block *sb, int wait) | 28 | static int __sync_filesystem(struct super_block *sb, int wait) |
29 | { | 29 | { |
30 | /* | ||
31 | * This should be safe, as we require bdi backing to actually | ||
32 | * write out data in the first place | ||
33 | */ | ||
34 | if (!sb->s_bdi) | ||
35 | return 0; | ||
36 | |||
30 | /* Avoid doing twice syncing and cache pruning for quota sync */ | 37 | /* Avoid doing twice syncing and cache pruning for quota sync */ |
31 | if (!wait) { | 38 | if (!wait) { |
32 | writeout_quota_sb(sb, -1); | 39 | writeout_quota_sb(sb, -1); |
@@ -101,7 +108,7 @@ restart: | |||
101 | spin_unlock(&sb_lock); | 108 | spin_unlock(&sb_lock); |
102 | 109 | ||
103 | down_read(&sb->s_umount); | 110 | down_read(&sb->s_umount); |
104 | if (!(sb->s_flags & MS_RDONLY) && sb->s_root) | 111 | if (!(sb->s_flags & MS_RDONLY) && sb->s_root && sb->s_bdi) |
105 | __sync_filesystem(sb, wait); | 112 | __sync_filesystem(sb, wait); |
106 | up_read(&sb->s_umount); | 113 | up_read(&sb->s_umount); |
107 | 114 | ||
@@ -176,6 +183,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) | |||
176 | ret = err; | 183 | ret = err; |
177 | return ret; | 184 | return ret; |
178 | } | 185 | } |
186 | EXPORT_SYMBOL(file_fsync); | ||
179 | 187 | ||
180 | /** | 188 | /** |
181 | * vfs_fsync_range - helper to sync a range of data & metadata to disk | 189 | * vfs_fsync_range - helper to sync a range of data & metadata to disk |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 1c8991b0db13..076ca50e9933 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -54,29 +54,15 @@ | |||
54 | * @nr_to_write: how many dirty pages to write-back | 54 | * @nr_to_write: how many dirty pages to write-back |
55 | * | 55 | * |
56 | * This function shrinks UBIFS liability by means of writing back some amount | 56 | * This function shrinks UBIFS liability by means of writing back some amount |
57 | * of dirty inodes and their pages. Returns the amount of pages which were | 57 | * of dirty inodes and their pages. |
58 | * written back. The returned value does not include dirty inodes which were | ||
59 | * synchronized. | ||
60 | * | 58 | * |
61 | * Note, this function synchronizes even VFS inodes which are locked | 59 | * Note, this function synchronizes even VFS inodes which are locked |
62 | * (@i_mutex) by the caller of the budgeting function, because write-back does | 60 | * (@i_mutex) by the caller of the budgeting function, because write-back does |
63 | * not touch @i_mutex. | 61 | * not touch @i_mutex. |
64 | */ | 62 | */ |
65 | static int shrink_liability(struct ubifs_info *c, int nr_to_write) | 63 | static void shrink_liability(struct ubifs_info *c, int nr_to_write) |
66 | { | 64 | { |
67 | int nr_written; | 65 | writeback_inodes_sb(c->vfs_sb); |
68 | |||
69 | nr_written = writeback_inodes_sb(c->vfs_sb); | ||
70 | if (!nr_written) { | ||
71 | /* | ||
72 | * Re-try again but wait on pages/inodes which are being | ||
73 | * written-back concurrently (e.g., by pdflush). | ||
74 | */ | ||
75 | nr_written = sync_inodes_sb(c->vfs_sb); | ||
76 | } | ||
77 | |||
78 | dbg_budg("%d pages were written back", nr_written); | ||
79 | return nr_written; | ||
80 | } | 66 | } |
81 | 67 | ||
82 | /** | 68 | /** |
@@ -729,7 +715,7 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) | |||
729 | * ubifs_get_free_space - return amount of free space. | 715 | * ubifs_get_free_space - return amount of free space. |
730 | * @c: UBIFS file-system description object | 716 | * @c: UBIFS file-system description object |
731 | * | 717 | * |
732 | * This function calculates and retuns amount of free space to report to | 718 | * This function calculates and returns amount of free space to report to |
733 | * user-space. | 719 | * user-space. |
734 | */ | 720 | */ |
735 | long long ubifs_get_free_space(struct ubifs_info *c) | 721 | long long ubifs_get_free_space(struct ubifs_info *c) |
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index f3a7945527fb..4775af401167 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c | |||
@@ -510,7 +510,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) | |||
510 | int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; | 510 | int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; |
511 | int first = 1, iip; | 511 | int first = 1, iip; |
512 | struct ubifs_debug_info *d = c->dbg; | 512 | struct ubifs_debug_info *d = c->dbg; |
513 | union ubifs_key lower_key, upper_key, l_key, u_key; | 513 | union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key; |
514 | unsigned long long uninitialized_var(last_sqnum); | 514 | unsigned long long uninitialized_var(last_sqnum); |
515 | struct ubifs_idx_node *idx; | 515 | struct ubifs_idx_node *idx; |
516 | struct list_head list; | 516 | struct list_head list; |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index ce2cd8343618..dbc093afd946 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -210,6 +210,20 @@ const char *dbg_cstate(int cmt_state) | |||
210 | } | 210 | } |
211 | } | 211 | } |
212 | 212 | ||
213 | const char *dbg_jhead(int jhead) | ||
214 | { | ||
215 | switch (jhead) { | ||
216 | case GCHD: | ||
217 | return "0 (GC)"; | ||
218 | case BASEHD: | ||
219 | return "1 (base)"; | ||
220 | case DATAHD: | ||
221 | return "2 (data)"; | ||
222 | default: | ||
223 | return "unknown journal head"; | ||
224 | } | ||
225 | } | ||
226 | |||
213 | static void dump_ch(const struct ubifs_ch *ch) | 227 | static void dump_ch(const struct ubifs_ch *ch) |
214 | { | 228 | { |
215 | printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); | 229 | printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); |
@@ -623,8 +637,9 @@ void dbg_dump_budg(struct ubifs_info *c) | |||
623 | /* If we are in R/O mode, journal heads do not exist */ | 637 | /* If we are in R/O mode, journal heads do not exist */ |
624 | if (c->jheads) | 638 | if (c->jheads) |
625 | for (i = 0; i < c->jhead_cnt; i++) | 639 | for (i = 0; i < c->jhead_cnt; i++) |
626 | printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", | 640 | printk(KERN_DEBUG "\tjhead %s\t LEB %d\n", |
627 | c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); | 641 | dbg_jhead(c->jheads[i].wbuf.jhead), |
642 | c->jheads[i].wbuf.lnum); | ||
628 | for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { | 643 | for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { |
629 | bud = rb_entry(rb, struct ubifs_bud, rb); | 644 | bud = rb_entry(rb, struct ubifs_bud, rb); |
630 | printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); | 645 | printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); |
@@ -648,9 +663,90 @@ void dbg_dump_budg(struct ubifs_info *c) | |||
648 | 663 | ||
649 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) | 664 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) |
650 | { | 665 | { |
651 | printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), " | 666 | int i, spc, dark = 0, dead = 0; |
652 | "flags %#x\n", lp->lnum, lp->free, lp->dirty, | 667 | struct rb_node *rb; |
653 | c->leb_size - lp->free - lp->dirty, lp->flags); | 668 | struct ubifs_bud *bud; |
669 | |||
670 | spc = lp->free + lp->dirty; | ||
671 | if (spc < c->dead_wm) | ||
672 | dead = spc; | ||
673 | else | ||
674 | dark = ubifs_calc_dark(c, spc); | ||
675 | |||
676 | if (lp->flags & LPROPS_INDEX) | ||
677 | printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " | ||
678 | "free + dirty %-8d flags %#x (", lp->lnum, lp->free, | ||
679 | lp->dirty, c->leb_size - spc, spc, lp->flags); | ||
680 | else | ||
681 | printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " | ||
682 | "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " | ||
683 | "flags %#-4x (", lp->lnum, lp->free, lp->dirty, | ||
684 | c->leb_size - spc, spc, dark, dead, | ||
685 | (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); | ||
686 | |||
687 | if (lp->flags & LPROPS_TAKEN) { | ||
688 | if (lp->flags & LPROPS_INDEX) | ||
689 | printk(KERN_CONT "index, taken"); | ||
690 | else | ||
691 | printk(KERN_CONT "taken"); | ||
692 | } else { | ||
693 | const char *s; | ||
694 | |||
695 | if (lp->flags & LPROPS_INDEX) { | ||
696 | switch (lp->flags & LPROPS_CAT_MASK) { | ||
697 | case LPROPS_DIRTY_IDX: | ||
698 | s = "dirty index"; | ||
699 | break; | ||
700 | case LPROPS_FRDI_IDX: | ||
701 | s = "freeable index"; | ||
702 | break; | ||
703 | default: | ||
704 | s = "index"; | ||
705 | } | ||
706 | } else { | ||
707 | switch (lp->flags & LPROPS_CAT_MASK) { | ||
708 | case LPROPS_UNCAT: | ||
709 | s = "not categorized"; | ||
710 | break; | ||
711 | case LPROPS_DIRTY: | ||
712 | s = "dirty"; | ||
713 | break; | ||
714 | case LPROPS_FREE: | ||
715 | s = "free"; | ||
716 | break; | ||
717 | case LPROPS_EMPTY: | ||
718 | s = "empty"; | ||
719 | break; | ||
720 | case LPROPS_FREEABLE: | ||
721 | s = "freeable"; | ||
722 | break; | ||
723 | default: | ||
724 | s = NULL; | ||
725 | break; | ||
726 | } | ||
727 | } | ||
728 | printk(KERN_CONT "%s", s); | ||
729 | } | ||
730 | |||
731 | for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) { | ||
732 | bud = rb_entry(rb, struct ubifs_bud, rb); | ||
733 | if (bud->lnum == lp->lnum) { | ||
734 | int head = 0; | ||
735 | for (i = 0; i < c->jhead_cnt; i++) { | ||
736 | if (lp->lnum == c->jheads[i].wbuf.lnum) { | ||
737 | printk(KERN_CONT ", jhead %s", | ||
738 | dbg_jhead(i)); | ||
739 | head = 1; | ||
740 | } | ||
741 | } | ||
742 | if (!head) | ||
743 | printk(KERN_CONT ", bud of jhead %s", | ||
744 | dbg_jhead(bud->jhead)); | ||
745 | } | ||
746 | } | ||
747 | if (lp->lnum == c->gc_lnum) | ||
748 | printk(KERN_CONT ", GC LEB"); | ||
749 | printk(KERN_CONT ")\n"); | ||
654 | } | 750 | } |
655 | 751 | ||
656 | void dbg_dump_lprops(struct ubifs_info *c) | 752 | void dbg_dump_lprops(struct ubifs_info *c) |
@@ -724,7 +820,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) | |||
724 | 820 | ||
725 | printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", | 821 | printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", |
726 | current->pid, lnum); | 822 | current->pid, lnum); |
727 | sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); | 823 | sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); |
728 | if (IS_ERR(sleb)) { | 824 | if (IS_ERR(sleb)) { |
729 | ubifs_err("scan error %d", (int)PTR_ERR(sleb)); | 825 | ubifs_err("scan error %d", (int)PTR_ERR(sleb)); |
730 | return; | 826 | return; |
@@ -909,8 +1005,10 @@ out: | |||
909 | ubifs_msg("saved lprops statistics dump"); | 1005 | ubifs_msg("saved lprops statistics dump"); |
910 | dbg_dump_lstats(&d->saved_lst); | 1006 | dbg_dump_lstats(&d->saved_lst); |
911 | ubifs_get_lp_stats(c, &lst); | 1007 | ubifs_get_lp_stats(c, &lst); |
1008 | |||
912 | ubifs_msg("current lprops statistics dump"); | 1009 | ubifs_msg("current lprops statistics dump"); |
913 | dbg_dump_lstats(&d->saved_lst); | 1010 | dbg_dump_lstats(&lst); |
1011 | |||
914 | spin_lock(&c->space_lock); | 1012 | spin_lock(&c->space_lock); |
915 | dbg_dump_budg(c); | 1013 | dbg_dump_budg(c); |
916 | spin_unlock(&c->space_lock); | 1014 | spin_unlock(&c->space_lock); |
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index c1cd73b2e06e..29d960101ea6 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
@@ -271,6 +271,7 @@ void ubifs_debugging_exit(struct ubifs_info *c); | |||
271 | /* Dump functions */ | 271 | /* Dump functions */ |
272 | const char *dbg_ntype(int type); | 272 | const char *dbg_ntype(int type); |
273 | const char *dbg_cstate(int cmt_state); | 273 | const char *dbg_cstate(int cmt_state); |
274 | const char *dbg_jhead(int jhead); | ||
274 | const char *dbg_get_key_dump(const struct ubifs_info *c, | 275 | const char *dbg_get_key_dump(const struct ubifs_info *c, |
275 | const union ubifs_key *key); | 276 | const union ubifs_key *key); |
276 | void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); | 277 | void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); |
@@ -321,6 +322,8 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, | |||
321 | int dbg_check_lprops(struct ubifs_info *c); | 322 | int dbg_check_lprops(struct ubifs_info *c); |
322 | int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, | 323 | int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, |
323 | int row, int col); | 324 | int row, int col); |
325 | int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, | ||
326 | loff_t size); | ||
324 | 327 | ||
325 | /* Force the use of in-the-gaps method for testing */ | 328 | /* Force the use of in-the-gaps method for testing */ |
326 | 329 | ||
@@ -425,6 +428,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); | |||
425 | 428 | ||
426 | #define dbg_ntype(type) "" | 429 | #define dbg_ntype(type) "" |
427 | #define dbg_cstate(cmt_state) "" | 430 | #define dbg_cstate(cmt_state) "" |
431 | #define dbg_jhead(jhead) "" | ||
428 | #define dbg_get_key_dump(c, key) ({}) | 432 | #define dbg_get_key_dump(c, key) ({}) |
429 | #define dbg_dump_inode(c, inode) ({}) | 433 | #define dbg_dump_inode(c, inode) ({}) |
430 | #define dbg_dump_node(c, node) ({}) | 434 | #define dbg_dump_node(c, node) ({}) |
@@ -460,6 +464,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); | |||
460 | #define dbg_check_heap(c, heap, cat, add_pos) ({}) | 464 | #define dbg_check_heap(c, heap, cat, add_pos) ({}) |
461 | #define dbg_check_lprops(c) 0 | 465 | #define dbg_check_lprops(c) 0 |
462 | #define dbg_check_lpt_nodes(c, cnode, row, col) 0 | 466 | #define dbg_check_lpt_nodes(c, cnode, row, col) 0 |
467 | #define dbg_check_inode_size(c, inode, size) 0 | ||
463 | #define dbg_force_in_the_gaps_enabled 0 | 468 | #define dbg_force_in_the_gaps_enabled 0 |
464 | #define dbg_force_in_the_gaps() 0 | 469 | #define dbg_force_in_the_gaps() 0 |
465 | #define dbg_failure_mode 0 | 470 | #define dbg_failure_mode 0 |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 6d34dc7e33e1..2e6481a7701c 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -21,34 +21,32 @@ | |||
21 | */ | 21 | */ |
22 | 22 | ||
23 | /* | 23 | /* |
24 | * This file implements VFS file and inode operations of regular files, device | 24 | * This file implements VFS file and inode operations for regular files, device |
25 | * nodes and symlinks as well as address space operations. | 25 | * nodes and symlinks as well as address space operations. |
26 | * | 26 | * |
27 | * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the | 27 | * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if |
28 | * page is dirty and is used for budgeting purposes - dirty pages should not be | 28 | * the page is dirty and is used for optimization purposes - dirty pages are |
29 | * budgeted. The PG_checked flag is set if full budgeting is required for the | 29 | * not budgeted so the flag shows that 'ubifs_write_end()' should not release |
30 | * page e.g., when it corresponds to a file hole or it is just beyond the file | 30 | * the budget for this page. The @PG_checked flag is set if full budgeting is |
31 | * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to | 31 | * required for the page e.g., when it corresponds to a file hole or it is |
32 | * fail in this function, and the budget is released in 'ubifs_write_end()'. So | 32 | * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because |
33 | * the PG_private and PG_checked flags carry the information about how the page | 33 | * it is OK to fail in this function, and the budget is released in |
34 | * was budgeted, to make it possible to release the budget properly. | 34 | * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry |
35 | * information about how the page was budgeted, to make it possible to release | ||
36 | * the budget properly. | ||
35 | * | 37 | * |
36 | * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations | 38 | * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we |
37 | * we implement. However, this is not true for '->writepage()', which might be | 39 | * implement. However, this is not true for 'ubifs_writepage()', which may be |
38 | * called with 'i_mutex' unlocked. For example, when pdflush is performing | 40 | * called with @i_mutex unlocked. For example, when pdflush is doing background |
39 | * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the | 41 | * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal" |
40 | * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is | 42 | * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the |
41 | * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim | 43 | * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()' |
42 | * path'. So, in '->writepage()' we are only guaranteed that the page is | 44 | * we are only guaranteed that the page is locked. |
43 | * locked. | ||
44 | * | 45 | * |
45 | * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g., | 46 | * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the |
46 | * readahead path does not have it locked ("sys_read -> generic_file_aio_read | 47 | * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> |
47 | * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is | 48 | * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not |
48 | * not set as well. However, UBIFS disables readahead. | 49 | * set as well. However, UBIFS disables readahead. |
49 | * | ||
50 | * This, for example means that there might be 2 concurrent '->writepage()' | ||
51 | * calls for the same inode, but different inode dirty pages. | ||
52 | */ | 50 | */ |
53 | 51 | ||
54 | #include "ubifs.h" | 52 | #include "ubifs.h" |
@@ -449,9 +447,9 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
449 | /* | 447 | /* |
450 | * We change whole page so no need to load it. But we | 448 | * We change whole page so no need to load it. But we |
451 | * have to set the @PG_checked flag to make the further | 449 | * have to set the @PG_checked flag to make the further |
452 | * code the page is new. This might be not true, but it | 450 | * code know that the page is new. This might be not |
453 | * is better to budget more that to read the page from | 451 | * true, but it is better to budget more than to read |
454 | * the media. | 452 | * the page from the media. |
455 | */ | 453 | */ |
456 | SetPageChecked(page); | 454 | SetPageChecked(page); |
457 | skipped_read = 1; | 455 | skipped_read = 1; |
@@ -497,8 +495,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
497 | } | 495 | } |
498 | 496 | ||
499 | /* | 497 | /* |
500 | * Whee, we aquired budgeting quickly - without involving | 498 | * Whee, we acquired budgeting quickly - without involving |
501 | * garbage-collection, committing or forceing write-back. We return | 499 | * garbage-collection, committing or forcing write-back. We return |
502 | * with @ui->ui_mutex locked if we are appending pages, and unlocked | 500 | * with @ui->ui_mutex locked if we are appending pages, and unlocked |
503 | * otherwise. This is an optimization (slightly hacky though). | 501 | * otherwise. This is an optimization (slightly hacky though). |
504 | */ | 502 | */ |
@@ -562,7 +560,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, | |||
562 | 560 | ||
563 | /* | 561 | /* |
564 | * Return 0 to force VFS to repeat the whole operation, or the | 562 | * Return 0 to force VFS to repeat the whole operation, or the |
565 | * error code if 'do_readpage()' failes. | 563 | * error code if 'do_readpage()' fails. |
566 | */ | 564 | */ |
567 | copied = do_readpage(page); | 565 | copied = do_readpage(page); |
568 | goto out; | 566 | goto out; |
@@ -1175,11 +1173,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
1175 | ui->ui_size = inode->i_size; | 1173 | ui->ui_size = inode->i_size; |
1176 | /* Truncation changes inode [mc]time */ | 1174 | /* Truncation changes inode [mc]time */ |
1177 | inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); | 1175 | inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); |
1178 | /* The other attributes may be changed at the same time as well */ | 1176 | /* Other attributes may be changed at the same time as well */ |
1179 | do_attr_changes(inode, attr); | 1177 | do_attr_changes(inode, attr); |
1180 | |||
1181 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); | 1178 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); |
1182 | mutex_unlock(&ui->ui_mutex); | 1179 | mutex_unlock(&ui->ui_mutex); |
1180 | |||
1183 | out_budg: | 1181 | out_budg: |
1184 | if (budgeted) | 1182 | if (budgeted) |
1185 | ubifs_release_budget(c, &req); | 1183 | ubifs_release_budget(c, &req); |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index f0f5f15d384e..618c2701d3a7 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -529,7 +529,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
529 | * We scan the entire LEB even though we only really need to scan up to | 529 | * We scan the entire LEB even though we only really need to scan up to |
530 | * (c->leb_size - lp->free). | 530 | * (c->leb_size - lp->free). |
531 | */ | 531 | */ |
532 | sleb = ubifs_scan(c, lnum, 0, c->sbuf); | 532 | sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0); |
533 | if (IS_ERR(sleb)) | 533 | if (IS_ERR(sleb)) |
534 | return PTR_ERR(sleb); | 534 | return PTR_ERR(sleb); |
535 | 535 | ||
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 762a7d6cec73..e589fedaf1ef 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c | |||
@@ -297,7 +297,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer) | |||
297 | { | 297 | { |
298 | struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); | 298 | struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); |
299 | 299 | ||
300 | dbg_io("jhead %d", wbuf->jhead); | 300 | dbg_io("jhead %s", dbg_jhead(wbuf->jhead)); |
301 | wbuf->need_sync = 1; | 301 | wbuf->need_sync = 1; |
302 | wbuf->c->need_wbuf_sync = 1; | 302 | wbuf->c->need_wbuf_sync = 1; |
303 | ubifs_wake_up_bgt(wbuf->c); | 303 | ubifs_wake_up_bgt(wbuf->c); |
@@ -314,7 +314,8 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) | |||
314 | 314 | ||
315 | if (wbuf->no_timer) | 315 | if (wbuf->no_timer) |
316 | return; | 316 | return; |
317 | dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead, | 317 | dbg_io("set timer for jhead %s, %llu-%llu millisecs", |
318 | dbg_jhead(wbuf->jhead), | ||
318 | div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC), | 319 | div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC), |
319 | div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta, | 320 | div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta, |
320 | USEC_PER_SEC)); | 321 | USEC_PER_SEC)); |
@@ -351,8 +352,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) | |||
351 | /* Write-buffer is empty or not seeked */ | 352 | /* Write-buffer is empty or not seeked */ |
352 | return 0; | 353 | return 0; |
353 | 354 | ||
354 | dbg_io("LEB %d:%d, %d bytes, jhead %d", | 355 | dbg_io("LEB %d:%d, %d bytes, jhead %s", |
355 | wbuf->lnum, wbuf->offs, wbuf->used, wbuf->jhead); | 356 | wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); |
356 | ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); | 357 | ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); |
357 | ubifs_assert(!(wbuf->avail & 7)); | 358 | ubifs_assert(!(wbuf->avail & 7)); |
358 | ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); | 359 | ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); |
@@ -401,7 +402,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, | |||
401 | { | 402 | { |
402 | const struct ubifs_info *c = wbuf->c; | 403 | const struct ubifs_info *c = wbuf->c; |
403 | 404 | ||
404 | dbg_io("LEB %d:%d, jhead %d", lnum, offs, wbuf->jhead); | 405 | dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead)); |
405 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); | 406 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); |
406 | ubifs_assert(offs >= 0 && offs <= c->leb_size); | 407 | ubifs_assert(offs >= 0 && offs <= c->leb_size); |
407 | ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); | 408 | ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); |
@@ -508,9 +509,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
508 | struct ubifs_info *c = wbuf->c; | 509 | struct ubifs_info *c = wbuf->c; |
509 | int err, written, n, aligned_len = ALIGN(len, 8), offs; | 510 | int err, written, n, aligned_len = ALIGN(len, 8), offs; |
510 | 511 | ||
511 | dbg_io("%d bytes (%s) to jhead %d wbuf at LEB %d:%d", len, | 512 | dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, |
512 | dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->jhead, | 513 | dbg_ntype(((struct ubifs_ch *)buf)->node_type), |
513 | wbuf->lnum, wbuf->offs + wbuf->used); | 514 | dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used); |
514 | ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); | 515 | ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); |
515 | ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); | 516 | ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); |
516 | ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); | 517 | ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); |
@@ -535,8 +536,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
535 | memcpy(wbuf->buf + wbuf->used, buf, len); | 536 | memcpy(wbuf->buf + wbuf->used, buf, len); |
536 | 537 | ||
537 | if (aligned_len == wbuf->avail) { | 538 | if (aligned_len == wbuf->avail) { |
538 | dbg_io("flush jhead %d wbuf to LEB %d:%d", | 539 | dbg_io("flush jhead %s wbuf to LEB %d:%d", |
539 | wbuf->jhead, wbuf->lnum, wbuf->offs); | 540 | dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); |
540 | err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, | 541 | err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, |
541 | wbuf->offs, c->min_io_size, | 542 | wbuf->offs, c->min_io_size, |
542 | wbuf->dtype); | 543 | wbuf->dtype); |
@@ -564,8 +565,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
564 | * minimal I/O unit. We have to fill and flush write-buffer and switch | 565 | * minimal I/O unit. We have to fill and flush write-buffer and switch |
565 | * to the next min. I/O unit. | 566 | * to the next min. I/O unit. |
566 | */ | 567 | */ |
567 | dbg_io("flush jhead %d wbuf to LEB %d:%d", | 568 | dbg_io("flush jhead %s wbuf to LEB %d:%d", |
568 | wbuf->jhead, wbuf->lnum, wbuf->offs); | 569 | dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); |
569 | memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); | 570 | memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); |
570 | err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, | 571 | err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, |
571 | c->min_io_size, wbuf->dtype); | 572 | c->min_io_size, wbuf->dtype); |
@@ -698,8 +699,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, | |||
698 | int err, rlen, overlap; | 699 | int err, rlen, overlap; |
699 | struct ubifs_ch *ch = buf; | 700 | struct ubifs_ch *ch = buf; |
700 | 701 | ||
701 | dbg_io("LEB %d:%d, %s, length %d, jhead %d", lnum, offs, | 702 | dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs, |
702 | dbg_ntype(type), len, wbuf->jhead); | 703 | dbg_ntype(type), len, dbg_jhead(wbuf->jhead)); |
703 | ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); | 704 | ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); |
704 | ubifs_assert(!(offs & 7) && offs < c->leb_size); | 705 | ubifs_assert(!(offs & 7) && offs < c->leb_size); |
705 | ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); | 706 | ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); |
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 64b5f3a309f5..d321baeca68d 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
@@ -158,7 +158,7 @@ again: | |||
158 | * some. But the write-buffer mutex has to be unlocked because | 158 | * some. But the write-buffer mutex has to be unlocked because |
159 | * GC also takes it. | 159 | * GC also takes it. |
160 | */ | 160 | */ |
161 | dbg_jnl("no free space jhead %d, run GC", jhead); | 161 | dbg_jnl("no free space in jhead %s, run GC", dbg_jhead(jhead)); |
162 | mutex_unlock(&wbuf->io_mutex); | 162 | mutex_unlock(&wbuf->io_mutex); |
163 | 163 | ||
164 | lnum = ubifs_garbage_collect(c, 0); | 164 | lnum = ubifs_garbage_collect(c, 0); |
@@ -173,7 +173,8 @@ again: | |||
173 | * because we dropped @wbuf->io_mutex, so try once | 173 | * because we dropped @wbuf->io_mutex, so try once |
174 | * again. | 174 | * again. |
175 | */ | 175 | */ |
176 | dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead); | 176 | dbg_jnl("GC couldn't make a free LEB for jhead %s", |
177 | dbg_jhead(jhead)); | ||
177 | if (retries++ < 2) { | 178 | if (retries++ < 2) { |
178 | dbg_jnl("retry (%d)", retries); | 179 | dbg_jnl("retry (%d)", retries); |
179 | goto again; | 180 | goto again; |
@@ -184,7 +185,7 @@ again: | |||
184 | } | 185 | } |
185 | 186 | ||
186 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); | 187 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); |
187 | dbg_jnl("got LEB %d for jhead %d", lnum, jhead); | 188 | dbg_jnl("got LEB %d for jhead %s", lnum, dbg_jhead(jhead)); |
188 | avail = c->leb_size - wbuf->offs - wbuf->used; | 189 | avail = c->leb_size - wbuf->offs - wbuf->used; |
189 | 190 | ||
190 | if (wbuf->lnum != -1 && avail >= len) { | 191 | if (wbuf->lnum != -1 && avail >= len) { |
@@ -255,7 +256,8 @@ static int write_node(struct ubifs_info *c, int jhead, void *node, int len, | |||
255 | *lnum = c->jheads[jhead].wbuf.lnum; | 256 | *lnum = c->jheads[jhead].wbuf.lnum; |
256 | *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; | 257 | *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; |
257 | 258 | ||
258 | dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); | 259 | dbg_jnl("jhead %s, LEB %d:%d, len %d", |
260 | dbg_jhead(jhead), *lnum, *offs, len); | ||
259 | ubifs_prepare_node(c, node, len, 0); | 261 | ubifs_prepare_node(c, node, len, 0); |
260 | 262 | ||
261 | return ubifs_wbuf_write_nolock(wbuf, node, len); | 263 | return ubifs_wbuf_write_nolock(wbuf, node, len); |
@@ -285,7 +287,8 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, | |||
285 | 287 | ||
286 | *lnum = c->jheads[jhead].wbuf.lnum; | 288 | *lnum = c->jheads[jhead].wbuf.lnum; |
287 | *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; | 289 | *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; |
288 | dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); | 290 | dbg_jnl("jhead %s, LEB %d:%d, len %d", |
291 | dbg_jhead(jhead), *lnum, *offs, len); | ||
289 | 292 | ||
290 | err = ubifs_wbuf_write_nolock(wbuf, buf, len); | 293 | err = ubifs_wbuf_write_nolock(wbuf, buf, len); |
291 | if (err) | 294 | if (err) |
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 5fa27ea031ba..0f530c684f0b 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h | |||
@@ -229,23 +229,6 @@ static inline void xent_key_init(const struct ubifs_info *c, | |||
229 | } | 229 | } |
230 | 230 | ||
231 | /** | 231 | /** |
232 | * xent_key_init_hash - initialize extended attribute entry key without | ||
233 | * re-calculating hash function. | ||
234 | * @c: UBIFS file-system description object | ||
235 | * @key: key to initialize | ||
236 | * @inum: host inode number | ||
237 | * @hash: extended attribute entry name hash | ||
238 | */ | ||
239 | static inline void xent_key_init_hash(const struct ubifs_info *c, | ||
240 | union ubifs_key *key, ino_t inum, | ||
241 | uint32_t hash) | ||
242 | { | ||
243 | ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); | ||
244 | key->u32[0] = inum; | ||
245 | key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); | ||
246 | } | ||
247 | |||
248 | /** | ||
249 | * xent_key_init_flash - initialize on-flash extended attribute entry key. | 232 | * xent_key_init_flash - initialize on-flash extended attribute entry key. |
250 | * @c: UBIFS file-system description object | 233 | * @c: UBIFS file-system description object |
251 | * @k: key to initialize | 234 | * @k: key to initialize |
@@ -295,22 +278,15 @@ static inline void data_key_init(const struct ubifs_info *c, | |||
295 | } | 278 | } |
296 | 279 | ||
297 | /** | 280 | /** |
298 | * data_key_init_flash - initialize on-flash data key. | 281 | * highest_data_key - get the highest possible data key for an inode. |
299 | * @c: UBIFS file-system description object | 282 | * @c: UBIFS file-system description object |
300 | * @k: key to initialize | 283 | * @key: key to initialize |
301 | * @inum: inode number | 284 | * @inum: inode number |
302 | * @block: block number | ||
303 | */ | 285 | */ |
304 | static inline void data_key_init_flash(const struct ubifs_info *c, void *k, | 286 | static inline void highest_data_key(const struct ubifs_info *c, |
305 | ino_t inum, unsigned int block) | 287 | union ubifs_key *key, ino_t inum) |
306 | { | 288 | { |
307 | union ubifs_key *key = k; | 289 | data_key_init(c, key, inum, UBIFS_S_KEY_BLOCK_MASK); |
308 | |||
309 | ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); | ||
310 | key->j32[0] = cpu_to_le32(inum); | ||
311 | key->j32[1] = cpu_to_le32(block | | ||
312 | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); | ||
313 | memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); | ||
314 | } | 290 | } |
315 | 291 | ||
316 | /** | 292 | /** |
@@ -554,4 +530,5 @@ static inline unsigned long long key_max_inode_size(const struct ubifs_info *c) | |||
554 | return 0; | 530 | return 0; |
555 | } | 531 | } |
556 | } | 532 | } |
533 | |||
557 | #endif /* !__UBIFS_KEY_H__ */ | 534 | #endif /* !__UBIFS_KEY_H__ */ |
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 56e33772a1ee..c345e125f42c 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c | |||
@@ -169,8 +169,8 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) | |||
169 | */ | 169 | */ |
170 | c->bud_bytes += c->leb_size - bud->start; | 170 | c->bud_bytes += c->leb_size - bud->start; |
171 | 171 | ||
172 | dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, | 172 | dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum, |
173 | bud->start, bud->jhead, c->bud_bytes); | 173 | bud->start, dbg_jhead(bud->jhead), c->bud_bytes); |
174 | spin_unlock(&c->buds_lock); | 174 | spin_unlock(&c->buds_lock); |
175 | } | 175 | } |
176 | 176 | ||
@@ -355,16 +355,16 @@ static void remove_buds(struct ubifs_info *c) | |||
355 | * heads (non-closed buds). | 355 | * heads (non-closed buds). |
356 | */ | 356 | */ |
357 | c->cmt_bud_bytes += wbuf->offs - bud->start; | 357 | c->cmt_bud_bytes += wbuf->offs - bud->start; |
358 | dbg_log("preserve %d:%d, jhead %d, bud bytes %d, " | 358 | dbg_log("preserve %d:%d, jhead %s, bud bytes %d, " |
359 | "cmt_bud_bytes %lld", bud->lnum, bud->start, | 359 | "cmt_bud_bytes %lld", bud->lnum, bud->start, |
360 | bud->jhead, wbuf->offs - bud->start, | 360 | dbg_jhead(bud->jhead), wbuf->offs - bud->start, |
361 | c->cmt_bud_bytes); | 361 | c->cmt_bud_bytes); |
362 | bud->start = wbuf->offs; | 362 | bud->start = wbuf->offs; |
363 | } else { | 363 | } else { |
364 | c->cmt_bud_bytes += c->leb_size - bud->start; | 364 | c->cmt_bud_bytes += c->leb_size - bud->start; |
365 | dbg_log("remove %d:%d, jhead %d, bud bytes %d, " | 365 | dbg_log("remove %d:%d, jhead %s, bud bytes %d, " |
366 | "cmt_bud_bytes %lld", bud->lnum, bud->start, | 366 | "cmt_bud_bytes %lld", bud->lnum, bud->start, |
367 | bud->jhead, c->leb_size - bud->start, | 367 | dbg_jhead(bud->jhead), c->leb_size - bud->start, |
368 | c->cmt_bud_bytes); | 368 | c->cmt_bud_bytes); |
369 | rb_erase(p1, &c->buds); | 369 | rb_erase(p1, &c->buds); |
370 | /* | 370 | /* |
@@ -429,7 +429,8 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) | |||
429 | if (lnum == -1 || offs == c->leb_size) | 429 | if (lnum == -1 || offs == c->leb_size) |
430 | continue; | 430 | continue; |
431 | 431 | ||
432 | dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i); | 432 | dbg_log("add ref to LEB %d:%d for jhead %s", |
433 | lnum, offs, dbg_jhead(i)); | ||
433 | ref = buf + len; | 434 | ref = buf + len; |
434 | ref->ch.node_type = UBIFS_REF_NODE; | 435 | ref->ch.node_type = UBIFS_REF_NODE; |
435 | ref->lnum = cpu_to_le32(lnum); | 436 | ref->lnum = cpu_to_le32(lnum); |
@@ -695,7 +696,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) | |||
695 | lnum = c->ltail_lnum; | 696 | lnum = c->ltail_lnum; |
696 | write_lnum = lnum; | 697 | write_lnum = lnum; |
697 | while (1) { | 698 | while (1) { |
698 | sleb = ubifs_scan(c, lnum, 0, c->sbuf); | 699 | sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0); |
699 | if (IS_ERR(sleb)) { | 700 | if (IS_ERR(sleb)) { |
700 | err = PTR_ERR(sleb); | 701 | err = PTR_ERR(sleb); |
701 | goto out_free; | 702 | goto out_free; |
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 4cdd284dea56..4d4ca388889b 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c | |||
@@ -281,7 +281,7 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, | |||
281 | case LPROPS_FREE: | 281 | case LPROPS_FREE: |
282 | if (add_to_lpt_heap(c, lprops, cat)) | 282 | if (add_to_lpt_heap(c, lprops, cat)) |
283 | break; | 283 | break; |
284 | /* No more room on heap so make it uncategorized */ | 284 | /* No more room on heap so make it un-categorized */ |
285 | cat = LPROPS_UNCAT; | 285 | cat = LPROPS_UNCAT; |
286 | /* Fall through */ | 286 | /* Fall through */ |
287 | case LPROPS_UNCAT: | 287 | case LPROPS_UNCAT: |
@@ -375,8 +375,8 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, | |||
375 | * @lprops: LEB properties | 375 | * @lprops: LEB properties |
376 | * | 376 | * |
377 | * A LEB may have fallen off of the bottom of a heap, and ended up as | 377 | * A LEB may have fallen off of the bottom of a heap, and ended up as |
378 | * uncategorized even though it has enough space for us now. If that is the case | 378 | * un-categorized even though it has enough space for us now. If that is the |
379 | * this function will put the LEB back onto a heap. | 379 | * case this function will put the LEB back onto a heap. |
380 | */ | 380 | */ |
381 | void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) | 381 | void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) |
382 | { | 382 | { |
@@ -436,10 +436,10 @@ int ubifs_categorize_lprops(const struct ubifs_info *c, | |||
436 | /** | 436 | /** |
437 | * change_category - change LEB properties category. | 437 | * change_category - change LEB properties category. |
438 | * @c: UBIFS file-system description object | 438 | * @c: UBIFS file-system description object |
439 | * @lprops: LEB properties to recategorize | 439 | * @lprops: LEB properties to re-categorize |
440 | * | 440 | * |
441 | * LEB properties are categorized to enable fast find operations. When the LEB | 441 | * LEB properties are categorized to enable fast find operations. When the LEB |
442 | * properties change they must be recategorized. | 442 | * properties change they must be re-categorized. |
443 | */ | 443 | */ |
444 | static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) | 444 | static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) |
445 | { | 445 | { |
@@ -461,21 +461,18 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) | |||
461 | } | 461 | } |
462 | 462 | ||
463 | /** | 463 | /** |
464 | * calc_dark - calculate LEB dark space size. | 464 | * ubifs_calc_dark - calculate LEB dark space size. |
465 | * @c: the UBIFS file-system description object | 465 | * @c: the UBIFS file-system description object |
466 | * @spc: amount of free and dirty space in the LEB | 466 | * @spc: amount of free and dirty space in the LEB |
467 | * | 467 | * |
468 | * This function calculates amount of dark space in an LEB which has @spc bytes | 468 | * This function calculates and returns amount of dark space in an LEB which |
469 | * of free and dirty space. Returns the calculations result. | 469 | * has @spc bytes of free and dirty space. |
470 | * | 470 | * |
471 | * Dark space is the space which is not always usable - it depends on which | 471 | * UBIFS is trying to account the space which might not be usable, and this |
472 | * nodes are written in which order. E.g., if an LEB has only 512 free bytes, | 472 | * space is called "dark space". For example, if an LEB has only %512 free |
473 | * it is dark space, because it cannot fit a large data node. So UBIFS cannot | 473 | * bytes, it is dark space, because it cannot fit a large data node. |
474 | * count on this LEB and treat these 512 bytes as usable because it is not true | ||
475 | * if, for example, only big chunks of uncompressible data will be written to | ||
476 | * the FS. | ||
477 | */ | 474 | */ |
478 | static int calc_dark(struct ubifs_info *c, int spc) | 475 | int ubifs_calc_dark(const struct ubifs_info *c, int spc) |
479 | { | 476 | { |
480 | ubifs_assert(!(spc & 7)); | 477 | ubifs_assert(!(spc & 7)); |
481 | 478 | ||
@@ -518,7 +515,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) | |||
518 | * @free: new free space amount | 515 | * @free: new free space amount |
519 | * @dirty: new dirty space amount | 516 | * @dirty: new dirty space amount |
520 | * @flags: new flags | 517 | * @flags: new flags |
521 | * @idx_gc_cnt: change to the count of idx_gc list | 518 | * @idx_gc_cnt: change to the count of @idx_gc list |
522 | * | 519 | * |
523 | * This function changes LEB properties (@free, @dirty or @flag). However, the | 520 | * This function changes LEB properties (@free, @dirty or @flag). However, the |
524 | * property which has the %LPROPS_NC value is not changed. Returns a pointer to | 521 | * property which has the %LPROPS_NC value is not changed. Returns a pointer to |
@@ -535,7 +532,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, | |||
535 | { | 532 | { |
536 | /* | 533 | /* |
537 | * This is the only function that is allowed to change lprops, so we | 534 | * This is the only function that is allowed to change lprops, so we |
538 | * discard the const qualifier. | 535 | * discard the "const" qualifier. |
539 | */ | 536 | */ |
540 | struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; | 537 | struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; |
541 | 538 | ||
@@ -575,7 +572,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, | |||
575 | if (old_spc < c->dead_wm) | 572 | if (old_spc < c->dead_wm) |
576 | c->lst.total_dead -= old_spc; | 573 | c->lst.total_dead -= old_spc; |
577 | else | 574 | else |
578 | c->lst.total_dark -= calc_dark(c, old_spc); | 575 | c->lst.total_dark -= ubifs_calc_dark(c, old_spc); |
579 | 576 | ||
580 | c->lst.total_used -= c->leb_size - old_spc; | 577 | c->lst.total_used -= c->leb_size - old_spc; |
581 | } | 578 | } |
@@ -616,7 +613,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, | |||
616 | if (new_spc < c->dead_wm) | 613 | if (new_spc < c->dead_wm) |
617 | c->lst.total_dead += new_spc; | 614 | c->lst.total_dead += new_spc; |
618 | else | 615 | else |
619 | c->lst.total_dark += calc_dark(c, new_spc); | 616 | c->lst.total_dark += ubifs_calc_dark(c, new_spc); |
620 | 617 | ||
621 | c->lst.total_used += c->leb_size - new_spc; | 618 | c->lst.total_used += c->leb_size - new_spc; |
622 | } | 619 | } |
@@ -1096,7 +1093,7 @@ static int scan_check_cb(struct ubifs_info *c, | |||
1096 | } | 1093 | } |
1097 | } | 1094 | } |
1098 | 1095 | ||
1099 | sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); | 1096 | sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); |
1100 | if (IS_ERR(sleb)) { | 1097 | if (IS_ERR(sleb)) { |
1101 | /* | 1098 | /* |
1102 | * After an unclean unmount, empty and freeable LEBs | 1099 | * After an unclean unmount, empty and freeable LEBs |
@@ -1107,7 +1104,7 @@ static int scan_check_cb(struct ubifs_info *c, | |||
1107 | "- continuing checking"); | 1104 | "- continuing checking"); |
1108 | lst->empty_lebs += 1; | 1105 | lst->empty_lebs += 1; |
1109 | lst->total_free += c->leb_size; | 1106 | lst->total_free += c->leb_size; |
1110 | lst->total_dark += calc_dark(c, c->leb_size); | 1107 | lst->total_dark += ubifs_calc_dark(c, c->leb_size); |
1111 | return LPT_SCAN_CONTINUE; | 1108 | return LPT_SCAN_CONTINUE; |
1112 | } | 1109 | } |
1113 | 1110 | ||
@@ -1117,7 +1114,7 @@ static int scan_check_cb(struct ubifs_info *c, | |||
1117 | "- continuing checking"); | 1114 | "- continuing checking"); |
1118 | lst->total_free += lp->free; | 1115 | lst->total_free += lp->free; |
1119 | lst->total_dirty += lp->dirty; | 1116 | lst->total_dirty += lp->dirty; |
1120 | lst->total_dark += calc_dark(c, c->leb_size); | 1117 | lst->total_dark += ubifs_calc_dark(c, c->leb_size); |
1121 | return LPT_SCAN_CONTINUE; | 1118 | return LPT_SCAN_CONTINUE; |
1122 | } | 1119 | } |
1123 | data->err = PTR_ERR(sleb); | 1120 | data->err = PTR_ERR(sleb); |
@@ -1235,7 +1232,7 @@ static int scan_check_cb(struct ubifs_info *c, | |||
1235 | if (spc < c->dead_wm) | 1232 | if (spc < c->dead_wm) |
1236 | lst->total_dead += spc; | 1233 | lst->total_dead += spc; |
1237 | else | 1234 | else |
1238 | lst->total_dark += calc_dark(c, spc); | 1235 | lst->total_dark += ubifs_calc_dark(c, spc); |
1239 | } | 1236 | } |
1240 | 1237 | ||
1241 | ubifs_scan_destroy(sleb); | 1238 | ubifs_scan_destroy(sleb); |
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index a88f33801b98..28beaeedadc0 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c | |||
@@ -29,7 +29,8 @@ | |||
29 | * @c: UBIFS file-system description object | 29 | * @c: UBIFS file-system description object |
30 | * | 30 | * |
31 | * This function scans the master node LEBs and search for the latest master | 31 | * This function scans the master node LEBs and search for the latest master |
32 | * node. Returns zero in case of success and a negative error code in case of | 32 | * node. Returns zero in case of success, %-EUCLEAN if there master area is |
33 | * corrupted and requires recovery, and a negative error code in case of | ||
33 | * failure. | 34 | * failure. |
34 | */ | 35 | */ |
35 | static int scan_for_master(struct ubifs_info *c) | 36 | static int scan_for_master(struct ubifs_info *c) |
@@ -40,7 +41,7 @@ static int scan_for_master(struct ubifs_info *c) | |||
40 | 41 | ||
41 | lnum = UBIFS_MST_LNUM; | 42 | lnum = UBIFS_MST_LNUM; |
42 | 43 | ||
43 | sleb = ubifs_scan(c, lnum, 0, c->sbuf); | 44 | sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); |
44 | if (IS_ERR(sleb)) | 45 | if (IS_ERR(sleb)) |
45 | return PTR_ERR(sleb); | 46 | return PTR_ERR(sleb); |
46 | nodes_cnt = sleb->nodes_cnt; | 47 | nodes_cnt = sleb->nodes_cnt; |
@@ -48,7 +49,7 @@ static int scan_for_master(struct ubifs_info *c) | |||
48 | snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, | 49 | snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, |
49 | list); | 50 | list); |
50 | if (snod->type != UBIFS_MST_NODE) | 51 | if (snod->type != UBIFS_MST_NODE) |
51 | goto out; | 52 | goto out_dump; |
52 | memcpy(c->mst_node, snod->node, snod->len); | 53 | memcpy(c->mst_node, snod->node, snod->len); |
53 | offs = snod->offs; | 54 | offs = snod->offs; |
54 | } | 55 | } |
@@ -56,7 +57,7 @@ static int scan_for_master(struct ubifs_info *c) | |||
56 | 57 | ||
57 | lnum += 1; | 58 | lnum += 1; |
58 | 59 | ||
59 | sleb = ubifs_scan(c, lnum, 0, c->sbuf); | 60 | sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); |
60 | if (IS_ERR(sleb)) | 61 | if (IS_ERR(sleb)) |
61 | return PTR_ERR(sleb); | 62 | return PTR_ERR(sleb); |
62 | if (sleb->nodes_cnt != nodes_cnt) | 63 | if (sleb->nodes_cnt != nodes_cnt) |
@@ -65,7 +66,7 @@ static int scan_for_master(struct ubifs_info *c) | |||
65 | goto out; | 66 | goto out; |
66 | snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); | 67 | snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); |
67 | if (snod->type != UBIFS_MST_NODE) | 68 | if (snod->type != UBIFS_MST_NODE) |
68 | goto out; | 69 | goto out_dump; |
69 | if (snod->offs != offs) | 70 | if (snod->offs != offs) |
70 | goto out; | 71 | goto out; |
71 | if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, | 72 | if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, |
@@ -78,6 +79,12 @@ static int scan_for_master(struct ubifs_info *c) | |||
78 | 79 | ||
79 | out: | 80 | out: |
80 | ubifs_scan_destroy(sleb); | 81 | ubifs_scan_destroy(sleb); |
82 | return -EUCLEAN; | ||
83 | |||
84 | out_dump: | ||
85 | ubifs_err("unexpected node type %d master LEB %d:%d", | ||
86 | snod->type, lnum, snod->offs); | ||
87 | ubifs_scan_destroy(sleb); | ||
81 | return -EINVAL; | 88 | return -EINVAL; |
82 | } | 89 | } |
83 | 90 | ||
@@ -256,7 +263,8 @@ int ubifs_read_master(struct ubifs_info *c) | |||
256 | 263 | ||
257 | err = scan_for_master(c); | 264 | err = scan_for_master(c); |
258 | if (err) { | 265 | if (err) { |
259 | err = ubifs_recover_master_node(c); | 266 | if (err == -EUCLEAN) |
267 | err = ubifs_recover_master_node(c); | ||
260 | if (err) | 268 | if (err) |
261 | /* | 269 | /* |
262 | * Note, we do not free 'c->mst_node' here because the | 270 | * Note, we do not free 'c->mst_node' here because the |
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 152a7b34a141..82009c74b6a3 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c | |||
@@ -670,9 +670,10 @@ static int kill_orphans(struct ubifs_info *c) | |||
670 | struct ubifs_scan_leb *sleb; | 670 | struct ubifs_scan_leb *sleb; |
671 | 671 | ||
672 | dbg_rcvry("LEB %d", lnum); | 672 | dbg_rcvry("LEB %d", lnum); |
673 | sleb = ubifs_scan(c, lnum, 0, c->sbuf); | 673 | sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); |
674 | if (IS_ERR(sleb)) { | 674 | if (IS_ERR(sleb)) { |
675 | sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); | 675 | if (PTR_ERR(sleb) == -EUCLEAN) |
676 | sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); | ||
676 | if (IS_ERR(sleb)) { | 677 | if (IS_ERR(sleb)) { |
677 | err = PTR_ERR(sleb); | 678 | err = PTR_ERR(sleb); |
678 | break; | 679 | break; |
@@ -899,7 +900,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) | |||
899 | for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { | 900 | for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { |
900 | struct ubifs_scan_leb *sleb; | 901 | struct ubifs_scan_leb *sleb; |
901 | 902 | ||
902 | sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); | 903 | sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); |
903 | if (IS_ERR(sleb)) { | 904 | if (IS_ERR(sleb)) { |
904 | err = PTR_ERR(sleb); | 905 | err = PTR_ERR(sleb); |
905 | break; | 906 | break; |
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index e5f6cf8a1155..f94ddf7efba0 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
@@ -286,7 +286,7 @@ int ubifs_recover_master_node(struct ubifs_info *c) | |||
286 | mst = mst2; | 286 | mst = mst2; |
287 | } | 287 | } |
288 | 288 | ||
289 | dbg_rcvry("recovered master node from LEB %d", | 289 | ubifs_msg("recovered master node from LEB %d", |
290 | (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); | 290 | (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); |
291 | 291 | ||
292 | memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); | 292 | memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); |
@@ -790,7 +790,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, | |||
790 | * We can only recover at the end of the log, so check that the | 790 | * We can only recover at the end of the log, so check that the |
791 | * next log LEB is empty or out of date. | 791 | * next log LEB is empty or out of date. |
792 | */ | 792 | */ |
793 | sleb = ubifs_scan(c, next_lnum, 0, sbuf); | 793 | sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0); |
794 | if (IS_ERR(sleb)) | 794 | if (IS_ERR(sleb)) |
795 | return sleb; | 795 | return sleb; |
796 | if (sleb->nodes_cnt) { | 796 | if (sleb->nodes_cnt) { |
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 2970500f32df..5c2d6d759a3e 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
@@ -506,7 +506,7 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, | |||
506 | if (c->need_recovery) | 506 | if (c->need_recovery) |
507 | sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); | 507 | sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); |
508 | else | 508 | else |
509 | sleb = ubifs_scan(c, lnum, offs, c->sbuf); | 509 | sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); |
510 | if (IS_ERR(sleb)) | 510 | if (IS_ERR(sleb)) |
511 | return PTR_ERR(sleb); | 511 | return PTR_ERR(sleb); |
512 | 512 | ||
@@ -836,8 +836,8 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) | |||
836 | const struct ubifs_cs_node *node; | 836 | const struct ubifs_cs_node *node; |
837 | 837 | ||
838 | dbg_mnt("replay log LEB %d:%d", lnum, offs); | 838 | dbg_mnt("replay log LEB %d:%d", lnum, offs); |
839 | sleb = ubifs_scan(c, lnum, offs, sbuf); | 839 | sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); |
840 | if (IS_ERR(sleb) ) { | 840 | if (IS_ERR(sleb)) { |
841 | if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) | 841 | if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) |
842 | return PTR_ERR(sleb); | 842 | return PTR_ERR(sleb); |
843 | sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); | 843 | sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); |
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 892ebfee4fe5..96c525384191 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c | |||
@@ -108,10 +108,9 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, | |||
108 | 108 | ||
109 | /* Make the node pads to 8-byte boundary */ | 109 | /* Make the node pads to 8-byte boundary */ |
110 | if ((node_len + pad_len) & 7) { | 110 | if ((node_len + pad_len) & 7) { |
111 | if (!quiet) { | 111 | if (!quiet) |
112 | dbg_err("bad padding length %d - %d", | 112 | dbg_err("bad padding length %d - %d", |
113 | offs, offs + node_len + pad_len); | 113 | offs, offs + node_len + pad_len); |
114 | } | ||
115 | return SCANNED_A_BAD_PAD_NODE; | 114 | return SCANNED_A_BAD_PAD_NODE; |
116 | } | 115 | } |
117 | 116 | ||
@@ -253,15 +252,19 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, | |||
253 | * @c: UBIFS file-system description object | 252 | * @c: UBIFS file-system description object |
254 | * @lnum: logical eraseblock number | 253 | * @lnum: logical eraseblock number |
255 | * @offs: offset to start at (usually zero) | 254 | * @offs: offset to start at (usually zero) |
256 | * @sbuf: scan buffer (must be c->leb_size) | 255 | * @sbuf: scan buffer (must be of @c->leb_size bytes in size) |
256 | * @quiet: print no messages | ||
257 | * | 257 | * |
258 | * This function scans LEB number @lnum and returns complete information about | 258 | * This function scans LEB number @lnum and returns complete information about |
259 | * its contents. Returns the scaned information in case of success and, | 259 | * its contents. Returns the scaned information in case of success and, |
260 | * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case | 260 | * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case |
261 | * of failure. | 261 | * of failure. |
262 | * | ||
263 | * If @quiet is non-zero, this function does not print large and scary | ||
264 | * error messages and flash dumps in case of errors. | ||
262 | */ | 265 | */ |
263 | struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, | 266 | struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, |
264 | int offs, void *sbuf) | 267 | int offs, void *sbuf, int quiet) |
265 | { | 268 | { |
266 | void *buf = sbuf + offs; | 269 | void *buf = sbuf + offs; |
267 | int err, len = c->leb_size - offs; | 270 | int err, len = c->leb_size - offs; |
@@ -280,7 +283,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, | |||
280 | 283 | ||
281 | cond_resched(); | 284 | cond_resched(); |
282 | 285 | ||
283 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); | 286 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); |
284 | if (ret > 0) { | 287 | if (ret > 0) { |
285 | /* Padding bytes or a valid padding node */ | 288 | /* Padding bytes or a valid padding node */ |
286 | offs += ret; | 289 | offs += ret; |
@@ -320,7 +323,9 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, | |||
320 | } | 323 | } |
321 | 324 | ||
322 | if (offs % c->min_io_size) { | 325 | if (offs % c->min_io_size) { |
323 | ubifs_err("empty space starts at non-aligned offset %d", offs); | 326 | if (!quiet) |
327 | ubifs_err("empty space starts at non-aligned offset %d", | ||
328 | offs); | ||
324 | goto corrupted;; | 329 | goto corrupted;; |
325 | } | 330 | } |
326 | 331 | ||
@@ -331,18 +336,25 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, | |||
331 | break; | 336 | break; |
332 | for (; len; offs++, buf++, len--) | 337 | for (; len; offs++, buf++, len--) |
333 | if (*(uint8_t *)buf != 0xff) { | 338 | if (*(uint8_t *)buf != 0xff) { |
334 | ubifs_err("corrupt empty space at LEB %d:%d", | 339 | if (!quiet) |
335 | lnum, offs); | 340 | ubifs_err("corrupt empty space at LEB %d:%d", |
341 | lnum, offs); | ||
336 | goto corrupted; | 342 | goto corrupted; |
337 | } | 343 | } |
338 | 344 | ||
339 | return sleb; | 345 | return sleb; |
340 | 346 | ||
341 | corrupted: | 347 | corrupted: |
342 | ubifs_scanned_corruption(c, lnum, offs, buf); | 348 | if (!quiet) { |
349 | ubifs_scanned_corruption(c, lnum, offs, buf); | ||
350 | ubifs_err("LEB %d scanning failed", lnum); | ||
351 | } | ||
343 | err = -EUCLEAN; | 352 | err = -EUCLEAN; |
353 | ubifs_scan_destroy(sleb); | ||
354 | return ERR_PTR(err); | ||
355 | |||
344 | error: | 356 | error: |
345 | ubifs_err("LEB %d scanning failed", lnum); | 357 | ubifs_err("LEB %d scanning failed, error %d", lnum, err); |
346 | ubifs_scan_destroy(sleb); | 358 | ubifs_scan_destroy(sleb); |
347 | return ERR_PTR(err); | 359 | return ERR_PTR(err); |
348 | } | 360 | } |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 51763aa8f4de..333e181ee987 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -36,7 +36,6 @@ | |||
36 | #include <linux/mount.h> | 36 | #include <linux/mount.h> |
37 | #include <linux/math64.h> | 37 | #include <linux/math64.h> |
38 | #include <linux/writeback.h> | 38 | #include <linux/writeback.h> |
39 | #include <linux/smp_lock.h> | ||
40 | #include "ubifs.h" | 39 | #include "ubifs.h" |
41 | 40 | ||
42 | /* | 41 | /* |
@@ -318,6 +317,8 @@ static int ubifs_write_inode(struct inode *inode, int wait) | |||
318 | if (err) | 317 | if (err) |
319 | ubifs_err("can't write inode %lu, error %d", | 318 | ubifs_err("can't write inode %lu, error %d", |
320 | inode->i_ino, err); | 319 | inode->i_ino, err); |
320 | else | ||
321 | err = dbg_check_inode_size(c, inode, ui->ui_size); | ||
321 | } | 322 | } |
322 | 323 | ||
323 | ui->dirty = 0; | 324 | ui->dirty = 0; |
@@ -448,17 +449,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) | |||
448 | return 0; | 449 | return 0; |
449 | 450 | ||
450 | /* | 451 | /* |
451 | * VFS calls '->sync_fs()' before synchronizing all dirty inodes and | ||
452 | * pages, so synchronize them first, then commit the journal. Strictly | ||
453 | * speaking, it is not necessary to commit the journal here, | ||
454 | * synchronizing write-buffers would be enough. But committing makes | ||
455 | * UBIFS free space predictions much more accurate, so we want to let | ||
456 | * the user be able to get more accurate results of 'statfs()' after | ||
457 | * they synchronize the file system. | ||
458 | */ | ||
459 | sync_inodes_sb(sb); | ||
460 | |||
461 | /* | ||
462 | * Synchronize write buffers, because 'ubifs_run_commit()' does not | 452 | * Synchronize write buffers, because 'ubifs_run_commit()' does not |
463 | * do this if it waits for an already running commit. | 453 | * do this if it waits for an already running commit. |
464 | */ | 454 | */ |
@@ -468,6 +458,13 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) | |||
468 | return err; | 458 | return err; |
469 | } | 459 | } |
470 | 460 | ||
461 | /* | ||
462 | * Strictly speaking, it is not necessary to commit the journal here, | ||
463 | * synchronizing write-buffers would be enough. But committing makes | ||
464 | * UBIFS free space predictions much more accurate, so we want to let | ||
465 | * the user be able to get more accurate results of 'statfs()' after | ||
466 | * they synchronize the file system. | ||
467 | */ | ||
471 | err = ubifs_run_commit(c); | 468 | err = ubifs_run_commit(c); |
472 | if (err) | 469 | if (err) |
473 | return err; | 470 | return err; |
@@ -1720,8 +1717,6 @@ static void ubifs_put_super(struct super_block *sb) | |||
1720 | ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, | 1717 | ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, |
1721 | c->vi.vol_id); | 1718 | c->vi.vol_id); |
1722 | 1719 | ||
1723 | lock_kernel(); | ||
1724 | |||
1725 | /* | 1720 | /* |
1726 | * The following asserts are only valid if there has not been a failure | 1721 | * The following asserts are only valid if there has not been a failure |
1727 | * of the media. For example, there will be dirty inodes if we failed | 1722 | * of the media. For example, there will be dirty inodes if we failed |
@@ -1786,8 +1781,6 @@ static void ubifs_put_super(struct super_block *sb) | |||
1786 | ubi_close_volume(c->ubi); | 1781 | ubi_close_volume(c->ubi); |
1787 | mutex_unlock(&c->umount_mutex); | 1782 | mutex_unlock(&c->umount_mutex); |
1788 | kfree(c); | 1783 | kfree(c); |
1789 | |||
1790 | unlock_kernel(); | ||
1791 | } | 1784 | } |
1792 | 1785 | ||
1793 | static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | 1786 | static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) |
@@ -1803,22 +1796,17 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1803 | return err; | 1796 | return err; |
1804 | } | 1797 | } |
1805 | 1798 | ||
1806 | lock_kernel(); | ||
1807 | if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { | 1799 | if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { |
1808 | if (c->ro_media) { | 1800 | if (c->ro_media) { |
1809 | ubifs_msg("cannot re-mount due to prior errors"); | 1801 | ubifs_msg("cannot re-mount due to prior errors"); |
1810 | unlock_kernel(); | ||
1811 | return -EROFS; | 1802 | return -EROFS; |
1812 | } | 1803 | } |
1813 | err = ubifs_remount_rw(c); | 1804 | err = ubifs_remount_rw(c); |
1814 | if (err) { | 1805 | if (err) |
1815 | unlock_kernel(); | ||
1816 | return err; | 1806 | return err; |
1817 | } | ||
1818 | } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { | 1807 | } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { |
1819 | if (c->ro_media) { | 1808 | if (c->ro_media) { |
1820 | ubifs_msg("cannot re-mount due to prior errors"); | 1809 | ubifs_msg("cannot re-mount due to prior errors"); |
1821 | unlock_kernel(); | ||
1822 | return -EROFS; | 1810 | return -EROFS; |
1823 | } | 1811 | } |
1824 | ubifs_remount_ro(c); | 1812 | ubifs_remount_ro(c); |
@@ -1833,7 +1821,6 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1833 | } | 1821 | } |
1834 | 1822 | ||
1835 | ubifs_assert(c->lst.taken_empty_lebs > 0); | 1823 | ubifs_assert(c->lst.taken_empty_lebs > 0); |
1836 | unlock_kernel(); | ||
1837 | return 0; | 1824 | return 0; |
1838 | } | 1825 | } |
1839 | 1826 | ||
@@ -1980,6 +1967,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) | |||
1980 | if (err) | 1967 | if (err) |
1981 | goto out_bdi; | 1968 | goto out_bdi; |
1982 | 1969 | ||
1970 | sb->s_bdi = &c->bdi; | ||
1983 | sb->s_fs_info = c; | 1971 | sb->s_fs_info = c; |
1984 | sb->s_magic = UBIFS_SUPER_MAGIC; | 1972 | sb->s_magic = UBIFS_SUPER_MAGIC; |
1985 | sb->s_blocksize = UBIFS_BLOCK_SIZE; | 1973 | sb->s_blocksize = UBIFS_BLOCK_SIZE; |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index f249f7b0d656..e5b1a7d00fa0 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -1159,8 +1159,8 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, | |||
1159 | * o exact match, i.e. the found zero-level znode contains key @key, then %1 | 1159 | * o exact match, i.e. the found zero-level znode contains key @key, then %1 |
1160 | * is returned and slot number of the matched branch is stored in @n; | 1160 | * is returned and slot number of the matched branch is stored in @n; |
1161 | * o not exact match, which means that zero-level znode does not contain | 1161 | * o not exact match, which means that zero-level znode does not contain |
1162 | * @key, then %0 is returned and slot number of the closed branch is stored | 1162 | * @key, then %0 is returned and slot number of the closest branch is stored |
1163 | * in @n; | 1163 | * in @n; |
1164 | * o @key is so small that it is even less than the lowest key of the | 1164 | * o @key is so small that it is even less than the lowest key of the |
1165 | * leftmost zero-level node, then %0 is returned and %0 is stored in @n. | 1165 | * leftmost zero-level node, then %0 is returned and %0 is stored in @n. |
1166 | * | 1166 | * |
@@ -1433,7 +1433,7 @@ static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) | |||
1433 | * @lnum: LEB number is returned here | 1433 | * @lnum: LEB number is returned here |
1434 | * @offs: offset is returned here | 1434 | * @offs: offset is returned here |
1435 | * | 1435 | * |
1436 | * This function look up and reads node with key @key. The caller has to make | 1436 | * This function looks up and reads node with key @key. The caller has to make |
1437 | * sure the @node buffer is large enough to fit the node. Returns zero in case | 1437 | * sure the @node buffer is large enough to fit the node. Returns zero in case |
1438 | * of success, %-ENOENT if the node was not found, and a negative error code in | 1438 | * of success, %-ENOENT if the node was not found, and a negative error code in |
1439 | * case of failure. The node location can be returned in @lnum and @offs. | 1439 | * case of failure. The node location can be returned in @lnum and @offs. |
@@ -3268,3 +3268,73 @@ out_unlock: | |||
3268 | mutex_unlock(&c->tnc_mutex); | 3268 | mutex_unlock(&c->tnc_mutex); |
3269 | return err; | 3269 | return err; |
3270 | } | 3270 | } |
3271 | |||
3272 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
3273 | |||
3274 | /** | ||
3275 | * dbg_check_inode_size - check if inode size is correct. | ||
3276 | * @c: UBIFS file-system description object | ||
3277 | * @inum: inode number | ||
3278 | * @size: inode size | ||
3279 | * | ||
3280 | * This function makes sure that the inode size (@size) is correct and it does | ||
3281 | * not have any pages beyond @size. Returns zero if the inode is OK, %-EINVAL | ||
3282 | * if it has a data page beyond @size, and other negative error code in case of | ||
3283 | * other errors. | ||
3284 | */ | ||
3285 | int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, | ||
3286 | loff_t size) | ||
3287 | { | ||
3288 | int err, n; | ||
3289 | union ubifs_key from_key, to_key, *key; | ||
3290 | struct ubifs_znode *znode; | ||
3291 | unsigned int block; | ||
3292 | |||
3293 | if (!S_ISREG(inode->i_mode)) | ||
3294 | return 0; | ||
3295 | if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) | ||
3296 | return 0; | ||
3297 | |||
3298 | block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; | ||
3299 | data_key_init(c, &from_key, inode->i_ino, block); | ||
3300 | highest_data_key(c, &to_key, inode->i_ino); | ||
3301 | |||
3302 | mutex_lock(&c->tnc_mutex); | ||
3303 | err = ubifs_lookup_level0(c, &from_key, &znode, &n); | ||
3304 | if (err < 0) | ||
3305 | goto out_unlock; | ||
3306 | |||
3307 | if (err) { | ||
3308 | err = -EINVAL; | ||
3309 | key = &from_key; | ||
3310 | goto out_dump; | ||
3311 | } | ||
3312 | |||
3313 | err = tnc_next(c, &znode, &n); | ||
3314 | if (err == -ENOENT) { | ||
3315 | err = 0; | ||
3316 | goto out_unlock; | ||
3317 | } | ||
3318 | if (err < 0) | ||
3319 | goto out_unlock; | ||
3320 | |||
3321 | ubifs_assert(err == 0); | ||
3322 | key = &znode->zbranch[n].key; | ||
3323 | if (!key_in_range(c, key, &from_key, &to_key)) | ||
3324 | goto out_unlock; | ||
3325 | |||
3326 | out_dump: | ||
3327 | block = key_block(c, key); | ||
3328 | ubifs_err("inode %lu has size %lld, but there are data at offset %lld " | ||
3329 | "(data key %s)", (unsigned long)inode->i_ino, size, | ||
3330 | ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); | ||
3331 | dbg_dump_inode(c, inode); | ||
3332 | dbg_dump_stack(); | ||
3333 | err = -EINVAL; | ||
3334 | |||
3335 | out_unlock: | ||
3336 | mutex_unlock(&c->tnc_mutex); | ||
3337 | return err; | ||
3338 | } | ||
3339 | |||
3340 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | ||
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index fde8d127c768..53288e5d604e 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c | |||
@@ -245,7 +245,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p) | |||
245 | * it is more comprehensive and less efficient than is needed for this | 245 | * it is more comprehensive and less efficient than is needed for this |
246 | * purpose. | 246 | * purpose. |
247 | */ | 247 | */ |
248 | sleb = ubifs_scan(c, lnum, 0, c->ileb_buf); | 248 | sleb = ubifs_scan(c, lnum, 0, c->ileb_buf, 0); |
249 | c->ileb_len = 0; | 249 | c->ileb_len = 0; |
250 | if (IS_ERR(sleb)) | 250 | if (IS_ERR(sleb)) |
251 | return PTR_ERR(sleb); | 251 | return PTR_ERR(sleb); |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 3eee07e0c495..191ca7863fe7 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
@@ -135,6 +135,13 @@ | |||
135 | /* The key is always at the same position in all keyed nodes */ | 135 | /* The key is always at the same position in all keyed nodes */ |
136 | #define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) | 136 | #define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) |
137 | 137 | ||
138 | /* Garbage collector journal head number */ | ||
139 | #define UBIFS_GC_HEAD 0 | ||
140 | /* Base journal head number */ | ||
141 | #define UBIFS_BASE_HEAD 1 | ||
142 | /* Data journal head number */ | ||
143 | #define UBIFS_DATA_HEAD 2 | ||
144 | |||
138 | /* | 145 | /* |
139 | * LEB Properties Tree node types. | 146 | * LEB Properties Tree node types. |
140 | * | 147 | * |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index a29349094422..b2d976366a46 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -105,12 +105,10 @@ | |||
105 | /* Number of non-data journal heads */ | 105 | /* Number of non-data journal heads */ |
106 | #define NONDATA_JHEADS_CNT 2 | 106 | #define NONDATA_JHEADS_CNT 2 |
107 | 107 | ||
108 | /* Garbage collector head */ | 108 | /* Shorter names for journal head numbers for internal usage */ |
109 | #define GCHD 0 | 109 | #define GCHD UBIFS_GC_HEAD |
110 | /* Base journal head number */ | 110 | #define BASEHD UBIFS_BASE_HEAD |
111 | #define BASEHD 1 | 111 | #define DATAHD UBIFS_DATA_HEAD |
112 | /* First "general purpose" journal head */ | ||
113 | #define DATAHD 2 | ||
114 | 112 | ||
115 | /* 'No change' value for 'ubifs_change_lp()' */ | 113 | /* 'No change' value for 'ubifs_change_lp()' */ |
116 | #define LPROPS_NC 0x80000001 | 114 | #define LPROPS_NC 0x80000001 |
@@ -1451,7 +1449,7 @@ int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode); | |||
1451 | 1449 | ||
1452 | /* scan.c */ | 1450 | /* scan.c */ |
1453 | struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, | 1451 | struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, |
1454 | int offs, void *sbuf); | 1452 | int offs, void *sbuf, int quiet); |
1455 | void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); | 1453 | void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); |
1456 | int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, | 1454 | int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, |
1457 | int offs, int quiet); | 1455 | int offs, int quiet); |
@@ -1676,6 +1674,7 @@ const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c); | |||
1676 | const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); | 1674 | const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); |
1677 | const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); | 1675 | const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); |
1678 | const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); | 1676 | const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); |
1677 | int ubifs_calc_dark(const struct ubifs_info *c, int spc); | ||
1679 | 1678 | ||
1680 | /* file.c */ | 1679 | /* file.c */ |
1681 | int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); | 1680 | int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); |
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index adafcf556531..195830f47569 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c | |||
@@ -78,9 +78,9 @@ enum { | |||
78 | SECURITY_XATTR, | 78 | SECURITY_XATTR, |
79 | }; | 79 | }; |
80 | 80 | ||
81 | static struct inode_operations none_inode_operations; | 81 | static const struct inode_operations none_inode_operations; |
82 | static struct address_space_operations none_address_operations; | 82 | static const struct address_space_operations none_address_operations; |
83 | static struct file_operations none_file_operations; | 83 | static const struct file_operations none_file_operations; |
84 | 84 | ||
85 | /** | 85 | /** |
86 | * create_xattr - create an extended attribute. | 86 | * create_xattr - create an extended attribute. |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 52f3fc63571a..381854461b28 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -216,7 +216,6 @@ xfs_setfilesize( | |||
216 | if (ip->i_d.di_size < isize) { | 216 | if (ip->i_d.di_size < isize) { |
217 | ip->i_d.di_size = isize; | 217 | ip->i_d.di_size = isize; |
218 | ip->i_update_core = 1; | 218 | ip->i_update_core = 1; |
219 | ip->i_update_size = 1; | ||
220 | xfs_mark_inode_dirty_sync(ip); | 219 | xfs_mark_inode_dirty_sync(ip); |
221 | } | 220 | } |
222 | 221 | ||
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 0542fd507649..988d8f87bc0f 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -172,12 +172,21 @@ xfs_file_release( | |||
172 | */ | 172 | */ |
173 | STATIC int | 173 | STATIC int |
174 | xfs_file_fsync( | 174 | xfs_file_fsync( |
175 | struct file *filp, | 175 | struct file *file, |
176 | struct dentry *dentry, | 176 | struct dentry *dentry, |
177 | int datasync) | 177 | int datasync) |
178 | { | 178 | { |
179 | xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); | 179 | struct inode *inode = dentry->d_inode; |
180 | return -xfs_fsync(XFS_I(dentry->d_inode)); | 180 | struct xfs_inode *ip = XFS_I(inode); |
181 | int error; | ||
182 | |||
183 | /* capture size updates in I/O completion before writing the inode. */ | ||
184 | error = filemap_fdatawait(inode->i_mapping); | ||
185 | if (error) | ||
186 | return error; | ||
187 | |||
188 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | ||
189 | return -xfs_fsync(ip); | ||
181 | } | 190 | } |
182 | 191 | ||
183 | STATIC int | 192 | STATIC int |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 6c32f1d63d8c..da0159d99f82 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -43,7 +43,6 @@ | |||
43 | #include "xfs_error.h" | 43 | #include "xfs_error.h" |
44 | #include "xfs_itable.h" | 44 | #include "xfs_itable.h" |
45 | #include "xfs_rw.h" | 45 | #include "xfs_rw.h" |
46 | #include "xfs_acl.h" | ||
47 | #include "xfs_attr.h" | 46 | #include "xfs_attr.h" |
48 | #include "xfs_buf_item.h" | 47 | #include "xfs_buf_item.h" |
49 | #include "xfs_utils.h" | 48 | #include "xfs_utils.h" |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index fde63a3c4ecc..49e4a6aea73c 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
@@ -812,19 +812,21 @@ write_retry: | |||
812 | 812 | ||
813 | /* Handle various SYNC-type writes */ | 813 | /* Handle various SYNC-type writes */ |
814 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { | 814 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { |
815 | loff_t end = pos + ret - 1; | ||
815 | int error2; | 816 | int error2; |
816 | 817 | ||
817 | xfs_iunlock(xip, iolock); | 818 | xfs_iunlock(xip, iolock); |
818 | if (need_i_mutex) | 819 | if (need_i_mutex) |
819 | mutex_unlock(&inode->i_mutex); | 820 | mutex_unlock(&inode->i_mutex); |
820 | error2 = filemap_write_and_wait_range(mapping, pos, | 821 | |
821 | pos + ret - 1); | 822 | error2 = filemap_write_and_wait_range(mapping, pos, end); |
822 | if (!error) | 823 | if (!error) |
823 | error = error2; | 824 | error = error2; |
824 | if (need_i_mutex) | 825 | if (need_i_mutex) |
825 | mutex_lock(&inode->i_mutex); | 826 | mutex_lock(&inode->i_mutex); |
826 | xfs_ilock(xip, iolock); | 827 | xfs_ilock(xip, iolock); |
827 | error2 = xfs_write_sync_logforce(mp, xip); | 828 | |
829 | error2 = xfs_fsync(xip); | ||
828 | if (!error) | 830 | if (!error) |
829 | error = error2; | 831 | error = error2; |
830 | } | 832 | } |
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index cb6e2cca214f..9e41f91aa269 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c | |||
@@ -150,7 +150,7 @@ xfs_fs_set_xquota( | |||
150 | return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); | 150 | return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); |
151 | } | 151 | } |
152 | 152 | ||
153 | struct quotactl_ops xfs_quotactl_operations = { | 153 | const struct quotactl_ops xfs_quotactl_operations = { |
154 | .quota_sync = xfs_fs_quota_sync, | 154 | .quota_sync = xfs_fs_quota_sync, |
155 | .get_xstate = xfs_fs_get_xstate, | 155 | .get_xstate = xfs_fs_get_xstate, |
156 | .set_xstate = xfs_fs_set_xstate, | 156 | .set_xstate = xfs_fs_set_xstate, |
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index c3526d445f6a..76fdc5861932 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c | |||
@@ -20,16 +20,9 @@ | |||
20 | 20 | ||
21 | DEFINE_PER_CPU(struct xfsstats, xfsstats); | 21 | DEFINE_PER_CPU(struct xfsstats, xfsstats); |
22 | 22 | ||
23 | STATIC int | 23 | static int xfs_stat_proc_show(struct seq_file *m, void *v) |
24 | xfs_read_xfsstats( | ||
25 | char *buffer, | ||
26 | char **start, | ||
27 | off_t offset, | ||
28 | int count, | ||
29 | int *eof, | ||
30 | void *data) | ||
31 | { | 24 | { |
32 | int c, i, j, len, val; | 25 | int c, i, j, val; |
33 | __uint64_t xs_xstrat_bytes = 0; | 26 | __uint64_t xs_xstrat_bytes = 0; |
34 | __uint64_t xs_write_bytes = 0; | 27 | __uint64_t xs_write_bytes = 0; |
35 | __uint64_t xs_read_bytes = 0; | 28 | __uint64_t xs_read_bytes = 0; |
@@ -60,18 +53,18 @@ xfs_read_xfsstats( | |||
60 | }; | 53 | }; |
61 | 54 | ||
62 | /* Loop over all stats groups */ | 55 | /* Loop over all stats groups */ |
63 | for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) { | 56 | for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { |
64 | len += sprintf(buffer + len, "%s", xstats[i].desc); | 57 | seq_printf(m, "%s", xstats[i].desc); |
65 | /* inner loop does each group */ | 58 | /* inner loop does each group */ |
66 | while (j < xstats[i].endpoint) { | 59 | while (j < xstats[i].endpoint) { |
67 | val = 0; | 60 | val = 0; |
68 | /* sum over all cpus */ | 61 | /* sum over all cpus */ |
69 | for_each_possible_cpu(c) | 62 | for_each_possible_cpu(c) |
70 | val += *(((__u32*)&per_cpu(xfsstats, c) + j)); | 63 | val += *(((__u32*)&per_cpu(xfsstats, c) + j)); |
71 | len += sprintf(buffer + len, " %u", val); | 64 | seq_printf(m, " %u", val); |
72 | j++; | 65 | j++; |
73 | } | 66 | } |
74 | buffer[len++] = '\n'; | 67 | seq_putc(m, '\n'); |
75 | } | 68 | } |
76 | /* extra precision counters */ | 69 | /* extra precision counters */ |
77 | for_each_possible_cpu(i) { | 70 | for_each_possible_cpu(i) { |
@@ -80,36 +73,38 @@ xfs_read_xfsstats( | |||
80 | xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; | 73 | xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; |
81 | } | 74 | } |
82 | 75 | ||
83 | len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n", | 76 | seq_printf(m, "xpc %Lu %Lu %Lu\n", |
84 | xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); | 77 | xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); |
85 | len += sprintf(buffer + len, "debug %u\n", | 78 | seq_printf(m, "debug %u\n", |
86 | #if defined(DEBUG) | 79 | #if defined(DEBUG) |
87 | 1); | 80 | 1); |
88 | #else | 81 | #else |
89 | 0); | 82 | 0); |
90 | #endif | 83 | #endif |
84 | return 0; | ||
85 | } | ||
91 | 86 | ||
92 | if (offset >= len) { | 87 | static int xfs_stat_proc_open(struct inode *inode, struct file *file) |
93 | *start = buffer; | 88 | { |
94 | *eof = 1; | 89 | return single_open(file, xfs_stat_proc_show, NULL); |
95 | return 0; | ||
96 | } | ||
97 | *start = buffer + offset; | ||
98 | if ((len -= offset) > count) | ||
99 | return count; | ||
100 | *eof = 1; | ||
101 | |||
102 | return len; | ||
103 | } | 90 | } |
104 | 91 | ||
92 | static const struct file_operations xfs_stat_proc_fops = { | ||
93 | .owner = THIS_MODULE, | ||
94 | .open = xfs_stat_proc_open, | ||
95 | .read = seq_read, | ||
96 | .llseek = seq_lseek, | ||
97 | .release = single_release, | ||
98 | }; | ||
99 | |||
105 | int | 100 | int |
106 | xfs_init_procfs(void) | 101 | xfs_init_procfs(void) |
107 | { | 102 | { |
108 | if (!proc_mkdir("fs/xfs", NULL)) | 103 | if (!proc_mkdir("fs/xfs", NULL)) |
109 | goto out; | 104 | goto out; |
110 | 105 | ||
111 | if (!create_proc_read_entry("fs/xfs/stat", 0, NULL, | 106 | if (!proc_create("fs/xfs/stat", 0, NULL, |
112 | xfs_read_xfsstats, NULL)) | 107 | &xfs_stat_proc_fops)) |
113 | goto out_remove_entry; | 108 | goto out_remove_entry; |
114 | return 0; | 109 | return 0; |
115 | 110 | ||
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a220d36f789b..bdd41c8c342f 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -67,7 +67,7 @@ | |||
67 | #include <linux/freezer.h> | 67 | #include <linux/freezer.h> |
68 | #include <linux/parser.h> | 68 | #include <linux/parser.h> |
69 | 69 | ||
70 | static struct super_operations xfs_super_operations; | 70 | static const struct super_operations xfs_super_operations; |
71 | static kmem_zone_t *xfs_ioend_zone; | 71 | static kmem_zone_t *xfs_ioend_zone; |
72 | mempool_t *xfs_ioend_pool; | 72 | mempool_t *xfs_ioend_pool; |
73 | 73 | ||
@@ -579,15 +579,19 @@ xfs_showargs( | |||
579 | else if (mp->m_qflags & XFS_UQUOTA_ACCT) | 579 | else if (mp->m_qflags & XFS_UQUOTA_ACCT) |
580 | seq_puts(m, "," MNTOPT_UQUOTANOENF); | 580 | seq_puts(m, "," MNTOPT_UQUOTANOENF); |
581 | 581 | ||
582 | if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) | 582 | /* Either project or group quotas can be active, not both */ |
583 | seq_puts(m, "," MNTOPT_PRJQUOTA); | 583 | |
584 | else if (mp->m_qflags & XFS_PQUOTA_ACCT) | 584 | if (mp->m_qflags & XFS_PQUOTA_ACCT) { |
585 | seq_puts(m, "," MNTOPT_PQUOTANOENF); | 585 | if (mp->m_qflags & XFS_OQUOTA_ENFD) |
586 | 586 | seq_puts(m, "," MNTOPT_PRJQUOTA); | |
587 | if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD)) | 587 | else |
588 | seq_puts(m, "," MNTOPT_GRPQUOTA); | 588 | seq_puts(m, "," MNTOPT_PQUOTANOENF); |
589 | else if (mp->m_qflags & XFS_GQUOTA_ACCT) | 589 | } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { |
590 | seq_puts(m, "," MNTOPT_GQUOTANOENF); | 590 | if (mp->m_qflags & XFS_OQUOTA_ENFD) |
591 | seq_puts(m, "," MNTOPT_GRPQUOTA); | ||
592 | else | ||
593 | seq_puts(m, "," MNTOPT_GQUOTANOENF); | ||
594 | } | ||
591 | 595 | ||
592 | if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) | 596 | if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) |
593 | seq_puts(m, "," MNTOPT_NOQUOTA); | 597 | seq_puts(m, "," MNTOPT_NOQUOTA); |
@@ -687,7 +691,7 @@ xfs_barrier_test( | |||
687 | return error; | 691 | return error; |
688 | } | 692 | } |
689 | 693 | ||
690 | void | 694 | STATIC void |
691 | xfs_mountfs_check_barriers(xfs_mount_t *mp) | 695 | xfs_mountfs_check_barriers(xfs_mount_t *mp) |
692 | { | 696 | { |
693 | int error; | 697 | int error; |
@@ -1532,7 +1536,7 @@ xfs_fs_get_sb( | |||
1532 | mnt); | 1536 | mnt); |
1533 | } | 1537 | } |
1534 | 1538 | ||
1535 | static struct super_operations xfs_super_operations = { | 1539 | static const struct super_operations xfs_super_operations = { |
1536 | .alloc_inode = xfs_fs_alloc_inode, | 1540 | .alloc_inode = xfs_fs_alloc_inode, |
1537 | .destroy_inode = xfs_fs_destroy_inode, | 1541 | .destroy_inode = xfs_fs_destroy_inode, |
1538 | .write_inode = xfs_fs_write_inode, | 1542 | .write_inode = xfs_fs_write_inode, |
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 5a2ea3a21781..18175ebd58ed 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -93,7 +93,7 @@ extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); | |||
93 | 93 | ||
94 | extern const struct export_operations xfs_export_operations; | 94 | extern const struct export_operations xfs_export_operations; |
95 | extern struct xattr_handler *xfs_xattr_handlers[]; | 95 | extern struct xattr_handler *xfs_xattr_handlers[]; |
96 | extern struct quotactl_ops xfs_quotactl_operations; | 96 | extern const struct quotactl_ops xfs_quotactl_operations; |
97 | 97 | ||
98 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) | 98 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) |
99 | 99 | ||
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 98ef624d9baf..320be6aea492 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -749,21 +749,6 @@ __xfs_inode_clear_reclaim_tag( | |||
749 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | 749 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); |
750 | } | 750 | } |
751 | 751 | ||
752 | void | ||
753 | xfs_inode_clear_reclaim_tag( | ||
754 | xfs_inode_t *ip) | ||
755 | { | ||
756 | xfs_mount_t *mp = ip->i_mount; | ||
757 | xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); | ||
758 | |||
759 | read_lock(&pag->pag_ici_lock); | ||
760 | spin_lock(&ip->i_flags_lock); | ||
761 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | ||
762 | spin_unlock(&ip->i_flags_lock); | ||
763 | read_unlock(&pag->pag_ici_lock); | ||
764 | xfs_put_perag(mp, pag); | ||
765 | } | ||
766 | |||
767 | STATIC int | 752 | STATIC int |
768 | xfs_reclaim_inode_now( | 753 | xfs_reclaim_inode_now( |
769 | struct xfs_inode *ip, | 754 | struct xfs_inode *ip, |
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 59120602588a..27920eb7a820 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -49,7 +49,6 @@ int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); | |||
49 | 49 | ||
50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); | 50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); |
51 | void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); | 51 | void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); |
52 | void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); | ||
53 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | 52 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, |
54 | struct xfs_inode *ip); | 53 | struct xfs_inode *ip); |
55 | 54 | ||
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 916c0ffb6083..c5bc67c4e3bb 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c | |||
@@ -26,7 +26,6 @@ STATIC int | |||
26 | xfs_stats_clear_proc_handler( | 26 | xfs_stats_clear_proc_handler( |
27 | ctl_table *ctl, | 27 | ctl_table *ctl, |
28 | int write, | 28 | int write, |
29 | struct file *filp, | ||
30 | void __user *buffer, | 29 | void __user *buffer, |
31 | size_t *lenp, | 30 | size_t *lenp, |
32 | loff_t *ppos) | 31 | loff_t *ppos) |
@@ -34,7 +33,7 @@ xfs_stats_clear_proc_handler( | |||
34 | int c, ret, *valp = ctl->data; | 33 | int c, ret, *valp = ctl->data; |
35 | __uint32_t vn_active; | 34 | __uint32_t vn_active; |
36 | 35 | ||
37 | ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); | 36 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); |
38 | 37 | ||
39 | if (!ret && write && *valp) { | 38 | if (!ret && write && *valp) { |
40 | printk("XFS Clearing xfsstats\n"); | 39 | printk("XFS Clearing xfsstats\n"); |
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c index 21b08c0396a1..83e7ea3e25fa 100644 --- a/fs/xfs/quota/xfs_qm_stats.c +++ b/fs/xfs/quota/xfs_qm_stats.c | |||
@@ -48,50 +48,34 @@ | |||
48 | 48 | ||
49 | struct xqmstats xqmstats; | 49 | struct xqmstats xqmstats; |
50 | 50 | ||
51 | STATIC int | 51 | static int xqm_proc_show(struct seq_file *m, void *v) |
52 | xfs_qm_read_xfsquota( | ||
53 | char *buffer, | ||
54 | char **start, | ||
55 | off_t offset, | ||
56 | int count, | ||
57 | int *eof, | ||
58 | void *data) | ||
59 | { | 52 | { |
60 | int len; | ||
61 | |||
62 | /* maximum; incore; ratio free to inuse; freelist */ | 53 | /* maximum; incore; ratio free to inuse; freelist */ |
63 | len = sprintf(buffer, "%d\t%d\t%d\t%u\n", | 54 | seq_printf(m, "%d\t%d\t%d\t%u\n", |
64 | ndquot, | 55 | ndquot, |
65 | xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, | 56 | xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, |
66 | xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, | 57 | xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, |
67 | xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); | 58 | xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); |
68 | 59 | return 0; | |
69 | if (offset >= len) { | ||
70 | *start = buffer; | ||
71 | *eof = 1; | ||
72 | return 0; | ||
73 | } | ||
74 | *start = buffer + offset; | ||
75 | if ((len -= offset) > count) | ||
76 | return count; | ||
77 | *eof = 1; | ||
78 | |||
79 | return len; | ||
80 | } | 60 | } |
81 | 61 | ||
82 | STATIC int | 62 | static int xqm_proc_open(struct inode *inode, struct file *file) |
83 | xfs_qm_read_stats( | ||
84 | char *buffer, | ||
85 | char **start, | ||
86 | off_t offset, | ||
87 | int count, | ||
88 | int *eof, | ||
89 | void *data) | ||
90 | { | 63 | { |
91 | int len; | 64 | return single_open(file, xqm_proc_show, NULL); |
65 | } | ||
66 | |||
67 | static const struct file_operations xqm_proc_fops = { | ||
68 | .owner = THIS_MODULE, | ||
69 | .open = xqm_proc_open, | ||
70 | .read = seq_read, | ||
71 | .llseek = seq_lseek, | ||
72 | .release = single_release, | ||
73 | }; | ||
92 | 74 | ||
75 | static int xqmstat_proc_show(struct seq_file *m, void *v) | ||
76 | { | ||
93 | /* quota performance statistics */ | 77 | /* quota performance statistics */ |
94 | len = sprintf(buffer, "qm %u %u %u %u %u %u %u %u\n", | 78 | seq_printf(m, "qm %u %u %u %u %u %u %u %u\n", |
95 | xqmstats.xs_qm_dqreclaims, | 79 | xqmstats.xs_qm_dqreclaims, |
96 | xqmstats.xs_qm_dqreclaim_misses, | 80 | xqmstats.xs_qm_dqreclaim_misses, |
97 | xqmstats.xs_qm_dquot_dups, | 81 | xqmstats.xs_qm_dquot_dups, |
@@ -100,25 +84,27 @@ xfs_qm_read_stats( | |||
100 | xqmstats.xs_qm_dqwants, | 84 | xqmstats.xs_qm_dqwants, |
101 | xqmstats.xs_qm_dqshake_reclaims, | 85 | xqmstats.xs_qm_dqshake_reclaims, |
102 | xqmstats.xs_qm_dqinact_reclaims); | 86 | xqmstats.xs_qm_dqinact_reclaims); |
87 | return 0; | ||
88 | } | ||
103 | 89 | ||
104 | if (offset >= len) { | 90 | static int xqmstat_proc_open(struct inode *inode, struct file *file) |
105 | *start = buffer; | 91 | { |
106 | *eof = 1; | 92 | return single_open(file, xqmstat_proc_show, NULL); |
107 | return 0; | ||
108 | } | ||
109 | *start = buffer + offset; | ||
110 | if ((len -= offset) > count) | ||
111 | return count; | ||
112 | *eof = 1; | ||
113 | |||
114 | return len; | ||
115 | } | 93 | } |
116 | 94 | ||
95 | static const struct file_operations xqmstat_proc_fops = { | ||
96 | .owner = THIS_MODULE, | ||
97 | .open = xqmstat_proc_open, | ||
98 | .read = seq_read, | ||
99 | .llseek = seq_lseek, | ||
100 | .release = single_release, | ||
101 | }; | ||
102 | |||
117 | void | 103 | void |
118 | xfs_qm_init_procfs(void) | 104 | xfs_qm_init_procfs(void) |
119 | { | 105 | { |
120 | create_proc_read_entry("fs/xfs/xqmstat", 0, NULL, xfs_qm_read_stats, NULL); | 106 | proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops); |
121 | create_proc_read_entry("fs/xfs/xqm", 0, NULL, xfs_qm_read_xfsquota, NULL); | 107 | proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops); |
122 | } | 108 | } |
123 | 109 | ||
124 | void | 110 | void |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index f24b50b68d03..a5d54bf4931b 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -198,6 +198,15 @@ typedef struct xfs_perag | |||
198 | xfs_agino_t pagi_count; /* number of allocated inodes */ | 198 | xfs_agino_t pagi_count; /* number of allocated inodes */ |
199 | int pagb_count; /* pagb slots in use */ | 199 | int pagb_count; /* pagb slots in use */ |
200 | xfs_perag_busy_t *pagb_list; /* unstable blocks */ | 200 | xfs_perag_busy_t *pagb_list; /* unstable blocks */ |
201 | |||
202 | /* | ||
203 | * Inode allocation search lookup optimisation. | ||
204 | * If the pagino matches, the search for new inodes | ||
205 | * doesn't need to search the near ones again straight away | ||
206 | */ | ||
207 | xfs_agino_t pagl_pagino; | ||
208 | xfs_agino_t pagl_leftrec; | ||
209 | xfs_agino_t pagl_rightrec; | ||
201 | #ifdef __KERNEL__ | 210 | #ifdef __KERNEL__ |
202 | spinlock_t pagb_lock; /* lock for pagb_list */ | 211 | spinlock_t pagb_lock; /* lock for pagb_list */ |
203 | 212 | ||
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8ee5b5a76a2a..8971fb09d387 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -3713,7 +3713,7 @@ done: | |||
3713 | * entry (null if none). Else, *lastxp will be set to the index | 3713 | * entry (null if none). Else, *lastxp will be set to the index |
3714 | * of the found entry; *gotp will contain the entry. | 3714 | * of the found entry; *gotp will contain the entry. |
3715 | */ | 3715 | */ |
3716 | xfs_bmbt_rec_host_t * /* pointer to found extent entry */ | 3716 | STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ |
3717 | xfs_bmap_search_multi_extents( | 3717 | xfs_bmap_search_multi_extents( |
3718 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3718 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3719 | xfs_fileoff_t bno, /* block number searched for */ | 3719 | xfs_fileoff_t bno, /* block number searched for */ |
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 1b8ff9256bd0..56f62d2edc35 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -392,17 +392,6 @@ xfs_bmap_count_blocks( | |||
392 | int whichfork, | 392 | int whichfork, |
393 | int *count); | 393 | int *count); |
394 | 394 | ||
395 | /* | ||
396 | * Search the extent records for the entry containing block bno. | ||
397 | * If bno lies in a hole, point to the next entry. If bno lies | ||
398 | * past eof, *eofp will be set, and *prevp will contain the last | ||
399 | * entry (null if none). Else, *lastxp will be set to the index | ||
400 | * of the found entry; *gotp will contain the entry. | ||
401 | */ | ||
402 | xfs_bmbt_rec_host_t * | ||
403 | xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *, | ||
404 | xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); | ||
405 | |||
406 | #endif /* __KERNEL__ */ | 395 | #endif /* __KERNEL__ */ |
407 | 396 | ||
408 | #endif /* __XFS_BMAP_H__ */ | 397 | #endif /* __XFS_BMAP_H__ */ |
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 5c1ade06578e..eb7b702d0690 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c | |||
@@ -202,16 +202,6 @@ xfs_bmbt_get_state( | |||
202 | ext_flag); | 202 | ext_flag); |
203 | } | 203 | } |
204 | 204 | ||
205 | /* Endian flipping versions of the bmbt extraction functions */ | ||
206 | void | ||
207 | xfs_bmbt_disk_get_all( | ||
208 | xfs_bmbt_rec_t *r, | ||
209 | xfs_bmbt_irec_t *s) | ||
210 | { | ||
211 | __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), | ||
212 | get_unaligned_be64(&r->l1), s); | ||
213 | } | ||
214 | |||
215 | /* | 205 | /* |
216 | * Extract the blockcount field from an on disk bmap extent record. | 206 | * Extract the blockcount field from an on disk bmap extent record. |
217 | */ | 207 | */ |
@@ -816,6 +806,16 @@ xfs_bmbt_trace_key( | |||
816 | *l1 = 0; | 806 | *l1 = 0; |
817 | } | 807 | } |
818 | 808 | ||
809 | /* Endian flipping versions of the bmbt extraction functions */ | ||
810 | STATIC void | ||
811 | xfs_bmbt_disk_get_all( | ||
812 | xfs_bmbt_rec_t *r, | ||
813 | xfs_bmbt_irec_t *s) | ||
814 | { | ||
815 | __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), | ||
816 | get_unaligned_be64(&r->l1), s); | ||
817 | } | ||
818 | |||
819 | STATIC void | 819 | STATIC void |
820 | xfs_bmbt_trace_record( | 820 | xfs_bmbt_trace_record( |
821 | struct xfs_btree_cur *cur, | 821 | struct xfs_btree_cur *cur, |
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 0e8df007615e..5549d495947f 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h | |||
@@ -220,7 +220,6 @@ extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); | |||
220 | extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); | 220 | extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); |
221 | extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r); | 221 | extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_host_t *r); |
222 | 222 | ||
223 | extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); | ||
224 | extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); | 223 | extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); |
225 | extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); | 224 | extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); |
226 | 225 | ||
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 26717388acf5..52b5f14d0c32 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -646,46 +646,6 @@ xfs_btree_read_bufl( | |||
646 | } | 646 | } |
647 | 647 | ||
648 | /* | 648 | /* |
649 | * Get a buffer for the block, return it read in. | ||
650 | * Short-form addressing. | ||
651 | */ | ||
652 | int /* error */ | ||
653 | xfs_btree_read_bufs( | ||
654 | xfs_mount_t *mp, /* file system mount point */ | ||
655 | xfs_trans_t *tp, /* transaction pointer */ | ||
656 | xfs_agnumber_t agno, /* allocation group number */ | ||
657 | xfs_agblock_t agbno, /* allocation group block number */ | ||
658 | uint lock, /* lock flags for read_buf */ | ||
659 | xfs_buf_t **bpp, /* buffer for agno/agbno */ | ||
660 | int refval) /* ref count value for buffer */ | ||
661 | { | ||
662 | xfs_buf_t *bp; /* return value */ | ||
663 | xfs_daddr_t d; /* real disk block address */ | ||
664 | int error; | ||
665 | |||
666 | ASSERT(agno != NULLAGNUMBER); | ||
667 | ASSERT(agbno != NULLAGBLOCK); | ||
668 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); | ||
669 | if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, | ||
670 | mp->m_bsize, lock, &bp))) { | ||
671 | return error; | ||
672 | } | ||
673 | ASSERT(!bp || !XFS_BUF_GETERROR(bp)); | ||
674 | if (bp != NULL) { | ||
675 | switch (refval) { | ||
676 | case XFS_ALLOC_BTREE_REF: | ||
677 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); | ||
678 | break; | ||
679 | case XFS_INO_BTREE_REF: | ||
680 | XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval); | ||
681 | break; | ||
682 | } | ||
683 | } | ||
684 | *bpp = bp; | ||
685 | return 0; | ||
686 | } | ||
687 | |||
688 | /* | ||
689 | * Read-ahead the block, don't wait for it, don't return a buffer. | 649 | * Read-ahead the block, don't wait for it, don't return a buffer. |
690 | * Long-form addressing. | 650 | * Long-form addressing. |
691 | */ | 651 | */ |
@@ -2951,7 +2911,7 @@ error0: | |||
2951 | * inode we have to copy the single block it was pointing to into the | 2911 | * inode we have to copy the single block it was pointing to into the |
2952 | * inode. | 2912 | * inode. |
2953 | */ | 2913 | */ |
2954 | int | 2914 | STATIC int |
2955 | xfs_btree_kill_iroot( | 2915 | xfs_btree_kill_iroot( |
2956 | struct xfs_btree_cur *cur) | 2916 | struct xfs_btree_cur *cur) |
2957 | { | 2917 | { |
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 4f852b735b96..7fa07062bdda 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h | |||
@@ -379,20 +379,6 @@ xfs_btree_read_bufl( | |||
379 | int refval);/* ref count value for buffer */ | 379 | int refval);/* ref count value for buffer */ |
380 | 380 | ||
381 | /* | 381 | /* |
382 | * Get a buffer for the block, return it read in. | ||
383 | * Short-form addressing. | ||
384 | */ | ||
385 | int /* error */ | ||
386 | xfs_btree_read_bufs( | ||
387 | struct xfs_mount *mp, /* file system mount point */ | ||
388 | struct xfs_trans *tp, /* transaction pointer */ | ||
389 | xfs_agnumber_t agno, /* allocation group number */ | ||
390 | xfs_agblock_t agbno, /* allocation group block number */ | ||
391 | uint lock, /* lock flags for read_buf */ | ||
392 | struct xfs_buf **bpp, /* buffer for agno/agbno */ | ||
393 | int refval);/* ref count value for buffer */ | ||
394 | |||
395 | /* | ||
396 | * Read-ahead the block, don't wait for it, don't return a buffer. | 382 | * Read-ahead the block, don't wait for it, don't return a buffer. |
397 | * Long-form addressing. | 383 | * Long-form addressing. |
398 | */ | 384 | */ |
@@ -432,7 +418,6 @@ int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); | |||
432 | int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); | 418 | int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); |
433 | int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); | 419 | int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); |
434 | int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); | 420 | int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); |
435 | int xfs_btree_kill_iroot(struct xfs_btree_cur *); | ||
436 | int xfs_btree_insert(struct xfs_btree_cur *, int *); | 421 | int xfs_btree_insert(struct xfs_btree_cur *, int *); |
437 | int xfs_btree_delete(struct xfs_btree_cur *, int *); | 422 | int xfs_btree_delete(struct xfs_btree_cur *, int *); |
438 | int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); | 423 | int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); |
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index c4ea51b55dce..f52ac276277e 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h | |||
@@ -117,7 +117,7 @@ struct getbmapx { | |||
117 | #define BMV_IF_VALID \ | 117 | #define BMV_IF_VALID \ |
118 | (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) | 118 | (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) |
119 | 119 | ||
120 | /* bmv_oflags values - returned for for each non-header segment */ | 120 | /* bmv_oflags values - returned for each non-header segment */ |
121 | #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ | 121 | #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ |
122 | #define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */ | 122 | #define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */ |
123 | #define BMV_OF_LAST 0x4 /* segment is the last in the file */ | 123 | #define BMV_OF_LAST 0x4 /* segment is the last in the file */ |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 3120a3a5e20f..ab64f3efb43b 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -57,75 +57,35 @@ xfs_ialloc_cluster_alignment( | |||
57 | } | 57 | } |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * Lookup the record equal to ino in the btree given by cur. | 60 | * Lookup a record by ino in the btree given by cur. |
61 | */ | ||
62 | STATIC int /* error */ | ||
63 | xfs_inobt_lookup_eq( | ||
64 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
65 | xfs_agino_t ino, /* starting inode of chunk */ | ||
66 | __int32_t fcnt, /* free inode count */ | ||
67 | xfs_inofree_t free, /* free inode mask */ | ||
68 | int *stat) /* success/failure */ | ||
69 | { | ||
70 | cur->bc_rec.i.ir_startino = ino; | ||
71 | cur->bc_rec.i.ir_freecount = fcnt; | ||
72 | cur->bc_rec.i.ir_free = free; | ||
73 | return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * Lookup the first record greater than or equal to ino | ||
78 | * in the btree given by cur. | ||
79 | */ | 61 | */ |
80 | int /* error */ | 62 | int /* error */ |
81 | xfs_inobt_lookup_ge( | 63 | xfs_inobt_lookup( |
82 | struct xfs_btree_cur *cur, /* btree cursor */ | 64 | struct xfs_btree_cur *cur, /* btree cursor */ |
83 | xfs_agino_t ino, /* starting inode of chunk */ | 65 | xfs_agino_t ino, /* starting inode of chunk */ |
84 | __int32_t fcnt, /* free inode count */ | 66 | xfs_lookup_t dir, /* <=, >=, == */ |
85 | xfs_inofree_t free, /* free inode mask */ | ||
86 | int *stat) /* success/failure */ | 67 | int *stat) /* success/failure */ |
87 | { | 68 | { |
88 | cur->bc_rec.i.ir_startino = ino; | 69 | cur->bc_rec.i.ir_startino = ino; |
89 | cur->bc_rec.i.ir_freecount = fcnt; | 70 | cur->bc_rec.i.ir_freecount = 0; |
90 | cur->bc_rec.i.ir_free = free; | 71 | cur->bc_rec.i.ir_free = 0; |
91 | return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); | 72 | return xfs_btree_lookup(cur, dir, stat); |
92 | } | 73 | } |
93 | 74 | ||
94 | /* | 75 | /* |
95 | * Lookup the first record less than or equal to ino | 76 | * Update the record referred to by cur to the value given. |
96 | * in the btree given by cur. | ||
97 | */ | ||
98 | int /* error */ | ||
99 | xfs_inobt_lookup_le( | ||
100 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
101 | xfs_agino_t ino, /* starting inode of chunk */ | ||
102 | __int32_t fcnt, /* free inode count */ | ||
103 | xfs_inofree_t free, /* free inode mask */ | ||
104 | int *stat) /* success/failure */ | ||
105 | { | ||
106 | cur->bc_rec.i.ir_startino = ino; | ||
107 | cur->bc_rec.i.ir_freecount = fcnt; | ||
108 | cur->bc_rec.i.ir_free = free; | ||
109 | return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * Update the record referred to by cur to the value given | ||
114 | * by [ino, fcnt, free]. | ||
115 | * This either works (return 0) or gets an EFSCORRUPTED error. | 77 | * This either works (return 0) or gets an EFSCORRUPTED error. |
116 | */ | 78 | */ |
117 | STATIC int /* error */ | 79 | STATIC int /* error */ |
118 | xfs_inobt_update( | 80 | xfs_inobt_update( |
119 | struct xfs_btree_cur *cur, /* btree cursor */ | 81 | struct xfs_btree_cur *cur, /* btree cursor */ |
120 | xfs_agino_t ino, /* starting inode of chunk */ | 82 | xfs_inobt_rec_incore_t *irec) /* btree record */ |
121 | __int32_t fcnt, /* free inode count */ | ||
122 | xfs_inofree_t free) /* free inode mask */ | ||
123 | { | 83 | { |
124 | union xfs_btree_rec rec; | 84 | union xfs_btree_rec rec; |
125 | 85 | ||
126 | rec.inobt.ir_startino = cpu_to_be32(ino); | 86 | rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); |
127 | rec.inobt.ir_freecount = cpu_to_be32(fcnt); | 87 | rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); |
128 | rec.inobt.ir_free = cpu_to_be64(free); | 88 | rec.inobt.ir_free = cpu_to_be64(irec->ir_free); |
129 | return xfs_btree_update(cur, &rec); | 89 | return xfs_btree_update(cur, &rec); |
130 | } | 90 | } |
131 | 91 | ||
@@ -135,9 +95,7 @@ xfs_inobt_update( | |||
135 | int /* error */ | 95 | int /* error */ |
136 | xfs_inobt_get_rec( | 96 | xfs_inobt_get_rec( |
137 | struct xfs_btree_cur *cur, /* btree cursor */ | 97 | struct xfs_btree_cur *cur, /* btree cursor */ |
138 | xfs_agino_t *ino, /* output: starting inode of chunk */ | 98 | xfs_inobt_rec_incore_t *irec, /* btree record */ |
139 | __int32_t *fcnt, /* output: number of free inodes */ | ||
140 | xfs_inofree_t *free, /* output: free inode mask */ | ||
141 | int *stat) /* output: success/failure */ | 99 | int *stat) /* output: success/failure */ |
142 | { | 100 | { |
143 | union xfs_btree_rec *rec; | 101 | union xfs_btree_rec *rec; |
@@ -145,14 +103,136 @@ xfs_inobt_get_rec( | |||
145 | 103 | ||
146 | error = xfs_btree_get_rec(cur, &rec, stat); | 104 | error = xfs_btree_get_rec(cur, &rec, stat); |
147 | if (!error && *stat == 1) { | 105 | if (!error && *stat == 1) { |
148 | *ino = be32_to_cpu(rec->inobt.ir_startino); | 106 | irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); |
149 | *fcnt = be32_to_cpu(rec->inobt.ir_freecount); | 107 | irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); |
150 | *free = be64_to_cpu(rec->inobt.ir_free); | 108 | irec->ir_free = be64_to_cpu(rec->inobt.ir_free); |
151 | } | 109 | } |
152 | return error; | 110 | return error; |
153 | } | 111 | } |
154 | 112 | ||
155 | /* | 113 | /* |
114 | * Verify that the number of free inodes in the AGI is correct. | ||
115 | */ | ||
116 | #ifdef DEBUG | ||
117 | STATIC int | ||
118 | xfs_check_agi_freecount( | ||
119 | struct xfs_btree_cur *cur, | ||
120 | struct xfs_agi *agi) | ||
121 | { | ||
122 | if (cur->bc_nlevels == 1) { | ||
123 | xfs_inobt_rec_incore_t rec; | ||
124 | int freecount = 0; | ||
125 | int error; | ||
126 | int i; | ||
127 | |||
128 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); | ||
129 | if (error) | ||
130 | return error; | ||
131 | |||
132 | do { | ||
133 | error = xfs_inobt_get_rec(cur, &rec, &i); | ||
134 | if (error) | ||
135 | return error; | ||
136 | |||
137 | if (i) { | ||
138 | freecount += rec.ir_freecount; | ||
139 | error = xfs_btree_increment(cur, 0, &i); | ||
140 | if (error) | ||
141 | return error; | ||
142 | } | ||
143 | } while (i == 1); | ||
144 | |||
145 | if (!XFS_FORCED_SHUTDOWN(cur->bc_mp)) | ||
146 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount)); | ||
147 | } | ||
148 | return 0; | ||
149 | } | ||
150 | #else | ||
151 | #define xfs_check_agi_freecount(cur, agi) 0 | ||
152 | #endif | ||
153 | |||
154 | /* | ||
155 | * Initialise a new set of inodes. | ||
156 | */ | ||
157 | STATIC void | ||
158 | xfs_ialloc_inode_init( | ||
159 | struct xfs_mount *mp, | ||
160 | struct xfs_trans *tp, | ||
161 | xfs_agnumber_t agno, | ||
162 | xfs_agblock_t agbno, | ||
163 | xfs_agblock_t length, | ||
164 | unsigned int gen) | ||
165 | { | ||
166 | struct xfs_buf *fbuf; | ||
167 | struct xfs_dinode *free; | ||
168 | int blks_per_cluster, nbufs, ninodes; | ||
169 | int version; | ||
170 | int i, j; | ||
171 | xfs_daddr_t d; | ||
172 | |||
173 | /* | ||
174 | * Loop over the new block(s), filling in the inodes. | ||
175 | * For small block sizes, manipulate the inodes in buffers | ||
176 | * which are multiples of the blocks size. | ||
177 | */ | ||
178 | if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { | ||
179 | blks_per_cluster = 1; | ||
180 | nbufs = length; | ||
181 | ninodes = mp->m_sb.sb_inopblock; | ||
182 | } else { | ||
183 | blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / | ||
184 | mp->m_sb.sb_blocksize; | ||
185 | nbufs = length / blks_per_cluster; | ||
186 | ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Figure out what version number to use in the inodes we create. | ||
191 | * If the superblock version has caught up to the one that supports | ||
192 | * the new inode format, then use the new inode version. Otherwise | ||
193 | * use the old version so that old kernels will continue to be | ||
194 | * able to use the file system. | ||
195 | */ | ||
196 | if (xfs_sb_version_hasnlink(&mp->m_sb)) | ||
197 | version = 2; | ||
198 | else | ||
199 | version = 1; | ||
200 | |||
201 | for (j = 0; j < nbufs; j++) { | ||
202 | /* | ||
203 | * Get the block. | ||
204 | */ | ||
205 | d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); | ||
206 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, | ||
207 | mp->m_bsize * blks_per_cluster, | ||
208 | XFS_BUF_LOCK); | ||
209 | ASSERT(fbuf); | ||
210 | ASSERT(!XFS_BUF_GETERROR(fbuf)); | ||
211 | |||
212 | /* | ||
213 | * Initialize all inodes in this buffer and then log them. | ||
214 | * | ||
215 | * XXX: It would be much better if we had just one transaction | ||
216 | * to log a whole cluster of inodes instead of all the | ||
217 | * individual transactions causing a lot of log traffic. | ||
218 | */ | ||
219 | xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); | ||
220 | for (i = 0; i < ninodes; i++) { | ||
221 | int ioffset = i << mp->m_sb.sb_inodelog; | ||
222 | uint isize = sizeof(struct xfs_dinode); | ||
223 | |||
224 | free = xfs_make_iptr(mp, fbuf, i); | ||
225 | free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); | ||
226 | free->di_version = version; | ||
227 | free->di_gen = cpu_to_be32(gen); | ||
228 | free->di_next_unlinked = cpu_to_be32(NULLAGINO); | ||
229 | xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); | ||
230 | } | ||
231 | xfs_trans_inode_alloc_buf(tp, fbuf); | ||
232 | } | ||
233 | } | ||
234 | |||
235 | /* | ||
156 | * Allocate new inodes in the allocation group specified by agbp. | 236 | * Allocate new inodes in the allocation group specified by agbp. |
157 | * Return 0 for success, else error code. | 237 | * Return 0 for success, else error code. |
158 | */ | 238 | */ |
@@ -164,24 +244,15 @@ xfs_ialloc_ag_alloc( | |||
164 | { | 244 | { |
165 | xfs_agi_t *agi; /* allocation group header */ | 245 | xfs_agi_t *agi; /* allocation group header */ |
166 | xfs_alloc_arg_t args; /* allocation argument structure */ | 246 | xfs_alloc_arg_t args; /* allocation argument structure */ |
167 | int blks_per_cluster; /* fs blocks per inode cluster */ | ||
168 | xfs_btree_cur_t *cur; /* inode btree cursor */ | 247 | xfs_btree_cur_t *cur; /* inode btree cursor */ |
169 | xfs_daddr_t d; /* disk addr of buffer */ | ||
170 | xfs_agnumber_t agno; | 248 | xfs_agnumber_t agno; |
171 | int error; | 249 | int error; |
172 | xfs_buf_t *fbuf; /* new free inodes' buffer */ | 250 | int i; |
173 | xfs_dinode_t *free; /* new free inode structure */ | ||
174 | int i; /* inode counter */ | ||
175 | int j; /* block counter */ | ||
176 | int nbufs; /* num bufs of new inodes */ | ||
177 | xfs_agino_t newino; /* new first inode's number */ | 251 | xfs_agino_t newino; /* new first inode's number */ |
178 | xfs_agino_t newlen; /* new number of inodes */ | 252 | xfs_agino_t newlen; /* new number of inodes */ |
179 | int ninodes; /* num inodes per buf */ | ||
180 | xfs_agino_t thisino; /* current inode number, for loop */ | 253 | xfs_agino_t thisino; /* current inode number, for loop */ |
181 | int version; /* inode version number to use */ | ||
182 | int isaligned = 0; /* inode allocation at stripe unit */ | 254 | int isaligned = 0; /* inode allocation at stripe unit */ |
183 | /* boundary */ | 255 | /* boundary */ |
184 | unsigned int gen; | ||
185 | 256 | ||
186 | args.tp = tp; | 257 | args.tp = tp; |
187 | args.mp = tp->t_mountp; | 258 | args.mp = tp->t_mountp; |
@@ -202,12 +273,12 @@ xfs_ialloc_ag_alloc( | |||
202 | */ | 273 | */ |
203 | agi = XFS_BUF_TO_AGI(agbp); | 274 | agi = XFS_BUF_TO_AGI(agbp); |
204 | newino = be32_to_cpu(agi->agi_newino); | 275 | newino = be32_to_cpu(agi->agi_newino); |
276 | agno = be32_to_cpu(agi->agi_seqno); | ||
205 | args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + | 277 | args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + |
206 | XFS_IALLOC_BLOCKS(args.mp); | 278 | XFS_IALLOC_BLOCKS(args.mp); |
207 | if (likely(newino != NULLAGINO && | 279 | if (likely(newino != NULLAGINO && |
208 | (args.agbno < be32_to_cpu(agi->agi_length)))) { | 280 | (args.agbno < be32_to_cpu(agi->agi_length)))) { |
209 | args.fsbno = XFS_AGB_TO_FSB(args.mp, | 281 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); |
210 | be32_to_cpu(agi->agi_seqno), args.agbno); | ||
211 | args.type = XFS_ALLOCTYPE_THIS_BNO; | 282 | args.type = XFS_ALLOCTYPE_THIS_BNO; |
212 | args.mod = args.total = args.wasdel = args.isfl = | 283 | args.mod = args.total = args.wasdel = args.isfl = |
213 | args.userdata = args.minalignslop = 0; | 284 | args.userdata = args.minalignslop = 0; |
@@ -258,8 +329,7 @@ xfs_ialloc_ag_alloc( | |||
258 | * For now, just allocate blocks up front. | 329 | * For now, just allocate blocks up front. |
259 | */ | 330 | */ |
260 | args.agbno = be32_to_cpu(agi->agi_root); | 331 | args.agbno = be32_to_cpu(agi->agi_root); |
261 | args.fsbno = XFS_AGB_TO_FSB(args.mp, | 332 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); |
262 | be32_to_cpu(agi->agi_seqno), args.agbno); | ||
263 | /* | 333 | /* |
264 | * Allocate a fixed-size extent of inodes. | 334 | * Allocate a fixed-size extent of inodes. |
265 | */ | 335 | */ |
@@ -282,8 +352,7 @@ xfs_ialloc_ag_alloc( | |||
282 | if (isaligned && args.fsbno == NULLFSBLOCK) { | 352 | if (isaligned && args.fsbno == NULLFSBLOCK) { |
283 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | 353 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
284 | args.agbno = be32_to_cpu(agi->agi_root); | 354 | args.agbno = be32_to_cpu(agi->agi_root); |
285 | args.fsbno = XFS_AGB_TO_FSB(args.mp, | 355 | args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); |
286 | be32_to_cpu(agi->agi_seqno), args.agbno); | ||
287 | args.alignment = xfs_ialloc_cluster_alignment(&args); | 356 | args.alignment = xfs_ialloc_cluster_alignment(&args); |
288 | if ((error = xfs_alloc_vextent(&args))) | 357 | if ((error = xfs_alloc_vextent(&args))) |
289 | return error; | 358 | return error; |
@@ -294,85 +363,30 @@ xfs_ialloc_ag_alloc( | |||
294 | return 0; | 363 | return 0; |
295 | } | 364 | } |
296 | ASSERT(args.len == args.minlen); | 365 | ASSERT(args.len == args.minlen); |
297 | /* | ||
298 | * Convert the results. | ||
299 | */ | ||
300 | newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); | ||
301 | /* | ||
302 | * Loop over the new block(s), filling in the inodes. | ||
303 | * For small block sizes, manipulate the inodes in buffers | ||
304 | * which are multiples of the blocks size. | ||
305 | */ | ||
306 | if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) { | ||
307 | blks_per_cluster = 1; | ||
308 | nbufs = (int)args.len; | ||
309 | ninodes = args.mp->m_sb.sb_inopblock; | ||
310 | } else { | ||
311 | blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) / | ||
312 | args.mp->m_sb.sb_blocksize; | ||
313 | nbufs = (int)args.len / blks_per_cluster; | ||
314 | ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock; | ||
315 | } | ||
316 | /* | ||
317 | * Figure out what version number to use in the inodes we create. | ||
318 | * If the superblock version has caught up to the one that supports | ||
319 | * the new inode format, then use the new inode version. Otherwise | ||
320 | * use the old version so that old kernels will continue to be | ||
321 | * able to use the file system. | ||
322 | */ | ||
323 | if (xfs_sb_version_hasnlink(&args.mp->m_sb)) | ||
324 | version = 2; | ||
325 | else | ||
326 | version = 1; | ||
327 | 366 | ||
328 | /* | 367 | /* |
368 | * Stamp and write the inode buffers. | ||
369 | * | ||
329 | * Seed the new inode cluster with a random generation number. This | 370 | * Seed the new inode cluster with a random generation number. This |
330 | * prevents short-term reuse of generation numbers if a chunk is | 371 | * prevents short-term reuse of generation numbers if a chunk is |
331 | * freed and then immediately reallocated. We use random numbers | 372 | * freed and then immediately reallocated. We use random numbers |
332 | * rather than a linear progression to prevent the next generation | 373 | * rather than a linear progression to prevent the next generation |
333 | * number from being easily guessable. | 374 | * number from being easily guessable. |
334 | */ | 375 | */ |
335 | gen = random32(); | 376 | xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len, |
336 | for (j = 0; j < nbufs; j++) { | 377 | random32()); |
337 | /* | ||
338 | * Get the block. | ||
339 | */ | ||
340 | d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno), | ||
341 | args.agbno + (j * blks_per_cluster)); | ||
342 | fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d, | ||
343 | args.mp->m_bsize * blks_per_cluster, | ||
344 | XFS_BUF_LOCK); | ||
345 | ASSERT(fbuf); | ||
346 | ASSERT(!XFS_BUF_GETERROR(fbuf)); | ||
347 | 378 | ||
348 | /* | 379 | /* |
349 | * Initialize all inodes in this buffer and then log them. | 380 | * Convert the results. |
350 | * | 381 | */ |
351 | * XXX: It would be much better if we had just one transaction to | 382 | newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); |
352 | * log a whole cluster of inodes instead of all the individual | ||
353 | * transactions causing a lot of log traffic. | ||
354 | */ | ||
355 | xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); | ||
356 | for (i = 0; i < ninodes; i++) { | ||
357 | int ioffset = i << args.mp->m_sb.sb_inodelog; | ||
358 | uint isize = sizeof(struct xfs_dinode); | ||
359 | |||
360 | free = xfs_make_iptr(args.mp, fbuf, i); | ||
361 | free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); | ||
362 | free->di_version = version; | ||
363 | free->di_gen = cpu_to_be32(gen); | ||
364 | free->di_next_unlinked = cpu_to_be32(NULLAGINO); | ||
365 | xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); | ||
366 | } | ||
367 | xfs_trans_inode_alloc_buf(tp, fbuf); | ||
368 | } | ||
369 | be32_add_cpu(&agi->agi_count, newlen); | 383 | be32_add_cpu(&agi->agi_count, newlen); |
370 | be32_add_cpu(&agi->agi_freecount, newlen); | 384 | be32_add_cpu(&agi->agi_freecount, newlen); |
371 | agno = be32_to_cpu(agi->agi_seqno); | ||
372 | down_read(&args.mp->m_peraglock); | 385 | down_read(&args.mp->m_peraglock); |
373 | args.mp->m_perag[agno].pagi_freecount += newlen; | 386 | args.mp->m_perag[agno].pagi_freecount += newlen; |
374 | up_read(&args.mp->m_peraglock); | 387 | up_read(&args.mp->m_peraglock); |
375 | agi->agi_newino = cpu_to_be32(newino); | 388 | agi->agi_newino = cpu_to_be32(newino); |
389 | |||
376 | /* | 390 | /* |
377 | * Insert records describing the new inode chunk into the btree. | 391 | * Insert records describing the new inode chunk into the btree. |
378 | */ | 392 | */ |
@@ -380,13 +394,17 @@ xfs_ialloc_ag_alloc( | |||
380 | for (thisino = newino; | 394 | for (thisino = newino; |
381 | thisino < newino + newlen; | 395 | thisino < newino + newlen; |
382 | thisino += XFS_INODES_PER_CHUNK) { | 396 | thisino += XFS_INODES_PER_CHUNK) { |
383 | if ((error = xfs_inobt_lookup_eq(cur, thisino, | 397 | cur->bc_rec.i.ir_startino = thisino; |
384 | XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) { | 398 | cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; |
399 | cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; | ||
400 | error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); | ||
401 | if (error) { | ||
385 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 402 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
386 | return error; | 403 | return error; |
387 | } | 404 | } |
388 | ASSERT(i == 0); | 405 | ASSERT(i == 0); |
389 | if ((error = xfs_btree_insert(cur, &i))) { | 406 | error = xfs_btree_insert(cur, &i); |
407 | if (error) { | ||
390 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 408 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
391 | return error; | 409 | return error; |
392 | } | 410 | } |
@@ -539,6 +557,62 @@ nextag: | |||
539 | } | 557 | } |
540 | 558 | ||
541 | /* | 559 | /* |
560 | * Try to retrieve the next record to the left/right from the current one. | ||
561 | */ | ||
562 | STATIC int | ||
563 | xfs_ialloc_next_rec( | ||
564 | struct xfs_btree_cur *cur, | ||
565 | xfs_inobt_rec_incore_t *rec, | ||
566 | int *done, | ||
567 | int left) | ||
568 | { | ||
569 | int error; | ||
570 | int i; | ||
571 | |||
572 | if (left) | ||
573 | error = xfs_btree_decrement(cur, 0, &i); | ||
574 | else | ||
575 | error = xfs_btree_increment(cur, 0, &i); | ||
576 | |||
577 | if (error) | ||
578 | return error; | ||
579 | *done = !i; | ||
580 | if (i) { | ||
581 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
582 | if (error) | ||
583 | return error; | ||
584 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
585 | } | ||
586 | |||
587 | return 0; | ||
588 | } | ||
589 | |||
590 | STATIC int | ||
591 | xfs_ialloc_get_rec( | ||
592 | struct xfs_btree_cur *cur, | ||
593 | xfs_agino_t agino, | ||
594 | xfs_inobt_rec_incore_t *rec, | ||
595 | int *done, | ||
596 | int left) | ||
597 | { | ||
598 | int error; | ||
599 | int i; | ||
600 | |||
601 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i); | ||
602 | if (error) | ||
603 | return error; | ||
604 | *done = !i; | ||
605 | if (i) { | ||
606 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
607 | if (error) | ||
608 | return error; | ||
609 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
610 | } | ||
611 | |||
612 | return 0; | ||
613 | } | ||
614 | |||
615 | /* | ||
542 | * Visible inode allocation functions. | 616 | * Visible inode allocation functions. |
543 | */ | 617 | */ |
544 | 618 | ||
@@ -592,8 +666,8 @@ xfs_dialloc( | |||
592 | int j; /* result code */ | 666 | int j; /* result code */ |
593 | xfs_mount_t *mp; /* file system mount structure */ | 667 | xfs_mount_t *mp; /* file system mount structure */ |
594 | int offset; /* index of inode in chunk */ | 668 | int offset; /* index of inode in chunk */ |
595 | xfs_agino_t pagino; /* parent's a.g. relative inode # */ | 669 | xfs_agino_t pagino; /* parent's AG relative inode # */ |
596 | xfs_agnumber_t pagno; /* parent's allocation group number */ | 670 | xfs_agnumber_t pagno; /* parent's AG number */ |
597 | xfs_inobt_rec_incore_t rec; /* inode allocation record */ | 671 | xfs_inobt_rec_incore_t rec; /* inode allocation record */ |
598 | xfs_agnumber_t tagno; /* testing allocation group number */ | 672 | xfs_agnumber_t tagno; /* testing allocation group number */ |
599 | xfs_btree_cur_t *tcur; /* temp cursor */ | 673 | xfs_btree_cur_t *tcur; /* temp cursor */ |
@@ -716,6 +790,8 @@ nextag: | |||
716 | */ | 790 | */ |
717 | agno = tagno; | 791 | agno = tagno; |
718 | *IO_agbp = NULL; | 792 | *IO_agbp = NULL; |
793 | |||
794 | restart_pagno: | ||
719 | cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); | 795 | cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); |
720 | /* | 796 | /* |
721 | * If pagino is 0 (this is the root inode allocation) use newino. | 797 | * If pagino is 0 (this is the root inode allocation) use newino. |
@@ -723,220 +799,199 @@ nextag: | |||
723 | */ | 799 | */ |
724 | if (!pagino) | 800 | if (!pagino) |
725 | pagino = be32_to_cpu(agi->agi_newino); | 801 | pagino = be32_to_cpu(agi->agi_newino); |
726 | #ifdef DEBUG | ||
727 | if (cur->bc_nlevels == 1) { | ||
728 | int freecount = 0; | ||
729 | 802 | ||
730 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 803 | error = xfs_check_agi_freecount(cur, agi); |
731 | goto error0; | 804 | if (error) |
732 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 805 | goto error0; |
733 | do { | ||
734 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, | ||
735 | &rec.ir_freecount, &rec.ir_free, &i))) | ||
736 | goto error0; | ||
737 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
738 | freecount += rec.ir_freecount; | ||
739 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
740 | goto error0; | ||
741 | } while (i == 1); | ||
742 | 806 | ||
743 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | ||
744 | XFS_FORCED_SHUTDOWN(mp)); | ||
745 | } | ||
746 | #endif | ||
747 | /* | 807 | /* |
748 | * If in the same a.g. as the parent, try to get near the parent. | 808 | * If in the same AG as the parent, try to get near the parent. |
749 | */ | 809 | */ |
750 | if (pagno == agno) { | 810 | if (pagno == agno) { |
751 | if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))) | 811 | xfs_perag_t *pag = &mp->m_perag[agno]; |
812 | int doneleft; /* done, to the left */ | ||
813 | int doneright; /* done, to the right */ | ||
814 | int searchdistance = 10; | ||
815 | |||
816 | error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); | ||
817 | if (error) | ||
818 | goto error0; | ||
819 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
820 | |||
821 | error = xfs_inobt_get_rec(cur, &rec, &j); | ||
822 | if (error) | ||
752 | goto error0; | 823 | goto error0; |
753 | if (i != 0 && | 824 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
754 | (error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 825 | |
755 | &rec.ir_freecount, &rec.ir_free, &j)) == 0 && | 826 | if (rec.ir_freecount > 0) { |
756 | j == 1 && | ||
757 | rec.ir_freecount > 0) { | ||
758 | /* | 827 | /* |
759 | * Found a free inode in the same chunk | 828 | * Found a free inode in the same chunk |
760 | * as parent, done. | 829 | * as the parent, done. |
761 | */ | 830 | */ |
831 | goto alloc_inode; | ||
762 | } | 832 | } |
833 | |||
834 | |||
835 | /* | ||
836 | * In the same AG as parent, but parent's chunk is full. | ||
837 | */ | ||
838 | |||
839 | /* duplicate the cursor, search left & right simultaneously */ | ||
840 | error = xfs_btree_dup_cursor(cur, &tcur); | ||
841 | if (error) | ||
842 | goto error0; | ||
843 | |||
763 | /* | 844 | /* |
764 | * In the same a.g. as parent, but parent's chunk is full. | 845 | * Skip to last blocks looked up if same parent inode. |
765 | */ | 846 | */ |
766 | else { | 847 | if (pagino != NULLAGINO && |
767 | int doneleft; /* done, to the left */ | 848 | pag->pagl_pagino == pagino && |
768 | int doneright; /* done, to the right */ | 849 | pag->pagl_leftrec != NULLAGINO && |
850 | pag->pagl_rightrec != NULLAGINO) { | ||
851 | error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, | ||
852 | &trec, &doneleft, 1); | ||
853 | if (error) | ||
854 | goto error1; | ||
769 | 855 | ||
856 | error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, | ||
857 | &rec, &doneright, 0); | ||
770 | if (error) | 858 | if (error) |
771 | goto error0; | ||
772 | ASSERT(i == 1); | ||
773 | ASSERT(j == 1); | ||
774 | /* | ||
775 | * Duplicate the cursor, search left & right | ||
776 | * simultaneously. | ||
777 | */ | ||
778 | if ((error = xfs_btree_dup_cursor(cur, &tcur))) | ||
779 | goto error0; | ||
780 | /* | ||
781 | * Search left with tcur, back up 1 record. | ||
782 | */ | ||
783 | if ((error = xfs_btree_decrement(tcur, 0, &i))) | ||
784 | goto error1; | 859 | goto error1; |
785 | doneleft = !i; | 860 | } else { |
786 | if (!doneleft) { | 861 | /* search left with tcur, back up 1 record */ |
787 | if ((error = xfs_inobt_get_rec(tcur, | 862 | error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); |
788 | &trec.ir_startino, | 863 | if (error) |
789 | &trec.ir_freecount, | ||
790 | &trec.ir_free, &i))) | ||
791 | goto error1; | ||
792 | XFS_WANT_CORRUPTED_GOTO(i == 1, error1); | ||
793 | } | ||
794 | /* | ||
795 | * Search right with cur, go forward 1 record. | ||
796 | */ | ||
797 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
798 | goto error1; | 864 | goto error1; |
799 | doneright = !i; | ||
800 | if (!doneright) { | ||
801 | if ((error = xfs_inobt_get_rec(cur, | ||
802 | &rec.ir_startino, | ||
803 | &rec.ir_freecount, | ||
804 | &rec.ir_free, &i))) | ||
805 | goto error1; | ||
806 | XFS_WANT_CORRUPTED_GOTO(i == 1, error1); | ||
807 | } | ||
808 | /* | ||
809 | * Loop until we find the closest inode chunk | ||
810 | * with a free one. | ||
811 | */ | ||
812 | while (!doneleft || !doneright) { | ||
813 | int useleft; /* using left inode | ||
814 | chunk this time */ | ||
815 | 865 | ||
866 | /* search right with cur, go forward 1 record. */ | ||
867 | error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); | ||
868 | if (error) | ||
869 | goto error1; | ||
870 | } | ||
871 | |||
872 | /* | ||
873 | * Loop until we find an inode chunk with a free inode. | ||
874 | */ | ||
875 | while (!doneleft || !doneright) { | ||
876 | int useleft; /* using left inode chunk this time */ | ||
877 | |||
878 | if (!--searchdistance) { | ||
816 | /* | 879 | /* |
817 | * Figure out which block is closer, | 880 | * Not in range - save last search |
818 | * if both are valid. | 881 | * location and allocate a new inode |
819 | */ | ||
820 | if (!doneleft && !doneright) | ||
821 | useleft = | ||
822 | pagino - | ||
823 | (trec.ir_startino + | ||
824 | XFS_INODES_PER_CHUNK - 1) < | ||
825 | rec.ir_startino - pagino; | ||
826 | else | ||
827 | useleft = !doneleft; | ||
828 | /* | ||
829 | * If checking the left, does it have | ||
830 | * free inodes? | ||
831 | */ | ||
832 | if (useleft && trec.ir_freecount) { | ||
833 | /* | ||
834 | * Yes, set it up as the chunk to use. | ||
835 | */ | ||
836 | rec = trec; | ||
837 | xfs_btree_del_cursor(cur, | ||
838 | XFS_BTREE_NOERROR); | ||
839 | cur = tcur; | ||
840 | break; | ||
841 | } | ||
842 | /* | ||
843 | * If checking the right, does it have | ||
844 | * free inodes? | ||
845 | */ | ||
846 | if (!useleft && rec.ir_freecount) { | ||
847 | /* | ||
848 | * Yes, it's already set up. | ||
849 | */ | ||
850 | xfs_btree_del_cursor(tcur, | ||
851 | XFS_BTREE_NOERROR); | ||
852 | break; | ||
853 | } | ||
854 | /* | ||
855 | * If used the left, get another one | ||
856 | * further left. | ||
857 | */ | ||
858 | if (useleft) { | ||
859 | if ((error = xfs_btree_decrement(tcur, 0, | ||
860 | &i))) | ||
861 | goto error1; | ||
862 | doneleft = !i; | ||
863 | if (!doneleft) { | ||
864 | if ((error = xfs_inobt_get_rec( | ||
865 | tcur, | ||
866 | &trec.ir_startino, | ||
867 | &trec.ir_freecount, | ||
868 | &trec.ir_free, &i))) | ||
869 | goto error1; | ||
870 | XFS_WANT_CORRUPTED_GOTO(i == 1, | ||
871 | error1); | ||
872 | } | ||
873 | } | ||
874 | /* | ||
875 | * If used the right, get another one | ||
876 | * further right. | ||
877 | */ | 882 | */ |
878 | else { | 883 | pag->pagl_leftrec = trec.ir_startino; |
879 | if ((error = xfs_btree_increment(cur, 0, | 884 | pag->pagl_rightrec = rec.ir_startino; |
880 | &i))) | 885 | pag->pagl_pagino = pagino; |
881 | goto error1; | 886 | goto newino; |
882 | doneright = !i; | 887 | } |
883 | if (!doneright) { | 888 | |
884 | if ((error = xfs_inobt_get_rec( | 889 | /* figure out the closer block if both are valid. */ |
885 | cur, | 890 | if (!doneleft && !doneright) { |
886 | &rec.ir_startino, | 891 | useleft = pagino - |
887 | &rec.ir_freecount, | 892 | (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) < |
888 | &rec.ir_free, &i))) | 893 | rec.ir_startino - pagino; |
889 | goto error1; | 894 | } else { |
890 | XFS_WANT_CORRUPTED_GOTO(i == 1, | 895 | useleft = !doneleft; |
891 | error1); | ||
892 | } | ||
893 | } | ||
894 | } | 896 | } |
895 | ASSERT(!doneleft || !doneright); | 897 | |
898 | /* free inodes to the left? */ | ||
899 | if (useleft && trec.ir_freecount) { | ||
900 | rec = trec; | ||
901 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
902 | cur = tcur; | ||
903 | |||
904 | pag->pagl_leftrec = trec.ir_startino; | ||
905 | pag->pagl_rightrec = rec.ir_startino; | ||
906 | pag->pagl_pagino = pagino; | ||
907 | goto alloc_inode; | ||
908 | } | ||
909 | |||
910 | /* free inodes to the right? */ | ||
911 | if (!useleft && rec.ir_freecount) { | ||
912 | xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); | ||
913 | |||
914 | pag->pagl_leftrec = trec.ir_startino; | ||
915 | pag->pagl_rightrec = rec.ir_startino; | ||
916 | pag->pagl_pagino = pagino; | ||
917 | goto alloc_inode; | ||
918 | } | ||
919 | |||
920 | /* get next record to check */ | ||
921 | if (useleft) { | ||
922 | error = xfs_ialloc_next_rec(tcur, &trec, | ||
923 | &doneleft, 1); | ||
924 | } else { | ||
925 | error = xfs_ialloc_next_rec(cur, &rec, | ||
926 | &doneright, 0); | ||
927 | } | ||
928 | if (error) | ||
929 | goto error1; | ||
896 | } | 930 | } |
931 | |||
932 | /* | ||
933 | * We've reached the end of the btree. because | ||
934 | * we are only searching a small chunk of the | ||
935 | * btree each search, there is obviously free | ||
936 | * inodes closer to the parent inode than we | ||
937 | * are now. restart the search again. | ||
938 | */ | ||
939 | pag->pagl_pagino = NULLAGINO; | ||
940 | pag->pagl_leftrec = NULLAGINO; | ||
941 | pag->pagl_rightrec = NULLAGINO; | ||
942 | xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); | ||
943 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
944 | goto restart_pagno; | ||
897 | } | 945 | } |
946 | |||
898 | /* | 947 | /* |
899 | * In a different a.g. from the parent. | 948 | * In a different AG from the parent. |
900 | * See if the most recently allocated block has any free. | 949 | * See if the most recently allocated block has any free. |
901 | */ | 950 | */ |
902 | else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { | 951 | newino: |
903 | if ((error = xfs_inobt_lookup_eq(cur, | 952 | if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { |
904 | be32_to_cpu(agi->agi_newino), 0, 0, &i))) | 953 | error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), |
954 | XFS_LOOKUP_EQ, &i); | ||
955 | if (error) | ||
905 | goto error0; | 956 | goto error0; |
906 | if (i == 1 && | 957 | |
907 | (error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 958 | if (i == 1) { |
908 | &rec.ir_freecount, &rec.ir_free, &j)) == 0 && | 959 | error = xfs_inobt_get_rec(cur, &rec, &j); |
909 | j == 1 && | ||
910 | rec.ir_freecount > 0) { | ||
911 | /* | ||
912 | * The last chunk allocated in the group still has | ||
913 | * a free inode. | ||
914 | */ | ||
915 | } | ||
916 | /* | ||
917 | * None left in the last group, search the whole a.g. | ||
918 | */ | ||
919 | else { | ||
920 | if (error) | 960 | if (error) |
921 | goto error0; | 961 | goto error0; |
922 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 962 | |
923 | goto error0; | 963 | if (j == 1 && rec.ir_freecount > 0) { |
924 | ASSERT(i == 1); | 964 | /* |
925 | for (;;) { | 965 | * The last chunk allocated in the group |
926 | if ((error = xfs_inobt_get_rec(cur, | 966 | * still has a free inode. |
927 | &rec.ir_startino, | 967 | */ |
928 | &rec.ir_freecount, &rec.ir_free, | 968 | goto alloc_inode; |
929 | &i))) | ||
930 | goto error0; | ||
931 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
932 | if (rec.ir_freecount > 0) | ||
933 | break; | ||
934 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
935 | goto error0; | ||
936 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
937 | } | 969 | } |
938 | } | 970 | } |
939 | } | 971 | } |
972 | |||
973 | /* | ||
974 | * None left in the last group, search the whole AG | ||
975 | */ | ||
976 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); | ||
977 | if (error) | ||
978 | goto error0; | ||
979 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
980 | |||
981 | for (;;) { | ||
982 | error = xfs_inobt_get_rec(cur, &rec, &i); | ||
983 | if (error) | ||
984 | goto error0; | ||
985 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
986 | if (rec.ir_freecount > 0) | ||
987 | break; | ||
988 | error = xfs_btree_increment(cur, 0, &i); | ||
989 | if (error) | ||
990 | goto error0; | ||
991 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
992 | } | ||
993 | |||
994 | alloc_inode: | ||
940 | offset = xfs_ialloc_find_free(&rec.ir_free); | 995 | offset = xfs_ialloc_find_free(&rec.ir_free); |
941 | ASSERT(offset >= 0); | 996 | ASSERT(offset >= 0); |
942 | ASSERT(offset < XFS_INODES_PER_CHUNK); | 997 | ASSERT(offset < XFS_INODES_PER_CHUNK); |
@@ -945,33 +1000,19 @@ nextag: | |||
945 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); | 1000 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); |
946 | rec.ir_free &= ~XFS_INOBT_MASK(offset); | 1001 | rec.ir_free &= ~XFS_INOBT_MASK(offset); |
947 | rec.ir_freecount--; | 1002 | rec.ir_freecount--; |
948 | if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, | 1003 | error = xfs_inobt_update(cur, &rec); |
949 | rec.ir_free))) | 1004 | if (error) |
950 | goto error0; | 1005 | goto error0; |
951 | be32_add_cpu(&agi->agi_freecount, -1); | 1006 | be32_add_cpu(&agi->agi_freecount, -1); |
952 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); | 1007 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); |
953 | down_read(&mp->m_peraglock); | 1008 | down_read(&mp->m_peraglock); |
954 | mp->m_perag[tagno].pagi_freecount--; | 1009 | mp->m_perag[tagno].pagi_freecount--; |
955 | up_read(&mp->m_peraglock); | 1010 | up_read(&mp->m_peraglock); |
956 | #ifdef DEBUG | ||
957 | if (cur->bc_nlevels == 1) { | ||
958 | int freecount = 0; | ||
959 | 1011 | ||
960 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 1012 | error = xfs_check_agi_freecount(cur, agi); |
961 | goto error0; | 1013 | if (error) |
962 | do { | 1014 | goto error0; |
963 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 1015 | |
964 | &rec.ir_freecount, &rec.ir_free, &i))) | ||
965 | goto error0; | ||
966 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
967 | freecount += rec.ir_freecount; | ||
968 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
969 | goto error0; | ||
970 | } while (i == 1); | ||
971 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | ||
972 | XFS_FORCED_SHUTDOWN(mp)); | ||
973 | } | ||
974 | #endif | ||
975 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 1016 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
976 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); | 1017 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); |
977 | *inop = ino; | 1018 | *inop = ino; |
@@ -1062,38 +1103,23 @@ xfs_difree( | |||
1062 | * Initialize the cursor. | 1103 | * Initialize the cursor. |
1063 | */ | 1104 | */ |
1064 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); | 1105 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); |
1065 | #ifdef DEBUG | ||
1066 | if (cur->bc_nlevels == 1) { | ||
1067 | int freecount = 0; | ||
1068 | 1106 | ||
1069 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | 1107 | error = xfs_check_agi_freecount(cur, agi); |
1070 | goto error0; | 1108 | if (error) |
1071 | do { | 1109 | goto error0; |
1072 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, | 1110 | |
1073 | &rec.ir_freecount, &rec.ir_free, &i))) | ||
1074 | goto error0; | ||
1075 | if (i) { | ||
1076 | freecount += rec.ir_freecount; | ||
1077 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
1078 | goto error0; | ||
1079 | } | ||
1080 | } while (i == 1); | ||
1081 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | ||
1082 | XFS_FORCED_SHUTDOWN(mp)); | ||
1083 | } | ||
1084 | #endif | ||
1085 | /* | 1111 | /* |
1086 | * Look for the entry describing this inode. | 1112 | * Look for the entry describing this inode. |
1087 | */ | 1113 | */ |
1088 | if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { | 1114 | if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { |
1089 | cmn_err(CE_WARN, | 1115 | cmn_err(CE_WARN, |
1090 | "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.", | 1116 | "xfs_difree: xfs_inobt_lookup returned() an error %d on %s. Returning error.", |
1091 | error, mp->m_fsname); | 1117 | error, mp->m_fsname); |
1092 | goto error0; | 1118 | goto error0; |
1093 | } | 1119 | } |
1094 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1120 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1095 | if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount, | 1121 | error = xfs_inobt_get_rec(cur, &rec, &i); |
1096 | &rec.ir_free, &i))) { | 1122 | if (error) { |
1097 | cmn_err(CE_WARN, | 1123 | cmn_err(CE_WARN, |
1098 | "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", | 1124 | "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", |
1099 | error, mp->m_fsname); | 1125 | error, mp->m_fsname); |
@@ -1148,12 +1174,14 @@ xfs_difree( | |||
1148 | } else { | 1174 | } else { |
1149 | *delete = 0; | 1175 | *delete = 0; |
1150 | 1176 | ||
1151 | if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { | 1177 | error = xfs_inobt_update(cur, &rec); |
1178 | if (error) { | ||
1152 | cmn_err(CE_WARN, | 1179 | cmn_err(CE_WARN, |
1153 | "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", | 1180 | "xfs_difree: xfs_inobt_update returned an error %d on %s.", |
1154 | error, mp->m_fsname); | 1181 | error, mp->m_fsname); |
1155 | goto error0; | 1182 | goto error0; |
1156 | } | 1183 | } |
1184 | |||
1157 | /* | 1185 | /* |
1158 | * Change the inode free counts and log the ag/sb changes. | 1186 | * Change the inode free counts and log the ag/sb changes. |
1159 | */ | 1187 | */ |
@@ -1165,28 +1193,10 @@ xfs_difree( | |||
1165 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); | 1193 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); |
1166 | } | 1194 | } |
1167 | 1195 | ||
1168 | #ifdef DEBUG | 1196 | error = xfs_check_agi_freecount(cur, agi); |
1169 | if (cur->bc_nlevels == 1) { | 1197 | if (error) |
1170 | int freecount = 0; | 1198 | goto error0; |
1171 | 1199 | ||
1172 | if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) | ||
1173 | goto error0; | ||
1174 | do { | ||
1175 | if ((error = xfs_inobt_get_rec(cur, | ||
1176 | &rec.ir_startino, | ||
1177 | &rec.ir_freecount, | ||
1178 | &rec.ir_free, &i))) | ||
1179 | goto error0; | ||
1180 | if (i) { | ||
1181 | freecount += rec.ir_freecount; | ||
1182 | if ((error = xfs_btree_increment(cur, 0, &i))) | ||
1183 | goto error0; | ||
1184 | } | ||
1185 | } while (i == 1); | ||
1186 | ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || | ||
1187 | XFS_FORCED_SHUTDOWN(mp)); | ||
1188 | } | ||
1189 | #endif | ||
1190 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 1200 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
1191 | return 0; | 1201 | return 0; |
1192 | 1202 | ||
@@ -1297,9 +1307,7 @@ xfs_imap( | |||
1297 | chunk_agbno = agbno - offset_agbno; | 1307 | chunk_agbno = agbno - offset_agbno; |
1298 | } else { | 1308 | } else { |
1299 | xfs_btree_cur_t *cur; /* inode btree cursor */ | 1309 | xfs_btree_cur_t *cur; /* inode btree cursor */ |
1300 | xfs_agino_t chunk_agino; /* first agino in inode chunk */ | 1310 | xfs_inobt_rec_incore_t chunk_rec; |
1301 | __int32_t chunk_cnt; /* count of free inodes in chunk */ | ||
1302 | xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ | ||
1303 | xfs_buf_t *agbp; /* agi buffer */ | 1311 | xfs_buf_t *agbp; /* agi buffer */ |
1304 | int i; /* temp state */ | 1312 | int i; /* temp state */ |
1305 | 1313 | ||
@@ -1315,15 +1323,14 @@ xfs_imap( | |||
1315 | } | 1323 | } |
1316 | 1324 | ||
1317 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); | 1325 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); |
1318 | error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i); | 1326 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); |
1319 | if (error) { | 1327 | if (error) { |
1320 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1328 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " |
1321 | "xfs_inobt_lookup_le() failed"); | 1329 | "xfs_inobt_lookup() failed"); |
1322 | goto error0; | 1330 | goto error0; |
1323 | } | 1331 | } |
1324 | 1332 | ||
1325 | error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, | 1333 | error = xfs_inobt_get_rec(cur, &chunk_rec, &i); |
1326 | &chunk_free, &i); | ||
1327 | if (error) { | 1334 | if (error) { |
1328 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | 1335 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " |
1329 | "xfs_inobt_get_rec() failed"); | 1336 | "xfs_inobt_get_rec() failed"); |
@@ -1341,7 +1348,7 @@ xfs_imap( | |||
1341 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 1348 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
1342 | if (error) | 1349 | if (error) |
1343 | return error; | 1350 | return error; |
1344 | chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); | 1351 | chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino); |
1345 | offset_agbno = agbno - chunk_agbno; | 1352 | offset_agbno = agbno - chunk_agbno; |
1346 | } | 1353 | } |
1347 | 1354 | ||
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index aeee8278f92c..bb5385475e1f 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h | |||
@@ -150,23 +150,15 @@ xfs_ialloc_pagi_init( | |||
150 | xfs_agnumber_t agno); /* allocation group number */ | 150 | xfs_agnumber_t agno); /* allocation group number */ |
151 | 151 | ||
152 | /* | 152 | /* |
153 | * Lookup the first record greater than or equal to ino | 153 | * Lookup a record by ino in the btree given by cur. |
154 | * in the btree given by cur. | ||
155 | */ | 154 | */ |
156 | int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, | 155 | int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, |
157 | __int32_t fcnt, xfs_inofree_t free, int *stat); | 156 | xfs_lookup_t dir, int *stat); |
158 | |||
159 | /* | ||
160 | * Lookup the first record less than or equal to ino | ||
161 | * in the btree given by cur. | ||
162 | */ | ||
163 | int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, | ||
164 | __int32_t fcnt, xfs_inofree_t free, int *stat); | ||
165 | 157 | ||
166 | /* | 158 | /* |
167 | * Get the data from the pointed-to record. | 159 | * Get the data from the pointed-to record. |
168 | */ | 160 | */ |
169 | extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, | 161 | extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, |
170 | __int32_t *fcnt, xfs_inofree_t *free, int *stat); | 162 | xfs_inobt_rec_incore_t *rec, int *stat); |
171 | 163 | ||
172 | #endif /* __XFS_IALLOC_H__ */ | 164 | #endif /* __XFS_IALLOC_H__ */ |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index ecbf8b4d2e2e..80e526489be5 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -82,7 +82,6 @@ xfs_inode_alloc( | |||
82 | memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); | 82 | memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); |
83 | ip->i_flags = 0; | 83 | ip->i_flags = 0; |
84 | ip->i_update_core = 0; | 84 | ip->i_update_core = 0; |
85 | ip->i_update_size = 0; | ||
86 | ip->i_delayed_blks = 0; | 85 | ip->i_delayed_blks = 0; |
87 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); | 86 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); |
88 | ip->i_size = 0; | 87 | ip->i_size = 0; |
@@ -456,32 +455,6 @@ out_error_or_again: | |||
456 | return error; | 455 | return error; |
457 | } | 456 | } |
458 | 457 | ||
459 | |||
460 | /* | ||
461 | * Look for the inode corresponding to the given ino in the hash table. | ||
462 | * If it is there and its i_transp pointer matches tp, return it. | ||
463 | * Otherwise, return NULL. | ||
464 | */ | ||
465 | xfs_inode_t * | ||
466 | xfs_inode_incore(xfs_mount_t *mp, | ||
467 | xfs_ino_t ino, | ||
468 | xfs_trans_t *tp) | ||
469 | { | ||
470 | xfs_inode_t *ip; | ||
471 | xfs_perag_t *pag; | ||
472 | |||
473 | pag = xfs_get_perag(mp, ino); | ||
474 | read_lock(&pag->pag_ici_lock); | ||
475 | ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ino)); | ||
476 | read_unlock(&pag->pag_ici_lock); | ||
477 | xfs_put_perag(mp, pag); | ||
478 | |||
479 | /* the returned inode must match the transaction */ | ||
480 | if (ip && (ip->i_transp != tp)) | ||
481 | return NULL; | ||
482 | return ip; | ||
483 | } | ||
484 | |||
485 | /* | 458 | /* |
486 | * Decrement reference count of an inode structure and unlock it. | 459 | * Decrement reference count of an inode structure and unlock it. |
487 | * | 460 | * |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index da428b3fe0f5..c1dc7ef5a1d8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -651,7 +651,7 @@ xfs_iformat_btree( | |||
651 | return 0; | 651 | return 0; |
652 | } | 652 | } |
653 | 653 | ||
654 | void | 654 | STATIC void |
655 | xfs_dinode_from_disk( | 655 | xfs_dinode_from_disk( |
656 | xfs_icdinode_t *to, | 656 | xfs_icdinode_t *to, |
657 | xfs_dinode_t *from) | 657 | xfs_dinode_t *from) |
@@ -1247,7 +1247,7 @@ xfs_isize_check( | |||
1247 | * In that case the pages will still be in memory, but the inode size | 1247 | * In that case the pages will still be in memory, but the inode size |
1248 | * will never have been updated. | 1248 | * will never have been updated. |
1249 | */ | 1249 | */ |
1250 | xfs_fsize_t | 1250 | STATIC xfs_fsize_t |
1251 | xfs_file_last_byte( | 1251 | xfs_file_last_byte( |
1252 | xfs_inode_t *ip) | 1252 | xfs_inode_t *ip) |
1253 | { | 1253 | { |
@@ -3837,7 +3837,7 @@ xfs_iext_inline_to_direct( | |||
3837 | /* | 3837 | /* |
3838 | * Resize an extent indirection array to new_size bytes. | 3838 | * Resize an extent indirection array to new_size bytes. |
3839 | */ | 3839 | */ |
3840 | void | 3840 | STATIC void |
3841 | xfs_iext_realloc_indirect( | 3841 | xfs_iext_realloc_indirect( |
3842 | xfs_ifork_t *ifp, /* inode fork pointer */ | 3842 | xfs_ifork_t *ifp, /* inode fork pointer */ |
3843 | int new_size) /* new indirection array size */ | 3843 | int new_size) /* new indirection array size */ |
@@ -3862,7 +3862,7 @@ xfs_iext_realloc_indirect( | |||
3862 | /* | 3862 | /* |
3863 | * Switch from indirection array to linear (direct) extent allocations. | 3863 | * Switch from indirection array to linear (direct) extent allocations. |
3864 | */ | 3864 | */ |
3865 | void | 3865 | STATIC void |
3866 | xfs_iext_indirect_to_direct( | 3866 | xfs_iext_indirect_to_direct( |
3867 | xfs_ifork_t *ifp) /* inode fork pointer */ | 3867 | xfs_ifork_t *ifp) /* inode fork pointer */ |
3868 | { | 3868 | { |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 65f24a3cc992..0b38b9a869ec 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -261,7 +261,6 @@ typedef struct xfs_inode { | |||
261 | /* Miscellaneous state. */ | 261 | /* Miscellaneous state. */ |
262 | unsigned short i_flags; /* see defined flags below */ | 262 | unsigned short i_flags; /* see defined flags below */ |
263 | unsigned char i_update_core; /* timestamps/size is dirty */ | 263 | unsigned char i_update_core; /* timestamps/size is dirty */ |
264 | unsigned char i_update_size; /* di_size field is dirty */ | ||
265 | unsigned int i_delayed_blks; /* count of delay alloc blks */ | 264 | unsigned int i_delayed_blks; /* count of delay alloc blks */ |
266 | 265 | ||
267 | xfs_icdinode_t i_d; /* most of ondisk inode */ | 266 | xfs_icdinode_t i_d; /* most of ondisk inode */ |
@@ -468,8 +467,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
468 | /* | 467 | /* |
469 | * xfs_iget.c prototypes. | 468 | * xfs_iget.c prototypes. |
470 | */ | 469 | */ |
471 | xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, | ||
472 | struct xfs_trans *); | ||
473 | int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, | 470 | int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, |
474 | uint, uint, xfs_inode_t **, xfs_daddr_t); | 471 | uint, uint, xfs_inode_t **, xfs_daddr_t); |
475 | void xfs_iput(xfs_inode_t *, uint); | 472 | void xfs_iput(xfs_inode_t *, uint); |
@@ -504,7 +501,6 @@ void xfs_ipin(xfs_inode_t *); | |||
504 | void xfs_iunpin(xfs_inode_t *); | 501 | void xfs_iunpin(xfs_inode_t *); |
505 | int xfs_iflush(xfs_inode_t *, uint); | 502 | int xfs_iflush(xfs_inode_t *, uint); |
506 | void xfs_ichgtime(xfs_inode_t *, int); | 503 | void xfs_ichgtime(xfs_inode_t *, int); |
507 | xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); | ||
508 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 504 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
509 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | 505 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); |
510 | 506 | ||
@@ -572,8 +568,6 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *, | |||
572 | struct xfs_buf **, uint); | 568 | struct xfs_buf **, uint); |
573 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, | 569 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, |
574 | struct xfs_inode *, xfs_daddr_t, uint); | 570 | struct xfs_inode *, xfs_daddr_t, uint); |
575 | void xfs_dinode_from_disk(struct xfs_icdinode *, | ||
576 | struct xfs_dinode *); | ||
577 | void xfs_dinode_to_disk(struct xfs_dinode *, | 571 | void xfs_dinode_to_disk(struct xfs_dinode *, |
578 | struct xfs_icdinode *); | 572 | struct xfs_icdinode *); |
579 | void xfs_idestroy_fork(struct xfs_inode *, int); | 573 | void xfs_idestroy_fork(struct xfs_inode *, int); |
@@ -592,8 +586,6 @@ void xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int); | |||
592 | void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); | 586 | void xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int); |
593 | void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); | 587 | void xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int); |
594 | void xfs_iext_realloc_direct(xfs_ifork_t *, int); | 588 | void xfs_iext_realloc_direct(xfs_ifork_t *, int); |
595 | void xfs_iext_realloc_indirect(xfs_ifork_t *, int); | ||
596 | void xfs_iext_indirect_to_direct(xfs_ifork_t *); | ||
597 | void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); | 589 | void xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t); |
598 | void xfs_iext_inline_to_direct(xfs_ifork_t *, int); | 590 | void xfs_iext_inline_to_direct(xfs_ifork_t *, int); |
599 | void xfs_iext_destroy(xfs_ifork_t *); | 591 | void xfs_iext_destroy(xfs_ifork_t *); |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 977c4aec587e..47d5b663c37e 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -263,14 +263,6 @@ xfs_inode_item_format( | |||
263 | } | 263 | } |
264 | 264 | ||
265 | /* | 265 | /* |
266 | * We don't have to worry about re-ordering here because | ||
267 | * the update_size field is protected by the inode lock | ||
268 | * and we have that held in exclusive mode. | ||
269 | */ | ||
270 | if (ip->i_update_size) | ||
271 | ip->i_update_size = 0; | ||
272 | |||
273 | /* | ||
274 | * Make sure to get the latest atime from the Linux inode. | 266 | * Make sure to get the latest atime from the Linux inode. |
275 | */ | 267 | */ |
276 | xfs_synchronize_atime(ip); | 268 | xfs_synchronize_atime(ip); |
@@ -712,8 +704,6 @@ xfs_inode_item_unlock( | |||
712 | * Clear out the fields of the inode log item particular | 704 | * Clear out the fields of the inode log item particular |
713 | * to the current transaction. | 705 | * to the current transaction. |
714 | */ | 706 | */ |
715 | iip->ili_ilock_recur = 0; | ||
716 | iip->ili_iolock_recur = 0; | ||
717 | iip->ili_flags = 0; | 707 | iip->ili_flags = 0; |
718 | 708 | ||
719 | /* | 709 | /* |
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index a52ac125f055..65bae4c9b8bf 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h | |||
@@ -137,8 +137,6 @@ typedef struct xfs_inode_log_item { | |||
137 | struct xfs_inode *ili_inode; /* inode ptr */ | 137 | struct xfs_inode *ili_inode; /* inode ptr */ |
138 | xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ | 138 | xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ |
139 | xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ | 139 | xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ |
140 | unsigned short ili_ilock_recur; /* lock recursion count */ | ||
141 | unsigned short ili_iolock_recur; /* lock recursion count */ | ||
142 | unsigned short ili_flags; /* misc flags */ | 140 | unsigned short ili_flags; /* misc flags */ |
143 | unsigned short ili_logged; /* flushed logged data */ | 141 | unsigned short ili_logged; /* flushed logged data */ |
144 | unsigned int ili_last_fields; /* fields when flushed */ | 142 | unsigned int ili_last_fields; /* fields when flushed */ |
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h index 7a28191cb0de..b8e4ee4e89a4 100644 --- a/fs/xfs/xfs_inum.h +++ b/fs/xfs/xfs_inum.h | |||
@@ -72,7 +72,6 @@ struct xfs_mount; | |||
72 | 72 | ||
73 | #if XFS_BIG_INUMS | 73 | #if XFS_BIG_INUMS |
74 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) | 74 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) |
75 | #define XFS_INO64_OFFSET ((xfs_ino_t)(1ULL << 32)) | ||
76 | #else | 75 | #else |
77 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL)) | 76 | #define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL)) |
78 | #endif | 77 | #endif |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index aeb2d2221c7d..b68f9107e26c 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -39,7 +39,7 @@ | |||
39 | #include "xfs_error.h" | 39 | #include "xfs_error.h" |
40 | #include "xfs_btree.h" | 40 | #include "xfs_btree.h" |
41 | 41 | ||
42 | int | 42 | STATIC int |
43 | xfs_internal_inum( | 43 | xfs_internal_inum( |
44 | xfs_mount_t *mp, | 44 | xfs_mount_t *mp, |
45 | xfs_ino_t ino) | 45 | xfs_ino_t ino) |
@@ -353,9 +353,6 @@ xfs_bulkstat( | |||
353 | int end_of_ag; /* set if we've seen the ag end */ | 353 | int end_of_ag; /* set if we've seen the ag end */ |
354 | int error; /* error code */ | 354 | int error; /* error code */ |
355 | int fmterror;/* bulkstat formatter result */ | 355 | int fmterror;/* bulkstat formatter result */ |
356 | __int32_t gcnt; /* current btree rec's count */ | ||
357 | xfs_inofree_t gfree; /* current btree rec's free mask */ | ||
358 | xfs_agino_t gino; /* current btree rec's start inode */ | ||
359 | int i; /* loop index */ | 356 | int i; /* loop index */ |
360 | int icount; /* count of inodes good in irbuf */ | 357 | int icount; /* count of inodes good in irbuf */ |
361 | size_t irbsize; /* size of irec buffer in bytes */ | 358 | size_t irbsize; /* size of irec buffer in bytes */ |
@@ -442,40 +439,43 @@ xfs_bulkstat( | |||
442 | * we need to get the remainder of the chunk we're in. | 439 | * we need to get the remainder of the chunk we're in. |
443 | */ | 440 | */ |
444 | if (agino > 0) { | 441 | if (agino > 0) { |
442 | xfs_inobt_rec_incore_t r; | ||
443 | |||
445 | /* | 444 | /* |
446 | * Lookup the inode chunk that this inode lives in. | 445 | * Lookup the inode chunk that this inode lives in. |
447 | */ | 446 | */ |
448 | error = xfs_inobt_lookup_le(cur, agino, 0, 0, &tmp); | 447 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, |
448 | &tmp); | ||
449 | if (!error && /* no I/O error */ | 449 | if (!error && /* no I/O error */ |
450 | tmp && /* lookup succeeded */ | 450 | tmp && /* lookup succeeded */ |
451 | /* got the record, should always work */ | 451 | /* got the record, should always work */ |
452 | !(error = xfs_inobt_get_rec(cur, &gino, &gcnt, | 452 | !(error = xfs_inobt_get_rec(cur, &r, &i)) && |
453 | &gfree, &i)) && | ||
454 | i == 1 && | 453 | i == 1 && |
455 | /* this is the right chunk */ | 454 | /* this is the right chunk */ |
456 | agino < gino + XFS_INODES_PER_CHUNK && | 455 | agino < r.ir_startino + XFS_INODES_PER_CHUNK && |
457 | /* lastino was not last in chunk */ | 456 | /* lastino was not last in chunk */ |
458 | (chunkidx = agino - gino + 1) < | 457 | (chunkidx = agino - r.ir_startino + 1) < |
459 | XFS_INODES_PER_CHUNK && | 458 | XFS_INODES_PER_CHUNK && |
460 | /* there are some left allocated */ | 459 | /* there are some left allocated */ |
461 | xfs_inobt_maskn(chunkidx, | 460 | xfs_inobt_maskn(chunkidx, |
462 | XFS_INODES_PER_CHUNK - chunkidx) & ~gfree) { | 461 | XFS_INODES_PER_CHUNK - chunkidx) & |
462 | ~r.ir_free) { | ||
463 | /* | 463 | /* |
464 | * Grab the chunk record. Mark all the | 464 | * Grab the chunk record. Mark all the |
465 | * uninteresting inodes (because they're | 465 | * uninteresting inodes (because they're |
466 | * before our start point) free. | 466 | * before our start point) free. |
467 | */ | 467 | */ |
468 | for (i = 0; i < chunkidx; i++) { | 468 | for (i = 0; i < chunkidx; i++) { |
469 | if (XFS_INOBT_MASK(i) & ~gfree) | 469 | if (XFS_INOBT_MASK(i) & ~r.ir_free) |
470 | gcnt++; | 470 | r.ir_freecount++; |
471 | } | 471 | } |
472 | gfree |= xfs_inobt_maskn(0, chunkidx); | 472 | r.ir_free |= xfs_inobt_maskn(0, chunkidx); |
473 | irbp->ir_startino = gino; | 473 | irbp->ir_startino = r.ir_startino; |
474 | irbp->ir_freecount = gcnt; | 474 | irbp->ir_freecount = r.ir_freecount; |
475 | irbp->ir_free = gfree; | 475 | irbp->ir_free = r.ir_free; |
476 | irbp++; | 476 | irbp++; |
477 | agino = gino + XFS_INODES_PER_CHUNK; | 477 | agino = r.ir_startino + XFS_INODES_PER_CHUNK; |
478 | icount = XFS_INODES_PER_CHUNK - gcnt; | 478 | icount = XFS_INODES_PER_CHUNK - r.ir_freecount; |
479 | } else { | 479 | } else { |
480 | /* | 480 | /* |
481 | * If any of those tests failed, bump the | 481 | * If any of those tests failed, bump the |
@@ -493,7 +493,7 @@ xfs_bulkstat( | |||
493 | /* | 493 | /* |
494 | * Start of ag. Lookup the first inode chunk. | 494 | * Start of ag. Lookup the first inode chunk. |
495 | */ | 495 | */ |
496 | error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &tmp); | 496 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); |
497 | icount = 0; | 497 | icount = 0; |
498 | } | 498 | } |
499 | /* | 499 | /* |
@@ -501,6 +501,8 @@ xfs_bulkstat( | |||
501 | * until we run out of inodes or space in the buffer. | 501 | * until we run out of inodes or space in the buffer. |
502 | */ | 502 | */ |
503 | while (irbp < irbufend && icount < ubcount) { | 503 | while (irbp < irbufend && icount < ubcount) { |
504 | xfs_inobt_rec_incore_t r; | ||
505 | |||
504 | /* | 506 | /* |
505 | * Loop as long as we're unable to read the | 507 | * Loop as long as we're unable to read the |
506 | * inode btree. | 508 | * inode btree. |
@@ -510,51 +512,55 @@ xfs_bulkstat( | |||
510 | if (XFS_AGINO_TO_AGBNO(mp, agino) >= | 512 | if (XFS_AGINO_TO_AGBNO(mp, agino) >= |
511 | be32_to_cpu(agi->agi_length)) | 513 | be32_to_cpu(agi->agi_length)) |
512 | break; | 514 | break; |
513 | error = xfs_inobt_lookup_ge(cur, agino, 0, 0, | 515 | error = xfs_inobt_lookup(cur, agino, |
514 | &tmp); | 516 | XFS_LOOKUP_GE, &tmp); |
515 | cond_resched(); | 517 | cond_resched(); |
516 | } | 518 | } |
517 | /* | 519 | /* |
518 | * If ran off the end of the ag either with an error, | 520 | * If ran off the end of the ag either with an error, |
519 | * or the normal way, set end and stop collecting. | 521 | * or the normal way, set end and stop collecting. |
520 | */ | 522 | */ |
521 | if (error || | 523 | if (error) { |
522 | (error = xfs_inobt_get_rec(cur, &gino, &gcnt, | ||
523 | &gfree, &i)) || | ||
524 | i == 0) { | ||
525 | end_of_ag = 1; | 524 | end_of_ag = 1; |
526 | break; | 525 | break; |
527 | } | 526 | } |
527 | |||
528 | error = xfs_inobt_get_rec(cur, &r, &i); | ||
529 | if (error || i == 0) { | ||
530 | end_of_ag = 1; | ||
531 | break; | ||
532 | } | ||
533 | |||
528 | /* | 534 | /* |
529 | * If this chunk has any allocated inodes, save it. | 535 | * If this chunk has any allocated inodes, save it. |
530 | * Also start read-ahead now for this chunk. | 536 | * Also start read-ahead now for this chunk. |
531 | */ | 537 | */ |
532 | if (gcnt < XFS_INODES_PER_CHUNK) { | 538 | if (r.ir_freecount < XFS_INODES_PER_CHUNK) { |
533 | /* | 539 | /* |
534 | * Loop over all clusters in the next chunk. | 540 | * Loop over all clusters in the next chunk. |
535 | * Do a readahead if there are any allocated | 541 | * Do a readahead if there are any allocated |
536 | * inodes in that cluster. | 542 | * inodes in that cluster. |
537 | */ | 543 | */ |
538 | for (agbno = XFS_AGINO_TO_AGBNO(mp, gino), | 544 | agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); |
539 | chunkidx = 0; | 545 | for (chunkidx = 0; |
540 | chunkidx < XFS_INODES_PER_CHUNK; | 546 | chunkidx < XFS_INODES_PER_CHUNK; |
541 | chunkidx += nicluster, | 547 | chunkidx += nicluster, |
542 | agbno += nbcluster) { | 548 | agbno += nbcluster) { |
543 | if (xfs_inobt_maskn(chunkidx, | 549 | if (xfs_inobt_maskn(chunkidx, nicluster) |
544 | nicluster) & ~gfree) | 550 | & ~r.ir_free) |
545 | xfs_btree_reada_bufs(mp, agno, | 551 | xfs_btree_reada_bufs(mp, agno, |
546 | agbno, nbcluster); | 552 | agbno, nbcluster); |
547 | } | 553 | } |
548 | irbp->ir_startino = gino; | 554 | irbp->ir_startino = r.ir_startino; |
549 | irbp->ir_freecount = gcnt; | 555 | irbp->ir_freecount = r.ir_freecount; |
550 | irbp->ir_free = gfree; | 556 | irbp->ir_free = r.ir_free; |
551 | irbp++; | 557 | irbp++; |
552 | icount += XFS_INODES_PER_CHUNK - gcnt; | 558 | icount += XFS_INODES_PER_CHUNK - r.ir_freecount; |
553 | } | 559 | } |
554 | /* | 560 | /* |
555 | * Set agino to after this chunk and bump the cursor. | 561 | * Set agino to after this chunk and bump the cursor. |
556 | */ | 562 | */ |
557 | agino = gino + XFS_INODES_PER_CHUNK; | 563 | agino = r.ir_startino + XFS_INODES_PER_CHUNK; |
558 | error = xfs_btree_increment(cur, 0, &tmp); | 564 | error = xfs_btree_increment(cur, 0, &tmp); |
559 | cond_resched(); | 565 | cond_resched(); |
560 | } | 566 | } |
@@ -820,9 +826,7 @@ xfs_inumbers( | |||
820 | int bufidx; | 826 | int bufidx; |
821 | xfs_btree_cur_t *cur; | 827 | xfs_btree_cur_t *cur; |
822 | int error; | 828 | int error; |
823 | __int32_t gcnt; | 829 | xfs_inobt_rec_incore_t r; |
824 | xfs_inofree_t gfree; | ||
825 | xfs_agino_t gino; | ||
826 | int i; | 830 | int i; |
827 | xfs_ino_t ino; | 831 | xfs_ino_t ino; |
828 | int left; | 832 | int left; |
@@ -855,7 +859,8 @@ xfs_inumbers( | |||
855 | continue; | 859 | continue; |
856 | } | 860 | } |
857 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); | 861 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); |
858 | error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); | 862 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, |
863 | &tmp); | ||
859 | if (error) { | 864 | if (error) { |
860 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | 865 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); |
861 | cur = NULL; | 866 | cur = NULL; |
@@ -870,9 +875,8 @@ xfs_inumbers( | |||
870 | continue; | 875 | continue; |
871 | } | 876 | } |
872 | } | 877 | } |
873 | if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree, | 878 | error = xfs_inobt_get_rec(cur, &r, &i); |
874 | &i)) || | 879 | if (error || i == 0) { |
875 | i == 0) { | ||
876 | xfs_buf_relse(agbp); | 880 | xfs_buf_relse(agbp); |
877 | agbp = NULL; | 881 | agbp = NULL; |
878 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | 882 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); |
@@ -881,10 +885,12 @@ xfs_inumbers( | |||
881 | agino = 0; | 885 | agino = 0; |
882 | continue; | 886 | continue; |
883 | } | 887 | } |
884 | agino = gino + XFS_INODES_PER_CHUNK - 1; | 888 | agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; |
885 | buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino); | 889 | buffer[bufidx].xi_startino = |
886 | buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt; | 890 | XFS_AGINO_TO_INO(mp, agno, r.ir_startino); |
887 | buffer[bufidx].xi_allocmask = ~gfree; | 891 | buffer[bufidx].xi_alloccount = |
892 | XFS_INODES_PER_CHUNK - r.ir_freecount; | ||
893 | buffer[bufidx].xi_allocmask = ~r.ir_free; | ||
888 | bufidx++; | 894 | bufidx++; |
889 | left--; | 895 | left--; |
890 | if (bufidx == bcount) { | 896 | if (bufidx == bcount) { |
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 1fb04e7deb61..20792bf45946 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h | |||
@@ -99,11 +99,6 @@ xfs_bulkstat_one( | |||
99 | void *dibuff, | 99 | void *dibuff, |
100 | int *stat); | 100 | int *stat); |
101 | 101 | ||
102 | int | ||
103 | xfs_internal_inum( | ||
104 | xfs_mount_t *mp, | ||
105 | xfs_ino_t ino); | ||
106 | |||
107 | typedef int (*inumbers_fmt_pf)( | 102 | typedef int (*inumbers_fmt_pf)( |
108 | void __user *ubuffer, /* buffer to write to */ | 103 | void __user *ubuffer, /* buffer to write to */ |
109 | const xfs_inogrp_t *buffer, /* buffer to read from */ | 104 | const xfs_inogrp_t *buffer, /* buffer to read from */ |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index bcad5f4c1fd1..679c7c4926a2 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -451,8 +451,6 @@ extern int xlog_find_tail(xlog_t *log, | |||
451 | extern int xlog_recover(xlog_t *log); | 451 | extern int xlog_recover(xlog_t *log); |
452 | extern int xlog_recover_finish(xlog_t *log); | 452 | extern int xlog_recover_finish(xlog_t *log); |
453 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); | 453 | extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); |
454 | extern void xlog_recover_process_iunlinks(xlog_t *log); | ||
455 | |||
456 | extern struct xfs_buf *xlog_get_bp(xlog_t *, int); | 454 | extern struct xfs_buf *xlog_get_bp(xlog_t *, int); |
457 | extern void xlog_put_bp(struct xfs_buf *); | 455 | extern void xlog_put_bp(struct xfs_buf *); |
458 | 456 | ||
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 47da2fb45377..1099395d7d6c 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -3263,7 +3263,7 @@ xlog_recover_process_one_iunlink( | |||
3263 | * freeing of the inode and its removal from the list must be | 3263 | * freeing of the inode and its removal from the list must be |
3264 | * atomic. | 3264 | * atomic. |
3265 | */ | 3265 | */ |
3266 | void | 3266 | STATIC void |
3267 | xlog_recover_process_iunlinks( | 3267 | xlog_recover_process_iunlinks( |
3268 | xlog_t *log) | 3268 | xlog_t *log) |
3269 | { | 3269 | { |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5c6f092659c1..8b6c9e807efb 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -1568,7 +1568,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) | |||
1568 | * | 1568 | * |
1569 | * The m_sb_lock must be held when this routine is called. | 1569 | * The m_sb_lock must be held when this routine is called. |
1570 | */ | 1570 | */ |
1571 | int | 1571 | STATIC int |
1572 | xfs_mod_incore_sb_unlocked( | 1572 | xfs_mod_incore_sb_unlocked( |
1573 | xfs_mount_t *mp, | 1573 | xfs_mount_t *mp, |
1574 | xfs_sb_field_t field, | 1574 | xfs_sb_field_t field, |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a5122382afde..a6c023bc0fb2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -414,13 +414,10 @@ typedef struct xfs_mod_sb { | |||
414 | 414 | ||
415 | extern int xfs_log_sbcount(xfs_mount_t *, uint); | 415 | extern int xfs_log_sbcount(xfs_mount_t *, uint); |
416 | extern int xfs_mountfs(xfs_mount_t *mp); | 416 | extern int xfs_mountfs(xfs_mount_t *mp); |
417 | extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); | ||
418 | 417 | ||
419 | extern void xfs_unmountfs(xfs_mount_t *); | 418 | extern void xfs_unmountfs(xfs_mount_t *); |
420 | extern int xfs_unmountfs_writesb(xfs_mount_t *); | 419 | extern int xfs_unmountfs_writesb(xfs_mount_t *); |
421 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); | 420 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); |
422 | extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, | ||
423 | int64_t, int); | ||
424 | extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, | 421 | extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, |
425 | uint, int); | 422 | uint, int); |
426 | extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); | 423 | extern int xfs_mount_log_sb(xfs_mount_t *, __int64_t); |
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index afee7eb24323..4b0613d99faa 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c | |||
@@ -564,35 +564,6 @@ xfs_mru_cache_lookup( | |||
564 | } | 564 | } |
565 | 565 | ||
566 | /* | 566 | /* |
567 | * To look up an element using its key, but leave its location in the internal | ||
568 | * lists alone, call xfs_mru_cache_peek(). If the element isn't found, this | ||
569 | * function returns NULL. | ||
570 | * | ||
571 | * See the comments above the declaration of the xfs_mru_cache_lookup() function | ||
572 | * for important locking information pertaining to this call. | ||
573 | */ | ||
574 | void * | ||
575 | xfs_mru_cache_peek( | ||
576 | xfs_mru_cache_t *mru, | ||
577 | unsigned long key) | ||
578 | { | ||
579 | xfs_mru_cache_elem_t *elem; | ||
580 | |||
581 | ASSERT(mru && mru->lists); | ||
582 | if (!mru || !mru->lists) | ||
583 | return NULL; | ||
584 | |||
585 | spin_lock(&mru->lock); | ||
586 | elem = radix_tree_lookup(&mru->store, key); | ||
587 | if (!elem) | ||
588 | spin_unlock(&mru->lock); | ||
589 | else | ||
590 | __release(mru_lock); /* help sparse not be stupid */ | ||
591 | |||
592 | return elem ? elem->value : NULL; | ||
593 | } | ||
594 | |||
595 | /* | ||
596 | * To release the internal data structure spinlock after having performed an | 567 | * To release the internal data structure spinlock after having performed an |
597 | * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done() | 568 | * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done() |
598 | * with the data store pointer. | 569 | * with the data store pointer. |
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index dd58ea1bbebe..5d439f34b0c9 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h | |||
@@ -49,7 +49,6 @@ int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, | |||
49 | void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); | 49 | void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); |
50 | void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); | 50 | void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); |
51 | void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); | 51 | void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); |
52 | void *xfs_mru_cache_peek(struct xfs_mru_cache *mru, unsigned long key); | ||
53 | void xfs_mru_cache_done(struct xfs_mru_cache *mru); | 52 | void xfs_mru_cache_done(struct xfs_mru_cache *mru); |
54 | 53 | ||
55 | #endif /* __XFS_MRU_CACHE_H__ */ | 54 | #endif /* __XFS_MRU_CACHE_H__ */ |
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index fea68615ed23..3f816ad7ff19 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c | |||
@@ -88,90 +88,6 @@ xfs_write_clear_setuid( | |||
88 | } | 88 | } |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * Handle logging requirements of various synchronous types of write. | ||
92 | */ | ||
93 | int | ||
94 | xfs_write_sync_logforce( | ||
95 | xfs_mount_t *mp, | ||
96 | xfs_inode_t *ip) | ||
97 | { | ||
98 | int error = 0; | ||
99 | |||
100 | /* | ||
101 | * If we're treating this as O_DSYNC and we have not updated the | ||
102 | * size, force the log. | ||
103 | */ | ||
104 | if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && | ||
105 | !(ip->i_update_size)) { | ||
106 | xfs_inode_log_item_t *iip = ip->i_itemp; | ||
107 | |||
108 | /* | ||
109 | * If an allocation transaction occurred | ||
110 | * without extending the size, then we have to force | ||
111 | * the log up the proper point to ensure that the | ||
112 | * allocation is permanent. We can't count on | ||
113 | * the fact that buffered writes lock out direct I/O | ||
114 | * writes - the direct I/O write could have extended | ||
115 | * the size nontransactionally, then finished before | ||
116 | * we started. xfs_write_file will think that the file | ||
117 | * didn't grow but the update isn't safe unless the | ||
118 | * size change is logged. | ||
119 | * | ||
120 | * Force the log if we've committed a transaction | ||
121 | * against the inode or if someone else has and | ||
122 | * the commit record hasn't gone to disk (e.g. | ||
123 | * the inode is pinned). This guarantees that | ||
124 | * all changes affecting the inode are permanent | ||
125 | * when we return. | ||
126 | */ | ||
127 | if (iip && iip->ili_last_lsn) { | ||
128 | error = _xfs_log_force(mp, iip->ili_last_lsn, | ||
129 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); | ||
130 | } else if (xfs_ipincount(ip) > 0) { | ||
131 | error = _xfs_log_force(mp, (xfs_lsn_t)0, | ||
132 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); | ||
133 | } | ||
134 | |||
135 | } else { | ||
136 | xfs_trans_t *tp; | ||
137 | |||
138 | /* | ||
139 | * O_SYNC or O_DSYNC _with_ a size update are handled | ||
140 | * the same way. | ||
141 | * | ||
142 | * If the write was synchronous then we need to make | ||
143 | * sure that the inode modification time is permanent. | ||
144 | * We'll have updated the timestamp above, so here | ||
145 | * we use a synchronous transaction to log the inode. | ||
146 | * It's not fast, but it's necessary. | ||
147 | * | ||
148 | * If this a dsync write and the size got changed | ||
149 | * non-transactionally, then we need to ensure that | ||
150 | * the size change gets logged in a synchronous | ||
151 | * transaction. | ||
152 | */ | ||
153 | tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); | ||
154 | if ((error = xfs_trans_reserve(tp, 0, | ||
155 | XFS_SWRITE_LOG_RES(mp), | ||
156 | 0, 0, 0))) { | ||
157 | /* Transaction reserve failed */ | ||
158 | xfs_trans_cancel(tp, 0); | ||
159 | } else { | ||
160 | /* Transaction reserve successful */ | ||
161 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
162 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
163 | xfs_trans_ihold(tp, ip); | ||
164 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
165 | xfs_trans_set_sync(tp); | ||
166 | error = xfs_trans_commit(tp, 0); | ||
167 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
168 | } | ||
169 | } | ||
170 | |||
171 | return error; | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * Force a shutdown of the filesystem instantly while keeping | 91 | * Force a shutdown of the filesystem instantly while keeping |
176 | * the filesystem consistent. We don't do an unmount here; just shutdown | 92 | * the filesystem consistent. We don't do an unmount here; just shutdown |
177 | * the shop, make sure that absolutely nothing persistent happens to | 93 | * the shop, make sure that absolutely nothing persistent happens to |
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index f76c003ec55d..f5e4874c37d8 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h | |||
@@ -68,7 +68,6 @@ xfs_get_extsz_hint( | |||
68 | * Prototypes for functions in xfs_rw.c. | 68 | * Prototypes for functions in xfs_rw.c. |
69 | */ | 69 | */ |
70 | extern int xfs_write_clear_setuid(struct xfs_inode *ip); | 70 | extern int xfs_write_clear_setuid(struct xfs_inode *ip); |
71 | extern int xfs_write_sync_logforce(struct xfs_mount *mp, struct xfs_inode *ip); | ||
72 | extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); | 71 | extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); |
73 | extern int xfs_bioerror(struct xfs_buf *bp); | 72 | extern int xfs_bioerror(struct xfs_buf *bp); |
74 | extern int xfs_bioerror_relse(struct xfs_buf *bp); | 73 | extern int xfs_bioerror_relse(struct xfs_buf *bp); |
@@ -78,10 +77,4 @@ extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, | |||
78 | extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, | 77 | extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, |
79 | xfs_buf_t *bp, xfs_daddr_t blkno); | 78 | xfs_buf_t *bp, xfs_daddr_t blkno); |
80 | 79 | ||
81 | /* | ||
82 | * Prototypes for functions in xfs_vnodeops.c. | ||
83 | */ | ||
84 | extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip, | ||
85 | int flags); | ||
86 | |||
87 | #endif /* __XFS_RW_H__ */ | 80 | #endif /* __XFS_RW_H__ */ |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 775249a54f6f..ed47fc77759c 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -68,7 +68,7 @@ typedef struct xfs_trans_header { | |||
68 | #define XFS_TRANS_GROWFS 14 | 68 | #define XFS_TRANS_GROWFS 14 |
69 | #define XFS_TRANS_STRAT_WRITE 15 | 69 | #define XFS_TRANS_STRAT_WRITE 15 |
70 | #define XFS_TRANS_DIOSTRAT 16 | 70 | #define XFS_TRANS_DIOSTRAT 16 |
71 | #define XFS_TRANS_WRITE_SYNC 17 | 71 | /* 17 was XFS_TRANS_WRITE_SYNC */ |
72 | #define XFS_TRANS_WRITEID 18 | 72 | #define XFS_TRANS_WRITEID 18 |
73 | #define XFS_TRANS_ADDAFORK 19 | 73 | #define XFS_TRANS_ADDAFORK 19 |
74 | #define XFS_TRANS_ATTRINVAL 20 | 74 | #define XFS_TRANS_ATTRINVAL 20 |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 8ee2f8c8b0a6..218829e6a152 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -307,7 +307,7 @@ xfs_trans_read_buf( | |||
307 | return (flags & XFS_BUF_TRYLOCK) ? | 307 | return (flags & XFS_BUF_TRYLOCK) ? |
308 | EAGAIN : XFS_ERROR(ENOMEM); | 308 | EAGAIN : XFS_ERROR(ENOMEM); |
309 | 309 | ||
310 | if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { | 310 | if (XFS_BUF_GETERROR(bp) != 0) { |
311 | xfs_ioerror_alert("xfs_trans_read_buf", mp, | 311 | xfs_ioerror_alert("xfs_trans_read_buf", mp, |
312 | bp, blkno); | 312 | bp, blkno); |
313 | error = XFS_BUF_GETERROR(bp); | 313 | error = XFS_BUF_GETERROR(bp); |
@@ -315,7 +315,7 @@ xfs_trans_read_buf( | |||
315 | return error; | 315 | return error; |
316 | } | 316 | } |
317 | #ifdef DEBUG | 317 | #ifdef DEBUG |
318 | if (xfs_do_error && (bp != NULL)) { | 318 | if (xfs_do_error) { |
319 | if (xfs_error_target == target) { | 319 | if (xfs_error_target == target) { |
320 | if (((xfs_req_num++) % xfs_error_mod) == 0) { | 320 | if (((xfs_req_num++) % xfs_error_mod) == 0) { |
321 | xfs_buf_relse(bp); | 321 | xfs_buf_relse(bp); |
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 23d276af2e0c..785ff101da0a 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -49,30 +49,7 @@ xfs_trans_inode_broot_debug( | |||
49 | 49 | ||
50 | 50 | ||
51 | /* | 51 | /* |
52 | * Get and lock the inode for the caller if it is not already | 52 | * Get an inode and join it to the transaction. |
53 | * locked within the given transaction. If it is already locked | ||
54 | * within the transaction, just increment its lock recursion count | ||
55 | * and return a pointer to it. | ||
56 | * | ||
57 | * For an inode to be locked in a transaction, the inode lock, as | ||
58 | * opposed to the io lock, must be taken exclusively. This ensures | ||
59 | * that the inode can be involved in only 1 transaction at a time. | ||
60 | * Lock recursion is handled on the io lock, but only for lock modes | ||
61 | * of equal or lesser strength. That is, you can recur on the io lock | ||
62 | * held EXCL with a SHARED request but not vice versa. Also, if | ||
63 | * the inode is already a part of the transaction then you cannot | ||
64 | * go from not holding the io lock to having it EXCL or SHARED. | ||
65 | * | ||
66 | * Use the inode cache routine xfs_inode_incore() to find the inode | ||
67 | * if it is already owned by this transaction. | ||
68 | * | ||
69 | * If we don't already own the inode, use xfs_iget() to get it. | ||
70 | * Since the inode log item structure is embedded in the incore | ||
71 | * inode structure and is initialized when the inode is brought | ||
72 | * into memory, there is nothing to do with it here. | ||
73 | * | ||
74 | * If the given transaction pointer is NULL, just call xfs_iget(). | ||
75 | * This simplifies code which must handle both cases. | ||
76 | */ | 53 | */ |
77 | int | 54 | int |
78 | xfs_trans_iget( | 55 | xfs_trans_iget( |
@@ -84,62 +61,11 @@ xfs_trans_iget( | |||
84 | xfs_inode_t **ipp) | 61 | xfs_inode_t **ipp) |
85 | { | 62 | { |
86 | int error; | 63 | int error; |
87 | xfs_inode_t *ip; | ||
88 | |||
89 | /* | ||
90 | * If the transaction pointer is NULL, just call the normal | ||
91 | * xfs_iget(). | ||
92 | */ | ||
93 | if (tp == NULL) | ||
94 | return xfs_iget(mp, NULL, ino, flags, lock_flags, ipp, 0); | ||
95 | |||
96 | /* | ||
97 | * If we find the inode in core with this transaction | ||
98 | * pointer in its i_transp field, then we know we already | ||
99 | * have it locked. In this case we just increment the lock | ||
100 | * recursion count and return the inode to the caller. | ||
101 | * Assert that the inode is already locked in the mode requested | ||
102 | * by the caller. We cannot do lock promotions yet, so | ||
103 | * die if someone gets this wrong. | ||
104 | */ | ||
105 | if ((ip = xfs_inode_incore(tp->t_mountp, ino, tp)) != NULL) { | ||
106 | /* | ||
107 | * Make sure that the inode lock is held EXCL and | ||
108 | * that the io lock is never upgraded when the inode | ||
109 | * is already a part of the transaction. | ||
110 | */ | ||
111 | ASSERT(ip->i_itemp != NULL); | ||
112 | ASSERT(lock_flags & XFS_ILOCK_EXCL); | ||
113 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
114 | ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || | ||
115 | xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
116 | ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || | ||
117 | (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL)); | ||
118 | ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || | ||
119 | xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); | ||
120 | ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || | ||
121 | (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY)); | ||
122 | |||
123 | if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { | ||
124 | ip->i_itemp->ili_iolock_recur++; | ||
125 | } | ||
126 | if (lock_flags & XFS_ILOCK_EXCL) { | ||
127 | ip->i_itemp->ili_ilock_recur++; | ||
128 | } | ||
129 | *ipp = ip; | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | ASSERT(lock_flags & XFS_ILOCK_EXCL); | ||
134 | error = xfs_iget(tp->t_mountp, tp, ino, flags, lock_flags, &ip, 0); | ||
135 | if (error) { | ||
136 | return error; | ||
137 | } | ||
138 | ASSERT(ip != NULL); | ||
139 | 64 | ||
140 | xfs_trans_ijoin(tp, ip, lock_flags); | 65 | error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0); |
141 | *ipp = ip; | 66 | if (!error && tp) |
142 | return 0; | 67 | xfs_trans_ijoin(tp, *ipp, lock_flags); |
68 | return error; | ||
143 | } | 69 | } |
144 | 70 | ||
145 | /* | 71 | /* |
@@ -163,8 +89,6 @@ xfs_trans_ijoin( | |||
163 | xfs_inode_item_init(ip, ip->i_mount); | 89 | xfs_inode_item_init(ip, ip->i_mount); |
164 | iip = ip->i_itemp; | 90 | iip = ip->i_itemp; |
165 | ASSERT(iip->ili_flags == 0); | 91 | ASSERT(iip->ili_flags == 0); |
166 | ASSERT(iip->ili_ilock_recur == 0); | ||
167 | ASSERT(iip->ili_iolock_recur == 0); | ||
168 | 92 | ||
169 | /* | 93 | /* |
170 | * Get a log_item_desc to point at the new item. | 94 | * Get a log_item_desc to point at the new item. |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 492d75bae2bf..a434f287962d 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -611,7 +611,7 @@ xfs_fsync( | |||
611 | xfs_inode_t *ip) | 611 | xfs_inode_t *ip) |
612 | { | 612 | { |
613 | xfs_trans_t *tp; | 613 | xfs_trans_t *tp; |
614 | int error; | 614 | int error = 0; |
615 | int log_flushed = 0, changed = 1; | 615 | int log_flushed = 0, changed = 1; |
616 | 616 | ||
617 | xfs_itrace_entry(ip); | 617 | xfs_itrace_entry(ip); |
@@ -619,14 +619,9 @@ xfs_fsync( | |||
619 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 619 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
620 | return XFS_ERROR(EIO); | 620 | return XFS_ERROR(EIO); |
621 | 621 | ||
622 | /* capture size updates in I/O completion before writing the inode. */ | ||
623 | error = xfs_wait_on_pages(ip, 0, -1); | ||
624 | if (error) | ||
625 | return XFS_ERROR(error); | ||
626 | |||
627 | /* | 622 | /* |
628 | * We always need to make sure that the required inode state is safe on | 623 | * We always need to make sure that the required inode state is safe on |
629 | * disk. The vnode might be clean but we still might need to force the | 624 | * disk. The inode might be clean but we still might need to force the |
630 | * log because of committed transactions that haven't hit the disk yet. | 625 | * log because of committed transactions that haven't hit the disk yet. |
631 | * Likewise, there could be unflushed non-transactional changes to the | 626 | * Likewise, there could be unflushed non-transactional changes to the |
632 | * inode core that have to go to disk and this requires us to issue | 627 | * inode core that have to go to disk and this requires us to issue |
@@ -638,7 +633,7 @@ xfs_fsync( | |||
638 | */ | 633 | */ |
639 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 634 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
640 | 635 | ||
641 | if (!(ip->i_update_size || ip->i_update_core)) { | 636 | if (!ip->i_update_core) { |
642 | /* | 637 | /* |
643 | * Timestamps/size haven't changed since last inode flush or | 638 | * Timestamps/size haven't changed since last inode flush or |
644 | * inode transaction commit. That means either nothing got | 639 | * inode transaction commit. That means either nothing got |
@@ -718,7 +713,7 @@ xfs_fsync( | |||
718 | * when the link count isn't zero and by xfs_dm_punch_hole() when | 713 | * when the link count isn't zero and by xfs_dm_punch_hole() when |
719 | * punching a hole to EOF. | 714 | * punching a hole to EOF. |
720 | */ | 715 | */ |
721 | int | 716 | STATIC int |
722 | xfs_free_eofblocks( | 717 | xfs_free_eofblocks( |
723 | xfs_mount_t *mp, | 718 | xfs_mount_t *mp, |
724 | xfs_inode_t *ip, | 719 | xfs_inode_t *ip, |
@@ -1476,8 +1471,8 @@ xfs_create( | |||
1476 | if (error == ENOSPC) { | 1471 | if (error == ENOSPC) { |
1477 | /* flush outstanding delalloc blocks and retry */ | 1472 | /* flush outstanding delalloc blocks and retry */ |
1478 | xfs_flush_inodes(dp); | 1473 | xfs_flush_inodes(dp); |
1479 | error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, | 1474 | error = xfs_trans_reserve(tp, resblks, log_res, 0, |
1480 | XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); | 1475 | XFS_TRANS_PERM_LOG_RES, log_count); |
1481 | } | 1476 | } |
1482 | if (error == ENOSPC) { | 1477 | if (error == ENOSPC) { |
1483 | /* No space at all so try a "no-allocation" reservation */ | 1478 | /* No space at all so try a "no-allocation" reservation */ |