author    | Jiri Kosina <jkosina@suse.cz> | 2012-02-03 17:12:42 -0500
committer | Jiri Kosina <jkosina@suse.cz> | 2012-02-03 17:13:05 -0500
commit    | 972c5ae961d6e5103e2b33d935cfa4145fd47140 (patch)
tree      | 350b2a76b979ba8766c09838617df67ff330eca0 /fs
parent    | 5196d20305d5e30d871111d3a876cf067dd94255 (diff)
parent    | 7c7ed8ec337bf5f62cc5287a6eb6b2f1b7504c2f (diff)
Merge branch 'master' into for-next
Sync with Linus' tree to be able to apply a patch to newer code
(namely drivers/gpu/drm/gma500/psb_intel_lvds.c).
Diffstat (limited to 'fs')
292 files changed, 14423 insertions, 4685 deletions
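Most of the fs/9p churn below is mechanical: the private P9_DPRINTK macro is replaced by p9_debug() (note the conversion also adds the trailing "\n" each format string was missing), and bare printk(KERN_...) calls become pr_info()/pr_err(). A minimal userspace sketch of a level-gated debug macro in this style follows; the P9_DEBUG_* bit values and the "9p: " prefix are illustrative assumptions, not the kernel's actual definitions (those live in include/net/9p/9p.h, where the real macro routes through pr_debug()).

```c
/* Sketch of a level-gated debug macro in the style of p9_debug().
 * Assumption: bit values and prefix are invented for illustration. */
#include <stdio.h>

#define P9_DEBUG_VFS (1 << 3)	/* hypothetical bit assignment */
#define P9_DEBUG_FSC (1 << 13)	/* hypothetical bit assignment */

static unsigned int p9_debug_level = P9_DEBUG_VFS;	/* runtime mask */

#define p9_debug(level, fmt, ...)					\
	do {								\
		if (p9_debug_level & (level))				\
			fprintf(stderr, "9p: " fmt, ##__VA_ARGS__);	\
	} while (0)

int main(void)
{
	p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
		 (size_t)4096, 0LL);		/* printed: bit is set */
	p9_debug(P9_DEBUG_FSC, "suppressed\n");	/* bit not set: silent */
	return 0;
}
```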
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 945aa5f02f9b..a9ea73d6dcf3 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -62,8 +62,8 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data, | |||
62 | uint16_t klen = 0; | 62 | uint16_t klen = 0; |
63 | 63 | ||
64 | v9ses = (struct v9fs_session_info *)cookie_netfs_data; | 64 | v9ses = (struct v9fs_session_info *)cookie_netfs_data; |
65 | P9_DPRINTK(P9_DEBUG_FSC, "session %p buf %p size %u", v9ses, | 65 | p9_debug(P9_DEBUG_FSC, "session %p buf %p size %u\n", |
66 | buffer, bufmax); | 66 | v9ses, buffer, bufmax); |
67 | 67 | ||
68 | if (v9ses->cachetag) | 68 | if (v9ses->cachetag) |
69 | klen = strlen(v9ses->cachetag); | 69 | klen = strlen(v9ses->cachetag); |
@@ -72,7 +72,7 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data, | |||
72 | return 0; | 72 | return 0; |
73 | 73 | ||
74 | memcpy(buffer, v9ses->cachetag, klen); | 74 | memcpy(buffer, v9ses->cachetag, klen); |
75 | P9_DPRINTK(P9_DEBUG_FSC, "cache session tag %s", v9ses->cachetag); | 75 | p9_debug(P9_DEBUG_FSC, "cache session tag %s\n", v9ses->cachetag); |
76 | return klen; | 76 | return klen; |
77 | } | 77 | } |
78 | 78 | ||
@@ -91,14 +91,14 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) | |||
91 | v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index, | 91 | v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index, |
92 | &v9fs_cache_session_index_def, | 92 | &v9fs_cache_session_index_def, |
93 | v9ses); | 93 | v9ses); |
94 | P9_DPRINTK(P9_DEBUG_FSC, "session %p get cookie %p", v9ses, | 94 | p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n", |
95 | v9ses->fscache); | 95 | v9ses, v9ses->fscache); |
96 | } | 96 | } |
97 | 97 | ||
98 | void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses) | 98 | void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses) |
99 | { | 99 | { |
100 | P9_DPRINTK(P9_DEBUG_FSC, "session %p put cookie %p", v9ses, | 100 | p9_debug(P9_DEBUG_FSC, "session %p put cookie %p\n", |
101 | v9ses->fscache); | 101 | v9ses, v9ses->fscache); |
102 | fscache_relinquish_cookie(v9ses->fscache, 0); | 102 | fscache_relinquish_cookie(v9ses->fscache, 0); |
103 | v9ses->fscache = NULL; | 103 | v9ses->fscache = NULL; |
104 | } | 104 | } |
@@ -109,8 +109,8 @@ static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, | |||
109 | { | 109 | { |
110 | const struct v9fs_inode *v9inode = cookie_netfs_data; | 110 | const struct v9fs_inode *v9inode = cookie_netfs_data; |
111 | memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path)); | 111 | memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path)); |
112 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode, | 112 | p9_debug(P9_DEBUG_FSC, "inode %p get key %llu\n", |
113 | v9inode->qid.path); | 113 | &v9inode->vfs_inode, v9inode->qid.path); |
114 | return sizeof(v9inode->qid.path); | 114 | return sizeof(v9inode->qid.path); |
115 | } | 115 | } |
116 | 116 | ||
@@ -120,8 +120,8 @@ static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, | |||
120 | const struct v9fs_inode *v9inode = cookie_netfs_data; | 120 | const struct v9fs_inode *v9inode = cookie_netfs_data; |
121 | *size = i_size_read(&v9inode->vfs_inode); | 121 | *size = i_size_read(&v9inode->vfs_inode); |
122 | 122 | ||
123 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode, | 123 | p9_debug(P9_DEBUG_FSC, "inode %p get attr %llu\n", |
124 | *size); | 124 | &v9inode->vfs_inode, *size); |
125 | } | 125 | } |
126 | 126 | ||
127 | static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, | 127 | static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, |
@@ -129,8 +129,8 @@ static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, | |||
129 | { | 129 | { |
130 | const struct v9fs_inode *v9inode = cookie_netfs_data; | 130 | const struct v9fs_inode *v9inode = cookie_netfs_data; |
131 | memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version)); | 131 | memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version)); |
132 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode, | 132 | p9_debug(P9_DEBUG_FSC, "inode %p get aux %u\n", |
133 | v9inode->qid.version); | 133 | &v9inode->vfs_inode, v9inode->qid.version); |
134 | return sizeof(v9inode->qid.version); | 134 | return sizeof(v9inode->qid.version); |
135 | } | 135 | } |
136 | 136 | ||
@@ -206,8 +206,8 @@ void v9fs_cache_inode_get_cookie(struct inode *inode) | |||
206 | &v9fs_cache_inode_index_def, | 206 | &v9fs_cache_inode_index_def, |
207 | v9inode); | 207 | v9inode); |
208 | 208 | ||
209 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode, | 209 | p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n", |
210 | v9inode->fscache); | 210 | inode, v9inode->fscache); |
211 | } | 211 | } |
212 | 212 | ||
213 | void v9fs_cache_inode_put_cookie(struct inode *inode) | 213 | void v9fs_cache_inode_put_cookie(struct inode *inode) |
@@ -216,8 +216,8 @@ void v9fs_cache_inode_put_cookie(struct inode *inode) | |||
216 | 216 | ||
217 | if (!v9inode->fscache) | 217 | if (!v9inode->fscache) |
218 | return; | 218 | return; |
219 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode, | 219 | p9_debug(P9_DEBUG_FSC, "inode %p put cookie %p\n", |
220 | v9inode->fscache); | 220 | inode, v9inode->fscache); |
221 | 221 | ||
222 | fscache_relinquish_cookie(v9inode->fscache, 0); | 222 | fscache_relinquish_cookie(v9inode->fscache, 0); |
223 | v9inode->fscache = NULL; | 223 | v9inode->fscache = NULL; |
@@ -229,8 +229,8 @@ void v9fs_cache_inode_flush_cookie(struct inode *inode) | |||
229 | 229 | ||
230 | if (!v9inode->fscache) | 230 | if (!v9inode->fscache) |
231 | return; | 231 | return; |
232 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode, | 232 | p9_debug(P9_DEBUG_FSC, "inode %p flush cookie %p\n", |
233 | v9inode->fscache); | 233 | inode, v9inode->fscache); |
234 | 234 | ||
235 | fscache_relinquish_cookie(v9inode->fscache, 1); | 235 | fscache_relinquish_cookie(v9inode->fscache, 1); |
236 | v9inode->fscache = NULL; | 236 | v9inode->fscache = NULL; |
@@ -272,8 +272,8 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode) | |||
272 | v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, | 272 | v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, |
273 | &v9fs_cache_inode_index_def, | 273 | &v9fs_cache_inode_index_def, |
274 | v9inode); | 274 | v9inode); |
275 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p", | 275 | p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n", |
276 | inode, old, v9inode->fscache); | 276 | inode, old, v9inode->fscache); |
277 | 277 | ||
278 | spin_unlock(&v9inode->fscache_lock); | 278 | spin_unlock(&v9inode->fscache_lock); |
279 | } | 279 | } |
@@ -323,7 +323,7 @@ int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) | |||
323 | int ret; | 323 | int ret; |
324 | const struct v9fs_inode *v9inode = V9FS_I(inode); | 324 | const struct v9fs_inode *v9inode = V9FS_I(inode); |
325 | 325 | ||
326 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); | 326 | p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); |
327 | if (!v9inode->fscache) | 327 | if (!v9inode->fscache) |
328 | return -ENOBUFS; | 328 | return -ENOBUFS; |
329 | 329 | ||
@@ -335,13 +335,13 @@ int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) | |||
335 | switch (ret) { | 335 | switch (ret) { |
336 | case -ENOBUFS: | 336 | case -ENOBUFS: |
337 | case -ENODATA: | 337 | case -ENODATA: |
338 | P9_DPRINTK(P9_DEBUG_FSC, "page/inode not in cache %d", ret); | 338 | p9_debug(P9_DEBUG_FSC, "page/inode not in cache %d\n", ret); |
339 | return 1; | 339 | return 1; |
340 | case 0: | 340 | case 0: |
341 | P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted"); | 341 | p9_debug(P9_DEBUG_FSC, "BIO submitted\n"); |
342 | return ret; | 342 | return ret; |
343 | default: | 343 | default: |
344 | P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret); | 344 | p9_debug(P9_DEBUG_FSC, "ret %d\n", ret); |
345 | return ret; | 345 | return ret; |
346 | } | 346 | } |
347 | } | 347 | } |
@@ -361,7 +361,7 @@ int __v9fs_readpages_from_fscache(struct inode *inode, | |||
361 | int ret; | 361 | int ret; |
362 | const struct v9fs_inode *v9inode = V9FS_I(inode); | 362 | const struct v9fs_inode *v9inode = V9FS_I(inode); |
363 | 363 | ||
364 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages); | 364 | p9_debug(P9_DEBUG_FSC, "inode %p pages %u\n", inode, *nr_pages); |
365 | if (!v9inode->fscache) | 365 | if (!v9inode->fscache) |
366 | return -ENOBUFS; | 366 | return -ENOBUFS; |
367 | 367 | ||
@@ -373,15 +373,15 @@ int __v9fs_readpages_from_fscache(struct inode *inode, | |||
373 | switch (ret) { | 373 | switch (ret) { |
374 | case -ENOBUFS: | 374 | case -ENOBUFS: |
375 | case -ENODATA: | 375 | case -ENODATA: |
376 | P9_DPRINTK(P9_DEBUG_FSC, "pages/inodes not in cache %d", ret); | 376 | p9_debug(P9_DEBUG_FSC, "pages/inodes not in cache %d\n", ret); |
377 | return 1; | 377 | return 1; |
378 | case 0: | 378 | case 0: |
379 | BUG_ON(!list_empty(pages)); | 379 | BUG_ON(!list_empty(pages)); |
380 | BUG_ON(*nr_pages != 0); | 380 | BUG_ON(*nr_pages != 0); |
381 | P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted"); | 381 | p9_debug(P9_DEBUG_FSC, "BIO submitted\n"); |
382 | return ret; | 382 | return ret; |
383 | default: | 383 | default: |
384 | P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret); | 384 | p9_debug(P9_DEBUG_FSC, "ret %d\n", ret); |
385 | return ret; | 385 | return ret; |
386 | } | 386 | } |
387 | } | 387 | } |
@@ -396,9 +396,9 @@ void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) | |||
396 | int ret; | 396 | int ret; |
397 | const struct v9fs_inode *v9inode = V9FS_I(inode); | 397 | const struct v9fs_inode *v9inode = V9FS_I(inode); |
398 | 398 | ||
399 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); | 399 | p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); |
400 | ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL); | 400 | ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL); |
401 | P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret); | 401 | p9_debug(P9_DEBUG_FSC, "ret = %d\n", ret); |
402 | if (ret != 0) | 402 | if (ret != 0) |
403 | v9fs_uncache_page(inode, page); | 403 | v9fs_uncache_page(inode, page); |
404 | } | 404 | } |
@@ -409,7 +409,7 @@ void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) | |||
409 | void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page) | 409 | void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page) |
410 | { | 410 | { |
411 | const struct v9fs_inode *v9inode = V9FS_I(inode); | 411 | const struct v9fs_inode *v9inode = V9FS_I(inode); |
412 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); | 412 | p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); |
413 | if (PageFsCache(page)) | 413 | if (PageFsCache(page)) |
414 | fscache_wait_on_page_write(v9inode->fscache, page); | 414 | fscache_wait_on_page_write(v9inode->fscache, page); |
415 | } | 415 | } |
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 85b67ffa2a43..da8eefbe830d 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -45,8 +45,8 @@ int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid) | |||
45 | { | 45 | { |
46 | struct v9fs_dentry *dent; | 46 | struct v9fs_dentry *dent; |
47 | 47 | ||
48 | P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n", | 48 | p9_debug(P9_DEBUG_VFS, "fid %d dentry %s\n", |
49 | fid->fid, dentry->d_name.name); | 49 | fid->fid, dentry->d_name.name); |
50 | 50 | ||
51 | dent = dentry->d_fsdata; | 51 | dent = dentry->d_fsdata; |
52 | if (!dent) { | 52 | if (!dent) { |
@@ -79,8 +79,8 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any) | |||
79 | struct v9fs_dentry *dent; | 79 | struct v9fs_dentry *dent; |
80 | struct p9_fid *fid, *ret; | 80 | struct p9_fid *fid, *ret; |
81 | 81 | ||
82 | P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", | 82 | p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", |
83 | dentry->d_name.name, dentry, uid, any); | 83 | dentry->d_name.name, dentry, uid, any); |
84 | dent = (struct v9fs_dentry *) dentry->d_fsdata; | 84 | dent = (struct v9fs_dentry *) dentry->d_fsdata; |
85 | ret = NULL; | 85 | ret = NULL; |
86 | if (dent) { | 86 | if (dent) { |
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2b78014a124a..1964f98e74be 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -23,6 +23,8 @@ | |||
23 | * | 23 | * |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
27 | |||
26 | #include <linux/module.h> | 28 | #include <linux/module.h> |
27 | #include <linux/errno.h> | 29 | #include <linux/errno.h> |
28 | #include <linux/fs.h> | 30 | #include <linux/fs.h> |
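The pr_fmt() define added above the includes is what lets the later printk conversions in this file drop their hard-coded "9p: " prefixes: in the kernel, pr_info(fmt, ...) expands to printk(KERN_INFO pr_fmt(fmt), ...), so every pr_* message picks up the module-name prefix automatically. A rough userspace approximation of the mechanism:

```c
/* How pr_fmt() shapes pr_info()/pr_err() output (userspace
 * approximation for illustration; the kernel versions expand to
 * printk with a log level prepended). */
#include <stdio.h>

#define KBUILD_MODNAME "9p"		/* set by kbuild in the kernel */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	pr_info("Unknown Cache mode %s\n", "bogus");
	/* prints: 9p: Unknown Cache mode bogus */
	return 0;
}
```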
@@ -85,15 +87,15 @@ static int get_cache_mode(char *s) | |||
85 | 87 | ||
86 | if (!strcmp(s, "loose")) { | 88 | if (!strcmp(s, "loose")) { |
87 | version = CACHE_LOOSE; | 89 | version = CACHE_LOOSE; |
88 | P9_DPRINTK(P9_DEBUG_9P, "Cache mode: loose\n"); | 90 | p9_debug(P9_DEBUG_9P, "Cache mode: loose\n"); |
89 | } else if (!strcmp(s, "fscache")) { | 91 | } else if (!strcmp(s, "fscache")) { |
90 | version = CACHE_FSCACHE; | 92 | version = CACHE_FSCACHE; |
91 | P9_DPRINTK(P9_DEBUG_9P, "Cache mode: fscache\n"); | 93 | p9_debug(P9_DEBUG_9P, "Cache mode: fscache\n"); |
92 | } else if (!strcmp(s, "none")) { | 94 | } else if (!strcmp(s, "none")) { |
93 | version = CACHE_NONE; | 95 | version = CACHE_NONE; |
94 | P9_DPRINTK(P9_DEBUG_9P, "Cache mode: none\n"); | 96 | p9_debug(P9_DEBUG_9P, "Cache mode: none\n"); |
95 | } else | 97 | } else |
96 | printk(KERN_INFO "9p: Unknown Cache mode %s.\n", s); | 98 | pr_info("Unknown Cache mode %s\n", s); |
97 | return version; | 99 | return version; |
98 | } | 100 | } |
99 | 101 | ||
@@ -140,8 +142,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
140 | case Opt_debug: | 142 | case Opt_debug: |
141 | r = match_int(&args[0], &option); | 143 | r = match_int(&args[0], &option); |
142 | if (r < 0) { | 144 | if (r < 0) { |
143 | P9_DPRINTK(P9_DEBUG_ERROR, | 145 | p9_debug(P9_DEBUG_ERROR, |
144 | "integer field, but no integer?\n"); | 146 | "integer field, but no integer?\n"); |
145 | ret = r; | 147 | ret = r; |
146 | continue; | 148 | continue; |
147 | } | 149 | } |
@@ -154,8 +156,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
154 | case Opt_dfltuid: | 156 | case Opt_dfltuid: |
155 | r = match_int(&args[0], &option); | 157 | r = match_int(&args[0], &option); |
156 | if (r < 0) { | 158 | if (r < 0) { |
157 | P9_DPRINTK(P9_DEBUG_ERROR, | 159 | p9_debug(P9_DEBUG_ERROR, |
158 | "integer field, but no integer?\n"); | 160 | "integer field, but no integer?\n"); |
159 | ret = r; | 161 | ret = r; |
160 | continue; | 162 | continue; |
161 | } | 163 | } |
@@ -164,8 +166,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
164 | case Opt_dfltgid: | 166 | case Opt_dfltgid: |
165 | r = match_int(&args[0], &option); | 167 | r = match_int(&args[0], &option); |
166 | if (r < 0) { | 168 | if (r < 0) { |
167 | P9_DPRINTK(P9_DEBUG_ERROR, | 169 | p9_debug(P9_DEBUG_ERROR, |
168 | "integer field, but no integer?\n"); | 170 | "integer field, but no integer?\n"); |
169 | ret = r; | 171 | ret = r; |
170 | continue; | 172 | continue; |
171 | } | 173 | } |
@@ -174,8 +176,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
174 | case Opt_afid: | 176 | case Opt_afid: |
175 | r = match_int(&args[0], &option); | 177 | r = match_int(&args[0], &option); |
176 | if (r < 0) { | 178 | if (r < 0) { |
177 | P9_DPRINTK(P9_DEBUG_ERROR, | 179 | p9_debug(P9_DEBUG_ERROR, |
178 | "integer field, but no integer?\n"); | 180 | "integer field, but no integer?\n"); |
179 | ret = r; | 181 | ret = r; |
180 | continue; | 182 | continue; |
181 | } | 183 | } |
@@ -205,8 +207,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
205 | s = match_strdup(&args[0]); | 207 | s = match_strdup(&args[0]); |
206 | if (!s) { | 208 | if (!s) { |
207 | ret = -ENOMEM; | 209 | ret = -ENOMEM; |
208 | P9_DPRINTK(P9_DEBUG_ERROR, | 210 | p9_debug(P9_DEBUG_ERROR, |
209 | "problem allocating copy of cache arg\n"); | 211 | "problem allocating copy of cache arg\n"); |
210 | goto free_and_return; | 212 | goto free_and_return; |
211 | } | 213 | } |
212 | ret = get_cache_mode(s); | 214 | ret = get_cache_mode(s); |
@@ -223,8 +225,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
223 | s = match_strdup(&args[0]); | 225 | s = match_strdup(&args[0]); |
224 | if (!s) { | 226 | if (!s) { |
225 | ret = -ENOMEM; | 227 | ret = -ENOMEM; |
226 | P9_DPRINTK(P9_DEBUG_ERROR, | 228 | p9_debug(P9_DEBUG_ERROR, |
227 | "problem allocating copy of access arg\n"); | 229 | "problem allocating copy of access arg\n"); |
228 | goto free_and_return; | 230 | goto free_and_return; |
229 | } | 231 | } |
230 | 232 | ||
@@ -240,8 +242,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
240 | v9ses->uid = simple_strtoul(s, &e, 10); | 242 | v9ses->uid = simple_strtoul(s, &e, 10); |
241 | if (*e != '\0') { | 243 | if (*e != '\0') { |
242 | ret = -EINVAL; | 244 | ret = -EINVAL; |
243 | printk(KERN_INFO "9p: Unknown access " | 245 | pr_info("Unknown access argument %s\n", |
244 | "argument %s.\n", s); | 246 | s); |
245 | kfree(s); | 247 | kfree(s); |
246 | goto free_and_return; | 248 | goto free_and_return; |
247 | } | 249 | } |
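The access=<uid> branch keeps the classic parse-and-verify idiom even as its error message moves to pr_info(): simple_strtoul() leaves a pointer to the first unconsumed character, and anything other than '\0' there means the option value was not a pure integer. The same idiom with the standard strtoul(), on which the kernel helper is modeled:

```c
/* Parse-and-verify integer idiom, mirroring the access= handling. */
#include <stdio.h>
#include <stdlib.h>

static int parse_uid(const char *s, unsigned long *uid)
{
	char *e;

	*uid = strtoul(s, &e, 10);
	if (*e != '\0')
		return -1;	/* trailing junk: reject, as v9fs does */
	return 0;
}

int main(void)
{
	unsigned long uid;

	printf("%d\n", parse_uid("1000", &uid));	/* 0  */
	printf("%d\n", parse_uid("1000x", &uid));	/* -1 */
	return 0;
}
```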
@@ -254,9 +256,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
254 | #ifdef CONFIG_9P_FS_POSIX_ACL | 256 | #ifdef CONFIG_9P_FS_POSIX_ACL |
255 | v9ses->flags |= V9FS_POSIX_ACL; | 257 | v9ses->flags |= V9FS_POSIX_ACL; |
256 | #else | 258 | #else |
257 | P9_DPRINTK(P9_DEBUG_ERROR, | 259 | p9_debug(P9_DEBUG_ERROR, |
258 | "Not defined CONFIG_9P_FS_POSIX_ACL. " | 260 | "Not defined CONFIG_9P_FS_POSIX_ACL. Ignoring posixacl option\n"); |
259 | "Ignoring posixacl option\n"); | ||
260 | #endif | 261 | #endif |
261 | break; | 262 | break; |
262 | 263 | ||
@@ -318,7 +319,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
318 | if (IS_ERR(v9ses->clnt)) { | 319 | if (IS_ERR(v9ses->clnt)) { |
319 | retval = PTR_ERR(v9ses->clnt); | 320 | retval = PTR_ERR(v9ses->clnt); |
320 | v9ses->clnt = NULL; | 321 | v9ses->clnt = NULL; |
321 | P9_DPRINTK(P9_DEBUG_ERROR, "problem initializing 9p client\n"); | 322 | p9_debug(P9_DEBUG_ERROR, "problem initializing 9p client\n"); |
322 | goto error; | 323 | goto error; |
323 | } | 324 | } |
324 | 325 | ||
@@ -371,7 +372,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
371 | if (IS_ERR(fid)) { | 372 | if (IS_ERR(fid)) { |
372 | retval = PTR_ERR(fid); | 373 | retval = PTR_ERR(fid); |
373 | fid = NULL; | 374 | fid = NULL; |
374 | P9_DPRINTK(P9_DEBUG_ERROR, "cannot attach\n"); | 375 | p9_debug(P9_DEBUG_ERROR, "cannot attach\n"); |
375 | goto error; | 376 | goto error; |
376 | } | 377 | } |
377 | 378 | ||
@@ -429,7 +430,7 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) | |||
429 | */ | 430 | */ |
430 | 431 | ||
431 | void v9fs_session_cancel(struct v9fs_session_info *v9ses) { | 432 | void v9fs_session_cancel(struct v9fs_session_info *v9ses) { |
432 | P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); | 433 | p9_debug(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); |
433 | p9_client_disconnect(v9ses->clnt); | 434 | p9_client_disconnect(v9ses->clnt); |
434 | } | 435 | } |
435 | 436 | ||
@@ -442,7 +443,7 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses) { | |||
442 | 443 | ||
443 | void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses) | 444 | void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses) |
444 | { | 445 | { |
445 | P9_DPRINTK(P9_DEBUG_ERROR, "begin cancel session %p\n", v9ses); | 446 | p9_debug(P9_DEBUG_ERROR, "begin cancel session %p\n", v9ses); |
446 | p9_client_begin_disconnect(v9ses->clnt); | 447 | p9_client_begin_disconnect(v9ses->clnt); |
447 | } | 448 | } |
448 | 449 | ||
@@ -591,23 +592,23 @@ static void v9fs_cache_unregister(void) | |||
591 | static int __init init_v9fs(void) | 592 | static int __init init_v9fs(void) |
592 | { | 593 | { |
593 | int err; | 594 | int err; |
594 | printk(KERN_INFO "Installing v9fs 9p2000 file system support\n"); | 595 | pr_info("Installing v9fs 9p2000 file system support\n"); |
595 | /* TODO: Setup list of registered trasnport modules */ | 596 | /* TODO: Setup list of registered trasnport modules */ |
596 | err = register_filesystem(&v9fs_fs_type); | 597 | err = register_filesystem(&v9fs_fs_type); |
597 | if (err < 0) { | 598 | if (err < 0) { |
598 | printk(KERN_ERR "Failed to register filesystem\n"); | 599 | pr_err("Failed to register filesystem\n"); |
599 | return err; | 600 | return err; |
600 | } | 601 | } |
601 | 602 | ||
602 | err = v9fs_cache_register(); | 603 | err = v9fs_cache_register(); |
603 | if (err < 0) { | 604 | if (err < 0) { |
604 | printk(KERN_ERR "Failed to register v9fs for caching\n"); | 605 | pr_err("Failed to register v9fs for caching\n"); |
605 | goto out_fs_unreg; | 606 | goto out_fs_unreg; |
606 | } | 607 | } |
607 | 608 | ||
608 | err = v9fs_sysfs_init(); | 609 | err = v9fs_sysfs_init(); |
609 | if (err < 0) { | 610 | if (err < 0) { |
610 | printk(KERN_ERR "Failed to register with sysfs\n"); | 611 | pr_err("Failed to register with sysfs\n"); |
611 | goto out_sysfs_cleanup; | 612 | goto out_sysfs_cleanup; |
612 | } | 613 | } |
613 | 614 | ||
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 2524e4cbb8ea..0ad61c6a65a5 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -56,7 +56,7 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) | |||
56 | struct inode *inode; | 56 | struct inode *inode; |
57 | 57 | ||
58 | inode = page->mapping->host; | 58 | inode = page->mapping->host; |
59 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); | 59 | p9_debug(P9_DEBUG_VFS, "\n"); |
60 | 60 | ||
61 | BUG_ON(!PageLocked(page)); | 61 | BUG_ON(!PageLocked(page)); |
62 | 62 | ||
@@ -116,14 +116,14 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping, | |||
116 | struct inode *inode; | 116 | struct inode *inode; |
117 | 117 | ||
118 | inode = mapping->host; | 118 | inode = mapping->host; |
119 | P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp); | 119 | p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp); |
120 | 120 | ||
121 | ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages); | 121 | ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages); |
122 | if (ret == 0) | 122 | if (ret == 0) |
123 | return ret; | 123 | return ret; |
124 | 124 | ||
125 | ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp); | 125 | ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp); |
126 | P9_DPRINTK(P9_DEBUG_VFS, " = %d\n", ret); | 126 | p9_debug(P9_DEBUG_VFS, " = %d\n", ret); |
127 | return ret; | 127 | return ret; |
128 | } | 128 | } |
129 | 129 | ||
@@ -263,10 +263,9 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
263 | * Now that we do caching with cache mode enabled, We need | 263 | * Now that we do caching with cache mode enabled, We need |
264 | * to support direct IO | 264 | * to support direct IO |
265 | */ | 265 | */ |
266 | P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) " | 266 | p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n", |
267 | "off/no(%lld/%lu) EINVAL\n", | 267 | iocb->ki_filp->f_path.dentry->d_name.name, |
268 | iocb->ki_filp->f_path.dentry->d_name.name, | 268 | (long long)pos, nr_segs); |
269 | (long long) pos, nr_segs); | ||
270 | 269 | ||
271 | return -EINVAL; | 270 | return -EINVAL; |
272 | } | 271 | } |
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index e022890c6f40..d529437ff442 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -53,8 +53,8 @@ | |||
53 | 53 | ||
54 | static int v9fs_dentry_delete(const struct dentry *dentry) | 54 | static int v9fs_dentry_delete(const struct dentry *dentry) |
55 | { | 55 | { |
56 | P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, | 56 | p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", |
57 | dentry); | 57 | dentry->d_name.name, dentry); |
58 | 58 | ||
59 | return 1; | 59 | return 1; |
60 | } | 60 | } |
@@ -66,8 +66,8 @@ static int v9fs_dentry_delete(const struct dentry *dentry) | |||
66 | */ | 66 | */ |
67 | static int v9fs_cached_dentry_delete(const struct dentry *dentry) | 67 | static int v9fs_cached_dentry_delete(const struct dentry *dentry) |
68 | { | 68 | { |
69 | P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", | 69 | p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", |
70 | dentry->d_name.name, dentry); | 70 | dentry->d_name.name, dentry); |
71 | 71 | ||
72 | /* Don't cache negative dentries */ | 72 | /* Don't cache negative dentries */ |
73 | if (!dentry->d_inode) | 73 | if (!dentry->d_inode) |
@@ -86,8 +86,8 @@ static void v9fs_dentry_release(struct dentry *dentry) | |||
86 | struct v9fs_dentry *dent; | 86 | struct v9fs_dentry *dent; |
87 | struct p9_fid *temp, *current_fid; | 87 | struct p9_fid *temp, *current_fid; |
88 | 88 | ||
89 | P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, | 89 | p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", |
90 | dentry); | 90 | dentry->d_name.name, dentry); |
91 | dent = dentry->d_fsdata; | 91 | dent = dentry->d_fsdata; |
92 | if (dent) { | 92 | if (dent) { |
93 | list_for_each_entry_safe(current_fid, temp, &dent->fidlist, | 93 | list_for_each_entry_safe(current_fid, temp, &dent->fidlist, |
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 598fff1a54e5..ff911e779651 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -140,7 +140,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
140 | int reclen = 0; | 140 | int reclen = 0; |
141 | struct p9_rdir *rdir; | 141 | struct p9_rdir *rdir; |
142 | 142 | ||
143 | P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); | 143 | p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); |
144 | fid = filp->private_data; | 144 | fid = filp->private_data; |
145 | 145 | ||
146 | buflen = fid->clnt->msize - P9_IOHDRSZ; | 146 | buflen = fid->clnt->msize - P9_IOHDRSZ; |
@@ -168,7 +168,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
168 | err = p9stat_read(fid->clnt, rdir->buf + rdir->head, | 168 | err = p9stat_read(fid->clnt, rdir->buf + rdir->head, |
169 | rdir->tail - rdir->head, &st); | 169 | rdir->tail - rdir->head, &st); |
170 | if (err) { | 170 | if (err) { |
171 | P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); | 171 | p9_debug(P9_DEBUG_VFS, "returned %d\n", err); |
172 | err = -EIO; | 172 | err = -EIO; |
173 | p9stat_free(&st); | 173 | p9stat_free(&st); |
174 | goto unlock_and_exit; | 174 | goto unlock_and_exit; |
@@ -213,7 +213,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, | |||
213 | struct p9_dirent curdirent; | 213 | struct p9_dirent curdirent; |
214 | u64 oldoffset = 0; | 214 | u64 oldoffset = 0; |
215 | 215 | ||
216 | P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); | 216 | p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); |
217 | fid = filp->private_data; | 217 | fid = filp->private_data; |
218 | 218 | ||
219 | buflen = fid->clnt->msize - P9_READDIRHDRSZ; | 219 | buflen = fid->clnt->msize - P9_READDIRHDRSZ; |
@@ -244,7 +244,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, | |||
244 | rdir->tail - rdir->head, | 244 | rdir->tail - rdir->head, |
245 | &curdirent); | 245 | &curdirent); |
246 | if (err < 0) { | 246 | if (err < 0) { |
247 | P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); | 247 | p9_debug(P9_DEBUG_VFS, "returned %d\n", err); |
248 | err = -EIO; | 248 | err = -EIO; |
249 | goto unlock_and_exit; | 249 | goto unlock_and_exit; |
250 | } | 250 | } |
@@ -290,9 +290,8 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) | |||
290 | struct p9_fid *fid; | 290 | struct p9_fid *fid; |
291 | 291 | ||
292 | fid = filp->private_data; | 292 | fid = filp->private_data; |
293 | P9_DPRINTK(P9_DEBUG_VFS, | 293 | p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n", |
294 | "v9fs_dir_release: inode: %p filp: %p fid: %d\n", | 294 | inode, filp, fid ? fid->fid : -1); |
295 | inode, filp, fid ? fid->fid : -1); | ||
296 | if (fid) | 295 | if (fid) |
297 | p9_client_clunk(fid); | 296 | p9_client_clunk(fid); |
298 | return 0; | 297 | return 0; |
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 62857a810a79..fc06fd27065e 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -61,7 +61,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) | |||
61 | struct p9_fid *fid; | 61 | struct p9_fid *fid; |
62 | int omode; | 62 | int omode; |
63 | 63 | ||
64 | P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); | 64 | p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); |
65 | v9inode = V9FS_I(inode); | 65 | v9inode = V9FS_I(inode); |
66 | v9ses = v9fs_inode2v9ses(inode); | 66 | v9ses = v9fs_inode2v9ses(inode); |
67 | if (v9fs_proto_dotl(v9ses)) | 67 | if (v9fs_proto_dotl(v9ses)) |
@@ -135,7 +135,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) | |||
135 | int res = 0; | 135 | int res = 0; |
136 | struct inode *inode = filp->f_path.dentry->d_inode; | 136 | struct inode *inode = filp->f_path.dentry->d_inode; |
137 | 137 | ||
138 | P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); | 138 | p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); |
139 | 139 | ||
140 | /* No mandatory locks */ | 140 | /* No mandatory locks */ |
141 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) | 141 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) |
@@ -204,7 +204,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) | |||
204 | break; | 204 | break; |
205 | if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd)) | 205 | if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd)) |
206 | break; | 206 | break; |
207 | schedule_timeout_interruptible(P9_LOCK_TIMEOUT); | 207 | if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0) |
208 | break; | ||
208 | } | 209 | } |
209 | 210 | ||
210 | /* map 9p status to VFS status */ | 211 | /* map 9p status to VFS status */ |
@@ -304,8 +305,8 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl) | |||
304 | struct inode *inode = filp->f_path.dentry->d_inode; | 305 | struct inode *inode = filp->f_path.dentry->d_inode; |
305 | int ret = -ENOLCK; | 306 | int ret = -ENOLCK; |
306 | 307 | ||
307 | P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp, | 308 | p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", |
308 | cmd, fl, filp->f_path.dentry->d_name.name); | 309 | filp, cmd, fl, filp->f_path.dentry->d_name.name); |
309 | 310 | ||
310 | /* No mandatory locks */ | 311 | /* No mandatory locks */ |
311 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) | 312 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) |
@@ -340,8 +341,8 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd, | |||
340 | struct inode *inode = filp->f_path.dentry->d_inode; | 341 | struct inode *inode = filp->f_path.dentry->d_inode; |
341 | int ret = -ENOLCK; | 342 | int ret = -ENOLCK; |
342 | 343 | ||
343 | P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp, | 344 | p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", |
344 | cmd, fl, filp->f_path.dentry->d_name.name); | 345 | filp, cmd, fl, filp->f_path.dentry->d_name.name); |
345 | 346 | ||
346 | /* No mandatory locks */ | 347 | /* No mandatory locks */ |
347 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) | 348 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) |
@@ -384,8 +385,8 @@ v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count, | |||
384 | { | 385 | { |
385 | int n, total, size; | 386 | int n, total, size; |
386 | 387 | ||
387 | P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, | 388 | p9_debug(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", |
388 | (long long unsigned) offset, count); | 389 | fid->fid, (long long unsigned)offset, count); |
389 | n = 0; | 390 | n = 0; |
390 | total = 0; | 391 | total = 0; |
391 | size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; | 392 | size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; |
@@ -443,7 +444,7 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count, | |||
443 | struct p9_fid *fid; | 444 | struct p9_fid *fid; |
444 | size_t size; | 445 | size_t size; |
445 | 446 | ||
446 | P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset); | 447 | p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset); |
447 | fid = filp->private_data; | 448 | fid = filp->private_data; |
448 | 449 | ||
449 | size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; | 450 | size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; |
@@ -470,8 +471,8 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid, | |||
470 | loff_t origin = *offset; | 471 | loff_t origin = *offset; |
471 | unsigned long pg_start, pg_end; | 472 | unsigned long pg_start, pg_end; |
472 | 473 | ||
473 | P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, | 474 | p9_debug(P9_DEBUG_VFS, "data %p count %d offset %x\n", |
474 | (int)count, (int)*offset); | 475 | data, (int)count, (int)*offset); |
475 | 476 | ||
476 | clnt = fid->clnt; | 477 | clnt = fid->clnt; |
477 | do { | 478 | do { |
@@ -552,7 +553,7 @@ static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end, | |||
552 | return retval; | 553 | return retval; |
553 | 554 | ||
554 | mutex_lock(&inode->i_mutex); | 555 | mutex_lock(&inode->i_mutex); |
555 | P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); | 556 | p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); |
556 | 557 | ||
557 | fid = filp->private_data; | 558 | fid = filp->private_data; |
558 | v9fs_blank_wstat(&wstat); | 559 | v9fs_blank_wstat(&wstat); |
@@ -575,8 +576,7 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, | |||
575 | return retval; | 576 | return retval; |
576 | 577 | ||
577 | mutex_lock(&inode->i_mutex); | 578 | mutex_lock(&inode->i_mutex); |
578 | P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n", | 579 | p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); |
579 | filp, datasync); | ||
580 | 580 | ||
581 | fid = filp->private_data; | 581 | fid = filp->private_data; |
582 | 582 | ||
@@ -607,8 +607,8 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
607 | struct inode *inode = filp->f_path.dentry->d_inode; | 607 | struct inode *inode = filp->f_path.dentry->d_inode; |
608 | 608 | ||
609 | 609 | ||
610 | P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n", | 610 | p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", |
611 | page, (unsigned long)filp->private_data); | 611 | page, (unsigned long)filp->private_data); |
612 | 612 | ||
613 | v9inode = V9FS_I(inode); | 613 | v9inode = V9FS_I(inode); |
614 | /* make sure the cache has finished storing the page */ | 614 | /* make sure the cache has finished storing the page */ |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e0f20de6aa2b..014c8dd62962 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -23,6 +23,8 @@ | |||
23 | * | 23 | * |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
27 | |||
26 | #include <linux/module.h> | 28 | #include <linux/module.h> |
27 | #include <linux/errno.h> | 29 | #include <linux/errno.h> |
28 | #include <linux/fs.h> | 30 | #include <linux/fs.h> |
@@ -88,6 +90,32 @@ static u32 unixmode2p9mode(struct v9fs_session_info *v9ses, umode_t mode) | |||
88 | } | 90 | } |
89 | 91 | ||
90 | /** | 92 | /** |
93 | * p9mode2perm- convert plan9 mode bits to unix permission bits | ||
94 | * @v9ses: v9fs session information | ||
95 | * @stat: p9_wstat from which mode need to be derived | ||
96 | * | ||
97 | */ | ||
98 | static int p9mode2perm(struct v9fs_session_info *v9ses, | ||
99 | struct p9_wstat *stat) | ||
100 | { | ||
101 | int res; | ||
102 | int mode = stat->mode; | ||
103 | |||
104 | res = mode & S_IALLUGO; | ||
105 | if (v9fs_proto_dotu(v9ses)) { | ||
106 | if ((mode & P9_DMSETUID) == P9_DMSETUID) | ||
107 | res |= S_ISUID; | ||
108 | |||
109 | if ((mode & P9_DMSETGID) == P9_DMSETGID) | ||
110 | res |= S_ISGID; | ||
111 | |||
112 | if ((mode & P9_DMSETVTX) == P9_DMSETVTX) | ||
113 | res |= S_ISVTX; | ||
114 | } | ||
115 | return res; | ||
116 | } | ||
117 | |||
118 | /** | ||
91 | * p9mode2unixmode- convert plan9 mode bits to unix mode bits | 119 | * p9mode2unixmode- convert plan9 mode bits to unix mode bits |
92 | * @v9ses: v9fs session information | 120 | * @v9ses: v9fs session information |
93 | * @stat: p9_wstat from which mode need to be derived | 121 | * @stat: p9_wstat from which mode need to be derived |
@@ -100,8 +128,8 @@ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses, | |||
100 | int res; | 128 | int res; |
101 | u32 mode = stat->mode; | 129 | u32 mode = stat->mode; |
102 | 130 | ||
103 | res = mode & S_IALLUGO; | ||
104 | *rdev = 0; | 131 | *rdev = 0; |
132 | res = p9mode2perm(v9ses, stat); | ||
105 | 133 | ||
106 | if ((mode & P9_DMDIR) == P9_DMDIR) | 134 | if ((mode & P9_DMDIR) == P9_DMDIR) |
107 | res |= S_IFDIR; | 135 | res |= S_IFDIR; |
@@ -128,24 +156,13 @@ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses, | |||
128 | res |= S_IFBLK; | 156 | res |= S_IFBLK; |
129 | break; | 157 | break; |
130 | default: | 158 | default: |
131 | P9_DPRINTK(P9_DEBUG_ERROR, | 159 | p9_debug(P9_DEBUG_ERROR, "Unknown special type %c %s\n", |
132 | "Unknown special type %c %s\n", type, | 160 | type, stat->extension); |
133 | stat->extension); | ||
134 | }; | 161 | }; |
135 | *rdev = MKDEV(major, minor); | 162 | *rdev = MKDEV(major, minor); |
136 | } else | 163 | } else |
137 | res |= S_IFREG; | 164 | res |= S_IFREG; |
138 | 165 | ||
139 | if (v9fs_proto_dotu(v9ses)) { | ||
140 | if ((mode & P9_DMSETUID) == P9_DMSETUID) | ||
141 | res |= S_ISUID; | ||
142 | |||
143 | if ((mode & P9_DMSETGID) == P9_DMSETGID) | ||
144 | res |= S_ISGID; | ||
145 | |||
146 | if ((mode & P9_DMSETVTX) == P9_DMSETVTX) | ||
147 | res |= S_ISVTX; | ||
148 | } | ||
149 | return res; | 166 | return res; |
150 | } | 167 | } |
151 | 168 | ||
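The new p9mode2perm() helper factors the setuid/setgid/sticky translation out of p9mode2unixmode() so that v9fs_stat2inode() (later in this diff) can refresh an inode's permission bits without re-deriving its file type; the old `stat->mode & S_IALLUGO` there silently dropped the 9P2000.u set-id bits, which travel as high DM flags rather than in the low mode bits. A runnable userspace rendering of the helper's logic; the P9_DM* constants follow the 9P2000.u definitions in include/net/9p/9p.h, but treat the exact values as assumptions to verify there:

```c
#include <stdio.h>
#include <sys/stat.h>

/* 9P2000.u mode flags (assumed values; see include/net/9p/9p.h) */
#define P9_DMSETUID 0x00080000u
#define P9_DMSETGID 0x00040000u
#define P9_DMSETVTX 0x00010000u

/* Mirror of the kernel helper: keep permission bits, map the Plan 9
 * set-id/sticky flags onto S_ISUID/S_ISGID/S_ISVTX when the mount
 * speaks 9P2000.u. */
static int p9mode2perm(unsigned int mode, int proto_dotu)
{
	int res = mode & 07777;		/* kernel: mode & S_IALLUGO */

	if (proto_dotu) {
		if (mode & P9_DMSETUID)
			res |= S_ISUID;
		if (mode & P9_DMSETGID)
			res |= S_ISGID;
		if (mode & P9_DMSETVTX)
			res |= S_ISVTX;
	}
	return res;
}

int main(void)
{
	/* a 0755 file whose server-side mode carries the setuid flag */
	printf("%o\n", p9mode2perm(0755 | P9_DMSETUID, 1));	/* 4755 */
	return 0;
}
```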
@@ -275,8 +292,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, | |||
275 | } else if (v9fs_proto_dotu(v9ses)) { | 292 | } else if (v9fs_proto_dotu(v9ses)) { |
276 | inode->i_op = &v9fs_file_inode_operations; | 293 | inode->i_op = &v9fs_file_inode_operations; |
277 | } else { | 294 | } else { |
278 | P9_DPRINTK(P9_DEBUG_ERROR, | 295 | p9_debug(P9_DEBUG_ERROR, |
279 | "special files without extended mode\n"); | 296 | "special files without extended mode\n"); |
280 | err = -EINVAL; | 297 | err = -EINVAL; |
281 | goto error; | 298 | goto error; |
282 | } | 299 | } |
@@ -301,8 +318,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, | |||
301 | break; | 318 | break; |
302 | case S_IFLNK: | 319 | case S_IFLNK: |
303 | if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { | 320 | if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { |
304 | P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with " | 321 | p9_debug(P9_DEBUG_ERROR, |
305 | "legacy protocol.\n"); | 322 | "extended modes used with legacy protocol\n"); |
306 | err = -EINVAL; | 323 | err = -EINVAL; |
307 | goto error; | 324 | goto error; |
308 | } | 325 | } |
@@ -329,8 +346,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, | |||
329 | 346 | ||
330 | break; | 347 | break; |
331 | default: | 348 | default: |
332 | P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%hx S_IFMT 0x%x\n", | 349 | p9_debug(P9_DEBUG_ERROR, "BAD mode 0x%hx S_IFMT 0x%x\n", |
333 | mode, mode & S_IFMT); | 350 | mode, mode & S_IFMT); |
334 | err = -EINVAL; | 351 | err = -EINVAL; |
335 | goto error; | 352 | goto error; |
336 | } | 353 | } |
@@ -352,11 +369,12 @@ struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev) | |||
352 | struct inode *inode; | 369 | struct inode *inode; |
353 | struct v9fs_session_info *v9ses = sb->s_fs_info; | 370 | struct v9fs_session_info *v9ses = sb->s_fs_info; |
354 | 371 | ||
355 | P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %ho\n", sb, mode); | 372 | p9_debug(P9_DEBUG_VFS, "super block: %p mode: %ho\n", sb, mode); |
356 | 373 | ||
357 | inode = new_inode(sb); | 374 | inode = new_inode(sb); |
358 | if (!inode) { | 375 | if (!inode) { |
359 | P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); | 376 | pr_warn("%s (%d): Problem allocating inode\n", |
377 | __func__, task_pid_nr(current)); | ||
360 | return ERR_PTR(-ENOMEM); | 378 | return ERR_PTR(-ENOMEM); |
361 | } | 379 | } |
362 | err = v9fs_init_inode(v9ses, inode, mode, rdev); | 380 | err = v9fs_init_inode(v9ses, inode, mode, rdev); |
@@ -573,15 +591,15 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) | |||
573 | struct p9_fid *v9fid, *dfid; | 591 | struct p9_fid *v9fid, *dfid; |
574 | struct v9fs_session_info *v9ses; | 592 | struct v9fs_session_info *v9ses; |
575 | 593 | ||
576 | P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n", | 594 | p9_debug(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n", |
577 | dir, dentry, flags); | 595 | dir, dentry, flags); |
578 | 596 | ||
579 | v9ses = v9fs_inode2v9ses(dir); | 597 | v9ses = v9fs_inode2v9ses(dir); |
580 | inode = dentry->d_inode; | 598 | inode = dentry->d_inode; |
581 | dfid = v9fs_fid_lookup(dentry->d_parent); | 599 | dfid = v9fs_fid_lookup(dentry->d_parent); |
582 | if (IS_ERR(dfid)) { | 600 | if (IS_ERR(dfid)) { |
583 | retval = PTR_ERR(dfid); | 601 | retval = PTR_ERR(dfid); |
584 | P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", retval); | 602 | p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", retval); |
585 | return retval; | 603 | return retval; |
586 | } | 604 | } |
587 | if (v9fs_proto_dotl(v9ses)) | 605 | if (v9fs_proto_dotl(v9ses)) |
@@ -630,7 +648,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, | |||
630 | struct p9_fid *dfid, *ofid, *fid; | 648 | struct p9_fid *dfid, *ofid, *fid; |
631 | struct inode *inode; | 649 | struct inode *inode; |
632 | 650 | ||
633 | P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); | 651 | p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); |
634 | 652 | ||
635 | err = 0; | 653 | err = 0; |
636 | ofid = NULL; | 654 | ofid = NULL; |
@@ -639,7 +657,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, | |||
639 | dfid = v9fs_fid_lookup(dentry->d_parent); | 657 | dfid = v9fs_fid_lookup(dentry->d_parent); |
640 | if (IS_ERR(dfid)) { | 658 | if (IS_ERR(dfid)) { |
641 | err = PTR_ERR(dfid); | 659 | err = PTR_ERR(dfid); |
642 | P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); | 660 | p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); |
643 | return ERR_PTR(err); | 661 | return ERR_PTR(err); |
644 | } | 662 | } |
645 | 663 | ||
@@ -647,36 +665,41 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, | |||
647 | ofid = p9_client_walk(dfid, 0, NULL, 1); | 665 | ofid = p9_client_walk(dfid, 0, NULL, 1); |
648 | if (IS_ERR(ofid)) { | 666 | if (IS_ERR(ofid)) { |
649 | err = PTR_ERR(ofid); | 667 | err = PTR_ERR(ofid); |
650 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); | 668 | p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); |
651 | return ERR_PTR(err); | 669 | return ERR_PTR(err); |
652 | } | 670 | } |
653 | 671 | ||
654 | err = p9_client_fcreate(ofid, name, perm, mode, extension); | 672 | err = p9_client_fcreate(ofid, name, perm, mode, extension); |
655 | if (err < 0) { | 673 | if (err < 0) { |
656 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err); | 674 | p9_debug(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err); |
657 | goto error; | ||
658 | } | ||
659 | |||
660 | /* now walk from the parent so we can get unopened fid */ | ||
661 | fid = p9_client_walk(dfid, 1, &name, 1); | ||
662 | if (IS_ERR(fid)) { | ||
663 | err = PTR_ERR(fid); | ||
664 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); | ||
665 | fid = NULL; | ||
666 | goto error; | 675 | goto error; |
667 | } | 676 | } |
668 | 677 | ||
669 | /* instantiate inode and assign the unopened fid to the dentry */ | 678 | if (!(perm & P9_DMLINK)) { |
670 | inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); | 679 | /* now walk from the parent so we can get unopened fid */ |
671 | if (IS_ERR(inode)) { | 680 | fid = p9_client_walk(dfid, 1, &name, 1); |
672 | err = PTR_ERR(inode); | 681 | if (IS_ERR(fid)) { |
673 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); | 682 | err = PTR_ERR(fid); |
674 | goto error; | 683 | p9_debug(P9_DEBUG_VFS, |
684 | "p9_client_walk failed %d\n", err); | ||
685 | fid = NULL; | ||
686 | goto error; | ||
687 | } | ||
688 | /* | ||
689 | * instantiate inode and assign the unopened fid to the dentry | ||
690 | */ | ||
691 | inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); | ||
692 | if (IS_ERR(inode)) { | ||
693 | err = PTR_ERR(inode); | ||
694 | p9_debug(P9_DEBUG_VFS, | ||
695 | "inode creation failed %d\n", err); | ||
696 | goto error; | ||
697 | } | ||
698 | err = v9fs_fid_add(dentry, fid); | ||
699 | if (err < 0) | ||
700 | goto error; | ||
701 | d_instantiate(dentry, inode); | ||
675 | } | 702 | } |
676 | err = v9fs_fid_add(dentry, fid); | ||
677 | if (err < 0) | ||
678 | goto error; | ||
679 | d_instantiate(dentry, inode); | ||
680 | return ofid; | 703 | return ofid; |
681 | error: | 704 | error: |
682 | if (ofid) | 705 | if (ofid) |
@@ -788,7 +811,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
788 | struct p9_fid *fid; | 811 | struct p9_fid *fid; |
789 | struct v9fs_session_info *v9ses; | 812 | struct v9fs_session_info *v9ses; |
790 | 813 | ||
791 | P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); | 814 | p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); |
792 | err = 0; | 815 | err = 0; |
793 | v9ses = v9fs_inode2v9ses(dir); | 816 | v9ses = v9fs_inode2v9ses(dir); |
794 | perm = unixmode2p9mode(v9ses, mode | S_IFDIR); | 817 | perm = unixmode2p9mode(v9ses, mode | S_IFDIR); |
@@ -826,8 +849,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | |||
826 | char *name; | 849 | char *name; |
827 | int result = 0; | 850 | int result = 0; |
828 | 851 | ||
829 | P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", | 852 | p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", |
830 | dir, dentry->d_name.name, dentry, nameidata); | 853 | dir, dentry->d_name.name, dentry, nameidata); |
831 | 854 | ||
832 | if (dentry->d_name.len > NAME_MAX) | 855 | if (dentry->d_name.len > NAME_MAX) |
833 | return ERR_PTR(-ENAMETOOLONG); | 856 | return ERR_PTR(-ENAMETOOLONG); |
@@ -933,7 +956,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
933 | struct p9_fid *newdirfid; | 956 | struct p9_fid *newdirfid; |
934 | struct p9_wstat wstat; | 957 | struct p9_wstat wstat; |
935 | 958 | ||
936 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); | 959 | p9_debug(P9_DEBUG_VFS, "\n"); |
937 | retval = 0; | 960 | retval = 0; |
938 | old_inode = old_dentry->d_inode; | 961 | old_inode = old_dentry->d_inode; |
939 | new_inode = new_dentry->d_inode; | 962 | new_inode = new_dentry->d_inode; |
@@ -969,8 +992,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
969 | * 9P .u can only handle file rename in the same directory | 992 | * 9P .u can only handle file rename in the same directory |
970 | */ | 993 | */ |
971 | 994 | ||
972 | P9_DPRINTK(P9_DEBUG_ERROR, | 995 | p9_debug(P9_DEBUG_ERROR, "old dir and new dir are different\n"); |
973 | "old dir and new dir are different\n"); | ||
974 | retval = -EXDEV; | 996 | retval = -EXDEV; |
975 | goto clunk_newdir; | 997 | goto clunk_newdir; |
976 | } | 998 | } |
@@ -1026,7 +1048,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
1026 | struct p9_fid *fid; | 1048 | struct p9_fid *fid; |
1027 | struct p9_wstat *st; | 1049 | struct p9_wstat *st; |
1028 | 1050 | ||
1029 | P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); | 1051 | p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); |
1030 | err = -EPERM; | 1052 | err = -EPERM; |
1031 | v9ses = v9fs_dentry2v9ses(dentry); | 1053 | v9ses = v9fs_dentry2v9ses(dentry); |
1032 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { | 1054 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { |
@@ -1063,7 +1085,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
1063 | struct p9_fid *fid; | 1085 | struct p9_fid *fid; |
1064 | struct p9_wstat wstat; | 1086 | struct p9_wstat wstat; |
1065 | 1087 | ||
1066 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); | 1088 | p9_debug(P9_DEBUG_VFS, "\n"); |
1067 | retval = inode_change_ok(dentry->d_inode, iattr); | 1089 | retval = inode_change_ok(dentry->d_inode, iattr); |
1068 | if (retval) | 1090 | if (retval) |
1069 | return retval; | 1091 | return retval; |
@@ -1162,7 +1184,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, | |||
1162 | set_nlink(inode, i_nlink); | 1184 | set_nlink(inode, i_nlink); |
1163 | } | 1185 | } |
1164 | } | 1186 | } |
1165 | mode = stat->mode & S_IALLUGO; | 1187 | mode = p9mode2perm(v9ses, stat); |
1166 | mode |= inode->i_mode & ~S_IALLUGO; | 1188 | mode |= inode->i_mode & ~S_IALLUGO; |
1167 | inode->i_mode = mode; | 1189 | inode->i_mode = mode; |
1168 | i_size_write(inode, stat->length); | 1190 | i_size_write(inode, stat->length); |
@@ -1208,7 +1230,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) | |||
1208 | struct p9_fid *fid; | 1230 | struct p9_fid *fid; |
1209 | struct p9_wstat *st; | 1231 | struct p9_wstat *st; |
1210 | 1232 | ||
1211 | P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); | 1233 | p9_debug(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); |
1212 | retval = -EPERM; | 1234 | retval = -EPERM; |
1213 | v9ses = v9fs_dentry2v9ses(dentry); | 1235 | v9ses = v9fs_dentry2v9ses(dentry); |
1214 | fid = v9fs_fid_lookup(dentry); | 1236 | fid = v9fs_fid_lookup(dentry); |
@@ -1230,8 +1252,8 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) | |||
1230 | /* copy extension buffer into buffer */ | 1252 | /* copy extension buffer into buffer */ |
1231 | strncpy(buffer, st->extension, buflen); | 1253 | strncpy(buffer, st->extension, buflen); |
1232 | 1254 | ||
1233 | P9_DPRINTK(P9_DEBUG_VFS, | 1255 | p9_debug(P9_DEBUG_VFS, "%s -> %s (%s)\n", |
1234 | "%s -> %s (%s)\n", dentry->d_name.name, st->extension, buffer); | 1256 | dentry->d_name.name, st->extension, buffer); |
1235 | 1257 | ||
1236 | retval = strnlen(buffer, buflen); | 1258 | retval = strnlen(buffer, buflen); |
1237 | done: | 1259 | done: |
@@ -1252,7 +1274,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
1252 | int len = 0; | 1274 | int len = 0; |
1253 | char *link = __getname(); | 1275 | char *link = __getname(); |
1254 | 1276 | ||
1255 | P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name); | 1277 | p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name); |
1256 | 1278 | ||
1257 | if (!link) | 1279 | if (!link) |
1258 | link = ERR_PTR(-ENOMEM); | 1280 | link = ERR_PTR(-ENOMEM); |
@@ -1283,8 +1305,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) | |||
1283 | { | 1305 | { |
1284 | char *s = nd_get_link(nd); | 1306 | char *s = nd_get_link(nd); |
1285 | 1307 | ||
1286 | P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name, | 1308 | p9_debug(P9_DEBUG_VFS, " %s %s\n", |
1287 | IS_ERR(s) ? "<error>" : s); | 1309 | dentry->d_name.name, IS_ERR(s) ? "<error>" : s); |
1288 | if (!IS_ERR(s)) | 1310 | if (!IS_ERR(s)) |
1289 | __putname(s); | 1311 | __putname(s); |
1290 | } | 1312 | } |
@@ -1306,7 +1328,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, | |||
1306 | 1328 | ||
1307 | v9ses = v9fs_inode2v9ses(dir); | 1329 | v9ses = v9fs_inode2v9ses(dir); |
1308 | if (!v9fs_proto_dotu(v9ses)) { | 1330 | if (!v9fs_proto_dotu(v9ses)) { |
1309 | P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n"); | 1331 | p9_debug(P9_DEBUG_ERROR, "not extended\n"); |
1310 | return -EPERM; | 1332 | return -EPERM; |
1311 | } | 1333 | } |
1312 | 1334 | ||
@@ -1333,8 +1355,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, | |||
1333 | static int | 1355 | static int |
1334 | v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) | 1356 | v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) |
1335 | { | 1357 | { |
1336 | P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, | 1358 | p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n", |
1337 | dentry->d_name.name, symname); | 1359 | dir->i_ino, dentry->d_name.name, symname); |
1338 | 1360 | ||
1339 | return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname); | 1361 | return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname); |
1340 | } | 1362 | } |
@@ -1355,9 +1377,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, | |||
1355 | char *name; | 1377 | char *name; |
1356 | struct p9_fid *oldfid; | 1378 | struct p9_fid *oldfid; |
1357 | 1379 | ||
1358 | P9_DPRINTK(P9_DEBUG_VFS, | 1380 | p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n", |
1359 | " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, | 1381 | dir->i_ino, dentry->d_name.name, old_dentry->d_name.name); |
1360 | old_dentry->d_name.name); | ||
1361 | 1382 | ||
1362 | oldfid = v9fs_fid_clone(old_dentry); | 1383 | oldfid = v9fs_fid_clone(old_dentry); |
1363 | if (IS_ERR(oldfid)) | 1384 | if (IS_ERR(oldfid)) |
@@ -1398,9 +1419,9 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde | |||
1398 | char *name; | 1419 | char *name; |
1399 | u32 perm; | 1420 | u32 perm; |
1400 | 1421 | ||
1401 | P9_DPRINTK(P9_DEBUG_VFS, | 1422 | p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", |
1402 | " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", dir->i_ino, | 1423 | dir->i_ino, dentry->d_name.name, mode, |
1403 | dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); | 1424 | MAJOR(rdev), MINOR(rdev)); |
1404 | 1425 | ||
1405 | if (!new_valid_dev(rdev)) | 1426 | if (!new_valid_dev(rdev)) |
1406 | return -EINVAL; | 1427 | return -EINVAL; |
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 8ef152ac6a16..a1e6c990cd41 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -283,13 +283,13 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, | |||
283 | } | 283 | } |
284 | 284 | ||
285 | name = (char *) dentry->d_name.name; | 285 | name = (char *) dentry->d_name.name; |
286 | P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x " | 286 | p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n", |
287 | "mode:0x%hx\n", name, flags, omode); | 287 | name, flags, omode); |
288 | 288 | ||
289 | dfid = v9fs_fid_lookup(dentry->d_parent); | 289 | dfid = v9fs_fid_lookup(dentry->d_parent); |
290 | if (IS_ERR(dfid)) { | 290 | if (IS_ERR(dfid)) { |
291 | err = PTR_ERR(dfid); | 291 | err = PTR_ERR(dfid); |
292 | P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); | 292 | p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); |
293 | return err; | 293 | return err; |
294 | } | 294 | } |
295 | 295 | ||
@@ -297,7 +297,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, | |||
297 | ofid = p9_client_walk(dfid, 0, NULL, 1); | 297 | ofid = p9_client_walk(dfid, 0, NULL, 1); |
298 | if (IS_ERR(ofid)) { | 298 | if (IS_ERR(ofid)) { |
299 | err = PTR_ERR(ofid); | 299 | err = PTR_ERR(ofid); |
300 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); | 300 | p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); |
301 | return err; | 301 | return err; |
302 | } | 302 | } |
303 | 303 | ||
@@ -307,16 +307,15 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, | |||
307 | /* Update mode based on ACL value */ | 307 | /* Update mode based on ACL value */ |
308 | err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); | 308 | err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); |
309 | if (err) { | 309 | if (err) { |
310 | P9_DPRINTK(P9_DEBUG_VFS, | 310 | p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n", |
311 | "Failed to get acl values in creat %d\n", err); | 311 | err); |
312 | goto error; | 312 | goto error; |
313 | } | 313 | } |
314 | err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags), | 314 | err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags), |
315 | mode, gid, &qid); | 315 | mode, gid, &qid); |
316 | if (err < 0) { | 316 | if (err < 0) { |
317 | P9_DPRINTK(P9_DEBUG_VFS, | 317 | p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n", |
318 | "p9_client_open_dotl failed in creat %d\n", | 318 | err); |
319 | err); | ||
320 | goto error; | 319 | goto error; |
321 | } | 320 | } |
322 | v9fs_invalidate_inode_attr(dir); | 321 | v9fs_invalidate_inode_attr(dir); |
@@ -325,14 +324,14 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, | |||
325 | fid = p9_client_walk(dfid, 1, &name, 1); | 324 | fid = p9_client_walk(dfid, 1, &name, 1); |
326 | if (IS_ERR(fid)) { | 325 | if (IS_ERR(fid)) { |
327 | err = PTR_ERR(fid); | 326 | err = PTR_ERR(fid); |
328 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); | 327 | p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); |
329 | fid = NULL; | 328 | fid = NULL; |
330 | goto error; | 329 | goto error; |
331 | } | 330 | } |
332 | inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); | 331 | inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); |
333 | if (IS_ERR(inode)) { | 332 | if (IS_ERR(inode)) { |
334 | err = PTR_ERR(inode); | 333 | err = PTR_ERR(inode); |
335 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); | 334 | p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); |
336 | goto error; | 335 | goto error; |
337 | } | 336 | } |
338 | err = v9fs_fid_add(dentry, fid); | 337 | err = v9fs_fid_add(dentry, fid); |
@@ -408,7 +407,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, | |||
408 | struct dentry *dir_dentry; | 407 | struct dentry *dir_dentry; |
409 | struct posix_acl *dacl = NULL, *pacl = NULL; | 408 | struct posix_acl *dacl = NULL, *pacl = NULL; |
410 | 409 | ||
411 | P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); | 410 | p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); |
412 | err = 0; | 411 | err = 0; |
413 | v9ses = v9fs_inode2v9ses(dir); | 412 | v9ses = v9fs_inode2v9ses(dir); |
414 | 413 | ||
@@ -420,7 +419,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, | |||
420 | dfid = v9fs_fid_lookup(dir_dentry); | 419 | dfid = v9fs_fid_lookup(dir_dentry); |
421 | if (IS_ERR(dfid)) { | 420 | if (IS_ERR(dfid)) { |
422 | err = PTR_ERR(dfid); | 421 | err = PTR_ERR(dfid); |
423 | P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); | 422 | p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); |
424 | dfid = NULL; | 423 | dfid = NULL; |
425 | goto error; | 424 | goto error; |
426 | } | 425 | } |
@@ -430,8 +429,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, | |||
430 | /* Update mode based on ACL value */ | 429 | /* Update mode based on ACL value */ |
431 | err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); | 430 | err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); |
432 | if (err) { | 431 | if (err) { |
433 | P9_DPRINTK(P9_DEBUG_VFS, | 432 | p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mkdir %d\n", |
434 | "Failed to get acl values in mkdir %d\n", err); | 433 | err); |
435 | goto error; | 434 | goto error; |
436 | } | 435 | } |
437 | name = (char *) dentry->d_name.name; | 436 | name = (char *) dentry->d_name.name; |
@@ -444,8 +443,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, | |||
444 | fid = p9_client_walk(dfid, 1, &name, 1); | 443 | fid = p9_client_walk(dfid, 1, &name, 1); |
445 | if (IS_ERR(fid)) { | 444 | if (IS_ERR(fid)) { |
446 | err = PTR_ERR(fid); | 445 | err = PTR_ERR(fid); |
447 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", | 446 | p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", |
448 | err); | 447 | err); |
449 | fid = NULL; | 448 | fid = NULL; |
450 | goto error; | 449 | goto error; |
451 | } | 450 | } |
@@ -453,8 +452,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, | |||
453 | inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); | 452 | inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); |
454 | if (IS_ERR(inode)) { | 453 | if (IS_ERR(inode)) { |
455 | err = PTR_ERR(inode); | 454 | err = PTR_ERR(inode); |
456 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", | 455 | p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", |
457 | err); | 456 | err); |
458 | goto error; | 457 | goto error; |
459 | } | 458 | } |
460 | err = v9fs_fid_add(dentry, fid); | 459 | err = v9fs_fid_add(dentry, fid); |
@@ -495,7 +494,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, | |||
495 | struct p9_fid *fid; | 494 | struct p9_fid *fid; |
496 | struct p9_stat_dotl *st; | 495 | struct p9_stat_dotl *st; |
497 | 496 | ||
498 | P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); | 497 | p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); |
499 | err = -EPERM; | 498 | err = -EPERM; |
500 | v9ses = v9fs_dentry2v9ses(dentry); | 499 | v9ses = v9fs_dentry2v9ses(dentry); |
501 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { | 500 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { |
@@ -523,6 +522,46 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, | |||
523 | return 0; | 522 | return 0; |
524 | } | 523 | } |
525 | 524 | ||
525 | /* | ||
526 | * Attribute flags. | ||
527 | */ | ||
528 | #define P9_ATTR_MODE (1 << 0) | ||
529 | #define P9_ATTR_UID (1 << 1) | ||
530 | #define P9_ATTR_GID (1 << 2) | ||
531 | #define P9_ATTR_SIZE (1 << 3) | ||
532 | #define P9_ATTR_ATIME (1 << 4) | ||
533 | #define P9_ATTR_MTIME (1 << 5) | ||
534 | #define P9_ATTR_CTIME (1 << 6) | ||
535 | #define P9_ATTR_ATIME_SET (1 << 7) | ||
536 | #define P9_ATTR_MTIME_SET (1 << 8) | ||
537 | |||
538 | struct dotl_iattr_map { | ||
539 | int iattr_valid; | ||
540 | int p9_iattr_valid; | ||
541 | }; | ||
542 | |||
543 | static int v9fs_mapped_iattr_valid(int iattr_valid) | ||
544 | { | ||
545 | int i; | ||
546 | int p9_iattr_valid = 0; | ||
547 | struct dotl_iattr_map dotl_iattr_map[] = { | ||
548 | { ATTR_MODE, P9_ATTR_MODE }, | ||
549 | { ATTR_UID, P9_ATTR_UID }, | ||
550 | { ATTR_GID, P9_ATTR_GID }, | ||
551 | { ATTR_SIZE, P9_ATTR_SIZE }, | ||
552 | { ATTR_ATIME, P9_ATTR_ATIME }, | ||
553 | { ATTR_MTIME, P9_ATTR_MTIME }, | ||
554 | { ATTR_CTIME, P9_ATTR_CTIME }, | ||
555 | { ATTR_ATIME_SET, P9_ATTR_ATIME_SET }, | ||
556 | { ATTR_MTIME_SET, P9_ATTR_MTIME_SET }, | ||
557 | }; | ||
558 | for (i = 0; i < ARRAY_SIZE(dotl_iattr_map); i++) { | ||
559 | if (iattr_valid & dotl_iattr_map[i].iattr_valid) | ||
560 | p9_iattr_valid |= dotl_iattr_map[i].p9_iattr_valid; | ||
561 | } | ||
562 | return p9_iattr_valid; | ||
563 | } | ||
564 | |||
526 | /** | 565 | /** |
527 | * v9fs_vfs_setattr_dotl - set file metadata | 566 | * v9fs_vfs_setattr_dotl - set file metadata |
528 | * @dentry: file whose metadata to set | 567 | * @dentry: file whose metadata to set |
@@ -537,13 +576,13 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) | |||
537 | struct p9_fid *fid; | 576 | struct p9_fid *fid; |
538 | struct p9_iattr_dotl p9attr; | 577 | struct p9_iattr_dotl p9attr; |
539 | 578 | ||
540 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); | 579 | p9_debug(P9_DEBUG_VFS, "\n"); |
541 | 580 | ||
542 | retval = inode_change_ok(dentry->d_inode, iattr); | 581 | retval = inode_change_ok(dentry->d_inode, iattr); |
543 | if (retval) | 582 | if (retval) |
544 | return retval; | 583 | return retval; |
545 | 584 | ||
546 | p9attr.valid = iattr->ia_valid; | 585 | p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid); |
547 | p9attr.mode = iattr->ia_mode; | 586 | p9attr.mode = iattr->ia_mode; |
548 | p9attr.uid = iattr->ia_uid; | 587 | p9attr.uid = iattr->ia_uid; |
549 | p9attr.gid = iattr->ia_gid; | 588 | p9attr.gid = iattr->ia_gid; |
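The key functional change in this hunk is that setattr no longer copies iattr->ia_valid straight into the wire structure; v9fs_mapped_iattr_valid() (added above) translates the kernel's ATTR_* bits into the protocol's P9_ATTR_* bits through an explicit table, so the two bit layouts need not coincide. A small sketch of why the table matters, using deliberately mismatched hypothetical values:

    #include <assert.h>
    #include <stddef.h>

    /* deliberately mismatched values: in-kernel bits on the left,
     * hypothetical wire bits on the right */
    enum { ATTR_MODE = 1 << 0, ATTR_SIZE = 1 << 3 };
    enum { P9_ATTR_MODE = 1 << 0, P9_ATTR_SIZE = 1 << 2 };

    static const struct {
        int vfs, wire;
    } iattr_map[] = {
        { ATTR_MODE, P9_ATTR_MODE },
        { ATTR_SIZE, P9_ATTR_SIZE },
    };

    static int map_valid(int vfs_valid)
    {
        int wire = 0;
        size_t i;

        for (i = 0; i < sizeof(iattr_map) / sizeof(iattr_map[0]); i++)
            if (vfs_valid & iattr_map[i].vfs)
                wire |= iattr_map[i].wire;
        return wire;
    }

    int main(void)
    {
        /* a raw copy of the valid mask would send 0x9 over the wire;
         * the table produces the protocol's 0x5 */
        assert(map_valid(ATTR_MODE | ATTR_SIZE) ==
               (P9_ATTR_MODE | P9_ATTR_SIZE));
        return 0;
    }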
@@ -670,14 +709,13 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, | |||
670 | struct v9fs_session_info *v9ses; | 709 | struct v9fs_session_info *v9ses; |
671 | 710 | ||
672 | name = (char *) dentry->d_name.name; | 711 | name = (char *) dentry->d_name.name; |
673 | P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n", | 712 | p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname); |
674 | dir->i_ino, name, symname); | ||
675 | v9ses = v9fs_inode2v9ses(dir); | 713 | v9ses = v9fs_inode2v9ses(dir); |
676 | 714 | ||
677 | dfid = v9fs_fid_lookup(dentry->d_parent); | 715 | dfid = v9fs_fid_lookup(dentry->d_parent); |
678 | if (IS_ERR(dfid)) { | 716 | if (IS_ERR(dfid)) { |
679 | err = PTR_ERR(dfid); | 717 | err = PTR_ERR(dfid); |
680 | P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); | 718 | p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); |
681 | return err; | 719 | return err; |
682 | } | 720 | } |
683 | 721 | ||
@@ -687,7 +725,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, | |||
687 | err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid); | 725 | err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid); |
688 | 726 | ||
689 | if (err < 0) { | 727 | if (err < 0) { |
690 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); | 728 | p9_debug(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); |
691 | goto error; | 729 | goto error; |
692 | } | 730 | } |
693 | 731 | ||
@@ -697,8 +735,8 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, | |||
697 | fid = p9_client_walk(dfid, 1, &name, 1); | 735 | fid = p9_client_walk(dfid, 1, &name, 1); |
698 | if (IS_ERR(fid)) { | 736 | if (IS_ERR(fid)) { |
699 | err = PTR_ERR(fid); | 737 | err = PTR_ERR(fid); |
700 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", | 738 | p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", |
701 | err); | 739 | err); |
702 | fid = NULL; | 740 | fid = NULL; |
703 | goto error; | 741 | goto error; |
704 | } | 742 | } |
@@ -707,8 +745,8 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, | |||
707 | inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); | 745 | inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); |
708 | if (IS_ERR(inode)) { | 746 | if (IS_ERR(inode)) { |
709 | err = PTR_ERR(inode); | 747 | err = PTR_ERR(inode); |
710 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", | 748 | p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", |
711 | err); | 749 | err); |
712 | goto error; | 750 | goto error; |
713 | } | 751 | } |
714 | err = v9fs_fid_add(dentry, fid); | 752 | err = v9fs_fid_add(dentry, fid); |
@@ -751,9 +789,8 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, | |||
751 | struct p9_fid *dfid, *oldfid; | 789 | struct p9_fid *dfid, *oldfid; |
752 | struct v9fs_session_info *v9ses; | 790 | struct v9fs_session_info *v9ses; |
753 | 791 | ||
754 | P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", | 792 | p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", |
755 | dir->i_ino, old_dentry->d_name.name, | 793 | dir->i_ino, old_dentry->d_name.name, dentry->d_name.name); |
756 | dentry->d_name.name); | ||
757 | 794 | ||
758 | v9ses = v9fs_inode2v9ses(dir); | 795 | v9ses = v9fs_inode2v9ses(dir); |
759 | dir_dentry = v9fs_dentry_from_dir_inode(dir); | 796 | dir_dentry = v9fs_dentry_from_dir_inode(dir); |
@@ -770,7 +807,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, | |||
770 | err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name); | 807 | err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name); |
771 | 808 | ||
772 | if (err < 0) { | 809 | if (err < 0) { |
773 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); | 810 | p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); |
774 | return err; | 811 | return err; |
775 | } | 812 | } |
776 | 813 | ||
@@ -813,9 +850,9 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, | |||
813 | struct dentry *dir_dentry; | 850 | struct dentry *dir_dentry; |
814 | struct posix_acl *dacl = NULL, *pacl = NULL; | 851 | struct posix_acl *dacl = NULL, *pacl = NULL; |
815 | 852 | ||
816 | P9_DPRINTK(P9_DEBUG_VFS, | 853 | p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", |
817 | " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", dir->i_ino, | 854 | dir->i_ino, dentry->d_name.name, omode, |
818 | dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev)); | 855 | MAJOR(rdev), MINOR(rdev)); |
819 | 856 | ||
820 | if (!new_valid_dev(rdev)) | 857 | if (!new_valid_dev(rdev)) |
821 | return -EINVAL; | 858 | return -EINVAL; |
@@ -825,7 +862,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, | |||
825 | dfid = v9fs_fid_lookup(dir_dentry); | 862 | dfid = v9fs_fid_lookup(dir_dentry); |
826 | if (IS_ERR(dfid)) { | 863 | if (IS_ERR(dfid)) { |
827 | err = PTR_ERR(dfid); | 864 | err = PTR_ERR(dfid); |
828 | P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err); | 865 | p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); |
829 | dfid = NULL; | 866 | dfid = NULL; |
830 | goto error; | 867 | goto error; |
831 | } | 868 | } |
@@ -835,8 +872,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, | |||
835 | /* Update mode based on ACL value */ | 872 | /* Update mode based on ACL value */ |
836 | err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); | 873 | err = v9fs_acl_mode(dir, &mode, &dacl, &pacl); |
837 | if (err) { | 874 | if (err) { |
838 | P9_DPRINTK(P9_DEBUG_VFS, | 875 | p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mknod %d\n", |
839 | "Failed to get acl values in mknod %d\n", err); | 876 | err); |
840 | goto error; | 877 | goto error; |
841 | } | 878 | } |
842 | name = (char *) dentry->d_name.name; | 879 | name = (char *) dentry->d_name.name; |
@@ -851,8 +888,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, | |||
851 | fid = p9_client_walk(dfid, 1, &name, 1); | 888 | fid = p9_client_walk(dfid, 1, &name, 1); |
852 | if (IS_ERR(fid)) { | 889 | if (IS_ERR(fid)) { |
853 | err = PTR_ERR(fid); | 890 | err = PTR_ERR(fid); |
854 | P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", | 891 | p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", |
855 | err); | 892 | err); |
856 | fid = NULL; | 893 | fid = NULL; |
857 | goto error; | 894 | goto error; |
858 | } | 895 | } |
@@ -860,8 +897,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, | |||
860 | inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); | 897 | inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); |
861 | if (IS_ERR(inode)) { | 898 | if (IS_ERR(inode)) { |
862 | err = PTR_ERR(inode); | 899 | err = PTR_ERR(inode); |
863 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", | 900 | p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", |
864 | err); | 901 | err); |
865 | goto error; | 902 | goto error; |
866 | } | 903 | } |
867 | err = v9fs_fid_add(dentry, fid); | 904 | err = v9fs_fid_add(dentry, fid); |
@@ -905,7 +942,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd) | |||
905 | char *link = __getname(); | 942 | char *link = __getname(); |
906 | char *target; | 943 | char *target; |
907 | 944 | ||
908 | P9_DPRINTK(P9_DEBUG_VFS, "%s\n", dentry->d_name.name); | 945 | p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name); |
909 | 946 | ||
910 | if (!link) { | 947 | if (!link) { |
911 | link = ERR_PTR(-ENOMEM); | 948 | link = ERR_PTR(-ENOMEM); |
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index f68ff65a32a5..7b0cd87b07c2 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c | |||
@@ -121,7 +121,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, | |||
121 | struct p9_fid *fid; | 121 | struct p9_fid *fid; |
122 | int retval = 0; | 122 | int retval = 0; |
123 | 123 | ||
124 | P9_DPRINTK(P9_DEBUG_VFS, " \n"); | 124 | p9_debug(P9_DEBUG_VFS, "\n"); |
125 | 125 | ||
126 | v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); | 126 | v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); |
127 | if (!v9ses) | 127 | if (!v9ses) |
@@ -191,7 +191,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, | |||
191 | goto release_sb; | 191 | goto release_sb; |
192 | v9fs_fid_add(root, fid); | 192 | v9fs_fid_add(root, fid); |
193 | 193 | ||
194 | P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); | 194 | p9_debug(P9_DEBUG_VFS, " simple set mount, return 0\n"); |
195 | return dget(sb->s_root); | 195 | return dget(sb->s_root); |
196 | 196 | ||
197 | clunk_fid: | 197 | clunk_fid: |
@@ -223,7 +223,7 @@ static void v9fs_kill_super(struct super_block *s) | |||
223 | { | 223 | { |
224 | struct v9fs_session_info *v9ses = s->s_fs_info; | 224 | struct v9fs_session_info *v9ses = s->s_fs_info; |
225 | 225 | ||
226 | P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); | 226 | p9_debug(P9_DEBUG_VFS, " %p\n", s); |
227 | 227 | ||
228 | kill_anon_super(s); | 228 | kill_anon_super(s); |
229 | 229 | ||
@@ -231,7 +231,7 @@ static void v9fs_kill_super(struct super_block *s) | |||
231 | v9fs_session_close(v9ses); | 231 | v9fs_session_close(v9ses); |
232 | kfree(v9ses); | 232 | kfree(v9ses); |
233 | s->s_fs_info = NULL; | 233 | s->s_fs_info = NULL; |
234 | P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n"); | 234 | p9_debug(P9_DEBUG_VFS, "exiting kill_super\n"); |
235 | } | 235 | } |
236 | 236 | ||
237 | static void | 237 | static void |
@@ -303,7 +303,7 @@ static int v9fs_write_inode(struct inode *inode, | |||
303 | * send an fsync request to server irrespective of | 303 | * send an fsync request to server irrespective of |
304 | * wbc->sync_mode. | 304 | * wbc->sync_mode. |
305 | */ | 305 | */ |
306 | P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); | 306 | p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); |
307 | v9inode = V9FS_I(inode); | 307 | v9inode = V9FS_I(inode); |
308 | if (!v9inode->writeback_fid) | 308 | if (!v9inode->writeback_fid) |
309 | return 0; | 309 | return 0; |
@@ -326,7 +326,7 @@ static int v9fs_write_inode_dotl(struct inode *inode, | |||
326 | * send an fsync request to server irrespective of | 326 | * send an fsync request to server irrespective of |
327 | * wbc->sync_mode. | 327 | * wbc->sync_mode. |
328 | */ | 328 | */ |
329 | P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); | 329 | p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode); |
330 | v9inode = V9FS_I(inode); | 330 | v9inode = V9FS_I(inode); |
331 | if (!v9inode->writeback_fid) | 331 | if (!v9inode->writeback_fid) |
332 | return 0; | 332 | return 0; |
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index d288773871b3..29653b70a9c3 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c | |||
@@ -32,8 +32,8 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name, | |||
32 | attr_fid = p9_client_xattrwalk(fid, name, &attr_size); | 32 | attr_fid = p9_client_xattrwalk(fid, name, &attr_size); |
33 | if (IS_ERR(attr_fid)) { | 33 | if (IS_ERR(attr_fid)) { |
34 | retval = PTR_ERR(attr_fid); | 34 | retval = PTR_ERR(attr_fid); |
35 | P9_DPRINTK(P9_DEBUG_VFS, | 35 | p9_debug(P9_DEBUG_VFS, "p9_client_attrwalk failed %zd\n", |
36 | "p9_client_attrwalk failed %zd\n", retval); | 36 | retval); |
37 | attr_fid = NULL; | 37 | attr_fid = NULL; |
38 | goto error; | 38 | goto error; |
39 | } | 39 | } |
@@ -87,8 +87,8 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name, | |||
87 | { | 87 | { |
88 | struct p9_fid *fid; | 88 | struct p9_fid *fid; |
89 | 89 | ||
90 | P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n", | 90 | p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu\n", |
91 | __func__, name, buffer_size); | 91 | name, buffer_size); |
92 | fid = v9fs_fid_lookup(dentry); | 92 | fid = v9fs_fid_lookup(dentry); |
93 | if (IS_ERR(fid)) | 93 | if (IS_ERR(fid)) |
94 | return PTR_ERR(fid); | 94 | return PTR_ERR(fid); |
@@ -115,8 +115,8 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name, | |||
115 | int retval, msize, write_count; | 115 | int retval, msize, write_count; |
116 | struct p9_fid *fid = NULL; | 116 | struct p9_fid *fid = NULL; |
117 | 117 | ||
118 | P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu flags = %d\n", | 118 | p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n", |
119 | __func__, name, value_len, flags); | 119 | name, value_len, flags); |
120 | 120 | ||
121 | fid = v9fs_fid_clone(dentry); | 121 | fid = v9fs_fid_clone(dentry); |
122 | if (IS_ERR(fid)) { | 122 | if (IS_ERR(fid)) { |
@@ -129,8 +129,8 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name, | |||
129 | */ | 129 | */ |
130 | retval = p9_client_xattrcreate(fid, name, value_len, flags); | 130 | retval = p9_client_xattrcreate(fid, name, value_len, flags); |
131 | if (retval < 0) { | 131 | if (retval < 0) { |
132 | P9_DPRINTK(P9_DEBUG_VFS, | 132 | p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n", |
133 | "p9_client_xattrcreate failed %d\n", retval); | 133 | retval); |
134 | goto error; | 134 | goto error; |
135 | } | 135 | } |
136 | msize = fid->clnt->msize; | 136 | msize = fid->clnt->msize; |
diff --git a/fs/Kconfig b/fs/Kconfig index 30145d886bc2..d621f02a3f9e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -218,6 +218,8 @@ source "fs/exofs/Kconfig" | |||
218 | 218 | ||
219 | endif # MISC_FILESYSTEMS | 219 | endif # MISC_FILESYSTEMS |
220 | 220 | ||
221 | source "fs/exofs/Kconfig.ore" | ||
222 | |||
221 | menuconfig NETWORK_FILESYSTEMS | 223 | menuconfig NETWORK_FILESYSTEMS |
222 | bool "Network File Systems" | 224 | bool "Network File Systems" |
223 | default y | 225 | default y |
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 79e2ca7973b7..e95d1b64082c 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
@@ -27,6 +27,9 @@ config COMPAT_BINFMT_ELF | |||
27 | bool | 27 | bool |
28 | depends on COMPAT && BINFMT_ELF | 28 | depends on COMPAT && BINFMT_ELF |
29 | 29 | ||
30 | config ARCH_BINFMT_ELF_RANDOMIZE_PIE | ||
31 | bool | ||
32 | |||
30 | config BINFMT_ELF_FDPIC | 33 | config BINFMT_ELF_FDPIC |
31 | bool "Kernel support for FDPIC ELF binaries" | 34 | bool "Kernel support for FDPIC ELF binaries" |
32 | default y | 35 | default y |
diff --git a/fs/aio.c b/fs/aio.c --- a/fs/aio.c +++ b/fs/aio.c | |||
@@ -476,14 +476,21 @@ static void kiocb_batch_init(struct kiocb_batch *batch, long total) | |||
476 | batch->count = total; | 476 | batch->count = total; |
477 | } | 477 | } |
478 | 478 | ||
479 | static void kiocb_batch_free(struct kiocb_batch *batch) | 479 | static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) |
480 | { | 480 | { |
481 | struct kiocb *req, *n; | 481 | struct kiocb *req, *n; |
482 | 482 | ||
483 | if (list_empty(&batch->head)) | ||
484 | return; | ||
485 | |||
486 | spin_lock_irq(&ctx->ctx_lock); | ||
483 | list_for_each_entry_safe(req, n, &batch->head, ki_batch) { | 487 | list_for_each_entry_safe(req, n, &batch->head, ki_batch) { |
484 | list_del(&req->ki_batch); | 488 | list_del(&req->ki_batch); |
489 | list_del(&req->ki_list); | ||
485 | kmem_cache_free(kiocb_cachep, req); | 490 | kmem_cache_free(kiocb_cachep, req); |
491 | ctx->reqs_active--; | ||
486 | } | 492 | } |
493 | spin_unlock_irq(&ctx->ctx_lock); | ||
487 | } | 494 | } |
488 | 495 | ||
489 | /* | 496 | /* |
@@ -1742,7 +1749,7 @@ long do_io_submit(aio_context_t ctx_id, long nr, | |||
1742 | } | 1749 | } |
1743 | blk_finish_plug(&plug); | 1750 | blk_finish_plug(&plug); |
1744 | 1751 | ||
1745 | kiocb_batch_free(&batch); | 1752 | kiocb_batch_free(ctx, &batch); |
1746 | put_ioctx(ctx); | 1753 | put_ioctx(ctx); |
1747 | return i ? i : ret; | 1754 | return i ? i : ret; |
1748 | } | 1755 | } |
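The fs/aio.c fix hands kiocb_batch_free() the context so that requests which were pre-counted into ctx->reqs_active but never submitted can be unhooked from the ctx_lock-protected list and un-counted under that same lock; without this, the accounting leaked. A hedged userspace sketch of the shape of the fix (names are illustrative):

    #include <pthread.h>
    #include <stdlib.h>

    struct ctx {
        pthread_mutex_t lock;
        int active;                     /* stands in for ctx->reqs_active */
    };

    struct req {
        struct req *next;
    };

    /* free the unused tail of a pre-allocated batch: every side effect of
     * the pre-count (list membership, active counter) is rolled back under
     * the same lock that published it */
    static void batch_free(struct ctx *ctx, struct req *head)
    {
        struct req *r, *n;

        if (!head)
            return;
        pthread_mutex_lock(&ctx->lock);
        for (r = head; r; r = n) {
            n = r->next;
            ctx->active--;
            free(r);
        }
        pthread_mutex_unlock(&ctx->lock);
    }

    int main(void)
    {
        struct ctx c = { PTHREAD_MUTEX_INITIALIZER, 1 };
        struct req *r = calloc(1, sizeof(*r));

        batch_free(&c, r);
        return c.active;                /* 0 when accounting balanced */
    }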
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 5869d4e974a9..d8d8e7ba6a1e 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h | |||
@@ -116,6 +116,7 @@ struct autofs_sb_info { | |||
116 | int needs_reghost; | 116 | int needs_reghost; |
117 | struct super_block *sb; | 117 | struct super_block *sb; |
118 | struct mutex wq_mutex; | 118 | struct mutex wq_mutex; |
119 | struct mutex pipe_mutex; | ||
119 | spinlock_t fs_lock; | 120 | spinlock_t fs_lock; |
120 | struct autofs_wait_queue *queues; /* Wait queue pointer */ | 121 | struct autofs_wait_queue *queues; /* Wait queue pointer */ |
121 | spinlock_t lookup_lock; | 122 | spinlock_t lookup_lock; |
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 2ba44c79d548..e16980b00b8d 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c | |||
@@ -225,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
225 | sbi->min_proto = 0; | 225 | sbi->min_proto = 0; |
226 | sbi->max_proto = 0; | 226 | sbi->max_proto = 0; |
227 | mutex_init(&sbi->wq_mutex); | 227 | mutex_init(&sbi->wq_mutex); |
228 | mutex_init(&sbi->pipe_mutex); | ||
228 | spin_lock_init(&sbi->fs_lock); | 229 | spin_lock_init(&sbi->fs_lock); |
229 | sbi->queues = NULL; | 230 | sbi->queues = NULL; |
230 | spin_lock_init(&sbi->lookup_lock); | 231 | spin_lock_init(&sbi->lookup_lock); |
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index e1fbdeef85db..da8876d38a7b 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c | |||
@@ -56,26 +56,27 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) | |||
56 | mutex_unlock(&sbi->wq_mutex); | 56 | mutex_unlock(&sbi->wq_mutex); |
57 | } | 57 | } |
58 | 58 | ||
59 | static int autofs4_write(struct file *file, const void *addr, int bytes) | 59 | static int autofs4_write(struct autofs_sb_info *sbi, |
60 | struct file *file, const void *addr, int bytes) | ||
60 | { | 61 | { |
61 | unsigned long sigpipe, flags; | 62 | unsigned long sigpipe, flags; |
62 | mm_segment_t fs; | 63 | mm_segment_t fs; |
63 | const char *data = (const char *)addr; | 64 | const char *data = (const char *)addr; |
64 | ssize_t wr = 0; | 65 | ssize_t wr = 0; |
65 | 66 | ||
66 | /** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/ | ||
67 | |||
68 | sigpipe = sigismember(&current->pending.signal, SIGPIPE); | 67 | sigpipe = sigismember(&current->pending.signal, SIGPIPE); |
69 | 68 | ||
70 | /* Save pointer to user space and point back to kernel space */ | 69 | /* Save pointer to user space and point back to kernel space */ |
71 | fs = get_fs(); | 70 | fs = get_fs(); |
72 | set_fs(KERNEL_DS); | 71 | set_fs(KERNEL_DS); |
73 | 72 | ||
73 | mutex_lock(&sbi->pipe_mutex); | ||
74 | while (bytes && | 74 | while (bytes && |
75 | (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) { | 75 | (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) { |
76 | data += wr; | 76 | data += wr; |
77 | bytes -= wr; | 77 | bytes -= wr; |
78 | } | 78 | } |
79 | mutex_unlock(&sbi->pipe_mutex); | ||
79 | 80 | ||
80 | set_fs(fs); | 81 | set_fs(fs); |
81 | 82 | ||
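The removed WARNING comment noted that autofs4_write() was unsafe for packets larger than PIPE_BUF; the fix serializes the whole partial-write loop with the new sbi->pipe_mutex so two waiters' multi-chunk packets cannot interleave in the pipe. A minimal POSIX sketch of that discipline, with illustrative names:

    #include <pthread.h>
    #include <stddef.h>
    #include <unistd.h>

    static pthread_mutex_t pipe_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* write all of buf to fd as one logical packet: partial writes are
     * retried inside the lock, so two threads' packets never interleave
     * (the same idea as taking sbi->pipe_mutex around the write loop) */
    static int write_packet(int fd, const char *buf, size_t len)
    {
        ssize_t wr;

        pthread_mutex_lock(&pipe_mutex);
        while (len > 0 && (wr = write(fd, buf, len)) > 0) {
            buf += wr;
            len -= wr;
        }
        pthread_mutex_unlock(&pipe_mutex);
        return len ? -1 : 0;
    }

    int main(void)
    {
        return write_packet(STDOUT_FILENO, "packet\n", 7);
    }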
@@ -110,6 +111,13 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
110 | 111 | ||
111 | pkt.hdr.proto_version = sbi->version; | 112 | pkt.hdr.proto_version = sbi->version; |
112 | pkt.hdr.type = type; | 113 | pkt.hdr.type = type; |
114 | mutex_lock(&sbi->wq_mutex); | ||
115 | |||
116 | /* Check if we have become catatonic */ | ||
117 | if (sbi->catatonic) { | ||
118 | mutex_unlock(&sbi->wq_mutex); | ||
119 | return; | ||
120 | } | ||
113 | switch (type) { | 121 | switch (type) { |
114 | /* Kernel protocol v4 missing and expire packets */ | 122 | /* Kernel protocol v4 missing and expire packets */ |
115 | case autofs_ptype_missing: | 123 | case autofs_ptype_missing: |
@@ -163,22 +171,18 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
163 | } | 171 | } |
164 | default: | 172 | default: |
165 | printk("autofs4_notify_daemon: bad type %d!\n", type); | 173 | printk("autofs4_notify_daemon: bad type %d!\n", type); |
174 | mutex_unlock(&sbi->wq_mutex); | ||
166 | return; | 175 | return; |
167 | } | 176 | } |
168 | 177 | ||
169 | /* Check if we have become catatonic */ | 178 | pipe = sbi->pipe; |
170 | mutex_lock(&sbi->wq_mutex); | 179 | get_file(pipe); |
171 | if (!sbi->catatonic) { | 180 | |
172 | pipe = sbi->pipe; | ||
173 | get_file(pipe); | ||
174 | } | ||
175 | mutex_unlock(&sbi->wq_mutex); | 181 | mutex_unlock(&sbi->wq_mutex); |
176 | 182 | ||
177 | if (pipe) { | 183 | if (autofs4_write(sbi, pipe, &pkt, pktsz)) |
178 | if (autofs4_write(pipe, &pkt, pktsz)) | 184 | autofs4_catatonic_mode(sbi); |
179 | autofs4_catatonic_mode(sbi); | 185 | fput(pipe); |
180 | fput(pipe); | ||
181 | } | ||
182 | } | 186 | } |
183 | 187 | ||
184 | static int autofs4_getpath(struct autofs_sb_info *sbi, | 188 | static int autofs4_getpath(struct autofs_sb_info *sbi, |
@@ -257,6 +261,9 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
257 | struct autofs_wait_queue *wq; | 261 | struct autofs_wait_queue *wq; |
258 | struct autofs_info *ino; | 262 | struct autofs_info *ino; |
259 | 263 | ||
264 | if (sbi->catatonic) | ||
265 | return -ENOENT; | ||
266 | |||
260 | /* Wait in progress, continue; */ | 267 | /* Wait in progress, continue; */ |
261 | wq = autofs4_find_wait(sbi, qstr); | 268 | wq = autofs4_find_wait(sbi, qstr); |
262 | if (wq) { | 269 | if (wq) { |
@@ -289,6 +296,9 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
289 | if (mutex_lock_interruptible(&sbi->wq_mutex)) | 296 | if (mutex_lock_interruptible(&sbi->wq_mutex)) |
290 | return -EINTR; | 297 | return -EINTR; |
291 | 298 | ||
299 | if (sbi->catatonic) | ||
300 | return -ENOENT; | ||
301 | |||
292 | wq = autofs4_find_wait(sbi, qstr); | 302 | wq = autofs4_find_wait(sbi, qstr); |
293 | if (wq) { | 303 | if (wq) { |
294 | *wait = wq; | 304 | *wait = wq; |
@@ -389,7 +399,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
389 | 399 | ||
390 | ret = validate_request(&wq, sbi, &qstr, dentry, notify); | 400 | ret = validate_request(&wq, sbi, &qstr, dentry, notify); |
391 | if (ret <= 0) { | 401 | if (ret <= 0) { |
392 | if (ret == 0) | 402 | if (ret != -EINTR) |
393 | mutex_unlock(&sbi->wq_mutex); | 403 | mutex_unlock(&sbi->wq_mutex); |
394 | kfree(qstr.name); | 404 | kfree(qstr.name); |
395 | return ret; | 405 | return ret; |
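The caller-side change from "ret == 0" to "ret != -EINTR" encodes a lock-ownership convention: with the catatonic checks above, validate_request() returns with wq_mutex held on every path except -EINTR, where mutex_lock_interruptible() never acquired it. A compact sketch of that convention (illustrative, not the autofs4 code):

    #include <errno.h>
    #include <pthread.h>

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

    /* returns with 'm' held on every path except -EINTR,
     * where the lock was never acquired */
    static int validate(int interrupted, int dead)
    {
        if (interrupted)
            return -EINTR;              /* lock not taken */
        pthread_mutex_lock(&m);
        if (dead)
            return -ENOENT;             /* lock still held */
        return 1;                       /* lock still held */
    }

    int main(void)
    {
        int ret = validate(0, 1);

        if (ret != -EINTR)
            pthread_mutex_unlock(&m);
        return 0;
    }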
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 21ac5ee4b43f..bcb884e2d613 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -794,7 +794,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
794 | * default mmap base, as well as whatever program they | 794 | * default mmap base, as well as whatever program they |
795 | * might try to exec. This is because the brk will | 795 | * might try to exec. This is because the brk will |
796 | * follow the loader, and is not movable. */ | 796 | * follow the loader, and is not movable. */ |
797 | #if defined(CONFIG_X86) || defined(CONFIG_ARM) | 797 | #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE |
798 | /* Memory randomization might have been switched off | 798 | /* Memory randomization might have been switched off |
799 | * in runtime via sysctl. | 799 | * in runtime via sysctl. |
800 | * If that is the case, retain the original non-zero | 800 | * If that is the case, retain the original non-zero |
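Instead of enumerating architectures in the generic loader, PIE-base randomization is now an opt-in: an architecture selects ARCH_BINFMT_ELF_RANDOMIZE_PIE in its Kconfig, and binfmt_elf tests a single symbol. A trivial sketch of the single-symbol test; the macro definition here is an assumption standing in for what Kconfig would generate:

    #include <stdio.h>

    /* assumption for illustration: an arch that wants randomized PIE selects
     * the Kconfig symbol, which surfaces as this macro; generic code then
     * needs one #ifdef instead of a growing list of CONFIG_<arch> tests */
    #define CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE 1

    int main(void)
    {
    #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
        puts("PIE base may be randomized");
    #else
        puts("PIE loaded at the default mmap base");
    #endif
        return 0;
    }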
diff --git a/fs/block_dev.c b/fs/block_dev.c index 69a5b6fbee2b..0e575d1304b4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <linux/uio.h> | 25 | #include <linux/uio.h> |
26 | #include <linux/namei.h> | 26 | #include <linux/namei.h> |
27 | #include <linux/log2.h> | 27 | #include <linux/log2.h> |
28 | #include <linux/kmemleak.h> | ||
29 | #include <linux/cleancache.h> | 28 | #include <linux/cleancache.h> |
30 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
31 | #include "internal.h" | 30 | #include "internal.h" |
@@ -521,7 +520,7 @@ static struct super_block *blockdev_superblock __read_mostly; | |||
521 | void __init bdev_cache_init(void) | 520 | void __init bdev_cache_init(void) |
522 | { | 521 | { |
523 | int err; | 522 | int err; |
524 | struct vfsmount *bd_mnt; | 523 | static struct vfsmount *bd_mnt; |
525 | 524 | ||
526 | bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), | 525 | bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), |
527 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | 526 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| |
@@ -533,12 +532,7 @@ void __init bdev_cache_init(void) | |||
533 | bd_mnt = kern_mount(&bd_type); | 532 | bd_mnt = kern_mount(&bd_type); |
534 | if (IS_ERR(bd_mnt)) | 533 | if (IS_ERR(bd_mnt)) |
535 | panic("Cannot create bdev pseudo-fs"); | 534 | panic("Cannot create bdev pseudo-fs"); |
536 | /* | 535 | blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ |
537 | * This vfsmount structure is only used to obtain the | ||
538 | * blockdev_superblock, so tell kmemleak not to report it. | ||
539 | */ | ||
540 | kmemleak_not_leak(bd_mnt); | ||
541 | blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ | ||
542 | } | 536 | } |
543 | 537 | ||
544 | /* | 538 | /* |
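Making bd_mnt a function-scope static keeps the only pointer to the bdev mount in the data segment, which a conservative scanner such as kmemleak treats as a root, so the explicit kmemleak_not_leak() annotation and its header include can go. A userspace sketch of the same idea, with illustrative names:

    #include <stdlib.h>

    /* a function-scope static keeps the pointer reachable from the data
     * segment even after the init function returns, so a conservative
     * scanner (kmemleak-style) never flags the allocation as leaked */
    static void *init_once(void)
    {
        static void *bd_mnt;            /* scanned as a leak-detector root */

        if (!bd_mnt)
            bd_mnt = malloc(64);
        return bd_mnt;
    }

    int main(void)
    {
        return init_once() ? 0 : 1;
    }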
@@ -1145,6 +1139,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1145 | mutex_lock_nested(&bdev->bd_mutex, for_part); | 1139 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
1146 | if (!bdev->bd_openers) { | 1140 | if (!bdev->bd_openers) { |
1147 | bdev->bd_disk = disk; | 1141 | bdev->bd_disk = disk; |
1142 | bdev->bd_queue = disk->queue; | ||
1148 | bdev->bd_contains = bdev; | 1143 | bdev->bd_contains = bdev; |
1149 | if (!partno) { | 1144 | if (!partno) { |
1150 | struct backing_dev_info *bdi; | 1145 | struct backing_dev_info *bdi; |
@@ -1165,6 +1160,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1165 | disk_put_part(bdev->bd_part); | 1160 | disk_put_part(bdev->bd_part); |
1166 | bdev->bd_part = NULL; | 1161 | bdev->bd_part = NULL; |
1167 | bdev->bd_disk = NULL; | 1162 | bdev->bd_disk = NULL; |
1163 | bdev->bd_queue = NULL; | ||
1168 | mutex_unlock(&bdev->bd_mutex); | 1164 | mutex_unlock(&bdev->bd_mutex); |
1169 | disk_unblock_events(disk); | 1165 | disk_unblock_events(disk); |
1170 | put_disk(disk); | 1166 | put_disk(disk); |
@@ -1238,6 +1234,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1238 | disk_put_part(bdev->bd_part); | 1234 | disk_put_part(bdev->bd_part); |
1239 | bdev->bd_disk = NULL; | 1235 | bdev->bd_disk = NULL; |
1240 | bdev->bd_part = NULL; | 1236 | bdev->bd_part = NULL; |
1237 | bdev->bd_queue = NULL; | ||
1241 | bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info); | 1238 | bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info); |
1242 | if (bdev != bdev->bd_contains) | 1239 | if (bdev != bdev->bd_contains) |
1243 | __blkdev_put(bdev->bd_contains, mode, 1); | 1240 | __blkdev_put(bdev->bd_contains, mode, 1); |
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index ecb9fd3be143..d33f01c08b60 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig | |||
@@ -31,3 +31,22 @@ config BTRFS_FS_POSIX_ACL | |||
31 | Linux website <http://acl.bestbits.at/>. | 31 | Linux website <http://acl.bestbits.at/>. |
32 | 32 | ||
33 | If you don't know what Access Control Lists are, say N | 33 | If you don't know what Access Control Lists are, say N |
34 | |||
35 | config BTRFS_FS_CHECK_INTEGRITY | ||
36 | bool "Btrfs with integrity check tool compiled in (DANGEROUS)" | ||
37 | depends on BTRFS_FS | ||
38 | help | ||
39 | Adds code that examines all block write requests (including | ||
40 | writes of the super block). The goal is to verify that the | ||
41 | state of the filesystem on disk is always consistent, i.e., | ||
42 | after a power-loss or kernel panic event the filesystem is | ||
43 | in a consistent state. | ||
44 | |||
45 | If the integrity check tool is included and activated in | ||
46 | the mount options, plenty of kernel memory is used, and | ||
47 | plenty of additional CPU cycles are spent. Enabling this | ||
48 | functionality is not intended for normal use. | ||
49 | |||
50 | In most cases, unless you are a btrfs developer who needs | ||
51 | to verify the integrity of (super)-block write requests | ||
52 | during the run of a regression test, say N | ||
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index c0ddfd29c5e5..0c4fa2befae7 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -8,6 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ | 9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ |
10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ | 10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ |
11 | reada.o backref.o | 11 | reada.o backref.o ulist.o |
12 | 12 | ||
13 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o | 13 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o |
14 | btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o | ||
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 22c64fff1bd5..633c701a287d 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -19,18 +19,789 @@ | |||
19 | #include "ctree.h" | 19 | #include "ctree.h" |
20 | #include "disk-io.h" | 20 | #include "disk-io.h" |
21 | #include "backref.h" | 21 | #include "backref.h" |
22 | #include "ulist.h" | ||
23 | #include "transaction.h" | ||
24 | #include "delayed-ref.h" | ||
22 | 25 | ||
23 | struct __data_ref { | 26 | /* |
27 | * this structure records all encountered refs on the way up to the root | ||
28 | */ | ||
29 | struct __prelim_ref { | ||
24 | struct list_head list; | 30 | struct list_head list; |
25 | u64 inum; | 31 | u64 root_id; |
26 | u64 root; | 32 | struct btrfs_key key; |
27 | u64 extent_data_item_offset; | 33 | int level; |
34 | int count; | ||
35 | u64 parent; | ||
36 | u64 wanted_disk_byte; | ||
28 | }; | 37 | }; |
29 | 38 | ||
30 | struct __shared_ref { | 39 | static int __add_prelim_ref(struct list_head *head, u64 root_id, |
31 | struct list_head list; | 40 | struct btrfs_key *key, int level, u64 parent, |
41 | u64 wanted_disk_byte, int count) | ||
42 | { | ||
43 | struct __prelim_ref *ref; | ||
44 | |||
45 | /* in case we're adding delayed refs, we're holding the refs spinlock */ | ||
46 | ref = kmalloc(sizeof(*ref), GFP_ATOMIC); | ||
47 | if (!ref) | ||
48 | return -ENOMEM; | ||
49 | |||
50 | ref->root_id = root_id; | ||
51 | if (key) | ||
52 | ref->key = *key; | ||
53 | else | ||
54 | memset(&ref->key, 0, sizeof(ref->key)); | ||
55 | |||
56 | ref->level = level; | ||
57 | ref->count = count; | ||
58 | ref->parent = parent; | ||
59 | ref->wanted_disk_byte = wanted_disk_byte; | ||
60 | list_add_tail(&ref->list, head); | ||
61 | |||
62 | return 0; | ||
63 | } | ||
64 | |||
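Backref walking begins by flattening every encountered reference, whatever its on-disk form, into one uniform node on a list; parent == 0 marks refs that are still indirect, and GFP_ATOMIC is used because some callers hold the delayed-refs spinlock. A hedged userspace sketch of this accumulation step (simplified fields, and a prepend rather than list_add_tail, since the later passes do not depend on order):

    #include <stdlib.h>

    struct list_node {
        struct list_node *next;
    };

    struct prelim_ref {
        struct list_node list;
        unsigned long long root_id;
        unsigned long long parent;      /* 0 = still indirect */
        int level;
        int count;
    };

    /* record one uniform node per encountered ref; resolving indirect refs
     * and merging duplicates happen in later passes over the same list */
    static int add_prelim_ref(struct list_node **head,
                              unsigned long long root_id, int level,
                              unsigned long long parent, int count)
    {
        struct prelim_ref *ref = calloc(1, sizeof(*ref));

        if (!ref)
            return -1;
        ref->root_id = root_id;
        ref->level = level;
        ref->parent = parent;
        ref->count = count;
        ref->list.next = *head;
        *head = &ref->list;
        return 0;
    }

    int main(void)
    {
        struct list_node *head = NULL;

        return add_prelim_ref(&head, 5, 1, 0, 1);
    }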
65 | static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | ||
66 | struct ulist *parents, | ||
67 | struct extent_buffer *eb, int level, | ||
68 | u64 wanted_objectid, u64 wanted_disk_byte) | ||
69 | { | ||
70 | int ret; | ||
71 | int slot; | ||
72 | struct btrfs_file_extent_item *fi; | ||
73 | struct btrfs_key key; | ||
32 | u64 disk_byte; | 74 | u64 disk_byte; |
33 | }; | 75 | |
76 | add_parent: | ||
77 | ret = ulist_add(parents, eb->start, 0, GFP_NOFS); | ||
78 | if (ret < 0) | ||
79 | return ret; | ||
80 | |||
81 | if (level != 0) | ||
82 | return 0; | ||
83 | |||
84 | /* | ||
85 | * if the current leaf is full with EXTENT_DATA items, we must | ||
86 | * check the next one if that holds a reference as well. | ||
87 | * ref->count cannot be used to skip this check. | ||
88 | * repeat this until we don't find any additional EXTENT_DATA items. | ||
89 | */ | ||
90 | while (1) { | ||
91 | ret = btrfs_next_leaf(root, path); | ||
92 | if (ret < 0) | ||
93 | return ret; | ||
94 | if (ret) | ||
95 | return 0; | ||
96 | |||
97 | eb = path->nodes[0]; | ||
98 | for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) { | ||
99 | btrfs_item_key_to_cpu(eb, &key, slot); | ||
100 | if (key.objectid != wanted_objectid || | ||
101 | key.type != BTRFS_EXTENT_DATA_KEY) | ||
102 | return 0; | ||
103 | fi = btrfs_item_ptr(eb, slot, | ||
104 | struct btrfs_file_extent_item); | ||
105 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); | ||
106 | if (disk_byte == wanted_disk_byte) | ||
107 | goto add_parent; | ||
108 | } | ||
109 | } | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | |||
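add_all_parents() collects parent buffers into a ulist, the small set-of-u64 helper this series adds (see the ulist.o Makefile hunk above); as far as the loop is concerned, re-adding a value that is already present is harmless. A minimal stand-in with the same add semantics, sketching the idea rather than the kernel's ulist implementation:

    #include <stdlib.h>

    /* minimal stand-in for the ulist idea: a growable set of u64-like values
     * where re-adding an existing value is a no-op */
    struct uset {
        unsigned long long *vals;
        size_t len, cap;
    };

    static int uset_add(struct uset *s, unsigned long long v)
    {
        size_t i;

        for (i = 0; i < s->len; i++)
            if (s->vals[i] == v)
                return 0;               /* already present */
        if (s->len == s->cap) {
            size_t ncap = s->cap ? 2 * s->cap : 8;
            void *p = realloc(s->vals, ncap * sizeof(*s->vals));

            if (!p)
                return -1;
            s->vals = p;
            s->cap = ncap;
        }
        s->vals[s->len++] = v;
        return 1;                       /* newly added */
    }

    int main(void)
    {
        struct uset s = { 0 };

        uset_add(&s, 42);
        return uset_add(&s, 42);        /* 0: duplicate ignored */
    }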
114 | /* | ||
115 | * resolve an indirect backref in the form (root_id, key, level) | ||
116 | * to a logical address | ||
117 | */ | ||
118 | static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | ||
119 | struct __prelim_ref *ref, | ||
120 | struct ulist *parents) | ||
121 | { | ||
122 | struct btrfs_path *path; | ||
123 | struct btrfs_root *root; | ||
124 | struct btrfs_key root_key; | ||
125 | struct btrfs_key key = {0}; | ||
126 | struct extent_buffer *eb; | ||
127 | int ret = 0; | ||
128 | int root_level; | ||
129 | int level = ref->level; | ||
130 | |||
131 | path = btrfs_alloc_path(); | ||
132 | if (!path) | ||
133 | return -ENOMEM; | ||
134 | |||
135 | root_key.objectid = ref->root_id; | ||
136 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
137 | root_key.offset = (u64)-1; | ||
138 | root = btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
139 | if (IS_ERR(root)) { | ||
140 | ret = PTR_ERR(root); | ||
141 | goto out; | ||
142 | } | ||
143 | |||
144 | rcu_read_lock(); | ||
145 | root_level = btrfs_header_level(root->node); | ||
146 | rcu_read_unlock(); | ||
147 | |||
148 | if (root_level + 1 == level) | ||
149 | goto out; | ||
150 | |||
151 | path->lowest_level = level; | ||
152 | ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0); | ||
153 | pr_debug("search slot in root %llu (level %d, ref count %d) returned " | ||
154 | "%d for key (%llu %u %llu)\n", | ||
155 | (unsigned long long)ref->root_id, level, ref->count, ret, | ||
156 | (unsigned long long)ref->key.objectid, ref->key.type, | ||
157 | (unsigned long long)ref->key.offset); | ||
158 | if (ret < 0) | ||
159 | goto out; | ||
160 | |||
161 | eb = path->nodes[level]; | ||
162 | if (!eb) { | ||
163 | WARN_ON(1); | ||
164 | ret = 1; | ||
165 | goto out; | ||
166 | } | ||
167 | |||
168 | if (level == 0) { | ||
169 | if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) { | ||
170 | ret = btrfs_next_leaf(root, path); | ||
171 | if (ret) | ||
172 | goto out; | ||
173 | eb = path->nodes[0]; | ||
174 | } | ||
175 | |||
176 | btrfs_item_key_to_cpu(eb, &key, path->slots[0]); | ||
177 | } | ||
178 | |||
179 | /* the last two parameters will only be used for level == 0 */ | ||
180 | ret = add_all_parents(root, path, parents, eb, level, key.objectid, | ||
181 | ref->wanted_disk_byte); | ||
182 | out: | ||
183 | btrfs_free_path(path); | ||
184 | return ret; | ||
185 | } | ||
186 | |||
187 | /* | ||
188 | * resolve all indirect backrefs from the list | ||
189 | */ | ||
190 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | ||
191 | struct list_head *head) | ||
192 | { | ||
193 | int err; | ||
194 | int ret = 0; | ||
195 | struct __prelim_ref *ref; | ||
196 | struct __prelim_ref *ref_safe; | ||
197 | struct __prelim_ref *new_ref; | ||
198 | struct ulist *parents; | ||
199 | struct ulist_node *node; | ||
200 | |||
201 | parents = ulist_alloc(GFP_NOFS); | ||
202 | if (!parents) | ||
203 | return -ENOMEM; | ||
204 | |||
205 | /* | ||
206 | * _safe allows us to insert directly after the current item without | ||
207 | * iterating over the newly inserted items. | ||
208 | * we're also allowed to re-assign ref during iteration. | ||
209 | */ | ||
210 | list_for_each_entry_safe(ref, ref_safe, head, list) { | ||
211 | if (ref->parent) /* already direct */ | ||
212 | continue; | ||
213 | if (ref->count == 0) | ||
214 | continue; | ||
215 | err = __resolve_indirect_ref(fs_info, ref, parents); | ||
216 | if (err) { | ||
217 | if (ret == 0) | ||
218 | ret = err; | ||
219 | continue; | ||
220 | } | ||
221 | |||
222 | /* we put the first parent into the ref at hand */ | ||
223 | node = ulist_next(parents, NULL); | ||
224 | ref->parent = node ? node->val : 0; | ||
225 | |||
226 | /* additional parents require new refs being added here */ | ||
227 | while ((node = ulist_next(parents, node))) { | ||
228 | new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); | ||
229 | if (!new_ref) { | ||
230 | ret = -ENOMEM; | ||
231 | break; | ||
232 | } | ||
233 | memcpy(new_ref, ref, sizeof(*ref)); | ||
234 | new_ref->parent = node->val; | ||
235 | list_add(&new_ref->list, &ref->list); | ||
236 | } | ||
237 | ulist_reinit(parents); | ||
238 | } | ||
239 | |||
240 | ulist_free(parents); | ||
241 | return ret; | ||
242 | } | ||
243 | |||
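The in-function comment explains the trick: because list_for_each_entry_safe caches the next pointer before the loop body runs, refs inserted directly after the current element are not visited in this pass. The same property, demonstrated on a plain singly linked list:

    #include <stdio.h>
    #include <stdlib.h>

    struct node {
        int val;
        struct node *next;
    };

    int main(void)
    {
        struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct node *cur, *nxt;

        /* caching 'nxt' before the body (the _safe pattern) means a node
         * inserted right after 'cur' is not visited in this pass */
        for (cur = &a; cur; cur = nxt) {
            nxt = cur->next;
            if (cur->val == 2) {
                struct node *ins = malloc(sizeof(*ins));

                ins->val = 20;          /* "resolved" copy of cur */
                ins->next = cur->next;
                cur->next = ins;        /* skipped by this loop */
            }
            printf("%d\n", cur->val);   /* prints 1 2 3 */
        }
        return 0;
    }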
244 | /* | ||
245 | * merge two lists of backrefs and adjust counts accordingly | ||
246 | * | ||
247 | * mode = 1: merge identical keys, if key is set | ||
248 | * mode = 2: merge identical parents | ||
249 | */ | ||
250 | static int __merge_refs(struct list_head *head, int mode) | ||
251 | { | ||
252 | struct list_head *pos1; | ||
253 | |||
254 | list_for_each(pos1, head) { | ||
255 | struct list_head *n2; | ||
256 | struct list_head *pos2; | ||
257 | struct __prelim_ref *ref1; | ||
258 | |||
259 | ref1 = list_entry(pos1, struct __prelim_ref, list); | ||
260 | |||
261 | if (mode == 1 && ref1->key.type == 0) | ||
262 | continue; | ||
263 | for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; | ||
264 | pos2 = n2, n2 = pos2->next) { | ||
265 | struct __prelim_ref *ref2; | ||
266 | |||
267 | ref2 = list_entry(pos2, struct __prelim_ref, list); | ||
268 | |||
269 | if (mode == 1) { | ||
270 | if (memcmp(&ref1->key, &ref2->key, | ||
271 | sizeof(ref1->key)) || | ||
272 | ref1->level != ref2->level || | ||
273 | ref1->root_id != ref2->root_id) | ||
274 | continue; | ||
275 | ref1->count += ref2->count; | ||
276 | } else { | ||
277 | if (ref1->parent != ref2->parent) | ||
278 | continue; | ||
279 | ref1->count += ref2->count; | ||
280 | } | ||
281 | list_del(&ref2->list); | ||
282 | kfree(ref2); | ||
283 | } | ||
284 | |||
285 | } | ||
286 | return 0; | ||
287 | } | ||
288 | |||
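__merge_refs() runs twice per resolution: mode 1 folds duplicates by (root, key, level) before indirect refs are resolved, mode 2 folds by parent afterwards, summing counts each time; the pairwise scan is quadratic, which is fine for the short lists involved. A compact sketch of that fold, with the merge criterion reduced to a single key field:

    #include <stdio.h>

    struct ref {
        int key;
        int count;
        int dead;
    };

    /* quadratic pairwise merge: fold r[j] into r[i] when the keys match,
     * summing counts (the shape of __merge_refs; mode 1 vs mode 2 only
     * changes which fields form the key) */
    static void merge(struct ref *r, int n)
    {
        int i, j;

        for (i = 0; i < n; i++) {
            if (r[i].dead)
                continue;
            for (j = i + 1; j < n; j++) {
                if (r[j].dead || r[j].key != r[i].key)
                    continue;
                r[i].count += r[j].count;
                r[j].dead = 1;
            }
        }
    }

    int main(void)
    {
        struct ref r[] = { { 7, 1, 0 }, { 9, 2, 0 }, { 7, 3, 0 } };

        merge(r, 3);
        printf("%d\n", r[0].count);     /* 4 */
        return 0;
    }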
289 | /* | ||
290 | * add all currently queued delayed refs from this head whose seq nr is | ||
291 | * smaller or equal that seq to the list | ||
292 | */ | ||
293 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | ||
294 | struct btrfs_key *info_key, | ||
295 | struct list_head *prefs) | ||
296 | { | ||
297 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; | ||
298 | struct rb_node *n = &head->node.rb_node; | ||
299 | int sgn; | ||
300 | int ret = 0; | ||
301 | |||
302 | if (extent_op && extent_op->update_key) | ||
303 | btrfs_disk_key_to_cpu(info_key, &extent_op->key); | ||
304 | |||
305 | while ((n = rb_prev(n))) { | ||
306 | struct btrfs_delayed_ref_node *node; | ||
307 | node = rb_entry(n, struct btrfs_delayed_ref_node, | ||
308 | rb_node); | ||
309 | if (node->bytenr != head->node.bytenr) | ||
310 | break; | ||
311 | WARN_ON(node->is_head); | ||
312 | |||
313 | if (node->seq > seq) | ||
314 | continue; | ||
315 | |||
316 | switch (node->action) { | ||
317 | case BTRFS_ADD_DELAYED_EXTENT: | ||
318 | case BTRFS_UPDATE_DELAYED_HEAD: | ||
319 | WARN_ON(1); | ||
320 | continue; | ||
321 | case BTRFS_ADD_DELAYED_REF: | ||
322 | sgn = 1; | ||
323 | break; | ||
324 | case BTRFS_DROP_DELAYED_REF: | ||
325 | sgn = -1; | ||
326 | break; | ||
327 | default: | ||
328 | BUG_ON(1); | ||
329 | } | ||
330 | switch (node->type) { | ||
331 | case BTRFS_TREE_BLOCK_REF_KEY: { | ||
332 | struct btrfs_delayed_tree_ref *ref; | ||
333 | |||
334 | ref = btrfs_delayed_node_to_tree_ref(node); | ||
335 | ret = __add_prelim_ref(prefs, ref->root, info_key, | ||
336 | ref->level + 1, 0, node->bytenr, | ||
337 | node->ref_mod * sgn); | ||
338 | break; | ||
339 | } | ||
340 | case BTRFS_SHARED_BLOCK_REF_KEY: { | ||
341 | struct btrfs_delayed_tree_ref *ref; | ||
342 | |||
343 | ref = btrfs_delayed_node_to_tree_ref(node); | ||
344 | ret = __add_prelim_ref(prefs, ref->root, info_key, | ||
345 | ref->level + 1, ref->parent, | ||
346 | node->bytenr, | ||
347 | node->ref_mod * sgn); | ||
348 | break; | ||
349 | } | ||
350 | case BTRFS_EXTENT_DATA_REF_KEY: { | ||
351 | struct btrfs_delayed_data_ref *ref; | ||
352 | struct btrfs_key key; | ||
353 | |||
354 | ref = btrfs_delayed_node_to_data_ref(node); | ||
355 | |||
356 | key.objectid = ref->objectid; | ||
357 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
358 | key.offset = ref->offset; | ||
359 | ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0, | ||
360 | node->bytenr, | ||
361 | node->ref_mod * sgn); | ||
362 | break; | ||
363 | } | ||
364 | case BTRFS_SHARED_DATA_REF_KEY: { | ||
365 | struct btrfs_delayed_data_ref *ref; | ||
366 | struct btrfs_key key; | ||
367 | |||
368 | ref = btrfs_delayed_node_to_data_ref(node); | ||
369 | |||
370 | key.objectid = ref->objectid; | ||
371 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
372 | key.offset = ref->offset; | ||
373 | ret = __add_prelim_ref(prefs, ref->root, &key, 0, | ||
374 | ref->parent, node->bytenr, | ||
375 | node->ref_mod * sgn); | ||
376 | break; | ||
377 | } | ||
378 | default: | ||
379 | WARN_ON(1); | ||
380 | } | ||
381 | BUG_ON(ret); | ||
382 | } | ||
383 | |||
384 | return 0; | ||
385 | } | ||
386 | |||
387 | /* | ||
388 | * add all inline backrefs for bytenr to the list | ||
389 | */ | ||
390 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, | ||
391 | struct btrfs_path *path, u64 bytenr, | ||
392 | struct btrfs_key *info_key, int *info_level, | ||
393 | struct list_head *prefs) | ||
394 | { | ||
395 | int ret = 0; | ||
396 | int slot; | ||
397 | struct extent_buffer *leaf; | ||
398 | struct btrfs_key key; | ||
399 | unsigned long ptr; | ||
400 | unsigned long end; | ||
401 | struct btrfs_extent_item *ei; | ||
402 | u64 flags; | ||
403 | u64 item_size; | ||
404 | |||
405 | /* | ||
406 | * enumerate all inline refs | ||
407 | */ | ||
408 | leaf = path->nodes[0]; | ||
409 | slot = path->slots[0] - 1; | ||
410 | |||
411 | item_size = btrfs_item_size_nr(leaf, slot); | ||
412 | BUG_ON(item_size < sizeof(*ei)); | ||
413 | |||
414 | ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); | ||
415 | flags = btrfs_extent_flags(leaf, ei); | ||
416 | |||
417 | ptr = (unsigned long)(ei + 1); | ||
418 | end = (unsigned long)ei + item_size; | ||
419 | |||
420 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
421 | struct btrfs_tree_block_info *info; | ||
422 | struct btrfs_disk_key disk_key; | ||
423 | |||
424 | info = (struct btrfs_tree_block_info *)ptr; | ||
425 | *info_level = btrfs_tree_block_level(leaf, info); | ||
426 | btrfs_tree_block_key(leaf, info, &disk_key); | ||
427 | btrfs_disk_key_to_cpu(info_key, &disk_key); | ||
428 | ptr += sizeof(struct btrfs_tree_block_info); | ||
429 | BUG_ON(ptr > end); | ||
430 | } else { | ||
431 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); | ||
432 | } | ||
433 | |||
434 | while (ptr < end) { | ||
435 | struct btrfs_extent_inline_ref *iref; | ||
436 | u64 offset; | ||
437 | int type; | ||
438 | |||
439 | iref = (struct btrfs_extent_inline_ref *)ptr; | ||
440 | type = btrfs_extent_inline_ref_type(leaf, iref); | ||
441 | offset = btrfs_extent_inline_ref_offset(leaf, iref); | ||
442 | |||
443 | switch (type) { | ||
444 | case BTRFS_SHARED_BLOCK_REF_KEY: | ||
445 | ret = __add_prelim_ref(prefs, 0, info_key, | ||
446 | *info_level + 1, offset, | ||
447 | bytenr, 1); | ||
448 | break; | ||
449 | case BTRFS_SHARED_DATA_REF_KEY: { | ||
450 | struct btrfs_shared_data_ref *sdref; | ||
451 | int count; | ||
452 | |||
453 | sdref = (struct btrfs_shared_data_ref *)(iref + 1); | ||
454 | count = btrfs_shared_data_ref_count(leaf, sdref); | ||
455 | ret = __add_prelim_ref(prefs, 0, NULL, 0, offset, | ||
456 | bytenr, count); | ||
457 | break; | ||
458 | } | ||
459 | case BTRFS_TREE_BLOCK_REF_KEY: | ||
460 | ret = __add_prelim_ref(prefs, offset, info_key, | ||
461 | *info_level + 1, 0, bytenr, 1); | ||
462 | break; | ||
463 | case BTRFS_EXTENT_DATA_REF_KEY: { | ||
464 | struct btrfs_extent_data_ref *dref; | ||
465 | int count; | ||
466 | u64 root; | ||
467 | |||
468 | dref = (struct btrfs_extent_data_ref *)(&iref->offset); | ||
469 | count = btrfs_extent_data_ref_count(leaf, dref); | ||
470 | key.objectid = btrfs_extent_data_ref_objectid(leaf, | ||
471 | dref); | ||
472 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
473 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | ||
474 | root = btrfs_extent_data_ref_root(leaf, dref); | ||
475 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr, | ||
476 | count); | ||
477 | break; | ||
478 | } | ||
479 | default: | ||
480 | WARN_ON(1); | ||
481 | } | ||
482 | BUG_ON(ret); | ||
483 | ptr += btrfs_extent_inline_ref_size(type); | ||
484 | } | ||
485 | |||
486 | return 0; | ||
487 | } | ||
488 | |||
489 | /* | ||
490 | * add all non-inline backrefs for bytenr to the list | ||
491 | */ | ||
492 | static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | ||
493 | struct btrfs_path *path, u64 bytenr, | ||
494 | struct btrfs_key *info_key, int info_level, | ||
495 | struct list_head *prefs) | ||
496 | { | ||
497 | struct btrfs_root *extent_root = fs_info->extent_root; | ||
498 | int ret; | ||
499 | int slot; | ||
500 | struct extent_buffer *leaf; | ||
501 | struct btrfs_key key; | ||
502 | |||
503 | while (1) { | ||
504 | ret = btrfs_next_item(extent_root, path); | ||
505 | if (ret < 0) | ||
506 | break; | ||
507 | if (ret) { | ||
508 | ret = 0; | ||
509 | break; | ||
510 | } | ||
511 | |||
512 | slot = path->slots[0]; | ||
513 | leaf = path->nodes[0]; | ||
514 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
515 | |||
516 | if (key.objectid != bytenr) | ||
517 | break; | ||
518 | if (key.type < BTRFS_TREE_BLOCK_REF_KEY) | ||
519 | continue; | ||
520 | if (key.type > BTRFS_SHARED_DATA_REF_KEY) | ||
521 | break; | ||
522 | |||
523 | switch (key.type) { | ||
524 | case BTRFS_SHARED_BLOCK_REF_KEY: | ||
525 | ret = __add_prelim_ref(prefs, 0, info_key, | ||
526 | info_level + 1, key.offset, | ||
527 | bytenr, 1); | ||
528 | break; | ||
529 | case BTRFS_SHARED_DATA_REF_KEY: { | ||
530 | struct btrfs_shared_data_ref *sdref; | ||
531 | int count; | ||
532 | |||
533 | sdref = btrfs_item_ptr(leaf, slot, | ||
534 | struct btrfs_shared_data_ref); | ||
535 | count = btrfs_shared_data_ref_count(leaf, sdref); | ||
536 | ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset, | ||
537 | bytenr, count); | ||
538 | break; | ||
539 | } | ||
540 | case BTRFS_TREE_BLOCK_REF_KEY: | ||
541 | ret = __add_prelim_ref(prefs, key.offset, info_key, | ||
542 | info_level + 1, 0, bytenr, 1); | ||
543 | break; | ||
544 | case BTRFS_EXTENT_DATA_REF_KEY: { | ||
545 | struct btrfs_extent_data_ref *dref; | ||
546 | int count; | ||
547 | u64 root; | ||
548 | |||
549 | dref = btrfs_item_ptr(leaf, slot, | ||
550 | struct btrfs_extent_data_ref); | ||
551 | count = btrfs_extent_data_ref_count(leaf, dref); | ||
552 | key.objectid = btrfs_extent_data_ref_objectid(leaf, | ||
553 | dref); | ||
554 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
555 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | ||
556 | root = btrfs_extent_data_ref_root(leaf, dref); | ||
557 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, | ||
558 | bytenr, count); | ||
559 | break; | ||
560 | } | ||
561 | default: | ||
562 | WARN_ON(1); | ||
563 | } | ||
564 | BUG_ON(ret); | ||
565 | } | ||
566 | |||
567 | return ret; | ||
568 | } | ||
569 | |||
570 | /* | ||
571 | * this adds all existing backrefs (inline backrefs, backrefs and delayed | ||
572 | * refs) for the given bytenr to the refs list, merges duplicates and resolves | ||
573 | * indirect refs to their parent bytenr. | ||
574 | * When roots are found, they're added to the roots list | ||
575 | * | ||
576 | * FIXME some caching might speed things up | ||
577 | */ | ||
578 | static int find_parent_nodes(struct btrfs_trans_handle *trans, | ||
579 | struct btrfs_fs_info *fs_info, u64 bytenr, | ||
580 | u64 seq, struct ulist *refs, struct ulist *roots) | ||
581 | { | ||
582 | struct btrfs_key key; | ||
583 | struct btrfs_path *path; | ||
584 | struct btrfs_key info_key = { 0 }; | ||
585 | struct btrfs_delayed_ref_root *delayed_refs = NULL; | ||
586 | struct btrfs_delayed_ref_head *head = NULL; | ||
587 | int info_level = 0; | ||
588 | int ret; | ||
589 | struct list_head prefs_delayed; | ||
590 | struct list_head prefs; | ||
591 | struct __prelim_ref *ref; | ||
592 | |||
593 | INIT_LIST_HEAD(&prefs); | ||
594 | INIT_LIST_HEAD(&prefs_delayed); | ||
595 | |||
596 | key.objectid = bytenr; | ||
597 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
598 | key.offset = (u64)-1; | ||
599 | |||
600 | path = btrfs_alloc_path(); | ||
601 | if (!path) | ||
602 | return -ENOMEM; | ||
603 | |||
604 | /* | ||
605 | * grab both a lock on the path and a lock on the delayed ref head. | ||
606 | * We need both to get a consistent picture of how the refs look | ||
607 | * at a specified point in time. | ||
608 | */ | ||
609 | again: | ||
610 | ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0); | ||
611 | if (ret < 0) | ||
612 | goto out; | ||
613 | BUG_ON(ret == 0); | ||
614 | |||
615 | /* | ||
616 | * check whether updates for this ref are queued, and lock the head | ||
617 | */ | ||
618 | delayed_refs = &trans->transaction->delayed_refs; | ||
619 | spin_lock(&delayed_refs->lock); | ||
620 | head = btrfs_find_delayed_ref_head(trans, bytenr); | ||
621 | if (head) { | ||
622 | if (!mutex_trylock(&head->mutex)) { | ||
623 | atomic_inc(&head->node.refs); | ||
624 | spin_unlock(&delayed_refs->lock); | ||
625 | |||
626 | btrfs_release_path(path); | ||
627 | |||
628 | /* | ||
629 | * Mutex was contended, block until it's | ||
630 | * released and try again | ||
631 | */ | ||
632 | mutex_lock(&head->mutex); | ||
633 | mutex_unlock(&head->mutex); | ||
634 | btrfs_put_delayed_ref(&head->node); | ||
635 | goto again; | ||
636 | } | ||
637 | ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed); | ||
638 | if (ret) | ||
639 | goto out; | ||
640 | } | ||
641 | spin_unlock(&delayed_refs->lock); | ||
642 | |||
643 | if (path->slots[0]) { | ||
644 | struct extent_buffer *leaf; | ||
645 | int slot; | ||
646 | |||
647 | leaf = path->nodes[0]; | ||
648 | slot = path->slots[0] - 1; | ||
649 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
650 | if (key.objectid == bytenr && | ||
651 | key.type == BTRFS_EXTENT_ITEM_KEY) { | ||
652 | ret = __add_inline_refs(fs_info, path, bytenr, | ||
653 | &info_key, &info_level, &prefs); | ||
654 | if (ret) | ||
655 | goto out; | ||
656 | ret = __add_keyed_refs(fs_info, path, bytenr, &info_key, | ||
657 | info_level, &prefs); | ||
658 | if (ret) | ||
659 | goto out; | ||
660 | } | ||
661 | } | ||
662 | btrfs_release_path(path); | ||
663 | |||
664 | /* | ||
665 | * when adding the delayed refs above, the info_key might not have | ||
666 | * been known yet. Go over the list and replace the missing keys | ||
667 | */ | ||
668 | list_for_each_entry(ref, &prefs_delayed, list) { | ||
669 | if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0) | ||
670 | memcpy(&ref->key, &info_key, sizeof(ref->key)); | ||
671 | } | ||
672 | list_splice_init(&prefs_delayed, &prefs); | ||
673 | |||
674 | ret = __merge_refs(&prefs, 1); | ||
675 | if (ret) | ||
676 | goto out; | ||
677 | |||
678 | ret = __resolve_indirect_refs(fs_info, &prefs); | ||
679 | if (ret) | ||
680 | goto out; | ||
681 | |||
682 | ret = __merge_refs(&prefs, 2); | ||
683 | if (ret) | ||
684 | goto out; | ||
685 | |||
686 | while (!list_empty(&prefs)) { | ||
687 | ref = list_first_entry(&prefs, struct __prelim_ref, list); | ||
688 | list_del(&ref->list); | ||
689 | if (ref->count < 0) | ||
690 | WARN_ON(1); | ||
691 | if (ref->count && ref->root_id && ref->parent == 0) { | ||
692 | /* no parent == root of tree */ | ||
693 | ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); | ||
694 | BUG_ON(ret < 0); | ||
695 | } | ||
696 | if (ref->count && ref->parent) { | ||
697 | ret = ulist_add(refs, ref->parent, 0, GFP_NOFS); | ||
698 | BUG_ON(ret < 0); | ||
699 | } | ||
700 | kfree(ref); | ||
701 | } | ||
702 | |||
703 | out: | ||
704 | if (head) | ||
705 | mutex_unlock(&head->mutex); | ||
706 | btrfs_free_path(path); | ||
707 | while (!list_empty(&prefs)) { | ||
708 | ref = list_first_entry(&prefs, struct __prelim_ref, list); | ||
709 | list_del(&ref->list); | ||
710 | kfree(ref); | ||
711 | } | ||
712 | while (!list_empty(&prefs_delayed)) { | ||
713 | ref = list_first_entry(&prefs_delayed, struct __prelim_ref, | ||
714 | list); | ||
715 | list_del(&ref->list); | ||
716 | kfree(ref); | ||
717 | } | ||
718 | |||
719 | return ret; | ||
720 | } | ||
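The trylock/retry sequence in the middle of find_parent_nodes() is worth calling out, since the same idiom appears elsewhere in the delayed-ref code: if the head's mutex is contended while the delayed_refs spinlock is held, the function takes a reference on the head, drops every lock (including the tree path), blocks on the mutex purely to wait for the current holder to finish, and then restarts the whole lookup. A minimal sketch of just that pattern, using the names from the function above (error handling omitted):

	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (head && !mutex_trylock(&head->mutex)) {
		atomic_inc(&head->node.refs);	/* keep head alive while waiting */
		spin_unlock(&delayed_refs->lock);
		btrfs_release_path(path);	/* never block while holding tree locks */
		mutex_lock(&head->mutex);	/* sleep until the holder is done */
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref(&head->node);
		goto again;			/* the refs may have changed; redo the search */
	}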
721 | |||
722 | /* | ||
723 | * Finds all leafs with a reference to the specified combination of bytenr and | ||
724 | * offset. The leafs are collected in the leafs ulist, which is allocated | ||
725 | * here; the caller is responsible for freeing it with ulist_free once | ||
726 | * done. | ||
727 | * | ||
728 | * returns 0 on success, <0 on error | ||
729 | */ | ||
730 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | ||
731 | struct btrfs_fs_info *fs_info, u64 bytenr, | ||
732 | u64 num_bytes, u64 seq, struct ulist **leafs) | ||
733 | { | ||
734 | struct ulist *tmp; | ||
735 | int ret; | ||
736 | |||
737 | tmp = ulist_alloc(GFP_NOFS); | ||
738 | if (!tmp) | ||
739 | return -ENOMEM; | ||
740 | *leafs = ulist_alloc(GFP_NOFS); | ||
741 | if (!*leafs) { | ||
742 | ulist_free(tmp); | ||
743 | return -ENOMEM; | ||
744 | } | ||
745 | |||
746 | ret = find_parent_nodes(trans, fs_info, bytenr, seq, *leafs, tmp); | ||
747 | ulist_free(tmp); | ||
748 | |||
749 | if (ret < 0 && ret != -ENOENT) { | ||
750 | ulist_free(*leafs); | ||
751 | return ret; | ||
752 | } | ||
753 | |||
754 | return 0; | ||
755 | } | ||
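A hypothetical caller of btrfs_find_all_leafs() (the helper is static, so the only real caller is iterate_extent_inodes() below) would iterate the returned ulist and free it when done. In this sketch, trans, fs_info, bytenr and seq are assumed to come from the surrounding context, and 0 is passed for num_bytes, which the helper as shown does not use:

	struct ulist *leafs;
	struct ulist_node *node = NULL;
	int ret;

	ret = btrfs_find_all_leafs(trans, fs_info, bytenr, 0, seq, &leafs);
	if (ret)
		return ret;
	while ((node = ulist_next(leafs, node)))	/* NULL starts, NULL ends */
		pr_debug("leaf %llu references extent %llu\n", node->val, bytenr);
	ulist_free(leafs);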
756 | |||
757 | /* | ||
758 | * walk all backrefs for a given extent to find all roots that reference this | ||
759 | * extent. Walking a backref means finding all extents that reference this | ||
760 | * extent and in turn walking the backrefs of those, too. Naturally this is a | ||
761 | * recursive process, but here it is implemented in an iterative fashion: We | ||
762 | * find all referencing extents for the extent in question and put them on a | ||
763 | * list. In turn, we find all referencing extents for those, further appending | ||
764 | * to the list. The way we iterate the list allows adding more elements after | ||
765 | * the current one while iterating. The process stops when we reach the | ||
766 | * end of the list. Found roots are added to the roots list. | ||
767 | * | ||
768 | * returns 0 on success, < 0 on error. | ||
769 | */ | ||
770 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | ||
771 | struct btrfs_fs_info *fs_info, u64 bytenr, | ||
772 | u64 num_bytes, u64 seq, struct ulist **roots) | ||
773 | { | ||
774 | struct ulist *tmp; | ||
775 | struct ulist_node *node = NULL; | ||
776 | int ret; | ||
777 | |||
778 | tmp = ulist_alloc(GFP_NOFS); | ||
779 | if (!tmp) | ||
780 | return -ENOMEM; | ||
781 | *roots = ulist_alloc(GFP_NOFS); | ||
782 | if (!*roots) { | ||
783 | ulist_free(tmp); | ||
784 | return -ENOMEM; | ||
785 | } | ||
786 | |||
787 | while (1) { | ||
788 | ret = find_parent_nodes(trans, fs_info, bytenr, seq, | ||
789 | tmp, *roots); | ||
790 | if (ret < 0 && ret != -ENOENT) { | ||
791 | ulist_free(tmp); | ||
792 | ulist_free(*roots); | ||
793 | return ret; | ||
794 | } | ||
795 | node = ulist_next(tmp, node); | ||
796 | if (!node) | ||
797 | break; | ||
798 | bytenr = node->val; | ||
799 | } | ||
800 | |||
801 | ulist_free(tmp); | ||
802 | return 0; | ||
803 | } | ||
804 | |||
34 | 805 | ||
35 | static int __inode_info(u64 inum, u64 ioff, u8 key_type, | 806 | static int __inode_info(u64 inum, u64 ioff, u8 key_type, |
36 | struct btrfs_root *fs_root, struct btrfs_path *path, | 807 | struct btrfs_root *fs_root, struct btrfs_path *path, |
@@ -181,8 +952,11 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
181 | btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); | 952 | btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); |
182 | if (found_key->type != BTRFS_EXTENT_ITEM_KEY || | 953 | if (found_key->type != BTRFS_EXTENT_ITEM_KEY || |
183 | found_key->objectid > logical || | 954 | found_key->objectid > logical || |
184 | found_key->objectid + found_key->offset <= logical) | 955 | found_key->objectid + found_key->offset <= logical) { |
956 | pr_debug("logical %llu is not within any extent\n", | ||
957 | (unsigned long long)logical); | ||
185 | return -ENOENT; | 958 | return -ENOENT; |
959 | } | ||
186 | 960 | ||
187 | eb = path->nodes[0]; | 961 | eb = path->nodes[0]; |
188 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | 962 | item_size = btrfs_item_size_nr(eb, path->slots[0]); |
@@ -191,6 +965,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
191 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); | 965 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); |
192 | flags = btrfs_extent_flags(eb, ei); | 966 | flags = btrfs_extent_flags(eb, ei); |
193 | 967 | ||
968 | pr_debug("logical %llu is at position %llu within the extent (%llu " | ||
969 | "EXTENT_ITEM %llu) flags %#llx size %u\n", | ||
970 | (unsigned long long)logical, | ||
971 | (unsigned long long)(logical - found_key->objectid), | ||
972 | (unsigned long long)found_key->objectid, | ||
973 | (unsigned long long)found_key->offset, | ||
974 | (unsigned long long)flags, item_size); | ||
194 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) | 975 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) |
195 | return BTRFS_EXTENT_FLAG_TREE_BLOCK; | 976 | return BTRFS_EXTENT_FLAG_TREE_BLOCK; |
196 | if (flags & BTRFS_EXTENT_FLAG_DATA) | 977 | if (flags & BTRFS_EXTENT_FLAG_DATA) |
@@ -287,128 +1068,11 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | |||
287 | return 0; | 1068 | return 0; |
288 | } | 1069 | } |
289 | 1070 | ||
290 | static int __data_list_add(struct list_head *head, u64 inum, | 1071 | static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, |
291 | u64 extent_data_item_offset, u64 root) | 1072 | struct btrfs_path *path, u64 logical, |
292 | { | 1073 | u64 orig_extent_item_objectid, |
293 | struct __data_ref *ref; | 1074 | u64 extent_item_pos, u64 root, |
294 | 1075 | iterate_extent_inodes_t *iterate, void *ctx) | |
295 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
296 | if (!ref) | ||
297 | return -ENOMEM; | ||
298 | |||
299 | ref->inum = inum; | ||
300 | ref->extent_data_item_offset = extent_data_item_offset; | ||
301 | ref->root = root; | ||
302 | list_add_tail(&ref->list, head); | ||
303 | |||
304 | return 0; | ||
305 | } | ||
306 | |||
307 | static int __data_list_add_eb(struct list_head *head, struct extent_buffer *eb, | ||
308 | struct btrfs_extent_data_ref *dref) | ||
309 | { | ||
310 | return __data_list_add(head, btrfs_extent_data_ref_objectid(eb, dref), | ||
311 | btrfs_extent_data_ref_offset(eb, dref), | ||
312 | btrfs_extent_data_ref_root(eb, dref)); | ||
313 | } | ||
314 | |||
315 | static int __shared_list_add(struct list_head *head, u64 disk_byte) | ||
316 | { | ||
317 | struct __shared_ref *ref; | ||
318 | |||
319 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
320 | if (!ref) | ||
321 | return -ENOMEM; | ||
322 | |||
323 | ref->disk_byte = disk_byte; | ||
324 | list_add_tail(&ref->list, head); | ||
325 | |||
326 | return 0; | ||
327 | } | ||
328 | |||
329 | static int __iter_shared_inline_ref_inodes(struct btrfs_fs_info *fs_info, | ||
330 | u64 logical, u64 inum, | ||
331 | u64 extent_data_item_offset, | ||
332 | u64 extent_offset, | ||
333 | struct btrfs_path *path, | ||
334 | struct list_head *data_refs, | ||
335 | iterate_extent_inodes_t *iterate, | ||
336 | void *ctx) | ||
337 | { | ||
338 | u64 ref_root; | ||
339 | u32 item_size; | ||
340 | struct btrfs_key key; | ||
341 | struct extent_buffer *eb; | ||
342 | struct btrfs_extent_item *ei; | ||
343 | struct btrfs_extent_inline_ref *eiref; | ||
344 | struct __data_ref *ref; | ||
345 | int ret; | ||
346 | int type; | ||
347 | int last; | ||
348 | unsigned long ptr = 0; | ||
349 | |||
350 | WARN_ON(!list_empty(data_refs)); | ||
351 | ret = extent_from_logical(fs_info, logical, path, &key); | ||
352 | if (ret & BTRFS_EXTENT_FLAG_DATA) | ||
353 | ret = -EIO; | ||
354 | if (ret < 0) | ||
355 | goto out; | ||
356 | |||
357 | eb = path->nodes[0]; | ||
358 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); | ||
359 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | ||
360 | |||
361 | ret = 0; | ||
362 | ref_root = 0; | ||
363 | /* | ||
364 | * as done in iterate_extent_inodes, we first build a list of refs to | ||
365 | * iterate, then free the path and then iterate them to avoid deadlocks. | ||
366 | */ | ||
367 | do { | ||
368 | last = __get_extent_inline_ref(&ptr, eb, ei, item_size, | ||
369 | &eiref, &type); | ||
370 | if (last < 0) { | ||
371 | ret = last; | ||
372 | goto out; | ||
373 | } | ||
374 | if (type == BTRFS_TREE_BLOCK_REF_KEY || | ||
375 | type == BTRFS_SHARED_BLOCK_REF_KEY) { | ||
376 | ref_root = btrfs_extent_inline_ref_offset(eb, eiref); | ||
377 | ret = __data_list_add(data_refs, inum, | ||
378 | extent_data_item_offset, | ||
379 | ref_root); | ||
380 | } | ||
381 | } while (!ret && !last); | ||
382 | |||
383 | btrfs_release_path(path); | ||
384 | |||
385 | if (ref_root == 0) { | ||
386 | printk(KERN_ERR "btrfs: failed to find tree block ref " | ||
387 | "for shared data backref %llu\n", logical); | ||
388 | WARN_ON(1); | ||
389 | ret = -EIO; | ||
390 | } | ||
391 | |||
392 | out: | ||
393 | while (!list_empty(data_refs)) { | ||
394 | ref = list_first_entry(data_refs, struct __data_ref, list); | ||
395 | list_del(&ref->list); | ||
396 | if (!ret) | ||
397 | ret = iterate(ref->inum, extent_offset + | ||
398 | ref->extent_data_item_offset, | ||
399 | ref->root, ctx); | ||
400 | kfree(ref); | ||
401 | } | ||
402 | |||
403 | return ret; | ||
404 | } | ||
405 | |||
406 | static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info, | ||
407 | u64 logical, u64 orig_extent_item_objectid, | ||
408 | u64 extent_offset, struct btrfs_path *path, | ||
409 | struct list_head *data_refs, | ||
410 | iterate_extent_inodes_t *iterate, | ||
411 | void *ctx) | ||
412 | { | 1076 | { |
413 | u64 disk_byte; | 1077 | u64 disk_byte; |
414 | struct btrfs_key key; | 1078 | struct btrfs_key key; |
@@ -416,8 +1080,10 @@ static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info, | |||
416 | struct extent_buffer *eb; | 1080 | struct extent_buffer *eb; |
417 | int slot; | 1081 | int slot; |
418 | int nritems; | 1082 | int nritems; |
419 | int ret; | 1083 | int ret = 0; |
420 | int found = 0; | 1084 | int extent_type; |
1085 | u64 data_offset; | ||
1086 | u64 data_len; | ||
421 | 1087 | ||
422 | eb = read_tree_block(fs_info->tree_root, logical, | 1088 | eb = read_tree_block(fs_info->tree_root, logical, |
423 | fs_info->tree_root->leafsize, 0); | 1089 | fs_info->tree_root->leafsize, 0); |
@@ -435,149 +1101,99 @@ static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info, | |||
435 | if (key.type != BTRFS_EXTENT_DATA_KEY) | 1101 | if (key.type != BTRFS_EXTENT_DATA_KEY) |
436 | continue; | 1102 | continue; |
437 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | 1103 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); |
438 | if (!fi) { | 1104 | extent_type = btrfs_file_extent_type(eb, fi); |
439 | free_extent_buffer(eb); | 1105 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) |
440 | return -EIO; | 1106 | continue; |
441 | } | 1107 | /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ |
442 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); | 1108 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); |
443 | if (disk_byte != orig_extent_item_objectid) { | 1109 | if (disk_byte != orig_extent_item_objectid) |
444 | if (found) | 1110 | continue; |
445 | break; | ||
446 | else | ||
447 | continue; | ||
448 | } | ||
449 | ++found; | ||
450 | ret = __iter_shared_inline_ref_inodes(fs_info, logical, | ||
451 | key.objectid, | ||
452 | key.offset, | ||
453 | extent_offset, path, | ||
454 | data_refs, | ||
455 | iterate, ctx); | ||
456 | if (ret) | ||
457 | break; | ||
458 | } | ||
459 | 1111 | ||
460 | if (!found) { | 1112 | data_offset = btrfs_file_extent_offset(eb, fi); |
461 | printk(KERN_ERR "btrfs: failed to follow shared data backref " | 1113 | data_len = btrfs_file_extent_num_bytes(eb, fi); |
462 | "to parent %llu\n", logical); | 1114 | |
463 | WARN_ON(1); | 1115 | if (extent_item_pos < data_offset || |
464 | ret = -EIO; | 1116 | extent_item_pos >= data_offset + data_len) |
1117 | continue; | ||
1118 | |||
1119 | pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " | ||
1120 | "root %llu\n", orig_extent_item_objectid, | ||
1121 | key.objectid, key.offset, root); | ||
1122 | ret = iterate(key.objectid, | ||
1123 | key.offset + (extent_item_pos - data_offset), | ||
1124 | root, ctx); | ||
1125 | if (ret) { | ||
1126 | pr_debug("stopping iteration because ret=%d\n", ret); | ||
1127 | break; | ||
1128 | } | ||
465 | } | 1129 | } |
466 | 1130 | ||
467 | free_extent_buffer(eb); | 1131 | free_extent_buffer(eb); |
1132 | |||
468 | return ret; | 1133 | return ret; |
469 | } | 1134 | } |
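The offset arithmetic in iterate_leaf_refs() can be made concrete with a small worked example (numbers hypothetical). A file extent item with key (inum EXTENT_DATA K) whose btrfs_file_extent_offset is D and btrfs_file_extent_num_bytes is L maps extent bytes [D, D+L) to file bytes [K, K+L). The item is therefore only a match when D <= extent_item_pos < D+L, and the file offset handed to iterate() is K + (extent_item_pos - D). With K = 4096, D = 8192, L = 16384 and extent_item_pos = 12288, the callback receives file offset 4096 + (12288 - 8192) = 8192.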
470 | 1135 | ||
471 | /* | 1136 | /* |
472 | * calls iterate() for every inode that references the extent identified by | 1137 | * calls iterate() for every inode that references the extent identified by |
473 | * the given parameters. will use the path given as a parameter and return it | 1138 | * the given parameters. |
474 | * released. | ||
475 | * when the iterator function returns a non-zero value, iteration stops. | 1139 | * when the iterator function returns a non-zero value, iteration stops. |
1140 | * path is guaranteed to be in released state when iterate() is called. | ||
476 | */ | 1141 | */ |
477 | int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | 1142 | int iterate_extent_inodes(struct btrfs_fs_info *fs_info, |
478 | struct btrfs_path *path, | 1143 | struct btrfs_path *path, |
479 | u64 extent_item_objectid, | 1144 | u64 extent_item_objectid, u64 extent_item_pos, |
480 | u64 extent_offset, | ||
481 | iterate_extent_inodes_t *iterate, void *ctx) | 1145 | iterate_extent_inodes_t *iterate, void *ctx) |
482 | { | 1146 | { |
483 | unsigned long ptr = 0; | ||
484 | int last; | ||
485 | int ret; | 1147 | int ret; |
486 | int type; | ||
487 | u64 logical; | ||
488 | u32 item_size; | ||
489 | struct btrfs_extent_inline_ref *eiref; | ||
490 | struct btrfs_extent_data_ref *dref; | ||
491 | struct extent_buffer *eb; | ||
492 | struct btrfs_extent_item *ei; | ||
493 | struct btrfs_key key; | ||
494 | struct list_head data_refs = LIST_HEAD_INIT(data_refs); | 1148 | struct list_head data_refs = LIST_HEAD_INIT(data_refs); |
495 | struct list_head shared_refs = LIST_HEAD_INIT(shared_refs); | 1149 | struct list_head shared_refs = LIST_HEAD_INIT(shared_refs); |
496 | struct __data_ref *ref_d; | 1150 | struct btrfs_trans_handle *trans; |
497 | struct __shared_ref *ref_s; | 1151 | struct ulist *refs; |
1152 | struct ulist *roots; | ||
1153 | struct ulist_node *ref_node = NULL; | ||
1154 | struct ulist_node *root_node = NULL; | ||
1155 | struct seq_list seq_elem; | ||
1156 | struct btrfs_delayed_ref_root *delayed_refs; | ||
1157 | |||
1158 | trans = btrfs_join_transaction(fs_info->extent_root); | ||
1159 | if (IS_ERR(trans)) | ||
1160 | return PTR_ERR(trans); | ||
1161 | |||
1162 | pr_debug("resolving all inodes for extent %llu\n", | ||
1163 | extent_item_objectid); | ||
1164 | |||
1165 | delayed_refs = &trans->transaction->delayed_refs; | ||
1166 | spin_lock(&delayed_refs->lock); | ||
1167 | btrfs_get_delayed_seq(delayed_refs, &seq_elem); | ||
1168 | spin_unlock(&delayed_refs->lock); | ||
1169 | |||
1170 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, | ||
1171 | extent_item_pos, seq_elem.seq, | ||
1172 | &refs); | ||
498 | 1173 | ||
499 | eb = path->nodes[0]; | 1174 | if (ret) |
500 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); | 1175 | goto out; |
501 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | ||
502 | |||
503 | /* first we iterate the inline refs, ... */ | ||
504 | do { | ||
505 | last = __get_extent_inline_ref(&ptr, eb, ei, item_size, | ||
506 | &eiref, &type); | ||
507 | if (last == -ENOENT) { | ||
508 | ret = 0; | ||
509 | break; | ||
510 | } | ||
511 | if (last < 0) { | ||
512 | ret = last; | ||
513 | break; | ||
514 | } | ||
515 | |||
516 | if (type == BTRFS_EXTENT_DATA_REF_KEY) { | ||
517 | dref = (struct btrfs_extent_data_ref *)(&eiref->offset); | ||
518 | ret = __data_list_add_eb(&data_refs, eb, dref); | ||
519 | } else if (type == BTRFS_SHARED_DATA_REF_KEY) { | ||
520 | logical = btrfs_extent_inline_ref_offset(eb, eiref); | ||
521 | ret = __shared_list_add(&shared_refs, logical); | ||
522 | } | ||
523 | } while (!ret && !last); | ||
524 | 1176 | ||
525 | /* ... then we proceed to in-tree references and ... */ | 1177 | while (!ret && (ref_node = ulist_next(refs, ref_node))) { |
526 | while (!ret) { | 1178 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, -1, |
527 | ++path->slots[0]; | 1179 | seq_elem.seq, &roots); |
528 | if (path->slots[0] > btrfs_header_nritems(eb)) { | 1180 | if (ret) |
529 | ret = btrfs_next_leaf(fs_info->extent_root, path); | ||
530 | if (ret) { | ||
531 | if (ret == 1) | ||
532 | ret = 0; /* we're done */ | ||
533 | break; | ||
534 | } | ||
535 | eb = path->nodes[0]; | ||
536 | } | ||
537 | btrfs_item_key_to_cpu(eb, &key, path->slots[0]); | ||
538 | if (key.objectid != extent_item_objectid) | ||
539 | break; | 1181 | break; |
540 | if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { | 1182 | while (!ret && (root_node = ulist_next(roots, root_node))) { |
541 | dref = btrfs_item_ptr(eb, path->slots[0], | 1183 | pr_debug("root %llu references leaf %llu\n", |
542 | struct btrfs_extent_data_ref); | 1184 | root_node->val, ref_node->val); |
543 | ret = __data_list_add_eb(&data_refs, eb, dref); | 1185 | ret = iterate_leaf_refs(fs_info, path, ref_node->val, |
544 | } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { | 1186 | extent_item_objectid, |
545 | ret = __shared_list_add(&shared_refs, key.offset); | 1187 | extent_item_pos, root_node->val, |
1188 | iterate, ctx); | ||
546 | } | 1189 | } |
547 | } | 1190 | } |
548 | 1191 | ||
549 | btrfs_release_path(path); | 1192 | ulist_free(refs); |
550 | 1193 | ulist_free(roots); | |
551 | /* | 1194 | out: |
552 | * ... only at the very end we can process the refs we found. this is | 1195 | btrfs_put_delayed_seq(delayed_refs, &seq_elem); |
553 | * because the iterator function we call is allowed to make tree lookups | 1196 | btrfs_end_transaction(trans, fs_info->extent_root); |
554 | * and we have to avoid deadlocks. additionally, we need more tree | ||
555 | * lookups ourselves for shared data refs. | ||
556 | */ | ||
557 | while (!list_empty(&data_refs)) { | ||
558 | ref_d = list_first_entry(&data_refs, struct __data_ref, list); | ||
559 | list_del(&ref_d->list); | ||
560 | if (!ret) | ||
561 | ret = iterate(ref_d->inum, extent_offset + | ||
562 | ref_d->extent_data_item_offset, | ||
563 | ref_d->root, ctx); | ||
564 | kfree(ref_d); | ||
565 | } | ||
566 | |||
567 | while (!list_empty(&shared_refs)) { | ||
568 | ref_s = list_first_entry(&shared_refs, struct __shared_ref, | ||
569 | list); | ||
570 | list_del(&ref_s->list); | ||
571 | if (!ret) | ||
572 | ret = __iter_shared_inline_ref(fs_info, | ||
573 | ref_s->disk_byte, | ||
574 | extent_item_objectid, | ||
575 | extent_offset, path, | ||
576 | &data_refs, | ||
577 | iterate, ctx); | ||
578 | kfree(ref_s); | ||
579 | } | ||
580 | |||
581 | return ret; | 1197 | return ret; |
582 | } | 1198 | } |
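For reference, a callback passed to iterate_extent_inodes() has the iterate_extent_inodes_t signature from backref.h and is invoked with the inode number, the file offset computed above, and the owning root. A purely illustrative callback (the name and body are not part of this patch) might be:

	/* illustrative iterate_extent_inodes_t callback, not patch code */
	static int dump_extent_inode(u64 inum, u64 offset, u64 root, void *ctx)
	{
		pr_debug("inode %llu offset %llu root %llu\n",
			 (unsigned long long)inum, (unsigned long long)offset,
			 (unsigned long long)root);
		return 0;	/* any non-zero return stops the iteration */
	}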
583 | 1199 | ||
@@ -586,19 +1202,20 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | |||
586 | iterate_extent_inodes_t *iterate, void *ctx) | 1202 | iterate_extent_inodes_t *iterate, void *ctx) |
587 | { | 1203 | { |
588 | int ret; | 1204 | int ret; |
589 | u64 offset; | 1205 | u64 extent_item_pos; |
590 | struct btrfs_key found_key; | 1206 | struct btrfs_key found_key; |
591 | 1207 | ||
592 | ret = extent_from_logical(fs_info, logical, path, | 1208 | ret = extent_from_logical(fs_info, logical, path, |
593 | &found_key); | 1209 | &found_key); |
1210 | btrfs_release_path(path); | ||
594 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) | 1211 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) |
595 | ret = -EINVAL; | 1212 | ret = -EINVAL; |
596 | if (ret < 0) | 1213 | if (ret < 0) |
597 | return ret; | 1214 | return ret; |
598 | 1215 | ||
599 | offset = logical - found_key.objectid; | 1216 | extent_item_pos = logical - found_key.objectid; |
600 | ret = iterate_extent_inodes(fs_info, path, found_key.objectid, | 1217 | ret = iterate_extent_inodes(fs_info, path, found_key.objectid, |
601 | offset, iterate, ctx); | 1218 | extent_item_pos, iterate, ctx); |
602 | 1219 | ||
603 | return ret; | 1220 | return ret; |
604 | } | 1221 | } |
@@ -643,6 +1260,10 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | |||
643 | for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) { | 1260 | for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) { |
644 | name_len = btrfs_inode_ref_name_len(eb, iref); | 1261 | name_len = btrfs_inode_ref_name_len(eb, iref); |
645 | /* path must be released before calling iterate()! */ | 1262 | /* path must be released before calling iterate()! */ |
1263 | pr_debug("following ref at offset %u for inode %llu in " | ||
1264 | "tree %llu\n", cur, | ||
1265 | (unsigned long long)found_key.objectid, | ||
1266 | (unsigned long long)fs_root->objectid); | ||
646 | ret = iterate(parent, iref, eb, ctx); | 1267 | ret = iterate(parent, iref, eb, ctx); |
647 | if (ret) { | 1268 | if (ret) { |
648 | free_extent_buffer(eb); | 1269 | free_extent_buffer(eb); |
@@ -683,10 +1304,14 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | |||
683 | return PTR_ERR(fspath); | 1304 | return PTR_ERR(fspath); |
684 | 1305 | ||
685 | if (fspath > fspath_min) { | 1306 | if (fspath > fspath_min) { |
1307 | pr_debug("path resolved: %s\n", fspath); | ||
686 | ipath->fspath->val[i] = (u64)(unsigned long)fspath; | 1308 | ipath->fspath->val[i] = (u64)(unsigned long)fspath; |
687 | ++ipath->fspath->elem_cnt; | 1309 | ++ipath->fspath->elem_cnt; |
688 | ipath->fspath->bytes_left = fspath - fspath_min; | 1310 | ipath->fspath->bytes_left = fspath - fspath_min; |
689 | } else { | 1311 | } else { |
1312 | pr_debug("missed path, not enough space. missing bytes: %lu, " | ||
1313 | "constructed so far: %s\n", | ||
1314 | (unsigned long)(fspath_min - fspath), fspath_min); | ||
690 | ++ipath->fspath->elem_missed; | 1315 | ++ipath->fspath->elem_missed; |
691 | ipath->fspath->bytes_missing += fspath_min - fspath; | 1316 | ipath->fspath->bytes_missing += fspath_min - fspath; |
692 | ipath->fspath->bytes_left = 0; | 1317 | ipath->fspath->bytes_left = 0; |
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 92618837cb8f..d00dfa9ca934 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #define __BTRFS_BACKREF__ | 20 | #define __BTRFS_BACKREF__ |
21 | 21 | ||
22 | #include "ioctl.h" | 22 | #include "ioctl.h" |
23 | #include "ulist.h" | ||
23 | 24 | ||
24 | struct inode_fs_paths { | 25 | struct inode_fs_paths { |
25 | struct btrfs_path *btrfs_path; | 26 | struct btrfs_path *btrfs_path; |
@@ -54,6 +55,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | |||
54 | 55 | ||
55 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); | 56 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); |
56 | 57 | ||
58 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | ||
59 | struct btrfs_fs_info *fs_info, u64 bytenr, | ||
60 | u64 num_bytes, u64 seq, struct ulist **roots); | ||
61 | |||
57 | struct btrfs_data_container *init_data_container(u32 total_bytes); | 62 | struct btrfs_data_container *init_data_container(u32 total_bytes); |
58 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | 63 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, |
59 | struct btrfs_path *path); | 64 | struct btrfs_path *path); |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 634608d2a6d0..9b9b15fd5204 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -51,6 +51,9 @@ struct btrfs_inode { | |||
51 | /* held while logging the inode in tree-log.c */ | 51 | /* held while logging the inode in tree-log.c */ |
52 | struct mutex log_mutex; | 52 | struct mutex log_mutex; |
53 | 53 | ||
54 | /* held while doing delalloc reservations */ | ||
55 | struct mutex delalloc_mutex; | ||
56 | |||
54 | /* used to order data wrt metadata */ | 57 | /* used to order data wrt metadata */ |
55 | struct btrfs_ordered_inode_tree ordered_tree; | 58 | struct btrfs_ordered_inode_tree ordered_tree; |
56 | 59 | ||
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c new file mode 100644 index 000000000000..b669a7d8e499 --- /dev/null +++ b/fs/btrfs/check-integrity.c | |||
@@ -0,0 +1,3069 @@ | |||
1 | /* | ||
2 | * Copyright (C) STRATO AG 2011. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | |||
19 | /* | ||
20 | * This module can be used to catch cases when the btrfs kernel | ||
21 | * code executes write requests to the disk that bring the file | ||
22 | * system into an inconsistent state. In such a state, a power-loss | ||
23 | * or kernel panic event would cause the data on disk to be | ||
24 | * lost or at least damaged. | ||
25 | * | ||
26 | * Code is added that examines all block write requests during | ||
27 | * runtime (including writes of the super block). Three rules | ||
28 | * are verified and an error is printed on violation of the | ||
29 | * rules: | ||
30 | * 1. It is not allowed to write a disk block which is | ||
31 | * currently referenced by the super block (either directly | ||
32 | * or indirectly). | ||
33 | * 2. When a super block is written, it is verified that all | ||
34 | * referenced (directly or indirectly) blocks fulfill the | ||
35 | * following requirements: | ||
36 | * 2a. All referenced blocks have either been present when | ||
37 | * the file system was mounted (i.e., they have been | ||
38 | * referenced by the super block), or they have been | ||
39 | * written since then, the write completion callback | ||
40 | * was called, and a FLUSH request to the device where | ||
41 | * these blocks are located was received and completed. | ||
42 | * 2b. All referenced blocks need to have a generation | ||
43 | * number which is equal to the parent's number. | ||
44 | * | ||
45 | * One issue that was found using this module was that the log | ||
46 | * tree on disk became temporarily corrupted because disk blocks | ||
47 | * that had been in use for the log tree had been freed and | ||
48 | * reused too early, while being referenced by the written super | ||
49 | * block. | ||
50 | * | ||
51 | * The search term in the kernel log that can be used to filter | ||
52 | * on the existence of detected integrity issues is | ||
53 | * "btrfs: attempt". | ||
54 | * | ||
55 | * The integrity check is enabled via mount options. These | ||
56 | * mount options are only supported if the integrity check | ||
57 | * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY. | ||
58 | * | ||
59 | * Example #1, apply integrity checks to all metadata: | ||
60 | * mount /dev/sdb1 /mnt -o check_int | ||
61 | * | ||
62 | * Example #2, apply integrity checks to all metadata and | ||
63 | * to data extents: | ||
64 | * mount /dev/sdb1 /mnt -o check_int_data | ||
65 | * | ||
66 | * Example #3, apply integrity checks to all metadata and dump | ||
67 | * the tree that the super block references to kernel messages | ||
68 | * each time after a super block was written: | ||
69 | * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263 | ||
70 | * | ||
71 | * If the integrity check tool is included and activated in | ||
72 | * the mount options, a significant amount of kernel memory is | ||
73 | * used and many additional CPU cycles are spent. Enabling this | ||
74 | * functionality is not intended for normal use. In most | ||
75 | * cases, unless you are a btrfs developer who needs to verify | ||
76 | * the integrity of (super)-block write requests, do not | ||
77 | * enable the config option BTRFS_FS_CHECK_INTEGRITY to | ||
78 | * include and compile the integrity check tool. | ||
79 | */ | ||
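For reference, the print_mask value 263 used in example #3 decodes against the BTRFSIC_PRINT_MASK_* bits defined below as 263 = 0x107 = 0x100 | 0x004 | 0x002 | 0x001, i.e. INITIAL_TREE | TREE_AFTER_SB_WRITE | ROOT_CHUNK_LOG_TREE_LOCATION | SUPERBLOCK_WRITE.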
80 | |||
81 | #include <linux/sched.h> | ||
82 | #include <linux/slab.h> | ||
83 | #include <linux/buffer_head.h> | ||
84 | #include <linux/mutex.h> | ||
85 | #include <linux/crc32c.h> | ||
86 | #include <linux/genhd.h> | ||
87 | #include <linux/blkdev.h> | ||
88 | #include "ctree.h" | ||
89 | #include "disk-io.h" | ||
90 | #include "transaction.h" | ||
91 | #include "extent_io.h" | ||
92 | #include "disk-io.h" | ||
93 | #include "volumes.h" | ||
94 | #include "print-tree.h" | ||
95 | #include "locking.h" | ||
96 | #include "check-integrity.h" | ||
97 | |||
98 | #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 | ||
99 | #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 | ||
100 | #define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100 | ||
101 | #define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051 | ||
102 | #define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807 | ||
103 | #define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530 | ||
104 | #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 | ||
105 | #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, | ||
106 | * excluding " [...]" */ | ||
107 | #define BTRFSIC_BLOCK_SIZE PAGE_SIZE | ||
108 | |||
109 | #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) | ||
110 | |||
111 | /* | ||
112 | * The definition of the bitmask fields for the print_mask. | ||
113 | * They are specified with the mount option check_integrity_print_mask. | ||
114 | */ | ||
115 | #define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001 | ||
116 | #define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002 | ||
117 | #define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004 | ||
118 | #define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008 | ||
119 | #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010 | ||
120 | #define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020 | ||
121 | #define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040 | ||
122 | #define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080 | ||
123 | #define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100 | ||
124 | #define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200 | ||
125 | #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 | ||
126 | #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 | ||
127 | #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 | ||
128 | |||
129 | struct btrfsic_dev_state; | ||
130 | struct btrfsic_state; | ||
131 | |||
132 | struct btrfsic_block { | ||
133 | u32 magic_num; /* only used for debug purposes */ | ||
134 | unsigned int is_metadata:1; /* if it is meta-data, not plain data */ | ||
135 | unsigned int is_superblock:1; /* if it is one of the superblocks */ | ||
136 | unsigned int is_iodone:1; /* if is done by lower subsystem */ | ||
137 | unsigned int iodone_w_error:1; /* error was indicated to endio */ | ||
138 | unsigned int never_written:1; /* block was added because it was | ||
139 | * referenced, not because it was | ||
140 | * written */ | ||
141 | unsigned int mirror_num:2; /* large enough to hold | ||
142 | * BTRFS_SUPER_MIRROR_MAX */ | ||
143 | struct btrfsic_dev_state *dev_state; | ||
144 | u64 dev_bytenr; /* key, physical byte num on disk */ | ||
145 | u64 logical_bytenr; /* logical byte num on disk */ | ||
146 | u64 generation; | ||
147 | struct btrfs_disk_key disk_key; /* extra info to print in case of | ||
148 | * issues, will not always be correct */ | ||
149 | struct list_head collision_resolving_node; /* list node */ | ||
150 | struct list_head all_blocks_node; /* list node */ | ||
151 | |||
152 | /* the following two lists contain block_link items */ | ||
153 | struct list_head ref_to_list; /* list */ | ||
154 | struct list_head ref_from_list; /* list */ | ||
155 | struct btrfsic_block *next_in_same_bio; | ||
156 | void *orig_bio_bh_private; | ||
157 | union { | ||
158 | bio_end_io_t *bio; | ||
159 | bh_end_io_t *bh; | ||
160 | } orig_bio_bh_end_io; | ||
161 | int submit_bio_bh_rw; | ||
162 | u64 flush_gen; /* only valid if !never_written */ | ||
163 | }; | ||
164 | |||
165 | /* | ||
166 | * Elements of this type are allocated dynamically and required because | ||
167 | * each block object can refer to and can be ref from multiple blocks. | ||
168 | * The key to lookup them in the hashtable is the dev_bytenr of | ||
169 | * the block ref to plus the one from the block refered from. | ||
170 | * The fact that they are searchable via a hashtable and that a | ||
171 | * ref_cnt is maintained is not required for the btrfs integrity | ||
172 | * check algorithm itself, it is only used to make the output more | ||
173 | * beautiful in case that an error is detected (an error is defined | ||
174 | * as a write operation to a block while that block is still referenced). | ||
175 | */ | ||
176 | struct btrfsic_block_link { | ||
177 | u32 magic_num; /* only used for debug purposes */ | ||
178 | u32 ref_cnt; | ||
179 | struct list_head node_ref_to; /* list node */ | ||
180 | struct list_head node_ref_from; /* list node */ | ||
181 | struct list_head collision_resolving_node; /* list node */ | ||
182 | struct btrfsic_block *block_ref_to; | ||
183 | struct btrfsic_block *block_ref_from; | ||
184 | u64 parent_generation; | ||
185 | }; | ||
186 | |||
187 | struct btrfsic_dev_state { | ||
188 | u32 magic_num; /* only used for debug purposes */ | ||
189 | struct block_device *bdev; | ||
190 | struct btrfsic_state *state; | ||
191 | struct list_head collision_resolving_node; /* list node */ | ||
192 | struct btrfsic_block dummy_block_for_bio_bh_flush; | ||
193 | u64 last_flush_gen; | ||
194 | char name[BDEVNAME_SIZE]; | ||
195 | }; | ||
196 | |||
197 | struct btrfsic_block_hashtable { | ||
198 | struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE]; | ||
199 | }; | ||
200 | |||
201 | struct btrfsic_block_link_hashtable { | ||
202 | struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE]; | ||
203 | }; | ||
204 | |||
205 | struct btrfsic_dev_state_hashtable { | ||
206 | struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE]; | ||
207 | }; | ||
208 | |||
209 | struct btrfsic_block_data_ctx { | ||
210 | u64 start; /* virtual bytenr */ | ||
211 | u64 dev_bytenr; /* physical bytenr on device */ | ||
212 | u32 len; | ||
213 | struct btrfsic_dev_state *dev; | ||
214 | char *data; | ||
215 | struct buffer_head *bh; /* do not use if set to NULL */ | ||
216 | }; | ||
217 | |||
218 | /* This structure is used to implement recursion without occupying | ||
219 | * any stack space, refer to btrfsic_process_metablock() */ | ||
220 | struct btrfsic_stack_frame { | ||
221 | u32 magic; | ||
222 | u32 nr; | ||
223 | int error; | ||
224 | int i; | ||
225 | int limit_nesting; | ||
226 | int num_copies; | ||
227 | int mirror_num; | ||
228 | struct btrfsic_block *block; | ||
229 | struct btrfsic_block_data_ctx *block_ctx; | ||
230 | struct btrfsic_block *next_block; | ||
231 | struct btrfsic_block_data_ctx next_block_ctx; | ||
232 | struct btrfs_header *hdr; | ||
233 | struct btrfsic_stack_frame *prev; | ||
234 | }; | ||
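The ->prev pointer is what lets btrfsic_process_metablock() replace C recursion with an explicit frame chain: "calling" pushes a freshly allocated frame whose prev points at the current one, and "returning" frees the top frame and resumes its predecessor at the saved position i. A stripped-down sketch of the two operations (illustration only; the real frames carry the full per-level block state shown above):

	/* illustration of the explicit-stack recursion pattern, not patch code */
	struct frame {
		int i;			/* position to resume at after a "return" */
		struct frame *prev;	/* the caller's frame, NULL at the bottom */
	};

	static struct frame *frame_push(struct frame *cur)	/* the "call" */
	{
		struct frame *next = kzalloc(sizeof(*next), GFP_NOFS);

		if (next)
			next->prev = cur;
		return next;
	}

	static struct frame *frame_pop(struct frame *cur)	/* the "return" */
	{
		struct frame *prev = cur->prev;

		kfree(cur);
		return prev;	/* caller resumes its loop at prev->i */
	}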
235 | |||
236 | /* Some state per mounted filesystem */ | ||
237 | struct btrfsic_state { | ||
238 | u32 print_mask; | ||
239 | int include_extent_data; | ||
240 | int csum_size; | ||
241 | struct list_head all_blocks_list; | ||
242 | struct btrfsic_block_hashtable block_hashtable; | ||
243 | struct btrfsic_block_link_hashtable block_link_hashtable; | ||
244 | struct btrfs_root *root; | ||
245 | u64 max_superblock_generation; | ||
246 | struct btrfsic_block *latest_superblock; | ||
247 | }; | ||
248 | |||
249 | static void btrfsic_block_init(struct btrfsic_block *b); | ||
250 | static struct btrfsic_block *btrfsic_block_alloc(void); | ||
251 | static void btrfsic_block_free(struct btrfsic_block *b); | ||
252 | static void btrfsic_block_link_init(struct btrfsic_block_link *n); | ||
253 | static struct btrfsic_block_link *btrfsic_block_link_alloc(void); | ||
254 | static void btrfsic_block_link_free(struct btrfsic_block_link *n); | ||
255 | static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds); | ||
256 | static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void); | ||
257 | static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds); | ||
258 | static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h); | ||
259 | static void btrfsic_block_hashtable_add(struct btrfsic_block *b, | ||
260 | struct btrfsic_block_hashtable *h); | ||
261 | static void btrfsic_block_hashtable_remove(struct btrfsic_block *b); | ||
262 | static struct btrfsic_block *btrfsic_block_hashtable_lookup( | ||
263 | struct block_device *bdev, | ||
264 | u64 dev_bytenr, | ||
265 | struct btrfsic_block_hashtable *h); | ||
266 | static void btrfsic_block_link_hashtable_init( | ||
267 | struct btrfsic_block_link_hashtable *h); | ||
268 | static void btrfsic_block_link_hashtable_add( | ||
269 | struct btrfsic_block_link *l, | ||
270 | struct btrfsic_block_link_hashtable *h); | ||
271 | static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l); | ||
272 | static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( | ||
273 | struct block_device *bdev_ref_to, | ||
274 | u64 dev_bytenr_ref_to, | ||
275 | struct block_device *bdev_ref_from, | ||
276 | u64 dev_bytenr_ref_from, | ||
277 | struct btrfsic_block_link_hashtable *h); | ||
278 | static void btrfsic_dev_state_hashtable_init( | ||
279 | struct btrfsic_dev_state_hashtable *h); | ||
280 | static void btrfsic_dev_state_hashtable_add( | ||
281 | struct btrfsic_dev_state *ds, | ||
282 | struct btrfsic_dev_state_hashtable *h); | ||
283 | static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds); | ||
284 | static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( | ||
285 | struct block_device *bdev, | ||
286 | struct btrfsic_dev_state_hashtable *h); | ||
287 | static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void); | ||
288 | static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf); | ||
289 | static int btrfsic_process_superblock(struct btrfsic_state *state, | ||
290 | struct btrfs_fs_devices *fs_devices); | ||
291 | static int btrfsic_process_metablock(struct btrfsic_state *state, | ||
292 | struct btrfsic_block *block, | ||
293 | struct btrfsic_block_data_ctx *block_ctx, | ||
294 | struct btrfs_header *hdr, | ||
295 | int limit_nesting, int force_iodone_flag); | ||
296 | static int btrfsic_create_link_to_next_block( | ||
297 | struct btrfsic_state *state, | ||
298 | struct btrfsic_block *block, | ||
299 | struct btrfsic_block_data_ctx | ||
300 | *block_ctx, u64 next_bytenr, | ||
301 | int limit_nesting, | ||
302 | struct btrfsic_block_data_ctx *next_block_ctx, | ||
303 | struct btrfsic_block **next_blockp, | ||
304 | int force_iodone_flag, | ||
305 | int *num_copiesp, int *mirror_nump, | ||
306 | struct btrfs_disk_key *disk_key, | ||
307 | u64 parent_generation); | ||
308 | static int btrfsic_handle_extent_data(struct btrfsic_state *state, | ||
309 | struct btrfsic_block *block, | ||
310 | struct btrfsic_block_data_ctx *block_ctx, | ||
311 | u32 item_offset, int force_iodone_flag); | ||
312 | static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, | ||
313 | struct btrfsic_block_data_ctx *block_ctx_out, | ||
314 | int mirror_num); | ||
315 | static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, | ||
316 | u32 len, struct block_device *bdev, | ||
317 | struct btrfsic_block_data_ctx *block_ctx_out); | ||
318 | static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); | ||
319 | static int btrfsic_read_block(struct btrfsic_state *state, | ||
320 | struct btrfsic_block_data_ctx *block_ctx); | ||
321 | static void btrfsic_dump_database(struct btrfsic_state *state); | ||
322 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, | ||
323 | const u8 *data, unsigned int size); | ||
324 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | ||
325 | u64 dev_bytenr, u8 *mapped_data, | ||
326 | unsigned int len, struct bio *bio, | ||
327 | int *bio_is_patched, | ||
328 | struct buffer_head *bh, | ||
329 | int submit_bio_bh_rw); | ||
330 | static int btrfsic_process_written_superblock( | ||
331 | struct btrfsic_state *state, | ||
332 | struct btrfsic_block *const block, | ||
333 | struct btrfs_super_block *const super_hdr); | ||
334 | static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status); | ||
335 | static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate); | ||
336 | static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state, | ||
337 | const struct btrfsic_block *block, | ||
338 | int recursion_level); | ||
339 | static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, | ||
340 | struct btrfsic_block *const block, | ||
341 | int recursion_level); | ||
342 | static void btrfsic_print_add_link(const struct btrfsic_state *state, | ||
343 | const struct btrfsic_block_link *l); | ||
344 | static void btrfsic_print_rem_link(const struct btrfsic_state *state, | ||
345 | const struct btrfsic_block_link *l); | ||
346 | static char btrfsic_get_block_type(const struct btrfsic_state *state, | ||
347 | const struct btrfsic_block *block); | ||
348 | static void btrfsic_dump_tree(const struct btrfsic_state *state); | ||
349 | static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, | ||
350 | const struct btrfsic_block *block, | ||
351 | int indent_level); | ||
352 | static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( | ||
353 | struct btrfsic_state *state, | ||
354 | struct btrfsic_block_data_ctx *next_block_ctx, | ||
355 | struct btrfsic_block *next_block, | ||
356 | struct btrfsic_block *from_block, | ||
357 | u64 parent_generation); | ||
358 | static struct btrfsic_block *btrfsic_block_lookup_or_add( | ||
359 | struct btrfsic_state *state, | ||
360 | struct btrfsic_block_data_ctx *block_ctx, | ||
361 | const char *additional_string, | ||
362 | int is_metadata, | ||
363 | int is_iodone, | ||
364 | int never_written, | ||
365 | int mirror_num, | ||
366 | int *was_created); | ||
367 | static int btrfsic_process_superblock_dev_mirror( | ||
368 | struct btrfsic_state *state, | ||
369 | struct btrfsic_dev_state *dev_state, | ||
370 | struct btrfs_device *device, | ||
371 | int superblock_mirror_num, | ||
372 | struct btrfsic_dev_state **selected_dev_state, | ||
373 | struct btrfs_super_block *selected_super); | ||
374 | static struct btrfsic_dev_state *btrfsic_dev_state_lookup( | ||
375 | struct block_device *bdev); | ||
376 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | ||
377 | u64 bytenr, | ||
378 | struct btrfsic_dev_state *dev_state, | ||
379 | u64 dev_bytenr, char *data); | ||
380 | |||
381 | static struct mutex btrfsic_mutex; | ||
382 | static int btrfsic_is_initialized; | ||
383 | static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable; | ||
384 | |||
385 | |||
386 | static void btrfsic_block_init(struct btrfsic_block *b) | ||
387 | { | ||
388 | b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER; | ||
389 | b->dev_state = NULL; | ||
390 | b->dev_bytenr = 0; | ||
391 | b->logical_bytenr = 0; | ||
392 | b->generation = BTRFSIC_GENERATION_UNKNOWN; | ||
393 | b->disk_key.objectid = 0; | ||
394 | b->disk_key.type = 0; | ||
395 | b->disk_key.offset = 0; | ||
396 | b->is_metadata = 0; | ||
397 | b->is_superblock = 0; | ||
398 | b->is_iodone = 0; | ||
399 | b->iodone_w_error = 0; | ||
400 | b->never_written = 0; | ||
401 | b->mirror_num = 0; | ||
402 | b->next_in_same_bio = NULL; | ||
403 | b->orig_bio_bh_private = NULL; | ||
404 | b->orig_bio_bh_end_io.bio = NULL; | ||
405 | INIT_LIST_HEAD(&b->collision_resolving_node); | ||
406 | INIT_LIST_HEAD(&b->all_blocks_node); | ||
407 | INIT_LIST_HEAD(&b->ref_to_list); | ||
408 | INIT_LIST_HEAD(&b->ref_from_list); | ||
409 | b->submit_bio_bh_rw = 0; | ||
410 | b->flush_gen = 0; | ||
411 | } | ||
412 | |||
413 | static struct btrfsic_block *btrfsic_block_alloc(void) | ||
414 | { | ||
415 | struct btrfsic_block *b; | ||
416 | |||
417 | b = kzalloc(sizeof(*b), GFP_NOFS); | ||
418 | if (NULL != b) | ||
419 | btrfsic_block_init(b); | ||
420 | |||
421 | return b; | ||
422 | } | ||
423 | |||
424 | static void btrfsic_block_free(struct btrfsic_block *b) | ||
425 | { | ||
426 | BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num)); | ||
427 | kfree(b); | ||
428 | } | ||
429 | |||
430 | static void btrfsic_block_link_init(struct btrfsic_block_link *l) | ||
431 | { | ||
432 | l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER; | ||
433 | l->ref_cnt = 1; | ||
434 | INIT_LIST_HEAD(&l->node_ref_to); | ||
435 | INIT_LIST_HEAD(&l->node_ref_from); | ||
436 | INIT_LIST_HEAD(&l->collision_resolving_node); | ||
437 | l->block_ref_to = NULL; | ||
438 | l->block_ref_from = NULL; | ||
439 | } | ||
440 | |||
441 | static struct btrfsic_block_link *btrfsic_block_link_alloc(void) | ||
442 | { | ||
443 | struct btrfsic_block_link *l; | ||
444 | |||
445 | l = kzalloc(sizeof(*l), GFP_NOFS); | ||
446 | if (NULL != l) | ||
447 | btrfsic_block_link_init(l); | ||
448 | |||
449 | return l; | ||
450 | } | ||
451 | |||
452 | static void btrfsic_block_link_free(struct btrfsic_block_link *l) | ||
453 | { | ||
454 | BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num)); | ||
455 | kfree(l); | ||
456 | } | ||
457 | |||
458 | static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds) | ||
459 | { | ||
460 | ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER; | ||
461 | ds->bdev = NULL; | ||
462 | ds->state = NULL; | ||
463 | ds->name[0] = '\0'; | ||
464 | INIT_LIST_HEAD(&ds->collision_resolving_node); | ||
465 | ds->last_flush_gen = 0; | ||
466 | btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush); | ||
467 | ds->dummy_block_for_bio_bh_flush.is_iodone = 1; | ||
468 | ds->dummy_block_for_bio_bh_flush.dev_state = ds; | ||
469 | } | ||
470 | |||
471 | static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void) | ||
472 | { | ||
473 | struct btrfsic_dev_state *ds; | ||
474 | |||
475 | ds = kzalloc(sizeof(*ds), GFP_NOFS); | ||
476 | if (NULL != ds) | ||
477 | btrfsic_dev_state_init(ds); | ||
478 | |||
479 | return ds; | ||
480 | } | ||
481 | |||
482 | static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds) | ||
483 | { | ||
484 | BUG_ON(!(NULL == ds || | ||
485 | BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num)); | ||
486 | kfree(ds); | ||
487 | } | ||
488 | |||
489 | static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h) | ||
490 | { | ||
491 | int i; | ||
492 | |||
493 | for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++) | ||
494 | INIT_LIST_HEAD(h->table + i); | ||
495 | } | ||
496 | |||
497 | static void btrfsic_block_hashtable_add(struct btrfsic_block *b, | ||
498 | struct btrfsic_block_hashtable *h) | ||
499 | { | ||
500 | const unsigned int hashval = | ||
501 | (((unsigned int)(b->dev_bytenr >> 16)) ^ | ||
502 | ((unsigned int)((uintptr_t)b->dev_state->bdev))) & | ||
503 | (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); | ||
504 | |||
505 | list_add(&b->collision_resolving_node, h->table + hashval); | ||
506 | } | ||
507 | |||
508 | static void btrfsic_block_hashtable_remove(struct btrfsic_block *b) | ||
509 | { | ||
510 | list_del(&b->collision_resolving_node); | ||
511 | } | ||
512 | |||
513 | static struct btrfsic_block *btrfsic_block_hashtable_lookup( | ||
514 | struct block_device *bdev, | ||
515 | u64 dev_bytenr, | ||
516 | struct btrfsic_block_hashtable *h) | ||
517 | { | ||
518 | const unsigned int hashval = | ||
519 | (((unsigned int)(dev_bytenr >> 16)) ^ | ||
520 | ((unsigned int)((uintptr_t)bdev))) & | ||
521 | (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); | ||
522 | struct list_head *elem; | ||
523 | |||
524 | list_for_each(elem, h->table + hashval) { | ||
525 | struct btrfsic_block *const b = | ||
526 | list_entry(elem, struct btrfsic_block, | ||
527 | collision_resolving_node); | ||
528 | |||
529 | if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr) | ||
530 | return b; | ||
531 | } | ||
532 | |||
533 | return NULL; | ||
534 | } | ||
535 | |||
536 | static void btrfsic_block_link_hashtable_init( | ||
537 | struct btrfsic_block_link_hashtable *h) | ||
538 | { | ||
539 | int i; | ||
540 | |||
541 | for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++) | ||
542 | INIT_LIST_HEAD(h->table + i); | ||
543 | } | ||
544 | |||
545 | static void btrfsic_block_link_hashtable_add( | ||
546 | struct btrfsic_block_link *l, | ||
547 | struct btrfsic_block_link_hashtable *h) | ||
548 | { | ||
549 | const unsigned int hashval = | ||
550 | (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^ | ||
551 | ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^ | ||
552 | ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^ | ||
553 | ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev))) | ||
554 | & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); | ||
555 | |||
556 | BUG_ON(NULL == l->block_ref_to); | ||
557 | BUG_ON(NULL == l->block_ref_from); | ||
558 | list_add(&l->collision_resolving_node, h->table + hashval); | ||
559 | } | ||
560 | |||
561 | static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l) | ||
562 | { | ||
563 | list_del(&l->collision_resolving_node); | ||
564 | } | ||
565 | |||
566 | static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( | ||
567 | struct block_device *bdev_ref_to, | ||
568 | u64 dev_bytenr_ref_to, | ||
569 | struct block_device *bdev_ref_from, | ||
570 | u64 dev_bytenr_ref_from, | ||
571 | struct btrfsic_block_link_hashtable *h) | ||
572 | { | ||
573 | const unsigned int hashval = | ||
574 | (((unsigned int)(dev_bytenr_ref_to >> 16)) ^ | ||
575 | ((unsigned int)(dev_bytenr_ref_from >> 16)) ^ | ||
576 | ((unsigned int)((uintptr_t)bdev_ref_to)) ^ | ||
577 | ((unsigned int)((uintptr_t)bdev_ref_from))) & | ||
578 | (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); | ||
579 | struct list_head *elem; | ||
580 | |||
581 | list_for_each(elem, h->table + hashval) { | ||
582 | struct btrfsic_block_link *const l = | ||
583 | list_entry(elem, struct btrfsic_block_link, | ||
584 | collision_resolving_node); | ||
585 | |||
586 | BUG_ON(NULL == l->block_ref_to); | ||
587 | BUG_ON(NULL == l->block_ref_from); | ||
588 | if (l->block_ref_to->dev_state->bdev == bdev_ref_to && | ||
589 | l->block_ref_to->dev_bytenr == dev_bytenr_ref_to && | ||
590 | l->block_ref_from->dev_state->bdev == bdev_ref_from && | ||
591 | l->block_ref_from->dev_bytenr == dev_bytenr_ref_from) | ||
592 | return l; | ||
593 | } | ||
594 | |||
595 | return NULL; | ||
596 | } | ||
597 | |||
598 | static void btrfsic_dev_state_hashtable_init( | ||
599 | struct btrfsic_dev_state_hashtable *h) | ||
600 | { | ||
601 | int i; | ||
602 | |||
603 | for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++) | ||
604 | INIT_LIST_HEAD(h->table + i); | ||
605 | } | ||
606 | |||
607 | static void btrfsic_dev_state_hashtable_add( | ||
608 | struct btrfsic_dev_state *ds, | ||
609 | struct btrfsic_dev_state_hashtable *h) | ||
610 | { | ||
611 | const unsigned int hashval = | ||
612 | (((unsigned int)((uintptr_t)ds->bdev)) & | ||
613 | (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); | ||
614 | |||
615 | list_add(&ds->collision_resolving_node, h->table + hashval); | ||
616 | } | ||
617 | |||
618 | static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds) | ||
619 | { | ||
620 | list_del(&ds->collision_resolving_node); | ||
621 | } | ||
622 | |||
623 | static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( | ||
624 | struct block_device *bdev, | ||
625 | struct btrfsic_dev_state_hashtable *h) | ||
626 | { | ||
627 | const unsigned int hashval = | ||
628 | (((unsigned int)((uintptr_t)bdev)) & | ||
629 | (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); | ||
630 | struct list_head *elem; | ||
631 | |||
632 | list_for_each(elem, h->table + hashval) { | ||
633 | struct btrfsic_dev_state *const ds = | ||
634 | list_entry(elem, struct btrfsic_dev_state, | ||
635 | collision_resolving_node); | ||
636 | |||
637 | if (ds->bdev == bdev) | ||
638 | return ds; | ||
639 | } | ||
640 | |||
641 | return NULL; | ||
642 | } | ||
643 | |||
644 | static int btrfsic_process_superblock(struct btrfsic_state *state, | ||
645 | struct btrfs_fs_devices *fs_devices) | ||
646 | { | ||
647 | int ret; | ||
648 | struct btrfs_super_block *selected_super; | ||
649 | struct list_head *dev_head = &fs_devices->devices; | ||
650 | struct btrfs_device *device; | ||
651 | struct btrfsic_dev_state *selected_dev_state = NULL; | ||
652 | int pass; | ||
653 | |||
654 | BUG_ON(NULL == state); | ||
655 | selected_super = kmalloc(sizeof(*selected_super), GFP_NOFS); | ||
656 | if (NULL == selected_super) { | ||
657 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); | ||
658 | return -1; | ||
659 | } | ||
660 | |||
661 | list_for_each_entry(device, dev_head, dev_list) { | ||
662 | int i; | ||
663 | struct btrfsic_dev_state *dev_state; | ||
664 | |||
665 | if (!device->bdev || !device->name) | ||
666 | continue; | ||
667 | |||
668 | dev_state = btrfsic_dev_state_lookup(device->bdev); | ||
669 | BUG_ON(NULL == dev_state); | ||
670 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
671 | ret = btrfsic_process_superblock_dev_mirror( | ||
672 | state, dev_state, device, i, | ||
673 | &selected_dev_state, selected_super); | ||
674 | if (0 != ret && 0 == i) { | ||
675 | kfree(selected_super); | ||
676 | return ret; | ||
677 | } | ||
678 | } | ||
679 | } | ||
680 | |||
681 | if (NULL == state->latest_superblock) { | ||
682 | printk(KERN_INFO "btrfsic: no superblock found!\n"); | ||
683 | kfree(selected_super); | ||
684 | return -1; | ||
685 | } | ||
686 | |||
687 | state->csum_size = btrfs_super_csum_size(selected_super); | ||
688 | |||
689 | for (pass = 0; pass < 3; pass++) { | ||
690 | int num_copies; | ||
691 | int mirror_num; | ||
692 | u64 next_bytenr; | ||
693 | |||
694 | switch (pass) { | ||
695 | case 0: | ||
696 | next_bytenr = btrfs_super_root(selected_super); | ||
697 | if (state->print_mask & | ||
698 | BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) | ||
699 | printk(KERN_INFO "root@%llu\n", | ||
700 | (unsigned long long)next_bytenr); | ||
701 | break; | ||
702 | case 1: | ||
703 | next_bytenr = btrfs_super_chunk_root(selected_super); | ||
704 | if (state->print_mask & | ||
705 | BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) | ||
706 | printk(KERN_INFO "chunk@%llu\n", | ||
707 | (unsigned long long)next_bytenr); | ||
708 | break; | ||
709 | case 2: | ||
710 | next_bytenr = btrfs_super_log_root(selected_super); | ||
711 | if (0 == next_bytenr) | ||
712 | continue; | ||
713 | if (state->print_mask & | ||
714 | BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) | ||
715 | printk(KERN_INFO "log@%llu\n", | ||
716 | (unsigned long long)next_bytenr); | ||
717 | break; | ||
718 | } | ||
719 | |||
720 | num_copies = | ||
721 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | ||
722 | next_bytenr, PAGE_SIZE); | ||
723 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | ||
724 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | ||
725 | (unsigned long long)next_bytenr, num_copies); | ||
726 | |||
727 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | ||
728 | struct btrfsic_block *next_block; | ||
729 | struct btrfsic_block_data_ctx tmp_next_block_ctx; | ||
730 | struct btrfsic_block_link *l; | ||
731 | struct btrfs_header *hdr; | ||
732 | |||
733 | ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | ||
734 | &tmp_next_block_ctx, | ||
735 | mirror_num); | ||
736 | if (ret) { | ||
737 | printk(KERN_INFO "btrfsic:" | ||
738 | " btrfsic_map_block(root @%llu," | ||
739 | " mirror %d) failed!\n", | ||
740 | (unsigned long long)next_bytenr, | ||
741 | mirror_num); | ||
742 | kfree(selected_super); | ||
743 | return -1; | ||
744 | } | ||
745 | |||
746 | next_block = btrfsic_block_hashtable_lookup( | ||
747 | tmp_next_block_ctx.dev->bdev, | ||
748 | tmp_next_block_ctx.dev_bytenr, | ||
749 | &state->block_hashtable); | ||
750 | BUG_ON(NULL == next_block); | ||
751 | |||
752 | l = btrfsic_block_link_hashtable_lookup( | ||
753 | tmp_next_block_ctx.dev->bdev, | ||
754 | tmp_next_block_ctx.dev_bytenr, | ||
755 | state->latest_superblock->dev_state-> | ||
756 | bdev, | ||
757 | state->latest_superblock->dev_bytenr, | ||
758 | &state->block_link_hashtable); | ||
759 | BUG_ON(NULL == l); | ||
760 | |||
761 | ret = btrfsic_read_block(state, &tmp_next_block_ctx); | ||
762 | if (ret < (int)BTRFSIC_BLOCK_SIZE) { | ||
763 | printk(KERN_INFO | ||
764 | "btrfsic: read @logical %llu failed!\n", | ||
765 | (unsigned long long) | ||
766 | tmp_next_block_ctx.start); | ||
767 | btrfsic_release_block_ctx(&tmp_next_block_ctx); | ||
768 | kfree(selected_super); | ||
769 | return -1; | ||
770 | } | ||
771 | |||
772 | hdr = (struct btrfs_header *)tmp_next_block_ctx.data; | ||
773 | ret = btrfsic_process_metablock(state, | ||
774 | next_block, | ||
775 | &tmp_next_block_ctx, | ||
776 | hdr, | ||
777 | BTRFS_MAX_LEVEL + 3, 1); | ||
778 | btrfsic_release_block_ctx(&tmp_next_block_ctx); | ||
779 | } | ||
780 | } | ||
781 | |||
782 | kfree(selected_super); | ||
783 | return ret; | ||
784 | } | ||
785 | |||
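btrfsic_process_superblock() visits the three tree roots recorded in the selected superblock (root tree, chunk tree and, when present, the log tree) in a fixed three-pass loop; a log root of zero simply skips pass 2. A sketch of that pass structure with stand-in types (sb_roots and visit() are illustrative, not kernel API):

	#include <stdint.h>

	/* hypothetical stand-in for the superblock fields used here */
	struct sb_roots {
		uint64_t root;		/* root tree root  */
		uint64_t chunk_root;	/* chunk tree root */
		uint64_t log_root;	/* log tree root, 0 if none */
	};

	/* calls visit() once per tree root, skipping an absent log tree */
	static void visit_tree_roots(const struct sb_roots *sb,
				     void (*visit)(uint64_t bytenr))
	{
		int pass;

		for (pass = 0; pass < 3; pass++) {
			uint64_t next_bytenr;

			switch (pass) {
			case 0: next_bytenr = sb->root; break;
			case 1: next_bytenr = sb->chunk_root; break;
			default:
				next_bytenr = sb->log_root;
				if (0 == next_bytenr)
					continue; /* no log tree committed */
				break;
			}
			visit(next_bytenr);
		}
	}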
786 | static int btrfsic_process_superblock_dev_mirror( | ||
787 | struct btrfsic_state *state, | ||
788 | struct btrfsic_dev_state *dev_state, | ||
789 | struct btrfs_device *device, | ||
790 | int superblock_mirror_num, | ||
791 | struct btrfsic_dev_state **selected_dev_state, | ||
792 | struct btrfs_super_block *selected_super) | ||
793 | { | ||
794 | struct btrfs_super_block *super_tmp; | ||
795 | u64 dev_bytenr; | ||
796 | struct buffer_head *bh; | ||
797 | struct btrfsic_block *superblock_tmp; | ||
798 | int pass; | ||
799 | struct block_device *const superblock_bdev = device->bdev; | ||
800 | |||
801 | /* super block bytenr is always the unmapped device bytenr */ | ||
802 | dev_bytenr = btrfs_sb_offset(superblock_mirror_num); | ||
803 | bh = __bread(superblock_bdev, dev_bytenr / 4096, 4096); | ||
804 | if (NULL == bh) | ||
805 | return -1; | ||
806 | super_tmp = (struct btrfs_super_block *) | ||
807 | (bh->b_data + (dev_bytenr & 4095)); | ||
808 | |||
809 | if (btrfs_super_bytenr(super_tmp) != dev_bytenr || | ||
810 | strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, | ||
811 | sizeof(super_tmp->magic)) || | ||
812 | memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE)) { | ||
813 | brelse(bh); | ||
814 | return 0; | ||
815 | } | ||
816 | |||
817 | superblock_tmp = | ||
818 | btrfsic_block_hashtable_lookup(superblock_bdev, | ||
819 | dev_bytenr, | ||
820 | &state->block_hashtable); | ||
821 | if (NULL == superblock_tmp) { | ||
822 | superblock_tmp = btrfsic_block_alloc(); | ||
823 | if (NULL == superblock_tmp) { | ||
824 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); | ||
825 | brelse(bh); | ||
826 | return -1; | ||
827 | } | ||
828 | /* for superblock, only the dev_bytenr makes sense */ | ||
829 | superblock_tmp->dev_bytenr = dev_bytenr; | ||
830 | superblock_tmp->dev_state = dev_state; | ||
831 | superblock_tmp->logical_bytenr = dev_bytenr; | ||
832 | superblock_tmp->generation = btrfs_super_generation(super_tmp); | ||
833 | superblock_tmp->is_metadata = 1; | ||
834 | superblock_tmp->is_superblock = 1; | ||
835 | superblock_tmp->is_iodone = 1; | ||
836 | superblock_tmp->never_written = 0; | ||
837 | superblock_tmp->mirror_num = 1 + superblock_mirror_num; | ||
838 | if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) | ||
839 | printk(KERN_INFO "New initial S-block (bdev %p, %s)" | ||
840 | " @%llu (%s/%llu/%d)\n", | ||
841 | superblock_bdev, device->name, | ||
842 | (unsigned long long)dev_bytenr, | ||
843 | dev_state->name, | ||
844 | (unsigned long long)dev_bytenr, | ||
845 | superblock_mirror_num); | ||
846 | list_add(&superblock_tmp->all_blocks_node, | ||
847 | &state->all_blocks_list); | ||
848 | btrfsic_block_hashtable_add(superblock_tmp, | ||
849 | &state->block_hashtable); | ||
850 | } | ||
851 | |||
852 | /* select the one with the highest generation field */ | ||
853 | if (btrfs_super_generation(super_tmp) > | ||
854 | state->max_superblock_generation || | ||
855 | 0 == state->max_superblock_generation) { | ||
856 | memcpy(selected_super, super_tmp, sizeof(*selected_super)); | ||
857 | *selected_dev_state = dev_state; | ||
858 | state->max_superblock_generation = | ||
859 | btrfs_super_generation(super_tmp); | ||
860 | state->latest_superblock = superblock_tmp; | ||
861 | } | ||
862 | |||
863 | for (pass = 0; pass < 3; pass++) { | ||
864 | u64 next_bytenr; | ||
865 | int num_copies; | ||
866 | int mirror_num; | ||
867 | const char *additional_string = NULL; | ||
868 | struct btrfs_disk_key tmp_disk_key; | ||
869 | |||
870 | tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; | ||
871 | tmp_disk_key.offset = 0; | ||
872 | switch (pass) { | ||
873 | case 0: | ||
874 | tmp_disk_key.objectid = | ||
875 | cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); | ||
876 | additional_string = "initial root "; | ||
877 | next_bytenr = btrfs_super_root(super_tmp); | ||
878 | break; | ||
879 | case 1: | ||
880 | tmp_disk_key.objectid = | ||
881 | cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); | ||
882 | additional_string = "initial chunk "; | ||
883 | next_bytenr = btrfs_super_chunk_root(super_tmp); | ||
884 | break; | ||
885 | case 2: | ||
886 | tmp_disk_key.objectid = | ||
887 | cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); | ||
888 | additional_string = "initial log "; | ||
889 | next_bytenr = btrfs_super_log_root(super_tmp); | ||
890 | if (0 == next_bytenr) | ||
891 | continue; | ||
892 | break; | ||
893 | } | ||
894 | |||
895 | num_copies = | ||
896 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | ||
897 | next_bytenr, PAGE_SIZE); | ||
898 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | ||
899 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | ||
900 | (unsigned long long)next_bytenr, num_copies); | ||
901 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | ||
902 | struct btrfsic_block *next_block; | ||
903 | struct btrfsic_block_data_ctx tmp_next_block_ctx; | ||
904 | struct btrfsic_block_link *l; | ||
905 | |||
906 | if (btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | ||
907 | &tmp_next_block_ctx, | ||
908 | mirror_num)) { | ||
909 | printk(KERN_INFO "btrfsic: btrfsic_map_block(" | ||
910 | "bytenr @%llu, mirror %d) failed!\n", | ||
911 | (unsigned long long)next_bytenr, | ||
912 | mirror_num); | ||
913 | brelse(bh); | ||
914 | return -1; | ||
915 | } | ||
916 | |||
917 | next_block = btrfsic_block_lookup_or_add( | ||
918 | state, &tmp_next_block_ctx, | ||
919 | additional_string, 1, 1, 0, | ||
920 | mirror_num, NULL); | ||
921 | if (NULL == next_block) { | ||
922 | btrfsic_release_block_ctx(&tmp_next_block_ctx); | ||
923 | brelse(bh); | ||
924 | return -1; | ||
925 | } | ||
926 | |||
927 | next_block->disk_key = tmp_disk_key; | ||
928 | next_block->generation = BTRFSIC_GENERATION_UNKNOWN; | ||
929 | l = btrfsic_block_link_lookup_or_add( | ||
930 | state, &tmp_next_block_ctx, | ||
931 | next_block, superblock_tmp, | ||
932 | BTRFSIC_GENERATION_UNKNOWN); | ||
933 | btrfsic_release_block_ctx(&tmp_next_block_ctx); | ||
934 | if (NULL == l) { | ||
935 | brelse(bh); | ||
936 | return -1; | ||
937 | } | ||
938 | } | ||
939 | } | ||
940 | if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES) | ||
941 | btrfsic_dump_tree_sub(state, superblock_tmp, 0); | ||
942 | |||
943 | brelse(bh); | ||
944 | return 0; | ||
945 | } | ||
946 | |||
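The superblock copies read above live at fixed, unmapped device offsets, which is why dev_bytenr comes straight from btrfs_sb_offset() rather than from a chunk-tree mapping. A user-space sketch of the offset rule (constants as in the btrfs on-disk format of this era; treat as illustrative):

	#include <stdint.h>

	#define SUPER_INFO_OFFSET	(64 * 1024)	/* primary copy at 64 KiB */
	#define SUPER_MIRROR_SHIFT	12
	#define SUPER_MIRROR_MAX	3

	/* mirror 0 -> 64 KiB, mirror 1 -> 64 MiB, mirror 2 -> 256 GiB */
	static uint64_t sb_offset(int mirror)
	{
		uint64_t start = 16 * 1024;

		if (mirror)
			return start << (SUPER_MIRROR_SHIFT * mirror);
		return SUPER_INFO_OFFSET;
	}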
947 | static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void) | ||
948 | { | ||
949 | struct btrfsic_stack_frame *sf; | ||
950 | |||
951 | sf = kzalloc(sizeof(*sf), GFP_NOFS); | ||
952 | if (NULL == sf) | ||
953 | printk(KERN_INFO "btrfsic: alloc memory failed!\n"); | ||
954 | else | ||
955 | sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER; | ||
956 | return sf; | ||
957 | } | ||
958 | |||
959 | static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf) | ||
960 | { | ||
961 | BUG_ON(!(NULL == sf || | ||
962 | BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic)); | ||
963 | kfree(sf); | ||
964 | } | ||
965 | |||
966 | static int btrfsic_process_metablock( | ||
967 | struct btrfsic_state *state, | ||
968 | struct btrfsic_block *const first_block, | ||
969 | struct btrfsic_block_data_ctx *const first_block_ctx, | ||
970 | struct btrfs_header *const first_hdr, | ||
971 | int first_limit_nesting, int force_iodone_flag) | ||
972 | { | ||
973 | struct btrfsic_stack_frame initial_stack_frame = { 0 }; | ||
974 | struct btrfsic_stack_frame *sf; | ||
975 | struct btrfsic_stack_frame *next_stack; | ||
976 | |||
977 | sf = &initial_stack_frame; | ||
978 | sf->error = 0; | ||
979 | sf->i = -1; | ||
980 | sf->limit_nesting = first_limit_nesting; | ||
981 | sf->block = first_block; | ||
982 | sf->block_ctx = first_block_ctx; | ||
983 | sf->next_block = NULL; | ||
984 | sf->hdr = first_hdr; | ||
985 | sf->prev = NULL; | ||
986 | |||
987 | continue_with_new_stack_frame: | ||
988 | sf->block->generation = le64_to_cpu(sf->hdr->generation); | ||
989 | if (0 == sf->hdr->level) { | ||
990 | struct btrfs_leaf *const leafhdr = | ||
991 | (struct btrfs_leaf *)sf->hdr; | ||
992 | |||
993 | if (-1 == sf->i) { | ||
994 | sf->nr = le32_to_cpu(leafhdr->header.nritems); | ||
995 | |||
996 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
997 | printk(KERN_INFO | ||
998 | "leaf %llu items %d generation %llu" | ||
999 | " owner %llu\n", | ||
1000 | (unsigned long long) | ||
1001 | sf->block_ctx->start, | ||
1002 | sf->nr, | ||
1003 | (unsigned long long) | ||
1004 | le64_to_cpu(leafhdr->header.generation), | ||
1005 | (unsigned long long) | ||
1006 | le64_to_cpu(leafhdr->header.owner)); | ||
1007 | } | ||
1008 | |||
1009 | continue_with_current_leaf_stack_frame: | ||
1010 | if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { | ||
1011 | sf->i++; | ||
1012 | sf->num_copies = 0; | ||
1013 | } | ||
1014 | |||
1015 | if (sf->i < sf->nr) { | ||
1016 | struct btrfs_item *disk_item = leafhdr->items + sf->i; | ||
1017 | struct btrfs_disk_key *disk_key = &disk_item->key; | ||
1018 | u8 type; | ||
1019 | const u32 item_offset = le32_to_cpu(disk_item->offset); | ||
1020 | |||
1021 | type = disk_key->type; | ||
1022 | |||
1023 | if (BTRFS_ROOT_ITEM_KEY == type) { | ||
1024 | const struct btrfs_root_item *const root_item = | ||
1025 | (struct btrfs_root_item *) | ||
1026 | (sf->block_ctx->data + | ||
1027 | offsetof(struct btrfs_leaf, items) + | ||
1028 | item_offset); | ||
1029 | const u64 next_bytenr = | ||
1030 | le64_to_cpu(root_item->bytenr); | ||
1031 | |||
1032 | sf->error = | ||
1033 | btrfsic_create_link_to_next_block( | ||
1034 | state, | ||
1035 | sf->block, | ||
1036 | sf->block_ctx, | ||
1037 | next_bytenr, | ||
1038 | sf->limit_nesting, | ||
1039 | &sf->next_block_ctx, | ||
1040 | &sf->next_block, | ||
1041 | force_iodone_flag, | ||
1042 | &sf->num_copies, | ||
1043 | &sf->mirror_num, | ||
1044 | disk_key, | ||
1045 | le64_to_cpu(root_item-> | ||
1046 | generation)); | ||
1047 | if (sf->error) | ||
1048 | goto one_stack_frame_backwards; | ||
1049 | |||
1050 | if (NULL != sf->next_block) { | ||
1051 | struct btrfs_header *const next_hdr = | ||
1052 | (struct btrfs_header *) | ||
1053 | sf->next_block_ctx.data; | ||
1054 | |||
1055 | next_stack = | ||
1056 | btrfsic_stack_frame_alloc(); | ||
1057 | if (NULL == next_stack) { | ||
1058 | btrfsic_release_block_ctx( | ||
1059 | &sf-> | ||
1060 | next_block_ctx); | ||
1061 | goto one_stack_frame_backwards; | ||
1062 | } | ||
1063 | |||
1064 | next_stack->i = -1; | ||
1065 | next_stack->block = sf->next_block; | ||
1066 | next_stack->block_ctx = | ||
1067 | &sf->next_block_ctx; | ||
1068 | next_stack->next_block = NULL; | ||
1069 | next_stack->hdr = next_hdr; | ||
1070 | next_stack->limit_nesting = | ||
1071 | sf->limit_nesting - 1; | ||
1072 | next_stack->prev = sf; | ||
1073 | sf = next_stack; | ||
1074 | goto continue_with_new_stack_frame; | ||
1075 | } | ||
1076 | } else if (BTRFS_EXTENT_DATA_KEY == type && | ||
1077 | state->include_extent_data) { | ||
1078 | sf->error = btrfsic_handle_extent_data( | ||
1079 | state, | ||
1080 | sf->block, | ||
1081 | sf->block_ctx, | ||
1082 | item_offset, | ||
1083 | force_iodone_flag); | ||
1084 | if (sf->error) | ||
1085 | goto one_stack_frame_backwards; | ||
1086 | } | ||
1087 | |||
1088 | goto continue_with_current_leaf_stack_frame; | ||
1089 | } | ||
1090 | } else { | ||
1091 | struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr; | ||
1092 | |||
1093 | if (-1 == sf->i) { | ||
1094 | sf->nr = le32_to_cpu(nodehdr->header.nritems); | ||
1095 | |||
1096 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
1097 | printk(KERN_INFO "node %llu level %d items %d" | ||
1098 | " generation %llu owner %llu\n", | ||
1099 | (unsigned long long) | ||
1100 | sf->block_ctx->start, | ||
1101 | nodehdr->header.level, sf->nr, | ||
1102 | (unsigned long long) | ||
1103 | le64_to_cpu(nodehdr->header.generation), | ||
1104 | (unsigned long long) | ||
1105 | le64_to_cpu(nodehdr->header.owner)); | ||
1106 | } | ||
1107 | |||
1108 | continue_with_current_node_stack_frame: | ||
1109 | if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { | ||
1110 | sf->i++; | ||
1111 | sf->num_copies = 0; | ||
1112 | } | ||
1113 | |||
1114 | if (sf->i < sf->nr) { | ||
1115 | struct btrfs_key_ptr *disk_key_ptr = | ||
1116 | nodehdr->ptrs + sf->i; | ||
1117 | const u64 next_bytenr = | ||
1118 | le64_to_cpu(disk_key_ptr->blockptr); | ||
1119 | |||
1120 | sf->error = btrfsic_create_link_to_next_block( | ||
1121 | state, | ||
1122 | sf->block, | ||
1123 | sf->block_ctx, | ||
1124 | next_bytenr, | ||
1125 | sf->limit_nesting, | ||
1126 | &sf->next_block_ctx, | ||
1127 | &sf->next_block, | ||
1128 | force_iodone_flag, | ||
1129 | &sf->num_copies, | ||
1130 | &sf->mirror_num, | ||
1131 | &disk_key_ptr->key, | ||
1132 | le64_to_cpu(disk_key_ptr->generation)); | ||
1133 | if (sf->error) | ||
1134 | goto one_stack_frame_backwards; | ||
1135 | |||
1136 | if (NULL != sf->next_block) { | ||
1137 | struct btrfs_header *const next_hdr = | ||
1138 | (struct btrfs_header *) | ||
1139 | sf->next_block_ctx.data; | ||
1140 | |||
1141 | next_stack = btrfsic_stack_frame_alloc(); | ||
1142 | if (NULL == next_stack) | ||
1143 | goto one_stack_frame_backwards; | ||
1144 | |||
1145 | next_stack->i = -1; | ||
1146 | next_stack->block = sf->next_block; | ||
1147 | next_stack->block_ctx = &sf->next_block_ctx; | ||
1148 | next_stack->next_block = NULL; | ||
1149 | next_stack->hdr = next_hdr; | ||
1150 | next_stack->limit_nesting = | ||
1151 | sf->limit_nesting - 1; | ||
1152 | next_stack->prev = sf; | ||
1153 | sf = next_stack; | ||
1154 | goto continue_with_new_stack_frame; | ||
1155 | } | ||
1156 | |||
1157 | goto continue_with_current_node_stack_frame; | ||
1158 | } | ||
1159 | } | ||
1160 | |||
1161 | one_stack_frame_backwards: | ||
1162 | if (NULL != sf->prev) { | ||
1163 | struct btrfsic_stack_frame *const prev = sf->prev; | ||
1164 | |||
1165 | /* the one for the initial block is freed in the caller */ | ||
1166 | btrfsic_release_block_ctx(sf->block_ctx); | ||
1167 | |||
1168 | if (sf->error) { | ||
1169 | prev->error = sf->error; | ||
1170 | btrfsic_stack_frame_free(sf); | ||
1171 | sf = prev; | ||
1172 | goto one_stack_frame_backwards; | ||
1173 | } | ||
1174 | |||
1175 | btrfsic_stack_frame_free(sf); | ||
1176 | sf = prev; | ||
1177 | goto continue_with_new_stack_frame; | ||
1178 | } else { | ||
1179 | BUG_ON(&initial_stack_frame != sf); | ||
1180 | } | ||
1181 | |||
1182 | return sf->error; | ||
1183 | } | ||
1184 | |||
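btrfsic_process_metablock() deliberately keeps the tree walk off the kernel stack: descending into a child allocates a frame on the heap and jumps to continue_with_new_stack_frame, and exhausting a node frees one frame at one_stack_frame_backwards. The same pattern on a generic n-ary tree, as a minimal user-space sketch (all names hypothetical):

	#include <stdlib.h>

	struct node {
		int nr;				/* number of children */
		struct node **child;
	};

	struct frame {
		const struct node *n;
		int i;				/* next child to visit */
		struct frame *prev;
	};

	/* Depth-first traversal with heap-allocated frames: tree depth
	 * never grows the call stack. Returns -1 on allocation failure. */
	static int visit_iterative(const struct node *root,
				   void (*visit)(const struct node *))
	{
		struct frame first = { root, 0, NULL };
		struct frame *sf = &first;

		visit(sf->n);
		for (;;) {
			if (sf->i < sf->n->nr) {
				struct frame *next = malloc(sizeof(*next));

				if (NULL == next) {
					while (NULL != sf->prev) { /* unwind */
						struct frame *prev = sf->prev;

						free(sf);
						sf = prev;
					}
					return -1;
				}
				next->n = sf->n->child[sf->i++];
				next->i = 0;
				next->prev = sf;
				sf = next;
				visit(sf->n);
			} else if (NULL != sf->prev) {
				struct frame *prev = sf->prev;

				free(sf);	/* one stack frame backwards */
				sf = prev;
			} else {
				return 0;	/* back at the initial frame */
			}
		}
	}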
1185 | static int btrfsic_create_link_to_next_block( | ||
1186 | struct btrfsic_state *state, | ||
1187 | struct btrfsic_block *block, | ||
1188 | struct btrfsic_block_data_ctx *block_ctx, | ||
1189 | u64 next_bytenr, | ||
1190 | int limit_nesting, | ||
1191 | struct btrfsic_block_data_ctx *next_block_ctx, | ||
1192 | struct btrfsic_block **next_blockp, | ||
1193 | int force_iodone_flag, | ||
1194 | int *num_copiesp, int *mirror_nump, | ||
1195 | struct btrfs_disk_key *disk_key, | ||
1196 | u64 parent_generation) | ||
1197 | { | ||
1198 | struct btrfsic_block *next_block = NULL; | ||
1199 | int ret; | ||
1200 | struct btrfsic_block_link *l; | ||
1201 | int did_alloc_block_link; | ||
1202 | int block_was_created; | ||
1203 | |||
1204 | *next_blockp = NULL; | ||
1205 | if (0 == *num_copiesp) { | ||
1206 | *num_copiesp = | ||
1207 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | ||
1208 | next_bytenr, PAGE_SIZE); | ||
1209 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | ||
1210 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | ||
1211 | (unsigned long long)next_bytenr, *num_copiesp); | ||
1212 | *mirror_nump = 1; | ||
1213 | } | ||
1214 | |||
1215 | if (*mirror_nump > *num_copiesp) | ||
1216 | return 0; | ||
1217 | |||
1218 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
1219 | printk(KERN_INFO | ||
1220 | "btrfsic_create_link_to_next_block(mirror_num=%d)\n", | ||
1221 | *mirror_nump); | ||
1222 | ret = btrfsic_map_block(state, next_bytenr, | ||
1223 | BTRFSIC_BLOCK_SIZE, | ||
1224 | next_block_ctx, *mirror_nump); | ||
1225 | if (ret) { | ||
1226 | printk(KERN_INFO | ||
1227 | "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", | ||
1228 | (unsigned long long)next_bytenr, *mirror_nump); | ||
1229 | btrfsic_release_block_ctx(next_block_ctx); | ||
1230 | *next_blockp = NULL; | ||
1231 | return -1; | ||
1232 | } | ||
1233 | |||
1234 | next_block = btrfsic_block_lookup_or_add(state, | ||
1235 | next_block_ctx, "referenced ", | ||
1236 | 1, force_iodone_flag, | ||
1237 | !force_iodone_flag, | ||
1238 | *mirror_nump, | ||
1239 | &block_was_created); | ||
1240 | if (NULL == next_block) { | ||
1241 | btrfsic_release_block_ctx(next_block_ctx); | ||
1242 | *next_blockp = NULL; | ||
1243 | return -1; | ||
1244 | } | ||
1245 | if (block_was_created) { | ||
1246 | l = NULL; | ||
1247 | next_block->generation = BTRFSIC_GENERATION_UNKNOWN; | ||
1248 | } else { | ||
1249 | if (next_block->logical_bytenr != next_bytenr && | ||
1250 | !(!next_block->is_metadata && | ||
1251 | 0 == next_block->logical_bytenr)) { | ||
1252 | printk(KERN_INFO | ||
1253 | "Referenced block @%llu (%s/%llu/%d)" | ||
1254 | " found in hash table, %c," | ||
1255 | " bytenr mismatch (!= stored %llu).\n", | ||
1256 | (unsigned long long)next_bytenr, | ||
1257 | next_block_ctx->dev->name, | ||
1258 | (unsigned long long)next_block_ctx->dev_bytenr, | ||
1259 | *mirror_nump, | ||
1260 | btrfsic_get_block_type(state, next_block), | ||
1261 | (unsigned long long)next_block->logical_bytenr); | ||
1262 | } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
1263 | printk(KERN_INFO | ||
1264 | "Referenced block @%llu (%s/%llu/%d)" | ||
1265 | " found in hash table, %c.\n", | ||
1266 | (unsigned long long)next_bytenr, | ||
1267 | next_block_ctx->dev->name, | ||
1268 | (unsigned long long)next_block_ctx->dev_bytenr, | ||
1269 | *mirror_nump, | ||
1270 | btrfsic_get_block_type(state, next_block)); | ||
1271 | next_block->logical_bytenr = next_bytenr; | ||
1272 | |||
1273 | next_block->mirror_num = *mirror_nump; | ||
1274 | l = btrfsic_block_link_hashtable_lookup( | ||
1275 | next_block_ctx->dev->bdev, | ||
1276 | next_block_ctx->dev_bytenr, | ||
1277 | block_ctx->dev->bdev, | ||
1278 | block_ctx->dev_bytenr, | ||
1279 | &state->block_link_hashtable); | ||
1280 | } | ||
1281 | |||
1282 | next_block->disk_key = *disk_key; | ||
1283 | if (NULL == l) { | ||
1284 | l = btrfsic_block_link_alloc(); | ||
1285 | if (NULL == l) { | ||
1286 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); | ||
1287 | btrfsic_release_block_ctx(next_block_ctx); | ||
1288 | *next_blockp = NULL; | ||
1289 | return -1; | ||
1290 | } | ||
1291 | |||
1292 | did_alloc_block_link = 1; | ||
1293 | l->block_ref_to = next_block; | ||
1294 | l->block_ref_from = block; | ||
1295 | l->ref_cnt = 1; | ||
1296 | l->parent_generation = parent_generation; | ||
1297 | |||
1298 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
1299 | btrfsic_print_add_link(state, l); | ||
1300 | |||
1301 | list_add(&l->node_ref_to, &block->ref_to_list); | ||
1302 | list_add(&l->node_ref_from, &next_block->ref_from_list); | ||
1303 | |||
1304 | btrfsic_block_link_hashtable_add(l, | ||
1305 | &state->block_link_hashtable); | ||
1306 | } else { | ||
1307 | did_alloc_block_link = 0; | ||
1308 | if (0 == limit_nesting) { | ||
1309 | l->ref_cnt++; | ||
1310 | l->parent_generation = parent_generation; | ||
1311 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
1312 | btrfsic_print_add_link(state, l); | ||
1313 | } | ||
1314 | } | ||
1315 | |||
1316 | if (limit_nesting > 0 && did_alloc_block_link) { | ||
1317 | ret = btrfsic_read_block(state, next_block_ctx); | ||
1318 | if (ret < (int)BTRFSIC_BLOCK_SIZE) { | ||
1319 | printk(KERN_INFO | ||
1320 | "btrfsic: read block @logical %llu failed!\n", | ||
1321 | (unsigned long long)next_bytenr); | ||
1322 | btrfsic_release_block_ctx(next_block_ctx); | ||
1323 | *next_blockp = NULL; | ||
1324 | return -1; | ||
1325 | } | ||
1326 | |||
1327 | *next_blockp = next_block; | ||
1328 | } else { | ||
1329 | *next_blockp = NULL; | ||
1330 | } | ||
1331 | (*mirror_nump)++; | ||
1332 | |||
1333 | return 0; | ||
1334 | } | ||
1335 | |||
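The *num_copiesp/*mirror_nump pair implements a small calling convention: the first call for an item initializes the copy count and sets the mirror number to 1, each successful call handles exactly one mirror and post-increments *mirror_nump, and the leaf/node loops in btrfsic_process_metablock() only advance to the next item once the mirror number has run past the copy count. A runnable sketch of just that convention (the copy count is hard-coded; everything here is illustrative):

	#include <stdio.h>

	/* stand-in following the same convention as
	 * btrfsic_create_link_to_next_block() */
	static int process_one_mirror(int item, int *num_copiesp,
				      int *mirror_nump)
	{
		if (0 == *num_copiesp) {
			*num_copiesp = 2;	/* e.g. RAID1: two copies */
			*mirror_nump = 1;	/* mirror numbers start at 1 */
		}
		if (*mirror_nump > *num_copiesp)
			return 0;	/* all copies of this item done */

		printf("item %d, mirror %d/%d\n",
		       item, *mirror_nump, *num_copiesp);
		(*mirror_nump)++;
		return 0;
	}

	int main(void)
	{
		int i = -1, nr = 3, num_copies = 0, mirror_num = 0;

		/* mirrors the continue_with_current_*_stack_frame loops */
		for (;;) {
			if (0 == num_copies || mirror_num > num_copies) {
				i++;
				num_copies = 0;
			}
			if (i >= nr)
				break;
			process_one_mirror(i, &num_copies, &mirror_num);
		}
		return 0;
	}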
1336 | static int btrfsic_handle_extent_data( | ||
1337 | struct btrfsic_state *state, | ||
1338 | struct btrfsic_block *block, | ||
1339 | struct btrfsic_block_data_ctx *block_ctx, | ||
1340 | u32 item_offset, int force_iodone_flag) | ||
1341 | { | ||
1342 | int ret; | ||
1343 | struct btrfs_file_extent_item *file_extent_item = | ||
1344 | (struct btrfs_file_extent_item *)(block_ctx->data + | ||
1345 | offsetof(struct btrfs_leaf, | ||
1346 | items) + item_offset); | ||
1347 | u64 next_bytenr = | ||
1348 | le64_to_cpu(file_extent_item->disk_bytenr) + | ||
1349 | le64_to_cpu(file_extent_item->offset); | ||
1350 | u64 num_bytes = le64_to_cpu(file_extent_item->num_bytes); | ||
1351 | u64 generation = le64_to_cpu(file_extent_item->generation); | ||
1352 | struct btrfsic_block_link *l; | ||
1353 | |||
1354 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) | ||
1355 | printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," | ||
1356 | " offset = %llu, num_bytes = %llu\n", | ||
1357 | file_extent_item->type, | ||
1358 | (unsigned long long) | ||
1359 | le64_to_cpu(file_extent_item->disk_bytenr), | ||
1360 | (unsigned long long) | ||
1361 | le64_to_cpu(file_extent_item->offset), | ||
1362 | (unsigned long long) | ||
1363 | le64_to_cpu(file_extent_item->num_bytes)); | ||
1364 | if (BTRFS_FILE_EXTENT_REG != file_extent_item->type || | ||
1365 | ((u64)0) == le64_to_cpu(file_extent_item->disk_bytenr)) | ||
1366 | return 0; | ||
1367 | while (num_bytes > 0) { | ||
1368 | u32 chunk_len; | ||
1369 | int num_copies; | ||
1370 | int mirror_num; | ||
1371 | |||
1372 | if (num_bytes > BTRFSIC_BLOCK_SIZE) | ||
1373 | chunk_len = BTRFSIC_BLOCK_SIZE; | ||
1374 | else | ||
1375 | chunk_len = num_bytes; | ||
1376 | |||
1377 | num_copies = | ||
1378 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | ||
1379 | next_bytenr, PAGE_SIZE); | ||
1380 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | ||
1381 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | ||
1382 | (unsigned long long)next_bytenr, num_copies); | ||
1383 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | ||
1384 | struct btrfsic_block_data_ctx next_block_ctx; | ||
1385 | struct btrfsic_block *next_block; | ||
1386 | int block_was_created; | ||
1387 | |||
1388 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
1389 | printk(KERN_INFO "btrfsic_handle_extent_data(" | ||
1390 | "mirror_num=%d)\n", mirror_num); | ||
1391 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) | ||
1392 | printk(KERN_INFO | ||
1393 | "\tdisk_bytenr = %llu, num_bytes %u\n", | ||
1394 | (unsigned long long)next_bytenr, | ||
1395 | chunk_len); | ||
1396 | ret = btrfsic_map_block(state, next_bytenr, | ||
1397 | chunk_len, &next_block_ctx, | ||
1398 | mirror_num); | ||
1399 | if (ret) { | ||
1400 | printk(KERN_INFO | ||
1401 | "btrfsic: btrfsic_map_block(@%llu," | ||
1402 | " mirror=%d) failed!\n", | ||
1403 | (unsigned long long)next_bytenr, | ||
1404 | mirror_num); | ||
1405 | return -1; | ||
1406 | } | ||
1407 | |||
1408 | next_block = btrfsic_block_lookup_or_add( | ||
1409 | state, | ||
1410 | &next_block_ctx, | ||
1411 | "referenced ", | ||
1412 | 0, | ||
1413 | force_iodone_flag, | ||
1414 | !force_iodone_flag, | ||
1415 | mirror_num, | ||
1416 | &block_was_created); | ||
1417 | if (NULL == next_block) { | ||
1418 | printk(KERN_INFO | ||
1419 | "btrfsic: error, kmalloc failed!\n"); | ||
1420 | btrfsic_release_block_ctx(&next_block_ctx); | ||
1421 | return -1; | ||
1422 | } | ||
1423 | if (!block_was_created) { | ||
1424 | if (next_block->logical_bytenr != next_bytenr && | ||
1425 | !(!next_block->is_metadata && | ||
1426 | 0 == next_block->logical_bytenr)) { | ||
1427 | printk(KERN_INFO | ||
1428 | "Referenced block" | ||
1429 | " @%llu (%s/%llu/%d)" | ||
1430 | " found in hash table, D," | ||
1431 | " bytenr mismatch" | ||
1432 | " (!= stored %llu).\n", | ||
1433 | (unsigned long long)next_bytenr, | ||
1434 | next_block_ctx.dev->name, | ||
1435 | (unsigned long long) | ||
1436 | next_block_ctx.dev_bytenr, | ||
1437 | mirror_num, | ||
1438 | (unsigned long long) | ||
1439 | next_block->logical_bytenr); | ||
1440 | } | ||
1441 | next_block->logical_bytenr = next_bytenr; | ||
1442 | next_block->mirror_num = mirror_num; | ||
1443 | } | ||
1444 | |||
1445 | l = btrfsic_block_link_lookup_or_add(state, | ||
1446 | &next_block_ctx, | ||
1447 | next_block, block, | ||
1448 | generation); | ||
1449 | btrfsic_release_block_ctx(&next_block_ctx); | ||
1450 | if (NULL == l) | ||
1451 | return -1; | ||
1452 | } | ||
1453 | |||
1454 | next_bytenr += chunk_len; | ||
1455 | num_bytes -= chunk_len; | ||
1456 | } | ||
1457 | |||
1458 | return 0; | ||
1459 | } | ||
1460 | |||
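btrfsic_handle_extent_data() walks a regular extent in BTRFSIC_BLOCK_SIZE chunks and checks every mirror of every chunk. A minimal sketch of the chunking arithmetic (the block size value is an assumption):

	#include <stdint.h>
	#include <stdio.h>

	#define BLOCK_SIZE 4096u	/* assumed BTRFSIC_BLOCK_SIZE */

	/* splits [disk_bytenr, disk_bytenr + num_bytes) into chunks of at
	 * most BLOCK_SIZE bytes, as the handler above does */
	static void walk_extent(uint64_t disk_bytenr, uint64_t num_bytes)
	{
		while (num_bytes > 0) {
			uint32_t chunk_len = num_bytes > BLOCK_SIZE ?
					     BLOCK_SIZE : (uint32_t)num_bytes;

			printf("chunk @%llu len %u\n",
			       (unsigned long long)disk_bytenr, chunk_len);
			disk_bytenr += chunk_len;
			num_bytes -= chunk_len;
		}
	}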
1461 | static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, | ||
1462 | struct btrfsic_block_data_ctx *block_ctx_out, | ||
1463 | int mirror_num) | ||
1464 | { | ||
1465 | int ret; | ||
1466 | u64 length; | ||
1467 | struct btrfs_bio *multi = NULL; | ||
1468 | struct btrfs_device *device; | ||
1469 | |||
1470 | length = len; | ||
1471 | ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ, | ||
1472 | bytenr, &length, &multi, mirror_num); | ||
1473 | |||
1474 | device = multi->stripes[0].dev; | ||
1475 | block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev); | ||
1476 | block_ctx_out->dev_bytenr = multi->stripes[0].physical; | ||
1477 | block_ctx_out->start = bytenr; | ||
1478 | block_ctx_out->len = len; | ||
1479 | block_ctx_out->data = NULL; | ||
1480 | block_ctx_out->bh = NULL; | ||
1481 | |||
1482 | if (0 == ret) | ||
1483 | kfree(multi); | ||
1484 | if (NULL == block_ctx_out->dev) { | ||
1485 | ret = -ENXIO; | ||
1486 | printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n"); | ||
1487 | } | ||
1488 | |||
1489 | return ret; | ||
1490 | } | ||
1491 | |||
1492 | static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, | ||
1493 | u32 len, struct block_device *bdev, | ||
1494 | struct btrfsic_block_data_ctx *block_ctx_out) | ||
1495 | { | ||
1496 | block_ctx_out->dev = btrfsic_dev_state_lookup(bdev); | ||
1497 | block_ctx_out->dev_bytenr = bytenr; | ||
1498 | block_ctx_out->start = bytenr; | ||
1499 | block_ctx_out->len = len; | ||
1500 | block_ctx_out->data = NULL; | ||
1501 | block_ctx_out->bh = NULL; | ||
1502 | if (NULL != block_ctx_out->dev) { | ||
1503 | return 0; | ||
1504 | } else { | ||
1505 | printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n"); | ||
1506 | return -ENXIO; | ||
1507 | } | ||
1508 | } | ||
1509 | |||
1510 | static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) | ||
1511 | { | ||
1512 | if (NULL != block_ctx->bh) { | ||
1513 | brelse(block_ctx->bh); | ||
1514 | block_ctx->bh = NULL; | ||
1515 | } | ||
1516 | } | ||
1517 | |||
1518 | static int btrfsic_read_block(struct btrfsic_state *state, | ||
1519 | struct btrfsic_block_data_ctx *block_ctx) | ||
1520 | { | ||
1521 | block_ctx->bh = NULL; | ||
1522 | if (block_ctx->dev_bytenr & 4095) { | ||
1523 | printk(KERN_INFO | ||
1524 | "btrfsic: read_block() with unaligned bytenr %llu\n", | ||
1525 | (unsigned long long)block_ctx->dev_bytenr); | ||
1526 | return -1; | ||
1527 | } | ||
1528 | if (block_ctx->len > 4096) { | ||
1529 | printk(KERN_INFO | ||
1530 | "btrfsic: read_block() with too huge size %d\n", | ||
1531 | block_ctx->len); | ||
1532 | return -1; | ||
1533 | } | ||
1534 | |||
1535 | block_ctx->bh = __bread(block_ctx->dev->bdev, | ||
1536 | block_ctx->dev_bytenr >> 12, 4096); | ||
1537 | if (NULL == block_ctx->bh) | ||
1538 | return -1; | ||
1539 | block_ctx->data = block_ctx->bh->b_data; | ||
1540 | |||
1541 | return block_ctx->len; | ||
1542 | } | ||
1543 | |||
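btrfsic_read_block() only accepts a single, 4096-byte-aligned block: unaligned offsets and lengths above one page are rejected before __bread() is handed the 4 KiB block index (dev_bytenr >> 12). The same index/alignment arithmetic as a small sketch:

	#include <stdint.h>

	/* returns the 4 KiB block index for an aligned device byte
	 * offset, or -1 on the same conditions the function rejects */
	static int64_t read_index(uint64_t dev_bytenr, uint32_t len)
	{
		if (dev_bytenr & 4095)	/* must be 4096-aligned */
			return -1;
		if (len > 4096)		/* at most one block */
			return -1;
		return (int64_t)(dev_bytenr >> 12);	/* 2^12 == 4096 */
	}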
1544 | static void btrfsic_dump_database(struct btrfsic_state *state) | ||
1545 | { | ||
1546 | struct list_head *elem_all; | ||
1547 | |||
1548 | BUG_ON(NULL == state); | ||
1549 | |||
1550 | printk(KERN_INFO "all_blocks_list:\n"); | ||
1551 | list_for_each(elem_all, &state->all_blocks_list) { | ||
1552 | const struct btrfsic_block *const b_all = | ||
1553 | list_entry(elem_all, struct btrfsic_block, | ||
1554 | all_blocks_node); | ||
1555 | struct list_head *elem_ref_to; | ||
1556 | struct list_head *elem_ref_from; | ||
1557 | |||
1558 | printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n", | ||
1559 | btrfsic_get_block_type(state, b_all), | ||
1560 | (unsigned long long)b_all->logical_bytenr, | ||
1561 | b_all->dev_state->name, | ||
1562 | (unsigned long long)b_all->dev_bytenr, | ||
1563 | b_all->mirror_num); | ||
1564 | |||
1565 | list_for_each(elem_ref_to, &b_all->ref_to_list) { | ||
1566 | const struct btrfsic_block_link *const l = | ||
1567 | list_entry(elem_ref_to, | ||
1568 | struct btrfsic_block_link, | ||
1569 | node_ref_to); | ||
1570 | |||
1571 | printk(KERN_INFO " %c @%llu (%s/%llu/%d)" | ||
1572 | " refers %u* to" | ||
1573 | " %c @%llu (%s/%llu/%d)\n", | ||
1574 | btrfsic_get_block_type(state, b_all), | ||
1575 | (unsigned long long)b_all->logical_bytenr, | ||
1576 | b_all->dev_state->name, | ||
1577 | (unsigned long long)b_all->dev_bytenr, | ||
1578 | b_all->mirror_num, | ||
1579 | l->ref_cnt, | ||
1580 | btrfsic_get_block_type(state, l->block_ref_to), | ||
1581 | (unsigned long long) | ||
1582 | l->block_ref_to->logical_bytenr, | ||
1583 | l->block_ref_to->dev_state->name, | ||
1584 | (unsigned long long)l->block_ref_to->dev_bytenr, | ||
1585 | l->block_ref_to->mirror_num); | ||
1586 | } | ||
1587 | |||
1588 | list_for_each(elem_ref_from, &b_all->ref_from_list) { | ||
1589 | const struct btrfsic_block_link *const l = | ||
1590 | list_entry(elem_ref_from, | ||
1591 | struct btrfsic_block_link, | ||
1592 | node_ref_from); | ||
1593 | |||
1594 | printk(KERN_INFO " %c @%llu (%s/%llu/%d)" | ||
1595 | " is ref %u* from" | ||
1596 | " %c @%llu (%s/%llu/%d)\n", | ||
1597 | btrfsic_get_block_type(state, b_all), | ||
1598 | (unsigned long long)b_all->logical_bytenr, | ||
1599 | b_all->dev_state->name, | ||
1600 | (unsigned long long)b_all->dev_bytenr, | ||
1601 | b_all->mirror_num, | ||
1602 | l->ref_cnt, | ||
1603 | btrfsic_get_block_type(state, l->block_ref_from), | ||
1604 | (unsigned long long) | ||
1605 | l->block_ref_from->logical_bytenr, | ||
1606 | l->block_ref_from->dev_state->name, | ||
1607 | (unsigned long long) | ||
1608 | l->block_ref_from->dev_bytenr, | ||
1609 | l->block_ref_from->mirror_num); | ||
1610 | } | ||
1611 | |||
1612 | printk(KERN_INFO "\n"); | ||
1613 | } | ||
1614 | } | ||
1615 | |||
1616 | /* | ||
1617 | * Test whether the disk block contains a tree block (leaf or node) | ||
1618 | * (note that this test fails for the super block) | ||
1619 | */ | ||
1620 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, | ||
1621 | const u8 *data, unsigned int size) | ||
1622 | { | ||
1623 | struct btrfs_header *h; | ||
1624 | u8 csum[BTRFS_CSUM_SIZE]; | ||
1625 | u32 crc = ~(u32)0; | ||
1626 | int fail = 0; | ||
1627 | int crc_fail = 0; | ||
1628 | |||
1629 | h = (struct btrfs_header *)data; | ||
1630 | |||
1631 | if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) | ||
1632 | fail++; | ||
1633 | |||
1634 | crc = crc32c(crc, data + BTRFS_CSUM_SIZE, PAGE_SIZE - BTRFS_CSUM_SIZE); | ||
1635 | btrfs_csum_final(crc, csum); | ||
1636 | if (memcmp(csum, h->csum, state->csum_size)) | ||
1637 | crc_fail++; | ||
1638 | |||
1639 | return fail || crc_fail; | ||
1640 | } | ||
1641 | |||
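The metadata test relies on the btrfs tree-block header layout, which starts with BTRFS_CSUM_SIZE bytes of stored checksum followed by the filesystem UUID; crc32c over everything after the checksum area must reproduce the stored (little-endian) value. A user-space sketch of that check; crc32c() is assumed to be supplied externally, and seeding/inversion conventions differ between crc32c implementations, so treat the crc arithmetic as illustrative:

	#include <stdint.h>
	#include <string.h>

	#define CSUM_SIZE 32		/* room reserved for the checksum */
	#define FSID_SIZE 16

	/* assumed external crc32c; any implementation can be dropped in */
	extern uint32_t crc32c(uint32_t seed, const void *data, size_t len);

	/* header layout assumed: u8 csum[32]; u8 fsid[16]; ... */
	static int looks_like_our_metadata(const uint8_t *block, size_t size,
					   const uint8_t *our_fsid)
	{
		uint32_t crc;
		uint8_t le[4];

		if (memcmp(block + CSUM_SIZE, our_fsid, FSID_SIZE))
			return 0;	/* foreign or damaged block */

		crc = ~crc32c(~(uint32_t)0, block + CSUM_SIZE,
			      size - CSUM_SIZE);
		le[0] = crc & 0xff;	/* btrfs stores the crc as le32 */
		le[1] = (crc >> 8) & 0xff;
		le[2] = (crc >> 16) & 0xff;
		le[3] = (crc >> 24) & 0xff;
		return 0 == memcmp(le, block, sizeof(le));
	}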
1642 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | ||
1643 | u64 dev_bytenr, | ||
1644 | u8 *mapped_data, unsigned int len, | ||
1645 | struct bio *bio, | ||
1646 | int *bio_is_patched, | ||
1647 | struct buffer_head *bh, | ||
1648 | int submit_bio_bh_rw) | ||
1649 | { | ||
1650 | int is_metadata; | ||
1651 | struct btrfsic_block *block; | ||
1652 | struct btrfsic_block_data_ctx block_ctx; | ||
1653 | int ret; | ||
1654 | struct btrfsic_state *state = dev_state->state; | ||
1655 | struct block_device *bdev = dev_state->bdev; | ||
1656 | |||
1657 | WARN_ON(len > PAGE_SIZE); | ||
1658 | is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_data, len)); | ||
1659 | if (NULL != bio_is_patched) | ||
1660 | *bio_is_patched = 0; | ||
1661 | |||
1662 | block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, | ||
1663 | &state->block_hashtable); | ||
1664 | if (NULL != block) { | ||
1665 | u64 bytenr = 0; | ||
1666 | struct list_head *elem_ref_to; | ||
1667 | struct list_head *tmp_ref_to; | ||
1668 | |||
1669 | if (block->is_superblock) { | ||
1670 | bytenr = le64_to_cpu(((struct btrfs_super_block *) | ||
1671 | mapped_data)->bytenr); | ||
1672 | is_metadata = 1; | ||
1673 | if (state->print_mask & | ||
1674 | BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { | ||
1675 | printk(KERN_INFO | ||
1676 | "[before new superblock is written]:\n"); | ||
1677 | btrfsic_dump_tree_sub(state, block, 0); | ||
1678 | } | ||
1679 | } | ||
1680 | if (is_metadata) { | ||
1681 | if (!block->is_superblock) { | ||
1682 | bytenr = le64_to_cpu(((struct btrfs_header *) | ||
1683 | mapped_data)->bytenr); | ||
1684 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, | ||
1685 | dev_state, | ||
1686 | dev_bytenr, | ||
1687 | mapped_data); | ||
1688 | } | ||
1689 | if (block->logical_bytenr != bytenr) { | ||
1690 | printk(KERN_INFO | ||
1691 | "Written block @%llu (%s/%llu/%d)" | ||
1692 | " found in hash table, %c," | ||
1693 | " bytenr mismatch" | ||
1694 | " (!= stored %llu).\n", | ||
1695 | (unsigned long long)bytenr, | ||
1696 | dev_state->name, | ||
1697 | (unsigned long long)dev_bytenr, | ||
1698 | block->mirror_num, | ||
1699 | btrfsic_get_block_type(state, block), | ||
1700 | (unsigned long long) | ||
1701 | block->logical_bytenr); | ||
1702 | block->logical_bytenr = bytenr; | ||
1703 | } else if (state->print_mask & | ||
1704 | BTRFSIC_PRINT_MASK_VERBOSE) | ||
1705 | printk(KERN_INFO | ||
1706 | "Written block @%llu (%s/%llu/%d)" | ||
1707 | " found in hash table, %c.\n", | ||
1708 | (unsigned long long)bytenr, | ||
1709 | dev_state->name, | ||
1710 | (unsigned long long)dev_bytenr, | ||
1711 | block->mirror_num, | ||
1712 | btrfsic_get_block_type(state, block)); | ||
1713 | } else { | ||
1714 | bytenr = block->logical_bytenr; | ||
1715 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
1716 | printk(KERN_INFO | ||
1717 | "Written block @%llu (%s/%llu/%d)" | ||
1718 | " found in hash table, %c.\n", | ||
1719 | (unsigned long long)bytenr, | ||
1720 | dev_state->name, | ||
1721 | (unsigned long long)dev_bytenr, | ||
1722 | block->mirror_num, | ||
1723 | btrfsic_get_block_type(state, block)); | ||
1724 | } | ||
1725 | |||
1726 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
1727 | printk(KERN_INFO | ||
1728 | "ref_to_list: %cE, ref_from_list: %cE\n", | ||
1729 | list_empty(&block->ref_to_list) ? ' ' : '!', | ||
1730 | list_empty(&block->ref_from_list) ? ' ' : '!'); | ||
1731 | if (btrfsic_is_block_ref_by_superblock(state, block, 0)) { | ||
1732 | printk(KERN_INFO "btrfs: attempt to overwrite %c-block" | ||
1733 | " @%llu (%s/%llu/%d), old(gen=%llu," | ||
1734 | " objectid=%llu, type=%d, offset=%llu)," | ||
1735 | " new(gen=%llu)," | ||
1736 | " which is referenced by most recent superblock" | ||
1737 | " (superblockgen=%llu)!\n", | ||
1738 | btrfsic_get_block_type(state, block), | ||
1739 | (unsigned long long)bytenr, | ||
1740 | dev_state->name, | ||
1741 | (unsigned long long)dev_bytenr, | ||
1742 | block->mirror_num, | ||
1743 | (unsigned long long)block->generation, | ||
1744 | (unsigned long long) | ||
1745 | le64_to_cpu(block->disk_key.objectid), | ||
1746 | block->disk_key.type, | ||
1747 | (unsigned long long) | ||
1748 | le64_to_cpu(block->disk_key.offset), | ||
1749 | (unsigned long long) | ||
1750 | le64_to_cpu(((struct btrfs_header *) | ||
1751 | mapped_data)->generation), | ||
1752 | (unsigned long long) | ||
1753 | state->max_superblock_generation); | ||
1754 | btrfsic_dump_tree(state); | ||
1755 | } | ||
1756 | |||
1757 | if (!block->is_iodone && !block->never_written) { | ||
1758 | printk(KERN_INFO "btrfs: attempt to overwrite %c-block" | ||
1759 | " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," | ||
1760 | " which is not yet iodone!\n", | ||
1761 | btrfsic_get_block_type(state, block), | ||
1762 | (unsigned long long)bytenr, | ||
1763 | dev_state->name, | ||
1764 | (unsigned long long)dev_bytenr, | ||
1765 | block->mirror_num, | ||
1766 | (unsigned long long)block->generation, | ||
1767 | (unsigned long long) | ||
1768 | le64_to_cpu(((struct btrfs_header *) | ||
1769 | mapped_data)->generation)); | ||
1770 | /* it would not be safe to go on */ | ||
1771 | btrfsic_dump_tree(state); | ||
1772 | return; | ||
1773 | } | ||
1774 | |||
1775 | /* | ||
1776 | * Clear all references of this block. Do not free | ||
1777 | * the block itself even if it is not referenced anymore | ||
1778 | * because it still carries valuable information | ||
1779 | * like whether it was ever written and IO completed. | ||
1780 | */ | ||
1781 | list_for_each_safe(elem_ref_to, tmp_ref_to, | ||
1782 | &block->ref_to_list) { | ||
1783 | struct btrfsic_block_link *const l = | ||
1784 | list_entry(elem_ref_to, | ||
1785 | struct btrfsic_block_link, | ||
1786 | node_ref_to); | ||
1787 | |||
1788 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
1789 | btrfsic_print_rem_link(state, l); | ||
1790 | l->ref_cnt--; | ||
1791 | if (0 == l->ref_cnt) { | ||
1792 | list_del(&l->node_ref_to); | ||
1793 | list_del(&l->node_ref_from); | ||
1794 | btrfsic_block_link_hashtable_remove(l); | ||
1795 | btrfsic_block_link_free(l); | ||
1796 | } | ||
1797 | } | ||
1798 | |||
1799 | if (block->is_superblock) | ||
1800 | ret = btrfsic_map_superblock(state, bytenr, len, | ||
1801 | bdev, &block_ctx); | ||
1802 | else | ||
1803 | ret = btrfsic_map_block(state, bytenr, len, | ||
1804 | &block_ctx, 0); | ||
1805 | if (ret) { | ||
1806 | printk(KERN_INFO | ||
1807 | "btrfsic: btrfsic_map_block(root @%llu)" | ||
1808 | " failed!\n", (unsigned long long)bytenr); | ||
1809 | return; | ||
1810 | } | ||
1811 | block_ctx.data = mapped_data; | ||
1812 | /* the following is required in case of writes to mirrors, | ||
1813 | * use the same device state that was used for the lookup */ | ||
1814 | block_ctx.dev = dev_state; | ||
1815 | block_ctx.dev_bytenr = dev_bytenr; | ||
1816 | |||
1817 | if (is_metadata || state->include_extent_data) { | ||
1818 | block->never_written = 0; | ||
1819 | block->iodone_w_error = 0; | ||
1820 | if (NULL != bio) { | ||
1821 | block->is_iodone = 0; | ||
1822 | BUG_ON(NULL == bio_is_patched); | ||
1823 | if (!*bio_is_patched) { | ||
1824 | block->orig_bio_bh_private = | ||
1825 | bio->bi_private; | ||
1826 | block->orig_bio_bh_end_io.bio = | ||
1827 | bio->bi_end_io; | ||
1828 | block->next_in_same_bio = NULL; | ||
1829 | bio->bi_private = block; | ||
1830 | bio->bi_end_io = btrfsic_bio_end_io; | ||
1831 | *bio_is_patched = 1; | ||
1832 | } else { | ||
1833 | struct btrfsic_block *chained_block = | ||
1834 | (struct btrfsic_block *) | ||
1835 | bio->bi_private; | ||
1836 | |||
1837 | BUG_ON(NULL == chained_block); | ||
1838 | block->orig_bio_bh_private = | ||
1839 | chained_block->orig_bio_bh_private; | ||
1840 | block->orig_bio_bh_end_io.bio = | ||
1841 | chained_block->orig_bio_bh_end_io. | ||
1842 | bio; | ||
1843 | block->next_in_same_bio = chained_block; | ||
1844 | bio->bi_private = block; | ||
1845 | } | ||
1846 | } else if (NULL != bh) { | ||
1847 | block->is_iodone = 0; | ||
1848 | block->orig_bio_bh_private = bh->b_private; | ||
1849 | block->orig_bio_bh_end_io.bh = bh->b_end_io; | ||
1850 | block->next_in_same_bio = NULL; | ||
1851 | bh->b_private = block; | ||
1852 | bh->b_end_io = btrfsic_bh_end_io; | ||
1853 | } else { | ||
1854 | block->is_iodone = 1; | ||
1855 | block->orig_bio_bh_private = NULL; | ||
1856 | block->orig_bio_bh_end_io.bio = NULL; | ||
1857 | block->next_in_same_bio = NULL; | ||
1858 | } | ||
1859 | } | ||
1860 | |||
1861 | block->flush_gen = dev_state->last_flush_gen + 1; | ||
1862 | block->submit_bio_bh_rw = submit_bio_bh_rw; | ||
1863 | if (is_metadata) { | ||
1864 | block->logical_bytenr = bytenr; | ||
1865 | block->is_metadata = 1; | ||
1866 | if (block->is_superblock) { | ||
1867 | ret = btrfsic_process_written_superblock( | ||
1868 | state, | ||
1869 | block, | ||
1870 | (struct btrfs_super_block *) | ||
1871 | mapped_data); | ||
1872 | if (state->print_mask & | ||
1873 | BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { | ||
1874 | printk(KERN_INFO | ||
1875 | "[after new superblock is written]:\n"); | ||
1876 | btrfsic_dump_tree_sub(state, block, 0); | ||
1877 | } | ||
1878 | } else { | ||
1879 | block->mirror_num = 0; /* unknown */ | ||
1880 | ret = btrfsic_process_metablock( | ||
1881 | state, | ||
1882 | block, | ||
1883 | &block_ctx, | ||
1884 | (struct btrfs_header *) | ||
1885 | block_ctx.data, | ||
1886 | 0, 0); | ||
1887 | } | ||
1888 | if (ret) | ||
1889 | printk(KERN_INFO | ||
1890 | "btrfsic: btrfsic_process_metablock" | ||
1891 | "(root @%llu) failed!\n", | ||
1892 | (unsigned long long)dev_bytenr); | ||
1893 | } else { | ||
1894 | block->is_metadata = 0; | ||
1895 | block->mirror_num = 0; /* unknown */ | ||
1896 | block->generation = BTRFSIC_GENERATION_UNKNOWN; | ||
1897 | if (!state->include_extent_data | ||
1898 | && list_empty(&block->ref_from_list)) { | ||
1899 | /* | ||
1900 | * disk block is overwritten with extent | ||
1901 | * data (not meta data) and we are configured | ||
1902 | * to not include extent data: take the | ||
1903 | * chance and free the block's memory | ||
1904 | */ | ||
1905 | btrfsic_block_hashtable_remove(block); | ||
1906 | list_del(&block->all_blocks_node); | ||
1907 | btrfsic_block_free(block); | ||
1908 | } | ||
1909 | } | ||
1910 | btrfsic_release_block_ctx(&block_ctx); | ||
1911 | } else { | ||
1912 | /* block has not been found in hash table */ | ||
1913 | u64 bytenr; | ||
1914 | |||
1915 | if (!is_metadata) { | ||
1916 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
1917 | printk(KERN_INFO "Written block (%s/%llu/?)" | ||
1918 | " !found in hash table, D.\n", | ||
1919 | dev_state->name, | ||
1920 | (unsigned long long)dev_bytenr); | ||
1921 | if (!state->include_extent_data) | ||
1922 | return; /* ignore that written D block */ | ||
1923 | |||
1924 | /* this is getting ugly for the | ||
1925 | * include_extent_data case... */ | ||
1926 | bytenr = 0; /* unknown */ | ||
1927 | block_ctx.start = bytenr; | ||
1928 | block_ctx.len = len; | ||
1929 | block_ctx.bh = NULL; | ||
1930 | } else { | ||
1931 | bytenr = le64_to_cpu(((struct btrfs_header *) | ||
1932 | mapped_data)->bytenr); | ||
1933 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, | ||
1934 | dev_bytenr, | ||
1935 | mapped_data); | ||
1936 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
1937 | printk(KERN_INFO | ||
1938 | "Written block @%llu (%s/%llu/?)" | ||
1939 | " !found in hash table, M.\n", | ||
1940 | (unsigned long long)bytenr, | ||
1941 | dev_state->name, | ||
1942 | (unsigned long long)dev_bytenr); | ||
1943 | |||
1944 | ret = btrfsic_map_block(state, bytenr, len, &block_ctx, | ||
1945 | 0); | ||
1946 | if (ret) { | ||
1947 | printk(KERN_INFO | ||
1948 | "btrfsic: btrfsic_map_block(root @%llu)" | ||
1949 | " failed!\n", | ||
1950 | (unsigned long long)dev_bytenr); | ||
1951 | return; | ||
1952 | } | ||
1953 | } | ||
1954 | block_ctx.data = mapped_data; | ||
1955 | /* the following is required in case of writes to mirrors, | ||
1956 | * use the same device state that was used for the lookup */ | ||
1957 | block_ctx.dev = dev_state; | ||
1958 | block_ctx.dev_bytenr = dev_bytenr; | ||
1959 | |||
1960 | block = btrfsic_block_alloc(); | ||
1961 | if (NULL == block) { | ||
1962 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); | ||
1963 | btrfsic_release_block_ctx(&block_ctx); | ||
1964 | return; | ||
1965 | } | ||
1966 | block->dev_state = dev_state; | ||
1967 | block->dev_bytenr = dev_bytenr; | ||
1968 | block->logical_bytenr = bytenr; | ||
1969 | block->is_metadata = is_metadata; | ||
1970 | block->never_written = 0; | ||
1971 | block->iodone_w_error = 0; | ||
1972 | block->mirror_num = 0; /* unknown */ | ||
1973 | block->flush_gen = dev_state->last_flush_gen + 1; | ||
1974 | block->submit_bio_bh_rw = submit_bio_bh_rw; | ||
1975 | if (NULL != bio) { | ||
1976 | block->is_iodone = 0; | ||
1977 | BUG_ON(NULL == bio_is_patched); | ||
1978 | if (!*bio_is_patched) { | ||
1979 | block->orig_bio_bh_private = bio->bi_private; | ||
1980 | block->orig_bio_bh_end_io.bio = bio->bi_end_io; | ||
1981 | block->next_in_same_bio = NULL; | ||
1982 | bio->bi_private = block; | ||
1983 | bio->bi_end_io = btrfsic_bio_end_io; | ||
1984 | *bio_is_patched = 1; | ||
1985 | } else { | ||
1986 | struct btrfsic_block *chained_block = | ||
1987 | (struct btrfsic_block *) | ||
1988 | bio->bi_private; | ||
1989 | |||
1990 | BUG_ON(NULL == chained_block); | ||
1991 | block->orig_bio_bh_private = | ||
1992 | chained_block->orig_bio_bh_private; | ||
1993 | block->orig_bio_bh_end_io.bio = | ||
1994 | chained_block->orig_bio_bh_end_io.bio; | ||
1995 | block->next_in_same_bio = chained_block; | ||
1996 | bio->bi_private = block; | ||
1997 | } | ||
1998 | } else if (NULL != bh) { | ||
1999 | block->is_iodone = 0; | ||
2000 | block->orig_bio_bh_private = bh->b_private; | ||
2001 | block->orig_bio_bh_end_io.bh = bh->b_end_io; | ||
2002 | block->next_in_same_bio = NULL; | ||
2003 | bh->b_private = block; | ||
2004 | bh->b_end_io = btrfsic_bh_end_io; | ||
2005 | } else { | ||
2006 | block->is_iodone = 1; | ||
2007 | block->orig_bio_bh_private = NULL; | ||
2008 | block->orig_bio_bh_end_io.bio = NULL; | ||
2009 | block->next_in_same_bio = NULL; | ||
2010 | } | ||
2011 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
2012 | printk(KERN_INFO | ||
2013 | "New written %c-block @%llu (%s/%llu/%d)\n", | ||
2014 | is_metadata ? 'M' : 'D', | ||
2015 | (unsigned long long)block->logical_bytenr, | ||
2016 | block->dev_state->name, | ||
2017 | (unsigned long long)block->dev_bytenr, | ||
2018 | block->mirror_num); | ||
2019 | list_add(&block->all_blocks_node, &state->all_blocks_list); | ||
2020 | btrfsic_block_hashtable_add(block, &state->block_hashtable); | ||
2021 | |||
2022 | if (is_metadata) { | ||
2023 | ret = btrfsic_process_metablock(state, block, | ||
2024 | &block_ctx, | ||
2025 | (struct btrfs_header *) | ||
2026 | block_ctx.data, 0, 0); | ||
2027 | if (ret) | ||
2028 | printk(KERN_INFO | ||
2029 | "btrfsic: process_metablock(root @%llu)" | ||
2030 | " failed!\n", | ||
2031 | (unsigned long long)dev_bytenr); | ||
2032 | } | ||
2033 | btrfsic_release_block_ctx(&block_ctx); | ||
2034 | } | ||
2035 | } | ||
2036 | |||
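To observe write completion, btrfsic_process_written_block() "patches" the bio: it saves the original bi_end_io/bi_private, substitutes its own handler with the btrfsic_block as private data, and chains further blocks that share the same bio through next_in_same_bio so one completion can mark them all iodone before calling the saved handler. A generic user-space sketch of that save-and-chain pattern (names hypothetical, not the block-layer API; the real code uses a *bio_is_patched flag where this sketch compares handlers):

	#include <stddef.h>

	struct tracked_block {
		struct tracked_block *next_in_same_io;
		void (*orig_end_io)(void *priv, int err);
		void *orig_priv;
		int is_iodone;
	};

	struct io_request {		/* stand-in for struct bio */
		void (*end_io)(void *priv, int err);
		void *priv;
	};

	/* interpose on completion: the first patched block saves the
	 * original handler; later blocks for the same request chain */
	static void patch_request(struct io_request *rq,
				  struct tracked_block *b,
				  void (*handler)(void *priv, int err))
	{
		if (rq->end_io != handler) {	/* first block in this rq */
			b->orig_end_io = rq->end_io;
			b->orig_priv = rq->priv;
			b->next_in_same_io = NULL;
			rq->end_io = handler;
		} else {		/* chain behind the earlier block */
			struct tracked_block *first = rq->priv;

			b->orig_end_io = first->orig_end_io;
			b->orig_priv = first->orig_priv;
			b->next_in_same_io = first;
		}
		rq->priv = b;
	}

	/* completion: mark every chained block iodone, then call the
	 * original handler with its original private data, exactly once */
	static void tracked_end_io(void *priv, int err)
	{
		struct tracked_block *b = priv;
		void (*orig)(void *priv, int err) = b->orig_end_io;
		void *orig_priv = b->orig_priv;

		while (NULL != b) {
			b->is_iodone = 1;
			b = b->next_in_same_io;
		}
		orig(orig_priv, err);
	}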
2037 | static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) | ||
2038 | { | ||
2039 | struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private; | ||
2040 | int iodone_w_error; | ||
2041 | |||
2042 | /* mutex is not held! This is not safe if IO is not yet completed | ||
2043 | * on umount */ | ||
2044 | iodone_w_error = 0; | ||
2045 | if (bio_error_status) | ||
2046 | iodone_w_error = 1; | ||
2047 | |||
2048 | BUG_ON(NULL == block); | ||
2049 | bp->bi_private = block->orig_bio_bh_private; | ||
2050 | bp->bi_end_io = block->orig_bio_bh_end_io.bio; | ||
2051 | |||
2052 | do { | ||
2053 | struct btrfsic_block *next_block; | ||
2054 | struct btrfsic_dev_state *const dev_state = block->dev_state; | ||
2055 | |||
2056 | if ((dev_state->state->print_mask & | ||
2057 | BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) | ||
2058 | printk(KERN_INFO | ||
2059 | "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", | ||
2060 | bio_error_status, | ||
2061 | btrfsic_get_block_type(dev_state->state, block), | ||
2062 | (unsigned long long)block->logical_bytenr, | ||
2063 | dev_state->name, | ||
2064 | (unsigned long long)block->dev_bytenr, | ||
2065 | block->mirror_num); | ||
2066 | next_block = block->next_in_same_bio; | ||
2067 | block->iodone_w_error = iodone_w_error; | ||
2068 | if (block->submit_bio_bh_rw & REQ_FLUSH) { | ||
2069 | dev_state->last_flush_gen++; | ||
2070 | if ((dev_state->state->print_mask & | ||
2071 | BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) | ||
2072 | printk(KERN_INFO | ||
2073 | "bio_end_io() new %s flush_gen=%llu\n", | ||
2074 | dev_state->name, | ||
2075 | (unsigned long long) | ||
2076 | dev_state->last_flush_gen); | ||
2077 | } | ||
2078 | if (block->submit_bio_bh_rw & REQ_FUA) | ||
2079 | block->flush_gen = 0; /* FUA completed means block is | ||
2080 | * on disk */ | ||
2081 | block->is_iodone = 1; /* for FLUSH, this releases the block */ | ||
2082 | block = next_block; | ||
2083 | } while (NULL != block); | ||
2084 | |||
2085 | bp->bi_end_io(bp, bio_error_status); | ||
2086 | } | ||
2087 | |||
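Both completion handlers encode the flush protocol: a completed REQ_FLUSH advances the device's last_flush_gen, making every block written under an earlier generation durable, while REQ_FUA zeroes the block's flush_gen because a forced-unit-access write already reached stable media. A sketch of that generation bookkeeping (names hypothetical):

	#include <stdint.h>

	#define RQ_FLUSH (1u << 0)	/* stand-ins for REQ_FLUSH / REQ_FUA */
	#define RQ_FUA   (1u << 1)

	struct dev {
		uint64_t last_flush_gen;
	};

	struct blk {
		uint64_t flush_gen;	/* generation the block was written in */
	};

	/* on submit: the block becomes durable only once a flush from a
	 * later generation completes (see process_written_block above) */
	static void on_submit(struct dev *d, struct blk *b)
	{
		b->flush_gen = d->last_flush_gen + 1;
	}

	/* on completion: FLUSH advances the device generation; FUA means
	 * the block already hit stable storage, so no flush is needed */
	static void on_complete(struct dev *d, struct blk *b, unsigned int rw)
	{
		if (rw & RQ_FLUSH)
			d->last_flush_gen++;
		if (rw & RQ_FUA)
			b->flush_gen = 0;
	}

	/* durable iff a flush of the same or later generation completed */
	static int is_durable(const struct dev *d, const struct blk *b)
	{
		return b->flush_gen <= d->last_flush_gen;
	}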
2088 | static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) | ||
2089 | { | ||
2090 | struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private; | ||
2091 | int iodone_w_error = !uptodate; | ||
2092 | struct btrfsic_dev_state *dev_state; | ||
2093 | |||
2094 | BUG_ON(NULL == block); | ||
2095 | dev_state = block->dev_state; | ||
2096 | if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) | ||
2097 | printk(KERN_INFO | ||
2098 | "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", | ||
2099 | iodone_w_error, | ||
2100 | btrfsic_get_block_type(dev_state->state, block), | ||
2101 | (unsigned long long)block->logical_bytenr, | ||
2102 | block->dev_state->name, | ||
2103 | (unsigned long long)block->dev_bytenr, | ||
2104 | block->mirror_num); | ||
2105 | |||
2106 | block->iodone_w_error = iodone_w_error; | ||
2107 | if (block->submit_bio_bh_rw & REQ_FLUSH) { | ||
2108 | dev_state->last_flush_gen++; | ||
2109 | if ((dev_state->state->print_mask & | ||
2110 | BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) | ||
2111 | printk(KERN_INFO | ||
2112 | "bh_end_io() new %s flush_gen=%llu\n", | ||
2113 | dev_state->name, | ||
2114 | (unsigned long long)dev_state->last_flush_gen); | ||
2115 | } | ||
2116 | if (block->submit_bio_bh_rw & REQ_FUA) | ||
2117 | block->flush_gen = 0; /* FUA completed means block is on disk */ | ||
2118 | |||
2119 | bh->b_private = block->orig_bio_bh_private; | ||
2120 | bh->b_end_io = block->orig_bio_bh_end_io.bh; | ||
2121 | block->is_iodone = 1; /* for FLUSH, this releases the block */ | ||
2122 | bh->b_end_io(bh, uptodate); | ||
2123 | } | ||
2124 | |||
2125 | static int btrfsic_process_written_superblock( | ||
2126 | struct btrfsic_state *state, | ||
2127 | struct btrfsic_block *const superblock, | ||
2128 | struct btrfs_super_block *const super_hdr) | ||
2129 | { | ||
2130 | int pass; | ||
2131 | |||
2132 | superblock->generation = btrfs_super_generation(super_hdr); | ||
2133 | if (!(superblock->generation > state->max_superblock_generation || | ||
2134 | 0 == state->max_superblock_generation)) { | ||
2135 | if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) | ||
2136 | printk(KERN_INFO | ||
2137 | "btrfsic: superblock @%llu (%s/%llu/%d)" | ||
2138 | " with old gen %llu <= %llu\n", | ||
2139 | (unsigned long long)superblock->logical_bytenr, | ||
2140 | superblock->dev_state->name, | ||
2141 | (unsigned long long)superblock->dev_bytenr, | ||
2142 | superblock->mirror_num, | ||
2143 | (unsigned long long) | ||
2144 | btrfs_super_generation(super_hdr), | ||
2145 | (unsigned long long) | ||
2146 | state->max_superblock_generation); | ||
2147 | } else { | ||
2148 | if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) | ||
2149 | printk(KERN_INFO | ||
2150 | "btrfsic: got new superblock @%llu (%s/%llu/%d)" | ||
2151 | " with new gen %llu > %llu\n", | ||
2152 | (unsigned long long)superblock->logical_bytenr, | ||
2153 | superblock->dev_state->name, | ||
2154 | (unsigned long long)superblock->dev_bytenr, | ||
2155 | superblock->mirror_num, | ||
2156 | (unsigned long long) | ||
2157 | btrfs_super_generation(super_hdr), | ||
2158 | (unsigned long long) | ||
2159 | state->max_superblock_generation); | ||
2160 | |||
2161 | state->max_superblock_generation = | ||
2162 | btrfs_super_generation(super_hdr); | ||
2163 | state->latest_superblock = superblock; | ||
2164 | } | ||
2165 | |||
2166 | for (pass = 0; pass < 3; pass++) { | ||
2167 | int ret; | ||
2168 | u64 next_bytenr; | ||
2169 | struct btrfsic_block *next_block; | ||
2170 | struct btrfsic_block_data_ctx tmp_next_block_ctx; | ||
2171 | struct btrfsic_block_link *l; | ||
2172 | int num_copies; | ||
2173 | int mirror_num; | ||
2174 | const char *additional_string = NULL; | ||
2175 | struct btrfs_disk_key tmp_disk_key; | ||
2176 | |||
2177 | tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; | ||
2178 | tmp_disk_key.offset = 0; | ||
2179 | |||
2180 | switch (pass) { | ||
2181 | case 0: | ||
2182 | tmp_disk_key.objectid = | ||
2183 | cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); | ||
2184 | additional_string = "root "; | ||
2185 | next_bytenr = btrfs_super_root(super_hdr); | ||
2186 | if (state->print_mask & | ||
2187 | BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) | ||
2188 | printk(KERN_INFO "root@%llu\n", | ||
2189 | (unsigned long long)next_bytenr); | ||
2190 | break; | ||
2191 | case 1: | ||
2192 | tmp_disk_key.objectid = | ||
2193 | cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); | ||
2194 | additional_string = "chunk "; | ||
2195 | next_bytenr = btrfs_super_chunk_root(super_hdr); | ||
2196 | if (state->print_mask & | ||
2197 | BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) | ||
2198 | printk(KERN_INFO "chunk@%llu\n", | ||
2199 | (unsigned long long)next_bytenr); | ||
2200 | break; | ||
2201 | case 2: | ||
2202 | tmp_disk_key.objectid = | ||
2203 | cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); | ||
2204 | additional_string = "log "; | ||
2205 | next_bytenr = btrfs_super_log_root(super_hdr); | ||
2206 | if (0 == next_bytenr) | ||
2207 | continue; | ||
2208 | if (state->print_mask & | ||
2209 | BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) | ||
2210 | printk(KERN_INFO "log@%llu\n", | ||
2211 | (unsigned long long)next_bytenr); | ||
2212 | break; | ||
2213 | } | ||
2214 | |||
2215 | num_copies = | ||
2216 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | ||
2217 | next_bytenr, PAGE_SIZE); | ||
2218 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | ||
2219 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | ||
2220 | (unsigned long long)next_bytenr, num_copies); | ||
2221 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | ||
2222 | int was_created; | ||
2223 | |||
2224 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
2225 | printk(KERN_INFO | ||
2226 | "btrfsic_process_written_superblock(" | ||
2227 | "mirror_num=%d)\n", mirror_num); | ||
2228 | ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | ||
2229 | &tmp_next_block_ctx, | ||
2230 | mirror_num); | ||
2231 | if (ret) { | ||
2232 | printk(KERN_INFO | ||
2233 | "btrfsic: btrfsic_map_block(@%llu," | ||
2234 | " mirror=%d) failed!\n", | ||
2235 | (unsigned long long)next_bytenr, | ||
2236 | mirror_num); | ||
2237 | return -1; | ||
2238 | } | ||
2239 | |||
2240 | next_block = btrfsic_block_lookup_or_add( | ||
2241 | state, | ||
2242 | &tmp_next_block_ctx, | ||
2243 | additional_string, | ||
2244 | 1, 0, 1, | ||
2245 | mirror_num, | ||
2246 | &was_created); | ||
2247 | if (NULL == next_block) { | ||
2248 | printk(KERN_INFO | ||
2249 | "btrfsic: error, kmalloc failed!\n"); | ||
2250 | btrfsic_release_block_ctx(&tmp_next_block_ctx); | ||
2251 | return -1; | ||
2252 | } | ||
2253 | |||
2254 | next_block->disk_key = tmp_disk_key; | ||
2255 | if (was_created) | ||
2256 | next_block->generation = | ||
2257 | BTRFSIC_GENERATION_UNKNOWN; | ||
2258 | l = btrfsic_block_link_lookup_or_add( | ||
2259 | state, | ||
2260 | &tmp_next_block_ctx, | ||
2261 | next_block, | ||
2262 | superblock, | ||
2263 | BTRFSIC_GENERATION_UNKNOWN); | ||
2264 | btrfsic_release_block_ctx(&tmp_next_block_ctx); | ||
2265 | if (NULL == l) | ||
2266 | return -1; | ||
2267 | } | ||
2268 | } | ||
2269 | |||
2270 | if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) { | ||
2271 | WARN_ON(1); | ||
2272 | btrfsic_dump_tree(state); | ||
2273 | } | ||
2274 | |||
2275 | return 0; | ||
2276 | } | ||
2277 | |||
2278 | static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, | ||
2279 | struct btrfsic_block *const block, | ||
2280 | int recursion_level) | ||
2281 | { | ||
2282 | struct list_head *elem_ref_to; | ||
2283 | int ret = 0; | ||
2284 | |||
2285 | if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { | ||
2286 | /* | ||
2287 | * Note that this situation can happen and does not | ||
2288 | * indicate an error in regular cases. It happens | ||
2289 | * when disk blocks are freed and later reused. | ||
2290 | * The check-integrity module is not aware of any | ||
2291 | * block free operations, it just recognizes block | ||
2292 | * write operations. Therefore it keeps the linkage | ||
2293 | * information for a block until a block is | ||
2294 | * rewritten. This can temporarily cause incorrect | ||
2295 | * and even circular linkage information. This | ||
2296 | * causes no harm unless such blocks are referenced | ||
2297 | * by the most recent super block. | ||
2298 | */ | ||
2299 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
2300 | printk(KERN_INFO | ||
2301 | "btrfsic: abort cyclic linkage (case 1).\n"); | ||
2302 | |||
2303 | return ret; | ||
2304 | } | ||
2305 | |||
2306 | /* | ||
2307 | * This algorithm is recursive because the amount of used stack | ||
2308 | * space is very small and the max recursion depth is limited. | ||
2309 | */ | ||
2310 | list_for_each(elem_ref_to, &block->ref_to_list) { | ||
2311 | const struct btrfsic_block_link *const l = | ||
2312 | list_entry(elem_ref_to, struct btrfsic_block_link, | ||
2313 | node_ref_to); | ||
2314 | |||
2315 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
2316 | printk(KERN_INFO | ||
2317 | "rl=%d, %c @%llu (%s/%llu/%d)" | ||
2318 | " %u* refers to %c @%llu (%s/%llu/%d)\n", | ||
2319 | recursion_level, | ||
2320 | btrfsic_get_block_type(state, block), | ||
2321 | (unsigned long long)block->logical_bytenr, | ||
2322 | block->dev_state->name, | ||
2323 | (unsigned long long)block->dev_bytenr, | ||
2324 | block->mirror_num, | ||
2325 | l->ref_cnt, | ||
2326 | btrfsic_get_block_type(state, l->block_ref_to), | ||
2327 | (unsigned long long) | ||
2328 | l->block_ref_to->logical_bytenr, | ||
2329 | l->block_ref_to->dev_state->name, | ||
2330 | (unsigned long long)l->block_ref_to->dev_bytenr, | ||
2331 | l->block_ref_to->mirror_num); | ||
2332 | if (l->block_ref_to->never_written) { | ||
2333 | printk(KERN_INFO "btrfs: attempt to write superblock" | ||
2334 | " which references block %c @%llu (%s/%llu/%d)" | ||
2335 | " which is never written!\n", | ||
2336 | btrfsic_get_block_type(state, l->block_ref_to), | ||
2337 | (unsigned long long) | ||
2338 | l->block_ref_to->logical_bytenr, | ||
2339 | l->block_ref_to->dev_state->name, | ||
2340 | (unsigned long long)l->block_ref_to->dev_bytenr, | ||
2341 | l->block_ref_to->mirror_num); | ||
2342 | ret = -1; | ||
2343 | } else if (!l->block_ref_to->is_iodone) { | ||
2344 | printk(KERN_INFO "btrfs: attempt to write superblock" | ||
2345 | " which references block %c @%llu (%s/%llu/%d)" | ||
2346 | " which is not yet iodone!\n", | ||
2347 | btrfsic_get_block_type(state, l->block_ref_to), | ||
2348 | (unsigned long long) | ||
2349 | l->block_ref_to->logical_bytenr, | ||
2350 | l->block_ref_to->dev_state->name, | ||
2351 | (unsigned long long)l->block_ref_to->dev_bytenr, | ||
2352 | l->block_ref_to->mirror_num); | ||
2353 | ret = -1; | ||
2354 | } else if (l->parent_generation != | ||
2355 | l->block_ref_to->generation && | ||
2356 | BTRFSIC_GENERATION_UNKNOWN != | ||
2357 | l->parent_generation && | ||
2358 | BTRFSIC_GENERATION_UNKNOWN != | ||
2359 | l->block_ref_to->generation) { | ||
2360 | printk(KERN_INFO "btrfs: attempt to write superblock" | ||
2361 | " which references block %c @%llu (%s/%llu/%d)" | ||
2362 | " with generation %llu !=" | ||
2363 | " parent generation %llu!\n", | ||
2364 | btrfsic_get_block_type(state, l->block_ref_to), | ||
2365 | (unsigned long long) | ||
2366 | l->block_ref_to->logical_bytenr, | ||
2367 | l->block_ref_to->dev_state->name, | ||
2368 | (unsigned long long)l->block_ref_to->dev_bytenr, | ||
2369 | l->block_ref_to->mirror_num, | ||
2370 | (unsigned long long)l->block_ref_to->generation, | ||
2371 | (unsigned long long)l->parent_generation); | ||
2372 | ret = -1; | ||
2373 | } else if (l->block_ref_to->flush_gen > | ||
2374 | l->block_ref_to->dev_state->last_flush_gen) { | ||
2375 | printk(KERN_INFO "btrfs: attempt to write superblock" | ||
2376 | " which references block %c @%llu (%s/%llu/%d)" | ||
2377 | " which is not flushed out of disk's write cache" | ||
2378 | " (block flush_gen=%llu," | ||
2379 | " dev->flush_gen=%llu)!\n", | ||
2380 | btrfsic_get_block_type(state, l->block_ref_to), | ||
2381 | (unsigned long long) | ||
2382 | l->block_ref_to->logical_bytenr, | ||
2383 | l->block_ref_to->dev_state->name, | ||
2384 | (unsigned long long)l->block_ref_to->dev_bytenr, | ||
2385 | l->block_ref_to->mirror_num, | ||
2386 | (unsigned long long)l->block_ref_to->flush_gen, | ||
2387 | (unsigned long long) | ||
2388 | l->block_ref_to->dev_state->last_flush_gen); | ||
2389 | ret = -1; | ||
2390 | } else if (-1 == btrfsic_check_all_ref_blocks(state, | ||
2391 | l->block_ref_to, | ||
2392 | recursion_level + | ||
2393 | 1)) { | ||
2394 | ret = -1; | ||
2395 | } | ||
2396 | } | ||
2397 | |||
2398 | return ret; | ||
2399 | } | ||
2400 | |||
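Editor's note: a minimal stand-alone sketch of the walk above may help readers without the kernel context. Everything below is hypothetical (model_block, model_check_refs and MODEL_MAX_DEPTH are invented names); it only mirrors the shape of btrfsic_check_all_ref_blocks(): a depth-capped recursive walk that flags superblock references to blocks that were never written or whose I/O has not completed. BTRFS_MAX_LEVEL is 8 in this kernel, hence a cap of 11.

/* Hypothetical model of the depth-capped reference walk (not kernel code). */
#define MODEL_MAX_DEPTH (3 + 8)	/* stands in for 3 + BTRFS_MAX_LEVEL */

struct model_block {
	int never_written;		/* models block->never_written */
	int is_iodone;			/* models block->is_iodone */
	int nr_refs;
	struct model_block **refs;	/* models the ref_to_list links */
};

static int model_check_refs(const struct model_block *b, int depth)
{
	int i, ret = 0;

	if (depth >= MODEL_MAX_DEPTH)
		return 0;	/* tolerate stale/cyclic linkage, as above */

	for (i = 0; i < b->nr_refs; i++) {
		const struct model_block *to = b->refs[i];

		if (to->never_written || !to->is_iodone)
			ret = -1;	/* superblock must not reference it */
		else if (-1 == model_check_refs(to, depth + 1))
			ret = -1;
	}
	return ret;
}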
2401 | static int btrfsic_is_block_ref_by_superblock( | ||
2402 | const struct btrfsic_state *state, | ||
2403 | const struct btrfsic_block *block, | ||
2404 | int recursion_level) | ||
2405 | { | ||
2406 | struct list_head *elem_ref_from; | ||
2407 | |||
2408 | if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { | ||
2409 | /* refer to comment at "abort cyclic linkage (case 1)" */ | ||
2410 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
2411 | printk(KERN_INFO | ||
2412 | "btrfsic: abort cyclic linkage (case 2).\n"); | ||
2413 | |||
2414 | return 0; | ||
2415 | } | ||
2416 | |||
2417 | /* | ||
2418 | * This algorithm is recursive because the amount of used stack space | ||
2419 | * is very small and the max recursion depth is limited. | ||
2420 | */ | ||
2421 | list_for_each(elem_ref_from, &block->ref_from_list) { | ||
2422 | const struct btrfsic_block_link *const l = | ||
2423 | list_entry(elem_ref_from, struct btrfsic_block_link, | ||
2424 | node_ref_from); | ||
2425 | |||
2426 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
2427 | printk(KERN_INFO | ||
2428 | "rl=%d, %c @%llu (%s/%llu/%d)" | ||
2429 | " is ref %u* from %c @%llu (%s/%llu/%d)\n", | ||
2430 | recursion_level, | ||
2431 | btrfsic_get_block_type(state, block), | ||
2432 | (unsigned long long)block->logical_bytenr, | ||
2433 | block->dev_state->name, | ||
2434 | (unsigned long long)block->dev_bytenr, | ||
2435 | block->mirror_num, | ||
2436 | l->ref_cnt, | ||
2437 | btrfsic_get_block_type(state, l->block_ref_from), | ||
2438 | (unsigned long long) | ||
2439 | l->block_ref_from->logical_bytenr, | ||
2440 | l->block_ref_from->dev_state->name, | ||
2441 | (unsigned long long) | ||
2442 | l->block_ref_from->dev_bytenr, | ||
2443 | l->block_ref_from->mirror_num); | ||
2444 | if (l->block_ref_from->is_superblock && | ||
2445 | state->latest_superblock->dev_bytenr == | ||
2446 | l->block_ref_from->dev_bytenr && | ||
2447 | state->latest_superblock->dev_state->bdev == | ||
2448 | l->block_ref_from->dev_state->bdev) | ||
2449 | return 1; | ||
2450 | else if (btrfsic_is_block_ref_by_superblock(state, | ||
2451 | l->block_ref_from, | ||
2452 | recursion_level + | ||
2453 | 1)) | ||
2454 | return 1; | ||
2455 | } | ||
2456 | |||
2457 | return 0; | ||
2458 | } | ||
2459 | |||
2460 | static void btrfsic_print_add_link(const struct btrfsic_state *state, | ||
2461 | const struct btrfsic_block_link *l) | ||
2462 | { | ||
2463 | printk(KERN_INFO | ||
2464 | "Add %u* link from %c @%llu (%s/%llu/%d)" | ||
2465 | " to %c @%llu (%s/%llu/%d).\n", | ||
2466 | l->ref_cnt, | ||
2467 | btrfsic_get_block_type(state, l->block_ref_from), | ||
2468 | (unsigned long long)l->block_ref_from->logical_bytenr, | ||
2469 | l->block_ref_from->dev_state->name, | ||
2470 | (unsigned long long)l->block_ref_from->dev_bytenr, | ||
2471 | l->block_ref_from->mirror_num, | ||
2472 | btrfsic_get_block_type(state, l->block_ref_to), | ||
2473 | (unsigned long long)l->block_ref_to->logical_bytenr, | ||
2474 | l->block_ref_to->dev_state->name, | ||
2475 | (unsigned long long)l->block_ref_to->dev_bytenr, | ||
2476 | l->block_ref_to->mirror_num); | ||
2477 | } | ||
2478 | |||
2479 | static void btrfsic_print_rem_link(const struct btrfsic_state *state, | ||
2480 | const struct btrfsic_block_link *l) | ||
2481 | { | ||
2482 | printk(KERN_INFO | ||
2483 | "Rem %u* link from %c @%llu (%s/%llu/%d)" | ||
2484 | " to %c @%llu (%s/%llu/%d).\n", | ||
2485 | l->ref_cnt, | ||
2486 | btrfsic_get_block_type(state, l->block_ref_from), | ||
2487 | (unsigned long long)l->block_ref_from->logical_bytenr, | ||
2488 | l->block_ref_from->dev_state->name, | ||
2489 | (unsigned long long)l->block_ref_from->dev_bytenr, | ||
2490 | l->block_ref_from->mirror_num, | ||
2491 | btrfsic_get_block_type(state, l->block_ref_to), | ||
2492 | (unsigned long long)l->block_ref_to->logical_bytenr, | ||
2493 | l->block_ref_to->dev_state->name, | ||
2494 | (unsigned long long)l->block_ref_to->dev_bytenr, | ||
2495 | l->block_ref_to->mirror_num); | ||
2496 | } | ||
2497 | |||
2498 | static char btrfsic_get_block_type(const struct btrfsic_state *state, | ||
2499 | const struct btrfsic_block *block) | ||
2500 | { | ||
2501 | if (block->is_superblock && | ||
2502 | state->latest_superblock->dev_bytenr == block->dev_bytenr && | ||
2503 | state->latest_superblock->dev_state->bdev == block->dev_state->bdev) | ||
2504 | return 'S'; | ||
2505 | else if (block->is_superblock) | ||
2506 | return 's'; | ||
2507 | else if (block->is_metadata) | ||
2508 | return 'M'; | ||
2509 | else | ||
2510 | return 'D'; | ||
2511 | } | ||
2512 | |||
2513 | static void btrfsic_dump_tree(const struct btrfsic_state *state) | ||
2514 | { | ||
2515 | btrfsic_dump_tree_sub(state, state->latest_superblock, 0); | ||
2516 | } | ||
2517 | |||
2518 | static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, | ||
2519 | const struct btrfsic_block *block, | ||
2520 | int indent_level) | ||
2521 | { | ||
2522 | struct list_head *elem_ref_to; | ||
2523 | int indent_add; | ||
2524 | static char buf[80]; | ||
2525 | int cursor_position; | ||
2526 | |||
2527 | /* | ||
2528 | * It would be better to fill an on-stack buffer with a complete | ||
2529 | * line and dump it at once when it is time to print a newline. | ||
2530 | */ | ||
2531 | |||
2532 | /* | ||
2533 | * This algorithm is recursive because the amount of used stack space | ||
2534 | * is very small and the max recursion depth is limited. | ||
2535 | */ | ||
2536 | indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", | ||
2537 | btrfsic_get_block_type(state, block), | ||
2538 | (unsigned long long)block->logical_bytenr, | ||
2539 | block->dev_state->name, | ||
2540 | (unsigned long long)block->dev_bytenr, | ||
2541 | block->mirror_num); | ||
2542 | if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { | ||
2543 | printk("[...]\n"); | ||
2544 | return; | ||
2545 | } | ||
2546 | printk("%s", buf); | ||
2547 | indent_level += indent_add; | ||
2548 | if (list_empty(&block->ref_to_list)) { | ||
2549 | printk("\n"); | ||
2550 | return; | ||
2551 | } | ||
2552 | if (block->mirror_num > 1 && | ||
2553 | !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) { | ||
2554 | printk(" [...]\n"); | ||
2555 | return; | ||
2556 | } | ||
2557 | |||
2558 | cursor_position = indent_level; | ||
2559 | list_for_each(elem_ref_to, &block->ref_to_list) { | ||
2560 | const struct btrfsic_block_link *const l = | ||
2561 | list_entry(elem_ref_to, struct btrfsic_block_link, | ||
2562 | node_ref_to); | ||
2563 | |||
2564 | while (cursor_position < indent_level) { | ||
2565 | printk(" "); | ||
2566 | cursor_position++; | ||
2567 | } | ||
2568 | if (l->ref_cnt > 1) | ||
2569 | indent_add = sprintf(buf, " %d*--> ", l->ref_cnt); | ||
2570 | else | ||
2571 | indent_add = sprintf(buf, " --> "); | ||
2572 | if (indent_level + indent_add > | ||
2573 | BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { | ||
2574 | printk("[...]\n"); | ||
2575 | cursor_position = 0; | ||
2576 | continue; | ||
2577 | } | ||
2578 | |||
2579 | printk("%s", buf); | ||
2580 | |||
2581 | btrfsic_dump_tree_sub(state, l->block_ref_to, | ||
2582 | indent_level + indent_add); | ||
2583 | cursor_position = 0; | ||
2584 | } | ||
2585 | } | ||
2586 | |||
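For illustration, given the formats above ("%c-%llu(%s/%llu/%d)" per block, " --> " or " N*--> " per link, and 'S'/'M'/'D' from btrfsic_get_block_type()), a dump of a small tree would look roughly like this; the bytenrs and the device name are made up:

S-65536(sdb/65536/1) --> M-29360128(sdb/29360128/1) --> D-12582912(sdb/12582912/1)
                                                   2*--> D-12587008(sdb/12587008/1)

The sibling line is padded with spaces up to the saved cursor_position, so links line up under their parent block.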
2587 | static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( | ||
2588 | struct btrfsic_state *state, | ||
2589 | struct btrfsic_block_data_ctx *next_block_ctx, | ||
2590 | struct btrfsic_block *next_block, | ||
2591 | struct btrfsic_block *from_block, | ||
2592 | u64 parent_generation) | ||
2593 | { | ||
2594 | struct btrfsic_block_link *l; | ||
2595 | |||
2596 | l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev, | ||
2597 | next_block_ctx->dev_bytenr, | ||
2598 | from_block->dev_state->bdev, | ||
2599 | from_block->dev_bytenr, | ||
2600 | &state->block_link_hashtable); | ||
2601 | if (NULL == l) { | ||
2602 | l = btrfsic_block_link_alloc(); | ||
2603 | if (NULL == l) { | ||
2604 | printk(KERN_INFO | ||
2605 | "btrfsic: error, kmalloc failed!\n"); | ||
2606 | return NULL; | ||
2607 | } | ||
2608 | |||
2609 | l->block_ref_to = next_block; | ||
2610 | l->block_ref_from = from_block; | ||
2611 | l->ref_cnt = 1; | ||
2612 | l->parent_generation = parent_generation; | ||
2613 | |||
2614 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
2615 | btrfsic_print_add_link(state, l); | ||
2616 | |||
2617 | list_add(&l->node_ref_to, &from_block->ref_to_list); | ||
2618 | list_add(&l->node_ref_from, &next_block->ref_from_list); | ||
2619 | |||
2620 | btrfsic_block_link_hashtable_add(l, | ||
2621 | &state->block_link_hashtable); | ||
2622 | } else { | ||
2623 | l->ref_cnt++; | ||
2624 | l->parent_generation = parent_generation; | ||
2625 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
2626 | btrfsic_print_add_link(state, l); | ||
2627 | } | ||
2628 | |||
2629 | return l; | ||
2630 | } | ||
2631 | |||
2632 | static struct btrfsic_block *btrfsic_block_lookup_or_add( | ||
2633 | struct btrfsic_state *state, | ||
2634 | struct btrfsic_block_data_ctx *block_ctx, | ||
2635 | const char *additional_string, | ||
2636 | int is_metadata, | ||
2637 | int is_iodone, | ||
2638 | int never_written, | ||
2639 | int mirror_num, | ||
2640 | int *was_created) | ||
2641 | { | ||
2642 | struct btrfsic_block *block; | ||
2643 | |||
2644 | block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev, | ||
2645 | block_ctx->dev_bytenr, | ||
2646 | &state->block_hashtable); | ||
2647 | if (NULL == block) { | ||
2648 | struct btrfsic_dev_state *dev_state; | ||
2649 | |||
2650 | block = btrfsic_block_alloc(); | ||
2651 | if (NULL == block) { | ||
2652 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); | ||
2653 | return NULL; | ||
2654 | } | ||
2655 | dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev); | ||
2656 | if (NULL == dev_state) { | ||
2657 | printk(KERN_INFO | ||
2658 | "btrfsic: error, lookup dev_state failed!\n"); | ||
2659 | btrfsic_block_free(block); | ||
2660 | return NULL; | ||
2661 | } | ||
2662 | block->dev_state = dev_state; | ||
2663 | block->dev_bytenr = block_ctx->dev_bytenr; | ||
2664 | block->logical_bytenr = block_ctx->start; | ||
2665 | block->is_metadata = is_metadata; | ||
2666 | block->is_iodone = is_iodone; | ||
2667 | block->never_written = never_written; | ||
2668 | block->mirror_num = mirror_num; | ||
2669 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
2670 | printk(KERN_INFO | ||
2671 | "New %s%c-block @%llu (%s/%llu/%d)\n", | ||
2672 | additional_string, | ||
2673 | btrfsic_get_block_type(state, block), | ||
2674 | (unsigned long long)block->logical_bytenr, | ||
2675 | dev_state->name, | ||
2676 | (unsigned long long)block->dev_bytenr, | ||
2677 | mirror_num); | ||
2678 | list_add(&block->all_blocks_node, &state->all_blocks_list); | ||
2679 | btrfsic_block_hashtable_add(block, &state->block_hashtable); | ||
2680 | if (NULL != was_created) | ||
2681 | *was_created = 1; | ||
2682 | } else { | ||
2683 | if (NULL != was_created) | ||
2684 | *was_created = 0; | ||
2685 | } | ||
2686 | |||
2687 | return block; | ||
2688 | } | ||
2689 | |||
2690 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | ||
2691 | u64 bytenr, | ||
2692 | struct btrfsic_dev_state *dev_state, | ||
2693 | u64 dev_bytenr, char *data) | ||
2694 | { | ||
2695 | int num_copies; | ||
2696 | int mirror_num; | ||
2697 | int ret; | ||
2698 | struct btrfsic_block_data_ctx block_ctx; | ||
2699 | int match = 0; | ||
2700 | |||
2701 | num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, | ||
2702 | bytenr, PAGE_SIZE); | ||
2703 | |||
2704 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | ||
2705 | ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, | ||
2706 | &block_ctx, mirror_num); | ||
2707 | if (ret) { | ||
2708 | printk(KERN_INFO "btrfsic:" | ||
2709 | " btrfsic_map_block(logical @%llu," | ||
2710 | " mirror %d) failed!\n", | ||
2711 | (unsigned long long)bytenr, mirror_num); | ||
2712 | continue; | ||
2713 | } | ||
2714 | |||
2715 | if (dev_state->bdev == block_ctx.dev->bdev && | ||
2716 | dev_bytenr == block_ctx.dev_bytenr) { | ||
2717 | match++; | ||
2718 | btrfsic_release_block_ctx(&block_ctx); | ||
2719 | break; | ||
2720 | } | ||
2721 | btrfsic_release_block_ctx(&block_ctx); | ||
2722 | } | ||
2723 | |||
2724 | if (!match) { | ||
2725 | printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," | ||
2726 | " buffer->log_bytenr=%llu, submit_bio(bdev=%s," | ||
2727 | " phys_bytenr=%llu)!\n", | ||
2728 | (unsigned long long)bytenr, dev_state->name, | ||
2729 | (unsigned long long)dev_bytenr); | ||
2730 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | ||
2731 | ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, | ||
2732 | &block_ctx, mirror_num); | ||
2733 | if (ret) | ||
2734 | continue; | ||
2735 | |||
2736 | printk(KERN_INFO "Read logical bytenr @%llu maps to" | ||
2737 | " (%s/%llu/%d)\n", | ||
2738 | (unsigned long long)bytenr, | ||
2739 | block_ctx.dev->name, | ||
2740 | (unsigned long long)block_ctx.dev_bytenr, | ||
2741 | mirror_num); | ||
2742 | } | ||
2743 | WARN_ON(1); | ||
2744 | } | ||
2745 | } | ||
2746 | |||
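A worked example of the check above: on a two-device RAID1 filesystem, btrfs_num_copies() returns 2, so a metadata buffer with logical bytenr 29360128 maps to two (bdev, physical bytenr) pairs, one per mirror. The submitted write matches as soon as its (dev_state->bdev, dev_bytenr) equals one of those pairs; only if every mirror misses is the mapping of each mirror dumped and WARN_ON(1) raised. (The numbers are illustrative.)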
2747 | static struct btrfsic_dev_state *btrfsic_dev_state_lookup( | ||
2748 | struct block_device *bdev) | ||
2749 | { | ||
2750 | struct btrfsic_dev_state *ds; | ||
2751 | |||
2752 | ds = btrfsic_dev_state_hashtable_lookup(bdev, | ||
2753 | &btrfsic_dev_state_hashtable); | ||
2754 | return ds; | ||
2755 | } | ||
2756 | |||
2757 | int btrfsic_submit_bh(int rw, struct buffer_head *bh) | ||
2758 | { | ||
2759 | struct btrfsic_dev_state *dev_state; | ||
2760 | |||
2761 | if (!btrfsic_is_initialized) | ||
2762 | return submit_bh(rw, bh); | ||
2763 | |||
2764 | mutex_lock(&btrfsic_mutex); | ||
2765 | /* since btrfsic_submit_bh() might also be called before | ||
2766 | * btrfsic_mount(), this might return NULL */ | ||
2767 | dev_state = btrfsic_dev_state_lookup(bh->b_bdev); | ||
2768 | |||
2769 | /* Only called to write the superblock (incl. FLUSH/FUA) */ | ||
2770 | if (NULL != dev_state && | ||
2771 | (rw & WRITE) && bh->b_size > 0) { | ||
2772 | u64 dev_bytenr; | ||
2773 | |||
2774 | dev_bytenr = 4096 * bh->b_blocknr; | ||
2775 | if (dev_state->state->print_mask & | ||
2776 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) | ||
2777 | printk(KERN_INFO | ||
2778 | "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," | ||
2779 | " size=%lu, data=%p, bdev=%p)\n", | ||
2780 | rw, (unsigned long)bh->b_blocknr, | ||
2781 | (unsigned long long)dev_bytenr, | ||
2782 | (unsigned long)bh->b_size, bh->b_data, | ||
2783 | bh->b_bdev); | ||
2784 | btrfsic_process_written_block(dev_state, dev_bytenr, | ||
2785 | bh->b_data, bh->b_size, NULL, | ||
2786 | NULL, bh, rw); | ||
2787 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { | ||
2788 | if (dev_state->state->print_mask & | ||
2789 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) | ||
2790 | printk(KERN_INFO | ||
2791 | "submit_bh(rw=0x%x) FLUSH, bdev=%p\n", | ||
2792 | rw, bh->b_bdev); | ||
2793 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { | ||
2794 | if ((dev_state->state->print_mask & | ||
2795 | (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | ||
2796 | BTRFSIC_PRINT_MASK_VERBOSE))) | ||
2797 | printk(KERN_INFO | ||
2798 | "btrfsic_submit_bh(%s) with FLUSH" | ||
2799 | " but dummy block already in use" | ||
2800 | " (ignored)!\n", | ||
2801 | dev_state->name); | ||
2802 | } else { | ||
2803 | struct btrfsic_block *const block = | ||
2804 | &dev_state->dummy_block_for_bio_bh_flush; | ||
2805 | |||
2806 | block->is_iodone = 0; | ||
2807 | block->never_written = 0; | ||
2808 | block->iodone_w_error = 0; | ||
2809 | block->flush_gen = dev_state->last_flush_gen + 1; | ||
2810 | block->submit_bio_bh_rw = rw; | ||
2811 | block->orig_bio_bh_private = bh->b_private; | ||
2812 | block->orig_bio_bh_end_io.bh = bh->b_end_io; | ||
2813 | block->next_in_same_bio = NULL; | ||
2814 | bh->b_private = block; | ||
2815 | bh->b_end_io = btrfsic_bh_end_io; | ||
2816 | } | ||
2817 | } | ||
2818 | mutex_unlock(&btrfsic_mutex); | ||
2819 | return submit_bh(rw, bh); | ||
2820 | } | ||
2821 | |||
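As the comment above notes, this wrapper is only reached for superblock writes, including pure FLUSH requests. A hedged call-site sketch (write_super_sketch is an invented name; WRITE_FUA is the stock rw flag that carries REQ_FUA, which makes the checker mark the block as on-disk at iodone time):

static int write_super_sketch(struct buffer_head *bh)
{
	/* bh is assumed locked and up to date, as in the real write path */
	return btrfsic_submit_bh(WRITE_FUA, bh);
}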
2822 | void btrfsic_submit_bio(int rw, struct bio *bio) | ||
2823 | { | ||
2824 | struct btrfsic_dev_state *dev_state; | ||
2825 | |||
2826 | if (!btrfsic_is_initialized) { | ||
2827 | submit_bio(rw, bio); | ||
2828 | return; | ||
2829 | } | ||
2830 | |||
2831 | mutex_lock(&btrfsic_mutex); | ||
2832 | /* since btrfsic_submit_bio() is also called before | ||
2833 | * btrfsic_mount(), this might return NULL */ | ||
2834 | dev_state = btrfsic_dev_state_lookup(bio->bi_bdev); | ||
2835 | if (NULL != dev_state && | ||
2836 | (rw & WRITE) && NULL != bio->bi_io_vec) { | ||
2837 | unsigned int i; | ||
2838 | u64 dev_bytenr; | ||
2839 | int bio_is_patched; | ||
2840 | |||
2841 | dev_bytenr = 512 * bio->bi_sector; | ||
2842 | bio_is_patched = 0; | ||
2843 | if (dev_state->state->print_mask & | ||
2844 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) | ||
2845 | printk(KERN_INFO | ||
2846 | "submit_bio(rw=0x%x, bi_vcnt=%u," | ||
2847 | " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", | ||
2848 | rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, | ||
2849 | (unsigned long long)dev_bytenr, | ||
2850 | bio->bi_bdev); | ||
2851 | |||
2852 | for (i = 0; i < bio->bi_vcnt; i++) { | ||
2853 | u8 *mapped_data; | ||
2854 | |||
2855 | mapped_data = kmap(bio->bi_io_vec[i].bv_page); | ||
2856 | if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | ||
2857 | BTRFSIC_PRINT_MASK_VERBOSE) == | ||
2858 | (dev_state->state->print_mask & | ||
2859 | (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | ||
2860 | BTRFSIC_PRINT_MASK_VERBOSE))) | ||
2861 | printk(KERN_INFO | ||
2862 | "#%u: page=%p, mapped=%p, len=%u," | ||
2863 | " offset=%u\n", | ||
2864 | i, bio->bi_io_vec[i].bv_page, | ||
2865 | mapped_data, | ||
2866 | bio->bi_io_vec[i].bv_len, | ||
2867 | bio->bi_io_vec[i].bv_offset); | ||
2868 | btrfsic_process_written_block(dev_state, dev_bytenr, | ||
2869 | mapped_data, | ||
2870 | bio->bi_io_vec[i].bv_len, | ||
2871 | bio, &bio_is_patched, | ||
2872 | NULL, rw); | ||
2873 | kunmap(bio->bi_io_vec[i].bv_page); | ||
2874 | dev_bytenr += bio->bi_io_vec[i].bv_len; | ||
2875 | } | ||
2876 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { | ||
2877 | if (dev_state->state->print_mask & | ||
2878 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) | ||
2879 | printk(KERN_INFO | ||
2880 | "submit_bio(rw=0x%x) FLUSH, bdev=%p\n", | ||
2881 | rw, bio->bi_bdev); | ||
2882 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { | ||
2883 | if ((dev_state->state->print_mask & | ||
2884 | (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | ||
2885 | BTRFSIC_PRINT_MASK_VERBOSE))) | ||
2886 | printk(KERN_INFO | ||
2887 | "btrfsic_submit_bio(%s) with FLUSH" | ||
2888 | " but dummy block already in use" | ||
2889 | " (ignored)!\n", | ||
2890 | dev_state->name); | ||
2891 | } else { | ||
2892 | struct btrfsic_block *const block = | ||
2893 | &dev_state->dummy_block_for_bio_bh_flush; | ||
2894 | |||
2895 | block->is_iodone = 0; | ||
2896 | block->never_written = 0; | ||
2897 | block->iodone_w_error = 0; | ||
2898 | block->flush_gen = dev_state->last_flush_gen + 1; | ||
2899 | block->submit_bio_bh_rw = rw; | ||
2900 | block->orig_bio_bh_private = bio->bi_private; | ||
2901 | block->orig_bio_bh_end_io.bio = bio->bi_end_io; | ||
2902 | block->next_in_same_bio = NULL; | ||
2903 | bio->bi_private = block; | ||
2904 | bio->bi_end_io = btrfsic_bio_end_io; | ||
2905 | } | ||
2906 | } | ||
2907 | mutex_unlock(&btrfsic_mutex); | ||
2908 | |||
2909 | submit_bio(rw, bio); | ||
2910 | } | ||
2911 | |||
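Note the unit difference between the two submission paths: btrfsic_submit_bh() computes the device byte offset as 4096 * b_blocknr (4 KiB blocks), while btrfsic_submit_bio() computes it as 512 * bi_sector (512-byte sectors). For example, b_blocknr 16 and bi_sector 128 both denote device byte 65536, so the same write is attributed to the same dev_bytenr whichever path submits it.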
2912 | int btrfsic_mount(struct btrfs_root *root, | ||
2913 | struct btrfs_fs_devices *fs_devices, | ||
2914 | int including_extent_data, u32 print_mask) | ||
2915 | { | ||
2916 | int ret; | ||
2917 | struct btrfsic_state *state; | ||
2918 | struct list_head *dev_head = &fs_devices->devices; | ||
2919 | struct btrfs_device *device; | ||
2920 | |||
2921 | state = kzalloc(sizeof(*state), GFP_NOFS); | ||
2922 | if (NULL == state) { | ||
2923 | printk(KERN_INFO "btrfs check-integrity: kzalloc() failed!\n"); | ||
2924 | return -1; | ||
2925 | } | ||
2926 | |||
2927 | if (!btrfsic_is_initialized) { | ||
2928 | mutex_init(&btrfsic_mutex); | ||
2929 | btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable); | ||
2930 | btrfsic_is_initialized = 1; | ||
2931 | } | ||
2932 | mutex_lock(&btrfsic_mutex); | ||
2933 | state->root = root; | ||
2934 | state->print_mask = print_mask; | ||
2935 | state->include_extent_data = including_extent_data; | ||
2936 | state->csum_size = 0; | ||
2937 | INIT_LIST_HEAD(&state->all_blocks_list); | ||
2938 | btrfsic_block_hashtable_init(&state->block_hashtable); | ||
2939 | btrfsic_block_link_hashtable_init(&state->block_link_hashtable); | ||
2940 | state->max_superblock_generation = 0; | ||
2941 | state->latest_superblock = NULL; | ||
2942 | |||
2943 | list_for_each_entry(device, dev_head, dev_list) { | ||
2944 | struct btrfsic_dev_state *ds; | ||
2945 | char *p; | ||
2946 | |||
2947 | if (!device->bdev || !device->name) | ||
2948 | continue; | ||
2949 | |||
2950 | ds = btrfsic_dev_state_alloc(); | ||
2951 | if (NULL == ds) { | ||
2952 | printk(KERN_INFO | ||
2953 | "btrfs check-integrity: kmalloc() failed!\n"); | ||
2954 | mutex_unlock(&btrfsic_mutex); | ||
2955 | return -1; | ||
2956 | } | ||
2957 | ds->bdev = device->bdev; | ||
2958 | ds->state = state; | ||
2959 | bdevname(ds->bdev, ds->name); | ||
2960 | ds->name[BDEVNAME_SIZE - 1] = '\0'; | ||
2961 | for (p = ds->name; *p != '\0'; p++); | ||
2962 | while (p > ds->name && *p != '/') | ||
2963 | p--; | ||
2964 | if (*p == '/') | ||
2965 | p++; | ||
2966 | strlcpy(ds->name, p, sizeof(ds->name)); | ||
2967 | btrfsic_dev_state_hashtable_add(ds, | ||
2968 | &btrfsic_dev_state_hashtable); | ||
2969 | } | ||
2970 | |||
2971 | ret = btrfsic_process_superblock(state, fs_devices); | ||
2972 | if (0 != ret) { | ||
2973 | mutex_unlock(&btrfsic_mutex); | ||
2974 | btrfsic_unmount(root, fs_devices); | ||
2975 | return ret; | ||
2976 | } | ||
2977 | |||
2978 | if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE) | ||
2979 | btrfsic_dump_database(state); | ||
2980 | if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE) | ||
2981 | btrfsic_dump_tree(state); | ||
2982 | |||
2983 | mutex_unlock(&btrfsic_mutex); | ||
2984 | return 0; | ||
2985 | } | ||
2986 | |||
2987 | void btrfsic_unmount(struct btrfs_root *root, | ||
2988 | struct btrfs_fs_devices *fs_devices) | ||
2989 | { | ||
2990 | struct list_head *elem_all; | ||
2991 | struct list_head *tmp_all; | ||
2992 | struct btrfsic_state *state; | ||
2993 | struct list_head *dev_head = &fs_devices->devices; | ||
2994 | struct btrfs_device *device; | ||
2995 | |||
2996 | if (!btrfsic_is_initialized) | ||
2997 | return; | ||
2998 | |||
2999 | mutex_lock(&btrfsic_mutex); | ||
3000 | |||
3001 | state = NULL; | ||
3002 | list_for_each_entry(device, dev_head, dev_list) { | ||
3003 | struct btrfsic_dev_state *ds; | ||
3004 | |||
3005 | if (!device->bdev || !device->name) | ||
3006 | continue; | ||
3007 | |||
3008 | ds = btrfsic_dev_state_hashtable_lookup( | ||
3009 | device->bdev, | ||
3010 | &btrfsic_dev_state_hashtable); | ||
3011 | if (NULL != ds) { | ||
3012 | state = ds->state; | ||
3013 | btrfsic_dev_state_hashtable_remove(ds); | ||
3014 | btrfsic_dev_state_free(ds); | ||
3015 | } | ||
3016 | } | ||
3017 | |||
3018 | if (NULL == state) { | ||
3019 | printk(KERN_INFO | ||
3020 | "btrfsic: error, cannot find state information" | ||
3021 | " on umount!\n"); | ||
3022 | mutex_unlock(&btrfsic_mutex); | ||
3023 | return; | ||
3024 | } | ||
3025 | |||
3026 | /* | ||
3027 | * Don't care about keeping the lists' state up to date, | ||
3028 | * just free all memory that was allocated dynamically. | ||
3029 | * Free the blocks and the block_links. | ||
3030 | */ | ||
3031 | list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) { | ||
3032 | struct btrfsic_block *const b_all = | ||
3033 | list_entry(elem_all, struct btrfsic_block, | ||
3034 | all_blocks_node); | ||
3035 | struct list_head *elem_ref_to; | ||
3036 | struct list_head *tmp_ref_to; | ||
3037 | |||
3038 | list_for_each_safe(elem_ref_to, tmp_ref_to, | ||
3039 | &b_all->ref_to_list) { | ||
3040 | struct btrfsic_block_link *const l = | ||
3041 | list_entry(elem_ref_to, | ||
3042 | struct btrfsic_block_link, | ||
3043 | node_ref_to); | ||
3044 | |||
3045 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | ||
3046 | btrfsic_print_rem_link(state, l); | ||
3047 | |||
3048 | l->ref_cnt--; | ||
3049 | if (0 == l->ref_cnt) | ||
3050 | btrfsic_block_link_free(l); | ||
3051 | } | ||
3052 | |||
3053 | if (b_all->is_iodone) | ||
3054 | btrfsic_block_free(b_all); | ||
3055 | else | ||
3056 | printk(KERN_INFO "btrfs: attempt to free %c-block" | ||
3057 | " @%llu (%s/%llu/%d) on umount which is" | ||
3058 | " not yet iodone!\n", | ||
3059 | btrfsic_get_block_type(state, b_all), | ||
3060 | (unsigned long long)b_all->logical_bytenr, | ||
3061 | b_all->dev_state->name, | ||
3062 | (unsigned long long)b_all->dev_bytenr, | ||
3063 | b_all->mirror_num); | ||
3064 | } | ||
3065 | |||
3066 | mutex_unlock(&btrfsic_mutex); | ||
3067 | |||
3068 | kfree(state); | ||
3069 | } | ||
diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h new file mode 100644 index 000000000000..8b59175cc502 --- /dev/null +++ b/fs/btrfs/check-integrity.h | |||
@@ -0,0 +1,36 @@ | |||
1 | /* | ||
2 | * Copyright (C) STRATO AG 2011. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #if !defined(__BTRFS_CHECK_INTEGRITY__) | ||
20 | #define __BTRFS_CHECK_INTEGRITY__ | ||
21 | |||
22 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||
23 | int btrfsic_submit_bh(int rw, struct buffer_head *bh); | ||
24 | void btrfsic_submit_bio(int rw, struct bio *bio); | ||
25 | #else | ||
26 | #define btrfsic_submit_bh submit_bh | ||
27 | #define btrfsic_submit_bio submit_bio | ||
28 | #endif | ||
29 | |||
30 | int btrfsic_mount(struct btrfs_root *root, | ||
31 | struct btrfs_fs_devices *fs_devices, | ||
32 | int including_extent_data, u32 print_mask); | ||
33 | void btrfsic_unmount(struct btrfs_root *root, | ||
34 | struct btrfs_fs_devices *fs_devices); | ||
35 | |||
36 | #endif | ||
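The #else branch maps the wrappers straight to submit_bh()/submit_bio() when CONFIG_BTRFS_FS_CHECK_INTEGRITY is off, so call sites need no #ifdefs. A minimal sketch (flush_dev_sketch is an invented name; WRITE_FLUSH is the stock rw flag that carries REQ_FLUSH):

#include "check-integrity.h"

static void flush_dev_sketch(struct block_device *bdev, struct bio *bio)
{
	bio->bi_bdev = bdev;
	btrfsic_submit_bio(WRITE_FLUSH, bio);	/* plain submit_bio() when off */
}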
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dede441bdeee..0639a555e16e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -240,7 +240,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
240 | 240 | ||
241 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, | 241 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, |
242 | new_root_objectid, &disk_key, level, | 242 | new_root_objectid, &disk_key, level, |
243 | buf->start, 0); | 243 | buf->start, 0, 1); |
244 | if (IS_ERR(cow)) | 244 | if (IS_ERR(cow)) |
245 | return PTR_ERR(cow); | 245 | return PTR_ERR(cow); |
246 | 246 | ||
@@ -261,9 +261,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
261 | 261 | ||
262 | WARN_ON(btrfs_header_generation(buf) > trans->transid); | 262 | WARN_ON(btrfs_header_generation(buf) > trans->transid); |
263 | if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) | 263 | if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) |
264 | ret = btrfs_inc_ref(trans, root, cow, 1); | 264 | ret = btrfs_inc_ref(trans, root, cow, 1, 1); |
265 | else | 265 | else |
266 | ret = btrfs_inc_ref(trans, root, cow, 0); | 266 | ret = btrfs_inc_ref(trans, root, cow, 0, 1); |
267 | 267 | ||
268 | if (ret) | 268 | if (ret) |
269 | return ret; | 269 | return ret; |
@@ -350,14 +350,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
350 | if ((owner == root->root_key.objectid || | 350 | if ((owner == root->root_key.objectid || |
351 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && | 351 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && |
352 | !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { | 352 | !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { |
353 | ret = btrfs_inc_ref(trans, root, buf, 1); | 353 | ret = btrfs_inc_ref(trans, root, buf, 1, 1); |
354 | BUG_ON(ret); | 354 | BUG_ON(ret); |
355 | 355 | ||
356 | if (root->root_key.objectid == | 356 | if (root->root_key.objectid == |
357 | BTRFS_TREE_RELOC_OBJECTID) { | 357 | BTRFS_TREE_RELOC_OBJECTID) { |
358 | ret = btrfs_dec_ref(trans, root, buf, 0); | 358 | ret = btrfs_dec_ref(trans, root, buf, 0, 1); |
359 | BUG_ON(ret); | 359 | BUG_ON(ret); |
360 | ret = btrfs_inc_ref(trans, root, cow, 1); | 360 | ret = btrfs_inc_ref(trans, root, cow, 1, 1); |
361 | BUG_ON(ret); | 361 | BUG_ON(ret); |
362 | } | 362 | } |
363 | new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | 363 | new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; |
@@ -365,9 +365,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
365 | 365 | ||
366 | if (root->root_key.objectid == | 366 | if (root->root_key.objectid == |
367 | BTRFS_TREE_RELOC_OBJECTID) | 367 | BTRFS_TREE_RELOC_OBJECTID) |
368 | ret = btrfs_inc_ref(trans, root, cow, 1); | 368 | ret = btrfs_inc_ref(trans, root, cow, 1, 1); |
369 | else | 369 | else |
370 | ret = btrfs_inc_ref(trans, root, cow, 0); | 370 | ret = btrfs_inc_ref(trans, root, cow, 0, 1); |
371 | BUG_ON(ret); | 371 | BUG_ON(ret); |
372 | } | 372 | } |
373 | if (new_flags != 0) { | 373 | if (new_flags != 0) { |
@@ -381,11 +381,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
381 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | 381 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { |
382 | if (root->root_key.objectid == | 382 | if (root->root_key.objectid == |
383 | BTRFS_TREE_RELOC_OBJECTID) | 383 | BTRFS_TREE_RELOC_OBJECTID) |
384 | ret = btrfs_inc_ref(trans, root, cow, 1); | 384 | ret = btrfs_inc_ref(trans, root, cow, 1, 1); |
385 | else | 385 | else |
386 | ret = btrfs_inc_ref(trans, root, cow, 0); | 386 | ret = btrfs_inc_ref(trans, root, cow, 0, 1); |
387 | BUG_ON(ret); | 387 | BUG_ON(ret); |
388 | ret = btrfs_dec_ref(trans, root, buf, 1); | 388 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); |
389 | BUG_ON(ret); | 389 | BUG_ON(ret); |
390 | } | 390 | } |
391 | clean_tree_block(trans, root, buf); | 391 | clean_tree_block(trans, root, buf); |
@@ -446,7 +446,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
446 | 446 | ||
447 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, | 447 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, |
448 | root->root_key.objectid, &disk_key, | 448 | root->root_key.objectid, &disk_key, |
449 | level, search_start, empty_size); | 449 | level, search_start, empty_size, 1); |
450 | if (IS_ERR(cow)) | 450 | if (IS_ERR(cow)) |
451 | return PTR_ERR(cow); | 451 | return PTR_ERR(cow); |
452 | 452 | ||
@@ -484,7 +484,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
484 | rcu_assign_pointer(root->node, cow); | 484 | rcu_assign_pointer(root->node, cow); |
485 | 485 | ||
486 | btrfs_free_tree_block(trans, root, buf, parent_start, | 486 | btrfs_free_tree_block(trans, root, buf, parent_start, |
487 | last_ref); | 487 | last_ref, 1); |
488 | free_extent_buffer(buf); | 488 | free_extent_buffer(buf); |
489 | add_root_to_dirty_list(root); | 489 | add_root_to_dirty_list(root); |
490 | } else { | 490 | } else { |
@@ -500,7 +500,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
500 | trans->transid); | 500 | trans->transid); |
501 | btrfs_mark_buffer_dirty(parent); | 501 | btrfs_mark_buffer_dirty(parent); |
502 | btrfs_free_tree_block(trans, root, buf, parent_start, | 502 | btrfs_free_tree_block(trans, root, buf, parent_start, |
503 | last_ref); | 503 | last_ref, 1); |
504 | } | 504 | } |
505 | if (unlock_orig) | 505 | if (unlock_orig) |
506 | btrfs_tree_unlock(buf); | 506 | btrfs_tree_unlock(buf); |
@@ -957,7 +957,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
957 | free_extent_buffer(mid); | 957 | free_extent_buffer(mid); |
958 | 958 | ||
959 | root_sub_used(root, mid->len); | 959 | root_sub_used(root, mid->len); |
960 | btrfs_free_tree_block(trans, root, mid, 0, 1); | 960 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); |
961 | /* once for the root ptr */ | 961 | /* once for the root ptr */ |
962 | free_extent_buffer(mid); | 962 | free_extent_buffer(mid); |
963 | return 0; | 963 | return 0; |
@@ -1015,7 +1015,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1015 | if (wret) | 1015 | if (wret) |
1016 | ret = wret; | 1016 | ret = wret; |
1017 | root_sub_used(root, right->len); | 1017 | root_sub_used(root, right->len); |
1018 | btrfs_free_tree_block(trans, root, right, 0, 1); | 1018 | btrfs_free_tree_block(trans, root, right, 0, 1, 0); |
1019 | free_extent_buffer(right); | 1019 | free_extent_buffer(right); |
1020 | right = NULL; | 1020 | right = NULL; |
1021 | } else { | 1021 | } else { |
@@ -1055,7 +1055,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1055 | if (wret) | 1055 | if (wret) |
1056 | ret = wret; | 1056 | ret = wret; |
1057 | root_sub_used(root, mid->len); | 1057 | root_sub_used(root, mid->len); |
1058 | btrfs_free_tree_block(trans, root, mid, 0, 1); | 1058 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); |
1059 | free_extent_buffer(mid); | 1059 | free_extent_buffer(mid); |
1060 | mid = NULL; | 1060 | mid = NULL; |
1061 | } else { | 1061 | } else { |
@@ -2089,7 +2089,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2089 | 2089 | ||
2090 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 2090 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2091 | root->root_key.objectid, &lower_key, | 2091 | root->root_key.objectid, &lower_key, |
2092 | level, root->node->start, 0); | 2092 | level, root->node->start, 0, 0); |
2093 | if (IS_ERR(c)) | 2093 | if (IS_ERR(c)) |
2094 | return PTR_ERR(c); | 2094 | return PTR_ERR(c); |
2095 | 2095 | ||
@@ -2216,7 +2216,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2216 | 2216 | ||
2217 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 2217 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2218 | root->root_key.objectid, | 2218 | root->root_key.objectid, |
2219 | &disk_key, level, c->start, 0); | 2219 | &disk_key, level, c->start, 0, 0); |
2220 | if (IS_ERR(split)) | 2220 | if (IS_ERR(split)) |
2221 | return PTR_ERR(split); | 2221 | return PTR_ERR(split); |
2222 | 2222 | ||
@@ -2970,7 +2970,7 @@ again: | |||
2970 | 2970 | ||
2971 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 2971 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
2972 | root->root_key.objectid, | 2972 | root->root_key.objectid, |
2973 | &disk_key, 0, l->start, 0); | 2973 | &disk_key, 0, l->start, 0, 0); |
2974 | if (IS_ERR(right)) | 2974 | if (IS_ERR(right)) |
2975 | return PTR_ERR(right); | 2975 | return PTR_ERR(right); |
2976 | 2976 | ||
@@ -3781,7 +3781,7 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3781 | 3781 | ||
3782 | root_sub_used(root, leaf->len); | 3782 | root_sub_used(root, leaf->len); |
3783 | 3783 | ||
3784 | btrfs_free_tree_block(trans, root, leaf, 0, 1); | 3784 | btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); |
3785 | return 0; | 3785 | return 0; |
3786 | } | 3786 | } |
3787 | /* | 3787 | /* |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 67385033323d..27ebe61d3ccc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -86,6 +86,9 @@ struct btrfs_ordered_sum; | |||
86 | /* holds checksums of all the data extents */ | 86 | /* holds checksums of all the data extents */ |
87 | #define BTRFS_CSUM_TREE_OBJECTID 7ULL | 87 | #define BTRFS_CSUM_TREE_OBJECTID 7ULL |
88 | 88 | ||
89 | /* for storing balance parameters in the root tree */ | ||
90 | #define BTRFS_BALANCE_OBJECTID -4ULL | ||
91 | |||
89 | /* orphan objectid for tracking unlinked/truncated files */ | 92 ||
90 | #define BTRFS_ORPHAN_OBJECTID -5ULL | 93 | #define BTRFS_ORPHAN_OBJECTID -5ULL |
91 | 94 | ||
@@ -692,6 +695,54 @@ struct btrfs_root_ref { | |||
692 | __le16 name_len; | 695 | __le16 name_len; |
693 | } __attribute__ ((__packed__)); | 696 | } __attribute__ ((__packed__)); |
694 | 697 | ||
698 | struct btrfs_disk_balance_args { | ||
699 | /* | ||
700 | * profiles to operate on, single is denoted by | ||
701 | * BTRFS_AVAIL_ALLOC_BIT_SINGLE | ||
702 | */ | ||
703 | __le64 profiles; | ||
704 | |||
705 | /* usage filter */ | ||
706 | __le64 usage; | ||
707 | |||
708 | /* devid filter */ | ||
709 | __le64 devid; | ||
710 | |||
711 | /* devid subset filter [pstart..pend) */ | ||
712 | __le64 pstart; | ||
713 | __le64 pend; | ||
714 | |||
715 | /* btrfs virtual address space subset filter [vstart..vend) */ | ||
716 | __le64 vstart; | ||
717 | __le64 vend; | ||
718 | |||
719 | /* | ||
720 | * profile to convert to, single is denoted by | ||
721 | * BTRFS_AVAIL_ALLOC_BIT_SINGLE | ||
722 | */ | ||
723 | __le64 target; | ||
724 | |||
725 | /* BTRFS_BALANCE_ARGS_* */ | ||
726 | __le64 flags; | ||
727 | |||
728 | __le64 unused[8]; | ||
729 | } __attribute__ ((__packed__)); | ||
730 | |||
731 | /* | ||
732 | * store balance parameters to disk so that balance can be properly | ||
733 | * resumed after crash or unmount | ||
734 | */ | ||
735 | struct btrfs_balance_item { | ||
736 | /* BTRFS_BALANCE_* */ | ||
737 | __le64 flags; | ||
738 | |||
739 | struct btrfs_disk_balance_args data; | ||
740 | struct btrfs_disk_balance_args meta; | ||
741 | struct btrfs_disk_balance_args sys; | ||
742 | |||
743 | __le64 unused[4]; | ||
744 | } __attribute__ ((__packed__)); | ||
745 | |||
695 | #define BTRFS_FILE_EXTENT_INLINE 0 | 746 | #define BTRFS_FILE_EXTENT_INLINE 0 |
696 | #define BTRFS_FILE_EXTENT_REG 1 | 747 | #define BTRFS_FILE_EXTENT_REG 1 |
697 | #define BTRFS_FILE_EXTENT_PREALLOC 2 | 748 | #define BTRFS_FILE_EXTENT_PREALLOC 2 |
@@ -751,14 +802,32 @@ struct btrfs_csum_item { | |||
751 | } __attribute__ ((__packed__)); | 802 | } __attribute__ ((__packed__)); |
752 | 803 | ||
753 | /* different types of block groups (and chunks) */ | 804 | /* different types of block groups (and chunks) */ |
754 | #define BTRFS_BLOCK_GROUP_DATA (1 << 0) | 805 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) |
755 | #define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) | 806 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) |
756 | #define BTRFS_BLOCK_GROUP_METADATA (1 << 2) | 807 | #define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2) |
757 | #define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) | 808 | #define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3) |
758 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) | 809 | #define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) |
759 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) | 810 | #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) |
760 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) | 811 | #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) |
761 | #define BTRFS_NR_RAID_TYPES 5 | 812 | #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE |
813 | #define BTRFS_NR_RAID_TYPES 5 | ||
814 | |||
815 | #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ | ||
816 | BTRFS_BLOCK_GROUP_SYSTEM | \ | ||
817 | BTRFS_BLOCK_GROUP_METADATA) | ||
818 | |||
819 | #define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ | ||
820 | BTRFS_BLOCK_GROUP_RAID1 | \ | ||
821 | BTRFS_BLOCK_GROUP_DUP | \ | ||
822 | BTRFS_BLOCK_GROUP_RAID10) | ||
823 | /* | ||
824 | * We need a bit for restriper to be able to tell when chunks of type | ||
825 | * SINGLE are available. This "extended" profile format is used in | ||
826 | * fs_info->avail_*_alloc_bits (in-memory) and balance item fields | ||
827 | * (on-disk). The corresponding on-disk bit in chunk.type is reserved | ||
828 | * to avoid remappings between two formats in future. | ||
829 | */ | ||
830 | #define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) | ||
762 | 831 | ||
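A small sketch of what the comment implies; data_single_available() is an invented helper, and fs_info->avail_data_alloc_bits (declared further down in this header) is kept in the extended format:

static inline int data_single_available(struct btrfs_fs_info *fs_info)
{
	/* in extended format, SINGLE availability has its own reserved bit */
	return (fs_info->avail_data_alloc_bits &
		BTRFS_AVAIL_ALLOC_BIT_SINGLE) != 0;
}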
763 | struct btrfs_block_group_item { | 832 | struct btrfs_block_group_item { |
764 | __le64 used; | 833 | __le64 used; |
@@ -916,6 +985,7 @@ struct btrfs_block_group_cache { | |||
916 | struct reloc_control; | 985 | struct reloc_control; |
917 | struct btrfs_device; | 986 | struct btrfs_device; |
918 | struct btrfs_fs_devices; | 987 | struct btrfs_fs_devices; |
988 | struct btrfs_balance_control; | ||
919 | struct btrfs_delayed_root; | 989 | struct btrfs_delayed_root; |
920 | struct btrfs_fs_info { | 990 | struct btrfs_fs_info { |
921 | u8 fsid[BTRFS_FSID_SIZE]; | 991 | u8 fsid[BTRFS_FSID_SIZE]; |
@@ -971,7 +1041,7 @@ struct btrfs_fs_info { | |||
971 | * is required instead of the faster short fsync log commits | 1041 | * is required instead of the faster short fsync log commits |
972 | */ | 1042 | */ |
973 | u64 last_trans_log_full_commit; | 1043 | u64 last_trans_log_full_commit; |
974 | unsigned long mount_opt:20; | 1044 | unsigned long mount_opt:21; |
975 | unsigned long compress_type:4; | 1045 | unsigned long compress_type:4; |
976 | u64 max_inline; | 1046 | u64 max_inline; |
977 | u64 alloc_start; | 1047 | u64 alloc_start; |
@@ -1132,12 +1202,23 @@ struct btrfs_fs_info { | |||
1132 | spinlock_t ref_cache_lock; | 1202 | spinlock_t ref_cache_lock; |
1133 | u64 total_ref_cache_size; | 1203 | u64 total_ref_cache_size; |
1134 | 1204 | ||
1205 | /* | ||
1206 | * these three are in extended format (availability of single | ||
1207 | * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other | ||
1208 | * types are denoted by corresponding BTRFS_BLOCK_GROUP_* bits) | ||
1209 | */ | ||
1135 | u64 avail_data_alloc_bits; | 1210 | u64 avail_data_alloc_bits; |
1136 | u64 avail_metadata_alloc_bits; | 1211 | u64 avail_metadata_alloc_bits; |
1137 | u64 avail_system_alloc_bits; | 1212 | u64 avail_system_alloc_bits; |
1138 | u64 data_alloc_profile; | 1213 | |
1139 | u64 metadata_alloc_profile; | 1214 | /* restriper state */ |
1140 | u64 system_alloc_profile; | 1215 | spinlock_t balance_lock; |
1216 | struct mutex balance_mutex; | ||
1217 | atomic_t balance_running; | ||
1218 | atomic_t balance_pause_req; | ||
1219 | atomic_t balance_cancel_req; | ||
1220 | struct btrfs_balance_control *balance_ctl; | ||
1221 | wait_queue_head_t balance_wait_q; | ||
1141 | 1222 | ||
1142 | unsigned data_chunk_allocations; | 1223 | unsigned data_chunk_allocations; |
1143 | unsigned metadata_ratio; | 1224 | unsigned metadata_ratio; |
@@ -1155,6 +1236,10 @@ struct btrfs_fs_info { | |||
1155 | int scrub_workers_refcnt; | 1236 | int scrub_workers_refcnt; |
1156 | struct btrfs_workers scrub_workers; | 1237 | struct btrfs_workers scrub_workers; |
1157 | 1238 | ||
1239 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||
1240 | u32 check_integrity_print_mask; | ||
1241 | #endif | ||
1242 | |||
1158 | /* filesystem state */ | 1243 | /* filesystem state */ |
1159 | u64 fs_state; | 1244 | u64 fs_state; |
1160 | 1245 | ||
@@ -1383,6 +1468,8 @@ struct btrfs_ioctl_defrag_range_args { | |||
1383 | #define BTRFS_DEV_ITEM_KEY 216 | 1468 | #define BTRFS_DEV_ITEM_KEY 216 |
1384 | #define BTRFS_CHUNK_ITEM_KEY 228 | 1469 | #define BTRFS_CHUNK_ITEM_KEY 228 |
1385 | 1470 | ||
1471 | #define BTRFS_BALANCE_ITEM_KEY 248 | ||
1472 | |||
1386 | /* | 1473 | /* |
1387 | * string items are for debugging. They just store a short string of | 1474 | * string items are for debugging. They just store a short string of |
1388 | * data in the FS | 1475 | * data in the FS |
@@ -1413,6 +1500,9 @@ struct btrfs_ioctl_defrag_range_args { | |||
1413 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) | 1500 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) |
1414 | #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) | 1501 | #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) |
1415 | #define BTRFS_MOUNT_RECOVERY (1 << 18) | 1502 | #define BTRFS_MOUNT_RECOVERY (1 << 18) |
1503 | #define BTRFS_MOUNT_SKIP_BALANCE (1 << 19) | ||
1504 | #define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) | ||
1505 | #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) | ||
1416 | 1506 | ||
1417 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1507 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
1418 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1508 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -2077,8 +2167,86 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup, | |||
2077 | BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, | 2167 | BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, |
2078 | num_devices, 64); | 2168 | num_devices, 64); |
2079 | 2169 | ||
2080 | /* struct btrfs_super_block */ | 2170 | /* struct btrfs_balance_item */ |
2171 | BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64); | ||
2081 | 2172 | ||
2173 | static inline void btrfs_balance_data(struct extent_buffer *eb, | ||
2174 | struct btrfs_balance_item *bi, | ||
2175 | struct btrfs_disk_balance_args *ba) | ||
2176 | { | ||
2177 | read_eb_member(eb, bi, struct btrfs_balance_item, data, ba); | ||
2178 | } | ||
2179 | |||
2180 | static inline void btrfs_set_balance_data(struct extent_buffer *eb, | ||
2181 | struct btrfs_balance_item *bi, | ||
2182 | struct btrfs_disk_balance_args *ba) | ||
2183 | { | ||
2184 | write_eb_member(eb, bi, struct btrfs_balance_item, data, ba); | ||
2185 | } | ||
2186 | |||
2187 | static inline void btrfs_balance_meta(struct extent_buffer *eb, | ||
2188 | struct btrfs_balance_item *bi, | ||
2189 | struct btrfs_disk_balance_args *ba) | ||
2190 | { | ||
2191 | read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); | ||
2192 | } | ||
2193 | |||
2194 | static inline void btrfs_set_balance_meta(struct extent_buffer *eb, | ||
2195 | struct btrfs_balance_item *bi, | ||
2196 | struct btrfs_disk_balance_args *ba) | ||
2197 | { | ||
2198 | write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); | ||
2199 | } | ||
2200 | |||
2201 | static inline void btrfs_balance_sys(struct extent_buffer *eb, | ||
2202 | struct btrfs_balance_item *bi, | ||
2203 | struct btrfs_disk_balance_args *ba) | ||
2204 | { | ||
2205 | read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); | ||
2206 | } | ||
2207 | |||
2208 | static inline void btrfs_set_balance_sys(struct extent_buffer *eb, | ||
2209 | struct btrfs_balance_item *bi, | ||
2210 | struct btrfs_disk_balance_args *ba) | ||
2211 | { | ||
2212 | write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); | ||
2213 | } | ||
2214 | |||
2215 | static inline void | ||
2216 | btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, | ||
2217 | struct btrfs_disk_balance_args *disk) | ||
2218 | { | ||
2219 | memset(cpu, 0, sizeof(*cpu)); | ||
2220 | |||
2221 | cpu->profiles = le64_to_cpu(disk->profiles); | ||
2222 | cpu->usage = le64_to_cpu(disk->usage); | ||
2223 | cpu->devid = le64_to_cpu(disk->devid); | ||
2224 | cpu->pstart = le64_to_cpu(disk->pstart); | ||
2225 | cpu->pend = le64_to_cpu(disk->pend); | ||
2226 | cpu->vstart = le64_to_cpu(disk->vstart); | ||
2227 | cpu->vend = le64_to_cpu(disk->vend); | ||
2228 | cpu->target = le64_to_cpu(disk->target); | ||
2229 | cpu->flags = le64_to_cpu(disk->flags); | ||
2230 | } | ||
2231 | |||
2232 | static inline void | ||
2233 | btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, | ||
2234 | struct btrfs_balance_args *cpu) | ||
2235 | { | ||
2236 | memset(disk, 0, sizeof(*disk)); | ||
2237 | |||
2238 | disk->profiles = cpu_to_le64(cpu->profiles); | ||
2239 | disk->usage = cpu_to_le64(cpu->usage); | ||
2240 | disk->devid = cpu_to_le64(cpu->devid); | ||
2241 | disk->pstart = cpu_to_le64(cpu->pstart); | ||
2242 | disk->pend = cpu_to_le64(cpu->pend); | ||
2243 | disk->vstart = cpu_to_le64(cpu->vstart); | ||
2244 | disk->vend = cpu_to_le64(cpu->vend); | ||
2245 | disk->target = cpu_to_le64(cpu->target); | ||
2246 | disk->flags = cpu_to_le64(cpu->flags); | ||
2247 | } | ||
2248 | |||
2249 | /* struct btrfs_super_block */ | ||
2082 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); | 2250 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); |
2083 | BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); | 2251 | BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); |
2084 | BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, | 2252 | BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, |
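The conversion helpers above follow the usual btrfs pattern: balance arguments live on disk as little-endian structs and are converted field by field at the CPU boundary, with the memset keeping any padding deterministic. A hedged sketch of reading a BTRFS_BALANCE_ITEM_KEY item with them; leaf/slot setup is abbreviated:

	struct btrfs_balance_item *item;
	struct btrfs_disk_balance_args disk_bargs;
	struct btrfs_balance_args bargs;

	item = btrfs_item_ptr(leaf, slot, struct btrfs_balance_item);
	btrfs_balance_data(leaf, item, &disk_bargs);		/* copy the data args out of the extent buffer */
	btrfs_disk_balance_args_to_cpu(&bargs, &disk_bargs);	/* endian-convert into CPU order */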
@@ -2196,7 +2364,7 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, | |||
2196 | return btrfs_item_size(eb, e) - offset; | 2364 | return btrfs_item_size(eb, e) - offset; |
2197 | } | 2365 | } |
2198 | 2366 | ||
2199 | static inline struct btrfs_root *btrfs_sb(struct super_block *sb) | 2367 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) |
2200 | { | 2368 | { |
2201 | return sb->s_fs_info; | 2369 | return sb->s_fs_info; |
2202 | } | 2370 | } |
@@ -2277,11 +2445,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
2277 | struct btrfs_root *root, u32 blocksize, | 2445 | struct btrfs_root *root, u32 blocksize, |
2278 | u64 parent, u64 root_objectid, | 2446 | u64 parent, u64 root_objectid, |
2279 | struct btrfs_disk_key *key, int level, | 2447 | struct btrfs_disk_key *key, int level, |
2280 | u64 hint, u64 empty_size); | 2448 | u64 hint, u64 empty_size, int for_cow); |
2281 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 2449 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
2282 | struct btrfs_root *root, | 2450 | struct btrfs_root *root, |
2283 | struct extent_buffer *buf, | 2451 | struct extent_buffer *buf, |
2284 | u64 parent, int last_ref); | 2452 | u64 parent, int last_ref, int for_cow); |
2285 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 2453 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
2286 | struct btrfs_root *root, | 2454 | struct btrfs_root *root, |
2287 | u64 bytenr, u32 blocksize, | 2455 | u64 bytenr, u32 blocksize, |
@@ -2301,17 +2469,17 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
2301 | u64 search_end, struct btrfs_key *ins, | 2469 | u64 search_end, struct btrfs_key *ins, |
2302 | u64 data); | 2470 | u64 data); |
2303 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2471 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
2304 | struct extent_buffer *buf, int full_backref); | 2472 | struct extent_buffer *buf, int full_backref, int for_cow); |
2305 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2473 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
2306 | struct extent_buffer *buf, int full_backref); | 2474 | struct extent_buffer *buf, int full_backref, int for_cow); |
2307 | int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, | 2475 | int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, |
2308 | struct btrfs_root *root, | 2476 | struct btrfs_root *root, |
2309 | u64 bytenr, u64 num_bytes, u64 flags, | 2477 | u64 bytenr, u64 num_bytes, u64 flags, |
2310 | int is_data); | 2478 | int is_data); |
2311 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 2479 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
2312 | struct btrfs_root *root, | 2480 | struct btrfs_root *root, |
2313 | u64 bytenr, u64 num_bytes, u64 parent, | 2481 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, |
2314 | u64 root_objectid, u64 owner, u64 offset); | 2482 | u64 owner, u64 offset, int for_cow); |
2315 | 2483 | ||
2316 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 2484 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); |
2317 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, | 2485 | int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, |
@@ -2323,7 +2491,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
2323 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 2491 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
2324 | struct btrfs_root *root, | 2492 | struct btrfs_root *root, |
2325 | u64 bytenr, u64 num_bytes, u64 parent, | 2493 | u64 bytenr, u64 num_bytes, u64 parent, |
2326 | u64 root_objectid, u64 owner, u64 offset); | 2494 | u64 root_objectid, u64 owner, u64 offset, int for_cow); |
2327 | 2495 | ||
2328 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | 2496 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, |
2329 | struct btrfs_root *root); | 2497 | struct btrfs_root *root); |
@@ -2482,10 +2650,18 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
2482 | } | 2650 | } |
2483 | 2651 | ||
2484 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2652 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2653 | static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) | ||
2654 | { | ||
2655 | ++p->slots[0]; | ||
2656 | if (p->slots[0] >= btrfs_header_nritems(p->nodes[0])) | ||
2657 | return btrfs_next_leaf(root, p); | ||
2658 | return 0; | ||
2659 | } | ||
2485 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2660 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2486 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 2661 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
2487 | void btrfs_drop_snapshot(struct btrfs_root *root, | 2662 | void btrfs_drop_snapshot(struct btrfs_root *root, |
2488 | struct btrfs_block_rsv *block_rsv, int update_ref); | 2663 | struct btrfs_block_rsv *block_rsv, int update_ref, |
2664 | int for_reloc); | ||
2489 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 2665 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
2490 | struct btrfs_root *root, | 2666 | struct btrfs_root *root, |
2491 | struct extent_buffer *node, | 2667 | struct extent_buffer *node, |
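btrfs_next_item hides the leaf boundary from iteration loops: it bumps slots[0] and only falls back to btrfs_next_leaf when the slot runs off the current leaf. A typical walk then looks roughly like this (sketch; error handling and key setup abbreviated):

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	while (1) {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
		if (found_key.objectid != key.objectid)
			break;
		/* ... process the item at path->slots[0] ... */
		ret = btrfs_next_item(root, path);	/* 0 = more items, >0 = end, <0 = error */
		if (ret)
			break;
	}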
@@ -2500,6 +2676,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) | |||
2500 | } | 2676 | } |
2501 | static inline void free_fs_info(struct btrfs_fs_info *fs_info) | 2677 | static inline void free_fs_info(struct btrfs_fs_info *fs_info) |
2502 | { | 2678 | { |
2679 | kfree(fs_info->balance_ctl); | ||
2503 | kfree(fs_info->delayed_root); | 2680 | kfree(fs_info->delayed_root); |
2504 | kfree(fs_info->extent_root); | 2681 | kfree(fs_info->extent_root); |
2505 | kfree(fs_info->tree_root); | 2682 | kfree(fs_info->tree_root); |
@@ -2510,6 +2687,24 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) | |||
2510 | kfree(fs_info->super_for_commit); | 2687 | kfree(fs_info->super_for_commit); |
2511 | kfree(fs_info); | 2688 | kfree(fs_info); |
2512 | } | 2689 | } |
2690 | /** | ||
2691 | * profile_is_valid - tests whether a given profile is valid and reduced | ||
2692 | * @flags: profile to validate | ||
2693 | * @extended: if true @flags is treated as an extended profile | ||
2694 | */ | ||
2695 | static inline int profile_is_valid(u64 flags, int extended) | ||
2696 | { | ||
2697 | u64 mask = ~BTRFS_BLOCK_GROUP_PROFILE_MASK; | ||
2698 | |||
2699 | flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK; | ||
2700 | if (extended) | ||
2701 | mask &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE; | ||
2702 | |||
2703 | if (flags & mask) | ||
2704 | return 0; | ||
2705 | /* true if zero or exactly one bit set */ | ||
2706 | return (flags & (~flags + 1)) == flags; | ||
2707 | } | ||
2513 | 2708 | ||
2514 | /* root-item.c */ | 2709 | /* root-item.c */ |
2515 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 2710 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
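The closing test in profile_is_valid is the standard power-of-two check: in two's complement, ~flags + 1 equals -flags, and flags & -flags isolates the lowest set bit, so the equality holds exactly when zero or one profile bit remains after masking. For example (values illustrative):

	u64 one_bit  = BTRFS_BLOCK_GROUP_RAID1;				/* reduced: check is true */
	u64 two_bits = BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP;	/* not reduced */

	/* (two_bits & (~two_bits + 1)) keeps only the lowest profile bit,
	 * which no longer equals two_bits, so profile_is_valid() returns 0 */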
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 9c1eccc2c503..fe4cd0f1cef1 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -595,8 +595,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, | |||
595 | 595 | ||
596 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 596 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
597 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); | 597 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); |
598 | if (!ret) | 598 | if (!ret) { |
599 | trace_btrfs_space_reservation(root->fs_info, "delayed_item", | ||
600 | item->key.objectid, | ||
601 | num_bytes, 1); | ||
599 | item->bytes_reserved = num_bytes; | 602 | item->bytes_reserved = num_bytes; |
603 | } | ||
600 | 604 | ||
601 | return ret; | 605 | return ret; |
602 | } | 606 | } |
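The tracepoint added here takes a reservation type string, an object id, a byte count, and a 1/0 reserve-or-release flag, so every reservation can be matched with its release in the trace log. The convention used by all the call sites in this patch:

	/* last argument: 1 = bytes reserved, 0 = bytes released */
	trace_btrfs_space_reservation(root->fs_info, "delayed_item",
				      item->key.objectid, num_bytes, 1);
	/* ... later, when the reservation is dropped ... */
	trace_btrfs_space_reservation(root->fs_info, "delayed_item",
				      item->key.objectid, num_bytes, 0);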
@@ -610,6 +614,9 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, | |||
610 | return; | 614 | return; |
611 | 615 | ||
612 | rsv = &root->fs_info->delayed_block_rsv; | 616 | rsv = &root->fs_info->delayed_block_rsv; |
617 | trace_btrfs_space_reservation(root->fs_info, "delayed_item", | ||
618 | item->key.objectid, item->bytes_reserved, | ||
619 | 0); | ||
613 | btrfs_block_rsv_release(root, rsv, | 620 | btrfs_block_rsv_release(root, rsv, |
614 | item->bytes_reserved); | 621 | item->bytes_reserved); |
615 | } | 622 | } |
@@ -624,7 +631,7 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
624 | struct btrfs_block_rsv *dst_rsv; | 631 | struct btrfs_block_rsv *dst_rsv; |
625 | u64 num_bytes; | 632 | u64 num_bytes; |
626 | int ret; | 633 | int ret; |
627 | int release = false; | 634 | bool release = false; |
628 | 635 | ||
629 | src_rsv = trans->block_rsv; | 636 | src_rsv = trans->block_rsv; |
630 | dst_rsv = &root->fs_info->delayed_block_rsv; | 637 | dst_rsv = &root->fs_info->delayed_block_rsv; |
@@ -651,8 +658,13 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
651 | */ | 658 | */ |
652 | if (ret == -EAGAIN) | 659 | if (ret == -EAGAIN) |
653 | ret = -ENOSPC; | 660 | ret = -ENOSPC; |
654 | if (!ret) | 661 | if (!ret) { |
655 | node->bytes_reserved = num_bytes; | 662 | node->bytes_reserved = num_bytes; |
663 | trace_btrfs_space_reservation(root->fs_info, | ||
664 | "delayed_inode", | ||
665 | btrfs_ino(inode), | ||
666 | num_bytes, 1); | ||
667 | } | ||
656 | return ret; | 668 | return ret; |
657 | } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { | 669 | } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { |
658 | spin_lock(&BTRFS_I(inode)->lock); | 670 | spin_lock(&BTRFS_I(inode)->lock); |
@@ -707,11 +719,17 @@ out: | |||
707 | * reservation here. I think it may be time for a documentation page on | 719 | * reservation here. I think it may be time for a documentation page on |
708 | * how block rsvs work. | 720 | * how block rsvs work. |
709 | */ | 721 | */ |
710 | if (!ret) | 722 | if (!ret) { |
723 | trace_btrfs_space_reservation(root->fs_info, "delayed_inode", | ||
724 | btrfs_ino(inode), num_bytes, 1); | ||
711 | node->bytes_reserved = num_bytes; | 725 | node->bytes_reserved = num_bytes; |
726 | } | ||
712 | 727 | ||
713 | if (release) | 728 | if (release) { |
729 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | ||
730 | btrfs_ino(inode), num_bytes, 0); | ||
714 | btrfs_block_rsv_release(root, src_rsv, num_bytes); | 731 | btrfs_block_rsv_release(root, src_rsv, num_bytes); |
732 | } | ||
715 | 733 | ||
716 | return ret; | 734 | return ret; |
717 | } | 735 | } |
@@ -725,6 +743,8 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root, | |||
725 | return; | 743 | return; |
726 | 744 | ||
727 | rsv = &root->fs_info->delayed_block_rsv; | 745 | rsv = &root->fs_info->delayed_block_rsv; |
746 | trace_btrfs_space_reservation(root->fs_info, "delayed_inode", | ||
747 | node->inode_id, node->bytes_reserved, 0); | ||
728 | btrfs_block_rsv_release(root, rsv, | 748 | btrfs_block_rsv_release(root, rsv, |
729 | node->bytes_reserved); | 749 | node->bytes_reserved); |
730 | node->bytes_reserved = 0; | 750 | node->bytes_reserved = 0; |
@@ -1372,13 +1392,6 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, | |||
1372 | goto release_node; | 1392 | goto release_node; |
1373 | } | 1393 | } |
1374 | 1394 | ||
1375 | ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item); | ||
1376 | /* | ||
1377 | * we have reserved enough space when we start a new transaction, | ||
1378 | * so a metadata reservation failure is impossible ||
1379 | */ | ||
1380 | BUG_ON(ret); | ||
1381 | |||
1382 | delayed_item->key.objectid = btrfs_ino(dir); | 1395 | delayed_item->key.objectid = btrfs_ino(dir); |
1383 | btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY); | 1396 | btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY); |
1384 | delayed_item->key.offset = index; | 1397 | delayed_item->key.offset = index; |
@@ -1391,6 +1404,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, | |||
1391 | dir_item->type = type; | 1404 | dir_item->type = type; |
1392 | memcpy((char *)(dir_item + 1), name, name_len); | 1405 | memcpy((char *)(dir_item + 1), name, name_len); |
1393 | 1406 | ||
1407 | ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item); | ||
1408 | /* | ||
1409 | * we have reserved enough space when we start a new transaction, | ||
1410 | * so a metadata reservation failure is impossible ||
1411 | */ | ||
1412 | BUG_ON(ret); | ||
1413 | |||
1414 | |||
1394 | mutex_lock(&delayed_node->mutex); | 1415 | mutex_lock(&delayed_node->mutex); |
1395 | ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); | 1416 | ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); |
1396 | if (unlikely(ret)) { | 1417 | if (unlikely(ret)) { |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 125cf76fcd08..66e4f29505a3 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -101,6 +101,11 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2, | |||
101 | return -1; | 101 | return -1; |
102 | if (ref1->type > ref2->type) | 102 | if (ref1->type > ref2->type) |
103 | return 1; | 103 | return 1; |
104 | /* merging of sequenced refs is not allowed */ | ||
105 | if (ref1->seq < ref2->seq) | ||
106 | return -1; | ||
107 | if (ref1->seq > ref2->seq) | ||
108 | return 1; | ||
104 | if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || | 109 | if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || |
105 | ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { | 110 | ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { |
106 | return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), | 111 | return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), |
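Ordering on seq after type means two otherwise-identical refs created at different sequence points compare as distinct, so they stay separate nodes in the rbtree instead of being merged, and a backref walk pinned at an older seq can be shielded from the newer ref. Illustrative values, not taken from this hunk:

	/* same bytenr and type, inserted before and during a backref walk */
	ref_a->seq = 5;		/* visible to a walk pinned at seq 5 */
	ref_b->seq = 9;		/* must be held back, not merged into ref_a */
	/* comp_entry(ref_b, ref_a) is now nonzero, so both survive tree_insert() */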
@@ -150,16 +155,22 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, | |||
150 | 155 | ||
151 | /* | 156 | /* |
152 | * find a head entry based on bytenr. This returns the delayed ref | 157 | * find a head entry based on bytenr. This returns the delayed ref |
153 | * head if it was able to find one, or NULL if nothing was in that spot | 158 | * head if it was able to find one, or NULL if nothing was in that spot. |
159 | * If return_bigger is given, the next bigger entry is returned if no exact | ||
160 | * match is found. | ||
154 | */ | 161 | */ |
155 | static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root, | 162 | static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root, |
156 | u64 bytenr, | 163 | u64 bytenr, |
157 | struct btrfs_delayed_ref_node **last) | 164 | struct btrfs_delayed_ref_node **last, |
165 | int return_bigger) | ||
158 | { | 166 | { |
159 | struct rb_node *n = root->rb_node; | 167 | struct rb_node *n; |
160 | struct btrfs_delayed_ref_node *entry; | 168 | struct btrfs_delayed_ref_node *entry; |
161 | int cmp; | 169 | int cmp = 0; |
162 | 170 | ||
171 | again: | ||
172 | n = root->rb_node; | ||
173 | entry = NULL; | ||
163 | while (n) { | 174 | while (n) { |
164 | entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); | 175 | entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); |
165 | WARN_ON(!entry->in_tree); | 176 | WARN_ON(!entry->in_tree); |
@@ -182,6 +193,19 @@ static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root, | |||
182 | else | 193 | else |
183 | return entry; | 194 | return entry; |
184 | } | 195 | } |
196 | if (entry && return_bigger) { | ||
197 | if (cmp > 0) { | ||
198 | n = rb_next(&entry->rb_node); | ||
199 | if (!n) | ||
200 | n = rb_first(root); | ||
201 | entry = rb_entry(n, struct btrfs_delayed_ref_node, | ||
202 | rb_node); | ||
203 | bytenr = entry->bytenr; | ||
204 | return_bigger = 0; | ||
205 | goto again; | ||
206 | } | ||
207 | return entry; | ||
208 | } | ||
185 | return NULL; | 209 | return NULL; |
186 | } | 210 | } |
187 | 211 | ||
@@ -209,6 +233,24 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
209 | return 0; | 233 | return 0; |
210 | } | 234 | } |
211 | 235 | ||
236 | int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | ||
237 | u64 seq) | ||
238 | { | ||
239 | struct seq_list *elem; | ||
240 | |||
241 | assert_spin_locked(&delayed_refs->lock); | ||
242 | if (list_empty(&delayed_refs->seq_head)) | ||
243 | return 0; | ||
244 | |||
245 | elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list); | ||
246 | if (seq >= elem->seq) { | ||
247 | pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n", | ||
248 | seq, elem->seq, delayed_refs); | ||
249 | return 1; | ||
250 | } | ||
251 | return 0; | ||
252 | } | ||
253 | |||
212 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 254 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
213 | struct list_head *cluster, u64 start) | 255 | struct list_head *cluster, u64 start) |
214 | { | 256 | { |
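btrfs_check_delayed_seq serves the ref-running side: with delayed_refs->lock held, it reports whether any backref walker has pinned a seq at or below the given one, in which case the ref must wait. A hedged sketch of that consumer; the real call site lives in extent-tree.c, outside this excerpt, and uses a more refined wait condition:

	spin_lock(&delayed_refs->lock);
	if (btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
		/* an older walk is registered: park until it drops its entry */
		spin_unlock(&delayed_refs->lock);
		wait_event(delayed_refs->seq_wait,
			   list_empty(&delayed_refs->seq_head));
		spin_lock(&delayed_refs->lock);
	}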
@@ -223,20 +265,8 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | |||
223 | node = rb_first(&delayed_refs->root); | 265 | node = rb_first(&delayed_refs->root); |
224 | } else { | 266 | } else { |
225 | ref = NULL; | 267 | ref = NULL; |
226 | find_ref_head(&delayed_refs->root, start, &ref); | 268 | find_ref_head(&delayed_refs->root, start + 1, &ref, 1); |
227 | if (ref) { | 269 | if (ref) { |
228 | struct btrfs_delayed_ref_node *tmp; | ||
229 | |||
230 | node = rb_prev(&ref->rb_node); | ||
231 | while (node) { | ||
232 | tmp = rb_entry(node, | ||
233 | struct btrfs_delayed_ref_node, | ||
234 | rb_node); | ||
235 | if (tmp->bytenr < start) | ||
236 | break; | ||
237 | ref = tmp; | ||
238 | node = rb_prev(&ref->rb_node); | ||
239 | } | ||
240 | node = &ref->rb_node; | 270 | node = &ref->rb_node; |
241 | } else | 271 | } else |
242 | node = rb_first(&delayed_refs->root); | 272 | node = rb_first(&delayed_refs->root); |
@@ -390,7 +420,8 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
390 | * this does all the dirty work in terms of maintaining the correct | 420 | * this does all the dirty work in terms of maintaining the correct |
391 | * overall modification count. | 421 | * overall modification count. |
392 | */ | 422 | */ |
393 | static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans, | 423 | static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info, |
424 | struct btrfs_trans_handle *trans, | ||
394 | struct btrfs_delayed_ref_node *ref, | 425 | struct btrfs_delayed_ref_node *ref, |
395 | u64 bytenr, u64 num_bytes, | 426 | u64 bytenr, u64 num_bytes, |
396 | int action, int is_data) | 427 | int action, int is_data) |
@@ -437,6 +468,7 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans, | |||
437 | ref->action = 0; | 468 | ref->action = 0; |
438 | ref->is_head = 1; | 469 | ref->is_head = 1; |
439 | ref->in_tree = 1; | 470 | ref->in_tree = 1; |
471 | ref->seq = 0; | ||
440 | 472 | ||
441 | head_ref = btrfs_delayed_node_to_head(ref); | 473 | head_ref = btrfs_delayed_node_to_head(ref); |
442 | head_ref->must_insert_reserved = must_insert_reserved; | 474 | head_ref->must_insert_reserved = must_insert_reserved; |
@@ -468,14 +500,17 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans, | |||
468 | /* | 500 | /* |
469 | * helper to insert a delayed tree ref into the rbtree. | 501 | * helper to insert a delayed tree ref into the rbtree. |
470 | */ | 502 | */ |
471 | static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans, | 503 | static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info, |
504 | struct btrfs_trans_handle *trans, | ||
472 | struct btrfs_delayed_ref_node *ref, | 505 | struct btrfs_delayed_ref_node *ref, |
473 | u64 bytenr, u64 num_bytes, u64 parent, | 506 | u64 bytenr, u64 num_bytes, u64 parent, |
474 | u64 ref_root, int level, int action) | 507 | u64 ref_root, int level, int action, |
508 | int for_cow) | ||
475 | { | 509 | { |
476 | struct btrfs_delayed_ref_node *existing; | 510 | struct btrfs_delayed_ref_node *existing; |
477 | struct btrfs_delayed_tree_ref *full_ref; | 511 | struct btrfs_delayed_tree_ref *full_ref; |
478 | struct btrfs_delayed_ref_root *delayed_refs; | 512 | struct btrfs_delayed_ref_root *delayed_refs; |
513 | u64 seq = 0; | ||
479 | 514 | ||
480 | if (action == BTRFS_ADD_DELAYED_EXTENT) | 515 | if (action == BTRFS_ADD_DELAYED_EXTENT) |
481 | action = BTRFS_ADD_DELAYED_REF; | 516 | action = BTRFS_ADD_DELAYED_REF; |
@@ -491,14 +526,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
491 | ref->is_head = 0; | 526 | ref->is_head = 0; |
492 | ref->in_tree = 1; | 527 | ref->in_tree = 1; |
493 | 528 | ||
529 | if (need_ref_seq(for_cow, ref_root)) | ||
530 | seq = inc_delayed_seq(delayed_refs); | ||
531 | ref->seq = seq; | ||
532 | |||
494 | full_ref = btrfs_delayed_node_to_tree_ref(ref); | 533 | full_ref = btrfs_delayed_node_to_tree_ref(ref); |
495 | if (parent) { | 534 | full_ref->parent = parent; |
496 | full_ref->parent = parent; | 535 | full_ref->root = ref_root; |
536 | if (parent) | ||
497 | ref->type = BTRFS_SHARED_BLOCK_REF_KEY; | 537 | ref->type = BTRFS_SHARED_BLOCK_REF_KEY; |
498 | } else { | 538 | else |
499 | full_ref->root = ref_root; | ||
500 | ref->type = BTRFS_TREE_BLOCK_REF_KEY; | 539 | ref->type = BTRFS_TREE_BLOCK_REF_KEY; |
501 | } | ||
502 | full_ref->level = level; | 540 | full_ref->level = level; |
503 | 541 | ||
504 | trace_btrfs_delayed_tree_ref(ref, full_ref, action); | 542 | trace_btrfs_delayed_tree_ref(ref, full_ref, action); |
@@ -522,15 +560,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
522 | /* | 560 | /* |
523 | * helper to insert a delayed data ref into the rbtree. | 561 | * helper to insert a delayed data ref into the rbtree. |
524 | */ | 562 | */ |
525 | static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, | 563 | static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info, |
564 | struct btrfs_trans_handle *trans, | ||
526 | struct btrfs_delayed_ref_node *ref, | 565 | struct btrfs_delayed_ref_node *ref, |
527 | u64 bytenr, u64 num_bytes, u64 parent, | 566 | u64 bytenr, u64 num_bytes, u64 parent, |
528 | u64 ref_root, u64 owner, u64 offset, | 567 | u64 ref_root, u64 owner, u64 offset, |
529 | int action) | 568 | int action, int for_cow) |
530 | { | 569 | { |
531 | struct btrfs_delayed_ref_node *existing; | 570 | struct btrfs_delayed_ref_node *existing; |
532 | struct btrfs_delayed_data_ref *full_ref; | 571 | struct btrfs_delayed_data_ref *full_ref; |
533 | struct btrfs_delayed_ref_root *delayed_refs; | 572 | struct btrfs_delayed_ref_root *delayed_refs; |
573 | u64 seq = 0; | ||
534 | 574 | ||
535 | if (action == BTRFS_ADD_DELAYED_EXTENT) | 575 | if (action == BTRFS_ADD_DELAYED_EXTENT) |
536 | action = BTRFS_ADD_DELAYED_REF; | 576 | action = BTRFS_ADD_DELAYED_REF; |
@@ -546,14 +586,18 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
546 | ref->is_head = 0; | 586 | ref->is_head = 0; |
547 | ref->in_tree = 1; | 587 | ref->in_tree = 1; |
548 | 588 | ||
589 | if (need_ref_seq(for_cow, ref_root)) | ||
590 | seq = inc_delayed_seq(delayed_refs); | ||
591 | ref->seq = seq; | ||
592 | |||
549 | full_ref = btrfs_delayed_node_to_data_ref(ref); | 593 | full_ref = btrfs_delayed_node_to_data_ref(ref); |
550 | if (parent) { | 594 | full_ref->parent = parent; |
551 | full_ref->parent = parent; | 595 | full_ref->root = ref_root; |
596 | if (parent) | ||
552 | ref->type = BTRFS_SHARED_DATA_REF_KEY; | 597 | ref->type = BTRFS_SHARED_DATA_REF_KEY; |
553 | } else { | 598 | else |
554 | full_ref->root = ref_root; | ||
555 | ref->type = BTRFS_EXTENT_DATA_REF_KEY; | 599 | ref->type = BTRFS_EXTENT_DATA_REF_KEY; |
556 | } | 600 | |
557 | full_ref->objectid = owner; | 601 | full_ref->objectid = owner; |
558 | full_ref->offset = offset; | 602 | full_ref->offset = offset; |
559 | 603 | ||
@@ -580,10 +624,12 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
580 | * to make sure the delayed ref is eventually processed before this | 624 | * to make sure the delayed ref is eventually processed before this |
581 | * transaction commits. | 625 | * transaction commits. |
582 | */ | 626 | */ |
583 | int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, | 627 | int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, |
628 | struct btrfs_trans_handle *trans, | ||
584 | u64 bytenr, u64 num_bytes, u64 parent, | 629 | u64 bytenr, u64 num_bytes, u64 parent, |
585 | u64 ref_root, int level, int action, | 630 | u64 ref_root, int level, int action, |
586 | struct btrfs_delayed_extent_op *extent_op) | 631 | struct btrfs_delayed_extent_op *extent_op, |
632 | int for_cow) | ||
587 | { | 633 | { |
588 | struct btrfs_delayed_tree_ref *ref; | 634 | struct btrfs_delayed_tree_ref *ref; |
589 | struct btrfs_delayed_ref_head *head_ref; | 635 | struct btrfs_delayed_ref_head *head_ref; |
@@ -610,13 +656,17 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
610 | * insert both the head node and the new ref without dropping | 656 | * insert both the head node and the new ref without dropping |
611 | * the spin lock | 657 | * the spin lock |
612 | */ | 658 | */ |
613 | ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes, | 659 | ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, |
614 | action, 0); | 660 | num_bytes, action, 0); |
615 | BUG_ON(ret); | 661 | BUG_ON(ret); |
616 | 662 | ||
617 | ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes, | 663 | ret = add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, |
618 | parent, ref_root, level, action); | 664 | num_bytes, parent, ref_root, level, action, |
665 | for_cow); | ||
619 | BUG_ON(ret); | 666 | BUG_ON(ret); |
667 | if (!need_ref_seq(for_cow, ref_root) && | ||
668 | waitqueue_active(&delayed_refs->seq_wait)) | ||
669 | wake_up(&delayed_refs->seq_wait); | ||
620 | spin_unlock(&delayed_refs->lock); | 670 | spin_unlock(&delayed_refs->lock); |
621 | return 0; | 671 | return 0; |
622 | } | 672 | } |
@@ -624,11 +674,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
624 | /* | 674 | /* |
625 | * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref. | 675 | * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref. |
626 | */ | 676 | */ |
627 | int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, | 677 | int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, |
678 | struct btrfs_trans_handle *trans, | ||
628 | u64 bytenr, u64 num_bytes, | 679 | u64 bytenr, u64 num_bytes, |
629 | u64 parent, u64 ref_root, | 680 | u64 parent, u64 ref_root, |
630 | u64 owner, u64 offset, int action, | 681 | u64 owner, u64 offset, int action, |
631 | struct btrfs_delayed_extent_op *extent_op) | 682 | struct btrfs_delayed_extent_op *extent_op, |
683 | int for_cow) | ||
632 | { | 684 | { |
633 | struct btrfs_delayed_data_ref *ref; | 685 | struct btrfs_delayed_data_ref *ref; |
634 | struct btrfs_delayed_ref_head *head_ref; | 686 | struct btrfs_delayed_ref_head *head_ref; |
@@ -655,18 +707,23 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
655 | * insert both the head node and the new ref without dropping | 707 | * insert both the head node and the new ref without dropping |
656 | * the spin lock | 708 | * the spin lock |
657 | */ | 709 | */ |
658 | ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes, | 710 | ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, |
659 | action, 1); | 711 | num_bytes, action, 1); |
660 | BUG_ON(ret); | 712 | BUG_ON(ret); |
661 | 713 | ||
662 | ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes, | 714 | ret = add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, |
663 | parent, ref_root, owner, offset, action); | 715 | num_bytes, parent, ref_root, owner, offset, |
716 | action, for_cow); | ||
664 | BUG_ON(ret); | 717 | BUG_ON(ret); |
718 | if (!need_ref_seq(for_cow, ref_root) && | ||
719 | waitqueue_active(&delayed_refs->seq_wait)) | ||
720 | wake_up(&delayed_refs->seq_wait); | ||
665 | spin_unlock(&delayed_refs->lock); | 721 | spin_unlock(&delayed_refs->lock); |
666 | return 0; | 722 | return 0; |
667 | } | 723 | } |
668 | 724 | ||
669 | int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | 725 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, |
726 | struct btrfs_trans_handle *trans, | ||
670 | u64 bytenr, u64 num_bytes, | 727 | u64 bytenr, u64 num_bytes, |
671 | struct btrfs_delayed_extent_op *extent_op) | 728 | struct btrfs_delayed_extent_op *extent_op) |
672 | { | 729 | { |
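Both insertion paths now thread for_cow down from the caller; COW-internal bookkeeping never needs to be held back by a backref walk, so it gets no seq number and skips the wakeup. A sketch of a post-change call site, with arguments abridged (the real callers are in extent-tree.c):

	/* adding a ref as part of a COW: no seq number needed */
	ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
					 buf->start, buf->len, parent,
					 root->root_key.objectid, level,
					 BTRFS_ADD_DELAYED_REF, NULL,
					 1 /* for_cow */);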
@@ -683,11 +740,13 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | |||
683 | delayed_refs = &trans->transaction->delayed_refs; | 740 | delayed_refs = &trans->transaction->delayed_refs; |
684 | spin_lock(&delayed_refs->lock); | 741 | spin_lock(&delayed_refs->lock); |
685 | 742 | ||
686 | ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, | 743 | ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, |
687 | num_bytes, BTRFS_UPDATE_DELAYED_HEAD, | 744 | num_bytes, BTRFS_UPDATE_DELAYED_HEAD, |
688 | extent_op->is_data); | 745 | extent_op->is_data); |
689 | BUG_ON(ret); | 746 | BUG_ON(ret); |
690 | 747 | ||
748 | if (waitqueue_active(&delayed_refs->seq_wait)) | ||
749 | wake_up(&delayed_refs->seq_wait); | ||
691 | spin_unlock(&delayed_refs->lock); | 750 | spin_unlock(&delayed_refs->lock); |
692 | return 0; | 751 | return 0; |
693 | } | 752 | } |
@@ -704,7 +763,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | |||
704 | struct btrfs_delayed_ref_root *delayed_refs; | 763 | struct btrfs_delayed_ref_root *delayed_refs; |
705 | 764 | ||
706 | delayed_refs = &trans->transaction->delayed_refs; | 765 | delayed_refs = &trans->transaction->delayed_refs; |
707 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL); | 766 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0); |
708 | if (ref) | 767 | if (ref) |
709 | return btrfs_delayed_node_to_head(ref); | 768 | return btrfs_delayed_node_to_head(ref); |
710 | return NULL; | 769 | return NULL; |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index e287e3b0eab0..d8f244d94925 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -33,6 +33,9 @@ struct btrfs_delayed_ref_node { | |||
33 | /* the size of the extent */ | 33 | /* the size of the extent */ |
34 | u64 num_bytes; | 34 | u64 num_bytes; |
35 | 35 | ||
36 | /* seq number to keep track of insertion order */ | ||
37 | u64 seq; | ||
38 | |||
36 | /* ref count on this data structure */ | 39 | /* ref count on this data structure */ |
37 | atomic_t refs; | 40 | atomic_t refs; |
38 | 41 | ||
@@ -98,19 +101,15 @@ struct btrfs_delayed_ref_head { | |||
98 | 101 | ||
99 | struct btrfs_delayed_tree_ref { | 102 | struct btrfs_delayed_tree_ref { |
100 | struct btrfs_delayed_ref_node node; | 103 | struct btrfs_delayed_ref_node node; |
101 | union { | 104 | u64 root; |
102 | u64 root; | 105 | u64 parent; |
103 | u64 parent; | ||
104 | }; | ||
105 | int level; | 106 | int level; |
106 | }; | 107 | }; |
107 | 108 | ||
108 | struct btrfs_delayed_data_ref { | 109 | struct btrfs_delayed_data_ref { |
109 | struct btrfs_delayed_ref_node node; | 110 | struct btrfs_delayed_ref_node node; |
110 | union { | 111 | u64 root; |
111 | u64 root; | 112 | u64 parent; |
112 | u64 parent; | ||
113 | }; | ||
114 | u64 objectid; | 113 | u64 objectid; |
115 | u64 offset; | 114 | u64 offset; |
116 | }; | 115 | }; |
@@ -140,6 +139,26 @@ struct btrfs_delayed_ref_root { | |||
140 | int flushing; | 139 | int flushing; |
141 | 140 | ||
142 | u64 run_delayed_start; | 141 | u64 run_delayed_start; |
142 | |||
143 | /* | ||
144 | * seq number of delayed refs. We need to know if a backref was being | ||
145 | * added before the currently processed ref or afterwards. | ||
146 | */ | ||
147 | u64 seq; | ||
148 | |||
149 | /* | ||
150 | * seq_list holds a list of all seq numbers that are currently being | ||
151 | * added to the list. While walking backrefs (btrfs_find_all_roots, | ||
152 | * qgroups), which might take some time, no newer ref must be processed, | ||
153 | * as it might influence the outcome of the walk. | ||
154 | */ | ||
155 | struct list_head seq_head; | ||
156 | |||
157 | /* | ||
158 | * when the only refs we have in the list must not be processed, we want | ||
159 | * to wait for more refs to show up or for the end of backref walking. | ||
160 | */ | ||
161 | wait_queue_head_t seq_wait; | ||
143 | }; | 162 | }; |
144 | 163 | ||
145 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) | 164 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) |
@@ -151,16 +170,21 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) | |||
151 | } | 170 | } |
152 | } | 171 | } |
153 | 172 | ||
154 | int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, | 173 | int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, |
174 | struct btrfs_trans_handle *trans, | ||
155 | u64 bytenr, u64 num_bytes, u64 parent, | 175 | u64 bytenr, u64 num_bytes, u64 parent, |
156 | u64 ref_root, int level, int action, | 176 | u64 ref_root, int level, int action, |
157 | struct btrfs_delayed_extent_op *extent_op); | 177 | struct btrfs_delayed_extent_op *extent_op, |
158 | int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, | 178 | int for_cow); |
179 | int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | ||
180 | struct btrfs_trans_handle *trans, | ||
159 | u64 bytenr, u64 num_bytes, | 181 | u64 bytenr, u64 num_bytes, |
160 | u64 parent, u64 ref_root, | 182 | u64 parent, u64 ref_root, |
161 | u64 owner, u64 offset, int action, | 183 | u64 owner, u64 offset, int action, |
162 | struct btrfs_delayed_extent_op *extent_op); | 184 | struct btrfs_delayed_extent_op *extent_op, |
163 | int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | 185 | int for_cow); |
186 | int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | ||
187 | struct btrfs_trans_handle *trans, | ||
164 | u64 bytenr, u64 num_bytes, | 188 | u64 bytenr, u64 num_bytes, |
165 | struct btrfs_delayed_extent_op *extent_op); | 189 | struct btrfs_delayed_extent_op *extent_op); |
166 | 190 | ||
@@ -170,6 +194,60 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
170 | struct btrfs_delayed_ref_head *head); | 194 | struct btrfs_delayed_ref_head *head); |
171 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 195 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
172 | struct list_head *cluster, u64 search_start); | 196 | struct list_head *cluster, u64 search_start); |
197 | |||
198 | struct seq_list { | ||
199 | struct list_head list; | ||
200 | u64 seq; | ||
201 | }; | ||
202 | |||
203 | static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) | ||
204 | { | ||
205 | assert_spin_locked(&delayed_refs->lock); | ||
206 | ++delayed_refs->seq; | ||
207 | return delayed_refs->seq; | ||
208 | } | ||
209 | |||
210 | static inline void | ||
211 | btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | ||
212 | struct seq_list *elem) | ||
213 | { | ||
214 | assert_spin_locked(&delayed_refs->lock); | ||
215 | elem->seq = delayed_refs->seq; | ||
216 | list_add_tail(&elem->list, &delayed_refs->seq_head); | ||
217 | } | ||
218 | |||
219 | static inline void | ||
220 | btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | ||
221 | struct seq_list *elem) | ||
222 | { | ||
223 | spin_lock(&delayed_refs->lock); | ||
224 | list_del(&elem->list); | ||
225 | wake_up(&delayed_refs->seq_wait); | ||
226 | spin_unlock(&delayed_refs->lock); | ||
227 | } | ||
228 | |||
229 | int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | ||
230 | u64 seq); | ||
231 | |||
232 | /* | ||
233 | * delayed refs with a ref_seq > 0 must be held back during backref walking. | ||
234 | * this only applies to items in one of the fs-trees. for_cow items never need | ||
235 | * to be held back, so they won't get a ref_seq number. | ||
236 | */ | ||
237 | static inline int need_ref_seq(int for_cow, u64 rootid) | ||
238 | { | ||
239 | if (for_cow) | ||
240 | return 0; | ||
241 | |||
242 | if (rootid == BTRFS_FS_TREE_OBJECTID) | ||
243 | return 1; | ||
244 | |||
245 | if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) | ||
246 | return 1; | ||
247 | |||
248 | return 0; | ||
249 | } | ||
250 | |||
173 | /* | 251 | /* |
174 | * a node might live in a head or a regular ref; this lets you | 252 | * a node might live in a head or a regular ref; this lets you |
175 | * test for the proper type to use. | 253 | * test for the proper type to use. |
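The intended pairing for the new helpers: a backref walker registers its seq under the lock with btrfs_get_delayed_seq, walks (possibly for a long time) while newer refs are held back, then drops the entry with btrfs_put_delayed_seq, which wakes the ref-running loop. Sketch of a walker; btrfs_find_all_roots itself lands in a later patch:

	struct seq_list elem;

	spin_lock(&delayed_refs->lock);
	btrfs_get_delayed_seq(delayed_refs, &elem);	/* pin the current seq */
	spin_unlock(&delayed_refs->lock);

	/* ... walk backrefs; refs with seq > elem.seq are postponed ... */

	btrfs_put_delayed_seq(delayed_refs, &elem);	/* unpin and wake waiters */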
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f99a099a7747..811d9f918b1c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include "tree-log.h" | 43 | #include "tree-log.h" |
44 | #include "free-space-cache.h" | 44 | #include "free-space-cache.h" |
45 | #include "inode-map.h" | 45 | #include "inode-map.h" |
46 | #include "check-integrity.h" | ||
46 | 47 | ||
47 | static struct extent_io_ops btree_extent_io_ops; | 48 | static struct extent_io_ops btree_extent_io_ops; |
48 | static void end_workqueue_fn(struct btrfs_work *work); | 49 | static void end_workqueue_fn(struct btrfs_work *work); |
@@ -872,7 +873,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
872 | 873 | ||
873 | #ifdef CONFIG_MIGRATION | 874 | #ifdef CONFIG_MIGRATION |
874 | static int btree_migratepage(struct address_space *mapping, | 875 | static int btree_migratepage(struct address_space *mapping, |
875 | struct page *newpage, struct page *page) | 876 | struct page *newpage, struct page *page, |
877 | enum migrate_mode mode) | ||
876 | { | 878 | { |
877 | /* | 879 | /* |
878 | * we can't safely write a btree page from here, | 880 | * we can't safely write a btree page from here, |
@@ -887,7 +889,7 @@ static int btree_migratepage(struct address_space *mapping, | |||
887 | if (page_has_private(page) && | 889 | if (page_has_private(page) && |
888 | !try_to_release_page(page, GFP_KERNEL)) | 890 | !try_to_release_page(page, GFP_KERNEL)) |
889 | return -EAGAIN; | 891 | return -EAGAIN; |
890 | return migrate_page(mapping, newpage, page); | 892 | return migrate_page(mapping, newpage, page, mode); |
891 | } | 893 | } |
892 | #endif | 894 | #endif |
893 | 895 | ||
@@ -960,6 +962,13 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) | |||
960 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 962 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
961 | map = &BTRFS_I(page->mapping->host)->extent_tree; | 963 | map = &BTRFS_I(page->mapping->host)->extent_tree; |
962 | 964 | ||
965 | /* | ||
966 | * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing | ||
967 | * slab allocation from alloc_extent_state down the callchain where | ||
968 | * it'd hit a BUG_ON as those flags are not allowed. | ||
969 | */ | ||
970 | gfp_flags &= ~GFP_SLAB_BUG_MASK; | ||
971 | |||
963 | ret = try_release_extent_state(map, tree, page, gfp_flags); | 972 | ret = try_release_extent_state(map, tree, page, gfp_flags); |
964 | if (!ret) | 973 | if (!ret) |
965 | return 0; | 974 | return 0; |
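GFP_SLAB_BUG_MASK covers the zone modifiers that are legal for page-cache allocations but fatal for slab ones; stripping them here keeps a mapping's gfp mask from tripping the BUG_ON in the slab allocator once alloc_extent_state runs. For reference, its definition in include/linux/gfp.h at this time:

	#define GFP_SLAB_BUG_MASK (__GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK)

	/* e.g. a GFP_HIGHUSER mapping mask becomes a slab-safe GFP_KERNEL-style mask */
	gfp_flags &= ~GFP_SLAB_BUG_MASK;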
@@ -1142,7 +1151,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1142 | root->orphan_item_inserted = 0; | 1151 | root->orphan_item_inserted = 0; |
1143 | root->orphan_cleanup_state = 0; | 1152 | root->orphan_cleanup_state = 0; |
1144 | 1153 | ||
1145 | root->fs_info = fs_info; | ||
1146 | root->objectid = objectid; | 1154 | root->objectid = objectid; |
1147 | root->last_trans = 0; | 1155 | root->last_trans = 0; |
1148 | root->highest_objectid = 0; | 1156 | root->highest_objectid = 0; |
@@ -1216,6 +1224,14 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
1216 | return 0; | 1224 | return 0; |
1217 | } | 1225 | } |
1218 | 1226 | ||
1227 | static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) | ||
1228 | { | ||
1229 | struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); | ||
1230 | if (root) | ||
1231 | root->fs_info = fs_info; | ||
1232 | return root; | ||
1233 | } | ||
1234 | |||
1219 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | 1235 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, |
1220 | struct btrfs_fs_info *fs_info) | 1236 | struct btrfs_fs_info *fs_info) |
1221 | { | 1237 | { |
@@ -1223,7 +1239,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1223 | struct btrfs_root *tree_root = fs_info->tree_root; | 1239 | struct btrfs_root *tree_root = fs_info->tree_root; |
1224 | struct extent_buffer *leaf; | 1240 | struct extent_buffer *leaf; |
1225 | 1241 | ||
1226 | root = kzalloc(sizeof(*root), GFP_NOFS); | 1242 | root = btrfs_alloc_root(fs_info); |
1227 | if (!root) | 1243 | if (!root) |
1228 | return ERR_PTR(-ENOMEM); | 1244 | return ERR_PTR(-ENOMEM); |
1229 | 1245 | ||
@@ -1243,7 +1259,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1243 | root->ref_cows = 0; | 1259 | root->ref_cows = 0; |
1244 | 1260 | ||
1245 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 1261 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
1246 | BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0); | 1262 | BTRFS_TREE_LOG_OBJECTID, NULL, |
1263 | 0, 0, 0, 0); | ||
1247 | if (IS_ERR(leaf)) { | 1264 | if (IS_ERR(leaf)) { |
1248 | kfree(root); | 1265 | kfree(root); |
1249 | return ERR_CAST(leaf); | 1266 | return ERR_CAST(leaf); |
@@ -1317,7 +1334,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1317 | u32 blocksize; | 1334 | u32 blocksize; |
1318 | int ret = 0; | 1335 | int ret = 0; |
1319 | 1336 | ||
1320 | root = kzalloc(sizeof(*root), GFP_NOFS); | 1337 | root = btrfs_alloc_root(fs_info); |
1321 | if (!root) | 1338 | if (!root) |
1322 | return ERR_PTR(-ENOMEM); | 1339 | return ERR_PTR(-ENOMEM); |
1323 | if (location->offset == (u64)-1) { | 1340 | if (location->offset == (u64)-1) { |
@@ -1873,9 +1890,9 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) | |||
1873 | } | 1890 | } |
1874 | 1891 | ||
1875 | 1892 | ||
1876 | struct btrfs_root *open_ctree(struct super_block *sb, | 1893 | int open_ctree(struct super_block *sb, |
1877 | struct btrfs_fs_devices *fs_devices, | 1894 | struct btrfs_fs_devices *fs_devices, |
1878 | char *options) | 1895 | char *options) |
1879 | { | 1896 | { |
1880 | u32 sectorsize; | 1897 | u32 sectorsize; |
1881 | u32 nodesize; | 1898 | u32 nodesize; |
@@ -1887,8 +1904,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1887 | struct btrfs_key location; | 1904 | struct btrfs_key location; |
1888 | struct buffer_head *bh; | 1905 | struct buffer_head *bh; |
1889 | struct btrfs_super_block *disk_super; | 1906 | struct btrfs_super_block *disk_super; |
1890 | struct btrfs_root *tree_root = btrfs_sb(sb); | 1907 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
1891 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | 1908 | struct btrfs_root *tree_root; |
1892 | struct btrfs_root *extent_root; | 1909 | struct btrfs_root *extent_root; |
1893 | struct btrfs_root *csum_root; | 1910 | struct btrfs_root *csum_root; |
1894 | struct btrfs_root *chunk_root; | 1911 | struct btrfs_root *chunk_root; |
@@ -1899,16 +1916,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1899 | int num_backups_tried = 0; | 1916 | int num_backups_tried = 0; |
1900 | int backup_index = 0; | 1917 | int backup_index = 0; |
1901 | 1918 | ||
1902 | extent_root = fs_info->extent_root = | 1919 | tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); |
1903 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | 1920 | extent_root = fs_info->extent_root = btrfs_alloc_root(fs_info); |
1904 | csum_root = fs_info->csum_root = | 1921 | csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); |
1905 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | 1922 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); |
1906 | chunk_root = fs_info->chunk_root = | 1923 | dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); |
1907 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
1908 | dev_root = fs_info->dev_root = | ||
1909 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
1910 | 1924 | ||
1911 | if (!extent_root || !csum_root || !chunk_root || !dev_root) { | 1925 | if (!tree_root || !extent_root || !csum_root || |
1926 | !chunk_root || !dev_root) { | ||
1912 | err = -ENOMEM; | 1927 | err = -ENOMEM; |
1913 | goto fail; | 1928 | goto fail; |
1914 | } | 1929 | } |
@@ -1997,6 +2012,17 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1997 | init_waitqueue_head(&fs_info->scrub_pause_wait); | 2012 | init_waitqueue_head(&fs_info->scrub_pause_wait); |
1998 | init_rwsem(&fs_info->scrub_super_lock); | 2013 | init_rwsem(&fs_info->scrub_super_lock); |
1999 | fs_info->scrub_workers_refcnt = 0; | 2014 | fs_info->scrub_workers_refcnt = 0; |
2015 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||
2016 | fs_info->check_integrity_print_mask = 0; | ||
2017 | #endif | ||
2018 | |||
2019 | spin_lock_init(&fs_info->balance_lock); | ||
2020 | mutex_init(&fs_info->balance_mutex); | ||
2021 | atomic_set(&fs_info->balance_running, 0); | ||
2022 | atomic_set(&fs_info->balance_pause_req, 0); | ||
2023 | atomic_set(&fs_info->balance_cancel_req, 0); | ||
2024 | fs_info->balance_ctl = NULL; | ||
2025 | init_waitqueue_head(&fs_info->balance_wait_q); | ||
2000 | 2026 | ||
2001 | sb->s_blocksize = 4096; | 2027 | sb->s_blocksize = 4096; |
2002 | sb->s_blocksize_bits = blksize_bits(4096); | 2028 | sb->s_blocksize_bits = blksize_bits(4096); |
@@ -2266,9 +2292,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
2266 | (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), | 2292 | (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), |
2267 | BTRFS_UUID_SIZE); | 2293 | BTRFS_UUID_SIZE); |
2268 | 2294 | ||
2269 | mutex_lock(&fs_info->chunk_mutex); | ||
2270 | ret = btrfs_read_chunk_tree(chunk_root); | 2295 | ret = btrfs_read_chunk_tree(chunk_root); |
2271 | mutex_unlock(&fs_info->chunk_mutex); | ||
2272 | if (ret) { | 2296 | if (ret) { |
2273 | printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", | 2297 | printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", |
2274 | sb->s_id); | 2298 | sb->s_id); |
@@ -2317,9 +2341,6 @@ retry_root_backup: | |||
2317 | 2341 | ||
2318 | fs_info->generation = generation; | 2342 | fs_info->generation = generation; |
2319 | fs_info->last_trans_committed = generation; | 2343 | fs_info->last_trans_committed = generation; |
2320 | fs_info->data_alloc_profile = (u64)-1; | ||
2321 | fs_info->metadata_alloc_profile = (u64)-1; | ||
2322 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | ||
2323 | 2344 | ||
2324 | ret = btrfs_init_space_info(fs_info); | 2345 | ret = btrfs_init_space_info(fs_info); |
2325 | if (ret) { | 2346 | if (ret) { |
@@ -2352,6 +2373,19 @@ retry_root_backup: | |||
2352 | btrfs_set_opt(fs_info->mount_opt, SSD); | 2373 | btrfs_set_opt(fs_info->mount_opt, SSD); |
2353 | } | 2374 | } |
2354 | 2375 | ||
2376 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||
2377 | if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) { | ||
2378 | ret = btrfsic_mount(tree_root, fs_devices, | ||
2379 | btrfs_test_opt(tree_root, | ||
2380 | CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ? | ||
2381 | 1 : 0, | ||
2382 | fs_info->check_integrity_print_mask); | ||
2383 | if (ret) | ||
2384 | printk(KERN_WARNING "btrfs: failed to initialize" | ||
2385 | " integrity check module %s\n", sb->s_id); | ||
2386 | } | ||
2387 | #endif | ||
2388 | |||
2355 | /* do not make disk changes in broken FS */ | 2389 | /* do not make disk changes in broken FS */ |
2356 | if (btrfs_super_log_root(disk_super) != 0 && | 2390 | if (btrfs_super_log_root(disk_super) != 0 && |
2357 | !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { | 2391 | !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { |
@@ -2367,7 +2401,7 @@ retry_root_backup: | |||
2367 | btrfs_level_size(tree_root, | 2401 | btrfs_level_size(tree_root, |
2368 | btrfs_super_log_root_level(disk_super)); | 2402 | btrfs_super_log_root_level(disk_super)); |
2369 | 2403 | ||
2370 | log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | 2404 | log_tree_root = btrfs_alloc_root(fs_info); |
2371 | if (!log_tree_root) { | 2405 | if (!log_tree_root) { |
2372 | err = -ENOMEM; | 2406 | err = -ENOMEM; |
2373 | goto fail_trans_kthread; | 2407 | goto fail_trans_kthread; |
@@ -2422,13 +2456,17 @@ retry_root_backup: | |||
2422 | if (!err) | 2456 | if (!err) |
2423 | err = btrfs_orphan_cleanup(fs_info->tree_root); | 2457 | err = btrfs_orphan_cleanup(fs_info->tree_root); |
2424 | up_read(&fs_info->cleanup_work_sem); | 2458 | up_read(&fs_info->cleanup_work_sem); |
2459 | |||
2460 | if (!err) | ||
2461 | err = btrfs_recover_balance(fs_info->tree_root); | ||
2462 | |||
2425 | if (err) { | 2463 | if (err) { |
2426 | close_ctree(tree_root); | 2464 | close_ctree(tree_root); |
2427 | return ERR_PTR(err); | 2465 | return err; |
2428 | } | 2466 | } |
2429 | } | 2467 | } |
2430 | 2468 | ||
2431 | return tree_root; | 2469 | return 0; |
2432 | 2470 | ||
2433 | fail_trans_kthread: | 2471 | fail_trans_kthread: |
2434 | kthread_stop(fs_info->transaction_kthread); | 2472 | kthread_stop(fs_info->transaction_kthread); |
@@ -2474,8 +2512,7 @@ fail_srcu: | |||
2474 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 2512 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
2475 | fail: | 2513 | fail: |
2476 | btrfs_close_devices(fs_info->fs_devices); | 2514 | btrfs_close_devices(fs_info->fs_devices); |
2477 | free_fs_info(fs_info); | 2515 | return err; |
2478 | return ERR_PTR(err); | ||
2479 | 2516 | ||
2480 | recovery_tree_root: | 2517 | recovery_tree_root: |
2481 | if (!btrfs_test_opt(tree_root, RECOVERY)) | 2518 | if (!btrfs_test_opt(tree_root, RECOVERY)) |
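Since open_ctree now returns an int and no longer frees fs_info on failure, ownership moves to the VFS-facing caller: super.c allocates fs_info before fill_super and frees it on every error path. A hedged sketch of the adjusted caller, simplified from fs/btrfs/super.c:

	static int btrfs_fill_super(struct super_block *sb,
				    struct btrfs_fs_devices *fs_devices,
				    void *data, int silent)
	{
		int err;

		err = open_ctree(sb, fs_devices, (char *)data);
		if (err) {
			printk(KERN_ERR "btrfs: open_ctree failed\n");
			return err;	/* caller frees the fs_info hung off sb */
		}
		/* root dentry setup continues as before */
		return 0;
	}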
@@ -2630,7 +2667,7 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2630 | * we fua the first super. The others we allow | 2667 | * we fua the first super. The others we allow |
2631 | * to go down lazy. | 2668 | * to go down lazy. |
2632 | */ | 2669 | */ |
2633 | ret = submit_bh(WRITE_FUA, bh); | 2670 | ret = btrfsic_submit_bh(WRITE_FUA, bh); |
2634 | if (ret) | 2671 | if (ret) |
2635 | errors++; | 2672 | errors++; |
2636 | } | 2673 | } |
@@ -2707,7 +2744,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) | |||
2707 | device->flush_bio = bio; | 2744 | device->flush_bio = bio; |
2708 | 2745 | ||
2709 | bio_get(bio); | 2746 | bio_get(bio); |
2710 | submit_bio(WRITE_FLUSH, bio); | 2747 | btrfsic_submit_bio(WRITE_FLUSH, bio); |
2711 | 2748 | ||
2712 | return 0; | 2749 | return 0; |
2713 | } | 2750 | } |
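btrfsic_submit_bh and btrfsic_submit_bio let the integrity checker observe every superblock and flush write before it reaches the device. When CONFIG_BTRFS_FS_CHECK_INTEGRITY is disabled they are expected to collapse to the plain submission helpers; roughly, in check-integrity.h (a sketch, since the header is not part of this excerpt):

	#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
	int btrfsic_submit_bh(int rw, struct buffer_head *bh);
	void btrfsic_submit_bio(int rw, struct bio *bio);
	#else
	#define btrfsic_submit_bh submit_bh
	#define btrfsic_submit_bio submit_bio
	#endif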
@@ -2971,6 +3008,9 @@ int close_ctree(struct btrfs_root *root) | |||
2971 | fs_info->closing = 1; | 3008 | fs_info->closing = 1; |
2972 | smp_mb(); | 3009 | smp_mb(); |
2973 | 3010 | ||
3011 | /* pause restriper - we want to resume on mount */ | ||
3012 | btrfs_pause_balance(root->fs_info); | ||
3013 | |||
2974 | btrfs_scrub_cancel(root); | 3014 | btrfs_scrub_cancel(root); |
2975 | 3015 | ||
2976 | /* wait for any defraggers to finish */ | 3016 | /* wait for any defraggers to finish */ |
@@ -2978,7 +3018,7 @@ int close_ctree(struct btrfs_root *root) | |||
2978 | (atomic_read(&fs_info->defrag_running) == 0)); | 3018 | (atomic_read(&fs_info->defrag_running) == 0)); |
2979 | 3019 | ||
2980 | /* clear out the rbtree of defraggable inodes */ | 3020 | /* clear out the rbtree of defraggable inodes */ |
2981 | btrfs_run_defrag_inodes(root->fs_info); | 3021 | btrfs_run_defrag_inodes(fs_info); |
2982 | 3022 | ||
2983 | /* | 3023 | /* |
2984 | * Two situations can flip btrfs read-only when it is broken: | 3024 | * Two situations can flip btrfs read-only when it is broken: |
@@ -3007,8 +3047,8 @@ int close_ctree(struct btrfs_root *root) | |||
3007 | 3047 | ||
3008 | btrfs_put_block_group_cache(fs_info); | 3048 | btrfs_put_block_group_cache(fs_info); |
3009 | 3049 | ||
3010 | kthread_stop(root->fs_info->transaction_kthread); | 3050 | kthread_stop(fs_info->transaction_kthread); |
3011 | kthread_stop(root->fs_info->cleaner_kthread); | 3051 | kthread_stop(fs_info->cleaner_kthread); |
3012 | 3052 | ||
3013 | fs_info->closing = 2; | 3053 | fs_info->closing = 2; |
3014 | smp_mb(); | 3054 | smp_mb(); |
@@ -3026,14 +3066,14 @@ int close_ctree(struct btrfs_root *root) | |||
3026 | free_extent_buffer(fs_info->extent_root->commit_root); | 3066 | free_extent_buffer(fs_info->extent_root->commit_root); |
3027 | free_extent_buffer(fs_info->tree_root->node); | 3067 | free_extent_buffer(fs_info->tree_root->node); |
3028 | free_extent_buffer(fs_info->tree_root->commit_root); | 3068 | free_extent_buffer(fs_info->tree_root->commit_root); |
3029 | free_extent_buffer(root->fs_info->chunk_root->node); | 3069 | free_extent_buffer(fs_info->chunk_root->node); |
3030 | free_extent_buffer(root->fs_info->chunk_root->commit_root); | 3070 | free_extent_buffer(fs_info->chunk_root->commit_root); |
3031 | free_extent_buffer(root->fs_info->dev_root->node); | 3071 | free_extent_buffer(fs_info->dev_root->node); |
3032 | free_extent_buffer(root->fs_info->dev_root->commit_root); | 3072 | free_extent_buffer(fs_info->dev_root->commit_root); |
3033 | free_extent_buffer(root->fs_info->csum_root->node); | 3073 | free_extent_buffer(fs_info->csum_root->node); |
3034 | free_extent_buffer(root->fs_info->csum_root->commit_root); | 3074 | free_extent_buffer(fs_info->csum_root->commit_root); |
3035 | 3075 | ||
3036 | btrfs_free_block_groups(root->fs_info); | 3076 | btrfs_free_block_groups(fs_info); |
3037 | 3077 | ||
3038 | del_fs_roots(fs_info); | 3078 | del_fs_roots(fs_info); |
3039 | 3079 | ||
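The remaining close_ctree() hunks are a mechanical cleanup: the function already holds a struct btrfs_fs_info *fs_info local, so repeated root->fs_info dereferences collapse onto it. In miniature:

    struct btrfs_fs_info *fs_info = root->fs_info;  /* hoisted once */

    kthread_stop(fs_info->transaction_kthread);     /* was root->fs_info->... */
    kthread_stop(fs_info->cleaner_kthread);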
@@ -3053,14 +3093,17 @@ int close_ctree(struct btrfs_root *root) | |||
3053 | btrfs_stop_workers(&fs_info->caching_workers); | 3093 | btrfs_stop_workers(&fs_info->caching_workers); |
3054 | btrfs_stop_workers(&fs_info->readahead_workers); | 3094 | btrfs_stop_workers(&fs_info->readahead_workers); |
3055 | 3095 | ||
3096 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||
3097 | if (btrfs_test_opt(root, CHECK_INTEGRITY)) | ||
3098 | btrfsic_unmount(root, fs_info->fs_devices); | ||
3099 | #endif | ||
3100 | |||
3056 | btrfs_close_devices(fs_info->fs_devices); | 3101 | btrfs_close_devices(fs_info->fs_devices); |
3057 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 3102 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
3058 | 3103 | ||
3059 | bdi_destroy(&fs_info->bdi); | 3104 | bdi_destroy(&fs_info->bdi); |
3060 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 3105 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
3061 | 3106 | ||
3062 | free_fs_info(fs_info); | ||
3063 | |||
3064 | return 0; | 3107 | return 0; |
3065 | } | 3108 | } |
3066 | 3109 | ||
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c99d0a8f13fa..e4bc4741319b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -46,9 +46,9 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | |||
46 | u64 bytenr, u32 blocksize); | 46 | u64 bytenr, u32 blocksize); |
47 | int clean_tree_block(struct btrfs_trans_handle *trans, | 47 | int clean_tree_block(struct btrfs_trans_handle *trans, |
48 | struct btrfs_root *root, struct extent_buffer *buf); | 48 | struct btrfs_root *root, struct extent_buffer *buf); |
49 | struct btrfs_root *open_ctree(struct super_block *sb, | 49 | int open_ctree(struct super_block *sb, |
50 | struct btrfs_fs_devices *fs_devices, | 50 | struct btrfs_fs_devices *fs_devices, |
51 | char *options); | 51 | char *options); |
52 | int close_ctree(struct btrfs_root *root); | 52 | int close_ctree(struct btrfs_root *root); |
53 | int write_ctree_super(struct btrfs_trans_handle *trans, | 53 | int write_ctree_super(struct btrfs_trans_handle *trans, |
54 | struct btrfs_root *root, int max_mirrors); | 54 | struct btrfs_root *root, int max_mirrors); |
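open_ctree() switches from returning the tree root (or an ERR_PTR) to returning an int, with the roots reachable through the fs_info the caller already owns; that is also why free_fs_info() dropped out of close_ctree() above, since the fs_info lifetime is assumed to move to the superblock teardown path. A caller would adapt roughly as follows (a sketch; the exact btrfs_fill_super code is not shown in this diff):

    int err;

    err = open_ctree(sb, fs_devices, (char *)data); /* 0 on success */
    if (err) {
            printk(KERN_ERR "btrfs: open_ctree failed\n");
            return err;     /* fs_info is freed later, on ->kill_sb */
    }
    /* success: roots hang off the fs_info stored in sb->s_fs_info */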
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 1b8dc33778f9..5f77166fd01c 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -67,7 +67,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
67 | u64 root_objectid, u32 generation, | 67 | u64 root_objectid, u32 generation, |
68 | int check_generation) | 68 | int check_generation) |
69 | { | 69 | { |
70 | struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; | 70 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
71 | struct btrfs_root *root; | 71 | struct btrfs_root *root; |
72 | struct inode *inode; | 72 | struct inode *inode; |
73 | struct btrfs_key key; | 73 | struct btrfs_key key; |
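This one-liner follows from a helper change assumed elsewhere in the series: btrfs_sb() now hands back the fs_info directly instead of a root whose ->fs_info had to be chased. Presumably the inline becomes:

    /* sketch of the assumed new helper in ctree.h */
    static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
    {
            return sb->s_fs_info;
    }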
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f5fbe576d2ba..283af7a676a3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -34,23 +34,24 @@ | |||
34 | #include "locking.h" | 34 | #include "locking.h" |
35 | #include "free-space-cache.h" | 35 | #include "free-space-cache.h" |
36 | 36 | ||
37 | /* control flags for do_chunk_alloc's force field | 37 | /* |
38 | * control flags for do_chunk_alloc's force field | ||
38 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk | 39 | * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk |
39 | * if we really need one. | 40 | * if we really need one. |
40 | * | 41 | * |
41 | * CHUNK_ALLOC_FORCE means it must try to allocate one | ||
42 | * | ||
43 | * CHUNK_ALLOC_LIMITED means to only try and allocate one | 42 | * CHUNK_ALLOC_LIMITED means to only try and allocate one |
44 | * if we have very few chunks already allocated. This is | 43 | * if we have very few chunks already allocated. This is |
45 | * used as part of the clustering code to help make sure | 44 | * used as part of the clustering code to help make sure |
46 | * we have a good pool of storage to cluster in, without | 45 | * we have a good pool of storage to cluster in, without |
47 | * filling the FS with empty chunks | 46 | * filling the FS with empty chunks |
48 | * | 47 | * |
48 | * CHUNK_ALLOC_FORCE means it must try to allocate one | ||
49 | * | ||
49 | */ | 50 | */ |
50 | enum { | 51 | enum { |
51 | CHUNK_ALLOC_NO_FORCE = 0, | 52 | CHUNK_ALLOC_NO_FORCE = 0, |
52 | CHUNK_ALLOC_FORCE = 1, | 53 | CHUNK_ALLOC_LIMITED = 1, |
53 | CHUNK_ALLOC_LIMITED = 2, | 54 | CHUNK_ALLOC_FORCE = 2, |
54 | }; | 55 | }; |
55 | 56 | ||
56 | /* | 57 | /* |
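Swapping the values of CHUNK_ALLOC_LIMITED and CHUNK_ALLOC_FORCE is not cosmetic: the enum now orders force levels by strength, which lets do_chunk_alloc() compare them numerically (see the force < space_info->force_alloc hunk further down). Distilled:

    enum {
            CHUNK_ALLOC_NO_FORCE = 0,       /* weakest: allocate only if needed */
            CHUNK_ALLOC_LIMITED  = 1,       /* only if very few chunks exist */
            CHUNK_ALLOC_FORCE    = 2,       /* strongest: must try to allocate */
    };

    /* a stored force level can now upgrade, but never downgrade, a caller: */
    if (force < space_info->force_alloc)
            force = space_info->force_alloc;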
@@ -618,8 +619,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
618 | struct list_head *head = &info->space_info; | 619 | struct list_head *head = &info->space_info; |
619 | struct btrfs_space_info *found; | 620 | struct btrfs_space_info *found; |
620 | 621 | ||
621 | flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM | | 622 | flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; |
622 | BTRFS_BLOCK_GROUP_METADATA; | ||
623 | 623 | ||
624 | rcu_read_lock(); | 624 | rcu_read_lock(); |
625 | list_for_each_entry_rcu(found, head, list) { | 625 | list_for_each_entry_rcu(found, head, list) { |
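The open-coded OR of the three block-group type bits gives way to a named mask, presumably defined alongside the flags in ctree.h:

    #define BTRFS_BLOCK_GROUP_TYPE_MASK    (BTRFS_BLOCK_GROUP_DATA |    \
                                            BTRFS_BLOCK_GROUP_SYSTEM |  \
                                            BTRFS_BLOCK_GROUP_METADATA)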
@@ -1872,20 +1872,24 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1872 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 1872 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
1873 | struct btrfs_root *root, | 1873 | struct btrfs_root *root, |
1874 | u64 bytenr, u64 num_bytes, u64 parent, | 1874 | u64 bytenr, u64 num_bytes, u64 parent, |
1875 | u64 root_objectid, u64 owner, u64 offset) | 1875 | u64 root_objectid, u64 owner, u64 offset, int for_cow) |
1876 | { | 1876 | { |
1877 | int ret; | 1877 | int ret; |
1878 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1879 | |||
1878 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && | 1880 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && |
1879 | root_objectid == BTRFS_TREE_LOG_OBJECTID); | 1881 | root_objectid == BTRFS_TREE_LOG_OBJECTID); |
1880 | 1882 | ||
1881 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 1883 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
1882 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, | 1884 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, |
1885 | num_bytes, | ||
1883 | parent, root_objectid, (int)owner, | 1886 | parent, root_objectid, (int)owner, |
1884 | BTRFS_ADD_DELAYED_REF, NULL); | 1887 | BTRFS_ADD_DELAYED_REF, NULL, for_cow); |
1885 | } else { | 1888 | } else { |
1886 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, | 1889 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, |
1890 | num_bytes, | ||
1887 | parent, root_objectid, owner, offset, | 1891 | parent, root_objectid, owner, offset, |
1888 | BTRFS_ADD_DELAYED_REF, NULL); | 1892 | BTRFS_ADD_DELAYED_REF, NULL, for_cow); |
1889 | } | 1893 | } |
1890 | return ret; | 1894 | return ret; |
1891 | } | 1895 | } |
@@ -2233,6 +2237,28 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2233 | } | 2237 | } |
2234 | 2238 | ||
2235 | /* | 2239 | /* |
2240 | * locked_ref is the head node, so we have to go one | ||
2241 | * node back for any delayed ref updates | ||
2242 | */ | ||
2243 | ref = select_delayed_ref(locked_ref); | ||
2244 | |||
2245 | if (ref && ref->seq && | ||
2246 | btrfs_check_delayed_seq(delayed_refs, ref->seq)) { | ||
2247 | /* | ||
2248 | * there are still refs with lower seq numbers in the | ||
2249 | * process of being added. Don't run this ref yet. | ||
2250 | */ | ||
2251 | list_del_init(&locked_ref->cluster); | ||
2252 | mutex_unlock(&locked_ref->mutex); | ||
2253 | locked_ref = NULL; | ||
2254 | delayed_refs->num_heads_ready++; | ||
2255 | spin_unlock(&delayed_refs->lock); | ||
2256 | cond_resched(); | ||
2257 | spin_lock(&delayed_refs->lock); | ||
2258 | continue; | ||
2259 | } | ||
2260 | |||
2261 | /* | ||
2236 | * record the must insert reserved flag before we | 2262 | * record the must insert reserved flag before we |
2237 | * drop the spin lock. | 2263 | * drop the spin lock. |
2238 | */ | 2264 | */ |
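The block added here is the consumer side of the new sequence numbers on delayed refs: if the first runnable ref of this head carries a seq and refs with lower seqs are still being built, the head is put back (cluster entry removed, mutex dropped, num_heads_ready restored) and the loop moves on rather than running refs out of order. Reduced to the decision:

    ref = select_delayed_ref(locked_ref);
    if (ref && ref->seq &&
        btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
            /* lower-seq refs still in flight: requeue and try later */
            list_del_init(&locked_ref->cluster);
            mutex_unlock(&locked_ref->mutex);
            locked_ref = NULL;
            delayed_refs->num_heads_ready++;
            continue;       /* spinlock drop/retake elided */
    }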
@@ -2242,11 +2268,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2242 | extent_op = locked_ref->extent_op; | 2268 | extent_op = locked_ref->extent_op; |
2243 | locked_ref->extent_op = NULL; | 2269 | locked_ref->extent_op = NULL; |
2244 | 2270 | ||
2245 | /* | ||
2246 | * locked_ref is the head node, so we have to go one | ||
2247 | * node back for any delayed ref updates | ||
2248 | */ | ||
2249 | ref = select_delayed_ref(locked_ref); | ||
2250 | if (!ref) { | 2271 | if (!ref) { |
2251 | /* All delayed refs have been processed, Go ahead | 2272 | /* All delayed refs have been processed, Go ahead |
2252 | * and send the head node to run_one_delayed_ref, | 2273 | * and send the head node to run_one_delayed_ref, |
@@ -2267,9 +2288,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2267 | BUG_ON(ret); | 2288 | BUG_ON(ret); |
2268 | kfree(extent_op); | 2289 | kfree(extent_op); |
2269 | 2290 | ||
2270 | cond_resched(); | 2291 | goto next; |
2271 | spin_lock(&delayed_refs->lock); | ||
2272 | continue; | ||
2273 | } | 2292 | } |
2274 | 2293 | ||
2275 | list_del_init(&locked_ref->cluster); | 2294 | list_del_init(&locked_ref->cluster); |
@@ -2279,7 +2298,12 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2279 | ref->in_tree = 0; | 2298 | ref->in_tree = 0; |
2280 | rb_erase(&ref->rb_node, &delayed_refs->root); | 2299 | rb_erase(&ref->rb_node, &delayed_refs->root); |
2281 | delayed_refs->num_entries--; | 2300 | delayed_refs->num_entries--; |
2282 | 2301 | /* | |
2302 | * we modified num_entries, but as we're currently running | ||
2303 | * delayed refs, skip | ||
2304 | * wake_up(&delayed_refs->seq_wait); | ||
2305 | * here. | ||
2306 | */ | ||
2283 | spin_unlock(&delayed_refs->lock); | 2307 | spin_unlock(&delayed_refs->lock); |
2284 | 2308 | ||
2285 | ret = run_one_delayed_ref(trans, root, ref, extent_op, | 2309 | ret = run_one_delayed_ref(trans, root, ref, extent_op, |
@@ -2289,13 +2313,34 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2289 | btrfs_put_delayed_ref(ref); | 2313 | btrfs_put_delayed_ref(ref); |
2290 | kfree(extent_op); | 2314 | kfree(extent_op); |
2291 | count++; | 2315 | count++; |
2292 | 2316 | next: | |
2317 | do_chunk_alloc(trans, root->fs_info->extent_root, | ||
2318 | 2 * 1024 * 1024, | ||
2319 | btrfs_get_alloc_profile(root, 0), | ||
2320 | CHUNK_ALLOC_NO_FORCE); | ||
2293 | cond_resched(); | 2321 | cond_resched(); |
2294 | spin_lock(&delayed_refs->lock); | 2322 | spin_lock(&delayed_refs->lock); |
2295 | } | 2323 | } |
2296 | return count; | 2324 | return count; |
2297 | } | 2325 | } |
2298 | 2326 | ||
2327 | |||
2328 | static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, | ||
2329 | unsigned long num_refs) | ||
2330 | { | ||
2331 | struct list_head *first_seq = delayed_refs->seq_head.next; | ||
2332 | |||
2333 | spin_unlock(&delayed_refs->lock); | ||
2334 | pr_debug("waiting for more refs (num %ld, first %p)\n", | ||
2335 | num_refs, first_seq); | ||
2336 | wait_event(delayed_refs->seq_wait, | ||
2337 | num_refs != delayed_refs->num_entries || | ||
2338 | delayed_refs->seq_head.next != first_seq); | ||
2339 | pr_debug("done waiting for more refs (num %ld, first %p)\n", | ||
2340 | delayed_refs->num_entries, delayed_refs->seq_head.next); | ||
2341 | spin_lock(&delayed_refs->lock); | ||
2342 | } | ||
2343 | |||
2299 | /* | 2344 | /* |
2300 | * this starts processing the delayed reference count updates and | 2345 | * this starts processing the delayed reference count updates and |
2301 | * extent insertions we have queued up so far. count can be | 2346 | * extent insertions we have queued up so far. count can be |
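wait_for_more_refs() is the blocking half of the seq gate introduced above: it drops the delayed-refs spinlock, sleeps until either the entry count or the head of the seq list changes, and retakes the lock. The pattern, reduced to its essentials:

    /* snapshot state under the lock, then sleep until a producer moves it;
     * wait_event() re-checks the condition on every wakeup */
    spin_unlock(&delayed_refs->lock);
    wait_event(delayed_refs->seq_wait,
               num_refs != delayed_refs->num_entries ||
               delayed_refs->seq_head.next != first_seq);
    spin_lock(&delayed_refs->lock);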
@@ -2311,15 +2356,23 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2311 | struct btrfs_delayed_ref_node *ref; | 2356 | struct btrfs_delayed_ref_node *ref; |
2312 | struct list_head cluster; | 2357 | struct list_head cluster; |
2313 | int ret; | 2358 | int ret; |
2359 | u64 delayed_start; | ||
2314 | int run_all = count == (unsigned long)-1; | 2360 | int run_all = count == (unsigned long)-1; |
2315 | int run_most = 0; | 2361 | int run_most = 0; |
2362 | unsigned long num_refs = 0; | ||
2363 | int consider_waiting; | ||
2316 | 2364 | ||
2317 | if (root == root->fs_info->extent_root) | 2365 | if (root == root->fs_info->extent_root) |
2318 | root = root->fs_info->tree_root; | 2366 | root = root->fs_info->tree_root; |
2319 | 2367 | ||
2368 | do_chunk_alloc(trans, root->fs_info->extent_root, | ||
2369 | 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), | ||
2370 | CHUNK_ALLOC_NO_FORCE); | ||
2371 | |||
2320 | delayed_refs = &trans->transaction->delayed_refs; | 2372 | delayed_refs = &trans->transaction->delayed_refs; |
2321 | INIT_LIST_HEAD(&cluster); | 2373 | INIT_LIST_HEAD(&cluster); |
2322 | again: | 2374 | again: |
2375 | consider_waiting = 0; | ||
2323 | spin_lock(&delayed_refs->lock); | 2376 | spin_lock(&delayed_refs->lock); |
2324 | if (count == 0) { | 2377 | if (count == 0) { |
2325 | count = delayed_refs->num_entries * 2; | 2378 | count = delayed_refs->num_entries * 2; |
@@ -2336,11 +2389,35 @@ again: | |||
2336 | * of refs to process starting at the first one we are able to | 2389 | * of refs to process starting at the first one we are able to |
2337 | * lock | 2390 | * lock |
2338 | */ | 2391 | */ |
2392 | delayed_start = delayed_refs->run_delayed_start; | ||
2339 | ret = btrfs_find_ref_cluster(trans, &cluster, | 2393 | ret = btrfs_find_ref_cluster(trans, &cluster, |
2340 | delayed_refs->run_delayed_start); | 2394 | delayed_refs->run_delayed_start); |
2341 | if (ret) | 2395 | if (ret) |
2342 | break; | 2396 | break; |
2343 | 2397 | ||
2398 | if (delayed_start >= delayed_refs->run_delayed_start) { | ||
2399 | if (consider_waiting == 0) { | ||
2400 | /* | ||
2401 | * btrfs_find_ref_cluster looped. let's do one | ||
2402 | * more cycle. if we don't run any delayed ref | ||
2403 | * during that cycle (because we can't because | ||
2404 | * all of them are blocked) and if the number of | ||
2405 | * refs doesn't change, we avoid busy waiting. | ||
2406 | */ | ||
2407 | consider_waiting = 1; | ||
2408 | num_refs = delayed_refs->num_entries; | ||
2409 | } else { | ||
2410 | wait_for_more_refs(delayed_refs, num_refs); | ||
2411 | /* | ||
2412 | * after waiting, things have changed. we | ||
2413 | * dropped the lock and someone else might have | ||
2414 | * run some refs, built new clusters and so on. | ||
2415 | * therefore, we restart staleness detection. | ||
2416 | */ | ||
2417 | consider_waiting = 0; | ||
2418 | } | ||
2419 | } | ||
2420 | |||
2344 | ret = run_clustered_refs(trans, root, &cluster); | 2421 | ret = run_clustered_refs(trans, root, &cluster); |
2345 | BUG_ON(ret < 0); | 2422 | BUG_ON(ret < 0); |
2346 | 2423 | ||
@@ -2348,6 +2425,11 @@ again: | |||
2348 | 2425 | ||
2349 | if (count == 0) | 2426 | if (count == 0) |
2350 | break; | 2427 | break; |
2428 | |||
2429 | if (ret || delayed_refs->run_delayed_start == 0) { | ||
2430 | /* refs were run, let's reset staleness detection */ | ||
2431 | consider_waiting = 0; | ||
2432 | } | ||
2351 | } | 2433 | } |
2352 | 2434 | ||
2353 | if (run_all) { | 2435 | if (run_all) { |
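Taken together, the btrfs_run_delayed_refs() hunks implement a two-cycle staleness detector: noticing that btrfs_find_ref_cluster() wrapped around (run_delayed_start did not advance past the saved delayed_start) arms the detector; wrapping a second time with the ref count unchanged means every remaining ref is seq-blocked, so the task sleeps in wait_for_more_refs() instead of busy-looping. Distilled control flow:

    if (delayed_start >= delayed_refs->run_delayed_start) {
            if (!consider_waiting) {
                    consider_waiting = 1;                   /* arm detector */
                    num_refs = delayed_refs->num_entries;   /* remember count */
            } else {
                    wait_for_more_refs(delayed_refs, num_refs);
                    consider_waiting = 0;   /* world changed; re-arm later */
            }
    }

    /* later in the loop: any real progress also resets the detector */
    if (ret || delayed_refs->run_delayed_start == 0)
            consider_waiting = 0;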
@@ -2405,7 +2487,8 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, | |||
2405 | extent_op->update_key = 0; | 2487 | extent_op->update_key = 0; |
2406 | extent_op->is_data = is_data ? 1 : 0; | 2488 | extent_op->is_data = is_data ? 1 : 0; |
2407 | 2489 | ||
2408 | ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); | 2490 | ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, |
2491 | num_bytes, extent_op); | ||
2409 | if (ret) | 2492 | if (ret) |
2410 | kfree(extent_op); | 2493 | kfree(extent_op); |
2411 | return ret; | 2494 | return ret; |
@@ -2590,7 +2673,7 @@ out: | |||
2590 | static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | 2673 | static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, |
2591 | struct btrfs_root *root, | 2674 | struct btrfs_root *root, |
2592 | struct extent_buffer *buf, | 2675 | struct extent_buffer *buf, |
2593 | int full_backref, int inc) | 2676 | int full_backref, int inc, int for_cow) |
2594 | { | 2677 | { |
2595 | u64 bytenr; | 2678 | u64 bytenr; |
2596 | u64 num_bytes; | 2679 | u64 num_bytes; |
@@ -2603,7 +2686,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
2603 | int level; | 2686 | int level; |
2604 | int ret = 0; | 2687 | int ret = 0; |
2605 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | 2688 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, |
2606 | u64, u64, u64, u64, u64, u64); | 2689 | u64, u64, u64, u64, u64, u64, int); |
2607 | 2690 | ||
2608 | ref_root = btrfs_header_owner(buf); | 2691 | ref_root = btrfs_header_owner(buf); |
2609 | nritems = btrfs_header_nritems(buf); | 2692 | nritems = btrfs_header_nritems(buf); |
@@ -2640,14 +2723,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
2640 | key.offset -= btrfs_file_extent_offset(buf, fi); | 2723 | key.offset -= btrfs_file_extent_offset(buf, fi); |
2641 | ret = process_func(trans, root, bytenr, num_bytes, | 2724 | ret = process_func(trans, root, bytenr, num_bytes, |
2642 | parent, ref_root, key.objectid, | 2725 | parent, ref_root, key.objectid, |
2643 | key.offset); | 2726 | key.offset, for_cow); |
2644 | if (ret) | 2727 | if (ret) |
2645 | goto fail; | 2728 | goto fail; |
2646 | } else { | 2729 | } else { |
2647 | bytenr = btrfs_node_blockptr(buf, i); | 2730 | bytenr = btrfs_node_blockptr(buf, i); |
2648 | num_bytes = btrfs_level_size(root, level - 1); | 2731 | num_bytes = btrfs_level_size(root, level - 1); |
2649 | ret = process_func(trans, root, bytenr, num_bytes, | 2732 | ret = process_func(trans, root, bytenr, num_bytes, |
2650 | parent, ref_root, level - 1, 0); | 2733 | parent, ref_root, level - 1, 0, |
2734 | for_cow); | ||
2651 | if (ret) | 2735 | if (ret) |
2652 | goto fail; | 2736 | goto fail; |
2653 | } | 2737 | } |
@@ -2659,15 +2743,15 @@ fail: | |||
2659 | } | 2743 | } |
2660 | 2744 | ||
2661 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2745 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
2662 | struct extent_buffer *buf, int full_backref) | 2746 | struct extent_buffer *buf, int full_backref, int for_cow) |
2663 | { | 2747 | { |
2664 | return __btrfs_mod_ref(trans, root, buf, full_backref, 1); | 2748 | return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow); |
2665 | } | 2749 | } |
2666 | 2750 | ||
2667 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2751 | int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
2668 | struct extent_buffer *buf, int full_backref) | 2752 | struct extent_buffer *buf, int full_backref, int for_cow) |
2669 | { | 2753 | { |
2670 | return __btrfs_mod_ref(trans, root, buf, full_backref, 0); | 2754 | return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow); |
2671 | } | 2755 | } |
2672 | 2756 | ||
2673 | static int write_one_cache_group(struct btrfs_trans_handle *trans, | 2757 | static int write_one_cache_group(struct btrfs_trans_handle *trans, |
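Because __btrfs_mod_ref() dispatches through a local function pointer, threading for_cow through means the pointer type and both call targets must change in lock-step, which is why btrfs_inc_extent_ref() and btrfs_free_extent() grow the same trailing int in this patch:

    /* the pointer type gains the trailing for_cow int... */
    int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
                        u64, u64, u64, u64, u64, u64, int);

    process_func = inc ? btrfs_inc_extent_ref : btrfs_free_extent;

    /* ...and every indirect call passes it through */
    ret = process_func(trans, root, bytenr, num_bytes,
                       parent, ref_root, key.objectid, key.offset, for_cow);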
@@ -2993,9 +3077,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2993 | INIT_LIST_HEAD(&found->block_groups[i]); | 3077 | INIT_LIST_HEAD(&found->block_groups[i]); |
2994 | init_rwsem(&found->groups_sem); | 3078 | init_rwsem(&found->groups_sem); |
2995 | spin_lock_init(&found->lock); | 3079 | spin_lock_init(&found->lock); |
2996 | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | | 3080 | found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; |
2997 | BTRFS_BLOCK_GROUP_SYSTEM | | ||
2998 | BTRFS_BLOCK_GROUP_METADATA); | ||
2999 | found->total_bytes = total_bytes; | 3081 | found->total_bytes = total_bytes; |
3000 | found->disk_total = total_bytes * factor; | 3082 | found->disk_total = total_bytes * factor; |
3001 | found->bytes_used = bytes_used; | 3083 | found->bytes_used = bytes_used; |
@@ -3016,20 +3098,27 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3016 | 3098 | ||
3017 | static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | 3099 | static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) |
3018 | { | 3100 | { |
3019 | u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | | 3101 | u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK; |
3020 | BTRFS_BLOCK_GROUP_RAID1 | | 3102 | |
3021 | BTRFS_BLOCK_GROUP_RAID10 | | 3103 | /* chunk -> extended profile */ |
3022 | BTRFS_BLOCK_GROUP_DUP); | 3104 | if (extra_flags == 0) |
3023 | if (extra_flags) { | 3105 | extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE; |
3024 | if (flags & BTRFS_BLOCK_GROUP_DATA) | 3106 | |
3025 | fs_info->avail_data_alloc_bits |= extra_flags; | 3107 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
3026 | if (flags & BTRFS_BLOCK_GROUP_METADATA) | 3108 | fs_info->avail_data_alloc_bits |= extra_flags; |
3027 | fs_info->avail_metadata_alloc_bits |= extra_flags; | 3109 | if (flags & BTRFS_BLOCK_GROUP_METADATA) |
3028 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | 3110 | fs_info->avail_metadata_alloc_bits |= extra_flags; |
3029 | fs_info->avail_system_alloc_bits |= extra_flags; | 3111 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
3030 | } | 3112 | fs_info->avail_system_alloc_bits |= extra_flags; |
3031 | } | 3113 | } |
3032 | 3114 | ||
3115 | /* | ||
3116 | * @flags: available profiles in extended format (see ctree.h) | ||
3117 | * | ||
3118 | * Returns reduced profile in chunk format. If profile changing is in | ||
3119 | * progress (either running or paused) picks the target profile (if it's | ||
3120 | * already available), otherwise falls back to plain reducing. | ||
3121 | */ | ||
3033 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 3122 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
3034 | { | 3123 | { |
3035 | /* | 3124 | /* |
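The rewritten set_avail_alloc_bits() introduces the extended-profile convention used throughout this series: in chunk format, zero profile bits mean plain "single" allocation, which is not representable as a bit, so the extended format adds a BTRFS_AVAIL_ALLOC_BIT_SINGLE pseudo-bit. The two conversions, distilled (the out: label added to btrfs_reduce_alloc_profile() below performs the second one):

    /* chunk -> extended: make "single" an explicit bit */
    u64 extended = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
    if (extended == 0)
            extended = BTRFS_AVAIL_ALLOC_BIT_SINGLE;

    /* extended -> chunk: strip the pseudo-bit again before use on disk */
    flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;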
@@ -3040,6 +3129,34 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3040 | u64 num_devices = root->fs_info->fs_devices->rw_devices + | 3129 | u64 num_devices = root->fs_info->fs_devices->rw_devices + |
3041 | root->fs_info->fs_devices->missing_devices; | 3130 | root->fs_info->fs_devices->missing_devices; |
3042 | 3131 | ||
3132 | /* pick restriper's target profile if it's available */ | ||
3133 | spin_lock(&root->fs_info->balance_lock); | ||
3134 | if (root->fs_info->balance_ctl) { | ||
3135 | struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; | ||
3136 | u64 tgt = 0; | ||
3137 | |||
3138 | if ((flags & BTRFS_BLOCK_GROUP_DATA) && | ||
3139 | (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && | ||
3140 | (flags & bctl->data.target)) { | ||
3141 | tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target; | ||
3142 | } else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) && | ||
3143 | (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && | ||
3144 | (flags & bctl->sys.target)) { | ||
3145 | tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target; | ||
3146 | } else if ((flags & BTRFS_BLOCK_GROUP_METADATA) && | ||
3147 | (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && | ||
3148 | (flags & bctl->meta.target)) { | ||
3149 | tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; | ||
3150 | } | ||
3151 | |||
3152 | if (tgt) { | ||
3153 | spin_unlock(&root->fs_info->balance_lock); | ||
3154 | flags = tgt; | ||
3155 | goto out; | ||
3156 | } | ||
3157 | } | ||
3158 | spin_unlock(&root->fs_info->balance_lock); | ||
3159 | |||
3043 | if (num_devices == 1) | 3160 | if (num_devices == 1) |
3044 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); | 3161 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); |
3045 | if (num_devices < 4) | 3162 | if (num_devices < 4) |
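The new block prefers the restriper's convert target while a balance is running or paused, but only when the target profile is actually among the available @flags; otherwise it falls through to plain reducing. For one block-group type the logic reduces to:

    spin_lock(&fs_info->balance_lock);
    if (fs_info->balance_ctl) {
            struct btrfs_balance_control *bctl = fs_info->balance_ctl;

            if ((flags & BTRFS_BLOCK_GROUP_DATA) &&                 /* right type */
                (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&  /* converting */
                (flags & bctl->data.target)) {                      /* target avail */
                    spin_unlock(&fs_info->balance_lock);
                    return BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
            }
    }
    spin_unlock(&fs_info->balance_lock);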
@@ -3059,22 +3176,25 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3059 | if ((flags & BTRFS_BLOCK_GROUP_RAID0) && | 3176 | if ((flags & BTRFS_BLOCK_GROUP_RAID0) && |
3060 | ((flags & BTRFS_BLOCK_GROUP_RAID1) | | 3177 | ((flags & BTRFS_BLOCK_GROUP_RAID1) | |
3061 | (flags & BTRFS_BLOCK_GROUP_RAID10) | | 3178 | (flags & BTRFS_BLOCK_GROUP_RAID10) | |
3062 | (flags & BTRFS_BLOCK_GROUP_DUP))) | 3179 | (flags & BTRFS_BLOCK_GROUP_DUP))) { |
3063 | flags &= ~BTRFS_BLOCK_GROUP_RAID0; | 3180 | flags &= ~BTRFS_BLOCK_GROUP_RAID0; |
3181 | } | ||
3182 | |||
3183 | out: | ||
3184 | /* extended -> chunk profile */ | ||
3185 | flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE; | ||
3064 | return flags; | 3186 | return flags; |
3065 | } | 3187 | } |
3066 | 3188 | ||
3067 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | 3189 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) |
3068 | { | 3190 | { |
3069 | if (flags & BTRFS_BLOCK_GROUP_DATA) | 3191 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
3070 | flags |= root->fs_info->avail_data_alloc_bits & | 3192 | flags |= root->fs_info->avail_data_alloc_bits; |
3071 | root->fs_info->data_alloc_profile; | ||
3072 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | 3193 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
3073 | flags |= root->fs_info->avail_system_alloc_bits & | 3194 | flags |= root->fs_info->avail_system_alloc_bits; |
3074 | root->fs_info->system_alloc_profile; | ||
3075 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) | 3195 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) |
3076 | flags |= root->fs_info->avail_metadata_alloc_bits & | 3196 | flags |= root->fs_info->avail_metadata_alloc_bits; |
3077 | root->fs_info->metadata_alloc_profile; | 3197 | |
3078 | return btrfs_reduce_alloc_profile(root, flags); | 3198 | return btrfs_reduce_alloc_profile(root, flags); |
3079 | } | 3199 | } |
3080 | 3200 | ||
@@ -3191,6 +3311,8 @@ commit_trans: | |||
3191 | return -ENOSPC; | 3311 | return -ENOSPC; |
3192 | } | 3312 | } |
3193 | data_sinfo->bytes_may_use += bytes; | 3313 | data_sinfo->bytes_may_use += bytes; |
3314 | trace_btrfs_space_reservation(root->fs_info, "space_info", | ||
3315 | (u64)data_sinfo, bytes, 1); | ||
3194 | spin_unlock(&data_sinfo->lock); | 3316 | spin_unlock(&data_sinfo->lock); |
3195 | 3317 | ||
3196 | return 0; | 3318 | return 0; |
@@ -3210,6 +3332,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | |||
3210 | data_sinfo = BTRFS_I(inode)->space_info; | 3332 | data_sinfo = BTRFS_I(inode)->space_info; |
3211 | spin_lock(&data_sinfo->lock); | 3333 | spin_lock(&data_sinfo->lock); |
3212 | data_sinfo->bytes_may_use -= bytes; | 3334 | data_sinfo->bytes_may_use -= bytes; |
3335 | trace_btrfs_space_reservation(root->fs_info, "space_info", | ||
3336 | (u64)data_sinfo, bytes, 0); | ||
3213 | spin_unlock(&data_sinfo->lock); | 3337 | spin_unlock(&data_sinfo->lock); |
3214 | } | 3338 | } |
3215 | 3339 | ||
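The trace_btrfs_space_reservation() calls sprinkled through this file follow one convention: the same (fs_info, type string, object id, bytes) tuple is emitted with a final 1 when bytes are taken and 0 when they are given back, so a tracer can pair reserve and release events. The two sides, as in the hunks above and below:

    data_sinfo->bytes_may_use += bytes;
    trace_btrfs_space_reservation(root->fs_info, "space_info",
                                  (u64)data_sinfo, bytes, 1);   /* reserve */

    /* and on the release side, the mirror-image event: */
    data_sinfo->bytes_may_use -= bytes;
    trace_btrfs_space_reservation(root->fs_info, "space_info",
                                  (u64)data_sinfo, bytes, 0);   /* release */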
@@ -3257,27 +3381,15 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
3257 | if (num_bytes - num_allocated < thresh) | 3381 | if (num_bytes - num_allocated < thresh) |
3258 | return 1; | 3382 | return 1; |
3259 | } | 3383 | } |
3260 | |||
3261 | /* | ||
3262 | * we have two similar checks here, one based on percentage | ||
3263 | * and once based on a hard number of 256MB. The idea | ||
3264 | * is that if we have a good amount of free | ||
3265 | * room, don't allocate a chunk. A good amount is | ||
3266 | * less than 80% utilized of the chunks we have allocated, | ||
3267 | * or more than 256MB free | ||
3268 | */ | ||
3269 | if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes) | ||
3270 | return 0; | ||
3271 | |||
3272 | if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) | ||
3273 | return 0; | ||
3274 | |||
3275 | thresh = btrfs_super_total_bytes(root->fs_info->super_copy); | 3384 | thresh = btrfs_super_total_bytes(root->fs_info->super_copy); |
3276 | 3385 | ||
3277 | /* 256MB or 5% of the FS */ | 3386 | /* 256MB or 2% of the FS */ |
3278 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); | 3387 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2)); |
3388 | /* system chunks need a much smaller threshold | ||
3389 | if (sinfo->flags & BTRFS_BLOCK_GROUP_SYSTEM) | ||
3390 | thresh = 32 * 1024 * 1024; | ||
3279 | 3391 | ||
3280 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) | 3392 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8)) |
3281 | return 0; | 3393 | return 0; |
3282 | return 1; | 3394 | return 1; |
3283 | } | 3395 | } |
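The rewritten tail of should_alloc_chunk() drops the duplicate 256MB/80% checks above and re-tunes the remaining gate. Assuming div_factor(n, f) computes n*f/10 and div_factor_fine(n, f) computes n*f/100, the rule changes from "skip allocation while the space_info is larger than max(256MB, 5% of the FS) and under 30% used" to "larger than max(256MB, 2%) and under 80% used", with a flat 32MB threshold for system chunks. Worked out for a 1 TiB filesystem:

    u64 fs_bytes = 1ULL << 40;                          /* 1 TiB example */
    u64 thresh = max_t(u64, 256 * 1024 * 1024,
                       div_factor_fine(fs_bytes, 2));   /* ~20 GiB */

    /* don't allocate another chunk while this space_info is big enough
     * and still under 80% used */
    if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8))
            return 0;
    return 1;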
@@ -3291,7 +3403,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3291 | int wait_for_alloc = 0; | 3403 | int wait_for_alloc = 0; |
3292 | int ret = 0; | 3404 | int ret = 0; |
3293 | 3405 | ||
3294 | flags = btrfs_reduce_alloc_profile(extent_root, flags); | 3406 | BUG_ON(!profile_is_valid(flags, 0)); |
3295 | 3407 | ||
3296 | space_info = __find_space_info(extent_root->fs_info, flags); | 3408 | space_info = __find_space_info(extent_root->fs_info, flags); |
3297 | if (!space_info) { | 3409 | if (!space_info) { |
@@ -3303,7 +3415,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3303 | 3415 | ||
3304 | again: | 3416 | again: |
3305 | spin_lock(&space_info->lock); | 3417 | spin_lock(&space_info->lock); |
3306 | if (space_info->force_alloc) | 3418 | if (force < space_info->force_alloc) |
3307 | force = space_info->force_alloc; | 3419 | force = space_info->force_alloc; |
3308 | if (space_info->full) { | 3420 | if (space_info->full) { |
3309 | spin_unlock(&space_info->lock); | 3421 | spin_unlock(&space_info->lock); |
@@ -3582,6 +3694,10 @@ again: | |||
3582 | if (used <= space_info->total_bytes) { | 3694 | if (used <= space_info->total_bytes) { |
3583 | if (used + orig_bytes <= space_info->total_bytes) { | 3695 | if (used + orig_bytes <= space_info->total_bytes) { |
3584 | space_info->bytes_may_use += orig_bytes; | 3696 | space_info->bytes_may_use += orig_bytes; |
3697 | trace_btrfs_space_reservation(root->fs_info, | ||
3698 | "space_info", | ||
3699 | (u64)space_info, | ||
3700 | orig_bytes, 1); | ||
3585 | ret = 0; | 3701 | ret = 0; |
3586 | } else { | 3702 | } else { |
3587 | /* | 3703 | /* |
@@ -3649,6 +3765,10 @@ again: | |||
3649 | 3765 | ||
3650 | if (used + num_bytes < space_info->total_bytes + avail) { | 3766 | if (used + num_bytes < space_info->total_bytes + avail) { |
3651 | space_info->bytes_may_use += orig_bytes; | 3767 | space_info->bytes_may_use += orig_bytes; |
3768 | trace_btrfs_space_reservation(root->fs_info, | ||
3769 | "space_info", | ||
3770 | (u64)space_info, | ||
3771 | orig_bytes, 1); | ||
3652 | ret = 0; | 3772 | ret = 0; |
3653 | } else { | 3773 | } else { |
3654 | wait_ordered = true; | 3774 | wait_ordered = true; |
@@ -3755,7 +3875,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | |||
3755 | spin_unlock(&block_rsv->lock); | 3875 | spin_unlock(&block_rsv->lock); |
3756 | } | 3876 | } |
3757 | 3877 | ||
3758 | static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | 3878 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, |
3879 | struct btrfs_block_rsv *block_rsv, | ||
3759 | struct btrfs_block_rsv *dest, u64 num_bytes) | 3880 | struct btrfs_block_rsv *dest, u64 num_bytes) |
3760 | { | 3881 | { |
3761 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3882 | struct btrfs_space_info *space_info = block_rsv->space_info; |
@@ -3791,6 +3912,9 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | |||
3791 | if (num_bytes) { | 3912 | if (num_bytes) { |
3792 | spin_lock(&space_info->lock); | 3913 | spin_lock(&space_info->lock); |
3793 | space_info->bytes_may_use -= num_bytes; | 3914 | space_info->bytes_may_use -= num_bytes; |
3915 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
3916 | (u64)space_info, | ||
3917 | num_bytes, 0); | ||
3794 | space_info->reservation_progress++; | 3918 | space_info->reservation_progress++; |
3795 | spin_unlock(&space_info->lock); | 3919 | spin_unlock(&space_info->lock); |
3796 | } | 3920 | } |
@@ -3947,7 +4071,8 @@ void btrfs_block_rsv_release(struct btrfs_root *root, | |||
3947 | if (global_rsv->full || global_rsv == block_rsv || | 4071 | if (global_rsv->full || global_rsv == block_rsv || |
3948 | block_rsv->space_info != global_rsv->space_info) | 4072 | block_rsv->space_info != global_rsv->space_info) |
3949 | global_rsv = NULL; | 4073 | global_rsv = NULL; |
3950 | block_rsv_release_bytes(block_rsv, global_rsv, num_bytes); | 4074 | block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv, |
4075 | num_bytes); | ||
3951 | } | 4076 | } |
3952 | 4077 | ||
3953 | /* | 4078 | /* |
@@ -4006,11 +4131,15 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
4006 | num_bytes = sinfo->total_bytes - num_bytes; | 4131 | num_bytes = sinfo->total_bytes - num_bytes; |
4007 | block_rsv->reserved += num_bytes; | 4132 | block_rsv->reserved += num_bytes; |
4008 | sinfo->bytes_may_use += num_bytes; | 4133 | sinfo->bytes_may_use += num_bytes; |
4134 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
4135 | (u64)sinfo, num_bytes, 1); | ||
4009 | } | 4136 | } |
4010 | 4137 | ||
4011 | if (block_rsv->reserved >= block_rsv->size) { | 4138 | if (block_rsv->reserved >= block_rsv->size) { |
4012 | num_bytes = block_rsv->reserved - block_rsv->size; | 4139 | num_bytes = block_rsv->reserved - block_rsv->size; |
4013 | sinfo->bytes_may_use -= num_bytes; | 4140 | sinfo->bytes_may_use -= num_bytes; |
4141 | trace_btrfs_space_reservation(fs_info, "space_info", | ||
4142 | (u64)sinfo, num_bytes, 0); | ||
4014 | sinfo->reservation_progress++; | 4143 | sinfo->reservation_progress++; |
4015 | block_rsv->reserved = block_rsv->size; | 4144 | block_rsv->reserved = block_rsv->size; |
4016 | block_rsv->full = 1; | 4145 | block_rsv->full = 1; |
@@ -4045,7 +4174,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
4045 | 4174 | ||
4046 | static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | 4175 | static void release_global_block_rsv(struct btrfs_fs_info *fs_info) |
4047 | { | 4176 | { |
4048 | block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1); | 4177 | block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, |
4178 | (u64)-1); | ||
4049 | WARN_ON(fs_info->delalloc_block_rsv.size > 0); | 4179 | WARN_ON(fs_info->delalloc_block_rsv.size > 0); |
4050 | WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); | 4180 | WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); |
4051 | WARN_ON(fs_info->trans_block_rsv.size > 0); | 4181 | WARN_ON(fs_info->trans_block_rsv.size > 0); |
@@ -4062,6 +4192,8 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | |||
4062 | if (!trans->bytes_reserved) | 4192 | if (!trans->bytes_reserved) |
4063 | return; | 4193 | return; |
4064 | 4194 | ||
4195 | trace_btrfs_space_reservation(root->fs_info, "transaction", (u64)trans, | ||
4196 | trans->bytes_reserved, 0); | ||
4065 | btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); | 4197 | btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); |
4066 | trans->bytes_reserved = 0; | 4198 | trans->bytes_reserved = 0; |
4067 | } | 4199 | } |
@@ -4079,6 +4211,8 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | |||
4079 | * when we are truly done with the orphan item. | 4211 | * when we are truly done with the orphan item. |
4080 | */ | 4212 | */ |
4081 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 4213 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
4214 | trace_btrfs_space_reservation(root->fs_info, "orphan", | ||
4215 | btrfs_ino(inode), num_bytes, 1); | ||
4082 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 4216 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
4083 | } | 4217 | } |
4084 | 4218 | ||
@@ -4086,6 +4220,8 @@ void btrfs_orphan_release_metadata(struct inode *inode) | |||
4086 | { | 4220 | { |
4087 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4221 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4088 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 4222 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
4223 | trace_btrfs_space_reservation(root->fs_info, "orphan", | ||
4224 | btrfs_ino(inode), num_bytes, 0); | ||
4089 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); | 4225 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); |
4090 | } | 4226 | } |
4091 | 4227 | ||
@@ -4213,12 +4349,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4213 | /* Need to be holding the i_mutex here if we aren't free space cache */ | 4349 | /* Need to be holding the i_mutex here if we aren't free space cache */ |
4214 | if (btrfs_is_free_space_inode(root, inode)) | 4350 | if (btrfs_is_free_space_inode(root, inode)) |
4215 | flush = 0; | 4351 | flush = 0; |
4216 | else | ||
4217 | WARN_ON(!mutex_is_locked(&inode->i_mutex)); | ||
4218 | 4352 | ||
4219 | if (flush && btrfs_transaction_in_commit(root->fs_info)) | 4353 | if (flush && btrfs_transaction_in_commit(root->fs_info)) |
4220 | schedule_timeout(1); | 4354 | schedule_timeout(1); |
4221 | 4355 | ||
4356 | mutex_lock(&BTRFS_I(inode)->delalloc_mutex); | ||
4222 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4357 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4223 | 4358 | ||
4224 | spin_lock(&BTRFS_I(inode)->lock); | 4359 | spin_lock(&BTRFS_I(inode)->lock); |
@@ -4266,8 +4401,14 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4266 | if (dropped) | 4401 | if (dropped) |
4267 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | 4402 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
4268 | 4403 | ||
4269 | if (to_free) | 4404 | if (to_free) { |
4270 | btrfs_block_rsv_release(root, block_rsv, to_free); | 4405 | btrfs_block_rsv_release(root, block_rsv, to_free); |
4406 | trace_btrfs_space_reservation(root->fs_info, | ||
4407 | "delalloc", | ||
4408 | btrfs_ino(inode), | ||
4409 | to_free, 0); | ||
4410 | } | ||
4411 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | ||
4271 | return ret; | 4412 | return ret; |
4272 | } | 4413 | } |
4273 | 4414 | ||
@@ -4278,7 +4419,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4278 | } | 4419 | } |
4279 | BTRFS_I(inode)->reserved_extents += nr_extents; | 4420 | BTRFS_I(inode)->reserved_extents += nr_extents; |
4280 | spin_unlock(&BTRFS_I(inode)->lock); | 4421 | spin_unlock(&BTRFS_I(inode)->lock); |
4422 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | ||
4281 | 4423 | ||
4424 | if (to_reserve) | ||
4425 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | ||
4426 | btrfs_ino(inode), to_reserve, 1); | ||
4282 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4427 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
4283 | 4428 | ||
4284 | return 0; | 4429 | return 0; |
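btrfs_delalloc_reserve_metadata() replaces the i_mutex assertion with a dedicated per-inode delalloc_mutex held across the whole reservation, which serializes concurrent reservers without requiring callers to hold i_mutex. Note the lock must be dropped on both exits, the ENOSPC error path and the success path, a classic pairing:

    mutex_lock(&BTRFS_I(inode)->delalloc_mutex);

    ret = do_reservation(inode, num_bytes);     /* stand-in for the real body */
    if (ret) {
            mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);  /* error exit */
            return ret;
    }

    mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);          /* success exit */
    return 0;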
@@ -4308,6 +4453,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
4308 | if (dropped > 0) | 4453 | if (dropped > 0) |
4309 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | 4454 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
4310 | 4455 | ||
4456 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | ||
4457 | btrfs_ino(inode), to_free, 0); | ||
4311 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4458 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
4312 | to_free); | 4459 | to_free); |
4313 | } | 4460 | } |
@@ -4562,7 +4709,10 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | |||
4562 | cache->reserved += num_bytes; | 4709 | cache->reserved += num_bytes; |
4563 | space_info->bytes_reserved += num_bytes; | 4710 | space_info->bytes_reserved += num_bytes; |
4564 | if (reserve == RESERVE_ALLOC) { | 4711 | if (reserve == RESERVE_ALLOC) { |
4565 | BUG_ON(space_info->bytes_may_use < num_bytes); | 4712 | trace_btrfs_space_reservation(cache->fs_info, |
4713 | "space_info", | ||
4714 | (u64)space_info, | ||
4715 | num_bytes, 0); | ||
4566 | space_info->bytes_may_use -= num_bytes; | 4716 | space_info->bytes_may_use -= num_bytes; |
4567 | } | 4717 | } |
4568 | } | 4718 | } |
@@ -4928,6 +5078,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
4928 | rb_erase(&head->node.rb_node, &delayed_refs->root); | 5078 | rb_erase(&head->node.rb_node, &delayed_refs->root); |
4929 | 5079 | ||
4930 | delayed_refs->num_entries--; | 5080 | delayed_refs->num_entries--; |
5081 | if (waitqueue_active(&delayed_refs->seq_wait)) | ||
5082 | wake_up(&delayed_refs->seq_wait); | ||
4931 | 5083 | ||
4932 | /* | 5084 | /* |
4933 | * we don't take a ref on the node because we're removing it from the | 5085 | * we don't take a ref on the node because we're removing it from the |
@@ -4955,16 +5107,17 @@ out: | |||
4955 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 5107 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
4956 | struct btrfs_root *root, | 5108 | struct btrfs_root *root, |
4957 | struct extent_buffer *buf, | 5109 | struct extent_buffer *buf, |
4958 | u64 parent, int last_ref) | 5110 | u64 parent, int last_ref, int for_cow) |
4959 | { | 5111 | { |
4960 | struct btrfs_block_group_cache *cache = NULL; | 5112 | struct btrfs_block_group_cache *cache = NULL; |
4961 | int ret; | 5113 | int ret; |
4962 | 5114 | ||
4963 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 5115 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
4964 | ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, | 5116 | ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, |
4965 | parent, root->root_key.objectid, | 5117 | buf->start, buf->len, |
4966 | btrfs_header_level(buf), | 5118 | parent, root->root_key.objectid, |
4967 | BTRFS_DROP_DELAYED_REF, NULL); | 5119 | btrfs_header_level(buf), |
5120 | BTRFS_DROP_DELAYED_REF, NULL, for_cow); | ||
4968 | BUG_ON(ret); | 5121 | BUG_ON(ret); |
4969 | } | 5122 | } |
4970 | 5123 | ||
@@ -4999,12 +5152,12 @@ out: | |||
4999 | btrfs_put_block_group(cache); | 5152 | btrfs_put_block_group(cache); |
5000 | } | 5153 | } |
5001 | 5154 | ||
5002 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 5155 | int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
5003 | struct btrfs_root *root, | 5156 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, |
5004 | u64 bytenr, u64 num_bytes, u64 parent, | 5157 | u64 owner, u64 offset, int for_cow) |
5005 | u64 root_objectid, u64 owner, u64 offset) | ||
5006 | { | 5158 | { |
5007 | int ret; | 5159 | int ret; |
5160 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
5008 | 5161 | ||
5009 | /* | 5162 | /* |
5010 | * tree log blocks never actually go into the extent allocation | 5163 | * tree log blocks never actually go into the extent allocation |
@@ -5016,14 +5169,17 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5016 | btrfs_pin_extent(root, bytenr, num_bytes, 1); | 5169 | btrfs_pin_extent(root, bytenr, num_bytes, 1); |
5017 | ret = 0; | 5170 | ret = 0; |
5018 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 5171 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
5019 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, | 5172 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, |
5173 | num_bytes, | ||
5020 | parent, root_objectid, (int)owner, | 5174 | parent, root_objectid, (int)owner, |
5021 | BTRFS_DROP_DELAYED_REF, NULL); | 5175 | BTRFS_DROP_DELAYED_REF, NULL, for_cow); |
5022 | BUG_ON(ret); | 5176 | BUG_ON(ret); |
5023 | } else { | 5177 | } else { |
5024 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, | 5178 | ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, |
5025 | parent, root_objectid, owner, | 5179 | num_bytes, |
5026 | offset, BTRFS_DROP_DELAYED_REF, NULL); | 5180 | parent, root_objectid, owner, |
5181 | offset, BTRFS_DROP_DELAYED_REF, | ||
5182 | NULL, for_cow); | ||
5027 | BUG_ON(ret); | 5183 | BUG_ON(ret); |
5028 | } | 5184 | } |
5029 | return ret; | 5185 | return ret; |
@@ -5146,6 +5302,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
5146 | ins->objectid = 0; | 5302 | ins->objectid = 0; |
5147 | ins->offset = 0; | 5303 | ins->offset = 0; |
5148 | 5304 | ||
5305 | trace_find_free_extent(orig_root, num_bytes, empty_size, data); | ||
5306 | |||
5149 | space_info = __find_space_info(root->fs_info, data); | 5307 | space_info = __find_space_info(root->fs_info, data); |
5150 | if (!space_info) { | 5308 | if (!space_info) { |
5151 | printk(KERN_ERR "No space info for %llu\n", data); | 5309 | printk(KERN_ERR "No space info for %llu\n", data); |
@@ -5295,15 +5453,6 @@ alloc: | |||
5295 | if (unlikely(block_group->ro)) | 5453 | if (unlikely(block_group->ro)) |
5296 | goto loop; | 5454 | goto loop; |
5297 | 5455 | ||
5298 | spin_lock(&block_group->free_space_ctl->tree_lock); | ||
5299 | if (cached && | ||
5300 | block_group->free_space_ctl->free_space < | ||
5301 | num_bytes + empty_cluster + empty_size) { | ||
5302 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5303 | goto loop; | ||
5304 | } | ||
5305 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5306 | |||
5307 | /* | 5456 | /* |
5308 | * Ok we want to try and use the cluster allocator, so | 5457 | * Ok we want to try and use the cluster allocator, so |
5309 | * lets look there | 5458 | * lets look there |
@@ -5331,6 +5480,8 @@ alloc: | |||
5331 | if (offset) { | 5480 | if (offset) { |
5332 | /* we have a block, we're done */ | 5481 | /* we have a block, we're done */ |
5333 | spin_unlock(&last_ptr->refill_lock); | 5482 | spin_unlock(&last_ptr->refill_lock); |
5483 | trace_btrfs_reserve_extent_cluster(root, | ||
5484 | block_group, search_start, num_bytes); | ||
5334 | goto checks; | 5485 | goto checks; |
5335 | } | 5486 | } |
5336 | 5487 | ||
@@ -5349,8 +5500,15 @@ refill_cluster: | |||
5349 | * plenty of times and not have found | 5500 | * plenty of times and not have found |
5350 | * anything, so we are likely way too | 5501 | * anything, so we are likely way too |
5351 | * fragmented for the clustering stuff to find | 5502 | * fragmented for the clustering stuff to find |
5352 | * anything. */ | 5503 | * anything. |
5353 | if (loop >= LOOP_NO_EMPTY_SIZE) { | 5504 | * |
5505 | * However, if the cluster is taken from the | ||
5506 | * current block group, release the cluster | ||
5507 | * first, so that we stand a better chance of | ||
5508 | * succeeding in the unclustered | ||
5509 | * allocation. */ | ||
5510 | if (loop >= LOOP_NO_EMPTY_SIZE && | ||
5511 | last_ptr->block_group != block_group) { | ||
5354 | spin_unlock(&last_ptr->refill_lock); | 5512 | spin_unlock(&last_ptr->refill_lock); |
5355 | goto unclustered_alloc; | 5513 | goto unclustered_alloc; |
5356 | } | 5514 | } |
@@ -5361,6 +5519,11 @@ refill_cluster: | |||
5361 | */ | 5519 | */ |
5362 | btrfs_return_cluster_to_free_space(NULL, last_ptr); | 5520 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
5363 | 5521 | ||
5522 | if (loop >= LOOP_NO_EMPTY_SIZE) { | ||
5523 | spin_unlock(&last_ptr->refill_lock); | ||
5524 | goto unclustered_alloc; | ||
5525 | } | ||
5526 | |||
5364 | /* allocate a cluster in this block group */ | 5527 | /* allocate a cluster in this block group */ |
5365 | ret = btrfs_find_space_cluster(trans, root, | 5528 | ret = btrfs_find_space_cluster(trans, root, |
5366 | block_group, last_ptr, | 5529 | block_group, last_ptr, |
@@ -5377,6 +5540,9 @@ refill_cluster: | |||
5377 | if (offset) { | 5540 | if (offset) { |
5378 | /* we found one, proceed */ | 5541 | /* we found one, proceed */ |
5379 | spin_unlock(&last_ptr->refill_lock); | 5542 | spin_unlock(&last_ptr->refill_lock); |
5543 | trace_btrfs_reserve_extent_cluster(root, | ||
5544 | block_group, search_start, | ||
5545 | num_bytes); | ||
5380 | goto checks; | 5546 | goto checks; |
5381 | } | 5547 | } |
5382 | } else if (!cached && loop > LOOP_CACHING_NOWAIT | 5548 | } else if (!cached && loop > LOOP_CACHING_NOWAIT |
@@ -5401,6 +5567,15 @@ refill_cluster: | |||
5401 | } | 5567 | } |
5402 | 5568 | ||
5403 | unclustered_alloc: | 5569 | unclustered_alloc: |
5570 | spin_lock(&block_group->free_space_ctl->tree_lock); | ||
5571 | if (cached && | ||
5572 | block_group->free_space_ctl->free_space < | ||
5573 | num_bytes + empty_cluster + empty_size) { | ||
5574 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5575 | goto loop; | ||
5576 | } | ||
5577 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5578 | |||
5404 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 5579 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
5405 | num_bytes, empty_size); | 5580 | num_bytes, empty_size); |
5406 | /* | 5581 | /* |
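These two find_free_extent() hunks move the cheap free-space check from before the cluster attempt to the unclustered_alloc: path. The motivation is assumed to be that a cluster can satisfy the allocation even when the block group's own free-space counter looks too small, so bailing out early was overly conservative; the check still guards the unclustered fallback:

    unclustered_alloc:
            /* only now is raw free space the deciding factor */
            spin_lock(&block_group->free_space_ctl->tree_lock);
            if (cached &&
                block_group->free_space_ctl->free_space <
                num_bytes + empty_cluster + empty_size) {
                    spin_unlock(&block_group->free_space_ctl->tree_lock);
                    goto loop;      /* not enough here; try the next group */
            }
            spin_unlock(&block_group->free_space_ctl->tree_lock);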
@@ -5438,9 +5613,6 @@ checks: | |||
5438 | goto loop; | 5613 | goto loop; |
5439 | } | 5614 | } |
5440 | 5615 | ||
5441 | ins->objectid = search_start; | ||
5442 | ins->offset = num_bytes; | ||
5443 | |||
5444 | if (offset < search_start) | 5616 | if (offset < search_start) |
5445 | btrfs_add_free_space(used_block_group, offset, | 5617 | btrfs_add_free_space(used_block_group, offset, |
5446 | search_start - offset); | 5618 | search_start - offset); |
@@ -5457,6 +5629,8 @@ checks: | |||
5457 | ins->objectid = search_start; | 5629 | ins->objectid = search_start; |
5458 | ins->offset = num_bytes; | 5630 | ins->offset = num_bytes; |
5459 | 5631 | ||
5632 | trace_btrfs_reserve_extent(orig_root, block_group, | ||
5633 | search_start, num_bytes); | ||
5460 | if (offset < search_start) | 5634 | if (offset < search_start) |
5461 | btrfs_add_free_space(used_block_group, offset, | 5635 | btrfs_add_free_space(used_block_group, offset, |
5462 | search_start - offset); | 5636 | search_start - offset); |
@@ -5621,6 +5795,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
5621 | u64 search_end, struct btrfs_key *ins, | 5795 | u64 search_end, struct btrfs_key *ins, |
5622 | u64 data) | 5796 | u64 data) |
5623 | { | 5797 | { |
5798 | bool final_tried = false; | ||
5624 | int ret; | 5799 | int ret; |
5625 | u64 search_start = 0; | 5800 | u64 search_start = 0; |
5626 | 5801 | ||
@@ -5640,22 +5815,25 @@ again: | |||
5640 | search_start, search_end, hint_byte, | 5815 | search_start, search_end, hint_byte, |
5641 | ins, data); | 5816 | ins, data); |
5642 | 5817 | ||
5643 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { | 5818 | if (ret == -ENOSPC) { |
5644 | num_bytes = num_bytes >> 1; | 5819 | if (!final_tried) { |
5645 | num_bytes = num_bytes & ~(root->sectorsize - 1); | 5820 | num_bytes = num_bytes >> 1; |
5646 | num_bytes = max(num_bytes, min_alloc_size); | 5821 | num_bytes = num_bytes & ~(root->sectorsize - 1); |
5647 | do_chunk_alloc(trans, root->fs_info->extent_root, | 5822 | num_bytes = max(num_bytes, min_alloc_size); |
5648 | num_bytes, data, CHUNK_ALLOC_FORCE); | 5823 | do_chunk_alloc(trans, root->fs_info->extent_root, |
5649 | goto again; | 5824 | num_bytes, data, CHUNK_ALLOC_FORCE); |
5650 | } | 5825 | if (num_bytes == min_alloc_size) |
5651 | if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { | 5826 | final_tried = true; |
5652 | struct btrfs_space_info *sinfo; | 5827 | goto again; |
5653 | 5828 | } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) { | |
5654 | sinfo = __find_space_info(root->fs_info, data); | 5829 | struct btrfs_space_info *sinfo; |
5655 | printk(KERN_ERR "btrfs allocation failed flags %llu, " | 5830 | |
5656 | "wanted %llu\n", (unsigned long long)data, | 5831 | sinfo = __find_space_info(root->fs_info, data); |
5657 | (unsigned long long)num_bytes); | 5832 | printk(KERN_ERR "btrfs allocation failed flags %llu, " |
5658 | dump_space_info(sinfo, num_bytes, 1); | 5833 | "wanted %llu\n", (unsigned long long)data, |
5834 | (unsigned long long)num_bytes); | ||
5835 | dump_space_info(sinfo, num_bytes, 1); | ||
5836 | } | ||
5659 | } | 5837 | } |
5660 | 5838 | ||
5661 | trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); | 5839 | trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); |
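The -ENOSPC handling in btrfs_reserve_extent() is restructured around a final_tried flag: the request keeps halving (sector-aligned, floored at min_alloc_size) with a forced chunk allocation between tries, and only the attempt made at exactly min_alloc_size counts as final, at which point the optional debug dump fires. The retry skeleton:

    bool final_tried = false;

    again:
            ret = find_free_extent(/* arguments elided in this sketch */);
            if (ret == -ENOSPC && !final_tried) {
                    num_bytes = num_bytes >> 1;             /* halve the ask */
                    num_bytes &= ~(root->sectorsize - 1);   /* keep it aligned */
                    num_bytes = max(num_bytes, min_alloc_size);
                    do_chunk_alloc(trans, root->fs_info->extent_root,
                                   num_bytes, data, CHUNK_ALLOC_FORCE);
                    if (num_bytes == min_alloc_size)
                            final_tried = true;             /* last chance */
                    goto again;
            }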
@@ -5842,9 +6020,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
5842 | 6020 | ||
5843 | BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); | 6021 | BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); |
5844 | 6022 | ||
5845 | ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, | 6023 | ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid, |
5846 | 0, root_objectid, owner, offset, | 6024 | ins->offset, 0, |
5847 | BTRFS_ADD_DELAYED_EXTENT, NULL); | 6025 | root_objectid, owner, offset, |
6026 | BTRFS_ADD_DELAYED_EXTENT, NULL, 0); | ||
5848 | return ret; | 6027 | return ret; |
5849 | } | 6028 | } |
5850 | 6029 | ||
@@ -5997,10 +6176,11 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
5997 | return ERR_PTR(-ENOSPC); | 6176 | return ERR_PTR(-ENOSPC); |
5998 | } | 6177 | } |
5999 | 6178 | ||
6000 | static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize) | 6179 | static void unuse_block_rsv(struct btrfs_fs_info *fs_info, |
6180 | struct btrfs_block_rsv *block_rsv, u32 blocksize) | ||
6001 | { | 6181 | { |
6002 | block_rsv_add_bytes(block_rsv, blocksize, 0); | 6182 | block_rsv_add_bytes(block_rsv, blocksize, 0); |
6003 | block_rsv_release_bytes(block_rsv, NULL, 0); | 6183 | block_rsv_release_bytes(fs_info, block_rsv, NULL, 0); |
6004 | } | 6184 | } |
6005 | 6185 | ||
6006 | /* | 6186 | /* |
@@ -6014,7 +6194,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6014 | struct btrfs_root *root, u32 blocksize, | 6194 | struct btrfs_root *root, u32 blocksize, |
6015 | u64 parent, u64 root_objectid, | 6195 | u64 parent, u64 root_objectid, |
6016 | struct btrfs_disk_key *key, int level, | 6196 | struct btrfs_disk_key *key, int level, |
6017 | u64 hint, u64 empty_size) | 6197 | u64 hint, u64 empty_size, int for_cow) |
6018 | { | 6198 | { |
6019 | struct btrfs_key ins; | 6199 | struct btrfs_key ins; |
6020 | struct btrfs_block_rsv *block_rsv; | 6200 | struct btrfs_block_rsv *block_rsv; |
@@ -6030,7 +6210,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6030 | ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, | 6210 | ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, |
6031 | empty_size, hint, (u64)-1, &ins, 0); | 6211 | empty_size, hint, (u64)-1, &ins, 0); |
6032 | if (ret) { | 6212 | if (ret) { |
6033 | unuse_block_rsv(block_rsv, blocksize); | 6213 | unuse_block_rsv(root->fs_info, block_rsv, blocksize); |
6034 | return ERR_PTR(ret); | 6214 | return ERR_PTR(ret); |
6035 | } | 6215 | } |
6036 | 6216 | ||
@@ -6058,10 +6238,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6058 | extent_op->update_flags = 1; | 6238 | extent_op->update_flags = 1; |
6059 | extent_op->is_data = 0; | 6239 | extent_op->is_data = 0; |
6060 | 6240 | ||
6061 | ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, | 6241 | ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, |
6242 | ins.objectid, | ||
6062 | ins.offset, parent, root_objectid, | 6243 | ins.offset, parent, root_objectid, |
6063 | level, BTRFS_ADD_DELAYED_EXTENT, | 6244 | level, BTRFS_ADD_DELAYED_EXTENT, |
6064 | extent_op); | 6245 | extent_op, for_cow); |
6065 | BUG_ON(ret); | 6246 | BUG_ON(ret); |
6066 | } | 6247 | } |
6067 | return buf; | 6248 | return buf; |
@@ -6078,6 +6259,7 @@ struct walk_control { | |||
6078 | int keep_locks; | 6259 | int keep_locks; |
6079 | int reada_slot; | 6260 | int reada_slot; |
6080 | int reada_count; | 6261 | int reada_count; |
6262 | int for_reloc; | ||
6081 | }; | 6263 | }; |
6082 | 6264 | ||
6083 | #define DROP_REFERENCE 1 | 6265 | #define DROP_REFERENCE 1 |
@@ -6216,9 +6398,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
6216 | /* wc->stage == UPDATE_BACKREF */ | 6398 | /* wc->stage == UPDATE_BACKREF */ |
6217 | if (!(wc->flags[level] & flag)) { | 6399 | if (!(wc->flags[level] & flag)) { |
6218 | BUG_ON(!path->locks[level]); | 6400 | BUG_ON(!path->locks[level]); |
6219 | ret = btrfs_inc_ref(trans, root, eb, 1); | 6401 | ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); |
6220 | BUG_ON(ret); | 6402 | BUG_ON(ret); |
6221 | ret = btrfs_dec_ref(trans, root, eb, 0); | 6403 | ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); |
6222 | BUG_ON(ret); | 6404 | BUG_ON(ret); |
6223 | ret = btrfs_set_disk_extent_flags(trans, root, eb->start, | 6405 | ret = btrfs_set_disk_extent_flags(trans, root, eb->start, |
6224 | eb->len, flag, 0); | 6406 | eb->len, flag, 0); |
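walk_control gains a for_reloc bit (filled in from the new btrfs_drop_snapshot() argument further down, and hard-coded to 1 in btrfs_drop_subtree()), and every btrfs_inc_ref()/btrfs_dec_ref() call in the walkers now passes it along. The implied prototypes, as a sketch: only the arity and argument order follow from the call sites, the parameter names here are guesses:

	struct btrfs_trans_handle; struct btrfs_root; struct extent_buffer;

	int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
			  struct extent_buffer *buf, int full_backref, int for_reloc);
	int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
			  struct extent_buffer *buf, int full_backref, int for_reloc);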
@@ -6362,7 +6544,7 @@ skip: | |||
6362 | } | 6544 | } |
6363 | 6545 | ||
6364 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, | 6546 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, |
6365 | root->root_key.objectid, level - 1, 0); | 6547 | root->root_key.objectid, level - 1, 0, 0); |
6366 | BUG_ON(ret); | 6548 | BUG_ON(ret); |
6367 | } | 6549 | } |
6368 | btrfs_tree_unlock(next); | 6550 | btrfs_tree_unlock(next); |
@@ -6436,9 +6618,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6436 | if (wc->refs[level] == 1) { | 6618 | if (wc->refs[level] == 1) { |
6437 | if (level == 0) { | 6619 | if (level == 0) { |
6438 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) | 6620 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) |
6439 | ret = btrfs_dec_ref(trans, root, eb, 1); | 6621 | ret = btrfs_dec_ref(trans, root, eb, 1, |
6622 | wc->for_reloc); | ||
6440 | else | 6623 | else |
6441 | ret = btrfs_dec_ref(trans, root, eb, 0); | 6624 | ret = btrfs_dec_ref(trans, root, eb, 0, |
6625 | wc->for_reloc); | ||
6442 | BUG_ON(ret); | 6626 | BUG_ON(ret); |
6443 | } | 6627 | } |
6444 | /* make block locked assertion in clean_tree_block happy */ | 6628 | /* make block locked assertion in clean_tree_block happy */ |
@@ -6465,7 +6649,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6465 | btrfs_header_owner(path->nodes[level + 1])); | 6649 | btrfs_header_owner(path->nodes[level + 1])); |
6466 | } | 6650 | } |
6467 | 6651 | ||
6468 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); | 6652 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0); |
6469 | out: | 6653 | out: |
6470 | wc->refs[level] = 0; | 6654 | wc->refs[level] = 0; |
6471 | wc->flags[level] = 0; | 6655 | wc->flags[level] = 0; |
@@ -6549,7 +6733,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
6549 | * blocks are properly updated. | 6733 | * blocks are properly updated. |
6550 | */ | 6734 | */ |
6551 | void btrfs_drop_snapshot(struct btrfs_root *root, | 6735 | void btrfs_drop_snapshot(struct btrfs_root *root, |
6552 | struct btrfs_block_rsv *block_rsv, int update_ref) | 6736 | struct btrfs_block_rsv *block_rsv, int update_ref, |
6737 | int for_reloc) | ||
6553 | { | 6738 | { |
6554 | struct btrfs_path *path; | 6739 | struct btrfs_path *path; |
6555 | struct btrfs_trans_handle *trans; | 6740 | struct btrfs_trans_handle *trans; |
@@ -6637,6 +6822,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root, | |||
6637 | wc->stage = DROP_REFERENCE; | 6822 | wc->stage = DROP_REFERENCE; |
6638 | wc->update_ref = update_ref; | 6823 | wc->update_ref = update_ref; |
6639 | wc->keep_locks = 0; | 6824 | wc->keep_locks = 0; |
6825 | wc->for_reloc = for_reloc; | ||
6640 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | 6826 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); |
6641 | 6827 | ||
6642 | while (1) { | 6828 | while (1) { |
@@ -6721,6 +6907,7 @@ out: | |||
6721 | * drop subtree rooted at tree block 'node'. | 6907 | * drop subtree rooted at tree block 'node'. |
6722 | * | 6908 | * |
6723 | * NOTE: this function will unlock and release tree block 'node' | 6909 | * NOTE: this function will unlock and release tree block 'node' |
6910 | * Only used by relocation code. | ||

6724 | */ | 6911 | */ |
6725 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 6912 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
6726 | struct btrfs_root *root, | 6913 | struct btrfs_root *root, |
@@ -6765,6 +6952,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6765 | wc->stage = DROP_REFERENCE; | 6952 | wc->stage = DROP_REFERENCE; |
6766 | wc->update_ref = 0; | 6953 | wc->update_ref = 0; |
6767 | wc->keep_locks = 1; | 6954 | wc->keep_locks = 1; |
6955 | wc->for_reloc = 1; | ||
6768 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | 6956 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); |
6769 | 6957 | ||
6770 | while (1) { | 6958 | while (1) { |
@@ -6792,6 +6980,29 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
6792 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | | 6980 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | |
6793 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | 6981 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; |
6794 | 6982 | ||
6983 | if (root->fs_info->balance_ctl) { | ||
6984 | struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; | ||
6985 | u64 tgt = 0; | ||
6986 | |||
6987 | /* pick restriper's target profile and return */ | ||
6988 | if (flags & BTRFS_BLOCK_GROUP_DATA && | ||
6989 | bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) { | ||
6990 | tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target; | ||
6991 | } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM && | ||
6992 | bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { | ||
6993 | tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target; | ||
6994 | } else if (flags & BTRFS_BLOCK_GROUP_METADATA && | ||
6995 | bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) { | ||
6996 | tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target; | ||
6997 | } | ||
6998 | |||
6999 | if (tgt) { | ||
7000 | /* extended -> chunk profile */ | ||
7001 | tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE; | ||
7002 | return tgt; | ||
7003 | } | ||
7004 | } | ||
7005 | |||
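When a balance (restriper) is running with a convert filter, the profile it was asked to convert to overrides the usual degraded-RAID downgrade logic. The target is kept in extended form, so the BTRFS_AVAIL_ALLOC_BIT_SINGLE marker must be masked off to get back to a chunk-level profile. A self-contained sketch of that pick-and-convert step; the flag bits are illustrative stand-ins, not the kernel's real values:

	#include <stdint.h>
	#include <stdio.h>

	#define BG_DATA			(1ULL << 0)
	#define AVAIL_ALLOC_BIT_SINGLE	(1ULL << 48)	/* extended "single" marker */

	static uint64_t pick_target(uint64_t flags, uint64_t balance_target,
				    int converting)
	{
		uint64_t tgt = 0;

		if ((flags & BG_DATA) && converting)
			tgt = BG_DATA | balance_target;	/* restriper's wish wins */

		if (tgt) {
			tgt &= ~AVAIL_ALLOC_BIT_SINGLE;	/* extended -> chunk profile */
			return tgt;
		}
		return flags;	/* no balance running: fall through to old logic */
	}

	int main(void)
	{
		/* converting data to "single": the marker bit exists only in
		 * the extended form and is stripped for chunk allocation */
		printf("%#llx\n", (unsigned long long)
		       pick_target(BG_DATA, AVAIL_ALLOC_BIT_SINGLE, 1));	/* 0x1 */
		return 0;
	}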
6795 | /* | 7006 | /* |
6796 | * we add in the count of missing devices because we want | 7007 | * we add in the count of missing devices because we want |
6797 | * to make sure that any RAID levels on a degraded FS | 7008 | * to make sure that any RAID levels on a degraded FS |
@@ -7085,7 +7296,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7085 | * space to fit our block group in. | 7296 | * space to fit our block group in. |
7086 | */ | 7297 | */ |
7087 | if (device->total_bytes > device->bytes_used + min_free) { | 7298 | if (device->total_bytes > device->bytes_used + min_free) { |
7088 | ret = find_free_dev_extent(NULL, device, min_free, | 7299 | ret = find_free_dev_extent(device, min_free, |
7089 | &dev_offset, NULL); | 7300 | &dev_offset, NULL); |
7090 | if (!ret) | 7301 | if (!ret) |
7091 | dev_nr++; | 7302 | dev_nr++; |
@@ -7447,6 +7658,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7447 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | 7658 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, |
7448 | &cache->space_info); | 7659 | &cache->space_info); |
7449 | BUG_ON(ret); | 7660 | BUG_ON(ret); |
7661 | update_global_block_rsv(root->fs_info); | ||
7450 | 7662 | ||
7451 | spin_lock(&cache->space_info->lock); | 7663 | spin_lock(&cache->space_info->lock); |
7452 | cache->space_info->bytes_readonly += cache->bytes_super; | 7664 | cache->space_info->bytes_readonly += cache->bytes_super; |
@@ -7466,6 +7678,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7466 | return 0; | 7678 | return 0; |
7467 | } | 7679 | } |
7468 | 7680 | ||
7681 | static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | ||
7682 | { | ||
7683 | u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK; | ||
7684 | |||
7685 | /* chunk -> extended profile */ | ||
7686 | if (extra_flags == 0) | ||
7687 | extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE; | ||
7688 | |||
7689 | if (flags & BTRFS_BLOCK_GROUP_DATA) | ||
7690 | fs_info->avail_data_alloc_bits &= ~extra_flags; | ||
7691 | if (flags & BTRFS_BLOCK_GROUP_METADATA) | ||
7692 | fs_info->avail_metadata_alloc_bits &= ~extra_flags; | ||
7693 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | ||
7694 | fs_info->avail_system_alloc_bits &= ~extra_flags; | ||
7695 | } | ||
7696 | |||
7469 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 7697 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
7470 | struct btrfs_root *root, u64 group_start) | 7698 | struct btrfs_root *root, u64 group_start) |
7471 | { | 7699 | { |
@@ -7476,6 +7704,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7476 | struct btrfs_key key; | 7704 | struct btrfs_key key; |
7477 | struct inode *inode; | 7705 | struct inode *inode; |
7478 | int ret; | 7706 | int ret; |
7707 | int index; | ||
7479 | int factor; | 7708 | int factor; |
7480 | 7709 | ||
7481 | root = root->fs_info->extent_root; | 7710 | root = root->fs_info->extent_root; |
@@ -7491,6 +7720,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7491 | free_excluded_extents(root, block_group); | 7720 | free_excluded_extents(root, block_group); |
7492 | 7721 | ||
7493 | memcpy(&key, &block_group->key, sizeof(key)); | 7722 | memcpy(&key, &block_group->key, sizeof(key)); |
7723 | index = get_block_group_index(block_group); | ||
7494 | if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | | 7724 | if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | |
7495 | BTRFS_BLOCK_GROUP_RAID1 | | 7725 | BTRFS_BLOCK_GROUP_RAID1 | |
7496 | BTRFS_BLOCK_GROUP_RAID10)) | 7726 | BTRFS_BLOCK_GROUP_RAID10)) |
@@ -7565,6 +7795,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7565 | * are still on the list after taking the semaphore | 7795 | * are still on the list after taking the semaphore |
7566 | */ | 7796 | */ |
7567 | list_del_init(&block_group->list); | 7797 | list_del_init(&block_group->list); |
7798 | if (list_empty(&block_group->space_info->block_groups[index])) | ||
7799 | clear_avail_alloc_bits(root->fs_info, block_group->flags); | ||
7568 | up_write(&block_group->space_info->groups_sem); | 7800 | up_write(&block_group->space_info->groups_sem); |
7569 | 7801 | ||
7570 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7802 | if (block_group->cached == BTRFS_CACHE_STARTED) |
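btrfs_remove_block_group() now remembers the group's raid index (get_block_group_index()) before unlinking it and, if the space_info list for that index is empty afterwards, calls clear_avail_alloc_bits() so avail_*_alloc_bits stop advertising a profile that has zero block groups left. Note the inverse mapping to the restriper hunk above: a chunk profile of zero ("single") is given its own extended bit so it can be cleared like any other profile. A toy model with stand-in bit values:

	#include <assert.h>
	#include <stdint.h>

	#define RAID0			(1ULL << 3)
	#define PROFILE_MASK		RAID0	/* pretend RAID0 is the only profile */
	#define ALLOC_BIT_SINGLE	(1ULL << 48)

	static uint64_t avail_data_bits = ALLOC_BIT_SINGLE | RAID0;

	static void clear_avail_bits(uint64_t group_flags)
	{
		uint64_t extra = group_flags & PROFILE_MASK;

		if (extra == 0)			/* chunk -> extended profile */
			extra = ALLOC_BIT_SINGLE;
		avail_data_bits &= ~extra;
	}

	int main(void)
	{
		clear_avail_bits(0);	/* the last "single" data group went away */
		assert(avail_data_bits == RAID0);
		return 0;
	}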
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 49f3c9dc09f4..fcf77e1ded40 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include "ctree.h" | 18 | #include "ctree.h" |
19 | #include "btrfs_inode.h" | 19 | #include "btrfs_inode.h" |
20 | #include "volumes.h" | 20 | #include "volumes.h" |
21 | #include "check-integrity.h" | ||
21 | 22 | ||
22 | static struct kmem_cache *extent_state_cache; | 23 | static struct kmem_cache *extent_state_cache; |
23 | static struct kmem_cache *extent_buffer_cache; | 24 | static struct kmem_cache *extent_buffer_cache; |
@@ -1895,7 +1896,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | |||
1895 | } | 1896 | } |
1896 | bio->bi_bdev = dev->bdev; | 1897 | bio->bi_bdev = dev->bdev; |
1897 | bio_add_page(bio, page, length, start-page_offset(page)); | 1898 | bio_add_page(bio, page, length, start-page_offset(page)); |
1898 | submit_bio(WRITE_SYNC, bio); | 1899 | btrfsic_submit_bio(WRITE_SYNC, bio); |
1899 | wait_for_completion(&compl); | 1900 | wait_for_completion(&compl); |
1900 | 1901 | ||
1901 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | 1902 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { |
@@ -2393,7 +2394,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
2393 | ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | 2394 | ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio, |
2394 | mirror_num, bio_flags, start); | 2395 | mirror_num, bio_flags, start); |
2395 | else | 2396 | else |
2396 | submit_bio(rw, bio); | 2397 | btrfsic_submit_bio(rw, bio); |
2397 | 2398 | ||
2398 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 2399 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
2399 | ret = -EOPNOTSUPP; | 2400 | ret = -EOPNOTSUPP; |
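Both submission paths are rerouted through btrfsic_submit_bio(), the entry point of the new integrity checker (matching the check-integrity.h include added above). Presumably the wrapper compiles away entirely when the checker is configured out, along these lines (a sketch, not the verbatim header):

	struct bio;

	#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
	void btrfsic_submit_bio(int rw, struct bio *bio);
	#else
	#define btrfsic_submit_bio submit_bio	/* checker off: zero-cost alias */
	#endif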
@@ -3579,6 +3580,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3579 | atomic_set(&eb->blocking_writers, 0); | 3580 | atomic_set(&eb->blocking_writers, 0); |
3580 | atomic_set(&eb->spinning_readers, 0); | 3581 | atomic_set(&eb->spinning_readers, 0); |
3581 | atomic_set(&eb->spinning_writers, 0); | 3582 | atomic_set(&eb->spinning_writers, 0); |
3583 | eb->lock_nested = 0; | ||
3582 | init_waitqueue_head(&eb->write_lock_wq); | 3584 | init_waitqueue_head(&eb->write_lock_wq); |
3583 | init_waitqueue_head(&eb->read_lock_wq); | 3585 | init_waitqueue_head(&eb->read_lock_wq); |
3584 | 3586 | ||
@@ -3907,6 +3909,8 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
3907 | while (start <= end) { | 3909 | while (start <= end) { |
3908 | index = start >> PAGE_CACHE_SHIFT; | 3910 | index = start >> PAGE_CACHE_SHIFT; |
3909 | page = find_get_page(tree->mapping, index); | 3911 | page = find_get_page(tree->mapping, index); |
3912 | if (!page) | ||
3913 | return 1; | ||
3910 | uptodate = PageUptodate(page); | 3914 | uptodate = PageUptodate(page); |
3911 | page_cache_release(page); | 3915 | page_cache_release(page); |
3912 | if (!uptodate) { | 3916 | if (!uptodate) { |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 7604c3001322..bc6a042cb6fc 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -129,6 +129,7 @@ struct extent_buffer { | |||
129 | struct list_head leak_list; | 129 | struct list_head leak_list; |
130 | struct rcu_head rcu_head; | 130 | struct rcu_head rcu_head; |
131 | atomic_t refs; | 131 | atomic_t refs; |
132 | pid_t lock_owner; | ||
132 | 133 | ||
133 | /* count of read lock holders on the extent buffer */ | 134 | /* count of read lock holders on the extent buffer */ |
134 | atomic_t write_locks; | 135 | atomic_t write_locks; |
@@ -137,6 +138,7 @@ struct extent_buffer { | |||
137 | atomic_t blocking_readers; | 138 | atomic_t blocking_readers; |
138 | atomic_t spinning_readers; | 139 | atomic_t spinning_readers; |
139 | atomic_t spinning_writers; | 140 | atomic_t spinning_writers; |
141 | int lock_nested; | ||
140 | 142 | ||
141 | /* protects write locks */ | 143 | /* protects write locks */ |
142 | rwlock_t lock; | 144 | rwlock_t lock; |
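The two new fields pair up: lock_owner records which task currently holds the extent buffer's write lock, and lock_nested marks the single read-lock recursion that same task is then permitted. A user-space sketch of the idea, with stand-in types rather than the kernel's blocking-lock machinery, and under the assumption that only the write holder may recurse:

	#include <assert.h>
	#include <stdint.h>

	struct eb_lock {
		int	write_locked;
		int	lock_nested;	/* read-side recursion marker */
		int32_t	lock_owner;	/* pid of the current write holder */
	};

	static int try_read_lock(struct eb_lock *l, int32_t pid)
	{
		if (l->write_locked && l->lock_owner == pid && !l->lock_nested) {
			l->lock_nested = 1;	/* same task: allow one nesting */
			return 1;
		}
		return !l->write_locked;	/* otherwise normal rwlock rules */
	}

	int main(void)
	{
		struct eb_lock l = { .write_locked = 1, .lock_owner = 42 };
		assert(try_read_lock(&l, 42));	/* the owner may recurse once */
		assert(!try_read_lock(&l, 7));	/* everyone else must wait */
		return 0;
	}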
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 97fbe939c050..859ba2dd8890 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -678,7 +678,7 @@ next_slot: | |||
678 | disk_bytenr, num_bytes, 0, | 678 | disk_bytenr, num_bytes, 0, |
679 | root->root_key.objectid, | 679 | root->root_key.objectid, |
680 | new_key.objectid, | 680 | new_key.objectid, |
681 | start - extent_offset); | 681 | start - extent_offset, 0); |
682 | BUG_ON(ret); | 682 | BUG_ON(ret); |
683 | *hint_byte = disk_bytenr; | 683 | *hint_byte = disk_bytenr; |
684 | } | 684 | } |
@@ -753,7 +753,7 @@ next_slot: | |||
753 | disk_bytenr, num_bytes, 0, | 753 | disk_bytenr, num_bytes, 0, |
754 | root->root_key.objectid, | 754 | root->root_key.objectid, |
755 | key.objectid, key.offset - | 755 | key.objectid, key.offset - |
756 | extent_offset); | 756 | extent_offset, 0); |
757 | BUG_ON(ret); | 757 | BUG_ON(ret); |
758 | inode_sub_bytes(inode, | 758 | inode_sub_bytes(inode, |
759 | extent_end - key.offset); | 759 | extent_end - key.offset); |
@@ -962,7 +962,7 @@ again: | |||
962 | 962 | ||
963 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, | 963 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, |
964 | root->root_key.objectid, | 964 | root->root_key.objectid, |
965 | ino, orig_offset); | 965 | ino, orig_offset, 0); |
966 | BUG_ON(ret); | 966 | BUG_ON(ret); |
967 | 967 | ||
968 | if (split == start) { | 968 | if (split == start) { |
@@ -989,7 +989,7 @@ again: | |||
989 | del_nr++; | 989 | del_nr++; |
990 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, | 990 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
991 | 0, root->root_key.objectid, | 991 | 0, root->root_key.objectid, |
992 | ino, orig_offset); | 992 | ino, orig_offset, 0); |
993 | BUG_ON(ret); | 993 | BUG_ON(ret); |
994 | } | 994 | } |
995 | other_start = 0; | 995 | other_start = 0; |
@@ -1006,7 +1006,7 @@ again: | |||
1006 | del_nr++; | 1006 | del_nr++; |
1007 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, | 1007 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
1008 | 0, root->root_key.objectid, | 1008 | 0, root->root_key.objectid, |
1009 | ino, orig_offset); | 1009 | ino, orig_offset, 0); |
1010 | BUG_ON(ret); | 1010 | BUG_ON(ret); |
1011 | } | 1011 | } |
1012 | if (del_nr == 0) { | 1012 | if (del_nr == 0) { |
@@ -1081,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
1081 | again: | 1081 | again: |
1082 | for (i = 0; i < num_pages; i++) { | 1082 | for (i = 0; i < num_pages; i++) { |
1083 | pages[i] = find_or_create_page(inode->i_mapping, index + i, | 1083 | pages[i] = find_or_create_page(inode->i_mapping, index + i, |
1084 | mask); | 1084 | mask | __GFP_WRITE); |
1085 | if (!pages[i]) { | 1085 | if (!pages[i]) { |
1086 | faili = i - 1; | 1086 | faili = i - 1; |
1087 | err = -ENOMEM; | 1087 | err = -ENOMEM; |
@@ -1136,7 +1136,8 @@ again: | |||
1136 | GFP_NOFS); | 1136 | GFP_NOFS); |
1137 | } | 1137 | } |
1138 | for (i = 0; i < num_pages; i++) { | 1138 | for (i = 0; i < num_pages; i++) { |
1139 | clear_page_dirty_for_io(pages[i]); | 1139 | if (clear_page_dirty_for_io(pages[i])) |
1140 | account_page_redirty(pages[i]); | ||
1140 | set_page_extent_mapped(pages[i]); | 1141 | set_page_extent_mapped(pages[i]); |
1141 | WARN_ON(!PageLocked(pages[i])); | 1142 | WARN_ON(!PageLocked(pages[i])); |
1142 | } | 1143 | } |
@@ -1273,7 +1274,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1273 | dirty_pages); | 1274 | dirty_pages); |
1274 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | 1275 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) |
1275 | btrfs_btree_balance_dirty(root, 1); | 1276 | btrfs_btree_balance_dirty(root, 1); |
1276 | btrfs_throttle(root); | ||
1277 | 1277 | ||
1278 | pos += copied; | 1278 | pos += copied; |
1279 | num_written += copied; | 1279 | num_written += copied; |
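clear_page_dirty_for_io() drops the page from the dirty accounting, but this path redirties the page right away, so the counters feeding writeback throttling would leak one page per rewrite; account_page_redirty() puts the accounting back, and only for pages the clear actually touched. A toy model of why the pairing has to stay balanced:

	#include <assert.h>

	static long nr_dirtied;	/* stand-in for the task/bdi dirty counters */

	static int clear_dirty(int *page_dirty)
	{
		if (*page_dirty) {
			*page_dirty = 0;
			nr_dirtied--;	/* clear_page_dirty_for_io() side effect */
			return 1;
		}
		return 0;
	}

	static void redirty(int *page_dirty)
	{
		*page_dirty = 1;
		nr_dirtied++;		/* account_page_redirty() side effect */
	}

	int main(void)
	{
		int page_dirty = 1;

		nr_dirtied = 1;
		if (clear_dirty(&page_dirty))	/* write path cleans for io... */
			redirty(&page_dirty);	/* ...then immediately redirties */
		assert(nr_dirtied == 1);	/* throttling quota did not leak */
		return 0;
	}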
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9a897bf79538..c2f20594c9f7 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -319,9 +319,11 @@ static void io_ctl_drop_pages(struct io_ctl *io_ctl) | |||
319 | io_ctl_unmap_page(io_ctl); | 319 | io_ctl_unmap_page(io_ctl); |
320 | 320 | ||
321 | for (i = 0; i < io_ctl->num_pages; i++) { | 321 | for (i = 0; i < io_ctl->num_pages; i++) { |
322 | ClearPageChecked(io_ctl->pages[i]); | 322 | if (io_ctl->pages[i]) { |
323 | unlock_page(io_ctl->pages[i]); | 323 | ClearPageChecked(io_ctl->pages[i]); |
324 | page_cache_release(io_ctl->pages[i]); | 324 | unlock_page(io_ctl->pages[i]); |
325 | page_cache_release(io_ctl->pages[i]); | ||
326 | } | ||
325 | } | 327 | } |
326 | } | 328 | } |
327 | 329 | ||
@@ -635,7 +637,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
635 | if (!num_entries) | 637 | if (!num_entries) |
636 | return 0; | 638 | return 0; |
637 | 639 | ||
638 | io_ctl_init(&io_ctl, inode, root); | 640 | ret = io_ctl_init(&io_ctl, inode, root); |
641 | if (ret) | ||
642 | return ret; | ||
643 | |||
639 | ret = readahead_cache(inode); | 644 | ret = readahead_cache(inode); |
640 | if (ret) | 645 | if (ret) |
641 | goto out; | 646 | goto out; |
@@ -838,7 +843,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
838 | struct io_ctl io_ctl; | 843 | struct io_ctl io_ctl; |
839 | struct list_head bitmap_list; | 844 | struct list_head bitmap_list; |
840 | struct btrfs_key key; | 845 | struct btrfs_key key; |
841 | u64 start, end, len; | 846 | u64 start, extent_start, extent_end, len; |
842 | int entries = 0; | 847 | int entries = 0; |
843 | int bitmaps = 0; | 848 | int bitmaps = 0; |
844 | int ret; | 849 | int ret; |
@@ -849,7 +854,9 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
849 | if (!i_size_read(inode)) | 854 | if (!i_size_read(inode)) |
850 | return -1; | 855 | return -1; |
851 | 856 | ||
852 | io_ctl_init(&io_ctl, inode, root); | 857 | ret = io_ctl_init(&io_ctl, inode, root); |
858 | if (ret) | ||
859 | return -1; | ||
853 | 860 | ||
854 | /* Get the cluster for this block_group if it exists */ | 861 | /* Get the cluster for this block_group if it exists */ |
855 | if (block_group && !list_empty(&block_group->cluster_list)) | 862 | if (block_group && !list_empty(&block_group->cluster_list)) |
@@ -857,25 +864,12 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
857 | struct btrfs_free_cluster, | 864 | struct btrfs_free_cluster, |
858 | block_group_list); | 865 | block_group_list); |
859 | 866 | ||
860 | /* | ||
861 | * We shouldn't have switched the pinned extents yet so this is the | ||
862 | * right one | ||
863 | */ | ||
864 | unpin = root->fs_info->pinned_extents; | ||
865 | |||
866 | /* Lock all pages first so we can lock the extent safely. */ | 867 | /* Lock all pages first so we can lock the extent safely. */ |
867 | io_ctl_prepare_pages(&io_ctl, inode, 0); | 868 | io_ctl_prepare_pages(&io_ctl, inode, 0); |
868 | 869 | ||
869 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | 870 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, |
870 | 0, &cached_state, GFP_NOFS); | 871 | 0, &cached_state, GFP_NOFS); |
871 | 872 | ||
872 | /* | ||
873 | * When searching for pinned extents, we need to start at our start | ||
874 | * offset. | ||
875 | */ | ||
876 | if (block_group) | ||
877 | start = block_group->key.objectid; | ||
878 | |||
879 | node = rb_first(&ctl->free_space_offset); | 873 | node = rb_first(&ctl->free_space_offset); |
880 | if (!node && cluster) { | 874 | if (!node && cluster) { |
881 | node = rb_first(&cluster->root); | 875 | node = rb_first(&cluster->root); |
@@ -918,9 +912,20 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
918 | * We want to add any pinned extents to our free space cache | 912 | * We want to add any pinned extents to our free space cache |
919 | * so we don't leak the space | 913 | * so we don't leak the space |
920 | */ | 914 | */ |
915 | |||
916 | /* | ||
917 | * We shouldn't have switched the pinned extents yet so this is the | ||
918 | * right one | ||
919 | */ | ||
920 | unpin = root->fs_info->pinned_extents; | ||
921 | |||
922 | if (block_group) | ||
923 | start = block_group->key.objectid; | ||
924 | |||
921 | while (block_group && (start < block_group->key.objectid + | 925 | while (block_group && (start < block_group->key.objectid + |
922 | block_group->key.offset)) { | 926 | block_group->key.offset)) { |
923 | ret = find_first_extent_bit(unpin, start, &start, &end, | 927 | ret = find_first_extent_bit(unpin, start, |
928 | &extent_start, &extent_end, | ||
924 | EXTENT_DIRTY); | 929 | EXTENT_DIRTY); |
925 | if (ret) { | 930 | if (ret) { |
926 | ret = 0; | 931 | ret = 0; |
@@ -928,20 +933,21 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
928 | } | 933 | } |
929 | 934 | ||
930 | /* This pinned extent is out of our range */ | 935 | /* This pinned extent is out of our range */ |
931 | if (start >= block_group->key.objectid + | 936 | if (extent_start >= block_group->key.objectid + |
932 | block_group->key.offset) | 937 | block_group->key.offset) |
933 | break; | 938 | break; |
934 | 939 | ||
935 | len = block_group->key.objectid + | 940 | extent_start = max(extent_start, start); |
936 | block_group->key.offset - start; | 941 | extent_end = min(block_group->key.objectid + |
937 | len = min(len, end + 1 - start); | 942 | block_group->key.offset, extent_end + 1); |
943 | len = extent_end - extent_start; | ||
938 | 944 | ||
939 | entries++; | 945 | entries++; |
940 | ret = io_ctl_add_entry(&io_ctl, start, len, NULL); | 946 | ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL); |
941 | if (ret) | 947 | if (ret) |
942 | goto out_nospc; | 948 | goto out_nospc; |
943 | 949 | ||
944 | start = end + 1; | 950 | start = extent_end; |
945 | } | 951 | } |
946 | 952 | ||
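The rewritten loop keeps the caller's cursor (start) separate from the extent returned by find_first_extent_bit() and records only the intersection of the pinned extent with the block group, where the old code reused start/end directly and could emit an entry reaching past either boundary. The clamp, as standalone arithmetic:

	#include <assert.h>
	#include <stdint.h>

	static uint64_t max64(uint64_t a, uint64_t b) { return a > b ? a : b; }
	static uint64_t min64(uint64_t a, uint64_t b) { return a < b ? a : b; }

	int main(void)
	{
		uint64_t start = 100, group_end = 200;		/* search window */
		uint64_t extent_start = 80, extent_end = 149;	/* inclusive end */

		extent_start = max64(extent_start, start);
		extent_end = min64(group_end, extent_end + 1);	/* now exclusive */
		assert(extent_end - extent_start == 50);	/* only the overlap */
		return 0;
	}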
947 | /* Write out the bitmaps */ | 953 | /* Write out the bitmaps */ |
@@ -2236,7 +2242,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
2236 | if (entry->bitmap) { | 2242 | if (entry->bitmap) { |
2237 | ret = btrfs_alloc_from_bitmap(block_group, | 2243 | ret = btrfs_alloc_from_bitmap(block_group, |
2238 | cluster, entry, bytes, | 2244 | cluster, entry, bytes, |
2239 | min_start); | 2245 | cluster->window_start); |
2240 | if (ret == 0) { | 2246 | if (ret == 0) { |
2241 | node = rb_next(&entry->offset_index); | 2247 | node = rb_next(&entry->offset_index); |
2242 | if (!node) | 2248 | if (!node) |
@@ -2245,6 +2251,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
2245 | offset_index); | 2251 | offset_index); |
2246 | continue; | 2252 | continue; |
2247 | } | 2253 | } |
2254 | cluster->window_start += bytes; | ||
2248 | } else { | 2255 | } else { |
2249 | ret = entry->offset; | 2256 | ret = entry->offset; |
2250 | 2257 | ||
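Bitmap-backed clusters now allocate at cluster->window_start rather than min_start, and bump window_start past each successful allocation so consecutive callers carve out disjoint ranges. The bump-pointer idea in isolation:

	#include <assert.h>
	#include <stdint.h>

	struct cluster { uint64_t window_start; };

	static uint64_t alloc_from_cluster(struct cluster *c, uint64_t bytes)
	{
		uint64_t ret = c->window_start;	/* hand out the current front */
		c->window_start += bytes;	/* advance for the next caller */
		return ret;
	}

	int main(void)
	{
		struct cluster c = { .window_start = 4096 };
		assert(alloc_from_cluster(&c, 4096) == 4096);
		assert(alloc_from_cluster(&c, 4096) == 8192);	/* no overlap */
		return 0;
	}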
@@ -2283,23 +2290,23 @@ out: | |||
2283 | static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, | 2290 | static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, |
2284 | struct btrfs_free_space *entry, | 2291 | struct btrfs_free_space *entry, |
2285 | struct btrfs_free_cluster *cluster, | 2292 | struct btrfs_free_cluster *cluster, |
2286 | u64 offset, u64 bytes, u64 min_bytes) | 2293 | u64 offset, u64 bytes, |
2294 | u64 cont1_bytes, u64 min_bytes) | ||
2287 | { | 2295 | { |
2288 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 2296 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
2289 | unsigned long next_zero; | 2297 | unsigned long next_zero; |
2290 | unsigned long i; | 2298 | unsigned long i; |
2291 | unsigned long search_bits; | 2299 | unsigned long want_bits; |
2292 | unsigned long total_bits; | 2300 | unsigned long min_bits; |
2293 | unsigned long found_bits; | 2301 | unsigned long found_bits; |
2294 | unsigned long start = 0; | 2302 | unsigned long start = 0; |
2295 | unsigned long total_found = 0; | 2303 | unsigned long total_found = 0; |
2296 | int ret; | 2304 | int ret; |
2297 | bool found = false; | ||
2298 | 2305 | ||
2299 | i = offset_to_bit(entry->offset, block_group->sectorsize, | 2306 | i = offset_to_bit(entry->offset, block_group->sectorsize, |
2300 | max_t(u64, offset, entry->offset)); | 2307 | max_t(u64, offset, entry->offset)); |
2301 | search_bits = bytes_to_bits(bytes, block_group->sectorsize); | 2308 | want_bits = bytes_to_bits(bytes, block_group->sectorsize); |
2302 | total_bits = bytes_to_bits(min_bytes, block_group->sectorsize); | 2309 | min_bits = bytes_to_bits(min_bytes, block_group->sectorsize); |
2303 | 2310 | ||
2304 | again: | 2311 | again: |
2305 | found_bits = 0; | 2312 | found_bits = 0; |
@@ -2308,7 +2315,7 @@ again: | |||
2308 | i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { | 2315 | i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { |
2309 | next_zero = find_next_zero_bit(entry->bitmap, | 2316 | next_zero = find_next_zero_bit(entry->bitmap, |
2310 | BITS_PER_BITMAP, i); | 2317 | BITS_PER_BITMAP, i); |
2311 | if (next_zero - i >= search_bits) { | 2318 | if (next_zero - i >= min_bits) { |
2312 | found_bits = next_zero - i; | 2319 | found_bits = next_zero - i; |
2313 | break; | 2320 | break; |
2314 | } | 2321 | } |
@@ -2318,10 +2325,9 @@ again: | |||
2318 | if (!found_bits) | 2325 | if (!found_bits) |
2319 | return -ENOSPC; | 2326 | return -ENOSPC; |
2320 | 2327 | ||
2321 | if (!found) { | 2328 | if (!total_found) { |
2322 | start = i; | 2329 | start = i; |
2323 | cluster->max_size = 0; | 2330 | cluster->max_size = 0; |
2324 | found = true; | ||
2325 | } | 2331 | } |
2326 | 2332 | ||
2327 | total_found += found_bits; | 2333 | total_found += found_bits; |
@@ -2329,13 +2335,8 @@ again: | |||
2329 | if (cluster->max_size < found_bits * block_group->sectorsize) | 2335 | if (cluster->max_size < found_bits * block_group->sectorsize) |
2330 | cluster->max_size = found_bits * block_group->sectorsize; | 2336 | cluster->max_size = found_bits * block_group->sectorsize; |
2331 | 2337 | ||
2332 | if (total_found < total_bits) { | 2338 | if (total_found < want_bits || cluster->max_size < cont1_bytes) { |
2333 | i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero); | 2339 | i = next_zero + 1; |
2334 | if (i - start > total_bits * 2) { | ||
2335 | total_found = 0; | ||
2336 | cluster->max_size = 0; | ||
2337 | found = false; | ||
2338 | } | ||
2339 | goto again; | 2340 | goto again; |
2340 | } | 2341 | } |
2341 | 2342 | ||
@@ -2346,28 +2347,31 @@ again: | |||
2346 | &entry->offset_index, 1); | 2347 | &entry->offset_index, 1); |
2347 | BUG_ON(ret); | 2348 | BUG_ON(ret); |
2348 | 2349 | ||
2350 | trace_btrfs_setup_cluster(block_group, cluster, | ||
2351 | total_found * block_group->sectorsize, 1); | ||
2349 | return 0; | 2352 | return 0; |
2350 | } | 2353 | } |
2351 | 2354 | ||
2352 | /* | 2355 | /* |
2353 | * This searches the block group for just extents to fill the cluster with. | 2356 | * This searches the block group for just extents to fill the cluster with. |
2357 | * Try to find a cluster with at least bytes total bytes, at least one | ||
2358 | * extent of cont1_bytes, and other extents of at least min_bytes. | ||
2354 | */ | 2359 | */ |
2355 | static noinline int | 2360 | static noinline int |
2356 | setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, | 2361 | setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, |
2357 | struct btrfs_free_cluster *cluster, | 2362 | struct btrfs_free_cluster *cluster, |
2358 | struct list_head *bitmaps, u64 offset, u64 bytes, | 2363 | struct list_head *bitmaps, u64 offset, u64 bytes, |
2359 | u64 min_bytes) | 2364 | u64 cont1_bytes, u64 min_bytes) |
2360 | { | 2365 | { |
2361 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 2366 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
2362 | struct btrfs_free_space *first = NULL; | 2367 | struct btrfs_free_space *first = NULL; |
2363 | struct btrfs_free_space *entry = NULL; | 2368 | struct btrfs_free_space *entry = NULL; |
2364 | struct btrfs_free_space *prev = NULL; | ||
2365 | struct btrfs_free_space *last; | 2369 | struct btrfs_free_space *last; |
2366 | struct rb_node *node; | 2370 | struct rb_node *node; |
2367 | u64 window_start; | 2371 | u64 window_start; |
2368 | u64 window_free; | 2372 | u64 window_free; |
2369 | u64 max_extent; | 2373 | u64 max_extent; |
2370 | u64 max_gap = 128 * 1024; | 2374 | u64 total_size = 0; |
2371 | 2375 | ||
2372 | entry = tree_search_offset(ctl, offset, 0, 1); | 2376 | entry = tree_search_offset(ctl, offset, 0, 1); |
2373 | if (!entry) | 2377 | if (!entry) |
@@ -2377,8 +2381,8 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2377 | * We don't want bitmaps, so just move along until we find a normal | 2381 | * We don't want bitmaps, so just move along until we find a normal |
2378 | * extent entry. | 2382 | * extent entry. |
2379 | */ | 2383 | */ |
2380 | while (entry->bitmap) { | 2384 | while (entry->bitmap || entry->bytes < min_bytes) { |
2381 | if (list_empty(&entry->list)) | 2385 | if (entry->bitmap && list_empty(&entry->list)) |
2382 | list_add_tail(&entry->list, bitmaps); | 2386 | list_add_tail(&entry->list, bitmaps); |
2383 | node = rb_next(&entry->offset_index); | 2387 | node = rb_next(&entry->offset_index); |
2384 | if (!node) | 2388 | if (!node) |
@@ -2391,12 +2395,9 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2391 | max_extent = entry->bytes; | 2395 | max_extent = entry->bytes; |
2392 | first = entry; | 2396 | first = entry; |
2393 | last = entry; | 2397 | last = entry; |
2394 | prev = entry; | ||
2395 | 2398 | ||
2396 | while (window_free <= min_bytes) { | 2399 | for (node = rb_next(&entry->offset_index); node; |
2397 | node = rb_next(&entry->offset_index); | 2400 | node = rb_next(&entry->offset_index)) { |
2398 | if (!node) | ||
2399 | return -ENOSPC; | ||
2400 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2401 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
2401 | 2402 | ||
2402 | if (entry->bitmap) { | 2403 | if (entry->bitmap) { |
@@ -2405,26 +2406,18 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2405 | continue; | 2406 | continue; |
2406 | } | 2407 | } |
2407 | 2408 | ||
2408 | /* | 2409 | if (entry->bytes < min_bytes) |
2409 | * we haven't filled the empty size and the window is | 2410 | continue; |
2410 | * very large. reset and try again | 2411 | |
2411 | */ | 2412 | last = entry; |
2412 | if (entry->offset - (prev->offset + prev->bytes) > max_gap || | 2413 | window_free += entry->bytes; |
2413 | entry->offset - window_start > (min_bytes * 2)) { | 2414 | if (entry->bytes > max_extent) |
2414 | first = entry; | ||
2415 | window_start = entry->offset; | ||
2416 | window_free = entry->bytes; | ||
2417 | last = entry; | ||
2418 | max_extent = entry->bytes; | 2415 | max_extent = entry->bytes; |
2419 | } else { | ||
2420 | last = entry; | ||
2421 | window_free += entry->bytes; | ||
2422 | if (entry->bytes > max_extent) | ||
2423 | max_extent = entry->bytes; | ||
2424 | } | ||
2425 | prev = entry; | ||
2426 | } | 2416 | } |
2427 | 2417 | ||
2418 | if (window_free < bytes || max_extent < cont1_bytes) | ||
2419 | return -ENOSPC; | ||
2420 | |||
2428 | cluster->window_start = first->offset; | 2421 | cluster->window_start = first->offset; |
2429 | 2422 | ||
2430 | node = &first->offset_index; | 2423 | node = &first->offset_index; |
@@ -2438,17 +2431,18 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2438 | 2431 | ||
2439 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2432 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
2440 | node = rb_next(&entry->offset_index); | 2433 | node = rb_next(&entry->offset_index); |
2441 | if (entry->bitmap) | 2434 | if (entry->bitmap || entry->bytes < min_bytes) |
2442 | continue; | 2435 | continue; |
2443 | 2436 | ||
2444 | rb_erase(&entry->offset_index, &ctl->free_space_offset); | 2437 | rb_erase(&entry->offset_index, &ctl->free_space_offset); |
2445 | ret = tree_insert_offset(&cluster->root, entry->offset, | 2438 | ret = tree_insert_offset(&cluster->root, entry->offset, |
2446 | &entry->offset_index, 0); | 2439 | &entry->offset_index, 0); |
2440 | total_size += entry->bytes; | ||
2447 | BUG_ON(ret); | 2441 | BUG_ON(ret); |
2448 | } while (node && entry != last); | 2442 | } while (node && entry != last); |
2449 | 2443 | ||
2450 | cluster->max_size = max_extent; | 2444 | cluster->max_size = max_extent; |
2451 | 2445 | trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); | |
2452 | return 0; | 2446 | return 0; |
2453 | } | 2447 | } |
2454 | 2448 | ||
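setup_cluster_no_bitmap() used to restart the window whenever it hit a gap larger than max_gap; now it makes a single pass over every extent entry of at least min_bytes, accumulating window_free and tracking max_extent, and only afterwards tests the two admission criteria: enough total space, and one run of at least cont1_bytes. The scan reduced to its core, over a plain array:

	#include <stdint.h>
	#include <stdio.h>

	static int setup_cluster(const uint64_t *sizes, int n, uint64_t bytes,
				 uint64_t cont1_bytes, uint64_t min_bytes)
	{
		uint64_t window_free = 0, max_extent = 0;
		int i;

		for (i = 0; i < n; i++) {
			if (sizes[i] < min_bytes)	/* too small: skip it */
				continue;
			window_free += sizes[i];
			if (sizes[i] > max_extent)
				max_extent = sizes[i];
		}
		return window_free >= bytes && max_extent >= cont1_bytes;
	}

	int main(void)
	{
		const uint64_t sizes[] = { 4096, 65536, 8192 };
		/* want 64K total plus one 32K-contiguous run: the 64K entry
		 * satisfies the contiguity test, gaps are irrelevant */
		printf("%d\n", setup_cluster(sizes, 3, 65536, 32768, 4096));
		return 0;
	}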
@@ -2460,7 +2454,7 @@ static noinline int | |||
2460 | setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, | 2454 | setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, |
2461 | struct btrfs_free_cluster *cluster, | 2455 | struct btrfs_free_cluster *cluster, |
2462 | struct list_head *bitmaps, u64 offset, u64 bytes, | 2456 | struct list_head *bitmaps, u64 offset, u64 bytes, |
2463 | u64 min_bytes) | 2457 | u64 cont1_bytes, u64 min_bytes) |
2464 | { | 2458 | { |
2465 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 2459 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
2466 | struct btrfs_free_space *entry; | 2460 | struct btrfs_free_space *entry; |
@@ -2482,10 +2476,10 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, | |||
2482 | } | 2476 | } |
2483 | 2477 | ||
2484 | list_for_each_entry(entry, bitmaps, list) { | 2478 | list_for_each_entry(entry, bitmaps, list) { |
2485 | if (entry->bytes < min_bytes) | 2479 | if (entry->bytes < bytes) |
2486 | continue; | 2480 | continue; |
2487 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, | 2481 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, |
2488 | bytes, min_bytes); | 2482 | bytes, cont1_bytes, min_bytes); |
2489 | if (!ret) | 2483 | if (!ret) |
2490 | return 0; | 2484 | return 0; |
2491 | } | 2485 | } |
@@ -2499,7 +2493,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, | |||
2499 | 2493 | ||
2500 | /* | 2494 | /* |
2501 | * here we try to find a cluster of blocks in a block group. The goal | 2495 | * here we try to find a cluster of blocks in a block group. The goal |
2502 | * is to find at least bytes free and up to empty_size + bytes free. | 2496 | * is to find at least bytes+empty_size. |
2503 | * We might not find them all in one contiguous area. | 2497 | * We might not find them all in one contiguous area. |
2504 | * | 2498 | * |
2505 | * returns zero and sets up cluster if things worked out, otherwise | 2499 | * returns zero and sets up cluster if things worked out, otherwise |
@@ -2515,23 +2509,24 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |||
2515 | struct btrfs_free_space *entry, *tmp; | 2509 | struct btrfs_free_space *entry, *tmp; |
2516 | LIST_HEAD(bitmaps); | 2510 | LIST_HEAD(bitmaps); |
2517 | u64 min_bytes; | 2511 | u64 min_bytes; |
2512 | u64 cont1_bytes; | ||
2518 | int ret; | 2513 | int ret; |
2519 | 2514 | ||
2520 | /* for metadata, allow allocates with more holes */ | 2515 | /* |
2516 | * Choose the minimum extent size we'll require for this | ||
2517 | * cluster. For SSD_SPREAD, don't allow any fragmentation. | ||
2518 | * For metadata, allow allocations with smaller extents. For | ||
2519 | * data, keep it dense. | ||
2520 | */ | ||
2521 | if (btrfs_test_opt(root, SSD_SPREAD)) { | 2521 | if (btrfs_test_opt(root, SSD_SPREAD)) { |
2522 | min_bytes = bytes + empty_size; | 2522 | cont1_bytes = min_bytes = bytes + empty_size; |
2523 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { | 2523 | } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { |
2524 | /* | 2524 | cont1_bytes = bytes; |
2525 | * we want to do larger allocations when we are | 2525 | min_bytes = block_group->sectorsize; |
2526 | * flushing out the delayed refs, it helps prevent | 2526 | } else { |
2527 | * making more work as we go along. | 2527 | cont1_bytes = max(bytes, (bytes + empty_size) >> 2); |
2528 | */ | 2528 | min_bytes = block_group->sectorsize; |
2529 | if (trans->transaction->delayed_refs.flushing) | 2529 | } |
2530 | min_bytes = max(bytes, (bytes + empty_size) >> 1); | ||
2531 | else | ||
2532 | min_bytes = max(bytes, (bytes + empty_size) >> 4); | ||
2533 | } else | ||
2534 | min_bytes = max(bytes, (bytes + empty_size) >> 2); | ||
2535 | 2530 | ||
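The old min_bytes heuristics tied to delayed-ref flushing are gone; each allocation class now picks a contiguity requirement (cont1_bytes) and a floor for individual extents (min_bytes). A sketch of the three policies as a standalone helper; the enum is a stand-in for the mount-option and block-group-flag tests in the hunk above:

	#include <stdint.h>

	enum alloc_kind { SSD_SPREAD_ALLOC, METADATA_ALLOC, DATA_ALLOC };

	static void pick_cluster_limits(enum alloc_kind kind, uint64_t bytes,
					uint64_t empty_size, uint64_t sectorsize,
					uint64_t *cont1, uint64_t *min)
	{
		switch (kind) {
		case SSD_SPREAD_ALLOC:
			*cont1 = *min = bytes + empty_size;	/* no fragmentation */
			break;
		case METADATA_ALLOC:
			*cont1 = bytes;		/* one run covering the ask */
			*min = sectorsize;	/* fragments are fine */
			break;
		case DATA_ALLOC:
			*cont1 = bytes > (bytes + empty_size) / 4
				? bytes : (bytes + empty_size) / 4;
			*min = sectorsize;
			break;
		}
	}

	int main(void)
	{
		uint64_t cont1, min;

		pick_cluster_limits(DATA_ALLOC, 64 * 1024, 64 * 1024, 4096,
				    &cont1, &min);
		/* data: one 64K run required, 4K fragments acceptable */
		return cont1 == 64 * 1024 && min == 4096 ? 0 : 1;
	}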
2536 | spin_lock(&ctl->tree_lock); | 2531 | spin_lock(&ctl->tree_lock); |
2537 | 2532 | ||
@@ -2539,7 +2534,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |||
2539 | * If we know we don't have enough space to make a cluster don't even | 2534 | * If we know we don't have enough space to make a cluster don't even |
2540 | * bother doing all the work to try and find one. | 2535 | * bother doing all the work to try and find one. |
2541 | */ | 2536 | */ |
2542 | if (ctl->free_space < min_bytes) { | 2537 | if (ctl->free_space < bytes) { |
2543 | spin_unlock(&ctl->tree_lock); | 2538 | spin_unlock(&ctl->tree_lock); |
2544 | return -ENOSPC; | 2539 | return -ENOSPC; |
2545 | } | 2540 | } |
@@ -2552,11 +2547,17 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |||
2552 | goto out; | 2547 | goto out; |
2553 | } | 2548 | } |
2554 | 2549 | ||
2550 | trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, | ||
2551 | min_bytes); | ||
2552 | |||
2553 | INIT_LIST_HEAD(&bitmaps); | ||
2555 | ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, | 2554 | ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, |
2556 | bytes, min_bytes); | 2555 | bytes + empty_size, |
2556 | cont1_bytes, min_bytes); | ||
2557 | if (ret) | 2557 | if (ret) |
2558 | ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, | 2558 | ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, |
2559 | offset, bytes, min_bytes); | 2559 | offset, bytes + empty_size, |
2560 | cont1_bytes, min_bytes); | ||
2560 | 2561 | ||
2561 | /* Clear our temporary list */ | 2562 | /* Clear our temporary list */ |
2562 | list_for_each_entry_safe(entry, tmp, &bitmaps, list) | 2563 | list_for_each_entry_safe(entry, tmp, &bitmaps, list) |
@@ -2567,6 +2568,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |||
2567 | list_add_tail(&cluster->block_group_list, | 2568 | list_add_tail(&cluster->block_group_list, |
2568 | &block_group->cluster_list); | 2569 | &block_group->cluster_list); |
2569 | cluster->block_group = block_group; | 2570 | cluster->block_group = block_group; |
2571 | } else { | ||
2572 | trace_btrfs_failed_cluster_setup(block_group); | ||
2570 | } | 2573 | } |
2571 | out: | 2574 | out: |
2572 | spin_unlock(&cluster->lock); | 2575 | spin_unlock(&cluster->lock); |
@@ -2588,17 +2591,57 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | |||
2588 | cluster->block_group = NULL; | 2591 | cluster->block_group = NULL; |
2589 | } | 2592 | } |
2590 | 2593 | ||
2591 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | 2594 | static int do_trimming(struct btrfs_block_group_cache *block_group, |
2592 | u64 *trimmed, u64 start, u64 end, u64 minlen) | 2595 | u64 *total_trimmed, u64 start, u64 bytes, |
2596 | u64 reserved_start, u64 reserved_bytes) | ||
2593 | { | 2597 | { |
2594 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 2598 | struct btrfs_space_info *space_info = block_group->space_info; |
2595 | struct btrfs_free_space *entry = NULL; | ||
2596 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 2599 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
2597 | u64 bytes = 0; | 2600 | int ret; |
2598 | u64 actually_trimmed; | 2601 | int update = 0; |
2599 | int ret = 0; | 2602 | u64 trimmed = 0; |
2600 | 2603 | ||
2601 | *trimmed = 0; | 2604 | spin_lock(&space_info->lock); |
2605 | spin_lock(&block_group->lock); | ||
2606 | if (!block_group->ro) { | ||
2607 | block_group->reserved += reserved_bytes; | ||
2608 | space_info->bytes_reserved += reserved_bytes; | ||
2609 | update = 1; | ||
2610 | } | ||
2611 | spin_unlock(&block_group->lock); | ||
2612 | spin_unlock(&space_info->lock); | ||
2613 | |||
2614 | ret = btrfs_error_discard_extent(fs_info->extent_root, | ||
2615 | start, bytes, &trimmed); | ||
2616 | if (!ret) | ||
2617 | *total_trimmed += trimmed; | ||
2618 | |||
2619 | btrfs_add_free_space(block_group, reserved_start, reserved_bytes); | ||
2620 | |||
2621 | if (update) { | ||
2622 | spin_lock(&space_info->lock); | ||
2623 | spin_lock(&block_group->lock); | ||
2624 | if (block_group->ro) | ||
2625 | space_info->bytes_readonly += reserved_bytes; | ||
2626 | block_group->reserved -= reserved_bytes; | ||
2627 | space_info->bytes_reserved -= reserved_bytes; | ||
2628 | spin_unlock(&space_info->lock); | ||
2629 | spin_unlock(&block_group->lock); | ||
2630 | } | ||
2631 | |||
2632 | return ret; | ||
2633 | } | ||
2634 | |||
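do_trimming() factors out the dance both trim paths need: mark the range reserved so concurrent allocators stay away while the discard is in flight, issue the discard, hand the space back to the free-space cache, then unwind the reservation, crediting bytes_readonly instead if the group went read-only in the meantime. The skeleton of that pattern, with stand-in state and a stub discard:

	#include <assert.h>
	#include <stdint.h>

	struct group { uint64_t reserved; int ro; };

	static int discard_stub(uint64_t start, uint64_t bytes)
	{ (void)start; (void)bytes; return 0; }	/* the actual TRIM */

	static int do_trim(struct group *g, uint64_t start, uint64_t bytes)
	{
		int update = 0, ret;

		if (!g->ro) {			/* ro groups get no reservation */
			g->reserved += bytes;	/* hide range from allocators */
			update = 1;
		}

		ret = discard_stub(start, bytes);

		/* the freed range rejoins the free-space cache here */

		if (update)
			g->reserved -= bytes;	/* unwind the reservation */
		return ret;
	}

	int main(void)
	{
		struct group g = { 0, 0 };
		assert(do_trim(&g, 0, 4096) == 0 && g.reserved == 0);
		return 0;
	}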
2635 | static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | ||
2636 | u64 *total_trimmed, u64 start, u64 end, u64 minlen) | ||
2637 | { | ||
2638 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
2639 | struct btrfs_free_space *entry; | ||
2640 | struct rb_node *node; | ||
2641 | int ret = 0; | ||
2642 | u64 extent_start; | ||
2643 | u64 extent_bytes; | ||
2644 | u64 bytes; | ||
2602 | 2645 | ||
2603 | while (start < end) { | 2646 | while (start < end) { |
2604 | spin_lock(&ctl->tree_lock); | 2647 | spin_lock(&ctl->tree_lock); |
@@ -2609,81 +2652,118 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | |||
2609 | } | 2652 | } |
2610 | 2653 | ||
2611 | entry = tree_search_offset(ctl, start, 0, 1); | 2654 | entry = tree_search_offset(ctl, start, 0, 1); |
2612 | if (!entry) | 2655 | if (!entry) { |
2613 | entry = tree_search_offset(ctl, | ||
2614 | offset_to_bitmap(ctl, start), | ||
2615 | 1, 1); | ||
2616 | |||
2617 | if (!entry || entry->offset >= end) { | ||
2618 | spin_unlock(&ctl->tree_lock); | 2656 | spin_unlock(&ctl->tree_lock); |
2619 | break; | 2657 | break; |
2620 | } | 2658 | } |
2621 | 2659 | ||
2622 | if (entry->bitmap) { | 2660 | /* skip bitmaps */ |
2623 | ret = search_bitmap(ctl, entry, &start, &bytes); | 2661 | while (entry->bitmap) { |
2624 | if (!ret) { | 2662 | node = rb_next(&entry->offset_index); |
2625 | if (start >= end) { | 2663 | if (!node) { |
2626 | spin_unlock(&ctl->tree_lock); | ||
2627 | break; | ||
2628 | } | ||
2629 | bytes = min(bytes, end - start); | ||
2630 | bitmap_clear_bits(ctl, entry, start, bytes); | ||
2631 | if (entry->bytes == 0) | ||
2632 | free_bitmap(ctl, entry); | ||
2633 | } else { | ||
2634 | start = entry->offset + BITS_PER_BITMAP * | ||
2635 | block_group->sectorsize; | ||
2636 | spin_unlock(&ctl->tree_lock); | 2664 | spin_unlock(&ctl->tree_lock); |
2637 | ret = 0; | 2665 | goto out; |
2638 | continue; | ||
2639 | } | 2666 | } |
2640 | } else { | 2667 | entry = rb_entry(node, struct btrfs_free_space, |
2641 | start = entry->offset; | 2668 | offset_index); |
2642 | bytes = min(entry->bytes, end - start); | ||
2643 | unlink_free_space(ctl, entry); | ||
2644 | kmem_cache_free(btrfs_free_space_cachep, entry); | ||
2645 | } | 2669 | } |
2646 | 2670 | ||
2671 | if (entry->offset >= end) { | ||
2672 | spin_unlock(&ctl->tree_lock); | ||
2673 | break; | ||
2674 | } | ||
2675 | |||
2676 | extent_start = entry->offset; | ||
2677 | extent_bytes = entry->bytes; | ||
2678 | start = max(start, extent_start); | ||
2679 | bytes = min(extent_start + extent_bytes, end) - start; | ||
2680 | if (bytes < minlen) { | ||
2681 | spin_unlock(&ctl->tree_lock); | ||
2682 | goto next; | ||
2683 | } | ||
2684 | |||
2685 | unlink_free_space(ctl, entry); | ||
2686 | kmem_cache_free(btrfs_free_space_cachep, entry); | ||
2687 | |||
2647 | spin_unlock(&ctl->tree_lock); | 2688 | spin_unlock(&ctl->tree_lock); |
2648 | 2689 | ||
2649 | if (bytes >= minlen) { | 2690 | ret = do_trimming(block_group, total_trimmed, start, bytes, |
2650 | struct btrfs_space_info *space_info; | 2691 | extent_start, extent_bytes); |
2651 | int update = 0; | 2692 | if (ret) |
2652 | 2693 | break; | |
2653 | space_info = block_group->space_info; | 2694 | next: |
2654 | spin_lock(&space_info->lock); | 2695 | start += bytes; |
2655 | spin_lock(&block_group->lock); | ||
2656 | if (!block_group->ro) { | ||
2657 | block_group->reserved += bytes; | ||
2658 | space_info->bytes_reserved += bytes; | ||
2659 | update = 1; | ||
2660 | } | ||
2661 | spin_unlock(&block_group->lock); | ||
2662 | spin_unlock(&space_info->lock); | ||
2663 | |||
2664 | ret = btrfs_error_discard_extent(fs_info->extent_root, | ||
2665 | start, | ||
2666 | bytes, | ||
2667 | &actually_trimmed); | ||
2668 | |||
2669 | btrfs_add_free_space(block_group, start, bytes); | ||
2670 | if (update) { | ||
2671 | spin_lock(&space_info->lock); | ||
2672 | spin_lock(&block_group->lock); | ||
2673 | if (block_group->ro) | ||
2674 | space_info->bytes_readonly += bytes; | ||
2675 | block_group->reserved -= bytes; | ||
2676 | space_info->bytes_reserved -= bytes; | ||
2677 | spin_unlock(&space_info->lock); | ||
2678 | spin_unlock(&block_group->lock); | ||
2679 | } | ||
2680 | 2696 | ||
2681 | if (ret) | 2697 | if (fatal_signal_pending(current)) { |
2682 | break; | 2698 | ret = -ERESTARTSYS; |
2683 | *trimmed += actually_trimmed; | 2699 | break; |
2700 | } | ||
2701 | |||
2702 | cond_resched(); | ||
2703 | } | ||
2704 | out: | ||
2705 | return ret; | ||
2706 | } | ||
2707 | |||
2708 | static int trim_bitmaps(struct btrfs_block_group_cache *block_group, | ||
2709 | u64 *total_trimmed, u64 start, u64 end, u64 minlen) | ||
2710 | { | ||
2711 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
2712 | struct btrfs_free_space *entry; | ||
2713 | int ret = 0; | ||
2714 | int ret2; | ||
2715 | u64 bytes; | ||
2716 | u64 offset = offset_to_bitmap(ctl, start); | ||
2717 | |||
2718 | while (offset < end) { | ||
2719 | bool next_bitmap = false; | ||
2720 | |||
2721 | spin_lock(&ctl->tree_lock); | ||
2722 | |||
2723 | if (ctl->free_space < minlen) { | ||
2724 | spin_unlock(&ctl->tree_lock); | ||
2725 | break; | ||
2726 | } | ||
2727 | |||
2728 | entry = tree_search_offset(ctl, offset, 1, 0); | ||
2729 | if (!entry) { | ||
2730 | spin_unlock(&ctl->tree_lock); | ||
2731 | next_bitmap = true; | ||
2732 | goto next; | ||
2733 | } | ||
2734 | |||
2735 | bytes = minlen; | ||
2736 | ret2 = search_bitmap(ctl, entry, &start, &bytes); | ||
2737 | if (ret2 || start >= end) { | ||
2738 | spin_unlock(&ctl->tree_lock); | ||
2739 | next_bitmap = true; | ||
2740 | goto next; | ||
2741 | } | ||
2742 | |||
2743 | bytes = min(bytes, end - start); | ||
2744 | if (bytes < minlen) { | ||
2745 | spin_unlock(&ctl->tree_lock); | ||
2746 | goto next; | ||
2747 | } | ||
2748 | |||
2749 | bitmap_clear_bits(ctl, entry, start, bytes); | ||
2750 | if (entry->bytes == 0) | ||
2751 | free_bitmap(ctl, entry); | ||
2752 | |||
2753 | spin_unlock(&ctl->tree_lock); | ||
2754 | |||
2755 | ret = do_trimming(block_group, total_trimmed, start, bytes, | ||
2756 | start, bytes); | ||
2757 | if (ret) | ||
2758 | break; | ||
2759 | next: | ||
2760 | if (next_bitmap) { | ||
2761 | offset += BITS_PER_BITMAP * ctl->unit; | ||
2762 | } else { | ||
2763 | start += bytes; | ||
2764 | if (start >= offset + BITS_PER_BITMAP * ctl->unit) | ||
2765 | offset += BITS_PER_BITMAP * ctl->unit; | ||
2684 | } | 2766 | } |
2685 | start += bytes; | ||
2686 | bytes = 0; | ||
2687 | 2767 | ||
2688 | if (fatal_signal_pending(current)) { | 2768 | if (fatal_signal_pending(current)) { |
2689 | ret = -ERESTARTSYS; | 2769 | ret = -ERESTARTSYS; |
@@ -2696,6 +2776,22 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | |||
2696 | return ret; | 2776 | return ret; |
2697 | } | 2777 | } |
2698 | 2778 | ||
2779 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | ||
2780 | u64 *trimmed, u64 start, u64 end, u64 minlen) | ||
2781 | { | ||
2782 | int ret; | ||
2783 | |||
2784 | *trimmed = 0; | ||
2785 | |||
2786 | ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); | ||
2787 | if (ret) | ||
2788 | return ret; | ||
2789 | |||
2790 | ret = trim_bitmaps(block_group, trimmed, start, end, minlen); | ||
2791 | |||
2792 | return ret; | ||
2793 | } | ||
2794 | |||
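With the two helpers in place, btrfs_trim_block_group() becomes a thin two-pass sequencer: plain free-space extents first, then bitmap-backed ranges, both accumulating into the same *trimmed total, with the first hard error aborting the second pass. Its shape, with stubs standing in for the real trim_no_bitmap()/trim_bitmaps():

	#include <stdint.h>

	static int trim_no_bitmap(uint64_t *t, uint64_t s, uint64_t e, uint64_t m)
	{ (void)s; (void)e; (void)m; *t += 1; return 0; }	/* stub */
	static int trim_bitmaps(uint64_t *t, uint64_t s, uint64_t e, uint64_t m)
	{ (void)s; (void)e; (void)m; *t += 1; return 0; }	/* stub */

	static int trim_block_group(uint64_t *trimmed, uint64_t start,
				    uint64_t end, uint64_t minlen)
	{
		int ret;

		*trimmed = 0;
		ret = trim_no_bitmap(trimmed, start, end, minlen);
		if (ret)		/* hard error skips the bitmap pass */
			return ret;
		return trim_bitmaps(trimmed, start, end, minlen);
	}

	int main(void)
	{
		uint64_t trimmed;
		return trim_block_group(&trimmed, 0, 1 << 20, 4096);
	}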
2699 | /* | 2795 | /* |
2700 | * Find the left-most item in the cache tree, and then return the | 2796 | * Find the left-most item in the cache tree, and then return the |
2701 | * smallest inode number in the item. | 2797 | * smallest inode number in the item. |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f8962a957d65..213ffa86ce1b 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -438,6 +438,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root, | |||
438 | trans->bytes_reserved); | 438 | trans->bytes_reserved); |
439 | if (ret) | 439 | if (ret) |
440 | goto out; | 440 | goto out; |
441 | trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans, | ||
442 | trans->bytes_reserved, 1); | ||
441 | again: | 443 | again: |
442 | inode = lookup_free_ino_inode(root, path); | 444 | inode = lookup_free_ino_inode(root, path); |
443 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { | 445 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { |
@@ -498,6 +500,8 @@ again: | |||
498 | out_put: | 500 | out_put: |
499 | iput(inode); | 501 | iput(inode); |
500 | out_release: | 502 | out_release: |
503 | trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans, | ||
504 | trans->bytes_reserved, 0); | ||
501 | btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); | 505 | btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); |
502 | out: | 506 | out: |
503 | trans->block_rsv = rsv; | 507 | trans->block_rsv = rsv; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 81b235a61f8c..32214fe0f7e3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -1951,12 +1951,28 @@ enum btrfs_orphan_cleanup_state { | |||
1951 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | 1951 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, |
1952 | struct btrfs_root *root) | 1952 | struct btrfs_root *root) |
1953 | { | 1953 | { |
1954 | struct btrfs_block_rsv *block_rsv; | ||
1954 | int ret; | 1955 | int ret; |
1955 | 1956 | ||
1956 | if (!list_empty(&root->orphan_list) || | 1957 | if (!list_empty(&root->orphan_list) || |
1957 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) | 1958 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) |
1958 | return; | 1959 | return; |
1959 | 1960 | ||
1961 | spin_lock(&root->orphan_lock); | ||
1962 | if (!list_empty(&root->orphan_list)) { | ||
1963 | spin_unlock(&root->orphan_lock); | ||
1964 | return; | ||
1965 | } | ||
1966 | |||
1967 | if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) { | ||
1968 | spin_unlock(&root->orphan_lock); | ||
1969 | return; | ||
1970 | } | ||
1971 | |||
1972 | block_rsv = root->orphan_block_rsv; | ||
1973 | root->orphan_block_rsv = NULL; | ||
1974 | spin_unlock(&root->orphan_lock); | ||
1975 | |||
1960 | if (root->orphan_item_inserted && | 1976 | if (root->orphan_item_inserted && |
1961 | btrfs_root_refs(&root->root_item) > 0) { | 1977 | btrfs_root_refs(&root->root_item) > 0) { |
1962 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, | 1978 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, |
@@ -1965,10 +1981,9 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | |||
1965 | root->orphan_item_inserted = 0; | 1981 | root->orphan_item_inserted = 0; |
1966 | } | 1982 | } |
1967 | 1983 | ||
1968 | if (root->orphan_block_rsv) { | 1984 | if (block_rsv) { |
1969 | WARN_ON(root->orphan_block_rsv->size > 0); | 1985 | WARN_ON(block_rsv->size > 0); |
1970 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | 1986 | btrfs_free_block_rsv(root, block_rsv); |
1971 | root->orphan_block_rsv = NULL; | ||
1972 | } | 1987 | } |
1973 | } | 1988 | } |
1974 | 1989 | ||
@@ -2224,14 +2239,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2224 | continue; | 2239 | continue; |
2225 | } | 2240 | } |
2226 | nr_truncate++; | 2241 | nr_truncate++; |
2227 | /* | ||
2228 | * Need to hold the imutex for reservation purposes, not | ||
2229 | * a huge deal here but I have a WARN_ON in | ||
2230 | * btrfs_delalloc_reserve_space to catch offenders. | ||
2231 | */ | ||
2232 | mutex_lock(&inode->i_mutex); | ||
2233 | ret = btrfs_truncate(inode); | 2242 | ret = btrfs_truncate(inode); |
2234 | mutex_unlock(&inode->i_mutex); | ||
2235 | } else { | 2243 | } else { |
2236 | nr_unlink++; | 2244 | nr_unlink++; |
2237 | } | 2245 | } |
@@ -2845,7 +2853,7 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans, | |||
2845 | BUG_ON(!root->fs_info->enospc_unlink); | 2853 | BUG_ON(!root->fs_info->enospc_unlink); |
2846 | root->fs_info->enospc_unlink = 0; | 2854 | root->fs_info->enospc_unlink = 0; |
2847 | } | 2855 | } |
2848 | btrfs_end_transaction_throttle(trans, root); | 2856 | btrfs_end_transaction(trans, root); |
2849 | } | 2857 | } |
2850 | 2858 | ||
2851 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | 2859 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) |
@@ -3009,7 +3017,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3009 | int pending_del_nr = 0; | 3017 | int pending_del_nr = 0; |
3010 | int pending_del_slot = 0; | 3018 | int pending_del_slot = 0; |
3011 | int extent_type = -1; | 3019 | int extent_type = -1; |
3012 | int encoding; | ||
3013 | int ret; | 3020 | int ret; |
3014 | int err = 0; | 3021 | int err = 0; |
3015 | u64 ino = btrfs_ino(inode); | 3022 | u64 ino = btrfs_ino(inode); |
@@ -3059,7 +3066,6 @@ search_again: | |||
3059 | leaf = path->nodes[0]; | 3066 | leaf = path->nodes[0]; |
3060 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 3067 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
3061 | found_type = btrfs_key_type(&found_key); | 3068 | found_type = btrfs_key_type(&found_key); |
3062 | encoding = 0; | ||
3063 | 3069 | ||
3064 | if (found_key.objectid != ino) | 3070 | if (found_key.objectid != ino) |
3065 | break; | 3071 | break; |
@@ -3072,10 +3078,6 @@ search_again: | |||
3072 | fi = btrfs_item_ptr(leaf, path->slots[0], | 3078 | fi = btrfs_item_ptr(leaf, path->slots[0], |
3073 | struct btrfs_file_extent_item); | 3079 | struct btrfs_file_extent_item); |
3074 | extent_type = btrfs_file_extent_type(leaf, fi); | 3080 | extent_type = btrfs_file_extent_type(leaf, fi); |
3075 | encoding = btrfs_file_extent_compression(leaf, fi); | ||
3076 | encoding |= btrfs_file_extent_encryption(leaf, fi); | ||
3077 | encoding |= btrfs_file_extent_other_encoding(leaf, fi); | ||
3078 | |||
3079 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { | 3081 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { |
3080 | item_end += | 3082 | item_end += |
3081 | btrfs_file_extent_num_bytes(leaf, fi); | 3083 | btrfs_file_extent_num_bytes(leaf, fi); |
@@ -3103,7 +3105,7 @@ search_again: | |||
3103 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { | 3105 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { |
3104 | u64 num_dec; | 3106 | u64 num_dec; |
3105 | extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); | 3107 | extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); |
3106 | if (!del_item && !encoding) { | 3108 | if (!del_item) { |
3107 | u64 orig_num_bytes = | 3109 | u64 orig_num_bytes = |
3108 | btrfs_file_extent_num_bytes(leaf, fi); | 3110 | btrfs_file_extent_num_bytes(leaf, fi); |
3109 | extent_num_bytes = new_size - | 3111 | extent_num_bytes = new_size - |
@@ -3179,7 +3181,7 @@ delete: | |||
3179 | ret = btrfs_free_extent(trans, root, extent_start, | 3181 | ret = btrfs_free_extent(trans, root, extent_start, |
3180 | extent_num_bytes, 0, | 3182 | extent_num_bytes, 0, |
3181 | btrfs_header_owner(leaf), | 3183 | btrfs_header_owner(leaf), |
3182 | ino, extent_offset); | 3184 | ino, extent_offset, 0); |
3183 | BUG_ON(ret); | 3185 | BUG_ON(ret); |
3184 | } | 3186 | } |
3185 | 3187 | ||
@@ -3434,7 +3436,7 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) | |||
3434 | i_size_write(inode, newsize); | 3436 | i_size_write(inode, newsize); |
3435 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); | 3437 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); |
3436 | ret = btrfs_update_inode(trans, root, inode); | 3438 | ret = btrfs_update_inode(trans, root, inode); |
3437 | btrfs_end_transaction_throttle(trans, root); | 3439 | btrfs_end_transaction(trans, root); |
3438 | } else { | 3440 | } else { |
3439 | 3441 | ||
3440 | /* | 3442 | /* |
@@ -4655,7 +4657,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4655 | } | 4657 | } |
4656 | out_unlock: | 4658 | out_unlock: |
4657 | nr = trans->blocks_used; | 4659 | nr = trans->blocks_used; |
4658 | btrfs_end_transaction_throttle(trans, root); | 4660 | btrfs_end_transaction(trans, root); |
4659 | btrfs_btree_balance_dirty(root, nr); | 4661 | btrfs_btree_balance_dirty(root, nr); |
4660 | if (drop_inode) { | 4662 | if (drop_inode) { |
4661 | inode_dec_link_count(inode); | 4663 | inode_dec_link_count(inode); |
@@ -4723,7 +4725,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4723 | } | 4725 | } |
4724 | out_unlock: | 4726 | out_unlock: |
4725 | nr = trans->blocks_used; | 4727 | nr = trans->blocks_used; |
4726 | btrfs_end_transaction_throttle(trans, root); | 4728 | btrfs_end_transaction(trans, root); |
4727 | if (drop_inode) { | 4729 | if (drop_inode) { |
4728 | inode_dec_link_count(inode); | 4730 | inode_dec_link_count(inode); |
4729 | iput(inode); | 4731 | iput(inode); |
@@ -4782,7 +4784,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4782 | } | 4784 | } |
4783 | 4785 | ||
4784 | nr = trans->blocks_used; | 4786 | nr = trans->blocks_used; |
4785 | btrfs_end_transaction_throttle(trans, root); | 4787 | btrfs_end_transaction(trans, root); |
4786 | fail: | 4788 | fail: |
4787 | if (drop_inode) { | 4789 | if (drop_inode) { |
4788 | inode_dec_link_count(inode); | 4790 | inode_dec_link_count(inode); |
@@ -4848,7 +4850,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
4848 | 4850 | ||
4849 | out_fail: | 4851 | out_fail: |
4850 | nr = trans->blocks_used; | 4852 | nr = trans->blocks_used; |
4851 | btrfs_end_transaction_throttle(trans, root); | 4853 | btrfs_end_transaction(trans, root); |
4852 | if (drop_on_err) | 4854 | if (drop_on_err) |
4853 | iput(inode); | 4855 | iput(inode); |
4854 | btrfs_btree_balance_dirty(root, nr); | 4856 | btrfs_btree_balance_dirty(root, nr); |
@@ -5121,7 +5123,7 @@ again: | |||
5121 | } | 5123 | } |
5122 | flush_dcache_page(page); | 5124 | flush_dcache_page(page); |
5123 | } else if (create && PageUptodate(page)) { | 5125 | } else if (create && PageUptodate(page)) { |
5124 | WARN_ON(1); | 5126 | BUG(); |
5125 | if (!trans) { | 5127 | if (!trans) { |
5126 | kunmap(page); | 5128 | kunmap(page); |
5127 | free_extent_map(em); | 5129 | free_extent_map(em); |
@@ -6399,21 +6401,23 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
6399 | unsigned long zero_start; | 6401 | unsigned long zero_start; |
6400 | loff_t size; | 6402 | loff_t size; |
6401 | int ret; | 6403 | int ret; |
6404 | int reserved = 0; | ||
6402 | u64 page_start; | 6405 | u64 page_start; |
6403 | u64 page_end; | 6406 | u64 page_end; |
6404 | 6407 | ||
6405 | /* Need this to keep space reservations serialized */ | ||
6406 | mutex_lock(&inode->i_mutex); | ||
6407 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 6408 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
6408 | mutex_unlock(&inode->i_mutex); | 6409 | if (!ret) { |
6409 | if (!ret) | ||
6410 | ret = btrfs_update_time(vma->vm_file); | 6410 | ret = btrfs_update_time(vma->vm_file); |
6411 | reserved = 1; | ||
6412 | } | ||
6411 | if (ret) { | 6413 | if (ret) { |
6412 | if (ret == -ENOMEM) | 6414 | if (ret == -ENOMEM) |
6413 | ret = VM_FAULT_OOM; | 6415 | ret = VM_FAULT_OOM; |
6414 | else /* -ENOSPC, -EIO, etc */ | 6416 | else /* -ENOSPC, -EIO, etc */ |
6415 | ret = VM_FAULT_SIGBUS; | 6417 | ret = VM_FAULT_SIGBUS; |
6416 | goto out; | 6418 | if (reserved) |
6419 | goto out; | ||
6420 | goto out_noreserve; | ||
6417 | } | 6421 | } |
6418 | 6422 | ||
6419 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 6423 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
@@ -6494,8 +6498,9 @@ out_unlock: | |||
6494 | if (!ret) | 6498 | if (!ret) |
6495 | return VM_FAULT_LOCKED; | 6499 | return VM_FAULT_LOCKED; |
6496 | unlock_page(page); | 6500 | unlock_page(page); |
6497 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
6498 | out: | 6501 | out: |
6502 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
6503 | out_noreserve: | ||
6499 | return ret; | 6504 | return ret; |
6500 | } | 6505 | } |
6501 | 6506 | ||
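[Editor's note] The btrfs_page_mkwrite hunk splits the error exit in two: out releases the delalloc reservation and out_noreserve skips it, with the new reserved flag deciding which one a failure takes. A minimal runnable sketch of that label discipline (function names are illustrative, the failure is simulated):

    #include <errno.h>
    #include <stdio.h>

    static int reserve_space(void)  { return 0; }      /* 0 on success */
    static int update_time(void)    { return -EIO; }   /* simulate failure */
    static void release_space(void) { puts("released"); }

    static int page_mkwrite_like(void)
    {
        int ret;
        int reserved = 0;

        ret = reserve_space();
        if (!ret) {
            ret = update_time();    /* can still fail after the reserve */
            reserved = 1;
        }
        if (ret) {
            if (reserved)
                goto out;           /* reservation taken: release it */
            goto out_noreserve;     /* reservation never happened */
        }

        /* ... fault handling; failures from here also jump to out.
         * On success the reservation is consumed by the dirtied page,
         * so return without releasing. */
        return 0;
    out:
        release_space();
    out_noreserve:
        return ret;
    }

    int main(void) { return page_mkwrite_like() ? 1 : 0; }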
@@ -6668,7 +6673,7 @@ end_trans: | |||
6668 | err = ret; | 6673 | err = ret; |
6669 | 6674 | ||
6670 | nr = trans->blocks_used; | 6675 | nr = trans->blocks_used; |
6671 | ret = btrfs_end_transaction_throttle(trans, root); | 6676 | ret = btrfs_end_transaction(trans, root); |
6672 | btrfs_btree_balance_dirty(root, nr); | 6677 | btrfs_btree_balance_dirty(root, nr); |
6673 | } | 6678 | } |
6674 | 6679 | ||
@@ -6749,6 +6754,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6749 | extent_io_tree_init(&ei->io_tree, &inode->i_data); | 6754 | extent_io_tree_init(&ei->io_tree, &inode->i_data); |
6750 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); | 6755 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); |
6751 | mutex_init(&ei->log_mutex); | 6756 | mutex_init(&ei->log_mutex); |
6757 | mutex_init(&ei->delalloc_mutex); | ||
6752 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 6758 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
6753 | INIT_LIST_HEAD(&ei->i_orphan); | 6759 | INIT_LIST_HEAD(&ei->i_orphan); |
6754 | INIT_LIST_HEAD(&ei->delalloc_inodes); | 6760 | INIT_LIST_HEAD(&ei->delalloc_inodes); |
@@ -7074,7 +7080,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
7074 | btrfs_end_log_trans(root); | 7080 | btrfs_end_log_trans(root); |
7075 | } | 7081 | } |
7076 | out_fail: | 7082 | out_fail: |
7077 | btrfs_end_transaction_throttle(trans, root); | 7083 | btrfs_end_transaction(trans, root); |
7078 | out_notrans: | 7084 | out_notrans: |
7079 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) | 7085 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) |
7080 | up_read(&root->fs_info->subvol_sem); | 7086 | up_read(&root->fs_info->subvol_sem); |
@@ -7246,7 +7252,7 @@ out_unlock: | |||
7246 | if (!err) | 7252 | if (!err) |
7247 | d_instantiate(dentry, inode); | 7253 | d_instantiate(dentry, inode); |
7248 | nr = trans->blocks_used; | 7254 | nr = trans->blocks_used; |
7249 | btrfs_end_transaction_throttle(trans, root); | 7255 | btrfs_end_transaction(trans, root); |
7250 | if (drop_inode) { | 7256 | if (drop_inode) { |
7251 | inode_dec_link_count(inode); | 7257 | inode_dec_link_count(inode); |
7252 | iput(inode); | 7258 | iput(inode); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5441ff1480fd..03bb62a9ee24 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -176,6 +176,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
176 | struct btrfs_trans_handle *trans; | 176 | struct btrfs_trans_handle *trans; |
177 | unsigned int flags, oldflags; | 177 | unsigned int flags, oldflags; |
178 | int ret; | 178 | int ret; |
179 | u64 ip_oldflags; | ||
180 | unsigned int i_oldflags; | ||
179 | 181 | ||
180 | if (btrfs_root_readonly(root)) | 182 | if (btrfs_root_readonly(root)) |
181 | return -EROFS; | 183 | return -EROFS; |
@@ -192,6 +194,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
192 | 194 | ||
193 | mutex_lock(&inode->i_mutex); | 195 | mutex_lock(&inode->i_mutex); |
194 | 196 | ||
197 | ip_oldflags = ip->flags; | ||
198 | i_oldflags = inode->i_flags; | ||
199 | |||
195 | flags = btrfs_mask_flags(inode->i_mode, flags); | 200 | flags = btrfs_mask_flags(inode->i_mode, flags); |
196 | oldflags = btrfs_flags_to_ioctl(ip->flags); | 201 | oldflags = btrfs_flags_to_ioctl(ip->flags); |
197 | if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { | 202 | if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { |
@@ -249,19 +254,24 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
249 | ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); | 254 | ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); |
250 | } | 255 | } |
251 | 256 | ||
252 | trans = btrfs_join_transaction(root); | 257 | trans = btrfs_start_transaction(root, 1); |
253 | BUG_ON(IS_ERR(trans)); | 258 | if (IS_ERR(trans)) { |
259 | ret = PTR_ERR(trans); | ||
260 | goto out_drop; | ||
261 | } | ||
254 | 262 | ||
255 | btrfs_update_iflags(inode); | 263 | btrfs_update_iflags(inode); |
256 | inode->i_ctime = CURRENT_TIME; | 264 | inode->i_ctime = CURRENT_TIME; |
257 | ret = btrfs_update_inode(trans, root, inode); | 265 | ret = btrfs_update_inode(trans, root, inode); |
258 | BUG_ON(ret); | ||
259 | 266 | ||
260 | btrfs_end_transaction(trans, root); | 267 | btrfs_end_transaction(trans, root); |
268 | out_drop: | ||
269 | if (ret) { | ||
270 | ip->flags = ip_oldflags; | ||
271 | inode->i_flags = i_oldflags; | ||
272 | } | ||
261 | 273 | ||
262 | mnt_drop_write_file(file); | 274 | mnt_drop_write_file(file); |
263 | |||
264 | ret = 0; | ||
265 | out_unlock: | 275 | out_unlock: |
266 | mutex_unlock(&inode->i_mutex); | 276 | mutex_unlock(&inode->i_mutex); |
267 | return ret; | 277 | return ret; |
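[Editor's note] The setflags hunk drops two BUG_ONs in favor of real unwinding: both flag words are snapshotted before the speculative update and restored at out_drop if starting the transaction or updating the inode fails. The snapshot-and-rollback idiom in isolation (illustrative names, failure simulated):

    #include <stdio.h>

    struct inode_like { unsigned long ip_flags, i_flags; };

    static int start_transaction(void) { return -1; }  /* simulate ENOSPC */

    static int setflags(struct inode_like *ino, unsigned long newf)
    {
        /* Snapshot both flag words before touching them ... */
        unsigned long ip_old = ino->ip_flags;
        unsigned long i_old = ino->i_flags;
        int ret;

        ino->ip_flags = newf;       /* speculative update */
        ino->i_flags = newf;

        ret = start_transaction();
        if (ret) {
            /* ... and roll back on failure instead of BUG_ON. */
            ino->ip_flags = ip_old;
            ino->i_flags = i_old;
        }
        return ret;
    }

    int main(void)
    {
        struct inode_like ino = { 1, 1 };

        setflags(&ino, 42);
        printf("after failed setflags: %lu %lu\n", ino.ip_flags, ino.i_flags);
        return 0;                   /* prints "1 1": state was restored */
    }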
@@ -276,14 +286,13 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) | |||
276 | 286 | ||
277 | static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | 287 | static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) |
278 | { | 288 | { |
279 | struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info; | 289 | struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb); |
280 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
281 | struct btrfs_device *device; | 290 | struct btrfs_device *device; |
282 | struct request_queue *q; | 291 | struct request_queue *q; |
283 | struct fstrim_range range; | 292 | struct fstrim_range range; |
284 | u64 minlen = ULLONG_MAX; | 293 | u64 minlen = ULLONG_MAX; |
285 | u64 num_devices = 0; | 294 | u64 num_devices = 0; |
286 | u64 total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); | 295 | u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); |
287 | int ret; | 296 | int ret; |
288 | 297 | ||
289 | if (!capable(CAP_SYS_ADMIN)) | 298 | if (!capable(CAP_SYS_ADMIN)) |
@@ -312,7 +321,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | |||
312 | 321 | ||
313 | range.len = min(range.len, total_bytes - range.start); | 322 | range.len = min(range.len, total_bytes - range.start); |
314 | range.minlen = max(range.minlen, minlen); | 323 | range.minlen = max(range.minlen, minlen); |
315 | ret = btrfs_trim_fs(root, &range); | 324 | ret = btrfs_trim_fs(fs_info->tree_root, &range); |
316 | if (ret < 0) | 325 | if (ret < 0) |
317 | return ret; | 326 | return ret; |
318 | 327 | ||
@@ -358,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
358 | return PTR_ERR(trans); | 367 | return PTR_ERR(trans); |
359 | 368 | ||
360 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 369 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
361 | 0, objectid, NULL, 0, 0, 0); | 370 | 0, objectid, NULL, 0, 0, 0, 0); |
362 | if (IS_ERR(leaf)) { | 371 | if (IS_ERR(leaf)) { |
363 | ret = PTR_ERR(leaf); | 372 | ret = PTR_ERR(leaf); |
364 | goto fail; | 373 | goto fail; |
@@ -858,10 +867,8 @@ static int cluster_pages_for_defrag(struct inode *inode, | |||
858 | return 0; | 867 | return 0; |
859 | file_end = (isize - 1) >> PAGE_CACHE_SHIFT; | 868 | file_end = (isize - 1) >> PAGE_CACHE_SHIFT; |
860 | 869 | ||
861 | mutex_lock(&inode->i_mutex); | ||
862 | ret = btrfs_delalloc_reserve_space(inode, | 870 | ret = btrfs_delalloc_reserve_space(inode, |
863 | num_pages << PAGE_CACHE_SHIFT); | 871 | num_pages << PAGE_CACHE_SHIFT); |
864 | mutex_unlock(&inode->i_mutex); | ||
865 | if (ret) | 872 | if (ret) |
866 | return ret; | 873 | return ret; |
867 | again: | 874 | again: |
@@ -1058,7 +1065,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1058 | i = range->start >> PAGE_CACHE_SHIFT; | 1065 | i = range->start >> PAGE_CACHE_SHIFT; |
1059 | } | 1066 | } |
1060 | if (!max_to_defrag) | 1067 | if (!max_to_defrag) |
1061 | max_to_defrag = last_index; | 1068 | max_to_defrag = last_index + 1; |
1062 | 1069 | ||
1063 | /* | 1070 | /* |
1064 | * make writeback start from i, so the defrag range can be | 1071 | * make writeback start from i, so the defrag range can be |
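[Editor's note] The one-line defrag change above fixes an index/count confusion: last_index is the zero-based index of the final page, so defragging the whole file means last_index + 1 pages. A tiny runnable check:

    #include <stdio.h>

    int main(void)
    {
        /* A 4096-byte file occupies exactly one 4 KiB page: the last
         * page index is 0, but the page count is 1. */
        unsigned long isize = 4096;
        unsigned long last_index = (isize - 1) >> 12;   /* 0 */
        unsigned long max_to_defrag = last_index + 1;   /* 1, not 0 */

        printf("last_index=%lu pages=%lu\n", last_index, max_to_defrag);
        return 0;
    }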
@@ -1203,13 +1210,21 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
1203 | if (!capable(CAP_SYS_ADMIN)) | 1210 | if (!capable(CAP_SYS_ADMIN)) |
1204 | return -EPERM; | 1211 | return -EPERM; |
1205 | 1212 | ||
1213 | mutex_lock(&root->fs_info->volume_mutex); | ||
1214 | if (root->fs_info->balance_ctl) { | ||
1215 | printk(KERN_INFO "btrfs: balance in progress\n"); | ||
1216 | ret = -EINVAL; | ||
1217 | goto out; | ||
1218 | } | ||
1219 | |||
1206 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 1220 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
1207 | if (IS_ERR(vol_args)) | 1221 | if (IS_ERR(vol_args)) { |
1208 | return PTR_ERR(vol_args); | 1222 | ret = PTR_ERR(vol_args); |
1223 | goto out; | ||
1224 | } | ||
1209 | 1225 | ||
1210 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 1226 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
1211 | 1227 | ||
1212 | mutex_lock(&root->fs_info->volume_mutex); | ||
1213 | sizestr = vol_args->name; | 1228 | sizestr = vol_args->name; |
1214 | devstr = strchr(sizestr, ':'); | 1229 | devstr = strchr(sizestr, ':'); |
1215 | if (devstr) { | 1230 | if (devstr) { |
@@ -1226,7 +1241,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
1226 | printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", | 1241 | printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", |
1227 | (unsigned long long)devid); | 1242 | (unsigned long long)devid); |
1228 | ret = -EINVAL; | 1243 | ret = -EINVAL; |
1229 | goto out_unlock; | 1244 | goto out_free; |
1230 | } | 1245 | } |
1231 | if (!strcmp(sizestr, "max")) | 1246 | if (!strcmp(sizestr, "max")) |
1232 | new_size = device->bdev->bd_inode->i_size; | 1247 | new_size = device->bdev->bd_inode->i_size; |
@@ -1241,7 +1256,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
1241 | new_size = memparse(sizestr, NULL); | 1256 | new_size = memparse(sizestr, NULL); |
1242 | if (new_size == 0) { | 1257 | if (new_size == 0) { |
1243 | ret = -EINVAL; | 1258 | ret = -EINVAL; |
1244 | goto out_unlock; | 1259 | goto out_free; |
1245 | } | 1260 | } |
1246 | } | 1261 | } |
1247 | 1262 | ||
@@ -1250,7 +1265,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
1250 | if (mod < 0) { | 1265 | if (mod < 0) { |
1251 | if (new_size > old_size) { | 1266 | if (new_size > old_size) { |
1252 | ret = -EINVAL; | 1267 | ret = -EINVAL; |
1253 | goto out_unlock; | 1268 | goto out_free; |
1254 | } | 1269 | } |
1255 | new_size = old_size - new_size; | 1270 | new_size = old_size - new_size; |
1256 | } else if (mod > 0) { | 1271 | } else if (mod > 0) { |
@@ -1259,11 +1274,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
1259 | 1274 | ||
1260 | if (new_size < 256 * 1024 * 1024) { | 1275 | if (new_size < 256 * 1024 * 1024) { |
1261 | ret = -EINVAL; | 1276 | ret = -EINVAL; |
1262 | goto out_unlock; | 1277 | goto out_free; |
1263 | } | 1278 | } |
1264 | if (new_size > device->bdev->bd_inode->i_size) { | 1279 | if (new_size > device->bdev->bd_inode->i_size) { |
1265 | ret = -EFBIG; | 1280 | ret = -EFBIG; |
1266 | goto out_unlock; | 1281 | goto out_free; |
1267 | } | 1282 | } |
1268 | 1283 | ||
1269 | do_div(new_size, root->sectorsize); | 1284 | do_div(new_size, root->sectorsize); |
@@ -1276,7 +1291,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
1276 | trans = btrfs_start_transaction(root, 0); | 1291 | trans = btrfs_start_transaction(root, 0); |
1277 | if (IS_ERR(trans)) { | 1292 | if (IS_ERR(trans)) { |
1278 | ret = PTR_ERR(trans); | 1293 | ret = PTR_ERR(trans); |
1279 | goto out_unlock; | 1294 | goto out_free; |
1280 | } | 1295 | } |
1281 | ret = btrfs_grow_device(trans, device, new_size); | 1296 | ret = btrfs_grow_device(trans, device, new_size); |
1282 | btrfs_commit_transaction(trans, root); | 1297 | btrfs_commit_transaction(trans, root); |
@@ -1284,9 +1299,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
1284 | ret = btrfs_shrink_device(device, new_size); | 1299 | ret = btrfs_shrink_device(device, new_size); |
1285 | } | 1300 | } |
1286 | 1301 | ||
1287 | out_unlock: | 1302 | out_free: |
1288 | mutex_unlock(&root->fs_info->volume_mutex); | ||
1289 | kfree(vol_args); | 1303 | kfree(vol_args); |
1304 | out: | ||
1305 | mutex_unlock(&root->fs_info->volume_mutex); | ||
1290 | return ret; | 1306 | return ret; |
1291 | } | 1307 | } |
1292 | 1308 | ||
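[Editor's note] The resize hunk takes volume_mutex before memdup_user and relabels the exits to match: failures before the allocation jump to out (unlock only), later ones to out_free (free, then fall through to unlock). A runnable sketch of the layered goto cleanup, with calloc standing in for memdup_user and the error values simplified:

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t volume_mutex = PTHREAD_MUTEX_INITIALIZER;
    static int balance_in_progress;     /* stands in for fs_info->balance_ctl */

    static int resize_like(void)
    {
        char *args;
        int ret = 0;

        /* Lock first so the balance check and the resize are atomic
         * with respect to a concurrent balance starting. */
        pthread_mutex_lock(&volume_mutex);
        if (balance_in_progress) {
            ret = -1;
            goto out;               /* nothing allocated yet */
        }

        args = calloc(1, 64);       /* stands in for memdup_user() */
        if (!args) {
            ret = -1;
            goto out;
        }

        if (args[0] != '\0') {      /* simulated parse failure path */
            ret = -1;
            goto out_free;
        }

        /* ... grow or shrink the device ... */
    out_free:
        free(args);
    out:
        pthread_mutex_unlock(&volume_mutex);
        return ret;
    }

    int main(void) { return resize_like(); }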
@@ -2052,14 +2068,25 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | |||
2052 | if (!capable(CAP_SYS_ADMIN)) | 2068 | if (!capable(CAP_SYS_ADMIN)) |
2053 | return -EPERM; | 2069 | return -EPERM; |
2054 | 2070 | ||
2071 | mutex_lock(&root->fs_info->volume_mutex); | ||
2072 | if (root->fs_info->balance_ctl) { | ||
2073 | printk(KERN_INFO "btrfs: balance in progress\n"); | ||
2074 | ret = -EINVAL; | ||
2075 | goto out; | ||
2076 | } | ||
2077 | |||
2055 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 2078 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
2056 | if (IS_ERR(vol_args)) | 2079 | if (IS_ERR(vol_args)) { |
2057 | return PTR_ERR(vol_args); | 2080 | ret = PTR_ERR(vol_args); |
2081 | goto out; | ||
2082 | } | ||
2058 | 2083 | ||
2059 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 2084 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
2060 | ret = btrfs_init_new_device(root, vol_args->name); | 2085 | ret = btrfs_init_new_device(root, vol_args->name); |
2061 | 2086 | ||
2062 | kfree(vol_args); | 2087 | kfree(vol_args); |
2088 | out: | ||
2089 | mutex_unlock(&root->fs_info->volume_mutex); | ||
2063 | return ret; | 2090 | return ret; |
2064 | } | 2091 | } |
2065 | 2092 | ||
@@ -2074,14 +2101,25 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
2074 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 2101 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
2075 | return -EROFS; | 2102 | return -EROFS; |
2076 | 2103 | ||
2104 | mutex_lock(&root->fs_info->volume_mutex); | ||
2105 | if (root->fs_info->balance_ctl) { | ||
2106 | printk(KERN_INFO "btrfs: balance in progress\n"); | ||
2107 | ret = -EINVAL; | ||
2108 | goto out; | ||
2109 | } | ||
2110 | |||
2077 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 2111 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
2078 | if (IS_ERR(vol_args)) | 2112 | if (IS_ERR(vol_args)) { |
2079 | return PTR_ERR(vol_args); | 2113 | ret = PTR_ERR(vol_args); |
2114 | goto out; | ||
2115 | } | ||
2080 | 2116 | ||
2081 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 2117 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
2082 | ret = btrfs_rm_device(root, vol_args->name); | 2118 | ret = btrfs_rm_device(root, vol_args->name); |
2083 | 2119 | ||
2084 | kfree(vol_args); | 2120 | kfree(vol_args); |
2121 | out: | ||
2122 | mutex_unlock(&root->fs_info->volume_mutex); | ||
2085 | return ret; | 2123 | return ret; |
2086 | } | 2124 | } |
2087 | 2125 | ||
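[Editor's note] add_dev and rm_dev above gain the same preamble as resize: take volume_mutex, refuse with -EINVAL while a balance control structure exists, and only then copy the user arguments. A hedged sketch of how that guard could be factored into a helper (illustrative only; the kernel code open-codes it in each ioctl):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t volume_mutex = PTHREAD_MUTEX_INITIALIZER;
    static void *balance_ctl;       /* non-NULL while a balance exists */

    /* On 0, volume_mutex is held on return and no balance is set up;
     * on -EINVAL the lock has already been dropped. */
    static int lock_volumes_nobalance(void)
    {
        pthread_mutex_lock(&volume_mutex);
        if (balance_ctl) {
            fprintf(stderr, "btrfs: balance in progress\n");
            pthread_mutex_unlock(&volume_mutex);
            return -EINVAL;
        }
        return 0;
    }

    int main(void)
    {
        if (lock_volumes_nobalance() == 0) {
            /* ... copy the args, add or remove the device ... */
            pthread_mutex_unlock(&volume_mutex);
        }
        return 0;
    }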
@@ -2427,7 +2465,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2427 | disko, diskl, 0, | 2465 | disko, diskl, 0, |
2428 | root->root_key.objectid, | 2466 | root->root_key.objectid, |
2429 | btrfs_ino(inode), | 2467 | btrfs_ino(inode), |
2430 | new_key.offset - datao); | 2468 | new_key.offset - datao, |
2469 | 0); | ||
2431 | BUG_ON(ret); | 2470 | BUG_ON(ret); |
2432 | } | 2471 | } |
2433 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { | 2472 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { |
@@ -2977,7 +3016,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
2977 | { | 3016 | { |
2978 | int ret = 0; | 3017 | int ret = 0; |
2979 | int size; | 3018 | int size; |
2980 | u64 extent_offset; | 3019 | u64 extent_item_pos; |
2981 | struct btrfs_ioctl_logical_ino_args *loi; | 3020 | struct btrfs_ioctl_logical_ino_args *loi; |
2982 | struct btrfs_data_container *inodes = NULL; | 3021 | struct btrfs_data_container *inodes = NULL; |
2983 | struct btrfs_path *path = NULL; | 3022 | struct btrfs_path *path = NULL; |
@@ -3008,15 +3047,17 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
3008 | } | 3047 | } |
3009 | 3048 | ||
3010 | ret = extent_from_logical(root->fs_info, loi->logical, path, &key); | 3049 | ret = extent_from_logical(root->fs_info, loi->logical, path, &key); |
3050 | btrfs_release_path(path); | ||
3011 | 3051 | ||
3012 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) | 3052 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) |
3013 | ret = -ENOENT; | 3053 | ret = -ENOENT; |
3014 | if (ret < 0) | 3054 | if (ret < 0) |
3015 | goto out; | 3055 | goto out; |
3016 | 3056 | ||
3017 | extent_offset = loi->logical - key.objectid; | 3057 | extent_item_pos = loi->logical - key.objectid; |
3018 | ret = iterate_extent_inodes(root->fs_info, path, key.objectid, | 3058 | ret = iterate_extent_inodes(root->fs_info, path, key.objectid, |
3019 | extent_offset, build_ino_list, inodes); | 3059 | extent_item_pos, build_ino_list, |
3060 | inodes); | ||
3020 | 3061 | ||
3021 | if (ret < 0) | 3062 | if (ret < 0) |
3022 | goto out; | 3063 | goto out; |
@@ -3034,6 +3075,163 @@ out: | |||
3034 | return ret; | 3075 | return ret; |
3035 | } | 3076 | } |
3036 | 3077 | ||
3078 | void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, | ||
3079 | struct btrfs_ioctl_balance_args *bargs) | ||
3080 | { | ||
3081 | struct btrfs_balance_control *bctl = fs_info->balance_ctl; | ||
3082 | |||
3083 | bargs->flags = bctl->flags; | ||
3084 | |||
3085 | if (atomic_read(&fs_info->balance_running)) | ||
3086 | bargs->state |= BTRFS_BALANCE_STATE_RUNNING; | ||
3087 | if (atomic_read(&fs_info->balance_pause_req)) | ||
3088 | bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ; | ||
3089 | if (atomic_read(&fs_info->balance_cancel_req)) | ||
3090 | bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ; | ||
3091 | |||
3092 | memcpy(&bargs->data, &bctl->data, sizeof(bargs->data)); | ||
3093 | memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta)); | ||
3094 | memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys)); | ||
3095 | |||
3096 | if (lock) { | ||
3097 | spin_lock(&fs_info->balance_lock); | ||
3098 | memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); | ||
3099 | spin_unlock(&fs_info->balance_lock); | ||
3100 | } else { | ||
3101 | memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); | ||
3102 | } | ||
3103 | } | ||
3104 | |||
3105 | static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) | ||
3106 | { | ||
3107 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3108 | struct btrfs_ioctl_balance_args *bargs; | ||
3109 | struct btrfs_balance_control *bctl; | ||
3110 | int ret; | ||
3111 | |||
3112 | if (!capable(CAP_SYS_ADMIN)) | ||
3113 | return -EPERM; | ||
3114 | |||
3115 | if (fs_info->sb->s_flags & MS_RDONLY) | ||
3116 | return -EROFS; | ||
3117 | |||
3118 | mutex_lock(&fs_info->volume_mutex); | ||
3119 | mutex_lock(&fs_info->balance_mutex); | ||
3120 | |||
3121 | if (arg) { | ||
3122 | bargs = memdup_user(arg, sizeof(*bargs)); | ||
3123 | if (IS_ERR(bargs)) { | ||
3124 | ret = PTR_ERR(bargs); | ||
3125 | goto out; | ||
3126 | } | ||
3127 | |||
3128 | if (bargs->flags & BTRFS_BALANCE_RESUME) { | ||
3129 | if (!fs_info->balance_ctl) { | ||
3130 | ret = -ENOTCONN; | ||
3131 | goto out_bargs; | ||
3132 | } | ||
3133 | |||
3134 | bctl = fs_info->balance_ctl; | ||
3135 | spin_lock(&fs_info->balance_lock); | ||
3136 | bctl->flags |= BTRFS_BALANCE_RESUME; | ||
3137 | spin_unlock(&fs_info->balance_lock); | ||
3138 | |||
3139 | goto do_balance; | ||
3140 | } | ||
3141 | } else { | ||
3142 | bargs = NULL; | ||
3143 | } | ||
3144 | |||
3145 | if (fs_info->balance_ctl) { | ||
3146 | ret = -EINPROGRESS; | ||
3147 | goto out_bargs; | ||
3148 | } | ||
3149 | |||
3150 | bctl = kzalloc(sizeof(*bctl), GFP_NOFS); | ||
3151 | if (!bctl) { | ||
3152 | ret = -ENOMEM; | ||
3153 | goto out_bargs; | ||
3154 | } | ||
3155 | |||
3156 | bctl->fs_info = fs_info; | ||
3157 | if (arg) { | ||
3158 | memcpy(&bctl->data, &bargs->data, sizeof(bctl->data)); | ||
3159 | memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta)); | ||
3160 | memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys)); | ||
3161 | |||
3162 | bctl->flags = bargs->flags; | ||
3163 | } else { | ||
3164 | /* balance everything - no filters */ | ||
3165 | bctl->flags |= BTRFS_BALANCE_TYPE_MASK; | ||
3166 | } | ||
3167 | |||
3168 | do_balance: | ||
3169 | ret = btrfs_balance(bctl, bargs); | ||
3170 | /* | ||
3171 | * bctl is freed in __cancel_balance or in free_fs_info if | ||
3172 | * restriper was paused all the way until unmount | ||
3173 | */ | ||
3174 | if (arg) { | ||
3175 | if (copy_to_user(arg, bargs, sizeof(*bargs))) | ||
3176 | ret = -EFAULT; | ||
3177 | } | ||
3178 | |||
3179 | out_bargs: | ||
3180 | kfree(bargs); | ||
3181 | out: | ||
3182 | mutex_unlock(&fs_info->balance_mutex); | ||
3183 | mutex_unlock(&fs_info->volume_mutex); | ||
3184 | return ret; | ||
3185 | } | ||
3186 | |||
3187 | static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd) | ||
3188 | { | ||
3189 | if (!capable(CAP_SYS_ADMIN)) | ||
3190 | return -EPERM; | ||
3191 | |||
3192 | switch (cmd) { | ||
3193 | case BTRFS_BALANCE_CTL_PAUSE: | ||
3194 | return btrfs_pause_balance(root->fs_info); | ||
3195 | case BTRFS_BALANCE_CTL_CANCEL: | ||
3196 | return btrfs_cancel_balance(root->fs_info); | ||
3197 | } | ||
3198 | |||
3199 | return -EINVAL; | ||
3200 | } | ||
3201 | |||
3202 | static long btrfs_ioctl_balance_progress(struct btrfs_root *root, | ||
3203 | void __user *arg) | ||
3204 | { | ||
3205 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3206 | struct btrfs_ioctl_balance_args *bargs; | ||
3207 | int ret = 0; | ||
3208 | |||
3209 | if (!capable(CAP_SYS_ADMIN)) | ||
3210 | return -EPERM; | ||
3211 | |||
3212 | mutex_lock(&fs_info->balance_mutex); | ||
3213 | if (!fs_info->balance_ctl) { | ||
3214 | ret = -ENOTCONN; | ||
3215 | goto out; | ||
3216 | } | ||
3217 | |||
3218 | bargs = kzalloc(sizeof(*bargs), GFP_NOFS); | ||
3219 | if (!bargs) { | ||
3220 | ret = -ENOMEM; | ||
3221 | goto out; | ||
3222 | } | ||
3223 | |||
3224 | update_ioctl_balance_args(fs_info, 1, bargs); | ||
3225 | |||
3226 | if (copy_to_user(arg, bargs, sizeof(*bargs))) | ||
3227 | ret = -EFAULT; | ||
3228 | |||
3229 | kfree(bargs); | ||
3230 | out: | ||
3231 | mutex_unlock(&fs_info->balance_mutex); | ||
3232 | return ret; | ||
3233 | } | ||
3234 | |||
3037 | long btrfs_ioctl(struct file *file, unsigned int | 3235 | long btrfs_ioctl(struct file *file, unsigned int |
3038 | cmd, unsigned long arg) | 3236 | cmd, unsigned long arg) |
3039 | { | 3237 | { |
@@ -3078,7 +3276,7 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
3078 | case BTRFS_IOC_DEV_INFO: | 3276 | case BTRFS_IOC_DEV_INFO: |
3079 | return btrfs_ioctl_dev_info(root, argp); | 3277 | return btrfs_ioctl_dev_info(root, argp); |
3080 | case BTRFS_IOC_BALANCE: | 3278 | case BTRFS_IOC_BALANCE: |
3081 | return btrfs_balance(root->fs_info->dev_root); | 3279 | return btrfs_ioctl_balance(root, NULL); |
3082 | case BTRFS_IOC_CLONE: | 3280 | case BTRFS_IOC_CLONE: |
3083 | return btrfs_ioctl_clone(file, arg, 0, 0, 0); | 3281 | return btrfs_ioctl_clone(file, arg, 0, 0, 0); |
3084 | case BTRFS_IOC_CLONE_RANGE: | 3282 | case BTRFS_IOC_CLONE_RANGE: |
@@ -3110,6 +3308,12 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
3110 | return btrfs_ioctl_scrub_cancel(root, argp); | 3308 | return btrfs_ioctl_scrub_cancel(root, argp); |
3111 | case BTRFS_IOC_SCRUB_PROGRESS: | 3309 | case BTRFS_IOC_SCRUB_PROGRESS: |
3112 | return btrfs_ioctl_scrub_progress(root, argp); | 3310 | return btrfs_ioctl_scrub_progress(root, argp); |
3311 | case BTRFS_IOC_BALANCE_V2: | ||
3312 | return btrfs_ioctl_balance(root, argp); | ||
3313 | case BTRFS_IOC_BALANCE_CTL: | ||
3314 | return btrfs_ioctl_balance_ctl(root, arg); | ||
3315 | case BTRFS_IOC_BALANCE_PROGRESS: | ||
3316 | return btrfs_ioctl_balance_progress(root, argp); | ||
3113 | } | 3317 | } |
3114 | 3318 | ||
3115 | return -ENOTTY; | 3319 | return -ENOTTY; |
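[Editor's note] With BTRFS_IOC_BALANCE_PROGRESS and BTRFS_IOC_BALANCE_CTL wired into the dispatch above, userspace can poll a running balance and ask it to pause. A sketch, assuming the fs/btrfs/ioctl.h shown below is reachable on the include path (plus <linux/types.h> for the __u64 types); error handling trimmed:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/types.h>
    #include "ioctl.h"              /* the btrfs ioctl.h from this patch */

    int main(int argc, char **argv)
    {
        struct btrfs_ioctl_balance_args ba;
        int fd = open(argc > 1 ? argv[1] : "/mnt", O_RDONLY);

        if (fd < 0) {
            perror("open");
            return 1;
        }
        if (ioctl(fd, BTRFS_IOC_BALANCE_PROGRESS, &ba) == 0) {
            printf("state 0x%llx: %llu/%llu chunks, %llu considered\n",
                   (unsigned long long)ba.state,
                   (unsigned long long)ba.stat.completed,
                   (unsigned long long)ba.stat.expected,
                   (unsigned long long)ba.stat.considered);
            if (ba.state & BTRFS_BALANCE_STATE_RUNNING)
                ioctl(fd, BTRFS_IOC_BALANCE_CTL, BTRFS_BALANCE_CTL_PAUSE);
        } else {
            perror("balance progress");     /* -ENOTCONN: none running */
        }
        close(fd);
        return 0;
    }

Note that BTRFS_IOC_BALANCE_CTL takes the command as the raw third argument rather than a pointer, matching the `btrfs_ioctl_balance_ctl(root, arg)` case in the dispatch above.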
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 252ae9915de8..4f69028a68c4 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -109,6 +109,55 @@ struct btrfs_ioctl_fs_info_args { | |||
109 | __u64 reserved[124]; /* pad to 1k */ | 109 | __u64 reserved[124]; /* pad to 1k */ |
110 | }; | 110 | }; |
111 | 111 | ||
112 | /* balance control ioctl modes */ | ||
113 | #define BTRFS_BALANCE_CTL_PAUSE 1 | ||
114 | #define BTRFS_BALANCE_CTL_CANCEL 2 | ||
115 | |||
116 | /* | ||
117 | * this is packed, because it should be exactly the same as its disk | ||
118 | * byte order counterpart (struct btrfs_disk_balance_args) | ||
119 | */ | ||
120 | struct btrfs_balance_args { | ||
121 | __u64 profiles; | ||
122 | __u64 usage; | ||
123 | __u64 devid; | ||
124 | __u64 pstart; | ||
125 | __u64 pend; | ||
126 | __u64 vstart; | ||
127 | __u64 vend; | ||
128 | |||
129 | __u64 target; | ||
130 | |||
131 | __u64 flags; | ||
132 | |||
133 | __u64 unused[8]; | ||
134 | } __attribute__ ((__packed__)); | ||
135 | |||
136 | /* report balance progress to userspace */ | ||
137 | struct btrfs_balance_progress { | ||
138 | __u64 expected; /* estimated # of chunks that will be | ||
139 | * relocated to fulfill the request */ | ||
140 | __u64 considered; /* # of chunks we have considered so far */ | ||
141 | __u64 completed; /* # of chunks relocated so far */ | ||
142 | }; | ||
143 | |||
144 | #define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0) | ||
145 | #define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) | ||
146 | #define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) | ||
147 | |||
148 | struct btrfs_ioctl_balance_args { | ||
149 | __u64 flags; /* in/out */ | ||
150 | __u64 state; /* out */ | ||
151 | |||
152 | struct btrfs_balance_args data; /* in/out */ | ||
153 | struct btrfs_balance_args meta; /* in/out */ | ||
154 | struct btrfs_balance_args sys; /* in/out */ | ||
155 | |||
156 | struct btrfs_balance_progress stat; /* out */ | ||
157 | |||
158 | __u64 unused[72]; /* pad to 1k */ | ||
159 | }; | ||
160 | |||
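[Editor's note] The comments above make two layout promises: btrfs_balance_args is packed to mirror its on-disk counterpart, and btrfs_ioctl_balance_args is padded to 1k. Both follow from the definitions (17 packed __u64s are 136 bytes; 8 + 8 + 3*136 + 3*8 + 72*8 = 1024) and can be pinned at compile time; a sketch assuming a C11 compiler and this header on the include path:

    #include <assert.h>
    #include <linux/types.h>
    #include "ioctl.h"              /* the header this hunk extends */

    /* 17 packed __u64s: 9 named fields + unused[8]. */
    static_assert(sizeof(struct btrfs_balance_args) == 136,
                  "must match struct btrfs_disk_balance_args byte for byte");

    /* flags + state + 3 filter args + 3-field stat + unused[72]:
     * 8 + 8 + 3*136 + 24 + 576 = 1024. */
    static_assert(sizeof(struct btrfs_ioctl_balance_args) == 1024,
                  "userspace ABI is padded to 1k");

    int main(void) { return 0; }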
112 | #define BTRFS_INO_LOOKUP_PATH_MAX 4080 | 161 | #define BTRFS_INO_LOOKUP_PATH_MAX 4080 |
113 | struct btrfs_ioctl_ino_lookup_args { | 162 | struct btrfs_ioctl_ino_lookup_args { |
114 | __u64 treeid; | 163 | __u64 treeid; |
@@ -272,6 +321,11 @@ struct btrfs_ioctl_logical_ino_args { | |||
272 | struct btrfs_ioctl_dev_info_args) | 321 | struct btrfs_ioctl_dev_info_args) |
273 | #define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ | 322 | #define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ |
274 | struct btrfs_ioctl_fs_info_args) | 323 | struct btrfs_ioctl_fs_info_args) |
324 | #define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ | ||
325 | struct btrfs_ioctl_balance_args) | ||
326 | #define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) | ||
327 | #define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ | ||
328 | struct btrfs_ioctl_balance_args) | ||
275 | #define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ | 329 | #define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ |
276 | struct btrfs_ioctl_ino_path_args) | 330 | struct btrfs_ioctl_ino_path_args) |
277 | #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ | 331 | #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index d77b67c4b275..5e178d8f7167 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -33,6 +33,14 @@ void btrfs_assert_tree_read_locked(struct extent_buffer *eb); | |||
33 | */ | 33 | */ |
34 | void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) | 34 | void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) |
35 | { | 35 | { |
36 | if (eb->lock_nested) { | ||
37 | read_lock(&eb->lock); | ||
38 | if (eb->lock_nested && current->pid == eb->lock_owner) { | ||
39 | read_unlock(&eb->lock); | ||
40 | return; | ||
41 | } | ||
42 | read_unlock(&eb->lock); | ||
43 | } | ||
36 | if (rw == BTRFS_WRITE_LOCK) { | 44 | if (rw == BTRFS_WRITE_LOCK) { |
37 | if (atomic_read(&eb->blocking_writers) == 0) { | 45 | if (atomic_read(&eb->blocking_writers) == 0) { |
38 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); | 46 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); |
@@ -57,6 +65,14 @@ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) | |||
57 | */ | 65 | */ |
58 | void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) | 66 | void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) |
59 | { | 67 | { |
68 | if (eb->lock_nested) { | ||
69 | read_lock(&eb->lock); | ||
70 | if (eb->lock_nested && current->pid == eb->lock_owner) { ||
71 | read_unlock(&eb->lock); | ||
72 | return; | ||
73 | } | ||
74 | read_unlock(&eb->lock); | ||
75 | } | ||
60 | if (rw == BTRFS_WRITE_LOCK_BLOCKING) { | 76 | if (rw == BTRFS_WRITE_LOCK_BLOCKING) { |
61 | BUG_ON(atomic_read(&eb->blocking_writers) != 1); | 77 | BUG_ON(atomic_read(&eb->blocking_writers) != 1); |
62 | write_lock(&eb->lock); | 78 | write_lock(&eb->lock); |
@@ -81,12 +97,25 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) | |||
81 | void btrfs_tree_read_lock(struct extent_buffer *eb) | 97 | void btrfs_tree_read_lock(struct extent_buffer *eb) |
82 | { | 98 | { |
83 | again: | 99 | again: |
100 | read_lock(&eb->lock); | ||
101 | if (atomic_read(&eb->blocking_writers) && | ||
102 | current->pid == eb->lock_owner) { | ||
103 | /* | ||
104 | * This extent is already write-locked by our thread. We allow | ||
105 | * an additional read lock to be added because it's for the same | ||
106 | * thread. btrfs_find_all_roots() depends on this as it may be | ||
107 | * called on a partly (write-)locked tree. | ||
108 | */ | ||
109 | BUG_ON(eb->lock_nested); | ||
110 | eb->lock_nested = 1; | ||
111 | read_unlock(&eb->lock); | ||
112 | return; | ||
113 | } | ||
114 | read_unlock(&eb->lock); | ||
84 | wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); | 115 | wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); |
85 | read_lock(&eb->lock); | 116 | read_lock(&eb->lock); |
86 | if (atomic_read(&eb->blocking_writers)) { | 117 | if (atomic_read(&eb->blocking_writers)) { |
87 | read_unlock(&eb->lock); | 118 | read_unlock(&eb->lock); |
88 | wait_event(eb->write_lock_wq, | ||
89 | atomic_read(&eb->blocking_writers) == 0); | ||
90 | goto again; | 119 | goto again; |
91 | } | 120 | } |
92 | atomic_inc(&eb->read_locks); | 121 | atomic_inc(&eb->read_locks); |
@@ -129,6 +158,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) | |||
129 | } | 158 | } |
130 | atomic_inc(&eb->write_locks); | 159 | atomic_inc(&eb->write_locks); |
131 | atomic_inc(&eb->spinning_writers); | 160 | atomic_inc(&eb->spinning_writers); |
161 | eb->lock_owner = current->pid; | ||
132 | return 1; | 162 | return 1; |
133 | } | 163 | } |
134 | 164 | ||
@@ -137,6 +167,15 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) | |||
137 | */ | 167 | */ |
138 | void btrfs_tree_read_unlock(struct extent_buffer *eb) | 168 | void btrfs_tree_read_unlock(struct extent_buffer *eb) |
139 | { | 169 | { |
170 | if (eb->lock_nested) { | ||
171 | read_lock(&eb->lock); | ||
172 | if (eb->lock_nested && current->pid == eb->lock_owner) { | ||
173 | eb->lock_nested = 0; | ||
174 | read_unlock(&eb->lock); | ||
175 | return; | ||
176 | } | ||
177 | read_unlock(&eb->lock); | ||
178 | } | ||
140 | btrfs_assert_tree_read_locked(eb); | 179 | btrfs_assert_tree_read_locked(eb); |
141 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); | 180 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); |
142 | atomic_dec(&eb->spinning_readers); | 181 | atomic_dec(&eb->spinning_readers); |
@@ -149,6 +188,15 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb) | |||
149 | */ | 188 | */ |
150 | void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) | 189 | void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) |
151 | { | 190 | { |
191 | if (eb->lock_nested) { | ||
192 | read_lock(&eb->lock); | ||
193 | if (eb->lock_nested && current->pid == eb->lock_owner) { | ||
194 | eb->lock_nested = 0; | ||
195 | read_unlock(&eb->lock); | ||
196 | return; | ||
197 | } | ||
198 | read_unlock(&eb->lock); | ||
199 | } | ||
152 | btrfs_assert_tree_read_locked(eb); | 200 | btrfs_assert_tree_read_locked(eb); |
153 | WARN_ON(atomic_read(&eb->blocking_readers) == 0); | 201 | WARN_ON(atomic_read(&eb->blocking_readers) == 0); |
154 | if (atomic_dec_and_test(&eb->blocking_readers)) | 202 | if (atomic_dec_and_test(&eb->blocking_readers)) |
@@ -181,6 +229,7 @@ again: | |||
181 | WARN_ON(atomic_read(&eb->spinning_writers)); | 229 | WARN_ON(atomic_read(&eb->spinning_writers)); |
182 | atomic_inc(&eb->spinning_writers); | 230 | atomic_inc(&eb->spinning_writers); |
183 | atomic_inc(&eb->write_locks); | 231 | atomic_inc(&eb->write_locks); |
232 | eb->lock_owner = current->pid; | ||
184 | return 0; | 233 | return 0; |
185 | } | 234 | } |
186 | 235 | ||
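[Editor's note] The locking.c hunks above let the one thread that already write-locked an extent buffer take a single nested read lock: the pid recorded in lock_owner identifies the owner, lock_nested marks the recursion, and every unlock or blocking transition checks the flag first and returns early. A condensed standalone sketch of the owner check with illustrative names (the real code rechecks lock_nested under eb->lock because other threads write those fields):

    #include <stdio.h>
    #include <sys/types.h>
    #include <unistd.h>

    struct eb_like {
        int blocking_writers;       /* stands in for the atomic counter */
        int lock_nested;
        pid_t lock_owner;           /* pid of the write-lock holder */
    };

    /* Grant a read lock without waiting for writers when the caller is
     * the thread that already write-locked this buffer. */
    static int tree_read_lock(struct eb_like *eb)
    {
        if (eb->blocking_writers && eb->lock_owner == getpid()) {
            eb->lock_nested = 1;    /* remember the recursion ... */
            return 1;               /* ... and skip the writer wait */
        }
        /* otherwise: wait for blocking_writers == 0, take eb->lock */
        return 0;
    }

    static void tree_read_unlock(struct eb_like *eb)
    {
        if (eb->lock_nested && eb->lock_owner == getpid()) {
            eb->lock_nested = 0;    /* only the bookkeeping bit drops */
            return;                 /* no reader count was ever taken */
        }
        /* otherwise: decrement the real reader counters */
    }

    int main(void)
    {
        struct eb_like eb = { .blocking_writers = 1, .lock_owner = getpid() };

        if (tree_read_lock(&eb))
            puts("nested read lock granted to the write-lock owner");
        tree_read_unlock(&eb);
        return 0;
    }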
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index cfb55434a469..8c1aae2c845d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -1604,12 +1604,12 @@ int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1604 | ret = btrfs_inc_extent_ref(trans, root, new_bytenr, | 1604 | ret = btrfs_inc_extent_ref(trans, root, new_bytenr, |
1605 | num_bytes, parent, | 1605 | num_bytes, parent, |
1606 | btrfs_header_owner(leaf), | 1606 | btrfs_header_owner(leaf), |
1607 | key.objectid, key.offset); | 1607 | key.objectid, key.offset, 1); |
1608 | BUG_ON(ret); | 1608 | BUG_ON(ret); |
1609 | 1609 | ||
1610 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, | 1610 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
1611 | parent, btrfs_header_owner(leaf), | 1611 | parent, btrfs_header_owner(leaf), |
1612 | key.objectid, key.offset); | 1612 | key.objectid, key.offset, 1); |
1613 | BUG_ON(ret); | 1613 | BUG_ON(ret); |
1614 | } | 1614 | } |
1615 | if (dirty) | 1615 | if (dirty) |
@@ -1778,21 +1778,23 @@ again: | |||
1778 | 1778 | ||
1779 | ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, | 1779 | ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, |
1780 | path->nodes[level]->start, | 1780 | path->nodes[level]->start, |
1781 | src->root_key.objectid, level - 1, 0); | 1781 | src->root_key.objectid, level - 1, 0, |
1782 | 1); | ||
1782 | BUG_ON(ret); | 1783 | BUG_ON(ret); |
1783 | ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, | 1784 | ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, |
1784 | 0, dest->root_key.objectid, level - 1, | 1785 | 0, dest->root_key.objectid, level - 1, |
1785 | 0); | 1786 | 0, 1); |
1786 | BUG_ON(ret); | 1787 | BUG_ON(ret); |
1787 | 1788 | ||
1788 | ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, | 1789 | ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, |
1789 | path->nodes[level]->start, | 1790 | path->nodes[level]->start, |
1790 | src->root_key.objectid, level - 1, 0); | 1791 | src->root_key.objectid, level - 1, 0, |
1792 | 1); | ||
1791 | BUG_ON(ret); | 1793 | BUG_ON(ret); |
1792 | 1794 | ||
1793 | ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, | 1795 | ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, |
1794 | 0, dest->root_key.objectid, level - 1, | 1796 | 0, dest->root_key.objectid, level - 1, |
1795 | 0); | 1797 | 0, 1); |
1796 | BUG_ON(ret); | 1798 | BUG_ON(ret); |
1797 | 1799 | ||
1798 | btrfs_unlock_up_safe(path, 0); | 1800 | btrfs_unlock_up_safe(path, 0); |
@@ -2244,7 +2246,7 @@ again: | |||
2244 | } else { | 2246 | } else { |
2245 | list_del_init(&reloc_root->root_list); | 2247 | list_del_init(&reloc_root->root_list); |
2246 | } | 2248 | } |
2247 | btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0); | 2249 | btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); |
2248 | } | 2250 | } |
2249 | 2251 | ||
2250 | if (found) { | 2252 | if (found) { |
@@ -2558,7 +2560,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2558 | node->eb->start, blocksize, | 2560 | node->eb->start, blocksize, |
2559 | upper->eb->start, | 2561 | upper->eb->start, |
2560 | btrfs_header_owner(upper->eb), | 2562 | btrfs_header_owner(upper->eb), |
2561 | node->level, 0); | 2563 | node->level, 0, 1); |
2562 | BUG_ON(ret); | 2564 | BUG_ON(ret); |
2563 | 2565 | ||
2564 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); | 2566 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); |
@@ -2947,9 +2949,7 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2947 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | 2949 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; |
2948 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | 2950 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; |
2949 | while (index <= last_index) { | 2951 | while (index <= last_index) { |
2950 | mutex_lock(&inode->i_mutex); | ||
2951 | ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); | 2952 | ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); |
2952 | mutex_unlock(&inode->i_mutex); | ||
2953 | if (ret) | 2953 | if (ret) |
2954 | goto out; | 2954 | goto out; |
2955 | 2955 | ||
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ddf2c90d3fc0..9770cc5bfb76 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include "transaction.h" | 25 | #include "transaction.h" |
26 | #include "backref.h" | 26 | #include "backref.h" |
27 | #include "extent_io.h" | 27 | #include "extent_io.h" |
28 | #include "check-integrity.h" | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * This is only the first step towards a full-features scrub. It reads all | 31 | * This is only the first step towards a full-features scrub. It reads all |
@@ -309,7 +310,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio, | |||
309 | u8 ref_level; | 310 | u8 ref_level; |
310 | unsigned long ptr = 0; | 311 | unsigned long ptr = 0; |
311 | const int bufsize = 4096; | 312 | const int bufsize = 4096; |
312 | u64 extent_offset; | 313 | u64 extent_item_pos; |
313 | 314 | ||
314 | path = btrfs_alloc_path(); | 315 | path = btrfs_alloc_path(); |
315 | 316 | ||
@@ -329,12 +330,13 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio, | |||
329 | if (ret < 0) | 330 | if (ret < 0) |
330 | goto out; | 331 | goto out; |
331 | 332 | ||
332 | extent_offset = swarn.logical - found_key.objectid; | 333 | extent_item_pos = swarn.logical - found_key.objectid; |
333 | swarn.extent_item_size = found_key.offset; | 334 | swarn.extent_item_size = found_key.offset; |
334 | 335 | ||
335 | eb = path->nodes[0]; | 336 | eb = path->nodes[0]; |
336 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); | 337 | ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); |
337 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | 338 | item_size = btrfs_item_size_nr(eb, path->slots[0]); |
339 | btrfs_release_path(path); | ||
338 | 340 | ||
339 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 341 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
340 | do { | 342 | do { |
@@ -351,7 +353,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio, | |||
351 | } else { | 353 | } else { |
352 | swarn.path = path; | 354 | swarn.path = path; |
353 | iterate_extent_inodes(fs_info, path, found_key.objectid, | 355 | iterate_extent_inodes(fs_info, path, found_key.objectid, |
354 | extent_offset, | 356 | extent_item_pos, |
355 | scrub_print_warning_inode, &swarn); | 357 | scrub_print_warning_inode, &swarn); |
356 | } | 358 | } |
357 | 359 | ||
@@ -732,7 +734,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, | |||
732 | bio_add_page(bio, page, PAGE_SIZE, 0); | 734 | bio_add_page(bio, page, PAGE_SIZE, 0); |
733 | bio->bi_end_io = scrub_fixup_end_io; | 735 | bio->bi_end_io = scrub_fixup_end_io; |
734 | bio->bi_private = &complete; | 736 | bio->bi_private = &complete; |
735 | submit_bio(rw, bio); | 737 | btrfsic_submit_bio(rw, bio); |
736 | 738 | ||
737 | /* this will also unplug the queue */ | 739 | /* this will also unplug the queue */ |
738 | wait_for_completion(&complete); | 740 | wait_for_completion(&complete); |
@@ -958,7 +960,7 @@ static int scrub_submit(struct scrub_dev *sdev) | |||
958 | sdev->curr = -1; | 960 | sdev->curr = -1; |
959 | atomic_inc(&sdev->in_flight); | 961 | atomic_inc(&sdev->in_flight); |
960 | 962 | ||
961 | submit_bio(READ, sbio->bio); | 963 | btrfsic_submit_bio(READ, sbio->bio); |
962 | 964 | ||
963 | return 0; | 965 | return 0; |
964 | } | 966 | } |
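[Editor's note] scrub now submits its bios through btrfsic_submit_bio so the new integrity checker (check-integrity.h, toggled by the check_int mount options added in super.c below) can see each one. Wrappers like this typically compile away to the raw call when the feature is off; a standalone illustration of the pattern, with the real interface left to fs/btrfs/check-integrity.h:

    #include <stdio.h>

    static void submit(int rw) { printf("submit rw=%d\n", rw); }

    #ifdef CHECK_INTEGRITY  /* stands in for CONFIG_BTRFS_FS_CHECK_INTEGRITY */
    static void check(int rw) { printf("checked rw=%d\n", rw); }
    static void checked_submit(int rw) { check(rw); submit(rw); }
    #else
    #define checked_submit submit   /* zero-cost passthrough */
    #endif

    int main(void)
    {
        checked_submit(0);          /* callers never know which variant ran */
        return 0;
    }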
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ae488aa1966a..3ce97b217cbe 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -147,13 +147,13 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | |||
147 | 147 | ||
148 | static void btrfs_put_super(struct super_block *sb) | 148 | static void btrfs_put_super(struct super_block *sb) |
149 | { | 149 | { |
150 | struct btrfs_root *root = btrfs_sb(sb); | 150 | (void)close_ctree(btrfs_sb(sb)->tree_root); |
151 | int ret; | 151 | /* FIXME: need to fix VFS to return error? */ |
152 | 152 | /* AV: return it _where_? ->put_super() can be triggered by any number | |
153 | ret = close_ctree(root); | 153 | * of async events, up to and including delivery of SIGKILL to the |
154 | sb->s_fs_info = NULL; | 154 | * last process that kept it busy. Or segfault in the aforementioned |
155 | 155 | * process... Whom would you report that to? | |
156 | (void)ret; /* FIXME: need to fix VFS to return error? */ | 156 | */ |
157 | } | 157 | } |
158 | 158 | ||
159 | enum { | 159 | enum { |
@@ -163,8 +163,11 @@ enum { | |||
163 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, | 163 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
164 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, | 164 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
165 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, | 165 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, |
166 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, | 166 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, |
167 | Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err, | 167 | Opt_no_space_cache, Opt_recovery, Opt_skip_balance, |
168 | Opt_check_integrity, Opt_check_integrity_including_extent_data, | ||
169 | Opt_check_integrity_print_mask, | ||
170 | Opt_err, | ||
168 | }; | 171 | }; |
169 | 172 | ||
170 | static match_table_t tokens = { | 173 | static match_table_t tokens = { |
@@ -199,6 +202,10 @@ static match_table_t tokens = { | |||
199 | {Opt_inode_cache, "inode_cache"}, | 202 | {Opt_inode_cache, "inode_cache"}, |
200 | {Opt_no_space_cache, "nospace_cache"}, | 203 | {Opt_no_space_cache, "nospace_cache"}, |
201 | {Opt_recovery, "recovery"}, | 204 | {Opt_recovery, "recovery"}, |
205 | {Opt_skip_balance, "skip_balance"}, | ||
206 | {Opt_check_integrity, "check_int"}, | ||
207 | {Opt_check_integrity_including_extent_data, "check_int_data"}, | ||
208 | {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, | ||
202 | {Opt_err, NULL}, | 209 | {Opt_err, NULL}, |
203 | }; | 210 | }; |
204 | 211 | ||
@@ -397,6 +404,40 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
397 | printk(KERN_INFO "btrfs: enabling auto recovery"); | 404 | printk(KERN_INFO "btrfs: enabling auto recovery"); |
398 | btrfs_set_opt(info->mount_opt, RECOVERY); | 405 | btrfs_set_opt(info->mount_opt, RECOVERY); |
399 | break; | 406 | break; |
407 | case Opt_skip_balance: | ||
408 | btrfs_set_opt(info->mount_opt, SKIP_BALANCE); | ||
409 | break; | ||
410 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||
411 | case Opt_check_integrity_including_extent_data: | ||
412 | printk(KERN_INFO "btrfs: enabling check integrity" | ||
413 | " including extent data\n"); | ||
414 | btrfs_set_opt(info->mount_opt, | ||
415 | CHECK_INTEGRITY_INCLUDING_EXTENT_DATA); | ||
416 | btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); | ||
417 | break; | ||
418 | case Opt_check_integrity: | ||
419 | printk(KERN_INFO "btrfs: enabling check integrity\n"); | ||
420 | btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); | ||
421 | break; | ||
422 | case Opt_check_integrity_print_mask: | ||
423 | intarg = 0; | ||
424 | match_int(&args[0], &intarg); | ||
425 | if (intarg) { | ||
426 | info->check_integrity_print_mask = intarg; | ||
427 | printk(KERN_INFO "btrfs:" | ||
428 | " check_integrity_print_mask 0x%x\n", | ||
429 | info->check_integrity_print_mask); | ||
430 | } | ||
431 | break; | ||
432 | #else | ||
433 | case Opt_check_integrity_including_extent_data: | ||
434 | case Opt_check_integrity: | ||
435 | case Opt_check_integrity_print_mask: | ||
436 | printk(KERN_ERR "btrfs: support for check_integrity*" | ||
437 | " not compiled in!\n"); | ||
438 | ret = -EINVAL; | ||
439 | goto out; | ||
440 | #endif | ||
400 | case Opt_err: | 441 | case Opt_err: |
401 | printk(KERN_INFO "btrfs: unrecognized mount option " | 442 | printk(KERN_INFO "btrfs: unrecognized mount option " |
402 | "'%s'\n", p); | 443 | "'%s'\n", p); |
@@ -500,7 +541,8 @@ out: | |||
500 | static struct dentry *get_default_root(struct super_block *sb, | 541 | static struct dentry *get_default_root(struct super_block *sb, |
501 | u64 subvol_objectid) | 542 | u64 subvol_objectid) |
502 | { | 543 | { |
503 | struct btrfs_root *root = sb->s_fs_info; | 544 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
545 | struct btrfs_root *root = fs_info->tree_root; | ||
504 | struct btrfs_root *new_root; | 546 | struct btrfs_root *new_root; |
505 | struct btrfs_dir_item *di; | 547 | struct btrfs_dir_item *di; |
506 | struct btrfs_path *path; | 548 | struct btrfs_path *path; |
@@ -530,7 +572,7 @@ static struct dentry *get_default_root(struct super_block *sb, | |||
530 | * will mount by default if we haven't been given a specific subvolume | 572 | * will mount by default if we haven't been given a specific subvolume |
531 | * to mount. | 573 | * to mount. |
532 | */ | 574 | */ |
533 | dir_id = btrfs_super_root_dir(root->fs_info->super_copy); | 575 | dir_id = btrfs_super_root_dir(fs_info->super_copy); |
534 | di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); | 576 | di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); |
535 | if (IS_ERR(di)) { | 577 | if (IS_ERR(di)) { |
536 | btrfs_free_path(path); | 578 | btrfs_free_path(path); |
@@ -544,7 +586,7 @@ static struct dentry *get_default_root(struct super_block *sb, | |||
544 | */ | 586 | */ |
545 | btrfs_free_path(path); | 587 | btrfs_free_path(path); |
546 | dir_id = BTRFS_FIRST_FREE_OBJECTID; | 588 | dir_id = BTRFS_FIRST_FREE_OBJECTID; |
547 | new_root = root->fs_info->fs_root; | 589 | new_root = fs_info->fs_root; |
548 | goto setup_root; | 590 | goto setup_root; |
549 | } | 591 | } |
550 | 592 | ||
@@ -552,7 +594,7 @@ static struct dentry *get_default_root(struct super_block *sb, | |||
552 | btrfs_free_path(path); | 594 | btrfs_free_path(path); |
553 | 595 | ||
554 | find_root: | 596 | find_root: |
555 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | 597 | new_root = btrfs_read_fs_root_no_name(fs_info, &location); |
556 | if (IS_ERR(new_root)) | 598 | if (IS_ERR(new_root)) |
557 | return ERR_CAST(new_root); | 599 | return ERR_CAST(new_root); |
558 | 600 | ||
@@ -588,7 +630,7 @@ static int btrfs_fill_super(struct super_block *sb, | |||
588 | { | 630 | { |
589 | struct inode *inode; | 631 | struct inode *inode; |
590 | struct dentry *root_dentry; | 632 | struct dentry *root_dentry; |
591 | struct btrfs_root *tree_root; | 633 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
592 | struct btrfs_key key; | 634 | struct btrfs_key key; |
593 | int err; | 635 | int err; |
594 | 636 | ||
@@ -603,18 +645,16 @@ static int btrfs_fill_super(struct super_block *sb, | |||
603 | sb->s_flags |= MS_POSIXACL; | 645 | sb->s_flags |= MS_POSIXACL; |
604 | #endif | 646 | #endif |
605 | 647 | ||
606 | tree_root = open_ctree(sb, fs_devices, (char *)data); | 648 | err = open_ctree(sb, fs_devices, (char *)data); |
607 | 649 | if (err) { | |
608 | if (IS_ERR(tree_root)) { | ||
609 | printk("btrfs: open_ctree failed\n"); | 650 | printk("btrfs: open_ctree failed\n"); |
610 | return PTR_ERR(tree_root); | 651 | return err; |
611 | } | 652 | } |
612 | sb->s_fs_info = tree_root; | ||
613 | 653 | ||
614 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; | 654 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; |
615 | key.type = BTRFS_INODE_ITEM_KEY; | 655 | key.type = BTRFS_INODE_ITEM_KEY; |
616 | key.offset = 0; | 656 | key.offset = 0; |
617 | inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root, NULL); | 657 | inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL); |
618 | if (IS_ERR(inode)) { | 658 | if (IS_ERR(inode)) { |
619 | err = PTR_ERR(inode); | 659 | err = PTR_ERR(inode); |
620 | goto fail_close; | 660 | goto fail_close; |
@@ -631,23 +671,25 @@ static int btrfs_fill_super(struct super_block *sb, | |||
631 | 671 | ||
632 | save_mount_options(sb, data); | 672 | save_mount_options(sb, data); |
633 | cleancache_init_fs(sb); | 673 | cleancache_init_fs(sb); |
674 | sb->s_flags |= MS_ACTIVE; | ||
634 | return 0; | 675 | return 0; |
635 | 676 | ||
636 | fail_close: | 677 | fail_close: |
637 | close_ctree(tree_root); | 678 | close_ctree(fs_info->tree_root); |
638 | return err; | 679 | return err; |
639 | } | 680 | } |
640 | 681 | ||
641 | int btrfs_sync_fs(struct super_block *sb, int wait) | 682 | int btrfs_sync_fs(struct super_block *sb, int wait) |
642 | { | 683 | { |
643 | struct btrfs_trans_handle *trans; | 684 | struct btrfs_trans_handle *trans; |
644 | struct btrfs_root *root = btrfs_sb(sb); | 685 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
686 | struct btrfs_root *root = fs_info->tree_root; | ||
645 | int ret; | 687 | int ret; |
646 | 688 | ||
647 | trace_btrfs_sync_fs(wait); | 689 | trace_btrfs_sync_fs(wait); |
648 | 690 | ||
649 | if (!wait) { | 691 | if (!wait) { |
650 | filemap_flush(root->fs_info->btree_inode->i_mapping); | 692 | filemap_flush(fs_info->btree_inode->i_mapping); |
651 | return 0; | 693 | return 0; |
652 | } | 694 | } |
653 | 695 | ||
@@ -663,8 +705,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
663 | 705 | ||
664 | static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | 706 | static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) |
665 | { | 707 | { |
666 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 708 | struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); |
667 | struct btrfs_fs_info *info = root->fs_info; | 709 | struct btrfs_root *root = info->tree_root; |
668 | char *compress_type; | 710 | char *compress_type; |
669 | 711 | ||
670 | if (btrfs_test_opt(root, DEGRADED)) | 712 | if (btrfs_test_opt(root, DEGRADED)) |
@@ -722,28 +764,25 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | |||
722 | seq_puts(seq, ",autodefrag"); | 764 | seq_puts(seq, ",autodefrag"); |
723 | if (btrfs_test_opt(root, INODE_MAP_CACHE)) | 765 | if (btrfs_test_opt(root, INODE_MAP_CACHE)) |
724 | seq_puts(seq, ",inode_cache"); | 766 | seq_puts(seq, ",inode_cache"); |
767 | if (btrfs_test_opt(root, SKIP_BALANCE)) | ||
768 | seq_puts(seq, ",skip_balance"); | ||
725 | return 0; | 769 | return 0; |
726 | } | 770 | } |
727 | 771 | ||
728 | static int btrfs_test_super(struct super_block *s, void *data) | 772 | static int btrfs_test_super(struct super_block *s, void *data) |
729 | { | 773 | { |
730 | struct btrfs_root *test_root = data; | 774 | struct btrfs_fs_info *p = data; |
731 | struct btrfs_root *root = btrfs_sb(s); | 775 | struct btrfs_fs_info *fs_info = btrfs_sb(s); |
732 | 776 | ||
733 | /* | 777 | return fs_info->fs_devices == p->fs_devices; |
734 | * If this super block is going away, return false as it | ||
735 | * can't match as an existing super block. | ||
736 | */ | ||
737 | if (!atomic_read(&s->s_active)) | ||
738 | return 0; | ||
739 | return root->fs_info->fs_devices == test_root->fs_info->fs_devices; | ||
740 | } | 778 | } |
741 | 779 | ||
742 | static int btrfs_set_super(struct super_block *s, void *data) | 780 | static int btrfs_set_super(struct super_block *s, void *data) |
743 | { | 781 | { |
744 | s->s_fs_info = data; | 782 | int err = set_anon_super(s, data); |
745 | 783 | if (!err) | |
746 | return set_anon_super(s, data); | 784 | s->s_fs_info = data; |
785 | return err; | ||
747 | } | 786 | } |
748 | 787 | ||
749 | /* | 788 | /* |
@@ -903,12 +942,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
903 | if (!fs_info) | 942 | if (!fs_info) |
904 | return ERR_PTR(-ENOMEM); | 943 | return ERR_PTR(-ENOMEM); |
905 | 944 | ||
906 | fs_info->tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
907 | if (!fs_info->tree_root) { | ||
908 | error = -ENOMEM; | ||
909 | goto error_fs_info; | ||
910 | } | ||
911 | fs_info->tree_root->fs_info = fs_info; | ||
912 | fs_info->fs_devices = fs_devices; | 945 | fs_info->fs_devices = fs_devices; |
913 | 946 | ||
914 | fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); | 947 | fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); |
@@ -928,43 +961,30 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
928 | } | 961 | } |
929 | 962 | ||
930 | bdev = fs_devices->latest_bdev; | 963 | bdev = fs_devices->latest_bdev; |
931 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, | 964 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, fs_info); |
932 | fs_info->tree_root); | ||
933 | if (IS_ERR(s)) { | 965 | if (IS_ERR(s)) { |
934 | error = PTR_ERR(s); | 966 | error = PTR_ERR(s); |
935 | goto error_close_devices; | 967 | goto error_close_devices; |
936 | } | 968 | } |
937 | 969 | ||
938 | if (s->s_root) { | 970 | if (s->s_root) { |
939 | if ((flags ^ s->s_flags) & MS_RDONLY) { | ||
940 | deactivate_locked_super(s); | ||
941 | error = -EBUSY; | ||
942 | goto error_close_devices; | ||
943 | } | ||
944 | |||
945 | btrfs_close_devices(fs_devices); | 971 | btrfs_close_devices(fs_devices); |
946 | free_fs_info(fs_info); | 972 | free_fs_info(fs_info); |
973 | if ((flags ^ s->s_flags) & MS_RDONLY) | ||
974 | error = -EBUSY; | ||
947 | } else { | 975 | } else { |
948 | char b[BDEVNAME_SIZE]; | 976 | char b[BDEVNAME_SIZE]; |
949 | 977 | ||
950 | s->s_flags = flags | MS_NOSEC; | 978 | s->s_flags = flags | MS_NOSEC; |
951 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); | 979 | strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); |
952 | btrfs_sb(s)->fs_info->bdev_holder = fs_type; | 980 | btrfs_sb(s)->bdev_holder = fs_type; |
953 | error = btrfs_fill_super(s, fs_devices, data, | 981 | error = btrfs_fill_super(s, fs_devices, data, |
954 | flags & MS_SILENT ? 1 : 0); | 982 | flags & MS_SILENT ? 1 : 0); |
955 | if (error) { | ||
956 | deactivate_locked_super(s); | ||
957 | return ERR_PTR(error); | ||
958 | } | ||
959 | |||
960 | s->s_flags |= MS_ACTIVE; | ||
961 | } | 983 | } |
962 | 984 | ||
963 | root = get_default_root(s, subvol_objectid); | 985 | root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error); |
964 | if (IS_ERR(root)) { | 986 | if (IS_ERR(root)) |
965 | deactivate_locked_super(s); | 987 | deactivate_locked_super(s); |
966 | return root; | ||
967 | } | ||
968 | 988 | ||
969 | return root; | 989 | return root; |
970 | 990 | ||
@@ -977,7 +997,8 @@ error_fs_info: | |||
977 | 997 | ||
978 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) | 998 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) |
979 | { | 999 | { |
980 | struct btrfs_root *root = btrfs_sb(sb); | 1000 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
1001 | struct btrfs_root *root = fs_info->tree_root; | ||
981 | int ret; | 1002 | int ret; |
982 | 1003 | ||
983 | ret = btrfs_parse_options(root, data); | 1004 | ret = btrfs_parse_options(root, data); |
@@ -993,13 +1014,13 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
993 | ret = btrfs_commit_super(root); | 1014 | ret = btrfs_commit_super(root); |
994 | WARN_ON(ret); | 1015 | WARN_ON(ret); |
995 | } else { | 1016 | } else { |
996 | if (root->fs_info->fs_devices->rw_devices == 0) | 1017 | if (fs_info->fs_devices->rw_devices == 0) |
997 | return -EACCES; | 1018 | return -EACCES; |
998 | 1019 | ||
999 | if (btrfs_super_log_root(root->fs_info->super_copy) != 0) | 1020 | if (btrfs_super_log_root(fs_info->super_copy) != 0) |
1000 | return -EINVAL; | 1021 | return -EINVAL; |
1001 | 1022 | ||
1002 | ret = btrfs_cleanup_fs_roots(root->fs_info); | 1023 | ret = btrfs_cleanup_fs_roots(fs_info); |
1003 | WARN_ON(ret); | 1024 | WARN_ON(ret); |
1004 | 1025 | ||
1005 | /* recover relocation */ | 1026 | /* recover relocation */ |
@@ -1168,18 +1189,18 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
1168 | 1189 | ||
1169 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 1190 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
1170 | { | 1191 | { |
1171 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 1192 | struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); |
1172 | struct btrfs_super_block *disk_super = root->fs_info->super_copy; | 1193 | struct btrfs_super_block *disk_super = fs_info->super_copy; |
1173 | struct list_head *head = &root->fs_info->space_info; | 1194 | struct list_head *head = &fs_info->space_info; |
1174 | struct btrfs_space_info *found; | 1195 | struct btrfs_space_info *found; |
1175 | u64 total_used = 0; | 1196 | u64 total_used = 0; |
1176 | u64 total_free_data = 0; | 1197 | u64 total_free_data = 0; |
1177 | int bits = dentry->d_sb->s_blocksize_bits; | 1198 | int bits = dentry->d_sb->s_blocksize_bits; |
1178 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 1199 | __be32 *fsid = (__be32 *)fs_info->fsid; |
1179 | int ret; | 1200 | int ret; |
1180 | 1201 | ||
1181 | /* holding chunk_mutex to avoid allocating new chunks */ | 1202 |
1182 | mutex_lock(&root->fs_info->chunk_mutex); | 1203 | mutex_lock(&fs_info->chunk_mutex); |
1183 | rcu_read_lock(); | 1204 | rcu_read_lock(); |
1184 | list_for_each_entry_rcu(found, head, list) { | 1205 | list_for_each_entry_rcu(found, head, list) { |
1185 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { | 1206 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { |
@@ -1198,14 +1219,14 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1198 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1219 | buf->f_bsize = dentry->d_sb->s_blocksize; |
1199 | buf->f_type = BTRFS_SUPER_MAGIC; | 1220 | buf->f_type = BTRFS_SUPER_MAGIC; |
1200 | buf->f_bavail = total_free_data; | 1221 | buf->f_bavail = total_free_data; |
1201 | ret = btrfs_calc_avail_data_space(root, &total_free_data); | 1222 | ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); |
1202 | if (ret) { | 1223 | if (ret) { |
1203 | mutex_unlock(&root->fs_info->chunk_mutex); | 1224 | mutex_unlock(&fs_info->chunk_mutex); |
1204 | return ret; | 1225 | return ret; |
1205 | } | 1226 | } |
1206 | buf->f_bavail += total_free_data; | 1227 | buf->f_bavail += total_free_data; |
1207 | buf->f_bavail = buf->f_bavail >> bits; | 1228 | buf->f_bavail = buf->f_bavail >> bits; |
1208 | mutex_unlock(&root->fs_info->chunk_mutex); | 1229 | mutex_unlock(&fs_info->chunk_mutex); |
1209 | 1230 | ||
1210 | /* We treat it as constant endianness (it doesn't matter _which_) | 1231 | /* We treat it as constant endianness (it doesn't matter _which_) |
1211 | because we want the fsid to come out the same whether mounted | 1232 | because we want the fsid to come out the same whether mounted |
@@ -1219,11 +1240,18 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1219 | return 0; | 1240 | return 0; |
1220 | } | 1241 | } |
1221 | 1242 | ||
1243 | static void btrfs_kill_super(struct super_block *sb) | ||
1244 | { | ||
1245 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | ||
1246 | kill_anon_super(sb); | ||
1247 | free_fs_info(fs_info); | ||
1248 | } | ||
1249 | |||
1222 | static struct file_system_type btrfs_fs_type = { | 1250 | static struct file_system_type btrfs_fs_type = { |
1223 | .owner = THIS_MODULE, | 1251 | .owner = THIS_MODULE, |
1224 | .name = "btrfs", | 1252 | .name = "btrfs", |
1225 | .mount = btrfs_mount, | 1253 | .mount = btrfs_mount, |
1226 | .kill_sb = kill_anon_super, | 1254 | .kill_sb = btrfs_kill_super, |
1227 | .fs_flags = FS_REQUIRES_DEV, | 1255 | .fs_flags = FS_REQUIRES_DEV, |
1228 | }; | 1256 | }; |
1229 | 1257 | ||
@@ -1257,17 +1285,17 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
1257 | 1285 | ||
1258 | static int btrfs_freeze(struct super_block *sb) | 1286 | static int btrfs_freeze(struct super_block *sb) |
1259 | { | 1287 | { |
1260 | struct btrfs_root *root = btrfs_sb(sb); | 1288 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
1261 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1289 | mutex_lock(&fs_info->transaction_kthread_mutex); |
1262 | mutex_lock(&root->fs_info->cleaner_mutex); | 1290 | mutex_lock(&fs_info->cleaner_mutex); |
1263 | return 0; | 1291 | return 0; |
1264 | } | 1292 | } |
1265 | 1293 | ||
1266 | static int btrfs_unfreeze(struct super_block *sb) | 1294 | static int btrfs_unfreeze(struct super_block *sb) |
1267 | { | 1295 | { |
1268 | struct btrfs_root *root = btrfs_sb(sb); | 1296 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
1269 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1297 | mutex_unlock(&fs_info->cleaner_mutex); |
1270 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1298 | mutex_unlock(&fs_info->transaction_kthread_mutex); |
1271 | return 0; | 1299 | return 0; |
1272 | } | 1300 | } |
1273 | 1301 | ||
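
The super.c hunks above all perform the same conversion: btrfs_sb() now hands back the struct btrfs_fs_info itself rather than a btrfs_root, so sb->s_fs_info stores the fs_info directly. A minimal sketch of the accessor this implies (an assumption about its shape; the helper lives in ctree.h and is not part of this diff):

    /* Sketch, not part of this hunk: btrfs_sb() as a plain cast of s_fs_info.
     * Assumes sb->s_fs_info was set to the fs_info by btrfs_set_super(). */
    static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
    {
            return sb->s_fs_info;
    }

Callers that still need a root then fetch one explicitly, as btrfs_remount() and btrfs_show_options() do above via fs_info->tree_root.
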
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 81376d94cd3c..287a6728b1ad 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -36,6 +36,8 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) | |||
36 | WARN_ON(atomic_read(&transaction->use_count) == 0); | 36 | WARN_ON(atomic_read(&transaction->use_count) == 0); |
37 | if (atomic_dec_and_test(&transaction->use_count)) { | 37 | if (atomic_dec_and_test(&transaction->use_count)) { |
38 | BUG_ON(!list_empty(&transaction->list)); | 38 | BUG_ON(!list_empty(&transaction->list)); |
39 | WARN_ON(transaction->delayed_refs.root.rb_node); | ||
40 | WARN_ON(!list_empty(&transaction->delayed_refs.seq_head)); | ||
39 | memset(transaction, 0, sizeof(*transaction)); | 41 | memset(transaction, 0, sizeof(*transaction)); |
40 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 42 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
41 | } | 43 | } |
@@ -108,8 +110,11 @@ loop: | |||
108 | cur_trans->delayed_refs.num_heads = 0; | 110 | cur_trans->delayed_refs.num_heads = 0; |
109 | cur_trans->delayed_refs.flushing = 0; | 111 | cur_trans->delayed_refs.flushing = 0; |
110 | cur_trans->delayed_refs.run_delayed_start = 0; | 112 | cur_trans->delayed_refs.run_delayed_start = 0; |
113 | cur_trans->delayed_refs.seq = 1; | ||
114 | init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); | ||
111 | spin_lock_init(&cur_trans->commit_lock); | 115 | spin_lock_init(&cur_trans->commit_lock); |
112 | spin_lock_init(&cur_trans->delayed_refs.lock); | 116 | spin_lock_init(&cur_trans->delayed_refs.lock); |
117 | INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); | ||
113 | 118 | ||
114 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 119 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
115 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 120 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); |
@@ -321,6 +326,8 @@ again: | |||
321 | } | 326 | } |
322 | 327 | ||
323 | if (num_bytes) { | 328 | if (num_bytes) { |
329 | trace_btrfs_space_reservation(root->fs_info, "transaction", | ||
330 | (u64)h, num_bytes, 1); | ||
324 | h->block_rsv = &root->fs_info->trans_block_rsv; | 331 | h->block_rsv = &root->fs_info->trans_block_rsv; |
325 | h->bytes_reserved = num_bytes; | 332 | h->bytes_reserved = num_bytes; |
326 | } | 333 | } |
@@ -467,19 +474,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
467 | 474 | ||
468 | btrfs_trans_release_metadata(trans, root); | 475 | btrfs_trans_release_metadata(trans, root); |
469 | trans->block_rsv = NULL; | 476 | trans->block_rsv = NULL; |
470 | while (count < 4) { | 477 | while (count < 2) { |
471 | unsigned long cur = trans->delayed_ref_updates; | 478 | unsigned long cur = trans->delayed_ref_updates; |
472 | trans->delayed_ref_updates = 0; | 479 | trans->delayed_ref_updates = 0; |
473 | if (cur && | 480 | if (cur && |
474 | trans->transaction->delayed_refs.num_heads_ready > 64) { | 481 | trans->transaction->delayed_refs.num_heads_ready > 64) { |
475 | trans->delayed_ref_updates = 0; | 482 | trans->delayed_ref_updates = 0; |
476 | |||
477 | /* | ||
478 | * do a full flush if the transaction is trying | ||
479 | * to close | ||
480 | */ | ||
481 | if (trans->transaction->delayed_refs.flushing) | ||
482 | cur = 0; | ||
483 | btrfs_run_delayed_refs(trans, root, cur); | 483 | btrfs_run_delayed_refs(trans, root, cur); |
484 | } else { | 484 | } else { |
485 | break; | 485 | break; |
@@ -1393,9 +1393,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
1393 | 1393 | ||
1394 | if (btrfs_header_backref_rev(root->node) < | 1394 | if (btrfs_header_backref_rev(root->node) < |
1395 | BTRFS_MIXED_BACKREF_REV) | 1395 | BTRFS_MIXED_BACKREF_REV) |
1396 | btrfs_drop_snapshot(root, NULL, 0); | 1396 | btrfs_drop_snapshot(root, NULL, 0, 0); |
1397 | else | 1397 | else |
1398 | btrfs_drop_snapshot(root, NULL, 1); | 1398 | btrfs_drop_snapshot(root, NULL, 1, 0); |
1399 | } | 1399 | } |
1400 | return 0; | 1400 | return 0; |
1401 | } | 1401 | } |
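
The transaction.c hunk wires three new delayed-ref fields into every fresh transaction: a sequence counter starting at 1, a waitqueue, and a list head. The consumers are outside this hunk; a generic sketch of how such a counter/waitqueue pair is typically waited on (an assumption, not code from this patch):

    /* Sketch (assumed consumer, not in this patch): block until the
     * delayed-ref sequence counter has reached a target value. */
    static void wait_for_seq(struct btrfs_delayed_ref_root *delayed_refs, u64 target)
    {
            wait_event(delayed_refs->seq_wait, delayed_refs->seq >= target);
    }

The matching WARN_ONs added to put_transaction() then assert that the delayed-ref tree and the seq_head list are empty by the time the transaction is freed.
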
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3568374d419d..966cc74f5d6c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -589,7 +589,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
589 | ret = btrfs_inc_extent_ref(trans, root, | 589 | ret = btrfs_inc_extent_ref(trans, root, |
590 | ins.objectid, ins.offset, | 590 | ins.objectid, ins.offset, |
591 | 0, root->root_key.objectid, | 591 | 0, root->root_key.objectid, |
592 | key->objectid, offset); | 592 | key->objectid, offset, 0); |
593 | BUG_ON(ret); | 593 | BUG_ON(ret); |
594 | } else { | 594 | } else { |
595 | /* | 595 | /* |
@@ -1957,7 +1957,8 @@ static int wait_log_commit(struct btrfs_trans_handle *trans, | |||
1957 | 1957 | ||
1958 | finish_wait(&root->log_commit_wait[index], &wait); | 1958 | finish_wait(&root->log_commit_wait[index], &wait); |
1959 | mutex_lock(&root->log_mutex); | 1959 | mutex_lock(&root->log_mutex); |
1960 | } while (root->log_transid < transid + 2 && | 1960 | } while (root->fs_info->last_trans_log_full_commit != |
1961 | trans->transid && root->log_transid < transid + 2 && | ||
1961 | atomic_read(&root->log_commit[index])); | 1962 | atomic_read(&root->log_commit[index])); |
1962 | return 0; | 1963 | return 0; |
1963 | } | 1964 | } |
@@ -1966,7 +1967,8 @@ static int wait_for_writer(struct btrfs_trans_handle *trans, | |||
1966 | struct btrfs_root *root) | 1967 | struct btrfs_root *root) |
1967 | { | 1968 | { |
1968 | DEFINE_WAIT(wait); | 1969 | DEFINE_WAIT(wait); |
1969 | while (atomic_read(&root->log_writers)) { | 1970 | while (root->fs_info->last_trans_log_full_commit != |
1971 | trans->transid && atomic_read(&root->log_writers)) { | ||
1970 | prepare_to_wait(&root->log_writer_wait, | 1972 | prepare_to_wait(&root->log_writer_wait, |
1971 | &wait, TASK_UNINTERRUPTIBLE); | 1973 | &wait, TASK_UNINTERRUPTIBLE); |
1972 | mutex_unlock(&root->log_mutex); | 1974 | mutex_unlock(&root->log_mutex); |
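
Both tree-log.c wait loops gain the same extra condition: once fs_info->last_trans_log_full_commit records this transaction's transid, the log commit has been superseded by a full transaction commit and the waiters must stop looping. Condensed into a predicate for clarity (a sketch; the patch open-codes the comparison instead of adding a helper):

    /* Sketch: the bail-out both loops now test before waiting again. */
    static inline int log_commit_superseded(struct btrfs_root *root,
                                            struct btrfs_trans_handle *trans)
    {
            return root->fs_info->last_trans_log_full_commit == trans->transid;
    }
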
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c new file mode 100644 index 000000000000..12f5147bd2b1 --- /dev/null +++ b/fs/btrfs/ulist.c | |||
@@ -0,0 +1,220 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 STRATO AG | ||
3 | * written by Arne Jansen <sensille@gmx.net> | ||
4 | * Distributed under the GNU GPL license version 2. | ||
5 | */ | ||
6 | |||
7 | #include <linux/slab.h> | ||
8 | #include <linux/module.h> | ||
9 | #include "ulist.h" | ||
10 | |||
11 | /* | ||
12 | * ulist is a generic data structure to hold a collection of unique u64 | ||
13 | * values. The only operations it supports are adding to the list and | ||
14 | * enumerating it. | ||
15 | * It is possible to store an auxiliary value along with the key. | ||
16 | * | ||
17 | * The implementation is preliminary and can probably be sped up | ||
18 | * significantly. A first step would be to store the values in an rbtree | ||
19 | * as soon as ULIST_SIZE is exceeded. | ||
20 | * | ||
21 | * A sample usage for ulists is the enumeration of directed graphs without | ||
22 | * visiting a node twice. The pseudo-code could look like this: | ||
23 | * | ||
24 | * ulist = ulist_alloc(); | ||
25 | * ulist_add(ulist, root); | ||
26 | * elem = NULL; | ||
27 | * | ||
28 | * while ((elem = ulist_next(ulist, elem))) { | ||
29 | * for (all child nodes n in elem) | ||
30 | * ulist_add(ulist, n); | ||
31 | * do something useful with the node; | ||
32 | * } | ||
33 | * ulist_free(ulist); | ||
34 | * | ||
35 | * This assumes the graph nodes are addressable by u64. This stems from the | ||
36 | * usage for tree enumeration in btrfs, where the logical addresses are | ||
37 | * 64 bit. | ||
38 | * | ||
39 | * It is also useful for tree enumeration, which could otherwise be done | ||
40 | * elegantly with recursion but is not possible due to kernel stack | ||
41 | * limitations. The loop would be similar to the above. | ||
42 | */ | ||
43 | |||
44 | /** | ||
45 | * ulist_init - freshly initialize a ulist | ||
46 | * @ulist: the ulist to initialize | ||
47 | * | ||
48 | * Note: don't use this function to init an already used ulist; use | ||
49 | * ulist_reinit instead. | ||
50 | */ | ||
51 | void ulist_init(struct ulist *ulist) | ||
52 | { | ||
53 | ulist->nnodes = 0; | ||
54 | ulist->nodes = ulist->int_nodes; | ||
55 | ulist->nodes_alloced = ULIST_SIZE; | ||
56 | } | ||
57 | EXPORT_SYMBOL(ulist_init); | ||
58 | |||
59 | /** | ||
60 | * ulist_fini - free up additionally allocated memory for the ulist | ||
61 | * @ulist: the ulist from which to free the additional memory | ||
62 | * | ||
63 | * This is useful in cases where the base 'struct ulist' has been statically | ||
64 | * allocated. | ||
65 | */ | ||
66 | void ulist_fini(struct ulist *ulist) | ||
67 | { | ||
68 | /* | ||
69 | * The first ULIST_SIZE elements are stored inline in struct ulist. | ||
70 | * Only if more elements were allocated do they need to be freed. | ||
71 | */ | ||
72 | if (ulist->nodes_alloced > ULIST_SIZE) | ||
73 | kfree(ulist->nodes); | ||
74 | ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */ | ||
75 | } | ||
76 | EXPORT_SYMBOL(ulist_fini); | ||
77 | |||
78 | /** | ||
79 | * ulist_reinit - prepare a ulist for reuse | ||
80 | * @ulist: ulist to be reused | ||
81 | * | ||
82 | * Free up all additional memory allocated for the list elements and reinit | ||
83 | * the ulist. | ||
84 | */ | ||
85 | void ulist_reinit(struct ulist *ulist) | ||
86 | { | ||
87 | ulist_fini(ulist); | ||
88 | ulist_init(ulist); | ||
89 | } | ||
90 | EXPORT_SYMBOL(ulist_reinit); | ||
91 | |||
92 | /** | ||
93 | * ulist_alloc - dynamically allocate a ulist | ||
94 | * @gfp_mask: allocation flags to use for the base allocation | ||
95 | * | ||
96 | * The allocated ulist will be returned in an initialized state. | ||
97 | */ | ||
98 | struct ulist *ulist_alloc(unsigned long gfp_mask) | ||
99 | { | ||
100 | struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); | ||
101 | |||
102 | if (!ulist) | ||
103 | return NULL; | ||
104 | |||
105 | ulist_init(ulist); | ||
106 | |||
107 | return ulist; | ||
108 | } | ||
109 | EXPORT_SYMBOL(ulist_alloc); | ||
110 | |||
111 | /** | ||
112 | * ulist_free - free dynamically allocated ulist | ||
113 | * @ulist: ulist to free | ||
114 | * | ||
115 | * It is not necessary to call ulist_fini beforehand. | ||
116 | */ | ||
117 | void ulist_free(struct ulist *ulist) | ||
118 | { | ||
119 | if (!ulist) | ||
120 | return; | ||
121 | ulist_fini(ulist); | ||
122 | kfree(ulist); | ||
123 | } | ||
124 | EXPORT_SYMBOL(ulist_free); | ||
125 | |||
126 | /** | ||
127 | * ulist_add - add an element to the ulist | ||
128 | * @ulist: ulist to add the element to | ||
129 | * @val: value to add to ulist | ||
130 | * @aux: auxiliary value to store along with val | ||
131 | * @gfp_mask: flags to use for allocation | ||
132 | * | ||
133 | * Note: locking must be provided by the caller. In case of rwlocks, write | ||
134 | * locking is needed. | ||
135 | * | ||
136 | * Add an element to a ulist. The @val will only be added if it doesn't | ||
137 | * already exist. If it is added, the auxiliary value @aux is stored along with | ||
138 | * it. In case @val already exists in the ulist, @aux is ignored, even if | ||
139 | * it differs from the already stored value. | ||
140 | * | ||
141 | * ulist_add returns 0 if @val already exists in ulist and 1 if @val has been | ||
142 | * inserted. | ||
143 | * In case of allocation failure -ENOMEM is returned and the ulist stays | ||
144 | * unaltered. | ||
145 | */ | ||
146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | ||
147 | unsigned long gfp_mask) | ||
148 | { | ||
149 | int i; | ||
150 | |||
151 | for (i = 0; i < ulist->nnodes; ++i) { | ||
152 | if (ulist->nodes[i].val == val) | ||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | if (ulist->nnodes >= ulist->nodes_alloced) { | ||
157 | u64 new_alloced = ulist->nodes_alloced + 128; | ||
158 | struct ulist_node *new_nodes; | ||
159 | void *old = NULL; | ||
160 | |||
161 | /* | ||
162 | * if nodes_alloced == ULIST_SIZE no memory has been allocated | ||
163 | * yet, so pass NULL to krealloc | ||
164 | */ | ||
165 | if (ulist->nodes_alloced > ULIST_SIZE) | ||
166 | old = ulist->nodes; | ||
167 | |||
168 | new_nodes = krealloc(old, sizeof(*new_nodes) * new_alloced, | ||
169 | gfp_mask); | ||
170 | if (!new_nodes) | ||
171 | return -ENOMEM; | ||
172 | |||
173 | if (!old) | ||
174 | memcpy(new_nodes, ulist->int_nodes, | ||
175 | sizeof(ulist->int_nodes)); | ||
176 | |||
177 | ulist->nodes = new_nodes; | ||
178 | ulist->nodes_alloced = new_alloced; | ||
179 | } | ||
180 | ulist->nodes[ulist->nnodes].val = val; | ||
181 | ulist->nodes[ulist->nnodes].aux = aux; | ||
182 | ++ulist->nnodes; | ||
183 | |||
184 | return 1; | ||
185 | } | ||
186 | EXPORT_SYMBOL(ulist_add); | ||
187 | |||
188 | /** | ||
189 | * ulist_next - iterate ulist | ||
190 | * @ulist: ulist to iterate | ||
191 | * @prev: previously returned element or %NULL to start iteration | ||
192 | * | ||
193 | * Note: locking must be provided by the caller. In case of rwlocks, only read | ||
194 | * locking is needed. | ||
195 | * | ||
196 | * This function is used to iterate a ulist. The iteration is started with | ||
197 | * @prev = %NULL. It returns the next element from the ulist or %NULL when the | ||
198 | * end is reached. No guarantee is made with respect to the order in which | ||
199 | * the elements are returned. They may be returned neither in order of | ||
200 | * addition nor in ascending order. | ||
201 | * It is allowed to call ulist_add during an enumeration. Newly added items | ||
202 | * are guaranteed to show up in the running enumeration. | ||
203 | */ | ||
204 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev) | ||
205 | { | ||
206 | int next; | ||
207 | |||
208 | if (ulist->nnodes == 0) | ||
209 | return NULL; | ||
210 | |||
211 | if (!prev) | ||
212 | return &ulist->nodes[0]; | ||
213 | |||
214 | next = (prev - ulist->nodes) + 1; | ||
215 | if (next < 0 || next >= ulist->nnodes) | ||
216 | return NULL; | ||
217 | |||
218 | return &ulist->nodes[next]; | ||
219 | } | ||
220 | EXPORT_SYMBOL(ulist_next); | ||
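
A compilable counterpart to the pseudo-code in the file-header comment, written as a sketch: for_each_child() and visit() are hypothetical helpers, and GFP_NOFS is chosen purely for illustration.

    /* Sketch only: enumerate a graph without visiting a node twice,
     * per the usage described in the header comment above.
     * for_each_child() and visit() are hypothetical helpers. */
    static int walk_graph(u64 root_node)
    {
            struct ulist *seen;
            struct ulist_node *elem = NULL;
            u64 child;
            int ret;

            seen = ulist_alloc(GFP_NOFS);
            if (!seen)
                    return -ENOMEM;

            ret = ulist_add(seen, root_node, 0, GFP_NOFS);
            if (ret < 0)
                    goto out;

            /* nodes added during the walk show up later in this same loop */
            while ((elem = ulist_next(seen, elem))) {
                    for_each_child(elem->val, child) {
                            ret = ulist_add(seen, child, 0, GFP_NOFS);
                            if (ret < 0)
                                    goto out;
                    }
                    visit(elem->val);
            }
            ret = 0;
    out:
            ulist_free(seen);
            return ret;
    }

Because ulist_add() deduplicates, re-adding an already-seen child simply returns 0, and the walk terminates once no new nodes turn up.
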
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h new file mode 100644 index 000000000000..2e25dec58ec0 --- /dev/null +++ b/fs/btrfs/ulist.h | |||
@@ -0,0 +1,68 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2011 STRATO AG | ||
3 | * written by Arne Jansen <sensille@gmx.net> | ||
4 | * Distributed under the GNU GPL license version 2. | ||
5 | * | ||
6 | */ | ||
7 | |||
8 | #ifndef __ULIST__ | ||
9 | #define __ULIST__ | ||
10 | |||
11 | /* | ||
12 | * ulist is a generic data structure to hold a collection of unique u64 | ||
13 | * values. The only operations it supports are adding to the list and | ||
14 | * enumerating it. | ||
15 | * It is possible to store an auxiliary value along with the key. | ||
16 | * | ||
17 | * The implementation is preliminary and can probably be sped up | ||
18 | * significantly. A first step would be to store the values in an rbtree | ||
19 | * as soon as ULIST_SIZE is exceeded. | ||
20 | */ | ||
21 | |||
22 | /* | ||
23 | * number of elements statically allocated inside struct ulist | ||
24 | */ | ||
25 | #define ULIST_SIZE 16 | ||
26 | |||
27 | /* | ||
28 | * element of the list | ||
29 | */ | ||
30 | struct ulist_node { | ||
31 | u64 val; /* value to store */ | ||
32 | unsigned long aux; /* auxiliary value saved along with the val */ | ||
33 | }; | ||
34 | |||
35 | struct ulist { | ||
36 | /* | ||
37 | * number of elements stored in list | ||
38 | */ | ||
39 | unsigned long nnodes; | ||
40 | |||
41 | /* | ||
42 | * number of nodes we already have room for | ||
43 | */ | ||
44 | unsigned long nodes_alloced; | ||
45 | |||
46 | /* | ||
47 | * pointer to the array storing the elements. The first ULIST_SIZE | ||
48 | * elements are stored inline. In this case it points to int_nodes. | ||
49 | * After exceeding ULIST_SIZE, dynamic memory is allocated. | ||
50 | */ | ||
51 | struct ulist_node *nodes; | ||
52 | |||
53 | /* | ||
54 | * inline storage space for the first ULIST_SIZE entries | ||
55 | */ | ||
56 | struct ulist_node int_nodes[ULIST_SIZE]; | ||
57 | }; | ||
58 | |||
59 | void ulist_init(struct ulist *ulist); | ||
60 | void ulist_fini(struct ulist *ulist); | ||
61 | void ulist_reinit(struct ulist *ulist); | ||
62 | struct ulist *ulist_alloc(unsigned long gfp_mask); | ||
63 | void ulist_free(struct ulist *ulist); | ||
64 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | ||
65 | unsigned long gfp_mask); | ||
66 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev); | ||
67 | |||
68 | #endif | ||
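
One property worth calling out from the header: the embedded int_nodes array means small lists need no allocation at all. A stack-declared ulist paired with ulist_init()/ulist_fini() holds up to ULIST_SIZE (16) entries before ulist_add() first calls krealloc(), and that first growth step raises the capacity to 144 slots (16 + 128). A brief sketch of the pattern (illustrative only):

    /* Sketch: stack-allocated ulist; no heap usage until the 17th element. */
    static void ulist_inline_demo(void)
    {
            struct ulist ul;
            u64 i;

            ulist_init(&ul);
            for (i = 0; i < ULIST_SIZE; i++)
                    ulist_add(&ul, i, 0, GFP_NOFS);  /* stays in int_nodes */
            ulist_add(&ul, ULIST_SIZE, 0, GFP_NOFS); /* krealloc to 144 slots */
            ulist_fini(&ul);  /* frees the dynamic array; safe to call twice */
    }
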
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f4b839fd3c9d..0b4e2af7954d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
25 | #include <linux/capability.h> | 25 | #include <linux/capability.h> |
26 | #include <linux/kthread.h> | ||
26 | #include <asm/div64.h> | 27 | #include <asm/div64.h> |
27 | #include "compat.h" | 28 | #include "compat.h" |
28 | #include "ctree.h" | 29 | #include "ctree.h" |
@@ -32,6 +33,7 @@ | |||
32 | #include "print-tree.h" | 33 | #include "print-tree.h" |
33 | #include "volumes.h" | 34 | #include "volumes.h" |
34 | #include "async-thread.h" | 35 | #include "async-thread.h" |
36 | #include "check-integrity.h" | ||
35 | 37 | ||
36 | static int init_first_rw_device(struct btrfs_trans_handle *trans, | 38 | static int init_first_rw_device(struct btrfs_trans_handle *trans, |
37 | struct btrfs_root *root, | 39 | struct btrfs_root *root, |
@@ -246,7 +248,7 @@ loop_lock: | |||
246 | sync_pending = 0; | 248 | sync_pending = 0; |
247 | } | 249 | } |
248 | 250 | ||
249 | submit_bio(cur->bi_rw, cur); | 251 | btrfsic_submit_bio(cur->bi_rw, cur); |
250 | num_run++; | 252 | num_run++; |
251 | batch_run++; | 253 | batch_run++; |
252 | if (need_resched()) | 254 | if (need_resched()) |
@@ -706,8 +708,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
706 | u64 devid; | 708 | u64 devid; |
707 | u64 transid; | 709 | u64 transid; |
708 | 710 | ||
709 | mutex_lock(&uuid_mutex); | ||
710 | |||
711 | flags |= FMODE_EXCL; | 711 | flags |= FMODE_EXCL; |
712 | bdev = blkdev_get_by_path(path, flags, holder); | 712 | bdev = blkdev_get_by_path(path, flags, holder); |
713 | 713 | ||
@@ -716,6 +716,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
716 | goto error; | 716 | goto error; |
717 | } | 717 | } |
718 | 718 | ||
719 | mutex_lock(&uuid_mutex); | ||
719 | ret = set_blocksize(bdev, 4096); | 720 | ret = set_blocksize(bdev, 4096); |
720 | if (ret) | 721 | if (ret) |
721 | goto error_close; | 722 | goto error_close; |
@@ -737,9 +738,9 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
737 | 738 | ||
738 | brelse(bh); | 739 | brelse(bh); |
739 | error_close: | 740 | error_close: |
741 | mutex_unlock(&uuid_mutex); | ||
740 | blkdev_put(bdev, flags); | 742 | blkdev_put(bdev, flags); |
741 | error: | 743 | error: |
742 | mutex_unlock(&uuid_mutex); | ||
743 | return ret; | 744 | return ret; |
744 | } | 745 | } |
745 | 746 | ||
@@ -829,7 +830,6 @@ out: | |||
829 | 830 | ||
830 | /* | 831 | /* |
831 | * find_free_dev_extent - find free space in the specified device | 832 | * find_free_dev_extent - find free space in the specified device |
832 | * @trans: transaction handler | ||
833 | * @device: the device which we search the free space in | 833 | * @device: the device which we search the free space in |
834 | * @num_bytes: the size of the free space that we need | 834 | * @num_bytes: the size of the free space that we need |
835 | * @start: store the start of the free space. | 835 | * @start: store the start of the free space. |
@@ -848,8 +848,7 @@ out: | |||
848 | * But if we don't find suitable free space, it is used to store the size of | 848 | * But if we don't find suitable free space, it is used to store the size of |
849 | * the max free space. | 849 | * the max free space. |
850 | */ | 850 | */ |
851 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | 851 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, |
852 | struct btrfs_device *device, u64 num_bytes, | ||
853 | u64 *start, u64 *len) | 852 | u64 *start, u64 *len) |
854 | { | 853 | { |
855 | struct btrfs_key key; | 854 | struct btrfs_key key; |
@@ -893,7 +892,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
893 | key.offset = search_start; | 892 | key.offset = search_start; |
894 | key.type = BTRFS_DEV_EXTENT_KEY; | 893 | key.type = BTRFS_DEV_EXTENT_KEY; |
895 | 894 | ||
896 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 895 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
897 | if (ret < 0) | 896 | if (ret < 0) |
898 | goto out; | 897 | goto out; |
899 | if (ret > 0) { | 898 | if (ret > 0) { |
@@ -1282,7 +1281,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1282 | bool clear_super = false; | 1281 | bool clear_super = false; |
1283 | 1282 | ||
1284 | mutex_lock(&uuid_mutex); | 1283 | mutex_lock(&uuid_mutex); |
1285 | mutex_lock(&root->fs_info->volume_mutex); | ||
1286 | 1284 | ||
1287 | all_avail = root->fs_info->avail_data_alloc_bits | | 1285 | all_avail = root->fs_info->avail_data_alloc_bits | |
1288 | root->fs_info->avail_system_alloc_bits | | 1286 | root->fs_info->avail_system_alloc_bits | |
@@ -1452,7 +1450,6 @@ error_close: | |||
1452 | if (bdev) | 1450 | if (bdev) |
1453 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); | 1451 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); |
1454 | out: | 1452 | out: |
1455 | mutex_unlock(&root->fs_info->volume_mutex); | ||
1456 | mutex_unlock(&uuid_mutex); | 1453 | mutex_unlock(&uuid_mutex); |
1457 | return ret; | 1454 | return ret; |
1458 | error_undo: | 1455 | error_undo: |
@@ -1469,8 +1466,7 @@ error_undo: | |||
1469 | /* | 1466 | /* |
1470 | * does all the dirty work required for changing file system's UUID. | 1467 | * does all the dirty work required for changing file system's UUID. |
1471 | */ | 1468 | */ |
1472 | static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, | 1469 | static int btrfs_prepare_sprout(struct btrfs_root *root) |
1473 | struct btrfs_root *root) | ||
1474 | { | 1470 | { |
1475 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 1471 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
1476 | struct btrfs_fs_devices *old_devices; | 1472 | struct btrfs_fs_devices *old_devices; |
@@ -1629,7 +1625,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1629 | } | 1625 | } |
1630 | 1626 | ||
1631 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | 1627 | filemap_write_and_wait(bdev->bd_inode->i_mapping); |
1632 | mutex_lock(&root->fs_info->volume_mutex); | ||
1633 | 1628 | ||
1634 | devices = &root->fs_info->fs_devices->devices; | 1629 | devices = &root->fs_info->fs_devices->devices; |
1635 | /* | 1630 | /* |
@@ -1695,7 +1690,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1695 | 1690 | ||
1696 | if (seeding_dev) { | 1691 | if (seeding_dev) { |
1697 | sb->s_flags &= ~MS_RDONLY; | 1692 | sb->s_flags &= ~MS_RDONLY; |
1698 | ret = btrfs_prepare_sprout(trans, root); | 1693 | ret = btrfs_prepare_sprout(root); |
1699 | BUG_ON(ret); | 1694 | BUG_ON(ret); |
1700 | } | 1695 | } |
1701 | 1696 | ||
@@ -1757,8 +1752,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1757 | ret = btrfs_relocate_sys_chunks(root); | 1752 | ret = btrfs_relocate_sys_chunks(root); |
1758 | BUG_ON(ret); | 1753 | BUG_ON(ret); |
1759 | } | 1754 | } |
1760 | out: | 1755 | |
1761 | mutex_unlock(&root->fs_info->volume_mutex); | ||
1762 | return ret; | 1756 | return ret; |
1763 | error: | 1757 | error: |
1764 | blkdev_put(bdev, FMODE_EXCL); | 1758 | blkdev_put(bdev, FMODE_EXCL); |
@@ -1766,7 +1760,7 @@ error: | |||
1766 | mutex_unlock(&uuid_mutex); | 1760 | mutex_unlock(&uuid_mutex); |
1767 | up_write(&sb->s_umount); | 1761 | up_write(&sb->s_umount); |
1768 | } | 1762 | } |
1769 | goto out; | 1763 | return ret; |
1770 | } | 1764 | } |
1771 | 1765 | ||
1772 | static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, | 1766 | static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, |
@@ -2077,6 +2071,362 @@ error: | |||
2077 | return ret; | 2071 | return ret; |
2078 | } | 2072 | } |
2079 | 2073 | ||
2074 | static int insert_balance_item(struct btrfs_root *root, | ||
2075 | struct btrfs_balance_control *bctl) | ||
2076 | { | ||
2077 | struct btrfs_trans_handle *trans; | ||
2078 | struct btrfs_balance_item *item; | ||
2079 | struct btrfs_disk_balance_args disk_bargs; | ||
2080 | struct btrfs_path *path; | ||
2081 | struct extent_buffer *leaf; | ||
2082 | struct btrfs_key key; | ||
2083 | int ret, err; | ||
2084 | |||
2085 | path = btrfs_alloc_path(); | ||
2086 | if (!path) | ||
2087 | return -ENOMEM; | ||
2088 | |||
2089 | trans = btrfs_start_transaction(root, 0); | ||
2090 | if (IS_ERR(trans)) { | ||
2091 | btrfs_free_path(path); | ||
2092 | return PTR_ERR(trans); | ||
2093 | } | ||
2094 | |||
2095 | key.objectid = BTRFS_BALANCE_OBJECTID; | ||
2096 | key.type = BTRFS_BALANCE_ITEM_KEY; | ||
2097 | key.offset = 0; | ||
2098 | |||
2099 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
2100 | sizeof(*item)); | ||
2101 | if (ret) | ||
2102 | goto out; | ||
2103 | |||
2104 | leaf = path->nodes[0]; | ||
2105 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); | ||
2106 | |||
2107 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | ||
2108 | |||
2109 | btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data); | ||
2110 | btrfs_set_balance_data(leaf, item, &disk_bargs); | ||
2111 | btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta); | ||
2112 | btrfs_set_balance_meta(leaf, item, &disk_bargs); | ||
2113 | btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys); | ||
2114 | btrfs_set_balance_sys(leaf, item, &disk_bargs); | ||
2115 | |||
2116 | btrfs_set_balance_flags(leaf, item, bctl->flags); | ||
2117 | |||
2118 | btrfs_mark_buffer_dirty(leaf); | ||
2119 | out: | ||
2120 | btrfs_free_path(path); | ||
2121 | err = btrfs_commit_transaction(trans, root); | ||
2122 | if (err && !ret) | ||
2123 | ret = err; | ||
2124 | return ret; | ||
2125 | } | ||
2126 | |||
2127 | static int del_balance_item(struct btrfs_root *root) | ||
2128 | { | ||
2129 | struct btrfs_trans_handle *trans; | ||
2130 | struct btrfs_path *path; | ||
2131 | struct btrfs_key key; | ||
2132 | int ret, err; | ||
2133 | |||
2134 | path = btrfs_alloc_path(); | ||
2135 | if (!path) | ||
2136 | return -ENOMEM; | ||
2137 | |||
2138 | trans = btrfs_start_transaction(root, 0); | ||
2139 | if (IS_ERR(trans)) { | ||
2140 | btrfs_free_path(path); | ||
2141 | return PTR_ERR(trans); | ||
2142 | } | ||
2143 | |||
2144 | key.objectid = BTRFS_BALANCE_OBJECTID; | ||
2145 | key.type = BTRFS_BALANCE_ITEM_KEY; | ||
2146 | key.offset = 0; | ||
2147 | |||
2148 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
2149 | if (ret < 0) | ||
2150 | goto out; | ||
2151 | if (ret > 0) { | ||
2152 | ret = -ENOENT; | ||
2153 | goto out; | ||
2154 | } | ||
2155 | |||
2156 | ret = btrfs_del_item(trans, root, path); | ||
2157 | out: | ||
2158 | btrfs_free_path(path); | ||
2159 | err = btrfs_commit_transaction(trans, root); | ||
2160 | if (err && !ret) | ||
2161 | ret = err; | ||
2162 | return ret; | ||
2163 | } | ||
2164 | |||
2165 | /* | ||
2166 | * This is a heuristic used to reduce the number of chunks balanced on | ||
2167 | * resume after balance was interrupted. | ||
2168 | */ | ||
2169 | static void update_balance_args(struct btrfs_balance_control *bctl) | ||
2170 | { | ||
2171 | /* | ||
2172 | * Turn on soft mode for chunk types that were being converted. | ||
2173 | */ | ||
2174 | if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) | ||
2175 | bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT; | ||
2176 | if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) | ||
2177 | bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT; | ||
2178 | if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) | ||
2179 | bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT; | ||
2180 | |||
2181 | /* | ||
2182 | * Turn on the usage filter if it is not already in use. The idea is | ||
2183 | * that chunks that we have already balanced should be | ||
2184 | * reasonably full. Don't do it for chunks that are being | ||
2185 | * converted - that will keep us from relocating unconverted | ||
2186 | * (albeit full) chunks. | ||
2187 | */ | ||
2188 | if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) && | ||
2189 | !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) { | ||
2190 | bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE; | ||
2191 | bctl->data.usage = 90; | ||
2192 | } | ||
2193 | if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) && | ||
2194 | !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) { | ||
2195 | bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE; | ||
2196 | bctl->sys.usage = 90; | ||
2197 | } | ||
2198 | if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) && | ||
2199 | !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) { | ||
2200 | bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE; | ||
2201 | bctl->meta.usage = 90; | ||
2202 | } | ||
2203 | } | ||
2204 | |||
2205 | /* | ||
2206 | * Should be called with both balance and volume mutexes held to | ||
2207 | * serialize other volume operations (add_dev/rm_dev/resize) with | ||
2208 | * restriper. Same goes for unset_balance_control. | ||
2209 | */ | ||
2210 | static void set_balance_control(struct btrfs_balance_control *bctl) | ||
2211 | { | ||
2212 | struct btrfs_fs_info *fs_info = bctl->fs_info; | ||
2213 | |||
2214 | BUG_ON(fs_info->balance_ctl); | ||
2215 | |||
2216 | spin_lock(&fs_info->balance_lock); | ||
2217 | fs_info->balance_ctl = bctl; | ||
2218 | spin_unlock(&fs_info->balance_lock); | ||
2219 | } | ||
2220 | |||
2221 | static void unset_balance_control(struct btrfs_fs_info *fs_info) | ||
2222 | { | ||
2223 | struct btrfs_balance_control *bctl = fs_info->balance_ctl; | ||
2224 | |||
2225 | BUG_ON(!fs_info->balance_ctl); | ||
2226 | |||
2227 | spin_lock(&fs_info->balance_lock); | ||
2228 | fs_info->balance_ctl = NULL; | ||
2229 | spin_unlock(&fs_info->balance_lock); | ||
2230 | |||
2231 | kfree(bctl); | ||
2232 | } | ||
2233 | |||
2234 | /* | ||
2235 | * Balance filters. Return 1 if chunk should be filtered out | ||
2236 | * (should not be balanced). | ||
2237 | */ | ||
2238 | static int chunk_profiles_filter(u64 chunk_profile, | ||
2239 | struct btrfs_balance_args *bargs) | ||
2240 | { | ||
2241 | chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; | ||
2242 | |||
2243 | if (chunk_profile == 0) | ||
2244 | chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE; | ||
2245 | |||
2246 | if (bargs->profiles & chunk_profile) | ||
2247 | return 0; | ||
2248 | |||
2249 | return 1; | ||
2250 | } | ||
2251 | |||
2252 | static u64 div_factor_fine(u64 num, int factor) | ||
2253 | { | ||
2254 | if (factor <= 0) | ||
2255 | return 0; | ||
2256 | if (factor >= 100) | ||
2257 | return num; | ||
2258 | |||
2259 | num *= factor; | ||
2260 | do_div(num, 100); | ||
2261 | return num; | ||
2262 | } | ||
2263 | |||
2264 | static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, | ||
2265 | struct btrfs_balance_args *bargs) | ||
2266 | { | ||
2267 | struct btrfs_block_group_cache *cache; | ||
2268 | u64 chunk_used, user_thresh; | ||
2269 | int ret = 1; | ||
2270 | |||
2271 | cache = btrfs_lookup_block_group(fs_info, chunk_offset); | ||
2272 | chunk_used = btrfs_block_group_used(&cache->item); | ||
2273 | |||
2274 | user_thresh = div_factor_fine(cache->key.offset, bargs->usage); | ||
2275 | if (chunk_used < user_thresh) | ||
2276 | ret = 0; | ||
2277 | |||
2278 | btrfs_put_block_group(cache); | ||
2279 | return ret; | ||
2280 | } | ||
2281 | |||
2282 | static int chunk_devid_filter(struct extent_buffer *leaf, | ||
2283 | struct btrfs_chunk *chunk, | ||
2284 | struct btrfs_balance_args *bargs) | ||
2285 | { | ||
2286 | struct btrfs_stripe *stripe; | ||
2287 | int num_stripes = btrfs_chunk_num_stripes(leaf, chunk); | ||
2288 | int i; | ||
2289 | |||
2290 | for (i = 0; i < num_stripes; i++) { | ||
2291 | stripe = btrfs_stripe_nr(chunk, i); | ||
2292 | if (btrfs_stripe_devid(leaf, stripe) == bargs->devid) | ||
2293 | return 0; | ||
2294 | } | ||
2295 | |||
2296 | return 1; | ||
2297 | } | ||
2298 | |||
2299 | /* [pstart, pend) */ | ||
2300 | static int chunk_drange_filter(struct extent_buffer *leaf, | ||
2301 | struct btrfs_chunk *chunk, | ||
2302 | u64 chunk_offset, | ||
2303 | struct btrfs_balance_args *bargs) | ||
2304 | { | ||
2305 | struct btrfs_stripe *stripe; | ||
2306 | int num_stripes = btrfs_chunk_num_stripes(leaf, chunk); | ||
2307 | u64 stripe_offset; | ||
2308 | u64 stripe_length; | ||
2309 | int factor; | ||
2310 | int i; | ||
2311 | |||
2312 | if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID)) | ||
2313 | return 0; | ||
2314 | |||
2315 | if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP | | ||
2316 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2317 | factor = 2; | ||
2318 | else | ||
2319 | factor = 1; | ||
2320 | factor = num_stripes / factor; | ||
2321 | |||
2322 | for (i = 0; i < num_stripes; i++) { | ||
2323 | stripe = btrfs_stripe_nr(chunk, i); | ||
2324 | if (btrfs_stripe_devid(leaf, stripe) != bargs->devid) | ||
2325 | continue; | ||
2326 | |||
2327 | stripe_offset = btrfs_stripe_offset(leaf, stripe); | ||
2328 | stripe_length = btrfs_chunk_length(leaf, chunk); | ||
2329 | do_div(stripe_length, factor); | ||
2330 | |||
2331 | if (stripe_offset < bargs->pend && | ||
2332 | stripe_offset + stripe_length > bargs->pstart) | ||
2333 | return 0; | ||
2334 | } | ||
2335 | |||
2336 | return 1; | ||
2337 | } | ||
2338 | |||
2339 | /* [vstart, vend) */ | ||
2340 | static int chunk_vrange_filter(struct extent_buffer *leaf, | ||
2341 | struct btrfs_chunk *chunk, | ||
2342 | u64 chunk_offset, | ||
2343 | struct btrfs_balance_args *bargs) | ||
2344 | { | ||
2345 | if (chunk_offset < bargs->vend && | ||
2346 | chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart) | ||
2347 | /* at least part of the chunk is inside this vrange */ | ||
2348 | return 0; | ||
2349 | |||
2350 | return 1; | ||
2351 | } | ||
2352 | |||
2353 | static int chunk_soft_convert_filter(u64 chunk_profile, | ||
2354 | struct btrfs_balance_args *bargs) | ||
2355 | { | ||
2356 | if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT)) | ||
2357 | return 0; | ||
2358 | |||
2359 | chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; | ||
2360 | |||
2361 | if (chunk_profile == 0) | ||
2362 | chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE; | ||
2363 | |||
2364 | if (bargs->target & chunk_profile) | ||
2365 | return 1; | ||
2366 | |||
2367 | return 0; | ||
2368 | } | ||
2369 | |||
2370 | static int should_balance_chunk(struct btrfs_root *root, | ||
2371 | struct extent_buffer *leaf, | ||
2372 | struct btrfs_chunk *chunk, u64 chunk_offset) | ||
2373 | { | ||
2374 | struct btrfs_balance_control *bctl = root->fs_info->balance_ctl; | ||
2375 | struct btrfs_balance_args *bargs = NULL; | ||
2376 | u64 chunk_type = btrfs_chunk_type(leaf, chunk); | ||
2377 | |||
2378 | /* type filter */ | ||
2379 | if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) & | ||
2380 | (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) { | ||
2381 | return 0; | ||
2382 | } | ||
2383 | |||
2384 | if (chunk_type & BTRFS_BLOCK_GROUP_DATA) | ||
2385 | bargs = &bctl->data; | ||
2386 | else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) | ||
2387 | bargs = &bctl->sys; | ||
2388 | else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA) | ||
2389 | bargs = &bctl->meta; | ||
2390 | |||
2391 | /* profiles filter */ | ||
2392 | if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) && | ||
2393 | chunk_profiles_filter(chunk_type, bargs)) { | ||
2394 | return 0; | ||
2395 | } | ||
2396 | |||
2397 | /* usage filter */ | ||
2398 | if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) && | ||
2399 | chunk_usage_filter(bctl->fs_info, chunk_offset, bargs)) { | ||
2400 | return 0; | ||
2401 | } | ||
2402 | |||
2403 | /* devid filter */ | ||
2404 | if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) && | ||
2405 | chunk_devid_filter(leaf, chunk, bargs)) { | ||
2406 | return 0; | ||
2407 | } | ||
2408 | |||
2409 | /* drange filter, makes sense only with devid filter */ | ||
2410 | if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) && | ||
2411 | chunk_drange_filter(leaf, chunk, chunk_offset, bargs)) { | ||
2412 | return 0; | ||
2413 | } | ||
2414 | |||
2415 | /* vrange filter */ | ||
2416 | if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) && | ||
2417 | chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) { | ||
2418 | return 0; | ||
2419 | } | ||
2420 | |||
2421 | /* soft profile changing mode */ | ||
2422 | if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) && | ||
2423 | chunk_soft_convert_filter(chunk_type, bargs)) { | ||
2424 | return 0; | ||
2425 | } | ||
2426 | |||
2427 | return 1; | ||
2428 | } | ||
2429 | |||
2080 | static u64 div_factor(u64 num, int factor) | 2430 | static u64 div_factor(u64 num, int factor) |
2081 | { | 2431 | { |
2082 | if (factor == 10) | 2432 | if (factor == 10) |
@@ -2086,29 +2436,28 @@ static u64 div_factor(u64 num, int factor) | |||
2086 | return num; | 2436 | return num; |
2087 | } | 2437 | } |
2088 | 2438 | ||
2089 | int btrfs_balance(struct btrfs_root *dev_root) | 2439 | static int __btrfs_balance(struct btrfs_fs_info *fs_info) |
2090 | { | 2440 | { |
2091 | int ret; | 2441 | struct btrfs_balance_control *bctl = fs_info->balance_ctl; |
2092 | struct list_head *devices = &dev_root->fs_info->fs_devices->devices; | 2442 | struct btrfs_root *chunk_root = fs_info->chunk_root; |
2443 | struct btrfs_root *dev_root = fs_info->dev_root; | ||
2444 | struct list_head *devices; | ||
2093 | struct btrfs_device *device; | 2445 | struct btrfs_device *device; |
2094 | u64 old_size; | 2446 | u64 old_size; |
2095 | u64 size_to_free; | 2447 | u64 size_to_free; |
2448 | struct btrfs_chunk *chunk; | ||
2096 | struct btrfs_path *path; | 2449 | struct btrfs_path *path; |
2097 | struct btrfs_key key; | 2450 | struct btrfs_key key; |
2098 | struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; | ||
2099 | struct btrfs_trans_handle *trans; | ||
2100 | struct btrfs_key found_key; | 2451 | struct btrfs_key found_key; |
2101 | 2452 | struct btrfs_trans_handle *trans; | |
2102 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) | 2453 | struct extent_buffer *leaf; |
2103 | return -EROFS; | 2454 | int slot; |
2104 | 2455 | int ret; | |
2105 | if (!capable(CAP_SYS_ADMIN)) | 2456 | int enospc_errors = 0; |
2106 | return -EPERM; | 2457 | bool counting = true; |
2107 | |||
2108 | mutex_lock(&dev_root->fs_info->volume_mutex); | ||
2109 | dev_root = dev_root->fs_info->dev_root; | ||
2110 | 2458 | ||
2111 | /* step one make some room on all the devices */ | 2459 | /* step one make some room on all the devices */ |
2460 | devices = &fs_info->fs_devices->devices; | ||
2112 | list_for_each_entry(device, devices, dev_list) { | 2461 | list_for_each_entry(device, devices, dev_list) { |
2113 | old_size = device->total_bytes; | 2462 | old_size = device->total_bytes; |
2114 | size_to_free = div_factor(old_size, 1); | 2463 | size_to_free = div_factor(old_size, 1); |
@@ -2137,11 +2486,23 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
2137 | ret = -ENOMEM; | 2486 | ret = -ENOMEM; |
2138 | goto error; | 2487 | goto error; |
2139 | } | 2488 | } |
2489 | |||
2490 | /* zero out stat counters */ | ||
2491 | spin_lock(&fs_info->balance_lock); | ||
2492 | memset(&bctl->stat, 0, sizeof(bctl->stat)); | ||
2493 | spin_unlock(&fs_info->balance_lock); | ||
2494 | again: | ||
2140 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 2495 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
2141 | key.offset = (u64)-1; | 2496 | key.offset = (u64)-1; |
2142 | key.type = BTRFS_CHUNK_ITEM_KEY; | 2497 | key.type = BTRFS_CHUNK_ITEM_KEY; |
2143 | 2498 | ||
2144 | while (1) { | 2499 | while (1) { |
2500 | if ((!counting && atomic_read(&fs_info->balance_pause_req)) || | ||
2501 | atomic_read(&fs_info->balance_cancel_req)) { | ||
2502 | ret = -ECANCELED; | ||
2503 | goto error; | ||
2504 | } | ||
2505 | |||
2145 | ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); | 2506 | ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); |
2146 | if (ret < 0) | 2507 | if (ret < 0) |
2147 | goto error; | 2508 | goto error; |
@@ -2151,15 +2512,19 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
2151 | * failed | 2512 | * failed |
2152 | */ | 2513 | */ |
2153 | if (ret == 0) | 2514 | if (ret == 0) |
2154 | break; | 2515 | BUG(); /* FIXME break ? */ |
2155 | 2516 | ||
2156 | ret = btrfs_previous_item(chunk_root, path, 0, | 2517 | ret = btrfs_previous_item(chunk_root, path, 0, |
2157 | BTRFS_CHUNK_ITEM_KEY); | 2518 | BTRFS_CHUNK_ITEM_KEY); |
2158 | if (ret) | 2519 | if (ret) { |
2520 | ret = 0; | ||
2159 | break; | 2521 | break; |
2522 | } | ||
2523 | |||
2524 | leaf = path->nodes[0]; | ||
2525 | slot = path->slots[0]; | ||
2526 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
2160 | 2527 | ||
2161 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
2162 | path->slots[0]); | ||
2163 | if (found_key.objectid != key.objectid) | 2528 | if (found_key.objectid != key.objectid) |
2164 | break; | 2529 | break; |
2165 | 2530 | ||
@@ -2167,22 +2532,375 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
2167 | if (found_key.offset == 0) | 2532 | if (found_key.offset == 0) |
2168 | break; | 2533 | break; |
2169 | 2534 | ||
2535 | chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); | ||
2536 | |||
2537 | if (!counting) { | ||
2538 | spin_lock(&fs_info->balance_lock); | ||
2539 | bctl->stat.considered++; | ||
2540 | spin_unlock(&fs_info->balance_lock); | ||
2541 | } | ||
2542 | |||
2543 | ret = should_balance_chunk(chunk_root, leaf, chunk, | ||
2544 | found_key.offset); | ||
2170 | btrfs_release_path(path); | 2545 | btrfs_release_path(path); |
2546 | if (!ret) | ||
2547 | goto loop; | ||
2548 | |||
2549 | if (counting) { | ||
2550 | spin_lock(&fs_info->balance_lock); | ||
2551 | bctl->stat.expected++; | ||
2552 | spin_unlock(&fs_info->balance_lock); | ||
2553 | goto loop; | ||
2554 | } | ||
2555 | |||
2171 | ret = btrfs_relocate_chunk(chunk_root, | 2556 | ret = btrfs_relocate_chunk(chunk_root, |
2172 | chunk_root->root_key.objectid, | 2557 | chunk_root->root_key.objectid, |
2173 | found_key.objectid, | 2558 | found_key.objectid, |
2174 | found_key.offset); | 2559 | found_key.offset); |
2175 | if (ret && ret != -ENOSPC) | 2560 | if (ret && ret != -ENOSPC) |
2176 | goto error; | 2561 | goto error; |
2562 | if (ret == -ENOSPC) { | ||
2563 | enospc_errors++; | ||
2564 | } else { | ||
2565 | spin_lock(&fs_info->balance_lock); | ||
2566 | bctl->stat.completed++; | ||
2567 | spin_unlock(&fs_info->balance_lock); | ||
2568 | } | ||
2569 | loop: | ||
2177 | key.offset = found_key.offset - 1; | 2570 | key.offset = found_key.offset - 1; |
2178 | } | 2571 | } |
2179 | ret = 0; | 2572 | |
2573 | if (counting) { | ||
2574 | btrfs_release_path(path); | ||
2575 | counting = false; | ||
2576 | goto again; | ||
2577 | } | ||
2180 | error: | 2578 | error: |
2181 | btrfs_free_path(path); | 2579 | btrfs_free_path(path); |
2182 | mutex_unlock(&dev_root->fs_info->volume_mutex); | 2580 | if (enospc_errors) { |
2581 | printk(KERN_INFO "btrfs: %d enospc errors during balance\n", | ||
2582 | enospc_errors); | ||
2583 | if (!ret) | ||
2584 | ret = -ENOSPC; | ||
2585 | } | ||
2586 | |||
2183 | return ret; | 2587 | return ret; |
2184 | } | 2588 | } |
2185 | 2589 | ||
2590 | static inline int balance_need_close(struct btrfs_fs_info *fs_info) | ||
2591 | { | ||
2592 | /* cancel requested || normal exit path */ | ||
2593 | return atomic_read(&fs_info->balance_cancel_req) || | ||
2594 | (atomic_read(&fs_info->balance_pause_req) == 0 && | ||
2595 | atomic_read(&fs_info->balance_cancel_req) == 0); | ||
2596 | } | ||
2597 | |||
2598 | static void __cancel_balance(struct btrfs_fs_info *fs_info) | ||
2599 | { | ||
2600 | int ret; | ||
2601 | |||
2602 | unset_balance_control(fs_info); | ||
2603 | ret = del_balance_item(fs_info->tree_root); | ||
2604 | BUG_ON(ret); | ||
2605 | } | ||
2606 | |||
2607 | void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, | ||
2608 | struct btrfs_ioctl_balance_args *bargs); | ||
2609 | |||
2610 | /* | ||
2611 | * Should be called with both balance and volume mutexes held | ||
2612 | */ | ||
2613 | int btrfs_balance(struct btrfs_balance_control *bctl, | ||
2614 | struct btrfs_ioctl_balance_args *bargs) | ||
2615 | { | ||
2616 | struct btrfs_fs_info *fs_info = bctl->fs_info; | ||
2617 | u64 allowed; | ||
2618 | int ret; | ||
2619 | |||
2620 | if (btrfs_fs_closing(fs_info) || | ||
2621 | atomic_read(&fs_info->balance_pause_req) || | ||
2622 | atomic_read(&fs_info->balance_cancel_req)) { | ||
2623 | ret = -EINVAL; | ||
2624 | goto out; | ||
2625 | } | ||
2626 | |||
2627 | /* | ||
2628 | * In case of mixed groups both data and meta should be picked, | ||
2629 | * and identical options should be given for both of them. | ||
2630 | */ | ||
2631 | allowed = btrfs_super_incompat_flags(fs_info->super_copy); | ||
2632 | if ((allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && | ||
2633 | (bctl->flags & (BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA))) { | ||
2634 | if (!(bctl->flags & BTRFS_BALANCE_DATA) || | ||
2635 | !(bctl->flags & BTRFS_BALANCE_METADATA) || | ||
2636 | memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) { | ||
2637 | printk(KERN_ERR "btrfs: with mixed groups data and " | ||
2638 | "metadata balance options must be the same\n"); | ||
2639 | ret = -EINVAL; | ||
2640 | goto out; | ||
2641 | } | ||
2642 | } | ||
2643 | |||
2644 | /* | ||
2645 | * Profile changing sanity checks. Skip them if a simple | ||
2646 | * balance is requested. | ||
2647 | */ | ||
2648 | if (!((bctl->data.flags | bctl->sys.flags | bctl->meta.flags) & | ||
2649 | BTRFS_BALANCE_ARGS_CONVERT)) | ||
2650 | goto do_balance; | ||
2651 | |||
2652 | allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; | ||
2653 | if (fs_info->fs_devices->num_devices == 1) | ||
2654 | allowed |= BTRFS_BLOCK_GROUP_DUP; | ||
2655 | else if (fs_info->fs_devices->num_devices < 4) | ||
2656 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); | ||
2657 | else | ||
2658 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | | ||
2659 | BTRFS_BLOCK_GROUP_RAID10); | ||
2660 | |||
2661 | if (!profile_is_valid(bctl->data.target, 1) || | ||
2662 | bctl->data.target & ~allowed) { | ||
2663 | printk(KERN_ERR "btrfs: unable to start balance with target " | ||
2664 | "data profile %llu\n", | ||
2665 | (unsigned long long)bctl->data.target); | ||
2666 | ret = -EINVAL; | ||
2667 | goto out; | ||
2668 | } | ||
2669 | if (!profile_is_valid(bctl->meta.target, 1) || | ||
2670 | bctl->meta.target & ~allowed) { | ||
2671 | printk(KERN_ERR "btrfs: unable to start balance with target " | ||
2672 | "metadata profile %llu\n", | ||
2673 | (unsigned long long)bctl->meta.target); | ||
2674 | ret = -EINVAL; | ||
2675 | goto out; | ||
2676 | } | ||
2677 | if (!profile_is_valid(bctl->sys.target, 1) || | ||
2678 | bctl->sys.target & ~allowed) { | ||
2679 | printk(KERN_ERR "btrfs: unable to start balance with target " | ||
2680 | "system profile %llu\n", | ||
2681 | (unsigned long long)bctl->sys.target); | ||
2682 | ret = -EINVAL; | ||
2683 | goto out; | ||
2684 | } | ||
2685 | |||
2686 | if (bctl->data.target & BTRFS_BLOCK_GROUP_DUP) { | ||
2687 | printk(KERN_ERR "btrfs: dup for data is not allowed\n"); | ||
2688 | ret = -EINVAL; | ||
2689 | goto out; | ||
2690 | } | ||
2691 | |||
2692 | /* allow to reduce meta or sys integrity only if force set */ | ||
2693 | allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | ||
2694 | BTRFS_BLOCK_GROUP_RAID10; | ||
2695 | if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && | ||
2696 | (fs_info->avail_system_alloc_bits & allowed) && | ||
2697 | !(bctl->sys.target & allowed)) || | ||
2698 | ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && | ||
2699 | (fs_info->avail_metadata_alloc_bits & allowed) && | ||
2700 | !(bctl->meta.target & allowed))) { | ||
2701 | if (bctl->flags & BTRFS_BALANCE_FORCE) { | ||
2702 | printk(KERN_INFO "btrfs: force reducing metadata " | ||
2703 | "integrity\n"); | ||
2704 | } else { | ||
2705 | printk(KERN_ERR "btrfs: balance will reduce metadata " | ||
2706 | "integrity, use force if you want this\n"); | ||
2707 | ret = -EINVAL; | ||
2708 | goto out; | ||
2709 | } | ||
2710 | } | ||
2711 | |||
2712 | do_balance: | ||
2713 | ret = insert_balance_item(fs_info->tree_root, bctl); | ||
2714 | if (ret && ret != -EEXIST) | ||
2715 | goto out; | ||
2716 | |||
2717 | if (!(bctl->flags & BTRFS_BALANCE_RESUME)) { | ||
2718 | BUG_ON(ret == -EEXIST); | ||
2719 | set_balance_control(bctl); | ||
2720 | } else { | ||
2721 | BUG_ON(ret != -EEXIST); | ||
2722 | spin_lock(&fs_info->balance_lock); | ||
2723 | update_balance_args(bctl); | ||
2724 | spin_unlock(&fs_info->balance_lock); | ||
2725 | } | ||
2726 | |||
2727 | atomic_inc(&fs_info->balance_running); | ||
2728 | mutex_unlock(&fs_info->balance_mutex); | ||
2729 | |||
2730 | ret = __btrfs_balance(fs_info); | ||
2731 | |||
2732 | mutex_lock(&fs_info->balance_mutex); | ||
2733 | atomic_dec(&fs_info->balance_running); | ||
2734 | |||
2735 | if (bargs) { | ||
2736 | memset(bargs, 0, sizeof(*bargs)); | ||
2737 | update_ioctl_balance_args(fs_info, 0, bargs); | ||
2738 | } | ||
2739 | |||
2740 | if ((ret && ret != -ECANCELED && ret != -ENOSPC) || | ||
2741 | balance_need_close(fs_info)) { | ||
2742 | __cancel_balance(fs_info); | ||
2743 | } | ||
2744 | |||
2745 | wake_up(&fs_info->balance_wait_q); | ||
2746 | |||
2747 | return ret; | ||
2748 | out: | ||
2749 | if (bctl->flags & BTRFS_BALANCE_RESUME) | ||
2750 | __cancel_balance(fs_info); | ||
2751 | else | ||
2752 | kfree(bctl); | ||
2753 | return ret; | ||
2754 | } | ||
2755 | |||
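Per the comment above btrfs_balance(), the caller owns both mutexes on entry; the function itself drops balance_mutex around the long-running __btrfs_balance() so pause and cancel requests can get in. A hedged sketch of the expected call site (the surrounding ioctl plumbing is assumed, not shown in this hunk):

	mutex_lock(&fs_info->volume_mutex);
	mutex_lock(&fs_info->balance_mutex);

	ret = btrfs_balance(bctl, bargs);  /* on error, frees bctl or cancels
					      the on-disk item, depending on
					      BTRFS_BALANCE_RESUME */

	mutex_unlock(&fs_info->balance_mutex);
	mutex_unlock(&fs_info->volume_mutex);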
2756 | static int balance_kthread(void *data) | ||
2757 | { | ||
2758 | struct btrfs_balance_control *bctl = | ||
2759 | (struct btrfs_balance_control *)data; | ||
2760 | struct btrfs_fs_info *fs_info = bctl->fs_info; | ||
2761 | int ret = 0; | ||
2762 | |||
2763 | mutex_lock(&fs_info->volume_mutex); | ||
2764 | mutex_lock(&fs_info->balance_mutex); | ||
2765 | |||
2766 | set_balance_control(bctl); | ||
2767 | |||
2768 | if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { | ||
2769 | printk(KERN_INFO "btrfs: force skipping balance\n"); | ||
2770 | } else { | ||
2771 | printk(KERN_INFO "btrfs: continuing balance\n"); | ||
2772 | ret = btrfs_balance(bctl, NULL); | ||
2773 | } | ||
2774 | |||
2775 | mutex_unlock(&fs_info->balance_mutex); | ||
2776 | mutex_unlock(&fs_info->volume_mutex); | ||
2777 | return ret; | ||
2778 | } | ||
2779 | |||
2780 | int btrfs_recover_balance(struct btrfs_root *tree_root) | ||
2781 | { | ||
2782 | struct task_struct *tsk; | ||
2783 | struct btrfs_balance_control *bctl; | ||
2784 | struct btrfs_balance_item *item; | ||
2785 | struct btrfs_disk_balance_args disk_bargs; | ||
2786 | struct btrfs_path *path; | ||
2787 | struct extent_buffer *leaf; | ||
2788 | struct btrfs_key key; | ||
2789 | int ret; | ||
2790 | |||
2791 | path = btrfs_alloc_path(); | ||
2792 | if (!path) | ||
2793 | return -ENOMEM; | ||
2794 | |||
2795 | bctl = kzalloc(sizeof(*bctl), GFP_NOFS); | ||
2796 | if (!bctl) { | ||
2797 | ret = -ENOMEM; | ||
2798 | goto out; | ||
2799 | } | ||
2800 | |||
2801 | key.objectid = BTRFS_BALANCE_OBJECTID; | ||
2802 | key.type = BTRFS_BALANCE_ITEM_KEY; | ||
2803 | key.offset = 0; | ||
2804 | |||
2805 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); | ||
2806 | if (ret < 0) | ||
2807 | goto out_bctl; | ||
2808 | if (ret > 0) { /* ret = -ENOENT; */ | ||
2809 | ret = 0; | ||
2810 | goto out_bctl; | ||
2811 | } | ||
2812 | |||
2813 | leaf = path->nodes[0]; | ||
2814 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); | ||
2815 | |||
2816 | bctl->fs_info = tree_root->fs_info; | ||
2817 | bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME; | ||
2818 | |||
2819 | btrfs_balance_data(leaf, item, &disk_bargs); | ||
2820 | btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs); | ||
2821 | btrfs_balance_meta(leaf, item, &disk_bargs); | ||
2822 | btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs); | ||
2823 | btrfs_balance_sys(leaf, item, &disk_bargs); | ||
2824 | btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); | ||
2825 | |||
2826 | tsk = kthread_run(balance_kthread, bctl, "btrfs-balance"); | ||
2827 | if (IS_ERR(tsk)) | ||
2828 | ret = PTR_ERR(tsk); | ||
2829 | else | ||
2830 | goto out; | ||
2831 | |||
2832 | out_bctl: | ||
2833 | kfree(bctl); | ||
2834 | out: | ||
2835 | btrfs_free_path(path); | ||
2836 | return ret; | ||
2837 | } | ||
2838 | |||
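btrfs_recover_balance() turns a balance item left on disk back into an in-memory btrfs_balance_control and hands it to a kthread, so an interrupted balance resumes (or, with -o skip_balance, is skipped) without stalling mount. A sketch of the assumed call site; the exact placement in the mount path is an assumption, not shown in this diff:

	/* during mount, once the tree roots are readable */
	ret = btrfs_recover_balance(fs_info->tree_root);
	if (ret)            /* -ENOMEM or a search error; a missing
			       balance item is already mapped to 0 */
		goto fail;  /* hypothetical error label */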
2839 | int btrfs_pause_balance(struct btrfs_fs_info *fs_info) | ||
2840 | { | ||
2841 | int ret = 0; | ||
2842 | |||
2843 | mutex_lock(&fs_info->balance_mutex); | ||
2844 | if (!fs_info->balance_ctl) { | ||
2845 | mutex_unlock(&fs_info->balance_mutex); | ||
2846 | return -ENOTCONN; | ||
2847 | } | ||
2848 | |||
2849 | if (atomic_read(&fs_info->balance_running)) { | ||
2850 | atomic_inc(&fs_info->balance_pause_req); | ||
2851 | mutex_unlock(&fs_info->balance_mutex); | ||
2852 | |||
2853 | wait_event(fs_info->balance_wait_q, | ||
2854 | atomic_read(&fs_info->balance_running) == 0); | ||
2855 | |||
2856 | mutex_lock(&fs_info->balance_mutex); | ||
2857 | /* we are good with balance_ctl ripped off from under us */ | ||
2858 | BUG_ON(atomic_read(&fs_info->balance_running)); | ||
2859 | atomic_dec(&fs_info->balance_pause_req); | ||
2860 | } else { | ||
2861 | ret = -ENOTCONN; | ||
2862 | } | ||
2863 | |||
2864 | mutex_unlock(&fs_info->balance_mutex); | ||
2865 | return ret; | ||
2866 | } | ||
2867 | |||
2868 | int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) | ||
2869 | { | ||
2870 | mutex_lock(&fs_info->balance_mutex); | ||
2871 | if (!fs_info->balance_ctl) { | ||
2872 | mutex_unlock(&fs_info->balance_mutex); | ||
2873 | return -ENOTCONN; | ||
2874 | } | ||
2875 | |||
2876 | atomic_inc(&fs_info->balance_cancel_req); | ||
2877 | /* | ||
2878 | * if we are running just wait and return, balance item is | ||
2879 | * deleted in btrfs_balance in this case | ||
2880 | */ | ||
2881 | if (atomic_read(&fs_info->balance_running)) { | ||
2882 | mutex_unlock(&fs_info->balance_mutex); | ||
2883 | wait_event(fs_info->balance_wait_q, | ||
2884 | atomic_read(&fs_info->balance_running) == 0); | ||
2885 | mutex_lock(&fs_info->balance_mutex); | ||
2886 | } else { | ||
2887 | /* __cancel_balance needs volume_mutex */ | ||
2888 | mutex_unlock(&fs_info->balance_mutex); | ||
2889 | mutex_lock(&fs_info->volume_mutex); | ||
2890 | mutex_lock(&fs_info->balance_mutex); | ||
2891 | |||
2892 | if (fs_info->balance_ctl) | ||
2893 | __cancel_balance(fs_info); | ||
2894 | |||
2895 | mutex_unlock(&fs_info->volume_mutex); | ||
2896 | } | ||
2897 | |||
2898 | BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running)); | ||
2899 | atomic_dec(&fs_info->balance_cancel_req); | ||
2900 | mutex_unlock(&fs_info->balance_mutex); | ||
2901 | return 0; | ||
2902 | } | ||
2903 | |||
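Pause and cancel both work the same way: bump an atomic request counter, then sleep on balance_wait_q until balance_running drops to zero. The other half of the handshake sits in the balance loop itself, which is not part of this hunk; it is expected to poll the counters between chunks, roughly like this (illustrative):

	/* between chunks in __btrfs_balance() */
	if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
	    atomic_read(&fs_info->balance_cancel_req)) {
		ret = -ECANCELED;
		goto error;      /* btrfs_balance() then decides whether
				    balance_need_close() applies */
	}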
2186 | /* | 2904 | /* |
2187 | * shrinking a device means finding all of the device extents past | 2905 | * shrinking a device means finding all of the device extents past |
2188 | * the new size, and then following the back refs to the chunks. | 2906 | * the new size, and then following the back refs to the chunks. |
@@ -2323,8 +3041,7 @@ done: | |||
2323 | return ret; | 3041 | return ret; |
2324 | } | 3042 | } |
2325 | 3043 | ||
2326 | static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, | 3044 | static int btrfs_add_system_chunk(struct btrfs_root *root, |
2327 | struct btrfs_root *root, | ||
2328 | struct btrfs_key *key, | 3045 | struct btrfs_key *key, |
2329 | struct btrfs_chunk *chunk, int item_size) | 3046 | struct btrfs_chunk *chunk, int item_size) |
2330 | { | 3047 | { |
@@ -2441,10 +3158,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2441 | max_stripe_size = 1024 * 1024 * 1024; | 3158 | max_stripe_size = 1024 * 1024 * 1024; |
2442 | max_chunk_size = 10 * max_stripe_size; | 3159 | max_chunk_size = 10 * max_stripe_size; |
2443 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { | 3160 | } else if (type & BTRFS_BLOCK_GROUP_METADATA) { |
2444 | max_stripe_size = 256 * 1024 * 1024; | 3161 | /* for larger filesystems, use larger metadata chunks */ |
3162 | if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024) | ||
3163 | max_stripe_size = 1024 * 1024 * 1024; | ||
3164 | else | ||
3165 | max_stripe_size = 256 * 1024 * 1024; | ||
2445 | max_chunk_size = max_stripe_size; | 3166 | max_chunk_size = max_stripe_size; |
2446 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { | 3167 | } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { |
2447 | max_stripe_size = 8 * 1024 * 1024; | 3168 | max_stripe_size = 32 * 1024 * 1024; |
2448 | max_chunk_size = 2 * max_stripe_size; | 3169 | max_chunk_size = 2 * max_stripe_size; |
2449 | } else { | 3170 | } else { |
2450 | printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n", | 3171 | printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n", |
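The metadata sizing change keys off fs_devices->total_rw_bytes: arrays over 50 GiB now get 1 GiB metadata stripes instead of 256 MiB, and system chunks grow from 8 MiB to 32 MiB stripes. A stand-alone arithmetic check to make the new numbers concrete:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long total_rw = 100ULL << 30;        /* 100 GiB */
		unsigned long long max_stripe =
			(total_rw > (50ULL << 30)) ? (1024ULL << 20) /* 1 GiB */
						   : (256ULL << 20);
		/* for metadata, max_chunk_size == max_stripe_size */
		printf("metadata stripe/chunk = %llu MiB\n", max_stripe >> 20);
		/* system: 32 MiB stripes, chunk = 2 * stripe */
		printf("system chunk = %llu MiB\n", (2 * (32ULL << 20)) >> 20);
		return 0;
	}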
@@ -2496,7 +3217,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2496 | if (total_avail == 0) | 3217 | if (total_avail == 0) |
2497 | continue; | 3218 | continue; |
2498 | 3219 | ||
2499 | ret = find_free_dev_extent(trans, device, | 3220 | ret = find_free_dev_extent(device, |
2500 | max_stripe_size * dev_stripes, | 3221 | max_stripe_size * dev_stripes, |
2501 | &dev_offset, &max_avail); | 3222 | &dev_offset, &max_avail); |
2502 | if (ret && ret != -ENOSPC) | 3223 | if (ret && ret != -ENOSPC) |
@@ -2687,7 +3408,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2687 | BUG_ON(ret); | 3408 | BUG_ON(ret); |
2688 | 3409 | ||
2689 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | 3410 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { |
2690 | ret = btrfs_add_system_chunk(trans, chunk_root, &key, chunk, | 3411 | ret = btrfs_add_system_chunk(chunk_root, &key, chunk, |
2691 | item_size); | 3412 | item_size); |
2692 | BUG_ON(ret); | 3413 | BUG_ON(ret); |
2693 | } | 3414 | } |
@@ -2752,8 +3473,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
2752 | return ret; | 3473 | return ret; |
2753 | 3474 | ||
2754 | alloc_profile = BTRFS_BLOCK_GROUP_METADATA | | 3475 | alloc_profile = BTRFS_BLOCK_GROUP_METADATA | |
2755 | (fs_info->metadata_alloc_profile & | 3476 | fs_info->avail_metadata_alloc_bits; |
2756 | fs_info->avail_metadata_alloc_bits); | ||
2757 | alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); | 3477 | alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); |
2758 | 3478 | ||
2759 | ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, | 3479 | ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, |
@@ -2763,8 +3483,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
2763 | sys_chunk_offset = chunk_offset + chunk_size; | 3483 | sys_chunk_offset = chunk_offset + chunk_size; |
2764 | 3484 | ||
2765 | alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM | | 3485 | alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM | |
2766 | (fs_info->system_alloc_profile & | 3486 | fs_info->avail_system_alloc_bits; |
2767 | fs_info->avail_system_alloc_bits); | ||
2768 | alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); | 3487 | alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); |
2769 | 3488 | ||
2770 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, | 3489 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, |
@@ -2901,26 +3620,13 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
2901 | u64 stripe_nr; | 3620 | u64 stripe_nr; |
2902 | u64 stripe_nr_orig; | 3621 | u64 stripe_nr_orig; |
2903 | u64 stripe_nr_end; | 3622 | u64 stripe_nr_end; |
2904 | int stripes_allocated = 8; | ||
2905 | int stripes_required = 1; | ||
2906 | int stripe_index; | 3623 | int stripe_index; |
2907 | int i; | 3624 | int i; |
3625 | int ret = 0; | ||
2908 | int num_stripes; | 3626 | int num_stripes; |
2909 | int max_errors = 0; | 3627 | int max_errors = 0; |
2910 | struct btrfs_bio *bbio = NULL; | 3628 | struct btrfs_bio *bbio = NULL; |
2911 | 3629 | ||
2912 | if (bbio_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) | ||
2913 | stripes_allocated = 1; | ||
2914 | again: | ||
2915 | if (bbio_ret) { | ||
2916 | bbio = kzalloc(btrfs_bio_size(stripes_allocated), | ||
2917 | GFP_NOFS); | ||
2918 | if (!bbio) | ||
2919 | return -ENOMEM; | ||
2920 | |||
2921 | atomic_set(&bbio->error, 0); | ||
2922 | } | ||
2923 | |||
2924 | read_lock(&em_tree->lock); | 3630 | read_lock(&em_tree->lock); |
2925 | em = lookup_extent_mapping(em_tree, logical, *length); | 3631 | em = lookup_extent_mapping(em_tree, logical, *length); |
2926 | read_unlock(&em_tree->lock); | 3632 | read_unlock(&em_tree->lock); |
@@ -2939,32 +3645,6 @@ again: | |||
2939 | if (mirror_num > map->num_stripes) | 3645 | if (mirror_num > map->num_stripes) |
2940 | mirror_num = 0; | 3646 | mirror_num = 0; |
2941 | 3647 | ||
2942 | /* if our btrfs_bio struct is too small, back off and try again */ | ||
2943 | if (rw & REQ_WRITE) { | ||
2944 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | ||
2945 | BTRFS_BLOCK_GROUP_DUP)) { | ||
2946 | stripes_required = map->num_stripes; | ||
2947 | max_errors = 1; | ||
2948 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | ||
2949 | stripes_required = map->sub_stripes; | ||
2950 | max_errors = 1; | ||
2951 | } | ||
2952 | } | ||
2953 | if (rw & REQ_DISCARD) { | ||
2954 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | | ||
2955 | BTRFS_BLOCK_GROUP_RAID1 | | ||
2956 | BTRFS_BLOCK_GROUP_DUP | | ||
2957 | BTRFS_BLOCK_GROUP_RAID10)) { | ||
2958 | stripes_required = map->num_stripes; | ||
2959 | } | ||
2960 | } | ||
2961 | if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && | ||
2962 | stripes_allocated < stripes_required) { | ||
2963 | stripes_allocated = map->num_stripes; | ||
2964 | free_extent_map(em); | ||
2965 | kfree(bbio); | ||
2966 | goto again; | ||
2967 | } | ||
2968 | stripe_nr = offset; | 3648 | stripe_nr = offset; |
2969 | /* | 3649 | /* |
2970 | * stripe_nr counts the total number of stripes we have to stride | 3650 | * stripe_nr counts the total number of stripes we have to stride |
@@ -2980,10 +3660,7 @@ again: | |||
2980 | 3660 | ||
2981 | if (rw & REQ_DISCARD) | 3661 | if (rw & REQ_DISCARD) |
2982 | *length = min_t(u64, em->len - offset, *length); | 3662 | *length = min_t(u64, em->len - offset, *length); |
2983 | else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | | 3663 | else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { |
2984 | BTRFS_BLOCK_GROUP_RAID1 | | ||
2985 | BTRFS_BLOCK_GROUP_RAID10 | | ||
2986 | BTRFS_BLOCK_GROUP_DUP)) { | ||
2987 | /* we limit the length of each bio to what fits in a stripe */ | 3664 | /* we limit the length of each bio to what fits in a stripe */ |
2988 | *length = min_t(u64, em->len - offset, | 3665 | *length = min_t(u64, em->len - offset, |
2989 | map->stripe_len - stripe_offset); | 3666 | map->stripe_len - stripe_offset); |
@@ -3059,81 +3736,55 @@ again: | |||
3059 | } | 3736 | } |
3060 | BUG_ON(stripe_index >= map->num_stripes); | 3737 | BUG_ON(stripe_index >= map->num_stripes); |
3061 | 3738 | ||
3739 | bbio = kzalloc(btrfs_bio_size(num_stripes), GFP_NOFS); | ||
3740 | if (!bbio) { | ||
3741 | ret = -ENOMEM; | ||
3742 | goto out; | ||
3743 | } | ||
3744 | atomic_set(&bbio->error, 0); | ||
3745 | |||
3062 | if (rw & REQ_DISCARD) { | 3746 | if (rw & REQ_DISCARD) { |
3747 | int factor = 0; | ||
3748 | int sub_stripes = 0; | ||
3749 | u64 stripes_per_dev = 0; | ||
3750 | u32 remaining_stripes = 0; | ||
3751 | |||
3752 | if (map->type & | ||
3753 | (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { | ||
3754 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) | ||
3755 | sub_stripes = 1; | ||
3756 | else | ||
3757 | sub_stripes = map->sub_stripes; | ||
3758 | |||
3759 | factor = map->num_stripes / sub_stripes; | ||
3760 | stripes_per_dev = div_u64_rem(stripe_nr_end - | ||
3761 | stripe_nr_orig, | ||
3762 | factor, | ||
3763 | &remaining_stripes); | ||
3764 | } | ||
3765 | |||
3063 | for (i = 0; i < num_stripes; i++) { | 3766 | for (i = 0; i < num_stripes; i++) { |
3064 | bbio->stripes[i].physical = | 3767 | bbio->stripes[i].physical = |
3065 | map->stripes[stripe_index].physical + | 3768 | map->stripes[stripe_index].physical + |
3066 | stripe_offset + stripe_nr * map->stripe_len; | 3769 | stripe_offset + stripe_nr * map->stripe_len; |
3067 | bbio->stripes[i].dev = map->stripes[stripe_index].dev; | 3770 | bbio->stripes[i].dev = map->stripes[stripe_index].dev; |
3068 | 3771 | ||
3069 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | 3772 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | |
3070 | u64 stripes; | 3773 | BTRFS_BLOCK_GROUP_RAID10)) { |
3071 | u32 last_stripe = 0; | 3774 | bbio->stripes[i].length = stripes_per_dev * |
3072 | int j; | 3775 | map->stripe_len; |
3073 | 3776 | if (i / sub_stripes < remaining_stripes) | |
3074 | div_u64_rem(stripe_nr_end - 1, | 3777 | bbio->stripes[i].length += |
3075 | map->num_stripes, | 3778 | map->stripe_len; |
3076 | &last_stripe); | 3779 | if (i < sub_stripes) |
3077 | |||
3078 | for (j = 0; j < map->num_stripes; j++) { | ||
3079 | u32 test; | ||
3080 | |||
3081 | div_u64_rem(stripe_nr_end - 1 - j, | ||
3082 | map->num_stripes, &test); | ||
3083 | if (test == stripe_index) | ||
3084 | break; | ||
3085 | } | ||
3086 | stripes = stripe_nr_end - 1 - j; | ||
3087 | do_div(stripes, map->num_stripes); | ||
3088 | bbio->stripes[i].length = map->stripe_len * | ||
3089 | (stripes - stripe_nr + 1); | ||
3090 | |||
3091 | if (i == 0) { | ||
3092 | bbio->stripes[i].length -= | ||
3093 | stripe_offset; | ||
3094 | stripe_offset = 0; | ||
3095 | } | ||
3096 | if (stripe_index == last_stripe) | ||
3097 | bbio->stripes[i].length -= | ||
3098 | stripe_end_offset; | ||
3099 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | ||
3100 | u64 stripes; | ||
3101 | int j; | ||
3102 | int factor = map->num_stripes / | ||
3103 | map->sub_stripes; | ||
3104 | u32 last_stripe = 0; | ||
3105 | |||
3106 | div_u64_rem(stripe_nr_end - 1, | ||
3107 | factor, &last_stripe); | ||
3108 | last_stripe *= map->sub_stripes; | ||
3109 | |||
3110 | for (j = 0; j < factor; j++) { | ||
3111 | u32 test; | ||
3112 | |||
3113 | div_u64_rem(stripe_nr_end - 1 - j, | ||
3114 | factor, &test); | ||
3115 | |||
3116 | if (test == | ||
3117 | stripe_index / map->sub_stripes) | ||
3118 | break; | ||
3119 | } | ||
3120 | stripes = stripe_nr_end - 1 - j; | ||
3121 | do_div(stripes, factor); | ||
3122 | bbio->stripes[i].length = map->stripe_len * | ||
3123 | (stripes - stripe_nr + 1); | ||
3124 | |||
3125 | if (i < map->sub_stripes) { | ||
3126 | bbio->stripes[i].length -= | 3780 | bbio->stripes[i].length -= |
3127 | stripe_offset; | 3781 | stripe_offset; |
3128 | if (i == map->sub_stripes - 1) | 3782 | if ((i / sub_stripes + 1) % |
3129 | stripe_offset = 0; | 3783 | sub_stripes == remaining_stripes) |
3130 | } | ||
3131 | if (stripe_index >= last_stripe && | ||
3132 | stripe_index <= (last_stripe + | ||
3133 | map->sub_stripes - 1)) { | ||
3134 | bbio->stripes[i].length -= | 3784 | bbio->stripes[i].length -= |
3135 | stripe_end_offset; | 3785 | stripe_end_offset; |
3136 | } | 3786 | if (i == sub_stripes - 1) |
3787 | stripe_offset = 0; | ||
3137 | } else | 3788 | } else |
3138 | bbio->stripes[i].length = *length; | 3789 | bbio->stripes[i].length = *length; |
3139 | 3790 | ||
@@ -3155,15 +3806,22 @@ again: | |||
3155 | stripe_index++; | 3806 | stripe_index++; |
3156 | } | 3807 | } |
3157 | } | 3808 | } |
3158 | if (bbio_ret) { | 3809 | |
3159 | *bbio_ret = bbio; | 3810 | if (rw & REQ_WRITE) { |
3160 | bbio->num_stripes = num_stripes; | 3811 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | |
3161 | bbio->max_errors = max_errors; | 3812 | BTRFS_BLOCK_GROUP_RAID10 | |
3162 | bbio->mirror_num = mirror_num; | 3813 | BTRFS_BLOCK_GROUP_DUP)) { |
3814 | max_errors = 1; | ||
3815 | } | ||
3163 | } | 3816 | } |
3817 | |||
3818 | *bbio_ret = bbio; | ||
3819 | bbio->num_stripes = num_stripes; | ||
3820 | bbio->max_errors = max_errors; | ||
3821 | bbio->mirror_num = mirror_num; | ||
3164 | out: | 3822 | out: |
3165 | free_extent_map(em); | 3823 | free_extent_map(em); |
3166 | return 0; | 3824 | return ret; |
3167 | } | 3825 | } |
3168 | 3826 | ||
3169 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 3827 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, |
@@ -3304,7 +3962,7 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
3304 | /* don't bother with additional async steps for reads, right now */ | 3962 | /* don't bother with additional async steps for reads, right now */ |
3305 | if (!(rw & REQ_WRITE)) { | 3963 | if (!(rw & REQ_WRITE)) { |
3306 | bio_get(bio); | 3964 | bio_get(bio); |
3307 | submit_bio(rw, bio); | 3965 | btrfsic_submit_bio(rw, bio); |
3308 | bio_put(bio); | 3966 | bio_put(bio); |
3309 | return 0; | 3967 | return 0; |
3310 | } | 3968 | } |
@@ -3399,7 +4057,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
3399 | if (async_submit) | 4057 | if (async_submit) |
3400 | schedule_bio(root, dev, rw, bio); | 4058 | schedule_bio(root, dev, rw, bio); |
3401 | else | 4059 | else |
3402 | submit_bio(rw, bio); | 4060 | btrfsic_submit_bio(rw, bio); |
3403 | } else { | 4061 | } else { |
3404 | bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; | 4062 | bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; |
3405 | bio->bi_sector = logical >> 9; | 4063 | bio->bi_sector = logical >> 9; |
@@ -3568,7 +4226,7 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) | |||
3568 | struct btrfs_fs_devices *fs_devices; | 4226 | struct btrfs_fs_devices *fs_devices; |
3569 | int ret; | 4227 | int ret; |
3570 | 4228 | ||
3571 | mutex_lock(&uuid_mutex); | 4229 | BUG_ON(!mutex_is_locked(&uuid_mutex)); |
3572 | 4230 | ||
3573 | fs_devices = root->fs_info->fs_devices->seed; | 4231 | fs_devices = root->fs_info->fs_devices->seed; |
3574 | while (fs_devices) { | 4232 | while (fs_devices) { |
@@ -3606,7 +4264,6 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) | |||
3606 | fs_devices->seed = root->fs_info->fs_devices->seed; | 4264 | fs_devices->seed = root->fs_info->fs_devices->seed; |
3607 | root->fs_info->fs_devices->seed = fs_devices; | 4265 | root->fs_info->fs_devices->seed = fs_devices; |
3608 | out: | 4266 | out: |
3609 | mutex_unlock(&uuid_mutex); | ||
3610 | return ret; | 4267 | return ret; |
3611 | } | 4268 | } |
3612 | 4269 | ||
@@ -3749,6 +4406,9 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) | |||
3749 | if (!path) | 4406 | if (!path) |
3750 | return -ENOMEM; | 4407 | return -ENOMEM; |
3751 | 4408 | ||
4409 | mutex_lock(&uuid_mutex); | ||
4410 | lock_chunks(root); | ||
4411 | |||
3752 | /* first we search for all of the device items, and then we | 4412 | /* first we search for all of the device items, and then we |
3753 | * read in all of the chunk items. This way we can create chunk | 4413 | * read in all of the chunk items. This way we can create chunk |
3754 | * mappings that reference all of the devices that are found | 4414 | * mappings that reference all of the devices that are found |
@@ -3799,6 +4459,9 @@ again: | |||
3799 | } | 4459 | } |
3800 | ret = 0; | 4460 | ret = 0; |
3801 | error: | 4461 | error: |
4462 | unlock_chunks(root); | ||
4463 | mutex_unlock(&uuid_mutex); | ||
4464 | |||
3802 | btrfs_free_path(path); | 4465 | btrfs_free_path(path); |
3803 | return ret; | 4466 | return ret; |
3804 | } | 4467 | } |
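The last two hunks move uuid_mutex out of open_seed_devices(), which now merely asserts it is held, and up into btrfs_read_chunk_tree(), which also holds the chunk mutex for the whole scan. The resulting lock nesting, as these hunks suggest (sketch only):

	mutex_lock(&uuid_mutex);   /* outer: guards the fs_devices lists */
	lock_chunks(root);         /* inner: guards the chunk mappings   */

	/* ... read device items, read chunk items; open_seed_devices()
	 * may run here and only asserts uuid_mutex is held ... */

	unlock_chunks(root);
	mutex_unlock(&uuid_mutex);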
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 78f2d4d4f37f..19ac95048b88 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -186,6 +186,51 @@ struct map_lookup { | |||
186 | #define map_lookup_size(n) (sizeof(struct map_lookup) + \ | 186 | #define map_lookup_size(n) (sizeof(struct map_lookup) + \ |
187 | (sizeof(struct btrfs_bio_stripe) * (n))) | 187 | (sizeof(struct btrfs_bio_stripe) * (n))) |
188 | 188 | ||
189 | /* | ||
190 | * Restriper's general type filter | ||
191 | */ | ||
192 | #define BTRFS_BALANCE_DATA (1ULL << 0) | ||
193 | #define BTRFS_BALANCE_SYSTEM (1ULL << 1) | ||
194 | #define BTRFS_BALANCE_METADATA (1ULL << 2) | ||
195 | |||
196 | #define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \ | ||
197 | BTRFS_BALANCE_SYSTEM | \ | ||
198 | BTRFS_BALANCE_METADATA) | ||
199 | |||
200 | #define BTRFS_BALANCE_FORCE (1ULL << 3) | ||
201 | #define BTRFS_BALANCE_RESUME (1ULL << 4) | ||
202 | |||
203 | /* | ||
204 | * Balance filters | ||
205 | */ | ||
206 | #define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0) | ||
207 | #define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1) | ||
208 | #define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2) | ||
209 | #define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3) | ||
210 | #define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) | ||
211 | |||
212 | /* | ||
213 | * Profile changing flags. When SOFT is set we won't relocate chunk if | ||
214 | * it already has the target profile (even though it may be | ||
215 | * half-filled). | ||
216 | */ | ||
217 | #define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8) | ||
218 | #define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9) | ||
219 | |||
220 | struct btrfs_balance_args; | ||
221 | struct btrfs_balance_progress; | ||
222 | struct btrfs_balance_control { | ||
223 | struct btrfs_fs_info *fs_info; | ||
224 | |||
225 | struct btrfs_balance_args data; | ||
226 | struct btrfs_balance_args meta; | ||
227 | struct btrfs_balance_args sys; | ||
228 | |||
229 | u64 flags; | ||
230 | |||
231 | struct btrfs_balance_progress stat; | ||
232 | }; | ||
233 | |||
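Putting the new flags and struct together: a hypothetical filling-in of a balance control that converts data chunks to RAID1 but, thanks to SOFT, leaves chunks already in the target profile alone. Field names come from the struct above (data.target is the same member btrfs_balance() validates in volumes.c); the call site and error handling are assumed:

	struct btrfs_balance_control *bctl;

	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
	if (!bctl)
		return -ENOMEM;

	bctl->fs_info = fs_info;
	bctl->flags = BTRFS_BALANCE_DATA;            /* data chunks only */
	bctl->data.flags = BTRFS_BALANCE_ARGS_CONVERT |
			   BTRFS_BALANCE_ARGS_SOFT;  /* skip converted   */
	bctl->data.target = BTRFS_BLOCK_GROUP_RAID1;

	/* with volume and balance mutexes held, per volumes.c above */
	ret = btrfs_balance(bctl, NULL);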
189 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | 234 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, |
190 | u64 end, u64 *length); | 235 | u64 end, u64 *length); |
191 | 236 | ||
@@ -228,9 +273,12 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, | |||
228 | u8 *uuid, u8 *fsid); | 273 | u8 *uuid, u8 *fsid); |
229 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); | 274 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); |
230 | int btrfs_init_new_device(struct btrfs_root *root, char *path); | 275 | int btrfs_init_new_device(struct btrfs_root *root, char *path); |
231 | int btrfs_balance(struct btrfs_root *dev_root); | 276 | int btrfs_balance(struct btrfs_balance_control *bctl, |
277 | struct btrfs_ioctl_balance_args *bargs); | ||
278 | int btrfs_recover_balance(struct btrfs_root *tree_root); | ||
279 | int btrfs_pause_balance(struct btrfs_fs_info *fs_info); | ||
280 | int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); | ||
232 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 281 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
233 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | 282 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, |
234 | struct btrfs_device *device, u64 num_bytes, | ||
235 | u64 *start, u64 *max_avail); | 283 | u64 *start, u64 *max_avail); |
236 | #endif | 284 | #endif |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 3848b04e310e..e7a5659087e6 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -200,7 +200,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
200 | ret = btrfs_update_inode(trans, root, inode); | 200 | ret = btrfs_update_inode(trans, root, inode); |
201 | BUG_ON(ret); | 201 | BUG_ON(ret); |
202 | out: | 202 | out: |
203 | btrfs_end_transaction_throttle(trans, root); | 203 | btrfs_end_transaction(trans, root); |
204 | return ret; | 204 | return ret; |
205 | } | 205 | } |
206 | 206 | ||
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b60fc8bfb3e9..620daad201db 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -641,10 +641,10 @@ static int __cap_is_valid(struct ceph_cap *cap) | |||
641 | unsigned long ttl; | 641 | unsigned long ttl; |
642 | u32 gen; | 642 | u32 gen; |
643 | 643 | ||
644 | spin_lock(&cap->session->s_cap_lock); | 644 | spin_lock(&cap->session->s_gen_ttl_lock); |
645 | gen = cap->session->s_cap_gen; | 645 | gen = cap->session->s_cap_gen; |
646 | ttl = cap->session->s_cap_ttl; | 646 | ttl = cap->session->s_cap_ttl; |
647 | spin_unlock(&cap->session->s_cap_lock); | 647 | spin_unlock(&cap->session->s_gen_ttl_lock); |
648 | 648 | ||
649 | if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { | 649 | if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { |
650 | dout("__cap_is_valid %p cap %p issued %s " | 650 | dout("__cap_is_valid %p cap %p issued %s " |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 74fd74719dc2..3e8094be4604 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -973,12 +973,12 @@ static int dentry_lease_is_valid(struct dentry *dentry) | |||
973 | 973 | ||
974 | spin_lock(&dentry->d_lock); | 974 | spin_lock(&dentry->d_lock); |
975 | di = ceph_dentry(dentry); | 975 | di = ceph_dentry(dentry); |
976 | if (di && di->lease_session) { | 976 | if (di->lease_session) { |
977 | s = di->lease_session; | 977 | s = di->lease_session; |
978 | spin_lock(&s->s_cap_lock); | 978 | spin_lock(&s->s_gen_ttl_lock); |
979 | gen = s->s_cap_gen; | 979 | gen = s->s_cap_gen; |
980 | ttl = s->s_cap_ttl; | 980 | ttl = s->s_cap_ttl; |
981 | spin_unlock(&s->s_cap_lock); | 981 | spin_unlock(&s->s_gen_ttl_lock); |
982 | 982 | ||
983 | if (di->lease_gen == gen && | 983 | if (di->lease_gen == gen && |
984 | time_before(jiffies, dentry->d_time) && | 984 | time_before(jiffies, dentry->d_time) && |
@@ -1072,13 +1072,11 @@ static void ceph_d_release(struct dentry *dentry) | |||
1072 | struct ceph_dentry_info *di = ceph_dentry(dentry); | 1072 | struct ceph_dentry_info *di = ceph_dentry(dentry); |
1073 | 1073 | ||
1074 | dout("d_release %p\n", dentry); | 1074 | dout("d_release %p\n", dentry); |
1075 | if (di) { | 1075 | ceph_dentry_lru_del(dentry); |
1076 | ceph_dentry_lru_del(dentry); | 1076 | if (di->lease_session) |
1077 | if (di->lease_session) | 1077 | ceph_put_mds_session(di->lease_session); |
1078 | ceph_put_mds_session(di->lease_session); | 1078 | kmem_cache_free(ceph_dentry_cachep, di); |
1079 | kmem_cache_free(ceph_dentry_cachep, di); | 1079 | dentry->d_fsdata = NULL; |
1080 | dentry->d_fsdata = NULL; | ||
1081 | } | ||
1082 | } | 1080 | } |
1083 | 1081 | ||
1084 | static int ceph_snapdir_d_revalidate(struct dentry *dentry, | 1082 | static int ceph_snapdir_d_revalidate(struct dentry *dentry, |
@@ -1096,17 +1094,36 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry, | |||
1096 | */ | 1094 | */ |
1097 | void ceph_dir_set_complete(struct inode *inode) | 1095 | void ceph_dir_set_complete(struct inode *inode) |
1098 | { | 1096 | { |
1099 | /* not yet implemented */ | 1097 | struct dentry *dentry = d_find_any_alias(inode); |
1098 | |||
1099 | if (dentry && ceph_dentry(dentry) && | ||
1100 | ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) { | ||
1101 | dout(" marking %p (%p) complete\n", inode, dentry); | ||
1102 | set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); | ||
1103 | } | ||
1104 | dput(dentry); | ||
1100 | } | 1105 | } |
1101 | 1106 | ||
1102 | void ceph_dir_clear_complete(struct inode *inode) | 1107 | void ceph_dir_clear_complete(struct inode *inode) |
1103 | { | 1108 | { |
1104 | /* not yet implemented */ | 1109 | struct dentry *dentry = d_find_any_alias(inode); |
1110 | |||
1111 | if (dentry && ceph_dentry(dentry)) { | ||
1112 | dout(" marking %p (%p) NOT complete\n", inode, dentry); | ||
1113 | clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); | ||
1114 | } | ||
1115 | dput(dentry); | ||
1105 | } | 1116 | } |
1106 | 1117 | ||
1107 | bool ceph_dir_test_complete(struct inode *inode) | 1118 | bool ceph_dir_test_complete(struct inode *inode) |
1108 | { | 1119 | { |
1109 | /* not yet implemented */ | 1120 | struct dentry *dentry = d_find_any_alias(inode); |
1121 | |||
1122 | if (dentry && ceph_dentry(dentry)) | ||
1123 | return test_bit(CEPH_D_COMPLETE, | ||
1124 | &ceph_dentry(dentry)->flags); | ||
1125 | |||
1126 | dput(dentry); | ||
1110 | return false; | 1127 | return false; |
1111 | } | 1128 | } |
1112 | 1129 | ||
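With the D_COMPLETE trio implemented, a consumer is expected to gate dcache-only readdir on both the flag and the new mount option, roughly as below (hypothetical call site; the real checks live elsewhere in dir.c):

	if (ceph_test_mount_opt(fsc, DCACHE) &&
	    ceph_dir_test_complete(inode)) {
		/* the listing can be served from cached dentries alone */
	}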
@@ -1220,6 +1237,7 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, | |||
1220 | do { | 1237 | do { |
1221 | ceph_mdsc_get_request(req); | 1238 | ceph_mdsc_get_request(req); |
1222 | spin_unlock(&ci->i_unsafe_lock); | 1239 | spin_unlock(&ci->i_unsafe_lock); |
1240 | |||
1223 | dout("dir_fsync %p wait on tid %llu (until %llu)\n", | 1241 | dout("dir_fsync %p wait on tid %llu (until %llu)\n", |
1224 | inode, req->r_tid, last_tid); | 1242 | inode, req->r_tid, last_tid); |
1225 | if (req->r_timeout) { | 1243 | if (req->r_timeout) { |
@@ -1232,9 +1250,9 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, | |||
1232 | } else { | 1250 | } else { |
1233 | wait_for_completion(&req->r_safe_completion); | 1251 | wait_for_completion(&req->r_safe_completion); |
1234 | } | 1252 | } |
1235 | spin_lock(&ci->i_unsafe_lock); | ||
1236 | ceph_mdsc_put_request(req); | 1253 | ceph_mdsc_put_request(req); |
1237 | 1254 | ||
1255 | spin_lock(&ci->i_unsafe_lock); | ||
1238 | if (ret || list_empty(head)) | 1256 | if (ret || list_empty(head)) |
1239 | break; | 1257 | break; |
1240 | req = list_entry(head->next, | 1258 | req = list_entry(head->next, |
@@ -1259,13 +1277,11 @@ void ceph_dentry_lru_add(struct dentry *dn) | |||
1259 | 1277 | ||
1260 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, | 1278 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, |
1261 | dn->d_name.len, dn->d_name.name); | 1279 | dn->d_name.len, dn->d_name.name); |
1262 | if (di) { | 1280 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
1263 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; | 1281 | spin_lock(&mdsc->dentry_lru_lock); |
1264 | spin_lock(&mdsc->dentry_lru_lock); | 1282 | list_add_tail(&di->lru, &mdsc->dentry_lru); |
1265 | list_add_tail(&di->lru, &mdsc->dentry_lru); | 1283 | mdsc->num_dentry++; |
1266 | mdsc->num_dentry++; | 1284 | spin_unlock(&mdsc->dentry_lru_lock); |
1267 | spin_unlock(&mdsc->dentry_lru_lock); | ||
1268 | } | ||
1269 | } | 1285 | } |
1270 | 1286 | ||
1271 | void ceph_dentry_lru_touch(struct dentry *dn) | 1287 | void ceph_dentry_lru_touch(struct dentry *dn) |
@@ -1275,12 +1291,10 @@ void ceph_dentry_lru_touch(struct dentry *dn) | |||
1275 | 1291 | ||
1276 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, | 1292 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, |
1277 | dn->d_name.len, dn->d_name.name, di->offset); | 1293 | dn->d_name.len, dn->d_name.name, di->offset); |
1278 | if (di) { | 1294 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
1279 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; | 1295 | spin_lock(&mdsc->dentry_lru_lock); |
1280 | spin_lock(&mdsc->dentry_lru_lock); | 1296 | list_move_tail(&di->lru, &mdsc->dentry_lru); |
1281 | list_move_tail(&di->lru, &mdsc->dentry_lru); | 1297 | spin_unlock(&mdsc->dentry_lru_lock); |
1282 | spin_unlock(&mdsc->dentry_lru_lock); | ||
1283 | } | ||
1284 | } | 1298 | } |
1285 | 1299 | ||
1286 | void ceph_dentry_lru_del(struct dentry *dn) | 1300 | void ceph_dentry_lru_del(struct dentry *dn) |
@@ -1290,13 +1304,11 @@ void ceph_dentry_lru_del(struct dentry *dn) | |||
1290 | 1304 | ||
1291 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, | 1305 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, |
1292 | dn->d_name.len, dn->d_name.name); | 1306 | dn->d_name.len, dn->d_name.name); |
1293 | if (di) { | 1307 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
1294 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; | 1308 | spin_lock(&mdsc->dentry_lru_lock); |
1295 | spin_lock(&mdsc->dentry_lru_lock); | 1309 | list_del_init(&di->lru); |
1296 | list_del_init(&di->lru); | 1310 | mdsc->num_dentry--; |
1297 | mdsc->num_dentry--; | 1311 | spin_unlock(&mdsc->dentry_lru_lock); |
1298 | spin_unlock(&mdsc->dentry_lru_lock); | ||
1299 | } | ||
1300 | } | 1312 | } |
1301 | 1313 | ||
1302 | /* | 1314 | /* |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 9fbcdecaaccd..fbb2a643ef10 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -56,9 +56,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, | |||
56 | return -EINVAL; | 56 | return -EINVAL; |
57 | 57 | ||
58 | spin_lock(&dentry->d_lock); | 58 | spin_lock(&dentry->d_lock); |
59 | parent = dget(dentry->d_parent); | 59 | parent = dentry->d_parent; |
60 | spin_unlock(&dentry->d_lock); | ||
61 | |||
62 | if (*max_len >= connected_handle_length) { | 60 | if (*max_len >= connected_handle_length) { |
63 | dout("encode_fh %p connectable\n", dentry); | 61 | dout("encode_fh %p connectable\n", dentry); |
64 | cfh->ino = ceph_ino(dentry->d_inode); | 62 | cfh->ino = ceph_ino(dentry->d_inode); |
@@ -81,7 +79,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, | |||
81 | *max_len = handle_length; | 79 | *max_len = handle_length; |
82 | type = 255; | 80 | type = 255; |
83 | } | 81 | } |
84 | dput(parent); | 82 | spin_unlock(&dentry->d_lock); |
85 | return type; | 83 | return type; |
86 | } | 84 | } |
87 | 85 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 25283e7a37f8..2c489378b4cd 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -850,11 +850,12 @@ static void ceph_set_dentry_offset(struct dentry *dn) | |||
850 | { | 850 | { |
851 | struct dentry *dir = dn->d_parent; | 851 | struct dentry *dir = dn->d_parent; |
852 | struct inode *inode = dir->d_inode; | 852 | struct inode *inode = dir->d_inode; |
853 | struct ceph_inode_info *ci = ceph_inode(inode); | 853 | struct ceph_inode_info *ci; |
854 | struct ceph_dentry_info *di; | 854 | struct ceph_dentry_info *di; |
855 | 855 | ||
856 | BUG_ON(!inode); | 856 | BUG_ON(!inode); |
857 | 857 | ||
858 | ci = ceph_inode(inode); | ||
858 | di = ceph_dentry(dn); | 859 | di = ceph_dentry(dn); |
859 | 860 | ||
860 | spin_lock(&ci->i_ceph_lock); | 861 | spin_lock(&ci->i_ceph_lock); |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 6203d805eb45..866e8d7ca37d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -262,6 +262,7 @@ static int parse_reply_info(struct ceph_msg *msg, | |||
262 | /* trace */ | 262 | /* trace */ |
263 | ceph_decode_32_safe(&p, end, len, bad); | 263 | ceph_decode_32_safe(&p, end, len, bad); |
264 | if (len > 0) { | 264 | if (len > 0) { |
265 | ceph_decode_need(&p, end, len, bad); | ||
265 | err = parse_reply_info_trace(&p, p+len, info, features); | 266 | err = parse_reply_info_trace(&p, p+len, info, features); |
266 | if (err < 0) | 267 | if (err < 0) |
267 | goto out_bad; | 268 | goto out_bad; |
@@ -270,6 +271,7 @@ static int parse_reply_info(struct ceph_msg *msg, | |||
270 | /* extra */ | 271 | /* extra */ |
271 | ceph_decode_32_safe(&p, end, len, bad); | 272 | ceph_decode_32_safe(&p, end, len, bad); |
272 | if (len > 0) { | 273 | if (len > 0) { |
274 | ceph_decode_need(&p, end, len, bad); | ||
273 | err = parse_reply_info_extra(&p, p+len, info, features); | 275 | err = parse_reply_info_extra(&p, p+len, info, features); |
274 | if (err < 0) | 276 | if (err < 0) |
275 | goto out_bad; | 277 | goto out_bad; |
@@ -398,9 +400,11 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
398 | s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; | 400 | s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; |
399 | s->s_con.peer_name.num = cpu_to_le64(mds); | 401 | s->s_con.peer_name.num = cpu_to_le64(mds); |
400 | 402 | ||
401 | spin_lock_init(&s->s_cap_lock); | 403 | spin_lock_init(&s->s_gen_ttl_lock); |
402 | s->s_cap_gen = 0; | 404 | s->s_cap_gen = 0; |
403 | s->s_cap_ttl = 0; | 405 | s->s_cap_ttl = 0; |
406 | |||
407 | spin_lock_init(&s->s_cap_lock); | ||
404 | s->s_renew_requested = 0; | 408 | s->s_renew_requested = 0; |
405 | s->s_renew_seq = 0; | 409 | s->s_renew_seq = 0; |
406 | INIT_LIST_HEAD(&s->s_caps); | 410 | INIT_LIST_HEAD(&s->s_caps); |
@@ -2326,10 +2330,10 @@ static void handle_session(struct ceph_mds_session *session, | |||
2326 | case CEPH_SESSION_STALE: | 2330 | case CEPH_SESSION_STALE: |
2327 | pr_info("mds%d caps went stale, renewing\n", | 2331 | pr_info("mds%d caps went stale, renewing\n", |
2328 | session->s_mds); | 2332 | session->s_mds); |
2329 | spin_lock(&session->s_cap_lock); | 2333 | spin_lock(&session->s_gen_ttl_lock); |
2330 | session->s_cap_gen++; | 2334 | session->s_cap_gen++; |
2331 | session->s_cap_ttl = 0; | 2335 | session->s_cap_ttl = 0; |
2332 | spin_unlock(&session->s_cap_lock); | 2336 | spin_unlock(&session->s_gen_ttl_lock); |
2333 | send_renew_caps(mdsc, session); | 2337 | send_renew_caps(mdsc, session); |
2334 | break; | 2338 | break; |
2335 | 2339 | ||
@@ -2772,7 +2776,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
2772 | di = ceph_dentry(dentry); | 2776 | di = ceph_dentry(dentry); |
2773 | switch (h->action) { | 2777 | switch (h->action) { |
2774 | case CEPH_MDS_LEASE_REVOKE: | 2778 | case CEPH_MDS_LEASE_REVOKE: |
2775 | if (di && di->lease_session == session) { | 2779 | if (di->lease_session == session) { |
2776 | if (ceph_seq_cmp(di->lease_seq, seq) > 0) | 2780 | if (ceph_seq_cmp(di->lease_seq, seq) > 0) |
2777 | h->seq = cpu_to_le32(di->lease_seq); | 2781 | h->seq = cpu_to_le32(di->lease_seq); |
2778 | __ceph_mdsc_drop_dentry_lease(dentry); | 2782 | __ceph_mdsc_drop_dentry_lease(dentry); |
@@ -2781,7 +2785,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
2781 | break; | 2785 | break; |
2782 | 2786 | ||
2783 | case CEPH_MDS_LEASE_RENEW: | 2787 | case CEPH_MDS_LEASE_RENEW: |
2784 | if (di && di->lease_session == session && | 2788 | if (di->lease_session == session && |
2785 | di->lease_gen == session->s_cap_gen && | 2789 | di->lease_gen == session->s_cap_gen && |
2786 | di->lease_renew_from && | 2790 | di->lease_renew_from && |
2787 | di->lease_renew_after == 0) { | 2791 | di->lease_renew_after == 0) { |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index a50ca0e39475..8c7c04ebb595 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -117,10 +117,13 @@ struct ceph_mds_session { | |||
117 | void *s_authorizer_buf, *s_authorizer_reply_buf; | 117 | void *s_authorizer_buf, *s_authorizer_reply_buf; |
118 | size_t s_authorizer_buf_len, s_authorizer_reply_buf_len; | 118 | size_t s_authorizer_buf_len, s_authorizer_reply_buf_len; |
119 | 119 | ||
120 | /* protected by s_cap_lock */ | 120 | /* protected by s_gen_ttl_lock */ |
121 | spinlock_t s_cap_lock; | 121 | spinlock_t s_gen_ttl_lock; |
122 | u32 s_cap_gen; /* inc each time we get mds stale msg */ | 122 | u32 s_cap_gen; /* inc each time we get mds stale msg */ |
123 | unsigned long s_cap_ttl; /* when session caps expire */ | 123 | unsigned long s_cap_ttl; /* when session caps expire */ |
124 | |||
125 | /* protected by s_cap_lock */ | ||
126 | spinlock_t s_cap_lock; | ||
124 | struct list_head s_caps; /* all caps issued by this session */ | 127 | struct list_head s_caps; /* all caps issued by this session */ |
125 | int s_nr_caps, s_trim_caps; | 128 | int s_nr_caps, s_trim_caps; |
126 | int s_num_cap_releases; | 129 | int s_num_cap_releases; |
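The rationale for the split: s_cap_gen and s_cap_ttl are read on every cap and lease validity check, while s_cap_lock guards the much heavier cap-list state, so giving the gen/ttl pair its own narrow spinlock keeps the hot read path (see __cap_is_valid() earlier in this diff) from contending with cap list updates:

	/* the hot-path read, now under the narrow lock only */
	spin_lock(&session->s_gen_ttl_lock);
	gen = session->s_cap_gen;
	ttl = session->s_cap_ttl;
	spin_unlock(&session->s_gen_ttl_lock);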
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 11bd0fc4853f..00de2c9568cd 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -131,6 +131,8 @@ enum { | |||
131 | Opt_rbytes, | 131 | Opt_rbytes, |
132 | Opt_norbytes, | 132 | Opt_norbytes, |
133 | Opt_noasyncreaddir, | 133 | Opt_noasyncreaddir, |
134 | Opt_dcache, | ||
135 | Opt_nodcache, | ||
134 | Opt_ino32, | 136 | Opt_ino32, |
135 | }; | 137 | }; |
136 | 138 | ||
@@ -152,6 +154,8 @@ static match_table_t fsopt_tokens = { | |||
152 | {Opt_rbytes, "rbytes"}, | 154 | {Opt_rbytes, "rbytes"}, |
153 | {Opt_norbytes, "norbytes"}, | 155 | {Opt_norbytes, "norbytes"}, |
154 | {Opt_noasyncreaddir, "noasyncreaddir"}, | 156 | {Opt_noasyncreaddir, "noasyncreaddir"}, |
157 | {Opt_dcache, "dcache"}, | ||
158 | {Opt_nodcache, "nodcache"}, | ||
155 | {Opt_ino32, "ino32"}, | 159 | {Opt_ino32, "ino32"}, |
156 | {-1, NULL} | 160 | {-1, NULL} |
157 | }; | 161 | }; |
@@ -231,6 +235,12 @@ static int parse_fsopt_token(char *c, void *private) | |||
231 | case Opt_noasyncreaddir: | 235 | case Opt_noasyncreaddir: |
232 | fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; | 236 | fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; |
233 | break; | 237 | break; |
238 | case Opt_dcache: | ||
239 | fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; | ||
240 | break; | ||
241 | case Opt_nodcache: | ||
242 | fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; | ||
243 | break; | ||
234 | case Opt_ino32: | 244 | case Opt_ino32: |
235 | fsopt->flags |= CEPH_MOUNT_OPT_INO32; | 245 | fsopt->flags |= CEPH_MOUNT_OPT_INO32; |
236 | break; | 246 | break; |
@@ -377,6 +387,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) | |||
377 | seq_puts(m, ",norbytes"); | 387 | seq_puts(m, ",norbytes"); |
378 | if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) | 388 | if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) |
379 | seq_puts(m, ",noasyncreaddir"); | 389 | seq_puts(m, ",noasyncreaddir"); |
390 | if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE) | ||
391 | seq_puts(m, ",dcache"); | ||
392 | else | ||
393 | seq_puts(m, ",nodcache"); | ||
380 | 394 | ||
381 | if (fsopt->wsize) | 395 | if (fsopt->wsize) |
382 | seq_printf(m, ",wsize=%d", fsopt->wsize); | 396 | seq_printf(m, ",wsize=%d", fsopt->wsize); |
@@ -636,19 +650,26 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, | |||
636 | req->r_num_caps = 2; | 650 | req->r_num_caps = 2; |
637 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 651 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
638 | if (err == 0) { | 652 | if (err == 0) { |
653 | struct inode *inode = req->r_target_inode; | ||
654 | req->r_target_inode = NULL; | ||
639 | dout("open_root_inode success\n"); | 655 | dout("open_root_inode success\n"); |
640 | if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && | 656 | if (ceph_ino(inode) == CEPH_INO_ROOT && |
641 | fsc->sb->s_root == NULL) { | 657 | fsc->sb->s_root == NULL) { |
642 | root = d_alloc_root(req->r_target_inode); | 658 | root = d_alloc_root(inode); |
643 | ceph_init_dentry(root); | 659 | if (!root) { |
660 | iput(inode); | ||
661 | root = ERR_PTR(-ENOMEM); | ||
662 | goto out; | ||
663 | } | ||
644 | } else { | 664 | } else { |
645 | root = d_obtain_alias(req->r_target_inode); | 665 | root = d_obtain_alias(inode); |
646 | } | 666 | } |
647 | req->r_target_inode = NULL; | 667 | ceph_init_dentry(root); |
648 | dout("open_root_inode success, root dentry is %p\n", root); | 668 | dout("open_root_inode success, root dentry is %p\n", root); |
649 | } else { | 669 | } else { |
650 | root = ERR_PTR(err); | 670 | root = ERR_PTR(err); |
651 | } | 671 | } |
672 | out: | ||
652 | ceph_mdsc_put_request(req); | 673 | ceph_mdsc_put_request(req); |
653 | return root; | 674 | return root; |
654 | } | 675 | } |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index cb3652b37271..1421f3d875a2 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ | 28 | #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ |
29 | #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ | 29 | #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ |
30 | #define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ | 30 | #define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ |
31 | #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ | ||
31 | 32 | ||
32 | #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) | 33 | #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) |
33 | 34 | ||
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index a5e36e4488a7..a76f697303d9 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -111,8 +111,10 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, | |||
111 | } | 111 | } |
112 | 112 | ||
113 | static struct ceph_vxattr_cb ceph_file_vxattrs[] = { | 113 | static struct ceph_vxattr_cb ceph_file_vxattrs[] = { |
114 | { true, "ceph.file.layout", ceph_vxattrcb_layout}, | ||
115 | /* The following extended attribute name is deprecated */ | ||
114 | { true, "ceph.layout", ceph_vxattrcb_layout}, | 116 | { true, "ceph.layout", ceph_vxattrcb_layout}, |
115 | { NULL, NULL } | 117 | { true, NULL, NULL } |
116 | }; | 118 | }; |
117 | 119 | ||
118 | static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode) | 120 | static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode) |
@@ -818,6 +820,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name) | |||
818 | struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); | 820 | struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); |
819 | int issued; | 821 | int issued; |
820 | int err; | 822 | int err; |
823 | int required_blob_size; | ||
821 | int dirty; | 824 | int dirty; |
822 | 825 | ||
823 | if (ceph_snap(inode) != CEPH_NOSNAP) | 826 | if (ceph_snap(inode) != CEPH_NOSNAP) |
@@ -833,14 +836,34 @@ int ceph_removexattr(struct dentry *dentry, const char *name) | |||
833 | return -EOPNOTSUPP; | 836 | return -EOPNOTSUPP; |
834 | } | 837 | } |
835 | 838 | ||
839 | err = -ENOMEM; | ||
836 | spin_lock(&ci->i_ceph_lock); | 840 | spin_lock(&ci->i_ceph_lock); |
837 | __build_xattrs(inode); | 841 | __build_xattrs(inode); |
842 | retry: | ||
838 | issued = __ceph_caps_issued(ci, NULL); | 843 | issued = __ceph_caps_issued(ci, NULL); |
839 | dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); | 844 | dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); |
840 | 845 | ||
841 | if (!(issued & CEPH_CAP_XATTR_EXCL)) | 846 | if (!(issued & CEPH_CAP_XATTR_EXCL)) |
842 | goto do_sync; | 847 | goto do_sync; |
843 | 848 | ||
849 | required_blob_size = __get_required_blob_size(ci, 0, 0); | ||
850 | |||
851 | if (!ci->i_xattrs.prealloc_blob || | ||
852 | required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { | ||
853 | struct ceph_buffer *blob; | ||
854 | |||
855 | spin_unlock(&ci->i_ceph_lock); | ||
856 | dout(" preallocating new blob size=%d\n", required_blob_size); | ||
857 | blob = ceph_buffer_new(required_blob_size, GFP_NOFS); | ||
858 | if (!blob) | ||
859 | goto out; | ||
860 | spin_lock(&ci->i_ceph_lock); | ||
861 | if (ci->i_xattrs.prealloc_blob) | ||
862 | ceph_buffer_put(ci->i_xattrs.prealloc_blob); | ||
863 | ci->i_xattrs.prealloc_blob = blob; | ||
864 | goto retry; | ||
865 | } | ||
866 | |||
844 | err = __remove_xattr_by_name(ceph_inode(inode), name); | 867 | err = __remove_xattr_by_name(ceph_inode(inode), name); |
845 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); | 868 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); |
846 | ci->i_xattrs.dirty = true; | 869 | ci->i_xattrs.dirty = true; |
@@ -853,6 +876,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name) | |||
853 | do_sync: | 876 | do_sync: |
854 | spin_unlock(&ci->i_ceph_lock); | 877 | spin_unlock(&ci->i_ceph_lock); |
855 | err = ceph_send_removexattr(dentry, name); | 878 | err = ceph_send_removexattr(dentry, name); |
879 | out: | ||
856 | return err; | 880 | return err; |
857 | } | 881 | } |
858 | 882 | ||
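The removexattr change reuses the pattern setxattr already follows: allocation can sleep, so it cannot happen under i_ceph_lock; the lock is dropped, the blob allocated, the lock retaken, and the size requirement rechecked in case it changed meanwhile. The skeleton of that pattern, with hypothetical helper names standing in for the inline checks:

	spin_lock(&ci->i_ceph_lock);
	retry:
		required = __get_required_blob_size(ci, 0, 0);
		if (blob_too_small(ci, required)) {   /* hypothetical check */
			/* can't sleep in an allocation under a spinlock */
			spin_unlock(&ci->i_ceph_lock);
			blob = ceph_buffer_new(required, GFP_NOFS);
			if (!blob)
				goto out;             /* -ENOMEM, unlocked */
			spin_lock(&ci->i_ceph_lock);
			install_prealloc(ci, blob);   /* hypothetical swap */
			goto retry;                   /* size may have
							 changed meanwhile */
		}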
diff --git a/fs/char_dev.c b/fs/char_dev.c index dca9e5e0f73b..3f152b92a94a 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -272,7 +272,7 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, | |||
272 | cd = __register_chrdev_region(major, baseminor, count, name); | 272 | cd = __register_chrdev_region(major, baseminor, count, name); |
273 | if (IS_ERR(cd)) | 273 | if (IS_ERR(cd)) |
274 | return PTR_ERR(cd); | 274 | return PTR_ERR(cd); |
275 | 275 | ||
276 | cdev = cdev_alloc(); | 276 | cdev = cdev_alloc(); |
277 | if (!cdev) | 277 | if (!cdev) |
278 | goto out2; | 278 | goto out2; |
@@ -280,7 +280,7 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, | |||
280 | cdev->owner = fops->owner; | 280 | cdev->owner = fops->owner; |
281 | cdev->ops = fops; | 281 | cdev->ops = fops; |
282 | kobject_set_name(&cdev->kobj, "%s", name); | 282 | kobject_set_name(&cdev->kobj, "%s", name); |
283 | 283 | ||
284 | err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); | 284 | err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); |
285 | if (err) | 285 | if (err) |
286 | goto out; | 286 | goto out; |
@@ -405,7 +405,7 @@ static int chrdev_open(struct inode *inode, struct file *filp) | |||
405 | goto out_cdev_put; | 405 | goto out_cdev_put; |
406 | 406 | ||
407 | if (filp->f_op->open) { | 407 | if (filp->f_op->open) { |
408 | ret = filp->f_op->open(inode,filp); | 408 | ret = filp->f_op->open(inode, filp); |
409 | if (ret) | 409 | if (ret) |
410 | goto out_cdev_put; | 410 | goto out_cdev_put; |
411 | } | 411 | } |
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index f66cc1625150..0554b00a7b33 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig | |||
@@ -140,7 +140,6 @@ config CIFS_DFS_UPCALL | |||
140 | 140 | ||
141 | config CIFS_FSCACHE | 141 | config CIFS_FSCACHE |
142 | bool "Provide CIFS client caching support (EXPERIMENTAL)" | 142 | bool "Provide CIFS client caching support (EXPERIMENTAL)" |
143 | depends on EXPERIMENTAL | ||
144 | depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y | 143 | depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y |
145 | help | 144 | help |
146 | Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data | 145 | Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data |
@@ -149,7 +148,7 @@ config CIFS_FSCACHE | |||
149 | 148 | ||
150 | config CIFS_ACL | 149 | config CIFS_ACL |
151 | bool "Provide CIFS ACL support (EXPERIMENTAL)" | 150 | bool "Provide CIFS ACL support (EXPERIMENTAL)" |
152 | depends on EXPERIMENTAL && CIFS_XATTR && KEYS | 151 | depends on CIFS_XATTR && KEYS |
153 | help | 152 | help |
154 | Allows fetching the CIFS/NTFS ACL from the server. The DACL blob | 153 | Allows fetching the CIFS/NTFS ACL from the server. The DACL blob |
155 | is handed over to the application/caller. | 154 | is handed over to the application/caller. |
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 84e8c0724704..24b3dfc05282 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c | |||
@@ -676,14 +676,23 @@ static ssize_t cifs_multiuser_mount_proc_write(struct file *file, | |||
676 | { | 676 | { |
677 | char c; | 677 | char c; |
678 | int rc; | 678 | int rc; |
679 | static bool warned; | ||
679 | 680 | ||
680 | rc = get_user(c, buffer); | 681 | rc = get_user(c, buffer); |
681 | if (rc) | 682 | if (rc) |
682 | return rc; | 683 | return rc; |
683 | if (c == '0' || c == 'n' || c == 'N') | 684 | if (c == '0' || c == 'n' || c == 'N') |
684 | multiuser_mount = 0; | 685 | multiuser_mount = 0; |
685 | else if (c == '1' || c == 'y' || c == 'Y') | 686 | else if (c == '1' || c == 'y' || c == 'Y') { |
686 | multiuser_mount = 1; | 687 | multiuser_mount = 1; |
688 | if (!warned) { | ||
689 | warned = true; | ||
690 | printk(KERN_WARNING "CIFS VFS: The legacy multiuser " | ||
691 | "mount code is scheduled to be deprecated in " | ||
692 | "3.5. Please switch to using the multiuser " | ||
693 | "mount option."); | ||
694 | } | ||
695 | } | ||
687 | 696 | ||
688 | return count; | 697 | return count; |
689 | } | 698 | } |
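The `static bool warned` added above is the usual one-shot warning idiom: the flag lives in function scope, so the message fires only on the first legacy use. A userspace sketch with illustrative names:

    #include <stdio.h>
    #include <stdbool.h>

    static void set_legacy_flag(char c, int *flag)
    {
            static bool warned;     /* zero-initialized; set once */

            if (c == '0' || c == 'n' || c == 'N') {
                    *flag = 0;
            } else if (c == '1' || c == 'y' || c == 'Y') {
                    *flag = 1;
                    if (!warned) {
                            warned = true;
                            fprintf(stderr, "legacy multiuser mount is deprecated\n");
                    }
            }
    }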
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 2272fd5fe5b7..e622863b292f 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
@@ -113,9 +113,11 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo) | |||
113 | MAX_MECH_STR_LEN + | 113 | MAX_MECH_STR_LEN + |
114 | UID_KEY_LEN + (sizeof(uid_t) * 2) + | 114 | UID_KEY_LEN + (sizeof(uid_t) * 2) + |
115 | CREDUID_KEY_LEN + (sizeof(uid_t) * 2) + | 115 | CREDUID_KEY_LEN + (sizeof(uid_t) * 2) + |
116 | USER_KEY_LEN + strlen(sesInfo->user_name) + | ||
117 | PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; | 116 | PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; |
118 | 117 | ||
118 | if (sesInfo->user_name) | ||
119 | desc_len += USER_KEY_LEN + strlen(sesInfo->user_name); | ||
120 | |||
119 | spnego_key = ERR_PTR(-ENOMEM); | 121 | spnego_key = ERR_PTR(-ENOMEM); |
120 | description = kzalloc(desc_len, GFP_KERNEL); | 122 | description = kzalloc(desc_len, GFP_KERNEL); |
121 | if (description == NULL) | 123 | if (description == NULL) |
@@ -152,8 +154,10 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo) | |||
152 | dp = description + strlen(description); | 154 | dp = description + strlen(description); |
153 | sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid); | 155 | sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid); |
154 | 156 | ||
155 | dp = description + strlen(description); | 157 | if (sesInfo->user_name) { |
156 | sprintf(dp, ";user=%s", sesInfo->user_name); | 158 | dp = description + strlen(description); |
159 | sprintf(dp, ";user=%s", sesInfo->user_name); | ||
160 | } | ||
157 | 161 | ||
158 | dp = description + strlen(description); | 162 | dp = description + strlen(description); |
159 | sprintf(dp, ";pid=0x%x", current->pid); | 163 | sprintf(dp, ";pid=0x%x", current->pid); |
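The key description above is built by repeated sprintf() calls at the current end of the buffer, with the optional user= field both excluded from the size estimate and skipped during formatting when absent. A runnable sketch of the idiom, with illustrative field values and sizes:

    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>

    int main(void)
    {
            const char *user = NULL;        /* may legitimately be NULL */
            size_t len = 64;                /* base fields, illustrative */
            if (user)
                    len += strlen(";user=") + strlen(user);

            char *desc = calloc(1, len);
            if (!desc)
                    return 1;

            char *dp = desc;
            sprintf(dp, "ver=0x%x", 2);
            dp = desc + strlen(desc);
            sprintf(dp, ";creduid=0x%x", 1000);
            if (user) {                     /* optional field */
                    dp = desc + strlen(desc);
                    sprintf(dp, ";user=%s", user);
            }
            dp = desc + strlen(desc);
            sprintf(dp, ";pid=0x%x", 4242);

            puts(desc);
            free(desc);
            return 0;
    }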
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 1b2e180b018d..fbb9da951843 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c | |||
@@ -27,17 +27,17 @@ | |||
27 | #include "cifs_debug.h" | 27 | #include "cifs_debug.h" |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * cifs_ucs2_bytes - how long will a string be after conversion? | 30 | * cifs_utf16_bytes - how long will a string be after conversion? |
31 | * @ucs - pointer to input string | 31 | * @utf16 - pointer to input string |
32 | * @maxbytes - don't go past this many bytes of input string | 32 | * @maxbytes - don't go past this many bytes of input string |
33 | * @codepage - destination codepage | 33 | * @codepage - destination codepage |
34 | * | 34 | * |
35 | * Walk a ucs2le string and return the number of bytes that the string will | 35 | * Walk a utf16le string and return the number of bytes that the string will |
36 | * be after being converted to the given charset, not including any null | 36 | * be after being converted to the given charset, not including any null |
37 | * termination required. Don't walk past maxbytes in the source buffer. | 37 | * termination required. Don't walk past maxbytes in the source buffer. |
38 | */ | 38 | */ |
39 | int | 39 | int |
40 | cifs_ucs2_bytes(const __le16 *from, int maxbytes, | 40 | cifs_utf16_bytes(const __le16 *from, int maxbytes, |
41 | const struct nls_table *codepage) | 41 | const struct nls_table *codepage) |
42 | { | 42 | { |
43 | int i; | 43 | int i; |
@@ -122,7 +122,7 @@ cp_convert: | |||
122 | } | 122 | } |
123 | 123 | ||
124 | /* | 124 | /* |
125 | * cifs_from_ucs2 - convert utf16le string to local charset | 125 | * cifs_from_utf16 - convert utf16le string to local charset |
126 | * @to - destination buffer | 126 | * @to - destination buffer |
127 | * @from - source buffer | 127 | * @from - source buffer |
128 | * @tolen - destination buffer size (in bytes) | 128 | * @tolen - destination buffer size (in bytes) |
@@ -130,7 +130,7 @@ cp_convert: | |||
130 | * @codepage - codepage to which characters should be converted | 130 | * @codepage - codepage to which characters should be converted |
131 | * @mapchar - should characters be remapped according to the mapchars option? | 131 | * @mapchar - should characters be remapped according to the mapchars option? |
132 | * | 132 | * |
133 | * Convert a little-endian ucs2le string (as sent by the server) to a string | 133 | * Convert a little-endian utf16le string (as sent by the server) to a string |
134 | * in the provided codepage. The tolen and fromlen parameters are to ensure | 134 | * in the provided codepage. The tolen and fromlen parameters are to ensure |
135 | * that the code doesn't walk off of the end of the buffer (which is always | 135 | * that the code doesn't walk off of the end of the buffer (which is always |
136 | * a danger if the alignment of the source buffer is off). The destination | 136 | * a danger if the alignment of the source buffer is off). The destination |
@@ -139,12 +139,12 @@ cp_convert: | |||
139 | * null terminator). | 139 | * null terminator). |
140 | * | 140 | * |
141 | * Note that some windows versions actually send multiword UTF-16 characters | 141 | * Note that some windows versions actually send multiword UTF-16 characters |
142 | * instead of straight UCS-2. The linux nls routines however aren't able to | 142 | * instead of straight UCS-2. The linux nls routines however aren't able to |
143 | * deal with those characters properly. In the event that we get some of | 143 | * deal with those characters properly. In the event that we get some of |
144 | * those characters, they won't be translated properly. | 144 | * those characters, they won't be translated properly. |
145 | */ | 145 | */ |
146 | int | 146 | int |
147 | cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, | 147 | cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, |
148 | const struct nls_table *codepage, bool mapchar) | 148 | const struct nls_table *codepage, bool mapchar) |
149 | { | 149 | { |
150 | int i, charlen, safelen; | 150 | int i, charlen, safelen; |
@@ -190,13 +190,13 @@ cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, | |||
190 | } | 190 | } |
191 | 191 | ||
192 | /* | 192 | /* |
193 | * NAME: cifs_strtoUCS() | 193 | * NAME: cifs_strtoUTF16() |
194 | * | 194 | * |
195 | * FUNCTION: Convert character string to unicode string | 195 | * FUNCTION: Convert character string to unicode string |
196 | * | 196 | * |
197 | */ | 197 | */ |
198 | int | 198 | int |
199 | cifs_strtoUCS(__le16 *to, const char *from, int len, | 199 | cifs_strtoUTF16(__le16 *to, const char *from, int len, |
200 | const struct nls_table *codepage) | 200 | const struct nls_table *codepage) |
201 | { | 201 | { |
202 | int charlen; | 202 | int charlen; |
@@ -206,7 +206,7 @@ cifs_strtoUCS(__le16 *to, const char *from, int len, | |||
206 | for (i = 0; len && *from; i++, from += charlen, len -= charlen) { | 206 | for (i = 0; len && *from; i++, from += charlen, len -= charlen) { |
207 | charlen = codepage->char2uni(from, len, &wchar_to); | 207 | charlen = codepage->char2uni(from, len, &wchar_to); |
208 | if (charlen < 1) { | 208 | if (charlen < 1) { |
209 | cERROR(1, "strtoUCS: char2uni of 0x%x returned %d", | 209 | cERROR(1, "strtoUTF16: char2uni of 0x%x returned %d", |
210 | *from, charlen); | 210 | *from, charlen); |
211 | /* A question mark */ | 211 | /* A question mark */ |
212 | wchar_to = 0x003f; | 212 | wchar_to = 0x003f; |
@@ -220,7 +220,8 @@ cifs_strtoUCS(__le16 *to, const char *from, int len, | |||
220 | } | 220 | } |
221 | 221 | ||
222 | /* | 222 | /* |
223 | * cifs_strndup_from_ucs - copy a string from wire format to the local codepage | 223 | * cifs_strndup_from_utf16 - copy a string from wire format to the local |
224 | * codepage | ||
224 | * @src - source string | 225 | * @src - source string |
225 | * @maxlen - don't walk past this many bytes in the source string | 226 | * @maxlen - don't walk past this many bytes in the source string |
226 | * @is_unicode - is this a unicode string? | 227 | * @is_unicode - is this a unicode string? |
@@ -231,19 +232,19 @@ cifs_strtoUCS(__le16 *to, const char *from, int len, | |||
231 | * error. | 232 | * error. |
232 | */ | 233 | */ |
233 | char * | 234 | char * |
234 | cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode, | 235 | cifs_strndup_from_utf16(const char *src, const int maxlen, |
235 | const struct nls_table *codepage) | 236 | const bool is_unicode, const struct nls_table *codepage) |
236 | { | 237 | { |
237 | int len; | 238 | int len; |
238 | char *dst; | 239 | char *dst; |
239 | 240 | ||
240 | if (is_unicode) { | 241 | if (is_unicode) { |
241 | len = cifs_ucs2_bytes((__le16 *) src, maxlen, codepage); | 242 | len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage); |
242 | len += nls_nullsize(codepage); | 243 | len += nls_nullsize(codepage); |
243 | dst = kmalloc(len, GFP_KERNEL); | 244 | dst = kmalloc(len, GFP_KERNEL); |
244 | if (!dst) | 245 | if (!dst) |
245 | return NULL; | 246 | return NULL; |
246 | cifs_from_ucs2(dst, (__le16 *) src, len, maxlen, codepage, | 247 | cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage, |
247 | false); | 248 | false); |
248 | } else { | 249 | } else { |
249 | len = strnlen(src, maxlen); | 250 | len = strnlen(src, maxlen); |
@@ -264,7 +265,7 @@ cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode, | |||
264 | * names are little endian 16 bit Unicode on the wire | 265 | * names are little endian 16 bit Unicode on the wire |
265 | */ | 266 | */ |
266 | int | 267 | int |
267 | cifsConvertToUCS(__le16 *target, const char *source, int srclen, | 268 | cifsConvertToUTF16(__le16 *target, const char *source, int srclen, |
268 | const struct nls_table *cp, int mapChars) | 269 | const struct nls_table *cp, int mapChars) |
269 | { | 270 | { |
270 | int i, j, charlen; | 271 | int i, j, charlen; |
@@ -273,7 +274,7 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen, | |||
273 | wchar_t tmp; | 274 | wchar_t tmp; |
274 | 275 | ||
275 | if (!mapChars) | 276 | if (!mapChars) |
276 | return cifs_strtoUCS(target, source, PATH_MAX, cp); | 277 | return cifs_strtoUTF16(target, source, PATH_MAX, cp); |
277 | 278 | ||
278 | for (i = 0, j = 0; i < srclen; j++) { | 279 | for (i = 0, j = 0; i < srclen; j++) { |
279 | src_char = source[i]; | 280 | src_char = source[i]; |
@@ -281,7 +282,7 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen, | |||
281 | switch (src_char) { | 282 | switch (src_char) { |
282 | case 0: | 283 | case 0: |
283 | put_unaligned(0, &target[j]); | 284 | put_unaligned(0, &target[j]); |
284 | goto ctoUCS_out; | 285 | goto ctoUTF16_out; |
285 | case ':': | 286 | case ':': |
286 | dst_char = cpu_to_le16(UNI_COLON); | 287 | dst_char = cpu_to_le16(UNI_COLON); |
287 | break; | 288 | break; |
@@ -326,7 +327,7 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen, | |||
326 | put_unaligned(dst_char, &target[j]); | 327 | put_unaligned(dst_char, &target[j]); |
327 | } | 328 | } |
328 | 329 | ||
329 | ctoUCS_out: | 330 | ctoUTF16_out: |
330 | return i; | 331 | return i; |
331 | } | 332 | } |
332 | 333 | ||
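For a single-byte target codepage, cifs_utf16_bytes() reduces to a walk over 16-bit little-endian units; below is a userspace sketch under that assumption. The real code consults the nls tables and may emit multibyte output per unit.

    #include <stdint.h>
    #include <stddef.h>

    /* Count output bytes for a UTF-16LE string, assuming a single-byte
     * target codepage: one byte per unit (non-ASCII would become '?').
     * Stops at a NUL unit or after maxbytes of input. */
    static int utf16le_bytes_ascii(const uint8_t *from, int maxbytes)
    {
            int i, outlen = 0;

            for (i = 0; i + 1 < maxbytes; i += 2) {
                    uint16_t c = from[i] | (from[i + 1] << 8); /* le16 load */
                    if (c == 0)
                            break;
                    outlen += 1;
            }
            return outlen;
    }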
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 6d02fd560566..a513a546700b 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h | |||
@@ -74,16 +74,16 @@ extern const struct UniCaseRange CifsUniLowerRange[]; | |||
74 | #endif /* UNIUPR_NOLOWER */ | 74 | #endif /* UNIUPR_NOLOWER */ |
75 | 75 | ||
76 | #ifdef __KERNEL__ | 76 | #ifdef __KERNEL__ |
77 | int cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, | 77 | int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, |
78 | const struct nls_table *codepage, bool mapchar); | 78 | const struct nls_table *codepage, bool mapchar); |
79 | int cifs_ucs2_bytes(const __le16 *from, int maxbytes, | 79 | int cifs_utf16_bytes(const __le16 *from, int maxbytes, |
80 | const struct nls_table *codepage); | 80 | const struct nls_table *codepage); |
81 | int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); | 81 | int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *); |
82 | char *cifs_strndup_from_ucs(const char *src, const int maxlen, | 82 | char *cifs_strndup_from_utf16(const char *src, const int maxlen, |
83 | const bool is_unicode, | 83 | const bool is_unicode, |
84 | const struct nls_table *codepage); | 84 | const struct nls_table *codepage); |
85 | extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen, | 85 | extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen, |
86 | const struct nls_table *cp, int mapChars); | 86 | const struct nls_table *cp, int mapChars); |
87 | 87 | ||
88 | #endif | 88 | #endif |
89 | 89 | ||
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 72ddf23ef6f7..c1b254487388 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -909,6 +909,8 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, | |||
909 | umode_t group_mask = S_IRWXG; | 909 | umode_t group_mask = S_IRWXG; |
910 | umode_t other_mask = S_IRWXU | S_IRWXG | S_IRWXO; | 910 | umode_t other_mask = S_IRWXU | S_IRWXG | S_IRWXO; |
911 | 911 | ||
912 | if (num_aces > ULONG_MAX / sizeof(struct cifs_ace *)) | ||
913 | return; | ||
912 | ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), | 914 | ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), |
913 | GFP_KERNEL); | 915 | GFP_KERNEL); |
914 | if (!ppace) { | 916 | if (!ppace) { |
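The new guard in parse_dacl() is the standard pre-multiplication overflow check: reject any count whose product with the element size would wrap before it reaches the allocator. A standalone sketch:

    #include <stdlib.h>
    #include <limits.h>

    void **alloc_ptr_array(unsigned long n)
    {
            if (n > ULONG_MAX / sizeof(void *))
                    return NULL;    /* n * sizeof(void *) would overflow */
            return malloc(n * sizeof(void *));
    }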
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 5d9b9acc5fce..63c460e503b6 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -327,7 +327,7 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
327 | attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); | 327 | attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); |
328 | attrptr->length = cpu_to_le16(2 * dlen); | 328 | attrptr->length = cpu_to_le16(2 * dlen); |
329 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | 329 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); |
330 | cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); | 330 | cifs_strtoUTF16((__le16 *)blobptr, ses->domainName, dlen, nls_cp); |
331 | 331 | ||
332 | return 0; | 332 | return 0; |
333 | } | 333 | } |
@@ -376,7 +376,7 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
376 | kmalloc(attrsize + 1, GFP_KERNEL); | 376 | kmalloc(attrsize + 1, GFP_KERNEL); |
377 | if (!ses->domainName) | 377 | if (!ses->domainName) |
378 | return -ENOMEM; | 378 | return -ENOMEM; |
379 | cifs_from_ucs2(ses->domainName, | 379 | cifs_from_utf16(ses->domainName, |
380 | (__le16 *)blobptr, attrsize, attrsize, | 380 | (__le16 *)blobptr, attrsize, attrsize, |
381 | nls_cp, false); | 381 | nls_cp, false); |
382 | break; | 382 | break; |
@@ -420,15 +420,20 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
420 | } | 420 | } |
421 | 421 | ||
422 | /* convert ses->user_name to unicode and uppercase */ | 422 | /* convert ses->user_name to unicode and uppercase */ |
423 | len = strlen(ses->user_name); | 423 | len = ses->user_name ? strlen(ses->user_name) : 0; |
424 | user = kmalloc(2 + (len * 2), GFP_KERNEL); | 424 | user = kmalloc(2 + (len * 2), GFP_KERNEL); |
425 | if (user == NULL) { | 425 | if (user == NULL) { |
426 | cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); | 426 | cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); |
427 | rc = -ENOMEM; | 427 | rc = -ENOMEM; |
428 | return rc; | 428 | return rc; |
429 | } | 429 | } |
430 | len = cifs_strtoUCS((__le16 *)user, ses->user_name, len, nls_cp); | 430 | |
431 | UniStrupr(user); | 431 | if (len) { |
432 | len = cifs_strtoUTF16((__le16 *)user, ses->user_name, len, nls_cp); | ||
433 | UniStrupr(user); | ||
434 | } else { | ||
435 | memset(user, '\0', 2); | ||
436 | } | ||
432 | 437 | ||
433 | rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, | 438 | rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, |
434 | (char *)user, 2 * len); | 439 | (char *)user, 2 * len); |
@@ -448,8 +453,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
448 | rc = -ENOMEM; | 453 | rc = -ENOMEM; |
449 | return rc; | 454 | return rc; |
450 | } | 455 | } |
451 | len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, | 456 | len = cifs_strtoUTF16((__le16 *)domain, ses->domainName, len, |
452 | nls_cp); | 457 | nls_cp); |
453 | rc = | 458 | rc = |
454 | crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, | 459 | crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, |
455 | (char *)domain, 2 * len); | 460 | (char *)domain, 2 * len); |
@@ -468,7 +473,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
468 | rc = -ENOMEM; | 473 | rc = -ENOMEM; |
469 | return rc; | 474 | return rc; |
470 | } | 475 | } |
471 | len = cifs_strtoUCS((__le16 *)server, ses->serverName, len, | 476 | len = cifs_strtoUTF16((__le16 *)server, ses->serverName, len, |
472 | nls_cp); | 477 | nls_cp); |
473 | rc = | 478 | rc = |
474 | crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, | 479 | crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, |
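The username handling above allocates 2 + len * 2 bytes, converts to UTF-16LE and upper-cases, and degrades to a two-byte zero buffer for a NULL (anonymous) user before the HMAC-MD5 update. An ASCII-only userspace sketch standing in for cifs_strtoUTF16()/UniStrupr():

    #include <stdlib.h>
    #include <string.h>
    #include <stdint.h>
    #include <ctype.h>

    static uint8_t *user_for_hash(const char *name, size_t *out_units)
    {
            size_t len = name ? strlen(name) : 0;
            uint8_t *buf = malloc(2 + len * 2);
            if (!buf)
                    return NULL;

            if (len) {
                    for (size_t i = 0; i < len; i++) {  /* ASCII -> UTF-16LE, upper */
                            buf[2 * i] = (uint8_t)toupper((unsigned char)name[i]);
                            buf[2 * i + 1] = 0;
                    }
            } else {
                    memset(buf, 0, 2);                  /* anonymous session */
            }
            *out_units = len;       /* the hash consumes 2 * len bytes */
            return buf;
    }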
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ba53c1c6c6cc..76e7d8b6da17 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -879,6 +879,8 @@ require use of the stronger protocol */ | |||
879 | #define CIFSSEC_MASK 0xB70B7 /* current flags supported if weak */ | 879 | #define CIFSSEC_MASK 0xB70B7 /* current flags supported if weak */ |
880 | #endif /* UPCALL */ | 880 | #endif /* UPCALL */ |
881 | #else /* do not allow weak pw hash */ | 881 | #else /* do not allow weak pw hash */ |
882 | #define CIFSSEC_MUST_LANMAN 0 | ||
883 | #define CIFSSEC_MUST_PLNTXT 0 | ||
882 | #ifdef CONFIG_CIFS_UPCALL | 884 | #ifdef CONFIG_CIFS_UPCALL |
883 | #define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */ | 885 | #define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */ |
884 | #else | 886 | #else |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 6600aa2d2ef3..8b7794c31591 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -821,8 +821,8 @@ PsxDelete: | |||
821 | 821 | ||
822 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 822 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
823 | name_len = | 823 | name_len = |
824 | cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, | 824 | cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, |
825 | PATH_MAX, nls_codepage, remap); | 825 | PATH_MAX, nls_codepage, remap); |
826 | name_len++; /* trailing null */ | 826 | name_len++; /* trailing null */ |
827 | name_len *= 2; | 827 | name_len *= 2; |
828 | } else { /* BB add path length overrun check */ | 828 | } else { /* BB add path length overrun check */ |
@@ -893,8 +893,8 @@ DelFileRetry: | |||
893 | 893 | ||
894 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 894 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
895 | name_len = | 895 | name_len = |
896 | cifsConvertToUCS((__le16 *) pSMB->fileName, fileName, | 896 | cifsConvertToUTF16((__le16 *) pSMB->fileName, fileName, |
897 | PATH_MAX, nls_codepage, remap); | 897 | PATH_MAX, nls_codepage, remap); |
898 | name_len++; /* trailing null */ | 898 | name_len++; /* trailing null */ |
899 | name_len *= 2; | 899 | name_len *= 2; |
900 | } else { /* BB improve check for buffer overruns BB */ | 900 | } else { /* BB improve check for buffer overruns BB */ |
@@ -938,8 +938,8 @@ RmDirRetry: | |||
938 | return rc; | 938 | return rc; |
939 | 939 | ||
940 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 940 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
941 | name_len = cifsConvertToUCS((__le16 *) pSMB->DirName, dirName, | 941 | name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, dirName, |
942 | PATH_MAX, nls_codepage, remap); | 942 | PATH_MAX, nls_codepage, remap); |
943 | name_len++; /* trailing null */ | 943 | name_len++; /* trailing null */ |
944 | name_len *= 2; | 944 | name_len *= 2; |
945 | } else { /* BB improve check for buffer overruns BB */ | 945 | } else { /* BB improve check for buffer overruns BB */ |
@@ -981,8 +981,8 @@ MkDirRetry: | |||
981 | return rc; | 981 | return rc; |
982 | 982 | ||
983 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 983 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
984 | name_len = cifsConvertToUCS((__le16 *) pSMB->DirName, name, | 984 | name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, name, |
985 | PATH_MAX, nls_codepage, remap); | 985 | PATH_MAX, nls_codepage, remap); |
986 | name_len++; /* trailing null */ | 986 | name_len++; /* trailing null */ |
987 | name_len *= 2; | 987 | name_len *= 2; |
988 | } else { /* BB improve check for buffer overruns BB */ | 988 | } else { /* BB improve check for buffer overruns BB */ |
@@ -1030,8 +1030,8 @@ PsxCreat: | |||
1030 | 1030 | ||
1031 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 1031 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
1032 | name_len = | 1032 | name_len = |
1033 | cifsConvertToUCS((__le16 *) pSMB->FileName, name, | 1033 | cifsConvertToUTF16((__le16 *) pSMB->FileName, name, |
1034 | PATH_MAX, nls_codepage, remap); | 1034 | PATH_MAX, nls_codepage, remap); |
1035 | name_len++; /* trailing null */ | 1035 | name_len++; /* trailing null */ |
1036 | name_len *= 2; | 1036 | name_len *= 2; |
1037 | } else { /* BB improve the check for buffer overruns BB */ | 1037 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -1197,8 +1197,8 @@ OldOpenRetry: | |||
1197 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 1197 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
1198 | count = 1; /* account for one byte pad to word boundary */ | 1198 | count = 1; /* account for one byte pad to word boundary */ |
1199 | name_len = | 1199 | name_len = |
1200 | cifsConvertToUCS((__le16 *) (pSMB->fileName + 1), | 1200 | cifsConvertToUTF16((__le16 *) (pSMB->fileName + 1), |
1201 | fileName, PATH_MAX, nls_codepage, remap); | 1201 | fileName, PATH_MAX, nls_codepage, remap); |
1202 | name_len++; /* trailing null */ | 1202 | name_len++; /* trailing null */ |
1203 | name_len *= 2; | 1203 | name_len *= 2; |
1204 | } else { /* BB improve check for buffer overruns BB */ | 1204 | } else { /* BB improve check for buffer overruns BB */ |
@@ -1304,8 +1304,8 @@ openRetry: | |||
1304 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 1304 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
1305 | count = 1; /* account for one byte pad to word boundary */ | 1305 | count = 1; /* account for one byte pad to word boundary */ |
1306 | name_len = | 1306 | name_len = |
1307 | cifsConvertToUCS((__le16 *) (pSMB->fileName + 1), | 1307 | cifsConvertToUTF16((__le16 *) (pSMB->fileName + 1), |
1308 | fileName, PATH_MAX, nls_codepage, remap); | 1308 | fileName, PATH_MAX, nls_codepage, remap); |
1309 | name_len++; /* trailing null */ | 1309 | name_len++; /* trailing null */ |
1310 | name_len *= 2; | 1310 | name_len *= 2; |
1311 | pSMB->NameLength = cpu_to_le16(name_len); | 1311 | pSMB->NameLength = cpu_to_le16(name_len); |
@@ -2649,16 +2649,16 @@ renameRetry: | |||
2649 | 2649 | ||
2650 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 2650 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
2651 | name_len = | 2651 | name_len = |
2652 | cifsConvertToUCS((__le16 *) pSMB->OldFileName, fromName, | 2652 | cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, |
2653 | PATH_MAX, nls_codepage, remap); | 2653 | PATH_MAX, nls_codepage, remap); |
2654 | name_len++; /* trailing null */ | 2654 | name_len++; /* trailing null */ |
2655 | name_len *= 2; | 2655 | name_len *= 2; |
2656 | pSMB->OldFileName[name_len] = 0x04; /* pad */ | 2656 | pSMB->OldFileName[name_len] = 0x04; /* pad */ |
2657 | /* protocol requires ASCII signature byte on Unicode string */ | 2657 | /* protocol requires ASCII signature byte on Unicode string */ |
2658 | pSMB->OldFileName[name_len + 1] = 0x00; | 2658 | pSMB->OldFileName[name_len + 1] = 0x00; |
2659 | name_len2 = | 2659 | name_len2 = |
2660 | cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], | 2660 | cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], |
2661 | toName, PATH_MAX, nls_codepage, remap); | 2661 | toName, PATH_MAX, nls_codepage, remap); |
2662 | name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; | 2662 | name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; |
2663 | name_len2 *= 2; /* convert to bytes */ | 2663 | name_len2 *= 2; /* convert to bytes */ |
2664 | } else { /* BB improve the check for buffer overruns BB */ | 2664 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -2738,10 +2738,12 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifs_tcon *pTcon, | |||
2738 | /* unicode only call */ | 2738 | /* unicode only call */ |
2739 | if (target_name == NULL) { | 2739 | if (target_name == NULL) { |
2740 | sprintf(dummy_string, "cifs%x", pSMB->hdr.Mid); | 2740 | sprintf(dummy_string, "cifs%x", pSMB->hdr.Mid); |
2741 | len_of_str = cifsConvertToUCS((__le16 *)rename_info->target_name, | 2741 | len_of_str = |
2742 | cifsConvertToUTF16((__le16 *)rename_info->target_name, | ||
2742 | dummy_string, 24, nls_codepage, remap); | 2743 | dummy_string, 24, nls_codepage, remap); |
2743 | } else { | 2744 | } else { |
2744 | len_of_str = cifsConvertToUCS((__le16 *)rename_info->target_name, | 2745 | len_of_str = |
2746 | cifsConvertToUTF16((__le16 *)rename_info->target_name, | ||
2745 | target_name, PATH_MAX, nls_codepage, | 2747 | target_name, PATH_MAX, nls_codepage, |
2746 | remap); | 2748 | remap); |
2747 | } | 2749 | } |
@@ -2795,17 +2797,17 @@ copyRetry: | |||
2795 | pSMB->Flags = cpu_to_le16(flags & COPY_TREE); | 2797 | pSMB->Flags = cpu_to_le16(flags & COPY_TREE); |
2796 | 2798 | ||
2797 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 2799 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
2798 | name_len = cifsConvertToUCS((__le16 *) pSMB->OldFileName, | 2800 | name_len = cifsConvertToUTF16((__le16 *) pSMB->OldFileName, |
2799 | fromName, PATH_MAX, nls_codepage, | 2801 | fromName, PATH_MAX, nls_codepage, |
2800 | remap); | 2802 | remap); |
2801 | name_len++; /* trailing null */ | 2803 | name_len++; /* trailing null */ |
2802 | name_len *= 2; | 2804 | name_len *= 2; |
2803 | pSMB->OldFileName[name_len] = 0x04; /* pad */ | 2805 | pSMB->OldFileName[name_len] = 0x04; /* pad */ |
2804 | /* protocol requires ASCII signature byte on Unicode string */ | 2806 | /* protocol requires ASCII signature byte on Unicode string */ |
2805 | pSMB->OldFileName[name_len + 1] = 0x00; | 2807 | pSMB->OldFileName[name_len + 1] = 0x00; |
2806 | name_len2 = | 2808 | name_len2 = |
2807 | cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], | 2809 | cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], |
2808 | toName, PATH_MAX, nls_codepage, remap); | 2810 | toName, PATH_MAX, nls_codepage, remap); |
2809 | name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; | 2811 | name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; |
2810 | name_len2 *= 2; /* convert to bytes */ | 2812 | name_len2 *= 2; /* convert to bytes */ |
2811 | } else { /* BB improve the check for buffer overruns BB */ | 2813 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -2861,9 +2863,9 @@ createSymLinkRetry: | |||
2861 | 2863 | ||
2862 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 2864 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
2863 | name_len = | 2865 | name_len = |
2864 | cifs_strtoUCS((__le16 *) pSMB->FileName, fromName, PATH_MAX | 2866 | cifs_strtoUTF16((__le16 *) pSMB->FileName, fromName, |
2865 | /* find define for this maxpathcomponent */ | 2867 | /* find define for this maxpathcomponent */ |
2866 | , nls_codepage); | 2868 | PATH_MAX, nls_codepage); |
2867 | name_len++; /* trailing null */ | 2869 | name_len++; /* trailing null */ |
2868 | name_len *= 2; | 2870 | name_len *= 2; |
2869 | 2871 | ||
@@ -2885,9 +2887,9 @@ createSymLinkRetry: | |||
2885 | data_offset = (char *) (&pSMB->hdr.Protocol) + offset; | 2887 | data_offset = (char *) (&pSMB->hdr.Protocol) + offset; |
2886 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 2888 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
2887 | name_len_target = | 2889 | name_len_target = |
2888 | cifs_strtoUCS((__le16 *) data_offset, toName, PATH_MAX | 2890 | cifs_strtoUTF16((__le16 *) data_offset, toName, PATH_MAX |
2889 | /* find define for this maxpathcomponent */ | 2891 | /* find define for this maxpathcomponent */ |
2890 | , nls_codepage); | 2892 | , nls_codepage); |
2891 | name_len_target++; /* trailing null */ | 2893 | name_len_target++; /* trailing null */ |
2892 | name_len_target *= 2; | 2894 | name_len_target *= 2; |
2893 | } else { /* BB improve the check for buffer overruns BB */ | 2895 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -2949,8 +2951,8 @@ createHardLinkRetry: | |||
2949 | return rc; | 2951 | return rc; |
2950 | 2952 | ||
2951 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 2953 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
2952 | name_len = cifsConvertToUCS((__le16 *) pSMB->FileName, toName, | 2954 | name_len = cifsConvertToUTF16((__le16 *) pSMB->FileName, toName, |
2953 | PATH_MAX, nls_codepage, remap); | 2955 | PATH_MAX, nls_codepage, remap); |
2954 | name_len++; /* trailing null */ | 2956 | name_len++; /* trailing null */ |
2955 | name_len *= 2; | 2957 | name_len *= 2; |
2956 | 2958 | ||
@@ -2972,8 +2974,8 @@ createHardLinkRetry: | |||
2972 | data_offset = (char *) (&pSMB->hdr.Protocol) + offset; | 2974 | data_offset = (char *) (&pSMB->hdr.Protocol) + offset; |
2973 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 2975 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
2974 | name_len_target = | 2976 | name_len_target = |
2975 | cifsConvertToUCS((__le16 *) data_offset, fromName, PATH_MAX, | 2977 | cifsConvertToUTF16((__le16 *) data_offset, fromName, |
2976 | nls_codepage, remap); | 2978 | PATH_MAX, nls_codepage, remap); |
2977 | name_len_target++; /* trailing null */ | 2979 | name_len_target++; /* trailing null */ |
2978 | name_len_target *= 2; | 2980 | name_len_target *= 2; |
2979 | } else { /* BB improve the check for buffer overruns BB */ | 2981 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -3042,8 +3044,8 @@ winCreateHardLinkRetry: | |||
3042 | 3044 | ||
3043 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 3045 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
3044 | name_len = | 3046 | name_len = |
3045 | cifsConvertToUCS((__le16 *) pSMB->OldFileName, fromName, | 3047 | cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, |
3046 | PATH_MAX, nls_codepage, remap); | 3048 | PATH_MAX, nls_codepage, remap); |
3047 | name_len++; /* trailing null */ | 3049 | name_len++; /* trailing null */ |
3048 | name_len *= 2; | 3050 | name_len *= 2; |
3049 | 3051 | ||
@@ -3051,8 +3053,8 @@ winCreateHardLinkRetry: | |||
3051 | pSMB->OldFileName[name_len] = 0x04; | 3053 | pSMB->OldFileName[name_len] = 0x04; |
3052 | pSMB->OldFileName[name_len + 1] = 0x00; /* pad */ | 3054 | pSMB->OldFileName[name_len + 1] = 0x00; /* pad */ |
3053 | name_len2 = | 3055 | name_len2 = |
3054 | cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], | 3056 | cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], |
3055 | toName, PATH_MAX, nls_codepage, remap); | 3057 | toName, PATH_MAX, nls_codepage, remap); |
3056 | name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; | 3058 | name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; |
3057 | name_len2 *= 2; /* convert to bytes */ | 3059 | name_len2 *= 2; /* convert to bytes */ |
3058 | } else { /* BB improve the check for buffer overruns BB */ | 3060 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -3108,8 +3110,8 @@ querySymLinkRetry: | |||
3108 | 3110 | ||
3109 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 3111 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
3110 | name_len = | 3112 | name_len = |
3111 | cifs_strtoUCS((__le16 *) pSMB->FileName, searchName, | 3113 | cifs_strtoUTF16((__le16 *) pSMB->FileName, searchName, |
3112 | PATH_MAX, nls_codepage); | 3114 | PATH_MAX, nls_codepage); |
3113 | name_len++; /* trailing null */ | 3115 | name_len++; /* trailing null */ |
3114 | name_len *= 2; | 3116 | name_len *= 2; |
3115 | } else { /* BB improve the check for buffer overruns BB */ | 3117 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -3166,8 +3168,8 @@ querySymLinkRetry: | |||
3166 | is_unicode = false; | 3168 | is_unicode = false; |
3167 | 3169 | ||
3168 | /* BB FIXME investigate remapping reserved chars here */ | 3170 | /* BB FIXME investigate remapping reserved chars here */ |
3169 | *symlinkinfo = cifs_strndup_from_ucs(data_start, count, | 3171 | *symlinkinfo = cifs_strndup_from_utf16(data_start, |
3170 | is_unicode, nls_codepage); | 3172 | count, is_unicode, nls_codepage); |
3171 | if (!*symlinkinfo) | 3173 | if (!*symlinkinfo) |
3172 | rc = -ENOMEM; | 3174 | rc = -ENOMEM; |
3173 | } | 3175 | } |
@@ -3450,8 +3452,9 @@ queryAclRetry: | |||
3450 | 3452 | ||
3451 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 3453 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
3452 | name_len = | 3454 | name_len = |
3453 | cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, | 3455 | cifsConvertToUTF16((__le16 *) pSMB->FileName, |
3454 | PATH_MAX, nls_codepage, remap); | 3456 | searchName, PATH_MAX, nls_codepage, |
3457 | remap); | ||
3455 | name_len++; /* trailing null */ | 3458 | name_len++; /* trailing null */ |
3456 | name_len *= 2; | 3459 | name_len *= 2; |
3457 | pSMB->FileName[name_len] = 0; | 3460 | pSMB->FileName[name_len] = 0; |
@@ -3537,8 +3540,8 @@ setAclRetry: | |||
3537 | return rc; | 3540 | return rc; |
3538 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 3541 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
3539 | name_len = | 3542 | name_len = |
3540 | cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, | 3543 | cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, |
3541 | PATH_MAX, nls_codepage, remap); | 3544 | PATH_MAX, nls_codepage, remap); |
3542 | name_len++; /* trailing null */ | 3545 | name_len++; /* trailing null */ |
3543 | name_len *= 2; | 3546 | name_len *= 2; |
3544 | } else { /* BB improve the check for buffer overruns BB */ | 3547 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -3948,8 +3951,9 @@ QInfRetry: | |||
3948 | 3951 | ||
3949 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 3952 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
3950 | name_len = | 3953 | name_len = |
3951 | cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, | 3954 | cifsConvertToUTF16((__le16 *) pSMB->FileName, |
3952 | PATH_MAX, nls_codepage, remap); | 3955 | searchName, PATH_MAX, nls_codepage, |
3956 | remap); | ||
3953 | name_len++; /* trailing null */ | 3957 | name_len++; /* trailing null */ |
3954 | name_len *= 2; | 3958 | name_len *= 2; |
3955 | } else { | 3959 | } else { |
@@ -4086,8 +4090,8 @@ QPathInfoRetry: | |||
4086 | 4090 | ||
4087 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 4091 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
4088 | name_len = | 4092 | name_len = |
4089 | cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, | 4093 | cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, |
4090 | PATH_MAX, nls_codepage, remap); | 4094 | PATH_MAX, nls_codepage, remap); |
4091 | name_len++; /* trailing null */ | 4095 | name_len++; /* trailing null */ |
4092 | name_len *= 2; | 4096 | name_len *= 2; |
4093 | } else { /* BB improve the check for buffer overruns BB */ | 4097 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -4255,8 +4259,8 @@ UnixQPathInfoRetry: | |||
4255 | 4259 | ||
4256 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 4260 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
4257 | name_len = | 4261 | name_len = |
4258 | cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, | 4262 | cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, |
4259 | PATH_MAX, nls_codepage, remap); | 4263 | PATH_MAX, nls_codepage, remap); |
4260 | name_len++; /* trailing null */ | 4264 | name_len++; /* trailing null */ |
4261 | name_len *= 2; | 4265 | name_len *= 2; |
4262 | } else { /* BB improve the check for buffer overruns BB */ | 4266 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -4344,8 +4348,8 @@ findFirstRetry: | |||
4344 | 4348 | ||
4345 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 4349 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
4346 | name_len = | 4350 | name_len = |
4347 | cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, | 4351 | cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, |
4348 | PATH_MAX, nls_codepage, remap); | 4352 | PATH_MAX, nls_codepage, remap); |
4349 | /* We cannot add the asterisk earlier in case | 4353 | /* We cannot add the asterisk earlier in case |
4350 | it got remapped to 0xF03A as if it were part of the | 4354 | it got remapped to 0xF03A as if it were part of the |
4351 | directory name instead of a wildcard */ | 4355 | directory name instead of a wildcard */ |
@@ -4656,8 +4660,9 @@ GetInodeNumberRetry: | |||
4656 | 4660 | ||
4657 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 4661 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
4658 | name_len = | 4662 | name_len = |
4659 | cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, | 4663 | cifsConvertToUTF16((__le16 *) pSMB->FileName, |
4660 | PATH_MAX, nls_codepage, remap); | 4664 | searchName, PATH_MAX, nls_codepage, |
4665 | remap); | ||
4661 | name_len++; /* trailing null */ | 4666 | name_len++; /* trailing null */ |
4662 | name_len *= 2; | 4667 | name_len *= 2; |
4663 | } else { /* BB improve the check for buffer overruns BB */ | 4668 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -4794,9 +4799,9 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, | |||
4794 | rc = -ENOMEM; | 4799 | rc = -ENOMEM; |
4795 | goto parse_DFS_referrals_exit; | 4800 | goto parse_DFS_referrals_exit; |
4796 | } | 4801 | } |
4797 | cifsConvertToUCS((__le16 *) tmp, searchName, | 4802 | cifsConvertToUTF16((__le16 *) tmp, searchName, |
4798 | PATH_MAX, nls_codepage, remap); | 4803 | PATH_MAX, nls_codepage, remap); |
4799 | node->path_consumed = cifs_ucs2_bytes(tmp, | 4804 | node->path_consumed = cifs_utf16_bytes(tmp, |
4800 | le16_to_cpu(pSMBr->PathConsumed), | 4805 | le16_to_cpu(pSMBr->PathConsumed), |
4801 | nls_codepage); | 4806 | nls_codepage); |
4802 | kfree(tmp); | 4807 | kfree(tmp); |
@@ -4809,8 +4814,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, | |||
4809 | /* copy DfsPath */ | 4814 | /* copy DfsPath */ |
4810 | temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); | 4815 | temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); |
4811 | max_len = data_end - temp; | 4816 | max_len = data_end - temp; |
4812 | node->path_name = cifs_strndup_from_ucs(temp, max_len, | 4817 | node->path_name = cifs_strndup_from_utf16(temp, max_len, |
4813 | is_unicode, nls_codepage); | 4818 | is_unicode, nls_codepage); |
4814 | if (!node->path_name) { | 4819 | if (!node->path_name) { |
4815 | rc = -ENOMEM; | 4820 | rc = -ENOMEM; |
4816 | goto parse_DFS_referrals_exit; | 4821 | goto parse_DFS_referrals_exit; |
@@ -4819,8 +4824,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, | |||
4819 | /* copy link target UNC */ | 4824 | /* copy link target UNC */ |
4820 | temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); | 4825 | temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); |
4821 | max_len = data_end - temp; | 4826 | max_len = data_end - temp; |
4822 | node->node_name = cifs_strndup_from_ucs(temp, max_len, | 4827 | node->node_name = cifs_strndup_from_utf16(temp, max_len, |
4823 | is_unicode, nls_codepage); | 4828 | is_unicode, nls_codepage); |
4824 | if (!node->node_name) | 4829 | if (!node->node_name) |
4825 | rc = -ENOMEM; | 4830 | rc = -ENOMEM; |
4826 | } | 4831 | } |
@@ -4873,8 +4878,9 @@ getDFSRetry: | |||
4873 | if (ses->capabilities & CAP_UNICODE) { | 4878 | if (ses->capabilities & CAP_UNICODE) { |
4874 | pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; | 4879 | pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; |
4875 | name_len = | 4880 | name_len = |
4876 | cifsConvertToUCS((__le16 *) pSMB->RequestFileName, | 4881 | cifsConvertToUTF16((__le16 *) pSMB->RequestFileName, |
4877 | searchName, PATH_MAX, nls_codepage, remap); | 4882 | searchName, PATH_MAX, nls_codepage, |
4883 | remap); | ||
4878 | name_len++; /* trailing null */ | 4884 | name_len++; /* trailing null */ |
4879 | name_len *= 2; | 4885 | name_len *= 2; |
4880 | } else { /* BB improve the check for buffer overruns BB */ | 4886 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -5506,8 +5512,8 @@ SetEOFRetry: | |||
5506 | 5512 | ||
5507 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 5513 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
5508 | name_len = | 5514 | name_len = |
5509 | cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, | 5515 | cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, |
5510 | PATH_MAX, nls_codepage, remap); | 5516 | PATH_MAX, nls_codepage, remap); |
5511 | name_len++; /* trailing null */ | 5517 | name_len++; /* trailing null */ |
5512 | name_len *= 2; | 5518 | name_len *= 2; |
5513 | } else { /* BB improve the check for buffer overruns BB */ | 5519 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -5796,8 +5802,8 @@ SetTimesRetry: | |||
5796 | 5802 | ||
5797 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 5803 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
5798 | name_len = | 5804 | name_len = |
5799 | cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, | 5805 | cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, |
5800 | PATH_MAX, nls_codepage, remap); | 5806 | PATH_MAX, nls_codepage, remap); |
5801 | name_len++; /* trailing null */ | 5807 | name_len++; /* trailing null */ |
5802 | name_len *= 2; | 5808 | name_len *= 2; |
5803 | } else { /* BB improve the check for buffer overruns BB */ | 5809 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -5877,8 +5883,8 @@ SetAttrLgcyRetry: | |||
5877 | 5883 | ||
5878 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 5884 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
5879 | name_len = | 5885 | name_len = |
5880 | ConvertToUCS((__le16 *) pSMB->fileName, fileName, | 5886 | ConvertToUTF16((__le16 *) pSMB->fileName, fileName, |
5881 | PATH_MAX, nls_codepage); | 5887 | PATH_MAX, nls_codepage); |
5882 | name_len++; /* trailing null */ | 5888 | name_len++; /* trailing null */ |
5883 | name_len *= 2; | 5889 | name_len *= 2; |
5884 | } else { /* BB improve the check for buffer overruns BB */ | 5890 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -6030,8 +6036,8 @@ setPermsRetry: | |||
6030 | 6036 | ||
6031 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 6037 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
6032 | name_len = | 6038 | name_len = |
6033 | cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, | 6039 | cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, |
6034 | PATH_MAX, nls_codepage, remap); | 6040 | PATH_MAX, nls_codepage, remap); |
6035 | name_len++; /* trailing null */ | 6041 | name_len++; /* trailing null */ |
6036 | name_len *= 2; | 6042 | name_len *= 2; |
6037 | } else { /* BB improve the check for buffer overruns BB */ | 6043 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -6123,8 +6129,8 @@ QAllEAsRetry: | |||
6123 | 6129 | ||
6124 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 6130 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
6125 | list_len = | 6131 | list_len = |
6126 | cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, | 6132 | cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, |
6127 | PATH_MAX, nls_codepage, remap); | 6133 | PATH_MAX, nls_codepage, remap); |
6128 | list_len++; /* trailing null */ | 6134 | list_len++; /* trailing null */ |
6129 | list_len *= 2; | 6135 | list_len *= 2; |
6130 | } else { /* BB improve the check for buffer overruns BB */ | 6136 | } else { /* BB improve the check for buffer overruns BB */ |
@@ -6301,8 +6307,8 @@ SetEARetry: | |||
6301 | 6307 | ||
6302 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { | 6308 | if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { |
6303 | name_len = | 6309 | name_len = |
6304 | cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, | 6310 | cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, |
6305 | PATH_MAX, nls_codepage, remap); | 6311 | PATH_MAX, nls_codepage, remap); |
6306 | name_len++; /* trailing null */ | 6312 | name_len++; /* trailing null */ |
6307 | name_len *= 2; | 6313 | name_len *= 2; |
6308 | } else { /* BB improve the check for buffer overruns BB */ | 6314 | } else { /* BB improve the check for buffer overruns BB */ |
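Every conversion site in this file repeats the same length accounting: the converter returns a count of 16-bit units, the caller adds one for the trailing null and doubles to get bytes on the wire. A sketch, with strlen() standing in for cifsConvertToUTF16():

    #include <string.h>

    static int wire_name_bytes(const char *name)
    {
            int name_len = (int)strlen(name);   /* units converted */
            name_len++;                          /* trailing null */
            name_len *= 2;                       /* 16-bit units -> bytes */
            return name_len;
    }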
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4666780f315d..986709a8d903 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <asm/processor.h> | 38 | #include <asm/processor.h> |
39 | #include <linux/inet.h> | 39 | #include <linux/inet.h> |
40 | #include <linux/module.h> | 40 | #include <linux/module.h> |
41 | #include <keys/user-type.h> | ||
41 | #include <net/ipv6.h> | 42 | #include <net/ipv6.h> |
42 | #include "cifspdu.h" | 43 | #include "cifspdu.h" |
43 | #include "cifsglob.h" | 44 | #include "cifsglob.h" |
@@ -225,74 +226,90 @@ static int check2ndT2(struct smb_hdr *pSMB) | |||
225 | 226 | ||
226 | static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) | 227 | static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) |
227 | { | 228 | { |
228 | struct smb_t2_rsp *pSMB2 = (struct smb_t2_rsp *)psecond; | 229 | struct smb_t2_rsp *pSMBs = (struct smb_t2_rsp *)psecond; |
229 | struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)pTargetSMB; | 230 | struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)pTargetSMB; |
230 | char *data_area_of_target; | 231 | char *data_area_of_tgt; |
231 | char *data_area_of_buf2; | 232 | char *data_area_of_src; |
232 | int remaining; | 233 | int remaining; |
233 | unsigned int byte_count, total_in_buf; | 234 | unsigned int byte_count, total_in_tgt; |
234 | __u16 total_data_size, total_in_buf2; | 235 | __u16 tgt_total_cnt, src_total_cnt, total_in_src; |
235 | 236 | ||
236 | total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); | 237 | src_total_cnt = get_unaligned_le16(&pSMBs->t2_rsp.TotalDataCount); |
238 | tgt_total_cnt = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); | ||
237 | 239 | ||
238 | if (total_data_size != | 240 | if (tgt_total_cnt != src_total_cnt) |
239 | get_unaligned_le16(&pSMB2->t2_rsp.TotalDataCount)) | 241 | cFYI(1, "total data count of primary and secondary t2 differ " |
240 | cFYI(1, "total data size of primary and secondary t2 differ"); | 242 | "source=%hu target=%hu", src_total_cnt, tgt_total_cnt); |
241 | 243 | ||
242 | total_in_buf = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); | 244 | total_in_tgt = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); |
243 | 245 | ||
244 | remaining = total_data_size - total_in_buf; | 246 | remaining = tgt_total_cnt - total_in_tgt; |
245 | 247 | ||
246 | if (remaining < 0) | 248 | if (remaining < 0) { |
249 | cFYI(1, "Server sent too much data. tgt_total_cnt=%hu " | ||
250 | "total_in_tgt=%hu", tgt_total_cnt, total_in_tgt); | ||
247 | return -EPROTO; | 251 | return -EPROTO; |
252 | } | ||
248 | 253 | ||
249 | if (remaining == 0) /* nothing to do, ignore */ | 254 | if (remaining == 0) { |
255 | /* nothing to do, ignore */ | ||
256 | cFYI(1, "no more data remains"); | ||
250 | return 0; | 257 | return 0; |
258 | } | ||
251 | 259 | ||
252 | total_in_buf2 = get_unaligned_le16(&pSMB2->t2_rsp.DataCount); | 260 | total_in_src = get_unaligned_le16(&pSMBs->t2_rsp.DataCount); |
253 | if (remaining < total_in_buf2) { | 261 | if (remaining < total_in_src) |
254 | cFYI(1, "transact2 2nd response contains too much data"); | 262 | cFYI(1, "transact2 2nd response contains too much data"); |
255 | } | ||
256 | 263 | ||
257 | /* find end of first SMB data area */ | 264 | /* find end of first SMB data area */ |
258 | data_area_of_target = (char *)&pSMBt->hdr.Protocol + | 265 | data_area_of_tgt = (char *)&pSMBt->hdr.Protocol + |
259 | get_unaligned_le16(&pSMBt->t2_rsp.DataOffset); | 266 | get_unaligned_le16(&pSMBt->t2_rsp.DataOffset); |
260 | /* validate target area */ | ||
261 | 267 | ||
262 | data_area_of_buf2 = (char *)&pSMB2->hdr.Protocol + | 268 | /* validate target area */ |
263 | get_unaligned_le16(&pSMB2->t2_rsp.DataOffset); | 269 | data_area_of_src = (char *)&pSMBs->hdr.Protocol + |
270 | get_unaligned_le16(&pSMBs->t2_rsp.DataOffset); | ||
264 | 271 | ||
265 | data_area_of_target += total_in_buf; | 272 | data_area_of_tgt += total_in_tgt; |
266 | 273 | ||
267 | /* copy second buffer into end of first buffer */ | 274 | total_in_tgt += total_in_src; |
268 | total_in_buf += total_in_buf2; | ||
269 | /* is the result too big for the field? */ | 275 | /* is the result too big for the field? */ |
270 | if (total_in_buf > USHRT_MAX) | 276 | if (total_in_tgt > USHRT_MAX) { |
277 | cFYI(1, "coalesced DataCount too large (%u)", total_in_tgt); | ||
271 | return -EPROTO; | 278 | return -EPROTO; |
272 | put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount); | 279 | } |
280 | put_unaligned_le16(total_in_tgt, &pSMBt->t2_rsp.DataCount); | ||
273 | 281 | ||
274 | /* fix up the BCC */ | 282 | /* fix up the BCC */ |
275 | byte_count = get_bcc(pTargetSMB); | 283 | byte_count = get_bcc(pTargetSMB); |
276 | byte_count += total_in_buf2; | 284 | byte_count += total_in_src; |
277 | /* is the result too big for the field? */ | 285 | /* is the result too big for the field? */ |
278 | if (byte_count > USHRT_MAX) | 286 | if (byte_count > USHRT_MAX) { |
287 | cFYI(1, "coalesced BCC too large (%u)", byte_count); | ||
279 | return -EPROTO; | 288 | return -EPROTO; |
289 | } | ||
280 | put_bcc(byte_count, pTargetSMB); | 290 | put_bcc(byte_count, pTargetSMB); |
281 | 291 | ||
282 | byte_count = be32_to_cpu(pTargetSMB->smb_buf_length); | 292 | byte_count = be32_to_cpu(pTargetSMB->smb_buf_length); |
283 | byte_count += total_in_buf2; | 293 | byte_count += total_in_src; |
284 | /* don't allow buffer to overflow */ | 294 | /* don't allow buffer to overflow */ |
285 | if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) | 295 | if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { |
296 | cFYI(1, "coalesced BCC exceeds buffer size (%u)", byte_count); | ||
286 | return -ENOBUFS; | 297 | return -ENOBUFS; |
298 | } | ||
287 | pTargetSMB->smb_buf_length = cpu_to_be32(byte_count); | 299 | pTargetSMB->smb_buf_length = cpu_to_be32(byte_count); |
288 | 300 | ||
289 | memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); | 301 | /* copy second buffer into end of first buffer */ |
302 | memcpy(data_area_of_tgt, data_area_of_src, total_in_src); | ||
290 | 303 | ||
291 | if (remaining == total_in_buf2) { | 304 | if (remaining != total_in_src) { |
292 | cFYI(1, "found the last secondary response"); | 305 | /* more responses to go */ |
293 | return 0; /* we are done */ | 306 | cFYI(1, "waiting for more secondary responses"); |
294 | } else /* more responses to go */ | ||
295 | return 1; | 307 | return 1; |
308 | } | ||
309 | |||
310 | /* we are done */ | ||
311 | cFYI(1, "found the last secondary response"); | ||
312 | return 0; | ||
296 | } | 313 | } |
297 | 314 | ||
298 | static void | 315 | static void |
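The reworked coalesce_t2() above keeps the running DataCount in a 16-bit field, so each append revalidates the sum before copying. A userspace sketch of just those bounds checks, with the SMB header fixups omitted and the buffer layout simplified:

    #include <stdint.h>
    #include <string.h>
    #include <limits.h>

    /* Returns -1 on protocol error, 0 when done, 1 when more chunks follow. */
    static int append_chunk(uint8_t *tgt, uint16_t *tgt_count, uint16_t tgt_total,
                            const uint8_t *src, uint16_t src_count)
    {
            int remaining = (int)tgt_total - (int)*tgt_count;

            if (remaining < 0)
                    return -1;      /* server sent too much data */
            if (remaining == 0)
                    return 0;       /* nothing left to do */
            if ((unsigned)*tgt_count + src_count > USHRT_MAX)
                    return -1;      /* would not fit the 16-bit field */

            memcpy(tgt + *tgt_count, src, src_count);
            *tgt_count += src_count;
            return remaining != src_count;
    }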
@@ -1578,11 +1595,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1578 | } | 1595 | } |
1579 | } | 1596 | } |
1580 | 1597 | ||
1581 | if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) { | 1598 | #ifndef CONFIG_KEYS |
1582 | cERROR(1, "Multiuser mounts currently require krb5 " | 1599 | /* Multiuser mounts require CONFIG_KEYS support */ |
1583 | "authentication!"); | 1600 | if (vol->multiuser) { |
1601 | cERROR(1, "Multiuser mounts require kernels with " | ||
1602 | "CONFIG_KEYS enabled."); | ||
1584 | goto cifs_parse_mount_err; | 1603 | goto cifs_parse_mount_err; |
1585 | } | 1604 | } |
1605 | #endif | ||
1586 | 1606 | ||
1587 | if (vol->UNCip == NULL) | 1607 | if (vol->UNCip == NULL) |
1588 | vol->UNCip = &vol->UNC[2]; | 1608 | vol->UNCip = &vol->UNC[2]; |
@@ -1981,10 +2001,16 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol) | |||
1981 | return 0; | 2001 | return 0; |
1982 | break; | 2002 | break; |
1983 | default: | 2003 | default: |
2004 | /* NULL username means anonymous session */ | ||
2005 | if (ses->user_name == NULL) { | ||
2006 | if (!vol->nullauth) | ||
2007 | return 0; | ||
2008 | break; | ||
2009 | } | ||
2010 | |||
1984 | /* anything else takes username/password */ | 2011 | /* anything else takes username/password */ |
1985 | if (ses->user_name == NULL) | 2012 | if (strncmp(ses->user_name, |
1986 | return 0; | 2013 | vol->username ? vol->username : "", |
1987 | if (strncmp(ses->user_name, vol->username, | ||
1988 | MAX_USERNAME_SIZE)) | 2014 | MAX_USERNAME_SIZE)) |
1989 | return 0; | 2015 | return 0; |
1990 | if (strlen(vol->username) != 0 && | 2016 | if (strlen(vol->username) != 0 && |
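The matching rule introduced above: a NULL stored user name marks an anonymous session, which only a nullauth mount may reuse; otherwise names compare with a bounded strncmp, treating a missing requested name as "". A sketch:

    #include <string.h>
    #include <stdbool.h>

    static bool session_user_matches(const char *ses_user, const char *vol_user,
                                     bool nullauth, size_t max)
    {
            if (ses_user == NULL)
                    return nullauth;        /* anonymous session */
            return strncmp(ses_user, vol_user ? vol_user : "", max) == 0;
    }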
@@ -2039,6 +2065,132 @@ cifs_put_smb_ses(struct cifs_ses *ses) | |||
2039 | cifs_put_tcp_session(server); | 2065 | cifs_put_tcp_session(server); |
2040 | } | 2066 | } |
2041 | 2067 | ||
2068 | #ifdef CONFIG_KEYS | ||
2069 | |||
2070 | /* strlen("cifs:a:") + INET6_ADDRSTRLEN + 1 */ | ||
2071 | #define CIFSCREDS_DESC_SIZE (7 + INET6_ADDRSTRLEN + 1) | ||
2072 | |||
2073 | /* Populate username and pw fields from keyring if possible */ | ||
2074 | static int | ||
2075 | cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) | ||
2076 | { | ||
2077 | int rc = 0; | ||
2078 | char *desc, *delim, *payload; | ||
2079 | ssize_t len; | ||
2080 | struct key *key; | ||
2081 | struct TCP_Server_Info *server = ses->server; | ||
2082 | struct sockaddr_in *sa; | ||
2083 | struct sockaddr_in6 *sa6; | ||
2084 | struct user_key_payload *upayload; | ||
2085 | |||
2086 | desc = kmalloc(CIFSCREDS_DESC_SIZE, GFP_KERNEL); | ||
2087 | if (!desc) | ||
2088 | return -ENOMEM; | ||
2089 | |||
2090 | /* try to find an address key first */ | ||
2091 | switch (server->dstaddr.ss_family) { | ||
2092 | case AF_INET: | ||
2093 | sa = (struct sockaddr_in *)&server->dstaddr; | ||
2094 | sprintf(desc, "cifs:a:%pI4", &sa->sin_addr.s_addr); | ||
2095 | break; | ||
2096 | case AF_INET6: | ||
2097 | sa6 = (struct sockaddr_in6 *)&server->dstaddr; | ||
2098 | sprintf(desc, "cifs:a:%pI6c", &sa6->sin6_addr.s6_addr); | ||
2099 | break; | ||
2100 | default: | ||
2101 | cFYI(1, "Bad ss_family (%hu)", server->dstaddr.ss_family); | ||
2102 | rc = -EINVAL; | ||
2103 | goto out_err; | ||
2104 | } | ||
2105 | |||
2106 | cFYI(1, "%s: desc=%s", __func__, desc); | ||
2107 | key = request_key(&key_type_logon, desc, ""); | ||
2108 | if (IS_ERR(key)) { | ||
2109 | if (!ses->domainName) { | ||
2110 | cFYI(1, "domainName is NULL"); | ||
2111 | rc = PTR_ERR(key); | ||
2112 | goto out_err; | ||
2113 | } | ||
2114 | |||
2115 | /* didn't work, try to find a domain key */ | ||
2116 | sprintf(desc, "cifs:d:%s", ses->domainName); | ||
2117 | cFYI(1, "%s: desc=%s", __func__, desc); | ||
2118 | key = request_key(&key_type_logon, desc, ""); | ||
2119 | if (IS_ERR(key)) { | ||
2120 | rc = PTR_ERR(key); | ||
2121 | goto out_err; | ||
2122 | } | ||
2123 | } | ||
2124 | |||
2125 | down_read(&key->sem); | ||
2126 | upayload = key->payload.data; | ||
2127 | if (IS_ERR_OR_NULL(upayload)) { | ||
2128 | rc = PTR_ERR(key); | ||
2129 | goto out_key_put; | ||
2130 | } | ||
2131 | |||
2132 | /* find first : in payload */ | ||
2133 | payload = (char *)upayload->data; | ||
2134 | delim = strnchr(payload, upayload->datalen, ':'); | ||
2135 | cFYI(1, "payload=%s", payload); | ||
2136 | if (!delim) { | ||
2137 | cFYI(1, "Unable to find ':' in payload (datalen=%d)", | ||
2138 | upayload->datalen); | ||
2139 | rc = -EINVAL; | ||
2140 | goto out_key_put; | ||
2141 | } | ||
2142 | |||
2143 | len = delim - payload; | ||
2144 | if (len > MAX_USERNAME_SIZE || len <= 0) { | ||
2145 | cFYI(1, "Bad value from username search (len=%ld)", len); | ||
2146 | rc = -EINVAL; | ||
2147 | goto out_key_put; | ||
2148 | } | ||
2149 | |||
2150 | vol->username = kstrndup(payload, len, GFP_KERNEL); | ||
2151 | if (!vol->username) { | ||
2152 | cFYI(1, "Unable to allocate %ld bytes for username", len); | ||
2153 | rc = -ENOMEM; | ||
2154 | goto out_key_put; | ||
2155 | } | ||
2156 | cFYI(1, "%s: username=%s", __func__, vol->username); | ||
2157 | |||
2158 | len = key->datalen - (len + 1); | ||
2159 | if (len > MAX_PASSWORD_SIZE || len <= 0) { | ||
2160 | cFYI(1, "Bad len for password search (len=%ld)", len); | ||
2161 | rc = -EINVAL; | ||
2162 | kfree(vol->username); | ||
2163 | vol->username = NULL; | ||
2164 | goto out_key_put; | ||
2165 | } | ||
2166 | |||
2167 | ++delim; | ||
2168 | vol->password = kstrndup(delim, len, GFP_KERNEL); | ||
2169 | if (!vol->password) { | ||
2170 | cFYI(1, "Unable to allocate %ld bytes for password", len); | ||
2171 | rc = -ENOMEM; | ||
2172 | kfree(vol->username); | ||
2173 | vol->username = NULL; | ||
2174 | goto out_key_put; | ||
2175 | } | ||
2176 | |||
2177 | out_key_put: | ||
2178 | up_read(&key->sem); | ||
2179 | key_put(key); | ||
2180 | out_err: | ||
2181 | kfree(desc); | ||
2182 | cFYI(1, "%s: returning %d", __func__, rc); | ||
2183 | return rc; | ||
2184 | } | ||
2185 | #else /* ! CONFIG_KEYS */ | ||
2186 | static inline int | ||
2187 | cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)), | ||
2188 | struct cifs_ses *ses __attribute__((unused))) | ||
2189 | { | ||
2190 | return -ENOSYS; | ||
2191 | } | ||
2192 | #endif /* CONFIG_KEYS */ | ||
2193 | |||
2042 | static bool warned_on_ntlm; /* globals init to false automatically */ | 2194 | static bool warned_on_ntlm; /* globals init to false automatically */ |
2043 | 2195 | ||
2044 | static struct cifs_ses * | 2196 | static struct cifs_ses * |
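Note: cifs_set_cifscreds() above looks up a "logon"-type key named "cifs:a:&lt;server address&gt;", falling back to "cifs:d:&lt;domain&gt;", and expects a "username:password" payload split at the first ':'. In practice keys of this shape are installed from userspace (the cifscreds(1) helper was introduced alongside this code). A hedged userspace sketch of the description and payload conventions; build_desc() and split_payload() are illustrative helpers, not kernel code, and the demo uses NUL-terminated strings where the kernel bounds its search with the key's datalen:

```c
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Build the key description the way the kernel does: "cifs:a:" + address. */
static int build_desc(char *desc, size_t len, const struct sockaddr *sa)
{
	char addr[INET6_ADDRSTRLEN];
	const void *src;

	if (sa->sa_family == AF_INET)
		src = &((const struct sockaddr_in *)sa)->sin_addr;
	else if (sa->sa_family == AF_INET6)
		src = &((const struct sockaddr_in6 *)sa)->sin6_addr;
	else
		return -1;

	if (!inet_ntop(sa->sa_family, src, addr, sizeof(addr)))
		return -1;
	return snprintf(desc, len, "cifs:a:%s", addr) < (int)len ? 0 : -1;
}

/* Split a "user:pass" payload at the first ':', as the kernel does. */
static int split_payload(const char *payload, char **user, char **pass)
{
	const char *delim = strchr(payload, ':');

	if (!delim || delim == payload)
		return -1;
	*user = strndup(payload, (size_t)(delim - payload));
	*pass = strdup(delim + 1);
	return (*user && *pass) ? 0 : -1;
}

int main(void)
{
	struct sockaddr_in sin = { .sin_family = AF_INET };
	char desc[7 + INET6_ADDRSTRLEN + 1]; /* strlen("cifs:a:") + addr + NUL */
	char *user, *pass;

	inet_pton(AF_INET, "192.0.2.7", &sin.sin_addr);
	if (!build_desc(desc, sizeof(desc), (struct sockaddr *)&sin) &&
	    !split_payload("bob:secret", &user, &pass))
		printf("desc=%s user=%s pass=%s\n", desc, user, pass);
	return 0;
}
```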
@@ -2914,18 +3066,33 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, | |||
2914 | #define CIFS_DEFAULT_IOSIZE (1024 * 1024) | 3066 | #define CIFS_DEFAULT_IOSIZE (1024 * 1024) |
2915 | 3067 | ||
2916 | /* | 3068 | /* |
2917 | * Windows only supports a max of 60k reads. Default to that when posix | 3069 | * Windows only supports a max of 60kb reads and 65535 byte writes. Default to |
2918 | * extensions aren't in force. | 3070 | * those values when posix extensions aren't in force. In actuality here, we |
3071 | * use 65536 to allow for a write that is a multiple of 4k. Most servers seem | ||
3072 | * to be ok with the extra byte even though Windows doesn't send writes that | ||
3073 | * are that large. | ||
3074 | * | ||
3075 | * Citation: | ||
3076 | * | ||
3077 | * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx | ||
2919 | */ | 3078 | */ |
2920 | #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) | 3079 | #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) |
3080 | #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) | ||
2921 | 3081 | ||
2922 | static unsigned int | 3082 | static unsigned int |
2923 | cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) | 3083 | cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) |
2924 | { | 3084 | { |
2925 | __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); | 3085 | __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); |
2926 | struct TCP_Server_Info *server = tcon->ses->server; | 3086 | struct TCP_Server_Info *server = tcon->ses->server; |
2927 | unsigned int wsize = pvolume_info->wsize ? pvolume_info->wsize : | 3087 | unsigned int wsize; |
2928 | CIFS_DEFAULT_IOSIZE; | 3088 | |
3089 | /* start with specified wsize, or default */ | ||
3090 | if (pvolume_info->wsize) | ||
3091 | wsize = pvolume_info->wsize; | ||
3092 | else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) | ||
3093 | wsize = CIFS_DEFAULT_IOSIZE; | ||
3094 | else | ||
3095 | wsize = CIFS_DEFAULT_NON_POSIX_WSIZE; | ||
2929 | 3096 | ||
2930 | /* can server support 24-bit write sizes? (via UNIX extensions) */ | 3097 | /* can server support 24-bit write sizes? (via UNIX extensions) */ |
2931 | if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) | 3098 | if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) |
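Note: the wsize hunk above replaces a single unconditional default with a three-way choice. An illustrative restatement of that decision (not kernel code), using the constants from the hunk; 65536 rather than Windows' nominal 65535 keeps writes a multiple of 4k:

```c
#include <stdbool.h>
#include <stdio.h>

#define CIFS_DEFAULT_IOSIZE           (1024 * 1024)
#define CIFS_DEFAULT_NON_POSIX_WSIZE  65536 /* 65535 rounded up to a 4k multiple */

/* Illustrative restatement of the new default selection, not kernel code. */
static unsigned int default_wsize(unsigned int requested, bool unix_ext,
				  bool large_write_cap)
{
	if (requested)                       /* user said wsize=... */
		return requested;
	if (unix_ext && large_write_cap)     /* POSIX extensions in force */
		return CIFS_DEFAULT_IOSIZE;
	return CIFS_DEFAULT_NON_POSIX_WSIZE; /* plain Windows-style server */
}

int main(void)
{
	printf("%u %u %u\n",
	       default_wsize(0, true, true),        /* 1048576 */
	       default_wsize(0, false, false),      /* 65536   */
	       default_wsize(131072, false, false)); /* 131072: explicit wins */
	return 0;
}
```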
@@ -3136,10 +3303,9 @@ cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, | |||
3136 | return -EINVAL; | 3303 | return -EINVAL; |
3137 | 3304 | ||
3138 | if (volume_info->nullauth) { | 3305 | if (volume_info->nullauth) { |
3139 | cFYI(1, "null user"); | 3306 | cFYI(1, "Anonymous login"); |
3140 | volume_info->username = kzalloc(1, GFP_KERNEL); | 3307 | kfree(volume_info->username); |
3141 | if (volume_info->username == NULL) | 3308 | volume_info->username = NULL; |
3142 | return -ENOMEM; | ||
3143 | } else if (volume_info->username) { | 3309 | } else if (volume_info->username) { |
3144 | /* BB fixme parse for domain name here */ | 3310 | /* BB fixme parse for domain name here */ |
3145 | cFYI(1, "Username: %s", volume_info->username); | 3311 | cFYI(1, "Username: %s", volume_info->username); |
@@ -3478,7 +3644,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses, | |||
3478 | if (ses->capabilities & CAP_UNICODE) { | 3644 | if (ses->capabilities & CAP_UNICODE) { |
3479 | smb_buffer->Flags2 |= SMBFLG2_UNICODE; | 3645 | smb_buffer->Flags2 |= SMBFLG2_UNICODE; |
3480 | length = | 3646 | length = |
3481 | cifs_strtoUCS((__le16 *) bcc_ptr, tree, | 3647 | cifs_strtoUTF16((__le16 *) bcc_ptr, tree, |
3482 | 6 /* max utf8 char length in bytes */ * | 3648 | 6 /* max utf8 char length in bytes */ * |
3483 | (/* server len*/ + 256 /* share len */), nls_codepage); | 3649 | (/* server len*/ + 256 /* share len */), nls_codepage); |
3484 | bcc_ptr += 2 * length; /* convert num 16 bit words to bytes */ | 3650 | bcc_ptr += 2 * length; /* convert num 16 bit words to bytes */ |
@@ -3533,7 +3699,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses, | |||
3533 | 3699 | ||
3534 | /* mostly informational -- no need to fail on error here */ | 3700 | /* mostly informational -- no need to fail on error here */ |
3535 | kfree(tcon->nativeFileSystem); | 3701 | kfree(tcon->nativeFileSystem); |
3536 | tcon->nativeFileSystem = cifs_strndup_from_ucs(bcc_ptr, | 3702 | tcon->nativeFileSystem = cifs_strndup_from_utf16(bcc_ptr, |
3537 | bytes_left, is_unicode, | 3703 | bytes_left, is_unicode, |
3538 | nls_codepage); | 3704 | nls_codepage); |
3539 | 3705 | ||
@@ -3657,16 +3823,38 @@ int cifs_setup_session(unsigned int xid, struct cifs_ses *ses, | |||
3657 | return rc; | 3823 | return rc; |
3658 | } | 3824 | } |
3659 | 3825 | ||
3826 | static int | ||
3827 | cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses) | ||
3828 | { | ||
3829 | switch (ses->server->secType) { | ||
3830 | case Kerberos: | ||
3831 | vol->secFlg = CIFSSEC_MUST_KRB5; | ||
3832 | return 0; | ||
3833 | case NTLMv2: | ||
3834 | vol->secFlg = CIFSSEC_MUST_NTLMV2; | ||
3835 | break; | ||
3836 | case NTLM: | ||
3837 | vol->secFlg = CIFSSEC_MUST_NTLM; | ||
3838 | break; | ||
3839 | case RawNTLMSSP: | ||
3840 | vol->secFlg = CIFSSEC_MUST_NTLMSSP; | ||
3841 | break; | ||
3842 | case LANMAN: | ||
3843 | vol->secFlg = CIFSSEC_MUST_LANMAN; | ||
3844 | break; | ||
3845 | } | ||
3846 | |||
3847 | return cifs_set_cifscreds(vol, ses); | ||
3848 | } | ||
3849 | |||
3660 | static struct cifs_tcon * | 3850 | static struct cifs_tcon * |
3661 | cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) | 3851 | cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) |
3662 | { | 3852 | { |
3853 | int rc; | ||
3663 | struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); | 3854 | struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); |
3664 | struct cifs_ses *ses; | 3855 | struct cifs_ses *ses; |
3665 | struct cifs_tcon *tcon = NULL; | 3856 | struct cifs_tcon *tcon = NULL; |
3666 | struct smb_vol *vol_info; | 3857 | struct smb_vol *vol_info; |
3667 | char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */ | ||
3668 | /* We used to have this as MAX_USERNAME which is */ | ||
3669 | /* way too big now (256 instead of 32) */ | ||
3670 | 3858 | ||
3671 | vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); | 3859 | vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); |
3672 | if (vol_info == NULL) { | 3860 | if (vol_info == NULL) { |
@@ -3674,8 +3862,6 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) | |||
3674 | goto out; | 3862 | goto out; |
3675 | } | 3863 | } |
3676 | 3864 | ||
3677 | snprintf(username, sizeof(username), "krb50x%x", fsuid); | ||
3678 | vol_info->username = username; | ||
3679 | vol_info->local_nls = cifs_sb->local_nls; | 3865 | vol_info->local_nls = cifs_sb->local_nls; |
3680 | vol_info->linux_uid = fsuid; | 3866 | vol_info->linux_uid = fsuid; |
3681 | vol_info->cred_uid = fsuid; | 3867 | vol_info->cred_uid = fsuid; |
@@ -3685,8 +3871,11 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) | |||
3685 | vol_info->local_lease = master_tcon->local_lease; | 3871 | vol_info->local_lease = master_tcon->local_lease; |
3686 | vol_info->no_linux_ext = !master_tcon->unix_ext; | 3872 | vol_info->no_linux_ext = !master_tcon->unix_ext; |
3687 | 3873 | ||
3688 | /* FIXME: allow for other secFlg settings */ | 3874 | rc = cifs_set_vol_auth(vol_info, master_tcon->ses); |
3689 | vol_info->secFlg = CIFSSEC_MUST_KRB5; | 3875 | if (rc) { |
3876 | tcon = ERR_PTR(rc); | ||
3877 | goto out; | ||
3878 | } | ||
3690 | 3879 | ||
3691 | /* get a reference for the same TCP session */ | 3880 | /* get a reference for the same TCP session */ |
3692 | spin_lock(&cifs_tcp_ses_lock); | 3881 | spin_lock(&cifs_tcp_ses_lock); |
@@ -3709,6 +3898,8 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) | |||
3709 | if (ses->capabilities & CAP_UNIX) | 3898 | if (ses->capabilities & CAP_UNIX) |
3710 | reset_cifs_unix_caps(0, tcon, NULL, vol_info); | 3899 | reset_cifs_unix_caps(0, tcon, NULL, vol_info); |
3711 | out: | 3900 | out: |
3901 | kfree(vol_info->username); | ||
3902 | kfree(vol_info->password); | ||
3712 | kfree(vol_info); | 3903 | kfree(vol_info); |
3713 | 3904 | ||
3714 | return tcon; | 3905 | return tcon; |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index a090bbe6ee29..e2bbc683e018 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -647,10 +647,11 @@ static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, | |||
647 | 647 | ||
648 | name.name = scratch_buf; | 648 | name.name = scratch_buf; |
649 | name.len = | 649 | name.len = |
650 | cifs_from_ucs2((char *)name.name, (__le16 *)de.name, | 650 | cifs_from_utf16((char *)name.name, (__le16 *)de.name, |
651 | UNICODE_NAME_MAX, | 651 | UNICODE_NAME_MAX, |
652 | min(de.namelen, (size_t)max_len), nlt, | 652 | min_t(size_t, de.namelen, |
653 | cifs_sb->mnt_cifs_flags & | 653 | (size_t)max_len), nlt, |
654 | cifs_sb->mnt_cifs_flags & | ||
654 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 655 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
655 | name.len -= nls_nullsize(nlt); | 656 | name.len -= nls_nullsize(nlt); |
656 | } else { | 657 | } else { |
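Note: the readdir hunk above swaps min() for min_t() because de.namelen and max_len do not share a type; the kernel's min() rejects mixed-type operands at compile time, while min_t() casts both operands to an explicit type. A simplified userspace rendition of the two macros (GNU C statement expressions and typeof assumed; the real definitions live in include/linux/kernel.h):

```c
#include <stdio.h>

/* Simplified stand-ins for the kernel macros: min() insists the two
 * sides have the same type (via a compile-time pointer comparison),
 * min_t() casts both sides to an explicit type first. GCC/Clang only. */
#define min(x, y) ({				\
	typeof(x) _x = (x);			\
	typeof(y) _y = (y);			\
	(void)(&_x == &_y); /* warns on mixed types */ \
	_x < _y ? _x : _y; })

#define min_t(type, x, y) ({			\
	type _tx = (x);				\
	type _ty = (y);				\
	_tx < _ty ? _tx : _ty; })

int main(void)
{
	size_t namelen = 300;
	int max_len = 255;

	/* min(namelen, max_len) would draw a mixed-type warning;
	 * min_t makes the conversion explicit and well-defined. */
	printf("%zu\n", min_t(size_t, namelen, max_len)); /* 255 */
	return 0;
}
```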
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 4ec3ee9d72cc..d85efad5765f 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -167,16 +167,16 @@ unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp) | |||
167 | int bytes_ret = 0; | 167 | int bytes_ret = 0; |
168 | 168 | ||
169 | /* Copy OS version */ | 169 | /* Copy OS version */ |
170 | bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32, | 170 | bytes_ret = cifs_strtoUTF16((__le16 *)bcc_ptr, "Linux version ", 32, |
171 | nls_cp); | 171 | nls_cp); |
172 | bcc_ptr += 2 * bytes_ret; | 172 | bcc_ptr += 2 * bytes_ret; |
173 | bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release, | 173 | bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, init_utsname()->release, |
174 | 32, nls_cp); | 174 | 32, nls_cp); |
175 | bcc_ptr += 2 * bytes_ret; | 175 | bcc_ptr += 2 * bytes_ret; |
176 | bcc_ptr += 2; /* trailing null */ | 176 | bcc_ptr += 2; /* trailing null */ |
177 | 177 | ||
178 | bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS, | 178 | bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS, |
179 | 32, nls_cp); | 179 | 32, nls_cp); |
180 | bcc_ptr += 2 * bytes_ret; | 180 | bcc_ptr += 2 * bytes_ret; |
181 | bcc_ptr += 2; /* trailing null */ | 181 | bcc_ptr += 2; /* trailing null */ |
182 | 182 | ||
@@ -197,8 +197,8 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, | |||
197 | *(bcc_ptr+1) = 0; | 197 | *(bcc_ptr+1) = 0; |
198 | bytes_ret = 0; | 198 | bytes_ret = 0; |
199 | } else | 199 | } else |
200 | bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName, | 200 | bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName, |
201 | 256, nls_cp); | 201 | 256, nls_cp); |
202 | bcc_ptr += 2 * bytes_ret; | 202 | bcc_ptr += 2 * bytes_ret; |
203 | bcc_ptr += 2; /* account for null terminator */ | 203 | bcc_ptr += 2; /* account for null terminator */ |
204 | 204 | ||
@@ -226,8 +226,8 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, | |||
226 | *bcc_ptr = 0; | 226 | *bcc_ptr = 0; |
227 | *(bcc_ptr+1) = 0; | 227 | *(bcc_ptr+1) = 0; |
228 | } else { | 228 | } else { |
229 | bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->user_name, | 229 | bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->user_name, |
230 | MAX_USERNAME_SIZE, nls_cp); | 230 | MAX_USERNAME_SIZE, nls_cp); |
231 | } | 231 | } |
232 | bcc_ptr += 2 * bytes_ret; | 232 | bcc_ptr += 2 * bytes_ret; |
233 | bcc_ptr += 2; /* account for null termination */ | 233 | bcc_ptr += 2; /* account for null termination */ |
@@ -287,7 +287,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, | |||
287 | cFYI(1, "bleft %d", bleft); | 287 | cFYI(1, "bleft %d", bleft); |
288 | 288 | ||
289 | kfree(ses->serverOS); | 289 | kfree(ses->serverOS); |
290 | ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); | 290 | ses->serverOS = cifs_strndup_from_utf16(data, bleft, true, nls_cp); |
291 | cFYI(1, "serverOS=%s", ses->serverOS); | 291 | cFYI(1, "serverOS=%s", ses->serverOS); |
292 | len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; | 292 | len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; |
293 | data += len; | 293 | data += len; |
@@ -296,7 +296,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, | |||
296 | return; | 296 | return; |
297 | 297 | ||
298 | kfree(ses->serverNOS); | 298 | kfree(ses->serverNOS); |
299 | ses->serverNOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); | 299 | ses->serverNOS = cifs_strndup_from_utf16(data, bleft, true, nls_cp); |
300 | cFYI(1, "serverNOS=%s", ses->serverNOS); | 300 | cFYI(1, "serverNOS=%s", ses->serverNOS); |
301 | len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; | 301 | len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; |
302 | data += len; | 302 | data += len; |
@@ -305,7 +305,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, | |||
305 | return; | 305 | return; |
306 | 306 | ||
307 | kfree(ses->serverDomain); | 307 | kfree(ses->serverDomain); |
308 | ses->serverDomain = cifs_strndup_from_ucs(data, bleft, true, nls_cp); | 308 | ses->serverDomain = cifs_strndup_from_utf16(data, bleft, true, nls_cp); |
309 | cFYI(1, "serverDomain=%s", ses->serverDomain); | 309 | cFYI(1, "serverDomain=%s", ses->serverDomain); |
310 | 310 | ||
311 | return; | 311 | return; |
@@ -502,8 +502,8 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
502 | tmp += 2; | 502 | tmp += 2; |
503 | } else { | 503 | } else { |
504 | int len; | 504 | int len; |
505 | len = cifs_strtoUCS((__le16 *)tmp, ses->domainName, | 505 | len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName, |
506 | MAX_USERNAME_SIZE, nls_cp); | 506 | MAX_USERNAME_SIZE, nls_cp); |
507 | len *= 2; /* unicode is 2 bytes each */ | 507 | len *= 2; /* unicode is 2 bytes each */ |
508 | sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); | 508 | sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); |
509 | sec_blob->DomainName.Length = cpu_to_le16(len); | 509 | sec_blob->DomainName.Length = cpu_to_le16(len); |
@@ -518,8 +518,8 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
518 | tmp += 2; | 518 | tmp += 2; |
519 | } else { | 519 | } else { |
520 | int len; | 520 | int len; |
521 | len = cifs_strtoUCS((__le16 *)tmp, ses->user_name, | 521 | len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name, |
522 | MAX_USERNAME_SIZE, nls_cp); | 522 | MAX_USERNAME_SIZE, nls_cp); |
523 | len *= 2; /* unicode is 2 bytes each */ | 523 | len *= 2; /* unicode is 2 bytes each */ |
524 | sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); | 524 | sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); |
525 | sec_blob->UserName.Length = cpu_to_le16(len); | 525 | sec_blob->UserName.Length = cpu_to_le16(len); |
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 80d850881938..d5cd9aa7eacc 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c | |||
@@ -213,7 +213,7 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16, | |||
213 | 213 | ||
214 | /* Password cannot be longer than 128 characters */ | 214 | /* Password cannot be longer than 128 characters */ |
215 | if (passwd) /* Password must be converted to NT unicode */ | 215 | if (passwd) /* Password must be converted to NT unicode */ |
216 | len = cifs_strtoUCS(wpwd, passwd, 128, codepage); | 216 | len = cifs_strtoUTF16(wpwd, passwd, 128, codepage); |
217 | else { | 217 | else { |
218 | len = 0; | 218 | len = 0; |
219 | *wpwd = 0; /* Ensure string is null terminated */ | 219 | *wpwd = 0; /* Ensure string is null terminated */ |
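Note: the sess.c and smbencrypt.c hunks above are part of a tree-wide rename of cifs_strtoUCS to cifs_strtoUTF16, reflecting that SMB wire strings are UTF-16LE code units rather than the older UCS-2 fiction; hence the recurring "len *= 2" and 2-byte null terminators. An ASCII-only userspace sketch of what such a conversion does to the byte stream; the real helper converts through an NLS codepage and handles multi-byte input:

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative ASCII-subset conversion to UTF-16LE, not the real
 * cifs_strtoUTF16. Returns the number of 16-bit units written. */
static int ascii_to_utf16le(uint8_t *dst, const char *src, int maxlen)
{
	int i;

	for (i = 0; i < maxlen && src[i]; i++) {
		dst[2 * i]     = (uint8_t)src[i]; /* low byte: the char */
		dst[2 * i + 1] = 0;               /* high byte: zero    */
	}
	return i;
}

int main(void)
{
	uint8_t buf[64];
	int units = ascii_to_utf16le(buf, "NT", (int)(sizeof(buf) / 2));

	/* "len *= 2" in the session-setup code: 16-bit units to bytes. */
	printf("units=%d bytes=%d first=%02x %02x\n",
	       units, units * 2, buf[0], buf[1]); /* units=2 bytes=4 4e 00 */
	return 0;
}
```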
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 6475877b0763..911cf30d057d 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c | |||
@@ -88,24 +88,21 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, | |||
88 | - link the two up if this is needed | 88 | - link the two up if this is needed |
89 | - fill in the attributes | 89 | - fill in the attributes |
90 | */ | 90 | */ |
91 | int coda_cnode_make(struct inode **inode, struct CodaFid *fid, struct super_block *sb) | 91 | struct inode *coda_cnode_make(struct CodaFid *fid, struct super_block *sb) |
92 | { | 92 | { |
93 | struct coda_vattr attr; | 93 | struct coda_vattr attr; |
94 | struct inode *inode; | ||
94 | int error; | 95 | int error; |
95 | 96 | ||
96 | /* We get inode numbers from Venus -- see venus source */ | 97 | /* We get inode numbers from Venus -- see venus source */ |
97 | error = venus_getattr(sb, fid, &attr); | 98 | error = venus_getattr(sb, fid, &attr); |
98 | if ( error ) { | 99 | if (error) |
99 | *inode = NULL; | 100 | return ERR_PTR(error); |
100 | return error; | ||
101 | } | ||
102 | 101 | ||
103 | *inode = coda_iget(sb, fid, &attr); | 102 | inode = coda_iget(sb, fid, &attr); |
104 | if ( IS_ERR(*inode) ) { | 103 | if (IS_ERR(inode)) |
105 | printk("coda_cnode_make: coda_iget failed\n"); | 104 | printk("coda_cnode_make: coda_iget failed\n"); |
106 | return PTR_ERR(*inode); | 105 | return inode; |
107 | } | ||
108 | return 0; | ||
109 | } | 106 | } |
110 | 107 | ||
111 | 108 | ||
@@ -156,19 +153,16 @@ struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb) | |||
156 | } | 153 | } |
157 | 154 | ||
158 | /* the CONTROL inode is made without asking attributes from Venus */ | 155 | /* the CONTROL inode is made without asking attributes from Venus */ |
159 | int coda_cnode_makectl(struct inode **inode, struct super_block *sb) | 156 | struct inode *coda_cnode_makectl(struct super_block *sb) |
160 | { | 157 | { |
161 | int error = -ENOMEM; | 158 | struct inode *inode = new_inode(sb); |
162 | 159 | if (inode) { | |
163 | *inode = new_inode(sb); | 160 | inode->i_ino = CTL_INO; |
164 | if (*inode) { | 161 | inode->i_op = &coda_ioctl_inode_operations; |
165 | (*inode)->i_ino = CTL_INO; | 162 | inode->i_fop = &coda_ioctl_operations; |
166 | (*inode)->i_op = &coda_ioctl_inode_operations; | 163 | inode->i_mode = 0444; |
167 | (*inode)->i_fop = &coda_ioctl_operations; | 164 | return inode; |
168 | (*inode)->i_mode = 0444; | ||
169 | error = 0; | ||
170 | } | 165 | } |
171 | 166 | return ERR_PTR(-ENOMEM); | |
172 | return error; | ||
173 | } | 167 | } |
174 | 168 | ||
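Note: coda_cnode_make() and coda_cnode_makectl() above switch from the int-plus-output-parameter style to returning the inode directly, with errors encoded via ERR_PTR(). A self-contained sketch of the idiom, with the err.h helpers simplified (the real ones live in include/linux/err.h):

```c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified versions of the kernel's err.h helpers: small negative
 * errno values are folded into the top of the pointer range. */
#define MAX_ERRNO 4095
#define ERR_PTR(err) ((void *)(long)(err))
#define PTR_ERR(ptr) ((long)(ptr))
#define IS_ERR(ptr)  ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

struct inode { long ino; };

/* Before: int make(struct inode **out, ...);  after: this shape. */
static struct inode *cnode_make(long ino, int fail)
{
	struct inode *inode;

	if (fail)
		return ERR_PTR(-ENOMEM); /* the error travels in the pointer */
	inode = malloc(sizeof(*inode));
	if (!inode)
		return ERR_PTR(-ENOMEM);
	inode->ino = ino;
	return inode;
}

int main(void)
{
	struct inode *inode = cnode_make(42, 0);

	if (IS_ERR(inode))
		return (int)-PTR_ERR(inode);
	printf("ino=%ld\n", inode->ino);
	free(inode);
	return 0;
}
```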
diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h index e35071b1de0e..b24fdfd8a3f0 100644 --- a/fs/coda/coda_fs_i.h +++ b/fs/coda/coda_fs_i.h | |||
@@ -49,9 +49,9 @@ struct coda_file_info { | |||
49 | #define C_DYING 0x4 /* from venus (which died) */ | 49 | #define C_DYING 0x4 /* from venus (which died) */ |
50 | #define C_PURGE 0x8 | 50 | #define C_PURGE 0x8 |
51 | 51 | ||
52 | int coda_cnode_make(struct inode **, struct CodaFid *, struct super_block *); | 52 | struct inode *coda_cnode_make(struct CodaFid *, struct super_block *); |
53 | struct inode *coda_iget(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr); | 53 | struct inode *coda_iget(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr); |
54 | int coda_cnode_makectl(struct inode **inode, struct super_block *sb); | 54 | struct inode *coda_cnode_makectl(struct super_block *sb); |
55 | struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb); | 55 | struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb); |
56 | void coda_replace_fid(struct inode *, struct CodaFid *, struct CodaFid *); | 56 | void coda_replace_fid(struct inode *, struct CodaFid *, struct CodaFid *); |
57 | 57 | ||
diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 83d2fd8ec24b..177515829062 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c | |||
@@ -96,12 +96,11 @@ const struct file_operations coda_dir_operations = { | |||
96 | /* access routines: lookup, readlink, permission */ | 96 | /* access routines: lookup, readlink, permission */ |
97 | static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd) | 97 | static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd) |
98 | { | 98 | { |
99 | struct inode *inode = NULL; | 99 | struct super_block *sb = dir->i_sb; |
100 | struct CodaFid resfid = { { 0, } }; | ||
101 | int type = 0; | ||
102 | int error = 0; | ||
103 | const char *name = entry->d_name.name; | 100 | const char *name = entry->d_name.name; |
104 | size_t length = entry->d_name.len; | 101 | size_t length = entry->d_name.len; |
102 | struct inode *inode; | ||
103 | int type = 0; | ||
105 | 104 | ||
106 | if (length > CODA_MAXNAMLEN) { | 105 | if (length > CODA_MAXNAMLEN) { |
107 | printk(KERN_ERR "name too long: lookup, %s (%*s)\n", | 106 | printk(KERN_ERR "name too long: lookup, %s (%*s)\n", |
@@ -111,23 +110,21 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc | |||
111 | 110 | ||
112 | /* control object, create inode on the fly */ | 111 | /* control object, create inode on the fly */ |
113 | if (coda_isroot(dir) && coda_iscontrol(name, length)) { | 112 | if (coda_isroot(dir) && coda_iscontrol(name, length)) { |
114 | error = coda_cnode_makectl(&inode, dir->i_sb); | 113 | inode = coda_cnode_makectl(sb); |
115 | type = CODA_NOCACHE; | 114 | type = CODA_NOCACHE; |
116 | goto exit; | 115 | } else { |
116 | struct CodaFid fid = { { 0, } }; | ||
117 | int error = venus_lookup(sb, coda_i2f(dir), name, length, | ||
118 | &type, &fid); | ||
119 | inode = !error ? coda_cnode_make(&fid, sb) : ERR_PTR(error); | ||
117 | } | 120 | } |
118 | 121 | ||
119 | error = venus_lookup(dir->i_sb, coda_i2f(dir), name, length, | 122 | if (!IS_ERR(inode) && (type & CODA_NOCACHE)) |
120 | &type, &resfid); | ||
121 | if (!error) | ||
122 | error = coda_cnode_make(&inode, &resfid, dir->i_sb); | ||
123 | |||
124 | if (error && error != -ENOENT) | ||
125 | return ERR_PTR(error); | ||
126 | |||
127 | exit: | ||
128 | if (inode && (type & CODA_NOCACHE)) | ||
129 | coda_flag_inode(inode, C_VATTR | C_PURGE); | 123 | coda_flag_inode(inode, C_VATTR | C_PURGE); |
130 | 124 | ||
125 | if (inode == ERR_PTR(-ENOENT)) | ||
126 | inode = NULL; | ||
127 | |||
131 | return d_splice_alias(inode, entry); | 128 | return d_splice_alias(inode, entry); |
132 | } | 129 | } |
133 | 130 | ||
diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 1c08a8cd673a..5e2e1b3f068d 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c | |||
@@ -204,10 +204,12 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) | |||
204 | printk("coda_read_super: rootfid is %s\n", coda_f2s(&fid)); | 204 | printk("coda_read_super: rootfid is %s\n", coda_f2s(&fid)); |
205 | 205 | ||
206 | /* make root inode */ | 206 | /* make root inode */ |
207 | error = coda_cnode_make(&root, &fid, sb); | 207 | root = coda_cnode_make(&fid, sb); |
208 | if ( error || !root ) { | 208 | if (IS_ERR(root)) { |
209 | printk("Failure of coda_cnode_make for root: error %d\n", error); | 209 | error = PTR_ERR(root); |
210 | goto error; | 210 | printk("Failure of coda_cnode_make for root: error %d\n", error); |
211 | root = NULL; | ||
212 | goto error; | ||
211 | } | 213 | } |
212 | 214 | ||
213 | printk("coda_read_super: rootinode is %ld dev %s\n", | 215 | printk("coda_read_super: rootinode is %ld dev %s\n", |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index a10e428b32b4..a26bea10e81b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -105,6 +105,7 @@ | |||
105 | 105 | ||
106 | #include <linux/hiddev.h> | 106 | #include <linux/hiddev.h> |
107 | 107 | ||
108 | #define __DVB_CORE__ | ||
108 | #include <linux/dvb/audio.h> | 109 | #include <linux/dvb/audio.h> |
109 | #include <linux/dvb/dmx.h> | 110 | #include <linux/dvb/dmx.h> |
110 | #include <linux/dvb/frontend.h> | 111 | #include <linux/dvb/frontend.h> |
diff --git a/fs/dcache.c b/fs/dcache.c index 9791b1e7eee4..16a53cc2cc02 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -243,6 +243,7 @@ static void dentry_lru_add(struct dentry *dentry) | |||
243 | static void __dentry_lru_del(struct dentry *dentry) | 243 | static void __dentry_lru_del(struct dentry *dentry) |
244 | { | 244 | { |
245 | list_del_init(&dentry->d_lru); | 245 | list_del_init(&dentry->d_lru); |
246 | dentry->d_flags &= ~DCACHE_SHRINK_LIST; | ||
246 | dentry->d_sb->s_nr_dentry_unused--; | 247 | dentry->d_sb->s_nr_dentry_unused--; |
247 | dentry_stat.nr_unused--; | 248 | dentry_stat.nr_unused--; |
248 | } | 249 | } |
@@ -276,15 +277,15 @@ static void dentry_lru_prune(struct dentry *dentry) | |||
276 | } | 277 | } |
277 | } | 278 | } |
278 | 279 | ||
279 | static void dentry_lru_move_tail(struct dentry *dentry) | 280 | static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) |
280 | { | 281 | { |
281 | spin_lock(&dcache_lru_lock); | 282 | spin_lock(&dcache_lru_lock); |
282 | if (list_empty(&dentry->d_lru)) { | 283 | if (list_empty(&dentry->d_lru)) { |
283 | list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); | 284 | list_add_tail(&dentry->d_lru, list); |
284 | dentry->d_sb->s_nr_dentry_unused++; | 285 | dentry->d_sb->s_nr_dentry_unused++; |
285 | dentry_stat.nr_unused++; | 286 | dentry_stat.nr_unused++; |
286 | } else { | 287 | } else { |
287 | list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); | 288 | list_move_tail(&dentry->d_lru, list); |
288 | } | 289 | } |
289 | spin_unlock(&dcache_lru_lock); | 290 | spin_unlock(&dcache_lru_lock); |
290 | } | 291 | } |
@@ -770,14 +771,18 @@ static void shrink_dentry_list(struct list_head *list) | |||
770 | } | 771 | } |
771 | 772 | ||
772 | /** | 773 | /** |
773 | * __shrink_dcache_sb - shrink the dentry LRU on a given superblock | 774 | * prune_dcache_sb - shrink the dcache |
774 | * @sb: superblock to shrink dentry LRU. | 775 | * @sb: superblock |
775 | * @count: number of entries to prune | 776 | * @count: number of entries to try to free |
776 | * @flags: flags to control the dentry processing | 777 | * |
778 | * Attempt to shrink the superblock dcache LRU by @count entries. This is | ||
779 | * done when we need more memory and is called from the superblock shrinker | ||
780 | * function. | ||
777 | * | 781 | * |
778 | * If flags contains DCACHE_REFERENCED reference dentries will not be pruned. | 782 | * This function may fail to free any resources if all the dentries are in |
783 | * use. | ||
779 | */ | 784 | */ |
780 | static void __shrink_dcache_sb(struct super_block *sb, int count, int flags) | 785 | void prune_dcache_sb(struct super_block *sb, int count) |
781 | { | 786 | { |
782 | struct dentry *dentry; | 787 | struct dentry *dentry; |
783 | LIST_HEAD(referenced); | 788 | LIST_HEAD(referenced); |
@@ -796,18 +801,13 @@ relock: | |||
796 | goto relock; | 801 | goto relock; |
797 | } | 802 | } |
798 | 803 | ||
799 | /* | 804 | if (dentry->d_flags & DCACHE_REFERENCED) { |
800 | * If we are honouring the DCACHE_REFERENCED flag and the | ||
801 | * dentry has this flag set, don't free it. Clear the flag | ||
802 | * and put it back on the LRU. | ||
803 | */ | ||
804 | if (flags & DCACHE_REFERENCED && | ||
805 | dentry->d_flags & DCACHE_REFERENCED) { | ||
806 | dentry->d_flags &= ~DCACHE_REFERENCED; | 805 | dentry->d_flags &= ~DCACHE_REFERENCED; |
807 | list_move(&dentry->d_lru, &referenced); | 806 | list_move(&dentry->d_lru, &referenced); |
808 | spin_unlock(&dentry->d_lock); | 807 | spin_unlock(&dentry->d_lock); |
809 | } else { | 808 | } else { |
810 | list_move_tail(&dentry->d_lru, &tmp); | 809 | list_move_tail(&dentry->d_lru, &tmp); |
810 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
811 | spin_unlock(&dentry->d_lock); | 811 | spin_unlock(&dentry->d_lock); |
812 | if (!--count) | 812 | if (!--count) |
813 | break; | 813 | break; |
@@ -822,23 +822,6 @@ relock: | |||
822 | } | 822 | } |
823 | 823 | ||
824 | /** | 824 | /** |
825 | * prune_dcache_sb - shrink the dcache | ||
826 | * @sb: superblock | ||
827 | * @nr_to_scan: number of entries to try to free | ||
828 | * | ||
829 | * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is | ||
830 | * done when we need more memory and is called from the superblock shrinker | ||
831 | * function. | ||
832 | * | ||
833 | * This function may fail to free any resources if all the dentries are in | ||
834 | * use. | ||
835 | */ | ||
836 | void prune_dcache_sb(struct super_block *sb, int nr_to_scan) | ||
837 | { | ||
838 | __shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED); | ||
839 | } | ||
840 | |||
841 | /** | ||
842 | * shrink_dcache_sb - shrink dcache for a superblock | 825 | * shrink_dcache_sb - shrink dcache for a superblock |
843 | * @sb: superblock | 826 | * @sb: superblock |
844 | * | 827 | * |
@@ -1092,7 +1075,7 @@ EXPORT_SYMBOL(have_submounts); | |||
1092 | * drop the lock and return early due to latency | 1075 | * drop the lock and return early due to latency |
1093 | * constraints. | 1076 | * constraints. |
1094 | */ | 1077 | */ |
1095 | static int select_parent(struct dentry * parent) | 1078 | static int select_parent(struct dentry *parent, struct list_head *dispose) |
1096 | { | 1079 | { |
1097 | struct dentry *this_parent; | 1080 | struct dentry *this_parent; |
1098 | struct list_head *next; | 1081 | struct list_head *next; |
@@ -1114,17 +1097,21 @@ resume: | |||
1114 | 1097 | ||
1115 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | 1098 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); |
1116 | 1099 | ||
1117 | /* | 1100 | /* |
1118 | * move only zero ref count dentries to the end | 1101 | * move only zero ref count dentries to the dispose list. |
1119 | * of the unused list for prune_dcache | 1102 | * |
1103 | * Those which are presently on the shrink list, being processed | ||
1104 | * by shrink_dentry_list(), shouldn't be moved. Otherwise the | ||
1105 | * loop in shrink_dcache_parent() might not make any progress | ||
1106 | * and loop forever. | ||
1120 | */ | 1107 | */ |
1121 | if (!dentry->d_count) { | 1108 | if (dentry->d_count) { |
1122 | dentry_lru_move_tail(dentry); | ||
1123 | found++; | ||
1124 | } else { | ||
1125 | dentry_lru_del(dentry); | 1109 | dentry_lru_del(dentry); |
1110 | } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { | ||
1111 | dentry_lru_move_list(dentry, dispose); | ||
1112 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
1113 | found++; | ||
1126 | } | 1114 | } |
1127 | |||
1128 | /* | 1115 | /* |
1129 | * We can return to the caller if we have found some (this | 1116 | * We can return to the caller if we have found some (this |
1130 | * ensures forward progress). We'll be coming back to find | 1117 | * ensures forward progress). We'll be coming back to find |
@@ -1181,14 +1168,13 @@ rename_retry: | |||
1181 | * | 1168 | * |
1182 | * Prune the dcache to remove unused children of the parent dentry. | 1169 | * Prune the dcache to remove unused children of the parent dentry. |
1183 | */ | 1170 | */ |
1184 | |||
1185 | void shrink_dcache_parent(struct dentry * parent) | 1171 | void shrink_dcache_parent(struct dentry * parent) |
1186 | { | 1172 | { |
1187 | struct super_block *sb = parent->d_sb; | 1173 | LIST_HEAD(dispose); |
1188 | int found; | 1174 | int found; |
1189 | 1175 | ||
1190 | while ((found = select_parent(parent)) != 0) | 1176 | while ((found = select_parent(parent, &dispose)) != 0) |
1191 | __shrink_dcache_sb(sb, found, 0); | 1177 | shrink_dentry_list(&dispose); |
1192 | } | 1178 | } |
1193 | EXPORT_SYMBOL(shrink_dcache_parent); | 1179 | EXPORT_SYMBOL(shrink_dcache_parent); |
1194 | 1180 | ||
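Note: in the dcache hunks above, select_parent() now moves prunable dentries onto a caller-supplied dispose list and tags them DCACHE_SHRINK_LIST, so a concurrent scan cannot claim them again and shrink_dcache_parent() is guaranteed forward progress. A toy single-threaded model of the claim-then-dispose pattern (plain singly linked list, not the kernel's list_head or locking):

```c
#include <stdio.h>

/* Toy dentries on a singly linked LRU; "shrink" marks an entry as
 * already claimed by someone's private dispose list. */
struct dentry {
	int refcount;
	int shrink;            /* stand-in for DCACHE_SHRINK_LIST */
	struct dentry *next;
};

/* Move unreferenced, unclaimed entries from *lru to *dispose.
 * Mirrors select_parent(): claimed entries are skipped so two
 * scans can't fight over the same dentry. */
static int select_victims(struct dentry **lru, struct dentry **dispose)
{
	struct dentry **pp = lru;
	int found = 0;

	while (*pp) {
		struct dentry *d = *pp;

		if (d->refcount == 0 && !d->shrink) {
			d->shrink = 1;      /* claim it */
			*pp = d->next;      /* unlink from the LRU */
			d->next = *dispose; /* push onto dispose list */
			*dispose = d;
			found++;
		} else {
			pp = &d->next;
		}
	}
	return found;
}

int main(void)
{
	struct dentry c = { 1, 0, NULL }, b = { 0, 0, &c }, a = { 0, 0, &b };
	struct dentry *lru = &a, *dispose = NULL;
	int claimed = select_victims(&lru, &dispose);

	printf("claimed=%d busy_left=%d\n", claimed, lru != NULL); /* 2 1 */
	return 0;
}
```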
@@ -1461,6 +1447,23 @@ struct dentry * d_alloc_root(struct inode * root_inode) | |||
1461 | } | 1447 | } |
1462 | EXPORT_SYMBOL(d_alloc_root); | 1448 | EXPORT_SYMBOL(d_alloc_root); |
1463 | 1449 | ||
1450 | struct dentry *d_make_root(struct inode *root_inode) | ||
1451 | { | ||
1452 | struct dentry *res = NULL; | ||
1453 | |||
1454 | if (root_inode) { | ||
1455 | static const struct qstr name = { .name = "/", .len = 1 }; | ||
1456 | |||
1457 | res = __d_alloc(root_inode->i_sb, &name); | ||
1458 | if (res) | ||
1459 | d_instantiate(res, root_inode); | ||
1460 | else | ||
1461 | iput(root_inode); | ||
1462 | } | ||
1463 | return res; | ||
1464 | } | ||
1465 | EXPORT_SYMBOL(d_make_root); | ||
1466 | |||
1464 | static struct dentry * __d_find_any_alias(struct inode *inode) | 1467 | static struct dentry * __d_find_any_alias(struct inode *inode) |
1465 | { | 1468 | { |
1466 | struct dentry *alias; | 1469 | struct dentry *alias; |
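Note: d_make_root(), added above, also consumes the inode reference on failure (the iput() in the else branch), so fill_super implementations need no separate cleanup for the root inode on the error path. A mock of that ownership contract with stand-in types, illustrative only:

```c
#include <stdio.h>
#include <stdlib.h>

struct inode  { int ino; };
struct dentry { struct inode *d_inode; };

static void iput(struct inode *inode) { free(inode); }

/* Mock of the contract: on success the dentry owns the inode; on any
 * failure (including a NULL inode) the reference is already dropped. */
static struct dentry *d_make_root(struct inode *root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		res = malloc(sizeof(*res));
		if (res)
			res->d_inode = root_inode;
		else
			iput(root_inode); /* callee cleans up, caller doesn't */
	}
	return res;
}

int main(void)
{
	struct inode *inode = malloc(sizeof(*inode));
	struct dentry *root = d_make_root(inode);

	if (!root)
		return 1; /* no iput(inode) needed here: already done */
	printf("root ok\n");
	free(root->d_inode);
	free(root);
	return 0;
}
```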
@@ -1472,7 +1475,14 @@ static struct dentry * __d_find_any_alias(struct inode *inode) | |||
1472 | return alias; | 1475 | return alias; |
1473 | } | 1476 | } |
1474 | 1477 | ||
1475 | static struct dentry * d_find_any_alias(struct inode *inode) | 1478 | /** |
1479 | * d_find_any_alias - find any alias for a given inode | ||
1480 | * @inode: inode to find an alias for | ||
1481 | * | ||
1482 | * If any aliases exist for the given inode, take and return a | ||
1483 | * reference for one of them. If no aliases exist, return %NULL. | ||
1484 | */ | ||
1485 | struct dentry *d_find_any_alias(struct inode *inode) | ||
1476 | { | 1486 | { |
1477 | struct dentry *de; | 1487 | struct dentry *de; |
1478 | 1488 | ||
@@ -1481,7 +1491,7 @@ static struct dentry * d_find_any_alias(struct inode *inode) | |||
1481 | spin_unlock(&inode->i_lock); | 1491 | spin_unlock(&inode->i_lock); |
1482 | return de; | 1492 | return de; |
1483 | } | 1493 | } |
1484 | 1494 | EXPORT_SYMBOL(d_find_any_alias); | |
1485 | 1495 | ||
1486 | /** | 1496 | /** |
1487 | * d_obtain_alias - find or allocate a dentry for a given inode | 1497 | * d_obtain_alias - find or allocate a dentry for a given inode |
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index f65d4455c5e5..ef023eef0464 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c | |||
@@ -540,7 +540,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_blob); | |||
540 | * debugfs_print_regs32 - use seq_print to describe a set of registers | 540 | * debugfs_print_regs32 - use seq_print to describe a set of registers |
541 | * @s: the seq_file structure being used to generate output | 541 | * @s: the seq_file structure being used to generate output |
542 | * @regs: an array of struct debugfs_reg32 structures | 542 | * @regs: an array of struct debugfs_reg32 structures |
543 | * @mregs: the length of the above array | 543 | * @nregs: the length of the above array |
544 | * @base: the base address to be used in reading the registers | 544 | * @base: the base address to be used in reading the registers |
545 | * @prefix: a string to be prefixed to every output line | 545 | * @prefix: a string to be prefixed to every output line |
546 | * | 546 | * |
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 79673eb71151..c4e2a58a2e82 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -301,7 +301,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent) | |||
301 | 301 | ||
302 | inode = new_inode(s); | 302 | inode = new_inode(s); |
303 | if (!inode) | 303 | if (!inode) |
304 | goto free_fsi; | 304 | goto fail; |
305 | inode->i_ino = 1; | 305 | inode->i_ino = 1; |
306 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 306 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
307 | inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; | 307 | inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; |
@@ -316,8 +316,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent) | |||
316 | printk(KERN_ERR "devpts: get root dentry failed\n"); | 316 | printk(KERN_ERR "devpts: get root dentry failed\n"); |
317 | iput(inode); | 317 | iput(inode); |
318 | 318 | ||
319 | free_fsi: | ||
320 | kfree(s->s_fs_info); | ||
321 | fail: | 319 | fail: |
322 | return -ENOMEM; | 320 | return -ENOMEM; |
323 | } | 321 | } |
diff --git a/fs/direct-io.c b/fs/direct-io.c index d740ab67ff6e..4a588dbd11bf 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/rwsem.h> | 36 | #include <linux/rwsem.h> |
37 | #include <linux/uio.h> | 37 | #include <linux/uio.h> |
38 | #include <linux/atomic.h> | 38 | #include <linux/atomic.h> |
39 | #include <linux/prefetch.h> | ||
39 | 40 | ||
40 | /* | 41 | /* |
41 | * How many user pages to map in one call to get_user_pages(). This determines | 42 | * How many user pages to map in one call to get_user_pages(). This determines |
@@ -580,9 +581,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, | |||
580 | { | 581 | { |
581 | int ret; | 582 | int ret; |
582 | sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ | 583 | sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ |
584 | sector_t fs_endblk; /* Into file, in filesystem-sized blocks */ | ||
583 | unsigned long fs_count; /* Number of filesystem-sized blocks */ | 585 | unsigned long fs_count; /* Number of filesystem-sized blocks */ |
584 | unsigned long dio_count;/* Number of dio_block-sized blocks */ | ||
585 | unsigned long blkmask; | ||
586 | int create; | 586 | int create; |
587 | 587 | ||
588 | /* | 588 | /* |
@@ -593,11 +593,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, | |||
593 | if (ret == 0) { | 593 | if (ret == 0) { |
594 | BUG_ON(sdio->block_in_file >= sdio->final_block_in_request); | 594 | BUG_ON(sdio->block_in_file >= sdio->final_block_in_request); |
595 | fs_startblk = sdio->block_in_file >> sdio->blkfactor; | 595 | fs_startblk = sdio->block_in_file >> sdio->blkfactor; |
596 | dio_count = sdio->final_block_in_request - sdio->block_in_file; | 596 | fs_endblk = (sdio->final_block_in_request - 1) >> |
597 | fs_count = dio_count >> sdio->blkfactor; | 597 | sdio->blkfactor; |
598 | blkmask = (1 << sdio->blkfactor) - 1; | 598 | fs_count = fs_endblk - fs_startblk + 1; |
599 | if (dio_count & blkmask) | ||
600 | fs_count++; | ||
601 | 599 | ||
602 | map_bh->b_state = 0; | 600 | map_bh->b_state = 0; |
603 | map_bh->b_size = fs_count << dio->inode->i_blkbits; | 601 | map_bh->b_size = fs_count << dio->inode->i_blkbits; |
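Note: the get_more_blocks() hunk above replaces length rounding with endpoint arithmetic. The two differ when the request starts partway into a filesystem block: rounding the length up can undercount the blocks actually spanned. A quick check with blkfactor = 3, i.e. 8 dio blocks per fs block:

```c
#include <stdio.h>

/* Compare the old (length-rounding) and new (endpoint) fs-block
 * counts from get_more_blocks(). */
static void count(unsigned long start, unsigned long final,
		  unsigned blkfactor)
{
	unsigned long dio_count = final - start;
	unsigned long blkmask = (1UL << blkfactor) - 1;
	unsigned long old = dio_count >> blkfactor;
	unsigned long fs_start, fs_end;

	if (dio_count & blkmask)
		old++;

	fs_start = start >> blkfactor;           /* first fs block */
	fs_end = (final - 1) >> blkfactor;       /* last fs block  */

	printf("[%lu,%lu): old=%lu new=%lu\n",
	       start, final, old, fs_end - fs_start + 1);
}

int main(void)
{
	count(0, 16, 3); /* aligned: old=2 new=2 */
	count(7, 9, 3);  /* straddles a boundary: old=1 (short!) new=2 */
	return 0;
}
```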
@@ -1090,8 +1088,8 @@ static inline int drop_refcount(struct dio *dio) | |||
1090 | * individual fields and will generate much worse code. This is important | 1088 | * individual fields and will generate much worse code. This is important |
1091 | * for the whole file. | 1089 | * for the whole file. |
1092 | */ | 1090 | */ |
1093 | ssize_t | 1091 | static inline ssize_t |
1094 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1092 | do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
1095 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 1093 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
1096 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 1094 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
1097 | dio_submit_t submit_io, int flags) | 1095 | dio_submit_t submit_io, int flags) |
@@ -1100,7 +1098,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1100 | size_t size; | 1098 | size_t size; |
1101 | unsigned long addr; | 1099 | unsigned long addr; |
1102 | unsigned blkbits = inode->i_blkbits; | 1100 | unsigned blkbits = inode->i_blkbits; |
1103 | unsigned bdev_blkbits = 0; | ||
1104 | unsigned blocksize_mask = (1 << blkbits) - 1; | 1101 | unsigned blocksize_mask = (1 << blkbits) - 1; |
1105 | ssize_t retval = -EINVAL; | 1102 | ssize_t retval = -EINVAL; |
1106 | loff_t end = offset; | 1103 | loff_t end = offset; |
@@ -1113,12 +1110,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1113 | if (rw & WRITE) | 1110 | if (rw & WRITE) |
1114 | rw = WRITE_ODIRECT; | 1111 | rw = WRITE_ODIRECT; |
1115 | 1112 | ||
1116 | if (bdev) | 1113 | /* |
1117 | bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); | 1114 | * Avoid references to bdev if not absolutely needed to give |
1115 | * the early prefetch in the caller enough time. | ||
1116 | */ | ||
1118 | 1117 | ||
1119 | if (offset & blocksize_mask) { | 1118 | if (offset & blocksize_mask) { |
1120 | if (bdev) | 1119 | if (bdev) |
1121 | blkbits = bdev_blkbits; | 1120 | blkbits = blksize_bits(bdev_logical_block_size(bdev)); |
1122 | blocksize_mask = (1 << blkbits) - 1; | 1121 | blocksize_mask = (1 << blkbits) - 1; |
1123 | if (offset & blocksize_mask) | 1122 | if (offset & blocksize_mask) |
1124 | goto out; | 1123 | goto out; |
@@ -1129,11 +1128,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1129 | addr = (unsigned long)iov[seg].iov_base; | 1128 | addr = (unsigned long)iov[seg].iov_base; |
1130 | size = iov[seg].iov_len; | 1129 | size = iov[seg].iov_len; |
1131 | end += size; | 1130 | end += size; |
1132 | if ((addr & blocksize_mask) || (size & blocksize_mask)) { | 1131 | if (unlikely((addr & blocksize_mask) || |
1132 | (size & blocksize_mask))) { | ||
1133 | if (bdev) | 1133 | if (bdev) |
1134 | blkbits = bdev_blkbits; | 1134 | blkbits = blksize_bits( |
1135 | bdev_logical_block_size(bdev)); | ||
1135 | blocksize_mask = (1 << blkbits) - 1; | 1136 | blocksize_mask = (1 << blkbits) - 1; |
1136 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | 1137 | if ((addr & blocksize_mask) || (size & blocksize_mask)) |
1137 | goto out; | 1138 | goto out; |
1138 | } | 1139 | } |
1139 | } | 1140 | } |
@@ -1316,6 +1317,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1316 | out: | 1317 | out: |
1317 | return retval; | 1318 | return retval; |
1318 | } | 1319 | } |
1320 | |||
1321 | ssize_t | ||
1322 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | ||
1323 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | ||
1324 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | ||
1325 | dio_submit_t submit_io, int flags) | ||
1326 | { | ||
1327 | /* | ||
1328 | * The block device state is needed in the end to finally | ||
1329 | * submit everything. Since it's likely to be cache cold | ||
1330 | * prefetch it here as first thing to hide some of the | ||
1331 | * latency. | ||
1332 | * | ||
1333 | * Attempt to prefetch the pieces we likely need later. | ||
1334 | */ | ||
1335 | prefetch(&bdev->bd_disk->part_tbl); | ||
1336 | prefetch(bdev->bd_queue); | ||
1337 | prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); | ||
1338 | |||
1339 | return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | ||
1340 | nr_segs, get_block, end_io, | ||
1341 | submit_io, flags); | ||
1342 | } | ||
1343 | |||
1319 | EXPORT_SYMBOL(__blockdev_direct_IO); | 1344 | EXPORT_SYMBOL(__blockdev_direct_IO); |
1320 | 1345 | ||
1321 | static __init int dio_init(void) | 1346 | static __init int dio_init(void) |
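Note: the new __blockdev_direct_IO() wrapper above issues prefetches for the request queue and partition table before calling the now-inlined worker, so those likely-cold cache lines load while the argument checking proceeds. The same hint is expressible in plain C with GCC/Clang's __builtin_prefetch; a minimal illustration, with the structure and offsets as arbitrary stand-ins:

```c
#include <stdio.h>

struct queue { char pad[128]; long depth; };

/* Issue a read prefetch for data we expect to need shortly; purely a
 * hint with no effect on program semantics (GCC/Clang builtin). */
static long sum_later(struct queue *q, const int *tbl, int n)
{
	long s = 0;
	int i;

	__builtin_prefetch(q, 0, 3);          /* like prefetch(bdev->bd_queue) */
	__builtin_prefetch((char *)q + 64, 0, 3);

	for (i = 0; i < n; i++)               /* unrelated work hides latency */
		s += tbl[i];
	return s + q->depth;                  /* q is (hopefully) cached now */
}

int main(void)
{
	struct queue q = { {0}, 7 };
	int tbl[4] = { 1, 2, 3, 4 };

	printf("%ld\n", sum_later(&q, tbl, 4)); /* 17 */
	return 0;
}
```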
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 6cf72fcc0d0c..e7e327d43fa5 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/in.h> | 18 | #include <linux/in.h> |
19 | #include <linux/in6.h> | 19 | #include <linux/in6.h> |
20 | #include <linux/dlmconstants.h> | ||
20 | #include <net/ipv6.h> | 21 | #include <net/ipv6.h> |
21 | #include <net/sock.h> | 22 | #include <net/sock.h> |
22 | 23 | ||
@@ -36,6 +37,7 @@ | |||
36 | static struct config_group *space_list; | 37 | static struct config_group *space_list; |
37 | static struct config_group *comm_list; | 38 | static struct config_group *comm_list; |
38 | static struct dlm_comm *local_comm; | 39 | static struct dlm_comm *local_comm; |
40 | static uint32_t dlm_comm_count; | ||
39 | 41 | ||
40 | struct dlm_clusters; | 42 | struct dlm_clusters; |
41 | struct dlm_cluster; | 43 | struct dlm_cluster; |
@@ -103,6 +105,8 @@ struct dlm_cluster { | |||
103 | unsigned int cl_timewarn_cs; | 105 | unsigned int cl_timewarn_cs; |
104 | unsigned int cl_waitwarn_us; | 106 | unsigned int cl_waitwarn_us; |
105 | unsigned int cl_new_rsb_count; | 107 | unsigned int cl_new_rsb_count; |
108 | unsigned int cl_recover_callbacks; | ||
109 | char cl_cluster_name[DLM_LOCKSPACE_LEN]; | ||
106 | }; | 110 | }; |
107 | 111 | ||
108 | enum { | 112 | enum { |
@@ -118,6 +122,8 @@ enum { | |||
118 | CLUSTER_ATTR_TIMEWARN_CS, | 122 | CLUSTER_ATTR_TIMEWARN_CS, |
119 | CLUSTER_ATTR_WAITWARN_US, | 123 | CLUSTER_ATTR_WAITWARN_US, |
120 | CLUSTER_ATTR_NEW_RSB_COUNT, | 124 | CLUSTER_ATTR_NEW_RSB_COUNT, |
125 | CLUSTER_ATTR_RECOVER_CALLBACKS, | ||
126 | CLUSTER_ATTR_CLUSTER_NAME, | ||
121 | }; | 127 | }; |
122 | 128 | ||
123 | struct cluster_attribute { | 129 | struct cluster_attribute { |
@@ -126,6 +132,27 @@ struct cluster_attribute { | |||
126 | ssize_t (*store)(struct dlm_cluster *, const char *, size_t); | 132 | ssize_t (*store)(struct dlm_cluster *, const char *, size_t); |
127 | }; | 133 | }; |
128 | 134 | ||
135 | static ssize_t cluster_cluster_name_read(struct dlm_cluster *cl, char *buf) | ||
136 | { | ||
137 | return sprintf(buf, "%s\n", cl->cl_cluster_name); | ||
138 | } | ||
139 | |||
140 | static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl, | ||
141 | const char *buf, size_t len) | ||
142 | { | ||
143 | strncpy(dlm_config.ci_cluster_name, buf, DLM_LOCKSPACE_LEN); | ||
144 | strncpy(cl->cl_cluster_name, buf, DLM_LOCKSPACE_LEN); | ||
145 | return len; | ||
146 | } | ||
147 | |||
148 | static struct cluster_attribute cluster_attr_cluster_name = { | ||
149 | .attr = { .ca_owner = THIS_MODULE, | ||
150 | .ca_name = "cluster_name", | ||
151 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
152 | .show = cluster_cluster_name_read, | ||
153 | .store = cluster_cluster_name_write, | ||
154 | }; | ||
155 | |||
129 | static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, | 156 | static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, |
130 | int *info_field, int check_zero, | 157 | int *info_field, int check_zero, |
131 | const char *buf, size_t len) | 158 | const char *buf, size_t len) |
@@ -171,6 +198,7 @@ CLUSTER_ATTR(protocol, 0); | |||
171 | CLUSTER_ATTR(timewarn_cs, 1); | 198 | CLUSTER_ATTR(timewarn_cs, 1); |
172 | CLUSTER_ATTR(waitwarn_us, 0); | 199 | CLUSTER_ATTR(waitwarn_us, 0); |
173 | CLUSTER_ATTR(new_rsb_count, 0); | 200 | CLUSTER_ATTR(new_rsb_count, 0); |
201 | CLUSTER_ATTR(recover_callbacks, 0); | ||
174 | 202 | ||
175 | static struct configfs_attribute *cluster_attrs[] = { | 203 | static struct configfs_attribute *cluster_attrs[] = { |
176 | [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, | 204 | [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, |
@@ -185,6 +213,8 @@ static struct configfs_attribute *cluster_attrs[] = { | |||
185 | [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, | 213 | [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, |
186 | [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, | 214 | [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, |
187 | [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, | 215 | [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr, |
216 | [CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks.attr, | ||
217 | [CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name.attr, | ||
188 | NULL, | 218 | NULL, |
189 | }; | 219 | }; |
190 | 220 | ||
@@ -293,6 +323,7 @@ struct dlm_comms { | |||
293 | 323 | ||
294 | struct dlm_comm { | 324 | struct dlm_comm { |
295 | struct config_item item; | 325 | struct config_item item; |
326 | int seq; | ||
296 | int nodeid; | 327 | int nodeid; |
297 | int local; | 328 | int local; |
298 | int addr_count; | 329 | int addr_count; |
@@ -309,6 +340,7 @@ struct dlm_node { | |||
309 | int nodeid; | 340 | int nodeid; |
310 | int weight; | 341 | int weight; |
311 | int new; | 342 | int new; |
343 | int comm_seq; /* copy of cm->seq when nd->nodeid is set */ | ||
312 | }; | 344 | }; |
313 | 345 | ||
314 | static struct configfs_group_operations clusters_ops = { | 346 | static struct configfs_group_operations clusters_ops = { |
@@ -455,6 +487,9 @@ static struct config_group *make_cluster(struct config_group *g, | |||
455 | cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; | 487 | cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; |
456 | cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; | 488 | cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; |
457 | cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count; | 489 | cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count; |
490 | cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks; | ||
491 | memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name, | ||
492 | DLM_LOCKSPACE_LEN); | ||
458 | 493 | ||
459 | space_list = &sps->ss_group; | 494 | space_list = &sps->ss_group; |
460 | comm_list = &cms->cs_group; | 495 | comm_list = &cms->cs_group; |
@@ -558,6 +593,11 @@ static struct config_item *make_comm(struct config_group *g, const char *name) | |||
558 | return ERR_PTR(-ENOMEM); | 593 | return ERR_PTR(-ENOMEM); |
559 | 594 | ||
560 | config_item_init_type_name(&cm->item, name, &comm_type); | 595 | config_item_init_type_name(&cm->item, name, &comm_type); |
596 | |||
597 | cm->seq = dlm_comm_count++; | ||
598 | if (!cm->seq) | ||
599 | cm->seq = dlm_comm_count++; | ||
600 | |||
561 | cm->nodeid = -1; | 601 | cm->nodeid = -1; |
562 | cm->local = 0; | 602 | cm->local = 0; |
563 | cm->addr_count = 0; | 603 | cm->addr_count = 0; |
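Note: make_comm() above draws cm->seq from a global counter and draws again whenever it gets 0, because a comm_seq of zero is reserved to mean "unset" in the new dlm_node::comm_seq field. A tiny demonstration of the skip-zero counter, including the wraparound case:

```c
#include <stdint.h>
#include <stdio.h>

static uint32_t counter;

/* Draw twice when the first draw is 0, exactly like make_comm():
 * 0 stays reserved as "unset". */
static uint32_t next_seq(void)
{
	uint32_t seq = counter++;

	if (!seq)
		seq = counter++;
	return seq;
}

int main(void)
{
	printf("%u %u\n", next_seq(), next_seq()); /* 1 2 */
	counter = UINT32_MAX;
	printf("%u %u\n", next_seq(), next_seq()); /* 4294967295 1 (0 skipped) */
	return 0;
}
```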
@@ -801,7 +841,10 @@ static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf) | |||
801 | static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, | 841 | static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, |
802 | size_t len) | 842 | size_t len) |
803 | { | 843 | { |
844 | uint32_t seq = 0; | ||
804 | nd->nodeid = simple_strtol(buf, NULL, 0); | 845 | nd->nodeid = simple_strtol(buf, NULL, 0); |
846 | dlm_comm_seq(nd->nodeid, &seq); | ||
847 | nd->comm_seq = seq; | ||
805 | return len; | 848 | return len; |
806 | } | 849 | } |
807 | 850 | ||
@@ -908,13 +951,13 @@ static void put_comm(struct dlm_comm *cm) | |||
908 | } | 951 | } |
909 | 952 | ||
910 | /* caller must free mem */ | 953 | /* caller must free mem */ |
911 | int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, | 954 | int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, |
912 | int **new_out, int *new_count_out) | 955 | int *count_out) |
913 | { | 956 | { |
914 | struct dlm_space *sp; | 957 | struct dlm_space *sp; |
915 | struct dlm_node *nd; | 958 | struct dlm_node *nd; |
916 | int i = 0, rv = 0, ids_count = 0, new_count = 0; | 959 | struct dlm_config_node *nodes, *node; |
917 | int *ids, *new; | 960 | int rv, count; |
918 | 961 | ||
919 | sp = get_space(lsname); | 962 | sp = get_space(lsname); |
920 | if (!sp) | 963 | if (!sp) |
@@ -927,73 +970,42 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, | |||
927 | goto out; | 970 | goto out; |
928 | } | 971 | } |
929 | 972 | ||
930 | ids_count = sp->members_count; | 973 | count = sp->members_count; |
931 | 974 | ||
932 | ids = kcalloc(ids_count, sizeof(int), GFP_NOFS); | 975 | nodes = kcalloc(count, sizeof(struct dlm_config_node), GFP_NOFS); |
933 | if (!ids) { | 976 | if (!nodes) { |
934 | rv = -ENOMEM; | 977 | rv = -ENOMEM; |
935 | goto out; | 978 | goto out; |
936 | } | 979 | } |
937 | 980 | ||
981 | node = nodes; | ||
938 | list_for_each_entry(nd, &sp->members, list) { | 982 | list_for_each_entry(nd, &sp->members, list) { |
939 | ids[i++] = nd->nodeid; | 983 | node->nodeid = nd->nodeid; |
940 | if (nd->new) | 984 | node->weight = nd->weight; |
941 | new_count++; | 985 | node->new = nd->new; |
942 | } | 986 | node->comm_seq = nd->comm_seq; |
943 | 987 | node++; | |
944 | if (ids_count != i) | ||
945 | printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i); | ||
946 | |||
947 | if (!new_count) | ||
948 | goto out_ids; | ||
949 | 988 | ||
950 | new = kcalloc(new_count, sizeof(int), GFP_NOFS); | 989 | nd->new = 0; |
951 | if (!new) { | ||
952 | kfree(ids); | ||
953 | rv = -ENOMEM; | ||
954 | goto out; | ||
955 | } | 990 | } |
956 | 991 | ||
957 | i = 0; | 992 | *count_out = count; |
958 | list_for_each_entry(nd, &sp->members, list) { | 993 | *nodes_out = nodes; |
959 | if (nd->new) { | 994 | rv = 0; |
960 | new[i++] = nd->nodeid; | ||
961 | nd->new = 0; | ||
962 | } | ||
963 | } | ||
964 | *new_count_out = new_count; | ||
965 | *new_out = new; | ||
966 | |||
967 | out_ids: | ||
968 | *ids_count_out = ids_count; | ||
969 | *ids_out = ids; | ||
970 | out: | 995 | out: |
971 | mutex_unlock(&sp->members_lock); | 996 | mutex_unlock(&sp->members_lock); |
972 | put_space(sp); | 997 | put_space(sp); |
973 | return rv; | 998 | return rv; |
974 | } | 999 | } |
975 | 1000 | ||
976 | int dlm_node_weight(char *lsname, int nodeid) | 1001 | int dlm_comm_seq(int nodeid, uint32_t *seq) |
977 | { | 1002 | { |
978 | struct dlm_space *sp; | 1003 | struct dlm_comm *cm = get_comm(nodeid, NULL); |
979 | struct dlm_node *nd; | 1004 | if (!cm) |
980 | int w = -EEXIST; | 1005 | return -EEXIST; |
981 | 1006 | *seq = cm->seq; | |
982 | sp = get_space(lsname); | 1007 | put_comm(cm); |
983 | if (!sp) | 1008 | return 0; |
984 | goto out; | ||
985 | |||
986 | mutex_lock(&sp->members_lock); | ||
987 | list_for_each_entry(nd, &sp->members, list) { | ||
988 | if (nd->nodeid != nodeid) | ||
989 | continue; | ||
990 | w = nd->weight; | ||
991 | break; | ||
992 | } | ||
993 | mutex_unlock(&sp->members_lock); | ||
994 | put_space(sp); | ||
995 | out: | ||
996 | return w; | ||
997 | } | 1009 | } |
998 | 1010 | ||
999 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) | 1011 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) |
@@ -1047,6 +1059,8 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) | |||
1047 | #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ | 1059 | #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ |
1048 | #define DEFAULT_WAITWARN_US 0 | 1060 | #define DEFAULT_WAITWARN_US 0 |
1049 | #define DEFAULT_NEW_RSB_COUNT 128 | 1061 | #define DEFAULT_NEW_RSB_COUNT 128 |
1062 | #define DEFAULT_RECOVER_CALLBACKS 0 | ||
1063 | #define DEFAULT_CLUSTER_NAME "" | ||
1050 | 1064 | ||
1051 | struct dlm_config_info dlm_config = { | 1065 | struct dlm_config_info dlm_config = { |
1052 | .ci_tcp_port = DEFAULT_TCP_PORT, | 1066 | .ci_tcp_port = DEFAULT_TCP_PORT, |
@@ -1060,6 +1074,8 @@ struct dlm_config_info dlm_config = { | |||
1060 | .ci_protocol = DEFAULT_PROTOCOL, | 1074 | .ci_protocol = DEFAULT_PROTOCOL, |
1061 | .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, | 1075 | .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, |
1062 | .ci_waitwarn_us = DEFAULT_WAITWARN_US, | 1076 | .ci_waitwarn_us = DEFAULT_WAITWARN_US, |
1063 | .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT | 1077 | .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT, |
1078 | .ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS, | ||
1079 | .ci_cluster_name = DEFAULT_CLUSTER_NAME | ||
1064 | }; | 1080 | }; |
1065 | 1081 | ||
diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 3099d0dd26c0..9f5e3663bb0c 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -14,6 +14,13 @@ | |||
14 | #ifndef __CONFIG_DOT_H__ | 14 | #ifndef __CONFIG_DOT_H__ |
15 | #define __CONFIG_DOT_H__ | 15 | #define __CONFIG_DOT_H__ |
16 | 16 | ||
17 | struct dlm_config_node { | ||
18 | int nodeid; | ||
19 | int weight; | ||
20 | int new; | ||
21 | uint32_t comm_seq; | ||
22 | }; | ||
23 | |||
17 | #define DLM_MAX_ADDR_COUNT 3 | 24 | #define DLM_MAX_ADDR_COUNT 3 |
18 | 25 | ||
19 | struct dlm_config_info { | 26 | struct dlm_config_info { |
@@ -29,15 +36,17 @@ struct dlm_config_info { | |||
29 | int ci_timewarn_cs; | 36 | int ci_timewarn_cs; |
30 | int ci_waitwarn_us; | 37 | int ci_waitwarn_us; |
31 | int ci_new_rsb_count; | 38 | int ci_new_rsb_count; |
39 | int ci_recover_callbacks; | ||
40 | char ci_cluster_name[DLM_LOCKSPACE_LEN]; | ||
32 | }; | 41 | }; |
33 | 42 | ||
34 | extern struct dlm_config_info dlm_config; | 43 | extern struct dlm_config_info dlm_config; |
35 | 44 | ||
36 | int dlm_config_init(void); | 45 | int dlm_config_init(void); |
37 | void dlm_config_exit(void); | 46 | void dlm_config_exit(void); |
38 | int dlm_node_weight(char *lsname, int nodeid); | 47 | int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, |
39 | int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, | 48 | int *count_out); |
40 | int **new_out, int *new_count_out); | 49 | int dlm_comm_seq(int nodeid, uint32_t *seq); |
41 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); | 50 | int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); |
42 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); | 51 | int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); |
43 | int dlm_our_nodeid(void); | 52 | int dlm_our_nodeid(void); |
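[Note] For call sites, the header diff means the old two-call query pattern folds into one. A before/after sketch (lsname and the output variables are placeholders; the signatures are the ones removed and added above):

    /* before: two interfaces, three output arrays to track and free */
    error = dlm_nodeid_list(lsname, &ids, &ids_count, &new, &new_count);
    w = dlm_node_weight(lsname, nodeid);

    /* after: one call, one kfree(); weight, the new flag and comm_seq
       travel together inside each struct dlm_config_node */
    error = dlm_config_nodes(lsname, &nodes, &count);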
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 59779237e2b4..3dca2b39e83f 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c | |||
@@ -393,6 +393,7 @@ static const struct seq_operations format3_seq_ops; | |||
393 | 393 | ||
394 | static void *table_seq_start(struct seq_file *seq, loff_t *pos) | 394 | static void *table_seq_start(struct seq_file *seq, loff_t *pos) |
395 | { | 395 | { |
396 | struct rb_node *node; | ||
396 | struct dlm_ls *ls = seq->private; | 397 | struct dlm_ls *ls = seq->private; |
397 | struct rsbtbl_iter *ri; | 398 | struct rsbtbl_iter *ri; |
398 | struct dlm_rsb *r; | 399 | struct dlm_rsb *r; |
@@ -418,9 +419,10 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos) | |||
418 | ri->format = 3; | 419 | ri->format = 3; |
419 | 420 | ||
420 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | 421 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
421 | if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { | 422 | if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { |
422 | list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, | 423 | for (node = rb_first(&ls->ls_rsbtbl[bucket].keep); node; |
423 | res_hashchain) { | 424 | node = rb_next(node)) { |
425 | r = rb_entry(node, struct dlm_rsb, res_hashnode); | ||
424 | if (!entry--) { | 426 | if (!entry--) { |
425 | dlm_hold_rsb(r); | 427 | dlm_hold_rsb(r); |
426 | ri->rsb = r; | 428 | ri->rsb = r; |
@@ -449,9 +451,9 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos) | |||
449 | } | 451 | } |
450 | 452 | ||
451 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | 453 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
452 | if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { | 454 | if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { |
453 | r = list_first_entry(&ls->ls_rsbtbl[bucket].list, | 455 | node = rb_first(&ls->ls_rsbtbl[bucket].keep); |
454 | struct dlm_rsb, res_hashchain); | 456 | r = rb_entry(node, struct dlm_rsb, res_hashnode); |
455 | dlm_hold_rsb(r); | 457 | dlm_hold_rsb(r); |
456 | ri->rsb = r; | 458 | ri->rsb = r; |
457 | ri->bucket = bucket; | 459 | ri->bucket = bucket; |
@@ -467,7 +469,7 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) | |||
467 | { | 469 | { |
468 | struct dlm_ls *ls = seq->private; | 470 | struct dlm_ls *ls = seq->private; |
469 | struct rsbtbl_iter *ri = iter_ptr; | 471 | struct rsbtbl_iter *ri = iter_ptr; |
470 | struct list_head *next; | 472 | struct rb_node *next; |
471 | struct dlm_rsb *r, *rp; | 473 | struct dlm_rsb *r, *rp; |
472 | loff_t n = *pos; | 474 | loff_t n = *pos; |
473 | unsigned bucket; | 475 | unsigned bucket; |
@@ -480,10 +482,10 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) | |||
480 | 482 | ||
481 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | 483 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
482 | rp = ri->rsb; | 484 | rp = ri->rsb; |
483 | next = rp->res_hashchain.next; | 485 | next = rb_next(&rp->res_hashnode); |
484 | 486 | ||
485 | if (next != &ls->ls_rsbtbl[bucket].list) { | 487 | if (next) { |
486 | r = list_entry(next, struct dlm_rsb, res_hashchain); | 488 | r = rb_entry(next, struct dlm_rsb, res_hashnode); |
487 | dlm_hold_rsb(r); | 489 | dlm_hold_rsb(r); |
488 | ri->rsb = r; | 490 | ri->rsb = r; |
489 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); | 491 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
@@ -511,9 +513,9 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) | |||
511 | } | 513 | } |
512 | 514 | ||
513 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | 515 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
514 | if (!list_empty(&ls->ls_rsbtbl[bucket].list)) { | 516 | if (!RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep)) { |
515 | r = list_first_entry(&ls->ls_rsbtbl[bucket].list, | 517 | next = rb_first(&ls->ls_rsbtbl[bucket].keep); |
516 | struct dlm_rsb, res_hashchain); | 518 | r = rb_entry(next, struct dlm_rsb, res_hashnode); |
517 | dlm_hold_rsb(r); | 519 | dlm_hold_rsb(r); |
518 | ri->rsb = r; | 520 | ri->rsb = r; |
519 | ri->bucket = bucket; | 521 | ri->bucket = bucket; |
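[Note] All three seq_file hooks above switch from list traversal to the stock rbtree idiom from <linux/rbtree.h>: rb_first()/rb_next() give an in-order walk, rb_entry() recovers the containing rsb, and RB_EMPTY_ROOT() replaces list_empty(). Condensed from the hunks, as a sketch:

    struct rb_node *node;
    struct dlm_rsb *r;

    if (RB_EMPTY_ROOT(&ls->ls_rsbtbl[bucket].keep))
            return NULL;            /* nothing in this bucket */

    /* in-order walk, smallest key first */
    for (node = rb_first(&ls->ls_rsbtbl[bucket].keep); node;
         node = rb_next(node)) {
            r = rb_entry(node, struct dlm_rsb, res_hashnode);
            /* ... visit r under ls_rsbtbl[bucket].lock ... */
    }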
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 7b84c1dbc82e..83641574b016 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c | |||
@@ -290,7 +290,6 @@ int dlm_recover_directory(struct dlm_ls *ls) | |||
290 | 290 | ||
291 | out_status: | 291 | out_status: |
292 | error = 0; | 292 | error = 0; |
293 | dlm_set_recover_status(ls, DLM_RS_DIR); | ||
294 | log_debug(ls, "dlm_recover_directory %d entries", count); | 293 | log_debug(ls, "dlm_recover_directory %d entries", count); |
295 | out_free: | 294 | out_free: |
296 | kfree(last_name); | 295 | kfree(last_name); |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index fe2860c02449..3a564d197e99 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -103,8 +103,8 @@ struct dlm_dirtable { | |||
103 | }; | 103 | }; |
104 | 104 | ||
105 | struct dlm_rsbtable { | 105 | struct dlm_rsbtable { |
106 | struct list_head list; | 106 | struct rb_root keep; |
107 | struct list_head toss; | 107 | struct rb_root toss; |
108 | spinlock_t lock; | 108 | spinlock_t lock; |
109 | }; | 109 | }; |
110 | 110 | ||
@@ -117,6 +117,10 @@ struct dlm_member { | |||
117 | struct list_head list; | 117 | struct list_head list; |
118 | int nodeid; | 118 | int nodeid; |
119 | int weight; | 119 | int weight; |
120 | int slot; | ||
121 | int slot_prev; | ||
122 | int comm_seq; | ||
123 | uint32_t generation; | ||
120 | }; | 124 | }; |
121 | 125 | ||
122 | /* | 126 | /* |
@@ -125,10 +129,8 @@ struct dlm_member { | |||
125 | 129 | ||
126 | struct dlm_recover { | 130 | struct dlm_recover { |
127 | struct list_head list; | 131 | struct list_head list; |
128 | int *nodeids; /* nodeids of all members */ | 132 | struct dlm_config_node *nodes; |
129 | int node_count; | 133 | int nodes_count; |
130 | int *new; /* nodeids of new members */ | ||
131 | int new_count; | ||
132 | uint64_t seq; | 134 | uint64_t seq; |
133 | }; | 135 | }; |
134 | 136 | ||
@@ -285,7 +287,10 @@ struct dlm_rsb { | |||
285 | unsigned long res_toss_time; | 287 | unsigned long res_toss_time; |
286 | uint32_t res_first_lkid; | 288 | uint32_t res_first_lkid; |
287 | struct list_head res_lookup; /* lkbs waiting on first */ | 289 | struct list_head res_lookup; /* lkbs waiting on first */ |
288 | struct list_head res_hashchain; /* rsbtbl */ | 290 | union { |
291 | struct list_head res_hashchain; | ||
292 | struct rb_node res_hashnode; /* rsbtbl */ | ||
293 | }; | ||
289 | struct list_head res_grantqueue; | 294 | struct list_head res_grantqueue; |
290 | struct list_head res_convertqueue; | 295 | struct list_head res_convertqueue; |
291 | struct list_head res_waitqueue; | 296 | struct list_head res_waitqueue; |
@@ -334,7 +339,9 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag) | |||
334 | /* dlm_header is first element of all structs sent between nodes */ | 339 | /* dlm_header is first element of all structs sent between nodes */ |
335 | 340 | ||
336 | #define DLM_HEADER_MAJOR 0x00030000 | 341 | #define DLM_HEADER_MAJOR 0x00030000 |
337 | #define DLM_HEADER_MINOR 0x00000000 | 342 | #define DLM_HEADER_MINOR 0x00000001 |
343 | |||
344 | #define DLM_HEADER_SLOTS 0x00000001 | ||
338 | 345 | ||
339 | #define DLM_MSG 1 | 346 | #define DLM_MSG 1 |
340 | #define DLM_RCOM 2 | 347 | #define DLM_RCOM 2 |
@@ -422,10 +429,34 @@ union dlm_packet { | |||
422 | struct dlm_rcom rcom; | 429 | struct dlm_rcom rcom; |
423 | }; | 430 | }; |
424 | 431 | ||
432 | #define DLM_RSF_NEED_SLOTS 0x00000001 | ||
433 | |||
434 | /* RCOM_STATUS data */ | ||
435 | struct rcom_status { | ||
436 | __le32 rs_flags; | ||
437 | __le32 rs_unused1; | ||
438 | __le64 rs_unused2; | ||
439 | }; | ||
440 | |||
441 | /* RCOM_STATUS_REPLY data */ | ||
425 | struct rcom_config { | 442 | struct rcom_config { |
426 | __le32 rf_lvblen; | 443 | __le32 rf_lvblen; |
427 | __le32 rf_lsflags; | 444 | __le32 rf_lsflags; |
428 | __le64 rf_unused; | 445 | |
446 | /* DLM_HEADER_SLOTS adds: */ | ||
447 | __le32 rf_flags; | ||
448 | __le16 rf_our_slot; | ||
449 | __le16 rf_num_slots; | ||
450 | __le32 rf_generation; | ||
451 | __le32 rf_unused1; | ||
452 | __le64 rf_unused2; | ||
453 | }; | ||
454 | |||
455 | struct rcom_slot { | ||
456 | __le32 ro_nodeid; | ||
457 | __le16 ro_slot; | ||
458 | __le16 ro_unused1; | ||
459 | __le64 ro_unused2; | ||
429 | }; | 460 | }; |
430 | 461 | ||
431 | struct rcom_lock { | 462 | struct rcom_lock { |
@@ -452,6 +483,7 @@ struct dlm_ls { | |||
452 | struct list_head ls_list; /* list of lockspaces */ | 483 | struct list_head ls_list; /* list of lockspaces */ |
453 | dlm_lockspace_t *ls_local_handle; | 484 | dlm_lockspace_t *ls_local_handle; |
454 | uint32_t ls_global_id; /* global unique lockspace ID */ | 485 | uint32_t ls_global_id; /* global unique lockspace ID */ |
486 | uint32_t ls_generation; | ||
455 | uint32_t ls_exflags; | 487 | uint32_t ls_exflags; |
456 | int ls_lvblen; | 488 | int ls_lvblen; |
457 | int ls_count; /* refcount of processes in | 489 | int ls_count; /* refcount of processes in |
@@ -490,6 +522,11 @@ struct dlm_ls { | |||
490 | int ls_total_weight; | 522 | int ls_total_weight; |
491 | int *ls_node_array; | 523 | int *ls_node_array; |
492 | 524 | ||
525 | int ls_slot; | ||
526 | int ls_num_slots; | ||
527 | int ls_slots_size; | ||
528 | struct dlm_slot *ls_slots; | ||
529 | |||
493 | struct dlm_rsb ls_stub_rsb; /* for returning errors */ | 530 | struct dlm_rsb ls_stub_rsb; /* for returning errors */ |
494 | struct dlm_lkb ls_stub_lkb; /* for returning errors */ | 531 | struct dlm_lkb ls_stub_lkb; /* for returning errors */ |
495 | struct dlm_message ls_stub_ms; /* for faking a reply */ | 532 | struct dlm_message ls_stub_ms; /* for faking a reply */ |
@@ -537,6 +574,9 @@ struct dlm_ls { | |||
537 | struct list_head ls_root_list; /* root resources */ | 574 | struct list_head ls_root_list; /* root resources */ |
538 | struct rw_semaphore ls_root_sem; /* protect root_list */ | 575 | struct rw_semaphore ls_root_sem; /* protect root_list */ |
539 | 576 | ||
577 | const struct dlm_lockspace_ops *ls_ops; | ||
578 | void *ls_ops_arg; | ||
579 | |||
540 | int ls_namelen; | 580 | int ls_namelen; |
541 | char ls_name[1]; | 581 | char ls_name[1]; |
542 | }; | 582 | }; |
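[Note] Two details in the header diff are worth spelling out. First, res_hashchain and res_hashnode share a union: the same bytes act as a list_head while an rsb waits on the preallocated ls_new_rsb list and as an rb_node once it joins a tree, which is why lock.c below memsets the node when taking an rsb off that list. Second, the new rcom_status/rcom_config/rcom_slot layouts use fixed-endian __le fields, so both ends must convert explicitly; a minimal sketch mirroring what member.c does below (ro is assumed to point into rc_buf):

    struct rcom_slot *ro;

    /* sender: host order -> little endian on the wire */
    ro->ro_nodeid = cpu_to_le32(slot->nodeid);
    ro->ro_slot = cpu_to_le16(slot->slot);

    /* receiver: little endian -> host order before use */
    nodeid = le32_to_cpu(ro->ro_nodeid);
    slotno = le16_to_cpu(ro->ro_slot);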
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 83b5e32514e1..d47183043c59 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -56,6 +56,7 @@ | |||
56 | L: receive_xxxx_reply() <- R: send_xxxx_reply() | 56 | L: receive_xxxx_reply() <- R: send_xxxx_reply() |
57 | */ | 57 | */ |
58 | #include <linux/types.h> | 58 | #include <linux/types.h> |
59 | #include <linux/rbtree.h> | ||
59 | #include <linux/slab.h> | 60 | #include <linux/slab.h> |
60 | #include "dlm_internal.h" | 61 | #include "dlm_internal.h" |
61 | #include <linux/dlm_device.h> | 62 | #include <linux/dlm_device.h> |
@@ -380,6 +381,8 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, | |||
380 | 381 | ||
381 | r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); | 382 | r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain); |
382 | list_del(&r->res_hashchain); | 383 | list_del(&r->res_hashchain); |
384 | /* Convert the empty list_head to a NULL rb_node for tree usage: */ | ||
385 | memset(&r->res_hashnode, 0, sizeof(struct rb_node)); | ||
383 | ls->ls_new_rsb_count--; | 386 | ls->ls_new_rsb_count--; |
384 | spin_unlock(&ls->ls_new_rsb_spin); | 387 | spin_unlock(&ls->ls_new_rsb_spin); |
385 | 388 | ||
@@ -388,7 +391,6 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, | |||
388 | memcpy(r->res_name, name, len); | 391 | memcpy(r->res_name, name, len); |
389 | mutex_init(&r->res_mutex); | 392 | mutex_init(&r->res_mutex); |
390 | 393 | ||
391 | INIT_LIST_HEAD(&r->res_hashchain); | ||
392 | INIT_LIST_HEAD(&r->res_lookup); | 394 | INIT_LIST_HEAD(&r->res_lookup); |
393 | INIT_LIST_HEAD(&r->res_grantqueue); | 395 | INIT_LIST_HEAD(&r->res_grantqueue); |
394 | INIT_LIST_HEAD(&r->res_convertqueue); | 396 | INIT_LIST_HEAD(&r->res_convertqueue); |
@@ -400,14 +402,31 @@ static int get_rsb_struct(struct dlm_ls *ls, char *name, int len, | |||
400 | return 0; | 402 | return 0; |
401 | } | 403 | } |
402 | 404 | ||
403 | static int search_rsb_list(struct list_head *head, char *name, int len, | 405 | static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen) |
406 | { | ||
407 | char maxname[DLM_RESNAME_MAXLEN]; | ||
408 | |||
409 | memset(maxname, 0, DLM_RESNAME_MAXLEN); | ||
410 | memcpy(maxname, name, nlen); | ||
411 | return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN); | ||
412 | } | ||
413 | |||
414 | static int search_rsb_tree(struct rb_root *tree, char *name, int len, | ||
404 | unsigned int flags, struct dlm_rsb **r_ret) | 415 | unsigned int flags, struct dlm_rsb **r_ret) |
405 | { | 416 | { |
417 | struct rb_node *node = tree->rb_node; | ||
406 | struct dlm_rsb *r; | 418 | struct dlm_rsb *r; |
407 | int error = 0; | 419 | int error = 0; |
408 | 420 | int rc; | |
409 | list_for_each_entry(r, head, res_hashchain) { | 421 | |
410 | if (len == r->res_length && !memcmp(name, r->res_name, len)) | 422 | while (node) { |
423 | r = rb_entry(node, struct dlm_rsb, res_hashnode); | ||
424 | rc = rsb_cmp(r, name, len); | ||
425 | if (rc < 0) | ||
426 | node = node->rb_left; | ||
427 | else if (rc > 0) | ||
428 | node = node->rb_right; | ||
429 | else | ||
411 | goto found; | 430 | goto found; |
412 | } | 431 | } |
413 | *r_ret = NULL; | 432 | *r_ret = NULL; |
@@ -420,22 +439,54 @@ static int search_rsb_list(struct list_head *head, char *name, int len, | |||
420 | return error; | 439 | return error; |
421 | } | 440 | } |
422 | 441 | ||
442 | static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree) | ||
443 | { | ||
444 | struct rb_node **newn = &tree->rb_node; | ||
445 | struct rb_node *parent = NULL; | ||
446 | int rc; | ||
447 | |||
448 | while (*newn) { | ||
449 | struct dlm_rsb *cur = rb_entry(*newn, struct dlm_rsb, | ||
450 | res_hashnode); | ||
451 | |||
452 | parent = *newn; | ||
453 | rc = rsb_cmp(cur, rsb->res_name, rsb->res_length); | ||
454 | if (rc < 0) | ||
455 | newn = &parent->rb_left; | ||
456 | else if (rc > 0) | ||
457 | newn = &parent->rb_right; | ||
458 | else { | ||
459 | log_print("rsb_insert match"); | ||
460 | dlm_dump_rsb(rsb); | ||
461 | dlm_dump_rsb(cur); | ||
462 | return -EEXIST; | ||
463 | } | ||
464 | } | ||
465 | |||
466 | rb_link_node(&rsb->res_hashnode, parent, newn); | ||
467 | rb_insert_color(&rsb->res_hashnode, tree); | ||
468 | return 0; | ||
469 | } | ||
470 | |||
423 | static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, | 471 | static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, |
424 | unsigned int flags, struct dlm_rsb **r_ret) | 472 | unsigned int flags, struct dlm_rsb **r_ret) |
425 | { | 473 | { |
426 | struct dlm_rsb *r; | 474 | struct dlm_rsb *r; |
427 | int error; | 475 | int error; |
428 | 476 | ||
429 | error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r); | 477 | error = search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, flags, &r); |
430 | if (!error) { | 478 | if (!error) { |
431 | kref_get(&r->res_ref); | 479 | kref_get(&r->res_ref); |
432 | goto out; | 480 | goto out; |
433 | } | 481 | } |
434 | error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); | 482 | error = search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); |
435 | if (error) | 483 | if (error) |
436 | goto out; | 484 | goto out; |
437 | 485 | ||
438 | list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list); | 486 | rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); |
487 | error = rsb_insert(r, &ls->ls_rsbtbl[b].keep); | ||
488 | if (error) | ||
489 | return error; | ||
439 | 490 | ||
440 | if (dlm_no_directory(ls)) | 491 | if (dlm_no_directory(ls)) |
441 | goto out; | 492 | goto out; |
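[Note] The hunk above also fixes the rsb lifecycle across the two trees: an rb_node can sit in only one tree at a time, so every keep/toss transition becomes an erase followed by a fresh insert, where the old list code used a single list_move(). Condensed from this hunk and from toss_rsb() below, as a sketch:

    /* toss -> keep: the name was looked up again before aging out */
    rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
    error = rsb_insert(r, &ls->ls_rsbtbl[b].keep);

    /* keep -> toss: last reference dropped, park with a timestamp */
    rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep);
    rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss);
    r->res_toss_time = jiffies;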
@@ -527,8 +578,7 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen, | |||
527 | nodeid = 0; | 578 | nodeid = 0; |
528 | r->res_nodeid = nodeid; | 579 | r->res_nodeid = nodeid; |
529 | } | 580 | } |
530 | list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); | 581 | error = rsb_insert(r, &ls->ls_rsbtbl[bucket].keep); |
531 | error = 0; | ||
532 | out_unlock: | 582 | out_unlock: |
533 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); | 583 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
534 | out: | 584 | out: |
@@ -556,7 +606,8 @@ static void toss_rsb(struct kref *kref) | |||
556 | 606 | ||
557 | DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r);); | 607 | DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r);); |
558 | kref_init(&r->res_ref); | 608 | kref_init(&r->res_ref); |
559 | list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); | 609 | rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep); |
610 | rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss); | ||
560 | r->res_toss_time = jiffies; | 611 | r->res_toss_time = jiffies; |
561 | if (r->res_lvbptr) { | 612 | if (r->res_lvbptr) { |
562 | dlm_free_lvb(r->res_lvbptr); | 613 | dlm_free_lvb(r->res_lvbptr); |
@@ -1082,19 +1133,19 @@ static void dir_remove(struct dlm_rsb *r) | |||
1082 | r->res_name, r->res_length); | 1133 | r->res_name, r->res_length); |
1083 | } | 1134 | } |
1084 | 1135 | ||
1085 | /* FIXME: shouldn't this be able to exit as soon as one non-due rsb is | 1136 | /* FIXME: make this more efficient */ |
1086 | found since they are in order of newest to oldest? */ | ||
1087 | 1137 | ||
1088 | static int shrink_bucket(struct dlm_ls *ls, int b) | 1138 | static int shrink_bucket(struct dlm_ls *ls, int b) |
1089 | { | 1139 | { |
1140 | struct rb_node *n; | ||
1090 | struct dlm_rsb *r; | 1141 | struct dlm_rsb *r; |
1091 | int count = 0, found; | 1142 | int count = 0, found; |
1092 | 1143 | ||
1093 | for (;;) { | 1144 | for (;;) { |
1094 | found = 0; | 1145 | found = 0; |
1095 | spin_lock(&ls->ls_rsbtbl[b].lock); | 1146 | spin_lock(&ls->ls_rsbtbl[b].lock); |
1096 | list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, | 1147 | for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = rb_next(n)) { |
1097 | res_hashchain) { | 1148 | r = rb_entry(n, struct dlm_rsb, res_hashnode); |
1098 | if (!time_after_eq(jiffies, r->res_toss_time + | 1149 | if (!time_after_eq(jiffies, r->res_toss_time + |
1099 | dlm_config.ci_toss_secs * HZ)) | 1150 | dlm_config.ci_toss_secs * HZ)) |
1100 | continue; | 1151 | continue; |
@@ -1108,7 +1159,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b) | |||
1108 | } | 1159 | } |
1109 | 1160 | ||
1110 | if (kref_put(&r->res_ref, kill_rsb)) { | 1161 | if (kref_put(&r->res_ref, kill_rsb)) { |
1111 | list_del(&r->res_hashchain); | 1162 | rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); |
1112 | spin_unlock(&ls->ls_rsbtbl[b].lock); | 1163 | spin_unlock(&ls->ls_rsbtbl[b].lock); |
1113 | 1164 | ||
1114 | if (is_master(r)) | 1165 | if (is_master(r)) |
@@ -4441,10 +4492,12 @@ int dlm_purge_locks(struct dlm_ls *ls) | |||
4441 | 4492 | ||
4442 | static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket) | 4493 | static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket) |
4443 | { | 4494 | { |
4495 | struct rb_node *n; | ||
4444 | struct dlm_rsb *r, *r_ret = NULL; | 4496 | struct dlm_rsb *r, *r_ret = NULL; |
4445 | 4497 | ||
4446 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | 4498 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
4447 | list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) { | 4499 | for (n = rb_first(&ls->ls_rsbtbl[bucket].keep); n; n = rb_next(n)) { |
4500 | r = rb_entry(n, struct dlm_rsb, res_hashnode); | ||
4448 | if (!rsb_flag(r, RSB_LOCKS_PURGED)) | 4501 | if (!rsb_flag(r, RSB_LOCKS_PURGED)) |
4449 | continue; | 4502 | continue; |
4450 | hold_rsb(r); | 4503 | hold_rsb(r); |
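[Note] rsb_cmp() above gives search and insert one total order by comparing names as if zero-padded to DLM_RESNAME_MAXLEN; it pads the probe name into maxname and relies on res_name being stored zero-filled at full width. A standalone userspace model of the idea, runnable as-is (the width and names are illustrative):

    #include <stdio.h>
    #include <string.h>

    #define RESNAME_MAXLEN 64

    /* compare variable-length names as if zero-padded to a fixed
       width, so "foo" sorts before "foobar" (pad byte 0 < 'b') */
    static int name_cmp(const char *a, int alen, const char *b, int blen)
    {
            char pa[RESNAME_MAXLEN], pb[RESNAME_MAXLEN];

            memset(pa, 0, sizeof(pa));
            memset(pb, 0, sizeof(pb));
            memcpy(pa, a, alen);
            memcpy(pb, b, blen);
            return memcmp(pa, pb, RESNAME_MAXLEN);
    }

    int main(void)
    {
            printf("%d\n", name_cmp("foo", 3, "foobar", 6) < 0); /* 1 */
            return 0;
    }

What matters for the tree is only that search_rsb_tree() and rsb_insert() descend by the same convention, which they do: both take rb_left when the resident entry compares less than the probe.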
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index a1d8f1af144b..a1ea25face82 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -386,12 +386,15 @@ static void threads_stop(void) | |||
386 | dlm_lowcomms_stop(); | 386 | dlm_lowcomms_stop(); |
387 | } | 387 | } |
388 | 388 | ||
389 | static int new_lockspace(const char *name, int namelen, void **lockspace, | 389 | static int new_lockspace(const char *name, const char *cluster, |
390 | uint32_t flags, int lvblen) | 390 | uint32_t flags, int lvblen, |
391 | const struct dlm_lockspace_ops *ops, void *ops_arg, | ||
392 | int *ops_result, dlm_lockspace_t **lockspace) | ||
391 | { | 393 | { |
392 | struct dlm_ls *ls; | 394 | struct dlm_ls *ls; |
393 | int i, size, error; | 395 | int i, size, error; |
394 | int do_unreg = 0; | 396 | int do_unreg = 0; |
397 | int namelen = strlen(name); | ||
395 | 398 | ||
396 | if (namelen > DLM_LOCKSPACE_LEN) | 399 | if (namelen > DLM_LOCKSPACE_LEN) |
397 | return -EINVAL; | 400 | return -EINVAL; |
@@ -403,8 +406,24 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, | |||
403 | return -EINVAL; | 406 | return -EINVAL; |
404 | 407 | ||
405 | if (!dlm_user_daemon_available()) { | 408 | if (!dlm_user_daemon_available()) { |
406 | module_put(THIS_MODULE); | 409 | log_print("dlm user daemon not available"); |
407 | return -EUNATCH; | 410 | error = -EUNATCH; |
411 | goto out; | ||
412 | } | ||
413 | |||
414 | if (ops && ops_result) { | ||
415 | if (!dlm_config.ci_recover_callbacks) | ||
416 | *ops_result = -EOPNOTSUPP; | ||
417 | else | ||
418 | *ops_result = 0; | ||
419 | } | ||
420 | |||
421 | if (dlm_config.ci_recover_callbacks && cluster && | ||
422 | strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) { | ||
423 | log_print("dlm cluster name %s mismatch %s", | ||
424 | dlm_config.ci_cluster_name, cluster); | ||
425 | error = -EBADR; | ||
426 | goto out; | ||
408 | } | 427 | } |
409 | 428 | ||
410 | error = 0; | 429 | error = 0; |
@@ -442,6 +461,11 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, | |||
442 | ls->ls_flags = 0; | 461 | ls->ls_flags = 0; |
443 | ls->ls_scan_time = jiffies; | 462 | ls->ls_scan_time = jiffies; |
444 | 463 | ||
464 | if (ops && dlm_config.ci_recover_callbacks) { | ||
465 | ls->ls_ops = ops; | ||
466 | ls->ls_ops_arg = ops_arg; | ||
467 | } | ||
468 | |||
445 | if (flags & DLM_LSFL_TIMEWARN) | 469 | if (flags & DLM_LSFL_TIMEWARN) |
446 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); | 470 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); |
447 | 471 | ||
@@ -457,8 +481,8 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, | |||
457 | if (!ls->ls_rsbtbl) | 481 | if (!ls->ls_rsbtbl) |
458 | goto out_lsfree; | 482 | goto out_lsfree; |
459 | for (i = 0; i < size; i++) { | 483 | for (i = 0; i < size; i++) { |
460 | INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list); | 484 | ls->ls_rsbtbl[i].keep.rb_node = NULL; |
461 | INIT_LIST_HEAD(&ls->ls_rsbtbl[i].toss); | 485 | ls->ls_rsbtbl[i].toss.rb_node = NULL; |
462 | spin_lock_init(&ls->ls_rsbtbl[i].lock); | 486 | spin_lock_init(&ls->ls_rsbtbl[i].lock); |
463 | } | 487 | } |
464 | 488 | ||
@@ -525,6 +549,11 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, | |||
525 | if (!ls->ls_recover_buf) | 549 | if (!ls->ls_recover_buf) |
526 | goto out_dirfree; | 550 | goto out_dirfree; |
527 | 551 | ||
552 | ls->ls_slot = 0; | ||
553 | ls->ls_num_slots = 0; | ||
554 | ls->ls_slots_size = 0; | ||
555 | ls->ls_slots = NULL; | ||
556 | |||
528 | INIT_LIST_HEAD(&ls->ls_recover_list); | 557 | INIT_LIST_HEAD(&ls->ls_recover_list); |
529 | spin_lock_init(&ls->ls_recover_list_lock); | 558 | spin_lock_init(&ls->ls_recover_list_lock); |
530 | ls->ls_recover_list_count = 0; | 559 | ls->ls_recover_list_count = 0; |
@@ -614,8 +643,10 @@ static int new_lockspace(const char *name, int namelen, void **lockspace, | |||
614 | return error; | 643 | return error; |
615 | } | 644 | } |
616 | 645 | ||
617 | int dlm_new_lockspace(const char *name, int namelen, void **lockspace, | 646 | int dlm_new_lockspace(const char *name, const char *cluster, |
618 | uint32_t flags, int lvblen) | 647 | uint32_t flags, int lvblen, |
648 | const struct dlm_lockspace_ops *ops, void *ops_arg, | ||
649 | int *ops_result, dlm_lockspace_t **lockspace) | ||
619 | { | 650 | { |
620 | int error = 0; | 651 | int error = 0; |
621 | 652 | ||
@@ -625,7 +656,8 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace, | |||
625 | if (error) | 656 | if (error) |
626 | goto out; | 657 | goto out; |
627 | 658 | ||
628 | error = new_lockspace(name, namelen, lockspace, flags, lvblen); | 659 | error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg, |
660 | ops_result, lockspace); | ||
629 | if (!error) | 661 | if (!error) |
630 | ls_count++; | 662 | ls_count++; |
631 | if (error > 0) | 663 | if (error > 0) |
@@ -685,7 +717,7 @@ static int lockspace_busy(struct dlm_ls *ls, int force) | |||
685 | static int release_lockspace(struct dlm_ls *ls, int force) | 717 | static int release_lockspace(struct dlm_ls *ls, int force) |
686 | { | 718 | { |
687 | struct dlm_rsb *rsb; | 719 | struct dlm_rsb *rsb; |
688 | struct list_head *head; | 720 | struct rb_node *n; |
689 | int i, busy, rv; | 721 | int i, busy, rv; |
690 | 722 | ||
691 | busy = lockspace_busy(ls, force); | 723 | busy = lockspace_busy(ls, force); |
@@ -746,20 +778,15 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
746 | */ | 778 | */ |
747 | 779 | ||
748 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | 780 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { |
749 | head = &ls->ls_rsbtbl[i].list; | 781 | while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) { |
750 | while (!list_empty(head)) { | 782 | rsb = rb_entry(n, struct dlm_rsb, res_hashnode); |
751 | rsb = list_entry(head->next, struct dlm_rsb, | 783 | rb_erase(n, &ls->ls_rsbtbl[i].keep); |
752 | res_hashchain); | ||
753 | |||
754 | list_del(&rsb->res_hashchain); | ||
755 | dlm_free_rsb(rsb); | 784 | dlm_free_rsb(rsb); |
756 | } | 785 | } |
757 | 786 | ||
758 | head = &ls->ls_rsbtbl[i].toss; | 787 | while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) { |
759 | while (!list_empty(head)) { | 788 | rsb = rb_entry(n, struct dlm_rsb, res_hashnode); |
760 | rsb = list_entry(head->next, struct dlm_rsb, | 789 | rb_erase(n, &ls->ls_rsbtbl[i].toss); |
761 | res_hashchain); | ||
762 | list_del(&rsb->res_hashchain); | ||
763 | dlm_free_rsb(rsb); | 790 | dlm_free_rsb(rsb); |
764 | } | 791 | } |
765 | } | 792 | } |
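[Note] dlm_new_lockspace() now takes the cluster name plus an optional ops vector, and reports callback support separately through ops_result: with ci_recover_callbacks unset it stores -EOPNOTSUPP there while the lockspace itself still comes up, and a cluster-name mismatch fails with -EBADR. A hedged caller sketch; the callback names, ops argument and lvblen value are placeholders, while the prototype and the three hooks come from this patch:

    static const struct dlm_lockspace_ops my_ops = {
            .recover_prep = my_recover_prep,
            .recover_slot = my_recover_slot,
            .recover_done = my_recover_done,
    };

    dlm_lockspace_t *ls;
    int ops_result, error;

    error = dlm_new_lockspace("my_ls", "my_cluster", 0, 32,
                              &my_ops, my_arg, &ops_result, &ls);
    if (error)
            return error;
    if (ops_result < 0)
            log_print("recovery callbacks not supported: %d", ops_result);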
diff --git a/fs/dlm/member.c b/fs/dlm/member.c index b12532e553f8..862640a36d5c 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved. | 4 | ** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. |
5 | ** | 5 | ** |
6 | ** This copyrighted material is made available to anyone wishing to use, | 6 | ** This copyrighted material is made available to anyone wishing to use, |
7 | ** modify, copy, or redistribute it subject to the terms and conditions | 7 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -19,6 +19,280 @@ | |||
19 | #include "config.h" | 19 | #include "config.h" |
20 | #include "lowcomms.h" | 20 | #include "lowcomms.h" |
21 | 21 | ||
22 | int dlm_slots_version(struct dlm_header *h) | ||
23 | { | ||
24 | if ((h->h_version & 0x0000FFFF) < DLM_HEADER_SLOTS) | ||
25 | return 0; | ||
26 | return 1; | ||
27 | } | ||
28 | |||
29 | void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc, | ||
30 | struct dlm_member *memb) | ||
31 | { | ||
32 | struct rcom_config *rf = (struct rcom_config *)rc->rc_buf; | ||
33 | |||
34 | if (!dlm_slots_version(&rc->rc_header)) | ||
35 | return; | ||
36 | |||
37 | memb->slot = le16_to_cpu(rf->rf_our_slot); | ||
38 | memb->generation = le32_to_cpu(rf->rf_generation); | ||
39 | } | ||
40 | |||
41 | void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc) | ||
42 | { | ||
43 | struct dlm_slot *slot; | ||
44 | struct rcom_slot *ro; | ||
45 | int i; | ||
46 | |||
47 | ro = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config)); | ||
48 | |||
49 | /* ls_slots array is sparse, but not rcom_slots */ | ||
50 | |||
51 | for (i = 0; i < ls->ls_slots_size; i++) { | ||
52 | slot = &ls->ls_slots[i]; | ||
53 | if (!slot->nodeid) | ||
54 | continue; | ||
55 | ro->ro_nodeid = cpu_to_le32(slot->nodeid); | ||
56 | ro->ro_slot = cpu_to_le16(slot->slot); | ||
57 | ro++; | ||
58 | } | ||
59 | } | ||
60 | |||
61 | #define SLOT_DEBUG_LINE 128 | ||
62 | |||
63 | static void log_debug_slots(struct dlm_ls *ls, uint32_t gen, int num_slots, | ||
64 | struct rcom_slot *ro0, struct dlm_slot *array, | ||
65 | int array_size) | ||
66 | { | ||
67 | char line[SLOT_DEBUG_LINE]; | ||
68 | int len = SLOT_DEBUG_LINE - 1; | ||
69 | int pos = 0; | ||
70 | int ret, i; | ||
71 | |||
72 | if (!dlm_config.ci_log_debug) | ||
73 | return; | ||
74 | |||
75 | memset(line, 0, sizeof(line)); | ||
76 | |||
77 | if (array) { | ||
78 | for (i = 0; i < array_size; i++) { | ||
79 | if (!array[i].nodeid) | ||
80 | continue; | ||
81 | |||
82 | ret = snprintf(line + pos, len - pos, " %d:%d", | ||
83 | array[i].slot, array[i].nodeid); | ||
84 | if (ret >= len - pos) | ||
85 | break; | ||
86 | pos += ret; | ||
87 | } | ||
88 | } else if (ro0) { | ||
89 | for (i = 0; i < num_slots; i++) { | ||
90 | ret = snprintf(line + pos, len - pos, " %d:%d", | ||
91 | ro0[i].ro_slot, ro0[i].ro_nodeid); | ||
92 | if (ret >= len - pos) | ||
93 | break; | ||
94 | pos += ret; | ||
95 | } | ||
96 | } | ||
97 | |||
98 | log_debug(ls, "generation %u slots %d%s", gen, num_slots, line); | ||
99 | } | ||
100 | |||
101 | int dlm_slots_copy_in(struct dlm_ls *ls) | ||
102 | { | ||
103 | struct dlm_member *memb; | ||
104 | struct dlm_rcom *rc = ls->ls_recover_buf; | ||
105 | struct rcom_config *rf = (struct rcom_config *)rc->rc_buf; | ||
106 | struct rcom_slot *ro0, *ro; | ||
107 | int our_nodeid = dlm_our_nodeid(); | ||
108 | int i, num_slots; | ||
109 | uint32_t gen; | ||
110 | |||
111 | if (!dlm_slots_version(&rc->rc_header)) | ||
112 | return -1; | ||
113 | |||
114 | gen = le32_to_cpu(rf->rf_generation); | ||
115 | if (gen <= ls->ls_generation) { | ||
116 | log_error(ls, "dlm_slots_copy_in gen %u old %u", | ||
117 | gen, ls->ls_generation); | ||
118 | } | ||
119 | ls->ls_generation = gen; | ||
120 | |||
121 | num_slots = le16_to_cpu(rf->rf_num_slots); | ||
122 | if (!num_slots) | ||
123 | return -1; | ||
124 | |||
125 | ro0 = (struct rcom_slot *)(rc->rc_buf + sizeof(struct rcom_config)); | ||
126 | |||
127 | for (i = 0, ro = ro0; i < num_slots; i++, ro++) { | ||
128 | ro->ro_nodeid = le32_to_cpu(ro->ro_nodeid); | ||
129 | ro->ro_slot = le16_to_cpu(ro->ro_slot); | ||
130 | } | ||
131 | |||
132 | log_debug_slots(ls, gen, num_slots, ro0, NULL, 0); | ||
133 | |||
134 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
135 | for (i = 0, ro = ro0; i < num_slots; i++, ro++) { | ||
136 | if (ro->ro_nodeid != memb->nodeid) | ||
137 | continue; | ||
138 | memb->slot = ro->ro_slot; | ||
139 | memb->slot_prev = memb->slot; | ||
140 | break; | ||
141 | } | ||
142 | |||
143 | if (memb->nodeid == our_nodeid) { | ||
144 | if (ls->ls_slot && ls->ls_slot != memb->slot) { | ||
145 | log_error(ls, "dlm_slots_copy_in our slot " | ||
146 | "changed %d %d", ls->ls_slot, | ||
147 | memb->slot); | ||
148 | return -1; | ||
149 | } | ||
150 | |||
151 | if (!ls->ls_slot) | ||
152 | ls->ls_slot = memb->slot; | ||
153 | } | ||
154 | |||
155 | if (!memb->slot) { | ||
156 | log_error(ls, "dlm_slots_copy_in nodeid %d no slot", | ||
157 | memb->nodeid); | ||
158 | return -1; | ||
159 | } | ||
160 | } | ||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | /* for any nodes that do not support slots, we will not have set memb->slot | ||
166 | in wait_status_all(), so memb->slot will remain -1, and we will not | ||
167 | assign slots or set ls_num_slots here */ | ||
168 | |||
169 | int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, | ||
170 | struct dlm_slot **slots_out, uint32_t *gen_out) | ||
171 | { | ||
172 | struct dlm_member *memb; | ||
173 | struct dlm_slot *array; | ||
174 | int our_nodeid = dlm_our_nodeid(); | ||
175 | int array_size, max_slots, i; | ||
176 | int need = 0; | ||
177 | int max = 0; | ||
178 | int num = 0; | ||
179 | uint32_t gen = 0; | ||
180 | |||
181 | /* our own memb struct will have slot -1 gen 0 */ | ||
182 | |||
183 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
184 | if (memb->nodeid == our_nodeid) { | ||
185 | memb->slot = ls->ls_slot; | ||
186 | memb->generation = ls->ls_generation; | ||
187 | break; | ||
188 | } | ||
189 | } | ||
190 | |||
191 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
192 | if (memb->generation > gen) | ||
193 | gen = memb->generation; | ||
194 | |||
195 | /* node doesn't support slots */ | ||
196 | |||
197 | if (memb->slot == -1) | ||
198 | return -1; | ||
199 | |||
200 | /* node needs a slot assigned */ | ||
201 | |||
202 | if (!memb->slot) | ||
203 | need++; | ||
204 | |||
205 | /* node has a slot assigned */ | ||
206 | |||
207 | num++; | ||
208 | |||
209 | if (!max || max < memb->slot) | ||
210 | max = memb->slot; | ||
211 | |||
212 | /* sanity check, once slot is assigned it shouldn't change */ | ||
213 | |||
214 | if (memb->slot_prev && memb->slot && memb->slot_prev != memb->slot) { | ||
215 | log_error(ls, "nodeid %d slot changed %d %d", | ||
216 | memb->nodeid, memb->slot_prev, memb->slot); | ||
217 | return -1; | ||
218 | } | ||
219 | memb->slot_prev = memb->slot; | ||
220 | } | ||
221 | |||
222 | array_size = max + need; | ||
223 | |||
224 | array = kzalloc(array_size * sizeof(struct dlm_slot), GFP_NOFS); | ||
225 | if (!array) | ||
226 | return -ENOMEM; | ||
227 | |||
228 | num = 0; | ||
229 | |||
230 | /* fill in slots (offsets) that are used */ | ||
231 | |||
232 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
233 | if (!memb->slot) | ||
234 | continue; | ||
235 | |||
236 | if (memb->slot > array_size) { | ||
237 | log_error(ls, "invalid slot number %d", memb->slot); | ||
238 | kfree(array); | ||
239 | return -1; | ||
240 | } | ||
241 | |||
242 | array[memb->slot - 1].nodeid = memb->nodeid; | ||
243 | array[memb->slot - 1].slot = memb->slot; | ||
244 | num++; | ||
245 | } | ||
246 | |||
247 | /* assign new slots from unused offsets */ | ||
248 | |||
249 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
250 | if (memb->slot) | ||
251 | continue; | ||
252 | |||
253 | for (i = 0; i < array_size; i++) { | ||
254 | if (array[i].nodeid) | ||
255 | continue; | ||
256 | |||
257 | memb->slot = i + 1; | ||
258 | memb->slot_prev = memb->slot; | ||
259 | array[i].nodeid = memb->nodeid; | ||
260 | array[i].slot = memb->slot; | ||
261 | num++; | ||
262 | |||
263 | if (!ls->ls_slot && memb->nodeid == our_nodeid) | ||
264 | ls->ls_slot = memb->slot; | ||
265 | break; | ||
266 | } | ||
267 | |||
268 | if (!memb->slot) { | ||
269 | log_error(ls, "no free slot found"); | ||
270 | kfree(array); | ||
271 | return -1; | ||
272 | } | ||
273 | } | ||
274 | |||
275 | gen++; | ||
276 | |||
277 | log_debug_slots(ls, gen, num, NULL, array, array_size); | ||
278 | |||
279 | max_slots = (dlm_config.ci_buffer_size - sizeof(struct dlm_rcom) - | ||
280 | sizeof(struct rcom_config)) / sizeof(struct rcom_slot); | ||
281 | |||
282 | if (num > max_slots) { | ||
283 | log_error(ls, "num_slots %d exceeds max_slots %d", | ||
284 | num, max_slots); | ||
285 | kfree(array); | ||
286 | return -1; | ||
287 | } | ||
288 | |||
289 | *gen_out = gen; | ||
290 | *slots_out = array; | ||
291 | *slots_size = array_size; | ||
292 | *num_slots = num; | ||
293 | return 0; | ||
294 | } | ||
295 | |||
22 | static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) | 296 | static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) |
23 | { | 297 | { |
24 | struct dlm_member *memb = NULL; | 298 | struct dlm_member *memb = NULL; |
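[Note] dlm_slots_assign() above is a two-pass allocator over a 1-based slot space: pass one copies every already-owned slot into array[] (and bails out if any member reports slot -1, since a single pre-slots node disables the feature for the whole lockspace), pass two hands each unslotted member the first unused offset. A toy model of pass two with illustrative types; array_nodeid[i] != 0 marks offset i+1 as owned:

    /* give a member the first free 1-based slot; returns -1 only if
       the table is full, which the sizing (array_size = max + need)
       should make impossible */
    static int assign_free_slot(int *array_nodeid, int array_size,
                                int nodeid)
    {
            int i;

            for (i = 0; i < array_size; i++) {
                    if (array_nodeid[i])
                            continue;
                    array_nodeid[i] = nodeid;
                    return i + 1;   /* slot numbers are 1-based */
            }
            return -1;
    }

Sizing the array as max + need is what guarantees pass two always finds a hole, which is why the "no free slot found" branch above is logged as an error.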
@@ -43,59 +317,51 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) | |||
43 | } | 317 | } |
44 | } | 318 | } |
45 | 319 | ||
46 | static int dlm_add_member(struct dlm_ls *ls, int nodeid) | 320 | static int dlm_add_member(struct dlm_ls *ls, struct dlm_config_node *node) |
47 | { | 321 | { |
48 | struct dlm_member *memb; | 322 | struct dlm_member *memb; |
49 | int w, error; | 323 | int error; |
50 | 324 | ||
51 | memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); | 325 | memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); |
52 | if (!memb) | 326 | if (!memb) |
53 | return -ENOMEM; | 327 | return -ENOMEM; |
54 | 328 | ||
55 | w = dlm_node_weight(ls->ls_name, nodeid); | 329 | error = dlm_lowcomms_connect_node(node->nodeid); |
56 | if (w < 0) { | ||
57 | kfree(memb); | ||
58 | return w; | ||
59 | } | ||
60 | |||
61 | error = dlm_lowcomms_connect_node(nodeid); | ||
62 | if (error < 0) { | 330 | if (error < 0) { |
63 | kfree(memb); | 331 | kfree(memb); |
64 | return error; | 332 | return error; |
65 | } | 333 | } |
66 | 334 | ||
67 | memb->nodeid = nodeid; | 335 | memb->nodeid = node->nodeid; |
68 | memb->weight = w; | 336 | memb->weight = node->weight; |
337 | memb->comm_seq = node->comm_seq; | ||
69 | add_ordered_member(ls, memb); | 338 | add_ordered_member(ls, memb); |
70 | ls->ls_num_nodes++; | 339 | ls->ls_num_nodes++; |
71 | return 0; | 340 | return 0; |
72 | } | 341 | } |
73 | 342 | ||
74 | static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb) | 343 | static struct dlm_member *find_memb(struct list_head *head, int nodeid) |
75 | { | ||
76 | list_move(&memb->list, &ls->ls_nodes_gone); | ||
77 | ls->ls_num_nodes--; | ||
78 | } | ||
79 | |||
80 | int dlm_is_member(struct dlm_ls *ls, int nodeid) | ||
81 | { | 344 | { |
82 | struct dlm_member *memb; | 345 | struct dlm_member *memb; |
83 | 346 | ||
84 | list_for_each_entry(memb, &ls->ls_nodes, list) { | 347 | list_for_each_entry(memb, head, list) { |
85 | if (memb->nodeid == nodeid) | 348 | if (memb->nodeid == nodeid) |
86 | return 1; | 349 | return memb; |
87 | } | 350 | } |
351 | return NULL; | ||
352 | } | ||
353 | |||
354 | int dlm_is_member(struct dlm_ls *ls, int nodeid) | ||
355 | { | ||
356 | if (find_memb(&ls->ls_nodes, nodeid)) | ||
357 | return 1; | ||
88 | return 0; | 358 | return 0; |
89 | } | 359 | } |
90 | 360 | ||
91 | int dlm_is_removed(struct dlm_ls *ls, int nodeid) | 361 | int dlm_is_removed(struct dlm_ls *ls, int nodeid) |
92 | { | 362 | { |
93 | struct dlm_member *memb; | 363 | if (find_memb(&ls->ls_nodes_gone, nodeid)) |
94 | 364 | return 1; | |
95 | list_for_each_entry(memb, &ls->ls_nodes_gone, list) { | ||
96 | if (memb->nodeid == nodeid) | ||
97 | return 1; | ||
98 | } | ||
99 | return 0; | 365 | return 0; |
100 | } | 366 | } |
101 | 367 | ||
@@ -176,7 +442,7 @@ static int ping_members(struct dlm_ls *ls) | |||
176 | error = dlm_recovery_stopped(ls); | 442 | error = dlm_recovery_stopped(ls); |
177 | if (error) | 443 | if (error) |
178 | break; | 444 | break; |
179 | error = dlm_rcom_status(ls, memb->nodeid); | 445 | error = dlm_rcom_status(ls, memb->nodeid, 0); |
180 | if (error) | 446 | if (error) |
181 | break; | 447 | break; |
182 | } | 448 | } |
@@ -186,10 +452,88 @@ static int ping_members(struct dlm_ls *ls) | |||
186 | return error; | 452 | return error; |
187 | } | 453 | } |
188 | 454 | ||
455 | static void dlm_lsop_recover_prep(struct dlm_ls *ls) | ||
456 | { | ||
457 | if (!ls->ls_ops || !ls->ls_ops->recover_prep) | ||
458 | return; | ||
459 | ls->ls_ops->recover_prep(ls->ls_ops_arg); | ||
460 | } | ||
461 | |||
462 | static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb) | ||
463 | { | ||
464 | struct dlm_slot slot; | ||
465 | uint32_t seq; | ||
466 | int error; | ||
467 | |||
468 | if (!ls->ls_ops || !ls->ls_ops->recover_slot) | ||
469 | return; | ||
470 | |||
471 | /* if there is no comms connection with this node | ||
472 | or the present comms connection is newer | ||
473 | than the one when this member was added, then | ||
474 | we consider the node to have failed (versus | ||
475 | being removed due to dlm_release_lockspace) */ | ||
476 | |||
477 | error = dlm_comm_seq(memb->nodeid, &seq); | ||
478 | |||
479 | if (!error && seq == memb->comm_seq) | ||
480 | return; | ||
481 | |||
482 | slot.nodeid = memb->nodeid; | ||
483 | slot.slot = memb->slot; | ||
484 | |||
485 | ls->ls_ops->recover_slot(ls->ls_ops_arg, &slot); | ||
486 | } | ||
487 | |||
488 | void dlm_lsop_recover_done(struct dlm_ls *ls) | ||
489 | { | ||
490 | struct dlm_member *memb; | ||
491 | struct dlm_slot *slots; | ||
492 | int i, num; | ||
493 | |||
494 | if (!ls->ls_ops || !ls->ls_ops->recover_done) | ||
495 | return; | ||
496 | |||
497 | num = ls->ls_num_nodes; | ||
498 | |||
499 | slots = kzalloc(num * sizeof(struct dlm_slot), GFP_KERNEL); | ||
500 | if (!slots) | ||
501 | return; | ||
502 | |||
503 | i = 0; | ||
504 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
505 | if (i == num) { | ||
506 | log_error(ls, "dlm_lsop_recover_done bad num %d", num); | ||
507 | goto out; | ||
508 | } | ||
509 | slots[i].nodeid = memb->nodeid; | ||
510 | slots[i].slot = memb->slot; | ||
511 | i++; | ||
512 | } | ||
513 | |||
514 | ls->ls_ops->recover_done(ls->ls_ops_arg, slots, num, | ||
515 | ls->ls_slot, ls->ls_generation); | ||
516 | out: | ||
517 | kfree(slots); | ||
518 | } | ||
519 | |||
520 | static struct dlm_config_node *find_config_node(struct dlm_recover *rv, | ||
521 | int nodeid) | ||
522 | { | ||
523 | int i; | ||
524 | |||
525 | for (i = 0; i < rv->nodes_count; i++) { | ||
526 | if (rv->nodes[i].nodeid == nodeid) | ||
527 | return &rv->nodes[i]; | ||
528 | } | ||
529 | return NULL; | ||
530 | } | ||
531 | |||
189 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | 532 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) |
190 | { | 533 | { |
191 | struct dlm_member *memb, *safe; | 534 | struct dlm_member *memb, *safe; |
192 | int i, error, found, pos = 0, neg = 0, low = -1; | 535 | struct dlm_config_node *node; |
536 | int i, error, neg = 0, low = -1; | ||
193 | 537 | ||
194 | /* previously removed members that we've not finished removing need to | 538 | /* previously removed members that we've not finished removing need to |
195 | count as a negative change so the "neg" recovery steps will happen */ | 539 | count as a negative change so the "neg" recovery steps will happen */ |
@@ -202,46 +546,32 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
202 | /* move departed members from ls_nodes to ls_nodes_gone */ | 546 | /* move departed members from ls_nodes to ls_nodes_gone */ |
203 | 547 | ||
204 | list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { | 548 | list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { |
205 | found = 0; | 549 | node = find_config_node(rv, memb->nodeid); |
206 | for (i = 0; i < rv->node_count; i++) { | 550 | if (node && !node->new) |
207 | if (memb->nodeid == rv->nodeids[i]) { | 551 | continue; |
208 | found = 1; | ||
209 | break; | ||
210 | } | ||
211 | } | ||
212 | 552 | ||
213 | if (!found) { | 553 | if (!node) { |
214 | neg++; | ||
215 | dlm_remove_member(ls, memb); | ||
216 | log_debug(ls, "remove member %d", memb->nodeid); | 554 | log_debug(ls, "remove member %d", memb->nodeid); |
555 | } else { | ||
556 | /* removed and re-added */ | ||
557 | log_debug(ls, "remove member %d comm_seq %u %u", | ||
558 | memb->nodeid, memb->comm_seq, node->comm_seq); | ||
217 | } | 559 | } |
218 | } | ||
219 | |||
220 | /* Add an entry to ls_nodes_gone for members that were removed and | ||
221 | then added again, so that previous state for these nodes will be | ||
222 | cleared during recovery. */ | ||
223 | |||
224 | for (i = 0; i < rv->new_count; i++) { | ||
225 | if (!dlm_is_member(ls, rv->new[i])) | ||
226 | continue; | ||
227 | log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); | ||
228 | 560 | ||
229 | memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS); | ||
230 | if (!memb) | ||
231 | return -ENOMEM; | ||
232 | memb->nodeid = rv->new[i]; | ||
233 | list_add_tail(&memb->list, &ls->ls_nodes_gone); | ||
234 | neg++; | 561 | neg++; |
562 | list_move(&memb->list, &ls->ls_nodes_gone); | ||
563 | ls->ls_num_nodes--; | ||
564 | dlm_lsop_recover_slot(ls, memb); | ||
235 | } | 565 | } |
236 | 566 | ||
237 | /* add new members to ls_nodes */ | 567 | /* add new members to ls_nodes */ |
238 | 568 | ||
239 | for (i = 0; i < rv->node_count; i++) { | 569 | for (i = 0; i < rv->nodes_count; i++) { |
240 | if (dlm_is_member(ls, rv->nodeids[i])) | 570 | node = &rv->nodes[i]; |
571 | if (dlm_is_member(ls, node->nodeid)) | ||
241 | continue; | 572 | continue; |
242 | dlm_add_member(ls, rv->nodeids[i]); | 573 | dlm_add_member(ls, node); |
243 | pos++; | 574 | log_debug(ls, "add member %d", node->nodeid); |
244 | log_debug(ls, "add member %d", rv->nodeids[i]); | ||
245 | } | 575 | } |
246 | 576 | ||
247 | list_for_each_entry(memb, &ls->ls_nodes, list) { | 577 | list_for_each_entry(memb, &ls->ls_nodes, list) { |
@@ -251,7 +581,6 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
251 | ls->ls_low_nodeid = low; | 581 | ls->ls_low_nodeid = low; |
252 | 582 | ||
253 | make_member_array(ls); | 583 | make_member_array(ls); |
254 | dlm_set_recover_status(ls, DLM_RS_NODES); | ||
255 | *neg_out = neg; | 584 | *neg_out = neg; |
256 | 585 | ||
257 | error = ping_members(ls); | 586 | error = ping_members(ls); |
@@ -261,12 +590,8 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
261 | ls->ls_members_result = error; | 590 | ls->ls_members_result = error; |
262 | complete(&ls->ls_members_done); | 591 | complete(&ls->ls_members_done); |
263 | } | 592 | } |
264 | if (error) | ||
265 | goto out; | ||
266 | 593 | ||
267 | error = dlm_recover_members_wait(ls); | 594 | log_debug(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes); |
268 | out: | ||
269 | log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error); | ||
270 | return error; | 595 | return error; |
271 | } | 596 | } |
272 | 597 | ||
@@ -327,26 +652,35 @@ int dlm_ls_stop(struct dlm_ls *ls) | |||
327 | */ | 652 | */ |
328 | 653 | ||
329 | dlm_recoverd_suspend(ls); | 654 | dlm_recoverd_suspend(ls); |
655 | |||
656 | spin_lock(&ls->ls_recover_lock); | ||
657 | kfree(ls->ls_slots); | ||
658 | ls->ls_slots = NULL; | ||
659 | ls->ls_num_slots = 0; | ||
660 | ls->ls_slots_size = 0; | ||
330 | ls->ls_recover_status = 0; | 661 | ls->ls_recover_status = 0; |
662 | spin_unlock(&ls->ls_recover_lock); | ||
663 | |||
331 | dlm_recoverd_resume(ls); | 664 | dlm_recoverd_resume(ls); |
332 | 665 | ||
333 | if (!ls->ls_recover_begin) | 666 | if (!ls->ls_recover_begin) |
334 | ls->ls_recover_begin = jiffies; | 667 | ls->ls_recover_begin = jiffies; |
668 | |||
669 | dlm_lsop_recover_prep(ls); | ||
335 | return 0; | 670 | return 0; |
336 | } | 671 | } |
337 | 672 | ||
338 | int dlm_ls_start(struct dlm_ls *ls) | 673 | int dlm_ls_start(struct dlm_ls *ls) |
339 | { | 674 | { |
340 | struct dlm_recover *rv = NULL, *rv_old; | 675 | struct dlm_recover *rv = NULL, *rv_old; |
341 | int *ids = NULL, *new = NULL; | 676 | struct dlm_config_node *nodes; |
342 | int error, ids_count = 0, new_count = 0; | 677 | int error, count; |
343 | 678 | ||
344 | rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS); | 679 | rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS); |
345 | if (!rv) | 680 | if (!rv) |
346 | return -ENOMEM; | 681 | return -ENOMEM; |
347 | 682 | ||
348 | error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, | 683 | error = dlm_config_nodes(ls->ls_name, &nodes, &count); |
349 | &new, &new_count); | ||
350 | if (error < 0) | 684 | if (error < 0) |
351 | goto fail; | 685 | goto fail; |
352 | 686 | ||
@@ -361,10 +695,8 @@ int dlm_ls_start(struct dlm_ls *ls) | |||
361 | goto fail; | 695 | goto fail; |
362 | } | 696 | } |
363 | 697 | ||
364 | rv->nodeids = ids; | 698 | rv->nodes = nodes; |
365 | rv->node_count = ids_count; | 699 | rv->nodes_count = count; |
366 | rv->new = new; | ||
367 | rv->new_count = new_count; | ||
368 | rv->seq = ++ls->ls_recover_seq; | 700 | rv->seq = ++ls->ls_recover_seq; |
369 | rv_old = ls->ls_recover_args; | 701 | rv_old = ls->ls_recover_args; |
370 | ls->ls_recover_args = rv; | 702 | ls->ls_recover_args = rv; |
@@ -372,9 +704,8 @@ int dlm_ls_start(struct dlm_ls *ls) | |||
372 | 704 | ||
373 | if (rv_old) { | 705 | if (rv_old) { |
374 | log_error(ls, "unused recovery %llx %d", | 706 | log_error(ls, "unused recovery %llx %d", |
375 | (unsigned long long)rv_old->seq, rv_old->node_count); | 707 | (unsigned long long)rv_old->seq, rv_old->nodes_count); |
376 | kfree(rv_old->nodeids); | 708 | kfree(rv_old->nodes); |
377 | kfree(rv_old->new); | ||
378 | kfree(rv_old); | 709 | kfree(rv_old); |
379 | } | 710 | } |
380 | 711 | ||
@@ -383,8 +714,7 @@ int dlm_ls_start(struct dlm_ls *ls) | |||
383 | 714 | ||
384 | fail: | 715 | fail: |
385 | kfree(rv); | 716 | kfree(rv); |
386 | kfree(ids); | 717 | kfree(nodes); |
387 | kfree(new); | ||
388 | return error; | 718 | return error; |
389 | } | 719 | } |
390 | 720 | ||
diff --git a/fs/dlm/member.h b/fs/dlm/member.h index 7a26fca1e0b5..3deb70661c69 100644 --- a/fs/dlm/member.h +++ b/fs/dlm/member.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. | 4 | ** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. |
5 | ** | 5 | ** |
6 | ** This copyrighted material is made available to anyone wishing to use, | 6 | ** This copyrighted material is made available to anyone wishing to use, |
7 | ** modify, copy, or redistribute it subject to the terms and conditions | 7 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -20,6 +20,14 @@ void dlm_clear_members_gone(struct dlm_ls *ls); | |||
20 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out); | 20 | int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out); |
21 | int dlm_is_removed(struct dlm_ls *ls, int nodeid); | 21 | int dlm_is_removed(struct dlm_ls *ls, int nodeid); |
22 | int dlm_is_member(struct dlm_ls *ls, int nodeid); | 22 | int dlm_is_member(struct dlm_ls *ls, int nodeid); |
23 | int dlm_slots_version(struct dlm_header *h); | ||
24 | void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc, | ||
25 | struct dlm_member *memb); | ||
26 | void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc); | ||
27 | int dlm_slots_copy_in(struct dlm_ls *ls); | ||
28 | int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, | ||
29 | struct dlm_slot **slots_out, uint32_t *gen_out); | ||
30 | void dlm_lsop_recover_done(struct dlm_ls *ls); | ||
23 | 31 | ||
24 | #endif /* __MEMBER_DOT_H__ */ | 32 | #endif /* __MEMBER_DOT_H__ */ |
25 | 33 | ||
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index f10a50f24e8f..ac5c616c9696 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include "memory.h" | 23 | #include "memory.h" |
24 | #include "lock.h" | 24 | #include "lock.h" |
25 | #include "util.h" | 25 | #include "util.h" |
26 | #include "member.h" | ||
26 | 27 | ||
27 | 28 | ||
28 | static int rcom_response(struct dlm_ls *ls) | 29 | static int rcom_response(struct dlm_ls *ls) |
@@ -72,20 +73,30 @@ static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh, | |||
72 | dlm_lowcomms_commit_buffer(mh); | 73 | dlm_lowcomms_commit_buffer(mh); |
73 | } | 74 | } |
74 | 75 | ||
76 | static void set_rcom_status(struct dlm_ls *ls, struct rcom_status *rs, | ||
77 | uint32_t flags) | ||
78 | { | ||
79 | rs->rs_flags = cpu_to_le32(flags); | ||
80 | } | ||
81 | |||
75 | /* When replying to a status request, a node also sends back its | 82 | /* When replying to a status request, a node also sends back its |
76 | configuration values. The requesting node then checks that the remote | 83 | configuration values. The requesting node then checks that the remote |
77 | node is configured the same way as itself. */ | 84 | node is configured the same way as itself. */ |
78 | 85 | ||
79 | static void make_config(struct dlm_ls *ls, struct rcom_config *rf) | 86 | static void set_rcom_config(struct dlm_ls *ls, struct rcom_config *rf, |
87 | uint32_t num_slots) | ||
80 | { | 88 | { |
81 | rf->rf_lvblen = cpu_to_le32(ls->ls_lvblen); | 89 | rf->rf_lvblen = cpu_to_le32(ls->ls_lvblen); |
82 | rf->rf_lsflags = cpu_to_le32(ls->ls_exflags); | 90 | rf->rf_lsflags = cpu_to_le32(ls->ls_exflags); |
91 | |||
92 | rf->rf_our_slot = cpu_to_le16(ls->ls_slot); | ||
93 | rf->rf_num_slots = cpu_to_le16(num_slots); | ||
94 | rf->rf_generation = cpu_to_le32(ls->ls_generation); | ||
83 | } | 95 | } |
84 | 96 | ||
85 | static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) | 97 | static int check_rcom_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) |
86 | { | 98 | { |
87 | struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; | 99 | struct rcom_config *rf = (struct rcom_config *) rc->rc_buf; |
88 | size_t conf_size = sizeof(struct dlm_rcom) + sizeof(struct rcom_config); | ||
89 | 100 | ||
90 | if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { | 101 | if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) { |
91 | log_error(ls, "version mismatch: %x nodeid %d: %x", | 102 | log_error(ls, "version mismatch: %x nodeid %d: %x", |
@@ -94,12 +105,6 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) | |||
94 | return -EPROTO; | 105 | return -EPROTO; |
95 | } | 106 | } |
96 | 107 | ||
97 | if (rc->rc_header.h_length < conf_size) { | ||
98 | log_error(ls, "config too short: %d nodeid %d", | ||
99 | rc->rc_header.h_length, nodeid); | ||
100 | return -EPROTO; | ||
101 | } | ||
102 | |||
103 | if (le32_to_cpu(rf->rf_lvblen) != ls->ls_lvblen || | 108 | if (le32_to_cpu(rf->rf_lvblen) != ls->ls_lvblen || |
104 | le32_to_cpu(rf->rf_lsflags) != ls->ls_exflags) { | 109 | le32_to_cpu(rf->rf_lsflags) != ls->ls_exflags) { |
105 | log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", | 110 | log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", |
@@ -127,7 +132,18 @@ static void disallow_sync_reply(struct dlm_ls *ls) | |||
127 | spin_unlock(&ls->ls_rcom_spin); | 132 | spin_unlock(&ls->ls_rcom_spin); |
128 | } | 133 | } |
129 | 134 | ||
130 | int dlm_rcom_status(struct dlm_ls *ls, int nodeid) | 135 | /* |
136 | * low nodeid gathers one slot value at a time from each node. | ||
137 | * it sets need_slots=0, and saves rf_our_slot returned from each | ||
138 | * rcom_config. | ||
139 | * | ||
140 | * other nodes gather all slot values at once from the low nodeid. | ||
141 | * they set need_slots=1, and ignore the rf_our_slot returned from each | ||
142 | * rcom_config. they use the rf_num_slots returned from the low | ||
143 | * node's rcom_config. | ||
144 | */ | ||
145 | |||
146 | int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) | ||
131 | { | 147 | { |
132 | struct dlm_rcom *rc; | 148 | struct dlm_rcom *rc; |
133 | struct dlm_mhandle *mh; | 149 | struct dlm_mhandle *mh; |
@@ -141,10 +157,13 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) | |||
141 | goto out; | 157 | goto out; |
142 | } | 158 | } |
143 | 159 | ||
144 | error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh); | 160 | error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, |
161 | sizeof(struct rcom_status), &rc, &mh); | ||
145 | if (error) | 162 | if (error) |
146 | goto out; | 163 | goto out; |
147 | 164 | ||
165 | set_rcom_status(ls, (struct rcom_status *)rc->rc_buf, status_flags); | ||
166 | |||
148 | allow_sync_reply(ls, &rc->rc_id); | 167 | allow_sync_reply(ls, &rc->rc_id); |
149 | memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); | 168 | memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); |
150 | 169 | ||
@@ -161,8 +180,11 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid) | |||
161 | /* we pretend the remote lockspace exists with 0 status */ | 180 | /* we pretend the remote lockspace exists with 0 status */ |
162 | log_debug(ls, "remote node %d not ready", nodeid); | 181 | log_debug(ls, "remote node %d not ready", nodeid); |
163 | rc->rc_result = 0; | 182 | rc->rc_result = 0; |
164 | } else | 183 | error = 0; |
165 | error = check_config(ls, rc, nodeid); | 184 | } else { |
185 | error = check_rcom_config(ls, rc, nodeid); | ||
186 | } | ||
187 | |||
166 | /* the caller looks at rc_result for the remote recovery status */ | 188 | /* the caller looks at rc_result for the remote recovery status */ |
167 | out: | 189 | out: |
168 | return error; | 190 | return error; |
@@ -172,17 +194,60 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in) | |||
172 | { | 194 | { |
173 | struct dlm_rcom *rc; | 195 | struct dlm_rcom *rc; |
174 | struct dlm_mhandle *mh; | 196 | struct dlm_mhandle *mh; |
175 | int error, nodeid = rc_in->rc_header.h_nodeid; | 197 | struct rcom_status *rs; |
198 | uint32_t status; | ||
199 | int nodeid = rc_in->rc_header.h_nodeid; | ||
200 | int len = sizeof(struct rcom_config); | ||
201 | int num_slots = 0; | ||
202 | int error; | ||
203 | |||
204 | if (!dlm_slots_version(&rc_in->rc_header)) { | ||
205 | status = dlm_recover_status(ls); | ||
206 | goto do_create; | ||
207 | } | ||
208 | |||
209 | rs = (struct rcom_status *)rc_in->rc_buf; | ||
176 | 210 | ||
211 | if (!(rs->rs_flags & DLM_RSF_NEED_SLOTS)) { | ||
212 | status = dlm_recover_status(ls); | ||
213 | goto do_create; | ||
214 | } | ||
215 | |||
216 | spin_lock(&ls->ls_recover_lock); | ||
217 | status = ls->ls_recover_status; | ||
218 | num_slots = ls->ls_num_slots; | ||
219 | spin_unlock(&ls->ls_recover_lock); | ||
220 | len += num_slots * sizeof(struct rcom_slot); | ||
221 | |||
222 | do_create: | ||
177 | error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY, | 223 | error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY, |
178 | sizeof(struct rcom_config), &rc, &mh); | 224 | len, &rc, &mh); |
179 | if (error) | 225 | if (error) |
180 | return; | 226 | return; |
227 | |||
181 | rc->rc_id = rc_in->rc_id; | 228 | rc->rc_id = rc_in->rc_id; |
182 | rc->rc_seq_reply = rc_in->rc_seq; | 229 | rc->rc_seq_reply = rc_in->rc_seq; |
183 | rc->rc_result = dlm_recover_status(ls); | 230 | rc->rc_result = status; |
184 | make_config(ls, (struct rcom_config *) rc->rc_buf); | 231 | |
232 | set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, num_slots); | ||
233 | |||
234 | if (!num_slots) | ||
235 | goto do_send; | ||
236 | |||
237 | spin_lock(&ls->ls_recover_lock); | ||
238 | if (ls->ls_num_slots != num_slots) { | ||
239 | spin_unlock(&ls->ls_recover_lock); | ||
240 | log_debug(ls, "receive_rcom_status num_slots %d to %d", | ||
241 | num_slots, ls->ls_num_slots); | ||
242 | rc->rc_result = 0; | ||
243 | set_rcom_config(ls, (struct rcom_config *)rc->rc_buf, 0); | ||
244 | goto do_send; | ||
245 | } | ||
246 | |||
247 | dlm_slots_copy_out(ls, rc); | ||
248 | spin_unlock(&ls->ls_recover_lock); | ||
185 | 249 | ||
250 | do_send: | ||
186 | send_rcom(ls, mh, rc); | 251 | send_rcom(ls, mh, rc); |
187 | } | 252 | } |
188 | 253 | ||
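The comment above establishes who asks whom for slots; the DLM_RCOM_STATUS_REPLY is accordingly now variable-length. A sketch of the reply layout, pieced together from the len computation in receive_rcom_status() (struct names come from this diff; the rcom_slot contents themselves are not shown here):

	/*
	 * DLM_RCOM_STATUS_REPLY layout (sketch):
	 *
	 *   struct dlm_rcom    rc_result = recovery status bits
	 *   struct rcom_config rf_our_slot, rf_num_slots, rf_generation
	 *   struct rcom_slot   [rf_num_slots entries], appended only when
	 *                      the requester set DLM_RSF_NEED_SLOTS and
	 *                      this (low) node has an assigned slot table
	 */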
diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h index b09abd29ba38..206723ab744d 100644 --- a/fs/dlm/rcom.h +++ b/fs/dlm/rcom.h | |||
@@ -14,7 +14,7 @@ | |||
14 | #ifndef __RCOM_DOT_H__ | 14 | #ifndef __RCOM_DOT_H__ |
15 | #define __RCOM_DOT_H__ | 15 | #define __RCOM_DOT_H__ |
16 | 16 | ||
17 | int dlm_rcom_status(struct dlm_ls *ls, int nodeid); | 17 | int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags); |
18 | int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len); | 18 | int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len); |
19 | int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); | 19 | int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); |
20 | int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); | 20 | int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); |
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 14638235f7b2..34d5adf1fce7 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c | |||
@@ -85,14 +85,20 @@ uint32_t dlm_recover_status(struct dlm_ls *ls) | |||
85 | return status; | 85 | return status; |
86 | } | 86 | } |
87 | 87 | ||
88 | static void _set_recover_status(struct dlm_ls *ls, uint32_t status) | ||
89 | { | ||
90 | ls->ls_recover_status |= status; | ||
91 | } | ||
92 | |||
88 | void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) | 93 | void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status) |
89 | { | 94 | { |
90 | spin_lock(&ls->ls_recover_lock); | 95 | spin_lock(&ls->ls_recover_lock); |
91 | ls->ls_recover_status |= status; | 96 | _set_recover_status(ls, status); |
92 | spin_unlock(&ls->ls_recover_lock); | 97 | spin_unlock(&ls->ls_recover_lock); |
93 | } | 98 | } |
94 | 99 | ||
95 | static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status) | 100 | static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status, |
101 | int save_slots) | ||
96 | { | 102 | { |
97 | struct dlm_rcom *rc = ls->ls_recover_buf; | 103 | struct dlm_rcom *rc = ls->ls_recover_buf; |
98 | struct dlm_member *memb; | 104 | struct dlm_member *memb; |
@@ -106,10 +112,13 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status) | |||
106 | goto out; | 112 | goto out; |
107 | } | 113 | } |
108 | 114 | ||
109 | error = dlm_rcom_status(ls, memb->nodeid); | 115 | error = dlm_rcom_status(ls, memb->nodeid, 0); |
110 | if (error) | 116 | if (error) |
111 | goto out; | 117 | goto out; |
112 | 118 | ||
119 | if (save_slots) | ||
120 | dlm_slot_save(ls, rc, memb); | ||
121 | |||
113 | if (rc->rc_result & wait_status) | 122 | if (rc->rc_result & wait_status) |
114 | break; | 123 | break; |
115 | if (delay < 1000) | 124 | if (delay < 1000) |
@@ -121,7 +130,8 @@ static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status) | |||
121 | return error; | 130 | return error; |
122 | } | 131 | } |
123 | 132 | ||
124 | static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status) | 133 | static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status, |
134 | uint32_t status_flags) | ||
125 | { | 135 | { |
126 | struct dlm_rcom *rc = ls->ls_recover_buf; | 136 | struct dlm_rcom *rc = ls->ls_recover_buf; |
127 | int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; | 137 | int error = 0, delay = 0, nodeid = ls->ls_low_nodeid; |
@@ -132,7 +142,7 @@ static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status) | |||
132 | goto out; | 142 | goto out; |
133 | } | 143 | } |
134 | 144 | ||
135 | error = dlm_rcom_status(ls, nodeid); | 145 | error = dlm_rcom_status(ls, nodeid, status_flags); |
136 | if (error) | 146 | if (error) |
137 | break; | 147 | break; |
138 | 148 | ||
@@ -152,18 +162,56 @@ static int wait_status(struct dlm_ls *ls, uint32_t status) | |||
152 | int error; | 162 | int error; |
153 | 163 | ||
154 | if (ls->ls_low_nodeid == dlm_our_nodeid()) { | 164 | if (ls->ls_low_nodeid == dlm_our_nodeid()) { |
155 | error = wait_status_all(ls, status); | 165 | error = wait_status_all(ls, status, 0); |
156 | if (!error) | 166 | if (!error) |
157 | dlm_set_recover_status(ls, status_all); | 167 | dlm_set_recover_status(ls, status_all); |
158 | } else | 168 | } else |
159 | error = wait_status_low(ls, status_all); | 169 | error = wait_status_low(ls, status_all, 0); |
160 | 170 | ||
161 | return error; | 171 | return error; |
162 | } | 172 | } |
163 | 173 | ||
164 | int dlm_recover_members_wait(struct dlm_ls *ls) | 174 | int dlm_recover_members_wait(struct dlm_ls *ls) |
165 | { | 175 | { |
166 | return wait_status(ls, DLM_RS_NODES); | 176 | struct dlm_member *memb; |
177 | struct dlm_slot *slots; | ||
178 | int num_slots, slots_size; | ||
179 | int error, rv; | ||
180 | uint32_t gen; | ||
181 | |||
182 | list_for_each_entry(memb, &ls->ls_nodes, list) { | ||
183 | memb->slot = -1; | ||
184 | memb->generation = 0; | ||
185 | } | ||
186 | |||
187 | if (ls->ls_low_nodeid == dlm_our_nodeid()) { | ||
188 | error = wait_status_all(ls, DLM_RS_NODES, 1); | ||
189 | if (error) | ||
190 | goto out; | ||
191 | |||
192 | /* slots array is sparse, slots_size may be > num_slots */ | ||
193 | |||
194 | rv = dlm_slots_assign(ls, &num_slots, &slots_size, &slots, &gen); | ||
195 | if (!rv) { | ||
196 | spin_lock(&ls->ls_recover_lock); | ||
197 | _set_recover_status(ls, DLM_RS_NODES_ALL); | ||
198 | ls->ls_num_slots = num_slots; | ||
199 | ls->ls_slots_size = slots_size; | ||
200 | ls->ls_slots = slots; | ||
201 | ls->ls_generation = gen; | ||
202 | spin_unlock(&ls->ls_recover_lock); | ||
203 | } else { | ||
204 | dlm_set_recover_status(ls, DLM_RS_NODES_ALL); | ||
205 | } | ||
206 | } else { | ||
207 | error = wait_status_low(ls, DLM_RS_NODES_ALL, DLM_RSF_NEED_SLOTS); | ||
208 | if (error) | ||
209 | goto out; | ||
210 | |||
211 | dlm_slots_copy_in(ls); | ||
212 | } | ||
213 | out: | ||
214 | return error; | ||
167 | } | 215 | } |
168 | 216 | ||
169 | int dlm_recover_directory_wait(struct dlm_ls *ls) | 217 | int dlm_recover_directory_wait(struct dlm_ls *ls) |
@@ -542,8 +590,6 @@ int dlm_recover_locks(struct dlm_ls *ls) | |||
542 | out: | 590 | out: |
543 | if (error) | 591 | if (error) |
544 | recover_list_clear(ls); | 592 | recover_list_clear(ls); |
545 | else | ||
546 | dlm_set_recover_status(ls, DLM_RS_LOCKS); | ||
547 | return error; | 593 | return error; |
548 | } | 594 | } |
549 | 595 | ||
@@ -715,6 +761,7 @@ void dlm_recover_rsbs(struct dlm_ls *ls) | |||
715 | 761 | ||
716 | int dlm_create_root_list(struct dlm_ls *ls) | 762 | int dlm_create_root_list(struct dlm_ls *ls) |
717 | { | 763 | { |
764 | struct rb_node *n; | ||
718 | struct dlm_rsb *r; | 765 | struct dlm_rsb *r; |
719 | int i, error = 0; | 766 | int i, error = 0; |
720 | 767 | ||
@@ -727,7 +774,8 @@ int dlm_create_root_list(struct dlm_ls *ls) | |||
727 | 774 | ||
728 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | 775 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { |
729 | spin_lock(&ls->ls_rsbtbl[i].lock); | 776 | spin_lock(&ls->ls_rsbtbl[i].lock); |
730 | list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) { | 777 | for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) { |
778 | r = rb_entry(n, struct dlm_rsb, res_hashnode); | ||
731 | list_add(&r->res_root_list, &ls->ls_root_list); | 779 | list_add(&r->res_root_list, &ls->ls_root_list); |
732 | dlm_hold_rsb(r); | 780 | dlm_hold_rsb(r); |
733 | } | 781 | } |
@@ -741,7 +789,8 @@ int dlm_create_root_list(struct dlm_ls *ls) | |||
741 | continue; | 789 | continue; |
742 | } | 790 | } |
743 | 791 | ||
744 | list_for_each_entry(r, &ls->ls_rsbtbl[i].toss, res_hashchain) { | 792 | for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = rb_next(n)) { |
793 | r = rb_entry(n, struct dlm_rsb, res_hashnode); | ||
745 | list_add(&r->res_root_list, &ls->ls_root_list); | 794 | list_add(&r->res_root_list, &ls->ls_root_list); |
746 | dlm_hold_rsb(r); | 795 | dlm_hold_rsb(r); |
747 | } | 796 | } |
@@ -771,16 +820,18 @@ void dlm_release_root_list(struct dlm_ls *ls) | |||
771 | 820 | ||
772 | void dlm_clear_toss_list(struct dlm_ls *ls) | 821 | void dlm_clear_toss_list(struct dlm_ls *ls) |
773 | { | 822 | { |
774 | struct dlm_rsb *r, *safe; | 823 | struct rb_node *n, *next; |
824 | struct dlm_rsb *rsb; | ||
775 | int i; | 825 | int i; |
776 | 826 | ||
777 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { | 827 | for (i = 0; i < ls->ls_rsbtbl_size; i++) { |
778 | spin_lock(&ls->ls_rsbtbl[i].lock); | 828 | spin_lock(&ls->ls_rsbtbl[i].lock); |
779 | list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, | 829 | for (n = rb_first(&ls->ls_rsbtbl[i].toss); n; n = next) { |
780 | res_hashchain) { | 830 | next = rb_next(n); |
781 | if (dlm_no_directory(ls) || !is_master(r)) { | 831 | rsb = rb_entry(n, struct dlm_rsb, res_hashnode); |
782 | list_del(&r->res_hashchain); | 832 | if (dlm_no_directory(ls) || !is_master(rsb)) { |
783 | dlm_free_rsb(r); | 833 | rb_erase(n, &ls->ls_rsbtbl[i].toss); |
834 | dlm_free_rsb(rsb); | ||
784 | } | 835 | } |
785 | } | 836 | } |
786 | spin_unlock(&ls->ls_rsbtbl[i].lock); | 837 | spin_unlock(&ls->ls_rsbtbl[i].lock); |
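The toss-table walk above is the standard erase-while-iterating idiom for kernel rbtrees: the successor must be cached before rb_erase(), because erasing rebalances the tree and invalidates the current node for rb_next(). The same pattern in isolation (should_drop() and free_entry() are placeholders for the predicate and destructor used above):

	struct rb_node *n, *next;

	for (n = rb_first(root); n; n = next) {
		next = rb_next(n);         /* cache before rb_erase() */
		if (should_drop(n)) {
			rb_erase(n, root);
			free_entry(n);     /* e.g. dlm_free_rsb() above */
		}
	}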
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 774da3cf92c6..3780caf7ae0c 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c | |||
@@ -2,7 +2,7 @@ | |||
2 | ******************************************************************************* | 2 | ******************************************************************************* |
3 | ** | 3 | ** |
4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. |
6 | ** | 6 | ** |
7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
@@ -54,7 +54,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
54 | unsigned long start; | 54 | unsigned long start; |
55 | int error, neg = 0; | 55 | int error, neg = 0; |
56 | 56 | ||
57 | log_debug(ls, "recover %llx", (unsigned long long)rv->seq); | 57 | log_debug(ls, "dlm_recover %llx", (unsigned long long)rv->seq); |
58 | 58 | ||
59 | mutex_lock(&ls->ls_recoverd_active); | 59 | mutex_lock(&ls->ls_recoverd_active); |
60 | 60 | ||
@@ -76,14 +76,22 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
76 | 76 | ||
77 | /* | 77 | /* |
78 | * Add or remove nodes from the lockspace's ls_nodes list. | 78 | * Add or remove nodes from the lockspace's ls_nodes list. |
79 | * Also waits for all nodes to complete dlm_recover_members. | ||
80 | */ | 79 | */ |
81 | 80 | ||
82 | error = dlm_recover_members(ls, rv, &neg); | 81 | error = dlm_recover_members(ls, rv, &neg); |
83 | if (error) { | 82 | if (error) { |
84 | log_debug(ls, "recover_members failed %d", error); | 83 | log_debug(ls, "dlm_recover_members error %d", error); |
85 | goto fail; | 84 | goto fail; |
86 | } | 85 | } |
86 | |||
87 | dlm_set_recover_status(ls, DLM_RS_NODES); | ||
88 | |||
89 | error = dlm_recover_members_wait(ls); | ||
90 | if (error) { | ||
91 | log_debug(ls, "dlm_recover_members_wait error %d", error); | ||
92 | goto fail; | ||
93 | } | ||
94 | |||
87 | start = jiffies; | 95 | start = jiffies; |
88 | 96 | ||
89 | /* | 97 | /* |
@@ -93,17 +101,15 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
93 | 101 | ||
94 | error = dlm_recover_directory(ls); | 102 | error = dlm_recover_directory(ls); |
95 | if (error) { | 103 | if (error) { |
96 | log_debug(ls, "recover_directory failed %d", error); | 104 | log_debug(ls, "dlm_recover_directory error %d", error); |
97 | goto fail; | 105 | goto fail; |
98 | } | 106 | } |
99 | 107 | ||
100 | /* | 108 | dlm_set_recover_status(ls, DLM_RS_DIR); |
101 | * Wait for all nodes to complete directory rebuild. | ||
102 | */ | ||
103 | 109 | ||
104 | error = dlm_recover_directory_wait(ls); | 110 | error = dlm_recover_directory_wait(ls); |
105 | if (error) { | 111 | if (error) { |
106 | log_debug(ls, "recover_directory_wait failed %d", error); | 112 | log_debug(ls, "dlm_recover_directory_wait error %d", error); |
107 | goto fail; | 113 | goto fail; |
108 | } | 114 | } |
109 | 115 | ||
@@ -133,7 +139,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
133 | 139 | ||
134 | error = dlm_recover_masters(ls); | 140 | error = dlm_recover_masters(ls); |
135 | if (error) { | 141 | if (error) { |
136 | log_debug(ls, "recover_masters failed %d", error); | 142 | log_debug(ls, "dlm_recover_masters error %d", error); |
137 | goto fail; | 143 | goto fail; |
138 | } | 144 | } |
139 | 145 | ||
@@ -143,13 +149,15 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
143 | 149 | ||
144 | error = dlm_recover_locks(ls); | 150 | error = dlm_recover_locks(ls); |
145 | if (error) { | 151 | if (error) { |
146 | log_debug(ls, "recover_locks failed %d", error); | 152 | log_debug(ls, "dlm_recover_locks error %d", error); |
147 | goto fail; | 153 | goto fail; |
148 | } | 154 | } |
149 | 155 | ||
156 | dlm_set_recover_status(ls, DLM_RS_LOCKS); | ||
157 | |||
150 | error = dlm_recover_locks_wait(ls); | 158 | error = dlm_recover_locks_wait(ls); |
151 | if (error) { | 159 | if (error) { |
152 | log_debug(ls, "recover_locks_wait failed %d", error); | 160 | log_debug(ls, "dlm_recover_locks_wait error %d", error); |
153 | goto fail; | 161 | goto fail; |
154 | } | 162 | } |
155 | 163 | ||
@@ -170,7 +178,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
170 | 178 | ||
171 | error = dlm_recover_locks_wait(ls); | 179 | error = dlm_recover_locks_wait(ls); |
172 | if (error) { | 180 | if (error) { |
173 | log_debug(ls, "recover_locks_wait failed %d", error); | 181 | log_debug(ls, "dlm_recover_locks_wait error %d", error); |
174 | goto fail; | 182 | goto fail; |
175 | } | 183 | } |
176 | } | 184 | } |
@@ -186,9 +194,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
186 | dlm_purge_requestqueue(ls); | 194 | dlm_purge_requestqueue(ls); |
187 | 195 | ||
188 | dlm_set_recover_status(ls, DLM_RS_DONE); | 196 | dlm_set_recover_status(ls, DLM_RS_DONE); |
197 | |||
189 | error = dlm_recover_done_wait(ls); | 198 | error = dlm_recover_done_wait(ls); |
190 | if (error) { | 199 | if (error) { |
191 | log_debug(ls, "recover_done_wait failed %d", error); | 200 | log_debug(ls, "dlm_recover_done_wait error %d", error); |
192 | goto fail; | 201 | goto fail; |
193 | } | 202 | } |
194 | 203 | ||
@@ -200,34 +209,35 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
200 | 209 | ||
201 | error = enable_locking(ls, rv->seq); | 210 | error = enable_locking(ls, rv->seq); |
202 | if (error) { | 211 | if (error) { |
203 | log_debug(ls, "enable_locking failed %d", error); | 212 | log_debug(ls, "enable_locking error %d", error); |
204 | goto fail; | 213 | goto fail; |
205 | } | 214 | } |
206 | 215 | ||
207 | error = dlm_process_requestqueue(ls); | 216 | error = dlm_process_requestqueue(ls); |
208 | if (error) { | 217 | if (error) { |
209 | log_debug(ls, "process_requestqueue failed %d", error); | 218 | log_debug(ls, "dlm_process_requestqueue error %d", error); |
210 | goto fail; | 219 | goto fail; |
211 | } | 220 | } |
212 | 221 | ||
213 | error = dlm_recover_waiters_post(ls); | 222 | error = dlm_recover_waiters_post(ls); |
214 | if (error) { | 223 | if (error) { |
215 | log_debug(ls, "recover_waiters_post failed %d", error); | 224 | log_debug(ls, "dlm_recover_waiters_post error %d", error); |
216 | goto fail; | 225 | goto fail; |
217 | } | 226 | } |
218 | 227 | ||
219 | dlm_grant_after_purge(ls); | 228 | dlm_grant_after_purge(ls); |
220 | 229 | ||
221 | log_debug(ls, "recover %llx done: %u ms", | 230 | log_debug(ls, "dlm_recover %llx generation %u done: %u ms", |
222 | (unsigned long long)rv->seq, | 231 | (unsigned long long)rv->seq, ls->ls_generation, |
223 | jiffies_to_msecs(jiffies - start)); | 232 | jiffies_to_msecs(jiffies - start)); |
224 | mutex_unlock(&ls->ls_recoverd_active); | 233 | mutex_unlock(&ls->ls_recoverd_active); |
225 | 234 | ||
235 | dlm_lsop_recover_done(ls); | ||
226 | return 0; | 236 | return 0; |
227 | 237 | ||
228 | fail: | 238 | fail: |
229 | dlm_release_root_list(ls); | 239 | dlm_release_root_list(ls); |
230 | log_debug(ls, "recover %llx error %d", | 240 | log_debug(ls, "dlm_recover %llx error %d", |
231 | (unsigned long long)rv->seq, error); | 241 | (unsigned long long)rv->seq, error); |
232 | mutex_unlock(&ls->ls_recoverd_active); | 242 | mutex_unlock(&ls->ls_recoverd_active); |
233 | return error; | 243 | return error; |
@@ -250,8 +260,7 @@ static void do_ls_recovery(struct dlm_ls *ls) | |||
250 | 260 | ||
251 | if (rv) { | 261 | if (rv) { |
252 | ls_recover(ls, rv); | 262 | ls_recover(ls, rv); |
253 | kfree(rv->nodeids); | 263 | kfree(rv->nodes); |
254 | kfree(rv->new); | ||
255 | kfree(rv); | 264 | kfree(rv); |
256 | } | 265 | } |
257 | } | 266 | } |
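Setting each status bit now happens in recoverd, between the local work and the corresponding wait, rather than inside the work functions themselves. Every phase of ls_recover() therefore follows one publish-then-wait shape (a condensed view of the flow above, with X standing for NODES, DIR, LOCKS, or DONE):

	error = dlm_recover_X(ls);             /* local part of phase X */
	if (error)
		goto fail;
	dlm_set_recover_status(ls, DLM_RS_X);  /* published via rcom status */
	error = dlm_recover_X_wait(ls);        /* barrier with other nodes */
	if (error)
		goto fail;

The ordering matters: remote nodes poll the status bits through DLM_RCOM_STATUS, so the bit must be visible before this node blocks in the wait.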
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index d8ea60756403..eb4ed9ba3098 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -392,8 +392,9 @@ static int device_create_lockspace(struct dlm_lspace_params *params) | |||
392 | if (!capable(CAP_SYS_ADMIN)) | 392 | if (!capable(CAP_SYS_ADMIN)) |
393 | return -EPERM; | 393 | return -EPERM; |
394 | 394 | ||
395 | error = dlm_new_lockspace(params->name, strlen(params->name), | 395 | error = dlm_new_lockspace(params->name, NULL, params->flags, |
396 | &lockspace, params->flags, DLM_USER_LVB_LEN); | 396 | DLM_USER_LVB_LEN, NULL, NULL, NULL, |
397 | &lockspace); | ||
397 | if (error) | 398 | if (error) |
398 | return error; | 399 | return error; |
399 | 400 | ||
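The call in device_create_lockspace() reflects a widened dlm_new_lockspace() that takes a cluster name and recovery callbacks. The presumed prototype, inferred from this call site and from dlm_lsop_recover_done() above (parameter names are an assumption, not quoted from linux/dlm.h):

	int dlm_new_lockspace(const char *name, const char *cluster,
			      uint32_t flags, int lvblen,
			      const struct dlm_lockspace_ops *ops,
			      void *ops_arg, int *ops_result,
			      dlm_lockspace_t **lockspace);

Userspace-created lockspaces pass NULL for the cluster name and for all three ops arguments, as seen above.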
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 2a834255c75d..63ab24510649 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -417,17 +417,6 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, | |||
417 | (unsigned long long)(extent_base + extent_offset), rc); | 417 | (unsigned long long)(extent_base + extent_offset), rc); |
418 | goto out; | 418 | goto out; |
419 | } | 419 | } |
420 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
421 | ecryptfs_printk(KERN_DEBUG, "Encrypting extent " | ||
422 | "with iv:\n"); | ||
423 | ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); | ||
424 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " | ||
425 | "encryption:\n"); | ||
426 | ecryptfs_dump_hex((char *) | ||
427 | (page_address(page) | ||
428 | + (extent_offset * crypt_stat->extent_size)), | ||
429 | 8); | ||
430 | } | ||
431 | rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0, | 420 | rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0, |
432 | page, (extent_offset | 421 | page, (extent_offset |
433 | * crypt_stat->extent_size), | 422 | * crypt_stat->extent_size), |
@@ -440,14 +429,6 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, | |||
440 | goto out; | 429 | goto out; |
441 | } | 430 | } |
442 | rc = 0; | 431 | rc = 0; |
443 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
444 | ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16llx]; " | ||
445 | "rc = [%d]\n", | ||
446 | (unsigned long long)(extent_base + extent_offset), rc); | ||
447 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " | ||
448 | "encryption:\n"); | ||
449 | ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8); | ||
450 | } | ||
451 | out: | 432 | out: |
452 | return rc; | 433 | return rc; |
453 | } | 434 | } |
@@ -543,17 +524,6 @@ static int ecryptfs_decrypt_extent(struct page *page, | |||
543 | (unsigned long long)(extent_base + extent_offset), rc); | 524 | (unsigned long long)(extent_base + extent_offset), rc); |
544 | goto out; | 525 | goto out; |
545 | } | 526 | } |
546 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
547 | ecryptfs_printk(KERN_DEBUG, "Decrypting extent " | ||
548 | "with iv:\n"); | ||
549 | ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); | ||
550 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " | ||
551 | "decryption:\n"); | ||
552 | ecryptfs_dump_hex((char *) | ||
553 | (page_address(enc_extent_page) | ||
554 | + (extent_offset * crypt_stat->extent_size)), | ||
555 | 8); | ||
556 | } | ||
557 | rc = ecryptfs_decrypt_page_offset(crypt_stat, page, | 527 | rc = ecryptfs_decrypt_page_offset(crypt_stat, page, |
558 | (extent_offset | 528 | (extent_offset |
559 | * crypt_stat->extent_size), | 529 | * crypt_stat->extent_size), |
@@ -567,16 +537,6 @@ static int ecryptfs_decrypt_extent(struct page *page, | |||
567 | goto out; | 537 | goto out; |
568 | } | 538 | } |
569 | rc = 0; | 539 | rc = 0; |
570 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
571 | ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16llx]; " | ||
572 | "rc = [%d]\n", | ||
573 | (unsigned long long)(extent_base + extent_offset), rc); | ||
574 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " | ||
575 | "decryption:\n"); | ||
576 | ecryptfs_dump_hex((char *)(page_address(page) | ||
577 | + (extent_offset | ||
578 | * crypt_stat->extent_size)), 8); | ||
579 | } | ||
580 | out: | 540 | out: |
581 | return rc; | 541 | return rc; |
582 | } | 542 | } |
@@ -1590,8 +1550,8 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, | |||
1590 | */ | 1550 | */ |
1591 | int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) | 1551 | int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) |
1592 | { | 1552 | { |
1593 | int rc = 0; | 1553 | int rc; |
1594 | char *page_virt = NULL; | 1554 | char *page_virt; |
1595 | struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; | 1555 | struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; |
1596 | struct ecryptfs_crypt_stat *crypt_stat = | 1556 | struct ecryptfs_crypt_stat *crypt_stat = |
1597 | &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; | 1557 | &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; |
@@ -1616,11 +1576,13 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) | |||
1616 | ecryptfs_dentry, | 1576 | ecryptfs_dentry, |
1617 | ECRYPTFS_VALIDATE_HEADER_SIZE); | 1577 | ECRYPTFS_VALIDATE_HEADER_SIZE); |
1618 | if (rc) { | 1578 | if (rc) { |
1579 | /* metadata is not in the file header, so try xattrs */ | ||
1619 | memset(page_virt, 0, PAGE_CACHE_SIZE); | 1580 | memset(page_virt, 0, PAGE_CACHE_SIZE); |
1620 | rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode); | 1581 | rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode); |
1621 | if (rc) { | 1582 | if (rc) { |
1622 | printk(KERN_DEBUG "Valid eCryptfs headers not found in " | 1583 | printk(KERN_DEBUG "Valid eCryptfs headers not found in " |
1623 | "file header region or xattr region\n"); | 1584 | "file header region or xattr region, inode %lu\n", |
1585 | ecryptfs_inode->i_ino); | ||
1624 | rc = -EINVAL; | 1586 | rc = -EINVAL; |
1625 | goto out; | 1587 | goto out; |
1626 | } | 1588 | } |
@@ -1629,7 +1591,8 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) | |||
1629 | ECRYPTFS_DONT_VALIDATE_HEADER_SIZE); | 1591 | ECRYPTFS_DONT_VALIDATE_HEADER_SIZE); |
1630 | if (rc) { | 1592 | if (rc) { |
1631 | printk(KERN_DEBUG "Valid eCryptfs headers not found in " | 1593 | printk(KERN_DEBUG "Valid eCryptfs headers not found in " |
1632 | "file xattr region either\n"); | 1594 | "file xattr region either, inode %lu\n", |
1595 | ecryptfs_inode->i_ino); | ||
1633 | rc = -EINVAL; | 1596 | rc = -EINVAL; |
1634 | } | 1597 | } |
1635 | if (crypt_stat->mount_crypt_stat->flags | 1598 | if (crypt_stat->mount_crypt_stat->flags |
@@ -1640,7 +1603,8 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) | |||
1640 | "crypto metadata only in the extended attribute " | 1603 | "crypto metadata only in the extended attribute " |
1641 | "region, but eCryptfs was mounted without " | 1604 | "region, but eCryptfs was mounted without " |
1642 | "xattr support enabled. eCryptfs will not treat " | 1605 | "xattr support enabled. eCryptfs will not treat " |
1643 | "this like an encrypted file.\n"); | 1606 | "this like an encrypted file, inode %lu\n", |
1607 | ecryptfs_inode->i_ino); | ||
1644 | rc = -EINVAL; | 1608 | rc = -EINVAL; |
1645 | } | 1609 | } |
1646 | } | 1610 | } |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index a9f29b12fbf2..a2362df58ae8 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -151,6 +151,11 @@ ecryptfs_get_key_payload_data(struct key *key) | |||
151 | * dentry name */ | 151 | * dentry name */ |
152 | #define ECRYPTFS_TAG_73_PACKET_TYPE 0x49 /* FEK-encrypted filename as | 152 | #define ECRYPTFS_TAG_73_PACKET_TYPE 0x49 /* FEK-encrypted filename as |
153 | * metadata */ | 153 | * metadata */ |
154 | #define ECRYPTFS_MIN_PKT_LEN_SIZE 1 /* Min size to specify packet length */ | ||
155 | #define ECRYPTFS_MAX_PKT_LEN_SIZE 2 /* Pass at least this many bytes to | ||
156 | * ecryptfs_parse_packet_length() and | ||
157 | * ecryptfs_write_packet_length() | ||
158 | */ | ||
154 | /* Constraint: ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES >= | 159 | /* Constraint: ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES >= |
155 | * ECRYPTFS_MAX_IV_BYTES */ | 160 | * ECRYPTFS_MAX_IV_BYTES */ |
156 | #define ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES 16 | 161 | #define ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES 16 |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 19a8ca4ab1dd..19892d7d2ed1 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -822,18 +822,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
822 | size_t num_zeros = (PAGE_CACHE_SIZE | 822 | size_t num_zeros = (PAGE_CACHE_SIZE |
823 | - (ia->ia_size & ~PAGE_CACHE_MASK)); | 823 | - (ia->ia_size & ~PAGE_CACHE_MASK)); |
824 | 824 | ||
825 | |||
826 | /* | ||
827 | * XXX(truncate) this should really happen at the beginning | ||
828 | * of ->setattr. But the code is too messy to do that as part | ||
829 | * of a larger patch. ecryptfs is also totally missing out | ||
830 | * on the inode_change_ok check at the beginning of | ||
831 | * ->setattr which would include this. | ||
832 | */ | ||
833 | rc = inode_newsize_ok(inode, ia->ia_size); | ||
834 | if (rc) | ||
835 | goto out; | ||
836 | |||
837 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | 825 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { |
838 | truncate_setsize(inode, ia->ia_size); | 826 | truncate_setsize(inode, ia->ia_size); |
839 | lower_ia->ia_size = ia->ia_size; | 827 | lower_ia->ia_size = ia->ia_size; |
@@ -883,6 +871,28 @@ out: | |||
883 | return rc; | 871 | return rc; |
884 | } | 872 | } |
885 | 873 | ||
874 | static int ecryptfs_inode_newsize_ok(struct inode *inode, loff_t offset) | ||
875 | { | ||
876 | struct ecryptfs_crypt_stat *crypt_stat; | ||
877 | loff_t lower_oldsize, lower_newsize; | ||
878 | |||
879 | crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; | ||
880 | lower_oldsize = upper_size_to_lower_size(crypt_stat, | ||
881 | i_size_read(inode)); | ||
882 | lower_newsize = upper_size_to_lower_size(crypt_stat, offset); | ||
883 | if (lower_newsize > lower_oldsize) { | ||
884 | /* | ||
885 | * The eCryptfs inode and the new *lower* size are mixed here | ||
886 | * because we may not have the lower i_mutex held and/or it may | ||
887 | * not be appropriate to call inode_newsize_ok() with inodes | ||
888 | * from other filesystems. | ||
889 | */ | ||
890 | return inode_newsize_ok(inode, lower_newsize); | ||
891 | } | ||
892 | |||
893 | return 0; | ||
894 | } | ||
895 | |||
886 | /** | 896 | /** |
887 | * ecryptfs_truncate | 897 | * ecryptfs_truncate |
888 | * @dentry: The ecryptfs layer dentry | 898 | * @dentry: The ecryptfs layer dentry |
@@ -899,6 +909,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) | |||
899 | struct iattr lower_ia = { .ia_valid = 0 }; | 909 | struct iattr lower_ia = { .ia_valid = 0 }; |
900 | int rc; | 910 | int rc; |
901 | 911 | ||
912 | rc = ecryptfs_inode_newsize_ok(dentry->d_inode, new_length); | ||
913 | if (rc) | ||
914 | return rc; | ||
915 | |||
902 | rc = truncate_upper(dentry, &ia, &lower_ia); | 916 | rc = truncate_upper(dentry, &ia, &lower_ia); |
903 | if (!rc && lower_ia.ia_valid & ATTR_SIZE) { | 917 | if (!rc && lower_ia.ia_valid & ATTR_SIZE) { |
904 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); | 918 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); |
@@ -978,6 +992,16 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) | |||
978 | } | 992 | } |
979 | } | 993 | } |
980 | mutex_unlock(&crypt_stat->cs_mutex); | 994 | mutex_unlock(&crypt_stat->cs_mutex); |
995 | |||
996 | rc = inode_change_ok(inode, ia); | ||
997 | if (rc) | ||
998 | goto out; | ||
999 | if (ia->ia_valid & ATTR_SIZE) { | ||
1000 | rc = ecryptfs_inode_newsize_ok(inode, ia->ia_size); | ||
1001 | if (rc) | ||
1002 | goto out; | ||
1003 | } | ||
1004 | |||
981 | if (S_ISREG(inode->i_mode)) { | 1005 | if (S_ISREG(inode->i_mode)) { |
982 | rc = filemap_write_and_wait(inode->i_mapping); | 1006 | rc = filemap_write_and_wait(inode->i_mapping); |
983 | if (rc) | 1007 | if (rc) |
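ecryptfs_inode_newsize_ok() compares lower-file sizes because an encrypted file grows in whole extents behind a metadata header, so a small upper-size change can imply a much larger lower-size change. A sketch of the idea behind upper_size_to_lower_size() (not its exact body; field names follow struct ecryptfs_crypt_stat, and the header-size helper is assumed):

	static loff_t upper_to_lower_sketch(struct ecryptfs_crypt_stat *cs,
					    loff_t upper_size)
	{
		loff_t lower = ecryptfs_lower_header_size(cs); /* metadata */

		if (upper_size) {
			loff_t extents = upper_size >> cs->extent_shift;

			if (upper_size & ~cs->extent_mask)
				extents++;	/* partial tail extent */
			lower += extents * cs->extent_size;
		}
		return lower;
	}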
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index ac1ad48c2376..8e3b943e330f 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c | |||
@@ -109,7 +109,7 @@ int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, | |||
109 | (*size) += ((unsigned char)(data[1]) + 192); | 109 | (*size) += ((unsigned char)(data[1]) + 192); |
110 | (*length_size) = 2; | 110 | (*length_size) = 2; |
111 | } else if (data[0] == 255) { | 111 | } else if (data[0] == 255) { |
112 | /* Five-byte length; we're not supposed to see this */ | 112 | /* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */ |
113 | ecryptfs_printk(KERN_ERR, "Five-byte packet length not " | 113 | ecryptfs_printk(KERN_ERR, "Five-byte packet length not " |
114 | "supported\n"); | 114 | "supported\n"); |
115 | rc = -EINVAL; | 115 | rc = -EINVAL; |
@@ -126,7 +126,7 @@ out: | |||
126 | /** | 126 | /** |
127 | * ecryptfs_write_packet_length | 127 | * ecryptfs_write_packet_length |
128 | * @dest: The byte array target into which to write the length. Must | 128 | * @dest: The byte array target into which to write the length. Must |
129 | * have at least 5 bytes allocated. | 129 | * have at least ECRYPTFS_MAX_PKT_LEN_SIZE bytes allocated. |
130 | * @size: The length to write. | 130 | * @size: The length to write. |
131 | * @packet_size_length: The number of bytes used to encode the packet | 131 | * @packet_size_length: The number of bytes used to encode the packet |
132 | * length is written to this address. | 132 | * length is written to this address. |
@@ -146,6 +146,7 @@ int ecryptfs_write_packet_length(char *dest, size_t size, | |||
146 | dest[1] = ((size - 192) % 256); | 146 | dest[1] = ((size - 192) % 256); |
147 | (*packet_size_length) = 2; | 147 | (*packet_size_length) = 2; |
148 | } else { | 148 | } else { |
149 | /* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */ | ||
149 | rc = -EINVAL; | 150 | rc = -EINVAL; |
150 | ecryptfs_printk(KERN_WARNING, | 151 | ecryptfs_printk(KERN_WARNING, |
151 | "Unsupported packet size: [%zd]\n", size); | 152 | "Unsupported packet size: [%zd]\n", size); |
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 940a82e63dc3..349209dc6a91 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c | |||
@@ -218,6 +218,29 @@ out_unlock: | |||
218 | return rc; | 218 | return rc; |
219 | } | 219 | } |
220 | 220 | ||
221 | /* | ||
222 | * miscdevfs packet format: | ||
223 | * Octet 0: Type | ||
224 | * Octets 1-4: network byte order msg_ctx->counter | ||
225 | * Octets 5-N0: Size of struct ecryptfs_message to follow | ||
226 | * Octets N0-N1: struct ecryptfs_message (including data) | ||
227 | * | ||
228 | * Octets 5-N1 not written if the packet type does not include a message | ||
229 | */ | ||
230 | #define PKT_TYPE_SIZE 1 | ||
231 | #define PKT_CTR_SIZE 4 | ||
232 | #define MIN_NON_MSG_PKT_SIZE (PKT_TYPE_SIZE + PKT_CTR_SIZE) | ||
233 | #define MIN_MSG_PKT_SIZE (PKT_TYPE_SIZE + PKT_CTR_SIZE \ | ||
234 | + ECRYPTFS_MIN_PKT_LEN_SIZE) | ||
235 | /* 4 + ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES comes from tag 65 packet format */ | ||
236 | #define MAX_MSG_PKT_SIZE (PKT_TYPE_SIZE + PKT_CTR_SIZE \ | ||
237 | + ECRYPTFS_MAX_PKT_LEN_SIZE \ | ||
238 | + sizeof(struct ecryptfs_message) \ | ||
239 | + 4 + ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) | ||
240 | #define PKT_TYPE_OFFSET 0 | ||
241 | #define PKT_CTR_OFFSET PKT_TYPE_SIZE | ||
242 | #define PKT_LEN_OFFSET (PKT_TYPE_SIZE + PKT_CTR_SIZE) | ||
243 | |||
221 | /** | 244 | /** |
222 | * ecryptfs_miscdev_read - format and send message from queue | 245 | * ecryptfs_miscdev_read - format and send message from queue |
223 | * @file: fs/ecryptfs/euid miscdevfs handle (ignored) | 246 | * @file: fs/ecryptfs/euid miscdevfs handle (ignored) |
@@ -237,7 +260,7 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, | |||
237 | struct ecryptfs_daemon *daemon; | 260 | struct ecryptfs_daemon *daemon; |
238 | struct ecryptfs_msg_ctx *msg_ctx; | 261 | struct ecryptfs_msg_ctx *msg_ctx; |
239 | size_t packet_length_size; | 262 | size_t packet_length_size; |
240 | char packet_length[3]; | 263 | char packet_length[ECRYPTFS_MAX_PKT_LEN_SIZE]; |
241 | size_t i; | 264 | size_t i; |
242 | size_t total_length; | 265 | size_t total_length; |
243 | uid_t euid = current_euid(); | 266 | uid_t euid = current_euid(); |
@@ -305,15 +328,8 @@ check_list: | |||
305 | packet_length_size = 0; | 328 | packet_length_size = 0; |
306 | msg_ctx->msg_size = 0; | 329 | msg_ctx->msg_size = 0; |
307 | } | 330 | } |
308 | /* miscdevfs packet format: | 331 | total_length = (PKT_TYPE_SIZE + PKT_CTR_SIZE + packet_length_size |
309 | * Octet 0: Type | 332 | + msg_ctx->msg_size); |
310 | * Octets 1-4: network byte order msg_ctx->counter | ||
311 | * Octets 5-N0: Size of struct ecryptfs_message to follow | ||
312 | * Octets N0-N1: struct ecryptfs_message (including data) | ||
313 | * | ||
314 | * Octets 5-N1 not written if the packet type does not | ||
315 | * include a message */ | ||
316 | total_length = (1 + 4 + packet_length_size + msg_ctx->msg_size); | ||
317 | if (count < total_length) { | 333 | if (count < total_length) { |
318 | rc = 0; | 334 | rc = 0; |
319 | printk(KERN_WARNING "%s: Only given user buffer of " | 335 | printk(KERN_WARNING "%s: Only given user buffer of " |
@@ -324,9 +340,10 @@ check_list: | |||
324 | rc = -EFAULT; | 340 | rc = -EFAULT; |
325 | if (put_user(msg_ctx->type, buf)) | 341 | if (put_user(msg_ctx->type, buf)) |
326 | goto out_unlock_msg_ctx; | 342 | goto out_unlock_msg_ctx; |
327 | if (put_user(cpu_to_be32(msg_ctx->counter), (__be32 __user *)(buf + 1))) | 343 | if (put_user(cpu_to_be32(msg_ctx->counter), |
344 | (__be32 __user *)(&buf[PKT_CTR_OFFSET]))) | ||
328 | goto out_unlock_msg_ctx; | 345 | goto out_unlock_msg_ctx; |
329 | i = 5; | 346 | i = PKT_TYPE_SIZE + PKT_CTR_SIZE; |
330 | if (msg_ctx->msg) { | 347 | if (msg_ctx->msg) { |
331 | if (copy_to_user(&buf[i], packet_length, packet_length_size)) | 348 | if (copy_to_user(&buf[i], packet_length, packet_length_size)) |
332 | goto out_unlock_msg_ctx; | 349 | goto out_unlock_msg_ctx; |
@@ -391,12 +408,6 @@ out: | |||
391 | * @count: Amount of data in @buf | 408 | * @count: Amount of data in @buf |
392 | * @ppos: Pointer to offset in file (ignored) | 409 | * @ppos: Pointer to offset in file (ignored) |
393 | * | 410 | * |
394 | * miscdevfs packet format: | ||
395 | * Octet 0: Type | ||
396 | * Octets 1-4: network byte order msg_ctx->counter (0's for non-response) | ||
397 | * Octets 5-N0: Size of struct ecryptfs_message to follow | ||
398 | * Octets N0-N1: struct ecryptfs_message (including data) | ||
399 | * | ||
400 | * Returns the number of bytes read from @buf | 411 | * Returns the number of bytes read from @buf |
401 | */ | 412 | */ |
402 | static ssize_t | 413 | static ssize_t |
@@ -405,60 +416,78 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, | |||
405 | { | 416 | { |
406 | __be32 counter_nbo; | 417 | __be32 counter_nbo; |
407 | u32 seq; | 418 | u32 seq; |
408 | size_t packet_size, packet_size_length, i; | 419 | size_t packet_size, packet_size_length; |
409 | ssize_t sz = 0; | ||
410 | char *data; | 420 | char *data; |
411 | uid_t euid = current_euid(); | 421 | uid_t euid = current_euid(); |
412 | int rc; | 422 | unsigned char packet_size_peek[ECRYPTFS_MAX_PKT_LEN_SIZE]; |
423 | ssize_t rc; | ||
413 | 424 | ||
414 | if (count == 0) | 425 | if (count == 0) { |
415 | goto out; | 426 | return 0; |
427 | } else if (count == MIN_NON_MSG_PKT_SIZE) { | ||
428 | /* Likely a harmless MSG_HELO or MSG_QUIT - no packet length */ | ||
429 | goto memdup; | ||
430 | } else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) { | ||
431 | printk(KERN_WARNING "%s: Acceptable packet size range is " | ||
432 | "[%d-%lu], but amount of data written is [%zu].", | ||
433 | __func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count); | ||
434 | return -EINVAL; | ||
435 | } | ||
436 | |||
437 | if (copy_from_user(packet_size_peek, &buf[PKT_LEN_OFFSET], | ||
438 | sizeof(packet_size_peek))) { | ||
439 | printk(KERN_WARNING "%s: Error while inspecting packet size\n", | ||
440 | __func__); | ||
441 | return -EFAULT; | ||
442 | } | ||
416 | 443 | ||
444 | rc = ecryptfs_parse_packet_length(packet_size_peek, &packet_size, | ||
445 | &packet_size_length); | ||
446 | if (rc) { | ||
447 | printk(KERN_WARNING "%s: Error parsing packet length; " | ||
448 | "rc = [%zd]\n", __func__, rc); | ||
449 | return rc; | ||
450 | } | ||
451 | |||
452 | if ((PKT_TYPE_SIZE + PKT_CTR_SIZE + packet_size_length + packet_size) | ||
453 | != count) { | ||
454 | printk(KERN_WARNING "%s: Invalid packet size [%zu]\n", __func__, | ||
455 | packet_size); | ||
456 | return -EINVAL; | ||
457 | } | ||
458 | |||
459 | memdup: | ||
417 | data = memdup_user(buf, count); | 460 | data = memdup_user(buf, count); |
418 | if (IS_ERR(data)) { | 461 | if (IS_ERR(data)) { |
419 | printk(KERN_ERR "%s: memdup_user returned error [%ld]\n", | 462 | printk(KERN_ERR "%s: memdup_user returned error [%ld]\n", |
420 | __func__, PTR_ERR(data)); | 463 | __func__, PTR_ERR(data)); |
421 | goto out; | 464 | return PTR_ERR(data); |
422 | } | 465 | } |
423 | sz = count; | 466 | switch (data[PKT_TYPE_OFFSET]) { |
424 | i = 0; | ||
425 | switch (data[i++]) { | ||
426 | case ECRYPTFS_MSG_RESPONSE: | 467 | case ECRYPTFS_MSG_RESPONSE: |
427 | if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) { | 468 | if (count < (MIN_MSG_PKT_SIZE |
469 | + sizeof(struct ecryptfs_message))) { | ||
428 | printk(KERN_WARNING "%s: Minimum acceptable packet " | 470 | printk(KERN_WARNING "%s: Minimum acceptable packet " |
429 | "size is [%zd], but amount of data written is " | 471 | "size is [%zd], but amount of data written is " |
430 | "only [%zd]. Discarding response packet.\n", | 472 | "only [%zd]. Discarding response packet.\n", |
431 | __func__, | 473 | __func__, |
432 | (1 + 4 + 1 + sizeof(struct ecryptfs_message)), | 474 | (MIN_MSG_PKT_SIZE |
433 | count); | 475 | + sizeof(struct ecryptfs_message)), count); |
476 | rc = -EINVAL; | ||
434 | goto out_free; | 477 | goto out_free; |
435 | } | 478 | } |
436 | memcpy(&counter_nbo, &data[i], 4); | 479 | memcpy(&counter_nbo, &data[PKT_CTR_OFFSET], PKT_CTR_SIZE); |
437 | seq = be32_to_cpu(counter_nbo); | 480 | seq = be32_to_cpu(counter_nbo); |
438 | i += 4; | 481 | rc = ecryptfs_miscdev_response( |
439 | rc = ecryptfs_parse_packet_length(&data[i], &packet_size, | 482 | &data[PKT_LEN_OFFSET + packet_size_length], |
440 | &packet_size_length); | 483 | packet_size, euid, current_user_ns(), |
484 | task_pid(current), seq); | ||
441 | if (rc) { | 485 | if (rc) { |
442 | printk(KERN_WARNING "%s: Error parsing packet length; " | ||
443 | "rc = [%d]\n", __func__, rc); | ||
444 | goto out_free; | ||
445 | } | ||
446 | i += packet_size_length; | ||
447 | if ((1 + 4 + packet_size_length + packet_size) != count) { | ||
448 | printk(KERN_WARNING "%s: (1 + packet_size_length([%zd])" | ||
449 | " + packet_size([%zd]))([%zd]) != " | ||
450 | "count([%zd]). Invalid packet format.\n", | ||
451 | __func__, packet_size_length, packet_size, | ||
452 | (1 + packet_size_length + packet_size), count); | ||
453 | goto out_free; | ||
454 | } | ||
455 | rc = ecryptfs_miscdev_response(&data[i], packet_size, | ||
456 | euid, current_user_ns(), | ||
457 | task_pid(current), seq); | ||
458 | if (rc) | ||
459 | printk(KERN_WARNING "%s: Failed to deliver miscdev " | 486 | printk(KERN_WARNING "%s: Failed to deliver miscdev " |
460 | "response to requesting operation; rc = [%d]\n", | 487 | "response to requesting operation; rc = [%zd]\n", |
461 | __func__, rc); | 488 | __func__, rc); |
489 | goto out_free; | ||
490 | } | ||
462 | break; | 491 | break; |
463 | case ECRYPTFS_MSG_HELO: | 492 | case ECRYPTFS_MSG_HELO: |
464 | case ECRYPTFS_MSG_QUIT: | 493 | case ECRYPTFS_MSG_QUIT: |
@@ -467,12 +496,13 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, | |||
467 | ecryptfs_printk(KERN_WARNING, "Dropping miscdev " | 496 | ecryptfs_printk(KERN_WARNING, "Dropping miscdev " |
468 | "message of unrecognized type [%d]\n", | 497 | "message of unrecognized type [%d]\n", |
469 | data[0]); | 498 | data[0]); |
470 | break; | 499 | rc = -EINVAL; |
500 | goto out_free; | ||
471 | } | 501 | } |
502 | rc = count; | ||
472 | out_free: | 503 | out_free: |
473 | kfree(data); | 504 | kfree(data); |
474 | out: | 505 | return rc; |
475 | return sz; | ||
476 | } | 506 | } |
477 | 507 | ||
478 | 508 | ||
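With the size checks above, a write smaller than MIN_NON_MSG_PKT_SIZE or larger than MAX_MSG_PKT_SIZE is rejected before any parsing. A userspace sketch of the smallest valid packet, following the documented layout (the ECRYPTFS_MSG_QUIT value is taken from the eCryptfs headers, and the miscdev node path is assumed):

	/* Octet 0: type; octets 1-4: counter in network byte order
	 * (zero for non-responses). HELO/QUIT carry no message body. */
	unsigned char pkt[5];
	uint32_t ctr = htonl(0);

	pkt[0] = ECRYPTFS_MSG_QUIT;
	memcpy(&pkt[1], &ctr, sizeof(ctr));
	/* write(fd, pkt, sizeof(pkt)) on the ecryptfs miscdev node */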
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 6a44148c5fb9..10ec695ccd68 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
@@ -57,6 +57,10 @@ struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index) | |||
57 | * @page: Page that is locked before this call is made | 57 | * @page: Page that is locked before this call is made |
58 | * | 58 | * |
59 | * Returns zero on success; non-zero otherwise | 59 | * Returns zero on success; non-zero otherwise |
60 | * | ||
61 | * This is where we encrypt the data and pass the encrypted data to | ||
62 | * the lower filesystem. In OpenPGP-compatible mode, we operate on | ||
63 | * entire underlying packets. | ||
60 | */ | 64 | */ |
61 | static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) | 65 | static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) |
62 | { | 66 | { |
@@ -481,10 +485,6 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode) | |||
481 | * @copied: The amount of data copied | 485 | * @copied: The amount of data copied |
482 | * @page: The eCryptfs page | 486 | * @page: The eCryptfs page |
483 | * @fsdata: The fsdata (unused) | 487 | * @fsdata: The fsdata (unused) |
484 | * | ||
485 | * This is where we encrypt the data and pass the encrypted data to | ||
486 | * the lower filesystem. In OpenPGP-compatible mode, we operate on | ||
487 | * entire underlying packets. | ||
488 | */ | 488 | */ |
489 | static int ecryptfs_write_end(struct file *file, | 489 | static int ecryptfs_write_end(struct file *file, |
490 | struct address_space *mapping, | 490 | struct address_space *mapping, |
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 3745f7c2b9c2..5c0106f75775 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c | |||
@@ -130,13 +130,18 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, | |||
130 | pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT); | 130 | pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT); |
131 | size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK); | 131 | size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK); |
132 | size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page); | 132 | size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page); |
133 | size_t total_remaining_bytes = ((offset + size) - pos); | 133 | loff_t total_remaining_bytes = ((offset + size) - pos); |
134 | |||
135 | if (fatal_signal_pending(current)) { | ||
136 | rc = -EINTR; | ||
137 | break; | ||
138 | } | ||
134 | 139 | ||
135 | if (num_bytes > total_remaining_bytes) | 140 | if (num_bytes > total_remaining_bytes) |
136 | num_bytes = total_remaining_bytes; | 141 | num_bytes = total_remaining_bytes; |
137 | if (pos < offset) { | 142 | if (pos < offset) { |
138 | /* remaining zeros to write, up to destination offset */ | 143 | /* remaining zeros to write, up to destination offset */ |
139 | size_t total_remaining_zeros = (offset - pos); | 144 | loff_t total_remaining_zeros = (offset - pos); |
140 | 145 | ||
141 | if (num_bytes > total_remaining_zeros) | 146 | if (num_bytes > total_remaining_zeros) |
142 | num_bytes = total_remaining_zeros; | 147 | num_bytes = total_remaining_zeros; |
@@ -193,15 +198,19 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, | |||
193 | } | 198 | } |
194 | pos += num_bytes; | 199 | pos += num_bytes; |
195 | } | 200 | } |
196 | if ((offset + size) > ecryptfs_file_size) { | 201 | if (pos > ecryptfs_file_size) { |
197 | i_size_write(ecryptfs_inode, (offset + size)); | 202 | i_size_write(ecryptfs_inode, pos); |
198 | if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) { | 203 | if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) { |
199 | rc = ecryptfs_write_inode_size_to_metadata( | 204 | int rc2; |
205 | |||
206 | rc2 = ecryptfs_write_inode_size_to_metadata( | ||
200 | ecryptfs_inode); | 207 | ecryptfs_inode); |
201 | if (rc) { | 208 | if (rc2) { |
202 | printk(KERN_ERR "Problem with " | 209 | printk(KERN_ERR "Problem with " |
203 | "ecryptfs_write_inode_size_to_metadata; " | 210 | "ecryptfs_write_inode_size_to_metadata; " |
204 | "rc = [%d]\n", rc); | 211 | "rc = [%d]\n", rc2); |
212 | if (!rc) | ||
213 | rc = rc2; | ||
205 | goto out; | 214 | goto out; |
206 | } | 215 | } |
207 | } | 216 | } |
@@ -273,76 +282,3 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, | |||
273 | flush_dcache_page(page_for_ecryptfs); | 282 | flush_dcache_page(page_for_ecryptfs); |
274 | return rc; | 283 | return rc; |
275 | } | 284 | } |
276 | |||
277 | #if 0 | ||
278 | /** | ||
279 | * ecryptfs_read | ||
280 | * @data: The virtual address into which to write the data read (and | ||
281 | * possibly decrypted) from the lower file | ||
282 | * @offset: The offset in the decrypted view of the file from which to | ||
283 | * read into @data | ||
284 | * @size: The number of bytes to read into @data | ||
285 | * @ecryptfs_file: The eCryptfs file from which to read | ||
286 | * | ||
287 | * Read an arbitrary amount of data from an arbitrary location in the | ||
288 | * eCryptfs page cache. This is done on an extent-by-extent basis; | ||
289 | * individual extents are decrypted and read from the lower page | ||
290 | * cache (via VFS reads). This function takes care of all the | ||
291 | * address translation to locations in the lower filesystem. | ||
292 | * | ||
293 | * Returns zero on success; non-zero otherwise | ||
294 | */ | ||
295 | int ecryptfs_read(char *data, loff_t offset, size_t size, | ||
296 | struct file *ecryptfs_file) | ||
297 | { | ||
298 | struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode; | ||
299 | struct page *ecryptfs_page; | ||
300 | char *ecryptfs_page_virt; | ||
301 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); | ||
302 | loff_t data_offset = 0; | ||
303 | loff_t pos; | ||
304 | int rc = 0; | ||
305 | |||
306 | if ((offset + size) > ecryptfs_file_size) { | ||
307 | rc = -EINVAL; | ||
308 | printk(KERN_ERR "%s: Attempt to read data past the end of the " | ||
309 | "file; offset = [%lld]; size = [%td]; " | ||
310 | "ecryptfs_file_size = [%lld]\n", | ||
311 | __func__, offset, size, ecryptfs_file_size); | ||
312 | goto out; | ||
313 | } | ||
314 | pos = offset; | ||
315 | while (pos < (offset + size)) { | ||
316 | pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT); | ||
317 | size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK); | ||
318 | size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page); | ||
319 | size_t total_remaining_bytes = ((offset + size) - pos); | ||
320 | |||
321 | if (num_bytes > total_remaining_bytes) | ||
322 | num_bytes = total_remaining_bytes; | ||
323 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_inode, | ||
324 | ecryptfs_page_idx); | ||
325 | if (IS_ERR(ecryptfs_page)) { | ||
326 | rc = PTR_ERR(ecryptfs_page); | ||
327 | printk(KERN_ERR "%s: Error getting page at " | ||
328 | "index [%ld] from eCryptfs inode " | ||
329 | "mapping; rc = [%d]\n", __func__, | ||
330 | ecryptfs_page_idx, rc); | ||
331 | goto out; | ||
332 | } | ||
333 | ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); | ||
334 | memcpy((data + data_offset), | ||
335 | ((char *)ecryptfs_page_virt + start_offset_in_page), | ||
336 | num_bytes); | ||
337 | kunmap_atomic(ecryptfs_page_virt, KM_USER0); | ||
338 | flush_dcache_page(ecryptfs_page); | ||
339 | SetPageUptodate(ecryptfs_page); | ||
340 | unlock_page(ecryptfs_page); | ||
341 | page_cache_release(ecryptfs_page); | ||
342 | pos += num_bytes; | ||
343 | data_offset += num_bytes; | ||
344 | } | ||
345 | out: | ||
346 | return rc; | ||
347 | } | ||
348 | #endif /* 0 */ | ||
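The rc/rc2 split in the ecryptfs_write() hunk above is the usual kernel idiom for reporting a secondary failure without clobbering an earlier error code. A minimal userspace sketch of the idiom, with do_primary() and do_secondary() as hypothetical stand-ins, not kernel APIs:

#include <stdio.h>

/* hypothetical stand-ins for the two steps being combined */
static int do_primary(void)   { return 0; }
static int do_secondary(void) { return -5; /* pretend -EIO */ }

static int do_work(void)
{
	int rc = do_primary();
	int rc2 = do_secondary();	/* runs even if rc is already set */

	if (rc2) {
		fprintf(stderr, "secondary step failed; rc2 = [%d]\n", rc2);
		if (!rc)		/* preserve the first error */
			rc = rc2;
	}
	return rc;
}

int main(void)
{
	return do_work() ? 1 : 0;
}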
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 828e750af23a..aabdfc38cf24 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -197,6 +197,12 @@ struct eventpoll { | |||
197 | 197 | ||
198 | /* The user that created the eventpoll descriptor */ | 198 | /* The user that created the eventpoll descriptor */ |
199 | struct user_struct *user; | 199 | struct user_struct *user; |
200 | |||
201 | struct file *file; | ||
202 | |||
203 | /* used to optimize loop detection check */ | ||
204 | int visited; | ||
205 | struct list_head visited_list_link; | ||
200 | }; | 206 | }; |
201 | 207 | ||
202 | /* Wait structure used by the poll hooks */ | 208 | /* Wait structure used by the poll hooks */ |
@@ -255,6 +261,15 @@ static struct kmem_cache *epi_cache __read_mostly; | |||
255 | /* Slab cache used to allocate "struct eppoll_entry" */ | 261 | /* Slab cache used to allocate "struct eppoll_entry" */ |
256 | static struct kmem_cache *pwq_cache __read_mostly; | 262 | static struct kmem_cache *pwq_cache __read_mostly; |
257 | 263 | ||
264 | /* Visited nodes during ep_loop_check(), so we can unset them when we finish */ | ||
265 | static LIST_HEAD(visited_list); | ||
266 | |||
267 | /* | ||
268 | * List of files with newly added links, where we may need to limit the number | ||
269 | * of emanating paths. Protected by the epmutex. | ||
270 | */ | ||
271 | static LIST_HEAD(tfile_check_list); | ||
272 | |||
258 | #ifdef CONFIG_SYSCTL | 273 | #ifdef CONFIG_SYSCTL |
259 | 274 | ||
260 | #include <linux/sysctl.h> | 275 | #include <linux/sysctl.h> |
@@ -276,6 +291,12 @@ ctl_table epoll_table[] = { | |||
276 | }; | 291 | }; |
277 | #endif /* CONFIG_SYSCTL */ | 292 | #endif /* CONFIG_SYSCTL */ |
278 | 293 | ||
294 | static const struct file_operations eventpoll_fops; | ||
295 | |||
296 | static inline int is_file_epoll(struct file *f) | ||
297 | { | ||
298 | return f->f_op == &eventpoll_fops; | ||
299 | } | ||
279 | 300 | ||
280 | /* Setup the structure that is used as key for the RB tree */ | 301 | /* Setup the structure that is used as key for the RB tree */ |
281 | static inline void ep_set_ffd(struct epoll_filefd *ffd, | 302 | static inline void ep_set_ffd(struct epoll_filefd *ffd, |
@@ -711,12 +732,6 @@ static const struct file_operations eventpoll_fops = { | |||
711 | .llseek = noop_llseek, | 732 | .llseek = noop_llseek, |
712 | }; | 733 | }; |
713 | 734 | ||
714 | /* Fast test to see if the file is an eventpoll file */ | ||
715 | static inline int is_file_epoll(struct file *f) | ||
716 | { | ||
717 | return f->f_op == &eventpoll_fops; | ||
718 | } | ||
719 | |||
720 | /* | 735 | /* |
721 | * This is called from eventpoll_release() to unlink files from the eventpoll | 736 | * This is called from eventpoll_release() to unlink files from the eventpoll |
722 | * interface. We need to have this facility to cleanup correctly files that are | 737 | * interface. We need to have this facility to cleanup correctly files that are |
@@ -926,6 +941,99 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) | |||
926 | rb_insert_color(&epi->rbn, &ep->rbr); | 941 | rb_insert_color(&epi->rbn, &ep->rbr); |
927 | } | 942 | } |
928 | 943 | ||
944 | |||
945 | |||
946 | #define PATH_ARR_SIZE 5 | ||
947 | /* | ||
948 | * These are the numbers of paths of length 1 to 5 that we allow to emanate | ||
949 | * from a single file of interest. For example, we allow 1000 paths of length | ||
950 | * 1, to emanate from each file of interest. This essentially represents the | ||
951 | * potential wakeup paths, which need to be limited in order to avoid massive | ||
952 | * uncontrolled wakeup storms. The common use case should be a single ep which | ||
953 | * is connected to n file sources. In this case each file source has 1 path | ||
954 | * of length 1. Thus, the numbers below should be more than sufficient. These | ||
955 | * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify | ||
956 | * and delete can't add additional paths. Protected by the epmutex. | ||
957 | */ | ||
958 | static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 }; | ||
959 | static int path_count[PATH_ARR_SIZE]; | ||
960 | |||
961 | static int path_count_inc(int nests) | ||
962 | { | ||
963 | if (++path_count[nests] > path_limits[nests]) | ||
964 | return -1; | ||
965 | return 0; | ||
966 | } | ||
967 | |||
968 | static void path_count_init(void) | ||
969 | { | ||
970 | int i; | ||
971 | |||
972 | for (i = 0; i < PATH_ARR_SIZE; i++) | ||
973 | path_count[i] = 0; | ||
974 | } | ||
975 | |||
976 | static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) | ||
977 | { | ||
978 | int error = 0; | ||
979 | struct file *file = priv; | ||
980 | struct file *child_file; | ||
981 | struct epitem *epi; | ||
982 | |||
983 | list_for_each_entry(epi, &file->f_ep_links, fllink) { | ||
984 | child_file = epi->ep->file; | ||
985 | if (is_file_epoll(child_file)) { | ||
986 | if (list_empty(&child_file->f_ep_links)) { | ||
987 | if (path_count_inc(call_nests)) { | ||
988 | error = -1; | ||
989 | break; | ||
990 | } | ||
991 | } else { | ||
992 | error = ep_call_nested(&poll_loop_ncalls, | ||
993 | EP_MAX_NESTS, | ||
994 | reverse_path_check_proc, | ||
995 | child_file, child_file, | ||
996 | current); | ||
997 | } | ||
998 | if (error != 0) | ||
999 | break; | ||
1000 | } else { | ||
1001 | printk(KERN_ERR "reverse_path_check_proc: " | ||
1002 | "file is not an ep!\n"); | ||
1003 | } | ||
1004 | } | ||
1005 | return error; | ||
1006 | } | ||
1007 | |||
1008 | /** | ||
1009 | * reverse_path_check - The tfile_check_list is a list of file *, which have | ||
1010 | * links that are proposed to be newly added. We need to | ||
1011 | * make sure that those added links don't add too many | ||
1012 | * paths such that we will spend all our time waking up | ||
1013 | * eventpoll objects. | ||
1014 | * | ||
1015 | * Returns: zero if the proposed links don't create too many paths, | ||
1016 | * -1 otherwise. | ||
1017 | */ | ||
1018 | static int reverse_path_check(void) | ||
1019 | { | ||
1020 | int length = 0; | ||
1021 | int error = 0; | ||
1022 | struct file *current_file; | ||
1023 | |||
1024 | /* let's call this for all tfiles */ | ||
1025 | list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { | ||
1026 | length++; | ||
1027 | path_count_init(); | ||
1028 | error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, | ||
1029 | reverse_path_check_proc, current_file, | ||
1030 | current_file, current); | ||
1031 | if (error) | ||
1032 | break; | ||
1033 | } | ||
1034 | return error; | ||
1035 | } | ||
1036 | |||
929 | /* | 1037 | /* |
930 | * Must be called with "mtx" held. | 1038 | * Must be called with "mtx" held. |
931 | */ | 1039 | */ |
@@ -987,6 +1095,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
987 | */ | 1095 | */ |
988 | ep_rbtree_insert(ep, epi); | 1096 | ep_rbtree_insert(ep, epi); |
989 | 1097 | ||
1098 | /* now check if we've created too many backpaths */ | ||
1099 | error = -EINVAL; | ||
1100 | if (reverse_path_check()) | ||
1101 | goto error_remove_epi; | ||
1102 | |||
990 | /* We have to drop the new item inside our item list to keep track of it */ | 1103 | /* We have to drop the new item inside our item list to keep track of it */ |
991 | spin_lock_irqsave(&ep->lock, flags); | 1104 | spin_lock_irqsave(&ep->lock, flags); |
992 | 1105 | ||
@@ -1011,6 +1124,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, | |||
1011 | 1124 | ||
1012 | return 0; | 1125 | return 0; |
1013 | 1126 | ||
1127 | error_remove_epi: | ||
1128 | spin_lock(&tfile->f_lock); | ||
1129 | if (ep_is_linked(&epi->fllink)) | ||
1130 | list_del_init(&epi->fllink); | ||
1131 | spin_unlock(&tfile->f_lock); | ||
1132 | |||
1133 | rb_erase(&epi->rbn, &ep->rbr); | ||
1134 | |||
1014 | error_unregister: | 1135 | error_unregister: |
1015 | ep_unregister_pollwait(ep, epi); | 1136 | ep_unregister_pollwait(ep, epi); |
1016 | 1137 | ||
@@ -1275,18 +1396,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) | |||
1275 | int error = 0; | 1396 | int error = 0; |
1276 | struct file *file = priv; | 1397 | struct file *file = priv; |
1277 | struct eventpoll *ep = file->private_data; | 1398 | struct eventpoll *ep = file->private_data; |
1399 | struct eventpoll *ep_tovisit; | ||
1278 | struct rb_node *rbp; | 1400 | struct rb_node *rbp; |
1279 | struct epitem *epi; | 1401 | struct epitem *epi; |
1280 | 1402 | ||
1281 | mutex_lock_nested(&ep->mtx, call_nests + 1); | 1403 | mutex_lock_nested(&ep->mtx, call_nests + 1); |
1404 | ep->visited = 1; | ||
1405 | list_add(&ep->visited_list_link, &visited_list); | ||
1282 | for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { | 1406 | for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { |
1283 | epi = rb_entry(rbp, struct epitem, rbn); | 1407 | epi = rb_entry(rbp, struct epitem, rbn); |
1284 | if (unlikely(is_file_epoll(epi->ffd.file))) { | 1408 | if (unlikely(is_file_epoll(epi->ffd.file))) { |
1409 | ep_tovisit = epi->ffd.file->private_data; | ||
1410 | if (ep_tovisit->visited) | ||
1411 | continue; | ||
1285 | error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, | 1412 | error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, |
1286 | ep_loop_check_proc, epi->ffd.file, | 1413 | ep_loop_check_proc, epi->ffd.file, |
1287 | epi->ffd.file->private_data, current); | 1414 | ep_tovisit, current); |
1288 | if (error != 0) | 1415 | if (error != 0) |
1289 | break; | 1416 | break; |
1417 | } else { | ||
1418 | /* | ||
1419 | * If we've reached a file that is not associated with | ||
1420 | * an ep, then we need to check if the newly added | ||
1421 | * links are going to add too many wakeup paths. We do | ||
1422 | * this by adding it to the tfile_check_list, if it's | ||
1423 | * not already there, and calling reverse_path_check() | ||
1424 | * during ep_insert(). | ||
1425 | */ | ||
1426 | if (list_empty(&epi->ffd.file->f_tfile_llink)) | ||
1427 | list_add(&epi->ffd.file->f_tfile_llink, | ||
1428 | &tfile_check_list); | ||
1290 | } | 1429 | } |
1291 | } | 1430 | } |
1292 | mutex_unlock(&ep->mtx); | 1431 | mutex_unlock(&ep->mtx); |
@@ -1307,8 +1446,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) | |||
1307 | */ | 1446 | */ |
1308 | static int ep_loop_check(struct eventpoll *ep, struct file *file) | 1447 | static int ep_loop_check(struct eventpoll *ep, struct file *file) |
1309 | { | 1448 | { |
1310 | return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, | 1449 | int ret; |
1450 | struct eventpoll *ep_cur, *ep_next; | ||
1451 | |||
1452 | ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, | ||
1311 | ep_loop_check_proc, file, ep, current); | 1453 | ep_loop_check_proc, file, ep, current); |
1454 | /* clear visited list */ | ||
1455 | list_for_each_entry_safe(ep_cur, ep_next, &visited_list, | ||
1456 | visited_list_link) { | ||
1457 | ep_cur->visited = 0; | ||
1458 | list_del(&ep_cur->visited_list_link); | ||
1459 | } | ||
1460 | return ret; | ||
1461 | } | ||
1462 | |||
1463 | static void clear_tfile_check_list(void) | ||
1464 | { | ||
1465 | struct file *file; | ||
1466 | |||
1467 | /* first clear the tfile_check_list */ | ||
1468 | while (!list_empty(&tfile_check_list)) { | ||
1469 | file = list_first_entry(&tfile_check_list, struct file, | ||
1470 | f_tfile_llink); | ||
1471 | list_del_init(&file->f_tfile_llink); | ||
1472 | } | ||
1473 | INIT_LIST_HEAD(&tfile_check_list); | ||
1312 | } | 1474 | } |
1313 | 1475 | ||
1314 | /* | 1476 | /* |
@@ -1316,8 +1478,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file) | |||
1316 | */ | 1478 | */ |
1317 | SYSCALL_DEFINE1(epoll_create1, int, flags) | 1479 | SYSCALL_DEFINE1(epoll_create1, int, flags) |
1318 | { | 1480 | { |
1319 | int error; | 1481 | int error, fd; |
1320 | struct eventpoll *ep = NULL; | 1482 | struct eventpoll *ep = NULL; |
1483 | struct file *file; | ||
1321 | 1484 | ||
1322 | /* Check the EPOLL_* constant for consistency. */ | 1485 | /* Check the EPOLL_* constant for consistency. */ |
1323 | BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); | 1486 | BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); |
@@ -1334,11 +1497,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) | |||
1334 | * Creates all the items needed to setup an eventpoll file. That is, | 1497 | * Creates all the items needed to setup an eventpoll file. That is, |
1335 | * a file structure and a free file descriptor. | 1498 | * a file structure and a free file descriptor. |
1336 | */ | 1499 | */ |
1337 | error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, | 1500 | fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC)); |
1501 | if (fd < 0) { | ||
1502 | error = fd; | ||
1503 | goto out_free_ep; | ||
1504 | } | ||
1505 | file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep, | ||
1338 | O_RDWR | (flags & O_CLOEXEC)); | 1506 | O_RDWR | (flags & O_CLOEXEC)); |
1339 | if (error < 0) | 1507 | if (IS_ERR(file)) { |
1340 | ep_free(ep); | 1508 | error = PTR_ERR(file); |
1341 | 1509 | goto out_free_fd; | |
1510 | } | ||
1511 | fd_install(fd, file); | ||
1512 | ep->file = file; | ||
1513 | return fd; | ||
1514 | |||
1515 | out_free_fd: | ||
1516 | put_unused_fd(fd); | ||
1517 | out_free_ep: | ||
1518 | ep_free(ep); | ||
1342 | return error; | 1519 | return error; |
1343 | } | 1520 | } |
1344 | 1521 | ||
@@ -1404,21 +1581,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1404 | /* | 1581 | /* |
1405 | * When we insert an epoll file descriptor, inside another epoll file | 1582 | * When we insert an epoll file descriptor, inside another epoll file |
1406 | * descriptor, there is the chance of creating closed loops, which are | 1583 | * descriptor, there is the chance of creating closed loops, which are |
1407 | * better handled here than in more critical paths. | 1584 | * better handled here than in more critical paths. While we are |
1585 | * checking for loops we also determine the list of files reachable | ||
1586 | * and hang them on the tfile_check_list, so we can check that we | ||
1587 | * haven't created too many possible wakeup paths. | ||
1408 | * | 1588 | * |
1409 | * We hold epmutex across the loop check and the insert in this case, in | 1589 | * We need to hold the epmutex across both ep_insert and ep_remove |
1410 | * order to prevent two separate inserts from racing and each doing the | 1590 | * because we want to make sure we are looking at a coherent view of |
1411 | * insert "at the same time" such that ep_loop_check passes on both | 1591 | * the epoll network. |
1412 | * before either one does the insert, thereby creating a cycle. | ||
1413 | */ | 1592 | */ |
1414 | if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) { | 1593 | if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) { |
1415 | mutex_lock(&epmutex); | 1594 | mutex_lock(&epmutex); |
1416 | did_lock_epmutex = 1; | 1595 | did_lock_epmutex = 1; |
1417 | error = -ELOOP; | ||
1418 | if (ep_loop_check(ep, tfile) != 0) | ||
1419 | goto error_tgt_fput; | ||
1420 | } | 1596 | } |
1421 | 1597 | if (op == EPOLL_CTL_ADD) { | |
1598 | if (is_file_epoll(tfile)) { | ||
1599 | error = -ELOOP; | ||
1600 | if (ep_loop_check(ep, tfile) != 0) | ||
1601 | goto error_tgt_fput; | ||
1602 | } else | ||
1603 | list_add(&tfile->f_tfile_llink, &tfile_check_list); | ||
1604 | } | ||
1422 | 1605 | ||
1423 | mutex_lock_nested(&ep->mtx, 0); | 1606 | mutex_lock_nested(&ep->mtx, 0); |
1424 | 1607 | ||
@@ -1437,6 +1620,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1437 | error = ep_insert(ep, &epds, tfile, fd); | 1620 | error = ep_insert(ep, &epds, tfile, fd); |
1438 | } else | 1621 | } else |
1439 | error = -EEXIST; | 1622 | error = -EEXIST; |
1623 | clear_tfile_check_list(); | ||
1440 | break; | 1624 | break; |
1441 | case EPOLL_CTL_DEL: | 1625 | case EPOLL_CTL_DEL: |
1442 | if (epi) | 1626 | if (epi) |
@@ -1455,7 +1639,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1455 | mutex_unlock(&ep->mtx); | 1639 | mutex_unlock(&ep->mtx); |
1456 | 1640 | ||
1457 | error_tgt_fput: | 1641 | error_tgt_fput: |
1458 | if (unlikely(did_lock_epmutex)) | 1642 | if (did_lock_epmutex) |
1459 | mutex_unlock(&epmutex); | 1643 | mutex_unlock(&epmutex); |
1460 | 1644 | ||
1461 | fput(tfile); | 1645 | fput(tfile); |
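For reference, the loop that ep_loop_check() rejects is easy to provoke from userspace: once two epoll instances watch each other, wakeups could circulate forever. With this hunk applied, the second EPOLL_CTL_ADD below fails with ELOOP. A minimal sketch:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>

int main(void)
{
	int ep1 = epoll_create1(0);
	int ep2 = epoll_create1(0);
	struct epoll_event ev = { .events = EPOLLIN };

	/* ep1 watching ep2 is fine: no cycle yet */
	if (epoll_ctl(ep1, EPOLL_CTL_ADD, ep2, &ev) < 0)
		perror("first add");

	/* ep2 watching ep1 closes the loop: the kernel returns ELOOP */
	if (epoll_ctl(ep2, EPOLL_CTL_ADD, ep1, &ev) < 0)
		printf("second add failed: %s\n", strerror(errno));
	return 0;
}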
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -59,6 +59,8 @@ | |||
59 | #include <asm/uaccess.h> | 59 | #include <asm/uaccess.h> |
60 | #include <asm/mmu_context.h> | 60 | #include <asm/mmu_context.h> |
61 | #include <asm/tlb.h> | 61 | #include <asm/tlb.h> |
62 | |||
63 | #include <trace/events/task.h> | ||
62 | #include "internal.h" | 64 | #include "internal.h" |
63 | 65 | ||
64 | int core_uses_pid; | 66 | int core_uses_pid; |
@@ -1054,6 +1056,8 @@ void set_task_comm(struct task_struct *tsk, char *buf) | |||
1054 | { | 1056 | { |
1055 | task_lock(tsk); | 1057 | task_lock(tsk); |
1056 | 1058 | ||
1059 | trace_task_rename(tsk, buf); | ||
1060 | |||
1057 | /* | 1061 | /* |
1058 | * Threads may access current->comm without holding | 1062 | * Threads may access current->comm without holding |
1059 | * the task lock, so write the string carefully. | 1063 | * the task lock, so write the string carefully. |
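One userspace route into set_task_comm(), and therefore into the new task_rename tracepoint, is prctl(PR_SET_NAME); a minimal sketch to exercise it:

#include <sys/prctl.h>

int main(void)
{
	/* renames the calling thread (comm is truncated to 15 chars);
	 * with the task_rename event enabled this logs old and new comm */
	prctl(PR_SET_NAME, "demo-worker");
	return 0;
}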
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig index da42f32c49be..86194b2f799d 100644 --- a/fs/exofs/Kconfig +++ b/fs/exofs/Kconfig | |||
@@ -1,14 +1,3 @@ | |||
1 | # Note ORE needs to "select ASYNC_XOR". So as not to force multiple selects | ||
2 | # for every ORE user we do it like this. Any user should add itself here | ||
3 | # at the "depends on EXOFS_FS || ..." with an ||. The dependencies are | ||
4 | # selected here, and we default to "ON". So in effect it is like being | ||
5 | # selected by any of the users. | ||
6 | config ORE | ||
7 | tristate | ||
8 | depends on EXOFS_FS || PNFS_OBJLAYOUT | ||
9 | select ASYNC_XOR | ||
10 | default SCSI_OSD_ULD | ||
11 | |||
12 | config EXOFS_FS | 1 | config EXOFS_FS |
13 | tristate "exofs: OSD based file system support" | 2 | tristate "exofs: OSD based file system support" |
14 | depends on SCSI_OSD_ULD | 3 | depends on SCSI_OSD_ULD |
diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore new file mode 100644 index 000000000000..1ca7fb7b6ba8 --- /dev/null +++ b/fs/exofs/Kconfig.ore | |||
@@ -0,0 +1,12 @@ | |||
1 | # ORE - Objects Raid Engine (libore.ko) | ||
2 | # | ||
3 | # Note ORE needs to "select ASYNC_XOR". So as not to force multiple selects | ||
4 | # for every ORE user we do it like this. Any user should add itself here | ||
5 | # at the "depends on EXOFS_FS || ..." with an ||. The dependencies are | ||
6 | # selected here, and we default to "ON". So in effect it is like being | ||
7 | # selected by any of the users. | ||
8 | config ORE | ||
9 | tristate | ||
10 | depends on EXOFS_FS || PNFS_OBJLAYOUT | ||
11 | select ASYNC_XOR | ||
12 | default SCSI_OSD_ULD | ||
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index d271ad837202..49cf230554a2 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c | |||
@@ -266,7 +266,7 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, | |||
266 | 266 | ||
267 | /* first/last seg is split */ | 267 | /* first/last seg is split */ |
268 | num_raid_units += layout->group_width; | 268 | num_raid_units += layout->group_width; |
269 | sgs_per_dev = div_u64(num_raid_units, data_devs); | 269 | sgs_per_dev = div_u64(num_raid_units, data_devs) + 2; |
270 | } else { | 270 | } else { |
271 | /* For Writes add parity pages array. */ | 271 | /* For Writes add parity pages array. */ |
272 | max_par_pages = num_raid_units * pages_in_unit * | 272 | max_par_pages = num_raid_units * pages_in_unit * |
@@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error) | |||
445 | u64 residual = ios->reading ? | 445 | u64 residual = ios->reading ? |
446 | or->in.residual : or->out.residual; | 446 | or->in.residual : or->out.residual; |
447 | u64 offset = (ios->offset + ios->length) - residual; | 447 | u64 offset = (ios->offset + ios->length) - residual; |
448 | struct ore_dev *od = ios->oc->ods[ | 448 | unsigned dev = per_dev->dev - ios->oc->first_dev; |
449 | per_dev->dev - ios->oc->first_dev]; | 449 | struct ore_dev *od = ios->oc->ods[dev]; |
450 | 450 | ||
451 | on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri, | 451 | on_dev_error(ios, od, dev, osi.osd_err_pri, |
452 | offset, residual); | 452 | offset, residual); |
453 | } | 453 | } |
454 | if (osi.osd_err_pri >= acumulated_osd_err) { | 454 | if (osi.osd_err_pri >= acumulated_osd_err) { |
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 29c47e5c4a86..d222c77cfa1b 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c | |||
@@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios) | |||
328 | /* @si contains info of the to-be-inserted page. Update of @si should be | 328 | /* @si contains info of the to-be-inserted page. Update of @si should be |
329 | * maintained by caller. Specifically si->dev, si->obj_offset, ... | 329 | * maintained by caller. Specifically si->dev, si->obj_offset, ... |
330 | */ | 330 | */ |
331 | static int _add_to_read_4_write(struct ore_io_state *ios, | 331 | static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si, |
332 | struct ore_striping_info *si, struct page *page) | 332 | struct page *page, unsigned pg_len) |
333 | { | 333 | { |
334 | struct request_queue *q; | 334 | struct request_queue *q; |
335 | struct ore_per_dev_state *per_dev; | 335 | struct ore_per_dev_state *per_dev; |
@@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios, | |||
366 | _ore_add_sg_seg(per_dev, gap, true); | 366 | _ore_add_sg_seg(per_dev, gap, true); |
367 | } | 367 | } |
368 | q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); | 368 | q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); |
369 | added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0); | 369 | added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len, |
370 | if (unlikely(added_len != PAGE_SIZE)) { | 370 | si->obj_offset % PAGE_SIZE); |
371 | if (unlikely(added_len != pg_len)) { | ||
371 | ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", | 372 | ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", |
372 | per_dev->bio->bi_vcnt); | 373 | per_dev->bio->bi_vcnt); |
373 | return -ENOMEM; | 374 | return -ENOMEM; |
374 | } | 375 | } |
375 | 376 | ||
376 | per_dev->length += PAGE_SIZE; | 377 | per_dev->length += pg_len; |
377 | return 0; | 378 | return 0; |
378 | } | 379 | } |
379 | 380 | ||
381 | /* read the beginning of an unaligned first page */ | ||
382 | static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page) | ||
383 | { | ||
384 | struct ore_striping_info si; | ||
385 | unsigned pg_len; | ||
386 | |||
387 | ore_calc_stripe_info(ios->layout, ios->offset, 0, &si); | ||
388 | |||
389 | pg_len = si.obj_offset % PAGE_SIZE; | ||
390 | si.obj_offset -= pg_len; | ||
391 | |||
392 | ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n", | ||
393 | _LLU(si.obj_offset), pg_len, page->index, si.dev); | ||
394 | |||
395 | return _add_to_r4w(ios, &si, page, pg_len); | ||
396 | } | ||
397 | |||
398 | /* read the end of an incomplete last page */ | ||
399 | static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset) | ||
400 | { | ||
401 | struct ore_striping_info si; | ||
402 | struct page *page; | ||
403 | unsigned pg_len, p, c; | ||
404 | |||
405 | ore_calc_stripe_info(ios->layout, *offset, 0, &si); | ||
406 | |||
407 | p = si.unit_off / PAGE_SIZE; | ||
408 | c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, | ||
409 | ios->layout->mirrors_p1, si.par_dev, si.dev); | ||
410 | page = ios->sp2d->_1p_stripes[p].pages[c]; | ||
411 | |||
412 | pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE); | ||
413 | *offset += pg_len; | ||
414 | |||
415 | ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n", | ||
416 | p, c, _LLU(*offset), pg_len, si.dev, si.par_dev); | ||
417 | |||
418 | BUG_ON(!page); | ||
419 | |||
420 | return _add_to_r4w(ios, &si, page, pg_len); | ||
421 | } | ||
422 | |||
380 | static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) | 423 | static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) |
381 | { | 424 | { |
382 | struct bio_vec *bv; | 425 | struct bio_vec *bv; |
@@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_state *ios) | |||
444 | struct page **pp = &_1ps->pages[c]; | 487 | struct page **pp = &_1ps->pages[c]; |
445 | bool uptodate; | 488 | bool uptodate; |
446 | 489 | ||
447 | if (*pp) | 490 | if (*pp) { |
491 | if (ios->offset % PAGE_SIZE) | ||
492 | /* Read the remainder of the page */ | ||
493 | _add_to_r4w_first_page(ios, *pp); | ||
448 | /* to-be-written pages start here */ | 494 | /* to-be-written pages start here */ |
449 | goto read_last_stripe; | 495 | goto read_last_stripe; |
496 | } | ||
450 | 497 | ||
451 | *pp = ios->r4w->get_page(ios->private, offset, | 498 | *pp = ios->r4w->get_page(ios->private, offset, |
452 | &uptodate); | 499 | &uptodate); |
@@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_state *ios) | |||
454 | return -ENOMEM; | 501 | return -ENOMEM; |
455 | 502 | ||
456 | if (!uptodate) | 503 | if (!uptodate) |
457 | _add_to_read_4_write(ios, &read_si, *pp); | 504 | _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE); |
458 | 505 | ||
459 | /* Mark read-pages to be cache_released */ | 506 | /* Mark read-pages to be cache_released */ |
460 | _1ps->page_is_read[c] = true; | 507 | _1ps->page_is_read[c] = true; |
@@ -465,8 +512,11 @@ static int _read_4_write(struct ore_io_state *ios) | |||
465 | } | 512 | } |
466 | 513 | ||
467 | read_last_stripe: | 514 | read_last_stripe: |
468 | offset = ios->offset + (ios->length + PAGE_SIZE - 1) / | 515 | offset = ios->offset + ios->length; |
469 | PAGE_SIZE * PAGE_SIZE; | 516 | if (offset % PAGE_SIZE) |
517 | _add_to_r4w_last_page(ios, &offset); | ||
518 | /* offset will be aligned to next page */ | ||
519 | |||
470 | last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) | 520 | last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) |
471 | * bytes_in_stripe; | 521 | * bytes_in_stripe; |
472 | if (offset == last_stripe_end) /* Optimize for the aligned case */ | 522 | if (offset == last_stripe_end) /* Optimize for the aligned case */ |
@@ -503,7 +553,7 @@ read_last_stripe: | |||
503 | /* Mark read-pages to be cache_released */ | 553 | /* Mark read-pages to be cache_released */ |
504 | _1ps->page_is_read[c] = true; | 554 | _1ps->page_is_read[c] = true; |
505 | if (!uptodate) | 555 | if (!uptodate) |
506 | _add_to_read_4_write(ios, &read_si, page); | 556 | _add_to_r4w(ios, &read_si, page, PAGE_SIZE); |
507 | } | 557 | } |
508 | 558 | ||
509 | offset += PAGE_SIZE; | 559 | offset += PAGE_SIZE; |
@@ -551,7 +601,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios, | |||
551 | unsigned cur_len) | 601 | unsigned cur_len) |
552 | { | 602 | { |
553 | if (ios->reading) { | 603 | if (ios->reading) { |
554 | BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev); | 604 | if (per_dev->cur_sg >= ios->sgs_per_dev) { |
605 | ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" , | ||
606 | per_dev->cur_sg, ios->sgs_per_dev); | ||
607 | return -ENOMEM; | ||
608 | } | ||
555 | _ore_add_sg_seg(per_dev, cur_len, true); | 609 | _ore_add_sg_seg(per_dev, cur_len, true); |
556 | } else { | 610 | } else { |
557 | struct __stripe_pages_2d *sp2d = ios->sp2d; | 611 | struct __stripe_pages_2d *sp2d = ios->sp2d; |
@@ -612,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) | |||
612 | return -ENOMEM; | 666 | return -ENOMEM; |
613 | } | 667 | } |
614 | 668 | ||
615 | BUG_ON(ios->offset % PAGE_SIZE); | ||
616 | |||
617 | /* Round io down to last full strip */ | 669 | /* Round io down to last full strip */ |
618 | first_stripe = div_u64(ios->offset, stripe_size); | 670 | first_stripe = div_u64(ios->offset, stripe_size); |
619 | last_stripe = div_u64(ios->offset + ios->length, stripe_size); | 671 | last_stripe = div_u64(ios->offset + ios->length, stripe_size); |
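Both new helpers come down to the same page arithmetic: the head of an unaligned first page and the tail of an incomplete last page must be read in before the RAID stripe can be written back. A standalone sketch of that split, assuming 4 KiB pages and sample offsets:

#include <stdio.h>

#define PAGE_SIZE 4096ULL

int main(void)
{
	unsigned long long offset = 5000, length = 10000;	/* sample I/O */
	unsigned long long end = offset + length;

	/* bytes of the first page before the write that must be read */
	unsigned long long head = offset % PAGE_SIZE;
	/* bytes of the last page after the write that must be read */
	unsigned long long tail = (end % PAGE_SIZE) ?
				  PAGE_SIZE - (end % PAGE_SIZE) : 0;

	printf("head=%llu tail=%llu\n", head, tail);	/* head=904 tail=1384 */
	return 0;
}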
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 8addfe314dc7..d22cd168c6ee 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -838,6 +838,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
838 | ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); | 838 | ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); |
839 | if (ret) { | 839 | if (ret) { |
840 | EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); | 840 | EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); |
841 | dput(sb->s_root); | ||
842 | sb->s_root = NULL; | ||
841 | goto free_sbi; | 843 | goto free_sbi; |
842 | } | 844 | } |
843 | 845 | ||
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index cd7f5f424a75..8b15cf8cef37 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c | |||
@@ -573,8 +573,11 @@ got: | |||
573 | inode->i_generation = sbi->s_next_generation++; | 573 | inode->i_generation = sbi->s_next_generation++; |
574 | spin_unlock(&sbi->s_next_gen_lock); | 574 | spin_unlock(&sbi->s_next_gen_lock); |
575 | if (insert_inode_locked(inode) < 0) { | 575 | if (insert_inode_locked(inode) < 0) { |
576 | err = -EINVAL; | 576 | ext2_error(sb, "ext2_new_inode", |
577 | goto fail_drop; | 577 | "inode number already in use - inode=%lu", |
578 | (unsigned long) ino); | ||
579 | err = -EIO; | ||
580 | goto fail; | ||
578 | } | 581 | } |
579 | 582 | ||
580 | dquot_initialize(inode); | 583 | dquot_initialize(inode); |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 91a6945af6d8..740cad8dcd8d 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/highuid.h> | 26 | #include <linux/highuid.h> |
27 | #include <linux/pagemap.h> | 27 | #include <linux/pagemap.h> |
28 | #include <linux/quotaops.h> | 28 | #include <linux/quotaops.h> |
29 | #include <linux/module.h> | ||
30 | #include <linux/writeback.h> | 29 | #include <linux/writeback.h> |
31 | #include <linux/buffer_head.h> | 30 | #include <linux/buffer_head.h> |
32 | #include <linux/mpage.h> | 31 | #include <linux/mpage.h> |
@@ -36,10 +35,6 @@ | |||
36 | #include "acl.h" | 35 | #include "acl.h" |
37 | #include "xip.h" | 36 | #include "xip.h" |
38 | 37 | ||
39 | MODULE_AUTHOR("Remy Card and others"); | ||
40 | MODULE_DESCRIPTION("Second Extended Filesystem"); | ||
41 | MODULE_LICENSE("GPL"); | ||
42 | |||
43 | static int __ext2_write_inode(struct inode *inode, int do_sync); | 38 | static int __ext2_write_inode(struct inode *inode, int do_sync); |
44 | 39 | ||
45 | /* | 40 | /* |
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 1089f760c847..2de655f5d625 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c | |||
@@ -77,10 +77,11 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
77 | flags = flags & EXT2_FL_USER_MODIFIABLE; | 77 | flags = flags & EXT2_FL_USER_MODIFIABLE; |
78 | flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; | 78 | flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; |
79 | ei->i_flags = flags; | 79 | ei->i_flags = flags; |
80 | mutex_unlock(&inode->i_mutex); | ||
81 | 80 | ||
82 | ext2_set_inode_flags(inode); | 81 | ext2_set_inode_flags(inode); |
83 | inode->i_ctime = CURRENT_TIME_SEC; | 82 | inode->i_ctime = CURRENT_TIME_SEC; |
83 | mutex_unlock(&inode->i_mutex); | ||
84 | |||
84 | mark_inode_dirty(inode); | 85 | mark_inode_dirty(inode); |
85 | setflags_out: | 86 | setflags_out: |
86 | mnt_drop_write_file(filp); | 87 | mnt_drop_write_file(filp); |
@@ -88,20 +89,29 @@ setflags_out: | |||
88 | } | 89 | } |
89 | case EXT2_IOC_GETVERSION: | 90 | case EXT2_IOC_GETVERSION: |
90 | return put_user(inode->i_generation, (int __user *) arg); | 91 | return put_user(inode->i_generation, (int __user *) arg); |
91 | case EXT2_IOC_SETVERSION: | 92 | case EXT2_IOC_SETVERSION: { |
93 | __u32 generation; | ||
94 | |||
92 | if (!inode_owner_or_capable(inode)) | 95 | if (!inode_owner_or_capable(inode)) |
93 | return -EPERM; | 96 | return -EPERM; |
94 | ret = mnt_want_write_file(filp); | 97 | ret = mnt_want_write_file(filp); |
95 | if (ret) | 98 | if (ret) |
96 | return ret; | 99 | return ret; |
97 | if (get_user(inode->i_generation, (int __user *) arg)) { | 100 | if (get_user(generation, (int __user *) arg)) { |
98 | ret = -EFAULT; | 101 | ret = -EFAULT; |
99 | } else { | 102 | goto setversion_out; |
100 | inode->i_ctime = CURRENT_TIME_SEC; | ||
101 | mark_inode_dirty(inode); | ||
102 | } | 103 | } |
104 | |||
105 | mutex_lock(&inode->i_mutex); | ||
106 | inode->i_ctime = CURRENT_TIME_SEC; | ||
107 | inode->i_generation = generation; | ||
108 | mutex_unlock(&inode->i_mutex); | ||
109 | |||
110 | mark_inode_dirty(inode); | ||
111 | setversion_out: | ||
103 | mnt_drop_write_file(filp); | 112 | mnt_drop_write_file(filp); |
104 | return ret; | 113 | return ret; |
114 | } | ||
105 | case EXT2_IOC_GETRSVSZ: | 115 | case EXT2_IOC_GETRSVSZ: |
106 | if (test_opt(inode->i_sb, RESERVATION) | 116 | if (test_opt(inode->i_sb, RESERVATION) |
107 | && S_ISREG(inode->i_mode) | 117 | && S_ISREG(inode->i_mode) |
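The EXT2_IOC_SETVERSION rework above changes only kernel-side locking; the ioctl interface itself is unchanged. A sketch using the generic FS_IOC_SETVERSION alias (the same ioctl number), with the target path taken from argv:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	unsigned int gen = 42;	/* new i_generation value */
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, FS_IOC_SETVERSION, &gen) < 0)
		perror("FS_IOC_SETVERSION");
	return 0;
}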
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 9b403f064ce0..0090595beb28 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -1520,5 +1520,8 @@ static void __exit exit_ext2_fs(void) | |||
1520 | exit_ext2_xattr(); | 1520 | exit_ext2_xattr(); |
1521 | } | 1521 | } |
1522 | 1522 | ||
1523 | MODULE_AUTHOR("Remy Card and others"); | ||
1524 | MODULE_DESCRIPTION("Second Extended Filesystem"); | ||
1525 | MODULE_LICENSE("GPL"); | ||
1523 | module_init(init_ext2_fs) | 1526 | module_init(init_ext2_fs) |
1524 | module_exit(exit_ext2_fs) | 1527 | module_exit(exit_ext2_fs) |
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index d27b71f1d183..6dcafc7efdfd 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c | |||
@@ -54,7 +54,6 @@ | |||
54 | */ | 54 | */ |
55 | 55 | ||
56 | #include <linux/buffer_head.h> | 56 | #include <linux/buffer_head.h> |
57 | #include <linux/module.h> | ||
58 | #include <linux/init.h> | 57 | #include <linux/init.h> |
59 | #include <linux/slab.h> | 58 | #include <linux/slab.h> |
60 | #include <linux/mbcache.h> | 59 | #include <linux/mbcache.h> |
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index c922adc8ef41..be7a8d02c9a7 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c | |||
@@ -3,7 +3,6 @@ | |||
3 | * Handler for storing security labels as extended attributes. | 3 | * Handler for storing security labels as extended attributes. |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
8 | #include <linux/string.h> | 7 | #include <linux/string.h> |
9 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 667e46a8d62d..2989467d3595 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c | |||
@@ -5,7 +5,6 @@ | |||
5 | * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> | 5 | * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/string.h> | 8 | #include <linux/string.h> |
10 | #include <linux/capability.h> | 9 | #include <linux/capability.h> |
11 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 099d20f47163..f470e44c4b8d 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c | |||
@@ -6,7 +6,6 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/module.h> | ||
10 | #include <linux/string.h> | 9 | #include <linux/string.h> |
11 | #include "ext2.h" | 10 | #include "ext2.h" |
12 | #include "xattr.h" | 11 | #include "xattr.h" |
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 92cc86dfa23d..1cde28438014 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c | |||
@@ -525,8 +525,12 @@ got: | |||
525 | if (IS_DIRSYNC(inode)) | 525 | if (IS_DIRSYNC(inode)) |
526 | handle->h_sync = 1; | 526 | handle->h_sync = 1; |
527 | if (insert_inode_locked(inode) < 0) { | 527 | if (insert_inode_locked(inode) < 0) { |
528 | err = -EINVAL; | 528 | /* |
529 | goto fail_drop; | 529 | * Likely a bitmap corruption causing inode to be allocated |
530 | * twice. | ||
531 | */ | ||
532 | err = -EIO; | ||
533 | goto fail; | ||
530 | } | 534 | } |
531 | spin_lock(&sbi->s_next_gen_lock); | 535 | spin_lock(&sbi->s_next_gen_lock); |
532 | inode->i_generation = sbi->s_next_generation++; | 536 | inode->i_generation = sbi->s_next_generation++; |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 15cb47088aac..2d0afeca0b47 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -22,7 +22,6 @@ | |||
22 | * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 | 22 | * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
27 | #include <linux/time.h> | 26 | #include <linux/time.h> |
28 | #include <linux/ext3_jbd.h> | 27 | #include <linux/ext3_jbd.h> |
@@ -223,8 +222,12 @@ void ext3_evict_inode (struct inode *inode) | |||
223 | * | 222 | * |
224 | * Note that directories do not have this problem because they don't | 223 | * Note that directories do not have this problem because they don't |
225 | * use page cache. | 224 | * use page cache. |
225 | * | ||
226 | * The s_journal check handles the case when ext3_get_journal() fails | ||
227 | * and puts the journal inode. | ||
226 | */ | 228 | */ |
227 | if (inode->i_nlink && ext3_should_journal_data(inode) && | 229 | if (inode->i_nlink && ext3_should_journal_data(inode) && |
230 | EXT3_SB(inode->i_sb)->s_journal && | ||
228 | (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { | 231 | (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { |
229 | tid_t commit_tid = atomic_read(&ei->i_datasync_tid); | 232 | tid_t commit_tid = atomic_read(&ei->i_datasync_tid); |
230 | journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; | 233 | journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; |
@@ -1132,9 +1135,11 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, | |||
1132 | bh = ext3_getblk(handle, inode, block, create, err); | 1135 | bh = ext3_getblk(handle, inode, block, create, err); |
1133 | if (!bh) | 1136 | if (!bh) |
1134 | return bh; | 1137 | return bh; |
1135 | if (buffer_uptodate(bh)) | 1138 | if (bh_uptodate_or_lock(bh)) |
1136 | return bh; | 1139 | return bh; |
1137 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); | 1140 | get_bh(bh); |
1141 | bh->b_end_io = end_buffer_read_sync; | ||
1142 | submit_bh(READ | REQ_META | REQ_PRIO, bh); | ||
1138 | wait_on_buffer(bh); | 1143 | wait_on_buffer(bh); |
1139 | if (buffer_uptodate(bh)) | 1144 | if (buffer_uptodate(bh)) |
1140 | return bh; | 1145 | return bh; |
@@ -1617,7 +1622,13 @@ static int ext3_ordered_writepage(struct page *page, | |||
1617 | int err; | 1622 | int err; |
1618 | 1623 | ||
1619 | J_ASSERT(PageLocked(page)); | 1624 | J_ASSERT(PageLocked(page)); |
1620 | WARN_ON_ONCE(IS_RDONLY(inode)); | 1625 | /* |
1626 | * We don't want to warn for emergency remount. The condition is | ||
1627 | * ordered to avoid dereferencing inode->i_sb in the non-error case to | ||
1628 | * avoid slow-downs. | ||
1629 | */ | ||
1630 | WARN_ON_ONCE(IS_RDONLY(inode) && | ||
1631 | !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); | ||
1621 | 1632 | ||
1622 | /* | 1633 | /* |
1623 | * We give up here if we're reentered, because it might be for a | 1634 | * We give up here if we're reentered, because it might be for a |
@@ -1692,7 +1703,13 @@ static int ext3_writeback_writepage(struct page *page, | |||
1692 | int err; | 1703 | int err; |
1693 | 1704 | ||
1694 | J_ASSERT(PageLocked(page)); | 1705 | J_ASSERT(PageLocked(page)); |
1695 | WARN_ON_ONCE(IS_RDONLY(inode)); | 1706 | /* |
1707 | * We don't want to warn for emergency remount. The condition is | ||
1708 | * ordered to avoid dereferencing inode->i_sb in the non-error case to | ||
1709 | * avoid slow-downs. | ||
1710 | */ | ||
1711 | WARN_ON_ONCE(IS_RDONLY(inode) && | ||
1712 | !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); | ||
1696 | 1713 | ||
1697 | if (ext3_journal_current_handle()) | 1714 | if (ext3_journal_current_handle()) |
1698 | goto out_fail; | 1715 | goto out_fail; |
@@ -1735,7 +1752,13 @@ static int ext3_journalled_writepage(struct page *page, | |||
1735 | int err; | 1752 | int err; |
1736 | 1753 | ||
1737 | J_ASSERT(PageLocked(page)); | 1754 | J_ASSERT(PageLocked(page)); |
1738 | WARN_ON_ONCE(IS_RDONLY(inode)); | 1755 | /* |
1756 | * We don't want to warn for emergency remount. The condition is | ||
1757 | * ordered to avoid dereferencing inode->i_sb in the non-error case to | ||
1758 | * avoid slow-downs. | ||
1759 | */ | ||
1760 | WARN_ON_ONCE(IS_RDONLY(inode) && | ||
1761 | !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); | ||
1739 | 1762 | ||
1740 | if (ext3_journal_current_handle()) | 1763 | if (ext3_journal_current_handle()) |
1741 | goto no_write; | 1764 | goto no_write; |
@@ -2064,12 +2087,10 @@ static int ext3_block_truncate_page(struct inode *inode, loff_t from) | |||
2064 | if (PageUptodate(page)) | 2087 | if (PageUptodate(page)) |
2065 | set_buffer_uptodate(bh); | 2088 | set_buffer_uptodate(bh); |
2066 | 2089 | ||
2067 | if (!buffer_uptodate(bh)) { | 2090 | if (!bh_uptodate_or_lock(bh)) { |
2068 | err = -EIO; | 2091 | err = bh_submit_read(bh); |
2069 | ll_rw_block(READ, 1, &bh); | ||
2070 | wait_on_buffer(bh); | ||
2071 | /* Uhhuh. Read error. Complain and punt. */ | 2092 | /* Uhhuh. Read error. Complain and punt. */ |
2072 | if (!buffer_uptodate(bh)) | 2093 | if (err) |
2073 | goto unlock; | 2094 | goto unlock; |
2074 | } | 2095 | } |
2075 | 2096 | ||
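Several ext3 hunks above replace ll_rw_block()+wait_on_buffer() with bh_uptodate_or_lock()/bh_submit_read(): ll_rw_block() silently skips a buffer someone else has locked, so the caller could wait on a buffer that was never submitted. The resulting pattern, sketched over the in-kernel buffer_head API (kernel context only, not standalone):

#include <linux/buffer_head.h>

/* sketch only: synchronously ensure bh's contents are read in */
static int read_bh_sync(struct buffer_head *bh)
{
	/* returns true if already uptodate; otherwise returns
	 * false with the buffer locked, ready for submission */
	if (bh_uptodate_or_lock(bh))
		return 0;

	/* submits the read on a locked buffer and waits;
	 * returns 0 on success, -EIO on failure */
	return bh_submit_read(bh);
}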
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 8e37c41a071b..4af574ce4a46 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c | |||
@@ -134,10 +134,11 @@ flags_out: | |||
134 | goto setversion_out; | 134 | goto setversion_out; |
135 | } | 135 | } |
136 | 136 | ||
137 | mutex_lock(&inode->i_mutex); | ||
137 | handle = ext3_journal_start(inode, 1); | 138 | handle = ext3_journal_start(inode, 1); |
138 | if (IS_ERR(handle)) { | 139 | if (IS_ERR(handle)) { |
139 | err = PTR_ERR(handle); | 140 | err = PTR_ERR(handle); |
140 | goto setversion_out; | 141 | goto unlock_out; |
141 | } | 142 | } |
142 | err = ext3_reserve_inode_write(handle, inode, &iloc); | 143 | err = ext3_reserve_inode_write(handle, inode, &iloc); |
143 | if (err == 0) { | 144 | if (err == 0) { |
@@ -146,6 +147,9 @@ flags_out: | |||
146 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); | 147 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); |
147 | } | 148 | } |
148 | ext3_journal_stop(handle); | 149 | ext3_journal_stop(handle); |
150 | |||
151 | unlock_out: | ||
152 | mutex_unlock(&inode->i_mutex); | ||
149 | setversion_out: | 153 | setversion_out: |
150 | mnt_drop_write_file(filp); | 154 | mnt_drop_write_file(filp); |
151 | return err; | 155 | return err; |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 4f35b2f315d4..e8e211795e9f 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -921,9 +921,12 @@ restart: | |||
921 | num++; | 921 | num++; |
922 | bh = ext3_getblk(NULL, dir, b++, 0, &err); | 922 | bh = ext3_getblk(NULL, dir, b++, 0, &err); |
923 | bh_use[ra_max] = bh; | 923 | bh_use[ra_max] = bh; |
924 | if (bh) | 924 | if (bh && !bh_uptodate_or_lock(bh)) { |
925 | ll_rw_block(READ | REQ_META | REQ_PRIO, | 925 | get_bh(bh); |
926 | 1, &bh); | 926 | bh->b_end_io = end_buffer_read_sync; |
927 | submit_bh(READ | REQ_META | REQ_PRIO, | ||
928 | bh); | ||
929 | } | ||
927 | } | 930 | } |
928 | } | 931 | } |
929 | if ((bh = bh_use[ra_ptr++]) == NULL) | 932 | if ((bh = bh_use[ra_ptr++]) == NULL) |
@@ -2272,7 +2275,7 @@ retry: | |||
2272 | err = PTR_ERR(handle); | 2275 | err = PTR_ERR(handle); |
2273 | goto err_drop_inode; | 2276 | goto err_drop_inode; |
2274 | } | 2277 | } |
2275 | inc_nlink(inode); | 2278 | set_nlink(inode, 1); |
2276 | err = ext3_orphan_del(handle, inode); | 2279 | err = ext3_orphan_del(handle, inode); |
2277 | if (err) { | 2280 | if (err) { |
2278 | ext3_journal_stop(handle); | 2281 | ext3_journal_stop(handle); |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3a10b884e1be..726c7ef6cdf1 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -2059,9 +2059,10 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
2059 | EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; | 2059 | EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; |
2060 | ext3_orphan_cleanup(sb, es); | 2060 | ext3_orphan_cleanup(sb, es); |
2061 | EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; | 2061 | EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; |
2062 | if (needs_recovery) | 2062 | if (needs_recovery) { |
2063 | ext3_mark_recovery_complete(sb, es); | ||
2063 | ext3_msg(sb, KERN_INFO, "recovery complete"); | 2064 | ext3_msg(sb, KERN_INFO, "recovery complete"); |
2064 | ext3_mark_recovery_complete(sb, es); | 2065 | } |
2065 | ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode", | 2066 | ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode", |
2066 | test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": | 2067 | test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": |
2067 | test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": | 2068 | test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": |
@@ -2229,11 +2230,11 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, | |||
2229 | goto out_bdev; | 2230 | goto out_bdev; |
2230 | } | 2231 | } |
2231 | journal->j_private = sb; | 2232 | journal->j_private = sb; |
2232 | ll_rw_block(READ, 1, &journal->j_sb_buffer); | 2233 | if (!bh_uptodate_or_lock(journal->j_sb_buffer)) { |
2233 | wait_on_buffer(journal->j_sb_buffer); | 2234 | if (bh_submit_read(journal->j_sb_buffer)) { |
2234 | if (!buffer_uptodate(journal->j_sb_buffer)) { | 2235 | ext3_msg(sb, KERN_ERR, "I/O error on journal device"); |
2235 | ext3_msg(sb, KERN_ERR, "I/O error on journal device"); | 2236 | goto out_journal; |
2236 | goto out_journal; | 2237 | } |
2237 | } | 2238 | } |
2238 | if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { | 2239 | if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { |
2239 | ext3_msg(sb, KERN_ERR, | 2240 | ext3_msg(sb, KERN_ERR, |
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index 3c218b8a51d4..ea26f2acab94 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c | |||
@@ -3,7 +3,6 @@ | |||
3 | * Handler for storing security labels as extended attributes. | 3 | * Handler for storing security labels as extended attributes. |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
8 | #include <linux/string.h> | 7 | #include <linux/string.h> |
9 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c index dc8edda9ffe0..2526a8829de8 100644 --- a/fs/ext3/xattr_trusted.c +++ b/fs/ext3/xattr_trusted.c | |||
@@ -5,7 +5,6 @@ | |||
5 | * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> | 5 | * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/string.h> | 8 | #include <linux/string.h> |
10 | #include <linux/capability.h> | 9 | #include <linux/capability.h> |
11 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index 7a321974d584..b32e473a1e33 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c | |||
@@ -5,7 +5,6 @@ | |||
5 | * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> | 5 | * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/string.h> | 8 | #include <linux/string.h> |
10 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
11 | #include <linux/ext3_jbd.h> | 10 | #include <linux/ext3_jbd.h> |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 12ccacda44e0..f9e2cd8cf711 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -23,6 +23,8 @@ | |||
23 | 23 | ||
24 | #include <trace/events/ext4.h> | 24 | #include <trace/events/ext4.h> |
25 | 25 | ||
26 | static unsigned ext4_num_base_meta_clusters(struct super_block *sb, | ||
27 | ext4_group_t block_group); | ||
26 | /* | 28 | /* |
27 | * balloc.c contains the blocks allocation and deallocation routines | 29 | * balloc.c contains the blocks allocation and deallocation routines |
28 | */ | 30 | */ |
@@ -668,7 +670,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) | |||
668 | * This function returns the number of file system metadata clusters at | 670 | * This function returns the number of file system metadata clusters at |
669 | * the beginning of a block group, including the reserved gdt blocks. | 671 | * the beginning of a block group, including the reserved gdt blocks. |
670 | */ | 672 | */ |
671 | unsigned ext4_num_base_meta_clusters(struct super_block *sb, | 673 | static unsigned ext4_num_base_meta_clusters(struct super_block *sb, |
672 | ext4_group_t block_group) | 674 | ext4_group_t block_group) |
673 | { | 675 | { |
674 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 676 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 8efb2f0a3447..3f11656bd72e 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/namei.h> | 13 | #include <linux/namei.h> |
14 | #include <linux/quotaops.h> | 14 | #include <linux/quotaops.h> |
15 | #include <linux/buffer_head.h> | 15 | #include <linux/buffer_head.h> |
16 | #include <linux/module.h> | ||
17 | #include <linux/swap.h> | 16 | #include <linux/swap.h> |
18 | #include <linux/pagemap.h> | 17 | #include <linux/pagemap.h> |
19 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1554b15f91bc..513004fc3d84 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -511,6 +511,14 @@ struct ext4_new_group_data { | |||
511 | __u32 free_blocks_count; | 511 | __u32 free_blocks_count; |
512 | }; | 512 | }; |
513 | 513 | ||
514 | /* Indexes used to index group tables in ext4_new_group_data */ | ||
515 | enum { | ||
516 | BLOCK_BITMAP = 0, /* block bitmap */ | ||
517 | INODE_BITMAP, /* inode bitmap */ | ||
518 | INODE_TABLE, /* inode tables */ | ||
519 | GROUP_TABLE_COUNT, | ||
520 | }; | ||
521 | |||
514 | /* | 522 | /* |
515 | * Flags used by ext4_map_blocks() | 523 | * Flags used by ext4_map_blocks() |
516 | */ | 524 | */ |
@@ -575,6 +583,7 @@ struct ext4_new_group_data { | |||
575 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ | 583 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ |
576 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) | 584 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) |
577 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) | 585 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) |
586 | #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) | ||
578 | 587 | ||
579 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 588 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
580 | /* | 589 | /* |
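The new EXT4_IOC_RESIZE_FS ioctl takes the desired filesystem size in blocks as a __u64 (later resize2fs versions use it for online grow). A userspace sketch; the mount point path here is illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>
#include <linux/types.h>

/* mirrors the definition added above: _IOW('f', 16, __u64) */
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)

int main(void)
{
	__u64 blocks = 1ULL << 20;	/* 4 GiB with 4 KiB blocks */
	int fd = open("/mnt", O_RDONLY);	/* illustrative path */

	if (fd < 0 || ioctl(fd, EXT4_IOC_RESIZE_FS, &blocks) < 0)
		perror("EXT4_IOC_RESIZE_FS");
	return 0;
}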
@@ -957,12 +966,13 @@ struct ext4_inode_info { | |||
957 | #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ | 966 | #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ |
958 | EXT4_MOUNT2_##opt) | 967 | EXT4_MOUNT2_##opt) |
959 | 968 | ||
960 | #define ext4_set_bit __test_and_set_bit_le | 969 | #define ext4_test_and_set_bit __test_and_set_bit_le |
970 | #define ext4_set_bit __set_bit_le | ||
961 | #define ext4_set_bit_atomic ext2_set_bit_atomic | 971 | #define ext4_set_bit_atomic ext2_set_bit_atomic |
962 | #define ext4_clear_bit __test_and_clear_bit_le | 972 | #define ext4_test_and_clear_bit __test_and_clear_bit_le |
973 | #define ext4_clear_bit __clear_bit_le | ||
963 | #define ext4_clear_bit_atomic ext2_clear_bit_atomic | 974 | #define ext4_clear_bit_atomic ext2_clear_bit_atomic |
964 | #define ext4_test_bit test_bit_le | 975 | #define ext4_test_bit test_bit_le |
965 | #define ext4_find_first_zero_bit find_first_zero_bit_le | ||
966 | #define ext4_find_next_zero_bit find_next_zero_bit_le | 976 | #define ext4_find_next_zero_bit find_next_zero_bit_le |
967 | #define ext4_find_next_bit find_next_bit_le | 977 | #define ext4_find_next_bit find_next_bit_le |
968 | 978 | ||
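The bitops split above separates plain set/clear from test-and-set/test-and-clear: callers that branch on the old bit value keep the __test_and_*_le() forms, while callers that merely mark a bit get the cheaper non-returning forms. A sketch in the style of the non-atomic kernel bitops (the caller is assumed to hold whatever lock protects the bitmap):

#include <linux/bitops.h>
#include <linux/printk.h>

static void bitmap_demo(unsigned long *map)
{
	/* only marking the bit: old value is irrelevant */
	__set_bit_le(3, map);

	/* allocation-style use: act only if the bit was clear */
	if (!__test_and_set_bit_le(5, map))
		pr_info("bit 5 was free, now taken\n");
}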
@@ -1397,6 +1407,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1397 | #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 | 1407 | #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 |
1398 | #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 | 1408 | #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 |
1399 | #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 | 1409 | #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 |
1410 | #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 | ||
1400 | 1411 | ||
1401 | #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 | 1412 | #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 |
1402 | #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 | 1413 | #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 |
@@ -1409,6 +1420,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1409 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 | 1420 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 |
1410 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ | 1421 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ |
1411 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ | 1422 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ |
1423 | #define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */ | ||
1424 | #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ | ||
1412 | 1425 | ||
1413 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR | 1426 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR |
1414 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ | 1427 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ |
@@ -1790,8 +1803,6 @@ extern void ext4_init_block_bitmap(struct super_block *sb, | |||
1790 | extern unsigned ext4_free_clusters_after_init(struct super_block *sb, | 1803 | extern unsigned ext4_free_clusters_after_init(struct super_block *sb, |
1791 | ext4_group_t block_group, | 1804 | ext4_group_t block_group, |
1792 | struct ext4_group_desc *gdp); | 1805 | struct ext4_group_desc *gdp); |
1793 | extern unsigned ext4_num_base_meta_clusters(struct super_block *sb, | ||
1794 | ext4_group_t block_group); | ||
1795 | extern unsigned ext4_num_overhead_clusters(struct super_block *sb, | 1806 | extern unsigned ext4_num_overhead_clusters(struct super_block *sb, |
1796 | ext4_group_t block_group, | 1807 | ext4_group_t block_group, |
1797 | struct ext4_group_desc *gdp); | 1808 | struct ext4_group_desc *gdp); |
@@ -1880,16 +1891,9 @@ extern int ext4_alloc_da_blocks(struct inode *inode); | |||
1880 | extern void ext4_set_aops(struct inode *inode); | 1891 | extern void ext4_set_aops(struct inode *inode); |
1881 | extern int ext4_writepage_trans_blocks(struct inode *); | 1892 | extern int ext4_writepage_trans_blocks(struct inode *); |
1882 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 1893 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
1883 | extern int ext4_block_truncate_page(handle_t *handle, | ||
1884 | struct address_space *mapping, loff_t from); | ||
1885 | extern int ext4_block_zero_page_range(handle_t *handle, | ||
1886 | struct address_space *mapping, loff_t from, loff_t length); | ||
1887 | extern int ext4_discard_partial_page_buffers(handle_t *handle, | 1894 | extern int ext4_discard_partial_page_buffers(handle_t *handle, |
1888 | struct address_space *mapping, loff_t from, | 1895 | struct address_space *mapping, loff_t from, |
1889 | loff_t length, int flags); | 1896 | loff_t length, int flags); |
1890 | extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | ||
1891 | struct inode *inode, struct page *page, loff_t from, | ||
1892 | loff_t length, int flags); | ||
1893 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 1897 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
1894 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 1898 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
1895 | extern void ext4_da_update_reserve_space(struct inode *inode, | 1899 | extern void ext4_da_update_reserve_space(struct inode *inode, |
@@ -1924,6 +1928,7 @@ extern int ext4_group_add(struct super_block *sb, | |||
1924 | extern int ext4_group_extend(struct super_block *sb, | 1928 | extern int ext4_group_extend(struct super_block *sb, |
1925 | struct ext4_super_block *es, | 1929 | struct ext4_super_block *es, |
1926 | ext4_fsblk_t n_blocks_count); | 1930 | ext4_fsblk_t n_blocks_count); |
1931 | extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); | ||
1927 | 1932 | ||
1928 | /* super.c */ | 1933 | /* super.c */ |
1929 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); | 1934 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 607b1557d292..74f23c292e1b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -29,7 +29,6 @@ | |||
29 | * - smart tree reduction | 29 | * - smart tree reduction |
30 | */ | 30 | */ |
31 | 31 | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/fs.h> | 32 | #include <linux/fs.h> |
34 | #include <linux/time.h> | 33 | #include <linux/time.h> |
35 | #include <linux/jbd2.h> | 34 | #include <linux/jbd2.h> |
@@ -3281,6 +3280,9 @@ static int ext4_find_delalloc_range(struct inode *inode, | |||
3281 | ext4_lblk_t i, pg_lblk; | 3280 | ext4_lblk_t i, pg_lblk; |
3282 | pgoff_t index; | 3281 | pgoff_t index; |
3283 | 3282 | ||
3283 | if (!test_opt(inode->i_sb, DELALLOC)) | ||
3284 | return 0; | ||
3285 | |||
3284 | /* reverse search won't work if fs block size is less than page size */ | 3286 | /* reverse search won't work if fs block size is less than page size */ |
3285 | if (inode->i_blkbits < PAGE_CACHE_SHIFT) | 3287 | if (inode->i_blkbits < PAGE_CACHE_SHIFT) |
3286 | search_hint_reverse = 0; | 3288 | search_hint_reverse = 0; |
@@ -3453,8 +3455,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3453 | int err = 0; | 3455 | int err = 0; |
3454 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3456 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
3455 | 3457 | ||
3456 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" | 3458 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " |
3457 | "block %llu, max_blocks %u, flags %d, allocated %u", | 3459 | "block %llu, max_blocks %u, flags %x, allocated %u\n", |
3458 | inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, | 3460 | inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, |
3459 | flags, allocated); | 3461 | flags, allocated); |
3460 | ext4_ext_show_leaf(inode, path); | 3462 | ext4_ext_show_leaf(inode, path); |
@@ -3625,7 +3627,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, | |||
3625 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3627 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
3626 | ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); | 3628 | ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); |
3627 | ext4_lblk_t ex_cluster_start, ex_cluster_end; | 3629 | ext4_lblk_t ex_cluster_start, ex_cluster_end; |
3628 | ext4_lblk_t rr_cluster_start, rr_cluster_end; | 3630 | ext4_lblk_t rr_cluster_start; |
3629 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); | 3631 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); |
3630 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); | 3632 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); |
3631 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 3633 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
@@ -3636,7 +3638,6 @@ static int get_implied_cluster_alloc(struct super_block *sb, | |||
3636 | 3638 | ||
3637 | /* The requested region passed into ext4_map_blocks() */ | 3639 | /* The requested region passed into ext4_map_blocks() */ |
3638 | rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); | 3640 | rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); |
3639 | rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1); | ||
3640 | 3641 | ||
3641 | if ((rr_cluster_start == ex_cluster_end) || | 3642 | if ((rr_cluster_start == ex_cluster_end) || |
3642 | (rr_cluster_start == ex_cluster_start)) { | 3643 | (rr_cluster_start == ex_cluster_start)) { |
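For readers tracking the bigalloc work: get_implied_cluster_alloc() reasons in cluster units, and rr_cluster_end is dropped above because nothing read it after the conversion. In the kernel, EXT4_B2C() reduces a block number to its cluster by shifting out s_cluster_bits; a standalone sketch of that arithmetic with invented numbers:

#include <stdio.h>

static unsigned long long b2c(unsigned long long blk, unsigned cluster_bits)
{
        return blk >> cluster_bits;     /* blocks per cluster = 1 << bits */
}

int main(void)
{
        unsigned cluster_bits = 4;      /* 16 blocks per cluster (example) */
        unsigned long long lblk = 37, len = 10;

        /* Start and end of the requested region, in cluster units: */
        printf("start cluster %llu, end cluster %llu\n",
               b2c(lblk, cluster_bits),
               b2c(lblk + len - 1, cluster_bits));
        return 0;
}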
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 4637af036d9c..25d8c9781ad9 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -252,7 +252,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
252 | fatal = ext4_journal_get_write_access(handle, bh2); | 252 | fatal = ext4_journal_get_write_access(handle, bh2); |
253 | } | 253 | } |
254 | ext4_lock_group(sb, block_group); | 254 | ext4_lock_group(sb, block_group); |
255 | cleared = ext4_clear_bit(bit, bitmap_bh->b_data); | 255 | cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data); |
256 | if (fatal || !cleared) { | 256 | if (fatal || !cleared) { |
257 | ext4_unlock_group(sb, block_group); | 257 | ext4_unlock_group(sb, block_group); |
258 | goto out; | 258 | goto out; |
@@ -358,7 +358,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
358 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 358 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
359 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); | 359 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); |
360 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); | 360 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); |
361 | unsigned int freei, avefreei; | 361 | unsigned int freei, avefreei, grp_free; |
362 | ext4_fsblk_t freeb, avefreec; | 362 | ext4_fsblk_t freeb, avefreec; |
363 | unsigned int ndirs; | 363 | unsigned int ndirs; |
364 | int max_dirs, min_inodes; | 364 | int max_dirs, min_inodes; |
@@ -477,8 +477,8 @@ fallback_retry: | |||
477 | for (i = 0; i < ngroups; i++) { | 477 | for (i = 0; i < ngroups; i++) { |
478 | grp = (parent_group + i) % ngroups; | 478 | grp = (parent_group + i) % ngroups; |
479 | desc = ext4_get_group_desc(sb, grp, NULL); | 479 | desc = ext4_get_group_desc(sb, grp, NULL); |
480 | if (desc && ext4_free_inodes_count(sb, desc) && | 480 | grp_free = ext4_free_inodes_count(sb, desc); |
481 | ext4_free_inodes_count(sb, desc) >= avefreei) { | 481 | if (desc && grp_free && grp_free >= avefreei) { |
482 | *group = grp; | 482 | *group = grp; |
483 | return 0; | 483 | return 0; |
484 | } | 484 | } |
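The rewrite above caches ext4_free_inodes_count(sb, desc) in grp_free so the descriptor field is decoded once instead of twice per group. A minimal userspace sketch of the same hoisting, written with the NULL check kept ahead of the read; all names here are hypothetical:

#include <stddef.h>
#include <stdio.h>

struct desc { unsigned free_inodes; };

static unsigned free_inodes_count(const struct desc *d)
{
        return d->free_inodes;
}

/* Evaluate the accessor once, but only after the NULL check. */
static int pick_group(const struct desc *d, unsigned avefreei)
{
        unsigned grp_free;

        if (d == NULL)
                return 0;
        grp_free = free_inodes_count(d);
        return grp_free && grp_free >= avefreei;
}

int main(void)
{
        struct desc d = { .free_inodes = 12 };

        printf("%d\n", pick_group(&d, 8));   /* 1 */
        printf("%d\n", pick_group(NULL, 8)); /* 0 */
        return 0;
}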
@@ -618,7 +618,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
618 | */ | 618 | */ |
619 | down_read(&grp->alloc_sem); | 619 | down_read(&grp->alloc_sem); |
620 | ext4_lock_group(sb, group); | 620 | ext4_lock_group(sb, group); |
621 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { | 621 | if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) { |
622 | /* not a free inode */ | 622 | /* not a free inode */ |
623 | retval = 1; | 623 | retval = 1; |
624 | goto err_ret; | 624 | goto err_ret; |
@@ -885,8 +885,12 @@ got: | |||
885 | if (IS_DIRSYNC(inode)) | 885 | if (IS_DIRSYNC(inode)) |
886 | ext4_handle_sync(handle); | 886 | ext4_handle_sync(handle); |
887 | if (insert_inode_locked(inode) < 0) { | 887 | if (insert_inode_locked(inode) < 0) { |
888 | err = -EINVAL; | 888 | /* |
889 | goto fail_drop; | 889 | * Likely a bitmap corruption causing the inode to be allocated |
890 | * twice. | ||
891 | */ | ||
892 | err = -EIO; | ||
893 | goto fail; | ||
890 | } | 894 | } |
891 | spin_lock(&sbi->s_next_gen_lock); | 895 | spin_lock(&sbi->s_next_gen_lock); |
892 | inode->i_generation = sbi->s_next_generation++; | 896 | inode->i_generation = sbi->s_next_generation++; |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 3cfc73fbca8e..830e1b2bf145 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -20,7 +20,6 @@ | |||
20 | * (sct@redhat.com), 1993, 1998 | 20 | * (sct@redhat.com), 1993, 1998 |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/module.h> | ||
24 | #include "ext4_jbd2.h" | 23 | #include "ext4_jbd2.h" |
25 | #include "truncate.h" | 24 | #include "truncate.h" |
26 | 25 | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7dbcc3e84570..feaa82fe629d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -18,7 +18,6 @@ | |||
18 | * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 | 18 | * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
23 | #include <linux/time.h> | 22 | #include <linux/time.h> |
24 | #include <linux/jbd2.h> | 23 | #include <linux/jbd2.h> |
@@ -72,6 +71,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | |||
72 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | 71 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); |
73 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | 72 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); |
74 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | 73 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); |
74 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | ||
75 | struct inode *inode, struct page *page, loff_t from, | ||
76 | loff_t length, int flags); | ||
75 | 77 | ||
76 | /* | 78 | /* |
77 | * Test whether an inode is a fast symlink. | 79 | * Test whether an inode is a fast symlink. |
@@ -2760,7 +2762,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
2760 | if (!io_end || !size) | 2762 | if (!io_end || !size) |
2761 | goto out; | 2763 | goto out; |
2762 | 2764 | ||
2763 | ext_debug("ext4_end_io_dio(): io_end 0x%p" | 2765 | ext_debug("ext4_end_io_dio(): io_end 0x%p " |
2764 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", | 2766 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", |
2765 | iocb->private, io_end->inode->i_ino, iocb, offset, | 2767 | iocb->private, io_end->inode->i_ino, iocb, offset, |
2766 | size); | 2768 | size); |
@@ -3161,7 +3163,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle, | |||
3161 | * | 3163 | * |
3162 | * Returns zero on success or negative on failure. | 3164 | * Returns zero on success or negative on failure. |
3163 | */ | 3165 | */ |
3164 | int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | 3166 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, |
3165 | struct inode *inode, struct page *page, loff_t from, | 3167 | struct inode *inode, struct page *page, loff_t from, |
3166 | loff_t length, int flags) | 3168 | loff_t length, int flags) |
3167 | { | 3169 | { |
@@ -3301,126 +3303,6 @@ next: | |||
3301 | return err; | 3303 | return err; |
3302 | } | 3304 | } |
3303 | 3305 | ||
3304 | /* | ||
3305 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | ||
3306 | * up to the end of the block which corresponds to `from'. | ||
3307 | * This is required during truncate. We need to physically zero the tail end | ||
3308 | * of that block so it doesn't yield old data if the file is later grown. | ||
3309 | */ | ||
3310 | int ext4_block_truncate_page(handle_t *handle, | ||
3311 | struct address_space *mapping, loff_t from) | ||
3312 | { | ||
3313 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3314 | unsigned length; | ||
3315 | unsigned blocksize; | ||
3316 | struct inode *inode = mapping->host; | ||
3317 | |||
3318 | blocksize = inode->i_sb->s_blocksize; | ||
3319 | length = blocksize - (offset & (blocksize - 1)); | ||
3320 | |||
3321 | return ext4_block_zero_page_range(handle, mapping, from, length); | ||
3322 | } | ||
3323 | |||
3324 | /* | ||
3325 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | ||
3326 | * starting from file offset 'from'. The range to be zeroed must | ||
3327 | * be contained within one block. If the specified range exceeds | ||
3328 | * the end of the block, it will be shortened to the end of the block | ||
3329 | * that corresponds to 'from'. | ||
3330 | */ | ||
3331 | int ext4_block_zero_page_range(handle_t *handle, | ||
3332 | struct address_space *mapping, loff_t from, loff_t length) | ||
3333 | { | ||
3334 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | ||
3335 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3336 | unsigned blocksize, max, pos; | ||
3337 | ext4_lblk_t iblock; | ||
3338 | struct inode *inode = mapping->host; | ||
3339 | struct buffer_head *bh; | ||
3340 | struct page *page; | ||
3341 | int err = 0; | ||
3342 | |||
3343 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | ||
3344 | mapping_gfp_mask(mapping) & ~__GFP_FS); | ||
3345 | if (!page) | ||
3346 | return -ENOMEM; | ||
3347 | |||
3348 | blocksize = inode->i_sb->s_blocksize; | ||
3349 | max = blocksize - (offset & (blocksize - 1)); | ||
3350 | |||
3351 | /* | ||
3352 | * correct length if it does not fall between | ||
3353 | * 'from' and the end of the block | ||
3354 | */ | ||
3355 | if (length > max || length < 0) | ||
3356 | length = max; | ||
3357 | |||
3358 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | ||
3359 | |||
3360 | if (!page_has_buffers(page)) | ||
3361 | create_empty_buffers(page, blocksize, 0); | ||
3362 | |||
3363 | /* Find the buffer that contains "offset" */ | ||
3364 | bh = page_buffers(page); | ||
3365 | pos = blocksize; | ||
3366 | while (offset >= pos) { | ||
3367 | bh = bh->b_this_page; | ||
3368 | iblock++; | ||
3369 | pos += blocksize; | ||
3370 | } | ||
3371 | |||
3372 | err = 0; | ||
3373 | if (buffer_freed(bh)) { | ||
3374 | BUFFER_TRACE(bh, "freed: skip"); | ||
3375 | goto unlock; | ||
3376 | } | ||
3377 | |||
3378 | if (!buffer_mapped(bh)) { | ||
3379 | BUFFER_TRACE(bh, "unmapped"); | ||
3380 | ext4_get_block(inode, iblock, bh, 0); | ||
3381 | /* unmapped? It's a hole - nothing to do */ | ||
3382 | if (!buffer_mapped(bh)) { | ||
3383 | BUFFER_TRACE(bh, "still unmapped"); | ||
3384 | goto unlock; | ||
3385 | } | ||
3386 | } | ||
3387 | |||
3388 | /* Ok, it's mapped. Make sure it's up-to-date */ | ||
3389 | if (PageUptodate(page)) | ||
3390 | set_buffer_uptodate(bh); | ||
3391 | |||
3392 | if (!buffer_uptodate(bh)) { | ||
3393 | err = -EIO; | ||
3394 | ll_rw_block(READ, 1, &bh); | ||
3395 | wait_on_buffer(bh); | ||
3396 | /* Uhhuh. Read error. Complain and punt. */ | ||
3397 | if (!buffer_uptodate(bh)) | ||
3398 | goto unlock; | ||
3399 | } | ||
3400 | |||
3401 | if (ext4_should_journal_data(inode)) { | ||
3402 | BUFFER_TRACE(bh, "get write access"); | ||
3403 | err = ext4_journal_get_write_access(handle, bh); | ||
3404 | if (err) | ||
3405 | goto unlock; | ||
3406 | } | ||
3407 | |||
3408 | zero_user(page, offset, length); | ||
3409 | |||
3410 | BUFFER_TRACE(bh, "zeroed end of block"); | ||
3411 | |||
3412 | err = 0; | ||
3413 | if (ext4_should_journal_data(inode)) { | ||
3414 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
3415 | } else | ||
3416 | mark_buffer_dirty(bh); | ||
3417 | |||
3418 | unlock: | ||
3419 | unlock_page(page); | ||
3420 | page_cache_release(page); | ||
3421 | return err; | ||
3422 | } | ||
3423 | |||
3424 | int ext4_can_truncate(struct inode *inode) | 3306 | int ext4_can_truncate(struct inode *inode) |
3425 | { | 3307 | { |
3426 | if (S_ISREG(inode->i_mode)) | 3308 | if (S_ISREG(inode->i_mode)) |
@@ -4647,9 +4529,19 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
4647 | return 0; | 4529 | return 0; |
4648 | if (is_journal_aborted(journal)) | 4530 | if (is_journal_aborted(journal)) |
4649 | return -EROFS; | 4531 | return -EROFS; |
4532 | /* We have to allocate physical blocks for delalloc blocks | ||
4533 | * before flushing the journal; otherwise delalloc blocks cannot | ||
4534 | * be allocated any more. Worse, a truncate on delalloc blocks | ||
4535 | * could trigger a BUG by flushing delalloc blocks in the journal. | ||
4536 | * There are no delalloc blocks in non-journal data mode. | ||
4537 | */ | ||
4538 | if (val && test_opt(inode->i_sb, DELALLOC)) { | ||
4539 | err = ext4_alloc_da_blocks(inode); | ||
4540 | if (err < 0) | ||
4541 | return err; | ||
4542 | } | ||
4650 | 4543 | ||
4651 | jbd2_journal_lock_updates(journal); | 4544 | jbd2_journal_lock_updates(journal); |
4652 | jbd2_journal_flush(journal); | ||
4653 | 4545 | ||
4654 | /* | 4546 | /* |
4655 | * OK, there are no updates running now, and all cached data is | 4547 | * OK, there are no updates running now, and all cached data is |
@@ -4661,8 +4553,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
4661 | 4553 | ||
4662 | if (val) | 4554 | if (val) |
4663 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 4555 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
4664 | else | 4556 | else { |
4557 | jbd2_journal_flush(journal); | ||
4665 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 4558 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
4559 | } | ||
4666 | ext4_set_aops(inode); | 4560 | ext4_set_aops(inode); |
4667 | 4561 | ||
4668 | jbd2_journal_unlock_updates(journal); | 4562 | jbd2_journal_unlock_updates(journal); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index d37b3bb2a3b8..6eee25591b81 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -18,6 +18,8 @@ | |||
18 | #include "ext4_jbd2.h" | 18 | #include "ext4_jbd2.h" |
19 | #include "ext4.h" | 19 | #include "ext4.h" |
20 | 20 | ||
21 | #define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1) | ||
22 | |||
21 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 23 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
22 | { | 24 | { |
23 | struct inode *inode = filp->f_dentry->d_inode; | 25 | struct inode *inode = filp->f_dentry->d_inode; |
@@ -158,10 +160,11 @@ flags_out: | |||
158 | goto setversion_out; | 160 | goto setversion_out; |
159 | } | 161 | } |
160 | 162 | ||
163 | mutex_lock(&inode->i_mutex); | ||
161 | handle = ext4_journal_start(inode, 1); | 164 | handle = ext4_journal_start(inode, 1); |
162 | if (IS_ERR(handle)) { | 165 | if (IS_ERR(handle)) { |
163 | err = PTR_ERR(handle); | 166 | err = PTR_ERR(handle); |
164 | goto setversion_out; | 167 | goto unlock_out; |
165 | } | 168 | } |
166 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 169 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
167 | if (err == 0) { | 170 | if (err == 0) { |
@@ -170,6 +173,9 @@ flags_out: | |||
170 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); | 173 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); |
171 | } | 174 | } |
172 | ext4_journal_stop(handle); | 175 | ext4_journal_stop(handle); |
176 | |||
177 | unlock_out: | ||
178 | mutex_unlock(&inode->i_mutex); | ||
173 | setversion_out: | 179 | setversion_out: |
174 | mnt_drop_write_file(filp); | 180 | mnt_drop_write_file(filp); |
175 | return err; | 181 | return err; |
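The EXT4_IOC_SETVERSION fix takes i_mutex before starting the journal handle and drops it only after the handle is stopped, and the new unlock_out label keeps the unwind an exact mirror of the acquisition order. A compact sketch of that shape with stand-in functions, not the kernel API:

#include <stdio.h>

static int start_handle(void)  { return 0; }    /* 0 = ok, <0 = error */
static void stop_handle(void)  { }
static void lock_inode(void)   { puts("lock");   }
static void unlock_inode(void) { puts("unlock"); }

static int set_version(void)
{
        int err;

        lock_inode();                 /* taken before the handle ... */
        err = start_handle();
        if (err < 0)
                goto unlock_out;      /* ... so unwind releases it last */

        /* ... modify the inode under both protections ... */
        stop_handle();
unlock_out:
        unlock_inode();
        return err;
}

int main(void) { return set_version(); }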
@@ -182,19 +188,22 @@ setversion_out: | |||
182 | if (err) | 188 | if (err) |
183 | return err; | 189 | return err; |
184 | 190 | ||
185 | if (get_user(n_blocks_count, (__u32 __user *)arg)) | 191 | if (get_user(n_blocks_count, (__u32 __user *)arg)) { |
186 | return -EFAULT; | 192 | err = -EFAULT; |
193 | goto group_extend_out; | ||
194 | } | ||
187 | 195 | ||
188 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 196 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
189 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | 197 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { |
190 | ext4_msg(sb, KERN_ERR, | 198 | ext4_msg(sb, KERN_ERR, |
191 | "Online resizing not supported with bigalloc"); | 199 | "Online resizing not supported with bigalloc"); |
192 | return -EOPNOTSUPP; | 200 | err = -EOPNOTSUPP; |
201 | goto group_extend_out; | ||
193 | } | 202 | } |
194 | 203 | ||
195 | err = mnt_want_write_file(filp); | 204 | err = mnt_want_write_file(filp); |
196 | if (err) | 205 | if (err) |
197 | return err; | 206 | goto group_extend_out; |
198 | 207 | ||
199 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); | 208 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); |
200 | if (EXT4_SB(sb)->s_journal) { | 209 | if (EXT4_SB(sb)->s_journal) { |
@@ -205,8 +214,8 @@ setversion_out: | |||
205 | if (err == 0) | 214 | if (err == 0) |
206 | err = err2; | 215 | err = err2; |
207 | mnt_drop_write_file(filp); | 216 | mnt_drop_write_file(filp); |
217 | group_extend_out: | ||
208 | ext4_resize_end(sb); | 218 | ext4_resize_end(sb); |
209 | |||
210 | return err; | 219 | return err; |
211 | } | 220 | } |
212 | 221 | ||
@@ -247,8 +256,7 @@ setversion_out: | |||
247 | err = ext4_move_extents(filp, donor_filp, me.orig_start, | 256 | err = ext4_move_extents(filp, donor_filp, me.orig_start, |
248 | me.donor_start, me.len, &me.moved_len); | 257 | me.donor_start, me.len, &me.moved_len); |
249 | mnt_drop_write_file(filp); | 258 | mnt_drop_write_file(filp); |
250 | if (me.moved_len > 0) | 259 | mnt_drop_write(filp->f_path.mnt); |
251 | file_remove_suid(donor_filp); | ||
252 | 260 | ||
253 | if (copy_to_user((struct move_extent __user *)arg, | 261 | if (copy_to_user((struct move_extent __user *)arg, |
254 | &me, sizeof(me))) | 262 | &me, sizeof(me))) |
@@ -267,19 +275,22 @@ mext_out: | |||
267 | return err; | 275 | return err; |
268 | 276 | ||
269 | if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, | 277 | if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, |
270 | sizeof(input))) | 278 | sizeof(input))) { |
271 | return -EFAULT; | 279 | err = -EFAULT; |
280 | goto group_add_out; | ||
281 | } | ||
272 | 282 | ||
273 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 283 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
274 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | 284 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { |
275 | ext4_msg(sb, KERN_ERR, | 285 | ext4_msg(sb, KERN_ERR, |
276 | "Online resizing not supported with bigalloc"); | 286 | "Online resizing not supported with bigalloc"); |
277 | return -EOPNOTSUPP; | 287 | err = -EOPNOTSUPP; |
288 | goto group_add_out; | ||
278 | } | 289 | } |
279 | 290 | ||
280 | err = mnt_want_write_file(filp); | 291 | err = mnt_want_write_file(filp); |
281 | if (err) | 292 | if (err) |
282 | return err; | 293 | goto group_add_out; |
283 | 294 | ||
284 | err = ext4_group_add(sb, &input); | 295 | err = ext4_group_add(sb, &input); |
285 | if (EXT4_SB(sb)->s_journal) { | 296 | if (EXT4_SB(sb)->s_journal) { |
@@ -290,8 +301,8 @@ mext_out: | |||
290 | if (err == 0) | 301 | if (err == 0) |
291 | err = err2; | 302 | err = err2; |
292 | mnt_drop_write_file(filp); | 303 | mnt_drop_write_file(filp); |
304 | group_add_out: | ||
293 | ext4_resize_end(sb); | 305 | ext4_resize_end(sb); |
294 | |||
295 | return err; | 306 | return err; |
296 | } | 307 | } |
297 | 308 | ||
@@ -331,6 +342,60 @@ mext_out: | |||
331 | return err; | 342 | return err; |
332 | } | 343 | } |
333 | 344 | ||
345 | case EXT4_IOC_RESIZE_FS: { | ||
346 | ext4_fsblk_t n_blocks_count; | ||
347 | struct super_block *sb = inode->i_sb; | ||
348 | int err = 0, err2 = 0; | ||
349 | |||
350 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
351 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
352 | ext4_msg(sb, KERN_ERR, | ||
353 | "Online resizing not (yet) supported with bigalloc"); | ||
354 | return -EOPNOTSUPP; | ||
355 | } | ||
356 | |||
357 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
358 | EXT4_FEATURE_INCOMPAT_META_BG)) { | ||
359 | ext4_msg(sb, KERN_ERR, | ||
360 | "Online resizing not (yet) supported with meta_bg"); | ||
361 | return -EOPNOTSUPP; | ||
362 | } | ||
363 | |||
364 | if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, | ||
365 | sizeof(__u64))) { | ||
366 | return -EFAULT; | ||
367 | } | ||
368 | |||
369 | if (n_blocks_count > MAX_32_NUM && | ||
370 | !EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
371 | EXT4_FEATURE_INCOMPAT_64BIT)) { | ||
372 | ext4_msg(sb, KERN_ERR, | ||
373 | "File system only supports 32-bit block numbers"); | ||
374 | return -EOPNOTSUPP; | ||
375 | } | ||
376 | |||
377 | err = ext4_resize_begin(sb); | ||
378 | if (err) | ||
379 | return err; | ||
380 | |||
381 | err = mnt_want_write(filp->f_path.mnt); | ||
382 | if (err) | ||
383 | goto resizefs_out; | ||
384 | |||
385 | err = ext4_resize_fs(sb, n_blocks_count); | ||
386 | if (EXT4_SB(sb)->s_journal) { | ||
387 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | ||
388 | err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); | ||
389 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | ||
390 | } | ||
391 | if (err == 0) | ||
392 | err = err2; | ||
393 | mnt_drop_write(filp->f_path.mnt); | ||
394 | resizefs_out: | ||
395 | ext4_resize_end(sb); | ||
396 | return err; | ||
397 | } | ||
398 | |||
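The new EXT4_IOC_RESIZE_FS case expects a single __u64 block count from userspace. A hedged sketch of a caller follows; the ioctl request number is restated locally and assumed to match the kernel's _IOW('f', 16, __u64) definition, since no exported uapi header carries it at this point:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/types.h>

#ifndef EXT4_IOC_RESIZE_FS
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)  /* assumed definition */
#endif

int main(int argc, char **argv)
{
        __u64 n_blocks;
        int fd;

        if (argc != 3) {
                fprintf(stderr, "usage: %s <mountpoint> <new-block-count>\n",
                        argv[0]);
                return 1;
        }
        n_blocks = strtoull(argv[2], NULL, 0);

        fd = open(argv[1], O_RDONLY);
        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, EXT4_IOC_RESIZE_FS, &n_blocks) < 0) {
                perror("EXT4_IOC_RESIZE_FS");
                return 1;
        }
        return 0;
}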
334 | case FITRIM: | 399 | case FITRIM: |
335 | { | 400 | { |
336 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | 401 | struct request_queue *q = bdev_get_queue(sb->s_bdev); |
@@ -429,6 +494,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
429 | } | 494 | } |
430 | case EXT4_IOC_MOVE_EXT: | 495 | case EXT4_IOC_MOVE_EXT: |
431 | case FITRIM: | 496 | case FITRIM: |
497 | case EXT4_IOC_RESIZE_FS: | ||
432 | break; | 498 | break; |
433 | default: | 499 | default: |
434 | return -ENOIOCTLCMD; | 500 | return -ENOIOCTLCMD; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e2d8be8f28bf..cb990b21c698 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3671,7 +3671,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, | |||
3671 | ext4_group_t group; | 3671 | ext4_group_t group; |
3672 | ext4_grpblk_t bit; | 3672 | ext4_grpblk_t bit; |
3673 | 3673 | ||
3674 | trace_ext4_mb_release_group_pa(pa); | 3674 | trace_ext4_mb_release_group_pa(sb, pa); |
3675 | BUG_ON(pa->pa_deleted == 0); | 3675 | BUG_ON(pa->pa_deleted == 0); |
3676 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3676 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3677 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3677 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 16ac228dbec6..e7d6bb0acfa6 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -12,7 +12,6 @@ | |||
12 | * | 12 | * |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
17 | #include "ext4_jbd2.h" | 16 | #include "ext4_jbd2.h" |
18 | 17 | ||
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 86edc45b52a4..2043f482375d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -2315,7 +2315,7 @@ retry: | |||
2315 | err = PTR_ERR(handle); | 2315 | err = PTR_ERR(handle); |
2316 | goto err_drop_inode; | 2316 | goto err_drop_inode; |
2317 | } | 2317 | } |
2318 | inc_nlink(inode); | 2318 | set_nlink(inode, 1); |
2319 | err = ext4_orphan_del(handle, inode); | 2319 | err = ext4_orphan_del(handle, inode); |
2320 | if (err) { | 2320 | if (err) { |
2321 | ext4_journal_stop(handle); | 2321 | ext4_journal_stop(handle); |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 7e106c810c62..475851896518 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -6,7 +6,6 @@ | |||
6 | * Written by Theodore Ts'o, 2010. | 6 | * Written by Theodore Ts'o, 2010. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
11 | #include <linux/time.h> | 10 | #include <linux/time.h> |
12 | #include <linux/jbd2.h> | 11 | #include <linux/jbd2.h> |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 996780ab4f4e..f9d948f0eb86 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -134,6 +134,172 @@ static int verify_group_input(struct super_block *sb, | |||
134 | return err; | 134 | return err; |
135 | } | 135 | } |
136 | 136 | ||
137 | /* | ||
138 | * ext4_new_flex_group_data is used by the 64bit-resize interface to add | ||
139 | * one flex group at a time. | ||
140 | */ | ||
141 | struct ext4_new_flex_group_data { | ||
142 | struct ext4_new_group_data *groups; /* new_group_data for groups | ||
143 | in the flex group */ | ||
144 | __u16 *bg_flags; /* block group flags of groups | ||
145 | in @groups */ | ||
146 | ext4_group_t count; /* number of groups in @groups | ||
147 | */ | ||
148 | }; | ||
149 | |||
150 | /* | ||
151 | * alloc_flex_gd() allocates an ext4_new_flex_group_data sized for | ||
152 | * @flexbg_size groups. | ||
153 | * | ||
154 | * Returns NULL on failure, otherwise the address of the allocated structure. | ||
155 | */ | ||
156 | static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) | ||
157 | { | ||
158 | struct ext4_new_flex_group_data *flex_gd; | ||
159 | |||
160 | flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS); | ||
161 | if (flex_gd == NULL) | ||
162 | goto out3; | ||
163 | |||
164 | flex_gd->count = flexbg_size; | ||
165 | |||
166 | flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * | ||
167 | flexbg_size, GFP_NOFS); | ||
168 | if (flex_gd->groups == NULL) | ||
169 | goto out2; | ||
170 | |||
171 | flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS); | ||
172 | if (flex_gd->bg_flags == NULL) | ||
173 | goto out1; | ||
174 | |||
175 | return flex_gd; | ||
176 | |||
177 | out1: | ||
178 | kfree(flex_gd->groups); | ||
179 | out2: | ||
180 | kfree(flex_gd); | ||
181 | out3: | ||
182 | return NULL; | ||
183 | } | ||
184 | |||
185 | static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) | ||
186 | { | ||
187 | kfree(flex_gd->bg_flags); | ||
188 | kfree(flex_gd->groups); | ||
189 | kfree(flex_gd); | ||
190 | } | ||
191 | |||
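alloc_flex_gd() and free_flex_gd() form a strict constructor/destructor pair, with the out1/out2/out3 labels ordered so each failure point frees exactly what already exists. A self-contained userspace sketch of the same shape; the field sizes and types are invented:

#include <stdlib.h>

struct flex_gd_demo {
        void *groups;
        unsigned short *bg_flags;
        unsigned long count;
};

static struct flex_gd_demo *alloc_demo(unsigned long n)
{
        struct flex_gd_demo *gd = malloc(sizeof(*gd));

        if (!gd)
                goto out3;
        gd->count = n;
        gd->groups = calloc(n, 64);             /* per-group payload */
        if (!gd->groups)
                goto out2;
        gd->bg_flags = calloc(n, sizeof(*gd->bg_flags));
        if (!gd->bg_flags)
                goto out1;
        return gd;
out1:
        free(gd->groups);                       /* undo in reverse order */
out2:
        free(gd);
out3:
        return NULL;
}

static void free_demo(struct flex_gd_demo *gd)
{
        free(gd->bg_flags);
        free(gd->groups);
        free(gd);
}

int main(void)
{
        struct flex_gd_demo *gd = alloc_demo(16);

        if (gd)
                free_demo(gd);
        return 0;
}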
192 | /* | ||
193 | * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps | ||
194 | * and inode tables for a flex group. | ||
195 | * | ||
196 | * This function is used by 64bit-resize. Note that it allocates group | ||
197 | * tables from the first group contained in @flexgd, which may cover | ||
198 | * only part of a flex group. | ||
199 | * | ||
200 | * @sb: super block of the fs to which the groups belong | ||
201 | */ | ||
202 | static void ext4_alloc_group_tables(struct super_block *sb, | ||
203 | struct ext4_new_flex_group_data *flex_gd, | ||
204 | int flexbg_size) | ||
205 | { | ||
206 | struct ext4_new_group_data *group_data = flex_gd->groups; | ||
207 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
208 | ext4_fsblk_t start_blk; | ||
209 | ext4_fsblk_t last_blk; | ||
210 | ext4_group_t src_group; | ||
211 | ext4_group_t bb_index = 0; | ||
212 | ext4_group_t ib_index = 0; | ||
213 | ext4_group_t it_index = 0; | ||
214 | ext4_group_t group; | ||
215 | ext4_group_t last_group; | ||
216 | unsigned overhead; | ||
217 | |||
218 | BUG_ON(flex_gd->count == 0 || group_data == NULL); | ||
219 | |||
220 | src_group = group_data[0].group; | ||
221 | last_group = src_group + flex_gd->count - 1; | ||
222 | |||
223 | BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) != | ||
224 | (last_group & ~(flexbg_size - 1)))); | ||
225 | next_group: | ||
226 | group = group_data[0].group; | ||
227 | start_blk = ext4_group_first_block_no(sb, src_group); | ||
228 | last_blk = start_blk + group_data[src_group - group].blocks_count; | ||
229 | |||
230 | overhead = ext4_bg_has_super(sb, src_group) ? | ||
231 | (1 + ext4_bg_num_gdb(sb, src_group) + | ||
232 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; | ||
233 | |||
234 | start_blk += overhead; | ||
235 | |||
236 | BUG_ON(src_group >= group_data[0].group + flex_gd->count); | ||
237 | /* Collect as many contiguous blocks as possible. */ | ||
238 | src_group++; | ||
239 | for (; src_group <= last_group; src_group++) | ||
240 | if (!ext4_bg_has_super(sb, src_group)) | ||
241 | last_blk += group_data[src_group - group].blocks_count; | ||
242 | else | ||
243 | break; | ||
244 | |||
245 | /* Allocate block bitmaps */ | ||
246 | for (; bb_index < flex_gd->count; bb_index++) { | ||
247 | if (start_blk >= last_blk) | ||
248 | goto next_group; | ||
249 | group_data[bb_index].block_bitmap = start_blk++; | ||
250 | ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL); | ||
251 | group -= group_data[0].group; | ||
252 | group_data[group].free_blocks_count--; | ||
253 | if (flexbg_size > 1) | ||
254 | flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; | ||
255 | } | ||
256 | |||
257 | /* Allocate inode bitmaps */ | ||
258 | for (; ib_index < flex_gd->count; ib_index++) { | ||
259 | if (start_blk >= last_blk) | ||
260 | goto next_group; | ||
261 | group_data[ib_index].inode_bitmap = start_blk++; | ||
262 | ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL); | ||
263 | group -= group_data[0].group; | ||
264 | group_data[group].free_blocks_count--; | ||
265 | if (flexbg_size > 1) | ||
266 | flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; | ||
267 | } | ||
268 | |||
269 | /* Allocate inode tables */ | ||
270 | for (; it_index < flex_gd->count; it_index++) { | ||
271 | if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk) | ||
272 | goto next_group; | ||
273 | group_data[it_index].inode_table = start_blk; | ||
274 | ext4_get_group_no_and_offset(sb, start_blk, &group, NULL); | ||
275 | group -= group_data[0].group; | ||
276 | group_data[group].free_blocks_count -= | ||
277 | EXT4_SB(sb)->s_itb_per_group; | ||
278 | if (flexbg_size > 1) | ||
279 | flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; | ||
280 | |||
281 | start_blk += EXT4_SB(sb)->s_itb_per_group; | ||
282 | } | ||
283 | |||
284 | if (test_opt(sb, DEBUG)) { | ||
285 | int i; | ||
286 | group = group_data[0].group; | ||
287 | |||
288 | printk(KERN_DEBUG "EXT4-fs: adding a flex group with " | ||
289 | "%d groups, flexbg size is %d:\n", flex_gd->count, | ||
290 | flexbg_size); | ||
291 | |||
292 | for (i = 0; i < flex_gd->count; i++) { | ||
293 | printk(KERN_DEBUG "adding %s group %u: %u " | ||
294 | "blocks (%d free)\n", | ||
295 | ext4_bg_has_super(sb, group + i) ? "normal" : | ||
296 | "no-super", group + i, | ||
297 | group_data[i].blocks_count, | ||
298 | group_data[i].free_blocks_count); | ||
299 | } | ||
300 | } | ||
301 | } | ||
302 | |||
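The next_group loop above grows a run of free blocks across consecutive groups for as long as those groups carry no superblock/GDT backup, then carves the bitmaps and inode tables out of that run. A toy sketch of the run-growing step; the has_super() rule and every number below are invented for illustration (the real placement follows powers of 3, 5 and 7):

#include <stdio.h>

/* Pretend backup superblocks land only in groups 0, 1, 5 and 7. */
static int has_super(unsigned g)
{
        return g <= 1 || g == 5 || g == 7;
}

int main(void)
{
        unsigned long long blocks_per_group = 32768, overhead = 3;
        unsigned g = 2, last_group = 6;
        unsigned long long start = g * blocks_per_group, last;

        last = start + blocks_per_group;          /* end of group 2 */
        if (has_super(g))
                start += overhead;                /* skip sb + GDT copies */
        for (g = g + 1; g <= last_group; g++) {
                if (has_super(g))
                        break;                    /* run ends here */
                last += blocks_per_group;         /* groups 3, 4 extend it */
        }
        printf("usable run: [%llu, %llu)\n", start, last);
        return 0;
}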
137 | static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, | 303 | static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, |
138 | ext4_fsblk_t blk) | 304 | ext4_fsblk_t blk) |
139 | { | 305 | { |
@@ -179,131 +345,250 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh) | |||
179 | } | 345 | } |
180 | 346 | ||
181 | /* | 347 | /* |
182 | * Set up the block and inode bitmaps, and the inode table for the new group. | 348 | * set_flexbg_block_bitmap() marks @count blocks starting at @block as used. |
349 | * | ||
350 | * Helper function for setup_new_flex_group_blocks(). | ||
351 | * | ||
352 | * @sb: super block | ||
353 | * @handle: journal handle | ||
354 | * @flex_gd: flex group data | ||
355 | */ | ||
356 | static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, | ||
357 | struct ext4_new_flex_group_data *flex_gd, | ||
358 | ext4_fsblk_t block, ext4_group_t count) | ||
359 | { | ||
360 | ext4_group_t count2; | ||
361 | |||
362 | ext4_debug("mark blocks [%llu/%u] used\n", block, count); | ||
363 | for (count2 = count; count > 0; count -= count2, block += count2) { | ||
364 | ext4_fsblk_t start; | ||
365 | struct buffer_head *bh; | ||
366 | ext4_group_t group; | ||
367 | int err; | ||
368 | |||
369 | ext4_get_group_no_and_offset(sb, block, &group, NULL); | ||
370 | start = ext4_group_first_block_no(sb, group); | ||
371 | group -= flex_gd->groups[0].group; | ||
372 | |||
373 | count2 = sb->s_blocksize * 8 - (block - start); | ||
374 | if (count2 > count) | ||
375 | count2 = count; | ||
376 | |||
377 | if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) { | ||
378 | BUG_ON(flex_gd->count > 1); | ||
379 | continue; | ||
380 | } | ||
381 | |||
382 | err = extend_or_restart_transaction(handle, 1); | ||
383 | if (err) | ||
384 | return err; | ||
385 | |||
386 | bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap); | ||
387 | if (!bh) | ||
388 | return -EIO; | ||
389 | |||
390 | err = ext4_journal_get_write_access(handle, bh); | ||
391 | if (err) | ||
392 | return err; | ||
393 | ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, | ||
394 | block - start, count2); | ||
395 | ext4_set_bits(bh->b_data, block - start, count2); | ||
396 | |||
397 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
398 | if (unlikely(err)) | ||
399 | return err; | ||
400 | brelse(bh); | ||
401 | } | ||
402 | |||
403 | return 0; | ||
404 | } | ||
405 | |||
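set_flexbg_block_bitmap() clips each pass to the bits remaining in the current group's bitmap, which is what the count2 recomputation inside the loop does. A standalone sketch of the same clipping loop with toy numbers:

#include <stdio.h>

int main(void)
{
        unsigned long long block = 100, start;
        unsigned long count = 500, count2;
        unsigned long bits_per_bitmap = 128;    /* sb->s_blocksize * 8 */

        for (count2 = count; count > 0; count -= count2, block += count2) {
                start = block - (block % bits_per_bitmap);  /* bitmap base */
                count2 = bits_per_bitmap - (block - start); /* room left */
                if (count2 > count)
                        count2 = count;
                printf("mark [%llu, +%lu)\n", block, count2);
        }
        return 0;
}

With the figures above, the run [100, 600) comes out as one 28-bit tail, three full 128-bit bitmaps, and an 88-bit head of the last bitmap.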
406 | /* | ||
407 | * Set up the block and inode bitmaps, and the inode table for the new groups. | ||
183 | * This doesn't need to be part of the main transaction, since we are only | 408 | * This doesn't need to be part of the main transaction, since we are only |
184 | * changing blocks outside the actual filesystem. We still do journaling to | 409 | * changing blocks outside the actual filesystem. We still do journaling to |
185 | * ensure the recovery is correct in case of a failure just after resize. | 410 | * ensure the recovery is correct in case of a failure just after resize. |
186 | * If any part of this fails, we simply abort the resize. | 411 | * If any part of this fails, we simply abort the resize. |
412 | * | ||
413 | * setup_new_flex_group_blocks handles a flex group as follow: | ||
414 | * 1. copy super block and GDT, and initialize group tables if necessary. | ||
415 | * In this step, we only set bits in blocks bitmaps for blocks taken by | ||
416 | * super block and GDT. | ||
417 | * 2. allocate group tables in block bitmaps, that is, set bits in block | ||
418 | * bitmap for blocks taken by group tables. | ||
187 | */ | 419 | */ |
188 | static int setup_new_group_blocks(struct super_block *sb, | 420 | static int setup_new_flex_group_blocks(struct super_block *sb, |
189 | struct ext4_new_group_data *input) | 421 | struct ext4_new_flex_group_data *flex_gd) |
190 | { | 422 | { |
423 | int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group}; | ||
424 | ext4_fsblk_t start; | ||
425 | ext4_fsblk_t block; | ||
191 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 426 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
192 | ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); | 427 | struct ext4_super_block *es = sbi->s_es; |
193 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? | 428 | struct ext4_new_group_data *group_data = flex_gd->groups; |
194 | le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; | 429 | __u16 *bg_flags = flex_gd->bg_flags; |
195 | unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group); | ||
196 | struct buffer_head *bh; | ||
197 | handle_t *handle; | 430 | handle_t *handle; |
198 | ext4_fsblk_t block; | 431 | ext4_group_t group, count; |
199 | ext4_grpblk_t bit; | 432 | struct buffer_head *bh = NULL; |
200 | int i; | 433 | int reserved_gdb, i, j, err = 0, err2; |
201 | int err = 0, err2; | 434 | |
435 | BUG_ON(!flex_gd->count || !group_data || | ||
436 | group_data[0].group != sbi->s_groups_count); | ||
437 | |||
438 | reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); | ||
202 | 439 | ||
203 | /* This transaction may be extended/restarted along the way */ | 440 | /* This transaction may be extended/restarted along the way */ |
204 | handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); | 441 | handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); |
205 | |||
206 | if (IS_ERR(handle)) | 442 | if (IS_ERR(handle)) |
207 | return PTR_ERR(handle); | 443 | return PTR_ERR(handle); |
208 | 444 | ||
209 | BUG_ON(input->group != sbi->s_groups_count); | 445 | group = group_data[0].group; |
446 | for (i = 0; i < flex_gd->count; i++, group++) { | ||
447 | unsigned long gdblocks; | ||
210 | 448 | ||
211 | /* Copy all of the GDT blocks into the backup in this group */ | 449 | gdblocks = ext4_bg_num_gdb(sb, group); |
212 | for (i = 0, bit = 1, block = start + 1; | 450 | start = ext4_group_first_block_no(sb, group); |
213 | i < gdblocks; i++, block++, bit++) { | ||
214 | struct buffer_head *gdb; | ||
215 | 451 | ||
216 | ext4_debug("update backup group %#04llx (+%d)\n", block, bit); | 452 | /* Copy all of the GDT blocks into the backup in this group */ |
217 | err = extend_or_restart_transaction(handle, 1); | 453 | for (j = 0, block = start + 1; j < gdblocks; j++, block++) { |
218 | if (err) | 454 | struct buffer_head *gdb; |
219 | goto exit_journal; | ||
220 | 455 | ||
221 | gdb = sb_getblk(sb, block); | 456 | ext4_debug("update backup group %#04llx\n", block); |
222 | if (!gdb) { | 457 | err = extend_or_restart_transaction(handle, 1); |
223 | err = -EIO; | 458 | if (err) |
224 | goto exit_journal; | 459 | goto out; |
225 | } | 460 | |
226 | if ((err = ext4_journal_get_write_access(handle, gdb))) { | 461 | gdb = sb_getblk(sb, block); |
462 | if (!gdb) { | ||
463 | err = -EIO; | ||
464 | goto out; | ||
465 | } | ||
466 | |||
467 | err = ext4_journal_get_write_access(handle, gdb); | ||
468 | if (err) { | ||
469 | brelse(gdb); | ||
470 | goto out; | ||
471 | } | ||
472 | memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, | ||
473 | gdb->b_size); | ||
474 | set_buffer_uptodate(gdb); | ||
475 | |||
476 | err = ext4_handle_dirty_metadata(handle, NULL, gdb); | ||
477 | if (unlikely(err)) { | ||
478 | brelse(gdb); | ||
479 | goto out; | ||
480 | } | ||
227 | brelse(gdb); | 481 | brelse(gdb); |
228 | goto exit_journal; | ||
229 | } | 482 | } |
230 | memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); | 483 | |
231 | set_buffer_uptodate(gdb); | 484 | /* Zero out all of the reserved backup group descriptor |
232 | err = ext4_handle_dirty_metadata(handle, NULL, gdb); | 485 | * table blocks |
233 | if (unlikely(err)) { | 486 | */ |
234 | brelse(gdb); | 487 | if (ext4_bg_has_super(sb, group)) { |
235 | goto exit_journal; | 488 | err = sb_issue_zeroout(sb, gdblocks + start + 1, |
489 | reserved_gdb, GFP_NOFS); | ||
490 | if (err) | ||
491 | goto out; | ||
236 | } | 492 | } |
237 | brelse(gdb); | ||
238 | } | ||
239 | 493 | ||
240 | /* Zero out all of the reserved backup group descriptor table blocks */ | 494 | /* Initialize group tables of the group @group */ |
241 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", | 495 | if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) |
242 | block, sbi->s_itb_per_group); | 496 | goto handle_bb; |
243 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, | ||
244 | GFP_NOFS); | ||
245 | if (err) | ||
246 | goto exit_journal; | ||
247 | 497 | ||
248 | err = extend_or_restart_transaction(handle, 2); | 498 | /* Zero out all of the inode table blocks */ |
249 | if (err) | 499 | block = group_data[i].inode_table; |
250 | goto exit_journal; | 500 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", |
501 | block, sbi->s_itb_per_group); | ||
502 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, | ||
503 | GFP_NOFS); | ||
504 | if (err) | ||
505 | goto out; | ||
251 | 506 | ||
252 | bh = bclean(handle, sb, input->block_bitmap); | 507 | handle_bb: |
253 | if (IS_ERR(bh)) { | 508 | if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT) |
254 | err = PTR_ERR(bh); | 509 | goto handle_ib; |
255 | goto exit_journal; | ||
256 | } | ||
257 | 510 | ||
258 | if (ext4_bg_has_super(sb, input->group)) { | 511 | /* Initialize block bitmap of the @group */ |
259 | ext4_debug("mark backup group tables %#04llx (+0)\n", start); | 512 | block = group_data[i].block_bitmap; |
260 | ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); | 513 | err = extend_or_restart_transaction(handle, 1); |
261 | } | 514 | if (err) |
515 | goto out; | ||
262 | 516 | ||
263 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, | 517 | bh = bclean(handle, sb, block); |
264 | input->block_bitmap - start); | 518 | if (IS_ERR(bh)) { |
265 | ext4_set_bit(input->block_bitmap - start, bh->b_data); | 519 | err = PTR_ERR(bh); |
266 | ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, | 520 | goto out; |
267 | input->inode_bitmap - start); | 521 | } |
268 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); | 522 | if (ext4_bg_has_super(sb, group)) { |
269 | 523 | ext4_debug("mark backup superblock %#04llx (+0)\n", | |
270 | /* Zero out all of the inode table blocks */ | 524 | start); |
271 | block = input->inode_table; | 525 | ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + |
272 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", | 526 | 1); |
273 | block, sbi->s_itb_per_group); | 527 | } |
274 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); | 528 | ext4_mark_bitmap_end(group_data[i].blocks_count, |
275 | if (err) | 529 | sb->s_blocksize * 8, bh->b_data); |
276 | goto exit_bh; | 530 | err = ext4_handle_dirty_metadata(handle, NULL, bh); |
277 | ext4_set_bits(bh->b_data, input->inode_table - start, | 531 | if (err) |
278 | sbi->s_itb_per_group); | 532 | goto out; |
533 | brelse(bh); | ||
279 | 534 | ||
535 | handle_ib: | ||
536 | if (bg_flags[i] & EXT4_BG_INODE_UNINIT) | ||
537 | continue; | ||
280 | 538 | ||
281 | ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, | 539 | /* Initialize inode bitmap of the @group */ |
282 | bh->b_data); | 540 | block = group_data[i].inode_bitmap; |
283 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | 541 | err = extend_or_restart_transaction(handle, 1); |
284 | if (unlikely(err)) { | 542 | if (err) |
285 | ext4_std_error(sb, err); | 543 | goto out; |
286 | goto exit_bh; | 544 | /* Mark unused entries in inode bitmap used */ |
545 | bh = bclean(handle, sb, block); | ||
546 | if (IS_ERR(bh)) { | ||
547 | err = PTR_ERR(bh); | ||
548 | goto out; | ||
549 | } | ||
550 | |||
551 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), | ||
552 | sb->s_blocksize * 8, bh->b_data); | ||
553 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
554 | if (err) | ||
555 | goto out; | ||
556 | brelse(bh); | ||
287 | } | 557 | } |
288 | brelse(bh); | 558 | bh = NULL; |
289 | /* Mark unused entries in inode bitmap used */ | 559 | |
290 | ext4_debug("clear inode bitmap %#04llx (+%llu)\n", | 560 | /* Mark group tables in block bitmap */ |
291 | input->inode_bitmap, input->inode_bitmap - start); | 561 | for (j = 0; j < GROUP_TABLE_COUNT; j++) { |
292 | if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { | 562 | count = group_table_count[j]; |
293 | err = PTR_ERR(bh); | 563 | start = (&group_data[0].block_bitmap)[j]; |
294 | goto exit_journal; | 564 | block = start; |
565 | for (i = 1; i < flex_gd->count; i++) { | ||
566 | block += group_table_count[j]; | ||
567 | if (block == (&group_data[i].block_bitmap)[j]) { | ||
568 | count += group_table_count[j]; | ||
569 | continue; | ||
570 | } | ||
571 | err = set_flexbg_block_bitmap(sb, handle, | ||
572 | flex_gd, start, count); | ||
573 | if (err) | ||
574 | goto out; | ||
575 | count = group_table_count[j]; | ||
576 | start = group_data[i].block_bitmap; | ||
577 | block = start; | ||
578 | } | ||
579 | |||
580 | if (count) { | ||
581 | err = set_flexbg_block_bitmap(sb, handle, | ||
582 | flex_gd, start, count); | ||
583 | if (err) | ||
584 | goto out; | ||
585 | } | ||
295 | } | 586 | } |
296 | 587 | ||
297 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 588 | out: |
298 | bh->b_data); | ||
299 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
300 | if (unlikely(err)) | ||
301 | ext4_std_error(sb, err); | ||
302 | exit_bh: | ||
303 | brelse(bh); | 589 | brelse(bh); |
304 | 590 | err2 = ext4_journal_stop(handle); | |
305 | exit_journal: | 591 | if (err2 && !err) |
306 | if ((err2 = ext4_journal_stop(handle)) && !err) | ||
307 | err = err2; | 592 | err = err2; |
308 | 593 | ||
309 | return err; | 594 | return err; |
@@ -351,10 +636,10 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, | |||
351 | * groups in current filesystem that have BACKUPS, or -ve error code. | 636 | * groups in current filesystem that have BACKUPS, or -ve error code. |
352 | */ | 637 | */ |
353 | static int verify_reserved_gdb(struct super_block *sb, | 638 | static int verify_reserved_gdb(struct super_block *sb, |
639 | ext4_group_t end, | ||
354 | struct buffer_head *primary) | 640 | struct buffer_head *primary) |
355 | { | 641 | { |
356 | const ext4_fsblk_t blk = primary->b_blocknr; | 642 | const ext4_fsblk_t blk = primary->b_blocknr; |
357 | const ext4_group_t end = EXT4_SB(sb)->s_groups_count; | ||
358 | unsigned three = 1; | 643 | unsigned three = 1; |
359 | unsigned five = 5; | 644 | unsigned five = 5; |
360 | unsigned seven = 7; | 645 | unsigned seven = 7; |
@@ -429,7 +714,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
429 | if (!gdb_bh) | 714 | if (!gdb_bh) |
430 | return -EIO; | 715 | return -EIO; |
431 | 716 | ||
432 | gdbackups = verify_reserved_gdb(sb, gdb_bh); | 717 | gdbackups = verify_reserved_gdb(sb, group, gdb_bh); |
433 | if (gdbackups < 0) { | 718 | if (gdbackups < 0) { |
434 | err = gdbackups; | 719 | err = gdbackups; |
435 | goto exit_bh; | 720 | goto exit_bh; |
@@ -592,7 +877,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, | |||
592 | err = -EIO; | 877 | err = -EIO; |
593 | goto exit_bh; | 878 | goto exit_bh; |
594 | } | 879 | } |
595 | if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { | 880 | gdbackups = verify_reserved_gdb(sb, group, primary[res]); |
881 | if (gdbackups < 0) { | ||
596 | brelse(primary[res]); | 882 | brelse(primary[res]); |
597 | err = gdbackups; | 883 | err = gdbackups; |
598 | goto exit_bh; | 884 | goto exit_bh; |
@@ -735,6 +1021,348 @@ exit_err: | |||
735 | } | 1021 | } |
736 | } | 1022 | } |
737 | 1023 | ||
1024 | /* | ||
1025 | * ext4_add_new_descs() adds @count group descriptors for groups | ||
1026 | * starting at @group. | ||
1027 | * | ||
1028 | * @handle: journal handle | ||
1029 | * @sb: super block | ||
1030 | * @group: the group no. of the first group desc to be added | ||
1031 | * @resize_inode: the resize inode | ||
1032 | * @count: number of group descriptors to be added | ||
1033 | */ | ||
1034 | static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, | ||
1035 | ext4_group_t group, struct inode *resize_inode, | ||
1036 | ext4_group_t count) | ||
1037 | { | ||
1038 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1039 | struct ext4_super_block *es = sbi->s_es; | ||
1040 | struct buffer_head *gdb_bh; | ||
1041 | int i, gdb_off, gdb_num, err = 0; | ||
1042 | |||
1043 | for (i = 0; i < count; i++, group++) { | ||
1044 | int reserved_gdb = ext4_bg_has_super(sb, group) ? | ||
1045 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; | ||
1046 | |||
1047 | gdb_off = group % EXT4_DESC_PER_BLOCK(sb); | ||
1048 | gdb_num = group / EXT4_DESC_PER_BLOCK(sb); | ||
1049 | |||
1050 | /* | ||
1051 | * We will only either add reserved group blocks to a backup group | ||
1052 | * or remove reserved blocks for the first group in a new group block. | ||
1053 | * Doing both would mean more complex code, and sane people don't | ||
1054 | * use non-sparse filesystems anymore. This is already checked above. | ||
1055 | */ | ||
1056 | if (gdb_off) { | ||
1057 | gdb_bh = sbi->s_group_desc[gdb_num]; | ||
1058 | err = ext4_journal_get_write_access(handle, gdb_bh); | ||
1059 | |||
1060 | if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) | ||
1061 | err = reserve_backup_gdb(handle, resize_inode, group); | ||
1062 | } else | ||
1063 | err = add_new_gdb(handle, resize_inode, group); | ||
1064 | if (err) | ||
1065 | break; | ||
1066 | } | ||
1067 | return err; | ||
1068 | } | ||
1069 | |||
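The gdb_off/gdb_num arithmetic above locates a group's descriptor: slot group % EXT4_DESC_PER_BLOCK(sb) inside GDT block group / EXT4_DESC_PER_BLOCK(sb), and gdb_off == 0 means the group opens a brand-new GDT block, hence the add_new_gdb() path. A sketch of the arithmetic assuming a 4K block size and 64-byte descriptors (both assumptions of this example):

#include <stdio.h>

int main(void)
{
        unsigned descs_per_block = 4096 / 64;   /* assumed geometry */
        unsigned group = 130;

        printf("group %u -> gdb_num %u, gdb_off %u\n",
               group, group / descs_per_block, group % descs_per_block);
        /* gdb_off == 0 would mean this group opens a new GDT block,
         * which is why that case takes the add_new_gdb() path. */
        return 0;
}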
1070 | /* | ||
1071 | * ext4_setup_new_descs() sets up the group descriptors of a flex bg | ||
1072 | */ | ||
1073 | static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, | ||
1074 | struct ext4_new_flex_group_data *flex_gd) | ||
1075 | { | ||
1076 | struct ext4_new_group_data *group_data = flex_gd->groups; | ||
1077 | struct ext4_group_desc *gdp; | ||
1078 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1079 | struct buffer_head *gdb_bh; | ||
1080 | ext4_group_t group; | ||
1081 | __u16 *bg_flags = flex_gd->bg_flags; | ||
1082 | int i, gdb_off, gdb_num, err = 0; | ||
1083 | |||
1084 | |||
1085 | for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) { | ||
1086 | group = group_data->group; | ||
1087 | |||
1088 | gdb_off = group % EXT4_DESC_PER_BLOCK(sb); | ||
1089 | gdb_num = group / EXT4_DESC_PER_BLOCK(sb); | ||
1090 | |||
1091 | /* | ||
1092 | * get_write_access() has been called on gdb_bh by ext4_add_new_descs(). | ||
1093 | */ | ||
1094 | gdb_bh = sbi->s_group_desc[gdb_num]; | ||
1095 | /* Update group descriptor block for new group */ | ||
1096 | gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data + | ||
1097 | gdb_off * EXT4_DESC_SIZE(sb)); | ||
1098 | |||
1099 | memset(gdp, 0, EXT4_DESC_SIZE(sb)); | ||
1100 | ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); | ||
1101 | ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); | ||
1102 | ext4_inode_table_set(sb, gdp, group_data->inode_table); | ||
1103 | ext4_free_group_clusters_set(sb, gdp, | ||
1104 | EXT4_B2C(sbi, group_data->free_blocks_count)); | ||
1105 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); | ||
1106 | gdp->bg_flags = cpu_to_le16(*bg_flags); | ||
1107 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
1108 | |||
1109 | err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); | ||
1110 | if (unlikely(err)) { | ||
1111 | ext4_std_error(sb, err); | ||
1112 | break; | ||
1113 | } | ||
1114 | |||
1115 | /* | ||
1116 | * We can allocate memory for mb_alloc based on the new group | ||
1117 | * descriptor | ||
1118 | */ | ||
1119 | err = ext4_mb_add_groupinfo(sb, group, gdp); | ||
1120 | if (err) | ||
1121 | break; | ||
1122 | } | ||
1123 | return err; | ||
1124 | } | ||
1125 | |||
1126 | /* | ||
1127 | * ext4_update_super() updates the super block so that the newly added | ||
1128 | * groups can be seen by the filesystem. | ||
1129 | * | ||
1130 | * @sb: super block | ||
1131 | * @flex_gd: new added groups | ||
1132 | */ | ||
1133 | static void ext4_update_super(struct super_block *sb, | ||
1134 | struct ext4_new_flex_group_data *flex_gd) | ||
1135 | { | ||
1136 | ext4_fsblk_t blocks_count = 0; | ||
1137 | ext4_fsblk_t free_blocks = 0; | ||
1138 | ext4_fsblk_t reserved_blocks = 0; | ||
1139 | struct ext4_new_group_data *group_data = flex_gd->groups; | ||
1140 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1141 | struct ext4_super_block *es = sbi->s_es; | ||
1142 | int i; | ||
1143 | |||
1144 | BUG_ON(flex_gd->count == 0 || group_data == NULL); | ||
1145 | /* | ||
1146 | * Make the new blocks and inodes valid next. We do this before | ||
1147 | * increasing the group count so that once the group is enabled, | ||
1148 | * all of its blocks and inodes are already valid. | ||
1149 | * | ||
1150 | * We always allocate group-by-group, then block-by-block or | ||
1151 | * inode-by-inode within a group, so enabling these | ||
1152 | * blocks/inodes before the group is live won't actually let us | ||
1153 | * allocate the new space yet. | ||
1154 | */ | ||
1155 | for (i = 0; i < flex_gd->count; i++) { | ||
1156 | blocks_count += group_data[i].blocks_count; | ||
1157 | free_blocks += group_data[i].free_blocks_count; | ||
1158 | } | ||
1159 | |||
1160 | reserved_blocks = ext4_r_blocks_count(es) * 100; | ||
1161 | do_div(reserved_blocks, ext4_blocks_count(es)); | ||
1162 | reserved_blocks *= blocks_count; | ||
1163 | do_div(reserved_blocks, 100); | ||
1164 | |||
1165 | ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); | ||
1166 | le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * | ||
1167 | flex_gd->count); | ||
1168 | |||
1169 | /* | ||
1170 | * We need to protect s_groups_count against other CPUs seeing | ||
1171 | * inconsistent state in the superblock. | ||
1172 | * | ||
1173 | * The precise rules we use are: | ||
1174 | * | ||
1175 | * * Writers must perform a smp_wmb() after updating all | ||
1176 | * dependent data and before modifying the groups count | ||
1177 | * | ||
1178 | * * Readers must perform an smp_rmb() after reading the groups | ||
1179 | * count and before reading any dependent data. | ||
1180 | * | ||
1181 | * NB. These rules can be relaxed when checking the group count | ||
1182 | * while freeing data, as we can only allocate from a block | ||
1183 | * group after serialising against the group count, and we can | ||
1184 | * only then free after serialising in turn against that | ||
1185 | * allocation. | ||
1186 | */ | ||
1187 | smp_wmb(); | ||
1188 | |||
1189 | /* Update the global fs size fields */ | ||
1190 | sbi->s_groups_count += flex_gd->count; | ||
1191 | |||
1192 | /* Update the reserved block counts only once the new group is | ||
1193 | * active. */ | ||
1194 | ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + | ||
1195 | reserved_blocks); | ||
1196 | |||
1197 | /* Update the free space counts */ | ||
1198 | percpu_counter_add(&sbi->s_freeclusters_counter, | ||
1199 | EXT4_B2C(sbi, free_blocks)); | ||
1200 | percpu_counter_add(&sbi->s_freeinodes_counter, | ||
1201 | EXT4_INODES_PER_GROUP(sb) * flex_gd->count); | ||
1202 | |||
1203 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
1204 | EXT4_FEATURE_INCOMPAT_FLEX_BG) && | ||
1205 | sbi->s_log_groups_per_flex) { | ||
1206 | ext4_group_t flex_group; | ||
1207 | flex_group = ext4_flex_group(sbi, group_data[0].group); | ||
1208 | atomic_add(EXT4_B2C(sbi, free_blocks), | ||
1209 | &sbi->s_flex_groups[flex_group].free_clusters); | ||
1210 | atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, | ||
1211 | &sbi->s_flex_groups[flex_group].free_inodes); | ||
1212 | } | ||
1213 | |||
1214 | if (test_opt(sb, DEBUG)) | ||
1215 | printk(KERN_DEBUG "EXT4-fs: added group %u: " | ||
1216 | "%llu blocks (%llu free, %llu reserved)\n", flex_gd->count, | ||
1217 | blocks_count, free_blocks, reserved_blocks); | ||
1218 | } | ||
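The smp_wmb()/smp_rmb() pairing documented in ext4_update_super() is the classic publish pattern: write every piece of dependent data, fence, then bump the count; readers mirror it with a load, a fence, then the data reads. A userspace sketch using C11 fences in place of the kernel barriers (all names here are illustrative, not ext4's):

    #include <stdatomic.h>

    #define MAX_GROUPS 1024

    struct desc { long free_blocks; };
    static struct desc descs[MAX_GROUPS];
    static _Atomic unsigned groups_count;

    static void publish_group(unsigned g, long free)
    {
            descs[g].free_blocks = free;                    /* dependent data first */
            atomic_thread_fence(memory_order_release);      /* smp_wmb() analogue */
            atomic_store_explicit(&groups_count, g + 1,     /* group now visible */
                                  memory_order_relaxed);
    }

    static long peek_last_group(void)
    {
            unsigned n = atomic_load_explicit(&groups_count,
                                              memory_order_relaxed);
            atomic_thread_fence(memory_order_acquire);      /* smp_rmb() analogue */
            return n ? descs[n - 1].free_blocks : -1;       /* data is valid */
    }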
1219 | |||
1220 | /* Add a flex group to an fs. Ensure we handle all possible error conditions | ||
1221 | * _before_ we start modifying the filesystem, because we cannot abort the | ||
1222 | * transaction and not have it write the data to disk. | ||
1223 | */ | ||
1224 | static int ext4_flex_group_add(struct super_block *sb, | ||
1225 | struct inode *resize_inode, | ||
1226 | struct ext4_new_flex_group_data *flex_gd) | ||
1227 | { | ||
1228 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1229 | struct ext4_super_block *es = sbi->s_es; | ||
1230 | ext4_fsblk_t o_blocks_count; | ||
1231 | ext4_grpblk_t last; | ||
1232 | ext4_group_t group; | ||
1233 | handle_t *handle; | ||
1234 | unsigned reserved_gdb; | ||
1235 | int err = 0, err2 = 0, credit; | ||
1236 | |||
1237 | BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags); | ||
1238 | |||
1239 | reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); | ||
1240 | o_blocks_count = ext4_blocks_count(es); | ||
1241 | ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); | ||
1242 | BUG_ON(last); | ||
1243 | |||
1244 | err = setup_new_flex_group_blocks(sb, flex_gd); | ||
1245 | if (err) | ||
1246 | goto exit; | ||
1247 | /* | ||
1248 | * We will always be modifying at least the superblock and GDT | ||
1249 | * block. If we are adding a group past the last current GDT block, | ||
1250 | * we will also modify the inode and the dindirect block. If we | ||
1251 | * are adding a group with superblock/GDT backups we will also | ||
1252 | * modify each of the reserved GDT dindirect blocks. | ||
1253 | */ | ||
1254 | credit = flex_gd->count * 4 + reserved_gdb; | ||
1255 | handle = ext4_journal_start_sb(sb, credit); | ||
1256 | if (IS_ERR(handle)) { | ||
1257 | err = PTR_ERR(handle); | ||
1258 | goto exit; | ||
1259 | } | ||
1260 | |||
1261 | err = ext4_journal_get_write_access(handle, sbi->s_sbh); | ||
1262 | if (err) | ||
1263 | goto exit_journal; | ||
1264 | |||
1265 | group = flex_gd->groups[0].group; | ||
1266 | BUG_ON(group != EXT4_SB(sb)->s_groups_count); | ||
1267 | err = ext4_add_new_descs(handle, sb, group, | ||
1268 | resize_inode, flex_gd->count); | ||
1269 | if (err) | ||
1270 | goto exit_journal; | ||
1271 | |||
1272 | err = ext4_setup_new_descs(handle, sb, flex_gd); | ||
1273 | if (err) | ||
1274 | goto exit_journal; | ||
1275 | |||
1276 | ext4_update_super(sb, flex_gd); | ||
1277 | |||
1278 | err = ext4_handle_dirty_super(handle, sb); | ||
1279 | |||
1280 | exit_journal: | ||
1281 | err2 = ext4_journal_stop(handle); | ||
1282 | if (!err) | ||
1283 | err = err2; | ||
1284 | |||
1285 | if (!err) { | ||
1286 | int i; | ||
1287 | update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, | ||
1288 | sizeof(struct ext4_super_block)); | ||
1289 | for (i = 0; i < flex_gd->count; i++, group++) { | ||
1290 | struct buffer_head *gdb_bh; | ||
1291 | int gdb_num; | ||
1292 | gdb_num = group / EXT4_DESC_PER_BLOCK(sb); | ||
1293 | gdb_bh = sbi->s_group_desc[gdb_num]; | ||
1294 | update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, | ||
1295 | gdb_bh->b_size); | ||
1296 | } | ||
1297 | } | ||
1298 | exit: | ||
1299 | return err; | ||
1300 | } | ||
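The credit estimate in ext4_flex_group_add() budgets four buffers per added group (superblock, GDT block, resize inode, dindirect block) plus one per reserved GDT block, as the comment before ext4_journal_start_sb() explains. Reduced to arithmetic, and read as a worst-case bound rather than an exact count:

    /* worst-case journal credits for adding 'count' groups */
    static unsigned resize_credits(unsigned count, unsigned reserved_gdb)
    {
            return count * 4 + reserved_gdb;
    }
    /* e.g. a 16-group flex batch with 128 reserved GDT blocks: 16*4 + 128 = 192 */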
1301 | |||
1302 | static int ext4_setup_next_flex_gd(struct super_block *sb, | ||
1303 | struct ext4_new_flex_group_data *flex_gd, | ||
1304 | ext4_fsblk_t n_blocks_count, | ||
1305 | unsigned long flexbg_size) | ||
1306 | { | ||
1307 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
1308 | struct ext4_new_group_data *group_data = flex_gd->groups; | ||
1309 | ext4_fsblk_t o_blocks_count; | ||
1310 | ext4_group_t n_group; | ||
1311 | ext4_group_t group; | ||
1312 | ext4_group_t last_group; | ||
1313 | ext4_grpblk_t last; | ||
1314 | ext4_grpblk_t blocks_per_group; | ||
1315 | unsigned long i; | ||
1316 | |||
1317 | blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb); | ||
1318 | |||
1319 | o_blocks_count = ext4_blocks_count(es); | ||
1320 | |||
1321 | if (o_blocks_count == n_blocks_count) | ||
1322 | return 0; | ||
1323 | |||
1324 | ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); | ||
1325 | BUG_ON(last); | ||
1326 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last); | ||
1327 | |||
1328 | last_group = group | (flexbg_size - 1); | ||
1329 | if (last_group > n_group) | ||
1330 | last_group = n_group; | ||
1331 | |||
1332 | flex_gd->count = last_group - group + 1; | ||
1333 | |||
1334 | for (i = 0; i < flex_gd->count; i++) { | ||
1335 | int overhead; | ||
1336 | |||
1337 | group_data[i].group = group + i; | ||
1338 | group_data[i].blocks_count = blocks_per_group; | ||
1339 | overhead = ext4_bg_has_super(sb, group + i) ? | ||
1340 | (1 + ext4_bg_num_gdb(sb, group + i) + | ||
1341 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; | ||
1342 | group_data[i].free_blocks_count = blocks_per_group - overhead; | ||
1343 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
1344 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
1345 | flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | | ||
1346 | EXT4_BG_INODE_UNINIT; | ||
1347 | else | ||
1348 | flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; | ||
1349 | } | ||
1350 | |||
1351 | if (last_group == n_group && | ||
1352 | EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
1353 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
1354 | /* We need to initialize the block bitmap of the last group. */ | ||
1355 | flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; | ||
1356 | |||
1357 | if ((last_group == n_group) && (last != blocks_per_group - 1)) { | ||
1358 | group_data[i - 1].blocks_count = last + 1; | ||
1359 | group_data[i - 1].free_blocks_count -= blocks_per_group - | ||
1360 | last - 1; | ||
1361 | } | ||
1362 | |||
1363 | return 1; | ||
1364 | } | ||
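The `last_group = group | (flexbg_size - 1)` step above relies on flexbg_size being a power of two: OR-ing in the low bits yields the index of the last group inside the current flex group, which is then clamped to n_group. A tiny sketch of the bit trick:

    /* flexbg_size must be a power of two for this identity to hold */
    static unsigned last_in_flex(unsigned group, unsigned flexbg_size)
    {
            return group | (flexbg_size - 1);
    }
    /* last_in_flex(21, 16) == 31: groups 16..31 form one flex group */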
1365 | |||
738 | /* Add group descriptor data to an existing or new group descriptor block. | 1366 | /* Add group descriptor data to an existing or new group descriptor block. |
739 | * Ensure we handle all possible error conditions _before_ we start modifying | 1367 | * Ensure we handle all possible error conditions _before_ we start modifying |
740 | * the filesystem, because we cannot abort the transaction and not have it | 1368 | * the filesystem, because we cannot abort the transaction and not have it |
@@ -750,16 +1378,15 @@ exit_err: | |||
750 | */ | 1378 | */ |
751 | int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | 1379 | int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) |
752 | { | 1380 | { |
1381 | struct ext4_new_flex_group_data flex_gd; | ||
753 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1382 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
754 | struct ext4_super_block *es = sbi->s_es; | 1383 | struct ext4_super_block *es = sbi->s_es; |
755 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? | 1384 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? |
756 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; | 1385 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; |
757 | struct buffer_head *primary = NULL; | ||
758 | struct ext4_group_desc *gdp; | ||
759 | struct inode *inode = NULL; | 1386 | struct inode *inode = NULL; |
760 | handle_t *handle; | ||
761 | int gdb_off, gdb_num; | 1387 | int gdb_off, gdb_num; |
762 | int err, err2; | 1388 | int err; |
1389 | __u16 bg_flags = 0; | ||
763 | 1390 | ||
764 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | 1391 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); |
765 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); | 1392 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); |
@@ -798,175 +1425,69 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
798 | } | 1425 | } |
799 | 1426 | ||
800 | 1427 | ||
801 | if ((err = verify_group_input(sb, input))) | 1428 | err = verify_group_input(sb, input); |
802 | goto exit_put; | 1429 | if (err) |
1430 | goto out; | ||
803 | 1431 | ||
804 | if ((err = setup_new_group_blocks(sb, input))) | 1432 | flex_gd.count = 1; |
805 | goto exit_put; | 1433 | flex_gd.groups = input; |
1434 | flex_gd.bg_flags = &bg_flags; | ||
1435 | err = ext4_flex_group_add(sb, inode, &flex_gd); | ||
1436 | out: | ||
1437 | iput(inode); | ||
1438 | return err; | ||
1439 | } /* ext4_group_add */ | ||
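ext4_group_add() is now a thin shim over the flex-group path: it wraps the single ext4_new_group_data in a one-element batch on the stack and defers to ext4_flex_group_add(). The shape of that adapter, sketched generically with hypothetical names:

    struct item  { int id; };
    struct batch { unsigned count; struct item *items; };

    static int add_many(const struct batch *b)      /* the new batch API */
    {
            return b->count ? 0 : -1;               /* stub for illustration */
    }

    static int add_one(struct item *it)             /* legacy single-item API */
    {
            struct batch b = { .count = 1, .items = it };
            return add_many(&b);
    }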
806 | 1440 | ||
807 | /* | 1441 | /* |
808 | * We will always be modifying at least the superblock and a GDT | 1442 | * Extend a group without checking; assumes that checking has already been done. |
809 | * block. If we are adding a group past the last current GDT block, | 1443 | */ |
810 | * we will also modify the inode and the dindirect block. If we | 1444 | static int ext4_group_extend_no_check(struct super_block *sb, |
811 | * are adding a group with superblock/GDT backups we will also | 1445 | ext4_fsblk_t o_blocks_count, ext4_grpblk_t add) |
812 | * modify each of the reserved GDT dindirect blocks. | 1446 | { |
1447 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
1448 | handle_t *handle; | ||
1449 | int err = 0, err2; | ||
1450 | |||
1451 | /* We will update the superblock, one block bitmap, and | ||
1452 | * one group descriptor via ext4_group_add_blocks(). | ||
813 | */ | 1453 | */ |
814 | handle = ext4_journal_start_sb(sb, | 1454 | handle = ext4_journal_start_sb(sb, 3); |
815 | ext4_bg_has_super(sb, input->group) ? | ||
816 | 3 + reserved_gdb : 4); | ||
817 | if (IS_ERR(handle)) { | 1455 | if (IS_ERR(handle)) { |
818 | err = PTR_ERR(handle); | 1456 | err = PTR_ERR(handle); |
819 | goto exit_put; | 1457 | ext4_warning(sb, "error %d on journal start", err); |
1458 | return err; | ||
820 | } | 1459 | } |
821 | 1460 | ||
822 | if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) | 1461 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); |
823 | goto exit_journal; | 1462 | if (err) { |
824 | 1463 | ext4_warning(sb, "error %d on journal write access", err); | |
825 | /* | 1464 | goto errout; |
826 | * We will only either add reserved group blocks to a backup group | ||
827 | * or remove reserved blocks for the first group in a new group block. | ||
828 | * Doing both would mean more complex code, and sane people don't | ||
829 | * use non-sparse filesystems anymore. This is already checked above. | ||
830 | */ | ||
831 | if (gdb_off) { | ||
832 | primary = sbi->s_group_desc[gdb_num]; | ||
833 | if ((err = ext4_journal_get_write_access(handle, primary))) | ||
834 | goto exit_journal; | ||
835 | |||
836 | if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) { | ||
837 | err = reserve_backup_gdb(handle, inode, input->group); | ||
838 | if (err) | ||
839 | goto exit_journal; | ||
840 | } | ||
841 | } else { | ||
842 | /* | ||
843 | * Note that we can access new group descriptor block safely | ||
844 | * only if add_new_gdb() succeeds. | ||
845 | */ | ||
846 | err = add_new_gdb(handle, inode, input->group); | ||
847 | if (err) | ||
848 | goto exit_journal; | ||
849 | primary = sbi->s_group_desc[gdb_num]; | ||
850 | } | 1465 | } |
851 | 1466 | ||
852 | /* | 1467 | ext4_blocks_count_set(es, o_blocks_count + add); |
853 | * OK, now we've set up the new group. Time to make it active. | 1468 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
854 | * | 1469 | o_blocks_count + add); |
855 | * so we have to be safe wrt. concurrent accesses to the group | 1470 | /* We add the blocks to the bitmap and set the group need init bit */ |
856 | * data. So we need to be careful to set all of the relevant | 1471 | err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); |
857 | * group descriptor data etc. *before* we enable the group. | ||
858 | * | ||
859 | * The key field here is sbi->s_groups_count: as long as | ||
860 | * that retains its old value, nobody is going to access the new | ||
861 | * group. | ||
862 | * | ||
863 | * So first we update all the descriptor metadata for the new | ||
864 | * group; then we update the total disk blocks count; then we | ||
865 | * update the groups count to enable the group; then finally we | ||
866 | * update the free space counts so that the system can start | ||
867 | * using the new disk blocks. | ||
868 | */ | ||
869 | |||
870 | /* Update group descriptor block for new group */ | ||
871 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + | ||
872 | gdb_off * EXT4_DESC_SIZE(sb)); | ||
873 | |||
874 | memset(gdp, 0, EXT4_DESC_SIZE(sb)); | ||
875 | ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ | ||
876 | ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ | ||
877 | ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ | ||
878 | ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count); | ||
879 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); | ||
880 | gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); | ||
881 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); | ||
882 | |||
883 | /* | ||
884 | * We can allocate memory for mb_alloc based on the new group | ||
885 | * descriptor | ||
886 | */ | ||
887 | err = ext4_mb_add_groupinfo(sb, input->group, gdp); | ||
888 | if (err) | 1472 | if (err) |
889 | goto exit_journal; | 1473 | goto errout; |
890 | |||
891 | /* | ||
892 | * Make the new blocks and inodes valid next. We do this before | ||
893 | * increasing the group count so that once the group is enabled, | ||
894 | * all of its blocks and inodes are already valid. | ||
895 | * | ||
896 | * We always allocate group-by-group, then block-by-block or | ||
897 | * inode-by-inode within a group, so enabling these | ||
898 | * blocks/inodes before the group is live won't actually let us | ||
899 | * allocate the new space yet. | ||
900 | */ | ||
901 | ext4_blocks_count_set(es, ext4_blocks_count(es) + | ||
902 | input->blocks_count); | ||
903 | le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb)); | ||
904 | |||
905 | /* | ||
906 | * We need to protect s_groups_count against other CPUs seeing | ||
907 | * inconsistent state in the superblock. | ||
908 | * | ||
909 | * The precise rules we use are: | ||
910 | * | ||
911 | * * Writers must perform a smp_wmb() after updating all dependent | ||
912 | * data and before modifying the groups count | ||
913 | * | ||
914 | * * Readers must perform an smp_rmb() after reading the groups count | ||
915 | * and before reading any dependent data. | ||
916 | * | ||
917 | * NB. These rules can be relaxed when checking the group count | ||
918 | * while freeing data, as we can only allocate from a block | ||
919 | * group after serialising against the group count, and we can | ||
920 | * only then free after serialising in turn against that | ||
921 | * allocation. | ||
922 | */ | ||
923 | smp_wmb(); | ||
924 | |||
925 | /* Update the global fs size fields */ | ||
926 | sbi->s_groups_count++; | ||
927 | |||
928 | err = ext4_handle_dirty_metadata(handle, NULL, primary); | ||
929 | if (unlikely(err)) { | ||
930 | ext4_std_error(sb, err); | ||
931 | goto exit_journal; | ||
932 | } | ||
933 | |||
934 | /* Update the reserved block counts only once the new group is | ||
935 | * active. */ | ||
936 | ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + | ||
937 | input->reserved_blocks); | ||
938 | |||
939 | /* Update the free space counts */ | ||
940 | percpu_counter_add(&sbi->s_freeclusters_counter, | ||
941 | EXT4_B2C(sbi, input->free_blocks_count)); | ||
942 | percpu_counter_add(&sbi->s_freeinodes_counter, | ||
943 | EXT4_INODES_PER_GROUP(sb)); | ||
944 | |||
945 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && | ||
946 | sbi->s_log_groups_per_flex) { | ||
947 | ext4_group_t flex_group; | ||
948 | flex_group = ext4_flex_group(sbi, input->group); | ||
949 | atomic_add(EXT4_B2C(sbi, input->free_blocks_count), | ||
950 | &sbi->s_flex_groups[flex_group].free_clusters); | ||
951 | atomic_add(EXT4_INODES_PER_GROUP(sb), | ||
952 | &sbi->s_flex_groups[flex_group].free_inodes); | ||
953 | } | ||
954 | |||
955 | ext4_handle_dirty_super(handle, sb); | 1474 | ext4_handle_dirty_super(handle, sb); |
956 | 1475 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, | |
957 | exit_journal: | 1476 | o_blocks_count + add); |
958 | if ((err2 = ext4_journal_stop(handle)) && !err) | 1477 | errout: |
1478 | err2 = ext4_journal_stop(handle); | ||
1479 | if (err2 && !err) | ||
959 | err = err2; | 1480 | err = err2; |
960 | if (!err && primary) { | 1481 | |
961 | update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, | 1482 | if (!err) { |
1483 | if (test_opt(sb, DEBUG)) | ||
1484 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " | ||
1485 | "blocks\n", ext4_blocks_count(es)); | ||
1486 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, | ||
962 | sizeof(struct ext4_super_block)); | 1487 | sizeof(struct ext4_super_block)); |
963 | update_backups(sb, primary->b_blocknr, primary->b_data, | ||
964 | primary->b_size); | ||
965 | } | 1488 | } |
966 | exit_put: | ||
967 | iput(inode); | ||
968 | return err; | 1489 | return err; |
969 | } /* ext4_group_add */ | 1490 | } |
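ext4_group_extend_no_check() repeats an error-handling idiom used throughout this file: the cleanup step (ext4_journal_stop() here) must run whether or not the main work failed, and the first error reported wins. A self-contained sketch of the pattern:

    static int do_work(void) { return 0; }          /* stand-ins for illustration */
    static int cleanup(void) { return 0; }

    static int work_then_cleanup(void)
    {
            int err  = do_work();                   /* may fail */
            int err2 = cleanup();                   /* must run either way */
            if (err2 && !err)
                    err = err2;                     /* don't mask a cleanup failure */
            return err;
    }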
970 | 1491 | ||
971 | /* | 1492 | /* |
972 | * Extend the filesystem to the new number of blocks specified. This entry | 1493 | * Extend the filesystem to the new number of blocks specified. This entry |
@@ -985,8 +1506,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
985 | ext4_grpblk_t last; | 1506 | ext4_grpblk_t last; |
986 | ext4_grpblk_t add; | 1507 | ext4_grpblk_t add; |
987 | struct buffer_head *bh; | 1508 | struct buffer_head *bh; |
988 | handle_t *handle; | 1509 | int err; |
989 | int err, err2; | ||
990 | ext4_group_t group; | 1510 | ext4_group_t group; |
991 | 1511 | ||
992 | o_blocks_count = ext4_blocks_count(es); | 1512 | o_blocks_count = ext4_blocks_count(es); |
@@ -1042,42 +1562,119 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1042 | } | 1562 | } |
1043 | brelse(bh); | 1563 | brelse(bh); |
1044 | 1564 | ||
1045 | /* We will update the superblock, one block bitmap, and | 1565 | err = ext4_group_extend_no_check(sb, o_blocks_count, add); |
1046 | * one group descriptor via ext4_free_blocks(). | 1566 | return err; |
1047 | */ | 1567 | } /* ext4_group_extend */ |
1048 | handle = ext4_journal_start_sb(sb, 3); | 1568 | |
1049 | if (IS_ERR(handle)) { | 1569 | /* |
1050 | err = PTR_ERR(handle); | 1570 | * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count |
1051 | ext4_warning(sb, "error %d on journal start", err); | 1571 | * ext4_resize_fs() resizes a fs to the new size specified by @n_blocks_count |
1052 | goto exit_put; | 1572 | * @sb: super block of the fs to be resized |
1573 | * @n_blocks_count: the number of blocks in the resized fs | ||
1574 | */ | ||
1575 | int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | ||
1576 | { | ||
1577 | struct ext4_new_flex_group_data *flex_gd = NULL; | ||
1578 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1579 | struct ext4_super_block *es = sbi->s_es; | ||
1580 | struct buffer_head *bh; | ||
1581 | struct inode *resize_inode; | ||
1582 | ext4_fsblk_t o_blocks_count; | ||
1583 | ext4_group_t o_group; | ||
1584 | ext4_group_t n_group; | ||
1585 | ext4_grpblk_t offset; | ||
1586 | unsigned long n_desc_blocks; | ||
1587 | unsigned long o_desc_blocks; | ||
1588 | unsigned long desc_blocks; | ||
1589 | int err = 0, flexbg_size = 1; | ||
1590 | |||
1591 | o_blocks_count = ext4_blocks_count(es); | ||
1592 | |||
1593 | if (test_opt(sb, DEBUG)) | ||
1594 | printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu " | ||
1595 | "upto %llu blocks\n", o_blocks_count, n_blocks_count); | ||
1596 | |||
1597 | if (n_blocks_count < o_blocks_count) { | ||
1598 | /* On-line shrinking not supported */ | ||
1599 | ext4_warning(sb, "can't shrink FS - resize aborted"); | ||
1600 | return -EINVAL; | ||
1053 | } | 1601 | } |
1054 | 1602 | ||
1055 | if ((err = ext4_journal_get_write_access(handle, | 1603 | if (n_blocks_count == o_blocks_count) |
1056 | EXT4_SB(sb)->s_sbh))) { | 1604 | /* Nothing need to do */ |
1057 | ext4_warning(sb, "error %d on journal write access", err); | 1605 | return 0; |
1058 | ext4_journal_stop(handle); | 1606 | |
1059 | goto exit_put; | 1607 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); |
1608 | ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset); | ||
1609 | |||
1610 | n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / | ||
1611 | EXT4_DESC_PER_BLOCK(sb); | ||
1612 | o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / | ||
1613 | EXT4_DESC_PER_BLOCK(sb); | ||
1614 | desc_blocks = n_desc_blocks - o_desc_blocks; | ||
1615 | |||
1616 | if (desc_blocks && | ||
1617 | (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) || | ||
1618 | le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) { | ||
1619 | ext4_warning(sb, "No reserved GDT blocks, can't resize"); | ||
1620 | return -EPERM; | ||
1060 | } | 1621 | } |
1061 | ext4_blocks_count_set(es, o_blocks_count + add); | ||
1062 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | ||
1063 | o_blocks_count + add); | ||
1064 | /* We add the blocks to the bitmap and set the group need init bit */ | ||
1065 | err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); | ||
1066 | ext4_handle_dirty_super(handle, sb); | ||
1067 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, | ||
1068 | o_blocks_count + add); | ||
1069 | err2 = ext4_journal_stop(handle); | ||
1070 | if (!err && err2) | ||
1071 | err = err2; | ||
1072 | 1622 | ||
1073 | if (err) | 1623 | resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); |
1074 | goto exit_put; | 1624 | if (IS_ERR(resize_inode)) { |
1625 | ext4_warning(sb, "Error opening resize inode"); | ||
1626 | return PTR_ERR(resize_inode); | ||
1627 | } | ||
1075 | 1628 | ||
1629 | /* See if the device is actually as big as what was requested */ | ||
1630 | bh = sb_bread(sb, n_blocks_count - 1); | ||
1631 | if (!bh) { | ||
1632 | ext4_warning(sb, "can't read last block, resize aborted"); | ||
1633 | return -ENOSPC; | ||
1634 | } | ||
1635 | brelse(bh); | ||
1636 | |||
1637 | if (offset != 0) { | ||
1638 | /* extend the last group */ | ||
1639 | ext4_grpblk_t add; | ||
1640 | add = EXT4_BLOCKS_PER_GROUP(sb) - offset; | ||
1641 | err = ext4_group_extend_no_check(sb, o_blocks_count, add); | ||
1642 | if (err) | ||
1643 | goto out; | ||
1644 | } | ||
1645 | |||
1646 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && | ||
1647 | es->s_log_groups_per_flex) | ||
1648 | flexbg_size = 1 << es->s_log_groups_per_flex; | ||
1649 | |||
1650 | o_blocks_count = ext4_blocks_count(es); | ||
1651 | if (o_blocks_count == n_blocks_count) | ||
1652 | goto out; | ||
1653 | |||
1654 | flex_gd = alloc_flex_gd(flexbg_size); | ||
1655 | if (flex_gd == NULL) { | ||
1656 | err = -ENOMEM; | ||
1657 | goto out; | ||
1658 | } | ||
1659 | |||
1660 | /* Add flex groups. Note that a regular group is a | ||
1661 | * flex group with 1 group. | ||
1662 | */ | ||
1663 | while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, | ||
1664 | flexbg_size)) { | ||
1665 | ext4_alloc_group_tables(sb, flex_gd, flexbg_size); | ||
1666 | err = ext4_flex_group_add(sb, resize_inode, flex_gd); | ||
1667 | if (unlikely(err)) | ||
1668 | break; | ||
1669 | } | ||
1670 | |||
1671 | out: | ||
1672 | if (flex_gd) | ||
1673 | free_flex_gd(flex_gd); | ||
1674 | |||
1675 | iput(resize_inode); | ||
1076 | if (test_opt(sb, DEBUG)) | 1676 | if (test_opt(sb, DEBUG)) |
1077 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", | 1677 | printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu " |
1078 | ext4_blocks_count(es)); | 1678 | "up to %llu blocks\n", o_blocks_count, n_blocks_count); |
1079 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, | ||
1080 | sizeof(struct ext4_super_block)); | ||
1081 | exit_put: | ||
1082 | return err; | 1679 | return err; |
1083 | } /* ext4_group_extend */ | 1680 | } |
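In ext4_resize_fs() above, n_group is the index of the last group, so `(n_group + EXT4_DESC_PER_BLOCK(sb)) / EXT4_DESC_PER_BLOCK(sb)` is the ceiling division of `n_group + 1` groups by descriptors-per-block; o_desc_blocks applies the same rounding to the current group count. Checking the arithmetic:

    /* descriptor blocks needed to cover (last_index + 1) groups */
    static unsigned long desc_blocks(unsigned long last_index, unsigned long per)
    {
            return (last_index + per) / per;  /* == DIV_ROUND_UP(last_index + 1, per) */
    }
    /* desc_blocks(63, 64) == 1, desc_blocks(64, 64) == 2 */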
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 64e2529ae9bb..502c61fd7392 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -1095,7 +1095,7 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root) | |||
1095 | } | 1095 | } |
1096 | if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { | 1096 | if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { |
1097 | seq_printf(seq, ",max_batch_time=%u", | 1097 | seq_printf(seq, ",max_batch_time=%u", |
1098 | (unsigned) sbi->s_min_batch_time); | 1098 | (unsigned) sbi->s_max_batch_time); |
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | /* | 1101 | /* |
@@ -2005,17 +2005,16 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
2005 | struct ext4_group_desc *gdp = NULL; | 2005 | struct ext4_group_desc *gdp = NULL; |
2006 | ext4_group_t flex_group_count; | 2006 | ext4_group_t flex_group_count; |
2007 | ext4_group_t flex_group; | 2007 | ext4_group_t flex_group; |
2008 | int groups_per_flex = 0; | 2008 | unsigned int groups_per_flex = 0; |
2009 | size_t size; | 2009 | size_t size; |
2010 | int i; | 2010 | int i; |
2011 | 2011 | ||
2012 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 2012 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
2013 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | 2013 | if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { |
2014 | |||
2015 | if (groups_per_flex < 2) { | ||
2016 | sbi->s_log_groups_per_flex = 0; | 2014 | sbi->s_log_groups_per_flex = 0; |
2017 | return 1; | 2015 | return 1; |
2018 | } | 2016 | } |
2017 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | ||
2019 | 2018 | ||
2020 | /* We allocate both existing and potentially added groups */ | 2019 | /* We allocate both existing and potentially added groups */ |
2021 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + | 2020 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + |
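The ext4_fill_flex_info() change stops trusting the on-disk s_log_groups_per_flex byte before shifting with it: only values 1..31 produce a usable power-of-two group count (0 leaves flex_bg effectively off, and anything larger would overflow a 32-bit shift). The guard, reduced to its essentials:

    /* returns groups per flex group, or 0 to disable flex_bg */
    static unsigned int safe_groups_per_flex(unsigned int log)
    {
            if (log < 1 || log > 31)        /* untrusted on-disk value */
                    return 0;
            return 1U << log;
    }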
@@ -3506,7 +3505,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3506 | * of the filesystem. | 3505 | * of the filesystem. |
3507 | */ | 3506 | */ |
3508 | if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { | 3507 | if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { |
3509 | ext4_msg(sb, KERN_WARNING, "bad geometry: first data" | 3508 | ext4_msg(sb, KERN_WARNING, "bad geometry: first data " |
3510 | "block %u is beyond end of filesystem (%llu)", | 3509 | "block %u is beyond end of filesystem (%llu)", |
3511 | le32_to_cpu(es->s_first_data_block), | 3510 | le32_to_cpu(es->s_first_data_block), |
3512 | ext4_blocks_count(es)); | 3511 | ext4_blocks_count(es)); |
@@ -3733,10 +3732,12 @@ no_journal: | |||
3733 | } | 3732 | } |
3734 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | 3733 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { |
3735 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); | 3734 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); |
3735 | iput(root); | ||
3736 | goto failed_mount4; | 3736 | goto failed_mount4; |
3737 | } | 3737 | } |
3738 | sb->s_root = d_alloc_root(root); | 3738 | sb->s_root = d_alloc_root(root); |
3739 | if (!sb->s_root) { | 3739 | if (!sb->s_root) { |
3740 | iput(root); | ||
3740 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); | 3741 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); |
3741 | ret = -ENOMEM; | 3742 | ret = -ENOMEM; |
3742 | goto failed_mount4; | 3743 | goto failed_mount4; |
@@ -3773,7 +3774,7 @@ no_journal: | |||
3773 | if (err) { | 3774 | if (err) { |
3774 | ext4_msg(sb, KERN_ERR, "failed to initialize system " | 3775 | ext4_msg(sb, KERN_ERR, "failed to initialize system " |
3775 | "zone (%d)", err); | 3776 | "zone (%d)", err); |
3776 | goto failed_mount4; | 3777 | goto failed_mount4a; |
3777 | } | 3778 | } |
3778 | 3779 | ||
3779 | ext4_ext_init(sb); | 3780 | ext4_ext_init(sb); |
@@ -3830,13 +3831,14 @@ cantfind_ext4: | |||
3830 | failed_mount7: | 3831 | failed_mount7: |
3831 | ext4_unregister_li_request(sb); | 3832 | ext4_unregister_li_request(sb); |
3832 | failed_mount6: | 3833 | failed_mount6: |
3833 | ext4_ext_release(sb); | ||
3834 | failed_mount5: | ||
3835 | ext4_mb_release(sb); | 3834 | ext4_mb_release(sb); |
3835 | failed_mount5: | ||
3836 | ext4_ext_release(sb); | ||
3836 | ext4_release_system_zone(sb); | 3837 | ext4_release_system_zone(sb); |
3837 | failed_mount4: | 3838 | failed_mount4a: |
3838 | iput(root); | 3839 | dput(sb->s_root); |
3839 | sb->s_root = NULL; | 3840 | sb->s_root = NULL; |
3841 | failed_mount4: | ||
3840 | ext4_msg(sb, KERN_ERR, "mount failed"); | 3842 | ext4_msg(sb, KERN_ERR, "mount failed"); |
3841 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); | 3843 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); |
3842 | failed_mount_wq: | 3844 | failed_mount_wq: |
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 34e4350dd4d9..d2a200624af5 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c | |||
@@ -3,7 +3,6 @@ | |||
3 | * Handler for storing security labels as extended attributes. | 3 | * Handler for storing security labels as extended attributes. |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/string.h> | 6 | #include <linux/string.h> |
8 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
9 | #include <linux/security.h> | 8 | #include <linux/security.h> |
@@ -48,8 +47,9 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name, | |||
48 | name, value, size, flags); | 47 | name, value, size, flags); |
49 | } | 48 | } |
50 | 49 | ||
51 | int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, | 50 | static int |
52 | void *fs_info) | 51 | ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
52 | void *fs_info) | ||
53 | { | 53 | { |
54 | const struct xattr *xattr; | 54 | const struct xattr *xattr; |
55 | handle_t *handle = fs_info; | 55 | handle_t *handle = fs_info; |
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 37e6ebca2cc3..95f1f4ab59a4 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c | |||
@@ -5,7 +5,6 @@ | |||
5 | * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> | 5 | * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/string.h> | 8 | #include <linux/string.h> |
10 | #include <linux/capability.h> | 9 | #include <linux/capability.h> |
11 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 98c375352d0e..0edb7611ffbe 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c | |||
@@ -5,7 +5,6 @@ | |||
5 | * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> | 5 | * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/string.h> | 8 | #include <linux/string.h> |
10 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
11 | #include "ext4_jbd2.h" | 10 | #include "ext4_jbd2.h" |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 3a444b4e2368..a81eb2367d39 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, | |||
512 | int charlen; | 512 | int charlen; |
513 | 513 | ||
514 | if (utf8) { | 514 | if (utf8) { |
515 | *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname); | 515 | *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN, |
516 | (wchar_t *) outname, FAT_LFN_LEN + 2); | ||
516 | if (*outlen < 0) | 517 | if (*outlen < 0) |
517 | return *outlen; | 518 | return *outlen; |
518 | else if (*outlen > FAT_LFN_LEN) | 519 | else if (*outlen > FAT_LFN_LEN) |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e2951506434d..f855916657ba 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
23 | #include <linux/pagemap.h> | ||
23 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
24 | #include <linux/freezer.h> | 25 | #include <linux/freezer.h> |
25 | #include <linux/writeback.h> | 26 | #include <linux/writeback.h> |
@@ -29,6 +30,11 @@ | |||
29 | #include "internal.h" | 30 | #include "internal.h" |
30 | 31 | ||
31 | /* | 32 | /* |
33 | * 4MB minimal write chunk size | ||
34 | */ | ||
35 | #define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_CACHE_SHIFT - 10)) | ||
36 | |||
37 | /* | ||
32 | * Passed into wb_writeback(), essentially a subset of writeback_control | 38 | * Passed into wb_writeback(), essentially a subset of writeback_control |
33 | */ | 39 | */ |
34 | struct wb_writeback_work { | 40 | struct wb_writeback_work { |
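MIN_WRITEBACK_PAGES expresses 4 MiB as a page count: 4096 is the size in KiB, and PAGE_CACHE_SHIFT - 10 is log2 of the page size in KiB. Worked through for the common 4 KiB page (PAGE_CACHE_SHIFT == 12 is an assumption of this sketch):

    #define PAGE_CACHE_SHIFT 12                     /* assumed: 4 KiB pages */
    #define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_CACHE_SHIFT - 10))
    /* 4096 KiB >> 2 == 1024 pages; 1024 * 4 KiB == 4 MiB */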
@@ -742,11 +748,17 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
742 | if (work->for_background && !over_bground_thresh(wb->bdi)) | 748 | if (work->for_background && !over_bground_thresh(wb->bdi)) |
743 | break; | 749 | break; |
744 | 750 | ||
751 | /* | ||
752 | * Kupdate and background works are special and we want to | ||
753 | * include all inodes that need writing. Livelock avoidance is | ||
754 | * handled by these works yielding to any other work so we are | ||
755 | * safe. | ||
756 | */ | ||
745 | if (work->for_kupdate) { | 757 | if (work->for_kupdate) { |
746 | oldest_jif = jiffies - | 758 | oldest_jif = jiffies - |
747 | msecs_to_jiffies(dirty_expire_interval * 10); | 759 | msecs_to_jiffies(dirty_expire_interval * 10); |
748 | work->older_than_this = &oldest_jif; | 760 | } else if (work->for_background) |
749 | } | 761 | oldest_jif = jiffies; |
750 | 762 | ||
751 | trace_writeback_start(wb->bdi, work); | 763 | trace_writeback_start(wb->bdi, work); |
752 | if (list_empty(&wb->b_io)) | 764 | if (list_empty(&wb->b_io)) |
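The kupdate branch keeps its age cutoff: dirty_expire_interval is maintained in centiseconds, so the factor of 10 converts it to milliseconds before msecs_to_jiffies(); for background work the cutoff becomes plain `jiffies`, i.e. every dirty inode qualifies. The cutoff arithmetic in isolation, as a kernel-style sketch:

    /* inodes dirtied before the returned instant are old enough for kupdate */
    static unsigned long kupdate_cutoff(unsigned long now,
                                        unsigned int expire_centisecs)
    {
            return now - msecs_to_jiffies(expire_centisecs * 10);
    }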
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 2aaf3eaaf13d..5f3368ab0fa9 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -1378,7 +1378,59 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, | |||
1378 | down_read(&fc->killsb); | 1378 | down_read(&fc->killsb); |
1379 | err = -ENOENT; | 1379 | err = -ENOENT; |
1380 | if (fc->sb) | 1380 | if (fc->sb) |
1381 | err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name); | 1381 | err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name); |
1382 | up_read(&fc->killsb); | ||
1383 | kfree(buf); | ||
1384 | return err; | ||
1385 | |||
1386 | err: | ||
1387 | kfree(buf); | ||
1388 | fuse_copy_finish(cs); | ||
1389 | return err; | ||
1390 | } | ||
1391 | |||
1392 | static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, | ||
1393 | struct fuse_copy_state *cs) | ||
1394 | { | ||
1395 | struct fuse_notify_delete_out outarg; | ||
1396 | int err = -ENOMEM; | ||
1397 | char *buf; | ||
1398 | struct qstr name; | ||
1399 | |||
1400 | buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL); | ||
1401 | if (!buf) | ||
1402 | goto err; | ||
1403 | |||
1404 | err = -EINVAL; | ||
1405 | if (size < sizeof(outarg)) | ||
1406 | goto err; | ||
1407 | |||
1408 | err = fuse_copy_one(cs, &outarg, sizeof(outarg)); | ||
1409 | if (err) | ||
1410 | goto err; | ||
1411 | |||
1412 | err = -ENAMETOOLONG; | ||
1413 | if (outarg.namelen > FUSE_NAME_MAX) | ||
1414 | goto err; | ||
1415 | |||
1416 | err = -EINVAL; | ||
1417 | if (size != sizeof(outarg) + outarg.namelen + 1) | ||
1418 | goto err; | ||
1419 | |||
1420 | name.name = buf; | ||
1421 | name.len = outarg.namelen; | ||
1422 | err = fuse_copy_one(cs, buf, outarg.namelen + 1); | ||
1423 | if (err) | ||
1424 | goto err; | ||
1425 | fuse_copy_finish(cs); | ||
1426 | buf[outarg.namelen] = 0; | ||
1427 | name.hash = full_name_hash(name.name, name.len); | ||
1428 | |||
1429 | down_read(&fc->killsb); | ||
1430 | err = -ENOENT; | ||
1431 | if (fc->sb) | ||
1432 | err = fuse_reverse_inval_entry(fc->sb, outarg.parent, | ||
1433 | outarg.child, &name); | ||
1382 | up_read(&fc->killsb); | 1434 | up_read(&fc->killsb); |
1383 | kfree(buf); | 1435 | kfree(buf); |
1384 | return err; | 1436 | return err; |
@@ -1597,6 +1649,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, | |||
1597 | case FUSE_NOTIFY_RETRIEVE: | 1649 | case FUSE_NOTIFY_RETRIEVE: |
1598 | return fuse_notify_retrieve(fc, size, cs); | 1650 | return fuse_notify_retrieve(fc, size, cs); |
1599 | 1651 | ||
1652 | case FUSE_NOTIFY_DELETE: | ||
1653 | return fuse_notify_delete(fc, size, cs); | ||
1654 | |||
1600 | default: | 1655 | default: |
1601 | fuse_copy_finish(cs); | 1656 | fuse_copy_finish(cs); |
1602 | return -EINVAL; | 1657 | return -EINVAL; |
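fuse_notify_delete() validates the same wire format as FUSE_NOTIFY_INVAL_ENTRY: a fixed header followed by the name bytes and a trailing NUL, so the payload length must equal sizeof(outarg) + namelen + 1 and namelen may not exceed FUSE_NAME_MAX. The checks distilled into one helper (illustrative, not part of the patch):

    /* payload = [struct fuse_notify_delete_out][name bytes][NUL] */
    static int check_name_payload(size_t size, uint32_t namelen)
    {
            if (namelen > FUSE_NAME_MAX)
                    return -ENAMETOOLONG;
            if (size != sizeof(struct fuse_notify_delete_out) + namelen + 1)
                    return -EINVAL;
            return 0;
    }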
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5ddd6ea8f839..206632887bb4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -868,7 +868,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, | |||
868 | } | 868 | } |
869 | 869 | ||
870 | int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, | 870 | int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, |
871 | struct qstr *name) | 871 | u64 child_nodeid, struct qstr *name) |
872 | { | 872 | { |
873 | int err = -ENOTDIR; | 873 | int err = -ENOTDIR; |
874 | struct inode *parent; | 874 | struct inode *parent; |
@@ -895,8 +895,36 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, | |||
895 | 895 | ||
896 | fuse_invalidate_attr(parent); | 896 | fuse_invalidate_attr(parent); |
897 | fuse_invalidate_entry(entry); | 897 | fuse_invalidate_entry(entry); |
898 | |||
899 | if (child_nodeid != 0 && entry->d_inode) { | ||
900 | mutex_lock(&entry->d_inode->i_mutex); | ||
901 | if (get_node_id(entry->d_inode) != child_nodeid) { | ||
902 | err = -ENOENT; | ||
903 | goto badentry; | ||
904 | } | ||
905 | if (d_mountpoint(entry)) { | ||
906 | err = -EBUSY; | ||
907 | goto badentry; | ||
908 | } | ||
909 | if (S_ISDIR(entry->d_inode->i_mode)) { | ||
910 | shrink_dcache_parent(entry); | ||
911 | if (!simple_empty(entry)) { | ||
912 | err = -ENOTEMPTY; | ||
913 | goto badentry; | ||
914 | } | ||
915 | entry->d_inode->i_flags |= S_DEAD; | ||
916 | } | ||
917 | dont_mount(entry); | ||
918 | clear_nlink(entry->d_inode); | ||
919 | err = 0; | ||
920 | badentry: | ||
921 | mutex_unlock(&entry->d_inode->i_mutex); | ||
922 | if (!err) | ||
923 | d_delete(entry); | ||
924 | } else { | ||
925 | err = 0; | ||
926 | } | ||
898 | dput(entry); | 927 | dput(entry); |
899 | err = 0; | ||
900 | 928 | ||
901 | unlock: | 929 | unlock: |
902 | mutex_unlock(&parent->i_mutex); | 930 | mutex_unlock(&parent->i_mutex); |
@@ -1182,6 +1210,30 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, | |||
1182 | return fuse_fsync_common(file, start, end, datasync, 1); | 1210 | return fuse_fsync_common(file, start, end, datasync, 1); |
1183 | } | 1211 | } |
1184 | 1212 | ||
1213 | static long fuse_dir_ioctl(struct file *file, unsigned int cmd, | ||
1214 | unsigned long arg) | ||
1215 | { | ||
1216 | struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); | ||
1217 | |||
1218 | /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */ | ||
1219 | if (fc->minor < 18) | ||
1220 | return -ENOTTY; | ||
1221 | |||
1222 | return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR); | ||
1223 | } | ||
1224 | |||
1225 | static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, | ||
1226 | unsigned long arg) | ||
1227 | { | ||
1228 | struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host); | ||
1229 | |||
1230 | if (fc->minor < 18) | ||
1231 | return -ENOTTY; | ||
1232 | |||
1233 | return fuse_ioctl_common(file, cmd, arg, | ||
1234 | FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); | ||
1235 | } | ||
1236 | |||
1185 | static bool update_mtime(unsigned ivalid) | 1237 | static bool update_mtime(unsigned ivalid) |
1186 | { | 1238 | { |
1187 | /* Always update if mtime is explicitly set */ | 1239 | /* Always update if mtime is explicitly set */ |
@@ -1596,6 +1648,8 @@ static const struct file_operations fuse_dir_operations = { | |||
1596 | .open = fuse_dir_open, | 1648 | .open = fuse_dir_open, |
1597 | .release = fuse_dir_release, | 1649 | .release = fuse_dir_release, |
1598 | .fsync = fuse_dir_fsync, | 1650 | .fsync = fuse_dir_fsync, |
1651 | .unlocked_ioctl = fuse_dir_ioctl, | ||
1652 | .compat_ioctl = fuse_dir_compat_ioctl, | ||
1599 | }; | 1653 | }; |
1600 | 1654 | ||
1601 | static const struct inode_operations fuse_common_inode_operations = { | 1655 | static const struct inode_operations fuse_common_inode_operations = { |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 0c84100acd44..4a199fd93fbd 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -1555,48 +1555,16 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin) | |||
1555 | loff_t retval; | 1555 | loff_t retval; |
1556 | struct inode *inode = file->f_path.dentry->d_inode; | 1556 | struct inode *inode = file->f_path.dentry->d_inode; |
1557 | 1557 | ||
1558 | mutex_lock(&inode->i_mutex); | 1558 | /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */ |
1559 | if (origin != SEEK_CUR && origin != SEEK_SET) { | 1559 | if (origin == SEEK_CUR || origin == SEEK_SET) |
1560 | retval = fuse_update_attributes(inode, NULL, file, NULL); | 1560 | return generic_file_llseek(file, offset, origin); |
1561 | if (retval) | ||
1562 | goto exit; | ||
1563 | } | ||
1564 | 1561 | ||
1565 | switch (origin) { | 1562 | mutex_lock(&inode->i_mutex); |
1566 | case SEEK_END: | 1563 | retval = fuse_update_attributes(inode, NULL, file, NULL); |
1567 | offset += i_size_read(inode); | 1564 | if (!retval) |
1568 | break; | 1565 | retval = generic_file_llseek(file, offset, origin); |
1569 | case SEEK_CUR: | ||
1570 | if (offset == 0) { | ||
1571 | retval = file->f_pos; | ||
1572 | goto exit; | ||
1573 | } | ||
1574 | offset += file->f_pos; | ||
1575 | break; | ||
1576 | case SEEK_DATA: | ||
1577 | if (offset >= i_size_read(inode)) { | ||
1578 | retval = -ENXIO; | ||
1579 | goto exit; | ||
1580 | } | ||
1581 | break; | ||
1582 | case SEEK_HOLE: | ||
1583 | if (offset >= i_size_read(inode)) { | ||
1584 | retval = -ENXIO; | ||
1585 | goto exit; | ||
1586 | } | ||
1587 | offset = i_size_read(inode); | ||
1588 | break; | ||
1589 | } | ||
1590 | retval = -EINVAL; | ||
1591 | if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { | ||
1592 | if (offset != file->f_pos) { | ||
1593 | file->f_pos = offset; | ||
1594 | file->f_version = 0; | ||
1595 | } | ||
1596 | retval = offset; | ||
1597 | } | ||
1598 | exit: | ||
1599 | mutex_unlock(&inode->i_mutex); | 1566 | mutex_unlock(&inode->i_mutex); |
1567 | |||
1600 | return retval; | 1568 | return retval; |
1601 | } | 1569 | } |
1602 | 1570 | ||
@@ -1808,7 +1776,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | |||
1808 | BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); | 1776 | BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); |
1809 | 1777 | ||
1810 | err = -ENOMEM; | 1778 | err = -ENOMEM; |
1811 | pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); | 1779 | pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL); |
1812 | iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); | 1780 | iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); |
1813 | if (!pages || !iov_page) | 1781 | if (!pages || !iov_page) |
1814 | goto out; | 1782 | goto out; |
@@ -1958,8 +1926,8 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | |||
1958 | } | 1926 | } |
1959 | EXPORT_SYMBOL_GPL(fuse_do_ioctl); | 1927 | EXPORT_SYMBOL_GPL(fuse_do_ioctl); |
1960 | 1928 | ||
1961 | static long fuse_file_ioctl_common(struct file *file, unsigned int cmd, | 1929 | long fuse_ioctl_common(struct file *file, unsigned int cmd, |
1962 | unsigned long arg, unsigned int flags) | 1930 | unsigned long arg, unsigned int flags) |
1963 | { | 1931 | { |
1964 | struct inode *inode = file->f_dentry->d_inode; | 1932 | struct inode *inode = file->f_dentry->d_inode; |
1965 | struct fuse_conn *fc = get_fuse_conn(inode); | 1933 | struct fuse_conn *fc = get_fuse_conn(inode); |
@@ -1976,13 +1944,13 @@ static long fuse_file_ioctl_common(struct file *file, unsigned int cmd, | |||
1976 | static long fuse_file_ioctl(struct file *file, unsigned int cmd, | 1944 | static long fuse_file_ioctl(struct file *file, unsigned int cmd, |
1977 | unsigned long arg) | 1945 | unsigned long arg) |
1978 | { | 1946 | { |
1979 | return fuse_file_ioctl_common(file, cmd, arg, 0); | 1947 | return fuse_ioctl_common(file, cmd, arg, 0); |
1980 | } | 1948 | } |
1981 | 1949 | ||
1982 | static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, | 1950 | static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, |
1983 | unsigned long arg) | 1951 | unsigned long arg) |
1984 | { | 1952 | { |
1985 | return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); | 1953 | return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); |
1986 | } | 1954 | } |
1987 | 1955 | ||
1988 | /* | 1956 | /* |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1964da0257d9..572cefc78012 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -755,9 +755,15 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, | |||
755 | /** | 755 | /** |
756 | * File-system tells the kernel to invalidate parent attributes and | 756 | * File-system tells the kernel to invalidate parent attributes and |
757 | * the dentry matching parent/name. | 757 | * the dentry matching parent/name. |
758 | * | ||
759 | * If the child_nodeid is non-zero and: | ||
760 | * - matches the inode number for the dentry matching parent/name, | ||
761 | * - is not a mount point, | ||
762 | * - is a file or an empty directory, | ||
763 | * then the dentry is unhashed (d_delete()). | ||
758 | */ | 764 | */ |
759 | int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, | 765 | int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, |
760 | struct qstr *name); | 766 | u64 child_nodeid, struct qstr *name); |
761 | 767 | ||
762 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, | 768 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, |
763 | bool isdir); | 769 | bool isdir); |
@@ -765,6 +771,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, | |||
765 | size_t count, loff_t *ppos, int write); | 771 | size_t count, loff_t *ppos, int write); |
766 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | 772 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, |
767 | unsigned int flags); | 773 | unsigned int flags); |
774 | long fuse_ioctl_common(struct file *file, unsigned int cmd, | ||
775 | unsigned long arg, unsigned int flags); | ||
768 | unsigned fuse_file_poll(struct file *file, poll_table *wait); | 776 | unsigned fuse_file_poll(struct file *file, poll_table *wait); |
769 | int fuse_dev_release(struct inode *inode, struct file *file); | 777 | int fuse_dev_release(struct inode *inode, struct file *file); |
770 | 778 | ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 88e8a23d0026..376816fcd040 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1353,7 +1353,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) | |||
1353 | spin_lock(&gl->gl_spin); | 1353 | spin_lock(&gl->gl_spin); |
1354 | gl->gl_reply = ret; | 1354 | gl->gl_reply = ret; |
1355 | 1355 | ||
1356 | if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { | 1356 | if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { |
1357 | if (gfs2_should_freeze(gl)) { | 1357 | if (gfs2_should_freeze(gl)) { |
1358 | set_bit(GLF_FROZEN, &gl->gl_flags); | 1358 | set_bit(GLF_FROZEN, &gl->gl_flags); |
1359 | spin_unlock(&gl->gl_spin); | 1359 | spin_unlock(&gl->gl_spin); |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2553b858a72e..307ac31df781 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -121,8 +121,11 @@ enum { | |||
121 | 121 | ||
122 | struct lm_lockops { | 122 | struct lm_lockops { |
123 | const char *lm_proto_name; | 123 | const char *lm_proto_name; |
124 | int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); | 124 | int (*lm_mount) (struct gfs2_sbd *sdp, const char *table); |
125 | void (*lm_unmount) (struct gfs2_sbd *sdp); | 125 | void (*lm_first_done) (struct gfs2_sbd *sdp); |
126 | void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid, | ||
127 | unsigned int result); | ||
128 | void (*lm_unmount) (struct gfs2_sbd *sdp); | ||
126 | void (*lm_withdraw) (struct gfs2_sbd *sdp); | 129 | void (*lm_withdraw) (struct gfs2_sbd *sdp); |
127 | void (*lm_put_lock) (struct gfs2_glock *gl); | 130 | void (*lm_put_lock) (struct gfs2_glock *gl); |
128 | int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, | 131 | int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index e1d3bb59945c..97742a7ea9cc 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -139,8 +139,45 @@ struct gfs2_bufdata { | |||
139 | #define GDLM_STRNAME_BYTES 25 | 139 | #define GDLM_STRNAME_BYTES 25 |
140 | #define GDLM_LVB_SIZE 32 | 140 | #define GDLM_LVB_SIZE 32 |
141 | 141 | ||
142 | /* | ||
143 | * ls_recover_flags: | ||
144 | * | ||
145 | * DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been | ||
146 | * held by failed nodes whose journals need recovery. Those locks should | ||
147 | * only be used for journal recovery until the journal recovery is done. | ||
148 | * This is set by the dlm recover_prep callback and cleared by the | ||
149 | * gfs2_control thread when journal recovery is complete. To avoid | ||
150 | * races between recover_prep setting and gfs2_control clearing, recover_spin | ||
151 | * is held while changing this bit and reading/writing recover_block | ||
152 | * and recover_start. | ||
153 | * | ||
154 | * DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used. | ||
155 | * | ||
156 | * DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing | ||
157 | * recovery of all journals before allowing other nodes to mount the fs. | ||
158 | * This is cleared when FIRST_MOUNT_DONE is set. | ||
159 | * | ||
160 | * DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished | ||
161 | * recovery of all journals, and now allows other nodes to mount the fs. | ||
162 | * | ||
163 | * DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared | ||
164 | * BLOCK_LOCKS for the first time. The gfs2_control thread should now | ||
165 | * control clearing BLOCK_LOCKS for further recoveries. | ||
166 | * | ||
167 | * DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq. | ||
168 | * | ||
169 | * DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep() | ||
170 | * and recover_done(), i.e. set while recover_block == recover_start. | ||
171 | */ | ||
172 | |||
142 | enum { | 173 | enum { |
143 | DFL_BLOCK_LOCKS = 0, | 174 | DFL_BLOCK_LOCKS = 0, |
175 | DFL_NO_DLM_OPS = 1, | ||
176 | DFL_FIRST_MOUNT = 2, | ||
177 | DFL_FIRST_MOUNT_DONE = 3, | ||
178 | DFL_MOUNT_DONE = 4, | ||
179 | DFL_UNMOUNT = 5, | ||
180 | DFL_DLM_RECOVERY = 6, | ||
144 | }; | 181 | }; |
145 | 182 | ||
146 | struct lm_lockname { | 183 | struct lm_lockname { |
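Per the locking rule documented above, DFL_BLOCK_LOCKS is only flipped with ls_recover_spin held, together with the recover_block/recover_start generations it guards. A recover_prep-style writer would look roughly like this sketch:

    spin_lock(&ls->ls_recover_spin);
    ls->ls_recover_block = ls->ls_recover_start;    /* snapshot the generation */
    set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
    set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
    spin_unlock(&ls->ls_recover_spin);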
@@ -392,6 +429,7 @@ struct gfs2_jdesc { | |||
392 | #define JDF_RECOVERY 1 | 429 | #define JDF_RECOVERY 1 |
393 | unsigned int jd_jid; | 430 | unsigned int jd_jid; |
394 | unsigned int jd_blocks; | 431 | unsigned int jd_blocks; |
432 | int jd_recover_error; | ||
395 | }; | 433 | }; |
396 | 434 | ||
397 | struct gfs2_statfs_change_host { | 435 | struct gfs2_statfs_change_host { |
@@ -461,6 +499,7 @@ enum { | |||
461 | SDF_NORECOVERY = 4, | 499 | SDF_NORECOVERY = 4, |
462 | SDF_DEMOTE = 5, | 500 | SDF_DEMOTE = 5, |
463 | SDF_NOJOURNALID = 6, | 501 | SDF_NOJOURNALID = 6, |
502 | SDF_RORECOVERY = 7, /* read only recovery */ | ||
464 | }; | 503 | }; |
465 | 504 | ||
466 | #define GFS2_FSNAME_LEN 256 | 505 | #define GFS2_FSNAME_LEN 256 |
@@ -499,14 +538,26 @@ struct gfs2_sb_host { | |||
499 | struct lm_lockstruct { | 538 | struct lm_lockstruct { |
500 | int ls_jid; | 539 | int ls_jid; |
501 | unsigned int ls_first; | 540 | unsigned int ls_first; |
502 | unsigned int ls_first_done; | ||
503 | unsigned int ls_nodir; | 541 | unsigned int ls_nodir; |
504 | const struct lm_lockops *ls_ops; | 542 | const struct lm_lockops *ls_ops; |
505 | unsigned long ls_flags; | ||
506 | dlm_lockspace_t *ls_dlm; | 543 | dlm_lockspace_t *ls_dlm; |
507 | 544 | ||
508 | int ls_recover_jid_done; | 545 | int ls_recover_jid_done; /* These two are deprecated, */ |
509 | int ls_recover_jid_status; | 546 | int ls_recover_jid_status; /* used previously by gfs_controld */ |
547 | |||
548 | struct dlm_lksb ls_mounted_lksb; /* mounted_lock */ | ||
549 | struct dlm_lksb ls_control_lksb; /* control_lock */ | ||
550 | char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */ | ||
551 | struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ | ||
552 | |||
553 | spinlock_t ls_recover_spin; /* protects following fields */ | ||
554 | unsigned long ls_recover_flags; /* DFL_ */ | ||
555 | uint32_t ls_recover_mount; /* gen in first recover_done cb */ | ||
556 | uint32_t ls_recover_start; /* gen in last recover_done cb */ | ||
557 | uint32_t ls_recover_block; /* copy recover_start in last recover_prep */ | ||
558 | uint32_t ls_recover_size; /* size of recover_submit, recover_result */ | ||
559 | uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */ | ||
560 | uint32_t *ls_recover_result; /* result of last jid recovery */ | ||
510 | }; | 561 | }; |
511 | 562 | ||
512 | struct gfs2_sbd { | 563 | struct gfs2_sbd { |
@@ -544,6 +595,7 @@ struct gfs2_sbd { | |||
544 | wait_queue_head_t sd_glock_wait; | 595 | wait_queue_head_t sd_glock_wait; |
545 | atomic_t sd_glock_disposal; | 596 | atomic_t sd_glock_disposal; |
546 | struct completion sd_locking_init; | 597 | struct completion sd_locking_init; |
598 | struct delayed_work sd_control_work; | ||
547 | 599 | ||
548 | /* Inode Stuff */ | 600 | /* Inode Stuff */ |
549 | 601 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 017960cf1d7a..a7d611b93f0f 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -599,9 +599,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
599 | error = gfs2_meta_inode_buffer(ip, &dibh); | 599 | error = gfs2_meta_inode_buffer(ip, &dibh); |
600 | if (error) | 600 | if (error) |
601 | goto fail_end_trans; | 601 | goto fail_end_trans; |
602 | inc_nlink(&ip->i_inode); | 602 | set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1); |
603 | if (S_ISDIR(ip->i_inode.i_mode)) | ||
604 | inc_nlink(&ip->i_inode); | ||
605 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 603 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
606 | gfs2_dinode_out(ip, dibh->b_data); | 604 | gfs2_dinode_out(ip, dibh->b_data); |
607 | brelse(dibh); | 605 | brelse(dibh); |
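[Editor's note: the link_dinode() change above replaces two conditional inc_nlink() calls with a single set_nlink(). The initial link count is deterministic at this point: a regular file has one link (its directory entry), while a new directory has two (its entry plus its own "." entry). An equivalent spelled-out form, for illustration only:]

	/* same logic as the replaced inc_nlink() pair */
	unsigned int nlink = S_ISDIR(ip->i_inode.i_mode) ? 2 : 1;
	set_nlink(&ip->i_inode, nlink);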
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 98c80d8c2a62..8944d1e32ab5 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. | 3 | * Copyright 2004-2011 Red Hat, Inc. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -11,12 +11,15 @@ | |||
11 | #include <linux/dlm.h> | 11 | #include <linux/dlm.h> |
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/types.h> | 13 | #include <linux/types.h> |
14 | #include <linux/delay.h> | ||
14 | #include <linux/gfs2_ondisk.h> | 15 | #include <linux/gfs2_ondisk.h> |
15 | 16 | ||
16 | #include "incore.h" | 17 | #include "incore.h" |
17 | #include "glock.h" | 18 | #include "glock.h" |
18 | #include "util.h" | 19 | #include "util.h" |
20 | #include "sys.h" | ||
19 | 21 | ||
22 | extern struct workqueue_struct *gfs2_control_wq; | ||
20 | 23 | ||
21 | static void gdlm_ast(void *arg) | 24 | static void gdlm_ast(void *arg) |
22 | { | 25 | { |
@@ -185,34 +188,1002 @@ static void gdlm_cancel(struct gfs2_glock *gl) | |||
185 | dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); | 188 | dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl); |
186 | } | 189 | } |
187 | 190 | ||
188 | static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname) | 191 | /* |
192 | * dlm/gfs2 recovery coordination using dlm_recover callbacks | ||
193 | * | ||
194 | * 1. dlm_controld sees lockspace members change | ||
195 | * 2. dlm_controld blocks dlm-kernel locking activity | ||
196 | * 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep) | ||
197 | * 4. dlm_controld starts and finishes its own user level recovery | ||
198 | * 5. dlm_controld starts dlm-kernel dlm_recoverd to do kernel recovery | ||
199 | * 6. dlm_recoverd notifies gfs2 of failed nodes (recover_slot) | ||
200 | * 7. dlm_recoverd does its own lock recovery | ||
201 | * 8. dlm_recoverd unblocks dlm-kernel locking activity | ||
202 | * 9. dlm_recoverd notifies gfs2 when done (recover_done with new generation) | ||
203 | * 10. gfs2_control updates control_lock lvb with new generation and jid bits | ||
204 | * 11. gfs2_control enqueues journals for gfs2_recover to recover (maybe none) | ||
205 | * 12. gfs2_recover dequeues and recovers journals of failed nodes | ||
206 | * 13. gfs2_recover provides recovery results to gfs2_control (recovery_result) | ||
207 | * 14. gfs2_control updates control_lock lvb jid bits for recovered journals | ||
208 | * 15. gfs2_control unblocks normal locking when all journals are recovered | ||
209 | * | ||
210 | * - failures during recovery | ||
211 | * | ||
212 | * recover_prep() may set BLOCK_LOCKS (step 3) again before gfs2_control | ||
213 | * clears BLOCK_LOCKS (step 15), e.g. another node fails while still | ||
214 | * recovering for a prior failure. gfs2_control needs a way to detect | ||
215 | * this so it can leave BLOCK_LOCKS set in step 15. This is managed using | ||
216 | * the recover_block and recover_start values. | ||
217 | * | ||
218 | * recover_done() provides a new lockspace generation number each time it | ||
219 | * is called (step 9). This generation number is saved as recover_start. | ||
220 | * When recover_prep() is called, it sets BLOCK_LOCKS and sets | ||
221 | * recover_block = recover_start. So, while recover_block is equal to | ||
222 | * recover_start, BLOCK_LOCKS should remain set. (recover_spin must | ||
223 | * be held around the BLOCK_LOCKS/recover_block/recover_start logic.) | ||
224 | * | ||
225 | * - more specific gfs2 steps in sequence above | ||
226 | * | ||
227 | * 3. recover_prep sets BLOCK_LOCKS and sets recover_block = recover_start | ||
228 | * 6. recover_slot records any failed jids (maybe none) | ||
229 | * 9. recover_done sets recover_start = new generation number | ||
230 | * 10. gfs2_control sets control_lock lvb = new gen + bits for failed jids | ||
231 | * 12. gfs2_recover does journal recoveries for failed jids identified above | ||
232 | * 14. gfs2_control clears control_lock lvb bits for recovered jids | ||
233 | * 15. gfs2_control checks if recover_block == recover_start (step 3 occurred | ||
234 | * again) and if so does nothing; otherwise, if recover_start > recover_block, | ||
235 | * it clears BLOCK_LOCKS. | ||
236 | * | ||
237 | * - parallel recovery steps across all nodes | ||
238 | * | ||
239 | * All nodes attempt to update the control_lock lvb with the new generation | ||
240 | * number and jid bits, but only the first to get the control_lock EX will | ||
241 | * do so; others will see that it's already done (lvb already contains new | ||
242 | * generation number.) | ||
243 | * | ||
244 | * . All nodes get the same recover_prep/recover_slot/recover_done callbacks | ||
245 | * . All nodes attempt to set control_lock lvb gen + bits for the new gen | ||
246 | * . One node gets control_lock first and writes the lvb, others see it's done | ||
247 | * . All nodes attempt to recover jids for which they see control_lock bits set | ||
248 | * . One node succeeds for a jid, and that one clears the jid bit in the lvb | ||
249 | * . All nodes will eventually see all lvb bits clear and unblock locks | ||
250 | * | ||
251 | * - is there a problem with clearing an lvb bit that should be set | ||
252 | * and missing a journal recovery? | ||
253 | * | ||
254 | * 1. jid fails | ||
255 | * 2. lvb bit set for step 1 | ||
256 | * 3. jid recovered for step 1 | ||
257 | * 4. jid taken again (new mount) | ||
258 | * 5. jid fails (for step 4) | ||
259 | * 6. lvb bit set for step 5 (will already be set) | ||
260 | * 7. lvb bit cleared for step 3 | ||
261 | * | ||
262 | * This is not a problem: the failure in step 5 does not require | ||
263 | * recovery, because the mount in step 4 could not have | ||
264 | * progressed far enough to unblock locks and access the fs. The | ||
265 | * control_mount() function waits for all recoveries to be complete | ||
266 | * for the latest lockspace generation before ever unblocking locks | ||
267 | * and returning. The mount in step 4 waits until the recovery in | ||
268 | * step 1 is done. | ||
269 | * | ||
270 | * - special case of first mounter: first node to mount the fs | ||
271 | * | ||
272 | * The first node to mount a gfs2 fs needs to check all the journals | ||
273 | * and recover any that need recovery before other nodes are allowed | ||
274 | * to mount the fs. (Others may begin mounting, but they must wait | ||
275 | * for the first mounter to be done before taking locks on the fs | ||
276 | * or accessing the fs.) This has two parts: | ||
277 | * | ||
278 | * 1. The mounted_lock tells a node it's the first to mount the fs. | ||
279 | * Each node holds the mounted_lock in PR while it's mounted. | ||
280 | * Each node tries to acquire the mounted_lock in EX when it mounts. | ||
281 | * If a node is granted the mounted_lock EX it means there are no | ||
282 | * other mounted nodes (no PR locks exist), and it is the first mounter. | ||
283 | * The mounted_lock is demoted to PR when first recovery is done, so | ||
284 | * others will fail to get an EX lock, but will get a PR lock. | ||
285 | * | ||
286 | * 2. The control_lock blocks others in control_mount() while the first | ||
287 | * mounter is doing first mount recovery of all journals. | ||
288 | * A mounting node needs to acquire control_lock in EX mode before | ||
289 | * it can proceed. The first mounter holds control_lock in EX while doing | ||
290 | * the first mount recovery, blocking mounts from other nodes, then demotes | ||
291 | * control_lock to NL when it's done (others_may_mount/first_done), | ||
292 | * allowing other nodes to continue mounting. | ||
293 | * | ||
294 | * first mounter: | ||
295 | * control_lock EX/NOQUEUE success | ||
296 | * mounted_lock EX/NOQUEUE success (no other PR, so no other mounters) | ||
297 | * set first=1 | ||
298 | * do first mounter recovery | ||
299 | * mounted_lock EX->PR | ||
300 | * control_lock EX->NL, write lvb generation | ||
301 | * | ||
302 | * other mounter: | ||
303 | * control_lock EX/NOQUEUE success (if fail -EAGAIN, retry) | ||
304 | * mounted_lock EX/NOQUEUE fail -EAGAIN (expected due to other mounters PR) | ||
305 | * mounted_lock PR/NOQUEUE success | ||
306 | * read lvb generation | ||
307 | * control_lock EX->NL | ||
308 | * set first=0 | ||
309 | * | ||
310 | * - mount during recovery | ||
311 | * | ||
312 | * If a node mounts while others are doing recovery (not first mounter), | ||
313 | * the mounting node will get its initial recover_done() callback without | ||
314 | * having seen any previous failures/callbacks. | ||
315 | * | ||
316 | * It must wait for all recoveries preceding its mount to be finished | ||
317 | * before it unblocks locks. It does this by repeating the "other mounter" | ||
318 | * steps above until the lvb generation number is >= its mount generation | ||
319 | * number (from initial recover_done) and all lvb bits are clear. | ||
320 | * | ||
321 | * - control_lock lvb format | ||
322 | * | ||
323 | * 4 bytes generation number: the latest dlm lockspace generation number | ||
324 | * from recover_done callback. Indicates the jid bitmap has been updated | ||
325 | * to reflect all slot failures through that generation. | ||
326 | * 4 bytes unused. | ||
327 | * GDLM_LVB_SIZE-8 bytes of jid bitmap. If bit N is set, it indicates | ||
328 | * that jid N needs recovery. | ||
329 | */ | ||
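[Editor's note: the recover_block/recover_start interplay described above reduces to a single invariant. A hedged sketch using only fields defined in this patch; the caller must hold ls_recover_spin, as the comment requires. The helper name is hypothetical.]

	/* recover_prep copies recover_start into recover_block, so equal
	 * generations mean a dlm recovery cycle is still in flight and
	 * DFL_BLOCK_LOCKS must stay set */
	static int recovery_in_flight(const struct lm_lockstruct *ls)
	{
		return ls->ls_recover_block == ls->ls_recover_start;
	}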
330 | |||
331 | #define JID_BITMAP_OFFSET 8 /* 4 byte generation number + 4 byte unused */ | ||
332 | |||
333 | static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, | ||
334 | char *lvb_bits) | ||
335 | { | ||
336 | uint32_t gen; | ||
337 | memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); | ||
338 | memcpy(&gen, lvb_bits, sizeof(uint32_t)); | ||
339 | *lvb_gen = le32_to_cpu(gen); | ||
340 | } | ||
341 | |||
342 | static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, | ||
343 | char *lvb_bits) | ||
344 | { | ||
345 | uint32_t gen; | ||
346 | memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); | ||
347 | gen = cpu_to_le32(lvb_gen); | ||
348 | memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); | ||
349 | } | ||
350 | |||
351 | static int all_jid_bits_clear(char *lvb) | ||
352 | { | ||
353 | int i; | ||
354 | for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) { | ||
355 | if (lvb[i]) | ||
356 | return 0; | ||
357 | } | ||
358 | return 1; | ||
359 | } | ||
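[Editor's note: a sketch of how the three lvb helpers above cooperate, in the style of gfs2_control_func() further down; ls, jid, and start_gen are assumed in scope as in that function. The control lock must be held in EX with DLM_LKF_VALBLK for the write to be meaningful; this fragment is illustrative only.]

	char lvb_bits[GDLM_LVB_SIZE];
	uint32_t lvb_gen;

	control_lvb_read(ls, &lvb_gen, lvb_bits);          /* snapshot current lvb */
	__set_bit_le(jid, lvb_bits + JID_BITMAP_OFFSET);   /* mark jid as needing recovery */
	control_lvb_write(ls, start_gen, lvb_bits);        /* start_gen: generation from the
							      last recover_done callback */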
360 | |||
361 | static void sync_wait_cb(void *arg) | ||
362 | { | ||
363 | struct lm_lockstruct *ls = arg; | ||
364 | complete(&ls->ls_sync_wait); | ||
365 | } | ||
366 | |||
367 | static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name) | ||
189 | { | 368 | { |
190 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 369 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
191 | int error; | 370 | int error; |
192 | 371 | ||
193 | if (fsname == NULL) { | 372 | error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls); |
194 | fs_info(sdp, "no fsname found\n"); | 373 | if (error) { |
195 | return -EINVAL; | 374 | fs_err(sdp, "%s lkid %x error %d\n", |
375 | name, lksb->sb_lkid, error); | ||
376 | return error; | ||
377 | } | ||
378 | |||
379 | wait_for_completion(&ls->ls_sync_wait); | ||
380 | |||
381 | if (lksb->sb_status != -DLM_EUNLOCK) { | ||
382 | fs_err(sdp, "%s lkid %x status %d\n", | ||
383 | name, lksb->sb_lkid, lksb->sb_status); | ||
384 | return -1; | ||
385 | } | ||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags, | ||
390 | unsigned int num, struct dlm_lksb *lksb, char *name) | ||
391 | { | ||
392 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
393 | char strname[GDLM_STRNAME_BYTES]; | ||
394 | int error, status; | ||
395 | |||
396 | memset(strname, 0, GDLM_STRNAME_BYTES); | ||
397 | snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num); | ||
398 | |||
399 | error = dlm_lock(ls->ls_dlm, mode, lksb, flags, | ||
400 | strname, GDLM_STRNAME_BYTES - 1, | ||
401 | 0, sync_wait_cb, ls, NULL); | ||
402 | if (error) { | ||
403 | fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n", | ||
404 | name, lksb->sb_lkid, flags, mode, error); | ||
405 | return error; | ||
406 | } | ||
407 | |||
408 | wait_for_completion(&ls->ls_sync_wait); | ||
409 | |||
410 | status = lksb->sb_status; | ||
411 | |||
412 | if (status && status != -EAGAIN) { | ||
413 | fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n", | ||
414 | name, lksb->sb_lkid, flags, mode, status); | ||
415 | } | ||
416 | |||
417 | return status; | ||
418 | } | ||
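[Editor's note: sync_lock()/sync_unlock() above wrap the asynchronous dlm_lock()/dlm_unlock() API with a completion so callers can block. Stripped to its essentials, the pattern looks like the following sketch; the function names are placeholders, not part of the patch.]

	#include <linux/completion.h>
	#include <linux/dlm.h>
	#include <linux/string.h>

	static void demo_ast(void *arg)
	{
		struct completion *done = arg;

		complete(done);                      /* runs in dlm callback context */
	}

	static int demo_lock_sync(dlm_lockspace_t *lsp, int mode,
				  struct dlm_lksb *lksb, char *name)
	{
		DECLARE_COMPLETION_ONSTACK(done);
		int error;

		error = dlm_lock(lsp, mode, lksb, 0, name, strlen(name),
				 0, demo_ast, &done, NULL);
		if (error)
			return error;
		wait_for_completion(&done);          /* wait for the AST */
		return lksb->sb_status;              /* grant status from the dlm */
	}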
419 | |||
420 | static int mounted_unlock(struct gfs2_sbd *sdp) | ||
421 | { | ||
422 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
423 | return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock"); | ||
424 | } | ||
425 | |||
426 | static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) | ||
427 | { | ||
428 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
429 | return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK, | ||
430 | &ls->ls_mounted_lksb, "mounted_lock"); | ||
431 | } | ||
432 | |||
433 | static int control_unlock(struct gfs2_sbd *sdp) | ||
434 | { | ||
435 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
436 | return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock"); | ||
437 | } | ||
438 | |||
439 | static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) | ||
440 | { | ||
441 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
442 | return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK, | ||
443 | &ls->ls_control_lksb, "control_lock"); | ||
444 | } | ||
445 | |||
446 | static void gfs2_control_func(struct work_struct *work) | ||
447 | { | ||
448 | struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); | ||
449 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
450 | char lvb_bits[GDLM_LVB_SIZE]; | ||
451 | uint32_t block_gen, start_gen, lvb_gen, flags; | ||
452 | int recover_set = 0; | ||
453 | int write_lvb = 0; | ||
454 | int recover_size; | ||
455 | int i, error; | ||
456 | |||
457 | spin_lock(&ls->ls_recover_spin); | ||
458 | /* | ||
459 | * No MOUNT_DONE means we're still mounting; control_mount() | ||
460 | * will set this flag, after which this thread will take over | ||
461 | * all further clearing of BLOCK_LOCKS. | ||
462 | * | ||
463 | * FIRST_MOUNT means this node is doing first mounter recovery, | ||
464 | * for which recovery control is handled by | ||
465 | * control_mount()/control_first_done(), not this thread. | ||
466 | */ | ||
467 | if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || | ||
468 | test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { | ||
469 | spin_unlock(&ls->ls_recover_spin); | ||
470 | return; | ||
471 | } | ||
472 | block_gen = ls->ls_recover_block; | ||
473 | start_gen = ls->ls_recover_start; | ||
474 | spin_unlock(&ls->ls_recover_spin); | ||
475 | |||
476 | /* | ||
477 | * Equal block_gen and start_gen implies we are between | ||
478 | * recover_prep and recover_done callbacks, which means | ||
479 | * dlm recovery is in progress and dlm locking is blocked. | ||
480 | * There's no point trying to do any work until recover_done. | ||
481 | */ | ||
482 | |||
483 | if (block_gen == start_gen) | ||
484 | return; | ||
485 | |||
486 | /* | ||
487 | * Propagate recover_submit[] and recover_result[] to lvb: | ||
488 | * dlm_recoverd adds to recover_submit[] jids needing recovery | ||
489 | * gfs2_recover adds to recover_result[] journal recovery results | ||
490 | * | ||
491 | * set lvb bit for jids in recover_submit[] if the lvb has not | ||
492 | * yet been updated for the generation of the failure | ||
493 | * | ||
494 | * clear lvb bit for jids in recover_result[] if the result of | ||
495 | * the journal recovery is SUCCESS | ||
496 | */ | ||
497 | |||
498 | error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK); | ||
499 | if (error) { | ||
500 | fs_err(sdp, "control lock EX error %d\n", error); | ||
501 | return; | ||
502 | } | ||
503 | |||
504 | control_lvb_read(ls, &lvb_gen, lvb_bits); | ||
505 | |||
506 | spin_lock(&ls->ls_recover_spin); | ||
507 | if (block_gen != ls->ls_recover_block || | ||
508 | start_gen != ls->ls_recover_start) { | ||
509 | fs_info(sdp, "recover generation %u block1 %u %u\n", | ||
510 | start_gen, block_gen, ls->ls_recover_block); | ||
511 | spin_unlock(&ls->ls_recover_spin); | ||
512 | control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); | ||
513 | return; | ||
514 | } | ||
515 | |||
516 | recover_size = ls->ls_recover_size; | ||
517 | |||
518 | if (lvb_gen <= start_gen) { | ||
519 | /* | ||
520 | * Clear lvb bits for jids we've successfully recovered. | ||
521 | * Because all nodes attempt to recover failed journals, | ||
522 | * a journal can be recovered multiple times successfully | ||
523 | * in succession. Only the first will really do recovery, | ||
524 | * the others find it clean, but still report a successful | ||
525 | * recovery. So, another node may have already recovered | ||
526 | * the jid and cleared the lvb bit for it. | ||
527 | */ | ||
528 | for (i = 0; i < recover_size; i++) { | ||
529 | if (ls->ls_recover_result[i] != LM_RD_SUCCESS) | ||
530 | continue; | ||
531 | |||
532 | ls->ls_recover_result[i] = 0; | ||
533 | |||
534 | if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) | ||
535 | continue; | ||
536 | |||
537 | __clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); | ||
538 | write_lvb = 1; | ||
539 | } | ||
540 | } | ||
541 | |||
542 | if (lvb_gen == start_gen) { | ||
543 | /* | ||
544 | * Failed slots before start_gen are already set in lvb. | ||
545 | */ | ||
546 | for (i = 0; i < recover_size; i++) { | ||
547 | if (!ls->ls_recover_submit[i]) | ||
548 | continue; | ||
549 | if (ls->ls_recover_submit[i] < lvb_gen) | ||
550 | ls->ls_recover_submit[i] = 0; | ||
551 | } | ||
552 | } else if (lvb_gen < start_gen) { | ||
553 | /* | ||
554 | * Failed slots before start_gen are not yet set in lvb. | ||
555 | */ | ||
556 | for (i = 0; i < recover_size; i++) { | ||
557 | if (!ls->ls_recover_submit[i]) | ||
558 | continue; | ||
559 | if (ls->ls_recover_submit[i] < start_gen) { | ||
560 | ls->ls_recover_submit[i] = 0; | ||
561 | __set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); | ||
562 | } | ||
563 | } | ||
564 | /* even if there are no bits to set, we need to write the | ||
565 | latest generation to the lvb */ | ||
566 | write_lvb = 1; | ||
567 | } else { | ||
568 | /* | ||
569 | * we should be getting a recover_done() for lvb_gen soon | ||
570 | */ | ||
571 | } | ||
572 | spin_unlock(&ls->ls_recover_spin); | ||
573 | |||
574 | if (write_lvb) { | ||
575 | control_lvb_write(ls, start_gen, lvb_bits); | ||
576 | flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK; | ||
577 | } else { | ||
578 | flags = DLM_LKF_CONVERT; | ||
579 | } | ||
580 | |||
581 | error = control_lock(sdp, DLM_LOCK_NL, flags); | ||
582 | if (error) { | ||
583 | fs_err(sdp, "control lock NL error %d\n", error); | ||
584 | return; | ||
585 | } | ||
586 | |||
587 | /* | ||
588 | * Everyone will see jid bits set in the lvb, run gfs2_recover_set(), | ||
589 | * and clear a jid bit in the lvb if the recovery is a success. | ||
590 | * Eventually all journals will be recovered, all jid bits will | ||
591 | * be cleared in the lvb, and everyone will clear BLOCK_LOCKS. | ||
592 | */ | ||
593 | |||
594 | for (i = 0; i < recover_size; i++) { | ||
595 | if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) { | ||
596 | fs_info(sdp, "recover generation %u jid %d\n", | ||
597 | start_gen, i); | ||
598 | gfs2_recover_set(sdp, i); | ||
599 | recover_set++; | ||
600 | } | ||
601 | } | ||
602 | if (recover_set) | ||
603 | return; | ||
604 | |||
605 | /* | ||
606 | * No more jid bits set in lvb, all recovery is done, unblock locks | ||
607 | * (unless a new recover_prep callback has occurred blocking locks | ||
608 | * again while working above) | ||
609 | */ | ||
610 | |||
611 | spin_lock(&ls->ls_recover_spin); | ||
612 | if (ls->ls_recover_block == block_gen && | ||
613 | ls->ls_recover_start == start_gen) { | ||
614 | clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); | ||
615 | spin_unlock(&ls->ls_recover_spin); | ||
616 | fs_info(sdp, "recover generation %u done\n", start_gen); | ||
617 | gfs2_glock_thaw(sdp); | ||
618 | } else { | ||
619 | fs_info(sdp, "recover generation %u block2 %u %u\n", | ||
620 | start_gen, block_gen, ls->ls_recover_block); | ||
621 | spin_unlock(&ls->ls_recover_spin); | ||
622 | } | ||
623 | } | ||
624 | |||
625 | static int control_mount(struct gfs2_sbd *sdp) | ||
626 | { | ||
627 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
628 | char lvb_bits[GDLM_LVB_SIZE]; | ||
629 | uint32_t start_gen, block_gen, mount_gen, lvb_gen; | ||
630 | int mounted_mode; | ||
631 | int retries = 0; | ||
632 | int error; | ||
633 | |||
634 | memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb)); | ||
635 | memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb)); | ||
636 | memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE); | ||
637 | ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb; | ||
638 | init_completion(&ls->ls_sync_wait); | ||
639 | |||
640 | set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); | ||
641 | |||
642 | error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK); | ||
643 | if (error) { | ||
644 | fs_err(sdp, "control_mount control_lock NL error %d\n", error); | ||
645 | return error; | ||
646 | } | ||
647 | |||
648 | error = mounted_lock(sdp, DLM_LOCK_NL, 0); | ||
649 | if (error) { | ||
650 | fs_err(sdp, "control_mount mounted_lock NL error %d\n", error); | ||
651 | control_unlock(sdp); | ||
652 | return error; | ||
653 | } | ||
654 | mounted_mode = DLM_LOCK_NL; | ||
655 | |||
656 | restart: | ||
657 | if (retries++ && signal_pending(current)) { | ||
658 | error = -EINTR; | ||
659 | goto fail; | ||
660 | } | ||
661 | |||
662 | /* | ||
663 | * We always start with both locks in NL. control_lock is | ||
664 | * demoted to NL below so we don't need to do it here. | ||
665 | */ | ||
666 | |||
667 | if (mounted_mode != DLM_LOCK_NL) { | ||
668 | error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); | ||
669 | if (error) | ||
670 | goto fail; | ||
671 | mounted_mode = DLM_LOCK_NL; | ||
672 | } | ||
673 | |||
674 | /* | ||
675 | * Other nodes need to do some work in dlm recovery and gfs2_control | ||
676 | * before the recover_done and control_lock will be ready for us below. | ||
677 | * A delay here is not required but often avoids having to retry. | ||
678 | */ | ||
679 | |||
680 | msleep_interruptible(500); | ||
681 | |||
682 | /* | ||
683 | * Acquire control_lock in EX and mounted_lock in either EX or PR. | ||
684 | * control_lock lvb keeps track of any pending journal recoveries. | ||
685 | * mounted_lock indicates if any other nodes have the fs mounted. | ||
686 | */ | ||
687 | |||
688 | error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK); | ||
689 | if (error == -EAGAIN) { | ||
690 | goto restart; | ||
691 | } else if (error) { | ||
692 | fs_err(sdp, "control_mount control_lock EX error %d\n", error); | ||
693 | goto fail; | ||
694 | } | ||
695 | |||
696 | error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE); | ||
697 | if (!error) { | ||
698 | mounted_mode = DLM_LOCK_EX; | ||
699 | goto locks_done; | ||
700 | } else if (error != -EAGAIN) { | ||
701 | fs_err(sdp, "control_mount mounted_lock EX error %d\n", error); | ||
702 | goto fail; | ||
703 | } | ||
704 | |||
705 | error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE); | ||
706 | if (!error) { | ||
707 | mounted_mode = DLM_LOCK_PR; | ||
708 | goto locks_done; | ||
709 | } else { | ||
710 | /* not even -EAGAIN should happen here */ | ||
711 | fs_err(sdp, "control_mount mounted_lock PR error %d\n", error); | ||
712 | goto fail; | ||
713 | } | ||
714 | |||
715 | locks_done: | ||
716 | /* | ||
717 | * If we got both locks above in EX, then we're the first mounter. | ||
718 | * If not, then we need to wait for the control_lock lvb to be | ||
719 | * updated by other mounted nodes to reflect our mount generation. | ||
720 | * | ||
721 | * In the simple first-mounter case, the first mounter sees a zero | ||
722 | * lvb_gen; but if all existing nodes leave/fail before the mounting | ||
723 | * nodes finish control_mount, then all nodes will be mounting and | ||
724 | * lvb_gen will be non-zero. | ||
725 | */ | ||
726 | |||
727 | control_lvb_read(ls, &lvb_gen, lvb_bits); | ||
728 | |||
729 | if (lvb_gen == 0xFFFFFFFF) { | ||
730 | /* special value to force mount attempts to fail */ | ||
731 | fs_err(sdp, "control_mount control_lock disabled\n"); | ||
732 | error = -EINVAL; | ||
733 | goto fail; | ||
734 | } | ||
735 | |||
736 | if (mounted_mode == DLM_LOCK_EX) { | ||
737 | /* first mounter, keep both EX while doing first recovery */ | ||
738 | spin_lock(&ls->ls_recover_spin); | ||
739 | clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); | ||
740 | set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags); | ||
741 | set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); | ||
742 | spin_unlock(&ls->ls_recover_spin); | ||
743 | fs_info(sdp, "first mounter control generation %u\n", lvb_gen); | ||
744 | return 0; | ||
745 | } | ||
746 | |||
747 | error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT); | ||
748 | if (error) | ||
749 | goto fail; | ||
750 | |||
751 | /* | ||
752 | * We are not first mounter, now we need to wait for the control_lock | ||
753 | * lvb generation to be >= the generation from our first recover_done | ||
754 | * and all lvb bits to be clear (no pending journal recoveries.) | ||
755 | */ | ||
756 | |||
757 | if (!all_jid_bits_clear(lvb_bits)) { | ||
758 | /* journals need recovery, wait until all are clear */ | ||
759 | fs_info(sdp, "control_mount wait for journal recovery\n"); | ||
760 | goto restart; | ||
761 | } | ||
762 | |||
763 | spin_lock(&ls->ls_recover_spin); | ||
764 | block_gen = ls->ls_recover_block; | ||
765 | start_gen = ls->ls_recover_start; | ||
766 | mount_gen = ls->ls_recover_mount; | ||
767 | |||
768 | if (lvb_gen < mount_gen) { | ||
769 | /* wait for mounted nodes to update control_lock lvb to our | ||
770 | generation, which might include new recovery bits set */ | ||
771 | fs_info(sdp, "control_mount wait1 block %u start %u mount %u " | ||
772 | "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, | ||
773 | lvb_gen, ls->ls_recover_flags); | ||
774 | spin_unlock(&ls->ls_recover_spin); | ||
775 | goto restart; | ||
776 | } | ||
777 | |||
778 | if (lvb_gen != start_gen) { | ||
779 | /* wait for mounted nodes to update control_lock lvb to the | ||
780 | latest recovery generation */ | ||
781 | fs_info(sdp, "control_mount wait2 block %u start %u mount %u " | ||
782 | "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, | ||
783 | lvb_gen, ls->ls_recover_flags); | ||
784 | spin_unlock(&ls->ls_recover_spin); | ||
785 | goto restart; | ||
786 | } | ||
787 | |||
788 | if (block_gen == start_gen) { | ||
789 | /* dlm recovery in progress, wait for it to finish */ | ||
790 | fs_info(sdp, "control_mount wait3 block %u start %u mount %u " | ||
791 | "lvb %u flags %lx\n", block_gen, start_gen, mount_gen, | ||
792 | lvb_gen, ls->ls_recover_flags); | ||
793 | spin_unlock(&ls->ls_recover_spin); | ||
794 | goto restart; | ||
196 | } | 795 | } |
197 | 796 | ||
198 | error = dlm_new_lockspace(fsname, strlen(fsname), &ls->ls_dlm, | 797 | clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); |
199 | DLM_LSFL_FS | DLM_LSFL_NEWEXCL | | 798 | set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags); |
200 | (ls->ls_nodir ? DLM_LSFL_NODIR : 0), | 799 | memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t)); |
201 | GDLM_LVB_SIZE); | 800 | memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); |
801 | spin_unlock(&ls->ls_recover_spin); | ||
802 | return 0; | ||
803 | |||
804 | fail: | ||
805 | mounted_unlock(sdp); | ||
806 | control_unlock(sdp); | ||
807 | return error; | ||
808 | } | ||
809 | |||
810 | static int dlm_recovery_wait(void *word) | ||
811 | { | ||
812 | schedule(); | ||
813 | return 0; | ||
814 | } | ||
815 | |||
816 | static int control_first_done(struct gfs2_sbd *sdp) | ||
817 | { | ||
818 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
819 | char lvb_bits[GDLM_LVB_SIZE]; | ||
820 | uint32_t start_gen, block_gen; | ||
821 | int error; | ||
822 | |||
823 | restart: | ||
824 | spin_lock(&ls->ls_recover_spin); | ||
825 | start_gen = ls->ls_recover_start; | ||
826 | block_gen = ls->ls_recover_block; | ||
827 | |||
828 | if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) || | ||
829 | !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || | ||
830 | !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { | ||
831 | /* sanity check, should not happen */ | ||
832 | fs_err(sdp, "control_first_done start %u block %u flags %lx\n", | ||
833 | start_gen, block_gen, ls->ls_recover_flags); | ||
834 | spin_unlock(&ls->ls_recover_spin); | ||
835 | control_unlock(sdp); | ||
836 | return -1; | ||
837 | } | ||
838 | |||
839 | if (start_gen == block_gen) { | ||
840 | /* | ||
841 | * Wait for the end of a dlm recovery cycle to switch from | ||
842 | * first mounter recovery. We can ignore any recover_slot | ||
843 | * callbacks between the recover_prep and next recover_done | ||
844 | * because we are still the first mounter and any failed nodes | ||
845 | * have not fully mounted, so they don't need recovery. | ||
846 | */ | ||
847 | spin_unlock(&ls->ls_recover_spin); | ||
848 | fs_info(sdp, "control_first_done wait gen %u\n", start_gen); | ||
849 | |||
850 | wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY, | ||
851 | dlm_recovery_wait, TASK_UNINTERRUPTIBLE); | ||
852 | goto restart; | ||
853 | } | ||
854 | |||
855 | clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); | ||
856 | set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags); | ||
857 | memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t)); | ||
858 | memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); | ||
859 | spin_unlock(&ls->ls_recover_spin); | ||
860 | |||
861 | memset(lvb_bits, 0, sizeof(lvb_bits)); | ||
862 | control_lvb_write(ls, start_gen, lvb_bits); | ||
863 | |||
864 | error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT); | ||
865 | if (error) | ||
866 | fs_err(sdp, "control_first_done mounted PR error %d\n", error); | ||
867 | |||
868 | error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK); | ||
202 | if (error) | 869 | if (error) |
203 | printk(KERN_ERR "dlm_new_lockspace error %d", error); | 870 | fs_err(sdp, "control_first_done control NL error %d\n", error); |
204 | 871 | ||
205 | return error; | 872 | return error; |
206 | } | 873 | } |
207 | 874 | ||
875 | /* | ||
876 | * Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC) | ||
877 | * to accommodate the largest slot number. (NB dlm slot numbers start at 1, | ||
878 | * gfs2 jids start at 0, so jid = slot - 1) | ||
879 | */ | ||
880 | |||
881 | #define RECOVER_SIZE_INC 16 | ||
882 | |||
883 | static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, | ||
884 | int num_slots) | ||
885 | { | ||
886 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
887 | uint32_t *submit = NULL; | ||
888 | uint32_t *result = NULL; | ||
889 | uint32_t old_size, new_size; | ||
890 | int i, max_jid; | ||
891 | |||
892 | max_jid = 0; | ||
893 | for (i = 0; i < num_slots; i++) { | ||
894 | if (max_jid < slots[i].slot - 1) | ||
895 | max_jid = slots[i].slot - 1; | ||
896 | } | ||
897 | |||
898 | old_size = ls->ls_recover_size; | ||
899 | |||
900 | if (old_size >= max_jid + 1) | ||
901 | return 0; | ||
902 | |||
903 | new_size = old_size + RECOVER_SIZE_INC; | ||
904 | |||
905 | submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); | ||
906 | result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); | ||
907 | if (!submit || !result) { | ||
908 | kfree(submit); | ||
909 | kfree(result); | ||
910 | return -ENOMEM; | ||
911 | } | ||
912 | |||
913 | spin_lock(&ls->ls_recover_spin); | ||
914 | memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t)); | ||
915 | memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t)); | ||
916 | kfree(ls->ls_recover_submit); | ||
917 | kfree(ls->ls_recover_result); | ||
918 | ls->ls_recover_submit = submit; | ||
919 | ls->ls_recover_result = result; | ||
920 | ls->ls_recover_size = new_size; | ||
921 | spin_unlock(&ls->ls_recover_spin); | ||
922 | return 0; | ||
923 | } | ||
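[Editor's note: the slot-to-jid conversion described in the comment above set_recover_size() recurs throughout this patch (set_recover_size(), gdlm_recover_slot(), gdlm_recover_done()). It is just an off-by-one mapping; the helper below is hypothetical, for illustration.]

	/* dlm slot numbers start at 1, gfs2 jids start at 0 */
	static inline int slot_to_jid(int slot)
	{
		return slot - 1;
	}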
924 | |||
925 | static void free_recover_size(struct lm_lockstruct *ls) | ||
926 | { | ||
927 | kfree(ls->ls_recover_submit); | ||
928 | kfree(ls->ls_recover_result); | ||
929 | ls->ls_recover_submit = NULL; | ||
930 | ls->ls_recover_result = NULL; | ||
931 | ls->ls_recover_size = 0; | ||
932 | } | ||
933 | |||
934 | /* dlm calls before it does lock recovery */ | ||
935 | |||
936 | static void gdlm_recover_prep(void *arg) | ||
937 | { | ||
938 | struct gfs2_sbd *sdp = arg; | ||
939 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
940 | |||
941 | spin_lock(&ls->ls_recover_spin); | ||
942 | ls->ls_recover_block = ls->ls_recover_start; | ||
943 | set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); | ||
944 | |||
945 | if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) || | ||
946 | test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { | ||
947 | spin_unlock(&ls->ls_recover_spin); | ||
948 | return; | ||
949 | } | ||
950 | set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); | ||
951 | spin_unlock(&ls->ls_recover_spin); | ||
952 | } | ||
953 | |||
954 | /* dlm calls after recover_prep has been completed on all lockspace members; | ||
955 | identifies slot/jid of failed member */ | ||
956 | |||
957 | static void gdlm_recover_slot(void *arg, struct dlm_slot *slot) | ||
958 | { | ||
959 | struct gfs2_sbd *sdp = arg; | ||
960 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
961 | int jid = slot->slot - 1; | ||
962 | |||
963 | spin_lock(&ls->ls_recover_spin); | ||
964 | if (ls->ls_recover_size < jid + 1) { | ||
965 | fs_err(sdp, "recover_slot jid %d gen %u short size %d", | ||
966 | jid, ls->ls_recover_block, ls->ls_recover_size); | ||
967 | spin_unlock(&ls->ls_recover_spin); | ||
968 | return; | ||
969 | } | ||
970 | |||
971 | if (ls->ls_recover_submit[jid]) { | ||
972 | fs_info(sdp, "recover_slot jid %d gen %u prev %u", | ||
973 | jid, ls->ls_recover_block, ls->ls_recover_submit[jid]); | ||
974 | } | ||
975 | ls->ls_recover_submit[jid] = ls->ls_recover_block; | ||
976 | spin_unlock(&ls->ls_recover_spin); | ||
977 | } | ||
978 | |||
979 | /* dlm calls after recover_slot and after it completes lock recovery */ | ||
980 | |||
981 | static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots, | ||
982 | int our_slot, uint32_t generation) | ||
983 | { | ||
984 | struct gfs2_sbd *sdp = arg; | ||
985 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
986 | |||
987 | /* ensure the ls jid arrays are large enough */ | ||
988 | set_recover_size(sdp, slots, num_slots); | ||
989 | |||
990 | spin_lock(&ls->ls_recover_spin); | ||
991 | ls->ls_recover_start = generation; | ||
992 | |||
993 | if (!ls->ls_recover_mount) { | ||
994 | ls->ls_recover_mount = generation; | ||
995 | ls->ls_jid = our_slot - 1; | ||
996 | } | ||
997 | |||
998 | if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) | ||
999 | queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); | ||
1000 | |||
1001 | clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); | ||
1002 | smp_mb__after_clear_bit(); | ||
1003 | wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY); | ||
1004 | spin_unlock(&ls->ls_recover_spin); | ||
1005 | } | ||
1006 | |||
1007 | /* gfs2_recover thread has a journal recovery result */ | ||
1008 | |||
1009 | static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid, | ||
1010 | unsigned int result) | ||
1011 | { | ||
1012 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
1013 | |||
1014 | if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) | ||
1015 | return; | ||
1016 | |||
1017 | /* don't care about the recovery of our own journal during mount */ | ||
1018 | if (jid == ls->ls_jid) | ||
1019 | return; | ||
1020 | |||
1021 | spin_lock(&ls->ls_recover_spin); | ||
1022 | if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) { | ||
1023 | spin_unlock(&ls->ls_recover_spin); | ||
1024 | return; | ||
1025 | } | ||
1026 | if (ls->ls_recover_size < jid + 1) { | ||
1027 | fs_err(sdp, "recovery_result jid %d short size %d", | ||
1028 | jid, ls->ls_recover_size); | ||
1029 | spin_unlock(&ls->ls_recover_spin); | ||
1030 | return; | ||
1031 | } | ||
1032 | |||
1033 | fs_info(sdp, "recover jid %d result %s\n", jid, | ||
1034 | result == LM_RD_GAVEUP ? "busy" : "success"); | ||
1035 | |||
1036 | ls->ls_recover_result[jid] = result; | ||
1037 | |||
1038 | /* GAVEUP means another node is recovering the journal; delay our | ||
1039 | next attempt to recover it, to give the other node a chance to | ||
1040 | finish before trying again */ | ||
1041 | |||
1042 | if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) | ||
1043 | queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, | ||
1044 | result == LM_RD_GAVEUP ? HZ : 0); | ||
1045 | spin_unlock(&ls->ls_recover_spin); | ||
1046 | } | ||
1047 | |||
1048 | const struct dlm_lockspace_ops gdlm_lockspace_ops = { | ||
1049 | .recover_prep = gdlm_recover_prep, | ||
1050 | .recover_slot = gdlm_recover_slot, | ||
1051 | .recover_done = gdlm_recover_done, | ||
1052 | }; | ||
1053 | |||
1054 | static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) | ||
1055 | { | ||
1056 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
1057 | char cluster[GFS2_LOCKNAME_LEN]; | ||
1058 | const char *fsname; | ||
1059 | uint32_t flags; | ||
1060 | int error, ops_result; | ||
1061 | |||
1062 | /* | ||
1063 | * initialize everything | ||
1064 | */ | ||
1065 | |||
1066 | INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func); | ||
1067 | spin_lock_init(&ls->ls_recover_spin); | ||
1068 | ls->ls_recover_flags = 0; | ||
1069 | ls->ls_recover_mount = 0; | ||
1070 | ls->ls_recover_start = 0; | ||
1071 | ls->ls_recover_block = 0; | ||
1072 | ls->ls_recover_size = 0; | ||
1073 | ls->ls_recover_submit = NULL; | ||
1074 | ls->ls_recover_result = NULL; | ||
1075 | |||
1076 | error = set_recover_size(sdp, NULL, 0); | ||
1077 | if (error) | ||
1078 | goto fail; | ||
1079 | |||
1080 | /* | ||
1081 | * prepare dlm_new_lockspace args | ||
1082 | */ | ||
1083 | |||
1084 | fsname = strchr(table, ':'); | ||
1085 | if (!fsname) { | ||
1086 | fs_info(sdp, "no fsname found\n"); | ||
1087 | error = -EINVAL; | ||
1088 | goto fail_free; | ||
1089 | } | ||
1090 | memset(cluster, 0, sizeof(cluster)); | ||
1091 | memcpy(cluster, table, strlen(table) - strlen(fsname)); | ||
1092 | fsname++; | ||
1093 | |||
1094 | flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL; | ||
1095 | if (ls->ls_nodir) | ||
1096 | flags |= DLM_LSFL_NODIR; | ||
1097 | |||
1098 | /* | ||
1099 | * create/join lockspace | ||
1100 | */ | ||
1101 | |||
1102 | error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE, | ||
1103 | &gdlm_lockspace_ops, sdp, &ops_result, | ||
1104 | &ls->ls_dlm); | ||
1105 | if (error) { | ||
1106 | fs_err(sdp, "dlm_new_lockspace error %d\n", error); | ||
1107 | goto fail_free; | ||
1108 | } | ||
1109 | |||
1110 | if (ops_result < 0) { | ||
1111 | /* | ||
1112 | * dlm does not support ops callbacks; the old | ||
1113 | * dlm_controld/gfs_controld are in use, so try without ops. | ||
1114 | */ | ||
1115 | fs_info(sdp, "dlm lockspace ops not used\n"); | ||
1116 | free_recover_size(ls); | ||
1117 | set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags); | ||
1118 | return 0; | ||
1119 | } | ||
1120 | |||
1121 | if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) { | ||
1122 | fs_err(sdp, "dlm lockspace ops disallow jid preset\n"); | ||
1123 | error = -EINVAL; | ||
1124 | goto fail_release; | ||
1125 | } | ||
1126 | |||
1127 | /* | ||
1128 | * control_mount() uses control_lock to determine first mounter, | ||
1129 | * and for later mounts, waits for any recoveries to be cleared. | ||
1130 | */ | ||
1131 | |||
1132 | error = control_mount(sdp); | ||
1133 | if (error) { | ||
1134 | fs_err(sdp, "mount control error %d\n", error); | ||
1135 | goto fail_release; | ||
1136 | } | ||
1137 | |||
1138 | ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); | ||
1139 | clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); | ||
1140 | smp_mb__after_clear_bit(); | ||
1141 | wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); | ||
1142 | return 0; | ||
1143 | |||
1144 | fail_release: | ||
1145 | dlm_release_lockspace(ls->ls_dlm, 2); | ||
1146 | fail_free: | ||
1147 | free_recover_size(ls); | ||
1148 | fail: | ||
1149 | return error; | ||
1150 | } | ||
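[Editor's note: gdlm_mount() now receives the full table name ("cluster:fsname") instead of a pre-split fsname; the ops_fstype.c hunk below removes the old split. A hedged standalone sketch of the parsing done above; the helper name and buffer size are chosen arbitrarily for illustration.]

	#include <string.h>

	/* hypothetical helper mirroring the strchr()-based split in gdlm_mount() */
	static int split_table(const char *table, char cluster[64],
			       const char **fsname)
	{
		const char *colon = strchr(table, ':');
		size_t n;

		if (!colon)
			return -1;              /* no fsname found */
		n = colon - table;
		if (n > 63)
			n = 63;                 /* truncate, keep NUL terminator */
		memset(cluster, 0, 64);
		memcpy(cluster, table, n);
		*fsname = colon + 1;            /* points past the ':' */
		return 0;
	}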
1151 | |||
1152 | static void gdlm_first_done(struct gfs2_sbd *sdp) | ||
1153 | { | ||
1154 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | ||
1155 | int error; | ||
1156 | |||
1157 | if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) | ||
1158 | return; | ||
1159 | |||
1160 | error = control_first_done(sdp); | ||
1161 | if (error) | ||
1162 | fs_err(sdp, "mount first_done error %d\n", error); | ||
1163 | } | ||
1164 | |||
208 | static void gdlm_unmount(struct gfs2_sbd *sdp) | 1165 | static void gdlm_unmount(struct gfs2_sbd *sdp) |
209 | { | 1166 | { |
210 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 1167 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
211 | 1168 | ||
1169 | if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) | ||
1170 | goto release; | ||
1171 | |||
1172 | /* wait for gfs2_control_wq to be done with this mount */ | ||
1173 | |||
1174 | spin_lock(&ls->ls_recover_spin); | ||
1175 | set_bit(DFL_UNMOUNT, &ls->ls_recover_flags); | ||
1176 | spin_unlock(&ls->ls_recover_spin); | ||
1177 | flush_delayed_work_sync(&sdp->sd_control_work); | ||
1178 | |||
1179 | /* mounted_lock and control_lock will be purged in dlm recovery */ | ||
1180 | release: | ||
212 | if (ls->ls_dlm) { | 1181 | if (ls->ls_dlm) { |
213 | dlm_release_lockspace(ls->ls_dlm, 2); | 1182 | dlm_release_lockspace(ls->ls_dlm, 2); |
214 | ls->ls_dlm = NULL; | 1183 | ls->ls_dlm = NULL; |
215 | } | 1184 | } |
1185 | |||
1186 | free_recover_size(ls); | ||
216 | } | 1187 | } |
217 | 1188 | ||
218 | static const match_table_t dlm_tokens = { | 1189 | static const match_table_t dlm_tokens = { |
@@ -226,6 +1197,8 @@ static const match_table_t dlm_tokens = { | |||
226 | const struct lm_lockops gfs2_dlm_ops = { | 1197 | const struct lm_lockops gfs2_dlm_ops = { |
227 | .lm_proto_name = "lock_dlm", | 1198 | .lm_proto_name = "lock_dlm", |
228 | .lm_mount = gdlm_mount, | 1199 | .lm_mount = gdlm_mount, |
1200 | .lm_first_done = gdlm_first_done, | ||
1201 | .lm_recovery_result = gdlm_recovery_result, | ||
229 | .lm_unmount = gdlm_unmount, | 1202 | .lm_unmount = gdlm_unmount, |
230 | .lm_put_lock = gdlm_put_lock, | 1203 | .lm_put_lock = gdlm_put_lock, |
231 | .lm_lock = gdlm_lock, | 1204 | .lm_lock = gdlm_lock, |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index c150298e2d8e..a8d9bcd0e19c 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -28,6 +28,8 @@ | |||
28 | #include "recovery.h" | 28 | #include "recovery.h" |
29 | #include "dir.h" | 29 | #include "dir.h" |
30 | 30 | ||
31 | struct workqueue_struct *gfs2_control_wq; | ||
32 | |||
31 | static struct shrinker qd_shrinker = { | 33 | static struct shrinker qd_shrinker = { |
32 | .shrink = gfs2_shrink_qd_memory, | 34 | .shrink = gfs2_shrink_qd_memory, |
33 | .seeks = DEFAULT_SEEKS, | 35 | .seeks = DEFAULT_SEEKS, |
@@ -146,12 +148,19 @@ static int __init init_gfs2_fs(void) | |||
146 | if (!gfs_recovery_wq) | 148 | if (!gfs_recovery_wq) |
147 | goto fail_wq; | 149 | goto fail_wq; |
148 | 150 | ||
151 | gfs2_control_wq = alloc_workqueue("gfs2_control", | ||
152 | WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0); | ||
153 | if (!gfs2_control_wq) | ||
154 | goto fail_control; | ||
155 | |||
149 | gfs2_register_debugfs(); | 156 | gfs2_register_debugfs(); |
150 | 157 | ||
151 | printk("GFS2 installed\n"); | 158 | printk("GFS2 installed\n"); |
152 | 159 | ||
153 | return 0; | 160 | return 0; |
154 | 161 | ||
162 | fail_control: | ||
163 | destroy_workqueue(gfs_recovery_wq); | ||
155 | fail_wq: | 164 | fail_wq: |
156 | unregister_filesystem(&gfs2meta_fs_type); | 165 | unregister_filesystem(&gfs2meta_fs_type); |
157 | fail_unregister: | 166 | fail_unregister: |
@@ -195,6 +204,7 @@ static void __exit exit_gfs2_fs(void) | |||
195 | unregister_filesystem(&gfs2_fs_type); | 204 | unregister_filesystem(&gfs2_fs_type); |
196 | unregister_filesystem(&gfs2meta_fs_type); | 205 | unregister_filesystem(&gfs2meta_fs_type); |
197 | destroy_workqueue(gfs_recovery_wq); | 206 | destroy_workqueue(gfs_recovery_wq); |
207 | destroy_workqueue(gfs2_control_wq); | ||
198 | 208 | ||
199 | rcu_barrier(); | 209 | rcu_barrier(); |
200 | 210 | ||
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index fe72e79e6ff9..6aacf3f230a2 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -562,8 +562,12 @@ static void gfs2_others_may_mount(struct gfs2_sbd *sdp) | |||
562 | { | 562 | { |
563 | char *message = "FIRSTMOUNT=Done"; | 563 | char *message = "FIRSTMOUNT=Done"; |
564 | char *envp[] = { message, NULL }; | 564 | char *envp[] = { message, NULL }; |
565 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 565 | |
566 | ls->ls_first_done = 1; | 566 | fs_info(sdp, "first mount done, others may mount\n"); |
567 | |||
568 | if (sdp->sd_lockstruct.ls_ops->lm_first_done) | ||
569 | sdp->sd_lockstruct.ls_ops->lm_first_done(sdp); | ||
570 | |||
567 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); | 571 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); |
568 | } | 572 | } |
569 | 573 | ||
@@ -944,7 +948,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | |||
944 | struct gfs2_args *args = &sdp->sd_args; | 948 | struct gfs2_args *args = &sdp->sd_args; |
945 | const char *proto = sdp->sd_proto_name; | 949 | const char *proto = sdp->sd_proto_name; |
946 | const char *table = sdp->sd_table_name; | 950 | const char *table = sdp->sd_table_name; |
947 | const char *fsname; | ||
948 | char *o, *options; | 951 | char *o, *options; |
949 | int ret; | 952 | int ret; |
950 | 953 | ||
@@ -1004,21 +1007,12 @@ hostdata_error: | |||
1004 | } | 1007 | } |
1005 | } | 1008 | } |
1006 | 1009 | ||
1007 | if (sdp->sd_args.ar_spectator) | ||
1008 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table); | ||
1009 | else | ||
1010 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table, | ||
1011 | sdp->sd_lockstruct.ls_jid); | ||
1012 | |||
1013 | fsname = strchr(table, ':'); | ||
1014 | if (fsname) | ||
1015 | fsname++; | ||
1016 | if (lm->lm_mount == NULL) { | 1010 | if (lm->lm_mount == NULL) { |
1017 | fs_info(sdp, "Now mounting FS...\n"); | 1011 | fs_info(sdp, "Now mounting FS...\n"); |
1018 | complete_all(&sdp->sd_locking_init); | 1012 | complete_all(&sdp->sd_locking_init); |
1019 | return 0; | 1013 | return 0; |
1020 | } | 1014 | } |
1021 | ret = lm->lm_mount(sdp, fsname); | 1015 | ret = lm->lm_mount(sdp, table); |
1022 | if (ret == 0) | 1016 | if (ret == 0) |
1023 | fs_info(sdp, "Joined cluster. Now mounting FS...\n"); | 1017 | fs_info(sdp, "Joined cluster. Now mounting FS...\n"); |
1024 | complete_all(&sdp->sd_locking_init); | 1018 | complete_all(&sdp->sd_locking_init); |
@@ -1084,7 +1078,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
1084 | 1078 | ||
1085 | if (sdp->sd_args.ar_spectator) { | 1079 | if (sdp->sd_args.ar_spectator) { |
1086 | sb->s_flags |= MS_RDONLY; | 1080 | sb->s_flags |= MS_RDONLY; |
1087 | set_bit(SDF_NORECOVERY, &sdp->sd_flags); | 1081 | set_bit(SDF_RORECOVERY, &sdp->sd_flags); |
1088 | } | 1082 | } |
1089 | if (sdp->sd_args.ar_posix_acl) | 1083 | if (sdp->sd_args.ar_posix_acl) |
1090 | sb->s_flags |= MS_POSIXACL; | 1084 | sb->s_flags |= MS_POSIXACL; |
@@ -1124,6 +1118,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
1124 | if (error) | 1118 | if (error) |
1125 | goto fail; | 1119 | goto fail; |
1126 | 1120 | ||
1121 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name); | ||
1122 | |||
1127 | gfs2_create_debugfs_file(sdp); | 1123 | gfs2_create_debugfs_file(sdp); |
1128 | 1124 | ||
1129 | error = gfs2_sys_fs_add(sdp); | 1125 | error = gfs2_sys_fs_add(sdp); |
@@ -1160,6 +1156,13 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
1160 | goto fail_sb; | 1156 | goto fail_sb; |
1161 | } | 1157 | } |
1162 | 1158 | ||
1159 | if (sdp->sd_args.ar_spectator) | ||
1160 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", | ||
1161 | sdp->sd_table_name); | ||
1162 | else | ||
1163 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", | ||
1164 | sdp->sd_table_name, sdp->sd_lockstruct.ls_jid); | ||
1165 | |||
1163 | error = init_inodes(sdp, DO); | 1166 | error = init_inodes(sdp, DO); |
1164 | if (error) | 1167 | if (error) |
1165 | goto fail_sb; | 1168 | goto fail_sb; |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index f2a02edcac8f..963b2d75200c 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
@@ -436,12 +436,16 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | |||
436 | char env_status[20]; | 436 | char env_status[20]; |
437 | char *envp[] = { env_jid, env_status, NULL }; | 437 | char *envp[] = { env_jid, env_status, NULL }; |
438 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 438 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
439 | |||
439 | ls->ls_recover_jid_done = jid; | 440 | ls->ls_recover_jid_done = jid; |
440 | ls->ls_recover_jid_status = message; | 441 | ls->ls_recover_jid_status = message; |
441 | sprintf(env_jid, "JID=%d", jid); | 442 | sprintf(env_jid, "JID=%d", jid); |
442 | sprintf(env_status, "RECOVERY=%s", | 443 | sprintf(env_status, "RECOVERY=%s", |
443 | message == LM_RD_SUCCESS ? "Done" : "Failed"); | 444 | message == LM_RD_SUCCESS ? "Done" : "Failed"); |
444 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); | 445 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); |
446 | |||
447 | if (sdp->sd_lockstruct.ls_ops->lm_recovery_result) | ||
448 | sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message); | ||
445 | } | 449 | } |
446 | 450 | ||
447 | void gfs2_recover_func(struct work_struct *work) | 451 | void gfs2_recover_func(struct work_struct *work) |
@@ -512,7 +516,9 @@ void gfs2_recover_func(struct work_struct *work) | |||
512 | if (error) | 516 | if (error) |
513 | goto fail_gunlock_ji; | 517 | goto fail_gunlock_ji; |
514 | 518 | ||
515 | if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { | 519 | if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) { |
520 | ro = 1; | ||
521 | } else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) { | ||
516 | if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) | 522 | if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) |
517 | ro = 1; | 523 | ro = 1; |
518 | } else { | 524 | } else { |
@@ -577,6 +583,7 @@ fail_gunlock_j: | |||
577 | 583 | ||
578 | fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); | 584 | fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); |
579 | fail: | 585 | fail: |
586 | jd->jd_recover_error = error; | ||
580 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); | 587 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); |
581 | done: | 588 | done: |
582 | clear_bit(JDF_RECOVERY, &jd->jd_flags); | 589 | clear_bit(JDF_RECOVERY, &jd->jd_flags); |
@@ -605,6 +612,6 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) | |||
605 | wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, | 612 | wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, |
606 | TASK_UNINTERRUPTIBLE); | 613 | TASK_UNINTERRUPTIBLE); |
607 | 614 | ||
608 | return 0; | 615 | return wait ? jd->jd_recover_error : 0; |
609 | } | 616 | } |
610 | 617 | ||
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 22234627f684..981bfa32121a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -1108,9 +1108,9 @@ void gfs2_inplace_release(struct gfs2_inode *ip) | |||
1108 | { | 1108 | { |
1109 | struct gfs2_blkreserv *rs = ip->i_res; | 1109 | struct gfs2_blkreserv *rs = ip->i_res; |
1110 | 1110 | ||
1111 | gfs2_blkrsv_put(ip); | ||
1112 | if (rs->rs_rgd_gh.gh_gl) | 1111 | if (rs->rs_rgd_gh.gh_gl) |
1113 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); | 1112 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); |
1113 | gfs2_blkrsv_put(ip); | ||
1114 | } | 1114 | } |
1115 | 1115 | ||
1116 | /** | 1116 | /** |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 443cabcfcd23..d33172c291ba 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -298,7 +298,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf) | |||
298 | ssize_t ret; | 298 | ssize_t ret; |
299 | int val = 0; | 299 | int val = 0; |
300 | 300 | ||
301 | if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags)) | 301 | if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags)) |
302 | val = 1; | 302 | val = 1; |
303 | ret = sprintf(buf, "%d\n", val); | 303 | ret = sprintf(buf, "%d\n", val); |
304 | return ret; | 304 | return ret; |
@@ -313,9 +313,9 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | |||
313 | val = simple_strtol(buf, NULL, 0); | 313 | val = simple_strtol(buf, NULL, 0); |
314 | 314 | ||
315 | if (val == 1) | 315 | if (val == 1) |
316 | set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags); | 316 | set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); |
317 | else if (val == 0) { | 317 | else if (val == 0) { |
318 | clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags); | 318 | clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); |
319 | smp_mb__after_clear_bit(); | 319 | smp_mb__after_clear_bit(); |
320 | gfs2_glock_thaw(sdp); | 320 | gfs2_glock_thaw(sdp); |
321 | } else { | 321 | } else { |
@@ -350,8 +350,8 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | |||
350 | goto out; | 350 | goto out; |
351 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) | 351 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
352 | goto out; | 352 | goto out; |
353 | sdp->sd_lockstruct.ls_first = first; | 353 | sdp->sd_lockstruct.ls_first = first; |
354 | rv = 0; | 354 | rv = 0; |
355 | out: | 355 | out: |
356 | spin_unlock(&sdp->sd_jindex_spin); | 356 | spin_unlock(&sdp->sd_jindex_spin); |
357 | return rv ? rv : len; | 357 | return rv ? rv : len; |
@@ -360,19 +360,14 @@ out: | |||
360 | static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) | 360 | static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) |
361 | { | 361 | { |
362 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 362 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
363 | return sprintf(buf, "%d\n", ls->ls_first_done); | 363 | return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags)); |
364 | } | 364 | } |
365 | 365 | ||
366 | static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | 366 | int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) |
367 | { | 367 | { |
368 | unsigned jid; | ||
369 | struct gfs2_jdesc *jd; | 368 | struct gfs2_jdesc *jd; |
370 | int rv; | 369 | int rv; |
371 | 370 | ||
372 | rv = sscanf(buf, "%u", &jid); | ||
373 | if (rv != 1) | ||
374 | return -EINVAL; | ||
375 | |||
376 | rv = -ESHUTDOWN; | 371 | rv = -ESHUTDOWN; |
377 | spin_lock(&sdp->sd_jindex_spin); | 372 | spin_lock(&sdp->sd_jindex_spin); |
378 | if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) | 373 | if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) |
@@ -389,6 +384,20 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | |||
389 | } | 384 | } |
390 | out: | 385 | out: |
391 | spin_unlock(&sdp->sd_jindex_spin); | 386 | spin_unlock(&sdp->sd_jindex_spin); |
387 | return rv; | ||
388 | } | ||
389 | |||
390 | static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | ||
391 | { | ||
392 | unsigned jid; | ||
393 | int rv; | ||
394 | |||
395 | rv = sscanf(buf, "%u", &jid); | ||
396 | if (rv != 1) | ||
397 | return -EINVAL; | ||
398 | |||
399 | rv = gfs2_recover_set(sdp, jid); | ||
400 | |||
392 | return rv ? rv : len; | 401 | return rv ? rv : len; |
393 | } | 402 | } |
394 | 403 | ||
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h index e94560e836d7..79182d6ad6ac 100644 --- a/fs/gfs2/sys.h +++ b/fs/gfs2/sys.h | |||
@@ -19,5 +19,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp); | |||
19 | int gfs2_sys_init(void); | 19 | int gfs2_sys_init(void); |
20 | void gfs2_sys_uninit(void); | 20 | void gfs2_sys_uninit(void); |
21 | 21 | ||
22 | int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid); | ||
23 | |||
22 | #endif /* __SYS_DOT_H__ */ | 24 | #endif /* __SYS_DOT_H__ */ |
23 | 25 | ||
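The two gfs2 hunks above split sysfs input parsing from the recovery action: recover_store() keeps the sscanf, while the new gfs2_recover_set(), exported through sys.h, can be called by other code that wants to request recovery of a journal. A minimal user-space sketch of the same parse/act split; struct sbd, recover_set() and the error convention below are stand-ins, not the real GFS2 types:

#include <stdio.h>
#include <errno.h>

/* Hypothetical stand-ins for the superblock and the recovery action. */
struct sbd { unsigned last_jid; };

static int recover_set(struct sbd *sdp, unsigned jid)
{
    sdp->last_jid = jid;        /* the real helper queues journal recovery */
    return 0;                   /* 0 on success, negative errno on failure */
}

/* The sysfs store handler only parses; the action is reusable elsewhere. */
static long recover_store(struct sbd *sdp, const char *buf, size_t len)
{
    unsigned jid;
    int rv;

    if (sscanf(buf, "%u", &jid) != 1)
        return -EINVAL;
    rv = recover_set(sdp, jid);
    return rv ? rv : (long)len;
}

int main(void)
{
    struct sbd sb = { 0 };

    printf("%ld\n", recover_store(&sb, "3\n", 2));  /* prints 2 (= len) */
    return 0;
}

Returning len on success follows the usual sysfs store convention: the write is either consumed in full or fails with an errno.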
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index edf0a801446b..427682ca9e48 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -499,9 +499,16 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
499 | if (!sbi->hidden_dir) { | 499 | if (!sbi->hidden_dir) { |
500 | mutex_lock(&sbi->vh_mutex); | 500 | mutex_lock(&sbi->vh_mutex); |
501 | sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); | 501 | sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); |
502 | hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str, | 502 | if (!sbi->hidden_dir) { |
503 | sbi->hidden_dir); | 503 | mutex_unlock(&sbi->vh_mutex); |
504 | err = -ENOMEM; | ||
505 | goto out_put_root; | ||
506 | } | ||
507 | err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root, | ||
508 | &str, sbi->hidden_dir); | ||
504 | mutex_unlock(&sbi->vh_mutex); | 509 | mutex_unlock(&sbi->vh_mutex); |
510 | if (err) | ||
511 | goto out_put_hidden_dir; | ||
505 | 512 | ||
506 | hfsplus_mark_inode_dirty(sbi->hidden_dir, | 513 | hfsplus_mark_inode_dirty(sbi->hidden_dir, |
507 | HFSPLUS_I_CAT_DIRTY); | 514 | HFSPLUS_I_CAT_DIRTY); |
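The hfsplus fix stops discarding failures: a NULL from hfsplus_new_inode() now unlocks vh_mutex and returns -ENOMEM, and the result of hfsplus_create_cat() is checked instead of being thrown away. A compact model of that allocate/check/unlock shape, with placeholder names (new_dir, create_cat) and a pthread mutex standing in for the kernel mutex:

#include <pthread.h>
#include <stdlib.h>
#include <errno.h>

struct dir { int populated; };

static struct dir *new_dir(void) { return calloc(1, sizeof(struct dir)); }
static int create_cat(struct dir *d) { d->populated = 1; return 0; }

static int fill_hidden_dir(pthread_mutex_t *mu, struct dir **out)
{
    int err;

    pthread_mutex_lock(mu);
    *out = new_dir();
    if (!*out) {
        pthread_mutex_unlock(mu);   /* never bail out with the lock held */
        return -ENOMEM;
    }
    err = create_cat(*out);         /* propagate instead of ignoring */
    pthread_mutex_unlock(mu);
    if (err) {
        free(*out);
        *out = NULL;
    }
    return err;
}

int main(void)
{
    pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
    struct dir *d;

    return fill_hidden_dir(&mu, &d);
}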
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e425ad9d0490..1e85a7ac0217 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -583,7 +583,8 @@ static int hugetlbfs_set_page_dirty(struct page *page) | |||
583 | } | 583 | } |
584 | 584 | ||
585 | static int hugetlbfs_migrate_page(struct address_space *mapping, | 585 | static int hugetlbfs_migrate_page(struct address_space *mapping, |
586 | struct page *newpage, struct page *page) | 586 | struct page *newpage, struct page *page, |
587 | enum migrate_mode mode) | ||
587 | { | 588 | { |
588 | int rc; | 589 | int rc; |
589 | 590 | ||
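hugetlbfs_migrate_page() picks up the enum migrate_mode parameter that page-migration callbacks gained in this merge; the mode tells the callback how much blocking is tolerable. The sketch below assumes the three mode values of that era (MIGRATE_ASYNC, MIGRATE_SYNC_LIGHT, MIGRATE_SYNC); demo_migrate and its -EAGAIN convention are illustrative, not the hugetlbfs implementation:

#include <stdio.h>
#include <errno.h>

/* Assumed mode values, mirroring include/linux/migrate_mode.h of the time. */
enum migrate_mode { MIGRATE_ASYNC, MIGRATE_SYNC_LIGHT, MIGRATE_SYNC };

struct page;    /* opaque here */

/* Old callback shape: no way to learn the caller's blocking budget. */
typedef int (*migratepage_old_t)(struct page *newpage, struct page *page);
/* New callback shape: the mode says how much waiting is allowed. */
typedef int (*migratepage_new_t)(struct page *newpage, struct page *page,
                                 enum migrate_mode mode);

static int demo_migrate(struct page *newpage, struct page *page,
                        enum migrate_mode mode)
{
    (void)newpage; (void)page;
    return mode == MIGRATE_ASYNC ? -EAGAIN /* would block, try later */ : 0;
}

int main(void)
{
    printf("async: %d, sync: %d\n",
           demo_migrate(NULL, NULL, MIGRATE_ASYNC),
           demo_migrate(NULL, NULL, MIGRATE_SYNC));
    return 0;
}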
diff --git a/fs/inode.c b/fs/inode.c index 87535753ab04..fb10d86ffad7 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -322,9 +322,6 @@ EXPORT_SYMBOL(clear_nlink); | |||
322 | void set_nlink(struct inode *inode, unsigned int nlink) | 322 | void set_nlink(struct inode *inode, unsigned int nlink) |
323 | { | 323 | { |
324 | if (!nlink) { | 324 | if (!nlink) { |
325 | printk_ratelimited(KERN_INFO | ||
326 | "set_nlink() clearing i_nlink on %s inode %li\n", | ||
327 | inode->i_sb->s_type->name, inode->i_ino); | ||
328 | clear_nlink(inode); | 325 | clear_nlink(inode); |
329 | } else { | 326 | } else { |
330 | /* Yes, some filesystems do change nlink from zero to one */ | 327 | /* Yes, some filesystems do change nlink from zero to one */ |
@@ -776,6 +773,8 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan) | |||
776 | else | 773 | else |
777 | __count_vm_events(PGINODESTEAL, reap); | 774 | __count_vm_events(PGINODESTEAL, reap); |
778 | spin_unlock(&sb->s_inode_lru_lock); | 775 | spin_unlock(&sb->s_inode_lru_lock); |
776 | if (current->reclaim_state) | ||
777 | current->reclaim_state->reclaimed_slab += reap; | ||
779 | 778 | ||
780 | dispose_list(&freeable); | 779 | dispose_list(&freeable); |
781 | } | 780 | } |
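The prune_icache_sb() hunk adds the freed-inode count to current->reclaim_state->reclaimed_slab, so the page allocator's direct-reclaim loop can see progress made by the inode shrinker. A toy model of that accounting; reclaim_state and prune_cache() here are stand-ins:

#include <stdio.h>

struct reclaim_state { unsigned long reclaimed_slab; };

/* Stand-in for the per-task pointer; NULL outside direct reclaim. */
static struct reclaim_state *current_reclaim_state;

static void prune_cache(unsigned long reap)
{
    /* ... scan the LRU, collect 'reap' victims on a dispose list ... */
    if (current_reclaim_state)
        current_reclaim_state->reclaimed_slab += reap;
    /* ... dispose_list(&freeable) ... */
}

int main(void)
{
    struct reclaim_state rs = { 0 };

    current_reclaim_state = &rs;
    prune_cache(128);
    printf("reclaimed %lu objects\n", rs.reclaimed_slab);
    return 0;
}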
diff --git a/fs/ioprio.c b/fs/ioprio.c index f79dab83e17b..f84b380d65e5 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c | |||
@@ -48,28 +48,12 @@ int set_task_ioprio(struct task_struct *task, int ioprio) | |||
48 | if (err) | 48 | if (err) |
49 | return err; | 49 | return err; |
50 | 50 | ||
51 | task_lock(task); | 51 | ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); |
52 | do { | 52 | if (ioc) { |
53 | ioc = task->io_context; | 53 | ioc_ioprio_changed(ioc, ioprio); |
54 | /* see wmb() in current_io_context() */ | 54 | put_io_context(ioc, NULL); |
55 | smp_read_barrier_depends(); | ||
56 | if (ioc) | ||
57 | break; | ||
58 | |||
59 | ioc = alloc_io_context(GFP_ATOMIC, -1); | ||
60 | if (!ioc) { | ||
61 | err = -ENOMEM; | ||
62 | break; | ||
63 | } | ||
64 | task->io_context = ioc; | ||
65 | } while (1); | ||
66 | |||
67 | if (!err) { | ||
68 | ioc->ioprio = ioprio; | ||
69 | ioc->ioprio_changed = 1; | ||
70 | } | 55 | } |
71 | 56 | ||
72 | task_unlock(task); | ||
73 | return err; | 57 | return err; |
74 | } | 58 | } |
75 | EXPORT_SYMBOL_GPL(set_task_ioprio); | 59 | EXPORT_SYMBOL_GPL(set_task_ioprio); |
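The set_task_ioprio() rewrite replaces the open-coded allocate-and-attach loop with get_task_io_context(), which hands back the task's io_context with a reference held, allocating one on first use; the caller updates the priority and drops the reference with put_io_context(). A user-space model of that get-or-create-with-refcount pattern, with a pthread mutex standing in for task_lock() and all names below being stand-ins:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct io_context { int refcount; int ioprio; };

struct task {
    pthread_mutex_t lock;
    struct io_context *ioc;
};

/* Return the task's context with an extra reference, creating it on
 * first use; NULL only if allocation fails. */
static struct io_context *get_io_context(struct task *t)
{
    struct io_context *ioc;

    pthread_mutex_lock(&t->lock);
    if (!t->ioc) {
        t->ioc = calloc(1, sizeof(*t->ioc));
        if (t->ioc)
            t->ioc->refcount = 1;   /* the task's own reference */
    }
    ioc = t->ioc;
    if (ioc)
        ioc->refcount++;            /* the caller's reference */
    pthread_mutex_unlock(&t->lock);
    return ioc;
}

static void put_io_context(struct io_context *ioc)
{
    if (--ioc->refcount == 0)       /* not thread-safe; a model only */
        free(ioc);
}

int main(void)
{
    struct task t = { PTHREAD_MUTEX_INITIALIZER, NULL };
    struct io_context *ioc = get_io_context(&t);

    if (ioc) {
        ioc->ioprio = 4;            /* ioc_ioprio_changed() analogue */
        put_io_context(ioc);
    }
    printf("ioprio=%d\n", t.ioc ? t.ioc->ioprio : -1);
    return 0;
}

The win is that every caller sees one rule: a successful get is always paired with a put, with no special first-use path.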
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 7b99f5f460be..bd62c76fb5df 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -948,8 +948,11 @@ root_found: | |||
948 | 948 | ||
949 | /* get the root dentry */ | 949 | /* get the root dentry */ |
950 | s->s_root = d_alloc_root(inode); | 950 | s->s_root = d_alloc_root(inode); |
951 | if (!(s->s_root)) | 951 | if (!(s->s_root)) { |
952 | goto out_no_root; | 952 | iput(inode); |
953 | error = -ENOMEM; | ||
954 | goto out_no_inode; | ||
955 | } | ||
953 | 956 | ||
954 | kfree(opt.iocharset); | 957 | kfree(opt.iocharset); |
955 | 958 | ||
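The isofs hunk plugs a reference leak: if d_alloc_root() fails, the code previously jumped to out_no_root without dropping the root inode it had just obtained; now it calls iput() and fails with -ENOMEM. A compact model of the rule that the failure path must drop whatever references it still owns (iget_model, iput_model and fill_super are simplified stand-ins):

#include <stdlib.h>
#include <errno.h>

struct inode { int refs; };

static struct inode *iget_model(void)
{
    struct inode *i = malloc(sizeof(*i));

    if (i)
        i->refs = 1;
    return i;
}

static void iput_model(struct inode *i)
{
    if (i && --i->refs == 0)
        free(i);
}

static void *d_alloc_root_model(struct inode *i)
{
    (void)i;
    return NULL;    /* simulate allocation failure */
}

static int fill_super(void)
{
    struct inode *inode = iget_model();
    void *root;

    if (!inode)
        return -ENOMEM;
    root = d_alloc_root_model(inode);
    if (!root) {
        iput_model(inode);  /* we still own the reference: drop it */
        return -ENOMEM;
    }
    /* on success the root dentry would own the inode reference */
    return 0;
}

int main(void)
{
    return fill_super() ? 1 : 0;
}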
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 5d1a00a5041b..05f0754f2b46 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c | |||
@@ -453,8 +453,6 @@ out: | |||
453 | * | 453 | * |
454 | * Return <0 on error, 0 on success, 1 if there was nothing to clean up. | 454 | * Return <0 on error, 0 on success, 1 if there was nothing to clean up. |
455 | * | 455 | * |
456 | * Called with the journal lock held. | ||
457 | * | ||
458 | * This is the only part of the journaling code which really needs to be | 456 | * This is the only part of the journaling code which really needs to be |
459 | * aware of transaction aborts. Checkpointing involves writing to the | 457 | * aware of transaction aborts. Checkpointing involves writing to the |
460 | * main filesystem area rather than to the journal, so it can proceed | 458 | * main filesystem area rather than to the journal, so it can proceed |
@@ -472,13 +470,14 @@ int cleanup_journal_tail(journal_t *journal) | |||
472 | if (is_journal_aborted(journal)) | 470 | if (is_journal_aborted(journal)) |
473 | return 1; | 471 | return 1; |
474 | 472 | ||
475 | /* OK, work out the oldest transaction remaining in the log, and | 473 | /* |
474 | * OK, work out the oldest transaction remaining in the log, and | ||
476 | * the log block it starts at. | 475 | * the log block it starts at. |
477 | * | 476 | * |
478 | * If the log is now empty, we need to work out which is the | 477 | * If the log is now empty, we need to work out which is the |
479 | * next transaction ID we will write, and where it will | 478 | * next transaction ID we will write, and where it will |
480 | * start. */ | 479 | * start. |
481 | 480 | */ | |
482 | spin_lock(&journal->j_state_lock); | 481 | spin_lock(&journal->j_state_lock); |
483 | spin_lock(&journal->j_list_lock); | 482 | spin_lock(&journal->j_list_lock); |
484 | transaction = journal->j_checkpoint_transactions; | 483 | transaction = journal->j_checkpoint_transactions; |
@@ -504,7 +503,25 @@ int cleanup_journal_tail(journal_t *journal) | |||
504 | spin_unlock(&journal->j_state_lock); | 503 | spin_unlock(&journal->j_state_lock); |
505 | return 1; | 504 | return 1; |
506 | } | 505 | } |
506 | spin_unlock(&journal->j_state_lock); | ||
507 | |||
508 | /* | ||
509 | * We need to make sure that any blocks that were recently written out | ||
510 | * --- perhaps by log_do_checkpoint() --- are flushed out before we | ||
511 | * drop the transactions from the journal. It's unlikely this will be | ||
512 | * necessary, especially with an appropriately sized journal, but we | ||
513 | * need this to guarantee correctness. Fortunately | ||
514 | * cleanup_journal_tail() doesn't get called all that often. | ||
515 | */ | ||
516 | if (journal->j_flags & JFS_BARRIER) | ||
517 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | ||
507 | 518 | ||
519 | spin_lock(&journal->j_state_lock); | ||
520 | if (!tid_gt(first_tid, journal->j_tail_sequence)) { | ||
521 | spin_unlock(&journal->j_state_lock); | ||
522 | /* Someone else cleaned up the journal, so return 0 */ ||
523 | return 0; | ||
524 | } | ||
508 | /* OK, update the superblock to recover the freed space. | 525 | /* OK, update the superblock to recover the freed space. |
509 | * Physical blocks come first: have we wrapped beyond the end of | 526 | * Physical blocks come first: have we wrapped beyond the end of |
510 | * the log? */ | 527 | * the log? */ |
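The cleanup_journal_tail() change is a drop-and-recheck sequence: release j_state_lock, issue the potentially sleeping blkdev_issue_flush() when the journal uses barriers, then retake the lock and use tid_gt() to confirm nobody advanced the tail in the meantime. A portable model of unlock/slow-work/relock/recheck, with a pthread mutex standing in for the spinlock and a plain compare for tid_gt():

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int tail_sequence = 10;

static void issue_flush(void)
{
    /* stands in for blkdev_issue_flush(); may sleep, so no lock held */
}

/* Returns 1 if we advanced the tail, 0 if someone beat us to it. */
static int cleanup_tail(unsigned int first_tid)
{
    pthread_mutex_lock(&state_lock);
    /* ... compute first_tid from the oldest checkpoint transaction ... */
    pthread_mutex_unlock(&state_lock);

    issue_flush();                      /* the slow part, lock dropped */

    pthread_mutex_lock(&state_lock);
    if (first_tid <= tail_sequence) {   /* !tid_gt() analogue */
        pthread_mutex_unlock(&state_lock);
        return 0;                       /* someone else cleaned up */
    }
    tail_sequence = first_tid;          /* safe: the recheck passed */
    pthread_mutex_unlock(&state_lock);
    return 1;
}

int main(void)
{
    int first = cleanup_tail(12);
    int second = cleanup_tail(12);

    printf("%d %d\n", first, second);   /* 1 0 */
    return 0;
}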
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 8799207df058..f2b9a571f4cf 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -392,6 +392,12 @@ void journal_commit_transaction(journal_t *journal) | |||
392 | jbd_debug (3, "JBD: commit phase 1\n"); | 392 | jbd_debug (3, "JBD: commit phase 1\n"); |
393 | 393 | ||
394 | /* | 394 | /* |
395 | * Clear the revoked flag to reflect that there are no revoked ||
396 | * buffers in the next transaction which is going to be started. ||
397 | */ | ||
398 | journal_clear_buffer_revoked_flags(journal); | ||
399 | |||
400 | /* | ||
395 | * Switch to a new revoke table. | 401 | * Switch to a new revoke table. |
396 | */ | 402 | */ |
397 | journal_switch_revoke_table(journal); | 403 | journal_switch_revoke_table(journal); |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index a96cff0c5f1d..59c09f9541b5 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -721,7 +721,6 @@ static journal_t * journal_init_common (void) | |||
721 | init_waitqueue_head(&journal->j_wait_checkpoint); | 721 | init_waitqueue_head(&journal->j_wait_checkpoint); |
722 | init_waitqueue_head(&journal->j_wait_commit); | 722 | init_waitqueue_head(&journal->j_wait_commit); |
723 | init_waitqueue_head(&journal->j_wait_updates); | 723 | init_waitqueue_head(&journal->j_wait_updates); |
724 | mutex_init(&journal->j_barrier); | ||
725 | mutex_init(&journal->j_checkpoint_mutex); | 724 | mutex_init(&journal->j_checkpoint_mutex); |
726 | spin_lock_init(&journal->j_revoke_lock); | 725 | spin_lock_init(&journal->j_revoke_lock); |
727 | spin_lock_init(&journal->j_list_lock); | 726 | spin_lock_init(&journal->j_list_lock); |
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 5b43e96788e6..008bf062fd26 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
21 | #include <linux/jbd.h> | 21 | #include <linux/jbd.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/blkdev.h> | ||
23 | #endif | 24 | #endif |
24 | 25 | ||
25 | /* | 26 | /* |
@@ -263,6 +264,9 @@ int journal_recover(journal_t *journal) | |||
263 | err2 = sync_blockdev(journal->j_fs_dev); | 264 | err2 = sync_blockdev(journal->j_fs_dev); |
264 | if (!err) | 265 | if (!err) |
265 | err = err2; | 266 | err = err2; |
267 | /* Flush disk caches to get replayed data onto permanent storage */ ||
268 | if (journal->j_flags & JFS_BARRIER) | ||
269 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | ||
266 | 270 | ||
267 | return err; | 271 | return err; |
268 | } | 272 | } |
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 305a90763154..25c713e7071c 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c | |||
@@ -47,6 +47,10 @@ | |||
47 | * overwriting the new data. We don't even need to clear the revoke | 47 | * overwriting the new data. We don't even need to clear the revoke |
48 | * bit here. | 48 | * bit here. |
49 | * | 49 | * |
50 | * We cache the revoke status of a buffer in the current transaction in its ||
51 | * b_state bits. As the name says, the RevokeValid flag indicates that the ||
52 | * cached revoke status of a buffer is valid and we can rely on it. ||
53 | * | ||
50 | * Revoke information on buffers is a tri-state value: | 54 | * Revoke information on buffers is a tri-state value: |
51 | * | 55 | * |
52 | * RevokeValid clear: no cached revoke status, need to look it up | 56 | * RevokeValid clear: no cached revoke status, need to look it up |
@@ -479,6 +483,36 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) | |||
479 | return did_revoke; | 483 | return did_revoke; |
480 | } | 484 | } |
481 | 485 | ||
486 | /* | ||
487 | * journal_clear_buffer_revoked_flags clears the revoked flag of buffers ||
488 | * in the revoke table to reflect that there are no revoked buffers in the ||
489 | * next transaction which is going to be started. ||
490 | */ | ||
491 | void journal_clear_buffer_revoked_flags(journal_t *journal) | ||
492 | { | ||
493 | struct jbd_revoke_table_s *revoke = journal->j_revoke; | ||
494 | int i = 0; | ||
495 | |||
496 | for (i = 0; i < revoke->hash_size; i++) { | ||
497 | struct list_head *hash_list; | ||
498 | struct list_head *list_entry; | ||
499 | hash_list = &revoke->hash_table[i]; | ||
500 | |||
501 | list_for_each(list_entry, hash_list) { | ||
502 | struct jbd_revoke_record_s *record; | ||
503 | struct buffer_head *bh; | ||
504 | record = (struct jbd_revoke_record_s *)list_entry; | ||
505 | bh = __find_get_block(journal->j_fs_dev, | ||
506 | record->blocknr, | ||
507 | journal->j_blocksize); | ||
508 | if (bh) { | ||
509 | clear_buffer_revoked(bh); | ||
510 | __brelse(bh); | ||
511 | } | ||
512 | } | ||
513 | } | ||
514 | } | ||
515 | |||
482 | /* journal_switch_revoke table select j_revoke for next transaction | 516 | /* journal_switch_revoke table select j_revoke for next transaction |
483 | * we do not want to suspend any processing until all revokes are | 517 | * we do not want to suspend any processing until all revokes are |
484 | * written -bzzz | 518 | * written -bzzz |
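The new journal_clear_buffer_revoked_flags() walks every bucket of the revoke hash table and clears the cached revoked bit on any buffer still present in the block cache, so stale cached state cannot leak into the next transaction. A much-simplified model of the bucket walk; plain arrays stand in for the intrusive lists and __find_get_block():

#include <stdio.h>

#define HASH_SIZE 4
#define NBUF 8

struct buf { int revoked; };

/* Toy revoke table: each bucket lists the buffers it has revoked. */
struct bucket { int idx[NBUF]; int n; };

static struct buf cache[NBUF];
static struct bucket table[HASH_SIZE];

static void clear_revoked_flags(void)
{
    int i, j;

    for (i = 0; i < HASH_SIZE; i++)
        for (j = 0; j < table[i].n; j++)
            cache[table[i].idx[j]].revoked = 0;
}

int main(void)
{
    cache[3].revoked = 1;
    table[3 % HASH_SIZE].idx[0] = 3;
    table[3 % HASH_SIZE].n = 1;
    clear_revoked_flags();
    printf("buf 3 revoked: %d\n", cache[3].revoked);    /* 0 */
    return 0;
}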
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 7e59c6e66f9b..7fce94b04bc3 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -426,17 +426,34 @@ int journal_restart(handle_t *handle, int nblocks) | |||
426 | * void journal_lock_updates () - establish a transaction barrier. | 426 | * void journal_lock_updates () - establish a transaction barrier. |
427 | * @journal: Journal to establish a barrier on. | 427 | * @journal: Journal to establish a barrier on. |
428 | * | 428 | * |
429 | * This locks out any further updates from being started, and blocks | 429 | * This locks out any further updates from being started, and blocks until all |
430 | * until all existing updates have completed, returning only once the | 430 | * existing updates have completed, returning only once the journal is in a |
431 | * journal is in a quiescent state with no updates running. | 431 | * quiescent state with no updates running. |
432 | * | 432 | * |
433 | * The journal lock should not be held on entry. | 433 | * We do not use a simple mutex for synchronization as there are syscalls |
434 | * which want to return with the filesystem locked and that trips up lockdep. ||
435 | * Also hibernate needs to lock the filesystem but a locked mutex then blocks ||
436 | * hibernation. Since locking the filesystem is a rare operation, we use a ||
437 | * simple counter and a waitqueue for locking. ||
434 | */ | 438 | */ |
435 | void journal_lock_updates(journal_t *journal) | 439 | void journal_lock_updates(journal_t *journal) |
436 | { | 440 | { |
437 | DEFINE_WAIT(wait); | 441 | DEFINE_WAIT(wait); |
438 | 442 | ||
443 | wait: | ||
444 | /* Wait for previous locked operation to finish */ | ||
445 | wait_event(journal->j_wait_transaction_locked, | ||
446 | journal->j_barrier_count == 0); | ||
447 | |||
439 | spin_lock(&journal->j_state_lock); | 448 | spin_lock(&journal->j_state_lock); |
449 | /* | ||
450 | * Check reliably under the lock whether we are the ones winning the race | ||
451 | * and locking the journal | ||
452 | */ | ||
453 | if (journal->j_barrier_count > 0) { | ||
454 | spin_unlock(&journal->j_state_lock); | ||
455 | goto wait; | ||
456 | } | ||
440 | ++journal->j_barrier_count; | 457 | ++journal->j_barrier_count; |
441 | 458 | ||
442 | /* Wait until there are no running updates */ | 459 | /* Wait until there are no running updates */ |
@@ -460,14 +477,6 @@ void journal_lock_updates(journal_t *journal) | |||
460 | spin_lock(&journal->j_state_lock); | 477 | spin_lock(&journal->j_state_lock); |
461 | } | 478 | } |
462 | spin_unlock(&journal->j_state_lock); | 479 | spin_unlock(&journal->j_state_lock); |
463 | |||
464 | /* | ||
465 | * We have now established a barrier against other normal updates, but | ||
466 | * we also need to barrier against other journal_lock_updates() calls | ||
467 | * to make sure that we serialise special journal-locked operations | ||
468 | * too. | ||
469 | */ | ||
470 | mutex_lock(&journal->j_barrier); | ||
471 | } | 480 | } |
472 | 481 | ||
473 | /** | 482 | /** |
@@ -475,14 +484,11 @@ void journal_lock_updates(journal_t *journal) | |||
475 | * @journal: Journal to release the barrier on. | 484 | * @journal: Journal to release the barrier on. |
476 | * | 485 | * |
477 | * Release a transaction barrier obtained with journal_lock_updates(). | 486 | * Release a transaction barrier obtained with journal_lock_updates(). |
478 | * | ||
479 | * Should be called without the journal lock held. | ||
480 | */ | 487 | */ |
481 | void journal_unlock_updates (journal_t *journal) | 488 | void journal_unlock_updates (journal_t *journal) |
482 | { | 489 | { |
483 | J_ASSERT(journal->j_barrier_count != 0); | 490 | J_ASSERT(journal->j_barrier_count != 0); |
484 | 491 | ||
485 | mutex_unlock(&journal->j_barrier); | ||
486 | spin_lock(&journal->j_state_lock); | 492 | spin_lock(&journal->j_state_lock); |
487 | --journal->j_barrier_count; | 493 | --journal->j_barrier_count; |
488 | spin_unlock(&journal->j_state_lock); | 494 | spin_unlock(&journal->j_state_lock); |
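journal_lock_updates() no longer takes the j_barrier mutex; j_barrier_count plus the j_wait_transaction_locked waitqueue now serve as the lock: wait outside the spinlock for the count to reach zero, take the spinlock, and if another locker won the race, loop back and wait again. A pthread condition-variable rendition of that loop (the condvar folds the wait and the locked recheck into one call):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t state = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t unlocked = PTHREAD_COND_INITIALIZER;
static int barrier_count;

static void lock_updates(void)
{
    pthread_mutex_lock(&state);
    /* Recheck under the lock; losers loop back to waiting. */
    while (barrier_count > 0)
        pthread_cond_wait(&unlocked, &state);
    ++barrier_count;    /* we won: the journal is now locked */
    pthread_mutex_unlock(&state);
}

static void unlock_updates(void)
{
    pthread_mutex_lock(&state);
    --barrier_count;
    pthread_cond_broadcast(&unlocked);  /* wake the next waiter */
    pthread_mutex_unlock(&state);
}

int main(void)
{
    lock_updates();
    unlock_updates();
    puts("ok");
    return 0;
}

Unlike a mutex, this scheme does not require the task that locked the journal to be the one that unlocks it, which is what the freeze-style syscalls and hibernation need, and it keeps lockdep quiet.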
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 68d704db787f..5069b8475150 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -430,6 +430,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
430 | jbd_debug(3, "JBD2: commit phase 1\n"); | 430 | jbd_debug(3, "JBD2: commit phase 1\n"); |
431 | 431 | ||
432 | /* | 432 | /* |
433 | * Clear the revoked flag to reflect that there are no revoked ||
434 | * buffers in the next transaction which is going to be started. ||
435 | */ | ||
436 | jbd2_clear_buffer_revoked_flags(journal); | ||
437 | |||
438 | /* | ||
433 | * Switch to a new revoke table. | 439 | * Switch to a new revoke table. |
434 | */ | 440 | */ |
435 | jbd2_journal_switch_revoke_table(journal); | 441 | jbd2_journal_switch_revoke_table(journal); |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 69fd93588118..30b2867d6cc9 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -47,6 +47,10 @@ | |||
47 | * overwriting the new data. We don't even need to clear the revoke | 47 | * overwriting the new data. We don't even need to clear the revoke |
48 | * bit here. | 48 | * bit here. |
49 | * | 49 | * |
50 | * We cache the revoke status of a buffer in the current transaction in its ||
51 | * b_state bits. As the name says, the RevokeValid flag indicates that the ||
52 | * cached revoke status of a buffer is valid and we can rely on it. ||
53 | * | ||
50 | * Revoke information on buffers is a tri-state value: | 54 | * Revoke information on buffers is a tri-state value: |
51 | * | 55 | * |
52 | * RevokeValid clear: no cached revoke status, need to look it up | 56 | * RevokeValid clear: no cached revoke status, need to look it up |
@@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) | |||
478 | return did_revoke; | 482 | return did_revoke; |
479 | } | 483 | } |
480 | 484 | ||
485 | /* | ||
486 | * jbd2_clear_buffer_revoked_flags clears the revoked flag of buffers ||
487 | * in the revoke table to reflect that there are no revoked buffers in the ||
488 | * next transaction which is going to be started. ||
489 | */ | ||
490 | void jbd2_clear_buffer_revoked_flags(journal_t *journal) | ||
491 | { | ||
492 | struct jbd2_revoke_table_s *revoke = journal->j_revoke; | ||
493 | int i = 0; | ||
494 | |||
495 | for (i = 0; i < revoke->hash_size; i++) { | ||
496 | struct list_head *hash_list; | ||
497 | struct list_head *list_entry; | ||
498 | hash_list = &revoke->hash_table[i]; | ||
499 | |||
500 | list_for_each(list_entry, hash_list) { | ||
501 | struct jbd2_revoke_record_s *record; | ||
502 | struct buffer_head *bh; | ||
503 | record = (struct jbd2_revoke_record_s *)list_entry; | ||
504 | bh = __find_get_block(journal->j_fs_dev, | ||
505 | record->blocknr, | ||
506 | journal->j_blocksize); | ||
507 | if (bh) { | ||
508 | clear_buffer_revoked(bh); | ||
509 | __brelse(bh); | ||
510 | } | ||
511 | } | ||
512 | } | ||
513 | } | ||
514 | |||
481 | /* journal_switch_revoke table select j_revoke for next transaction | 515 | /* journal_switch_revoke table select j_revoke for next transaction |
482 | * we do not want to suspend any processing until all revokes are | 516 | * we do not want to suspend any processing until all revokes are |
483 | * written -bzzz | 517 | * written -bzzz |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index a0e41a4c080e..35ae096bed5d 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -517,12 +517,13 @@ void jbd2_journal_lock_updates(journal_t *journal) | |||
517 | break; | 517 | break; |
518 | 518 | ||
519 | spin_lock(&transaction->t_handle_lock); | 519 | spin_lock(&transaction->t_handle_lock); |
520 | prepare_to_wait(&journal->j_wait_updates, &wait, | ||
521 | TASK_UNINTERRUPTIBLE); | ||
520 | if (!atomic_read(&transaction->t_updates)) { | 522 | if (!atomic_read(&transaction->t_updates)) { |
521 | spin_unlock(&transaction->t_handle_lock); | 523 | spin_unlock(&transaction->t_handle_lock); |
524 | finish_wait(&journal->j_wait_updates, &wait); | ||
522 | break; | 525 | break; |
523 | } | 526 | } |
524 | prepare_to_wait(&journal->j_wait_updates, &wait, | ||
525 | TASK_UNINTERRUPTIBLE); | ||
526 | spin_unlock(&transaction->t_handle_lock); | 527 | spin_unlock(&transaction->t_handle_lock); |
527 | write_unlock(&journal->j_state_lock); | 528 | write_unlock(&journal->j_state_lock); |
528 | schedule(); | 529 | schedule(); |
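The jbd2 hunk moves prepare_to_wait() in front of the t_updates test: the task must be registered on j_wait_updates before it checks the condition, otherwise the final update can complete and issue its wakeup in the window between the check and the registration, and the waiter sleeps forever. The pthread program below shows the race-free shape; the mutex plays the role of the registration, since the condition is only tested while the waiter is already armed:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int updates = 1;

static void *worker(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&mu);
    updates = 0;
    pthread_cond_signal(&cv);   /* cannot be missed: waiter holds mu
                                 * whenever it tests the condition */
    pthread_mutex_unlock(&mu);
    return NULL;
}

static void wait_for_updates(void)
{
    pthread_mutex_lock(&mu);
    /* Test happens with the "waiter" registered (mu held), mirroring
     * prepare_to_wait() before the t_updates check. */
    while (updates)
        pthread_cond_wait(&cv, &mu);
    pthread_mutex_unlock(&mu);
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, worker, NULL);
    wait_for_updates();
    pthread_join(t, NULL);
    puts("no missed wakeup");
    return 0;
}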
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index e513f1913c15..a01cdad6aad1 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c | |||
@@ -74,7 +74,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c, | |||
74 | ((struct erase_priv_struct *)instr->priv)->jeb = jeb; | 74 | ((struct erase_priv_struct *)instr->priv)->jeb = jeb; |
75 | ((struct erase_priv_struct *)instr->priv)->c = c; | 75 | ((struct erase_priv_struct *)instr->priv)->c = c; |
76 | 76 | ||
77 | ret = c->mtd->erase(c->mtd, instr); | 77 | ret = mtd_erase(c->mtd, instr); |
78 | if (!ret) | 78 | if (!ret) |
79 | return; | 79 | return; |
80 | 80 | ||
@@ -336,12 +336,11 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl | |||
336 | uint32_t ofs; | 336 | uint32_t ofs; |
337 | size_t retlen; | 337 | size_t retlen; |
338 | int ret = -EIO; | 338 | int ret = -EIO; |
339 | unsigned long *wordebuf; | ||
339 | 340 | ||
340 | if (c->mtd->point) { | 341 | ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen, |
341 | unsigned long *wordebuf; | 342 | &ebuf, NULL); |
342 | 343 | if (ret != -EOPNOTSUPP) { | |
343 | ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, | ||
344 | &retlen, &ebuf, NULL); | ||
345 | if (ret) { | 344 | if (ret) { |
346 | D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); | 345 | D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); |
347 | goto do_flash_read; | 346 | goto do_flash_read; |
@@ -349,7 +348,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl | |||
349 | if (retlen < c->sector_size) { | 348 | if (retlen < c->sector_size) { |
350 | /* Don't muck about if it won't let us point to the whole erase sector */ | 349 | /* Don't muck about if it won't let us point to the whole erase sector */ |
351 | D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen)); | 350 | D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen)); |
352 | c->mtd->unpoint(c->mtd, jeb->offset, retlen); | 351 | mtd_unpoint(c->mtd, jeb->offset, retlen); |
353 | goto do_flash_read; | 352 | goto do_flash_read; |
354 | } | 353 | } |
355 | wordebuf = ebuf-sizeof(*wordebuf); | 354 | wordebuf = ebuf-sizeof(*wordebuf); |
@@ -358,7 +357,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl | |||
358 | if (*++wordebuf != ~0) | 357 | if (*++wordebuf != ~0) |
359 | break; | 358 | break; |
360 | } while(--retlen); | 359 | } while(--retlen); |
361 | c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size); | 360 | mtd_unpoint(c->mtd, jeb->offset, c->sector_size); |
362 | if (retlen) { | 361 | if (retlen) { |
363 | printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n", | 362 | printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n", |
364 | *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf)); | 363 | *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf)); |
@@ -381,7 +380,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl | |||
381 | 380 | ||
382 | *bad_offset = ofs; | 381 | *bad_offset = ofs; |
383 | 382 | ||
384 | ret = c->mtd->read(c->mtd, ofs, readlen, &retlen, ebuf); | 383 | ret = mtd_read(c->mtd, ofs, readlen, &retlen, ebuf); |
385 | if (ret) { | 384 | if (ret) { |
386 | printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret); | 385 | printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret); |
387 | ret = -EIO; | 386 | ret = -EIO; |
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 4b8afe39a87f..2e0123867cb1 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
@@ -466,7 +466,6 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r | |||
466 | 466 | ||
467 | if (insert_inode_locked(inode) < 0) { | 467 | if (insert_inode_locked(inode) < 0) { |
468 | make_bad_inode(inode); | 468 | make_bad_inode(inode); |
469 | unlock_new_inode(inode); | ||
470 | iput(inode); | 469 | iput(inode); |
471 | return ERR_PTR(-EINVAL); | 470 | return ERR_PTR(-EINVAL); |
472 | } | 471 | } |
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index ee57bac1ba6d..3093ac4fb24c 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c | |||
@@ -62,17 +62,15 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info | |||
62 | #ifndef __ECOS | 62 | #ifndef __ECOS |
63 | /* TODO: instead, encapsulate the point() stuff in jffs2_flash_read(), | 63 | /* TODO: instead, encapsulate the point() stuff in jffs2_flash_read(), |
64 | * adding a jffs2_flash_read_end() interface. */ | 64 | * adding a jffs2_flash_read_end() interface. */ |
65 | if (c->mtd->point) { | 65 | err = mtd_point(c->mtd, ofs, len, &retlen, (void **)&buffer, NULL); |
66 | err = c->mtd->point(c->mtd, ofs, len, &retlen, | 66 | if (!err && retlen < len) { |
67 | (void **)&buffer, NULL); | 67 | JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); |
68 | if (!err && retlen < len) { | 68 | mtd_unpoint(c->mtd, ofs, retlen); |
69 | JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); | 69 | } else if (err) { |
70 | c->mtd->unpoint(c->mtd, ofs, retlen); | 70 | if (err != -EOPNOTSUPP) |
71 | } else if (err) | ||
72 | JFFS2_WARNING("MTD point failed: error code %d.\n", err); | 71 | JFFS2_WARNING("MTD point failed: error code %d.\n", err); |
73 | else | 72 | } else |
74 | pointed = 1; /* successfully pointed to device */ | 73 | pointed = 1; /* successfully pointed to device */ |
75 | } | ||
76 | #endif | 74 | #endif |
77 | 75 | ||
78 | if (!pointed) { | 76 | if (!pointed) { |
@@ -101,7 +99,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info | |||
101 | kfree(buffer); | 99 | kfree(buffer); |
102 | #ifndef __ECOS | 100 | #ifndef __ECOS |
103 | else | 101 | else |
104 | c->mtd->unpoint(c->mtd, ofs, len); | 102 | mtd_unpoint(c->mtd, ofs, len); |
105 | #endif | 103 | #endif |
106 | 104 | ||
107 | if (crc != tn->data_crc) { | 105 | if (crc != tn->data_crc) { |
@@ -137,7 +135,7 @@ free_out: | |||
137 | kfree(buffer); | 135 | kfree(buffer); |
138 | #ifndef __ECOS | 136 | #ifndef __ECOS |
139 | else | 137 | else |
140 | c->mtd->unpoint(c->mtd, ofs, len); | 138 | mtd_unpoint(c->mtd, ofs, len); |
141 | #endif | 139 | #endif |
142 | return err; | 140 | return err; |
143 | } | 141 | } |
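Throughout these jffs2 hunks the driver stops probing method pointers (the old "if (c->mtd->point)" tests) and instead calls the new mtd_point()/mtd_unpoint() wrappers, treating -EOPNOTSUPP as "capability absent". Expressing capability through an error code keeps all call sites uniform. A sketch of the wrapper idea; the shape below is assumed for illustration, not the real mtd.h:

#include <stdio.h>
#include <errno.h>

struct mtd_info {
    /* optional method; NULL when the device cannot be mapped directly */
    int (*point)(struct mtd_info *m, long ofs, long len, void **virt);
};

/* Wrapper: callers test an errno instead of a function pointer. */
static int mtd_point_model(struct mtd_info *m, long ofs, long len, void **virt)
{
    if (!m->point)
        return -EOPNOTSUPP;
    return m->point(m, ofs, len, virt);
}

int main(void)
{
    struct mtd_info nomap = { 0 };
    void *p;
    int err = mtd_point_model(&nomap, 0, 4096, &p);

    if (err == -EOPNOTSUPP)
        puts("fall back to a plain read");  /* the jffs2 path */
    return 0;
}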
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 28107ca136e4..f99464833bb2 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c | |||
@@ -97,15 +97,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) | |||
97 | size_t pointlen, try_size; | 97 | size_t pointlen, try_size; |
98 | 98 | ||
99 | if (c->mtd->point) { | 99 | if (c->mtd->point) { |
100 | ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen, | 100 | ret = mtd_point(c->mtd, 0, c->mtd->size, &pointlen, |
101 | (void **)&flashbuf, NULL); | 101 | (void **)&flashbuf, NULL); |
102 | if (!ret && pointlen < c->mtd->size) { | 102 | if (!ret && pointlen < c->mtd->size) { |
103 | /* Don't muck about if it won't let us point to the whole flash */ | 103 | /* Don't muck about if it won't let us point to the whole flash */ |
104 | D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); | 104 | D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); |
105 | c->mtd->unpoint(c->mtd, 0, pointlen); | 105 | mtd_unpoint(c->mtd, 0, pointlen); |
106 | flashbuf = NULL; | 106 | flashbuf = NULL; |
107 | } | 107 | } |
108 | if (ret) | 108 | if (ret && ret != -EOPNOTSUPP) |
109 | D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); | 109 | D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); |
110 | } | 110 | } |
111 | #endif | 111 | #endif |
@@ -273,7 +273,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) | |||
273 | kfree(flashbuf); | 273 | kfree(flashbuf); |
274 | #ifndef __ECOS | 274 | #ifndef __ECOS |
275 | else | 275 | else |
276 | c->mtd->unpoint(c->mtd, 0, c->mtd->size); | 276 | mtd_unpoint(c->mtd, 0, c->mtd->size); |
277 | #endif | 277 | #endif |
278 | kfree(s); | 278 | kfree(s); |
279 | return ret; | 279 | return ret; |
@@ -455,7 +455,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo | |||
455 | if (jffs2_cleanmarker_oob(c)) { | 455 | if (jffs2_cleanmarker_oob(c)) { |
456 | int ret; | 456 | int ret; |
457 | 457 | ||
458 | if (c->mtd->block_isbad(c->mtd, jeb->offset)) | 458 | if (mtd_block_isbad(c->mtd, jeb->offset)) |
459 | return BLK_STATE_BADBLOCK; | 459 | return BLK_STATE_BADBLOCK; |
460 | 460 | ||
461 | ret = jffs2_check_nand_cleanmarker(c, jeb); | 461 | ret = jffs2_check_nand_cleanmarker(c, jeb); |
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 8be4925296cf..f2d96b5e64f6 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -335,9 +335,7 @@ static void jffs2_put_super (struct super_block *sb) | |||
335 | jffs2_flash_cleanup(c); | 335 | jffs2_flash_cleanup(c); |
336 | kfree(c->inocache_list); | 336 | kfree(c->inocache_list); |
337 | jffs2_clear_xattr_subsystem(c); | 337 | jffs2_clear_xattr_subsystem(c); |
338 | if (c->mtd->sync) | 338 | mtd_sync(c->mtd); |
339 | c->mtd->sync(c->mtd); | ||
340 | |||
341 | D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); | 339 | D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); |
342 | } | 340 | } |
343 | 341 | ||
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index b09e51d2f81f..30e8f47e8a23 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c | |||
@@ -228,7 +228,7 @@ static int jffs2_verify_write(struct jffs2_sb_info *c, unsigned char *buf, | |||
228 | size_t retlen; | 228 | size_t retlen; |
229 | char *eccstr; | 229 | char *eccstr; |
230 | 230 | ||
231 | ret = c->mtd->read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify); | 231 | ret = mtd_read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify); |
232 | if (ret && ret != -EUCLEAN && ret != -EBADMSG) { | 232 | if (ret && ret != -EUCLEAN && ret != -EBADMSG) { |
233 | printk(KERN_WARNING "jffs2_verify_write(): Read back of page at %08x failed: %d\n", c->wbuf_ofs, ret); | 233 | printk(KERN_WARNING "jffs2_verify_write(): Read back of page at %08x failed: %d\n", c->wbuf_ofs, ret); |
234 | return ret; | 234 | return ret; |
@@ -337,7 +337,8 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) | |||
337 | } | 337 | } |
338 | 338 | ||
339 | /* Do the read... */ | 339 | /* Do the read... */ |
340 | ret = c->mtd->read(c->mtd, start, c->wbuf_ofs - start, &retlen, buf); | 340 | ret = mtd_read(c->mtd, start, c->wbuf_ofs - start, &retlen, |
341 | buf); | ||
341 | 342 | ||
342 | /* ECC recovered ? */ | 343 | /* ECC recovered ? */ |
343 | if ((ret == -EUCLEAN || ret == -EBADMSG) && | 344 | if ((ret == -EUCLEAN || ret == -EBADMSG) && |
@@ -413,13 +414,12 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c) | |||
413 | if (breakme++ == 20) { | 414 | if (breakme++ == 20) { |
414 | printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs); | 415 | printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs); |
415 | breakme = 0; | 416 | breakme = 0; |
416 | c->mtd->write(c->mtd, ofs, towrite, &retlen, | 417 | mtd_write(c->mtd, ofs, towrite, &retlen, brokenbuf); |
417 | brokenbuf); | ||
418 | ret = -EIO; | 418 | ret = -EIO; |
419 | } else | 419 | } else |
420 | #endif | 420 | #endif |
421 | ret = c->mtd->write(c->mtd, ofs, towrite, &retlen, | 421 | ret = mtd_write(c->mtd, ofs, towrite, &retlen, |
422 | rewrite_buf); | 422 | rewrite_buf); |
423 | 423 | ||
424 | if (ret || retlen != towrite || jffs2_verify_write(c, rewrite_buf, ofs)) { | 424 | if (ret || retlen != towrite || jffs2_verify_write(c, rewrite_buf, ofs)) { |
425 | /* Argh. We tried. Really we did. */ | 425 | /* Argh. We tried. Really we did. */ |
@@ -619,13 +619,14 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) | |||
619 | if (breakme++ == 20) { | 619 | if (breakme++ == 20) { |
620 | printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs); | 620 | printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs); |
621 | breakme = 0; | 621 | breakme = 0; |
622 | c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, | 622 | mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, |
623 | brokenbuf); | 623 | brokenbuf); |
624 | ret = -EIO; | 624 | ret = -EIO; |
625 | } else | 625 | } else |
626 | #endif | 626 | #endif |
627 | 627 | ||
628 | ret = c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf); | 628 | ret = mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, |
629 | &retlen, c->wbuf); | ||
629 | 630 | ||
630 | if (ret) { | 631 | if (ret) { |
631 | printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n", ret); | 632 | printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n", ret); |
@@ -861,8 +862,8 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, | |||
861 | v += wbuf_retlen; | 862 | v += wbuf_retlen; |
862 | 863 | ||
863 | if (vlen >= c->wbuf_pagesize) { | 864 | if (vlen >= c->wbuf_pagesize) { |
864 | ret = c->mtd->write(c->mtd, outvec_to, PAGE_DIV(vlen), | 865 | ret = mtd_write(c->mtd, outvec_to, PAGE_DIV(vlen), |
865 | &wbuf_retlen, v); | 866 | &wbuf_retlen, v); |
866 | if (ret < 0 || wbuf_retlen != PAGE_DIV(vlen)) | 867 | if (ret < 0 || wbuf_retlen != PAGE_DIV(vlen)) |
867 | goto outfile; | 868 | goto outfile; |
868 | 869 | ||
@@ -948,11 +949,11 @@ int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *re | |||
948 | int ret; | 949 | int ret; |
949 | 950 | ||
950 | if (!jffs2_is_writebuffered(c)) | 951 | if (!jffs2_is_writebuffered(c)) |
951 | return c->mtd->read(c->mtd, ofs, len, retlen, buf); | 952 | return mtd_read(c->mtd, ofs, len, retlen, buf); |
952 | 953 | ||
953 | /* Read flash */ | 954 | /* Read flash */ |
954 | down_read(&c->wbuf_sem); | 955 | down_read(&c->wbuf_sem); |
955 | ret = c->mtd->read(c->mtd, ofs, len, retlen, buf); | 956 | ret = mtd_read(c->mtd, ofs, len, retlen, buf); |
956 | 957 | ||
957 | if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) { | 958 | if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) { |
958 | if (ret == -EBADMSG) | 959 | if (ret == -EBADMSG) |
@@ -1031,7 +1032,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c, | |||
1031 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; | 1032 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; |
1032 | ops.datbuf = NULL; | 1033 | ops.datbuf = NULL; |
1033 | 1034 | ||
1034 | ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops); | 1035 | ret = mtd_read_oob(c->mtd, jeb->offset, &ops); |
1035 | if (ret || ops.oobretlen != ops.ooblen) { | 1036 | if (ret || ops.oobretlen != ops.ooblen) { |
1036 | printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" | 1037 | printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" |
1037 | " bytes, read %zd bytes, error %d\n", | 1038 | " bytes, read %zd bytes, error %d\n", |
@@ -1074,7 +1075,7 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c, | |||
1074 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; | 1075 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; |
1075 | ops.datbuf = NULL; | 1076 | ops.datbuf = NULL; |
1076 | 1077 | ||
1077 | ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops); | 1078 | ret = mtd_read_oob(c->mtd, jeb->offset, &ops); |
1078 | if (ret || ops.oobretlen != ops.ooblen) { | 1079 | if (ret || ops.oobretlen != ops.ooblen) { |
1079 | printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" | 1080 | printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd" |
1080 | " bytes, read %zd bytes, error %d\n", | 1081 | " bytes, read %zd bytes, error %d\n", |
@@ -1100,7 +1101,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c, | |||
1100 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; | 1101 | ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0; |
1101 | ops.datbuf = NULL; | 1102 | ops.datbuf = NULL; |
1102 | 1103 | ||
1103 | ret = c->mtd->write_oob(c->mtd, jeb->offset, &ops); | 1104 | ret = mtd_write_oob(c->mtd, jeb->offset, &ops); |
1104 | if (ret || ops.oobretlen != ops.ooblen) { | 1105 | if (ret || ops.oobretlen != ops.ooblen) { |
1105 | printk(KERN_ERR "cannot write OOB for EB at %08x, requested %zd" | 1106 | printk(KERN_ERR "cannot write OOB for EB at %08x, requested %zd" |
1106 | " bytes, read %zd bytes, error %d\n", | 1107 | " bytes, read %zd bytes, error %d\n", |
@@ -1129,11 +1130,8 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock * | |||
1129 | if( ++jeb->bad_count < MAX_ERASE_FAILURES) | 1130 | if( ++jeb->bad_count < MAX_ERASE_FAILURES) |
1130 | return 0; | 1131 | return 0; |
1131 | 1132 | ||
1132 | if (!c->mtd->block_markbad) | ||
1133 | return 1; // What else can we do? | ||
1134 | |||
1135 | printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset); | 1133 | printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset); |
1136 | ret = c->mtd->block_markbad(c->mtd, bad_offset); | 1134 | ret = mtd_block_markbad(c->mtd, bad_offset); |
1137 | 1135 | ||
1138 | if (ret) { | 1136 | if (ret) { |
1139 | D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Write failed for block at %08x: error %d\n", jeb->offset, ret)); | 1137 | D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Write failed for block at %08x: error %d\n", jeb->offset, ret)); |
diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c index b9276b11bac6..a1bda9dab3f8 100644 --- a/fs/jffs2/writev.c +++ b/fs/jffs2/writev.c | |||
@@ -13,30 +13,6 @@ | |||
13 | #include <linux/mtd/mtd.h> | 13 | #include <linux/mtd/mtd.h> |
14 | #include "nodelist.h" | 14 | #include "nodelist.h" |
15 | 15 | ||
16 | /* This ought to be in core MTD code. All registered MTD devices | ||
17 | without writev should have this put in place. Bug the MTD | ||
18 | maintainer */ | ||
19 | static inline int mtd_fake_writev(struct mtd_info *mtd, const struct kvec *vecs, | ||
20 | unsigned long count, loff_t to, size_t *retlen) | ||
21 | { | ||
22 | unsigned long i; | ||
23 | size_t totlen = 0, thislen; | ||
24 | int ret = 0; | ||
25 | |||
26 | for (i=0; i<count; i++) { | ||
27 | if (!vecs[i].iov_len) | ||
28 | continue; | ||
29 | ret = mtd->write(mtd, to, vecs[i].iov_len, &thislen, vecs[i].iov_base); | ||
30 | totlen += thislen; | ||
31 | if (ret || thislen != vecs[i].iov_len) | ||
32 | break; | ||
33 | to += vecs[i].iov_len; | ||
34 | } | ||
35 | if (retlen) | ||
36 | *retlen = totlen; | ||
37 | return ret; | ||
38 | } | ||
39 | |||
40 | int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs, | 16 | int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs, |
41 | unsigned long count, loff_t to, size_t *retlen) | 17 | unsigned long count, loff_t to, size_t *retlen) |
42 | { | 18 | { |
@@ -50,18 +26,14 @@ int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs, | |||
50 | } | 26 | } |
51 | } | 27 | } |
52 | 28 | ||
53 | if (c->mtd->writev) | 29 | return mtd_writev(c->mtd, vecs, count, to, retlen); |
54 | return c->mtd->writev(c->mtd, vecs, count, to, retlen); | ||
55 | else { | ||
56 | return mtd_fake_writev(c->mtd, vecs, count, to, retlen); | ||
57 | } | ||
58 | } | 30 | } |
59 | 31 | ||
60 | int jffs2_flash_direct_write(struct jffs2_sb_info *c, loff_t ofs, size_t len, | 32 | int jffs2_flash_direct_write(struct jffs2_sb_info *c, loff_t ofs, size_t len, |
61 | size_t *retlen, const u_char *buf) | 33 | size_t *retlen, const u_char *buf) |
62 | { | 34 | { |
63 | int ret; | 35 | int ret; |
64 | ret = c->mtd->write(c->mtd, ofs, len, retlen, buf); | 36 | ret = mtd_write(c->mtd, ofs, len, retlen, buf); |
65 | 37 | ||
66 | if (jffs2_sum_active()) { | 38 | if (jffs2_sum_active()) { |
67 | struct kvec vecs[1]; | 39 | struct kvec vecs[1]; |
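mtd_fake_writev() disappears from jffs2 because, as the deleted comment demanded, the fallback now lives in core MTD: mtd_writev() loops over the vector with plain writes when the driver lacks a native writev. A self-contained version of that gather-write loop, with a byte array standing in for the flash device:

#include <stdio.h>
#include <string.h>

struct kvec { void *iov_base; size_t iov_len; };

static unsigned char flash[64];

static int plain_write(size_t to, const void *buf, size_t len, size_t *retlen)
{
    memcpy(flash + to, buf, len);
    *retlen = len;
    return 0;
}

/* Fallback gather write: one plain write per vector element. */
static int writev_fallback(const struct kvec *vecs, unsigned long count,
                           size_t to, size_t *retlen)
{
    size_t total = 0, written;
    int ret = 0;
    unsigned long i;

    for (i = 0; i < count; i++) {
        if (!vecs[i].iov_len)
            continue;
        ret = plain_write(to, vecs[i].iov_base, vecs[i].iov_len, &written);
        total += written;
        if (ret || written != vecs[i].iov_len)
            break;
        to += vecs[i].iov_len;
    }
    *retlen = total;
    return ret;
}

int main(void)
{
    struct kvec v[2] = { { (void *)"abc", 3 }, { (void *)"def", 3 } };
    size_t done;

    writev_fallback(v, 2, 0, &done);
    printf("%zu bytes: %.6s\n", done, (char *)flash);   /* 6 bytes: abcdef */
    return 0;
}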
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 23d7451b2938..65ba36b80a9e 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -55,7 +55,7 @@ static DEFINE_SPINLOCK(nsm_lock); | |||
55 | * Local NSM state | 55 | * Local NSM state |
56 | */ | 56 | */ |
57 | u32 __read_mostly nsm_local_state; | 57 | u32 __read_mostly nsm_local_state; |
58 | int __read_mostly nsm_use_hostnames; | 58 | bool __read_mostly nsm_use_hostnames; |
59 | 59 | ||
60 | static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) | 60 | static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) |
61 | { | 61 | { |
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index 339e17e9133d..9c501449450d 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c | |||
@@ -13,13 +13,14 @@ | |||
13 | 13 | ||
14 | #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) | 14 | #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) |
15 | 15 | ||
16 | static int mtd_read(struct super_block *sb, loff_t ofs, size_t len, void *buf) | 16 | static int logfs_mtd_read(struct super_block *sb, loff_t ofs, size_t len, |
17 | void *buf) | ||
17 | { | 18 | { |
18 | struct mtd_info *mtd = logfs_super(sb)->s_mtd; | 19 | struct mtd_info *mtd = logfs_super(sb)->s_mtd; |
19 | size_t retlen; | 20 | size_t retlen; |
20 | int ret; | 21 | int ret; |
21 | 22 | ||
22 | ret = mtd->read(mtd, ofs, len, &retlen, buf); | 23 | ret = mtd_read(mtd, ofs, len, &retlen, buf); |
23 | BUG_ON(ret == -EINVAL); | 24 | BUG_ON(ret == -EINVAL); |
24 | if (ret) | 25 | if (ret) |
25 | return ret; | 26 | return ret; |
@@ -31,7 +32,8 @@ static int mtd_read(struct super_block *sb, loff_t ofs, size_t len, void *buf) | |||
31 | return 0; | 32 | return 0; |
32 | } | 33 | } |
33 | 34 | ||
34 | static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf) | 35 | static int logfs_mtd_write(struct super_block *sb, loff_t ofs, size_t len, |
36 | void *buf) | ||
35 | { | 37 | { |
36 | struct logfs_super *super = logfs_super(sb); | 38 | struct logfs_super *super = logfs_super(sb); |
37 | struct mtd_info *mtd = super->s_mtd; | 39 | struct mtd_info *mtd = super->s_mtd; |
@@ -47,7 +49,7 @@ static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf) | |||
47 | BUG_ON(len > PAGE_CACHE_SIZE); | 49 | BUG_ON(len > PAGE_CACHE_SIZE); |
48 | page_start = ofs & PAGE_CACHE_MASK; | 50 | page_start = ofs & PAGE_CACHE_MASK; |
49 | page_end = PAGE_CACHE_ALIGN(ofs + len) - 1; | 51 | page_end = PAGE_CACHE_ALIGN(ofs + len) - 1; |
50 | ret = mtd->write(mtd, ofs, len, &retlen, buf); | 52 | ret = mtd_write(mtd, ofs, len, &retlen, buf); |
51 | if (ret || (retlen != len)) | 53 | if (ret || (retlen != len)) |
52 | return -EIO; | 54 | return -EIO; |
53 | 55 | ||
@@ -60,14 +62,15 @@ static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf) | |||
60 | * asynchronous properties. So just to prevent the first implementor of such | 62 | * asynchronous properties. So just to prevent the first implementor of such |
61 | * a thing from breaking logfs in 2350, we do the usual pointless dance to | 63 | * a thing from breaking logfs in 2350, we do the usual pointless dance to |
62 | * declare a completion variable and wait for completion before returning | 64 | * declare a completion variable and wait for completion before returning |
63 | * from mtd_erase(). What an exercise in futility! | 65 | * from logfs_mtd_erase(). What an exercise in futility! |
64 | */ | 66 | */ |
65 | static void logfs_erase_callback(struct erase_info *ei) | 67 | static void logfs_erase_callback(struct erase_info *ei) |
66 | { | 68 | { |
67 | complete((struct completion *)ei->priv); | 69 | complete((struct completion *)ei->priv); |
68 | } | 70 | } |
69 | 71 | ||
70 | static int mtd_erase_mapping(struct super_block *sb, loff_t ofs, size_t len) | 72 | static int logfs_mtd_erase_mapping(struct super_block *sb, loff_t ofs, |
73 | size_t len) | ||
71 | { | 74 | { |
72 | struct logfs_super *super = logfs_super(sb); | 75 | struct logfs_super *super = logfs_super(sb); |
73 | struct address_space *mapping = super->s_mapping_inode->i_mapping; | 76 | struct address_space *mapping = super->s_mapping_inode->i_mapping; |
@@ -84,7 +87,7 @@ static int mtd_erase_mapping(struct super_block *sb, loff_t ofs, size_t len) | |||
84 | return 0; | 87 | return 0; |
85 | } | 88 | } |
86 | 89 | ||
87 | static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len, | 90 | static int logfs_mtd_erase(struct super_block *sb, loff_t ofs, size_t len, |
88 | int ensure_write) | 91 | int ensure_write) |
89 | { | 92 | { |
90 | struct mtd_info *mtd = logfs_super(sb)->s_mtd; | 93 | struct mtd_info *mtd = logfs_super(sb)->s_mtd; |
@@ -102,30 +105,29 @@ static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len, | |||
102 | ei.len = len; | 105 | ei.len = len; |
103 | ei.callback = logfs_erase_callback; | 106 | ei.callback = logfs_erase_callback; |
104 | ei.priv = (long)&complete; | 107 | ei.priv = (long)&complete; |
105 | ret = mtd->erase(mtd, &ei); | 108 | ret = mtd_erase(mtd, &ei); |
106 | if (ret) | 109 | if (ret) |
107 | return -EIO; | 110 | return -EIO; |
108 | 111 | ||
109 | wait_for_completion(&complete); | 112 | wait_for_completion(&complete); |
110 | if (ei.state != MTD_ERASE_DONE) | 113 | if (ei.state != MTD_ERASE_DONE) |
111 | return -EIO; | 114 | return -EIO; |
112 | return mtd_erase_mapping(sb, ofs, len); | 115 | return logfs_mtd_erase_mapping(sb, ofs, len); |
113 | } | 116 | } |
114 | 117 | ||
115 | static void mtd_sync(struct super_block *sb) | 118 | static void logfs_mtd_sync(struct super_block *sb) |
116 | { | 119 | { |
117 | struct mtd_info *mtd = logfs_super(sb)->s_mtd; | 120 | struct mtd_info *mtd = logfs_super(sb)->s_mtd; |
118 | 121 | ||
119 | if (mtd->sync) | 122 | mtd_sync(mtd); |
120 | mtd->sync(mtd); | ||
121 | } | 123 | } |
122 | 124 | ||
123 | static int mtd_readpage(void *_sb, struct page *page) | 125 | static int logfs_mtd_readpage(void *_sb, struct page *page) |
124 | { | 126 | { |
125 | struct super_block *sb = _sb; | 127 | struct super_block *sb = _sb; |
126 | int err; | 128 | int err; |
127 | 129 | ||
128 | err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE, | 130 | err = logfs_mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE, |
129 | page_address(page)); | 131 | page_address(page)); |
130 | if (err == -EUCLEAN || err == -EBADMSG) { | 132 | if (err == -EUCLEAN || err == -EBADMSG) { |
131 | /* -EBADMSG happens regularly on power failures */ | 133 | /* -EBADMSG happens regularly on power failures */ |
@@ -143,18 +145,15 @@ static int mtd_readpage(void *_sb, struct page *page) | |||
143 | return err; | 145 | return err; |
144 | } | 146 | } |
145 | 147 | ||
146 | static struct page *mtd_find_first_sb(struct super_block *sb, u64 *ofs) | 148 | static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs) |
147 | { | 149 | { |
148 | struct logfs_super *super = logfs_super(sb); | 150 | struct logfs_super *super = logfs_super(sb); |
149 | struct address_space *mapping = super->s_mapping_inode->i_mapping; | 151 | struct address_space *mapping = super->s_mapping_inode->i_mapping; |
150 | filler_t *filler = mtd_readpage; | 152 | filler_t *filler = logfs_mtd_readpage; |
151 | struct mtd_info *mtd = super->s_mtd; | 153 | struct mtd_info *mtd = super->s_mtd; |
152 | 154 | ||
153 | if (!mtd->block_isbad) | ||
154 | return NULL; | ||
155 | |||
156 | *ofs = 0; | 155 | *ofs = 0; |
157 | while (mtd->block_isbad(mtd, *ofs)) { | 156 | while (mtd_block_isbad(mtd, *ofs)) { |
158 | *ofs += mtd->erasesize; | 157 | *ofs += mtd->erasesize; |
159 | if (*ofs >= mtd->size) | 158 | if (*ofs >= mtd->size) |
160 | return NULL; | 159 | return NULL; |
@@ -163,18 +162,15 @@ static struct page *mtd_find_first_sb(struct super_block *sb, u64 *ofs) | |||
163 | return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb); | 162 | return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb); |
164 | } | 163 | } |
165 | 164 | ||
166 | static struct page *mtd_find_last_sb(struct super_block *sb, u64 *ofs) | 165 | static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs) |
167 | { | 166 | { |
168 | struct logfs_super *super = logfs_super(sb); | 167 | struct logfs_super *super = logfs_super(sb); |
169 | struct address_space *mapping = super->s_mapping_inode->i_mapping; | 168 | struct address_space *mapping = super->s_mapping_inode->i_mapping; |
170 | filler_t *filler = mtd_readpage; | 169 | filler_t *filler = logfs_mtd_readpage; |
171 | struct mtd_info *mtd = super->s_mtd; | 170 | struct mtd_info *mtd = super->s_mtd; |
172 | 171 | ||
173 | if (!mtd->block_isbad) | ||
174 | return NULL; | ||
175 | |||
176 | *ofs = mtd->size - mtd->erasesize; | 172 | *ofs = mtd->size - mtd->erasesize; |
177 | while (mtd->block_isbad(mtd, *ofs)) { | 173 | while (mtd_block_isbad(mtd, *ofs)) { |
178 | *ofs -= mtd->erasesize; | 174 | *ofs -= mtd->erasesize; |
179 | if (*ofs <= 0) | 175 | if (*ofs <= 0) |
180 | return NULL; | 176 | return NULL; |
@@ -184,7 +180,7 @@ static struct page *mtd_find_last_sb(struct super_block *sb, u64 *ofs) | |||
184 | return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb); | 180 | return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb); |
185 | } | 181 | } |
186 | 182 | ||
187 | static int __mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, | 183 | static int __logfs_mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, |
188 | size_t nr_pages) | 184 | size_t nr_pages) |
189 | { | 185 | { |
190 | struct logfs_super *super = logfs_super(sb); | 186 | struct logfs_super *super = logfs_super(sb); |
@@ -196,8 +192,8 @@ static int __mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, | |||
196 | page = find_lock_page(mapping, index + i); | 192 | page = find_lock_page(mapping, index + i); |
197 | BUG_ON(!page); | 193 | BUG_ON(!page); |
198 | 194 | ||
199 | err = mtd_write(sb, page->index << PAGE_SHIFT, PAGE_SIZE, | 195 | err = logfs_mtd_write(sb, page->index << PAGE_SHIFT, PAGE_SIZE, |
200 | page_address(page)); | 196 | page_address(page)); |
201 | unlock_page(page); | 197 | unlock_page(page); |
202 | page_cache_release(page); | 198 | page_cache_release(page); |
203 | if (err) | 199 | if (err) |
@@ -206,7 +202,7 @@ static int __mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, | |||
206 | return 0; | 202 | return 0; |
207 | } | 203 | } |
208 | 204 | ||
209 | static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len) | 205 | static void logfs_mtd_writeseg(struct super_block *sb, u64 ofs, size_t len) |
210 | { | 206 | { |
211 | struct logfs_super *super = logfs_super(sb); | 207 | struct logfs_super *super = logfs_super(sb); |
212 | int head; | 208 | int head; |
@@ -227,15 +223,15 @@ static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len) | |||
227 | len += head; | 223 | len += head; |
228 | } | 224 | } |
229 | len = PAGE_ALIGN(len); | 225 | len = PAGE_ALIGN(len); |
230 | __mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); | 226 | __logfs_mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); |
231 | } | 227 | } |
232 | 228 | ||
233 | static void mtd_put_device(struct logfs_super *s) | 229 | static void logfs_mtd_put_device(struct logfs_super *s) |
234 | { | 230 | { |
235 | put_mtd_device(s->s_mtd); | 231 | put_mtd_device(s->s_mtd); |
236 | } | 232 | } |
237 | 233 | ||
238 | static int mtd_can_write_buf(struct super_block *sb, u64 ofs) | 234 | static int logfs_mtd_can_write_buf(struct super_block *sb, u64 ofs) |
239 | { | 235 | { |
240 | struct logfs_super *super = logfs_super(sb); | 236 | struct logfs_super *super = logfs_super(sb); |
241 | void *buf; | 237 | void *buf; |
@@ -244,7 +240,7 @@ static int mtd_can_write_buf(struct super_block *sb, u64 ofs) | |||
244 | buf = kmalloc(super->s_writesize, GFP_KERNEL); | 240 | buf = kmalloc(super->s_writesize, GFP_KERNEL); |
245 | if (!buf) | 241 | if (!buf) |
246 | return -ENOMEM; | 242 | return -ENOMEM; |
247 | err = mtd_read(sb, ofs, super->s_writesize, buf); | 243 | err = logfs_mtd_read(sb, ofs, super->s_writesize, buf); |
248 | if (err) | 244 | if (err) |
249 | goto out; | 245 | goto out; |
250 | if (memchr_inv(buf, 0xff, super->s_writesize)) | 246 | if (memchr_inv(buf, 0xff, super->s_writesize)) |
@@ -255,14 +251,14 @@ out: | |||
255 | } | 251 | } |
256 | 252 | ||
257 | static const struct logfs_device_ops mtd_devops = { | 253 | static const struct logfs_device_ops mtd_devops = { |
258 | .find_first_sb = mtd_find_first_sb, | 254 | .find_first_sb = logfs_mtd_find_first_sb, |
259 | .find_last_sb = mtd_find_last_sb, | 255 | .find_last_sb = logfs_mtd_find_last_sb, |
260 | .readpage = mtd_readpage, | 256 | .readpage = logfs_mtd_readpage, |
261 | .writeseg = mtd_writeseg, | 257 | .writeseg = logfs_mtd_writeseg, |
262 | .erase = mtd_erase, | 258 | .erase = logfs_mtd_erase, |
263 | .can_write_buf = mtd_can_write_buf, | 259 | .can_write_buf = logfs_mtd_can_write_buf, |
264 | .sync = mtd_sync, | 260 | .sync = logfs_mtd_sync, |
265 | .put_device = mtd_put_device, | 261 | .put_device = logfs_mtd_put_device, |
266 | }; | 262 | }; |
267 | 263 | ||
268 | int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr) | 264 | int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr) |
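The wholesale logfs_mtd_* renaming exists because the file-local helpers (mtd_read, mtd_write, mtd_erase, mtd_sync, and friends) would now collide with the identically named global wrappers coming from <linux/mtd/mtd.h>. A tiny illustration of why a file-local function cannot share a name with an inline pulled in from a header:

/* collide.c: why the rename is needed. A file-local helper cannot share
 * a name with an inline function provided by an included header. */
static inline int mtd_read(int x) { return x; }     /* stand-in for mtd.h */

/* Uncommenting this is an immediate "redefinition of 'mtd_read'" error:
 * static int mtd_read(int x) { return -x; }
 */

static int logfs_mtd_read(int x) { return -x; }     /* prefixed: no clash */

int main(void)
{
    return logfs_mtd_read(1) + mtd_read(1);         /* 0 */
}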
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 501043e8966c..3de7a32cadbe 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c | |||
@@ -71,7 +71,7 @@ static int write_dir(struct inode *dir, struct logfs_disk_dentry *dd, | |||
71 | 71 | ||
72 | static int write_inode(struct inode *inode) | 72 | static int write_inode(struct inode *inode) |
73 | { | 73 | { |
74 | return __logfs_write_inode(inode, WF_LOCK); | 74 | return __logfs_write_inode(inode, NULL, WF_LOCK); |
75 | } | 75 | } |
76 | 76 | ||
77 | static s64 dir_seek_data(struct inode *inode, s64 pos) | 77 | static s64 dir_seek_data(struct inode *inode, s64 pos) |
diff --git a/fs/logfs/file.c b/fs/logfs/file.c index b548c87a86f1..3886cded283c 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c | |||
@@ -230,7 +230,9 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
230 | return ret; | 230 | return ret; |
231 | 231 | ||
232 | mutex_lock(&inode->i_mutex); | 232 | mutex_lock(&inode->i_mutex); |
233 | logfs_get_wblocks(sb, NULL, WF_LOCK); | ||
233 | logfs_write_anchor(sb); | 234 | logfs_write_anchor(sb); |
235 | logfs_put_wblocks(sb, NULL, WF_LOCK); | ||
234 | mutex_unlock(&inode->i_mutex); | 236 | mutex_unlock(&inode->i_mutex); |
235 | 237 | ||
236 | return 0; | 238 | return 0; |
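This hunk, and the matching one in logfs_sync_fs() below, move the write-block locking out of logfs_write_anchor() and into its callers, so the anchor write always runs under the lock taken by logfs_get_wblocks(). A hedged sketch of the caller-locks convention this establishes:

/* Sketch: the callee assumes the lock is held; every caller brackets
 * the call explicitly, keeping lock ordering visible at the call site. */
static void sync_anchor(struct super_block *sb)
{
	logfs_get_wblocks(sb, NULL, WF_LOCK);	/* take s_write_mutex */
	logfs_write_anchor(sb);			/* runs locked */
	logfs_put_wblocks(sb, NULL, WF_LOCK);
}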
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c index caa4419285dc..d4efb061bdc5 100644 --- a/fs/logfs/gc.c +++ b/fs/logfs/gc.c | |||
@@ -367,7 +367,7 @@ static struct gc_candidate *get_candidate(struct super_block *sb) | |||
367 | int i, max_dist; | 367 | int i, max_dist; |
368 | struct gc_candidate *cand = NULL, *this; | 368 | struct gc_candidate *cand = NULL, *this; |
369 | 369 | ||
370 | max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS); | 370 | max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS - 1); |
371 | 371 | ||
372 | for (i = max_dist; i >= 0; i--) { | 372 | for (i = max_dist; i >= 0; i--) { |
373 | this = first_in_list(&super->s_low_list[i]); | 373 | this = first_in_list(&super->s_low_list[i]); |
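The min() clamp is an off-by-one fix: max_dist is used directly as an index into s_low_list[], which has LOGFS_NO_AREAS entries, so the largest legal value is LOGFS_NO_AREAS - 1 and the old clamp permitted a read one element past the end. A compilable user-space illustration of the same bound, with hypothetical names:

#include <stdio.h>

#define NO_AREAS 16

static int low_list[NO_AREAS];

/* Valid indices are 0 .. NO_AREAS - 1; clamping to NO_AREAS would let
 * the loop below touch low_list[NO_AREAS], one past the end. */
static int clamp_index(int wanted)
{
	return wanted < NO_AREAS - 1 ? wanted : NO_AREAS - 1;
}

int main(void)
{
	for (int i = clamp_index(100); i >= 0; i--)
		low_list[i] = i;	/* always in bounds */
	printf("max index used: %d\n", clamp_index(100));
	return 0;
}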
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 388df1aa35e5..a422f42238b2 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c | |||
@@ -286,7 +286,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
286 | if (logfs_inode(inode)->li_flags & LOGFS_IF_STILLBORN) | 286 | if (logfs_inode(inode)->li_flags & LOGFS_IF_STILLBORN) |
287 | return 0; | 287 | return 0; |
288 | 288 | ||
289 | ret = __logfs_write_inode(inode, flags); | 289 | ret = __logfs_write_inode(inode, NULL, flags); |
290 | LOGFS_BUG_ON(ret, inode->i_sb); | 290 | LOGFS_BUG_ON(ret, inode->i_sb); |
291 | return ret; | 291 | return ret; |
292 | } | 292 | } |
@@ -363,7 +363,9 @@ static void logfs_init_once(void *_li) | |||
363 | 363 | ||
364 | static int logfs_sync_fs(struct super_block *sb, int wait) | 364 | static int logfs_sync_fs(struct super_block *sb, int wait) |
365 | { | 365 | { |
366 | logfs_get_wblocks(sb, NULL, WF_LOCK); | ||
366 | logfs_write_anchor(sb); | 367 | logfs_write_anchor(sb); |
368 | logfs_put_wblocks(sb, NULL, WF_LOCK); | ||
367 | return 0; | 369 | return 0; |
368 | } | 370 | } |
369 | 371 | ||
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 9da29706f91c..1e1c369df22b 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c | |||
@@ -612,7 +612,6 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type, | |||
612 | if (len == 0) | 612 | if (len == 0) |
613 | return logfs_write_header(super, header, 0, type); | 613 | return logfs_write_header(super, header, 0, type); |
614 | 614 | ||
615 | BUG_ON(len > sb->s_blocksize); | ||
616 | compr_len = logfs_compress(buf, data, len, sb->s_blocksize); | 615 | compr_len = logfs_compress(buf, data, len, sb->s_blocksize); |
617 | if (compr_len < 0 || type == JE_ANCHOR) { | 616 | if (compr_len < 0 || type == JE_ANCHOR) { |
618 | memcpy(data, buf, len); | 617 | memcpy(data, buf, len); |
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 926373866a55..5f0937609465 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h | |||
@@ -528,7 +528,7 @@ void logfs_destroy_inode_cache(void); | |||
528 | void logfs_set_blocks(struct inode *inode, u64 no); | 528 | void logfs_set_blocks(struct inode *inode, u64 no); |
529 | /* these logically belong in inode.c but actually reside in readwrite.c */ | 529 | /* these logically belong in inode.c but actually reside in readwrite.c */ |
530 | int logfs_read_inode(struct inode *inode); | 530 | int logfs_read_inode(struct inode *inode); |
531 | int __logfs_write_inode(struct inode *inode, long flags); | 531 | int __logfs_write_inode(struct inode *inode, struct page *, long flags); |
532 | void logfs_evict_inode(struct inode *inode); | 532 | void logfs_evict_inode(struct inode *inode); |
533 | 533 | ||
534 | /* journal.c */ | 534 | /* journal.c */ |
@@ -577,6 +577,8 @@ void initialize_block_counters(struct page *page, struct logfs_block *block, | |||
577 | __be64 *array, int page_is_empty); | 577 | __be64 *array, int page_is_empty); |
578 | int logfs_exist_block(struct inode *inode, u64 bix); | 578 | int logfs_exist_block(struct inode *inode, u64 bix); |
579 | int get_page_reserve(struct inode *inode, struct page *page); | 579 | int get_page_reserve(struct inode *inode, struct page *page); |
580 | void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock); | ||
581 | void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock); | ||
580 | extern struct logfs_block_ops indirect_block_ops; | 582 | extern struct logfs_block_ops indirect_block_ops; |
581 | 583 | ||
582 | /* segment.c */ | 584 | /* segment.c */ |
@@ -594,6 +596,7 @@ int logfs_init_mapping(struct super_block *sb); | |||
594 | void logfs_sync_area(struct logfs_area *area); | 596 | void logfs_sync_area(struct logfs_area *area); |
595 | void logfs_sync_segments(struct super_block *sb); | 597 | void logfs_sync_segments(struct super_block *sb); |
596 | void freeseg(struct super_block *sb, u32 segno); | 598 | void freeseg(struct super_block *sb, u32 segno); |
599 | void free_areas(struct super_block *sb); | ||
597 | 600 | ||
598 | /* area handling */ | 601 | /* area handling */ |
599 | int logfs_init_areas(struct super_block *sb); | 602 | int logfs_init_areas(struct super_block *sb); |
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 2ac4217b7901..4153e65b0148 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c | |||
@@ -244,8 +244,7 @@ static void preunlock_page(struct super_block *sb, struct page *page, int lock) | |||
244 | * is waiting for s_write_mutex. We annotate this fact by setting PG_pre_locked | 244 | * is waiting for s_write_mutex. We annotate this fact by setting PG_pre_locked |
245 | * in addition to PG_locked. | 245 | * in addition to PG_locked. |
246 | */ | 246 | */ |
247 | static void logfs_get_wblocks(struct super_block *sb, struct page *page, | 247 | void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock) |
248 | int lock) | ||
249 | { | 248 | { |
250 | struct logfs_super *super = logfs_super(sb); | 249 | struct logfs_super *super = logfs_super(sb); |
251 | 250 | ||
@@ -260,8 +259,7 @@ static void logfs_get_wblocks(struct super_block *sb, struct page *page, | |||
260 | } | 259 | } |
261 | } | 260 | } |
262 | 261 | ||
263 | static void logfs_put_wblocks(struct super_block *sb, struct page *page, | 262 | void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock) |
264 | int lock) | ||
265 | { | 263 | { |
266 | struct logfs_super *super = logfs_super(sb); | 264 | struct logfs_super *super = logfs_super(sb); |
267 | 265 | ||
@@ -424,7 +422,7 @@ static void inode_write_block(struct logfs_block *block) | |||
424 | if (inode->i_ino == LOGFS_INO_MASTER) | 422 | if (inode->i_ino == LOGFS_INO_MASTER) |
425 | logfs_write_anchor(inode->i_sb); | 423 | logfs_write_anchor(inode->i_sb); |
426 | else { | 424 | else { |
427 | ret = __logfs_write_inode(inode, 0); | 425 | ret = __logfs_write_inode(inode, NULL, 0); |
428 | /* see indirect_write_block comment */ | 426 | /* see indirect_write_block comment */ |
429 | BUG_ON(ret); | 427 | BUG_ON(ret); |
430 | } | 428 | } |
@@ -560,8 +558,13 @@ static void inode_free_block(struct super_block *sb, struct logfs_block *block) | |||
560 | static void indirect_free_block(struct super_block *sb, | 558 | static void indirect_free_block(struct super_block *sb, |
561 | struct logfs_block *block) | 559 | struct logfs_block *block) |
562 | { | 560 | { |
563 | ClearPagePrivate(block->page); | 561 | struct page *page = block->page; |
564 | block->page->private = 0; | 562 | |
563 | if (PagePrivate(page)) { | ||
564 | ClearPagePrivate(page); | ||
565 | page_cache_release(page); | ||
566 | set_page_private(page, 0); | ||
567 | } | ||
565 | __free_block(sb, block); | 568 | __free_block(sb, block); |
566 | } | 569 | } |
567 | 570 | ||
@@ -650,8 +653,11 @@ static void alloc_data_block(struct inode *inode, struct page *page) | |||
650 | logfs_unpack_index(page->index, &bix, &level); | 653 | logfs_unpack_index(page->index, &bix, &level); |
651 | block = __alloc_block(inode->i_sb, inode->i_ino, bix, level); | 654 | block = __alloc_block(inode->i_sb, inode->i_ino, bix, level); |
652 | block->page = page; | 655 | block->page = page; |
656 | |||
653 | SetPagePrivate(page); | 657 | SetPagePrivate(page); |
654 | page->private = (unsigned long)block; | 658 | page_cache_get(page); |
659 | set_page_private(page, (unsigned long) block); | ||
660 | |||
655 | block->ops = &indirect_block_ops; | 661 | block->ops = &indirect_block_ops; |
656 | } | 662 | } |
657 | 663 | ||
@@ -1570,11 +1576,15 @@ int logfs_write_buf(struct inode *inode, struct page *page, long flags) | |||
1570 | static int __logfs_delete(struct inode *inode, struct page *page) | 1576 | static int __logfs_delete(struct inode *inode, struct page *page) |
1571 | { | 1577 | { |
1572 | long flags = WF_DELETE; | 1578 | long flags = WF_DELETE; |
1579 | int err; | ||
1573 | 1580 | ||
1574 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | 1581 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; |
1575 | 1582 | ||
1576 | if (page->index < I0_BLOCKS) | 1583 | if (page->index < I0_BLOCKS) |
1577 | return logfs_write_direct(inode, page, flags); | 1584 | return logfs_write_direct(inode, page, flags); |
1585 | err = grow_inode(inode, page->index, 0); | ||
1586 | if (err) | ||
1587 | return err; | ||
1578 | return logfs_write_rec(inode, page, page->index, 0, flags); | 1588 | return logfs_write_rec(inode, page, page->index, 0, flags); |
1579 | } | 1589 | } |
1580 | 1590 | ||
@@ -1623,7 +1633,7 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, | |||
1623 | if (inode->i_ino == LOGFS_INO_MASTER) | 1633 | if (inode->i_ino == LOGFS_INO_MASTER) |
1624 | logfs_write_anchor(inode->i_sb); | 1634 | logfs_write_anchor(inode->i_sb); |
1625 | else { | 1635 | else { |
1626 | err = __logfs_write_inode(inode, flags); | 1636 | err = __logfs_write_inode(inode, page, flags); |
1627 | } | 1637 | } |
1628 | } | 1638 | } |
1629 | } | 1639 | } |
@@ -1873,7 +1883,7 @@ int logfs_truncate(struct inode *inode, u64 target) | |||
1873 | logfs_get_wblocks(sb, NULL, 1); | 1883 | logfs_get_wblocks(sb, NULL, 1); |
1874 | err = __logfs_truncate(inode, size); | 1884 | err = __logfs_truncate(inode, size); |
1875 | if (!err) | 1885 | if (!err) |
1876 | err = __logfs_write_inode(inode, 0); | 1886 | err = __logfs_write_inode(inode, NULL, 0); |
1877 | logfs_put_wblocks(sb, NULL, 1); | 1887 | logfs_put_wblocks(sb, NULL, 1); |
1878 | } | 1888 | } |
1879 | 1889 | ||
@@ -1901,8 +1911,11 @@ static void move_page_to_inode(struct inode *inode, struct page *page) | |||
1901 | li->li_block = block; | 1911 | li->li_block = block; |
1902 | 1912 | ||
1903 | block->page = NULL; | 1913 | block->page = NULL; |
1904 | page->private = 0; | 1914 | if (PagePrivate(page)) { |
1905 | ClearPagePrivate(page); | 1915 | ClearPagePrivate(page); |
1916 | page_cache_release(page); | ||
1917 | set_page_private(page, 0); | ||
1918 | } | ||
1906 | } | 1919 | } |
1907 | 1920 | ||
1908 | static void move_inode_to_page(struct page *page, struct inode *inode) | 1921 | static void move_inode_to_page(struct page *page, struct inode *inode) |
@@ -1918,8 +1931,12 @@ static void move_inode_to_page(struct page *page, struct inode *inode) | |||
1918 | BUG_ON(PagePrivate(page)); | 1931 | BUG_ON(PagePrivate(page)); |
1919 | block->ops = &indirect_block_ops; | 1932 | block->ops = &indirect_block_ops; |
1920 | block->page = page; | 1933 | block->page = page; |
1921 | page->private = (unsigned long)block; | 1934 | |
1922 | SetPagePrivate(page); | 1935 | if (!PagePrivate(page)) { |
1936 | SetPagePrivate(page); | ||
1937 | page_cache_get(page); | ||
1938 | set_page_private(page, (unsigned long) block); | ||
1939 | } | ||
1923 | 1940 | ||
1924 | block->inode = NULL; | 1941 | block->inode = NULL; |
1925 | li->li_block = NULL; | 1942 | li->li_block = NULL; |
@@ -2106,14 +2123,14 @@ void logfs_set_segment_unreserved(struct super_block *sb, u32 segno, u32 ec) | |||
2106 | ec_level); | 2123 | ec_level); |
2107 | } | 2124 | } |
2108 | 2125 | ||
2109 | int __logfs_write_inode(struct inode *inode, long flags) | 2126 | int __logfs_write_inode(struct inode *inode, struct page *page, long flags) |
2110 | { | 2127 | { |
2111 | struct super_block *sb = inode->i_sb; | 2128 | struct super_block *sb = inode->i_sb; |
2112 | int ret; | 2129 | int ret; |
2113 | 2130 | ||
2114 | logfs_get_wblocks(sb, NULL, flags & WF_LOCK); | 2131 | logfs_get_wblocks(sb, page, flags & WF_LOCK); |
2115 | ret = do_write_inode(inode); | 2132 | ret = do_write_inode(inode); |
2116 | logfs_put_wblocks(sb, NULL, flags & WF_LOCK); | 2133 | logfs_put_wblocks(sb, page, flags & WF_LOCK); |
2117 | return ret; | 2134 | return ret; |
2118 | } | 2135 | } |
2119 | 2136 | ||
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 9d5187353255..ab798ed1cc88 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c | |||
@@ -86,7 +86,11 @@ int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, | |||
86 | BUG_ON(!page); /* FIXME: reserve a pool */ | 86 | BUG_ON(!page); /* FIXME: reserve a pool */ |
87 | SetPageUptodate(page); | 87 | SetPageUptodate(page); |
88 | memcpy(page_address(page) + offset, buf, copylen); | 88 | memcpy(page_address(page) + offset, buf, copylen); |
89 | SetPagePrivate(page); | 89 | |
90 | if (!PagePrivate(page)) { | ||
91 | SetPagePrivate(page); | ||
92 | page_cache_get(page); | ||
93 | } | ||
90 | page_cache_release(page); | 94 | page_cache_release(page); |
91 | 95 | ||
92 | buf += copylen; | 96 | buf += copylen; |
@@ -110,7 +114,10 @@ static void pad_partial_page(struct logfs_area *area) | |||
110 | page = get_mapping_page(sb, index, 0); | 114 | page = get_mapping_page(sb, index, 0); |
111 | BUG_ON(!page); /* FIXME: reserve a pool */ | 115 | BUG_ON(!page); /* FIXME: reserve a pool */ |
112 | memset(page_address(page) + offset, 0xff, len); | 116 | memset(page_address(page) + offset, 0xff, len); |
113 | SetPagePrivate(page); | 117 | if (!PagePrivate(page)) { |
118 | SetPagePrivate(page); | ||
119 | page_cache_get(page); | ||
120 | } | ||
114 | page_cache_release(page); | 121 | page_cache_release(page); |
115 | } | 122 | } |
116 | } | 123 | } |
@@ -130,7 +137,10 @@ static void pad_full_pages(struct logfs_area *area) | |||
130 | BUG_ON(!page); /* FIXME: reserve a pool */ | 137 | BUG_ON(!page); /* FIXME: reserve a pool */ |
131 | SetPageUptodate(page); | 138 | SetPageUptodate(page); |
132 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); | 139 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); |
133 | SetPagePrivate(page); | 140 | if (!PagePrivate(page)) { |
141 | SetPagePrivate(page); | ||
142 | page_cache_get(page); | ||
143 | } | ||
134 | page_cache_release(page); | 144 | page_cache_release(page); |
135 | index++; | 145 | index++; |
136 | no_indizes--; | 146 | no_indizes--; |
@@ -485,8 +495,12 @@ static void move_btree_to_page(struct inode *inode, struct page *page, | |||
485 | mempool_free(item, super->s_alias_pool); | 495 | mempool_free(item, super->s_alias_pool); |
486 | } | 496 | } |
487 | block->page = page; | 497 | block->page = page; |
488 | SetPagePrivate(page); | 498 | |
489 | page->private = (unsigned long)block; | 499 | if (!PagePrivate(page)) { |
500 | SetPagePrivate(page); | ||
501 | page_cache_get(page); | ||
502 | set_page_private(page, (unsigned long) block); | ||
503 | } | ||
490 | block->ops = &indirect_block_ops; | 504 | block->ops = &indirect_block_ops; |
491 | initialize_block_counters(page, block, data, 0); | 505 | initialize_block_counters(page, block, data, 0); |
492 | } | 506 | } |
@@ -536,8 +550,12 @@ void move_page_to_btree(struct page *page) | |||
536 | list_add(&item->list, &block->item_list); | 550 | list_add(&item->list, &block->item_list); |
537 | } | 551 | } |
538 | block->page = NULL; | 552 | block->page = NULL; |
539 | ClearPagePrivate(page); | 553 | |
540 | page->private = 0; | 554 | if (PagePrivate(page)) { |
555 | ClearPagePrivate(page); | ||
556 | page_cache_release(page); | ||
557 | set_page_private(page, 0); | ||
558 | } | ||
541 | block->ops = &btree_block_ops; | 559 | block->ops = &btree_block_ops; |
542 | err = alias_tree_insert(block->sb, block->ino, block->bix, block->level, | 560 | err = alias_tree_insert(block->sb, block->ino, block->bix, block->level, |
543 | block); | 561 | block); |
@@ -702,7 +720,10 @@ void freeseg(struct super_block *sb, u32 segno) | |||
702 | page = find_get_page(mapping, ofs >> PAGE_SHIFT); | 720 | page = find_get_page(mapping, ofs >> PAGE_SHIFT); |
703 | if (!page) | 721 | if (!page) |
704 | continue; | 722 | continue; |
705 | ClearPagePrivate(page); | 723 | if (PagePrivate(page)) { |
724 | ClearPagePrivate(page); | ||
725 | page_cache_release(page); | ||
726 | } | ||
706 | page_cache_release(page); | 727 | page_cache_release(page); |
707 | } | 728 | } |
708 | } | 729 | } |
@@ -841,6 +862,16 @@ static void free_area(struct logfs_area *area) | |||
841 | kfree(area); | 862 | kfree(area); |
842 | } | 863 | } |
843 | 864 | ||
865 | void free_areas(struct super_block *sb) | ||
866 | { | ||
867 | struct logfs_super *super = logfs_super(sb); | ||
868 | int i; | ||
869 | |||
870 | for_each_area(i) | ||
871 | free_area(super->s_area[i]); | ||
872 | free_area(super->s_journal_area); | ||
873 | } | ||
874 | |||
844 | static struct logfs_area *alloc_area(struct super_block *sb) | 875 | static struct logfs_area *alloc_area(struct super_block *sb) |
845 | { | 876 | { |
846 | struct logfs_area *area; | 877 | struct logfs_area *area; |
@@ -923,10 +954,6 @@ err: | |||
923 | void logfs_cleanup_areas(struct super_block *sb) | 954 | void logfs_cleanup_areas(struct super_block *sb) |
924 | { | 955 | { |
925 | struct logfs_super *super = logfs_super(sb); | 956 | struct logfs_super *super = logfs_super(sb); |
926 | int i; | ||
927 | 957 | ||
928 | btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias); | 958 | btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias); |
929 | for_each_area(i) | ||
930 | free_area(super->s_area[i]); | ||
931 | free_area(super->s_journal_area); | ||
932 | } | 959 | } |
diff --git a/fs/logfs/super.c b/fs/logfs/super.c index e795c234ea33..c9ee7f5d1caf 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c | |||
@@ -486,14 +486,15 @@ static void logfs_kill_sb(struct super_block *sb) | |||
486 | /* Alias entries slow down mount, so evict as many as possible */ | 486 | /* Alias entries slow down mount, so evict as many as possible */ |
487 | sync_filesystem(sb); | 487 | sync_filesystem(sb); |
488 | logfs_write_anchor(sb); | 488 | logfs_write_anchor(sb); |
489 | free_areas(sb); | ||
489 | 490 | ||
490 | /* | 491 | /* |
491 | * From this point on alias entries are simply dropped - and any | 492 | * From this point on alias entries are simply dropped - and any |
492 | * writes to the object store are considered bugs. | 493 | * writes to the object store are considered bugs. |
493 | */ | 494 | */ |
494 | super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN; | ||
495 | log_super("LogFS: Now in shutdown\n"); | 495 | log_super("LogFS: Now in shutdown\n"); |
496 | generic_shutdown_super(sb); | 496 | generic_shutdown_super(sb); |
497 | super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN; | ||
497 | 498 | ||
498 | BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes); | 499 | BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes); |
499 | 500 | ||
diff --git a/fs/mpage.c b/fs/mpage.c index fdfae9fa98cd..643e9f55ef29 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -371,9 +371,6 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, | |||
371 | sector_t last_block_in_bio = 0; | 371 | sector_t last_block_in_bio = 0; |
372 | struct buffer_head map_bh; | 372 | struct buffer_head map_bh; |
373 | unsigned long first_logical_block = 0; | 373 | unsigned long first_logical_block = 0; |
374 | struct blk_plug plug; | ||
375 | |||
376 | blk_start_plug(&plug); | ||
377 | 374 | ||
378 | map_bh.b_state = 0; | 375 | map_bh.b_state = 0; |
379 | map_bh.b_size = 0; | 376 | map_bh.b_size = 0; |
@@ -395,7 +392,6 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, | |||
395 | BUG_ON(!list_empty(pages)); | 392 | BUG_ON(!list_empty(pages)); |
396 | if (bio) | 393 | if (bio) |
397 | mpage_bio_submit(READ, bio); | 394 | mpage_bio_submit(READ, bio); |
398 | blk_finish_plug(&plug); | ||
399 | return 0; | 395 | return 0; |
400 | } | 396 | } |
401 | EXPORT_SYMBOL(mpage_readpages); | 397 | EXPORT_SYMBOL(mpage_readpages); |
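Dropping the plug here is plausible because mpage_readpages() is driven from the readahead path, whose caller already plugs around the whole batch, making the inner plug redundant (a hedged reading; the diff records only the removal). For reference, a sketch of the caller-side plugging that makes the inner one unnecessary:

/* Sketch: one plug around a whole batch of page reads lets the block
 * layer merge every bio submitted inside the window. */
static void read_batch(struct address_space *mapping,
		       struct list_head *pages, unsigned nr_pages)
{
	struct blk_plug plug;

	blk_start_plug(&plug);
	mapping->a_ops->readpages(NULL, mapping, pages, nr_pages);
	blk_finish_plug(&plug);	/* flush the merged requests */
}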
diff --git a/fs/namei.c b/fs/namei.c index c283a1ec008e..208c6aa4a989 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -140,21 +140,19 @@ static int do_getname(const char __user *filename, char *page) | |||
140 | 140 | ||
141 | static char *getname_flags(const char __user *filename, int flags, int *empty) | 141 | static char *getname_flags(const char __user *filename, int flags, int *empty) |
142 | { | 142 | { |
143 | char *tmp, *result; | 143 | char *result = __getname(); |
144 | 144 | int retval; | |
145 | result = ERR_PTR(-ENOMEM); | 145 | |
146 | tmp = __getname(); | 146 | if (!result) |
147 | if (tmp) { | 147 | return ERR_PTR(-ENOMEM); |
148 | int retval = do_getname(filename, tmp); | 148 | |
149 | 149 | retval = do_getname(filename, result); | |
150 | result = tmp; | 150 | if (retval < 0) { |
151 | if (retval < 0) { | 151 | if (retval == -ENOENT && empty) |
152 | if (retval == -ENOENT && empty) | 152 | *empty = 1; |
153 | *empty = 1; | 153 | if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) { |
154 | if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) { | 154 | __putname(result); |
155 | __putname(tmp); | 155 | return ERR_PTR(retval); |
156 | result = ERR_PTR(retval); | ||
157 | } | ||
158 | } | 156 | } |
159 | } | 157 | } |
160 | audit_getname(result); | 158 | audit_getname(result); |
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 281ae95932c9..48cfac31f64c 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -90,9 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect) | |||
90 | */ | 90 | */ |
91 | struct parallel_io { | 91 | struct parallel_io { |
92 | struct kref refcnt; | 92 | struct kref refcnt; |
93 | struct rpc_call_ops call_ops; | 93 | void (*pnfs_callback) (void *data, int num_se); |
94 | void (*pnfs_callback) (void *data); | ||
95 | void *data; | 94 | void *data; |
95 | int bse_count; | ||
96 | }; | 96 | }; |
97 | 97 | ||
98 | static inline struct parallel_io *alloc_parallel(void *data) | 98 | static inline struct parallel_io *alloc_parallel(void *data) |
@@ -103,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data) | |||
103 | if (rv) { | 103 | if (rv) { |
104 | rv->data = data; | 104 | rv->data = data; |
105 | kref_init(&rv->refcnt); | 105 | kref_init(&rv->refcnt); |
106 | rv->bse_count = 0; | ||
106 | } | 107 | } |
107 | return rv; | 108 | return rv; |
108 | } | 109 | } |
@@ -117,7 +118,7 @@ static void destroy_parallel(struct kref *kref) | |||
117 | struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); | 118 | struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); |
118 | 119 | ||
119 | dprintk("%s enter\n", __func__); | 120 | dprintk("%s enter\n", __func__); |
120 | p->pnfs_callback(p->data); | 121 | p->pnfs_callback(p->data, p->bse_count); |
121 | kfree(p); | 122 | kfree(p); |
122 | } | 123 | } |
123 | 124 | ||
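parallel_io fans several bios out under one completion: each in-flight bio holds a reference on the kref, and destroy_parallel() fires pnfs_callback exactly once, when the last reference drops — now also reporting bse_count so the write path can release unused short extents. A hedged sketch of the refcounted-completion shape, assuming get/put helpers like the driver's own:

/* Sketch: take a reference per submitted bio; the completion callback
 * runs exactly once, from the final put. */
static void get_parallel(struct parallel_io *p)
{
	kref_get(&p->refcnt);
}

static void put_parallel(struct parallel_io *p)
{
	/* last put -> destroy_parallel() -> p->pnfs_callback(...) */
	kref_put(&p->refcnt, destroy_parallel);
}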
@@ -146,14 +147,19 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | |||
146 | { | 147 | { |
147 | struct bio *bio; | 148 | struct bio *bio; |
148 | 149 | ||
150 | npg = min(npg, BIO_MAX_PAGES); | ||
149 | bio = bio_alloc(GFP_NOIO, npg); | 151 | bio = bio_alloc(GFP_NOIO, npg); |
150 | if (!bio) | 152 | if (!bio && (current->flags & PF_MEMALLOC)) { |
151 | return NULL; | 153 | while (!bio && (npg /= 2)) |
154 | bio = bio_alloc(GFP_NOIO, npg); | ||
155 | } | ||
152 | 156 | ||
153 | bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; | 157 | if (bio) { |
154 | bio->bi_bdev = be->be_mdev; | 158 | bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; |
155 | bio->bi_end_io = end_io; | 159 | bio->bi_bdev = be->be_mdev; |
156 | bio->bi_private = par; | 160 | bio->bi_end_io = end_io; |
161 | bio->bi_private = par; | ||
162 | } | ||
157 | return bio; | 163 | return bio; |
158 | } | 164 | } |
159 | 165 | ||
@@ -212,22 +218,15 @@ static void bl_read_cleanup(struct work_struct *work) | |||
212 | } | 218 | } |
213 | 219 | ||
214 | static void | 220 | static void |
215 | bl_end_par_io_read(void *data) | 221 | bl_end_par_io_read(void *data, int unused) |
216 | { | 222 | { |
217 | struct nfs_read_data *rdata = data; | 223 | struct nfs_read_data *rdata = data; |
218 | 224 | ||
225 | rdata->task.tk_status = rdata->pnfs_error; | ||
219 | INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); | 226 | INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); |
220 | schedule_work(&rdata->task.u.tk_work); | 227 | schedule_work(&rdata->task.u.tk_work); |
221 | } | 228 | } |
222 | 229 | ||
223 | /* We don't want normal .rpc_call_done callback used, so we replace it | ||
224 | * with this stub. | ||
225 | */ | ||
226 | static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata) | ||
227 | { | ||
228 | return; | ||
229 | } | ||
230 | |||
231 | static enum pnfs_try_status | 230 | static enum pnfs_try_status |
232 | bl_read_pagelist(struct nfs_read_data *rdata) | 231 | bl_read_pagelist(struct nfs_read_data *rdata) |
233 | { | 232 | { |
@@ -247,8 +246,6 @@ bl_read_pagelist(struct nfs_read_data *rdata) | |||
247 | par = alloc_parallel(rdata); | 246 | par = alloc_parallel(rdata); |
248 | if (!par) | 247 | if (!par) |
249 | goto use_mds; | 248 | goto use_mds; |
250 | par->call_ops = *rdata->mds_ops; | ||
251 | par->call_ops.rpc_call_done = bl_rpc_do_nothing; | ||
252 | par->pnfs_callback = bl_end_par_io_read; | 249 | par->pnfs_callback = bl_end_par_io_read; |
253 | /* At this point, we can no longer jump to use_mds */ | 250 | /* At this point, we can no longer jump to use_mds */ |
254 | 251 | ||
@@ -322,6 +319,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl, | |||
322 | { | 319 | { |
323 | sector_t isect, end; | 320 | sector_t isect, end; |
324 | struct pnfs_block_extent *be; | 321 | struct pnfs_block_extent *be; |
322 | struct pnfs_block_short_extent *se; | ||
325 | 323 | ||
326 | dprintk("%s(%llu, %u)\n", __func__, offset, count); | 324 | dprintk("%s(%llu, %u)\n", __func__, offset, count); |
327 | if (count == 0) | 325 | if (count == 0) |
@@ -334,8 +332,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl, | |||
334 | be = bl_find_get_extent(bl, isect, NULL); | 332 | be = bl_find_get_extent(bl, isect, NULL); |
335 | BUG_ON(!be); /* FIXME */ | 333 | BUG_ON(!be); /* FIXME */ |
336 | len = min(end, be->be_f_offset + be->be_length) - isect; | 334 | len = min(end, be->be_f_offset + be->be_length) - isect; |
337 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) | 335 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { |
338 | bl_mark_for_commit(be, isect, len); /* What if fails? */ | 336 | se = bl_pop_one_short_extent(be->be_inval); |
337 | BUG_ON(!se); | ||
338 | bl_mark_for_commit(be, isect, len, se); | ||
339 | } | ||
339 | isect += len; | 340 | isect += len; |
340 | bl_put_extent(be); | 341 | bl_put_extent(be); |
341 | } | 342 | } |
@@ -357,7 +358,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err) | |||
357 | end_page_writeback(page); | 358 | end_page_writeback(page); |
358 | page_cache_release(page); | 359 | page_cache_release(page); |
359 | } while (bvec >= bio->bi_io_vec); | 360 | } while (bvec >= bio->bi_io_vec); |
360 | if (!uptodate) { | 361 | |
362 | if (unlikely(!uptodate)) { | ||
361 | if (!wdata->pnfs_error) | 363 | if (!wdata->pnfs_error) |
362 | wdata->pnfs_error = -EIO; | 364 | wdata->pnfs_error = -EIO; |
363 | pnfs_set_lo_fail(wdata->lseg); | 365 | pnfs_set_lo_fail(wdata->lseg); |
@@ -366,7 +368,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err) | |||
366 | put_parallel(par); | 368 | put_parallel(par); |
367 | } | 369 | } |
368 | 370 | ||
369 | /* This is basically copied from mpage_end_io_read */ | ||
370 | static void bl_end_io_write(struct bio *bio, int err) | 371 | static void bl_end_io_write(struct bio *bio, int err) |
371 | { | 372 | { |
372 | struct parallel_io *par = bio->bi_private; | 373 | struct parallel_io *par = bio->bi_private; |
@@ -392,7 +393,7 @@ static void bl_write_cleanup(struct work_struct *work) | |||
392 | dprintk("%s enter\n", __func__); | 393 | dprintk("%s enter\n", __func__); |
393 | task = container_of(work, struct rpc_task, u.tk_work); | 394 | task = container_of(work, struct rpc_task, u.tk_work); |
394 | wdata = container_of(task, struct nfs_write_data, task); | 395 | wdata = container_of(task, struct nfs_write_data, task); |
395 | if (!wdata->pnfs_error) { | 396 | if (likely(!wdata->pnfs_error)) { |
396 | /* Marks for LAYOUTCOMMIT */ | 397 | /* Marks for LAYOUTCOMMIT */ |
397 | mark_extents_written(BLK_LSEG2EXT(wdata->lseg), | 398 | mark_extents_written(BLK_LSEG2EXT(wdata->lseg), |
398 | wdata->args.offset, wdata->args.count); | 399 | wdata->args.offset, wdata->args.count); |
@@ -401,11 +402,16 @@ static void bl_write_cleanup(struct work_struct *work) | |||
401 | } | 402 | } |
402 | 403 | ||
403 | /* Called when last of bios associated with a bl_write_pagelist call finishes */ | 404 | /* Called when last of bios associated with a bl_write_pagelist call finishes */ |
404 | static void bl_end_par_io_write(void *data) | 405 | static void bl_end_par_io_write(void *data, int num_se) |
405 | { | 406 | { |
406 | struct nfs_write_data *wdata = data; | 407 | struct nfs_write_data *wdata = data; |
407 | 408 | ||
408 | wdata->task.tk_status = 0; | 409 | if (unlikely(wdata->pnfs_error)) { |
410 | bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, | ||
411 | num_se); | ||
412 | } | ||
413 | |||
414 | wdata->task.tk_status = wdata->pnfs_error; | ||
409 | wdata->verf.committed = NFS_FILE_SYNC; | 415 | wdata->verf.committed = NFS_FILE_SYNC; |
410 | INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); | 416 | INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); |
411 | schedule_work(&wdata->task.u.tk_work); | 417 | schedule_work(&wdata->task.u.tk_work); |
@@ -484,6 +490,55 @@ cleanup: | |||
484 | return ret; | 490 | return ret; |
485 | } | 491 | } |
486 | 492 | ||
493 | /* Find or create a zeroing page marked as under writeback. | ||
494 | * Return ERR_PTR on error, NULL to indicate the page should be skipped, | ||
495 | * or the page itself to indicate it should be written out. | ||
496 | */ | ||
497 | static struct page * | ||
498 | bl_find_get_zeroing_page(struct inode *inode, pgoff_t index, | ||
499 | struct pnfs_block_extent *cow_read) | ||
500 | { | ||
501 | struct page *page; | ||
502 | int locked = 0; | ||
503 | page = find_get_page(inode->i_mapping, index); | ||
504 | if (page) | ||
505 | goto check_page; | ||
506 | |||
507 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | ||
508 | if (unlikely(!page)) { | ||
509 | dprintk("%s oom\n", __func__); | ||
510 | return ERR_PTR(-ENOMEM); | ||
511 | } | ||
512 | locked = 1; | ||
513 | |||
514 | check_page: | ||
515 | /* PageDirty: someone else will write this out | ||
516 | * PageWriteback: someone else is writing this out | ||
517 | * PageUptodate: It was read before | ||
518 | */ | ||
519 | if (PageDirty(page) || PageWriteback(page)) { | ||
520 | print_page(page); | ||
521 | if (locked) | ||
522 | unlock_page(page); | ||
523 | page_cache_release(page); | ||
524 | return NULL; | ||
525 | } | ||
526 | |||
527 | if (!locked) { | ||
528 | lock_page(page); | ||
529 | locked = 1; | ||
530 | goto check_page; | ||
531 | } | ||
532 | if (!PageUptodate(page)) { | ||
533 | /* New page, read it in or zero it */ | ||
534 | init_page_for_write(page, cow_read); | ||
535 | } | ||
536 | set_page_writeback(page); | ||
537 | unlock_page(page); | ||
538 | |||
539 | return page; | ||
540 | } | ||
541 | |||
487 | static enum pnfs_try_status | 542 | static enum pnfs_try_status |
488 | bl_write_pagelist(struct nfs_write_data *wdata, int sync) | 543 | bl_write_pagelist(struct nfs_write_data *wdata, int sync) |
489 | { | 544 | { |
@@ -508,9 +563,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) | |||
508 | */ | 563 | */ |
509 | par = alloc_parallel(wdata); | 564 | par = alloc_parallel(wdata); |
510 | if (!par) | 565 | if (!par) |
511 | return PNFS_NOT_ATTEMPTED; | 566 | goto out_mds; |
512 | par->call_ops = *wdata->mds_ops; | ||
513 | par->call_ops.rpc_call_done = bl_rpc_do_nothing; | ||
514 | par->pnfs_callback = bl_end_par_io_write; | 567 | par->pnfs_callback = bl_end_par_io_write; |
515 | /* At this point, have to be more careful with error handling */ | 568 | /* At this point, have to be more careful with error handling */ |
516 | 569 | ||
@@ -518,12 +571,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) | |||
518 | be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); | 571 | be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); |
519 | if (!be || !is_writable(be, isect)) { | 572 | if (!be || !is_writable(be, isect)) { |
520 | dprintk("%s no matching extents!\n", __func__); | 573 | dprintk("%s no matching extents!\n", __func__); |
521 | wdata->pnfs_error = -EINVAL; | 574 | goto out_mds; |
522 | goto out; | ||
523 | } | 575 | } |
524 | 576 | ||
525 | /* First page inside INVALID extent */ | 577 | /* First page inside INVALID extent */ |
526 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | 578 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { |
579 | if (likely(!bl_push_one_short_extent(be->be_inval))) | ||
580 | par->bse_count++; | ||
581 | else | ||
582 | goto out_mds; | ||
527 | temp = offset >> PAGE_CACHE_SHIFT; | 583 | temp = offset >> PAGE_CACHE_SHIFT; |
528 | npg_zero = do_div(temp, npg_per_block); | 584 | npg_zero = do_div(temp, npg_per_block); |
529 | isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & | 585 | isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & |
@@ -543,36 +599,16 @@ fill_invalid_ext: | |||
543 | dprintk("%s zero %dth page: index %lu isect %llu\n", | 599 | dprintk("%s zero %dth page: index %lu isect %llu\n", |
544 | __func__, npg_zero, index, | 600 | __func__, npg_zero, index, |
545 | (unsigned long long)isect); | 601 | (unsigned long long)isect); |
546 | page = | 602 | page = bl_find_get_zeroing_page(wdata->inode, index, |
547 | find_or_create_page(wdata->inode->i_mapping, index, | 603 | cow_read); |
548 | GFP_NOFS); | 604 | if (unlikely(IS_ERR(page))) { |
549 | if (!page) { | 605 | wdata->pnfs_error = PTR_ERR(page); |
550 | dprintk("%s oom\n", __func__); | ||
551 | wdata->pnfs_error = -ENOMEM; | ||
552 | goto out; | 606 | goto out; |
553 | } | 607 | } else if (page == NULL) |
554 | |||
555 | /* PageDirty: Other will write this out | ||
556 | * PageWriteback: Other is writing this out | ||
557 | * PageUptodate: It was read before | ||
558 | * sector_initialized: already written out | ||
559 | */ | ||
560 | if (PageDirty(page) || PageWriteback(page)) { | ||
561 | print_page(page); | ||
562 | unlock_page(page); | ||
563 | page_cache_release(page); | ||
564 | goto next_page; | 608 | goto next_page; |
565 | } | ||
566 | if (!PageUptodate(page)) { | ||
567 | /* New page, readin or zero it */ | ||
568 | init_page_for_write(page, cow_read); | ||
569 | } | ||
570 | set_page_writeback(page); | ||
571 | unlock_page(page); | ||
572 | 609 | ||
573 | ret = bl_mark_sectors_init(be->be_inval, isect, | 610 | ret = bl_mark_sectors_init(be->be_inval, isect, |
574 | PAGE_CACHE_SECTORS, | 611 | PAGE_CACHE_SECTORS); |
575 | NULL); | ||
576 | if (unlikely(ret)) { | 612 | if (unlikely(ret)) { |
577 | dprintk("%s bl_mark_sectors_init fail %d\n", | 613 | dprintk("%s bl_mark_sectors_init fail %d\n", |
578 | __func__, ret); | 614 | __func__, ret); |
@@ -581,6 +617,19 @@ fill_invalid_ext: | |||
581 | wdata->pnfs_error = ret; | 617 | wdata->pnfs_error = ret; |
582 | goto out; | 618 | goto out; |
583 | } | 619 | } |
620 | if (likely(!bl_push_one_short_extent(be->be_inval))) | ||
621 | par->bse_count++; | ||
622 | else { | ||
623 | end_page_writeback(page); | ||
624 | page_cache_release(page); | ||
625 | wdata->pnfs_error = -ENOMEM; | ||
626 | goto out; | ||
627 | } | ||
628 | /* FIXME: This should be done in bi_end_io */ | ||
629 | mark_extents_written(BLK_LSEG2EXT(wdata->lseg), | ||
630 | page->index << PAGE_CACHE_SHIFT, | ||
631 | PAGE_CACHE_SIZE); | ||
632 | |||
584 | bio = bl_add_page_to_bio(bio, npg_zero, WRITE, | 633 | bio = bl_add_page_to_bio(bio, npg_zero, WRITE, |
585 | isect, page, be, | 634 | isect, page, be, |
586 | bl_end_io_write_zero, par); | 635 | bl_end_io_write_zero, par); |
@@ -589,10 +638,6 @@ fill_invalid_ext: | |||
589 | bio = NULL; | 638 | bio = NULL; |
590 | goto out; | 639 | goto out; |
591 | } | 640 | } |
592 | /* FIXME: This should be done in bi_end_io */ | ||
593 | mark_extents_written(BLK_LSEG2EXT(wdata->lseg), | ||
594 | page->index << PAGE_CACHE_SHIFT, | ||
595 | PAGE_CACHE_SIZE); | ||
596 | next_page: | 641 | next_page: |
597 | isect += PAGE_CACHE_SECTORS; | 642 | isect += PAGE_CACHE_SECTORS; |
598 | extent_length -= PAGE_CACHE_SECTORS; | 643 | extent_length -= PAGE_CACHE_SECTORS; |
@@ -616,13 +661,21 @@ next_page: | |||
616 | wdata->pnfs_error = -EINVAL; | 661 | wdata->pnfs_error = -EINVAL; |
617 | goto out; | 662 | goto out; |
618 | } | 663 | } |
664 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
665 | if (likely(!bl_push_one_short_extent( | ||
666 | be->be_inval))) | ||
667 | par->bse_count++; | ||
668 | else { | ||
669 | wdata->pnfs_error = -ENOMEM; | ||
670 | goto out; | ||
671 | } | ||
672 | } | ||
619 | extent_length = be->be_length - | 673 | extent_length = be->be_length - |
620 | (isect - be->be_f_offset); | 674 | (isect - be->be_f_offset); |
621 | } | 675 | } |
622 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | 676 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { |
623 | ret = bl_mark_sectors_init(be->be_inval, isect, | 677 | ret = bl_mark_sectors_init(be->be_inval, isect, |
624 | PAGE_CACHE_SECTORS, | 678 | PAGE_CACHE_SECTORS); |
625 | NULL); | ||
626 | if (unlikely(ret)) { | 679 | if (unlikely(ret)) { |
627 | dprintk("%s bl_mark_sectors_init fail %d\n", | 680 | dprintk("%s bl_mark_sectors_init fail %d\n", |
628 | __func__, ret); | 681 | __func__, ret); |
@@ -664,6 +717,10 @@ out: | |||
664 | bl_submit_bio(WRITE, bio); | 717 | bl_submit_bio(WRITE, bio); |
665 | put_parallel(par); | 718 | put_parallel(par); |
666 | return PNFS_ATTEMPTED; | 719 | return PNFS_ATTEMPTED; |
720 | out_mds: | ||
721 | bl_put_extent(be); | ||
722 | kfree(par); | ||
723 | return PNFS_NOT_ATTEMPTED; | ||
667 | } | 724 | } |
668 | 725 | ||
669 | /* FIXME - range ignored */ | 726 | /* FIXME - range ignored */ |
@@ -690,11 +747,17 @@ static void | |||
690 | release_inval_marks(struct pnfs_inval_markings *marks) | 747 | release_inval_marks(struct pnfs_inval_markings *marks) |
691 | { | 748 | { |
692 | struct pnfs_inval_tracking *pos, *temp; | 749 | struct pnfs_inval_tracking *pos, *temp; |
750 | struct pnfs_block_short_extent *se, *stemp; | ||
693 | 751 | ||
694 | list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { | 752 | list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { |
695 | list_del(&pos->it_link); | 753 | list_del(&pos->it_link); |
696 | kfree(pos); | 754 | kfree(pos); |
697 | } | 755 | } |
756 | |||
757 | list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) { | ||
758 | list_del(&se->bse_node); | ||
759 | kfree(se); | ||
760 | } | ||
698 | return; | 761 | return; |
699 | } | 762 | } |
700 | 763 | ||
@@ -779,16 +842,13 @@ bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) | |||
779 | static void free_blk_mountid(struct block_mount_id *mid) | 842 | static void free_blk_mountid(struct block_mount_id *mid) |
780 | { | 843 | { |
781 | if (mid) { | 844 | if (mid) { |
782 | struct pnfs_block_dev *dev; | 845 | struct pnfs_block_dev *dev, *tmp; |
783 | spin_lock(&mid->bm_lock); | 846 | |
784 | while (!list_empty(&mid->bm_devlist)) { | 847 | /* No need to take bm_lock as we are last user freeing bm_devlist */ |
785 | dev = list_first_entry(&mid->bm_devlist, | 848 | list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) { |
786 | struct pnfs_block_dev, | ||
787 | bm_node); | ||
788 | list_del(&dev->bm_node); | 849 | list_del(&dev->bm_node); |
789 | bl_free_block_dev(dev); | 850 | bl_free_block_dev(dev); |
790 | } | 851 | } |
791 | spin_unlock(&mid->bm_lock); | ||
792 | kfree(mid); | 852 | kfree(mid); |
793 | } | 853 | } |
794 | } | 854 | } |
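free_blk_mountid() drops the bm_lock acquisition because, as the new comment says, teardown is the last user of bm_devlist; what it must keep is the _safe iterator, since each pass frees the node it stands on. A compilable user-space sketch of the safe-iteration idiom on a minimal singly linked list:

#include <stdlib.h>

struct dev_node {
	struct dev_node *next;
	/* ... per-device state ... */
};

/* Sketch: cache the successor before freeing the current node, so the
 * walk survives the free() — the same job list_for_each_entry_safe()
 * does for kernel lists. */
static void free_devlist(struct dev_node *head)
{
	struct dev_node *dev, *tmp;

	for (dev = head; dev; dev = tmp) {
		tmp = dev->next;	/* grab successor first */
		free(dev);
	}
}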
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 42acf7ef5992..e31a2df28e70 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -70,6 +70,7 @@ struct pnfs_inval_markings { | |||
70 | spinlock_t im_lock; | 70 | spinlock_t im_lock; |
71 | struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ | 71 | struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ |
72 | sector_t im_block_size; /* Server blocksize in sectors */ | 72 | sector_t im_block_size; /* Server blocksize in sectors */ |
73 | struct list_head im_extents; /* Short extents for INVAL->RW conversion */ | ||
73 | }; | 74 | }; |
74 | 75 | ||
75 | struct pnfs_inval_tracking { | 76 | struct pnfs_inval_tracking { |
@@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) | |||
105 | { | 106 | { |
106 | spin_lock_init(&marks->im_lock); | 107 | spin_lock_init(&marks->im_lock); |
107 | INIT_LIST_HEAD(&marks->im_tree.mtt_stub); | 108 | INIT_LIST_HEAD(&marks->im_tree.mtt_stub); |
109 | INIT_LIST_HEAD(&marks->im_extents); | ||
108 | marks->im_block_size = blocksize; | 110 | marks->im_block_size = blocksize; |
109 | marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, | 111 | marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, |
110 | blocksize); | 112 | blocksize); |
@@ -186,8 +188,7 @@ struct pnfs_block_extent * | |||
186 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, | 188 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, |
187 | struct pnfs_block_extent **cow_read); | 189 | struct pnfs_block_extent **cow_read); |
188 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | 190 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, |
189 | sector_t offset, sector_t length, | 191 | sector_t offset, sector_t length); |
190 | sector_t **pages); | ||
191 | void bl_put_extent(struct pnfs_block_extent *be); | 192 | void bl_put_extent(struct pnfs_block_extent *be); |
192 | struct pnfs_block_extent *bl_alloc_extent(void); | 193 | struct pnfs_block_extent *bl_alloc_extent(void); |
193 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); | 194 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); |
@@ -200,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | |||
200 | int bl_add_merge_extent(struct pnfs_block_layout *bl, | 201 | int bl_add_merge_extent(struct pnfs_block_layout *bl, |
201 | struct pnfs_block_extent *new); | 202 | struct pnfs_block_extent *new); |
202 | int bl_mark_for_commit(struct pnfs_block_extent *be, | 203 | int bl_mark_for_commit(struct pnfs_block_extent *be, |
203 | sector_t offset, sector_t length); | 204 | sector_t offset, sector_t length, |
205 | struct pnfs_block_short_extent *new); | ||
206 | int bl_push_one_short_extent(struct pnfs_inval_markings *marks); | ||
207 | struct pnfs_block_short_extent * | ||
208 | bl_pop_one_short_extent(struct pnfs_inval_markings *marks); | ||
209 | void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free); | ||
204 | 210 | ||
205 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ | 211 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ |
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 19fa7b0b8c00..1abac09f7cd5 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c | |||
@@ -110,13 +110,7 @@ static int _add_entry(struct my_tree *tree, u64 s, int32_t tag, | |||
110 | return 0; | 110 | return 0; |
111 | } else { | 111 | } else { |
112 | struct pnfs_inval_tracking *new; | 112 | struct pnfs_inval_tracking *new; |
113 | if (storage) | 113 | new = storage; |
114 | new = storage; | ||
115 | else { | ||
116 | new = kmalloc(sizeof(*new), GFP_NOFS); | ||
117 | if (!new) | ||
118 | return -ENOMEM; | ||
119 | } | ||
120 | new->it_sector = s; | 114 | new->it_sector = s; |
121 | new->it_tags = (1 << tag); | 115 | new->it_tags = (1 << tag); |
122 | list_add(&new->it_link, &pos->it_link); | 116 | list_add(&new->it_link, &pos->it_link); |
@@ -139,11 +133,13 @@ static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length) | |||
139 | } | 133 | } |
140 | 134 | ||
141 | /* Ensure that future operations on given range of tree will not malloc */ | 135 | /* Ensure that future operations on given range of tree will not malloc */ |
142 | static int _preload_range(struct my_tree *tree, u64 offset, u64 length) | 136 | static int _preload_range(struct pnfs_inval_markings *marks, |
137 | u64 offset, u64 length) | ||
143 | { | 138 | { |
144 | u64 start, end, s; | 139 | u64 start, end, s; |
145 | int count, i, used = 0, status = -ENOMEM; | 140 | int count, i, used = 0, status = -ENOMEM; |
146 | struct pnfs_inval_tracking **storage; | 141 | struct pnfs_inval_tracking **storage; |
142 | struct my_tree *tree = &marks->im_tree; | ||
147 | 143 | ||
148 | dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); | 144 | dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); |
149 | start = normalize(offset, tree->mtt_step_size); | 145 | start = normalize(offset, tree->mtt_step_size); |
@@ -161,12 +157,11 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length) | |||
161 | goto out_cleanup; | 157 | goto out_cleanup; |
162 | } | 158 | } |
163 | 159 | ||
164 | /* Now need lock - HOW??? */ | 160 | spin_lock_bh(&marks->im_lock); |
165 | |||
166 | for (s = start; s < end; s += tree->mtt_step_size) | 161 | for (s = start; s < end; s += tree->mtt_step_size) |
167 | used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); | 162 | used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); |
163 | spin_unlock_bh(&marks->im_lock); | ||
168 | 164 | ||
169 | /* Unlock - HOW??? */ | ||
170 | status = 0; | 165 | status = 0; |
171 | 166 | ||
172 | out_cleanup: | 167 | out_cleanup: |
@@ -179,41 +174,14 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length) | |||
179 | return status; | 174 | return status; |
180 | } | 175 | } |
181 | 176 | ||
182 | static void set_needs_init(sector_t *array, sector_t offset) | ||
183 | { | ||
184 | sector_t *p = array; | ||
185 | |||
186 | dprintk("%s enter\n", __func__); | ||
187 | if (!p) | ||
188 | return; | ||
189 | while (*p < offset) | ||
190 | p++; | ||
191 | if (*p == offset) | ||
192 | return; | ||
193 | else if (*p == ~0) { | ||
194 | *p++ = offset; | ||
195 | *p = ~0; | ||
196 | return; | ||
197 | } else { | ||
198 | sector_t *save = p; | ||
199 | dprintk("%s Adding %llu\n", __func__, (u64)offset); | ||
200 | while (*p != ~0) | ||
201 | p++; | ||
202 | p++; | ||
203 | memmove(save + 1, save, (char *)p - (char *)save); | ||
204 | *save = offset; | ||
205 | return; | ||
206 | } | ||
207 | } | ||
208 | |||
209 | /* We are relying on page lock to serialize this */ | 177 | /* We are relying on page lock to serialize this */ |
210 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) | 178 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) |
211 | { | 179 | { |
212 | int rv; | 180 | int rv; |
213 | 181 | ||
214 | spin_lock(&marks->im_lock); | 182 | spin_lock_bh(&marks->im_lock); |
215 | rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); | 183 | rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); |
216 | spin_unlock(&marks->im_lock); | 184 | spin_unlock_bh(&marks->im_lock); |
217 | return rv; | 185 | return rv; |
218 | } | 186 | } |
219 | 187 | ||
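Every im_lock site switches from spin_lock() to spin_lock_bh(). The likely reason is that the markings are now updated from I/O completion, which can run in softirq context: if a softirq interrupts a process-context holder on the same CPU and tries to take the same lock, it deadlocks, so process context keeps bottom halves disabled while holding it (hedged; the diff shows the conversion, not the trace). The shape of the converted sites:

/* Sketch: a lock shared with softirq context (e.g. a bio ->bi_end_io
 * path) must be taken with BHs disabled in process context. */
static int tree_has_tag(struct pnfs_inval_markings *marks, sector_t isect)
{
	int rv;

	spin_lock_bh(&marks->im_lock);	/* softirqs off on this CPU */
	rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
	spin_unlock_bh(&marks->im_lock);
	return rv;
}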
@@ -253,78 +221,39 @@ static int is_range_written(struct pnfs_inval_markings *marks, | |||
253 | { | 221 | { |
254 | int rv; | 222 | int rv; |
255 | 223 | ||
256 | spin_lock(&marks->im_lock); | 224 | spin_lock_bh(&marks->im_lock); |
257 | rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); | 225 | rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); |
258 | spin_unlock(&marks->im_lock); | 226 | spin_unlock_bh(&marks->im_lock); |
259 | return rv; | 227 | return rv; |
260 | } | 228 | } |
261 | 229 | ||
262 | /* Marks sectors in [offset, offset + length) as having been initialized. | 230 | /* Marks sectors in [offset, offset + length) as having been initialized. |
263 | * All lengths are step-aligned, where step is min(pagesize, blocksize). | 231 | * All lengths are step-aligned, where step is min(pagesize, blocksize). |
264 | * Notes where partial block is initialized, and helps prepare it for | 232 | * Currently assumes offset is page-aligned |
265 | * complete initialization later. | ||
266 | */ | 233 | */ |
267 | /* Currently assumes offset is page-aligned */ | ||
268 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | 234 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, |
269 | sector_t offset, sector_t length, | 235 | sector_t offset, sector_t length) |
270 | sector_t **pages) | ||
271 | { | 236 | { |
272 | sector_t s, start, end; | 237 | sector_t start, end; |
273 | sector_t *array = NULL; /* Pages to mark */ | ||
274 | 238 | ||
275 | dprintk("%s(offset=%llu,len=%llu) enter\n", | 239 | dprintk("%s(offset=%llu,len=%llu) enter\n", |
276 | __func__, (u64)offset, (u64)length); | 240 | __func__, (u64)offset, (u64)length); |
277 | s = max((sector_t) 3, | ||
278 | 2 * (marks->im_block_size / (PAGE_CACHE_SECTORS))); | ||
279 | dprintk("%s set max=%llu\n", __func__, (u64)s); | ||
280 | if (pages) { | ||
281 | array = kmalloc(s * sizeof(sector_t), GFP_NOFS); | ||
282 | if (!array) | ||
283 | goto outerr; | ||
284 | array[0] = ~0; | ||
285 | } | ||
286 | 241 | ||
287 | start = normalize(offset, marks->im_block_size); | 242 | start = normalize(offset, marks->im_block_size); |
288 | end = normalize_up(offset + length, marks->im_block_size); | 243 | end = normalize_up(offset + length, marks->im_block_size); |
289 | if (_preload_range(&marks->im_tree, start, end - start)) | 244 | if (_preload_range(marks, start, end - start)) |
290 | goto outerr; | 245 | goto outerr; |
291 | 246 | ||
292 | spin_lock(&marks->im_lock); | 247 | spin_lock_bh(&marks->im_lock); |
293 | |||
294 | for (s = normalize_up(start, PAGE_CACHE_SECTORS); | ||
295 | s < offset; s += PAGE_CACHE_SECTORS) { | ||
296 | dprintk("%s pre-area pages\n", __func__); | ||
297 | /* Portion of used block is not initialized */ | ||
298 | if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) | ||
299 | set_needs_init(array, s); | ||
300 | } | ||
301 | if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) | 248 | if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) |
302 | goto out_unlock; | 249 | goto out_unlock; |
303 | for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS); | 250 | spin_unlock_bh(&marks->im_lock); |
304 | s < end; s += PAGE_CACHE_SECTORS) { | ||
305 | dprintk("%s post-area pages\n", __func__); | ||
306 | if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) | ||
307 | set_needs_init(array, s); | ||
308 | } | ||
309 | |||
310 | spin_unlock(&marks->im_lock); | ||
311 | 251 | ||
312 | if (pages) { | ||
313 | if (array[0] == ~0) { | ||
314 | kfree(array); | ||
315 | *pages = NULL; | ||
316 | } else | ||
317 | *pages = array; | ||
318 | } | ||
319 | return 0; | 252 | return 0; |
320 | 253 | ||
321 | out_unlock: | 254 | out_unlock: |
322 | spin_unlock(&marks->im_lock); | 255 | spin_unlock_bh(&marks->im_lock); |
323 | outerr: | 256 | outerr: |
324 | if (pages) { | ||
325 | kfree(array); | ||
326 | *pages = NULL; | ||
327 | } | ||
328 | return -ENOMEM; | 257 | return -ENOMEM; |
329 | } | 258 | } |
330 | 259 | ||
@@ -338,9 +267,9 @@ static int mark_written_sectors(struct pnfs_inval_markings *marks, | |||
338 | 267 | ||
339 | dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, | 268 | dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, |
340 | (u64)offset, (u64)length); | 269 | (u64)offset, (u64)length); |
341 | spin_lock(&marks->im_lock); | 270 | spin_lock_bh(&marks->im_lock); |
342 | status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length); | 271 | status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length); |
343 | spin_unlock(&marks->im_lock); | 272 | spin_unlock_bh(&marks->im_lock); |
344 | return status; | 273 | return status; |
345 | } | 274 | } |
346 | 275 | ||
@@ -440,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl, | |||
440 | 369 | ||
441 | /* Note the range described by offset, length is guaranteed to be contained | 370 | /* Note the range described by offset, length is guaranteed to be contained |
442 | * within be. | 371 | * within be. |
372 | * new will be freed, either by this function or by add_to_commitlist if | ||
373 | * it decides not to use it, or after LAYOUTCOMMIT uses it in the commitlist. | ||
443 | */ | 374 | */ |
444 | int bl_mark_for_commit(struct pnfs_block_extent *be, | 375 | int bl_mark_for_commit(struct pnfs_block_extent *be, |
445 | sector_t offset, sector_t length) | 376 | sector_t offset, sector_t length, |
377 | struct pnfs_block_short_extent *new) | ||
446 | { | 378 | { |
447 | sector_t new_end, end = offset + length; | 379 | sector_t new_end, end = offset + length; |
448 | struct pnfs_block_short_extent *new; | ||
449 | struct pnfs_block_layout *bl = container_of(be->be_inval, | 380 | struct pnfs_block_layout *bl = container_of(be->be_inval, |
450 | struct pnfs_block_layout, | 381 | struct pnfs_block_layout, |
451 | bl_inval); | 382 | bl_inval); |
452 | 383 | ||
453 | new = kmalloc(sizeof(*new), GFP_NOFS); | ||
454 | if (!new) | ||
455 | return -ENOMEM; | ||
456 | |||
457 | mark_written_sectors(be->be_inval, offset, length); | 384 | mark_written_sectors(be->be_inval, offset, length); |
458 | /* We want to add the range to commit list, but it must be | 385 | /* We want to add the range to commit list, but it must be |
459 | * block-normalized, and verified that the normalized range has | 386 | * block-normalized, and verified that the normalized range has |
@@ -483,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be, | |||
483 | new->bse_mdev = be->be_mdev; | 410 | new->bse_mdev = be->be_mdev; |
484 | 411 | ||
485 | spin_lock(&bl->bl_ext_lock); | 412 | spin_lock(&bl->bl_ext_lock); |
486 | /* new will be freed, either by add_to_commitlist if it decides not | ||
487 | * to use it, or after LAYOUTCOMMIT uses it in the commitlist. | ||
488 | */ | ||
489 | add_to_commitlist(bl, new); | 413 | add_to_commitlist(bl, new); |
490 | spin_unlock(&bl->bl_ext_lock); | 414 | spin_unlock(&bl->bl_ext_lock); |
491 | return 0; | 415 | return 0; |
@@ -933,3 +857,53 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | |||
933 | } | 857 | } |
934 | } | 858 | } |
935 | } | 859 | } |
860 | |||
861 | int bl_push_one_short_extent(struct pnfs_inval_markings *marks) | ||
862 | { | ||
863 | struct pnfs_block_short_extent *new; | ||
864 | |||
865 | new = kmalloc(sizeof(*new), GFP_NOFS); | ||
866 | if (unlikely(!new)) | ||
867 | return -ENOMEM; | ||
868 | |||
869 | spin_lock_bh(&marks->im_lock); | ||
870 | list_add(&new->bse_node, &marks->im_extents); | ||
871 | spin_unlock_bh(&marks->im_lock); | ||
872 | |||
873 | return 0; | ||
874 | } | ||
875 | |||
876 | struct pnfs_block_short_extent * | ||
877 | bl_pop_one_short_extent(struct pnfs_inval_markings *marks) | ||
878 | { | ||
879 | struct pnfs_block_short_extent *rv = NULL; | ||
880 | |||
881 | spin_lock_bh(&marks->im_lock); | ||
882 | if (!list_empty(&marks->im_extents)) { | ||
883 | rv = list_entry((&marks->im_extents)->next, | ||
884 | struct pnfs_block_short_extent, bse_node); | ||
885 | list_del_init(&rv->bse_node); | ||
886 | } | ||
887 | spin_unlock_bh(&marks->im_lock); | ||
888 | |||
889 | return rv; | ||
890 | } | ||
891 | |||
892 | void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free) | ||
893 | { | ||
894 | struct pnfs_block_short_extent *se = NULL, *tmp; | ||
895 | |||
896 | if (num_to_free <= 0) | ||
897 | return; | ||
898 | |||
899 | spin_lock(&marks->im_lock); | ||
900 | list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) { | ||
901 | list_del(&se->bse_node); | ||
902 | kfree(se); | ||
903 | if (--num_to_free == 0) | ||
904 | break; | ||
905 | } | ||
906 | spin_unlock(&marks->im_lock); | ||
907 | |||
908 | BUG_ON(num_to_free > 0); | ||
909 | } | ||
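The push/pop pair added here implements a memory reserve: bl_write_pagelist() stocks one short extent per INVALID-extent use while it can still fail over to the MDS, and mark_extents_written() later pops a pre-allocated entry at a point where kmalloc() failure would be unrecoverable (the old code allocated there and had a "What if fails?" comment to show for it). A compilable user-space sketch of the reserve idiom, with hypothetical types:

#include <stdlib.h>

struct reserve_item {
	struct reserve_item *next;
};

struct reserve {
	struct reserve_item *head;
};

/* Fallible phase: stock the reserve while the caller can still back
 * out cleanly.  Returns -1 on allocation failure. */
static int reserve_push(struct reserve *r)
{
	struct reserve_item *it = malloc(sizeof(*it));

	if (!it)
		return -1;
	it->next = r->head;
	r->head = it;
	return 0;
}

/* Infallible phase: consume a stocked item; never allocates, so it
 * cannot fail where failure is no longer an option. */
static struct reserve_item *reserve_pop(struct reserve *r)
{
	struct reserve_item *it = r->head;

	if (it)
		r->head = it->next;
	return it;
}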
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 07df5f1d85e5..c89d3b9e483c 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h | |||
@@ -162,7 +162,7 @@ struct cb_layoutrecallargs { | |||
162 | }; | 162 | }; |
163 | }; | 163 | }; |
164 | 164 | ||
165 | extern unsigned nfs4_callback_layoutrecall( | 165 | extern __be32 nfs4_callback_layoutrecall( |
166 | struct cb_layoutrecallargs *args, | 166 | struct cb_layoutrecallargs *args, |
167 | void *dummy, struct cb_process_state *cps); | 167 | void *dummy, struct cb_process_state *cps); |
168 | 168 | ||
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 43926add945b..54cea8ad5a76 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -339,7 +339,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) | |||
339 | dprintk("%s enter. slotid %d seqid %d\n", | 339 | dprintk("%s enter. slotid %d seqid %d\n", |
340 | __func__, args->csa_slotid, args->csa_sequenceid); | 340 | __func__, args->csa_slotid, args->csa_sequenceid); |
341 | 341 | ||
342 | if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS) | 342 | if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS) |
343 | return htonl(NFS4ERR_BADSLOT); | 343 | return htonl(NFS4ERR_BADSLOT); |
344 | 344 | ||
345 | slot = tbl->slots + args->csa_slotid; | 345 | slot = tbl->slots + args->csa_slotid; |
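The one-character fix above closes an off-by-one: with N back-channel slots, valid indices run 0..N-1, so slotid equal to N must be rejected too, or the `slot = tbl->slots + args->csa_slotid` line indexes one element past the array. A sketch of the corrected predicate (that NFS41_BC_MAX_CALLBACKS is 1 in this era of the code is an assumption worth checking against callback.h):

```c
#include <linux/types.h>

/* Valid indices into tbl->slots are 0 .. max_slots - 1, so equality
 * must be rejected as well; hence '>=' in the fix above. */
static bool bc_slotid_in_range(u32 slotid, u32 max_slots)
{
	return slotid < max_slots;
}
```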
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 726e59a9e50f..d50b2742f23b 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
@@ -305,6 +305,10 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, | |||
305 | n = ntohl(*p++); | 305 | n = ntohl(*p++); |
306 | if (n <= 0) | 306 | if (n <= 0) |
307 | goto out; | 307 | goto out; |
308 | if (n > ULONG_MAX / sizeof(*args->devs)) { | ||
309 | status = htonl(NFS4ERR_BADXDR); | ||
310 | goto out; | ||
311 | } | ||
308 | 312 | ||
309 | args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL); | 313 | args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL); |
310 | if (!args->devs) { | 314 | if (!args->devs) { |
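The added test rejects device counts that would wrap the multiplication in the kmalloc() call below it; without it, a huge n from the wire yields a short allocation that the decode loop then overruns. The generic form of the guard, as a sketch only (kcalloc() performs the same check internally):

```c
#include <linux/kernel.h>
#include <linux/slab.h>

/* Reject counts that would wrap 'n * size' before allocating. */
static void *example_alloc_array(size_t n, size_t size, gfp_t flags)
{
	if (size != 0 && n > ULONG_MAX / size)
		return NULL;	/* n * size would overflow */
	return kmalloc(n * size, flags);
}
```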
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 873bf00d51a2..31778f74357d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -84,7 +84,7 @@ retry: | |||
84 | /* | 84 | /* |
85 | * Turn off NFSv4 uid/gid mapping when using AUTH_SYS | 85 | * Turn off NFSv4 uid/gid mapping when using AUTH_SYS |
86 | */ | 86 | */ |
87 | static int nfs4_disable_idmapping = 0; | 87 | static bool nfs4_disable_idmapping = true; |
88 | 88 | ||
89 | /* | 89 | /* |
90 | * RPC cruft for NFS | 90 | * RPC cruft for NFS |
@@ -185,7 +185,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ | |||
185 | clp->cl_minorversion = cl_init->minorversion; | 185 | clp->cl_minorversion = cl_init->minorversion; |
186 | clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; | 186 | clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; |
187 | #endif | 187 | #endif |
188 | cred = rpc_lookup_machine_cred(); | 188 | cred = rpc_lookup_machine_cred("*"); |
189 | if (!IS_ERR(cred)) | 189 | if (!IS_ERR(cred)) |
190 | clp->cl_machine_cred = cred; | 190 | clp->cl_machine_cred = cred; |
191 | nfs_fscache_get_client_cookie(clp); | 191 | nfs_fscache_get_client_cookie(clp); |
@@ -250,6 +250,11 @@ static void pnfs_init_server(struct nfs_server *server) | |||
250 | rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); | 250 | rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); |
251 | } | 251 | } |
252 | 252 | ||
253 | static void nfs4_destroy_server(struct nfs_server *server) | ||
254 | { | ||
255 | nfs4_purge_state_owners(server); | ||
256 | } | ||
257 | |||
253 | #else | 258 | #else |
254 | static void nfs4_shutdown_client(struct nfs_client *clp) | 259 | static void nfs4_shutdown_client(struct nfs_client *clp) |
255 | { | 260 | { |
@@ -1065,6 +1070,7 @@ static struct nfs_server *nfs_alloc_server(void) | |||
1065 | INIT_LIST_HEAD(&server->master_link); | 1070 | INIT_LIST_HEAD(&server->master_link); |
1066 | INIT_LIST_HEAD(&server->delegations); | 1071 | INIT_LIST_HEAD(&server->delegations); |
1067 | INIT_LIST_HEAD(&server->layouts); | 1072 | INIT_LIST_HEAD(&server->layouts); |
1073 | INIT_LIST_HEAD(&server->state_owners_lru); | ||
1068 | 1074 | ||
1069 | atomic_set(&server->active, 0); | 1075 | atomic_set(&server->active, 0); |
1070 | 1076 | ||
@@ -1538,6 +1544,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, | |||
1538 | 1544 | ||
1539 | nfs_server_insert_lists(server); | 1545 | nfs_server_insert_lists(server); |
1540 | server->mount_time = jiffies; | 1546 | server->mount_time = jiffies; |
1547 | server->destroy = nfs4_destroy_server; | ||
1541 | out: | 1548 | out: |
1542 | nfs_free_fattr(fattr); | 1549 | nfs_free_fattr(fattr); |
1543 | return error; | 1550 | return error; |
@@ -1719,6 +1726,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, | |||
1719 | 1726 | ||
1720 | /* Copy data from the source */ | 1727 | /* Copy data from the source */ |
1721 | server->nfs_client = source->nfs_client; | 1728 | server->nfs_client = source->nfs_client; |
1729 | server->destroy = source->destroy; | ||
1722 | atomic_inc(&server->nfs_client->cl_count); | 1730 | atomic_inc(&server->nfs_client->cl_count); |
1723 | nfs_server_copy_userdata(server, source); | 1731 | nfs_server_copy_userdata(server, source); |
1724 | 1732 | ||
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 606ef0f20aed..c43a452f7da2 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -272,13 +272,13 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
272 | datasync); | 272 | datasync); |
273 | 273 | ||
274 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 274 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
275 | if (ret) | ||
276 | return ret; | ||
277 | mutex_lock(&inode->i_mutex); | 275 | mutex_lock(&inode->i_mutex); |
278 | 276 | ||
279 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); | 277 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); |
280 | have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | 278 | have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); |
281 | status = nfs_commit_inode(inode, FLUSH_SYNC); | 279 | status = nfs_commit_inode(inode, FLUSH_SYNC); |
280 | if (status >= 0 && ret < 0) | ||
281 | status = ret; | ||
282 | have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | 282 | have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); |
283 | if (have_error) | 283 | if (have_error) |
284 | ret = xchg(&ctx->error, 0); | 284 | ret = xchg(&ctx->error, 0); |
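Rather than returning as soon as filemap_write_and_wait_range() fails, the reordered code still takes i_mutex and issues the COMMIT, then folds the two results together. The combining rule, isolated as a small sketch: a commit failure wins, and the earlier write-back error is surfaced only when the commit itself succeeded.

```c
/* Combine the write-back result with the COMMIT result. */
static int combine_fsync_errors(int writeback_err, int commit_err)
{
	if (commit_err < 0)
		return commit_err;	/* commit failure wins */
	if (writeback_err < 0)
		return writeback_err;	/* else report the flush error */
	return 0;
}
```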
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 47d1c6ff2d8e..2c05f1991e1e 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -38,6 +38,89 @@ | |||
38 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
40 | #include <linux/nfs_idmap.h> | 40 | #include <linux/nfs_idmap.h> |
41 | #include <linux/nfs_fs.h> | ||
42 | |||
43 | /** | ||
44 | * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields | ||
45 | * @fattr: fully initialised struct nfs_fattr | ||
46 | * @owner_name: owner name string cache | ||
47 | * @group_name: group name string cache | ||
48 | */ | ||
49 | void nfs_fattr_init_names(struct nfs_fattr *fattr, | ||
50 | struct nfs4_string *owner_name, | ||
51 | struct nfs4_string *group_name) | ||
52 | { | ||
53 | fattr->owner_name = owner_name; | ||
54 | fattr->group_name = group_name; | ||
55 | } | ||
56 | |||
57 | static void nfs_fattr_free_owner_name(struct nfs_fattr *fattr) | ||
58 | { | ||
59 | fattr->valid &= ~NFS_ATTR_FATTR_OWNER_NAME; | ||
60 | kfree(fattr->owner_name->data); | ||
61 | } | ||
62 | |||
63 | static void nfs_fattr_free_group_name(struct nfs_fattr *fattr) | ||
64 | { | ||
65 | fattr->valid &= ~NFS_ATTR_FATTR_GROUP_NAME; | ||
66 | kfree(fattr->group_name->data); | ||
67 | } | ||
68 | |||
69 | static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr) | ||
70 | { | ||
71 | struct nfs4_string *owner = fattr->owner_name; | ||
72 | __u32 uid; | ||
73 | |||
74 | if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)) | ||
75 | return false; | ||
76 | if (nfs_map_name_to_uid(server, owner->data, owner->len, &uid) == 0) { | ||
77 | fattr->uid = uid; | ||
78 | fattr->valid |= NFS_ATTR_FATTR_OWNER; | ||
79 | } | ||
80 | return true; | ||
81 | } | ||
82 | |||
83 | static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr) | ||
84 | { | ||
85 | struct nfs4_string *group = fattr->group_name; | ||
86 | __u32 gid; | ||
87 | |||
88 | if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)) | ||
89 | return false; | ||
90 | if (nfs_map_group_to_gid(server, group->data, group->len, &gid) == 0) { | ||
91 | fattr->gid = gid; | ||
92 | fattr->valid |= NFS_ATTR_FATTR_GROUP; | ||
93 | } | ||
94 | return true; | ||
95 | } | ||
96 | |||
97 | /** | ||
98 | * nfs_fattr_free_names - free up the NFSv4 owner and group strings | ||
99 | * @fattr: a fully initialised nfs_fattr structure | ||
100 | */ | ||
101 | void nfs_fattr_free_names(struct nfs_fattr *fattr) | ||
102 | { | ||
103 | if (fattr->valid & NFS_ATTR_FATTR_OWNER_NAME) | ||
104 | nfs_fattr_free_owner_name(fattr); | ||
105 | if (fattr->valid & NFS_ATTR_FATTR_GROUP_NAME) | ||
106 | nfs_fattr_free_group_name(fattr); | ||
107 | } | ||
108 | |||
109 | /** | ||
110 | * nfs_fattr_map_and_free_names - map owner/group strings into uid/gid and free | ||
111 | * @server: pointer to the filesystem nfs_server structure | ||
112 | * @fattr: a fully initialised nfs_fattr structure | ||
113 | * | ||
114 | * This helper maps the cached NFSv4 owner/group strings in fattr into | ||
115 | * their numeric uid/gid equivalents, and then frees the cached strings. | ||
116 | */ | ||
117 | void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *fattr) | ||
118 | { | ||
119 | if (nfs_fattr_map_owner_name(server, fattr)) | ||
120 | nfs_fattr_free_owner_name(fattr); | ||
121 | if (nfs_fattr_map_group_name(server, fattr)) | ||
122 | nfs_fattr_free_group_name(fattr); | ||
123 | } | ||
41 | 124 | ||
42 | static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) | 125 | static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) |
43 | { | 126 | { |
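These helpers let the XDR layer defer idmapper upcalls, which may sleep, out of the decode path: decode stashes the raw owner/group strings (see the nfs4xdr.c hunks further down) and the RPC completion path maps and frees them later. A sketch of the flow, with a simplified reply structure standing in for the nfs4_opendata shown in the nfs4proc.c hunks below:

```c
#include <linux/nfs_fs.h>
#include <linux/nfs_idmap.h>

struct example_reply {			/* simplified nfs4_opendata */
	struct nfs_fattr f_attr;
	struct nfs4_string owner_name;
	struct nfs4_string group_name;
};

static void example_reply_init(struct example_reply *p)
{
	nfs_fattr_init(&p->f_attr);
	/* Point the fattr at caller-owned string slots before decode. */
	nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
}

static void example_reply_finish(struct nfs_server *server,
				 struct example_reply *p)
{
	/* Now in a context that may sleep: upcall, then free strings. */
	nfs_fattr_map_and_free_names(server, &p->f_attr);
}
```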
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 81db25e92e10..f649fba8c384 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -57,7 +57,7 @@ | |||
57 | #define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 | 57 | #define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 |
58 | 58 | ||
59 | /* Default is to see 64-bit inode numbers */ | 59 | /* Default is to see 64-bit inode numbers */ |
60 | static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; | 60 | static bool enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; |
61 | 61 | ||
62 | static void nfs_invalidate_inode(struct inode *); | 62 | static void nfs_invalidate_inode(struct inode *); |
63 | static int nfs_update_inode(struct inode *, struct nfs_fattr *); | 63 | static int nfs_update_inode(struct inode *, struct nfs_fattr *); |
@@ -1020,6 +1020,8 @@ void nfs_fattr_init(struct nfs_fattr *fattr) | |||
1020 | fattr->valid = 0; | 1020 | fattr->valid = 0; |
1021 | fattr->time_start = jiffies; | 1021 | fattr->time_start = jiffies; |
1022 | fattr->gencount = nfs_inc_attr_generation_counter(); | 1022 | fattr->gencount = nfs_inc_attr_generation_counter(); |
1023 | fattr->owner_name = NULL; | ||
1024 | fattr->group_name = NULL; | ||
1023 | } | 1025 | } |
1024 | 1026 | ||
1025 | struct nfs_fattr *nfs_alloc_fattr(void) | 1027 | struct nfs_fattr *nfs_alloc_fattr(void) |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3f4d95751d52..8102db9b926c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -307,6 +307,8 @@ extern void nfs_readdata_release(struct nfs_read_data *rdata); | |||
307 | /* write.c */ | 307 | /* write.c */ |
308 | extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, | 308 | extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, |
309 | struct list_head *head); | 309 | struct list_head *head); |
310 | extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, | ||
311 | struct inode *inode, int ioflags); | ||
310 | extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); | 312 | extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); |
311 | extern void nfs_writedata_release(struct nfs_write_data *wdata); | 313 | extern void nfs_writedata_release(struct nfs_write_data *wdata); |
312 | extern void nfs_commit_free(struct nfs_write_data *p); | 314 | extern void nfs_commit_free(struct nfs_write_data *p); |
@@ -330,7 +332,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data); | |||
330 | 332 | ||
331 | #ifdef CONFIG_MIGRATION | 333 | #ifdef CONFIG_MIGRATION |
332 | extern int nfs_migrate_page(struct address_space *, | 334 | extern int nfs_migrate_page(struct address_space *, |
333 | struct page *, struct page *); | 335 | struct page *, struct page *, enum migrate_mode); |
334 | #else | 336 | #else |
335 | #define nfs_migrate_page NULL | 337 | #define nfs_migrate_page NULL |
336 | #endif | 338 | #endif |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 693ae22f8731..4d7d0aedc101 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -94,6 +94,8 @@ struct nfs_unique_id { | |||
94 | struct nfs4_state_owner { | 94 | struct nfs4_state_owner { |
95 | struct nfs_unique_id so_owner_id; | 95 | struct nfs_unique_id so_owner_id; |
96 | struct nfs_server *so_server; | 96 | struct nfs_server *so_server; |
97 | struct list_head so_lru; | ||
98 | unsigned long so_expires; | ||
97 | struct rb_node so_server_node; | 99 | struct rb_node so_server_node; |
98 | 100 | ||
99 | struct rpc_cred *so_cred; /* Associated cred */ | 101 | struct rpc_cred *so_cred; /* Associated cred */ |
@@ -319,6 +321,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) | |||
319 | 321 | ||
320 | extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); | 322 | extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); |
321 | extern void nfs4_put_state_owner(struct nfs4_state_owner *); | 323 | extern void nfs4_put_state_owner(struct nfs4_state_owner *); |
324 | extern void nfs4_purge_state_owners(struct nfs_server *); | ||
322 | extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); | 325 | extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); |
323 | extern void nfs4_put_open_state(struct nfs4_state *); | 326 | extern void nfs4_put_open_state(struct nfs4_state *); |
324 | extern void nfs4_close_state(struct nfs4_state *, fmode_t); | 327 | extern void nfs4_close_state(struct nfs4_state *, fmode_t); |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index a62d36b9a99e..71ec08617e23 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -49,13 +49,14 @@ filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg, | |||
49 | loff_t offset) | 49 | loff_t offset) |
50 | { | 50 | { |
51 | u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count; | 51 | u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count; |
52 | u64 tmp; | 52 | u64 stripe_no; |
53 | u32 rem; | ||
53 | 54 | ||
54 | offset -= flseg->pattern_offset; | 55 | offset -= flseg->pattern_offset; |
55 | tmp = offset; | 56 | stripe_no = div_u64(offset, stripe_width); |
56 | do_div(tmp, stripe_width); | 57 | div_u64_rem(offset, flseg->stripe_unit, &rem); |
57 | 58 | ||
58 | return tmp * flseg->stripe_unit + do_div(offset, flseg->stripe_unit); | 59 | return stripe_no * flseg->stripe_unit + rem; |
59 | } | 60 | } |
60 | 61 | ||
61 | /* This function is used by the layout driver to calculate the | 62 | /* This function is used by the layout driver to calculate the |
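do_div() updates its 64-bit dividend in place and evaluates to the remainder, which made the old expression easy to misread and clobbered offset along the way; div_u64()/div_u64_rem() keep quotient and remainder explicit. A worked form of the corrected computation (pattern_offset already subtracted): with stripe_unit = 64K and stripe_count = 4, so stripe_width = 256K, an offset of 300K gives stripe_no = 1 and rem = 44K, i.e. a dense offset of 108K.

```c
#include <linux/math64.h>
#include <linux/types.h>

static u64 dense_offset(u64 offset, u32 stripe_unit, u32 stripe_count)
{
	u32 stripe_width = stripe_unit * stripe_count;
	u64 stripe_no = div_u64(offset, stripe_width);	/* which stripe */
	u32 rem;

	div_u64_rem(offset, stripe_unit, &rem);		/* offset in unit */
	return stripe_no * stripe_unit + rem;
}
```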
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index ed388aae9689..8ae91908f5aa 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -382,7 +382,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) | |||
382 | { | 382 | { |
383 | struct nfs4_pnfs_ds_addr *da = NULL; | 383 | struct nfs4_pnfs_ds_addr *da = NULL; |
384 | char *buf, *portstr; | 384 | char *buf, *portstr; |
385 | u32 port; | 385 | __be16 port; |
386 | int nlen, rlen; | 386 | int nlen, rlen; |
387 | int tmp[2]; | 387 | int tmp[2]; |
388 | __be32 *p; | 388 | __be32 *p; |
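The port extracted from the universal address is stored exactly as htons() produces it, so __be16 is the honest type annotation where u32 hid the byte order. A sketch of the composition, assuming the two decimal port octets have already been parsed out of the address string:

```c
#include <linux/in.h>
#include <linux/types.h>

/* p1/p2 are the two decimal octets from "...a.b.c.d.p1.p2". */
static __be16 example_ua_port(int p1, int p2)
{
	return htons((p1 << 8) | p2);
}
```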
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dcda0ba7af60..f0c849c98fe4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <linux/namei.h> | 52 | #include <linux/namei.h> |
53 | #include <linux/mount.h> | 53 | #include <linux/mount.h> |
54 | #include <linux/module.h> | 54 | #include <linux/module.h> |
55 | #include <linux/nfs_idmap.h> | ||
55 | #include <linux/sunrpc/bc_xprt.h> | 56 | #include <linux/sunrpc/bc_xprt.h> |
56 | #include <linux/xattr.h> | 57 | #include <linux/xattr.h> |
57 | #include <linux/utsname.h> | 58 | #include <linux/utsname.h> |
@@ -364,9 +365,8 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp | |||
364 | * Must be called while holding tbl->slot_tbl_lock | 365 | * Must be called while holding tbl->slot_tbl_lock |
365 | */ | 366 | */ |
366 | static void | 367 | static void |
367 | nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot) | 368 | nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) |
368 | { | 369 | { |
369 | int free_slotid = free_slot - tbl->slots; | ||
370 | int slotid = free_slotid; | 370 | int slotid = free_slotid; |
371 | 371 | ||
372 | BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE); | 372 | BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE); |
@@ -431,7 +431,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) | |||
431 | } | 431 | } |
432 | 432 | ||
433 | spin_lock(&tbl->slot_tbl_lock); | 433 | spin_lock(&tbl->slot_tbl_lock); |
434 | nfs4_free_slot(tbl, res->sr_slot); | 434 | nfs4_free_slot(tbl, res->sr_slot - tbl->slots); |
435 | nfs4_check_drain_fc_complete(res->sr_session); | 435 | nfs4_check_drain_fc_complete(res->sr_session); |
436 | spin_unlock(&tbl->slot_tbl_lock); | 436 | spin_unlock(&tbl->slot_tbl_lock); |
437 | res->sr_slot = NULL; | 437 | res->sr_slot = NULL; |
@@ -554,13 +554,10 @@ int nfs41_setup_sequence(struct nfs4_session *session, | |||
554 | spin_lock(&tbl->slot_tbl_lock); | 554 | spin_lock(&tbl->slot_tbl_lock); |
555 | if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && | 555 | if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && |
556 | !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { | 556 | !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { |
557 | /* | 557 | /* The state manager will wait until the slot table is empty */ |
558 | * The state manager will wait until the slot table is empty. | ||
559 | * Schedule the reset thread | ||
560 | */ | ||
561 | rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); | 558 | rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); |
562 | spin_unlock(&tbl->slot_tbl_lock); | 559 | spin_unlock(&tbl->slot_tbl_lock); |
563 | dprintk("%s Schedule Session Reset\n", __func__); | 560 | dprintk("%s session is draining\n", __func__); |
564 | return -EAGAIN; | 561 | return -EAGAIN; |
565 | } | 562 | } |
566 | 563 | ||
@@ -765,6 +762,8 @@ struct nfs4_opendata { | |||
765 | struct nfs_openres o_res; | 762 | struct nfs_openres o_res; |
766 | struct nfs_open_confirmargs c_arg; | 763 | struct nfs_open_confirmargs c_arg; |
767 | struct nfs_open_confirmres c_res; | 764 | struct nfs_open_confirmres c_res; |
765 | struct nfs4_string owner_name; | ||
766 | struct nfs4_string group_name; | ||
768 | struct nfs_fattr f_attr; | 767 | struct nfs_fattr f_attr; |
769 | struct nfs_fattr dir_attr; | 768 | struct nfs_fattr dir_attr; |
770 | struct dentry *dir; | 769 | struct dentry *dir; |
@@ -788,6 +787,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) | |||
788 | p->o_res.server = p->o_arg.server; | 787 | p->o_res.server = p->o_arg.server; |
789 | nfs_fattr_init(&p->f_attr); | 788 | nfs_fattr_init(&p->f_attr); |
790 | nfs_fattr_init(&p->dir_attr); | 789 | nfs_fattr_init(&p->dir_attr); |
790 | nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); | ||
791 | } | 791 | } |
792 | 792 | ||
793 | static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | 793 | static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, |
@@ -819,6 +819,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
819 | p->o_arg.name = &dentry->d_name; | 819 | p->o_arg.name = &dentry->d_name; |
820 | p->o_arg.server = server; | 820 | p->o_arg.server = server; |
821 | p->o_arg.bitmask = server->attr_bitmask; | 821 | p->o_arg.bitmask = server->attr_bitmask; |
822 | p->o_arg.dir_bitmask = server->cache_consistency_bitmask; | ||
822 | p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; | 823 | p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; |
823 | if (flags & O_CREAT) { | 824 | if (flags & O_CREAT) { |
824 | u32 *s; | 825 | u32 *s; |
@@ -855,6 +856,7 @@ static void nfs4_opendata_free(struct kref *kref) | |||
855 | dput(p->dir); | 856 | dput(p->dir); |
856 | dput(p->dentry); | 857 | dput(p->dentry); |
857 | nfs_sb_deactive(sb); | 858 | nfs_sb_deactive(sb); |
859 | nfs_fattr_free_names(&p->f_attr); | ||
858 | kfree(p); | 860 | kfree(p); |
859 | } | 861 | } |
860 | 862 | ||
@@ -1579,6 +1581,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) | |||
1579 | if (status != 0 || !data->rpc_done) | 1581 | if (status != 0 || !data->rpc_done) |
1580 | return status; | 1582 | return status; |
1581 | 1583 | ||
1584 | nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); | ||
1585 | |||
1582 | nfs_refresh_inode(dir, o_res->dir_attr); | 1586 | nfs_refresh_inode(dir, o_res->dir_attr); |
1583 | 1587 | ||
1584 | if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { | 1588 | if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { |
@@ -1611,6 +1615,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) | |||
1611 | return status; | 1615 | return status; |
1612 | } | 1616 | } |
1613 | 1617 | ||
1618 | nfs_fattr_map_and_free_names(server, &data->f_attr); | ||
1619 | |||
1614 | if (o_arg->open_flags & O_CREAT) { | 1620 | if (o_arg->open_flags & O_CREAT) { |
1615 | update_changeattr(dir, &o_res->cinfo); | 1621 | update_changeattr(dir, &o_res->cinfo); |
1616 | nfs_post_op_update_inode(dir, o_res->dir_attr); | 1622 | nfs_post_op_update_inode(dir, o_res->dir_attr); |
@@ -3431,19 +3437,6 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) | |||
3431 | */ | 3437 | */ |
3432 | #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) | 3438 | #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) |
3433 | 3439 | ||
3434 | static void buf_to_pages(const void *buf, size_t buflen, | ||
3435 | struct page **pages, unsigned int *pgbase) | ||
3436 | { | ||
3437 | const void *p = buf; | ||
3438 | |||
3439 | *pgbase = offset_in_page(buf); | ||
3440 | p -= *pgbase; | ||
3441 | while (p < buf + buflen) { | ||
3442 | *(pages++) = virt_to_page(p); | ||
3443 | p += PAGE_CACHE_SIZE; | ||
3444 | } | ||
3445 | } | ||
3446 | |||
3447 | static int buf_to_pages_noslab(const void *buf, size_t buflen, | 3440 | static int buf_to_pages_noslab(const void *buf, size_t buflen, |
3448 | struct page **pages, unsigned int *pgbase) | 3441 | struct page **pages, unsigned int *pgbase) |
3449 | { | 3442 | { |
@@ -3540,9 +3533,19 @@ out: | |||
3540 | nfs4_set_cached_acl(inode, acl); | 3533 | nfs4_set_cached_acl(inode, acl); |
3541 | } | 3534 | } |
3542 | 3535 | ||
3536 | /* | ||
3537 | * The getxattr API returns the required buffer length when called with a | ||
3538 | * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating | ||
3539 | * the required buf. On a NULL buf, we send a page of data to the server | ||
3540 | * guessing that the ACL request can be serviced by a page. If so, we cache | ||
3541 | * up to the page of ACL data, and the 2nd call to getxattr is serviced by | ||
3542 | * the cache. If not so, we throw away the page, and cache the required | ||
3543 | * length. The next getxattr call will then produce another round trip to | ||
3544 | * the server, this time with the input buf of the required size. | ||
3545 | */ | ||
3543 | static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) | 3546 | static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) |
3544 | { | 3547 | { |
3545 | struct page *pages[NFS4ACL_MAXPAGES]; | 3548 | struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; |
3546 | struct nfs_getaclargs args = { | 3549 | struct nfs_getaclargs args = { |
3547 | .fh = NFS_FH(inode), | 3550 | .fh = NFS_FH(inode), |
3548 | .acl_pages = pages, | 3551 | .acl_pages = pages, |
@@ -3557,41 +3560,60 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu | |||
3557 | .rpc_argp = &args, | 3560 | .rpc_argp = &args, |
3558 | .rpc_resp = &res, | 3561 | .rpc_resp = &res, |
3559 | }; | 3562 | }; |
3560 | struct page *localpage = NULL; | 3563 | int ret = -ENOMEM, npages, i, acl_len = 0; |
3561 | int ret; | ||
3562 | 3564 | ||
3563 | if (buflen < PAGE_SIZE) { | 3565 | npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; |
3564 | /* As long as we're doing a round trip to the server anyway, | 3566 | /* As long as we're doing a round trip to the server anyway, |
3565 | * let's be prepared for a page of acl data. */ | 3567 | * let's be prepared for a page of acl data. */ |
3566 | localpage = alloc_page(GFP_KERNEL); | 3568 | if (npages == 0) |
3567 | resp_buf = page_address(localpage); | 3569 | npages = 1; |
3568 | if (localpage == NULL) | 3570 | |
3569 | return -ENOMEM; | 3571 | for (i = 0; i < npages; i++) { |
3570 | args.acl_pages[0] = localpage; | 3572 | pages[i] = alloc_page(GFP_KERNEL); |
3571 | args.acl_pgbase = 0; | 3573 | if (!pages[i]) |
3572 | args.acl_len = PAGE_SIZE; | 3574 | goto out_free; |
3573 | } else { | ||
3574 | resp_buf = buf; | ||
3575 | buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); | ||
3576 | } | 3575 | } |
3577 | ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); | 3576 | if (npages > 1) { |
3577 | /* for decoding across pages */ | ||
3578 | args.acl_scratch = alloc_page(GFP_KERNEL); | ||
3579 | if (!args.acl_scratch) | ||
3580 | goto out_free; | ||
3581 | } | ||
3582 | args.acl_len = npages * PAGE_SIZE; | ||
3583 | args.acl_pgbase = 0; | ||
3584 | /* Let decode_getacl know not to fail if the ACL data is larger than | ||
3585 | * the page we send as a guess */ | ||
3586 | if (buf == NULL) | ||
3587 | res.acl_flags |= NFS4_ACL_LEN_REQUEST; | ||
3588 | resp_buf = page_address(pages[0]); | ||
3589 | |||
3590 | dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", | ||
3591 | __func__, buf, buflen, npages, args.acl_len); | ||
3592 | ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), | ||
3593 | &msg, &args.seq_args, &res.seq_res, 0); | ||
3578 | if (ret) | 3594 | if (ret) |
3579 | goto out_free; | 3595 | goto out_free; |
3580 | if (res.acl_len > args.acl_len) | 3596 | |
3581 | nfs4_write_cached_acl(inode, NULL, res.acl_len); | 3597 | acl_len = res.acl_len - res.acl_data_offset; |
3598 | if (acl_len > args.acl_len) | ||
3599 | nfs4_write_cached_acl(inode, NULL, acl_len); | ||
3582 | else | 3600 | else |
3583 | nfs4_write_cached_acl(inode, resp_buf, res.acl_len); | 3601 | nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset, |
3602 | acl_len); | ||
3584 | if (buf) { | 3603 | if (buf) { |
3585 | ret = -ERANGE; | 3604 | ret = -ERANGE; |
3586 | if (res.acl_len > buflen) | 3605 | if (acl_len > buflen) |
3587 | goto out_free; | 3606 | goto out_free; |
3588 | if (localpage) | 3607 | _copy_from_pages(buf, pages, res.acl_data_offset, |
3589 | memcpy(buf, resp_buf, res.acl_len); | 3608 | res.acl_len); |
3590 | } | 3609 | } |
3591 | ret = res.acl_len; | 3610 | ret = acl_len; |
3592 | out_free: | 3611 | out_free: |
3593 | if (localpage) | 3612 | for (i = 0; i < npages; i++) |
3594 | __free_page(localpage); | 3613 | if (pages[i]) |
3614 | __free_page(pages[i]); | ||
3615 | if (args.acl_scratch) | ||
3616 | __free_page(args.acl_scratch); | ||
3595 | return ret; | 3617 | return ret; |
3596 | } | 3618 | } |
3597 | 3619 | ||
@@ -3622,6 +3644,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) | |||
3622 | nfs_zap_acl_cache(inode); | 3644 | nfs_zap_acl_cache(inode); |
3623 | ret = nfs4_read_cached_acl(inode, buf, buflen); | 3645 | ret = nfs4_read_cached_acl(inode, buf, buflen); |
3624 | if (ret != -ENOENT) | 3646 | if (ret != -ENOENT) |
3647 | /* -ENOENT is returned if there is no ACL or if there is an ACL | ||
3648 | * but no cached acl data, just the acl length */ | ||
3625 | return ret; | 3649 | return ret; |
3626 | return nfs4_get_acl_uncached(inode, buf, buflen); | 3650 | return nfs4_get_acl_uncached(inode, buf, buflen); |
3627 | } | 3651 | } |
@@ -5022,23 +5046,6 @@ out: | |||
5022 | return ret; | 5046 | return ret; |
5023 | } | 5047 | } |
5024 | 5048 | ||
5025 | /* | ||
5026 | * Reset the forechannel and backchannel slot tables | ||
5027 | */ | ||
5028 | static int nfs4_reset_slot_tables(struct nfs4_session *session) | ||
5029 | { | ||
5030 | int status; | ||
5031 | |||
5032 | status = nfs4_reset_slot_table(&session->fc_slot_table, | ||
5033 | session->fc_attrs.max_reqs, 1); | ||
5034 | if (status) | ||
5035 | return status; | ||
5036 | |||
5037 | status = nfs4_reset_slot_table(&session->bc_slot_table, | ||
5038 | session->bc_attrs.max_reqs, 0); | ||
5039 | return status; | ||
5040 | } | ||
5041 | |||
5042 | /* Destroy the slot table */ | 5049 | /* Destroy the slot table */ |
5043 | static void nfs4_destroy_slot_tables(struct nfs4_session *session) | 5050 | static void nfs4_destroy_slot_tables(struct nfs4_session *session) |
5044 | { | 5051 | { |
@@ -5084,29 +5091,35 @@ out: | |||
5084 | } | 5091 | } |
5085 | 5092 | ||
5086 | /* | 5093 | /* |
5087 | * Initialize the forechannel and backchannel tables | 5094 | * Initialize or reset the forechannel and backchannel tables |
5088 | */ | 5095 | */ |
5089 | static int nfs4_init_slot_tables(struct nfs4_session *session) | 5096 | static int nfs4_setup_session_slot_tables(struct nfs4_session *ses) |
5090 | { | 5097 | { |
5091 | struct nfs4_slot_table *tbl; | 5098 | struct nfs4_slot_table *tbl; |
5092 | int status = 0; | 5099 | int status; |
5093 | 5100 | ||
5094 | tbl = &session->fc_slot_table; | 5101 | dprintk("--> %s\n", __func__); |
5102 | /* Fore channel */ | ||
5103 | tbl = &ses->fc_slot_table; | ||
5095 | if (tbl->slots == NULL) { | 5104 | if (tbl->slots == NULL) { |
5096 | status = nfs4_init_slot_table(tbl, | 5105 | status = nfs4_init_slot_table(tbl, ses->fc_attrs.max_reqs, 1); |
5097 | session->fc_attrs.max_reqs, 1); | 5106 | if (status) /* -ENOMEM */ |
5107 | return status; | ||
5108 | } else { | ||
5109 | status = nfs4_reset_slot_table(tbl, ses->fc_attrs.max_reqs, 1); | ||
5098 | if (status) | 5110 | if (status) |
5099 | return status; | 5111 | return status; |
5100 | } | 5112 | } |
5101 | 5113 | /* Back channel */ | |
5102 | tbl = &session->bc_slot_table; | 5114 | tbl = &ses->bc_slot_table; |
5103 | if (tbl->slots == NULL) { | 5115 | if (tbl->slots == NULL) { |
5104 | status = nfs4_init_slot_table(tbl, | 5116 | status = nfs4_init_slot_table(tbl, ses->bc_attrs.max_reqs, 0); |
5105 | session->bc_attrs.max_reqs, 0); | ||
5106 | if (status) | 5117 | if (status) |
5107 | nfs4_destroy_slot_tables(session); | 5118 | /* Fore and back channel share a connection so get |
5108 | } | 5119 | * both slot tables or neither */ |
5109 | 5120 | nfs4_destroy_slot_tables(ses); | |
5121 | } else | ||
5122 | status = nfs4_reset_slot_table(tbl, ses->bc_attrs.max_reqs, 0); | ||
5110 | return status; | 5123 | return status; |
5111 | } | 5124 | } |
5112 | 5125 | ||
@@ -5294,13 +5307,9 @@ int nfs4_proc_create_session(struct nfs_client *clp) | |||
5294 | if (status) | 5307 | if (status) |
5295 | goto out; | 5308 | goto out; |
5296 | 5309 | ||
5297 | /* Init and reset the fore channel */ | 5310 | /* Init or reset the session slot tables */ |
5298 | status = nfs4_init_slot_tables(session); | 5311 | status = nfs4_setup_session_slot_tables(session); |
5299 | dprintk("slot table initialization returned %d\n", status); | 5312 | dprintk("slot table setup returned %d\n", status); |
5300 | if (status) | ||
5301 | goto out; | ||
5302 | status = nfs4_reset_slot_tables(session); | ||
5303 | dprintk("slot table reset returned %d\n", status); | ||
5304 | if (status) | 5313 | if (status) |
5305 | goto out; | 5314 | goto out; |
5306 | 5315 | ||
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6a7107ae6b72..a53f33b4ac3a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/ratelimit.h> | 49 | #include <linux/ratelimit.h> |
50 | #include <linux/workqueue.h> | 50 | #include <linux/workqueue.h> |
51 | #include <linux/bitops.h> | 51 | #include <linux/bitops.h> |
52 | #include <linux/jiffies.h> | ||
52 | 53 | ||
53 | #include "nfs4_fs.h" | 54 | #include "nfs4_fs.h" |
54 | #include "callback.h" | 55 | #include "callback.h" |
@@ -377,31 +378,24 @@ nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) | |||
377 | { | 378 | { |
378 | struct rb_node **p = &server->state_owners.rb_node, | 379 | struct rb_node **p = &server->state_owners.rb_node, |
379 | *parent = NULL; | 380 | *parent = NULL; |
380 | struct nfs4_state_owner *sp, *res = NULL; | 381 | struct nfs4_state_owner *sp; |
381 | 382 | ||
382 | while (*p != NULL) { | 383 | while (*p != NULL) { |
383 | parent = *p; | 384 | parent = *p; |
384 | sp = rb_entry(parent, struct nfs4_state_owner, so_server_node); | 385 | sp = rb_entry(parent, struct nfs4_state_owner, so_server_node); |
385 | 386 | ||
386 | if (server < sp->so_server) { | ||
387 | p = &parent->rb_left; | ||
388 | continue; | ||
389 | } | ||
390 | if (server > sp->so_server) { | ||
391 | p = &parent->rb_right; | ||
392 | continue; | ||
393 | } | ||
394 | if (cred < sp->so_cred) | 387 | if (cred < sp->so_cred) |
395 | p = &parent->rb_left; | 388 | p = &parent->rb_left; |
396 | else if (cred > sp->so_cred) | 389 | else if (cred > sp->so_cred) |
397 | p = &parent->rb_right; | 390 | p = &parent->rb_right; |
398 | else { | 391 | else { |
392 | if (!list_empty(&sp->so_lru)) | ||
393 | list_del_init(&sp->so_lru); | ||
399 | atomic_inc(&sp->so_count); | 394 | atomic_inc(&sp->so_count); |
400 | res = sp; | 395 | return sp; |
401 | break; | ||
402 | } | 396 | } |
403 | } | 397 | } |
404 | return res; | 398 | return NULL; |
405 | } | 399 | } |
406 | 400 | ||
407 | static struct nfs4_state_owner * | 401 | static struct nfs4_state_owner * |
@@ -421,6 +415,8 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) | |||
421 | else if (new->so_cred > sp->so_cred) | 415 | else if (new->so_cred > sp->so_cred) |
422 | p = &parent->rb_right; | 416 | p = &parent->rb_right; |
423 | else { | 417 | else { |
418 | if (!list_empty(&sp->so_lru)) | ||
419 | list_del_init(&sp->so_lru); | ||
424 | atomic_inc(&sp->so_count); | 420 | atomic_inc(&sp->so_count); |
425 | return sp; | 421 | return sp; |
426 | } | 422 | } |
@@ -462,6 +458,7 @@ nfs4_alloc_state_owner(void) | |||
462 | spin_lock_init(&sp->so_sequence.lock); | 458 | spin_lock_init(&sp->so_sequence.lock); |
463 | INIT_LIST_HEAD(&sp->so_sequence.list); | 459 | INIT_LIST_HEAD(&sp->so_sequence.list); |
464 | atomic_set(&sp->so_count, 1); | 460 | atomic_set(&sp->so_count, 1); |
461 | INIT_LIST_HEAD(&sp->so_lru); | ||
465 | return sp; | 462 | return sp; |
466 | } | 463 | } |
467 | 464 | ||
@@ -479,6 +476,38 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp) | |||
479 | } | 476 | } |
480 | } | 477 | } |
481 | 478 | ||
479 | static void nfs4_free_state_owner(struct nfs4_state_owner *sp) | ||
480 | { | ||
481 | rpc_destroy_wait_queue(&sp->so_sequence.wait); | ||
482 | put_rpccred(sp->so_cred); | ||
483 | kfree(sp); | ||
484 | } | ||
485 | |||
486 | static void nfs4_gc_state_owners(struct nfs_server *server) | ||
487 | { | ||
488 | struct nfs_client *clp = server->nfs_client; | ||
489 | struct nfs4_state_owner *sp, *tmp; | ||
490 | unsigned long time_min, time_max; | ||
491 | LIST_HEAD(doomed); | ||
492 | |||
493 | spin_lock(&clp->cl_lock); | ||
494 | time_max = jiffies; | ||
495 | time_min = (long)time_max - (long)clp->cl_lease_time; | ||
496 | list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) { | ||
497 | /* NB: LRU is sorted so that oldest is at the head */ | ||
498 | if (time_in_range(sp->so_expires, time_min, time_max)) | ||
499 | break; | ||
500 | list_move(&sp->so_lru, &doomed); | ||
501 | nfs4_remove_state_owner_locked(sp); | ||
502 | } | ||
503 | spin_unlock(&clp->cl_lock); | ||
504 | |||
505 | list_for_each_entry_safe(sp, tmp, &doomed, so_lru) { | ||
506 | list_del(&sp->so_lru); | ||
507 | nfs4_free_state_owner(sp); | ||
508 | } | ||
509 | } | ||
510 | |||
482 | /** | 511 | /** |
483 | * nfs4_get_state_owner - Look up a state owner given a credential | 512 | * nfs4_get_state_owner - Look up a state owner given a credential |
484 | * @server: nfs_server to search | 513 | * @server: nfs_server to search |
@@ -496,10 +525,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, | |||
496 | sp = nfs4_find_state_owner_locked(server, cred); | 525 | sp = nfs4_find_state_owner_locked(server, cred); |
497 | spin_unlock(&clp->cl_lock); | 526 | spin_unlock(&clp->cl_lock); |
498 | if (sp != NULL) | 527 | if (sp != NULL) |
499 | return sp; | 528 | goto out; |
500 | new = nfs4_alloc_state_owner(); | 529 | new = nfs4_alloc_state_owner(); |
501 | if (new == NULL) | 530 | if (new == NULL) |
502 | return NULL; | 531 | goto out; |
503 | new->so_server = server; | 532 | new->so_server = server; |
504 | new->so_cred = cred; | 533 | new->so_cred = cred; |
505 | spin_lock(&clp->cl_lock); | 534 | spin_lock(&clp->cl_lock); |
@@ -511,26 +540,58 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, | |||
511 | rpc_destroy_wait_queue(&new->so_sequence.wait); | 540 | rpc_destroy_wait_queue(&new->so_sequence.wait); |
512 | kfree(new); | 541 | kfree(new); |
513 | } | 542 | } |
543 | out: | ||
544 | nfs4_gc_state_owners(server); | ||
514 | return sp; | 545 | return sp; |
515 | } | 546 | } |
516 | 547 | ||
517 | /** | 548 | /** |
518 | * nfs4_put_state_owner - Release a nfs4_state_owner | 549 | * nfs4_put_state_owner - Release a nfs4_state_owner |
519 | * @sp: state owner data to release | 550 | * @sp: state owner data to release |
520 | * | ||
521 | */ | 551 | */ |
522 | void nfs4_put_state_owner(struct nfs4_state_owner *sp) | 552 | void nfs4_put_state_owner(struct nfs4_state_owner *sp) |
523 | { | 553 | { |
524 | struct nfs_client *clp = sp->so_server->nfs_client; | 554 | struct nfs_server *server = sp->so_server; |
525 | struct rpc_cred *cred = sp->so_cred; | 555 | struct nfs_client *clp = server->nfs_client; |
526 | 556 | ||
527 | if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) | 557 | if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) |
528 | return; | 558 | return; |
529 | nfs4_remove_state_owner_locked(sp); | 559 | |
560 | if (!RB_EMPTY_NODE(&sp->so_server_node)) { | ||
561 | sp->so_expires = jiffies; | ||
562 | list_add_tail(&sp->so_lru, &server->state_owners_lru); | ||
563 | spin_unlock(&clp->cl_lock); | ||
564 | } else { | ||
565 | nfs4_remove_state_owner_locked(sp); | ||
566 | spin_unlock(&clp->cl_lock); | ||
567 | nfs4_free_state_owner(sp); | ||
568 | } | ||
569 | } | ||
570 | |||
571 | /** | ||
572 | * nfs4_purge_state_owners - Release all cached state owners | ||
573 | * @server: nfs_server with cached state owners to release | ||
574 | * | ||
575 | * Called at umount time. Remaining state owners will be on | ||
576 | * the LRU with ref count of zero. | ||
577 | */ | ||
578 | void nfs4_purge_state_owners(struct nfs_server *server) | ||
579 | { | ||
580 | struct nfs_client *clp = server->nfs_client; | ||
581 | struct nfs4_state_owner *sp, *tmp; | ||
582 | LIST_HEAD(doomed); | ||
583 | |||
584 | spin_lock(&clp->cl_lock); | ||
585 | list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) { | ||
586 | list_move(&sp->so_lru, &doomed); | ||
587 | nfs4_remove_state_owner_locked(sp); | ||
588 | } | ||
530 | spin_unlock(&clp->cl_lock); | 589 | spin_unlock(&clp->cl_lock); |
531 | rpc_destroy_wait_queue(&sp->so_sequence.wait); | 590 | |
532 | put_rpccred(cred); | 591 | list_for_each_entry_safe(sp, tmp, &doomed, so_lru) { |
533 | kfree(sp); | 592 | list_del(&sp->so_lru); |
593 | nfs4_free_state_owner(sp); | ||
594 | } | ||
534 | } | 595 | } |
535 | 596 | ||
536 | static struct nfs4_state * | 597 | static struct nfs4_state * |
@@ -1402,6 +1463,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov | |||
1402 | restart: | 1463 | restart: |
1403 | rcu_read_lock(); | 1464 | rcu_read_lock(); |
1404 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | 1465 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { |
1466 | nfs4_purge_state_owners(server); | ||
1405 | spin_lock(&clp->cl_lock); | 1467 | spin_lock(&clp->cl_lock); |
1406 | for (pos = rb_first(&server->state_owners); | 1468 | for (pos = rb_first(&server->state_owners); |
1407 | pos != NULL; | 1469 | pos != NULL; |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e6161b213ed1..95e92e438407 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -2298,7 +2298,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
2298 | encode_getfh(xdr, &hdr); | 2298 | encode_getfh(xdr, &hdr); |
2299 | encode_getfattr(xdr, args->bitmask, &hdr); | 2299 | encode_getfattr(xdr, args->bitmask, &hdr); |
2300 | encode_restorefh(xdr, &hdr); | 2300 | encode_restorefh(xdr, &hdr); |
2301 | encode_getfattr(xdr, args->bitmask, &hdr); | 2301 | encode_getfattr(xdr, args->dir_bitmask, &hdr); |
2302 | encode_nops(&hdr); | 2302 | encode_nops(&hdr); |
2303 | } | 2303 | } |
2304 | 2304 | ||
@@ -2517,11 +2517,13 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
2517 | encode_compound_hdr(xdr, req, &hdr); | 2517 | encode_compound_hdr(xdr, req, &hdr); |
2518 | encode_sequence(xdr, &args->seq_args, &hdr); | 2518 | encode_sequence(xdr, &args->seq_args, &hdr); |
2519 | encode_putfh(xdr, args->fh, &hdr); | 2519 | encode_putfh(xdr, args->fh, &hdr); |
2520 | replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1; | 2520 | replen = hdr.replen + op_decode_hdr_maxsz + 1; |
2521 | encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); | 2521 | encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); |
2522 | 2522 | ||
2523 | xdr_inline_pages(&req->rq_rcv_buf, replen << 2, | 2523 | xdr_inline_pages(&req->rq_rcv_buf, replen << 2, |
2524 | args->acl_pages, args->acl_pgbase, args->acl_len); | 2524 | args->acl_pages, args->acl_pgbase, args->acl_len); |
2525 | xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE); | ||
2526 | |||
2525 | encode_nops(&hdr); | 2527 | encode_nops(&hdr); |
2526 | } | 2528 | } |
2527 | 2529 | ||
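xdr_set_scratch_buffer() hands the decoder a bounce page so xdr_inline_decode() can return a contiguous pointer even when an item straddles two of the receive pages set up just above; it pairs with the acl_scratch page allocated in __nfs4_get_acl_uncached(). A generic sketch of that pairing, with a hypothetical helper name:

```c
#include <linux/mm.h>
#include <linux/sunrpc/xdr.h>

/* Attach a bounce page; the caller frees it after decoding. */
static struct page *example_attach_scratch(struct xdr_stream *xdr)
{
	struct page *scratch = alloc_page(GFP_KERNEL);

	if (scratch != NULL)
		xdr_set_scratch_buffer(xdr, page_address(scratch),
				       PAGE_SIZE);
	return scratch;
}
```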
@@ -3790,7 +3792,8 @@ out_overflow: | |||
3790 | } | 3792 | } |
3791 | 3793 | ||
3792 | static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, | 3794 | static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, |
3793 | const struct nfs_server *server, uint32_t *uid, int may_sleep) | 3795 | const struct nfs_server *server, uint32_t *uid, |
3796 | struct nfs4_string *owner_name) | ||
3794 | { | 3797 | { |
3795 | uint32_t len; | 3798 | uint32_t len; |
3796 | __be32 *p; | 3799 | __be32 *p; |
@@ -3807,8 +3810,12 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, | |||
3807 | p = xdr_inline_decode(xdr, len); | 3810 | p = xdr_inline_decode(xdr, len); |
3808 | if (unlikely(!p)) | 3811 | if (unlikely(!p)) |
3809 | goto out_overflow; | 3812 | goto out_overflow; |
3810 | if (!may_sleep) { | 3813 | if (owner_name != NULL) { |
3811 | /* do nothing */ | 3814 | owner_name->data = kmemdup(p, len, GFP_NOWAIT); |
3815 | if (owner_name->data != NULL) { | ||
3816 | owner_name->len = len; | ||
3817 | ret = NFS_ATTR_FATTR_OWNER_NAME; | ||
3818 | } | ||
3812 | } else if (len < XDR_MAX_NETOBJ) { | 3819 | } else if (len < XDR_MAX_NETOBJ) { |
3813 | if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0) | 3820 | if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0) |
3814 | ret = NFS_ATTR_FATTR_OWNER; | 3821 | ret = NFS_ATTR_FATTR_OWNER; |
@@ -3828,7 +3835,8 @@ out_overflow: | |||
3828 | } | 3835 | } |
3829 | 3836 | ||
3830 | static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, | 3837 | static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, |
3831 | const struct nfs_server *server, uint32_t *gid, int may_sleep) | 3838 | const struct nfs_server *server, uint32_t *gid, |
3839 | struct nfs4_string *group_name) | ||
3832 | { | 3840 | { |
3833 | uint32_t len; | 3841 | uint32_t len; |
3834 | __be32 *p; | 3842 | __be32 *p; |
@@ -3845,8 +3853,12 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, | |||
3845 | p = xdr_inline_decode(xdr, len); | 3853 | p = xdr_inline_decode(xdr, len); |
3846 | if (unlikely(!p)) | 3854 | if (unlikely(!p)) |
3847 | goto out_overflow; | 3855 | goto out_overflow; |
3848 | if (!may_sleep) { | 3856 | if (group_name != NULL) { |
3849 | /* do nothing */ | 3857 | group_name->data = kmemdup(p, len, GFP_NOWAIT); |
3858 | if (group_name->data != NULL) { | ||
3859 | group_name->len = len; | ||
3860 | ret = NFS_ATTR_FATTR_GROUP_NAME; | ||
3861 | } | ||
3850 | } else if (len < XDR_MAX_NETOBJ) { | 3862 | } else if (len < XDR_MAX_NETOBJ) { |
3851 | if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0) | 3863 | if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0) |
3852 | ret = NFS_ATTR_FATTR_GROUP; | 3864 | ret = NFS_ATTR_FATTR_GROUP; |
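Instead of the old may_sleep flag, which simply dropped the attribute in async context, the decoder now copies the raw name with kmemdup(GFP_NOWAIT) into a caller-supplied nfs4_string and reports the *_NAME valid bit for the deferred mapping shown in the idmap.c hunk earlier. The capture step, reduced to a sketch:

```c
#include <linux/nfs_xdr.h>
#include <linux/slab.h>

/* Copy the on-the-wire name without sleeping; returns the valid bit
 * the caller should OR in, or 0 if the copy quietly failed. */
static int example_capture_name(struct nfs4_string *dst, const void *src,
				unsigned int len, int name_valid_bit)
{
	dst->data = kmemdup(src, len, GFP_NOWAIT);
	if (dst->data == NULL)
		return 0;		/* attribute silently skipped */
	dst->len = len;
	return name_valid_bit;		/* e.g. NFS_ATTR_FATTR_OWNER_NAME */
}
```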
@@ -4283,7 +4295,7 @@ xdr_error: | |||
4283 | 4295 | ||
4284 | static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | 4296 | static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, |
4285 | struct nfs_fattr *fattr, struct nfs_fh *fh, | 4297 | struct nfs_fattr *fattr, struct nfs_fh *fh, |
4286 | const struct nfs_server *server, int may_sleep) | 4298 | const struct nfs_server *server) |
4287 | { | 4299 | { |
4288 | int status; | 4300 | int status; |
4289 | umode_t fmode = 0; | 4301 | umode_t fmode = 0; |
@@ -4350,12 +4362,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | |||
4350 | goto xdr_error; | 4362 | goto xdr_error; |
4351 | fattr->valid |= status; | 4363 | fattr->valid |= status; |
4352 | 4364 | ||
4353 | status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep); | 4365 | status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, fattr->owner_name); |
4354 | if (status < 0) | 4366 | if (status < 0) |
4355 | goto xdr_error; | 4367 | goto xdr_error; |
4356 | fattr->valid |= status; | 4368 | fattr->valid |= status; |
4357 | 4369 | ||
4358 | status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep); | 4370 | status = decode_attr_group(xdr, bitmap, server, &fattr->gid, fattr->group_name); |
4359 | if (status < 0) | 4371 | if (status < 0) |
4360 | goto xdr_error; | 4372 | goto xdr_error; |
4361 | fattr->valid |= status; | 4373 | fattr->valid |= status; |
@@ -4396,7 +4408,7 @@ xdr_error: | |||
4396 | } | 4408 | } |
4397 | 4409 | ||
4398 | static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, | 4410 | static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, |
4399 | struct nfs_fh *fh, const struct nfs_server *server, int may_sleep) | 4411 | struct nfs_fh *fh, const struct nfs_server *server) |
4400 | { | 4412 | { |
4401 | __be32 *savep; | 4413 | __be32 *savep; |
4402 | uint32_t attrlen, | 4414 | uint32_t attrlen, |
@@ -4415,7 +4427,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat | |||
4415 | if (status < 0) | 4427 | if (status < 0) |
4416 | goto xdr_error; | 4428 | goto xdr_error; |
4417 | 4429 | ||
4418 | status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep); | 4430 | status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server); |
4419 | if (status < 0) | 4431 | if (status < 0) |
4420 | goto xdr_error; | 4432 | goto xdr_error; |
4421 | 4433 | ||
@@ -4426,9 +4438,9 @@ xdr_error: | |||
4426 | } | 4438 | } |
4427 | 4439 | ||
4428 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, | 4440 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, |
4429 | const struct nfs_server *server, int may_sleep) | 4441 | const struct nfs_server *server) |
4430 | { | 4442 | { |
4431 | return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep); | 4443 | return decode_getfattr_generic(xdr, fattr, NULL, server); |
4432 | } | 4444 | } |
4433 | 4445 | ||
4434 | /* | 4446 | /* |
@@ -4957,17 +4969,18 @@ decode_restorefh(struct xdr_stream *xdr) | |||
4957 | } | 4969 | } |
4958 | 4970 | ||
4959 | static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, | 4971 | static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, |
4960 | size_t *acl_len) | 4972 | struct nfs_getaclres *res) |
4961 | { | 4973 | { |
4962 | __be32 *savep; | 4974 | __be32 *savep, *bm_p; |
4963 | uint32_t attrlen, | 4975 | uint32_t attrlen, |
4964 | bitmap[3] = {0}; | 4976 | bitmap[3] = {0}; |
4965 | struct kvec *iov = req->rq_rcv_buf.head; | 4977 | struct kvec *iov = req->rq_rcv_buf.head; |
4966 | int status; | 4978 | int status; |
4967 | 4979 | ||
4968 | *acl_len = 0; | 4980 | res->acl_len = 0; |
4969 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) | 4981 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) |
4970 | goto out; | 4982 | goto out; |
4983 | bm_p = xdr->p; | ||
4971 | if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) | 4984 | if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) |
4972 | goto out; | 4985 | goto out; |
4973 | if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) | 4986 | if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) |
@@ -4979,18 +4992,30 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
4979 | size_t hdrlen; | 4992 | size_t hdrlen; |
4980 | u32 recvd; | 4993 | u32 recvd; |
4981 | 4994 | ||
4995 | /* The bitmap (xdr len + bitmaps) and the attr xdr len words | ||
4996 | * are stored with the acl data to handle the problem of | ||
4997 | * variable length bitmaps.*/ | ||
4998 | xdr->p = bm_p; | ||
4999 | res->acl_data_offset = be32_to_cpup(bm_p) + 2; | ||
5000 | res->acl_data_offset <<= 2; | ||
5001 | |||
4982 | /* We ignore &savep and don't do consistency checks on | 5002 | /* We ignore &savep and don't do consistency checks on |
4983 | * the attr length. Let userspace figure it out.... */ | 5003 | * the attr length. Let userspace figure it out.... */ |
4984 | hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; | 5004 | hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; |
5005 | attrlen += res->acl_data_offset; | ||
4985 | recvd = req->rq_rcv_buf.len - hdrlen; | 5006 | recvd = req->rq_rcv_buf.len - hdrlen; |
4986 | if (attrlen > recvd) { | 5007 | if (attrlen > recvd) { |
4987 | dprintk("NFS: server cheating in getattr" | 5008 | if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { |
4988 | " acl reply: attrlen %u > recvd %u\n", | 5009 | /* getxattr interface called with a NULL buf */ |
5010 | res->acl_len = attrlen; | ||
5011 | goto out; | ||
5012 | } | ||
5013 | dprintk("NFS: acl reply: attrlen %u > recvd %u\n", | ||
4989 | attrlen, recvd); | 5014 | attrlen, recvd); |
4990 | return -EINVAL; | 5015 | return -EINVAL; |
4991 | } | 5016 | } |
4992 | xdr_read_pages(xdr, attrlen); | 5017 | xdr_read_pages(xdr, attrlen); |
4993 | *acl_len = attrlen; | 5018 | res->acl_len = attrlen; |
4994 | } else | 5019 | } else |
4995 | status = -EOPNOTSUPP; | 5020 | status = -EOPNOTSUPP; |
4996 | 5021 | ||
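The decoder rewinds to the bitmap and keeps the bitmap-plus-length prefix together with the page data, since the bitmap is variable length. The offset arithmetic, worked through as a sketch: the prefix is one count word, the bitmap words themselves, and one attribute-length word, so a two-word bitmap yields (2 + 2) * 4 = 16 bytes before the ACL data proper.

```c
#include <asm/byteorder.h>
#include <linux/types.h>

/* bm_p points at the bitmap word count saved before decoding. */
static u32 example_acl_data_offset(const __be32 *bm_p)
{
	u32 words = be32_to_cpup(bm_p) + 2;	/* count + bitmap + length */

	return words << 2;			/* XDR words to bytes */
}
```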
@@ -5696,8 +5721,7 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, | |||
5696 | status = decode_open_downgrade(xdr, res); | 5721 | status = decode_open_downgrade(xdr, res); |
5697 | if (status != 0) | 5722 | if (status != 0) |
5698 | goto out; | 5723 | goto out; |
5699 | decode_getfattr(xdr, res->fattr, res->server, | 5724 | decode_getfattr(xdr, res->fattr, res->server); |
5700 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5701 | out: | 5725 | out: |
5702 | return status; | 5726 | return status; |
5703 | } | 5727 | } |
@@ -5723,8 +5747,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5723 | status = decode_access(xdr, res); | 5747 | status = decode_access(xdr, res); |
5724 | if (status != 0) | 5748 | if (status != 0) |
5725 | goto out; | 5749 | goto out; |
5726 | decode_getfattr(xdr, res->fattr, res->server, | 5750 | decode_getfattr(xdr, res->fattr, res->server); |
5727 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5728 | out: | 5751 | out: |
5729 | return status; | 5752 | return status; |
5730 | } | 5753 | } |
@@ -5753,8 +5776,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5753 | status = decode_getfh(xdr, res->fh); | 5776 | status = decode_getfh(xdr, res->fh); |
5754 | if (status) | 5777 | if (status) |
5755 | goto out; | 5778 | goto out; |
5756 | status = decode_getfattr(xdr, res->fattr, res->server | 5779 | status = decode_getfattr(xdr, res->fattr, res->server); |
5757 | ,!RPC_IS_ASYNC(rqstp->rq_task)); | ||
5758 | out: | 5780 | out: |
5759 | return status; | 5781 | return status; |
5760 | } | 5782 | } |
@@ -5780,8 +5802,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, | |||
5780 | goto out; | 5802 | goto out; |
5781 | status = decode_getfh(xdr, res->fh); | 5803 | status = decode_getfh(xdr, res->fh); |
5782 | if (status == 0) | 5804 | if (status == 0) |
5783 | status = decode_getfattr(xdr, res->fattr, res->server, | 5805 | status = decode_getfattr(xdr, res->fattr, res->server); |
5784 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5785 | out: | 5806 | out: |
5786 | return status; | 5807 | return status; |
5787 | } | 5808 | } |
@@ -5807,8 +5828,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5807 | status = decode_remove(xdr, &res->cinfo); | 5828 | status = decode_remove(xdr, &res->cinfo); |
5808 | if (status) | 5829 | if (status) |
5809 | goto out; | 5830 | goto out; |
5810 | decode_getfattr(xdr, res->dir_attr, res->server, | 5831 | decode_getfattr(xdr, res->dir_attr, res->server); |
5811 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5812 | out: | 5832 | out: |
5813 | return status; | 5833 | return status; |
5814 | } | 5834 | } |
@@ -5841,14 +5861,12 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5841 | if (status) | 5861 | if (status) |
5842 | goto out; | 5862 | goto out; |
5843 | /* Current FH is target directory */ | 5863 | /* Current FH is target directory */ |
5844 | if (decode_getfattr(xdr, res->new_fattr, res->server, | 5864 | if (decode_getfattr(xdr, res->new_fattr, res->server)) |
5845 | !RPC_IS_ASYNC(rqstp->rq_task)) != 0) | ||
5846 | goto out; | 5865 | goto out; |
5847 | status = decode_restorefh(xdr); | 5866 | status = decode_restorefh(xdr); |
5848 | if (status) | 5867 | if (status) |
5849 | goto out; | 5868 | goto out; |
5850 | decode_getfattr(xdr, res->old_fattr, res->server, | 5869 | decode_getfattr(xdr, res->old_fattr, res->server); |
5851 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5852 | out: | 5870 | out: |
5853 | return status; | 5871 | return status; |
5854 | } | 5872 | } |
@@ -5884,14 +5902,12 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5884 | * Note order: OP_LINK leaves the directory as the current | 5902 | * Note order: OP_LINK leaves the directory as the current |
5885 | * filehandle. | 5903 | * filehandle. |
5886 | */ | 5904 | */ |
5887 | if (decode_getfattr(xdr, res->dir_attr, res->server, | 5905 | if (decode_getfattr(xdr, res->dir_attr, res->server)) |
5888 | !RPC_IS_ASYNC(rqstp->rq_task)) != 0) | ||
5889 | goto out; | 5906 | goto out; |
5890 | status = decode_restorefh(xdr); | 5907 | status = decode_restorefh(xdr); |
5891 | if (status) | 5908 | if (status) |
5892 | goto out; | 5909 | goto out; |
5893 | decode_getfattr(xdr, res->fattr, res->server, | 5910 | decode_getfattr(xdr, res->fattr, res->server); |
5894 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5895 | out: | 5911 | out: |
5896 | return status; | 5912 | return status; |
5897 | } | 5913 | } |
@@ -5923,14 +5939,12 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5923 | status = decode_getfh(xdr, res->fh); | 5939 | status = decode_getfh(xdr, res->fh); |
5924 | if (status) | 5940 | if (status) |
5925 | goto out; | 5941 | goto out; |
5926 | if (decode_getfattr(xdr, res->fattr, res->server, | 5942 | if (decode_getfattr(xdr, res->fattr, res->server)) |
5927 | !RPC_IS_ASYNC(rqstp->rq_task)) != 0) | ||
5928 | goto out; | 5943 | goto out; |
5929 | status = decode_restorefh(xdr); | 5944 | status = decode_restorefh(xdr); |
5930 | if (status) | 5945 | if (status) |
5931 | goto out; | 5946 | goto out; |
5932 | decode_getfattr(xdr, res->dir_fattr, res->server, | 5947 | decode_getfattr(xdr, res->dir_fattr, res->server); |
5933 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5934 | out: | 5948 | out: |
5935 | return status; | 5949 | return status; |
5936 | } | 5950 | } |
@@ -5962,8 +5976,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5962 | status = decode_putfh(xdr); | 5976 | status = decode_putfh(xdr); |
5963 | if (status) | 5977 | if (status) |
5964 | goto out; | 5978 | goto out; |
5965 | status = decode_getfattr(xdr, res->fattr, res->server, | 5979 | status = decode_getfattr(xdr, res->fattr, res->server); |
5966 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5967 | out: | 5980 | out: |
5968 | return status; | 5981 | return status; |
5969 | } | 5982 | } |
@@ -6028,7 +6041,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6028 | status = decode_putfh(xdr); | 6041 | status = decode_putfh(xdr); |
6029 | if (status) | 6042 | if (status) |
6030 | goto out; | 6043 | goto out; |
6031 | status = decode_getacl(xdr, rqstp, &res->acl_len); | 6044 | status = decode_getacl(xdr, rqstp, res); |
6032 | 6045 | ||
6033 | out: | 6046 | out: |
6034 | return status; | 6047 | return status; |
@@ -6061,8 +6074,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6061 | * an ESTALE error. Shouldn't be a problem, | 6074 | * an ESTALE error. Shouldn't be a problem, |
6062 | * though, since fattr->valid will remain unset. | 6075 | * though, since fattr->valid will remain unset. |
6063 | */ | 6076 | */ |
6064 | decode_getfattr(xdr, res->fattr, res->server, | 6077 | decode_getfattr(xdr, res->fattr, res->server); |
6065 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
6066 | out: | 6078 | out: |
6067 | return status; | 6079 | return status; |
6068 | } | 6080 | } |
@@ -6093,13 +6105,11 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6093 | goto out; | 6105 | goto out; |
6094 | if (decode_getfh(xdr, &res->fh) != 0) | 6106 | if (decode_getfh(xdr, &res->fh) != 0) |
6095 | goto out; | 6107 | goto out; |
6096 | if (decode_getfattr(xdr, res->f_attr, res->server, | 6108 | if (decode_getfattr(xdr, res->f_attr, res->server) != 0) |
6097 | !RPC_IS_ASYNC(rqstp->rq_task)) != 0) | ||
6098 | goto out; | 6109 | goto out; |
6099 | if (decode_restorefh(xdr) != 0) | 6110 | if (decode_restorefh(xdr) != 0) |
6100 | goto out; | 6111 | goto out; |
6101 | decode_getfattr(xdr, res->dir_attr, res->server, | 6112 | decode_getfattr(xdr, res->dir_attr, res->server); |
6102 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
6103 | out: | 6113 | out: |
6104 | return status; | 6114 | return status; |
6105 | } | 6115 | } |
@@ -6147,8 +6157,7 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, | |||
6147 | status = decode_open(xdr, res); | 6157 | status = decode_open(xdr, res); |
6148 | if (status) | 6158 | if (status) |
6149 | goto out; | 6159 | goto out; |
6150 | decode_getfattr(xdr, res->f_attr, res->server, | 6160 | decode_getfattr(xdr, res->f_attr, res->server); |
6151 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
6152 | out: | 6161 | out: |
6153 | return status; | 6162 | return status; |
6154 | } | 6163 | } |
@@ -6175,8 +6184,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, | |||
6175 | status = decode_setattr(xdr); | 6184 | status = decode_setattr(xdr); |
6176 | if (status) | 6185 | if (status) |
6177 | goto out; | 6186 | goto out; |
6178 | decode_getfattr(xdr, res->fattr, res->server, | 6187 | decode_getfattr(xdr, res->fattr, res->server); |
6179 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
6180 | out: | 6188 | out: |
6181 | return status; | 6189 | return status; |
6182 | } | 6190 | } |
@@ -6356,8 +6364,7 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6356 | if (status) | 6364 | if (status) |
6357 | goto out; | 6365 | goto out; |
6358 | if (res->fattr) | 6366 | if (res->fattr) |
6359 | decode_getfattr(xdr, res->fattr, res->server, | 6367 | decode_getfattr(xdr, res->fattr, res->server); |
6360 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
6361 | if (!status) | 6368 | if (!status) |
6362 | status = res->count; | 6369 | status = res->count; |
6363 | out: | 6370 | out: |
@@ -6386,8 +6393,7 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6386 | if (status) | 6393 | if (status) |
6387 | goto out; | 6394 | goto out; |
6388 | if (res->fattr) | 6395 | if (res->fattr) |
6389 | decode_getfattr(xdr, res->fattr, res->server, | 6396 | decode_getfattr(xdr, res->fattr, res->server); |
6390 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
6391 | out: | 6397 | out: |
6392 | return status; | 6398 | return status; |
6393 | } | 6399 | } |
@@ -6546,8 +6552,7 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, | |||
6546 | status = decode_delegreturn(xdr); | 6552 | status = decode_delegreturn(xdr); |
6547 | if (status != 0) | 6553 | if (status != 0) |
6548 | goto out; | 6554 | goto out; |
6549 | decode_getfattr(xdr, res->fattr, res->server, | 6555 | decode_getfattr(xdr, res->fattr, res->server); |
6550 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
6551 | out: | 6556 | out: |
6552 | return status; | 6557 | return status; |
6553 | } | 6558 | } |
@@ -6576,8 +6581,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, | |||
6576 | goto out; | 6581 | goto out; |
6577 | xdr_enter_page(xdr, PAGE_SIZE); | 6582 | xdr_enter_page(xdr, PAGE_SIZE); |
6578 | status = decode_getfattr(xdr, &res->fs_locations->fattr, | 6583 | status = decode_getfattr(xdr, &res->fs_locations->fattr, |
6579 | res->fs_locations->server, | 6584 | res->fs_locations->server); |
6580 | !RPC_IS_ASYNC(req->rq_task)); | ||
6581 | out: | 6585 | out: |
6582 | return status; | 6586 | return status; |
6583 | } | 6587 | } |
@@ -6826,8 +6830,7 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, | |||
6826 | status = decode_layoutcommit(xdr, rqstp, res); | 6830 | status = decode_layoutcommit(xdr, rqstp, res); |
6827 | if (status) | 6831 | if (status) |
6828 | goto out; | 6832 | goto out; |
6829 | decode_getfattr(xdr, res->fattr, res->server, | 6833 | decode_getfattr(xdr, res->fattr, res->server); |
6830 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
6831 | out: | 6834 | out: |
6832 | return status; | 6835 | return status; |
6833 | } | 6836 | } |
@@ -6958,7 +6961,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | |||
6958 | goto out_overflow; | 6961 | goto out_overflow; |
6959 | 6962 | ||
6960 | if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, | 6963 | if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, |
6961 | entry->server, 1) < 0) | 6964 | entry->server) < 0) |
6962 | goto out_overflow; | 6965 | goto out_overflow; |
6963 | if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) | 6966 | if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) |
6964 | entry->ino = entry->fattr->mounted_on_fileid; | 6967 | entry->ino = entry->fattr->mounted_on_fileid; |
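Throughout fs/nfs/nfs4xdr.c the compound decoders drop the trailing !RPC_IS_ASYNC(rqstp->rq_task) argument, so decode_getfattr() is now called with just the stream, the fattr and the server. A minimal sketch of the resulting call pattern (the decoder and its result struct are hypothetical; only the decode_getfattr() call mirrors this patch):

    /* Hypothetical decoder; assumes a result struct with the same
     * fattr/server fields used throughout this file. */
    static int example_dec_getattr(struct xdr_stream *xdr,
                                   struct example_res *res)
    {
            /* no sync/async hint any more */
            return decode_getfattr(xdr, res->fattr, res->server);
    }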
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index c807ab93140e..55d01280a609 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -551,7 +551,8 @@ static const struct nfs_pageio_ops objio_pg_write_ops = { | |||
551 | static struct pnfs_layoutdriver_type objlayout_type = { | 551 | static struct pnfs_layoutdriver_type objlayout_type = { |
552 | .id = LAYOUT_OSD2_OBJECTS, | 552 | .id = LAYOUT_OSD2_OBJECTS, |
553 | .name = "LAYOUT_OSD2_OBJECTS", | 553 | .name = "LAYOUT_OSD2_OBJECTS", |
554 | .flags = PNFS_LAYOUTRET_ON_SETATTR, | 554 | .flags = PNFS_LAYOUTRET_ON_SETATTR | |
555 | PNFS_LAYOUTRET_ON_ERROR, | ||
555 | 556 | ||
556 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, | 557 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, |
557 | .free_layout_hdr = objlayout_free_layout_hdr, | 558 | .free_layout_hdr = objlayout_free_layout_hdr, |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 72074e3a04f9..b3c29039f5b8 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -254,6 +254,8 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) | |||
254 | oir->status = rdata->task.tk_status = status; | 254 | oir->status = rdata->task.tk_status = status; |
255 | if (status >= 0) | 255 | if (status >= 0) |
256 | rdata->res.count = status; | 256 | rdata->res.count = status; |
257 | else | ||
258 | rdata->pnfs_error = status; | ||
257 | objlayout_iodone(oir); | 259 | objlayout_iodone(oir); |
258 | /* must not use oir after this point */ | 260 | /* must not use oir after this point */ |
259 | 261 | ||
@@ -334,6 +336,8 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) | |||
334 | if (status >= 0) { | 336 | if (status >= 0) { |
335 | wdata->res.count = status; | 337 | wdata->res.count = status; |
336 | wdata->verf.committed = oir->committed; | 338 | wdata->verf.committed = oir->committed; |
339 | } else { | ||
340 | wdata->pnfs_error = status; | ||
337 | } | 341 | } |
338 | objlayout_iodone(oir); | 342 | objlayout_iodone(oir); |
339 | /* must not use oir after this point */ | 343 | /* must not use oir after this point */ |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8e672a2b2d69..17149a490065 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -1166,6 +1166,33 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
1166 | } | 1166 | } |
1167 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); | 1167 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); |
1168 | 1168 | ||
1169 | static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) | ||
1170 | { | ||
1171 | struct nfs_pageio_descriptor pgio; | ||
1172 | LIST_HEAD(failed); | ||
1173 | |||
1174 | /* Resend all requests through the MDS */ | ||
1175 | nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); | ||
1176 | while (!list_empty(head)) { | ||
1177 | struct nfs_page *req = nfs_list_entry(head->next); | ||
1178 | |||
1179 | nfs_list_remove_request(req); | ||
1180 | if (!nfs_pageio_add_request(&pgio, req)) | ||
1181 | nfs_list_add_request(req, &failed); | ||
1182 | } | ||
1183 | nfs_pageio_complete(&pgio); | ||
1184 | |||
1185 | if (!list_empty(&failed)) { | ||
1186 | /* For some reason our attempt to resend pages failed. Mark the | ||
1187 | * overall send request as having failed, and let | ||
1188 | * nfs_writeback_release_full deal with the error. | ||
1189 | */ | ||
1190 | list_move(&failed, head); | ||
1191 | return -EIO; | ||
1192 | } | ||
1193 | return 0; | ||
1194 | } | ||
1195 | |||
1169 | /* | 1196 | /* |
1170 | * Called by non rpc-based layout drivers | 1197 | * Called by non rpc-based layout drivers |
1171 | */ | 1198 | */ |
@@ -1175,9 +1202,17 @@ void pnfs_ld_write_done(struct nfs_write_data *data) | |||
1175 | pnfs_set_layoutcommit(data); | 1202 | pnfs_set_layoutcommit(data); |
1176 | data->mds_ops->rpc_call_done(&data->task, data); | 1203 | data->mds_ops->rpc_call_done(&data->task, data); |
1177 | } else { | 1204 | } else { |
1178 | put_lseg(data->lseg); | ||
1179 | data->lseg = NULL; | ||
1180 | dprintk("pnfs write error = %d\n", data->pnfs_error); | 1205 | dprintk("pnfs write error = %d\n", data->pnfs_error); |
1206 | if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & | ||
1207 | PNFS_LAYOUTRET_ON_ERROR) { | ||
1208 | /* Don't lo_commit on error; the server will need to | ||
1209 | * perform file recovery. | ||
1210 | */ | ||
1211 | clear_bit(NFS_INO_LAYOUTCOMMIT, | ||
1212 | &NFS_I(data->inode)->flags); | ||
1213 | pnfs_return_layout(data->inode); | ||
1214 | } | ||
1215 | data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); | ||
1181 | } | 1216 | } |
1182 | data->mds_ops->rpc_release(data); | 1217 | data->mds_ops->rpc_release(data); |
1183 | } | 1218 | } |
@@ -1267,6 +1302,9 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) | |||
1267 | put_lseg(data->lseg); | 1302 | put_lseg(data->lseg); |
1268 | data->lseg = NULL; | 1303 | data->lseg = NULL; |
1269 | dprintk("pnfs write error = %d\n", data->pnfs_error); | 1304 | dprintk("pnfs write error = %d\n", data->pnfs_error); |
1305 | if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & | ||
1306 | PNFS_LAYOUTRET_ON_ERROR) | ||
1307 | pnfs_return_layout(data->inode); | ||
1270 | 1308 | ||
1271 | nfs_pageio_init_read_mds(&pgio, data->inode); | 1309 | nfs_pageio_init_read_mds(&pgio, data->inode); |
1272 | 1310 | ||
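The new error leg in pnfs_ld_write_done() replaces the old put_lseg() dance: failed pages are pushed back through the MDS via the new helper, and drivers that set PNFS_LAYOUTRET_ON_ERROR additionally return the layout and suppress layoutcommit first. Condensed from the hunks above:

    if (data->pnfs_error) {
            if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
                                            PNFS_LAYOUTRET_ON_ERROR) {
                    /* don't layoutcommit; the server must recover the file */
                    clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
                    pnfs_return_layout(data->inode);
            }
            data->task.tk_status =
                    pnfs_write_done_resend_to_mds(data->inode, &data->pages);
    }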
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1509530cb111..53d593a0a4f2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -68,6 +68,7 @@ enum { | |||
68 | enum layoutdriver_policy_flags { | 68 | enum layoutdriver_policy_flags { |
69 | /* Should the pNFS client commit and return the layout upon a setattr */ | 69 | /* Should the pNFS client commit and return the layout upon a setattr */ |
70 | PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, | 70 | PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, |
71 | PNFS_LAYOUTRET_ON_ERROR = 1 << 1, | ||
71 | }; | 72 | }; |
72 | 73 | ||
73 | struct nfs4_deviceid_node; | 74 | struct nfs4_deviceid_node; |
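A layout driver opts in to the new policy through its pnfs_layoutdriver_type flags, exactly as objio_osd does above; a hypothetical driver declaration (the name and id are placeholders):

    static struct pnfs_layoutdriver_type example_layout_type = {
            .id    = LAYOUT_OSD2_OBJECTS,   /* placeholder */
            .name  = "EXAMPLE_LAYOUT",
            .flags = PNFS_LAYOUTRET_ON_SETATTR |
                     PNFS_LAYOUTRET_ON_ERROR,
    };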
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e463967aafb8..3dfa4f112c0a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -908,10 +908,24 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve | |||
908 | data->auth_flavor_len = 1; | 908 | data->auth_flavor_len = 1; |
909 | data->version = version; | 909 | data->version = version; |
910 | data->minorversion = 0; | 910 | data->minorversion = 0; |
911 | security_init_mnt_opts(&data->lsm_opts); | ||
911 | } | 912 | } |
912 | return data; | 913 | return data; |
913 | } | 914 | } |
914 | 915 | ||
916 | static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data) | ||
917 | { | ||
918 | if (data) { | ||
919 | kfree(data->client_address); | ||
920 | kfree(data->mount_server.hostname); | ||
921 | kfree(data->nfs_server.export_path); | ||
922 | kfree(data->nfs_server.hostname); | ||
923 | kfree(data->fscache_uniq); | ||
924 | security_free_mnt_opts(&data->lsm_opts); | ||
925 | kfree(data); | ||
926 | } | ||
927 | } | ||
928 | |||
915 | /* | 929 | /* |
916 | * Sanity-check a server address provided by the mount command. | 930 | * Sanity-check a server address provided by the mount command. |
917 | * | 931 | * |
@@ -2219,9 +2233,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, | |||
2219 | data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); | 2233 | data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); |
2220 | mntfh = nfs_alloc_fhandle(); | 2234 | mntfh = nfs_alloc_fhandle(); |
2221 | if (data == NULL || mntfh == NULL) | 2235 | if (data == NULL || mntfh == NULL) |
2222 | goto out_free_fh; | 2236 | goto out; |
2223 | |||
2224 | security_init_mnt_opts(&data->lsm_opts); | ||
2225 | 2237 | ||
2226 | /* Validate the mount data */ | 2238 | /* Validate the mount data */ |
2227 | error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); | 2239 | error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); |
@@ -2233,8 +2245,6 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, | |||
2233 | #ifdef CONFIG_NFS_V4 | 2245 | #ifdef CONFIG_NFS_V4 |
2234 | if (data->version == 4) { | 2246 | if (data->version == 4) { |
2235 | mntroot = nfs4_try_mount(flags, dev_name, data); | 2247 | mntroot = nfs4_try_mount(flags, dev_name, data); |
2236 | kfree(data->client_address); | ||
2237 | kfree(data->nfs_server.export_path); | ||
2238 | goto out; | 2248 | goto out; |
2239 | } | 2249 | } |
2240 | #endif /* CONFIG_NFS_V4 */ | 2250 | #endif /* CONFIG_NFS_V4 */ |
@@ -2289,13 +2299,8 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, | |||
2289 | s->s_flags |= MS_ACTIVE; | 2299 | s->s_flags |= MS_ACTIVE; |
2290 | 2300 | ||
2291 | out: | 2301 | out: |
2292 | kfree(data->nfs_server.hostname); | 2302 | nfs_free_parsed_mount_data(data); |
2293 | kfree(data->mount_server.hostname); | ||
2294 | kfree(data->fscache_uniq); | ||
2295 | security_free_mnt_opts(&data->lsm_opts); | ||
2296 | out_free_fh: | ||
2297 | nfs_free_fhandle(mntfh); | 2303 | nfs_free_fhandle(mntfh); |
2298 | kfree(data); | ||
2299 | return mntroot; | 2304 | return mntroot; |
2300 | 2305 | ||
2301 | out_err_nosb: | 2306 | out_err_nosb: |
@@ -2622,9 +2627,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, | |||
2622 | 2627 | ||
2623 | mntfh = nfs_alloc_fhandle(); | 2628 | mntfh = nfs_alloc_fhandle(); |
2624 | if (data == NULL || mntfh == NULL) | 2629 | if (data == NULL || mntfh == NULL) |
2625 | goto out_free_fh; | 2630 | goto out; |
2626 | |||
2627 | security_init_mnt_opts(&data->lsm_opts); | ||
2628 | 2631 | ||
2629 | /* Get a volume representation */ | 2632 | /* Get a volume representation */ |
2630 | server = nfs4_create_server(data, mntfh); | 2633 | server = nfs4_create_server(data, mntfh); |
@@ -2676,13 +2679,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, | |||
2676 | 2679 | ||
2677 | s->s_flags |= MS_ACTIVE; | 2680 | s->s_flags |= MS_ACTIVE; |
2678 | 2681 | ||
2679 | security_free_mnt_opts(&data->lsm_opts); | ||
2680 | nfs_free_fhandle(mntfh); | 2682 | nfs_free_fhandle(mntfh); |
2681 | return mntroot; | 2683 | return mntroot; |
2682 | 2684 | ||
2683 | out: | 2685 | out: |
2684 | security_free_mnt_opts(&data->lsm_opts); | ||
2685 | out_free_fh: | ||
2686 | nfs_free_fhandle(mntfh); | 2686 | nfs_free_fhandle(mntfh); |
2687 | return ERR_PTR(error); | 2687 | return ERR_PTR(error); |
2688 | 2688 | ||
@@ -2839,7 +2839,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type, | |||
2839 | 2839 | ||
2840 | data = nfs_alloc_parsed_mount_data(4); | 2840 | data = nfs_alloc_parsed_mount_data(4); |
2841 | if (data == NULL) | 2841 | if (data == NULL) |
2842 | goto out_free_data; | 2842 | goto out; |
2843 | 2843 | ||
2844 | /* Validate the mount data */ | 2844 | /* Validate the mount data */ |
2845 | error = nfs4_validate_mount_data(raw_data, data, dev_name); | 2845 | error = nfs4_validate_mount_data(raw_data, data, dev_name); |
@@ -2853,12 +2853,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type, | |||
2853 | error = PTR_ERR(res); | 2853 | error = PTR_ERR(res); |
2854 | 2854 | ||
2855 | out: | 2855 | out: |
2856 | kfree(data->client_address); | 2856 | nfs_free_parsed_mount_data(data); |
2857 | kfree(data->nfs_server.export_path); | ||
2858 | kfree(data->nfs_server.hostname); | ||
2859 | kfree(data->fscache_uniq); | ||
2860 | out_free_data: | ||
2861 | kfree(data); | ||
2862 | dprintk("<-- nfs4_mount() = %d%s\n", error, | 2857 | dprintk("<-- nfs4_mount() = %d%s\n", error, |
2863 | error != 0 ? " [error]" : ""); | 2858 | error != 0 ? " [error]" : ""); |
2864 | return res; | 2859 | return res; |
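All three mount paths now funnel their cleanup through nfs_free_parsed_mount_data(), which tolerates a NULL argument, so the separate out_free_fh/out_free_data labels collapse into a single exit. A condensed sketch of the resulting pattern:

    data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
    mntfh = nfs_alloc_fhandle();
    if (data == NULL || mntfh == NULL)
            goto out;
    /* ... validate mount data and mount ... */
    out:
            nfs_free_parsed_mount_data(data);       /* NULL-safe */
            nfs_free_fhandle(mntfh);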
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1dda78db6a73..834f0fe96f89 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1052,7 +1052,7 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { | |||
1052 | .pg_doio = nfs_generic_pg_writepages, | 1052 | .pg_doio = nfs_generic_pg_writepages, |
1053 | }; | 1053 | }; |
1054 | 1054 | ||
1055 | static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, | 1055 | void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, |
1056 | struct inode *inode, int ioflags) | 1056 | struct inode *inode, int ioflags) |
1057 | { | 1057 | { |
1058 | nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, | 1058 | nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, |
@@ -1166,13 +1166,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) | |||
1166 | static void nfs_writeback_release_full(void *calldata) | 1166 | static void nfs_writeback_release_full(void *calldata) |
1167 | { | 1167 | { |
1168 | struct nfs_write_data *data = calldata; | 1168 | struct nfs_write_data *data = calldata; |
1169 | int ret, status = data->task.tk_status; | 1169 | int status = data->task.tk_status; |
1170 | struct nfs_pageio_descriptor pgio; | ||
1171 | |||
1172 | if (data->pnfs_error) { | ||
1173 | nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE); | ||
1174 | pgio.pg_recoalesce = 1; | ||
1175 | } | ||
1176 | 1170 | ||
1177 | /* Update attributes as result of writeback. */ | 1171 | /* Update attributes as result of writeback. */ |
1178 | while (!list_empty(&data->pages)) { | 1172 | while (!list_empty(&data->pages)) { |
@@ -1188,11 +1182,6 @@ static void nfs_writeback_release_full(void *calldata) | |||
1188 | req->wb_bytes, | 1182 | req->wb_bytes, |
1189 | (long long)req_offset(req)); | 1183 | (long long)req_offset(req)); |
1190 | 1184 | ||
1191 | if (data->pnfs_error) { | ||
1192 | dprintk(", pnfs error = %d\n", data->pnfs_error); | ||
1193 | goto next; | ||
1194 | } | ||
1195 | |||
1196 | if (status < 0) { | 1185 | if (status < 0) { |
1197 | nfs_set_pageerror(page); | 1186 | nfs_set_pageerror(page); |
1198 | nfs_context_set_write_error(req->wb_context, status); | 1187 | nfs_context_set_write_error(req->wb_context, status); |
@@ -1212,19 +1201,7 @@ remove_request: | |||
1212 | next: | 1201 | next: |
1213 | nfs_clear_page_tag_locked(req); | 1202 | nfs_clear_page_tag_locked(req); |
1214 | nfs_end_page_writeback(page); | 1203 | nfs_end_page_writeback(page); |
1215 | if (data->pnfs_error) { | ||
1216 | lock_page(page); | ||
1217 | nfs_pageio_cond_complete(&pgio, page->index); | ||
1218 | ret = nfs_page_async_flush(&pgio, page, 0); | ||
1219 | if (ret) { | ||
1220 | nfs_set_pageerror(page); | ||
1221 | dprintk("rewrite to MDS error = %d\n", ret); | ||
1222 | } | ||
1223 | unlock_page(page); | ||
1224 | } | ||
1225 | } | 1204 | } |
1226 | if (data->pnfs_error) | ||
1227 | nfs_pageio_complete(&pgio); | ||
1228 | nfs_writedata_release(calldata); | 1205 | nfs_writedata_release(calldata); |
1229 | } | 1206 | } |
1230 | 1207 | ||
@@ -1711,7 +1688,7 @@ out_error: | |||
1711 | 1688 | ||
1712 | #ifdef CONFIG_MIGRATION | 1689 | #ifdef CONFIG_MIGRATION |
1713 | int nfs_migrate_page(struct address_space *mapping, struct page *newpage, | 1690 | int nfs_migrate_page(struct address_space *mapping, struct page *newpage, |
1714 | struct page *page) | 1691 | struct page *page, enum migrate_mode mode) |
1715 | { | 1692 | { |
1716 | /* | 1693 | /* |
1717 | * If PagePrivate is set, then the page is currently associated with | 1694 | * If PagePrivate is set, then the page is currently associated with |
@@ -1726,7 +1703,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, | |||
1726 | 1703 | ||
1727 | nfs_fscache_release_page(page, GFP_KERNEL); | 1704 | nfs_fscache_release_page(page, GFP_KERNEL); |
1728 | 1705 | ||
1729 | return migrate_page(mapping, newpage, page); | 1706 | return migrate_page(mapping, newpage, page, mode); |
1730 | } | 1707 | } |
1731 | #endif | 1708 | #endif |
1732 | 1709 | ||
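nfs_migrate_page() simply threads the new migrate_mode argument through to migrate_page(); a condensed sketch of the full function as it reads after this change (the PagePrivate early return is unchanged context from the surrounding function body, not part of the hunks above):

    #ifdef CONFIG_MIGRATION
    int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
                         struct page *page, enum migrate_mode mode)
    {
            /* PagePrivate means an in-flight read or write request;
             * don't try to migrate such a page. */
            if (PagePrivate(page))
                    return -EBUSY;
            nfs_fscache_release_page(page, GFP_KERNEL);
            return migrate_page(mapping, newpage, page, mode);
    }
    #endif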
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 10e6366608f2..8df1ea4a6ff9 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -80,3 +80,13 @@ config NFSD_V4 | |||
80 | available from http://linux-nfs.org/. | 80 | available from http://linux-nfs.org/. |
81 | 81 | ||
82 | If unsure, say N. | 82 | If unsure, say N. |
83 | |||
84 | config NFSD_FAULT_INJECTION | ||
85 | bool "NFS server manual fault injection" | ||
86 | depends on NFSD_V4 && DEBUG_KERNEL | ||
87 | help | ||
88 | This option enables support for manually injecting faults | ||
89 | into the NFS server. This is intended to be used for | ||
90 | testing error recovery on the NFS client. | ||
91 | |||
92 | If unsure, say N. | ||
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 9b118ee20193..af32ef06b4fe 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile | |||
@@ -6,6 +6,7 @@ obj-$(CONFIG_NFSD) += nfsd.o | |||
6 | 6 | ||
7 | nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ | 7 | nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ |
8 | export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o | 8 | export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o |
9 | nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o | ||
9 | nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o | 10 | nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o |
10 | nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o | 11 | nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o |
11 | nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o | 12 | nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 62f3b9074e84..cf8a6bd062fa 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) | |||
87 | struct svc_expkey key; | 87 | struct svc_expkey key; |
88 | struct svc_expkey *ek = NULL; | 88 | struct svc_expkey *ek = NULL; |
89 | 89 | ||
90 | if (mesg[mlen-1] != '\n') | 90 | if (mlen < 1 || mesg[mlen-1] != '\n') |
91 | return -EINVAL; | 91 | return -EINVAL; |
92 | mesg[mlen-1] = 0; | 92 | mesg[mlen-1] = 0; |
93 | 93 | ||
@@ -1226,12 +1226,12 @@ nfsd_export_init(void) | |||
1226 | int rv; | 1226 | int rv; |
1227 | dprintk("nfsd: initializing export module.\n"); | 1227 | dprintk("nfsd: initializing export module.\n"); |
1228 | 1228 | ||
1229 | rv = cache_register(&svc_export_cache); | 1229 | rv = cache_register_net(&svc_export_cache, &init_net); |
1230 | if (rv) | 1230 | if (rv) |
1231 | return rv; | 1231 | return rv; |
1232 | rv = cache_register(&svc_expkey_cache); | 1232 | rv = cache_register_net(&svc_expkey_cache, &init_net); |
1233 | if (rv) | 1233 | if (rv) |
1234 | cache_unregister(&svc_export_cache); | 1234 | cache_unregister_net(&svc_export_cache, &init_net); |
1235 | return rv; | 1235 | return rv; |
1236 | 1236 | ||
1237 | } | 1237 | } |
@@ -1255,8 +1255,8 @@ nfsd_export_shutdown(void) | |||
1255 | 1255 | ||
1256 | dprintk("nfsd: shutting down export module.\n"); | 1256 | dprintk("nfsd: shutting down export module.\n"); |
1257 | 1257 | ||
1258 | cache_unregister(&svc_expkey_cache); | 1258 | cache_unregister_net(&svc_expkey_cache, &init_net); |
1259 | cache_unregister(&svc_export_cache); | 1259 | cache_unregister_net(&svc_export_cache, &init_net); |
1260 | svcauth_unix_purge(); | 1260 | svcauth_unix_purge(); |
1261 | 1261 | ||
1262 | dprintk("nfsd: export shutdown complete.\n"); | 1262 | dprintk("nfsd: export shutdown complete.\n"); |
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c new file mode 100644 index 000000000000..ce7f0758d84c --- /dev/null +++ b/fs/nfsd/fault_inject.c | |||
@@ -0,0 +1,91 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> | ||
3 | * | ||
4 | * Uses debugfs to create fault injection points for client testing | ||
5 | */ | ||
6 | |||
7 | #include <linux/types.h> | ||
8 | #include <linux/fs.h> | ||
9 | #include <linux/debugfs.h> | ||
10 | #include <linux/module.h> | ||
11 | |||
12 | #include "state.h" | ||
13 | #include "fault_inject.h" | ||
14 | |||
15 | struct nfsd_fault_inject_op { | ||
16 | char *file; | ||
17 | void (*func)(u64); | ||
18 | }; | ||
19 | |||
20 | static struct nfsd_fault_inject_op inject_ops[] = { | ||
21 | { | ||
22 | .file = "forget_clients", | ||
23 | .func = nfsd_forget_clients, | ||
24 | }, | ||
25 | { | ||
26 | .file = "forget_locks", | ||
27 | .func = nfsd_forget_locks, | ||
28 | }, | ||
29 | { | ||
30 | .file = "forget_openowners", | ||
31 | .func = nfsd_forget_openowners, | ||
32 | }, | ||
33 | { | ||
34 | .file = "forget_delegations", | ||
35 | .func = nfsd_forget_delegations, | ||
36 | }, | ||
37 | { | ||
38 | .file = "recall_delegations", | ||
39 | .func = nfsd_recall_delegations, | ||
40 | }, | ||
41 | }; | ||
42 | |||
43 | static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); | ||
44 | static struct dentry *debug_dir; | ||
45 | |||
46 | static int nfsd_inject_set(void *op_ptr, u64 val) | ||
47 | { | ||
48 | struct nfsd_fault_inject_op *op = op_ptr; | ||
49 | |||
50 | if (val == 0) | ||
51 | printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); | ||
52 | else | ||
53 | printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); | ||
54 | |||
55 | op->func(val); | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | static int nfsd_inject_get(void *data, u64 *val) | ||
60 | { | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n"); | ||
65 | |||
66 | void nfsd_fault_inject_cleanup(void) | ||
67 | { | ||
68 | debugfs_remove_recursive(debug_dir); | ||
69 | } | ||
70 | |||
71 | int nfsd_fault_inject_init(void) | ||
72 | { | ||
73 | unsigned int i; | ||
74 | struct nfsd_fault_inject_op *op; | ||
75 | mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; | ||
76 | |||
77 | debug_dir = debugfs_create_dir("nfsd", NULL); | ||
78 | if (!debug_dir) | ||
79 | goto fail; | ||
80 | |||
81 | for (i = 0; i < NUM_INJECT_OPS; i++) { | ||
82 | op = &inject_ops[i]; | ||
83 | if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) | ||
84 | goto fail; | ||
85 | } | ||
86 | return 0; | ||
87 | |||
88 | fail: | ||
89 | nfsd_fault_inject_cleanup(); | ||
90 | return -ENOMEM; | ||
91 | } | ||
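Each inject_ops[] entry becomes a writable debugfs file, typically /sys/kernel/debug/nfsd/<name> when debugfs is mounted in the usual place; writing N forgets N objects and writing 0 forgets them all. Adding a new injection point is a table edit plus one helper; a hypothetical sketch (both names are invented for illustration and are not part of this patch):

    void nfsd_forget_sessions(u64);         /* would live in nfs4state.c */

    /* extra entry appended to inject_ops[] */
    {
            .file = "forget_sessions",
            .func = nfsd_forget_sessions,
    },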
diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h new file mode 100644 index 000000000000..90bd0570956c --- /dev/null +++ b/fs/nfsd/fault_inject.h | |||
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> | ||
3 | * | ||
4 | * Function definitions for fault injection | ||
5 | */ | ||
6 | |||
7 | #ifndef LINUX_NFSD_FAULT_INJECT_H | ||
8 | #define LINUX_NFSD_FAULT_INJECT_H | ||
9 | |||
10 | #ifdef CONFIG_NFSD_FAULT_INJECTION | ||
11 | int nfsd_fault_inject_init(void); | ||
12 | void nfsd_fault_inject_cleanup(void); | ||
13 | void nfsd_forget_clients(u64); | ||
14 | void nfsd_forget_locks(u64); | ||
15 | void nfsd_forget_openowners(u64); | ||
16 | void nfsd_forget_delegations(u64); | ||
17 | void nfsd_recall_delegations(u64); | ||
18 | #else /* CONFIG_NFSD_FAULT_INJECTION */ | ||
19 | static inline int nfsd_fault_inject_init(void) { return 0; } | ||
20 | static inline void nfsd_fault_inject_cleanup(void) {} | ||
21 | static inline void nfsd_forget_clients(u64 num) {} | ||
22 | static inline void nfsd_forget_locks(u64 num) {} | ||
23 | static inline void nfsd_forget_openowners(u64 num) {} | ||
24 | static inline void nfsd_forget_delegations(u64 num) {} | ||
25 | static inline void nfsd_recall_delegations(u64 num) {} | ||
26 | #endif /* CONFIG_NFSD_FAULT_INJECTION */ | ||
27 | |||
28 | #endif /* LINUX_NFSD_FAULT_INJECT_H */ | ||
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7748d6a18d97..6f3ebb48b12f 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -718,7 +718,7 @@ int set_callback_cred(void) | |||
718 | { | 718 | { |
719 | if (callback_cred) | 719 | if (callback_cred) |
720 | return 0; | 720 | return 0; |
721 | callback_cred = rpc_lookup_machine_cred(); | 721 | callback_cred = rpc_lookup_machine_cred("nfs"); |
722 | if (!callback_cred) | 722 | if (!callback_cred) |
723 | return -ENOMEM; | 723 | return -ENOMEM; |
724 | return 0; | 724 | return 0; |
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 55780a22fdbd..94096273cd6c 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/seq_file.h> | 36 | #include <linux/seq_file.h> |
37 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
39 | #include <net/net_namespace.h> | ||
39 | #include "idmap.h" | 40 | #include "idmap.h" |
40 | #include "nfsd.h" | 41 | #include "nfsd.h" |
41 | 42 | ||
@@ -466,20 +467,20 @@ nfsd_idmap_init(void) | |||
466 | { | 467 | { |
467 | int rv; | 468 | int rv; |
468 | 469 | ||
469 | rv = cache_register(&idtoname_cache); | 470 | rv = cache_register_net(&idtoname_cache, &init_net); |
470 | if (rv) | 471 | if (rv) |
471 | return rv; | 472 | return rv; |
472 | rv = cache_register(&nametoid_cache); | 473 | rv = cache_register_net(&nametoid_cache, &init_net); |
473 | if (rv) | 474 | if (rv) |
474 | cache_unregister(&idtoname_cache); | 475 | cache_unregister_net(&idtoname_cache, &init_net); |
475 | return rv; | 476 | return rv; |
476 | } | 477 | } |
477 | 478 | ||
478 | void | 479 | void |
479 | nfsd_idmap_shutdown(void) | 480 | nfsd_idmap_shutdown(void) |
480 | { | 481 | { |
481 | cache_unregister(&idtoname_cache); | 482 | cache_unregister_net(&idtoname_cache, &init_net); |
482 | cache_unregister(&nametoid_cache); | 483 | cache_unregister_net(&nametoid_cache, &init_net); |
483 | } | 484 | } |
484 | 485 | ||
485 | static int | 486 | static int |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c5e28ed8bca0..896da74ec563 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -266,10 +266,6 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ | |||
266 | { | 266 | { |
267 | __be32 status; | 267 | __be32 status; |
268 | 268 | ||
269 | /* Only reclaims from previously confirmed clients are valid */ | ||
270 | if ((status = nfs4_check_open_reclaim(&open->op_clientid))) | ||
271 | return status; | ||
272 | |||
273 | /* We don't know the target directory, and therefore can not | 269 | /* We don't know the target directory, and therefore can not |
274 | * set the change info | 270 | * set the change info |
275 | */ | 271 | */ |
@@ -373,6 +369,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
373 | break; | 369 | break; |
374 | case NFS4_OPEN_CLAIM_PREVIOUS: | 370 | case NFS4_OPEN_CLAIM_PREVIOUS: |
375 | open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; | 371 | open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; |
372 | status = nfs4_check_open_reclaim(&open->op_clientid); | ||
373 | if (status) | ||
374 | goto out; | ||
376 | case NFS4_OPEN_CLAIM_FH: | 375 | case NFS4_OPEN_CLAIM_FH: |
377 | case NFS4_OPEN_CLAIM_DELEG_CUR_FH: | 376 | case NFS4_OPEN_CLAIM_DELEG_CUR_FH: |
378 | status = do_open_fhandle(rqstp, &cstate->current_fh, | 377 | status = do_open_fhandle(rqstp, &cstate->current_fh, |
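The reclaim check moves out of do_open_fhandle() and into the NFS4_OPEN_CLAIM_PREVIOUS arm of the claim-type switch in nfsd4_open(), which then deliberately falls through to the current-filehandle cases. Condensed from the hunks above:

    switch (open->op_claim_type) {
    case NFS4_OPEN_CLAIM_PREVIOUS:
            open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
            status = nfs4_check_open_reclaim(&open->op_clientid);
            if (status)
                    goto out;
            /* fall through */
    case NFS4_OPEN_CLAIM_FH:
    case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
            status = do_open_fhandle(rqstp, &cstate->current_fh, open);
            break;
    }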
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 80a0be9ed008..0b3e875d1abd 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -117,8 +117,7 @@ out_no_tfm: | |||
117 | return status; | 117 | return status; |
118 | } | 118 | } |
119 | 119 | ||
120 | int | 120 | void nfsd4_create_clid_dir(struct nfs4_client *clp) |
121 | nfsd4_create_clid_dir(struct nfs4_client *clp) | ||
122 | { | 121 | { |
123 | const struct cred *original_cred; | 122 | const struct cred *original_cred; |
124 | char *dname = clp->cl_recdir; | 123 | char *dname = clp->cl_recdir; |
@@ -127,13 +126,14 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
127 | 126 | ||
128 | dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); | 127 | dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); |
129 | 128 | ||
130 | if (!rec_file || clp->cl_firststate) | 129 | if (clp->cl_firststate) |
131 | return 0; | 130 | return; |
132 | |||
133 | clp->cl_firststate = 1; | 131 | clp->cl_firststate = 1; |
132 | if (!rec_file) | ||
133 | return; | ||
134 | status = nfs4_save_creds(&original_cred); | 134 | status = nfs4_save_creds(&original_cred); |
135 | if (status < 0) | 135 | if (status < 0) |
136 | return status; | 136 | return; |
137 | 137 | ||
138 | dir = rec_file->f_path.dentry; | 138 | dir = rec_file->f_path.dentry; |
139 | /* lock the parent */ | 139 | /* lock the parent */ |
@@ -144,8 +144,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
144 | status = PTR_ERR(dentry); | 144 | status = PTR_ERR(dentry); |
145 | goto out_unlock; | 145 | goto out_unlock; |
146 | } | 146 | } |
147 | status = -EEXIST; | ||
148 | if (dentry->d_inode) | 147 | if (dentry->d_inode) |
148 | /* | ||
149 | * In the 4.1 case, where we're called from | ||
150 | * reclaim_complete(), records from the previous reboot | ||
151 | * may still be left, so this is OK. | ||
152 | * | ||
153 | * In the 4.0 case, we should never get here; but we may | ||
154 | * as well be forgiving and just succeed silently. | ||
155 | */ | ||
149 | goto out_put; | 156 | goto out_put; |
150 | status = mnt_want_write_file(rec_file); | 157 | status = mnt_want_write_file(rec_file); |
151 | if (status) | 158 | if (status) |
@@ -164,7 +171,6 @@ out_unlock: | |||
164 | " and is writeable", status, | 171 | " and is writeable", status, |
165 | user_recovery_dirname); | 172 | user_recovery_dirname); |
166 | nfs4_reset_creds(original_cred); | 173 | nfs4_reset_creds(original_cred); |
167 | return status; | ||
168 | } | 174 | } |
169 | 175 | ||
170 | typedef int (recdir_func)(struct dentry *, struct dentry *); | 176 | typedef int (recdir_func)(struct dentry *, struct dentry *); |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9ca16dc09e04..e8c98f009670 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -49,12 +49,20 @@ | |||
49 | time_t nfsd4_lease = 90; /* default lease time */ | 49 | time_t nfsd4_lease = 90; /* default lease time */ |
50 | time_t nfsd4_grace = 90; | 50 | time_t nfsd4_grace = 90; |
51 | static time_t boot_time; | 51 | static time_t boot_time; |
52 | static stateid_t zerostateid; /* bits all 0 */ | 52 | |
53 | static stateid_t onestateid; /* bits all 1 */ | 53 | #define all_ones {{~0,~0},~0} |
54 | static const stateid_t one_stateid = { | ||
55 | .si_generation = ~0, | ||
56 | .si_opaque = all_ones, | ||
57 | }; | ||
58 | static const stateid_t zero_stateid = { | ||
59 | /* all fields zero */ | ||
60 | }; | ||
61 | |||
54 | static u64 current_sessionid = 1; | 62 | static u64 current_sessionid = 1; |
55 | 63 | ||
56 | #define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) | 64 | #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) |
57 | #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) | 65 | #define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) |
58 | 66 | ||
59 | /* forward declarations */ | 67 | /* forward declarations */ |
60 | static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); | 68 | static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); |
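The special stateids become compile-time constants (note the all_ones initializer for the opaque part), while comparisons keep going through the memcmp-based macros. A hypothetical helper using them:

    static bool is_special_stateid(const stateid_t *stateid)
    {
            return ZERO_STATEID(stateid) || ONE_STATEID(stateid);
    }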
@@ -133,21 +141,21 @@ unsigned int max_delegations; | |||
133 | * Open owner state (share locks) | 141 | * Open owner state (share locks) |
134 | */ | 142 | */ |
135 | 143 | ||
136 | /* hash tables for open owners */ | 144 | /* hash tables for lock and open owners */ |
137 | #define OPEN_OWNER_HASH_BITS 8 | 145 | #define OWNER_HASH_BITS 8 |
138 | #define OPEN_OWNER_HASH_SIZE (1 << OPEN_OWNER_HASH_BITS) | 146 | #define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) |
139 | #define OPEN_OWNER_HASH_MASK (OPEN_OWNER_HASH_SIZE - 1) | 147 | #define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) |
140 | 148 | ||
141 | static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) | 149 | static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) |
142 | { | 150 | { |
143 | unsigned int ret; | 151 | unsigned int ret; |
144 | 152 | ||
145 | ret = opaque_hashval(ownername->data, ownername->len); | 153 | ret = opaque_hashval(ownername->data, ownername->len); |
146 | ret += clientid; | 154 | ret += clientid; |
147 | return ret & OPEN_OWNER_HASH_MASK; | 155 | return ret & OWNER_HASH_MASK; |
148 | } | 156 | } |
149 | 157 | ||
150 | static struct list_head open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE]; | 158 | static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; |
151 | 159 | ||
152 | /* hash table for nfs4_file */ | 160 | /* hash table for nfs4_file */ |
153 | #define FILE_HASH_BITS 8 | 161 | #define FILE_HASH_BITS 8 |
@@ -514,6 +522,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) | |||
514 | 522 | ||
515 | list_del(&lo->lo_owner.so_strhash); | 523 | list_del(&lo->lo_owner.so_strhash); |
516 | list_del(&lo->lo_perstateid); | 524 | list_del(&lo->lo_perstateid); |
525 | list_del(&lo->lo_owner_ino_hash); | ||
517 | while (!list_empty(&lo->lo_owner.so_stateids)) { | 526 | while (!list_empty(&lo->lo_owner.so_stateids)) { |
518 | stp = list_first_entry(&lo->lo_owner.so_stateids, | 527 | stp = list_first_entry(&lo->lo_owner.so_stateids, |
519 | struct nfs4_ol_stateid, st_perstateowner); | 528 | struct nfs4_ol_stateid, st_perstateowner); |
@@ -985,12 +994,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) | |||
985 | clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); | 994 | clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); |
986 | if (clp == NULL) | 995 | if (clp == NULL) |
987 | return NULL; | 996 | return NULL; |
988 | clp->cl_name.data = kmalloc(name.len, GFP_KERNEL); | 997 | clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); |
989 | if (clp->cl_name.data == NULL) { | 998 | if (clp->cl_name.data == NULL) { |
990 | kfree(clp); | 999 | kfree(clp); |
991 | return NULL; | 1000 | return NULL; |
992 | } | 1001 | } |
993 | memcpy(clp->cl_name.data, name.data, name.len); | ||
994 | clp->cl_name.len = name.len; | 1002 | clp->cl_name.len = name.len; |
995 | return clp; | 1003 | return clp; |
996 | } | 1004 | } |
@@ -1058,7 +1066,6 @@ expire_client(struct nfs4_client *clp) | |||
1058 | spin_unlock(&recall_lock); | 1066 | spin_unlock(&recall_lock); |
1059 | while (!list_empty(&reaplist)) { | 1067 | while (!list_empty(&reaplist)) { |
1060 | dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); | 1068 | dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); |
1061 | list_del_init(&dp->dl_recall_lru); | ||
1062 | unhash_delegation(dp); | 1069 | unhash_delegation(dp); |
1063 | } | 1070 | } |
1064 | while (!list_empty(&clp->cl_openowners)) { | 1071 | while (!list_empty(&clp->cl_openowners)) { |
@@ -2301,7 +2308,7 @@ nfsd4_free_slabs(void) | |||
2301 | nfsd4_free_slab(&deleg_slab); | 2308 | nfsd4_free_slab(&deleg_slab); |
2302 | } | 2309 | } |
2303 | 2310 | ||
2304 | static int | 2311 | int |
2305 | nfsd4_init_slabs(void) | 2312 | nfsd4_init_slabs(void) |
2306 | { | 2313 | { |
2307 | openowner_slab = kmem_cache_create("nfsd4_openowners", | 2314 | openowner_slab = kmem_cache_create("nfsd4_openowners", |
@@ -2373,7 +2380,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj | |||
2373 | 2380 | ||
2374 | static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) | 2381 | static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) |
2375 | { | 2382 | { |
2376 | list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]); | 2383 | list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); |
2377 | list_add(&oo->oo_perclient, &clp->cl_openowners); | 2384 | list_add(&oo->oo_perclient, &clp->cl_openowners); |
2378 | } | 2385 | } |
2379 | 2386 | ||
@@ -2436,7 +2443,9 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) | |||
2436 | struct nfs4_stateowner *so; | 2443 | struct nfs4_stateowner *so; |
2437 | struct nfs4_openowner *oo; | 2444 | struct nfs4_openowner *oo; |
2438 | 2445 | ||
2439 | list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) { | 2446 | list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { |
2447 | if (!so->so_is_open_owner) | ||
2448 | continue; | ||
2440 | if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { | 2449 | if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { |
2441 | oo = openowner(so); | 2450 | oo = openowner(so); |
2442 | renew_client(oo->oo_owner.so_client); | 2451 | renew_client(oo->oo_owner.so_client); |
@@ -2580,7 +2589,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, | |||
2580 | if (open->op_file == NULL) | 2589 | if (open->op_file == NULL) |
2581 | return nfserr_jukebox; | 2590 | return nfserr_jukebox; |
2582 | 2591 | ||
2583 | strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner); | 2592 | strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); |
2584 | oo = find_openstateowner_str(strhashval, open); | 2593 | oo = find_openstateowner_str(strhashval, open); |
2585 | open->op_openowner = oo; | 2594 | open->op_openowner = oo; |
2586 | if (!oo) { | 2595 | if (!oo) { |
@@ -3123,7 +3132,6 @@ nfs4_laundromat(void) | |||
3123 | spin_unlock(&recall_lock); | 3132 | spin_unlock(&recall_lock); |
3124 | list_for_each_safe(pos, next, &reaplist) { | 3133 | list_for_each_safe(pos, next, &reaplist) { |
3125 | dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); | 3134 | dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); |
3126 | list_del_init(&dp->dl_recall_lru); | ||
3127 | unhash_delegation(dp); | 3135 | unhash_delegation(dp); |
3128 | } | 3136 | } |
3129 | test_val = nfsd4_lease; | 3137 | test_val = nfsd4_lease; |
@@ -3718,13 +3726,11 @@ out: | |||
3718 | } | 3726 | } |
3719 | 3727 | ||
3720 | 3728 | ||
3721 | /* | ||
3722 | * Lock owner state (byte-range locks) | ||
3723 | */ | ||
3724 | #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) | 3729 | #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) |
3725 | #define LOCK_HASH_BITS 8 | 3730 | |
3726 | #define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS) | 3731 | #define LOCKOWNER_INO_HASH_BITS 8 |
3727 | #define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1) | 3732 | #define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) |
3733 | #define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) | ||
3728 | 3734 | ||
3729 | static inline u64 | 3735 | static inline u64 |
3730 | end_offset(u64 start, u64 len) | 3736 | end_offset(u64 start, u64 len) |
@@ -3746,16 +3752,14 @@ last_byte_offset(u64 start, u64 len) | |||
3746 | return end > start ? end - 1: NFS4_MAX_UINT64; | 3752 | return end > start ? end - 1: NFS4_MAX_UINT64; |
3747 | } | 3753 | } |
3748 | 3754 | ||
3749 | static inline unsigned int | 3755 | static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername) |
3750 | lock_ownerstr_hashval(struct inode *inode, u32 cl_id, | ||
3751 | struct xdr_netobj *ownername) | ||
3752 | { | 3756 | { |
3753 | return (file_hashval(inode) + cl_id | 3757 | return (file_hashval(inode) + cl_id |
3754 | + opaque_hashval(ownername->data, ownername->len)) | 3758 | + opaque_hashval(ownername->data, ownername->len)) |
3755 | & LOCK_HASH_MASK; | 3759 | & LOCKOWNER_INO_HASH_MASK; |
3756 | } | 3760 | } |
3757 | 3761 | ||
3758 | static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; | 3762 | static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE]; |
3759 | 3763 | ||
3760 | /* | 3764 | /* |
3761 | * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that | 3765 | * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that |
@@ -3809,23 +3813,39 @@ nevermind: | |||
3809 | deny->ld_type = NFS4_WRITE_LT; | 3813 | deny->ld_type = NFS4_WRITE_LT; |
3810 | } | 3814 | } |
3811 | 3815 | ||
3816 | static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) | ||
3817 | { | ||
3818 | struct nfs4_ol_stateid *lst; | ||
3819 | |||
3820 | if (!same_owner_str(&lo->lo_owner, owner, clid)) | ||
3821 | return false; | ||
3822 | lst = list_first_entry(&lo->lo_owner.so_stateids, | ||
3823 | struct nfs4_ol_stateid, st_perstateowner); | ||
3824 | return lst->st_file->fi_inode == inode; | ||
3825 | } | ||
3826 | |||
3812 | static struct nfs4_lockowner * | 3827 | static struct nfs4_lockowner * |
3813 | find_lockowner_str(struct inode *inode, clientid_t *clid, | 3828 | find_lockowner_str(struct inode *inode, clientid_t *clid, |
3814 | struct xdr_netobj *owner) | 3829 | struct xdr_netobj *owner) |
3815 | { | 3830 | { |
3816 | unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); | 3831 | unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); |
3817 | struct nfs4_stateowner *op; | 3832 | struct nfs4_lockowner *lo; |
3818 | 3833 | ||
3819 | list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { | 3834 | list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { |
3820 | if (same_owner_str(op, owner, clid)) | 3835 | if (same_lockowner_ino(lo, inode, clid, owner)) |
3821 | return lockowner(op); | 3836 | return lo; |
3822 | } | 3837 | } |
3823 | return NULL; | 3838 | return NULL; |
3824 | } | 3839 | } |
3825 | 3840 | ||
3826 | static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) | 3841 | static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) |
3827 | { | 3842 | { |
3828 | list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]); | 3843 | struct inode *inode = open_stp->st_file->fi_inode; |
3844 | unsigned int inohash = lockowner_ino_hashval(inode, | ||
3845 | clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); | ||
3846 | |||
3847 | list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); | ||
3848 | list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]); | ||
3829 | list_add(&lo->lo_perstateid, &open_stp->st_lockowners); | 3849 | list_add(&lo->lo_perstateid, &open_stp->st_lockowners); |
3830 | } | 3850 | } |
3831 | 3851 | ||
@@ -3834,7 +3854,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s | |||
3834 | * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has | 3854 | * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has |
3835 | * occurred. | 3855 | * occurred. |
3836 | * | 3856 | * |
3837 | * strhashval = lock_ownerstr_hashval | 3857 | * strhashval = ownerstr_hashval |
3838 | */ | 3858 | */ |
3839 | 3859 | ||
3840 | static struct nfs4_lockowner * | 3860 | static struct nfs4_lockowner * |
@@ -3892,6 +3912,37 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) | |||
3892 | __set_bit(access, &lock_stp->st_access_bmap); | 3912 | __set_bit(access, &lock_stp->st_access_bmap); |
3893 | } | 3913 | } |
3894 | 3914 | ||
3915 | __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) | ||
3916 | { | ||
3917 | struct nfs4_file *fi = ost->st_file; | ||
3918 | struct nfs4_openowner *oo = openowner(ost->st_stateowner); | ||
3919 | struct nfs4_client *cl = oo->oo_owner.so_client; | ||
3920 | struct nfs4_lockowner *lo; | ||
3921 | unsigned int strhashval; | ||
3922 | |||
3923 | lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner); | ||
3924 | if (lo) { | ||
3925 | if (!cstate->minorversion) | ||
3926 | return nfserr_bad_seqid; | ||
3927 | /* XXX: a lockowner always has exactly one stateid: */ | ||
3928 | *lst = list_first_entry(&lo->lo_owner.so_stateids, | ||
3929 | struct nfs4_ol_stateid, st_perstateowner); | ||
3930 | return nfs_ok; | ||
3931 | } | ||
3932 | strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, | ||
3933 | &lock->v.new.owner); | ||
3934 | lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); | ||
3935 | if (lo == NULL) | ||
3936 | return nfserr_jukebox; | ||
3937 | *lst = alloc_init_lock_stateid(lo, fi, ost); | ||
3938 | if (*lst == NULL) { | ||
3939 | release_lockowner(lo); | ||
3940 | return nfserr_jukebox; | ||
3941 | } | ||
3942 | *new = true; | ||
3943 | return nfs_ok; | ||
3944 | } | ||
3945 | |||
3895 | /* | 3946 | /* |
3896 | * LOCK operation | 3947 | * LOCK operation |
3897 | */ | 3948 | */ |
@@ -3907,7 +3958,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3907 | struct file_lock file_lock; | 3958 | struct file_lock file_lock; |
3908 | struct file_lock conflock; | 3959 | struct file_lock conflock; |
3909 | __be32 status = 0; | 3960 | __be32 status = 0; |
3910 | unsigned int strhashval; | 3961 | bool new_state = false; |
3911 | int lkflg; | 3962 | int lkflg; |
3912 | int err; | 3963 | int err; |
3913 | 3964 | ||
@@ -3933,10 +3984,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3933 | * lock stateid. | 3984 | * lock stateid. |
3934 | */ | 3985 | */ |
3935 | struct nfs4_ol_stateid *open_stp = NULL; | 3986 | struct nfs4_ol_stateid *open_stp = NULL; |
3936 | 3987 | ||
3988 | if (nfsd4_has_session(cstate)) | ||
3989 | /* See rfc 5661 18.10.3: given clientid is ignored: */ | ||
3990 | memcpy(&lock->v.new.clientid, | ||
3991 | &cstate->session->se_client->cl_clientid, | ||
3992 | sizeof(clientid_t)); | ||
3993 | |||
3937 | status = nfserr_stale_clientid; | 3994 | status = nfserr_stale_clientid; |
3938 | if (!nfsd4_has_session(cstate) && | 3995 | if (STALE_CLIENTID(&lock->lk_new_clientid)) |
3939 | STALE_CLIENTID(&lock->lk_new_clientid)) | ||
3940 | goto out; | 3996 | goto out; |
3941 | 3997 | ||
3942 | /* validate and update open stateid and open seqid */ | 3998 | /* validate and update open stateid and open seqid */ |
@@ -3948,25 +4004,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3948 | goto out; | 4004 | goto out; |
3949 | open_sop = openowner(open_stp->st_stateowner); | 4005 | open_sop = openowner(open_stp->st_stateowner); |
3950 | status = nfserr_bad_stateid; | 4006 | status = nfserr_bad_stateid; |
3951 | if (!nfsd4_has_session(cstate) && | 4007 | if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, |
3952 | !same_clid(&open_sop->oo_owner.so_client->cl_clientid, | ||
3953 | &lock->v.new.clientid)) | 4008 | &lock->v.new.clientid)) |
3954 | goto out; | 4009 | goto out; |
3955 | /* create lockowner and lock stateid */ | 4010 | status = lookup_or_create_lock_state(cstate, open_stp, lock, |
3956 | fp = open_stp->st_file; | 4011 | &lock_stp, &new_state); |
3957 | strhashval = lock_ownerstr_hashval(fp->fi_inode, | 4012 | if (status) |
3958 | open_sop->oo_owner.so_client->cl_clientid.cl_id, | ||
3959 | &lock->v.new.owner); | ||
3960 | /* XXX: Do we need to check for duplicate stateowners on | ||
3961 | * the same file, or should they just be allowed (and | ||
3962 | * create new stateids)? */ | ||
3963 | status = nfserr_jukebox; | ||
3964 | lock_sop = alloc_init_lock_stateowner(strhashval, | ||
3965 | open_sop->oo_owner.so_client, open_stp, lock); | ||
3966 | if (lock_sop == NULL) | ||
3967 | goto out; | ||
3968 | lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); | ||
3969 | if (lock_stp == NULL) | ||
3970 | goto out; | 4013 | goto out; |
3971 | } else { | 4014 | } else { |
3972 | /* lock (lock owner + lock stateid) already exists */ | 4015 | /* lock (lock owner + lock stateid) already exists */ |
@@ -3976,10 +4019,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3976 | NFS4_LOCK_STID, &lock_stp); | 4019 | NFS4_LOCK_STID, &lock_stp); |
3977 | if (status) | 4020 | if (status) |
3978 | goto out; | 4021 | goto out; |
3979 | lock_sop = lockowner(lock_stp->st_stateowner); | ||
3980 | fp = lock_stp->st_file; | ||
3981 | } | 4022 | } |
3982 | /* lock_sop and lock_stp have been created or found */ | 4023 | lock_sop = lockowner(lock_stp->st_stateowner); |
4024 | fp = lock_stp->st_file; | ||
3983 | 4025 | ||
3984 | lkflg = setlkflg(lock->lk_type); | 4026 | lkflg = setlkflg(lock->lk_type); |
3985 | status = nfs4_check_openmode(lock_stp, lkflg); | 4027 | status = nfs4_check_openmode(lock_stp, lkflg); |
@@ -4054,7 +4096,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4054 | break; | 4096 | break; |
4055 | } | 4097 | } |
4056 | out: | 4098 | out: |
4057 | if (status && lock->lk_is_new && lock_sop) | 4099 | if (status && new_state) |
4058 | release_lockowner(lock_sop); | 4100 | release_lockowner(lock_sop); |
4059 | if (!cstate->replay_owner) | 4101 | if (!cstate->replay_owner) |
4060 | nfs4_unlock_state(); | 4102 | nfs4_unlock_state(); |
@@ -4251,7 +4293,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, | |||
4251 | struct nfs4_ol_stateid *stp; | 4293 | struct nfs4_ol_stateid *stp; |
4252 | struct xdr_netobj *owner = &rlockowner->rl_owner; | 4294 | struct xdr_netobj *owner = &rlockowner->rl_owner; |
4253 | struct list_head matches; | 4295 | struct list_head matches; |
4254 | int i; | 4296 | unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); |
4255 | __be32 status; | 4297 | __be32 status; |
4256 | 4298 | ||
4257 | dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", | 4299 | dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", |
@@ -4266,22 +4308,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, | |||
4266 | nfs4_lock_state(); | 4308 | nfs4_lock_state(); |
4267 | 4309 | ||
4268 | status = nfserr_locks_held; | 4310 | status = nfserr_locks_held; |
4269 | /* XXX: we're doing a linear search through all the lockowners. | ||
4270 | * Yipes! For now we'll just hope clients aren't really using | ||
4271 | * release_lockowner much, but eventually we have to fix these | ||
4272 | * data structures. */ | ||
4273 | INIT_LIST_HEAD(&matches); | 4311 | INIT_LIST_HEAD(&matches); |
4274 | for (i = 0; i < LOCK_HASH_SIZE; i++) { | 4312 | |
4275 | list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) { | 4313 | list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) { |
4276 | if (!same_owner_str(sop, owner, clid)) | 4314 | if (sop->so_is_open_owner) |
4277 | continue; | 4315 | continue; |
4278 | list_for_each_entry(stp, &sop->so_stateids, | 4316 | if (!same_owner_str(sop, owner, clid)) |
4279 | st_perstateowner) { | 4317 | continue; |
4280 | lo = lockowner(sop); | 4318 | list_for_each_entry(stp, &sop->so_stateids, |
4281 | if (check_for_locks(stp->st_file, lo)) | 4319 | st_perstateowner) { |
4282 | goto out; | 4320 | lo = lockowner(sop); |
4283 | list_add(&lo->lo_list, &matches); | 4321 | if (check_for_locks(stp->st_file, lo)) |
4284 | } | 4322 | goto out; |
4323 | list_add(&lo->lo_list, &matches); | ||
4285 | } | 4324 | } |
4286 | } | 4325 | } |
4287 | /* Clients probably won't expect us to return with some (but not all) | 4326 | /* Clients probably won't expect us to return with some (but not all) |
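Note the structural change here: the dedicated lock_ownerstr_hashtbl (and its LOCK_HASH_SIZE-wide linear scan) is gone; open owners and lock owners now share the single ownerstr_hashtbl, and a lookup that wants only one kind filters on sop->so_is_open_owner. The hash function itself is not in this hunk; presumably something like the following, reusing the existing opaque_hashval() string hash (the mixing details are an assumption):

static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
{
	unsigned int ret;

	/* one table for open owners and lock owners alike */
	ret = opaque_hashval(ownername->data, ownername->len);
	ret += clientid;
	return ret & OWNER_HASH_MASK;
}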
@@ -4394,16 +4433,127 @@ nfs4_check_open_reclaim(clientid_t *clid) | |||
4394 | return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; | 4433 | return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; |
4395 | } | 4434 | } |
4396 | 4435 | ||
4436 | #ifdef CONFIG_NFSD_FAULT_INJECTION | ||
4437 | |||
4438 | void nfsd_forget_clients(u64 num) | ||
4439 | { | ||
4440 | struct nfs4_client *clp, *next; | ||
4441 | int count = 0; | ||
4442 | |||
4443 | nfs4_lock_state(); | ||
4444 | list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { | ||
4445 | nfsd4_remove_clid_dir(clp); | ||
4446 | expire_client(clp); | ||
4447 | if (++count == num) | ||
4448 | break; | ||
4449 | } | ||
4450 | nfs4_unlock_state(); | ||
4451 | |||
4452 | printk(KERN_INFO "NFSD: Forgot %d clients\n", count); | ||
4453 | } | ||
4454 | |||
4455 | static void release_lockowner_sop(struct nfs4_stateowner *sop) | ||
4456 | { | ||
4457 | release_lockowner(lockowner(sop)); | ||
4458 | } | ||
4459 | |||
4460 | static void release_openowner_sop(struct nfs4_stateowner *sop) | ||
4461 | { | ||
4462 | release_openowner(openowner(sop)); | ||
4463 | } | ||
4464 | |||
4465 | static int nfsd_release_n_owners(u64 num, bool is_open_owner, | ||
4466 | void (*release_sop)(struct nfs4_stateowner *)) | ||
4467 | { | ||
4468 | int i, count = 0; | ||
4469 | struct nfs4_stateowner *sop, *next; | ||
4470 | |||
4471 | for (i = 0; i < OWNER_HASH_SIZE; i++) { | ||
4472 | list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) { | ||
4473 | if (sop->so_is_open_owner != is_open_owner) | ||
4474 | continue; | ||
4475 | release_sop(sop); | ||
4476 | if (++count == num) | ||
4477 | return count; | ||
4478 | } | ||
4479 | } | ||
4480 | return count; | ||
4481 | } | ||
4482 | |||
4483 | void nfsd_forget_locks(u64 num) | ||
4484 | { | ||
4485 | int count; | ||
4486 | |||
4487 | nfs4_lock_state(); | ||
4488 | count = nfsd_release_n_owners(num, false, release_lockowner_sop); | ||
4489 | nfs4_unlock_state(); | ||
4490 | |||
4491 | printk(KERN_INFO "NFSD: Forgot %d locks\n", count); | ||
4492 | } | ||
4493 | |||
4494 | void nfsd_forget_openowners(u64 num) | ||
4495 | { | ||
4496 | int count; | ||
4497 | |||
4498 | nfs4_lock_state(); | ||
4499 | count = nfsd_release_n_owners(num, true, release_openowner_sop); | ||
4500 | nfs4_unlock_state(); | ||
4501 | |||
4502 | printk(KERN_INFO "NFSD: Forgot %d open owners\n", count); | ||
4503 | } | ||
4504 | |||
4505 | int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) | ||
4506 | { | ||
4507 | int i, count = 0; | ||
4508 | struct nfs4_file *fp, *fnext; | ||
4509 | struct nfs4_delegation *dp, *dnext; | ||
4510 | |||
4511 | for (i = 0; i < FILE_HASH_SIZE; i++) { | ||
4512 | list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) { | ||
4513 | list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) { | ||
4514 | deleg_func(dp); | ||
4515 | if (++count == num) | ||
4516 | return count; | ||
4517 | } | ||
4518 | } | ||
4519 | } | ||
4520 | |||
4521 | return count; | ||
4522 | } | ||
4523 | |||
4524 | void nfsd_forget_delegations(u64 num) | ||
4525 | { | ||
4526 | int count; | ||
4527 | |||
4528 | nfs4_lock_state(); | ||
4529 | count = nfsd_process_n_delegations(num, unhash_delegation); | ||
4530 | nfs4_unlock_state(); | ||
4531 | |||
4532 | printk(KERN_INFO "NFSD: Forgot %d delegations\n", count); | ||
4533 | } | ||
4534 | |||
4535 | void nfsd_recall_delegations(u64 num) | ||
4536 | { | ||
4537 | int count; | ||
4538 | |||
4539 | nfs4_lock_state(); | ||
4540 | spin_lock(&recall_lock); | ||
4541 | count = nfsd_process_n_delegations(num, nfsd_break_one_deleg); | ||
4542 | spin_unlock(&recall_lock); | ||
4543 | nfs4_unlock_state(); | ||
4544 | |||
4545 | printk(KERN_INFO "NFSD: Recalled %d delegations\n", count); | ||
4546 | } | ||
4547 | |||
4548 | #endif /* CONFIG_NFSD_FAULT_INJECTION */ | ||
4549 | |||
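The nfsd_forget_*()/nfsd_recall_*() hooks above are the state-manipulation half of the new fault-injection facility; the control-file half (nfsd_fault_inject_init()/nfsd_fault_inject_cleanup(), wired into init_nfsd() below) lives in a fault_inject.c that this diff does not show. Presumably each hook is exposed as a write-only debugfs file taking a count; a minimal sketch of that arrangement, with all names beyond the hooks themselves assumed:

struct nfsd_fault_inject_op {
	char *file;
	void (*op)(u64);
};

static struct nfsd_fault_inject_op inject_ops[] = {
	{ "forget_clients",     nfsd_forget_clients },
	{ "forget_locks",       nfsd_forget_locks },
	{ "forget_openowners",  nfsd_forget_openowners },
	{ "forget_delegations", nfsd_forget_delegations },
	{ "recall_delegations", nfsd_recall_delegations },
};

static int nfsd_inject_set(void *op_ptr, u64 val)
{
	struct nfsd_fault_inject_op *op = op_ptr;

	op->op(val);		/* e.g. echo 1 > .../forget_clients */
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd_inject, NULL, nfsd_inject_set, "%llu\n");

int nfsd_fault_inject_init(void)
{
	unsigned int i;
	struct dentry *dir = debugfs_create_dir("nfsd", NULL);

	if (!dir)
		return -ENOMEM;
	for (i = 0; i < ARRAY_SIZE(inject_ops); i++)
		debugfs_create_file(inject_ops[i].file, S_IWUSR, dir,
				    &inject_ops[i], &fops_nfsd_inject);
	return 0;
}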
4397 | /* initialization to perform at module load time: */ | 4550 | /* initialization to perform at module load time: */ |
4398 | 4551 | ||
4399 | int | 4552 | void |
4400 | nfs4_state_init(void) | 4553 | nfs4_state_init(void) |
4401 | { | 4554 | { |
4402 | int i, status; | 4555 | int i; |
4403 | 4556 | ||
4404 | status = nfsd4_init_slabs(); | ||
4405 | if (status) | ||
4406 | return status; | ||
4407 | for (i = 0; i < CLIENT_HASH_SIZE; i++) { | 4557 | for (i = 0; i < CLIENT_HASH_SIZE; i++) { |
4408 | INIT_LIST_HEAD(&conf_id_hashtbl[i]); | 4558 | INIT_LIST_HEAD(&conf_id_hashtbl[i]); |
4409 | INIT_LIST_HEAD(&conf_str_hashtbl[i]); | 4559 | INIT_LIST_HEAD(&conf_str_hashtbl[i]); |
@@ -4416,18 +4566,15 @@ nfs4_state_init(void) | |||
4416 | for (i = 0; i < FILE_HASH_SIZE; i++) { | 4566 | for (i = 0; i < FILE_HASH_SIZE; i++) { |
4417 | INIT_LIST_HEAD(&file_hashtbl[i]); | 4567 | INIT_LIST_HEAD(&file_hashtbl[i]); |
4418 | } | 4568 | } |
4419 | for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) { | 4569 | for (i = 0; i < OWNER_HASH_SIZE; i++) { |
4420 | INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]); | 4570 | INIT_LIST_HEAD(&ownerstr_hashtbl[i]); |
4421 | } | ||
4422 | for (i = 0; i < LOCK_HASH_SIZE; i++) { | ||
4423 | INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); | ||
4424 | } | 4571 | } |
4425 | memset(&onestateid, ~0, sizeof(stateid_t)); | 4572 | for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) |
4573 | INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); | ||
4426 | INIT_LIST_HEAD(&close_lru); | 4574 | INIT_LIST_HEAD(&close_lru); |
4427 | INIT_LIST_HEAD(&client_lru); | 4575 | INIT_LIST_HEAD(&client_lru); |
4428 | INIT_LIST_HEAD(&del_recall_lru); | 4576 | INIT_LIST_HEAD(&del_recall_lru); |
4429 | reclaim_str_hashtbl_size = 0; | 4577 | reclaim_str_hashtbl_size = 0; |
4430 | return 0; | ||
4431 | } | 4578 | } |
4432 | 4579 | ||
4433 | static void | 4580 | static void |
@@ -4526,7 +4673,6 @@ __nfs4_state_shutdown(void) | |||
4526 | spin_unlock(&recall_lock); | 4673 | spin_unlock(&recall_lock); |
4527 | list_for_each_safe(pos, next, &reaplist) { | 4674 | list_for_each_safe(pos, next, &reaplist) { |
4528 | dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); | 4675 | dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); |
4529 | list_del_init(&dp->dl_recall_lru); | ||
4530 | unhash_delegation(dp); | 4676 | unhash_delegation(dp); |
4531 | } | 4677 | } |
4532 | 4678 | ||
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b6fa792d6b85..0ec5a1b9700e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -215,10 +215,9 @@ defer_free(struct nfsd4_compoundargs *argp, | |||
215 | static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) | 215 | static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) |
216 | { | 216 | { |
217 | if (p == argp->tmp) { | 217 | if (p == argp->tmp) { |
218 | p = kmalloc(nbytes, GFP_KERNEL); | 218 | p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); |
219 | if (!p) | 219 | if (!p) |
220 | return NULL; | 220 | return NULL; |
221 | memcpy(p, argp->tmp, nbytes); | ||
222 | } else { | 221 | } else { |
223 | BUG_ON(p != argp->tmpp); | 222 | BUG_ON(p != argp->tmpp); |
224 | argp->tmpp = NULL; | 223 | argp->tmpp = NULL; |
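The savemem() hunk is a pure simplification: kmemdup(src, len, gfp) is exactly the kmalloc()-plus-memcpy() pair it replaces, returning NULL on allocation failure just as before. For reference:

/* kmemdup(argp->tmp, nbytes, GFP_KERNEL) is equivalent to: */
void *p = kmalloc(nbytes, GFP_KERNEL);
if (p)
	memcpy(p, argp->tmp, nbytes);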
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index bb4a11d58a5a..748eda93ce59 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include "idmap.h" | 18 | #include "idmap.h" |
19 | #include "nfsd.h" | 19 | #include "nfsd.h" |
20 | #include "cache.h" | 20 | #include "cache.h" |
21 | #include "fault_inject.h" | ||
21 | 22 | ||
22 | /* | 23 | /* |
23 | * We have a single directory with several nodes in it. | 24 | * We have a single directory with several nodes in it. |
@@ -1128,9 +1129,13 @@ static int __init init_nfsd(void) | |||
1128 | int retval; | 1129 | int retval; |
1129 | printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); | 1130 | printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); |
1130 | 1131 | ||
1131 | retval = nfs4_state_init(); /* nfs4 locking state */ | 1132 | retval = nfsd4_init_slabs(); |
1132 | if (retval) | 1133 | if (retval) |
1133 | return retval; | 1134 | return retval; |
1135 | nfs4_state_init(); | ||
1136 | retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ | ||
1137 | if (retval) | ||
1138 | goto out_free_slabs; | ||
1134 | nfsd_stat_init(); /* Statistics */ | 1139 | nfsd_stat_init(); /* Statistics */ |
1135 | retval = nfsd_reply_cache_init(); | 1140 | retval = nfsd_reply_cache_init(); |
1136 | if (retval) | 1141 | if (retval) |
@@ -1161,6 +1166,8 @@ out_free_cache: | |||
1161 | nfsd_reply_cache_shutdown(); | 1166 | nfsd_reply_cache_shutdown(); |
1162 | out_free_stat: | 1167 | out_free_stat: |
1163 | nfsd_stat_shutdown(); | 1168 | nfsd_stat_shutdown(); |
1169 | nfsd_fault_inject_cleanup(); | ||
1170 | out_free_slabs: | ||
1164 | nfsd4_free_slabs(); | 1171 | nfsd4_free_slabs(); |
1165 | return retval; | 1172 | return retval; |
1166 | } | 1173 | } |
@@ -1175,6 +1182,7 @@ static void __exit exit_nfsd(void) | |||
1175 | nfsd_lockd_shutdown(); | 1182 | nfsd_lockd_shutdown(); |
1176 | nfsd_idmap_shutdown(); | 1183 | nfsd_idmap_shutdown(); |
1177 | nfsd4_free_slabs(); | 1184 | nfsd4_free_slabs(); |
1185 | nfsd_fault_inject_cleanup(); | ||
1178 | unregister_filesystem(&nfsd_fs_type); | 1186 | unregister_filesystem(&nfsd_fs_type); |
1179 | } | 1187 | } |
1180 | 1188 | ||
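Since slab creation is now the only fallible step that used to hide inside nfs4_state_init(), init_nfsd() takes over the ordering: slabs first, then the (now void) table initialisation, then fault injection, with failures unwinding in reverse through the goto labels. The idiom in miniature, with hypothetical names:

static int __init example_init(void)
{
	int retval;

	retval = setup_slabs();		/* cf. nfsd4_init_slabs() */
	if (retval)
		return retval;
	init_tables();			/* cf. nfs4_state_init(): cannot fail */
	retval = setup_debug();		/* cf. nfsd_fault_inject_init() */
	if (retval)
		goto out_free_slabs;
	return 0;

out_free_slabs:
	free_slabs();			/* undo in reverse order */
	return retval;
}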
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 58134a23fdfb..1d1e8589b4ce 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
@@ -104,14 +104,16 @@ static inline int nfsd_v4client(struct svc_rqst *rq) | |||
104 | */ | 104 | */ |
105 | #ifdef CONFIG_NFSD_V4 | 105 | #ifdef CONFIG_NFSD_V4 |
106 | extern unsigned int max_delegations; | 106 | extern unsigned int max_delegations; |
107 | int nfs4_state_init(void); | 107 | void nfs4_state_init(void); |
108 | int nfsd4_init_slabs(void); | ||
108 | void nfsd4_free_slabs(void); | 109 | void nfsd4_free_slabs(void); |
109 | int nfs4_state_start(void); | 110 | int nfs4_state_start(void); |
110 | void nfs4_state_shutdown(void); | 111 | void nfs4_state_shutdown(void); |
111 | void nfs4_reset_lease(time_t leasetime); | 112 | void nfs4_reset_lease(time_t leasetime); |
112 | int nfs4_reset_recoverydir(char *recdir); | 113 | int nfs4_reset_recoverydir(char *recdir); |
113 | #else | 114 | #else |
114 | static inline int nfs4_state_init(void) { return 0; } | 115 | static inline void nfs4_state_init(void) { } |
116 | static inline int nfsd4_init_slabs(void) { return 0; } | ||
115 | static inline void nfsd4_free_slabs(void) { } | 117 | static inline void nfsd4_free_slabs(void) { } |
116 | static inline int nfs4_state_start(void) { return 0; } | 118 | static inline int nfs4_state_start(void) { return 0; } |
117 | static inline void nfs4_state_shutdown(void) { } | 119 | static inline void nfs4_state_shutdown(void) { } |
@@ -338,15 +340,15 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) | |||
338 | } | 340 | } |
339 | 341 | ||
340 | /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ | 342 | /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ |
341 | #define NFSD_WRITEONLY_ATTRS_WORD1 \ | 343 | #define NFSD_WRITEONLY_ATTRS_WORD1 \ |
342 | (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) | 344 | (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) |
343 | 345 | ||
344 | /* These are the only attrs allowed in CREATE/OPEN/SETATTR. */ | 346 | /* These are the only attrs allowed in CREATE/OPEN/SETATTR. */ |
345 | #define NFSD_WRITEABLE_ATTRS_WORD0 \ | 347 | #define NFSD_WRITEABLE_ATTRS_WORD0 \ |
346 | (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) | 348 | (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) |
347 | #define NFSD_WRITEABLE_ATTRS_WORD1 \ | 349 | #define NFSD_WRITEABLE_ATTRS_WORD1 \ |
348 | (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | 350 | (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ |
349 | | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) | 351 | | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) |
350 | #define NFSD_WRITEABLE_ATTRS_WORD2 0 | 352 | #define NFSD_WRITEABLE_ATTRS_WORD2 0 |
351 | 353 | ||
352 | #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ | 354 | #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index a3cf38476a1b..ffb5df1db94f 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -366,6 +366,7 @@ struct nfs4_openowner { | |||
366 | 366 | ||
367 | struct nfs4_lockowner { | 367 | struct nfs4_lockowner { |
368 | struct nfs4_stateowner lo_owner; /* must be first element */ | 368 | struct nfs4_stateowner lo_owner; /* must be first element */ |
369 | struct list_head lo_owner_ino_hash; /* hash by owner,file */ | ||
369 | struct list_head lo_perstateid; /* for lockowners only */ | 370 | struct list_head lo_perstateid; /* for lockowners only */ |
370 | struct list_head lo_list; /* for temporary uses */ | 371 | struct list_head lo_list; /* for temporary uses */ |
371 | }; | 372 | }; |
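The new lo_owner_ino_hash member is the list head for the lockowner_ino_hashtbl[] initialised in nfs4_state_init() above: lockowners get a second hash keyed by owner and file, so the LOCK path can find an existing lockowner for an inode without walking every owner on the client. The hash function is outside this diff; a sketch of the likely shape (helper names assumed):

/* second hash for lockowners, by (inode, client, owner string) */
static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id,
					  struct xdr_netobj *ownername)
{
	return (file_hashval(inode) + cl_id
			+ opaque_hashval(ownername->data, ownername->len))
		& LOCKOWNER_INO_HASH_MASK;
}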
@@ -482,7 +483,7 @@ extern void nfsd4_shutdown_recdir(void); | |||
482 | extern int nfs4_client_to_reclaim(const char *name); | 483 | extern int nfs4_client_to_reclaim(const char *name); |
483 | extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); | 484 | extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); |
484 | extern void nfsd4_recdir_purge_old(void); | 485 | extern void nfsd4_recdir_purge_old(void); |
485 | extern int nfsd4_create_clid_dir(struct nfs4_client *clp); | 486 | extern void nfsd4_create_clid_dir(struct nfs4_client *clp); |
486 | extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); | 487 | extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); |
487 | extern void release_session_client(struct nfsd4_session *); | 488 | extern void release_session_client(struct nfsd4_session *); |
488 | extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); | 489 | extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d25a723b68ad..edf6d3ed8777 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -594,8 +594,19 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac | |||
594 | return error; | 594 | return error; |
595 | } | 595 | } |
596 | 596 | ||
597 | #define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction." | 597 | /* |
598 | #define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type" | 598 | * NFS junction information is stored in an extended attribute. |
599 | */ | ||
600 | #define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs" | ||
601 | |||
602 | /** | ||
603 | * nfsd4_is_junction - Test if an object could be an NFS junction | ||
604 | * | ||
605 | * @dentry: object to test | ||
606 | * | ||
607 | * Returns 1 if "dentry" appears to contain NFS junction information. | ||
608 | * Otherwise 0 is returned. | ||
609 | */ | ||
599 | int nfsd4_is_junction(struct dentry *dentry) | 610 | int nfsd4_is_junction(struct dentry *dentry) |
600 | { | 611 | { |
601 | struct inode *inode = dentry->d_inode; | 612 | struct inode *inode = dentry->d_inode; |
@@ -606,7 +617,7 @@ int nfsd4_is_junction(struct dentry *dentry) | |||
606 | return 0; | 617 | return 0; |
607 | if (!(inode->i_mode & S_ISVTX)) | 618 | if (!(inode->i_mode & S_ISVTX)) |
608 | return 0; | 619 | return 0; |
609 | if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0) | 620 | if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0) |
610 | return 0; | 621 | return 0; |
611 | return 1; | 622 | return 1; |
612 | } | 623 | } |
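nfsd4_is_junction() only probes for the xattr: vfs_getxattr() with a NULL buffer and zero size returns the attribute's length if it exists, or zero/a negative error otherwise, so no copy is needed. A caller that wanted the value would use the usual two-step pattern, e.g.:

ssize_t len = vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0);
if (len > 0) {
	char *buf = kmalloc(len, GFP_KERNEL);

	if (buf)	/* second call copies the value out */
		len = vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, buf, len);
}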
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 44a88a9fa2c8..fea6bd5831dc 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c | |||
@@ -52,7 +52,7 @@ static const struct utf8_table utf8_table[] = | |||
52 | #define SURROGATE_LOW 0x00000400 | 52 | #define SURROGATE_LOW 0x00000400 |
53 | #define SURROGATE_BITS 0x000003ff | 53 | #define SURROGATE_BITS 0x000003ff |
54 | 54 | ||
55 | int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) | 55 | int utf8_to_utf32(const u8 *s, int inlen, unicode_t *pu) |
56 | { | 56 | { |
57 | unsigned long l; | 57 | unsigned long l; |
58 | int c0, c, nc; | 58 | int c0, c, nc; |
@@ -71,7 +71,7 @@ int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) | |||
71 | *pu = (unicode_t) l; | 71 | *pu = (unicode_t) l; |
72 | return nc; | 72 | return nc; |
73 | } | 73 | } |
74 | if (len <= nc) | 74 | if (inlen <= nc) |
75 | return -1; | 75 | return -1; |
76 | s++; | 76 | s++; |
77 | c = (*s ^ 0x80) & 0xFF; | 77 | c = (*s ^ 0x80) & 0xFF; |
@@ -83,7 +83,7 @@ int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) | |||
83 | } | 83 | } |
84 | EXPORT_SYMBOL(utf8_to_utf32); | 84 | EXPORT_SYMBOL(utf8_to_utf32); |
85 | 85 | ||
86 | int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) | 86 | int utf32_to_utf8(unicode_t u, u8 *s, int maxout) |
87 | { | 87 | { |
88 | unsigned long l; | 88 | unsigned long l; |
89 | int c, nc; | 89 | int c, nc; |
@@ -97,7 +97,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) | |||
97 | return -1; | 97 | return -1; |
98 | 98 | ||
99 | nc = 0; | 99 | nc = 0; |
100 | for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) { | 100 | for (t = utf8_table; t->cmask && maxout; t++, maxout--) { |
101 | nc++; | 101 | nc++; |
102 | if (l <= t->lmask) { | 102 | if (l <= t->lmask) { |
103 | c = t->shift; | 103 | c = t->shift; |
@@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) | |||
114 | } | 114 | } |
115 | EXPORT_SYMBOL(utf32_to_utf8); | 115 | EXPORT_SYMBOL(utf32_to_utf8); |
116 | 116 | ||
117 | int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) | 117 | static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) |
118 | { | ||
119 | switch (endian) { | ||
120 | default: | ||
121 | *s = (wchar_t) c; | ||
122 | break; | ||
123 | case UTF16_LITTLE_ENDIAN: | ||
124 | *s = __cpu_to_le16(c); | ||
125 | break; | ||
126 | case UTF16_BIG_ENDIAN: | ||
127 | *s = __cpu_to_be16(c); | ||
128 | break; | ||
129 | } | ||
130 | } | ||
131 | |||
132 | int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian, | ||
133 | wchar_t *pwcs, int maxout) | ||
118 | { | 134 | { |
119 | u16 *op; | 135 | u16 *op; |
120 | int size; | 136 | int size; |
121 | unicode_t u; | 137 | unicode_t u; |
122 | 138 | ||
123 | op = pwcs; | 139 | op = pwcs; |
124 | while (*s && len > 0) { | 140 | while (inlen > 0 && maxout > 0 && *s) { |
125 | if (*s & 0x80) { | 141 | if (*s & 0x80) { |
126 | size = utf8_to_utf32(s, len, &u); | 142 | size = utf8_to_utf32(s, inlen, &u); |
127 | if (size < 0) | 143 | if (size < 0) |
128 | return -EINVAL; | 144 | return -EINVAL; |
145 | s += size; | ||
146 | inlen -= size; | ||
129 | 147 | ||
130 | if (u >= PLANE_SIZE) { | 148 | if (u >= PLANE_SIZE) { |
149 | if (maxout < 2) | ||
150 | break; | ||
131 | u -= PLANE_SIZE; | 151 | u -= PLANE_SIZE; |
132 | *op++ = (wchar_t) (SURROGATE_PAIR | | 152 | put_utf16(op++, SURROGATE_PAIR | |
133 | ((u >> 10) & SURROGATE_BITS)); | 153 | ((u >> 10) & SURROGATE_BITS), |
134 | *op++ = (wchar_t) (SURROGATE_PAIR | | 154 | endian); |
155 | put_utf16(op++, SURROGATE_PAIR | | ||
135 | SURROGATE_LOW | | 156 | SURROGATE_LOW | |
136 | (u & SURROGATE_BITS)); | 157 | (u & SURROGATE_BITS), |
158 | endian); | ||
159 | maxout -= 2; | ||
137 | } else { | 160 | } else { |
138 | *op++ = (wchar_t) u; | 161 | put_utf16(op++, u, endian); |
162 | maxout--; | ||
139 | } | 163 | } |
140 | s += size; | ||
141 | len -= size; | ||
142 | } else { | 164 | } else { |
143 | *op++ = *s++; | 165 | put_utf16(op++, *s++, endian); |
144 | len--; | 166 | inlen--; |
167 | maxout--; | ||
145 | } | 168 | } |
146 | } | 169 | } |
147 | return op - pwcs; | 170 | return op - pwcs; |
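Callers of utf8s_to_utf16s() now state the target endianness and pass an explicit bound on output units instead of trusting the destination buffer. Typical use under the new signature (names and buffer size illustrative):

const u8 *name = input;		/* some NUL-terminated UTF-8 string */
wchar_t name_utf16[64];
int units;

/* returns UTF-16 units written, or -EINVAL on a malformed sequence */
units = utf8s_to_utf16s(name, strlen((const char *)name),
			UTF16_LITTLE_ENDIAN,
			name_utf16, ARRAY_SIZE(name_utf16));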
@@ -160,27 +183,27 @@ static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian) | |||
160 | } | 183 | } |
161 | } | 184 | } |
162 | 185 | ||
163 | int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, | 186 | int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian, |
164 | u8 *s, int maxlen) | 187 | u8 *s, int maxout) |
165 | { | 188 | { |
166 | u8 *op; | 189 | u8 *op; |
167 | int size; | 190 | int size; |
168 | unsigned long u, v; | 191 | unsigned long u, v; |
169 | 192 | ||
170 | op = s; | 193 | op = s; |
171 | while (len > 0 && maxlen > 0) { | 194 | while (inlen > 0 && maxout > 0) { |
172 | u = get_utf16(*pwcs, endian); | 195 | u = get_utf16(*pwcs, endian); |
173 | if (!u) | 196 | if (!u) |
174 | break; | 197 | break; |
175 | pwcs++; | 198 | pwcs++; |
176 | len--; | 199 | inlen--; |
177 | if (u > 0x7f) { | 200 | if (u > 0x7f) { |
178 | if ((u & SURROGATE_MASK) == SURROGATE_PAIR) { | 201 | if ((u & SURROGATE_MASK) == SURROGATE_PAIR) { |
179 | if (u & SURROGATE_LOW) { | 202 | if (u & SURROGATE_LOW) { |
180 | /* Ignore character and move on */ | 203 | /* Ignore character and move on */ |
181 | continue; | 204 | continue; |
182 | } | 205 | } |
183 | if (len <= 0) | 206 | if (inlen <= 0) |
184 | break; | 207 | break; |
185 | v = get_utf16(*pwcs, endian); | 208 | v = get_utf16(*pwcs, endian); |
186 | if ((v & SURROGATE_MASK) != SURROGATE_PAIR || | 209 | if ((v & SURROGATE_MASK) != SURROGATE_PAIR || |
@@ -191,18 +214,18 @@ int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, | |||
191 | u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10) | 214 | u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10) |
192 | + (v & SURROGATE_BITS); | 215 | + (v & SURROGATE_BITS); |
193 | pwcs++; | 216 | pwcs++; |
194 | len--; | 217 | inlen--; |
195 | } | 218 | } |
196 | size = utf32_to_utf8(u, op, maxlen); | 219 | size = utf32_to_utf8(u, op, maxout); |
197 | if (size == -1) { | 220 | if (size == -1) { |
198 | /* Ignore character and move on */ | 221 | /* Ignore character and move on */ |
199 | } else { | 222 | } else { |
200 | op += size; | 223 | op += size; |
201 | maxlen -= size; | 224 | maxout -= size; |
202 | } | 225 | } |
203 | } else { | 226 | } else { |
204 | *op++ = (u8) u; | 227 | *op++ = (u8) u; |
205 | maxlen--; | 228 | maxout--; |
206 | } | 229 | } |
207 | } | 230 | } |
208 | return op - s; | 231 | return op - s; |
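For reference, the surrogate logic above is the standard UTF-16 pairing: a code point at or above PLANE_SIZE (0x10000) is split into a high and a low surrogate carrying ten bits each, and decoding inverts it. Pulled out of the loops:

/* encode, for u >= PLANE_SIZE (see utf8s_to_utf16s above) */
u -= PLANE_SIZE;
hi = SURROGATE_PAIR | ((u >> 10) & SURROGATE_BITS);		/* 0xd800 range */
lo = SURROGATE_PAIR | SURROGATE_LOW | (u & SURROGATE_BITS);	/* 0xdc00 range */

/* decode, as in utf16s_to_utf8s above */
u = PLANE_SIZE + ((hi & SURROGATE_BITS) << 10) + (lo & SURROGATE_BITS);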
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index e14587d55689..f104d565b682 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c | |||
@@ -135,9 +135,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) | |||
135 | 135 | ||
136 | mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; | 136 | mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; |
137 | 137 | ||
138 | /* 1 from caller and 1 for being on i_list/g_list */ | ||
139 | BUG_ON(atomic_read(&mark->refcnt) < 2); | ||
140 | |||
141 | spin_lock(&group->mark_lock); | 138 | spin_lock(&group->mark_lock); |
142 | 139 | ||
143 | if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { | 140 | if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { |
@@ -182,6 +179,11 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) | |||
182 | iput(inode); | 179 | iput(inode); |
183 | 180 | ||
184 | /* | 181 | /* |
182 | * We don't necessarily have a ref on the mark from the caller, so the | ||
183 | * above iput may have already destroyed it. Don't touch it from now on. | ||
184 | */ | ||
185 | |||
186 | /* | ||
185 | * it's possible that this group tried to destroy itself, but this | 187 | * it's possible that this group tried to destroy itself, but this |
186 | * mark was simultaneously being freed by inode. If that's the | 188 |
187 | * case, we finish freeing the group here. | 189 | * case, we finish freeing the group here. |
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 608be4516091..5a4a8af5c406 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -3198,7 +3198,7 @@ MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparm | |||
3198 | MODULE_VERSION(NTFS_VERSION); | 3198 | MODULE_VERSION(NTFS_VERSION); |
3199 | MODULE_LICENSE("GPL"); | 3199 | MODULE_LICENSE("GPL"); |
3200 | #ifdef DEBUG | 3200 | #ifdef DEBUG |
3201 | module_param(debug_msgs, bool, 0); | 3201 | module_param(debug_msgs, bint, 0); |
3202 | MODULE_PARM_DESC(debug_msgs, "Enable debug messages."); | 3202 | MODULE_PARM_DESC(debug_msgs, "Enable debug messages."); |
3203 | #endif | 3203 | #endif |
3204 | 3204 | ||
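The ntfs change is part of the tree-wide bool-parameter cleanup: debug_msgs is an int, so the strict bool parameter type no longer fits, and bint ("bool or int") keeps old debug_msgs=0/1 command lines working while also accepting y/n. Declaration shape, assuming the variable stays an int:

static int debug_msgs;
/* bint parses y/n/Y/N like bool, but also plain 0/1 integers */
module_param(debug_msgs, bint, 0);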
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index a5ebe421195f..286edf1e231f 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
@@ -827,8 +827,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
827 | goto out; | 827 | goto out; |
828 | } | 828 | } |
829 | 829 | ||
830 | rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name), | 830 | rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN, |
831 | &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN); | 831 | NULL, NULL, NULL, &fsdlm); |
832 | if (rc) { | 832 | if (rc) { |
833 | ocfs2_live_connection_drop(control); | 833 | ocfs2_live_connection_drop(control); |
834 | goto out; | 834 | goto out; |
diff --git a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c +++ b/fs/pipe.c | |||
@@ -1137,7 +1137,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) | |||
1137 | if (nr_pages < pipe->nrbufs) | 1137 | if (nr_pages < pipe->nrbufs) |
1138 | return -EBUSY; | 1138 | return -EBUSY; |
1139 | 1139 | ||
1140 | bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL); | 1140 | bufs = kcalloc(nr_pages, sizeof(*bufs), GFP_KERNEL | __GFP_NOWARN); |
1141 | if (unlikely(!bufs)) | 1141 | if (unlikely(!bufs)) |
1142 | return -ENOMEM; | 1142 | return -ENOMEM; |
1143 | 1143 | ||
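pipe_set_size() derives this allocation size from a user-supplied page count, so __GFP_NOWARN keeps an over-large request from triggering allocation-failure warnings in the log; the existing -ENOMEM return already reports the failure to the caller. The same idiom applies to any user-driven allocation size:

/* user controls nr_elems: fail quietly instead of warning loudly */
elems = kcalloc(nr_elems, sizeof(*elems), GFP_KERNEL | __GFP_NOWARN);
if (unlikely(!elems))
	return -ENOMEM;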
diff --git a/fs/proc/array.c b/fs/proc/array.c index 8c344f037bd0..c602b8d20f06 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -380,7 +380,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
380 | 380 | ||
381 | state = *get_task_state(task); | 381 | state = *get_task_state(task); |
382 | vsize = eip = esp = 0; | 382 | vsize = eip = esp = 0; |
383 | permitted = ptrace_may_access(task, PTRACE_MODE_READ); | 383 | permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT); |
384 | mm = get_task_mm(task); | 384 | mm = get_task_mm(task); |
385 | if (mm) { | 385 | if (mm) { |
386 | vsize = task_vsize(mm); | 386 | vsize = task_vsize(mm); |
@@ -464,7 +464,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
464 | 464 | ||
465 | seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ | 465 | seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ |
466 | %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ | 466 | %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ |
467 | %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", | 467 | %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n", |
468 | pid_nr_ns(pid, ns), | 468 | pid_nr_ns(pid, ns), |
469 | tcomm, | 469 | tcomm, |
470 | state, | 470 | state, |
@@ -511,7 +511,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
511 | task->policy, | 511 | task->policy, |
512 | (unsigned long long)delayacct_blkio_ticks(task), | 512 | (unsigned long long)delayacct_blkio_ticks(task), |
513 | cputime_to_clock_t(gtime), | 513 | cputime_to_clock_t(gtime), |
514 | cputime_to_clock_t(cgtime)); | 514 | cputime_to_clock_t(cgtime), |
515 | (mm && permitted) ? mm->start_data : 0, | ||
516 | (mm && permitted) ? mm->end_data : 0, | ||
517 | (mm && permitted) ? mm->start_brk : 0); | ||
515 | if (mm) | 518 | if (mm) |
516 | mmput(mm); | 519 | mmput(mm); |
517 | return 0; | 520 | return 0; |
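/proc/<pid>/stat thus gains three trailing fields, start_data, end_data and start_brk (fields 45-47 counting from pid), reported as 0 unless the reader passes the ptrace check. A userspace reader has to skip past the parenthesised comm field before counting; a minimal sketch with error handling elided:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[4096];
	char *p;
	unsigned long start_data, end_data, start_brk;
	int i;
	FILE *f = fopen("/proc/self/stat", "r");

	fgets(buf, sizeof(buf), f);
	fclose(f);
	p = strrchr(buf, ')') + 2;	/* comm may contain spaces/parens */
	for (i = 0; i < 42; i++)	/* skip fields 3..44, state..cguest_time */
		p = strchr(p, ' ') + 1;
	sscanf(p, "%lu %lu %lu", &start_data, &end_data, &start_brk);
	printf("start_data=%lu end_data=%lu start_brk=%lu\n",
	       start_data, end_data, start_brk);
	return 0;
}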
diff --git a/fs/proc/base.c b/fs/proc/base.c index a1dddda999f2..d4548dd49b02 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -83,9 +83,11 @@ | |||
83 | #include <linux/pid_namespace.h> | 83 | #include <linux/pid_namespace.h> |
84 | #include <linux/fs_struct.h> | 84 | #include <linux/fs_struct.h> |
85 | #include <linux/slab.h> | 85 | #include <linux/slab.h> |
86 | #include <linux/flex_array.h> | ||
86 | #ifdef CONFIG_HARDWALL | 87 | #ifdef CONFIG_HARDWALL |
87 | #include <asm/hardwall.h> | 88 | #include <asm/hardwall.h> |
88 | #endif | 89 | #endif |
90 | #include <trace/events/oom.h> | ||
89 | #include "internal.h" | 91 | #include "internal.h" |
90 | 92 | ||
91 | /* NOTE: | 93 | /* NOTE: |
@@ -133,6 +135,8 @@ struct pid_entry { | |||
133 | NULL, &proc_single_file_operations, \ | 135 | NULL, &proc_single_file_operations, \ |
134 | { .proc_show = show } ) | 136 | { .proc_show = show } ) |
135 | 137 | ||
138 | static int proc_fd_permission(struct inode *inode, int mask); | ||
139 | |||
136 | /* | 140 | /* |
137 | * Count the number of hardlinks for the pid_entry table, excluding the . | 141 | * Count the number of hardlinks for the pid_entry table, excluding the . |
138 | * and .. links. | 142 | * and .. links. |
@@ -165,9 +169,9 @@ static int get_task_root(struct task_struct *task, struct path *root) | |||
165 | return result; | 169 | return result; |
166 | } | 170 | } |
167 | 171 | ||
168 | static int proc_cwd_link(struct inode *inode, struct path *path) | 172 | static int proc_cwd_link(struct dentry *dentry, struct path *path) |
169 | { | 173 | { |
170 | struct task_struct *task = get_proc_task(inode); | 174 | struct task_struct *task = get_proc_task(dentry->d_inode); |
171 | int result = -ENOENT; | 175 | int result = -ENOENT; |
172 | 176 | ||
173 | if (task) { | 177 | if (task) { |
@@ -182,9 +186,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path) | |||
182 | return result; | 186 | return result; |
183 | } | 187 | } |
184 | 188 | ||
185 | static int proc_root_link(struct inode *inode, struct path *path) | 189 | static int proc_root_link(struct dentry *dentry, struct path *path) |
186 | { | 190 | { |
187 | struct task_struct *task = get_proc_task(inode); | 191 | struct task_struct *task = get_proc_task(dentry->d_inode); |
188 | int result = -ENOENT; | 192 | int result = -ENOENT; |
189 | 193 | ||
190 | if (task) { | 194 | if (task) { |
@@ -194,82 +198,9 @@ static int proc_root_link(struct inode *inode, struct path *path) | |||
194 | return result; | 198 | return result; |
195 | } | 199 | } |
196 | 200 | ||
197 | static struct mm_struct *__check_mem_permission(struct task_struct *task) | ||
198 | { | ||
199 | struct mm_struct *mm; | ||
200 | |||
201 | mm = get_task_mm(task); | ||
202 | if (!mm) | ||
203 | return ERR_PTR(-EINVAL); | ||
204 | |||
205 | /* | ||
206 | * A task can always look at itself, in case it chooses | ||
207 | * to use system calls instead of load instructions. | ||
208 | */ | ||
209 | if (task == current) | ||
210 | return mm; | ||
211 | |||
212 | /* | ||
213 | * If current is actively ptrace'ing, and would also be | ||
214 | * permitted to freshly attach with ptrace now, permit it. | ||
215 | */ | ||
216 | if (task_is_stopped_or_traced(task)) { | ||
217 | int match; | ||
218 | rcu_read_lock(); | ||
219 | match = (ptrace_parent(task) == current); | ||
220 | rcu_read_unlock(); | ||
221 | if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) | ||
222 | return mm; | ||
223 | } | ||
224 | |||
225 | /* | ||
226 | * No one else is allowed. | ||
227 | */ | ||
228 | mmput(mm); | ||
229 | return ERR_PTR(-EPERM); | ||
230 | } | ||
231 | |||
232 | /* | ||
233 | * If current may access user memory in @task return a reference to the | ||
234 | * corresponding mm, otherwise ERR_PTR. | ||
235 | */ | ||
236 | static struct mm_struct *check_mem_permission(struct task_struct *task) | ||
237 | { | ||
238 | struct mm_struct *mm; | ||
239 | int err; | ||
240 | |||
241 | /* | ||
242 | * Avoid racing if task exec's as we might get a new mm but validate | ||
243 | * against old credentials. | ||
244 | */ | ||
245 | err = mutex_lock_killable(&task->signal->cred_guard_mutex); | ||
246 | if (err) | ||
247 | return ERR_PTR(err); | ||
248 | |||
249 | mm = __check_mem_permission(task); | ||
250 | mutex_unlock(&task->signal->cred_guard_mutex); | ||
251 | |||
252 | return mm; | ||
253 | } | ||
254 | |||
255 | struct mm_struct *mm_for_maps(struct task_struct *task) | 201 | struct mm_struct *mm_for_maps(struct task_struct *task) |
256 | { | 202 | { |
257 | struct mm_struct *mm; | 203 | return mm_access(task, PTRACE_MODE_READ); |
258 | int err; | ||
259 | |||
260 | err = mutex_lock_killable(&task->signal->cred_guard_mutex); | ||
261 | if (err) | ||
262 | return ERR_PTR(err); | ||
263 | |||
264 | mm = get_task_mm(task); | ||
265 | if (mm && mm != current->mm && | ||
266 | !ptrace_may_access(task, PTRACE_MODE_READ)) { | ||
267 | mmput(mm); | ||
268 | mm = ERR_PTR(-EACCES); | ||
269 | } | ||
270 | mutex_unlock(&task->signal->cred_guard_mutex); | ||
271 | |||
272 | return mm; | ||
273 | } | 204 | } |
274 | 205 | ||
275 | static int proc_pid_cmdline(struct task_struct *task, char * buffer) | 206 | static int proc_pid_cmdline(struct task_struct *task, char * buffer) |
@@ -627,6 +558,52 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) | |||
627 | return 0; | 558 | return 0; |
628 | } | 559 | } |
629 | 560 | ||
561 | /* | ||
562 | * May current process learn task's sched/cmdline info (for hide_pid_min=1) | ||
563 | * or euid/egid (for hide_pid_min=2)? | ||
564 | */ | ||
565 | static bool has_pid_permissions(struct pid_namespace *pid, | ||
566 | struct task_struct *task, | ||
567 | int hide_pid_min) | ||
568 | { | ||
569 | if (pid->hide_pid < hide_pid_min) | ||
570 | return true; | ||
571 | if (in_group_p(pid->pid_gid)) | ||
572 | return true; | ||
573 | return ptrace_may_access(task, PTRACE_MODE_READ); | ||
574 | } | ||
575 | |||
576 | |||
577 | static int proc_pid_permission(struct inode *inode, int mask) | ||
578 | { | ||
579 | struct pid_namespace *pid = inode->i_sb->s_fs_info; | ||
580 | struct task_struct *task; | ||
581 | bool has_perms; | ||
582 | |||
583 | task = get_proc_task(inode); | ||
584 | if (!task) | ||
585 | return -ESRCH; | ||
586 | has_perms = has_pid_permissions(pid, task, 1); | ||
587 | put_task_struct(task); | ||
588 | |||
589 | if (!has_perms) { | ||
590 | if (pid->hide_pid == 2) { | ||
591 | /* | ||
592 | * Let's make getdents(), stat(), and open() | ||
593 | * consistent with each other. If a process | ||
594 | * may not stat() a file, it shouldn't be seen | ||
595 | * in procfs at all. | ||
596 | */ | ||
597 | return -ENOENT; | ||
598 | } | ||
599 | |||
600 | return -EPERM; | ||
601 | } | ||
602 | return generic_permission(inode, mask); | ||
603 | } | ||
604 | |||
605 | |||
606 | |||
630 | static const struct inode_operations proc_def_inode_operations = { | 607 | static const struct inode_operations proc_def_inode_operations = { |
631 | .setattr = proc_setattr, | 608 | .setattr = proc_setattr, |
632 | }; | 609 | }; |
@@ -702,133 +679,96 @@ static const struct file_operations proc_single_file_operations = { | |||
702 | 679 | ||
703 | static int mem_open(struct inode* inode, struct file* file) | 680 | static int mem_open(struct inode* inode, struct file* file) |
704 | { | 681 | { |
705 | file->private_data = (void*)((long)current->self_exec_id); | ||
706 | /* OK to pass negative loff_t, we can catch out-of-range */ | ||
707 | file->f_mode |= FMODE_UNSIGNED_OFFSET; | ||
708 | return 0; | ||
709 | } | ||
710 | |||
711 | static ssize_t mem_read(struct file * file, char __user * buf, | ||
712 | size_t count, loff_t *ppos) | ||
713 | { | ||
714 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 682 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
715 | char *page; | ||
716 | unsigned long src = *ppos; | ||
717 | int ret = -ESRCH; | ||
718 | struct mm_struct *mm; | 683 | struct mm_struct *mm; |
719 | 684 | ||
720 | if (!task) | 685 | if (!task) |
721 | goto out_no_task; | 686 | return -ESRCH; |
722 | 687 | ||
723 | ret = -ENOMEM; | 688 | mm = mm_access(task, PTRACE_MODE_ATTACH); |
724 | page = (char *)__get_free_page(GFP_TEMPORARY); | 689 | put_task_struct(task); |
725 | if (!page) | ||
726 | goto out; | ||
727 | 690 | ||
728 | mm = check_mem_permission(task); | ||
729 | ret = PTR_ERR(mm); | ||
730 | if (IS_ERR(mm)) | 691 | if (IS_ERR(mm)) |
731 | goto out_free; | 692 | return PTR_ERR(mm); |
732 | |||
733 | ret = -EIO; | ||
734 | |||
735 | if (file->private_data != (void*)((long)current->self_exec_id)) | ||
736 | goto out_put; | ||
737 | 693 | ||
738 | ret = 0; | 694 | if (mm) { |
739 | 695 | /* ensure this mm_struct can't be freed */ | |
740 | while (count > 0) { | 696 | atomic_inc(&mm->mm_count); |
741 | int this_len, retval; | 697 | /* but do not pin its memory */ |
742 | 698 | mmput(mm); | |
743 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; | ||
744 | retval = access_remote_vm(mm, src, page, this_len, 0); | ||
745 | if (!retval) { | ||
746 | if (!ret) | ||
747 | ret = -EIO; | ||
748 | break; | ||
749 | } | ||
750 | |||
751 | if (copy_to_user(buf, page, retval)) { | ||
752 | ret = -EFAULT; | ||
753 | break; | ||
754 | } | ||
755 | |||
756 | ret += retval; | ||
757 | src += retval; | ||
758 | buf += retval; | ||
759 | count -= retval; | ||
760 | } | 699 | } |
761 | *ppos = src; | ||
762 | 700 | ||
763 | out_put: | 701 | /* OK to pass negative loff_t, we can catch out-of-range */ |
764 | mmput(mm); | 702 | file->f_mode |= FMODE_UNSIGNED_OFFSET; |
765 | out_free: | 703 | file->private_data = mm; |
766 | free_page((unsigned long) page); | 704 | |
767 | out: | 705 | return 0; |
768 | put_task_struct(task); | ||
769 | out_no_task: | ||
770 | return ret; | ||
771 | } | 706 | } |
772 | 707 | ||
773 | static ssize_t mem_write(struct file * file, const char __user *buf, | 708 | static ssize_t mem_rw(struct file *file, char __user *buf, |
774 | size_t count, loff_t *ppos) | 709 | size_t count, loff_t *ppos, int write) |
775 | { | 710 | { |
776 | int copied; | 711 | struct mm_struct *mm = file->private_data; |
712 | unsigned long addr = *ppos; | ||
713 | ssize_t copied; | ||
777 | char *page; | 714 | char *page; |
778 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | ||
779 | unsigned long dst = *ppos; | ||
780 | struct mm_struct *mm; | ||
781 | 715 | ||
782 | copied = -ESRCH; | 716 | if (!mm) |
783 | if (!task) | 717 | return 0; |
784 | goto out_no_task; | ||
785 | 718 | ||
786 | copied = -ENOMEM; | ||
787 | page = (char *)__get_free_page(GFP_TEMPORARY); | 719 | page = (char *)__get_free_page(GFP_TEMPORARY); |
788 | if (!page) | 720 | if (!page) |
789 | goto out_task; | 721 | return -ENOMEM; |
790 | |||
791 | mm = check_mem_permission(task); | ||
792 | copied = PTR_ERR(mm); | ||
793 | if (IS_ERR(mm)) | ||
794 | goto out_free; | ||
795 | |||
796 | copied = -EIO; | ||
797 | if (file->private_data != (void *)((long)current->self_exec_id)) | ||
798 | goto out_mm; | ||
799 | 722 | ||
800 | copied = 0; | 723 | copied = 0; |
724 | if (!atomic_inc_not_zero(&mm->mm_users)) | ||
725 | goto free; | ||
726 | |||
801 | while (count > 0) { | 727 | while (count > 0) { |
802 | int this_len, retval; | 728 | int this_len = min_t(int, count, PAGE_SIZE); |
803 | 729 | ||
804 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; | 730 | if (write && copy_from_user(page, buf, this_len)) { |
805 | if (copy_from_user(page, buf, this_len)) { | ||
806 | copied = -EFAULT; | 731 | copied = -EFAULT; |
807 | break; | 732 | break; |
808 | } | 733 | } |
809 | retval = access_remote_vm(mm, dst, page, this_len, 1); | 734 | |
810 | if (!retval) { | 735 | this_len = access_remote_vm(mm, addr, page, this_len, write); |
736 | if (!this_len) { | ||
811 | if (!copied) | 737 | if (!copied) |
812 | copied = -EIO; | 738 | copied = -EIO; |
813 | break; | 739 | break; |
814 | } | 740 | } |
815 | copied += retval; | 741 | |
816 | buf += retval; | 742 | if (!write && copy_to_user(buf, page, this_len)) { |
817 | dst += retval; | 743 | copied = -EFAULT; |
818 | count -= retval; | 744 | break; |
745 | } | ||
746 | |||
747 | buf += this_len; | ||
748 | addr += this_len; | ||
749 | copied += this_len; | ||
750 | count -= this_len; | ||
819 | } | 751 | } |
820 | *ppos = dst; | 752 | *ppos = addr; |
821 | 753 | ||
822 | out_mm: | ||
823 | mmput(mm); | 754 | mmput(mm); |
824 | out_free: | 755 | free: |
825 | free_page((unsigned long) page); | 756 | free_page((unsigned long) page); |
826 | out_task: | ||
827 | put_task_struct(task); | ||
828 | out_no_task: | ||
829 | return copied; | 757 | return copied; |
830 | } | 758 | } |
831 | 759 | ||
760 | static ssize_t mem_read(struct file *file, char __user *buf, | ||
761 | size_t count, loff_t *ppos) | ||
762 | { | ||
763 | return mem_rw(file, buf, count, ppos, 0); | ||
764 | } | ||
765 | |||
766 | static ssize_t mem_write(struct file *file, const char __user *buf, | ||
767 | size_t count, loff_t *ppos) | ||
768 | { | ||
769 | return mem_rw(file, (char __user*)buf, count, ppos, 1); | ||
770 | } | ||
771 | |||
832 | loff_t mem_lseek(struct file *file, loff_t offset, int orig) | 772 | loff_t mem_lseek(struct file *file, loff_t offset, int orig) |
833 | { | 773 | { |
834 | switch (orig) { | 774 | switch (orig) { |
@@ -845,11 +785,20 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig) | |||
845 | return file->f_pos; | 785 | return file->f_pos; |
846 | } | 786 | } |
847 | 787 | ||
788 | static int mem_release(struct inode *inode, struct file *file) | ||
789 | { | ||
790 | struct mm_struct *mm = file->private_data; | ||
791 | if (mm) | ||
792 | mmdrop(mm); | ||
793 | return 0; | ||
794 | } | ||
795 | |||
848 | static const struct file_operations proc_mem_operations = { | 796 | static const struct file_operations proc_mem_operations = { |
849 | .llseek = mem_lseek, | 797 | .llseek = mem_lseek, |
850 | .read = mem_read, | 798 | .read = mem_read, |
851 | .write = mem_write, | 799 | .write = mem_write, |
852 | .open = mem_open, | 800 | .open = mem_open, |
801 | .release = mem_release, | ||
853 | }; | 802 | }; |
854 | 803 | ||
855 | static ssize_t environ_read(struct file *file, char __user *buf, | 804 | static ssize_t environ_read(struct file *file, char __user *buf, |
@@ -1010,6 +959,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
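The mem_open()/mem_rw() split above rests on the distinction between the two mm refcounts: mem_open() takes an mm_count reference (keeping the struct mm_struct itself alive for the lifetime of the file, released by mmdrop() in mem_release()) while dropping its mm_users reference, so an open /proc/<pid>/mem no longer pins the task's address space; mem_rw() then revives a user reference only around each access. Schematically:

/* open: pin the struct, not the address space */
atomic_inc(&mm->mm_count);	/* balanced by mmdrop() in mem_release() */
mmput(mm);			/* drop the user ref taken by mm_access() */

/* read/write: proceed only while the address space still exists */
if (atomic_inc_not_zero(&mm->mm_users)) {
	/* ... access_remote_vm(mm, addr, page, len, write) ... */
	mmput(mm);
}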
1010 | else | 959 | else |
1011 | task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / | 960 | task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / |
1012 | -OOM_DISABLE; | 961 | -OOM_DISABLE; |
962 | trace_oom_score_adj_update(task); | ||
1013 | err_sighand: | 963 | err_sighand: |
1014 | unlock_task_sighand(task, &flags); | 964 | unlock_task_sighand(task, &flags); |
1015 | err_task_lock: | 965 | err_task_lock: |
@@ -1097,6 +1047,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | |||
1097 | task->signal->oom_score_adj = oom_score_adj; | 1047 | task->signal->oom_score_adj = oom_score_adj; |
1098 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) | 1048 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) |
1099 | task->signal->oom_score_adj_min = oom_score_adj; | 1049 | task->signal->oom_score_adj_min = oom_score_adj; |
1050 | trace_oom_score_adj_update(task); | ||
1100 | /* | 1051 | /* |
1101 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is | 1052 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is |
1102 | * always attainable. | 1053 | * always attainable. |
@@ -1147,9 +1098,6 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1147 | ssize_t length; | 1098 | ssize_t length; |
1148 | uid_t loginuid; | 1099 | uid_t loginuid; |
1149 | 1100 | ||
1150 | if (!capable(CAP_AUDIT_CONTROL)) | ||
1151 | return -EPERM; | ||
1152 | |||
1153 | rcu_read_lock(); | 1101 | rcu_read_lock(); |
1154 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { | 1102 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { |
1155 | rcu_read_unlock(); | 1103 | rcu_read_unlock(); |
@@ -1178,7 +1126,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1178 | goto out_free_page; | 1126 | goto out_free_page; |
1179 | 1127 | ||
1180 | } | 1128 | } |
1181 | length = audit_set_loginuid(current, loginuid); | 1129 | length = audit_set_loginuid(loginuid); |
1182 | if (likely(length == 0)) | 1130 | if (likely(length == 0)) |
1183 | length = count; | 1131 | length = count; |
1184 | 1132 | ||
@@ -1453,13 +1401,13 @@ static const struct file_operations proc_pid_set_comm_operations = { | |||
1453 | .release = single_release, | 1401 | .release = single_release, |
1454 | }; | 1402 | }; |
1455 | 1403 | ||
1456 | static int proc_exe_link(struct inode *inode, struct path *exe_path) | 1404 | static int proc_exe_link(struct dentry *dentry, struct path *exe_path) |
1457 | { | 1405 | { |
1458 | struct task_struct *task; | 1406 | struct task_struct *task; |
1459 | struct mm_struct *mm; | 1407 | struct mm_struct *mm; |
1460 | struct file *exe_file; | 1408 | struct file *exe_file; |
1461 | 1409 | ||
1462 | task = get_proc_task(inode); | 1410 | task = get_proc_task(dentry->d_inode); |
1463 | if (!task) | 1411 | if (!task) |
1464 | return -ENOENT; | 1412 | return -ENOENT; |
1465 | mm = get_task_mm(task); | 1413 | mm = get_task_mm(task); |
@@ -1489,7 +1437,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
1489 | if (!proc_fd_access_allowed(inode)) | 1437 | if (!proc_fd_access_allowed(inode)) |
1490 | goto out; | 1438 | goto out; |
1491 | 1439 | ||
1492 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); | 1440 | error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path); |
1493 | out: | 1441 | out: |
1494 | return ERR_PTR(error); | 1442 | return ERR_PTR(error); |
1495 | } | 1443 | } |
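All the proc_get_link implementations change signature together, because the method in struct proc_inode's op union now receives the dentry:

/* was: int (*proc_get_link)(struct inode *, struct path *); */
int (*proc_get_link)(struct dentry *, struct path *);

The inode stays reachable as dentry->d_inode, as the converted helpers show; the dentry itself is what the new map_files entries below need, since proc_map_files_get_link() parses the vma range out of dentry->d_name.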
@@ -1528,7 +1476,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b | |||
1528 | if (!proc_fd_access_allowed(inode)) | 1476 | if (!proc_fd_access_allowed(inode)) |
1529 | goto out; | 1477 | goto out; |
1530 | 1478 | ||
1531 | error = PROC_I(inode)->op.proc_get_link(inode, &path); | 1479 | error = PROC_I(inode)->op.proc_get_link(dentry, &path); |
1532 | if (error) | 1480 | if (error) |
1533 | goto out; | 1481 | goto out; |
1534 | 1482 | ||
@@ -1609,6 +1557,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
1609 | struct inode *inode = dentry->d_inode; | 1557 | struct inode *inode = dentry->d_inode; |
1610 | struct task_struct *task; | 1558 | struct task_struct *task; |
1611 | const struct cred *cred; | 1559 | const struct cred *cred; |
1560 | struct pid_namespace *pid = dentry->d_sb->s_fs_info; | ||
1612 | 1561 | ||
1613 | generic_fillattr(inode, stat); | 1562 | generic_fillattr(inode, stat); |
1614 | 1563 | ||
@@ -1617,6 +1566,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
1617 | stat->gid = 0; | 1566 | stat->gid = 0; |
1618 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | 1567 | task = pid_task(proc_pid(inode), PIDTYPE_PID); |
1619 | if (task) { | 1568 | if (task) { |
1569 | if (!has_pid_permissions(pid, task, 2)) { | ||
1570 | rcu_read_unlock(); | ||
1571 | /* | ||
1572 | * This doesn't prevent learning whether PID exists, | ||
1573 | * it only makes getattr() consistent with readdir(). | ||
1574 | */ | ||
1575 | return -ENOENT; | ||
1576 | } | ||
1620 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || | 1577 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || |
1621 | task_dumpable(task)) { | 1578 | task_dumpable(task)) { |
1622 | cred = __task_cred(task); | 1579 | cred = __task_cred(task); |
@@ -1820,9 +1777,9 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) | |||
1820 | return -ENOENT; | 1777 | return -ENOENT; |
1821 | } | 1778 | } |
1822 | 1779 | ||
1823 | static int proc_fd_link(struct inode *inode, struct path *path) | 1780 | static int proc_fd_link(struct dentry *dentry, struct path *path) |
1824 | { | 1781 | { |
1825 | return proc_fd_info(inode, path, NULL); | 1782 | return proc_fd_info(dentry->d_inode, path, NULL); |
1826 | } | 1783 | } |
1827 | 1784 | ||
1828 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | 1785 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) |
@@ -2043,6 +2000,355 @@ static const struct file_operations proc_fd_operations = { | |||
2043 | .llseek = default_llseek, | 2000 | .llseek = default_llseek, |
2044 | }; | 2001 | }; |
2045 | 2002 | ||
2003 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
2004 | |||
2005 | /* | ||
2006 | * dname_to_vma_addr - maps a dentry name into two unsigned longs | ||
2007 | * which represent vma start and end addresses. | ||
2008 | */ | ||
2009 | static int dname_to_vma_addr(struct dentry *dentry, | ||
2010 | unsigned long *start, unsigned long *end) | ||
2011 | { | ||
2012 | if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) | ||
2013 | return -EINVAL; | ||
2014 | |||
2015 | return 0; | ||
2016 | } | ||
2017 | |||
2018 | static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
2019 | { | ||
2020 | unsigned long vm_start, vm_end; | ||
2021 | bool exact_vma_exists = false; | ||
2022 | struct mm_struct *mm = NULL; | ||
2023 | struct task_struct *task; | ||
2024 | const struct cred *cred; | ||
2025 | struct inode *inode; | ||
2026 | int status = 0; | ||
2027 | |||
2028 | if (nd && nd->flags & LOOKUP_RCU) | ||
2029 | return -ECHILD; | ||
2030 | |||
2031 | if (!capable(CAP_SYS_ADMIN)) { | ||
2032 | status = -EACCES; | ||
2033 | goto out_notask; | ||
2034 | } | ||
2035 | |||
2036 | inode = dentry->d_inode; | ||
2037 | task = get_proc_task(inode); | ||
2038 | if (!task) | ||
2039 | goto out_notask; | ||
2040 | |||
2041 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
2042 | goto out; | ||
2043 | |||
2044 | mm = get_task_mm(task); | ||
2045 | if (!mm) | ||
2046 | goto out; | ||
2047 | |||
2048 | if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { | ||
2049 | down_read(&mm->mmap_sem); | ||
2050 | exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); | ||
2051 | up_read(&mm->mmap_sem); | ||
2052 | } | ||
2053 | |||
2054 | mmput(mm); | ||
2055 | |||
2056 | if (exact_vma_exists) { | ||
2057 | if (task_dumpable(task)) { | ||
2058 | rcu_read_lock(); | ||
2059 | cred = __task_cred(task); | ||
2060 | inode->i_uid = cred->euid; | ||
2061 | inode->i_gid = cred->egid; | ||
2062 | rcu_read_unlock(); | ||
2063 | } else { | ||
2064 | inode->i_uid = 0; | ||
2065 | inode->i_gid = 0; | ||
2066 | } | ||
2067 | security_task_to_inode(task, inode); | ||
2068 | status = 1; | ||
2069 | } | ||
2070 | |||
2071 | out: | ||
2072 | put_task_struct(task); | ||
2073 | |||
2074 | out_notask: | ||
2075 | if (status <= 0) | ||
2076 | d_drop(dentry); | ||
2077 | |||
2078 | return status; | ||
2079 | } | ||
2080 | |||
2081 | static const struct dentry_operations tid_map_files_dentry_operations = { | ||
2082 | .d_revalidate = map_files_d_revalidate, | ||
2083 | .d_delete = pid_delete_dentry, | ||
2084 | }; | ||
2085 | |||
2086 | static int proc_map_files_get_link(struct dentry *dentry, struct path *path) | ||
2087 | { | ||
2088 | unsigned long vm_start, vm_end; | ||
2089 | struct vm_area_struct *vma; | ||
2090 | struct task_struct *task; | ||
2091 | struct mm_struct *mm; | ||
2092 | int rc; | ||
2093 | |||
2094 | rc = -ENOENT; | ||
2095 | task = get_proc_task(dentry->d_inode); | ||
2096 | if (!task) | ||
2097 | goto out; | ||
2098 | |||
2099 | mm = get_task_mm(task); | ||
2100 | put_task_struct(task); | ||
2101 | if (!mm) | ||
2102 | goto out; | ||
2103 | |||
2104 | rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); | ||
2105 | if (rc) | ||
2106 | goto out_mmput; | ||
2107 | |||
2108 | down_read(&mm->mmap_sem); | ||
2109 | vma = find_exact_vma(mm, vm_start, vm_end); | ||
2110 | if (vma && vma->vm_file) { | ||
2111 | *path = vma->vm_file->f_path; | ||
2112 | path_get(path); | ||
2113 | rc = 0; | ||
2114 | } | ||
2115 | up_read(&mm->mmap_sem); | ||
2116 | |||
2117 | out_mmput: | ||
2118 | mmput(mm); | ||
2119 | out: | ||
2120 | return rc; | ||
2121 | } | ||
2122 | |||
2123 | struct map_files_info { | ||
2124 | struct file *file; | ||
2125 | unsigned long len; | ||
2126 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ | ||
2127 | }; | ||
2128 | |||
2129 | static struct dentry * | ||
2130 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | ||
2131 | struct task_struct *task, const void *ptr) | ||
2132 | { | ||
2133 | const struct file *file = ptr; | ||
2134 | struct proc_inode *ei; | ||
2135 | struct inode *inode; | ||
2136 | |||
2137 | if (!file) | ||
2138 | return ERR_PTR(-ENOENT); | ||
2139 | |||
2140 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
2141 | if (!inode) | ||
2142 | return ERR_PTR(-ENOENT); | ||
2143 | |||
2144 | ei = PROC_I(inode); | ||
2145 | ei->op.proc_get_link = proc_map_files_get_link; | ||
2146 | |||
2147 | inode->i_op = &proc_pid_link_inode_operations; | ||
2148 | inode->i_size = 64; | ||
2149 | inode->i_mode = S_IFLNK; | ||
2150 | |||
2151 | if (file->f_mode & FMODE_READ) | ||
2152 | inode->i_mode |= S_IRUSR; | ||
2153 | if (file->f_mode & FMODE_WRITE) | ||
2154 | inode->i_mode |= S_IWUSR; | ||
2155 | |||
2156 | d_set_d_op(dentry, &tid_map_files_dentry_operations); | ||
2157 | d_add(dentry, inode); | ||
2158 | |||
2159 | return NULL; | ||
2160 | } | ||
2161 | |||
2162 | static struct dentry *proc_map_files_lookup(struct inode *dir, | ||
2163 | struct dentry *dentry, struct nameidata *nd) | ||
2164 | { | ||
2165 | unsigned long vm_start, vm_end; | ||
2166 | struct vm_area_struct *vma; | ||
2167 | struct task_struct *task; | ||
2168 | struct dentry *result; | ||
2169 | struct mm_struct *mm; | ||
2170 | |||
2171 | result = ERR_PTR(-EACCES); | ||
2172 | if (!capable(CAP_SYS_ADMIN)) | ||
2173 | goto out; | ||
2174 | |||
2175 | result = ERR_PTR(-ENOENT); | ||
2176 | task = get_proc_task(dir); | ||
2177 | if (!task) | ||
2178 | goto out; | ||
2179 | |||
2180 | result = ERR_PTR(-EACCES); | ||
2181 | if (lock_trace(task)) | ||
2182 | goto out_put_task; | ||
2183 | |||
2184 | result = ERR_PTR(-ENOENT); | ||
2185 | if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) | ||
2186 | goto out_unlock; | ||
2187 | |||
2188 | mm = get_task_mm(task); | ||
2189 | if (!mm) | ||
2190 | goto out_unlock; | ||
2191 | |||
2192 | down_read(&mm->mmap_sem); | ||
2193 | vma = find_exact_vma(mm, vm_start, vm_end); | ||
2194 | if (!vma) | ||
2195 | goto out_no_vma; | ||
2196 | |||
2197 | result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); | ||
2198 | |||
2199 | out_no_vma: | ||
2200 | up_read(&mm->mmap_sem); | ||
2201 | mmput(mm); | ||
2202 | out_unlock: | ||
2203 | unlock_trace(task); | ||
2204 | out_put_task: | ||
2205 | put_task_struct(task); | ||
2206 | out: | ||
2207 | return result; | ||
2208 | } | ||
2209 | |||
2210 | static const struct inode_operations proc_map_files_inode_operations = { | ||
2211 | .lookup = proc_map_files_lookup, | ||
2212 | .permission = proc_fd_permission, | ||
2213 | .setattr = proc_setattr, | ||
2214 | }; | ||
2215 | |||
2216 | static int | ||
2217 | proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
2218 | { | ||
2219 | struct dentry *dentry = filp->f_path.dentry; | ||
2220 | struct inode *inode = dentry->d_inode; | ||
2221 | struct vm_area_struct *vma; | ||
2222 | struct task_struct *task; | ||
2223 | struct mm_struct *mm; | ||
2224 | ino_t ino; | ||
2225 | int ret; | ||
2226 | |||
2227 | ret = -EACCES; | ||
2228 | if (!capable(CAP_SYS_ADMIN)) | ||
2229 | goto out; | ||
2230 | |||
2231 | ret = -ENOENT; | ||
2232 | task = get_proc_task(inode); | ||
2233 | if (!task) | ||
2234 | goto out; | ||
2235 | |||
2236 | ret = -EACCES; | ||
2237 | if (lock_trace(task)) | ||
2238 | goto out_put_task; | ||
2239 | |||
2240 | ret = 0; | ||
2241 | switch (filp->f_pos) { | ||
2242 | case 0: | ||
2243 | ino = inode->i_ino; | ||
2244 | if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) | ||
2245 | goto out_unlock; | ||
2246 | filp->f_pos++; | ||
2247 | case 1: | ||
2248 | ino = parent_ino(dentry); | ||
2249 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
2250 | goto out_unlock; | ||
2251 | filp->f_pos++; | ||
2252 | default: | ||
2253 | { | ||
2254 | unsigned long nr_files, pos, i; | ||
2255 | struct flex_array *fa = NULL; | ||
2256 | struct map_files_info info; | ||
2257 | struct map_files_info *p; | ||
2258 | |||
2259 | mm = get_task_mm(task); | ||
2260 | if (!mm) | ||
2261 | goto out_unlock; | ||
2262 | down_read(&mm->mmap_sem); | ||
2263 | |||
2264 | nr_files = 0; | ||
2265 | |||
2266 | /* | ||
2267 | * We need two passes here: | ||
2268 | * | ||
2269 | * 1) Collect vmas of mapped files with mmap_sem taken | ||
2270 | * 2) Release mmap_sem and instantiate entries | ||
2271 | * | ||
2272 | * otherwise lockdep complains, since the filldir() | ||
2273 | * routine might take mmap_sem inside might_fault(). | ||
2274 | */ | ||
2275 | |||
2276 | for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { | ||
2277 | if (vma->vm_file && ++pos > filp->f_pos) | ||
2278 | nr_files++; | ||
2279 | } | ||
2280 | |||
2281 | if (nr_files) { | ||
2282 | fa = flex_array_alloc(sizeof(info), nr_files, | ||
2283 | GFP_KERNEL); | ||
2284 | if (!fa || flex_array_prealloc(fa, 0, nr_files, | ||
2285 | GFP_KERNEL)) { | ||
2286 | ret = -ENOMEM; | ||
2287 | if (fa) | ||
2288 | flex_array_free(fa); | ||
2289 | up_read(&mm->mmap_sem); | ||
2290 | mmput(mm); | ||
2291 | goto out_unlock; | ||
2292 | } | ||
2293 | for (i = 0, vma = mm->mmap, pos = 2; vma; | ||
2294 | vma = vma->vm_next) { | ||
2295 | if (!vma->vm_file) | ||
2296 | continue; | ||
2297 | if (++pos <= filp->f_pos) | ||
2298 | continue; | ||
2299 | |||
2300 | get_file(vma->vm_file); | ||
2301 | info.file = vma->vm_file; | ||
2302 | info.len = snprintf(info.name, | ||
2303 | sizeof(info.name), "%lx-%lx", | ||
2304 | vma->vm_start, vma->vm_end); | ||
2305 | if (flex_array_put(fa, i++, &info, GFP_KERNEL)) | ||
2306 | BUG(); | ||
2307 | } | ||
2308 | } | ||
2309 | up_read(&mm->mmap_sem); | ||
2310 | |||
2311 | for (i = 0; i < nr_files; i++) { | ||
2312 | p = flex_array_get(fa, i); | ||
2313 | ret = proc_fill_cache(filp, dirent, filldir, | ||
2314 | p->name, p->len, | ||
2315 | proc_map_files_instantiate, | ||
2316 | task, p->file); | ||
2317 | if (ret) | ||
2318 | break; | ||
2319 | filp->f_pos++; | ||
2320 | fput(p->file); | ||
2321 | } | ||
2322 | for (; i < nr_files; i++) { | ||
2323 | /* | ||
2324 | * In case of error, don't forget | ||
2325 | * to put the rest of the file refs. | ||
2326 | */ | ||
2327 | p = flex_array_get(fa, i); | ||
2328 | fput(p->file); | ||
2329 | } | ||
2330 | if (fa) | ||
2331 | flex_array_free(fa); | ||
2332 | mmput(mm); | ||
2333 | } | ||
2334 | } | ||
2335 | |||
2336 | out_unlock: | ||
2337 | unlock_trace(task); | ||
2338 | out_put_task: | ||
2339 | put_task_struct(task); | ||
2340 | out: | ||
2341 | return ret; | ||
2342 | } | ||
2343 | |||
2344 | static const struct file_operations proc_map_files_operations = { | ||
2345 | .read = generic_read_dir, | ||
2346 | .readdir = proc_map_files_readdir, | ||
2347 | .llseek = default_llseek, | ||
2348 | }; | ||
2349 | |||
2350 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | ||
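Editor's note: the two-pass structure in proc_map_files_readdir() above — snapshot the file-backed VMAs while holding mmap_sem, then drop the lock before calling filldir() — exists precisely because filldir() may fault in the user buffer and take mmap_sem itself. A minimal standalone sketch of the same snapshot-then-emit pattern, with a plain calloc'd array standing in for the kernel's flex_array (all names here are illustrative, not kernel API; compile with cc file.c -lpthread):

	#include <stdio.h>
	#include <stdlib.h>
	#include <pthread.h>

	struct item { char name[32]; };

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
	static const char *live_list[] = { "100000-101000", "200000-204000" };

	/* Pass 1: copy what we need while holding the lock. */
	static struct item *snapshot(size_t *out_n)
	{
		size_t n = sizeof(live_list) / sizeof(live_list[0]);
		struct item *snap = calloc(n, sizeof(*snap));

		if (!snap)
			return NULL;
		pthread_mutex_lock(&list_lock);
		for (size_t i = 0; i < n; i++)
			snprintf(snap[i].name, sizeof(snap[i].name), "%s", live_list[i]);
		pthread_mutex_unlock(&list_lock);
		*out_n = n;
		return snap;
	}

	int main(void)
	{
		size_t n;
		struct item *snap = snapshot(&n);

		if (!snap)
			return 1;
		/* Pass 2: emit with the lock dropped; emission is free to block. */
		for (size_t i = 0; i < n; i++)
			printf("entry: %s\n", snap[i].name);
		free(snap);
		return 0;
	}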
2351 | |||
2046 | /* | 2352 | /* |
2047 | * /proc/pid/fd needs a special permission handler so that a process can still | 2353 | * /proc/pid/fd needs a special permission handler so that a process can still |
2048 | * access /proc/self/fd after it has executed a setuid(). | 2354 | * access /proc/self/fd after it has executed a setuid(). |
@@ -2658,6 +2964,9 @@ static const struct inode_operations proc_task_inode_operations; | |||
2658 | static const struct pid_entry tgid_base_stuff[] = { | 2964 | static const struct pid_entry tgid_base_stuff[] = { |
2659 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), | 2965 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), |
2660 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), | 2966 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), |
2967 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
2968 | DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), | ||
2969 | #endif | ||
2661 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), | 2970 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), |
2662 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), | 2971 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), |
2663 | #ifdef CONFIG_NET | 2972 | #ifdef CONFIG_NET |
@@ -2761,6 +3070,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = { | |||
2761 | .lookup = proc_tgid_base_lookup, | 3070 | .lookup = proc_tgid_base_lookup, |
2762 | .getattr = pid_getattr, | 3071 | .getattr = pid_getattr, |
2763 | .setattr = proc_setattr, | 3072 | .setattr = proc_setattr, |
3073 | .permission = proc_pid_permission, | ||
2764 | }; | 3074 | }; |
2765 | 3075 | ||
2766 | static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) | 3076 | static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) |
@@ -2964,6 +3274,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi | |||
2964 | proc_pid_instantiate, iter.task, NULL); | 3274 | proc_pid_instantiate, iter.task, NULL); |
2965 | } | 3275 | } |
2966 | 3276 | ||
3277 | static int fake_filldir(void *buf, const char *name, int namelen, | ||
3278 | loff_t offset, u64 ino, unsigned d_type) | ||
3279 | { | ||
3280 | return 0; | ||
3281 | } | ||
3282 | |||
2967 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 3283 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
2968 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 3284 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) |
2969 | { | 3285 | { |
@@ -2971,6 +3287,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
2971 | struct task_struct *reaper; | 3287 | struct task_struct *reaper; |
2972 | struct tgid_iter iter; | 3288 | struct tgid_iter iter; |
2973 | struct pid_namespace *ns; | 3289 | struct pid_namespace *ns; |
3290 | filldir_t __filldir; | ||
2974 | 3291 | ||
2975 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) | 3292 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) |
2976 | goto out_no_task; | 3293 | goto out_no_task; |
@@ -2992,8 +3309,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
2992 | for (iter = next_tgid(ns, iter); | 3309 | for (iter = next_tgid(ns, iter); |
2993 | iter.task; | 3310 | iter.task; |
2994 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 3311 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
3312 | if (has_pid_permissions(ns, iter.task, 2)) | ||
3313 | __filldir = filldir; | ||
3314 | else | ||
3315 | __filldir = fake_filldir; | ||
3316 | |||
2995 | filp->f_pos = iter.tgid + TGID_OFFSET; | 3317 | filp->f_pos = iter.tgid + TGID_OFFSET; |
2996 | if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { | 3318 | if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { |
2997 | put_task_struct(iter.task); | 3319 | put_task_struct(iter.task); |
2998 | goto out; | 3320 | goto out; |
2999 | } | 3321 | } |
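Editor's note: swapping in fake_filldir for tasks the reader lacks permission to see keeps the f_pos accounting identical for every PID while emitting nothing — a hidden entry still consumes its slot but produces no dirent. The same no-op-callback trick in a standalone sketch (the predicate and names are made up for illustration):

	#include <stdio.h>

	typedef int (*emit_t)(const char *name);

	static int real_emit(const char *name) { return printf("%s\n", name); }
	static int fake_emit(const char *name) { (void)name; return 0; } /* no-op */

	int main(void)
	{
		const char *pids[] = { "1", "42", "999" };

		for (int i = 0; i < 3; i++) {
			/* pretend only even slots are visible to this caller */
			emit_t emit = (i % 2 == 0) ? real_emit : fake_emit;
			emit(pids[i]);	/* position advances either way */
		}
		return 0;
	}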
@@ -3328,6 +3650,7 @@ static const struct inode_operations proc_task_inode_operations = { | |||
3328 | .lookup = proc_task_lookup, | 3650 | .lookup = proc_task_lookup, |
3329 | .getattr = proc_task_getattr, | 3651 | .getattr = proc_task_getattr, |
3330 | .setattr = proc_setattr, | 3652 | .setattr = proc_setattr, |
3653 | .permission = proc_pid_permission, | ||
3331 | }; | 3654 | }; |
3332 | 3655 | ||
3333 | static const struct file_operations proc_task_operations = { | 3656 | static const struct file_operations proc_task_operations = { |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 51a176622b8f..84fd3235a590 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/time.h> | 7 | #include <linux/time.h> |
8 | #include <linux/proc_fs.h> | 8 | #include <linux/proc_fs.h> |
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/pid_namespace.h> | ||
10 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
11 | #include <linux/string.h> | 12 | #include <linux/string.h> |
12 | #include <linux/stat.h> | 13 | #include <linux/stat.h> |
@@ -17,7 +18,9 @@ | |||
17 | #include <linux/init.h> | 18 | #include <linux/init.h> |
18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
19 | #include <linux/sysctl.h> | 20 | #include <linux/sysctl.h> |
21 | #include <linux/seq_file.h> | ||
20 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
23 | #include <linux/mount.h> | ||
21 | 24 | ||
22 | #include <asm/system.h> | 25 | #include <asm/system.h> |
23 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
@@ -101,12 +104,27 @@ void __init proc_init_inodecache(void) | |||
101 | init_once); | 104 | init_once); |
102 | } | 105 | } |
103 | 106 | ||
107 | static int proc_show_options(struct seq_file *seq, struct dentry *root) | ||
108 | { | ||
109 | struct super_block *sb = root->d_sb; | ||
110 | struct pid_namespace *pid = sb->s_fs_info; | ||
111 | |||
112 | if (pid->pid_gid) | ||
113 | seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid); | ||
114 | if (pid->hide_pid != 0) | ||
115 | seq_printf(seq, ",hidepid=%u", pid->hide_pid); | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
104 | static const struct super_operations proc_sops = { | 120 | static const struct super_operations proc_sops = { |
105 | .alloc_inode = proc_alloc_inode, | 121 | .alloc_inode = proc_alloc_inode, |
106 | .destroy_inode = proc_destroy_inode, | 122 | .destroy_inode = proc_destroy_inode, |
107 | .drop_inode = generic_delete_inode, | 123 | .drop_inode = generic_delete_inode, |
108 | .evict_inode = proc_evict_inode, | 124 | .evict_inode = proc_evict_inode, |
109 | .statfs = simple_statfs, | 125 | .statfs = simple_statfs, |
126 | .remount_fs = proc_remount, | ||
127 | .show_options = proc_show_options, | ||
110 | }; | 128 | }; |
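Editor's note: proc_show_options() above prints only options that differ from the defaults, so /proc/mounts round-trips cleanly — a default mount shows nothing extra, and remounting with the shown string reproduces the state. A sketch of that only-non-default convention (struct and defaults invented for illustration):

	#include <stdio.h>

	struct opts { unsigned gid; unsigned hide_pid; };

	/* Print only what differs from the defaults (gid=0, hidepid=0). */
	static void show_options(FILE *seq, const struct opts *o)
	{
		if (o->gid)
			fprintf(seq, ",gid=%u", o->gid);
		if (o->hide_pid != 0)
			fprintf(seq, ",hidepid=%u", o->hide_pid);
	}

	int main(void)
	{
		struct opts o = { .gid = 100, .hide_pid = 2 };

		fputs("proc /proc proc rw", stdout);
		show_options(stdout, &o);
		putchar('\n');	/* -> proc /proc proc rw,gid=100,hidepid=2 */
		return 0;
	}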
111 | 129 | ||
112 | static void __pde_users_dec(struct proc_dir_entry *pde) | 130 | static void __pde_users_dec(struct proc_dir_entry *pde) |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 7838e5cfec14..292577531ad1 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -117,6 +117,7 @@ void pde_put(struct proc_dir_entry *pde); | |||
117 | 117 | ||
118 | int proc_fill_super(struct super_block *); | 118 | int proc_fill_super(struct super_block *); |
119 | struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | 119 | struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); |
120 | int proc_remount(struct super_block *sb, int *flags, char *data); | ||
120 | 121 | ||
121 | /* | 122 | /* |
122 | * These are generic /proc routines that use the internal | 123 | * These are generic /proc routines that use the internal |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 03102d978180..46a15d8a29ca 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/bitops.h> | 18 | #include <linux/bitops.h> |
19 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
20 | #include <linux/pid_namespace.h> | 20 | #include <linux/pid_namespace.h> |
21 | #include <linux/parser.h> | ||
21 | 22 | ||
22 | #include "internal.h" | 23 | #include "internal.h" |
23 | 24 | ||
@@ -36,6 +37,63 @@ static int proc_set_super(struct super_block *sb, void *data) | |||
36 | return err; | 37 | return err; |
37 | } | 38 | } |
38 | 39 | ||
40 | enum { | ||
41 | Opt_gid, Opt_hidepid, Opt_err, | ||
42 | }; | ||
43 | |||
44 | static const match_table_t tokens = { | ||
45 | {Opt_hidepid, "hidepid=%u"}, | ||
46 | {Opt_gid, "gid=%u"}, | ||
47 | {Opt_err, NULL}, | ||
48 | }; | ||
49 | |||
50 | static int proc_parse_options(char *options, struct pid_namespace *pid) | ||
51 | { | ||
52 | char *p; | ||
53 | substring_t args[MAX_OPT_ARGS]; | ||
54 | int option; | ||
55 | |||
56 | if (!options) | ||
57 | return 1; | ||
58 | |||
59 | while ((p = strsep(&options, ",")) != NULL) { | ||
60 | int token; | ||
61 | if (!*p) | ||
62 | continue; | ||
63 | |||
64 | args[0].to = args[0].from = 0; | ||
65 | token = match_token(p, tokens, args); | ||
66 | switch (token) { | ||
67 | case Opt_gid: | ||
68 | if (match_int(&args[0], &option)) | ||
69 | return 0; | ||
70 | pid->pid_gid = option; | ||
71 | break; | ||
72 | case Opt_hidepid: | ||
73 | if (match_int(&args[0], &option)) | ||
74 | return 0; | ||
75 | if (option < 0 || option > 2) { | ||
76 | pr_err("proc: hidepid value must be between 0 and 2.\n"); | ||
77 | return 0; | ||
78 | } | ||
79 | pid->hide_pid = option; | ||
80 | break; | ||
81 | default: | ||
82 | pr_err("proc: unrecognized mount option \"%s\" " | ||
83 | "or missing value\n", p); | ||
84 | return 0; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | return 1; | ||
89 | } | ||
90 | |||
91 | int proc_remount(struct super_block *sb, int *flags, char *data) | ||
92 | { | ||
93 | struct pid_namespace *pid = sb->s_fs_info; | ||
94 | return !proc_parse_options(data, pid); | ||
95 | } | ||
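Editor's note: proc_parse_options() follows the usual kernel mount-option pattern — strsep() splits the comma-separated string, match_token() classifies each piece, and any bad value fails the whole mount; proc_remount() then reuses the same parser. Outside the kernel the same shape can be sketched with strsep() and sscanf() as stand-ins for match_token()/match_int() (a rough userspace approximation, not the kernel API):

	#define _DEFAULT_SOURCE
	#include <stdio.h>
	#include <string.h>

	/* Returns 1 on success, 0 on any bad option, like proc_parse_options(). */
	static int parse_options(char *options, unsigned *gid, unsigned *hide_pid)
	{
		char *p;

		if (!options)
			return 1;
		while ((p = strsep(&options, ",")) != NULL) {
			unsigned v;

			if (!*p)
				continue;
			if (sscanf(p, "gid=%u", &v) == 1) {
				*gid = v;
			} else if (sscanf(p, "hidepid=%u", &v) == 1) {
				if (v > 2)
					return 0;	/* hidepid must be 0..2 */
				*hide_pid = v;
			} else {
				fprintf(stderr, "unrecognized option \"%s\"\n", p);
				return 0;
			}
		}
		return 1;
	}

	int main(void)
	{
		char buf[] = "gid=100,hidepid=2";
		unsigned gid = 0, hide = 0;

		if (parse_options(buf, &gid, &hide))
			printf("gid=%u hidepid=%u\n", gid, hide);
		return 0;
	}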
96 | |||
39 | static struct dentry *proc_mount(struct file_system_type *fs_type, | 97 | static struct dentry *proc_mount(struct file_system_type *fs_type, |
40 | int flags, const char *dev_name, void *data) | 98 | int flags, const char *dev_name, void *data) |
41 | { | 99 | { |
@@ -43,11 +101,15 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, | |||
43 | struct super_block *sb; | 101 | struct super_block *sb; |
44 | struct pid_namespace *ns; | 102 | struct pid_namespace *ns; |
45 | struct proc_inode *ei; | 103 | struct proc_inode *ei; |
104 | char *options; | ||
46 | 105 | ||
47 | if (flags & MS_KERNMOUNT) | 106 | if (flags & MS_KERNMOUNT) { |
48 | ns = (struct pid_namespace *)data; | 107 | ns = (struct pid_namespace *)data; |
49 | else | 108 | options = NULL; |
109 | } else { | ||
50 | ns = current->nsproxy->pid_ns; | 110 | ns = current->nsproxy->pid_ns; |
111 | options = data; | ||
112 | } | ||
51 | 113 | ||
52 | sb = sget(fs_type, proc_test_super, proc_set_super, ns); | 114 | sb = sget(fs_type, proc_test_super, proc_set_super, ns); |
53 | if (IS_ERR(sb)) | 115 | if (IS_ERR(sb)) |
@@ -55,6 +117,10 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, | |||
55 | 117 | ||
56 | if (!sb->s_root) { | 118 | if (!sb->s_root) { |
57 | sb->s_flags = flags; | 119 | sb->s_flags = flags; |
120 | if (!proc_parse_options(options, ns)) { | ||
121 | deactivate_locked_super(sb); | ||
122 | return ERR_PTR(-EINVAL); | ||
123 | } | ||
58 | err = proc_fill_super(sb); | 124 | err = proc_fill_super(sb); |
59 | if (err) { | 125 | if (err) { |
60 | deactivate_locked_super(sb); | 126 | deactivate_locked_super(sb); |
diff --git a/fs/proc/stat.c b/fs/proc/stat.c index d76ca6ae2b1b..121f77cfef76 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c | |||
@@ -77,6 +77,8 @@ static int show_stat(struct seq_file *p, void *v) | |||
77 | steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; | 77 | steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; |
78 | guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; | 78 | guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; |
79 | guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; | 79 | guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; |
80 | sum += kstat_cpu_irqs_sum(i); | ||
81 | sum += arch_irq_stat_cpu(i); | ||
80 | 82 | ||
81 | for (j = 0; j < NR_SOFTIRQS; j++) { | 83 | for (j = 0; j < NR_SOFTIRQS; j++) { |
82 | unsigned int softirq_stat = kstat_softirqs_cpu(j, i); | 84 | unsigned int softirq_stat = kstat_softirqs_cpu(j, i); |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e418c5abdb0e..7dcd2a250495 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -518,6 +518,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
518 | if (!page) | 518 | if (!page) |
519 | continue; | 519 | continue; |
520 | 520 | ||
521 | if (PageReserved(page)) | ||
522 | continue; | ||
523 | |||
521 | /* Clear accessed and referenced bits. */ | 524 | /* Clear accessed and referenced bits. */ |
522 | ptep_test_and_clear_young(vma, addr, pte); | 525 | ptep_test_and_clear_young(vma, addr, pte); |
523 | ClearPageReferenced(page); | 526 | ClearPageReferenced(page); |
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 2bfd987f4853..6b009548d2e0 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -179,47 +179,33 @@ static const char *qnx4_checkroot(struct super_block *sb) | |||
179 | struct qnx4_inode_entry *rootdir; | 179 | struct qnx4_inode_entry *rootdir; |
180 | int rd, rl; | 180 | int rd, rl; |
181 | int i, j; | 181 | int i, j; |
182 | int found = 0; | ||
183 | 182 | ||
184 | if (*(qnx4_sb(sb)->sb->RootDir.di_fname) != '/') { | 183 | if (*(qnx4_sb(sb)->sb->RootDir.di_fname) != '/') |
185 | return "no qnx4 filesystem (no root dir)."; | 184 | return "no qnx4 filesystem (no root dir)."; |
186 | } else { | 185 | QNX4DEBUG((KERN_NOTICE "QNX4 filesystem found on dev %s.\n", sb->s_id)); |
187 | QNX4DEBUG((KERN_NOTICE "QNX4 filesystem found on dev %s.\n", sb->s_id)); | 186 | rd = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_blk) - 1; |
188 | rd = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_blk) - 1; | 187 | rl = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_size); |
189 | rl = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_size); | 188 | for (j = 0; j < rl; j++) { |
190 | for (j = 0; j < rl; j++) { | 189 | bh = sb_bread(sb, rd + j); /* root dir, first block */ |
191 | bh = sb_bread(sb, rd + j); /* root dir, first block */ | 190 | if (bh == NULL) |
192 | if (bh == NULL) { | 191 | return "unable to read root entry."; |
193 | return "unable to read root entry."; | 192 | rootdir = (struct qnx4_inode_entry *) bh->b_data; |
194 | } | 193 | for (i = 0; i < QNX4_INODES_PER_BLOCK; i++, rootdir++) { |
195 | for (i = 0; i < QNX4_INODES_PER_BLOCK; i++) { | 194 | QNX4DEBUG((KERN_INFO "rootdir entry found : [%s]\n", rootdir->di_fname)); |
196 | rootdir = (struct qnx4_inode_entry *) (bh->b_data + i * QNX4_DIR_ENTRY_SIZE); | 195 | if (strcmp(rootdir->di_fname, QNX4_BMNAME) != 0) |
197 | if (rootdir->di_fname != NULL) { | 196 | continue; |
198 | QNX4DEBUG((KERN_INFO "rootdir entry found : [%s]\n", rootdir->di_fname)); | 197 | qnx4_sb(sb)->BitMap = kmemdup(rootdir, |
199 | if (!strcmp(rootdir->di_fname, | 198 | sizeof(struct qnx4_inode_entry), |
200 | QNX4_BMNAME)) { | 199 | GFP_KERNEL); |
201 | found = 1; | ||
202 | qnx4_sb(sb)->BitMap = kmemdup(rootdir, | ||
203 | sizeof(struct qnx4_inode_entry), | ||
204 | GFP_KERNEL); | ||
205 | if (!qnx4_sb(sb)->BitMap) { | ||
206 | brelse (bh); | ||
207 | return "not enough memory for bitmap inode"; | ||
208 | }/* keep bitmap inode known */ | ||
209 | break; | ||
210 | } | ||
211 | } | ||
212 | } | ||
213 | brelse(bh); | 200 | brelse(bh); |
214 | if (found != 0) { | 201 | if (!qnx4_sb(sb)->BitMap) |
215 | break; | 202 | return "not enough memory for bitmap inode"; |
216 | } | 203 | /* keep bitmap inode known */ |
217 | } | 204 | return NULL; |
218 | if (found == 0) { | ||
219 | return "bitmap file not found."; | ||
220 | } | 205 | } |
206 | brelse(bh); | ||
221 | } | 207 | } |
222 | return NULL; | 208 | return "bitmap file not found."; |
223 | } | 209 | } |
224 | 210 | ||
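Editor's note: the qnx4_checkroot() rewrite above replaces the found flag and its post-loop checks with a direct return from the innermost match — once the bitmap entry has been duplicated with kmemdup(), the function's job is done. The same flag-elimination refactor in miniature:

	#include <stdio.h>
	#include <string.h>

	/* Return the match directly instead of tracking a 'found' flag. */
	static const char *find_bitmap(const char *names[], int n)
	{
		for (int i = 0; i < n; i++)
			if (strcmp(names[i], ".bitmap") == 0)
				return names[i];	/* early return: no flag needed */
		return NULL;				/* loop fell through: not found */
	}

	int main(void)
	{
		const char *names[] = { "/", ".inodes", ".bitmap" };

		printf("%s\n", find_bitmap(names, 3) ? "found" : "bitmap file not found.");
		return 0;
	}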
225 | static int qnx4_fill_super(struct super_block *s, void *data, int silent) | 211 | static int qnx4_fill_super(struct super_block *s, void *data, int silent) |
@@ -270,7 +256,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) | |||
270 | if (IS_ERR(root)) { | 256 | if (IS_ERR(root)) { |
271 | printk(KERN_ERR "qnx4: get inode failed\n"); | 257 | printk(KERN_ERR "qnx4: get inode failed\n"); |
272 | ret = PTR_ERR(root); | 258 | ret = PTR_ERR(root); |
273 | goto out; | 259 | goto outb; |
274 | } | 260 | } |
275 | 261 | ||
276 | ret = -ENOMEM; | 262 | ret = -ENOMEM; |
@@ -283,6 +269,8 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) | |||
283 | 269 | ||
284 | outi: | 270 | outi: |
285 | iput(root); | 271 | iput(root); |
272 | outb: | ||
273 | kfree(qs->BitMap); | ||
286 | out: | 274 | out: |
287 | brelse(bh); | 275 | brelse(bh); |
288 | outnobh: | 276 | outnobh: |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 5ec59b20cf76..46741970371b 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -2125,6 +2125,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, | |||
2125 | mutex_unlock(&dqopt->dqio_mutex); | 2125 | mutex_unlock(&dqopt->dqio_mutex); |
2126 | goto out_file_init; | 2126 | goto out_file_init; |
2127 | } | 2127 | } |
2128 | if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) | ||
2129 | dqopt->info[type].dqi_flags |= DQF_SYS_FILE; | ||
2128 | mutex_unlock(&dqopt->dqio_mutex); | 2130 | mutex_unlock(&dqopt->dqio_mutex); |
2129 | spin_lock(&dq_state_lock); | 2131 | spin_lock(&dq_state_lock); |
2130 | dqopt->flags |= dquot_state_flag(flags, type); | 2132 | dqopt->flags |= dquot_state_flag(flags, type); |
@@ -2464,7 +2466,7 @@ int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) | |||
2464 | spin_lock(&dq_data_lock); | 2466 | spin_lock(&dq_data_lock); |
2465 | ii->dqi_bgrace = mi->dqi_bgrace; | 2467 | ii->dqi_bgrace = mi->dqi_bgrace; |
2466 | ii->dqi_igrace = mi->dqi_igrace; | 2468 | ii->dqi_igrace = mi->dqi_igrace; |
2467 | ii->dqi_flags = mi->dqi_flags & DQF_MASK; | 2469 | ii->dqi_flags = mi->dqi_flags & DQF_GETINFO_MASK; |
2468 | ii->dqi_valid = IIF_ALL; | 2470 | ii->dqi_valid = IIF_ALL; |
2469 | spin_unlock(&dq_data_lock); | 2471 | spin_unlock(&dq_data_lock); |
2470 | mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); | 2472 | mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); |
@@ -2490,8 +2492,8 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) | |||
2490 | if (ii->dqi_valid & IIF_IGRACE) | 2492 | if (ii->dqi_valid & IIF_IGRACE) |
2491 | mi->dqi_igrace = ii->dqi_igrace; | 2493 | mi->dqi_igrace = ii->dqi_igrace; |
2492 | if (ii->dqi_valid & IIF_FLAGS) | 2494 | if (ii->dqi_valid & IIF_FLAGS) |
2493 | mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | | 2495 | mi->dqi_flags = (mi->dqi_flags & ~DQF_SETINFO_MASK) | |
2494 | (ii->dqi_flags & DQF_MASK); | 2496 | (ii->dqi_flags & DQF_SETINFO_MASK); |
2495 | spin_unlock(&dq_data_lock); | 2497 | spin_unlock(&dq_data_lock); |
2496 | mark_info_dirty(sb, type); | 2498 | mark_info_dirty(sb, type); |
2497 | /* Force write to disk */ | 2499 | /* Force write to disk */ |
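Editor's note: splitting DQF_MASK into DQF_GETINFO_MASK and DQF_SETINFO_MASK lets dquot_get_dqinfo() report the new DQF_SYS_FILE bit while dquot_set_dqinfo() refuses to let userspace change it — the set path only touches bits inside the settable mask. A toy illustration of that read-mask/write-mask split (bit values invented for the sketch):

	#include <stdio.h>

	#define DQF_USER_A	0x01u	/* user-visible, user-settable */
	#define DQF_SYS_FLAG	0x10u	/* user-visible, kernel-only   */
	#define GETINFO_MASK	(DQF_USER_A | DQF_SYS_FLAG)
	#define SETINFO_MASK	(DQF_USER_A)

	int main(void)
	{
		unsigned kernel_flags = DQF_SYS_FLAG;		/* set internally  */
		unsigned user_req = DQF_USER_A | DQF_SYS_FLAG;	/* user tries both */

		/* set path: only bits inside SETINFO_MASK may change */
		kernel_flags = (kernel_flags & ~SETINFO_MASK) | (user_req & SETINFO_MASK);

		/* get path: report everything user-visible */
		printf("get -> %#x (sys flag survived: %s)\n",
		       kernel_flags & GETINFO_MASK,
		       (kernel_flags & DQF_SYS_FLAG) ? "yes" : "no");
		return 0;
	}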
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index a945cd265228..70de42f09f1d 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c | |||
@@ -1364,10 +1364,7 @@ int reiserfs_init_bitmap_cache(struct super_block *sb) | |||
1364 | struct reiserfs_bitmap_info *bitmap; | 1364 | struct reiserfs_bitmap_info *bitmap; |
1365 | unsigned int bmap_nr = reiserfs_bmap_count(sb); | 1365 | unsigned int bmap_nr = reiserfs_bmap_count(sb); |
1366 | 1366 | ||
1367 | /* Avoid lock recursion in fault case */ | ||
1368 | reiserfs_write_unlock(sb); | ||
1369 | bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); | 1367 | bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); |
1370 | reiserfs_write_lock(sb); | ||
1371 | if (bitmap == NULL) | 1368 | if (bitmap == NULL) |
1372 | return -ENOMEM; | 1369 | return -ENOMEM; |
1373 | 1370 | ||
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index eb711060a6f2..c3cf54fd4de3 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -2678,16 +2678,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2678 | char b[BDEVNAME_SIZE]; | 2678 | char b[BDEVNAME_SIZE]; |
2679 | int ret; | 2679 | int ret; |
2680 | 2680 | ||
2681 | /* | ||
2682 | * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS | ||
2683 | * dependency inversion warnings. | ||
2684 | */ | ||
2685 | reiserfs_write_unlock(sb); | ||
2686 | journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); | 2681 | journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); |
2687 | if (!journal) { | 2682 | if (!journal) { |
2688 | reiserfs_warning(sb, "journal-1256", | 2683 | reiserfs_warning(sb, "journal-1256", |
2689 | "unable to get memory for journal structure"); | 2684 | "unable to get memory for journal structure"); |
2690 | reiserfs_write_lock(sb); | ||
2691 | return 1; | 2685 | return 1; |
2692 | } | 2686 | } |
2693 | INIT_LIST_HEAD(&journal->j_bitmap_nodes); | 2687 | INIT_LIST_HEAD(&journal->j_bitmap_nodes); |
@@ -2695,10 +2689,8 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2695 | INIT_LIST_HEAD(&journal->j_working_list); | 2689 | INIT_LIST_HEAD(&journal->j_working_list); |
2696 | INIT_LIST_HEAD(&journal->j_journal_list); | 2690 | INIT_LIST_HEAD(&journal->j_journal_list); |
2697 | journal->j_persistent_trans = 0; | 2691 | journal->j_persistent_trans = 0; |
2698 | ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, | 2692 | if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, |
2699 | reiserfs_bmap_count(sb)); | 2693 | reiserfs_bmap_count(sb))) |
2700 | reiserfs_write_lock(sb); | ||
2701 | if (ret) | ||
2702 | goto free_and_return; | 2694 | goto free_and_return; |
2703 | 2695 | ||
2704 | allocate_bitmap_nodes(sb); | 2696 | allocate_bitmap_nodes(sb); |
@@ -2727,27 +2719,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2727 | goto free_and_return; | 2719 | goto free_and_return; |
2728 | } | 2720 | } |
2729 | 2721 | ||
2730 | /* | ||
2731 | * We need to unlock here to avoid creating the following | ||
2732 | * dependency: | ||
2733 | * reiserfs_lock -> sysfs_mutex | ||
2734 | * Because the reiserfs mmap path creates the following dependency: | ||
2735 | * mm->mmap -> reiserfs_lock, hence we have | ||
2736 | * mm->mmap -> reiserfs_lock -> sysfs_mutex | ||
2737 | * This would end up in a circular dependency with the sysfs readdir | ||
2738 | * path, which does sysfs_mutex -> mm->mmap_sem. | ||
2739 | * This is fine because the reiserfs lock is useless in the mount path, | ||
2740 | * at least until we call journal_begin. We keep it for paranoid | ||
2741 | * reasons. | ||
2742 | */ | ||
2743 | reiserfs_write_unlock(sb); | ||
2744 | if (journal_init_dev(sb, journal, j_dev_name) != 0) { | 2722 | if (journal_init_dev(sb, journal, j_dev_name) != 0) { |
2745 | reiserfs_write_lock(sb); | ||
2746 | reiserfs_warning(sb, "sh-462", | 2723 | reiserfs_warning(sb, "sh-462", |
2747 | "unable to initialize jornal device"); | 2724 | "unable to initialize jornal device"); |
2748 | goto free_and_return; | 2725 | goto free_and_return; |
2749 | } | 2726 | } |
2750 | reiserfs_write_lock(sb); | ||
2751 | 2727 | ||
2752 | rs = SB_DISK_SUPER_BLOCK(sb); | 2728 | rs = SB_DISK_SUPER_BLOCK(sb); |
2753 | 2729 | ||
@@ -2829,9 +2805,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2829 | journal->j_mount_id = 10; | 2805 | journal->j_mount_id = 10; |
2830 | journal->j_state = 0; | 2806 | journal->j_state = 0; |
2831 | atomic_set(&(journal->j_jlock), 0); | 2807 | atomic_set(&(journal->j_jlock), 0); |
2832 | reiserfs_write_unlock(sb); | ||
2833 | journal->j_cnode_free_list = allocate_cnodes(num_cnodes); | 2808 | journal->j_cnode_free_list = allocate_cnodes(num_cnodes); |
2834 | reiserfs_write_lock(sb); | ||
2835 | journal->j_cnode_free_orig = journal->j_cnode_free_list; | 2809 | journal->j_cnode_free_orig = journal->j_cnode_free_list; |
2836 | journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; | 2810 | journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; |
2837 | journal->j_cnode_used = 0; | 2811 | journal->j_cnode_used = 0; |
@@ -2848,24 +2822,37 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2848 | 2822 | ||
2849 | init_journal_hash(sb); | 2823 | init_journal_hash(sb); |
2850 | jl = journal->j_current_jl; | 2824 | jl = journal->j_current_jl; |
2825 | |||
2826 | /* | ||
2827 | * get_list_bitmap() may call flush_commit_list() which | ||
2828 | * requires the lock. Calling flush_commit_list() shouldn't happen | ||
2829 | * this early but I like to be paranoid. | ||
2830 | */ | ||
2831 | reiserfs_write_lock(sb); | ||
2851 | jl->j_list_bitmap = get_list_bitmap(sb, jl); | 2832 | jl->j_list_bitmap = get_list_bitmap(sb, jl); |
2833 | reiserfs_write_unlock(sb); | ||
2852 | if (!jl->j_list_bitmap) { | 2834 | if (!jl->j_list_bitmap) { |
2853 | reiserfs_warning(sb, "journal-2005", | 2835 | reiserfs_warning(sb, "journal-2005", |
2854 | "get_list_bitmap failed for journal list 0"); | 2836 | "get_list_bitmap failed for journal list 0"); |
2855 | goto free_and_return; | 2837 | goto free_and_return; |
2856 | } | 2838 | } |
2857 | if (journal_read(sb) < 0) { | 2839 | |
2840 | /* | ||
2841 | * journal_read() needs to be inspected so that the lock can be | ||
2842 | * pushed further down into it (or even removed). | ||
2843 | */ | ||
2844 | reiserfs_write_lock(sb); | ||
2845 | ret = journal_read(sb); | ||
2846 | reiserfs_write_unlock(sb); | ||
2847 | if (ret < 0) { | ||
2858 | reiserfs_warning(sb, "reiserfs-2006", | 2848 | reiserfs_warning(sb, "reiserfs-2006", |
2859 | "Replay Failure, unable to mount"); | 2849 | "Replay Failure, unable to mount"); |
2860 | goto free_and_return; | 2850 | goto free_and_return; |
2861 | } | 2851 | } |
2862 | 2852 | ||
2863 | reiserfs_mounted_fs_count++; | 2853 | reiserfs_mounted_fs_count++; |
2864 | if (reiserfs_mounted_fs_count <= 1) { | 2854 | if (reiserfs_mounted_fs_count <= 1) |
2865 | reiserfs_write_unlock(sb); | ||
2866 | commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0); | 2855 | commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0); |
2867 | reiserfs_write_lock(sb); | ||
2868 | } | ||
2869 | 2856 | ||
2870 | INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); | 2857 | INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); |
2871 | journal->j_work_sb = sb; | 2858 | journal->j_work_sb = sb; |
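Editor's note: the journal_init() rework inverts the old discipline — instead of entering with the write lock held and dropping it around every blocking allocation, the function now runs unlocked and takes the lock only around the few callees (get_list_bitmap(), journal_read()) that still need it. The before/after shape, sketched with a pthread mutex (names illustrative; compile with -lpthread):

	#include <pthread.h>
	#include <stdlib.h>

	static pthread_mutex_t fs_lock = PTHREAD_MUTEX_INITIALIZER;

	static void legacy_init(void)
	{
		pthread_mutex_lock(&fs_lock);	/* caller held it anyway */
		pthread_mutex_unlock(&fs_lock);	/* drop around each allocation... */
		void *buf = malloc(4096);	/* may block / enter reclaim */
		pthread_mutex_lock(&fs_lock);	/* ...and retake it */
		free(buf);
		pthread_mutex_unlock(&fs_lock);
	}

	static void reworked_init(void)
	{
		void *buf = malloc(4096);	/* unlocked by default */
		free(buf);
		pthread_mutex_lock(&fs_lock);	/* lock only the step that needs it */
		/* get_list_bitmap()-style work here */
		pthread_mutex_unlock(&fs_lock);
	}

	int main(void) { legacy_init(); reworked_init(); return 0; }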
@@ -2896,14 +2883,13 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th, | |||
2896 | journal->j_cnode_free < (journal->j_trans_max * 3)) { | 2883 | journal->j_cnode_free < (journal->j_trans_max * 3)) { |
2897 | return 1; | 2884 | return 1; |
2898 | } | 2885 | } |
2899 | /* protected by the BKL here */ | 2886 | |
2900 | journal->j_len_alloc += new_alloc; | 2887 | journal->j_len_alloc += new_alloc; |
2901 | th->t_blocks_allocated += new_alloc ; | 2888 | th->t_blocks_allocated += new_alloc ; |
2902 | return 0; | 2889 | return 0; |
2903 | } | 2890 | } |
2904 | 2891 | ||
2905 | /* this must be called inside a transaction, and requires the | 2892 | /* this must be called inside a transaction |
2906 | ** kernel_lock to be held | ||
2907 | */ | 2893 | */ |
2908 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) | 2894 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) |
2909 | { | 2895 | { |
@@ -2914,8 +2900,7 @@ void reiserfs_block_writes(struct reiserfs_transaction_handle *th) | |||
2914 | return; | 2900 | return; |
2915 | } | 2901 | } |
2916 | 2902 | ||
2917 | /* this must be called without a transaction started, and does not | 2903 | /* this must be called without a transaction started |
2918 | ** require BKL | ||
2919 | */ | 2904 | */ |
2920 | void reiserfs_allow_writes(struct super_block *s) | 2905 | void reiserfs_allow_writes(struct super_block *s) |
2921 | { | 2906 | { |
@@ -2924,8 +2909,7 @@ void reiserfs_allow_writes(struct super_block *s) | |||
2924 | wake_up(&journal->j_join_wait); | 2909 | wake_up(&journal->j_join_wait); |
2925 | } | 2910 | } |
2926 | 2911 | ||
2927 | /* this must be called without a transaction started, and does not | 2912 | /* this must be called without a transaction started |
2928 | ** require BKL | ||
2929 | */ | 2913 | */ |
2930 | void reiserfs_wait_on_write_block(struct super_block *s) | 2914 | void reiserfs_wait_on_write_block(struct super_block *s) |
2931 | { | 2915 | { |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 19c454e61b79..e12d8b97cd4d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -455,16 +455,20 @@ int remove_save_link(struct inode *inode, int truncate) | |||
455 | static void reiserfs_kill_sb(struct super_block *s) | 455 | static void reiserfs_kill_sb(struct super_block *s) |
456 | { | 456 | { |
457 | if (REISERFS_SB(s)) { | 457 | if (REISERFS_SB(s)) { |
458 | if (REISERFS_SB(s)->xattr_root) { | 458 | /* |
459 | d_invalidate(REISERFS_SB(s)->xattr_root); | 459 | * Force any pending inode evictions to occur now. Any |
460 | dput(REISERFS_SB(s)->xattr_root); | 460 | * inodes to be removed that have extended attributes |
461 | REISERFS_SB(s)->xattr_root = NULL; | 461 | * associated with them need to clean them up before |
462 | } | 462 | * we can release the extended attribute root dentries. |
463 | if (REISERFS_SB(s)->priv_root) { | 463 | * shrink_dcache_for_umount will BUG if we don't release |
464 | d_invalidate(REISERFS_SB(s)->priv_root); | 464 | * those before it's called, so ->put_super is too late. |
465 | dput(REISERFS_SB(s)->priv_root); | 465 | */ |
466 | REISERFS_SB(s)->priv_root = NULL; | 466 | shrink_dcache_sb(s); |
467 | } | 467 | |
468 | dput(REISERFS_SB(s)->xattr_root); | ||
469 | REISERFS_SB(s)->xattr_root = NULL; | ||
470 | dput(REISERFS_SB(s)->priv_root); | ||
471 | REISERFS_SB(s)->priv_root = NULL; | ||
468 | } | 472 | } |
469 | 473 | ||
470 | kill_block_super(s); | 474 | kill_block_super(s); |
@@ -1249,7 +1253,8 @@ static void handle_quota_files(struct super_block *s, char **qf_names, | |||
1249 | kfree(REISERFS_SB(s)->s_qf_names[i]); | 1253 | kfree(REISERFS_SB(s)->s_qf_names[i]); |
1250 | REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; | 1254 | REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; |
1251 | } | 1255 | } |
1252 | REISERFS_SB(s)->s_jquota_fmt = *qfmt; | 1256 | if (*qfmt) |
1257 | REISERFS_SB(s)->s_jquota_fmt = *qfmt; | ||
1253 | } | 1258 | } |
1254 | #endif | 1259 | #endif |
1255 | 1260 | ||
@@ -1514,9 +1519,7 @@ static int read_super_block(struct super_block *s, int offset) | |||
1514 | static int reread_meta_blocks(struct super_block *s) | 1519 | static int reread_meta_blocks(struct super_block *s) |
1515 | { | 1520 | { |
1516 | ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); | 1521 | ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); |
1517 | reiserfs_write_unlock(s); | ||
1518 | wait_on_buffer(SB_BUFFER_WITH_SB(s)); | 1522 | wait_on_buffer(SB_BUFFER_WITH_SB(s)); |
1519 | reiserfs_write_lock(s); | ||
1520 | if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { | 1523 | if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { |
1521 | reiserfs_warning(s, "reiserfs-2504", "error reading the super"); | 1524 | reiserfs_warning(s, "reiserfs-2504", "error reading the super"); |
1522 | return 1; | 1525 | return 1; |
@@ -1741,22 +1744,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1741 | mutex_init(&REISERFS_SB(s)->lock); | 1744 | mutex_init(&REISERFS_SB(s)->lock); |
1742 | REISERFS_SB(s)->lock_depth = -1; | 1745 | REISERFS_SB(s)->lock_depth = -1; |
1743 | 1746 | ||
1744 | /* | ||
1745 | * This function is called with the bkl, which also was the old | ||
1746 | * locking used here. | ||
1747 | * do_journal_begin() will soon check if we hold the lock (ie: was the | ||
1748 | * bkl). This is likely because do_journal_begin() has several other | ||
1749 | * callers because at this time, it doesn't seem to be necessary to | ||
1750 | * protect against anything. | ||
1751 | * Anyway, let's be conservative and lock for now. | ||
1752 | */ | ||
1753 | reiserfs_write_lock(s); | ||
1754 | |||
1755 | jdev_name = NULL; | 1747 | jdev_name = NULL; |
1756 | if (reiserfs_parse_options | 1748 | if (reiserfs_parse_options |
1757 | (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, | 1749 | (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, |
1758 | &commit_max_age, qf_names, &qfmt) == 0) { | 1750 | &commit_max_age, qf_names, &qfmt) == 0) { |
1759 | goto error; | 1751 | goto error_unlocked; |
1760 | } | 1752 | } |
1761 | if (jdev_name && jdev_name[0]) { | 1753 | if (jdev_name && jdev_name[0]) { |
1762 | REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL); | 1754 | REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL); |
@@ -1772,7 +1764,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1772 | 1764 | ||
1773 | if (blocks) { | 1765 | if (blocks) { |
1774 | SWARN(silent, s, "jmacd-7", "resize option for remount only"); | 1766 | SWARN(silent, s, "jmacd-7", "resize option for remount only"); |
1775 | goto error; | 1767 | goto error_unlocked; |
1776 | } | 1768 | } |
1777 | 1769 | ||
1778 | /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ | 1770 | /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ |
@@ -1782,7 +1774,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1782 | else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { | 1774 | else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { |
1783 | SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", | 1775 | SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", |
1784 | reiserfs_bdevname(s)); | 1776 | reiserfs_bdevname(s)); |
1785 | goto error; | 1777 | goto error_unlocked; |
1786 | } | 1778 | } |
1787 | 1779 | ||
1788 | rs = SB_DISK_SUPER_BLOCK(s); | 1780 | rs = SB_DISK_SUPER_BLOCK(s); |
@@ -1798,7 +1790,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1798 | "or increase size of your LVM partition"); | 1790 | "or increase size of your LVM partition"); |
1799 | SWARN(silent, s, "", "Or may be you forgot to " | 1791 | SWARN(silent, s, "", "Or may be you forgot to " |
1800 | "reboot after fdisk when it told you to"); | 1792 | "reboot after fdisk when it told you to"); |
1801 | goto error; | 1793 | goto error_unlocked; |
1802 | } | 1794 | } |
1803 | 1795 | ||
1804 | sbi->s_mount_state = SB_REISERFS_STATE(s); | 1796 | sbi->s_mount_state = SB_REISERFS_STATE(s); |
@@ -1806,8 +1798,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1806 | 1798 | ||
1807 | if ((errval = reiserfs_init_bitmap_cache(s))) { | 1799 | if ((errval = reiserfs_init_bitmap_cache(s))) { |
1808 | SWARN(silent, s, "jmacd-8", "unable to read bitmap"); | 1800 | SWARN(silent, s, "jmacd-8", "unable to read bitmap"); |
1809 | goto error; | 1801 | goto error_unlocked; |
1810 | } | 1802 | } |
1803 | |||
1811 | errval = -EINVAL; | 1804 | errval = -EINVAL; |
1812 | #ifdef CONFIG_REISERFS_CHECK | 1805 | #ifdef CONFIG_REISERFS_CHECK |
1813 | SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON"); | 1806 | SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON"); |
@@ -1830,24 +1823,26 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1830 | if (reiserfs_barrier_flush(s)) { | 1823 | if (reiserfs_barrier_flush(s)) { |
1831 | printk("reiserfs: using flush barriers\n"); | 1824 | printk("reiserfs: using flush barriers\n"); |
1832 | } | 1825 | } |
1826 | |||
1833 | // set_device_ro(s->s_dev, 1) ; | 1827 | // set_device_ro(s->s_dev, 1) ; |
1834 | if (journal_init(s, jdev_name, old_format, commit_max_age)) { | 1828 | if (journal_init(s, jdev_name, old_format, commit_max_age)) { |
1835 | SWARN(silent, s, "sh-2022", | 1829 | SWARN(silent, s, "sh-2022", |
1836 | "unable to initialize journal space"); | 1830 | "unable to initialize journal space"); |
1837 | goto error; | 1831 | goto error_unlocked; |
1838 | } else { | 1832 | } else { |
1839 | jinit_done = 1; /* once this is set, journal_release must be called | 1833 | jinit_done = 1; /* once this is set, journal_release must be called |
1840 | ** if we error out of the mount | 1834 | ** if we error out of the mount |
1841 | */ | 1835 | */ |
1842 | } | 1836 | } |
1837 | |||
1843 | if (reread_meta_blocks(s)) { | 1838 | if (reread_meta_blocks(s)) { |
1844 | SWARN(silent, s, "jmacd-9", | 1839 | SWARN(silent, s, "jmacd-9", |
1845 | "unable to reread meta blocks after journal init"); | 1840 | "unable to reread meta blocks after journal init"); |
1846 | goto error; | 1841 | goto error_unlocked; |
1847 | } | 1842 | } |
1848 | 1843 | ||
1849 | if (replay_only(s)) | 1844 | if (replay_only(s)) |
1850 | goto error; | 1845 | goto error_unlocked; |
1851 | 1846 | ||
1852 | if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { | 1847 | if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { |
1853 | SWARN(silent, s, "clm-7000", | 1848 | SWARN(silent, s, "clm-7000", |
@@ -1861,9 +1856,19 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1861 | reiserfs_init_locked_inode, (void *)(&args)); | 1856 | reiserfs_init_locked_inode, (void *)(&args)); |
1862 | if (!root_inode) { | 1857 | if (!root_inode) { |
1863 | SWARN(silent, s, "jmacd-10", "get root inode failed"); | 1858 | SWARN(silent, s, "jmacd-10", "get root inode failed"); |
1864 | goto error; | 1859 | goto error_unlocked; |
1865 | } | 1860 | } |
1866 | 1861 | ||
1862 | /* | ||
1863 | * This path assumed to be called with the BKL in the old times. | ||
1864 | * Now we have inherited the big reiserfs lock from it and many | ||
1865 | * reiserfs helpers called in the mount path and elsewhere require | ||
1866 | * this lock to be held even if it's not always necessary. Let's be | ||
1867 | * conservative and hold it early. The window can be reduced after | ||
1868 | * careful review of the code. | ||
1869 | */ | ||
1870 | reiserfs_write_lock(s); | ||
1871 | |||
1867 | if (root_inode->i_state & I_NEW) { | 1872 | if (root_inode->i_state & I_NEW) { |
1868 | reiserfs_read_locked_inode(root_inode, &args); | 1873 | reiserfs_read_locked_inode(root_inode, &args); |
1869 | unlock_new_inode(root_inode); | 1874 | unlock_new_inode(root_inode); |
@@ -1990,12 +1995,16 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1990 | return (0); | 1995 | return (0); |
1991 | 1996 | ||
1992 | error: | 1997 | error: |
1993 | if (jinit_done) { /* kill the commit thread, free journal ram */ | 1998 | reiserfs_write_unlock(s); |
1999 | |||
2000 | error_unlocked: | ||
2001 | /* kill the commit thread, free journal ram */ | ||
2002 | if (jinit_done) { | ||
2003 | reiserfs_write_lock(s); | ||
1994 | journal_release_error(NULL, s); | 2004 | journal_release_error(NULL, s); |
2005 | reiserfs_write_unlock(s); | ||
1995 | } | 2006 | } |
1996 | 2007 | ||
1997 | reiserfs_write_unlock(s); | ||
1998 | |||
1999 | reiserfs_free_bitmap_cache(s); | 2008 | reiserfs_free_bitmap_cache(s); |
2000 | if (SB_BUFFER_WITH_SB(s)) | 2009 | if (SB_BUFFER_WITH_SB(s)) |
2001 | brelse(SB_BUFFER_WITH_SB(s)); | 2010 | brelse(SB_BUFFER_WITH_SB(s)); |
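Editor's note: with the write lock now taken late in reiserfs_fill_super(), the error path splits in two — `error` unlocks first, `error_unlocked` is the target for every failure before the lock is taken, and both fall through to the shared cleanup. The classic two-label unwind, in outline (a sketch, not the reiserfs code; compile with -lpthread):

	#include <pthread.h>
	#include <stdlib.h>

	static pthread_mutex_t s_lock = PTHREAD_MUTEX_INITIALIZER;

	static int fill_super(int fail_early, int fail_late)
	{
		void *bitmap = malloc(64);

		if (!bitmap || fail_early)
			goto error_unlocked;	/* lock not taken yet */

		pthread_mutex_lock(&s_lock);
		if (fail_late)
			goto error;		/* must unlock first */

		pthread_mutex_unlock(&s_lock);
		free(bitmap);
		return 0;

	error:
		pthread_mutex_unlock(&s_lock);
		/* fall through to the shared, lock-free cleanup */
	error_unlocked:
		free(bitmap);			/* free(NULL) is a no-op */
		return -1;
	}

	int main(void) { return fill_super(0, 1) == -1 ? 0 : 1; }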
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c index eed99428f104..e1a7779dd3cb 100644 --- a/fs/romfs/mmap-nommu.c +++ b/fs/romfs/mmap-nommu.c | |||
@@ -28,9 +28,10 @@ static unsigned long romfs_get_unmapped_area(struct file *file, | |||
28 | struct inode *inode = file->f_mapping->host; | 28 | struct inode *inode = file->f_mapping->host; |
29 | struct mtd_info *mtd = inode->i_sb->s_mtd; | 29 | struct mtd_info *mtd = inode->i_sb->s_mtd; |
30 | unsigned long isize, offset, maxpages, lpages; | 30 | unsigned long isize, offset, maxpages, lpages; |
31 | int ret; | ||
31 | 32 | ||
32 | if (!mtd) | 33 | if (!mtd) |
33 | goto cant_map_directly; | 34 | return (unsigned long) -ENOSYS; |
34 | 35 | ||
35 | /* the mapping mustn't extend beyond the EOF */ | 36 | /* the mapping mustn't extend beyond the EOF */ |
36 | lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 37 | lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
@@ -41,23 +42,20 @@ static unsigned long romfs_get_unmapped_area(struct file *file, | |||
41 | if ((pgoff >= maxpages) || (maxpages - pgoff < lpages)) | 42 | if ((pgoff >= maxpages) || (maxpages - pgoff < lpages)) |
42 | return (unsigned long) -EINVAL; | 43 | return (unsigned long) -EINVAL; |
43 | 44 | ||
44 | /* we need to call down to the MTD layer to do the actual mapping */ | 45 | if (addr != 0) |
45 | if (mtd->get_unmapped_area) { | 46 | return (unsigned long) -EINVAL; |
46 | if (addr != 0) | ||
47 | return (unsigned long) -EINVAL; | ||
48 | |||
49 | if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT)) | ||
50 | return (unsigned long) -EINVAL; | ||
51 | 47 | ||
52 | offset += ROMFS_I(inode)->i_dataoffset; | 48 | if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT)) |
53 | if (offset > mtd->size - len) | 49 | return (unsigned long) -EINVAL; |
54 | return (unsigned long) -EINVAL; | ||
55 | 50 | ||
56 | return mtd->get_unmapped_area(mtd, len, offset, flags); | 51 | offset += ROMFS_I(inode)->i_dataoffset; |
57 | } | 52 | if (offset > mtd->size - len) |
53 | return (unsigned long) -EINVAL; | ||
58 | 54 | ||
59 | cant_map_directly: | 55 | ret = mtd_get_unmapped_area(mtd, len, offset, flags); |
60 | return (unsigned long) -ENOSYS; | 56 | if (ret == -EOPNOTSUPP) |
57 | ret = -ENOSYS; | ||
58 | return (unsigned long) ret; | ||
61 | } | 59 | } |
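Editor's note: get_unmapped_area() returns an address, so errors travel as negative errnos cast to unsigned long; the new romfs code also maps the MTD layer's -EOPNOTSUPP onto the -ENOSYS this hook historically returned. The cast-and-check convention looks roughly like this (the IS_ERR_VALUE-style bound and names are picked for illustration):

	#include <errno.h>
	#include <stdio.h>

	/* Errors ride in the top page's worth of the address space. */
	#define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-4095)

	static unsigned long get_area(int backend_supported)
	{
		int ret = backend_supported ? 0 : -EOPNOTSUPP;

		if (ret == -EOPNOTSUPP)
			ret = -ENOSYS;	/* keep the hook's historical errno */
		return ret ? (unsigned long)ret : 0x7f0000000000UL;
	}

	int main(void)
	{
		unsigned long addr = get_area(0);

		if (IS_ERR_VALUE(addr))
			printf("error: %ld\n", (long)addr);	/* ENOSYS on Linux */
		else
			printf("mapped at %#lx\n", addr);
		return 0;
	}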
62 | 60 | ||
63 | /* | 61 | /* |
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index f744be98cd5a..af0b73802592 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c | |||
@@ -70,11 +70,15 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, | |||
70 | spin_lock(&cache->lock); | 70 | spin_lock(&cache->lock); |
71 | 71 | ||
72 | while (1) { | 72 | while (1) { |
73 | for (i = 0; i < cache->entries; i++) | 73 | for (i = cache->curr_blk, n = 0; n < cache->entries; n++) { |
74 | if (cache->entry[i].block == block) | 74 | if (cache->entry[i].block == block) { |
75 | cache->curr_blk = i; | ||
75 | break; | 76 | break; |
77 | } | ||
78 | i = (i + 1) % cache->entries; | ||
79 | } | ||
76 | 80 | ||
77 | if (i == cache->entries) { | 81 | if (n == cache->entries) { |
78 | /* | 82 | /* |
79 | * Block not in cache; if all cache entries are used, | 83 | * Block not in cache; if all cache entries are used,
80 | * go to sleep waiting for one to become available. | 84 | * go to sleep waiting for one to become available. |
@@ -245,6 +249,7 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, | |||
245 | goto cleanup; | 249 | goto cleanup; |
246 | } | 250 | } |
247 | 251 | ||
252 | cache->curr_blk = 0; | ||
248 | cache->next_blk = 0; | 253 | cache->next_blk = 0; |
249 | cache->unused = entries; | 254 | cache->unused = entries; |
250 | cache->entries = entries; | 255 | cache->entries = entries; |
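Editor's note: starting the lookup at curr_blk and wrapping with modulo (first squashfs hunk above) turns the linear scan into a cheap most-recently-hit heuristic — repeated metadata reads of the same block match on the first probe instead of averaging half the table. The wrap-around scan on its own (toy table, illustrative names):

	#include <stdio.h>

	#define ENTRIES 4

	static long table[ENTRIES] = { 7, 9, 12, 30 };
	static int curr_blk;		/* index of the last hit */

	static int cache_find(long block)
	{
		int i = curr_blk, n;

		for (n = 0; n < ENTRIES; n++) {
			if (table[i] == block) {
				curr_blk = i;	/* remember the hit for next time */
				return i;
			}
			i = (i + 1) % ENTRIES;	/* wrap past the end */
		}
		return -1;			/* scanned everything: miss */
	}

	int main(void)
	{
		printf("%d\n", cache_find(12));	/* 2: full scan */
		printf("%d\n", cache_find(12));	/* 2: first probe this time */
		return 0;
	}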
@@ -332,17 +337,20 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, | |||
332 | u64 *block, int *offset, int length) | 337 | u64 *block, int *offset, int length) |
333 | { | 338 | { |
334 | struct squashfs_sb_info *msblk = sb->s_fs_info; | 339 | struct squashfs_sb_info *msblk = sb->s_fs_info; |
335 | int bytes, copied = length; | 340 | int bytes, res = length; |
336 | struct squashfs_cache_entry *entry; | 341 | struct squashfs_cache_entry *entry; |
337 | 342 | ||
338 | TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); | 343 | TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); |
339 | 344 | ||
340 | while (length) { | 345 | while (length) { |
341 | entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); | 346 | entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); |
342 | if (entry->error) | 347 | if (entry->error) { |
343 | return entry->error; | 348 | res = entry->error; |
344 | else if (*offset >= entry->length) | 349 | goto error; |
345 | return -EIO; | 350 | } else if (*offset >= entry->length) { |
351 | res = -EIO; | ||
352 | goto error; | ||
353 | } | ||
346 | 354 | ||
347 | bytes = squashfs_copy_data(buffer, entry, *offset, length); | 355 | bytes = squashfs_copy_data(buffer, entry, *offset, length); |
348 | if (buffer) | 356 | if (buffer) |
@@ -358,7 +366,11 @@ int squashfs_read_metadata(struct super_block *sb, void *buffer, | |||
358 | squashfs_cache_put(entry); | 366 | squashfs_cache_put(entry); |
359 | } | 367 | } |
360 | 368 | ||
361 | return copied; | 369 | return res; |
370 | |||
371 | error: | ||
372 | squashfs_cache_put(entry); | ||
373 | return res; | ||
362 | } | 374 | } |
363 | 375 | ||
364 | 376 | ||
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index fd7b3b3bda13..81afbccfa843 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c | |||
@@ -208,8 +208,8 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
208 | inode->i_op = &squashfs_inode_ops; | 208 | inode->i_op = &squashfs_inode_ops; |
209 | inode->i_fop = &generic_ro_fops; | 209 | inode->i_fop = &generic_ro_fops; |
210 | inode->i_mode |= S_IFREG; | 210 | inode->i_mode |= S_IFREG; |
211 | inode->i_blocks = ((inode->i_size - | 211 | inode->i_blocks = (inode->i_size - |
212 | le64_to_cpu(sqsh_ino->sparse) - 1) >> 9) + 1; | 212 | le64_to_cpu(sqsh_ino->sparse) + 511) >> 9; |
213 | 213 | ||
214 | squashfs_i(inode)->fragment_block = frag_blk; | 214 | squashfs_i(inode)->fragment_block = frag_blk; |
215 | squashfs_i(inode)->fragment_size = frag_size; | 215 | squashfs_i(inode)->fragment_size = frag_size; |
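Editor's note: the i_blocks change swaps ((x - 1) >> 9) + 1 for (x + 511) >> 9. Both round a positive byte count up to 512-byte units, but the old form underflows when the count is 0 (a fully sparse file): (0 - 1) on an unsigned 64-bit value wraps to 2^64 - 1, giving an enormous block count, while (0 + 511) >> 9 is 0. Worked out:

	#include <stdio.h>

	/* Round a byte count up to 512-byte blocks, the way the new code does. */
	static unsigned long long blocks(unsigned long long bytes)
	{
		return (bytes + 511) >> 9;
	}

	/* The old formula, kept only to demonstrate the underflow at 0. */
	static unsigned long long blocks_old(unsigned long long bytes)
	{
		return ((bytes - 1) >> 9) + 1;
	}

	int main(void)
	{
		printf("%llu %llu\n", blocks(0), blocks_old(0));	/* 0 vs 36028797018963968 */
		printf("%llu %llu\n", blocks(1), blocks_old(1));	/* 1 1 */
		printf("%llu %llu\n", blocks(512), blocks_old(512));	/* 1 1 */
		printf("%llu %llu\n", blocks(513), blocks_old(513));	/* 2 2 */
		return 0;
	}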
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 651f0b31d296..52934a22f296 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h | |||
@@ -28,6 +28,7 @@ | |||
28 | struct squashfs_cache { | 28 | struct squashfs_cache { |
29 | char *name; | 29 | char *name; |
30 | int entries; | 30 | int entries; |
31 | int curr_blk; | ||
31 | int next_blk; | 32 | int next_blk; |
32 | int num_waiters; | 33 | int num_waiters; |
33 | int unused; | 34 | int unused; |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index d0858c2d9a47..ecaa2f7bdb8f 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -290,7 +290,7 @@ handle_fragments: | |||
290 | 290 | ||
291 | check_directory_table: | 291 | check_directory_table: |
292 | /* Sanity check directory_table */ | 292 | /* Sanity check directory_table */ |
293 | if (msblk->directory_table >= next_table) { | 293 | if (msblk->directory_table > next_table) { |
294 | err = -EINVAL; | 294 | err = -EINVAL; |
295 | goto failed_mount; | 295 | goto failed_mount; |
296 | } | 296 | } |
diff --git a/fs/super.c b/fs/super.c index de41e1e46f09..6015c02296b7 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -1186,6 +1186,8 @@ int freeze_super(struct super_block *sb) | |||
1186 | printk(KERN_ERR | 1186 | printk(KERN_ERR |
1187 | "VFS:Filesystem freeze failed\n"); | 1187 | "VFS:Filesystem freeze failed\n"); |
1188 | sb->s_frozen = SB_UNFROZEN; | 1188 | sb->s_frozen = SB_UNFROZEN; |
1189 | smp_wmb(); | ||
1190 | wake_up(&sb->s_wait_unfrozen); | ||
1189 | deactivate_locked_super(sb); | 1191 | deactivate_locked_super(sb); |
1190 | return ret; | 1192 | return ret; |
1191 | } | 1193 | } |
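Editor's note: when the filesystem's freeze callback fails, freeze_super() now rolls the state back to SB_UNFROZEN and wakes anyone already sleeping on s_wait_unfrozen, with smp_wmb() ordering the state store before the wakeup so sleepers re-test fresh state. The store-then-wake shape, sketched with C11 atomics and a condition variable (a loose userspace analogy, not the kernel wait-queue API; compile with -lpthread):

	#include <stdatomic.h>
	#include <pthread.h>
	#include <stdio.h>

	static atomic_int frozen = 1;	/* SB_FREEZE state stand-in */
	static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t wait_unfrozen = PTHREAD_COND_INITIALIZER;

	static void *writer(void *arg)
	{
		pthread_mutex_lock(&m);
		while (atomic_load(&frozen))	/* wait_event() stand-in */
			pthread_cond_wait(&wait_unfrozen, &m);
		pthread_mutex_unlock(&m);
		puts("writer resumed");
		return arg;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, writer, NULL);

		/* freeze failed: roll the state back... */
		atomic_store(&frozen, 0);	/* ordered store plays smp_wmb()'s role */
		/* ...then wake anyone already asleep on the old state */
		pthread_mutex_lock(&m);
		pthread_cond_broadcast(&wait_unfrozen);
		pthread_mutex_unlock(&m);

		pthread_join(t, NULL);
		return 0;
	}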
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 62f4fb37789e..00012e31829d 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -493,6 +493,12 @@ int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr, | |||
493 | const void *ns = NULL; | 493 | const void *ns = NULL; |
494 | int err; | 494 | int err; |
495 | 495 | ||
496 | if (!dir_sd) { | ||
497 | WARN(1, KERN_ERR "sysfs: kobject %s without dirent\n", | ||
498 | kobject_name(kobj)); | ||
499 | return -ENOENT; | ||
500 | } | ||
501 | |||
496 | err = 0; | 502 | err = 0; |
497 | if (!sysfs_ns_type(dir_sd)) | 503 | if (!sysfs_ns_type(dir_sd)) |
498 | goto out; | 504 | goto out; |
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 4a802b4a9056..85eb81683a29 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c | |||
@@ -318,8 +318,11 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha | |||
318 | struct sysfs_addrm_cxt acxt; | 318 | struct sysfs_addrm_cxt acxt; |
319 | struct sysfs_dirent *sd; | 319 | struct sysfs_dirent *sd; |
320 | 320 | ||
321 | if (!dir_sd) | 321 | if (!dir_sd) { |
322 | WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", | ||
323 | name); | ||
322 | return -ENOENT; | 324 | return -ENOENT; |
325 | } | ||
323 | 326 | ||
324 | sysfs_addrm_start(&acxt, dir_sd); | 327 | sysfs_addrm_start(&acxt, dir_sd); |
325 | 328 | ||
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index b09ba2dd8b62..f922cbacdb96 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -38,9 +38,6 @@ | |||
38 | 38 | ||
39 | DEFINE_SPINLOCK(dbg_lock); | 39 | DEFINE_SPINLOCK(dbg_lock); |
40 | 40 | ||
41 | static char dbg_key_buf0[128]; | ||
42 | static char dbg_key_buf1[128]; | ||
43 | |||
44 | static const char *get_key_fmt(int fmt) | 41 | static const char *get_key_fmt(int fmt) |
45 | { | 42 | { |
46 | switch (fmt) { | 43 | switch (fmt) { |
@@ -103,8 +100,8 @@ static const char *get_dent_type(int type) | |||
103 | } | 100 | } |
104 | } | 101 | } |
105 | 102 | ||
106 | static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, | 103 | const char *dbg_snprintf_key(const struct ubifs_info *c, |
107 | char *buffer) | 104 | const union ubifs_key *key, char *buffer, int len) |
108 | { | 105 | { |
109 | char *p = buffer; | 106 | char *p = buffer; |
110 | int type = key_type(c, key); | 107 | int type = key_type(c, key); |
@@ -112,45 +109,34 @@ static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, | |||
112 | if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { | 109 | if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { |
113 | switch (type) { | 110 | switch (type) { |
114 | case UBIFS_INO_KEY: | 111 | case UBIFS_INO_KEY: |
115 | sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), | 112 | len -= snprintf(p, len, "(%lu, %s)", |
116 | get_key_type(type)); | 113 | (unsigned long)key_inum(c, key), |
114 | get_key_type(type)); | ||
117 | break; | 115 | break; |
118 | case UBIFS_DENT_KEY: | 116 | case UBIFS_DENT_KEY: |
119 | case UBIFS_XENT_KEY: | 117 | case UBIFS_XENT_KEY: |
120 | sprintf(p, "(%lu, %s, %#08x)", | 118 | len -= snprintf(p, len, "(%lu, %s, %#08x)", |
121 | (unsigned long)key_inum(c, key), | 119 | (unsigned long)key_inum(c, key), |
122 | get_key_type(type), key_hash(c, key)); | 120 | get_key_type(type), key_hash(c, key)); |
123 | break; | 121 | break; |
124 | case UBIFS_DATA_KEY: | 122 | case UBIFS_DATA_KEY: |
125 | sprintf(p, "(%lu, %s, %u)", | 123 | len -= snprintf(p, len, "(%lu, %s, %u)", |
126 | (unsigned long)key_inum(c, key), | 124 | (unsigned long)key_inum(c, key), |
127 | get_key_type(type), key_block(c, key)); | 125 | get_key_type(type), key_block(c, key)); |
128 | break; | 126 | break; |
129 | case UBIFS_TRUN_KEY: | 127 | case UBIFS_TRUN_KEY: |
130 | sprintf(p, "(%lu, %s)", | 128 | len -= snprintf(p, len, "(%lu, %s)", |
131 | (unsigned long)key_inum(c, key), | 129 | (unsigned long)key_inum(c, key), |
132 | get_key_type(type)); | 130 | get_key_type(type)); |
133 | break; | 131 | break; |
134 | default: | 132 | default: |
135 | sprintf(p, "(bad key type: %#08x, %#08x)", | 133 | len -= snprintf(p, len, "(bad key type: %#08x, %#08x)", |
136 | key->u32[0], key->u32[1]); | 134 | key->u32[0], key->u32[1]); |
137 | } | 135 | } |
138 | } else | 136 | } else |
139 | sprintf(p, "bad key format %d", c->key_fmt); | 137 | len -= snprintf(p, len, "bad key format %d", c->key_fmt); |
140 | } | 138 | ubifs_assert(len > 0); |
141 | 139 | return p; | |
142 | const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key) | ||
143 | { | ||
144 | /* dbg_lock must be held */ | ||
145 | sprintf_key(c, key, dbg_key_buf0); | ||
146 | return dbg_key_buf0; | ||
147 | } | ||
148 | |||
149 | const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) | ||
150 | { | ||
151 | /* dbg_lock must be held */ | ||
152 | sprintf_key(c, key, dbg_key_buf1); | ||
153 | return dbg_key_buf1; | ||
154 | } | 140 | } |
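Editor's note: replacing the two static key buffers with a caller-supplied buffer makes dbg_snprintf_key() safe to call without dbg_lock, and decrementing len by each snprintf() return keeps the remaining space honest (the final assertion catches truncation, since snprintf() reports the would-be length). The caller-buffer, running-length pattern on its own (illustrative names, not the ubifs API):

	#include <stdio.h>
	#include <assert.h>

	#define KEY_BUF_LEN 48

	/* Format into a caller-owned buffer, tracking the space that remains. */
	static const char *fmt_key(unsigned long inum, unsigned block,
				   char *buf, int len)
	{
		char *p = buf;

		len -= snprintf(p, len, "(%lu, data, %u)", inum, block);
		assert(len > 0);	/* would-be length >= len means truncation */
		return p;
	}

	int main(void)
	{
		char a[KEY_BUF_LEN], b[KEY_BUF_LEN];

		/* Two calls, two buffers: no shared static state, no lock needed. */
		printf("%s %s\n", fmt_key(1, 0, a, sizeof(a)),
		       fmt_key(2, 7, b, sizeof(b)));
		return 0;
	}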
155 | 141 | ||
156 | const char *dbg_ntype(int type) | 142 | const char *dbg_ntype(int type) |
@@ -319,6 +305,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
319 | int i, n; | 305 | int i, n; |
320 | union ubifs_key key; | 306 | union ubifs_key key; |
321 | const struct ubifs_ch *ch = node; | 307 | const struct ubifs_ch *ch = node; |
308 | char key_buf[DBG_KEY_BUF_LEN]; | ||
322 | 309 | ||
323 | if (dbg_is_tst_rcvry(c)) | 310 | if (dbg_is_tst_rcvry(c)) |
324 | return; | 311 | return; |
@@ -474,7 +461,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
474 | const struct ubifs_ino_node *ino = node; | 461 | const struct ubifs_ino_node *ino = node; |
475 | 462 | ||
476 | key_read(c, &ino->key, &key); | 463 | key_read(c, &ino->key, &key); |
477 | printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); | 464 | printk(KERN_DEBUG "\tkey %s\n", |
465 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); | ||
478 | printk(KERN_DEBUG "\tcreat_sqnum %llu\n", | 466 | printk(KERN_DEBUG "\tcreat_sqnum %llu\n", |
479 | (unsigned long long)le64_to_cpu(ino->creat_sqnum)); | 467 | (unsigned long long)le64_to_cpu(ino->creat_sqnum)); |
480 | printk(KERN_DEBUG "\tsize %llu\n", | 468 | printk(KERN_DEBUG "\tsize %llu\n", |
@@ -517,7 +505,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
517 | int nlen = le16_to_cpu(dent->nlen); | 505 | int nlen = le16_to_cpu(dent->nlen); |
518 | 506 | ||
519 | key_read(c, &dent->key, &key); | 507 | key_read(c, &dent->key, &key); |
520 | printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); | 508 | printk(KERN_DEBUG "\tkey %s\n", |
509 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); | ||
521 | printk(KERN_DEBUG "\tinum %llu\n", | 510 | printk(KERN_DEBUG "\tinum %llu\n", |
522 | (unsigned long long)le64_to_cpu(dent->inum)); | 511 | (unsigned long long)le64_to_cpu(dent->inum)); |
523 | printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); | 512 | printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); |
@@ -541,7 +530,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
541 | int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; | 530 | int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; |
542 | 531 | ||
543 | key_read(c, &dn->key, &key); | 532 | key_read(c, &dn->key, &key); |
544 | printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); | 533 | printk(KERN_DEBUG "\tkey %s\n", |
534 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); | ||
545 | printk(KERN_DEBUG "\tsize %u\n", | 535 | printk(KERN_DEBUG "\tsize %u\n", |
546 | le32_to_cpu(dn->size)); | 536 | le32_to_cpu(dn->size)); |
547 | printk(KERN_DEBUG "\tcompr_typ %d\n", | 537 | printk(KERN_DEBUG "\tcompr_typ %d\n", |
@@ -582,7 +572,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
582 | key_read(c, &br->key, &key); | 572 | key_read(c, &br->key, &key); |
583 | printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", | 573 | printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", |
584 | i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), | 574 | i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), |
585 | le32_to_cpu(br->len), DBGKEY(&key)); | 575 | le32_to_cpu(br->len), |
576 | dbg_snprintf_key(c, &key, key_buf, | ||
577 | DBG_KEY_BUF_LEN)); | ||
586 | } | 578 | } |
587 | break; | 579 | break; |
588 | } | 580 | } |
@@ -934,6 +926,7 @@ void dbg_dump_znode(const struct ubifs_info *c, | |||
934 | { | 926 | { |
935 | int n; | 927 | int n; |
936 | const struct ubifs_zbranch *zbr; | 928 | const struct ubifs_zbranch *zbr; |
929 | char key_buf[DBG_KEY_BUF_LEN]; | ||
937 | 930 | ||
938 | spin_lock(&dbg_lock); | 931 | spin_lock(&dbg_lock); |
939 | if (znode->parent) | 932 | if (znode->parent) |
@@ -958,12 +951,16 @@ void dbg_dump_znode(const struct ubifs_info *c, | |||
958 | printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " | 951 | printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " |
959 | "%s\n", n, zbr->znode, zbr->lnum, | 952 | "%s\n", n, zbr->znode, zbr->lnum, |
960 | zbr->offs, zbr->len, | 953 | zbr->offs, zbr->len, |
961 | DBGKEY(&zbr->key)); | 954 | dbg_snprintf_key(c, &zbr->key, |
955 | key_buf, | ||
956 | DBG_KEY_BUF_LEN)); | ||
962 | else | 957 | else |
963 | printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " | 958 | printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " |
964 | "%s\n", n, zbr->znode, zbr->lnum, | 959 | "%s\n", n, zbr->znode, zbr->lnum, |
965 | zbr->offs, zbr->len, | 960 | zbr->offs, zbr->len, |
966 | DBGKEY(&zbr->key)); | 961 | dbg_snprintf_key(c, &zbr->key, |
962 | key_buf, | ||
963 | DBG_KEY_BUF_LEN)); | ||
967 | } | 964 | } |
968 | spin_unlock(&dbg_lock); | 965 | spin_unlock(&dbg_lock); |
969 | } | 966 | } |
@@ -1260,6 +1257,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, | |||
1260 | int err, nlen1, nlen2, cmp; | 1257 | int err, nlen1, nlen2, cmp; |
1261 | struct ubifs_dent_node *dent1, *dent2; | 1258 | struct ubifs_dent_node *dent1, *dent2; |
1262 | union ubifs_key key; | 1259 | union ubifs_key key; |
1260 | char key_buf[DBG_KEY_BUF_LEN]; | ||
1263 | 1261 | ||
1264 | ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); | 1262 | ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); |
1265 | dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); | 1263 | dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); |
@@ -1290,9 +1288,11 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, | |||
1290 | key_read(c, &dent1->key, &key); | 1288 | key_read(c, &dent1->key, &key); |
1291 | if (keys_cmp(c, &zbr1->key, &key)) { | 1289 | if (keys_cmp(c, &zbr1->key, &key)) { |
1292 | dbg_err("1st entry at %d:%d has key %s", zbr1->lnum, | 1290 | dbg_err("1st entry at %d:%d has key %s", zbr1->lnum, |
1293 | zbr1->offs, DBGKEY(&key)); | 1291 | zbr1->offs, dbg_snprintf_key(c, &key, key_buf, |
1292 | DBG_KEY_BUF_LEN)); | ||
1294 | dbg_err("but it should have key %s according to tnc", | 1293 | dbg_err("but it should have key %s according to tnc", |
1295 | DBGKEY(&zbr1->key)); | 1294 | dbg_snprintf_key(c, &zbr1->key, key_buf, |
1295 | DBG_KEY_BUF_LEN)); | ||
1296 | dbg_dump_node(c, dent1); | 1296 | dbg_dump_node(c, dent1); |
1297 | goto out_free; | 1297 | goto out_free; |
1298 | } | 1298 | } |
@@ -1300,9 +1300,11 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, | |||
1300 | key_read(c, &dent2->key, &key); | 1300 | key_read(c, &dent2->key, &key); |
1301 | if (keys_cmp(c, &zbr2->key, &key)) { | 1301 | if (keys_cmp(c, &zbr2->key, &key)) { |
1302 | dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum, | 1302 | dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum, |
1303 | zbr1->offs, DBGKEY(&key)); | 1303 | zbr1->offs, dbg_snprintf_key(c, &key, key_buf, |
1304 | DBG_KEY_BUF_LEN)); | ||
1304 | dbg_err("but it should have key %s according to tnc", | 1305 | dbg_err("but it should have key %s according to tnc", |
1305 | DBGKEY(&zbr2->key)); | 1306 | dbg_snprintf_key(c, &zbr2->key, key_buf, |
1307 | DBG_KEY_BUF_LEN)); | ||
1306 | dbg_dump_node(c, dent2); | 1308 | dbg_dump_node(c, dent2); |
1307 | goto out_free; | 1309 | goto out_free; |
1308 | } | 1310 | } |
@@ -1319,7 +1321,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, | |||
1319 | dbg_err("2 xent/dent nodes with the same name"); | 1321 | dbg_err("2 xent/dent nodes with the same name"); |
1320 | else | 1322 | else |
1321 | dbg_err("bad order of colliding key %s", | 1323 | dbg_err("bad order of colliding key %s", |
1322 | DBGKEY(&key)); | 1324 | dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); |
1323 | 1325 | ||
1324 | ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); | 1326 | ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); |
1325 | dbg_dump_node(c, dent1); | 1327 | dbg_dump_node(c, dent1); |
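[Annotation] The heart of this debug.c hunk: the two shared static buffers (dbg_key_buf0/1), which forced callers to hold dbg_lock for as long as a rendered key string was in use, are replaced by dbg_snprintf_key(), which renders into a caller-supplied buffer and returns it. Each call site now owns its own stack storage, so key printing becomes reentrant. A minimal standalone C sketch of the pattern (render_key is a hypothetical stand-in for the real UBIFS key formatter):

    #include <stdio.h>

    #define DBG_KEY_BUF_LEN 32

    /* Render into a caller-owned buffer and return it: no shared
     * static state, so no lock is needed just to print a key. */
    static const char *render_key(unsigned long inum, unsigned int block,
                                  char *buf, int len)
    {
            snprintf(buf, len, "(%lu, data, %u)", inum, block);
            return buf;
    }

    int main(void)
    {
            char key_buf[DBG_KEY_BUF_LEN];  /* per-call stack buffer */

            printf("key %s\n", render_key(42, 7, key_buf, DBG_KEY_BUF_LEN));
            return 0;
    }

Note also the ubifs_assert(len > 0) at the end of the new function: each snprintf() subtracts the length it wanted to write, so the assertion fires if DBG_KEY_BUF_LEN ever becomes too small for a rendered key.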
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 8d9c46810189..ad1a6fee6010 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
@@ -169,40 +169,39 @@ struct ubifs_global_debug_info { | |||
169 | spin_unlock(&dbg_lock); \ | 169 | spin_unlock(&dbg_lock); \ |
170 | } while (0) | 170 | } while (0) |
171 | 171 | ||
172 | const char *dbg_key_str0(const struct ubifs_info *c, | 172 | #define ubifs_dbg_msg(type, fmt, ...) \ |
173 | const union ubifs_key *key); | 173 | pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) |
174 | const char *dbg_key_str1(const struct ubifs_info *c, | 174 | |
175 | const union ubifs_key *key); | 175 | #define DBG_KEY_BUF_LEN 32 |
176 | 176 | #define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ | |
177 | /* | 177 | char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ |
178 | * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message | 178 | pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ |
179 | * macros. | 179 | dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ |
180 | */ | ||
181 | #define DBGKEY(key) dbg_key_str0(c, (key)) | ||
182 | #define DBGKEY1(key) dbg_key_str1(c, (key)) | ||
183 | |||
184 | extern spinlock_t dbg_lock; | ||
185 | |||
186 | #define ubifs_dbg_msg(type, fmt, ...) do { \ | ||
187 | spin_lock(&dbg_lock); \ | ||
188 | pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ | ||
189 | spin_unlock(&dbg_lock); \ | ||
190 | } while (0) | 180 | } while (0) |
191 | 181 | ||
192 | /* Just a debugging messages not related to any specific UBIFS subsystem */ | 182 | /* Just a debugging messages not related to any specific UBIFS subsystem */ |
193 | #define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__) | 183 | #define dbg_msg(fmt, ...) \ |
184 | printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ | ||
185 | __func__, ##__VA_ARGS__) | ||
186 | |||
194 | /* General messages */ | 187 | /* General messages */ |
195 | #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) | 188 | #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) |
196 | /* Additional journal messages */ | 189 | /* Additional journal messages */ |
197 | #define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) | 190 | #define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) |
191 | #define dbg_jnlk(key, fmt, ...) \ | ||
192 | ubifs_dbg_msg_key("jnl", key, fmt, ##__VA_ARGS__) | ||
198 | /* Additional TNC messages */ | 193 | /* Additional TNC messages */ |
199 | #define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) | 194 | #define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) |
195 | #define dbg_tnck(key, fmt, ...) \ | ||
196 | ubifs_dbg_msg_key("tnc", key, fmt, ##__VA_ARGS__) | ||
200 | /* Additional lprops messages */ | 197 | /* Additional lprops messages */ |
201 | #define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) | 198 | #define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) |
202 | /* Additional LEB find messages */ | 199 | /* Additional LEB find messages */ |
203 | #define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) | 200 | #define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) |
204 | /* Additional mount messages */ | 201 | /* Additional mount messages */ |
205 | #define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) | 202 | #define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) |
203 | #define dbg_mntk(key, fmt, ...) \ | ||
204 | ubifs_dbg_msg_key("mnt", key, fmt, ##__VA_ARGS__) | ||
206 | /* Additional I/O messages */ | 205 | /* Additional I/O messages */ |
207 | #define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) | 206 | #define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) |
208 | /* Additional commit messages */ | 207 | /* Additional commit messages */ |
@@ -218,6 +217,7 @@ extern spinlock_t dbg_lock; | |||
218 | /* Additional recovery messages */ | 217 | /* Additional recovery messages */ |
219 | #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) | 218 | #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) |
220 | 219 | ||
220 | extern spinlock_t dbg_lock; | ||
221 | extern struct ubifs_global_debug_info ubifs_dbg; | 221 | extern struct ubifs_global_debug_info ubifs_dbg; |
222 | 222 | ||
223 | static inline int dbg_is_chk_gen(const struct ubifs_info *c) | 223 | static inline int dbg_is_chk_gen(const struct ubifs_info *c) |
@@ -258,6 +258,8 @@ const char *dbg_cstate(int cmt_state); | |||
258 | const char *dbg_jhead(int jhead); | 258 | const char *dbg_jhead(int jhead); |
259 | const char *dbg_get_key_dump(const struct ubifs_info *c, | 259 | const char *dbg_get_key_dump(const struct ubifs_info *c, |
260 | const union ubifs_key *key); | 260 | const union ubifs_key *key); |
261 | const char *dbg_snprintf_key(const struct ubifs_info *c, | ||
262 | const union ubifs_key *key, char *buffer, int len); | ||
261 | void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode); | 263 | void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode); |
262 | void dbg_dump_node(const struct ubifs_info *c, const void *node); | 264 | void dbg_dump_node(const struct ubifs_info *c, const void *node); |
263 | void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, | 265 | void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, |
@@ -345,20 +347,23 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); | |||
345 | #define dbg_dump_stack() | 347 | #define dbg_dump_stack() |
346 | #define ubifs_assert_cmt_locked(c) | 348 | #define ubifs_assert_cmt_locked(c) |
347 | 349 | ||
348 | #define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 350 | #define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
349 | #define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 351 | #define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
350 | #define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 352 | #define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
351 | #define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 353 | #define dbg_jnlk(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
352 | #define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 354 | #define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
353 | #define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 355 | #define dbg_tnck(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
354 | #define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 356 | #define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
355 | #define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 357 | #define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
356 | #define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 358 | #define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
357 | #define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 359 | #define dbg_mntk(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
358 | #define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 360 | #define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
359 | #define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 361 | #define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
360 | #define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 362 | #define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
361 | #define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | 363 | #define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
364 | #define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | ||
365 | #define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | ||
366 | #define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | ||
362 | 367 | ||
363 | static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } | 368 | static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } |
364 | static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } | 369 | static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } |
@@ -368,6 +373,10 @@ static inline const char *dbg_jhead(int jhead) { return ""; } | |||
368 | static inline const char * | 373 | static inline const char * |
369 | dbg_get_key_dump(const struct ubifs_info *c, | 374 | dbg_get_key_dump(const struct ubifs_info *c, |
370 | const union ubifs_key *key) { return ""; } | 375 | const union ubifs_key *key) { return ""; } |
376 | static inline const char * | ||
377 | dbg_snprintf_key(const struct ubifs_info *c, | ||
378 | const union ubifs_key *key, char *buffer, | ||
379 | int len) { return ""; } | ||
371 | static inline void dbg_dump_inode(struct ubifs_info *c, | 380 | static inline void dbg_dump_inode(struct ubifs_info *c, |
372 | const struct inode *inode) { return; } | 381 | const struct inode *inode) { return; } |
373 | static inline void dbg_dump_node(const struct ubifs_info *c, | 382 | static inline void dbg_dump_node(const struct ubifs_info *c, |
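[Annotation] The debug.h side of the same change: ubifs_dbg_msg_key() declares a private __tmp_key_buf inside a do { } while (0) block, so every macro expansion gets its own stack buffer and the old spin_lock(&dbg_lock) around message printing can be dropped entirely. The macro silently uses a variable named c from the calling scope, the usual UBIFS convention for the filesystem context. A hedged standalone sketch of the macro shape (fmt_key is a stand-in for dbg_snprintf_key; ##__VA_ARGS__ is the GNU/kernel idiom for optional varargs):

    #include <stdio.h>

    #define KEY_BUF_LEN 32

    /* Stand-in for dbg_snprintf_key(). */
    static const char *fmt_key(unsigned int key, char *buf, int len)
    {
            snprintf(buf, len, "(%#x)", key);
            return buf;
    }

    /* Same shape as ubifs_dbg_msg_key(): the buffer lives inside the
     * do { } while (0) block, so it is private to each expansion and
     * the macro still nests safely under if/else. */
    #define dbg_msg_key(type, key, fmt, ...) do {                        \
            char __tmp_key_buf[KEY_BUF_LEN];                             \
            printf("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__,     \
                   fmt_key(key, __tmp_key_buf, KEY_BUF_LEN));            \
    } while (0)

    int main(void)
    {
            dbg_msg_key("tnc", 0x1234u, "search key ");
            return 0;
    }

The format string deliberately ends before the key; the macro appends the rendered key itself, which is why the converted call sites below all end their fmt with "key ".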
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index cef0460f4c54..2f438ab2e7a2 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
@@ -697,9 +697,8 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, | |||
697 | int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1; | 697 | int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1; |
698 | struct ubifs_inode *ui = ubifs_inode(inode); | 698 | struct ubifs_inode *ui = ubifs_inode(inode); |
699 | 699 | ||
700 | dbg_jnl("ino %lu, blk %u, len %d, key %s", | 700 | dbg_jnlk(key, "ino %lu, blk %u, len %d, key ", |
701 | (unsigned long)key_inum(c, key), key_block(c, key), len, | 701 | (unsigned long)key_inum(c, key), key_block(c, key), len); |
702 | DBGKEY(key)); | ||
703 | ubifs_assert(len <= UBIFS_BLOCK_SIZE); | 702 | ubifs_assert(len <= UBIFS_BLOCK_SIZE); |
704 | 703 | ||
705 | data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN); | 704 | data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN); |
@@ -1177,7 +1176,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, | |||
1177 | dn = (void *)trun + UBIFS_TRUN_NODE_SZ; | 1176 | dn = (void *)trun + UBIFS_TRUN_NODE_SZ; |
1178 | blk = new_size >> UBIFS_BLOCK_SHIFT; | 1177 | blk = new_size >> UBIFS_BLOCK_SHIFT; |
1179 | data_key_init(c, &key, inum, blk); | 1178 | data_key_init(c, &key, inum, blk); |
1180 | dbg_jnl("last block key %s", DBGKEY(&key)); | 1179 | dbg_jnlk(&key, "last block key "); |
1181 | err = ubifs_tnc_lookup(c, &key, dn); | 1180 | err = ubifs_tnc_lookup(c, &key, dn); |
1182 | if (err == -ENOENT) | 1181 | if (err == -ENOENT) |
1183 | dlen = 0; /* Not found (so it is a hole) */ | 1182 | dlen = 0; /* Not found (so it is a hole) */ |
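[Annotation] The journal.c conversions show the call-site idiom for the new key-printing macros: the key moves from a trailing "%s" argument to the first macro argument, and the format string simply stops where the key should appear. Side by side, taken from the hunk above:

    /* old: key rendered via the locked static buffer */
    dbg_jnl("ino %lu, blk %u, len %d, key %s",
            (unsigned long)key_inum(c, key), key_block(c, key), len,
            DBGKEY(key));

    /* new: dbg_jnlk() appends the rendered key itself */
    dbg_jnlk(key, "ino %lu, blk %u, len %d, key ",
             (unsigned long)key_inum(c, key), key_block(c, key), len);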
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 6189c74d97f0..66d59d0a1402 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c | |||
@@ -1986,12 +1986,11 @@ again: | |||
1986 | 1986 | ||
1987 | if (path[h].in_tree) | 1987 | if (path[h].in_tree) |
1988 | continue; | 1988 | continue; |
1989 | nnode = kmalloc(sz, GFP_NOFS); | 1989 | nnode = kmemdup(&path[h].nnode, sz, GFP_NOFS); |
1990 | if (!nnode) { | 1990 | if (!nnode) { |
1991 | err = -ENOMEM; | 1991 | err = -ENOMEM; |
1992 | goto out; | 1992 | goto out; |
1993 | } | 1993 | } |
1994 | memcpy(nnode, &path[h].nnode, sz); | ||
1995 | parent = nnode->parent; | 1994 | parent = nnode->parent; |
1996 | parent->nbranch[nnode->iip].nnode = nnode; | 1995 | parent->nbranch[nnode->iip].nnode = nnode; |
1997 | path[h].ptr.nnode = nnode; | 1996 | path[h].ptr.nnode = nnode; |
@@ -2004,12 +2003,11 @@ again: | |||
2004 | const size_t sz = sizeof(struct ubifs_pnode); | 2003 | const size_t sz = sizeof(struct ubifs_pnode); |
2005 | struct ubifs_nnode *parent; | 2004 | struct ubifs_nnode *parent; |
2006 | 2005 | ||
2007 | pnode = kmalloc(sz, GFP_NOFS); | 2006 | pnode = kmemdup(&path[h].pnode, sz, GFP_NOFS); |
2008 | if (!pnode) { | 2007 | if (!pnode) { |
2009 | err = -ENOMEM; | 2008 | err = -ENOMEM; |
2010 | goto out; | 2009 | goto out; |
2011 | } | 2010 | } |
2012 | memcpy(pnode, &path[h].pnode, sz); | ||
2013 | parent = pnode->parent; | 2011 | parent = pnode->parent; |
2014 | parent->nbranch[pnode->iip].pnode = pnode; | 2012 | parent->nbranch[pnode->iip].pnode = pnode; |
2015 | path[h].ptr.pnode = pnode; | 2013 | path[h].ptr.pnode = pnode; |
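[Annotation] A small cleanup rides along in lpt.c (and again in tnc.c and xattr.c below): the kmalloc()-then-memcpy() pairs collapse into kmemdup(), which allocates and copies in one call and returns NULL on allocation failure, so the surrounding error handling is unchanged. A rough userspace sketch of what kmemdup() amounts to:

    #include <stdlib.h>
    #include <string.h>

    /* Userspace analogue of kmemdup(src, len, gfp): allocate and
     * copy in one step; NULL means allocation failure. */
    static void *memdup(const void *src, size_t len)
    {
            void *p = malloc(len);

            if (p)
                    memcpy(p, src, len);
            return p;
    }

In the kernel the extra gfp argument (GFP_NOFS here) selects the allocation context, exactly as it did for the replaced kmalloc() calls.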
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index ccabaf1164b3..b007637f0406 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
@@ -221,8 +221,8 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) | |||
221 | { | 221 | { |
222 | int err; | 222 | int err; |
223 | 223 | ||
224 | dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum, | 224 | dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", |
225 | r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key)); | 225 | r->lnum, r->offs, r->len, r->deletion, r->sqnum); |
226 | 226 | ||
227 | /* Set c->replay_sqnum to help deal with dangling branches. */ | 227 | /* Set c->replay_sqnum to help deal with dangling branches. */ |
228 | c->replay_sqnum = r->sqnum; | 228 | c->replay_sqnum = r->sqnum; |
@@ -361,7 +361,7 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | |||
361 | { | 361 | { |
362 | struct replay_entry *r; | 362 | struct replay_entry *r; |
363 | 363 | ||
364 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | 364 | dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); |
365 | 365 | ||
366 | if (key_inum(c, key) >= c->highest_inum) | 366 | if (key_inum(c, key) >= c->highest_inum) |
367 | c->highest_inum = key_inum(c, key); | 367 | c->highest_inum = key_inum(c, key); |
@@ -409,7 +409,7 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, | |||
409 | struct replay_entry *r; | 409 | struct replay_entry *r; |
410 | char *nbuf; | 410 | char *nbuf; |
411 | 411 | ||
412 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | 412 | dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); |
413 | if (key_inum(c, key) >= c->highest_inum) | 413 | if (key_inum(c, key) >= c->highest_inum) |
414 | c->highest_inum = key_inum(c, key); | 414 | c->highest_inum = key_inum(c, key); |
415 | 415 | ||
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 066738647685..16ad84d8402f 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -344,12 +344,11 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, | |||
344 | return err; | 344 | return err; |
345 | } | 345 | } |
346 | 346 | ||
347 | lnc_node = kmalloc(zbr->len, GFP_NOFS); | 347 | lnc_node = kmemdup(node, zbr->len, GFP_NOFS); |
348 | if (!lnc_node) | 348 | if (!lnc_node) |
349 | /* We don't have to have the cache, so no error */ | 349 | /* We don't have to have the cache, so no error */ |
350 | return 0; | 350 | return 0; |
351 | 351 | ||
352 | memcpy(lnc_node, node, zbr->len); | ||
353 | zbr->leaf = lnc_node; | 352 | zbr->leaf = lnc_node; |
354 | return 0; | 353 | return 0; |
355 | } | 354 | } |
@@ -506,7 +505,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, | |||
506 | { | 505 | { |
507 | int ret; | 506 | int ret; |
508 | 507 | ||
509 | dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); | 508 | dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum, zbr->offs); |
510 | 509 | ||
511 | ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, | 510 | ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, |
512 | zbr->offs); | 511 | zbr->offs); |
@@ -520,8 +519,8 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, | |||
520 | ret = 0; | 519 | ret = 0; |
521 | } | 520 | } |
522 | if (ret == 0 && c->replaying) | 521 | if (ret == 0 && c->replaying) |
523 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", | 522 | dbg_mntk(key, "dangling branch LEB %d:%d len %d, key ", |
524 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); | 523 | zbr->lnum, zbr->offs, zbr->len); |
525 | return ret; | 524 | return ret; |
526 | } | 525 | } |
527 | 526 | ||
@@ -996,9 +995,9 @@ static int fallible_resolve_collision(struct ubifs_info *c, | |||
996 | if (adding || !o_znode) | 995 | if (adding || !o_znode) |
997 | return 0; | 996 | return 0; |
998 | 997 | ||
999 | dbg_mnt("dangling match LEB %d:%d len %d %s", | 998 | dbg_mntk(key, "dangling match LEB %d:%d len %d key ", |
1000 | o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, | 999 | o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, |
1001 | o_znode->zbranch[o_n].len, DBGKEY(key)); | 1000 | o_znode->zbranch[o_n].len); |
1002 | *zn = o_znode; | 1001 | *zn = o_znode; |
1003 | *n = o_n; | 1002 | *n = o_n; |
1004 | return 1; | 1003 | return 1; |
@@ -1180,7 +1179,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | |||
1180 | struct ubifs_znode *znode; | 1179 | struct ubifs_znode *znode; |
1181 | unsigned long time = get_seconds(); | 1180 | unsigned long time = get_seconds(); |
1182 | 1181 | ||
1183 | dbg_tnc("search key %s", DBGKEY(key)); | 1182 | dbg_tnck(key, "search key "); |
1184 | ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); | 1183 | ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); |
1185 | 1184 | ||
1186 | znode = c->zroot.znode; | 1185 | znode = c->zroot.znode; |
@@ -1316,7 +1315,7 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, | |||
1316 | struct ubifs_znode *znode; | 1315 | struct ubifs_znode *znode; |
1317 | unsigned long time = get_seconds(); | 1316 | unsigned long time = get_seconds(); |
1318 | 1317 | ||
1319 | dbg_tnc("search and dirty key %s", DBGKEY(key)); | 1318 | dbg_tnck(key, "search and dirty key "); |
1320 | 1319 | ||
1321 | znode = c->zroot.znode; | 1320 | znode = c->zroot.znode; |
1322 | if (unlikely(!znode)) { | 1321 | if (unlikely(!znode)) { |
@@ -1723,8 +1722,8 @@ static int validate_data_node(struct ubifs_info *c, void *buf, | |||
1723 | if (!keys_eq(c, &zbr->key, &key1)) { | 1722 | if (!keys_eq(c, &zbr->key, &key1)) { |
1724 | ubifs_err("bad key in node at LEB %d:%d", | 1723 | ubifs_err("bad key in node at LEB %d:%d", |
1725 | zbr->lnum, zbr->offs); | 1724 | zbr->lnum, zbr->offs); |
1726 | dbg_tnc("looked for key %s found node's key %s", | 1725 | dbg_tnck(&zbr->key, "looked for key "); |
1727 | DBGKEY(&zbr->key), DBGKEY1(&key1)); | 1726 | dbg_tnck(&key1, "found node's key "); |
1728 | goto out_err; | 1727 | goto out_err; |
1729 | } | 1728 | } |
1730 | 1729 | ||
@@ -1777,7 +1776,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) | |||
1777 | ubifs_err("failed to read from LEB %d:%d, error %d", | 1776 | ubifs_err("failed to read from LEB %d:%d, error %d", |
1778 | lnum, offs, err); | 1777 | lnum, offs, err); |
1779 | dbg_dump_stack(); | 1778 | dbg_dump_stack(); |
1780 | dbg_tnc("key %s", DBGKEY(&bu->key)); | 1779 | dbg_tnck(&bu->key, "key "); |
1781 | return err; | 1780 | return err; |
1782 | } | 1781 | } |
1783 | 1782 | ||
@@ -1812,7 +1811,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1812 | int found, n, err; | 1811 | int found, n, err; |
1813 | struct ubifs_znode *znode; | 1812 | struct ubifs_znode *znode; |
1814 | 1813 | ||
1815 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); | 1814 | dbg_tnck(key, "name '%.*s' key ", nm->len, nm->name); |
1816 | mutex_lock(&c->tnc_mutex); | 1815 | mutex_lock(&c->tnc_mutex); |
1817 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1816 | found = ubifs_lookup_level0(c, key, &znode, &n); |
1818 | if (!found) { | 1817 | if (!found) { |
@@ -1986,8 +1985,7 @@ again: | |||
1986 | zp = znode->parent; | 1985 | zp = znode->parent; |
1987 | if (znode->child_cnt < c->fanout) { | 1986 | if (znode->child_cnt < c->fanout) { |
1988 | ubifs_assert(n != c->fanout); | 1987 | ubifs_assert(n != c->fanout); |
1989 | dbg_tnc("inserted at %d level %d, key %s", n, znode->level, | 1988 | dbg_tnck(key, "inserted at %d level %d, key ", n, znode->level); |
1990 | DBGKEY(key)); | ||
1991 | 1989 | ||
1992 | insert_zbranch(znode, zbr, n); | 1990 | insert_zbranch(znode, zbr, n); |
1993 | 1991 | ||
@@ -2002,7 +2000,7 @@ again: | |||
2002 | * Unfortunately, @znode does not have more empty slots and we have to | 2000 | * Unfortunately, @znode does not have more empty slots and we have to |
2003 | * split it. | 2001 | * split it. |
2004 | */ | 2002 | */ |
2005 | dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); | 2003 | dbg_tnck(key, "splitting level %d, key ", znode->level); |
2006 | 2004 | ||
2007 | if (znode->alt) | 2005 | if (znode->alt) |
2008 | /* | 2006 | /* |
@@ -2096,7 +2094,7 @@ do_split: | |||
2096 | } | 2094 | } |
2097 | 2095 | ||
2098 | /* Insert new key and branch */ | 2096 | /* Insert new key and branch */ |
2099 | dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); | 2097 | dbg_tnck(key, "inserting at %d level %d, key ", n, zn->level); |
2100 | 2098 | ||
2101 | insert_zbranch(zi, zbr, n); | 2099 | insert_zbranch(zi, zbr, n); |
2102 | 2100 | ||
@@ -2172,7 +2170,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, | |||
2172 | struct ubifs_znode *znode; | 2170 | struct ubifs_znode *znode; |
2173 | 2171 | ||
2174 | mutex_lock(&c->tnc_mutex); | 2172 | mutex_lock(&c->tnc_mutex); |
2175 | dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); | 2173 | dbg_tnck(key, "%d:%d, len %d, key ", lnum, offs, len); |
2176 | found = lookup_level0_dirty(c, key, &znode, &n); | 2174 | found = lookup_level0_dirty(c, key, &znode, &n); |
2177 | if (!found) { | 2175 | if (!found) { |
2178 | struct ubifs_zbranch zbr; | 2176 | struct ubifs_zbranch zbr; |
@@ -2221,8 +2219,8 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, | |||
2221 | struct ubifs_znode *znode; | 2219 | struct ubifs_znode *znode; |
2222 | 2220 | ||
2223 | mutex_lock(&c->tnc_mutex); | 2221 | mutex_lock(&c->tnc_mutex); |
2224 | dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, | 2222 | dbg_tnck(key, "old LEB %d:%d, new LEB %d:%d, len %d, key ", old_lnum, |
2225 | old_offs, lnum, offs, len, DBGKEY(key)); | 2223 | old_offs, lnum, offs, len); |
2226 | found = lookup_level0_dirty(c, key, &znode, &n); | 2224 | found = lookup_level0_dirty(c, key, &znode, &n); |
2227 | if (found < 0) { | 2225 | if (found < 0) { |
2228 | err = found; | 2226 | err = found; |
@@ -2304,8 +2302,8 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
2304 | struct ubifs_znode *znode; | 2302 | struct ubifs_znode *znode; |
2305 | 2303 | ||
2306 | mutex_lock(&c->tnc_mutex); | 2304 | mutex_lock(&c->tnc_mutex); |
2307 | dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, | 2305 | dbg_tnck(key, "LEB %d:%d, name '%.*s', key ", |
2308 | DBGKEY(key)); | 2306 | lnum, offs, nm->len, nm->name); |
2309 | found = lookup_level0_dirty(c, key, &znode, &n); | 2307 | found = lookup_level0_dirty(c, key, &znode, &n); |
2310 | if (found < 0) { | 2308 | if (found < 0) { |
2311 | err = found; | 2309 | err = found; |
@@ -2398,7 +2396,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) | |||
2398 | /* Delete without merge for now */ | 2396 | /* Delete without merge for now */ |
2399 | ubifs_assert(znode->level == 0); | 2397 | ubifs_assert(znode->level == 0); |
2400 | ubifs_assert(n >= 0 && n < c->fanout); | 2398 | ubifs_assert(n >= 0 && n < c->fanout); |
2401 | dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); | 2399 | dbg_tnck(&znode->zbranch[n].key, "deleting key "); |
2402 | 2400 | ||
2403 | zbr = &znode->zbranch[n]; | 2401 | zbr = &znode->zbranch[n]; |
2404 | lnc_free(zbr); | 2402 | lnc_free(zbr); |
@@ -2508,7 +2506,7 @@ int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) | |||
2508 | struct ubifs_znode *znode; | 2506 | struct ubifs_znode *znode; |
2509 | 2507 | ||
2510 | mutex_lock(&c->tnc_mutex); | 2508 | mutex_lock(&c->tnc_mutex); |
2511 | dbg_tnc("key %s", DBGKEY(key)); | 2509 | dbg_tnck(key, "key "); |
2512 | found = lookup_level0_dirty(c, key, &znode, &n); | 2510 | found = lookup_level0_dirty(c, key, &znode, &n); |
2513 | if (found < 0) { | 2511 | if (found < 0) { |
2514 | err = found; | 2512 | err = found; |
@@ -2539,7 +2537,7 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
2539 | struct ubifs_znode *znode; | 2537 | struct ubifs_znode *znode; |
2540 | 2538 | ||
2541 | mutex_lock(&c->tnc_mutex); | 2539 | mutex_lock(&c->tnc_mutex); |
2542 | dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); | 2540 | dbg_tnck(key, "%.*s, key ", nm->len, nm->name); |
2543 | err = lookup_level0_dirty(c, key, &znode, &n); | 2541 | err = lookup_level0_dirty(c, key, &znode, &n); |
2544 | if (err < 0) | 2542 | if (err < 0) |
2545 | goto out_unlock; | 2543 | goto out_unlock; |
@@ -2654,7 +2652,7 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, | |||
2654 | dbg_dump_znode(c, znode); | 2652 | dbg_dump_znode(c, znode); |
2655 | goto out_unlock; | 2653 | goto out_unlock; |
2656 | } | 2654 | } |
2657 | dbg_tnc("removing %s", DBGKEY(key)); | 2655 | dbg_tnck(key, "removing key "); |
2658 | } | 2656 | } |
2659 | if (k) { | 2657 | if (k) { |
2660 | for (i = n + 1 + k; i < znode->child_cnt; i++) | 2658 | for (i = n + 1 + k; i < znode->child_cnt; i++) |
@@ -2774,7 +2772,7 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, | |||
2774 | struct ubifs_zbranch *zbr; | 2772 | struct ubifs_zbranch *zbr; |
2775 | union ubifs_key *dkey; | 2773 | union ubifs_key *dkey; |
2776 | 2774 | ||
2777 | dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); | 2775 | dbg_tnck(key, "%s ", nm->name ? (char *)nm->name : "(lowest)"); |
2778 | ubifs_assert(is_hash_key(c, key)); | 2776 | ubifs_assert(is_hash_key(c, key)); |
2779 | 2777 | ||
2780 | mutex_lock(&c->tnc_mutex); | 2778 | mutex_lock(&c->tnc_mutex); |
@@ -3333,9 +3331,9 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, | |||
3333 | 3331 | ||
3334 | out_dump: | 3332 | out_dump: |
3335 | block = key_block(c, key); | 3333 | block = key_block(c, key); |
3336 | ubifs_err("inode %lu has size %lld, but there are data at offset %lld " | 3334 | ubifs_err("inode %lu has size %lld, but there are data at offset %lld", |
3337 | "(data key %s)", (unsigned long)inode->i_ino, size, | 3335 | (unsigned long)inode->i_ino, size, |
3338 | ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); | 3336 | ((loff_t)block) << UBIFS_BLOCK_SHIFT); |
3339 | mutex_unlock(&c->tnc_mutex); | 3337 | mutex_unlock(&c->tnc_mutex); |
3340 | dbg_dump_inode(c, inode); | 3338 | dbg_dump_inode(c, inode); |
3341 | dbg_dump_stack(); | 3339 | dbg_dump_stack(); |
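[Annotation] One behavioural consequence of the single per-call buffer: messages that used to print two keys at once via DBGKEY()/DBGKEY1() (two distinct static buffers) can no longer do so, because dbg_tnck() reuses one DBG_KEY_BUF_LEN buffer per expansion. Those reports are split into two lines instead, as in validate_data_node() above:

    dbg_tnck(&zbr->key, "looked for key ");
    dbg_tnck(&key1, "found node's key ");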
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index b48db999903e..dc28fe6ec07a 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c | |||
@@ -328,8 +328,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, | |||
328 | case UBIFS_XENT_KEY: | 328 | case UBIFS_XENT_KEY: |
329 | break; | 329 | break; |
330 | default: | 330 | default: |
331 | dbg_msg("bad key type at slot %d: %s", i, | 331 | dbg_msg("bad key type at slot %d: %d", |
332 | DBGKEY(&zbr->key)); | 332 | i, key_type(c, &zbr->key)); |
333 | err = 3; | 333 | err = 3; |
334 | goto out_dump; | 334 | goto out_dump; |
335 | } | 335 | } |
@@ -475,7 +475,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, | |||
475 | zbr->offs); | 475 | zbr->offs); |
476 | 476 | ||
477 | if (err) { | 477 | if (err) { |
478 | dbg_tnc("key %s", DBGKEY(key)); | 478 | dbg_tnck(key, "key "); |
479 | return err; | 479 | return err; |
480 | } | 480 | } |
481 | 481 | ||
@@ -484,8 +484,8 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, | |||
484 | if (!keys_eq(c, key, &key1)) { | 484 | if (!keys_eq(c, key, &key1)) { |
485 | ubifs_err("bad key in node at LEB %d:%d", | 485 | ubifs_err("bad key in node at LEB %d:%d", |
486 | zbr->lnum, zbr->offs); | 486 | zbr->lnum, zbr->offs); |
487 | dbg_tnc("looked for key %s found node's key %s", | 487 | dbg_tnck(key, "looked for key "); |
488 | DBGKEY(key), DBGKEY1(&key1)); | 488 | dbg_tnck(&key1, "but found node's key "); |
489 | dbg_dump_node(c, node); | 489 | dbg_dump_node(c, node); |
490 | return -EINVAL; | 490 | return -EINVAL; |
491 | } | 491 | } |
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index bf18f7a04544..85b272268754 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c | |||
@@ -138,12 +138,11 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, | |||
138 | ui = ubifs_inode(inode); | 138 | ui = ubifs_inode(inode); |
139 | ui->xattr = 1; | 139 | ui->xattr = 1; |
140 | ui->flags |= UBIFS_XATTR_FL; | 140 | ui->flags |= UBIFS_XATTR_FL; |
141 | ui->data = kmalloc(size, GFP_NOFS); | 141 | ui->data = kmemdup(value, size, GFP_NOFS); |
142 | if (!ui->data) { | 142 | if (!ui->data) { |
143 | err = -ENOMEM; | 143 | err = -ENOMEM; |
144 | goto out_free; | 144 | goto out_free; |
145 | } | 145 | } |
146 | memcpy(ui->data, value, size); | ||
147 | inode->i_size = ui->ui_size = size; | 146 | inode->i_size = ui->ui_size = size; |
148 | ui->data_len = size; | 147 | ui->data_len = size; |
149 | 148 | ||
@@ -204,12 +203,11 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, | |||
204 | return err; | 203 | return err; |
205 | 204 | ||
206 | kfree(ui->data); | 205 | kfree(ui->data); |
207 | ui->data = kmalloc(size, GFP_NOFS); | 206 | ui->data = kmemdup(value, size, GFP_NOFS); |
208 | if (!ui->data) { | 207 | if (!ui->data) { |
209 | err = -ENOMEM; | 208 | err = -ENOMEM; |
210 | goto out_free; | 209 | goto out_free; |
211 | } | 210 | } |
212 | memcpy(ui->data, value, size); | ||
213 | inode->i_size = ui->ui_size = size; | 211 | inode->i_size = ui->ui_size = size; |
214 | ui->data_len = size; | 212 | ui->data_len = size; |
215 | 213 | ||
diff --git a/fs/udf/file.c b/fs/udf/file.c index d8ffa7cc661d..dca0c3881e82 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -125,7 +125,6 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
125 | err = udf_expand_file_adinicb(inode); | 125 | err = udf_expand_file_adinicb(inode); |
126 | if (err) { | 126 | if (err) { |
127 | udf_debug("udf_expand_adinicb: err=%d\n", err); | 127 | udf_debug("udf_expand_adinicb: err=%d\n", err); |
128 | up_write(&iinfo->i_data_sem); | ||
129 | return err; | 128 | return err; |
130 | } | 129 | } |
131 | } else { | 130 | } else { |
@@ -133,9 +132,10 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
133 | iinfo->i_lenAlloc = pos + count; | 132 | iinfo->i_lenAlloc = pos + count; |
134 | else | 133 | else |
135 | iinfo->i_lenAlloc = inode->i_size; | 134 | iinfo->i_lenAlloc = inode->i_size; |
135 | up_write(&iinfo->i_data_sem); | ||
136 | } | 136 | } |
137 | } | 137 | } else |
138 | up_write(&iinfo->i_data_sem); | 138 | up_write(&iinfo->i_data_sem); |
139 | 139 | ||
140 | retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); | 140 | retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); |
141 | if (retval > 0) | 141 | if (retval > 0) |
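[Annotation] The udf/file.c change is a locking handoff: udf_expand_file_adinicb() must take a page lock, which ranks above i_data_sem, so it now drops the semaphore internally (see the inode.c hunk below) and callers stop unlocking after a failed expansion. A simplified sketch of the resulting flow in the write path, assuming a hypothetical helper file_would_outgrow_icb() for the in-ICB size check:

    down_write(&iinfo->i_data_sem);
    if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
            if (file_would_outgrow_icb(inode, pos, count)) {
                    /* drops i_data_sem before taking the page lock */
                    err = udf_expand_file_adinicb(inode);
                    if (err)
                            return err;     /* sem already released */
            } else {
                    if (pos + count > inode->i_size)
                            iinfo->i_lenAlloc = pos + count;
                    else
                            iinfo->i_lenAlloc = inode->i_size;
                    up_write(&iinfo->i_data_sem);
            }
    } else
            up_write(&iinfo->i_data_sem);

Every path out of this block leaves i_data_sem released exactly once, which is the invariant the old code broke on the error path.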
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 4598904be1bb..7699df7b3198 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -53,8 +53,7 @@ static int udf_update_inode(struct inode *, int); | |||
53 | static void udf_fill_inode(struct inode *, struct buffer_head *); | 53 | static void udf_fill_inode(struct inode *, struct buffer_head *); |
54 | static int udf_sync_inode(struct inode *inode); | 54 | static int udf_sync_inode(struct inode *inode); |
55 | static int udf_alloc_i_data(struct inode *inode, size_t size); | 55 | static int udf_alloc_i_data(struct inode *inode, size_t size); |
56 | static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, | 56 | static sector_t inode_getblk(struct inode *, sector_t, int *, int *); |
57 | sector_t *, int *); | ||
58 | static int8_t udf_insert_aext(struct inode *, struct extent_position, | 57 | static int8_t udf_insert_aext(struct inode *, struct extent_position, |
59 | struct kernel_lb_addr, uint32_t); | 58 | struct kernel_lb_addr, uint32_t); |
60 | static void udf_split_extents(struct inode *, int *, int, int, | 59 | static void udf_split_extents(struct inode *, int *, int, int, |
@@ -151,6 +150,12 @@ const struct address_space_operations udf_aops = { | |||
151 | .bmap = udf_bmap, | 150 | .bmap = udf_bmap, |
152 | }; | 151 | }; |
153 | 152 | ||
153 | /* | ||
154 | * Expand file stored in ICB to a normal one-block-file | ||
155 | * | ||
156 | * This function requires i_data_sem for writing and releases it. | ||
157 | * This function requires i_mutex held | ||
158 | */ | ||
154 | int udf_expand_file_adinicb(struct inode *inode) | 159 | int udf_expand_file_adinicb(struct inode *inode) |
155 | { | 160 | { |
156 | struct page *page; | 161 | struct page *page; |
@@ -169,9 +174,15 @@ int udf_expand_file_adinicb(struct inode *inode) | |||
169 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; | 174 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; |
170 | /* from now on we have normal address_space methods */ | 175 | /* from now on we have normal address_space methods */ |
171 | inode->i_data.a_ops = &udf_aops; | 176 | inode->i_data.a_ops = &udf_aops; |
177 | up_write(&iinfo->i_data_sem); | ||
172 | mark_inode_dirty(inode); | 178 | mark_inode_dirty(inode); |
173 | return 0; | 179 | return 0; |
174 | } | 180 | } |
181 | /* | ||
182 | * Release i_data_sem so that we can lock a page - page lock ranks | ||
183 | * above i_data_sem. i_mutex still protects us against file changes. | ||
184 | */ | ||
185 | up_write(&iinfo->i_data_sem); | ||
175 | 186 | ||
176 | page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS); | 187 | page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS); |
177 | if (!page) | 188 | if (!page) |
@@ -187,6 +198,7 @@ int udf_expand_file_adinicb(struct inode *inode) | |||
187 | SetPageUptodate(page); | 198 | SetPageUptodate(page); |
188 | kunmap(page); | 199 | kunmap(page); |
189 | } | 200 | } |
201 | down_write(&iinfo->i_data_sem); | ||
190 | memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00, | 202 | memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00, |
191 | iinfo->i_lenAlloc); | 203 | iinfo->i_lenAlloc); |
192 | iinfo->i_lenAlloc = 0; | 204 | iinfo->i_lenAlloc = 0; |
@@ -196,17 +208,20 @@ int udf_expand_file_adinicb(struct inode *inode) | |||
196 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; | 208 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; |
197 | /* from now on we have normal address_space methods */ | 209 | /* from now on we have normal address_space methods */ |
198 | inode->i_data.a_ops = &udf_aops; | 210 | inode->i_data.a_ops = &udf_aops; |
211 | up_write(&iinfo->i_data_sem); | ||
199 | err = inode->i_data.a_ops->writepage(page, &udf_wbc); | 212 | err = inode->i_data.a_ops->writepage(page, &udf_wbc); |
200 | if (err) { | 213 | if (err) { |
201 | /* Restore everything back so that we don't lose data... */ | 214 | /* Restore everything back so that we don't lose data... */ |
202 | lock_page(page); | 215 | lock_page(page); |
203 | kaddr = kmap(page); | 216 | kaddr = kmap(page); |
217 | down_write(&iinfo->i_data_sem); | ||
204 | memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, | 218 | memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, |
205 | inode->i_size); | 219 | inode->i_size); |
206 | kunmap(page); | 220 | kunmap(page); |
207 | unlock_page(page); | 221 | unlock_page(page); |
208 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; | 222 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; |
209 | inode->i_data.a_ops = &udf_adinicb_aops; | 223 | inode->i_data.a_ops = &udf_adinicb_aops; |
224 | up_write(&iinfo->i_data_sem); | ||
210 | } | 225 | } |
211 | page_cache_release(page); | 226 | page_cache_release(page); |
212 | mark_inode_dirty(inode); | 227 | mark_inode_dirty(inode); |
@@ -310,7 +325,6 @@ static int udf_get_block(struct inode *inode, sector_t block, | |||
310 | struct buffer_head *bh_result, int create) | 325 | struct buffer_head *bh_result, int create) |
311 | { | 326 | { |
312 | int err, new; | 327 | int err, new; |
313 | struct buffer_head *bh; | ||
314 | sector_t phys = 0; | 328 | sector_t phys = 0; |
315 | struct udf_inode_info *iinfo; | 329 | struct udf_inode_info *iinfo; |
316 | 330 | ||
@@ -323,7 +337,6 @@ static int udf_get_block(struct inode *inode, sector_t block, | |||
323 | 337 | ||
324 | err = -EIO; | 338 | err = -EIO; |
325 | new = 0; | 339 | new = 0; |
326 | bh = NULL; | ||
327 | iinfo = UDF_I(inode); | 340 | iinfo = UDF_I(inode); |
328 | 341 | ||
329 | down_write(&iinfo->i_data_sem); | 342 | down_write(&iinfo->i_data_sem); |
@@ -332,13 +345,10 @@ static int udf_get_block(struct inode *inode, sector_t block, | |||
332 | iinfo->i_next_alloc_goal++; | 345 | iinfo->i_next_alloc_goal++; |
333 | } | 346 | } |
334 | 347 | ||
335 | err = 0; | ||
336 | 348 | ||
337 | bh = inode_getblk(inode, block, &err, &phys, &new); | 349 | phys = inode_getblk(inode, block, &err, &new); |
338 | BUG_ON(bh); | 350 | if (!phys) |
339 | if (err) | ||
340 | goto abort; | 351 | goto abort; |
341 | BUG_ON(!phys); | ||
342 | 352 | ||
343 | if (new) | 353 | if (new) |
344 | set_buffer_new(bh_result); | 354 | set_buffer_new(bh_result); |
@@ -547,11 +557,10 @@ out: | |||
547 | return err; | 557 | return err; |
548 | } | 558 | } |
549 | 559 | ||
550 | static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | 560 | static sector_t inode_getblk(struct inode *inode, sector_t block, |
551 | int *err, sector_t *phys, int *new) | 561 | int *err, int *new) |
552 | { | 562 | { |
553 | static sector_t last_block; | 563 | static sector_t last_block; |
554 | struct buffer_head *result = NULL; | ||
555 | struct kernel_long_ad laarr[EXTENT_MERGE_SIZE]; | 564 | struct kernel_long_ad laarr[EXTENT_MERGE_SIZE]; |
556 | struct extent_position prev_epos, cur_epos, next_epos; | 565 | struct extent_position prev_epos, cur_epos, next_epos; |
557 | int count = 0, startnum = 0, endnum = 0; | 566 | int count = 0, startnum = 0, endnum = 0; |
@@ -566,6 +575,8 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
566 | int goal = 0, pgoal = iinfo->i_location.logicalBlockNum; | 575 | int goal = 0, pgoal = iinfo->i_location.logicalBlockNum; |
567 | int lastblock = 0; | 576 | int lastblock = 0; |
568 | 577 | ||
578 | *err = 0; | ||
579 | *new = 0; | ||
569 | prev_epos.offset = udf_file_entry_alloc_offset(inode); | 580 | prev_epos.offset = udf_file_entry_alloc_offset(inode); |
570 | prev_epos.block = iinfo->i_location; | 581 | prev_epos.block = iinfo->i_location; |
571 | prev_epos.bh = NULL; | 582 | prev_epos.bh = NULL; |
@@ -635,8 +646,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
635 | brelse(cur_epos.bh); | 646 | brelse(cur_epos.bh); |
636 | brelse(next_epos.bh); | 647 | brelse(next_epos.bh); |
637 | newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset); | 648 | newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset); |
638 | *phys = newblock; | 649 | return newblock; |
639 | return NULL; | ||
640 | } | 650 | } |
641 | 651 | ||
642 | last_block = block; | 652 | last_block = block; |
@@ -664,7 +674,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
664 | brelse(cur_epos.bh); | 674 | brelse(cur_epos.bh); |
665 | brelse(next_epos.bh); | 675 | brelse(next_epos.bh); |
666 | *err = ret; | 676 | *err = ret; |
667 | return NULL; | 677 | return 0; |
668 | } | 678 | } |
669 | c = 0; | 679 | c = 0; |
670 | offset = 0; | 680 | offset = 0; |
@@ -729,7 +739,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
729 | if (!newblocknum) { | 739 | if (!newblocknum) { |
730 | brelse(prev_epos.bh); | 740 | brelse(prev_epos.bh); |
731 | *err = -ENOSPC; | 741 | *err = -ENOSPC; |
732 | return NULL; | 742 | return 0; |
733 | } | 743 | } |
734 | iinfo->i_lenExtents += inode->i_sb->s_blocksize; | 744 | iinfo->i_lenExtents += inode->i_sb->s_blocksize; |
735 | } | 745 | } |
@@ -761,10 +771,10 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
761 | 771 | ||
762 | newblock = udf_get_pblock(inode->i_sb, newblocknum, | 772 | newblock = udf_get_pblock(inode->i_sb, newblocknum, |
763 | iinfo->i_location.partitionReferenceNum, 0); | 773 | iinfo->i_location.partitionReferenceNum, 0); |
764 | if (!newblock) | 774 | if (!newblock) { |
765 | return NULL; | 775 | *err = -EIO; |
766 | *phys = newblock; | 776 | return 0; |
767 | *err = 0; | 777 | } |
768 | *new = 1; | 778 | *new = 1; |
769 | iinfo->i_next_alloc_block = block; | 779 | iinfo->i_next_alloc_block = block; |
770 | iinfo->i_next_alloc_goal = newblocknum; | 780 | iinfo->i_next_alloc_goal = newblocknum; |
@@ -775,7 +785,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, | |||
775 | else | 785 | else |
776 | mark_inode_dirty(inode); | 786 | mark_inode_dirty(inode); |
777 | 787 | ||
778 | return result; | 788 | return newblock; |
779 | } | 789 | } |
780 | 790 | ||
781 | static void udf_split_extents(struct inode *inode, int *c, int offset, | 791 | static void udf_split_extents(struct inode *inode, int *c, int offset, |
@@ -1111,10 +1121,9 @@ int udf_setsize(struct inode *inode, loff_t newsize) | |||
1111 | if (bsize < | 1121 | if (bsize < |
1112 | (udf_file_entry_alloc_offset(inode) + newsize)) { | 1122 | (udf_file_entry_alloc_offset(inode) + newsize)) { |
1113 | err = udf_expand_file_adinicb(inode); | 1123 | err = udf_expand_file_adinicb(inode); |
1114 | if (err) { | 1124 | if (err) |
1115 | up_write(&iinfo->i_data_sem); | ||
1116 | return err; | 1125 | return err; |
1117 | } | 1126 | down_write(&iinfo->i_data_sem); |
1118 | } else | 1127 | } else |
1119 | iinfo->i_lenAlloc = newsize; | 1128 | iinfo->i_lenAlloc = newsize; |
1120 | } | 1129 | } |
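[Annotation] The bigger udf/inode.c refactor changes inode_getblk()'s calling convention: it used to return a struct buffer_head * that was always NULL (callers even had BUG_ON(bh) checks) while passing the physical block out through *phys. It now returns the physical sector directly, with 0 signalling failure and *err carrying the reason, and it clears *err and *new itself on entry. The call site in udf_get_block() shrinks accordingly:

    phys = inode_getblk(inode, block, &err, &new);
    if (!phys)
            goto abort;

    if (new)
            set_buffer_new(bh_result);

One subtle fix falls out of it: the old code returned NULL without setting *err when udf_get_pblock() failed, whereas the new code explicitly sets *err = -EIO on that path.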
diff --git a/fs/udf/super.c b/fs/udf/super.c index 0c33225647a0..c09a84daaf50 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -1798,6 +1798,12 @@ static void udf_close_lvid(struct super_block *sb) | |||
1798 | le16_to_cpu(lvid->descTag.descCRCLength))); | 1798 | le16_to_cpu(lvid->descTag.descCRCLength))); |
1799 | 1799 | ||
1800 | lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); | 1800 | lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); |
1801 | /* | ||
1802 | * We set buffer uptodate unconditionally here to avoid spurious | ||
1803 | * warnings from mark_buffer_dirty() when previous EIO has marked | ||
1804 | * the buffer as !uptodate | ||
1805 | */ | ||
1806 | set_buffer_uptodate(bh); | ||
1801 | mark_buffer_dirty(bh); | 1807 | mark_buffer_dirty(bh); |
1802 | sbi->s_lvid_dirty = 0; | 1808 | sbi->s_lvid_dirty = 0; |
1803 | mutex_unlock(&sbi->s_alloc_mutex); | 1809 | mutex_unlock(&sbi->s_alloc_mutex); |
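[Annotation] The super.c hunk is a one-liner worth remembering as a pattern: a prior I/O error can leave a buffer_head !uptodate, and mark_buffer_dirty() warns when asked to dirty such a buffer. Since the LVID buffer is about to be rewritten wholesale anyway, re-marking it uptodate first is safe and silences the spurious warning:

    set_buffer_uptodate(bh);
    mark_buffer_dirty(bh);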
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index b1d4488b0f14..d7c6dbe4194b 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c | |||
@@ -41,10 +41,16 @@ static void udf_pc_to_char(struct super_block *sb, unsigned char *from, | |||
41 | pc = (struct pathComponent *)(from + elen); | 41 | pc = (struct pathComponent *)(from + elen); |
42 | switch (pc->componentType) { | 42 | switch (pc->componentType) { |
43 | case 1: | 43 | case 1: |
44 | if (pc->lengthComponentIdent == 0) { | 44 | /* |
45 | p = to; | 45 | * Symlink points to some place which should be agreed |
46 | *p++ = '/'; | 46 | * upon between originator and receiver of the media. Ignore. |
47 | } | 47 | */ |
48 | if (pc->lengthComponentIdent > 0) | ||
49 | break; | ||
50 | /* Fall through */ | ||
51 | case 2: | ||
52 | p = to; | ||
53 | *p++ = '/'; | ||
48 | break; | 54 | break; |
49 | case 3: | 55 | case 3: |
50 | memcpy(p, "../", 3); | 56 | memcpy(p, "../", 3); |
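[Annotation] In udf/symlink.c, path component type 2 now also translates to "/". Reading the new code with its comment: a type-1 component carrying a non-empty identifier points at a location "agreed upon between originator and receiver of the media" and is skipped, while an empty type-1 identifier falls through to the type-2 handling and emits the root slash. (That type 2 designates a root component is my reading of the ECMA-167 path component table; the diff itself only shows the fall-through.)

    case 1:
            if (pc->lengthComponentIdent > 0)
                    break;          /* implementation-defined: ignore */
            /* Fall through */
    case 2:
            p = to;
            *p++ = '/';
            break;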
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 574d4ee9b625..74b9baf36ac3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -111,8 +111,7 @@ xfs_ioend_new_eof( | |||
111 | xfs_fsize_t bsize; | 111 | xfs_fsize_t bsize; |
112 | 112 | ||
113 | bsize = ioend->io_offset + ioend->io_size; | 113 | bsize = ioend->io_offset + ioend->io_size; |
114 | isize = MAX(ip->i_size, ip->i_new_size); | 114 | isize = MIN(i_size_read(VFS_I(ip)), bsize); |
115 | isize = MIN(isize, bsize); | ||
116 | return isize > ip->i_d.di_size ? isize : 0; | 115 | return isize > ip->i_d.di_size ? isize : 0; |
117 | } | 116 | } |
118 | 117 | ||
@@ -126,11 +125,7 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) | |||
126 | } | 125 | } |
127 | 126 | ||
128 | /* | 127 | /* |
129 | * Update on-disk file size now that data has been written to disk. The | 128 | * Update on-disk file size now that data has been written to disk. |
130 | * current in-memory file size is i_size. If a write is beyond eof i_new_size | ||
131 | * will be the intended file size until i_size is updated. If this write does | ||
132 | * not extend all the way to the valid file size then restrict this update to | ||
133 | * the end of the write. | ||
134 | * | 129 | * |
135 | * This function does not block as blocking on the inode lock in IO completion | 130 | * This function does not block as blocking on the inode lock in IO completion |
136 | * can lead to IO completion order dependency deadlocks.. If it can't get the | 131 | * can lead to IO completion order dependency deadlocks.. If it can't get the |
@@ -1279,6 +1274,15 @@ xfs_end_io_direct_write( | |||
1279 | struct xfs_ioend *ioend = iocb->private; | 1274 | struct xfs_ioend *ioend = iocb->private; |
1280 | 1275 | ||
1281 | /* | 1276 | /* |
1277 | * While the generic direct I/O code updates the inode size, it does | ||
1278 | * so only after the end_io handler is called, which means our | ||
1279 | * end_io handler thinks the on-disk size is outside the in-core | ||
1280 | * size. To prevent this just update it a little bit earlier here. | ||
1281 | */ | ||
1282 | if (offset + size > i_size_read(ioend->io_inode)) | ||
1283 | i_size_write(ioend->io_inode, offset + size); | ||
1284 | |||
1285 | /* | ||
1282 | * blockdev_direct_IO can return an error even after the I/O | 1286 | * blockdev_direct_IO can return an error even after the I/O |
1283 | * completion handler was called. Thus we need to protect | 1287 | * completion handler was called. Thus we need to protect |
1284 | * against double-freeing. | 1288 | * against double-freeing. |
@@ -1340,12 +1344,11 @@ xfs_vm_write_failed( | |||
1340 | 1344 | ||
1341 | if (to > inode->i_size) { | 1345 | if (to > inode->i_size) { |
1342 | /* | 1346 | /* |
1343 | * punch out the delalloc blocks we have already allocated. We | 1347 | * Punch out the delalloc blocks we have already allocated. |
1344 | * don't call xfs_setattr() to do this as we may be in the | 1348 | * |
1345 | * middle of a multi-iovec write and so the vfs inode->i_size | 1349 | * Don't bother with xfs_setattr given that nothing can have |
1346 | * will not match the xfs ip->i_size and so it will zero too | 1350 | * made it to disk yet as the page is still locked at this |
1347 | * much. Hence we jus truncate the page cache to zero what is | 1351 | * point. |
1348 | * necessary and punch the delalloc blocks directly. | ||
1349 | */ | 1352 | */ |
1350 | struct xfs_inode *ip = XFS_I(inode); | 1353 | struct xfs_inode *ip = XFS_I(inode); |
1351 | xfs_fileoff_t start_fsb; | 1354 | xfs_fileoff_t start_fsb; |
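[Annotation] Both xfs_aops.c hunks concern inode-size bookkeeping. With the i_new_size field gone, xfs_ioend_new_eof() computes the new-EOF candidate straight from the generic in-core size, clamped to the end of the just-completed I/O:

    isize = MIN(i_size_read(VFS_I(ip)), bsize);
    return isize > ip->i_d.di_size ? isize : 0;

The second hunk compensates for ordering in the generic direct I/O path: i_size is normally updated only after the end_io handler returns, so xfs_end_io_direct_write() grows the in-core size itself before the on-disk size logic runs; otherwise that logic would see a stale in-core size and skip the update.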
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 1e5d97f86ea8..08b9ac644c31 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
@@ -827,10 +827,6 @@ xfs_attr_inactive(xfs_inode_t *dp) | |||
827 | if (error) | 827 | if (error) |
828 | goto out; | 828 | goto out; |
829 | 829 | ||
830 | /* | ||
831 | * Commit the last in the sequence of transactions. | ||
832 | */ | ||
833 | xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); | ||
834 | error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); | 830 | error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); |
835 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 831 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
836 | 832 | ||
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index c1b55e596551..d25eafd4d28d 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
@@ -271,10 +271,6 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) | |||
271 | dp = args->dp; | 271 | dp = args->dp; |
272 | mp = dp->i_mount; | 272 | mp = dp->i_mount; |
273 | dp->i_d.di_forkoff = forkoff; | 273 | dp->i_d.di_forkoff = forkoff; |
274 | dp->i_df.if_ext_max = | ||
275 | XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); | ||
276 | dp->i_afp->if_ext_max = | ||
277 | XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); | ||
278 | 274 | ||
279 | ifp = dp->i_afp; | 275 | ifp = dp->i_afp; |
280 | ASSERT(ifp->if_flags & XFS_IFINLINE); | 276 | ASSERT(ifp->if_flags & XFS_IFINLINE); |
@@ -326,7 +322,6 @@ xfs_attr_fork_reset( | |||
326 | ASSERT(ip->i_d.di_anextents == 0); | 322 | ASSERT(ip->i_d.di_anextents == 0); |
327 | ASSERT(ip->i_afp == NULL); | 323 | ASSERT(ip->i_afp == NULL); |
328 | 324 | ||
329 | ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t); | ||
330 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 325 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
331 | } | 326 | } |
332 | 327 | ||
@@ -389,10 +384,6 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) | |||
389 | (args->op_flags & XFS_DA_OP_ADDNAME) || | 384 | (args->op_flags & XFS_DA_OP_ADDNAME) || |
390 | !(mp->m_flags & XFS_MOUNT_ATTR2) || | 385 | !(mp->m_flags & XFS_MOUNT_ATTR2) || |
391 | dp->i_d.di_format == XFS_DINODE_FMT_BTREE); | 386 | dp->i_d.di_format == XFS_DINODE_FMT_BTREE); |
392 | dp->i_afp->if_ext_max = | ||
393 | XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); | ||
394 | dp->i_df.if_ext_max = | ||
395 | XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); | ||
396 | xfs_trans_log_inode(args->trans, dp, | 387 | xfs_trans_log_inode(args->trans, dp, |
397 | XFS_ILOG_CORE | XFS_ILOG_ADATA); | 388 | XFS_ILOG_CORE | XFS_ILOG_ADATA); |
398 | } | 389 | } |
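The deletions above (and the matching ones throughout the rest of this diff) drop the cached per-fork `if_ext_max` value, which had to be recomputed by hand at every fork-offset change. The replacement derives the limit on demand. A standalone sketch of the arithmetic; the 16-byte record size follows from the on-disk bmbt extent format, while the fork size and macro name are made up for illustration:

```c
#include <stdio.h>
#include <stdint.h>

/* On-disk bmap extent record: two 64-bit words, i.e. 16 bytes. */
typedef struct { uint64_t l0, l1; } xfs_bmbt_rec_t;

/* Assumed shape of the XFS_IFORK_MAXEXT() macro the new code calls: how
 * many extent records fit in the fork's share of the inode literal area,
 * computed whenever needed instead of cached in if_ext_max. */
#define IFORK_MAXEXT(fork_size)	((fork_size) / sizeof(xfs_bmbt_rec_t))

int main(void)
{
	/* a hypothetical 336-byte data fork holds 336 / 16 = 21 records */
	printf("max in-inode extents: %zu\n", IFORK_MAXEXT(336));
	return 0;
}
```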
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index d0ab78837057..188ef2fbd628 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -249,7 +249,27 @@ xfs_bmbt_lookup_ge( | |||
249 | } | 249 | } |
250 | 250 | ||
251 | /* | 251 | /* |
252 | * Update the record referred to by cur to the value given | 252 | * Check if the inode needs to be converted to btree format. |
253 | */ | ||
254 | static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork) | ||
255 | { | ||
256 | return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && | ||
257 | XFS_IFORK_NEXTENTS(ip, whichfork) > | ||
258 | XFS_IFORK_MAXEXT(ip, whichfork); | ||
259 | } | ||
260 | |||
261 | /* | ||
262 | * Check if the inode should be converted to extent format. | ||
263 | */ | ||
264 | static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork) | ||
265 | { | ||
266 | return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && | ||
267 | XFS_IFORK_NEXTENTS(ip, whichfork) <= | ||
268 | XFS_IFORK_MAXEXT(ip, whichfork); | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * Update the record referred to by cur to the value given | ||
253 | * by [off, bno, len, state]. | 273 | * by [off, bno, len, state]. |
254 | * This either works (return 0) or gets an EFSCORRUPTED error. | 274 | * This either works (return 0) or gets an EFSCORRUPTED error. |
255 | */ | 275 | */ |
@@ -683,8 +703,8 @@ xfs_bmap_add_extent_delay_real( | |||
683 | goto done; | 703 | goto done; |
684 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 704 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
685 | } | 705 | } |
686 | if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | 706 | |
687 | bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) { | 707 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { |
688 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, | 708 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, |
689 | bma->firstblock, bma->flist, | 709 | bma->firstblock, bma->flist, |
690 | &bma->cur, 1, &tmp_rval, XFS_DATA_FORK); | 710 | &bma->cur, 1, &tmp_rval, XFS_DATA_FORK); |
@@ -767,8 +787,8 @@ xfs_bmap_add_extent_delay_real( | |||
767 | goto done; | 787 | goto done; |
768 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 788 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
769 | } | 789 | } |
770 | if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | 790 | |
771 | bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) { | 791 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { |
772 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, | 792 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, |
773 | bma->firstblock, bma->flist, &bma->cur, 1, | 793 | bma->firstblock, bma->flist, &bma->cur, 1, |
774 | &tmp_rval, XFS_DATA_FORK); | 794 | &tmp_rval, XFS_DATA_FORK); |
@@ -836,8 +856,8 @@ xfs_bmap_add_extent_delay_real( | |||
836 | goto done; | 856 | goto done; |
837 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 857 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
838 | } | 858 | } |
839 | if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | 859 | |
840 | bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) { | 860 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { |
841 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, | 861 | error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, |
842 | bma->firstblock, bma->flist, &bma->cur, | 862 | bma->firstblock, bma->flist, &bma->cur, |
843 | 1, &tmp_rval, XFS_DATA_FORK); | 863 | 1, &tmp_rval, XFS_DATA_FORK); |
@@ -884,8 +904,7 @@ xfs_bmap_add_extent_delay_real( | |||
884 | } | 904 | } |
885 | 905 | ||
886 | /* convert to a btree if necessary */ | 906 | /* convert to a btree if necessary */ |
887 | if (XFS_IFORK_FORMAT(bma->ip, XFS_DATA_FORK) == XFS_DINODE_FMT_EXTENTS && | 907 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { |
888 | XFS_IFORK_NEXTENTS(bma->ip, XFS_DATA_FORK) > ifp->if_ext_max) { | ||
889 | int tmp_logflags; /* partial log flag return val */ | 908 | int tmp_logflags; /* partial log flag return val */ |
890 | 909 | ||
891 | ASSERT(bma->cur == NULL); | 910 | ASSERT(bma->cur == NULL); |
@@ -1421,8 +1440,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
1421 | } | 1440 | } |
1422 | 1441 | ||
1423 | /* convert to a btree if necessary */ | 1442 | /* convert to a btree if necessary */ |
1424 | if (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) == XFS_DINODE_FMT_EXTENTS && | 1443 | if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { |
1425 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > ifp->if_ext_max) { | ||
1426 | int tmp_logflags; /* partial log flag return val */ | 1444 | int tmp_logflags; /* partial log flag return val */ |
1427 | 1445 | ||
1428 | ASSERT(cur == NULL); | 1446 | ASSERT(cur == NULL); |
@@ -1812,8 +1830,7 @@ xfs_bmap_add_extent_hole_real( | |||
1812 | } | 1830 | } |
1813 | 1831 | ||
1814 | /* convert to a btree if necessary */ | 1832 | /* convert to a btree if necessary */ |
1815 | if (XFS_IFORK_FORMAT(bma->ip, whichfork) == XFS_DINODE_FMT_EXTENTS && | 1833 | if (xfs_bmap_needs_btree(bma->ip, whichfork)) { |
1816 | XFS_IFORK_NEXTENTS(bma->ip, whichfork) > ifp->if_ext_max) { | ||
1817 | int tmp_logflags; /* partial log flag return val */ | 1834 | int tmp_logflags; /* partial log flag return val */ |
1818 | 1835 | ||
1819 | ASSERT(bma->cur == NULL); | 1836 | ASSERT(bma->cur == NULL); |
@@ -3037,8 +3054,7 @@ xfs_bmap_extents_to_btree( | |||
3037 | 3054 | ||
3038 | ifp = XFS_IFORK_PTR(ip, whichfork); | 3055 | ifp = XFS_IFORK_PTR(ip, whichfork); |
3039 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); | 3056 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); |
3040 | ASSERT(ifp->if_ext_max == | 3057 | |
3041 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
3042 | /* | 3058 | /* |
3043 | * Make space in the inode incore. | 3059 | * Make space in the inode incore. |
3044 | */ | 3060 | */ |
@@ -3184,13 +3200,8 @@ xfs_bmap_forkoff_reset( | |||
3184 | ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { | 3200 | ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { |
3185 | uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; | 3201 | uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; |
3186 | 3202 | ||
3187 | if (dfl_forkoff > ip->i_d.di_forkoff) { | 3203 | if (dfl_forkoff > ip->i_d.di_forkoff) |
3188 | ip->i_d.di_forkoff = dfl_forkoff; | 3204 | ip->i_d.di_forkoff = dfl_forkoff; |
3189 | ip->i_df.if_ext_max = | ||
3190 | XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t); | ||
3191 | ip->i_afp->if_ext_max = | ||
3192 | XFS_IFORK_ASIZE(ip) / sizeof(xfs_bmbt_rec_t); | ||
3193 | } | ||
3194 | } | 3205 | } |
3195 | } | 3206 | } |
3196 | 3207 | ||
@@ -3430,8 +3441,6 @@ xfs_bmap_add_attrfork( | |||
3430 | int error; /* error return value */ | 3441 | int error; /* error return value */ |
3431 | 3442 | ||
3432 | ASSERT(XFS_IFORK_Q(ip) == 0); | 3443 | ASSERT(XFS_IFORK_Q(ip) == 0); |
3433 | ASSERT(ip->i_df.if_ext_max == | ||
3434 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
3435 | 3444 | ||
3436 | mp = ip->i_mount; | 3445 | mp = ip->i_mount; |
3437 | ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); | 3446 | ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); |
@@ -3486,12 +3495,9 @@ xfs_bmap_add_attrfork( | |||
3486 | error = XFS_ERROR(EINVAL); | 3495 | error = XFS_ERROR(EINVAL); |
3487 | goto error1; | 3496 | goto error1; |
3488 | } | 3497 | } |
3489 | ip->i_df.if_ext_max = | 3498 | |
3490 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); | ||
3491 | ASSERT(ip->i_afp == NULL); | 3499 | ASSERT(ip->i_afp == NULL); |
3492 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); | 3500 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); |
3493 | ip->i_afp->if_ext_max = | ||
3494 | XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); | ||
3495 | ip->i_afp->if_flags = XFS_IFEXTENTS; | 3501 | ip->i_afp->if_flags = XFS_IFEXTENTS; |
3496 | logflags = 0; | 3502 | logflags = 0; |
3497 | xfs_bmap_init(&flist, &firstblock); | 3503 | xfs_bmap_init(&flist, &firstblock); |
@@ -3535,20 +3541,17 @@ xfs_bmap_add_attrfork( | |||
3535 | } else | 3541 | } else |
3536 | spin_unlock(&mp->m_sb_lock); | 3542 | spin_unlock(&mp->m_sb_lock); |
3537 | } | 3543 | } |
3538 | if ((error = xfs_bmap_finish(&tp, &flist, &committed))) | 3544 | |
3545 | error = xfs_bmap_finish(&tp, &flist, &committed); | ||
3546 | if (error) | ||
3539 | goto error2; | 3547 | goto error2; |
3540 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 3548 | return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
3541 | ASSERT(ip->i_df.if_ext_max == | ||
3542 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
3543 | return error; | ||
3544 | error2: | 3549 | error2: |
3545 | xfs_bmap_cancel(&flist); | 3550 | xfs_bmap_cancel(&flist); |
3546 | error1: | 3551 | error1: |
3547 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 3552 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
3548 | error0: | 3553 | error0: |
3549 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); | 3554 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); |
3550 | ASSERT(ip->i_df.if_ext_max == | ||
3551 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
3552 | return error; | 3555 | return error; |
3553 | } | 3556 | } |
3554 | 3557 | ||
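Beyond dropping the asserts, the hunk above untangles the error flow: the assignment-inside-`if` idiom becomes two statements, and the final commit's return value is passed straight out since nothing needs cleanup after it. The two shapes, isolated with placeholder functions (`f` and `g` stand in for the bmap-finish and commit steps):

```c
static int f(void) { return 0; }	/* placeholder: first step  */
static int g(void) { return 0; }	/* placeholder: final step  */

/* before: assignment buried in the condition, result juggled at the end */
static int old_style(void)
{
	int error;

	if ((error = f()))
		goto out;
	error = g();
out:
	return error;
}

/* after: one statement per step; the last call's result is returned
 * directly because no cleanup follows it */
static int new_style(void)
{
	int error;

	error = f();
	if (error)
		goto out;
	return g();
out:
	return error;
}
```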
@@ -3994,11 +3997,8 @@ xfs_bmap_one_block( | |||
3994 | xfs_bmbt_irec_t s; /* internal version of extent */ | 3997 | xfs_bmbt_irec_t s; /* internal version of extent */ |
3995 | 3998 | ||
3996 | #ifndef DEBUG | 3999 | #ifndef DEBUG |
3997 | if (whichfork == XFS_DATA_FORK) { | 4000 | if (whichfork == XFS_DATA_FORK) |
3998 | return S_ISREG(ip->i_d.di_mode) ? | 4001 | return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize; |
3999 | (ip->i_size == ip->i_mount->m_sb.sb_blocksize) : | ||
4000 | (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize); | ||
4001 | } | ||
4002 | #endif /* !DEBUG */ | 4002 | #endif /* !DEBUG */ |
4003 | if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) | 4003 | if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) |
4004 | return 0; | 4004 | return 0; |
@@ -4010,7 +4010,7 @@ xfs_bmap_one_block( | |||
4010 | xfs_bmbt_get_all(ep, &s); | 4010 | xfs_bmbt_get_all(ep, &s); |
4011 | rval = s.br_startoff == 0 && s.br_blockcount == 1; | 4011 | rval = s.br_startoff == 0 && s.br_blockcount == 1; |
4012 | if (rval && whichfork == XFS_DATA_FORK) | 4012 | if (rval && whichfork == XFS_DATA_FORK) |
4013 | ASSERT(ip->i_size == ip->i_mount->m_sb.sb_blocksize); | 4013 | ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize); |
4014 | return rval; | 4014 | return rval; |
4015 | } | 4015 | } |
4016 | 4016 | ||
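This hunk and several below replace open-coded `ip->i_size` / `ip->i_d.di_size` selection with `XFS_ISIZE(ip)`. The macro's definition is not shown in the hunks here; judging from the call sites it plausibly reads like the sketch below (an assumption inferred from usage, not a quote from the tree): regular files answer with the VFS in-core size, everything else with the on-disk size, folding away the `S_ISREG()` branching at each caller.

```c
/* Assumed shape of XFS_ISIZE(), inferred from its uses in this diff. */
static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
{
	if (S_ISREG(ip->i_d.di_mode))
		return i_size_read(VFS_I(ip));
	return ip->i_d.di_size;
}
```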
@@ -4379,8 +4379,6 @@ xfs_bmapi_read( | |||
4379 | XFS_STATS_INC(xs_blk_mapr); | 4379 | XFS_STATS_INC(xs_blk_mapr); |
4380 | 4380 | ||
4381 | ifp = XFS_IFORK_PTR(ip, whichfork); | 4381 | ifp = XFS_IFORK_PTR(ip, whichfork); |
4382 | ASSERT(ifp->if_ext_max == | ||
4383 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
4384 | 4382 | ||
4385 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | 4383 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { |
4386 | error = xfs_iread_extents(NULL, ip, whichfork); | 4384 | error = xfs_iread_extents(NULL, ip, whichfork); |
@@ -4871,8 +4869,6 @@ xfs_bmapi_write( | |||
4871 | return XFS_ERROR(EIO); | 4869 | return XFS_ERROR(EIO); |
4872 | 4870 | ||
4873 | ifp = XFS_IFORK_PTR(ip, whichfork); | 4871 | ifp = XFS_IFORK_PTR(ip, whichfork); |
4874 | ASSERT(ifp->if_ext_max == | ||
4875 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
4876 | 4872 | ||
4877 | XFS_STATS_INC(xs_blk_mapw); | 4873 | XFS_STATS_INC(xs_blk_mapw); |
4878 | 4874 | ||
@@ -4981,8 +4977,7 @@ xfs_bmapi_write( | |||
4981 | /* | 4977 | /* |
4982 | * Transform from btree to extents, give it cur. | 4978 | * Transform from btree to extents, give it cur. |
4983 | */ | 4979 | */ |
4984 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && | 4980 | if (xfs_bmap_wants_extents(ip, whichfork)) { |
4985 | XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) { | ||
4986 | int tmp_logflags = 0; | 4981 | int tmp_logflags = 0; |
4987 | 4982 | ||
4988 | ASSERT(bma.cur); | 4983 | ASSERT(bma.cur); |
@@ -4992,10 +4987,10 @@ xfs_bmapi_write( | |||
4992 | if (error) | 4987 | if (error) |
4993 | goto error0; | 4988 | goto error0; |
4994 | } | 4989 | } |
4995 | ASSERT(ifp->if_ext_max == | 4990 | |
4996 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
4997 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || | 4991 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || |
4998 | XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max); | 4992 | XFS_IFORK_NEXTENTS(ip, whichfork) > |
4993 | XFS_IFORK_MAXEXT(ip, whichfork)); | ||
4999 | error = 0; | 4994 | error = 0; |
5000 | error0: | 4995 | error0: |
5001 | /* | 4996 | /* |
@@ -5095,8 +5090,7 @@ xfs_bunmapi( | |||
5095 | 5090 | ||
5096 | ASSERT(len > 0); | 5091 | ASSERT(len > 0); |
5097 | ASSERT(nexts >= 0); | 5092 | ASSERT(nexts >= 0); |
5098 | ASSERT(ifp->if_ext_max == | 5093 | |
5099 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
5100 | if (!(ifp->if_flags & XFS_IFEXTENTS) && | 5094 | if (!(ifp->if_flags & XFS_IFEXTENTS) && |
5101 | (error = xfs_iread_extents(tp, ip, whichfork))) | 5095 | (error = xfs_iread_extents(tp, ip, whichfork))) |
5102 | return error; | 5096 | return error; |
@@ -5322,7 +5316,8 @@ xfs_bunmapi( | |||
5322 | */ | 5316 | */ |
5323 | if (!wasdel && xfs_trans_get_block_res(tp) == 0 && | 5317 | if (!wasdel && xfs_trans_get_block_res(tp) == 0 && |
5324 | XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && | 5318 | XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && |
5325 | XFS_IFORK_NEXTENTS(ip, whichfork) >= ifp->if_ext_max && | 5319 | XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ |
5320 | XFS_IFORK_MAXEXT(ip, whichfork) && | ||
5326 | del.br_startoff > got.br_startoff && | 5321 | del.br_startoff > got.br_startoff && |
5327 | del.br_startoff + del.br_blockcount < | 5322 | del.br_startoff + del.br_blockcount < |
5328 | got.br_startoff + got.br_blockcount) { | 5323 | got.br_startoff + got.br_blockcount) { |
@@ -5353,13 +5348,11 @@ nodelete: | |||
5353 | } | 5348 | } |
5354 | } | 5349 | } |
5355 | *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; | 5350 | *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; |
5356 | ASSERT(ifp->if_ext_max == | 5351 | |
5357 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
5358 | /* | 5352 | /* |
5359 | * Convert to a btree if necessary. | 5353 | * Convert to a btree if necessary. |
5360 | */ | 5354 | */ |
5361 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && | 5355 | if (xfs_bmap_needs_btree(ip, whichfork)) { |
5362 | XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) { | ||
5363 | ASSERT(cur == NULL); | 5356 | ASSERT(cur == NULL); |
5364 | error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, | 5357 | error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, |
5365 | &cur, 0, &tmp_logflags, whichfork); | 5358 | &cur, 0, &tmp_logflags, whichfork); |
@@ -5370,8 +5363,7 @@ nodelete: | |||
5370 | /* | 5363 | /* |
5371 | * transform from btree to extents, give it cur | 5364 | * transform from btree to extents, give it cur |
5372 | */ | 5365 | */ |
5373 | else if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && | 5366 | else if (xfs_bmap_wants_extents(ip, whichfork)) { |
5374 | XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) { | ||
5375 | ASSERT(cur != NULL); | 5367 | ASSERT(cur != NULL); |
5376 | error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags, | 5368 | error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags, |
5377 | whichfork); | 5369 | whichfork); |
@@ -5382,8 +5374,6 @@ nodelete: | |||
5382 | /* | 5374 | /* |
5383 | * transform from extents to local? | 5375 | * transform from extents to local? |
5384 | */ | 5376 | */ |
5385 | ASSERT(ifp->if_ext_max == | ||
5386 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
5387 | error = 0; | 5377 | error = 0; |
5388 | error0: | 5378 | error0: |
5389 | /* | 5379 | /* |
@@ -5434,7 +5424,7 @@ xfs_getbmapx_fix_eof_hole( | |||
5434 | if (startblock == HOLESTARTBLOCK) { | 5424 | if (startblock == HOLESTARTBLOCK) { |
5435 | mp = ip->i_mount; | 5425 | mp = ip->i_mount; |
5436 | out->bmv_block = -1; | 5426 | out->bmv_block = -1; |
5437 | fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, ip->i_size)); | 5427 | fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip))); |
5438 | fixlen -= out->bmv_offset; | 5428 | fixlen -= out->bmv_offset; |
5439 | if (prealloced && out->bmv_offset + out->bmv_length == end) { | 5429 | if (prealloced && out->bmv_offset + out->bmv_length == end) { |
5440 | /* Came to hole at EOF. Trim it. */ | 5430 | /* Came to hole at EOF. Trim it. */ |
@@ -5522,7 +5512,7 @@ xfs_getbmap( | |||
5522 | fixlen = XFS_MAXIOFFSET(mp); | 5512 | fixlen = XFS_MAXIOFFSET(mp); |
5523 | } else { | 5513 | } else { |
5524 | prealloced = 0; | 5514 | prealloced = 0; |
5525 | fixlen = ip->i_size; | 5515 | fixlen = XFS_ISIZE(ip); |
5526 | } | 5516 | } |
5527 | } | 5517 | } |
5528 | 5518 | ||
@@ -5551,7 +5541,7 @@ xfs_getbmap( | |||
5551 | 5541 | ||
5552 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 5542 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
5553 | if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { | 5543 | if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { |
5554 | if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) { | 5544 | if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) { |
5555 | error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF); | 5545 | error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF); |
5556 | if (error) | 5546 | if (error) |
5557 | goto out_unlock_iolock; | 5547 | goto out_unlock_iolock; |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 654dc6f05bac..dd974a55c77d 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -163,12 +163,14 @@ xfs_swap_extents_check_format( | |||
163 | 163 | ||
164 | /* Check temp in extent form to max in target */ | 164 | /* Check temp in extent form to max in target */ |
165 | if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | 165 | if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && |
166 | XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max) | 166 | XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > |
167 | XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) | ||
167 | return EINVAL; | 168 | return EINVAL; |
168 | 169 | ||
169 | /* Check target in extent form to max in temp */ | 170 | /* Check target in extent form to max in temp */ |
170 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | 171 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && |
171 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) | 172 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > |
173 | XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) | ||
172 | return EINVAL; | 174 | return EINVAL; |
173 | 175 | ||
174 | /* | 176 | /* |
@@ -180,18 +182,25 @@ xfs_swap_extents_check_format( | |||
180 | * (a common defrag case) which will occur when the temp inode is in | 182 | * (a common defrag case) which will occur when the temp inode is in |
181 | * extent format... | 183 | * extent format... |
182 | */ | 184 | */ |
183 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && | 185 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { |
184 | ((XFS_IFORK_BOFF(ip) && | 186 | if (XFS_IFORK_BOFF(ip) && |
185 | tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) || | 187 | tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) |
186 | XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max)) | 188 | return EINVAL; |
187 | return EINVAL; | 189 | if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= |
190 | XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) | ||
191 | return EINVAL; | ||
192 | } | ||
188 | 193 | ||
189 | /* Reciprocal target->temp btree format checks */ | 194 | /* Reciprocal target->temp btree format checks */ |
190 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && | 195 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { |
191 | ((XFS_IFORK_BOFF(tip) && | 196 | if (XFS_IFORK_BOFF(tip) && |
192 | ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) || | 197 | ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) |
193 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max)) | 198 | return EINVAL; |
194 | return EINVAL; | 199 | |
200 | if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= | ||
201 | XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) | ||
202 | return EINVAL; | ||
203 | } | ||
195 | 204 | ||
196 | return 0; | 205 | return 0; |
197 | } | 206 | } |
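The restructuring above trades one compound conditional for two separately commented checks without changing the decision. A standalone equivalence check with stand-in names (none of these identifiers are XFS's):

```c
#include <assert.h>
#include <stdbool.h>

/* the original compound test, shape only */
static bool reject_compound(bool is_btree, int boff, int broot_bytes,
			    int nextents, int maxext)
{
	return is_btree &&
	       ((boff && broot_bytes > boff) || nextents <= maxext);
}

/* the split version: same logic, each branch documentable on its own */
static bool reject_split(bool is_btree, int boff, int broot_bytes,
			 int nextents, int maxext)
{
	if (is_btree) {
		if (boff && broot_bytes > boff)
			return true;	/* btree root would not fit */
		if (nextents <= maxext)
			return true;	/* belongs in extent format */
	}
	return false;
}

int main(void)
{
	/* spot-check a few corners: the two forms agree in all of them */
	assert(reject_compound(true, 0, 9, 5, 8) == reject_split(true, 0, 9, 5, 8));
	assert(reject_compound(true, 4, 9, 9, 8) == reject_split(true, 4, 9, 9, 8));
	assert(reject_compound(false, 4, 9, 5, 8) == reject_split(false, 4, 9, 5, 8));
	return 0;
}
```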
@@ -349,16 +358,6 @@ xfs_swap_extents( | |||
349 | *tifp = *tempifp; /* struct copy */ | 358 | *tifp = *tempifp; /* struct copy */ |
350 | 359 | ||
351 | /* | 360 | /* |
352 | * Fix the in-memory data fork values that are dependent on the fork | ||
353 | * offset in the inode. We can't assume they remain the same as attr2 | ||
354 | * has dynamic fork offsets. | ||
355 | */ | ||
356 | ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) / | ||
357 | (uint)sizeof(xfs_bmbt_rec_t); | ||
358 | tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) / | ||
359 | (uint)sizeof(xfs_bmbt_rec_t); | ||
360 | |||
361 | /* | ||
362 | * Fix the on-disk inode values | 361 | * Fix the on-disk inode values |
363 | */ | 362 | */ |
364 | tmp = (__uint64_t)ip->i_d.di_nblocks; | 363 | tmp = (__uint64_t)ip->i_d.di_nblocks; |
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 8a24f0c6c860..286a051f12cf 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c | |||
@@ -68,7 +68,7 @@ xfs_trim_extents( | |||
68 | * Look up the longest btree in the AGF and start with it. | 68 | * Look up the longest btree in the AGF and start with it. |
69 | */ | 69 | */ |
70 | error = xfs_alloc_lookup_le(cur, 0, | 70 | error = xfs_alloc_lookup_le(cur, 0, |
71 | XFS_BUF_TO_AGF(agbp)->agf_longest, &i); | 71 | be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i); |
72 | if (error) | 72 | if (error) |
73 | goto out_del_cursor; | 73 | goto out_del_cursor; |
74 | 74 | ||
@@ -84,7 +84,7 @@ xfs_trim_extents( | |||
84 | if (error) | 84 | if (error) |
85 | goto out_del_cursor; | 85 | goto out_del_cursor; |
86 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); | 86 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); |
87 | ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); | 87 | ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); |
88 | 88 | ||
89 | /* | 89 | /* |
90 | * Too small? Give up. | 90 | * Too small? Give up. |
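The two `be32_to_cpu()` additions above fix a classic on-disk endianness bug: XFS metadata fields such as `agf_longest` are stored big-endian, so using the raw bytes as a native integer yields garbage on little-endian CPUs. A userspace demonstration of the bug class, with `htonl`/`ntohl` standing in for the kernel's big-endian conversions:

```c
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>	/* htonl/ntohl: network order is big-endian */

int main(void)
{
	uint32_t disk_longest = htonl(4096);	/* field as stored on disk */

	/* misinterpreted on a little-endian host: prints 1048576 */
	printf("raw:       %u\n", (unsigned)disk_longest);
	/* converted, correct on any host: prints 4096 */
	printf("converted: %u\n", (unsigned)ntohl(disk_longest));
	return 0;
}
```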
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f675f3d9d7b3..7e5bc872f2b4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -327,7 +327,7 @@ xfs_file_aio_read( | |||
327 | mp->m_rtdev_targp : mp->m_ddev_targp; | 327 | mp->m_rtdev_targp : mp->m_ddev_targp; |
328 | if ((iocb->ki_pos & target->bt_smask) || | 328 | if ((iocb->ki_pos & target->bt_smask) || |
329 | (size & target->bt_smask)) { | 329 | (size & target->bt_smask)) { |
330 | if (iocb->ki_pos == ip->i_size) | 330 | if (iocb->ki_pos == i_size_read(inode)) |
331 | return 0; | 331 | return 0; |
332 | return -XFS_ERROR(EINVAL); | 332 | return -XFS_ERROR(EINVAL); |
333 | } | 333 | } |
@@ -412,51 +412,6 @@ xfs_file_splice_read( | |||
412 | return ret; | 412 | return ret; |
413 | } | 413 | } |
414 | 414 | ||
415 | STATIC void | ||
416 | xfs_aio_write_isize_update( | ||
417 | struct inode *inode, | ||
418 | loff_t *ppos, | ||
419 | ssize_t bytes_written) | ||
420 | { | ||
421 | struct xfs_inode *ip = XFS_I(inode); | ||
422 | xfs_fsize_t isize = i_size_read(inode); | ||
423 | |||
424 | if (bytes_written > 0) | ||
425 | XFS_STATS_ADD(xs_write_bytes, bytes_written); | ||
426 | |||
427 | if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && | ||
428 | *ppos > isize)) | ||
429 | *ppos = isize; | ||
430 | |||
431 | if (*ppos > ip->i_size) { | ||
432 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
433 | if (*ppos > ip->i_size) | ||
434 | ip->i_size = *ppos; | ||
435 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
436 | } | ||
437 | } | ||
438 | |||
439 | /* | ||
440 | * If this was a direct or synchronous I/O that failed (such as ENOSPC) then | ||
441 | * part of the I/O may have been written to disk before the error occurred. In | ||
442 | * this case the on-disk file size may have been adjusted beyond the in-memory | ||
443 | * file size and now needs to be truncated back. | ||
444 | */ | ||
445 | STATIC void | ||
446 | xfs_aio_write_newsize_update( | ||
447 | struct xfs_inode *ip, | ||
448 | xfs_fsize_t new_size) | ||
449 | { | ||
450 | if (new_size == ip->i_new_size) { | ||
451 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
452 | if (new_size == ip->i_new_size) | ||
453 | ip->i_new_size = 0; | ||
454 | if (ip->i_d.di_size > ip->i_size) | ||
455 | ip->i_d.di_size = ip->i_size; | ||
456 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
457 | } | ||
458 | } | ||
459 | |||
460 | /* | 415 | /* |
461 | * xfs_file_splice_write() does not use xfs_rw_ilock() because | 416 | * xfs_file_splice_write() does not use xfs_rw_ilock() because |
462 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, | 417 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, |
@@ -475,7 +430,6 @@ xfs_file_splice_write( | |||
475 | { | 430 | { |
476 | struct inode *inode = outfilp->f_mapping->host; | 431 | struct inode *inode = outfilp->f_mapping->host; |
477 | struct xfs_inode *ip = XFS_I(inode); | 432 | struct xfs_inode *ip = XFS_I(inode); |
478 | xfs_fsize_t new_size; | ||
479 | int ioflags = 0; | 433 | int ioflags = 0; |
480 | ssize_t ret; | 434 | ssize_t ret; |
481 | 435 | ||
@@ -489,19 +443,12 @@ xfs_file_splice_write( | |||
489 | 443 | ||
490 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | 444 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
491 | 445 | ||
492 | new_size = *ppos + count; | ||
493 | |||
494 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
495 | if (new_size > ip->i_size) | ||
496 | ip->i_new_size = new_size; | ||
497 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
498 | |||
499 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); | 446 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); |
500 | 447 | ||
501 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); | 448 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); |
449 | if (ret > 0) | ||
450 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
502 | 451 | ||
503 | xfs_aio_write_isize_update(inode, ppos, ret); | ||
504 | xfs_aio_write_newsize_update(ip, new_size); | ||
505 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 452 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
506 | return ret; | 453 | return ret; |
507 | } | 454 | } |
@@ -689,28 +636,26 @@ out_lock: | |||
689 | /* | 636 | /* |
690 | * Common pre-write limit and setup checks. | 637 | * Common pre-write limit and setup checks. |
691 | * | 638 | * |
692 | * Returns with iolock held according to @iolock. | 639 | * Called with the iolock held either shared or exclusive according to |
640 | * @iolock, and returns with it held. Might upgrade the iolock to exclusive | ||
641 | * if called for a direct write beyond i_size. | ||
693 | */ | 642 | */ |
694 | STATIC ssize_t | 643 | STATIC ssize_t |
695 | xfs_file_aio_write_checks( | 644 | xfs_file_aio_write_checks( |
696 | struct file *file, | 645 | struct file *file, |
697 | loff_t *pos, | 646 | loff_t *pos, |
698 | size_t *count, | 647 | size_t *count, |
699 | xfs_fsize_t *new_sizep, | ||
700 | int *iolock) | 648 | int *iolock) |
701 | { | 649 | { |
702 | struct inode *inode = file->f_mapping->host; | 650 | struct inode *inode = file->f_mapping->host; |
703 | struct xfs_inode *ip = XFS_I(inode); | 651 | struct xfs_inode *ip = XFS_I(inode); |
704 | xfs_fsize_t new_size; | ||
705 | int error = 0; | 652 | int error = 0; |
706 | 653 | ||
707 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | 654 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); |
708 | *new_sizep = 0; | ||
709 | restart: | 655 | restart: |
710 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); | 656 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); |
711 | if (error) { | 657 | if (error) { |
712 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | 658 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
713 | *iolock = 0; | ||
714 | return error; | 659 | return error; |
715 | } | 660 | } |
716 | 661 | ||
@@ -720,36 +665,21 @@ restart: | |||
720 | /* | 665 | /* |
721 | * If the offset is beyond the size of the file, we need to zero any | 666 | * If the offset is beyond the size of the file, we need to zero any |
722 | * blocks that fall between the existing EOF and the start of this | 667 | * blocks that fall between the existing EOF and the start of this |
723 | * write. There is no need to issue zeroing if another in-flght IO ends | 668 | * write. If zeroing is needed and we are currently holding the |
724 | * at or before this one If zeronig is needed and we are currently | 669 | * iolock shared, we need to update it to exclusive which involves |
725 | * holding the iolock shared, we need to update it to exclusive which | 670 | * dropping all locks and relocking to maintain correct locking order. |
726 | * involves dropping all locks and relocking to maintain correct locking | 671 | * If we do this, restart the function to ensure all checks and values |
727 | * order. If we do this, restart the function to ensure all checks and | 672 | * are still valid. |
728 | * values are still valid. | ||
729 | */ | 673 | */ |
730 | if ((ip->i_new_size && *pos > ip->i_new_size) || | 674 | if (*pos > i_size_read(inode)) { |
731 | (!ip->i_new_size && *pos > ip->i_size)) { | ||
732 | if (*iolock == XFS_IOLOCK_SHARED) { | 675 | if (*iolock == XFS_IOLOCK_SHARED) { |
733 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | 676 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); |
734 | *iolock = XFS_IOLOCK_EXCL; | 677 | *iolock = XFS_IOLOCK_EXCL; |
735 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | 678 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); |
736 | goto restart; | 679 | goto restart; |
737 | } | 680 | } |
738 | error = -xfs_zero_eof(ip, *pos, ip->i_size); | 681 | error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); |
739 | } | 682 | } |
740 | |||
741 | /* | ||
742 | * If this IO extends beyond EOF, we may need to update ip->i_new_size. | ||
743 | * We have already zeroed space beyond EOF (if necessary). Only update | ||
744 | * ip->i_new_size if this IO ends beyond any other in-flight writes. | ||
745 | */ | ||
746 | new_size = *pos + *count; | ||
747 | if (new_size > ip->i_size) { | ||
748 | if (new_size > ip->i_new_size) | ||
749 | ip->i_new_size = new_size; | ||
750 | *new_sizep = new_size; | ||
751 | } | ||
752 | |||
753 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | 683 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
754 | if (error) | 684 | if (error) |
755 | return error; | 685 | return error; |
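The rewritten checks keep one idiom worth calling out: when a shared iolock holder discovers it must zero beyond EOF, it cannot upgrade the lock in place; it drops everything, retakes it exclusive, and jumps back to redo every check, since the inode may have changed while no lock was held. A skeleton of that control flow with stand-in lock operations (nothing here is an XFS identifier):

```c
#include <stdbool.h>

enum lockmode { LOCK_SHARED, LOCK_EXCL };

static void take(enum lockmode m) { (void)m; }	/* stand-in lock ops */
static void drop(enum lockmode m) { (void)m; }
static bool beyond_eof(void)      { return false; }

static int write_checks(enum lockmode *iolock)
{
restart:
	/* ...generic size/limit checks would run here... */
	if (beyond_eof() && *iolock == LOCK_SHARED) {
		drop(*iolock);
		*iolock = LOCK_EXCL;
		take(*iolock);
		goto restart;	/* re-validate under the stronger lock */
	}
	/* ...zero the range between old EOF and the write start... */
	return 0;
}
```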
@@ -794,9 +724,7 @@ xfs_file_dio_aio_write( | |||
794 | const struct iovec *iovp, | 724 | const struct iovec *iovp, |
795 | unsigned long nr_segs, | 725 | unsigned long nr_segs, |
796 | loff_t pos, | 726 | loff_t pos, |
797 | size_t ocount, | 727 | size_t ocount) |
798 | xfs_fsize_t *new_size, | ||
799 | int *iolock) | ||
800 | { | 728 | { |
801 | struct file *file = iocb->ki_filp; | 729 | struct file *file = iocb->ki_filp; |
802 | struct address_space *mapping = file->f_mapping; | 730 | struct address_space *mapping = file->f_mapping; |
@@ -806,10 +734,10 @@ xfs_file_dio_aio_write( | |||
806 | ssize_t ret = 0; | 734 | ssize_t ret = 0; |
807 | size_t count = ocount; | 735 | size_t count = ocount; |
808 | int unaligned_io = 0; | 736 | int unaligned_io = 0; |
737 | int iolock; | ||
809 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? | 738 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? |
810 | mp->m_rtdev_targp : mp->m_ddev_targp; | 739 | mp->m_rtdev_targp : mp->m_ddev_targp; |
811 | 740 | ||
812 | *iolock = 0; | ||
813 | if ((pos & target->bt_smask) || (count & target->bt_smask)) | 741 | if ((pos & target->bt_smask) || (count & target->bt_smask)) |
814 | return -XFS_ERROR(EINVAL); | 742 | return -XFS_ERROR(EINVAL); |
815 | 743 | ||
@@ -824,31 +752,31 @@ xfs_file_dio_aio_write( | |||
824 | * EOF zeroing cases and fill out the new inode size as appropriate. | 752 | * EOF zeroing cases and fill out the new inode size as appropriate. |
825 | */ | 753 | */ |
826 | if (unaligned_io || mapping->nrpages) | 754 | if (unaligned_io || mapping->nrpages) |
827 | *iolock = XFS_IOLOCK_EXCL; | 755 | iolock = XFS_IOLOCK_EXCL; |
828 | else | 756 | else |
829 | *iolock = XFS_IOLOCK_SHARED; | 757 | iolock = XFS_IOLOCK_SHARED; |
830 | xfs_rw_ilock(ip, *iolock); | 758 | xfs_rw_ilock(ip, iolock); |
831 | 759 | ||
832 | /* | 760 | /* |
833 | * Recheck if there are cached pages that need invalidate after we got | 761 | * Recheck if there are cached pages that need invalidate after we got |
834 | * the iolock to protect against other threads adding new pages while | 762 | * the iolock to protect against other threads adding new pages while |
835 | * we were waiting for the iolock. | 763 | * we were waiting for the iolock. |
836 | */ | 764 | */ |
837 | if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) { | 765 | if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) { |
838 | xfs_rw_iunlock(ip, *iolock); | 766 | xfs_rw_iunlock(ip, iolock); |
839 | *iolock = XFS_IOLOCK_EXCL; | 767 | iolock = XFS_IOLOCK_EXCL; |
840 | xfs_rw_ilock(ip, *iolock); | 768 | xfs_rw_ilock(ip, iolock); |
841 | } | 769 | } |
842 | 770 | ||
843 | ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); | 771 | ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); |
844 | if (ret) | 772 | if (ret) |
845 | return ret; | 773 | goto out; |
846 | 774 | ||
847 | if (mapping->nrpages) { | 775 | if (mapping->nrpages) { |
848 | ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, | 776 | ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, |
849 | FI_REMAPF_LOCKED); | 777 | FI_REMAPF_LOCKED); |
850 | if (ret) | 778 | if (ret) |
851 | return ret; | 779 | goto out; |
852 | } | 780 | } |
853 | 781 | ||
854 | /* | 782 | /* |
@@ -857,15 +785,18 @@ xfs_file_dio_aio_write( | |||
857 | */ | 785 | */ |
858 | if (unaligned_io) | 786 | if (unaligned_io) |
859 | inode_dio_wait(inode); | 787 | inode_dio_wait(inode); |
860 | else if (*iolock == XFS_IOLOCK_EXCL) { | 788 | else if (iolock == XFS_IOLOCK_EXCL) { |
861 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); | 789 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
862 | *iolock = XFS_IOLOCK_SHARED; | 790 | iolock = XFS_IOLOCK_SHARED; |
863 | } | 791 | } |
864 | 792 | ||
865 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); | 793 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); |
866 | ret = generic_file_direct_write(iocb, iovp, | 794 | ret = generic_file_direct_write(iocb, iovp, |
867 | &nr_segs, pos, &iocb->ki_pos, count, ocount); | 795 | &nr_segs, pos, &iocb->ki_pos, count, ocount); |
868 | 796 | ||
797 | out: | ||
798 | xfs_rw_iunlock(ip, iolock); | ||
799 | |||
869 | /* No fallback to buffered IO on errors for XFS. */ | 800 | /* No fallback to buffered IO on errors for XFS. */ |
870 | ASSERT(ret < 0 || ret == count); | 801 | ASSERT(ret < 0 || ret == count); |
871 | return ret; | 802 | return ret; |
@@ -877,9 +808,7 @@ xfs_file_buffered_aio_write( | |||
877 | const struct iovec *iovp, | 808 | const struct iovec *iovp, |
878 | unsigned long nr_segs, | 809 | unsigned long nr_segs, |
879 | loff_t pos, | 810 | loff_t pos, |
880 | size_t ocount, | 811 | size_t ocount) |
881 | xfs_fsize_t *new_size, | ||
882 | int *iolock) | ||
883 | { | 812 | { |
884 | struct file *file = iocb->ki_filp; | 813 | struct file *file = iocb->ki_filp; |
885 | struct address_space *mapping = file->f_mapping; | 814 | struct address_space *mapping = file->f_mapping; |
@@ -887,14 +816,14 @@ xfs_file_buffered_aio_write( | |||
887 | struct xfs_inode *ip = XFS_I(inode); | 816 | struct xfs_inode *ip = XFS_I(inode); |
888 | ssize_t ret; | 817 | ssize_t ret; |
889 | int enospc = 0; | 818 | int enospc = 0; |
819 | int iolock = XFS_IOLOCK_EXCL; | ||
890 | size_t count = ocount; | 820 | size_t count = ocount; |
891 | 821 | ||
892 | *iolock = XFS_IOLOCK_EXCL; | 822 | xfs_rw_ilock(ip, iolock); |
893 | xfs_rw_ilock(ip, *iolock); | ||
894 | 823 | ||
895 | ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); | 824 | ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); |
896 | if (ret) | 825 | if (ret) |
897 | return ret; | 826 | goto out; |
898 | 827 | ||
899 | /* We can write back this queue in page reclaim */ | 828 | /* We can write back this queue in page reclaim */ |
900 | current->backing_dev_info = mapping->backing_dev_info; | 829 | current->backing_dev_info = mapping->backing_dev_info; |
@@ -908,13 +837,15 @@ write_retry: | |||
908 | * page locks and retry *once* | 837 | * page locks and retry *once* |
909 | */ | 838 | */ |
910 | if (ret == -ENOSPC && !enospc) { | 839 | if (ret == -ENOSPC && !enospc) { |
911 | ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); | ||
912 | if (ret) | ||
913 | return ret; | ||
914 | enospc = 1; | 840 | enospc = 1; |
915 | goto write_retry; | 841 | ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); |
842 | if (!ret) | ||
843 | goto write_retry; | ||
916 | } | 844 | } |
845 | |||
917 | current->backing_dev_info = NULL; | 846 | current->backing_dev_info = NULL; |
847 | out: | ||
848 | xfs_rw_iunlock(ip, iolock); | ||
918 | return ret; | 849 | return ret; |
919 | } | 850 | } |
920 | 851 | ||
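The reordered ENOSPC handling above is a retry-once pattern: set the flag before flushing, so a second ENOSPC (or a flush failure) falls through to the common unlock path instead of returning early or looping forever. Isolated with stand-in helpers:

```c
#include <errno.h>
#include <sys/types.h>

static ssize_t do_buffered_write(void) { return -ENOSPC; }	/* stand-in */
static int flush_dirty_pages(void)     { return 0; }		/* stand-in */

static ssize_t write_with_retry(void)
{
	int enospc = 0;
	ssize_t ret;

write_retry:
	ret = do_buffered_write();
	if (ret == -ENOSPC && !enospc) {
		enospc = 1;			/* one retry only */
		ret = flush_dirty_pages();	/* reclaim delalloc space */
		if (!ret)
			goto write_retry;
	}
	return ret;	/* falls through to the caller's cleanup */
}
```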
@@ -930,9 +861,7 @@ xfs_file_aio_write( | |||
930 | struct inode *inode = mapping->host; | 861 | struct inode *inode = mapping->host; |
931 | struct xfs_inode *ip = XFS_I(inode); | 862 | struct xfs_inode *ip = XFS_I(inode); |
932 | ssize_t ret; | 863 | ssize_t ret; |
933 | int iolock; | ||
934 | size_t ocount = 0; | 864 | size_t ocount = 0; |
935 | xfs_fsize_t new_size = 0; | ||
936 | 865 | ||
937 | XFS_STATS_INC(xs_write_calls); | 866 | XFS_STATS_INC(xs_write_calls); |
938 | 867 | ||
@@ -951,33 +880,22 @@ xfs_file_aio_write( | |||
951 | return -EIO; | 880 | return -EIO; |
952 | 881 | ||
953 | if (unlikely(file->f_flags & O_DIRECT)) | 882 | if (unlikely(file->f_flags & O_DIRECT)) |
954 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, | 883 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); |
955 | ocount, &new_size, &iolock); | ||
956 | else | 884 | else |
957 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, | 885 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, |
958 | ocount, &new_size, &iolock); | 886 | ocount); |
959 | |||
960 | xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); | ||
961 | 887 | ||
962 | if (ret <= 0) | 888 | if (ret > 0) { |
963 | goto out_unlock; | 889 | ssize_t err; |
964 | 890 | ||
965 | /* Handle various SYNC-type writes */ | 891 | XFS_STATS_ADD(xs_write_bytes, ret); |
966 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | ||
967 | loff_t end = pos + ret - 1; | ||
968 | int error; | ||
969 | 892 | ||
970 | xfs_rw_iunlock(ip, iolock); | 893 | /* Handle various SYNC-type writes */ |
971 | error = xfs_file_fsync(file, pos, end, | 894 | err = generic_write_sync(file, pos, ret); |
972 | (file->f_flags & __O_SYNC) ? 0 : 1); | 895 | if (err < 0) |
973 | xfs_rw_ilock(ip, iolock); | 896 | ret = err; |
974 | if (error) | ||
975 | ret = error; | ||
976 | } | 897 | } |
977 | 898 | ||
978 | out_unlock: | ||
979 | xfs_aio_write_newsize_update(ip, new_size); | ||
980 | xfs_rw_iunlock(ip, iolock); | ||
981 | return ret; | 899 | return ret; |
982 | } | 900 | } |
983 | 901 | ||
diff --git a/fs/xfs/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c index ed88ed16811c..652b875a9d4c 100644 --- a/fs/xfs/xfs_fs_subr.c +++ b/fs/xfs/xfs_fs_subr.c | |||
@@ -90,7 +90,7 @@ xfs_wait_on_pages( | |||
90 | 90 | ||
91 | if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { | 91 | if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { |
92 | return -filemap_fdatawait_range(mapping, first, | 92 | return -filemap_fdatawait_range(mapping, first, |
93 | last == -1 ? ip->i_size - 1 : last); | 93 | last == -1 ? XFS_ISIZE(ip) - 1 : last); |
94 | } | 94 | } |
95 | return 0; | 95 | return 0; |
96 | } | 96 | } |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 3960a066d7ff..8c3e46394d48 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -77,7 +77,7 @@ xfs_inode_alloc( | |||
77 | 77 | ||
78 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 78 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
79 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 79 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
80 | ASSERT(completion_done(&ip->i_flush)); | 80 | ASSERT(!xfs_isiflocked(ip)); |
81 | ASSERT(ip->i_ino == 0); | 81 | ASSERT(ip->i_ino == 0); |
82 | 82 | ||
83 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 83 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
@@ -94,8 +94,6 @@ xfs_inode_alloc( | |||
94 | ip->i_update_core = 0; | 94 | ip->i_update_core = 0; |
95 | ip->i_delayed_blks = 0; | 95 | ip->i_delayed_blks = 0; |
96 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); | 96 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); |
97 | ip->i_size = 0; | ||
98 | ip->i_new_size = 0; | ||
99 | 97 | ||
100 | return ip; | 98 | return ip; |
101 | } | 99 | } |
@@ -150,7 +148,7 @@ xfs_inode_free( | |||
150 | /* asserts to verify all state is correct here */ | 148 | /* asserts to verify all state is correct here */ |
151 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 149 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
152 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 150 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
153 | ASSERT(completion_done(&ip->i_flush)); | 151 | ASSERT(!xfs_isiflocked(ip)); |
154 | 152 | ||
155 | /* | 153 | /* |
156 | * Because we use RCU freeing we need to ensure the inode always | 154 | * Because we use RCU freeing we need to ensure the inode always |
@@ -450,8 +448,6 @@ again: | |||
450 | 448 | ||
451 | *ipp = ip; | 449 | *ipp = ip; |
452 | 450 | ||
453 | ASSERT(ip->i_df.if_ext_max == | ||
454 | XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); | ||
455 | /* | 451 | /* |
456 | * If we have a real type for an on-disk inode, we can set ops(&unlock) | 452 | * If we have a real type for an on-disk inode, we can set ops(&unlock) |
457 | * now. If it's a new inode being created, xfs_ialloc will handle it. | 453 | * now. If it's a new inode being created, xfs_ialloc will handle it. |
@@ -715,3 +711,19 @@ xfs_isilocked( | |||
715 | return 0; | 711 | return 0; |
716 | } | 712 | } |
717 | #endif | 713 | #endif |
714 | |||
715 | void | ||
716 | __xfs_iflock( | ||
717 | struct xfs_inode *ip) | ||
718 | { | ||
719 | wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT); | ||
720 | DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); | ||
721 | |||
722 | do { | ||
723 | prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); | ||
724 | if (xfs_isiflocked(ip)) | ||
725 | io_schedule(); | ||
726 | } while (!xfs_iflock_nowait(ip)); | ||
727 | |||
728 | finish_wait(wq, &wait.wait); | ||
729 | } | ||
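The new `__xfs_iflock()` swaps a per-inode `struct completion` for a flag bit plus the kernel's shared bit-waitqueue machinery, shrinking the inode. A sketch of the general shape, with the trylock modeled by `test_and_set_bit()` (XFS wraps its flag manipulation in its own helpers, so this is an analogue, not the XFS code). The exclusive wait is deliberate: only one waiter can win the flush lock, so waking one at a time suffices, whereas `__xfs_iunpin_wait()` later in this diff uses the non-exclusive `prepare_to_wait()` because every waiter may proceed once the pin count hits zero.

```c
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/bitops.h>

/* Sleep until the flag bit can be taken.  The release path must
 * clear_bit() and then wake the same (word, bit) pair via wake_up_bit(). */
static void lock_flag_bit(unsigned long *flags, int bit)
{
	wait_queue_head_t *wq = bit_waitqueue(flags, bit);
	DEFINE_WAIT_BIT(wait, flags, bit);

	do {
		prepare_to_wait_exclusive(wq, &wait.wait,
					  TASK_UNINTERRUPTIBLE);
		if (test_bit(bit, flags))	/* still held: sleep */
			io_schedule();
	} while (test_and_set_bit(bit, flags));	/* retry the trylock */

	finish_wait(wq, &wait.wait);
}
```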
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 9dda7cc32848..b21022499c2e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -299,11 +299,8 @@ xfs_iformat( | |||
299 | { | 299 | { |
300 | xfs_attr_shortform_t *atp; | 300 | xfs_attr_shortform_t *atp; |
301 | int size; | 301 | int size; |
302 | int error; | 302 | int error = 0; |
303 | xfs_fsize_t di_size; | 303 | xfs_fsize_t di_size; |
304 | ip->i_df.if_ext_max = | ||
305 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); | ||
306 | error = 0; | ||
307 | 304 | ||
308 | if (unlikely(be32_to_cpu(dip->di_nextents) + | 305 | if (unlikely(be32_to_cpu(dip->di_nextents) + |
309 | be16_to_cpu(dip->di_anextents) > | 306 | be16_to_cpu(dip->di_anextents) > |
@@ -350,7 +347,6 @@ xfs_iformat( | |||
350 | return XFS_ERROR(EFSCORRUPTED); | 347 | return XFS_ERROR(EFSCORRUPTED); |
351 | } | 348 | } |
352 | ip->i_d.di_size = 0; | 349 | ip->i_d.di_size = 0; |
353 | ip->i_size = 0; | ||
354 | ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); | 350 | ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); |
355 | break; | 351 | break; |
356 | 352 | ||
@@ -409,10 +405,10 @@ xfs_iformat( | |||
409 | } | 405 | } |
410 | if (!XFS_DFORK_Q(dip)) | 406 | if (!XFS_DFORK_Q(dip)) |
411 | return 0; | 407 | return 0; |
408 | |||
412 | ASSERT(ip->i_afp == NULL); | 409 | ASSERT(ip->i_afp == NULL); |
413 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); | 410 | ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); |
414 | ip->i_afp->if_ext_max = | 411 | |
415 | XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); | ||
416 | switch (dip->di_aformat) { | 412 | switch (dip->di_aformat) { |
417 | case XFS_DINODE_FMT_LOCAL: | 413 | case XFS_DINODE_FMT_LOCAL: |
418 | atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); | 414 | atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); |
@@ -604,10 +600,11 @@ xfs_iformat_btree( | |||
604 | * or the number of extents is greater than the number of | 600 | * or the number of extents is greater than the number of |
605 | * blocks. | 601 | * blocks. |
606 | */ | 602 | */ |
607 | if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max | 603 | if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= |
608 | || XFS_BMDR_SPACE_CALC(nrecs) > | 604 | XFS_IFORK_MAXEXT(ip, whichfork) || |
609 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) | 605 | XFS_BMDR_SPACE_CALC(nrecs) > |
610 | || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { | 606 | XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) || |
607 | XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { | ||
611 | xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", | 608 | xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", |
612 | (unsigned long long) ip->i_ino); | 609 | (unsigned long long) ip->i_ino); |
613 | XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, | 610 | XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, |
@@ -835,12 +832,6 @@ xfs_iread( | |||
835 | * with the uninitialized part of it. | 832 | * with the uninitialized part of it. |
836 | */ | 833 | */ |
837 | ip->i_d.di_mode = 0; | 834 | ip->i_d.di_mode = 0; |
838 | /* | ||
839 | * Initialize the per-fork minima and maxima for a new | ||
840 | * inode here. xfs_iformat will do it for old inodes. | ||
841 | */ | ||
842 | ip->i_df.if_ext_max = | ||
843 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); | ||
844 | } | 835 | } |
845 | 836 | ||
846 | /* | 837 | /* |
@@ -861,7 +852,6 @@ xfs_iread( | |||
861 | } | 852 | } |
862 | 853 | ||
863 | ip->i_delayed_blks = 0; | 854 | ip->i_delayed_blks = 0; |
864 | ip->i_size = ip->i_d.di_size; | ||
865 | 855 | ||
866 | /* | 856 | /* |
867 | * Mark the buffer containing the inode as something to keep | 857 | * Mark the buffer containing the inode as something to keep |
@@ -1051,7 +1041,6 @@ xfs_ialloc( | |||
1051 | } | 1041 | } |
1052 | 1042 | ||
1053 | ip->i_d.di_size = 0; | 1043 | ip->i_d.di_size = 0; |
1054 | ip->i_size = 0; | ||
1055 | ip->i_d.di_nextents = 0; | 1044 | ip->i_d.di_nextents = 0; |
1056 | ASSERT(ip->i_d.di_nblocks == 0); | 1045 | ASSERT(ip->i_d.di_nblocks == 0); |
1057 | 1046 | ||
@@ -1166,52 +1155,6 @@ xfs_ialloc( | |||
1166 | } | 1155 | } |
1167 | 1156 | ||
1168 | /* | 1157 | /* |
1169 | * Check to make sure that there are no blocks allocated to the | ||
1170 | * file beyond the size of the file. We don't check this for | ||
1171 | * files with fixed size extents or real time extents, but we | ||
1172 | * at least do it for regular files. | ||
1173 | */ | ||
1174 | #ifdef DEBUG | ||
1175 | STATIC void | ||
1176 | xfs_isize_check( | ||
1177 | struct xfs_inode *ip, | ||
1178 | xfs_fsize_t isize) | ||
1179 | { | ||
1180 | struct xfs_mount *mp = ip->i_mount; | ||
1181 | xfs_fileoff_t map_first; | ||
1182 | int nimaps; | ||
1183 | xfs_bmbt_irec_t imaps[2]; | ||
1184 | int error; | ||
1185 | |||
1186 | if (!S_ISREG(ip->i_d.di_mode)) | ||
1187 | return; | ||
1188 | |||
1189 | if (XFS_IS_REALTIME_INODE(ip)) | ||
1190 | return; | ||
1191 | |||
1192 | if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) | ||
1193 | return; | ||
1194 | |||
1195 | nimaps = 2; | ||
1196 | map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); | ||
1197 | /* | ||
1198 | * The filesystem could be shutting down, so bmapi may return | ||
1199 | * an error. | ||
1200 | */ | ||
1201 | error = xfs_bmapi_read(ip, map_first, | ||
1202 | (XFS_B_TO_FSB(mp, | ||
1203 | (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first), | ||
1204 | imaps, &nimaps, XFS_BMAPI_ENTIRE); | ||
1205 | if (error) | ||
1206 | return; | ||
1207 | ASSERT(nimaps == 1); | ||
1208 | ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); | ||
1209 | } | ||
1210 | #else /* DEBUG */ | ||
1211 | #define xfs_isize_check(ip, isize) | ||
1212 | #endif /* DEBUG */ | ||
1213 | |||
1214 | /* | ||
1215 | * Free up the underlying blocks past new_size. The new size must be smaller | 1158 | * Free up the underlying blocks past new_size. The new size must be smaller |
1216 | * than the current size. This routine can be used both for the attribute and | 1159 | * than the current size. This routine can be used both for the attribute and |
1217 | * data fork, and does not modify the inode size, which is left to the caller. | 1160 | * data fork, and does not modify the inode size, which is left to the caller. |
@@ -1252,12 +1195,14 @@ xfs_itruncate_extents( | |||
1252 | int done = 0; | 1195 | int done = 0; |
1253 | 1196 | ||
1254 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 1197 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); |
1255 | ASSERT(new_size <= ip->i_size); | 1198 | ASSERT(new_size <= XFS_ISIZE(ip)); |
1256 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); | 1199 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); |
1257 | ASSERT(ip->i_itemp != NULL); | 1200 | ASSERT(ip->i_itemp != NULL); |
1258 | ASSERT(ip->i_itemp->ili_lock_flags == 0); | 1201 | ASSERT(ip->i_itemp->ili_lock_flags == 0); |
1259 | ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); | 1202 | ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); |
1260 | 1203 | ||
1204 | trace_xfs_itruncate_extents_start(ip, new_size); | ||
1205 | |||
1261 | /* | 1206 | /* |
1262 | * Since it is possible for space to become allocated beyond | 1207 | * Since it is possible for space to become allocated beyond |
1263 | * the end of the file (in a crash where the space is allocated | 1208 | * the end of the file (in a crash where the space is allocated |
@@ -1325,6 +1270,14 @@ xfs_itruncate_extents( | |||
1325 | goto out; | 1270 | goto out; |
1326 | } | 1271 | } |
1327 | 1272 | ||
1273 | /* | ||
1274 | * Always re-log the inode so that our permanent transaction can keep | ||
1275 | * on rolling it forward in the log. | ||
1276 | */ | ||
1277 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
1278 | |||
1279 | trace_xfs_itruncate_extents_end(ip, new_size); | ||
1280 | |||
1328 | out: | 1281 | out: |
1329 | *tpp = tp; | 1282 | *tpp = tp; |
1330 | return error; | 1283 | return error; |
@@ -1338,74 +1291,6 @@ out_bmap_cancel: | |||
1338 | goto out; | 1291 | goto out; |
1339 | } | 1292 | } |
1340 | 1293 | ||
1341 | int | ||
1342 | xfs_itruncate_data( | ||
1343 | struct xfs_trans **tpp, | ||
1344 | struct xfs_inode *ip, | ||
1345 | xfs_fsize_t new_size) | ||
1346 | { | ||
1347 | int error; | ||
1348 | |||
1349 | trace_xfs_itruncate_data_start(ip, new_size); | ||
1350 | |||
1351 | /* | ||
1352 | * The first thing we do is set the size to new_size permanently on | ||
1353 | * disk. This way we don't have to worry about anyone ever being able | ||
1354 | * to look at the data being freed even in the face of a crash. | ||
1355 | * What we're getting around here is the case where we free a block, it | ||
1356 | * is allocated to another file, it is written to, and then we crash. | ||
1357 | * If the new data gets written to the file but the log buffers | ||
1358 | * containing the free and reallocation don't, then we'd end up with | ||
1359 | * garbage in the blocks being freed. As long as we make the new_size | ||
1360 | * permanent before actually freeing any blocks it doesn't matter if | ||
1361 | * they get written to. | ||
1362 | */ | ||
1363 | if (ip->i_d.di_nextents > 0) { | ||
1364 | /* | ||
1365 | * If we are not changing the file size then do not update | ||
1366 | * the on-disk file size - we may be called from | ||
1367 | * xfs_inactive_free_eofblocks(). If we update the on-disk | ||
1368 | * file size and then the system crashes before the contents | ||
1369 | * of the file are flushed to disk then the files may be | ||
1370 | * full of holes (ie NULL files bug). | ||
1371 | */ | ||
1372 | if (ip->i_size != new_size) { | ||
1373 | ip->i_d.di_size = new_size; | ||
1374 | ip->i_size = new_size; | ||
1375 | xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); | ||
1376 | } | ||
1377 | } | ||
1378 | |||
1379 | error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size); | ||
1380 | if (error) | ||
1381 | return error; | ||
1382 | |||
1383 | /* | ||
1384 | * If we are not changing the file size then do not update the on-disk | ||
1385 | * file size - we may be called from xfs_inactive_free_eofblocks(). | ||
1386 | * If we update the on-disk file size and then the system crashes | ||
1387 | * before the contents of the file are flushed to disk then the files | ||
1388 | * may be full of holes (ie NULL files bug). | ||
1389 | */ | ||
1390 | xfs_isize_check(ip, new_size); | ||
1391 | if (ip->i_size != new_size) { | ||
1392 | ip->i_d.di_size = new_size; | ||
1393 | ip->i_size = new_size; | ||
1394 | } | ||
1395 | |||
1396 | ASSERT(new_size != 0 || ip->i_delayed_blks == 0); | ||
1397 | ASSERT(new_size != 0 || ip->i_d.di_nextents == 0); | ||
1398 | |||
1399 | /* | ||
1400 | * Always re-log the inode so that our permanent transaction can keep | ||
1401 | * on rolling it forward in the log. | ||
1402 | */ | ||
1403 | xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); | ||
1404 | |||
1405 | trace_xfs_itruncate_data_end(ip, new_size); | ||
1406 | return 0; | ||
1407 | } | ||
1408 | |||
1409 | /* | 1294 | /* |
1410 | * This is called when the inode's link count goes to 0. | 1295 | * This is called when the inode's link count goes to 0. |
1411 | * We place the on-disk inode on a list in the AGI. It | 1296 | * We place the on-disk inode on a list in the AGI. It |
@@ -1824,8 +1709,7 @@ xfs_ifree( | |||
1824 | ASSERT(ip->i_d.di_nlink == 0); | 1709 | ASSERT(ip->i_d.di_nlink == 0); |
1825 | ASSERT(ip->i_d.di_nextents == 0); | 1710 | ASSERT(ip->i_d.di_nextents == 0); |
1826 | ASSERT(ip->i_d.di_anextents == 0); | 1711 | ASSERT(ip->i_d.di_anextents == 0); |
1827 | ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) || | 1712 | ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode)); |
1828 | (!S_ISREG(ip->i_d.di_mode))); | ||
1829 | ASSERT(ip->i_d.di_nblocks == 0); | 1713 | ASSERT(ip->i_d.di_nblocks == 0); |
1830 | 1714 | ||
1831 | /* | 1715 | /* |
@@ -1844,8 +1728,6 @@ xfs_ifree( | |||
1844 | ip->i_d.di_flags = 0; | 1728 | ip->i_d.di_flags = 0; |
1845 | ip->i_d.di_dmevmask = 0; | 1729 | ip->i_d.di_dmevmask = 0; |
1846 | ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ | 1730 | ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ |
1847 | ip->i_df.if_ext_max = | ||
1848 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); | ||
1849 | ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; | 1731 | ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; |
1850 | ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; | 1732 | ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; |
1851 | /* | 1733 | /* |
@@ -2151,7 +2033,7 @@ xfs_idestroy_fork( | |||
2151 | * once someone is waiting for it to be unpinned. | 2033 | * once someone is waiting for it to be unpinned. |
2152 | */ | 2034 | */ |
2153 | static void | 2035 | static void |
2154 | xfs_iunpin_nowait( | 2036 | xfs_iunpin( |
2155 | struct xfs_inode *ip) | 2037 | struct xfs_inode *ip) |
2156 | { | 2038 | { |
2157 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 2039 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
@@ -2163,14 +2045,29 @@ xfs_iunpin_nowait( | |||
2163 | 2045 | ||
2164 | } | 2046 | } |
2165 | 2047 | ||
2048 | static void | ||
2049 | __xfs_iunpin_wait( | ||
2050 | struct xfs_inode *ip) | ||
2051 | { | ||
2052 | wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT); | ||
2053 | DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT); | ||
2054 | |||
2055 | xfs_iunpin(ip); | ||
2056 | |||
2057 | do { | ||
2058 | prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); | ||
2059 | if (xfs_ipincount(ip)) | ||
2060 | io_schedule(); | ||
2061 | } while (xfs_ipincount(ip)); | ||
2062 | finish_wait(wq, &wait.wait); | ||
2063 | } | ||
2064 | |||
2166 | void | 2065 | void |
2167 | xfs_iunpin_wait( | 2066 | xfs_iunpin_wait( |
2168 | struct xfs_inode *ip) | 2067 | struct xfs_inode *ip) |
2169 | { | 2068 | { |
2170 | if (xfs_ipincount(ip)) { | 2069 | if (xfs_ipincount(ip)) |
2171 | xfs_iunpin_nowait(ip); | 2070 | __xfs_iunpin_wait(ip); |
2172 | wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0)); | ||
2173 | } | ||
2174 | } | 2071 | } |
2175 | 2072 | ||
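The ordering in __xfs_iunpin_wait() is what makes the handshake race-free: prepare_to_wait() queues the task and marks it sleeping before the pin count is re-checked, so a wakeup issued between the check and io_schedule() cannot be lost. The two halves, condensed from hunks in this patch:

    /* sleeper, from __xfs_iunpin_wait() above */
    prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);  /* queue first */
    if (xfs_ipincount(ip))                                  /* then re-check */
            io_schedule();                                  /* safe to sleep */

    /* waker, from xfs_inode_item_unpin() in xfs_inode_item.c below */
    if (atomic_dec_and_test(&ip->i_pincount))
            wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);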
2176 | /* | 2073 | /* |
@@ -2510,9 +2407,9 @@ xfs_iflush( | |||
2510 | XFS_STATS_INC(xs_iflush_count); | 2407 | XFS_STATS_INC(xs_iflush_count); |
2511 | 2408 | ||
2512 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 2409 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
2513 | ASSERT(!completion_done(&ip->i_flush)); | 2410 | ASSERT(xfs_isiflocked(ip)); |
2514 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 2411 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
2515 | ip->i_d.di_nextents > ip->i_df.if_ext_max); | 2412 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
2516 | 2413 | ||
2517 | iip = ip->i_itemp; | 2414 | iip = ip->i_itemp; |
2518 | mp = ip->i_mount; | 2415 | mp = ip->i_mount; |
@@ -2529,7 +2426,7 @@ xfs_iflush( | |||
2529 | * out for us if they occur after the log force completes. | 2426 | * out for us if they occur after the log force completes. |
2530 | */ | 2427 | */ |
2531 | if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { | 2428 | if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { |
2532 | xfs_iunpin_nowait(ip); | 2429 | xfs_iunpin(ip); |
2533 | xfs_ifunlock(ip); | 2430 | xfs_ifunlock(ip); |
2534 | return EAGAIN; | 2431 | return EAGAIN; |
2535 | } | 2432 | } |
@@ -2626,9 +2523,9 @@ xfs_iflush_int( | |||
2626 | #endif | 2523 | #endif |
2627 | 2524 | ||
2628 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 2525 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
2629 | ASSERT(!completion_done(&ip->i_flush)); | 2526 | ASSERT(xfs_isiflocked(ip)); |
2630 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 2527 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
2631 | ip->i_d.di_nextents > ip->i_df.if_ext_max); | 2528 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
2632 | 2529 | ||
2633 | iip = ip->i_itemp; | 2530 | iip = ip->i_itemp; |
2634 | mp = ip->i_mount; | 2531 | mp = ip->i_mount; |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index f0e6b151ba37..2f27b7454085 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -66,7 +66,6 @@ typedef struct xfs_ifork { | |||
66 | struct xfs_btree_block *if_broot; /* file's incore btree root */ | 66 | struct xfs_btree_block *if_broot; /* file's incore btree root */ |
67 | short if_broot_bytes; /* bytes allocated for root */ | 67 | short if_broot_bytes; /* bytes allocated for root */ |
68 | unsigned char if_flags; /* per-fork flags */ | 68 | unsigned char if_flags; /* per-fork flags */ |
69 | unsigned char if_ext_max; /* max # of extent records */ | ||
70 | union { | 69 | union { |
71 | xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ | 70 | xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ |
72 | xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ | 71 | xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ |
@@ -206,12 +205,12 @@ typedef struct xfs_icdinode { | |||
206 | ((w) == XFS_DATA_FORK ? \ | 205 | ((w) == XFS_DATA_FORK ? \ |
207 | ((ip)->i_d.di_nextents = (n)) : \ | 206 | ((ip)->i_d.di_nextents = (n)) : \ |
208 | ((ip)->i_d.di_anextents = (n))) | 207 | ((ip)->i_d.di_anextents = (n))) |
209 | 208 | #define XFS_IFORK_MAXEXT(ip, w) \ | |
209 | (XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t)) | ||
210 | 210 | ||
211 | 211 | ||
212 | #ifdef __KERNEL__ | 212 | #ifdef __KERNEL__ |
213 | 213 | ||
214 | struct bhv_desc; | ||
215 | struct xfs_buf; | 214 | struct xfs_buf; |
216 | struct xfs_bmap_free; | 215 | struct xfs_bmap_free; |
217 | struct xfs_bmbt_irec; | 216 | struct xfs_bmbt_irec; |
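XFS_IFORK_MAXEXT() computes on demand what the removed if_ext_max field used to cache. Since XFS_IFORK_SIZE() already accounts for which fork is asked about and the current fork offset (di_forkoff), the computed value tracks fork-offset changes automatically instead of relying on every code path to keep a cached copy in sync. The btree-format assertions seen earlier become, for example:

    /* "more extents than fit inline in the data fork" -- computed, not cached */
    ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
           ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));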
@@ -220,12 +219,6 @@ struct xfs_mount; | |||
220 | struct xfs_trans; | 219 | struct xfs_trans; |
221 | struct xfs_dquot; | 220 | struct xfs_dquot; |
222 | 221 | ||
223 | typedef struct dm_attrs_s { | ||
224 | __uint32_t da_dmevmask; /* DMIG event mask */ | ||
225 | __uint16_t da_dmstate; /* DMIG state info */ | ||
226 | __uint16_t da_pad; /* DMIG extra padding */ | ||
227 | } dm_attrs_t; | ||
228 | |||
229 | typedef struct xfs_inode { | 222 | typedef struct xfs_inode { |
230 | /* Inode linking and identification information. */ | 223 | /* Inode linking and identification information. */ |
231 | struct xfs_mount *i_mount; /* fs mount struct ptr */ | 224 | struct xfs_mount *i_mount; /* fs mount struct ptr */ |
@@ -244,27 +237,19 @@ typedef struct xfs_inode { | |||
244 | struct xfs_inode_log_item *i_itemp; /* logging information */ | 237 | struct xfs_inode_log_item *i_itemp; /* logging information */ |
245 | mrlock_t i_lock; /* inode lock */ | 238 | mrlock_t i_lock; /* inode lock */ |
246 | mrlock_t i_iolock; /* inode IO lock */ | 239 | mrlock_t i_iolock; /* inode IO lock */ |
247 | struct completion i_flush; /* inode flush completion q */ | ||
248 | atomic_t i_pincount; /* inode pin count */ | 240 | atomic_t i_pincount; /* inode pin count */ |
249 | wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ | ||
250 | spinlock_t i_flags_lock; /* inode i_flags lock */ | 241 | spinlock_t i_flags_lock; /* inode i_flags lock */ |
251 | /* Miscellaneous state. */ | 242 | /* Miscellaneous state. */ |
252 | unsigned short i_flags; /* see defined flags below */ | 243 | unsigned long i_flags; /* see defined flags below */ |
253 | unsigned char i_update_core; /* timestamps/size is dirty */ | 244 | unsigned char i_update_core; /* timestamps/size is dirty */ |
254 | unsigned int i_delayed_blks; /* count of delay alloc blks */ | 245 | unsigned int i_delayed_blks; /* count of delay alloc blks */ |
255 | 246 | ||
256 | xfs_icdinode_t i_d; /* most of ondisk inode */ | 247 | xfs_icdinode_t i_d; /* most of ondisk inode */ |
257 | 248 | ||
258 | xfs_fsize_t i_size; /* in-memory size */ | ||
259 | xfs_fsize_t i_new_size; /* size when write completes */ | ||
260 | |||
261 | /* VFS inode */ | 249 | /* VFS inode */ |
262 | struct inode i_vnode; /* embedded VFS inode */ | 250 | struct inode i_vnode; /* embedded VFS inode */ |
263 | } xfs_inode_t; | 251 | } xfs_inode_t; |
264 | 252 | ||
265 | #define XFS_ISIZE(ip) S_ISREG((ip)->i_d.di_mode) ? \ | ||
266 | (ip)->i_size : (ip)->i_d.di_size; | ||
267 | |||
268 | /* Convert from vfs inode to xfs inode */ | 253 | /* Convert from vfs inode to xfs inode */ |
269 | static inline struct xfs_inode *XFS_I(struct inode *inode) | 254 | static inline struct xfs_inode *XFS_I(struct inode *inode) |
270 | { | 255 | { |
@@ -278,6 +263,18 @@ static inline struct inode *VFS_I(struct xfs_inode *ip) | |||
278 | } | 263 | } |
279 | 264 | ||
280 | /* | 265 | /* |
266 | * For regular files we only update the on-disk filesize when actually | ||
267 | * writing data back to disk. Until then only the copy in the VFS inode | ||
268 | * is uptodate. | ||
269 | */ | ||
270 | static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip) | ||
271 | { | ||
272 | if (S_ISREG(ip->i_d.di_mode)) | ||
273 | return i_size_read(VFS_I(ip)); | ||
274 | return ip->i_d.di_size; | ||
275 | } | ||
276 | |||
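Every former reader of ip->i_size now goes through XFS_ISIZE(). For regular files this forwards to i_size_read() on the embedded VFS inode, which on 32-bit SMP kernels guards against torn reads of the 64-bit size; metadata inodes such as the quota files fall back to the on-disk di_size. A typical caller conversion from this patch (xfs_iomap_write_direct()):

    /* before */
    if ((offset + count) > ip->i_size)

    /* after */
    if ((offset + count) > XFS_ISIZE(ip))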
277 | /* | ||
281 | * i_flags helper functions | 278 | * i_flags helper functions |
282 | */ | 279 | */ |
283 | static inline void | 280 | static inline void |
@@ -331,6 +328,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) | |||
331 | return ret; | 328 | return ret; |
332 | } | 329 | } |
333 | 330 | ||
331 | static inline int | ||
332 | xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags) | ||
333 | { | ||
334 | int ret; | ||
335 | |||
336 | spin_lock(&ip->i_flags_lock); | ||
337 | ret = ip->i_flags & flags; | ||
338 | if (!ret) | ||
339 | ip->i_flags |= flags; | ||
340 | spin_unlock(&ip->i_flags_lock); | ||
341 | return ret; | ||
342 | } | ||
343 | |||
334 | /* | 344 | /* |
335 | * Project quota id helpers (previously projid was 16bit only | 345 | * Project quota id helpers (previously projid was 16bit only |
336 | * and using two 16bit values to hold new 32bit projid was chosen | 346 | * and using two 16bit values to hold new 32bit projid was chosen |
@@ -351,35 +361,19 @@ xfs_set_projid(struct xfs_inode *ip, | |||
351 | } | 361 | } |
352 | 362 | ||
353 | /* | 363 | /* |
354 | * Manage the i_flush queue embedded in the inode. This completion | ||
355 | * queue synchronizes processes attempting to flush the in-core | ||
356 | * inode back to disk. | ||
357 | */ | ||
358 | static inline void xfs_iflock(xfs_inode_t *ip) | ||
359 | { | ||
360 | wait_for_completion(&ip->i_flush); | ||
361 | } | ||
362 | |||
363 | static inline int xfs_iflock_nowait(xfs_inode_t *ip) | ||
364 | { | ||
365 | return try_wait_for_completion(&ip->i_flush); | ||
366 | } | ||
367 | |||
368 | static inline void xfs_ifunlock(xfs_inode_t *ip) | ||
369 | { | ||
370 | complete(&ip->i_flush); | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * In-core inode flags. | 364 | * In-core inode flags. |
375 | */ | 365 | */ |
376 | #define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ | 366 | #define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ |
377 | #define XFS_ISTALE 0x0002 /* inode has been staled */ | 367 | #define XFS_ISTALE (1 << 1) /* inode has been staled */ |
378 | #define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ | 368 | #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ |
379 | #define XFS_INEW 0x0008 /* inode has just been allocated */ | 369 | #define XFS_INEW (1 << 3) /* inode has just been allocated */ |
380 | #define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ | 370 | #define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */ |
381 | #define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ | 371 | #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ |
382 | #define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ | 372 | #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ |
373 | #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ | ||
374 | #define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT) | ||
375 | #define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */ | ||
376 | #define XFS_IPINNED (1 << __XFS_IPINNED_BIT) | ||
383 | 377 | ||
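Two coordinated changes in the flag definitions: values are now spelled (1 << n) so that bits addressed by index (__XFS_IFLOCK_BIT, __XFS_IPINNED_BIT) visibly match their masks, and i_flags is widened from unsigned short to unsigned long because the bit-waitqueue helpers address a bit within an unsigned long word:

    #define __XFS_IFLOCK_BIT  7                        /* index, for the waitqueue */
    #define XFS_IFLOCK        (1 << __XFS_IFLOCK_BIT)  /* mask, for flag tests     */

    /* waiters and wakers name the bit by index: */
    wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);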
384 | /* | 378 | /* |
385 | * Per-lifetime flags need to be reset when re-using a reclaimable inode during | 379 | * Per-lifetime flags need to be reset when re-using a reclaimable inode during |
@@ -392,6 +386,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) | |||
392 | XFS_IFILESTREAM); | 386 | XFS_IFILESTREAM); |
393 | 387 | ||
394 | /* | 388 | /* |
389 | * Synchronize processes attempting to flush the in-core inode back to disk. | ||
390 | */ | ||
391 | |||
392 | extern void __xfs_iflock(struct xfs_inode *ip); | ||
393 | |||
394 | static inline int xfs_iflock_nowait(struct xfs_inode *ip) | ||
395 | { | ||
396 | return !xfs_iflags_test_and_set(ip, XFS_IFLOCK); | ||
397 | } | ||
398 | |||
399 | static inline void xfs_iflock(struct xfs_inode *ip) | ||
400 | { | ||
401 | if (!xfs_iflock_nowait(ip)) | ||
402 | __xfs_iflock(ip); | ||
403 | } | ||
404 | |||
405 | static inline void xfs_ifunlock(struct xfs_inode *ip) | ||
406 | { | ||
407 | xfs_iflags_clear(ip, XFS_IFLOCK); | ||
408 | wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); | ||
409 | } | ||
410 | |||
411 | static inline int xfs_isiflocked(struct xfs_inode *ip) | ||
412 | { | ||
413 | return xfs_iflags_test(ip, XFS_IFLOCK); | ||
414 | } | ||
415 | |||
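xfs_iflock_nowait() succeeds only if the caller is the one that flips XFS_IFLOCK from clear to set, and xfs_ifunlock() clears the bit and kicks any bit-waiter. The slow path __xfs_iflock() is only declared in this header; a sketch of what its out-of-line body presumably looks like, mirroring the __xfs_iunpin_wait() loop added earlier in this patch (an assumption -- the body is not shown in this hunk):

    void
    __xfs_iflock(
            struct xfs_inode        *ip)
    {
            wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
            DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);

            do {
                    prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
                    if (xfs_isiflocked(ip))
                            io_schedule();
            } while (!xfs_iflock_nowait(ip));   /* retry until we own the bit */
            finish_wait(wq, &wait.wait);
    }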
416 | /* | ||
395 | * Flags for inode locking. | 417 | * Flags for inode locking. |
396 | * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) | 418 | * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) |
397 | * 1<<16 - 1<<32-1 -- lockdep annotation (integers) | 419 | * 1<<16 - 1<<32-1 -- lockdep annotation (integers) |
@@ -491,8 +513,6 @@ int xfs_ifree(struct xfs_trans *, xfs_inode_t *, | |||
491 | struct xfs_bmap_free *); | 513 | struct xfs_bmap_free *); |
492 | int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, | 514 | int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, |
493 | int, xfs_fsize_t); | 515 | int, xfs_fsize_t); |
494 | int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *, | ||
495 | xfs_fsize_t); | ||
496 | int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); | 516 | int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); |
497 | 517 | ||
498 | void xfs_iext_realloc(xfs_inode_t *, int, int); | 518 | void xfs_iext_realloc(xfs_inode_t *, int, int); |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index cfd6c7f8cc3c..91d71dcd4852 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -79,8 +79,6 @@ xfs_inode_item_size( | |||
79 | break; | 79 | break; |
80 | 80 | ||
81 | case XFS_DINODE_FMT_BTREE: | 81 | case XFS_DINODE_FMT_BTREE: |
82 | ASSERT(ip->i_df.if_ext_max == | ||
83 | XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); | ||
84 | iip->ili_format.ilf_fields &= | 82 | iip->ili_format.ilf_fields &= |
85 | ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | | 83 | ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | |
86 | XFS_ILOG_DEV | XFS_ILOG_UUID); | 84 | XFS_ILOG_DEV | XFS_ILOG_UUID); |
@@ -557,7 +555,7 @@ xfs_inode_item_unpin( | |||
557 | trace_xfs_inode_unpin(ip, _RET_IP_); | 555 | trace_xfs_inode_unpin(ip, _RET_IP_); |
558 | ASSERT(atomic_read(&ip->i_pincount) > 0); | 556 | ASSERT(atomic_read(&ip->i_pincount) > 0); |
559 | if (atomic_dec_and_test(&ip->i_pincount)) | 557 | if (atomic_dec_and_test(&ip->i_pincount)) |
560 | wake_up(&ip->i_ipin_wait); | 558 | wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT); |
561 | } | 559 | } |
562 | 560 | ||
563 | /* | 561 | /* |
@@ -719,7 +717,7 @@ xfs_inode_item_pushbuf( | |||
719 | * If a flush is not in progress anymore, chances are that the | 717 | * If a flush is not in progress anymore, chances are that the |
720 | * inode was taken off the AIL. So, just get out. | 718 | * inode was taken off the AIL. So, just get out. |
721 | */ | 719 | */ |
722 | if (completion_done(&ip->i_flush) || | 720 | if (!xfs_isiflocked(ip) || |
723 | !(lip->li_flags & XFS_LI_IN_AIL)) { | 721 | !(lip->li_flags & XFS_LI_IN_AIL)) { |
724 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 722 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
725 | return true; | 723 | return true; |
@@ -752,7 +750,7 @@ xfs_inode_item_push( | |||
752 | struct xfs_inode *ip = iip->ili_inode; | 750 | struct xfs_inode *ip = iip->ili_inode; |
753 | 751 | ||
754 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | 752 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); |
755 | ASSERT(!completion_done(&ip->i_flush)); | 753 | ASSERT(xfs_isiflocked(ip)); |
756 | 754 | ||
757 | /* | 755 | /* |
758 | * Since we were able to lock the inode's flush lock and | 756 | * Since we were able to lock the inode's flush lock and |
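The conversions in xfs_inode_item.c are mechanical once the correspondence between the old completion-based flush lock and the new bit flag is laid out; a summary assembled from the hunks in this patch:

    /*
     * old (struct completion i_flush)            new (XFS_IFLOCK in i_flags)
     *
     * wait_for_completion(&ip->i_flush)      ->  xfs_iflock(ip)
     * try_wait_for_completion(&ip->i_flush)  ->  xfs_iflock_nowait(ip)
     * complete(&ip->i_flush)                 ->  xfs_ifunlock(ip)
     * !completion_done(&ip->i_flush)         ->  xfs_isiflocked(ip)
     */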
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 9afa282aa937..246c7d57c6f9 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -57,26 +57,26 @@ xfs_iomap_eof_align_last_fsb( | |||
57 | xfs_fileoff_t *last_fsb) | 57 | xfs_fileoff_t *last_fsb) |
58 | { | 58 | { |
59 | xfs_fileoff_t new_last_fsb = 0; | 59 | xfs_fileoff_t new_last_fsb = 0; |
60 | xfs_extlen_t align; | 60 | xfs_extlen_t align = 0; |
61 | int eof, error; | 61 | int eof, error; |
62 | 62 | ||
63 | if (XFS_IS_REALTIME_INODE(ip)) | 63 | if (!XFS_IS_REALTIME_INODE(ip)) { |
64 | ; | 64 | /* |
65 | /* | 65 | * Round up the allocation request to a stripe unit |
66 | * If mounted with the "-o swalloc" option, roundup the allocation | 66 | * (m_dalign) boundary if the file size is >= stripe unit |
67 | * request to a stripe width boundary if the file size is >= | 67 | * size, and we are allocating past the allocation eof. |
68 | * stripe width and we are allocating past the allocation eof. | 68 | * |
69 | */ | 69 | * If mounted with the "-o swalloc" option the alignment is |
70 | else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) && | 70 | * increased from the stripe unit size to the stripe width. |
71 | (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth))) | 71 | */ |
72 | new_last_fsb = roundup_64(*last_fsb, mp->m_swidth); | 72 | if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) |
73 | /* | 73 | align = mp->m_swidth; |
74 | * Roundup the allocation request to a stripe unit (m_dalign) boundary | 74 | else if (mp->m_dalign) |
75 | * if the file size is >= stripe unit size, and we are allocating past | 75 | align = mp->m_dalign; |
76 | * the allocation eof. | 76 | |
77 | */ | 77 | if (align && XFS_ISIZE(ip) >= XFS_FSB_TO_B(mp, align)) |
78 | else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign))) | 78 | new_last_fsb = roundup_64(*last_fsb, align); |
79 | new_last_fsb = roundup_64(*last_fsb, mp->m_dalign); | 79 | } |
80 | 80 | ||
81 | /* | 81 | /* |
82 | * Always round up the allocation request to an extent boundary | 82 | * Always round up the allocation request to an extent boundary |
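The restructured helper picks one alignment value and applies it once. A worked example with hypothetical numbers: m_dalign = 16 filesystem blocks, swalloc off, file size of at least 16 FSBs, requested last_fsb = 70:

    /*
     * align        = mp->m_dalign       = 16
     * new_last_fsb = roundup_64(70, 16) = 80   (next multiple of 16)
     *
     * With "-o swalloc" and m_swidth = 64, the same request would
     * round to roundup_64(70, 64) = 128 instead.
     */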
@@ -154,7 +154,7 @@ xfs_iomap_write_direct( | |||
154 | 154 | ||
155 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 155 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
156 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); | 156 | last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); |
157 | if ((offset + count) > ip->i_size) { | 157 | if ((offset + count) > XFS_ISIZE(ip)) { |
158 | error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); | 158 | error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); |
159 | if (error) | 159 | if (error) |
160 | goto error_out; | 160 | goto error_out; |
@@ -211,7 +211,7 @@ xfs_iomap_write_direct( | |||
211 | xfs_trans_ijoin(tp, ip, 0); | 211 | xfs_trans_ijoin(tp, ip, 0); |
212 | 212 | ||
213 | bmapi_flag = 0; | 213 | bmapi_flag = 0; |
214 | if (offset < ip->i_size || extsz) | 214 | if (offset < XFS_ISIZE(ip) || extsz) |
215 | bmapi_flag |= XFS_BMAPI_PREALLOC; | 215 | bmapi_flag |= XFS_BMAPI_PREALLOC; |
216 | 216 | ||
217 | /* | 217 | /* |
@@ -286,7 +286,7 @@ xfs_iomap_eof_want_preallocate( | |||
286 | int found_delalloc = 0; | 286 | int found_delalloc = 0; |
287 | 287 | ||
288 | *prealloc = 0; | 288 | *prealloc = 0; |
289 | if ((offset + count) <= ip->i_size) | 289 | if (offset + count <= XFS_ISIZE(ip)) |
290 | return 0; | 290 | return 0; |
291 | 291 | ||
292 | /* | 292 | /* |
@@ -340,7 +340,7 @@ xfs_iomap_prealloc_size( | |||
340 | * if we pass in alloc_blocks = 0. Hence the "+ 1" to | 340 | * if we pass in alloc_blocks = 0. Hence the "+ 1" to |
341 | * ensure we always pass in a non-zero value. | 341 | * ensure we always pass in a non-zero value. |
342 | */ | 342 | */ |
343 | alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1; | 343 | alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1; |
344 | alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, | 344 | alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, |
345 | rounddown_pow_of_two(alloc_blocks)); | 345 | rounddown_pow_of_two(alloc_blocks)); |
346 | 346 | ||
@@ -564,7 +564,7 @@ xfs_iomap_write_allocate( | |||
564 | * back.... | 564 | * back.... |
565 | */ | 565 | */ |
566 | nimaps = 1; | 566 | nimaps = 1; |
567 | end_fsb = XFS_B_TO_FSB(mp, ip->i_size); | 567 | end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); |
568 | error = xfs_bmap_last_offset(NULL, ip, &last_block, | 568 | error = xfs_bmap_last_offset(NULL, ip, &last_block, |
569 | XFS_DATA_FORK); | 569 | XFS_DATA_FORK); |
570 | if (error) | 570 | if (error) |
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index f9babd179223..ab302539e5b9 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -750,6 +750,7 @@ xfs_setattr_size( | |||
750 | struct xfs_mount *mp = ip->i_mount; | 750 | struct xfs_mount *mp = ip->i_mount; |
751 | struct inode *inode = VFS_I(ip); | 751 | struct inode *inode = VFS_I(ip); |
752 | int mask = iattr->ia_valid; | 752 | int mask = iattr->ia_valid; |
753 | xfs_off_t oldsize, newsize; | ||
753 | struct xfs_trans *tp; | 754 | struct xfs_trans *tp; |
754 | int error; | 755 | int error; |
755 | uint lock_flags; | 756 | uint lock_flags; |
@@ -777,11 +778,13 @@ xfs_setattr_size( | |||
777 | lock_flags |= XFS_IOLOCK_EXCL; | 778 | lock_flags |= XFS_IOLOCK_EXCL; |
778 | xfs_ilock(ip, lock_flags); | 779 | xfs_ilock(ip, lock_flags); |
779 | 780 | ||
781 | oldsize = inode->i_size; | ||
782 | newsize = iattr->ia_size; | ||
783 | |||
780 | /* | 784 | /* |
781 | * Short circuit the truncate case for zero length files. | 785 | * Short circuit the truncate case for zero length files. |
782 | */ | 786 | */ |
783 | if (iattr->ia_size == 0 && | 787 | if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { |
784 | ip->i_size == 0 && ip->i_d.di_nextents == 0) { | ||
785 | if (!(mask & (ATTR_CTIME|ATTR_MTIME))) | 788 | if (!(mask & (ATTR_CTIME|ATTR_MTIME))) |
786 | goto out_unlock; | 789 | goto out_unlock; |
787 | 790 | ||
@@ -807,14 +810,14 @@ xfs_setattr_size( | |||
807 | * the inode to the transaction, because the inode cannot be unlocked | 810 | * the inode to the transaction, because the inode cannot be unlocked |
808 | * once it is a part of the transaction. | 811 | * once it is a part of the transaction. |
809 | */ | 812 | */ |
810 | if (iattr->ia_size > ip->i_size) { | 813 | if (newsize > oldsize) { |
811 | /* | 814 | /* |
812 | * Do the first part of growing a file: zero any data in the | 815 | * Do the first part of growing a file: zero any data in the |
813 | * last block that is beyond the old EOF. We need to do this | 816 | * last block that is beyond the old EOF. We need to do this |
814 | * before the inode is joined to the transaction to modify | 817 | * before the inode is joined to the transaction to modify |
815 | * i_size. | 818 | * i_size. |
816 | */ | 819 | */ |
817 | error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); | 820 | error = xfs_zero_eof(ip, newsize, oldsize); |
818 | if (error) | 821 | if (error) |
819 | goto out_unlock; | 822 | goto out_unlock; |
820 | } | 823 | } |
@@ -833,8 +836,8 @@ xfs_setattr_size( | |||
833 | * here and prevents waiting for other data not within the range we | 836 | * here and prevents waiting for other data not within the range we |
834 | * care about here. | 837 | * care about here. |
835 | */ | 838 | */ |
836 | if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { | 839 | if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { |
837 | error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, 0, | 840 | error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0, |
838 | FI_NONE); | 841 | FI_NONE); |
839 | if (error) | 842 | if (error) |
840 | goto out_unlock; | 843 | goto out_unlock; |
@@ -845,8 +848,7 @@ xfs_setattr_size( | |||
845 | */ | 848 | */ |
846 | inode_dio_wait(inode); | 849 | inode_dio_wait(inode); |
847 | 850 | ||
848 | error = -block_truncate_page(inode->i_mapping, iattr->ia_size, | 851 | error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); |
849 | xfs_get_blocks); | ||
850 | if (error) | 852 | if (error) |
851 | goto out_unlock; | 853 | goto out_unlock; |
852 | 854 | ||
@@ -857,7 +859,7 @@ xfs_setattr_size( | |||
857 | if (error) | 859 | if (error) |
858 | goto out_trans_cancel; | 860 | goto out_trans_cancel; |
859 | 861 | ||
860 | truncate_setsize(inode, iattr->ia_size); | 862 | truncate_setsize(inode, newsize); |
861 | 863 | ||
862 | commit_flags = XFS_TRANS_RELEASE_LOG_RES; | 864 | commit_flags = XFS_TRANS_RELEASE_LOG_RES; |
863 | lock_flags |= XFS_ILOCK_EXCL; | 865 | lock_flags |= XFS_ILOCK_EXCL; |
@@ -876,19 +878,29 @@ xfs_setattr_size( | |||
876 | * these flags set. For all other operations the VFS set these flags | 878 | * these flags set. For all other operations the VFS set these flags |
877 | * explicitly if it wants a timestamp update. | 879 | * explicitly if it wants a timestamp update. |
878 | */ | 880 | */ |
879 | if (iattr->ia_size != ip->i_size && | 881 | if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { |
880 | (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { | ||
881 | iattr->ia_ctime = iattr->ia_mtime = | 882 | iattr->ia_ctime = iattr->ia_mtime = |
882 | current_fs_time(inode->i_sb); | 883 | current_fs_time(inode->i_sb); |
883 | mask |= ATTR_CTIME | ATTR_MTIME; | 884 | mask |= ATTR_CTIME | ATTR_MTIME; |
884 | } | 885 | } |
885 | 886 | ||
886 | if (iattr->ia_size > ip->i_size) { | 887 | /* |
887 | ip->i_d.di_size = iattr->ia_size; | 888 | * The first thing we do is set the size to new_size permanently on |
888 | ip->i_size = iattr->ia_size; | 889 | * disk. This way we don't have to worry about anyone ever being able |
889 | } else if (iattr->ia_size <= ip->i_size || | 890 | * to look at the data being freed even in the face of a crash. |
890 | (iattr->ia_size == 0 && ip->i_d.di_nextents)) { | 891 | * What we're getting around here is the case where we free a block, it |
891 | error = xfs_itruncate_data(&tp, ip, iattr->ia_size); | 892 | * is allocated to another file, it is written to, and then we crash. |
893 | * If the new data gets written to the file but the log buffers | ||
894 | * containing the free and reallocation don't, then we'd end up with | ||
895 | * garbage in the blocks being freed. As long as we make the new size | ||
896 | * permanent before actually freeing any blocks it doesn't matter if | ||
897 | * they get written to. | ||
898 | */ | ||
899 | ip->i_d.di_size = newsize; | ||
900 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
901 | |||
902 | if (newsize <= oldsize) { | ||
903 | error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize); | ||
892 | if (error) | 904 | if (error) |
893 | goto out_trans_abort; | 905 | goto out_trans_abort; |
894 | 906 | ||
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 5cc3dde1bc90..eafbcff81f3a 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include "xfs_mount.h" | 31 | #include "xfs_mount.h" |
32 | #include "xfs_bmap_btree.h" | 32 | #include "xfs_bmap_btree.h" |
33 | #include "xfs_inode.h" | 33 | #include "xfs_inode.h" |
34 | #include "xfs_inode_item.h" | ||
34 | #include "xfs_itable.h" | 35 | #include "xfs_itable.h" |
35 | #include "xfs_bmap.h" | 36 | #include "xfs_bmap.h" |
36 | #include "xfs_rtalloc.h" | 37 | #include "xfs_rtalloc.h" |
@@ -263,13 +264,18 @@ xfs_qm_scall_trunc_qfile( | |||
263 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 264 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
264 | xfs_trans_ijoin(tp, ip, 0); | 265 | xfs_trans_ijoin(tp, ip, 0); |
265 | 266 | ||
266 | error = xfs_itruncate_data(&tp, ip, 0); | 267 | ip->i_d.di_size = 0; |
268 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
269 | |||
270 | error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); | ||
267 | if (error) { | 271 | if (error) { |
268 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | | 272 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | |
269 | XFS_TRANS_ABORT); | 273 | XFS_TRANS_ABORT); |
270 | goto out_unlock; | 274 | goto out_unlock; |
271 | } | 275 | } |
272 | 276 | ||
277 | ASSERT(ip->i_d.di_nextents == 0); | ||
278 | |||
273 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 279 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
274 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 280 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
275 | 281 | ||
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 281961c1d81a..ee5b695c99a7 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -828,14 +828,6 @@ xfs_fs_inode_init_once( | |||
828 | /* xfs inode */ | 828 | /* xfs inode */ |
829 | atomic_set(&ip->i_pincount, 0); | 829 | atomic_set(&ip->i_pincount, 0); |
830 | spin_lock_init(&ip->i_flags_lock); | 830 | spin_lock_init(&ip->i_flags_lock); |
831 | init_waitqueue_head(&ip->i_ipin_wait); | ||
832 | /* | ||
833 | * Because we want to use a counting completion, complete | ||
834 | * the flush completion once to allow a single access to | ||
835 | * the flush completion without blocking. | ||
836 | */ | ||
837 | init_completion(&ip->i_flush); | ||
838 | complete(&ip->i_flush); | ||
839 | 831 | ||
840 | mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, | 832 | mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, |
841 | "xfsino", ip->i_ino); | 833 | "xfsino", ip->i_ino); |
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 72c01a1c16e7..40b75eecd2b4 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
@@ -707,14 +707,13 @@ xfs_reclaim_inode_grab( | |||
707 | return 1; | 707 | return 1; |
708 | 708 | ||
709 | /* | 709 | /* |
710 | * do some unlocked checks first to avoid unnecessary lock traffic. | 710 | * If we are asked for non-blocking operation, do unlocked checks to |
711 | * The first is a flush lock check, the second is a already in reclaim | 711 | * see if the inode is already being flushed or in reclaim to avoid
712 | * check. Only do these checks if we are not going to block on locks. | 712 | * lock traffic. |
713 | */ | 713 | */ |
714 | if ((flags & SYNC_TRYLOCK) && | 714 | if ((flags & SYNC_TRYLOCK) && |
715 | (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { | 715 | __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM)) |
716 | return 1; | 716 | return 1; |
717 | } | ||
718 | 717 | ||
719 | /* | 718 | /* |
720 | * The radix tree lock here protects a thread in xfs_iget from racing | 719 | * The radix tree lock here protects a thread in xfs_iget from racing |
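Note that __xfs_iflags_test() is a plain mask test, so passing a multi-bit mask asks whether any of the bits is set. One unlocked peek therefore replaces the two separate checks the old code needed:

    /*
     * __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM) is nonzero if the
     * inode is being flushed (old check: !ip->i_flush.done) *or* already
     * in reclaim -- either way the non-blocking caller backs off.
     */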
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index a9d5b1e06efe..6b6df5802e95 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -891,7 +891,6 @@ DECLARE_EVENT_CLASS(xfs_file_class, | |||
891 | __field(dev_t, dev) | 891 | __field(dev_t, dev) |
892 | __field(xfs_ino_t, ino) | 892 | __field(xfs_ino_t, ino) |
893 | __field(xfs_fsize_t, size) | 893 | __field(xfs_fsize_t, size) |
894 | __field(xfs_fsize_t, new_size) | ||
895 | __field(loff_t, offset) | 894 | __field(loff_t, offset) |
896 | __field(size_t, count) | 895 | __field(size_t, count) |
897 | __field(int, flags) | 896 | __field(int, flags) |
@@ -900,17 +899,15 @@ DECLARE_EVENT_CLASS(xfs_file_class, | |||
900 | __entry->dev = VFS_I(ip)->i_sb->s_dev; | 899 | __entry->dev = VFS_I(ip)->i_sb->s_dev; |
901 | __entry->ino = ip->i_ino; | 900 | __entry->ino = ip->i_ino; |
902 | __entry->size = ip->i_d.di_size; | 901 | __entry->size = ip->i_d.di_size; |
903 | __entry->new_size = ip->i_new_size; | ||
904 | __entry->offset = offset; | 902 | __entry->offset = offset; |
905 | __entry->count = count; | 903 | __entry->count = count; |
906 | __entry->flags = flags; | 904 | __entry->flags = flags; |
907 | ), | 905 | ), |
908 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " | 906 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx " |
909 | "offset 0x%llx count 0x%zx ioflags %s", | 907 | "offset 0x%llx count 0x%zx ioflags %s", |
910 | MAJOR(__entry->dev), MINOR(__entry->dev), | 908 | MAJOR(__entry->dev), MINOR(__entry->dev), |
911 | __entry->ino, | 909 | __entry->ino, |
912 | __entry->size, | 910 | __entry->size, |
913 | __entry->new_size, | ||
914 | __entry->offset, | 911 | __entry->offset, |
915 | __entry->count, | 912 | __entry->count, |
916 | __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) | 913 | __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) |
@@ -978,7 +975,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class, | |||
978 | __field(dev_t, dev) | 975 | __field(dev_t, dev) |
979 | __field(xfs_ino_t, ino) | 976 | __field(xfs_ino_t, ino) |
980 | __field(loff_t, size) | 977 | __field(loff_t, size) |
981 | __field(loff_t, new_size) | ||
982 | __field(loff_t, offset) | 978 | __field(loff_t, offset) |
983 | __field(size_t, count) | 979 | __field(size_t, count) |
984 | __field(int, type) | 980 | __field(int, type) |
@@ -990,7 +986,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class, | |||
990 | __entry->dev = VFS_I(ip)->i_sb->s_dev; | 986 | __entry->dev = VFS_I(ip)->i_sb->s_dev; |
991 | __entry->ino = ip->i_ino; | 987 | __entry->ino = ip->i_ino; |
992 | __entry->size = ip->i_d.di_size; | 988 | __entry->size = ip->i_d.di_size; |
993 | __entry->new_size = ip->i_new_size; | ||
994 | __entry->offset = offset; | 989 | __entry->offset = offset; |
995 | __entry->count = count; | 990 | __entry->count = count; |
996 | __entry->type = type; | 991 | __entry->type = type; |
@@ -998,13 +993,11 @@ DECLARE_EVENT_CLASS(xfs_imap_class, | |||
998 | __entry->startblock = irec ? irec->br_startblock : 0; | 993 | __entry->startblock = irec ? irec->br_startblock : 0; |
999 | __entry->blockcount = irec ? irec->br_blockcount : 0; | 994 | __entry->blockcount = irec ? irec->br_blockcount : 0; |
1000 | ), | 995 | ), |
1001 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " | 996 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd " |
1002 | "offset 0x%llx count %zd type %s " | 997 | "type %s startoff 0x%llx startblock %lld blockcount 0x%llx", |
1003 | "startoff 0x%llx startblock %lld blockcount 0x%llx", | ||
1004 | MAJOR(__entry->dev), MINOR(__entry->dev), | 998 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1005 | __entry->ino, | 999 | __entry->ino, |
1006 | __entry->size, | 1000 | __entry->size, |
1007 | __entry->new_size, | ||
1008 | __entry->offset, | 1001 | __entry->offset, |
1009 | __entry->count, | 1002 | __entry->count, |
1010 | __print_symbolic(__entry->type, XFS_IO_TYPES), | 1003 | __print_symbolic(__entry->type, XFS_IO_TYPES), |
@@ -1031,26 +1024,23 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class, | |||
1031 | __field(xfs_ino_t, ino) | 1024 | __field(xfs_ino_t, ino) |
1032 | __field(loff_t, isize) | 1025 | __field(loff_t, isize) |
1033 | __field(loff_t, disize) | 1026 | __field(loff_t, disize) |
1034 | __field(loff_t, new_size) | ||
1035 | __field(loff_t, offset) | 1027 | __field(loff_t, offset) |
1036 | __field(size_t, count) | 1028 | __field(size_t, count) |
1037 | ), | 1029 | ), |
1038 | TP_fast_assign( | 1030 | TP_fast_assign( |
1039 | __entry->dev = VFS_I(ip)->i_sb->s_dev; | 1031 | __entry->dev = VFS_I(ip)->i_sb->s_dev; |
1040 | __entry->ino = ip->i_ino; | 1032 | __entry->ino = ip->i_ino; |
1041 | __entry->isize = ip->i_size; | 1033 | __entry->isize = VFS_I(ip)->i_size; |
1042 | __entry->disize = ip->i_d.di_size; | 1034 | __entry->disize = ip->i_d.di_size; |
1043 | __entry->new_size = ip->i_new_size; | ||
1044 | __entry->offset = offset; | 1035 | __entry->offset = offset; |
1045 | __entry->count = count; | 1036 | __entry->count = count; |
1046 | ), | 1037 | ), |
1047 | TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx " | 1038 | TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx " |
1048 | "offset 0x%llx count %zd", | 1039 | "offset 0x%llx count %zd", |
1049 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1040 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1050 | __entry->ino, | 1041 | __entry->ino, |
1051 | __entry->isize, | 1042 | __entry->isize, |
1052 | __entry->disize, | 1043 | __entry->disize, |
1053 | __entry->new_size, | ||
1054 | __entry->offset, | 1044 | __entry->offset, |
1055 | __entry->count) | 1045 | __entry->count) |
1056 | ); | 1046 | ); |
@@ -1090,8 +1080,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class, | |||
1090 | DEFINE_EVENT(xfs_itrunc_class, name, \ | 1080 | DEFINE_EVENT(xfs_itrunc_class, name, \ |
1091 | TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ | 1081 | TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ |
1092 | TP_ARGS(ip, new_size)) | 1082 | TP_ARGS(ip, new_size)) |
1093 | DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); | 1083 | DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_start); |
1094 | DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); | 1084 | DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_end); |
1095 | 1085 | ||
1096 | TRACE_EVENT(xfs_pagecache_inval, | 1086 | TRACE_EVENT(xfs_pagecache_inval, |
1097 | TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), | 1087 | TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), |
@@ -1568,7 +1558,6 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class, | |||
1568 | __field(xfs_ino_t, ino) | 1558 | __field(xfs_ino_t, ino) |
1569 | __field(int, format) | 1559 | __field(int, format) |
1570 | __field(int, nex) | 1560 | __field(int, nex) |
1571 | __field(int, max_nex) | ||
1572 | __field(int, broot_size) | 1561 | __field(int, broot_size) |
1573 | __field(int, fork_off) | 1562 | __field(int, fork_off) |
1574 | ), | 1563 | ), |
@@ -1578,18 +1567,16 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class, | |||
1578 | __entry->ino = ip->i_ino; | 1567 | __entry->ino = ip->i_ino; |
1579 | __entry->format = ip->i_d.di_format; | 1568 | __entry->format = ip->i_d.di_format; |
1580 | __entry->nex = ip->i_d.di_nextents; | 1569 | __entry->nex = ip->i_d.di_nextents; |
1581 | __entry->max_nex = ip->i_df.if_ext_max; | ||
1582 | __entry->broot_size = ip->i_df.if_broot_bytes; | 1570 | __entry->broot_size = ip->i_df.if_broot_bytes; |
1583 | __entry->fork_off = XFS_IFORK_BOFF(ip); | 1571 | __entry->fork_off = XFS_IFORK_BOFF(ip); |
1584 | ), | 1572 | ), |
1585 | TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " | 1573 | TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " |
1586 | "Max in-fork extents %d, broot size %d, fork offset %d", | 1574 | "broot size %d, fork offset %d", |
1587 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1575 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1588 | __entry->ino, | 1576 | __entry->ino, |
1589 | __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), | 1577 | __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), |
1590 | __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), | 1578 | __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), |
1591 | __entry->nex, | 1579 | __entry->nex, |
1592 | __entry->max_nex, | ||
1593 | __entry->broot_size, | 1580 | __entry->broot_size, |
1594 | __entry->fork_off) | 1581 | __entry->fork_off) |
1595 | ) | 1582 | ) |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index f2fea868d4db..ebdb88840a47 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -131,7 +131,8 @@ xfs_readlink( | |||
131 | __func__, (unsigned long long) ip->i_ino, | 131 | __func__, (unsigned long long) ip->i_ino, |
132 | (long long) pathlen); | 132 | (long long) pathlen); |
133 | ASSERT(0); | 133 | ASSERT(0); |
134 | return XFS_ERROR(EFSCORRUPTED); | 134 | error = XFS_ERROR(EFSCORRUPTED); |
135 | goto out; | ||
135 | } | 136 | } |
136 | 137 | ||
137 | 138 | ||
@@ -175,7 +176,7 @@ xfs_free_eofblocks( | |||
175 | * Figure out if there are any blocks beyond the end | 176 | * Figure out if there are any blocks beyond the end |
176 | * of the file. If not, then there is nothing to do. | 177 | * of the file. If not, then there is nothing to do. |
177 | */ | 178 | */ |
178 | end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); | 179 | end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); |
179 | last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); | 180 | last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); |
180 | if (last_fsb <= end_fsb) | 181 | if (last_fsb <= end_fsb) |
181 | return 0; | 182 | return 0; |
@@ -226,7 +227,14 @@ xfs_free_eofblocks( | |||
226 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 227 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
227 | xfs_trans_ijoin(tp, ip, 0); | 228 | xfs_trans_ijoin(tp, ip, 0); |
228 | 229 | ||
229 | error = xfs_itruncate_data(&tp, ip, ip->i_size); | 230 | /* |
231 | * Do not update the on-disk file size. If we update the | ||
232 | * on-disk file size and then the system crashes before the | ||
233 | * contents of the file are flushed to disk then the files | ||
234 | * may be full of holes (ie NULL files bug). | ||
235 | */ | ||
236 | error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, | ||
237 | XFS_ISIZE(ip)); | ||
230 | if (error) { | 238 | if (error) { |
231 | /* | 239 | /* |
232 | * If we get an error at this point we simply don't | 240 | * If we get an error at this point we simply don't |
@@ -540,8 +548,8 @@ xfs_release( | |||
540 | return 0; | 548 | return 0; |
541 | 549 | ||
542 | if ((S_ISREG(ip->i_d.di_mode) && | 550 | if ((S_ISREG(ip->i_d.di_mode) && |
543 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || | 551 | (VFS_I(ip)->i_size > 0 || |
544 | ip->i_delayed_blks > 0)) && | 552 | (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && |
545 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && | 553 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && |
546 | (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { | 554 | (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { |
547 | 555 | ||
@@ -618,7 +626,7 @@ xfs_inactive( | |||
618 | * only one with a reference to the inode. | 626 | * only one with a reference to the inode. |
619 | */ | 627 | */ |
620 | truncate = ((ip->i_d.di_nlink == 0) && | 628 | truncate = ((ip->i_d.di_nlink == 0) && |
621 | ((ip->i_d.di_size != 0) || (ip->i_size != 0) || | 629 | ((ip->i_d.di_size != 0) || XFS_ISIZE(ip) != 0 || |
622 | (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) && | 630 | (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) && |
623 | S_ISREG(ip->i_d.di_mode)); | 631 | S_ISREG(ip->i_d.di_mode)); |
624 | 632 | ||
@@ -632,12 +640,12 @@ xfs_inactive( | |||
632 | 640 | ||
633 | if (ip->i_d.di_nlink != 0) { | 641 | if (ip->i_d.di_nlink != 0) { |
634 | if ((S_ISREG(ip->i_d.di_mode) && | 642 | if ((S_ISREG(ip->i_d.di_mode) && |
635 | ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || | 643 | (VFS_I(ip)->i_size > 0 || |
636 | ip->i_delayed_blks > 0)) && | 644 | (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && |
637 | (ip->i_df.if_flags & XFS_IFEXTENTS) && | 645 | (ip->i_df.if_flags & XFS_IFEXTENTS) && |
638 | (!(ip->i_d.di_flags & | 646 | (!(ip->i_d.di_flags & |
639 | (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || | 647 | (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || |
640 | (ip->i_delayed_blks != 0)))) { | 648 | ip->i_delayed_blks != 0))) { |
641 | error = xfs_free_eofblocks(mp, ip, 0); | 649 | error = xfs_free_eofblocks(mp, ip, 0); |
642 | if (error) | 650 | if (error) |
643 | return VN_INACTIVE_CACHE; | 651 | return VN_INACTIVE_CACHE; |
@@ -670,13 +678,18 @@ xfs_inactive( | |||
670 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 678 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
671 | xfs_trans_ijoin(tp, ip, 0); | 679 | xfs_trans_ijoin(tp, ip, 0); |
672 | 680 | ||
673 | error = xfs_itruncate_data(&tp, ip, 0); | 681 | ip->i_d.di_size = 0; |
682 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
683 | |||
684 | error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); | ||
674 | if (error) { | 685 | if (error) { |
675 | xfs_trans_cancel(tp, | 686 | xfs_trans_cancel(tp, |
676 | XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | 687 | XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
677 | xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 688 | xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); |
678 | return VN_INACTIVE_CACHE; | 689 | return VN_INACTIVE_CACHE; |
679 | } | 690 | } |
691 | |||
692 | ASSERT(ip->i_d.di_nextents == 0); | ||
680 | } else if (S_ISLNK(ip->i_d.di_mode)) { | 693 | } else if (S_ISLNK(ip->i_d.di_mode)) { |
681 | 694 | ||
682 | /* | 695 | /* |
@@ -1961,11 +1974,11 @@ xfs_zero_remaining_bytes( | |||
1961 | * since nothing can read beyond eof. The space will | 1974 | * since nothing can read beyond eof. The space will |
1962 | * be zeroed when the file is extended anyway. | 1975 | * be zeroed when the file is extended anyway. |
1963 | */ | 1976 | */ |
1964 | if (startoff >= ip->i_size) | 1977 | if (startoff >= XFS_ISIZE(ip)) |
1965 | return 0; | 1978 | return 0; |
1966 | 1979 | ||
1967 | if (endoff > ip->i_size) | 1980 | if (endoff > XFS_ISIZE(ip)) |
1968 | endoff = ip->i_size; | 1981 | endoff = XFS_ISIZE(ip); |
1969 | 1982 | ||
1970 | bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? | 1983 | bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? |
1971 | mp->m_rtdev_targp : mp->m_ddev_targp, | 1984 | mp->m_rtdev_targp : mp->m_ddev_targp, |
@@ -2260,7 +2273,7 @@ xfs_change_file_space( | |||
2260 | bf->l_start += offset; | 2273 | bf->l_start += offset; |
2261 | break; | 2274 | break; |
2262 | case 2: /*SEEK_END*/ | 2275 | case 2: /*SEEK_END*/ |
2263 | bf->l_start += ip->i_size; | 2276 | bf->l_start += XFS_ISIZE(ip); |
2264 | break; | 2277 | break; |
2265 | default: | 2278 | default: |
2266 | return XFS_ERROR(EINVAL); | 2279 | return XFS_ERROR(EINVAL); |
@@ -2277,7 +2290,7 @@ xfs_change_file_space( | |||
2277 | bf->l_whence = 0; | 2290 | bf->l_whence = 0; |
2278 | 2291 | ||
2279 | startoffset = bf->l_start; | 2292 | startoffset = bf->l_start; |
2280 | fsize = ip->i_size; | 2293 | fsize = XFS_ISIZE(ip); |
2281 | 2294 | ||
2282 | /* | 2295 | /* |
2283 | * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve | 2296 | * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve |