diff options
author | Sage Weil <sage@inktank.com> | 2013-08-15 14:11:45 -0400 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-08-15 14:11:45 -0400 |
commit | ee3e542fec6e69bc9fb668698889a37d93950ddf (patch) | |
tree | e74ee766a4764769ef1d3d45d266b4dea64101d3 /fs | |
parent | fe2a801b50c0bb8039d627e5ae1fec249d10ff39 (diff) | |
parent | f1d6e17f540af37bb1891480143669ba7636c4cf (diff) |
Merge remote-tracking branch 'linus/master' into testing
Diffstat (limited to 'fs')
470 files changed, 16631 insertions, 11991 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index 55abfd62654a..6489e1fc1afd 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig | |||
@@ -31,3 +31,16 @@ config 9P_FS_POSIX_ACL | |||
31 | If you don't know what Access Control Lists are, say N | 31 | If you don't know what Access Control Lists are, say N |
32 | 32 | ||
33 | endif | 33 | endif |
34 | |||
35 | |||
36 | config 9P_FS_SECURITY | ||
37 | bool "9P Security Labels" | ||
38 | depends on 9P_FS | ||
39 | help | ||
40 | Security labels support alternative access control models | ||
41 | implemented by security modules like SELinux. This option | ||
42 | enables an extended attribute handler for file security | ||
43 | labels in the 9P filesystem. | ||
44 | |||
45 | If you are not using a security module that requires using | ||
46 | extended attributes for file security labels, say N. | ||
diff --git a/fs/9p/Makefile b/fs/9p/Makefile index ab8c12780634..ff7be98f84f2 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile | |||
@@ -11,7 +11,9 @@ obj-$(CONFIG_9P_FS) := 9p.o | |||
11 | v9fs.o \ | 11 | v9fs.o \ |
12 | fid.o \ | 12 | fid.o \ |
13 | xattr.o \ | 13 | xattr.o \ |
14 | xattr_user.o | 14 | xattr_user.o \ |
15 | xattr_trusted.o | ||
15 | 16 | ||
16 | 9p-$(CONFIG_9P_FSCACHE) += cache.o | 17 | 9p-$(CONFIG_9P_FSCACHE) += cache.o |
17 | 9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o | 18 | 9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o |
19 | 9p-$(CONFIG_9P_FS_SECURITY) += xattr_security.o | ||
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 055562c580b4..9ff073f4090a 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c | |||
@@ -148,13 +148,14 @@ static int v9fs_release_page(struct page *page, gfp_t gfp) | |||
148 | * @offset: offset in the page | 148 | * @offset: offset in the page |
149 | */ | 149 | */ |
150 | 150 | ||
151 | static void v9fs_invalidate_page(struct page *page, unsigned long offset) | 151 | static void v9fs_invalidate_page(struct page *page, unsigned int offset, |
152 | unsigned int length) | ||
152 | { | 153 | { |
153 | /* | 154 | /* |
154 | * If called with zero offset, we should release | 155 | * If called with zero offset, we should release |
155 | * the private state assocated with the page | 156 | * the private state assocated with the page |
156 | */ | 157 | */ |
157 | if (offset == 0) | 158 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
158 | v9fs_fscache_invalidate_page(page); | 159 | v9fs_fscache_invalidate_page(page); |
159 | } | 160 | } |
160 | 161 | ||
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index be1e34adc3c6..4d0c2e0be7e5 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c | |||
@@ -101,16 +101,15 @@ static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen) | |||
101 | } | 101 | } |
102 | 102 | ||
103 | /** | 103 | /** |
104 | * v9fs_dir_readdir - read a directory | 104 | * v9fs_dir_readdir - iterate through a directory |
105 | * @filp: opened file structure | 105 | * @file: opened file structure |
106 | * @dirent: directory structure ??? | 106 | * @ctx: actor we feed the entries to |
107 | * @filldir: function to populate directory structure ??? | ||
108 | * | 107 | * |
109 | */ | 108 | */ |
110 | 109 | ||
111 | static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) | 110 | static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) |
112 | { | 111 | { |
113 | int over; | 112 | bool over; |
114 | struct p9_wstat st; | 113 | struct p9_wstat st; |
115 | int err = 0; | 114 | int err = 0; |
116 | struct p9_fid *fid; | 115 | struct p9_fid *fid; |
@@ -118,19 +117,19 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
118 | int reclen = 0; | 117 | int reclen = 0; |
119 | struct p9_rdir *rdir; | 118 | struct p9_rdir *rdir; |
120 | 119 | ||
121 | p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); | 120 | p9_debug(P9_DEBUG_VFS, "name %s\n", file->f_path.dentry->d_name.name); |
122 | fid = filp->private_data; | 121 | fid = file->private_data; |
123 | 122 | ||
124 | buflen = fid->clnt->msize - P9_IOHDRSZ; | 123 | buflen = fid->clnt->msize - P9_IOHDRSZ; |
125 | 124 | ||
126 | rdir = v9fs_alloc_rdir_buf(filp, buflen); | 125 | rdir = v9fs_alloc_rdir_buf(file, buflen); |
127 | if (!rdir) | 126 | if (!rdir) |
128 | return -ENOMEM; | 127 | return -ENOMEM; |
129 | 128 | ||
130 | while (1) { | 129 | while (1) { |
131 | if (rdir->tail == rdir->head) { | 130 | if (rdir->tail == rdir->head) { |
132 | err = v9fs_file_readn(filp, rdir->buf, NULL, | 131 | err = v9fs_file_readn(file, rdir->buf, NULL, |
133 | buflen, filp->f_pos); | 132 | buflen, ctx->pos); |
134 | if (err <= 0) | 133 | if (err <= 0) |
135 | return err; | 134 | return err; |
136 | 135 | ||
@@ -148,51 +147,45 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
148 | } | 147 | } |
149 | reclen = st.size+2; | 148 | reclen = st.size+2; |
150 | 149 | ||
151 | over = filldir(dirent, st.name, strlen(st.name), | 150 | over = !dir_emit(ctx, st.name, strlen(st.name), |
152 | filp->f_pos, v9fs_qid2ino(&st.qid), dt_type(&st)); | 151 | v9fs_qid2ino(&st.qid), dt_type(&st)); |
153 | |||
154 | p9stat_free(&st); | 152 | p9stat_free(&st); |
155 | |||
156 | if (over) | 153 | if (over) |
157 | return 0; | 154 | return 0; |
158 | 155 | ||
159 | rdir->head += reclen; | 156 | rdir->head += reclen; |
160 | filp->f_pos += reclen; | 157 | ctx->pos += reclen; |
161 | } | 158 | } |
162 | } | 159 | } |
163 | } | 160 | } |
164 | 161 | ||
165 | /** | 162 | /** |
166 | * v9fs_dir_readdir_dotl - read a directory | 163 | * v9fs_dir_readdir_dotl - iterate through a directory |
167 | * @filp: opened file structure | 164 | * @file: opened file structure |
168 | * @dirent: buffer to fill dirent structures | 165 | * @ctx: actor we feed the entries to |
169 | * @filldir: function to populate dirent structures | ||
170 | * | 166 | * |
171 | */ | 167 | */ |
172 | static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, | 168 | static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx) |
173 | filldir_t filldir) | ||
174 | { | 169 | { |
175 | int over; | ||
176 | int err = 0; | 170 | int err = 0; |
177 | struct p9_fid *fid; | 171 | struct p9_fid *fid; |
178 | int buflen; | 172 | int buflen; |
179 | struct p9_rdir *rdir; | 173 | struct p9_rdir *rdir; |
180 | struct p9_dirent curdirent; | 174 | struct p9_dirent curdirent; |
181 | u64 oldoffset = 0; | ||
182 | 175 | ||
183 | p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); | 176 | p9_debug(P9_DEBUG_VFS, "name %s\n", file->f_path.dentry->d_name.name); |
184 | fid = filp->private_data; | 177 | fid = file->private_data; |
185 | 178 | ||
186 | buflen = fid->clnt->msize - P9_READDIRHDRSZ; | 179 | buflen = fid->clnt->msize - P9_READDIRHDRSZ; |
187 | 180 | ||
188 | rdir = v9fs_alloc_rdir_buf(filp, buflen); | 181 | rdir = v9fs_alloc_rdir_buf(file, buflen); |
189 | if (!rdir) | 182 | if (!rdir) |
190 | return -ENOMEM; | 183 | return -ENOMEM; |
191 | 184 | ||
192 | while (1) { | 185 | while (1) { |
193 | if (rdir->tail == rdir->head) { | 186 | if (rdir->tail == rdir->head) { |
194 | err = p9_client_readdir(fid, rdir->buf, buflen, | 187 | err = p9_client_readdir(fid, rdir->buf, buflen, |
195 | filp->f_pos); | 188 | ctx->pos); |
196 | if (err <= 0) | 189 | if (err <= 0) |
197 | return err; | 190 | return err; |
198 | 191 | ||
@@ -210,22 +203,13 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, | |||
210 | return -EIO; | 203 | return -EIO; |
211 | } | 204 | } |
212 | 205 | ||
213 | /* d_off in dirent structure tracks the offset into | 206 | if (!dir_emit(ctx, curdirent.d_name, |
214 | * the next dirent in the dir. However, filldir() | 207 | strlen(curdirent.d_name), |
215 | * expects offset into the current dirent. Hence | 208 | v9fs_qid2ino(&curdirent.qid), |
216 | * while calling filldir send the offset from the | 209 | curdirent.d_type)) |
217 | * previous dirent structure. | ||
218 | */ | ||
219 | over = filldir(dirent, curdirent.d_name, | ||
220 | strlen(curdirent.d_name), | ||
221 | oldoffset, v9fs_qid2ino(&curdirent.qid), | ||
222 | curdirent.d_type); | ||
223 | oldoffset = curdirent.d_off; | ||
224 | |||
225 | if (over) | ||
226 | return 0; | 210 | return 0; |
227 | 211 | ||
228 | filp->f_pos = curdirent.d_off; | 212 | ctx->pos = curdirent.d_off; |
229 | rdir->head += err; | 213 | rdir->head += err; |
230 | } | 214 | } |
231 | } | 215 | } |
@@ -254,7 +238,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) | |||
254 | const struct file_operations v9fs_dir_operations = { | 238 | const struct file_operations v9fs_dir_operations = { |
255 | .read = generic_read_dir, | 239 | .read = generic_read_dir, |
256 | .llseek = generic_file_llseek, | 240 | .llseek = generic_file_llseek, |
257 | .readdir = v9fs_dir_readdir, | 241 | .iterate = v9fs_dir_readdir, |
258 | .open = v9fs_file_open, | 242 | .open = v9fs_file_open, |
259 | .release = v9fs_dir_release, | 243 | .release = v9fs_dir_release, |
260 | }; | 244 | }; |
@@ -262,7 +246,7 @@ const struct file_operations v9fs_dir_operations = { | |||
262 | const struct file_operations v9fs_dir_operations_dotl = { | 246 | const struct file_operations v9fs_dir_operations_dotl = { |
263 | .read = generic_read_dir, | 247 | .read = generic_read_dir, |
264 | .llseek = generic_file_llseek, | 248 | .llseek = generic_file_llseek, |
265 | .readdir = v9fs_dir_readdir_dotl, | 249 | .iterate = v9fs_dir_readdir_dotl, |
266 | .open = v9fs_file_open, | 250 | .open = v9fs_file_open, |
267 | .release = v9fs_dir_release, | 251 | .release = v9fs_dir_release, |
268 | .fsync = v9fs_file_fsync_dotl, | 252 | .fsync = v9fs_file_fsync_dotl, |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index d86edc8d3fd0..25b018efb8ab 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -1054,13 +1054,11 @@ static int | |||
1054 | v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, | 1054 | v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, |
1055 | struct kstat *stat) | 1055 | struct kstat *stat) |
1056 | { | 1056 | { |
1057 | int err; | ||
1058 | struct v9fs_session_info *v9ses; | 1057 | struct v9fs_session_info *v9ses; |
1059 | struct p9_fid *fid; | 1058 | struct p9_fid *fid; |
1060 | struct p9_wstat *st; | 1059 | struct p9_wstat *st; |
1061 | 1060 | ||
1062 | p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); | 1061 | p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); |
1063 | err = -EPERM; | ||
1064 | v9ses = v9fs_dentry2v9ses(dentry); | 1062 | v9ses = v9fs_dentry2v9ses(dentry); |
1065 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { | 1063 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { |
1066 | generic_fillattr(dentry->d_inode, stat); | 1064 | generic_fillattr(dentry->d_inode, stat); |
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index c45e016b190f..3c28cdfb8c47 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c | |||
@@ -167,9 +167,13 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
167 | 167 | ||
168 | const struct xattr_handler *v9fs_xattr_handlers[] = { | 168 | const struct xattr_handler *v9fs_xattr_handlers[] = { |
169 | &v9fs_xattr_user_handler, | 169 | &v9fs_xattr_user_handler, |
170 | &v9fs_xattr_trusted_handler, | ||
170 | #ifdef CONFIG_9P_FS_POSIX_ACL | 171 | #ifdef CONFIG_9P_FS_POSIX_ACL |
171 | &v9fs_xattr_acl_access_handler, | 172 | &v9fs_xattr_acl_access_handler, |
172 | &v9fs_xattr_acl_default_handler, | 173 | &v9fs_xattr_acl_default_handler, |
173 | #endif | 174 | #endif |
175 | #ifdef CONFIG_9P_FS_SECURITY | ||
176 | &v9fs_xattr_security_handler, | ||
177 | #endif | ||
174 | NULL | 178 | NULL |
175 | }; | 179 | }; |
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h index eec348a3df71..d3e2ea3840be 100644 --- a/fs/9p/xattr.h +++ b/fs/9p/xattr.h | |||
@@ -20,6 +20,8 @@ | |||
20 | 20 | ||
21 | extern const struct xattr_handler *v9fs_xattr_handlers[]; | 21 | extern const struct xattr_handler *v9fs_xattr_handlers[]; |
22 | extern struct xattr_handler v9fs_xattr_user_handler; | 22 | extern struct xattr_handler v9fs_xattr_user_handler; |
23 | extern struct xattr_handler v9fs_xattr_trusted_handler; | ||
24 | extern struct xattr_handler v9fs_xattr_security_handler; | ||
23 | extern const struct xattr_handler v9fs_xattr_acl_access_handler; | 25 | extern const struct xattr_handler v9fs_xattr_acl_access_handler; |
24 | extern const struct xattr_handler v9fs_xattr_acl_default_handler; | 26 | extern const struct xattr_handler v9fs_xattr_acl_default_handler; |
25 | 27 | ||
diff --git a/fs/9p/xattr_security.c b/fs/9p/xattr_security.c new file mode 100644 index 000000000000..cb247a142a6e --- /dev/null +++ b/fs/9p/xattr_security.c | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * Copyright IBM Corporation, 2010 | ||
3 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of version 2.1 of the GNU Lesser General Public License | ||
7 | * as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | |||
16 | #include <linux/module.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <linux/fs.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include "xattr.h" | ||
21 | |||
22 | static int v9fs_xattr_security_get(struct dentry *dentry, const char *name, | ||
23 | void *buffer, size_t size, int type) | ||
24 | { | ||
25 | int retval; | ||
26 | char *full_name; | ||
27 | size_t name_len; | ||
28 | size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; | ||
29 | |||
30 | if (name == NULL) | ||
31 | return -EINVAL; | ||
32 | |||
33 | if (strcmp(name, "") == 0) | ||
34 | return -EINVAL; | ||
35 | |||
36 | name_len = strlen(name); | ||
37 | full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); | ||
38 | if (!full_name) | ||
39 | return -ENOMEM; | ||
40 | memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); | ||
41 | memcpy(full_name+prefix_len, name, name_len); | ||
42 | full_name[prefix_len + name_len] = '\0'; | ||
43 | |||
44 | retval = v9fs_xattr_get(dentry, full_name, buffer, size); | ||
45 | kfree(full_name); | ||
46 | return retval; | ||
47 | } | ||
48 | |||
49 | static int v9fs_xattr_security_set(struct dentry *dentry, const char *name, | ||
50 | const void *value, size_t size, int flags, int type) | ||
51 | { | ||
52 | int retval; | ||
53 | char *full_name; | ||
54 | size_t name_len; | ||
55 | size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; | ||
56 | |||
57 | if (name == NULL) | ||
58 | return -EINVAL; | ||
59 | |||
60 | if (strcmp(name, "") == 0) | ||
61 | return -EINVAL; | ||
62 | |||
63 | name_len = strlen(name); | ||
64 | full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); | ||
65 | if (!full_name) | ||
66 | return -ENOMEM; | ||
67 | memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); | ||
68 | memcpy(full_name + prefix_len, name, name_len); | ||
69 | full_name[prefix_len + name_len] = '\0'; | ||
70 | |||
71 | retval = v9fs_xattr_set(dentry, full_name, value, size, flags); | ||
72 | kfree(full_name); | ||
73 | return retval; | ||
74 | } | ||
75 | |||
76 | struct xattr_handler v9fs_xattr_security_handler = { | ||
77 | .prefix = XATTR_SECURITY_PREFIX, | ||
78 | .get = v9fs_xattr_security_get, | ||
79 | .set = v9fs_xattr_security_set, | ||
80 | }; | ||
diff --git a/fs/9p/xattr_trusted.c b/fs/9p/xattr_trusted.c new file mode 100644 index 000000000000..e30d33b8a3fb --- /dev/null +++ b/fs/9p/xattr_trusted.c | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * Copyright IBM Corporation, 2010 | ||
3 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of version 2.1 of the GNU Lesser General Public License | ||
7 | * as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | |||
16 | #include <linux/module.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <linux/fs.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include "xattr.h" | ||
21 | |||
22 | static int v9fs_xattr_trusted_get(struct dentry *dentry, const char *name, | ||
23 | void *buffer, size_t size, int type) | ||
24 | { | ||
25 | int retval; | ||
26 | char *full_name; | ||
27 | size_t name_len; | ||
28 | size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; | ||
29 | |||
30 | if (name == NULL) | ||
31 | return -EINVAL; | ||
32 | |||
33 | if (strcmp(name, "") == 0) | ||
34 | return -EINVAL; | ||
35 | |||
36 | name_len = strlen(name); | ||
37 | full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); | ||
38 | if (!full_name) | ||
39 | return -ENOMEM; | ||
40 | memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); | ||
41 | memcpy(full_name+prefix_len, name, name_len); | ||
42 | full_name[prefix_len + name_len] = '\0'; | ||
43 | |||
44 | retval = v9fs_xattr_get(dentry, full_name, buffer, size); | ||
45 | kfree(full_name); | ||
46 | return retval; | ||
47 | } | ||
48 | |||
49 | static int v9fs_xattr_trusted_set(struct dentry *dentry, const char *name, | ||
50 | const void *value, size_t size, int flags, int type) | ||
51 | { | ||
52 | int retval; | ||
53 | char *full_name; | ||
54 | size_t name_len; | ||
55 | size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; | ||
56 | |||
57 | if (name == NULL) | ||
58 | return -EINVAL; | ||
59 | |||
60 | if (strcmp(name, "") == 0) | ||
61 | return -EINVAL; | ||
62 | |||
63 | name_len = strlen(name); | ||
64 | full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); | ||
65 | if (!full_name) | ||
66 | return -ENOMEM; | ||
67 | memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); | ||
68 | memcpy(full_name + prefix_len, name, name_len); | ||
69 | full_name[prefix_len + name_len] = '\0'; | ||
70 | |||
71 | retval = v9fs_xattr_set(dentry, full_name, value, size, flags); | ||
72 | kfree(full_name); | ||
73 | return retval; | ||
74 | } | ||
75 | |||
76 | struct xattr_handler v9fs_xattr_trusted_handler = { | ||
77 | .prefix = XATTR_TRUSTED_PREFIX, | ||
78 | .get = v9fs_xattr_trusted_get, | ||
79 | .set = v9fs_xattr_trusted_set, | ||
80 | }; | ||
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 9cf874ce8336..0d138c0de293 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c | |||
@@ -17,47 +17,43 @@ | |||
17 | static DEFINE_RWLOCK(adfs_dir_lock); | 17 | static DEFINE_RWLOCK(adfs_dir_lock); |
18 | 18 | ||
19 | static int | 19 | static int |
20 | adfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 20 | adfs_readdir(struct file *file, struct dir_context *ctx) |
21 | { | 21 | { |
22 | struct inode *inode = file_inode(filp); | 22 | struct inode *inode = file_inode(file); |
23 | struct super_block *sb = inode->i_sb; | 23 | struct super_block *sb = inode->i_sb; |
24 | struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir; | 24 | struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir; |
25 | struct object_info obj; | 25 | struct object_info obj; |
26 | struct adfs_dir dir; | 26 | struct adfs_dir dir; |
27 | int ret = 0; | 27 | int ret = 0; |
28 | 28 | ||
29 | if (filp->f_pos >> 32) | 29 | if (ctx->pos >> 32) |
30 | goto out; | 30 | return 0; |
31 | 31 | ||
32 | ret = ops->read(sb, inode->i_ino, inode->i_size, &dir); | 32 | ret = ops->read(sb, inode->i_ino, inode->i_size, &dir); |
33 | if (ret) | 33 | if (ret) |
34 | goto out; | 34 | return ret; |
35 | 35 | ||
36 | switch ((unsigned long)filp->f_pos) { | 36 | if (ctx->pos == 0) { |
37 | case 0: | 37 | if (!dir_emit_dot(file, ctx)) |
38 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
39 | goto free_out; | 38 | goto free_out; |
40 | filp->f_pos += 1; | 39 | ctx->pos = 1; |
41 | 40 | } | |
42 | case 1: | 41 | if (ctx->pos == 1) { |
43 | if (filldir(dirent, "..", 2, 1, dir.parent_id, DT_DIR) < 0) | 42 | if (!dir_emit(ctx, "..", 2, dir.parent_id, DT_DIR)) |
44 | goto free_out; | 43 | goto free_out; |
45 | filp->f_pos += 1; | 44 | ctx->pos = 2; |
46 | |||
47 | default: | ||
48 | break; | ||
49 | } | 45 | } |
50 | 46 | ||
51 | read_lock(&adfs_dir_lock); | 47 | read_lock(&adfs_dir_lock); |
52 | 48 | ||
53 | ret = ops->setpos(&dir, filp->f_pos - 2); | 49 | ret = ops->setpos(&dir, ctx->pos - 2); |
54 | if (ret) | 50 | if (ret) |
55 | goto unlock_out; | 51 | goto unlock_out; |
56 | while (ops->getnext(&dir, &obj) == 0) { | 52 | while (ops->getnext(&dir, &obj) == 0) { |
57 | if (filldir(dirent, obj.name, obj.name_len, | 53 | if (!dir_emit(ctx, obj.name, obj.name_len, |
58 | filp->f_pos, obj.file_id, DT_UNKNOWN) < 0) | 54 | obj.file_id, DT_UNKNOWN)) |
59 | goto unlock_out; | 55 | break; |
60 | filp->f_pos += 1; | 56 | ctx->pos++; |
61 | } | 57 | } |
62 | 58 | ||
63 | unlock_out: | 59 | unlock_out: |
@@ -65,8 +61,6 @@ unlock_out: | |||
65 | 61 | ||
66 | free_out: | 62 | free_out: |
67 | ops->free(&dir); | 63 | ops->free(&dir); |
68 | |||
69 | out: | ||
70 | return ret; | 64 | return ret; |
71 | } | 65 | } |
72 | 66 | ||
@@ -192,13 +186,12 @@ out: | |||
192 | const struct file_operations adfs_dir_operations = { | 186 | const struct file_operations adfs_dir_operations = { |
193 | .read = generic_read_dir, | 187 | .read = generic_read_dir, |
194 | .llseek = generic_file_llseek, | 188 | .llseek = generic_file_llseek, |
195 | .readdir = adfs_readdir, | 189 | .iterate = adfs_readdir, |
196 | .fsync = generic_file_fsync, | 190 | .fsync = generic_file_fsync, |
197 | }; | 191 | }; |
198 | 192 | ||
199 | static int | 193 | static int |
200 | adfs_hash(const struct dentry *parent, const struct inode *inode, | 194 | adfs_hash(const struct dentry *parent, struct qstr *qstr) |
201 | struct qstr *qstr) | ||
202 | { | 195 | { |
203 | const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen; | 196 | const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen; |
204 | const unsigned char *name; | 197 | const unsigned char *name; |
@@ -234,8 +227,7 @@ adfs_hash(const struct dentry *parent, const struct inode *inode, | |||
234 | * requirements of the underlying filesystem. | 227 | * requirements of the underlying filesystem. |
235 | */ | 228 | */ |
236 | static int | 229 | static int |
237 | adfs_compare(const struct dentry *parent, const struct inode *pinode, | 230 | adfs_compare(const struct dentry *parent, const struct dentry *dentry, |
238 | const struct dentry *dentry, const struct inode *inode, | ||
239 | unsigned int len, const char *str, const struct qstr *name) | 231 | unsigned int len, const char *str, const struct qstr *name) |
240 | { | 232 | { |
241 | int i; | 233 | int i; |
diff --git a/fs/affs/dir.c b/fs/affs/dir.c index fd11a6d608ee..f1eba8c3644e 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c | |||
@@ -15,12 +15,12 @@ | |||
15 | 15 | ||
16 | #include "affs.h" | 16 | #include "affs.h" |
17 | 17 | ||
18 | static int affs_readdir(struct file *, void *, filldir_t); | 18 | static int affs_readdir(struct file *, struct dir_context *); |
19 | 19 | ||
20 | const struct file_operations affs_dir_operations = { | 20 | const struct file_operations affs_dir_operations = { |
21 | .read = generic_read_dir, | 21 | .read = generic_read_dir, |
22 | .llseek = generic_file_llseek, | 22 | .llseek = generic_file_llseek, |
23 | .readdir = affs_readdir, | 23 | .iterate = affs_readdir, |
24 | .fsync = affs_file_fsync, | 24 | .fsync = affs_file_fsync, |
25 | }; | 25 | }; |
26 | 26 | ||
@@ -40,52 +40,35 @@ const struct inode_operations affs_dir_inode_operations = { | |||
40 | }; | 40 | }; |
41 | 41 | ||
42 | static int | 42 | static int |
43 | affs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 43 | affs_readdir(struct file *file, struct dir_context *ctx) |
44 | { | 44 | { |
45 | struct inode *inode = file_inode(filp); | 45 | struct inode *inode = file_inode(file); |
46 | struct super_block *sb = inode->i_sb; | 46 | struct super_block *sb = inode->i_sb; |
47 | struct buffer_head *dir_bh; | 47 | struct buffer_head *dir_bh = NULL; |
48 | struct buffer_head *fh_bh; | 48 | struct buffer_head *fh_bh = NULL; |
49 | unsigned char *name; | 49 | unsigned char *name; |
50 | int namelen; | 50 | int namelen; |
51 | u32 i; | 51 | u32 i; |
52 | int hash_pos; | 52 | int hash_pos; |
53 | int chain_pos; | 53 | int chain_pos; |
54 | u32 f_pos; | ||
55 | u32 ino; | 54 | u32 ino; |
56 | int stored; | ||
57 | int res; | ||
58 | 55 | ||
59 | pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",inode->i_ino,(unsigned long)filp->f_pos); | 56 | pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",inode->i_ino,(unsigned long)ctx->pos); |
60 | 57 | ||
61 | stored = 0; | 58 | if (ctx->pos < 2) { |
62 | res = -EIO; | 59 | file->private_data = (void *)0; |
63 | dir_bh = NULL; | 60 | if (!dir_emit_dots(file, ctx)) |
64 | fh_bh = NULL; | ||
65 | f_pos = filp->f_pos; | ||
66 | |||
67 | if (f_pos == 0) { | ||
68 | filp->private_data = (void *)0; | ||
69 | if (filldir(dirent, ".", 1, f_pos, inode->i_ino, DT_DIR) < 0) | ||
70 | return 0; | 61 | return 0; |
71 | filp->f_pos = f_pos = 1; | ||
72 | stored++; | ||
73 | } | ||
74 | if (f_pos == 1) { | ||
75 | if (filldir(dirent, "..", 2, f_pos, parent_ino(filp->f_path.dentry), DT_DIR) < 0) | ||
76 | return stored; | ||
77 | filp->f_pos = f_pos = 2; | ||
78 | stored++; | ||
79 | } | 62 | } |
80 | 63 | ||
81 | affs_lock_dir(inode); | 64 | affs_lock_dir(inode); |
82 | chain_pos = (f_pos - 2) & 0xffff; | 65 | chain_pos = (ctx->pos - 2) & 0xffff; |
83 | hash_pos = (f_pos - 2) >> 16; | 66 | hash_pos = (ctx->pos - 2) >> 16; |
84 | if (chain_pos == 0xffff) { | 67 | if (chain_pos == 0xffff) { |
85 | affs_warning(sb, "readdir", "More than 65535 entries in chain"); | 68 | affs_warning(sb, "readdir", "More than 65535 entries in chain"); |
86 | chain_pos = 0; | 69 | chain_pos = 0; |
87 | hash_pos++; | 70 | hash_pos++; |
88 | filp->f_pos = ((hash_pos << 16) | chain_pos) + 2; | 71 | ctx->pos = ((hash_pos << 16) | chain_pos) + 2; |
89 | } | 72 | } |
90 | dir_bh = affs_bread(sb, inode->i_ino); | 73 | dir_bh = affs_bread(sb, inode->i_ino); |
91 | if (!dir_bh) | 74 | if (!dir_bh) |
@@ -94,8 +77,8 @@ affs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
94 | /* If the directory hasn't changed since the last call to readdir(), | 77 | /* If the directory hasn't changed since the last call to readdir(), |
95 | * we can jump directly to where we left off. | 78 | * we can jump directly to where we left off. |
96 | */ | 79 | */ |
97 | ino = (u32)(long)filp->private_data; | 80 | ino = (u32)(long)file->private_data; |
98 | if (ino && filp->f_version == inode->i_version) { | 81 | if (ino && file->f_version == inode->i_version) { |
99 | pr_debug("AFFS: readdir() left off=%d\n", ino); | 82 | pr_debug("AFFS: readdir() left off=%d\n", ino); |
100 | goto inside; | 83 | goto inside; |
101 | } | 84 | } |
@@ -105,7 +88,7 @@ affs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
105 | fh_bh = affs_bread(sb, ino); | 88 | fh_bh = affs_bread(sb, ino); |
106 | if (!fh_bh) { | 89 | if (!fh_bh) { |
107 | affs_error(sb, "readdir","Cannot read block %d", i); | 90 | affs_error(sb, "readdir","Cannot read block %d", i); |
108 | goto readdir_out; | 91 | return -EIO; |
109 | } | 92 | } |
110 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); | 93 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); |
111 | affs_brelse(fh_bh); | 94 | affs_brelse(fh_bh); |
@@ -119,38 +102,34 @@ affs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
119 | ino = be32_to_cpu(AFFS_HEAD(dir_bh)->table[hash_pos]); | 102 | ino = be32_to_cpu(AFFS_HEAD(dir_bh)->table[hash_pos]); |
120 | if (!ino) | 103 | if (!ino) |
121 | continue; | 104 | continue; |
122 | f_pos = (hash_pos << 16) + 2; | 105 | ctx->pos = (hash_pos << 16) + 2; |
123 | inside: | 106 | inside: |
124 | do { | 107 | do { |
125 | fh_bh = affs_bread(sb, ino); | 108 | fh_bh = affs_bread(sb, ino); |
126 | if (!fh_bh) { | 109 | if (!fh_bh) { |
127 | affs_error(sb, "readdir","Cannot read block %d", ino); | 110 | affs_error(sb, "readdir","Cannot read block %d", ino); |
128 | goto readdir_done; | 111 | break; |
129 | } | 112 | } |
130 | 113 | ||
131 | namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30); | 114 | namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30); |
132 | name = AFFS_TAIL(sb, fh_bh)->name + 1; | 115 | name = AFFS_TAIL(sb, fh_bh)->name + 1; |
133 | pr_debug("AFFS: readdir(): filldir(\"%.*s\", ino=%u), hash=%d, f_pos=%x\n", | 116 | pr_debug("AFFS: readdir(): filldir(\"%.*s\", ino=%u), hash=%d, f_pos=%x\n", |
134 | namelen, name, ino, hash_pos, f_pos); | 117 | namelen, name, ino, hash_pos, (u32)ctx->pos); |
135 | if (filldir(dirent, name, namelen, f_pos, ino, DT_UNKNOWN) < 0) | 118 | if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN)) |
136 | goto readdir_done; | 119 | goto readdir_done; |
137 | stored++; | 120 | ctx->pos++; |
138 | f_pos++; | ||
139 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); | 121 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); |
140 | affs_brelse(fh_bh); | 122 | affs_brelse(fh_bh); |
141 | fh_bh = NULL; | 123 | fh_bh = NULL; |
142 | } while (ino); | 124 | } while (ino); |
143 | } | 125 | } |
144 | readdir_done: | 126 | readdir_done: |
145 | filp->f_pos = f_pos; | 127 | file->f_version = inode->i_version; |
146 | filp->f_version = inode->i_version; | 128 | file->private_data = (void *)(long)ino; |
147 | filp->private_data = (void *)(long)ino; | ||
148 | res = stored; | ||
149 | 129 | ||
150 | readdir_out: | 130 | readdir_out: |
151 | affs_brelse(dir_bh); | 131 | affs_brelse(dir_bh); |
152 | affs_brelse(fh_bh); | 132 | affs_brelse(fh_bh); |
153 | affs_unlock_dir(inode); | 133 | affs_unlock_dir(inode); |
154 | pr_debug("AFFS: readdir()=%d\n", stored); | 134 | return 0; |
155 | return res; | ||
156 | } | 135 | } |
diff --git a/fs/affs/namei.c b/fs/affs/namei.c index ff65884a7839..c36cbb4537a2 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c | |||
@@ -13,18 +13,12 @@ | |||
13 | typedef int (*toupper_t)(int); | 13 | typedef int (*toupper_t)(int); |
14 | 14 | ||
15 | static int affs_toupper(int ch); | 15 | static int affs_toupper(int ch); |
16 | static int affs_hash_dentry(const struct dentry *, | 16 | static int affs_hash_dentry(const struct dentry *, struct qstr *); |
17 | const struct inode *, struct qstr *); | 17 | static int affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
18 | static int affs_compare_dentry(const struct dentry *parent, | ||
19 | const struct inode *pinode, | ||
20 | const struct dentry *dentry, const struct inode *inode, | ||
21 | unsigned int len, const char *str, const struct qstr *name); | 18 | unsigned int len, const char *str, const struct qstr *name); |
22 | static int affs_intl_toupper(int ch); | 19 | static int affs_intl_toupper(int ch); |
23 | static int affs_intl_hash_dentry(const struct dentry *, | 20 | static int affs_intl_hash_dentry(const struct dentry *, struct qstr *); |
24 | const struct inode *, struct qstr *); | 21 | static int affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
25 | static int affs_intl_compare_dentry(const struct dentry *parent, | ||
26 | const struct inode *pinode, | ||
27 | const struct dentry *dentry, const struct inode *inode, | ||
28 | unsigned int len, const char *str, const struct qstr *name); | 22 | unsigned int len, const char *str, const struct qstr *name); |
29 | 23 | ||
30 | const struct dentry_operations affs_dentry_operations = { | 24 | const struct dentry_operations affs_dentry_operations = { |
@@ -86,14 +80,12 @@ __affs_hash_dentry(struct qstr *qstr, toupper_t toupper) | |||
86 | } | 80 | } |
87 | 81 | ||
88 | static int | 82 | static int |
89 | affs_hash_dentry(const struct dentry *dentry, const struct inode *inode, | 83 | affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr) |
90 | struct qstr *qstr) | ||
91 | { | 84 | { |
92 | return __affs_hash_dentry(qstr, affs_toupper); | 85 | return __affs_hash_dentry(qstr, affs_toupper); |
93 | } | 86 | } |
94 | static int | 87 | static int |
95 | affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode, | 88 | affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr) |
96 | struct qstr *qstr) | ||
97 | { | 89 | { |
98 | return __affs_hash_dentry(qstr, affs_intl_toupper); | 90 | return __affs_hash_dentry(qstr, affs_intl_toupper); |
99 | } | 91 | } |
@@ -131,15 +123,13 @@ static inline int __affs_compare_dentry(unsigned int len, | |||
131 | } | 123 | } |
132 | 124 | ||
133 | static int | 125 | static int |
134 | affs_compare_dentry(const struct dentry *parent, const struct inode *pinode, | 126 | affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
135 | const struct dentry *dentry, const struct inode *inode, | ||
136 | unsigned int len, const char *str, const struct qstr *name) | 127 | unsigned int len, const char *str, const struct qstr *name) |
137 | { | 128 | { |
138 | return __affs_compare_dentry(len, str, name, affs_toupper); | 129 | return __affs_compare_dentry(len, str, name, affs_toupper); |
139 | } | 130 | } |
140 | static int | 131 | static int |
141 | affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode, | 132 | affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
142 | const struct dentry *dentry, const struct inode *inode, | ||
143 | unsigned int len, const char *str, const struct qstr *name) | 133 | unsigned int len, const char *str, const struct qstr *name) |
144 | { | 134 | { |
145 | return __affs_compare_dentry(len, str, name, affs_intl_toupper); | 135 | return __affs_compare_dentry(len, str, name, affs_intl_toupper); |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 7a465ed04444..34494fbead0a 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -22,7 +22,7 @@ | |||
22 | static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | 22 | static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, |
23 | unsigned int flags); | 23 | unsigned int flags); |
24 | static int afs_dir_open(struct inode *inode, struct file *file); | 24 | static int afs_dir_open(struct inode *inode, struct file *file); |
25 | static int afs_readdir(struct file *file, void *dirent, filldir_t filldir); | 25 | static int afs_readdir(struct file *file, struct dir_context *ctx); |
26 | static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); | 26 | static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); |
27 | static int afs_d_delete(const struct dentry *dentry); | 27 | static int afs_d_delete(const struct dentry *dentry); |
28 | static void afs_d_release(struct dentry *dentry); | 28 | static void afs_d_release(struct dentry *dentry); |
@@ -43,7 +43,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
43 | const struct file_operations afs_dir_file_operations = { | 43 | const struct file_operations afs_dir_file_operations = { |
44 | .open = afs_dir_open, | 44 | .open = afs_dir_open, |
45 | .release = afs_release, | 45 | .release = afs_release, |
46 | .readdir = afs_readdir, | 46 | .iterate = afs_readdir, |
47 | .lock = afs_lock, | 47 | .lock = afs_lock, |
48 | .llseek = generic_file_llseek, | 48 | .llseek = generic_file_llseek, |
49 | }; | 49 | }; |
@@ -119,9 +119,9 @@ struct afs_dir_page { | |||
119 | }; | 119 | }; |
120 | 120 | ||
121 | struct afs_lookup_cookie { | 121 | struct afs_lookup_cookie { |
122 | struct dir_context ctx; | ||
122 | struct afs_fid fid; | 123 | struct afs_fid fid; |
123 | const char *name; | 124 | struct qstr name; |
124 | size_t nlen; | ||
125 | int found; | 125 | int found; |
126 | }; | 126 | }; |
127 | 127 | ||
@@ -228,20 +228,18 @@ static int afs_dir_open(struct inode *inode, struct file *file) | |||
228 | /* | 228 | /* |
229 | * deal with one block in an AFS directory | 229 | * deal with one block in an AFS directory |
230 | */ | 230 | */ |
231 | static int afs_dir_iterate_block(unsigned *fpos, | 231 | static int afs_dir_iterate_block(struct dir_context *ctx, |
232 | union afs_dir_block *block, | 232 | union afs_dir_block *block, |
233 | unsigned blkoff, | 233 | unsigned blkoff) |
234 | void *cookie, | ||
235 | filldir_t filldir) | ||
236 | { | 234 | { |
237 | union afs_dirent *dire; | 235 | union afs_dirent *dire; |
238 | unsigned offset, next, curr; | 236 | unsigned offset, next, curr; |
239 | size_t nlen; | 237 | size_t nlen; |
240 | int tmp, ret; | 238 | int tmp; |
241 | 239 | ||
242 | _enter("%u,%x,%p,,",*fpos,blkoff,block); | 240 | _enter("%u,%x,%p,,",(unsigned)ctx->pos,blkoff,block); |
243 | 241 | ||
244 | curr = (*fpos - blkoff) / sizeof(union afs_dirent); | 242 | curr = (ctx->pos - blkoff) / sizeof(union afs_dirent); |
245 | 243 | ||
246 | /* walk through the block, an entry at a time */ | 244 | /* walk through the block, an entry at a time */ |
247 | for (offset = AFS_DIRENT_PER_BLOCK - block->pagehdr.nentries; | 245 | for (offset = AFS_DIRENT_PER_BLOCK - block->pagehdr.nentries; |
@@ -256,7 +254,7 @@ static int afs_dir_iterate_block(unsigned *fpos, | |||
256 | _debug("ENT[%Zu.%u]: unused", | 254 | _debug("ENT[%Zu.%u]: unused", |
257 | blkoff / sizeof(union afs_dir_block), offset); | 255 | blkoff / sizeof(union afs_dir_block), offset); |
258 | if (offset >= curr) | 256 | if (offset >= curr) |
259 | *fpos = blkoff + | 257 | ctx->pos = blkoff + |
260 | next * sizeof(union afs_dirent); | 258 | next * sizeof(union afs_dirent); |
261 | continue; | 259 | continue; |
262 | } | 260 | } |
@@ -302,19 +300,15 @@ static int afs_dir_iterate_block(unsigned *fpos, | |||
302 | continue; | 300 | continue; |
303 | 301 | ||
304 | /* found the next entry */ | 302 | /* found the next entry */ |
305 | ret = filldir(cookie, | 303 | if (!dir_emit(ctx, dire->u.name, nlen, |
306 | dire->u.name, | ||
307 | nlen, | ||
308 | blkoff + offset * sizeof(union afs_dirent), | ||
309 | ntohl(dire->u.vnode), | 304 | ntohl(dire->u.vnode), |
310 | filldir == afs_lookup_filldir ? | 305 | ctx->actor == afs_lookup_filldir ? |
311 | ntohl(dire->u.unique) : DT_UNKNOWN); | 306 | ntohl(dire->u.unique) : DT_UNKNOWN)) { |
312 | if (ret < 0) { | ||
313 | _leave(" = 0 [full]"); | 307 | _leave(" = 0 [full]"); |
314 | return 0; | 308 | return 0; |
315 | } | 309 | } |
316 | 310 | ||
317 | *fpos = blkoff + next * sizeof(union afs_dirent); | 311 | ctx->pos = blkoff + next * sizeof(union afs_dirent); |
318 | } | 312 | } |
319 | 313 | ||
320 | _leave(" = 1 [more]"); | 314 | _leave(" = 1 [more]"); |
@@ -324,8 +318,8 @@ static int afs_dir_iterate_block(unsigned *fpos, | |||
324 | /* | 318 | /* |
325 | * iterate through the data blob that lists the contents of an AFS directory | 319 | * iterate through the data blob that lists the contents of an AFS directory |
326 | */ | 320 | */ |
327 | static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, | 321 | static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, |
328 | filldir_t filldir, struct key *key) | 322 | struct key *key) |
329 | { | 323 | { |
330 | union afs_dir_block *dblock; | 324 | union afs_dir_block *dblock; |
331 | struct afs_dir_page *dbuf; | 325 | struct afs_dir_page *dbuf; |
@@ -333,7 +327,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, | |||
333 | unsigned blkoff, limit; | 327 | unsigned blkoff, limit; |
334 | int ret; | 328 | int ret; |
335 | 329 | ||
336 | _enter("{%lu},%u,,", dir->i_ino, *fpos); | 330 | _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos); |
337 | 331 | ||
338 | if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) { | 332 | if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) { |
339 | _leave(" = -ESTALE"); | 333 | _leave(" = -ESTALE"); |
@@ -341,13 +335,13 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, | |||
341 | } | 335 | } |
342 | 336 | ||
343 | /* round the file position up to the next entry boundary */ | 337 | /* round the file position up to the next entry boundary */ |
344 | *fpos += sizeof(union afs_dirent) - 1; | 338 | ctx->pos += sizeof(union afs_dirent) - 1; |
345 | *fpos &= ~(sizeof(union afs_dirent) - 1); | 339 | ctx->pos &= ~(sizeof(union afs_dirent) - 1); |
346 | 340 | ||
347 | /* walk through the blocks in sequence */ | 341 | /* walk through the blocks in sequence */ |
348 | ret = 0; | 342 | ret = 0; |
349 | while (*fpos < dir->i_size) { | 343 | while (ctx->pos < dir->i_size) { |
350 | blkoff = *fpos & ~(sizeof(union afs_dir_block) - 1); | 344 | blkoff = ctx->pos & ~(sizeof(union afs_dir_block) - 1); |
351 | 345 | ||
352 | /* fetch the appropriate page from the directory */ | 346 | /* fetch the appropriate page from the directory */ |
353 | page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key); | 347 | page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key); |
@@ -364,8 +358,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, | |||
364 | do { | 358 | do { |
365 | dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / | 359 | dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / |
366 | sizeof(union afs_dir_block)]; | 360 | sizeof(union afs_dir_block)]; |
367 | ret = afs_dir_iterate_block(fpos, dblock, blkoff, | 361 | ret = afs_dir_iterate_block(ctx, dblock, blkoff); |
368 | cookie, filldir); | ||
369 | if (ret != 1) { | 362 | if (ret != 1) { |
370 | afs_dir_put_page(page); | 363 | afs_dir_put_page(page); |
371 | goto out; | 364 | goto out; |
@@ -373,7 +366,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, | |||
373 | 366 | ||
374 | blkoff += sizeof(union afs_dir_block); | 367 | blkoff += sizeof(union afs_dir_block); |
375 | 368 | ||
376 | } while (*fpos < dir->i_size && blkoff < limit); | 369 | } while (ctx->pos < dir->i_size && blkoff < limit); |
377 | 370 | ||
378 | afs_dir_put_page(page); | 371 | afs_dir_put_page(page); |
379 | ret = 0; | 372 | ret = 0; |
@@ -387,23 +380,10 @@ out: | |||
387 | /* | 380 | /* |
388 | * read an AFS directory | 381 | * read an AFS directory |
389 | */ | 382 | */ |
390 | static int afs_readdir(struct file *file, void *cookie, filldir_t filldir) | 383 | static int afs_readdir(struct file *file, struct dir_context *ctx) |
391 | { | 384 | { |
392 | unsigned fpos; | 385 | return afs_dir_iterate(file_inode(file), |
393 | int ret; | 386 | ctx, file->private_data); |
394 | |||
395 | _enter("{%Ld,{%lu}}", | ||
396 | file->f_pos, file_inode(file)->i_ino); | ||
397 | |||
398 | ASSERT(file->private_data != NULL); | ||
399 | |||
400 | fpos = file->f_pos; | ||
401 | ret = afs_dir_iterate(file_inode(file), &fpos, | ||
402 | cookie, filldir, file->private_data); | ||
403 | file->f_pos = fpos; | ||
404 | |||
405 | _leave(" = %d", ret); | ||
406 | return ret; | ||
407 | } | 387 | } |
408 | 388 | ||
409 | /* | 389 | /* |
@@ -416,15 +396,16 @@ static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, | |||
416 | { | 396 | { |
417 | struct afs_lookup_cookie *cookie = _cookie; | 397 | struct afs_lookup_cookie *cookie = _cookie; |
418 | 398 | ||
419 | _enter("{%s,%Zu},%s,%u,,%llu,%u", | 399 | _enter("{%s,%u},%s,%u,,%llu,%u", |
420 | cookie->name, cookie->nlen, name, nlen, | 400 | cookie->name.name, cookie->name.len, name, nlen, |
421 | (unsigned long long) ino, dtype); | 401 | (unsigned long long) ino, dtype); |
422 | 402 | ||
423 | /* insanity checks first */ | 403 | /* insanity checks first */ |
424 | BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); | 404 | BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); |
425 | BUILD_BUG_ON(sizeof(union afs_dirent) != 32); | 405 | BUILD_BUG_ON(sizeof(union afs_dirent) != 32); |
426 | 406 | ||
427 | if (cookie->nlen != nlen || memcmp(cookie->name, name, nlen) != 0) { | 407 | if (cookie->name.len != nlen || |
408 | memcmp(cookie->name.name, name, nlen) != 0) { | ||
428 | _leave(" = 0 [no]"); | 409 | _leave(" = 0 [no]"); |
429 | return 0; | 410 | return 0; |
430 | } | 411 | } |
@@ -444,24 +425,18 @@ static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, | |||
444 | static int afs_do_lookup(struct inode *dir, struct dentry *dentry, | 425 | static int afs_do_lookup(struct inode *dir, struct dentry *dentry, |
445 | struct afs_fid *fid, struct key *key) | 426 | struct afs_fid *fid, struct key *key) |
446 | { | 427 | { |
447 | struct afs_lookup_cookie cookie; | 428 | struct afs_super_info *as = dir->i_sb->s_fs_info; |
448 | struct afs_super_info *as; | 429 | struct afs_lookup_cookie cookie = { |
449 | unsigned fpos; | 430 | .ctx.actor = afs_lookup_filldir, |
431 | .name = dentry->d_name, | ||
432 | .fid.vid = as->volume->vid | ||
433 | }; | ||
450 | int ret; | 434 | int ret; |
451 | 435 | ||
452 | _enter("{%lu},%p{%s},", dir->i_ino, dentry, dentry->d_name.name); | 436 | _enter("{%lu},%p{%s},", dir->i_ino, dentry, dentry->d_name.name); |
453 | 437 | ||
454 | as = dir->i_sb->s_fs_info; | ||
455 | |||
456 | /* search the directory */ | 438 | /* search the directory */ |
457 | cookie.name = dentry->d_name.name; | 439 | ret = afs_dir_iterate(dir, &cookie.ctx, key); |
458 | cookie.nlen = dentry->d_name.len; | ||
459 | cookie.fid.vid = as->volume->vid; | ||
460 | cookie.found = 0; | ||
461 | |||
462 | fpos = 0; | ||
463 | ret = afs_dir_iterate(dir, &fpos, &cookie, afs_lookup_filldir, | ||
464 | key); | ||
465 | if (ret < 0) { | 440 | if (ret < 0) { |
466 | _leave(" = %d [iter]", ret); | 441 | _leave(" = %d [iter]", ret); |
467 | return ret; | 442 | return ret; |
diff --git a/fs/afs/file.c b/fs/afs/file.c index 8f6e9234d565..66d50fe2ee45 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
@@ -19,7 +19,8 @@ | |||
19 | #include "internal.h" | 19 | #include "internal.h" |
20 | 20 | ||
21 | static int afs_readpage(struct file *file, struct page *page); | 21 | static int afs_readpage(struct file *file, struct page *page); |
22 | static void afs_invalidatepage(struct page *page, unsigned long offset); | 22 | static void afs_invalidatepage(struct page *page, unsigned int offset, |
23 | unsigned int length); | ||
23 | static int afs_releasepage(struct page *page, gfp_t gfp_flags); | 24 | static int afs_releasepage(struct page *page, gfp_t gfp_flags); |
24 | static int afs_launder_page(struct page *page); | 25 | static int afs_launder_page(struct page *page); |
25 | 26 | ||
@@ -310,16 +311,17 @@ static int afs_launder_page(struct page *page) | |||
310 | * - release a page and clean up its private data if offset is 0 (indicating | 311 | * - release a page and clean up its private data if offset is 0 (indicating |
311 | * the entire page) | 312 | * the entire page) |
312 | */ | 313 | */ |
313 | static void afs_invalidatepage(struct page *page, unsigned long offset) | 314 | static void afs_invalidatepage(struct page *page, unsigned int offset, |
315 | unsigned int length) | ||
314 | { | 316 | { |
315 | struct afs_writeback *wb = (struct afs_writeback *) page_private(page); | 317 | struct afs_writeback *wb = (struct afs_writeback *) page_private(page); |
316 | 318 | ||
317 | _enter("{%lu},%lu", page->index, offset); | 319 | _enter("{%lu},%u,%u", page->index, offset, length); |
318 | 320 | ||
319 | BUG_ON(!PageLocked(page)); | 321 | BUG_ON(!PageLocked(page)); |
320 | 322 | ||
321 | /* we clean up only if the entire page is being invalidated */ | 323 | /* we clean up only if the entire page is being invalidated */ |
322 | if (offset == 0) { | 324 | if (offset == 0 && length == PAGE_CACHE_SIZE) { |
323 | #ifdef CONFIG_AFS_FSCACHE | 325 | #ifdef CONFIG_AFS_FSCACHE |
324 | if (PageFsCache(page)) { | 326 | if (PageFsCache(page)) { |
325 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); | 327 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); |
diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 2497bf306c70..a8cf2cff836c 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c | |||
@@ -252,7 +252,8 @@ static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key) | |||
252 | */ | 252 | */ |
253 | static int afs_do_setlk(struct file *file, struct file_lock *fl) | 253 | static int afs_do_setlk(struct file *file, struct file_lock *fl) |
254 | { | 254 | { |
255 | struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); | 255 | struct inode *inode = file_inode(file); |
256 | struct afs_vnode *vnode = AFS_FS_I(inode); | ||
256 | afs_lock_type_t type; | 257 | afs_lock_type_t type; |
257 | struct key *key = file->private_data; | 258 | struct key *key = file->private_data; |
258 | int ret; | 259 | int ret; |
@@ -273,7 +274,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) | |||
273 | 274 | ||
274 | type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; | 275 | type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; |
275 | 276 | ||
276 | lock_flocks(); | 277 | spin_lock(&inode->i_lock); |
277 | 278 | ||
278 | /* make sure we've got a callback on this file and that our view of the | 279 | /* make sure we've got a callback on this file and that our view of the |
279 | * data version is up to date */ | 280 | * data version is up to date */ |
@@ -420,7 +421,7 @@ given_lock: | |||
420 | afs_vnode_fetch_status(vnode, NULL, key); | 421 | afs_vnode_fetch_status(vnode, NULL, key); |
421 | 422 | ||
422 | error: | 423 | error: |
423 | unlock_flocks(); | 424 | spin_unlock(&inode->i_lock); |
424 | _leave(" = %d", ret); | 425 | _leave(" = %d", ret); |
425 | return ret; | 426 | return ret; |
426 | 427 | ||
@@ -39,6 +39,8 @@ | |||
39 | #include <asm/kmap_types.h> | 39 | #include <asm/kmap_types.h> |
40 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
41 | 41 | ||
42 | #include "internal.h" | ||
43 | |||
42 | #define AIO_RING_MAGIC 0xa10a10a1 | 44 | #define AIO_RING_MAGIC 0xa10a10a1 |
43 | #define AIO_RING_COMPAT_FEATURES 1 | 45 | #define AIO_RING_COMPAT_FEATURES 1 |
44 | #define AIO_RING_INCOMPAT_FEATURES 0 | 46 | #define AIO_RING_INCOMPAT_FEATURES 0 |
@@ -623,7 +625,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) | |||
623 | 625 | ||
624 | /* | 626 | /* |
625 | * Add a completion event to the ring buffer. Must be done holding | 627 | * Add a completion event to the ring buffer. Must be done holding |
626 | * ctx->ctx_lock to prevent other code from messing with the tail | 628 | * ctx->completion_lock to prevent other code from messing with the tail |
627 | * pointer since we might be called from irq context. | 629 | * pointer since we might be called from irq context. |
628 | */ | 630 | */ |
629 | spin_lock_irqsave(&ctx->completion_lock, flags); | 631 | spin_lock_irqsave(&ctx->completion_lock, flags); |
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 13ddec92341c..3d9d3f5d5dda 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c | |||
@@ -109,7 +109,7 @@ cont: | |||
109 | 109 | ||
110 | spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED); | 110 | spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED); |
111 | /* Already gone or negative dentry (under construction) - try next */ | 111 | /* Already gone or negative dentry (under construction) - try next */ |
112 | if (q->d_count == 0 || !simple_positive(q)) { | 112 | if (!d_count(q) || !simple_positive(q)) { |
113 | spin_unlock(&q->d_lock); | 113 | spin_unlock(&q->d_lock); |
114 | next = q->d_u.d_child.next; | 114 | next = q->d_u.d_child.next; |
115 | goto cont; | 115 | goto cont; |
@@ -267,7 +267,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt, | |||
267 | else | 267 | else |
268 | ino_count++; | 268 | ino_count++; |
269 | 269 | ||
270 | if (p->d_count > ino_count) { | 270 | if (d_count(p) > ino_count) { |
271 | top_ino->last_used = jiffies; | 271 | top_ino->last_used = jiffies; |
272 | dput(p); | 272 | dput(p); |
273 | return 1; | 273 | return 1; |
@@ -409,7 +409,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, | |||
409 | if (!exp_leaves) { | 409 | if (!exp_leaves) { |
410 | /* Path walk currently on this dentry? */ | 410 | /* Path walk currently on this dentry? */ |
411 | ino_count = atomic_read(&ino->count) + 1; | 411 | ino_count = atomic_read(&ino->count) + 1; |
412 | if (dentry->d_count > ino_count) | 412 | if (d_count(dentry) > ino_count) |
413 | goto next; | 413 | goto next; |
414 | 414 | ||
415 | if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { | 415 | if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { |
@@ -423,7 +423,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, | |||
423 | } else { | 423 | } else { |
424 | /* Path walk currently on this dentry? */ | 424 | /* Path walk currently on this dentry? */ |
425 | ino_count = atomic_read(&ino->count) + 1; | 425 | ino_count = atomic_read(&ino->count) + 1; |
426 | if (dentry->d_count > ino_count) | 426 | if (d_count(dentry) > ino_count) |
427 | goto next; | 427 | goto next; |
428 | 428 | ||
429 | expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); | 429 | expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 085da86e07c2..92ef341ba0cf 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -41,7 +41,7 @@ const struct file_operations autofs4_root_operations = { | |||
41 | .open = dcache_dir_open, | 41 | .open = dcache_dir_open, |
42 | .release = dcache_dir_close, | 42 | .release = dcache_dir_close, |
43 | .read = generic_read_dir, | 43 | .read = generic_read_dir, |
44 | .readdir = dcache_readdir, | 44 | .iterate = dcache_readdir, |
45 | .llseek = dcache_dir_lseek, | 45 | .llseek = dcache_dir_lseek, |
46 | .unlocked_ioctl = autofs4_root_ioctl, | 46 | .unlocked_ioctl = autofs4_root_ioctl, |
47 | #ifdef CONFIG_COMPAT | 47 | #ifdef CONFIG_COMPAT |
@@ -53,7 +53,7 @@ const struct file_operations autofs4_dir_operations = { | |||
53 | .open = autofs4_dir_open, | 53 | .open = autofs4_dir_open, |
54 | .release = dcache_dir_close, | 54 | .release = dcache_dir_close, |
55 | .read = generic_read_dir, | 55 | .read = generic_read_dir, |
56 | .readdir = dcache_readdir, | 56 | .iterate = dcache_readdir, |
57 | .llseek = dcache_dir_lseek, | 57 | .llseek = dcache_dir_lseek, |
58 | }; | 58 | }; |
59 | 59 | ||
@@ -179,7 +179,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) | |||
179 | spin_lock(&active->d_lock); | 179 | spin_lock(&active->d_lock); |
180 | 180 | ||
181 | /* Already gone? */ | 181 | /* Already gone? */ |
182 | if (active->d_count == 0) | 182 | if (!d_count(active)) |
183 | goto next; | 183 | goto next; |
184 | 184 | ||
185 | qstr = &active->d_name; | 185 | qstr = &active->d_name; |
diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 922ad460bff9..7c93953030fb 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c | |||
@@ -45,7 +45,7 @@ static ssize_t bad_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
45 | return -EIO; | 45 | return -EIO; |
46 | } | 46 | } |
47 | 47 | ||
48 | static int bad_file_readdir(struct file *filp, void *dirent, filldir_t filldir) | 48 | static int bad_file_readdir(struct file *file, struct dir_context *ctx) |
49 | { | 49 | { |
50 | return -EIO; | 50 | return -EIO; |
51 | } | 51 | } |
@@ -152,7 +152,7 @@ static const struct file_operations bad_file_ops = | |||
152 | .write = bad_file_write, | 152 | .write = bad_file_write, |
153 | .aio_read = bad_file_aio_read, | 153 | .aio_read = bad_file_aio_read, |
154 | .aio_write = bad_file_aio_write, | 154 | .aio_write = bad_file_aio_write, |
155 | .readdir = bad_file_readdir, | 155 | .iterate = bad_file_readdir, |
156 | .poll = bad_file_poll, | 156 | .poll = bad_file_poll, |
157 | .unlocked_ioctl = bad_file_unlocked_ioctl, | 157 | .unlocked_ioctl = bad_file_unlocked_ioctl, |
158 | .compat_ioctl = bad_file_compat_ioctl, | 158 | .compat_ioctl = bad_file_compat_ioctl, |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index f95dddced968..e9c75e20db32 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -31,7 +31,7 @@ MODULE_LICENSE("GPL"); | |||
31 | /* The units the vfs expects inode->i_blocks to be in */ | 31 | /* The units the vfs expects inode->i_blocks to be in */ |
32 | #define VFS_BLOCK_SIZE 512 | 32 | #define VFS_BLOCK_SIZE 512 |
33 | 33 | ||
34 | static int befs_readdir(struct file *, void *, filldir_t); | 34 | static int befs_readdir(struct file *, struct dir_context *); |
35 | static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); | 35 | static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); |
36 | static int befs_readpage(struct file *file, struct page *page); | 36 | static int befs_readpage(struct file *file, struct page *page); |
37 | static sector_t befs_bmap(struct address_space *mapping, sector_t block); | 37 | static sector_t befs_bmap(struct address_space *mapping, sector_t block); |
@@ -66,7 +66,7 @@ static struct kmem_cache *befs_inode_cachep; | |||
66 | 66 | ||
67 | static const struct file_operations befs_dir_operations = { | 67 | static const struct file_operations befs_dir_operations = { |
68 | .read = generic_read_dir, | 68 | .read = generic_read_dir, |
69 | .readdir = befs_readdir, | 69 | .iterate = befs_readdir, |
70 | .llseek = generic_file_llseek, | 70 | .llseek = generic_file_llseek, |
71 | }; | 71 | }; |
72 | 72 | ||
@@ -211,9 +211,9 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | |||
211 | } | 211 | } |
212 | 212 | ||
213 | static int | 213 | static int |
214 | befs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 214 | befs_readdir(struct file *file, struct dir_context *ctx) |
215 | { | 215 | { |
216 | struct inode *inode = file_inode(filp); | 216 | struct inode *inode = file_inode(file); |
217 | struct super_block *sb = inode->i_sb; | 217 | struct super_block *sb = inode->i_sb; |
218 | befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; | 218 | befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; |
219 | befs_off_t value; | 219 | befs_off_t value; |
@@ -221,15 +221,14 @@ befs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
221 | size_t keysize; | 221 | size_t keysize; |
222 | unsigned char d_type; | 222 | unsigned char d_type; |
223 | char keybuf[BEFS_NAME_LEN + 1]; | 223 | char keybuf[BEFS_NAME_LEN + 1]; |
224 | char *nlsname; | 224 | const char *dirname = file->f_path.dentry->d_name.name; |
225 | int nlsnamelen; | ||
226 | const char *dirname = filp->f_path.dentry->d_name.name; | ||
227 | 225 | ||
228 | befs_debug(sb, "---> befs_readdir() " | 226 | befs_debug(sb, "---> befs_readdir() " |
229 | "name %s, inode %ld, filp->f_pos %Ld", | 227 | "name %s, inode %ld, ctx->pos %Ld", |
230 | dirname, inode->i_ino, filp->f_pos); | 228 | dirname, inode->i_ino, ctx->pos); |
231 | 229 | ||
232 | result = befs_btree_read(sb, ds, filp->f_pos, BEFS_NAME_LEN + 1, | 230 | more: |
231 | result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1, | ||
233 | keybuf, &keysize, &value); | 232 | keybuf, &keysize, &value); |
234 | 233 | ||
235 | if (result == BEFS_ERR) { | 234 | if (result == BEFS_ERR) { |
@@ -251,24 +250,29 @@ befs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
251 | 250 | ||
252 | /* Convert to NLS */ | 251 | /* Convert to NLS */ |
253 | if (BEFS_SB(sb)->nls) { | 252 | if (BEFS_SB(sb)->nls) { |
253 | char *nlsname; | ||
254 | int nlsnamelen; | ||
254 | result = | 255 | result = |
255 | befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen); | 256 | befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen); |
256 | if (result < 0) { | 257 | if (result < 0) { |
257 | befs_debug(sb, "<--- befs_readdir() ERROR"); | 258 | befs_debug(sb, "<--- befs_readdir() ERROR"); |
258 | return result; | 259 | return result; |
259 | } | 260 | } |
260 | result = filldir(dirent, nlsname, nlsnamelen, filp->f_pos, | 261 | if (!dir_emit(ctx, nlsname, nlsnamelen, |
261 | (ino_t) value, d_type); | 262 | (ino_t) value, d_type)) { |
263 | kfree(nlsname); | ||
264 | return 0; | ||
265 | } | ||
262 | kfree(nlsname); | 266 | kfree(nlsname); |
263 | |||
264 | } else { | 267 | } else { |
265 | result = filldir(dirent, keybuf, keysize, filp->f_pos, | 268 | if (!dir_emit(ctx, keybuf, keysize, |
266 | (ino_t) value, d_type); | 269 | (ino_t) value, d_type)) |
270 | return 0; | ||
267 | } | 271 | } |
268 | if (!result) | 272 | ctx->pos++; |
269 | filp->f_pos++; | 273 | goto more; |
270 | 274 | ||
271 | befs_debug(sb, "<--- befs_readdir() filp->f_pos %Ld", filp->f_pos); | 275 | befs_debug(sb, "<--- befs_readdir() pos %Ld", ctx->pos); |
272 | 276 | ||
273 | return 0; | 277 | return 0; |
274 | } | 278 | } |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 3f422f6bb5ca..a399e6d9dc74 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
@@ -26,58 +26,51 @@ static struct buffer_head *bfs_find_entry(struct inode *dir, | |||
26 | const unsigned char *name, int namelen, | 26 | const unsigned char *name, int namelen, |
27 | struct bfs_dirent **res_dir); | 27 | struct bfs_dirent **res_dir); |
28 | 28 | ||
29 | static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) | 29 | static int bfs_readdir(struct file *f, struct dir_context *ctx) |
30 | { | 30 | { |
31 | struct inode *dir = file_inode(f); | 31 | struct inode *dir = file_inode(f); |
32 | struct buffer_head *bh; | 32 | struct buffer_head *bh; |
33 | struct bfs_dirent *de; | 33 | struct bfs_dirent *de; |
34 | struct bfs_sb_info *info = BFS_SB(dir->i_sb); | ||
35 | unsigned int offset; | 34 | unsigned int offset; |
36 | int block; | 35 | int block; |
37 | 36 | ||
38 | mutex_lock(&info->bfs_lock); | 37 | if (ctx->pos & (BFS_DIRENT_SIZE - 1)) { |
39 | |||
40 | if (f->f_pos & (BFS_DIRENT_SIZE - 1)) { | ||
41 | printf("Bad f_pos=%08lx for %s:%08lx\n", | 38 | printf("Bad f_pos=%08lx for %s:%08lx\n", |
42 | (unsigned long)f->f_pos, | 39 | (unsigned long)ctx->pos, |
43 | dir->i_sb->s_id, dir->i_ino); | 40 | dir->i_sb->s_id, dir->i_ino); |
44 | mutex_unlock(&info->bfs_lock); | 41 | return -EINVAL; |
45 | return -EBADF; | ||
46 | } | 42 | } |
47 | 43 | ||
48 | while (f->f_pos < dir->i_size) { | 44 | while (ctx->pos < dir->i_size) { |
49 | offset = f->f_pos & (BFS_BSIZE - 1); | 45 | offset = ctx->pos & (BFS_BSIZE - 1); |
50 | block = BFS_I(dir)->i_sblock + (f->f_pos >> BFS_BSIZE_BITS); | 46 | block = BFS_I(dir)->i_sblock + (ctx->pos >> BFS_BSIZE_BITS); |
51 | bh = sb_bread(dir->i_sb, block); | 47 | bh = sb_bread(dir->i_sb, block); |
52 | if (!bh) { | 48 | if (!bh) { |
53 | f->f_pos += BFS_BSIZE - offset; | 49 | ctx->pos += BFS_BSIZE - offset; |
54 | continue; | 50 | continue; |
55 | } | 51 | } |
56 | do { | 52 | do { |
57 | de = (struct bfs_dirent *)(bh->b_data + offset); | 53 | de = (struct bfs_dirent *)(bh->b_data + offset); |
58 | if (de->ino) { | 54 | if (de->ino) { |
59 | int size = strnlen(de->name, BFS_NAMELEN); | 55 | int size = strnlen(de->name, BFS_NAMELEN); |
60 | if (filldir(dirent, de->name, size, f->f_pos, | 56 | if (!dir_emit(ctx, de->name, size, |
61 | le16_to_cpu(de->ino), | 57 | le16_to_cpu(de->ino), |
62 | DT_UNKNOWN) < 0) { | 58 | DT_UNKNOWN)) { |
63 | brelse(bh); | 59 | brelse(bh); |
64 | mutex_unlock(&info->bfs_lock); | ||
65 | return 0; | 60 | return 0; |
66 | } | 61 | } |
67 | } | 62 | } |
68 | offset += BFS_DIRENT_SIZE; | 63 | offset += BFS_DIRENT_SIZE; |
69 | f->f_pos += BFS_DIRENT_SIZE; | 64 | ctx->pos += BFS_DIRENT_SIZE; |
70 | } while ((offset < BFS_BSIZE) && (f->f_pos < dir->i_size)); | 65 | } while ((offset < BFS_BSIZE) && (ctx->pos < dir->i_size)); |
71 | brelse(bh); | 66 | brelse(bh); |
72 | } | 67 | } |
73 | 68 | return 0; | |
74 | mutex_unlock(&info->bfs_lock); | ||
75 | return 0; | ||
76 | } | 69 | } |
77 | 70 | ||
78 | const struct file_operations bfs_dir_operations = { | 71 | const struct file_operations bfs_dir_operations = { |
79 | .read = generic_read_dir, | 72 | .read = generic_read_dir, |
80 | .readdir = bfs_readdir, | 73 | .iterate = bfs_readdir, |
81 | .fsync = generic_file_fsync, | 74 | .fsync = generic_file_fsync, |
82 | .llseek = generic_file_llseek, | 75 | .llseek = generic_file_llseek, |
83 | }; | 76 | }; |
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index bce87694f7b0..89dec7f789a4 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c | |||
@@ -255,8 +255,6 @@ static int load_aout_binary(struct linux_binprm * bprm) | |||
255 | (current->mm->start_data = N_DATADDR(ex)); | 255 | (current->mm->start_data = N_DATADDR(ex)); |
256 | current->mm->brk = ex.a_bss + | 256 | current->mm->brk = ex.a_bss + |
257 | (current->mm->start_brk = N_BSSADDR(ex)); | 257 | (current->mm->start_brk = N_BSSADDR(ex)); |
258 | current->mm->free_area_cache = current->mm->mmap_base; | ||
259 | current->mm->cached_hole_size = 0; | ||
260 | 258 | ||
261 | retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); | 259 | retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); |
262 | if (retval < 0) { | 260 | if (retval < 0) { |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f8a0b0efda44..100edcc5e312 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -738,8 +738,6 @@ static int load_elf_binary(struct linux_binprm *bprm) | |||
738 | 738 | ||
739 | /* Do this so that we can load the interpreter, if need be. We will | 739 | /* Do this so that we can load the interpreter, if need be. We will |
740 | change some of these later */ | 740 | change some of these later */ |
741 | current->mm->free_area_cache = current->mm->mmap_base; | ||
742 | current->mm->cached_hole_size = 0; | ||
743 | retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), | 741 | retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), |
744 | executable_stack); | 742 | executable_stack); |
745 | if (retval < 0) { | 743 | if (retval < 0) { |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 2091db8cdd78..c7bda5cd3da7 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -58,17 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode, | |||
58 | struct backing_dev_info *dst) | 58 | struct backing_dev_info *dst) |
59 | { | 59 | { |
60 | struct backing_dev_info *old = inode->i_data.backing_dev_info; | 60 | struct backing_dev_info *old = inode->i_data.backing_dev_info; |
61 | bool wakeup_bdi = false; | ||
61 | 62 | ||
62 | if (unlikely(dst == old)) /* deadlock avoidance */ | 63 | if (unlikely(dst == old)) /* deadlock avoidance */ |
63 | return; | 64 | return; |
64 | bdi_lock_two(&old->wb, &dst->wb); | 65 | bdi_lock_two(&old->wb, &dst->wb); |
65 | spin_lock(&inode->i_lock); | 66 | spin_lock(&inode->i_lock); |
66 | inode->i_data.backing_dev_info = dst; | 67 | inode->i_data.backing_dev_info = dst; |
67 | if (inode->i_state & I_DIRTY) | 68 | if (inode->i_state & I_DIRTY) { |
69 | if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) | ||
70 | wakeup_bdi = true; | ||
68 | list_move(&inode->i_wb_list, &dst->wb.b_dirty); | 71 | list_move(&inode->i_wb_list, &dst->wb.b_dirty); |
72 | } | ||
69 | spin_unlock(&inode->i_lock); | 73 | spin_unlock(&inode->i_lock); |
70 | spin_unlock(&old->wb.list_lock); | 74 | spin_unlock(&old->wb.list_lock); |
71 | spin_unlock(&dst->wb.list_lock); | 75 | spin_unlock(&dst->wb.list_lock); |
76 | |||
77 | if (wakeup_bdi) | ||
78 | bdi_wakeup_thread_delayed(dst); | ||
72 | } | 79 | } |
73 | 80 | ||
74 | /* Kill _all_ buffers and pagecache , dirty or not.. */ | 81 | /* Kill _all_ buffers and pagecache , dirty or not.. */ |
@@ -325,31 +332,10 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping, | |||
325 | static loff_t block_llseek(struct file *file, loff_t offset, int whence) | 332 | static loff_t block_llseek(struct file *file, loff_t offset, int whence) |
326 | { | 333 | { |
327 | struct inode *bd_inode = file->f_mapping->host; | 334 | struct inode *bd_inode = file->f_mapping->host; |
328 | loff_t size; | ||
329 | loff_t retval; | 335 | loff_t retval; |
330 | 336 | ||
331 | mutex_lock(&bd_inode->i_mutex); | 337 | mutex_lock(&bd_inode->i_mutex); |
332 | size = i_size_read(bd_inode); | 338 | retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode)); |
333 | |||
334 | retval = -EINVAL; | ||
335 | switch (whence) { | ||
336 | case SEEK_END: | ||
337 | offset += size; | ||
338 | break; | ||
339 | case SEEK_CUR: | ||
340 | offset += file->f_pos; | ||
341 | case SEEK_SET: | ||
342 | break; | ||
343 | default: | ||
344 | goto out; | ||
345 | } | ||
346 | if (offset >= 0 && offset <= size) { | ||
347 | if (offset != file->f_pos) { | ||
348 | file->f_pos = offset; | ||
349 | } | ||
350 | retval = offset; | ||
351 | } | ||
352 | out: | ||
353 | mutex_unlock(&bd_inode->i_mutex); | 339 | mutex_unlock(&bd_inode->i_mutex); |
354 | return retval; | 340 | return retval; |
355 | } | 341 | } |
@@ -1583,6 +1569,7 @@ static const struct address_space_operations def_blk_aops = { | |||
1583 | .writepages = generic_writepages, | 1569 | .writepages = generic_writepages, |
1584 | .releasepage = blkdev_releasepage, | 1570 | .releasepage = blkdev_releasepage, |
1585 | .direct_IO = blkdev_direct_IO, | 1571 | .direct_IO = blkdev_direct_IO, |
1572 | .is_dirty_writeback = buffer_check_dirty_writeback, | ||
1586 | }; | 1573 | }; |
1587 | 1574 | ||
1588 | const struct file_operations def_blk_fops = { | 1575 | const struct file_operations def_blk_fops = { |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 290e347b6db3..8bc5e8ccb091 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -36,16 +36,23 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, | |||
36 | u64 extent_item_pos, | 36 | u64 extent_item_pos, |
37 | struct extent_inode_elem **eie) | 37 | struct extent_inode_elem **eie) |
38 | { | 38 | { |
39 | u64 data_offset; | 39 | u64 offset = 0; |
40 | u64 data_len; | ||
41 | struct extent_inode_elem *e; | 40 | struct extent_inode_elem *e; |
42 | 41 | ||
43 | data_offset = btrfs_file_extent_offset(eb, fi); | 42 | if (!btrfs_file_extent_compression(eb, fi) && |
44 | data_len = btrfs_file_extent_num_bytes(eb, fi); | 43 | !btrfs_file_extent_encryption(eb, fi) && |
44 | !btrfs_file_extent_other_encoding(eb, fi)) { | ||
45 | u64 data_offset; | ||
46 | u64 data_len; | ||
45 | 47 | ||
46 | if (extent_item_pos < data_offset || | 48 | data_offset = btrfs_file_extent_offset(eb, fi); |
47 | extent_item_pos >= data_offset + data_len) | 49 | data_len = btrfs_file_extent_num_bytes(eb, fi); |
48 | return 1; | 50 | |
51 | if (extent_item_pos < data_offset || | ||
52 | extent_item_pos >= data_offset + data_len) | ||
53 | return 1; | ||
54 | offset = extent_item_pos - data_offset; | ||
55 | } | ||
49 | 56 | ||
50 | e = kmalloc(sizeof(*e), GFP_NOFS); | 57 | e = kmalloc(sizeof(*e), GFP_NOFS); |
51 | if (!e) | 58 | if (!e) |
@@ -53,7 +60,7 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, | |||
53 | 60 | ||
54 | e->next = *eie; | 61 | e->next = *eie; |
55 | e->inum = key->objectid; | 62 | e->inum = key->objectid; |
56 | e->offset = key->offset + (extent_item_pos - data_offset); | 63 | e->offset = key->offset + offset; |
57 | *eie = e; | 64 | *eie = e; |
58 | 65 | ||
59 | return 0; | 66 | return 0; |
@@ -189,7 +196,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | |||
189 | struct extent_buffer *eb; | 196 | struct extent_buffer *eb; |
190 | struct btrfs_key key; | 197 | struct btrfs_key key; |
191 | struct btrfs_file_extent_item *fi; | 198 | struct btrfs_file_extent_item *fi; |
192 | struct extent_inode_elem *eie = NULL; | 199 | struct extent_inode_elem *eie = NULL, *old = NULL; |
193 | u64 disk_byte; | 200 | u64 disk_byte; |
194 | 201 | ||
195 | if (level != 0) { | 202 | if (level != 0) { |
@@ -223,6 +230,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | |||
223 | 230 | ||
224 | if (disk_byte == wanted_disk_byte) { | 231 | if (disk_byte == wanted_disk_byte) { |
225 | eie = NULL; | 232 | eie = NULL; |
233 | old = NULL; | ||
226 | if (extent_item_pos) { | 234 | if (extent_item_pos) { |
227 | ret = check_extent_in_eb(&key, eb, fi, | 235 | ret = check_extent_in_eb(&key, eb, fi, |
228 | *extent_item_pos, | 236 | *extent_item_pos, |
@@ -230,18 +238,20 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | |||
230 | if (ret < 0) | 238 | if (ret < 0) |
231 | break; | 239 | break; |
232 | } | 240 | } |
233 | if (!ret) { | 241 | if (ret > 0) |
234 | ret = ulist_add(parents, eb->start, | 242 | goto next; |
235 | (uintptr_t)eie, GFP_NOFS); | 243 | ret = ulist_add_merge(parents, eb->start, |
236 | if (ret < 0) | 244 | (uintptr_t)eie, |
237 | break; | 245 | (u64 *)&old, GFP_NOFS); |
238 | if (!extent_item_pos) { | 246 | if (ret < 0) |
239 | ret = btrfs_next_old_leaf(root, path, | 247 | break; |
240 | time_seq); | 248 | if (!ret && extent_item_pos) { |
241 | continue; | 249 | while (old->next) |
242 | } | 250 | old = old->next; |
251 | old->next = eie; | ||
243 | } | 252 | } |
244 | } | 253 | } |
254 | next: | ||
245 | ret = btrfs_next_old_item(root, path, time_seq); | 255 | ret = btrfs_next_old_item(root, path, time_seq); |
246 | } | 256 | } |
247 | 257 | ||
@@ -255,13 +265,11 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | |||
255 | * to a logical address | 265 | * to a logical address |
256 | */ | 266 | */ |
257 | static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | 267 | static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, |
258 | int search_commit_root, | 268 | struct btrfs_path *path, u64 time_seq, |
259 | u64 time_seq, | 269 | struct __prelim_ref *ref, |
260 | struct __prelim_ref *ref, | 270 | struct ulist *parents, |
261 | struct ulist *parents, | 271 | const u64 *extent_item_pos) |
262 | const u64 *extent_item_pos) | ||
263 | { | 272 | { |
264 | struct btrfs_path *path; | ||
265 | struct btrfs_root *root; | 273 | struct btrfs_root *root; |
266 | struct btrfs_key root_key; | 274 | struct btrfs_key root_key; |
267 | struct extent_buffer *eb; | 275 | struct extent_buffer *eb; |
@@ -269,11 +277,6 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
269 | int root_level; | 277 | int root_level; |
270 | int level = ref->level; | 278 | int level = ref->level; |
271 | 279 | ||
272 | path = btrfs_alloc_path(); | ||
273 | if (!path) | ||
274 | return -ENOMEM; | ||
275 | path->search_commit_root = !!search_commit_root; | ||
276 | |||
277 | root_key.objectid = ref->root_id; | 280 | root_key.objectid = ref->root_id; |
278 | root_key.type = BTRFS_ROOT_ITEM_KEY; | 281 | root_key.type = BTRFS_ROOT_ITEM_KEY; |
279 | root_key.offset = (u64)-1; | 282 | root_key.offset = (u64)-1; |
@@ -314,7 +317,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
314 | time_seq, ref->wanted_disk_byte, | 317 | time_seq, ref->wanted_disk_byte, |
315 | extent_item_pos); | 318 | extent_item_pos); |
316 | out: | 319 | out: |
317 | btrfs_free_path(path); | 320 | path->lowest_level = 0; |
321 | btrfs_release_path(path); | ||
318 | return ret; | 322 | return ret; |
319 | } | 323 | } |
320 | 324 | ||
@@ -322,7 +326,7 @@ out: | |||
322 | * resolve all indirect backrefs from the list | 326 | * resolve all indirect backrefs from the list |
323 | */ | 327 | */ |
324 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | 328 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, |
325 | int search_commit_root, u64 time_seq, | 329 | struct btrfs_path *path, u64 time_seq, |
326 | struct list_head *head, | 330 | struct list_head *head, |
327 | const u64 *extent_item_pos) | 331 | const u64 *extent_item_pos) |
328 | { | 332 | { |
@@ -349,9 +353,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
349 | continue; | 353 | continue; |
350 | if (ref->count == 0) | 354 | if (ref->count == 0) |
351 | continue; | 355 | continue; |
352 | err = __resolve_indirect_ref(fs_info, search_commit_root, | 356 | err = __resolve_indirect_ref(fs_info, path, time_seq, ref, |
353 | time_seq, ref, parents, | 357 | parents, extent_item_pos); |
354 | extent_item_pos); | ||
355 | if (err == -ENOMEM) | 358 | if (err == -ENOMEM) |
356 | goto out; | 359 | goto out; |
357 | if (err) | 360 | if (err) |
@@ -604,6 +607,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
604 | int slot; | 607 | int slot; |
605 | struct extent_buffer *leaf; | 608 | struct extent_buffer *leaf; |
606 | struct btrfs_key key; | 609 | struct btrfs_key key; |
610 | struct btrfs_key found_key; | ||
607 | unsigned long ptr; | 611 | unsigned long ptr; |
608 | unsigned long end; | 612 | unsigned long end; |
609 | struct btrfs_extent_item *ei; | 613 | struct btrfs_extent_item *ei; |
@@ -621,17 +625,21 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
621 | 625 | ||
622 | ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); | 626 | ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); |
623 | flags = btrfs_extent_flags(leaf, ei); | 627 | flags = btrfs_extent_flags(leaf, ei); |
628 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
624 | 629 | ||
625 | ptr = (unsigned long)(ei + 1); | 630 | ptr = (unsigned long)(ei + 1); |
626 | end = (unsigned long)ei + item_size; | 631 | end = (unsigned long)ei + item_size; |
627 | 632 | ||
628 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 633 | if (found_key.type == BTRFS_EXTENT_ITEM_KEY && |
634 | flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
629 | struct btrfs_tree_block_info *info; | 635 | struct btrfs_tree_block_info *info; |
630 | 636 | ||
631 | info = (struct btrfs_tree_block_info *)ptr; | 637 | info = (struct btrfs_tree_block_info *)ptr; |
632 | *info_level = btrfs_tree_block_level(leaf, info); | 638 | *info_level = btrfs_tree_block_level(leaf, info); |
633 | ptr += sizeof(struct btrfs_tree_block_info); | 639 | ptr += sizeof(struct btrfs_tree_block_info); |
634 | BUG_ON(ptr > end); | 640 | BUG_ON(ptr > end); |
641 | } else if (found_key.type == BTRFS_METADATA_ITEM_KEY) { | ||
642 | *info_level = found_key.offset; | ||
635 | } else { | 643 | } else { |
636 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); | 644 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); |
637 | } | 645 | } |
@@ -795,7 +803,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, | |||
795 | struct btrfs_delayed_ref_head *head; | 803 | struct btrfs_delayed_ref_head *head; |
796 | int info_level = 0; | 804 | int info_level = 0; |
797 | int ret; | 805 | int ret; |
798 | int search_commit_root = (trans == BTRFS_BACKREF_SEARCH_COMMIT_ROOT); | ||
799 | struct list_head prefs_delayed; | 806 | struct list_head prefs_delayed; |
800 | struct list_head prefs; | 807 | struct list_head prefs; |
801 | struct __prelim_ref *ref; | 808 | struct __prelim_ref *ref; |
@@ -804,13 +811,17 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, | |||
804 | INIT_LIST_HEAD(&prefs_delayed); | 811 | INIT_LIST_HEAD(&prefs_delayed); |
805 | 812 | ||
806 | key.objectid = bytenr; | 813 | key.objectid = bytenr; |
807 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
808 | key.offset = (u64)-1; | 814 | key.offset = (u64)-1; |
815 | if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) | ||
816 | key.type = BTRFS_METADATA_ITEM_KEY; | ||
817 | else | ||
818 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
809 | 819 | ||
810 | path = btrfs_alloc_path(); | 820 | path = btrfs_alloc_path(); |
811 | if (!path) | 821 | if (!path) |
812 | return -ENOMEM; | 822 | return -ENOMEM; |
813 | path->search_commit_root = !!search_commit_root; | 823 | if (!trans) |
824 | path->search_commit_root = 1; | ||
814 | 825 | ||
815 | /* | 826 | /* |
816 | * grab both a lock on the path and a lock on the delayed ref head. | 827 | * grab both a lock on the path and a lock on the delayed ref head. |
@@ -825,7 +836,7 @@ again: | |||
825 | goto out; | 836 | goto out; |
826 | BUG_ON(ret == 0); | 837 | BUG_ON(ret == 0); |
827 | 838 | ||
828 | if (trans != BTRFS_BACKREF_SEARCH_COMMIT_ROOT) { | 839 | if (trans) { |
829 | /* | 840 | /* |
830 | * look if there are updates for this ref queued and lock the | 841 | * look if there are updates for this ref queued and lock the |
831 | * head | 842 | * head |
@@ -869,7 +880,8 @@ again: | |||
869 | slot = path->slots[0]; | 880 | slot = path->slots[0]; |
870 | btrfs_item_key_to_cpu(leaf, &key, slot); | 881 | btrfs_item_key_to_cpu(leaf, &key, slot); |
871 | if (key.objectid == bytenr && | 882 | if (key.objectid == bytenr && |
872 | key.type == BTRFS_EXTENT_ITEM_KEY) { | 883 | (key.type == BTRFS_EXTENT_ITEM_KEY || |
884 | key.type == BTRFS_METADATA_ITEM_KEY)) { | ||
873 | ret = __add_inline_refs(fs_info, path, bytenr, | 885 | ret = __add_inline_refs(fs_info, path, bytenr, |
874 | &info_level, &prefs); | 886 | &info_level, &prefs); |
875 | if (ret) | 887 | if (ret) |
@@ -890,8 +902,8 @@ again: | |||
890 | 902 | ||
891 | __merge_refs(&prefs, 1); | 903 | __merge_refs(&prefs, 1); |
892 | 904 | ||
893 | ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq, | 905 | ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs, |
894 | &prefs, extent_item_pos); | 906 | extent_item_pos); |
895 | if (ret) | 907 | if (ret) |
896 | goto out; | 908 | goto out; |
897 | 909 | ||
@@ -1283,12 +1295,16 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
1283 | { | 1295 | { |
1284 | int ret; | 1296 | int ret; |
1285 | u64 flags; | 1297 | u64 flags; |
1298 | u64 size = 0; | ||
1286 | u32 item_size; | 1299 | u32 item_size; |
1287 | struct extent_buffer *eb; | 1300 | struct extent_buffer *eb; |
1288 | struct btrfs_extent_item *ei; | 1301 | struct btrfs_extent_item *ei; |
1289 | struct btrfs_key key; | 1302 | struct btrfs_key key; |
1290 | 1303 | ||
1291 | key.type = BTRFS_EXTENT_ITEM_KEY; | 1304 | if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) |
1305 | key.type = BTRFS_METADATA_ITEM_KEY; | ||
1306 | else | ||
1307 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
1292 | key.objectid = logical; | 1308 | key.objectid = logical; |
1293 | key.offset = (u64)-1; | 1309 | key.offset = (u64)-1; |
1294 | 1310 | ||
@@ -1301,9 +1317,15 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
1301 | return ret; | 1317 | return ret; |
1302 | 1318 | ||
1303 | btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); | 1319 | btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); |
1304 | if (found_key->type != BTRFS_EXTENT_ITEM_KEY || | 1320 | if (found_key->type == BTRFS_METADATA_ITEM_KEY) |
1321 | size = fs_info->extent_root->leafsize; | ||
1322 | else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) | ||
1323 | size = found_key->offset; | ||
1324 | |||
1325 | if ((found_key->type != BTRFS_EXTENT_ITEM_KEY && | ||
1326 | found_key->type != BTRFS_METADATA_ITEM_KEY) || | ||
1305 | found_key->objectid > logical || | 1327 | found_key->objectid > logical || |
1306 | found_key->objectid + found_key->offset <= logical) { | 1328 | found_key->objectid + size <= logical) { |
1307 | pr_debug("logical %llu is not within any extent\n", | 1329 | pr_debug("logical %llu is not within any extent\n", |
1308 | (unsigned long long)logical); | 1330 | (unsigned long long)logical); |
1309 | return -ENOENT; | 1331 | return -ENOENT; |
@@ -1459,7 +1481,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1459 | iterate_extent_inodes_t *iterate, void *ctx) | 1481 | iterate_extent_inodes_t *iterate, void *ctx) |
1460 | { | 1482 | { |
1461 | int ret; | 1483 | int ret; |
1462 | struct btrfs_trans_handle *trans; | 1484 | struct btrfs_trans_handle *trans = NULL; |
1463 | struct ulist *refs = NULL; | 1485 | struct ulist *refs = NULL; |
1464 | struct ulist *roots = NULL; | 1486 | struct ulist *roots = NULL; |
1465 | struct ulist_node *ref_node = NULL; | 1487 | struct ulist_node *ref_node = NULL; |
@@ -1471,9 +1493,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1471 | pr_debug("resolving all inodes for extent %llu\n", | 1493 | pr_debug("resolving all inodes for extent %llu\n", |
1472 | extent_item_objectid); | 1494 | extent_item_objectid); |
1473 | 1495 | ||
1474 | if (search_commit_root) { | 1496 | if (!search_commit_root) { |
1475 | trans = BTRFS_BACKREF_SEARCH_COMMIT_ROOT; | ||
1476 | } else { | ||
1477 | trans = btrfs_join_transaction(fs_info->extent_root); | 1497 | trans = btrfs_join_transaction(fs_info->extent_root); |
1478 | if (IS_ERR(trans)) | 1498 | if (IS_ERR(trans)) |
1479 | return PTR_ERR(trans); | 1499 | return PTR_ERR(trans); |
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 0f446d7ca2c0..8f2e76702932 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -23,8 +23,6 @@ | |||
23 | #include "ulist.h" | 23 | #include "ulist.h" |
24 | #include "extent_io.h" | 24 | #include "extent_io.h" |
25 | 25 | ||
26 | #define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0) | ||
27 | |||
28 | struct inode_fs_paths { | 26 | struct inode_fs_paths { |
29 | struct btrfs_path *btrfs_path; | 27 | struct btrfs_path *btrfs_path; |
30 | struct btrfs_root *fs_root; | 28 | struct btrfs_root *fs_root; |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 02fae7f7e42c..ed504607d8ec 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1089,7 +1089,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
1089 | btrfs_set_node_ptr_generation(parent, parent_slot, | 1089 | btrfs_set_node_ptr_generation(parent, parent_slot, |
1090 | trans->transid); | 1090 | trans->transid); |
1091 | btrfs_mark_buffer_dirty(parent); | 1091 | btrfs_mark_buffer_dirty(parent); |
1092 | tree_mod_log_free_eb(root->fs_info, buf); | 1092 | if (last_ref) |
1093 | tree_mod_log_free_eb(root->fs_info, buf); | ||
1093 | btrfs_free_tree_block(trans, root, buf, parent_start, | 1094 | btrfs_free_tree_block(trans, root, buf, parent_start, |
1094 | last_ref); | 1095 | last_ref); |
1095 | } | 1096 | } |
@@ -1161,8 +1162,8 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, | |||
1161 | * time_seq). | 1162 | * time_seq). |
1162 | */ | 1163 | */ |
1163 | static void | 1164 | static void |
1164 | __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | 1165 | __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, |
1165 | struct tree_mod_elem *first_tm) | 1166 | u64 time_seq, struct tree_mod_elem *first_tm) |
1166 | { | 1167 | { |
1167 | u32 n; | 1168 | u32 n; |
1168 | struct rb_node *next; | 1169 | struct rb_node *next; |
@@ -1172,6 +1173,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | |||
1172 | unsigned long p_size = sizeof(struct btrfs_key_ptr); | 1173 | unsigned long p_size = sizeof(struct btrfs_key_ptr); |
1173 | 1174 | ||
1174 | n = btrfs_header_nritems(eb); | 1175 | n = btrfs_header_nritems(eb); |
1176 | tree_mod_log_read_lock(fs_info); | ||
1175 | while (tm && tm->seq >= time_seq) { | 1177 | while (tm && tm->seq >= time_seq) { |
1176 | /* | 1178 | /* |
1177 | * all the operations are recorded with the operator used for | 1179 | * all the operations are recorded with the operator used for |
@@ -1226,6 +1228,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | |||
1226 | if (tm->index != first_tm->index) | 1228 | if (tm->index != first_tm->index) |
1227 | break; | 1229 | break; |
1228 | } | 1230 | } |
1231 | tree_mod_log_read_unlock(fs_info); | ||
1229 | btrfs_set_header_nritems(eb, n); | 1232 | btrfs_set_header_nritems(eb, n); |
1230 | } | 1233 | } |
1231 | 1234 | ||
@@ -1268,13 +1271,12 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | |||
1268 | BUG_ON(!eb_rewin); | 1271 | BUG_ON(!eb_rewin); |
1269 | } | 1272 | } |
1270 | 1273 | ||
1271 | extent_buffer_get(eb_rewin); | ||
1272 | btrfs_tree_read_unlock(eb); | 1274 | btrfs_tree_read_unlock(eb); |
1273 | free_extent_buffer(eb); | 1275 | free_extent_buffer(eb); |
1274 | 1276 | ||
1275 | extent_buffer_get(eb_rewin); | 1277 | extent_buffer_get(eb_rewin); |
1276 | btrfs_tree_read_lock(eb_rewin); | 1278 | btrfs_tree_read_lock(eb_rewin); |
1277 | __tree_mod_log_rewind(eb_rewin, time_seq, tm); | 1279 | __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm); |
1278 | WARN_ON(btrfs_header_nritems(eb_rewin) > | 1280 | WARN_ON(btrfs_header_nritems(eb_rewin) > |
1279 | BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root)); | 1281 | BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root)); |
1280 | 1282 | ||
@@ -1350,7 +1352,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) | |||
1350 | btrfs_set_header_generation(eb, old_generation); | 1352 | btrfs_set_header_generation(eb, old_generation); |
1351 | } | 1353 | } |
1352 | if (tm) | 1354 | if (tm) |
1353 | __tree_mod_log_rewind(eb, time_seq, tm); | 1355 | __tree_mod_log_rewind(root->fs_info, eb, time_seq, tm); |
1354 | else | 1356 | else |
1355 | WARN_ON(btrfs_header_level(eb) != 0); | 1357 | WARN_ON(btrfs_header_level(eb) != 0); |
1356 | WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root)); | 1358 | WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root)); |
@@ -2178,12 +2180,8 @@ static void reada_for_search(struct btrfs_root *root, | |||
2178 | } | 2180 | } |
2179 | } | 2181 | } |
2180 | 2182 | ||
2181 | /* | 2183 | static noinline void reada_for_balance(struct btrfs_root *root, |
2182 | * returns -EAGAIN if it had to drop the path, or zero if everything was in | 2184 | struct btrfs_path *path, int level) |
2183 | * cache | ||
2184 | */ | ||
2185 | static noinline int reada_for_balance(struct btrfs_root *root, | ||
2186 | struct btrfs_path *path, int level) | ||
2187 | { | 2185 | { |
2188 | int slot; | 2186 | int slot; |
2189 | int nritems; | 2187 | int nritems; |
@@ -2192,12 +2190,11 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
2192 | u64 gen; | 2190 | u64 gen; |
2193 | u64 block1 = 0; | 2191 | u64 block1 = 0; |
2194 | u64 block2 = 0; | 2192 | u64 block2 = 0; |
2195 | int ret = 0; | ||
2196 | int blocksize; | 2193 | int blocksize; |
2197 | 2194 | ||
2198 | parent = path->nodes[level + 1]; | 2195 | parent = path->nodes[level + 1]; |
2199 | if (!parent) | 2196 | if (!parent) |
2200 | return 0; | 2197 | return; |
2201 | 2198 | ||
2202 | nritems = btrfs_header_nritems(parent); | 2199 | nritems = btrfs_header_nritems(parent); |
2203 | slot = path->slots[level + 1]; | 2200 | slot = path->slots[level + 1]; |
@@ -2224,28 +2221,11 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
2224 | block2 = 0; | 2221 | block2 = 0; |
2225 | free_extent_buffer(eb); | 2222 | free_extent_buffer(eb); |
2226 | } | 2223 | } |
2227 | if (block1 || block2) { | ||
2228 | ret = -EAGAIN; | ||
2229 | |||
2230 | /* release the whole path */ | ||
2231 | btrfs_release_path(path); | ||
2232 | 2224 | ||
2233 | /* read the blocks */ | 2225 | if (block1) |
2234 | if (block1) | 2226 | readahead_tree_block(root, block1, blocksize, 0); |
2235 | readahead_tree_block(root, block1, blocksize, 0); | 2227 | if (block2) |
2236 | if (block2) | 2228 | readahead_tree_block(root, block2, blocksize, 0); |
2237 | readahead_tree_block(root, block2, blocksize, 0); | ||
2238 | |||
2239 | if (block1) { | ||
2240 | eb = read_tree_block(root, block1, blocksize, 0); | ||
2241 | free_extent_buffer(eb); | ||
2242 | } | ||
2243 | if (block2) { | ||
2244 | eb = read_tree_block(root, block2, blocksize, 0); | ||
2245 | free_extent_buffer(eb); | ||
2246 | } | ||
2247 | } | ||
2248 | return ret; | ||
2249 | } | 2229 | } |
2250 | 2230 | ||
2251 | 2231 | ||
@@ -2359,35 +2339,28 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
2359 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); | 2339 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); |
2360 | if (tmp) { | 2340 | if (tmp) { |
2361 | /* first we do an atomic uptodate check */ | 2341 | /* first we do an atomic uptodate check */ |
2362 | if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) { | 2342 | if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { |
2363 | if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { | 2343 | *eb_ret = tmp; |
2364 | /* | 2344 | return 0; |
2365 | * we found an up to date block without | 2345 | } |
2366 | * sleeping, return | ||
2367 | * right away | ||
2368 | */ | ||
2369 | *eb_ret = tmp; | ||
2370 | return 0; | ||
2371 | } | ||
2372 | /* the pages were up to date, but we failed | ||
2373 | * the generation number check. Do a full | ||
2374 | * read for the generation number that is correct. | ||
2375 | * We must do this without dropping locks so | ||
2376 | * we can trust our generation number | ||
2377 | */ | ||
2378 | free_extent_buffer(tmp); | ||
2379 | btrfs_set_path_blocking(p); | ||
2380 | 2346 | ||
2381 | /* now we're allowed to do a blocking uptodate check */ | 2347 | /* the pages were up to date, but we failed |
2382 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 2348 | * the generation number check. Do a full |
2383 | if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) { | 2349 | * read for the generation number that is correct. |
2384 | *eb_ret = tmp; | 2350 | * We must do this without dropping locks so |
2385 | return 0; | 2351 | * we can trust our generation number |
2386 | } | 2352 | */ |
2387 | free_extent_buffer(tmp); | 2353 | btrfs_set_path_blocking(p); |
2388 | btrfs_release_path(p); | 2354 | |
2389 | return -EIO; | 2355 | /* now we're allowed to do a blocking uptodate check */ |
2356 | ret = btrfs_read_buffer(tmp, gen); | ||
2357 | if (!ret) { | ||
2358 | *eb_ret = tmp; | ||
2359 | return 0; | ||
2390 | } | 2360 | } |
2361 | free_extent_buffer(tmp); | ||
2362 | btrfs_release_path(p); | ||
2363 | return -EIO; | ||
2391 | } | 2364 | } |
2392 | 2365 | ||
2393 | /* | 2366 | /* |
@@ -2448,11 +2421,8 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, | |||
2448 | goto again; | 2421 | goto again; |
2449 | } | 2422 | } |
2450 | 2423 | ||
2451 | sret = reada_for_balance(root, p, level); | ||
2452 | if (sret) | ||
2453 | goto again; | ||
2454 | |||
2455 | btrfs_set_path_blocking(p); | 2424 | btrfs_set_path_blocking(p); |
2425 | reada_for_balance(root, p, level); | ||
2456 | sret = split_node(trans, root, p, level); | 2426 | sret = split_node(trans, root, p, level); |
2457 | btrfs_clear_path_blocking(p, NULL, 0); | 2427 | btrfs_clear_path_blocking(p, NULL, 0); |
2458 | 2428 | ||
@@ -2472,11 +2442,8 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, | |||
2472 | goto again; | 2442 | goto again; |
2473 | } | 2443 | } |
2474 | 2444 | ||
2475 | sret = reada_for_balance(root, p, level); | ||
2476 | if (sret) | ||
2477 | goto again; | ||
2478 | |||
2479 | btrfs_set_path_blocking(p); | 2445 | btrfs_set_path_blocking(p); |
2446 | reada_for_balance(root, p, level); | ||
2480 | sret = balance_level(trans, root, p, level); | 2447 | sret = balance_level(trans, root, p, level); |
2481 | btrfs_clear_path_blocking(p, NULL, 0); | 2448 | btrfs_clear_path_blocking(p, NULL, 0); |
2482 | 2449 | ||
@@ -3143,7 +3110,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
3143 | */ | 3110 | */ |
3144 | static noinline int insert_new_root(struct btrfs_trans_handle *trans, | 3111 | static noinline int insert_new_root(struct btrfs_trans_handle *trans, |
3145 | struct btrfs_root *root, | 3112 | struct btrfs_root *root, |
3146 | struct btrfs_path *path, int level, int log_removal) | 3113 | struct btrfs_path *path, int level) |
3147 | { | 3114 | { |
3148 | u64 lower_gen; | 3115 | u64 lower_gen; |
3149 | struct extent_buffer *lower; | 3116 | struct extent_buffer *lower; |
@@ -3194,7 +3161,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
3194 | btrfs_mark_buffer_dirty(c); | 3161 | btrfs_mark_buffer_dirty(c); |
3195 | 3162 | ||
3196 | old = root->node; | 3163 | old = root->node; |
3197 | tree_mod_log_set_root_pointer(root, c, log_removal); | 3164 | tree_mod_log_set_root_pointer(root, c, 0); |
3198 | rcu_assign_pointer(root->node, c); | 3165 | rcu_assign_pointer(root->node, c); |
3199 | 3166 | ||
3200 | /* the super has an extra ref to root->node */ | 3167 | /* the super has an extra ref to root->node */ |
@@ -3278,14 +3245,14 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
3278 | /* | 3245 | /* |
3279 | * trying to split the root, lets make a new one | 3246 | * trying to split the root, lets make a new one |
3280 | * | 3247 | * |
3281 | * tree mod log: We pass 0 as log_removal parameter to | 3248 | * tree mod log: We don't log_removal old root in |
3282 | * insert_new_root, because that root buffer will be kept as a | 3249 | * insert_new_root, because that root buffer will be kept as a |
3283 | * normal node. We are going to log removal of half of the | 3250 | * normal node. We are going to log removal of half of the |
3284 | * elements below with tree_mod_log_eb_copy. We're holding a | 3251 | * elements below with tree_mod_log_eb_copy. We're holding a |
3285 | * tree lock on the buffer, which is why we cannot race with | 3252 | * tree lock on the buffer, which is why we cannot race with |
3286 | * other tree_mod_log users. | 3253 | * other tree_mod_log users. |
3287 | */ | 3254 | */ |
3288 | ret = insert_new_root(trans, root, path, level + 1, 0); | 3255 | ret = insert_new_root(trans, root, path, level + 1); |
3289 | if (ret) | 3256 | if (ret) |
3290 | return ret; | 3257 | return ret; |
3291 | } else { | 3258 | } else { |
@@ -3986,7 +3953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
3986 | return -EOVERFLOW; | 3953 | return -EOVERFLOW; |
3987 | 3954 | ||
3988 | /* first try to make some room by pushing left and right */ | 3955 | /* first try to make some room by pushing left and right */ |
3989 | if (data_size) { | 3956 | if (data_size && path->nodes[1]) { |
3990 | wret = push_leaf_right(trans, root, path, data_size, | 3957 | wret = push_leaf_right(trans, root, path, data_size, |
3991 | data_size, 0, 0); | 3958 | data_size, 0, 0); |
3992 | if (wret < 0) | 3959 | if (wret < 0) |
@@ -4005,7 +3972,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
4005 | } | 3972 | } |
4006 | 3973 | ||
4007 | if (!path->nodes[1]) { | 3974 | if (!path->nodes[1]) { |
4008 | ret = insert_new_root(trans, root, path, 1, 1); | 3975 | ret = insert_new_root(trans, root, path, 1); |
4009 | if (ret) | 3976 | if (ret) |
4010 | return ret; | 3977 | return ret; |
4011 | } | 3978 | } |
@@ -4430,7 +4397,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path, | |||
4430 | } | 4397 | } |
4431 | 4398 | ||
4432 | /* | 4399 | /* |
4433 | * make the item pointed to by the path bigger, data_size is the new size. | 4400 | * make the item pointed to by the path bigger, data_size is the added size. |
4434 | */ | 4401 | */ |
4435 | void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path, | 4402 | void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path, |
4436 | u32 data_size) | 4403 | u32 data_size) |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d6dd49b51ba8..e795bf135e80 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -961,8 +961,8 @@ struct btrfs_dev_replace_item { | |||
961 | #define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) | 961 | #define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) |
962 | #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) | 962 | #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) |
963 | #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) | 963 | #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) |
964 | #define BTRFS_BLOCK_GROUP_RAID5 (1 << 7) | 964 | #define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) |
965 | #define BTRFS_BLOCK_GROUP_RAID6 (1 << 8) | 965 | #define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) |
966 | #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE | 966 | #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE |
967 | 967 | ||
968 | enum btrfs_raid_types { | 968 | enum btrfs_raid_types { |
@@ -1102,6 +1102,18 @@ struct btrfs_space_info { | |||
1102 | account */ | 1102 | account */ |
1103 | 1103 | ||
1104 | /* | 1104 | /* |
1105 | * bytes_pinned is kept in line with what is actually pinned, as in | ||
1106 | * we've called update_block_group and dropped the bytes_used counter | ||
1107 | * and increased the bytes_pinned counter. However this means that | ||
1108 | * bytes_pinned does not reflect the bytes that will be pinned once the | ||
1109 | * delayed refs are flushed, so this counter is inc'ed everytime we call | ||
1110 | * btrfs_free_extent so it is a realtime count of what will be freed | ||
1111 | * once the transaction is committed. It will be zero'ed everytime the | ||
1112 | * transaction commits. | ||
1113 | */ | ||
1114 | struct percpu_counter total_bytes_pinned; | ||
1115 | |||
1116 | /* | ||
1105 | * we bump reservation progress every time we decrement | 1117 | * we bump reservation progress every time we decrement |
1106 | * bytes_reserved. This way people waiting for reservations | 1118 | * bytes_reserved. This way people waiting for reservations |
1107 | * know something good has happened and they can check | 1119 | * know something good has happened and they can check |
@@ -1437,25 +1449,22 @@ struct btrfs_fs_info { | |||
1437 | atomic_t open_ioctl_trans; | 1449 | atomic_t open_ioctl_trans; |
1438 | 1450 | ||
1439 | /* | 1451 | /* |
1440 | * this is used by the balancing code to wait for all the pending | 1452 | * this is used to protect the following list -- ordered_roots. |
1441 | * ordered extents | ||
1442 | */ | 1453 | */ |
1443 | spinlock_t ordered_extent_lock; | 1454 | spinlock_t ordered_root_lock; |
1444 | 1455 | ||
1445 | /* | 1456 | /* |
1446 | * all of the data=ordered extents pending writeback | 1457 | * all fs/file tree roots in which there are data=ordered extents |
1458 | * pending writeback are added into this list. | ||
1459 | * | ||
1447 | * these can span multiple transactions and basically include | 1460 | * these can span multiple transactions and basically include |
1448 | * every dirty data page that isn't from nodatacow | 1461 | * every dirty data page that isn't from nodatacow |
1449 | */ | 1462 | */ |
1450 | struct list_head ordered_extents; | 1463 | struct list_head ordered_roots; |
1451 | 1464 | ||
1452 | spinlock_t delalloc_lock; | 1465 | spinlock_t delalloc_root_lock; |
1453 | /* | 1466 | /* all fs/file tree roots that have delalloc inodes. */ |
1454 | * all of the inodes that have delalloc bytes. It is possible for | 1467 | struct list_head delalloc_roots; |
1455 | * this list to be empty even when there is still dirty data=ordered | ||
1456 | * extents waiting to finish IO. | ||
1457 | */ | ||
1458 | struct list_head delalloc_inodes; | ||
1459 | 1468 | ||
1460 | /* | 1469 | /* |
1461 | * there is a pool of worker threads for checksumming during writes | 1470 | * there is a pool of worker threads for checksumming during writes |
@@ -1498,8 +1507,6 @@ struct btrfs_fs_info { | |||
1498 | int do_barriers; | 1507 | int do_barriers; |
1499 | int closing; | 1508 | int closing; |
1500 | int log_root_recovering; | 1509 | int log_root_recovering; |
1501 | int enospc_unlink; | ||
1502 | int trans_no_join; | ||
1503 | 1510 | ||
1504 | u64 total_pinned; | 1511 | u64 total_pinned; |
1505 | 1512 | ||
@@ -1594,6 +1601,12 @@ struct btrfs_fs_info { | |||
1594 | struct rb_root qgroup_tree; | 1601 | struct rb_root qgroup_tree; |
1595 | spinlock_t qgroup_lock; | 1602 | spinlock_t qgroup_lock; |
1596 | 1603 | ||
1604 | /* | ||
1605 | * used to avoid frequently calling ulist_alloc()/ulist_free() | ||
1606 | * when doing qgroup accounting, it must be protected by qgroup_lock. | ||
1607 | */ | ||
1608 | struct ulist *qgroup_ulist; | ||
1609 | |||
1597 | /* protect user change for quota operations */ | 1610 | /* protect user change for quota operations */ |
1598 | struct mutex qgroup_ioctl_lock; | 1611 | struct mutex qgroup_ioctl_lock; |
1599 | 1612 | ||
@@ -1607,6 +1620,8 @@ struct btrfs_fs_info { | |||
1607 | struct mutex qgroup_rescan_lock; /* protects the progress item */ | 1620 | struct mutex qgroup_rescan_lock; /* protects the progress item */ |
1608 | struct btrfs_key qgroup_rescan_progress; | 1621 | struct btrfs_key qgroup_rescan_progress; |
1609 | struct btrfs_workers qgroup_rescan_workers; | 1622 | struct btrfs_workers qgroup_rescan_workers; |
1623 | struct completion qgroup_rescan_completion; | ||
1624 | struct btrfs_work qgroup_rescan_work; | ||
1610 | 1625 | ||
1611 | /* filesystem state */ | 1626 | /* filesystem state */ |
1612 | unsigned long fs_state; | 1627 | unsigned long fs_state; |
@@ -1739,6 +1754,31 @@ struct btrfs_root { | |||
1739 | int force_cow; | 1754 | int force_cow; |
1740 | 1755 | ||
1741 | spinlock_t root_item_lock; | 1756 | spinlock_t root_item_lock; |
1757 | atomic_t refs; | ||
1758 | |||
1759 | spinlock_t delalloc_lock; | ||
1760 | /* | ||
1761 | * all of the inodes that have delalloc bytes. It is possible for | ||
1762 | * this list to be empty even when there is still dirty data=ordered | ||
1763 | * extents waiting to finish IO. | ||
1764 | */ | ||
1765 | struct list_head delalloc_inodes; | ||
1766 | struct list_head delalloc_root; | ||
1767 | u64 nr_delalloc_inodes; | ||
1768 | /* | ||
1769 | * this is used by the balancing code to wait for all the pending | ||
1770 | * ordered extents | ||
1771 | */ | ||
1772 | spinlock_t ordered_extent_lock; | ||
1773 | |||
1774 | /* | ||
1775 | * all of the data=ordered extents pending writeback | ||
1776 | * these can span multiple transactions and basically include | ||
1777 | * every dirty data page that isn't from nodatacow | ||
1778 | */ | ||
1779 | struct list_head ordered_extents; | ||
1780 | struct list_head ordered_root; | ||
1781 | u64 nr_ordered_extents; | ||
1742 | }; | 1782 | }; |
1743 | 1783 | ||
1744 | struct btrfs_ioctl_defrag_range_args { | 1784 | struct btrfs_ioctl_defrag_range_args { |
@@ -3028,6 +3068,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, | |||
3028 | num_items; | 3068 | num_items; |
3029 | } | 3069 | } |
3030 | 3070 | ||
3071 | int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, | ||
3072 | struct btrfs_root *root); | ||
3031 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | 3073 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); |
3032 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 3074 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
3033 | struct btrfs_root *root, unsigned long count); | 3075 | struct btrfs_root *root, unsigned long count); |
@@ -3039,6 +3081,8 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
3039 | u64 bytenr, u64 num, int reserved); | 3081 | u64 bytenr, u64 num, int reserved); |
3040 | int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, | 3082 | int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, |
3041 | u64 bytenr, u64 num_bytes); | 3083 | u64 bytenr, u64 num_bytes); |
3084 | int btrfs_exclude_logged_extents(struct btrfs_root *root, | ||
3085 | struct extent_buffer *eb); | ||
3042 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 3086 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
3043 | struct btrfs_root *root, | 3087 | struct btrfs_root *root, |
3044 | u64 objectid, u64 offset, u64 bytenr); | 3088 | u64 objectid, u64 offset, u64 bytenr); |
@@ -3155,6 +3199,9 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, | |||
3155 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 3199 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
3156 | struct btrfs_block_rsv *dst_rsv, | 3200 | struct btrfs_block_rsv *dst_rsv, |
3157 | u64 num_bytes); | 3201 | u64 num_bytes); |
3202 | int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, | ||
3203 | struct btrfs_block_rsv *dest, u64 num_bytes, | ||
3204 | int min_factor); | ||
3158 | void btrfs_block_rsv_release(struct btrfs_root *root, | 3205 | void btrfs_block_rsv_release(struct btrfs_root *root, |
3159 | struct btrfs_block_rsv *block_rsv, | 3206 | struct btrfs_block_rsv *block_rsv, |
3160 | u64 num_bytes); | 3207 | u64 num_bytes); |
@@ -3311,6 +3358,18 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) | |||
3311 | smp_mb(); | 3358 | smp_mb(); |
3312 | return fs_info->closing; | 3359 | return fs_info->closing; |
3313 | } | 3360 | } |
3361 | |||
3362 | /* | ||
3363 | * If we remount the fs to be R/O or umount the fs, the cleaner needn't do | ||
3364 | * anything except sleeping. This function is used to check the status of | ||
3365 | * the fs. | ||
3366 | */ | ||
3367 | static inline int btrfs_need_cleaner_sleep(struct btrfs_root *root) | ||
3368 | { | ||
3369 | return (root->fs_info->sb->s_flags & MS_RDONLY || | ||
3370 | btrfs_fs_closing(root->fs_info)); | ||
3371 | } | ||
3372 | |||
3314 | static inline void free_fs_info(struct btrfs_fs_info *fs_info) | 3373 | static inline void free_fs_info(struct btrfs_fs_info *fs_info) |
3315 | { | 3374 | { |
3316 | kfree(fs_info->balance_ctl); | 3375 | kfree(fs_info->balance_ctl); |
@@ -3357,9 +3416,9 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans, | |||
3357 | struct btrfs_root_item *item); | 3416 | struct btrfs_root_item *item); |
3358 | void btrfs_read_root_item(struct extent_buffer *eb, int slot, | 3417 | void btrfs_read_root_item(struct extent_buffer *eb, int slot, |
3359 | struct btrfs_root_item *item); | 3418 | struct btrfs_root_item *item); |
3360 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | 3419 | int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key, |
3361 | btrfs_root_item *item, struct btrfs_key *key); | 3420 | struct btrfs_path *path, struct btrfs_root_item *root_item, |
3362 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | 3421 | struct btrfs_key *root_key); |
3363 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | 3422 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); |
3364 | void btrfs_set_root_node(struct btrfs_root_item *item, | 3423 | void btrfs_set_root_node(struct btrfs_root_item *item, |
3365 | struct extent_buffer *node); | 3424 | struct extent_buffer *node); |
@@ -3493,6 +3552,10 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); | |||
3493 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, | 3552 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, |
3494 | size_t pg_offset, u64 start, u64 len, | 3553 | size_t pg_offset, u64 start, u64 len, |
3495 | int create); | 3554 | int create); |
3555 | noinline int can_nocow_extent(struct btrfs_trans_handle *trans, | ||
3556 | struct inode *inode, u64 offset, u64 *len, | ||
3557 | u64 *orig_start, u64 *orig_block_len, | ||
3558 | u64 *ram_bytes); | ||
3496 | 3559 | ||
3497 | /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ | 3560 | /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ |
3498 | #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) | 3561 | #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) |
@@ -3530,6 +3593,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3530 | u32 min_type); | 3593 | u32 min_type); |
3531 | 3594 | ||
3532 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 3595 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
3596 | int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info, | ||
3597 | int delay_iput); | ||
3533 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 3598 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
3534 | struct extent_state **cached_state); | 3599 | struct extent_state **cached_state); |
3535 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 3600 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
@@ -3814,6 +3879,8 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, | |||
3814 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, | 3879 | int btrfs_quota_disable(struct btrfs_trans_handle *trans, |
3815 | struct btrfs_fs_info *fs_info); | 3880 | struct btrfs_fs_info *fs_info); |
3816 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); | 3881 | int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); |
3882 | void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); | ||
3883 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); | ||
3817 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | 3884 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, |
3818 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | 3885 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
3819 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | 3886 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index f26f38ccd194..375510913fe7 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -535,20 +535,6 @@ static struct btrfs_delayed_item *__btrfs_next_delayed_item( | |||
535 | return next; | 535 | return next; |
536 | } | 536 | } |
537 | 537 | ||
538 | static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root, | ||
539 | u64 root_id) | ||
540 | { | ||
541 | struct btrfs_key root_key; | ||
542 | |||
543 | if (root->objectid == root_id) | ||
544 | return root; | ||
545 | |||
546 | root_key.objectid = root_id; | ||
547 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
548 | root_key.offset = (u64)-1; | ||
549 | return btrfs_read_fs_root_no_name(root->fs_info, &root_key); | ||
550 | } | ||
551 | |||
552 | static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, | 538 | static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, |
553 | struct btrfs_root *root, | 539 | struct btrfs_root *root, |
554 | struct btrfs_delayed_item *item) | 540 | struct btrfs_delayed_item *item) |
@@ -1681,8 +1667,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list, | |||
1681 | * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree | 1667 | * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree |
1682 | * | 1668 | * |
1683 | */ | 1669 | */ |
1684 | int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, | 1670 | int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, |
1685 | filldir_t filldir, | ||
1686 | struct list_head *ins_list) | 1671 | struct list_head *ins_list) |
1687 | { | 1672 | { |
1688 | struct btrfs_dir_item *di; | 1673 | struct btrfs_dir_item *di; |
@@ -1704,13 +1689,13 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, | |||
1704 | list_for_each_entry_safe(curr, next, ins_list, readdir_list) { | 1689 | list_for_each_entry_safe(curr, next, ins_list, readdir_list) { |
1705 | list_del(&curr->readdir_list); | 1690 | list_del(&curr->readdir_list); |
1706 | 1691 | ||
1707 | if (curr->key.offset < filp->f_pos) { | 1692 | if (curr->key.offset < ctx->pos) { |
1708 | if (atomic_dec_and_test(&curr->refs)) | 1693 | if (atomic_dec_and_test(&curr->refs)) |
1709 | kfree(curr); | 1694 | kfree(curr); |
1710 | continue; | 1695 | continue; |
1711 | } | 1696 | } |
1712 | 1697 | ||
1713 | filp->f_pos = curr->key.offset; | 1698 | ctx->pos = curr->key.offset; |
1714 | 1699 | ||
1715 | di = (struct btrfs_dir_item *)curr->data; | 1700 | di = (struct btrfs_dir_item *)curr->data; |
1716 | name = (char *)(di + 1); | 1701 | name = (char *)(di + 1); |
@@ -1719,7 +1704,7 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, | |||
1719 | d_type = btrfs_filetype_table[di->type]; | 1704 | d_type = btrfs_filetype_table[di->type]; |
1720 | btrfs_disk_key_to_cpu(&location, &di->location); | 1705 | btrfs_disk_key_to_cpu(&location, &di->location); |
1721 | 1706 | ||
1722 | over = filldir(dirent, name, name_len, curr->key.offset, | 1707 | over = !dir_emit(ctx, name, name_len, |
1723 | location.objectid, d_type); | 1708 | location.objectid, d_type); |
1724 | 1709 | ||
1725 | if (atomic_dec_and_test(&curr->refs)) | 1710 | if (atomic_dec_and_test(&curr->refs)) |
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 1d5c5f7abe3e..a4b38f934d14 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h | |||
@@ -139,8 +139,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list, | |||
139 | struct list_head *del_list); | 139 | struct list_head *del_list); |
140 | int btrfs_should_delete_dir_index(struct list_head *del_list, | 140 | int btrfs_should_delete_dir_index(struct list_head *del_list, |
141 | u64 index); | 141 | u64 index); |
142 | int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, | 142 | int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, |
143 | filldir_t filldir, | ||
144 | struct list_head *ins_list); | 143 | struct list_head *ins_list); |
145 | 144 | ||
146 | /* for init */ | 145 | /* for init */ |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 65241f32d3f8..4253ad580e39 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
@@ -400,7 +400,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
400 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; | 400 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; |
401 | btrfs_dev_replace_unlock(dev_replace); | 401 | btrfs_dev_replace_unlock(dev_replace); |
402 | 402 | ||
403 | btrfs_wait_ordered_extents(root, 0); | 403 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
404 | 404 | ||
405 | /* force writing the updated state information to disk */ | 405 | /* force writing the updated state information to disk */ |
406 | trans = btrfs_start_transaction(root, 0); | 406 | trans = btrfs_start_transaction(root, 0); |
@@ -470,12 +470,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
470 | * flush all outstanding I/O and inode extent mappings before the | 470 | * flush all outstanding I/O and inode extent mappings before the |
471 | * copy operation is declared as being finished | 471 | * copy operation is declared as being finished |
472 | */ | 472 | */ |
473 | ret = btrfs_start_delalloc_inodes(root, 0); | 473 | ret = btrfs_start_all_delalloc_inodes(root->fs_info, 0); |
474 | if (ret) { | 474 | if (ret) { |
475 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 475 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
476 | return ret; | 476 | return ret; |
477 | } | 477 | } |
478 | btrfs_wait_ordered_extents(root, 0); | 478 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
479 | 479 | ||
480 | trans = btrfs_start_transaction(root, 0); | 480 | trans = btrfs_start_transaction(root, 0); |
481 | if (IS_ERR(trans)) { | 481 | if (IS_ERR(trans)) { |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b8b60b660c8f..6b092a1c4e37 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1013,7 +1013,8 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) | |||
1013 | return try_release_extent_buffer(page); | 1013 | return try_release_extent_buffer(page); |
1014 | } | 1014 | } |
1015 | 1015 | ||
1016 | static void btree_invalidatepage(struct page *page, unsigned long offset) | 1016 | static void btree_invalidatepage(struct page *page, unsigned int offset, |
1017 | unsigned int length) | ||
1017 | { | 1018 | { |
1018 | struct extent_io_tree *tree; | 1019 | struct extent_io_tree *tree; |
1019 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 1020 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
@@ -1191,6 +1192,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1191 | root->objectid = objectid; | 1192 | root->objectid = objectid; |
1192 | root->last_trans = 0; | 1193 | root->last_trans = 0; |
1193 | root->highest_objectid = 0; | 1194 | root->highest_objectid = 0; |
1195 | root->nr_delalloc_inodes = 0; | ||
1196 | root->nr_ordered_extents = 0; | ||
1194 | root->name = NULL; | 1197 | root->name = NULL; |
1195 | root->inode_tree = RB_ROOT; | 1198 | root->inode_tree = RB_ROOT; |
1196 | INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); | 1199 | INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); |
@@ -1199,10 +1202,16 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1199 | 1202 | ||
1200 | INIT_LIST_HEAD(&root->dirty_list); | 1203 | INIT_LIST_HEAD(&root->dirty_list); |
1201 | INIT_LIST_HEAD(&root->root_list); | 1204 | INIT_LIST_HEAD(&root->root_list); |
1205 | INIT_LIST_HEAD(&root->delalloc_inodes); | ||
1206 | INIT_LIST_HEAD(&root->delalloc_root); | ||
1207 | INIT_LIST_HEAD(&root->ordered_extents); | ||
1208 | INIT_LIST_HEAD(&root->ordered_root); | ||
1202 | INIT_LIST_HEAD(&root->logged_list[0]); | 1209 | INIT_LIST_HEAD(&root->logged_list[0]); |
1203 | INIT_LIST_HEAD(&root->logged_list[1]); | 1210 | INIT_LIST_HEAD(&root->logged_list[1]); |
1204 | spin_lock_init(&root->orphan_lock); | 1211 | spin_lock_init(&root->orphan_lock); |
1205 | spin_lock_init(&root->inode_lock); | 1212 | spin_lock_init(&root->inode_lock); |
1213 | spin_lock_init(&root->delalloc_lock); | ||
1214 | spin_lock_init(&root->ordered_extent_lock); | ||
1206 | spin_lock_init(&root->accounting_lock); | 1215 | spin_lock_init(&root->accounting_lock); |
1207 | spin_lock_init(&root->log_extents_lock[0]); | 1216 | spin_lock_init(&root->log_extents_lock[0]); |
1208 | spin_lock_init(&root->log_extents_lock[1]); | 1217 | spin_lock_init(&root->log_extents_lock[1]); |
@@ -1216,6 +1225,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1216 | atomic_set(&root->log_writers, 0); | 1225 | atomic_set(&root->log_writers, 0); |
1217 | atomic_set(&root->log_batch, 0); | 1226 | atomic_set(&root->log_batch, 0); |
1218 | atomic_set(&root->orphan_inodes, 0); | 1227 | atomic_set(&root->orphan_inodes, 0); |
1228 | atomic_set(&root->refs, 1); | ||
1219 | root->log_transid = 0; | 1229 | root->log_transid = 0; |
1220 | root->last_log_commit = 0; | 1230 | root->last_log_commit = 0; |
1221 | extent_io_tree_init(&root->dirty_log_pages, | 1231 | extent_io_tree_init(&root->dirty_log_pages, |
@@ -1234,39 +1244,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1234 | spin_lock_init(&root->root_item_lock); | 1244 | spin_lock_init(&root->root_item_lock); |
1235 | } | 1245 | } |
1236 | 1246 | ||
1237 | static int __must_check find_and_setup_root(struct btrfs_root *tree_root, | ||
1238 | struct btrfs_fs_info *fs_info, | ||
1239 | u64 objectid, | ||
1240 | struct btrfs_root *root) | ||
1241 | { | ||
1242 | int ret; | ||
1243 | u32 blocksize; | ||
1244 | u64 generation; | ||
1245 | |||
1246 | __setup_root(tree_root->nodesize, tree_root->leafsize, | ||
1247 | tree_root->sectorsize, tree_root->stripesize, | ||
1248 | root, fs_info, objectid); | ||
1249 | ret = btrfs_find_last_root(tree_root, objectid, | ||
1250 | &root->root_item, &root->root_key); | ||
1251 | if (ret > 0) | ||
1252 | return -ENOENT; | ||
1253 | else if (ret < 0) | ||
1254 | return ret; | ||
1255 | |||
1256 | generation = btrfs_root_generation(&root->root_item); | ||
1257 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | ||
1258 | root->commit_root = NULL; | ||
1259 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | ||
1260 | blocksize, generation); | ||
1261 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation, 0)) { | ||
1262 | free_extent_buffer(root->node); | ||
1263 | root->node = NULL; | ||
1264 | return -EIO; | ||
1265 | } | ||
1266 | root->commit_root = btrfs_root_node(root); | ||
1267 | return 0; | ||
1268 | } | ||
1269 | |||
1270 | static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) | 1247 | static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) |
1271 | { | 1248 | { |
1272 | struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); | 1249 | struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); |
@@ -1451,70 +1428,73 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
1451 | return 0; | 1428 | return 0; |
1452 | } | 1429 | } |
1453 | 1430 | ||
1454 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | 1431 | struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, |
1455 | struct btrfs_key *location) | 1432 | struct btrfs_key *key) |
1456 | { | 1433 | { |
1457 | struct btrfs_root *root; | 1434 | struct btrfs_root *root; |
1458 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | 1435 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
1459 | struct btrfs_path *path; | 1436 | struct btrfs_path *path; |
1460 | struct extent_buffer *l; | ||
1461 | u64 generation; | 1437 | u64 generation; |
1462 | u32 blocksize; | 1438 | u32 blocksize; |
1463 | int ret = 0; | 1439 | int ret; |
1464 | int slot; | ||
1465 | 1440 | ||
1466 | root = btrfs_alloc_root(fs_info); | 1441 | path = btrfs_alloc_path(); |
1467 | if (!root) | 1442 | if (!path) |
1468 | return ERR_PTR(-ENOMEM); | 1443 | return ERR_PTR(-ENOMEM); |
1469 | if (location->offset == (u64)-1) { | 1444 | |
1470 | ret = find_and_setup_root(tree_root, fs_info, | 1445 | root = btrfs_alloc_root(fs_info); |
1471 | location->objectid, root); | 1446 | if (!root) { |
1472 | if (ret) { | 1447 | ret = -ENOMEM; |
1473 | kfree(root); | 1448 | goto alloc_fail; |
1474 | return ERR_PTR(ret); | ||
1475 | } | ||
1476 | goto out; | ||
1477 | } | 1449 | } |
1478 | 1450 | ||
1479 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1451 | __setup_root(tree_root->nodesize, tree_root->leafsize, |
1480 | tree_root->sectorsize, tree_root->stripesize, | 1452 | tree_root->sectorsize, tree_root->stripesize, |
1481 | root, fs_info, location->objectid); | 1453 | root, fs_info, key->objectid); |
1482 | 1454 | ||
1483 | path = btrfs_alloc_path(); | 1455 | ret = btrfs_find_root(tree_root, key, path, |
1484 | if (!path) { | 1456 | &root->root_item, &root->root_key); |
1485 | kfree(root); | ||
1486 | return ERR_PTR(-ENOMEM); | ||
1487 | } | ||
1488 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | ||
1489 | if (ret == 0) { | ||
1490 | l = path->nodes[0]; | ||
1491 | slot = path->slots[0]; | ||
1492 | btrfs_read_root_item(l, slot, &root->root_item); | ||
1493 | memcpy(&root->root_key, location, sizeof(*location)); | ||
1494 | } | ||
1495 | btrfs_free_path(path); | ||
1496 | if (ret) { | 1457 | if (ret) { |
1497 | kfree(root); | ||
1498 | if (ret > 0) | 1458 | if (ret > 0) |
1499 | ret = -ENOENT; | 1459 | ret = -ENOENT; |
1500 | return ERR_PTR(ret); | 1460 | goto find_fail; |
1501 | } | 1461 | } |
1502 | 1462 | ||
1503 | generation = btrfs_root_generation(&root->root_item); | 1463 | generation = btrfs_root_generation(&root->root_item); |
1504 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1464 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
1505 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1465 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
1506 | blocksize, generation); | 1466 | blocksize, generation); |
1507 | if (!root->node || !extent_buffer_uptodate(root->node)) { | 1467 | if (!root->node) { |
1508 | ret = (!root->node) ? -ENOMEM : -EIO; | 1468 | ret = -ENOMEM; |
1509 | 1469 | goto find_fail; | |
1510 | free_extent_buffer(root->node); | 1470 | } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) { |
1511 | kfree(root); | 1471 | ret = -EIO; |
1512 | return ERR_PTR(ret); | 1472 | goto read_fail; |
1513 | } | 1473 | } |
1514 | |||
1515 | root->commit_root = btrfs_root_node(root); | 1474 | root->commit_root = btrfs_root_node(root); |
1516 | out: | 1475 | out: |
1517 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { | 1476 | btrfs_free_path(path); |
1477 | return root; | ||
1478 | |||
1479 | read_fail: | ||
1480 | free_extent_buffer(root->node); | ||
1481 | find_fail: | ||
1482 | kfree(root); | ||
1483 | alloc_fail: | ||
1484 | root = ERR_PTR(ret); | ||
1485 | goto out; | ||
1486 | } | ||
1487 | |||
1488 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | ||
1489 | struct btrfs_key *location) | ||
1490 | { | ||
1491 | struct btrfs_root *root; | ||
1492 | |||
1493 | root = btrfs_read_tree_root(tree_root, location); | ||
1494 | if (IS_ERR(root)) | ||
1495 | return root; | ||
1496 | |||
1497 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
1518 | root->ref_cows = 1; | 1498 | root->ref_cows = 1; |
1519 | btrfs_check_and_init_root_item(&root->root_item); | 1499 | btrfs_check_and_init_root_item(&root->root_item); |
1520 | } | 1500 | } |
@@ -1522,6 +1502,66 @@ out: | |||
1522 | return root; | 1502 | return root; |
1523 | } | 1503 | } |
1524 | 1504 | ||
1505 | int btrfs_init_fs_root(struct btrfs_root *root) | ||
1506 | { | ||
1507 | int ret; | ||
1508 | |||
1509 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); | ||
1510 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), | ||
1511 | GFP_NOFS); | ||
1512 | if (!root->free_ino_pinned || !root->free_ino_ctl) { | ||
1513 | ret = -ENOMEM; | ||
1514 | goto fail; | ||
1515 | } | ||
1516 | |||
1517 | btrfs_init_free_ino_ctl(root); | ||
1518 | mutex_init(&root->fs_commit_mutex); | ||
1519 | spin_lock_init(&root->cache_lock); | ||
1520 | init_waitqueue_head(&root->cache_wait); | ||
1521 | |||
1522 | ret = get_anon_bdev(&root->anon_dev); | ||
1523 | if (ret) | ||
1524 | goto fail; | ||
1525 | return 0; | ||
1526 | fail: | ||
1527 | kfree(root->free_ino_ctl); | ||
1528 | kfree(root->free_ino_pinned); | ||
1529 | return ret; | ||
1530 | } | ||
1531 | |||
1532 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | ||
1533 | u64 root_id) | ||
1534 | { | ||
1535 | struct btrfs_root *root; | ||
1536 | |||
1537 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
1538 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | ||
1539 | (unsigned long)root_id); | ||
1540 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
1541 | return root; | ||
1542 | } | ||
1543 | |||
1544 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, | ||
1545 | struct btrfs_root *root) | ||
1546 | { | ||
1547 | int ret; | ||
1548 | |||
1549 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
1550 | if (ret) | ||
1551 | return ret; | ||
1552 | |||
1553 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
1554 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | ||
1555 | (unsigned long)root->root_key.objectid, | ||
1556 | root); | ||
1557 | if (ret == 0) | ||
1558 | root->in_radix = 1; | ||
1559 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
1560 | radix_tree_preload_end(); | ||
1561 | |||
1562 | return ret; | ||
1563 | } | ||
1564 | |||
1525 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | 1565 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, |
1526 | struct btrfs_key *location) | 1566 | struct btrfs_key *location) |
1527 | { | 1567 | { |
@@ -1542,58 +1582,30 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | |||
1542 | return fs_info->quota_root ? fs_info->quota_root : | 1582 | return fs_info->quota_root ? fs_info->quota_root : |
1543 | ERR_PTR(-ENOENT); | 1583 | ERR_PTR(-ENOENT); |
1544 | again: | 1584 | again: |
1545 | spin_lock(&fs_info->fs_roots_radix_lock); | 1585 | root = btrfs_lookup_fs_root(fs_info, location->objectid); |
1546 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | ||
1547 | (unsigned long)location->objectid); | ||
1548 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
1549 | if (root) | 1586 | if (root) |
1550 | return root; | 1587 | return root; |
1551 | 1588 | ||
1552 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1589 | root = btrfs_read_fs_root(fs_info->tree_root, location); |
1553 | if (IS_ERR(root)) | 1590 | if (IS_ERR(root)) |
1554 | return root; | 1591 | return root; |
1555 | 1592 | ||
1556 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); | 1593 | if (btrfs_root_refs(&root->root_item) == 0) { |
1557 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), | 1594 | ret = -ENOENT; |
1558 | GFP_NOFS); | ||
1559 | if (!root->free_ino_pinned || !root->free_ino_ctl) { | ||
1560 | ret = -ENOMEM; | ||
1561 | goto fail; | 1595 | goto fail; |
1562 | } | 1596 | } |
1563 | 1597 | ||
1564 | btrfs_init_free_ino_ctl(root); | 1598 | ret = btrfs_init_fs_root(root); |
1565 | mutex_init(&root->fs_commit_mutex); | ||
1566 | spin_lock_init(&root->cache_lock); | ||
1567 | init_waitqueue_head(&root->cache_wait); | ||
1568 | |||
1569 | ret = get_anon_bdev(&root->anon_dev); | ||
1570 | if (ret) | 1599 | if (ret) |
1571 | goto fail; | 1600 | goto fail; |
1572 | 1601 | ||
1573 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
1574 | ret = -ENOENT; | ||
1575 | goto fail; | ||
1576 | } | ||
1577 | |||
1578 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | 1602 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); |
1579 | if (ret < 0) | 1603 | if (ret < 0) |
1580 | goto fail; | 1604 | goto fail; |
1581 | if (ret == 0) | 1605 | if (ret == 0) |
1582 | root->orphan_item_inserted = 1; | 1606 | root->orphan_item_inserted = 1; |
1583 | 1607 | ||
1584 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 1608 | ret = btrfs_insert_fs_root(fs_info, root); |
1585 | if (ret) | ||
1586 | goto fail; | ||
1587 | |||
1588 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
1589 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | ||
1590 | (unsigned long)root->root_key.objectid, | ||
1591 | root); | ||
1592 | if (ret == 0) | ||
1593 | root->in_radix = 1; | ||
1594 | |||
1595 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
1596 | radix_tree_preload_end(); | ||
1597 | if (ret) { | 1609 | if (ret) { |
1598 | if (ret == -EEXIST) { | 1610 | if (ret == -EEXIST) { |
1599 | free_fs_root(root); | 1611 | free_fs_root(root); |
@@ -1601,10 +1613,6 @@ again: | |||
1601 | } | 1613 | } |
1602 | goto fail; | 1614 | goto fail; |
1603 | } | 1615 | } |
1604 | |||
1605 | ret = btrfs_find_dead_roots(fs_info->tree_root, | ||
1606 | root->root_key.objectid); | ||
1607 | WARN_ON(ret); | ||
1608 | return root; | 1616 | return root; |
1609 | fail: | 1617 | fail: |
1610 | free_fs_root(root); | 1618 | free_fs_root(root); |
@@ -1676,21 +1684,37 @@ static void end_workqueue_fn(struct btrfs_work *work) | |||
1676 | static int cleaner_kthread(void *arg) | 1684 | static int cleaner_kthread(void *arg) |
1677 | { | 1685 | { |
1678 | struct btrfs_root *root = arg; | 1686 | struct btrfs_root *root = arg; |
1687 | int again; | ||
1679 | 1688 | ||
1680 | do { | 1689 | do { |
1681 | int again = 0; | 1690 | again = 0; |
1682 | 1691 | ||
1683 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && | 1692 | /* Make the cleaner go to sleep early. */ |
1684 | down_read_trylock(&root->fs_info->sb->s_umount)) { | 1693 | if (btrfs_need_cleaner_sleep(root)) |
1685 | if (mutex_trylock(&root->fs_info->cleaner_mutex)) { | 1694 | goto sleep; |
1686 | btrfs_run_delayed_iputs(root); | 1695 | |
1687 | again = btrfs_clean_one_deleted_snapshot(root); | 1696 | if (!mutex_trylock(&root->fs_info->cleaner_mutex)) |
1688 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1697 | goto sleep; |
1689 | } | 1698 | |
1690 | btrfs_run_defrag_inodes(root->fs_info); | 1699 | /* |
1691 | up_read(&root->fs_info->sb->s_umount); | 1700 | * Avoid the problem that we change the status of the fs |
1701 | * during the above check and trylock. | ||
1702 | */ | ||
1703 | if (btrfs_need_cleaner_sleep(root)) { | ||
1704 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
1705 | goto sleep; | ||
1692 | } | 1706 | } |
1693 | 1707 | ||
1708 | btrfs_run_delayed_iputs(root); | ||
1709 | again = btrfs_clean_one_deleted_snapshot(root); | ||
1710 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
1711 | |||
1712 | /* | ||
1713 | * The defragger has dealt with the R/O remount and umount, | ||
1714 | * needn't do anything special here. | ||
1715 | */ | ||
1716 | btrfs_run_defrag_inodes(root->fs_info); | ||
1717 | sleep: | ||
1694 | if (!try_to_freeze() && !again) { | 1718 | if (!try_to_freeze() && !again) { |
1695 | set_current_state(TASK_INTERRUPTIBLE); | 1719 | set_current_state(TASK_INTERRUPTIBLE); |
1696 | if (!kthread_should_stop()) | 1720 | if (!kthread_should_stop()) |
@@ -1724,7 +1748,7 @@ static int transaction_kthread(void *arg) | |||
1724 | } | 1748 | } |
1725 | 1749 | ||
1726 | now = get_seconds(); | 1750 | now = get_seconds(); |
1727 | if (!cur->blocked && | 1751 | if (cur->state < TRANS_STATE_BLOCKED && |
1728 | (now < cur->start_time || now - cur->start_time < 30)) { | 1752 | (now < cur->start_time || now - cur->start_time < 30)) { |
1729 | spin_unlock(&root->fs_info->trans_lock); | 1753 | spin_unlock(&root->fs_info->trans_lock); |
1730 | delay = HZ * 5; | 1754 | delay = HZ * 5; |
@@ -2034,11 +2058,11 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info) | |||
2034 | list_del(&gang[0]->root_list); | 2058 | list_del(&gang[0]->root_list); |
2035 | 2059 | ||
2036 | if (gang[0]->in_radix) { | 2060 | if (gang[0]->in_radix) { |
2037 | btrfs_free_fs_root(fs_info, gang[0]); | 2061 | btrfs_drop_and_free_fs_root(fs_info, gang[0]); |
2038 | } else { | 2062 | } else { |
2039 | free_extent_buffer(gang[0]->node); | 2063 | free_extent_buffer(gang[0]->node); |
2040 | free_extent_buffer(gang[0]->commit_root); | 2064 | free_extent_buffer(gang[0]->commit_root); |
2041 | kfree(gang[0]); | 2065 | btrfs_put_fs_root(gang[0]); |
2042 | } | 2066 | } |
2043 | } | 2067 | } |
2044 | 2068 | ||
@@ -2049,7 +2073,7 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info) | |||
2049 | if (!ret) | 2073 | if (!ret) |
2050 | break; | 2074 | break; |
2051 | for (i = 0; i < ret; i++) | 2075 | for (i = 0; i < ret; i++) |
2052 | btrfs_free_fs_root(fs_info, gang[i]); | 2076 | btrfs_drop_and_free_fs_root(fs_info, gang[i]); |
2053 | } | 2077 | } |
2054 | } | 2078 | } |
2055 | 2079 | ||
@@ -2081,14 +2105,8 @@ int open_ctree(struct super_block *sb, | |||
2081 | int backup_index = 0; | 2105 | int backup_index = 0; |
2082 | 2106 | ||
2083 | tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); | 2107 | tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); |
2084 | extent_root = fs_info->extent_root = btrfs_alloc_root(fs_info); | ||
2085 | csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); | ||
2086 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); | 2108 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); |
2087 | dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); | 2109 | if (!tree_root || !chunk_root) { |
2088 | quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info); | ||
2089 | |||
2090 | if (!tree_root || !extent_root || !csum_root || | ||
2091 | !chunk_root || !dev_root || !quota_root) { | ||
2092 | err = -ENOMEM; | 2110 | err = -ENOMEM; |
2093 | goto fail; | 2111 | goto fail; |
2094 | } | 2112 | } |
@@ -2131,9 +2149,9 @@ int open_ctree(struct super_block *sb, | |||
2131 | INIT_LIST_HEAD(&fs_info->trans_list); | 2149 | INIT_LIST_HEAD(&fs_info->trans_list); |
2132 | INIT_LIST_HEAD(&fs_info->dead_roots); | 2150 | INIT_LIST_HEAD(&fs_info->dead_roots); |
2133 | INIT_LIST_HEAD(&fs_info->delayed_iputs); | 2151 | INIT_LIST_HEAD(&fs_info->delayed_iputs); |
2134 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 2152 | INIT_LIST_HEAD(&fs_info->delalloc_roots); |
2135 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | 2153 | INIT_LIST_HEAD(&fs_info->caching_block_groups); |
2136 | spin_lock_init(&fs_info->delalloc_lock); | 2154 | spin_lock_init(&fs_info->delalloc_root_lock); |
2137 | spin_lock_init(&fs_info->trans_lock); | 2155 | spin_lock_init(&fs_info->trans_lock); |
2138 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 2156 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
2139 | spin_lock_init(&fs_info->delayed_iput_lock); | 2157 | spin_lock_init(&fs_info->delayed_iput_lock); |
@@ -2169,7 +2187,6 @@ int open_ctree(struct super_block *sb, | |||
2169 | fs_info->max_inline = 8192 * 1024; | 2187 | fs_info->max_inline = 8192 * 1024; |
2170 | fs_info->metadata_ratio = 0; | 2188 | fs_info->metadata_ratio = 0; |
2171 | fs_info->defrag_inodes = RB_ROOT; | 2189 | fs_info->defrag_inodes = RB_ROOT; |
2172 | fs_info->trans_no_join = 0; | ||
2173 | fs_info->free_chunk_space = 0; | 2190 | fs_info->free_chunk_space = 0; |
2174 | fs_info->tree_mod_log = RB_ROOT; | 2191 | fs_info->tree_mod_log = RB_ROOT; |
2175 | 2192 | ||
@@ -2180,8 +2197,8 @@ int open_ctree(struct super_block *sb, | |||
2180 | fs_info->thread_pool_size = min_t(unsigned long, | 2197 | fs_info->thread_pool_size = min_t(unsigned long, |
2181 | num_online_cpus() + 2, 8); | 2198 | num_online_cpus() + 2, 8); |
2182 | 2199 | ||
2183 | INIT_LIST_HEAD(&fs_info->ordered_extents); | 2200 | INIT_LIST_HEAD(&fs_info->ordered_roots); |
2184 | spin_lock_init(&fs_info->ordered_extent_lock); | 2201 | spin_lock_init(&fs_info->ordered_root_lock); |
2185 | fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), | 2202 | fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), |
2186 | GFP_NOFS); | 2203 | GFP_NOFS); |
2187 | if (!fs_info->delayed_root) { | 2204 | if (!fs_info->delayed_root) { |
@@ -2274,6 +2291,7 @@ int open_ctree(struct super_block *sb, | |||
2274 | fs_info->qgroup_seq = 1; | 2291 | fs_info->qgroup_seq = 1; |
2275 | fs_info->quota_enabled = 0; | 2292 | fs_info->quota_enabled = 0; |
2276 | fs_info->pending_quota_state = 0; | 2293 | fs_info->pending_quota_state = 0; |
2294 | fs_info->qgroup_ulist = NULL; | ||
2277 | mutex_init(&fs_info->qgroup_rescan_lock); | 2295 | mutex_init(&fs_info->qgroup_rescan_lock); |
2278 | 2296 | ||
2279 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | 2297 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); |
@@ -2638,33 +2656,44 @@ retry_root_backup: | |||
2638 | btrfs_set_root_node(&tree_root->root_item, tree_root->node); | 2656 | btrfs_set_root_node(&tree_root->root_item, tree_root->node); |
2639 | tree_root->commit_root = btrfs_root_node(tree_root); | 2657 | tree_root->commit_root = btrfs_root_node(tree_root); |
2640 | 2658 | ||
2641 | ret = find_and_setup_root(tree_root, fs_info, | 2659 | location.objectid = BTRFS_EXTENT_TREE_OBJECTID; |
2642 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); | 2660 | location.type = BTRFS_ROOT_ITEM_KEY; |
2643 | if (ret) | 2661 | location.offset = 0; |
2662 | |||
2663 | extent_root = btrfs_read_tree_root(tree_root, &location); | ||
2664 | if (IS_ERR(extent_root)) { | ||
2665 | ret = PTR_ERR(extent_root); | ||
2644 | goto recovery_tree_root; | 2666 | goto recovery_tree_root; |
2667 | } | ||
2645 | extent_root->track_dirty = 1; | 2668 | extent_root->track_dirty = 1; |
2669 | fs_info->extent_root = extent_root; | ||
2646 | 2670 | ||
2647 | ret = find_and_setup_root(tree_root, fs_info, | 2671 | location.objectid = BTRFS_DEV_TREE_OBJECTID; |
2648 | BTRFS_DEV_TREE_OBJECTID, dev_root); | 2672 | dev_root = btrfs_read_tree_root(tree_root, &location); |
2649 | if (ret) | 2673 | if (IS_ERR(dev_root)) { |
2674 | ret = PTR_ERR(dev_root); | ||
2650 | goto recovery_tree_root; | 2675 | goto recovery_tree_root; |
2676 | } | ||
2651 | dev_root->track_dirty = 1; | 2677 | dev_root->track_dirty = 1; |
2678 | fs_info->dev_root = dev_root; | ||
2679 | btrfs_init_devices_late(fs_info); | ||
2652 | 2680 | ||
2653 | ret = find_and_setup_root(tree_root, fs_info, | 2681 | location.objectid = BTRFS_CSUM_TREE_OBJECTID; |
2654 | BTRFS_CSUM_TREE_OBJECTID, csum_root); | 2682 | csum_root = btrfs_read_tree_root(tree_root, &location); |
2655 | if (ret) | 2683 | if (IS_ERR(csum_root)) { |
2684 | ret = PTR_ERR(csum_root); | ||
2656 | goto recovery_tree_root; | 2685 | goto recovery_tree_root; |
2686 | } | ||
2657 | csum_root->track_dirty = 1; | 2687 | csum_root->track_dirty = 1; |
2688 | fs_info->csum_root = csum_root; | ||
2658 | 2689 | ||
2659 | ret = find_and_setup_root(tree_root, fs_info, | 2690 | location.objectid = BTRFS_QUOTA_TREE_OBJECTID; |
2660 | BTRFS_QUOTA_TREE_OBJECTID, quota_root); | 2691 | quota_root = btrfs_read_tree_root(tree_root, &location); |
2661 | if (ret) { | 2692 | if (!IS_ERR(quota_root)) { |
2662 | kfree(quota_root); | ||
2663 | quota_root = fs_info->quota_root = NULL; | ||
2664 | } else { | ||
2665 | quota_root->track_dirty = 1; | 2693 | quota_root->track_dirty = 1; |
2666 | fs_info->quota_enabled = 1; | 2694 | fs_info->quota_enabled = 1; |
2667 | fs_info->pending_quota_state = 1; | 2695 | fs_info->pending_quota_state = 1; |
2696 | fs_info->quota_root = quota_root; | ||
2668 | } | 2697 | } |
2669 | 2698 | ||
2670 | fs_info->generation = generation; | 2699 | fs_info->generation = generation; |
@@ -2817,11 +2846,9 @@ retry_root_backup: | |||
2817 | 2846 | ||
2818 | location.objectid = BTRFS_FS_TREE_OBJECTID; | 2847 | location.objectid = BTRFS_FS_TREE_OBJECTID; |
2819 | location.type = BTRFS_ROOT_ITEM_KEY; | 2848 | location.type = BTRFS_ROOT_ITEM_KEY; |
2820 | location.offset = (u64)-1; | 2849 | location.offset = 0; |
2821 | 2850 | ||
2822 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); | 2851 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); |
2823 | if (!fs_info->fs_root) | ||
2824 | goto fail_qgroup; | ||
2825 | if (IS_ERR(fs_info->fs_root)) { | 2852 | if (IS_ERR(fs_info->fs_root)) { |
2826 | err = PTR_ERR(fs_info->fs_root); | 2853 | err = PTR_ERR(fs_info->fs_root); |
2827 | goto fail_qgroup; | 2854 | goto fail_qgroup; |
@@ -2853,6 +2880,8 @@ retry_root_backup: | |||
2853 | return ret; | 2880 | return ret; |
2854 | } | 2881 | } |
2855 | 2882 | ||
2883 | btrfs_qgroup_rescan_resume(fs_info); | ||
2884 | |||
2856 | return 0; | 2885 | return 0; |
2857 | 2886 | ||
2858 | fail_qgroup: | 2887 | fail_qgroup: |
@@ -3258,7 +3287,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( | |||
3258 | BTRFS_BLOCK_GROUP_RAID10)) { | 3287 | BTRFS_BLOCK_GROUP_RAID10)) { |
3259 | num_tolerated_disk_barrier_failures = 1; | 3288 | num_tolerated_disk_barrier_failures = 1; |
3260 | } else if (flags & | 3289 | } else if (flags & |
3261 | BTRFS_BLOCK_GROUP_RAID5) { | 3290 | BTRFS_BLOCK_GROUP_RAID6) { |
3262 | num_tolerated_disk_barrier_failures = 2; | 3291 | num_tolerated_disk_barrier_failures = 2; |
3263 | } | 3292 | } |
3264 | } | 3293 | } |
@@ -3366,7 +3395,9 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
3366 | return ret; | 3395 | return ret; |
3367 | } | 3396 | } |
3368 | 3397 | ||
3369 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 3398 | /* Drop a fs root from the radix tree and free it. */ |
3399 | void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, | ||
3400 | struct btrfs_root *root) | ||
3370 | { | 3401 | { |
3371 | spin_lock(&fs_info->fs_roots_radix_lock); | 3402 | spin_lock(&fs_info->fs_roots_radix_lock); |
3372 | radix_tree_delete(&fs_info->fs_roots_radix, | 3403 | radix_tree_delete(&fs_info->fs_roots_radix, |
@@ -3397,7 +3428,12 @@ static void free_fs_root(struct btrfs_root *root) | |||
3397 | kfree(root->free_ino_ctl); | 3428 | kfree(root->free_ino_ctl); |
3398 | kfree(root->free_ino_pinned); | 3429 | kfree(root->free_ino_pinned); |
3399 | kfree(root->name); | 3430 | kfree(root->name); |
3400 | kfree(root); | 3431 | btrfs_put_fs_root(root); |
3432 | } | ||
3433 | |||
3434 | void btrfs_free_fs_root(struct btrfs_root *root) | ||
3435 | { | ||
3436 | free_fs_root(root); | ||
3401 | } | 3437 | } |
3402 | 3438 | ||
3403 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | 3439 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) |
@@ -3653,7 +3689,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, | |||
3653 | INIT_LIST_HEAD(&splice); | 3689 | INIT_LIST_HEAD(&splice); |
3654 | 3690 | ||
3655 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 3691 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
3656 | spin_lock(&root->fs_info->ordered_extent_lock); | 3692 | spin_lock(&root->fs_info->ordered_root_lock); |
3657 | 3693 | ||
3658 | list_splice_init(&t->ordered_operations, &splice); | 3694 | list_splice_init(&t->ordered_operations, &splice); |
3659 | while (!list_empty(&splice)) { | 3695 | while (!list_empty(&splice)) { |
@@ -3661,14 +3697,14 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, | |||
3661 | ordered_operations); | 3697 | ordered_operations); |
3662 | 3698 | ||
3663 | list_del_init(&btrfs_inode->ordered_operations); | 3699 | list_del_init(&btrfs_inode->ordered_operations); |
3664 | spin_unlock(&root->fs_info->ordered_extent_lock); | 3700 | spin_unlock(&root->fs_info->ordered_root_lock); |
3665 | 3701 | ||
3666 | btrfs_invalidate_inodes(btrfs_inode->root); | 3702 | btrfs_invalidate_inodes(btrfs_inode->root); |
3667 | 3703 | ||
3668 | spin_lock(&root->fs_info->ordered_extent_lock); | 3704 | spin_lock(&root->fs_info->ordered_root_lock); |
3669 | } | 3705 | } |
3670 | 3706 | ||
3671 | spin_unlock(&root->fs_info->ordered_extent_lock); | 3707 | spin_unlock(&root->fs_info->ordered_root_lock); |
3672 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 3708 | mutex_unlock(&root->fs_info->ordered_operations_mutex); |
3673 | } | 3709 | } |
3674 | 3710 | ||
@@ -3676,15 +3712,36 @@ static void btrfs_destroy_ordered_extents(struct btrfs_root *root) | |||
3676 | { | 3712 | { |
3677 | struct btrfs_ordered_extent *ordered; | 3713 | struct btrfs_ordered_extent *ordered; |
3678 | 3714 | ||
3679 | spin_lock(&root->fs_info->ordered_extent_lock); | 3715 | spin_lock(&root->ordered_extent_lock); |
3680 | /* | 3716 | /* |
3681 | * This will just short circuit the ordered completion stuff which will | 3717 | * This will just short circuit the ordered completion stuff which will |
3682 | * make sure the ordered extent gets properly cleaned up. | 3718 | * make sure the ordered extent gets properly cleaned up. |
3683 | */ | 3719 | */ |
3684 | list_for_each_entry(ordered, &root->fs_info->ordered_extents, | 3720 | list_for_each_entry(ordered, &root->ordered_extents, |
3685 | root_extent_list) | 3721 | root_extent_list) |
3686 | set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); | 3722 | set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); |
3687 | spin_unlock(&root->fs_info->ordered_extent_lock); | 3723 | spin_unlock(&root->ordered_extent_lock); |
3724 | } | ||
3725 | |||
3726 | static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) | ||
3727 | { | ||
3728 | struct btrfs_root *root; | ||
3729 | struct list_head splice; | ||
3730 | |||
3731 | INIT_LIST_HEAD(&splice); | ||
3732 | |||
3733 | spin_lock(&fs_info->ordered_root_lock); | ||
3734 | list_splice_init(&fs_info->ordered_roots, &splice); | ||
3735 | while (!list_empty(&splice)) { | ||
3736 | root = list_first_entry(&splice, struct btrfs_root, | ||
3737 | ordered_root); | ||
3738 | list_del_init(&root->ordered_root); | ||
3739 | |||
3740 | btrfs_destroy_ordered_extents(root); | ||
3741 | |||
3742 | cond_resched_lock(&fs_info->ordered_root_lock); | ||
3743 | } | ||
3744 | spin_unlock(&fs_info->ordered_root_lock); | ||
3688 | } | 3745 | } |
3689 | 3746 | ||
3690 | int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | 3747 | int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, |
@@ -3706,6 +3763,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
3706 | 3763 | ||
3707 | while ((node = rb_first(&delayed_refs->root)) != NULL) { | 3764 | while ((node = rb_first(&delayed_refs->root)) != NULL) { |
3708 | struct btrfs_delayed_ref_head *head = NULL; | 3765 | struct btrfs_delayed_ref_head *head = NULL; |
3766 | bool pin_bytes = false; | ||
3709 | 3767 | ||
3710 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | 3768 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); |
3711 | atomic_set(&ref->refs, 1); | 3769 | atomic_set(&ref->refs, 1); |
@@ -3726,8 +3784,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
3726 | } | 3784 | } |
3727 | 3785 | ||
3728 | if (head->must_insert_reserved) | 3786 | if (head->must_insert_reserved) |
3729 | btrfs_pin_extent(root, ref->bytenr, | 3787 | pin_bytes = true; |
3730 | ref->num_bytes, 1); | ||
3731 | btrfs_free_delayed_extent_op(head->extent_op); | 3788 | btrfs_free_delayed_extent_op(head->extent_op); |
3732 | delayed_refs->num_heads--; | 3789 | delayed_refs->num_heads--; |
3733 | if (list_empty(&head->cluster)) | 3790 | if (list_empty(&head->cluster)) |
@@ -3738,9 +3795,13 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
3738 | ref->in_tree = 0; | 3795 | ref->in_tree = 0; |
3739 | rb_erase(&ref->rb_node, &delayed_refs->root); | 3796 | rb_erase(&ref->rb_node, &delayed_refs->root); |
3740 | delayed_refs->num_entries--; | 3797 | delayed_refs->num_entries--; |
3741 | if (head) | ||
3742 | mutex_unlock(&head->mutex); | ||
3743 | spin_unlock(&delayed_refs->lock); | 3798 | spin_unlock(&delayed_refs->lock); |
3799 | if (head) { | ||
3800 | if (pin_bytes) | ||
3801 | btrfs_pin_extent(root, ref->bytenr, | ||
3802 | ref->num_bytes, 1); | ||
3803 | mutex_unlock(&head->mutex); | ||
3804 | } | ||
3744 | btrfs_put_delayed_ref(ref); | 3805 | btrfs_put_delayed_ref(ref); |
3745 | 3806 | ||
3746 | cond_resched(); | 3807 | cond_resched(); |
@@ -3777,24 +3838,49 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | |||
3777 | 3838 | ||
3778 | INIT_LIST_HEAD(&splice); | 3839 | INIT_LIST_HEAD(&splice); |
3779 | 3840 | ||
3780 | spin_lock(&root->fs_info->delalloc_lock); | 3841 | spin_lock(&root->delalloc_lock); |
3781 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | 3842 | list_splice_init(&root->delalloc_inodes, &splice); |
3782 | 3843 | ||
3783 | while (!list_empty(&splice)) { | 3844 | while (!list_empty(&splice)) { |
3784 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | 3845 | btrfs_inode = list_first_entry(&splice, struct btrfs_inode, |
3785 | delalloc_inodes); | 3846 | delalloc_inodes); |
3786 | 3847 | ||
3787 | list_del_init(&btrfs_inode->delalloc_inodes); | 3848 | list_del_init(&btrfs_inode->delalloc_inodes); |
3788 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | 3849 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, |
3789 | &btrfs_inode->runtime_flags); | 3850 | &btrfs_inode->runtime_flags); |
3790 | spin_unlock(&root->fs_info->delalloc_lock); | 3851 | spin_unlock(&root->delalloc_lock); |
3791 | 3852 | ||
3792 | btrfs_invalidate_inodes(btrfs_inode->root); | 3853 | btrfs_invalidate_inodes(btrfs_inode->root); |
3793 | 3854 | ||
3794 | spin_lock(&root->fs_info->delalloc_lock); | 3855 | spin_lock(&root->delalloc_lock); |
3795 | } | 3856 | } |
3796 | 3857 | ||
3797 | spin_unlock(&root->fs_info->delalloc_lock); | 3858 | spin_unlock(&root->delalloc_lock); |
3859 | } | ||
3860 | |||
3861 | static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info) | ||
3862 | { | ||
3863 | struct btrfs_root *root; | ||
3864 | struct list_head splice; | ||
3865 | |||
3866 | INIT_LIST_HEAD(&splice); | ||
3867 | |||
3868 | spin_lock(&fs_info->delalloc_root_lock); | ||
3869 | list_splice_init(&fs_info->delalloc_roots, &splice); | ||
3870 | while (!list_empty(&splice)) { | ||
3871 | root = list_first_entry(&splice, struct btrfs_root, | ||
3872 | delalloc_root); | ||
3873 | list_del_init(&root->delalloc_root); | ||
3874 | root = btrfs_grab_fs_root(root); | ||
3875 | BUG_ON(!root); | ||
3876 | spin_unlock(&fs_info->delalloc_root_lock); | ||
3877 | |||
3878 | btrfs_destroy_delalloc_inodes(root); | ||
3879 | btrfs_put_fs_root(root); | ||
3880 | |||
3881 | spin_lock(&fs_info->delalloc_root_lock); | ||
3882 | } | ||
3883 | spin_unlock(&fs_info->delalloc_root_lock); | ||
3798 | } | 3884 | } |
3799 | 3885 | ||
3800 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | 3886 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, |
@@ -3878,19 +3964,14 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, | |||
3878 | btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, | 3964 | btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, |
3879 | cur_trans->dirty_pages.dirty_bytes); | 3965 | cur_trans->dirty_pages.dirty_bytes); |
3880 | 3966 | ||
3881 | /* FIXME: cleanup wait for commit */ | 3967 | cur_trans->state = TRANS_STATE_COMMIT_START; |
3882 | cur_trans->in_commit = 1; | ||
3883 | cur_trans->blocked = 1; | ||
3884 | wake_up(&root->fs_info->transaction_blocked_wait); | 3968 | wake_up(&root->fs_info->transaction_blocked_wait); |
3885 | 3969 | ||
3886 | btrfs_evict_pending_snapshots(cur_trans); | 3970 | btrfs_evict_pending_snapshots(cur_trans); |
3887 | 3971 | ||
3888 | cur_trans->blocked = 0; | 3972 | cur_trans->state = TRANS_STATE_UNBLOCKED; |
3889 | wake_up(&root->fs_info->transaction_wait); | 3973 | wake_up(&root->fs_info->transaction_wait); |
3890 | 3974 | ||
3891 | cur_trans->commit_done = 1; | ||
3892 | wake_up(&cur_trans->commit_wait); | ||
3893 | |||
3894 | btrfs_destroy_delayed_inodes(root); | 3975 | btrfs_destroy_delayed_inodes(root); |
3895 | btrfs_assert_delayed_root_empty(root); | 3976 | btrfs_assert_delayed_root_empty(root); |
3896 | 3977 | ||
@@ -3899,6 +3980,9 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, | |||
3899 | btrfs_destroy_pinned_extent(root, | 3980 | btrfs_destroy_pinned_extent(root, |
3900 | root->fs_info->pinned_extents); | 3981 | root->fs_info->pinned_extents); |
3901 | 3982 | ||
3983 | cur_trans->state =TRANS_STATE_COMPLETED; | ||
3984 | wake_up(&cur_trans->commit_wait); | ||
3985 | |||
3902 | /* | 3986 | /* |
3903 | memset(cur_trans, 0, sizeof(*cur_trans)); | 3987 | memset(cur_trans, 0, sizeof(*cur_trans)); |
3904 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 3988 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
@@ -3914,7 +3998,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3914 | 3998 | ||
3915 | spin_lock(&root->fs_info->trans_lock); | 3999 | spin_lock(&root->fs_info->trans_lock); |
3916 | list_splice_init(&root->fs_info->trans_list, &list); | 4000 | list_splice_init(&root->fs_info->trans_list, &list); |
3917 | root->fs_info->trans_no_join = 1; | 4001 | root->fs_info->running_transaction = NULL; |
3918 | spin_unlock(&root->fs_info->trans_lock); | 4002 | spin_unlock(&root->fs_info->trans_lock); |
3919 | 4003 | ||
3920 | while (!list_empty(&list)) { | 4004 | while (!list_empty(&list)) { |
@@ -3922,37 +4006,31 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3922 | 4006 | ||
3923 | btrfs_destroy_ordered_operations(t, root); | 4007 | btrfs_destroy_ordered_operations(t, root); |
3924 | 4008 | ||
3925 | btrfs_destroy_ordered_extents(root); | 4009 | btrfs_destroy_all_ordered_extents(root->fs_info); |
3926 | 4010 | ||
3927 | btrfs_destroy_delayed_refs(t, root); | 4011 | btrfs_destroy_delayed_refs(t, root); |
3928 | 4012 | ||
3929 | /* FIXME: cleanup wait for commit */ | 4013 | /* |
3930 | t->in_commit = 1; | 4014 | * FIXME: cleanup wait for commit |
3931 | t->blocked = 1; | 4015 | * We needn't acquire the lock here, because we are during |
4016 | * the umount, there is no other task which will change it. | ||
4017 | */ | ||
4018 | t->state = TRANS_STATE_COMMIT_START; | ||
3932 | smp_mb(); | 4019 | smp_mb(); |
3933 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) | 4020 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) |
3934 | wake_up(&root->fs_info->transaction_blocked_wait); | 4021 | wake_up(&root->fs_info->transaction_blocked_wait); |
3935 | 4022 | ||
3936 | btrfs_evict_pending_snapshots(t); | 4023 | btrfs_evict_pending_snapshots(t); |
3937 | 4024 | ||
3938 | t->blocked = 0; | 4025 | t->state = TRANS_STATE_UNBLOCKED; |
3939 | smp_mb(); | 4026 | smp_mb(); |
3940 | if (waitqueue_active(&root->fs_info->transaction_wait)) | 4027 | if (waitqueue_active(&root->fs_info->transaction_wait)) |
3941 | wake_up(&root->fs_info->transaction_wait); | 4028 | wake_up(&root->fs_info->transaction_wait); |
3942 | 4029 | ||
3943 | t->commit_done = 1; | ||
3944 | smp_mb(); | ||
3945 | if (waitqueue_active(&t->commit_wait)) | ||
3946 | wake_up(&t->commit_wait); | ||
3947 | |||
3948 | btrfs_destroy_delayed_inodes(root); | 4030 | btrfs_destroy_delayed_inodes(root); |
3949 | btrfs_assert_delayed_root_empty(root); | 4031 | btrfs_assert_delayed_root_empty(root); |
3950 | 4032 | ||
3951 | btrfs_destroy_delalloc_inodes(root); | 4033 | btrfs_destroy_all_delalloc_inodes(root->fs_info); |
3952 | |||
3953 | spin_lock(&root->fs_info->trans_lock); | ||
3954 | root->fs_info->running_transaction = NULL; | ||
3955 | spin_unlock(&root->fs_info->trans_lock); | ||
3956 | 4034 | ||
3957 | btrfs_destroy_marked_extents(root, &t->dirty_pages, | 4035 | btrfs_destroy_marked_extents(root, &t->dirty_pages, |
3958 | EXTENT_DIRTY); | 4036 | EXTENT_DIRTY); |
@@ -3960,15 +4038,17 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3960 | btrfs_destroy_pinned_extent(root, | 4038 | btrfs_destroy_pinned_extent(root, |
3961 | root->fs_info->pinned_extents); | 4039 | root->fs_info->pinned_extents); |
3962 | 4040 | ||
4041 | t->state = TRANS_STATE_COMPLETED; | ||
4042 | smp_mb(); | ||
4043 | if (waitqueue_active(&t->commit_wait)) | ||
4044 | wake_up(&t->commit_wait); | ||
4045 | |||
3963 | atomic_set(&t->use_count, 0); | 4046 | atomic_set(&t->use_count, 0); |
3964 | list_del_init(&t->list); | 4047 | list_del_init(&t->list); |
3965 | memset(t, 0, sizeof(*t)); | 4048 | memset(t, 0, sizeof(*t)); |
3966 | kmem_cache_free(btrfs_transaction_cachep, t); | 4049 | kmem_cache_free(btrfs_transaction_cachep, t); |
3967 | } | 4050 | } |
3968 | 4051 | ||
3969 | spin_lock(&root->fs_info->trans_lock); | ||
3970 | root->fs_info->trans_no_join = 0; | ||
3971 | spin_unlock(&root->fs_info->trans_lock); | ||
3972 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 4052 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
3973 | 4053 | ||
3974 | return 0; | 4054 | return 0; |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index be69ce1b07a2..b71acd6e1e5b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -63,14 +63,40 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | |||
63 | int btrfs_commit_super(struct btrfs_root *root); | 63 | int btrfs_commit_super(struct btrfs_root *root); |
64 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 64 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
65 | u64 bytenr, u32 blocksize); | 65 | u64 bytenr, u32 blocksize); |
66 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | 66 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, |
67 | struct btrfs_key *location); | 67 | struct btrfs_key *location); |
68 | int btrfs_init_fs_root(struct btrfs_root *root); | ||
69 | int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, | ||
70 | struct btrfs_root *root); | ||
68 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | 71 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, |
69 | struct btrfs_key *location); | 72 | struct btrfs_key *location); |
70 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); | 73 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); |
71 | void btrfs_btree_balance_dirty(struct btrfs_root *root); | 74 | void btrfs_btree_balance_dirty(struct btrfs_root *root); |
72 | void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root); | 75 | void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root); |
73 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); | 76 | void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, |
77 | struct btrfs_root *root); | ||
78 | void btrfs_free_fs_root(struct btrfs_root *root); | ||
79 | |||
80 | /* | ||
81 | * This function is used to grab the root, and avoid it is freed when we | ||
82 | * access it. But it doesn't ensure that the tree is not dropped. | ||
83 | * | ||
84 | * If you want to ensure the whole tree is safe, you should use | ||
85 | * fs_info->subvol_srcu | ||
86 | */ | ||
87 | static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root) | ||
88 | { | ||
89 | if (atomic_inc_not_zero(&root->refs)) | ||
90 | return root; | ||
91 | return NULL; | ||
92 | } | ||
93 | |||
94 | static inline void btrfs_put_fs_root(struct btrfs_root *root) | ||
95 | { | ||
96 | if (atomic_dec_and_test(&root->refs)) | ||
97 | kfree(root); | ||
98 | } | ||
99 | |||
74 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); | 100 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); |
75 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, | 101 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, |
76 | int atomic); | 102 | int atomic); |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 81ee29eeb7ca..4b8691607373 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -82,11 +82,6 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
82 | goto fail; | 82 | goto fail; |
83 | } | 83 | } |
84 | 84 | ||
85 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
86 | err = -ENOENT; | ||
87 | goto fail; | ||
88 | } | ||
89 | |||
90 | key.objectid = objectid; | 85 | key.objectid = objectid; |
91 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 86 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
92 | key.offset = 0; | 87 | key.offset = 0; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index df472ab1b5ac..1204c8ef6f32 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/ratelimit.h> | 26 | #include <linux/ratelimit.h> |
27 | #include <linux/percpu_counter.h> | ||
27 | #include "compat.h" | 28 | #include "compat.h" |
28 | #include "hash.h" | 29 | #include "hash.h" |
29 | #include "ctree.h" | 30 | #include "ctree.h" |
@@ -2526,6 +2527,51 @@ static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq, | |||
2526 | return 0; | 2527 | return 0; |
2527 | } | 2528 | } |
2528 | 2529 | ||
2530 | static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) | ||
2531 | { | ||
2532 | u64 num_bytes; | ||
2533 | |||
2534 | num_bytes = heads * (sizeof(struct btrfs_extent_item) + | ||
2535 | sizeof(struct btrfs_extent_inline_ref)); | ||
2536 | if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) | ||
2537 | num_bytes += heads * sizeof(struct btrfs_tree_block_info); | ||
2538 | |||
2539 | /* | ||
2540 | * We don't ever fill up leaves all the way so multiply by 2 just to be | ||
2541 | * closer to what we're really going to want to ouse. | ||
2542 | */ | ||
2543 | return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); | ||
2544 | } | ||
2545 | |||
2546 | int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, | ||
2547 | struct btrfs_root *root) | ||
2548 | { | ||
2549 | struct btrfs_block_rsv *global_rsv; | ||
2550 | u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; | ||
2551 | u64 num_bytes; | ||
2552 | int ret = 0; | ||
2553 | |||
2554 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | ||
2555 | num_heads = heads_to_leaves(root, num_heads); | ||
2556 | if (num_heads > 1) | ||
2557 | num_bytes += (num_heads - 1) * root->leafsize; | ||
2558 | num_bytes <<= 1; | ||
2559 | global_rsv = &root->fs_info->global_block_rsv; | ||
2560 | |||
2561 | /* | ||
2562 | * If we can't allocate any more chunks lets make sure we have _lots_ of | ||
2563 | * wiggle room since running delayed refs can create more delayed refs. | ||
2564 | */ | ||
2565 | if (global_rsv->space_info->full) | ||
2566 | num_bytes <<= 1; | ||
2567 | |||
2568 | spin_lock(&global_rsv->lock); | ||
2569 | if (global_rsv->reserved <= num_bytes) | ||
2570 | ret = 1; | ||
2571 | spin_unlock(&global_rsv->lock); | ||
2572 | return ret; | ||
2573 | } | ||
2574 | |||
2529 | /* | 2575 | /* |
2530 | * this starts processing the delayed reference count updates and | 2576 | * this starts processing the delayed reference count updates and |
2531 | * extent insertions we have queued up so far. count can be | 2577 | * extent insertions we have queued up so far. count can be |
@@ -2573,7 +2619,8 @@ progress: | |||
2573 | old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); | 2619 | old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); |
2574 | if (old) { | 2620 | if (old) { |
2575 | DEFINE_WAIT(__wait); | 2621 | DEFINE_WAIT(__wait); |
2576 | if (delayed_refs->num_entries < 16348) | 2622 | if (delayed_refs->flushing || |
2623 | !btrfs_should_throttle_delayed_refs(trans, root)) | ||
2577 | return 0; | 2624 | return 0; |
2578 | 2625 | ||
2579 | prepare_to_wait(&delayed_refs->wait, &__wait, | 2626 | prepare_to_wait(&delayed_refs->wait, &__wait, |
@@ -2608,7 +2655,7 @@ again: | |||
2608 | 2655 | ||
2609 | while (1) { | 2656 | while (1) { |
2610 | if (!(run_all || run_most) && | 2657 | if (!(run_all || run_most) && |
2611 | delayed_refs->num_heads_ready < 64) | 2658 | !btrfs_should_throttle_delayed_refs(trans, root)) |
2612 | break; | 2659 | break; |
2613 | 2660 | ||
2614 | /* | 2661 | /* |
@@ -2629,6 +2676,7 @@ again: | |||
2629 | spin_unlock(&delayed_refs->lock); | 2676 | spin_unlock(&delayed_refs->lock); |
2630 | btrfs_abort_transaction(trans, root, ret); | 2677 | btrfs_abort_transaction(trans, root, ret); |
2631 | atomic_dec(&delayed_refs->procs_running_refs); | 2678 | atomic_dec(&delayed_refs->procs_running_refs); |
2679 | wake_up(&delayed_refs->wait); | ||
2632 | return ret; | 2680 | return ret; |
2633 | } | 2681 | } |
2634 | 2682 | ||
@@ -3310,6 +3358,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3310 | struct btrfs_space_info *found; | 3358 | struct btrfs_space_info *found; |
3311 | int i; | 3359 | int i; |
3312 | int factor; | 3360 | int factor; |
3361 | int ret; | ||
3313 | 3362 | ||
3314 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | 3363 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | |
3315 | BTRFS_BLOCK_GROUP_RAID10)) | 3364 | BTRFS_BLOCK_GROUP_RAID10)) |
@@ -3333,6 +3382,12 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3333 | if (!found) | 3382 | if (!found) |
3334 | return -ENOMEM; | 3383 | return -ENOMEM; |
3335 | 3384 | ||
3385 | ret = percpu_counter_init(&found->total_bytes_pinned, 0); | ||
3386 | if (ret) { | ||
3387 | kfree(found); | ||
3388 | return ret; | ||
3389 | } | ||
3390 | |||
3336 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) | 3391 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
3337 | INIT_LIST_HEAD(&found->block_groups[i]); | 3392 | INIT_LIST_HEAD(&found->block_groups[i]); |
3338 | init_rwsem(&found->groups_sem); | 3393 | init_rwsem(&found->groups_sem); |
@@ -3565,10 +3620,11 @@ alloc: | |||
3565 | } | 3620 | } |
3566 | 3621 | ||
3567 | /* | 3622 | /* |
3568 | * If we have less pinned bytes than we want to allocate then | 3623 | * If we don't have enough pinned space to deal with this |
3569 | * don't bother committing the transaction, it won't help us. | 3624 | * allocation don't bother committing the transaction. |
3570 | */ | 3625 | */ |
3571 | if (data_sinfo->bytes_pinned < bytes) | 3626 | if (percpu_counter_compare(&data_sinfo->total_bytes_pinned, |
3627 | bytes) < 0) | ||
3572 | committed = 1; | 3628 | committed = 1; |
3573 | spin_unlock(&data_sinfo->lock); | 3629 | spin_unlock(&data_sinfo->lock); |
3574 | 3630 | ||
@@ -3577,6 +3633,7 @@ commit_trans: | |||
3577 | if (!committed && | 3633 | if (!committed && |
3578 | !atomic_read(&root->fs_info->open_ioctl_trans)) { | 3634 | !atomic_read(&root->fs_info->open_ioctl_trans)) { |
3579 | committed = 1; | 3635 | committed = 1; |
3636 | |||
3580 | trans = btrfs_join_transaction(root); | 3637 | trans = btrfs_join_transaction(root); |
3581 | if (IS_ERR(trans)) | 3638 | if (IS_ERR(trans)) |
3582 | return PTR_ERR(trans); | 3639 | return PTR_ERR(trans); |
@@ -3609,6 +3666,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | |||
3609 | 3666 | ||
3610 | data_sinfo = root->fs_info->data_sinfo; | 3667 | data_sinfo = root->fs_info->data_sinfo; |
3611 | spin_lock(&data_sinfo->lock); | 3668 | spin_lock(&data_sinfo->lock); |
3669 | WARN_ON(data_sinfo->bytes_may_use < bytes); | ||
3612 | data_sinfo->bytes_may_use -= bytes; | 3670 | data_sinfo->bytes_may_use -= bytes; |
3613 | trace_btrfs_space_reservation(root->fs_info, "space_info", | 3671 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
3614 | data_sinfo->flags, bytes, 0); | 3672 | data_sinfo->flags, bytes, 0); |
@@ -3886,12 +3944,11 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
3886 | unsigned long nr_pages) | 3944 | unsigned long nr_pages) |
3887 | { | 3945 | { |
3888 | struct super_block *sb = root->fs_info->sb; | 3946 | struct super_block *sb = root->fs_info->sb; |
3889 | int started; | ||
3890 | 3947 | ||
3891 | /* If we can not start writeback, just sync all the delalloc file. */ | 3948 | if (down_read_trylock(&sb->s_umount)) { |
3892 | started = try_to_writeback_inodes_sb_nr(sb, nr_pages, | 3949 | writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE); |
3893 | WB_REASON_FS_FREE_SPACE); | 3950 | up_read(&sb->s_umount); |
3894 | if (!started) { | 3951 | } else { |
3895 | /* | 3952 | /* |
3896 | * We needn't worry the filesystem going from r/w to r/o though | 3953 | * We needn't worry the filesystem going from r/w to r/o though |
3897 | * we don't acquire ->s_umount mutex, because the filesystem | 3954 | * we don't acquire ->s_umount mutex, because the filesystem |
@@ -3899,9 +3956,9 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
3899 | * the filesystem is readonly(all dirty pages are written to | 3956 | * the filesystem is readonly(all dirty pages are written to |
3900 | * the disk). | 3957 | * the disk). |
3901 | */ | 3958 | */ |
3902 | btrfs_start_delalloc_inodes(root, 0); | 3959 | btrfs_start_all_delalloc_inodes(root->fs_info, 0); |
3903 | if (!current->journal_info) | 3960 | if (!current->journal_info) |
3904 | btrfs_wait_ordered_extents(root, 0); | 3961 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
3905 | } | 3962 | } |
3906 | } | 3963 | } |
3907 | 3964 | ||
@@ -3931,7 +3988,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3931 | if (delalloc_bytes == 0) { | 3988 | if (delalloc_bytes == 0) { |
3932 | if (trans) | 3989 | if (trans) |
3933 | return; | 3990 | return; |
3934 | btrfs_wait_ordered_extents(root, 0); | 3991 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
3935 | return; | 3992 | return; |
3936 | } | 3993 | } |
3937 | 3994 | ||
@@ -3959,7 +4016,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3959 | 4016 | ||
3960 | loops++; | 4017 | loops++; |
3961 | if (wait_ordered && !trans) { | 4018 | if (wait_ordered && !trans) { |
3962 | btrfs_wait_ordered_extents(root, 0); | 4019 | btrfs_wait_all_ordered_extents(root->fs_info, 0); |
3963 | } else { | 4020 | } else { |
3964 | time_left = schedule_timeout_killable(1); | 4021 | time_left = schedule_timeout_killable(1); |
3965 | if (time_left) | 4022 | if (time_left) |
@@ -3997,7 +4054,8 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
3997 | 4054 | ||
3998 | /* See if there is enough pinned space to make this reservation */ | 4055 | /* See if there is enough pinned space to make this reservation */ |
3999 | spin_lock(&space_info->lock); | 4056 | spin_lock(&space_info->lock); |
4000 | if (space_info->bytes_pinned >= bytes) { | 4057 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4058 | bytes) >= 0) { | ||
4001 | spin_unlock(&space_info->lock); | 4059 | spin_unlock(&space_info->lock); |
4002 | goto commit; | 4060 | goto commit; |
4003 | } | 4061 | } |
@@ -4012,7 +4070,8 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
4012 | 4070 | ||
4013 | spin_lock(&space_info->lock); | 4071 | spin_lock(&space_info->lock); |
4014 | spin_lock(&delayed_rsv->lock); | 4072 | spin_lock(&delayed_rsv->lock); |
4015 | if (space_info->bytes_pinned + delayed_rsv->size < bytes) { | 4073 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4074 | bytes - delayed_rsv->size) >= 0) { | ||
4016 | spin_unlock(&delayed_rsv->lock); | 4075 | spin_unlock(&delayed_rsv->lock); |
4017 | spin_unlock(&space_info->lock); | 4076 | spin_unlock(&space_info->lock); |
4018 | return -ENOSPC; | 4077 | return -ENOSPC; |
@@ -4297,6 +4356,31 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | |||
4297 | spin_unlock(&block_rsv->lock); | 4356 | spin_unlock(&block_rsv->lock); |
4298 | } | 4357 | } |
4299 | 4358 | ||
4359 | int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, | ||
4360 | struct btrfs_block_rsv *dest, u64 num_bytes, | ||
4361 | int min_factor) | ||
4362 | { | ||
4363 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
4364 | u64 min_bytes; | ||
4365 | |||
4366 | if (global_rsv->space_info != dest->space_info) | ||
4367 | return -ENOSPC; | ||
4368 | |||
4369 | spin_lock(&global_rsv->lock); | ||
4370 | min_bytes = div_factor(global_rsv->size, min_factor); | ||
4371 | if (global_rsv->reserved < min_bytes + num_bytes) { | ||
4372 | spin_unlock(&global_rsv->lock); | ||
4373 | return -ENOSPC; | ||
4374 | } | ||
4375 | global_rsv->reserved -= num_bytes; | ||
4376 | if (global_rsv->reserved < global_rsv->size) | ||
4377 | global_rsv->full = 0; | ||
4378 | spin_unlock(&global_rsv->lock); | ||
4379 | |||
4380 | block_rsv_add_bytes(dest, num_bytes, 1); | ||
4381 | return 0; | ||
4382 | } | ||
4383 | |||
4300 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, | 4384 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, |
4301 | struct btrfs_block_rsv *block_rsv, | 4385 | struct btrfs_block_rsv *block_rsv, |
4302 | struct btrfs_block_rsv *dest, u64 num_bytes) | 4386 | struct btrfs_block_rsv *dest, u64 num_bytes) |
@@ -5030,14 +5114,14 @@ static int update_block_group(struct btrfs_root *root, | |||
5030 | int factor; | 5114 | int factor; |
5031 | 5115 | ||
5032 | /* block accounting for super block */ | 5116 | /* block accounting for super block */ |
5033 | spin_lock(&info->delalloc_lock); | 5117 | spin_lock(&info->delalloc_root_lock); |
5034 | old_val = btrfs_super_bytes_used(info->super_copy); | 5118 | old_val = btrfs_super_bytes_used(info->super_copy); |
5035 | if (alloc) | 5119 | if (alloc) |
5036 | old_val += num_bytes; | 5120 | old_val += num_bytes; |
5037 | else | 5121 | else |
5038 | old_val -= num_bytes; | 5122 | old_val -= num_bytes; |
5039 | btrfs_set_super_bytes_used(info->super_copy, old_val); | 5123 | btrfs_set_super_bytes_used(info->super_copy, old_val); |
5040 | spin_unlock(&info->delalloc_lock); | 5124 | spin_unlock(&info->delalloc_root_lock); |
5041 | 5125 | ||
5042 | while (total) { | 5126 | while (total) { |
5043 | cache = btrfs_lookup_block_group(info, bytenr); | 5127 | cache = btrfs_lookup_block_group(info, bytenr); |
@@ -5189,6 +5273,80 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, | |||
5189 | return ret; | 5273 | return ret; |
5190 | } | 5274 | } |
5191 | 5275 | ||
5276 | static int __exclude_logged_extent(struct btrfs_root *root, u64 start, u64 num_bytes) | ||
5277 | { | ||
5278 | int ret; | ||
5279 | struct btrfs_block_group_cache *block_group; | ||
5280 | struct btrfs_caching_control *caching_ctl; | ||
5281 | |||
5282 | block_group = btrfs_lookup_block_group(root->fs_info, start); | ||
5283 | if (!block_group) | ||
5284 | return -EINVAL; | ||
5285 | |||
5286 | cache_block_group(block_group, 0); | ||
5287 | caching_ctl = get_caching_control(block_group); | ||
5288 | |||
5289 | if (!caching_ctl) { | ||
5290 | /* Logic error */ | ||
5291 | BUG_ON(!block_group_cache_done(block_group)); | ||
5292 | ret = btrfs_remove_free_space(block_group, start, num_bytes); | ||
5293 | } else { | ||
5294 | mutex_lock(&caching_ctl->mutex); | ||
5295 | |||
5296 | if (start >= caching_ctl->progress) { | ||
5297 | ret = add_excluded_extent(root, start, num_bytes); | ||
5298 | } else if (start + num_bytes <= caching_ctl->progress) { | ||
5299 | ret = btrfs_remove_free_space(block_group, | ||
5300 | start, num_bytes); | ||
5301 | } else { | ||
5302 | num_bytes = caching_ctl->progress - start; | ||
5303 | ret = btrfs_remove_free_space(block_group, | ||
5304 | start, num_bytes); | ||
5305 | if (ret) | ||
5306 | goto out_lock; | ||
5307 | |||
5308 | num_bytes = (start + num_bytes) - | ||
5309 | caching_ctl->progress; | ||
5310 | start = caching_ctl->progress; | ||
5311 | ret = add_excluded_extent(root, start, num_bytes); | ||
5312 | } | ||
5313 | out_lock: | ||
5314 | mutex_unlock(&caching_ctl->mutex); | ||
5315 | put_caching_control(caching_ctl); | ||
5316 | } | ||
5317 | btrfs_put_block_group(block_group); | ||
5318 | return ret; | ||
5319 | } | ||
5320 | |||
5321 | int btrfs_exclude_logged_extents(struct btrfs_root *log, | ||
5322 | struct extent_buffer *eb) | ||
5323 | { | ||
5324 | struct btrfs_file_extent_item *item; | ||
5325 | struct btrfs_key key; | ||
5326 | int found_type; | ||
5327 | int i; | ||
5328 | |||
5329 | if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS)) | ||
5330 | return 0; | ||
5331 | |||
5332 | for (i = 0; i < btrfs_header_nritems(eb); i++) { | ||
5333 | btrfs_item_key_to_cpu(eb, &key, i); | ||
5334 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
5335 | continue; | ||
5336 | item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); | ||
5337 | found_type = btrfs_file_extent_type(eb, item); | ||
5338 | if (found_type == BTRFS_FILE_EXTENT_INLINE) | ||
5339 | continue; | ||
5340 | if (btrfs_file_extent_disk_bytenr(eb, item) == 0) | ||
5341 | continue; | ||
5342 | key.objectid = btrfs_file_extent_disk_bytenr(eb, item); | ||
5343 | key.offset = btrfs_file_extent_disk_num_bytes(eb, item); | ||
5344 | __exclude_logged_extent(log, key.objectid, key.offset); | ||
5345 | } | ||
5346 | |||
5347 | return 0; | ||
5348 | } | ||
5349 | |||
5192 | /** | 5350 | /** |
5193 | * btrfs_update_reserved_bytes - update the block_group and space info counters | 5351 | * btrfs_update_reserved_bytes - update the block_group and space info counters |
5194 | * @cache: The cache we are manipulating | 5352 | * @cache: The cache we are manipulating |
@@ -5251,6 +5409,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
5251 | struct btrfs_caching_control *next; | 5409 | struct btrfs_caching_control *next; |
5252 | struct btrfs_caching_control *caching_ctl; | 5410 | struct btrfs_caching_control *caching_ctl; |
5253 | struct btrfs_block_group_cache *cache; | 5411 | struct btrfs_block_group_cache *cache; |
5412 | struct btrfs_space_info *space_info; | ||
5254 | 5413 | ||
5255 | down_write(&fs_info->extent_commit_sem); | 5414 | down_write(&fs_info->extent_commit_sem); |
5256 | 5415 | ||
@@ -5273,6 +5432,9 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
5273 | 5432 | ||
5274 | up_write(&fs_info->extent_commit_sem); | 5433 | up_write(&fs_info->extent_commit_sem); |
5275 | 5434 | ||
5435 | list_for_each_entry_rcu(space_info, &fs_info->space_info, list) | ||
5436 | percpu_counter_set(&space_info->total_bytes_pinned, 0); | ||
5437 | |||
5276 | update_global_block_rsv(fs_info); | 5438 | update_global_block_rsv(fs_info); |
5277 | } | 5439 | } |
5278 | 5440 | ||
@@ -5370,6 +5532,27 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
5370 | return 0; | 5532 | return 0; |
5371 | } | 5533 | } |
5372 | 5534 | ||
5535 | static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes, | ||
5536 | u64 owner, u64 root_objectid) | ||
5537 | { | ||
5538 | struct btrfs_space_info *space_info; | ||
5539 | u64 flags; | ||
5540 | |||
5541 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | ||
5542 | if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID) | ||
5543 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
5544 | else | ||
5545 | flags = BTRFS_BLOCK_GROUP_METADATA; | ||
5546 | } else { | ||
5547 | flags = BTRFS_BLOCK_GROUP_DATA; | ||
5548 | } | ||
5549 | |||
5550 | space_info = __find_space_info(fs_info, flags); | ||
5551 | BUG_ON(!space_info); /* Logic bug */ | ||
5552 | percpu_counter_add(&space_info->total_bytes_pinned, num_bytes); | ||
5553 | } | ||
5554 | |||
5555 | |||
5373 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 5556 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
5374 | struct btrfs_root *root, | 5557 | struct btrfs_root *root, |
5375 | u64 bytenr, u64 num_bytes, u64 parent, | 5558 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -5590,6 +5773,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5590 | goto out; | 5773 | goto out; |
5591 | } | 5774 | } |
5592 | } | 5775 | } |
5776 | add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid, | ||
5777 | root_objectid); | ||
5593 | } else { | 5778 | } else { |
5594 | if (found_extent) { | 5779 | if (found_extent) { |
5595 | BUG_ON(is_data && refs_to_drop != | 5780 | BUG_ON(is_data && refs_to_drop != |
@@ -5713,6 +5898,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
5713 | u64 parent, int last_ref) | 5898 | u64 parent, int last_ref) |
5714 | { | 5899 | { |
5715 | struct btrfs_block_group_cache *cache = NULL; | 5900 | struct btrfs_block_group_cache *cache = NULL; |
5901 | int pin = 1; | ||
5716 | int ret; | 5902 | int ret; |
5717 | 5903 | ||
5718 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 5904 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
@@ -5745,8 +5931,14 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
5745 | 5931 | ||
5746 | btrfs_add_free_space(cache, buf->start, buf->len); | 5932 | btrfs_add_free_space(cache, buf->start, buf->len); |
5747 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); | 5933 | btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); |
5934 | pin = 0; | ||
5748 | } | 5935 | } |
5749 | out: | 5936 | out: |
5937 | if (pin) | ||
5938 | add_pinned_bytes(root->fs_info, buf->len, | ||
5939 | btrfs_header_level(buf), | ||
5940 | root->root_key.objectid); | ||
5941 | |||
5750 | /* | 5942 | /* |
5751 | * Deleting the buffer, clear the corrupt flag since it doesn't matter | 5943 | * Deleting the buffer, clear the corrupt flag since it doesn't matter |
5752 | * anymore. | 5944 | * anymore. |
@@ -5763,6 +5955,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
5763 | int ret; | 5955 | int ret; |
5764 | struct btrfs_fs_info *fs_info = root->fs_info; | 5956 | struct btrfs_fs_info *fs_info = root->fs_info; |
5765 | 5957 | ||
5958 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); | ||
5959 | |||
5766 | /* | 5960 | /* |
5767 | * tree log blocks never actually go into the extent allocation | 5961 | * tree log blocks never actually go into the extent allocation |
5768 | * tree, just update pinning info and exit early. | 5962 | * tree, just update pinning info and exit early. |
@@ -6560,52 +6754,26 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
6560 | { | 6754 | { |
6561 | int ret; | 6755 | int ret; |
6562 | struct btrfs_block_group_cache *block_group; | 6756 | struct btrfs_block_group_cache *block_group; |
6563 | struct btrfs_caching_control *caching_ctl; | ||
6564 | u64 start = ins->objectid; | ||
6565 | u64 num_bytes = ins->offset; | ||
6566 | |||
6567 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | ||
6568 | cache_block_group(block_group, 0); | ||
6569 | caching_ctl = get_caching_control(block_group); | ||
6570 | |||
6571 | if (!caching_ctl) { | ||
6572 | BUG_ON(!block_group_cache_done(block_group)); | ||
6573 | ret = btrfs_remove_free_space(block_group, start, num_bytes); | ||
6574 | if (ret) | ||
6575 | goto out; | ||
6576 | } else { | ||
6577 | mutex_lock(&caching_ctl->mutex); | ||
6578 | |||
6579 | if (start >= caching_ctl->progress) { | ||
6580 | ret = add_excluded_extent(root, start, num_bytes); | ||
6581 | } else if (start + num_bytes <= caching_ctl->progress) { | ||
6582 | ret = btrfs_remove_free_space(block_group, | ||
6583 | start, num_bytes); | ||
6584 | } else { | ||
6585 | num_bytes = caching_ctl->progress - start; | ||
6586 | ret = btrfs_remove_free_space(block_group, | ||
6587 | start, num_bytes); | ||
6588 | if (ret) | ||
6589 | goto out_lock; | ||
6590 | 6757 | ||
6591 | start = caching_ctl->progress; | 6758 | /* |
6592 | num_bytes = ins->objectid + ins->offset - | 6759 | * Mixed block groups will exclude before processing the log so we only |
6593 | caching_ctl->progress; | 6760 | * need to do the exlude dance if this fs isn't mixed. |
6594 | ret = add_excluded_extent(root, start, num_bytes); | 6761 | */ |
6595 | } | 6762 | if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) { |
6596 | out_lock: | 6763 | ret = __exclude_logged_extent(root, ins->objectid, ins->offset); |
6597 | mutex_unlock(&caching_ctl->mutex); | ||
6598 | put_caching_control(caching_ctl); | ||
6599 | if (ret) | 6764 | if (ret) |
6600 | goto out; | 6765 | return ret; |
6601 | } | 6766 | } |
6602 | 6767 | ||
6768 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | ||
6769 | if (!block_group) | ||
6770 | return -EINVAL; | ||
6771 | |||
6603 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, | 6772 | ret = btrfs_update_reserved_bytes(block_group, ins->offset, |
6604 | RESERVE_ALLOC_NO_ACCOUNT); | 6773 | RESERVE_ALLOC_NO_ACCOUNT); |
6605 | BUG_ON(ret); /* logic error */ | 6774 | BUG_ON(ret); /* logic error */ |
6606 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 6775 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
6607 | 0, owner, offset, ins, 1); | 6776 | 0, owner, offset, ins, 1); |
6608 | out: | ||
6609 | btrfs_put_block_group(block_group); | 6777 | btrfs_put_block_group(block_group); |
6610 | return ret; | 6778 | return ret; |
6611 | } | 6779 | } |
@@ -7298,6 +7466,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7298 | int err = 0; | 7466 | int err = 0; |
7299 | int ret; | 7467 | int ret; |
7300 | int level; | 7468 | int level; |
7469 | bool root_dropped = false; | ||
7301 | 7470 | ||
7302 | path = btrfs_alloc_path(); | 7471 | path = btrfs_alloc_path(); |
7303 | if (!path) { | 7472 | if (!path) { |
@@ -7355,6 +7524,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7355 | while (1) { | 7524 | while (1) { |
7356 | btrfs_tree_lock(path->nodes[level]); | 7525 | btrfs_tree_lock(path->nodes[level]); |
7357 | btrfs_set_lock_blocking(path->nodes[level]); | 7526 | btrfs_set_lock_blocking(path->nodes[level]); |
7527 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; | ||
7358 | 7528 | ||
7359 | ret = btrfs_lookup_extent_info(trans, root, | 7529 | ret = btrfs_lookup_extent_info(trans, root, |
7360 | path->nodes[level]->start, | 7530 | path->nodes[level]->start, |
@@ -7370,6 +7540,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7370 | break; | 7540 | break; |
7371 | 7541 | ||
7372 | btrfs_tree_unlock(path->nodes[level]); | 7542 | btrfs_tree_unlock(path->nodes[level]); |
7543 | path->locks[level] = 0; | ||
7373 | WARN_ON(wc->refs[level] != 1); | 7544 | WARN_ON(wc->refs[level] != 1); |
7374 | level--; | 7545 | level--; |
7375 | } | 7546 | } |
@@ -7384,11 +7555,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7384 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | 7555 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); |
7385 | 7556 | ||
7386 | while (1) { | 7557 | while (1) { |
7387 | if (!for_reloc && btrfs_fs_closing(root->fs_info)) { | ||
7388 | pr_debug("btrfs: drop snapshot early exit\n"); | ||
7389 | err = -EAGAIN; | ||
7390 | goto out_end_trans; | ||
7391 | } | ||
7392 | 7558 | ||
7393 | ret = walk_down_tree(trans, root, path, wc); | 7559 | ret = walk_down_tree(trans, root, path, wc); |
7394 | if (ret < 0) { | 7560 | if (ret < 0) { |
@@ -7416,7 +7582,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7416 | } | 7582 | } |
7417 | 7583 | ||
7418 | BUG_ON(wc->level == 0); | 7584 | BUG_ON(wc->level == 0); |
7419 | if (btrfs_should_end_transaction(trans, tree_root)) { | 7585 | if (btrfs_should_end_transaction(trans, tree_root) || |
7586 | (!for_reloc && btrfs_need_cleaner_sleep(root))) { | ||
7420 | ret = btrfs_update_root(trans, tree_root, | 7587 | ret = btrfs_update_root(trans, tree_root, |
7421 | &root->root_key, | 7588 | &root->root_key, |
7422 | root_item); | 7589 | root_item); |
@@ -7427,6 +7594,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7427 | } | 7594 | } |
7428 | 7595 | ||
7429 | btrfs_end_transaction_throttle(trans, tree_root); | 7596 | btrfs_end_transaction_throttle(trans, tree_root); |
7597 | if (!for_reloc && btrfs_need_cleaner_sleep(root)) { | ||
7598 | pr_debug("btrfs: drop snapshot early exit\n"); | ||
7599 | err = -EAGAIN; | ||
7600 | goto out_free; | ||
7601 | } | ||
7602 | |||
7430 | trans = btrfs_start_transaction(tree_root, 0); | 7603 | trans = btrfs_start_transaction(tree_root, 0); |
7431 | if (IS_ERR(trans)) { | 7604 | if (IS_ERR(trans)) { |
7432 | err = PTR_ERR(trans); | 7605 | err = PTR_ERR(trans); |
@@ -7447,8 +7620,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7447 | } | 7620 | } |
7448 | 7621 | ||
7449 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | 7622 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
7450 | ret = btrfs_find_last_root(tree_root, root->root_key.objectid, | 7623 | ret = btrfs_find_root(tree_root, &root->root_key, path, |
7451 | NULL, NULL); | 7624 | NULL, NULL); |
7452 | if (ret < 0) { | 7625 | if (ret < 0) { |
7453 | btrfs_abort_transaction(trans, tree_root, ret); | 7626 | btrfs_abort_transaction(trans, tree_root, ret); |
7454 | err = ret; | 7627 | err = ret; |
@@ -7465,18 +7638,28 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
7465 | } | 7638 | } |
7466 | 7639 | ||
7467 | if (root->in_radix) { | 7640 | if (root->in_radix) { |
7468 | btrfs_free_fs_root(tree_root->fs_info, root); | 7641 | btrfs_drop_and_free_fs_root(tree_root->fs_info, root); |
7469 | } else { | 7642 | } else { |
7470 | free_extent_buffer(root->node); | 7643 | free_extent_buffer(root->node); |
7471 | free_extent_buffer(root->commit_root); | 7644 | free_extent_buffer(root->commit_root); |
7472 | kfree(root); | 7645 | btrfs_put_fs_root(root); |
7473 | } | 7646 | } |
7647 | root_dropped = true; | ||
7474 | out_end_trans: | 7648 | out_end_trans: |
7475 | btrfs_end_transaction_throttle(trans, tree_root); | 7649 | btrfs_end_transaction_throttle(trans, tree_root); |
7476 | out_free: | 7650 | out_free: |
7477 | kfree(wc); | 7651 | kfree(wc); |
7478 | btrfs_free_path(path); | 7652 | btrfs_free_path(path); |
7479 | out: | 7653 | out: |
7654 | /* | ||
7655 | * So if we need to stop dropping the snapshot for whatever reason we | ||
7656 | * need to make sure to add it back to the dead root list so that we | ||
7657 | * keep trying to do the work later. This also cleans up roots if we | ||
7658 | * don't have it in the radix (like when we recover after a power fail | ||
7659 | * or unmount) so we don't leak memory. | ||
7660 | */ | ||
7661 | if (root_dropped == false) | ||
7662 | btrfs_add_dead_root(root); | ||
7480 | if (err) | 7663 | if (err) |
7481 | btrfs_std_error(root->fs_info, err); | 7664 | btrfs_std_error(root->fs_info, err); |
7482 | return err; | 7665 | return err; |
@@ -7782,6 +7965,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7782 | struct btrfs_space_info *space_info; | 7965 | struct btrfs_space_info *space_info; |
7783 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 7966 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
7784 | struct btrfs_device *device; | 7967 | struct btrfs_device *device; |
7968 | struct btrfs_trans_handle *trans; | ||
7785 | u64 min_free; | 7969 | u64 min_free; |
7786 | u64 dev_min = 1; | 7970 | u64 dev_min = 1; |
7787 | u64 dev_nr = 0; | 7971 | u64 dev_nr = 0; |
@@ -7868,6 +8052,13 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7868 | do_div(min_free, dev_min); | 8052 | do_div(min_free, dev_min); |
7869 | } | 8053 | } |
7870 | 8054 | ||
8055 | /* We need to do this so that we can look at pending chunks */ | ||
8056 | trans = btrfs_join_transaction(root); | ||
8057 | if (IS_ERR(trans)) { | ||
8058 | ret = PTR_ERR(trans); | ||
8059 | goto out; | ||
8060 | } | ||
8061 | |||
7871 | mutex_lock(&root->fs_info->chunk_mutex); | 8062 | mutex_lock(&root->fs_info->chunk_mutex); |
7872 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 8063 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
7873 | u64 dev_offset; | 8064 | u64 dev_offset; |
@@ -7878,7 +8069,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7878 | */ | 8069 | */ |
7879 | if (device->total_bytes > device->bytes_used + min_free && | 8070 | if (device->total_bytes > device->bytes_used + min_free && |
7880 | !device->is_tgtdev_for_dev_replace) { | 8071 | !device->is_tgtdev_for_dev_replace) { |
7881 | ret = find_free_dev_extent(device, min_free, | 8072 | ret = find_free_dev_extent(trans, device, min_free, |
7882 | &dev_offset, NULL); | 8073 | &dev_offset, NULL); |
7883 | if (!ret) | 8074 | if (!ret) |
7884 | dev_nr++; | 8075 | dev_nr++; |
@@ -7890,6 +8081,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7890 | } | 8081 | } |
7891 | } | 8082 | } |
7892 | mutex_unlock(&root->fs_info->chunk_mutex); | 8083 | mutex_unlock(&root->fs_info->chunk_mutex); |
8084 | btrfs_end_transaction(trans, root); | ||
7893 | out: | 8085 | out: |
7894 | btrfs_put_block_group(block_group); | 8086 | btrfs_put_block_group(block_group); |
7895 | return ret; | 8087 | return ret; |
@@ -8032,6 +8224,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
8032 | dump_space_info(space_info, 0, 0); | 8224 | dump_space_info(space_info, 0, 0); |
8033 | } | 8225 | } |
8034 | } | 8226 | } |
8227 | percpu_counter_destroy(&space_info->total_bytes_pinned); | ||
8035 | list_del(&space_info->list); | 8228 | list_del(&space_info->list); |
8036 | kfree(space_info); | 8229 | kfree(space_info); |
8037 | } | 8230 | } |
@@ -8254,6 +8447,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | |||
8254 | sizeof(item)); | 8447 | sizeof(item)); |
8255 | if (ret) | 8448 | if (ret) |
8256 | btrfs_abort_transaction(trans, extent_root, ret); | 8449 | btrfs_abort_transaction(trans, extent_root, ret); |
8450 | ret = btrfs_finish_chunk_alloc(trans, extent_root, | ||
8451 | key.objectid, key.offset); | ||
8452 | if (ret) | ||
8453 | btrfs_abort_transaction(trans, extent_root, ret); | ||
8257 | } | 8454 | } |
8258 | } | 8455 | } |
8259 | 8456 | ||
@@ -8591,8 +8788,15 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
8591 | if (end - start >= range->minlen) { | 8788 | if (end - start >= range->minlen) { |
8592 | if (!block_group_cache_done(cache)) { | 8789 | if (!block_group_cache_done(cache)) { |
8593 | ret = cache_block_group(cache, 0); | 8790 | ret = cache_block_group(cache, 0); |
8594 | if (!ret) | 8791 | if (ret) { |
8595 | wait_block_group_cache_done(cache); | 8792 | btrfs_put_block_group(cache); |
8793 | break; | ||
8794 | } | ||
8795 | ret = wait_block_group_cache_done(cache); | ||
8796 | if (ret) { | ||
8797 | btrfs_put_block_group(cache); | ||
8798 | break; | ||
8799 | } | ||
8596 | } | 8800 | } |
8597 | ret = btrfs_trim_block_group(cache, | 8801 | ret = btrfs_trim_block_group(cache, |
8598 | &group_trimmed, | 8802 | &group_trimmed, |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e7e7afb4a872..fe443fece851 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -77,10 +77,29 @@ void btrfs_leak_debug_check(void) | |||
77 | kmem_cache_free(extent_buffer_cache, eb); | 77 | kmem_cache_free(extent_buffer_cache, eb); |
78 | } | 78 | } |
79 | } | 79 | } |
80 | |||
81 | #define btrfs_debug_check_extent_io_range(inode, start, end) \ | ||
82 | __btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end)) | ||
83 | static inline void __btrfs_debug_check_extent_io_range(const char *caller, | ||
84 | struct inode *inode, u64 start, u64 end) | ||
85 | { | ||
86 | u64 isize = i_size_read(inode); | ||
87 | |||
88 | if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { | ||
89 | printk_ratelimited(KERN_DEBUG | ||
90 | "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", | ||
91 | caller, | ||
92 | (unsigned long long)btrfs_ino(inode), | ||
93 | (unsigned long long)isize, | ||
94 | (unsigned long long)start, | ||
95 | (unsigned long long)end); | ||
96 | } | ||
97 | } | ||
80 | #else | 98 | #else |
81 | #define btrfs_leak_debug_add(new, head) do {} while (0) | 99 | #define btrfs_leak_debug_add(new, head) do {} while (0) |
82 | #define btrfs_leak_debug_del(entry) do {} while (0) | 100 | #define btrfs_leak_debug_del(entry) do {} while (0) |
83 | #define btrfs_leak_debug_check() do {} while (0) | 101 | #define btrfs_leak_debug_check() do {} while (0) |
102 | #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0) | ||
84 | #endif | 103 | #endif |
85 | 104 | ||
86 | #define BUFFER_LRU_MAX 64 | 105 | #define BUFFER_LRU_MAX 64 |
@@ -522,6 +541,11 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
522 | int err; | 541 | int err; |
523 | int clear = 0; | 542 | int clear = 0; |
524 | 543 | ||
544 | btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); | ||
545 | |||
546 | if (bits & EXTENT_DELALLOC) | ||
547 | bits |= EXTENT_NORESERVE; | ||
548 | |||
525 | if (delete) | 549 | if (delete) |
526 | bits |= ~EXTENT_CTLBITS; | 550 | bits |= ~EXTENT_CTLBITS; |
527 | bits |= EXTENT_FIRST_DELALLOC; | 551 | bits |= EXTENT_FIRST_DELALLOC; |
@@ -677,6 +701,8 @@ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
677 | struct extent_state *state; | 701 | struct extent_state *state; |
678 | struct rb_node *node; | 702 | struct rb_node *node; |
679 | 703 | ||
704 | btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); | ||
705 | |||
680 | spin_lock(&tree->lock); | 706 | spin_lock(&tree->lock); |
681 | again: | 707 | again: |
682 | while (1) { | 708 | while (1) { |
@@ -769,6 +795,8 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
769 | u64 last_start; | 795 | u64 last_start; |
770 | u64 last_end; | 796 | u64 last_end; |
771 | 797 | ||
798 | btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); | ||
799 | |||
772 | bits |= EXTENT_FIRST_DELALLOC; | 800 | bits |= EXTENT_FIRST_DELALLOC; |
773 | again: | 801 | again: |
774 | if (!prealloc && (mask & __GFP_WAIT)) { | 802 | if (!prealloc && (mask & __GFP_WAIT)) { |
@@ -989,6 +1017,8 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
989 | u64 last_start; | 1017 | u64 last_start; |
990 | u64 last_end; | 1018 | u64 last_end; |
991 | 1019 | ||
1020 | btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); | ||
1021 | |||
992 | again: | 1022 | again: |
993 | if (!prealloc && (mask & __GFP_WAIT)) { | 1023 | if (!prealloc && (mask & __GFP_WAIT)) { |
994 | prealloc = alloc_extent_state(mask); | 1024 | prealloc = alloc_extent_state(mask); |
@@ -2450,11 +2480,12 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2450 | struct extent_state *cached = NULL; | 2480 | struct extent_state *cached = NULL; |
2451 | struct extent_state *state; | 2481 | struct extent_state *state; |
2452 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | 2482 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); |
2483 | struct inode *inode = page->mapping->host; | ||
2453 | 2484 | ||
2454 | pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " | 2485 | pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " |
2455 | "mirror=%lu\n", (u64)bio->bi_sector, err, | 2486 | "mirror=%lu\n", (u64)bio->bi_sector, err, |
2456 | io_bio->mirror_num); | 2487 | io_bio->mirror_num); |
2457 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2488 | tree = &BTRFS_I(inode)->io_tree; |
2458 | 2489 | ||
2459 | /* We always issue full-page reads, but if some block | 2490 | /* We always issue full-page reads, but if some block |
2460 | * in a page fails to read, blk_update_request() will | 2491 | * in a page fails to read, blk_update_request() will |
@@ -2528,6 +2559,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2528 | unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); | 2559 | unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); |
2529 | 2560 | ||
2530 | if (uptodate) { | 2561 | if (uptodate) { |
2562 | loff_t i_size = i_size_read(inode); | ||
2563 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
2564 | unsigned offset; | ||
2565 | |||
2566 | /* Zero out the end if this page straddles i_size */ | ||
2567 | offset = i_size & (PAGE_CACHE_SIZE-1); | ||
2568 | if (page->index == end_index && offset) | ||
2569 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); | ||
2531 | SetPageUptodate(page); | 2570 | SetPageUptodate(page); |
2532 | } else { | 2571 | } else { |
2533 | ClearPageUptodate(page); | 2572 | ClearPageUptodate(page); |
@@ -2957,7 +2996,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2957 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2996 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
2958 | if (page->index > end_index || | 2997 | if (page->index > end_index || |
2959 | (page->index == end_index && !pg_offset)) { | 2998 | (page->index == end_index && !pg_offset)) { |
2960 | page->mapping->a_ops->invalidatepage(page, 0); | 2999 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); |
2961 | unlock_page(page); | 3000 | unlock_page(page); |
2962 | return 0; | 3001 | return 0; |
2963 | } | 3002 | } |
@@ -4009,7 +4048,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4009 | } | 4048 | } |
4010 | 4049 | ||
4011 | while (!end) { | 4050 | while (!end) { |
4012 | u64 offset_in_extent; | 4051 | u64 offset_in_extent = 0; |
4013 | 4052 | ||
4014 | /* break if the extent we found is outside the range */ | 4053 | /* break if the extent we found is outside the range */ |
4015 | if (em->start >= max || extent_map_end(em) < off) | 4054 | if (em->start >= max || extent_map_end(em) < off) |
@@ -4025,9 +4064,12 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4025 | 4064 | ||
4026 | /* | 4065 | /* |
4027 | * record the offset from the start of the extent | 4066 | * record the offset from the start of the extent |
4028 | * for adjusting the disk offset below | 4067 | * for adjusting the disk offset below. Only do this if the |
4068 | * extent isn't compressed since our in ram offset may be past | ||
4069 | * what we have actually allocated on disk. | ||
4029 | */ | 4070 | */ |
4030 | offset_in_extent = em_start - em->start; | 4071 | if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
4072 | offset_in_extent = em_start - em->start; | ||
4031 | em_end = extent_map_end(em); | 4073 | em_end = extent_map_end(em); |
4032 | em_len = em_end - em_start; | 4074 | em_len = em_end - em_start; |
4033 | emflags = em->flags; | 4075 | emflags = em->flags; |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 41fb81e7ec53..3b8c4e26e1da 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #define EXTENT_FIRST_DELALLOC (1 << 12) | 19 | #define EXTENT_FIRST_DELALLOC (1 << 12) |
20 | #define EXTENT_NEED_WAIT (1 << 13) | 20 | #define EXTENT_NEED_WAIT (1 << 13) |
21 | #define EXTENT_DAMAGED (1 << 14) | 21 | #define EXTENT_DAMAGED (1 << 14) |
22 | #define EXTENT_NORESERVE (1 << 15) | ||
22 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 23 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
23 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 24 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
24 | 25 | ||
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b193bf324a41..a7bfc9541803 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -34,8 +34,7 @@ | |||
34 | 34 | ||
35 | #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ | 35 | #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ |
36 | sizeof(struct btrfs_ordered_sum)) / \ | 36 | sizeof(struct btrfs_ordered_sum)) / \ |
37 | sizeof(struct btrfs_sector_sum) * \ | 37 | sizeof(u32) * (r)->sectorsize) |
38 | (r)->sectorsize - (r)->sectorsize) | ||
39 | 38 | ||
40 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 39 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
41 | struct btrfs_root *root, | 40 | struct btrfs_root *root, |
@@ -297,7 +296,6 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
297 | struct btrfs_path *path; | 296 | struct btrfs_path *path; |
298 | struct extent_buffer *leaf; | 297 | struct extent_buffer *leaf; |
299 | struct btrfs_ordered_sum *sums; | 298 | struct btrfs_ordered_sum *sums; |
300 | struct btrfs_sector_sum *sector_sum; | ||
301 | struct btrfs_csum_item *item; | 299 | struct btrfs_csum_item *item; |
302 | LIST_HEAD(tmplist); | 300 | LIST_HEAD(tmplist); |
303 | unsigned long offset; | 301 | unsigned long offset; |
@@ -368,34 +366,28 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
368 | struct btrfs_csum_item); | 366 | struct btrfs_csum_item); |
369 | while (start < csum_end) { | 367 | while (start < csum_end) { |
370 | size = min_t(size_t, csum_end - start, | 368 | size = min_t(size_t, csum_end - start, |
371 | MAX_ORDERED_SUM_BYTES(root)); | 369 | MAX_ORDERED_SUM_BYTES(root)); |
372 | sums = kzalloc(btrfs_ordered_sum_size(root, size), | 370 | sums = kzalloc(btrfs_ordered_sum_size(root, size), |
373 | GFP_NOFS); | 371 | GFP_NOFS); |
374 | if (!sums) { | 372 | if (!sums) { |
375 | ret = -ENOMEM; | 373 | ret = -ENOMEM; |
376 | goto fail; | 374 | goto fail; |
377 | } | 375 | } |
378 | 376 | ||
379 | sector_sum = sums->sums; | ||
380 | sums->bytenr = start; | 377 | sums->bytenr = start; |
381 | sums->len = size; | 378 | sums->len = (int)size; |
382 | 379 | ||
383 | offset = (start - key.offset) >> | 380 | offset = (start - key.offset) >> |
384 | root->fs_info->sb->s_blocksize_bits; | 381 | root->fs_info->sb->s_blocksize_bits; |
385 | offset *= csum_size; | 382 | offset *= csum_size; |
383 | size >>= root->fs_info->sb->s_blocksize_bits; | ||
386 | 384 | ||
387 | while (size > 0) { | 385 | read_extent_buffer(path->nodes[0], |
388 | read_extent_buffer(path->nodes[0], | 386 | sums->sums, |
389 | §or_sum->sum, | 387 | ((unsigned long)item) + offset, |
390 | ((unsigned long)item) + | 388 | csum_size * size); |
391 | offset, csum_size); | 389 | |
392 | sector_sum->bytenr = start; | 390 | start += root->sectorsize * size; |
393 | |||
394 | size -= root->sectorsize; | ||
395 | start += root->sectorsize; | ||
396 | offset += csum_size; | ||
397 | sector_sum++; | ||
398 | } | ||
399 | list_add_tail(&sums->list, &tmplist); | 391 | list_add_tail(&sums->list, &tmplist); |
400 | } | 392 | } |
401 | path->slots[0]++; | 393 | path->slots[0]++; |
@@ -417,23 +409,20 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | |||
417 | struct bio *bio, u64 file_start, int contig) | 409 | struct bio *bio, u64 file_start, int contig) |
418 | { | 410 | { |
419 | struct btrfs_ordered_sum *sums; | 411 | struct btrfs_ordered_sum *sums; |
420 | struct btrfs_sector_sum *sector_sum; | ||
421 | struct btrfs_ordered_extent *ordered; | 412 | struct btrfs_ordered_extent *ordered; |
422 | char *data; | 413 | char *data; |
423 | struct bio_vec *bvec = bio->bi_io_vec; | 414 | struct bio_vec *bvec = bio->bi_io_vec; |
424 | int bio_index = 0; | 415 | int bio_index = 0; |
416 | int index; | ||
425 | unsigned long total_bytes = 0; | 417 | unsigned long total_bytes = 0; |
426 | unsigned long this_sum_bytes = 0; | 418 | unsigned long this_sum_bytes = 0; |
427 | u64 offset; | 419 | u64 offset; |
428 | u64 disk_bytenr; | ||
429 | 420 | ||
430 | WARN_ON(bio->bi_vcnt <= 0); | 421 | WARN_ON(bio->bi_vcnt <= 0); |
431 | sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); | 422 | sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); |
432 | if (!sums) | 423 | if (!sums) |
433 | return -ENOMEM; | 424 | return -ENOMEM; |
434 | 425 | ||
435 | sector_sum = sums->sums; | ||
436 | disk_bytenr = (u64)bio->bi_sector << 9; | ||
437 | sums->len = bio->bi_size; | 426 | sums->len = bio->bi_size; |
438 | INIT_LIST_HEAD(&sums->list); | 427 | INIT_LIST_HEAD(&sums->list); |
439 | 428 | ||
@@ -444,7 +433,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | |||
444 | 433 | ||
445 | ordered = btrfs_lookup_ordered_extent(inode, offset); | 434 | ordered = btrfs_lookup_ordered_extent(inode, offset); |
446 | BUG_ON(!ordered); /* Logic error */ | 435 | BUG_ON(!ordered); /* Logic error */ |
447 | sums->bytenr = ordered->start; | 436 | sums->bytenr = (u64)bio->bi_sector << 9; |
437 | index = 0; | ||
448 | 438 | ||
449 | while (bio_index < bio->bi_vcnt) { | 439 | while (bio_index < bio->bi_vcnt) { |
450 | if (!contig) | 440 | if (!contig) |
@@ -463,28 +453,27 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | |||
463 | sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), | 453 | sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), |
464 | GFP_NOFS); | 454 | GFP_NOFS); |
465 | BUG_ON(!sums); /* -ENOMEM */ | 455 | BUG_ON(!sums); /* -ENOMEM */ |
466 | sector_sum = sums->sums; | ||
467 | sums->len = bytes_left; | 456 | sums->len = bytes_left; |
468 | ordered = btrfs_lookup_ordered_extent(inode, offset); | 457 | ordered = btrfs_lookup_ordered_extent(inode, offset); |
469 | BUG_ON(!ordered); /* Logic error */ | 458 | BUG_ON(!ordered); /* Logic error */ |
470 | sums->bytenr = ordered->start; | 459 | sums->bytenr = ((u64)bio->bi_sector << 9) + |
460 | total_bytes; | ||
461 | index = 0; | ||
471 | } | 462 | } |
472 | 463 | ||
473 | data = kmap_atomic(bvec->bv_page); | 464 | data = kmap_atomic(bvec->bv_page); |
474 | sector_sum->sum = ~(u32)0; | 465 | sums->sums[index] = ~(u32)0; |
475 | sector_sum->sum = btrfs_csum_data(data + bvec->bv_offset, | 466 | sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset, |
476 | sector_sum->sum, | 467 | sums->sums[index], |
477 | bvec->bv_len); | 468 | bvec->bv_len); |
478 | kunmap_atomic(data); | 469 | kunmap_atomic(data); |
479 | btrfs_csum_final(sector_sum->sum, | 470 | btrfs_csum_final(sums->sums[index], |
480 | (char *)§or_sum->sum); | 471 | (char *)(sums->sums + index)); |
481 | sector_sum->bytenr = disk_bytenr; | ||
482 | 472 | ||
483 | sector_sum++; | ||
484 | bio_index++; | 473 | bio_index++; |
474 | index++; | ||
485 | total_bytes += bvec->bv_len; | 475 | total_bytes += bvec->bv_len; |
486 | this_sum_bytes += bvec->bv_len; | 476 | this_sum_bytes += bvec->bv_len; |
487 | disk_bytenr += bvec->bv_len; | ||
488 | offset += bvec->bv_len; | 477 | offset += bvec->bv_len; |
489 | bvec++; | 478 | bvec++; |
490 | } | 479 | } |
@@ -672,62 +661,46 @@ out: | |||
672 | return ret; | 661 | return ret; |
673 | } | 662 | } |
674 | 663 | ||
675 | static u64 btrfs_sector_sum_left(struct btrfs_ordered_sum *sums, | ||
676 | struct btrfs_sector_sum *sector_sum, | ||
677 | u64 total_bytes, u64 sectorsize) | ||
678 | { | ||
679 | u64 tmp = sectorsize; | ||
680 | u64 next_sector = sector_sum->bytenr; | ||
681 | struct btrfs_sector_sum *next = sector_sum + 1; | ||
682 | |||
683 | while ((tmp + total_bytes) < sums->len) { | ||
684 | if (next_sector + sectorsize != next->bytenr) | ||
685 | break; | ||
686 | tmp += sectorsize; | ||
687 | next_sector = next->bytenr; | ||
688 | next++; | ||
689 | } | ||
690 | return tmp; | ||
691 | } | ||
692 | |||
693 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | 664 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, |
694 | struct btrfs_root *root, | 665 | struct btrfs_root *root, |
695 | struct btrfs_ordered_sum *sums) | 666 | struct btrfs_ordered_sum *sums) |
696 | { | 667 | { |
697 | u64 bytenr; | ||
698 | int ret; | ||
699 | struct btrfs_key file_key; | 668 | struct btrfs_key file_key; |
700 | struct btrfs_key found_key; | 669 | struct btrfs_key found_key; |
701 | u64 next_offset; | ||
702 | u64 total_bytes = 0; | ||
703 | int found_next; | ||
704 | struct btrfs_path *path; | 670 | struct btrfs_path *path; |
705 | struct btrfs_csum_item *item; | 671 | struct btrfs_csum_item *item; |
706 | struct btrfs_csum_item *item_end; | 672 | struct btrfs_csum_item *item_end; |
707 | struct extent_buffer *leaf = NULL; | 673 | struct extent_buffer *leaf = NULL; |
674 | u64 next_offset; | ||
675 | u64 total_bytes = 0; | ||
708 | u64 csum_offset; | 676 | u64 csum_offset; |
709 | struct btrfs_sector_sum *sector_sum; | 677 | u64 bytenr; |
710 | u32 nritems; | 678 | u32 nritems; |
711 | u32 ins_size; | 679 | u32 ins_size; |
680 | int index = 0; | ||
681 | int found_next; | ||
682 | int ret; | ||
712 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); | 683 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
713 | 684 | ||
714 | path = btrfs_alloc_path(); | 685 | path = btrfs_alloc_path(); |
715 | if (!path) | 686 | if (!path) |
716 | return -ENOMEM; | 687 | return -ENOMEM; |
717 | |||
718 | sector_sum = sums->sums; | ||
719 | again: | 688 | again: |
720 | next_offset = (u64)-1; | 689 | next_offset = (u64)-1; |
721 | found_next = 0; | 690 | found_next = 0; |
691 | bytenr = sums->bytenr + total_bytes; | ||
722 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 692 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
723 | file_key.offset = sector_sum->bytenr; | 693 | file_key.offset = bytenr; |
724 | bytenr = sector_sum->bytenr; | ||
725 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY); | 694 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY); |
726 | 695 | ||
727 | item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1); | 696 | item = btrfs_lookup_csum(trans, root, path, bytenr, 1); |
728 | if (!IS_ERR(item)) { | 697 | if (!IS_ERR(item)) { |
729 | leaf = path->nodes[0]; | ||
730 | ret = 0; | 698 | ret = 0; |
699 | leaf = path->nodes[0]; | ||
700 | item_end = btrfs_item_ptr(leaf, path->slots[0], | ||
701 | struct btrfs_csum_item); | ||
702 | item_end = (struct btrfs_csum_item *)((char *)item_end + | ||
703 | btrfs_item_size_nr(leaf, path->slots[0])); | ||
731 | goto found; | 704 | goto found; |
732 | } | 705 | } |
733 | ret = PTR_ERR(item); | 706 | ret = PTR_ERR(item); |
@@ -807,8 +780,7 @@ again: | |||
807 | 780 | ||
808 | free_space = btrfs_leaf_free_space(root, leaf) - | 781 | free_space = btrfs_leaf_free_space(root, leaf) - |
809 | sizeof(struct btrfs_item) - csum_size; | 782 | sizeof(struct btrfs_item) - csum_size; |
810 | tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes, | 783 | tmp = sums->len - total_bytes; |
811 | root->sectorsize); | ||
812 | tmp >>= root->fs_info->sb->s_blocksize_bits; | 784 | tmp >>= root->fs_info->sb->s_blocksize_bits; |
813 | WARN_ON(tmp < 1); | 785 | WARN_ON(tmp < 1); |
814 | 786 | ||
@@ -822,6 +794,7 @@ again: | |||
822 | diff *= csum_size; | 794 | diff *= csum_size; |
823 | 795 | ||
824 | btrfs_extend_item(root, path, diff); | 796 | btrfs_extend_item(root, path, diff); |
797 | ret = 0; | ||
825 | goto csum; | 798 | goto csum; |
826 | } | 799 | } |
827 | 800 | ||
@@ -831,8 +804,7 @@ insert: | |||
831 | if (found_next) { | 804 | if (found_next) { |
832 | u64 tmp; | 805 | u64 tmp; |
833 | 806 | ||
834 | tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes, | 807 | tmp = sums->len - total_bytes; |
835 | root->sectorsize); | ||
836 | tmp >>= root->fs_info->sb->s_blocksize_bits; | 808 | tmp >>= root->fs_info->sb->s_blocksize_bits; |
837 | tmp = min(tmp, (next_offset - file_key.offset) >> | 809 | tmp = min(tmp, (next_offset - file_key.offset) >> |
838 | root->fs_info->sb->s_blocksize_bits); | 810 | root->fs_info->sb->s_blocksize_bits); |
@@ -853,31 +825,25 @@ insert: | |||
853 | WARN_ON(1); | 825 | WARN_ON(1); |
854 | goto fail_unlock; | 826 | goto fail_unlock; |
855 | } | 827 | } |
856 | csum: | ||
857 | leaf = path->nodes[0]; | 828 | leaf = path->nodes[0]; |
829 | csum: | ||
858 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | 830 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); |
859 | ret = 0; | 831 | item_end = (struct btrfs_csum_item *)((unsigned char *)item + |
832 | btrfs_item_size_nr(leaf, path->slots[0])); | ||
860 | item = (struct btrfs_csum_item *)((unsigned char *)item + | 833 | item = (struct btrfs_csum_item *)((unsigned char *)item + |
861 | csum_offset * csum_size); | 834 | csum_offset * csum_size); |
862 | found: | 835 | found: |
863 | item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | 836 | ins_size = (u32)(sums->len - total_bytes) >> |
864 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + | 837 | root->fs_info->sb->s_blocksize_bits; |
865 | btrfs_item_size_nr(leaf, path->slots[0])); | 838 | ins_size *= csum_size; |
866 | next_sector: | 839 | ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item, |
867 | 840 | ins_size); | |
868 | write_extent_buffer(leaf, §or_sum->sum, (unsigned long)item, csum_size); | 841 | write_extent_buffer(leaf, sums->sums + index, (unsigned long)item, |
869 | 842 | ins_size); | |
870 | total_bytes += root->sectorsize; | 843 | |
871 | sector_sum++; | 844 | ins_size /= csum_size; |
872 | if (total_bytes < sums->len) { | 845 | total_bytes += ins_size * root->sectorsize; |
873 | item = (struct btrfs_csum_item *)((char *)item + | 846 | index += ins_size; |
874 | csum_size); | ||
875 | if (item < item_end && bytenr + PAGE_CACHE_SIZE == | ||
876 | sector_sum->bytenr) { | ||
877 | bytenr = sector_sum->bytenr; | ||
878 | goto next_sector; | ||
879 | } | ||
880 | } | ||
881 | 847 | ||
882 | btrfs_mark_buffer_dirty(path->nodes[0]); | 848 | btrfs_mark_buffer_dirty(path->nodes[0]); |
883 | if (total_bytes < sums->len) { | 849 | if (total_bytes < sums->len) { |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4205ba752d40..8e686a427ce2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -309,10 +309,6 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | |||
309 | ret = PTR_ERR(inode_root); | 309 | ret = PTR_ERR(inode_root); |
310 | goto cleanup; | 310 | goto cleanup; |
311 | } | 311 | } |
312 | if (btrfs_root_refs(&inode_root->root_item) == 0) { | ||
313 | ret = -ENOENT; | ||
314 | goto cleanup; | ||
315 | } | ||
316 | 312 | ||
317 | key.objectid = defrag->ino; | 313 | key.objectid = defrag->ino; |
318 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 314 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
@@ -600,20 +596,29 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
600 | if (no_splits) | 596 | if (no_splits) |
601 | goto next; | 597 | goto next; |
602 | 598 | ||
603 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | 599 | if (em->start < start) { |
604 | em->start < start) { | ||
605 | split->start = em->start; | 600 | split->start = em->start; |
606 | split->len = start - em->start; | 601 | split->len = start - em->start; |
607 | split->orig_start = em->orig_start; | ||
608 | split->block_start = em->block_start; | ||
609 | 602 | ||
610 | if (compressed) | 603 | if (em->block_start < EXTENT_MAP_LAST_BYTE) { |
611 | split->block_len = em->block_len; | 604 | split->orig_start = em->orig_start; |
612 | else | 605 | split->block_start = em->block_start; |
613 | split->block_len = split->len; | 606 | |
614 | split->ram_bytes = em->ram_bytes; | 607 | if (compressed) |
615 | split->orig_block_len = max(split->block_len, | 608 | split->block_len = em->block_len; |
616 | em->orig_block_len); | 609 | else |
610 | split->block_len = split->len; | ||
611 | split->orig_block_len = max(split->block_len, | ||
612 | em->orig_block_len); | ||
613 | split->ram_bytes = em->ram_bytes; | ||
614 | } else { | ||
615 | split->orig_start = split->start; | ||
616 | split->block_len = 0; | ||
617 | split->block_start = em->block_start; | ||
618 | split->orig_block_len = 0; | ||
619 | split->ram_bytes = split->len; | ||
620 | } | ||
621 | |||
617 | split->generation = gen; | 622 | split->generation = gen; |
618 | split->bdev = em->bdev; | 623 | split->bdev = em->bdev; |
619 | split->flags = flags; | 624 | split->flags = flags; |
@@ -624,8 +629,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
624 | split = split2; | 629 | split = split2; |
625 | split2 = NULL; | 630 | split2 = NULL; |
626 | } | 631 | } |
627 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | 632 | if (testend && em->start + em->len > start + len) { |
628 | testend && em->start + em->len > start + len) { | ||
629 | u64 diff = start + len - em->start; | 633 | u64 diff = start + len - em->start; |
630 | 634 | ||
631 | split->start = start + len; | 635 | split->start = start + len; |
@@ -634,18 +638,28 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
634 | split->flags = flags; | 638 | split->flags = flags; |
635 | split->compress_type = em->compress_type; | 639 | split->compress_type = em->compress_type; |
636 | split->generation = gen; | 640 | split->generation = gen; |
637 | split->orig_block_len = max(em->block_len, | 641 | |
642 | if (em->block_start < EXTENT_MAP_LAST_BYTE) { | ||
643 | split->orig_block_len = max(em->block_len, | ||
638 | em->orig_block_len); | 644 | em->orig_block_len); |
639 | split->ram_bytes = em->ram_bytes; | ||
640 | 645 | ||
641 | if (compressed) { | 646 | split->ram_bytes = em->ram_bytes; |
642 | split->block_len = em->block_len; | 647 | if (compressed) { |
643 | split->block_start = em->block_start; | 648 | split->block_len = em->block_len; |
644 | split->orig_start = em->orig_start; | 649 | split->block_start = em->block_start; |
650 | split->orig_start = em->orig_start; | ||
651 | } else { | ||
652 | split->block_len = split->len; | ||
653 | split->block_start = em->block_start | ||
654 | + diff; | ||
655 | split->orig_start = em->orig_start; | ||
656 | } | ||
645 | } else { | 657 | } else { |
646 | split->block_len = split->len; | 658 | split->ram_bytes = split->len; |
647 | split->block_start = em->block_start + diff; | 659 | split->orig_start = split->start; |
648 | split->orig_start = em->orig_start; | 660 | split->block_len = 0; |
661 | split->block_start = em->block_start; | ||
662 | split->orig_block_len = 0; | ||
649 | } | 663 | } |
650 | 664 | ||
651 | ret = add_extent_mapping(em_tree, split, modified); | 665 | ret = add_extent_mapping(em_tree, split, modified); |
@@ -1317,6 +1331,56 @@ fail: | |||
1317 | 1331 | ||
1318 | } | 1332 | } |
1319 | 1333 | ||
1334 | static noinline int check_can_nocow(struct inode *inode, loff_t pos, | ||
1335 | size_t *write_bytes) | ||
1336 | { | ||
1337 | struct btrfs_trans_handle *trans; | ||
1338 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1339 | struct btrfs_ordered_extent *ordered; | ||
1340 | u64 lockstart, lockend; | ||
1341 | u64 num_bytes; | ||
1342 | int ret; | ||
1343 | |||
1344 | lockstart = round_down(pos, root->sectorsize); | ||
1345 | lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1; | ||
1346 | |||
1347 | while (1) { | ||
1348 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | ||
1349 | ordered = btrfs_lookup_ordered_range(inode, lockstart, | ||
1350 | lockend - lockstart + 1); | ||
1351 | if (!ordered) { | ||
1352 | break; | ||
1353 | } | ||
1354 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | ||
1355 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
1356 | btrfs_put_ordered_extent(ordered); | ||
1357 | } | ||
1358 | |||
1359 | trans = btrfs_join_transaction(root); | ||
1360 | if (IS_ERR(trans)) { | ||
1361 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | ||
1362 | return PTR_ERR(trans); | ||
1363 | } | ||
1364 | |||
1365 | num_bytes = lockend - lockstart + 1; | ||
1366 | ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL, | ||
1367 | NULL); | ||
1368 | btrfs_end_transaction(trans, root); | ||
1369 | if (ret <= 0) { | ||
1370 | ret = 0; | ||
1371 | } else { | ||
1372 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
1373 | EXTENT_DIRTY | EXTENT_DELALLOC | | ||
1374 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, | ||
1375 | NULL, GFP_NOFS); | ||
1376 | *write_bytes = min_t(size_t, *write_bytes, num_bytes); | ||
1377 | } | ||
1378 | |||
1379 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | ||
1380 | |||
1381 | return ret; | ||
1382 | } | ||
1383 | |||
1320 | static noinline ssize_t __btrfs_buffered_write(struct file *file, | 1384 | static noinline ssize_t __btrfs_buffered_write(struct file *file, |
1321 | struct iov_iter *i, | 1385 | struct iov_iter *i, |
1322 | loff_t pos) | 1386 | loff_t pos) |
@@ -1324,10 +1388,12 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1324 | struct inode *inode = file_inode(file); | 1388 | struct inode *inode = file_inode(file); |
1325 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1389 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1326 | struct page **pages = NULL; | 1390 | struct page **pages = NULL; |
1391 | u64 release_bytes = 0; | ||
1327 | unsigned long first_index; | 1392 | unsigned long first_index; |
1328 | size_t num_written = 0; | 1393 | size_t num_written = 0; |
1329 | int nrptrs; | 1394 | int nrptrs; |
1330 | int ret = 0; | 1395 | int ret = 0; |
1396 | bool only_release_metadata = false; | ||
1331 | bool force_page_uptodate = false; | 1397 | bool force_page_uptodate = false; |
1332 | 1398 | ||
1333 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / | 1399 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / |
@@ -1348,6 +1414,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1348 | offset); | 1414 | offset); |
1349 | size_t num_pages = (write_bytes + offset + | 1415 | size_t num_pages = (write_bytes + offset + |
1350 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1416 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
1417 | size_t reserve_bytes; | ||
1351 | size_t dirty_pages; | 1418 | size_t dirty_pages; |
1352 | size_t copied; | 1419 | size_t copied; |
1353 | 1420 | ||
@@ -1362,11 +1429,41 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1362 | break; | 1429 | break; |
1363 | } | 1430 | } |
1364 | 1431 | ||
1365 | ret = btrfs_delalloc_reserve_space(inode, | 1432 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; |
1366 | num_pages << PAGE_CACHE_SHIFT); | 1433 | ret = btrfs_check_data_free_space(inode, reserve_bytes); |
1434 | if (ret == -ENOSPC && | ||
1435 | (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | | ||
1436 | BTRFS_INODE_PREALLOC))) { | ||
1437 | ret = check_can_nocow(inode, pos, &write_bytes); | ||
1438 | if (ret > 0) { | ||
1439 | only_release_metadata = true; | ||
1440 | /* | ||
1441 | * our prealloc extent may be smaller than | ||
1442 | * write_bytes, so scale down. | ||
1443 | */ | ||
1444 | num_pages = (write_bytes + offset + | ||
1445 | PAGE_CACHE_SIZE - 1) >> | ||
1446 | PAGE_CACHE_SHIFT; | ||
1447 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; | ||
1448 | ret = 0; | ||
1449 | } else { | ||
1450 | ret = -ENOSPC; | ||
1451 | } | ||
1452 | } | ||
1453 | |||
1367 | if (ret) | 1454 | if (ret) |
1368 | break; | 1455 | break; |
1369 | 1456 | ||
1457 | ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); | ||
1458 | if (ret) { | ||
1459 | if (!only_release_metadata) | ||
1460 | btrfs_free_reserved_data_space(inode, | ||
1461 | reserve_bytes); | ||
1462 | break; | ||
1463 | } | ||
1464 | |||
1465 | release_bytes = reserve_bytes; | ||
1466 | |||
1370 | /* | 1467 | /* |
1371 | * This is going to setup the pages array with the number of | 1468 | * This is going to setup the pages array with the number of |
1372 | * pages we want, so we don't really need to worry about the | 1469 | * pages we want, so we don't really need to worry about the |
@@ -1375,11 +1472,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1375 | ret = prepare_pages(root, file, pages, num_pages, | 1472 | ret = prepare_pages(root, file, pages, num_pages, |
1376 | pos, first_index, write_bytes, | 1473 | pos, first_index, write_bytes, |
1377 | force_page_uptodate); | 1474 | force_page_uptodate); |
1378 | if (ret) { | 1475 | if (ret) |
1379 | btrfs_delalloc_release_space(inode, | ||
1380 | num_pages << PAGE_CACHE_SHIFT); | ||
1381 | break; | 1476 | break; |
1382 | } | ||
1383 | 1477 | ||
1384 | copied = btrfs_copy_from_user(pos, num_pages, | 1478 | copied = btrfs_copy_from_user(pos, num_pages, |
1385 | write_bytes, pages, i); | 1479 | write_bytes, pages, i); |
@@ -1409,30 +1503,46 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1409 | * managed to copy. | 1503 | * managed to copy. |
1410 | */ | 1504 | */ |
1411 | if (num_pages > dirty_pages) { | 1505 | if (num_pages > dirty_pages) { |
1506 | release_bytes = (num_pages - dirty_pages) << | ||
1507 | PAGE_CACHE_SHIFT; | ||
1412 | if (copied > 0) { | 1508 | if (copied > 0) { |
1413 | spin_lock(&BTRFS_I(inode)->lock); | 1509 | spin_lock(&BTRFS_I(inode)->lock); |
1414 | BTRFS_I(inode)->outstanding_extents++; | 1510 | BTRFS_I(inode)->outstanding_extents++; |
1415 | spin_unlock(&BTRFS_I(inode)->lock); | 1511 | spin_unlock(&BTRFS_I(inode)->lock); |
1416 | } | 1512 | } |
1417 | btrfs_delalloc_release_space(inode, | 1513 | if (only_release_metadata) |
1418 | (num_pages - dirty_pages) << | 1514 | btrfs_delalloc_release_metadata(inode, |
1419 | PAGE_CACHE_SHIFT); | 1515 | release_bytes); |
1516 | else | ||
1517 | btrfs_delalloc_release_space(inode, | ||
1518 | release_bytes); | ||
1420 | } | 1519 | } |
1421 | 1520 | ||
1521 | release_bytes = dirty_pages << PAGE_CACHE_SHIFT; | ||
1422 | if (copied > 0) { | 1522 | if (copied > 0) { |
1423 | ret = btrfs_dirty_pages(root, inode, pages, | 1523 | ret = btrfs_dirty_pages(root, inode, pages, |
1424 | dirty_pages, pos, copied, | 1524 | dirty_pages, pos, copied, |
1425 | NULL); | 1525 | NULL); |
1426 | if (ret) { | 1526 | if (ret) { |
1427 | btrfs_delalloc_release_space(inode, | ||
1428 | dirty_pages << PAGE_CACHE_SHIFT); | ||
1429 | btrfs_drop_pages(pages, num_pages); | 1527 | btrfs_drop_pages(pages, num_pages); |
1430 | break; | 1528 | break; |
1431 | } | 1529 | } |
1432 | } | 1530 | } |
1433 | 1531 | ||
1532 | release_bytes = 0; | ||
1434 | btrfs_drop_pages(pages, num_pages); | 1533 | btrfs_drop_pages(pages, num_pages); |
1435 | 1534 | ||
1535 | if (only_release_metadata && copied > 0) { | ||
1536 | u64 lockstart = round_down(pos, root->sectorsize); | ||
1537 | u64 lockend = lockstart + | ||
1538 | (dirty_pages << PAGE_CACHE_SHIFT) - 1; | ||
1539 | |||
1540 | set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
1541 | lockend, EXTENT_NORESERVE, NULL, | ||
1542 | NULL, GFP_NOFS); | ||
1543 | only_release_metadata = false; | ||
1544 | } | ||
1545 | |||
1436 | cond_resched(); | 1546 | cond_resched(); |
1437 | 1547 | ||
1438 | balance_dirty_pages_ratelimited(inode->i_mapping); | 1548 | balance_dirty_pages_ratelimited(inode->i_mapping); |
@@ -1445,6 +1555,13 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1445 | 1555 | ||
1446 | kfree(pages); | 1556 | kfree(pages); |
1447 | 1557 | ||
1558 | if (release_bytes) { | ||
1559 | if (only_release_metadata) | ||
1560 | btrfs_delalloc_release_metadata(inode, release_bytes); | ||
1561 | else | ||
1562 | btrfs_delalloc_release_space(inode, release_bytes); | ||
1563 | } | ||
1564 | |||
1448 | return num_written ? num_written : ret; | 1565 | return num_written ? num_written : ret; |
1449 | } | 1566 | } |
1450 | 1567 | ||
@@ -2175,12 +2292,6 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2175 | goto out_reserve_fail; | 2292 | goto out_reserve_fail; |
2176 | } | 2293 | } |
2177 | 2294 | ||
2178 | /* | ||
2179 | * wait for ordered IO before we have any locks. We'll loop again | ||
2180 | * below with the locks held. | ||
2181 | */ | ||
2182 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
2183 | |||
2184 | mutex_lock(&inode->i_mutex); | 2295 | mutex_lock(&inode->i_mutex); |
2185 | ret = inode_newsize_ok(inode, alloc_end); | 2296 | ret = inode_newsize_ok(inode, alloc_end); |
2186 | if (ret) | 2297 | if (ret) |
@@ -2191,8 +2302,23 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2191 | alloc_start); | 2302 | alloc_start); |
2192 | if (ret) | 2303 | if (ret) |
2193 | goto out; | 2304 | goto out; |
2305 | } else { | ||
2306 | /* | ||
2307 | * If we are fallocating from the end of the file onward we | ||
2308 | * need to zero out the end of the page if i_size lands in the | ||
2309 | * middle of a page. | ||
2310 | */ | ||
2311 | ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); | ||
2312 | if (ret) | ||
2313 | goto out; | ||
2194 | } | 2314 | } |
2195 | 2315 | ||
2316 | /* | ||
2317 | * wait for ordered IO before we have any locks. We'll loop again | ||
2318 | * below with the locks held. | ||
2319 | */ | ||
2320 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
2321 | |||
2196 | locked_end = alloc_end - 1; | 2322 | locked_end = alloc_end - 1; |
2197 | while (1) { | 2323 | while (1) { |
2198 | struct btrfs_ordered_extent *ordered; | 2324 | struct btrfs_ordered_extent *ordered; |
@@ -2425,20 +2551,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) | |||
2425 | } | 2551 | } |
2426 | } | 2552 | } |
2427 | 2553 | ||
2428 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { | 2554 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
2429 | offset = -EINVAL; | ||
2430 | goto out; | ||
2431 | } | ||
2432 | if (offset > inode->i_sb->s_maxbytes) { | ||
2433 | offset = -EINVAL; | ||
2434 | goto out; | ||
2435 | } | ||
2436 | |||
2437 | /* Special lock needed here? */ | ||
2438 | if (offset != file->f_pos) { | ||
2439 | file->f_pos = offset; | ||
2440 | file->f_version = 0; | ||
2441 | } | ||
2442 | out: | 2555 | out: |
2443 | mutex_unlock(&inode->i_mutex); | 2556 | mutex_unlock(&inode->i_mutex); |
2444 | return offset; | 2557 | return offset; |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e53009657f0e..b21a3cd667d8 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -213,7 +213,7 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, | |||
213 | else | 213 | else |
214 | ret = 0; | 214 | ret = 0; |
215 | spin_unlock(&rsv->lock); | 215 | spin_unlock(&rsv->lock); |
216 | return 0; | 216 | return ret; |
217 | } | 217 | } |
218 | 218 | ||
219 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, | 219 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, |
@@ -3150,6 +3150,8 @@ again: | |||
3150 | return 0; | 3150 | return 0; |
3151 | } | 3151 | } |
3152 | 3152 | ||
3153 | #define test_msg(fmt, ...) printk(KERN_INFO "btrfs: selftest: " fmt, ##__VA_ARGS__) | ||
3154 | |||
3153 | /* | 3155 | /* |
3154 | * This test just does basic sanity checking, making sure we can add an exten | 3156 | * This test just does basic sanity checking, making sure we can add an exten |
3155 | * entry and remove space from either end and the middle, and make sure we can | 3157 | * entry and remove space from either end and the middle, and make sure we can |
@@ -3159,63 +3161,63 @@ static int test_extents(struct btrfs_block_group_cache *cache) | |||
3159 | { | 3161 | { |
3160 | int ret = 0; | 3162 | int ret = 0; |
3161 | 3163 | ||
3162 | printk(KERN_ERR "Running extent only tests\n"); | 3164 | test_msg("Running extent only tests\n"); |
3163 | 3165 | ||
3164 | /* First just make sure we can remove an entire entry */ | 3166 | /* First just make sure we can remove an entire entry */ |
3165 | ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); | 3167 | ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); |
3166 | if (ret) { | 3168 | if (ret) { |
3167 | printk(KERN_ERR "Error adding initial extents %d\n", ret); | 3169 | test_msg("Error adding initial extents %d\n", ret); |
3168 | return ret; | 3170 | return ret; |
3169 | } | 3171 | } |
3170 | 3172 | ||
3171 | ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); | 3173 | ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); |
3172 | if (ret) { | 3174 | if (ret) { |
3173 | printk(KERN_ERR "Error removing extent %d\n", ret); | 3175 | test_msg("Error removing extent %d\n", ret); |
3174 | return ret; | 3176 | return ret; |
3175 | } | 3177 | } |
3176 | 3178 | ||
3177 | if (check_exists(cache, 0, 4 * 1024 * 1024)) { | 3179 | if (check_exists(cache, 0, 4 * 1024 * 1024)) { |
3178 | printk(KERN_ERR "Full remove left some lingering space\n"); | 3180 | test_msg("Full remove left some lingering space\n"); |
3179 | return -1; | 3181 | return -1; |
3180 | } | 3182 | } |
3181 | 3183 | ||
3182 | /* Ok edge and middle cases now */ | 3184 | /* Ok edge and middle cases now */ |
3183 | ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); | 3185 | ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); |
3184 | if (ret) { | 3186 | if (ret) { |
3185 | printk(KERN_ERR "Error adding half extent %d\n", ret); | 3187 | test_msg("Error adding half extent %d\n", ret); |
3186 | return ret; | 3188 | return ret; |
3187 | } | 3189 | } |
3188 | 3190 | ||
3189 | ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); | 3191 | ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); |
3190 | if (ret) { | 3192 | if (ret) { |
3191 | printk(KERN_ERR "Error removing tail end %d\n", ret); | 3193 | test_msg("Error removing tail end %d\n", ret); |
3192 | return ret; | 3194 | return ret; |
3193 | } | 3195 | } |
3194 | 3196 | ||
3195 | ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); | 3197 | ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); |
3196 | if (ret) { | 3198 | if (ret) { |
3197 | printk(KERN_ERR "Error removing front end %d\n", ret); | 3199 | test_msg("Error removing front end %d\n", ret); |
3198 | return ret; | 3200 | return ret; |
3199 | } | 3201 | } |
3200 | 3202 | ||
3201 | ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); | 3203 | ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); |
3202 | if (ret) { | 3204 | if (ret) { |
3203 | printk(KERN_ERR "Error removing middle peice %d\n", ret); | 3205 | test_msg("Error removing middle piece %d\n", ret); |
3204 | return ret; | 3206 | return ret; |
3205 | } | 3207 | } |
3206 | 3208 | ||
3207 | if (check_exists(cache, 0, 1 * 1024 * 1024)) { | 3209 | if (check_exists(cache, 0, 1 * 1024 * 1024)) { |
3208 | printk(KERN_ERR "Still have space at the front\n"); | 3210 | test_msg("Still have space at the front\n"); |
3209 | return -1; | 3211 | return -1; |
3210 | } | 3212 | } |
3211 | 3213 | ||
3212 | if (check_exists(cache, 2 * 1024 * 1024, 4096)) { | 3214 | if (check_exists(cache, 2 * 1024 * 1024, 4096)) { |
3213 | printk(KERN_ERR "Still have space in the middle\n"); | 3215 | test_msg("Still have space in the middle\n"); |
3214 | return -1; | 3216 | return -1; |
3215 | } | 3217 | } |
3216 | 3218 | ||
3217 | if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { | 3219 | if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { |
3218 | printk(KERN_ERR "Still have space at the end\n"); | 3220 | test_msg("Still have space at the end\n"); |
3219 | return -1; | 3221 | return -1; |
3220 | } | 3222 | } |
3221 | 3223 | ||
@@ -3230,34 +3232,34 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) | |||
3230 | u64 next_bitmap_offset; | 3232 | u64 next_bitmap_offset; |
3231 | int ret; | 3233 | int ret; |
3232 | 3234 | ||
3233 | printk(KERN_ERR "Running bitmap only tests\n"); | 3235 | test_msg("Running bitmap only tests\n"); |
3234 | 3236 | ||
3235 | ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); | 3237 | ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); |
3236 | if (ret) { | 3238 | if (ret) { |
3237 | printk(KERN_ERR "Couldn't create a bitmap entry %d\n", ret); | 3239 | test_msg("Couldn't create a bitmap entry %d\n", ret); |
3238 | return ret; | 3240 | return ret; |
3239 | } | 3241 | } |
3240 | 3242 | ||
3241 | ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); | 3243 | ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); |
3242 | if (ret) { | 3244 | if (ret) { |
3243 | printk(KERN_ERR "Error removing bitmap full range %d\n", ret); | 3245 | test_msg("Error removing bitmap full range %d\n", ret); |
3244 | return ret; | 3246 | return ret; |
3245 | } | 3247 | } |
3246 | 3248 | ||
3247 | if (check_exists(cache, 0, 4 * 1024 * 1024)) { | 3249 | if (check_exists(cache, 0, 4 * 1024 * 1024)) { |
3248 | printk(KERN_ERR "Left some space in bitmap\n"); | 3250 | test_msg("Left some space in bitmap\n"); |
3249 | return -1; | 3251 | return -1; |
3250 | } | 3252 | } |
3251 | 3253 | ||
3252 | ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); | 3254 | ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); |
3253 | if (ret) { | 3255 | if (ret) { |
3254 | printk(KERN_ERR "Couldn't add to our bitmap entry %d\n", ret); | 3256 | test_msg("Couldn't add to our bitmap entry %d\n", ret); |
3255 | return ret; | 3257 | return ret; |
3256 | } | 3258 | } |
3257 | 3259 | ||
3258 | ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); | 3260 | ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); |
3259 | if (ret) { | 3261 | if (ret) { |
3260 | printk(KERN_ERR "Couldn't remove middle chunk %d\n", ret); | 3262 | test_msg("Couldn't remove middle chunk %d\n", ret); |
3261 | return ret; | 3263 | return ret; |
3262 | } | 3264 | } |
3263 | 3265 | ||
@@ -3271,21 +3273,21 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) | |||
3271 | ret = add_free_space_entry(cache, next_bitmap_offset - | 3273 | ret = add_free_space_entry(cache, next_bitmap_offset - |
3272 | (2 * 1024 * 1024), 4 * 1024 * 1024, 1); | 3274 | (2 * 1024 * 1024), 4 * 1024 * 1024, 1); |
3273 | if (ret) { | 3275 | if (ret) { |
3274 | printk(KERN_ERR "Couldn't add space that straddles two bitmaps" | 3276 | test_msg("Couldn't add space that straddles two bitmaps %d\n", |
3275 | " %d\n", ret); | 3277 | ret); |
3276 | return ret; | 3278 | return ret; |
3277 | } | 3279 | } |
3278 | 3280 | ||
3279 | ret = btrfs_remove_free_space(cache, next_bitmap_offset - | 3281 | ret = btrfs_remove_free_space(cache, next_bitmap_offset - |
3280 | (1 * 1024 * 1024), 2 * 1024 * 1024); | 3282 | (1 * 1024 * 1024), 2 * 1024 * 1024); |
3281 | if (ret) { | 3283 | if (ret) { |
3282 | printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret); | 3284 | test_msg("Couldn't remove overlapping space %d\n", ret); |
3283 | return ret; | 3285 | return ret; |
3284 | } | 3286 | } |
3285 | 3287 | ||
3286 | if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), | 3288 | if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), |
3287 | 2 * 1024 * 1024)) { | 3289 | 2 * 1024 * 1024)) { |
3288 | printk(KERN_ERR "Left some space when removing overlapping\n"); | 3290 | test_msg("Left some space when removing overlapping\n"); |
3289 | return -1; | 3291 | return -1; |
3290 | } | 3292 | } |
3291 | 3293 | ||
@@ -3300,7 +3302,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
3300 | u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); | 3302 | u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); |
3301 | int ret; | 3303 | int ret; |
3302 | 3304 | ||
3303 | printk(KERN_ERR "Running bitmap and extent tests\n"); | 3305 | test_msg("Running bitmap and extent tests\n"); |
3304 | 3306 | ||
3305 | /* | 3307 | /* |
3306 | * First let's do something simple, an extent at the same offset as the | 3308 | * First let's do something simple, an extent at the same offset as the |
@@ -3309,42 +3311,42 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
3309 | */ | 3311 | */ |
3310 | ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); | 3312 | ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); |
3311 | if (ret) { | 3313 | if (ret) { |
3312 | printk(KERN_ERR "Couldn't create bitmap entry %d\n", ret); | 3314 | test_msg("Couldn't create bitmap entry %d\n", ret); |
3313 | return ret; | 3315 | return ret; |
3314 | } | 3316 | } |
3315 | 3317 | ||
3316 | ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); | 3318 | ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); |
3317 | if (ret) { | 3319 | if (ret) { |
3318 | printk(KERN_ERR "Couldn't add extent entry %d\n", ret); | 3320 | test_msg("Couldn't add extent entry %d\n", ret); |
3319 | return ret; | 3321 | return ret; |
3320 | } | 3322 | } |
3321 | 3323 | ||
3322 | ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); | 3324 | ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); |
3323 | if (ret) { | 3325 | if (ret) { |
3324 | printk(KERN_ERR "Couldn't remove extent entry %d\n", ret); | 3326 | test_msg("Couldn't remove extent entry %d\n", ret); |
3325 | return ret; | 3327 | return ret; |
3326 | } | 3328 | } |
3327 | 3329 | ||
3328 | if (check_exists(cache, 0, 1 * 1024 * 1024)) { | 3330 | if (check_exists(cache, 0, 1 * 1024 * 1024)) { |
3329 | printk(KERN_ERR "Left remnants after our remove\n"); | 3331 | test_msg("Left remnants after our remove\n"); |
3330 | return -1; | 3332 | return -1; |
3331 | } | 3333 | } |
3332 | 3334 | ||
3333 | /* Now to add back the extent entry and remove from the bitmap */ | 3335 | /* Now to add back the extent entry and remove from the bitmap */ |
3334 | ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); | 3336 | ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); |
3335 | if (ret) { | 3337 | if (ret) { |
3336 | printk(KERN_ERR "Couldn't re-add extent entry %d\n", ret); | 3338 | test_msg("Couldn't re-add extent entry %d\n", ret); |
3337 | return ret; | 3339 | return ret; |
3338 | } | 3340 | } |
3339 | 3341 | ||
3340 | ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); | 3342 | ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); |
3341 | if (ret) { | 3343 | if (ret) { |
3342 | printk(KERN_ERR "Couldn't remove from bitmap %d\n", ret); | 3344 | test_msg("Couldn't remove from bitmap %d\n", ret); |
3343 | return ret; | 3345 | return ret; |
3344 | } | 3346 | } |
3345 | 3347 | ||
3346 | if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { | 3348 | if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { |
3347 | printk(KERN_ERR "Left remnants in the bitmap\n"); | 3349 | test_msg("Left remnants in the bitmap\n"); |
3348 | return -1; | 3350 | return -1; |
3349 | } | 3351 | } |
3350 | 3352 | ||
@@ -3354,19 +3356,18 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
3354 | */ | 3356 | */ |
3355 | ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); | 3357 | ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); |
3356 | if (ret) { | 3358 | if (ret) { |
3357 | printk(KERN_ERR "Couldn't add to a bitmap %d\n", ret); | 3359 | test_msg("Couldn't add to a bitmap %d\n", ret); |
3358 | return ret; | 3360 | return ret; |
3359 | } | 3361 | } |
3360 | 3362 | ||
3361 | ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); | 3363 | ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); |
3362 | if (ret) { | 3364 | if (ret) { |
3363 | printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret); | 3365 | test_msg("Couldn't remove overlapping space %d\n", ret); |
3364 | return ret; | 3366 | return ret; |
3365 | } | 3367 | } |
3366 | 3368 | ||
3367 | if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { | 3369 | if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { |
3368 | printk(KERN_ERR "Left over peices after removing " | 3370 | test_msg("Left over peices after removing overlapping\n"); |
3369 | "overlapping\n"); | ||
3370 | return -1; | 3371 | return -1; |
3371 | } | 3372 | } |
3372 | 3373 | ||
@@ -3375,24 +3376,24 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
3375 | /* Now with the extent entry offset into the bitmap */ | 3376 | /* Now with the extent entry offset into the bitmap */ |
3376 | ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); | 3377 | ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); |
3377 | if (ret) { | 3378 | if (ret) { |
3378 | printk(KERN_ERR "Couldn't add space to the bitmap %d\n", ret); | 3379 | test_msg("Couldn't add space to the bitmap %d\n", ret); |
3379 | return ret; | 3380 | return ret; |
3380 | } | 3381 | } |
3381 | 3382 | ||
3382 | ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); | 3383 | ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); |
3383 | if (ret) { | 3384 | if (ret) { |
3384 | printk(KERN_ERR "Couldn't add extent to the cache %d\n", ret); | 3385 | test_msg("Couldn't add extent to the cache %d\n", ret); |
3385 | return ret; | 3386 | return ret; |
3386 | } | 3387 | } |
3387 | 3388 | ||
3388 | ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); | 3389 | ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); |
3389 | if (ret) { | 3390 | if (ret) { |
3390 | printk(KERN_ERR "Problem removing overlapping space %d\n", ret); | 3391 | test_msg("Problem removing overlapping space %d\n", ret); |
3391 | return ret; | 3392 | return ret; |
3392 | } | 3393 | } |
3393 | 3394 | ||
3394 | if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { | 3395 | if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { |
3395 | printk(KERN_ERR "Left something behind when removing space"); | 3396 | test_msg("Left something behind when removing space"); |
3396 | return -1; | 3397 | return -1; |
3397 | } | 3398 | } |
3398 | 3399 | ||
@@ -3410,27 +3411,27 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
3410 | ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, | 3411 | ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, |
3411 | 4 * 1024 * 1024, 1); | 3412 | 4 * 1024 * 1024, 1); |
3412 | if (ret) { | 3413 | if (ret) { |
3413 | printk(KERN_ERR "Couldn't add bitmap %d\n", ret); | 3414 | test_msg("Couldn't add bitmap %d\n", ret); |
3414 | return ret; | 3415 | return ret; |
3415 | } | 3416 | } |
3416 | 3417 | ||
3417 | ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, | 3418 | ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, |
3418 | 5 * 1024 * 1024, 0); | 3419 | 5 * 1024 * 1024, 0); |
3419 | if (ret) { | 3420 | if (ret) { |
3420 | printk(KERN_ERR "Couldn't add extent entry %d\n", ret); | 3421 | test_msg("Couldn't add extent entry %d\n", ret); |
3421 | return ret; | 3422 | return ret; |
3422 | } | 3423 | } |
3423 | 3424 | ||
3424 | ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024, | 3425 | ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024, |
3425 | 5 * 1024 * 1024); | 3426 | 5 * 1024 * 1024); |
3426 | if (ret) { | 3427 | if (ret) { |
3427 | printk(KERN_ERR "Failed to free our space %d\n", ret); | 3428 | test_msg("Failed to free our space %d\n", ret); |
3428 | return ret; | 3429 | return ret; |
3429 | } | 3430 | } |
3430 | 3431 | ||
3431 | if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024, | 3432 | if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024, |
3432 | 5 * 1024 * 1024)) { | 3433 | 5 * 1024 * 1024)) { |
3433 | printk(KERN_ERR "Left stuff over\n"); | 3434 | test_msg("Left stuff over\n"); |
3434 | return -1; | 3435 | return -1; |
3435 | } | 3436 | } |
3436 | 3437 | ||
@@ -3444,20 +3445,19 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
3444 | */ | 3445 | */ |
3445 | ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); | 3446 | ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); |
3446 | if (ret) { | 3447 | if (ret) { |
3447 | printk(KERN_ERR "Couldn't add bitmap entry %d\n", ret); | 3448 | test_msg("Couldn't add bitmap entry %d\n", ret); |
3448 | return ret; | 3449 | return ret; |
3449 | } | 3450 | } |
3450 | 3451 | ||
3451 | ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); | 3452 | ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); |
3452 | if (ret) { | 3453 | if (ret) { |
3453 | printk(KERN_ERR "Couldn't add extent entry %d\n", ret); | 3454 | test_msg("Couldn't add extent entry %d\n", ret); |
3454 | return ret; | 3455 | return ret; |
3455 | } | 3456 | } |
3456 | 3457 | ||
3457 | ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); | 3458 | ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); |
3458 | if (ret) { | 3459 | if (ret) { |
3459 | printk(KERN_ERR "Error removing bitmap and extent " | 3460 | test_msg("Error removing bitmap and extent overlapping %d\n", ret); |
3460 | "overlapping %d\n", ret); | ||
3461 | return ret; | 3461 | return ret; |
3462 | } | 3462 | } |
3463 | 3463 | ||
@@ -3469,11 +3469,11 @@ void btrfs_test_free_space_cache(void) | |||
3469 | { | 3469 | { |
3470 | struct btrfs_block_group_cache *cache; | 3470 | struct btrfs_block_group_cache *cache; |
3471 | 3471 | ||
3472 | printk(KERN_ERR "Running btrfs free space cache tests\n"); | 3472 | test_msg("Running btrfs free space cache tests\n"); |
3473 | 3473 | ||
3474 | cache = init_test_block_group(); | 3474 | cache = init_test_block_group(); |
3475 | if (!cache) { | 3475 | if (!cache) { |
3476 | printk(KERN_ERR "Couldn't run the tests\n"); | 3476 | test_msg("Couldn't run the tests\n"); |
3477 | return; | 3477 | return; |
3478 | } | 3478 | } |
3479 | 3479 | ||
@@ -3487,6 +3487,9 @@ out: | |||
3487 | __btrfs_remove_free_space_cache(cache->free_space_ctl); | 3487 | __btrfs_remove_free_space_cache(cache->free_space_ctl); |
3488 | kfree(cache->free_space_ctl); | 3488 | kfree(cache->free_space_ctl); |
3489 | kfree(cache); | 3489 | kfree(cache); |
3490 | printk(KERN_ERR "Free space cache tests finished\n"); | 3490 | test_msg("Free space cache tests finished\n"); |
3491 | } | 3491 | } |
3492 | #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ | 3492 | #undef test_msg |
3493 | #else /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ | ||
3494 | void btrfs_test_free_space_cache(void) {} | ||
3495 | #endif /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ | ||
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 8b7f19f44961..894116b71304 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
@@ -113,8 +113,6 @@ int btrfs_return_cluster_to_free_space( | |||
113 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | 113 | int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, |
114 | u64 *trimmed, u64 start, u64 end, u64 minlen); | 114 | u64 *trimmed, u64 start, u64 end, u64 minlen); |
115 | 115 | ||
116 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
117 | void btrfs_test_free_space_cache(void); | 116 | void btrfs_test_free_space_cache(void); |
118 | #endif | ||
119 | 117 | ||
120 | #endif | 118 | #endif |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 17f3064b4a3e..021694c08181 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/mount.h> | 42 | #include <linux/mount.h> |
43 | #include <linux/btrfs.h> | 43 | #include <linux/btrfs.h> |
44 | #include <linux/blkdev.h> | 44 | #include <linux/blkdev.h> |
45 | #include <linux/posix_acl_xattr.h> | ||
45 | #include "compat.h" | 46 | #include "compat.h" |
46 | #include "ctree.h" | 47 | #include "ctree.h" |
47 | #include "disk-io.h" | 48 | #include "disk-io.h" |
@@ -57,6 +58,7 @@ | |||
57 | #include "free-space-cache.h" | 58 | #include "free-space-cache.h" |
58 | #include "inode-map.h" | 59 | #include "inode-map.h" |
59 | #include "backref.h" | 60 | #include "backref.h" |
61 | #include "hash.h" | ||
60 | 62 | ||
61 | struct btrfs_iget_args { | 63 | struct btrfs_iget_args { |
62 | u64 ino; | 64 | u64 ino; |
@@ -701,8 +703,12 @@ retry: | |||
701 | async_extent->nr_pages = 0; | 703 | async_extent->nr_pages = 0; |
702 | async_extent->pages = NULL; | 704 | async_extent->pages = NULL; |
703 | 705 | ||
704 | if (ret == -ENOSPC) | 706 | if (ret == -ENOSPC) { |
707 | unlock_extent(io_tree, async_extent->start, | ||
708 | async_extent->start + | ||
709 | async_extent->ram_size - 1); | ||
705 | goto retry; | 710 | goto retry; |
711 | } | ||
706 | goto out_free; | 712 | goto out_free; |
707 | } | 713 | } |
708 | 714 | ||
@@ -1529,6 +1535,46 @@ static void btrfs_merge_extent_hook(struct inode *inode, | |||
1529 | spin_unlock(&BTRFS_I(inode)->lock); | 1535 | spin_unlock(&BTRFS_I(inode)->lock); |
1530 | } | 1536 | } |
1531 | 1537 | ||
1538 | static void btrfs_add_delalloc_inodes(struct btrfs_root *root, | ||
1539 | struct inode *inode) | ||
1540 | { | ||
1541 | spin_lock(&root->delalloc_lock); | ||
1542 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | ||
1543 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | ||
1544 | &root->delalloc_inodes); | ||
1545 | set_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
1546 | &BTRFS_I(inode)->runtime_flags); | ||
1547 | root->nr_delalloc_inodes++; | ||
1548 | if (root->nr_delalloc_inodes == 1) { | ||
1549 | spin_lock(&root->fs_info->delalloc_root_lock); | ||
1550 | BUG_ON(!list_empty(&root->delalloc_root)); | ||
1551 | list_add_tail(&root->delalloc_root, | ||
1552 | &root->fs_info->delalloc_roots); | ||
1553 | spin_unlock(&root->fs_info->delalloc_root_lock); | ||
1554 | } | ||
1555 | } | ||
1556 | spin_unlock(&root->delalloc_lock); | ||
1557 | } | ||
1558 | |||
1559 | static void btrfs_del_delalloc_inode(struct btrfs_root *root, | ||
1560 | struct inode *inode) | ||
1561 | { | ||
1562 | spin_lock(&root->delalloc_lock); | ||
1563 | if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | ||
1564 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | ||
1565 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
1566 | &BTRFS_I(inode)->runtime_flags); | ||
1567 | root->nr_delalloc_inodes--; | ||
1568 | if (!root->nr_delalloc_inodes) { | ||
1569 | spin_lock(&root->fs_info->delalloc_root_lock); | ||
1570 | BUG_ON(list_empty(&root->delalloc_root)); | ||
1571 | list_del_init(&root->delalloc_root); | ||
1572 | spin_unlock(&root->fs_info->delalloc_root_lock); | ||
1573 | } | ||
1574 | } | ||
1575 | spin_unlock(&root->delalloc_lock); | ||
1576 | } | ||
1577 | |||
1532 | /* | 1578 | /* |
1533 | * extent_io.c set_bit_hook, used to track delayed allocation | 1579 | * extent_io.c set_bit_hook, used to track delayed allocation |
1534 | * bytes in this file, and to maintain the list of inodes that | 1580 | * bytes in this file, and to maintain the list of inodes that |
@@ -1561,16 +1607,8 @@ static void btrfs_set_bit_hook(struct inode *inode, | |||
1561 | spin_lock(&BTRFS_I(inode)->lock); | 1607 | spin_lock(&BTRFS_I(inode)->lock); |
1562 | BTRFS_I(inode)->delalloc_bytes += len; | 1608 | BTRFS_I(inode)->delalloc_bytes += len; |
1563 | if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, | 1609 | if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, |
1564 | &BTRFS_I(inode)->runtime_flags)) { | 1610 | &BTRFS_I(inode)->runtime_flags)) |
1565 | spin_lock(&root->fs_info->delalloc_lock); | 1611 | btrfs_add_delalloc_inodes(root, inode); |
1566 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | ||
1567 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | ||
1568 | &root->fs_info->delalloc_inodes); | ||
1569 | set_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
1570 | &BTRFS_I(inode)->runtime_flags); | ||
1571 | } | ||
1572 | spin_unlock(&root->fs_info->delalloc_lock); | ||
1573 | } | ||
1574 | spin_unlock(&BTRFS_I(inode)->lock); | 1612 | spin_unlock(&BTRFS_I(inode)->lock); |
1575 | } | 1613 | } |
1576 | } | 1614 | } |
@@ -1604,7 +1642,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1604 | btrfs_delalloc_release_metadata(inode, len); | 1642 | btrfs_delalloc_release_metadata(inode, len); |
1605 | 1643 | ||
1606 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID | 1644 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID |
1607 | && do_list) | 1645 | && do_list && !(state->state & EXTENT_NORESERVE)) |
1608 | btrfs_free_reserved_data_space(inode, len); | 1646 | btrfs_free_reserved_data_space(inode, len); |
1609 | 1647 | ||
1610 | __percpu_counter_add(&root->fs_info->delalloc_bytes, -len, | 1648 | __percpu_counter_add(&root->fs_info->delalloc_bytes, -len, |
@@ -1613,15 +1651,8 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1613 | BTRFS_I(inode)->delalloc_bytes -= len; | 1651 | BTRFS_I(inode)->delalloc_bytes -= len; |
1614 | if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && | 1652 | if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && |
1615 | test_bit(BTRFS_INODE_IN_DELALLOC_LIST, | 1653 | test_bit(BTRFS_INODE_IN_DELALLOC_LIST, |
1616 | &BTRFS_I(inode)->runtime_flags)) { | 1654 | &BTRFS_I(inode)->runtime_flags)) |
1617 | spin_lock(&root->fs_info->delalloc_lock); | 1655 | btrfs_del_delalloc_inode(root, inode); |
1618 | if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | ||
1619 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | ||
1620 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
1621 | &BTRFS_I(inode)->runtime_flags); | ||
1622 | } | ||
1623 | spin_unlock(&root->fs_info->delalloc_lock); | ||
1624 | } | ||
1625 | spin_unlock(&BTRFS_I(inode)->lock); | 1656 | spin_unlock(&BTRFS_I(inode)->lock); |
1626 | } | 1657 | } |
1627 | } | 1658 | } |
@@ -2135,16 +2166,23 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, | |||
2135 | if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr) | 2166 | if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr) |
2136 | continue; | 2167 | continue; |
2137 | 2168 | ||
2138 | extent_offset = btrfs_file_extent_offset(leaf, extent); | 2169 | /* |
2139 | if (key.offset - extent_offset != offset) | 2170 | * 'offset' refers to the exact key.offset, |
2171 | * NOT the 'offset' field in btrfs_extent_data_ref, ie. | ||
2172 | * (key.offset - extent_offset). | ||
2173 | */ | ||
2174 | if (key.offset != offset) | ||
2140 | continue; | 2175 | continue; |
2141 | 2176 | ||
2177 | extent_offset = btrfs_file_extent_offset(leaf, extent); | ||
2142 | num_bytes = btrfs_file_extent_num_bytes(leaf, extent); | 2178 | num_bytes = btrfs_file_extent_num_bytes(leaf, extent); |
2179 | |||
2143 | if (extent_offset >= old->extent_offset + old->offset + | 2180 | if (extent_offset >= old->extent_offset + old->offset + |
2144 | old->len || extent_offset + num_bytes <= | 2181 | old->len || extent_offset + num_bytes <= |
2145 | old->extent_offset + old->offset) | 2182 | old->extent_offset + old->offset) |
2146 | continue; | 2183 | continue; |
2147 | 2184 | ||
2185 | ret = 0; | ||
2148 | break; | 2186 | break; |
2149 | } | 2187 | } |
2150 | 2188 | ||
@@ -2156,7 +2194,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, | |||
2156 | 2194 | ||
2157 | backref->root_id = root_id; | 2195 | backref->root_id = root_id; |
2158 | backref->inum = inum; | 2196 | backref->inum = inum; |
2159 | backref->file_pos = offset + extent_offset; | 2197 | backref->file_pos = offset; |
2160 | backref->num_bytes = num_bytes; | 2198 | backref->num_bytes = num_bytes; |
2161 | backref->extent_offset = extent_offset; | 2199 | backref->extent_offset = extent_offset; |
2162 | backref->generation = btrfs_file_extent_generation(leaf, extent); | 2200 | backref->generation = btrfs_file_extent_generation(leaf, extent); |
@@ -2179,7 +2217,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, | |||
2179 | new->path = path; | 2217 | new->path = path; |
2180 | 2218 | ||
2181 | list_for_each_entry_safe(old, tmp, &new->head, list) { | 2219 | list_for_each_entry_safe(old, tmp, &new->head, list) { |
2182 | ret = iterate_inodes_from_logical(old->bytenr, fs_info, | 2220 | ret = iterate_inodes_from_logical(old->bytenr + |
2221 | old->extent_offset, fs_info, | ||
2183 | path, record_one_backref, | 2222 | path, record_one_backref, |
2184 | old); | 2223 | old); |
2185 | BUG_ON(ret < 0 && ret != -ENOENT); | 2224 | BUG_ON(ret < 0 && ret != -ENOENT); |
@@ -2263,11 +2302,6 @@ static noinline int relink_extent_backref(struct btrfs_path *path, | |||
2263 | return 0; | 2302 | return 0; |
2264 | return PTR_ERR(root); | 2303 | return PTR_ERR(root); |
2265 | } | 2304 | } |
2266 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
2267 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
2268 | /* parse ENOENT to 0 */ | ||
2269 | return 0; | ||
2270 | } | ||
2271 | 2305 | ||
2272 | /* step 2: get inode */ | 2306 | /* step 2: get inode */ |
2273 | key.objectid = backref->inum; | 2307 | key.objectid = backref->inum; |
@@ -3215,13 +3249,16 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
3215 | /* 1 for the orphan item deletion. */ | 3249 | /* 1 for the orphan item deletion. */ |
3216 | trans = btrfs_start_transaction(root, 1); | 3250 | trans = btrfs_start_transaction(root, 1); |
3217 | if (IS_ERR(trans)) { | 3251 | if (IS_ERR(trans)) { |
3252 | iput(inode); | ||
3218 | ret = PTR_ERR(trans); | 3253 | ret = PTR_ERR(trans); |
3219 | goto out; | 3254 | goto out; |
3220 | } | 3255 | } |
3221 | ret = btrfs_orphan_add(trans, inode); | 3256 | ret = btrfs_orphan_add(trans, inode); |
3222 | btrfs_end_transaction(trans, root); | 3257 | btrfs_end_transaction(trans, root); |
3223 | if (ret) | 3258 | if (ret) { |
3259 | iput(inode); | ||
3224 | goto out; | 3260 | goto out; |
3261 | } | ||
3225 | 3262 | ||
3226 | ret = btrfs_truncate(inode); | 3263 | ret = btrfs_truncate(inode); |
3227 | if (ret) | 3264 | if (ret) |
@@ -3274,8 +3311,17 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf, | |||
3274 | { | 3311 | { |
3275 | u32 nritems = btrfs_header_nritems(leaf); | 3312 | u32 nritems = btrfs_header_nritems(leaf); |
3276 | struct btrfs_key found_key; | 3313 | struct btrfs_key found_key; |
3314 | static u64 xattr_access = 0; | ||
3315 | static u64 xattr_default = 0; | ||
3277 | int scanned = 0; | 3316 | int scanned = 0; |
3278 | 3317 | ||
3318 | if (!xattr_access) { | ||
3319 | xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS, | ||
3320 | strlen(POSIX_ACL_XATTR_ACCESS)); | ||
3321 | xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT, | ||
3322 | strlen(POSIX_ACL_XATTR_DEFAULT)); | ||
3323 | } | ||
3324 | |||
3279 | slot++; | 3325 | slot++; |
3280 | while (slot < nritems) { | 3326 | while (slot < nritems) { |
3281 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 3327 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
@@ -3285,8 +3331,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf, | |||
3285 | return 0; | 3331 | return 0; |
3286 | 3332 | ||
3287 | /* we found an xattr, assume we've got an acl */ | 3333 | /* we found an xattr, assume we've got an acl */ |
3288 | if (found_key.type == BTRFS_XATTR_ITEM_KEY) | 3334 | if (found_key.type == BTRFS_XATTR_ITEM_KEY) { |
3289 | return 1; | 3335 | if (found_key.offset == xattr_access || |
3336 | found_key.offset == xattr_default) | ||
3337 | return 1; | ||
3338 | } | ||
3290 | 3339 | ||
3291 | /* | 3340 | /* |
3292 | * we found a key greater than an xattr key, there can't | 3341 | * we found a key greater than an xattr key, there can't |
@@ -3660,53 +3709,20 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
3660 | } | 3709 | } |
3661 | return ret; | 3710 | return ret; |
3662 | } | 3711 | } |
3663 | |||
3664 | |||
3665 | /* helper to check if there is any shared block in the path */ | ||
3666 | static int check_path_shared(struct btrfs_root *root, | ||
3667 | struct btrfs_path *path) | ||
3668 | { | ||
3669 | struct extent_buffer *eb; | ||
3670 | int level; | ||
3671 | u64 refs = 1; | ||
3672 | |||
3673 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
3674 | int ret; | ||
3675 | |||
3676 | if (!path->nodes[level]) | ||
3677 | break; | ||
3678 | eb = path->nodes[level]; | ||
3679 | if (!btrfs_block_can_be_shared(root, eb)) | ||
3680 | continue; | ||
3681 | ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1, | ||
3682 | &refs, NULL); | ||
3683 | if (refs > 1) | ||
3684 | return 1; | ||
3685 | } | ||
3686 | return 0; | ||
3687 | } | ||
3688 | 3712 | ||
3689 | /* | 3713 | /* |
3690 | * helper to start transaction for unlink and rmdir. | 3714 | * helper to start transaction for unlink and rmdir. |
3691 | * | 3715 | * |
3692 | * unlink and rmdir are special in btrfs, they do not always free space. | 3716 | * unlink and rmdir are special in btrfs, they do not always free space, so |
3693 | * so in enospc case, we should make sure they will free space before | 3717 | * if we cannot make our reservations the normal way try and see if there is |
3694 | * allowing them to use the global metadata reservation. | 3718 | * plenty of slack room in the global reserve to migrate, otherwise we cannot |
3719 | * allow the unlink to occur. | ||
3695 | */ | 3720 | */ |
3696 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | 3721 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) |
3697 | struct dentry *dentry) | ||
3698 | { | 3722 | { |
3699 | struct btrfs_trans_handle *trans; | 3723 | struct btrfs_trans_handle *trans; |
3700 | struct btrfs_root *root = BTRFS_I(dir)->root; | 3724 | struct btrfs_root *root = BTRFS_I(dir)->root; |
3701 | struct btrfs_path *path; | ||
3702 | struct btrfs_dir_item *di; | ||
3703 | struct inode *inode = dentry->d_inode; | ||
3704 | u64 index; | ||
3705 | int check_link = 1; | ||
3706 | int err = -ENOSPC; | ||
3707 | int ret; | 3725 | int ret; |
3708 | u64 ino = btrfs_ino(inode); | ||
3709 | u64 dir_ino = btrfs_ino(dir); | ||
3710 | 3726 | ||
3711 | /* | 3727 | /* |
3712 | * 1 for the possible orphan item | 3728 | * 1 for the possible orphan item |
@@ -3719,158 +3735,23 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
3719 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | 3735 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) |
3720 | return trans; | 3736 | return trans; |
3721 | 3737 | ||
3722 | if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) | 3738 | if (PTR_ERR(trans) == -ENOSPC) { |
3723 | return ERR_PTR(-ENOSPC); | 3739 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); |
3724 | 3740 | ||
3725 | /* check if there is someone else holds reference */ | 3741 | trans = btrfs_start_transaction(root, 0); |
3726 | if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) | 3742 | if (IS_ERR(trans)) |
3727 | return ERR_PTR(-ENOSPC); | 3743 | return trans; |
3728 | 3744 | ret = btrfs_cond_migrate_bytes(root->fs_info, | |
3729 | if (atomic_read(&inode->i_count) > 2) | 3745 | &root->fs_info->trans_block_rsv, |
3730 | return ERR_PTR(-ENOSPC); | 3746 | num_bytes, 5); |
3731 | 3747 | if (ret) { | |
3732 | if (xchg(&root->fs_info->enospc_unlink, 1)) | 3748 | btrfs_end_transaction(trans, root); |
3733 | return ERR_PTR(-ENOSPC); | 3749 | return ERR_PTR(ret); |
3734 | |||
3735 | path = btrfs_alloc_path(); | ||
3736 | if (!path) { | ||
3737 | root->fs_info->enospc_unlink = 0; | ||
3738 | return ERR_PTR(-ENOMEM); | ||
3739 | } | ||
3740 | |||
3741 | /* 1 for the orphan item */ | ||
3742 | trans = btrfs_start_transaction(root, 1); | ||
3743 | if (IS_ERR(trans)) { | ||
3744 | btrfs_free_path(path); | ||
3745 | root->fs_info->enospc_unlink = 0; | ||
3746 | return trans; | ||
3747 | } | ||
3748 | |||
3749 | path->skip_locking = 1; | ||
3750 | path->search_commit_root = 1; | ||
3751 | |||
3752 | ret = btrfs_lookup_inode(trans, root, path, | ||
3753 | &BTRFS_I(dir)->location, 0); | ||
3754 | if (ret < 0) { | ||
3755 | err = ret; | ||
3756 | goto out; | ||
3757 | } | ||
3758 | if (ret == 0) { | ||
3759 | if (check_path_shared(root, path)) | ||
3760 | goto out; | ||
3761 | } else { | ||
3762 | check_link = 0; | ||
3763 | } | ||
3764 | btrfs_release_path(path); | ||
3765 | |||
3766 | ret = btrfs_lookup_inode(trans, root, path, | ||
3767 | &BTRFS_I(inode)->location, 0); | ||
3768 | if (ret < 0) { | ||
3769 | err = ret; | ||
3770 | goto out; | ||
3771 | } | ||
3772 | if (ret == 0) { | ||
3773 | if (check_path_shared(root, path)) | ||
3774 | goto out; | ||
3775 | } else { | ||
3776 | check_link = 0; | ||
3777 | } | ||
3778 | btrfs_release_path(path); | ||
3779 | |||
3780 | if (ret == 0 && S_ISREG(inode->i_mode)) { | ||
3781 | ret = btrfs_lookup_file_extent(trans, root, path, | ||
3782 | ino, (u64)-1, 0); | ||
3783 | if (ret < 0) { | ||
3784 | err = ret; | ||
3785 | goto out; | ||
3786 | } | 3750 | } |
3787 | BUG_ON(ret == 0); /* Corruption */ | ||
3788 | if (check_path_shared(root, path)) | ||
3789 | goto out; | ||
3790 | btrfs_release_path(path); | ||
3791 | } | ||
3792 | |||
3793 | if (!check_link) { | ||
3794 | err = 0; | ||
3795 | goto out; | ||
3796 | } | ||
3797 | |||
3798 | di = btrfs_lookup_dir_item(trans, root, path, dir_ino, | ||
3799 | dentry->d_name.name, dentry->d_name.len, 0); | ||
3800 | if (IS_ERR(di)) { | ||
3801 | err = PTR_ERR(di); | ||
3802 | goto out; | ||
3803 | } | ||
3804 | if (di) { | ||
3805 | if (check_path_shared(root, path)) | ||
3806 | goto out; | ||
3807 | } else { | ||
3808 | err = 0; | ||
3809 | goto out; | ||
3810 | } | ||
3811 | btrfs_release_path(path); | ||
3812 | |||
3813 | ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name, | ||
3814 | dentry->d_name.len, ino, dir_ino, 0, | ||
3815 | &index); | ||
3816 | if (ret) { | ||
3817 | err = ret; | ||
3818 | goto out; | ||
3819 | } | ||
3820 | |||
3821 | if (check_path_shared(root, path)) | ||
3822 | goto out; | ||
3823 | |||
3824 | btrfs_release_path(path); | ||
3825 | |||
3826 | /* | ||
3827 | * This is a commit root search, if we can lookup inode item and other | ||
3828 | * relative items in the commit root, it means the transaction of | ||
3829 | * dir/file creation has been committed, and the dir index item that we | ||
3830 | * delay to insert has also been inserted into the commit root. So | ||
3831 | * we needn't worry about the delayed insertion of the dir index item | ||
3832 | * here. | ||
3833 | */ | ||
3834 | di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index, | ||
3835 | dentry->d_name.name, dentry->d_name.len, 0); | ||
3836 | if (IS_ERR(di)) { | ||
3837 | err = PTR_ERR(di); | ||
3838 | goto out; | ||
3839 | } | ||
3840 | BUG_ON(ret == -ENOENT); | ||
3841 | if (check_path_shared(root, path)) | ||
3842 | goto out; | ||
3843 | |||
3844 | err = 0; | ||
3845 | out: | ||
3846 | btrfs_free_path(path); | ||
3847 | /* Migrate the orphan reservation over */ | ||
3848 | if (!err) | ||
3849 | err = btrfs_block_rsv_migrate(trans->block_rsv, | ||
3850 | &root->fs_info->global_block_rsv, | ||
3851 | trans->bytes_reserved); | ||
3852 | |||
3853 | if (err) { | ||
3854 | btrfs_end_transaction(trans, root); | ||
3855 | root->fs_info->enospc_unlink = 0; | ||
3856 | return ERR_PTR(err); | ||
3857 | } | ||
3858 | |||
3859 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
3860 | return trans; | ||
3861 | } | ||
3862 | |||
3863 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | ||
3864 | struct btrfs_root *root) | ||
3865 | { | ||
3866 | if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) { | ||
3867 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
3868 | trans->bytes_reserved); | ||
3869 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 3751 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
3870 | BUG_ON(!root->fs_info->enospc_unlink); | 3752 | trans->bytes_reserved = num_bytes; |
3871 | root->fs_info->enospc_unlink = 0; | ||
3872 | } | 3753 | } |
3873 | btrfs_end_transaction(trans, root); | 3754 | return trans; |
3874 | } | 3755 | } |
3875 | 3756 | ||
3876 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | 3757 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) |
@@ -3880,7 +3761,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
3880 | struct inode *inode = dentry->d_inode; | 3761 | struct inode *inode = dentry->d_inode; |
3881 | int ret; | 3762 | int ret; |
3882 | 3763 | ||
3883 | trans = __unlink_start_trans(dir, dentry); | 3764 | trans = __unlink_start_trans(dir); |
3884 | if (IS_ERR(trans)) | 3765 | if (IS_ERR(trans)) |
3885 | return PTR_ERR(trans); | 3766 | return PTR_ERR(trans); |
3886 | 3767 | ||
@@ -3898,7 +3779,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
3898 | } | 3779 | } |
3899 | 3780 | ||
3900 | out: | 3781 | out: |
3901 | __unlink_end_trans(trans, root); | 3782 | btrfs_end_transaction(trans, root); |
3902 | btrfs_btree_balance_dirty(root); | 3783 | btrfs_btree_balance_dirty(root); |
3903 | return ret; | 3784 | return ret; |
3904 | } | 3785 | } |
@@ -3995,7 +3876,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
3995 | if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) | 3876 | if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) |
3996 | return -EPERM; | 3877 | return -EPERM; |
3997 | 3878 | ||
3998 | trans = __unlink_start_trans(dir, dentry); | 3879 | trans = __unlink_start_trans(dir); |
3999 | if (IS_ERR(trans)) | 3880 | if (IS_ERR(trans)) |
4000 | return PTR_ERR(trans); | 3881 | return PTR_ERR(trans); |
4001 | 3882 | ||
@@ -4017,7 +3898,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
4017 | if (!err) | 3898 | if (!err) |
4018 | btrfs_i_size_write(inode, 0); | 3899 | btrfs_i_size_write(inode, 0); |
4019 | out: | 3900 | out: |
4020 | __unlink_end_trans(trans, root); | 3901 | btrfs_end_transaction(trans, root); |
4021 | btrfs_btree_balance_dirty(root); | 3902 | btrfs_btree_balance_dirty(root); |
4022 | 3903 | ||
4023 | return err; | 3904 | return err; |
@@ -4395,6 +4276,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
4395 | u64 hole_size; | 4276 | u64 hole_size; |
4396 | int err = 0; | 4277 | int err = 0; |
4397 | 4278 | ||
4279 | /* | ||
4280 | * If our size started in the middle of a page we need to zero out the | ||
4281 | * rest of the page before we expand the i_size, otherwise we could | ||
4282 | * expose stale data. | ||
4283 | */ | ||
4284 | err = btrfs_truncate_page(inode, oldsize, 0, 0); | ||
4285 | if (err) | ||
4286 | return err; | ||
4287 | |||
4398 | if (size <= hole_start) | 4288 | if (size <= hole_start) |
4399 | return 0; | 4289 | return 0; |
4400 | 4290 | ||
@@ -4509,9 +4399,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) | |||
4509 | int mask = attr->ia_valid; | 4399 | int mask = attr->ia_valid; |
4510 | int ret; | 4400 | int ret; |
4511 | 4401 | ||
4512 | if (newsize == oldsize) | ||
4513 | return 0; | ||
4514 | |||
4515 | /* | 4402 | /* |
4516 | * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a | 4403 | * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a |
4517 | * special case where we need to update the times despite not having | 4404 | * special case where we need to update the times despite not having |
@@ -4822,11 +4709,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, | |||
4822 | goto out; | 4709 | goto out; |
4823 | } | 4710 | } |
4824 | 4711 | ||
4825 | if (btrfs_root_refs(&new_root->root_item) == 0) { | ||
4826 | err = -ENOENT; | ||
4827 | goto out; | ||
4828 | } | ||
4829 | |||
4830 | *sub_root = new_root; | 4712 | *sub_root = new_root; |
4831 | location->objectid = btrfs_root_dirid(&new_root->root_item); | 4713 | location->objectid = btrfs_root_dirid(&new_root->root_item); |
4832 | location->type = BTRFS_INODE_ITEM_KEY; | 4714 | location->type = BTRFS_INODE_ITEM_KEY; |
@@ -5092,8 +4974,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
5092 | if (!(inode->i_sb->s_flags & MS_RDONLY)) | 4974 | if (!(inode->i_sb->s_flags & MS_RDONLY)) |
5093 | ret = btrfs_orphan_cleanup(sub_root); | 4975 | ret = btrfs_orphan_cleanup(sub_root); |
5094 | up_read(&root->fs_info->cleanup_work_sem); | 4976 | up_read(&root->fs_info->cleanup_work_sem); |
5095 | if (ret) | 4977 | if (ret) { |
4978 | iput(inode); | ||
5096 | inode = ERR_PTR(ret); | 4979 | inode = ERR_PTR(ret); |
4980 | } | ||
5097 | } | 4981 | } |
5098 | 4982 | ||
5099 | return inode; | 4983 | return inode; |
@@ -5137,10 +5021,9 @@ unsigned char btrfs_filetype_table[] = { | |||
5137 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 5021 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
5138 | }; | 5022 | }; |
5139 | 5023 | ||
5140 | static int btrfs_real_readdir(struct file *filp, void *dirent, | 5024 | static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) |
5141 | filldir_t filldir) | ||
5142 | { | 5025 | { |
5143 | struct inode *inode = file_inode(filp); | 5026 | struct inode *inode = file_inode(file); |
5144 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5027 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5145 | struct btrfs_item *item; | 5028 | struct btrfs_item *item; |
5146 | struct btrfs_dir_item *di; | 5029 | struct btrfs_dir_item *di; |
@@ -5161,29 +5044,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
5161 | char tmp_name[32]; | 5044 | char tmp_name[32]; |
5162 | char *name_ptr; | 5045 | char *name_ptr; |
5163 | int name_len; | 5046 | int name_len; |
5164 | int is_curr = 0; /* filp->f_pos points to the current index? */ | 5047 | int is_curr = 0; /* ctx->pos points to the current index? */ |
5165 | 5048 | ||
5166 | /* FIXME, use a real flag for deciding about the key type */ | 5049 | /* FIXME, use a real flag for deciding about the key type */ |
5167 | if (root->fs_info->tree_root == root) | 5050 | if (root->fs_info->tree_root == root) |
5168 | key_type = BTRFS_DIR_ITEM_KEY; | 5051 | key_type = BTRFS_DIR_ITEM_KEY; |
5169 | 5052 | ||
5170 | /* special case for "." */ | 5053 | if (!dir_emit_dots(file, ctx)) |
5171 | if (filp->f_pos == 0) { | 5054 | return 0; |
5172 | over = filldir(dirent, ".", 1, | 5055 | |
5173 | filp->f_pos, btrfs_ino(inode), DT_DIR); | ||
5174 | if (over) | ||
5175 | return 0; | ||
5176 | filp->f_pos = 1; | ||
5177 | } | ||
5178 | /* special case for .., just use the back ref */ | ||
5179 | if (filp->f_pos == 1) { | ||
5180 | u64 pino = parent_ino(filp->f_path.dentry); | ||
5181 | over = filldir(dirent, "..", 2, | ||
5182 | filp->f_pos, pino, DT_DIR); | ||
5183 | if (over) | ||
5184 | return 0; | ||
5185 | filp->f_pos = 2; | ||
5186 | } | ||
5187 | path = btrfs_alloc_path(); | 5056 | path = btrfs_alloc_path(); |
5188 | if (!path) | 5057 | if (!path) |
5189 | return -ENOMEM; | 5058 | return -ENOMEM; |
@@ -5197,7 +5066,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
5197 | } | 5066 | } |
5198 | 5067 | ||
5199 | btrfs_set_key_type(&key, key_type); | 5068 | btrfs_set_key_type(&key, key_type); |
5200 | key.offset = filp->f_pos; | 5069 | key.offset = ctx->pos; |
5201 | key.objectid = btrfs_ino(inode); | 5070 | key.objectid = btrfs_ino(inode); |
5202 | 5071 | ||
5203 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 5072 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
@@ -5223,14 +5092,14 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
5223 | break; | 5092 | break; |
5224 | if (btrfs_key_type(&found_key) != key_type) | 5093 | if (btrfs_key_type(&found_key) != key_type) |
5225 | break; | 5094 | break; |
5226 | if (found_key.offset < filp->f_pos) | 5095 | if (found_key.offset < ctx->pos) |
5227 | goto next; | 5096 | goto next; |
5228 | if (key_type == BTRFS_DIR_INDEX_KEY && | 5097 | if (key_type == BTRFS_DIR_INDEX_KEY && |
5229 | btrfs_should_delete_dir_index(&del_list, | 5098 | btrfs_should_delete_dir_index(&del_list, |
5230 | found_key.offset)) | 5099 | found_key.offset)) |
5231 | goto next; | 5100 | goto next; |
5232 | 5101 | ||
5233 | filp->f_pos = found_key.offset; | 5102 | ctx->pos = found_key.offset; |
5234 | is_curr = 1; | 5103 | is_curr = 1; |
5235 | 5104 | ||
5236 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 5105 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
@@ -5274,9 +5143,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
5274 | over = 0; | 5143 | over = 0; |
5275 | goto skip; | 5144 | goto skip; |
5276 | } | 5145 | } |
5277 | over = filldir(dirent, name_ptr, name_len, | 5146 | over = !dir_emit(ctx, name_ptr, name_len, |
5278 | found_key.offset, location.objectid, | 5147 | location.objectid, d_type); |
5279 | d_type); | ||
5280 | 5148 | ||
5281 | skip: | 5149 | skip: |
5282 | if (name_ptr != tmp_name) | 5150 | if (name_ptr != tmp_name) |
@@ -5295,22 +5163,38 @@ next: | |||
5295 | 5163 | ||
5296 | if (key_type == BTRFS_DIR_INDEX_KEY) { | 5164 | if (key_type == BTRFS_DIR_INDEX_KEY) { |
5297 | if (is_curr) | 5165 | if (is_curr) |
5298 | filp->f_pos++; | 5166 | ctx->pos++; |
5299 | ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir, | 5167 | ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); |
5300 | &ins_list); | ||
5301 | if (ret) | 5168 | if (ret) |
5302 | goto nopos; | 5169 | goto nopos; |
5303 | } | 5170 | } |
5304 | 5171 | ||
5305 | /* Reached end of directory/root. Bump pos past the last item. */ | 5172 | /* Reached end of directory/root. Bump pos past the last item. */ |
5306 | if (key_type == BTRFS_DIR_INDEX_KEY) | 5173 | ctx->pos++; |
5307 | /* | 5174 | |
5308 | * 32-bit glibc will use getdents64, but then strtol - | 5175 | /* |
5309 | * so the last number we can serve is this. | 5176 | * Stop new entries from being returned after we return the last |
5310 | */ | 5177 | * entry. |
5311 | filp->f_pos = 0x7fffffff; | 5178 | * |
5312 | else | 5179 | * New directory entries are assigned a strictly increasing |
5313 | filp->f_pos++; | 5180 | * offset. This means that new entries created during readdir |
5181 | * are *guaranteed* to be seen in the future by that readdir. | ||
5182 | * This has broken buggy programs which operate on names as | ||
5183 | * they're returned by readdir. Until we re-use freed offsets | ||
5184 | * we have this hack to stop new entries from being returned | ||
5185 | * under the assumption that they'll never reach this huge | ||
5186 | * offset. | ||
5187 | * | ||
5188 | * This is being careful not to overflow 32bit loff_t unless the | ||
5189 | * last entry requires it because doing so has broken 32bit apps | ||
5190 | * in the past. | ||
5191 | */ | ||
5192 | if (key_type == BTRFS_DIR_INDEX_KEY) { | ||
5193 | if (ctx->pos >= INT_MAX) | ||
5194 | ctx->pos = LLONG_MAX; | ||
5195 | else | ||
5196 | ctx->pos = INT_MAX; | ||
5197 | } | ||
5314 | nopos: | 5198 | nopos: |
5315 | ret = 0; | 5199 | ret = 0; |
5316 | err: | 5200 | err: |
@@ -6518,10 +6402,10 @@ out: | |||
6518 | * returns 1 when the nocow is safe, < 1 on error, 0 if the | 6402 | * returns 1 when the nocow is safe, < 1 on error, 0 if the |
6519 | * block must be cow'd | 6403 | * block must be cow'd |
6520 | */ | 6404 | */ |
6521 | static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | 6405 | noinline int can_nocow_extent(struct btrfs_trans_handle *trans, |
6522 | struct inode *inode, u64 offset, u64 *len, | 6406 | struct inode *inode, u64 offset, u64 *len, |
6523 | u64 *orig_start, u64 *orig_block_len, | 6407 | u64 *orig_start, u64 *orig_block_len, |
6524 | u64 *ram_bytes) | 6408 | u64 *ram_bytes) |
6525 | { | 6409 | { |
6526 | struct btrfs_path *path; | 6410 | struct btrfs_path *path; |
6527 | int ret; | 6411 | int ret; |
@@ -6535,7 +6419,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | |||
6535 | u64 num_bytes; | 6419 | u64 num_bytes; |
6536 | int slot; | 6420 | int slot; |
6537 | int found_type; | 6421 | int found_type; |
6538 | 6422 | bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW); | |
6539 | path = btrfs_alloc_path(); | 6423 | path = btrfs_alloc_path(); |
6540 | if (!path) | 6424 | if (!path) |
6541 | return -ENOMEM; | 6425 | return -ENOMEM; |
@@ -6575,18 +6459,28 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | |||
6575 | /* not a regular extent, must cow */ | 6459 | /* not a regular extent, must cow */ |
6576 | goto out; | 6460 | goto out; |
6577 | } | 6461 | } |
6462 | |||
6463 | if (!nocow && found_type == BTRFS_FILE_EXTENT_REG) | ||
6464 | goto out; | ||
6465 | |||
6578 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | 6466 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
6467 | if (disk_bytenr == 0) | ||
6468 | goto out; | ||
6469 | |||
6470 | if (btrfs_file_extent_compression(leaf, fi) || | ||
6471 | btrfs_file_extent_encryption(leaf, fi) || | ||
6472 | btrfs_file_extent_other_encoding(leaf, fi)) | ||
6473 | goto out; | ||
6474 | |||
6579 | backref_offset = btrfs_file_extent_offset(leaf, fi); | 6475 | backref_offset = btrfs_file_extent_offset(leaf, fi); |
6580 | 6476 | ||
6581 | *orig_start = key.offset - backref_offset; | 6477 | if (orig_start) { |
6582 | *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); | 6478 | *orig_start = key.offset - backref_offset; |
6583 | *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); | 6479 | *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); |
6480 | *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); | ||
6481 | } | ||
6584 | 6482 | ||
6585 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); | 6483 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); |
6586 | if (extent_end < offset + *len) { | ||
6587 | /* extent doesn't include our full range, must cow */ | ||
6588 | goto out; | ||
6589 | } | ||
6590 | 6484 | ||
6591 | if (btrfs_extent_readonly(root, disk_bytenr)) | 6485 | if (btrfs_extent_readonly(root, disk_bytenr)) |
6592 | goto out; | 6486 | goto out; |
@@ -6830,8 +6724,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
6830 | if (IS_ERR(trans)) | 6724 | if (IS_ERR(trans)) |
6831 | goto must_cow; | 6725 | goto must_cow; |
6832 | 6726 | ||
6833 | if (can_nocow_odirect(trans, inode, start, &len, &orig_start, | 6727 | if (can_nocow_extent(trans, inode, start, &len, &orig_start, |
6834 | &orig_block_len, &ram_bytes) == 1) { | 6728 | &orig_block_len, &ram_bytes) == 1) { |
6835 | if (type == BTRFS_ORDERED_PREALLOC) { | 6729 | if (type == BTRFS_ORDERED_PREALLOC) { |
6836 | free_extent_map(em); | 6730 | free_extent_map(em); |
6837 | em = create_pinned_em(inode, start, len, | 6731 | em = create_pinned_em(inode, start, len, |
@@ -7260,7 +7154,6 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
7260 | { | 7154 | { |
7261 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7155 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7262 | struct btrfs_dio_private *dip; | 7156 | struct btrfs_dio_private *dip; |
7263 | struct bio_vec *bvec = dio_bio->bi_io_vec; | ||
7264 | struct bio *io_bio; | 7157 | struct bio *io_bio; |
7265 | int skip_sum; | 7158 | int skip_sum; |
7266 | int write = rw & REQ_WRITE; | 7159 | int write = rw & REQ_WRITE; |
@@ -7282,16 +7175,9 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
7282 | } | 7175 | } |
7283 | 7176 | ||
7284 | dip->private = dio_bio->bi_private; | 7177 | dip->private = dio_bio->bi_private; |
7285 | io_bio->bi_private = dio_bio->bi_private; | ||
7286 | dip->inode = inode; | 7178 | dip->inode = inode; |
7287 | dip->logical_offset = file_offset; | 7179 | dip->logical_offset = file_offset; |
7288 | 7180 | dip->bytes = dio_bio->bi_size; | |
7289 | dip->bytes = 0; | ||
7290 | do { | ||
7291 | dip->bytes += bvec->bv_len; | ||
7292 | bvec++; | ||
7293 | } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1)); | ||
7294 | |||
7295 | dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; | 7181 | dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; |
7296 | io_bio->bi_private = dip; | 7182 | io_bio->bi_private = dip; |
7297 | dip->errors = 0; | 7183 | dip->errors = 0; |
@@ -7390,8 +7276,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
7390 | atomic_inc(&inode->i_dio_count); | 7276 | atomic_inc(&inode->i_dio_count); |
7391 | smp_mb__after_atomic_inc(); | 7277 | smp_mb__after_atomic_inc(); |
7392 | 7278 | ||
7279 | /* | ||
7280 | * The generic stuff only does filemap_write_and_wait_range, which isn't | ||
7281 | * enough if we've written compressed pages to this area, so we need to | ||
7282 | * call btrfs_wait_ordered_range to make absolutely sure that any | ||
7283 | * outstanding dirty pages are on disk. | ||
7284 | */ | ||
7285 | count = iov_length(iov, nr_segs); | ||
7286 | btrfs_wait_ordered_range(inode, offset, count); | ||
7287 | |||
7393 | if (rw & WRITE) { | 7288 | if (rw & WRITE) { |
7394 | count = iov_length(iov, nr_segs); | ||
7395 | /* | 7289 | /* |
7396 | * If the write DIO is beyond the EOF, we need update | 7290 | * If the write DIO is beyond the EOF, we need update |
7397 | * the isize, but it is protected by i_mutex. So we can | 7291 | * the isize, but it is protected by i_mutex. So we can |
@@ -7510,7 +7404,8 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) | |||
7510 | return __btrfs_releasepage(page, gfp_flags & GFP_NOFS); | 7404 | return __btrfs_releasepage(page, gfp_flags & GFP_NOFS); |
7511 | } | 7405 | } |
7512 | 7406 | ||
7513 | static void btrfs_invalidatepage(struct page *page, unsigned long offset) | 7407 | static void btrfs_invalidatepage(struct page *page, unsigned int offset, |
7408 | unsigned int length) | ||
7514 | { | 7409 | { |
7515 | struct inode *inode = page->mapping->host; | 7410 | struct inode *inode = page->mapping->host; |
7516 | struct extent_io_tree *tree; | 7411 | struct extent_io_tree *tree; |
@@ -7710,16 +7605,12 @@ static int btrfs_truncate(struct inode *inode) | |||
7710 | { | 7605 | { |
7711 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7606 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7712 | struct btrfs_block_rsv *rsv; | 7607 | struct btrfs_block_rsv *rsv; |
7713 | int ret; | 7608 | int ret = 0; |
7714 | int err = 0; | 7609 | int err = 0; |
7715 | struct btrfs_trans_handle *trans; | 7610 | struct btrfs_trans_handle *trans; |
7716 | u64 mask = root->sectorsize - 1; | 7611 | u64 mask = root->sectorsize - 1; |
7717 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 7612 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
7718 | 7613 | ||
7719 | ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); | ||
7720 | if (ret) | ||
7721 | return ret; | ||
7722 | |||
7723 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 7614 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
7724 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 7615 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
7725 | 7616 | ||
@@ -7977,9 +7868,9 @@ void btrfs_destroy_inode(struct inode *inode) | |||
7977 | */ | 7868 | */ |
7978 | smp_mb(); | 7869 | smp_mb(); |
7979 | if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { | 7870 | if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { |
7980 | spin_lock(&root->fs_info->ordered_extent_lock); | 7871 | spin_lock(&root->fs_info->ordered_root_lock); |
7981 | list_del_init(&BTRFS_I(inode)->ordered_operations); | 7872 | list_del_init(&BTRFS_I(inode)->ordered_operations); |
7982 | spin_unlock(&root->fs_info->ordered_extent_lock); | 7873 | spin_unlock(&root->fs_info->ordered_root_lock); |
7983 | } | 7874 | } |
7984 | 7875 | ||
7985 | if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, | 7876 | if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
@@ -8349,7 +8240,7 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) | |||
8349 | * some fairly slow code that needs optimization. This walks the list | 8240 | * some fairly slow code that needs optimization. This walks the list |
8350 | * of all the inodes with pending delalloc and forces them to disk. | 8241 | * of all the inodes with pending delalloc and forces them to disk. |
8351 | */ | 8242 | */ |
8352 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | 8243 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) |
8353 | { | 8244 | { |
8354 | struct btrfs_inode *binode; | 8245 | struct btrfs_inode *binode; |
8355 | struct inode *inode; | 8246 | struct inode *inode; |
@@ -8358,30 +8249,23 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8358 | struct list_head splice; | 8249 | struct list_head splice; |
8359 | int ret = 0; | 8250 | int ret = 0; |
8360 | 8251 | ||
8361 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
8362 | return -EROFS; | ||
8363 | |||
8364 | INIT_LIST_HEAD(&works); | 8252 | INIT_LIST_HEAD(&works); |
8365 | INIT_LIST_HEAD(&splice); | 8253 | INIT_LIST_HEAD(&splice); |
8366 | 8254 | ||
8367 | spin_lock(&root->fs_info->delalloc_lock); | 8255 | spin_lock(&root->delalloc_lock); |
8368 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | 8256 | list_splice_init(&root->delalloc_inodes, &splice); |
8369 | while (!list_empty(&splice)) { | 8257 | while (!list_empty(&splice)) { |
8370 | binode = list_entry(splice.next, struct btrfs_inode, | 8258 | binode = list_entry(splice.next, struct btrfs_inode, |
8371 | delalloc_inodes); | 8259 | delalloc_inodes); |
8372 | 8260 | ||
8373 | list_del_init(&binode->delalloc_inodes); | 8261 | list_move_tail(&binode->delalloc_inodes, |
8374 | 8262 | &root->delalloc_inodes); | |
8375 | inode = igrab(&binode->vfs_inode); | 8263 | inode = igrab(&binode->vfs_inode); |
8376 | if (!inode) { | 8264 | if (!inode) { |
8377 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | 8265 | cond_resched_lock(&root->delalloc_lock); |
8378 | &binode->runtime_flags); | ||
8379 | continue; | 8266 | continue; |
8380 | } | 8267 | } |
8381 | 8268 | spin_unlock(&root->delalloc_lock); | |
8382 | list_add_tail(&binode->delalloc_inodes, | ||
8383 | &root->fs_info->delalloc_inodes); | ||
8384 | spin_unlock(&root->fs_info->delalloc_lock); | ||
8385 | 8269 | ||
8386 | work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); | 8270 | work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); |
8387 | if (unlikely(!work)) { | 8271 | if (unlikely(!work)) { |
@@ -8393,16 +8277,39 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8393 | &work->work); | 8277 | &work->work); |
8394 | 8278 | ||
8395 | cond_resched(); | 8279 | cond_resched(); |
8396 | spin_lock(&root->fs_info->delalloc_lock); | 8280 | spin_lock(&root->delalloc_lock); |
8397 | } | 8281 | } |
8398 | spin_unlock(&root->fs_info->delalloc_lock); | 8282 | spin_unlock(&root->delalloc_lock); |
8399 | 8283 | ||
8400 | list_for_each_entry_safe(work, next, &works, list) { | 8284 | list_for_each_entry_safe(work, next, &works, list) { |
8401 | list_del_init(&work->list); | 8285 | list_del_init(&work->list); |
8402 | btrfs_wait_and_free_delalloc_work(work); | 8286 | btrfs_wait_and_free_delalloc_work(work); |
8403 | } | 8287 | } |
8288 | return 0; | ||
8289 | out: | ||
8290 | list_for_each_entry_safe(work, next, &works, list) { | ||
8291 | list_del_init(&work->list); | ||
8292 | btrfs_wait_and_free_delalloc_work(work); | ||
8293 | } | ||
8294 | |||
8295 | if (!list_empty_careful(&splice)) { | ||
8296 | spin_lock(&root->delalloc_lock); | ||
8297 | list_splice_tail(&splice, &root->delalloc_inodes); | ||
8298 | spin_unlock(&root->delalloc_lock); | ||
8299 | } | ||
8300 | return ret; | ||
8301 | } | ||
8302 | |||
8303 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | ||
8304 | { | ||
8305 | int ret; | ||
8306 | |||
8307 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
8308 | return -EROFS; | ||
8404 | 8309 | ||
8405 | /* the filemap_flush will queue IO into the worker threads, but | 8310 | ret = __start_delalloc_inodes(root, delay_iput); |
8311 | /* | ||
8312 | * the filemap_flush will queue IO into the worker threads, but | ||
8406 | * we have to make sure the IO is actually started and that | 8313 | * we have to make sure the IO is actually started and that |
8407 | * ordered extents get created before we return | 8314 | * ordered extents get created before we return |
8408 | */ | 8315 | */ |
@@ -8414,17 +8321,55 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8414 | atomic_read(&root->fs_info->async_delalloc_pages) == 0)); | 8321 | atomic_read(&root->fs_info->async_delalloc_pages) == 0)); |
8415 | } | 8322 | } |
8416 | atomic_dec(&root->fs_info->async_submit_draining); | 8323 | atomic_dec(&root->fs_info->async_submit_draining); |
8417 | return 0; | 8324 | return ret; |
8418 | out: | 8325 | } |
8419 | list_for_each_entry_safe(work, next, &works, list) { | 8326 | |
8420 | list_del_init(&work->list); | 8327 | int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info, |
8421 | btrfs_wait_and_free_delalloc_work(work); | 8328 | int delay_iput) |
8329 | { | ||
8330 | struct btrfs_root *root; | ||
8331 | struct list_head splice; | ||
8332 | int ret; | ||
8333 | |||
8334 | if (fs_info->sb->s_flags & MS_RDONLY) | ||
8335 | return -EROFS; | ||
8336 | |||
8337 | INIT_LIST_HEAD(&splice); | ||
8338 | |||
8339 | spin_lock(&fs_info->delalloc_root_lock); | ||
8340 | list_splice_init(&fs_info->delalloc_roots, &splice); | ||
8341 | while (!list_empty(&splice)) { | ||
8342 | root = list_first_entry(&splice, struct btrfs_root, | ||
8343 | delalloc_root); | ||
8344 | root = btrfs_grab_fs_root(root); | ||
8345 | BUG_ON(!root); | ||
8346 | list_move_tail(&root->delalloc_root, | ||
8347 | &fs_info->delalloc_roots); | ||
8348 | spin_unlock(&fs_info->delalloc_root_lock); | ||
8349 | |||
8350 | ret = __start_delalloc_inodes(root, delay_iput); | ||
8351 | btrfs_put_fs_root(root); | ||
8352 | if (ret) | ||
8353 | goto out; | ||
8354 | |||
8355 | spin_lock(&fs_info->delalloc_root_lock); | ||
8422 | } | 8356 | } |
8357 | spin_unlock(&fs_info->delalloc_root_lock); | ||
8423 | 8358 | ||
8359 | atomic_inc(&fs_info->async_submit_draining); | ||
8360 | while (atomic_read(&fs_info->nr_async_submits) || | ||
8361 | atomic_read(&fs_info->async_delalloc_pages)) { | ||
8362 | wait_event(fs_info->async_submit_wait, | ||
8363 | (atomic_read(&fs_info->nr_async_submits) == 0 && | ||
8364 | atomic_read(&fs_info->async_delalloc_pages) == 0)); | ||
8365 | } | ||
8366 | atomic_dec(&fs_info->async_submit_draining); | ||
8367 | return 0; | ||
8368 | out: | ||
8424 | if (!list_empty_careful(&splice)) { | 8369 | if (!list_empty_careful(&splice)) { |
8425 | spin_lock(&root->fs_info->delalloc_lock); | 8370 | spin_lock(&fs_info->delalloc_root_lock); |
8426 | list_splice_tail(&splice, &root->fs_info->delalloc_inodes); | 8371 | list_splice_tail(&splice, &fs_info->delalloc_roots); |
8427 | spin_unlock(&root->fs_info->delalloc_lock); | 8372 | spin_unlock(&fs_info->delalloc_root_lock); |
8428 | } | 8373 | } |
8429 | return ret; | 8374 | return ret; |
8430 | } | 8375 | } |
@@ -8731,7 +8676,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { | |||
8731 | static const struct file_operations btrfs_dir_file_operations = { | 8676 | static const struct file_operations btrfs_dir_file_operations = { |
8732 | .llseek = generic_file_llseek, | 8677 | .llseek = generic_file_llseek, |
8733 | .read = generic_read_dir, | 8678 | .read = generic_read_dir, |
8734 | .readdir = btrfs_real_readdir, | 8679 | .iterate = btrfs_real_readdir, |
8735 | .unlocked_ioctl = btrfs_ioctl, | 8680 | .unlocked_ioctl = btrfs_ioctl, |
8736 | #ifdef CONFIG_COMPAT | 8681 | #ifdef CONFIG_COMPAT |
8737 | .compat_ioctl = btrfs_ioctl, | 8682 | .compat_ioctl = btrfs_ioctl, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0f81d67cdc8d..238a05545ee2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -555,6 +555,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
555 | if (!root->ref_cows) | 555 | if (!root->ref_cows) |
556 | return -EINVAL; | 556 | return -EINVAL; |
557 | 557 | ||
558 | ret = btrfs_start_delalloc_inodes(root, 0); | ||
559 | if (ret) | ||
560 | return ret; | ||
561 | |||
562 | btrfs_wait_ordered_extents(root, 0); | ||
563 | |||
558 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 564 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
559 | if (!pending_snapshot) | 565 | if (!pending_snapshot) |
560 | return -ENOMEM; | 566 | return -ENOMEM; |
@@ -2354,14 +2360,6 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) | |||
2354 | if (ret) | 2360 | if (ret) |
2355 | return ret; | 2361 | return ret; |
2356 | 2362 | ||
2357 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, | ||
2358 | 1)) { | ||
2359 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
2360 | mnt_drop_write_file(file); | ||
2361 | return -EINVAL; | ||
2362 | } | ||
2363 | |||
2364 | mutex_lock(&root->fs_info->volume_mutex); | ||
2365 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 2363 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
2366 | if (IS_ERR(vol_args)) { | 2364 | if (IS_ERR(vol_args)) { |
2367 | ret = PTR_ERR(vol_args); | 2365 | ret = PTR_ERR(vol_args); |
@@ -2369,12 +2367,20 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) | |||
2369 | } | 2367 | } |
2370 | 2368 | ||
2371 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 2369 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
2372 | ret = btrfs_rm_device(root, vol_args->name); | ||
2373 | 2370 | ||
2374 | kfree(vol_args); | 2371 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, |
2375 | out: | 2372 | 1)) { |
2373 | ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; | ||
2374 | goto out; | ||
2375 | } | ||
2376 | |||
2377 | mutex_lock(&root->fs_info->volume_mutex); | ||
2378 | ret = btrfs_rm_device(root, vol_args->name); | ||
2376 | mutex_unlock(&root->fs_info->volume_mutex); | 2379 | mutex_unlock(&root->fs_info->volume_mutex); |
2377 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | 2380 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); |
2381 | |||
2382 | out: | ||
2383 | kfree(vol_args); | ||
2378 | mnt_drop_write_file(file); | 2384 | mnt_drop_write_file(file); |
2379 | return ret; | 2385 | return ret; |
2380 | } | 2386 | } |
@@ -2480,6 +2486,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2480 | int ret; | 2486 | int ret; |
2481 | u64 len = olen; | 2487 | u64 len = olen; |
2482 | u64 bs = root->fs_info->sb->s_blocksize; | 2488 | u64 bs = root->fs_info->sb->s_blocksize; |
2489 | int same_inode = 0; | ||
2483 | 2490 | ||
2484 | /* | 2491 | /* |
2485 | * TODO: | 2492 | * TODO: |
@@ -2516,7 +2523,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2516 | 2523 | ||
2517 | ret = -EINVAL; | 2524 | ret = -EINVAL; |
2518 | if (src == inode) | 2525 | if (src == inode) |
2519 | goto out_fput; | 2526 | same_inode = 1; |
2520 | 2527 | ||
2521 | /* the src must be open for reading */ | 2528 | /* the src must be open for reading */ |
2522 | if (!(src_file.file->f_mode & FMODE_READ)) | 2529 | if (!(src_file.file->f_mode & FMODE_READ)) |
@@ -2547,12 +2554,16 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2547 | } | 2554 | } |
2548 | path->reada = 2; | 2555 | path->reada = 2; |
2549 | 2556 | ||
2550 | if (inode < src) { | 2557 | if (!same_inode) { |
2551 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | 2558 | if (inode < src) { |
2552 | mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); | 2559 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); |
2560 | mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); | ||
2561 | } else { | ||
2562 | mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); | ||
2563 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); | ||
2564 | } | ||
2553 | } else { | 2565 | } else { |
2554 | mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); | 2566 | mutex_lock(&src->i_mutex); |
2555 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); | ||
2556 | } | 2567 | } |
2557 | 2568 | ||
2558 | /* determine range to clone */ | 2569 | /* determine range to clone */ |
@@ -2570,6 +2581,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2570 | !IS_ALIGNED(destoff, bs)) | 2581 | !IS_ALIGNED(destoff, bs)) |
2571 | goto out_unlock; | 2582 | goto out_unlock; |
2572 | 2583 | ||
2584 | /* verify if ranges are overlapped within the same file */ | ||
2585 | if (same_inode) { | ||
2586 | if (destoff + len > off && destoff < off + len) | ||
2587 | goto out_unlock; | ||
2588 | } | ||
2589 | |||
2573 | if (destoff > inode->i_size) { | 2590 | if (destoff > inode->i_size) { |
2574 | ret = btrfs_cont_expand(inode, inode->i_size, destoff); | 2591 | ret = btrfs_cont_expand(inode, inode->i_size, destoff); |
2575 | if (ret) | 2592 | if (ret) |
@@ -2846,7 +2863,8 @@ out: | |||
2846 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); | 2863 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); |
2847 | out_unlock: | 2864 | out_unlock: |
2848 | mutex_unlock(&src->i_mutex); | 2865 | mutex_unlock(&src->i_mutex); |
2849 | mutex_unlock(&inode->i_mutex); | 2866 | if (!same_inode) |
2867 | mutex_unlock(&inode->i_mutex); | ||
2850 | vfree(buf); | 2868 | vfree(buf); |
2851 | btrfs_free_path(path); | 2869 | btrfs_free_path(path); |
2852 | out_fput: | 2870 | out_fput: |
@@ -2951,11 +2969,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
2951 | goto out; | 2969 | goto out; |
2952 | } | 2970 | } |
2953 | 2971 | ||
2954 | if (btrfs_root_refs(&new_root->root_item) == 0) { | ||
2955 | ret = -ENOENT; | ||
2956 | goto out; | ||
2957 | } | ||
2958 | |||
2959 | path = btrfs_alloc_path(); | 2972 | path = btrfs_alloc_path(); |
2960 | if (!path) { | 2973 | if (!path) { |
2961 | ret = -ENOMEM; | 2974 | ret = -ENOMEM; |
@@ -3719,9 +3732,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) | |||
3719 | break; | 3732 | break; |
3720 | } | 3733 | } |
3721 | 3734 | ||
3722 | if (copy_to_user(arg, sa, sizeof(*sa))) | ||
3723 | ret = -EFAULT; | ||
3724 | |||
3725 | err = btrfs_commit_transaction(trans, root->fs_info->tree_root); | 3735 | err = btrfs_commit_transaction(trans, root->fs_info->tree_root); |
3726 | if (err && !ret) | 3736 | if (err && !ret) |
3727 | ret = err; | 3737 | ret = err; |
@@ -3881,7 +3891,7 @@ drop_write: | |||
3881 | 3891 | ||
3882 | static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) | 3892 | static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) |
3883 | { | 3893 | { |
3884 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 3894 | struct btrfs_root *root = BTRFS_I(file_inode(file))->root; |
3885 | struct btrfs_ioctl_quota_rescan_args *qsa; | 3895 | struct btrfs_ioctl_quota_rescan_args *qsa; |
3886 | int ret; | 3896 | int ret; |
3887 | 3897 | ||
@@ -3914,7 +3924,7 @@ drop_write: | |||
3914 | 3924 | ||
3915 | static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) | 3925 | static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) |
3916 | { | 3926 | { |
3917 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 3927 | struct btrfs_root *root = BTRFS_I(file_inode(file))->root; |
3918 | struct btrfs_ioctl_quota_rescan_args *qsa; | 3928 | struct btrfs_ioctl_quota_rescan_args *qsa; |
3919 | int ret = 0; | 3929 | int ret = 0; |
3920 | 3930 | ||
@@ -3937,6 +3947,16 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) | |||
3937 | return ret; | 3947 | return ret; |
3938 | } | 3948 | } |
3939 | 3949 | ||
3950 | static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) | ||
3951 | { | ||
3952 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
3953 | |||
3954 | if (!capable(CAP_SYS_ADMIN)) | ||
3955 | return -EPERM; | ||
3956 | |||
3957 | return btrfs_qgroup_wait_for_completion(root->fs_info); | ||
3958 | } | ||
3959 | |||
3940 | static long btrfs_ioctl_set_received_subvol(struct file *file, | 3960 | static long btrfs_ioctl_set_received_subvol(struct file *file, |
3941 | void __user *arg) | 3961 | void __user *arg) |
3942 | { | 3962 | { |
@@ -4020,7 +4040,7 @@ out: | |||
4020 | 4040 | ||
4021 | static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) | 4041 | static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) |
4022 | { | 4042 | { |
4023 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 4043 | struct btrfs_root *root = BTRFS_I(file_inode(file))->root; |
4024 | const char *label = root->fs_info->super_copy->label; | 4044 | const char *label = root->fs_info->super_copy->label; |
4025 | size_t len = strnlen(label, BTRFS_LABEL_SIZE); | 4045 | size_t len = strnlen(label, BTRFS_LABEL_SIZE); |
4026 | int ret; | 4046 | int ret; |
@@ -4039,7 +4059,7 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) | |||
4039 | 4059 | ||
4040 | static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) | 4060 | static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) |
4041 | { | 4061 | { |
4042 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 4062 | struct btrfs_root *root = BTRFS_I(file_inode(file))->root; |
4043 | struct btrfs_super_block *super_block = root->fs_info->super_copy; | 4063 | struct btrfs_super_block *super_block = root->fs_info->super_copy; |
4044 | struct btrfs_trans_handle *trans; | 4064 | struct btrfs_trans_handle *trans; |
4045 | char label[BTRFS_LABEL_SIZE]; | 4065 | char label[BTRFS_LABEL_SIZE]; |
@@ -4179,6 +4199,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
4179 | return btrfs_ioctl_quota_rescan(file, argp); | 4199 | return btrfs_ioctl_quota_rescan(file, argp); |
4180 | case BTRFS_IOC_QUOTA_RESCAN_STATUS: | 4200 | case BTRFS_IOC_QUOTA_RESCAN_STATUS: |
4181 | return btrfs_ioctl_quota_rescan_status(file, argp); | 4201 | return btrfs_ioctl_quota_rescan_status(file, argp); |
4202 | case BTRFS_IOC_QUOTA_RESCAN_WAIT: | ||
4203 | return btrfs_ioctl_quota_rescan_wait(file, argp); | ||
4182 | case BTRFS_IOC_DEV_REPLACE: | 4204 | case BTRFS_IOC_DEV_REPLACE: |
4183 | return btrfs_ioctl_dev_replace(root, argp); | 4205 | return btrfs_ioctl_dev_replace(root, argp); |
4184 | case BTRFS_IOC_GET_FSLABEL: | 4206 | case BTRFS_IOC_GET_FSLABEL: |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 743b86fa4fcb..f93151a98886 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c | |||
@@ -31,8 +31,8 @@ | |||
31 | 31 | ||
32 | struct workspace { | 32 | struct workspace { |
33 | void *mem; | 33 | void *mem; |
34 | void *buf; /* where compressed data goes */ | 34 | void *buf; /* where decompressed data goes */ |
35 | void *cbuf; /* where decompressed data goes */ | 35 | void *cbuf; /* where compressed data goes */ |
36 | struct list_head list; | 36 | struct list_head list; |
37 | }; | 37 | }; |
38 | 38 | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 1ddd728541ee..81369827e514 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "transaction.h" | 24 | #include "transaction.h" |
25 | #include "btrfs_inode.h" | 25 | #include "btrfs_inode.h" |
26 | #include "extent_io.h" | 26 | #include "extent_io.h" |
27 | #include "disk-io.h" | ||
27 | 28 | ||
28 | static struct kmem_cache *btrfs_ordered_extent_cache; | 29 | static struct kmem_cache *btrfs_ordered_extent_cache; |
29 | 30 | ||
@@ -184,6 +185,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
184 | u64 start, u64 len, u64 disk_len, | 185 | u64 start, u64 len, u64 disk_len, |
185 | int type, int dio, int compress_type) | 186 | int type, int dio, int compress_type) |
186 | { | 187 | { |
188 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
187 | struct btrfs_ordered_inode_tree *tree; | 189 | struct btrfs_ordered_inode_tree *tree; |
188 | struct rb_node *node; | 190 | struct rb_node *node; |
189 | struct btrfs_ordered_extent *entry; | 191 | struct btrfs_ordered_extent *entry; |
@@ -227,10 +229,18 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
227 | ordered_data_tree_panic(inode, -EEXIST, file_offset); | 229 | ordered_data_tree_panic(inode, -EEXIST, file_offset); |
228 | spin_unlock_irq(&tree->lock); | 230 | spin_unlock_irq(&tree->lock); |
229 | 231 | ||
230 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 232 | spin_lock(&root->ordered_extent_lock); |
231 | list_add_tail(&entry->root_extent_list, | 233 | list_add_tail(&entry->root_extent_list, |
232 | &BTRFS_I(inode)->root->fs_info->ordered_extents); | 234 | &root->ordered_extents); |
233 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 235 | root->nr_ordered_extents++; |
236 | if (root->nr_ordered_extents == 1) { | ||
237 | spin_lock(&root->fs_info->ordered_root_lock); | ||
238 | BUG_ON(!list_empty(&root->ordered_root)); | ||
239 | list_add_tail(&root->ordered_root, | ||
240 | &root->fs_info->ordered_roots); | ||
241 | spin_unlock(&root->fs_info->ordered_root_lock); | ||
242 | } | ||
243 | spin_unlock(&root->ordered_extent_lock); | ||
234 | 244 | ||
235 | return 0; | 245 | return 0; |
236 | } | 246 | } |
@@ -516,8 +526,9 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
516 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 526 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
517 | spin_unlock_irq(&tree->lock); | 527 | spin_unlock_irq(&tree->lock); |
518 | 528 | ||
519 | spin_lock(&root->fs_info->ordered_extent_lock); | 529 | spin_lock(&root->ordered_extent_lock); |
520 | list_del_init(&entry->root_extent_list); | 530 | list_del_init(&entry->root_extent_list); |
531 | root->nr_ordered_extents--; | ||
521 | 532 | ||
522 | trace_btrfs_ordered_extent_remove(inode, entry); | 533 | trace_btrfs_ordered_extent_remove(inode, entry); |
523 | 534 | ||
@@ -530,7 +541,14 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
530 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { | 541 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { |
531 | list_del_init(&BTRFS_I(inode)->ordered_operations); | 542 | list_del_init(&BTRFS_I(inode)->ordered_operations); |
532 | } | 543 | } |
533 | spin_unlock(&root->fs_info->ordered_extent_lock); | 544 | |
545 | if (!root->nr_ordered_extents) { | ||
546 | spin_lock(&root->fs_info->ordered_root_lock); | ||
547 | BUG_ON(list_empty(&root->ordered_root)); | ||
548 | list_del_init(&root->ordered_root); | ||
549 | spin_unlock(&root->fs_info->ordered_root_lock); | ||
550 | } | ||
551 | spin_unlock(&root->ordered_extent_lock); | ||
534 | wake_up(&entry->wait); | 552 | wake_up(&entry->wait); |
535 | } | 553 | } |
536 | 554 | ||
@@ -550,7 +568,6 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work) | |||
550 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | 568 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) |
551 | { | 569 | { |
552 | struct list_head splice, works; | 570 | struct list_head splice, works; |
553 | struct list_head *cur; | ||
554 | struct btrfs_ordered_extent *ordered, *next; | 571 | struct btrfs_ordered_extent *ordered, *next; |
555 | struct inode *inode; | 572 | struct inode *inode; |
556 | 573 | ||
@@ -558,35 +575,34 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | |||
558 | INIT_LIST_HEAD(&works); | 575 | INIT_LIST_HEAD(&works); |
559 | 576 | ||
560 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 577 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
561 | spin_lock(&root->fs_info->ordered_extent_lock); | 578 | spin_lock(&root->ordered_extent_lock); |
562 | list_splice_init(&root->fs_info->ordered_extents, &splice); | 579 | list_splice_init(&root->ordered_extents, &splice); |
563 | while (!list_empty(&splice)) { | 580 | while (!list_empty(&splice)) { |
564 | cur = splice.next; | 581 | ordered = list_first_entry(&splice, struct btrfs_ordered_extent, |
565 | ordered = list_entry(cur, struct btrfs_ordered_extent, | 582 | root_extent_list); |
566 | root_extent_list); | 583 | list_move_tail(&ordered->root_extent_list, |
567 | list_del_init(&ordered->root_extent_list); | 584 | &root->ordered_extents); |
568 | atomic_inc(&ordered->refs); | ||
569 | |||
570 | /* | 585 | /* |
571 | * the inode may be getting freed (in sys_unlink path). | 586 | * the inode may be getting freed (in sys_unlink path). |
572 | */ | 587 | */ |
573 | inode = igrab(ordered->inode); | 588 | inode = igrab(ordered->inode); |
589 | if (!inode) { | ||
590 | cond_resched_lock(&root->ordered_extent_lock); | ||
591 | continue; | ||
592 | } | ||
574 | 593 | ||
575 | spin_unlock(&root->fs_info->ordered_extent_lock); | 594 | atomic_inc(&ordered->refs); |
595 | spin_unlock(&root->ordered_extent_lock); | ||
576 | 596 | ||
577 | if (inode) { | 597 | ordered->flush_work.func = btrfs_run_ordered_extent_work; |
578 | ordered->flush_work.func = btrfs_run_ordered_extent_work; | 598 | list_add_tail(&ordered->work_list, &works); |
579 | list_add_tail(&ordered->work_list, &works); | 599 | btrfs_queue_worker(&root->fs_info->flush_workers, |
580 | btrfs_queue_worker(&root->fs_info->flush_workers, | 600 | &ordered->flush_work); |
581 | &ordered->flush_work); | ||
582 | } else { | ||
583 | btrfs_put_ordered_extent(ordered); | ||
584 | } | ||
585 | 601 | ||
586 | cond_resched(); | 602 | cond_resched(); |
587 | spin_lock(&root->fs_info->ordered_extent_lock); | 603 | spin_lock(&root->ordered_extent_lock); |
588 | } | 604 | } |
589 | spin_unlock(&root->fs_info->ordered_extent_lock); | 605 | spin_unlock(&root->ordered_extent_lock); |
590 | 606 | ||
591 | list_for_each_entry_safe(ordered, next, &works, work_list) { | 607 | list_for_each_entry_safe(ordered, next, &works, work_list) { |
592 | list_del_init(&ordered->work_list); | 608 | list_del_init(&ordered->work_list); |
@@ -604,6 +620,33 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | |||
604 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 620 | mutex_unlock(&root->fs_info->ordered_operations_mutex); |
605 | } | 621 | } |
606 | 622 | ||
623 | void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info, | ||
624 | int delay_iput) | ||
625 | { | ||
626 | struct btrfs_root *root; | ||
627 | struct list_head splice; | ||
628 | |||
629 | INIT_LIST_HEAD(&splice); | ||
630 | |||
631 | spin_lock(&fs_info->ordered_root_lock); | ||
632 | list_splice_init(&fs_info->ordered_roots, &splice); | ||
633 | while (!list_empty(&splice)) { | ||
634 | root = list_first_entry(&splice, struct btrfs_root, | ||
635 | ordered_root); | ||
636 | root = btrfs_grab_fs_root(root); | ||
637 | BUG_ON(!root); | ||
638 | list_move_tail(&root->ordered_root, | ||
639 | &fs_info->ordered_roots); | ||
640 | spin_unlock(&fs_info->ordered_root_lock); | ||
641 | |||
642 | btrfs_wait_ordered_extents(root, delay_iput); | ||
643 | btrfs_put_fs_root(root); | ||
644 | |||
645 | spin_lock(&fs_info->ordered_root_lock); | ||
646 | } | ||
647 | spin_unlock(&fs_info->ordered_root_lock); | ||
648 | } | ||
649 | |||
607 | /* | 650 | /* |
608 | * this is used during transaction commit to write all the inodes | 651 | * this is used during transaction commit to write all the inodes |
609 | * added to the ordered operation list. These files must be fully on | 652 | * added to the ordered operation list. These files must be fully on |
@@ -629,7 +672,7 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |||
629 | INIT_LIST_HEAD(&works); | 672 | INIT_LIST_HEAD(&works); |
630 | 673 | ||
631 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 674 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
632 | spin_lock(&root->fs_info->ordered_extent_lock); | 675 | spin_lock(&root->fs_info->ordered_root_lock); |
633 | list_splice_init(&cur_trans->ordered_operations, &splice); | 676 | list_splice_init(&cur_trans->ordered_operations, &splice); |
634 | while (!list_empty(&splice)) { | 677 | while (!list_empty(&splice)) { |
635 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | 678 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, |
@@ -648,17 +691,17 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |||
648 | if (!wait) | 691 | if (!wait) |
649 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | 692 | list_add_tail(&BTRFS_I(inode)->ordered_operations, |
650 | &cur_trans->ordered_operations); | 693 | &cur_trans->ordered_operations); |
651 | spin_unlock(&root->fs_info->ordered_extent_lock); | 694 | spin_unlock(&root->fs_info->ordered_root_lock); |
652 | 695 | ||
653 | work = btrfs_alloc_delalloc_work(inode, wait, 1); | 696 | work = btrfs_alloc_delalloc_work(inode, wait, 1); |
654 | if (!work) { | 697 | if (!work) { |
655 | spin_lock(&root->fs_info->ordered_extent_lock); | 698 | spin_lock(&root->fs_info->ordered_root_lock); |
656 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) | 699 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) |
657 | list_add_tail(&btrfs_inode->ordered_operations, | 700 | list_add_tail(&btrfs_inode->ordered_operations, |
658 | &splice); | 701 | &splice); |
659 | list_splice_tail(&splice, | 702 | list_splice_tail(&splice, |
660 | &cur_trans->ordered_operations); | 703 | &cur_trans->ordered_operations); |
661 | spin_unlock(&root->fs_info->ordered_extent_lock); | 704 | spin_unlock(&root->fs_info->ordered_root_lock); |
662 | ret = -ENOMEM; | 705 | ret = -ENOMEM; |
663 | goto out; | 706 | goto out; |
664 | } | 707 | } |
@@ -667,9 +710,9 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |||
667 | &work->work); | 710 | &work->work); |
668 | 711 | ||
669 | cond_resched(); | 712 | cond_resched(); |
670 | spin_lock(&root->fs_info->ordered_extent_lock); | 713 | spin_lock(&root->fs_info->ordered_root_lock); |
671 | } | 714 | } |
672 | spin_unlock(&root->fs_info->ordered_extent_lock); | 715 | spin_unlock(&root->fs_info->ordered_root_lock); |
673 | out: | 716 | out: |
674 | list_for_each_entry_safe(work, next, &works, list) { | 717 | list_for_each_entry_safe(work, next, &works, list) { |
675 | list_del_init(&work->list); | 718 | list_del_init(&work->list); |
@@ -989,7 +1032,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
989 | u32 *sum, int len) | 1032 | u32 *sum, int len) |
990 | { | 1033 | { |
991 | struct btrfs_ordered_sum *ordered_sum; | 1034 | struct btrfs_ordered_sum *ordered_sum; |
992 | struct btrfs_sector_sum *sector_sums; | ||
993 | struct btrfs_ordered_extent *ordered; | 1035 | struct btrfs_ordered_extent *ordered; |
994 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | 1036 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; |
995 | unsigned long num_sectors; | 1037 | unsigned long num_sectors; |
@@ -1007,18 +1049,16 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
1007 | disk_bytenr < ordered_sum->bytenr + ordered_sum->len) { | 1049 | disk_bytenr < ordered_sum->bytenr + ordered_sum->len) { |
1008 | i = (disk_bytenr - ordered_sum->bytenr) >> | 1050 | i = (disk_bytenr - ordered_sum->bytenr) >> |
1009 | inode->i_sb->s_blocksize_bits; | 1051 | inode->i_sb->s_blocksize_bits; |
1010 | sector_sums = ordered_sum->sums + i; | ||
1011 | num_sectors = ordered_sum->len >> | 1052 | num_sectors = ordered_sum->len >> |
1012 | inode->i_sb->s_blocksize_bits; | 1053 | inode->i_sb->s_blocksize_bits; |
1013 | for (; i < num_sectors; i++) { | 1054 | num_sectors = min_t(int, len - index, num_sectors - i); |
1014 | if (sector_sums[i].bytenr == disk_bytenr) { | 1055 | memcpy(sum + index, ordered_sum->sums + i, |
1015 | sum[index] = sector_sums[i].sum; | 1056 | num_sectors); |
1016 | index++; | 1057 | |
1017 | if (index == len) | 1058 | index += (int)num_sectors; |
1018 | goto out; | 1059 | if (index == len) |
1019 | disk_bytenr += sectorsize; | 1060 | goto out; |
1020 | } | 1061 | disk_bytenr += num_sectors * sectorsize; |
1021 | } | ||
1022 | } | 1062 | } |
1023 | } | 1063 | } |
1024 | out: | 1064 | out: |
@@ -1055,12 +1095,12 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
1055 | if (last_mod < root->fs_info->last_trans_committed) | 1095 | if (last_mod < root->fs_info->last_trans_committed) |
1056 | return; | 1096 | return; |
1057 | 1097 | ||
1058 | spin_lock(&root->fs_info->ordered_extent_lock); | 1098 | spin_lock(&root->fs_info->ordered_root_lock); |
1059 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { | 1099 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { |
1060 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | 1100 | list_add_tail(&BTRFS_I(inode)->ordered_operations, |
1061 | &cur_trans->ordered_operations); | 1101 | &cur_trans->ordered_operations); |
1062 | } | 1102 | } |
1063 | spin_unlock(&root->fs_info->ordered_extent_lock); | 1103 | spin_unlock(&root->fs_info->ordered_root_lock); |
1064 | } | 1104 | } |
1065 | 1105 | ||
1066 | int __init ordered_data_init(void) | 1106 | int __init ordered_data_init(void) |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 58b0e3b0ebad..68844d59ee6f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -26,18 +26,6 @@ struct btrfs_ordered_inode_tree { | |||
26 | struct rb_node *last; | 26 | struct rb_node *last; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | /* | ||
30 | * these are used to collect checksums done just before bios submission. | ||
31 | * They are attached via a list into the ordered extent, and | ||
32 | * checksum items are inserted into the tree after all the blocks in | ||
33 | * the ordered extent are on disk | ||
34 | */ | ||
35 | struct btrfs_sector_sum { | ||
36 | /* bytenr on disk */ | ||
37 | u64 bytenr; | ||
38 | u32 sum; | ||
39 | }; | ||
40 | |||
41 | struct btrfs_ordered_sum { | 29 | struct btrfs_ordered_sum { |
42 | /* bytenr is the start of this extent on disk */ | 30 | /* bytenr is the start of this extent on disk */ |
43 | u64 bytenr; | 31 | u64 bytenr; |
@@ -45,10 +33,10 @@ struct btrfs_ordered_sum { | |||
45 | /* | 33 | /* |
46 | * this is the length in bytes covered by the sums array below. | 34 | * this is the length in bytes covered by the sums array below. |
47 | */ | 35 | */ |
48 | unsigned long len; | 36 | int len; |
49 | struct list_head list; | 37 | struct list_head list; |
50 | /* last field is a variable length array of btrfs_sector_sums */ | 38 | /* last field is a variable length array of csums */ |
51 | struct btrfs_sector_sum sums[]; | 39 | u32 sums[]; |
52 | }; | 40 | }; |
53 | 41 | ||
54 | /* | 42 | /* |
@@ -149,11 +137,8 @@ struct btrfs_ordered_extent { | |||
149 | static inline int btrfs_ordered_sum_size(struct btrfs_root *root, | 137 | static inline int btrfs_ordered_sum_size(struct btrfs_root *root, |
150 | unsigned long bytes) | 138 | unsigned long bytes) |
151 | { | 139 | { |
152 | unsigned long num_sectors = (bytes + root->sectorsize - 1) / | 140 | int num_sectors = (int)DIV_ROUND_UP(bytes, root->sectorsize); |
153 | root->sectorsize; | 141 | return sizeof(struct btrfs_ordered_sum) + num_sectors * sizeof(u32); |
154 | num_sectors++; | ||
155 | return sizeof(struct btrfs_ordered_sum) + | ||
156 | num_sectors * sizeof(struct btrfs_sector_sum); | ||
157 | } | 142 | } |
158 | 143 | ||
159 | static inline void | 144 | static inline void |
@@ -204,6 +189,8 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
204 | struct btrfs_root *root, | 189 | struct btrfs_root *root, |
205 | struct inode *inode); | 190 | struct inode *inode); |
206 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); | 191 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); |
192 | void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info, | ||
193 | int delay_iput); | ||
207 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); | 194 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); |
208 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); | 195 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); |
209 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); | 196 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 9d49c586995a..1280eff8af56 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -98,13 +98,10 @@ struct btrfs_qgroup_list { | |||
98 | struct btrfs_qgroup *member; | 98 | struct btrfs_qgroup *member; |
99 | }; | 99 | }; |
100 | 100 | ||
101 | struct qgroup_rescan { | 101 | static int |
102 | struct btrfs_work work; | 102 | qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, |
103 | struct btrfs_fs_info *fs_info; | 103 | int init_flags); |
104 | }; | 104 | static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); |
105 | |||
106 | static void qgroup_rescan_start(struct btrfs_fs_info *fs_info, | ||
107 | struct qgroup_rescan *qscan); | ||
108 | 105 | ||
109 | /* must be called with qgroup_ioctl_lock held */ | 106 | /* must be called with qgroup_ioctl_lock held */ |
110 | static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, | 107 | static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, |
@@ -255,10 +252,17 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) | |||
255 | int slot; | 252 | int slot; |
256 | int ret = 0; | 253 | int ret = 0; |
257 | u64 flags = 0; | 254 | u64 flags = 0; |
255 | u64 rescan_progress = 0; | ||
258 | 256 | ||
259 | if (!fs_info->quota_enabled) | 257 | if (!fs_info->quota_enabled) |
260 | return 0; | 258 | return 0; |
261 | 259 | ||
260 | fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); | ||
261 | if (!fs_info->qgroup_ulist) { | ||
262 | ret = -ENOMEM; | ||
263 | goto out; | ||
264 | } | ||
265 | |||
262 | path = btrfs_alloc_path(); | 266 | path = btrfs_alloc_path(); |
263 | if (!path) { | 267 | if (!path) { |
264 | ret = -ENOMEM; | 268 | ret = -ENOMEM; |
@@ -306,20 +310,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) | |||
306 | } | 310 | } |
307 | fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, | 311 | fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, |
308 | ptr); | 312 | ptr); |
309 | fs_info->qgroup_rescan_progress.objectid = | 313 | rescan_progress = btrfs_qgroup_status_rescan(l, ptr); |
310 | btrfs_qgroup_status_rescan(l, ptr); | ||
311 | if (fs_info->qgroup_flags & | ||
312 | BTRFS_QGROUP_STATUS_FLAG_RESCAN) { | ||
313 | struct qgroup_rescan *qscan = | ||
314 | kmalloc(sizeof(*qscan), GFP_NOFS); | ||
315 | if (!qscan) { | ||
316 | ret = -ENOMEM; | ||
317 | goto out; | ||
318 | } | ||
319 | fs_info->qgroup_rescan_progress.type = 0; | ||
320 | fs_info->qgroup_rescan_progress.offset = 0; | ||
321 | qgroup_rescan_start(fs_info, qscan); | ||
322 | } | ||
323 | goto next1; | 314 | goto next1; |
324 | } | 315 | } |
325 | 316 | ||
@@ -421,9 +412,18 @@ out: | |||
421 | if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { | 412 | if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { |
422 | fs_info->quota_enabled = 0; | 413 | fs_info->quota_enabled = 0; |
423 | fs_info->pending_quota_state = 0; | 414 | fs_info->pending_quota_state = 0; |
415 | } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && | ||
416 | ret >= 0) { | ||
417 | ret = qgroup_rescan_init(fs_info, rescan_progress, 0); | ||
424 | } | 418 | } |
425 | btrfs_free_path(path); | 419 | btrfs_free_path(path); |
426 | 420 | ||
421 | if (ret < 0) { | ||
422 | ulist_free(fs_info->qgroup_ulist); | ||
423 | fs_info->qgroup_ulist = NULL; | ||
424 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
425 | } | ||
426 | |||
427 | return ret < 0 ? ret : 0; | 427 | return ret < 0 ? ret : 0; |
428 | } | 428 | } |
429 | 429 | ||
@@ -460,6 +460,7 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) | |||
460 | } | 460 | } |
461 | kfree(qgroup); | 461 | kfree(qgroup); |
462 | } | 462 | } |
463 | ulist_free(fs_info->qgroup_ulist); | ||
463 | } | 464 | } |
464 | 465 | ||
465 | static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, | 466 | static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, |
@@ -819,6 +820,12 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, | |||
819 | goto out; | 820 | goto out; |
820 | } | 821 | } |
821 | 822 | ||
823 | fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); | ||
824 | if (!fs_info->qgroup_ulist) { | ||
825 | ret = -ENOMEM; | ||
826 | goto out; | ||
827 | } | ||
828 | |||
822 | /* | 829 | /* |
823 | * initially create the quota tree | 830 | * initially create the quota tree |
824 | */ | 831 | */ |
@@ -916,6 +923,10 @@ out_free_root: | |||
916 | kfree(quota_root); | 923 | kfree(quota_root); |
917 | } | 924 | } |
918 | out: | 925 | out: |
926 | if (ret) { | ||
927 | ulist_free(fs_info->qgroup_ulist); | ||
928 | fs_info->qgroup_ulist = NULL; | ||
929 | } | ||
919 | mutex_unlock(&fs_info->qgroup_ioctl_lock); | 930 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
920 | return ret; | 931 | return ret; |
921 | } | 932 | } |
@@ -1355,7 +1366,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1355 | u64 ref_root; | 1366 | u64 ref_root; |
1356 | struct btrfs_qgroup *qgroup; | 1367 | struct btrfs_qgroup *qgroup; |
1357 | struct ulist *roots = NULL; | 1368 | struct ulist *roots = NULL; |
1358 | struct ulist *tmp = NULL; | ||
1359 | u64 seq; | 1369 | u64 seq; |
1360 | int ret = 0; | 1370 | int ret = 0; |
1361 | int sgn; | 1371 | int sgn; |
@@ -1428,14 +1438,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1428 | if (ret < 0) | 1438 | if (ret < 0) |
1429 | return ret; | 1439 | return ret; |
1430 | 1440 | ||
1431 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
1432 | spin_lock(&fs_info->qgroup_lock); | 1441 | spin_lock(&fs_info->qgroup_lock); |
1433 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { | ||
1434 | if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { | ||
1435 | ret = 0; | ||
1436 | goto unlock; | ||
1437 | } | ||
1438 | } | ||
1439 | 1442 | ||
1440 | quota_root = fs_info->quota_root; | 1443 | quota_root = fs_info->quota_root; |
1441 | if (!quota_root) | 1444 | if (!quota_root) |
@@ -1448,39 +1451,34 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
1448 | /* | 1451 | /* |
1449 | * step 1: for each old ref, visit all nodes once and inc refcnt | 1452 | * step 1: for each old ref, visit all nodes once and inc refcnt |
1450 | */ | 1453 | */ |
1451 | tmp = ulist_alloc(GFP_ATOMIC); | 1454 | ulist_reinit(fs_info->qgroup_ulist); |
1452 | if (!tmp) { | ||
1453 | ret = -ENOMEM; | ||
1454 | goto unlock; | ||
1455 | } | ||
1456 | seq = fs_info->qgroup_seq; | 1455 | seq = fs_info->qgroup_seq; |
1457 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ | 1456 | fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ |
1458 | 1457 | ||
1459 | ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); | 1458 | ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist, |
1459 | seq); | ||
1460 | if (ret) | 1460 | if (ret) |
1461 | goto unlock; | 1461 | goto unlock; |
1462 | 1462 | ||
1463 | /* | 1463 | /* |
1464 | * step 2: walk from the new root | 1464 | * step 2: walk from the new root |
1465 | */ | 1465 | */ |
1466 | ret = qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn, | 1466 | ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist, |
1467 | node->num_bytes, qgroup); | 1467 | seq, sgn, node->num_bytes, qgroup); |
1468 | if (ret) | 1468 | if (ret) |
1469 | goto unlock; | 1469 | goto unlock; |
1470 | 1470 | ||
1471 | /* | 1471 | /* |
1472 | * step 3: walk again from old refs | 1472 | * step 3: walk again from old refs |
1473 | */ | 1473 | */ |
1474 | ret = qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn, | 1474 | ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist, |
1475 | node->num_bytes); | 1475 | seq, sgn, node->num_bytes); |
1476 | if (ret) | 1476 | if (ret) |
1477 | goto unlock; | 1477 | goto unlock; |
1478 | 1478 | ||
1479 | unlock: | 1479 | unlock: |
1480 | spin_unlock(&fs_info->qgroup_lock); | 1480 | spin_unlock(&fs_info->qgroup_lock); |
1481 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
1482 | ulist_free(roots); | 1481 | ulist_free(roots); |
1483 | ulist_free(tmp); | ||
1484 | 1482 | ||
1485 | return ret; | 1483 | return ret; |
1486 | } | 1484 | } |
@@ -1527,9 +1525,12 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | |||
1527 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | 1525 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
1528 | 1526 | ||
1529 | if (!ret && start_rescan_worker) { | 1527 | if (!ret && start_rescan_worker) { |
1530 | ret = btrfs_qgroup_rescan(fs_info); | 1528 | ret = qgroup_rescan_init(fs_info, 0, 1); |
1531 | if (ret) | 1529 | if (!ret) { |
1532 | pr_err("btrfs: start rescan quota failed: %d\n", ret); | 1530 | qgroup_rescan_zero_tracking(fs_info); |
1531 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | ||
1532 | &fs_info->qgroup_rescan_work); | ||
1533 | } | ||
1533 | ret = 0; | 1534 | ret = 0; |
1534 | } | 1535 | } |
1535 | 1536 | ||
@@ -1720,7 +1721,6 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
1720 | struct btrfs_fs_info *fs_info = root->fs_info; | 1721 | struct btrfs_fs_info *fs_info = root->fs_info; |
1721 | u64 ref_root = root->root_key.objectid; | 1722 | u64 ref_root = root->root_key.objectid; |
1722 | int ret = 0; | 1723 | int ret = 0; |
1723 | struct ulist *ulist = NULL; | ||
1724 | struct ulist_node *unode; | 1724 | struct ulist_node *unode; |
1725 | struct ulist_iterator uiter; | 1725 | struct ulist_iterator uiter; |
1726 | 1726 | ||
@@ -1743,17 +1743,13 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
1743 | * in a first step, we check all affected qgroups if any limits would | 1743 | * in a first step, we check all affected qgroups if any limits would |
1744 | * be exceeded | 1744 | * be exceeded |
1745 | */ | 1745 | */ |
1746 | ulist = ulist_alloc(GFP_ATOMIC); | 1746 | ulist_reinit(fs_info->qgroup_ulist); |
1747 | if (!ulist) { | 1747 | ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, |
1748 | ret = -ENOMEM; | ||
1749 | goto out; | ||
1750 | } | ||
1751 | ret = ulist_add(ulist, qgroup->qgroupid, | ||
1752 | (uintptr_t)qgroup, GFP_ATOMIC); | 1748 | (uintptr_t)qgroup, GFP_ATOMIC); |
1753 | if (ret < 0) | 1749 | if (ret < 0) |
1754 | goto out; | 1750 | goto out; |
1755 | ULIST_ITER_INIT(&uiter); | 1751 | ULIST_ITER_INIT(&uiter); |
1756 | while ((unode = ulist_next(ulist, &uiter))) { | 1752 | while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { |
1757 | struct btrfs_qgroup *qg; | 1753 | struct btrfs_qgroup *qg; |
1758 | struct btrfs_qgroup_list *glist; | 1754 | struct btrfs_qgroup_list *glist; |
1759 | 1755 | ||
@@ -1774,7 +1770,8 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
1774 | } | 1770 | } |
1775 | 1771 | ||
1776 | list_for_each_entry(glist, &qg->groups, next_group) { | 1772 | list_for_each_entry(glist, &qg->groups, next_group) { |
1777 | ret = ulist_add(ulist, glist->group->qgroupid, | 1773 | ret = ulist_add(fs_info->qgroup_ulist, |
1774 | glist->group->qgroupid, | ||
1778 | (uintptr_t)glist->group, GFP_ATOMIC); | 1775 | (uintptr_t)glist->group, GFP_ATOMIC); |
1779 | if (ret < 0) | 1776 | if (ret < 0) |
1780 | goto out; | 1777 | goto out; |
@@ -1785,7 +1782,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
1785 | * no limits exceeded, now record the reservation into all qgroups | 1782 | * no limits exceeded, now record the reservation into all qgroups |
1786 | */ | 1783 | */ |
1787 | ULIST_ITER_INIT(&uiter); | 1784 | ULIST_ITER_INIT(&uiter); |
1788 | while ((unode = ulist_next(ulist, &uiter))) { | 1785 | while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { |
1789 | struct btrfs_qgroup *qg; | 1786 | struct btrfs_qgroup *qg; |
1790 | 1787 | ||
1791 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; | 1788 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
@@ -1795,8 +1792,6 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
1795 | 1792 | ||
1796 | out: | 1793 | out: |
1797 | spin_unlock(&fs_info->qgroup_lock); | 1794 | spin_unlock(&fs_info->qgroup_lock); |
1798 | ulist_free(ulist); | ||
1799 | |||
1800 | return ret; | 1795 | return ret; |
1801 | } | 1796 | } |
1802 | 1797 | ||
@@ -1805,7 +1800,6 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
1805 | struct btrfs_root *quota_root; | 1800 | struct btrfs_root *quota_root; |
1806 | struct btrfs_qgroup *qgroup; | 1801 | struct btrfs_qgroup *qgroup; |
1807 | struct btrfs_fs_info *fs_info = root->fs_info; | 1802 | struct btrfs_fs_info *fs_info = root->fs_info; |
1808 | struct ulist *ulist = NULL; | ||
1809 | struct ulist_node *unode; | 1803 | struct ulist_node *unode; |
1810 | struct ulist_iterator uiter; | 1804 | struct ulist_iterator uiter; |
1811 | u64 ref_root = root->root_key.objectid; | 1805 | u64 ref_root = root->root_key.objectid; |
@@ -1827,17 +1821,13 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
1827 | if (!qgroup) | 1821 | if (!qgroup) |
1828 | goto out; | 1822 | goto out; |
1829 | 1823 | ||
1830 | ulist = ulist_alloc(GFP_ATOMIC); | 1824 | ulist_reinit(fs_info->qgroup_ulist); |
1831 | if (!ulist) { | 1825 | ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, |
1832 | btrfs_std_error(fs_info, -ENOMEM); | ||
1833 | goto out; | ||
1834 | } | ||
1835 | ret = ulist_add(ulist, qgroup->qgroupid, | ||
1836 | (uintptr_t)qgroup, GFP_ATOMIC); | 1826 | (uintptr_t)qgroup, GFP_ATOMIC); |
1837 | if (ret < 0) | 1827 | if (ret < 0) |
1838 | goto out; | 1828 | goto out; |
1839 | ULIST_ITER_INIT(&uiter); | 1829 | ULIST_ITER_INIT(&uiter); |
1840 | while ((unode = ulist_next(ulist, &uiter))) { | 1830 | while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { |
1841 | struct btrfs_qgroup *qg; | 1831 | struct btrfs_qgroup *qg; |
1842 | struct btrfs_qgroup_list *glist; | 1832 | struct btrfs_qgroup_list *glist; |
1843 | 1833 | ||
@@ -1846,7 +1836,8 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
1846 | qg->reserved -= num_bytes; | 1836 | qg->reserved -= num_bytes; |
1847 | 1837 | ||
1848 | list_for_each_entry(glist, &qg->groups, next_group) { | 1838 | list_for_each_entry(glist, &qg->groups, next_group) { |
1849 | ret = ulist_add(ulist, glist->group->qgroupid, | 1839 | ret = ulist_add(fs_info->qgroup_ulist, |
1840 | glist->group->qgroupid, | ||
1850 | (uintptr_t)glist->group, GFP_ATOMIC); | 1841 | (uintptr_t)glist->group, GFP_ATOMIC); |
1851 | if (ret < 0) | 1842 | if (ret < 0) |
1852 | goto out; | 1843 | goto out; |
@@ -1855,7 +1846,6 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
1855 | 1846 | ||
1856 | out: | 1847 | out: |
1857 | spin_unlock(&fs_info->qgroup_lock); | 1848 | spin_unlock(&fs_info->qgroup_lock); |
1858 | ulist_free(ulist); | ||
1859 | } | 1849 | } |
1860 | 1850 | ||
1861 | void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) | 1851 | void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) |
@@ -1874,12 +1864,11 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) | |||
1874 | * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. | 1864 | * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. |
1875 | */ | 1865 | */ |
1876 | static int | 1866 | static int |
1877 | qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path, | 1867 | qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, |
1878 | struct btrfs_trans_handle *trans, struct ulist *tmp, | 1868 | struct btrfs_trans_handle *trans, struct ulist *tmp, |
1879 | struct extent_buffer *scratch_leaf) | 1869 | struct extent_buffer *scratch_leaf) |
1880 | { | 1870 | { |
1881 | struct btrfs_key found; | 1871 | struct btrfs_key found; |
1882 | struct btrfs_fs_info *fs_info = qscan->fs_info; | ||
1883 | struct ulist *roots = NULL; | 1872 | struct ulist *roots = NULL; |
1884 | struct ulist_node *unode; | 1873 | struct ulist_node *unode; |
1885 | struct ulist_iterator uiter; | 1874 | struct ulist_iterator uiter; |
@@ -2007,11 +1996,10 @@ out: | |||
2007 | 1996 | ||
2008 | static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | 1997 | static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) |
2009 | { | 1998 | { |
2010 | struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan, | 1999 | struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, |
2011 | work); | 2000 | qgroup_rescan_work); |
2012 | struct btrfs_path *path; | 2001 | struct btrfs_path *path; |
2013 | struct btrfs_trans_handle *trans = NULL; | 2002 | struct btrfs_trans_handle *trans = NULL; |
2014 | struct btrfs_fs_info *fs_info = qscan->fs_info; | ||
2015 | struct ulist *tmp = NULL; | 2003 | struct ulist *tmp = NULL; |
2016 | struct extent_buffer *scratch_leaf = NULL; | 2004 | struct extent_buffer *scratch_leaf = NULL; |
2017 | int err = -ENOMEM; | 2005 | int err = -ENOMEM; |
@@ -2036,7 +2024,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | |||
2036 | if (!fs_info->quota_enabled) { | 2024 | if (!fs_info->quota_enabled) { |
2037 | err = -EINTR; | 2025 | err = -EINTR; |
2038 | } else { | 2026 | } else { |
2039 | err = qgroup_rescan_leaf(qscan, path, trans, | 2027 | err = qgroup_rescan_leaf(fs_info, path, trans, |
2040 | tmp, scratch_leaf); | 2028 | tmp, scratch_leaf); |
2041 | } | 2029 | } |
2042 | if (err > 0) | 2030 | if (err > 0) |
@@ -2049,7 +2037,6 @@ out: | |||
2049 | kfree(scratch_leaf); | 2037 | kfree(scratch_leaf); |
2050 | ulist_free(tmp); | 2038 | ulist_free(tmp); |
2051 | btrfs_free_path(path); | 2039 | btrfs_free_path(path); |
2052 | kfree(qscan); | ||
2053 | 2040 | ||
2054 | mutex_lock(&fs_info->qgroup_rescan_lock); | 2041 | mutex_lock(&fs_info->qgroup_rescan_lock); |
2055 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; | 2042 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; |
@@ -2068,47 +2055,74 @@ out: | |||
2068 | } else { | 2055 | } else { |
2069 | pr_err("btrfs: qgroup scan failed with %d\n", err); | 2056 | pr_err("btrfs: qgroup scan failed with %d\n", err); |
2070 | } | 2057 | } |
2071 | } | ||
2072 | 2058 | ||
2073 | static void | 2059 | complete_all(&fs_info->qgroup_rescan_completion); |
2074 | qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan) | ||
2075 | { | ||
2076 | memset(&qscan->work, 0, sizeof(qscan->work)); | ||
2077 | qscan->work.func = btrfs_qgroup_rescan_worker; | ||
2078 | qscan->fs_info = fs_info; | ||
2079 | |||
2080 | pr_info("btrfs: qgroup scan started\n"); | ||
2081 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work); | ||
2082 | } | 2060 | } |
2083 | 2061 | ||
2084 | int | 2062 | /* |
2085 | btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | 2063 | * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all |
2064 | * memory required for the rescan context. | ||
2065 | */ | ||
2066 | static int | ||
2067 | qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, | ||
2068 | int init_flags) | ||
2086 | { | 2069 | { |
2087 | int ret = 0; | 2070 | int ret = 0; |
2088 | struct rb_node *n; | ||
2089 | struct btrfs_qgroup *qgroup; | ||
2090 | struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS); | ||
2091 | 2071 | ||
2092 | if (!qscan) | 2072 | if (!init_flags && |
2093 | return -ENOMEM; | 2073 | (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) || |
2074 | !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) { | ||
2075 | ret = -EINVAL; | ||
2076 | goto err; | ||
2077 | } | ||
2094 | 2078 | ||
2095 | mutex_lock(&fs_info->qgroup_rescan_lock); | 2079 | mutex_lock(&fs_info->qgroup_rescan_lock); |
2096 | spin_lock(&fs_info->qgroup_lock); | 2080 | spin_lock(&fs_info->qgroup_lock); |
2097 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) | 2081 | |
2098 | ret = -EINPROGRESS; | 2082 | if (init_flags) { |
2099 | else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) | 2083 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) |
2100 | ret = -EINVAL; | 2084 | ret = -EINPROGRESS; |
2101 | if (ret) { | 2085 | else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) |
2102 | spin_unlock(&fs_info->qgroup_lock); | 2086 | ret = -EINVAL; |
2103 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 2087 | |
2104 | kfree(qscan); | 2088 | if (ret) { |
2105 | return ret; | 2089 | spin_unlock(&fs_info->qgroup_lock); |
2090 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
2091 | goto err; | ||
2092 | } | ||
2093 | |||
2094 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
2106 | } | 2095 | } |
2107 | 2096 | ||
2108 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
2109 | memset(&fs_info->qgroup_rescan_progress, 0, | 2097 | memset(&fs_info->qgroup_rescan_progress, 0, |
2110 | sizeof(fs_info->qgroup_rescan_progress)); | 2098 | sizeof(fs_info->qgroup_rescan_progress)); |
2099 | fs_info->qgroup_rescan_progress.objectid = progress_objectid; | ||
2100 | |||
2101 | spin_unlock(&fs_info->qgroup_lock); | ||
2102 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
2103 | |||
2104 | init_completion(&fs_info->qgroup_rescan_completion); | ||
2105 | |||
2106 | memset(&fs_info->qgroup_rescan_work, 0, | ||
2107 | sizeof(fs_info->qgroup_rescan_work)); | ||
2108 | fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker; | ||
2109 | |||
2110 | if (ret) { | ||
2111 | err: | ||
2112 | pr_info("btrfs: qgroup_rescan_init failed with %d\n", ret); | ||
2113 | return ret; | ||
2114 | } | ||
2115 | |||
2116 | return 0; | ||
2117 | } | ||
2118 | |||
2119 | static void | ||
2120 | qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) | ||
2121 | { | ||
2122 | struct rb_node *n; | ||
2123 | struct btrfs_qgroup *qgroup; | ||
2111 | 2124 | ||
2125 | spin_lock(&fs_info->qgroup_lock); | ||
2112 | /* clear all current qgroup tracking information */ | 2126 | /* clear all current qgroup tracking information */ |
2113 | for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { | 2127 | for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { |
2114 | qgroup = rb_entry(n, struct btrfs_qgroup, node); | 2128 | qgroup = rb_entry(n, struct btrfs_qgroup, node); |
@@ -2118,9 +2132,74 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | |||
2118 | qgroup->excl_cmpr = 0; | 2132 | qgroup->excl_cmpr = 0; |
2119 | } | 2133 | } |
2120 | spin_unlock(&fs_info->qgroup_lock); | 2134 | spin_unlock(&fs_info->qgroup_lock); |
2121 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 2135 | } |
2136 | |||
2137 | int | ||
2138 | btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | ||
2139 | { | ||
2140 | int ret = 0; | ||
2141 | struct btrfs_trans_handle *trans; | ||
2122 | 2142 | ||
2123 | qgroup_rescan_start(fs_info, qscan); | 2143 | ret = qgroup_rescan_init(fs_info, 0, 1); |
2144 | if (ret) | ||
2145 | return ret; | ||
2146 | |||
2147 | /* | ||
2148 | * We have set the rescan_progress to 0, which means no more | ||
2149 | * delayed refs will be accounted by btrfs_qgroup_account_ref. | ||
2150 | * However, btrfs_qgroup_account_ref may be right after its call | ||
2151 | * to btrfs_find_all_roots, in which case it would still do the | ||
2152 | * accounting. | ||
2153 | * To solve this, we're committing the transaction, which will | ||
2154 | * ensure we run all delayed refs and only after that, we are | ||
2155 | * going to clear all tracking information for a clean start. | ||
2156 | */ | ||
2157 | |||
2158 | trans = btrfs_join_transaction(fs_info->fs_root); | ||
2159 | if (IS_ERR(trans)) { | ||
2160 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
2161 | return PTR_ERR(trans); | ||
2162 | } | ||
2163 | ret = btrfs_commit_transaction(trans, fs_info->fs_root); | ||
2164 | if (ret) { | ||
2165 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
2166 | return ret; | ||
2167 | } | ||
2168 | |||
2169 | qgroup_rescan_zero_tracking(fs_info); | ||
2170 | |||
2171 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | ||
2172 | &fs_info->qgroup_rescan_work); | ||
2124 | 2173 | ||
2125 | return 0; | 2174 | return 0; |
2126 | } | 2175 | } |
2176 | |||
2177 | int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) | ||
2178 | { | ||
2179 | int running; | ||
2180 | int ret = 0; | ||
2181 | |||
2182 | mutex_lock(&fs_info->qgroup_rescan_lock); | ||
2183 | spin_lock(&fs_info->qgroup_lock); | ||
2184 | running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; | ||
2185 | spin_unlock(&fs_info->qgroup_lock); | ||
2186 | mutex_unlock(&fs_info->qgroup_rescan_lock); | ||
2187 | |||
2188 | if (running) | ||
2189 | ret = wait_for_completion_interruptible( | ||
2190 | &fs_info->qgroup_rescan_completion); | ||
2191 | |||
2192 | return ret; | ||
2193 | } | ||
2194 | |||
2195 | /* | ||
2196 | * this is only called from open_ctree where we're still single threaded, thus | ||
2197 | * locking is omitted here. | ||
2198 | */ | ||
2199 | void | ||
2200 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) | ||
2201 | { | ||
2202 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) | ||
2203 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | ||
2204 | &fs_info->qgroup_rescan_work); | ||
2205 | } | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4febca4fc2de..12096496cc99 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -1305,6 +1305,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, | |||
1305 | struct extent_buffer *eb; | 1305 | struct extent_buffer *eb; |
1306 | struct btrfs_root_item *root_item; | 1306 | struct btrfs_root_item *root_item; |
1307 | struct btrfs_key root_key; | 1307 | struct btrfs_key root_key; |
1308 | u64 last_snap = 0; | ||
1308 | int ret; | 1309 | int ret; |
1309 | 1310 | ||
1310 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | 1311 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); |
@@ -1320,6 +1321,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, | |||
1320 | BTRFS_TREE_RELOC_OBJECTID); | 1321 | BTRFS_TREE_RELOC_OBJECTID); |
1321 | BUG_ON(ret); | 1322 | BUG_ON(ret); |
1322 | 1323 | ||
1324 | last_snap = btrfs_root_last_snapshot(&root->root_item); | ||
1323 | btrfs_set_root_last_snapshot(&root->root_item, | 1325 | btrfs_set_root_last_snapshot(&root->root_item, |
1324 | trans->transid - 1); | 1326 | trans->transid - 1); |
1325 | } else { | 1327 | } else { |
@@ -1345,6 +1347,12 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, | |||
1345 | memset(&root_item->drop_progress, 0, | 1347 | memset(&root_item->drop_progress, 0, |
1346 | sizeof(struct btrfs_disk_key)); | 1348 | sizeof(struct btrfs_disk_key)); |
1347 | root_item->drop_level = 0; | 1349 | root_item->drop_level = 0; |
1350 | /* | ||
1351 | * abuse rtransid, it is safe because it is impossible to | ||
1352 | * receive data into a relocation tree. | ||
1353 | */ | ||
1354 | btrfs_set_root_rtransid(root_item, last_snap); | ||
1355 | btrfs_set_root_otransid(root_item, trans->transid); | ||
1348 | } | 1356 | } |
1349 | 1357 | ||
1350 | btrfs_tree_unlock(eb); | 1358 | btrfs_tree_unlock(eb); |
@@ -1355,8 +1363,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, | |||
1355 | BUG_ON(ret); | 1363 | BUG_ON(ret); |
1356 | kfree(root_item); | 1364 | kfree(root_item); |
1357 | 1365 | ||
1358 | reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | 1366 | reloc_root = btrfs_read_fs_root(root->fs_info->tree_root, &root_key); |
1359 | &root_key); | ||
1360 | BUG_ON(IS_ERR(reloc_root)); | 1367 | BUG_ON(IS_ERR(reloc_root)); |
1361 | reloc_root->last_trans = trans->transid; | 1368 | reloc_root->last_trans = trans->transid; |
1362 | return reloc_root; | 1369 | return reloc_root; |
@@ -2273,8 +2280,12 @@ void free_reloc_roots(struct list_head *list) | |||
2273 | static noinline_for_stack | 2280 | static noinline_for_stack |
2274 | int merge_reloc_roots(struct reloc_control *rc) | 2281 | int merge_reloc_roots(struct reloc_control *rc) |
2275 | { | 2282 | { |
2283 | struct btrfs_trans_handle *trans; | ||
2276 | struct btrfs_root *root; | 2284 | struct btrfs_root *root; |
2277 | struct btrfs_root *reloc_root; | 2285 | struct btrfs_root *reloc_root; |
2286 | u64 last_snap; | ||
2287 | u64 otransid; | ||
2288 | u64 objectid; | ||
2278 | LIST_HEAD(reloc_roots); | 2289 | LIST_HEAD(reloc_roots); |
2279 | int found = 0; | 2290 | int found = 0; |
2280 | int ret = 0; | 2291 | int ret = 0; |
@@ -2308,12 +2319,44 @@ again: | |||
2308 | } else { | 2319 | } else { |
2309 | list_del_init(&reloc_root->root_list); | 2320 | list_del_init(&reloc_root->root_list); |
2310 | } | 2321 | } |
2322 | |||
2323 | /* | ||
2324 | * we keep the old last snapshod transid in rtranid when we | ||
2325 | * created the relocation tree. | ||
2326 | */ | ||
2327 | last_snap = btrfs_root_rtransid(&reloc_root->root_item); | ||
2328 | otransid = btrfs_root_otransid(&reloc_root->root_item); | ||
2329 | objectid = reloc_root->root_key.offset; | ||
2330 | |||
2311 | ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); | 2331 | ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); |
2312 | if (ret < 0) { | 2332 | if (ret < 0) { |
2313 | if (list_empty(&reloc_root->root_list)) | 2333 | if (list_empty(&reloc_root->root_list)) |
2314 | list_add_tail(&reloc_root->root_list, | 2334 | list_add_tail(&reloc_root->root_list, |
2315 | &reloc_roots); | 2335 | &reloc_roots); |
2316 | goto out; | 2336 | goto out; |
2337 | } else if (!ret) { | ||
2338 | /* | ||
2339 | * recover the last snapshot tranid to avoid | ||
2340 | * the space balance break NOCOW. | ||
2341 | */ | ||
2342 | root = read_fs_root(rc->extent_root->fs_info, | ||
2343 | objectid); | ||
2344 | if (IS_ERR(root)) | ||
2345 | continue; | ||
2346 | |||
2347 | if (btrfs_root_refs(&root->root_item) == 0) | ||
2348 | continue; | ||
2349 | |||
2350 | trans = btrfs_join_transaction(root); | ||
2351 | BUG_ON(IS_ERR(trans)); | ||
2352 | |||
2353 | /* Check if the fs/file tree was snapshoted or not. */ | ||
2354 | if (btrfs_root_last_snapshot(&root->root_item) == | ||
2355 | otransid - 1) | ||
2356 | btrfs_set_root_last_snapshot(&root->root_item, | ||
2357 | last_snap); | ||
2358 | |||
2359 | btrfs_end_transaction(trans, root); | ||
2317 | } | 2360 | } |
2318 | } | 2361 | } |
2319 | 2362 | ||
@@ -3266,6 +3309,8 @@ static int __add_tree_block(struct reloc_control *rc, | |||
3266 | struct btrfs_path *path; | 3309 | struct btrfs_path *path; |
3267 | struct btrfs_key key; | 3310 | struct btrfs_key key; |
3268 | int ret; | 3311 | int ret; |
3312 | bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, | ||
3313 | SKINNY_METADATA); | ||
3269 | 3314 | ||
3270 | if (tree_block_processed(bytenr, blocksize, rc)) | 3315 | if (tree_block_processed(bytenr, blocksize, rc)) |
3271 | return 0; | 3316 | return 0; |
@@ -3276,10 +3321,15 @@ static int __add_tree_block(struct reloc_control *rc, | |||
3276 | path = btrfs_alloc_path(); | 3321 | path = btrfs_alloc_path(); |
3277 | if (!path) | 3322 | if (!path) |
3278 | return -ENOMEM; | 3323 | return -ENOMEM; |
3279 | 3324 | again: | |
3280 | key.objectid = bytenr; | 3325 | key.objectid = bytenr; |
3281 | key.type = BTRFS_EXTENT_ITEM_KEY; | 3326 | if (skinny) { |
3282 | key.offset = blocksize; | 3327 | key.type = BTRFS_METADATA_ITEM_KEY; |
3328 | key.offset = (u64)-1; | ||
3329 | } else { | ||
3330 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
3331 | key.offset = blocksize; | ||
3332 | } | ||
3283 | 3333 | ||
3284 | path->search_commit_root = 1; | 3334 | path->search_commit_root = 1; |
3285 | path->skip_locking = 1; | 3335 | path->skip_locking = 1; |
@@ -3287,11 +3337,23 @@ static int __add_tree_block(struct reloc_control *rc, | |||
3287 | if (ret < 0) | 3337 | if (ret < 0) |
3288 | goto out; | 3338 | goto out; |
3289 | 3339 | ||
3290 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | 3340 | if (ret > 0 && skinny) { |
3291 | if (ret > 0) { | 3341 | if (path->slots[0]) { |
3292 | if (key.objectid == bytenr && | 3342 | path->slots[0]--; |
3293 | key.type == BTRFS_METADATA_ITEM_KEY) | 3343 | btrfs_item_key_to_cpu(path->nodes[0], &key, |
3294 | ret = 0; | 3344 | path->slots[0]); |
3345 | if (key.objectid == bytenr && | ||
3346 | (key.type == BTRFS_METADATA_ITEM_KEY || | ||
3347 | (key.type == BTRFS_EXTENT_ITEM_KEY && | ||
3348 | key.offset == blocksize))) | ||
3349 | ret = 0; | ||
3350 | } | ||
3351 | |||
3352 | if (ret) { | ||
3353 | skinny = false; | ||
3354 | btrfs_release_path(path); | ||
3355 | goto again; | ||
3356 | } | ||
3295 | } | 3357 | } |
3296 | BUG_ON(ret); | 3358 | BUG_ON(ret); |
3297 | 3359 | ||
@@ -4160,12 +4222,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
4160 | (unsigned long long)rc->block_group->key.objectid, | 4222 | (unsigned long long)rc->block_group->key.objectid, |
4161 | (unsigned long long)rc->block_group->flags); | 4223 | (unsigned long long)rc->block_group->flags); |
4162 | 4224 | ||
4163 | ret = btrfs_start_delalloc_inodes(fs_info->tree_root, 0); | 4225 | ret = btrfs_start_all_delalloc_inodes(fs_info, 0); |
4164 | if (ret < 0) { | 4226 | if (ret < 0) { |
4165 | err = ret; | 4227 | err = ret; |
4166 | goto out; | 4228 | goto out; |
4167 | } | 4229 | } |
4168 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); | 4230 | btrfs_wait_all_ordered_extents(fs_info, 0); |
4169 | 4231 | ||
4170 | while (1) { | 4232 | while (1) { |
4171 | mutex_lock(&fs_info->cleaner_mutex); | 4233 | mutex_lock(&fs_info->cleaner_mutex); |
@@ -4277,7 +4339,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4277 | key.type != BTRFS_ROOT_ITEM_KEY) | 4339 | key.type != BTRFS_ROOT_ITEM_KEY) |
4278 | break; | 4340 | break; |
4279 | 4341 | ||
4280 | reloc_root = btrfs_read_fs_root_no_radix(root, &key); | 4342 | reloc_root = btrfs_read_fs_root(root, &key); |
4281 | if (IS_ERR(reloc_root)) { | 4343 | if (IS_ERR(reloc_root)) { |
4282 | err = PTR_ERR(reloc_root); | 4344 | err = PTR_ERR(reloc_root); |
4283 | goto out; | 4345 | goto out; |
@@ -4396,10 +4458,8 @@ out: | |||
4396 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | 4458 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) |
4397 | { | 4459 | { |
4398 | struct btrfs_ordered_sum *sums; | 4460 | struct btrfs_ordered_sum *sums; |
4399 | struct btrfs_sector_sum *sector_sum; | ||
4400 | struct btrfs_ordered_extent *ordered; | 4461 | struct btrfs_ordered_extent *ordered; |
4401 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4462 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4402 | size_t offset; | ||
4403 | int ret; | 4463 | int ret; |
4404 | u64 disk_bytenr; | 4464 | u64 disk_bytenr; |
4405 | LIST_HEAD(list); | 4465 | LIST_HEAD(list); |
@@ -4413,19 +4473,13 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | |||
4413 | if (ret) | 4473 | if (ret) |
4414 | goto out; | 4474 | goto out; |
4415 | 4475 | ||
4476 | disk_bytenr = ordered->start; | ||
4416 | while (!list_empty(&list)) { | 4477 | while (!list_empty(&list)) { |
4417 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); | 4478 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); |
4418 | list_del_init(&sums->list); | 4479 | list_del_init(&sums->list); |
4419 | 4480 | ||
4420 | sector_sum = sums->sums; | 4481 | sums->bytenr = disk_bytenr; |
4421 | sums->bytenr = ordered->start; | 4482 | disk_bytenr += sums->len; |
4422 | |||
4423 | offset = 0; | ||
4424 | while (offset < sums->len) { | ||
4425 | sector_sum->bytenr += ordered->start - disk_bytenr; | ||
4426 | sector_sum++; | ||
4427 | offset += root->sectorsize; | ||
4428 | } | ||
4429 | 4483 | ||
4430 | btrfs_add_ordered_sum(inode, ordered, sums); | 4484 | btrfs_add_ordered_sum(inode, ordered, sums); |
4431 | } | 4485 | } |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 5bf1ed57f178..ffb1036ef10d 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -64,52 +64,59 @@ void btrfs_read_root_item(struct extent_buffer *eb, int slot, | |||
64 | } | 64 | } |
65 | 65 | ||
66 | /* | 66 | /* |
67 | * lookup the root with the highest offset for a given objectid. The key we do | 67 | * btrfs_find_root - lookup the root by the key. |
68 | * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 | 68 | * root: the root of the root tree |
69 | * on error. | 69 | * search_key: the key to search |
70 | * path: the path we search | ||
71 | * root_item: the root item of the tree we look for | ||
72 | * root_key: the reak key of the tree we look for | ||
73 | * | ||
74 | * If ->offset of 'seach_key' is -1ULL, it means we are not sure the offset | ||
75 | * of the search key, just lookup the root with the highest offset for a | ||
76 | * given objectid. | ||
77 | * | ||
78 | * If we find something return 0, otherwise > 0, < 0 on error. | ||
70 | */ | 79 | */ |
71 | int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | 80 | int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key, |
72 | struct btrfs_root_item *item, struct btrfs_key *key) | 81 | struct btrfs_path *path, struct btrfs_root_item *root_item, |
82 | struct btrfs_key *root_key) | ||
73 | { | 83 | { |
74 | struct btrfs_path *path; | ||
75 | struct btrfs_key search_key; | ||
76 | struct btrfs_key found_key; | 84 | struct btrfs_key found_key; |
77 | struct extent_buffer *l; | 85 | struct extent_buffer *l; |
78 | int ret; | 86 | int ret; |
79 | int slot; | 87 | int slot; |
80 | 88 | ||
81 | search_key.objectid = objectid; | 89 | ret = btrfs_search_slot(NULL, root, search_key, path, 0, 0); |
82 | search_key.type = BTRFS_ROOT_ITEM_KEY; | ||
83 | search_key.offset = (u64)-1; | ||
84 | |||
85 | path = btrfs_alloc_path(); | ||
86 | if (!path) | ||
87 | return -ENOMEM; | ||
88 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | ||
89 | if (ret < 0) | 90 | if (ret < 0) |
90 | goto out; | 91 | return ret; |
91 | 92 | ||
92 | BUG_ON(ret == 0); | 93 | if (search_key->offset != -1ULL) { /* the search key is exact */ |
93 | if (path->slots[0] == 0) { | 94 | if (ret > 0) |
94 | ret = 1; | 95 | goto out; |
95 | goto out; | 96 | } else { |
97 | BUG_ON(ret == 0); /* Logical error */ | ||
98 | if (path->slots[0] == 0) | ||
99 | goto out; | ||
100 | path->slots[0]--; | ||
101 | ret = 0; | ||
96 | } | 102 | } |
103 | |||
97 | l = path->nodes[0]; | 104 | l = path->nodes[0]; |
98 | slot = path->slots[0] - 1; | 105 | slot = path->slots[0]; |
106 | |||
99 | btrfs_item_key_to_cpu(l, &found_key, slot); | 107 | btrfs_item_key_to_cpu(l, &found_key, slot); |
100 | if (found_key.objectid != objectid || | 108 | if (found_key.objectid != search_key->objectid || |
101 | found_key.type != BTRFS_ROOT_ITEM_KEY) { | 109 | found_key.type != BTRFS_ROOT_ITEM_KEY) { |
102 | ret = 1; | 110 | ret = 1; |
103 | goto out; | 111 | goto out; |
104 | } | 112 | } |
105 | if (item) | ||
106 | btrfs_read_root_item(l, slot, item); | ||
107 | if (key) | ||
108 | memcpy(key, &found_key, sizeof(found_key)); | ||
109 | 113 | ||
110 | ret = 0; | 114 | if (root_item) |
115 | btrfs_read_root_item(l, slot, root_item); | ||
116 | if (root_key) | ||
117 | memcpy(root_key, &found_key, sizeof(found_key)); | ||
111 | out: | 118 | out: |
112 | btrfs_free_path(path); | 119 | btrfs_release_path(path); |
113 | return ret; | 120 | return ret; |
114 | } | 121 | } |
115 | 122 | ||
@@ -212,86 +219,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
212 | return btrfs_insert_item(trans, root, key, item, sizeof(*item)); | 219 | return btrfs_insert_item(trans, root, key, item, sizeof(*item)); |
213 | } | 220 | } |
214 | 221 | ||
215 | /* | ||
216 | * at mount time we want to find all the old transaction snapshots that were in | ||
217 | * the process of being deleted if we crashed. This is any root item with an | ||
218 | * offset lower than the latest root. They need to be queued for deletion to | ||
219 | * finish what was happening when we crashed. | ||
220 | */ | ||
221 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) | ||
222 | { | ||
223 | struct btrfs_root *dead_root; | ||
224 | struct btrfs_root_item *ri; | ||
225 | struct btrfs_key key; | ||
226 | struct btrfs_key found_key; | ||
227 | struct btrfs_path *path; | ||
228 | int ret; | ||
229 | u32 nritems; | ||
230 | struct extent_buffer *leaf; | ||
231 | int slot; | ||
232 | |||
233 | key.objectid = objectid; | ||
234 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
235 | key.offset = 0; | ||
236 | path = btrfs_alloc_path(); | ||
237 | if (!path) | ||
238 | return -ENOMEM; | ||
239 | |||
240 | again: | ||
241 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
242 | if (ret < 0) | ||
243 | goto err; | ||
244 | while (1) { | ||
245 | leaf = path->nodes[0]; | ||
246 | nritems = btrfs_header_nritems(leaf); | ||
247 | slot = path->slots[0]; | ||
248 | if (slot >= nritems) { | ||
249 | ret = btrfs_next_leaf(root, path); | ||
250 | if (ret) | ||
251 | break; | ||
252 | leaf = path->nodes[0]; | ||
253 | nritems = btrfs_header_nritems(leaf); | ||
254 | slot = path->slots[0]; | ||
255 | } | ||
256 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
257 | if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) | ||
258 | goto next; | ||
259 | |||
260 | if (key.objectid < objectid) | ||
261 | goto next; | ||
262 | |||
263 | if (key.objectid > objectid) | ||
264 | break; | ||
265 | |||
266 | ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item); | ||
267 | if (btrfs_disk_root_refs(leaf, ri) != 0) | ||
268 | goto next; | ||
269 | |||
270 | memcpy(&found_key, &key, sizeof(key)); | ||
271 | key.offset++; | ||
272 | btrfs_release_path(path); | ||
273 | dead_root = | ||
274 | btrfs_read_fs_root_no_radix(root->fs_info->tree_root, | ||
275 | &found_key); | ||
276 | if (IS_ERR(dead_root)) { | ||
277 | ret = PTR_ERR(dead_root); | ||
278 | goto err; | ||
279 | } | ||
280 | |||
281 | ret = btrfs_add_dead_root(dead_root); | ||
282 | if (ret) | ||
283 | goto err; | ||
284 | goto again; | ||
285 | next: | ||
286 | slot++; | ||
287 | path->slots[0]++; | ||
288 | } | ||
289 | ret = 0; | ||
290 | err: | ||
291 | btrfs_free_path(path); | ||
292 | return ret; | ||
293 | } | ||
294 | |||
295 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | 222 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root) |
296 | { | 223 | { |
297 | struct extent_buffer *leaf; | 224 | struct extent_buffer *leaf; |
@@ -301,6 +228,10 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
301 | struct btrfs_root *root; | 228 | struct btrfs_root *root; |
302 | int err = 0; | 229 | int err = 0; |
303 | int ret; | 230 | int ret; |
231 | bool can_recover = true; | ||
232 | |||
233 | if (tree_root->fs_info->sb->s_flags & MS_RDONLY) | ||
234 | can_recover = false; | ||
304 | 235 | ||
305 | path = btrfs_alloc_path(); | 236 | path = btrfs_alloc_path(); |
306 | if (!path) | 237 | if (!path) |
@@ -340,20 +271,52 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
340 | root_key.objectid = key.offset; | 271 | root_key.objectid = key.offset; |
341 | key.offset++; | 272 | key.offset++; |
342 | 273 | ||
343 | root = btrfs_read_fs_root_no_name(tree_root->fs_info, | 274 | root = btrfs_read_fs_root(tree_root, &root_key); |
344 | &root_key); | 275 | err = PTR_RET(root); |
345 | if (!IS_ERR(root)) | 276 | if (err && err != -ENOENT) { |
277 | break; | ||
278 | } else if (err == -ENOENT) { | ||
279 | struct btrfs_trans_handle *trans; | ||
280 | |||
281 | btrfs_release_path(path); | ||
282 | |||
283 | trans = btrfs_join_transaction(tree_root); | ||
284 | if (IS_ERR(trans)) { | ||
285 | err = PTR_ERR(trans); | ||
286 | btrfs_error(tree_root->fs_info, err, | ||
287 | "Failed to start trans to delete " | ||
288 | "orphan item"); | ||
289 | break; | ||
290 | } | ||
291 | err = btrfs_del_orphan_item(trans, tree_root, | ||
292 | root_key.objectid); | ||
293 | btrfs_end_transaction(trans, tree_root); | ||
294 | if (err) { | ||
295 | btrfs_error(tree_root->fs_info, err, | ||
296 | "Failed to delete root orphan " | ||
297 | "item"); | ||
298 | break; | ||
299 | } | ||
346 | continue; | 300 | continue; |
301 | } | ||
347 | 302 | ||
348 | ret = PTR_ERR(root); | 303 | if (btrfs_root_refs(&root->root_item) == 0) { |
349 | if (ret != -ENOENT) { | 304 | btrfs_add_dead_root(root); |
350 | err = ret; | 305 | continue; |
306 | } | ||
307 | |||
308 | err = btrfs_init_fs_root(root); | ||
309 | if (err) { | ||
310 | btrfs_free_fs_root(root); | ||
351 | break; | 311 | break; |
352 | } | 312 | } |
353 | 313 | ||
354 | ret = btrfs_find_dead_roots(tree_root, root_key.objectid); | 314 | root->orphan_item_inserted = 1; |
355 | if (ret) { | 315 | |
356 | err = ret; | 316 | err = btrfs_insert_fs_root(root->fs_info, root); |
317 | if (err) { | ||
318 | BUG_ON(err == -EEXIST); | ||
319 | btrfs_free_fs_root(root); | ||
357 | break; | 320 | break; |
358 | } | 321 | } |
359 | } | 322 | } |
@@ -368,8 +331,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
368 | { | 331 | { |
369 | struct btrfs_path *path; | 332 | struct btrfs_path *path; |
370 | int ret; | 333 | int ret; |
371 | struct btrfs_root_item *ri; | ||
372 | struct extent_buffer *leaf; | ||
373 | 334 | ||
374 | path = btrfs_alloc_path(); | 335 | path = btrfs_alloc_path(); |
375 | if (!path) | 336 | if (!path) |
@@ -379,8 +340,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
379 | goto out; | 340 | goto out; |
380 | 341 | ||
381 | BUG_ON(ret != 0); | 342 | BUG_ON(ret != 0); |
382 | leaf = path->nodes[0]; | ||
383 | ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); | ||
384 | 343 | ||
385 | ret = btrfs_del_item(trans, root, path); | 344 | ret = btrfs_del_item(trans, root, path); |
386 | out: | 345 | out: |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 79bd479317cb..64a157becbe5 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -2126,8 +2126,7 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
2126 | u8 *csum) | 2126 | u8 *csum) |
2127 | { | 2127 | { |
2128 | struct btrfs_ordered_sum *sum = NULL; | 2128 | struct btrfs_ordered_sum *sum = NULL; |
2129 | int ret = 0; | 2129 | unsigned long index; |
2130 | unsigned long i; | ||
2131 | unsigned long num_sectors; | 2130 | unsigned long num_sectors; |
2132 | 2131 | ||
2133 | while (!list_empty(&sctx->csum_list)) { | 2132 | while (!list_empty(&sctx->csum_list)) { |
@@ -2146,19 +2145,14 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
2146 | if (!sum) | 2145 | if (!sum) |
2147 | return 0; | 2146 | return 0; |
2148 | 2147 | ||
2148 | index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize; | ||
2149 | num_sectors = sum->len / sctx->sectorsize; | 2149 | num_sectors = sum->len / sctx->sectorsize; |
2150 | for (i = 0; i < num_sectors; ++i) { | 2150 | memcpy(csum, sum->sums + index, sctx->csum_size); |
2151 | if (sum->sums[i].bytenr == logical) { | 2151 | if (index == num_sectors - 1) { |
2152 | memcpy(csum, &sum->sums[i].sum, sctx->csum_size); | ||
2153 | ret = 1; | ||
2154 | break; | ||
2155 | } | ||
2156 | } | ||
2157 | if (ret && i == num_sectors - 1) { | ||
2158 | list_del(&sum->list); | 2152 | list_del(&sum->list); |
2159 | kfree(sum); | 2153 | kfree(sum); |
2160 | } | 2154 | } |
2161 | return ret; | 2155 | return 1; |
2162 | } | 2156 | } |
2163 | 2157 | ||
2164 | /* scrub extent tries to collect up to 64 kB for each bio */ | 2158 | /* scrub extent tries to collect up to 64 kB for each bio */ |
@@ -2501,10 +2495,11 @@ again: | |||
2501 | ret = scrub_extent(sctx, extent_logical, extent_len, | 2495 | ret = scrub_extent(sctx, extent_logical, extent_len, |
2502 | extent_physical, extent_dev, flags, | 2496 | extent_physical, extent_dev, flags, |
2503 | generation, extent_mirror_num, | 2497 | generation, extent_mirror_num, |
2504 | extent_physical); | 2498 | extent_logical - logical + physical); |
2505 | if (ret) | 2499 | if (ret) |
2506 | goto out; | 2500 | goto out; |
2507 | 2501 | ||
2502 | scrub_free_csums(sctx); | ||
2508 | if (extent_logical + extent_len < | 2503 | if (extent_logical + extent_len < |
2509 | key.objectid + bytes) { | 2504 | key.objectid + bytes) { |
2510 | logical += increment; | 2505 | logical += increment; |
@@ -3204,16 +3199,18 @@ out: | |||
3204 | 3199 | ||
3205 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) | 3200 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) |
3206 | { | 3201 | { |
3207 | unsigned long index; | ||
3208 | struct scrub_copy_nocow_ctx *nocow_ctx = ctx; | 3202 | struct scrub_copy_nocow_ctx *nocow_ctx = ctx; |
3209 | int ret = 0; | 3203 | struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; |
3210 | struct btrfs_key key; | 3204 | struct btrfs_key key; |
3211 | struct inode *inode = NULL; | 3205 | struct inode *inode; |
3206 | struct page *page; | ||
3212 | struct btrfs_root *local_root; | 3207 | struct btrfs_root *local_root; |
3213 | u64 physical_for_dev_replace; | 3208 | u64 physical_for_dev_replace; |
3214 | u64 len; | 3209 | u64 len; |
3215 | struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; | 3210 | unsigned long index; |
3216 | int srcu_index; | 3211 | int srcu_index; |
3212 | int ret; | ||
3213 | int err; | ||
3217 | 3214 | ||
3218 | key.objectid = root; | 3215 | key.objectid = root; |
3219 | key.type = BTRFS_ROOT_ITEM_KEY; | 3216 | key.type = BTRFS_ROOT_ITEM_KEY; |
@@ -3227,6 +3224,11 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) | |||
3227 | return PTR_ERR(local_root); | 3224 | return PTR_ERR(local_root); |
3228 | } | 3225 | } |
3229 | 3226 | ||
3227 | if (btrfs_root_refs(&local_root->root_item) == 0) { | ||
3228 | srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); | ||
3229 | return -ENOENT; | ||
3230 | } | ||
3231 | |||
3230 | key.type = BTRFS_INODE_ITEM_KEY; | 3232 | key.type = BTRFS_INODE_ITEM_KEY; |
3231 | key.objectid = inum; | 3233 | key.objectid = inum; |
3232 | key.offset = 0; | 3234 | key.offset = 0; |
@@ -3235,19 +3237,21 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) | |||
3235 | if (IS_ERR(inode)) | 3237 | if (IS_ERR(inode)) |
3236 | return PTR_ERR(inode); | 3238 | return PTR_ERR(inode); |
3237 | 3239 | ||
3240 | /* Avoid truncate/dio/punch hole.. */ | ||
3241 | mutex_lock(&inode->i_mutex); | ||
3242 | inode_dio_wait(inode); | ||
3243 | |||
3244 | ret = 0; | ||
3238 | physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; | 3245 | physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; |
3239 | len = nocow_ctx->len; | 3246 | len = nocow_ctx->len; |
3240 | while (len >= PAGE_CACHE_SIZE) { | 3247 | while (len >= PAGE_CACHE_SIZE) { |
3241 | struct page *page = NULL; | ||
3242 | int ret_sub; | ||
3243 | |||
3244 | index = offset >> PAGE_CACHE_SHIFT; | 3248 | index = offset >> PAGE_CACHE_SHIFT; |
3245 | 3249 | again: | |
3246 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | 3250 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); |
3247 | if (!page) { | 3251 | if (!page) { |
3248 | pr_err("find_or_create_page() failed\n"); | 3252 | pr_err("find_or_create_page() failed\n"); |
3249 | ret = -ENOMEM; | 3253 | ret = -ENOMEM; |
3250 | goto next_page; | 3254 | goto out; |
3251 | } | 3255 | } |
3252 | 3256 | ||
3253 | if (PageUptodate(page)) { | 3257 | if (PageUptodate(page)) { |
@@ -3255,39 +3259,49 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) | |||
3255 | goto next_page; | 3259 | goto next_page; |
3256 | } else { | 3260 | } else { |
3257 | ClearPageError(page); | 3261 | ClearPageError(page); |
3258 | ret_sub = extent_read_full_page(&BTRFS_I(inode)-> | 3262 | err = extent_read_full_page(&BTRFS_I(inode)-> |
3259 | io_tree, | 3263 | io_tree, |
3260 | page, btrfs_get_extent, | 3264 | page, btrfs_get_extent, |
3261 | nocow_ctx->mirror_num); | 3265 | nocow_ctx->mirror_num); |
3262 | if (ret_sub) { | 3266 | if (err) { |
3263 | ret = ret_sub; | 3267 | ret = err; |
3264 | goto next_page; | 3268 | goto next_page; |
3265 | } | 3269 | } |
3266 | wait_on_page_locked(page); | 3270 | |
3271 | lock_page(page); | ||
3272 | /* | ||
3273 | * If the page has been remove from the page cache, | ||
3274 | * the data on it is meaningless, because it may be | ||
3275 | * old one, the new data may be written into the new | ||
3276 | * page in the page cache. | ||
3277 | */ | ||
3278 | if (page->mapping != inode->i_mapping) { | ||
3279 | page_cache_release(page); | ||
3280 | goto again; | ||
3281 | } | ||
3267 | if (!PageUptodate(page)) { | 3282 | if (!PageUptodate(page)) { |
3268 | ret = -EIO; | 3283 | ret = -EIO; |
3269 | goto next_page; | 3284 | goto next_page; |
3270 | } | 3285 | } |
3271 | } | 3286 | } |
3272 | ret_sub = write_page_nocow(nocow_ctx->sctx, | 3287 | err = write_page_nocow(nocow_ctx->sctx, |
3273 | physical_for_dev_replace, page); | 3288 | physical_for_dev_replace, page); |
3274 | if (ret_sub) { | 3289 | if (err) |
3275 | ret = ret_sub; | 3290 | ret = err; |
3276 | goto next_page; | ||
3277 | } | ||
3278 | |||
3279 | next_page: | 3291 | next_page: |
3280 | if (page) { | 3292 | unlock_page(page); |
3281 | unlock_page(page); | 3293 | page_cache_release(page); |
3282 | put_page(page); | 3294 | |
3283 | } | 3295 | if (ret) |
3296 | break; | ||
3297 | |||
3284 | offset += PAGE_CACHE_SIZE; | 3298 | offset += PAGE_CACHE_SIZE; |
3285 | physical_for_dev_replace += PAGE_CACHE_SIZE; | 3299 | physical_for_dev_replace += PAGE_CACHE_SIZE; |
3286 | len -= PAGE_CACHE_SIZE; | 3300 | len -= PAGE_CACHE_SIZE; |
3287 | } | 3301 | } |
3288 | 3302 | out: | |
3289 | if (inode) | 3303 | mutex_unlock(&inode->i_mutex); |
3290 | iput(inode); | 3304 | iput(inode); |
3291 | return ret; | 3305 | return ret; |
3292 | } | 3306 | } |
3293 | 3307 | ||
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index ff40f1c00ce3..d3f3b43cae0b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -158,7 +158,7 @@ static void fs_path_reset(struct fs_path *p) | |||
158 | } | 158 | } |
159 | } | 159 | } |
160 | 160 | ||
161 | static struct fs_path *fs_path_alloc(struct send_ctx *sctx) | 161 | static struct fs_path *fs_path_alloc(void) |
162 | { | 162 | { |
163 | struct fs_path *p; | 163 | struct fs_path *p; |
164 | 164 | ||
@@ -173,11 +173,11 @@ static struct fs_path *fs_path_alloc(struct send_ctx *sctx) | |||
173 | return p; | 173 | return p; |
174 | } | 174 | } |
175 | 175 | ||
176 | static struct fs_path *fs_path_alloc_reversed(struct send_ctx *sctx) | 176 | static struct fs_path *fs_path_alloc_reversed(void) |
177 | { | 177 | { |
178 | struct fs_path *p; | 178 | struct fs_path *p; |
179 | 179 | ||
180 | p = fs_path_alloc(sctx); | 180 | p = fs_path_alloc(); |
181 | if (!p) | 181 | if (!p) |
182 | return NULL; | 182 | return NULL; |
183 | p->reversed = 1; | 183 | p->reversed = 1; |
@@ -185,7 +185,7 @@ static struct fs_path *fs_path_alloc_reversed(struct send_ctx *sctx) | |||
185 | return p; | 185 | return p; |
186 | } | 186 | } |
187 | 187 | ||
188 | static void fs_path_free(struct send_ctx *sctx, struct fs_path *p) | 188 | static void fs_path_free(struct fs_path *p) |
189 | { | 189 | { |
190 | if (!p) | 190 | if (!p) |
191 | return; | 191 | return; |
@@ -753,8 +753,7 @@ typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index, | |||
753 | * | 753 | * |
754 | * path must point to the INODE_REF or INODE_EXTREF when called. | 754 | * path must point to the INODE_REF or INODE_EXTREF when called. |
755 | */ | 755 | */ |
756 | static int iterate_inode_ref(struct send_ctx *sctx, | 756 | static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path, |
757 | struct btrfs_root *root, struct btrfs_path *path, | ||
758 | struct btrfs_key *found_key, int resolve, | 757 | struct btrfs_key *found_key, int resolve, |
759 | iterate_inode_ref_t iterate, void *ctx) | 758 | iterate_inode_ref_t iterate, void *ctx) |
760 | { | 759 | { |
@@ -777,13 +776,13 @@ static int iterate_inode_ref(struct send_ctx *sctx, | |||
777 | unsigned long elem_size; | 776 | unsigned long elem_size; |
778 | unsigned long ptr; | 777 | unsigned long ptr; |
779 | 778 | ||
780 | p = fs_path_alloc_reversed(sctx); | 779 | p = fs_path_alloc_reversed(); |
781 | if (!p) | 780 | if (!p) |
782 | return -ENOMEM; | 781 | return -ENOMEM; |
783 | 782 | ||
784 | tmp_path = alloc_path_for_send(); | 783 | tmp_path = alloc_path_for_send(); |
785 | if (!tmp_path) { | 784 | if (!tmp_path) { |
786 | fs_path_free(sctx, p); | 785 | fs_path_free(p); |
787 | return -ENOMEM; | 786 | return -ENOMEM; |
788 | } | 787 | } |
789 | 788 | ||
@@ -858,7 +857,7 @@ static int iterate_inode_ref(struct send_ctx *sctx, | |||
858 | 857 | ||
859 | out: | 858 | out: |
860 | btrfs_free_path(tmp_path); | 859 | btrfs_free_path(tmp_path); |
861 | fs_path_free(sctx, p); | 860 | fs_path_free(p); |
862 | return ret; | 861 | return ret; |
863 | } | 862 | } |
864 | 863 | ||
@@ -874,8 +873,7 @@ typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key, | |||
874 | * | 873 | * |
875 | * path must point to the dir item when called. | 874 | * path must point to the dir item when called. |
876 | */ | 875 | */ |
877 | static int iterate_dir_item(struct send_ctx *sctx, | 876 | static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, |
878 | struct btrfs_root *root, struct btrfs_path *path, | ||
879 | struct btrfs_key *found_key, | 877 | struct btrfs_key *found_key, |
880 | iterate_dir_item_t iterate, void *ctx) | 878 | iterate_dir_item_t iterate, void *ctx) |
881 | { | 879 | { |
@@ -990,7 +988,7 @@ static int __copy_first_ref(int num, u64 dir, int index, | |||
990 | * Retrieve the first path of an inode. If an inode has more then one | 988 | * Retrieve the first path of an inode. If an inode has more then one |
991 | * ref/hardlink, this is ignored. | 989 | * ref/hardlink, this is ignored. |
992 | */ | 990 | */ |
993 | static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root, | 991 | static int get_inode_path(struct btrfs_root *root, |
994 | u64 ino, struct fs_path *path) | 992 | u64 ino, struct fs_path *path) |
995 | { | 993 | { |
996 | int ret; | 994 | int ret; |
@@ -1022,8 +1020,8 @@ static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root, | |||
1022 | goto out; | 1020 | goto out; |
1023 | } | 1021 | } |
1024 | 1022 | ||
1025 | ret = iterate_inode_ref(sctx, root, p, &found_key, 1, | 1023 | ret = iterate_inode_ref(root, p, &found_key, 1, |
1026 | __copy_first_ref, path); | 1024 | __copy_first_ref, path); |
1027 | if (ret < 0) | 1025 | if (ret < 0) |
1028 | goto out; | 1026 | goto out; |
1029 | ret = 0; | 1027 | ret = 0; |
@@ -1314,8 +1312,7 @@ out: | |||
1314 | return ret; | 1312 | return ret; |
1315 | } | 1313 | } |
1316 | 1314 | ||
1317 | static int read_symlink(struct send_ctx *sctx, | 1315 | static int read_symlink(struct btrfs_root *root, |
1318 | struct btrfs_root *root, | ||
1319 | u64 ino, | 1316 | u64 ino, |
1320 | struct fs_path *dest) | 1317 | struct fs_path *dest) |
1321 | { | 1318 | { |
@@ -1562,8 +1559,7 @@ out: | |||
1562 | * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir, | 1559 | * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir, |
1563 | * generation of the parent dir and the name of the dir entry. | 1560 | * generation of the parent dir and the name of the dir entry. |
1564 | */ | 1561 | */ |
1565 | static int get_first_ref(struct send_ctx *sctx, | 1562 | static int get_first_ref(struct btrfs_root *root, u64 ino, |
1566 | struct btrfs_root *root, u64 ino, | ||
1567 | u64 *dir, u64 *dir_gen, struct fs_path *name) | 1563 | u64 *dir, u64 *dir_gen, struct fs_path *name) |
1568 | { | 1564 | { |
1569 | int ret; | 1565 | int ret; |
@@ -1628,8 +1624,7 @@ out: | |||
1628 | return ret; | 1624 | return ret; |
1629 | } | 1625 | } |
1630 | 1626 | ||
1631 | static int is_first_ref(struct send_ctx *sctx, | 1627 | static int is_first_ref(struct btrfs_root *root, |
1632 | struct btrfs_root *root, | ||
1633 | u64 ino, u64 dir, | 1628 | u64 ino, u64 dir, |
1634 | const char *name, int name_len) | 1629 | const char *name, int name_len) |
1635 | { | 1630 | { |
@@ -1638,11 +1633,11 @@ static int is_first_ref(struct send_ctx *sctx, | |||
1638 | u64 tmp_dir; | 1633 | u64 tmp_dir; |
1639 | u64 tmp_dir_gen; | 1634 | u64 tmp_dir_gen; |
1640 | 1635 | ||
1641 | tmp_name = fs_path_alloc(sctx); | 1636 | tmp_name = fs_path_alloc(); |
1642 | if (!tmp_name) | 1637 | if (!tmp_name) |
1643 | return -ENOMEM; | 1638 | return -ENOMEM; |
1644 | 1639 | ||
1645 | ret = get_first_ref(sctx, root, ino, &tmp_dir, &tmp_dir_gen, tmp_name); | 1640 | ret = get_first_ref(root, ino, &tmp_dir, &tmp_dir_gen, tmp_name); |
1646 | if (ret < 0) | 1641 | if (ret < 0) |
1647 | goto out; | 1642 | goto out; |
1648 | 1643 | ||
@@ -1654,7 +1649,7 @@ static int is_first_ref(struct send_ctx *sctx, | |||
1654 | ret = !memcmp(tmp_name->start, name, name_len); | 1649 | ret = !memcmp(tmp_name->start, name, name_len); |
1655 | 1650 | ||
1656 | out: | 1651 | out: |
1657 | fs_path_free(sctx, tmp_name); | 1652 | fs_path_free(tmp_name); |
1658 | return ret; | 1653 | return ret; |
1659 | } | 1654 | } |
1660 | 1655 | ||
@@ -1783,11 +1778,11 @@ static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) | |||
1783 | if (!sctx->parent_root) | 1778 | if (!sctx->parent_root) |
1784 | goto out; | 1779 | goto out; |
1785 | 1780 | ||
1786 | name = fs_path_alloc(sctx); | 1781 | name = fs_path_alloc(); |
1787 | if (!name) | 1782 | if (!name) |
1788 | return -ENOMEM; | 1783 | return -ENOMEM; |
1789 | 1784 | ||
1790 | ret = get_first_ref(sctx, sctx->parent_root, ino, &dir, &dir_gen, name); | 1785 | ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name); |
1791 | if (ret < 0) | 1786 | if (ret < 0) |
1792 | goto out; | 1787 | goto out; |
1793 | 1788 | ||
@@ -1795,7 +1790,7 @@ static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) | |||
1795 | name->start, fs_path_len(name)); | 1790 | name->start, fs_path_len(name)); |
1796 | 1791 | ||
1797 | out: | 1792 | out: |
1798 | fs_path_free(sctx, name); | 1793 | fs_path_free(name); |
1799 | return ret; | 1794 | return ret; |
1800 | } | 1795 | } |
1801 | 1796 | ||
@@ -1979,11 +1974,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
1979 | * send_root or parent_root for ref lookup. | 1974 | * send_root or parent_root for ref lookup. |
1980 | */ | 1975 | */ |
1981 | if (ino < sctx->send_progress) | 1976 | if (ino < sctx->send_progress) |
1982 | ret = get_first_ref(sctx, sctx->send_root, ino, | 1977 | ret = get_first_ref(sctx->send_root, ino, |
1983 | parent_ino, parent_gen, dest); | 1978 | parent_ino, parent_gen, dest); |
1984 | else | 1979 | else |
1985 | ret = get_first_ref(sctx, sctx->parent_root, ino, | 1980 | ret = get_first_ref(sctx->parent_root, ino, |
1986 | parent_ino, parent_gen, dest); | 1981 | parent_ino, parent_gen, dest); |
1987 | if (ret < 0) | 1982 | if (ret < 0) |
1988 | goto out; | 1983 | goto out; |
1989 | 1984 | ||
@@ -2070,7 +2065,7 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
2070 | u64 parent_gen = 0; | 2065 | u64 parent_gen = 0; |
2071 | int stop = 0; | 2066 | int stop = 0; |
2072 | 2067 | ||
2073 | name = fs_path_alloc(sctx); | 2068 | name = fs_path_alloc(); |
2074 | if (!name) { | 2069 | if (!name) { |
2075 | ret = -ENOMEM; | 2070 | ret = -ENOMEM; |
2076 | goto out; | 2071 | goto out; |
@@ -2098,7 +2093,7 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
2098 | } | 2093 | } |
2099 | 2094 | ||
2100 | out: | 2095 | out: |
2101 | fs_path_free(sctx, name); | 2096 | fs_path_free(name); |
2102 | if (!ret) | 2097 | if (!ret) |
2103 | fs_path_unreverse(dest); | 2098 | fs_path_unreverse(dest); |
2104 | return ret; | 2099 | return ret; |
@@ -2263,7 +2258,7 @@ static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) | |||
2263 | 2258 | ||
2264 | verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); | 2259 | verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); |
2265 | 2260 | ||
2266 | p = fs_path_alloc(sctx); | 2261 | p = fs_path_alloc(); |
2267 | if (!p) | 2262 | if (!p) |
2268 | return -ENOMEM; | 2263 | return -ENOMEM; |
2269 | 2264 | ||
@@ -2281,7 +2276,7 @@ verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); | |||
2281 | 2276 | ||
2282 | tlv_put_failure: | 2277 | tlv_put_failure: |
2283 | out: | 2278 | out: |
2284 | fs_path_free(sctx, p); | 2279 | fs_path_free(p); |
2285 | return ret; | 2280 | return ret; |
2286 | } | 2281 | } |
2287 | 2282 | ||
@@ -2292,7 +2287,7 @@ static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) | |||
2292 | 2287 | ||
2293 | verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); | 2288 | verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); |
2294 | 2289 | ||
2295 | p = fs_path_alloc(sctx); | 2290 | p = fs_path_alloc(); |
2296 | if (!p) | 2291 | if (!p) |
2297 | return -ENOMEM; | 2292 | return -ENOMEM; |
2298 | 2293 | ||
@@ -2310,7 +2305,7 @@ verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); | |||
2310 | 2305 | ||
2311 | tlv_put_failure: | 2306 | tlv_put_failure: |
2312 | out: | 2307 | out: |
2313 | fs_path_free(sctx, p); | 2308 | fs_path_free(p); |
2314 | return ret; | 2309 | return ret; |
2315 | } | 2310 | } |
2316 | 2311 | ||
@@ -2321,7 +2316,7 @@ static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) | |||
2321 | 2316 | ||
2322 | verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); | 2317 | verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); |
2323 | 2318 | ||
2324 | p = fs_path_alloc(sctx); | 2319 | p = fs_path_alloc(); |
2325 | if (!p) | 2320 | if (!p) |
2326 | return -ENOMEM; | 2321 | return -ENOMEM; |
2327 | 2322 | ||
@@ -2340,7 +2335,7 @@ verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); | |||
2340 | 2335 | ||
2341 | tlv_put_failure: | 2336 | tlv_put_failure: |
2342 | out: | 2337 | out: |
2343 | fs_path_free(sctx, p); | 2338 | fs_path_free(p); |
2344 | return ret; | 2339 | return ret; |
2345 | } | 2340 | } |
2346 | 2341 | ||
@@ -2356,7 +2351,7 @@ static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) | |||
2356 | 2351 | ||
2357 | verbose_printk("btrfs: send_utimes %llu\n", ino); | 2352 | verbose_printk("btrfs: send_utimes %llu\n", ino); |
2358 | 2353 | ||
2359 | p = fs_path_alloc(sctx); | 2354 | p = fs_path_alloc(); |
2360 | if (!p) | 2355 | if (!p) |
2361 | return -ENOMEM; | 2356 | return -ENOMEM; |
2362 | 2357 | ||
@@ -2397,7 +2392,7 @@ verbose_printk("btrfs: send_utimes %llu\n", ino); | |||
2397 | 2392 | ||
2398 | tlv_put_failure: | 2393 | tlv_put_failure: |
2399 | out: | 2394 | out: |
2400 | fs_path_free(sctx, p); | 2395 | fs_path_free(p); |
2401 | btrfs_free_path(path); | 2396 | btrfs_free_path(path); |
2402 | return ret; | 2397 | return ret; |
2403 | } | 2398 | } |
@@ -2418,7 +2413,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino) | |||
2418 | 2413 | ||
2419 | verbose_printk("btrfs: send_create_inode %llu\n", ino); | 2414 | verbose_printk("btrfs: send_create_inode %llu\n", ino); |
2420 | 2415 | ||
2421 | p = fs_path_alloc(sctx); | 2416 | p = fs_path_alloc(); |
2422 | if (!p) | 2417 | if (!p) |
2423 | return -ENOMEM; | 2418 | return -ENOMEM; |
2424 | 2419 | ||
@@ -2459,7 +2454,7 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino); | |||
2459 | 2454 | ||
2460 | if (S_ISLNK(mode)) { | 2455 | if (S_ISLNK(mode)) { |
2461 | fs_path_reset(p); | 2456 | fs_path_reset(p); |
2462 | ret = read_symlink(sctx, sctx->send_root, ino, p); | 2457 | ret = read_symlink(sctx->send_root, ino, p); |
2463 | if (ret < 0) | 2458 | if (ret < 0) |
2464 | goto out; | 2459 | goto out; |
2465 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); | 2460 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); |
@@ -2476,7 +2471,7 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino); | |||
2476 | 2471 | ||
2477 | tlv_put_failure: | 2472 | tlv_put_failure: |
2478 | out: | 2473 | out: |
2479 | fs_path_free(sctx, p); | 2474 | fs_path_free(p); |
2480 | return ret; | 2475 | return ret; |
2481 | } | 2476 | } |
2482 | 2477 | ||
@@ -2615,13 +2610,13 @@ static int record_ref(struct list_head *head, u64 dir, | |||
2615 | return 0; | 2610 | return 0; |
2616 | } | 2611 | } |
2617 | 2612 | ||
2618 | static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) | 2613 | static void __free_recorded_refs(struct list_head *head) |
2619 | { | 2614 | { |
2620 | struct recorded_ref *cur; | 2615 | struct recorded_ref *cur; |
2621 | 2616 | ||
2622 | while (!list_empty(head)) { | 2617 | while (!list_empty(head)) { |
2623 | cur = list_entry(head->next, struct recorded_ref, list); | 2618 | cur = list_entry(head->next, struct recorded_ref, list); |
2624 | fs_path_free(sctx, cur->full_path); | 2619 | fs_path_free(cur->full_path); |
2625 | list_del(&cur->list); | 2620 | list_del(&cur->list); |
2626 | kfree(cur); | 2621 | kfree(cur); |
2627 | } | 2622 | } |
@@ -2629,8 +2624,8 @@ static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) | |||
2629 | 2624 | ||
2630 | static void free_recorded_refs(struct send_ctx *sctx) | 2625 | static void free_recorded_refs(struct send_ctx *sctx) |
2631 | { | 2626 | { |
2632 | __free_recorded_refs(sctx, &sctx->new_refs); | 2627 | __free_recorded_refs(&sctx->new_refs); |
2633 | __free_recorded_refs(sctx, &sctx->deleted_refs); | 2628 | __free_recorded_refs(&sctx->deleted_refs); |
2634 | } | 2629 | } |
2635 | 2630 | ||
2636 | /* | 2631 | /* |
@@ -2644,7 +2639,7 @@ static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, | |||
2644 | int ret; | 2639 | int ret; |
2645 | struct fs_path *orphan; | 2640 | struct fs_path *orphan; |
2646 | 2641 | ||
2647 | orphan = fs_path_alloc(sctx); | 2642 | orphan = fs_path_alloc(); |
2648 | if (!orphan) | 2643 | if (!orphan) |
2649 | return -ENOMEM; | 2644 | return -ENOMEM; |
2650 | 2645 | ||
@@ -2655,7 +2650,7 @@ static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, | |||
2655 | ret = send_rename(sctx, path, orphan); | 2650 | ret = send_rename(sctx, path, orphan); |
2656 | 2651 | ||
2657 | out: | 2652 | out: |
2658 | fs_path_free(sctx, orphan); | 2653 | fs_path_free(orphan); |
2659 | return ret; | 2654 | return ret; |
2660 | } | 2655 | } |
2661 | 2656 | ||
@@ -2746,7 +2741,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
2746 | */ | 2741 | */ |
2747 | BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); | 2742 | BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); |
2748 | 2743 | ||
2749 | valid_path = fs_path_alloc(sctx); | 2744 | valid_path = fs_path_alloc(); |
2750 | if (!valid_path) { | 2745 | if (!valid_path) { |
2751 | ret = -ENOMEM; | 2746 | ret = -ENOMEM; |
2752 | goto out; | 2747 | goto out; |
@@ -2843,9 +2838,9 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
2843 | if (ret < 0) | 2838 | if (ret < 0) |
2844 | goto out; | 2839 | goto out; |
2845 | if (ret) { | 2840 | if (ret) { |
2846 | ret = is_first_ref(sctx, sctx->parent_root, | 2841 | ret = is_first_ref(sctx->parent_root, |
2847 | ow_inode, cur->dir, cur->name, | 2842 | ow_inode, cur->dir, cur->name, |
2848 | cur->name_len); | 2843 | cur->name_len); |
2849 | if (ret < 0) | 2844 | if (ret < 0) |
2850 | goto out; | 2845 | goto out; |
2851 | if (ret) { | 2846 | if (ret) { |
@@ -3024,7 +3019,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3024 | out: | 3019 | out: |
3025 | free_recorded_refs(sctx); | 3020 | free_recorded_refs(sctx); |
3026 | ulist_free(check_dirs); | 3021 | ulist_free(check_dirs); |
3027 | fs_path_free(sctx, valid_path); | 3022 | fs_path_free(valid_path); |
3028 | return ret; | 3023 | return ret; |
3029 | } | 3024 | } |
3030 | 3025 | ||
@@ -3037,7 +3032,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
3037 | struct fs_path *p; | 3032 | struct fs_path *p; |
3038 | u64 gen; | 3033 | u64 gen; |
3039 | 3034 | ||
3040 | p = fs_path_alloc(sctx); | 3035 | p = fs_path_alloc(); |
3041 | if (!p) | 3036 | if (!p) |
3042 | return -ENOMEM; | 3037 | return -ENOMEM; |
3043 | 3038 | ||
@@ -3057,7 +3052,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
3057 | 3052 | ||
3058 | out: | 3053 | out: |
3059 | if (ret) | 3054 | if (ret) |
3060 | fs_path_free(sctx, p); | 3055 | fs_path_free(p); |
3061 | return ret; | 3056 | return ret; |
3062 | } | 3057 | } |
3063 | 3058 | ||
@@ -3070,7 +3065,7 @@ static int __record_deleted_ref(int num, u64 dir, int index, | |||
3070 | struct fs_path *p; | 3065 | struct fs_path *p; |
3071 | u64 gen; | 3066 | u64 gen; |
3072 | 3067 | ||
3073 | p = fs_path_alloc(sctx); | 3068 | p = fs_path_alloc(); |
3074 | if (!p) | 3069 | if (!p) |
3075 | return -ENOMEM; | 3070 | return -ENOMEM; |
3076 | 3071 | ||
@@ -3090,7 +3085,7 @@ static int __record_deleted_ref(int num, u64 dir, int index, | |||
3090 | 3085 | ||
3091 | out: | 3086 | out: |
3092 | if (ret) | 3087 | if (ret) |
3093 | fs_path_free(sctx, p); | 3088 | fs_path_free(p); |
3094 | return ret; | 3089 | return ret; |
3095 | } | 3090 | } |
3096 | 3091 | ||
@@ -3098,8 +3093,8 @@ static int record_new_ref(struct send_ctx *sctx) | |||
3098 | { | 3093 | { |
3099 | int ret; | 3094 | int ret; |
3100 | 3095 | ||
3101 | ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path, | 3096 | ret = iterate_inode_ref(sctx->send_root, sctx->left_path, |
3102 | sctx->cmp_key, 0, __record_new_ref, sctx); | 3097 | sctx->cmp_key, 0, __record_new_ref, sctx); |
3103 | if (ret < 0) | 3098 | if (ret < 0) |
3104 | goto out; | 3099 | goto out; |
3105 | ret = 0; | 3100 | ret = 0; |
@@ -3112,8 +3107,8 @@ static int record_deleted_ref(struct send_ctx *sctx) | |||
3112 | { | 3107 | { |
3113 | int ret; | 3108 | int ret; |
3114 | 3109 | ||
3115 | ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path, | 3110 | ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, |
3116 | sctx->cmp_key, 0, __record_deleted_ref, sctx); | 3111 | sctx->cmp_key, 0, __record_deleted_ref, sctx); |
3117 | if (ret < 0) | 3112 | if (ret < 0) |
3118 | goto out; | 3113 | goto out; |
3119 | ret = 0; | 3114 | ret = 0; |
@@ -3142,8 +3137,7 @@ static int __find_iref(int num, u64 dir, int index, | |||
3142 | return 0; | 3137 | return 0; |
3143 | } | 3138 | } |
3144 | 3139 | ||
3145 | static int find_iref(struct send_ctx *sctx, | 3140 | static int find_iref(struct btrfs_root *root, |
3146 | struct btrfs_root *root, | ||
3147 | struct btrfs_path *path, | 3141 | struct btrfs_path *path, |
3148 | struct btrfs_key *key, | 3142 | struct btrfs_key *key, |
3149 | u64 dir, struct fs_path *name) | 3143 | u64 dir, struct fs_path *name) |
@@ -3155,7 +3149,7 @@ static int find_iref(struct send_ctx *sctx, | |||
3155 | ctx.name = name; | 3149 | ctx.name = name; |
3156 | ctx.found_idx = -1; | 3150 | ctx.found_idx = -1; |
3157 | 3151 | ||
3158 | ret = iterate_inode_ref(sctx, root, path, key, 0, __find_iref, &ctx); | 3152 | ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); |
3159 | if (ret < 0) | 3153 | if (ret < 0) |
3160 | return ret; | 3154 | return ret; |
3161 | 3155 | ||
@@ -3172,7 +3166,7 @@ static int __record_changed_new_ref(int num, u64 dir, int index, | |||
3172 | int ret; | 3166 | int ret; |
3173 | struct send_ctx *sctx = ctx; | 3167 | struct send_ctx *sctx = ctx; |
3174 | 3168 | ||
3175 | ret = find_iref(sctx, sctx->parent_root, sctx->right_path, | 3169 | ret = find_iref(sctx->parent_root, sctx->right_path, |
3176 | sctx->cmp_key, dir, name); | 3170 | sctx->cmp_key, dir, name); |
3177 | if (ret == -ENOENT) | 3171 | if (ret == -ENOENT) |
3178 | ret = __record_new_ref(num, dir, index, name, sctx); | 3172 | ret = __record_new_ref(num, dir, index, name, sctx); |
@@ -3189,7 +3183,7 @@ static int __record_changed_deleted_ref(int num, u64 dir, int index, | |||
3189 | int ret; | 3183 | int ret; |
3190 | struct send_ctx *sctx = ctx; | 3184 | struct send_ctx *sctx = ctx; |
3191 | 3185 | ||
3192 | ret = find_iref(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key, | 3186 | ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, |
3193 | dir, name); | 3187 | dir, name); |
3194 | if (ret == -ENOENT) | 3188 | if (ret == -ENOENT) |
3195 | ret = __record_deleted_ref(num, dir, index, name, sctx); | 3189 | ret = __record_deleted_ref(num, dir, index, name, sctx); |
@@ -3203,11 +3197,11 @@ static int record_changed_ref(struct send_ctx *sctx) | |||
3203 | { | 3197 | { |
3204 | int ret = 0; | 3198 | int ret = 0; |
3205 | 3199 | ||
3206 | ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path, | 3200 | ret = iterate_inode_ref(sctx->send_root, sctx->left_path, |
3207 | sctx->cmp_key, 0, __record_changed_new_ref, sctx); | 3201 | sctx->cmp_key, 0, __record_changed_new_ref, sctx); |
3208 | if (ret < 0) | 3202 | if (ret < 0) |
3209 | goto out; | 3203 | goto out; |
3210 | ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path, | 3204 | ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, |
3211 | sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); | 3205 | sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); |
3212 | if (ret < 0) | 3206 | if (ret < 0) |
3213 | goto out; | 3207 | goto out; |
@@ -3266,8 +3260,7 @@ static int process_all_refs(struct send_ctx *sctx, | |||
3266 | found_key.type != BTRFS_INODE_EXTREF_KEY)) | 3260 | found_key.type != BTRFS_INODE_EXTREF_KEY)) |
3267 | break; | 3261 | break; |
3268 | 3262 | ||
3269 | ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb, | 3263 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); |
3270 | sctx); | ||
3271 | btrfs_release_path(path); | 3264 | btrfs_release_path(path); |
3272 | if (ret < 0) | 3265 | if (ret < 0) |
3273 | goto out; | 3266 | goto out; |
@@ -3335,7 +3328,7 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key, | |||
3335 | struct fs_path *p; | 3328 | struct fs_path *p; |
3336 | posix_acl_xattr_header dummy_acl; | 3329 | posix_acl_xattr_header dummy_acl; |
3337 | 3330 | ||
3338 | p = fs_path_alloc(sctx); | 3331 | p = fs_path_alloc(); |
3339 | if (!p) | 3332 | if (!p) |
3340 | return -ENOMEM; | 3333 | return -ENOMEM; |
3341 | 3334 | ||
@@ -3362,7 +3355,7 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key, | |||
3362 | ret = send_set_xattr(sctx, p, name, name_len, data, data_len); | 3355 | ret = send_set_xattr(sctx, p, name, name_len, data, data_len); |
3363 | 3356 | ||
3364 | out: | 3357 | out: |
3365 | fs_path_free(sctx, p); | 3358 | fs_path_free(p); |
3366 | return ret; | 3359 | return ret; |
3367 | } | 3360 | } |
3368 | 3361 | ||
@@ -3375,7 +3368,7 @@ static int __process_deleted_xattr(int num, struct btrfs_key *di_key, | |||
3375 | struct send_ctx *sctx = ctx; | 3368 | struct send_ctx *sctx = ctx; |
3376 | struct fs_path *p; | 3369 | struct fs_path *p; |
3377 | 3370 | ||
3378 | p = fs_path_alloc(sctx); | 3371 | p = fs_path_alloc(); |
3379 | if (!p) | 3372 | if (!p) |
3380 | return -ENOMEM; | 3373 | return -ENOMEM; |
3381 | 3374 | ||
@@ -3386,7 +3379,7 @@ static int __process_deleted_xattr(int num, struct btrfs_key *di_key, | |||
3386 | ret = send_remove_xattr(sctx, p, name, name_len); | 3379 | ret = send_remove_xattr(sctx, p, name, name_len); |
3387 | 3380 | ||
3388 | out: | 3381 | out: |
3389 | fs_path_free(sctx, p); | 3382 | fs_path_free(p); |
3390 | return ret; | 3383 | return ret; |
3391 | } | 3384 | } |
3392 | 3385 | ||
@@ -3394,8 +3387,8 @@ static int process_new_xattr(struct send_ctx *sctx) | |||
3394 | { | 3387 | { |
3395 | int ret = 0; | 3388 | int ret = 0; |
3396 | 3389 | ||
3397 | ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path, | 3390 | ret = iterate_dir_item(sctx->send_root, sctx->left_path, |
3398 | sctx->cmp_key, __process_new_xattr, sctx); | 3391 | sctx->cmp_key, __process_new_xattr, sctx); |
3399 | 3392 | ||
3400 | return ret; | 3393 | return ret; |
3401 | } | 3394 | } |
@@ -3404,8 +3397,8 @@ static int process_deleted_xattr(struct send_ctx *sctx) | |||
3404 | { | 3397 | { |
3405 | int ret; | 3398 | int ret; |
3406 | 3399 | ||
3407 | ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path, | 3400 | ret = iterate_dir_item(sctx->parent_root, sctx->right_path, |
3408 | sctx->cmp_key, __process_deleted_xattr, sctx); | 3401 | sctx->cmp_key, __process_deleted_xattr, sctx); |
3409 | 3402 | ||
3410 | return ret; | 3403 | return ret; |
3411 | } | 3404 | } |
@@ -3429,17 +3422,15 @@ static int __find_xattr(int num, struct btrfs_key *di_key, | |||
3429 | strncmp(name, ctx->name, name_len) == 0) { | 3422 | strncmp(name, ctx->name, name_len) == 0) { |
3430 | ctx->found_idx = num; | 3423 | ctx->found_idx = num; |
3431 | ctx->found_data_len = data_len; | 3424 | ctx->found_data_len = data_len; |
3432 | ctx->found_data = kmalloc(data_len, GFP_NOFS); | 3425 | ctx->found_data = kmemdup(data, data_len, GFP_NOFS); |
3433 | if (!ctx->found_data) | 3426 | if (!ctx->found_data) |
3434 | return -ENOMEM; | 3427 | return -ENOMEM; |
3435 | memcpy(ctx->found_data, data, data_len); | ||
3436 | return 1; | 3428 | return 1; |
3437 | } | 3429 | } |
3438 | return 0; | 3430 | return 0; |
3439 | } | 3431 | } |
3440 | 3432 | ||
3441 | static int find_xattr(struct send_ctx *sctx, | 3433 | static int find_xattr(struct btrfs_root *root, |
3442 | struct btrfs_root *root, | ||
3443 | struct btrfs_path *path, | 3434 | struct btrfs_path *path, |
3444 | struct btrfs_key *key, | 3435 | struct btrfs_key *key, |
3445 | const char *name, int name_len, | 3436 | const char *name, int name_len, |
@@ -3454,7 +3445,7 @@ static int find_xattr(struct send_ctx *sctx, | |||
3454 | ctx.found_data = NULL; | 3445 | ctx.found_data = NULL; |
3455 | ctx.found_data_len = 0; | 3446 | ctx.found_data_len = 0; |
3456 | 3447 | ||
3457 | ret = iterate_dir_item(sctx, root, path, key, __find_xattr, &ctx); | 3448 | ret = iterate_dir_item(root, path, key, __find_xattr, &ctx); |
3458 | if (ret < 0) | 3449 | if (ret < 0) |
3459 | return ret; | 3450 | return ret; |
3460 | 3451 | ||
@@ -3480,9 +3471,9 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key, | |||
3480 | char *found_data = NULL; | 3471 | char *found_data = NULL; |
3481 | int found_data_len = 0; | 3472 | int found_data_len = 0; |
3482 | 3473 | ||
3483 | ret = find_xattr(sctx, sctx->parent_root, sctx->right_path, | 3474 | ret = find_xattr(sctx->parent_root, sctx->right_path, |
3484 | sctx->cmp_key, name, name_len, &found_data, | 3475 | sctx->cmp_key, name, name_len, &found_data, |
3485 | &found_data_len); | 3476 | &found_data_len); |
3486 | if (ret == -ENOENT) { | 3477 | if (ret == -ENOENT) { |
3487 | ret = __process_new_xattr(num, di_key, name, name_len, data, | 3478 | ret = __process_new_xattr(num, di_key, name, name_len, data, |
3488 | data_len, type, ctx); | 3479 | data_len, type, ctx); |
@@ -3508,8 +3499,8 @@ static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key, | |||
3508 | int ret; | 3499 | int ret; |
3509 | struct send_ctx *sctx = ctx; | 3500 | struct send_ctx *sctx = ctx; |
3510 | 3501 | ||
3511 | ret = find_xattr(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key, | 3502 | ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key, |
3512 | name, name_len, NULL, NULL); | 3503 | name, name_len, NULL, NULL); |
3513 | if (ret == -ENOENT) | 3504 | if (ret == -ENOENT) |
3514 | ret = __process_deleted_xattr(num, di_key, name, name_len, data, | 3505 | ret = __process_deleted_xattr(num, di_key, name, name_len, data, |
3515 | data_len, type, ctx); | 3506 | data_len, type, ctx); |
@@ -3523,11 +3514,11 @@ static int process_changed_xattr(struct send_ctx *sctx) | |||
3523 | { | 3514 | { |
3524 | int ret = 0; | 3515 | int ret = 0; |
3525 | 3516 | ||
3526 | ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path, | 3517 | ret = iterate_dir_item(sctx->send_root, sctx->left_path, |
3527 | sctx->cmp_key, __process_changed_new_xattr, sctx); | 3518 | sctx->cmp_key, __process_changed_new_xattr, sctx); |
3528 | if (ret < 0) | 3519 | if (ret < 0) |
3529 | goto out; | 3520 | goto out; |
3530 | ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path, | 3521 | ret = iterate_dir_item(sctx->parent_root, sctx->right_path, |
3531 | sctx->cmp_key, __process_changed_deleted_xattr, sctx); | 3522 | sctx->cmp_key, __process_changed_deleted_xattr, sctx); |
3532 | 3523 | ||
3533 | out: | 3524 | out: |
@@ -3572,8 +3563,8 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
3572 | goto out; | 3563 | goto out; |
3573 | } | 3564 | } |
3574 | 3565 | ||
3575 | ret = iterate_dir_item(sctx, root, path, &found_key, | 3566 | ret = iterate_dir_item(root, path, &found_key, |
3576 | __process_new_xattr, sctx); | 3567 | __process_new_xattr, sctx); |
3577 | if (ret < 0) | 3568 | if (ret < 0) |
3578 | goto out; | 3569 | goto out; |
3579 | 3570 | ||
@@ -3598,7 +3589,7 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) | |||
3598 | int num_read = 0; | 3589 | int num_read = 0; |
3599 | mm_segment_t old_fs; | 3590 | mm_segment_t old_fs; |
3600 | 3591 | ||
3601 | p = fs_path_alloc(sctx); | 3592 | p = fs_path_alloc(); |
3602 | if (!p) | 3593 | if (!p) |
3603 | return -ENOMEM; | 3594 | return -ENOMEM; |
3604 | 3595 | ||
@@ -3640,7 +3631,7 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); | |||
3640 | 3631 | ||
3641 | tlv_put_failure: | 3632 | tlv_put_failure: |
3642 | out: | 3633 | out: |
3643 | fs_path_free(sctx, p); | 3634 | fs_path_free(p); |
3644 | set_fs(old_fs); | 3635 | set_fs(old_fs); |
3645 | if (ret < 0) | 3636 | if (ret < 0) |
3646 | return ret; | 3637 | return ret; |
@@ -3663,7 +3654,7 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " | |||
3663 | clone_root->root->objectid, clone_root->ino, | 3654 | clone_root->root->objectid, clone_root->ino, |
3664 | clone_root->offset); | 3655 | clone_root->offset); |
3665 | 3656 | ||
3666 | p = fs_path_alloc(sctx); | 3657 | p = fs_path_alloc(); |
3667 | if (!p) | 3658 | if (!p) |
3668 | return -ENOMEM; | 3659 | return -ENOMEM; |
3669 | 3660 | ||
@@ -3686,8 +3677,7 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " | |||
3686 | goto out; | 3677 | goto out; |
3687 | ret = get_cur_path(sctx, clone_root->ino, gen, p); | 3678 | ret = get_cur_path(sctx, clone_root->ino, gen, p); |
3688 | } else { | 3679 | } else { |
3689 | ret = get_inode_path(sctx, clone_root->root, | 3680 | ret = get_inode_path(clone_root->root, clone_root->ino, p); |
3690 | clone_root->ino, p); | ||
3691 | } | 3681 | } |
3692 | if (ret < 0) | 3682 | if (ret < 0) |
3693 | goto out; | 3683 | goto out; |
@@ -3704,7 +3694,7 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " | |||
3704 | 3694 | ||
3705 | tlv_put_failure: | 3695 | tlv_put_failure: |
3706 | out: | 3696 | out: |
3707 | fs_path_free(sctx, p); | 3697 | fs_path_free(p); |
3708 | return ret; | 3698 | return ret; |
3709 | } | 3699 | } |
3710 | 3700 | ||
@@ -3717,7 +3707,7 @@ static int send_update_extent(struct send_ctx *sctx, | |||
3717 | int ret = 0; | 3707 | int ret = 0; |
3718 | struct fs_path *p; | 3708 | struct fs_path *p; |
3719 | 3709 | ||
3720 | p = fs_path_alloc(sctx); | 3710 | p = fs_path_alloc(); |
3721 | if (!p) | 3711 | if (!p) |
3722 | return -ENOMEM; | 3712 | return -ENOMEM; |
3723 | 3713 | ||
@@ -3737,7 +3727,7 @@ static int send_update_extent(struct send_ctx *sctx, | |||
3737 | 3727 | ||
3738 | tlv_put_failure: | 3728 | tlv_put_failure: |
3739 | out: | 3729 | out: |
3740 | fs_path_free(sctx, p); | 3730 | fs_path_free(p); |
3741 | return ret; | 3731 | return ret; |
3742 | } | 3732 | } |
3743 | 3733 | ||
@@ -4579,6 +4569,41 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
4579 | send_root = BTRFS_I(file_inode(mnt_file))->root; | 4569 | send_root = BTRFS_I(file_inode(mnt_file))->root; |
4580 | fs_info = send_root->fs_info; | 4570 | fs_info = send_root->fs_info; |
4581 | 4571 | ||
4572 | /* | ||
4573 | * This is done when we lookup the root, it should already be complete | ||
4574 | * by the time we get here. | ||
4575 | */ | ||
4576 | WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); | ||
4577 | |||
4578 | /* | ||
4579 | * If we just created this root we need to make sure that the orphan | ||
4580 | * cleanup has been done and committed since we search the commit root, | ||
4581 | * so check its commit root transid with our otransid and if they match | ||
4582 | * commit the transaction to make sure everything is updated. | ||
4583 | */ | ||
4584 | down_read(&send_root->fs_info->extent_commit_sem); | ||
4585 | if (btrfs_header_generation(send_root->commit_root) == | ||
4586 | btrfs_root_otransid(&send_root->root_item)) { | ||
4587 | struct btrfs_trans_handle *trans; | ||
4588 | |||
4589 | up_read(&send_root->fs_info->extent_commit_sem); | ||
4590 | |||
4591 | trans = btrfs_attach_transaction_barrier(send_root); | ||
4592 | if (IS_ERR(trans)) { | ||
4593 | if (PTR_ERR(trans) != -ENOENT) { | ||
4594 | ret = PTR_ERR(trans); | ||
4595 | goto out; | ||
4596 | } | ||
4597 | /* ENOENT means theres no transaction */ | ||
4598 | } else { | ||
4599 | ret = btrfs_commit_transaction(trans, send_root); | ||
4600 | if (ret) | ||
4601 | goto out; | ||
4602 | } | ||
4603 | } else { | ||
4604 | up_read(&send_root->fs_info->extent_commit_sem); | ||
4605 | } | ||
4606 | |||
4582 | arg = memdup_user(arg_, sizeof(*arg)); | 4607 | arg = memdup_user(arg_, sizeof(*arg)); |
4583 | if (IS_ERR(arg)) { | 4608 | if (IS_ERR(arg)) { |
4584 | ret = PTR_ERR(arg); | 4609 | ret = PTR_ERR(arg); |
@@ -4663,10 +4688,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
4663 | key.type = BTRFS_ROOT_ITEM_KEY; | 4688 | key.type = BTRFS_ROOT_ITEM_KEY; |
4664 | key.offset = (u64)-1; | 4689 | key.offset = (u64)-1; |
4665 | clone_root = btrfs_read_fs_root_no_name(fs_info, &key); | 4690 | clone_root = btrfs_read_fs_root_no_name(fs_info, &key); |
4666 | if (!clone_root) { | ||
4667 | ret = -EINVAL; | ||
4668 | goto out; | ||
4669 | } | ||
4670 | if (IS_ERR(clone_root)) { | 4691 | if (IS_ERR(clone_root)) { |
4671 | ret = PTR_ERR(clone_root); | 4692 | ret = PTR_ERR(clone_root); |
4672 | goto out; | 4693 | goto out; |
@@ -4682,8 +4703,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
4682 | key.type = BTRFS_ROOT_ITEM_KEY; | 4703 | key.type = BTRFS_ROOT_ITEM_KEY; |
4683 | key.offset = (u64)-1; | 4704 | key.offset = (u64)-1; |
4684 | sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); | 4705 | sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); |
4685 | if (!sctx->parent_root) { | 4706 | if (IS_ERR(sctx->parent_root)) { |
4686 | ret = -EINVAL; | 4707 | ret = PTR_ERR(sctx->parent_root); |
4687 | goto out; | 4708 | goto out; |
4688 | } | 4709 | } |
4689 | } | 4710 | } |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f0857e092a3c..8eb6191d86da 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -51,7 +51,6 @@ | |||
51 | #include "print-tree.h" | 51 | #include "print-tree.h" |
52 | #include "xattr.h" | 52 | #include "xattr.h" |
53 | #include "volumes.h" | 53 | #include "volumes.h" |
54 | #include "version.h" | ||
55 | #include "export.h" | 54 | #include "export.h" |
56 | #include "compression.h" | 55 | #include "compression.h" |
57 | #include "rcu-string.h" | 56 | #include "rcu-string.h" |
@@ -266,6 +265,9 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | |||
266 | return; | 265 | return; |
267 | } | 266 | } |
268 | ACCESS_ONCE(trans->transaction->aborted) = errno; | 267 | ACCESS_ONCE(trans->transaction->aborted) = errno; |
268 | /* Wake up anybody who may be waiting on this transaction */ | ||
269 | wake_up(&root->fs_info->transaction_wait); | ||
270 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
269 | __btrfs_std_error(root->fs_info, function, line, errno, NULL); | 271 | __btrfs_std_error(root->fs_info, function, line, errno, NULL); |
270 | } | 272 | } |
271 | /* | 273 | /* |
@@ -776,9 +778,6 @@ find_root: | |||
776 | if (IS_ERR(new_root)) | 778 | if (IS_ERR(new_root)) |
777 | return ERR_CAST(new_root); | 779 | return ERR_CAST(new_root); |
778 | 780 | ||
779 | if (btrfs_root_refs(&new_root->root_item) == 0) | ||
780 | return ERR_PTR(-ENOENT); | ||
781 | |||
782 | dir_id = btrfs_root_dirid(&new_root->root_item); | 781 | dir_id = btrfs_root_dirid(&new_root->root_item); |
783 | setup_root: | 782 | setup_root: |
784 | location.objectid = dir_id; | 783 | location.objectid = dir_id; |
@@ -866,7 +865,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
866 | return 0; | 865 | return 0; |
867 | } | 866 | } |
868 | 867 | ||
869 | btrfs_wait_ordered_extents(root, 1); | 868 | btrfs_wait_all_ordered_extents(fs_info, 1); |
870 | 869 | ||
871 | trans = btrfs_attach_transaction_barrier(root); | 870 | trans = btrfs_attach_transaction_barrier(root); |
872 | if (IS_ERR(trans)) { | 871 | if (IS_ERR(trans)) { |
@@ -1685,6 +1684,18 @@ static void btrfs_interface_exit(void) | |||
1685 | printk(KERN_INFO "btrfs: misc_deregister failed for control device\n"); | 1684 | printk(KERN_INFO "btrfs: misc_deregister failed for control device\n"); |
1686 | } | 1685 | } |
1687 | 1686 | ||
1687 | static void btrfs_print_info(void) | ||
1688 | { | ||
1689 | printk(KERN_INFO "Btrfs loaded" | ||
1690 | #ifdef CONFIG_BTRFS_DEBUG | ||
1691 | ", debug=on" | ||
1692 | #endif | ||
1693 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | ||
1694 | ", integrity-checker=on" | ||
1695 | #endif | ||
1696 | "\n"); | ||
1697 | } | ||
1698 | |||
1688 | static int __init init_btrfs_fs(void) | 1699 | static int __init init_btrfs_fs(void) |
1689 | { | 1700 | { |
1690 | int err; | 1701 | int err; |
@@ -1733,11 +1744,9 @@ static int __init init_btrfs_fs(void) | |||
1733 | 1744 | ||
1734 | btrfs_init_lockdep(); | 1745 | btrfs_init_lockdep(); |
1735 | 1746 | ||
1736 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 1747 | btrfs_print_info(); |
1737 | btrfs_test_free_space_cache(); | 1748 | btrfs_test_free_space_cache(); |
1738 | #endif | ||
1739 | 1749 | ||
1740 | printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); | ||
1741 | return 0; | 1750 | return 0; |
1742 | 1751 | ||
1743 | unregister_ioctl: | 1752 | unregister_ioctl: |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0544587d74f4..af1931a5960d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -34,12 +34,43 @@ | |||
34 | 34 | ||
35 | #define BTRFS_ROOT_TRANS_TAG 0 | 35 | #define BTRFS_ROOT_TRANS_TAG 0 |
36 | 36 | ||
37 | static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { | ||
38 | [TRANS_STATE_RUNNING] = 0U, | ||
39 | [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | | ||
40 | __TRANS_START), | ||
41 | [TRANS_STATE_COMMIT_START] = (__TRANS_USERSPACE | | ||
42 | __TRANS_START | | ||
43 | __TRANS_ATTACH), | ||
44 | [TRANS_STATE_COMMIT_DOING] = (__TRANS_USERSPACE | | ||
45 | __TRANS_START | | ||
46 | __TRANS_ATTACH | | ||
47 | __TRANS_JOIN), | ||
48 | [TRANS_STATE_UNBLOCKED] = (__TRANS_USERSPACE | | ||
49 | __TRANS_START | | ||
50 | __TRANS_ATTACH | | ||
51 | __TRANS_JOIN | | ||
52 | __TRANS_JOIN_NOLOCK), | ||
53 | [TRANS_STATE_COMPLETED] = (__TRANS_USERSPACE | | ||
54 | __TRANS_START | | ||
55 | __TRANS_ATTACH | | ||
56 | __TRANS_JOIN | | ||
57 | __TRANS_JOIN_NOLOCK), | ||
58 | }; | ||
59 | |||
37 | static void put_transaction(struct btrfs_transaction *transaction) | 60 | static void put_transaction(struct btrfs_transaction *transaction) |
38 | { | 61 | { |
39 | WARN_ON(atomic_read(&transaction->use_count) == 0); | 62 | WARN_ON(atomic_read(&transaction->use_count) == 0); |
40 | if (atomic_dec_and_test(&transaction->use_count)) { | 63 | if (atomic_dec_and_test(&transaction->use_count)) { |
41 | BUG_ON(!list_empty(&transaction->list)); | 64 | BUG_ON(!list_empty(&transaction->list)); |
42 | WARN_ON(transaction->delayed_refs.root.rb_node); | 65 | WARN_ON(transaction->delayed_refs.root.rb_node); |
66 | while (!list_empty(&transaction->pending_chunks)) { | ||
67 | struct extent_map *em; | ||
68 | |||
69 | em = list_first_entry(&transaction->pending_chunks, | ||
70 | struct extent_map, list); | ||
71 | list_del_init(&em->list); | ||
72 | free_extent_map(em); | ||
73 | } | ||
43 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 74 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
44 | } | 75 | } |
45 | } | 76 | } |
@@ -50,18 +81,35 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
50 | root->commit_root = btrfs_root_node(root); | 81 | root->commit_root = btrfs_root_node(root); |
51 | } | 82 | } |
52 | 83 | ||
53 | static inline int can_join_transaction(struct btrfs_transaction *trans, | 84 | static inline void extwriter_counter_inc(struct btrfs_transaction *trans, |
54 | int type) | 85 | unsigned int type) |
86 | { | ||
87 | if (type & TRANS_EXTWRITERS) | ||
88 | atomic_inc(&trans->num_extwriters); | ||
89 | } | ||
90 | |||
91 | static inline void extwriter_counter_dec(struct btrfs_transaction *trans, | ||
92 | unsigned int type) | ||
93 | { | ||
94 | if (type & TRANS_EXTWRITERS) | ||
95 | atomic_dec(&trans->num_extwriters); | ||
96 | } | ||
97 | |||
98 | static inline void extwriter_counter_init(struct btrfs_transaction *trans, | ||
99 | unsigned int type) | ||
100 | { | ||
101 | atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 1 : 0)); | ||
102 | } | ||
103 | |||
104 | static inline int extwriter_counter_read(struct btrfs_transaction *trans) | ||
55 | { | 105 | { |
56 | return !(trans->in_commit && | 106 | return atomic_read(&trans->num_extwriters); |
57 | type != TRANS_JOIN && | ||
58 | type != TRANS_JOIN_NOLOCK); | ||
59 | } | 107 | } |
60 | 108 | ||
61 | /* | 109 | /* |
62 | * either allocate a new transaction or hop into the existing one | 110 | * either allocate a new transaction or hop into the existing one |
63 | */ | 111 | */ |
64 | static noinline int join_transaction(struct btrfs_root *root, int type) | 112 | static noinline int join_transaction(struct btrfs_root *root, unsigned int type) |
65 | { | 113 | { |
66 | struct btrfs_transaction *cur_trans; | 114 | struct btrfs_transaction *cur_trans; |
67 | struct btrfs_fs_info *fs_info = root->fs_info; | 115 | struct btrfs_fs_info *fs_info = root->fs_info; |
@@ -74,32 +122,19 @@ loop: | |||
74 | return -EROFS; | 122 | return -EROFS; |
75 | } | 123 | } |
76 | 124 | ||
77 | if (fs_info->trans_no_join) { | ||
78 | /* | ||
79 | * If we are JOIN_NOLOCK we're already committing a current | ||
80 | * transaction, we just need a handle to deal with something | ||
81 | * when committing the transaction, such as inode cache and | ||
82 | * space cache. It is a special case. | ||
83 | */ | ||
84 | if (type != TRANS_JOIN_NOLOCK) { | ||
85 | spin_unlock(&fs_info->trans_lock); | ||
86 | return -EBUSY; | ||
87 | } | ||
88 | } | ||
89 | |||
90 | cur_trans = fs_info->running_transaction; | 125 | cur_trans = fs_info->running_transaction; |
91 | if (cur_trans) { | 126 | if (cur_trans) { |
92 | if (cur_trans->aborted) { | 127 | if (cur_trans->aborted) { |
93 | spin_unlock(&fs_info->trans_lock); | 128 | spin_unlock(&fs_info->trans_lock); |
94 | return cur_trans->aborted; | 129 | return cur_trans->aborted; |
95 | } | 130 | } |
96 | if (!can_join_transaction(cur_trans, type)) { | 131 | if (btrfs_blocked_trans_types[cur_trans->state] & type) { |
97 | spin_unlock(&fs_info->trans_lock); | 132 | spin_unlock(&fs_info->trans_lock); |
98 | return -EBUSY; | 133 | return -EBUSY; |
99 | } | 134 | } |
100 | atomic_inc(&cur_trans->use_count); | 135 | atomic_inc(&cur_trans->use_count); |
101 | atomic_inc(&cur_trans->num_writers); | 136 | atomic_inc(&cur_trans->num_writers); |
102 | cur_trans->num_joined++; | 137 | extwriter_counter_inc(cur_trans, type); |
103 | spin_unlock(&fs_info->trans_lock); | 138 | spin_unlock(&fs_info->trans_lock); |
104 | return 0; | 139 | return 0; |
105 | } | 140 | } |
@@ -112,6 +147,12 @@ loop: | |||
112 | if (type == TRANS_ATTACH) | 147 | if (type == TRANS_ATTACH) |
113 | return -ENOENT; | 148 | return -ENOENT; |
114 | 149 | ||
150 | /* | ||
151 | * JOIN_NOLOCK only happens during the transaction commit, so | ||
152 | * it is impossible that ->running_transaction is NULL | ||
153 | */ | ||
154 | BUG_ON(type == TRANS_JOIN_NOLOCK); | ||
155 | |||
115 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | 156 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
116 | if (!cur_trans) | 157 | if (!cur_trans) |
117 | return -ENOMEM; | 158 | return -ENOMEM; |
@@ -120,7 +161,7 @@ loop: | |||
120 | if (fs_info->running_transaction) { | 161 | if (fs_info->running_transaction) { |
121 | /* | 162 | /* |
122 | * someone started a transaction after we unlocked. Make sure | 163 | * someone started a transaction after we unlocked. Make sure |
123 | * to redo the trans_no_join checks above | 164 | * to redo the checks above |
124 | */ | 165 | */ |
125 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 166 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
126 | goto loop; | 167 | goto loop; |
@@ -131,17 +172,15 @@ loop: | |||
131 | } | 172 | } |
132 | 173 | ||
133 | atomic_set(&cur_trans->num_writers, 1); | 174 | atomic_set(&cur_trans->num_writers, 1); |
134 | cur_trans->num_joined = 0; | 175 | extwriter_counter_init(cur_trans, type); |
135 | init_waitqueue_head(&cur_trans->writer_wait); | 176 | init_waitqueue_head(&cur_trans->writer_wait); |
136 | init_waitqueue_head(&cur_trans->commit_wait); | 177 | init_waitqueue_head(&cur_trans->commit_wait); |
137 | cur_trans->in_commit = 0; | 178 | cur_trans->state = TRANS_STATE_RUNNING; |
138 | cur_trans->blocked = 0; | ||
139 | /* | 179 | /* |
140 | * One for this trans handle, one so it will live on until we | 180 | * One for this trans handle, one so it will live on until we |
141 | * commit the transaction. | 181 | * commit the transaction. |
142 | */ | 182 | */ |
143 | atomic_set(&cur_trans->use_count, 2); | 183 | atomic_set(&cur_trans->use_count, 2); |
144 | cur_trans->commit_done = 0; | ||
145 | cur_trans->start_time = get_seconds(); | 184 | cur_trans->start_time = get_seconds(); |
146 | 185 | ||
147 | cur_trans->delayed_refs.root = RB_ROOT; | 186 | cur_trans->delayed_refs.root = RB_ROOT; |
@@ -164,7 +203,6 @@ loop: | |||
164 | "creating a fresh transaction\n"); | 203 | "creating a fresh transaction\n"); |
165 | atomic64_set(&fs_info->tree_mod_seq, 0); | 204 | atomic64_set(&fs_info->tree_mod_seq, 0); |
166 | 205 | ||
167 | spin_lock_init(&cur_trans->commit_lock); | ||
168 | spin_lock_init(&cur_trans->delayed_refs.lock); | 206 | spin_lock_init(&cur_trans->delayed_refs.lock); |
169 | atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0); | 207 | atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0); |
170 | atomic_set(&cur_trans->delayed_refs.ref_seq, 0); | 208 | atomic_set(&cur_trans->delayed_refs.ref_seq, 0); |
@@ -172,6 +210,7 @@ loop: | |||
172 | 210 | ||
173 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 211 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
174 | INIT_LIST_HEAD(&cur_trans->ordered_operations); | 212 | INIT_LIST_HEAD(&cur_trans->ordered_operations); |
213 | INIT_LIST_HEAD(&cur_trans->pending_chunks); | ||
175 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 214 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
176 | extent_io_tree_init(&cur_trans->dirty_pages, | 215 | extent_io_tree_init(&cur_trans->dirty_pages, |
177 | fs_info->btree_inode->i_mapping); | 216 | fs_info->btree_inode->i_mapping); |
@@ -269,6 +308,13 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | |||
269 | return 0; | 308 | return 0; |
270 | } | 309 | } |
271 | 310 | ||
311 | static inline int is_transaction_blocked(struct btrfs_transaction *trans) | ||
312 | { | ||
313 | return (trans->state >= TRANS_STATE_BLOCKED && | ||
314 | trans->state < TRANS_STATE_UNBLOCKED && | ||
315 | !trans->aborted); | ||
316 | } | ||
317 | |||
272 | /* wait for commit against the current transaction to become unblocked | 318 | /* wait for commit against the current transaction to become unblocked |
273 | * when this is done, it is safe to start a new transaction, but the current | 319 | * when this is done, it is safe to start a new transaction, but the current |
274 | * transaction might not be fully on disk. | 320 | * transaction might not be fully on disk. |
@@ -279,12 +325,13 @@ static void wait_current_trans(struct btrfs_root *root) | |||
279 | 325 | ||
280 | spin_lock(&root->fs_info->trans_lock); | 326 | spin_lock(&root->fs_info->trans_lock); |
281 | cur_trans = root->fs_info->running_transaction; | 327 | cur_trans = root->fs_info->running_transaction; |
282 | if (cur_trans && cur_trans->blocked) { | 328 | if (cur_trans && is_transaction_blocked(cur_trans)) { |
283 | atomic_inc(&cur_trans->use_count); | 329 | atomic_inc(&cur_trans->use_count); |
284 | spin_unlock(&root->fs_info->trans_lock); | 330 | spin_unlock(&root->fs_info->trans_lock); |
285 | 331 | ||
286 | wait_event(root->fs_info->transaction_wait, | 332 | wait_event(root->fs_info->transaction_wait, |
287 | !cur_trans->blocked); | 333 | cur_trans->state >= TRANS_STATE_UNBLOCKED || |
334 | cur_trans->aborted); | ||
288 | put_transaction(cur_trans); | 335 | put_transaction(cur_trans); |
289 | } else { | 336 | } else { |
290 | spin_unlock(&root->fs_info->trans_lock); | 337 | spin_unlock(&root->fs_info->trans_lock); |
@@ -307,7 +354,7 @@ static int may_wait_transaction(struct btrfs_root *root, int type) | |||
307 | } | 354 | } |
308 | 355 | ||
309 | static struct btrfs_trans_handle * | 356 | static struct btrfs_trans_handle * |
310 | start_transaction(struct btrfs_root *root, u64 num_items, int type, | 357 | start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, |
311 | enum btrfs_reserve_flush_enum flush) | 358 | enum btrfs_reserve_flush_enum flush) |
312 | { | 359 | { |
313 | struct btrfs_trans_handle *h; | 360 | struct btrfs_trans_handle *h; |
@@ -320,7 +367,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type, | |||
320 | return ERR_PTR(-EROFS); | 367 | return ERR_PTR(-EROFS); |
321 | 368 | ||
322 | if (current->journal_info) { | 369 | if (current->journal_info) { |
323 | WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); | 370 | WARN_ON(type & TRANS_EXTWRITERS); |
324 | h = current->journal_info; | 371 | h = current->journal_info; |
325 | h->use_count++; | 372 | h->use_count++; |
326 | WARN_ON(h->use_count > 2); | 373 | WARN_ON(h->use_count > 2); |
@@ -366,7 +413,7 @@ again: | |||
366 | * If we are ATTACH, it means we just want to catch the current | 413 | * If we are ATTACH, it means we just want to catch the current |
367 | * transaction and commit it, so we needn't do sb_start_intwrite(). | 414 | * transaction and commit it, so we needn't do sb_start_intwrite(). |
368 | */ | 415 | */ |
369 | if (type < TRANS_JOIN_NOLOCK) | 416 | if (type & __TRANS_FREEZABLE) |
370 | sb_start_intwrite(root->fs_info->sb); | 417 | sb_start_intwrite(root->fs_info->sb); |
371 | 418 | ||
372 | if (may_wait_transaction(root, type)) | 419 | if (may_wait_transaction(root, type)) |
@@ -408,7 +455,8 @@ again: | |||
408 | INIT_LIST_HEAD(&h->new_bgs); | 455 | INIT_LIST_HEAD(&h->new_bgs); |
409 | 456 | ||
410 | smp_mb(); | 457 | smp_mb(); |
411 | if (cur_trans->blocked && may_wait_transaction(root, type)) { | 458 | if (cur_trans->state >= TRANS_STATE_BLOCKED && |
459 | may_wait_transaction(root, type)) { | ||
412 | btrfs_commit_transaction(h, root); | 460 | btrfs_commit_transaction(h, root); |
413 | goto again; | 461 | goto again; |
414 | } | 462 | } |
@@ -429,7 +477,7 @@ got_it: | |||
429 | return h; | 477 | return h; |
430 | 478 | ||
431 | join_fail: | 479 | join_fail: |
432 | if (type < TRANS_JOIN_NOLOCK) | 480 | if (type & __TRANS_FREEZABLE) |
433 | sb_end_intwrite(root->fs_info->sb); | 481 | sb_end_intwrite(root->fs_info->sb); |
434 | kmem_cache_free(btrfs_trans_handle_cachep, h); | 482 | kmem_cache_free(btrfs_trans_handle_cachep, h); |
435 | alloc_fail: | 483 | alloc_fail: |
@@ -490,7 +538,7 @@ struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) | |||
490 | } | 538 | } |
491 | 539 | ||
492 | /* | 540 | /* |
493 | * btrfs_attach_transaction() - catch the running transaction | 541 | * btrfs_attach_transaction_barrier() - catch the running transaction |
494 | * | 542 | * |
495 | * It is similar to the above function, the differentia is this one | 543 | * It is similar to the above function, the differentia is this one |
496 | * will wait for all the inactive transactions until they fully | 544 | * will wait for all the inactive transactions until they fully |
@@ -512,7 +560,7 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root) | |||
512 | static noinline void wait_for_commit(struct btrfs_root *root, | 560 | static noinline void wait_for_commit(struct btrfs_root *root, |
513 | struct btrfs_transaction *commit) | 561 | struct btrfs_transaction *commit) |
514 | { | 562 | { |
515 | wait_event(commit->commit_wait, commit->commit_done); | 563 | wait_event(commit->commit_wait, commit->state == TRANS_STATE_COMPLETED); |
516 | } | 564 | } |
517 | 565 | ||
518 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | 566 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) |
@@ -548,8 +596,8 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
548 | spin_lock(&root->fs_info->trans_lock); | 596 | spin_lock(&root->fs_info->trans_lock); |
549 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, | 597 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, |
550 | list) { | 598 | list) { |
551 | if (t->in_commit) { | 599 | if (t->state >= TRANS_STATE_COMMIT_START) { |
552 | if (t->commit_done) | 600 | if (t->state == TRANS_STATE_COMPLETED) |
553 | break; | 601 | break; |
554 | cur_trans = t; | 602 | cur_trans = t; |
555 | atomic_inc(&cur_trans->use_count); | 603 | atomic_inc(&cur_trans->use_count); |
@@ -576,10 +624,11 @@ void btrfs_throttle(struct btrfs_root *root) | |||
576 | static int should_end_transaction(struct btrfs_trans_handle *trans, | 624 | static int should_end_transaction(struct btrfs_trans_handle *trans, |
577 | struct btrfs_root *root) | 625 | struct btrfs_root *root) |
578 | { | 626 | { |
579 | int ret; | 627 | if (root->fs_info->global_block_rsv.space_info->full && |
628 | btrfs_should_throttle_delayed_refs(trans, root)) | ||
629 | return 1; | ||
580 | 630 | ||
581 | ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); | 631 | return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); |
582 | return ret ? 1 : 0; | ||
583 | } | 632 | } |
584 | 633 | ||
585 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | 634 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, |
@@ -590,7 +639,8 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
590 | int err; | 639 | int err; |
591 | 640 | ||
592 | smp_mb(); | 641 | smp_mb(); |
593 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | 642 | if (cur_trans->state >= TRANS_STATE_BLOCKED || |
643 | cur_trans->delayed_refs.flushing) | ||
594 | return 1; | 644 | return 1; |
595 | 645 | ||
596 | updates = trans->delayed_ref_updates; | 646 | updates = trans->delayed_ref_updates; |
@@ -609,7 +659,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
609 | { | 659 | { |
610 | struct btrfs_transaction *cur_trans = trans->transaction; | 660 | struct btrfs_transaction *cur_trans = trans->transaction; |
611 | struct btrfs_fs_info *info = root->fs_info; | 661 | struct btrfs_fs_info *info = root->fs_info; |
612 | int count = 0; | 662 | unsigned long cur = trans->delayed_ref_updates; |
613 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | 663 | int lock = (trans->type != TRANS_JOIN_NOLOCK); |
614 | int err = 0; | 664 | int err = 0; |
615 | 665 | ||
@@ -638,17 +688,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
638 | if (!list_empty(&trans->new_bgs)) | 688 | if (!list_empty(&trans->new_bgs)) |
639 | btrfs_create_pending_block_groups(trans, root); | 689 | btrfs_create_pending_block_groups(trans, root); |
640 | 690 | ||
641 | while (count < 1) { | 691 | trans->delayed_ref_updates = 0; |
642 | unsigned long cur = trans->delayed_ref_updates; | 692 | if (btrfs_should_throttle_delayed_refs(trans, root)) { |
693 | cur = max_t(unsigned long, cur, 1); | ||
643 | trans->delayed_ref_updates = 0; | 694 | trans->delayed_ref_updates = 0; |
644 | if (cur && | 695 | btrfs_run_delayed_refs(trans, root, cur); |
645 | trans->transaction->delayed_refs.num_heads_ready > 64) { | ||
646 | trans->delayed_ref_updates = 0; | ||
647 | btrfs_run_delayed_refs(trans, root, cur); | ||
648 | } else { | ||
649 | break; | ||
650 | } | ||
651 | count++; | ||
652 | } | 696 | } |
653 | 697 | ||
654 | btrfs_trans_release_metadata(trans, root); | 698 | btrfs_trans_release_metadata(trans, root); |
@@ -658,12 +702,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
658 | btrfs_create_pending_block_groups(trans, root); | 702 | btrfs_create_pending_block_groups(trans, root); |
659 | 703 | ||
660 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && | 704 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
661 | should_end_transaction(trans, root)) { | 705 | should_end_transaction(trans, root) && |
662 | trans->transaction->blocked = 1; | 706 | ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { |
663 | smp_wmb(); | 707 | spin_lock(&info->trans_lock); |
708 | if (cur_trans->state == TRANS_STATE_RUNNING) | ||
709 | cur_trans->state = TRANS_STATE_BLOCKED; | ||
710 | spin_unlock(&info->trans_lock); | ||
664 | } | 711 | } |
665 | 712 | ||
666 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { | 713 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { |
667 | if (throttle) { | 714 | if (throttle) { |
668 | /* | 715 | /* |
669 | * We may race with somebody else here so end up having | 716 | * We may race with somebody else here so end up having |
@@ -677,12 +724,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
677 | } | 724 | } |
678 | } | 725 | } |
679 | 726 | ||
680 | if (trans->type < TRANS_JOIN_NOLOCK) | 727 | if (trans->type & __TRANS_FREEZABLE) |
681 | sb_end_intwrite(root->fs_info->sb); | 728 | sb_end_intwrite(root->fs_info->sb); |
682 | 729 | ||
683 | WARN_ON(cur_trans != info->running_transaction); | 730 | WARN_ON(cur_trans != info->running_transaction); |
684 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); | 731 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); |
685 | atomic_dec(&cur_trans->num_writers); | 732 | atomic_dec(&cur_trans->num_writers); |
733 | extwriter_counter_dec(cur_trans, trans->type); | ||
686 | 734 | ||
687 | smp_mb(); | 735 | smp_mb(); |
688 | if (waitqueue_active(&cur_trans->writer_wait)) | 736 | if (waitqueue_active(&cur_trans->writer_wait)) |
@@ -736,9 +784,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
736 | struct extent_state *cached_state = NULL; | 784 | struct extent_state *cached_state = NULL; |
737 | u64 start = 0; | 785 | u64 start = 0; |
738 | u64 end; | 786 | u64 end; |
739 | struct blk_plug plug; | ||
740 | 787 | ||
741 | blk_start_plug(&plug); | ||
742 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 788 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
743 | mark, &cached_state)) { | 789 | mark, &cached_state)) { |
744 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, | 790 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, |
@@ -752,7 +798,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
752 | } | 798 | } |
753 | if (err) | 799 | if (err) |
754 | werr = err; | 800 | werr = err; |
755 | blk_finish_plug(&plug); | ||
756 | return werr; | 801 | return werr; |
757 | } | 802 | } |
758 | 803 | ||
@@ -797,8 +842,11 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | |||
797 | { | 842 | { |
798 | int ret; | 843 | int ret; |
799 | int ret2; | 844 | int ret2; |
845 | struct blk_plug plug; | ||
800 | 846 | ||
847 | blk_start_plug(&plug); | ||
801 | ret = btrfs_write_marked_extents(root, dirty_pages, mark); | 848 | ret = btrfs_write_marked_extents(root, dirty_pages, mark); |
849 | blk_finish_plug(&plug); | ||
802 | ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); | 850 | ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); |
803 | 851 | ||
804 | if (ret) | 852 | if (ret) |
@@ -935,12 +983,12 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
935 | * a dirty root struct and adds it into the list of dead roots that need to | 983 | * a dirty root struct and adds it into the list of dead roots that need to |
936 | * be deleted | 984 | * be deleted |
937 | */ | 985 | */ |
938 | int btrfs_add_dead_root(struct btrfs_root *root) | 986 | void btrfs_add_dead_root(struct btrfs_root *root) |
939 | { | 987 | { |
940 | spin_lock(&root->fs_info->trans_lock); | 988 | spin_lock(&root->fs_info->trans_lock); |
941 | list_add_tail(&root->root_list, &root->fs_info->dead_roots); | 989 | if (list_empty(&root->root_list)) |
990 | list_add_tail(&root->root_list, &root->fs_info->dead_roots); | ||
942 | spin_unlock(&root->fs_info->trans_lock); | 991 | spin_unlock(&root->fs_info->trans_lock); |
943 | return 0; | ||
944 | } | 992 | } |
945 | 993 | ||
946 | /* | 994 | /* |
@@ -1318,20 +1366,26 @@ static void update_super_roots(struct btrfs_root *root) | |||
1318 | 1366 | ||
1319 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | 1367 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) |
1320 | { | 1368 | { |
1369 | struct btrfs_transaction *trans; | ||
1321 | int ret = 0; | 1370 | int ret = 0; |
1371 | |||
1322 | spin_lock(&info->trans_lock); | 1372 | spin_lock(&info->trans_lock); |
1323 | if (info->running_transaction) | 1373 | trans = info->running_transaction; |
1324 | ret = info->running_transaction->in_commit; | 1374 | if (trans) |
1375 | ret = (trans->state >= TRANS_STATE_COMMIT_START); | ||
1325 | spin_unlock(&info->trans_lock); | 1376 | spin_unlock(&info->trans_lock); |
1326 | return ret; | 1377 | return ret; |
1327 | } | 1378 | } |
1328 | 1379 | ||
1329 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | 1380 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) |
1330 | { | 1381 | { |
1382 | struct btrfs_transaction *trans; | ||
1331 | int ret = 0; | 1383 | int ret = 0; |
1384 | |||
1332 | spin_lock(&info->trans_lock); | 1385 | spin_lock(&info->trans_lock); |
1333 | if (info->running_transaction) | 1386 | trans = info->running_transaction; |
1334 | ret = info->running_transaction->blocked; | 1387 | if (trans) |
1388 | ret = is_transaction_blocked(trans); | ||
1335 | spin_unlock(&info->trans_lock); | 1389 | spin_unlock(&info->trans_lock); |
1336 | return ret; | 1390 | return ret; |
1337 | } | 1391 | } |
@@ -1343,7 +1397,9 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info) | |||
1343 | static void wait_current_trans_commit_start(struct btrfs_root *root, | 1397 | static void wait_current_trans_commit_start(struct btrfs_root *root, |
1344 | struct btrfs_transaction *trans) | 1398 | struct btrfs_transaction *trans) |
1345 | { | 1399 | { |
1346 | wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit); | 1400 | wait_event(root->fs_info->transaction_blocked_wait, |
1401 | trans->state >= TRANS_STATE_COMMIT_START || | ||
1402 | trans->aborted); | ||
1347 | } | 1403 | } |
1348 | 1404 | ||
1349 | /* | 1405 | /* |
@@ -1354,7 +1410,8 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, | |||
1354 | struct btrfs_transaction *trans) | 1410 | struct btrfs_transaction *trans) |
1355 | { | 1411 | { |
1356 | wait_event(root->fs_info->transaction_wait, | 1412 | wait_event(root->fs_info->transaction_wait, |
1357 | trans->commit_done || (trans->in_commit && !trans->blocked)); | 1413 | trans->state >= TRANS_STATE_UNBLOCKED || |
1414 | trans->aborted); | ||
1358 | } | 1415 | } |
1359 | 1416 | ||
1360 | /* | 1417 | /* |
@@ -1450,26 +1507,31 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
1450 | 1507 | ||
1451 | spin_lock(&root->fs_info->trans_lock); | 1508 | spin_lock(&root->fs_info->trans_lock); |
1452 | 1509 | ||
1453 | if (list_empty(&cur_trans->list)) { | 1510 | /* |
1454 | spin_unlock(&root->fs_info->trans_lock); | 1511 | * If the transaction is removed from the list, it means this |
1455 | btrfs_end_transaction(trans, root); | 1512 | * transaction has been committed successfully, so it is impossible |
1456 | return; | 1513 | * to call the cleanup function. |
1457 | } | 1514 | */ |
1515 | BUG_ON(list_empty(&cur_trans->list)); | ||
1458 | 1516 | ||
1459 | list_del_init(&cur_trans->list); | 1517 | list_del_init(&cur_trans->list); |
1460 | if (cur_trans == root->fs_info->running_transaction) { | 1518 | if (cur_trans == root->fs_info->running_transaction) { |
1461 | root->fs_info->trans_no_join = 1; | 1519 | cur_trans->state = TRANS_STATE_COMMIT_DOING; |
1462 | spin_unlock(&root->fs_info->trans_lock); | 1520 | spin_unlock(&root->fs_info->trans_lock); |
1463 | wait_event(cur_trans->writer_wait, | 1521 | wait_event(cur_trans->writer_wait, |
1464 | atomic_read(&cur_trans->num_writers) == 1); | 1522 | atomic_read(&cur_trans->num_writers) == 1); |
1465 | 1523 | ||
1466 | spin_lock(&root->fs_info->trans_lock); | 1524 | spin_lock(&root->fs_info->trans_lock); |
1467 | root->fs_info->running_transaction = NULL; | ||
1468 | } | 1525 | } |
1469 | spin_unlock(&root->fs_info->trans_lock); | 1526 | spin_unlock(&root->fs_info->trans_lock); |
1470 | 1527 | ||
1471 | btrfs_cleanup_one_transaction(trans->transaction, root); | 1528 | btrfs_cleanup_one_transaction(trans->transaction, root); |
1472 | 1529 | ||
1530 | spin_lock(&root->fs_info->trans_lock); | ||
1531 | if (cur_trans == root->fs_info->running_transaction) | ||
1532 | root->fs_info->running_transaction = NULL; | ||
1533 | spin_unlock(&root->fs_info->trans_lock); | ||
1534 | |||
1473 | put_transaction(cur_trans); | 1535 | put_transaction(cur_trans); |
1474 | put_transaction(cur_trans); | 1536 | put_transaction(cur_trans); |
1475 | 1537 | ||
@@ -1481,33 +1543,13 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
1481 | current->journal_info = NULL; | 1543 | current->journal_info = NULL; |
1482 | 1544 | ||
1483 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1545 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
1484 | |||
1485 | spin_lock(&root->fs_info->trans_lock); | ||
1486 | root->fs_info->trans_no_join = 0; | ||
1487 | spin_unlock(&root->fs_info->trans_lock); | ||
1488 | } | 1546 | } |
1489 | 1547 | ||
1490 | static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | 1548 | static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, |
1491 | struct btrfs_root *root) | 1549 | struct btrfs_root *root) |
1492 | { | 1550 | { |
1493 | int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); | ||
1494 | int snap_pending = 0; | ||
1495 | int ret; | 1551 | int ret; |
1496 | 1552 | ||
1497 | if (!flush_on_commit) { | ||
1498 | spin_lock(&root->fs_info->trans_lock); | ||
1499 | if (!list_empty(&trans->transaction->pending_snapshots)) | ||
1500 | snap_pending = 1; | ||
1501 | spin_unlock(&root->fs_info->trans_lock); | ||
1502 | } | ||
1503 | |||
1504 | if (flush_on_commit || snap_pending) { | ||
1505 | ret = btrfs_start_delalloc_inodes(root, 1); | ||
1506 | if (ret) | ||
1507 | return ret; | ||
1508 | btrfs_wait_ordered_extents(root, 1); | ||
1509 | } | ||
1510 | |||
1511 | ret = btrfs_run_delayed_items(trans, root); | 1553 | ret = btrfs_run_delayed_items(trans, root); |
1512 | if (ret) | 1554 | if (ret) |
1513 | return ret; | 1555 | return ret; |
@@ -1531,23 +1573,25 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
1531 | return ret; | 1573 | return ret; |
1532 | } | 1574 | } |
1533 | 1575 | ||
1534 | /* | 1576 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) |
1535 | * btrfs_transaction state sequence: | 1577 | { |
1536 | * in_commit = 0, blocked = 0 (initial) | 1578 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) |
1537 | * in_commit = 1, blocked = 1 | 1579 | return btrfs_start_all_delalloc_inodes(fs_info, 1); |
1538 | * blocked = 0 | 1580 | return 0; |
1539 | * commit_done = 1 | 1581 | } |
1540 | */ | 1582 | |
1583 | static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) | ||
1584 | { | ||
1585 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) | ||
1586 | btrfs_wait_all_ordered_extents(fs_info, 1); | ||
1587 | } | ||
1588 | |||
1541 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 1589 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
1542 | struct btrfs_root *root) | 1590 | struct btrfs_root *root) |
1543 | { | 1591 | { |
1544 | unsigned long joined = 0; | ||
1545 | struct btrfs_transaction *cur_trans = trans->transaction; | 1592 | struct btrfs_transaction *cur_trans = trans->transaction; |
1546 | struct btrfs_transaction *prev_trans = NULL; | 1593 | struct btrfs_transaction *prev_trans = NULL; |
1547 | DEFINE_WAIT(wait); | ||
1548 | int ret; | 1594 | int ret; |
1549 | int should_grow = 0; | ||
1550 | unsigned long now = get_seconds(); | ||
1551 | 1595 | ||
1552 | ret = btrfs_run_ordered_operations(trans, root, 0); | 1596 | ret = btrfs_run_ordered_operations(trans, root, 0); |
1553 | if (ret) { | 1597 | if (ret) { |
@@ -1586,6 +1630,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1586 | * start sending their work down. | 1630 | * start sending their work down. |
1587 | */ | 1631 | */ |
1588 | cur_trans->delayed_refs.flushing = 1; | 1632 | cur_trans->delayed_refs.flushing = 1; |
1633 | smp_wmb(); | ||
1589 | 1634 | ||
1590 | if (!list_empty(&trans->new_bgs)) | 1635 | if (!list_empty(&trans->new_bgs)) |
1591 | btrfs_create_pending_block_groups(trans, root); | 1636 | btrfs_create_pending_block_groups(trans, root); |
@@ -1596,9 +1641,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1596 | return ret; | 1641 | return ret; |
1597 | } | 1642 | } |
1598 | 1643 | ||
1599 | spin_lock(&cur_trans->commit_lock); | 1644 | spin_lock(&root->fs_info->trans_lock); |
1600 | if (cur_trans->in_commit) { | 1645 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { |
1601 | spin_unlock(&cur_trans->commit_lock); | 1646 | spin_unlock(&root->fs_info->trans_lock); |
1602 | atomic_inc(&cur_trans->use_count); | 1647 | atomic_inc(&cur_trans->use_count); |
1603 | ret = btrfs_end_transaction(trans, root); | 1648 | ret = btrfs_end_transaction(trans, root); |
1604 | 1649 | ||
@@ -1609,16 +1654,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1609 | return ret; | 1654 | return ret; |
1610 | } | 1655 | } |
1611 | 1656 | ||
1612 | trans->transaction->in_commit = 1; | 1657 | cur_trans->state = TRANS_STATE_COMMIT_START; |
1613 | trans->transaction->blocked = 1; | ||
1614 | spin_unlock(&cur_trans->commit_lock); | ||
1615 | wake_up(&root->fs_info->transaction_blocked_wait); | 1658 | wake_up(&root->fs_info->transaction_blocked_wait); |
1616 | 1659 | ||
1617 | spin_lock(&root->fs_info->trans_lock); | ||
1618 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1660 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
1619 | prev_trans = list_entry(cur_trans->list.prev, | 1661 | prev_trans = list_entry(cur_trans->list.prev, |
1620 | struct btrfs_transaction, list); | 1662 | struct btrfs_transaction, list); |
1621 | if (!prev_trans->commit_done) { | 1663 | if (prev_trans->state != TRANS_STATE_COMPLETED) { |
1622 | atomic_inc(&prev_trans->use_count); | 1664 | atomic_inc(&prev_trans->use_count); |
1623 | spin_unlock(&root->fs_info->trans_lock); | 1665 | spin_unlock(&root->fs_info->trans_lock); |
1624 | 1666 | ||
@@ -1632,42 +1674,32 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1632 | spin_unlock(&root->fs_info->trans_lock); | 1674 | spin_unlock(&root->fs_info->trans_lock); |
1633 | } | 1675 | } |
1634 | 1676 | ||
1635 | if (!btrfs_test_opt(root, SSD) && | 1677 | extwriter_counter_dec(cur_trans, trans->type); |
1636 | (now < cur_trans->start_time || now - cur_trans->start_time < 1)) | ||
1637 | should_grow = 1; | ||
1638 | |||
1639 | do { | ||
1640 | joined = cur_trans->num_joined; | ||
1641 | |||
1642 | WARN_ON(cur_trans != trans->transaction); | ||
1643 | |||
1644 | ret = btrfs_flush_all_pending_stuffs(trans, root); | ||
1645 | if (ret) | ||
1646 | goto cleanup_transaction; | ||
1647 | 1678 | ||
1648 | prepare_to_wait(&cur_trans->writer_wait, &wait, | 1679 | ret = btrfs_start_delalloc_flush(root->fs_info); |
1649 | TASK_UNINTERRUPTIBLE); | 1680 | if (ret) |
1681 | goto cleanup_transaction; | ||
1650 | 1682 | ||
1651 | if (atomic_read(&cur_trans->num_writers) > 1) | 1683 | ret = btrfs_flush_all_pending_stuffs(trans, root); |
1652 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); | 1684 | if (ret) |
1653 | else if (should_grow) | 1685 | goto cleanup_transaction; |
1654 | schedule_timeout(1); | ||
1655 | 1686 | ||
1656 | finish_wait(&cur_trans->writer_wait, &wait); | 1687 | wait_event(cur_trans->writer_wait, |
1657 | } while (atomic_read(&cur_trans->num_writers) > 1 || | 1688 | extwriter_counter_read(cur_trans) == 0); |
1658 | (should_grow && cur_trans->num_joined != joined)); | ||
1659 | 1689 | ||
1690 | /* some pending stuffs might be added after the previous flush. */ | ||
1660 | ret = btrfs_flush_all_pending_stuffs(trans, root); | 1691 | ret = btrfs_flush_all_pending_stuffs(trans, root); |
1661 | if (ret) | 1692 | if (ret) |
1662 | goto cleanup_transaction; | 1693 | goto cleanup_transaction; |
1663 | 1694 | ||
1695 | btrfs_wait_delalloc_flush(root->fs_info); | ||
1664 | /* | 1696 | /* |
1665 | * Ok now we need to make sure to block out any other joins while we | 1697 | * Ok now we need to make sure to block out any other joins while we |
1666 | * commit the transaction. We could have started a join before setting | 1698 | * commit the transaction. We could have started a join before setting |
1667 | * no_join so make sure to wait for num_writers to == 1 again. | 1699 | * COMMIT_DOING so make sure to wait for num_writers to == 1 again. |
1668 | */ | 1700 | */ |
1669 | spin_lock(&root->fs_info->trans_lock); | 1701 | spin_lock(&root->fs_info->trans_lock); |
1670 | root->fs_info->trans_no_join = 1; | 1702 | cur_trans->state = TRANS_STATE_COMMIT_DOING; |
1671 | spin_unlock(&root->fs_info->trans_lock); | 1703 | spin_unlock(&root->fs_info->trans_lock); |
1672 | wait_event(cur_trans->writer_wait, | 1704 | wait_event(cur_trans->writer_wait, |
1673 | atomic_read(&cur_trans->num_writers) == 1); | 1705 | atomic_read(&cur_trans->num_writers) == 1); |
@@ -1794,10 +1826,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1794 | memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, | 1826 | memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, |
1795 | sizeof(*root->fs_info->super_copy)); | 1827 | sizeof(*root->fs_info->super_copy)); |
1796 | 1828 | ||
1797 | trans->transaction->blocked = 0; | ||
1798 | spin_lock(&root->fs_info->trans_lock); | 1829 | spin_lock(&root->fs_info->trans_lock); |
1830 | cur_trans->state = TRANS_STATE_UNBLOCKED; | ||
1799 | root->fs_info->running_transaction = NULL; | 1831 | root->fs_info->running_transaction = NULL; |
1800 | root->fs_info->trans_no_join = 0; | ||
1801 | spin_unlock(&root->fs_info->trans_lock); | 1832 | spin_unlock(&root->fs_info->trans_lock); |
1802 | mutex_unlock(&root->fs_info->reloc_mutex); | 1833 | mutex_unlock(&root->fs_info->reloc_mutex); |
1803 | 1834 | ||
@@ -1825,10 +1856,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1825 | 1856 | ||
1826 | btrfs_finish_extent_commit(trans, root); | 1857 | btrfs_finish_extent_commit(trans, root); |
1827 | 1858 | ||
1828 | cur_trans->commit_done = 1; | ||
1829 | |||
1830 | root->fs_info->last_trans_committed = cur_trans->transid; | 1859 | root->fs_info->last_trans_committed = cur_trans->transid; |
1831 | 1860 | /* | |
1861 | * We needn't acquire the lock here because there is no other task | ||
1862 | * which can change it. | ||
1863 | */ | ||
1864 | cur_trans->state = TRANS_STATE_COMPLETED; | ||
1832 | wake_up(&cur_trans->commit_wait); | 1865 | wake_up(&cur_trans->commit_wait); |
1833 | 1866 | ||
1834 | spin_lock(&root->fs_info->trans_lock); | 1867 | spin_lock(&root->fs_info->trans_lock); |
@@ -1838,7 +1871,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1838 | put_transaction(cur_trans); | 1871 | put_transaction(cur_trans); |
1839 | put_transaction(cur_trans); | 1872 | put_transaction(cur_trans); |
1840 | 1873 | ||
1841 | if (trans->type < TRANS_JOIN_NOLOCK) | 1874 | if (trans->type & __TRANS_FREEZABLE) |
1842 | sb_end_intwrite(root->fs_info->sb); | 1875 | sb_end_intwrite(root->fs_info->sb); |
1843 | 1876 | ||
1844 | trace_btrfs_transaction_commit(root); | 1877 | trace_btrfs_transaction_commit(root); |
@@ -1885,11 +1918,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) | |||
1885 | int ret; | 1918 | int ret; |
1886 | struct btrfs_fs_info *fs_info = root->fs_info; | 1919 | struct btrfs_fs_info *fs_info = root->fs_info; |
1887 | 1920 | ||
1888 | if (fs_info->sb->s_flags & MS_RDONLY) { | ||
1889 | pr_debug("btrfs: cleaner called for RO fs!\n"); | ||
1890 | return 0; | ||
1891 | } | ||
1892 | |||
1893 | spin_lock(&fs_info->trans_lock); | 1921 | spin_lock(&fs_info->trans_lock); |
1894 | if (list_empty(&fs_info->dead_roots)) { | 1922 | if (list_empty(&fs_info->dead_roots)) { |
1895 | spin_unlock(&fs_info->trans_lock); | 1923 | spin_unlock(&fs_info->trans_lock); |
@@ -1897,7 +1925,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) | |||
1897 | } | 1925 | } |
1898 | root = list_first_entry(&fs_info->dead_roots, | 1926 | root = list_first_entry(&fs_info->dead_roots, |
1899 | struct btrfs_root, root_list); | 1927 | struct btrfs_root, root_list); |
1900 | list_del(&root->root_list); | 1928 | list_del_init(&root->root_list); |
1901 | spin_unlock(&fs_info->trans_lock); | 1929 | spin_unlock(&fs_info->trans_lock); |
1902 | 1930 | ||
1903 | pr_debug("btrfs: cleaner removing %llu\n", | 1931 | pr_debug("btrfs: cleaner removing %llu\n", |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 24c97335a59f..defbc4269897 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -22,21 +22,33 @@ | |||
22 | #include "delayed-ref.h" | 22 | #include "delayed-ref.h" |
23 | #include "ctree.h" | 23 | #include "ctree.h" |
24 | 24 | ||
25 | enum btrfs_trans_state { | ||
26 | TRANS_STATE_RUNNING = 0, | ||
27 | TRANS_STATE_BLOCKED = 1, | ||
28 | TRANS_STATE_COMMIT_START = 2, | ||
29 | TRANS_STATE_COMMIT_DOING = 3, | ||
30 | TRANS_STATE_UNBLOCKED = 4, | ||
31 | TRANS_STATE_COMPLETED = 5, | ||
32 | TRANS_STATE_MAX = 6, | ||
33 | }; | ||
34 | |||
25 | struct btrfs_transaction { | 35 | struct btrfs_transaction { |
26 | u64 transid; | 36 | u64 transid; |
27 | /* | 37 | /* |
38 | * total external writers(USERSPACE/START/ATTACH) in this | ||
39 | * transaction, it must be zero before the transaction is | ||
40 | * being committed | ||
41 | */ | ||
42 | atomic_t num_extwriters; | ||
43 | /* | ||
28 | * total writers in this transaction, it must be zero before the | 44 | * total writers in this transaction, it must be zero before the |
29 | * transaction can end | 45 | * transaction can end |
30 | */ | 46 | */ |
31 | atomic_t num_writers; | 47 | atomic_t num_writers; |
32 | atomic_t use_count; | 48 | atomic_t use_count; |
33 | 49 | ||
34 | unsigned long num_joined; | 50 | /* Be protected by fs_info->trans_lock when we want to change it. */ |
35 | 51 | enum btrfs_trans_state state; | |
36 | spinlock_t commit_lock; | ||
37 | int in_commit; | ||
38 | int commit_done; | ||
39 | int blocked; | ||
40 | struct list_head list; | 52 | struct list_head list; |
41 | struct extent_io_tree dirty_pages; | 53 | struct extent_io_tree dirty_pages; |
42 | unsigned long start_time; | 54 | unsigned long start_time; |
@@ -44,17 +56,27 @@ struct btrfs_transaction { | |||
44 | wait_queue_head_t commit_wait; | 56 | wait_queue_head_t commit_wait; |
45 | struct list_head pending_snapshots; | 57 | struct list_head pending_snapshots; |
46 | struct list_head ordered_operations; | 58 | struct list_head ordered_operations; |
59 | struct list_head pending_chunks; | ||
47 | struct btrfs_delayed_ref_root delayed_refs; | 60 | struct btrfs_delayed_ref_root delayed_refs; |
48 | int aborted; | 61 | int aborted; |
49 | }; | 62 | }; |
50 | 63 | ||
51 | enum btrfs_trans_type { | 64 | #define __TRANS_FREEZABLE (1U << 0) |
52 | TRANS_START, | 65 | |
53 | TRANS_JOIN, | 66 | #define __TRANS_USERSPACE (1U << 8) |
54 | TRANS_USERSPACE, | 67 | #define __TRANS_START (1U << 9) |
55 | TRANS_JOIN_NOLOCK, | 68 | #define __TRANS_ATTACH (1U << 10) |
56 | TRANS_ATTACH, | 69 | #define __TRANS_JOIN (1U << 11) |
57 | }; | 70 | #define __TRANS_JOIN_NOLOCK (1U << 12) |
71 | |||
72 | #define TRANS_USERSPACE (__TRANS_USERSPACE | __TRANS_FREEZABLE) | ||
73 | #define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) | ||
74 | #define TRANS_ATTACH (__TRANS_ATTACH) | ||
75 | #define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE) | ||
76 | #define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK) | ||
77 | |||
78 | #define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ | ||
79 | __TRANS_ATTACH) | ||
58 | 80 | ||
59 | struct btrfs_trans_handle { | 81 | struct btrfs_trans_handle { |
60 | u64 transid; | 82 | u64 transid; |
@@ -70,7 +92,7 @@ struct btrfs_trans_handle { | |||
70 | short aborted; | 92 | short aborted; |
71 | short adding_csums; | 93 | short adding_csums; |
72 | bool allocating_chunk; | 94 | bool allocating_chunk; |
73 | enum btrfs_trans_type type; | 95 | unsigned int type; |
74 | /* | 96 | /* |
75 | * this root is only needed to validate that the root passed to | 97 | * this root is only needed to validate that the root passed to |
76 | * start_transaction is the same as the one passed to end_transaction. | 98 | * start_transaction is the same as the one passed to end_transaction. |
@@ -121,7 +143,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); | |||
121 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 143 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
122 | struct btrfs_root *root); | 144 | struct btrfs_root *root); |
123 | 145 | ||
124 | int btrfs_add_dead_root(struct btrfs_root *root); | 146 | void btrfs_add_dead_root(struct btrfs_root *root); |
125 | int btrfs_defrag_root(struct btrfs_root *root); | 147 | int btrfs_defrag_root(struct btrfs_root *root); |
126 | int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root); | 148 | int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root); |
127 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 149 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c276ac9a0ec3..ff60d8978ae2 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/blkdev.h> | ||
21 | #include <linux/list_sort.h> | 22 | #include <linux/list_sort.h> |
22 | #include "ctree.h" | 23 | #include "ctree.h" |
23 | #include "transaction.h" | 24 | #include "transaction.h" |
@@ -279,11 +280,23 @@ static int process_one_buffer(struct btrfs_root *log, | |||
279 | { | 280 | { |
280 | int ret = 0; | 281 | int ret = 0; |
281 | 282 | ||
283 | /* | ||
284 | * If this fs is mixed then we need to be able to process the leaves to | ||
285 | * pin down any logged extents, so we have to read the block. | ||
286 | */ | ||
287 | if (btrfs_fs_incompat(log->fs_info, MIXED_GROUPS)) { | ||
288 | ret = btrfs_read_buffer(eb, gen); | ||
289 | if (ret) | ||
290 | return ret; | ||
291 | } | ||
292 | |||
282 | if (wc->pin) | 293 | if (wc->pin) |
283 | ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, | 294 | ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, |
284 | eb->start, eb->len); | 295 | eb->start, eb->len); |
285 | 296 | ||
286 | if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) { | 297 | if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) { |
298 | if (wc->pin && btrfs_header_level(eb) == 0) | ||
299 | ret = btrfs_exclude_logged_extents(log, eb); | ||
287 | if (wc->write) | 300 | if (wc->write) |
288 | btrfs_write_tree_block(eb); | 301 | btrfs_write_tree_block(eb); |
289 | if (wc->wait) | 302 | if (wc->wait) |
@@ -2016,13 +2029,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
2016 | eb, i, &key); | 2029 | eb, i, &key); |
2017 | if (ret) | 2030 | if (ret) |
2018 | break; | 2031 | break; |
2019 | } else if (key.type == BTRFS_INODE_REF_KEY) { | 2032 | } else if (key.type == BTRFS_INODE_REF_KEY || |
2020 | ret = add_inode_ref(wc->trans, root, log, path, | 2033 | key.type == BTRFS_INODE_EXTREF_KEY) { |
2021 | eb, i, &key); | ||
2022 | if (ret && ret != -ENOENT) | ||
2023 | break; | ||
2024 | ret = 0; | ||
2025 | } else if (key.type == BTRFS_INODE_EXTREF_KEY) { | ||
2026 | ret = add_inode_ref(wc->trans, root, log, path, | 2034 | ret = add_inode_ref(wc->trans, root, log, path, |
2027 | eb, i, &key); | 2035 | eb, i, &key); |
2028 | if (ret && ret != -ENOENT) | 2036 | if (ret && ret != -ENOENT) |
@@ -2358,6 +2366,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2358 | struct btrfs_root *log = root->log_root; | 2366 | struct btrfs_root *log = root->log_root; |
2359 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; | 2367 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; |
2360 | unsigned long log_transid = 0; | 2368 | unsigned long log_transid = 0; |
2369 | struct blk_plug plug; | ||
2361 | 2370 | ||
2362 | mutex_lock(&root->log_mutex); | 2371 | mutex_lock(&root->log_mutex); |
2363 | log_transid = root->log_transid; | 2372 | log_transid = root->log_transid; |
@@ -2401,8 +2410,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2401 | /* we start IO on all the marked extents here, but we don't actually | 2410 | /* we start IO on all the marked extents here, but we don't actually |
2402 | * wait for them until later. | 2411 | * wait for them until later. |
2403 | */ | 2412 | */ |
2413 | blk_start_plug(&plug); | ||
2404 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); | 2414 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); |
2405 | if (ret) { | 2415 | if (ret) { |
2416 | blk_finish_plug(&plug); | ||
2406 | btrfs_abort_transaction(trans, root, ret); | 2417 | btrfs_abort_transaction(trans, root, ret); |
2407 | btrfs_free_logged_extents(log, log_transid); | 2418 | btrfs_free_logged_extents(log, log_transid); |
2408 | mutex_unlock(&root->log_mutex); | 2419 | mutex_unlock(&root->log_mutex); |
@@ -2437,6 +2448,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2437 | } | 2448 | } |
2438 | 2449 | ||
2439 | if (ret) { | 2450 | if (ret) { |
2451 | blk_finish_plug(&plug); | ||
2440 | if (ret != -ENOSPC) { | 2452 | if (ret != -ENOSPC) { |
2441 | btrfs_abort_transaction(trans, root, ret); | 2453 | btrfs_abort_transaction(trans, root, ret); |
2442 | mutex_unlock(&log_root_tree->log_mutex); | 2454 | mutex_unlock(&log_root_tree->log_mutex); |
@@ -2452,6 +2464,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2452 | 2464 | ||
2453 | index2 = log_root_tree->log_transid % 2; | 2465 | index2 = log_root_tree->log_transid % 2; |
2454 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2466 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
2467 | blk_finish_plug(&plug); | ||
2455 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2468 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2456 | wait_log_commit(trans, log_root_tree, | 2469 | wait_log_commit(trans, log_root_tree, |
2457 | log_root_tree->log_transid); | 2470 | log_root_tree->log_transid); |
@@ -2474,6 +2487,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2474 | * check the full commit flag again | 2487 | * check the full commit flag again |
2475 | */ | 2488 | */ |
2476 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2489 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { |
2490 | blk_finish_plug(&plug); | ||
2477 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2491 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2478 | btrfs_free_logged_extents(log, log_transid); | 2492 | btrfs_free_logged_extents(log, log_transid); |
2479 | mutex_unlock(&log_root_tree->log_mutex); | 2493 | mutex_unlock(&log_root_tree->log_mutex); |
@@ -2481,9 +2495,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2481 | goto out_wake_log_root; | 2495 | goto out_wake_log_root; |
2482 | } | 2496 | } |
2483 | 2497 | ||
2484 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, | 2498 | ret = btrfs_write_marked_extents(log_root_tree, |
2485 | &log_root_tree->dirty_log_pages, | 2499 | &log_root_tree->dirty_log_pages, |
2486 | EXTENT_DIRTY | EXTENT_NEW); | 2500 | EXTENT_DIRTY | EXTENT_NEW); |
2501 | blk_finish_plug(&plug); | ||
2487 | if (ret) { | 2502 | if (ret) { |
2488 | btrfs_abort_transaction(trans, root, ret); | 2503 | btrfs_abort_transaction(trans, root, ret); |
2489 | btrfs_free_logged_extents(log, log_transid); | 2504 | btrfs_free_logged_extents(log, log_transid); |
@@ -2491,6 +2506,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2491 | goto out_wake_log_root; | 2506 | goto out_wake_log_root; |
2492 | } | 2507 | } |
2493 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2508 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2509 | btrfs_wait_marked_extents(log_root_tree, | ||
2510 | &log_root_tree->dirty_log_pages, | ||
2511 | EXTENT_NEW | EXTENT_DIRTY); | ||
2494 | btrfs_wait_logged_extents(log, log_transid); | 2512 | btrfs_wait_logged_extents(log, log_transid); |
2495 | 2513 | ||
2496 | btrfs_set_super_log_root(root->fs_info->super_for_commit, | 2514 | btrfs_set_super_log_root(root->fs_info->super_for_commit, |
@@ -3728,8 +3746,9 @@ next_slot: | |||
3728 | } | 3746 | } |
3729 | 3747 | ||
3730 | log_extents: | 3748 | log_extents: |
3749 | btrfs_release_path(path); | ||
3750 | btrfs_release_path(dst_path); | ||
3731 | if (fast_search) { | 3751 | if (fast_search) { |
3732 | btrfs_release_path(dst_path); | ||
3733 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path); | 3752 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path); |
3734 | if (ret) { | 3753 | if (ret) { |
3735 | err = ret; | 3754 | err = ret; |
@@ -3746,8 +3765,6 @@ log_extents: | |||
3746 | } | 3765 | } |
3747 | 3766 | ||
3748 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 3767 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
3749 | btrfs_release_path(path); | ||
3750 | btrfs_release_path(dst_path); | ||
3751 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 3768 | ret = log_directory_changes(trans, root, inode, path, dst_path); |
3752 | if (ret) { | 3769 | if (ret) { |
3753 | err = ret; | 3770 | err = ret; |
@@ -4016,8 +4033,7 @@ again: | |||
4016 | if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) | 4033 | if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) |
4017 | break; | 4034 | break; |
4018 | 4035 | ||
4019 | log = btrfs_read_fs_root_no_radix(log_root_tree, | 4036 | log = btrfs_read_fs_root(log_root_tree, &found_key); |
4020 | &found_key); | ||
4021 | if (IS_ERR(log)) { | 4037 | if (IS_ERR(log)) { |
4022 | ret = PTR_ERR(log); | 4038 | ret = PTR_ERR(log); |
4023 | btrfs_error(fs_info, ret, | 4039 | btrfs_error(fs_info, ret, |
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index 7b417e20efe2..b0a523b2c60e 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c | |||
@@ -205,6 +205,10 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, | |||
205 | u64 new_alloced = ulist->nodes_alloced + 128; | 205 | u64 new_alloced = ulist->nodes_alloced + 128; |
206 | struct ulist_node *new_nodes; | 206 | struct ulist_node *new_nodes; |
207 | void *old = NULL; | 207 | void *old = NULL; |
208 | int i; | ||
209 | |||
210 | for (i = 0; i < ulist->nnodes; i++) | ||
211 | rb_erase(&ulist->nodes[i].rb_node, &ulist->root); | ||
208 | 212 | ||
209 | /* | 213 | /* |
210 | * if nodes_alloced == ULIST_SIZE no memory has been allocated | 214 | * if nodes_alloced == ULIST_SIZE no memory has been allocated |
@@ -224,6 +228,17 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, | |||
224 | 228 | ||
225 | ulist->nodes = new_nodes; | 229 | ulist->nodes = new_nodes; |
226 | ulist->nodes_alloced = new_alloced; | 230 | ulist->nodes_alloced = new_alloced; |
231 | |||
232 | /* | ||
233 | * krealloc actually uses memcpy, which does not copy rb_node | ||
234 | * pointers, so we have to do it ourselves. Otherwise we may | ||
235 | * be bitten by crashes. | ||
236 | */ | ||
237 | for (i = 0; i < ulist->nnodes; i++) { | ||
238 | ret = ulist_rbtree_insert(ulist, &ulist->nodes[i]); | ||
239 | if (ret < 0) | ||
240 | return ret; | ||
241 | } | ||
227 | } | 242 | } |
228 | ulist->nodes[ulist->nnodes].val = val; | 243 | ulist->nodes[ulist->nnodes].val = val; |
229 | ulist->nodes[ulist->nnodes].aux = aux; | 244 | ulist->nodes[ulist->nnodes].aux = aux; |
diff --git a/fs/btrfs/version.h b/fs/btrfs/version.h deleted file mode 100644 index 9bf3946d5ef2..000000000000 --- a/fs/btrfs/version.h +++ /dev/null | |||
@@ -1,4 +0,0 @@ | |||
1 | #ifndef __BTRFS_VERSION_H | ||
2 | #define __BTRFS_VERSION_H | ||
3 | #define BTRFS_BUILD_VERSION "Btrfs" | ||
4 | #endif | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8bffb9174afb..78b871753cb6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -982,6 +982,35 @@ out: | |||
982 | return ret; | 982 | return ret; |
983 | } | 983 | } |
984 | 984 | ||
985 | static int contains_pending_extent(struct btrfs_trans_handle *trans, | ||
986 | struct btrfs_device *device, | ||
987 | u64 *start, u64 len) | ||
988 | { | ||
989 | struct extent_map *em; | ||
990 | int ret = 0; | ||
991 | |||
992 | list_for_each_entry(em, &trans->transaction->pending_chunks, list) { | ||
993 | struct map_lookup *map; | ||
994 | int i; | ||
995 | |||
996 | map = (struct map_lookup *)em->bdev; | ||
997 | for (i = 0; i < map->num_stripes; i++) { | ||
998 | if (map->stripes[i].dev != device) | ||
999 | continue; | ||
1000 | if (map->stripes[i].physical >= *start + len || | ||
1001 | map->stripes[i].physical + em->orig_block_len <= | ||
1002 | *start) | ||
1003 | continue; | ||
1004 | *start = map->stripes[i].physical + | ||
1005 | em->orig_block_len; | ||
1006 | ret = 1; | ||
1007 | } | ||
1008 | } | ||
1009 | |||
1010 | return ret; | ||
1011 | } | ||
1012 | |||
1013 | |||
985 | /* | 1014 | /* |
986 | * find_free_dev_extent - find free space in the specified device | 1015 | * find_free_dev_extent - find free space in the specified device |
987 | * @device: the device which we search the free space in | 1016 | * @device: the device which we search the free space in |
@@ -1002,7 +1031,8 @@ out: | |||
1002 | * But if we don't find suitable free space, it is used to store the size of | 1031 | * But if we don't find suitable free space, it is used to store the size of |
1003 | * the max free space. | 1032 | * the max free space. |
1004 | */ | 1033 | */ |
1005 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | 1034 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
1035 | struct btrfs_device *device, u64 num_bytes, | ||
1006 | u64 *start, u64 *len) | 1036 | u64 *start, u64 *len) |
1007 | { | 1037 | { |
1008 | struct btrfs_key key; | 1038 | struct btrfs_key key; |
@@ -1026,21 +1056,22 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | |||
1026 | */ | 1056 | */ |
1027 | search_start = max(root->fs_info->alloc_start, 1024ull * 1024); | 1057 | search_start = max(root->fs_info->alloc_start, 1024ull * 1024); |
1028 | 1058 | ||
1059 | path = btrfs_alloc_path(); | ||
1060 | if (!path) | ||
1061 | return -ENOMEM; | ||
1062 | again: | ||
1029 | max_hole_start = search_start; | 1063 | max_hole_start = search_start; |
1030 | max_hole_size = 0; | 1064 | max_hole_size = 0; |
1031 | hole_size = 0; | 1065 | hole_size = 0; |
1032 | 1066 | ||
1033 | if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { | 1067 | if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { |
1034 | ret = -ENOSPC; | 1068 | ret = -ENOSPC; |
1035 | goto error; | 1069 | goto out; |
1036 | } | 1070 | } |
1037 | 1071 | ||
1038 | path = btrfs_alloc_path(); | ||
1039 | if (!path) { | ||
1040 | ret = -ENOMEM; | ||
1041 | goto error; | ||
1042 | } | ||
1043 | path->reada = 2; | 1072 | path->reada = 2; |
1073 | path->search_commit_root = 1; | ||
1074 | path->skip_locking = 1; | ||
1044 | 1075 | ||
1045 | key.objectid = device->devid; | 1076 | key.objectid = device->devid; |
1046 | key.offset = search_start; | 1077 | key.offset = search_start; |
@@ -1081,6 +1112,15 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | |||
1081 | if (key.offset > search_start) { | 1112 | if (key.offset > search_start) { |
1082 | hole_size = key.offset - search_start; | 1113 | hole_size = key.offset - search_start; |
1083 | 1114 | ||
1115 | /* | ||
1116 | * Have to check before we set max_hole_start, otherwise | ||
1117 | * we could end up sending back this offset anyway. | ||
1118 | */ | ||
1119 | if (contains_pending_extent(trans, device, | ||
1120 | &search_start, | ||
1121 | hole_size)) | ||
1122 | hole_size = 0; | ||
1123 | |||
1084 | if (hole_size > max_hole_size) { | 1124 | if (hole_size > max_hole_size) { |
1085 | max_hole_start = search_start; | 1125 | max_hole_start = search_start; |
1086 | max_hole_size = hole_size; | 1126 | max_hole_size = hole_size; |
@@ -1124,6 +1164,11 @@ next: | |||
1124 | max_hole_size = hole_size; | 1164 | max_hole_size = hole_size; |
1125 | } | 1165 | } |
1126 | 1166 | ||
1167 | if (contains_pending_extent(trans, device, &search_start, hole_size)) { | ||
1168 | btrfs_release_path(path); | ||
1169 | goto again; | ||
1170 | } | ||
1171 | |||
1127 | /* See above. */ | 1172 | /* See above. */ |
1128 | if (hole_size < num_bytes) | 1173 | if (hole_size < num_bytes) |
1129 | ret = -ENOSPC; | 1174 | ret = -ENOSPC; |
@@ -1132,7 +1177,6 @@ next: | |||
1132 | 1177 | ||
1133 | out: | 1178 | out: |
1134 | btrfs_free_path(path); | 1179 | btrfs_free_path(path); |
1135 | error: | ||
1136 | *start = max_hole_start; | 1180 | *start = max_hole_start; |
1137 | if (len) | 1181 | if (len) |
1138 | *len = max_hole_size; | 1182 | *len = max_hole_size; |
@@ -1244,47 +1288,22 @@ out: | |||
1244 | return ret; | 1288 | return ret; |
1245 | } | 1289 | } |
1246 | 1290 | ||
1247 | static noinline int find_next_chunk(struct btrfs_root *root, | 1291 | static u64 find_next_chunk(struct btrfs_fs_info *fs_info) |
1248 | u64 objectid, u64 *offset) | ||
1249 | { | 1292 | { |
1250 | struct btrfs_path *path; | 1293 | struct extent_map_tree *em_tree; |
1251 | int ret; | 1294 | struct extent_map *em; |
1252 | struct btrfs_key key; | 1295 | struct rb_node *n; |
1253 | struct btrfs_chunk *chunk; | 1296 | u64 ret = 0; |
1254 | struct btrfs_key found_key; | ||
1255 | |||
1256 | path = btrfs_alloc_path(); | ||
1257 | if (!path) | ||
1258 | return -ENOMEM; | ||
1259 | |||
1260 | key.objectid = objectid; | ||
1261 | key.offset = (u64)-1; | ||
1262 | key.type = BTRFS_CHUNK_ITEM_KEY; | ||
1263 | |||
1264 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
1265 | if (ret < 0) | ||
1266 | goto error; | ||
1267 | |||
1268 | BUG_ON(ret == 0); /* Corruption */ | ||
1269 | 1297 | ||
1270 | ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); | 1298 | em_tree = &fs_info->mapping_tree.map_tree; |
1271 | if (ret) { | 1299 | read_lock(&em_tree->lock); |
1272 | *offset = 0; | 1300 | n = rb_last(&em_tree->map); |
1273 | } else { | 1301 | if (n) { |
1274 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | 1302 | em = rb_entry(n, struct extent_map, rb_node); |
1275 | path->slots[0]); | 1303 | ret = em->start + em->len; |
1276 | if (found_key.objectid != objectid) | ||
1277 | *offset = 0; | ||
1278 | else { | ||
1279 | chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
1280 | struct btrfs_chunk); | ||
1281 | *offset = found_key.offset + | ||
1282 | btrfs_chunk_length(path->nodes[0], chunk); | ||
1283 | } | ||
1284 | } | 1304 | } |
1285 | ret = 0; | 1305 | read_unlock(&em_tree->lock); |
1286 | error: | 1306 | |
1287 | btrfs_free_path(path); | ||
1288 | return ret; | 1307 | return ret; |
1289 | } | 1308 | } |
1290 | 1309 | ||
@@ -1462,31 +1481,23 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1462 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace); | 1481 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace); |
1463 | 1482 | ||
1464 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { | 1483 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { |
1465 | printk(KERN_ERR "btrfs: unable to go below four devices " | 1484 | ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET; |
1466 | "on raid10\n"); | ||
1467 | ret = -EINVAL; | ||
1468 | goto out; | 1485 | goto out; |
1469 | } | 1486 | } |
1470 | 1487 | ||
1471 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) { | 1488 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) { |
1472 | printk(KERN_ERR "btrfs: unable to go below two " | 1489 | ret = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET; |
1473 | "devices on raid1\n"); | ||
1474 | ret = -EINVAL; | ||
1475 | goto out; | 1490 | goto out; |
1476 | } | 1491 | } |
1477 | 1492 | ||
1478 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) && | 1493 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) && |
1479 | root->fs_info->fs_devices->rw_devices <= 2) { | 1494 | root->fs_info->fs_devices->rw_devices <= 2) { |
1480 | printk(KERN_ERR "btrfs: unable to go below two " | 1495 | ret = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET; |
1481 | "devices on raid5\n"); | ||
1482 | ret = -EINVAL; | ||
1483 | goto out; | 1496 | goto out; |
1484 | } | 1497 | } |
1485 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) && | 1498 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) && |
1486 | root->fs_info->fs_devices->rw_devices <= 3) { | 1499 | root->fs_info->fs_devices->rw_devices <= 3) { |
1487 | printk(KERN_ERR "btrfs: unable to go below three " | 1500 | ret = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET; |
1488 | "devices on raid6\n"); | ||
1489 | ret = -EINVAL; | ||
1490 | goto out; | 1501 | goto out; |
1491 | } | 1502 | } |
1492 | 1503 | ||
@@ -1512,8 +1523,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1512 | bh = NULL; | 1523 | bh = NULL; |
1513 | disk_super = NULL; | 1524 | disk_super = NULL; |
1514 | if (!device) { | 1525 | if (!device) { |
1515 | printk(KERN_ERR "btrfs: no missing devices found to " | 1526 | ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; |
1516 | "remove\n"); | ||
1517 | goto out; | 1527 | goto out; |
1518 | } | 1528 | } |
1519 | } else { | 1529 | } else { |
@@ -1535,15 +1545,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1535 | } | 1545 | } |
1536 | 1546 | ||
1537 | if (device->is_tgtdev_for_dev_replace) { | 1547 | if (device->is_tgtdev_for_dev_replace) { |
1538 | pr_err("btrfs: unable to remove the dev_replace target dev\n"); | 1548 | ret = BTRFS_ERROR_DEV_TGT_REPLACE; |
1539 | ret = -EINVAL; | ||
1540 | goto error_brelse; | 1549 | goto error_brelse; |
1541 | } | 1550 | } |
1542 | 1551 | ||
1543 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { | 1552 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { |
1544 | printk(KERN_ERR "btrfs: unable to remove the only writeable " | 1553 | ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; |
1545 | "device\n"); | ||
1546 | ret = -EINVAL; | ||
1547 | goto error_brelse; | 1554 | goto error_brelse; |
1548 | } | 1555 | } |
1549 | 1556 | ||
@@ -3295,10 +3302,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) | |||
3295 | } | 3302 | } |
3296 | 3303 | ||
3297 | tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); | 3304 | tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); |
3298 | if (IS_ERR(tsk)) | 3305 | return PTR_RET(tsk); |
3299 | return PTR_ERR(tsk); | ||
3300 | |||
3301 | return 0; | ||
3302 | } | 3306 | } |
3303 | 3307 | ||
3304 | int btrfs_recover_balance(struct btrfs_fs_info *fs_info) | 3308 | int btrfs_recover_balance(struct btrfs_fs_info *fs_info) |
@@ -3681,10 +3685,8 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) | |||
3681 | } | 3685 | } |
3682 | 3686 | ||
3683 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 3687 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
3684 | struct btrfs_root *extent_root, | 3688 | struct btrfs_root *extent_root, u64 start, |
3685 | struct map_lookup **map_ret, | 3689 | u64 type) |
3686 | u64 *num_bytes_out, u64 *stripe_size_out, | ||
3687 | u64 start, u64 type) | ||
3688 | { | 3690 | { |
3689 | struct btrfs_fs_info *info = extent_root->fs_info; | 3691 | struct btrfs_fs_info *info = extent_root->fs_info; |
3690 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | 3692 | struct btrfs_fs_devices *fs_devices = info->fs_devices; |
@@ -3791,7 +3793,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
3791 | if (total_avail == 0) | 3793 | if (total_avail == 0) |
3792 | continue; | 3794 | continue; |
3793 | 3795 | ||
3794 | ret = find_free_dev_extent(device, | 3796 | ret = find_free_dev_extent(trans, device, |
3795 | max_stripe_size * dev_stripes, | 3797 | max_stripe_size * dev_stripes, |
3796 | &dev_offset, &max_avail); | 3798 | &dev_offset, &max_avail); |
3797 | if (ret && ret != -ENOSPC) | 3799 | if (ret && ret != -ENOSPC) |
@@ -3903,12 +3905,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
3903 | map->type = type; | 3905 | map->type = type; |
3904 | map->sub_stripes = sub_stripes; | 3906 | map->sub_stripes = sub_stripes; |
3905 | 3907 | ||
3906 | *map_ret = map; | ||
3907 | num_bytes = stripe_size * data_stripes; | 3908 | num_bytes = stripe_size * data_stripes; |
3908 | 3909 | ||
3909 | *stripe_size_out = stripe_size; | ||
3910 | *num_bytes_out = num_bytes; | ||
3911 | |||
3912 | trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes); | 3910 | trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes); |
3913 | 3911 | ||
3914 | em = alloc_extent_map(); | 3912 | em = alloc_extent_map(); |
@@ -3921,38 +3919,26 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
3921 | em->len = num_bytes; | 3919 | em->len = num_bytes; |
3922 | em->block_start = 0; | 3920 | em->block_start = 0; |
3923 | em->block_len = em->len; | 3921 | em->block_len = em->len; |
3922 | em->orig_block_len = stripe_size; | ||
3924 | 3923 | ||
3925 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | 3924 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; |
3926 | write_lock(&em_tree->lock); | 3925 | write_lock(&em_tree->lock); |
3927 | ret = add_extent_mapping(em_tree, em, 0); | 3926 | ret = add_extent_mapping(em_tree, em, 0); |
3927 | if (!ret) { | ||
3928 | list_add_tail(&em->list, &trans->transaction->pending_chunks); | ||
3929 | atomic_inc(&em->refs); | ||
3930 | } | ||
3928 | write_unlock(&em_tree->lock); | 3931 | write_unlock(&em_tree->lock); |
3929 | if (ret) { | 3932 | if (ret) { |
3930 | free_extent_map(em); | 3933 | free_extent_map(em); |
3931 | goto error; | 3934 | goto error; |
3932 | } | 3935 | } |
3933 | 3936 | ||
3934 | for (i = 0; i < map->num_stripes; ++i) { | ||
3935 | struct btrfs_device *device; | ||
3936 | u64 dev_offset; | ||
3937 | |||
3938 | device = map->stripes[i].dev; | ||
3939 | dev_offset = map->stripes[i].physical; | ||
3940 | |||
3941 | ret = btrfs_alloc_dev_extent(trans, device, | ||
3942 | info->chunk_root->root_key.objectid, | ||
3943 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | ||
3944 | start, dev_offset, stripe_size); | ||
3945 | if (ret) | ||
3946 | goto error_dev_extent; | ||
3947 | } | ||
3948 | |||
3949 | ret = btrfs_make_block_group(trans, extent_root, 0, type, | 3937 | ret = btrfs_make_block_group(trans, extent_root, 0, type, |
3950 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | 3938 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, |
3951 | start, num_bytes); | 3939 | start, num_bytes); |
3952 | if (ret) { | 3940 | if (ret) |
3953 | i = map->num_stripes - 1; | 3941 | goto error_del_extent; |
3954 | goto error_dev_extent; | ||
3955 | } | ||
3956 | 3942 | ||
3957 | free_extent_map(em); | 3943 | free_extent_map(em); |
3958 | check_raid56_incompat_flag(extent_root->fs_info, type); | 3944 | check_raid56_incompat_flag(extent_root->fs_info, type); |
@@ -3960,18 +3946,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
3960 | kfree(devices_info); | 3946 | kfree(devices_info); |
3961 | return 0; | 3947 | return 0; |
3962 | 3948 | ||
3963 | error_dev_extent: | 3949 | error_del_extent: |
3964 | for (; i >= 0; i--) { | ||
3965 | struct btrfs_device *device; | ||
3966 | int err; | ||
3967 | |||
3968 | device = map->stripes[i].dev; | ||
3969 | err = btrfs_free_dev_extent(trans, device, start); | ||
3970 | if (err) { | ||
3971 | btrfs_abort_transaction(trans, extent_root, err); | ||
3972 | break; | ||
3973 | } | ||
3974 | } | ||
3975 | write_lock(&em_tree->lock); | 3950 | write_lock(&em_tree->lock); |
3976 | remove_extent_mapping(em_tree, em); | 3951 | remove_extent_mapping(em_tree, em); |
3977 | write_unlock(&em_tree->lock); | 3952 | write_unlock(&em_tree->lock); |
@@ -3986,33 +3961,68 @@ error: | |||
3986 | return ret; | 3961 | return ret; |
3987 | } | 3962 | } |
3988 | 3963 | ||
3989 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | 3964 | int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, |
3990 | struct btrfs_root *extent_root, | 3965 | struct btrfs_root *extent_root, |
3991 | struct map_lookup *map, u64 chunk_offset, | 3966 | u64 chunk_offset, u64 chunk_size) |
3992 | u64 chunk_size, u64 stripe_size) | ||
3993 | { | 3967 | { |
3994 | u64 dev_offset; | ||
3995 | struct btrfs_key key; | 3968 | struct btrfs_key key; |
3996 | struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; | 3969 | struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; |
3997 | struct btrfs_device *device; | 3970 | struct btrfs_device *device; |
3998 | struct btrfs_chunk *chunk; | 3971 | struct btrfs_chunk *chunk; |
3999 | struct btrfs_stripe *stripe; | 3972 | struct btrfs_stripe *stripe; |
4000 | size_t item_size = btrfs_chunk_item_size(map->num_stripes); | 3973 | struct extent_map_tree *em_tree; |
4001 | int index = 0; | 3974 | struct extent_map *em; |
3975 | struct map_lookup *map; | ||
3976 | size_t item_size; | ||
3977 | u64 dev_offset; | ||
3978 | u64 stripe_size; | ||
3979 | int i = 0; | ||
4002 | int ret; | 3980 | int ret; |
4003 | 3981 | ||
3982 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | ||
3983 | read_lock(&em_tree->lock); | ||
3984 | em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size); | ||
3985 | read_unlock(&em_tree->lock); | ||
3986 | |||
3987 | if (!em) { | ||
3988 | btrfs_crit(extent_root->fs_info, "unable to find logical " | ||
3989 | "%Lu len %Lu", chunk_offset, chunk_size); | ||
3990 | return -EINVAL; | ||
3991 | } | ||
3992 | |||
3993 | if (em->start != chunk_offset || em->len != chunk_size) { | ||
3994 | btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted" | ||
3995 | " %Lu-%Lu, found %Lu-%Lu\n", chunk_offset, | ||
3996 | chunk_size, em->start, em->len); | ||
3997 | free_extent_map(em); | ||
3998 | return -EINVAL; | ||
3999 | } | ||
4000 | |||
4001 | map = (struct map_lookup *)em->bdev; | ||
4002 | item_size = btrfs_chunk_item_size(map->num_stripes); | ||
4003 | stripe_size = em->orig_block_len; | ||
4004 | |||
4004 | chunk = kzalloc(item_size, GFP_NOFS); | 4005 | chunk = kzalloc(item_size, GFP_NOFS); |
4005 | if (!chunk) | 4006 | if (!chunk) { |
4006 | return -ENOMEM; | 4007 | ret = -ENOMEM; |
4008 | goto out; | ||
4009 | } | ||
4010 | |||
4011 | for (i = 0; i < map->num_stripes; i++) { | ||
4012 | device = map->stripes[i].dev; | ||
4013 | dev_offset = map->stripes[i].physical; | ||
4007 | 4014 | ||
4008 | index = 0; | ||
4009 | while (index < map->num_stripes) { | ||
4010 | device = map->stripes[index].dev; | ||
4011 | device->bytes_used += stripe_size; | 4015 | device->bytes_used += stripe_size; |
4012 | ret = btrfs_update_device(trans, device); | 4016 | ret = btrfs_update_device(trans, device); |
4013 | if (ret) | 4017 | if (ret) |
4014 | goto out_free; | 4018 | goto out; |
4015 | index++; | 4019 | ret = btrfs_alloc_dev_extent(trans, device, |
4020 | chunk_root->root_key.objectid, | ||
4021 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | ||
4022 | chunk_offset, dev_offset, | ||
4023 | stripe_size); | ||
4024 | if (ret) | ||
4025 | goto out; | ||
4016 | } | 4026 | } |
4017 | 4027 | ||
4018 | spin_lock(&extent_root->fs_info->free_chunk_lock); | 4028 | spin_lock(&extent_root->fs_info->free_chunk_lock); |
@@ -4020,17 +4030,15 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
4020 | map->num_stripes); | 4030 | map->num_stripes); |
4021 | spin_unlock(&extent_root->fs_info->free_chunk_lock); | 4031 | spin_unlock(&extent_root->fs_info->free_chunk_lock); |
4022 | 4032 | ||
4023 | index = 0; | ||
4024 | stripe = &chunk->stripe; | 4033 | stripe = &chunk->stripe; |
4025 | while (index < map->num_stripes) { | 4034 | for (i = 0; i < map->num_stripes; i++) { |
4026 | device = map->stripes[index].dev; | 4035 | device = map->stripes[i].dev; |
4027 | dev_offset = map->stripes[index].physical; | 4036 | dev_offset = map->stripes[i].physical; |
4028 | 4037 | ||
4029 | btrfs_set_stack_stripe_devid(stripe, device->devid); | 4038 | btrfs_set_stack_stripe_devid(stripe, device->devid); |
4030 | btrfs_set_stack_stripe_offset(stripe, dev_offset); | 4039 | btrfs_set_stack_stripe_offset(stripe, dev_offset); |
4031 | memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); | 4040 | memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); |
4032 | stripe++; | 4041 | stripe++; |
4033 | index++; | ||
4034 | } | 4042 | } |
4035 | 4043 | ||
4036 | btrfs_set_stack_chunk_length(chunk, chunk_size); | 4044 | btrfs_set_stack_chunk_length(chunk, chunk_size); |
@@ -4048,7 +4056,6 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
4048 | key.offset = chunk_offset; | 4056 | key.offset = chunk_offset; |
4049 | 4057 | ||
4050 | ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size); | 4058 | ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size); |
4051 | |||
4052 | if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | 4059 | if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) { |
4053 | /* | 4060 | /* |
4054 | * TODO: Cleanup of inserted chunk root in case of | 4061 | * TODO: Cleanup of inserted chunk root in case of |
@@ -4058,8 +4065,9 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
4058 | item_size); | 4065 | item_size); |
4059 | } | 4066 | } |
4060 | 4067 | ||
4061 | out_free: | 4068 | out: |
4062 | kfree(chunk); | 4069 | kfree(chunk); |
4070 | free_extent_map(em); | ||
4063 | return ret; | 4071 | return ret; |
4064 | } | 4072 | } |
4065 | 4073 | ||
@@ -4074,27 +4082,9 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
4074 | struct btrfs_root *extent_root, u64 type) | 4082 | struct btrfs_root *extent_root, u64 type) |
4075 | { | 4083 | { |
4076 | u64 chunk_offset; | 4084 | u64 chunk_offset; |
4077 | u64 chunk_size; | ||
4078 | u64 stripe_size; | ||
4079 | struct map_lookup *map; | ||
4080 | struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; | ||
4081 | int ret; | ||
4082 | |||
4083 | ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, | ||
4084 | &chunk_offset); | ||
4085 | if (ret) | ||
4086 | return ret; | ||
4087 | 4085 | ||
4088 | ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, | 4086 | chunk_offset = find_next_chunk(extent_root->fs_info); |
4089 | &stripe_size, chunk_offset, type); | 4087 | return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type); |
4090 | if (ret) | ||
4091 | return ret; | ||
4092 | |||
4093 | ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, | ||
4094 | chunk_size, stripe_size); | ||
4095 | if (ret) | ||
4096 | return ret; | ||
4097 | return 0; | ||
4098 | } | 4088 | } |
4099 | 4089 | ||
4100 | static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | 4090 | static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, |
@@ -4103,66 +4093,31 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
4103 | { | 4093 | { |
4104 | u64 chunk_offset; | 4094 | u64 chunk_offset; |
4105 | u64 sys_chunk_offset; | 4095 | u64 sys_chunk_offset; |
4106 | u64 chunk_size; | ||
4107 | u64 sys_chunk_size; | ||
4108 | u64 stripe_size; | ||
4109 | u64 sys_stripe_size; | ||
4110 | u64 alloc_profile; | 4096 | u64 alloc_profile; |
4111 | struct map_lookup *map; | ||
4112 | struct map_lookup *sys_map; | ||
4113 | struct btrfs_fs_info *fs_info = root->fs_info; | 4097 | struct btrfs_fs_info *fs_info = root->fs_info; |
4114 | struct btrfs_root *extent_root = fs_info->extent_root; | 4098 | struct btrfs_root *extent_root = fs_info->extent_root; |
4115 | int ret; | 4099 | int ret; |
4116 | 4100 | ||
4117 | ret = find_next_chunk(fs_info->chunk_root, | 4101 | chunk_offset = find_next_chunk(fs_info); |
4118 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); | ||
4119 | if (ret) | ||
4120 | return ret; | ||
4121 | |||
4122 | alloc_profile = btrfs_get_alloc_profile(extent_root, 0); | 4102 | alloc_profile = btrfs_get_alloc_profile(extent_root, 0); |
4123 | ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, | 4103 | ret = __btrfs_alloc_chunk(trans, extent_root, chunk_offset, |
4124 | &stripe_size, chunk_offset, alloc_profile); | 4104 | alloc_profile); |
4125 | if (ret) | 4105 | if (ret) |
4126 | return ret; | 4106 | return ret; |
4127 | 4107 | ||
4128 | sys_chunk_offset = chunk_offset + chunk_size; | 4108 | sys_chunk_offset = find_next_chunk(root->fs_info); |
4129 | |||
4130 | alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); | 4109 | alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); |
4131 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, | 4110 | ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset, |
4132 | &sys_chunk_size, &sys_stripe_size, | 4111 | alloc_profile); |
4133 | sys_chunk_offset, alloc_profile); | ||
4134 | if (ret) { | 4112 | if (ret) { |
4135 | btrfs_abort_transaction(trans, root, ret); | 4113 | btrfs_abort_transaction(trans, root, ret); |
4136 | goto out; | 4114 | goto out; |
4137 | } | 4115 | } |
4138 | 4116 | ||
4139 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); | 4117 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); |
4140 | if (ret) { | ||
4141 | btrfs_abort_transaction(trans, root, ret); | ||
4142 | goto out; | ||
4143 | } | ||
4144 | |||
4145 | /* | ||
4146 | * Modifying chunk tree needs allocating new blocks from both | ||
4147 | * system block group and metadata block group. So we only can | ||
4148 | * do operations require modifying the chunk tree after both | ||
4149 | * block groups were created. | ||
4150 | */ | ||
4151 | ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, | ||
4152 | chunk_size, stripe_size); | ||
4153 | if (ret) { | ||
4154 | btrfs_abort_transaction(trans, root, ret); | ||
4155 | goto out; | ||
4156 | } | ||
4157 | |||
4158 | ret = __finish_chunk_alloc(trans, extent_root, sys_map, | ||
4159 | sys_chunk_offset, sys_chunk_size, | ||
4160 | sys_stripe_size); | ||
4161 | if (ret) | 4118 | if (ret) |
4162 | btrfs_abort_transaction(trans, root, ret); | 4119 | btrfs_abort_transaction(trans, root, ret); |
4163 | |||
4164 | out: | 4120 | out: |
4165 | |||
4166 | return ret; | 4121 | return ret; |
4167 | } | 4122 | } |
4168 | 4123 | ||
@@ -4435,9 +4390,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
4435 | map = (struct map_lookup *)em->bdev; | 4390 | map = (struct map_lookup *)em->bdev; |
4436 | offset = logical - em->start; | 4391 | offset = logical - em->start; |
4437 | 4392 | ||
4438 | if (mirror_num > map->num_stripes) | ||
4439 | mirror_num = 0; | ||
4440 | |||
4441 | stripe_len = map->stripe_len; | 4393 | stripe_len = map->stripe_len; |
4442 | stripe_nr = offset; | 4394 | stripe_nr = offset; |
4443 | /* | 4395 | /* |
@@ -5367,7 +5319,6 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, | |||
5367 | return NULL; | 5319 | return NULL; |
5368 | list_add(&device->dev_list, | 5320 | list_add(&device->dev_list, |
5369 | &fs_devices->devices); | 5321 | &fs_devices->devices); |
5370 | device->dev_root = root->fs_info->dev_root; | ||
5371 | device->devid = devid; | 5322 | device->devid = devid; |
5372 | device->work.func = pending_bios_fn; | 5323 | device->work.func = pending_bios_fn; |
5373 | device->fs_devices = fs_devices; | 5324 | device->fs_devices = fs_devices; |
@@ -5593,7 +5544,6 @@ static int read_one_dev(struct btrfs_root *root, | |||
5593 | } | 5544 | } |
5594 | 5545 | ||
5595 | fill_device_from_item(leaf, dev_item, device); | 5546 | fill_device_from_item(leaf, dev_item, device); |
5596 | device->dev_root = root->fs_info->dev_root; | ||
5597 | device->in_fs_metadata = 1; | 5547 | device->in_fs_metadata = 1; |
5598 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { | 5548 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { |
5599 | device->fs_devices->total_rw_bytes += device->total_bytes; | 5549 | device->fs_devices->total_rw_bytes += device->total_bytes; |
@@ -5751,6 +5701,17 @@ error: | |||
5751 | return ret; | 5701 | return ret; |
5752 | } | 5702 | } |
5753 | 5703 | ||
5704 | void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) | ||
5705 | { | ||
5706 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
5707 | struct btrfs_device *device; | ||
5708 | |||
5709 | mutex_lock(&fs_devices->device_list_mutex); | ||
5710 | list_for_each_entry(device, &fs_devices->devices, dev_list) | ||
5711 | device->dev_root = fs_info->dev_root; | ||
5712 | mutex_unlock(&fs_devices->device_list_mutex); | ||
5713 | } | ||
5714 | |||
5754 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev) | 5715 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev) |
5755 | { | 5716 | { |
5756 | int i; | 5717 | int i; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f6247e2a47f7..86705583480d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -316,11 +316,13 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info); | |||
316 | int btrfs_pause_balance(struct btrfs_fs_info *fs_info); | 316 | int btrfs_pause_balance(struct btrfs_fs_info *fs_info); |
317 | int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); | 317 | int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); |
318 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 318 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
319 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | 319 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
320 | struct btrfs_device *device, u64 num_bytes, | ||
320 | u64 *start, u64 *max_avail); | 321 | u64 *start, u64 *max_avail); |
321 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); | 322 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); |
322 | int btrfs_get_dev_stats(struct btrfs_root *root, | 323 | int btrfs_get_dev_stats(struct btrfs_root *root, |
323 | struct btrfs_ioctl_get_dev_stats *stats); | 324 | struct btrfs_ioctl_get_dev_stats *stats); |
325 | void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); | ||
324 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); | 326 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); |
325 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | 327 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, |
326 | struct btrfs_fs_info *fs_info); | 328 | struct btrfs_fs_info *fs_info); |
@@ -336,6 +338,9 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, | |||
336 | unsigned long btrfs_full_stripe_len(struct btrfs_root *root, | 338 | unsigned long btrfs_full_stripe_len(struct btrfs_root *root, |
337 | struct btrfs_mapping_tree *map_tree, | 339 | struct btrfs_mapping_tree *map_tree, |
338 | u64 logical); | 340 | u64 logical); |
341 | int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, | ||
342 | struct btrfs_root *extent_root, | ||
343 | u64 chunk_offset, u64 chunk_size); | ||
339 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, | 344 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, |
340 | int index) | 345 | int index) |
341 | { | 346 | { |
diff --git a/fs/buffer.c b/fs/buffer.c index d2a4d1bb2d57..4d7433534f5c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -83,6 +83,40 @@ void unlock_buffer(struct buffer_head *bh) | |||
83 | EXPORT_SYMBOL(unlock_buffer); | 83 | EXPORT_SYMBOL(unlock_buffer); |
84 | 84 | ||
85 | /* | 85 | /* |
86 | * Returns if the page has dirty or writeback buffers. If all the buffers | ||
87 | * are unlocked and clean then the PageDirty information is stale. If | ||
88 | * any of the pages are locked, it is assumed they are locked for IO. | ||
89 | */ | ||
90 | void buffer_check_dirty_writeback(struct page *page, | ||
91 | bool *dirty, bool *writeback) | ||
92 | { | ||
93 | struct buffer_head *head, *bh; | ||
94 | *dirty = false; | ||
95 | *writeback = false; | ||
96 | |||
97 | BUG_ON(!PageLocked(page)); | ||
98 | |||
99 | if (!page_has_buffers(page)) | ||
100 | return; | ||
101 | |||
102 | if (PageWriteback(page)) | ||
103 | *writeback = true; | ||
104 | |||
105 | head = page_buffers(page); | ||
106 | bh = head; | ||
107 | do { | ||
108 | if (buffer_locked(bh)) | ||
109 | *writeback = true; | ||
110 | |||
111 | if (buffer_dirty(bh)) | ||
112 | *dirty = true; | ||
113 | |||
114 | bh = bh->b_this_page; | ||
115 | } while (bh != head); | ||
116 | } | ||
117 | EXPORT_SYMBOL(buffer_check_dirty_writeback); | ||
118 | |||
119 | /* | ||
86 | * Block until a buffer comes unlocked. This doesn't stop it | 120 | * Block until a buffer comes unlocked. This doesn't stop it |
87 | * from becoming locked again - you have to lock it yourself | 121 | * from becoming locked again - you have to lock it yourself |
88 | * if you want to preserve its state. | 122 | * if you want to preserve its state. |
@@ -1454,7 +1488,8 @@ static void discard_buffer(struct buffer_head * bh) | |||
1454 | * block_invalidatepage - invalidate part or all of a buffer-backed page | 1488 | * block_invalidatepage - invalidate part or all of a buffer-backed page |
1455 | * | 1489 | * |
1456 | * @page: the page which is affected | 1490 | * @page: the page which is affected |
1457 | * @offset: the index of the truncation point | 1491 | * @offset: start of the range to invalidate |
1492 | * @length: length of the range to invalidate | ||
1458 | * | 1493 | * |
1459 | * block_invalidatepage() is called when all or part of the page has become | 1494 | * block_invalidatepage() is called when all or part of the page has become |
1460 | * invalidated by a truncate operation. | 1495 | * invalidated by a truncate operation. |
@@ -1465,15 +1500,22 @@ static void discard_buffer(struct buffer_head * bh) | |||
1465 | * point. Because the caller is about to free (and possibly reuse) those | 1500 | * point. Because the caller is about to free (and possibly reuse) those |
1466 | * blocks on-disk. | 1501 | * blocks on-disk. |
1467 | */ | 1502 | */ |
1468 | void block_invalidatepage(struct page *page, unsigned long offset) | 1503 | void block_invalidatepage(struct page *page, unsigned int offset, |
1504 | unsigned int length) | ||
1469 | { | 1505 | { |
1470 | struct buffer_head *head, *bh, *next; | 1506 | struct buffer_head *head, *bh, *next; |
1471 | unsigned int curr_off = 0; | 1507 | unsigned int curr_off = 0; |
1508 | unsigned int stop = length + offset; | ||
1472 | 1509 | ||
1473 | BUG_ON(!PageLocked(page)); | 1510 | BUG_ON(!PageLocked(page)); |
1474 | if (!page_has_buffers(page)) | 1511 | if (!page_has_buffers(page)) |
1475 | goto out; | 1512 | goto out; |
1476 | 1513 | ||
1514 | /* | ||
1515 | * Check for overflow | ||
1516 | */ | ||
1517 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | ||
1518 | |||
1477 | head = page_buffers(page); | 1519 | head = page_buffers(page); |
1478 | bh = head; | 1520 | bh = head; |
1479 | do { | 1521 | do { |
@@ -1481,6 +1523,12 @@ void block_invalidatepage(struct page *page, unsigned long offset) | |||
1481 | next = bh->b_this_page; | 1523 | next = bh->b_this_page; |
1482 | 1524 | ||
1483 | /* | 1525 | /* |
1526 | * Are we still fully in range ? | ||
1527 | */ | ||
1528 | if (next_off > stop) | ||
1529 | goto out; | ||
1530 | |||
1531 | /* | ||
1484 | * is this block fully invalidated? | 1532 | * is this block fully invalidated? |
1485 | */ | 1533 | */ |
1486 | if (offset <= curr_off) | 1534 | if (offset <= curr_off) |
@@ -1501,6 +1549,7 @@ out: | |||
1501 | } | 1549 | } |
1502 | EXPORT_SYMBOL(block_invalidatepage); | 1550 | EXPORT_SYMBOL(block_invalidatepage); |
1503 | 1551 | ||
1552 | |||
1504 | /* | 1553 | /* |
1505 | * We attach and possibly dirty the buffers atomically wrt | 1554 | * We attach and possibly dirty the buffers atomically wrt |
1506 | * __set_page_dirty_buffers() via private_lock. try_to_free_buffers | 1555 | * __set_page_dirty_buffers() via private_lock. try_to_free_buffers |
@@ -2841,7 +2890,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block, | |||
2841 | * they may have been added in ext3_writepage(). Make them | 2890 | * they may have been added in ext3_writepage(). Make them |
2842 | * freeable here, so the page does not leak. | 2891 | * freeable here, so the page does not leak. |
2843 | */ | 2892 | */ |
2844 | do_invalidatepage(page, 0); | 2893 | do_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
2845 | unlock_page(page); | 2894 | unlock_page(page); |
2846 | return 0; /* don't care */ | 2895 | return 0; /* don't care */ |
2847 | } | 2896 | } |
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 746ce532e130..d4c1206af9fc 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c | |||
@@ -13,8 +13,6 @@ | |||
13 | #include <linux/mount.h> | 13 | #include <linux/mount.h> |
14 | #include "internal.h" | 14 | #include "internal.h" |
15 | 15 | ||
16 | #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) | ||
17 | |||
18 | struct cachefiles_lookup_data { | 16 | struct cachefiles_lookup_data { |
19 | struct cachefiles_xattr *auxdata; /* auxiliary data */ | 17 | struct cachefiles_xattr *auxdata; /* auxiliary data */ |
20 | char *key; /* key path */ | 18 | char *key; /* key path */ |
@@ -212,20 +210,29 @@ static void cachefiles_update_object(struct fscache_object *_object) | |||
212 | object = container_of(_object, struct cachefiles_object, fscache); | 210 | object = container_of(_object, struct cachefiles_object, fscache); |
213 | cache = container_of(object->fscache.cache, struct cachefiles_cache, | 211 | cache = container_of(object->fscache.cache, struct cachefiles_cache, |
214 | cache); | 212 | cache); |
213 | |||
214 | if (!fscache_use_cookie(_object)) { | ||
215 | _leave(" [relinq]"); | ||
216 | return; | ||
217 | } | ||
218 | |||
215 | cookie = object->fscache.cookie; | 219 | cookie = object->fscache.cookie; |
216 | 220 | ||
217 | if (!cookie->def->get_aux) { | 221 | if (!cookie->def->get_aux) { |
222 | fscache_unuse_cookie(_object); | ||
218 | _leave(" [no aux]"); | 223 | _leave(" [no aux]"); |
219 | return; | 224 | return; |
220 | } | 225 | } |
221 | 226 | ||
222 | auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp); | 227 | auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp); |
223 | if (!auxdata) { | 228 | if (!auxdata) { |
229 | fscache_unuse_cookie(_object); | ||
224 | _leave(" [nomem]"); | 230 | _leave(" [nomem]"); |
225 | return; | 231 | return; |
226 | } | 232 | } |
227 | 233 | ||
228 | auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511); | 234 | auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511); |
235 | fscache_unuse_cookie(_object); | ||
229 | ASSERTCMP(auxlen, <, 511); | 236 | ASSERTCMP(auxlen, <, 511); |
230 | 237 | ||
231 | auxdata->len = auxlen + 1; | 238 | auxdata->len = auxlen + 1; |
@@ -263,7 +270,7 @@ static void cachefiles_drop_object(struct fscache_object *_object) | |||
263 | #endif | 270 | #endif |
264 | 271 | ||
265 | /* delete retired objects */ | 272 | /* delete retired objects */ |
266 | if (object->fscache.state == FSCACHE_OBJECT_RECYCLING && | 273 | if (test_bit(FSCACHE_COOKIE_RETIRED, &object->fscache.cookie->flags) && |
267 | _object != cache->cache.fsdef | 274 | _object != cache->cache.fsdef |
268 | ) { | 275 | ) { |
269 | _debug("- retire object OBJ%x", object->fscache.debug_id); | 276 | _debug("- retire object OBJ%x", object->fscache.debug_id); |
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 8c01c5fcdf75..25badd1aec5c 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c | |||
@@ -38,7 +38,7 @@ void __cachefiles_printk_object(struct cachefiles_object *object, | |||
38 | printk(KERN_ERR "%sobject: OBJ%x\n", | 38 | printk(KERN_ERR "%sobject: OBJ%x\n", |
39 | prefix, object->fscache.debug_id); | 39 | prefix, object->fscache.debug_id); |
40 | printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", | 40 | printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", |
41 | prefix, fscache_object_states[object->fscache.state], | 41 | prefix, object->fscache.state->name, |
42 | object->fscache.flags, work_busy(&object->fscache.work), | 42 | object->fscache.flags, work_busy(&object->fscache.work), |
43 | object->fscache.events, object->fscache.event_mask); | 43 | object->fscache.events, object->fscache.event_mask); |
44 | printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", | 44 | printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", |
@@ -127,10 +127,10 @@ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, | |||
127 | found_dentry: | 127 | found_dentry: |
128 | kdebug("preemptive burial: OBJ%x [%s] %p", | 128 | kdebug("preemptive burial: OBJ%x [%s] %p", |
129 | object->fscache.debug_id, | 129 | object->fscache.debug_id, |
130 | fscache_object_states[object->fscache.state], | 130 | object->fscache.state->name, |
131 | dentry); | 131 | dentry); |
132 | 132 | ||
133 | if (object->fscache.state < FSCACHE_OBJECT_DYING) { | 133 | if (fscache_object_is_live(&object->fscache)) { |
134 | printk(KERN_ERR "\n"); | 134 | printk(KERN_ERR "\n"); |
135 | printk(KERN_ERR "CacheFiles: Error:" | 135 | printk(KERN_ERR "CacheFiles: Error:" |
136 | " Can't preemptively bury live object\n"); | 136 | " Can't preemptively bury live object\n"); |
@@ -192,7 +192,7 @@ try_again: | |||
192 | /* an old object from a previous incarnation is hogging the slot - we | 192 | /* an old object from a previous incarnation is hogging the slot - we |
193 | * need to wait for it to be destroyed */ | 193 | * need to wait for it to be destroyed */ |
194 | wait_for_old_object: | 194 | wait_for_old_object: |
195 | if (xobject->fscache.state < FSCACHE_OBJECT_DYING) { | 195 | if (fscache_object_is_live(&object->fscache)) { |
196 | printk(KERN_ERR "\n"); | 196 | printk(KERN_ERR "\n"); |
197 | printk(KERN_ERR "CacheFiles: Error:" | 197 | printk(KERN_ERR "CacheFiles: Error:" |
198 | " Unexpected object collision\n"); | 198 | " Unexpected object collision\n"); |
@@ -836,7 +836,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, | |||
836 | // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); | 836 | // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); |
837 | 837 | ||
838 | /* look up the victim */ | 838 | /* look up the victim */ |
839 | mutex_lock_nested(&dir->d_inode->i_mutex, 1); | 839 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); |
840 | 840 | ||
841 | start = jiffies; | 841 | start = jiffies; |
842 | victim = lookup_one_len(filename, dir, strlen(filename)); | 842 | victim = lookup_one_len(filename, dir, strlen(filename)); |
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 317f9ee9c991..ebaff368120d 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/mount.h> | 12 | #include <linux/mount.h> |
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/file.h> | 14 | #include <linux/file.h> |
15 | #include <linux/swap.h> | ||
15 | #include "internal.h" | 16 | #include "internal.h" |
16 | 17 | ||
17 | /* | 18 | /* |
@@ -227,8 +228,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op) | |||
227 | */ | 228 | */ |
228 | static int cachefiles_read_backing_file_one(struct cachefiles_object *object, | 229 | static int cachefiles_read_backing_file_one(struct cachefiles_object *object, |
229 | struct fscache_retrieval *op, | 230 | struct fscache_retrieval *op, |
230 | struct page *netpage, | 231 | struct page *netpage) |
231 | struct pagevec *pagevec) | ||
232 | { | 232 | { |
233 | struct cachefiles_one_read *monitor; | 233 | struct cachefiles_one_read *monitor; |
234 | struct address_space *bmapping; | 234 | struct address_space *bmapping; |
@@ -237,8 +237,6 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, | |||
237 | 237 | ||
238 | _enter(""); | 238 | _enter(""); |
239 | 239 | ||
240 | pagevec_reinit(pagevec); | ||
241 | |||
242 | _debug("read back %p{%lu,%d}", | 240 | _debug("read back %p{%lu,%d}", |
243 | netpage, netpage->index, page_count(netpage)); | 241 | netpage, netpage->index, page_count(netpage)); |
244 | 242 | ||
@@ -283,9 +281,7 @@ installed_new_backing_page: | |||
283 | backpage = newpage; | 281 | backpage = newpage; |
284 | newpage = NULL; | 282 | newpage = NULL; |
285 | 283 | ||
286 | page_cache_get(backpage); | 284 | lru_cache_add_file(backpage); |
287 | pagevec_add(pagevec, backpage); | ||
288 | __pagevec_lru_add_file(pagevec); | ||
289 | 285 | ||
290 | read_backing_page: | 286 | read_backing_page: |
291 | ret = bmapping->a_ops->readpage(NULL, backpage); | 287 | ret = bmapping->a_ops->readpage(NULL, backpage); |
@@ -452,8 +448,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, | |||
452 | if (block) { | 448 | if (block) { |
453 | /* submit the apparently valid page to the backing fs to be | 449 | /* submit the apparently valid page to the backing fs to be |
454 | * read from disk */ | 450 | * read from disk */ |
455 | ret = cachefiles_read_backing_file_one(object, op, page, | 451 | ret = cachefiles_read_backing_file_one(object, op, page); |
456 | &pagevec); | ||
457 | } else if (cachefiles_has_space(cache, 0, 1) == 0) { | 452 | } else if (cachefiles_has_space(cache, 0, 1) == 0) { |
458 | /* there's space in the cache we can use */ | 453 | /* there's space in the cache we can use */ |
459 | fscache_mark_page_cached(op, page); | 454 | fscache_mark_page_cached(op, page); |
@@ -482,14 +477,11 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
482 | { | 477 | { |
483 | struct cachefiles_one_read *monitor = NULL; | 478 | struct cachefiles_one_read *monitor = NULL; |
484 | struct address_space *bmapping = object->backer->d_inode->i_mapping; | 479 | struct address_space *bmapping = object->backer->d_inode->i_mapping; |
485 | struct pagevec lru_pvec; | ||
486 | struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; | 480 | struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; |
487 | int ret = 0; | 481 | int ret = 0; |
488 | 482 | ||
489 | _enter(""); | 483 | _enter(""); |
490 | 484 | ||
491 | pagevec_init(&lru_pvec, 0); | ||
492 | |||
493 | list_for_each_entry_safe(netpage, _n, list, lru) { | 485 | list_for_each_entry_safe(netpage, _n, list, lru) { |
494 | list_del(&netpage->lru); | 486 | list_del(&netpage->lru); |
495 | 487 | ||
@@ -534,9 +526,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
534 | backpage = newpage; | 526 | backpage = newpage; |
535 | newpage = NULL; | 527 | newpage = NULL; |
536 | 528 | ||
537 | page_cache_get(backpage); | 529 | lru_cache_add_file(backpage); |
538 | if (!pagevec_add(&lru_pvec, backpage)) | ||
539 | __pagevec_lru_add_file(&lru_pvec); | ||
540 | 530 | ||
541 | reread_backing_page: | 531 | reread_backing_page: |
542 | ret = bmapping->a_ops->readpage(NULL, backpage); | 532 | ret = bmapping->a_ops->readpage(NULL, backpage); |
@@ -559,9 +549,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
559 | goto nomem; | 549 | goto nomem; |
560 | } | 550 | } |
561 | 551 | ||
562 | page_cache_get(netpage); | 552 | lru_cache_add_file(netpage); |
563 | if (!pagevec_add(&lru_pvec, netpage)) | ||
564 | __pagevec_lru_add_file(&lru_pvec); | ||
565 | 553 | ||
566 | /* install a monitor */ | 554 | /* install a monitor */ |
567 | page_cache_get(netpage); | 555 | page_cache_get(netpage); |
@@ -643,9 +631,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
643 | 631 | ||
644 | fscache_mark_page_cached(op, netpage); | 632 | fscache_mark_page_cached(op, netpage); |
645 | 633 | ||
646 | page_cache_get(netpage); | 634 | lru_cache_add_file(netpage); |
647 | if (!pagevec_add(&lru_pvec, netpage)) | ||
648 | __pagevec_lru_add_file(&lru_pvec); | ||
649 | 635 | ||
650 | /* the netpage is unlocked and marked up to date here */ | 636 | /* the netpage is unlocked and marked up to date here */ |
651 | fscache_end_io(op, netpage, 0); | 637 | fscache_end_io(op, netpage, 0); |
@@ -661,8 +647,6 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
661 | 647 | ||
662 | out: | 648 | out: |
663 | /* tidy up */ | 649 | /* tidy up */ |
664 | pagevec_lru_add_file(&lru_pvec); | ||
665 | |||
666 | if (newpage) | 650 | if (newpage) |
667 | page_cache_release(newpage); | 651 | page_cache_release(newpage); |
668 | if (netpage) | 652 | if (netpage) |
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 73b46288b54b..2476e5162609 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c | |||
@@ -109,13 +109,12 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object, | |||
109 | struct dentry *dentry = object->dentry; | 109 | struct dentry *dentry = object->dentry; |
110 | int ret; | 110 | int ret; |
111 | 111 | ||
112 | ASSERT(object->fscache.cookie); | ||
113 | ASSERT(dentry); | 112 | ASSERT(dentry); |
114 | 113 | ||
115 | _enter("%p,#%d", object, auxdata->len); | 114 | _enter("%p,#%d", object, auxdata->len); |
116 | 115 | ||
117 | /* attempt to install the cache metadata directly */ | 116 | /* attempt to install the cache metadata directly */ |
118 | _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len); | 117 | _debug("SET #%u", auxdata->len); |
119 | 118 | ||
120 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, | 119 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, |
121 | &auxdata->type, auxdata->len, | 120 | &auxdata->type, auxdata->len, |
@@ -138,13 +137,12 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object, | |||
138 | struct dentry *dentry = object->dentry; | 137 | struct dentry *dentry = object->dentry; |
139 | int ret; | 138 | int ret; |
140 | 139 | ||
141 | ASSERT(object->fscache.cookie); | ||
142 | ASSERT(dentry); | 140 | ASSERT(dentry); |
143 | 141 | ||
144 | _enter("%p,#%d", object, auxdata->len); | 142 | _enter("%p,#%d", object, auxdata->len); |
145 | 143 | ||
146 | /* attempt to install the cache metadata directly */ | 144 | /* attempt to install the cache metadata directly */ |
147 | _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len); | 145 | _debug("SET #%u", auxdata->len); |
148 | 146 | ||
149 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, | 147 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, |
150 | &auxdata->type, auxdata->len, | 148 | &auxdata->type, auxdata->len, |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index f1d6c60ab229..722585cd5c7e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -143,7 +143,8 @@ static int ceph_set_page_dirty(struct page *page) | |||
143 | * dirty page counters appropriately. Only called if there is private | 143 | * dirty page counters appropriately. Only called if there is private |
144 | * data on the page. | 144 | * data on the page. |
145 | */ | 145 | */ |
146 | static void ceph_invalidatepage(struct page *page, unsigned long offset) | 146 | static void ceph_invalidatepage(struct page *page, unsigned int offset, |
147 | unsigned int length) | ||
147 | { | 148 | { |
148 | struct inode *inode; | 149 | struct inode *inode; |
149 | struct ceph_inode_info *ci; | 150 | struct ceph_inode_info *ci; |
@@ -159,20 +160,20 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset) | |||
159 | if (!PageDirty(page)) | 160 | if (!PageDirty(page)) |
160 | pr_err("%p invalidatepage %p page not dirty\n", inode, page); | 161 | pr_err("%p invalidatepage %p page not dirty\n", inode, page); |
161 | 162 | ||
162 | if (offset == 0) | 163 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
163 | ClearPageChecked(page); | 164 | ClearPageChecked(page); |
164 | 165 | ||
165 | ci = ceph_inode(inode); | 166 | ci = ceph_inode(inode); |
166 | if (offset == 0) { | 167 | if (offset == 0 && length == PAGE_CACHE_SIZE) { |
167 | dout("%p invalidatepage %p idx %lu full dirty page %lu\n", | 168 | dout("%p invalidatepage %p idx %lu full dirty page\n", |
168 | inode, page, page->index, offset); | 169 | inode, page, page->index); |
169 | ceph_put_wrbuffer_cap_refs(ci, 1, snapc); | 170 | ceph_put_wrbuffer_cap_refs(ci, 1, snapc); |
170 | ceph_put_snap_context(snapc); | 171 | ceph_put_snap_context(snapc); |
171 | page->private = 0; | 172 | page->private = 0; |
172 | ClearPagePrivate(page); | 173 | ClearPagePrivate(page); |
173 | } else { | 174 | } else { |
174 | dout("%p invalidatepage %p idx %lu partial dirty page\n", | 175 | dout("%p invalidatepage %p idx %lu partial dirty page %u(%u)\n", |
175 | inode, page, page->index); | 176 | inode, page, page->index, offset, length); |
176 | } | 177 | } |
177 | } | 178 | } |
178 | 179 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 0e4da4a9c213..868b61d56cac 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -111,11 +111,10 @@ static unsigned fpos_off(loff_t p) | |||
111 | * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by | 111 | * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by |
112 | * the MDS if/when the directory is modified). | 112 | * the MDS if/when the directory is modified). |
113 | */ | 113 | */ |
114 | static int __dcache_readdir(struct file *filp, | 114 | static int __dcache_readdir(struct file *file, struct dir_context *ctx) |
115 | void *dirent, filldir_t filldir) | ||
116 | { | 115 | { |
117 | struct ceph_file_info *fi = filp->private_data; | 116 | struct ceph_file_info *fi = file->private_data; |
118 | struct dentry *parent = filp->f_dentry; | 117 | struct dentry *parent = file->f_dentry; |
119 | struct inode *dir = parent->d_inode; | 118 | struct inode *dir = parent->d_inode; |
120 | struct list_head *p; | 119 | struct list_head *p; |
121 | struct dentry *dentry, *last; | 120 | struct dentry *dentry, *last; |
@@ -126,14 +125,14 @@ static int __dcache_readdir(struct file *filp, | |||
126 | last = fi->dentry; | 125 | last = fi->dentry; |
127 | fi->dentry = NULL; | 126 | fi->dentry = NULL; |
128 | 127 | ||
129 | dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos, | 128 | dout("__dcache_readdir %p at %llu (last %p)\n", dir, ctx->pos, |
130 | last); | 129 | last); |
131 | 130 | ||
132 | spin_lock(&parent->d_lock); | 131 | spin_lock(&parent->d_lock); |
133 | 132 | ||
134 | /* start at beginning? */ | 133 | /* start at beginning? */ |
135 | if (filp->f_pos == 2 || last == NULL || | 134 | if (ctx->pos == 2 || last == NULL || |
136 | filp->f_pos < ceph_dentry(last)->offset) { | 135 | ctx->pos < ceph_dentry(last)->offset) { |
137 | if (list_empty(&parent->d_subdirs)) | 136 | if (list_empty(&parent->d_subdirs)) |
138 | goto out_unlock; | 137 | goto out_unlock; |
139 | p = parent->d_subdirs.prev; | 138 | p = parent->d_subdirs.prev; |
@@ -157,11 +156,11 @@ more: | |||
157 | if (!d_unhashed(dentry) && dentry->d_inode && | 156 | if (!d_unhashed(dentry) && dentry->d_inode && |
158 | ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && | 157 | ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && |
159 | ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && | 158 | ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && |
160 | filp->f_pos <= di->offset) | 159 | ctx->pos <= di->offset) |
161 | break; | 160 | break; |
162 | dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry, | 161 | dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry, |
163 | dentry->d_name.len, dentry->d_name.name, di->offset, | 162 | dentry->d_name.len, dentry->d_name.name, di->offset, |
164 | filp->f_pos, d_unhashed(dentry) ? " unhashed" : "", | 163 | ctx->pos, d_unhashed(dentry) ? " unhashed" : "", |
165 | !dentry->d_inode ? " null" : ""); | 164 | !dentry->d_inode ? " null" : ""); |
166 | spin_unlock(&dentry->d_lock); | 165 | spin_unlock(&dentry->d_lock); |
167 | p = p->prev; | 166 | p = p->prev; |
@@ -173,29 +172,27 @@ more: | |||
173 | spin_unlock(&dentry->d_lock); | 172 | spin_unlock(&dentry->d_lock); |
174 | spin_unlock(&parent->d_lock); | 173 | spin_unlock(&parent->d_lock); |
175 | 174 | ||
176 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, | 175 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos, |
177 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 176 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); |
178 | filp->f_pos = di->offset; | 177 | ctx->pos = di->offset; |
179 | err = filldir(dirent, dentry->d_name.name, | 178 | if (!dir_emit(ctx, dentry->d_name.name, |
180 | dentry->d_name.len, di->offset, | 179 | dentry->d_name.len, |
181 | ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), | 180 | ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), |
182 | dentry->d_inode->i_mode >> 12); | 181 | dentry->d_inode->i_mode >> 12)) { |
183 | 182 | if (last) { | |
184 | if (last) { | ||
185 | if (err < 0) { | ||
186 | /* remember our position */ | 183 | /* remember our position */ |
187 | fi->dentry = last; | 184 | fi->dentry = last; |
188 | fi->next_offset = di->offset; | 185 | fi->next_offset = di->offset; |
189 | } else { | ||
190 | dput(last); | ||
191 | } | 186 | } |
187 | dput(dentry); | ||
188 | return 0; | ||
192 | } | 189 | } |
193 | last = dentry; | ||
194 | 190 | ||
195 | if (err < 0) | 191 | if (last) |
196 | goto out; | 192 | dput(last); |
193 | last = dentry; | ||
197 | 194 | ||
198 | filp->f_pos++; | 195 | ctx->pos++; |
199 | 196 | ||
200 | /* make sure a dentry wasn't dropped while we didn't have parent lock */ | 197 | /* make sure a dentry wasn't dropped while we didn't have parent lock */ |
201 | if (!ceph_dir_is_complete(dir)) { | 198 | if (!ceph_dir_is_complete(dir)) { |
@@ -235,59 +232,59 @@ static int note_last_dentry(struct ceph_file_info *fi, const char *name, | |||
235 | return 0; | 232 | return 0; |
236 | } | 233 | } |
237 | 234 | ||
238 | static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | 235 | static int ceph_readdir(struct file *file, struct dir_context *ctx) |
239 | { | 236 | { |
240 | struct ceph_file_info *fi = filp->private_data; | 237 | struct ceph_file_info *fi = file->private_data; |
241 | struct inode *inode = file_inode(filp); | 238 | struct inode *inode = file_inode(file); |
242 | struct ceph_inode_info *ci = ceph_inode(inode); | 239 | struct ceph_inode_info *ci = ceph_inode(inode); |
243 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 240 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
244 | struct ceph_mds_client *mdsc = fsc->mdsc; | 241 | struct ceph_mds_client *mdsc = fsc->mdsc; |
245 | unsigned frag = fpos_frag(filp->f_pos); | 242 | unsigned frag = fpos_frag(ctx->pos); |
246 | int off = fpos_off(filp->f_pos); | 243 | int off = fpos_off(ctx->pos); |
247 | int err; | 244 | int err; |
248 | u32 ftype; | 245 | u32 ftype; |
249 | struct ceph_mds_reply_info_parsed *rinfo; | 246 | struct ceph_mds_reply_info_parsed *rinfo; |
250 | const int max_entries = fsc->mount_options->max_readdir; | 247 | const int max_entries = fsc->mount_options->max_readdir; |
251 | const int max_bytes = fsc->mount_options->max_readdir_bytes; | 248 | const int max_bytes = fsc->mount_options->max_readdir_bytes; |
252 | 249 | ||
253 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | 250 | dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off); |
254 | if (fi->flags & CEPH_F_ATEND) | 251 | if (fi->flags & CEPH_F_ATEND) |
255 | return 0; | 252 | return 0; |
256 | 253 | ||
257 | /* always start with . and .. */ | 254 | /* always start with . and .. */ |
258 | if (filp->f_pos == 0) { | 255 | if (ctx->pos == 0) { |
259 | /* note dir version at start of readdir so we can tell | 256 | /* note dir version at start of readdir so we can tell |
260 | * if any dentries get dropped */ | 257 | * if any dentries get dropped */ |
261 | fi->dir_release_count = atomic_read(&ci->i_release_count); | 258 | fi->dir_release_count = atomic_read(&ci->i_release_count); |
262 | 259 | ||
263 | dout("readdir off 0 -> '.'\n"); | 260 | dout("readdir off 0 -> '.'\n"); |
264 | if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0), | 261 | if (!dir_emit(ctx, ".", 1, |
265 | ceph_translate_ino(inode->i_sb, inode->i_ino), | 262 | ceph_translate_ino(inode->i_sb, inode->i_ino), |
266 | inode->i_mode >> 12) < 0) | 263 | inode->i_mode >> 12)) |
267 | return 0; | 264 | return 0; |
268 | filp->f_pos = 1; | 265 | ctx->pos = 1; |
269 | off = 1; | 266 | off = 1; |
270 | } | 267 | } |
271 | if (filp->f_pos == 1) { | 268 | if (ctx->pos == 1) { |
272 | ino_t ino = parent_ino(filp->f_dentry); | 269 | ino_t ino = parent_ino(file->f_dentry); |
273 | dout("readdir off 1 -> '..'\n"); | 270 | dout("readdir off 1 -> '..'\n"); |
274 | if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1), | 271 | if (!dir_emit(ctx, "..", 2, |
275 | ceph_translate_ino(inode->i_sb, ino), | 272 | ceph_translate_ino(inode->i_sb, ino), |
276 | inode->i_mode >> 12) < 0) | 273 | inode->i_mode >> 12)) |
277 | return 0; | 274 | return 0; |
278 | filp->f_pos = 2; | 275 | ctx->pos = 2; |
279 | off = 2; | 276 | off = 2; |
280 | } | 277 | } |
281 | 278 | ||
282 | /* can we use the dcache? */ | 279 | /* can we use the dcache? */ |
283 | spin_lock(&ci->i_ceph_lock); | 280 | spin_lock(&ci->i_ceph_lock); |
284 | if ((filp->f_pos == 2 || fi->dentry) && | 281 | if ((ctx->pos == 2 || fi->dentry) && |
285 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && | 282 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && |
286 | ceph_snap(inode) != CEPH_SNAPDIR && | 283 | ceph_snap(inode) != CEPH_SNAPDIR && |
287 | __ceph_dir_is_complete(ci) && | 284 | __ceph_dir_is_complete(ci) && |
288 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { | 285 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { |
289 | spin_unlock(&ci->i_ceph_lock); | 286 | spin_unlock(&ci->i_ceph_lock); |
290 | err = __dcache_readdir(filp, dirent, filldir); | 287 | err = __dcache_readdir(file, ctx); |
291 | if (err != -EAGAIN) | 288 | if (err != -EAGAIN) |
292 | return err; | 289 | return err; |
293 | } else { | 290 | } else { |
@@ -327,7 +324,7 @@ more: | |||
327 | return PTR_ERR(req); | 324 | return PTR_ERR(req); |
328 | req->r_inode = inode; | 325 | req->r_inode = inode; |
329 | ihold(inode); | 326 | ihold(inode); |
330 | req->r_dentry = dget(filp->f_dentry); | 327 | req->r_dentry = dget(file->f_dentry); |
331 | /* hints to request -> mds selection code */ | 328 | /* hints to request -> mds selection code */ |
332 | req->r_direct_mode = USE_AUTH_MDS; | 329 | req->r_direct_mode = USE_AUTH_MDS; |
333 | req->r_direct_hash = ceph_frag_value(frag); | 330 | req->r_direct_hash = ceph_frag_value(frag); |
@@ -379,15 +376,16 @@ more: | |||
379 | rinfo = &fi->last_readdir->r_reply_info; | 376 | rinfo = &fi->last_readdir->r_reply_info; |
380 | dout("readdir frag %x num %d off %d chunkoff %d\n", frag, | 377 | dout("readdir frag %x num %d off %d chunkoff %d\n", frag, |
381 | rinfo->dir_nr, off, fi->offset); | 378 | rinfo->dir_nr, off, fi->offset); |
379 | |||
380 | ctx->pos = ceph_make_fpos(frag, off); | ||
382 | while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) { | 381 | while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) { |
383 | u64 pos = ceph_make_fpos(frag, off); | ||
384 | struct ceph_mds_reply_inode *in = | 382 | struct ceph_mds_reply_inode *in = |
385 | rinfo->dir_in[off - fi->offset].in; | 383 | rinfo->dir_in[off - fi->offset].in; |
386 | struct ceph_vino vino; | 384 | struct ceph_vino vino; |
387 | ino_t ino; | 385 | ino_t ino; |
388 | 386 | ||
389 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", | 387 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", |
390 | off, off - fi->offset, rinfo->dir_nr, pos, | 388 | off, off - fi->offset, rinfo->dir_nr, ctx->pos, |
391 | rinfo->dir_dname_len[off - fi->offset], | 389 | rinfo->dir_dname_len[off - fi->offset], |
392 | rinfo->dir_dname[off - fi->offset], in); | 390 | rinfo->dir_dname[off - fi->offset], in); |
393 | BUG_ON(!in); | 391 | BUG_ON(!in); |
@@ -395,16 +393,15 @@ more: | |||
395 | vino.ino = le64_to_cpu(in->ino); | 393 | vino.ino = le64_to_cpu(in->ino); |
396 | vino.snap = le64_to_cpu(in->snapid); | 394 | vino.snap = le64_to_cpu(in->snapid); |
397 | ino = ceph_vino_to_ino(vino); | 395 | ino = ceph_vino_to_ino(vino); |
398 | if (filldir(dirent, | 396 | if (!dir_emit(ctx, |
399 | rinfo->dir_dname[off - fi->offset], | 397 | rinfo->dir_dname[off - fi->offset], |
400 | rinfo->dir_dname_len[off - fi->offset], | 398 | rinfo->dir_dname_len[off - fi->offset], |
401 | pos, | 399 | ceph_translate_ino(inode->i_sb, ino), ftype)) { |
402 | ceph_translate_ino(inode->i_sb, ino), ftype) < 0) { | ||
403 | dout("filldir stopping us...\n"); | 400 | dout("filldir stopping us...\n"); |
404 | return 0; | 401 | return 0; |
405 | } | 402 | } |
406 | off++; | 403 | off++; |
407 | filp->f_pos = pos + 1; | 404 | ctx->pos++; |
408 | } | 405 | } |
409 | 406 | ||
410 | if (fi->last_name) { | 407 | if (fi->last_name) { |
@@ -417,7 +414,7 @@ more: | |||
417 | if (!ceph_frag_is_rightmost(frag)) { | 414 | if (!ceph_frag_is_rightmost(frag)) { |
418 | frag = ceph_frag_next(frag); | 415 | frag = ceph_frag_next(frag); |
419 | off = 0; | 416 | off = 0; |
420 | filp->f_pos = ceph_make_fpos(frag, off); | 417 | ctx->pos = ceph_make_fpos(frag, off); |
421 | dout("readdir next frag is %x\n", frag); | 418 | dout("readdir next frag is %x\n", frag); |
422 | goto more; | 419 | goto more; |
423 | } | 420 | } |
@@ -432,11 +429,11 @@ more: | |||
432 | if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { | 429 | if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { |
433 | dout(" marking %p complete\n", inode); | 430 | dout(" marking %p complete\n", inode); |
434 | __ceph_dir_set_complete(ci, fi->dir_release_count); | 431 | __ceph_dir_set_complete(ci, fi->dir_release_count); |
435 | ci->i_max_offset = filp->f_pos; | 432 | ci->i_max_offset = ctx->pos; |
436 | } | 433 | } |
437 | spin_unlock(&ci->i_ceph_lock); | 434 | spin_unlock(&ci->i_ceph_lock); |
438 | 435 | ||
439 | dout("readdir %p filp %p done.\n", inode, filp); | 436 | dout("readdir %p file %p done.\n", inode, file); |
440 | return 0; | 437 | return 0; |
441 | } | 438 | } |
442 | 439 | ||
@@ -1270,7 +1267,7 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) | |||
1270 | 1267 | ||
1271 | const struct file_operations ceph_dir_fops = { | 1268 | const struct file_operations ceph_dir_fops = { |
1272 | .read = ceph_read_dir, | 1269 | .read = ceph_read_dir, |
1273 | .readdir = ceph_readdir, | 1270 | .iterate = ceph_readdir, |
1274 | .llseek = ceph_dir_llseek, | 1271 | .llseek = ceph_dir_llseek, |
1275 | .open = ceph_open, | 1272 | .open = ceph_open, |
1276 | .release = ceph_release, | 1273 | .release = ceph_release, |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a17ffe4ec3ca..bc0735498d29 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -861,16 +861,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) | |||
861 | break; | 861 | break; |
862 | } | 862 | } |
863 | 863 | ||
864 | if (offset < 0 || offset > inode->i_sb->s_maxbytes) { | 864 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
865 | offset = -EINVAL; | ||
866 | goto out; | ||
867 | } | ||
868 | |||
869 | /* Special lock needed here? */ | ||
870 | if (offset != file->f_pos) { | ||
871 | file->f_pos = offset; | ||
872 | file->f_version = 0; | ||
873 | } | ||
874 | 865 | ||
875 | out: | 866 | out: |
876 | mutex_unlock(&inode->i_mutex); | 867 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 3b0abed667c2..98b6e50bde04 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -911,8 +911,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
911 | } else if (realdn) { | 911 | } else if (realdn) { |
912 | dout("dn %p (%d) spliced with %p (%d) " | 912 | dout("dn %p (%d) spliced with %p (%d) " |
913 | "inode %p ino %llx.%llx\n", | 913 | "inode %p ino %llx.%llx\n", |
914 | dn, dn->d_count, | 914 | dn, d_count(dn), |
915 | realdn, realdn->d_count, | 915 | realdn, d_count(realdn), |
916 | realdn->d_inode, ceph_vinop(realdn->d_inode)); | 916 | realdn->d_inode, ceph_vinop(realdn->d_inode)); |
917 | dput(dn); | 917 | dput(dn); |
918 | dn = realdn; | 918 | dn = realdn; |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 89788515a63d..ae6d14e82b0f 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -192,7 +192,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | |||
192 | 192 | ||
193 | /** | 193 | /** |
194 | * Encode the flock and fcntl locks for the given inode into the ceph_filelock | 194 | * Encode the flock and fcntl locks for the given inode into the ceph_filelock |
195 | * array. Must be called with lock_flocks() already held. | 195 | * array. Must be called with inode->i_lock already held. |
196 | * If we encounter more of a specific lock type than expected, return -ENOSPC. | 196 | * If we encounter more of a specific lock type than expected, return -ENOSPC. |
197 | */ | 197 | */ |
198 | int ceph_encode_locks_to_buffer(struct inode *inode, | 198 | int ceph_encode_locks_to_buffer(struct inode *inode, |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index cbf08203e00d..603786b564be 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -1588,7 +1588,7 @@ retry: | |||
1588 | *base = ceph_ino(temp->d_inode); | 1588 | *base = ceph_ino(temp->d_inode); |
1589 | *plen = len; | 1589 | *plen = len; |
1590 | dout("build_path on %p %d built %llx '%.*s'\n", | 1590 | dout("build_path on %p %d built %llx '%.*s'\n", |
1591 | dentry, dentry->d_count, *base, len, path); | 1591 | dentry, d_count(dentry), *base, len, path); |
1592 | return path; | 1592 | return path; |
1593 | } | 1593 | } |
1594 | 1594 | ||
@@ -2517,20 +2517,20 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2517 | struct ceph_filelock *flocks; | 2517 | struct ceph_filelock *flocks; |
2518 | 2518 | ||
2519 | encode_again: | 2519 | encode_again: |
2520 | lock_flocks(); | 2520 | spin_lock(&inode->i_lock); |
2521 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | 2521 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); |
2522 | unlock_flocks(); | 2522 | spin_unlock(&inode->i_lock); |
2523 | flocks = kmalloc((num_fcntl_locks+num_flock_locks) * | 2523 | flocks = kmalloc((num_fcntl_locks+num_flock_locks) * |
2524 | sizeof(struct ceph_filelock), GFP_NOFS); | 2524 | sizeof(struct ceph_filelock), GFP_NOFS); |
2525 | if (!flocks) { | 2525 | if (!flocks) { |
2526 | err = -ENOMEM; | 2526 | err = -ENOMEM; |
2527 | goto out_free; | 2527 | goto out_free; |
2528 | } | 2528 | } |
2529 | lock_flocks(); | 2529 | spin_lock(&inode->i_lock); |
2530 | err = ceph_encode_locks_to_buffer(inode, flocks, | 2530 | err = ceph_encode_locks_to_buffer(inode, flocks, |
2531 | num_fcntl_locks, | 2531 | num_fcntl_locks, |
2532 | num_flock_locks); | 2532 | num_flock_locks); |
2533 | unlock_flocks(); | 2533 | spin_unlock(&inode->i_lock); |
2534 | if (err) { | 2534 | if (err) { |
2535 | kfree(flocks); | 2535 | kfree(flocks); |
2536 | if (err == -ENOSPC) | 2536 | if (err == -ENOSPC) |
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 2906ee276408..603f18a65c12 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig | |||
@@ -10,6 +10,7 @@ config CIFS | |||
10 | select CRYPTO_ECB | 10 | select CRYPTO_ECB |
11 | select CRYPTO_DES | 11 | select CRYPTO_DES |
12 | select CRYPTO_SHA256 | 12 | select CRYPTO_SHA256 |
13 | select CRYPTO_CMAC | ||
13 | help | 14 | help |
14 | This is the client VFS module for the Common Internet File System | 15 | This is the client VFS module for the Common Internet File System |
15 | (CIFS) protocol which is the successor to the Server Message Block | 16 | (CIFS) protocol which is the successor to the Server Message Block |
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index d59748346020..f3ac4154cbb6 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c | |||
@@ -213,7 +213,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
213 | tcon->nativeFileSystem); | 213 | tcon->nativeFileSystem); |
214 | } | 214 | } |
215 | seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" | 215 | seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" |
216 | "\nPathComponentMax: %d Status: 0x%d", | 216 | "\n\tPathComponentMax: %d Status: 0x%d", |
217 | le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), | 217 | le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), |
218 | le32_to_cpu(tcon->fsAttrInfo.Attributes), | 218 | le32_to_cpu(tcon->fsAttrInfo.Attributes), |
219 | le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), | 219 | le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), |
@@ -224,6 +224,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
224 | seq_puts(m, " type: CDROM "); | 224 | seq_puts(m, " type: CDROM "); |
225 | else | 225 | else |
226 | seq_printf(m, " type: %d ", dev_type); | 226 | seq_printf(m, " type: %d ", dev_type); |
227 | if (server->ops->dump_share_caps) | ||
228 | server->ops->dump_share_caps(m, tcon); | ||
227 | 229 | ||
228 | if (tcon->need_reconnect) | 230 | if (tcon->need_reconnect) |
229 | seq_puts(m, "\tDISCONNECTED "); | 231 | seq_puts(m, "\tDISCONNECTED "); |
@@ -595,9 +597,36 @@ static int cifs_security_flags_proc_open(struct inode *inode, struct file *file) | |||
595 | return single_open(file, cifs_security_flags_proc_show, NULL); | 597 | return single_open(file, cifs_security_flags_proc_show, NULL); |
596 | } | 598 | } |
597 | 599 | ||
600 | /* | ||
601 | * Ensure that if someone sets a MUST flag, that we disable all other MAY | ||
602 | * flags except for the ones corresponding to the given MUST flag. If there are | ||
603 | * multiple MUST flags, then try to prefer more secure ones. | ||
604 | */ | ||
605 | static void | ||
606 | cifs_security_flags_handle_must_flags(unsigned int *flags) | ||
607 | { | ||
608 | unsigned int signflags = *flags & CIFSSEC_MUST_SIGN; | ||
609 | |||
610 | if ((*flags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) | ||
611 | *flags = CIFSSEC_MUST_KRB5; | ||
612 | else if ((*flags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) | ||
613 | *flags = CIFSSEC_MUST_NTLMSSP; | ||
614 | else if ((*flags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2) | ||
615 | *flags = CIFSSEC_MUST_NTLMV2; | ||
616 | else if ((*flags & CIFSSEC_MUST_NTLM) == CIFSSEC_MUST_NTLM) | ||
617 | *flags = CIFSSEC_MUST_NTLM; | ||
618 | else if ((*flags & CIFSSEC_MUST_LANMAN) == CIFSSEC_MUST_LANMAN) | ||
619 | *flags = CIFSSEC_MUST_LANMAN; | ||
620 | else if ((*flags & CIFSSEC_MUST_PLNTXT) == CIFSSEC_MUST_PLNTXT) | ||
621 | *flags = CIFSSEC_MUST_PLNTXT; | ||
622 | |||
623 | *flags |= signflags; | ||
624 | } | ||
625 | |||
598 | static ssize_t cifs_security_flags_proc_write(struct file *file, | 626 | static ssize_t cifs_security_flags_proc_write(struct file *file, |
599 | const char __user *buffer, size_t count, loff_t *ppos) | 627 | const char __user *buffer, size_t count, loff_t *ppos) |
600 | { | 628 | { |
629 | int rc; | ||
601 | unsigned int flags; | 630 | unsigned int flags; |
602 | char flags_string[12]; | 631 | char flags_string[12]; |
603 | char c; | 632 | char c; |
@@ -620,26 +649,35 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, | |||
620 | global_secflags = CIFSSEC_MAX; | 649 | global_secflags = CIFSSEC_MAX; |
621 | return count; | 650 | return count; |
622 | } else if (!isdigit(c)) { | 651 | } else if (!isdigit(c)) { |
623 | cifs_dbg(VFS, "invalid flag %c\n", c); | 652 | cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", |
653 | flags_string); | ||
624 | return -EINVAL; | 654 | return -EINVAL; |
625 | } | 655 | } |
626 | } | 656 | } |
627 | /* else we have a number */ | ||
628 | 657 | ||
629 | flags = simple_strtoul(flags_string, NULL, 0); | 658 | /* else we have a number */ |
659 | rc = kstrtouint(flags_string, 0, &flags); | ||
660 | if (rc) { | ||
661 | cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", | ||
662 | flags_string); | ||
663 | return rc; | ||
664 | } | ||
630 | 665 | ||
631 | cifs_dbg(FYI, "sec flags 0x%x\n", flags); | 666 | cifs_dbg(FYI, "sec flags 0x%x\n", flags); |
632 | 667 | ||
633 | if (flags <= 0) { | 668 | if (flags == 0) { |
634 | cifs_dbg(VFS, "invalid security flags %s\n", flags_string); | 669 | cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", flags_string); |
635 | return -EINVAL; | 670 | return -EINVAL; |
636 | } | 671 | } |
637 | 672 | ||
638 | if (flags & ~CIFSSEC_MASK) { | 673 | if (flags & ~CIFSSEC_MASK) { |
639 | cifs_dbg(VFS, "attempt to set unsupported security flags 0x%x\n", | 674 | cifs_dbg(VFS, "Unsupported security flags: 0x%x\n", |
640 | flags & ~CIFSSEC_MASK); | 675 | flags & ~CIFSSEC_MASK); |
641 | return -EINVAL; | 676 | return -EINVAL; |
642 | } | 677 | } |
678 | |||
679 | cifs_security_flags_handle_must_flags(&flags); | ||
680 | |||
643 | /* flags look ok - update the global security flags for cifs module */ | 681 | /* flags look ok - update the global security flags for cifs module */ |
644 | global_secflags = flags; | 682 | global_secflags = flags; |
645 | if (global_secflags & CIFSSEC_MUST_SIGN) { | 683 | if (global_secflags & CIFSSEC_MUST_SIGN) { |
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 4fb097468e21..fe8d6276410a 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h | |||
@@ -327,14 +327,14 @@ UniToupper(register wchar_t uc) | |||
327 | /* | 327 | /* |
328 | * UniStrupr: Upper case a unicode string | 328 | * UniStrupr: Upper case a unicode string |
329 | */ | 329 | */ |
330 | static inline wchar_t * | 330 | static inline __le16 * |
331 | UniStrupr(register wchar_t *upin) | 331 | UniStrupr(register __le16 *upin) |
332 | { | 332 | { |
333 | register wchar_t *up; | 333 | register __le16 *up; |
334 | 334 | ||
335 | up = upin; | 335 | up = upin; |
336 | while (*up) { /* For all characters */ | 336 | while (*up) { /* For all characters */ |
337 | *up = UniToupper(*up); | 337 | *up = cpu_to_le16(UniToupper(le16_to_cpu(*up))); |
338 | up++; | 338 | up++; |
339 | } | 339 | } |
340 | return upin; /* Return input pointer */ | 340 | return upin; /* Return input pointer */ |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 71436d1fca13..fc6f4f3a1a9d 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * fs/cifs/cifsencrypt.c | 2 | * fs/cifs/cifsencrypt.c |
3 | * | 3 | * |
4 | * Copyright (C) International Business Machines Corp., 2005,2006 | 4 | * Copyright (C) International Business Machines Corp., 2005,2013 |
5 | * Author(s): Steve French (sfrench@us.ibm.com) | 5 | * Author(s): Steve French (sfrench@us.ibm.com) |
6 | * | 6 | * |
7 | * This library is free software; you can redistribute it and/or modify | 7 | * This library is free software; you can redistribute it and/or modify |
@@ -31,6 +31,37 @@ | |||
31 | #include <linux/random.h> | 31 | #include <linux/random.h> |
32 | #include <linux/highmem.h> | 32 | #include <linux/highmem.h> |
33 | 33 | ||
34 | static int | ||
35 | cifs_crypto_shash_md5_allocate(struct TCP_Server_Info *server) | ||
36 | { | ||
37 | int rc; | ||
38 | unsigned int size; | ||
39 | |||
40 | if (server->secmech.sdescmd5 != NULL) | ||
41 | return 0; /* already allocated */ | ||
42 | |||
43 | server->secmech.md5 = crypto_alloc_shash("md5", 0, 0); | ||
44 | if (IS_ERR(server->secmech.md5)) { | ||
45 | cifs_dbg(VFS, "could not allocate crypto md5\n"); | ||
46 | rc = PTR_ERR(server->secmech.md5); | ||
47 | server->secmech.md5 = NULL; | ||
48 | return rc; | ||
49 | } | ||
50 | |||
51 | size = sizeof(struct shash_desc) + | ||
52 | crypto_shash_descsize(server->secmech.md5); | ||
53 | server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); | ||
54 | if (!server->secmech.sdescmd5) { | ||
55 | crypto_free_shash(server->secmech.md5); | ||
56 | server->secmech.md5 = NULL; | ||
57 | return -ENOMEM; | ||
58 | } | ||
59 | server->secmech.sdescmd5->shash.tfm = server->secmech.md5; | ||
60 | server->secmech.sdescmd5->shash.flags = 0x0; | ||
61 | |||
62 | return 0; | ||
63 | } | ||
64 | |||
34 | /* | 65 | /* |
35 | * Calculate and return the CIFS signature based on the mac key and SMB PDU. | 66 | * Calculate and return the CIFS signature based on the mac key and SMB PDU. |
36 | * The 16 byte signature must be allocated by the caller. Note we only use the | 67 | * The 16 byte signature must be allocated by the caller. Note we only use the |
@@ -50,8 +81,11 @@ static int cifs_calc_signature(struct smb_rqst *rqst, | |||
50 | return -EINVAL; | 81 | return -EINVAL; |
51 | 82 | ||
52 | if (!server->secmech.sdescmd5) { | 83 | if (!server->secmech.sdescmd5) { |
53 | cifs_dbg(VFS, "%s: Can't generate signature\n", __func__); | 84 | rc = cifs_crypto_shash_md5_allocate(server); |
54 | return -1; | 85 | if (rc) { |
86 | cifs_dbg(VFS, "%s: Can't alloc md5 crypto\n", __func__); | ||
87 | return -1; | ||
88 | } | ||
55 | } | 89 | } |
56 | 90 | ||
57 | rc = crypto_shash_init(&server->secmech.sdescmd5->shash); | 91 | rc = crypto_shash_init(&server->secmech.sdescmd5->shash); |
@@ -276,7 +310,6 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, | |||
276 | strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); | 310 | strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); |
277 | 311 | ||
278 | if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) { | 312 | if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) { |
279 | memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); | ||
280 | memcpy(lnm_session_key, password_with_pad, | 313 | memcpy(lnm_session_key, password_with_pad, |
281 | CIFS_ENCPWD_SIZE); | 314 | CIFS_ENCPWD_SIZE); |
282 | return 0; | 315 | return 0; |
@@ -389,7 +422,7 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
389 | if (blobptr + attrsize > blobend) | 422 | if (blobptr + attrsize > blobend) |
390 | break; | 423 | break; |
391 | if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { | 424 | if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { |
392 | if (!attrsize) | 425 | if (!attrsize || attrsize >= CIFS_MAX_DOMAINNAME_LEN) |
393 | break; | 426 | break; |
394 | if (!ses->domainName) { | 427 | if (!ses->domainName) { |
395 | ses->domainName = | 428 | ses->domainName = |
@@ -414,7 +447,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
414 | int rc = 0; | 447 | int rc = 0; |
415 | int len; | 448 | int len; |
416 | char nt_hash[CIFS_NTHASH_SIZE]; | 449 | char nt_hash[CIFS_NTHASH_SIZE]; |
417 | wchar_t *user; | 450 | __le16 *user; |
418 | wchar_t *domain; | 451 | wchar_t *domain; |
419 | wchar_t *server; | 452 | wchar_t *server; |
420 | 453 | ||
@@ -439,7 +472,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
439 | return rc; | 472 | return rc; |
440 | } | 473 | } |
441 | 474 | ||
442 | /* convert ses->user_name to unicode and uppercase */ | 475 | /* convert ses->user_name to unicode */ |
443 | len = ses->user_name ? strlen(ses->user_name) : 0; | 476 | len = ses->user_name ? strlen(ses->user_name) : 0; |
444 | user = kmalloc(2 + (len * 2), GFP_KERNEL); | 477 | user = kmalloc(2 + (len * 2), GFP_KERNEL); |
445 | if (user == NULL) { | 478 | if (user == NULL) { |
@@ -448,7 +481,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, | |||
448 | } | 481 | } |
449 | 482 | ||
450 | if (len) { | 483 | if (len) { |
451 | len = cifs_strtoUTF16((__le16 *)user, ses->user_name, len, nls_cp); | 484 | len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); |
452 | UniStrupr(user); | 485 | UniStrupr(user); |
453 | } else { | 486 | } else { |
454 | memset(user, '\0', 2); | 487 | memset(user, '\0', 2); |
@@ -536,7 +569,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) | |||
536 | return rc; | 569 | return rc; |
537 | } | 570 | } |
538 | 571 | ||
539 | if (ses->server->secType == RawNTLMSSP) | 572 | if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) |
540 | memcpy(ses->auth_key.response + offset, | 573 | memcpy(ses->auth_key.response + offset, |
541 | ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); | 574 | ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); |
542 | else | 575 | else |
@@ -557,6 +590,36 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) | |||
557 | return rc; | 590 | return rc; |
558 | } | 591 | } |
559 | 592 | ||
593 | static int crypto_hmacmd5_alloc(struct TCP_Server_Info *server) | ||
594 | { | ||
595 | int rc; | ||
596 | unsigned int size; | ||
597 | |||
598 | /* check if already allocated */ | ||
599 | if (server->secmech.sdeschmacmd5) | ||
600 | return 0; | ||
601 | |||
602 | server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); | ||
603 | if (IS_ERR(server->secmech.hmacmd5)) { | ||
604 | cifs_dbg(VFS, "could not allocate crypto hmacmd5\n"); | ||
605 | rc = PTR_ERR(server->secmech.hmacmd5); | ||
606 | server->secmech.hmacmd5 = NULL; | ||
607 | return rc; | ||
608 | } | ||
609 | |||
610 | size = sizeof(struct shash_desc) + | ||
611 | crypto_shash_descsize(server->secmech.hmacmd5); | ||
612 | server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); | ||
613 | if (!server->secmech.sdeschmacmd5) { | ||
614 | crypto_free_shash(server->secmech.hmacmd5); | ||
615 | server->secmech.hmacmd5 = NULL; | ||
616 | return -ENOMEM; | ||
617 | } | ||
618 | server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5; | ||
619 | server->secmech.sdeschmacmd5->shash.flags = 0x0; | ||
620 | |||
621 | return 0; | ||
622 | } | ||
560 | 623 | ||
561 | int | 624 | int |
562 | setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) | 625 | setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) |
@@ -568,7 +631,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
568 | char ntlmv2_hash[16]; | 631 | char ntlmv2_hash[16]; |
569 | unsigned char *tiblob = NULL; /* target info blob */ | 632 | unsigned char *tiblob = NULL; /* target info blob */ |
570 | 633 | ||
571 | if (ses->server->secType == RawNTLMSSP) { | 634 | if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) { |
572 | if (!ses->domainName) { | 635 | if (!ses->domainName) { |
573 | rc = find_domain_name(ses, nls_cp); | 636 | rc = find_domain_name(ses, nls_cp); |
574 | if (rc) { | 637 | if (rc) { |
@@ -607,6 +670,12 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
607 | 670 | ||
608 | memcpy(ses->auth_key.response + baselen, tiblob, tilen); | 671 | memcpy(ses->auth_key.response + baselen, tiblob, tilen); |
609 | 672 | ||
673 | rc = crypto_hmacmd5_alloc(ses->server); | ||
674 | if (rc) { | ||
675 | cifs_dbg(VFS, "could not crypto alloc hmacmd5 rc %d\n", rc); | ||
676 | goto setup_ntlmv2_rsp_ret; | ||
677 | } | ||
678 | |||
610 | /* calculate ntlmv2_hash */ | 679 | /* calculate ntlmv2_hash */ |
611 | rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp); | 680 | rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp); |
612 | if (rc) { | 681 | if (rc) { |
@@ -706,94 +775,32 @@ calc_seckey(struct cifs_ses *ses) | |||
706 | void | 775 | void |
707 | cifs_crypto_shash_release(struct TCP_Server_Info *server) | 776 | cifs_crypto_shash_release(struct TCP_Server_Info *server) |
708 | { | 777 | { |
709 | if (server->secmech.hmacsha256) | 778 | if (server->secmech.cmacaes) { |
710 | crypto_free_shash(server->secmech.hmacsha256); | 779 | crypto_free_shash(server->secmech.cmacaes); |
711 | 780 | server->secmech.cmacaes = NULL; | |
712 | if (server->secmech.md5) | ||
713 | crypto_free_shash(server->secmech.md5); | ||
714 | |||
715 | if (server->secmech.hmacmd5) | ||
716 | crypto_free_shash(server->secmech.hmacmd5); | ||
717 | |||
718 | kfree(server->secmech.sdeschmacsha256); | ||
719 | |||
720 | kfree(server->secmech.sdeschmacmd5); | ||
721 | |||
722 | kfree(server->secmech.sdescmd5); | ||
723 | } | ||
724 | |||
725 | int | ||
726 | cifs_crypto_shash_allocate(struct TCP_Server_Info *server) | ||
727 | { | ||
728 | int rc; | ||
729 | unsigned int size; | ||
730 | |||
731 | server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); | ||
732 | if (IS_ERR(server->secmech.hmacmd5)) { | ||
733 | cifs_dbg(VFS, "could not allocate crypto hmacmd5\n"); | ||
734 | return PTR_ERR(server->secmech.hmacmd5); | ||
735 | } | ||
736 | |||
737 | server->secmech.md5 = crypto_alloc_shash("md5", 0, 0); | ||
738 | if (IS_ERR(server->secmech.md5)) { | ||
739 | cifs_dbg(VFS, "could not allocate crypto md5\n"); | ||
740 | rc = PTR_ERR(server->secmech.md5); | ||
741 | goto crypto_allocate_md5_fail; | ||
742 | } | 781 | } |
743 | 782 | ||
744 | server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0); | 783 | if (server->secmech.hmacsha256) { |
745 | if (IS_ERR(server->secmech.hmacsha256)) { | 784 | crypto_free_shash(server->secmech.hmacsha256); |
746 | cifs_dbg(VFS, "could not allocate crypto hmacsha256\n"); | 785 | server->secmech.hmacsha256 = NULL; |
747 | rc = PTR_ERR(server->secmech.hmacsha256); | ||
748 | goto crypto_allocate_hmacsha256_fail; | ||
749 | } | ||
750 | |||
751 | size = sizeof(struct shash_desc) + | ||
752 | crypto_shash_descsize(server->secmech.hmacmd5); | ||
753 | server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); | ||
754 | if (!server->secmech.sdeschmacmd5) { | ||
755 | rc = -ENOMEM; | ||
756 | goto crypto_allocate_hmacmd5_sdesc_fail; | ||
757 | } | 786 | } |
758 | server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5; | ||
759 | server->secmech.sdeschmacmd5->shash.flags = 0x0; | ||
760 | 787 | ||
761 | size = sizeof(struct shash_desc) + | 788 | if (server->secmech.md5) { |
762 | crypto_shash_descsize(server->secmech.md5); | 789 | crypto_free_shash(server->secmech.md5); |
763 | server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); | 790 | server->secmech.md5 = NULL; |
764 | if (!server->secmech.sdescmd5) { | ||
765 | rc = -ENOMEM; | ||
766 | goto crypto_allocate_md5_sdesc_fail; | ||
767 | } | 791 | } |
768 | server->secmech.sdescmd5->shash.tfm = server->secmech.md5; | ||
769 | server->secmech.sdescmd5->shash.flags = 0x0; | ||
770 | 792 | ||
771 | size = sizeof(struct shash_desc) + | 793 | if (server->secmech.hmacmd5) { |
772 | crypto_shash_descsize(server->secmech.hmacsha256); | 794 | crypto_free_shash(server->secmech.hmacmd5); |
773 | server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL); | 795 | server->secmech.hmacmd5 = NULL; |
774 | if (!server->secmech.sdeschmacsha256) { | ||
775 | rc = -ENOMEM; | ||
776 | goto crypto_allocate_hmacsha256_sdesc_fail; | ||
777 | } | 796 | } |
778 | server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256; | ||
779 | server->secmech.sdeschmacsha256->shash.flags = 0x0; | ||
780 | |||
781 | return 0; | ||
782 | |||
783 | crypto_allocate_hmacsha256_sdesc_fail: | ||
784 | kfree(server->secmech.sdescmd5); | ||
785 | 797 | ||
786 | crypto_allocate_md5_sdesc_fail: | 798 | kfree(server->secmech.sdesccmacaes); |
799 | server->secmech.sdesccmacaes = NULL; | ||
800 | kfree(server->secmech.sdeschmacsha256); | ||
801 | server->secmech.sdeschmacsha256 = NULL; | ||
787 | kfree(server->secmech.sdeschmacmd5); | 802 | kfree(server->secmech.sdeschmacmd5); |
788 | 803 | server->secmech.sdeschmacmd5 = NULL; | |
789 | crypto_allocate_hmacmd5_sdesc_fail: | 804 | kfree(server->secmech.sdescmd5); |
790 | crypto_free_shash(server->secmech.hmacsha256); | 805 | server->secmech.sdescmd5 = NULL; |
791 | |||
792 | crypto_allocate_hmacsha256_fail: | ||
793 | crypto_free_shash(server->secmech.md5); | ||
794 | |||
795 | crypto_allocate_md5_fail: | ||
796 | crypto_free_shash(server->secmech.hmacmd5); | ||
797 | |||
798 | return rc; | ||
799 | } | 806 | } |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3752b9f6d9e4..85ea98d139fc 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -147,18 +147,17 @@ cifs_read_super(struct super_block *sb) | |||
147 | goto out_no_root; | 147 | goto out_no_root; |
148 | } | 148 | } |
149 | 149 | ||
150 | if (cifs_sb_master_tcon(cifs_sb)->nocase) | ||
151 | sb->s_d_op = &cifs_ci_dentry_ops; | ||
152 | else | ||
153 | sb->s_d_op = &cifs_dentry_ops; | ||
154 | |||
150 | sb->s_root = d_make_root(inode); | 155 | sb->s_root = d_make_root(inode); |
151 | if (!sb->s_root) { | 156 | if (!sb->s_root) { |
152 | rc = -ENOMEM; | 157 | rc = -ENOMEM; |
153 | goto out_no_root; | 158 | goto out_no_root; |
154 | } | 159 | } |
155 | 160 | ||
156 | /* do that *after* d_make_root() - we want NULL ->d_op for root here */ | ||
157 | if (cifs_sb_master_tcon(cifs_sb)->nocase) | ||
158 | sb->s_d_op = &cifs_ci_dentry_ops; | ||
159 | else | ||
160 | sb->s_d_op = &cifs_dentry_ops; | ||
161 | |||
162 | #ifdef CONFIG_CIFS_NFSD_EXPORT | 161 | #ifdef CONFIG_CIFS_NFSD_EXPORT |
163 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { | 162 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { |
164 | cifs_dbg(FYI, "export ops supported\n"); | 163 | cifs_dbg(FYI, "export ops supported\n"); |
@@ -312,11 +311,14 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) | |||
312 | } | 311 | } |
313 | 312 | ||
314 | static void | 313 | static void |
315 | cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server) | 314 | cifs_show_security(struct seq_file *s, struct cifs_ses *ses) |
316 | { | 315 | { |
316 | if (ses->sectype == Unspecified) | ||
317 | return; | ||
318 | |||
317 | seq_printf(s, ",sec="); | 319 | seq_printf(s, ",sec="); |
318 | 320 | ||
319 | switch (server->secType) { | 321 | switch (ses->sectype) { |
320 | case LANMAN: | 322 | case LANMAN: |
321 | seq_printf(s, "lanman"); | 323 | seq_printf(s, "lanman"); |
322 | break; | 324 | break; |
@@ -338,7 +340,7 @@ cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server) | |||
338 | break; | 340 | break; |
339 | } | 341 | } |
340 | 342 | ||
341 | if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | 343 | if (ses->sign) |
342 | seq_printf(s, "i"); | 344 | seq_printf(s, "i"); |
343 | } | 345 | } |
344 | 346 | ||
@@ -369,7 +371,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root) | |||
369 | srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; | 371 | srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; |
370 | 372 | ||
371 | seq_printf(s, ",vers=%s", tcon->ses->server->vals->version_string); | 373 | seq_printf(s, ",vers=%s", tcon->ses->server->vals->version_string); |
372 | cifs_show_security(s, tcon->ses->server); | 374 | cifs_show_security(s, tcon->ses); |
373 | cifs_show_cache_flavor(s, cifs_sb); | 375 | cifs_show_cache_flavor(s, cifs_sb); |
374 | 376 | ||
375 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) | 377 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) |
@@ -765,7 +767,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) | |||
765 | 767 | ||
766 | static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) | 768 | static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) |
767 | { | 769 | { |
768 | /* note that this is called by vfs setlease with lock_flocks held | 770 | /* note that this is called by vfs setlease with i_lock held |
769 | to protect *lease from going away */ | 771 | to protect *lease from going away */ |
770 | struct inode *inode = file_inode(file); | 772 | struct inode *inode = file_inode(file); |
771 | struct cifsFileInfo *cfile = file->private_data; | 773 | struct cifsFileInfo *cfile = file->private_data; |
@@ -968,7 +970,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { | |||
968 | }; | 970 | }; |
969 | 971 | ||
970 | const struct file_operations cifs_dir_ops = { | 972 | const struct file_operations cifs_dir_ops = { |
971 | .readdir = cifs_readdir, | 973 | .iterate = cifs_readdir, |
972 | .release = cifs_closedir, | 974 | .release = cifs_closedir, |
973 | .read = generic_read_dir, | 975 | .read = generic_read_dir, |
974 | .unlocked_ioctl = cifs_ioctl, | 976 | .unlocked_ioctl = cifs_ioctl, |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 0e32c3446ce9..ea723a5e8226 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -101,7 +101,7 @@ extern int cifs_file_mmap(struct file * , struct vm_area_struct *); | |||
101 | extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *); | 101 | extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *); |
102 | extern const struct file_operations cifs_dir_ops; | 102 | extern const struct file_operations cifs_dir_ops; |
103 | extern int cifs_dir_open(struct inode *inode, struct file *file); | 103 | extern int cifs_dir_open(struct inode *inode, struct file *file); |
104 | extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); | 104 | extern int cifs_readdir(struct file *file, struct dir_context *ctx); |
105 | 105 | ||
106 | /* Functions related to dir entries */ | 106 | /* Functions related to dir entries */ |
107 | extern const struct dentry_operations cifs_dentry_ops; | 107 | extern const struct dentry_operations cifs_dentry_ops; |
@@ -132,5 +132,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
132 | extern const struct export_operations cifs_export_ops; | 132 | extern const struct export_operations cifs_export_ops; |
133 | #endif /* CONFIG_CIFS_NFSD_EXPORT */ | 133 | #endif /* CONFIG_CIFS_NFSD_EXPORT */ |
134 | 134 | ||
135 | #define CIFS_VERSION "2.0" | 135 | #define CIFS_VERSION "2.01" |
136 | #endif /* _CIFSFS_H */ | 136 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 4f07f6fbe494..52ca861ed35e 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -44,6 +44,7 @@ | |||
44 | #define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) | 44 | #define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) |
45 | #define MAX_SERVER_SIZE 15 | 45 | #define MAX_SERVER_SIZE 15 |
46 | #define MAX_SHARE_SIZE 80 | 46 | #define MAX_SHARE_SIZE 80 |
47 | #define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */ | ||
47 | #define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */ | 48 | #define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */ |
48 | #define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */ | 49 | #define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */ |
49 | 50 | ||
@@ -101,20 +102,14 @@ enum statusEnum { | |||
101 | }; | 102 | }; |
102 | 103 | ||
103 | enum securityEnum { | 104 | enum securityEnum { |
104 | LANMAN = 0, /* Legacy LANMAN auth */ | 105 | Unspecified = 0, /* not specified */ |
106 | LANMAN, /* Legacy LANMAN auth */ | ||
105 | NTLM, /* Legacy NTLM012 auth with NTLM hash */ | 107 | NTLM, /* Legacy NTLM012 auth with NTLM hash */ |
106 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ | 108 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ |
107 | RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ | 109 | RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ |
108 | /* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */ | ||
109 | Kerberos, /* Kerberos via SPNEGO */ | 110 | Kerberos, /* Kerberos via SPNEGO */ |
110 | }; | 111 | }; |
111 | 112 | ||
112 | enum protocolEnum { | ||
113 | TCP = 0, | ||
114 | SCTP | ||
115 | /* Netbios frames protocol not supported at this time */ | ||
116 | }; | ||
117 | |||
118 | struct session_key { | 113 | struct session_key { |
119 | unsigned int len; | 114 | unsigned int len; |
120 | char *response; | 115 | char *response; |
@@ -131,9 +126,11 @@ struct cifs_secmech { | |||
131 | struct crypto_shash *hmacmd5; /* hmac-md5 hash function */ | 126 | struct crypto_shash *hmacmd5; /* hmac-md5 hash function */ |
132 | struct crypto_shash *md5; /* md5 hash function */ | 127 | struct crypto_shash *md5; /* md5 hash function */ |
133 | struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */ | 128 | struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */ |
129 | struct crypto_shash *cmacaes; /* block-cipher based MAC function */ | ||
134 | struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */ | 130 | struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */ |
135 | struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */ | 131 | struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */ |
136 | struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */ | 132 | struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */ |
133 | struct sdesc *sdesccmacaes; /* ctxt to generate smb3 signature */ | ||
137 | }; | 134 | }; |
138 | 135 | ||
139 | /* per smb session structure/fields */ | 136 | /* per smb session structure/fields */ |
@@ -181,6 +178,7 @@ enum smb_version { | |||
181 | Smb_20, | 178 | Smb_20, |
182 | Smb_21, | 179 | Smb_21, |
183 | Smb_30, | 180 | Smb_30, |
181 | Smb_302, | ||
184 | }; | 182 | }; |
185 | 183 | ||
186 | struct mid_q_entry; | 184 | struct mid_q_entry; |
@@ -197,6 +195,7 @@ struct cifs_writedata; | |||
197 | struct cifs_io_parms; | 195 | struct cifs_io_parms; |
198 | struct cifs_search_info; | 196 | struct cifs_search_info; |
199 | struct cifsInodeInfo; | 197 | struct cifsInodeInfo; |
198 | struct cifs_open_parms; | ||
200 | 199 | ||
201 | struct smb_version_operations { | 200 | struct smb_version_operations { |
202 | int (*send_cancel)(struct TCP_Server_Info *, void *, | 201 | int (*send_cancel)(struct TCP_Server_Info *, void *, |
@@ -228,6 +227,7 @@ struct smb_version_operations { | |||
228 | void (*dump_detail)(void *); | 227 | void (*dump_detail)(void *); |
229 | void (*clear_stats)(struct cifs_tcon *); | 228 | void (*clear_stats)(struct cifs_tcon *); |
230 | void (*print_stats)(struct seq_file *m, struct cifs_tcon *); | 229 | void (*print_stats)(struct seq_file *m, struct cifs_tcon *); |
230 | void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *); | ||
231 | /* verify the message */ | 231 | /* verify the message */ |
232 | int (*check_message)(char *, unsigned int); | 232 | int (*check_message)(char *, unsigned int); |
233 | bool (*is_oplock_break)(char *, struct TCP_Server_Info *); | 233 | bool (*is_oplock_break)(char *, struct TCP_Server_Info *); |
@@ -309,9 +309,8 @@ struct smb_version_operations { | |||
309 | const char *, const char *, | 309 | const char *, const char *, |
310 | struct cifs_sb_info *); | 310 | struct cifs_sb_info *); |
311 | /* open a file for non-posix mounts */ | 311 | /* open a file for non-posix mounts */ |
312 | int (*open)(const unsigned int, struct cifs_tcon *, const char *, int, | 312 | int (*open)(const unsigned int, struct cifs_open_parms *, |
313 | int, int, struct cifs_fid *, __u32 *, FILE_ALL_INFO *, | 313 | __u32 *, FILE_ALL_INFO *); |
314 | struct cifs_sb_info *); | ||
315 | /* set fid protocol-specific info */ | 314 | /* set fid protocol-specific info */ |
316 | void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); | 315 | void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); |
317 | /* close a file */ | 316 | /* close a file */ |
@@ -367,8 +366,13 @@ struct smb_version_operations { | |||
367 | void (*set_lease_key)(struct inode *, struct cifs_fid *fid); | 366 | void (*set_lease_key)(struct inode *, struct cifs_fid *fid); |
368 | /* generate new lease key */ | 367 | /* generate new lease key */ |
369 | void (*new_lease_key)(struct cifs_fid *fid); | 368 | void (*new_lease_key)(struct cifs_fid *fid); |
369 | /* The next two functions will need to be changed to per smb session */ | ||
370 | void (*generate_signingkey)(struct TCP_Server_Info *server); | ||
370 | int (*calc_signature)(struct smb_rqst *rqst, | 371 | int (*calc_signature)(struct smb_rqst *rqst, |
371 | struct TCP_Server_Info *server); | 372 | struct TCP_Server_Info *server); |
373 | int (*query_mf_symlink)(const unsigned char *path, char *pbuf, | ||
374 | unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb, | ||
375 | unsigned int xid); | ||
372 | }; | 376 | }; |
373 | 377 | ||
374 | struct smb_version_values { | 378 | struct smb_version_values { |
@@ -387,6 +391,8 @@ struct smb_version_values { | |||
387 | unsigned int cap_nt_find; | 391 | unsigned int cap_nt_find; |
388 | unsigned int cap_large_files; | 392 | unsigned int cap_large_files; |
389 | unsigned int oplock_read; | 393 | unsigned int oplock_read; |
394 | __u16 signing_enabled; | ||
395 | __u16 signing_required; | ||
390 | }; | 396 | }; |
391 | 397 | ||
392 | #define HEADER_SIZE(server) (server->vals->header_size) | 398 | #define HEADER_SIZE(server) (server->vals->header_size) |
@@ -407,7 +413,8 @@ struct smb_vol { | |||
407 | kgid_t backupgid; | 413 | kgid_t backupgid; |
408 | umode_t file_mode; | 414 | umode_t file_mode; |
409 | umode_t dir_mode; | 415 | umode_t dir_mode; |
410 | unsigned secFlg; | 416 | enum securityEnum sectype; /* sectype requested via mnt opts */ |
417 | bool sign; /* was signing requested via mnt opts? */ | ||
411 | bool retry:1; | 418 | bool retry:1; |
412 | bool intr:1; | 419 | bool intr:1; |
413 | bool setuids:1; | 420 | bool setuids:1; |
@@ -441,6 +448,7 @@ struct smb_vol { | |||
441 | bool mfsymlinks:1; /* use Minshall+French Symlinks */ | 448 | bool mfsymlinks:1; /* use Minshall+French Symlinks */ |
442 | bool multiuser:1; | 449 | bool multiuser:1; |
443 | bool rwpidforward:1; /* pid forward for read/write operations */ | 450 | bool rwpidforward:1; /* pid forward for read/write operations */ |
451 | bool nosharesock; | ||
444 | unsigned int rsize; | 452 | unsigned int rsize; |
445 | unsigned int wsize; | 453 | unsigned int wsize; |
446 | bool sockopt_tcp_nodelay:1; | 454 | bool sockopt_tcp_nodelay:1; |
@@ -514,6 +522,7 @@ struct TCP_Server_Info { | |||
514 | struct task_struct *tsk; | 522 | struct task_struct *tsk; |
515 | char server_GUID[16]; | 523 | char server_GUID[16]; |
516 | __u16 sec_mode; | 524 | __u16 sec_mode; |
525 | bool sign; /* is signing enabled on this connection? */ | ||
517 | bool session_estab; /* mark when very first sess is established */ | 526 | bool session_estab; /* mark when very first sess is established */ |
518 | #ifdef CONFIG_CIFS_SMB2 | 527 | #ifdef CONFIG_CIFS_SMB2 |
519 | int echo_credits; /* echo reserved slots */ | 528 | int echo_credits; /* echo reserved slots */ |
@@ -521,7 +530,6 @@ struct TCP_Server_Info { | |||
521 | bool echoes:1; /* enable echoes */ | 530 | bool echoes:1; /* enable echoes */ |
522 | #endif | 531 | #endif |
523 | u16 dialect; /* dialect index that server chose */ | 532 | u16 dialect; /* dialect index that server chose */ |
524 | enum securityEnum secType; | ||
525 | bool oplocks:1; /* enable oplocks */ | 533 | bool oplocks:1; /* enable oplocks */ |
526 | unsigned int maxReq; /* Clients should submit no more */ | 534 | unsigned int maxReq; /* Clients should submit no more */ |
527 | /* than maxReq distinct unanswered SMBs to the server when using */ | 535 | /* than maxReq distinct unanswered SMBs to the server when using */ |
@@ -540,12 +548,17 @@ struct TCP_Server_Info { | |||
540 | int timeAdj; /* Adjust for difference in server time zone in sec */ | 548 | int timeAdj; /* Adjust for difference in server time zone in sec */ |
541 | __u64 CurrentMid; /* multiplex id - rotating counter */ | 549 | __u64 CurrentMid; /* multiplex id - rotating counter */ |
542 | char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ | 550 | char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ |
551 | char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */ | ||
543 | /* 16th byte of RFC1001 workstation name is always null */ | 552 | /* 16th byte of RFC1001 workstation name is always null */ |
544 | char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; | 553 | char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; |
545 | __u32 sequence_number; /* for signing, protected by srv_mutex */ | 554 | __u32 sequence_number; /* for signing, protected by srv_mutex */ |
546 | struct session_key session_key; | 555 | struct session_key session_key; |
547 | unsigned long lstrp; /* when we got last response from this server */ | 556 | unsigned long lstrp; /* when we got last response from this server */ |
548 | struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ | 557 | struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ |
558 | #define CIFS_NEGFLAVOR_LANMAN 0 /* wct == 13, LANMAN */ | ||
559 | #define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */ | ||
560 | #define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */ | ||
561 | char negflavor; /* NEGOTIATE response flavor */ | ||
549 | /* extended security flavors that server supports */ | 562 | /* extended security flavors that server supports */ |
550 | bool sec_ntlmssp; /* supports NTLMSSP */ | 563 | bool sec_ntlmssp; /* supports NTLMSSP */ |
551 | bool sec_kerberosu2u; /* supports U2U Kerberos */ | 564 | bool sec_kerberosu2u; /* supports U2U Kerberos */ |
@@ -697,7 +710,6 @@ struct cifs_ses { | |||
697 | enum statusEnum status; | 710 | enum statusEnum status; |
698 | unsigned overrideSecFlg; /* if non-zero override global sec flags */ | 711 | unsigned overrideSecFlg; /* if non-zero override global sec flags */ |
699 | __u16 ipc_tid; /* special tid for connection to IPC share */ | 712 | __u16 ipc_tid; /* special tid for connection to IPC share */ |
700 | __u16 flags; | ||
701 | __u16 vcnum; | 713 | __u16 vcnum; |
702 | char *serverOS; /* name of operating system underlying server */ | 714 | char *serverOS; /* name of operating system underlying server */ |
703 | char *serverNOS; /* name of network operating system of server */ | 715 | char *serverNOS; /* name of network operating system of server */ |
@@ -714,21 +726,14 @@ struct cifs_ses { | |||
714 | char *password; | 726 | char *password; |
715 | struct session_key auth_key; | 727 | struct session_key auth_key; |
716 | struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */ | 728 | struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */ |
729 | enum securityEnum sectype; /* what security flavor was specified? */ | ||
730 | bool sign; /* is signing required? */ | ||
717 | bool need_reconnect:1; /* connection reset, uid now invalid */ | 731 | bool need_reconnect:1; /* connection reset, uid now invalid */ |
718 | #ifdef CONFIG_CIFS_SMB2 | 732 | #ifdef CONFIG_CIFS_SMB2 |
719 | __u16 session_flags; | 733 | __u16 session_flags; |
720 | #endif /* CONFIG_CIFS_SMB2 */ | 734 | #endif /* CONFIG_CIFS_SMB2 */ |
721 | }; | 735 | }; |
722 | 736 | ||
723 | /* no more than one of the following three session flags may be set */ | ||
724 | #define CIFS_SES_NT4 1 | ||
725 | #define CIFS_SES_OS2 2 | ||
726 | #define CIFS_SES_W9X 4 | ||
727 | /* following flag is set for old servers such as OS2 (and Win95?) | ||
728 | which do not negotiate NTLM or POSIX dialects, but instead | ||
729 | negotiate one of the older LANMAN dialects */ | ||
730 | #define CIFS_SES_LANMAN 8 | ||
731 | |||
732 | static inline bool | 737 | static inline bool |
733 | cap_unix(struct cifs_ses *ses) | 738 | cap_unix(struct cifs_ses *ses) |
734 | { | 739 | { |
@@ -816,7 +821,7 @@ struct cifs_tcon { | |||
816 | #ifdef CONFIG_CIFS_SMB2 | 821 | #ifdef CONFIG_CIFS_SMB2 |
817 | bool print:1; /* set if connection to printer share */ | 822 | bool print:1; /* set if connection to printer share */ |
818 | bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */ | 823 | bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */ |
819 | __u32 capabilities; | 824 | __le32 capabilities; |
820 | __u32 share_flags; | 825 | __u32 share_flags; |
821 | __u32 maximal_access; | 826 | __u32 maximal_access; |
822 | __u32 vol_serial_number; | 827 | __u32 vol_serial_number; |
@@ -911,6 +916,17 @@ struct cifs_search_info { | |||
911 | bool smallBuf:1; /* so we know which buf_release function to call */ | 916 | bool smallBuf:1; /* so we know which buf_release function to call */ |
912 | }; | 917 | }; |
913 | 918 | ||
919 | struct cifs_open_parms { | ||
920 | struct cifs_tcon *tcon; | ||
921 | struct cifs_sb_info *cifs_sb; | ||
922 | int disposition; | ||
923 | int desired_access; | ||
924 | int create_options; | ||
925 | const char *path; | ||
926 | struct cifs_fid *fid; | ||
927 | bool reconnect:1; | ||
928 | }; | ||
929 | |||
914 | struct cifs_fid { | 930 | struct cifs_fid { |
915 | __u16 netfid; | 931 | __u16 netfid; |
916 | #ifdef CONFIG_CIFS_SMB2 | 932 | #ifdef CONFIG_CIFS_SMB2 |
@@ -1348,7 +1364,7 @@ require use of the stronger protocol */ | |||
1348 | #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ | 1364 | #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ |
1349 | #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ | 1365 | #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ |
1350 | 1366 | ||
1351 | #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMSSP) | 1367 | #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP) |
1352 | #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) | 1368 | #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) |
1353 | #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) | 1369 | #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) |
1354 | /* | 1370 | /* |
@@ -1494,4 +1510,7 @@ extern struct smb_version_values smb21_values; | |||
1494 | #define SMB30_VERSION_STRING "3.0" | 1510 | #define SMB30_VERSION_STRING "3.0" |
1495 | extern struct smb_version_operations smb30_operations; | 1511 | extern struct smb_version_operations smb30_operations; |
1496 | extern struct smb_version_values smb30_values; | 1512 | extern struct smb_version_values smb30_values; |
1513 | #define SMB302_VERSION_STRING "3.02" | ||
1514 | /*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */ | ||
1515 | extern struct smb_version_values smb302_values; | ||
1497 | #endif /* _CIFS_GLOB_H */ | 1516 | #endif /* _CIFS_GLOB_H */ |
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index e996ff6b26d1..11ca24a8e054 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h | |||
@@ -142,6 +142,11 @@ | |||
142 | */ | 142 | */ |
143 | #define CIFS_SESS_KEY_SIZE (16) | 143 | #define CIFS_SESS_KEY_SIZE (16) |
144 | 144 | ||
145 | /* | ||
146 | * Size of the smb3 signing key | ||
147 | */ | ||
148 | #define SMB3_SIGN_KEY_SIZE (16) | ||
149 | |||
145 | #define CIFS_CLIENT_CHALLENGE_SIZE (8) | 150 | #define CIFS_CLIENT_CHALLENGE_SIZE (8) |
146 | #define CIFS_SERVER_CHALLENGE_SIZE (8) | 151 | #define CIFS_SERVER_CHALLENGE_SIZE (8) |
147 | #define CIFS_HMAC_MD5_HASH_SIZE (16) | 152 | #define CIFS_HMAC_MD5_HASH_SIZE (16) |
@@ -531,7 +536,7 @@ typedef struct lanman_neg_rsp { | |||
531 | #define READ_RAW_ENABLE 1 | 536 | #define READ_RAW_ENABLE 1 |
532 | #define WRITE_RAW_ENABLE 2 | 537 | #define WRITE_RAW_ENABLE 2 |
533 | #define RAW_ENABLE (READ_RAW_ENABLE | WRITE_RAW_ENABLE) | 538 | #define RAW_ENABLE (READ_RAW_ENABLE | WRITE_RAW_ENABLE) |
534 | 539 | #define SMB1_CLIENT_GUID_SIZE (16) | |
535 | typedef struct negotiate_rsp { | 540 | typedef struct negotiate_rsp { |
536 | struct smb_hdr hdr; /* wct = 17 */ | 541 | struct smb_hdr hdr; /* wct = 17 */ |
537 | __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */ | 542 | __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */ |
@@ -553,7 +558,7 @@ typedef struct negotiate_rsp { | |||
553 | /* followed by 16 bytes of server GUID */ | 558 | /* followed by 16 bytes of server GUID */ |
554 | /* then security blob if cap_extended_security negotiated */ | 559 | /* then security blob if cap_extended_security negotiated */ |
555 | struct { | 560 | struct { |
556 | unsigned char GUID[16]; | 561 | unsigned char GUID[SMB1_CLIENT_GUID_SIZE]; |
557 | unsigned char SecurityBlob[1]; | 562 | unsigned char SecurityBlob[1]; |
558 | } __attribute__((packed)) extended_response; | 563 | } __attribute__((packed)) extended_response; |
559 | } __attribute__((packed)) u; | 564 | } __attribute__((packed)) u; |
@@ -1315,6 +1320,14 @@ typedef struct smb_com_ntransact_rsp { | |||
1315 | /* parms and data follow */ | 1320 | /* parms and data follow */ |
1316 | } __attribute__((packed)) NTRANSACT_RSP; | 1321 | } __attribute__((packed)) NTRANSACT_RSP; |
1317 | 1322 | ||
1323 | /* See MS-SMB 2.2.7.2.1.1 */ | ||
1324 | struct srv_copychunk { | ||
1325 | __le64 SourceOffset; | ||
1326 | __le64 DestinationOffset; | ||
1327 | __le32 CopyLength; | ||
1328 | __u32 Reserved; | ||
1329 | } __packed; | ||
1330 | |||
1318 | typedef struct smb_com_transaction_ioctl_req { | 1331 | typedef struct smb_com_transaction_ioctl_req { |
1319 | struct smb_hdr hdr; /* wct = 23 */ | 1332 | struct smb_hdr hdr; /* wct = 23 */ |
1320 | __u8 MaxSetupCount; | 1333 | __u8 MaxSetupCount; |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index dda188a94332..b29a012bed33 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -118,6 +118,8 @@ extern void header_assemble(struct smb_hdr *, char /* command */ , | |||
118 | extern int small_smb_init_no_tc(const int smb_cmd, const int wct, | 118 | extern int small_smb_init_no_tc(const int smb_cmd, const int wct, |
119 | struct cifs_ses *ses, | 119 | struct cifs_ses *ses, |
120 | void **request_buf); | 120 | void **request_buf); |
121 | extern enum securityEnum select_sectype(struct TCP_Server_Info *server, | ||
122 | enum securityEnum requested); | ||
121 | extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, | 123 | extern int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, |
122 | const struct nls_table *nls_cp); | 124 | const struct nls_table *nls_cp); |
123 | extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); | 125 | extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); |
@@ -212,6 +214,7 @@ extern int cifs_negotiate_protocol(const unsigned int xid, | |||
212 | struct cifs_ses *ses); | 214 | struct cifs_ses *ses); |
213 | extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, | 215 | extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, |
214 | struct nls_table *nls_info); | 216 | struct nls_table *nls_info); |
217 | extern int cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required); | ||
215 | extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses); | 218 | extern int CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses); |
216 | 219 | ||
217 | extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, | 220 | extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, |
@@ -430,9 +433,9 @@ extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *, | |||
430 | const struct nls_table *); | 433 | const struct nls_table *); |
431 | extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *); | 434 | extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *); |
432 | extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *); | 435 | extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *); |
433 | extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); | ||
434 | extern void cifs_crypto_shash_release(struct TCP_Server_Info *); | 436 | extern void cifs_crypto_shash_release(struct TCP_Server_Info *); |
435 | extern int calc_seckey(struct cifs_ses *); | 437 | extern int calc_seckey(struct cifs_ses *); |
438 | extern void generate_smb3signingkey(struct TCP_Server_Info *); | ||
436 | 439 | ||
437 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 440 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
438 | extern int calc_lanman_hash(const char *password, const char *cryptkey, | 441 | extern int calc_lanman_hash(const char *password, const char *cryptkey, |
@@ -494,5 +497,7 @@ void cifs_writev_complete(struct work_struct *work); | |||
494 | struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages, | 497 | struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages, |
495 | work_func_t complete); | 498 | work_func_t complete); |
496 | void cifs_writedata_release(struct kref *refcount); | 499 | void cifs_writedata_release(struct kref *refcount); |
497 | 500 | int open_query_close_cifs_symlink(const unsigned char *path, char *pbuf, | |
501 | unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb, | ||
502 | unsigned int xid); | ||
498 | #endif /* _CIFSPROTO_H */ | 503 | #endif /* _CIFSPROTO_H */ |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a58dc77cc443..a89c4cb4e6cf 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -367,6 +367,185 @@ vt2_err: | |||
367 | return -EINVAL; | 367 | return -EINVAL; |
368 | } | 368 | } |
369 | 369 | ||
370 | static int | ||
371 | decode_ext_sec_blob(struct cifs_ses *ses, NEGOTIATE_RSP *pSMBr) | ||
372 | { | ||
373 | int rc = 0; | ||
374 | u16 count; | ||
375 | char *guid = pSMBr->u.extended_response.GUID; | ||
376 | struct TCP_Server_Info *server = ses->server; | ||
377 | |||
378 | count = get_bcc(&pSMBr->hdr); | ||
379 | if (count < SMB1_CLIENT_GUID_SIZE) | ||
380 | return -EIO; | ||
381 | |||
382 | spin_lock(&cifs_tcp_ses_lock); | ||
383 | if (server->srv_count > 1) { | ||
384 | spin_unlock(&cifs_tcp_ses_lock); | ||
385 | if (memcmp(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE) != 0) { | ||
386 | cifs_dbg(FYI, "server UID changed\n"); | ||
387 | memcpy(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE); | ||
388 | } | ||
389 | } else { | ||
390 | spin_unlock(&cifs_tcp_ses_lock); | ||
391 | memcpy(server->server_GUID, guid, SMB1_CLIENT_GUID_SIZE); | ||
392 | } | ||
393 | |||
394 | if (count == SMB1_CLIENT_GUID_SIZE) { | ||
395 | server->sec_ntlmssp = true; | ||
396 | } else { | ||
397 | count -= SMB1_CLIENT_GUID_SIZE; | ||
398 | rc = decode_negTokenInit( | ||
399 | pSMBr->u.extended_response.SecurityBlob, count, server); | ||
400 | if (rc != 1) | ||
401 | return -EINVAL; | ||
402 | } | ||
403 | |||
404 | return 0; | ||
405 | } | ||
406 | |||
407 | int | ||
408 | cifs_enable_signing(struct TCP_Server_Info *server, bool mnt_sign_required) | ||
409 | { | ||
410 | bool srv_sign_required = server->sec_mode & server->vals->signing_required; | ||
411 | bool srv_sign_enabled = server->sec_mode & server->vals->signing_enabled; | ||
412 | bool mnt_sign_enabled = global_secflags & CIFSSEC_MAY_SIGN; | ||
413 | |||
414 | /* | ||
415 | * Is signing required by mnt options? If not then check | ||
416 | * global_secflags to see if it is there. | ||
417 | */ | ||
418 | if (!mnt_sign_required) | ||
419 | mnt_sign_required = ((global_secflags & CIFSSEC_MUST_SIGN) == | ||
420 | CIFSSEC_MUST_SIGN); | ||
421 | |||
422 | /* | ||
423 | * If signing is required then it's automatically enabled too, | ||
424 | * otherwise, check to see if the secflags allow it. | ||
425 | */ | ||
426 | mnt_sign_enabled = mnt_sign_required ? mnt_sign_required : | ||
427 | (global_secflags & CIFSSEC_MAY_SIGN); | ||
428 | |||
429 | /* If server requires signing, does client allow it? */ | ||
430 | if (srv_sign_required) { | ||
431 | if (!mnt_sign_enabled) { | ||
432 | cifs_dbg(VFS, "Server requires signing, but it's disabled in SecurityFlags!"); | ||
433 | return -ENOTSUPP; | ||
434 | } | ||
435 | server->sign = true; | ||
436 | } | ||
437 | |||
438 | /* If client requires signing, does server allow it? */ | ||
439 | if (mnt_sign_required) { | ||
440 | if (!srv_sign_enabled) { | ||
441 | cifs_dbg(VFS, "Server does not support signing!"); | ||
442 | return -ENOTSUPP; | ||
443 | } | ||
444 | server->sign = true; | ||
445 | } | ||
446 | |||
447 | return 0; | ||
448 | } | ||
449 | |||
450 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | ||
451 | static int | ||
452 | decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) | ||
453 | { | ||
454 | __s16 tmp; | ||
455 | struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr; | ||
456 | |||
457 | if (server->dialect != LANMAN_PROT && server->dialect != LANMAN2_PROT) | ||
458 | return -EOPNOTSUPP; | ||
459 | |||
460 | server->sec_mode = le16_to_cpu(rsp->SecurityMode); | ||
461 | server->maxReq = min_t(unsigned int, | ||
462 | le16_to_cpu(rsp->MaxMpxCount), | ||
463 | cifs_max_pending); | ||
464 | set_credits(server, server->maxReq); | ||
465 | server->maxBuf = le16_to_cpu(rsp->MaxBufSize); | ||
466 | server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); | ||
467 | /* even though we do not use raw we might as well set this | ||
468 | accurately, in case we ever find a need for it */ | ||
469 | if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { | ||
470 | server->max_rw = 0xFF00; | ||
471 | server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE; | ||
472 | } else { | ||
473 | server->max_rw = 0;/* do not need to use raw anyway */ | ||
474 | server->capabilities = CAP_MPX_MODE; | ||
475 | } | ||
476 | tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone); | ||
477 | if (tmp == -1) { | ||
478 | /* OS/2 often does not set timezone therefore | ||
479 | * we must use server time to calc time zone. | ||
480 | * Could deviate slightly from the right zone. | ||
481 | * Smallest defined timezone difference is 15 minutes | ||
482 | * (i.e. Nepal). Rounding up/down is done to match | ||
483 | * this requirement. | ||
484 | */ | ||
485 | int val, seconds, remain, result; | ||
486 | struct timespec ts, utc; | ||
487 | utc = CURRENT_TIME; | ||
488 | ts = cnvrtDosUnixTm(rsp->SrvTime.Date, | ||
489 | rsp->SrvTime.Time, 0); | ||
490 | cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n", | ||
491 | (int)ts.tv_sec, (int)utc.tv_sec, | ||
492 | (int)(utc.tv_sec - ts.tv_sec)); | ||
493 | val = (int)(utc.tv_sec - ts.tv_sec); | ||
494 | seconds = abs(val); | ||
495 | result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; | ||
496 | remain = seconds % MIN_TZ_ADJ; | ||
497 | if (remain >= (MIN_TZ_ADJ / 2)) | ||
498 | result += MIN_TZ_ADJ; | ||
499 | if (val < 0) | ||
500 | result = -result; | ||
501 | server->timeAdj = result; | ||
502 | } else { | ||
503 | server->timeAdj = (int)tmp; | ||
504 | server->timeAdj *= 60; /* also in seconds */ | ||
505 | } | ||
506 | cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj); | ||
507 | |||
508 | |||
509 | /* BB get server time for time conversions and add | ||
510 | code to use it and timezone since this is not UTC */ | ||
511 | |||
512 | if (rsp->EncryptionKeyLength == | ||
513 | cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { | ||
514 | memcpy(server->cryptkey, rsp->EncryptionKey, | ||
515 | CIFS_CRYPTO_KEY_SIZE); | ||
516 | } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { | ||
517 | return -EIO; /* need cryptkey unless plain text */ | ||
518 | } | ||
519 | |||
520 | cifs_dbg(FYI, "LANMAN negotiated\n"); | ||
521 | return 0; | ||
522 | } | ||
523 | #else | ||
524 | static inline int | ||
525 | decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) | ||
526 | { | ||
527 | cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n"); | ||
528 | return -EOPNOTSUPP; | ||
529 | } | ||
530 | #endif | ||
531 | |||
532 | static bool | ||
533 | should_set_ext_sec_flag(enum securityEnum sectype) | ||
534 | { | ||
535 | switch (sectype) { | ||
536 | case RawNTLMSSP: | ||
537 | case Kerberos: | ||
538 | return true; | ||
539 | case Unspecified: | ||
540 | if (global_secflags & | ||
541 | (CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)) | ||
542 | return true; | ||
543 | /* Fallthrough */ | ||
544 | default: | ||
545 | return false; | ||
546 | } | ||
547 | } | ||
548 | |||
370 | int | 549 | int |
371 | CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) | 550 | CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) |
372 | { | 551 | { |
@@ -375,41 +554,24 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) | |||
375 | int rc = 0; | 554 | int rc = 0; |
376 | int bytes_returned; | 555 | int bytes_returned; |
377 | int i; | 556 | int i; |
378 | struct TCP_Server_Info *server; | 557 | struct TCP_Server_Info *server = ses->server; |
379 | u16 count; | 558 | u16 count; |
380 | unsigned int secFlags; | ||
381 | 559 | ||
382 | if (ses->server) | 560 | if (!server) { |
383 | server = ses->server; | 561 | WARN(1, "%s: server is NULL!\n", __func__); |
384 | else { | 562 | return -EIO; |
385 | rc = -EIO; | ||
386 | return rc; | ||
387 | } | 563 | } |
564 | |||
388 | rc = smb_init(SMB_COM_NEGOTIATE, 0, NULL /* no tcon yet */ , | 565 | rc = smb_init(SMB_COM_NEGOTIATE, 0, NULL /* no tcon yet */ , |
389 | (void **) &pSMB, (void **) &pSMBr); | 566 | (void **) &pSMB, (void **) &pSMBr); |
390 | if (rc) | 567 | if (rc) |
391 | return rc; | 568 | return rc; |
392 | 569 | ||
393 | /* if any of auth flags (ie not sign or seal) are overriden use them */ | ||
394 | if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) | ||
395 | secFlags = ses->overrideSecFlg; /* BB FIXME fix sign flags? */ | ||
396 | else /* if override flags set only sign/seal OR them with global auth */ | ||
397 | secFlags = global_secflags | ses->overrideSecFlg; | ||
398 | |||
399 | cifs_dbg(FYI, "secFlags 0x%x\n", secFlags); | ||
400 | |||
401 | pSMB->hdr.Mid = get_next_mid(server); | 570 | pSMB->hdr.Mid = get_next_mid(server); |
402 | pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); | 571 | pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); |
403 | 572 | ||
404 | if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) | 573 | if (should_set_ext_sec_flag(ses->sectype)) { |
405 | pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; | 574 | cifs_dbg(FYI, "Requesting extended security."); |
406 | else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) { | ||
407 | cifs_dbg(FYI, "Kerberos only mechanism, enable extended security\n"); | ||
408 | pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; | ||
409 | } else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) | ||
410 | pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; | ||
411 | else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) { | ||
412 | cifs_dbg(FYI, "NTLMSSP only mechanism, enable extended security\n"); | ||
413 | pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; | 575 | pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; |
414 | } | 576 | } |
415 | 577 | ||
@@ -436,127 +598,21 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) | |||
436 | could not negotiate a common dialect */ | 598 | could not negotiate a common dialect */ |
437 | rc = -EOPNOTSUPP; | 599 | rc = -EOPNOTSUPP; |
438 | goto neg_err_exit; | 600 | goto neg_err_exit; |
439 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | ||
440 | } else if ((pSMBr->hdr.WordCount == 13) | ||
441 | && ((server->dialect == LANMAN_PROT) | ||
442 | || (server->dialect == LANMAN2_PROT))) { | ||
443 | __s16 tmp; | ||
444 | struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr; | ||
445 | |||
446 | if ((secFlags & CIFSSEC_MAY_LANMAN) || | ||
447 | (secFlags & CIFSSEC_MAY_PLNTXT)) | ||
448 | server->secType = LANMAN; | ||
449 | else { | ||
450 | cifs_dbg(VFS, "mount failed weak security disabled in /proc/fs/cifs/SecurityFlags\n"); | ||
451 | rc = -EOPNOTSUPP; | ||
452 | goto neg_err_exit; | ||
453 | } | ||
454 | server->sec_mode = le16_to_cpu(rsp->SecurityMode); | ||
455 | server->maxReq = min_t(unsigned int, | ||
456 | le16_to_cpu(rsp->MaxMpxCount), | ||
457 | cifs_max_pending); | ||
458 | set_credits(server, server->maxReq); | ||
459 | server->maxBuf = le16_to_cpu(rsp->MaxBufSize); | ||
460 | server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); | ||
461 | /* even though we do not use raw we might as well set this | ||
462 | accurately, in case we ever find a need for it */ | ||
463 | if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { | ||
464 | server->max_rw = 0xFF00; | ||
465 | server->capabilities = CAP_MPX_MODE | CAP_RAW_MODE; | ||
466 | } else { | ||
467 | server->max_rw = 0;/* do not need to use raw anyway */ | ||
468 | server->capabilities = CAP_MPX_MODE; | ||
469 | } | ||
470 | tmp = (__s16)le16_to_cpu(rsp->ServerTimeZone); | ||
471 | if (tmp == -1) { | ||
472 | /* OS/2 often does not set timezone therefore | ||
473 | * we must use server time to calc time zone. | ||
474 | * Could deviate slightly from the right zone. | ||
475 | * Smallest defined timezone difference is 15 minutes | ||
476 | * (i.e. Nepal). Rounding up/down is done to match | ||
477 | * this requirement. | ||
478 | */ | ||
479 | int val, seconds, remain, result; | ||
480 | struct timespec ts, utc; | ||
481 | utc = CURRENT_TIME; | ||
482 | ts = cnvrtDosUnixTm(rsp->SrvTime.Date, | ||
483 | rsp->SrvTime.Time, 0); | ||
484 | cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n", | ||
485 | (int)ts.tv_sec, (int)utc.tv_sec, | ||
486 | (int)(utc.tv_sec - ts.tv_sec)); | ||
487 | val = (int)(utc.tv_sec - ts.tv_sec); | ||
488 | seconds = abs(val); | ||
489 | result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; | ||
490 | remain = seconds % MIN_TZ_ADJ; | ||
491 | if (remain >= (MIN_TZ_ADJ / 2)) | ||
492 | result += MIN_TZ_ADJ; | ||
493 | if (val < 0) | ||
494 | result = -result; | ||
495 | server->timeAdj = result; | ||
496 | } else { | ||
497 | server->timeAdj = (int)tmp; | ||
498 | server->timeAdj *= 60; /* also in seconds */ | ||
499 | } | ||
500 | cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj); | ||
501 | |||
502 | |||
503 | /* BB get server time for time conversions and add | ||
504 | code to use it and timezone since this is not UTC */ | ||
505 | |||
506 | if (rsp->EncryptionKeyLength == | ||
507 | cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { | ||
508 | memcpy(ses->server->cryptkey, rsp->EncryptionKey, | ||
509 | CIFS_CRYPTO_KEY_SIZE); | ||
510 | } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { | ||
511 | rc = -EIO; /* need cryptkey unless plain text */ | ||
512 | goto neg_err_exit; | ||
513 | } | ||
514 | |||
515 | cifs_dbg(FYI, "LANMAN negotiated\n"); | ||
516 | /* we will not end up setting signing flags - as no signing | ||
517 | was in LANMAN and server did not return the flags on */ | ||
518 | goto signing_check; | ||
519 | #else /* weak security disabled */ | ||
520 | } else if (pSMBr->hdr.WordCount == 13) { | 601 | } else if (pSMBr->hdr.WordCount == 13) { |
521 | cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n"); | 602 | server->negflavor = CIFS_NEGFLAVOR_LANMAN; |
522 | rc = -EOPNOTSUPP; | 603 | rc = decode_lanman_negprot_rsp(server, pSMBr); |
523 | #endif /* WEAK_PW_HASH */ | 604 | goto signing_check; |
524 | goto neg_err_exit; | ||
525 | } else if (pSMBr->hdr.WordCount != 17) { | 605 | } else if (pSMBr->hdr.WordCount != 17) { |
526 | /* unknown wct */ | 606 | /* unknown wct */ |
527 | rc = -EOPNOTSUPP; | 607 | rc = -EOPNOTSUPP; |
528 | goto neg_err_exit; | 608 | goto neg_err_exit; |
529 | } | 609 | } |
530 | /* else wct == 17 NTLM */ | 610 | /* else wct == 17, NTLM or better */ |
611 | |||
531 | server->sec_mode = pSMBr->SecurityMode; | 612 | server->sec_mode = pSMBr->SecurityMode; |
532 | if ((server->sec_mode & SECMODE_USER) == 0) | 613 | if ((server->sec_mode & SECMODE_USER) == 0) |
533 | cifs_dbg(FYI, "share mode security\n"); | 614 | cifs_dbg(FYI, "share mode security\n"); |
534 | 615 | ||
535 | if ((server->sec_mode & SECMODE_PW_ENCRYPT) == 0) | ||
536 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | ||
537 | if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0) | ||
538 | #endif /* CIFS_WEAK_PW_HASH */ | ||
539 | cifs_dbg(VFS, "Server requests plain text password but client support disabled\n"); | ||
540 | |||
541 | if ((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2) | ||
542 | server->secType = NTLMv2; | ||
543 | else if (secFlags & CIFSSEC_MAY_NTLM) | ||
544 | server->secType = NTLM; | ||
545 | else if (secFlags & CIFSSEC_MAY_NTLMV2) | ||
546 | server->secType = NTLMv2; | ||
547 | else if (secFlags & CIFSSEC_MAY_KRB5) | ||
548 | server->secType = Kerberos; | ||
549 | else if (secFlags & CIFSSEC_MAY_NTLMSSP) | ||
550 | server->secType = RawNTLMSSP; | ||
551 | else if (secFlags & CIFSSEC_MAY_LANMAN) | ||
552 | server->secType = LANMAN; | ||
553 | else { | ||
554 | rc = -EOPNOTSUPP; | ||
555 | cifs_dbg(VFS, "Invalid security type\n"); | ||
556 | goto neg_err_exit; | ||
557 | } | ||
558 | /* else ... any others ...? */ | ||
559 | |||
560 | /* one byte, so no need to convert this or EncryptionKeyLen from | 616 | /* one byte, so no need to convert this or EncryptionKeyLen from |
561 | little endian */ | 617 | little endian */ |
562 | server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount), | 618 | server->maxReq = min_t(unsigned int, le16_to_cpu(pSMBr->MaxMpxCount), |
@@ -569,90 +625,26 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) | |||
569 | server->capabilities = le32_to_cpu(pSMBr->Capabilities); | 625 | server->capabilities = le32_to_cpu(pSMBr->Capabilities); |
570 | server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); | 626 | server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); |
571 | server->timeAdj *= 60; | 627 | server->timeAdj *= 60; |
628 | |||
572 | if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { | 629 | if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { |
630 | server->negflavor = CIFS_NEGFLAVOR_UNENCAP; | ||
573 | memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey, | 631 | memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey, |
574 | CIFS_CRYPTO_KEY_SIZE); | 632 | CIFS_CRYPTO_KEY_SIZE); |
575 | } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC || | 633 | } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC || |
576 | server->capabilities & CAP_EXTENDED_SECURITY) && | 634 | server->capabilities & CAP_EXTENDED_SECURITY) && |
577 | (pSMBr->EncryptionKeyLength == 0)) { | 635 | (pSMBr->EncryptionKeyLength == 0)) { |
578 | /* decode security blob */ | 636 | server->negflavor = CIFS_NEGFLAVOR_EXTENDED; |
579 | count = get_bcc(&pSMBr->hdr); | 637 | rc = decode_ext_sec_blob(ses, pSMBr); |
580 | if (count < 16) { | ||
581 | rc = -EIO; | ||
582 | goto neg_err_exit; | ||
583 | } | ||
584 | spin_lock(&cifs_tcp_ses_lock); | ||
585 | if (server->srv_count > 1) { | ||
586 | spin_unlock(&cifs_tcp_ses_lock); | ||
587 | if (memcmp(server->server_GUID, | ||
588 | pSMBr->u.extended_response. | ||
589 | GUID, 16) != 0) { | ||
590 | cifs_dbg(FYI, "server UID changed\n"); | ||
591 | memcpy(server->server_GUID, | ||
592 | pSMBr->u.extended_response.GUID, | ||
593 | 16); | ||
594 | } | ||
595 | } else { | ||
596 | spin_unlock(&cifs_tcp_ses_lock); | ||
597 | memcpy(server->server_GUID, | ||
598 | pSMBr->u.extended_response.GUID, 16); | ||
599 | } | ||
600 | |||
601 | if (count == 16) { | ||
602 | server->secType = RawNTLMSSP; | ||
603 | } else { | ||
604 | rc = decode_negTokenInit(pSMBr->u.extended_response. | ||
605 | SecurityBlob, count - 16, | ||
606 | server); | ||
607 | if (rc == 1) | ||
608 | rc = 0; | ||
609 | else | ||
610 | rc = -EINVAL; | ||
611 | if (server->secType == Kerberos) { | ||
612 | if (!server->sec_kerberos && | ||
613 | !server->sec_mskerberos) | ||
614 | rc = -EOPNOTSUPP; | ||
615 | } else if (server->secType == RawNTLMSSP) { | ||
616 | if (!server->sec_ntlmssp) | ||
617 | rc = -EOPNOTSUPP; | ||
618 | } else | ||
619 | rc = -EOPNOTSUPP; | ||
620 | } | ||
621 | } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { | 638 | } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { |
622 | rc = -EIO; /* no crypt key only if plain text pwd */ | 639 | rc = -EIO; /* no crypt key only if plain text pwd */ |
623 | goto neg_err_exit; | ||
624 | } else | ||
625 | server->capabilities &= ~CAP_EXTENDED_SECURITY; | ||
626 | |||
627 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | ||
628 | signing_check: | ||
629 | #endif | ||
630 | if ((secFlags & CIFSSEC_MAY_SIGN) == 0) { | ||
631 | /* MUST_SIGN already includes the MAY_SIGN FLAG | ||
632 | so if this is zero it means that signing is disabled */ | ||
633 | cifs_dbg(FYI, "Signing disabled\n"); | ||
634 | if (server->sec_mode & SECMODE_SIGN_REQUIRED) { | ||
635 | cifs_dbg(VFS, "Server requires packet signing to be enabled in /proc/fs/cifs/SecurityFlags\n"); | ||
636 | rc = -EOPNOTSUPP; | ||
637 | } | ||
638 | server->sec_mode &= | ||
639 | ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); | ||
640 | } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { | ||
641 | /* signing required */ | ||
642 | cifs_dbg(FYI, "Must sign - secFlags 0x%x\n", secFlags); | ||
643 | if ((server->sec_mode & | ||
644 | (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) { | ||
645 | cifs_dbg(VFS, "signing required but server lacks support\n"); | ||
646 | rc = -EOPNOTSUPP; | ||
647 | } else | ||
648 | server->sec_mode |= SECMODE_SIGN_REQUIRED; | ||
649 | } else { | 640 | } else { |
650 | /* signing optional ie CIFSSEC_MAY_SIGN */ | 641 | server->negflavor = CIFS_NEGFLAVOR_UNENCAP; |
651 | if ((server->sec_mode & SECMODE_SIGN_REQUIRED) == 0) | 642 | server->capabilities &= ~CAP_EXTENDED_SECURITY; |
652 | server->sec_mode &= | ||
653 | ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); | ||
654 | } | 643 | } |
655 | 644 | ||
645 | signing_check: | ||
646 | if (!rc) | ||
647 | rc = cifs_enable_signing(server, ses->sign); | ||
656 | neg_err_exit: | 648 | neg_err_exit: |
657 | cifs_buf_release(pSMB); | 649 | cifs_buf_release(pSMB); |
658 | 650 | ||
@@ -777,9 +769,8 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses) | |||
777 | 769 | ||
778 | pSMB->hdr.Mid = get_next_mid(ses->server); | 770 | pSMB->hdr.Mid = get_next_mid(ses->server); |
779 | 771 | ||
780 | if (ses->server->sec_mode & | 772 | if (ses->server->sign) |
781 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | 773 | pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
782 | pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | ||
783 | 774 | ||
784 | pSMB->hdr.Uid = ses->Suid; | 775 | pSMB->hdr.Uid = ses->Suid; |
785 | 776 | ||
@@ -1540,8 +1531,7 @@ cifs_readv_callback(struct mid_q_entry *mid) | |||
1540 | switch (mid->mid_state) { | 1531 | switch (mid->mid_state) { |
1541 | case MID_RESPONSE_RECEIVED: | 1532 | case MID_RESPONSE_RECEIVED: |
1542 | /* result already set, check signature */ | 1533 | /* result already set, check signature */ |
1543 | if (server->sec_mode & | 1534 | if (server->sign) { |
1544 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | ||
1545 | int rc = 0; | 1535 | int rc = 0; |
1546 | 1536 | ||
1547 | rc = cifs_verify_signature(&rqst, server, | 1537 | rc = cifs_verify_signature(&rqst, server, |
@@ -3940,6 +3930,7 @@ QFileInfoRetry: | |||
3940 | pSMB->Pad = 0; | 3930 | pSMB->Pad = 0; |
3941 | pSMB->Fid = netfid; | 3931 | pSMB->Fid = netfid; |
3942 | inc_rfc1001_len(pSMB, byte_count); | 3932 | inc_rfc1001_len(pSMB, byte_count); |
3933 | pSMB->t2.ByteCount = cpu_to_le16(byte_count); | ||
3943 | 3934 | ||
3944 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 3935 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
3945 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | 3936 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); |
@@ -4108,6 +4099,7 @@ UnixQFileInfoRetry: | |||
4108 | pSMB->Pad = 0; | 4099 | pSMB->Pad = 0; |
4109 | pSMB->Fid = netfid; | 4100 | pSMB->Fid = netfid; |
4110 | inc_rfc1001_len(pSMB, byte_count); | 4101 | inc_rfc1001_len(pSMB, byte_count); |
4102 | pSMB->t2.ByteCount = cpu_to_le16(byte_count); | ||
4111 | 4103 | ||
4112 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 4104 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
4113 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | 4105 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); |
@@ -4794,11 +4786,8 @@ getDFSRetry: | |||
4794 | strncpy(pSMB->RequestFileName, search_name, name_len); | 4786 | strncpy(pSMB->RequestFileName, search_name, name_len); |
4795 | } | 4787 | } |
4796 | 4788 | ||
4797 | if (ses->server) { | 4789 | if (ses->server && ses->server->sign) |
4798 | if (ses->server->sec_mode & | 4790 | pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
4799 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | ||
4800 | pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | ||
4801 | } | ||
4802 | 4791 | ||
4803 | pSMB->hdr.Uid = ses->Suid; | 4792 | pSMB->hdr.Uid = ses->Suid; |
4804 | 4793 | ||
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e3bc39bb9d12..d67c550c4980 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -85,7 +85,7 @@ enum { | |||
85 | Opt_acl, Opt_noacl, Opt_locallease, | 85 | Opt_acl, Opt_noacl, Opt_locallease, |
86 | Opt_sign, Opt_seal, Opt_noac, | 86 | Opt_sign, Opt_seal, Opt_noac, |
87 | Opt_fsc, Opt_mfsymlinks, | 87 | Opt_fsc, Opt_mfsymlinks, |
88 | Opt_multiuser, Opt_sloppy, | 88 | Opt_multiuser, Opt_sloppy, Opt_nosharesock, |
89 | 89 | ||
90 | /* Mount options which take numeric value */ | 90 | /* Mount options which take numeric value */ |
91 | Opt_backupuid, Opt_backupgid, Opt_uid, | 91 | Opt_backupuid, Opt_backupgid, Opt_uid, |
@@ -165,6 +165,7 @@ static const match_table_t cifs_mount_option_tokens = { | |||
165 | { Opt_mfsymlinks, "mfsymlinks" }, | 165 | { Opt_mfsymlinks, "mfsymlinks" }, |
166 | { Opt_multiuser, "multiuser" }, | 166 | { Opt_multiuser, "multiuser" }, |
167 | { Opt_sloppy, "sloppy" }, | 167 | { Opt_sloppy, "sloppy" }, |
168 | { Opt_nosharesock, "nosharesock" }, | ||
168 | 169 | ||
169 | { Opt_backupuid, "backupuid=%s" }, | 170 | { Opt_backupuid, "backupuid=%s" }, |
170 | { Opt_backupgid, "backupgid=%s" }, | 171 | { Opt_backupgid, "backupgid=%s" }, |
@@ -275,6 +276,7 @@ static const match_table_t cifs_smb_version_tokens = { | |||
275 | { Smb_20, SMB20_VERSION_STRING}, | 276 | { Smb_20, SMB20_VERSION_STRING}, |
276 | { Smb_21, SMB21_VERSION_STRING }, | 277 | { Smb_21, SMB21_VERSION_STRING }, |
277 | { Smb_30, SMB30_VERSION_STRING }, | 278 | { Smb_30, SMB30_VERSION_STRING }, |
279 | { Smb_302, SMB302_VERSION_STRING }, | ||
278 | }; | 280 | }; |
279 | 281 | ||
280 | static int ip_connect(struct TCP_Server_Info *server); | 282 | static int ip_connect(struct TCP_Server_Info *server); |
@@ -1024,44 +1026,48 @@ static int cifs_parse_security_flavors(char *value, | |||
1024 | 1026 | ||
1025 | substring_t args[MAX_OPT_ARGS]; | 1027 | substring_t args[MAX_OPT_ARGS]; |
1026 | 1028 | ||
1029 | /* | ||
1030 | * With mount options, the last one should win. Reset any existing | ||
1031 | * settings back to default. | ||
1032 | */ | ||
1033 | vol->sectype = Unspecified; | ||
1034 | vol->sign = false; | ||
1035 | |||
1027 | switch (match_token(value, cifs_secflavor_tokens, args)) { | 1036 | switch (match_token(value, cifs_secflavor_tokens, args)) { |
1028 | case Opt_sec_krb5: | ||
1029 | vol->secFlg |= CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_SIGN; | ||
1030 | break; | ||
1031 | case Opt_sec_krb5i: | ||
1032 | vol->secFlg |= CIFSSEC_MAY_KRB5 | CIFSSEC_MUST_SIGN; | ||
1033 | break; | ||
1034 | case Opt_sec_krb5p: | 1037 | case Opt_sec_krb5p: |
1035 | /* vol->secFlg |= CIFSSEC_MUST_SEAL | CIFSSEC_MAY_KRB5; */ | 1038 | cifs_dbg(VFS, "sec=krb5p is not supported!\n"); |
1036 | cifs_dbg(VFS, "Krb5 cifs privacy not supported\n"); | 1039 | return 1; |
1037 | break; | 1040 | case Opt_sec_krb5i: |
1038 | case Opt_sec_ntlmssp: | 1041 | vol->sign = true; |
1039 | vol->secFlg |= CIFSSEC_MAY_NTLMSSP; | 1042 | /* Fallthrough */ |
1043 | case Opt_sec_krb5: | ||
1044 | vol->sectype = Kerberos; | ||
1040 | break; | 1045 | break; |
1041 | case Opt_sec_ntlmsspi: | 1046 | case Opt_sec_ntlmsspi: |
1042 | vol->secFlg |= CIFSSEC_MAY_NTLMSSP | CIFSSEC_MUST_SIGN; | 1047 | vol->sign = true; |
1043 | break; | 1048 | /* Fallthrough */ |
1044 | case Opt_ntlm: | 1049 | case Opt_sec_ntlmssp: |
1045 | /* ntlm is default so can be turned off too */ | 1050 | vol->sectype = RawNTLMSSP; |
1046 | vol->secFlg |= CIFSSEC_MAY_NTLM; | ||
1047 | break; | 1051 | break; |
1048 | case Opt_sec_ntlmi: | 1052 | case Opt_sec_ntlmi: |
1049 | vol->secFlg |= CIFSSEC_MAY_NTLM | CIFSSEC_MUST_SIGN; | 1053 | vol->sign = true; |
1050 | break; | 1054 | /* Fallthrough */ |
1051 | case Opt_sec_ntlmv2: | 1055 | case Opt_ntlm: |
1052 | vol->secFlg |= CIFSSEC_MAY_NTLMV2; | 1056 | vol->sectype = NTLM; |
1053 | break; | 1057 | break; |
1054 | case Opt_sec_ntlmv2i: | 1058 | case Opt_sec_ntlmv2i: |
1055 | vol->secFlg |= CIFSSEC_MAY_NTLMV2 | CIFSSEC_MUST_SIGN; | 1059 | vol->sign = true; |
1060 | /* Fallthrough */ | ||
1061 | case Opt_sec_ntlmv2: | ||
1062 | vol->sectype = NTLMv2; | ||
1056 | break; | 1063 | break; |
1057 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 1064 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
1058 | case Opt_sec_lanman: | 1065 | case Opt_sec_lanman: |
1059 | vol->secFlg |= CIFSSEC_MAY_LANMAN; | 1066 | vol->sectype = LANMAN; |
1060 | break; | 1067 | break; |
1061 | #endif | 1068 | #endif |
1062 | case Opt_sec_none: | 1069 | case Opt_sec_none: |
1063 | vol->nullauth = 1; | 1070 | vol->nullauth = 1; |
1064 | vol->secFlg |= CIFSSEC_MAY_NTLM; | ||
1065 | break; | 1071 | break; |
1066 | default: | 1072 | default: |
1067 | cifs_dbg(VFS, "bad security option: %s\n", value); | 1073 | cifs_dbg(VFS, "bad security option: %s\n", value); |
@@ -1119,6 +1125,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) | |||
1119 | vol->ops = &smb30_operations; | 1125 | vol->ops = &smb30_operations; |
1120 | vol->vals = &smb30_values; | 1126 | vol->vals = &smb30_values; |
1121 | break; | 1127 | break; |
1128 | case Smb_302: | ||
1129 | vol->ops = &smb30_operations; /* currently identical with 3.0 */ | ||
1130 | vol->vals = &smb302_values; | ||
1131 | break; | ||
1122 | #endif | 1132 | #endif |
1123 | default: | 1133 | default: |
1124 | cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); | 1134 | cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); |
@@ -1424,7 +1434,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1424 | vol->local_lease = 1; | 1434 | vol->local_lease = 1; |
1425 | break; | 1435 | break; |
1426 | case Opt_sign: | 1436 | case Opt_sign: |
1427 | vol->secFlg |= CIFSSEC_MUST_SIGN; | 1437 | vol->sign = true; |
1428 | break; | 1438 | break; |
1429 | case Opt_seal: | 1439 | case Opt_seal: |
1430 | /* we do not do the following in secFlags because seal | 1440 | /* we do not do the following in secFlags because seal |
@@ -1455,6 +1465,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1455 | case Opt_sloppy: | 1465 | case Opt_sloppy: |
1456 | sloppy = true; | 1466 | sloppy = true; |
1457 | break; | 1467 | break; |
1468 | case Opt_nosharesock: | ||
1469 | vol->nosharesock = true; | ||
1470 | break; | ||
1458 | 1471 | ||
1459 | /* Numeric Values */ | 1472 | /* Numeric Values */ |
1460 | case Opt_backupuid: | 1473 | case Opt_backupuid: |
@@ -1662,7 +1675,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1662 | if (string == NULL) | 1675 | if (string == NULL) |
1663 | goto out_nomem; | 1676 | goto out_nomem; |
1664 | 1677 | ||
1665 | if (strnlen(string, 256) == 256) { | 1678 | if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN) |
1679 | == CIFS_MAX_DOMAINNAME_LEN) { | ||
1666 | printk(KERN_WARNING "CIFS: domain name too" | 1680 | printk(KERN_WARNING "CIFS: domain name too" |
1667 | " long\n"); | 1681 | " long\n"); |
1668 | goto cifs_parse_mount_err; | 1682 | goto cifs_parse_mount_err; |
@@ -1978,47 +1992,21 @@ match_address(struct TCP_Server_Info *server, struct sockaddr *addr, | |||
1978 | static bool | 1992 | static bool |
1979 | match_security(struct TCP_Server_Info *server, struct smb_vol *vol) | 1993 | match_security(struct TCP_Server_Info *server, struct smb_vol *vol) |
1980 | { | 1994 | { |
1981 | unsigned int secFlags; | 1995 | /* |
1982 | 1996 | * The select_sectype function should either return the vol->sectype | |
1983 | if (vol->secFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) | 1997 | * that was specified, or "Unspecified" if that sectype was not |
1984 | secFlags = vol->secFlg; | 1998 | * compatible with the given NEGOTIATE request. |
1985 | else | 1999 | */ |
1986 | secFlags = global_secflags | vol->secFlg; | 2000 | if (select_sectype(server, vol->sectype) == Unspecified) |
1987 | |||
1988 | switch (server->secType) { | ||
1989 | case LANMAN: | ||
1990 | if (!(secFlags & (CIFSSEC_MAY_LANMAN|CIFSSEC_MAY_PLNTXT))) | ||
1991 | return false; | ||
1992 | break; | ||
1993 | case NTLMv2: | ||
1994 | if (!(secFlags & CIFSSEC_MAY_NTLMV2)) | ||
1995 | return false; | ||
1996 | break; | ||
1997 | case NTLM: | ||
1998 | if (!(secFlags & CIFSSEC_MAY_NTLM)) | ||
1999 | return false; | ||
2000 | break; | ||
2001 | case Kerberos: | ||
2002 | if (!(secFlags & CIFSSEC_MAY_KRB5)) | ||
2003 | return false; | ||
2004 | break; | ||
2005 | case RawNTLMSSP: | ||
2006 | if (!(secFlags & CIFSSEC_MAY_NTLMSSP)) | ||
2007 | return false; | ||
2008 | break; | ||
2009 | default: | ||
2010 | /* shouldn't happen */ | ||
2011 | return false; | 2001 | return false; |
2012 | } | ||
2013 | 2002 | ||
2014 | /* now check if signing mode is acceptable */ | 2003 | /* |
2015 | if ((secFlags & CIFSSEC_MAY_SIGN) == 0 && | 2004 | * Now check if signing mode is acceptable. No need to check |
2016 | (server->sec_mode & SECMODE_SIGN_REQUIRED)) | 2005 | * global_secflags at this point since if MUST_SIGN is set then |
2017 | return false; | 2006 | * the server->sign had better be too. |
2018 | else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) && | 2007 | */ |
2019 | (server->sec_mode & | 2008 | if (vol->sign && !server->sign) |
2020 | (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0) | 2009 | return false; |
2021 | return false; | ||
2022 | 2010 | ||
2023 | return true; | 2011 | return true; |
2024 | } | 2012 | } |
@@ -2027,6 +2015,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol) | |||
2027 | { | 2015 | { |
2028 | struct sockaddr *addr = (struct sockaddr *)&vol->dstaddr; | 2016 | struct sockaddr *addr = (struct sockaddr *)&vol->dstaddr; |
2029 | 2017 | ||
2018 | if (vol->nosharesock) | ||
2019 | return 0; | ||
2020 | |||
2030 | if ((server->vals != vol->vals) || (server->ops != vol->ops)) | 2021 | if ((server->vals != vol->vals) || (server->ops != vol->ops)) |
2031 | return 0; | 2022 | return 0; |
2032 | 2023 | ||
@@ -2118,12 +2109,6 @@ cifs_get_tcp_session(struct smb_vol *volume_info) | |||
2118 | goto out_err; | 2109 | goto out_err; |
2119 | } | 2110 | } |
2120 | 2111 | ||
2121 | rc = cifs_crypto_shash_allocate(tcp_ses); | ||
2122 | if (rc) { | ||
2123 | cifs_dbg(VFS, "could not setup hash structures rc %d\n", rc); | ||
2124 | goto out_err; | ||
2125 | } | ||
2126 | |||
2127 | tcp_ses->ops = volume_info->ops; | 2112 | tcp_ses->ops = volume_info->ops; |
2128 | tcp_ses->vals = volume_info->vals; | 2113 | tcp_ses->vals = volume_info->vals; |
2129 | cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); | 2114 | cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); |
@@ -2216,7 +2201,11 @@ out_err: | |||
2216 | 2201 | ||
2217 | static int match_session(struct cifs_ses *ses, struct smb_vol *vol) | 2202 | static int match_session(struct cifs_ses *ses, struct smb_vol *vol) |
2218 | { | 2203 | { |
2219 | switch (ses->server->secType) { | 2204 | if (vol->sectype != Unspecified && |
2205 | vol->sectype != ses->sectype) | ||
2206 | return 0; | ||
2207 | |||
2208 | switch (ses->sectype) { | ||
2220 | case Kerberos: | 2209 | case Kerberos: |
2221 | if (!uid_eq(vol->cred_uid, ses->cred_uid)) | 2210 | if (!uid_eq(vol->cred_uid, ses->cred_uid)) |
2222 | return 0; | 2211 | return 0; |
@@ -2288,8 +2277,8 @@ cifs_put_smb_ses(struct cifs_ses *ses) | |||
2288 | 2277 | ||
2289 | #ifdef CONFIG_KEYS | 2278 | #ifdef CONFIG_KEYS |
2290 | 2279 | ||
2291 | /* strlen("cifs:a:") + INET6_ADDRSTRLEN + 1 */ | 2280 | /* strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1 */ |
2292 | #define CIFSCREDS_DESC_SIZE (7 + INET6_ADDRSTRLEN + 1) | 2281 | #define CIFSCREDS_DESC_SIZE (7 + CIFS_MAX_DOMAINNAME_LEN + 1) |
2293 | 2282 | ||
2294 | /* Populate username and pw fields from keyring if possible */ | 2283 | /* Populate username and pw fields from keyring if possible */ |
2295 | static int | 2284 | static int |
@@ -2493,7 +2482,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | |||
2493 | ses->cred_uid = volume_info->cred_uid; | 2482 | ses->cred_uid = volume_info->cred_uid; |
2494 | ses->linux_uid = volume_info->linux_uid; | 2483 | ses->linux_uid = volume_info->linux_uid; |
2495 | 2484 | ||
2496 | ses->overrideSecFlg = volume_info->secFlg; | 2485 | ses->sectype = volume_info->sectype; |
2486 | ses->sign = volume_info->sign; | ||
2497 | 2487 | ||
2498 | mutex_lock(&ses->session_mutex); | 2488 | mutex_lock(&ses->session_mutex); |
2499 | rc = cifs_negotiate_protocol(xid, ses); | 2489 | rc = cifs_negotiate_protocol(xid, ses); |
@@ -3656,7 +3646,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, | |||
3656 | NTLMv2 password here) */ | 3646 | NTLMv2 password here) */ |
3657 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 3647 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
3658 | if ((global_secflags & CIFSSEC_MAY_LANMAN) && | 3648 | if ((global_secflags & CIFSSEC_MAY_LANMAN) && |
3659 | (ses->server->secType == LANMAN)) | 3649 | (ses->sectype == LANMAN)) |
3660 | calc_lanman_hash(tcon->password, ses->server->cryptkey, | 3650 | calc_lanman_hash(tcon->password, ses->server->cryptkey, |
3661 | ses->server->sec_mode & | 3651 | ses->server->sec_mode & |
3662 | SECMODE_PW_ENCRYPT ? true : false, | 3652 | SECMODE_PW_ENCRYPT ? true : false, |
@@ -3674,8 +3664,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, | |||
3674 | } | 3664 | } |
3675 | } | 3665 | } |
3676 | 3666 | ||
3677 | if (ses->server->sec_mode & | 3667 | if (ses->server->sign) |
3678 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | ||
3679 | smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | 3668 | smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
3680 | 3669 | ||
3681 | if (ses->capabilities & CAP_STATUS32) { | 3670 | if (ses->capabilities & CAP_STATUS32) { |
@@ -3738,7 +3727,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, | |||
3738 | } | 3727 | } |
3739 | bcc_ptr += length + 1; | 3728 | bcc_ptr += length + 1; |
3740 | bytes_left -= (length + 1); | 3729 | bytes_left -= (length + 1); |
3741 | strncpy(tcon->treeName, tree, MAX_TREE_SIZE); | 3730 | strlcpy(tcon->treeName, tree, sizeof(tcon->treeName)); |
3742 | 3731 | ||
3743 | /* mostly informational -- no need to fail on error here */ | 3732 | /* mostly informational -- no need to fail on error here */ |
3744 | kfree(tcon->nativeFileSystem); | 3733 | kfree(tcon->nativeFileSystem); |
@@ -3827,7 +3816,6 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, | |||
3827 | int rc = -ENOSYS; | 3816 | int rc = -ENOSYS; |
3828 | struct TCP_Server_Info *server = ses->server; | 3817 | struct TCP_Server_Info *server = ses->server; |
3829 | 3818 | ||
3830 | ses->flags = 0; | ||
3831 | ses->capabilities = server->capabilities; | 3819 | ses->capabilities = server->capabilities; |
3832 | if (linuxExtEnabled == 0) | 3820 | if (linuxExtEnabled == 0) |
3833 | ses->capabilities &= (~server->vals->cap_unix); | 3821 | ses->capabilities &= (~server->vals->cap_unix); |
@@ -3848,6 +3836,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, | |||
3848 | server->sequence_number = 0x2; | 3836 | server->sequence_number = 0x2; |
3849 | server->session_estab = true; | 3837 | server->session_estab = true; |
3850 | ses->auth_key.response = NULL; | 3838 | ses->auth_key.response = NULL; |
3839 | if (server->ops->generate_signingkey) | ||
3840 | server->ops->generate_signingkey(server); | ||
3851 | } | 3841 | } |
3852 | mutex_unlock(&server->srv_mutex); | 3842 | mutex_unlock(&server->srv_mutex); |
3853 | 3843 | ||
@@ -3870,23 +3860,11 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, | |||
3870 | static int | 3860 | static int |
3871 | cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses) | 3861 | cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses) |
3872 | { | 3862 | { |
3873 | switch (ses->server->secType) { | 3863 | vol->sectype = ses->sectype; |
3874 | case Kerberos: | 3864 | |
3875 | vol->secFlg = CIFSSEC_MUST_KRB5; | 3865 | /* krb5 is special, since we don't need username or pw */ |
3866 | if (vol->sectype == Kerberos) | ||
3876 | return 0; | 3867 | return 0; |
3877 | case NTLMv2: | ||
3878 | vol->secFlg = CIFSSEC_MUST_NTLMV2; | ||
3879 | break; | ||
3880 | case NTLM: | ||
3881 | vol->secFlg = CIFSSEC_MUST_NTLM; | ||
3882 | break; | ||
3883 | case RawNTLMSSP: | ||
3884 | vol->secFlg = CIFSSEC_MUST_NTLMSSP; | ||
3885 | break; | ||
3886 | case LANMAN: | ||
3887 | vol->secFlg = CIFSSEC_MUST_LANMAN; | ||
3888 | break; | ||
3889 | } | ||
3890 | 3868 | ||
3891 | return cifs_set_cifscreds(vol, ses); | 3869 | return cifs_set_cifscreds(vol, ses); |
3892 | } | 3870 | } |
@@ -3912,6 +3890,8 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) | |||
3912 | vol_info->nocase = master_tcon->nocase; | 3890 | vol_info->nocase = master_tcon->nocase; |
3913 | vol_info->local_lease = master_tcon->local_lease; | 3891 | vol_info->local_lease = master_tcon->local_lease; |
3914 | vol_info->no_linux_ext = !master_tcon->unix_ext; | 3892 | vol_info->no_linux_ext = !master_tcon->unix_ext; |
3893 | vol_info->sectype = master_tcon->ses->sectype; | ||
3894 | vol_info->sign = master_tcon->ses->sign; | ||
3915 | 3895 | ||
3916 | rc = cifs_set_vol_auth(vol_info, master_tcon->ses); | 3896 | rc = cifs_set_vol_auth(vol_info, master_tcon->ses); |
3917 | if (rc) { | 3897 | if (rc) { |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 5699b5036ed8..d62ce0d48141 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -204,6 +204,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | |||
204 | struct inode *newinode = NULL; | 204 | struct inode *newinode = NULL; |
205 | int disposition; | 205 | int disposition; |
206 | struct TCP_Server_Info *server = tcon->ses->server; | 206 | struct TCP_Server_Info *server = tcon->ses->server; |
207 | struct cifs_open_parms oparms; | ||
207 | 208 | ||
208 | *oplock = 0; | 209 | *oplock = 0; |
209 | if (tcon->ses->server->oplocks) | 210 | if (tcon->ses->server->oplocks) |
@@ -319,9 +320,16 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, | |||
319 | if (backup_cred(cifs_sb)) | 320 | if (backup_cred(cifs_sb)) |
320 | create_options |= CREATE_OPEN_BACKUP_INTENT; | 321 | create_options |= CREATE_OPEN_BACKUP_INTENT; |
321 | 322 | ||
322 | rc = server->ops->open(xid, tcon, full_path, disposition, | 323 | oparms.tcon = tcon; |
323 | desired_access, create_options, fid, oplock, | 324 | oparms.cifs_sb = cifs_sb; |
324 | buf, cifs_sb); | 325 | oparms.desired_access = desired_access; |
326 | oparms.create_options = create_options; | ||
327 | oparms.disposition = disposition; | ||
328 | oparms.path = full_path; | ||
329 | oparms.fid = fid; | ||
330 | oparms.reconnect = false; | ||
331 | |||
332 | rc = server->ops->open(xid, &oparms, oplock, buf); | ||
325 | if (rc) { | 333 | if (rc) { |
326 | cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc); | 334 | cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc); |
327 | goto out; | 335 | goto out; |
@@ -822,8 +830,7 @@ const struct dentry_operations cifs_dentry_ops = { | |||
822 | /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ | 830 | /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ |
823 | }; | 831 | }; |
824 | 832 | ||
825 | static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode, | 833 | static int cifs_ci_hash(const struct dentry *dentry, struct qstr *q) |
826 | struct qstr *q) | ||
827 | { | 834 | { |
828 | struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; | 835 | struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; |
829 | unsigned long hash; | 836 | unsigned long hash; |
@@ -838,12 +845,10 @@ static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode, | |||
838 | return 0; | 845 | return 0; |
839 | } | 846 | } |
840 | 847 | ||
841 | static int cifs_ci_compare(const struct dentry *parent, | 848 | static int cifs_ci_compare(const struct dentry *parent, const struct dentry *dentry, |
842 | const struct inode *pinode, | ||
843 | const struct dentry *dentry, const struct inode *inode, | ||
844 | unsigned int len, const char *str, const struct qstr *name) | 849 | unsigned int len, const char *str, const struct qstr *name) |
845 | { | 850 | { |
846 | struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls; | 851 | struct nls_table *codepage = CIFS_SB(parent->d_sb)->local_nls; |
847 | 852 | ||
848 | if ((name->len == len) && | 853 | if ((name->len == len) && |
849 | (nls_strnicmp(codepage, name->name, str, len) == 0)) | 854 | (nls_strnicmp(codepage, name->name, str, len) == 0)) |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 48b29d24c9f4..7e36ae34e947 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -183,6 +183,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, | |||
183 | int create_options = CREATE_NOT_DIR; | 183 | int create_options = CREATE_NOT_DIR; |
184 | FILE_ALL_INFO *buf; | 184 | FILE_ALL_INFO *buf; |
185 | struct TCP_Server_Info *server = tcon->ses->server; | 185 | struct TCP_Server_Info *server = tcon->ses->server; |
186 | struct cifs_open_parms oparms; | ||
186 | 187 | ||
187 | if (!server->ops->open) | 188 | if (!server->ops->open) |
188 | return -ENOSYS; | 189 | return -ENOSYS; |
@@ -224,9 +225,16 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, | |||
224 | if (backup_cred(cifs_sb)) | 225 | if (backup_cred(cifs_sb)) |
225 | create_options |= CREATE_OPEN_BACKUP_INTENT; | 226 | create_options |= CREATE_OPEN_BACKUP_INTENT; |
226 | 227 | ||
227 | rc = server->ops->open(xid, tcon, full_path, disposition, | 228 | oparms.tcon = tcon; |
228 | desired_access, create_options, fid, oplock, buf, | 229 | oparms.cifs_sb = cifs_sb; |
229 | cifs_sb); | 230 | oparms.desired_access = desired_access; |
231 | oparms.create_options = create_options; | ||
232 | oparms.disposition = disposition; | ||
233 | oparms.path = full_path; | ||
234 | oparms.fid = fid; | ||
235 | oparms.reconnect = false; | ||
236 | |||
237 | rc = server->ops->open(xid, &oparms, oplock, buf); | ||
230 | 238 | ||
231 | if (rc) | 239 | if (rc) |
232 | goto out; | 240 | goto out; |
@@ -553,11 +561,10 @@ cifs_relock_file(struct cifsFileInfo *cfile) | |||
553 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | 561 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
554 | int rc = 0; | 562 | int rc = 0; |
555 | 563 | ||
556 | /* we are going to update can_cache_brlcks here - need a write access */ | 564 | down_read(&cinode->lock_sem); |
557 | down_write(&cinode->lock_sem); | ||
558 | if (cinode->can_cache_brlcks) { | 565 | if (cinode->can_cache_brlcks) { |
559 | /* can cache locks - no need to push them */ | 566 | /* can cache locks - no need to relock */ |
560 | up_write(&cinode->lock_sem); | 567 | up_read(&cinode->lock_sem); |
561 | return rc; | 568 | return rc; |
562 | } | 569 | } |
563 | 570 | ||
@@ -568,7 +575,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) | |||
568 | else | 575 | else |
569 | rc = tcon->ses->server->ops->push_mand_locks(cfile); | 576 | rc = tcon->ses->server->ops->push_mand_locks(cfile); |
570 | 577 | ||
571 | up_write(&cinode->lock_sem); | 578 | up_read(&cinode->lock_sem); |
572 | return rc; | 579 | return rc; |
573 | } | 580 | } |
574 | 581 | ||
@@ -587,7 +594,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) | |||
587 | int desired_access; | 594 | int desired_access; |
588 | int disposition = FILE_OPEN; | 595 | int disposition = FILE_OPEN; |
589 | int create_options = CREATE_NOT_DIR; | 596 | int create_options = CREATE_NOT_DIR; |
590 | struct cifs_fid fid; | 597 | struct cifs_open_parms oparms; |
591 | 598 | ||
592 | xid = get_xid(); | 599 | xid = get_xid(); |
593 | mutex_lock(&cfile->fh_mutex); | 600 | mutex_lock(&cfile->fh_mutex); |
@@ -637,9 +644,10 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) | |||
637 | 644 | ||
638 | rc = cifs_posix_open(full_path, NULL, inode->i_sb, | 645 | rc = cifs_posix_open(full_path, NULL, inode->i_sb, |
639 | cifs_sb->mnt_file_mode /* ignored */, | 646 | cifs_sb->mnt_file_mode /* ignored */, |
640 | oflags, &oplock, &fid.netfid, xid); | 647 | oflags, &oplock, &cfile->fid.netfid, xid); |
641 | if (rc == 0) { | 648 | if (rc == 0) { |
642 | cifs_dbg(FYI, "posix reopen succeeded\n"); | 649 | cifs_dbg(FYI, "posix reopen succeeded\n"); |
650 | oparms.reconnect = true; | ||
643 | goto reopen_success; | 651 | goto reopen_success; |
644 | } | 652 | } |
645 | /* | 653 | /* |
@@ -654,7 +662,16 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) | |||
654 | create_options |= CREATE_OPEN_BACKUP_INTENT; | 662 | create_options |= CREATE_OPEN_BACKUP_INTENT; |
655 | 663 | ||
656 | if (server->ops->get_lease_key) | 664 | if (server->ops->get_lease_key) |
657 | server->ops->get_lease_key(inode, &fid); | 665 | server->ops->get_lease_key(inode, &cfile->fid); |
666 | |||
667 | oparms.tcon = tcon; | ||
668 | oparms.cifs_sb = cifs_sb; | ||
669 | oparms.desired_access = desired_access; | ||
670 | oparms.create_options = create_options; | ||
671 | oparms.disposition = disposition; | ||
672 | oparms.path = full_path; | ||
673 | oparms.fid = &cfile->fid; | ||
674 | oparms.reconnect = true; | ||
658 | 675 | ||
659 | /* | 676 | /* |
660 | * Can not refresh inode by passing in file_info buf to be returned by | 677 | * Can not refresh inode by passing in file_info buf to be returned by |
@@ -663,9 +680,14 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) | |||
663 | * version of file size can be stale. If we knew for sure that inode was | 680 | * version of file size can be stale. If we knew for sure that inode was |
664 | * not dirty locally we could do this. | 681 | * not dirty locally we could do this. |
665 | */ | 682 | */ |
666 | rc = server->ops->open(xid, tcon, full_path, disposition, | 683 | rc = server->ops->open(xid, &oparms, &oplock, NULL); |
667 | desired_access, create_options, &fid, &oplock, | 684 | if (rc == -ENOENT && oparms.reconnect == false) { |
668 | NULL, cifs_sb); | 685 | /* durable handle timeout is expired - open the file again */ |
686 | rc = server->ops->open(xid, &oparms, &oplock, NULL); | ||
687 | /* indicate that we need to relock the file */ | ||
688 | oparms.reconnect = true; | ||
689 | } | ||
690 | |||
669 | if (rc) { | 691 | if (rc) { |
670 | mutex_unlock(&cfile->fh_mutex); | 692 | mutex_unlock(&cfile->fh_mutex); |
671 | cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc); | 693 | cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc); |
@@ -696,8 +718,9 @@ reopen_success: | |||
696 | * to the server to get the new inode info. | 718 | * to the server to get the new inode info. |
697 | */ | 719 | */ |
698 | 720 | ||
699 | server->ops->set_fid(cfile, &fid, oplock); | 721 | server->ops->set_fid(cfile, &cfile->fid, oplock); |
700 | cifs_relock_file(cfile); | 722 | if (oparms.reconnect) |
723 | cifs_relock_file(cfile); | ||
701 | 724 | ||
702 | reopen_error_exit: | 725 | reopen_error_exit: |
703 | kfree(full_path); | 726 | kfree(full_path); |
@@ -999,7 +1022,7 @@ try_again: | |||
999 | rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); | 1022 | rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); |
1000 | if (!rc) | 1023 | if (!rc) |
1001 | goto try_again; | 1024 | goto try_again; |
1002 | locks_delete_block(flock); | 1025 | posix_unblock_lock(flock); |
1003 | } | 1026 | } |
1004 | return rc; | 1027 | return rc; |
1005 | } | 1028 | } |
@@ -1092,6 +1115,7 @@ struct lock_to_push { | |||
1092 | static int | 1115 | static int |
1093 | cifs_push_posix_locks(struct cifsFileInfo *cfile) | 1116 | cifs_push_posix_locks(struct cifsFileInfo *cfile) |
1094 | { | 1117 | { |
1118 | struct inode *inode = cfile->dentry->d_inode; | ||
1095 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | 1119 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
1096 | struct file_lock *flock, **before; | 1120 | struct file_lock *flock, **before; |
1097 | unsigned int count = 0, i = 0; | 1121 | unsigned int count = 0, i = 0; |
@@ -1102,12 +1126,12 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1102 | 1126 | ||
1103 | xid = get_xid(); | 1127 | xid = get_xid(); |
1104 | 1128 | ||
1105 | lock_flocks(); | 1129 | spin_lock(&inode->i_lock); |
1106 | cifs_for_each_lock(cfile->dentry->d_inode, before) { | 1130 | cifs_for_each_lock(inode, before) { |
1107 | if ((*before)->fl_flags & FL_POSIX) | 1131 | if ((*before)->fl_flags & FL_POSIX) |
1108 | count++; | 1132 | count++; |
1109 | } | 1133 | } |
1110 | unlock_flocks(); | 1134 | spin_unlock(&inode->i_lock); |
1111 | 1135 | ||
1112 | INIT_LIST_HEAD(&locks_to_send); | 1136 | INIT_LIST_HEAD(&locks_to_send); |
1113 | 1137 | ||
@@ -1126,8 +1150,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1126 | } | 1150 | } |
1127 | 1151 | ||
1128 | el = locks_to_send.next; | 1152 | el = locks_to_send.next; |
1129 | lock_flocks(); | 1153 | spin_lock(&inode->i_lock); |
1130 | cifs_for_each_lock(cfile->dentry->d_inode, before) { | 1154 | cifs_for_each_lock(inode, before) { |
1131 | flock = *before; | 1155 | flock = *before; |
1132 | if ((flock->fl_flags & FL_POSIX) == 0) | 1156 | if ((flock->fl_flags & FL_POSIX) == 0) |
1133 | continue; | 1157 | continue; |
@@ -1152,7 +1176,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1152 | lck->offset = flock->fl_start; | 1176 | lck->offset = flock->fl_start; |
1153 | el = el->next; | 1177 | el = el->next; |
1154 | } | 1178 | } |
1155 | unlock_flocks(); | 1179 | spin_unlock(&inode->i_lock); |
1156 | 1180 | ||
1157 | list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { | 1181 | list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { |
1158 | int stored_rc; | 1182 | int stored_rc; |
@@ -3546,11 +3570,12 @@ static int cifs_release_page(struct page *page, gfp_t gfp) | |||
3546 | return cifs_fscache_release_page(page, gfp); | 3570 | return cifs_fscache_release_page(page, gfp); |
3547 | } | 3571 | } |
3548 | 3572 | ||
3549 | static void cifs_invalidate_page(struct page *page, unsigned long offset) | 3573 | static void cifs_invalidate_page(struct page *page, unsigned int offset, |
3574 | unsigned int length) | ||
3550 | { | 3575 | { |
3551 | struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host); | 3576 | struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host); |
3552 | 3577 | ||
3553 | if (offset == 0) | 3578 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
3554 | cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); | 3579 | cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); |
3555 | } | 3580 | } |
3556 | 3581 | ||
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 20efd81266c6..449b6cf09b09 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -558,6 +558,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, | |||
558 | fattr->cf_mode &= ~(S_IWUGO); | 558 | fattr->cf_mode &= ~(S_IWUGO); |
559 | 559 | ||
560 | fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); | 560 | fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); |
561 | if (fattr->cf_nlink < 1) { | ||
562 | cifs_dbg(1, "replacing bogus file nlink value %u\n", | ||
563 | fattr->cf_nlink); | ||
564 | fattr->cf_nlink = 1; | ||
565 | } | ||
561 | } | 566 | } |
562 | 567 | ||
563 | fattr->cf_uid = cifs_sb->mnt_uid; | 568 | fattr->cf_uid = cifs_sb->mnt_uid; |
diff --git a/fs/cifs/link.c b/fs/cifs/link.c index b83c3f5646bd..562044f700e5 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c | |||
@@ -305,67 +305,89 @@ CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr) | |||
305 | } | 305 | } |
306 | 306 | ||
307 | int | 307 | int |
308 | CIFSCheckMFSymlink(struct cifs_fattr *fattr, | 308 | open_query_close_cifs_symlink(const unsigned char *path, char *pbuf, |
309 | const unsigned char *path, | 309 | unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb, |
310 | struct cifs_sb_info *cifs_sb, unsigned int xid) | 310 | unsigned int xid) |
311 | { | 311 | { |
312 | int rc; | 312 | int rc; |
313 | int oplock = 0; | 313 | int oplock = 0; |
314 | __u16 netfid = 0; | 314 | __u16 netfid = 0; |
315 | struct tcon_link *tlink; | 315 | struct tcon_link *tlink; |
316 | struct cifs_tcon *pTcon; | 316 | struct cifs_tcon *ptcon; |
317 | struct cifs_io_parms io_parms; | 317 | struct cifs_io_parms io_parms; |
318 | u8 *buf; | ||
319 | char *pbuf; | ||
320 | unsigned int bytes_read = 0; | ||
321 | int buf_type = CIFS_NO_BUFFER; | 318 | int buf_type = CIFS_NO_BUFFER; |
322 | unsigned int link_len = 0; | ||
323 | FILE_ALL_INFO file_info; | 319 | FILE_ALL_INFO file_info; |
324 | 320 | ||
325 | if (!CIFSCouldBeMFSymlink(fattr)) | ||
326 | /* it's not a symlink */ | ||
327 | return 0; | ||
328 | |||
329 | tlink = cifs_sb_tlink(cifs_sb); | 321 | tlink = cifs_sb_tlink(cifs_sb); |
330 | if (IS_ERR(tlink)) | 322 | if (IS_ERR(tlink)) |
331 | return PTR_ERR(tlink); | 323 | return PTR_ERR(tlink); |
332 | pTcon = tlink_tcon(tlink); | 324 | ptcon = tlink_tcon(tlink); |
333 | 325 | ||
334 | rc = CIFSSMBOpen(xid, pTcon, path, FILE_OPEN, GENERIC_READ, | 326 | rc = CIFSSMBOpen(xid, ptcon, path, FILE_OPEN, GENERIC_READ, |
335 | CREATE_NOT_DIR, &netfid, &oplock, &file_info, | 327 | CREATE_NOT_DIR, &netfid, &oplock, &file_info, |
336 | cifs_sb->local_nls, | 328 | cifs_sb->local_nls, |
337 | cifs_sb->mnt_cifs_flags & | 329 | cifs_sb->mnt_cifs_flags & |
338 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 330 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
339 | if (rc != 0) | 331 | if (rc != 0) { |
340 | goto out; | 332 | cifs_put_tlink(tlink); |
333 | return rc; | ||
334 | } | ||
341 | 335 | ||
342 | if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) { | 336 | if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) { |
343 | CIFSSMBClose(xid, pTcon, netfid); | 337 | CIFSSMBClose(xid, ptcon, netfid); |
338 | cifs_put_tlink(tlink); | ||
344 | /* it's not a symlink */ | 339 | /* it's not a symlink */ |
345 | goto out; | 340 | return rc; |
346 | } | 341 | } |
347 | 342 | ||
348 | buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); | ||
349 | if (!buf) { | ||
350 | rc = -ENOMEM; | ||
351 | goto out; | ||
352 | } | ||
353 | pbuf = buf; | ||
354 | io_parms.netfid = netfid; | 343 | io_parms.netfid = netfid; |
355 | io_parms.pid = current->tgid; | 344 | io_parms.pid = current->tgid; |
356 | io_parms.tcon = pTcon; | 345 | io_parms.tcon = ptcon; |
357 | io_parms.offset = 0; | 346 | io_parms.offset = 0; |
358 | io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE; | 347 | io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE; |
359 | 348 | ||
360 | rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf, &buf_type); | 349 | rc = CIFSSMBRead(xid, &io_parms, pbytes_read, &pbuf, &buf_type); |
361 | CIFSSMBClose(xid, pTcon, netfid); | 350 | CIFSSMBClose(xid, ptcon, netfid); |
362 | if (rc != 0) { | 351 | cifs_put_tlink(tlink); |
363 | kfree(buf); | 352 | return rc; |
353 | } | ||
354 | |||
355 | |||
356 | int | ||
357 | CIFSCheckMFSymlink(struct cifs_fattr *fattr, | ||
358 | const unsigned char *path, | ||
359 | struct cifs_sb_info *cifs_sb, unsigned int xid) | ||
360 | { | ||
361 | int rc = 0; | ||
362 | u8 *buf = NULL; | ||
363 | unsigned int link_len = 0; | ||
364 | unsigned int bytes_read = 0; | ||
365 | struct cifs_tcon *ptcon; | ||
366 | |||
367 | if (!CIFSCouldBeMFSymlink(fattr)) | ||
368 | /* it's not a symlink */ | ||
369 | return 0; | ||
370 | |||
371 | buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); | ||
372 | if (!buf) { | ||
373 | rc = -ENOMEM; | ||
364 | goto out; | 374 | goto out; |
365 | } | 375 | } |
366 | 376 | ||
377 | ptcon = tlink_tcon(cifs_sb_tlink(cifs_sb)); | ||
378 | if ((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink)) | ||
379 | rc = ptcon->ses->server->ops->query_mf_symlink(path, buf, | ||
380 | &bytes_read, cifs_sb, xid); | ||
381 | else | ||
382 | goto out; | ||
383 | |||
384 | if (rc != 0) | ||
385 | goto out; | ||
386 | |||
387 | if (bytes_read == 0) /* not a symlink */ | ||
388 | goto out; | ||
389 | |||
367 | rc = CIFSParseMFSymlink(buf, bytes_read, &link_len, NULL); | 390 | rc = CIFSParseMFSymlink(buf, bytes_read, &link_len, NULL); |
368 | kfree(buf); | ||
369 | if (rc == -EINVAL) { | 391 | if (rc == -EINVAL) { |
370 | /* it's not a symlink */ | 392 | /* it's not a symlink */ |
371 | rc = 0; | 393 | rc = 0; |
@@ -381,7 +403,7 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr, | |||
381 | fattr->cf_mode |= S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO; | 403 | fattr->cf_mode |= S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO; |
382 | fattr->cf_dtype = DT_LNK; | 404 | fattr->cf_dtype = DT_LNK; |
383 | out: | 405 | out: |
384 | cifs_put_tlink(tlink); | 406 | kfree(buf); |
385 | return rc; | 407 | return rc; |
386 | } | 408 | } |
387 | 409 | ||
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1bec014779fd..f7d4b2285efe 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -267,8 +267,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , | |||
267 | if (treeCon->nocase) | 267 | if (treeCon->nocase) |
268 | buffer->Flags |= SMBFLG_CASELESS; | 268 | buffer->Flags |= SMBFLG_CASELESS; |
269 | if ((treeCon->ses) && (treeCon->ses->server)) | 269 | if ((treeCon->ses) && (treeCon->ses->server)) |
270 | if (treeCon->ses->server->sec_mode & | 270 | if (treeCon->ses->server->sign) |
271 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | ||
272 | buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | 271 | buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
273 | } | 272 | } |
274 | 273 | ||
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 770d5a9781c1..69d2c826a23b 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -111,6 +111,14 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, | |||
111 | return; | 111 | return; |
112 | } | 112 | } |
113 | 113 | ||
114 | /* | ||
115 | * If we know that the inode will need to be revalidated immediately, | ||
116 | * then don't create a new dentry for it. We'll end up doing an on | ||
117 | * the wire call either way and this spares us an invalidation. | ||
118 | */ | ||
119 | if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL) | ||
120 | return; | ||
121 | |||
114 | dentry = d_alloc(parent, name); | 122 | dentry = d_alloc(parent, name); |
115 | if (!dentry) | 123 | if (!dentry) |
116 | return; | 124 | return; |
@@ -126,6 +134,22 @@ out: | |||
126 | dput(dentry); | 134 | dput(dentry); |
127 | } | 135 | } |
128 | 136 | ||
137 | /* | ||
138 | * Is it possible that this directory might turn out to be a DFS referral | ||
139 | * once we go to try and use it? | ||
140 | */ | ||
141 | static bool | ||
142 | cifs_dfs_is_possible(struct cifs_sb_info *cifs_sb) | ||
143 | { | ||
144 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
145 | struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); | ||
146 | |||
147 | if (tcon->Flags & SMB_SHARE_IS_IN_DFS) | ||
148 | return true; | ||
149 | #endif | ||
150 | return false; | ||
151 | } | ||
152 | |||
129 | static void | 153 | static void |
130 | cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) | 154 | cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) |
131 | { | 155 | { |
@@ -135,6 +159,19 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) | |||
135 | if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { | 159 | if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { |
136 | fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; | 160 | fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; |
137 | fattr->cf_dtype = DT_DIR; | 161 | fattr->cf_dtype = DT_DIR; |
162 | /* | ||
163 | * Windows CIFS servers generally make DFS referrals look | ||
164 | * like directories in FIND_* responses with the reparse | ||
165 | * attribute flag also set (since DFS junctions are | ||
166 | * reparse points). We must revalidate at least these | ||
167 | * directory inodes before trying to use them (if | ||
168 | * they are DFS we will get PATH_NOT_COVERED back | ||
169 | * when queried directly and can then try to connect | ||
170 | * to the DFS target) | ||
171 | */ | ||
172 | if (cifs_dfs_is_possible(cifs_sb) && | ||
173 | (fattr->cf_cifsattrs & ATTR_REPARSE)) | ||
174 | fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; | ||
138 | } else { | 175 | } else { |
139 | fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; | 176 | fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; |
140 | fattr->cf_dtype = DT_REG; | 177 | fattr->cf_dtype = DT_REG; |
@@ -537,14 +574,14 @@ static int cifs_save_resume_key(const char *current_entry, | |||
537 | * every entry (do not increment for . or .. entry). | 574 | * every entry (do not increment for . or .. entry). |
538 | */ | 575 | */ |
539 | static int | 576 | static int |
540 | find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, | 577 | find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, |
541 | struct file *file, char **current_entry, int *num_to_ret) | 578 | struct file *file, char **current_entry, int *num_to_ret) |
542 | { | 579 | { |
543 | __u16 search_flags; | 580 | __u16 search_flags; |
544 | int rc = 0; | 581 | int rc = 0; |
545 | int pos_in_buf = 0; | 582 | int pos_in_buf = 0; |
546 | loff_t first_entry_in_buffer; | 583 | loff_t first_entry_in_buffer; |
547 | loff_t index_to_find = file->f_pos; | 584 | loff_t index_to_find = pos; |
548 | struct cifsFileInfo *cfile = file->private_data; | 585 | struct cifsFileInfo *cfile = file->private_data; |
549 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 586 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
550 | struct TCP_Server_Info *server = tcon->ses->server; | 587 | struct TCP_Server_Info *server = tcon->ses->server; |
@@ -659,8 +696,9 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, | |||
659 | return rc; | 696 | return rc; |
660 | } | 697 | } |
661 | 698 | ||
662 | static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, | 699 | static int cifs_filldir(char *find_entry, struct file *file, |
663 | void *dirent, char *scratch_buf, unsigned int max_len) | 700 | struct dir_context *ctx, |
701 | char *scratch_buf, unsigned int max_len) | ||
664 | { | 702 | { |
665 | struct cifsFileInfo *file_info = file->private_data; | 703 | struct cifsFileInfo *file_info = file->private_data; |
666 | struct super_block *sb = file->f_path.dentry->d_sb; | 704 | struct super_block *sb = file->f_path.dentry->d_sb; |
@@ -740,13 +778,11 @@ static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, | |||
740 | cifs_prime_dcache(file->f_dentry, &name, &fattr); | 778 | cifs_prime_dcache(file->f_dentry, &name, &fattr); |
741 | 779 | ||
742 | ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); | 780 | ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); |
743 | rc = filldir(dirent, name.name, name.len, file->f_pos, ino, | 781 | return !dir_emit(ctx, name.name, name.len, ino, fattr.cf_dtype); |
744 | fattr.cf_dtype); | ||
745 | return rc; | ||
746 | } | 782 | } |
747 | 783 | ||
748 | 784 | ||
749 | int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | 785 | int cifs_readdir(struct file *file, struct dir_context *ctx) |
750 | { | 786 | { |
751 | int rc = 0; | 787 | int rc = 0; |
752 | unsigned int xid; | 788 | unsigned int xid; |
@@ -772,103 +808,86 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
772 | goto rddir2_exit; | 808 | goto rddir2_exit; |
773 | } | 809 | } |
774 | 810 | ||
775 | switch ((int) file->f_pos) { | 811 | if (!dir_emit_dots(file, ctx)) |
776 | case 0: | 812 | goto rddir2_exit; |
777 | if (filldir(direntry, ".", 1, file->f_pos, | ||
778 | file_inode(file)->i_ino, DT_DIR) < 0) { | ||
779 | cifs_dbg(VFS, "Filldir for current dir failed\n"); | ||
780 | rc = -ENOMEM; | ||
781 | break; | ||
782 | } | ||
783 | file->f_pos++; | ||
784 | case 1: | ||
785 | if (filldir(direntry, "..", 2, file->f_pos, | ||
786 | parent_ino(file->f_path.dentry), DT_DIR) < 0) { | ||
787 | cifs_dbg(VFS, "Filldir for parent dir failed\n"); | ||
788 | rc = -ENOMEM; | ||
789 | break; | ||
790 | } | ||
791 | file->f_pos++; | ||
792 | default: | ||
793 | /* 1) If search is active, | ||
794 | is in current search buffer? | ||
795 | if it before then restart search | ||
796 | if after then keep searching till find it */ | ||
797 | |||
798 | if (file->private_data == NULL) { | ||
799 | rc = -EINVAL; | ||
800 | free_xid(xid); | ||
801 | return rc; | ||
802 | } | ||
803 | cifsFile = file->private_data; | ||
804 | if (cifsFile->srch_inf.endOfSearch) { | ||
805 | if (cifsFile->srch_inf.emptyDir) { | ||
806 | cifs_dbg(FYI, "End of search, empty dir\n"); | ||
807 | rc = 0; | ||
808 | break; | ||
809 | } | ||
810 | } /* else { | ||
811 | cifsFile->invalidHandle = true; | ||
812 | tcon->ses->server->close(xid, tcon, &cifsFile->fid); | ||
813 | } */ | ||
814 | 813 | ||
815 | tcon = tlink_tcon(cifsFile->tlink); | 814 | /* 1) If search is active, |
816 | rc = find_cifs_entry(xid, tcon, file, ¤t_entry, | 815 | is in current search buffer? |
817 | &num_to_fill); | 816 | if it before then restart search |
818 | if (rc) { | 817 | if after then keep searching till find it */ |
819 | cifs_dbg(FYI, "fce error %d\n", rc); | 818 | |
820 | goto rddir2_exit; | 819 | if (file->private_data == NULL) { |
821 | } else if (current_entry != NULL) { | 820 | rc = -EINVAL; |
822 | cifs_dbg(FYI, "entry %lld found\n", file->f_pos); | 821 | goto rddir2_exit; |
823 | } else { | 822 | } |
824 | cifs_dbg(FYI, "could not find entry\n"); | 823 | cifsFile = file->private_data; |
824 | if (cifsFile->srch_inf.endOfSearch) { | ||
825 | if (cifsFile->srch_inf.emptyDir) { | ||
826 | cifs_dbg(FYI, "End of search, empty dir\n"); | ||
827 | rc = 0; | ||
825 | goto rddir2_exit; | 828 | goto rddir2_exit; |
826 | } | 829 | } |
827 | cifs_dbg(FYI, "loop through %d times filling dir for net buf %p\n", | 830 | } /* else { |
828 | num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); | 831 | cifsFile->invalidHandle = true; |
829 | max_len = tcon->ses->server->ops->calc_smb_size( | 832 | tcon->ses->server->close(xid, tcon, &cifsFile->fid); |
830 | cifsFile->srch_inf.ntwrk_buf_start); | 833 | } */ |
831 | end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; | 834 | |
832 | 835 | tcon = tlink_tcon(cifsFile->tlink); | |
833 | tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL); | 836 | rc = find_cifs_entry(xid, tcon, ctx->pos, file, ¤t_entry, |
834 | if (tmp_buf == NULL) { | 837 | &num_to_fill); |
835 | rc = -ENOMEM; | 838 | if (rc) { |
839 | cifs_dbg(FYI, "fce error %d\n", rc); | ||
840 | goto rddir2_exit; | ||
841 | } else if (current_entry != NULL) { | ||
842 | cifs_dbg(FYI, "entry %lld found\n", ctx->pos); | ||
843 | } else { | ||
844 | cifs_dbg(FYI, "could not find entry\n"); | ||
845 | goto rddir2_exit; | ||
846 | } | ||
847 | cifs_dbg(FYI, "loop through %d times filling dir for net buf %p\n", | ||
848 | num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); | ||
849 | max_len = tcon->ses->server->ops->calc_smb_size( | ||
850 | cifsFile->srch_inf.ntwrk_buf_start); | ||
851 | end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; | ||
852 | |||
853 | tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL); | ||
854 | if (tmp_buf == NULL) { | ||
855 | rc = -ENOMEM; | ||
856 | goto rddir2_exit; | ||
857 | } | ||
858 | |||
859 | for (i = 0; i < num_to_fill; i++) { | ||
860 | if (current_entry == NULL) { | ||
861 | /* evaluate whether this case is an error */ | ||
862 | cifs_dbg(VFS, "past SMB end, num to fill %d i %d\n", | ||
863 | num_to_fill, i); | ||
836 | break; | 864 | break; |
837 | } | 865 | } |
838 | 866 | /* | |
839 | for (i = 0; (i < num_to_fill) && (rc == 0); i++) { | 867 | * if buggy server returns . and .. late do we want to |
840 | if (current_entry == NULL) { | 868 | * check for that here? |
841 | /* evaluate whether this case is an error */ | 869 | */ |
842 | cifs_dbg(VFS, "past SMB end, num to fill %d i %d\n", | 870 | rc = cifs_filldir(current_entry, file, ctx, |
843 | num_to_fill, i); | 871 | tmp_buf, max_len); |
844 | break; | 872 | if (rc) { |
845 | } | 873 | if (rc > 0) |
846 | /* | ||
847 | * if buggy server returns . and .. late do we want to | ||
848 | * check for that here? | ||
849 | */ | ||
850 | rc = cifs_filldir(current_entry, file, filldir, | ||
851 | direntry, tmp_buf, max_len); | ||
852 | if (rc == -EOVERFLOW) { | ||
853 | rc = 0; | 874 | rc = 0; |
854 | break; | 875 | break; |
855 | } | ||
856 | |||
857 | file->f_pos++; | ||
858 | if (file->f_pos == | ||
859 | cifsFile->srch_inf.index_of_last_entry) { | ||
860 | cifs_dbg(FYI, "last entry in buf at pos %lld %s\n", | ||
861 | file->f_pos, tmp_buf); | ||
862 | cifs_save_resume_key(current_entry, cifsFile); | ||
863 | break; | ||
864 | } else | ||
865 | current_entry = | ||
866 | nxt_dir_entry(current_entry, end_of_smb, | ||
867 | cifsFile->srch_inf.info_level); | ||
868 | } | 876 | } |
869 | kfree(tmp_buf); | 877 | |
870 | break; | 878 | ctx->pos++; |
871 | } /* end switch */ | 879 | if (ctx->pos == |
880 | cifsFile->srch_inf.index_of_last_entry) { | ||
881 | cifs_dbg(FYI, "last entry in buf at pos %lld %s\n", | ||
882 | ctx->pos, tmp_buf); | ||
883 | cifs_save_resume_key(current_entry, cifsFile); | ||
884 | break; | ||
885 | } else | ||
886 | current_entry = | ||
887 | nxt_dir_entry(current_entry, end_of_smb, | ||
888 | cifsFile->srch_inf.info_level); | ||
889 | } | ||
890 | kfree(tmp_buf); | ||
872 | 891 | ||
873 | rddir2_exit: | 892 | rddir2_exit: |
874 | free_xid(xid); | 893 | free_xid(xid); |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index f230571a7ab3..08dd37bb23aa 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -138,8 +138,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) | |||
138 | capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | | 138 | capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | |
139 | CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; | 139 | CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; |
140 | 140 | ||
141 | if (ses->server->sec_mode & | 141 | if (ses->server->sign) |
142 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | ||
143 | pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | 142 | pSMB->req.hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
144 | 143 | ||
145 | if (ses->capabilities & CAP_UNICODE) { | 144 | if (ses->capabilities & CAP_UNICODE) { |
@@ -198,7 +197,7 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, | |||
198 | bytes_ret = 0; | 197 | bytes_ret = 0; |
199 | } else | 198 | } else |
200 | bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName, | 199 | bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName, |
201 | 256, nls_cp); | 200 | CIFS_MAX_DOMAINNAME_LEN, nls_cp); |
202 | bcc_ptr += 2 * bytes_ret; | 201 | bcc_ptr += 2 * bytes_ret; |
203 | bcc_ptr += 2; /* account for null terminator */ | 202 | bcc_ptr += 2; /* account for null terminator */ |
204 | 203 | ||
@@ -256,8 +255,8 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, | |||
256 | 255 | ||
257 | /* copy domain */ | 256 | /* copy domain */ |
258 | if (ses->domainName != NULL) { | 257 | if (ses->domainName != NULL) { |
259 | strncpy(bcc_ptr, ses->domainName, 256); | 258 | strncpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN); |
260 | bcc_ptr += strnlen(ses->domainName, 256); | 259 | bcc_ptr += strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN); |
261 | } /* else we will send a null domain name | 260 | } /* else we will send a null domain name |
262 | so the server will default to its own domain */ | 261 | so the server will default to its own domain */ |
263 | *bcc_ptr = 0; | 262 | *bcc_ptr = 0; |
@@ -310,11 +309,10 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, | |||
310 | return; | 309 | return; |
311 | } | 310 | } |
312 | 311 | ||
313 | static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, | 312 | static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft, |
314 | struct cifs_ses *ses, | 313 | struct cifs_ses *ses, |
315 | const struct nls_table *nls_cp) | 314 | const struct nls_table *nls_cp) |
316 | { | 315 | { |
317 | int rc = 0; | ||
318 | int len; | 316 | int len; |
319 | char *bcc_ptr = *pbcc_area; | 317 | char *bcc_ptr = *pbcc_area; |
320 | 318 | ||
@@ -322,24 +320,22 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, | |||
322 | 320 | ||
323 | len = strnlen(bcc_ptr, bleft); | 321 | len = strnlen(bcc_ptr, bleft); |
324 | if (len >= bleft) | 322 | if (len >= bleft) |
325 | return rc; | 323 | return; |
326 | 324 | ||
327 | kfree(ses->serverOS); | 325 | kfree(ses->serverOS); |
328 | 326 | ||
329 | ses->serverOS = kzalloc(len + 1, GFP_KERNEL); | 327 | ses->serverOS = kzalloc(len + 1, GFP_KERNEL); |
330 | if (ses->serverOS) | 328 | if (ses->serverOS) |
331 | strncpy(ses->serverOS, bcc_ptr, len); | 329 | strncpy(ses->serverOS, bcc_ptr, len); |
332 | if (strncmp(ses->serverOS, "OS/2", 4) == 0) { | 330 | if (strncmp(ses->serverOS, "OS/2", 4) == 0) |
333 | cifs_dbg(FYI, "OS/2 server\n"); | 331 | cifs_dbg(FYI, "OS/2 server\n"); |
334 | ses->flags |= CIFS_SES_OS2; | ||
335 | } | ||
336 | 332 | ||
337 | bcc_ptr += len + 1; | 333 | bcc_ptr += len + 1; |
338 | bleft -= len + 1; | 334 | bleft -= len + 1; |
339 | 335 | ||
340 | len = strnlen(bcc_ptr, bleft); | 336 | len = strnlen(bcc_ptr, bleft); |
341 | if (len >= bleft) | 337 | if (len >= bleft) |
342 | return rc; | 338 | return; |
343 | 339 | ||
344 | kfree(ses->serverNOS); | 340 | kfree(ses->serverNOS); |
345 | 341 | ||
@@ -352,7 +348,7 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, | |||
352 | 348 | ||
353 | len = strnlen(bcc_ptr, bleft); | 349 | len = strnlen(bcc_ptr, bleft); |
354 | if (len > bleft) | 350 | if (len > bleft) |
355 | return rc; | 351 | return; |
356 | 352 | ||
357 | /* No domain field in LANMAN case. Domain is | 353 | /* No domain field in LANMAN case. Domain is |
358 | returned by old servers in the SMB negprot response */ | 354 | returned by old servers in the SMB negprot response */ |
@@ -360,8 +356,6 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, | |||
360 | but thus do return domain here we could add parsing | 356 | but thus do return domain here we could add parsing |
361 | for it later, but it is not very important */ | 357 | for it later, but it is not very important */ |
362 | cifs_dbg(FYI, "ascii: bytes left %d\n", bleft); | 358 | cifs_dbg(FYI, "ascii: bytes left %d\n", bleft); |
363 | |||
364 | return rc; | ||
365 | } | 359 | } |
366 | 360 | ||
367 | int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, | 361 | int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, |
@@ -432,8 +426,7 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, | |||
432 | flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | | 426 | flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | |
433 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | | 427 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | |
434 | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; | 428 | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; |
435 | if (ses->server->sec_mode & | 429 | if (ses->server->sign) { |
436 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | ||
437 | flags |= NTLMSSP_NEGOTIATE_SIGN; | 430 | flags |= NTLMSSP_NEGOTIATE_SIGN; |
438 | if (!ses->server->session_estab) | 431 | if (!ses->server->session_estab) |
439 | flags |= NTLMSSP_NEGOTIATE_KEY_XCH; | 432 | flags |= NTLMSSP_NEGOTIATE_KEY_XCH; |
@@ -471,8 +464,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
471 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | | 464 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | |
472 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | | 465 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | |
473 | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; | 466 | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; |
474 | if (ses->server->sec_mode & | 467 | if (ses->server->sign) { |
475 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | ||
476 | flags |= NTLMSSP_NEGOTIATE_SIGN; | 468 | flags |= NTLMSSP_NEGOTIATE_SIGN; |
477 | if (!ses->server->session_estab) | 469 | if (!ses->server->session_estab) |
478 | flags |= NTLMSSP_NEGOTIATE_KEY_XCH; | 470 | flags |= NTLMSSP_NEGOTIATE_KEY_XCH; |
@@ -558,6 +550,56 @@ setup_ntlmv2_ret: | |||
558 | return rc; | 550 | return rc; |
559 | } | 551 | } |
560 | 552 | ||
553 | enum securityEnum | ||
554 | select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) | ||
555 | { | ||
556 | switch (server->negflavor) { | ||
557 | case CIFS_NEGFLAVOR_EXTENDED: | ||
558 | switch (requested) { | ||
559 | case Kerberos: | ||
560 | case RawNTLMSSP: | ||
561 | return requested; | ||
562 | case Unspecified: | ||
563 | if (server->sec_ntlmssp && | ||
564 | (global_secflags & CIFSSEC_MAY_NTLMSSP)) | ||
565 | return RawNTLMSSP; | ||
566 | if ((server->sec_kerberos || server->sec_mskerberos) && | ||
567 | (global_secflags & CIFSSEC_MAY_KRB5)) | ||
568 | return Kerberos; | ||
569 | /* Fallthrough */ | ||
570 | default: | ||
571 | return Unspecified; | ||
572 | } | ||
573 | case CIFS_NEGFLAVOR_UNENCAP: | ||
574 | switch (requested) { | ||
575 | case NTLM: | ||
576 | case NTLMv2: | ||
577 | return requested; | ||
578 | case Unspecified: | ||
579 | if (global_secflags & CIFSSEC_MAY_NTLMV2) | ||
580 | return NTLMv2; | ||
581 | if (global_secflags & CIFSSEC_MAY_NTLM) | ||
582 | return NTLM; | ||
583 | /* Fallthrough */ | ||
584 | default: | ||
585 | return Unspecified; | ||
586 | } | ||
587 | case CIFS_NEGFLAVOR_LANMAN: | ||
588 | switch (requested) { | ||
589 | case LANMAN: | ||
590 | return requested; | ||
591 | case Unspecified: | ||
592 | if (global_secflags & CIFSSEC_MAY_LANMAN) | ||
593 | return LANMAN; | ||
594 | /* Fallthrough */ | ||
595 | default: | ||
596 | return Unspecified; | ||
597 | } | ||
598 | default: | ||
599 | return Unspecified; | ||
600 | } | ||
601 | } | ||
602 | |||
561 | int | 603 | int |
562 | CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, | 604 | CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, |
563 | const struct nls_table *nls_cp) | 605 | const struct nls_table *nls_cp) |
@@ -579,11 +621,18 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, | |||
579 | u16 blob_len; | 621 | u16 blob_len; |
580 | char *ntlmsspblob = NULL; | 622 | char *ntlmsspblob = NULL; |
581 | 623 | ||
582 | if (ses == NULL) | 624 | if (ses == NULL) { |
625 | WARN(1, "%s: ses == NULL!", __func__); | ||
583 | return -EINVAL; | 626 | return -EINVAL; |
627 | } | ||
584 | 628 | ||
585 | type = ses->server->secType; | 629 | type = select_sectype(ses->server, ses->sectype); |
586 | cifs_dbg(FYI, "sess setup type %d\n", type); | 630 | cifs_dbg(FYI, "sess setup type %d\n", type); |
631 | if (type == Unspecified) { | ||
632 | cifs_dbg(VFS, "Unable to select appropriate authentication method!"); | ||
633 | return -EINVAL; | ||
634 | } | ||
635 | |||
587 | if (type == RawNTLMSSP) { | 636 | if (type == RawNTLMSSP) { |
588 | /* if memory allocation is successful, caller of this function | 637 | /* if memory allocation is successful, caller of this function |
589 | * frees it. | 638 | * frees it. |
@@ -643,8 +692,6 @@ ssetup_ntlmssp_authenticate: | |||
643 | } | 692 | } |
644 | bcc_ptr = str_area; | 693 | bcc_ptr = str_area; |
645 | 694 | ||
646 | ses->flags &= ~CIFS_SES_LANMAN; | ||
647 | |||
648 | iov[1].iov_base = NULL; | 695 | iov[1].iov_base = NULL; |
649 | iov[1].iov_len = 0; | 696 | iov[1].iov_len = 0; |
650 | 697 | ||
@@ -668,7 +715,6 @@ ssetup_ntlmssp_authenticate: | |||
668 | ses->server->sec_mode & SECMODE_PW_ENCRYPT ? | 715 | ses->server->sec_mode & SECMODE_PW_ENCRYPT ? |
669 | true : false, lnm_session_key); | 716 | true : false, lnm_session_key); |
670 | 717 | ||
671 | ses->flags |= CIFS_SES_LANMAN; | ||
672 | memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); | 718 | memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); |
673 | bcc_ptr += CIFS_AUTH_RESP_SIZE; | 719 | bcc_ptr += CIFS_AUTH_RESP_SIZE; |
674 | 720 | ||
@@ -938,8 +984,7 @@ ssetup_ntlmssp_authenticate: | |||
938 | } | 984 | } |
939 | decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); | 985 | decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); |
940 | } else { | 986 | } else { |
941 | rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, | 987 | decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); |
942 | ses, nls_cp); | ||
943 | } | 988 | } |
944 | 989 | ||
945 | ssetup_exit: | 990 | ssetup_exit: |
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 3efdb9d5c0b8..60943978aec3 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c | |||
@@ -449,8 +449,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) | |||
449 | * WRITEX header, not including the 4 byte RFC1001 length. | 449 | * WRITEX header, not including the 4 byte RFC1001 length. |
450 | */ | 450 | */ |
451 | if (!(server->capabilities & CAP_LARGE_WRITE_X) || | 451 | if (!(server->capabilities & CAP_LARGE_WRITE_X) || |
452 | (!(server->capabilities & CAP_UNIX) && | 452 | (!(server->capabilities & CAP_UNIX) && server->sign)) |
453 | (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)))) | ||
454 | wsize = min_t(unsigned int, wsize, | 453 | wsize = min_t(unsigned int, wsize, |
455 | server->maxBuf - sizeof(WRITE_REQ) + 4); | 454 | server->maxBuf - sizeof(WRITE_REQ) + 4); |
456 | 455 | ||
@@ -675,20 +674,23 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, | |||
675 | } | 674 | } |
676 | 675 | ||
677 | static int | 676 | static int |
678 | cifs_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, | 677 | cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms, |
679 | int disposition, int desired_access, int create_options, | 678 | __u32 *oplock, FILE_ALL_INFO *buf) |
680 | struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, | 679 | { |
681 | struct cifs_sb_info *cifs_sb) | 680 | if (!(oparms->tcon->ses->capabilities & CAP_NT_SMBS)) |
682 | { | 681 | return SMBLegacyOpen(xid, oparms->tcon, oparms->path, |
683 | if (!(tcon->ses->capabilities & CAP_NT_SMBS)) | 682 | oparms->disposition, |
684 | return SMBLegacyOpen(xid, tcon, path, disposition, | 683 | oparms->desired_access, |
685 | desired_access, create_options, | 684 | oparms->create_options, |
686 | &fid->netfid, oplock, buf, | 685 | &oparms->fid->netfid, oplock, buf, |
687 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags | 686 | oparms->cifs_sb->local_nls, |
687 | oparms->cifs_sb->mnt_cifs_flags | ||
688 | & CIFS_MOUNT_MAP_SPECIAL_CHR); | 688 | & CIFS_MOUNT_MAP_SPECIAL_CHR); |
689 | return CIFSSMBOpen(xid, tcon, path, disposition, desired_access, | 689 | return CIFSSMBOpen(xid, oparms->tcon, oparms->path, |
690 | create_options, &fid->netfid, oplock, buf, | 690 | oparms->disposition, oparms->desired_access, |
691 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | 691 | oparms->create_options, &oparms->fid->netfid, oplock, |
692 | buf, oparms->cifs_sb->local_nls, | ||
693 | oparms->cifs_sb->mnt_cifs_flags & | ||
692 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 694 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
693 | } | 695 | } |
694 | 696 | ||
@@ -765,20 +767,14 @@ smb_set_file_info(struct inode *inode, const char *full_path, | |||
765 | } | 767 | } |
766 | tcon = tlink_tcon(tlink); | 768 | tcon = tlink_tcon(tlink); |
767 | 769 | ||
768 | /* | 770 | rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, cifs_sb->local_nls, |
769 | * NT4 apparently returns success on this call, but it doesn't really | ||
770 | * work. | ||
771 | */ | ||
772 | if (!(tcon->ses->flags & CIFS_SES_NT4)) { | ||
773 | rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, | ||
774 | cifs_sb->local_nls, | ||
775 | cifs_sb->mnt_cifs_flags & | 771 | cifs_sb->mnt_cifs_flags & |
776 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 772 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
777 | if (rc == 0) { | 773 | if (rc == 0) { |
778 | cinode->cifsAttrs = le32_to_cpu(buf->Attributes); | 774 | cinode->cifsAttrs = le32_to_cpu(buf->Attributes); |
779 | goto out; | 775 | goto out; |
780 | } else if (rc != -EOPNOTSUPP && rc != -EINVAL) | 776 | } else if (rc != -EOPNOTSUPP && rc != -EINVAL) { |
781 | goto out; | 777 | goto out; |
782 | } | 778 | } |
783 | 779 | ||
784 | cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n"); | 780 | cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n"); |
@@ -948,6 +944,7 @@ struct smb_version_operations smb1_operations = { | |||
948 | .mand_lock = cifs_mand_lock, | 944 | .mand_lock = cifs_mand_lock, |
949 | .mand_unlock_range = cifs_unlock_range, | 945 | .mand_unlock_range = cifs_unlock_range, |
950 | .push_mand_locks = cifs_push_mandatory_locks, | 946 | .push_mand_locks = cifs_push_mandatory_locks, |
947 | .query_mf_symlink = open_query_close_cifs_symlink, | ||
951 | }; | 948 | }; |
952 | 949 | ||
953 | struct smb_version_values smb1_values = { | 950 | struct smb_version_values smb1_values = { |
@@ -964,4 +961,6 @@ struct smb_version_values smb1_values = { | |||
964 | .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, | 961 | .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, |
965 | .cap_large_files = CAP_LARGE_FILES, | 962 | .cap_large_files = CAP_LARGE_FILES, |
966 | .oplock_read = OPLOCK_READ, | 963 | .oplock_read = OPLOCK_READ, |
964 | .signing_enabled = SECMODE_SIGN_ENABLED, | ||
965 | .signing_required = SECMODE_SIGN_REQUIRED, | ||
967 | }; | 966 | }; |
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 5da1b55a2258..04a81a4142c3 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c | |||
@@ -40,7 +40,8 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) | |||
40 | oplock &= 0xFF; | 40 | oplock &= 0xFF; |
41 | if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) | 41 | if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) |
42 | return; | 42 | return; |
43 | if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) { | 43 | if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE || |
44 | oplock == SMB2_OPLOCK_LEVEL_BATCH) { | ||
44 | cinode->clientCanCacheAll = true; | 45 | cinode->clientCanCacheAll = true; |
45 | cinode->clientCanCacheRead = true; | 46 | cinode->clientCanCacheRead = true; |
46 | cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", | 47 | cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", |
@@ -57,17 +58,16 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) | |||
57 | } | 58 | } |
58 | 59 | ||
59 | int | 60 | int |
60 | smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, | 61 | smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, |
61 | int disposition, int desired_access, int create_options, | 62 | __u32 *oplock, FILE_ALL_INFO *buf) |
62 | struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, | ||
63 | struct cifs_sb_info *cifs_sb) | ||
64 | { | 63 | { |
65 | int rc; | 64 | int rc; |
66 | __le16 *smb2_path; | 65 | __le16 *smb2_path; |
67 | struct smb2_file_all_info *smb2_data = NULL; | 66 | struct smb2_file_all_info *smb2_data = NULL; |
68 | __u8 smb2_oplock[17]; | 67 | __u8 smb2_oplock[17]; |
68 | struct cifs_fid *fid = oparms->fid; | ||
69 | 69 | ||
70 | smb2_path = cifs_convert_path_to_utf16(path, cifs_sb); | 70 | smb2_path = cifs_convert_path_to_utf16(oparms->path, oparms->cifs_sb); |
71 | if (smb2_path == NULL) { | 71 | if (smb2_path == NULL) { |
72 | rc = -ENOMEM; | 72 | rc = -ENOMEM; |
73 | goto out; | 73 | goto out; |
@@ -80,21 +80,19 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, | |||
80 | goto out; | 80 | goto out; |
81 | } | 81 | } |
82 | 82 | ||
83 | desired_access |= FILE_READ_ATTRIBUTES; | 83 | oparms->desired_access |= FILE_READ_ATTRIBUTES; |
84 | *smb2_oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; | 84 | *smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH; |
85 | 85 | ||
86 | if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) | 86 | if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) |
87 | memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); | 87 | memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); |
88 | 88 | ||
89 | rc = SMB2_open(xid, tcon, smb2_path, &fid->persistent_fid, | 89 | rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data); |
90 | &fid->volatile_fid, desired_access, disposition, | ||
91 | 0, 0, smb2_oplock, smb2_data); | ||
92 | if (rc) | 90 | if (rc) |
93 | goto out; | 91 | goto out; |
94 | 92 | ||
95 | if (buf) { | 93 | if (buf) { |
96 | /* open response does not have IndexNumber field - get it */ | 94 | /* open response does not have IndexNumber field - get it */ |
97 | rc = SMB2_get_srv_num(xid, tcon, fid->persistent_fid, | 95 | rc = SMB2_get_srv_num(xid, oparms->tcon, fid->persistent_fid, |
98 | fid->volatile_fid, | 96 | fid->volatile_fid, |
99 | &smb2_data->IndexNumber); | 97 | &smb2_data->IndexNumber); |
100 | if (rc) { | 98 | if (rc) { |
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 7c0e2143e775..c38350851b08 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h | |||
@@ -54,5 +54,7 @@ | |||
54 | #define SMB2_SIGNATURE_SIZE (16) | 54 | #define SMB2_SIGNATURE_SIZE (16) |
55 | #define SMB2_NTLMV2_SESSKEY_SIZE (16) | 55 | #define SMB2_NTLMV2_SESSKEY_SIZE (16) |
56 | #define SMB2_HMACSHA256_SIZE (32) | 56 | #define SMB2_HMACSHA256_SIZE (32) |
57 | #define SMB2_CMACAES_SIZE (16) | ||
58 | #define SMB3_SIGNKEY_SIZE (16) | ||
57 | 59 | ||
58 | #endif /* _SMB2_GLOB_H */ | 60 | #endif /* _SMB2_GLOB_H */ |
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index fff6dfba6204..c6ec1633309a 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c | |||
@@ -41,21 +41,26 @@ static int | |||
41 | smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, | 41 | smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, |
42 | struct cifs_sb_info *cifs_sb, const char *full_path, | 42 | struct cifs_sb_info *cifs_sb, const char *full_path, |
43 | __u32 desired_access, __u32 create_disposition, | 43 | __u32 desired_access, __u32 create_disposition, |
44 | __u32 file_attributes, __u32 create_options, | 44 | __u32 create_options, void *data, int command) |
45 | void *data, int command) | ||
46 | { | 45 | { |
47 | int rc, tmprc = 0; | 46 | int rc, tmprc = 0; |
48 | u64 persistent_fid, volatile_fid; | ||
49 | __le16 *utf16_path; | 47 | __le16 *utf16_path; |
50 | __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; | 48 | __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; |
49 | struct cifs_open_parms oparms; | ||
50 | struct cifs_fid fid; | ||
51 | 51 | ||
52 | utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); | 52 | utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); |
53 | if (!utf16_path) | 53 | if (!utf16_path) |
54 | return -ENOMEM; | 54 | return -ENOMEM; |
55 | 55 | ||
56 | rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, | 56 | oparms.tcon = tcon; |
57 | desired_access, create_disposition, file_attributes, | 57 | oparms.desired_access = desired_access; |
58 | create_options, &oplock, NULL); | 58 | oparms.disposition = create_disposition; |
59 | oparms.create_options = create_options; | ||
60 | oparms.fid = &fid; | ||
61 | oparms.reconnect = false; | ||
62 | |||
63 | rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); | ||
59 | if (rc) { | 64 | if (rc) { |
60 | kfree(utf16_path); | 65 | kfree(utf16_path); |
61 | return rc; | 66 | return rc; |
@@ -65,8 +70,8 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, | |||
65 | case SMB2_OP_DELETE: | 70 | case SMB2_OP_DELETE: |
66 | break; | 71 | break; |
67 | case SMB2_OP_QUERY_INFO: | 72 | case SMB2_OP_QUERY_INFO: |
68 | tmprc = SMB2_query_info(xid, tcon, persistent_fid, | 73 | tmprc = SMB2_query_info(xid, tcon, fid.persistent_fid, |
69 | volatile_fid, | 74 | fid.volatile_fid, |
70 | (struct smb2_file_all_info *)data); | 75 | (struct smb2_file_all_info *)data); |
71 | break; | 76 | break; |
72 | case SMB2_OP_MKDIR: | 77 | case SMB2_OP_MKDIR: |
@@ -76,19 +81,21 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, | |||
76 | */ | 81 | */ |
77 | break; | 82 | break; |
78 | case SMB2_OP_RENAME: | 83 | case SMB2_OP_RENAME: |
79 | tmprc = SMB2_rename(xid, tcon, persistent_fid, volatile_fid, | 84 | tmprc = SMB2_rename(xid, tcon, fid.persistent_fid, |
80 | (__le16 *)data); | 85 | fid.volatile_fid, (__le16 *)data); |
81 | break; | 86 | break; |
82 | case SMB2_OP_HARDLINK: | 87 | case SMB2_OP_HARDLINK: |
83 | tmprc = SMB2_set_hardlink(xid, tcon, persistent_fid, | 88 | tmprc = SMB2_set_hardlink(xid, tcon, fid.persistent_fid, |
84 | volatile_fid, (__le16 *)data); | 89 | fid.volatile_fid, (__le16 *)data); |
85 | break; | 90 | break; |
86 | case SMB2_OP_SET_EOF: | 91 | case SMB2_OP_SET_EOF: |
87 | tmprc = SMB2_set_eof(xid, tcon, persistent_fid, volatile_fid, | 92 | tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid, |
88 | current->tgid, (__le64 *)data); | 93 | fid.volatile_fid, current->tgid, |
94 | (__le64 *)data); | ||
89 | break; | 95 | break; |
90 | case SMB2_OP_SET_INFO: | 96 | case SMB2_OP_SET_INFO: |
91 | tmprc = SMB2_set_info(xid, tcon, persistent_fid, volatile_fid, | 97 | tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid, |
98 | fid.volatile_fid, | ||
92 | (FILE_BASIC_INFO *)data); | 99 | (FILE_BASIC_INFO *)data); |
93 | break; | 100 | break; |
94 | default: | 101 | default: |
@@ -96,7 +103,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, | |||
96 | break; | 103 | break; |
97 | } | 104 | } |
98 | 105 | ||
99 | rc = SMB2_close(xid, tcon, persistent_fid, volatile_fid); | 106 | rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); |
100 | if (tmprc) | 107 | if (tmprc) |
101 | rc = tmprc; | 108 | rc = tmprc; |
102 | kfree(utf16_path); | 109 | kfree(utf16_path); |
@@ -129,8 +136,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, | |||
129 | return -ENOMEM; | 136 | return -ENOMEM; |
130 | 137 | ||
131 | rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, | 138 | rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, |
132 | FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, | 139 | FILE_READ_ATTRIBUTES, FILE_OPEN, 0, smb2_data, |
133 | smb2_data, SMB2_OP_QUERY_INFO); | 140 | SMB2_OP_QUERY_INFO); |
134 | if (rc) | 141 | if (rc) |
135 | goto out; | 142 | goto out; |
136 | 143 | ||
@@ -145,7 +152,7 @@ smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, | |||
145 | struct cifs_sb_info *cifs_sb) | 152 | struct cifs_sb_info *cifs_sb) |
146 | { | 153 | { |
147 | return smb2_open_op_close(xid, tcon, cifs_sb, name, | 154 | return smb2_open_op_close(xid, tcon, cifs_sb, name, |
148 | FILE_WRITE_ATTRIBUTES, FILE_CREATE, 0, | 155 | FILE_WRITE_ATTRIBUTES, FILE_CREATE, |
149 | CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR); | 156 | CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR); |
150 | } | 157 | } |
151 | 158 | ||
@@ -164,7 +171,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, | |||
164 | dosattrs = cifs_i->cifsAttrs | ATTR_READONLY; | 171 | dosattrs = cifs_i->cifsAttrs | ATTR_READONLY; |
165 | data.Attributes = cpu_to_le32(dosattrs); | 172 | data.Attributes = cpu_to_le32(dosattrs); |
166 | tmprc = smb2_open_op_close(xid, tcon, cifs_sb, name, | 173 | tmprc = smb2_open_op_close(xid, tcon, cifs_sb, name, |
167 | FILE_WRITE_ATTRIBUTES, FILE_CREATE, 0, | 174 | FILE_WRITE_ATTRIBUTES, FILE_CREATE, |
168 | CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO); | 175 | CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO); |
169 | if (tmprc == 0) | 176 | if (tmprc == 0) |
170 | cifs_i->cifsAttrs = dosattrs; | 177 | cifs_i->cifsAttrs = dosattrs; |
@@ -175,7 +182,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, | |||
175 | struct cifs_sb_info *cifs_sb) | 182 | struct cifs_sb_info *cifs_sb) |
176 | { | 183 | { |
177 | return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, | 184 | return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, |
178 | 0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE, | 185 | CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE, |
179 | NULL, SMB2_OP_DELETE); | 186 | NULL, SMB2_OP_DELETE); |
180 | } | 187 | } |
181 | 188 | ||
@@ -184,7 +191,7 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, | |||
184 | struct cifs_sb_info *cifs_sb) | 191 | struct cifs_sb_info *cifs_sb) |
185 | { | 192 | { |
186 | return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, | 193 | return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, |
187 | 0, CREATE_DELETE_ON_CLOSE, NULL, | 194 | CREATE_DELETE_ON_CLOSE, NULL, |
188 | SMB2_OP_DELETE); | 195 | SMB2_OP_DELETE); |
189 | } | 196 | } |
190 | 197 | ||
@@ -203,7 +210,7 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, | |||
203 | } | 210 | } |
204 | 211 | ||
205 | rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, access, | 212 | rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, access, |
206 | FILE_OPEN, 0, 0, smb2_to_name, command); | 213 | FILE_OPEN, 0, smb2_to_name, command); |
207 | smb2_rename_path: | 214 | smb2_rename_path: |
208 | kfree(smb2_to_name); | 215 | kfree(smb2_to_name); |
209 | return rc; | 216 | return rc; |
@@ -234,7 +241,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, | |||
234 | { | 241 | { |
235 | __le64 eof = cpu_to_le64(size); | 242 | __le64 eof = cpu_to_le64(size); |
236 | return smb2_open_op_close(xid, tcon, cifs_sb, full_path, | 243 | return smb2_open_op_close(xid, tcon, cifs_sb, full_path, |
237 | FILE_WRITE_DATA, FILE_OPEN, 0, 0, &eof, | 244 | FILE_WRITE_DATA, FILE_OPEN, 0, &eof, |
238 | SMB2_OP_SET_EOF); | 245 | SMB2_OP_SET_EOF); |
239 | } | 246 | } |
240 | 247 | ||
@@ -250,7 +257,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path, | |||
250 | if (IS_ERR(tlink)) | 257 | if (IS_ERR(tlink)) |
251 | return PTR_ERR(tlink); | 258 | return PTR_ERR(tlink); |
252 | rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path, | 259 | rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path, |
253 | FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, 0, buf, | 260 | FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf, |
254 | SMB2_OP_SET_INFO); | 261 | SMB2_OP_SET_INFO); |
255 | cifs_put_tlink(tlink); | 262 | cifs_put_tlink(tlink); |
256 | return rc; | 263 | return rc; |
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 10383d8c015b..b0c43345cd98 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c | |||
@@ -266,6 +266,10 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) | |||
266 | ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength); | 266 | ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength); |
267 | break; | 267 | break; |
268 | case SMB2_IOCTL: | 268 | case SMB2_IOCTL: |
269 | *off = le32_to_cpu( | ||
270 | ((struct smb2_ioctl_rsp *)hdr)->OutputOffset); | ||
271 | *len = le32_to_cpu(((struct smb2_ioctl_rsp *)hdr)->OutputCount); | ||
272 | break; | ||
269 | case SMB2_CHANGE_NOTIFY: | 273 | case SMB2_CHANGE_NOTIFY: |
270 | default: | 274 | default: |
271 | /* BB FIXME for unimplemented cases above */ | 275 | /* BB FIXME for unimplemented cases above */ |
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f2e76f3b0c61..f259e6cc8357 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c | |||
@@ -213,22 +213,29 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, | |||
213 | struct cifs_sb_info *cifs_sb, const char *full_path) | 213 | struct cifs_sb_info *cifs_sb, const char *full_path) |
214 | { | 214 | { |
215 | int rc; | 215 | int rc; |
216 | __u64 persistent_fid, volatile_fid; | ||
217 | __le16 *utf16_path; | 216 | __le16 *utf16_path; |
218 | __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; | 217 | __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; |
218 | struct cifs_open_parms oparms; | ||
219 | struct cifs_fid fid; | ||
219 | 220 | ||
220 | utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); | 221 | utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); |
221 | if (!utf16_path) | 222 | if (!utf16_path) |
222 | return -ENOMEM; | 223 | return -ENOMEM; |
223 | 224 | ||
224 | rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, | 225 | oparms.tcon = tcon; |
225 | FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL); | 226 | oparms.desired_access = FILE_READ_ATTRIBUTES; |
227 | oparms.disposition = FILE_OPEN; | ||
228 | oparms.create_options = 0; | ||
229 | oparms.fid = &fid; | ||
230 | oparms.reconnect = false; | ||
231 | |||
232 | rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); | ||
226 | if (rc) { | 233 | if (rc) { |
227 | kfree(utf16_path); | 234 | kfree(utf16_path); |
228 | return rc; | 235 | return rc; |
229 | } | 236 | } |
230 | 237 | ||
231 | rc = SMB2_close(xid, tcon, persistent_fid, volatile_fid); | 238 | rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); |
232 | kfree(utf16_path); | 239 | kfree(utf16_path); |
233 | return rc; | 240 | return rc; |
234 | } | 241 | } |
@@ -281,6 +288,25 @@ smb2_clear_stats(struct cifs_tcon *tcon) | |||
281 | } | 288 | } |
282 | 289 | ||
283 | static void | 290 | static void |
291 | smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon) | ||
292 | { | ||
293 | seq_puts(m, "\n\tShare Capabilities:"); | ||
294 | if (tcon->capabilities & SMB2_SHARE_CAP_DFS) | ||
295 | seq_puts(m, " DFS,"); | ||
296 | if (tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY) | ||
297 | seq_puts(m, " CONTINUOUS AVAILABILITY,"); | ||
298 | if (tcon->capabilities & SMB2_SHARE_CAP_SCALEOUT) | ||
299 | seq_puts(m, " SCALEOUT,"); | ||
300 | if (tcon->capabilities & SMB2_SHARE_CAP_CLUSTER) | ||
301 | seq_puts(m, " CLUSTER,"); | ||
302 | if (tcon->capabilities & SMB2_SHARE_CAP_ASYMMETRIC) | ||
303 | seq_puts(m, " ASYMMETRIC,"); | ||
304 | if (tcon->capabilities == 0) | ||
305 | seq_puts(m, " None"); | ||
306 | seq_printf(m, "\tShare Flags: 0x%x", tcon->share_flags); | ||
307 | } | ||
308 | |||
309 | static void | ||
284 | smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) | 310 | smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) |
285 | { | 311 | { |
286 | #ifdef CONFIG_CIFS_STATS | 312 | #ifdef CONFIG_CIFS_STATS |
@@ -292,7 +318,6 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon) | |||
292 | seq_printf(m, "\nSessionSetups: %d sent %d failed", | 318 | seq_printf(m, "\nSessionSetups: %d sent %d failed", |
293 | atomic_read(&sent[SMB2_SESSION_SETUP_HE]), | 319 | atomic_read(&sent[SMB2_SESSION_SETUP_HE]), |
294 | atomic_read(&failed[SMB2_SESSION_SETUP_HE])); | 320 | atomic_read(&failed[SMB2_SESSION_SETUP_HE])); |
295 | #define SMB2LOGOFF 0x0002 /* trivial request/resp */ | ||
296 | seq_printf(m, "\nLogoffs: %d sent %d failed", | 321 | seq_printf(m, "\nLogoffs: %d sent %d failed", |
297 | atomic_read(&sent[SMB2_LOGOFF_HE]), | 322 | atomic_read(&sent[SMB2_LOGOFF_HE]), |
298 | atomic_read(&failed[SMB2_LOGOFF_HE])); | 323 | atomic_read(&failed[SMB2_LOGOFF_HE])); |
@@ -425,15 +450,20 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, | |||
425 | __le16 *utf16_path; | 450 | __le16 *utf16_path; |
426 | int rc; | 451 | int rc; |
427 | __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; | 452 | __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; |
428 | __u64 persistent_fid, volatile_fid; | 453 | struct cifs_open_parms oparms; |
429 | 454 | ||
430 | utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); | 455 | utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); |
431 | if (!utf16_path) | 456 | if (!utf16_path) |
432 | return -ENOMEM; | 457 | return -ENOMEM; |
433 | 458 | ||
434 | rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, | 459 | oparms.tcon = tcon; |
435 | FILE_READ_ATTRIBUTES | FILE_READ_DATA, FILE_OPEN, 0, 0, | 460 | oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA; |
436 | &oplock, NULL); | 461 | oparms.disposition = FILE_OPEN; |
462 | oparms.create_options = 0; | ||
463 | oparms.fid = fid; | ||
464 | oparms.reconnect = false; | ||
465 | |||
466 | rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); | ||
437 | kfree(utf16_path); | 467 | kfree(utf16_path); |
438 | if (rc) { | 468 | if (rc) { |
439 | cifs_dbg(VFS, "open dir failed\n"); | 469 | cifs_dbg(VFS, "open dir failed\n"); |
@@ -442,14 +472,12 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, | |||
442 | 472 | ||
443 | srch_inf->entries_in_buffer = 0; | 473 | srch_inf->entries_in_buffer = 0; |
444 | srch_inf->index_of_last_entry = 0; | 474 | srch_inf->index_of_last_entry = 0; |
445 | fid->persistent_fid = persistent_fid; | ||
446 | fid->volatile_fid = volatile_fid; | ||
447 | 475 | ||
448 | rc = SMB2_query_directory(xid, tcon, persistent_fid, volatile_fid, 0, | 476 | rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, |
449 | srch_inf); | 477 | fid->volatile_fid, 0, srch_inf); |
450 | if (rc) { | 478 | if (rc) { |
451 | cifs_dbg(VFS, "query directory failed\n"); | 479 | cifs_dbg(VFS, "query directory failed\n"); |
452 | SMB2_close(xid, tcon, persistent_fid, volatile_fid); | 480 | SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); |
453 | } | 481 | } |
454 | return rc; | 482 | return rc; |
455 | } | 483 | } |
@@ -510,17 +538,25 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, | |||
510 | struct kstatfs *buf) | 538 | struct kstatfs *buf) |
511 | { | 539 | { |
512 | int rc; | 540 | int rc; |
513 | u64 persistent_fid, volatile_fid; | ||
514 | __le16 srch_path = 0; /* Null - open root of share */ | 541 | __le16 srch_path = 0; /* Null - open root of share */ |
515 | u8 oplock = SMB2_OPLOCK_LEVEL_NONE; | 542 | u8 oplock = SMB2_OPLOCK_LEVEL_NONE; |
543 | struct cifs_open_parms oparms; | ||
544 | struct cifs_fid fid; | ||
545 | |||
546 | oparms.tcon = tcon; | ||
547 | oparms.desired_access = FILE_READ_ATTRIBUTES; | ||
548 | oparms.disposition = FILE_OPEN; | ||
549 | oparms.create_options = 0; | ||
550 | oparms.fid = &fid; | ||
551 | oparms.reconnect = false; | ||
516 | 552 | ||
517 | rc = SMB2_open(xid, tcon, &srch_path, &persistent_fid, &volatile_fid, | 553 | rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL); |
518 | FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL); | ||
519 | if (rc) | 554 | if (rc) |
520 | return rc; | 555 | return rc; |
521 | buf->f_type = SMB2_MAGIC_NUMBER; | 556 | buf->f_type = SMB2_MAGIC_NUMBER; |
522 | rc = SMB2_QFS_info(xid, tcon, persistent_fid, volatile_fid, buf); | 557 | rc = SMB2_QFS_info(xid, tcon, fid.persistent_fid, fid.volatile_fid, |
523 | SMB2_close(xid, tcon, persistent_fid, volatile_fid); | 558 | buf); |
559 | SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); | ||
524 | return rc; | 560 | return rc; |
525 | } | 561 | } |
526 | 562 | ||
@@ -645,6 +681,7 @@ struct smb_version_operations smb30_operations = { | |||
645 | .dump_detail = smb2_dump_detail, | 681 | .dump_detail = smb2_dump_detail, |
646 | .clear_stats = smb2_clear_stats, | 682 | .clear_stats = smb2_clear_stats, |
647 | .print_stats = smb2_print_stats, | 683 | .print_stats = smb2_print_stats, |
684 | .dump_share_caps = smb2_dump_share_caps, | ||
648 | .is_oplock_break = smb2_is_valid_oplock_break, | 685 | .is_oplock_break = smb2_is_valid_oplock_break, |
649 | .need_neg = smb2_need_neg, | 686 | .need_neg = smb2_need_neg, |
650 | .negotiate = smb2_negotiate, | 687 | .negotiate = smb2_negotiate, |
@@ -690,6 +727,7 @@ struct smb_version_operations smb30_operations = { | |||
690 | .get_lease_key = smb2_get_lease_key, | 727 | .get_lease_key = smb2_get_lease_key, |
691 | .set_lease_key = smb2_set_lease_key, | 728 | .set_lease_key = smb2_set_lease_key, |
692 | .new_lease_key = smb2_new_lease_key, | 729 | .new_lease_key = smb2_new_lease_key, |
730 | .generate_signingkey = generate_smb3signingkey, | ||
693 | .calc_signature = smb3_calc_signature, | 731 | .calc_signature = smb3_calc_signature, |
694 | }; | 732 | }; |
695 | 733 | ||
@@ -709,6 +747,8 @@ struct smb_version_values smb20_values = { | |||
709 | .cap_nt_find = SMB2_NT_FIND, | 747 | .cap_nt_find = SMB2_NT_FIND, |
710 | .cap_large_files = SMB2_LARGE_FILES, | 748 | .cap_large_files = SMB2_LARGE_FILES, |
711 | .oplock_read = SMB2_OPLOCK_LEVEL_II, | 749 | .oplock_read = SMB2_OPLOCK_LEVEL_II, |
750 | .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
751 | .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
712 | }; | 752 | }; |
713 | 753 | ||
714 | struct smb_version_values smb21_values = { | 754 | struct smb_version_values smb21_values = { |
@@ -727,6 +767,8 @@ struct smb_version_values smb21_values = { | |||
727 | .cap_nt_find = SMB2_NT_FIND, | 767 | .cap_nt_find = SMB2_NT_FIND, |
728 | .cap_large_files = SMB2_LARGE_FILES, | 768 | .cap_large_files = SMB2_LARGE_FILES, |
729 | .oplock_read = SMB2_OPLOCK_LEVEL_II, | 769 | .oplock_read = SMB2_OPLOCK_LEVEL_II, |
770 | .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
771 | .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
730 | }; | 772 | }; |
731 | 773 | ||
732 | struct smb_version_values smb30_values = { | 774 | struct smb_version_values smb30_values = { |
@@ -745,4 +787,26 @@ struct smb_version_values smb30_values = { | |||
745 | .cap_nt_find = SMB2_NT_FIND, | 787 | .cap_nt_find = SMB2_NT_FIND, |
746 | .cap_large_files = SMB2_LARGE_FILES, | 788 | .cap_large_files = SMB2_LARGE_FILES, |
747 | .oplock_read = SMB2_OPLOCK_LEVEL_II, | 789 | .oplock_read = SMB2_OPLOCK_LEVEL_II, |
790 | .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
791 | .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
792 | }; | ||
793 | |||
794 | struct smb_version_values smb302_values = { | ||
795 | .version_string = SMB302_VERSION_STRING, | ||
796 | .protocol_id = SMB302_PROT_ID, | ||
797 | .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, | ||
798 | .large_lock_type = 0, | ||
799 | .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, | ||
800 | .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, | ||
801 | .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, | ||
802 | .header_size = sizeof(struct smb2_hdr), | ||
803 | .max_header_size = MAX_SMB2_HDR_SIZE, | ||
804 | .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, | ||
805 | .lock_cmd = SMB2_LOCK, | ||
806 | .cap_unix = 0, | ||
807 | .cap_nt_find = SMB2_NT_FIND, | ||
808 | .cap_large_files = SMB2_LARGE_FILES, | ||
809 | .oplock_read = SMB2_OPLOCK_LEVEL_II, | ||
810 | .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
811 | .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, | ||
748 | }; | 812 | }; |
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2b95ce2b54e8..abc9c2809b51 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * fs/cifs/smb2pdu.c | 2 | * fs/cifs/smb2pdu.c |
3 | * | 3 | * |
4 | * Copyright (C) International Business Machines Corp., 2009, 2012 | 4 | * Copyright (C) International Business Machines Corp., 2009, 2013 |
5 | * Etersoft, 2012 | 5 | * Etersoft, 2012 |
6 | * Author(s): Steve French (sfrench@us.ibm.com) | 6 | * Author(s): Steve French (sfrench@us.ibm.com) |
7 | * Pavel Shilovsky (pshilovsky@samba.org) 2012 | 7 | * Pavel Shilovsky (pshilovsky@samba.org) 2012 |
@@ -108,19 +108,33 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , | |||
108 | if (!tcon) | 108 | if (!tcon) |
109 | goto out; | 109 | goto out; |
110 | 110 | ||
111 | /* BB FIXME when we do write > 64K add +1 for every 64K in req or rsp */ | ||
112 | /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */ | ||
113 | /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */ | ||
114 | if ((tcon->ses) && | ||
115 | (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) | ||
116 | hdr->CreditCharge = cpu_to_le16(1); | ||
117 | /* else CreditCharge MBZ */ | ||
118 | |||
111 | hdr->TreeId = tcon->tid; | 119 | hdr->TreeId = tcon->tid; |
112 | /* Uid is not converted */ | 120 | /* Uid is not converted */ |
113 | if (tcon->ses) | 121 | if (tcon->ses) |
114 | hdr->SessionId = tcon->ses->Suid; | 122 | hdr->SessionId = tcon->ses->Suid; |
115 | /* BB check following DFS flags BB */ | 123 | |
116 | /* BB do we have to add check for SHI1005_FLAGS_DFS_ROOT too? */ | 124 | /* |
117 | if (tcon->share_flags & SHI1005_FLAGS_DFS) | 125 | * If we would set SMB2_FLAGS_DFS_OPERATIONS on open we also would have |
118 | hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; | 126 | * to pass the path on the Open SMB prefixed by \\server\share. |
119 | /* BB how does SMB2 do case sensitive? */ | 127 | * Not sure when we would need to do the augmented path (if ever) and |
120 | /* if (tcon->nocase) | 128 | * setting this flag breaks the SMB2 open operation since it is |
121 | hdr->Flags |= SMBFLG_CASELESS; */ | 129 | * illegal to send an empty path name (without \\server\share prefix) |
122 | if (tcon->ses && tcon->ses->server && | 130 | * when the DFS flag is set in the SMB open header. We could |
123 | (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED)) | 131 | * consider setting the flag on all operations other than open |
132 | * but it is safer to net set it for now. | ||
133 | */ | ||
134 | /* if (tcon->share_flags & SHI1005_FLAGS_DFS) | ||
135 | hdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; */ | ||
136 | |||
137 | if (tcon->ses && tcon->ses->server && tcon->ses->server->sign) | ||
124 | hdr->Flags |= SMB2_FLAGS_SIGNED; | 138 | hdr->Flags |= SMB2_FLAGS_SIGNED; |
125 | out: | 139 | out: |
126 | pdu->StructureSize2 = cpu_to_le16(parmsize); | 140 | pdu->StructureSize2 = cpu_to_le16(parmsize); |
@@ -328,34 +342,22 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
328 | struct kvec iov[1]; | 342 | struct kvec iov[1]; |
329 | int rc = 0; | 343 | int rc = 0; |
330 | int resp_buftype; | 344 | int resp_buftype; |
331 | struct TCP_Server_Info *server; | 345 | struct TCP_Server_Info *server = ses->server; |
332 | unsigned int sec_flags; | ||
333 | u16 temp = 0; | ||
334 | int blob_offset, blob_length; | 346 | int blob_offset, blob_length; |
335 | char *security_blob; | 347 | char *security_blob; |
336 | int flags = CIFS_NEG_OP; | 348 | int flags = CIFS_NEG_OP; |
337 | 349 | ||
338 | cifs_dbg(FYI, "Negotiate protocol\n"); | 350 | cifs_dbg(FYI, "Negotiate protocol\n"); |
339 | 351 | ||
340 | if (ses->server) | 352 | if (!server) { |
341 | server = ses->server; | 353 | WARN(1, "%s: server is NULL!\n", __func__); |
342 | else { | 354 | return -EIO; |
343 | rc = -EIO; | ||
344 | return rc; | ||
345 | } | 355 | } |
346 | 356 | ||
347 | rc = small_smb2_init(SMB2_NEGOTIATE, NULL, (void **) &req); | 357 | rc = small_smb2_init(SMB2_NEGOTIATE, NULL, (void **) &req); |
348 | if (rc) | 358 | if (rc) |
349 | return rc; | 359 | return rc; |
350 | 360 | ||
351 | /* if any of auth flags (ie not sign or seal) are overriden use them */ | ||
352 | if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) | ||
353 | sec_flags = ses->overrideSecFlg; /* BB FIXME fix sign flags?*/ | ||
354 | else /* if override flags set only sign/seal OR them with global auth */ | ||
355 | sec_flags = global_secflags | ses->overrideSecFlg; | ||
356 | |||
357 | cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); | ||
358 | |||
359 | req->hdr.SessionId = 0; | 361 | req->hdr.SessionId = 0; |
360 | 362 | ||
361 | req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); | 363 | req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); |
@@ -364,12 +366,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
364 | inc_rfc1001_len(req, 2); | 366 | inc_rfc1001_len(req, 2); |
365 | 367 | ||
366 | /* only one of SMB2 signing flags may be set in SMB2 request */ | 368 | /* only one of SMB2 signing flags may be set in SMB2 request */ |
367 | if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) | 369 | if (ses->sign) |
368 | temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; | 370 | req->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED); |
369 | else if (sec_flags & CIFSSEC_MAY_SIGN) /* MAY_SIGN is a single flag */ | 371 | else if (global_secflags & CIFSSEC_MAY_SIGN) |
370 | temp = SMB2_NEGOTIATE_SIGNING_ENABLED; | 372 | req->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED); |
371 | 373 | else | |
372 | req->SecurityMode = cpu_to_le16(temp); | 374 | req->SecurityMode = 0; |
373 | 375 | ||
374 | req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities); | 376 | req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities); |
375 | 377 | ||
@@ -399,6 +401,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
399 | cifs_dbg(FYI, "negotiated smb2.1 dialect\n"); | 401 | cifs_dbg(FYI, "negotiated smb2.1 dialect\n"); |
400 | else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID)) | 402 | else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID)) |
401 | cifs_dbg(FYI, "negotiated smb3.0 dialect\n"); | 403 | cifs_dbg(FYI, "negotiated smb3.0 dialect\n"); |
404 | else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID)) | ||
405 | cifs_dbg(FYI, "negotiated smb3.02 dialect\n"); | ||
402 | else { | 406 | else { |
403 | cifs_dbg(VFS, "Illegal dialect returned by server %d\n", | 407 | cifs_dbg(VFS, "Illegal dialect returned by server %d\n", |
404 | le16_to_cpu(rsp->DialectRevision)); | 408 | le16_to_cpu(rsp->DialectRevision)); |
@@ -407,6 +411,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
407 | } | 411 | } |
408 | server->dialect = le16_to_cpu(rsp->DialectRevision); | 412 | server->dialect = le16_to_cpu(rsp->DialectRevision); |
409 | 413 | ||
414 | /* SMB2 only has an extended negflavor */ | ||
415 | server->negflavor = CIFS_NEGFLAVOR_EXTENDED; | ||
410 | server->maxBuf = le32_to_cpu(rsp->MaxTransactSize); | 416 | server->maxBuf = le32_to_cpu(rsp->MaxTransactSize); |
411 | server->max_read = le32_to_cpu(rsp->MaxReadSize); | 417 | server->max_read = le32_to_cpu(rsp->MaxReadSize); |
412 | server->max_write = le32_to_cpu(rsp->MaxWriteSize); | 418 | server->max_write = le32_to_cpu(rsp->MaxWriteSize); |
@@ -418,44 +424,22 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
418 | 424 | ||
419 | security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, | 425 | security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, |
420 | &rsp->hdr); | 426 | &rsp->hdr); |
421 | if (blob_length == 0) { | 427 | /* |
422 | cifs_dbg(VFS, "missing security blob on negprot\n"); | 428 | * See MS-SMB2 section 2.2.4: if no blob, client picks default which |
423 | rc = -EIO; | 429 | * for us will be |
424 | goto neg_exit; | 430 | * ses->sectype = RawNTLMSSP; |
425 | } | 431 | * but for time being this is our only auth choice so doesn't matter. |
426 | 432 | * We just found a server which sets blob length to zero expecting raw. | |
427 | cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); | 433 | */ |
428 | if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { | 434 | if (blob_length == 0) |
429 | cifs_dbg(FYI, "Signing required\n"); | 435 | cifs_dbg(FYI, "missing security blob on negprot\n"); |
430 | if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED | | ||
431 | SMB2_NEGOTIATE_SIGNING_ENABLED))) { | ||
432 | cifs_dbg(VFS, "signing required but server lacks support\n"); | ||
433 | rc = -EOPNOTSUPP; | ||
434 | goto neg_exit; | ||
435 | } | ||
436 | server->sec_mode |= SECMODE_SIGN_REQUIRED; | ||
437 | } else if (sec_flags & CIFSSEC_MAY_SIGN) { | ||
438 | cifs_dbg(FYI, "Signing optional\n"); | ||
439 | if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { | ||
440 | cifs_dbg(FYI, "Server requires signing\n"); | ||
441 | server->sec_mode |= SECMODE_SIGN_REQUIRED; | ||
442 | } else { | ||
443 | server->sec_mode &= | ||
444 | ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); | ||
445 | } | ||
446 | } else { | ||
447 | cifs_dbg(FYI, "Signing disabled\n"); | ||
448 | if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { | ||
449 | cifs_dbg(VFS, "Server requires packet signing to be enabled in /proc/fs/cifs/SecurityFlags\n"); | ||
450 | rc = -EOPNOTSUPP; | ||
451 | goto neg_exit; | ||
452 | } | ||
453 | server->sec_mode &= | ||
454 | ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); | ||
455 | } | ||
456 | 436 | ||
437 | rc = cifs_enable_signing(server, ses->sign); | ||
457 | #ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ | 438 | #ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ |
458 | rc = decode_neg_token_init(security_blob, blob_length, | 439 | if (rc) |
440 | goto neg_exit; | ||
441 | if (blob_length) | ||
442 | rc = decode_neg_token_init(security_blob, blob_length, | ||
459 | &server->sec_type); | 443 | &server->sec_type); |
460 | if (rc == 1) | 444 | if (rc == 1) |
461 | rc = 0; | 445 | rc = 0; |
@@ -480,9 +464,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, | |||
480 | int rc = 0; | 464 | int rc = 0; |
481 | int resp_buftype; | 465 | int resp_buftype; |
482 | __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ | 466 | __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ |
483 | struct TCP_Server_Info *server; | 467 | struct TCP_Server_Info *server = ses->server; |
484 | unsigned int sec_flags; | ||
485 | u8 temp = 0; | ||
486 | u16 blob_length = 0; | 468 | u16 blob_length = 0; |
487 | char *security_blob; | 469 | char *security_blob; |
488 | char *ntlmssp_blob = NULL; | 470 | char *ntlmssp_blob = NULL; |
@@ -490,11 +472,9 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, | |||
490 | 472 | ||
491 | cifs_dbg(FYI, "Session Setup\n"); | 473 | cifs_dbg(FYI, "Session Setup\n"); |
492 | 474 | ||
493 | if (ses->server) | 475 | if (!server) { |
494 | server = ses->server; | 476 | WARN(1, "%s: server is NULL!\n", __func__); |
495 | else { | 477 | return -EIO; |
496 | rc = -EIO; | ||
497 | return rc; | ||
498 | } | 478 | } |
499 | 479 | ||
500 | /* | 480 | /* |
@@ -505,7 +485,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, | |||
505 | if (!ses->ntlmssp) | 485 | if (!ses->ntlmssp) |
506 | return -ENOMEM; | 486 | return -ENOMEM; |
507 | 487 | ||
508 | ses->server->secType = RawNTLMSSP; | 488 | /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */ |
489 | ses->sectype = RawNTLMSSP; | ||
509 | 490 | ||
510 | ssetup_ntlmssp_authenticate: | 491 | ssetup_ntlmssp_authenticate: |
511 | if (phase == NtLmChallenge) | 492 | if (phase == NtLmChallenge) |
@@ -515,28 +496,19 @@ ssetup_ntlmssp_authenticate: | |||
515 | if (rc) | 496 | if (rc) |
516 | return rc; | 497 | return rc; |
517 | 498 | ||
518 | /* if any of auth flags (ie not sign or seal) are overriden use them */ | ||
519 | if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) | ||
520 | sec_flags = ses->overrideSecFlg; /* BB FIXME fix sign flags?*/ | ||
521 | else /* if override flags set only sign/seal OR them with global auth */ | ||
522 | sec_flags = global_secflags | ses->overrideSecFlg; | ||
523 | |||
524 | cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); | ||
525 | |||
526 | req->hdr.SessionId = 0; /* First session, not a reauthenticate */ | 499 | req->hdr.SessionId = 0; /* First session, not a reauthenticate */ |
527 | req->VcNumber = 0; /* MBZ */ | 500 | req->VcNumber = 0; /* MBZ */ |
528 | /* to enable echos and oplocks */ | 501 | /* to enable echos and oplocks */ |
529 | req->hdr.CreditRequest = cpu_to_le16(3); | 502 | req->hdr.CreditRequest = cpu_to_le16(3); |
530 | 503 | ||
531 | /* only one of SMB2 signing flags may be set in SMB2 request */ | 504 | /* only one of SMB2 signing flags may be set in SMB2 request */ |
532 | if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) | 505 | if (server->sign) |
533 | temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; | 506 | req->SecurityMode = SMB2_NEGOTIATE_SIGNING_REQUIRED; |
534 | else if (ses->server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) | 507 | else if (global_secflags & CIFSSEC_MAY_SIGN) /* one flag unlike MUST_ */ |
535 | temp = SMB2_NEGOTIATE_SIGNING_REQUIRED; | 508 | req->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED; |
536 | else if (sec_flags & CIFSSEC_MAY_SIGN) /* MAY_SIGN is a single flag */ | 509 | else |
537 | temp = SMB2_NEGOTIATE_SIGNING_ENABLED; | 510 | req->SecurityMode = 0; |
538 | 511 | ||
539 | req->SecurityMode = temp; | ||
540 | req->Capabilities = 0; | 512 | req->Capabilities = 0; |
541 | req->Channel = 0; /* MBZ */ | 513 | req->Channel = 0; /* MBZ */ |
542 | 514 | ||
@@ -679,7 +651,7 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) | |||
679 | 651 | ||
680 | /* since no tcon, smb2_init can not do this, so do here */ | 652 | /* since no tcon, smb2_init can not do this, so do here */ |
681 | req->hdr.SessionId = ses->Suid; | 653 | req->hdr.SessionId = ses->Suid; |
682 | if (server->sec_mode & SECMODE_SIGN_REQUIRED) | 654 | if (server->sign) |
683 | req->hdr.Flags |= SMB2_FLAGS_SIGNED; | 655 | req->hdr.Flags |= SMB2_FLAGS_SIGNED; |
684 | 656 | ||
685 | rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0); | 657 | rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0); |
@@ -788,11 +760,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, | |||
788 | } | 760 | } |
789 | 761 | ||
790 | tcon->share_flags = le32_to_cpu(rsp->ShareFlags); | 762 | tcon->share_flags = le32_to_cpu(rsp->ShareFlags); |
763 | tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */ | ||
791 | tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess); | 764 | tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess); |
792 | tcon->tidStatus = CifsGood; | 765 | tcon->tidStatus = CifsGood; |
793 | tcon->need_reconnect = false; | 766 | tcon->need_reconnect = false; |
794 | tcon->tid = rsp->hdr.TreeId; | 767 | tcon->tid = rsp->hdr.TreeId; |
795 | strncpy(tcon->treeName, tree, MAX_TREE_SIZE); | 768 | strlcpy(tcon->treeName, tree, sizeof(tcon->treeName)); |
796 | 769 | ||
797 | if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && | 770 | if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && |
798 | ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) | 771 | ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) |
@@ -874,29 +847,76 @@ create_lease_buf(u8 *lease_key, u8 oplock) | |||
874 | return buf; | 847 | return buf; |
875 | } | 848 | } |
876 | 849 | ||
850 | static struct create_durable * | ||
851 | create_durable_buf(void) | ||
852 | { | ||
853 | struct create_durable *buf; | ||
854 | |||
855 | buf = kzalloc(sizeof(struct create_durable), GFP_KERNEL); | ||
856 | if (!buf) | ||
857 | return NULL; | ||
858 | |||
859 | buf->ccontext.DataOffset = cpu_to_le16(offsetof | ||
860 | (struct create_durable, Data)); | ||
861 | buf->ccontext.DataLength = cpu_to_le32(16); | ||
862 | buf->ccontext.NameOffset = cpu_to_le16(offsetof | ||
863 | (struct create_durable, Name)); | ||
864 | buf->ccontext.NameLength = cpu_to_le16(4); | ||
865 | buf->Name[0] = 'D'; | ||
866 | buf->Name[1] = 'H'; | ||
867 | buf->Name[2] = 'n'; | ||
868 | buf->Name[3] = 'Q'; | ||
869 | return buf; | ||
870 | } | ||
871 | |||
872 | static struct create_durable * | ||
873 | create_reconnect_durable_buf(struct cifs_fid *fid) | ||
874 | { | ||
875 | struct create_durable *buf; | ||
876 | |||
877 | buf = kzalloc(sizeof(struct create_durable), GFP_KERNEL); | ||
878 | if (!buf) | ||
879 | return NULL; | ||
880 | |||
881 | buf->ccontext.DataOffset = cpu_to_le16(offsetof | ||
882 | (struct create_durable, Data)); | ||
883 | buf->ccontext.DataLength = cpu_to_le32(16); | ||
884 | buf->ccontext.NameOffset = cpu_to_le16(offsetof | ||
885 | (struct create_durable, Name)); | ||
886 | buf->ccontext.NameLength = cpu_to_le16(4); | ||
887 | buf->Data.Fid.PersistentFileId = fid->persistent_fid; | ||
888 | buf->Data.Fid.VolatileFileId = fid->volatile_fid; | ||
889 | buf->Name[0] = 'D'; | ||
890 | buf->Name[1] = 'H'; | ||
891 | buf->Name[2] = 'n'; | ||
892 | buf->Name[3] = 'C'; | ||
893 | return buf; | ||
894 | } | ||
895 | |||
877 | static __u8 | 896 | static __u8 |
878 | parse_lease_state(struct smb2_create_rsp *rsp) | 897 | parse_lease_state(struct smb2_create_rsp *rsp) |
879 | { | 898 | { |
880 | char *data_offset; | 899 | char *data_offset; |
881 | struct create_lease *lc; | 900 | struct create_lease *lc; |
882 | bool found = false; | 901 | bool found = false; |
902 | unsigned int next = 0; | ||
903 | char *name; | ||
883 | 904 | ||
884 | data_offset = (char *)rsp; | 905 | data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); |
885 | data_offset += 4 + le32_to_cpu(rsp->CreateContextsOffset); | ||
886 | lc = (struct create_lease *)data_offset; | 906 | lc = (struct create_lease *)data_offset; |
887 | do { | 907 | do { |
888 | char *name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc; | 908 | lc = (struct create_lease *)((char *)lc + next); |
909 | name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc; | ||
889 | if (le16_to_cpu(lc->ccontext.NameLength) != 4 || | 910 | if (le16_to_cpu(lc->ccontext.NameLength) != 4 || |
890 | strncmp(name, "RqLs", 4)) { | 911 | strncmp(name, "RqLs", 4)) { |
891 | lc = (struct create_lease *)((char *)lc | 912 | next = le32_to_cpu(lc->ccontext.Next); |
892 | + le32_to_cpu(lc->ccontext.Next)); | ||
893 | continue; | 913 | continue; |
894 | } | 914 | } |
895 | if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) | 915 | if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) |
896 | return SMB2_OPLOCK_LEVEL_NOCHANGE; | 916 | return SMB2_OPLOCK_LEVEL_NOCHANGE; |
897 | found = true; | 917 | found = true; |
898 | break; | 918 | break; |
899 | } while (le32_to_cpu(lc->ccontext.Next) != 0); | 919 | } while (next != 0); |
900 | 920 | ||
901 | if (!found) | 921 | if (!found) |
902 | return 0; | 922 | return 0; |
@@ -904,23 +924,74 @@ parse_lease_state(struct smb2_create_rsp *rsp) | |||
904 | return smb2_map_lease_to_oplock(lc->lcontext.LeaseState); | 924 | return smb2_map_lease_to_oplock(lc->lcontext.LeaseState); |
905 | } | 925 | } |
906 | 926 | ||
927 | static int | ||
928 | add_lease_context(struct kvec *iov, unsigned int *num_iovec, __u8 *oplock) | ||
929 | { | ||
930 | struct smb2_create_req *req = iov[0].iov_base; | ||
931 | unsigned int num = *num_iovec; | ||
932 | |||
933 | iov[num].iov_base = create_lease_buf(oplock+1, *oplock); | ||
934 | if (iov[num].iov_base == NULL) | ||
935 | return -ENOMEM; | ||
936 | iov[num].iov_len = sizeof(struct create_lease); | ||
937 | req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; | ||
938 | if (!req->CreateContextsOffset) | ||
939 | req->CreateContextsOffset = cpu_to_le32( | ||
940 | sizeof(struct smb2_create_req) - 4 + | ||
941 | iov[num - 1].iov_len); | ||
942 | req->CreateContextsLength = cpu_to_le32( | ||
943 | le32_to_cpu(req->CreateContextsLength) + | ||
944 | sizeof(struct create_lease)); | ||
945 | inc_rfc1001_len(&req->hdr, sizeof(struct create_lease)); | ||
946 | *num_iovec = num + 1; | ||
947 | return 0; | ||
948 | } | ||
949 | |||
950 | static int | ||
951 | add_durable_context(struct kvec *iov, unsigned int *num_iovec, | ||
952 | struct cifs_open_parms *oparms) | ||
953 | { | ||
954 | struct smb2_create_req *req = iov[0].iov_base; | ||
955 | unsigned int num = *num_iovec; | ||
956 | |||
957 | if (oparms->reconnect) { | ||
958 | iov[num].iov_base = create_reconnect_durable_buf(oparms->fid); | ||
959 | /* indicate that we don't need to relock the file */ | ||
960 | oparms->reconnect = false; | ||
961 | } else | ||
962 | iov[num].iov_base = create_durable_buf(); | ||
963 | if (iov[num].iov_base == NULL) | ||
964 | return -ENOMEM; | ||
965 | iov[num].iov_len = sizeof(struct create_durable); | ||
966 | if (!req->CreateContextsOffset) | ||
967 | req->CreateContextsOffset = | ||
968 | cpu_to_le32(sizeof(struct smb2_create_req) - 4 + | ||
969 | iov[1].iov_len); | ||
970 | req->CreateContextsLength = | ||
971 | cpu_to_le32(le32_to_cpu(req->CreateContextsLength) + | ||
972 | sizeof(struct create_durable)); | ||
973 | inc_rfc1001_len(&req->hdr, sizeof(struct create_durable)); | ||
974 | *num_iovec = num + 1; | ||
975 | return 0; | ||
976 | } | ||
977 | |||
907 | int | 978 | int |
908 | SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, | 979 | SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, |
909 | u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, | ||
910 | __u32 create_disposition, __u32 file_attributes, __u32 create_options, | ||
911 | __u8 *oplock, struct smb2_file_all_info *buf) | 980 | __u8 *oplock, struct smb2_file_all_info *buf) |
912 | { | 981 | { |
913 | struct smb2_create_req *req; | 982 | struct smb2_create_req *req; |
914 | struct smb2_create_rsp *rsp; | 983 | struct smb2_create_rsp *rsp; |
915 | struct TCP_Server_Info *server; | 984 | struct TCP_Server_Info *server; |
985 | struct cifs_tcon *tcon = oparms->tcon; | ||
916 | struct cifs_ses *ses = tcon->ses; | 986 | struct cifs_ses *ses = tcon->ses; |
917 | struct kvec iov[3]; | 987 | struct kvec iov[4]; |
918 | int resp_buftype; | 988 | int resp_buftype; |
919 | int uni_path_len; | 989 | int uni_path_len; |
920 | __le16 *copy_path = NULL; | 990 | __le16 *copy_path = NULL; |
921 | int copy_size; | 991 | int copy_size; |
922 | int rc = 0; | 992 | int rc = 0; |
923 | int num_iovecs = 2; | 993 | unsigned int num_iovecs = 2; |
994 | __u32 file_attributes = 0; | ||
924 | 995 | ||
925 | cifs_dbg(FYI, "create/open\n"); | 996 | cifs_dbg(FYI, "create/open\n"); |
926 | 997 | ||
@@ -933,55 +1004,47 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, | |||
933 | if (rc) | 1004 | if (rc) |
934 | return rc; | 1005 | return rc; |
935 | 1006 | ||
1007 | if (oparms->create_options & CREATE_OPTION_READONLY) | ||
1008 | file_attributes |= ATTR_READONLY; | ||
1009 | |||
936 | req->ImpersonationLevel = IL_IMPERSONATION; | 1010 | req->ImpersonationLevel = IL_IMPERSONATION; |
937 | req->DesiredAccess = cpu_to_le32(desired_access); | 1011 | req->DesiredAccess = cpu_to_le32(oparms->desired_access); |
938 | /* File attributes ignored on open (used in create though) */ | 1012 | /* File attributes ignored on open (used in create though) */ |
939 | req->FileAttributes = cpu_to_le32(file_attributes); | 1013 | req->FileAttributes = cpu_to_le32(file_attributes); |
940 | req->ShareAccess = FILE_SHARE_ALL_LE; | 1014 | req->ShareAccess = FILE_SHARE_ALL_LE; |
941 | req->CreateDisposition = cpu_to_le32(create_disposition); | 1015 | req->CreateDisposition = cpu_to_le32(oparms->disposition); |
942 | req->CreateOptions = cpu_to_le32(create_options); | 1016 | req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK); |
943 | uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; | 1017 | uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; |
944 | req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) | 1018 | /* do not count rfc1001 len field */ |
945 | - 8 /* pad */ - 4 /* do not count rfc1001 len field */); | 1019 | req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - 4); |
946 | 1020 | ||
947 | iov[0].iov_base = (char *)req; | 1021 | iov[0].iov_base = (char *)req; |
948 | /* 4 for rfc1002 length field */ | 1022 | /* 4 for rfc1002 length field */ |
949 | iov[0].iov_len = get_rfc1002_length(req) + 4; | 1023 | iov[0].iov_len = get_rfc1002_length(req) + 4; |
950 | 1024 | ||
951 | /* MUST set path len (NameLength) to 0 opening root of share */ | 1025 | /* MUST set path len (NameLength) to 0 opening root of share */ |
952 | if (uni_path_len >= 4) { | 1026 | req->NameLength = cpu_to_le16(uni_path_len - 2); |
953 | req->NameLength = cpu_to_le16(uni_path_len - 2); | 1027 | /* -1 since last byte is buf[0] which is sent below (path) */ |
954 | /* -1 since last byte is buf[0] which is sent below (path) */ | 1028 | iov[0].iov_len--; |
955 | iov[0].iov_len--; | 1029 | if (uni_path_len % 8 != 0) { |
956 | if (uni_path_len % 8 != 0) { | 1030 | copy_size = uni_path_len / 8 * 8; |
957 | copy_size = uni_path_len / 8 * 8; | 1031 | if (copy_size < uni_path_len) |
958 | if (copy_size < uni_path_len) | 1032 | copy_size += 8; |
959 | copy_size += 8; | 1033 | |
960 | 1034 | copy_path = kzalloc(copy_size, GFP_KERNEL); | |
961 | copy_path = kzalloc(copy_size, GFP_KERNEL); | 1035 | if (!copy_path) |
962 | if (!copy_path) | 1036 | return -ENOMEM; |
963 | return -ENOMEM; | 1037 | memcpy((char *)copy_path, (const char *)path, |
964 | memcpy((char *)copy_path, (const char *)path, | 1038 | uni_path_len); |
965 | uni_path_len); | 1039 | uni_path_len = copy_size; |
966 | uni_path_len = copy_size; | 1040 | path = copy_path; |
967 | path = copy_path; | ||
968 | } | ||
969 | |||
970 | iov[1].iov_len = uni_path_len; | ||
971 | iov[1].iov_base = path; | ||
972 | /* | ||
973 | * -1 since last byte is buf[0] which was counted in | ||
974 | * smb2_buf_len. | ||
975 | */ | ||
976 | inc_rfc1001_len(req, uni_path_len - 1); | ||
977 | } else { | ||
978 | iov[0].iov_len += 7; | ||
979 | req->hdr.smb2_buf_length = cpu_to_be32(be32_to_cpu( | ||
980 | req->hdr.smb2_buf_length) + 8 - 1); | ||
981 | num_iovecs = 1; | ||
982 | req->NameLength = 0; | ||
983 | } | 1041 | } |
984 | 1042 | ||
1043 | iov[1].iov_len = uni_path_len; | ||
1044 | iov[1].iov_base = path; | ||
1045 | /* -1 since last byte is buf[0] which was counted in smb2_buf_len */ | ||
1046 | inc_rfc1001_len(req, uni_path_len - 1); | ||
1047 | |||
985 | if (!server->oplocks) | 1048 | if (!server->oplocks) |
986 | *oplock = SMB2_OPLOCK_LEVEL_NONE; | 1049 | *oplock = SMB2_OPLOCK_LEVEL_NONE; |
987 | 1050 | ||
@@ -989,21 +1052,29 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, | |||
989 | *oplock == SMB2_OPLOCK_LEVEL_NONE) | 1052 | *oplock == SMB2_OPLOCK_LEVEL_NONE) |
990 | req->RequestedOplockLevel = *oplock; | 1053 | req->RequestedOplockLevel = *oplock; |
991 | else { | 1054 | else { |
992 | iov[num_iovecs].iov_base = create_lease_buf(oplock+1, *oplock); | 1055 | rc = add_lease_context(iov, &num_iovecs, oplock); |
993 | if (iov[num_iovecs].iov_base == NULL) { | 1056 | if (rc) { |
994 | cifs_small_buf_release(req); | 1057 | cifs_small_buf_release(req); |
995 | kfree(copy_path); | 1058 | kfree(copy_path); |
996 | return -ENOMEM; | 1059 | return rc; |
1060 | } | ||
1061 | } | ||
1062 | |||
1063 | if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) { | ||
1064 | /* need to set Next field of lease context if we request it */ | ||
1065 | if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) { | ||
1066 | struct create_context *ccontext = | ||
1067 | (struct create_context *)iov[num_iovecs-1].iov_base; | ||
1068 | ccontext->Next = | ||
1069 | cpu_to_le32(sizeof(struct create_lease)); | ||
1070 | } | ||
1071 | rc = add_durable_context(iov, &num_iovecs, oparms); | ||
1072 | if (rc) { | ||
1073 | cifs_small_buf_release(req); | ||
1074 | kfree(copy_path); | ||
1075 | kfree(iov[num_iovecs-1].iov_base); | ||
1076 | return rc; | ||
997 | } | 1077 | } |
998 | iov[num_iovecs].iov_len = sizeof(struct create_lease); | ||
999 | req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; | ||
1000 | req->CreateContextsOffset = cpu_to_le32( | ||
1001 | sizeof(struct smb2_create_req) - 4 - 8 + | ||
1002 | iov[num_iovecs-1].iov_len); | ||
1003 | req->CreateContextsLength = cpu_to_le32( | ||
1004 | sizeof(struct create_lease)); | ||
1005 | inc_rfc1001_len(&req->hdr, sizeof(struct create_lease)); | ||
1006 | num_iovecs++; | ||
1007 | } | 1078 | } |
1008 | 1079 | ||
1009 | rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); | 1080 | rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); |
@@ -1014,8 +1085,8 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, | |||
1014 | goto creat_exit; | 1085 | goto creat_exit; |
1015 | } | 1086 | } |
1016 | 1087 | ||
1017 | *persistent_fid = rsp->PersistentFileId; | 1088 | oparms->fid->persistent_fid = rsp->PersistentFileId; |
1018 | *volatile_fid = rsp->VolatileFileId; | 1089 | oparms->fid->volatile_fid = rsp->VolatileFileId; |
1019 | 1090 | ||
1020 | if (buf) { | 1091 | if (buf) { |
1021 | memcpy(buf, &rsp->CreationTime, 32); | 1092 | memcpy(buf, &rsp->CreationTime, 32); |
@@ -1036,6 +1107,122 @@ creat_exit: | |||
1036 | return rc; | 1107 | return rc; |
1037 | } | 1108 | } |
1038 | 1109 | ||
1110 | /* | ||
1111 | * SMB2 IOCTL is used for both IOCTLs and FSCTLs | ||
1112 | */ | ||
1113 | int | ||
1114 | SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, | ||
1115 | u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data, | ||
1116 | u32 indatalen, char **out_data, u32 *plen /* returned data len */) | ||
1117 | { | ||
1118 | struct smb2_ioctl_req *req; | ||
1119 | struct smb2_ioctl_rsp *rsp; | ||
1120 | struct TCP_Server_Info *server; | ||
1121 | struct cifs_ses *ses = tcon->ses; | ||
1122 | struct kvec iov[2]; | ||
1123 | int resp_buftype; | ||
1124 | int num_iovecs; | ||
1125 | int rc = 0; | ||
1126 | |||
1127 | cifs_dbg(FYI, "SMB2 IOCTL\n"); | ||
1128 | |||
1129 | /* zero out returned data len, in case of error */ | ||
1130 | if (plen) | ||
1131 | *plen = 0; | ||
1132 | |||
1133 | if (ses && (ses->server)) | ||
1134 | server = ses->server; | ||
1135 | else | ||
1136 | return -EIO; | ||
1137 | |||
1138 | rc = small_smb2_init(SMB2_IOCTL, tcon, (void **) &req); | ||
1139 | if (rc) | ||
1140 | return rc; | ||
1141 | |||
1142 | req->CtlCode = cpu_to_le32(opcode); | ||
1143 | req->PersistentFileId = persistent_fid; | ||
1144 | req->VolatileFileId = volatile_fid; | ||
1145 | |||
1146 | if (indatalen) { | ||
1147 | req->InputCount = cpu_to_le32(indatalen); | ||
1148 | /* do not set InputOffset if no input data */ | ||
1149 | req->InputOffset = | ||
1150 | cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer) - 4); | ||
1151 | iov[1].iov_base = in_data; | ||
1152 | iov[1].iov_len = indatalen; | ||
1153 | num_iovecs = 2; | ||
1154 | } else | ||
1155 | num_iovecs = 1; | ||
1156 | |||
1157 | req->OutputOffset = 0; | ||
1158 | req->OutputCount = 0; /* MBZ */ | ||
1159 | |||
1160 | /* | ||
1161 | * Could increase MaxOutputResponse, but that would require more | ||
1162 | * than one credit. Windows typically sets this smaller, but for some | ||
1163 | * ioctls it may be useful to allow server to send more. No point | ||
1164 | * limiting what the server can send as long as fits in one credit | ||
1165 | */ | ||
1166 | req->MaxOutputResponse = cpu_to_le32(0xFF00); /* < 64K uses 1 credit */ | ||
1167 | |||
1168 | if (is_fsctl) | ||
1169 | req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); | ||
1170 | else | ||
1171 | req->Flags = 0; | ||
1172 | |||
1173 | iov[0].iov_base = (char *)req; | ||
1174 | /* 4 for rfc1002 length field */ | ||
1175 | iov[0].iov_len = get_rfc1002_length(req) + 4; | ||
1176 | |||
1177 | if (indatalen) | ||
1178 | inc_rfc1001_len(req, indatalen); | ||
1179 | |||
1180 | rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); | ||
1181 | rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; | ||
1182 | |||
1183 | if (rc != 0) { | ||
1184 | if (tcon) | ||
1185 | cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); | ||
1186 | goto ioctl_exit; | ||
1187 | } | ||
1188 | |||
1189 | /* check if caller wants to look at return data or just return rc */ | ||
1190 | if ((plen == NULL) || (out_data == NULL)) | ||
1191 | goto ioctl_exit; | ||
1192 | |||
1193 | *plen = le32_to_cpu(rsp->OutputCount); | ||
1194 | |||
1195 | /* We check for obvious errors in the output buffer length and offset */ | ||
1196 | if (*plen == 0) | ||
1197 | goto ioctl_exit; /* server returned no data */ | ||
1198 | else if (*plen > 0xFF00) { | ||
1199 | cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", *plen); | ||
1200 | *plen = 0; | ||
1201 | rc = -EIO; | ||
1202 | goto ioctl_exit; | ||
1203 | } | ||
1204 | |||
1205 | if (get_rfc1002_length(rsp) < le32_to_cpu(rsp->OutputOffset) + *plen) { | ||
1206 | cifs_dbg(VFS, "Malformed ioctl resp: len %d offset %d\n", *plen, | ||
1207 | le32_to_cpu(rsp->OutputOffset)); | ||
1208 | *plen = 0; | ||
1209 | rc = -EIO; | ||
1210 | goto ioctl_exit; | ||
1211 | } | ||
1212 | |||
1213 | *out_data = kmalloc(*plen, GFP_KERNEL); | ||
1214 | if (*out_data == NULL) { | ||
1215 | rc = -ENOMEM; | ||
1216 | goto ioctl_exit; | ||
1217 | } | ||
1218 | |||
1219 | memcpy(*out_data, rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset), | ||
1220 | *plen); | ||
1221 | ioctl_exit: | ||
1222 | free_rsp_buf(resp_buftype, rsp); | ||
1223 | return rc; | ||
1224 | } | ||
1225 | |||
1039 | int | 1226 | int |
1040 | SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, | 1227 | SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, |
1041 | u64 persistent_fid, u64 volatile_fid) | 1228 | u64 persistent_fid, u64 volatile_fid) |
@@ -1384,8 +1571,7 @@ smb2_readv_callback(struct mid_q_entry *mid) | |||
1384 | case MID_RESPONSE_RECEIVED: | 1571 | case MID_RESPONSE_RECEIVED: |
1385 | credits_received = le16_to_cpu(buf->CreditRequest); | 1572 | credits_received = le16_to_cpu(buf->CreditRequest); |
1386 | /* result already set, check signature */ | 1573 | /* result already set, check signature */ |
1387 | if (server->sec_mode & | 1574 | if (server->sign) { |
1388 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | ||
1389 | int rc; | 1575 | int rc; |
1390 | 1576 | ||
1391 | rc = smb2_verify_signature(&rqst, server); | 1577 | rc = smb2_verify_signature(&rqst, server); |
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 4cb4ced258cb..36b0d37ea69b 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * fs/cifs/smb2pdu.h | 2 | * fs/cifs/smb2pdu.h |
3 | * | 3 | * |
4 | * Copyright (c) International Business Machines Corp., 2009, 2010 | 4 | * Copyright (c) International Business Machines Corp., 2009, 2013 |
5 | * Etersoft, 2012 | 5 | * Etersoft, 2012 |
6 | * Author(s): Steve French (sfrench@us.ibm.com) | 6 | * Author(s): Steve French (sfrench@us.ibm.com) |
7 | * Pavel Shilovsky (pshilovsky@samba.org) 2012 | 7 | * Pavel Shilovsky (pshilovsky@samba.org) 2012 |
@@ -170,6 +170,7 @@ struct smb2_negotiate_req { | |||
170 | #define SMB20_PROT_ID 0x0202 | 170 | #define SMB20_PROT_ID 0x0202 |
171 | #define SMB21_PROT_ID 0x0210 | 171 | #define SMB21_PROT_ID 0x0210 |
172 | #define SMB30_PROT_ID 0x0300 | 172 | #define SMB30_PROT_ID 0x0300 |
173 | #define SMB302_PROT_ID 0x0302 | ||
173 | #define BAD_PROT_ID 0xFFFF | 174 | #define BAD_PROT_ID 0xFFFF |
174 | 175 | ||
175 | /* SecurityMode flags */ | 176 | /* SecurityMode flags */ |
@@ -283,10 +284,17 @@ struct smb2_tree_connect_rsp { | |||
283 | #define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING 0x00000400 | 284 | #define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING 0x00000400 |
284 | #define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM 0x00000800 | 285 | #define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM 0x00000800 |
285 | #define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK 0x00001000 | 286 | #define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK 0x00001000 |
286 | #define SHI1005_FLAGS_ENABLE_HASH 0x00002000 | 287 | #define SHI1005_FLAGS_ENABLE_HASH_V1 0x00002000 |
288 | #define SHI1005_FLAGS_ENABLE_HASH_V2 0x00004000 | ||
289 | #define SHI1005_FLAGS_ENCRYPT_DATA 0x00008000 | ||
290 | #define SHI1005_FLAGS_ALL 0x0000FF33 | ||
287 | 291 | ||
288 | /* Possible share capabilities */ | 292 | /* Possible share capabilities */ |
289 | #define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) | 293 | #define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) /* all dialects */ |
294 | #define SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY cpu_to_le32(0x00000010) /* 3.0 */ | ||
295 | #define SMB2_SHARE_CAP_SCALEOUT cpu_to_le32(0x00000020) /* 3.0 */ | ||
296 | #define SMB2_SHARE_CAP_CLUSTER cpu_to_le32(0x00000040) /* 3.0 */ | ||
297 | #define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */ | ||
290 | 298 | ||
291 | struct smb2_tree_disconnect_req { | 299 | struct smb2_tree_disconnect_req { |
292 | struct smb2_hdr hdr; | 300 | struct smb2_hdr hdr; |
@@ -420,7 +428,7 @@ struct smb2_create_req { | |||
420 | __le16 NameLength; | 428 | __le16 NameLength; |
421 | __le32 CreateContextsOffset; | 429 | __le32 CreateContextsOffset; |
422 | __le32 CreateContextsLength; | 430 | __le32 CreateContextsLength; |
423 | __u8 Buffer[8]; | 431 | __u8 Buffer[0]; |
424 | } __packed; | 432 | } __packed; |
425 | 433 | ||
426 | struct smb2_create_rsp { | 434 | struct smb2_create_rsp { |
@@ -477,6 +485,87 @@ struct create_lease { | |||
477 | struct lease_context lcontext; | 485 | struct lease_context lcontext; |
478 | } __packed; | 486 | } __packed; |
479 | 487 | ||
488 | struct create_durable { | ||
489 | struct create_context ccontext; | ||
490 | __u8 Name[8]; | ||
491 | union { | ||
492 | __u8 Reserved[16]; | ||
493 | struct { | ||
494 | __u64 PersistentFileId; | ||
495 | __u64 VolatileFileId; | ||
496 | } Fid; | ||
497 | } Data; | ||
498 | } __packed; | ||
499 | |||
500 | /* this goes in the ioctl buffer when doing a copychunk request */ | ||
501 | struct copychunk_ioctl { | ||
502 | char SourceKey[24]; | ||
503 | __le32 ChunkCount; /* we are only sending 1 */ | ||
504 | __le32 Reserved; | ||
505 | /* array will only be one chunk long for us */ | ||
506 | __le64 SourceOffset; | ||
507 | __le64 TargetOffset; | ||
508 | __le32 Length; /* how many bytes to copy */ | ||
509 | __u32 Reserved2; | ||
510 | } __packed; | ||
511 | |||
512 | /* Response and Request are the same format */ | ||
513 | struct validate_negotiate_info { | ||
514 | __le32 Capabilities; | ||
515 | __u8 Guid[SMB2_CLIENT_GUID_SIZE]; | ||
516 | __le16 SecurityMode; | ||
517 | __le16 DialectCount; | ||
518 | __le16 Dialect[1]; | ||
519 | } __packed; | ||
520 | |||
521 | #define RSS_CAPABLE 0x00000001 | ||
522 | #define RDMA_CAPABLE 0x00000002 | ||
523 | |||
524 | struct network_interface_info_ioctl_rsp { | ||
525 | __le32 Next; /* next interface. zero if this is last one */ | ||
526 | __le32 IfIndex; | ||
527 | __le32 Capability; /* RSS or RDMA Capable */ | ||
528 | __le32 Reserved; | ||
529 | __le64 LinkSpeed; | ||
530 | char SockAddr_Storage[128]; | ||
531 | } __packed; | ||
532 | |||
533 | #define NO_FILE_ID 0xFFFFFFFFFFFFFFFFULL /* general ioctls to srv not to file */ | ||
534 | |||
535 | struct smb2_ioctl_req { | ||
536 | struct smb2_hdr hdr; | ||
537 | __le16 StructureSize; /* Must be 57 */ | ||
538 | __u16 Reserved; | ||
539 | __le32 CtlCode; | ||
540 | __u64 PersistentFileId; /* opaque endianness */ | ||
541 | __u64 VolatileFileId; /* opaque endianness */ | ||
542 | __le32 InputOffset; | ||
543 | __le32 InputCount; | ||
544 | __le32 MaxInputResponse; | ||
545 | __le32 OutputOffset; | ||
546 | __le32 OutputCount; | ||
547 | __le32 MaxOutputResponse; | ||
548 | __le32 Flags; | ||
549 | __u32 Reserved2; | ||
550 | char Buffer[0]; | ||
551 | } __packed; | ||
552 | |||
553 | struct smb2_ioctl_rsp { | ||
554 | struct smb2_hdr hdr; | ||
555 | __le16 StructureSize; /* Must be 57 */ | ||
556 | __u16 Reserved; | ||
557 | __le32 CtlCode; | ||
558 | __u64 PersistentFileId; /* opaque endianness */ | ||
559 | __u64 VolatileFileId; /* opaque endianness */ | ||
560 | __le32 InputOffset; | ||
561 | __le32 InputCount; | ||
562 | __le32 OutputOffset; | ||
563 | __le32 OutputCount; | ||
564 | __le32 Flags; | ||
565 | __u32 Reserved2; | ||
566 | /* char * buffer[] */ | ||
567 | } __packed; | ||
568 | |||
480 | /* Currently defined values for close flags */ | 569 | /* Currently defined values for close flags */ |
481 | #define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) | 570 | #define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) |
482 | struct smb2_close_req { | 571 | struct smb2_close_req { |
@@ -517,17 +606,25 @@ struct smb2_flush_rsp { | |||
517 | __le16 Reserved; | 606 | __le16 Reserved; |
518 | } __packed; | 607 | } __packed; |
519 | 608 | ||
609 | /* For read request Flags field below, following flag is defined for SMB3.02 */ | ||
610 | #define SMB2_READFLAG_READ_UNBUFFERED 0x01 | ||
611 | |||
612 | /* Channel field for read and write: exactly one of following flags can be set*/ | ||
613 | #define SMB2_CHANNEL_NONE 0x00000000 | ||
614 | #define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */ | ||
615 | #define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000001 /* SMB3.02 or later */ | ||
616 | |||
520 | struct smb2_read_req { | 617 | struct smb2_read_req { |
521 | struct smb2_hdr hdr; | 618 | struct smb2_hdr hdr; |
522 | __le16 StructureSize; /* Must be 49 */ | 619 | __le16 StructureSize; /* Must be 49 */ |
523 | __u8 Padding; /* offset from start of SMB2 header to place read */ | 620 | __u8 Padding; /* offset from start of SMB2 header to place read */ |
524 | __u8 Reserved; | 621 | __u8 Flags; /* MBZ unless SMB3.02 or later */ |
525 | __le32 Length; | 622 | __le32 Length; |
526 | __le64 Offset; | 623 | __le64 Offset; |
527 | __u64 PersistentFileId; /* opaque endianness */ | 624 | __u64 PersistentFileId; /* opaque endianness */ |
528 | __u64 VolatileFileId; /* opaque endianness */ | 625 | __u64 VolatileFileId; /* opaque endianness */ |
529 | __le32 MinimumCount; | 626 | __le32 MinimumCount; |
530 | __le32 Channel; /* Reserved MBZ */ | 627 | __le32 Channel; /* MBZ except for SMB3 or later */ |
531 | __le32 RemainingBytes; | 628 | __le32 RemainingBytes; |
532 | __le16 ReadChannelInfoOffset; /* Reserved MBZ */ | 629 | __le16 ReadChannelInfoOffset; /* Reserved MBZ */ |
533 | __le16 ReadChannelInfoLength; /* Reserved MBZ */ | 630 | __le16 ReadChannelInfoLength; /* Reserved MBZ */ |
@@ -545,8 +642,9 @@ struct smb2_read_rsp { | |||
545 | __u8 Buffer[1]; | 642 | __u8 Buffer[1]; |
546 | } __packed; | 643 | } __packed; |
547 | 644 | ||
548 | /* For write request Flags field below the following flag is defined: */ | 645 | /* For write request Flags field below the following flags are defined: */ |
549 | #define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 | 646 | #define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001 /* SMB2.1 or later */ |
647 | #define SMB2_WRITEFLAG_WRITE_UNBUFFERED 0x00000002 /* SMB3.02 or later */ | ||
550 | 648 | ||
551 | struct smb2_write_req { | 649 | struct smb2_write_req { |
552 | struct smb2_hdr hdr; | 650 | struct smb2_hdr hdr; |
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 2aa3535e38ce..1a5ecbed40ed 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h | |||
@@ -84,11 +84,9 @@ extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, | |||
84 | const char *from_name, const char *to_name, | 84 | const char *from_name, const char *to_name, |
85 | struct cifs_sb_info *cifs_sb); | 85 | struct cifs_sb_info *cifs_sb); |
86 | 86 | ||
87 | extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, | 87 | extern int smb2_open_file(const unsigned int xid, |
88 | const char *full_path, int disposition, | 88 | struct cifs_open_parms *oparms, |
89 | int desired_access, int create_options, | 89 | __u32 *oplock, FILE_ALL_INFO *buf); |
90 | struct cifs_fid *fid, __u32 *oplock, | ||
91 | FILE_ALL_INFO *buf, struct cifs_sb_info *cifs_sb); | ||
92 | extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); | 90 | extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); |
93 | extern int smb2_unlock_range(struct cifsFileInfo *cfile, | 91 | extern int smb2_unlock_range(struct cifsFileInfo *cfile, |
94 | struct file_lock *flock, const unsigned int xid); | 92 | struct file_lock *flock, const unsigned int xid); |
@@ -106,11 +104,13 @@ extern int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, | |||
106 | const char *tree, struct cifs_tcon *tcon, | 104 | const char *tree, struct cifs_tcon *tcon, |
107 | const struct nls_table *); | 105 | const struct nls_table *); |
108 | extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); | 106 | extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); |
109 | extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, | 107 | extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, |
110 | __le16 *path, u64 *persistent_fid, u64 *volatile_fid, | 108 | __le16 *path, __u8 *oplock, |
111 | __u32 desired_access, __u32 create_disposition, | 109 | struct smb2_file_all_info *buf); |
112 | __u32 file_attributes, __u32 create_options, | 110 | extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, |
113 | __u8 *oplock, struct smb2_file_all_info *buf); | 111 | u64 persistent_fid, u64 volatile_fid, u32 opcode, |
112 | bool is_fsctl, char *in_data, u32 indatalen, | ||
113 | char **out_data, u32 *plen /* returned data len */); | ||
114 | extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, | 114 | extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, |
115 | u64 persistent_file_id, u64 volatile_file_id); | 115 | u64 persistent_file_id, u64 volatile_file_id); |
116 | extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, | 116 | extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, |
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 01f0ac800780..4f2300d020c7 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c | |||
@@ -39,6 +39,82 @@ | |||
39 | #include "smb2status.h" | 39 | #include "smb2status.h" |
40 | #include "smb2glob.h" | 40 | #include "smb2glob.h" |
41 | 41 | ||
42 | static int | ||
43 | smb2_crypto_shash_allocate(struct TCP_Server_Info *server) | ||
44 | { | ||
45 | int rc; | ||
46 | unsigned int size; | ||
47 | |||
48 | if (server->secmech.sdeschmacsha256 != NULL) | ||
49 | return 0; /* already allocated */ | ||
50 | |||
51 | server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0); | ||
52 | if (IS_ERR(server->secmech.hmacsha256)) { | ||
53 | cifs_dbg(VFS, "could not allocate crypto hmacsha256\n"); | ||
54 | rc = PTR_ERR(server->secmech.hmacsha256); | ||
55 | server->secmech.hmacsha256 = NULL; | ||
56 | return rc; | ||
57 | } | ||
58 | |||
59 | size = sizeof(struct shash_desc) + | ||
60 | crypto_shash_descsize(server->secmech.hmacsha256); | ||
61 | server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL); | ||
62 | if (!server->secmech.sdeschmacsha256) { | ||
63 | crypto_free_shash(server->secmech.hmacsha256); | ||
64 | server->secmech.hmacsha256 = NULL; | ||
65 | return -ENOMEM; | ||
66 | } | ||
67 | server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256; | ||
68 | server->secmech.sdeschmacsha256->shash.flags = 0x0; | ||
69 | |||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | static int | ||
74 | smb3_crypto_shash_allocate(struct TCP_Server_Info *server) | ||
75 | { | ||
76 | unsigned int size; | ||
77 | int rc; | ||
78 | |||
79 | if (server->secmech.sdesccmacaes != NULL) | ||
80 | return 0; /* already allocated */ | ||
81 | |||
82 | rc = smb2_crypto_shash_allocate(server); | ||
83 | if (rc) | ||
84 | return rc; | ||
85 | |||
86 | server->secmech.cmacaes = crypto_alloc_shash("cmac(aes)", 0, 0); | ||
87 | if (IS_ERR(server->secmech.cmacaes)) { | ||
88 | cifs_dbg(VFS, "could not allocate crypto cmac-aes"); | ||
89 | kfree(server->secmech.sdeschmacsha256); | ||
90 | server->secmech.sdeschmacsha256 = NULL; | ||
91 | crypto_free_shash(server->secmech.hmacsha256); | ||
92 | server->secmech.hmacsha256 = NULL; | ||
93 | rc = PTR_ERR(server->secmech.cmacaes); | ||
94 | server->secmech.cmacaes = NULL; | ||
95 | return rc; | ||
96 | } | ||
97 | |||
98 | size = sizeof(struct shash_desc) + | ||
99 | crypto_shash_descsize(server->secmech.cmacaes); | ||
100 | server->secmech.sdesccmacaes = kmalloc(size, GFP_KERNEL); | ||
101 | if (!server->secmech.sdesccmacaes) { | ||
102 | cifs_dbg(VFS, "%s: Can't alloc cmacaes\n", __func__); | ||
103 | kfree(server->secmech.sdeschmacsha256); | ||
104 | server->secmech.sdeschmacsha256 = NULL; | ||
105 | crypto_free_shash(server->secmech.hmacsha256); | ||
106 | crypto_free_shash(server->secmech.cmacaes); | ||
107 | server->secmech.hmacsha256 = NULL; | ||
108 | server->secmech.cmacaes = NULL; | ||
109 | return -ENOMEM; | ||
110 | } | ||
111 | server->secmech.sdesccmacaes->shash.tfm = server->secmech.cmacaes; | ||
112 | server->secmech.sdesccmacaes->shash.flags = 0x0; | ||
113 | |||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | |||
42 | int | 118 | int |
43 | smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | 119 | smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) |
44 | { | 120 | { |
@@ -52,6 +128,12 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | |||
52 | memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); | 128 | memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); |
53 | memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); | 129 | memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); |
54 | 130 | ||
131 | rc = smb2_crypto_shash_allocate(server); | ||
132 | if (rc) { | ||
133 | cifs_dbg(VFS, "%s: shah256 alloc failed\n", __func__); | ||
134 | return rc; | ||
135 | } | ||
136 | |||
55 | rc = crypto_shash_setkey(server->secmech.hmacsha256, | 137 | rc = crypto_shash_setkey(server->secmech.hmacsha256, |
56 | server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); | 138 | server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); |
57 | if (rc) { | 139 | if (rc) { |
@@ -61,7 +143,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | |||
61 | 143 | ||
62 | rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); | 144 | rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); |
63 | if (rc) { | 145 | if (rc) { |
64 | cifs_dbg(VFS, "%s: Could not init md5\n", __func__); | 146 | cifs_dbg(VFS, "%s: Could not init sha256", __func__); |
65 | return rc; | 147 | return rc; |
66 | } | 148 | } |
67 | 149 | ||
@@ -116,11 +198,166 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | |||
116 | return rc; | 198 | return rc; |
117 | } | 199 | } |
118 | 200 | ||
201 | void | ||
202 | generate_smb3signingkey(struct TCP_Server_Info *server) | ||
203 | { | ||
204 | unsigned char zero = 0x0; | ||
205 | __u8 i[4] = {0, 0, 0, 1}; | ||
206 | __u8 L[4] = {0, 0, 0, 128}; | ||
207 | int rc = 0; | ||
208 | unsigned char prfhash[SMB2_HMACSHA256_SIZE]; | ||
209 | unsigned char *hashptr = prfhash; | ||
210 | |||
211 | memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE); | ||
212 | memset(server->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE); | ||
213 | |||
214 | rc = smb3_crypto_shash_allocate(server); | ||
215 | if (rc) { | ||
216 | cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__); | ||
217 | goto smb3signkey_ret; | ||
218 | } | ||
219 | |||
220 | rc = crypto_shash_setkey(server->secmech.hmacsha256, | ||
221 | server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); | ||
222 | if (rc) { | ||
223 | cifs_dbg(VFS, "%s: Could not set with session key\n", __func__); | ||
224 | goto smb3signkey_ret; | ||
225 | } | ||
226 | |||
227 | rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); | ||
228 | if (rc) { | ||
229 | cifs_dbg(VFS, "%s: Could not init sign hmac\n", __func__); | ||
230 | goto smb3signkey_ret; | ||
231 | } | ||
232 | |||
233 | rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, | ||
234 | i, 4); | ||
235 | if (rc) { | ||
236 | cifs_dbg(VFS, "%s: Could not update with n\n", __func__); | ||
237 | goto smb3signkey_ret; | ||
238 | } | ||
239 | |||
240 | rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, | ||
241 | "SMB2AESCMAC", 12); | ||
242 | if (rc) { | ||
243 | cifs_dbg(VFS, "%s: Could not update with label\n", __func__); | ||
244 | goto smb3signkey_ret; | ||
245 | } | ||
246 | |||
247 | rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, | ||
248 | &zero, 1); | ||
249 | if (rc) { | ||
250 | cifs_dbg(VFS, "%s: Could not update with zero\n", __func__); | ||
251 | goto smb3signkey_ret; | ||
252 | } | ||
253 | |||
254 | rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, | ||
255 | "SmbSign", 8); | ||
256 | if (rc) { | ||
257 | cifs_dbg(VFS, "%s: Could not update with context\n", __func__); | ||
258 | goto smb3signkey_ret; | ||
259 | } | ||
260 | |||
261 | rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, | ||
262 | L, 4); | ||
263 | if (rc) { | ||
264 | cifs_dbg(VFS, "%s: Could not update with L\n", __func__); | ||
265 | goto smb3signkey_ret; | ||
266 | } | ||
267 | |||
268 | rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash, | ||
269 | hashptr); | ||
270 | if (rc) { | ||
271 | cifs_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__); | ||
272 | goto smb3signkey_ret; | ||
273 | } | ||
274 | |||
275 | memcpy(server->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE); | ||
276 | |||
277 | smb3signkey_ret: | ||
278 | return; | ||
279 | } | ||
280 | |||
119 | int | 281 | int |
120 | smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | 282 | smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) |
121 | { | 283 | { |
122 | cifs_dbg(FYI, "smb3 signatures not supported yet\n"); | 284 | int i, rc; |
123 | return -EOPNOTSUPP; | 285 | unsigned char smb3_signature[SMB2_CMACAES_SIZE]; |
286 | unsigned char *sigptr = smb3_signature; | ||
287 | struct kvec *iov = rqst->rq_iov; | ||
288 | int n_vec = rqst->rq_nvec; | ||
289 | struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; | ||
290 | |||
291 | memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE); | ||
292 | memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); | ||
293 | |||
294 | rc = crypto_shash_setkey(server->secmech.cmacaes, | ||
295 | server->smb3signingkey, SMB2_CMACAES_SIZE); | ||
296 | if (rc) { | ||
297 | cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__); | ||
298 | return rc; | ||
299 | } | ||
300 | |||
301 | /* | ||
302 | * we already allocate sdesccmacaes when we init smb3 signing key, | ||
303 | * so unlike smb2 case we do not have to check here if secmech are | ||
304 | * initialized | ||
305 | */ | ||
306 | rc = crypto_shash_init(&server->secmech.sdesccmacaes->shash); | ||
307 | if (rc) { | ||
308 | cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__); | ||
309 | return rc; | ||
310 | } | ||
311 | |||
312 | for (i = 0; i < n_vec; i++) { | ||
313 | if (iov[i].iov_len == 0) | ||
314 | continue; | ||
315 | if (iov[i].iov_base == NULL) { | ||
316 | cifs_dbg(VFS, "null iovec entry"); | ||
317 | return -EIO; | ||
318 | } | ||
319 | /* | ||
320 | * The first entry includes a length field (which does not get | ||
321 | * signed that occupies the first 4 bytes before the header). | ||
322 | */ | ||
323 | if (i == 0) { | ||
324 | if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ | ||
325 | break; /* nothing to sign or corrupt header */ | ||
326 | rc = | ||
327 | crypto_shash_update( | ||
328 | &server->secmech.sdesccmacaes->shash, | ||
329 | iov[i].iov_base + 4, iov[i].iov_len - 4); | ||
330 | } else { | ||
331 | rc = | ||
332 | crypto_shash_update( | ||
333 | &server->secmech.sdesccmacaes->shash, | ||
334 | iov[i].iov_base, iov[i].iov_len); | ||
335 | } | ||
336 | if (rc) { | ||
337 | cifs_dbg(VFS, "%s: Couldn't update cmac aes with payload\n", | ||
338 | __func__); | ||
339 | return rc; | ||
340 | } | ||
341 | } | ||
342 | |||
343 | /* now hash over the rq_pages array */ | ||
344 | for (i = 0; i < rqst->rq_npages; i++) { | ||
345 | struct kvec p_iov; | ||
346 | |||
347 | cifs_rqst_page_to_kvec(rqst, i, &p_iov); | ||
348 | crypto_shash_update(&server->secmech.sdesccmacaes->shash, | ||
349 | p_iov.iov_base, p_iov.iov_len); | ||
350 | kunmap(rqst->rq_pages[i]); | ||
351 | } | ||
352 | |||
353 | rc = crypto_shash_final(&server->secmech.sdesccmacaes->shash, | ||
354 | sigptr); | ||
355 | if (rc) | ||
356 | cifs_dbg(VFS, "%s: Could not generate cmac aes\n", __func__); | ||
357 | |||
358 | memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE); | ||
359 | |||
360 | return rc; | ||
124 | } | 361 | } |
125 | 362 | ||
126 | /* must be called with server->srv_mutex held */ | 363 | /* must be called with server->srv_mutex held */ |
@@ -275,8 +512,7 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, | |||
275 | 512 | ||
276 | dump_smb(mid->resp_buf, min_t(u32, 80, len)); | 513 | dump_smb(mid->resp_buf, min_t(u32, 80, len)); |
277 | /* convert the length into a more usable form */ | 514 | /* convert the length into a more usable form */ |
278 | if ((len > 24) && | 515 | if (len > 24 && server->sign) { |
279 | (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) { | ||
280 | int rc; | 516 | int rc; |
281 | 517 | ||
282 | rc = smb2_verify_signature(&rqst, server); | 518 | rc = smb2_verify_signature(&rqst, server); |
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index 7056b891e087..d952ee48f4dc 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions | 2 | * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions |
3 | * | 3 | * |
4 | * Copyright (c) International Business Machines Corp., 2002,2009 | 4 | * Copyright (c) International Business Machines Corp., 2002,2013 |
5 | * Author(s): Steve French (sfrench@us.ibm.com) | 5 | * Author(s): Steve French (sfrench@us.ibm.com) |
6 | * | 6 | * |
7 | * This library is free software; you can redistribute it and/or modify | 7 | * This library is free software; you can redistribute it and/or modify |
@@ -22,7 +22,7 @@ | |||
22 | /* IOCTL information */ | 22 | /* IOCTL information */ |
23 | /* | 23 | /* |
24 | * List of ioctl/fsctl function codes that are or could be useful in the | 24 | * List of ioctl/fsctl function codes that are or could be useful in the |
25 | * future to remote clients like cifs or SMB2 client. There is probably | 25 | * future to remote clients like cifs or SMB2/SMB3 client. This is probably |
26 | * a slightly larger set of fsctls that NTFS local filesystem could handle, | 26 | * a slightly larger set of fsctls that NTFS local filesystem could handle, |
27 | * including the seven below that we do not have struct definitions for. | 27 | * including the seven below that we do not have struct definitions for. |
28 | * Even with protocol definitions for most of these now available, we still | 28 | * Even with protocol definitions for most of these now available, we still |
@@ -30,7 +30,13 @@ | |||
30 | * remotely. Some of the following, such as the encryption/compression ones | 30 | * remotely. Some of the following, such as the encryption/compression ones |
31 | * could be invoked from tools via a specialized hook into the VFS rather | 31 | * could be invoked from tools via a specialized hook into the VFS rather |
32 | * than via the standard vfs entry points | 32 | * than via the standard vfs entry points |
33 | * | ||
34 | * See MS-SMB2 Section 2.2.31 (last checked June 2013, all of that list are | ||
35 | * below). Additional detail on less common ones can be found in MS-FSCC | ||
36 | * section 2.3. | ||
33 | */ | 37 | */ |
38 | #define FSCTL_DFS_GET_REFERRALS 0x00060194 | ||
39 | #define FSCTL_DFS_GET_REFERRALS_EX 0x000601B0 | ||
34 | #define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000 | 40 | #define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000 |
35 | #define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004 | 41 | #define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004 |
36 | #define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008 | 42 | #define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008 |
@@ -71,14 +77,31 @@ | |||
71 | #define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */ | 77 | #define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */ |
72 | #define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */ | 78 | #define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */ |
73 | #define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */ | 79 | #define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */ |
80 | #define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */ | ||
74 | #define FSCTL_SIS_LINK_FILES 0x0009C104 | 81 | #define FSCTL_SIS_LINK_FILES 0x0009C104 |
75 | #define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */ | 82 | #define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */ |
76 | #define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */ | 83 | #define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */ |
77 | /* strange that the number for this op is not sequential with previous op */ | 84 | /* strange that the number for this op is not sequential with previous op */ |
78 | #define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */ | 85 | #define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */ |
86 | /* Enumerate previous versions of a file */ | ||
87 | #define FSCTL_SRV_ENUMERATE_SNAPSHOTS 0x00144064 | ||
88 | /* Retrieve an opaque file reference for server-side data movement ie copy */ | ||
89 | #define FSCTL_SRV_REQUEST_RESUME_KEY 0x00140078 | ||
90 | #define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ | ||
79 | #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ | 91 | #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ |
80 | #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ | 92 | #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ |
93 | #define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */ | ||
94 | /* Perform server-side data movement */ | ||
95 | #define FSCTL_SRV_COPYCHUNK 0x001440F2 | ||
96 | #define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2 | ||
97 | #define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC /* BB add struct */ | ||
98 | #define FSCTL_SRV_READ_HASH 0x001441BB /* BB add struct */ | ||
81 | 99 | ||
82 | #define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003 | 100 | #define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003 |
83 | #define IO_REPARSE_TAG_HSM 0xC0000004 | 101 | #define IO_REPARSE_TAG_HSM 0xC0000004 |
84 | #define IO_REPARSE_TAG_SIS 0x80000007 | 102 | #define IO_REPARSE_TAG_SIS 0x80000007 |
103 | |||
104 | /* fsctl flags */ | ||
105 | /* If Flags is set to this value, the request is an FSCTL not ioctl request */ | ||
106 | #define SMB2_0_IOCTL_IS_FSCTL 0x00000001 | ||
107 | |||
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bfbf4700d160..6fdcb1b4a106 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -447,7 +447,7 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) | |||
447 | { | 447 | { |
448 | int error; | 448 | int error; |
449 | 449 | ||
450 | error = wait_event_freezekillable(server->response_q, | 450 | error = wait_event_freezekillable_unsafe(server->response_q, |
451 | midQ->mid_state != MID_REQUEST_SUBMITTED); | 451 | midQ->mid_state != MID_REQUEST_SUBMITTED); |
452 | if (error < 0) | 452 | if (error < 0) |
453 | return -ERESTARTSYS; | 453 | return -ERESTARTSYS; |
@@ -463,7 +463,7 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) | |||
463 | struct mid_q_entry *mid; | 463 | struct mid_q_entry *mid; |
464 | 464 | ||
465 | /* enable signing if server requires it */ | 465 | /* enable signing if server requires it */ |
466 | if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | 466 | if (server->sign) |
467 | hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; | 467 | hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
468 | 468 | ||
469 | mid = AllocMidQEntry(hdr, server); | 469 | mid = AllocMidQEntry(hdr, server); |
@@ -612,7 +612,7 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, | |||
612 | dump_smb(mid->resp_buf, min_t(u32, 92, len)); | 612 | dump_smb(mid->resp_buf, min_t(u32, 92, len)); |
613 | 613 | ||
614 | /* convert the length into a more usable form */ | 614 | /* convert the length into a more usable form */ |
615 | if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | 615 | if (server->sign) { |
616 | struct kvec iov; | 616 | struct kvec iov; |
617 | int rc = 0; | 617 | int rc = 0; |
618 | struct smb_rqst rqst = { .rq_iov = &iov, | 618 | struct smb_rqst rqst = { .rq_iov = &iov, |
diff --git a/fs/coda/dir.c b/fs/coda/dir.c index b7d3a05c062c..190effc6a6fa 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c | |||
@@ -43,15 +43,14 @@ static int coda_rename(struct inode *old_inode, struct dentry *old_dentry, | |||
43 | struct inode *new_inode, struct dentry *new_dentry); | 43 | struct inode *new_inode, struct dentry *new_dentry); |
44 | 44 | ||
45 | /* dir file-ops */ | 45 | /* dir file-ops */ |
46 | static int coda_readdir(struct file *file, void *buf, filldir_t filldir); | 46 | static int coda_readdir(struct file *file, struct dir_context *ctx); |
47 | 47 | ||
48 | /* dentry ops */ | 48 | /* dentry ops */ |
49 | static int coda_dentry_revalidate(struct dentry *de, unsigned int flags); | 49 | static int coda_dentry_revalidate(struct dentry *de, unsigned int flags); |
50 | static int coda_dentry_delete(const struct dentry *); | 50 | static int coda_dentry_delete(const struct dentry *); |
51 | 51 | ||
52 | /* support routines */ | 52 | /* support routines */ |
53 | static int coda_venus_readdir(struct file *coda_file, void *buf, | 53 | static int coda_venus_readdir(struct file *, struct dir_context *); |
54 | filldir_t filldir); | ||
55 | 54 | ||
56 | /* same as fs/bad_inode.c */ | 55 | /* same as fs/bad_inode.c */ |
57 | static int coda_return_EIO(void) | 56 | static int coda_return_EIO(void) |
@@ -85,7 +84,7 @@ const struct inode_operations coda_dir_inode_operations = | |||
85 | const struct file_operations coda_dir_operations = { | 84 | const struct file_operations coda_dir_operations = { |
86 | .llseek = generic_file_llseek, | 85 | .llseek = generic_file_llseek, |
87 | .read = generic_read_dir, | 86 | .read = generic_read_dir, |
88 | .readdir = coda_readdir, | 87 | .iterate = coda_readdir, |
89 | .open = coda_open, | 88 | .open = coda_open, |
90 | .release = coda_release, | 89 | .release = coda_release, |
91 | .fsync = coda_fsync, | 90 | .fsync = coda_fsync, |
@@ -378,7 +377,7 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
378 | 377 | ||
379 | 378 | ||
380 | /* file operations for directories */ | 379 | /* file operations for directories */ |
381 | static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) | 380 | static int coda_readdir(struct file *coda_file, struct dir_context *ctx) |
382 | { | 381 | { |
383 | struct coda_file_info *cfi; | 382 | struct coda_file_info *cfi; |
384 | struct file *host_file; | 383 | struct file *host_file; |
@@ -391,30 +390,19 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) | |||
391 | if (!host_file->f_op) | 390 | if (!host_file->f_op) |
392 | return -ENOTDIR; | 391 | return -ENOTDIR; |
393 | 392 | ||
394 | if (host_file->f_op->readdir) | 393 | if (host_file->f_op->iterate) { |
395 | { | ||
396 | /* potemkin case: we were handed a directory inode. | ||
397 | * We can't use vfs_readdir because we have to keep the file | ||
398 | * position in sync between the coda_file and the host_file. | ||
399 | * and as such we need grab the inode mutex. */ | ||
400 | struct inode *host_inode = file_inode(host_file); | 394 | struct inode *host_inode = file_inode(host_file); |
401 | |||
402 | mutex_lock(&host_inode->i_mutex); | 395 | mutex_lock(&host_inode->i_mutex); |
403 | host_file->f_pos = coda_file->f_pos; | ||
404 | |||
405 | ret = -ENOENT; | 396 | ret = -ENOENT; |
406 | if (!IS_DEADDIR(host_inode)) { | 397 | if (!IS_DEADDIR(host_inode)) { |
407 | ret = host_file->f_op->readdir(host_file, buf, filldir); | 398 | ret = host_file->f_op->iterate(host_file, ctx); |
408 | file_accessed(host_file); | 399 | file_accessed(host_file); |
409 | } | 400 | } |
410 | |||
411 | coda_file->f_pos = host_file->f_pos; | ||
412 | mutex_unlock(&host_inode->i_mutex); | 401 | mutex_unlock(&host_inode->i_mutex); |
402 | return ret; | ||
413 | } | 403 | } |
414 | else /* Venus: we must read Venus dirents from a file */ | 404 | /* Venus: we must read Venus dirents from a file */ |
415 | ret = coda_venus_readdir(coda_file, buf, filldir); | 405 | return coda_venus_readdir(coda_file, ctx); |
416 | |||
417 | return ret; | ||
418 | } | 406 | } |
419 | 407 | ||
420 | static inline unsigned int CDT2DT(unsigned char cdt) | 408 | static inline unsigned int CDT2DT(unsigned char cdt) |
@@ -437,10 +425,8 @@ static inline unsigned int CDT2DT(unsigned char cdt) | |||
437 | } | 425 | } |
438 | 426 | ||
439 | /* support routines */ | 427 | /* support routines */ |
440 | static int coda_venus_readdir(struct file *coda_file, void *buf, | 428 | static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx) |
441 | filldir_t filldir) | ||
442 | { | 429 | { |
443 | int result = 0; /* # of entries returned */ | ||
444 | struct coda_file_info *cfi; | 430 | struct coda_file_info *cfi; |
445 | struct coda_inode_info *cii; | 431 | struct coda_inode_info *cii; |
446 | struct file *host_file; | 432 | struct file *host_file; |
@@ -462,23 +448,12 @@ static int coda_venus_readdir(struct file *coda_file, void *buf, | |||
462 | vdir = kmalloc(sizeof(*vdir), GFP_KERNEL); | 448 | vdir = kmalloc(sizeof(*vdir), GFP_KERNEL); |
463 | if (!vdir) return -ENOMEM; | 449 | if (!vdir) return -ENOMEM; |
464 | 450 | ||
465 | if (coda_file->f_pos == 0) { | 451 | if (!dir_emit_dots(coda_file, ctx)) |
466 | ret = filldir(buf, ".", 1, 0, de->d_inode->i_ino, DT_DIR); | 452 | goto out; |
467 | if (ret < 0) | 453 | |
468 | goto out; | ||
469 | result++; | ||
470 | coda_file->f_pos++; | ||
471 | } | ||
472 | if (coda_file->f_pos == 1) { | ||
473 | ret = filldir(buf, "..", 2, 1, parent_ino(de), DT_DIR); | ||
474 | if (ret < 0) | ||
475 | goto out; | ||
476 | result++; | ||
477 | coda_file->f_pos++; | ||
478 | } | ||
479 | while (1) { | 454 | while (1) { |
480 | /* read entries from the directory file */ | 455 | /* read entries from the directory file */ |
481 | ret = kernel_read(host_file, coda_file->f_pos - 2, (char *)vdir, | 456 | ret = kernel_read(host_file, ctx->pos - 2, (char *)vdir, |
482 | sizeof(*vdir)); | 457 | sizeof(*vdir)); |
483 | if (ret < 0) { | 458 | if (ret < 0) { |
484 | printk(KERN_ERR "coda readdir: read dir %s failed %d\n", | 459 | printk(KERN_ERR "coda readdir: read dir %s failed %d\n", |
@@ -507,32 +482,23 @@ static int coda_venus_readdir(struct file *coda_file, void *buf, | |||
507 | 482 | ||
508 | /* Make sure we skip '.' and '..', we already got those */ | 483 | /* Make sure we skip '.' and '..', we already got those */ |
509 | if (name.name[0] == '.' && (name.len == 1 || | 484 | if (name.name[0] == '.' && (name.len == 1 || |
510 | (vdir->d_name[1] == '.' && name.len == 2))) | 485 | (name.name[1] == '.' && name.len == 2))) |
511 | vdir->d_fileno = name.len = 0; | 486 | vdir->d_fileno = name.len = 0; |
512 | 487 | ||
513 | /* skip null entries */ | 488 | /* skip null entries */ |
514 | if (vdir->d_fileno && name.len) { | 489 | if (vdir->d_fileno && name.len) { |
515 | /* try to look up this entry in the dcache, that way | 490 | ino = vdir->d_fileno; |
516 | * userspace doesn't have to worry about breaking | ||
517 | * getcwd by having mismatched inode numbers for | ||
518 | * internal volume mountpoints. */ | ||
519 | ino = find_inode_number(de, &name); | ||
520 | if (!ino) ino = vdir->d_fileno; | ||
521 | |||
522 | type = CDT2DT(vdir->d_type); | 491 | type = CDT2DT(vdir->d_type); |
523 | ret = filldir(buf, name.name, name.len, | 492 | if (!dir_emit(ctx, name.name, name.len, ino, type)) |
524 | coda_file->f_pos, ino, type); | 493 | break; |
525 | /* failure means no space for filling in this round */ | ||
526 | if (ret < 0) break; | ||
527 | result++; | ||
528 | } | 494 | } |
529 | /* we'll always have progress because d_reclen is unsigned and | 495 | /* we'll always have progress because d_reclen is unsigned and |
530 | * we've already established it is non-zero. */ | 496 | * we've already established it is non-zero. */ |
531 | coda_file->f_pos += vdir->d_reclen; | 497 | ctx->pos += vdir->d_reclen; |
532 | } | 498 | } |
533 | out: | 499 | out: |
534 | kfree(vdir); | 500 | kfree(vdir); |
535 | return result ? result : ret; | 501 | return 0; |
536 | } | 502 | } |
537 | 503 | ||
538 | /* called when a cache lookup succeeds */ | 504 | /* called when a cache lookup succeeds */ |
@@ -560,7 +526,7 @@ static int coda_dentry_revalidate(struct dentry *de, unsigned int flags) | |||
560 | if (cii->c_flags & C_FLUSH) | 526 | if (cii->c_flags & C_FLUSH) |
561 | coda_flag_inode_children(inode, C_FLUSH); | 527 | coda_flag_inode_children(inode, C_FLUSH); |
562 | 528 | ||
563 | if (de->d_count > 1) | 529 | if (d_count(de) > 1) |
564 | /* pretend it's valid, but don't change the flags */ | 530 | /* pretend it's valid, but don't change the flags */ |
565 | goto out; | 531 | goto out; |
566 | 532 | ||
diff --git a/fs/compat.c b/fs/compat.c index fc3b55dce184..6af20de2c1a3 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -832,6 +832,7 @@ struct compat_old_linux_dirent { | |||
832 | }; | 832 | }; |
833 | 833 | ||
834 | struct compat_readdir_callback { | 834 | struct compat_readdir_callback { |
835 | struct dir_context ctx; | ||
835 | struct compat_old_linux_dirent __user *dirent; | 836 | struct compat_old_linux_dirent __user *dirent; |
836 | int result; | 837 | int result; |
837 | }; | 838 | }; |
@@ -873,15 +874,15 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, | |||
873 | { | 874 | { |
874 | int error; | 875 | int error; |
875 | struct fd f = fdget(fd); | 876 | struct fd f = fdget(fd); |
876 | struct compat_readdir_callback buf; | 877 | struct compat_readdir_callback buf = { |
878 | .ctx.actor = compat_fillonedir, | ||
879 | .dirent = dirent | ||
880 | }; | ||
877 | 881 | ||
878 | if (!f.file) | 882 | if (!f.file) |
879 | return -EBADF; | 883 | return -EBADF; |
880 | 884 | ||
881 | buf.result = 0; | 885 | error = iterate_dir(f.file, &buf.ctx); |
882 | buf.dirent = dirent; | ||
883 | |||
884 | error = vfs_readdir(f.file, compat_fillonedir, &buf); | ||
885 | if (buf.result) | 886 | if (buf.result) |
886 | error = buf.result; | 887 | error = buf.result; |
887 | 888 | ||
@@ -897,6 +898,7 @@ struct compat_linux_dirent { | |||
897 | }; | 898 | }; |
898 | 899 | ||
899 | struct compat_getdents_callback { | 900 | struct compat_getdents_callback { |
901 | struct dir_context ctx; | ||
900 | struct compat_linux_dirent __user *current_dir; | 902 | struct compat_linux_dirent __user *current_dir; |
901 | struct compat_linux_dirent __user *previous; | 903 | struct compat_linux_dirent __user *previous; |
902 | int count; | 904 | int count; |
@@ -951,7 +953,11 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
951 | { | 953 | { |
952 | struct fd f; | 954 | struct fd f; |
953 | struct compat_linux_dirent __user * lastdirent; | 955 | struct compat_linux_dirent __user * lastdirent; |
954 | struct compat_getdents_callback buf; | 956 | struct compat_getdents_callback buf = { |
957 | .ctx.actor = compat_filldir, | ||
958 | .current_dir = dirent, | ||
959 | .count = count | ||
960 | }; | ||
955 | int error; | 961 | int error; |
956 | 962 | ||
957 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 963 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
@@ -961,17 +967,12 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
961 | if (!f.file) | 967 | if (!f.file) |
962 | return -EBADF; | 968 | return -EBADF; |
963 | 969 | ||
964 | buf.current_dir = dirent; | 970 | error = iterate_dir(f.file, &buf.ctx); |
965 | buf.previous = NULL; | ||
966 | buf.count = count; | ||
967 | buf.error = 0; | ||
968 | |||
969 | error = vfs_readdir(f.file, compat_filldir, &buf); | ||
970 | if (error >= 0) | 971 | if (error >= 0) |
971 | error = buf.error; | 972 | error = buf.error; |
972 | lastdirent = buf.previous; | 973 | lastdirent = buf.previous; |
973 | if (lastdirent) { | 974 | if (lastdirent) { |
974 | if (put_user(f.file->f_pos, &lastdirent->d_off)) | 975 | if (put_user(buf.ctx.pos, &lastdirent->d_off)) |
975 | error = -EFAULT; | 976 | error = -EFAULT; |
976 | else | 977 | else |
977 | error = count - buf.count; | 978 | error = count - buf.count; |
@@ -983,6 +984,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
983 | #ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 | 984 | #ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 |
984 | 985 | ||
985 | struct compat_getdents_callback64 { | 986 | struct compat_getdents_callback64 { |
987 | struct dir_context ctx; | ||
986 | struct linux_dirent64 __user *current_dir; | 988 | struct linux_dirent64 __user *current_dir; |
987 | struct linux_dirent64 __user *previous; | 989 | struct linux_dirent64 __user *previous; |
988 | int count; | 990 | int count; |
@@ -1036,7 +1038,11 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
1036 | { | 1038 | { |
1037 | struct fd f; | 1039 | struct fd f; |
1038 | struct linux_dirent64 __user * lastdirent; | 1040 | struct linux_dirent64 __user * lastdirent; |
1039 | struct compat_getdents_callback64 buf; | 1041 | struct compat_getdents_callback64 buf = { |
1042 | .ctx.actor = compat_filldir64, | ||
1043 | .current_dir = dirent, | ||
1044 | .count = count | ||
1045 | }; | ||
1040 | int error; | 1046 | int error; |
1041 | 1047 | ||
1042 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 1048 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
@@ -1046,17 +1052,12 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
1046 | if (!f.file) | 1052 | if (!f.file) |
1047 | return -EBADF; | 1053 | return -EBADF; |
1048 | 1054 | ||
1049 | buf.current_dir = dirent; | 1055 | error = iterate_dir(f.file, &buf.ctx); |
1050 | buf.previous = NULL; | ||
1051 | buf.count = count; | ||
1052 | buf.error = 0; | ||
1053 | |||
1054 | error = vfs_readdir(f.file, compat_filldir64, &buf); | ||
1055 | if (error >= 0) | 1056 | if (error >= 0) |
1056 | error = buf.error; | 1057 | error = buf.error; |
1057 | lastdirent = buf.previous; | 1058 | lastdirent = buf.previous; |
1058 | if (lastdirent) { | 1059 | if (lastdirent) { |
1059 | typeof(lastdirent->d_off) d_off = f.file->f_pos; | 1060 | typeof(lastdirent->d_off) d_off = buf.ctx.pos; |
1060 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) | 1061 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) |
1061 | error = -EFAULT; | 1062 | error = -EFAULT; |
1062 | else | 1063 | else |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 996cdc5abb85..5d19acfa7c6c 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -66,7 +66,6 @@ | |||
66 | #include <linux/gigaset_dev.h> | 66 | #include <linux/gigaset_dev.h> |
67 | 67 | ||
68 | #ifdef CONFIG_BLOCK | 68 | #ifdef CONFIG_BLOCK |
69 | #include <linux/loop.h> | ||
70 | #include <linux/cdrom.h> | 69 | #include <linux/cdrom.h> |
71 | #include <linux/fd.h> | 70 | #include <linux/fd.h> |
72 | #include <scsi/scsi.h> | 71 | #include <scsi/scsi.h> |
@@ -954,8 +953,6 @@ COMPATIBLE_IOCTL(MTIOCTOP) | |||
954 | /* Socket level stuff */ | 953 | /* Socket level stuff */ |
955 | COMPATIBLE_IOCTL(FIOQSIZE) | 954 | COMPATIBLE_IOCTL(FIOQSIZE) |
956 | #ifdef CONFIG_BLOCK | 955 | #ifdef CONFIG_BLOCK |
957 | /* loop */ | ||
958 | IGNORE_IOCTL(LOOP_CLR_FD) | ||
959 | /* md calls this on random blockdevs */ | 956 | /* md calls this on random blockdevs */ |
960 | IGNORE_IOCTL(RAID_VERSION) | 957 | IGNORE_IOCTL(RAID_VERSION) |
961 | /* qemu/qemu-img might call these two on plain files for probing */ | 958 | /* qemu/qemu-img might call these two on plain files for probing */ |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7aabc6ad4e9b..277bd1be21fd 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -387,7 +387,7 @@ static void remove_dir(struct dentry * d) | |||
387 | if (d->d_inode) | 387 | if (d->d_inode) |
388 | simple_rmdir(parent->d_inode,d); | 388 | simple_rmdir(parent->d_inode,d); |
389 | 389 | ||
390 | pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count); | 390 | pr_debug(" o %s removing done (%d)\n",d->d_name.name, d_count(d)); |
391 | 391 | ||
392 | dput(parent); | 392 | dput(parent); |
393 | } | 393 | } |
@@ -660,19 +660,15 @@ static int create_default_group(struct config_group *parent_group, | |||
660 | struct config_group *group) | 660 | struct config_group *group) |
661 | { | 661 | { |
662 | int ret; | 662 | int ret; |
663 | struct qstr name; | ||
664 | struct configfs_dirent *sd; | 663 | struct configfs_dirent *sd; |
665 | /* We trust the caller holds a reference to parent */ | 664 | /* We trust the caller holds a reference to parent */ |
666 | struct dentry *child, *parent = parent_group->cg_item.ci_dentry; | 665 | struct dentry *child, *parent = parent_group->cg_item.ci_dentry; |
667 | 666 | ||
668 | if (!group->cg_item.ci_name) | 667 | if (!group->cg_item.ci_name) |
669 | group->cg_item.ci_name = group->cg_item.ci_namebuf; | 668 | group->cg_item.ci_name = group->cg_item.ci_namebuf; |
670 | name.name = group->cg_item.ci_name; | ||
671 | name.len = strlen(name.name); | ||
672 | name.hash = full_name_hash(name.name, name.len); | ||
673 | 669 | ||
674 | ret = -ENOMEM; | 670 | ret = -ENOMEM; |
675 | child = d_alloc(parent, &name); | 671 | child = d_alloc_name(parent, group->cg_item.ci_name); |
676 | if (child) { | 672 | if (child) { |
677 | d_add(child, NULL); | 673 | d_add(child, NULL); |
678 | 674 | ||
@@ -1532,84 +1528,66 @@ static inline unsigned char dt_type(struct configfs_dirent *sd) | |||
1532 | return (sd->s_mode >> 12) & 15; | 1528 | return (sd->s_mode >> 12) & 15; |
1533 | } | 1529 | } |
1534 | 1530 | ||
1535 | static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | 1531 | static int configfs_readdir(struct file *file, struct dir_context *ctx) |
1536 | { | 1532 | { |
1537 | struct dentry *dentry = filp->f_path.dentry; | 1533 | struct dentry *dentry = file->f_path.dentry; |
1538 | struct super_block *sb = dentry->d_sb; | 1534 | struct super_block *sb = dentry->d_sb; |
1539 | struct configfs_dirent * parent_sd = dentry->d_fsdata; | 1535 | struct configfs_dirent * parent_sd = dentry->d_fsdata; |
1540 | struct configfs_dirent *cursor = filp->private_data; | 1536 | struct configfs_dirent *cursor = file->private_data; |
1541 | struct list_head *p, *q = &cursor->s_sibling; | 1537 | struct list_head *p, *q = &cursor->s_sibling; |
1542 | ino_t ino = 0; | 1538 | ino_t ino = 0; |
1543 | int i = filp->f_pos; | ||
1544 | 1539 | ||
1545 | switch (i) { | 1540 | if (!dir_emit_dots(file, ctx)) |
1546 | case 0: | 1541 | return 0; |
1547 | ino = dentry->d_inode->i_ino; | 1542 | if (ctx->pos == 2) { |
1548 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | 1543 | spin_lock(&configfs_dirent_lock); |
1549 | break; | 1544 | list_move(q, &parent_sd->s_children); |
1550 | filp->f_pos++; | 1545 | spin_unlock(&configfs_dirent_lock); |
1551 | i++; | 1546 | } |
1552 | /* fallthrough */ | 1547 | for (p = q->next; p != &parent_sd->s_children; p = p->next) { |
1553 | case 1: | 1548 | struct configfs_dirent *next; |
1554 | ino = parent_ino(dentry); | 1549 | const char *name; |
1555 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | 1550 | int len; |
1556 | break; | 1551 | struct inode *inode = NULL; |
1557 | filp->f_pos++; | 1552 | |
1558 | i++; | 1553 | next = list_entry(p, struct configfs_dirent, s_sibling); |
1559 | /* fallthrough */ | 1554 | if (!next->s_element) |
1560 | default: | 1555 | continue; |
1561 | if (filp->f_pos == 2) { | ||
1562 | spin_lock(&configfs_dirent_lock); | ||
1563 | list_move(q, &parent_sd->s_children); | ||
1564 | spin_unlock(&configfs_dirent_lock); | ||
1565 | } | ||
1566 | for (p=q->next; p!= &parent_sd->s_children; p=p->next) { | ||
1567 | struct configfs_dirent *next; | ||
1568 | const char * name; | ||
1569 | int len; | ||
1570 | struct inode *inode = NULL; | ||
1571 | 1556 | ||
1572 | next = list_entry(p, struct configfs_dirent, | 1557 | name = configfs_get_name(next); |
1573 | s_sibling); | 1558 | len = strlen(name); |
1574 | if (!next->s_element) | ||
1575 | continue; | ||
1576 | |||
1577 | name = configfs_get_name(next); | ||
1578 | len = strlen(name); | ||
1579 | |||
1580 | /* | ||
1581 | * We'll have a dentry and an inode for | ||
1582 | * PINNED items and for open attribute | ||
1583 | * files. We lock here to prevent a race | ||
1584 | * with configfs_d_iput() clearing | ||
1585 | * s_dentry before calling iput(). | ||
1586 | * | ||
1587 | * Why do we go to the trouble? If | ||
1588 | * someone has an attribute file open, | ||
1589 | * the inode number should match until | ||
1590 | * they close it. Beyond that, we don't | ||
1591 | * care. | ||
1592 | */ | ||
1593 | spin_lock(&configfs_dirent_lock); | ||
1594 | dentry = next->s_dentry; | ||
1595 | if (dentry) | ||
1596 | inode = dentry->d_inode; | ||
1597 | if (inode) | ||
1598 | ino = inode->i_ino; | ||
1599 | spin_unlock(&configfs_dirent_lock); | ||
1600 | if (!inode) | ||
1601 | ino = iunique(sb, 2); | ||
1602 | 1559 | ||
1603 | if (filldir(dirent, name, len, filp->f_pos, ino, | 1560 | /* |
1604 | dt_type(next)) < 0) | 1561 | * We'll have a dentry and an inode for |
1605 | return 0; | 1562 | * PINNED items and for open attribute |
1563 | * files. We lock here to prevent a race | ||
1564 | * with configfs_d_iput() clearing | ||
1565 | * s_dentry before calling iput(). | ||
1566 | * | ||
1567 | * Why do we go to the trouble? If | ||
1568 | * someone has an attribute file open, | ||
1569 | * the inode number should match until | ||
1570 | * they close it. Beyond that, we don't | ||
1571 | * care. | ||
1572 | */ | ||
1573 | spin_lock(&configfs_dirent_lock); | ||
1574 | dentry = next->s_dentry; | ||
1575 | if (dentry) | ||
1576 | inode = dentry->d_inode; | ||
1577 | if (inode) | ||
1578 | ino = inode->i_ino; | ||
1579 | spin_unlock(&configfs_dirent_lock); | ||
1580 | if (!inode) | ||
1581 | ino = iunique(sb, 2); | ||
1606 | 1582 | ||
1607 | spin_lock(&configfs_dirent_lock); | 1583 | if (!dir_emit(ctx, name, len, ino, dt_type(next))) |
1608 | list_move(q, p); | 1584 | return 0; |
1609 | spin_unlock(&configfs_dirent_lock); | 1585 | |
1610 | p = q; | 1586 | spin_lock(&configfs_dirent_lock); |
1611 | filp->f_pos++; | 1587 | list_move(q, p); |
1612 | } | 1588 | spin_unlock(&configfs_dirent_lock); |
1589 | p = q; | ||
1590 | ctx->pos++; | ||
1613 | } | 1591 | } |
1614 | return 0; | 1592 | return 0; |
1615 | } | 1593 | } |
@@ -1661,14 +1639,13 @@ const struct file_operations configfs_dir_operations = { | |||
1661 | .release = configfs_dir_close, | 1639 | .release = configfs_dir_close, |
1662 | .llseek = configfs_dir_lseek, | 1640 | .llseek = configfs_dir_lseek, |
1663 | .read = generic_read_dir, | 1641 | .read = generic_read_dir, |
1664 | .readdir = configfs_readdir, | 1642 | .iterate = configfs_readdir, |
1665 | }; | 1643 | }; |
1666 | 1644 | ||
1667 | int configfs_register_subsystem(struct configfs_subsystem *subsys) | 1645 | int configfs_register_subsystem(struct configfs_subsystem *subsys) |
1668 | { | 1646 | { |
1669 | int err; | 1647 | int err; |
1670 | struct config_group *group = &subsys->su_group; | 1648 | struct config_group *group = &subsys->su_group; |
1671 | struct qstr name; | ||
1672 | struct dentry *dentry; | 1649 | struct dentry *dentry; |
1673 | struct dentry *root; | 1650 | struct dentry *root; |
1674 | struct configfs_dirent *sd; | 1651 | struct configfs_dirent *sd; |
@@ -1685,12 +1662,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) | |||
1685 | 1662 | ||
1686 | mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT); | 1663 | mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT); |
1687 | 1664 | ||
1688 | name.name = group->cg_item.ci_name; | ||
1689 | name.len = strlen(name.name); | ||
1690 | name.hash = full_name_hash(name.name, name.len); | ||
1691 | |||
1692 | err = -ENOMEM; | 1665 | err = -ENOMEM; |
1693 | dentry = d_alloc(root, &name); | 1666 | dentry = d_alloc_name(root, group->cg_item.ci_name); |
1694 | if (dentry) { | 1667 | if (dentry) { |
1695 | d_add(dentry, NULL); | 1668 | d_add(dentry, NULL); |
1696 | 1669 | ||
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 2b6cb23dd14e..1d1c41f1014d 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c | |||
@@ -203,7 +203,7 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof | |||
203 | mutex_lock(&buffer->mutex); | 203 | mutex_lock(&buffer->mutex); |
204 | len = fill_write_buffer(buffer, buf, count); | 204 | len = fill_write_buffer(buffer, buf, count); |
205 | if (len > 0) | 205 | if (len > 0) |
206 | len = flush_write_buffer(file->f_path.dentry, buffer, count); | 206 | len = flush_write_buffer(file->f_path.dentry, buffer, len); |
207 | if (len > 0) | 207 | if (len > 0) |
208 | *ppos += len; | 208 | *ppos += len; |
209 | mutex_unlock(&buffer->mutex); | 209 | mutex_unlock(&buffer->mutex); |
diff --git a/fs/coredump.c b/fs/coredump.c index dafafbafa731..72f816d6cad9 100644 --- a/fs/coredump.c +++ b/fs/coredump.c | |||
@@ -45,69 +45,79 @@ | |||
45 | #include <trace/events/sched.h> | 45 | #include <trace/events/sched.h> |
46 | 46 | ||
47 | int core_uses_pid; | 47 | int core_uses_pid; |
48 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | ||
49 | unsigned int core_pipe_limit; | 48 | unsigned int core_pipe_limit; |
49 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | ||
50 | static int core_name_size = CORENAME_MAX_SIZE; | ||
50 | 51 | ||
51 | struct core_name { | 52 | struct core_name { |
52 | char *corename; | 53 | char *corename; |
53 | int used, size; | 54 | int used, size; |
54 | }; | 55 | }; |
55 | static atomic_t call_count = ATOMIC_INIT(1); | ||
56 | 56 | ||
57 | /* The maximal length of core_pattern is also specified in sysctl.c */ | 57 | /* The maximal length of core_pattern is also specified in sysctl.c */ |
58 | 58 | ||
59 | static int expand_corename(struct core_name *cn) | 59 | static int expand_corename(struct core_name *cn, int size) |
60 | { | 60 | { |
61 | char *old_corename = cn->corename; | 61 | char *corename = krealloc(cn->corename, size, GFP_KERNEL); |
62 | |||
63 | cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); | ||
64 | cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); | ||
65 | 62 | ||
66 | if (!cn->corename) { | 63 | if (!corename) |
67 | kfree(old_corename); | ||
68 | return -ENOMEM; | 64 | return -ENOMEM; |
69 | } | ||
70 | 65 | ||
66 | if (size > core_name_size) /* racy but harmless */ | ||
67 | core_name_size = size; | ||
68 | |||
69 | cn->size = ksize(corename); | ||
70 | cn->corename = corename; | ||
71 | return 0; | 71 | return 0; |
72 | } | 72 | } |
73 | 73 | ||
74 | static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) | ||
75 | { | ||
76 | int free, need; | ||
77 | |||
78 | again: | ||
79 | free = cn->size - cn->used; | ||
80 | need = vsnprintf(cn->corename + cn->used, free, fmt, arg); | ||
81 | if (need < free) { | ||
82 | cn->used += need; | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | if (!expand_corename(cn, cn->size + need - free + 1)) | ||
87 | goto again; | ||
88 | |||
89 | return -ENOMEM; | ||
90 | } | ||
91 | |||
74 | static int cn_printf(struct core_name *cn, const char *fmt, ...) | 92 | static int cn_printf(struct core_name *cn, const char *fmt, ...) |
75 | { | 93 | { |
76 | char *cur; | ||
77 | int need; | ||
78 | int ret; | ||
79 | va_list arg; | 94 | va_list arg; |
95 | int ret; | ||
80 | 96 | ||
81 | va_start(arg, fmt); | 97 | va_start(arg, fmt); |
82 | need = vsnprintf(NULL, 0, fmt, arg); | 98 | ret = cn_vprintf(cn, fmt, arg); |
83 | va_end(arg); | 99 | va_end(arg); |
84 | 100 | ||
85 | if (likely(need < cn->size - cn->used - 1)) | 101 | return ret; |
86 | goto out_printf; | 102 | } |
87 | 103 | ||
88 | ret = expand_corename(cn); | 104 | static int cn_esc_printf(struct core_name *cn, const char *fmt, ...) |
89 | if (ret) | 105 | { |
90 | goto expand_fail; | 106 | int cur = cn->used; |
107 | va_list arg; | ||
108 | int ret; | ||
91 | 109 | ||
92 | out_printf: | ||
93 | cur = cn->corename + cn->used; | ||
94 | va_start(arg, fmt); | 110 | va_start(arg, fmt); |
95 | vsnprintf(cur, need + 1, fmt, arg); | 111 | ret = cn_vprintf(cn, fmt, arg); |
96 | va_end(arg); | 112 | va_end(arg); |
97 | cn->used += need; | ||
98 | return 0; | ||
99 | 113 | ||
100 | expand_fail: | 114 | for (; cur < cn->used; ++cur) { |
115 | if (cn->corename[cur] == '/') | ||
116 | cn->corename[cur] = '!'; | ||
117 | } | ||
101 | return ret; | 118 | return ret; |
102 | } | 119 | } |
103 | 120 | ||
104 | static void cn_escape(char *str) | ||
105 | { | ||
106 | for (; *str; str++) | ||
107 | if (*str == '/') | ||
108 | *str = '!'; | ||
109 | } | ||
110 | |||
111 | static int cn_print_exe_file(struct core_name *cn) | 121 | static int cn_print_exe_file(struct core_name *cn) |
112 | { | 122 | { |
113 | struct file *exe_file; | 123 | struct file *exe_file; |
@@ -115,12 +125,8 @@ static int cn_print_exe_file(struct core_name *cn) | |||
115 | int ret; | 125 | int ret; |
116 | 126 | ||
117 | exe_file = get_mm_exe_file(current->mm); | 127 | exe_file = get_mm_exe_file(current->mm); |
118 | if (!exe_file) { | 128 | if (!exe_file) |
119 | char *commstart = cn->corename + cn->used; | 129 | return cn_esc_printf(cn, "%s (path unknown)", current->comm); |
120 | ret = cn_printf(cn, "%s (path unknown)", current->comm); | ||
121 | cn_escape(commstart); | ||
122 | return ret; | ||
123 | } | ||
124 | 130 | ||
125 | pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); | 131 | pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); |
126 | if (!pathbuf) { | 132 | if (!pathbuf) { |
@@ -134,9 +140,7 @@ static int cn_print_exe_file(struct core_name *cn) | |||
134 | goto free_buf; | 140 | goto free_buf; |
135 | } | 141 | } |
136 | 142 | ||
137 | cn_escape(path); | 143 | ret = cn_esc_printf(cn, "%s", path); |
138 | |||
139 | ret = cn_printf(cn, "%s", path); | ||
140 | 144 | ||
141 | free_buf: | 145 | free_buf: |
142 | kfree(pathbuf); | 146 | kfree(pathbuf); |
@@ -157,19 +161,19 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) | |||
157 | int pid_in_pattern = 0; | 161 | int pid_in_pattern = 0; |
158 | int err = 0; | 162 | int err = 0; |
159 | 163 | ||
160 | cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); | ||
161 | cn->corename = kmalloc(cn->size, GFP_KERNEL); | ||
162 | cn->used = 0; | 164 | cn->used = 0; |
163 | 165 | cn->corename = NULL; | |
164 | if (!cn->corename) | 166 | if (expand_corename(cn, core_name_size)) |
165 | return -ENOMEM; | 167 | return -ENOMEM; |
168 | cn->corename[0] = '\0'; | ||
169 | |||
170 | if (ispipe) | ||
171 | ++pat_ptr; | ||
166 | 172 | ||
167 | /* Repeat as long as we have more pattern to process and more output | 173 | /* Repeat as long as we have more pattern to process and more output |
168 | space */ | 174 | space */ |
169 | while (*pat_ptr) { | 175 | while (*pat_ptr) { |
170 | if (*pat_ptr != '%') { | 176 | if (*pat_ptr != '%') { |
171 | if (*pat_ptr == 0) | ||
172 | goto out; | ||
173 | err = cn_printf(cn, "%c", *pat_ptr++); | 177 | err = cn_printf(cn, "%c", *pat_ptr++); |
174 | } else { | 178 | } else { |
175 | switch (*++pat_ptr) { | 179 | switch (*++pat_ptr) { |
@@ -210,22 +214,16 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) | |||
210 | break; | 214 | break; |
211 | } | 215 | } |
212 | /* hostname */ | 216 | /* hostname */ |
213 | case 'h': { | 217 | case 'h': |
214 | char *namestart = cn->corename + cn->used; | ||
215 | down_read(&uts_sem); | 218 | down_read(&uts_sem); |
216 | err = cn_printf(cn, "%s", | 219 | err = cn_esc_printf(cn, "%s", |
217 | utsname()->nodename); | 220 | utsname()->nodename); |
218 | up_read(&uts_sem); | 221 | up_read(&uts_sem); |
219 | cn_escape(namestart); | ||
220 | break; | 222 | break; |
221 | } | ||
222 | /* executable */ | 223 | /* executable */ |
223 | case 'e': { | 224 | case 'e': |
224 | char *commstart = cn->corename + cn->used; | 225 | err = cn_esc_printf(cn, "%s", current->comm); |
225 | err = cn_printf(cn, "%s", current->comm); | ||
226 | cn_escape(commstart); | ||
227 | break; | 226 | break; |
228 | } | ||
229 | case 'E': | 227 | case 'E': |
230 | err = cn_print_exe_file(cn); | 228 | err = cn_print_exe_file(cn); |
231 | break; | 229 | break; |
@@ -244,6 +242,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) | |||
244 | return err; | 242 | return err; |
245 | } | 243 | } |
246 | 244 | ||
245 | out: | ||
247 | /* Backward compatibility with core_uses_pid: | 246 | /* Backward compatibility with core_uses_pid: |
248 | * | 247 | * |
249 | * If core_pattern does not include a %p (as is the default) | 248 | * If core_pattern does not include a %p (as is the default) |
@@ -254,7 +253,6 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) | |||
254 | if (err) | 253 | if (err) |
255 | return err; | 254 | return err; |
256 | } | 255 | } |
257 | out: | ||
258 | return ispipe; | 256 | return ispipe; |
259 | } | 257 | } |
260 | 258 | ||
@@ -549,7 +547,7 @@ void do_coredump(siginfo_t *siginfo) | |||
549 | if (ispipe < 0) { | 547 | if (ispipe < 0) { |
550 | printk(KERN_WARNING "format_corename failed\n"); | 548 | printk(KERN_WARNING "format_corename failed\n"); |
551 | printk(KERN_WARNING "Aborting core\n"); | 549 | printk(KERN_WARNING "Aborting core\n"); |
552 | goto fail_corename; | 550 | goto fail_unlock; |
553 | } | 551 | } |
554 | 552 | ||
555 | if (cprm.limit == 1) { | 553 | if (cprm.limit == 1) { |
@@ -584,7 +582,7 @@ void do_coredump(siginfo_t *siginfo) | |||
584 | goto fail_dropcount; | 582 | goto fail_dropcount; |
585 | } | 583 | } |
586 | 584 | ||
587 | helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); | 585 | helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); |
588 | if (!helper_argv) { | 586 | if (!helper_argv) { |
589 | printk(KERN_WARNING "%s failed to allocate memory\n", | 587 | printk(KERN_WARNING "%s failed to allocate memory\n", |
590 | __func__); | 588 | __func__); |
@@ -601,7 +599,7 @@ void do_coredump(siginfo_t *siginfo) | |||
601 | 599 | ||
602 | argv_free(helper_argv); | 600 | argv_free(helper_argv); |
603 | if (retval) { | 601 | if (retval) { |
604 | printk(KERN_INFO "Core dump to %s pipe failed\n", | 602 | printk(KERN_INFO "Core dump to |%s pipe failed\n", |
605 | cn.corename); | 603 | cn.corename); |
606 | goto close_fail; | 604 | goto close_fail; |
607 | } | 605 | } |
@@ -669,7 +667,6 @@ fail_dropcount: | |||
669 | atomic_dec(&core_dump_count); | 667 | atomic_dec(&core_dump_count); |
670 | fail_unlock: | 668 | fail_unlock: |
671 | kfree(cn.corename); | 669 | kfree(cn.corename); |
672 | fail_corename: | ||
673 | coredump_finish(mm, core_dumped); | 670 | coredump_finish(mm, core_dumped); |
674 | revert_creds(old_cred); | 671 | revert_creds(old_cred); |
675 | fail_creds: | 672 | fail_creds: |
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 35b1c7bd18b7..e501ac3a49ff 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
@@ -349,18 +349,17 @@ static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
349 | /* | 349 | /* |
350 | * Read a cramfs directory entry. | 350 | * Read a cramfs directory entry. |
351 | */ | 351 | */ |
352 | static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 352 | static int cramfs_readdir(struct file *file, struct dir_context *ctx) |
353 | { | 353 | { |
354 | struct inode *inode = file_inode(filp); | 354 | struct inode *inode = file_inode(file); |
355 | struct super_block *sb = inode->i_sb; | 355 | struct super_block *sb = inode->i_sb; |
356 | char *buf; | 356 | char *buf; |
357 | unsigned int offset; | 357 | unsigned int offset; |
358 | int copied; | ||
359 | 358 | ||
360 | /* Offset within the thing. */ | 359 | /* Offset within the thing. */ |
361 | offset = filp->f_pos; | 360 | if (ctx->pos >= inode->i_size) |
362 | if (offset >= inode->i_size) | ||
363 | return 0; | 361 | return 0; |
362 | offset = ctx->pos; | ||
364 | /* Directory entries are always 4-byte aligned */ | 363 | /* Directory entries are always 4-byte aligned */ |
365 | if (offset & 3) | 364 | if (offset & 3) |
366 | return -EINVAL; | 365 | return -EINVAL; |
@@ -369,14 +368,13 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
369 | if (!buf) | 368 | if (!buf) |
370 | return -ENOMEM; | 369 | return -ENOMEM; |
371 | 370 | ||
372 | copied = 0; | ||
373 | while (offset < inode->i_size) { | 371 | while (offset < inode->i_size) { |
374 | struct cramfs_inode *de; | 372 | struct cramfs_inode *de; |
375 | unsigned long nextoffset; | 373 | unsigned long nextoffset; |
376 | char *name; | 374 | char *name; |
377 | ino_t ino; | 375 | ino_t ino; |
378 | umode_t mode; | 376 | umode_t mode; |
379 | int namelen, error; | 377 | int namelen; |
380 | 378 | ||
381 | mutex_lock(&read_mutex); | 379 | mutex_lock(&read_mutex); |
382 | de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN); | 380 | de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN); |
@@ -402,13 +400,10 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
402 | break; | 400 | break; |
403 | namelen--; | 401 | namelen--; |
404 | } | 402 | } |
405 | error = filldir(dirent, buf, namelen, offset, ino, mode >> 12); | 403 | if (!dir_emit(ctx, buf, namelen, ino, mode >> 12)) |
406 | if (error) | ||
407 | break; | 404 | break; |
408 | 405 | ||
409 | offset = nextoffset; | 406 | ctx->pos = offset = nextoffset; |
410 | filp->f_pos = offset; | ||
411 | copied++; | ||
412 | } | 407 | } |
413 | kfree(buf); | 408 | kfree(buf); |
414 | return 0; | 409 | return 0; |
@@ -547,7 +542,7 @@ static const struct address_space_operations cramfs_aops = { | |||
547 | static const struct file_operations cramfs_directory_operations = { | 542 | static const struct file_operations cramfs_directory_operations = { |
548 | .llseek = generic_file_llseek, | 543 | .llseek = generic_file_llseek, |
549 | .read = generic_read_dir, | 544 | .read = generic_read_dir, |
550 | .readdir = cramfs_readdir, | 545 | .iterate = cramfs_readdir, |
551 | }; | 546 | }; |
552 | 547 | ||
553 | static const struct inode_operations cramfs_dir_inode_operations = { | 548 | static const struct inode_operations cramfs_dir_inode_operations = { |
diff --git a/fs/dcache.c b/fs/dcache.c index f09b9085f7d8..87bdb5329c3c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1612,6 +1612,10 @@ EXPORT_SYMBOL(d_obtain_alias); | |||
1612 | * If a dentry was found and moved, then it is returned. Otherwise NULL | 1612 | * If a dentry was found and moved, then it is returned. Otherwise NULL |
1613 | * is returned. This matches the expected return value of ->lookup. | 1613 | * is returned. This matches the expected return value of ->lookup. |
1614 | * | 1614 | * |
1615 | * Cluster filesystems may call this function with a negative, hashed dentry. | ||
1616 | * In that case, we know that the inode will be a regular file, and also this | ||
1617 | * will only occur during atomic_open. So we need to check for the dentry | ||
1618 | * being already hashed only in the final case. | ||
1615 | */ | 1619 | */ |
1616 | struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | 1620 | struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) |
1617 | { | 1621 | { |
@@ -1636,8 +1640,11 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
1636 | security_d_instantiate(dentry, inode); | 1640 | security_d_instantiate(dentry, inode); |
1637 | d_rehash(dentry); | 1641 | d_rehash(dentry); |
1638 | } | 1642 | } |
1639 | } else | 1643 | } else { |
1640 | d_add(dentry, inode); | 1644 | d_instantiate(dentry, inode); |
1645 | if (d_unhashed(dentry)) | ||
1646 | d_rehash(dentry); | ||
1647 | } | ||
1641 | return new; | 1648 | return new; |
1642 | } | 1649 | } |
1643 | EXPORT_SYMBOL(d_splice_alias); | 1650 | EXPORT_SYMBOL(d_splice_alias); |
@@ -1723,7 +1730,7 @@ EXPORT_SYMBOL(d_add_ci); | |||
1723 | * Do the slow-case of the dentry name compare. | 1730 | * Do the slow-case of the dentry name compare. |
1724 | * | 1731 | * |
1725 | * Unlike the dentry_cmp() function, we need to atomically | 1732 | * Unlike the dentry_cmp() function, we need to atomically |
1726 | * load the name, length and inode information, so that the | 1733 | * load the name and length information, so that the |
1727 | * filesystem can rely on them, and can use the 'name' and | 1734 | * filesystem can rely on them, and can use the 'name' and |
1728 | * 'len' information without worrying about walking off the | 1735 | * 'len' information without worrying about walking off the |
1729 | * end of memory etc. | 1736 | * end of memory etc. |
@@ -1741,22 +1748,18 @@ enum slow_d_compare { | |||
1741 | 1748 | ||
1742 | static noinline enum slow_d_compare slow_dentry_cmp( | 1749 | static noinline enum slow_d_compare slow_dentry_cmp( |
1743 | const struct dentry *parent, | 1750 | const struct dentry *parent, |
1744 | struct inode *inode, | ||
1745 | struct dentry *dentry, | 1751 | struct dentry *dentry, |
1746 | unsigned int seq, | 1752 | unsigned int seq, |
1747 | const struct qstr *name) | 1753 | const struct qstr *name) |
1748 | { | 1754 | { |
1749 | int tlen = dentry->d_name.len; | 1755 | int tlen = dentry->d_name.len; |
1750 | const char *tname = dentry->d_name.name; | 1756 | const char *tname = dentry->d_name.name; |
1751 | struct inode *i = dentry->d_inode; | ||
1752 | 1757 | ||
1753 | if (read_seqcount_retry(&dentry->d_seq, seq)) { | 1758 | if (read_seqcount_retry(&dentry->d_seq, seq)) { |
1754 | cpu_relax(); | 1759 | cpu_relax(); |
1755 | return D_COMP_SEQRETRY; | 1760 | return D_COMP_SEQRETRY; |
1756 | } | 1761 | } |
1757 | if (parent->d_op->d_compare(parent, inode, | 1762 | if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) |
1758 | dentry, i, | ||
1759 | tlen, tname, name)) | ||
1760 | return D_COMP_NOMATCH; | 1763 | return D_COMP_NOMATCH; |
1761 | return D_COMP_OK; | 1764 | return D_COMP_OK; |
1762 | } | 1765 | } |
@@ -1766,7 +1769,6 @@ static noinline enum slow_d_compare slow_dentry_cmp( | |||
1766 | * @parent: parent dentry | 1769 | * @parent: parent dentry |
1767 | * @name: qstr of name we wish to find | 1770 | * @name: qstr of name we wish to find |
1768 | * @seqp: returns d_seq value at the point where the dentry was found | 1771 | * @seqp: returns d_seq value at the point where the dentry was found |
1769 | * @inode: returns dentry->d_inode when the inode was found valid. | ||
1770 | * Returns: dentry, or NULL | 1772 | * Returns: dentry, or NULL |
1771 | * | 1773 | * |
1772 | * __d_lookup_rcu is the dcache lookup function for rcu-walk name | 1774 | * __d_lookup_rcu is the dcache lookup function for rcu-walk name |
@@ -1793,7 +1795,7 @@ static noinline enum slow_d_compare slow_dentry_cmp( | |||
1793 | */ | 1795 | */ |
1794 | struct dentry *__d_lookup_rcu(const struct dentry *parent, | 1796 | struct dentry *__d_lookup_rcu(const struct dentry *parent, |
1795 | const struct qstr *name, | 1797 | const struct qstr *name, |
1796 | unsigned *seqp, struct inode *inode) | 1798 | unsigned *seqp) |
1797 | { | 1799 | { |
1798 | u64 hashlen = name->hash_len; | 1800 | u64 hashlen = name->hash_len; |
1799 | const unsigned char *str = name->name; | 1801 | const unsigned char *str = name->name; |
@@ -1827,11 +1829,10 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent, | |||
1827 | seqretry: | 1829 | seqretry: |
1828 | /* | 1830 | /* |
1829 | * The dentry sequence count protects us from concurrent | 1831 | * The dentry sequence count protects us from concurrent |
1830 | * renames, and thus protects inode, parent and name fields. | 1832 | * renames, and thus protects parent and name fields. |
1831 | * | 1833 | * |
1832 | * The caller must perform a seqcount check in order | 1834 | * The caller must perform a seqcount check in order |
1833 | * to do anything useful with the returned dentry, | 1835 | * to do anything useful with the returned dentry. |
1834 | * including using the 'd_inode' pointer. | ||
1835 | * | 1836 | * |
1836 | * NOTE! We do a "raw" seqcount_begin here. That means that | 1837 | * NOTE! We do a "raw" seqcount_begin here. That means that |
1837 | * we don't wait for the sequence count to stabilize if it | 1838 | * we don't wait for the sequence count to stabilize if it |
@@ -1845,12 +1846,12 @@ seqretry: | |||
1845 | continue; | 1846 | continue; |
1846 | if (d_unhashed(dentry)) | 1847 | if (d_unhashed(dentry)) |
1847 | continue; | 1848 | continue; |
1848 | *seqp = seq; | ||
1849 | 1849 | ||
1850 | if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { | 1850 | if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { |
1851 | if (dentry->d_name.hash != hashlen_hash(hashlen)) | 1851 | if (dentry->d_name.hash != hashlen_hash(hashlen)) |
1852 | continue; | 1852 | continue; |
1853 | switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) { | 1853 | *seqp = seq; |
1854 | switch (slow_dentry_cmp(parent, dentry, seq, name)) { | ||
1854 | case D_COMP_OK: | 1855 | case D_COMP_OK: |
1855 | return dentry; | 1856 | return dentry; |
1856 | case D_COMP_NOMATCH: | 1857 | case D_COMP_NOMATCH: |
@@ -1862,6 +1863,7 @@ seqretry: | |||
1862 | 1863 | ||
1863 | if (dentry->d_name.hash_len != hashlen) | 1864 | if (dentry->d_name.hash_len != hashlen) |
1864 | continue; | 1865 | continue; |
1866 | *seqp = seq; | ||
1865 | if (!dentry_cmp(dentry, str, hashlen_len(hashlen))) | 1867 | if (!dentry_cmp(dentry, str, hashlen_len(hashlen))) |
1866 | return dentry; | 1868 | return dentry; |
1867 | } | 1869 | } |
@@ -1959,9 +1961,7 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name) | |||
1959 | if (parent->d_flags & DCACHE_OP_COMPARE) { | 1961 | if (parent->d_flags & DCACHE_OP_COMPARE) { |
1960 | int tlen = dentry->d_name.len; | 1962 | int tlen = dentry->d_name.len; |
1961 | const char *tname = dentry->d_name.name; | 1963 | const char *tname = dentry->d_name.name; |
1962 | if (parent->d_op->d_compare(parent, parent->d_inode, | 1964 | if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) |
1963 | dentry, dentry->d_inode, | ||
1964 | tlen, tname, name)) | ||
1965 | goto next; | 1965 | goto next; |
1966 | } else { | 1966 | } else { |
1967 | if (dentry->d_name.len != len) | 1967 | if (dentry->d_name.len != len) |
@@ -1998,7 +1998,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) | |||
1998 | */ | 1998 | */ |
1999 | name->hash = full_name_hash(name->name, name->len); | 1999 | name->hash = full_name_hash(name->name, name->len); |
2000 | if (dir->d_flags & DCACHE_OP_HASH) { | 2000 | if (dir->d_flags & DCACHE_OP_HASH) { |
2001 | int err = dir->d_op->d_hash(dir, dir->d_inode, name); | 2001 | int err = dir->d_op->d_hash(dir, name); |
2002 | if (unlikely(err < 0)) | 2002 | if (unlikely(err < 0)) |
2003 | return ERR_PTR(err); | 2003 | return ERR_PTR(err); |
2004 | } | 2004 | } |
@@ -2968,34 +2968,21 @@ rename_retry: | |||
2968 | goto again; | 2968 | goto again; |
2969 | } | 2969 | } |
2970 | 2970 | ||
2971 | /** | 2971 | void d_tmpfile(struct dentry *dentry, struct inode *inode) |
2972 | * find_inode_number - check for dentry with name | ||
2973 | * @dir: directory to check | ||
2974 | * @name: Name to find. | ||
2975 | * | ||
2976 | * Check whether a dentry already exists for the given name, | ||
2977 | * and return the inode number if it has an inode. Otherwise | ||
2978 | * 0 is returned. | ||
2979 | * | ||
2980 | * This routine is used to post-process directory listings for | ||
2981 | * filesystems using synthetic inode numbers, and is necessary | ||
2982 | * to keep getcwd() working. | ||
2983 | */ | ||
2984 | |||
2985 | ino_t find_inode_number(struct dentry *dir, struct qstr *name) | ||
2986 | { | 2972 | { |
2987 | struct dentry * dentry; | 2973 | inode_dec_link_count(inode); |
2988 | ino_t ino = 0; | 2974 | BUG_ON(dentry->d_name.name != dentry->d_iname || |
2989 | 2975 | !hlist_unhashed(&dentry->d_alias) || | |
2990 | dentry = d_hash_and_lookup(dir, name); | 2976 | !d_unlinked(dentry)); |
2991 | if (!IS_ERR_OR_NULL(dentry)) { | 2977 | spin_lock(&dentry->d_parent->d_lock); |
2992 | if (dentry->d_inode) | 2978 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); |
2993 | ino = dentry->d_inode->i_ino; | 2979 | dentry->d_name.len = sprintf(dentry->d_iname, "#%llu", |
2994 | dput(dentry); | 2980 | (unsigned long long)inode->i_ino); |
2995 | } | 2981 | spin_unlock(&dentry->d_lock); |
2996 | return ino; | 2982 | spin_unlock(&dentry->d_parent->d_lock); |
2983 | d_instantiate(dentry, inode); | ||
2997 | } | 2984 | } |
2998 | EXPORT_SYMBOL(find_inode_number); | 2985 | EXPORT_SYMBOL(d_tmpfile); |
2999 | 2986 | ||
3000 | static __initdata unsigned long dhash_entries; | 2987 | static __initdata unsigned long dhash_entries; |
3001 | static int __init set_dhash_entries(char *str) | 2988 | static int __init set_dhash_entries(char *str) |
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index c5ca6ae5a30c..63146295153b 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/debugfs.h> | 21 | #include <linux/debugfs.h> |
22 | #include <linux/io.h> | 22 | #include <linux/io.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/atomic.h> | ||
24 | 25 | ||
25 | static ssize_t default_read_file(struct file *file, char __user *buf, | 26 | static ssize_t default_read_file(struct file *file, char __user *buf, |
26 | size_t count, loff_t *ppos) | 27 | size_t count, loff_t *ppos) |
@@ -403,6 +404,47 @@ struct dentry *debugfs_create_size_t(const char *name, umode_t mode, | |||
403 | } | 404 | } |
404 | EXPORT_SYMBOL_GPL(debugfs_create_size_t); | 405 | EXPORT_SYMBOL_GPL(debugfs_create_size_t); |
405 | 406 | ||
407 | static int debugfs_atomic_t_set(void *data, u64 val) | ||
408 | { | ||
409 | atomic_set((atomic_t *)data, val); | ||
410 | return 0; | ||
411 | } | ||
412 | static int debugfs_atomic_t_get(void *data, u64 *val) | ||
413 | { | ||
414 | *val = atomic_read((atomic_t *)data); | ||
415 | return 0; | ||
416 | } | ||
417 | DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get, | ||
418 | debugfs_atomic_t_set, "%lld\n"); | ||
419 | DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, "%lld\n"); | ||
420 | DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, "%lld\n"); | ||
421 | |||
422 | /** | ||
423 | * debugfs_create_atomic_t - create a debugfs file that is used to read and | ||
424 | * write an atomic_t value | ||
425 | * @name: a pointer to a string containing the name of the file to create. | ||
426 | * @mode: the permission that the file should have | ||
427 | * @parent: a pointer to the parent dentry for this file. This should be a | ||
428 | * directory dentry if set. If this parameter is %NULL, then the | ||
429 | * file will be created in the root of the debugfs filesystem. | ||
430 | * @value: a pointer to the variable that the file should read to and write | ||
431 | * from. | ||
432 | */ | ||
433 | struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, | ||
434 | struct dentry *parent, atomic_t *value) | ||
435 | { | ||
436 | /* if there are no write bits set, make read only */ | ||
437 | if (!(mode & S_IWUGO)) | ||
438 | return debugfs_create_file(name, mode, parent, value, | ||
439 | &fops_atomic_t_ro); | ||
440 | /* if there are no read bits set, make write only */ | ||
441 | if (!(mode & S_IRUGO)) | ||
442 | return debugfs_create_file(name, mode, parent, value, | ||
443 | &fops_atomic_t_wo); | ||
444 | |||
445 | return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); | ||
446 | } | ||
447 | EXPORT_SYMBOL_GPL(debugfs_create_atomic_t); | ||
406 | 448 | ||
407 | static ssize_t read_file_bool(struct file *file, char __user *user_buf, | 449 | static ssize_t read_file_bool(struct file *file, char __user *user_buf, |
408 | size_t count, loff_t *ppos) | 450 | size_t count, loff_t *ppos) |
@@ -431,6 +473,7 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf, | |||
431 | if (copy_from_user(buf, user_buf, buf_size)) | 473 | if (copy_from_user(buf, user_buf, buf_size)) |
432 | return -EFAULT; | 474 | return -EFAULT; |
433 | 475 | ||
476 | buf[buf_size] = '\0'; | ||
434 | if (strtobool(buf, &bv) == 0) | 477 | if (strtobool(buf, &bv) == 0) |
435 | *val = bv; | 478 | *val = bv; |
436 | 479 | ||
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 4888cb3fdef7..c7c83ff0f752 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
@@ -533,8 +533,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove); | |||
533 | */ | 533 | */ |
534 | void debugfs_remove_recursive(struct dentry *dentry) | 534 | void debugfs_remove_recursive(struct dentry *dentry) |
535 | { | 535 | { |
536 | struct dentry *child; | 536 | struct dentry *child, *next, *parent; |
537 | struct dentry *parent; | ||
538 | 537 | ||
539 | if (IS_ERR_OR_NULL(dentry)) | 538 | if (IS_ERR_OR_NULL(dentry)) |
540 | return; | 539 | return; |
@@ -544,61 +543,37 @@ void debugfs_remove_recursive(struct dentry *dentry) | |||
544 | return; | 543 | return; |
545 | 544 | ||
546 | parent = dentry; | 545 | parent = dentry; |
546 | down: | ||
547 | mutex_lock(&parent->d_inode->i_mutex); | 547 | mutex_lock(&parent->d_inode->i_mutex); |
548 | list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) { | ||
549 | if (!debugfs_positive(child)) | ||
550 | continue; | ||
548 | 551 | ||
549 | while (1) { | 552 | /* perhaps simple_empty(child) makes more sense */ |
550 | /* | ||
551 | * When all dentries under "parent" has been removed, | ||
552 | * walk up the tree until we reach our starting point. | ||
553 | */ | ||
554 | if (list_empty(&parent->d_subdirs)) { | ||
555 | mutex_unlock(&parent->d_inode->i_mutex); | ||
556 | if (parent == dentry) | ||
557 | break; | ||
558 | parent = parent->d_parent; | ||
559 | mutex_lock(&parent->d_inode->i_mutex); | ||
560 | } | ||
561 | child = list_entry(parent->d_subdirs.next, struct dentry, | ||
562 | d_u.d_child); | ||
563 | next_sibling: | ||
564 | |||
565 | /* | ||
566 | * If "child" isn't empty, walk down the tree and | ||
567 | * remove all its descendants first. | ||
568 | */ | ||
569 | if (!list_empty(&child->d_subdirs)) { | 553 | if (!list_empty(&child->d_subdirs)) { |
570 | mutex_unlock(&parent->d_inode->i_mutex); | 554 | mutex_unlock(&parent->d_inode->i_mutex); |
571 | parent = child; | 555 | parent = child; |
572 | mutex_lock(&parent->d_inode->i_mutex); | 556 | goto down; |
573 | continue; | ||
574 | } | 557 | } |
575 | __debugfs_remove(child, parent); | 558 | up: |
576 | if (parent->d_subdirs.next == &child->d_u.d_child) { | 559 | if (!__debugfs_remove(child, parent)) |
577 | /* | 560 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); |
578 | * Try the next sibling. | ||
579 | */ | ||
580 | if (child->d_u.d_child.next != &parent->d_subdirs) { | ||
581 | child = list_entry(child->d_u.d_child.next, | ||
582 | struct dentry, | ||
583 | d_u.d_child); | ||
584 | goto next_sibling; | ||
585 | } | ||
586 | |||
587 | /* | ||
588 | * Avoid infinite loop if we fail to remove | ||
589 | * one dentry. | ||
590 | */ | ||
591 | mutex_unlock(&parent->d_inode->i_mutex); | ||
592 | break; | ||
593 | } | ||
594 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); | ||
595 | } | 561 | } |
596 | 562 | ||
597 | parent = dentry->d_parent; | 563 | mutex_unlock(&parent->d_inode->i_mutex); |
564 | child = parent; | ||
565 | parent = parent->d_parent; | ||
598 | mutex_lock(&parent->d_inode->i_mutex); | 566 | mutex_lock(&parent->d_inode->i_mutex); |
599 | __debugfs_remove(dentry, parent); | 567 | |
568 | if (child != dentry) { | ||
569 | next = list_entry(child->d_u.d_child.next, struct dentry, | ||
570 | d_u.d_child); | ||
571 | goto up; | ||
572 | } | ||
573 | |||
574 | if (!__debugfs_remove(child, parent)) | ||
575 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); | ||
600 | mutex_unlock(&parent->d_inode->i_mutex); | 576 | mutex_unlock(&parent->d_inode->i_mutex); |
601 | simple_release_fs(&debugfs_mount, &debugfs_mount_count); | ||
602 | } | 577 | } |
603 | EXPORT_SYMBOL_GPL(debugfs_remove_recursive); | 578 | EXPORT_SYMBOL_GPL(debugfs_remove_recursive); |
604 | 579 | ||
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 7d58d5b112b5..76feb4b60fa6 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
@@ -138,8 +138,9 @@ static ssize_t cluster_cluster_name_read(struct dlm_cluster *cl, char *buf) | |||
138 | static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl, | 138 | static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl, |
139 | const char *buf, size_t len) | 139 | const char *buf, size_t len) |
140 | { | 140 | { |
141 | strncpy(dlm_config.ci_cluster_name, buf, DLM_LOCKSPACE_LEN); | 141 | strlcpy(dlm_config.ci_cluster_name, buf, |
142 | strncpy(cl->cl_cluster_name, buf, DLM_LOCKSPACE_LEN); | 142 | sizeof(dlm_config.ci_cluster_name)); |
143 | strlcpy(cl->cl_cluster_name, buf, sizeof(cl->cl_cluster_name)); | ||
143 | return len; | 144 | return len; |
144 | } | 145 | } |
145 | 146 | ||
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 1b1146670c4b..e223a911a834 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -2038,8 +2038,8 @@ static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
2038 | b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; | 2038 | b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; |
2039 | if (b == 1) { | 2039 | if (b == 1) { |
2040 | int len = receive_extralen(ms); | 2040 | int len = receive_extralen(ms); |
2041 | if (len > DLM_RESNAME_MAXLEN) | 2041 | if (len > r->res_ls->ls_lvblen) |
2042 | len = DLM_RESNAME_MAXLEN; | 2042 | len = r->res_ls->ls_lvblen; |
2043 | memcpy(lkb->lkb_lvbptr, ms->m_extra, len); | 2043 | memcpy(lkb->lkb_lvbptr, ms->m_extra, len); |
2044 | lkb->lkb_lvbseq = ms->m_lvbseq; | 2044 | lkb->lkb_lvbseq = ms->m_lvbseq; |
2045 | } | 2045 | } |
@@ -3893,8 +3893,8 @@ static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
3893 | if (!lkb->lkb_lvbptr) | 3893 | if (!lkb->lkb_lvbptr) |
3894 | return -ENOMEM; | 3894 | return -ENOMEM; |
3895 | len = receive_extralen(ms); | 3895 | len = receive_extralen(ms); |
3896 | if (len > DLM_RESNAME_MAXLEN) | 3896 | if (len > ls->ls_lvblen) |
3897 | len = DLM_RESNAME_MAXLEN; | 3897 | len = ls->ls_lvblen; |
3898 | memcpy(lkb->lkb_lvbptr, ms->m_extra, len); | 3898 | memcpy(lkb->lkb_lvbptr, ms->m_extra, len); |
3899 | } | 3899 | } |
3900 | return 0; | 3900 | return 0; |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 3ca79d3253b9..88556dc0458e 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -883,17 +883,24 @@ int dlm_release_lockspace(void *lockspace, int force) | |||
883 | void dlm_stop_lockspaces(void) | 883 | void dlm_stop_lockspaces(void) |
884 | { | 884 | { |
885 | struct dlm_ls *ls; | 885 | struct dlm_ls *ls; |
886 | int count; | ||
886 | 887 | ||
887 | restart: | 888 | restart: |
889 | count = 0; | ||
888 | spin_lock(&lslist_lock); | 890 | spin_lock(&lslist_lock); |
889 | list_for_each_entry(ls, &lslist, ls_list) { | 891 | list_for_each_entry(ls, &lslist, ls_list) { |
890 | if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) | 892 | if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) { |
893 | count++; | ||
891 | continue; | 894 | continue; |
895 | } | ||
892 | spin_unlock(&lslist_lock); | 896 | spin_unlock(&lslist_lock); |
893 | log_error(ls, "no userland control daemon, stopping lockspace"); | 897 | log_error(ls, "no userland control daemon, stopping lockspace"); |
894 | dlm_ls_stop(ls); | 898 | dlm_ls_stop(ls); |
895 | goto restart; | 899 | goto restart; |
896 | } | 900 | } |
897 | spin_unlock(&lslist_lock); | 901 | spin_unlock(&lslist_lock); |
902 | |||
903 | if (count) | ||
904 | log_print("dlm user daemon left %d lockspaces", count); | ||
898 | } | 905 | } |
899 | 906 | ||
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index d0ccd2fd79eb..d90909ec6aa6 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -52,7 +52,6 @@ | |||
52 | #include <linux/mutex.h> | 52 | #include <linux/mutex.h> |
53 | #include <linux/sctp.h> | 53 | #include <linux/sctp.h> |
54 | #include <linux/slab.h> | 54 | #include <linux/slab.h> |
55 | #include <linux/sctp.h> | ||
56 | #include <net/sctp/sctp.h> | 55 | #include <net/sctp/sctp.h> |
57 | #include <net/ipv6.h> | 56 | #include <net/ipv6.h> |
58 | 57 | ||
@@ -126,6 +125,7 @@ struct connection { | |||
126 | struct connection *othercon; | 125 | struct connection *othercon; |
127 | struct work_struct rwork; /* Receive workqueue */ | 126 | struct work_struct rwork; /* Receive workqueue */ |
128 | struct work_struct swork; /* Send workqueue */ | 127 | struct work_struct swork; /* Send workqueue */ |
128 | bool try_new_addr; | ||
129 | }; | 129 | }; |
130 | #define sock2con(x) ((struct connection *)(x)->sk_user_data) | 130 | #define sock2con(x) ((struct connection *)(x)->sk_user_data) |
131 | 131 | ||
@@ -144,6 +144,7 @@ struct dlm_node_addr { | |||
144 | struct list_head list; | 144 | struct list_head list; |
145 | int nodeid; | 145 | int nodeid; |
146 | int addr_count; | 146 | int addr_count; |
147 | int curr_addr_index; | ||
147 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; | 148 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; |
148 | }; | 149 | }; |
149 | 150 | ||
@@ -310,7 +311,7 @@ static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) | |||
310 | } | 311 | } |
311 | 312 | ||
312 | static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, | 313 | static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, |
313 | struct sockaddr *sa_out) | 314 | struct sockaddr *sa_out, bool try_new_addr) |
314 | { | 315 | { |
315 | struct sockaddr_storage sas; | 316 | struct sockaddr_storage sas; |
316 | struct dlm_node_addr *na; | 317 | struct dlm_node_addr *na; |
@@ -320,8 +321,16 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, | |||
320 | 321 | ||
321 | spin_lock(&dlm_node_addrs_spin); | 322 | spin_lock(&dlm_node_addrs_spin); |
322 | na = find_node_addr(nodeid); | 323 | na = find_node_addr(nodeid); |
323 | if (na && na->addr_count) | 324 | if (na && na->addr_count) { |
324 | memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage)); | 325 | if (try_new_addr) { |
326 | na->curr_addr_index++; | ||
327 | if (na->curr_addr_index == na->addr_count) | ||
328 | na->curr_addr_index = 0; | ||
329 | } | ||
330 | |||
331 | memcpy(&sas, na->addr[na->curr_addr_index ], | ||
332 | sizeof(struct sockaddr_storage)); | ||
333 | } | ||
325 | spin_unlock(&dlm_node_addrs_spin); | 334 | spin_unlock(&dlm_node_addrs_spin); |
326 | 335 | ||
327 | if (!na) | 336 | if (!na) |
@@ -353,19 +362,22 @@ static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) | |||
353 | { | 362 | { |
354 | struct dlm_node_addr *na; | 363 | struct dlm_node_addr *na; |
355 | int rv = -EEXIST; | 364 | int rv = -EEXIST; |
365 | int addr_i; | ||
356 | 366 | ||
357 | spin_lock(&dlm_node_addrs_spin); | 367 | spin_lock(&dlm_node_addrs_spin); |
358 | list_for_each_entry(na, &dlm_node_addrs, list) { | 368 | list_for_each_entry(na, &dlm_node_addrs, list) { |
359 | if (!na->addr_count) | 369 | if (!na->addr_count) |
360 | continue; | 370 | continue; |
361 | 371 | ||
362 | if (!addr_compare(na->addr[0], addr)) | 372 | for (addr_i = 0; addr_i < na->addr_count; addr_i++) { |
363 | continue; | 373 | if (addr_compare(na->addr[addr_i], addr)) { |
364 | 374 | *nodeid = na->nodeid; | |
365 | *nodeid = na->nodeid; | 375 | rv = 0; |
366 | rv = 0; | 376 | goto unlock; |
367 | break; | 377 | } |
378 | } | ||
368 | } | 379 | } |
380 | unlock: | ||
369 | spin_unlock(&dlm_node_addrs_spin); | 381 | spin_unlock(&dlm_node_addrs_spin); |
370 | return rv; | 382 | return rv; |
371 | } | 383 | } |
@@ -561,8 +573,23 @@ static void sctp_send_shutdown(sctp_assoc_t associd) | |||
561 | 573 | ||
562 | static void sctp_init_failed_foreach(struct connection *con) | 574 | static void sctp_init_failed_foreach(struct connection *con) |
563 | { | 575 | { |
576 | |||
577 | /* | ||
578 | * Don't try to recover base con and handle race where the | ||
579 | * other node's assoc init creates a assoc and we get that | ||
580 | * notification, then we get a notification that our attempt | ||
581 | * failed due. This happens when we are still trying the primary | ||
582 | * address, but the other node has already tried secondary addrs | ||
583 | * and found one that worked. | ||
584 | */ | ||
585 | if (!con->nodeid || con->sctp_assoc) | ||
586 | return; | ||
587 | |||
588 | log_print("Retrying SCTP association init for node %d\n", con->nodeid); | ||
589 | |||
590 | con->try_new_addr = true; | ||
564 | con->sctp_assoc = 0; | 591 | con->sctp_assoc = 0; |
565 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | 592 | if (test_and_clear_bit(CF_INIT_PENDING, &con->flags)) { |
566 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | 593 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) |
567 | queue_work(send_workqueue, &con->swork); | 594 | queue_work(send_workqueue, &con->swork); |
568 | } | 595 | } |
@@ -579,15 +606,56 @@ static void sctp_init_failed(void) | |||
579 | mutex_unlock(&connections_lock); | 606 | mutex_unlock(&connections_lock); |
580 | } | 607 | } |
581 | 608 | ||
609 | static void retry_failed_sctp_send(struct connection *recv_con, | ||
610 | struct sctp_send_failed *sn_send_failed, | ||
611 | char *buf) | ||
612 | { | ||
613 | int len = sn_send_failed->ssf_length - sizeof(struct sctp_send_failed); | ||
614 | struct dlm_mhandle *mh; | ||
615 | struct connection *con; | ||
616 | char *retry_buf; | ||
617 | int nodeid = sn_send_failed->ssf_info.sinfo_ppid; | ||
618 | |||
619 | log_print("Retry sending %d bytes to node id %d", len, nodeid); | ||
620 | |||
621 | con = nodeid2con(nodeid, 0); | ||
622 | if (!con) { | ||
623 | log_print("Could not look up con for nodeid %d\n", | ||
624 | nodeid); | ||
625 | return; | ||
626 | } | ||
627 | |||
628 | mh = dlm_lowcomms_get_buffer(nodeid, len, GFP_NOFS, &retry_buf); | ||
629 | if (!mh) { | ||
630 | log_print("Could not allocate buf for retry."); | ||
631 | return; | ||
632 | } | ||
633 | memcpy(retry_buf, buf + sizeof(struct sctp_send_failed), len); | ||
634 | dlm_lowcomms_commit_buffer(mh); | ||
635 | |||
636 | /* | ||
637 | * If we got a assoc changed event before the send failed event then | ||
638 | * we only need to retry the send. | ||
639 | */ | ||
640 | if (con->sctp_assoc) { | ||
641 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | ||
642 | queue_work(send_workqueue, &con->swork); | ||
643 | } else | ||
644 | sctp_init_failed_foreach(con); | ||
645 | } | ||
646 | |||
582 | /* Something happened to an association */ | 647 | /* Something happened to an association */ |
583 | static void process_sctp_notification(struct connection *con, | 648 | static void process_sctp_notification(struct connection *con, |
584 | struct msghdr *msg, char *buf) | 649 | struct msghdr *msg, char *buf) |
585 | { | 650 | { |
586 | union sctp_notification *sn = (union sctp_notification *)buf; | 651 | union sctp_notification *sn = (union sctp_notification *)buf; |
587 | 652 | ||
588 | if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) { | 653 | switch (sn->sn_header.sn_type) { |
654 | case SCTP_SEND_FAILED: | ||
655 | retry_failed_sctp_send(con, &sn->sn_send_failed, buf); | ||
656 | break; | ||
657 | case SCTP_ASSOC_CHANGE: | ||
589 | switch (sn->sn_assoc_change.sac_state) { | 658 | switch (sn->sn_assoc_change.sac_state) { |
590 | |||
591 | case SCTP_COMM_UP: | 659 | case SCTP_COMM_UP: |
592 | case SCTP_RESTART: | 660 | case SCTP_RESTART: |
593 | { | 661 | { |
@@ -662,9 +730,11 @@ static void process_sctp_notification(struct connection *con, | |||
662 | log_print("connecting to %d sctp association %d", | 730 | log_print("connecting to %d sctp association %d", |
663 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); | 731 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); |
664 | 732 | ||
733 | new_con->sctp_assoc = sn->sn_assoc_change.sac_assoc_id; | ||
734 | new_con->try_new_addr = false; | ||
665 | /* Send any pending writes */ | 735 | /* Send any pending writes */ |
666 | clear_bit(CF_CONNECT_PENDING, &new_con->flags); | 736 | clear_bit(CF_CONNECT_PENDING, &new_con->flags); |
667 | clear_bit(CF_INIT_PENDING, &con->flags); | 737 | clear_bit(CF_INIT_PENDING, &new_con->flags); |
668 | if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) { | 738 | if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) { |
669 | queue_work(send_workqueue, &new_con->swork); | 739 | queue_work(send_workqueue, &new_con->swork); |
670 | } | 740 | } |
@@ -683,14 +753,10 @@ static void process_sctp_notification(struct connection *con, | |||
683 | } | 753 | } |
684 | break; | 754 | break; |
685 | 755 | ||
686 | /* We don't know which INIT failed, so clear the PENDING flags | ||
687 | * on them all. if assoc_id is zero then it will then try | ||
688 | * again */ | ||
689 | |||
690 | case SCTP_CANT_STR_ASSOC: | 756 | case SCTP_CANT_STR_ASSOC: |
691 | { | 757 | { |
758 | /* Will retry init when we get the send failed notification */ | ||
692 | log_print("Can't start SCTP association - retrying"); | 759 | log_print("Can't start SCTP association - retrying"); |
693 | sctp_init_failed(); | ||
694 | } | 760 | } |
695 | break; | 761 | break; |
696 | 762 | ||
@@ -699,6 +765,8 @@ static void process_sctp_notification(struct connection *con, | |||
699 | (int)sn->sn_assoc_change.sac_assoc_id, | 765 | (int)sn->sn_assoc_change.sac_assoc_id, |
700 | sn->sn_assoc_change.sac_state); | 766 | sn->sn_assoc_change.sac_state); |
701 | } | 767 | } |
768 | default: | ||
769 | ; /* fall through */ | ||
702 | } | 770 | } |
703 | } | 771 | } |
704 | 772 | ||
@@ -958,6 +1026,24 @@ static void free_entry(struct writequeue_entry *e) | |||
958 | kfree(e); | 1026 | kfree(e); |
959 | } | 1027 | } |
960 | 1028 | ||
1029 | /* | ||
1030 | * writequeue_entry_complete - try to delete and free write queue entry | ||
1031 | * @e: write queue entry to try to delete | ||
1032 | * @completed: bytes completed | ||
1033 | * | ||
1034 | * writequeue_lock must be held. | ||
1035 | */ | ||
1036 | static void writequeue_entry_complete(struct writequeue_entry *e, int completed) | ||
1037 | { | ||
1038 | e->offset += completed; | ||
1039 | e->len -= completed; | ||
1040 | |||
1041 | if (e->len == 0 && e->users == 0) { | ||
1042 | list_del(&e->list); | ||
1043 | free_entry(e); | ||
1044 | } | ||
1045 | } | ||
1046 | |||
961 | /* Initiate an SCTP association. | 1047 | /* Initiate an SCTP association. |
962 | This is a special case of send_to_sock() in that we don't yet have a | 1048 | This is a special case of send_to_sock() in that we don't yet have a |
963 | peeled-off socket for this association, so we use the listening socket | 1049 | peeled-off socket for this association, so we use the listening socket |
@@ -977,15 +1063,14 @@ static void sctp_init_assoc(struct connection *con) | |||
977 | int addrlen; | 1063 | int addrlen; |
978 | struct kvec iov[1]; | 1064 | struct kvec iov[1]; |
979 | 1065 | ||
1066 | mutex_lock(&con->sock_mutex); | ||
980 | if (test_and_set_bit(CF_INIT_PENDING, &con->flags)) | 1067 | if (test_and_set_bit(CF_INIT_PENDING, &con->flags)) |
981 | return; | 1068 | goto unlock; |
982 | |||
983 | if (con->retries++ > MAX_CONNECT_RETRIES) | ||
984 | return; | ||
985 | 1069 | ||
986 | if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) { | 1070 | if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr, |
1071 | con->try_new_addr)) { | ||
987 | log_print("no address for nodeid %d", con->nodeid); | 1072 | log_print("no address for nodeid %d", con->nodeid); |
988 | return; | 1073 | goto unlock; |
989 | } | 1074 | } |
990 | base_con = nodeid2con(0, 0); | 1075 | base_con = nodeid2con(0, 0); |
991 | BUG_ON(base_con == NULL); | 1076 | BUG_ON(base_con == NULL); |
@@ -1003,17 +1088,25 @@ static void sctp_init_assoc(struct connection *con) | |||
1003 | if (list_empty(&con->writequeue)) { | 1088 | if (list_empty(&con->writequeue)) { |
1004 | spin_unlock(&con->writequeue_lock); | 1089 | spin_unlock(&con->writequeue_lock); |
1005 | log_print("writequeue empty for nodeid %d", con->nodeid); | 1090 | log_print("writequeue empty for nodeid %d", con->nodeid); |
1006 | return; | 1091 | goto unlock; |
1007 | } | 1092 | } |
1008 | 1093 | ||
1009 | e = list_first_entry(&con->writequeue, struct writequeue_entry, list); | 1094 | e = list_first_entry(&con->writequeue, struct writequeue_entry, list); |
1010 | len = e->len; | 1095 | len = e->len; |
1011 | offset = e->offset; | 1096 | offset = e->offset; |
1012 | spin_unlock(&con->writequeue_lock); | ||
1013 | 1097 | ||
1014 | /* Send the first block off the write queue */ | 1098 | /* Send the first block off the write queue */ |
1015 | iov[0].iov_base = page_address(e->page)+offset; | 1099 | iov[0].iov_base = page_address(e->page)+offset; |
1016 | iov[0].iov_len = len; | 1100 | iov[0].iov_len = len; |
1101 | spin_unlock(&con->writequeue_lock); | ||
1102 | |||
1103 | if (rem_addr.ss_family == AF_INET) { | ||
1104 | struct sockaddr_in *sin = (struct sockaddr_in *)&rem_addr; | ||
1105 | log_print("Trying to connect to %pI4", &sin->sin_addr.s_addr); | ||
1106 | } else { | ||
1107 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&rem_addr; | ||
1108 | log_print("Trying to connect to %pI6", &sin6->sin6_addr); | ||
1109 | } | ||
1017 | 1110 | ||
1018 | cmsg = CMSG_FIRSTHDR(&outmessage); | 1111 | cmsg = CMSG_FIRSTHDR(&outmessage); |
1019 | cmsg->cmsg_level = IPPROTO_SCTP; | 1112 | cmsg->cmsg_level = IPPROTO_SCTP; |
@@ -1021,8 +1114,9 @@ static void sctp_init_assoc(struct connection *con) | |||
1021 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | 1114 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); |
1022 | sinfo = CMSG_DATA(cmsg); | 1115 | sinfo = CMSG_DATA(cmsg); |
1023 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | 1116 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); |
1024 | sinfo->sinfo_ppid = cpu_to_le32(dlm_our_nodeid()); | 1117 | sinfo->sinfo_ppid = cpu_to_le32(con->nodeid); |
1025 | outmessage.msg_controllen = cmsg->cmsg_len; | 1118 | outmessage.msg_controllen = cmsg->cmsg_len; |
1119 | sinfo->sinfo_flags |= SCTP_ADDR_OVER; | ||
1026 | 1120 | ||
1027 | ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len); | 1121 | ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len); |
1028 | if (ret < 0) { | 1122 | if (ret < 0) { |
@@ -1035,15 +1129,12 @@ static void sctp_init_assoc(struct connection *con) | |||
1035 | } | 1129 | } |
1036 | else { | 1130 | else { |
1037 | spin_lock(&con->writequeue_lock); | 1131 | spin_lock(&con->writequeue_lock); |
1038 | e->offset += ret; | 1132 | writequeue_entry_complete(e, ret); |
1039 | e->len -= ret; | ||
1040 | |||
1041 | if (e->len == 0 && e->users == 0) { | ||
1042 | list_del(&e->list); | ||
1043 | free_entry(e); | ||
1044 | } | ||
1045 | spin_unlock(&con->writequeue_lock); | 1133 | spin_unlock(&con->writequeue_lock); |
1046 | } | 1134 | } |
1135 | |||
1136 | unlock: | ||
1137 | mutex_unlock(&con->sock_mutex); | ||
1047 | } | 1138 | } |
1048 | 1139 | ||
1049 | /* Connect a new socket to its peer */ | 1140 | /* Connect a new socket to its peer */ |
@@ -1075,7 +1166,7 @@ static void tcp_connect_to_sock(struct connection *con) | |||
1075 | goto out_err; | 1166 | goto out_err; |
1076 | 1167 | ||
1077 | memset(&saddr, 0, sizeof(saddr)); | 1168 | memset(&saddr, 0, sizeof(saddr)); |
1078 | result = nodeid_to_addr(con->nodeid, &saddr, NULL); | 1169 | result = nodeid_to_addr(con->nodeid, &saddr, NULL, false); |
1079 | if (result < 0) { | 1170 | if (result < 0) { |
1080 | log_print("no address for nodeid %d", con->nodeid); | 1171 | log_print("no address for nodeid %d", con->nodeid); |
1081 | goto out_err; | 1172 | goto out_err; |
@@ -1254,6 +1345,7 @@ static int sctp_listen_for_all(void) | |||
1254 | int result = -EINVAL, num = 1, i, addr_len; | 1345 | int result = -EINVAL, num = 1, i, addr_len; |
1255 | struct connection *con = nodeid2con(0, GFP_NOFS); | 1346 | struct connection *con = nodeid2con(0, GFP_NOFS); |
1256 | int bufsize = NEEDED_RMEM; | 1347 | int bufsize = NEEDED_RMEM; |
1348 | int one = 1; | ||
1257 | 1349 | ||
1258 | if (!con) | 1350 | if (!con) |
1259 | return -ENOMEM; | 1351 | return -ENOMEM; |
@@ -1288,6 +1380,11 @@ static int sctp_listen_for_all(void) | |||
1288 | goto create_delsock; | 1380 | goto create_delsock; |
1289 | } | 1381 | } |
1290 | 1382 | ||
1383 | result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one, | ||
1384 | sizeof(one)); | ||
1385 | if (result < 0) | ||
1386 | log_print("Could not set SCTP NODELAY error %d\n", result); | ||
1387 | |||
1291 | /* Init con struct */ | 1388 | /* Init con struct */ |
1292 | sock->sk->sk_user_data = con; | 1389 | sock->sk->sk_user_data = con; |
1293 | con->sock = sock; | 1390 | con->sock = sock; |
@@ -1493,13 +1590,7 @@ static void send_to_sock(struct connection *con) | |||
1493 | } | 1590 | } |
1494 | 1591 | ||
1495 | spin_lock(&con->writequeue_lock); | 1592 | spin_lock(&con->writequeue_lock); |
1496 | e->offset += ret; | 1593 | writequeue_entry_complete(e, ret); |
1497 | e->len -= ret; | ||
1498 | |||
1499 | if (e->len == 0 && e->users == 0) { | ||
1500 | list_del(&e->list); | ||
1501 | free_entry(e); | ||
1502 | } | ||
1503 | } | 1594 | } |
1504 | spin_unlock(&con->writequeue_lock); | 1595 | spin_unlock(&con->writequeue_lock); |
1505 | out: | 1596 | out: |
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 911649a47dd5..812149119fa3 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -686,7 +686,6 @@ static int device_close(struct inode *inode, struct file *file) | |||
686 | device_remove_lockspace() */ | 686 | device_remove_lockspace() */ |
687 | 687 | ||
688 | sigprocmask(SIG_SETMASK, &tmpsig, NULL); | 688 | sigprocmask(SIG_SETMASK, &tmpsig, NULL); |
689 | recalc_sigpending(); | ||
690 | 689 | ||
691 | return 0; | 690 | return 0; |
692 | } | 691 | } |
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index f71ec125290d..d10757635b9c 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -37,16 +37,8 @@ | |||
37 | #include <asm/unaligned.h> | 37 | #include <asm/unaligned.h> |
38 | #include "ecryptfs_kernel.h" | 38 | #include "ecryptfs_kernel.h" |
39 | 39 | ||
40 | static int | 40 | #define DECRYPT 0 |
41 | ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | 41 | #define ENCRYPT 1 |
42 | struct page *dst_page, int dst_offset, | ||
43 | struct page *src_page, int src_offset, int size, | ||
44 | unsigned char *iv); | ||
45 | static int | ||
46 | ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | ||
47 | struct page *dst_page, int dst_offset, | ||
48 | struct page *src_page, int src_offset, int size, | ||
49 | unsigned char *iv); | ||
50 | 42 | ||
51 | /** | 43 | /** |
52 | * ecryptfs_to_hex | 44 | * ecryptfs_to_hex |
@@ -336,19 +328,20 @@ static void extent_crypt_complete(struct crypto_async_request *req, int rc) | |||
336 | } | 328 | } |
337 | 329 | ||
338 | /** | 330 | /** |
339 | * encrypt_scatterlist | 331 | * crypt_scatterlist |
340 | * @crypt_stat: Pointer to the crypt_stat struct to initialize. | 332 | * @crypt_stat: Pointer to the crypt_stat struct to initialize. |
341 | * @dest_sg: Destination of encrypted data | 333 | * @dst_sg: Destination of the data after performing the crypto operation |
342 | * @src_sg: Data to be encrypted | 334 | * @src_sg: Data to be encrypted or decrypted |
343 | * @size: Length of data to be encrypted | 335 | * @size: Length of data |
344 | * @iv: iv to use during encryption | 336 | * @iv: IV to use |
337 | * @op: ENCRYPT or DECRYPT to indicate the desired operation | ||
345 | * | 338 | * |
346 | * Returns the number of bytes encrypted; negative value on error | 339 | * Returns the number of bytes encrypted or decrypted; negative value on error |
347 | */ | 340 | */ |
348 | static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, | 341 | static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, |
349 | struct scatterlist *dest_sg, | 342 | struct scatterlist *dst_sg, |
350 | struct scatterlist *src_sg, int size, | 343 | struct scatterlist *src_sg, int size, |
351 | unsigned char *iv) | 344 | unsigned char *iv, int op) |
352 | { | 345 | { |
353 | struct ablkcipher_request *req = NULL; | 346 | struct ablkcipher_request *req = NULL; |
354 | struct extent_crypt_result ecr; | 347 | struct extent_crypt_result ecr; |
@@ -391,9 +384,9 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, | |||
391 | crypt_stat->flags |= ECRYPTFS_KEY_SET; | 384 | crypt_stat->flags |= ECRYPTFS_KEY_SET; |
392 | } | 385 | } |
393 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | 386 | mutex_unlock(&crypt_stat->cs_tfm_mutex); |
394 | ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size); | 387 | ablkcipher_request_set_crypt(req, src_sg, dst_sg, size, iv); |
395 | ablkcipher_request_set_crypt(req, src_sg, dest_sg, size, iv); | 388 | rc = op == ENCRYPT ? crypto_ablkcipher_encrypt(req) : |
396 | rc = crypto_ablkcipher_encrypt(req); | 389 | crypto_ablkcipher_decrypt(req); |
397 | if (rc == -EINPROGRESS || rc == -EBUSY) { | 390 | if (rc == -EINPROGRESS || rc == -EBUSY) { |
398 | struct extent_crypt_result *ecr = req->base.data; | 391 | struct extent_crypt_result *ecr = req->base.data; |
399 | 392 | ||
@@ -407,41 +400,43 @@ out: | |||
407 | } | 400 | } |
408 | 401 | ||
409 | /** | 402 | /** |
410 | * ecryptfs_lower_offset_for_extent | 403 | * lower_offset_for_page |
411 | * | 404 | * |
412 | * Convert an eCryptfs page index into a lower byte offset | 405 | * Convert an eCryptfs page index into a lower byte offset |
413 | */ | 406 | */ |
414 | static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, | 407 | static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat, |
415 | struct ecryptfs_crypt_stat *crypt_stat) | 408 | struct page *page) |
416 | { | 409 | { |
417 | (*offset) = ecryptfs_lower_header_size(crypt_stat) | 410 | return ecryptfs_lower_header_size(crypt_stat) + |
418 | + (crypt_stat->extent_size * extent_num); | 411 | (page->index << PAGE_CACHE_SHIFT); |
419 | } | 412 | } |
420 | 413 | ||
421 | /** | 414 | /** |
422 | * ecryptfs_encrypt_extent | 415 | * crypt_extent |
423 | * @enc_extent_page: Allocated page into which to encrypt the data in | ||
424 | * @page | ||
425 | * @crypt_stat: crypt_stat containing cryptographic context for the | 416 | * @crypt_stat: crypt_stat containing cryptographic context for the |
426 | * encryption operation | 417 | * encryption operation |
427 | * @page: Page containing plaintext data extent to encrypt | 418 | * @dst_page: The page to write the result into |
419 | * @src_page: The page to read from | ||
428 | * @extent_offset: Page extent offset for use in generating IV | 420 | * @extent_offset: Page extent offset for use in generating IV |
421 | * @op: ENCRYPT or DECRYPT to indicate the desired operation | ||
429 | * | 422 | * |
430 | * Encrypts one extent of data. | 423 | * Encrypts or decrypts one extent of data. |
431 | * | 424 | * |
432 | * Return zero on success; non-zero otherwise | 425 | * Return zero on success; non-zero otherwise |
433 | */ | 426 | */ |
434 | static int ecryptfs_encrypt_extent(struct page *enc_extent_page, | 427 | static int crypt_extent(struct ecryptfs_crypt_stat *crypt_stat, |
435 | struct ecryptfs_crypt_stat *crypt_stat, | 428 | struct page *dst_page, |
436 | struct page *page, | 429 | struct page *src_page, |
437 | unsigned long extent_offset) | 430 | unsigned long extent_offset, int op) |
438 | { | 431 | { |
432 | pgoff_t page_index = op == ENCRYPT ? src_page->index : dst_page->index; | ||
439 | loff_t extent_base; | 433 | loff_t extent_base; |
440 | char extent_iv[ECRYPTFS_MAX_IV_BYTES]; | 434 | char extent_iv[ECRYPTFS_MAX_IV_BYTES]; |
435 | struct scatterlist src_sg, dst_sg; | ||
436 | size_t extent_size = crypt_stat->extent_size; | ||
441 | int rc; | 437 | int rc; |
442 | 438 | ||
443 | extent_base = (((loff_t)page->index) | 439 | extent_base = (((loff_t)page_index) * (PAGE_CACHE_SIZE / extent_size)); |
444 | * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); | ||
445 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, | 440 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, |
446 | (extent_base + extent_offset)); | 441 | (extent_base + extent_offset)); |
447 | if (rc) { | 442 | if (rc) { |
@@ -450,15 +445,21 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, | |||
450 | (unsigned long long)(extent_base + extent_offset), rc); | 445 | (unsigned long long)(extent_base + extent_offset), rc); |
451 | goto out; | 446 | goto out; |
452 | } | 447 | } |
453 | rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0, | 448 | |
454 | page, (extent_offset | 449 | sg_init_table(&src_sg, 1); |
455 | * crypt_stat->extent_size), | 450 | sg_init_table(&dst_sg, 1); |
456 | crypt_stat->extent_size, extent_iv); | 451 | |
452 | sg_set_page(&src_sg, src_page, extent_size, | ||
453 | extent_offset * extent_size); | ||
454 | sg_set_page(&dst_sg, dst_page, extent_size, | ||
455 | extent_offset * extent_size); | ||
456 | |||
457 | rc = crypt_scatterlist(crypt_stat, &dst_sg, &src_sg, extent_size, | ||
458 | extent_iv, op); | ||
457 | if (rc < 0) { | 459 | if (rc < 0) { |
458 | printk(KERN_ERR "%s: Error attempting to encrypt page with " | 460 | printk(KERN_ERR "%s: Error attempting to crypt page with " |
459 | "page->index = [%ld], extent_offset = [%ld]; " | 461 | "page_index = [%ld], extent_offset = [%ld]; " |
460 | "rc = [%d]\n", __func__, page->index, extent_offset, | 462 | "rc = [%d]\n", __func__, page_index, extent_offset, rc); |
461 | rc); | ||
462 | goto out; | 463 | goto out; |
463 | } | 464 | } |
464 | rc = 0; | 465 | rc = 0; |
@@ -489,6 +490,7 @@ int ecryptfs_encrypt_page(struct page *page) | |||
489 | char *enc_extent_virt; | 490 | char *enc_extent_virt; |
490 | struct page *enc_extent_page = NULL; | 491 | struct page *enc_extent_page = NULL; |
491 | loff_t extent_offset; | 492 | loff_t extent_offset; |
493 | loff_t lower_offset; | ||
492 | int rc = 0; | 494 | int rc = 0; |
493 | 495 | ||
494 | ecryptfs_inode = page->mapping->host; | 496 | ecryptfs_inode = page->mapping->host; |
@@ -502,75 +504,35 @@ int ecryptfs_encrypt_page(struct page *page) | |||
502 | "encrypted extent\n"); | 504 | "encrypted extent\n"); |
503 | goto out; | 505 | goto out; |
504 | } | 506 | } |
505 | enc_extent_virt = kmap(enc_extent_page); | 507 | |
506 | for (extent_offset = 0; | 508 | for (extent_offset = 0; |
507 | extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); | 509 | extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); |
508 | extent_offset++) { | 510 | extent_offset++) { |
509 | loff_t offset; | 511 | rc = crypt_extent(crypt_stat, enc_extent_page, page, |
510 | 512 | extent_offset, ENCRYPT); | |
511 | rc = ecryptfs_encrypt_extent(enc_extent_page, crypt_stat, page, | ||
512 | extent_offset); | ||
513 | if (rc) { | 513 | if (rc) { |
514 | printk(KERN_ERR "%s: Error encrypting extent; " | 514 | printk(KERN_ERR "%s: Error encrypting extent; " |
515 | "rc = [%d]\n", __func__, rc); | 515 | "rc = [%d]\n", __func__, rc); |
516 | goto out; | 516 | goto out; |
517 | } | 517 | } |
518 | ecryptfs_lower_offset_for_extent( | ||
519 | &offset, ((((loff_t)page->index) | ||
520 | * (PAGE_CACHE_SIZE | ||
521 | / crypt_stat->extent_size)) | ||
522 | + extent_offset), crypt_stat); | ||
523 | rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, | ||
524 | offset, crypt_stat->extent_size); | ||
525 | if (rc < 0) { | ||
526 | ecryptfs_printk(KERN_ERR, "Error attempting " | ||
527 | "to write lower page; rc = [%d]" | ||
528 | "\n", rc); | ||
529 | goto out; | ||
530 | } | ||
531 | } | ||
532 | rc = 0; | ||
533 | out: | ||
534 | if (enc_extent_page) { | ||
535 | kunmap(enc_extent_page); | ||
536 | __free_page(enc_extent_page); | ||
537 | } | 518 | } |
538 | return rc; | ||
539 | } | ||
540 | 519 | ||
541 | static int ecryptfs_decrypt_extent(struct page *page, | 520 | lower_offset = lower_offset_for_page(crypt_stat, page); |
542 | struct ecryptfs_crypt_stat *crypt_stat, | 521 | enc_extent_virt = kmap(enc_extent_page); |
543 | struct page *enc_extent_page, | 522 | rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset, |
544 | unsigned long extent_offset) | 523 | PAGE_CACHE_SIZE); |
545 | { | 524 | kunmap(enc_extent_page); |
546 | loff_t extent_base; | ||
547 | char extent_iv[ECRYPTFS_MAX_IV_BYTES]; | ||
548 | int rc; | ||
549 | |||
550 | extent_base = (((loff_t)page->index) | ||
551 | * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); | ||
552 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, | ||
553 | (extent_base + extent_offset)); | ||
554 | if (rc) { | ||
555 | ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for " | ||
556 | "extent [0x%.16llx]; rc = [%d]\n", | ||
557 | (unsigned long long)(extent_base + extent_offset), rc); | ||
558 | goto out; | ||
559 | } | ||
560 | rc = ecryptfs_decrypt_page_offset(crypt_stat, page, | ||
561 | (extent_offset | ||
562 | * crypt_stat->extent_size), | ||
563 | enc_extent_page, 0, | ||
564 | crypt_stat->extent_size, extent_iv); | ||
565 | if (rc < 0) { | 525 | if (rc < 0) { |
566 | printk(KERN_ERR "%s: Error attempting to decrypt to page with " | 526 | ecryptfs_printk(KERN_ERR, |
567 | "page->index = [%ld], extent_offset = [%ld]; " | 527 | "Error attempting to write lower page; rc = [%d]\n", |
568 | "rc = [%d]\n", __func__, page->index, extent_offset, | 528 | rc); |
569 | rc); | ||
570 | goto out; | 529 | goto out; |
571 | } | 530 | } |
572 | rc = 0; | 531 | rc = 0; |
573 | out: | 532 | out: |
533 | if (enc_extent_page) { | ||
534 | __free_page(enc_extent_page); | ||
535 | } | ||
574 | return rc; | 536 | return rc; |
575 | } | 537 | } |
576 | 538 | ||
@@ -594,43 +556,33 @@ int ecryptfs_decrypt_page(struct page *page) | |||
594 | { | 556 | { |
595 | struct inode *ecryptfs_inode; | 557 | struct inode *ecryptfs_inode; |
596 | struct ecryptfs_crypt_stat *crypt_stat; | 558 | struct ecryptfs_crypt_stat *crypt_stat; |
597 | char *enc_extent_virt; | 559 | char *page_virt; |
598 | struct page *enc_extent_page = NULL; | ||
599 | unsigned long extent_offset; | 560 | unsigned long extent_offset; |
561 | loff_t lower_offset; | ||
600 | int rc = 0; | 562 | int rc = 0; |
601 | 563 | ||
602 | ecryptfs_inode = page->mapping->host; | 564 | ecryptfs_inode = page->mapping->host; |
603 | crypt_stat = | 565 | crypt_stat = |
604 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); | 566 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); |
605 | BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); | 567 | BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); |
606 | enc_extent_page = alloc_page(GFP_USER); | 568 | |
607 | if (!enc_extent_page) { | 569 | lower_offset = lower_offset_for_page(crypt_stat, page); |
608 | rc = -ENOMEM; | 570 | page_virt = kmap(page); |
609 | ecryptfs_printk(KERN_ERR, "Error allocating memory for " | 571 | rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_CACHE_SIZE, |
610 | "encrypted extent\n"); | 572 | ecryptfs_inode); |
573 | kunmap(page); | ||
574 | if (rc < 0) { | ||
575 | ecryptfs_printk(KERN_ERR, | ||
576 | "Error attempting to read lower page; rc = [%d]\n", | ||
577 | rc); | ||
611 | goto out; | 578 | goto out; |
612 | } | 579 | } |
613 | enc_extent_virt = kmap(enc_extent_page); | 580 | |
614 | for (extent_offset = 0; | 581 | for (extent_offset = 0; |
615 | extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); | 582 | extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); |
616 | extent_offset++) { | 583 | extent_offset++) { |
617 | loff_t offset; | 584 | rc = crypt_extent(crypt_stat, page, page, |
618 | 585 | extent_offset, DECRYPT); | |
619 | ecryptfs_lower_offset_for_extent( | ||
620 | &offset, ((page->index * (PAGE_CACHE_SIZE | ||
621 | / crypt_stat->extent_size)) | ||
622 | + extent_offset), crypt_stat); | ||
623 | rc = ecryptfs_read_lower(enc_extent_virt, offset, | ||
624 | crypt_stat->extent_size, | ||
625 | ecryptfs_inode); | ||
626 | if (rc < 0) { | ||
627 | ecryptfs_printk(KERN_ERR, "Error attempting " | ||
628 | "to read lower page; rc = [%d]" | ||
629 | "\n", rc); | ||
630 | goto out; | ||
631 | } | ||
632 | rc = ecryptfs_decrypt_extent(page, crypt_stat, enc_extent_page, | ||
633 | extent_offset); | ||
634 | if (rc) { | 586 | if (rc) { |
635 | printk(KERN_ERR "%s: Error encrypting extent; " | 587 | printk(KERN_ERR "%s: Error encrypting extent; " |
636 | "rc = [%d]\n", __func__, rc); | 588 | "rc = [%d]\n", __func__, rc); |
@@ -638,142 +590,9 @@ int ecryptfs_decrypt_page(struct page *page) | |||
638 | } | 590 | } |
639 | } | 591 | } |
640 | out: | 592 | out: |
641 | if (enc_extent_page) { | ||
642 | kunmap(enc_extent_page); | ||
643 | __free_page(enc_extent_page); | ||
644 | } | ||
645 | return rc; | 593 | return rc; |
646 | } | 594 | } |
647 | 595 | ||
648 | /** | ||
649 | * decrypt_scatterlist | ||
650 | * @crypt_stat: Cryptographic context | ||
651 | * @dest_sg: The destination scatterlist to decrypt into | ||
652 | * @src_sg: The source scatterlist to decrypt from | ||
653 | * @size: The number of bytes to decrypt | ||
654 | * @iv: The initialization vector to use for the decryption | ||
655 | * | ||
656 | * Returns the number of bytes decrypted; negative value on error | ||
657 | */ | ||
658 | static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, | ||
659 | struct scatterlist *dest_sg, | ||
660 | struct scatterlist *src_sg, int size, | ||
661 | unsigned char *iv) | ||
662 | { | ||
663 | struct ablkcipher_request *req = NULL; | ||
664 | struct extent_crypt_result ecr; | ||
665 | int rc = 0; | ||
666 | |||
667 | BUG_ON(!crypt_stat || !crypt_stat->tfm | ||
668 | || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); | ||
669 | if (unlikely(ecryptfs_verbosity > 0)) { | ||
670 | ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", | ||
671 | crypt_stat->key_size); | ||
672 | ecryptfs_dump_hex(crypt_stat->key, | ||
673 | crypt_stat->key_size); | ||
674 | } | ||
675 | |||
676 | init_completion(&ecr.completion); | ||
677 | |||
678 | mutex_lock(&crypt_stat->cs_tfm_mutex); | ||
679 | req = ablkcipher_request_alloc(crypt_stat->tfm, GFP_NOFS); | ||
680 | if (!req) { | ||
681 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
682 | rc = -ENOMEM; | ||
683 | goto out; | ||
684 | } | ||
685 | |||
686 | ablkcipher_request_set_callback(req, | ||
687 | CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, | ||
688 | extent_crypt_complete, &ecr); | ||
689 | /* Consider doing this once, when the file is opened */ | ||
690 | if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) { | ||
691 | rc = crypto_ablkcipher_setkey(crypt_stat->tfm, crypt_stat->key, | ||
692 | crypt_stat->key_size); | ||
693 | if (rc) { | ||
694 | ecryptfs_printk(KERN_ERR, | ||
695 | "Error setting key; rc = [%d]\n", | ||
696 | rc); | ||
697 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
698 | rc = -EINVAL; | ||
699 | goto out; | ||
700 | } | ||
701 | crypt_stat->flags |= ECRYPTFS_KEY_SET; | ||
702 | } | ||
703 | mutex_unlock(&crypt_stat->cs_tfm_mutex); | ||
704 | ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size); | ||
705 | ablkcipher_request_set_crypt(req, src_sg, dest_sg, size, iv); | ||
706 | rc = crypto_ablkcipher_decrypt(req); | ||
707 | if (rc == -EINPROGRESS || rc == -EBUSY) { | ||
708 | struct extent_crypt_result *ecr = req->base.data; | ||
709 | |||
710 | wait_for_completion(&ecr->completion); | ||
711 | rc = ecr->rc; | ||
712 | INIT_COMPLETION(ecr->completion); | ||
713 | } | ||
714 | out: | ||
715 | ablkcipher_request_free(req); | ||
716 | return rc; | ||
717 | |||
718 | } | ||
719 | |||
720 | /** | ||
721 | * ecryptfs_encrypt_page_offset | ||
722 | * @crypt_stat: The cryptographic context | ||
723 | * @dst_page: The page to encrypt into | ||
724 | * @dst_offset: The offset in the page to encrypt into | ||
725 | * @src_page: The page to encrypt from | ||
726 | * @src_offset: The offset in the page to encrypt from | ||
727 | * @size: The number of bytes to encrypt | ||
728 | * @iv: The initialization vector to use for the encryption | ||
729 | * | ||
730 | * Returns the number of bytes encrypted | ||
731 | */ | ||
732 | static int | ||
733 | ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | ||
734 | struct page *dst_page, int dst_offset, | ||
735 | struct page *src_page, int src_offset, int size, | ||
736 | unsigned char *iv) | ||
737 | { | ||
738 | struct scatterlist src_sg, dst_sg; | ||
739 | |||
740 | sg_init_table(&src_sg, 1); | ||
741 | sg_init_table(&dst_sg, 1); | ||
742 | |||
743 | sg_set_page(&src_sg, src_page, size, src_offset); | ||
744 | sg_set_page(&dst_sg, dst_page, size, dst_offset); | ||
745 | return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); | ||
746 | } | ||
747 | |||
748 | /** | ||
749 | * ecryptfs_decrypt_page_offset | ||
750 | * @crypt_stat: The cryptographic context | ||
751 | * @dst_page: The page to decrypt into | ||
752 | * @dst_offset: The offset in the page to decrypt into | ||
753 | * @src_page: The page to decrypt from | ||
754 | * @src_offset: The offset in the page to decrypt from | ||
755 | * @size: The number of bytes to decrypt | ||
756 | * @iv: The initialization vector to use for the decryption | ||
757 | * | ||
758 | * Returns the number of bytes decrypted | ||
759 | */ | ||
760 | static int | ||
761 | ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | ||
762 | struct page *dst_page, int dst_offset, | ||
763 | struct page *src_page, int src_offset, int size, | ||
764 | unsigned char *iv) | ||
765 | { | ||
766 | struct scatterlist src_sg, dst_sg; | ||
767 | |||
768 | sg_init_table(&src_sg, 1); | ||
769 | sg_set_page(&src_sg, src_page, size, src_offset); | ||
770 | |||
771 | sg_init_table(&dst_sg, 1); | ||
772 | sg_set_page(&dst_sg, dst_page, size, dst_offset); | ||
773 | |||
774 | return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); | ||
775 | } | ||
776 | |||
777 | #define ECRYPTFS_MAX_SCATTERLIST_LEN 4 | 596 | #define ECRYPTFS_MAX_SCATTERLIST_LEN 4 |
778 | 597 | ||
779 | /** | 598 | /** |
@@ -2243,12 +2062,11 @@ out: | |||
2243 | */ | 2062 | */ |
2244 | int ecryptfs_decode_and_decrypt_filename(char **plaintext_name, | 2063 | int ecryptfs_decode_and_decrypt_filename(char **plaintext_name, |
2245 | size_t *plaintext_name_size, | 2064 | size_t *plaintext_name_size, |
2246 | struct dentry *ecryptfs_dir_dentry, | 2065 | struct super_block *sb, |
2247 | const char *name, size_t name_size) | 2066 | const char *name, size_t name_size) |
2248 | { | 2067 | { |
2249 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = | 2068 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = |
2250 | &ecryptfs_superblock_to_private( | 2069 | &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; |
2251 | ecryptfs_dir_dentry->d_sb)->mount_crypt_stat; | ||
2252 | char *decoded_name; | 2070 | char *decoded_name; |
2253 | size_t decoded_name_size; | 2071 | size_t decoded_name_size; |
2254 | size_t packet_size; | 2072 | size_t packet_size; |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index f622a733f7ad..df19d34a033b 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -575,7 +575,7 @@ int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, | |||
575 | struct inode *ecryptfs_inode); | 575 | struct inode *ecryptfs_inode); |
576 | int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, | 576 | int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, |
577 | size_t *decrypted_name_size, | 577 | size_t *decrypted_name_size, |
578 | struct dentry *ecryptfs_dentry, | 578 | struct super_block *sb, |
579 | const char *name, size_t name_size); | 579 | const char *name, size_t name_size); |
580 | int ecryptfs_fill_zeros(struct file *file, loff_t new_length); | 580 | int ecryptfs_fill_zeros(struct file *file, loff_t new_length); |
581 | int ecryptfs_encrypt_and_encode_filename( | 581 | int ecryptfs_encrypt_and_encode_filename( |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index a7abbea2c096..992cf95830b5 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
@@ -49,7 +49,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, | |||
49 | unsigned long nr_segs, loff_t pos) | 49 | unsigned long nr_segs, loff_t pos) |
50 | { | 50 | { |
51 | ssize_t rc; | 51 | ssize_t rc; |
52 | struct path lower; | 52 | struct path *path; |
53 | struct file *file = iocb->ki_filp; | 53 | struct file *file = iocb->ki_filp; |
54 | 54 | ||
55 | rc = generic_file_aio_read(iocb, iov, nr_segs, pos); | 55 | rc = generic_file_aio_read(iocb, iov, nr_segs, pos); |
@@ -60,17 +60,16 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, | |||
60 | if (-EIOCBQUEUED == rc) | 60 | if (-EIOCBQUEUED == rc) |
61 | rc = wait_on_sync_kiocb(iocb); | 61 | rc = wait_on_sync_kiocb(iocb); |
62 | if (rc >= 0) { | 62 | if (rc >= 0) { |
63 | lower.dentry = ecryptfs_dentry_to_lower(file->f_path.dentry); | 63 | path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); |
64 | lower.mnt = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry); | 64 | touch_atime(path); |
65 | touch_atime(&lower); | ||
66 | } | 65 | } |
67 | return rc; | 66 | return rc; |
68 | } | 67 | } |
69 | 68 | ||
70 | struct ecryptfs_getdents_callback { | 69 | struct ecryptfs_getdents_callback { |
71 | void *dirent; | 70 | struct dir_context ctx; |
72 | struct dentry *dentry; | 71 | struct dir_context *caller; |
73 | filldir_t filldir; | 72 | struct super_block *sb; |
74 | int filldir_called; | 73 | int filldir_called; |
75 | int entries_written; | 74 | int entries_written; |
76 | }; | 75 | }; |
@@ -88,7 +87,7 @@ ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen, | |||
88 | 87 | ||
89 | buf->filldir_called++; | 88 | buf->filldir_called++; |
90 | rc = ecryptfs_decode_and_decrypt_filename(&name, &name_size, | 89 | rc = ecryptfs_decode_and_decrypt_filename(&name, &name_size, |
91 | buf->dentry, lower_name, | 90 | buf->sb, lower_name, |
92 | lower_namelen); | 91 | lower_namelen); |
93 | if (rc) { | 92 | if (rc) { |
94 | printk(KERN_ERR "%s: Error attempting to decode and decrypt " | 93 | printk(KERN_ERR "%s: Error attempting to decode and decrypt " |
@@ -96,9 +95,10 @@ ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen, | |||
96 | rc); | 95 | rc); |
97 | goto out; | 96 | goto out; |
98 | } | 97 | } |
99 | rc = buf->filldir(buf->dirent, name, name_size, offset, ino, d_type); | 98 | buf->caller->pos = buf->ctx.pos; |
99 | rc = !dir_emit(buf->caller, name, name_size, ino, d_type); | ||
100 | kfree(name); | 100 | kfree(name); |
101 | if (rc >= 0) | 101 | if (!rc) |
102 | buf->entries_written++; | 102 | buf->entries_written++; |
103 | out: | 103 | out: |
104 | return rc; | 104 | return rc; |
@@ -107,27 +107,22 @@ out: | |||
107 | /** | 107 | /** |
108 | * ecryptfs_readdir | 108 | * ecryptfs_readdir |
109 | * @file: The eCryptfs directory file | 109 | * @file: The eCryptfs directory file |
110 | * @dirent: Directory entry handle | 110 | * @ctx: The actor to feed the entries to |
111 | * @filldir: The filldir callback function | ||
112 | */ | 111 | */ |
113 | static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) | 112 | static int ecryptfs_readdir(struct file *file, struct dir_context *ctx) |
114 | { | 113 | { |
115 | int rc; | 114 | int rc; |
116 | struct file *lower_file; | 115 | struct file *lower_file; |
117 | struct inode *inode; | 116 | struct inode *inode = file_inode(file); |
118 | struct ecryptfs_getdents_callback buf; | 117 | struct ecryptfs_getdents_callback buf = { |
119 | 118 | .ctx.actor = ecryptfs_filldir, | |
119 | .caller = ctx, | ||
120 | .sb = inode->i_sb, | ||
121 | }; | ||
120 | lower_file = ecryptfs_file_to_lower(file); | 122 | lower_file = ecryptfs_file_to_lower(file); |
121 | lower_file->f_pos = file->f_pos; | 123 | lower_file->f_pos = ctx->pos; |
122 | inode = file_inode(file); | 124 | rc = iterate_dir(lower_file, &buf.ctx); |
123 | memset(&buf, 0, sizeof(buf)); | 125 | ctx->pos = buf.ctx.pos; |
124 | buf.dirent = dirent; | ||
125 | buf.dentry = file->f_path.dentry; | ||
126 | buf.filldir = filldir; | ||
127 | buf.filldir_called = 0; | ||
128 | buf.entries_written = 0; | ||
129 | rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); | ||
130 | file->f_pos = lower_file->f_pos; | ||
131 | if (rc < 0) | 126 | if (rc < 0) |
132 | goto out; | 127 | goto out; |
133 | if (buf.filldir_called && !buf.entries_written) | 128 | if (buf.filldir_called && !buf.entries_written) |
@@ -344,7 +339,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
344 | #endif | 339 | #endif |
345 | 340 | ||
346 | const struct file_operations ecryptfs_dir_fops = { | 341 | const struct file_operations ecryptfs_dir_fops = { |
347 | .readdir = ecryptfs_readdir, | 342 | .iterate = ecryptfs_readdir, |
348 | .read = generic_read_dir, | 343 | .read = generic_read_dir, |
349 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, | 344 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, |
350 | #ifdef CONFIG_COMPAT | 345 | #ifdef CONFIG_COMPAT |
@@ -365,7 +360,7 @@ const struct file_operations ecryptfs_main_fops = { | |||
365 | .aio_read = ecryptfs_read_update_atime, | 360 | .aio_read = ecryptfs_read_update_atime, |
366 | .write = do_sync_write, | 361 | .write = do_sync_write, |
367 | .aio_write = generic_file_aio_write, | 362 | .aio_write = generic_file_aio_write, |
368 | .readdir = ecryptfs_readdir, | 363 | .iterate = ecryptfs_readdir, |
369 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, | 364 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, |
370 | #ifdef CONFIG_COMPAT | 365 | #ifdef CONFIG_COMPAT |
371 | .compat_ioctl = ecryptfs_compat_ioctl, | 366 | .compat_ioctl = ecryptfs_compat_ioctl, |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5eab400e2590..67e9b6339691 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -358,7 +358,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry, | |||
358 | 358 | ||
359 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); | 359 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); |
360 | fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); | 360 | fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); |
361 | BUG_ON(!lower_dentry->d_count); | 361 | BUG_ON(!d_count(lower_dentry)); |
362 | 362 | ||
363 | ecryptfs_set_dentry_private(dentry, dentry_info); | 363 | ecryptfs_set_dentry_private(dentry, dentry_info); |
364 | ecryptfs_set_dentry_lower(dentry, lower_dentry); | 364 | ecryptfs_set_dentry_lower(dentry, lower_dentry); |
@@ -679,7 +679,7 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, | |||
679 | set_fs(old_fs); | 679 | set_fs(old_fs); |
680 | if (rc < 0) | 680 | if (rc < 0) |
681 | goto out; | 681 | goto out; |
682 | rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry, | 682 | rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry->d_sb, |
683 | lower_buf, rc); | 683 | lower_buf, rc); |
684 | out: | 684 | out: |
685 | kfree(lower_buf); | 685 | kfree(lower_buf); |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index e924cf45aad9..eb1c5979ecaf 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -120,16 +120,15 @@ static int ecryptfs_init_lower_file(struct dentry *dentry, | |||
120 | struct file **lower_file) | 120 | struct file **lower_file) |
121 | { | 121 | { |
122 | const struct cred *cred = current_cred(); | 122 | const struct cred *cred = current_cred(); |
123 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); | 123 | struct path *path = ecryptfs_dentry_to_lower_path(dentry); |
124 | struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); | ||
125 | int rc; | 124 | int rc; |
126 | 125 | ||
127 | rc = ecryptfs_privileged_open(lower_file, lower_dentry, lower_mnt, | 126 | rc = ecryptfs_privileged_open(lower_file, path->dentry, path->mnt, |
128 | cred); | 127 | cred); |
129 | if (rc) { | 128 | if (rc) { |
130 | printk(KERN_ERR "Error opening lower file " | 129 | printk(KERN_ERR "Error opening lower file " |
131 | "for lower_dentry [0x%p] and lower_mnt [0x%p]; " | 130 | "for lower_dentry [0x%p] and lower_mnt [0x%p]; " |
132 | "rc = [%d]\n", lower_dentry, lower_mnt, rc); | 131 | "rc = [%d]\n", path->dentry, path->mnt, rc); |
133 | (*lower_file) = NULL; | 132 | (*lower_file) = NULL; |
134 | } | 133 | } |
135 | return rc; | 134 | return rc; |
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 49ff8ea08f1c..e57380e5f6bd 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -247,14 +247,13 @@ int ecryptfs_process_response(struct ecryptfs_daemon *daemon, | |||
247 | goto unlock; | 247 | goto unlock; |
248 | } | 248 | } |
249 | msg_size = (sizeof(*msg) + msg->data_len); | 249 | msg_size = (sizeof(*msg) + msg->data_len); |
250 | msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); | 250 | msg_ctx->msg = kmemdup(msg, msg_size, GFP_KERNEL); |
251 | if (!msg_ctx->msg) { | 251 | if (!msg_ctx->msg) { |
252 | rc = -ENOMEM; | 252 | rc = -ENOMEM; |
253 | printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of " | 253 | printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of " |
254 | "GFP_KERNEL memory\n", __func__, msg_size); | 254 | "GFP_KERNEL memory\n", __func__, msg_size); |
255 | goto unlock; | 255 | goto unlock; |
256 | } | 256 | } |
257 | memcpy(msg_ctx->msg, msg, msg_size); | ||
258 | msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE; | 257 | msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE; |
259 | wake_up_process(msg_ctx->task); | 258 | wake_up_process(msg_ctx->task); |
260 | rc = 0; | 259 | rc = 0; |
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 7e787fb90293..07ab49745e31 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c | |||
@@ -155,20 +155,8 @@ static int efivarfs_unlink(struct inode *dir, struct dentry *dentry) | |||
155 | return 0; | 155 | return 0; |
156 | }; | 156 | }; |
157 | 157 | ||
158 | /* | ||
159 | * Handle negative dentry. | ||
160 | */ | ||
161 | static struct dentry *efivarfs_lookup(struct inode *dir, struct dentry *dentry, | ||
162 | unsigned int flags) | ||
163 | { | ||
164 | if (dentry->d_name.len > NAME_MAX) | ||
165 | return ERR_PTR(-ENAMETOOLONG); | ||
166 | d_add(dentry, NULL); | ||
167 | return NULL; | ||
168 | } | ||
169 | |||
170 | const struct inode_operations efivarfs_dir_inode_operations = { | 158 | const struct inode_operations efivarfs_dir_inode_operations = { |
171 | .lookup = efivarfs_lookup, | 159 | .lookup = simple_lookup, |
172 | .unlink = efivarfs_unlink, | 160 | .unlink = efivarfs_unlink, |
173 | .create = efivarfs_create, | 161 | .create = efivarfs_create, |
174 | }; | 162 | }; |
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 141aee31884f..a8766b880c07 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c | |||
@@ -45,8 +45,8 @@ static struct super_block *efivarfs_sb; | |||
45 | * So we need to perform a case-sensitive match on part 1 and a | 45 | * So we need to perform a case-sensitive match on part 1 and a |
46 | * case-insensitive match on part 2. | 46 | * case-insensitive match on part 2. |
47 | */ | 47 | */ |
48 | static int efivarfs_d_compare(const struct dentry *parent, const struct inode *pinode, | 48 | static int efivarfs_d_compare(const struct dentry *parent, |
49 | const struct dentry *dentry, const struct inode *inode, | 49 | const struct dentry *dentry, |
50 | unsigned int len, const char *str, | 50 | unsigned int len, const char *str, |
51 | const struct qstr *name) | 51 | const struct qstr *name) |
52 | { | 52 | { |
@@ -63,8 +63,7 @@ static int efivarfs_d_compare(const struct dentry *parent, const struct inode *p | |||
63 | return strncasecmp(name->name + guid, str + guid, EFI_VARIABLE_GUID_LEN); | 63 | return strncasecmp(name->name + guid, str + guid, EFI_VARIABLE_GUID_LEN); |
64 | } | 64 | } |
65 | 65 | ||
66 | static int efivarfs_d_hash(const struct dentry *dentry, | 66 | static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) |
67 | const struct inode *inode, struct qstr *qstr) | ||
68 | { | 67 | { |
69 | unsigned long hash = init_name_hash(); | 68 | unsigned long hash = init_name_hash(); |
70 | const unsigned char *s = qstr->name; | 69 | const unsigned char *s = qstr->name; |
@@ -108,7 +107,7 @@ static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) | |||
108 | q.name = name; | 107 | q.name = name; |
109 | q.len = strlen(name); | 108 | q.len = strlen(name); |
110 | 109 | ||
111 | err = efivarfs_d_hash(NULL, NULL, &q); | 110 | err = efivarfs_d_hash(NULL, &q); |
112 | if (err) | 111 | if (err) |
113 | return ERR_PTR(err); | 112 | return ERR_PTR(err); |
114 | 113 | ||
diff --git a/fs/efs/dir.c b/fs/efs/dir.c index 055a9e9ca747..b72307ccdf7a 100644 --- a/fs/efs/dir.c +++ b/fs/efs/dir.c | |||
@@ -7,40 +7,38 @@ | |||
7 | #include <linux/buffer_head.h> | 7 | #include <linux/buffer_head.h> |
8 | #include "efs.h" | 8 | #include "efs.h" |
9 | 9 | ||
10 | static int efs_readdir(struct file *, void *, filldir_t); | 10 | static int efs_readdir(struct file *, struct dir_context *); |
11 | 11 | ||
12 | const struct file_operations efs_dir_operations = { | 12 | const struct file_operations efs_dir_operations = { |
13 | .llseek = generic_file_llseek, | 13 | .llseek = generic_file_llseek, |
14 | .read = generic_read_dir, | 14 | .read = generic_read_dir, |
15 | .readdir = efs_readdir, | 15 | .iterate = efs_readdir, |
16 | }; | 16 | }; |
17 | 17 | ||
18 | const struct inode_operations efs_dir_inode_operations = { | 18 | const struct inode_operations efs_dir_inode_operations = { |
19 | .lookup = efs_lookup, | 19 | .lookup = efs_lookup, |
20 | }; | 20 | }; |
21 | 21 | ||
22 | static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { | 22 | static int efs_readdir(struct file *file, struct dir_context *ctx) |
23 | struct inode *inode = file_inode(filp); | 23 | { |
24 | struct buffer_head *bh; | 24 | struct inode *inode = file_inode(file); |
25 | |||
26 | struct efs_dir *dirblock; | ||
27 | struct efs_dentry *dirslot; | ||
28 | efs_ino_t inodenum; | ||
29 | efs_block_t block; | 25 | efs_block_t block; |
30 | int slot, namelen; | 26 | int slot; |
31 | char *nameptr; | ||
32 | 27 | ||
33 | if (inode->i_size & (EFS_DIRBSIZE-1)) | 28 | if (inode->i_size & (EFS_DIRBSIZE-1)) |
34 | printk(KERN_WARNING "EFS: WARNING: readdir(): directory size not a multiple of EFS_DIRBSIZE\n"); | 29 | printk(KERN_WARNING "EFS: WARNING: readdir(): directory size not a multiple of EFS_DIRBSIZE\n"); |
35 | 30 | ||
36 | /* work out where this entry can be found */ | 31 | /* work out where this entry can be found */ |
37 | block = filp->f_pos >> EFS_DIRBSIZE_BITS; | 32 | block = ctx->pos >> EFS_DIRBSIZE_BITS; |
38 | 33 | ||
39 | /* each block contains at most 256 slots */ | 34 | /* each block contains at most 256 slots */ |
40 | slot = filp->f_pos & 0xff; | 35 | slot = ctx->pos & 0xff; |
41 | 36 | ||
42 | /* look at all blocks */ | 37 | /* look at all blocks */ |
43 | while (block < inode->i_blocks) { | 38 | while (block < inode->i_blocks) { |
39 | struct efs_dir *dirblock; | ||
40 | struct buffer_head *bh; | ||
41 | |||
44 | /* read the dir block */ | 42 | /* read the dir block */ |
45 | bh = sb_bread(inode->i_sb, efs_bmap(inode, block)); | 43 | bh = sb_bread(inode->i_sb, efs_bmap(inode, block)); |
46 | 44 | ||
@@ -57,11 +55,14 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { | |||
57 | break; | 55 | break; |
58 | } | 56 | } |
59 | 57 | ||
60 | while (slot < dirblock->slots) { | 58 | for (; slot < dirblock->slots; slot++) { |
61 | if (dirblock->space[slot] == 0) { | 59 | struct efs_dentry *dirslot; |
62 | slot++; | 60 | efs_ino_t inodenum; |
61 | const char *nameptr; | ||
62 | int namelen; | ||
63 | |||
64 | if (dirblock->space[slot] == 0) | ||
63 | continue; | 65 | continue; |
64 | } | ||
65 | 66 | ||
66 | dirslot = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot)); | 67 | dirslot = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot)); |
67 | 68 | ||
@@ -72,39 +73,29 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { | |||
72 | #ifdef DEBUG | 73 | #ifdef DEBUG |
73 | printk(KERN_DEBUG "EFS: readdir(): block %d slot %d/%d: inode %u, name \"%s\", namelen %u\n", block, slot, dirblock->slots-1, inodenum, nameptr, namelen); | 74 | printk(KERN_DEBUG "EFS: readdir(): block %d slot %d/%d: inode %u, name \"%s\", namelen %u\n", block, slot, dirblock->slots-1, inodenum, nameptr, namelen); |
74 | #endif | 75 | #endif |
75 | if (namelen > 0) { | 76 | if (!namelen) |
76 | /* found the next entry */ | 77 | continue; |
77 | filp->f_pos = (block << EFS_DIRBSIZE_BITS) | slot; | 78 | /* found the next entry */ |
78 | 79 | ctx->pos = (block << EFS_DIRBSIZE_BITS) | slot; | |
79 | /* copy filename and data in dirslot */ | 80 | |
80 | filldir(dirent, nameptr, namelen, filp->f_pos, inodenum, DT_UNKNOWN); | 81 | /* sanity check */ |
81 | 82 | if (nameptr - (char *) dirblock + namelen > EFS_DIRBSIZE) { | |
82 | /* sanity check */ | 83 | printk(KERN_WARNING "EFS: directory entry %d exceeds directory block\n", slot); |
83 | if (nameptr - (char *) dirblock + namelen > EFS_DIRBSIZE) { | 84 | continue; |
84 | printk(KERN_WARNING "EFS: directory entry %d exceeds directory block\n", slot); | 85 | } |
85 | slot++; | 86 | |
86 | continue; | 87 | /* copy filename and data in dirslot */ |
87 | } | 88 | if (!dir_emit(ctx, nameptr, namelen, inodenum, DT_UNKNOWN)) { |
88 | |||
89 | /* store position of next slot */ | ||
90 | if (++slot == dirblock->slots) { | ||
91 | slot = 0; | ||
92 | block++; | ||
93 | } | ||
94 | brelse(bh); | 89 | brelse(bh); |
95 | filp->f_pos = (block << EFS_DIRBSIZE_BITS) | slot; | 90 | return 0; |
96 | goto out; | ||
97 | } | 91 | } |
98 | slot++; | ||
99 | } | 92 | } |
100 | brelse(bh); | 93 | brelse(bh); |
101 | 94 | ||
102 | slot = 0; | 95 | slot = 0; |
103 | block++; | 96 | block++; |
104 | } | 97 | } |
105 | 98 | ctx->pos = (block << EFS_DIRBSIZE_BITS) | slot; | |
106 | filp->f_pos = (block << EFS_DIRBSIZE_BITS) | slot; | ||
107 | out: | ||
108 | return 0; | 99 | return 0; |
109 | } | 100 | } |
110 | 101 | ||
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index deecc7294a67..9ad17b15b454 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/mutex.h> | 34 | #include <linux/mutex.h> |
35 | #include <linux/anon_inodes.h> | 35 | #include <linux/anon_inodes.h> |
36 | #include <linux/device.h> | 36 | #include <linux/device.h> |
37 | #include <linux/freezer.h> | ||
37 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
38 | #include <asm/io.h> | 39 | #include <asm/io.h> |
39 | #include <asm/mman.h> | 40 | #include <asm/mman.h> |
@@ -1602,7 +1603,8 @@ fetch_events: | |||
1602 | } | 1603 | } |
1603 | 1604 | ||
1604 | spin_unlock_irqrestore(&ep->lock, flags); | 1605 | spin_unlock_irqrestore(&ep->lock, flags); |
1605 | if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) | 1606 | if (!freezable_schedule_hrtimeout_range(to, slack, |
1607 | HRTIMER_MODE_ABS)) | ||
1606 | timed_out = 1; | 1608 | timed_out = 1; |
1607 | 1609 | ||
1608 | spin_lock_irqsave(&ep->lock, flags); | 1610 | spin_lock_irqsave(&ep->lock, flags); |
@@ -1975,8 +1977,8 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, | |||
1975 | return -EINVAL; | 1977 | return -EINVAL; |
1976 | if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) | 1978 | if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) |
1977 | return -EFAULT; | 1979 | return -EFAULT; |
1978 | sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); | 1980 | sigsaved = current->blocked; |
1979 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | 1981 | set_current_blocked(&ksigmask); |
1980 | } | 1982 | } |
1981 | 1983 | ||
1982 | error = sys_epoll_wait(epfd, events, maxevents, timeout); | 1984 | error = sys_epoll_wait(epfd, events, maxevents, timeout); |
@@ -1993,7 +1995,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, | |||
1993 | sizeof(sigsaved)); | 1995 | sizeof(sigsaved)); |
1994 | set_restore_sigmask(); | 1996 | set_restore_sigmask(); |
1995 | } else | 1997 | } else |
1996 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 1998 | set_current_blocked(&sigsaved); |
1997 | } | 1999 | } |
1998 | 2000 | ||
1999 | return error; | 2001 | return error; |
@@ -2020,8 +2022,8 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, | |||
2020 | if (copy_from_user(&csigmask, sigmask, sizeof(csigmask))) | 2022 | if (copy_from_user(&csigmask, sigmask, sizeof(csigmask))) |
2021 | return -EFAULT; | 2023 | return -EFAULT; |
2022 | sigset_from_compat(&ksigmask, &csigmask); | 2024 | sigset_from_compat(&ksigmask, &csigmask); |
2023 | sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); | 2025 | sigsaved = current->blocked; |
2024 | sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); | 2026 | set_current_blocked(&ksigmask); |
2025 | } | 2027 | } |
2026 | 2028 | ||
2027 | err = sys_epoll_wait(epfd, events, maxevents, timeout); | 2029 | err = sys_epoll_wait(epfd, events, maxevents, timeout); |
@@ -2038,7 +2040,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, | |||
2038 | sizeof(sigsaved)); | 2040 | sizeof(sigsaved)); |
2039 | set_restore_sigmask(); | 2041 | set_restore_sigmask(); |
2040 | } else | 2042 | } else |
2041 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 2043 | set_current_blocked(&sigsaved); |
2042 | } | 2044 | } |
2043 | 2045 | ||
2044 | return err; | 2046 | return err; |
@@ -110,13 +110,14 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) | |||
110 | static const struct open_flags uselib_flags = { | 110 | static const struct open_flags uselib_flags = { |
111 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, | 111 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, |
112 | .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN, | 112 | .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN, |
113 | .intent = LOOKUP_OPEN | 113 | .intent = LOOKUP_OPEN, |
114 | .lookup_flags = LOOKUP_FOLLOW, | ||
114 | }; | 115 | }; |
115 | 116 | ||
116 | if (IS_ERR(tmp)) | 117 | if (IS_ERR(tmp)) |
117 | goto out; | 118 | goto out; |
118 | 119 | ||
119 | file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW); | 120 | file = do_filp_open(AT_FDCWD, tmp, &uselib_flags); |
120 | putname(tmp); | 121 | putname(tmp); |
121 | error = PTR_ERR(file); | 122 | error = PTR_ERR(file); |
122 | if (IS_ERR(file)) | 123 | if (IS_ERR(file)) |
@@ -756,10 +757,11 @@ struct file *open_exec(const char *name) | |||
756 | static const struct open_flags open_exec_flags = { | 757 | static const struct open_flags open_exec_flags = { |
757 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, | 758 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, |
758 | .acc_mode = MAY_EXEC | MAY_OPEN, | 759 | .acc_mode = MAY_EXEC | MAY_OPEN, |
759 | .intent = LOOKUP_OPEN | 760 | .intent = LOOKUP_OPEN, |
761 | .lookup_flags = LOOKUP_FOLLOW, | ||
760 | }; | 762 | }; |
761 | 763 | ||
762 | file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags, LOOKUP_FOLLOW); | 764 | file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags); |
763 | if (IS_ERR(file)) | 765 | if (IS_ERR(file)) |
764 | goto out; | 766 | goto out; |
765 | 767 | ||
@@ -930,6 +932,7 @@ static int de_thread(struct task_struct *tsk) | |||
930 | * also take its birthdate (always earlier than our own). | 932 | * also take its birthdate (always earlier than our own). |
931 | */ | 933 | */ |
932 | tsk->start_time = leader->start_time; | 934 | tsk->start_time = leader->start_time; |
935 | tsk->real_start_time = leader->real_start_time; | ||
933 | 936 | ||
934 | BUG_ON(!same_thread_group(leader, tsk)); | 937 | BUG_ON(!same_thread_group(leader, tsk)); |
935 | BUG_ON(has_group_leader_pid(tsk)); | 938 | BUG_ON(has_group_leader_pid(tsk)); |
@@ -945,9 +948,8 @@ static int de_thread(struct task_struct *tsk) | |||
945 | * Note: The old leader also uses this pid until release_task | 948 | * Note: The old leader also uses this pid until release_task |
946 | * is called. Odd but simple and correct. | 949 | * is called. Odd but simple and correct. |
947 | */ | 950 | */ |
948 | detach_pid(tsk, PIDTYPE_PID); | ||
949 | tsk->pid = leader->pid; | 951 | tsk->pid = leader->pid; |
950 | attach_pid(tsk, PIDTYPE_PID, task_pid(leader)); | 952 | change_pid(tsk, PIDTYPE_PID, task_pid(leader)); |
951 | transfer_pid(leader, tsk, PIDTYPE_PGID); | 953 | transfer_pid(leader, tsk, PIDTYPE_PGID); |
952 | transfer_pid(leader, tsk, PIDTYPE_SID); | 954 | transfer_pid(leader, tsk, PIDTYPE_SID); |
953 | 955 | ||
@@ -1463,7 +1465,6 @@ static int do_execve_common(const char *filename, | |||
1463 | struct files_struct *displaced; | 1465 | struct files_struct *displaced; |
1464 | bool clear_in_exec; | 1466 | bool clear_in_exec; |
1465 | int retval; | 1467 | int retval; |
1466 | const struct cred *cred = current_cred(); | ||
1467 | 1468 | ||
1468 | /* | 1469 | /* |
1469 | * We move the actual failure in case of RLIMIT_NPROC excess from | 1470 | * We move the actual failure in case of RLIMIT_NPROC excess from |
@@ -1472,7 +1473,7 @@ static int do_execve_common(const char *filename, | |||
1472 | * whether NPROC limit is still exceeded. | 1473 | * whether NPROC limit is still exceeded. |
1473 | */ | 1474 | */ |
1474 | if ((current->flags & PF_NPROC_EXCEEDED) && | 1475 | if ((current->flags & PF_NPROC_EXCEEDED) && |
1475 | atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) { | 1476 | atomic_read(¤t_user()->processes) > rlimit(RLIMIT_NPROC)) { |
1476 | retval = -EAGAIN; | 1477 | retval = -EAGAIN; |
1477 | goto out_ret; | 1478 | goto out_ret; |
1478 | } | 1479 | } |
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 46375896cfc0..49f51ab4caac 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c | |||
@@ -239,22 +239,19 @@ void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode) | |||
239 | } | 239 | } |
240 | 240 | ||
241 | static int | 241 | static int |
242 | exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 242 | exofs_readdir(struct file *file, struct dir_context *ctx) |
243 | { | 243 | { |
244 | loff_t pos = filp->f_pos; | 244 | loff_t pos = ctx->pos; |
245 | struct inode *inode = file_inode(filp); | 245 | struct inode *inode = file_inode(file); |
246 | unsigned int offset = pos & ~PAGE_CACHE_MASK; | 246 | unsigned int offset = pos & ~PAGE_CACHE_MASK; |
247 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | 247 | unsigned long n = pos >> PAGE_CACHE_SHIFT; |
248 | unsigned long npages = dir_pages(inode); | 248 | unsigned long npages = dir_pages(inode); |
249 | unsigned chunk_mask = ~(exofs_chunk_size(inode)-1); | 249 | unsigned chunk_mask = ~(exofs_chunk_size(inode)-1); |
250 | unsigned char *types = NULL; | 250 | int need_revalidate = (file->f_version != inode->i_version); |
251 | int need_revalidate = (filp->f_version != inode->i_version); | ||
252 | 251 | ||
253 | if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1)) | 252 | if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1)) |
254 | return 0; | 253 | return 0; |
255 | 254 | ||
256 | types = exofs_filetype_table; | ||
257 | |||
258 | for ( ; n < npages; n++, offset = 0) { | 255 | for ( ; n < npages; n++, offset = 0) { |
259 | char *kaddr, *limit; | 256 | char *kaddr, *limit; |
260 | struct exofs_dir_entry *de; | 257 | struct exofs_dir_entry *de; |
@@ -263,7 +260,7 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
263 | if (IS_ERR(page)) { | 260 | if (IS_ERR(page)) { |
264 | EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n", | 261 | EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n", |
265 | inode->i_ino); | 262 | inode->i_ino); |
266 | filp->f_pos += PAGE_CACHE_SIZE - offset; | 263 | ctx->pos += PAGE_CACHE_SIZE - offset; |
267 | return PTR_ERR(page); | 264 | return PTR_ERR(page); |
268 | } | 265 | } |
269 | kaddr = page_address(page); | 266 | kaddr = page_address(page); |
@@ -271,9 +268,9 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
271 | if (offset) { | 268 | if (offset) { |
272 | offset = exofs_validate_entry(kaddr, offset, | 269 | offset = exofs_validate_entry(kaddr, offset, |
273 | chunk_mask); | 270 | chunk_mask); |
274 | filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset; | 271 | ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset; |
275 | } | 272 | } |
276 | filp->f_version = inode->i_version; | 273 | file->f_version = inode->i_version; |
277 | need_revalidate = 0; | 274 | need_revalidate = 0; |
278 | } | 275 | } |
279 | de = (struct exofs_dir_entry *)(kaddr + offset); | 276 | de = (struct exofs_dir_entry *)(kaddr + offset); |
@@ -288,27 +285,24 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
288 | return -EIO; | 285 | return -EIO; |
289 | } | 286 | } |
290 | if (de->inode_no) { | 287 | if (de->inode_no) { |
291 | int over; | 288 | unsigned char t; |
292 | unsigned char d_type = DT_UNKNOWN; | ||
293 | 289 | ||
294 | if (types && de->file_type < EXOFS_FT_MAX) | 290 | if (de->file_type < EXOFS_FT_MAX) |
295 | d_type = types[de->file_type]; | 291 | t = exofs_filetype_table[de->file_type]; |
292 | else | ||
293 | t = DT_UNKNOWN; | ||
296 | 294 | ||
297 | offset = (char *)de - kaddr; | 295 | if (!dir_emit(ctx, de->name, de->name_len, |
298 | over = filldir(dirent, de->name, de->name_len, | ||
299 | (n<<PAGE_CACHE_SHIFT) | offset, | ||
300 | le64_to_cpu(de->inode_no), | 296 | le64_to_cpu(de->inode_no), |
301 | d_type); | 297 | t)) { |
302 | if (over) { | ||
303 | exofs_put_page(page); | 298 | exofs_put_page(page); |
304 | return 0; | 299 | return 0; |
305 | } | 300 | } |
306 | } | 301 | } |
307 | filp->f_pos += le16_to_cpu(de->rec_len); | 302 | ctx->pos += le16_to_cpu(de->rec_len); |
308 | } | 303 | } |
309 | exofs_put_page(page); | 304 | exofs_put_page(page); |
310 | } | 305 | } |
311 | |||
312 | return 0; | 306 | return 0; |
313 | } | 307 | } |
314 | 308 | ||
@@ -669,5 +663,5 @@ not_empty: | |||
669 | const struct file_operations exofs_dir_operations = { | 663 | const struct file_operations exofs_dir_operations = { |
670 | .llseek = generic_file_llseek, | 664 | .llseek = generic_file_llseek, |
671 | .read = generic_read_dir, | 665 | .read = generic_read_dir, |
672 | .readdir = exofs_readdir, | 666 | .iterate = exofs_readdir, |
673 | }; | 667 | }; |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index d1f80abd8828..2ec8eb1ab269 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -953,9 +953,11 @@ static int exofs_releasepage(struct page *page, gfp_t gfp) | |||
953 | return 0; | 953 | return 0; |
954 | } | 954 | } |
955 | 955 | ||
956 | static void exofs_invalidatepage(struct page *page, unsigned long offset) | 956 | static void exofs_invalidatepage(struct page *page, unsigned int offset, |
957 | unsigned int length) | ||
957 | { | 958 | { |
958 | EXOFS_DBGMSG("page 0x%lx offset 0x%lx\n", page->index, offset); | 959 | EXOFS_DBGMSG("page 0x%lx offset 0x%x length 0x%x\n", |
960 | page->index, offset, length); | ||
959 | WARN_ON(1); | 961 | WARN_ON(1); |
960 | } | 962 | } |
961 | 963 | ||
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 262fc9940982..293bc2e47a73 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
@@ -212,6 +212,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) | |||
212 | } | 212 | } |
213 | 213 | ||
214 | struct getdents_callback { | 214 | struct getdents_callback { |
215 | struct dir_context ctx; | ||
215 | char *name; /* name that was found. It already points to a | 216 | char *name; /* name that was found. It already points to a |
216 | buffer NAME_MAX+1 is size */ | 217 | buffer NAME_MAX+1 is size */ |
217 | unsigned long ino; /* the inum we are looking for */ | 218 | unsigned long ino; /* the inum we are looking for */ |
@@ -254,7 +255,11 @@ static int get_name(const struct path *path, char *name, struct dentry *child) | |||
254 | struct inode *dir = path->dentry->d_inode; | 255 | struct inode *dir = path->dentry->d_inode; |
255 | int error; | 256 | int error; |
256 | struct file *file; | 257 | struct file *file; |
257 | struct getdents_callback buffer; | 258 | struct getdents_callback buffer = { |
259 | .ctx.actor = filldir_one, | ||
260 | .name = name, | ||
261 | .ino = child->d_inode->i_ino | ||
262 | }; | ||
258 | 263 | ||
259 | error = -ENOTDIR; | 264 | error = -ENOTDIR; |
260 | if (!dir || !S_ISDIR(dir->i_mode)) | 265 | if (!dir || !S_ISDIR(dir->i_mode)) |
@@ -271,17 +276,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child) | |||
271 | goto out; | 276 | goto out; |
272 | 277 | ||
273 | error = -EINVAL; | 278 | error = -EINVAL; |
274 | if (!file->f_op->readdir) | 279 | if (!file->f_op->iterate) |
275 | goto out_close; | 280 | goto out_close; |
276 | 281 | ||
277 | buffer.name = name; | ||
278 | buffer.ino = child->d_inode->i_ino; | ||
279 | buffer.found = 0; | ||
280 | buffer.sequence = 0; | 282 | buffer.sequence = 0; |
281 | while (1) { | 283 | while (1) { |
282 | int old_seq = buffer.sequence; | 284 | int old_seq = buffer.sequence; |
283 | 285 | ||
284 | error = vfs_readdir(file, filldir_one, &buffer); | 286 | error = iterate_dir(file, &buffer.ctx); |
285 | if (buffer.found) { | 287 | if (buffer.found) { |
286 | error = 0; | 288 | error = 0; |
287 | break; | 289 | break; |
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 4237722bfd27..6e1d4ab09d72 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c | |||
@@ -287,17 +287,17 @@ static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) | |||
287 | } | 287 | } |
288 | 288 | ||
289 | static int | 289 | static int |
290 | ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | 290 | ext2_readdir(struct file *file, struct dir_context *ctx) |
291 | { | 291 | { |
292 | loff_t pos = filp->f_pos; | 292 | loff_t pos = ctx->pos; |
293 | struct inode *inode = file_inode(filp); | 293 | struct inode *inode = file_inode(file); |
294 | struct super_block *sb = inode->i_sb; | 294 | struct super_block *sb = inode->i_sb; |
295 | unsigned int offset = pos & ~PAGE_CACHE_MASK; | 295 | unsigned int offset = pos & ~PAGE_CACHE_MASK; |
296 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | 296 | unsigned long n = pos >> PAGE_CACHE_SHIFT; |
297 | unsigned long npages = dir_pages(inode); | 297 | unsigned long npages = dir_pages(inode); |
298 | unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); | 298 | unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); |
299 | unsigned char *types = NULL; | 299 | unsigned char *types = NULL; |
300 | int need_revalidate = filp->f_version != inode->i_version; | 300 | int need_revalidate = file->f_version != inode->i_version; |
301 | 301 | ||
302 | if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) | 302 | if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) |
303 | return 0; | 303 | return 0; |
@@ -314,16 +314,16 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | |||
314 | ext2_error(sb, __func__, | 314 | ext2_error(sb, __func__, |
315 | "bad page in #%lu", | 315 | "bad page in #%lu", |
316 | inode->i_ino); | 316 | inode->i_ino); |
317 | filp->f_pos += PAGE_CACHE_SIZE - offset; | 317 | ctx->pos += PAGE_CACHE_SIZE - offset; |
318 | return PTR_ERR(page); | 318 | return PTR_ERR(page); |
319 | } | 319 | } |
320 | kaddr = page_address(page); | 320 | kaddr = page_address(page); |
321 | if (unlikely(need_revalidate)) { | 321 | if (unlikely(need_revalidate)) { |
322 | if (offset) { | 322 | if (offset) { |
323 | offset = ext2_validate_entry(kaddr, offset, chunk_mask); | 323 | offset = ext2_validate_entry(kaddr, offset, chunk_mask); |
324 | filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset; | 324 | ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset; |
325 | } | 325 | } |
326 | filp->f_version = inode->i_version; | 326 | file->f_version = inode->i_version; |
327 | need_revalidate = 0; | 327 | need_revalidate = 0; |
328 | } | 328 | } |
329 | de = (ext2_dirent *)(kaddr+offset); | 329 | de = (ext2_dirent *)(kaddr+offset); |
@@ -336,22 +336,19 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | |||
336 | return -EIO; | 336 | return -EIO; |
337 | } | 337 | } |
338 | if (de->inode) { | 338 | if (de->inode) { |
339 | int over; | ||
340 | unsigned char d_type = DT_UNKNOWN; | 339 | unsigned char d_type = DT_UNKNOWN; |
341 | 340 | ||
342 | if (types && de->file_type < EXT2_FT_MAX) | 341 | if (types && de->file_type < EXT2_FT_MAX) |
343 | d_type = types[de->file_type]; | 342 | d_type = types[de->file_type]; |
344 | 343 | ||
345 | offset = (char *)de - kaddr; | 344 | if (!dir_emit(ctx, de->name, de->name_len, |
346 | over = filldir(dirent, de->name, de->name_len, | 345 | le32_to_cpu(de->inode), |
347 | (n<<PAGE_CACHE_SHIFT) | offset, | 346 | d_type)) { |
348 | le32_to_cpu(de->inode), d_type); | ||
349 | if (over) { | ||
350 | ext2_put_page(page); | 347 | ext2_put_page(page); |
351 | return 0; | 348 | return 0; |
352 | } | 349 | } |
353 | } | 350 | } |
354 | filp->f_pos += ext2_rec_len_from_disk(de->rec_len); | 351 | ctx->pos += ext2_rec_len_from_disk(de->rec_len); |
355 | } | 352 | } |
356 | ext2_put_page(page); | 353 | ext2_put_page(page); |
357 | } | 354 | } |
@@ -724,7 +721,7 @@ not_empty: | |||
724 | const struct file_operations ext2_dir_operations = { | 721 | const struct file_operations ext2_dir_operations = { |
725 | .llseek = generic_file_llseek, | 722 | .llseek = generic_file_llseek, |
726 | .read = generic_read_dir, | 723 | .read = generic_read_dir, |
727 | .readdir = ext2_readdir, | 724 | .iterate = ext2_readdir, |
728 | .unlocked_ioctl = ext2_ioctl, | 725 | .unlocked_ioctl = ext2_ioctl, |
729 | #ifdef CONFIG_COMPAT | 726 | #ifdef CONFIG_COMPAT |
730 | .compat_ioctl = ext2_compat_ioctl, | 727 | .compat_ioctl = ext2_compat_ioctl, |
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 73b0d9519836..256dd5f4c1c4 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c | |||
@@ -119,6 +119,29 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode | |||
119 | return ext2_add_nondir(dentry, inode); | 119 | return ext2_add_nondir(dentry, inode); |
120 | } | 120 | } |
121 | 121 | ||
122 | static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
123 | { | ||
124 | struct inode *inode = ext2_new_inode(dir, mode, NULL); | ||
125 | if (IS_ERR(inode)) | ||
126 | return PTR_ERR(inode); | ||
127 | |||
128 | inode->i_op = &ext2_file_inode_operations; | ||
129 | if (ext2_use_xip(inode->i_sb)) { | ||
130 | inode->i_mapping->a_ops = &ext2_aops_xip; | ||
131 | inode->i_fop = &ext2_xip_file_operations; | ||
132 | } else if (test_opt(inode->i_sb, NOBH)) { | ||
133 | inode->i_mapping->a_ops = &ext2_nobh_aops; | ||
134 | inode->i_fop = &ext2_file_operations; | ||
135 | } else { | ||
136 | inode->i_mapping->a_ops = &ext2_aops; | ||
137 | inode->i_fop = &ext2_file_operations; | ||
138 | } | ||
139 | mark_inode_dirty(inode); | ||
140 | d_tmpfile(dentry, inode); | ||
141 | unlock_new_inode(inode); | ||
142 | return 0; | ||
143 | } | ||
144 | |||
122 | static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev) | 145 | static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev) |
123 | { | 146 | { |
124 | struct inode * inode; | 147 | struct inode * inode; |
@@ -398,6 +421,7 @@ const struct inode_operations ext2_dir_inode_operations = { | |||
398 | #endif | 421 | #endif |
399 | .setattr = ext2_setattr, | 422 | .setattr = ext2_setattr, |
400 | .get_acl = ext2_get_acl, | 423 | .get_acl = ext2_get_acl, |
424 | .tmpfile = ext2_tmpfile, | ||
401 | }; | 425 | }; |
402 | 426 | ||
403 | const struct inode_operations ext2_special_inode_operations = { | 427 | const struct inode_operations ext2_special_inode_operations = { |
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 87eccbbca255..f522425aaa24 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
@@ -28,8 +28,7 @@ static unsigned char ext3_filetype_table[] = { | |||
28 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 28 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
29 | }; | 29 | }; |
30 | 30 | ||
31 | static int ext3_dx_readdir(struct file * filp, | 31 | static int ext3_dx_readdir(struct file *, struct dir_context *); |
32 | void * dirent, filldir_t filldir); | ||
33 | 32 | ||
34 | static unsigned char get_dtype(struct super_block *sb, int filetype) | 33 | static unsigned char get_dtype(struct super_block *sb, int filetype) |
35 | { | 34 | { |
@@ -91,36 +90,30 @@ int ext3_check_dir_entry (const char * function, struct inode * dir, | |||
91 | return error_msg == NULL ? 1 : 0; | 90 | return error_msg == NULL ? 1 : 0; |
92 | } | 91 | } |
93 | 92 | ||
94 | static int ext3_readdir(struct file * filp, | 93 | static int ext3_readdir(struct file *file, struct dir_context *ctx) |
95 | void * dirent, filldir_t filldir) | ||
96 | { | 94 | { |
97 | int error = 0; | ||
98 | unsigned long offset; | 95 | unsigned long offset; |
99 | int i, stored; | 96 | int i; |
100 | struct ext3_dir_entry_2 *de; | 97 | struct ext3_dir_entry_2 *de; |
101 | int err; | 98 | int err; |
102 | struct inode *inode = file_inode(filp); | 99 | struct inode *inode = file_inode(file); |
103 | struct super_block *sb = inode->i_sb; | 100 | struct super_block *sb = inode->i_sb; |
104 | int ret = 0; | ||
105 | int dir_has_error = 0; | 101 | int dir_has_error = 0; |
106 | 102 | ||
107 | if (is_dx_dir(inode)) { | 103 | if (is_dx_dir(inode)) { |
108 | err = ext3_dx_readdir(filp, dirent, filldir); | 104 | err = ext3_dx_readdir(file, ctx); |
109 | if (err != ERR_BAD_DX_DIR) { | 105 | if (err != ERR_BAD_DX_DIR) |
110 | ret = err; | 106 | return err; |
111 | goto out; | ||
112 | } | ||
113 | /* | 107 | /* |
114 | * We don't set the inode dirty flag since it's not | 108 | * We don't set the inode dirty flag since it's not |
115 | * critical that it get flushed back to the disk. | 109 | * critical that it get flushed back to the disk. |
116 | */ | 110 | */ |
117 | EXT3_I(file_inode(filp))->i_flags &= ~EXT3_INDEX_FL; | 111 | EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; |
118 | } | 112 | } |
119 | stored = 0; | 113 | offset = ctx->pos & (sb->s_blocksize - 1); |
120 | offset = filp->f_pos & (sb->s_blocksize - 1); | ||
121 | 114 | ||
122 | while (!error && !stored && filp->f_pos < inode->i_size) { | 115 | while (ctx->pos < inode->i_size) { |
123 | unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb); | 116 | unsigned long blk = ctx->pos >> EXT3_BLOCK_SIZE_BITS(sb); |
124 | struct buffer_head map_bh; | 117 | struct buffer_head map_bh; |
125 | struct buffer_head *bh = NULL; | 118 | struct buffer_head *bh = NULL; |
126 | 119 | ||
@@ -129,12 +122,12 @@ static int ext3_readdir(struct file * filp, | |||
129 | if (err > 0) { | 122 | if (err > 0) { |
130 | pgoff_t index = map_bh.b_blocknr >> | 123 | pgoff_t index = map_bh.b_blocknr >> |
131 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 124 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
132 | if (!ra_has_index(&filp->f_ra, index)) | 125 | if (!ra_has_index(&file->f_ra, index)) |
133 | page_cache_sync_readahead( | 126 | page_cache_sync_readahead( |
134 | sb->s_bdev->bd_inode->i_mapping, | 127 | sb->s_bdev->bd_inode->i_mapping, |
135 | &filp->f_ra, filp, | 128 | &file->f_ra, file, |
136 | index, 1); | 129 | index, 1); |
137 | filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 130 | file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
138 | bh = ext3_bread(NULL, inode, blk, 0, &err); | 131 | bh = ext3_bread(NULL, inode, blk, 0, &err); |
139 | } | 132 | } |
140 | 133 | ||
@@ -146,22 +139,21 @@ static int ext3_readdir(struct file * filp, | |||
146 | if (!dir_has_error) { | 139 | if (!dir_has_error) { |
147 | ext3_error(sb, __func__, "directory #%lu " | 140 | ext3_error(sb, __func__, "directory #%lu " |
148 | "contains a hole at offset %lld", | 141 | "contains a hole at offset %lld", |
149 | inode->i_ino, filp->f_pos); | 142 | inode->i_ino, ctx->pos); |
150 | dir_has_error = 1; | 143 | dir_has_error = 1; |
151 | } | 144 | } |
152 | /* corrupt size? Maybe no more blocks to read */ | 145 | /* corrupt size? Maybe no more blocks to read */ |
153 | if (filp->f_pos > inode->i_blocks << 9) | 146 | if (ctx->pos > inode->i_blocks << 9) |
154 | break; | 147 | break; |
155 | filp->f_pos += sb->s_blocksize - offset; | 148 | ctx->pos += sb->s_blocksize - offset; |
156 | continue; | 149 | continue; |
157 | } | 150 | } |
158 | 151 | ||
159 | revalidate: | ||
160 | /* If the dir block has changed since the last call to | 152 | /* If the dir block has changed since the last call to |
161 | * readdir(2), then we might be pointing to an invalid | 153 | * readdir(2), then we might be pointing to an invalid |
162 | * dirent right now. Scan from the start of the block | 154 | * dirent right now. Scan from the start of the block |
163 | * to make sure. */ | 155 | * to make sure. */ |
164 | if (filp->f_version != inode->i_version) { | 156 | if (offset && file->f_version != inode->i_version) { |
165 | for (i = 0; i < sb->s_blocksize && i < offset; ) { | 157 | for (i = 0; i < sb->s_blocksize && i < offset; ) { |
166 | de = (struct ext3_dir_entry_2 *) | 158 | de = (struct ext3_dir_entry_2 *) |
167 | (bh->b_data + i); | 159 | (bh->b_data + i); |
@@ -177,53 +169,40 @@ revalidate: | |||
177 | i += ext3_rec_len_from_disk(de->rec_len); | 169 | i += ext3_rec_len_from_disk(de->rec_len); |
178 | } | 170 | } |
179 | offset = i; | 171 | offset = i; |
180 | filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) | 172 | ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) |
181 | | offset; | 173 | | offset; |
182 | filp->f_version = inode->i_version; | 174 | file->f_version = inode->i_version; |
183 | } | 175 | } |
184 | 176 | ||
185 | while (!error && filp->f_pos < inode->i_size | 177 | while (ctx->pos < inode->i_size |
186 | && offset < sb->s_blocksize) { | 178 | && offset < sb->s_blocksize) { |
187 | de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); | 179 | de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); |
188 | if (!ext3_check_dir_entry ("ext3_readdir", inode, de, | 180 | if (!ext3_check_dir_entry ("ext3_readdir", inode, de, |
189 | bh, offset)) { | 181 | bh, offset)) { |
190 | /* On error, skip the f_pos to the | 182 | /* On error, skip the to the |
191 | next block. */ | 183 | next block. */ |
192 | filp->f_pos = (filp->f_pos | | 184 | ctx->pos = (ctx->pos | |
193 | (sb->s_blocksize - 1)) + 1; | 185 | (sb->s_blocksize - 1)) + 1; |
194 | brelse (bh); | 186 | break; |
195 | ret = stored; | ||
196 | goto out; | ||
197 | } | 187 | } |
198 | offset += ext3_rec_len_from_disk(de->rec_len); | 188 | offset += ext3_rec_len_from_disk(de->rec_len); |
199 | if (le32_to_cpu(de->inode)) { | 189 | if (le32_to_cpu(de->inode)) { |
200 | /* We might block in the next section | 190 | if (!dir_emit(ctx, de->name, de->name_len, |
201 | * if the data destination is | 191 | le32_to_cpu(de->inode), |
202 | * currently swapped out. So, use a | 192 | get_dtype(sb, de->file_type))) { |
203 | * version stamp to detect whether or | 193 | brelse(bh); |
204 | * not the directory has been modified | 194 | return 0; |
205 | * during the copy operation. | 195 | } |
206 | */ | ||
207 | u64 version = filp->f_version; | ||
208 | |||
209 | error = filldir(dirent, de->name, | ||
210 | de->name_len, | ||
211 | filp->f_pos, | ||
212 | le32_to_cpu(de->inode), | ||
213 | get_dtype(sb, de->file_type)); | ||
214 | if (error) | ||
215 | break; | ||
216 | if (version != filp->f_version) | ||
217 | goto revalidate; | ||
218 | stored ++; | ||
219 | } | 196 | } |
220 | filp->f_pos += ext3_rec_len_from_disk(de->rec_len); | 197 | ctx->pos += ext3_rec_len_from_disk(de->rec_len); |
221 | } | 198 | } |
222 | offset = 0; | 199 | offset = 0; |
223 | brelse (bh); | 200 | brelse (bh); |
201 | if (ctx->pos < inode->i_size) | ||
202 | if (!dir_relax(inode)) | ||
203 | return 0; | ||
224 | } | 204 | } |
225 | out: | 205 | return 0; |
226 | return ret; | ||
227 | } | 206 | } |
228 | 207 | ||
229 | static inline int is_32bit_api(void) | 208 | static inline int is_32bit_api(void) |
@@ -452,62 +431,54 @@ int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
452 | * for all entres on the fname linked list. (Normally there is only | 431 | * for all entres on the fname linked list. (Normally there is only |
453 | * one entry on the linked list, unless there are 62 bit hash collisions.) | 432 | * one entry on the linked list, unless there are 62 bit hash collisions.) |
454 | */ | 433 | */ |
455 | static int call_filldir(struct file * filp, void * dirent, | 434 | static bool call_filldir(struct file *file, struct dir_context *ctx, |
456 | filldir_t filldir, struct fname *fname) | 435 | struct fname *fname) |
457 | { | 436 | { |
458 | struct dir_private_info *info = filp->private_data; | 437 | struct dir_private_info *info = file->private_data; |
459 | loff_t curr_pos; | 438 | struct inode *inode = file_inode(file); |
460 | struct inode *inode = file_inode(filp); | 439 | struct super_block *sb = inode->i_sb; |
461 | struct super_block * sb; | ||
462 | int error; | ||
463 | |||
464 | sb = inode->i_sb; | ||
465 | 440 | ||
466 | if (!fname) { | 441 | if (!fname) { |
467 | printk("call_filldir: called with null fname?!?\n"); | 442 | printk("call_filldir: called with null fname?!?\n"); |
468 | return 0; | 443 | return true; |
469 | } | 444 | } |
470 | curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); | 445 | ctx->pos = hash2pos(file, fname->hash, fname->minor_hash); |
471 | while (fname) { | 446 | while (fname) { |
472 | error = filldir(dirent, fname->name, | 447 | if (!dir_emit(ctx, fname->name, fname->name_len, |
473 | fname->name_len, curr_pos, | ||
474 | fname->inode, | 448 | fname->inode, |
475 | get_dtype(sb, fname->file_type)); | 449 | get_dtype(sb, fname->file_type))) { |
476 | if (error) { | ||
477 | filp->f_pos = curr_pos; | ||
478 | info->extra_fname = fname; | 450 | info->extra_fname = fname; |
479 | return error; | 451 | return false; |
480 | } | 452 | } |
481 | fname = fname->next; | 453 | fname = fname->next; |
482 | } | 454 | } |
483 | return 0; | 455 | return true; |
484 | } | 456 | } |
485 | 457 | ||
486 | static int ext3_dx_readdir(struct file * filp, | 458 | static int ext3_dx_readdir(struct file *file, struct dir_context *ctx) |
487 | void * dirent, filldir_t filldir) | ||
488 | { | 459 | { |
489 | struct dir_private_info *info = filp->private_data; | 460 | struct dir_private_info *info = file->private_data; |
490 | struct inode *inode = file_inode(filp); | 461 | struct inode *inode = file_inode(file); |
491 | struct fname *fname; | 462 | struct fname *fname; |
492 | int ret; | 463 | int ret; |
493 | 464 | ||
494 | if (!info) { | 465 | if (!info) { |
495 | info = ext3_htree_create_dir_info(filp, filp->f_pos); | 466 | info = ext3_htree_create_dir_info(file, ctx->pos); |
496 | if (!info) | 467 | if (!info) |
497 | return -ENOMEM; | 468 | return -ENOMEM; |
498 | filp->private_data = info; | 469 | file->private_data = info; |
499 | } | 470 | } |
500 | 471 | ||
501 | if (filp->f_pos == ext3_get_htree_eof(filp)) | 472 | if (ctx->pos == ext3_get_htree_eof(file)) |
502 | return 0; /* EOF */ | 473 | return 0; /* EOF */ |
503 | 474 | ||
504 | /* Some one has messed with f_pos; reset the world */ | 475 | /* Some one has messed with f_pos; reset the world */ |
505 | if (info->last_pos != filp->f_pos) { | 476 | if (info->last_pos != ctx->pos) { |
506 | free_rb_tree_fname(&info->root); | 477 | free_rb_tree_fname(&info->root); |
507 | info->curr_node = NULL; | 478 | info->curr_node = NULL; |
508 | info->extra_fname = NULL; | 479 | info->extra_fname = NULL; |
509 | info->curr_hash = pos2maj_hash(filp, filp->f_pos); | 480 | info->curr_hash = pos2maj_hash(file, ctx->pos); |
510 | info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); | 481 | info->curr_minor_hash = pos2min_hash(file, ctx->pos); |
511 | } | 482 | } |
512 | 483 | ||
513 | /* | 484 | /* |
@@ -515,7 +486,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
515 | * chain, return them first. | 486 | * chain, return them first. |
516 | */ | 487 | */ |
517 | if (info->extra_fname) { | 488 | if (info->extra_fname) { |
518 | if (call_filldir(filp, dirent, filldir, info->extra_fname)) | 489 | if (!call_filldir(file, ctx, info->extra_fname)) |
519 | goto finished; | 490 | goto finished; |
520 | info->extra_fname = NULL; | 491 | info->extra_fname = NULL; |
521 | goto next_node; | 492 | goto next_node; |
@@ -529,17 +500,17 @@ static int ext3_dx_readdir(struct file * filp, | |||
529 | * cached entries. | 500 | * cached entries. |
530 | */ | 501 | */ |
531 | if ((!info->curr_node) || | 502 | if ((!info->curr_node) || |
532 | (filp->f_version != inode->i_version)) { | 503 | (file->f_version != inode->i_version)) { |
533 | info->curr_node = NULL; | 504 | info->curr_node = NULL; |
534 | free_rb_tree_fname(&info->root); | 505 | free_rb_tree_fname(&info->root); |
535 | filp->f_version = inode->i_version; | 506 | file->f_version = inode->i_version; |
536 | ret = ext3_htree_fill_tree(filp, info->curr_hash, | 507 | ret = ext3_htree_fill_tree(file, info->curr_hash, |
537 | info->curr_minor_hash, | 508 | info->curr_minor_hash, |
538 | &info->next_hash); | 509 | &info->next_hash); |
539 | if (ret < 0) | 510 | if (ret < 0) |
540 | return ret; | 511 | return ret; |
541 | if (ret == 0) { | 512 | if (ret == 0) { |
542 | filp->f_pos = ext3_get_htree_eof(filp); | 513 | ctx->pos = ext3_get_htree_eof(file); |
543 | break; | 514 | break; |
544 | } | 515 | } |
545 | info->curr_node = rb_first(&info->root); | 516 | info->curr_node = rb_first(&info->root); |
@@ -548,7 +519,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
548 | fname = rb_entry(info->curr_node, struct fname, rb_hash); | 519 | fname = rb_entry(info->curr_node, struct fname, rb_hash); |
549 | info->curr_hash = fname->hash; | 520 | info->curr_hash = fname->hash; |
550 | info->curr_minor_hash = fname->minor_hash; | 521 | info->curr_minor_hash = fname->minor_hash; |
551 | if (call_filldir(filp, dirent, filldir, fname)) | 522 | if (!call_filldir(file, ctx, fname)) |
552 | break; | 523 | break; |
553 | next_node: | 524 | next_node: |
554 | info->curr_node = rb_next(info->curr_node); | 525 | info->curr_node = rb_next(info->curr_node); |
@@ -559,7 +530,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
559 | info->curr_minor_hash = fname->minor_hash; | 530 | info->curr_minor_hash = fname->minor_hash; |
560 | } else { | 531 | } else { |
561 | if (info->next_hash == ~0) { | 532 | if (info->next_hash == ~0) { |
562 | filp->f_pos = ext3_get_htree_eof(filp); | 533 | ctx->pos = ext3_get_htree_eof(file); |
563 | break; | 534 | break; |
564 | } | 535 | } |
565 | info->curr_hash = info->next_hash; | 536 | info->curr_hash = info->next_hash; |
@@ -567,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
567 | } | 538 | } |
568 | } | 539 | } |
569 | finished: | 540 | finished: |
570 | info->last_pos = filp->f_pos; | 541 | info->last_pos = ctx->pos; |
571 | return 0; | 542 | return 0; |
572 | } | 543 | } |
573 | 544 | ||
@@ -582,7 +553,7 @@ static int ext3_release_dir (struct inode * inode, struct file * filp) | |||
582 | const struct file_operations ext3_dir_operations = { | 553 | const struct file_operations ext3_dir_operations = { |
583 | .llseek = ext3_dir_llseek, | 554 | .llseek = ext3_dir_llseek, |
584 | .read = generic_read_dir, | 555 | .read = generic_read_dir, |
585 | .readdir = ext3_readdir, | 556 | .iterate = ext3_readdir, |
586 | .unlocked_ioctl = ext3_ioctl, | 557 | .unlocked_ioctl = ext3_ioctl, |
587 | #ifdef CONFIG_COMPAT | 558 | #ifdef CONFIG_COMPAT |
588 | .compat_ioctl = ext3_compat_ioctl, | 559 | .compat_ioctl = ext3_compat_ioctl, |
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index b31dbd4c46ad..1cb9c7e10c6f 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c | |||
@@ -48,9 +48,13 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
48 | 48 | ||
49 | trace_ext3_sync_file_enter(file, datasync); | 49 | trace_ext3_sync_file_enter(file, datasync); |
50 | 50 | ||
51 | if (inode->i_sb->s_flags & MS_RDONLY) | 51 | if (inode->i_sb->s_flags & MS_RDONLY) { |
52 | /* Make sure that we read updated state */ | ||
53 | smp_rmb(); | ||
54 | if (EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS) | ||
55 | return -EROFS; | ||
52 | return 0; | 56 | return 0; |
53 | 57 | } | |
54 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 58 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
55 | if (ret) | 59 | if (ret) |
56 | goto out; | 60 | goto out; |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 23c712825640..2bd85486b879 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1825,19 +1825,20 @@ ext3_readpages(struct file *file, struct address_space *mapping, | |||
1825 | return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); | 1825 | return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); |
1826 | } | 1826 | } |
1827 | 1827 | ||
1828 | static void ext3_invalidatepage(struct page *page, unsigned long offset) | 1828 | static void ext3_invalidatepage(struct page *page, unsigned int offset, |
1829 | unsigned int length) | ||
1829 | { | 1830 | { |
1830 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); | 1831 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); |
1831 | 1832 | ||
1832 | trace_ext3_invalidatepage(page, offset); | 1833 | trace_ext3_invalidatepage(page, offset, length); |
1833 | 1834 | ||
1834 | /* | 1835 | /* |
1835 | * If it's a full truncate we just forget about the pending dirtying | 1836 | * If it's a full truncate we just forget about the pending dirtying |
1836 | */ | 1837 | */ |
1837 | if (offset == 0) | 1838 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
1838 | ClearPageChecked(page); | 1839 | ClearPageChecked(page); |
1839 | 1840 | ||
1840 | journal_invalidatepage(journal, page, offset); | 1841 | journal_invalidatepage(journal, page, offset, length); |
1841 | } | 1842 | } |
1842 | 1843 | ||
1843 | static int ext3_releasepage(struct page *page, gfp_t wait) | 1844 | static int ext3_releasepage(struct page *page, gfp_t wait) |
@@ -1984,6 +1985,7 @@ static const struct address_space_operations ext3_ordered_aops = { | |||
1984 | .direct_IO = ext3_direct_IO, | 1985 | .direct_IO = ext3_direct_IO, |
1985 | .migratepage = buffer_migrate_page, | 1986 | .migratepage = buffer_migrate_page, |
1986 | .is_partially_uptodate = block_is_partially_uptodate, | 1987 | .is_partially_uptodate = block_is_partially_uptodate, |
1988 | .is_dirty_writeback = buffer_check_dirty_writeback, | ||
1987 | .error_remove_page = generic_error_remove_page, | 1989 | .error_remove_page = generic_error_remove_page, |
1988 | }; | 1990 | }; |
1989 | 1991 | ||
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 692de13e3596..1194b1f0f839 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -576,11 +576,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
576 | if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, | 576 | if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, |
577 | (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb)) | 577 | (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb)) |
578 | +((char *)de - bh->b_data))) { | 578 | +((char *)de - bh->b_data))) { |
579 | /* On error, skip the f_pos to the next block. */ | 579 | /* silently ignore the rest of the block */ |
580 | dir_file->f_pos = (dir_file->f_pos | | 580 | break; |
581 | (dir->i_sb->s_blocksize - 1)) + 1; | ||
582 | brelse (bh); | ||
583 | return count; | ||
584 | } | 581 | } |
585 | ext3fs_dirhash(de->name, de->name_len, hinfo); | 582 | ext3fs_dirhash(de->name, de->name_len, hinfo); |
586 | if ((hinfo->hash < start_hash) || | 583 | if ((hinfo->hash < start_hash) || |
@@ -1762,6 +1759,45 @@ retry: | |||
1762 | return err; | 1759 | return err; |
1763 | } | 1760 | } |
1764 | 1761 | ||
1762 | static int ext3_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
1763 | { | ||
1764 | handle_t *handle; | ||
1765 | struct inode *inode; | ||
1766 | int err, retries = 0; | ||
1767 | |||
1768 | dquot_initialize(dir); | ||
1769 | |||
1770 | retry: | ||
1771 | handle = ext3_journal_start(dir, EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + | ||
1772 | 4 + EXT3_XATTR_TRANS_BLOCKS); | ||
1773 | |||
1774 | if (IS_ERR(handle)) | ||
1775 | return PTR_ERR(handle); | ||
1776 | |||
1777 | inode = ext3_new_inode (handle, dir, NULL, mode); | ||
1778 | err = PTR_ERR(inode); | ||
1779 | if (!IS_ERR(inode)) { | ||
1780 | inode->i_op = &ext3_file_inode_operations; | ||
1781 | inode->i_fop = &ext3_file_operations; | ||
1782 | ext3_set_aops(inode); | ||
1783 | d_tmpfile(dentry, inode); | ||
1784 | err = ext3_orphan_add(handle, inode); | ||
1785 | if (err) | ||
1786 | goto err_drop_inode; | ||
1787 | mark_inode_dirty(inode); | ||
1788 | unlock_new_inode(inode); | ||
1789 | } | ||
1790 | ext3_journal_stop(handle); | ||
1791 | if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) | ||
1792 | goto retry; | ||
1793 | return err; | ||
1794 | err_drop_inode: | ||
1795 | ext3_journal_stop(handle); | ||
1796 | unlock_new_inode(inode); | ||
1797 | iput(inode); | ||
1798 | return err; | ||
1799 | } | ||
1800 | |||
1765 | static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) | 1801 | static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) |
1766 | { | 1802 | { |
1767 | handle_t *handle; | 1803 | handle_t *handle; |
@@ -2303,7 +2339,7 @@ static int ext3_link (struct dentry * old_dentry, | |||
2303 | 2339 | ||
2304 | retry: | 2340 | retry: |
2305 | handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | 2341 | handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + |
2306 | EXT3_INDEX_EXTRA_TRANS_BLOCKS); | 2342 | EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1); |
2307 | if (IS_ERR(handle)) | 2343 | if (IS_ERR(handle)) |
2308 | return PTR_ERR(handle); | 2344 | return PTR_ERR(handle); |
2309 | 2345 | ||
@@ -2317,6 +2353,11 @@ retry: | |||
2317 | err = ext3_add_entry(handle, dentry, inode); | 2353 | err = ext3_add_entry(handle, dentry, inode); |
2318 | if (!err) { | 2354 | if (!err) { |
2319 | ext3_mark_inode_dirty(handle, inode); | 2355 | ext3_mark_inode_dirty(handle, inode); |
2356 | /* this can happen only for tmpfile being | ||
2357 | * linked the first time | ||
2358 | */ | ||
2359 | if (inode->i_nlink == 1) | ||
2360 | ext3_orphan_del(handle, inode); | ||
2320 | d_instantiate(dentry, inode); | 2361 | d_instantiate(dentry, inode); |
2321 | } else { | 2362 | } else { |
2322 | drop_nlink(inode); | 2363 | drop_nlink(inode); |
@@ -2519,6 +2560,7 @@ const struct inode_operations ext3_dir_inode_operations = { | |||
2519 | .mkdir = ext3_mkdir, | 2560 | .mkdir = ext3_mkdir, |
2520 | .rmdir = ext3_rmdir, | 2561 | .rmdir = ext3_rmdir, |
2521 | .mknod = ext3_mknod, | 2562 | .mknod = ext3_mknod, |
2563 | .tmpfile = ext3_tmpfile, | ||
2522 | .rename = ext3_rename, | 2564 | .rename = ext3_rename, |
2523 | .setattr = ext3_setattr, | 2565 | .setattr = ext3_setattr, |
2524 | #ifdef CONFIG_EXT3_FS_XATTR | 2566 | #ifdef CONFIG_EXT3_FS_XATTR |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6356665a74bb..c47f14750722 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -174,6 +174,11 @@ static void ext3_handle_error(struct super_block *sb) | |||
174 | if (test_opt (sb, ERRORS_RO)) { | 174 | if (test_opt (sb, ERRORS_RO)) { |
175 | ext3_msg(sb, KERN_CRIT, | 175 | ext3_msg(sb, KERN_CRIT, |
176 | "error: remounting filesystem read-only"); | 176 | "error: remounting filesystem read-only"); |
177 | /* | ||
178 | * Make sure updated value of ->s_mount_state will be visible | ||
179 | * before ->s_flags update. | ||
180 | */ | ||
181 | smp_wmb(); | ||
177 | sb->s_flags |= MS_RDONLY; | 182 | sb->s_flags |= MS_RDONLY; |
178 | } | 183 | } |
179 | ext3_commit_super(sb, es, 1); | 184 | ext3_commit_super(sb, es, 1); |
@@ -291,8 +296,14 @@ void ext3_abort(struct super_block *sb, const char *function, | |||
291 | ext3_msg(sb, KERN_CRIT, | 296 | ext3_msg(sb, KERN_CRIT, |
292 | "error: remounting filesystem read-only"); | 297 | "error: remounting filesystem read-only"); |
293 | EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; | 298 | EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; |
294 | sb->s_flags |= MS_RDONLY; | ||
295 | set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); | 299 | set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); |
300 | /* | ||
301 | * Make sure updated value of ->s_mount_state will be visible | ||
302 | * before ->s_flags update. | ||
303 | */ | ||
304 | smp_wmb(); | ||
305 | sb->s_flags |= MS_RDONLY; | ||
306 | |||
296 | if (EXT3_SB(sb)->s_journal) | 307 | if (EXT3_SB(sb)->s_journal) |
297 | journal_abort(EXT3_SB(sb)->s_journal, -EIO); | 308 | journal_abort(EXT3_SB(sb)->s_journal, -EIO); |
298 | } | 309 | } |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d0f13eada0ed..ddd715e42a5c 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -38,8 +38,8 @@ ext4_group_t ext4_get_group_number(struct super_block *sb, | |||
38 | ext4_group_t group; | 38 | ext4_group_t group; |
39 | 39 | ||
40 | if (test_opt2(sb, STD_GROUP_SIZE)) | 40 | if (test_opt2(sb, STD_GROUP_SIZE)) |
41 | group = (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + | 41 | group = (block - |
42 | block) >> | 42 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >> |
43 | (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3); | 43 | (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3); |
44 | else | 44 | else |
45 | ext4_get_group_no_and_offset(sb, block, &group, NULL); | 45 | ext4_get_group_no_and_offset(sb, block, &group, NULL); |
@@ -682,11 +682,15 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) | |||
682 | 682 | ||
683 | static inline int test_root(ext4_group_t a, int b) | 683 | static inline int test_root(ext4_group_t a, int b) |
684 | { | 684 | { |
685 | int num = b; | 685 | while (1) { |
686 | 686 | if (a < b) | |
687 | while (a > num) | 687 | return 0; |
688 | num *= b; | 688 | if (a == b) |
689 | return num == a; | 689 | return 1; |
690 | if ((a % b) != 0) | ||
691 | return 0; | ||
692 | a = a / b; | ||
693 | } | ||
690 | } | 694 | } |
691 | 695 | ||
692 | static int ext4_group_sparse(ext4_group_t group) | 696 | static int ext4_group_sparse(ext4_group_t group) |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index f8d56e4254e0..3c7d288ae94c 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -29,8 +29,7 @@ | |||
29 | #include "ext4.h" | 29 | #include "ext4.h" |
30 | #include "xattr.h" | 30 | #include "xattr.h" |
31 | 31 | ||
32 | static int ext4_dx_readdir(struct file *filp, | 32 | static int ext4_dx_readdir(struct file *, struct dir_context *); |
33 | void *dirent, filldir_t filldir); | ||
34 | 33 | ||
35 | /** | 34 | /** |
36 | * Check if the given dir-inode refers to an htree-indexed directory | 35 | * Check if the given dir-inode refers to an htree-indexed directory |
@@ -103,60 +102,56 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, | |||
103 | return 1; | 102 | return 1; |
104 | } | 103 | } |
105 | 104 | ||
106 | static int ext4_readdir(struct file *filp, | 105 | static int ext4_readdir(struct file *file, struct dir_context *ctx) |
107 | void *dirent, filldir_t filldir) | ||
108 | { | 106 | { |
109 | int error = 0; | ||
110 | unsigned int offset; | 107 | unsigned int offset; |
111 | int i, stored; | 108 | int i, stored; |
112 | struct ext4_dir_entry_2 *de; | 109 | struct ext4_dir_entry_2 *de; |
113 | int err; | 110 | int err; |
114 | struct inode *inode = file_inode(filp); | 111 | struct inode *inode = file_inode(file); |
115 | struct super_block *sb = inode->i_sb; | 112 | struct super_block *sb = inode->i_sb; |
116 | int ret = 0; | ||
117 | int dir_has_error = 0; | 113 | int dir_has_error = 0; |
118 | 114 | ||
119 | if (is_dx_dir(inode)) { | 115 | if (is_dx_dir(inode)) { |
120 | err = ext4_dx_readdir(filp, dirent, filldir); | 116 | err = ext4_dx_readdir(file, ctx); |
121 | if (err != ERR_BAD_DX_DIR) { | 117 | if (err != ERR_BAD_DX_DIR) { |
122 | ret = err; | 118 | return err; |
123 | goto out; | ||
124 | } | 119 | } |
125 | /* | 120 | /* |
126 | * We don't set the inode dirty flag since it's not | 121 | * We don't set the inode dirty flag since it's not |
127 | * critical that it get flushed back to the disk. | 122 | * critical that it get flushed back to the disk. |
128 | */ | 123 | */ |
129 | ext4_clear_inode_flag(file_inode(filp), | 124 | ext4_clear_inode_flag(file_inode(file), |
130 | EXT4_INODE_INDEX); | 125 | EXT4_INODE_INDEX); |
131 | } | 126 | } |
132 | 127 | ||
133 | if (ext4_has_inline_data(inode)) { | 128 | if (ext4_has_inline_data(inode)) { |
134 | int has_inline_data = 1; | 129 | int has_inline_data = 1; |
135 | ret = ext4_read_inline_dir(filp, dirent, filldir, | 130 | int ret = ext4_read_inline_dir(file, ctx, |
136 | &has_inline_data); | 131 | &has_inline_data); |
137 | if (has_inline_data) | 132 | if (has_inline_data) |
138 | return ret; | 133 | return ret; |
139 | } | 134 | } |
140 | 135 | ||
141 | stored = 0; | 136 | stored = 0; |
142 | offset = filp->f_pos & (sb->s_blocksize - 1); | 137 | offset = ctx->pos & (sb->s_blocksize - 1); |
143 | 138 | ||
144 | while (!error && !stored && filp->f_pos < inode->i_size) { | 139 | while (ctx->pos < inode->i_size) { |
145 | struct ext4_map_blocks map; | 140 | struct ext4_map_blocks map; |
146 | struct buffer_head *bh = NULL; | 141 | struct buffer_head *bh = NULL; |
147 | 142 | ||
148 | map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); | 143 | map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); |
149 | map.m_len = 1; | 144 | map.m_len = 1; |
150 | err = ext4_map_blocks(NULL, inode, &map, 0); | 145 | err = ext4_map_blocks(NULL, inode, &map, 0); |
151 | if (err > 0) { | 146 | if (err > 0) { |
152 | pgoff_t index = map.m_pblk >> | 147 | pgoff_t index = map.m_pblk >> |
153 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 148 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
154 | if (!ra_has_index(&filp->f_ra, index)) | 149 | if (!ra_has_index(&file->f_ra, index)) |
155 | page_cache_sync_readahead( | 150 | page_cache_sync_readahead( |
156 | sb->s_bdev->bd_inode->i_mapping, | 151 | sb->s_bdev->bd_inode->i_mapping, |
157 | &filp->f_ra, filp, | 152 | &file->f_ra, file, |
158 | index, 1); | 153 | index, 1); |
159 | filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 154 | file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
160 | bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); | 155 | bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); |
161 | } | 156 | } |
162 | 157 | ||
@@ -166,16 +161,16 @@ static int ext4_readdir(struct file *filp, | |||
166 | */ | 161 | */ |
167 | if (!bh) { | 162 | if (!bh) { |
168 | if (!dir_has_error) { | 163 | if (!dir_has_error) { |
169 | EXT4_ERROR_FILE(filp, 0, | 164 | EXT4_ERROR_FILE(file, 0, |
170 | "directory contains a " | 165 | "directory contains a " |
171 | "hole at offset %llu", | 166 | "hole at offset %llu", |
172 | (unsigned long long) filp->f_pos); | 167 | (unsigned long long) ctx->pos); |
173 | dir_has_error = 1; | 168 | dir_has_error = 1; |
174 | } | 169 | } |
175 | /* corrupt size? Maybe no more blocks to read */ | 170 | /* corrupt size? Maybe no more blocks to read */ |
176 | if (filp->f_pos > inode->i_blocks << 9) | 171 | if (ctx->pos > inode->i_blocks << 9) |
177 | break; | 172 | break; |
178 | filp->f_pos += sb->s_blocksize - offset; | 173 | ctx->pos += sb->s_blocksize - offset; |
179 | continue; | 174 | continue; |
180 | } | 175 | } |
181 | 176 | ||
@@ -183,21 +178,20 @@ static int ext4_readdir(struct file *filp, | |||
183 | if (!buffer_verified(bh) && | 178 | if (!buffer_verified(bh) && |
184 | !ext4_dirent_csum_verify(inode, | 179 | !ext4_dirent_csum_verify(inode, |
185 | (struct ext4_dir_entry *)bh->b_data)) { | 180 | (struct ext4_dir_entry *)bh->b_data)) { |
186 | EXT4_ERROR_FILE(filp, 0, "directory fails checksum " | 181 | EXT4_ERROR_FILE(file, 0, "directory fails checksum " |
187 | "at offset %llu", | 182 | "at offset %llu", |
188 | (unsigned long long)filp->f_pos); | 183 | (unsigned long long)ctx->pos); |
189 | filp->f_pos += sb->s_blocksize - offset; | 184 | ctx->pos += sb->s_blocksize - offset; |
190 | brelse(bh); | 185 | brelse(bh); |
191 | continue; | 186 | continue; |
192 | } | 187 | } |
193 | set_buffer_verified(bh); | 188 | set_buffer_verified(bh); |
194 | 189 | ||
195 | revalidate: | ||
196 | /* If the dir block has changed since the last call to | 190 | /* If the dir block has changed since the last call to |
197 | * readdir(2), then we might be pointing to an invalid | 191 | * readdir(2), then we might be pointing to an invalid |
198 | * dirent right now. Scan from the start of the block | 192 | * dirent right now. Scan from the start of the block |
199 | * to make sure. */ | 193 | * to make sure. */ |
200 | if (filp->f_version != inode->i_version) { | 194 | if (file->f_version != inode->i_version) { |
201 | for (i = 0; i < sb->s_blocksize && i < offset; ) { | 195 | for (i = 0; i < sb->s_blocksize && i < offset; ) { |
202 | de = (struct ext4_dir_entry_2 *) | 196 | de = (struct ext4_dir_entry_2 *) |
203 | (bh->b_data + i); | 197 | (bh->b_data + i); |
@@ -214,57 +208,46 @@ revalidate: | |||
214 | sb->s_blocksize); | 208 | sb->s_blocksize); |
215 | } | 209 | } |
216 | offset = i; | 210 | offset = i; |
217 | filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) | 211 | ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) |
218 | | offset; | 212 | | offset; |
219 | filp->f_version = inode->i_version; | 213 | file->f_version = inode->i_version; |
220 | } | 214 | } |
221 | 215 | ||
222 | while (!error && filp->f_pos < inode->i_size | 216 | while (ctx->pos < inode->i_size |
223 | && offset < sb->s_blocksize) { | 217 | && offset < sb->s_blocksize) { |
224 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); | 218 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); |
225 | if (ext4_check_dir_entry(inode, filp, de, bh, | 219 | if (ext4_check_dir_entry(inode, file, de, bh, |
226 | bh->b_data, bh->b_size, | 220 | bh->b_data, bh->b_size, |
227 | offset)) { | 221 | offset)) { |
228 | /* | 222 | /* |
229 | * On error, skip the f_pos to the next block | 223 | * On error, skip to the next block |
230 | */ | 224 | */ |
231 | filp->f_pos = (filp->f_pos | | 225 | ctx->pos = (ctx->pos | |
232 | (sb->s_blocksize - 1)) + 1; | 226 | (sb->s_blocksize - 1)) + 1; |
233 | brelse(bh); | 227 | break; |
234 | ret = stored; | ||
235 | goto out; | ||
236 | } | 228 | } |
237 | offset += ext4_rec_len_from_disk(de->rec_len, | 229 | offset += ext4_rec_len_from_disk(de->rec_len, |
238 | sb->s_blocksize); | 230 | sb->s_blocksize); |
239 | if (le32_to_cpu(de->inode)) { | 231 | if (le32_to_cpu(de->inode)) { |
240 | /* We might block in the next section | 232 | if (!dir_emit(ctx, de->name, |
241 | * if the data destination is | ||
242 | * currently swapped out. So, use a | ||
243 | * version stamp to detect whether or | ||
244 | * not the directory has been modified | ||
245 | * during the copy operation. | ||
246 | */ | ||
247 | u64 version = filp->f_version; | ||
248 | |||
249 | error = filldir(dirent, de->name, | ||
250 | de->name_len, | 233 | de->name_len, |
251 | filp->f_pos, | ||
252 | le32_to_cpu(de->inode), | 234 | le32_to_cpu(de->inode), |
253 | get_dtype(sb, de->file_type)); | 235 | get_dtype(sb, de->file_type))) { |
254 | if (error) | 236 | brelse(bh); |
255 | break; | 237 | return 0; |
256 | if (version != filp->f_version) | 238 | } |
257 | goto revalidate; | ||
258 | stored++; | ||
259 | } | 239 | } |
260 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len, | 240 | ctx->pos += ext4_rec_len_from_disk(de->rec_len, |
261 | sb->s_blocksize); | 241 | sb->s_blocksize); |
262 | } | 242 | } |
263 | offset = 0; | 243 | offset = 0; |
264 | brelse(bh); | 244 | brelse(bh); |
245 | if (ctx->pos < inode->i_size) { | ||
246 | if (!dir_relax(inode)) | ||
247 | return 0; | ||
248 | } | ||
265 | } | 249 | } |
266 | out: | 250 | return 0; |
267 | return ret; | ||
268 | } | 251 | } |
269 | 252 | ||
270 | static inline int is_32bit_api(void) | 253 | static inline int is_32bit_api(void) |
@@ -492,16 +475,12 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
492 | * for all entres on the fname linked list. (Normally there is only | 475 | * for all entres on the fname linked list. (Normally there is only |
493 | * one entry on the linked list, unless there are 62 bit hash collisions.) | 476 | * one entry on the linked list, unless there are 62 bit hash collisions.) |
494 | */ | 477 | */ |
495 | static int call_filldir(struct file *filp, void *dirent, | 478 | static int call_filldir(struct file *file, struct dir_context *ctx, |
496 | filldir_t filldir, struct fname *fname) | 479 | struct fname *fname) |
497 | { | 480 | { |
498 | struct dir_private_info *info = filp->private_data; | 481 | struct dir_private_info *info = file->private_data; |
499 | loff_t curr_pos; | 482 | struct inode *inode = file_inode(file); |
500 | struct inode *inode = file_inode(filp); | 483 | struct super_block *sb = inode->i_sb; |
501 | struct super_block *sb; | ||
502 | int error; | ||
503 | |||
504 | sb = inode->i_sb; | ||
505 | 484 | ||
506 | if (!fname) { | 485 | if (!fname) { |
507 | ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: " | 486 | ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: " |
@@ -509,47 +488,44 @@ static int call_filldir(struct file *filp, void *dirent, | |||
509 | inode->i_ino, current->comm); | 488 | inode->i_ino, current->comm); |
510 | return 0; | 489 | return 0; |
511 | } | 490 | } |
512 | curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); | 491 | ctx->pos = hash2pos(file, fname->hash, fname->minor_hash); |
513 | while (fname) { | 492 | while (fname) { |
514 | error = filldir(dirent, fname->name, | 493 | if (!dir_emit(ctx, fname->name, |
515 | fname->name_len, curr_pos, | 494 | fname->name_len, |
516 | fname->inode, | 495 | fname->inode, |
517 | get_dtype(sb, fname->file_type)); | 496 | get_dtype(sb, fname->file_type))) { |
518 | if (error) { | ||
519 | filp->f_pos = curr_pos; | ||
520 | info->extra_fname = fname; | 497 | info->extra_fname = fname; |
521 | return error; | 498 | return 1; |
522 | } | 499 | } |
523 | fname = fname->next; | 500 | fname = fname->next; |
524 | } | 501 | } |
525 | return 0; | 502 | return 0; |
526 | } | 503 | } |
527 | 504 | ||
528 | static int ext4_dx_readdir(struct file *filp, | 505 | static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) |
529 | void *dirent, filldir_t filldir) | ||
530 | { | 506 | { |
531 | struct dir_private_info *info = filp->private_data; | 507 | struct dir_private_info *info = file->private_data; |
532 | struct inode *inode = file_inode(filp); | 508 | struct inode *inode = file_inode(file); |
533 | struct fname *fname; | 509 | struct fname *fname; |
534 | int ret; | 510 | int ret; |
535 | 511 | ||
536 | if (!info) { | 512 | if (!info) { |
537 | info = ext4_htree_create_dir_info(filp, filp->f_pos); | 513 | info = ext4_htree_create_dir_info(file, ctx->pos); |
538 | if (!info) | 514 | if (!info) |
539 | return -ENOMEM; | 515 | return -ENOMEM; |
540 | filp->private_data = info; | 516 | file->private_data = info; |
541 | } | 517 | } |
542 | 518 | ||
543 | if (filp->f_pos == ext4_get_htree_eof(filp)) | 519 | if (ctx->pos == ext4_get_htree_eof(file)) |
544 | return 0; /* EOF */ | 520 | return 0; /* EOF */ |
545 | 521 | ||
546 | /* Some one has messed with f_pos; reset the world */ | 522 | /* Some one has messed with f_pos; reset the world */ |
547 | if (info->last_pos != filp->f_pos) { | 523 | if (info->last_pos != ctx->pos) { |
548 | free_rb_tree_fname(&info->root); | 524 | free_rb_tree_fname(&info->root); |
549 | info->curr_node = NULL; | 525 | info->curr_node = NULL; |
550 | info->extra_fname = NULL; | 526 | info->extra_fname = NULL; |
551 | info->curr_hash = pos2maj_hash(filp, filp->f_pos); | 527 | info->curr_hash = pos2maj_hash(file, ctx->pos); |
552 | info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); | 528 | info->curr_minor_hash = pos2min_hash(file, ctx->pos); |
553 | } | 529 | } |
554 | 530 | ||
555 | /* | 531 | /* |
@@ -557,7 +533,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
557 | * chain, return them first. | 533 | * chain, return them first. |
558 | */ | 534 | */ |
559 | if (info->extra_fname) { | 535 | if (info->extra_fname) { |
560 | if (call_filldir(filp, dirent, filldir, info->extra_fname)) | 536 | if (call_filldir(file, ctx, info->extra_fname)) |
561 | goto finished; | 537 | goto finished; |
562 | info->extra_fname = NULL; | 538 | info->extra_fname = NULL; |
563 | goto next_node; | 539 | goto next_node; |
@@ -571,17 +547,17 @@ static int ext4_dx_readdir(struct file *filp, | |||
571 | * cached entries. | 547 | * cached entries. |
572 | */ | 548 | */ |
573 | if ((!info->curr_node) || | 549 | if ((!info->curr_node) || |
574 | (filp->f_version != inode->i_version)) { | 550 | (file->f_version != inode->i_version)) { |
575 | info->curr_node = NULL; | 551 | info->curr_node = NULL; |
576 | free_rb_tree_fname(&info->root); | 552 | free_rb_tree_fname(&info->root); |
577 | filp->f_version = inode->i_version; | 553 | file->f_version = inode->i_version; |
578 | ret = ext4_htree_fill_tree(filp, info->curr_hash, | 554 | ret = ext4_htree_fill_tree(file, info->curr_hash, |
579 | info->curr_minor_hash, | 555 | info->curr_minor_hash, |
580 | &info->next_hash); | 556 | &info->next_hash); |
581 | if (ret < 0) | 557 | if (ret < 0) |
582 | return ret; | 558 | return ret; |
583 | if (ret == 0) { | 559 | if (ret == 0) { |
584 | filp->f_pos = ext4_get_htree_eof(filp); | 560 | ctx->pos = ext4_get_htree_eof(file); |
585 | break; | 561 | break; |
586 | } | 562 | } |
587 | info->curr_node = rb_first(&info->root); | 563 | info->curr_node = rb_first(&info->root); |
@@ -590,7 +566,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
590 | fname = rb_entry(info->curr_node, struct fname, rb_hash); | 566 | fname = rb_entry(info->curr_node, struct fname, rb_hash); |
591 | info->curr_hash = fname->hash; | 567 | info->curr_hash = fname->hash; |
592 | info->curr_minor_hash = fname->minor_hash; | 568 | info->curr_minor_hash = fname->minor_hash; |
593 | if (call_filldir(filp, dirent, filldir, fname)) | 569 | if (call_filldir(file, ctx, fname)) |
594 | break; | 570 | break; |
595 | next_node: | 571 | next_node: |
596 | info->curr_node = rb_next(info->curr_node); | 572 | info->curr_node = rb_next(info->curr_node); |
@@ -601,7 +577,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
601 | info->curr_minor_hash = fname->minor_hash; | 577 | info->curr_minor_hash = fname->minor_hash; |
602 | } else { | 578 | } else { |
603 | if (info->next_hash == ~0) { | 579 | if (info->next_hash == ~0) { |
604 | filp->f_pos = ext4_get_htree_eof(filp); | 580 | ctx->pos = ext4_get_htree_eof(file); |
605 | break; | 581 | break; |
606 | } | 582 | } |
607 | info->curr_hash = info->next_hash; | 583 | info->curr_hash = info->next_hash; |
@@ -609,7 +585,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
609 | } | 585 | } |
610 | } | 586 | } |
611 | finished: | 587 | finished: |
612 | info->last_pos = filp->f_pos; | 588 | info->last_pos = ctx->pos; |
613 | return 0; | 589 | return 0; |
614 | } | 590 | } |
615 | 591 | ||
@@ -624,7 +600,7 @@ static int ext4_release_dir(struct inode *inode, struct file *filp) | |||
624 | const struct file_operations ext4_dir_operations = { | 600 | const struct file_operations ext4_dir_operations = { |
625 | .llseek = ext4_dir_llseek, | 601 | .llseek = ext4_dir_llseek, |
626 | .read = generic_read_dir, | 602 | .read = generic_read_dir, |
627 | .readdir = ext4_readdir, | 603 | .iterate = ext4_readdir, |
628 | .unlocked_ioctl = ext4_ioctl, | 604 | .unlocked_ioctl = ext4_ioctl, |
629 | #ifdef CONFIG_COMPAT | 605 | #ifdef CONFIG_COMPAT |
630 | .compat_ioctl = ext4_compat_ioctl, | 606 | .compat_ioctl = ext4_compat_ioctl, |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5aae3d12d400..b577e45425b0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -177,38 +177,28 @@ struct ext4_map_blocks { | |||
177 | }; | 177 | }; |
178 | 178 | ||
179 | /* | 179 | /* |
180 | * For delayed allocation tracking | ||
181 | */ | ||
182 | struct mpage_da_data { | ||
183 | struct inode *inode; | ||
184 | sector_t b_blocknr; /* start block number of extent */ | ||
185 | size_t b_size; /* size of extent */ | ||
186 | unsigned long b_state; /* state of the extent */ | ||
187 | unsigned long first_page, next_page; /* extent of pages */ | ||
188 | struct writeback_control *wbc; | ||
189 | int io_done; | ||
190 | int pages_written; | ||
191 | int retval; | ||
192 | }; | ||
193 | |||
194 | /* | ||
195 | * Flags for ext4_io_end->flags | 180 | * Flags for ext4_io_end->flags |
196 | */ | 181 | */ |
197 | #define EXT4_IO_END_UNWRITTEN 0x0001 | 182 | #define EXT4_IO_END_UNWRITTEN 0x0001 |
198 | #define EXT4_IO_END_ERROR 0x0002 | 183 | #define EXT4_IO_END_DIRECT 0x0002 |
199 | #define EXT4_IO_END_DIRECT 0x0004 | ||
200 | 184 | ||
201 | /* | 185 | /* |
202 | * For converting uninitialized extents on a work queue. | 186 | * For converting uninitialized extents on a work queue. 'handle' is used for |
187 | * buffered writeback. | ||
203 | */ | 188 | */ |
204 | typedef struct ext4_io_end { | 189 | typedef struct ext4_io_end { |
205 | struct list_head list; /* per-file finished IO list */ | 190 | struct list_head list; /* per-file finished IO list */ |
191 | handle_t *handle; /* handle reserved for extent | ||
192 | * conversion */ | ||
206 | struct inode *inode; /* file being written to */ | 193 | struct inode *inode; /* file being written to */ |
194 | struct bio *bio; /* Linked list of completed | ||
195 | * bios covering the extent */ | ||
207 | unsigned int flag; /* unwritten or not */ | 196 | unsigned int flag; /* unwritten or not */ |
208 | loff_t offset; /* offset in the file */ | 197 | loff_t offset; /* offset in the file */ |
209 | ssize_t size; /* size of the extent */ | 198 | ssize_t size; /* size of the extent */ |
210 | struct kiocb *iocb; /* iocb struct for AIO */ | 199 | struct kiocb *iocb; /* iocb struct for AIO */ |
211 | int result; /* error value for AIO */ | 200 | int result; /* error value for AIO */ |
201 | atomic_t count; /* reference counter */ | ||
212 | } ext4_io_end_t; | 202 | } ext4_io_end_t; |
213 | 203 | ||
214 | struct ext4_io_submit { | 204 | struct ext4_io_submit { |
@@ -581,11 +571,6 @@ enum { | |||
581 | #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 | 571 | #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 |
582 | 572 | ||
583 | /* | 573 | /* |
584 | * Flags used by ext4_discard_partial_page_buffers | ||
585 | */ | ||
586 | #define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001 | ||
587 | |||
588 | /* | ||
589 | * ioctl commands | 574 | * ioctl commands |
590 | */ | 575 | */ |
591 | #define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS | 576 | #define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS |
@@ -879,6 +864,7 @@ struct ext4_inode_info { | |||
879 | rwlock_t i_es_lock; | 864 | rwlock_t i_es_lock; |
880 | struct list_head i_es_lru; | 865 | struct list_head i_es_lru; |
881 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ | 866 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ |
867 | unsigned long i_touch_when; /* jiffies of last accessing */ | ||
882 | 868 | ||
883 | /* ialloc */ | 869 | /* ialloc */ |
884 | ext4_group_t i_last_alloc_group; | 870 | ext4_group_t i_last_alloc_group; |
@@ -903,12 +889,22 @@ struct ext4_inode_info { | |||
903 | qsize_t i_reserved_quota; | 889 | qsize_t i_reserved_quota; |
904 | #endif | 890 | #endif |
905 | 891 | ||
906 | /* completed IOs that might need unwritten extents handling */ | 892 | /* Lock protecting lists below */ |
907 | struct list_head i_completed_io_list; | ||
908 | spinlock_t i_completed_io_lock; | 893 | spinlock_t i_completed_io_lock; |
894 | /* | ||
895 | * Completed IOs that need unwritten extents handling and have | ||
896 | * transaction reserved | ||
897 | */ | ||
898 | struct list_head i_rsv_conversion_list; | ||
899 | /* | ||
900 | * Completed IOs that need unwritten extents handling and don't have | ||
901 | * transaction reserved | ||
902 | */ | ||
903 | struct list_head i_unrsv_conversion_list; | ||
909 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ | 904 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ |
910 | atomic_t i_unwritten; /* Nr. of inflight conversions pending */ | 905 | atomic_t i_unwritten; /* Nr. of inflight conversions pending */ |
911 | struct work_struct i_unwritten_work; /* deferred extent conversion */ | 906 | struct work_struct i_rsv_conversion_work; |
907 | struct work_struct i_unrsv_conversion_work; | ||
912 | 908 | ||
913 | spinlock_t i_block_reservation_lock; | 909 | spinlock_t i_block_reservation_lock; |
914 | 910 | ||
@@ -1245,7 +1241,6 @@ struct ext4_sb_info { | |||
1245 | unsigned int s_mb_stats; | 1241 | unsigned int s_mb_stats; |
1246 | unsigned int s_mb_order2_reqs; | 1242 | unsigned int s_mb_order2_reqs; |
1247 | unsigned int s_mb_group_prealloc; | 1243 | unsigned int s_mb_group_prealloc; |
1248 | unsigned int s_max_writeback_mb_bump; | ||
1249 | unsigned int s_max_dir_size_kb; | 1244 | unsigned int s_max_dir_size_kb; |
1250 | /* where last allocation was done - for stream allocation */ | 1245 | /* where last allocation was done - for stream allocation */ |
1251 | unsigned long s_mb_last_group; | 1246 | unsigned long s_mb_last_group; |
@@ -1281,8 +1276,10 @@ struct ext4_sb_info { | |||
1281 | struct flex_groups *s_flex_groups; | 1276 | struct flex_groups *s_flex_groups; |
1282 | ext4_group_t s_flex_groups_allocated; | 1277 | ext4_group_t s_flex_groups_allocated; |
1283 | 1278 | ||
1284 | /* workqueue for dio unwritten */ | 1279 | /* workqueue for unreserved extent convertions (dio) */ |
1285 | struct workqueue_struct *dio_unwritten_wq; | 1280 | struct workqueue_struct *unrsv_conversion_wq; |
1281 | /* workqueue for reserved extent conversions (buffered io) */ | ||
1282 | struct workqueue_struct *rsv_conversion_wq; | ||
1286 | 1283 | ||
1287 | /* timer for periodic error stats printing */ | 1284 | /* timer for periodic error stats printing */ |
1288 | struct timer_list s_err_report; | 1285 | struct timer_list s_err_report; |
@@ -1307,6 +1304,7 @@ struct ext4_sb_info { | |||
1307 | /* Reclaim extents from extent status tree */ | 1304 | /* Reclaim extents from extent status tree */ |
1308 | struct shrinker s_es_shrinker; | 1305 | struct shrinker s_es_shrinker; |
1309 | struct list_head s_es_lru; | 1306 | struct list_head s_es_lru; |
1307 | unsigned long s_es_last_sorted; | ||
1310 | struct percpu_counter s_extent_cache_cnt; | 1308 | struct percpu_counter s_extent_cache_cnt; |
1311 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; | 1309 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; |
1312 | }; | 1310 | }; |
@@ -1342,6 +1340,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, | |||
1342 | struct ext4_io_end *io_end) | 1340 | struct ext4_io_end *io_end) |
1343 | { | 1341 | { |
1344 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 1342 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
1343 | /* Writeback has to have coversion transaction reserved */ | ||
1344 | WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle && | ||
1345 | !(io_end->flag & EXT4_IO_END_DIRECT)); | ||
1345 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | 1346 | io_end->flag |= EXT4_IO_END_UNWRITTEN; |
1346 | atomic_inc(&EXT4_I(inode)->i_unwritten); | 1347 | atomic_inc(&EXT4_I(inode)->i_unwritten); |
1347 | } | 1348 | } |
@@ -1999,7 +2000,6 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype) | |||
1999 | 2000 | ||
2000 | /* fsync.c */ | 2001 | /* fsync.c */ |
2001 | extern int ext4_sync_file(struct file *, loff_t, loff_t, int); | 2002 | extern int ext4_sync_file(struct file *, loff_t, loff_t, int); |
2002 | extern int ext4_flush_unwritten_io(struct inode *); | ||
2003 | 2003 | ||
2004 | /* hash.c */ | 2004 | /* hash.c */ |
2005 | extern int ext4fs_dirhash(const char *name, int len, struct | 2005 | extern int ext4fs_dirhash(const char *name, int len, struct |
@@ -2088,7 +2088,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); | |||
2088 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 2088 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
2089 | extern int ext4_can_truncate(struct inode *inode); | 2089 | extern int ext4_can_truncate(struct inode *inode); |
2090 | extern void ext4_truncate(struct inode *); | 2090 | extern void ext4_truncate(struct inode *); |
2091 | extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length); | 2091 | extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); |
2092 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); | 2092 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); |
2093 | extern void ext4_set_inode_flags(struct inode *); | 2093 | extern void ext4_set_inode_flags(struct inode *); |
2094 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 2094 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
@@ -2096,9 +2096,12 @@ extern int ext4_alloc_da_blocks(struct inode *inode); | |||
2096 | extern void ext4_set_aops(struct inode *inode); | 2096 | extern void ext4_set_aops(struct inode *inode); |
2097 | extern int ext4_writepage_trans_blocks(struct inode *); | 2097 | extern int ext4_writepage_trans_blocks(struct inode *); |
2098 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 2098 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
2099 | extern int ext4_discard_partial_page_buffers(handle_t *handle, | 2099 | extern int ext4_block_truncate_page(handle_t *handle, |
2100 | struct address_space *mapping, loff_t from, | 2100 | struct address_space *mapping, loff_t from); |
2101 | loff_t length, int flags); | 2101 | extern int ext4_block_zero_page_range(handle_t *handle, |
2102 | struct address_space *mapping, loff_t from, loff_t length); | ||
2103 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, | ||
2104 | loff_t lstart, loff_t lend); | ||
2102 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2105 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2103 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 2106 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
2104 | extern void ext4_da_update_reserve_space(struct inode *inode, | 2107 | extern void ext4_da_update_reserve_space(struct inode *inode, |
@@ -2111,7 +2114,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
2111 | const struct iovec *iov, loff_t offset, | 2114 | const struct iovec *iov, loff_t offset, |
2112 | unsigned long nr_segs); | 2115 | unsigned long nr_segs); |
2113 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); | 2116 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); |
2114 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk); | 2117 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); |
2115 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); | 2118 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); |
2116 | extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, | 2119 | extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, |
2117 | ext4_lblk_t first, ext4_lblk_t stop); | 2120 | ext4_lblk_t first, ext4_lblk_t stop); |
@@ -2166,42 +2169,96 @@ extern int ext4_alloc_flex_bg_array(struct super_block *sb, | |||
2166 | ext4_group_t ngroup); | 2169 | ext4_group_t ngroup); |
2167 | extern const char *ext4_decode_error(struct super_block *sb, int errno, | 2170 | extern const char *ext4_decode_error(struct super_block *sb, int errno, |
2168 | char nbuf[16]); | 2171 | char nbuf[16]); |
2172 | |||
2169 | extern __printf(4, 5) | 2173 | extern __printf(4, 5) |
2170 | void __ext4_error(struct super_block *, const char *, unsigned int, | 2174 | void __ext4_error(struct super_block *, const char *, unsigned int, |
2171 | const char *, ...); | 2175 | const char *, ...); |
2172 | #define ext4_error(sb, message...) __ext4_error(sb, __func__, \ | ||
2173 | __LINE__, ## message) | ||
2174 | extern __printf(5, 6) | 2176 | extern __printf(5, 6) |
2175 | void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, | 2177 | void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, |
2176 | const char *, ...); | 2178 | const char *, ...); |
2177 | extern __printf(5, 6) | 2179 | extern __printf(5, 6) |
2178 | void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, | 2180 | void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, |
2179 | const char *, ...); | 2181 | const char *, ...); |
2180 | extern void __ext4_std_error(struct super_block *, const char *, | 2182 | extern void __ext4_std_error(struct super_block *, const char *, |
2181 | unsigned int, int); | 2183 | unsigned int, int); |
2182 | extern __printf(4, 5) | 2184 | extern __printf(4, 5) |
2183 | void __ext4_abort(struct super_block *, const char *, unsigned int, | 2185 | void __ext4_abort(struct super_block *, const char *, unsigned int, |
2184 | const char *, ...); | 2186 | const char *, ...); |
2185 | #define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \ | ||
2186 | __LINE__, ## message) | ||
2187 | extern __printf(4, 5) | 2187 | extern __printf(4, 5) |
2188 | void __ext4_warning(struct super_block *, const char *, unsigned int, | 2188 | void __ext4_warning(struct super_block *, const char *, unsigned int, |
2189 | const char *, ...); | 2189 | const char *, ...); |
2190 | #define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \ | ||
2191 | __LINE__, ## message) | ||
2192 | extern __printf(3, 4) | 2190 | extern __printf(3, 4) |
2193 | void ext4_msg(struct super_block *, const char *, const char *, ...); | 2191 | void __ext4_msg(struct super_block *, const char *, const char *, ...); |
2194 | extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, | 2192 | extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, |
2195 | const char *, unsigned int, const char *); | 2193 | const char *, unsigned int, const char *); |
2196 | #define dump_mmp_msg(sb, mmp, msg) __dump_mmp_msg(sb, mmp, __func__, \ | ||
2197 | __LINE__, msg) | ||
2198 | extern __printf(7, 8) | 2194 | extern __printf(7, 8) |
2199 | void __ext4_grp_locked_error(const char *, unsigned int, | 2195 | void __ext4_grp_locked_error(const char *, unsigned int, |
2200 | struct super_block *, ext4_group_t, | 2196 | struct super_block *, ext4_group_t, |
2201 | unsigned long, ext4_fsblk_t, | 2197 | unsigned long, ext4_fsblk_t, |
2202 | const char *, ...); | 2198 | const char *, ...); |
2203 | #define ext4_grp_locked_error(sb, grp, message...) \ | 2199 | |
2204 | __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message) | 2200 | #ifdef CONFIG_PRINTK |
2201 | |||
2202 | #define ext4_error_inode(inode, func, line, block, fmt, ...) \ | ||
2203 | __ext4_error_inode(inode, func, line, block, fmt, ##__VA_ARGS__) | ||
2204 | #define ext4_error_file(file, func, line, block, fmt, ...) \ | ||
2205 | __ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__) | ||
2206 | #define ext4_error(sb, fmt, ...) \ | ||
2207 | __ext4_error(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) | ||
2208 | #define ext4_abort(sb, fmt, ...) \ | ||
2209 | __ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) | ||
2210 | #define ext4_warning(sb, fmt, ...) \ | ||
2211 | __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) | ||
2212 | #define ext4_msg(sb, level, fmt, ...) \ | ||
2213 | __ext4_msg(sb, level, fmt, ##__VA_ARGS__) | ||
2214 | #define dump_mmp_msg(sb, mmp, msg) \ | ||
2215 | __dump_mmp_msg(sb, mmp, __func__, __LINE__, msg) | ||
2216 | #define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \ | ||
2217 | __ext4_grp_locked_error(__func__, __LINE__, sb, grp, ino, block, \ | ||
2218 | fmt, ##__VA_ARGS__) | ||
2219 | |||
2220 | #else | ||
2221 | |||
2222 | #define ext4_error_inode(inode, func, line, block, fmt, ...) \ | ||
2223 | do { \ | ||
2224 | no_printk(fmt, ##__VA_ARGS__); \ | ||
2225 | __ext4_error_inode(inode, "", 0, block, " "); \ | ||
2226 | } while (0) | ||
2227 | #define ext4_error_file(file, func, line, block, fmt, ...) \ | ||
2228 | do { \ | ||
2229 | no_printk(fmt, ##__VA_ARGS__); \ | ||
2230 | __ext4_error_file(file, "", 0, block, " "); \ | ||
2231 | } while (0) | ||
2232 | #define ext4_error(sb, fmt, ...) \ | ||
2233 | do { \ | ||
2234 | no_printk(fmt, ##__VA_ARGS__); \ | ||
2235 | __ext4_error(sb, "", 0, " "); \ | ||
2236 | } while (0) | ||
2237 | #define ext4_abort(sb, fmt, ...) \ | ||
2238 | do { \ | ||
2239 | no_printk(fmt, ##__VA_ARGS__); \ | ||
2240 | __ext4_abort(sb, "", 0, " "); \ | ||
2241 | } while (0) | ||
2242 | #define ext4_warning(sb, fmt, ...) \ | ||
2243 | do { \ | ||
2244 | no_printk(fmt, ##__VA_ARGS__); \ | ||
2245 | __ext4_warning(sb, "", 0, " "); \ | ||
2246 | } while (0) | ||
2247 | #define ext4_msg(sb, level, fmt, ...) \ | ||
2248 | do { \ | ||
2249 | no_printk(fmt, ##__VA_ARGS__); \ | ||
2250 | __ext4_msg(sb, "", " "); \ | ||
2251 | } while (0) | ||
2252 | #define dump_mmp_msg(sb, mmp, msg) \ | ||
2253 | __dump_mmp_msg(sb, mmp, "", 0, "") | ||
2254 | #define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \ | ||
2255 | do { \ | ||
2256 | no_printk(fmt, ##__VA_ARGS__); \ | ||
2257 | __ext4_grp_locked_error("", 0, sb, grp, ino, block, " "); \ | ||
2258 | } while (0) | ||
2259 | |||
2260 | #endif | ||
2261 | |||
2205 | extern void ext4_update_dynamic_rev(struct super_block *sb); | 2262 | extern void ext4_update_dynamic_rev(struct super_block *sb); |
2206 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, | 2263 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, |
2207 | __u32 compat); | 2264 | __u32 compat); |
@@ -2312,6 +2369,7 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | |||
2312 | { | 2369 | { |
2313 | struct ext4_group_info ***grp_info; | 2370 | struct ext4_group_info ***grp_info; |
2314 | long indexv, indexh; | 2371 | long indexv, indexh; |
2372 | BUG_ON(group >= EXT4_SB(sb)->s_groups_count); | ||
2315 | grp_info = EXT4_SB(sb)->s_group_info; | 2373 | grp_info = EXT4_SB(sb)->s_group_info; |
2316 | indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); | 2374 | indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); |
2317 | indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); | 2375 | indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); |
@@ -2515,7 +2573,7 @@ extern int ext4_try_create_inline_dir(handle_t *handle, | |||
2515 | struct inode *parent, | 2573 | struct inode *parent, |
2516 | struct inode *inode); | 2574 | struct inode *inode); |
2517 | extern int ext4_read_inline_dir(struct file *filp, | 2575 | extern int ext4_read_inline_dir(struct file *filp, |
2518 | void *dirent, filldir_t filldir, | 2576 | struct dir_context *ctx, |
2519 | int *has_inline_data); | 2577 | int *has_inline_data); |
2520 | extern int htree_inlinedir_to_tree(struct file *dir_file, | 2578 | extern int htree_inlinedir_to_tree(struct file *dir_file, |
2521 | struct inode *dir, ext4_lblk_t block, | 2579 | struct inode *dir, ext4_lblk_t block, |
@@ -2598,8 +2656,7 @@ struct ext4_extent; | |||
2598 | 2656 | ||
2599 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 2657 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
2600 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 2658 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
2601 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | 2659 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); |
2602 | int chunk); | ||
2603 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | 2660 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, |
2604 | struct ext4_map_blocks *map, int flags); | 2661 | struct ext4_map_blocks *map, int flags); |
2605 | extern void ext4_ext_truncate(handle_t *, struct inode *); | 2662 | extern void ext4_ext_truncate(handle_t *, struct inode *); |
@@ -2609,8 +2666,8 @@ extern void ext4_ext_init(struct super_block *); | |||
2609 | extern void ext4_ext_release(struct super_block *); | 2666 | extern void ext4_ext_release(struct super_block *); |
2610 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, | 2667 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, |
2611 | loff_t len); | 2668 | loff_t len); |
2612 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 2669 | extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, |
2613 | ssize_t len); | 2670 | loff_t offset, ssize_t len); |
2614 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, | 2671 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, |
2615 | struct ext4_map_blocks *map, int flags); | 2672 | struct ext4_map_blocks *map, int flags); |
2616 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, | 2673 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, |
@@ -2650,12 +2707,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
2650 | 2707 | ||
2651 | /* page-io.c */ | 2708 | /* page-io.c */ |
2652 | extern int __init ext4_init_pageio(void); | 2709 | extern int __init ext4_init_pageio(void); |
2653 | extern void ext4_add_complete_io(ext4_io_end_t *io_end); | ||
2654 | extern void ext4_exit_pageio(void); | 2710 | extern void ext4_exit_pageio(void); |
2655 | extern void ext4_ioend_shutdown(struct inode *); | ||
2656 | extern void ext4_free_io_end(ext4_io_end_t *io); | ||
2657 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | 2711 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); |
2658 | extern void ext4_end_io_work(struct work_struct *work); | 2712 | extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end); |
2713 | extern int ext4_put_io_end(ext4_io_end_t *io_end); | ||
2714 | extern void ext4_put_io_end_defer(ext4_io_end_t *io_end); | ||
2715 | extern void ext4_io_submit_init(struct ext4_io_submit *io, | ||
2716 | struct writeback_control *wbc); | ||
2717 | extern void ext4_end_io_rsv_work(struct work_struct *work); | ||
2718 | extern void ext4_end_io_unrsv_work(struct work_struct *work); | ||
2659 | extern void ext4_io_submit(struct ext4_io_submit *io); | 2719 | extern void ext4_io_submit(struct ext4_io_submit *io); |
2660 | extern int ext4_bio_write_page(struct ext4_io_submit *io, | 2720 | extern int ext4_bio_write_page(struct ext4_io_submit *io, |
2661 | struct page *page, | 2721 | struct page *page, |
@@ -2668,20 +2728,17 @@ extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp); | |||
2668 | extern int ext4_mmp_csum_verify(struct super_block *sb, | 2728 | extern int ext4_mmp_csum_verify(struct super_block *sb, |
2669 | struct mmp_struct *mmp); | 2729 | struct mmp_struct *mmp); |
2670 | 2730 | ||
2671 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ | 2731 | /* |
2732 | * Note that these flags will never ever appear in a buffer_head's state flag. | ||
2733 | * See EXT4_MAP_... to see where this is used. | ||
2734 | */ | ||
2672 | enum ext4_state_bits { | 2735 | enum ext4_state_bits { |
2673 | BH_Uninit /* blocks are allocated but uninitialized on disk */ | 2736 | BH_Uninit /* blocks are allocated but uninitialized on disk */ |
2674 | = BH_JBDPrivateStart, | 2737 | = BH_JBDPrivateStart, |
2675 | BH_AllocFromCluster, /* allocated blocks were part of already | 2738 | BH_AllocFromCluster, /* allocated blocks were part of already |
2676 | * allocated cluster. Note that this flag will | 2739 | * allocated cluster. */ |
2677 | * never, ever appear in a buffer_head's state | ||
2678 | * flag. See EXT4_MAP_FROM_CLUSTER to see where | ||
2679 | * this is used. */ | ||
2680 | }; | 2740 | }; |
2681 | 2741 | ||
2682 | BUFFER_FNS(Uninit, uninit) | ||
2683 | TAS_BUFFER_FNS(Uninit, uninit) | ||
2684 | |||
2685 | /* | 2742 | /* |
2686 | * Add new method to test whether block and inode bitmaps are properly | 2743 | * Add new method to test whether block and inode bitmaps are properly |
2687 | * initialized. With uninit_bg reading the block from disk is not enough | 2744 | * initialized. With uninit_bg reading the block from disk is not enough |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 451eb4045330..72a3600aedbd 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -38,31 +38,43 @@ static void ext4_put_nojournal(handle_t *handle) | |||
38 | /* | 38 | /* |
39 | * Wrappers for jbd2_journal_start/end. | 39 | * Wrappers for jbd2_journal_start/end. |
40 | */ | 40 | */ |
41 | handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, | 41 | static int ext4_journal_check_start(struct super_block *sb) |
42 | int type, int nblocks) | ||
43 | { | 42 | { |
44 | journal_t *journal; | 43 | journal_t *journal; |
45 | 44 | ||
46 | might_sleep(); | 45 | might_sleep(); |
47 | |||
48 | trace_ext4_journal_start(sb, nblocks, _RET_IP_); | ||
49 | if (sb->s_flags & MS_RDONLY) | 46 | if (sb->s_flags & MS_RDONLY) |
50 | return ERR_PTR(-EROFS); | 47 | return -EROFS; |
51 | |||
52 | WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); | 48 | WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); |
53 | journal = EXT4_SB(sb)->s_journal; | 49 | journal = EXT4_SB(sb)->s_journal; |
54 | if (!journal) | ||
55 | return ext4_get_nojournal(); | ||
56 | /* | 50 | /* |
57 | * Special case here: if the journal has aborted behind our | 51 | * Special case here: if the journal has aborted behind our |
58 | * backs (eg. EIO in the commit thread), then we still need to | 52 | * backs (eg. EIO in the commit thread), then we still need to |
59 | * take the FS itself readonly cleanly. | 53 | * take the FS itself readonly cleanly. |
60 | */ | 54 | */ |
61 | if (is_journal_aborted(journal)) { | 55 | if (journal && is_journal_aborted(journal)) { |
62 | ext4_abort(sb, "Detected aborted journal"); | 56 | ext4_abort(sb, "Detected aborted journal"); |
63 | return ERR_PTR(-EROFS); | 57 | return -EROFS; |
64 | } | 58 | } |
65 | return jbd2__journal_start(journal, nblocks, GFP_NOFS, type, line); | 59 | return 0; |
60 | } | ||
61 | |||
62 | handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, | ||
63 | int type, int blocks, int rsv_blocks) | ||
64 | { | ||
65 | journal_t *journal; | ||
66 | int err; | ||
67 | |||
68 | trace_ext4_journal_start(sb, blocks, rsv_blocks, _RET_IP_); | ||
69 | err = ext4_journal_check_start(sb); | ||
70 | if (err < 0) | ||
71 | return ERR_PTR(err); | ||
72 | |||
73 | journal = EXT4_SB(sb)->s_journal; | ||
74 | if (!journal) | ||
75 | return ext4_get_nojournal(); | ||
76 | return jbd2__journal_start(journal, blocks, rsv_blocks, GFP_NOFS, | ||
77 | type, line); | ||
66 | } | 78 | } |
67 | 79 | ||
68 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) | 80 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) |
@@ -86,6 +98,30 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) | |||
86 | return err; | 98 | return err; |
87 | } | 99 | } |
88 | 100 | ||
101 | handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, | ||
102 | int type) | ||
103 | { | ||
104 | struct super_block *sb; | ||
105 | int err; | ||
106 | |||
107 | if (!ext4_handle_valid(handle)) | ||
108 | return ext4_get_nojournal(); | ||
109 | |||
110 | sb = handle->h_journal->j_private; | ||
111 | trace_ext4_journal_start_reserved(sb, handle->h_buffer_credits, | ||
112 | _RET_IP_); | ||
113 | err = ext4_journal_check_start(sb); | ||
114 | if (err < 0) { | ||
115 | jbd2_journal_free_reserved(handle); | ||
116 | return ERR_PTR(err); | ||
117 | } | ||
118 | |||
119 | err = jbd2_journal_start_reserved(handle, type, line); | ||
120 | if (err < 0) | ||
121 | return ERR_PTR(err); | ||
122 | return handle; | ||
123 | } | ||
124 | |||
89 | void ext4_journal_abort_handle(const char *caller, unsigned int line, | 125 | void ext4_journal_abort_handle(const char *caller, unsigned int line, |
90 | const char *err_fn, struct buffer_head *bh, | 126 | const char *err_fn, struct buffer_head *bh, |
91 | handle_t *handle, int err) | 127 | handle_t *handle, int err) |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index c8c6885406db..2877258d9497 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode) | |||
134 | #define EXT4_HT_MIGRATE 8 | 134 | #define EXT4_HT_MIGRATE 8 |
135 | #define EXT4_HT_MOVE_EXTENTS 9 | 135 | #define EXT4_HT_MOVE_EXTENTS 9 |
136 | #define EXT4_HT_XATTR 10 | 136 | #define EXT4_HT_XATTR 10 |
137 | #define EXT4_HT_MAX 11 | 137 | #define EXT4_HT_EXT_CONVERT 11 |
138 | #define EXT4_HT_MAX 12 | ||
138 | 139 | ||
139 | /** | 140 | /** |
140 | * struct ext4_journal_cb_entry - Base structure for callback information. | 141 | * struct ext4_journal_cb_entry - Base structure for callback information. |
@@ -265,7 +266,7 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, | |||
265 | __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) | 266 | __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) |
266 | 267 | ||
267 | handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, | 268 | handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, |
268 | int type, int nblocks); | 269 | int type, int blocks, int rsv_blocks); |
269 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); | 270 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); |
270 | 271 | ||
271 | #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) | 272 | #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) |
@@ -300,21 +301,37 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed) | |||
300 | } | 301 | } |
301 | 302 | ||
302 | #define ext4_journal_start_sb(sb, type, nblocks) \ | 303 | #define ext4_journal_start_sb(sb, type, nblocks) \ |
303 | __ext4_journal_start_sb((sb), __LINE__, (type), (nblocks)) | 304 | __ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0) |
304 | 305 | ||
305 | #define ext4_journal_start(inode, type, nblocks) \ | 306 | #define ext4_journal_start(inode, type, nblocks) \ |
306 | __ext4_journal_start((inode), __LINE__, (type), (nblocks)) | 307 | __ext4_journal_start((inode), __LINE__, (type), (nblocks), 0) |
308 | |||
309 | #define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks) \ | ||
310 | __ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks)) | ||
307 | 311 | ||
308 | static inline handle_t *__ext4_journal_start(struct inode *inode, | 312 | static inline handle_t *__ext4_journal_start(struct inode *inode, |
309 | unsigned int line, int type, | 313 | unsigned int line, int type, |
310 | int nblocks) | 314 | int blocks, int rsv_blocks) |
311 | { | 315 | { |
312 | return __ext4_journal_start_sb(inode->i_sb, line, type, nblocks); | 316 | return __ext4_journal_start_sb(inode->i_sb, line, type, blocks, |
317 | rsv_blocks); | ||
313 | } | 318 | } |
314 | 319 | ||
315 | #define ext4_journal_stop(handle) \ | 320 | #define ext4_journal_stop(handle) \ |
316 | __ext4_journal_stop(__func__, __LINE__, (handle)) | 321 | __ext4_journal_stop(__func__, __LINE__, (handle)) |
317 | 322 | ||
323 | #define ext4_journal_start_reserved(handle, type) \ | ||
324 | __ext4_journal_start_reserved((handle), __LINE__, (type)) | ||
325 | |||
326 | handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, | ||
327 | int type); | ||
328 | |||
329 | static inline void ext4_journal_free_reserved(handle_t *handle) | ||
330 | { | ||
331 | if (ext4_handle_valid(handle)) | ||
332 | jbd2_journal_free_reserved(handle); | ||
333 | } | ||
334 | |||
318 | static inline handle_t *ext4_journal_current_handle(void) | 335 | static inline handle_t *ext4_journal_current_handle(void) |
319 | { | 336 | { |
320 | return journal_current_handle(); | 337 | return journal_current_handle(); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bc0f1910b9cf..72ba4705d4fa 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -2125,7 +2125,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode, | |||
2125 | next_del = ext4_find_delayed_extent(inode, &es); | 2125 | next_del = ext4_find_delayed_extent(inode, &es); |
2126 | if (!exists && next_del) { | 2126 | if (!exists && next_del) { |
2127 | exists = 1; | 2127 | exists = 1; |
2128 | flags |= FIEMAP_EXTENT_DELALLOC; | 2128 | flags |= (FIEMAP_EXTENT_DELALLOC | |
2129 | FIEMAP_EXTENT_UNKNOWN); | ||
2129 | } | 2130 | } |
2130 | up_read(&EXT4_I(inode)->i_data_sem); | 2131 | up_read(&EXT4_I(inode)->i_data_sem); |
2131 | 2132 | ||
@@ -2328,17 +2329,15 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, | |||
2328 | } | 2329 | } |
2329 | 2330 | ||
2330 | /* | 2331 | /* |
2331 | * How many index/leaf blocks need to change/allocate to modify nrblocks? | 2332 | * How many index/leaf blocks need to change/allocate to add @extents extents? |
2332 | * | 2333 | * |
2333 | * if nrblocks are fit in a single extent (chunk flag is 1), then | 2334 | * If we add a single extent, then in the worse case, each tree level |
2334 | * in the worse case, each tree level index/leaf need to be changed | 2335 | * index/leaf need to be changed in case of the tree split. |
2335 | * if the tree split due to insert a new extent, then the old tree | ||
2336 | * index/leaf need to be updated too | ||
2337 | * | 2336 | * |
2338 | * If the nrblocks are discontiguous, they could cause | 2337 | * If more extents are inserted, they could cause the whole tree split more |
2339 | * the whole tree split more than once, but this is really rare. | 2338 | * than once, but this is really rare. |
2340 | */ | 2339 | */ |
2341 | int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 2340 | int ext4_ext_index_trans_blocks(struct inode *inode, int extents) |
2342 | { | 2341 | { |
2343 | int index; | 2342 | int index; |
2344 | int depth; | 2343 | int depth; |
@@ -2349,7 +2348,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
2349 | 2348 | ||
2350 | depth = ext_depth(inode); | 2349 | depth = ext_depth(inode); |
2351 | 2350 | ||
2352 | if (chunk) | 2351 | if (extents <= 1) |
2353 | index = depth * 2; | 2352 | index = depth * 2; |
2354 | else | 2353 | else |
2355 | index = depth * 3; | 2354 | index = depth * 3; |
@@ -2357,20 +2356,24 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
2357 | return index; | 2356 | return index; |
2358 | } | 2357 | } |
2359 | 2358 | ||
2359 | static inline int get_default_free_blocks_flags(struct inode *inode) | ||
2360 | { | ||
2361 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
2362 | return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; | ||
2363 | else if (ext4_should_journal_data(inode)) | ||
2364 | return EXT4_FREE_BLOCKS_FORGET; | ||
2365 | return 0; | ||
2366 | } | ||
2367 | |||
2360 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | 2368 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, |
2361 | struct ext4_extent *ex, | 2369 | struct ext4_extent *ex, |
2362 | ext4_fsblk_t *partial_cluster, | 2370 | long long *partial_cluster, |
2363 | ext4_lblk_t from, ext4_lblk_t to) | 2371 | ext4_lblk_t from, ext4_lblk_t to) |
2364 | { | 2372 | { |
2365 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2373 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
2366 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 2374 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
2367 | ext4_fsblk_t pblk; | 2375 | ext4_fsblk_t pblk; |
2368 | int flags = 0; | 2376 | int flags = get_default_free_blocks_flags(inode); |
2369 | |||
2370 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
2371 | flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; | ||
2372 | else if (ext4_should_journal_data(inode)) | ||
2373 | flags |= EXT4_FREE_BLOCKS_FORGET; | ||
2374 | 2377 | ||
2375 | /* | 2378 | /* |
2376 | * For bigalloc file systems, we never free a partial cluster | 2379 | * For bigalloc file systems, we never free a partial cluster |
@@ -2388,7 +2391,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2388 | * partial cluster here. | 2391 | * partial cluster here. |
2389 | */ | 2392 | */ |
2390 | pblk = ext4_ext_pblock(ex) + ee_len - 1; | 2393 | pblk = ext4_ext_pblock(ex) + ee_len - 1; |
2391 | if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) { | 2394 | if ((*partial_cluster > 0) && |
2395 | (EXT4_B2C(sbi, pblk) != *partial_cluster)) { | ||
2392 | ext4_free_blocks(handle, inode, NULL, | 2396 | ext4_free_blocks(handle, inode, NULL, |
2393 | EXT4_C2B(sbi, *partial_cluster), | 2397 | EXT4_C2B(sbi, *partial_cluster), |
2394 | sbi->s_cluster_ratio, flags); | 2398 | sbi->s_cluster_ratio, flags); |
@@ -2414,41 +2418,46 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2414 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2418 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { |
2415 | /* tail removal */ | 2419 | /* tail removal */ |
2416 | ext4_lblk_t num; | 2420 | ext4_lblk_t num; |
2421 | unsigned int unaligned; | ||
2417 | 2422 | ||
2418 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2423 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
2419 | pblk = ext4_ext_pblock(ex) + ee_len - num; | 2424 | pblk = ext4_ext_pblock(ex) + ee_len - num; |
2420 | ext_debug("free last %u blocks starting %llu\n", num, pblk); | 2425 | /* |
2426 | * Usually we want to free partial cluster at the end of the | ||
2427 | * extent, except for the situation when the cluster is still | ||
2428 | * used by any other extent (partial_cluster is negative). | ||
2429 | */ | ||
2430 | if (*partial_cluster < 0 && | ||
2431 | -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1)) | ||
2432 | flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; | ||
2433 | |||
2434 | ext_debug("free last %u blocks starting %llu partial %lld\n", | ||
2435 | num, pblk, *partial_cluster); | ||
2421 | ext4_free_blocks(handle, inode, NULL, pblk, num, flags); | 2436 | ext4_free_blocks(handle, inode, NULL, pblk, num, flags); |
2422 | /* | 2437 | /* |
2423 | * If the block range to be freed didn't start at the | 2438 | * If the block range to be freed didn't start at the |
2424 | * beginning of a cluster, and we removed the entire | 2439 | * beginning of a cluster, and we removed the entire |
2425 | * extent, save the partial cluster here, since we | 2440 | * extent and the cluster is not used by any other extent, |
2426 | * might need to delete if we determine that the | 2441 | * save the partial cluster here, since we might need to |
2427 | * truncate operation has removed all of the blocks in | 2442 | * delete if we determine that the truncate operation has |
2428 | * the cluster. | 2443 | * removed all of the blocks in the cluster. |
2444 | * | ||
2445 | * On the other hand, if we did not manage to free the whole | ||
2446 | * extent, we have to mark the cluster as used (store negative | ||
2447 | * cluster number in partial_cluster). | ||
2429 | */ | 2448 | */ |
2430 | if (pblk & (sbi->s_cluster_ratio - 1) && | 2449 | unaligned = pblk & (sbi->s_cluster_ratio - 1); |
2431 | (ee_len == num)) | 2450 | if (unaligned && (ee_len == num) && |
2451 | (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) | ||
2432 | *partial_cluster = EXT4_B2C(sbi, pblk); | 2452 | *partial_cluster = EXT4_B2C(sbi, pblk); |
2433 | else | 2453 | else if (unaligned) |
2454 | *partial_cluster = -((long long)EXT4_B2C(sbi, pblk)); | ||
2455 | else if (*partial_cluster > 0) | ||
2434 | *partial_cluster = 0; | 2456 | *partial_cluster = 0; |
2435 | } else if (from == le32_to_cpu(ex->ee_block) | 2457 | } else |
2436 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2458 | ext4_error(sbi->s_sb, "strange request: removal(2) " |
2437 | /* head removal */ | 2459 | "%u-%u from %u:%u\n", |
2438 | ext4_lblk_t num; | 2460 | from, to, le32_to_cpu(ex->ee_block), ee_len); |
2439 | ext4_fsblk_t start; | ||
2440 | |||
2441 | num = to - from; | ||
2442 | start = ext4_ext_pblock(ex); | ||
2443 | |||
2444 | ext_debug("free first %u blocks starting %llu\n", num, start); | ||
2445 | ext4_free_blocks(handle, inode, NULL, start, num, flags); | ||
2446 | |||
2447 | } else { | ||
2448 | printk(KERN_INFO "strange request: removal(2) " | ||
2449 | "%u-%u from %u:%u\n", | ||
2450 | from, to, le32_to_cpu(ex->ee_block), ee_len); | ||
2451 | } | ||
2452 | return 0; | 2461 | return 0; |
2453 | } | 2462 | } |
2454 | 2463 | ||
@@ -2461,12 +2470,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2461 | * @handle: The journal handle | 2470 | * @handle: The journal handle |
2462 | * @inode: The files inode | 2471 | * @inode: The files inode |
2463 | * @path: The path to the leaf | 2472 | * @path: The path to the leaf |
2473 | * @partial_cluster: The cluster which we'll have to free if all extents | ||
2474 | * has been released from it. It gets negative in case | ||
2475 | * that the cluster is still used. | ||
2464 | * @start: The first block to remove | 2476 | * @start: The first block to remove |
2465 | * @end: The last block to remove | 2477 | * @end: The last block to remove |
2466 | */ | 2478 | */ |
2467 | static int | 2479 | static int |
2468 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | 2480 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, |
2469 | struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster, | 2481 | struct ext4_ext_path *path, |
2482 | long long *partial_cluster, | ||
2470 | ext4_lblk_t start, ext4_lblk_t end) | 2483 | ext4_lblk_t start, ext4_lblk_t end) |
2471 | { | 2484 | { |
2472 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2485 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
@@ -2479,6 +2492,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2479 | unsigned short ex_ee_len; | 2492 | unsigned short ex_ee_len; |
2480 | unsigned uninitialized = 0; | 2493 | unsigned uninitialized = 0; |
2481 | struct ext4_extent *ex; | 2494 | struct ext4_extent *ex; |
2495 | ext4_fsblk_t pblk; | ||
2482 | 2496 | ||
2483 | /* the header must be checked already in ext4_ext_remove_space() */ | 2497 | /* the header must be checked already in ext4_ext_remove_space() */ |
2484 | ext_debug("truncate since %u in leaf to %u\n", start, end); | 2498 | ext_debug("truncate since %u in leaf to %u\n", start, end); |
@@ -2490,7 +2504,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2490 | return -EIO; | 2504 | return -EIO; |
2491 | } | 2505 | } |
2492 | /* find where to start removing */ | 2506 | /* find where to start removing */ |
2493 | ex = EXT_LAST_EXTENT(eh); | 2507 | ex = path[depth].p_ext; |
2508 | if (!ex) | ||
2509 | ex = EXT_LAST_EXTENT(eh); | ||
2494 | 2510 | ||
2495 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2511 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2496 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2512 | ex_ee_len = ext4_ext_get_actual_len(ex); |
@@ -2517,6 +2533,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2517 | 2533 | ||
2518 | /* If this extent is beyond the end of the hole, skip it */ | 2534 | /* If this extent is beyond the end of the hole, skip it */ |
2519 | if (end < ex_ee_block) { | 2535 | if (end < ex_ee_block) { |
2536 | /* | ||
2537 | * We're going to skip this extent and move to another, | ||
2538 | * so if this extent is not cluster aligned we have | ||
2539 | * to mark the current cluster as used to avoid | ||
2540 | * accidentally freeing it later on | ||
2541 | */ | ||
2542 | pblk = ext4_ext_pblock(ex); | ||
2543 | if (pblk & (sbi->s_cluster_ratio - 1)) | ||
2544 | *partial_cluster = | ||
2545 | -((long long)EXT4_B2C(sbi, pblk)); | ||
2520 | ex--; | 2546 | ex--; |
2521 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2547 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2522 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2548 | ex_ee_len = ext4_ext_get_actual_len(ex); |
@@ -2592,7 +2618,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2592 | sizeof(struct ext4_extent)); | 2618 | sizeof(struct ext4_extent)); |
2593 | } | 2619 | } |
2594 | le16_add_cpu(&eh->eh_entries, -1); | 2620 | le16_add_cpu(&eh->eh_entries, -1); |
2595 | } else | 2621 | } else if (*partial_cluster > 0) |
2596 | *partial_cluster = 0; | 2622 | *partial_cluster = 0; |
2597 | 2623 | ||
2598 | err = ext4_ext_dirty(handle, inode, path + depth); | 2624 | err = ext4_ext_dirty(handle, inode, path + depth); |
@@ -2610,17 +2636,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2610 | err = ext4_ext_correct_indexes(handle, inode, path); | 2636 | err = ext4_ext_correct_indexes(handle, inode, path); |
2611 | 2637 | ||
2612 | /* | 2638 | /* |
2613 | * If there is still a entry in the leaf node, check to see if | 2639 | * Free the partial cluster only if the current extent does not |
2614 | * it references the partial cluster. This is the only place | 2640 | * reference it. Otherwise we might free used cluster. |
2615 | * where it could; if it doesn't, we can free the cluster. | ||
2616 | */ | 2641 | */ |
2617 | if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) && | 2642 | if (*partial_cluster > 0 && |
2618 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != | 2643 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != |
2619 | *partial_cluster)) { | 2644 | *partial_cluster)) { |
2620 | int flags = EXT4_FREE_BLOCKS_FORGET; | 2645 | int flags = get_default_free_blocks_flags(inode); |
2621 | |||
2622 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
2623 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
2624 | 2646 | ||
2625 | ext4_free_blocks(handle, inode, NULL, | 2647 | ext4_free_blocks(handle, inode, NULL, |
2626 | EXT4_C2B(sbi, *partial_cluster), | 2648 | EXT4_C2B(sbi, *partial_cluster), |
@@ -2664,7 +2686,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | |||
2664 | struct super_block *sb = inode->i_sb; | 2686 | struct super_block *sb = inode->i_sb; |
2665 | int depth = ext_depth(inode); | 2687 | int depth = ext_depth(inode); |
2666 | struct ext4_ext_path *path = NULL; | 2688 | struct ext4_ext_path *path = NULL; |
2667 | ext4_fsblk_t partial_cluster = 0; | 2689 | long long partial_cluster = 0; |
2668 | handle_t *handle; | 2690 | handle_t *handle; |
2669 | int i = 0, err = 0; | 2691 | int i = 0, err = 0; |
2670 | 2692 | ||
@@ -2676,7 +2698,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | |||
2676 | return PTR_ERR(handle); | 2698 | return PTR_ERR(handle); |
2677 | 2699 | ||
2678 | again: | 2700 | again: |
2679 | trace_ext4_ext_remove_space(inode, start, depth); | 2701 | trace_ext4_ext_remove_space(inode, start, end, depth); |
2680 | 2702 | ||
2681 | /* | 2703 | /* |
2682 | * Check if we are removing extents inside the extent tree. If that | 2704 | * Check if we are removing extents inside the extent tree. If that |
@@ -2813,6 +2835,9 @@ again: | |||
2813 | err = -EIO; | 2835 | err = -EIO; |
2814 | break; | 2836 | break; |
2815 | } | 2837 | } |
2838 | /* Yield here to deal with large extent trees. | ||
2839 | * Should be a no-op if we did IO above. */ | ||
2840 | cond_resched(); | ||
2816 | if (WARN_ON(i + 1 > depth)) { | 2841 | if (WARN_ON(i + 1 > depth)) { |
2817 | err = -EIO; | 2842 | err = -EIO; |
2818 | break; | 2843 | break; |
@@ -2844,17 +2869,14 @@ again: | |||
2844 | } | 2869 | } |
2845 | } | 2870 | } |
2846 | 2871 | ||
2847 | trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster, | 2872 | trace_ext4_ext_remove_space_done(inode, start, end, depth, |
2848 | path->p_hdr->eh_entries); | 2873 | partial_cluster, path->p_hdr->eh_entries); |
2849 | 2874 | ||
2850 | /* If we still have something in the partial cluster and we have removed | 2875 | /* If we still have something in the partial cluster and we have removed |
2851 | * even the first extent, then we should free the blocks in the partial | 2876 | * even the first extent, then we should free the blocks in the partial |
2852 | * cluster as well. */ | 2877 | * cluster as well. */ |
2853 | if (partial_cluster && path->p_hdr->eh_entries == 0) { | 2878 | if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) { |
2854 | int flags = EXT4_FREE_BLOCKS_FORGET; | 2879 | int flags = get_default_free_blocks_flags(inode); |
2855 | |||
2856 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
2857 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
2858 | 2880 | ||
2859 | ext4_free_blocks(handle, inode, NULL, | 2881 | ext4_free_blocks(handle, inode, NULL, |
2860 | EXT4_C2B(EXT4_SB(sb), partial_cluster), | 2882 | EXT4_C2B(EXT4_SB(sb), partial_cluster), |
@@ -4242,8 +4264,8 @@ got_allocated_blocks: | |||
4242 | /* not a good idea to call discard here directly, | 4264 | /* not a good idea to call discard here directly, |
4243 | * but otherwise we'd need to call it every free() */ | 4265 | * but otherwise we'd need to call it every free() */ |
4244 | ext4_discard_preallocations(inode); | 4266 | ext4_discard_preallocations(inode); |
4245 | ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), | 4267 | ext4_free_blocks(handle, inode, NULL, newblock, |
4246 | ext4_ext_get_actual_len(&newex), fb_flags); | 4268 | EXT4_C2B(sbi, allocated_clusters), fb_flags); |
4247 | goto out2; | 4269 | goto out2; |
4248 | } | 4270 | } |
4249 | 4271 | ||
@@ -4363,8 +4385,9 @@ out2: | |||
4363 | } | 4385 | } |
4364 | 4386 | ||
4365 | out3: | 4387 | out3: |
4366 | trace_ext4_ext_map_blocks_exit(inode, map, err ? err : allocated); | 4388 | trace_ext4_ext_map_blocks_exit(inode, flags, map, |
4367 | 4389 | err ? err : allocated); | |
4390 | ext4_es_lru_add(inode); | ||
4368 | return err ? err : allocated; | 4391 | return err ? err : allocated; |
4369 | } | 4392 | } |
4370 | 4393 | ||
@@ -4386,9 +4409,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) | |||
4386 | 4409 | ||
4387 | last_block = (inode->i_size + sb->s_blocksize - 1) | 4410 | last_block = (inode->i_size + sb->s_blocksize - 1) |
4388 | >> EXT4_BLOCK_SIZE_BITS(sb); | 4411 | >> EXT4_BLOCK_SIZE_BITS(sb); |
4412 | retry: | ||
4389 | err = ext4_es_remove_extent(inode, last_block, | 4413 | err = ext4_es_remove_extent(inode, last_block, |
4390 | EXT_MAX_BLOCKS - last_block); | 4414 | EXT_MAX_BLOCKS - last_block); |
4415 | if (err == -ENOMEM) { | ||
4416 | cond_resched(); | ||
4417 | congestion_wait(BLK_RW_ASYNC, HZ/50); | ||
4418 | goto retry; | ||
4419 | } | ||
4420 | if (err) { | ||
4421 | ext4_std_error(inode->i_sb, err); | ||
4422 | return; | ||
4423 | } | ||
4391 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); | 4424 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); |
4425 | ext4_std_error(inode->i_sb, err); | ||
4392 | } | 4426 | } |
4393 | 4427 | ||
4394 | static void ext4_falloc_update_inode(struct inode *inode, | 4428 | static void ext4_falloc_update_inode(struct inode *inode, |
@@ -4446,7 +4480,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
4446 | return -EOPNOTSUPP; | 4480 | return -EOPNOTSUPP; |
4447 | 4481 | ||
4448 | if (mode & FALLOC_FL_PUNCH_HOLE) | 4482 | if (mode & FALLOC_FL_PUNCH_HOLE) |
4449 | return ext4_punch_hole(file, offset, len); | 4483 | return ext4_punch_hole(inode, offset, len); |
4450 | 4484 | ||
4451 | ret = ext4_convert_inline_data(inode); | 4485 | ret = ext4_convert_inline_data(inode); |
4452 | if (ret) | 4486 | if (ret) |
@@ -4548,10 +4582,9 @@ retry: | |||
4548 | * function, to convert the fallocated extents after IO is completed. | 4582 | * function, to convert the fallocated extents after IO is completed. |
4549 | * Returns 0 on success. | 4583 | * Returns 0 on success. |
4550 | */ | 4584 | */ |
4551 | int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 4585 | int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, |
4552 | ssize_t len) | 4586 | loff_t offset, ssize_t len) |
4553 | { | 4587 | { |
4554 | handle_t *handle; | ||
4555 | unsigned int max_blocks; | 4588 | unsigned int max_blocks; |
4556 | int ret = 0; | 4589 | int ret = 0; |
4557 | int ret2 = 0; | 4590 | int ret2 = 0; |
@@ -4566,16 +4599,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
4566 | max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - | 4599 | max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - |
4567 | map.m_lblk); | 4600 | map.m_lblk); |
4568 | /* | 4601 | /* |
4569 | * credits to insert 1 extent into extent tree | 4602 | * This is somewhat ugly but the idea is clear: When transaction is |
4603 | * reserved, everything goes into it. Otherwise we rather start several | ||
4604 | * smaller transactions for conversion of each extent separately. | ||
4570 | */ | 4605 | */ |
4571 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | 4606 | if (handle) { |
4607 | handle = ext4_journal_start_reserved(handle, | ||
4608 | EXT4_HT_EXT_CONVERT); | ||
4609 | if (IS_ERR(handle)) | ||
4610 | return PTR_ERR(handle); | ||
4611 | credits = 0; | ||
4612 | } else { | ||
4613 | /* | ||
4614 | * credits to insert 1 extent into extent tree | ||
4615 | */ | ||
4616 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | ||
4617 | } | ||
4572 | while (ret >= 0 && ret < max_blocks) { | 4618 | while (ret >= 0 && ret < max_blocks) { |
4573 | map.m_lblk += ret; | 4619 | map.m_lblk += ret; |
4574 | map.m_len = (max_blocks -= ret); | 4620 | map.m_len = (max_blocks -= ret); |
4575 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); | 4621 | if (credits) { |
4576 | if (IS_ERR(handle)) { | 4622 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, |
4577 | ret = PTR_ERR(handle); | 4623 | credits); |
4578 | break; | 4624 | if (IS_ERR(handle)) { |
4625 | ret = PTR_ERR(handle); | ||
4626 | break; | ||
4627 | } | ||
4579 | } | 4628 | } |
4580 | ret = ext4_map_blocks(handle, inode, &map, | 4629 | ret = ext4_map_blocks(handle, inode, &map, |
4581 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); | 4630 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); |
@@ -4586,10 +4635,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
4586 | inode->i_ino, map.m_lblk, | 4635 | inode->i_ino, map.m_lblk, |
4587 | map.m_len, ret); | 4636 | map.m_len, ret); |
4588 | ext4_mark_inode_dirty(handle, inode); | 4637 | ext4_mark_inode_dirty(handle, inode); |
4589 | ret2 = ext4_journal_stop(handle); | 4638 | if (credits) |
4590 | if (ret <= 0 || ret2 ) | 4639 | ret2 = ext4_journal_stop(handle); |
4640 | if (ret <= 0 || ret2) | ||
4591 | break; | 4641 | break; |
4592 | } | 4642 | } |
4643 | if (!credits) | ||
4644 | ret2 = ext4_journal_stop(handle); | ||
4593 | return ret > 0 ? ret2 : ret; | 4645 | return ret > 0 ? ret2 : ret; |
4594 | } | 4646 | } |
4595 | 4647 | ||
@@ -4659,7 +4711,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
4659 | error = ext4_get_inode_loc(inode, &iloc); | 4711 | error = ext4_get_inode_loc(inode, &iloc); |
4660 | if (error) | 4712 | if (error) |
4661 | return error; | 4713 | return error; |
4662 | physical = iloc.bh->b_blocknr << blockbits; | 4714 | physical = (__u64)iloc.bh->b_blocknr << blockbits; |
4663 | offset = EXT4_GOOD_OLD_INODE_SIZE + | 4715 | offset = EXT4_GOOD_OLD_INODE_SIZE + |
4664 | EXT4_I(inode)->i_extra_isize; | 4716 | EXT4_I(inode)->i_extra_isize; |
4665 | physical += offset; | 4717 | physical += offset; |
@@ -4667,7 +4719,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
4667 | flags |= FIEMAP_EXTENT_DATA_INLINE; | 4719 | flags |= FIEMAP_EXTENT_DATA_INLINE; |
4668 | brelse(iloc.bh); | 4720 | brelse(iloc.bh); |
4669 | } else { /* external block */ | 4721 | } else { /* external block */ |
4670 | physical = EXT4_I(inode)->i_file_acl << blockbits; | 4722 | physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; |
4671 | length = inode->i_sb->s_blocksize; | 4723 | length = inode->i_sb->s_blocksize; |
4672 | } | 4724 | } |
4673 | 4725 | ||
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index e6941e622d31..91cb110da1b4 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -10,6 +10,7 @@ | |||
10 | * Ext4 extents status tree core functions. | 10 | * Ext4 extents status tree core functions. |
11 | */ | 11 | */ |
12 | #include <linux/rbtree.h> | 12 | #include <linux/rbtree.h> |
13 | #include <linux/list_sort.h> | ||
13 | #include "ext4.h" | 14 | #include "ext4.h" |
14 | #include "extents_status.h" | 15 | #include "extents_status.h" |
15 | #include "ext4_extents.h" | 16 | #include "ext4_extents.h" |
@@ -147,6 +148,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
147 | ext4_lblk_t end); | 148 | ext4_lblk_t end); |
148 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, | 149 | static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, |
149 | int nr_to_scan); | 150 | int nr_to_scan); |
151 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | ||
152 | struct ext4_inode_info *locked_ei); | ||
150 | 153 | ||
151 | int __init ext4_init_es(void) | 154 | int __init ext4_init_es(void) |
152 | { | 155 | { |
@@ -291,7 +294,6 @@ out: | |||
291 | 294 | ||
292 | read_unlock(&EXT4_I(inode)->i_es_lock); | 295 | read_unlock(&EXT4_I(inode)->i_es_lock); |
293 | 296 | ||
294 | ext4_es_lru_add(inode); | ||
295 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); | 297 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); |
296 | } | 298 | } |
297 | 299 | ||
@@ -439,7 +441,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
439 | */ | 441 | */ |
440 | if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { | 442 | if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { |
441 | if (in_range(es->es_lblk, ee_block, ee_len)) { | 443 | if (in_range(es->es_lblk, ee_block, ee_len)) { |
442 | pr_warn("ES insert assertation failed for " | 444 | pr_warn("ES insert assertion failed for " |
443 | "inode: %lu we can find an extent " | 445 | "inode: %lu we can find an extent " |
444 | "at block [%d/%d/%llu/%c], but we " | 446 | "at block [%d/%d/%llu/%c], but we " |
445 | "want to add an delayed/hole extent " | 447 | "want to add an delayed/hole extent " |
@@ -458,7 +460,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
458 | */ | 460 | */ |
459 | if (es->es_lblk < ee_block || | 461 | if (es->es_lblk < ee_block || |
460 | ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { | 462 | ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { |
461 | pr_warn("ES insert assertation failed for inode: %lu " | 463 | pr_warn("ES insert assertion failed for inode: %lu " |
462 | "ex_status [%d/%d/%llu/%c] != " | 464 | "ex_status [%d/%d/%llu/%c] != " |
463 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, | 465 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, |
464 | ee_block, ee_len, ee_start, | 466 | ee_block, ee_len, ee_start, |
@@ -468,7 +470,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
468 | } | 470 | } |
469 | 471 | ||
470 | if (ee_status ^ es_status) { | 472 | if (ee_status ^ es_status) { |
471 | pr_warn("ES insert assertation failed for inode: %lu " | 473 | pr_warn("ES insert assertion failed for inode: %lu " |
472 | "ex_status [%d/%d/%llu/%c] != " | 474 | "ex_status [%d/%d/%llu/%c] != " |
473 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, | 475 | "es_status [%d/%d/%llu/%c]\n", inode->i_ino, |
474 | ee_block, ee_len, ee_start, | 476 | ee_block, ee_len, ee_start, |
@@ -481,7 +483,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
481 | * that we don't want to add an written/unwritten extent. | 483 | * that we don't want to add an written/unwritten extent. |
482 | */ | 484 | */ |
483 | if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { | 485 | if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { |
484 | pr_warn("ES insert assertation failed for inode: %lu " | 486 | pr_warn("ES insert assertion failed for inode: %lu " |
485 | "can't find an extent at block %d but we want " | 487 | "can't find an extent at block %d but we want " |
486 | "to add an written/unwritten extent " | 488 | "to add an written/unwritten extent " |
487 | "[%d/%d/%llu/%llx]\n", inode->i_ino, | 489 | "[%d/%d/%llu/%llx]\n", inode->i_ino, |
@@ -519,7 +521,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
519 | * We want to add a delayed/hole extent but this | 521 | * We want to add a delayed/hole extent but this |
520 | * block has been allocated. | 522 | * block has been allocated. |
521 | */ | 523 | */ |
522 | pr_warn("ES insert assertation failed for inode: %lu " | 524 | pr_warn("ES insert assertion failed for inode: %lu " |
523 | "We can find blocks but we want to add a " | 525 | "We can find blocks but we want to add a " |
524 | "delayed/hole extent [%d/%d/%llu/%llx]\n", | 526 | "delayed/hole extent [%d/%d/%llu/%llx]\n", |
525 | inode->i_ino, es->es_lblk, es->es_len, | 527 | inode->i_ino, es->es_lblk, es->es_len, |
@@ -527,13 +529,13 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
527 | return; | 529 | return; |
528 | } else if (ext4_es_is_written(es)) { | 530 | } else if (ext4_es_is_written(es)) { |
529 | if (retval != es->es_len) { | 531 | if (retval != es->es_len) { |
530 | pr_warn("ES insert assertation failed for " | 532 | pr_warn("ES insert assertion failed for " |
531 | "inode: %lu retval %d != es_len %d\n", | 533 | "inode: %lu retval %d != es_len %d\n", |
532 | inode->i_ino, retval, es->es_len); | 534 | inode->i_ino, retval, es->es_len); |
533 | return; | 535 | return; |
534 | } | 536 | } |
535 | if (map.m_pblk != ext4_es_pblock(es)) { | 537 | if (map.m_pblk != ext4_es_pblock(es)) { |
536 | pr_warn("ES insert assertation failed for " | 538 | pr_warn("ES insert assertion failed for " |
537 | "inode: %lu m_pblk %llu != " | 539 | "inode: %lu m_pblk %llu != " |
538 | "es_pblk %llu\n", | 540 | "es_pblk %llu\n", |
539 | inode->i_ino, map.m_pblk, | 541 | inode->i_ino, map.m_pblk, |
@@ -549,7 +551,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
549 | } | 551 | } |
550 | } else if (retval == 0) { | 552 | } else if (retval == 0) { |
551 | if (ext4_es_is_written(es)) { | 553 | if (ext4_es_is_written(es)) { |
552 | pr_warn("ES insert assertation failed for inode: %lu " | 554 | pr_warn("ES insert assertion failed for inode: %lu " |
553 | "We can't find the block but we want to add " | 555 | "We can't find the block but we want to add " |
554 | "an written extent [%d/%d/%llu/%llx]\n", | 556 | "an written extent [%d/%d/%llu/%llx]\n", |
555 | inode->i_ino, es->es_lblk, es->es_len, | 557 | inode->i_ino, es->es_lblk, es->es_len, |
@@ -632,10 +634,8 @@ out: | |||
632 | } | 634 | } |
633 | 635 | ||
634 | /* | 636 | /* |
635 | * ext4_es_insert_extent() adds a space to a extent status tree. | 637 | * ext4_es_insert_extent() adds information to an inode's extent |
636 | * | 638 | * status tree. |
637 | * ext4_es_insert_extent is called by ext4_da_write_begin and | ||
638 | * ext4_es_remove_extent. | ||
639 | * | 639 | * |
640 | * Return 0 on success, error code on failure. | 640 | * Return 0 on success, error code on failure. |
641 | */ | 641 | */ |
@@ -667,12 +667,17 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
667 | err = __es_remove_extent(inode, lblk, end); | 667 | err = __es_remove_extent(inode, lblk, end); |
668 | if (err != 0) | 668 | if (err != 0) |
669 | goto error; | 669 | goto error; |
670 | retry: | ||
670 | err = __es_insert_extent(inode, &newes); | 671 | err = __es_insert_extent(inode, &newes); |
672 | if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | ||
673 | EXT4_I(inode))) | ||
674 | goto retry; | ||
675 | if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) | ||
676 | err = 0; | ||
671 | 677 | ||
672 | error: | 678 | error: |
673 | write_unlock(&EXT4_I(inode)->i_es_lock); | 679 | write_unlock(&EXT4_I(inode)->i_es_lock); |
674 | 680 | ||
675 | ext4_es_lru_add(inode); | ||
676 | ext4_es_print_tree(inode); | 681 | ext4_es_print_tree(inode); |
677 | 682 | ||
678 | return err; | 683 | return err; |
@@ -734,7 +739,6 @@ out: | |||
734 | 739 | ||
735 | read_unlock(&EXT4_I(inode)->i_es_lock); | 740 | read_unlock(&EXT4_I(inode)->i_es_lock); |
736 | 741 | ||
737 | ext4_es_lru_add(inode); | ||
738 | trace_ext4_es_lookup_extent_exit(inode, es, found); | 742 | trace_ext4_es_lookup_extent_exit(inode, es, found); |
739 | return found; | 743 | return found; |
740 | } | 744 | } |
@@ -748,8 +752,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
748 | struct extent_status orig_es; | 752 | struct extent_status orig_es; |
749 | ext4_lblk_t len1, len2; | 753 | ext4_lblk_t len1, len2; |
750 | ext4_fsblk_t block; | 754 | ext4_fsblk_t block; |
751 | int err = 0; | 755 | int err; |
752 | 756 | ||
757 | retry: | ||
758 | err = 0; | ||
753 | es = __es_tree_search(&tree->root, lblk); | 759 | es = __es_tree_search(&tree->root, lblk); |
754 | if (!es) | 760 | if (!es) |
755 | goto out; | 761 | goto out; |
@@ -784,6 +790,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, | |||
784 | if (err) { | 790 | if (err) { |
785 | es->es_lblk = orig_es.es_lblk; | 791 | es->es_lblk = orig_es.es_lblk; |
786 | es->es_len = orig_es.es_len; | 792 | es->es_len = orig_es.es_len; |
793 | if ((err == -ENOMEM) && | ||
794 | __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, | ||
795 | EXT4_I(inode))) | ||
796 | goto retry; | ||
787 | goto out; | 797 | goto out; |
788 | } | 798 | } |
789 | } else { | 799 | } else { |
@@ -878,38 +888,64 @@ int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
878 | EXTENT_STATUS_WRITTEN); | 888 | EXTENT_STATUS_WRITTEN); |
879 | } | 889 | } |
880 | 890 | ||
881 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | 891 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, |
892 | struct list_head *b) | ||
893 | { | ||
894 | struct ext4_inode_info *eia, *eib; | ||
895 | eia = list_entry(a, struct ext4_inode_info, i_es_lru); | ||
896 | eib = list_entry(b, struct ext4_inode_info, i_es_lru); | ||
897 | |||
898 | if (eia->i_touch_when == eib->i_touch_when) | ||
899 | return 0; | ||
900 | if (time_after(eia->i_touch_when, eib->i_touch_when)) | ||
901 | return 1; | ||
902 | else | ||
903 | return -1; | ||
904 | } | ||
905 | |||
906 | static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, | ||
907 | struct ext4_inode_info *locked_ei) | ||
882 | { | 908 | { |
883 | struct ext4_sb_info *sbi = container_of(shrink, | ||
884 | struct ext4_sb_info, s_es_shrinker); | ||
885 | struct ext4_inode_info *ei; | 909 | struct ext4_inode_info *ei; |
886 | struct list_head *cur, *tmp, scanned; | 910 | struct list_head *cur, *tmp; |
887 | int nr_to_scan = sc->nr_to_scan; | 911 | LIST_HEAD(skiped); |
888 | int ret, nr_shrunk = 0; | 912 | int ret, nr_shrunk = 0; |
889 | 913 | ||
890 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 914 | spin_lock(&sbi->s_es_lru_lock); |
891 | trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); | ||
892 | |||
893 | if (!nr_to_scan) | ||
894 | return ret; | ||
895 | 915 | ||
896 | INIT_LIST_HEAD(&scanned); | 916 | /* |
917 | * If the inode that is at the head of LRU list is newer than | ||
918 | * last_sorted time, that means that we need to sort this list. | ||
919 | */ | ||
920 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru); | ||
921 | if (sbi->s_es_last_sorted < ei->i_touch_when) { | ||
922 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | ||
923 | sbi->s_es_last_sorted = jiffies; | ||
924 | } | ||
897 | 925 | ||
898 | spin_lock(&sbi->s_es_lru_lock); | ||
899 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 926 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { |
900 | list_move_tail(cur, &scanned); | 927 | /* |
928 | * If we have already reclaimed all extents from extent | ||
929 | * status tree, just stop the loop immediately. | ||
930 | */ | ||
931 | if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) | ||
932 | break; | ||
901 | 933 | ||
902 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | 934 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); |
903 | 935 | ||
904 | read_lock(&ei->i_es_lock); | 936 | /* Skip the inode that is newer than the last_sorted time */ |
905 | if (ei->i_es_lru_nr == 0) { | 937 | if (sbi->s_es_last_sorted < ei->i_touch_when) { |
906 | read_unlock(&ei->i_es_lock); | 938 | list_move_tail(cur, &skiped); |
907 | continue; | 939 | continue; |
908 | } | 940 | } |
909 | read_unlock(&ei->i_es_lock); | 941 | |
942 | if (ei->i_es_lru_nr == 0 || ei == locked_ei) | ||
943 | continue; | ||
910 | 944 | ||
911 | write_lock(&ei->i_es_lock); | 945 | write_lock(&ei->i_es_lock); |
912 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); | 946 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); |
947 | if (ei->i_es_lru_nr == 0) | ||
948 | list_del_init(&ei->i_es_lru); | ||
913 | write_unlock(&ei->i_es_lock); | 949 | write_unlock(&ei->i_es_lock); |
914 | 950 | ||
915 | nr_shrunk += ret; | 951 | nr_shrunk += ret; |
@@ -917,29 +953,50 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
917 | if (nr_to_scan == 0) | 953 | if (nr_to_scan == 0) |
918 | break; | 954 | break; |
919 | } | 955 | } |
920 | list_splice_tail(&scanned, &sbi->s_es_lru); | 956 | |
957 | /* Move the newer inodes into the tail of the LRU list. */ | ||
958 | list_splice_tail(&skiped, &sbi->s_es_lru); | ||
921 | spin_unlock(&sbi->s_es_lru_lock); | 959 | spin_unlock(&sbi->s_es_lru_lock); |
922 | 960 | ||
961 | if (locked_ei && nr_shrunk == 0) | ||
962 | nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); | ||
963 | |||
964 | return nr_shrunk; | ||
965 | } | ||
966 | |||
967 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | ||
968 | { | ||
969 | struct ext4_sb_info *sbi = container_of(shrink, | ||
970 | struct ext4_sb_info, s_es_shrinker); | ||
971 | int nr_to_scan = sc->nr_to_scan; | ||
972 | int ret, nr_shrunk; | ||
973 | |||
974 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | ||
975 | trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); | ||
976 | |||
977 | if (!nr_to_scan) | ||
978 | return ret; | ||
979 | |||
980 | nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); | ||
981 | |||
923 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 982 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); |
924 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); | 983 | trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); |
925 | return ret; | 984 | return ret; |
926 | } | 985 | } |
927 | 986 | ||
928 | void ext4_es_register_shrinker(struct super_block *sb) | 987 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) |
929 | { | 988 | { |
930 | struct ext4_sb_info *sbi; | ||
931 | |||
932 | sbi = EXT4_SB(sb); | ||
933 | INIT_LIST_HEAD(&sbi->s_es_lru); | 989 | INIT_LIST_HEAD(&sbi->s_es_lru); |
934 | spin_lock_init(&sbi->s_es_lru_lock); | 990 | spin_lock_init(&sbi->s_es_lru_lock); |
991 | sbi->s_es_last_sorted = 0; | ||
935 | sbi->s_es_shrinker.shrink = ext4_es_shrink; | 992 | sbi->s_es_shrinker.shrink = ext4_es_shrink; |
936 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | 993 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; |
937 | register_shrinker(&sbi->s_es_shrinker); | 994 | register_shrinker(&sbi->s_es_shrinker); |
938 | } | 995 | } |
939 | 996 | ||
940 | void ext4_es_unregister_shrinker(struct super_block *sb) | 997 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) |
941 | { | 998 | { |
942 | unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker); | 999 | unregister_shrinker(&sbi->s_es_shrinker); |
943 | } | 1000 | } |
944 | 1001 | ||
945 | void ext4_es_lru_add(struct inode *inode) | 1002 | void ext4_es_lru_add(struct inode *inode) |
@@ -947,11 +1004,14 @@ void ext4_es_lru_add(struct inode *inode) | |||
947 | struct ext4_inode_info *ei = EXT4_I(inode); | 1004 | struct ext4_inode_info *ei = EXT4_I(inode); |
948 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1005 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
949 | 1006 | ||
1007 | ei->i_touch_when = jiffies; | ||
1008 | |||
1009 | if (!list_empty(&ei->i_es_lru)) | ||
1010 | return; | ||
1011 | |||
950 | spin_lock(&sbi->s_es_lru_lock); | 1012 | spin_lock(&sbi->s_es_lru_lock); |
951 | if (list_empty(&ei->i_es_lru)) | 1013 | if (list_empty(&ei->i_es_lru)) |
952 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); | 1014 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); |
953 | else | ||
954 | list_move_tail(&ei->i_es_lru, &sbi->s_es_lru); | ||
955 | spin_unlock(&sbi->s_es_lru_lock); | 1015 | spin_unlock(&sbi->s_es_lru_lock); |
956 | } | 1016 | } |
957 | 1017 | ||
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index f740eb03b707..e936730cc5b0 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
@@ -39,6 +39,7 @@ | |||
39 | EXTENT_STATUS_DELAYED | \ | 39 | EXTENT_STATUS_DELAYED | \ |
40 | EXTENT_STATUS_HOLE) | 40 | EXTENT_STATUS_HOLE) |
41 | 41 | ||
42 | struct ext4_sb_info; | ||
42 | struct ext4_extent; | 43 | struct ext4_extent; |
43 | 44 | ||
44 | struct extent_status { | 45 | struct extent_status { |
@@ -119,8 +120,8 @@ static inline void ext4_es_store_status(struct extent_status *es, | |||
119 | es->es_pblk = block; | 120 | es->es_pblk = block; |
120 | } | 121 | } |
121 | 122 | ||
122 | extern void ext4_es_register_shrinker(struct super_block *sb); | 123 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
123 | extern void ext4_es_unregister_shrinker(struct super_block *sb); | 124 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
124 | extern void ext4_es_lru_add(struct inode *inode); | 125 | extern void ext4_es_lru_add(struct inode *inode); |
125 | extern void ext4_es_lru_del(struct inode *inode); | 126 | extern void ext4_es_lru_del(struct inode *inode); |
126 | 127 | ||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b1b4d51b5d86..6f4cc567c382 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -312,7 +312,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, | |||
312 | blkbits = inode->i_sb->s_blocksize_bits; | 312 | blkbits = inode->i_sb->s_blocksize_bits; |
313 | startoff = *offset; | 313 | startoff = *offset; |
314 | lastoff = startoff; | 314 | lastoff = startoff; |
315 | endoff = (map->m_lblk + map->m_len) << blkbits; | 315 | endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; |
316 | 316 | ||
317 | index = startoff >> PAGE_CACHE_SHIFT; | 317 | index = startoff >> PAGE_CACHE_SHIFT; |
318 | end = endoff >> PAGE_CACHE_SHIFT; | 318 | end = endoff >> PAGE_CACHE_SHIFT; |
@@ -457,7 +457,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
457 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 457 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
458 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 458 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { |
459 | if (last != start) | 459 | if (last != start) |
460 | dataoff = last << blkbits; | 460 | dataoff = (loff_t)last << blkbits; |
461 | break; | 461 | break; |
462 | } | 462 | } |
463 | 463 | ||
@@ -468,7 +468,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
468 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | 468 | ext4_es_find_delayed_extent_range(inode, last, last, &es); |
469 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | 469 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { |
470 | if (last != start) | 470 | if (last != start) |
471 | dataoff = last << blkbits; | 471 | dataoff = (loff_t)last << blkbits; |
472 | break; | 472 | break; |
473 | } | 473 | } |
474 | 474 | ||
@@ -486,7 +486,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
486 | } | 486 | } |
487 | 487 | ||
488 | last++; | 488 | last++; |
489 | dataoff = last << blkbits; | 489 | dataoff = (loff_t)last << blkbits; |
490 | } while (last <= end); | 490 | } while (last <= end); |
491 | 491 | ||
492 | mutex_unlock(&inode->i_mutex); | 492 | mutex_unlock(&inode->i_mutex); |
@@ -494,17 +494,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
494 | if (dataoff > isize) | 494 | if (dataoff > isize) |
495 | return -ENXIO; | 495 | return -ENXIO; |
496 | 496 | ||
497 | if (dataoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | 497 | return vfs_setpos(file, dataoff, maxsize); |
498 | return -EINVAL; | ||
499 | if (dataoff > maxsize) | ||
500 | return -EINVAL; | ||
501 | |||
502 | if (dataoff != file->f_pos) { | ||
503 | file->f_pos = dataoff; | ||
504 | file->f_version = 0; | ||
505 | } | ||
506 | |||
507 | return dataoff; | ||
508 | } | 498 | } |
509 | 499 | ||
510 | /* | 500 | /* |
@@ -540,7 +530,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
540 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 530 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
541 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 531 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { |
542 | last += ret; | 532 | last += ret; |
543 | holeoff = last << blkbits; | 533 | holeoff = (loff_t)last << blkbits; |
544 | continue; | 534 | continue; |
545 | } | 535 | } |
546 | 536 | ||
@@ -551,7 +541,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
551 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | 541 | ext4_es_find_delayed_extent_range(inode, last, last, &es); |
552 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | 542 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { |
553 | last = es.es_lblk + es.es_len; | 543 | last = es.es_lblk + es.es_len; |
554 | holeoff = last << blkbits; | 544 | holeoff = (loff_t)last << blkbits; |
555 | continue; | 545 | continue; |
556 | } | 546 | } |
557 | 547 | ||
@@ -566,7 +556,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
566 | &map, &holeoff); | 556 | &map, &holeoff); |
567 | if (!unwritten) { | 557 | if (!unwritten) { |
568 | last += ret; | 558 | last += ret; |
569 | holeoff = last << blkbits; | 559 | holeoff = (loff_t)last << blkbits; |
570 | continue; | 560 | continue; |
571 | } | 561 | } |
572 | } | 562 | } |
@@ -580,17 +570,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
580 | if (holeoff > isize) | 570 | if (holeoff > isize) |
581 | holeoff = isize; | 571 | holeoff = isize; |
582 | 572 | ||
583 | if (holeoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | 573 | return vfs_setpos(file, holeoff, maxsize); |
584 | return -EINVAL; | ||
585 | if (holeoff > maxsize) | ||
586 | return -EINVAL; | ||
587 | |||
588 | if (holeoff != file->f_pos) { | ||
589 | file->f_pos = holeoff; | ||
590 | file->f_version = 0; | ||
591 | } | ||
592 | |||
593 | return holeoff; | ||
594 | } | 574 | } |
595 | 575 | ||
596 | /* | 576 | /* |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e0ba8a408def..a8bc47f75fa0 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -73,32 +73,6 @@ static int ext4_sync_parent(struct inode *inode) | |||
73 | return ret; | 73 | return ret; |
74 | } | 74 | } |
75 | 75 | ||
76 | /** | ||
77 | * __sync_file - generic_file_fsync without the locking and filemap_write | ||
78 | * @inode: inode to sync | ||
79 | * @datasync: only sync essential metadata if true | ||
80 | * | ||
81 | * This is just generic_file_fsync without the locking. This is needed for | ||
82 | * nojournal mode to make sure this inodes data/metadata makes it to disk | ||
83 | * properly. The i_mutex should be held already. | ||
84 | */ | ||
85 | static int __sync_inode(struct inode *inode, int datasync) | ||
86 | { | ||
87 | int err; | ||
88 | int ret; | ||
89 | |||
90 | ret = sync_mapping_buffers(inode->i_mapping); | ||
91 | if (!(inode->i_state & I_DIRTY)) | ||
92 | return ret; | ||
93 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
94 | return ret; | ||
95 | |||
96 | err = sync_inode_metadata(inode, 1); | ||
97 | if (ret == 0) | ||
98 | ret = err; | ||
99 | return ret; | ||
100 | } | ||
101 | |||
102 | /* | 76 | /* |
103 | * akpm: A new design for ext4_sync_file(). | 77 | * akpm: A new design for ext4_sync_file(). |
104 | * | 78 | * |
@@ -116,7 +90,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
116 | struct inode *inode = file->f_mapping->host; | 90 | struct inode *inode = file->f_mapping->host; |
117 | struct ext4_inode_info *ei = EXT4_I(inode); | 91 | struct ext4_inode_info *ei = EXT4_I(inode); |
118 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 92 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
119 | int ret, err; | 93 | int ret = 0, err; |
120 | tid_t commit_tid; | 94 | tid_t commit_tid; |
121 | bool needs_barrier = false; | 95 | bool needs_barrier = false; |
122 | 96 | ||
@@ -124,25 +98,24 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
124 | 98 | ||
125 | trace_ext4_sync_file_enter(file, datasync); | 99 | trace_ext4_sync_file_enter(file, datasync); |
126 | 100 | ||
127 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 101 | if (inode->i_sb->s_flags & MS_RDONLY) { |
128 | if (ret) | 102 | /* Make sure that we read updated s_mount_flags value */ |
129 | return ret; | 103 | smp_rmb(); |
130 | mutex_lock(&inode->i_mutex); | 104 | if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED) |
131 | 105 | ret = -EROFS; | |
132 | if (inode->i_sb->s_flags & MS_RDONLY) | ||
133 | goto out; | ||
134 | |||
135 | ret = ext4_flush_unwritten_io(inode); | ||
136 | if (ret < 0) | ||
137 | goto out; | 106 | goto out; |
107 | } | ||
138 | 108 | ||
139 | if (!journal) { | 109 | if (!journal) { |
140 | ret = __sync_inode(inode, datasync); | 110 | ret = generic_file_fsync(file, start, end, datasync); |
141 | if (!ret && !hlist_empty(&inode->i_dentry)) | 111 | if (!ret && !hlist_empty(&inode->i_dentry)) |
142 | ret = ext4_sync_parent(inode); | 112 | ret = ext4_sync_parent(inode); |
143 | goto out; | 113 | goto out; |
144 | } | 114 | } |
145 | 115 | ||
116 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
117 | if (ret) | ||
118 | return ret; | ||
146 | /* | 119 | /* |
147 | * data=writeback,ordered: | 120 | * data=writeback,ordered: |
148 | * The caller's filemap_fdatawrite()/wait will sync the data. | 121 | * The caller's filemap_fdatawrite()/wait will sync the data. |
@@ -172,8 +145,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
172 | if (!ret) | 145 | if (!ret) |
173 | ret = err; | 146 | ret = err; |
174 | } | 147 | } |
175 | out: | 148 | out: |
176 | mutex_unlock(&inode->i_mutex); | ||
177 | trace_ext4_sync_file_exit(inode, ret); | 149 | trace_ext4_sync_file_exit(inode, ret); |
178 | return ret; | 150 | return ret; |
179 | } | 151 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 00a818d67b54..8bf5999875ee 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -734,11 +734,8 @@ repeat_in_this_group: | |||
734 | ino = ext4_find_next_zero_bit((unsigned long *) | 734 | ino = ext4_find_next_zero_bit((unsigned long *) |
735 | inode_bitmap_bh->b_data, | 735 | inode_bitmap_bh->b_data, |
736 | EXT4_INODES_PER_GROUP(sb), ino); | 736 | EXT4_INODES_PER_GROUP(sb), ino); |
737 | if (ino >= EXT4_INODES_PER_GROUP(sb)) { | 737 | if (ino >= EXT4_INODES_PER_GROUP(sb)) |
738 | if (++group == ngroups) | 738 | goto next_group; |
739 | group = 0; | ||
740 | continue; | ||
741 | } | ||
742 | if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { | 739 | if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { |
743 | ext4_error(sb, "reserved inode found cleared - " | 740 | ext4_error(sb, "reserved inode found cleared - " |
744 | "inode=%lu", ino + 1); | 741 | "inode=%lu", ino + 1); |
@@ -747,7 +744,8 @@ repeat_in_this_group: | |||
747 | if (!handle) { | 744 | if (!handle) { |
748 | BUG_ON(nblocks <= 0); | 745 | BUG_ON(nblocks <= 0); |
749 | handle = __ext4_journal_start_sb(dir->i_sb, line_no, | 746 | handle = __ext4_journal_start_sb(dir->i_sb, line_no, |
750 | handle_type, nblocks); | 747 | handle_type, nblocks, |
748 | 0); | ||
751 | if (IS_ERR(handle)) { | 749 | if (IS_ERR(handle)) { |
752 | err = PTR_ERR(handle); | 750 | err = PTR_ERR(handle); |
753 | ext4_std_error(sb, err); | 751 | ext4_std_error(sb, err); |
@@ -768,6 +766,9 @@ repeat_in_this_group: | |||
768 | goto got; /* we grabbed the inode! */ | 766 | goto got; /* we grabbed the inode! */ |
769 | if (ino < EXT4_INODES_PER_GROUP(sb)) | 767 | if (ino < EXT4_INODES_PER_GROUP(sb)) |
770 | goto repeat_in_this_group; | 768 | goto repeat_in_this_group; |
769 | next_group: | ||
770 | if (++group == ngroups) | ||
771 | group = 0; | ||
771 | } | 772 | } |
772 | err = -ENOSPC; | 773 | err = -ENOSPC; |
773 | goto out; | 774 | goto out; |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index b8d5d351e24f..87b30cd357e7 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -624,7 +624,7 @@ cleanup: | |||
624 | partial--; | 624 | partial--; |
625 | } | 625 | } |
626 | out: | 626 | out: |
627 | trace_ext4_ind_map_blocks_exit(inode, map, err); | 627 | trace_ext4_ind_map_blocks_exit(inode, flags, map, err); |
628 | return err; | 628 | return err; |
629 | } | 629 | } |
630 | 630 | ||
@@ -675,11 +675,6 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
675 | 675 | ||
676 | retry: | 676 | retry: |
677 | if (rw == READ && ext4_should_dioread_nolock(inode)) { | 677 | if (rw == READ && ext4_should_dioread_nolock(inode)) { |
678 | if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) { | ||
679 | mutex_lock(&inode->i_mutex); | ||
680 | ext4_flush_unwritten_io(inode); | ||
681 | mutex_unlock(&inode->i_mutex); | ||
682 | } | ||
683 | /* | 678 | /* |
684 | * Nolock dioread optimization may be dynamically disabled | 679 | * Nolock dioread optimization may be dynamically disabled |
685 | * via ext4_inode_block_unlocked_dio(). Check inode's state | 680 | * via ext4_inode_block_unlocked_dio(). Check inode's state |
@@ -779,27 +774,18 @@ int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) | |||
779 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | 774 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; |
780 | } | 775 | } |
781 | 776 | ||
782 | int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 777 | /* |
778 | * Calculate number of indirect blocks touched by mapping @nrblocks logically | ||
779 | * contiguous blocks | ||
780 | */ | ||
781 | int ext4_ind_trans_blocks(struct inode *inode, int nrblocks) | ||
783 | { | 782 | { |
784 | int indirects; | ||
785 | |||
786 | /* if nrblocks are contiguous */ | ||
787 | if (chunk) { | ||
788 | /* | ||
789 | * With N contiguous data blocks, we need at most | ||
790 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, | ||
791 | * 2 dindirect blocks, and 1 tindirect block | ||
792 | */ | ||
793 | return DIV_ROUND_UP(nrblocks, | ||
794 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; | ||
795 | } | ||
796 | /* | 783 | /* |
797 | * if nrblocks are not contiguous, worse case, each block touch | 784 | * With N contiguous data blocks, we need at most |
798 | * a indirect block, and each indirect block touch a double indirect | 785 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, |
799 | * block, plus a triple indirect block | 786 | * 2 dindirect blocks, and 1 tindirect block |
800 | */ | 787 | */ |
801 | indirects = nrblocks * 2 + 1; | 788 | return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; |
802 | return indirects; | ||
803 | } | 789 | } |
804 | 790 | ||
805 | /* | 791 | /* |
@@ -940,11 +926,13 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
940 | __le32 *last) | 926 | __le32 *last) |
941 | { | 927 | { |
942 | __le32 *p; | 928 | __le32 *p; |
943 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; | 929 | int flags = EXT4_FREE_BLOCKS_VALIDATED; |
944 | int err; | 930 | int err; |
945 | 931 | ||
946 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 932 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
947 | flags |= EXT4_FREE_BLOCKS_METADATA; | 933 | flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA; |
934 | else if (ext4_should_journal_data(inode)) | ||
935 | flags |= EXT4_FREE_BLOCKS_FORGET; | ||
948 | 936 | ||
949 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | 937 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, |
950 | count)) { | 938 | count)) { |
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 3e2bf873e8a8..d9ecbf1113a7 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
@@ -72,7 +72,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode, | |||
72 | entry = (struct ext4_xattr_entry *) | 72 | entry = (struct ext4_xattr_entry *) |
73 | ((void *)raw_inode + EXT4_I(inode)->i_inline_off); | 73 | ((void *)raw_inode + EXT4_I(inode)->i_inline_off); |
74 | 74 | ||
75 | free += le32_to_cpu(entry->e_value_size); | 75 | free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); |
76 | goto out; | 76 | goto out; |
77 | } | 77 | } |
78 | 78 | ||
@@ -1404,16 +1404,15 @@ out: | |||
1404 | * offset as if '.' and '..' really take place. | 1404 | * offset as if '.' and '..' really take place. |
1405 | * | 1405 | * |
1406 | */ | 1406 | */ |
1407 | int ext4_read_inline_dir(struct file *filp, | 1407 | int ext4_read_inline_dir(struct file *file, |
1408 | void *dirent, filldir_t filldir, | 1408 | struct dir_context *ctx, |
1409 | int *has_inline_data) | 1409 | int *has_inline_data) |
1410 | { | 1410 | { |
1411 | int error = 0; | ||
1412 | unsigned int offset, parent_ino; | 1411 | unsigned int offset, parent_ino; |
1413 | int i, stored; | 1412 | int i; |
1414 | struct ext4_dir_entry_2 *de; | 1413 | struct ext4_dir_entry_2 *de; |
1415 | struct super_block *sb; | 1414 | struct super_block *sb; |
1416 | struct inode *inode = file_inode(filp); | 1415 | struct inode *inode = file_inode(file); |
1417 | int ret, inline_size = 0; | 1416 | int ret, inline_size = 0; |
1418 | struct ext4_iloc iloc; | 1417 | struct ext4_iloc iloc; |
1419 | void *dir_buf = NULL; | 1418 | void *dir_buf = NULL; |
@@ -1444,9 +1443,8 @@ int ext4_read_inline_dir(struct file *filp, | |||
1444 | goto out; | 1443 | goto out; |
1445 | 1444 | ||
1446 | sb = inode->i_sb; | 1445 | sb = inode->i_sb; |
1447 | stored = 0; | ||
1448 | parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); | 1446 | parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); |
1449 | offset = filp->f_pos; | 1447 | offset = ctx->pos; |
1450 | 1448 | ||
1451 | /* | 1449 | /* |
1452 | * dotdot_offset and dotdot_size is the real offset and | 1450 | * dotdot_offset and dotdot_size is the real offset and |
@@ -1460,104 +1458,74 @@ int ext4_read_inline_dir(struct file *filp, | |||
1460 | extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; | 1458 | extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; |
1461 | extra_size = extra_offset + inline_size; | 1459 | extra_size = extra_offset + inline_size; |
1462 | 1460 | ||
1463 | while (!error && !stored && filp->f_pos < extra_size) { | 1461 | /* |
1464 | revalidate: | 1462 | * If the version has changed since the last call to |
1465 | /* | 1463 | * readdir(2), then we might be pointing to an invalid |
1466 | * If the version has changed since the last call to | 1464 | * dirent right now. Scan from the start of the inline |
1467 | * readdir(2), then we might be pointing to an invalid | 1465 | * dir to make sure. |
1468 | * dirent right now. Scan from the start of the inline | 1466 | */ |
1469 | * dir to make sure. | 1467 | if (file->f_version != inode->i_version) { |
1470 | */ | 1468 | for (i = 0; i < extra_size && i < offset;) { |
1471 | if (filp->f_version != inode->i_version) { | 1469 | /* |
1472 | for (i = 0; i < extra_size && i < offset;) { | 1470 | * "." is with offset 0 and |
1473 | /* | 1471 | * ".." is dotdot_offset. |
1474 | * "." is with offset 0 and | 1472 | */ |
1475 | * ".." is dotdot_offset. | 1473 | if (!i) { |
1476 | */ | 1474 | i = dotdot_offset; |
1477 | if (!i) { | 1475 | continue; |
1478 | i = dotdot_offset; | 1476 | } else if (i == dotdot_offset) { |
1479 | continue; | 1477 | i = dotdot_size; |
1480 | } else if (i == dotdot_offset) { | ||
1481 | i = dotdot_size; | ||
1482 | continue; | ||
1483 | } | ||
1484 | /* for other entry, the real offset in | ||
1485 | * the buf has to be tuned accordingly. | ||
1486 | */ | ||
1487 | de = (struct ext4_dir_entry_2 *) | ||
1488 | (dir_buf + i - extra_offset); | ||
1489 | /* It's too expensive to do a full | ||
1490 | * dirent test each time round this | ||
1491 | * loop, but we do have to test at | ||
1492 | * least that it is non-zero. A | ||
1493 | * failure will be detected in the | ||
1494 | * dirent test below. */ | ||
1495 | if (ext4_rec_len_from_disk(de->rec_len, | ||
1496 | extra_size) < EXT4_DIR_REC_LEN(1)) | ||
1497 | break; | ||
1498 | i += ext4_rec_len_from_disk(de->rec_len, | ||
1499 | extra_size); | ||
1500 | } | ||
1501 | offset = i; | ||
1502 | filp->f_pos = offset; | ||
1503 | filp->f_version = inode->i_version; | ||
1504 | } | ||
1505 | |||
1506 | while (!error && filp->f_pos < extra_size) { | ||
1507 | if (filp->f_pos == 0) { | ||
1508 | error = filldir(dirent, ".", 1, 0, inode->i_ino, | ||
1509 | DT_DIR); | ||
1510 | if (error) | ||
1511 | break; | ||
1512 | stored++; | ||
1513 | filp->f_pos = dotdot_offset; | ||
1514 | continue; | 1478 | continue; |
1515 | } | 1479 | } |
1480 | /* for other entry, the real offset in | ||
1481 | * the buf has to be tuned accordingly. | ||
1482 | */ | ||
1483 | de = (struct ext4_dir_entry_2 *) | ||
1484 | (dir_buf + i - extra_offset); | ||
1485 | /* It's too expensive to do a full | ||
1486 | * dirent test each time round this | ||
1487 | * loop, but we do have to test at | ||
1488 | * least that it is non-zero. A | ||
1489 | * failure will be detected in the | ||
1490 | * dirent test below. */ | ||
1491 | if (ext4_rec_len_from_disk(de->rec_len, extra_size) | ||
1492 | < EXT4_DIR_REC_LEN(1)) | ||
1493 | break; | ||
1494 | i += ext4_rec_len_from_disk(de->rec_len, | ||
1495 | extra_size); | ||
1496 | } | ||
1497 | offset = i; | ||
1498 | ctx->pos = offset; | ||
1499 | file->f_version = inode->i_version; | ||
1500 | } | ||
1516 | 1501 | ||
1517 | if (filp->f_pos == dotdot_offset) { | 1502 | while (ctx->pos < extra_size) { |
1518 | error = filldir(dirent, "..", 2, | 1503 | if (ctx->pos == 0) { |
1519 | dotdot_offset, | 1504 | if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR)) |
1520 | parent_ino, DT_DIR); | 1505 | goto out; |
1521 | if (error) | 1506 | ctx->pos = dotdot_offset; |
1522 | break; | 1507 | continue; |
1523 | stored++; | 1508 | } |
1524 | 1509 | ||
1525 | filp->f_pos = dotdot_size; | 1510 | if (ctx->pos == dotdot_offset) { |
1526 | continue; | 1511 | if (!dir_emit(ctx, "..", 2, parent_ino, DT_DIR)) |
1527 | } | 1512 | goto out; |
1513 | ctx->pos = dotdot_size; | ||
1514 | continue; | ||
1515 | } | ||
1528 | 1516 | ||
1529 | de = (struct ext4_dir_entry_2 *) | 1517 | de = (struct ext4_dir_entry_2 *) |
1530 | (dir_buf + filp->f_pos - extra_offset); | 1518 | (dir_buf + ctx->pos - extra_offset); |
1531 | if (ext4_check_dir_entry(inode, filp, de, | 1519 | if (ext4_check_dir_entry(inode, file, de, iloc.bh, dir_buf, |
1532 | iloc.bh, dir_buf, | 1520 | extra_size, ctx->pos)) |
1533 | extra_size, filp->f_pos)) { | 1521 | goto out; |
1534 | ret = stored; | 1522 | if (le32_to_cpu(de->inode)) { |
1523 | if (!dir_emit(ctx, de->name, de->name_len, | ||
1524 | le32_to_cpu(de->inode), | ||
1525 | get_dtype(sb, de->file_type))) | ||
1535 | goto out; | 1526 | goto out; |
1536 | } | ||
1537 | if (le32_to_cpu(de->inode)) { | ||
1538 | /* We might block in the next section | ||
1539 | * if the data destination is | ||
1540 | * currently swapped out. So, use a | ||
1541 | * version stamp to detect whether or | ||
1542 | * not the directory has been modified | ||
1543 | * during the copy operation. | ||
1544 | */ | ||
1545 | u64 version = filp->f_version; | ||
1546 | |||
1547 | error = filldir(dirent, de->name, | ||
1548 | de->name_len, | ||
1549 | filp->f_pos, | ||
1550 | le32_to_cpu(de->inode), | ||
1551 | get_dtype(sb, de->file_type)); | ||
1552 | if (error) | ||
1553 | break; | ||
1554 | if (version != filp->f_version) | ||
1555 | goto revalidate; | ||
1556 | stored++; | ||
1557 | } | ||
1558 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len, | ||
1559 | extra_size); | ||
1560 | } | 1527 | } |
1528 | ctx->pos += ext4_rec_len_from_disk(de->rec_len, extra_size); | ||
1561 | } | 1529 | } |
1562 | out: | 1530 | out: |
1563 | kfree(dir_buf); | 1531 | kfree(dir_buf); |
@@ -1842,7 +1810,7 @@ int ext4_inline_data_fiemap(struct inode *inode, | |||
1842 | if (error) | 1810 | if (error) |
1843 | goto out; | 1811 | goto out; |
1844 | 1812 | ||
1845 | physical = iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; | 1813 | physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; |
1846 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; | 1814 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; |
1847 | physical += offsetof(struct ext4_inode, i_block); | 1815 | physical += offsetof(struct ext4_inode, i_block); |
1848 | length = i_size_read(inode); | 1816 | length = i_size_read(inode); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d6382b89ecbd..dd32a2eacd0d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -132,12 +132,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, | |||
132 | new_size); | 132 | new_size); |
133 | } | 133 | } |
134 | 134 | ||
135 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 135 | static void ext4_invalidatepage(struct page *page, unsigned int offset, |
136 | unsigned int length); | ||
136 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | 137 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); |
137 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | 138 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); |
138 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | 139 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, |
139 | struct inode *inode, struct page *page, loff_t from, | 140 | int pextents); |
140 | loff_t length, int flags); | ||
141 | 141 | ||
142 | /* | 142 | /* |
143 | * Test whether an inode is a fast symlink. | 143 | * Test whether an inode is a fast symlink. |
@@ -215,7 +215,8 @@ void ext4_evict_inode(struct inode *inode) | |||
215 | filemap_write_and_wait(&inode->i_data); | 215 | filemap_write_and_wait(&inode->i_data); |
216 | } | 216 | } |
217 | truncate_inode_pages(&inode->i_data, 0); | 217 | truncate_inode_pages(&inode->i_data, 0); |
218 | ext4_ioend_shutdown(inode); | 218 | |
219 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | ||
219 | goto no_delete; | 220 | goto no_delete; |
220 | } | 221 | } |
221 | 222 | ||
@@ -225,8 +226,8 @@ void ext4_evict_inode(struct inode *inode) | |||
225 | if (ext4_should_order_data(inode)) | 226 | if (ext4_should_order_data(inode)) |
226 | ext4_begin_ordered_truncate(inode, 0); | 227 | ext4_begin_ordered_truncate(inode, 0); |
227 | truncate_inode_pages(&inode->i_data, 0); | 228 | truncate_inode_pages(&inode->i_data, 0); |
228 | ext4_ioend_shutdown(inode); | ||
229 | 229 | ||
230 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | ||
230 | if (is_bad_inode(inode)) | 231 | if (is_bad_inode(inode)) |
231 | goto no_delete; | 232 | goto no_delete; |
232 | 233 | ||
@@ -423,66 +424,6 @@ static int __check_block_validity(struct inode *inode, const char *func, | |||
423 | #define check_block_validity(inode, map) \ | 424 | #define check_block_validity(inode, map) \ |
424 | __check_block_validity((inode), __func__, __LINE__, (map)) | 425 | __check_block_validity((inode), __func__, __LINE__, (map)) |
425 | 426 | ||
426 | /* | ||
427 | * Return the number of contiguous dirty pages in a given inode | ||
428 | * starting at page frame idx. | ||
429 | */ | ||
430 | static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | ||
431 | unsigned int max_pages) | ||
432 | { | ||
433 | struct address_space *mapping = inode->i_mapping; | ||
434 | pgoff_t index; | ||
435 | struct pagevec pvec; | ||
436 | pgoff_t num = 0; | ||
437 | int i, nr_pages, done = 0; | ||
438 | |||
439 | if (max_pages == 0) | ||
440 | return 0; | ||
441 | pagevec_init(&pvec, 0); | ||
442 | while (!done) { | ||
443 | index = idx; | ||
444 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
445 | PAGECACHE_TAG_DIRTY, | ||
446 | (pgoff_t)PAGEVEC_SIZE); | ||
447 | if (nr_pages == 0) | ||
448 | break; | ||
449 | for (i = 0; i < nr_pages; i++) { | ||
450 | struct page *page = pvec.pages[i]; | ||
451 | struct buffer_head *bh, *head; | ||
452 | |||
453 | lock_page(page); | ||
454 | if (unlikely(page->mapping != mapping) || | ||
455 | !PageDirty(page) || | ||
456 | PageWriteback(page) || | ||
457 | page->index != idx) { | ||
458 | done = 1; | ||
459 | unlock_page(page); | ||
460 | break; | ||
461 | } | ||
462 | if (page_has_buffers(page)) { | ||
463 | bh = head = page_buffers(page); | ||
464 | do { | ||
465 | if (!buffer_delay(bh) && | ||
466 | !buffer_unwritten(bh)) | ||
467 | done = 1; | ||
468 | bh = bh->b_this_page; | ||
469 | } while (!done && (bh != head)); | ||
470 | } | ||
471 | unlock_page(page); | ||
472 | if (done) | ||
473 | break; | ||
474 | idx++; | ||
475 | num++; | ||
476 | if (num >= max_pages) { | ||
477 | done = 1; | ||
478 | break; | ||
479 | } | ||
480 | } | ||
481 | pagevec_release(&pvec); | ||
482 | } | ||
483 | return num; | ||
484 | } | ||
485 | |||
486 | #ifdef ES_AGGRESSIVE_TEST | 427 | #ifdef ES_AGGRESSIVE_TEST |
487 | static void ext4_map_blocks_es_recheck(handle_t *handle, | 428 | static void ext4_map_blocks_es_recheck(handle_t *handle, |
488 | struct inode *inode, | 429 | struct inode *inode, |
@@ -524,7 +465,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, | |||
524 | if (es_map->m_lblk != map->m_lblk || | 465 | if (es_map->m_lblk != map->m_lblk || |
525 | es_map->m_flags != map->m_flags || | 466 | es_map->m_flags != map->m_flags || |
526 | es_map->m_pblk != map->m_pblk) { | 467 | es_map->m_pblk != map->m_pblk) { |
527 | printk("ES cache assertation failed for inode: %lu " | 468 | printk("ES cache assertion failed for inode: %lu " |
528 | "es_cached ex [%d/%d/%llu/%x] != " | 469 | "es_cached ex [%d/%d/%llu/%x] != " |
529 | "found ex [%d/%d/%llu/%x] retval %d flags %x\n", | 470 | "found ex [%d/%d/%llu/%x] retval %d flags %x\n", |
530 | inode->i_ino, es_map->m_lblk, es_map->m_len, | 471 | inode->i_ino, es_map->m_lblk, es_map->m_len, |
@@ -575,6 +516,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
575 | 516 | ||
576 | /* Lookup extent status tree firstly */ | 517 | /* Lookup extent status tree firstly */ |
577 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 518 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
519 | ext4_es_lru_add(inode); | ||
578 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { | 520 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { |
579 | map->m_pblk = ext4_es_pblock(&es) + | 521 | map->m_pblk = ext4_es_pblock(&es) + |
580 | map->m_lblk - es.es_lblk; | 522 | map->m_lblk - es.es_lblk; |
@@ -613,14 +555,13 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
613 | int ret; | 555 | int ret; |
614 | unsigned long long status; | 556 | unsigned long long status; |
615 | 557 | ||
616 | #ifdef ES_AGGRESSIVE_TEST | 558 | if (unlikely(retval != map->m_len)) { |
617 | if (retval != map->m_len) { | 559 | ext4_warning(inode->i_sb, |
618 | printk("ES len assertation failed for inode: %lu " | 560 | "ES len assertion failed for inode " |
619 | "retval %d != map->m_len %d " | 561 | "%lu: retval %d != map->m_len %d", |
620 | "in %s (lookup)\n", inode->i_ino, retval, | 562 | inode->i_ino, retval, map->m_len); |
621 | map->m_len, __func__); | 563 | WARN_ON(1); |
622 | } | 564 | } |
623 | #endif | ||
624 | 565 | ||
625 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? | 566 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
626 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; | 567 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
@@ -714,14 +655,13 @@ found: | |||
714 | int ret; | 655 | int ret; |
715 | unsigned long long status; | 656 | unsigned long long status; |
716 | 657 | ||
717 | #ifdef ES_AGGRESSIVE_TEST | 658 | if (unlikely(retval != map->m_len)) { |
718 | if (retval != map->m_len) { | 659 | ext4_warning(inode->i_sb, |
719 | printk("ES len assertation failed for inode: %lu " | 660 | "ES len assertion failed for inode " |
720 | "retval %d != map->m_len %d " | 661 | "%lu: retval %d != map->m_len %d", |
721 | "in %s (allocation)\n", inode->i_ino, retval, | 662 | inode->i_ino, retval, map->m_len); |
722 | map->m_len, __func__); | 663 | WARN_ON(1); |
723 | } | 664 | } |
724 | #endif | ||
725 | 665 | ||
726 | /* | 666 | /* |
727 | * If the extent has been zeroed out, we don't need to update | 667 | * If the extent has been zeroed out, we don't need to update |
@@ -1118,10 +1058,13 @@ static int ext4_write_end(struct file *file, | |||
1118 | } | 1058 | } |
1119 | } | 1059 | } |
1120 | 1060 | ||
1121 | if (ext4_has_inline_data(inode)) | 1061 | if (ext4_has_inline_data(inode)) { |
1122 | copied = ext4_write_inline_data_end(inode, pos, len, | 1062 | ret = ext4_write_inline_data_end(inode, pos, len, |
1123 | copied, page); | 1063 | copied, page); |
1124 | else | 1064 | if (ret < 0) |
1065 | goto errout; | ||
1066 | copied = ret; | ||
1067 | } else | ||
1125 | copied = block_write_end(file, mapping, pos, | 1068 | copied = block_write_end(file, mapping, pos, |
1126 | len, copied, page, fsdata); | 1069 | len, copied, page, fsdata); |
1127 | 1070 | ||
@@ -1157,8 +1100,6 @@ static int ext4_write_end(struct file *file, | |||
1157 | if (i_size_changed) | 1100 | if (i_size_changed) |
1158 | ext4_mark_inode_dirty(handle, inode); | 1101 | ext4_mark_inode_dirty(handle, inode); |
1159 | 1102 | ||
1160 | if (copied < 0) | ||
1161 | ret = copied; | ||
1162 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) | 1103 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
1163 | /* if we have allocated more blocks and copied | 1104 | /* if we have allocated more blocks and copied |
1164 | * less. We will have blocks allocated outside | 1105 | * less. We will have blocks allocated outside |
@@ -1415,21 +1356,28 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1415 | } | 1356 | } |
1416 | 1357 | ||
1417 | static void ext4_da_page_release_reservation(struct page *page, | 1358 | static void ext4_da_page_release_reservation(struct page *page, |
1418 | unsigned long offset) | 1359 | unsigned int offset, |
1360 | unsigned int length) | ||
1419 | { | 1361 | { |
1420 | int to_release = 0; | 1362 | int to_release = 0; |
1421 | struct buffer_head *head, *bh; | 1363 | struct buffer_head *head, *bh; |
1422 | unsigned int curr_off = 0; | 1364 | unsigned int curr_off = 0; |
1423 | struct inode *inode = page->mapping->host; | 1365 | struct inode *inode = page->mapping->host; |
1424 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1366 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1367 | unsigned int stop = offset + length; | ||
1425 | int num_clusters; | 1368 | int num_clusters; |
1426 | ext4_fsblk_t lblk; | 1369 | ext4_fsblk_t lblk; |
1427 | 1370 | ||
1371 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | ||
1372 | |||
1428 | head = page_buffers(page); | 1373 | head = page_buffers(page); |
1429 | bh = head; | 1374 | bh = head; |
1430 | do { | 1375 | do { |
1431 | unsigned int next_off = curr_off + bh->b_size; | 1376 | unsigned int next_off = curr_off + bh->b_size; |
1432 | 1377 | ||
1378 | if (next_off > stop) | ||
1379 | break; | ||
1380 | |||
1433 | if ((offset <= curr_off) && (buffer_delay(bh))) { | 1381 | if ((offset <= curr_off) && (buffer_delay(bh))) { |
1434 | to_release++; | 1382 | to_release++; |
1435 | clear_buffer_delay(bh); | 1383 | clear_buffer_delay(bh); |
@@ -1460,140 +1408,43 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1460 | * Delayed allocation stuff | 1408 | * Delayed allocation stuff |
1461 | */ | 1409 | */ |
1462 | 1410 | ||
1463 | /* | 1411 | struct mpage_da_data { |
1464 | * mpage_da_submit_io - walks through extent of pages and try to write | 1412 | struct inode *inode; |
1465 | * them with writepage() call back | 1413 | struct writeback_control *wbc; |
1466 | * | ||
1467 | * @mpd->inode: inode | ||
1468 | * @mpd->first_page: first page of the extent | ||
1469 | * @mpd->next_page: page after the last page of the extent | ||
1470 | * | ||
1471 | * By the time mpage_da_submit_io() is called we expect all blocks | ||
1472 | * to be allocated. this may be wrong if allocation failed. | ||
1473 | * | ||
1474 | * As pages are already locked by write_cache_pages(), we can't use it | ||
1475 | */ | ||
1476 | static int mpage_da_submit_io(struct mpage_da_data *mpd, | ||
1477 | struct ext4_map_blocks *map) | ||
1478 | { | ||
1479 | struct pagevec pvec; | ||
1480 | unsigned long index, end; | ||
1481 | int ret = 0, err, nr_pages, i; | ||
1482 | struct inode *inode = mpd->inode; | ||
1483 | struct address_space *mapping = inode->i_mapping; | ||
1484 | loff_t size = i_size_read(inode); | ||
1485 | unsigned int len, block_start; | ||
1486 | struct buffer_head *bh, *page_bufs = NULL; | ||
1487 | sector_t pblock = 0, cur_logical = 0; | ||
1488 | struct ext4_io_submit io_submit; | ||
1489 | 1414 | ||
1490 | BUG_ON(mpd->next_page <= mpd->first_page); | 1415 | pgoff_t first_page; /* The first page to write */ |
1491 | memset(&io_submit, 0, sizeof(io_submit)); | 1416 | pgoff_t next_page; /* Current page to examine */ |
1417 | pgoff_t last_page; /* Last page to examine */ | ||
1492 | /* | 1418 | /* |
1493 | * We need to start from the first_page to the next_page - 1 | 1419 | * Extent to map - this can be after first_page because that can be |
1494 | * to make sure we also write the mapped dirty buffer_heads. | 1420 | * fully mapped. We somewhat abuse m_flags to store whether the extent |
1495 | * If we look at mpd->b_blocknr we would only be looking | 1421 | * is delalloc or unwritten. |
1496 | * at the currently mapped buffer_heads. | ||
1497 | */ | 1422 | */ |
1498 | index = mpd->first_page; | 1423 | struct ext4_map_blocks map; |
1499 | end = mpd->next_page - 1; | 1424 | struct ext4_io_submit io_submit; /* IO submission data */ |
1500 | 1425 | }; | |
1501 | pagevec_init(&pvec, 0); | ||
1502 | while (index <= end) { | ||
1503 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
1504 | if (nr_pages == 0) | ||
1505 | break; | ||
1506 | for (i = 0; i < nr_pages; i++) { | ||
1507 | int skip_page = 0; | ||
1508 | struct page *page = pvec.pages[i]; | ||
1509 | |||
1510 | index = page->index; | ||
1511 | if (index > end) | ||
1512 | break; | ||
1513 | |||
1514 | if (index == size >> PAGE_CACHE_SHIFT) | ||
1515 | len = size & ~PAGE_CACHE_MASK; | ||
1516 | else | ||
1517 | len = PAGE_CACHE_SIZE; | ||
1518 | if (map) { | ||
1519 | cur_logical = index << (PAGE_CACHE_SHIFT - | ||
1520 | inode->i_blkbits); | ||
1521 | pblock = map->m_pblk + (cur_logical - | ||
1522 | map->m_lblk); | ||
1523 | } | ||
1524 | index++; | ||
1525 | |||
1526 | BUG_ON(!PageLocked(page)); | ||
1527 | BUG_ON(PageWriteback(page)); | ||
1528 | |||
1529 | bh = page_bufs = page_buffers(page); | ||
1530 | block_start = 0; | ||
1531 | do { | ||
1532 | if (map && (cur_logical >= map->m_lblk) && | ||
1533 | (cur_logical <= (map->m_lblk + | ||
1534 | (map->m_len - 1)))) { | ||
1535 | if (buffer_delay(bh)) { | ||
1536 | clear_buffer_delay(bh); | ||
1537 | bh->b_blocknr = pblock; | ||
1538 | } | ||
1539 | if (buffer_unwritten(bh) || | ||
1540 | buffer_mapped(bh)) | ||
1541 | BUG_ON(bh->b_blocknr != pblock); | ||
1542 | if (map->m_flags & EXT4_MAP_UNINIT) | ||
1543 | set_buffer_uninit(bh); | ||
1544 | clear_buffer_unwritten(bh); | ||
1545 | } | ||
1546 | |||
1547 | /* | ||
1548 | * skip page if block allocation undone and | ||
1549 | * block is dirty | ||
1550 | */ | ||
1551 | if (ext4_bh_delay_or_unwritten(NULL, bh)) | ||
1552 | skip_page = 1; | ||
1553 | bh = bh->b_this_page; | ||
1554 | block_start += bh->b_size; | ||
1555 | cur_logical++; | ||
1556 | pblock++; | ||
1557 | } while (bh != page_bufs); | ||
1558 | |||
1559 | if (skip_page) { | ||
1560 | unlock_page(page); | ||
1561 | continue; | ||
1562 | } | ||
1563 | |||
1564 | clear_page_dirty_for_io(page); | ||
1565 | err = ext4_bio_write_page(&io_submit, page, len, | ||
1566 | mpd->wbc); | ||
1567 | if (!err) | ||
1568 | mpd->pages_written++; | ||
1569 | /* | ||
1570 | * In error case, we have to continue because | ||
1571 | * remaining pages are still locked | ||
1572 | */ | ||
1573 | if (ret == 0) | ||
1574 | ret = err; | ||
1575 | } | ||
1576 | pagevec_release(&pvec); | ||
1577 | } | ||
1578 | ext4_io_submit(&io_submit); | ||
1579 | return ret; | ||
1580 | } | ||
1581 | 1426 | ||
1582 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | 1427 | static void mpage_release_unused_pages(struct mpage_da_data *mpd, |
1428 | bool invalidate) | ||
1583 | { | 1429 | { |
1584 | int nr_pages, i; | 1430 | int nr_pages, i; |
1585 | pgoff_t index, end; | 1431 | pgoff_t index, end; |
1586 | struct pagevec pvec; | 1432 | struct pagevec pvec; |
1587 | struct inode *inode = mpd->inode; | 1433 | struct inode *inode = mpd->inode; |
1588 | struct address_space *mapping = inode->i_mapping; | 1434 | struct address_space *mapping = inode->i_mapping; |
1589 | ext4_lblk_t start, last; | 1435 | |
1436 | /* This is necessary when next_page == 0. */ | ||
1437 | if (mpd->first_page >= mpd->next_page) | ||
1438 | return; | ||
1590 | 1439 | ||
1591 | index = mpd->first_page; | 1440 | index = mpd->first_page; |
1592 | end = mpd->next_page - 1; | 1441 | end = mpd->next_page - 1; |
1593 | 1442 | if (invalidate) { | |
1594 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1443 | ext4_lblk_t start, last; |
1595 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1444 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
1596 | ext4_es_remove_extent(inode, start, last - start + 1); | 1445 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
1446 | ext4_es_remove_extent(inode, start, last - start + 1); | ||
1447 | } | ||
1597 | 1448 | ||
1598 | pagevec_init(&pvec, 0); | 1449 | pagevec_init(&pvec, 0); |
1599 | while (index <= end) { | 1450 | while (index <= end) { |
@@ -1606,14 +1457,15 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |||
1606 | break; | 1457 | break; |
1607 | BUG_ON(!PageLocked(page)); | 1458 | BUG_ON(!PageLocked(page)); |
1608 | BUG_ON(PageWriteback(page)); | 1459 | BUG_ON(PageWriteback(page)); |
1609 | block_invalidatepage(page, 0); | 1460 | if (invalidate) { |
1610 | ClearPageUptodate(page); | 1461 | block_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
1462 | ClearPageUptodate(page); | ||
1463 | } | ||
1611 | unlock_page(page); | 1464 | unlock_page(page); |
1612 | } | 1465 | } |
1613 | index = pvec.pages[nr_pages - 1]->index + 1; | 1466 | index = pvec.pages[nr_pages - 1]->index + 1; |
1614 | pagevec_release(&pvec); | 1467 | pagevec_release(&pvec); |
1615 | } | 1468 | } |
1616 | return; | ||
1617 | } | 1469 | } |
1618 | 1470 | ||
1619 | static void ext4_print_free_blocks(struct inode *inode) | 1471 | static void ext4_print_free_blocks(struct inode *inode) |
@@ -1642,215 +1494,6 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
1642 | return; | 1494 | return; |
1643 | } | 1495 | } |
1644 | 1496 | ||
1645 | /* | ||
1646 | * mpage_da_map_and_submit - go through given space, map them | ||
1647 | * if necessary, and then submit them for I/O | ||
1648 | * | ||
1649 | * @mpd - bh describing space | ||
1650 | * | ||
1651 | * The function skips space we know is already mapped to disk blocks. | ||
1652 | * | ||
1653 | */ | ||
1654 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | ||
1655 | { | ||
1656 | int err, blks, get_blocks_flags; | ||
1657 | struct ext4_map_blocks map, *mapp = NULL; | ||
1658 | sector_t next = mpd->b_blocknr; | ||
1659 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | ||
1660 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | ||
1661 | handle_t *handle = NULL; | ||
1662 | |||
1663 | /* | ||
1664 | * If the blocks are mapped already, or we couldn't accumulate | ||
1665 | * any blocks, then proceed immediately to the submission stage. | ||
1666 | */ | ||
1667 | if ((mpd->b_size == 0) || | ||
1668 | ((mpd->b_state & (1 << BH_Mapped)) && | ||
1669 | !(mpd->b_state & (1 << BH_Delay)) && | ||
1670 | !(mpd->b_state & (1 << BH_Unwritten)))) | ||
1671 | goto submit_io; | ||
1672 | |||
1673 | handle = ext4_journal_current_handle(); | ||
1674 | BUG_ON(!handle); | ||
1675 | |||
1676 | /* | ||
1677 | * Call ext4_map_blocks() to allocate any delayed allocation | ||
1678 | * blocks, or to convert an uninitialized extent to be | ||
1679 | * initialized (in the case where we have written into | ||
1680 | * one or more preallocated blocks). | ||
1681 | * | ||
1682 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to | ||
1683 | * indicate that we are on the delayed allocation path. This | ||
1684 | * affects functions in many different parts of the allocation | ||
1685 | * call path. This flag exists primarily because we don't | ||
1686 | * want to change *many* call functions, so ext4_map_blocks() | ||
1687 | * will set the EXT4_STATE_DELALLOC_RESERVED flag once the | ||
1688 | * inode's allocation semaphore is taken. | ||
1689 | * | ||
1690 | * If the blocks in questions were delalloc blocks, set | ||
1691 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting | ||
1692 | * variables are updated after the blocks have been allocated. | ||
1693 | */ | ||
1694 | map.m_lblk = next; | ||
1695 | map.m_len = max_blocks; | ||
1696 | /* | ||
1697 | * We're in delalloc path and it is possible that we're going to | ||
1698 | * need more metadata blocks than previously reserved. However | ||
1699 | * we must not fail because we're in writeback and there is | ||
1700 | * nothing we can do about it so it might result in data loss. | ||
1701 | * So use reserved blocks to allocate metadata if possible. | ||
1702 | */ | ||
1703 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | | ||
1704 | EXT4_GET_BLOCKS_METADATA_NOFAIL; | ||
1705 | if (ext4_should_dioread_nolock(mpd->inode)) | ||
1706 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
1707 | if (mpd->b_state & (1 << BH_Delay)) | ||
1708 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | ||
1709 | |||
1710 | |||
1711 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); | ||
1712 | if (blks < 0) { | ||
1713 | struct super_block *sb = mpd->inode->i_sb; | ||
1714 | |||
1715 | err = blks; | ||
1716 | /* | ||
1717 | * If get block returns EAGAIN or ENOSPC and there | ||
1718 | * appears to be free blocks we will just let | ||
1719 | * mpage_da_submit_io() unlock all of the pages. | ||
1720 | */ | ||
1721 | if (err == -EAGAIN) | ||
1722 | goto submit_io; | ||
1723 | |||
1724 | if (err == -ENOSPC && ext4_count_free_clusters(sb)) { | ||
1725 | mpd->retval = err; | ||
1726 | goto submit_io; | ||
1727 | } | ||
1728 | |||
1729 | /* | ||
1730 | * get block failure will cause us to loop in | ||
1731 | * writepages, because a_ops->writepage won't be able | ||
1732 | * to make progress. The page will be redirtied by | ||
1733 | * writepage and writepages will again try to write | ||
1734 | * the same. | ||
1735 | */ | ||
1736 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { | ||
1737 | ext4_msg(sb, KERN_CRIT, | ||
1738 | "delayed block allocation failed for inode %lu " | ||
1739 | "at logical offset %llu with max blocks %zd " | ||
1740 | "with error %d", mpd->inode->i_ino, | ||
1741 | (unsigned long long) next, | ||
1742 | mpd->b_size >> mpd->inode->i_blkbits, err); | ||
1743 | ext4_msg(sb, KERN_CRIT, | ||
1744 | "This should not happen!! Data will be lost"); | ||
1745 | if (err == -ENOSPC) | ||
1746 | ext4_print_free_blocks(mpd->inode); | ||
1747 | } | ||
1748 | /* invalidate all the pages */ | ||
1749 | ext4_da_block_invalidatepages(mpd); | ||
1750 | |||
1751 | /* Mark this page range as having been completed */ | ||
1752 | mpd->io_done = 1; | ||
1753 | return; | ||
1754 | } | ||
1755 | BUG_ON(blks == 0); | ||
1756 | |||
1757 | mapp = ↦ | ||
1758 | if (map.m_flags & EXT4_MAP_NEW) { | ||
1759 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | ||
1760 | int i; | ||
1761 | |||
1762 | for (i = 0; i < map.m_len; i++) | ||
1763 | unmap_underlying_metadata(bdev, map.m_pblk + i); | ||
1764 | } | ||
1765 | |||
1766 | /* | ||
1767 | * Update on-disk size along with block allocation. | ||
1768 | */ | ||
1769 | disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; | ||
1770 | if (disksize > i_size_read(mpd->inode)) | ||
1771 | disksize = i_size_read(mpd->inode); | ||
1772 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | ||
1773 | ext4_update_i_disksize(mpd->inode, disksize); | ||
1774 | err = ext4_mark_inode_dirty(handle, mpd->inode); | ||
1775 | if (err) | ||
1776 | ext4_error(mpd->inode->i_sb, | ||
1777 | "Failed to mark inode %lu dirty", | ||
1778 | mpd->inode->i_ino); | ||
1779 | } | ||
1780 | |||
1781 | submit_io: | ||
1782 | mpage_da_submit_io(mpd, mapp); | ||
1783 | mpd->io_done = 1; | ||
1784 | } | ||
1785 | |||
1786 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | ||
1787 | (1 << BH_Delay) | (1 << BH_Unwritten)) | ||
1788 | |||
1789 | /* | ||
1790 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | ||
1791 | * | ||
1792 | * @mpd->lbh - extent of blocks | ||
1793 | * @logical - logical number of the block in the file | ||
1794 | * @b_state - b_state of the buffer head added | ||
1795 | * | ||
1796 | * the function is used to collect contig. blocks in same state | ||
1797 | */ | ||
1798 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical, | ||
1799 | unsigned long b_state) | ||
1800 | { | ||
1801 | sector_t next; | ||
1802 | int blkbits = mpd->inode->i_blkbits; | ||
1803 | int nrblocks = mpd->b_size >> blkbits; | ||
1804 | |||
1805 | /* | ||
1806 | * XXX Don't go larger than mballoc is willing to allocate | ||
1807 | * This is a stopgap solution. We eventually need to fold | ||
1808 | * mpage_da_submit_io() into this function and then call | ||
1809 | * ext4_map_blocks() multiple times in a loop | ||
1810 | */ | ||
1811 | if (nrblocks >= (8*1024*1024 >> blkbits)) | ||
1812 | goto flush_it; | ||
1813 | |||
1814 | /* check if the reserved journal credits might overflow */ | ||
1815 | if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) { | ||
1816 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | ||
1817 | /* | ||
1818 | * With non-extent format we are limited by the journal | ||
1819 | * credit available. Total credit needed to insert | ||
1820 | * nrblocks contiguous blocks is dependent on the | ||
1821 | * nrblocks. So limit nrblocks. | ||
1822 | */ | ||
1823 | goto flush_it; | ||
1824 | } | ||
1825 | } | ||
1826 | /* | ||
1827 | * First block in the extent | ||
1828 | */ | ||
1829 | if (mpd->b_size == 0) { | ||
1830 | mpd->b_blocknr = logical; | ||
1831 | mpd->b_size = 1 << blkbits; | ||
1832 | mpd->b_state = b_state & BH_FLAGS; | ||
1833 | return; | ||
1834 | } | ||
1835 | |||
1836 | next = mpd->b_blocknr + nrblocks; | ||
1837 | /* | ||
1838 | * Can we merge the block to our big extent? | ||
1839 | */ | ||
1840 | if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { | ||
1841 | mpd->b_size += 1 << blkbits; | ||
1842 | return; | ||
1843 | } | ||
1844 | |||
1845 | flush_it: | ||
1846 | /* | ||
1847 | * We couldn't merge the block to our extent, so we | ||
1848 | * need to flush current extent and start new one | ||
1849 | */ | ||
1850 | mpage_da_map_and_submit(mpd); | ||
1851 | return; | ||
1852 | } | ||
1853 | |||
1854 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | 1497 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) |
1855 | { | 1498 | { |
1856 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); | 1499 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); |
@@ -1885,7 +1528,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
1885 | 1528 | ||
1886 | /* Lookup extent status tree firstly */ | 1529 | /* Lookup extent status tree firstly */ |
1887 | if (ext4_es_lookup_extent(inode, iblock, &es)) { | 1530 | if (ext4_es_lookup_extent(inode, iblock, &es)) { |
1888 | 1531 | ext4_es_lru_add(inode); | |
1889 | if (ext4_es_is_hole(&es)) { | 1532 | if (ext4_es_is_hole(&es)) { |
1890 | retval = 0; | 1533 | retval = 0; |
1891 | down_read((&EXT4_I(inode)->i_data_sem)); | 1534 | down_read((&EXT4_I(inode)->i_data_sem)); |
@@ -1992,14 +1635,13 @@ add_delayed: | |||
1992 | int ret; | 1635 | int ret; |
1993 | unsigned long long status; | 1636 | unsigned long long status; |
1994 | 1637 | ||
1995 | #ifdef ES_AGGRESSIVE_TEST | 1638 | if (unlikely(retval != map->m_len)) { |
1996 | if (retval != map->m_len) { | 1639 | ext4_warning(inode->i_sb, |
1997 | printk("ES len assertation failed for inode: %lu " | 1640 | "ES len assertion failed for inode " |
1998 | "retval %d != map->m_len %d " | 1641 | "%lu: retval %d != map->m_len %d", |
1999 | "in %s (lookup)\n", inode->i_ino, retval, | 1642 | inode->i_ino, retval, map->m_len); |
2000 | map->m_len, __func__); | 1643 | WARN_ON(1); |
2001 | } | 1644 | } |
2002 | #endif | ||
2003 | 1645 | ||
2004 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? | 1646 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
2005 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; | 1647 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
@@ -2156,7 +1798,7 @@ out: | |||
2156 | * lock so we have to do some magic. | 1798 | * lock so we have to do some magic. |
2157 | * | 1799 | * |
2158 | * This function can get called via... | 1800 | * This function can get called via... |
2159 | * - ext4_da_writepages after taking page lock (have journal handle) | 1801 | * - ext4_writepages after taking page lock (have journal handle) |
2160 | * - journal_submit_inode_data_buffers (no journal handle) | 1802 | * - journal_submit_inode_data_buffers (no journal handle) |
2161 | * - shrink_page_list via the kswapd/direct reclaim (no journal handle) | 1803 | * - shrink_page_list via the kswapd/direct reclaim (no journal handle) |
2162 | * - grab_page_cache when doing write_begin (have journal handle) | 1804 | * - grab_page_cache when doing write_begin (have journal handle) |
@@ -2234,76 +1876,405 @@ static int ext4_writepage(struct page *page, | |||
2234 | */ | 1876 | */ |
2235 | return __ext4_journalled_writepage(page, len); | 1877 | return __ext4_journalled_writepage(page, len); |
2236 | 1878 | ||
2237 | memset(&io_submit, 0, sizeof(io_submit)); | 1879 | ext4_io_submit_init(&io_submit, wbc); |
1880 | io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
1881 | if (!io_submit.io_end) { | ||
1882 | redirty_page_for_writepage(wbc, page); | ||
1883 | unlock_page(page); | ||
1884 | return -ENOMEM; | ||
1885 | } | ||
2238 | ret = ext4_bio_write_page(&io_submit, page, len, wbc); | 1886 | ret = ext4_bio_write_page(&io_submit, page, len, wbc); |
2239 | ext4_io_submit(&io_submit); | 1887 | ext4_io_submit(&io_submit); |
1888 | /* Drop io_end reference we got from init */ | ||
1889 | ext4_put_io_end_defer(io_submit.io_end); | ||
2240 | return ret; | 1890 | return ret; |
2241 | } | 1891 | } |
2242 | 1892 | ||
1893 | #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) | ||
1894 | |||
2243 | /* | 1895 | /* |
2244 | * This is called via ext4_da_writepages() to | 1896 | * mballoc gives us at most this number of blocks... |
2245 | * calculate the total number of credits to reserve to fit | 1897 | * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). |
2246 | * a single extent allocation into a single transaction, | 1898 | * The rest of mballoc seems to handle chunks upto full group size. |
2247 | * ext4_da_writpeages() will loop calling this before | ||
2248 | * the block allocation. | ||
2249 | */ | 1899 | */ |
1900 | #define MAX_WRITEPAGES_EXTENT_LEN 2048 | ||
2250 | 1901 | ||
2251 | static int ext4_da_writepages_trans_blocks(struct inode *inode) | 1902 | /* |
1903 | * mpage_add_bh_to_extent - try to add bh to extent of blocks to map | ||
1904 | * | ||
1905 | * @mpd - extent of blocks | ||
1906 | * @lblk - logical number of the block in the file | ||
1907 | * @b_state - b_state of the buffer head added | ||
1908 | * | ||
1909 | * the function is used to collect contig. blocks in same state | ||
1910 | */ | ||
1911 | static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, | ||
1912 | unsigned long b_state) | ||
2252 | { | 1913 | { |
2253 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 1914 | struct ext4_map_blocks *map = &mpd->map; |
1915 | |||
1916 | /* Don't go larger than mballoc is willing to allocate */ | ||
1917 | if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) | ||
1918 | return 0; | ||
1919 | |||
1920 | /* First block in the extent? */ | ||
1921 | if (map->m_len == 0) { | ||
1922 | map->m_lblk = lblk; | ||
1923 | map->m_len = 1; | ||
1924 | map->m_flags = b_state & BH_FLAGS; | ||
1925 | return 1; | ||
1926 | } | ||
1927 | |||
1928 | /* Can we merge the block to our big extent? */ | ||
1929 | if (lblk == map->m_lblk + map->m_len && | ||
1930 | (b_state & BH_FLAGS) == map->m_flags) { | ||
1931 | map->m_len++; | ||
1932 | return 1; | ||
1933 | } | ||
1934 | return 0; | ||
1935 | } | ||
2254 | 1936 | ||
1937 | static bool add_page_bufs_to_extent(struct mpage_da_data *mpd, | ||
1938 | struct buffer_head *head, | ||
1939 | struct buffer_head *bh, | ||
1940 | ext4_lblk_t lblk) | ||
1941 | { | ||
1942 | struct inode *inode = mpd->inode; | ||
1943 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | ||
1944 | >> inode->i_blkbits; | ||
1945 | |||
1946 | do { | ||
1947 | BUG_ON(buffer_locked(bh)); | ||
1948 | |||
1949 | if (!buffer_dirty(bh) || !buffer_mapped(bh) || | ||
1950 | (!buffer_delay(bh) && !buffer_unwritten(bh)) || | ||
1951 | lblk >= blocks) { | ||
1952 | /* Found extent to map? */ | ||
1953 | if (mpd->map.m_len) | ||
1954 | return false; | ||
1955 | if (lblk >= blocks) | ||
1956 | return true; | ||
1957 | continue; | ||
1958 | } | ||
1959 | if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state)) | ||
1960 | return false; | ||
1961 | } while (lblk++, (bh = bh->b_this_page) != head); | ||
1962 | return true; | ||
1963 | } | ||
1964 | |||
1965 | static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) | ||
1966 | { | ||
1967 | int len; | ||
1968 | loff_t size = i_size_read(mpd->inode); | ||
1969 | int err; | ||
1970 | |||
1971 | BUG_ON(page->index != mpd->first_page); | ||
1972 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
1973 | len = size & ~PAGE_CACHE_MASK; | ||
1974 | else | ||
1975 | len = PAGE_CACHE_SIZE; | ||
1976 | clear_page_dirty_for_io(page); | ||
1977 | err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); | ||
1978 | if (!err) | ||
1979 | mpd->wbc->nr_to_write--; | ||
1980 | mpd->first_page++; | ||
1981 | |||
1982 | return err; | ||
1983 | } | ||
1984 | |||
1985 | /* | ||
1986 | * mpage_map_buffers - update buffers corresponding to changed extent and | ||
1987 | * submit fully mapped pages for IO | ||
1988 | * | ||
1989 | * @mpd - description of extent to map, on return next extent to map | ||
1990 | * | ||
1991 | * Scan buffers corresponding to changed extent (we expect corresponding pages | ||
1992 | * to be already locked) and update buffer state according to new extent state. | ||
1993 | * We map delalloc buffers to their physical location, clear unwritten bits, | ||
1994 | * and mark buffers as uninit when we perform writes to uninitialized extents | ||
1995 | * and do extent conversion after IO is finished. If the last page is not fully | ||
1996 | * mapped, we update @map to the next extent in the last page that needs | ||
1997 | * mapping. Otherwise we submit the page for IO. | ||
1998 | */ | ||
1999 | static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) | ||
2000 | { | ||
2001 | struct pagevec pvec; | ||
2002 | int nr_pages, i; | ||
2003 | struct inode *inode = mpd->inode; | ||
2004 | struct buffer_head *head, *bh; | ||
2005 | int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; | ||
2006 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | ||
2007 | >> inode->i_blkbits; | ||
2008 | pgoff_t start, end; | ||
2009 | ext4_lblk_t lblk; | ||
2010 | sector_t pblock; | ||
2011 | int err; | ||
2012 | |||
2013 | start = mpd->map.m_lblk >> bpp_bits; | ||
2014 | end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; | ||
2015 | lblk = start << bpp_bits; | ||
2016 | pblock = mpd->map.m_pblk; | ||
2017 | |||
2018 | pagevec_init(&pvec, 0); | ||
2019 | while (start <= end) { | ||
2020 | nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start, | ||
2021 | PAGEVEC_SIZE); | ||
2022 | if (nr_pages == 0) | ||
2023 | break; | ||
2024 | for (i = 0; i < nr_pages; i++) { | ||
2025 | struct page *page = pvec.pages[i]; | ||
2026 | |||
2027 | if (page->index > end) | ||
2028 | break; | ||
2029 | /* Upto 'end' pages must be contiguous */ | ||
2030 | BUG_ON(page->index != start); | ||
2031 | bh = head = page_buffers(page); | ||
2032 | do { | ||
2033 | if (lblk < mpd->map.m_lblk) | ||
2034 | continue; | ||
2035 | if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { | ||
2036 | /* | ||
2037 | * Buffer after end of mapped extent. | ||
2038 | * Find next buffer in the page to map. | ||
2039 | */ | ||
2040 | mpd->map.m_len = 0; | ||
2041 | mpd->map.m_flags = 0; | ||
2042 | add_page_bufs_to_extent(mpd, head, bh, | ||
2043 | lblk); | ||
2044 | pagevec_release(&pvec); | ||
2045 | return 0; | ||
2046 | } | ||
2047 | if (buffer_delay(bh)) { | ||
2048 | clear_buffer_delay(bh); | ||
2049 | bh->b_blocknr = pblock++; | ||
2050 | } | ||
2051 | clear_buffer_unwritten(bh); | ||
2052 | } while (++lblk < blocks && | ||
2053 | (bh = bh->b_this_page) != head); | ||
2054 | |||
2055 | /* | ||
2056 | * FIXME: This is going to break if dioread_nolock | ||
2057 | * supports blocksize < pagesize as we will try to | ||
2058 | * convert potentially unmapped parts of inode. | ||
2059 | */ | ||
2060 | mpd->io_submit.io_end->size += PAGE_CACHE_SIZE; | ||
2061 | /* Page fully mapped - let IO run! */ | ||
2062 | err = mpage_submit_page(mpd, page); | ||
2063 | if (err < 0) { | ||
2064 | pagevec_release(&pvec); | ||
2065 | return err; | ||
2066 | } | ||
2067 | start++; | ||
2068 | } | ||
2069 | pagevec_release(&pvec); | ||
2070 | } | ||
2071 | /* Extent fully mapped and matches with page boundary. We are done. */ | ||
2072 | mpd->map.m_len = 0; | ||
2073 | mpd->map.m_flags = 0; | ||
2074 | return 0; | ||
2075 | } | ||
2076 | |||
2077 | static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) | ||
2078 | { | ||
2079 | struct inode *inode = mpd->inode; | ||
2080 | struct ext4_map_blocks *map = &mpd->map; | ||
2081 | int get_blocks_flags; | ||
2082 | int err; | ||
2083 | |||
2084 | trace_ext4_da_write_pages_extent(inode, map); | ||
2255 | /* | 2085 | /* |
2256 | * With non-extent format the journal credit needed to | 2086 | * Call ext4_map_blocks() to allocate any delayed allocation blocks, or |
2257 | * insert nrblocks contiguous block is dependent on | 2087 | * to convert an uninitialized extent to be initialized (in the case |
2258 | * number of contiguous block. So we will limit | 2088 | * where we have written into one or more preallocated blocks). It is |
2259 | * number of contiguous block to a sane value | 2089 | * possible that we're going to need more metadata blocks than |
2090 | * previously reserved. However we must not fail because we're in | ||
2091 | * writeback and there is nothing we can do about it so it might result | ||
2092 | * in data loss. So use reserved blocks to allocate metadata if | ||
2093 | * possible. | ||
2094 | * | ||
2095 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks | ||
2096 | * in question are delalloc blocks. This affects functions in many | ||
2097 | * different parts of the allocation call path. This flag exists | ||
2098 | * primarily because we don't want to change *many* call functions, so | ||
2099 | * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag | ||
2100 | * once the inode's allocation semaphore is taken. | ||
2260 | */ | 2101 | */ |
2261 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && | 2102 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | |
2262 | (max_blocks > EXT4_MAX_TRANS_DATA)) | 2103 | EXT4_GET_BLOCKS_METADATA_NOFAIL; |
2263 | max_blocks = EXT4_MAX_TRANS_DATA; | 2104 | if (ext4_should_dioread_nolock(inode)) |
2105 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
2106 | if (map->m_flags & (1 << BH_Delay)) | ||
2107 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | ||
2264 | 2108 | ||
2265 | return ext4_chunk_trans_blocks(inode, max_blocks); | 2109 | err = ext4_map_blocks(handle, inode, map, get_blocks_flags); |
2110 | if (err < 0) | ||
2111 | return err; | ||
2112 | if (map->m_flags & EXT4_MAP_UNINIT) { | ||
2113 | if (!mpd->io_submit.io_end->handle && | ||
2114 | ext4_handle_valid(handle)) { | ||
2115 | mpd->io_submit.io_end->handle = handle->h_rsv_handle; | ||
2116 | handle->h_rsv_handle = NULL; | ||
2117 | } | ||
2118 | ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); | ||
2119 | } | ||
2120 | |||
2121 | BUG_ON(map->m_len == 0); | ||
2122 | if (map->m_flags & EXT4_MAP_NEW) { | ||
2123 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
2124 | int i; | ||
2125 | |||
2126 | for (i = 0; i < map->m_len; i++) | ||
2127 | unmap_underlying_metadata(bdev, map->m_pblk + i); | ||
2128 | } | ||
2129 | return 0; | ||
2266 | } | 2130 | } |
2267 | 2131 | ||
2268 | /* | 2132 | /* |
2269 | * write_cache_pages_da - walk the list of dirty pages of the given | 2133 | * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length |
2270 | * address space and accumulate pages that need writing, and call | 2134 | * mpd->len and submit pages underlying it for IO |
2271 | * mpage_da_map_and_submit to map a single contiguous memory region | 2135 | * |
2272 | * and then write them. | 2136 | * @handle - handle for journal operations |
2137 | * @mpd - extent to map | ||
2138 | * | ||
2139 | * The function maps extent starting at mpd->lblk of length mpd->len. If it is | ||
2140 | * delayed, blocks are allocated, if it is unwritten, we may need to convert | ||
2141 | * them to initialized or split the described range from larger unwritten | ||
2142 | * extent. Note that we need not map all the described range since allocation | ||
2143 | * can return less blocks or the range is covered by more unwritten extents. We | ||
2144 | * cannot map more because we are limited by reserved transaction credits. On | ||
2145 | * the other hand we always make sure that the last touched page is fully | ||
2146 | * mapped so that it can be written out (and thus forward progress is | ||
2147 | * guaranteed). After mapping we submit all mapped pages for IO. | ||
2273 | */ | 2148 | */ |
2274 | static int write_cache_pages_da(handle_t *handle, | 2149 | static int mpage_map_and_submit_extent(handle_t *handle, |
2275 | struct address_space *mapping, | 2150 | struct mpage_da_data *mpd, |
2276 | struct writeback_control *wbc, | 2151 | bool *give_up_on_write) |
2277 | struct mpage_da_data *mpd, | ||
2278 | pgoff_t *done_index) | ||
2279 | { | 2152 | { |
2280 | struct buffer_head *bh, *head; | 2153 | struct inode *inode = mpd->inode; |
2281 | struct inode *inode = mapping->host; | 2154 | struct ext4_map_blocks *map = &mpd->map; |
2282 | struct pagevec pvec; | 2155 | int err; |
2283 | unsigned int nr_pages; | 2156 | loff_t disksize; |
2284 | sector_t logical; | ||
2285 | pgoff_t index, end; | ||
2286 | long nr_to_write = wbc->nr_to_write; | ||
2287 | int i, tag, ret = 0; | ||
2288 | |||
2289 | memset(mpd, 0, sizeof(struct mpage_da_data)); | ||
2290 | mpd->wbc = wbc; | ||
2291 | mpd->inode = inode; | ||
2292 | pagevec_init(&pvec, 0); | ||
2293 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
2294 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2295 | 2157 | ||
2296 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2158 | mpd->io_submit.io_end->offset = |
2159 | ((loff_t)map->m_lblk) << inode->i_blkbits; | ||
2160 | do { | ||
2161 | err = mpage_map_one_extent(handle, mpd); | ||
2162 | if (err < 0) { | ||
2163 | struct super_block *sb = inode->i_sb; | ||
2164 | |||
2165 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | ||
2166 | goto invalidate_dirty_pages; | ||
2167 | /* | ||
2168 | * Let the uper layers retry transient errors. | ||
2169 | * In the case of ENOSPC, if ext4_count_free_blocks() | ||
2170 | * is non-zero, a commit should free up blocks. | ||
2171 | */ | ||
2172 | if ((err == -ENOMEM) || | ||
2173 | (err == -ENOSPC && ext4_count_free_clusters(sb))) | ||
2174 | return err; | ||
2175 | ext4_msg(sb, KERN_CRIT, | ||
2176 | "Delayed block allocation failed for " | ||
2177 | "inode %lu at logical offset %llu with" | ||
2178 | " max blocks %u with error %d", | ||
2179 | inode->i_ino, | ||
2180 | (unsigned long long)map->m_lblk, | ||
2181 | (unsigned)map->m_len, -err); | ||
2182 | ext4_msg(sb, KERN_CRIT, | ||
2183 | "This should not happen!! Data will " | ||
2184 | "be lost\n"); | ||
2185 | if (err == -ENOSPC) | ||
2186 | ext4_print_free_blocks(inode); | ||
2187 | invalidate_dirty_pages: | ||
2188 | *give_up_on_write = true; | ||
2189 | return err; | ||
2190 | } | ||
2191 | /* | ||
2192 | * Update buffer state, submit mapped pages, and get us new | ||
2193 | * extent to map | ||
2194 | */ | ||
2195 | err = mpage_map_and_submit_buffers(mpd); | ||
2196 | if (err < 0) | ||
2197 | return err; | ||
2198 | } while (map->m_len); | ||
2199 | |||
2200 | /* Update on-disk size after IO is submitted */ | ||
2201 | disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; | ||
2202 | if (disksize > i_size_read(inode)) | ||
2203 | disksize = i_size_read(inode); | ||
2204 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
2205 | int err2; | ||
2206 | |||
2207 | ext4_update_i_disksize(inode, disksize); | ||
2208 | err2 = ext4_mark_inode_dirty(handle, inode); | ||
2209 | if (err2) | ||
2210 | ext4_error(inode->i_sb, | ||
2211 | "Failed to mark inode %lu dirty", | ||
2212 | inode->i_ino); | ||
2213 | if (!err) | ||
2214 | err = err2; | ||
2215 | } | ||
2216 | return err; | ||
2217 | } | ||
2218 | |||
2219 | /* | ||
2220 | * Calculate the total number of credits to reserve for one writepages | ||
2221 | * iteration. This is called from ext4_writepages(). We map an extent of | ||
2222 | * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping | ||
2223 | * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + | ||
2224 | * bpp - 1 blocks in bpp different extents. | ||
2225 | */ | ||
2226 | static int ext4_da_writepages_trans_blocks(struct inode *inode) | ||
2227 | { | ||
2228 | int bpp = ext4_journal_blocks_per_page(inode); | ||
2229 | |||
2230 | return ext4_meta_trans_blocks(inode, | ||
2231 | MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp); | ||
2232 | } | ||
2233 | |||
2234 | /* | ||
2235 | * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages | ||
2236 | * and underlying extent to map | ||
2237 | * | ||
2238 | * @mpd - where to look for pages | ||
2239 | * | ||
2240 | * Walk dirty pages in the mapping. If they are fully mapped, submit them for | ||
2241 | * IO immediately. When we find a page which isn't mapped we start accumulating | ||
2242 | * extent of buffers underlying these pages that needs mapping (formed by | ||
2243 | * either delayed or unwritten buffers). We also lock the pages containing | ||
2244 | * these buffers. The extent found is returned in @mpd structure (starting at | ||
2245 | * mpd->lblk with length mpd->len blocks). | ||
2246 | * | ||
2247 | * Note that this function can attach bios to one io_end structure which are | ||
2248 | * neither logically nor physically contiguous. Although it may seem as an | ||
2249 | * unnecessary complication, it is actually inevitable in blocksize < pagesize | ||
2250 | * case as we need to track IO to all buffers underlying a page in one io_end. | ||
2251 | */ | ||
2252 | static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) | ||
2253 | { | ||
2254 | struct address_space *mapping = mpd->inode->i_mapping; | ||
2255 | struct pagevec pvec; | ||
2256 | unsigned int nr_pages; | ||
2257 | pgoff_t index = mpd->first_page; | ||
2258 | pgoff_t end = mpd->last_page; | ||
2259 | int tag; | ||
2260 | int i, err = 0; | ||
2261 | int blkbits = mpd->inode->i_blkbits; | ||
2262 | ext4_lblk_t lblk; | ||
2263 | struct buffer_head *head; | ||
2264 | |||
2265 | if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) | ||
2297 | tag = PAGECACHE_TAG_TOWRITE; | 2266 | tag = PAGECACHE_TAG_TOWRITE; |
2298 | else | 2267 | else |
2299 | tag = PAGECACHE_TAG_DIRTY; | 2268 | tag = PAGECACHE_TAG_DIRTY; |
2300 | 2269 | ||
2301 | *done_index = index; | 2270 | pagevec_init(&pvec, 0); |
2271 | mpd->map.m_len = 0; | ||
2272 | mpd->next_page = index; | ||
2302 | while (index <= end) { | 2273 | while (index <= end) { |
2303 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 2274 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2304 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2275 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2305 | if (nr_pages == 0) | 2276 | if (nr_pages == 0) |
2306 | return 0; | 2277 | goto out; |
2307 | 2278 | ||
2308 | for (i = 0; i < nr_pages; i++) { | 2279 | for (i = 0; i < nr_pages; i++) { |
2309 | struct page *page = pvec.pages[i]; | 2280 | struct page *page = pvec.pages[i]; |
@@ -2318,31 +2289,21 @@ static int write_cache_pages_da(handle_t *handle, | |||
2318 | if (page->index > end) | 2289 | if (page->index > end) |
2319 | goto out; | 2290 | goto out; |
2320 | 2291 | ||
2321 | *done_index = page->index + 1; | 2292 | /* If we can't merge this page, we are done. */ |
2322 | 2293 | if (mpd->map.m_len > 0 && mpd->next_page != page->index) | |
2323 | /* | 2294 | goto out; |
2324 | * If we can't merge this page, and we have | ||
2325 | * accumulated an contiguous region, write it | ||
2326 | */ | ||
2327 | if ((mpd->next_page != page->index) && | ||
2328 | (mpd->next_page != mpd->first_page)) { | ||
2329 | mpage_da_map_and_submit(mpd); | ||
2330 | goto ret_extent_tail; | ||
2331 | } | ||
2332 | 2295 | ||
2333 | lock_page(page); | 2296 | lock_page(page); |
2334 | |||
2335 | /* | 2297 | /* |
2336 | * If the page is no longer dirty, or its | 2298 | * If the page is no longer dirty, or its mapping no |
2337 | * mapping no longer corresponds to inode we | 2299 | * longer corresponds to inode we are writing (which |
2338 | * are writing (which means it has been | 2300 | * means it has been truncated or invalidated), or the |
2339 | * truncated or invalidated), or the page is | 2301 | * page is already under writeback and we are not doing |
2340 | * already under writeback and we are not | 2302 | * a data integrity writeback, skip the page |
2341 | * doing a data integrity writeback, skip the page | ||
2342 | */ | 2303 | */ |
2343 | if (!PageDirty(page) || | 2304 | if (!PageDirty(page) || |
2344 | (PageWriteback(page) && | 2305 | (PageWriteback(page) && |
2345 | (wbc->sync_mode == WB_SYNC_NONE)) || | 2306 | (mpd->wbc->sync_mode == WB_SYNC_NONE)) || |
2346 | unlikely(page->mapping != mapping)) { | 2307 | unlikely(page->mapping != mapping)) { |
2347 | unlock_page(page); | 2308 | unlock_page(page); |
2348 | continue; | 2309 | continue; |
@@ -2351,106 +2312,70 @@ static int write_cache_pages_da(handle_t *handle, | |||
2351 | wait_on_page_writeback(page); | 2312 | wait_on_page_writeback(page); |
2352 | BUG_ON(PageWriteback(page)); | 2313 | BUG_ON(PageWriteback(page)); |
2353 | 2314 | ||
2354 | /* | 2315 | if (mpd->map.m_len == 0) |
2355 | * If we have inline data and arrive here, it means that | ||
2356 | * we will soon create the block for the 1st page, so | ||
2357 | * we'd better clear the inline data here. | ||
2358 | */ | ||
2359 | if (ext4_has_inline_data(inode)) { | ||
2360 | BUG_ON(ext4_test_inode_state(inode, | ||
2361 | EXT4_STATE_MAY_INLINE_DATA)); | ||
2362 | ext4_destroy_inline_data(handle, inode); | ||
2363 | } | ||
2364 | |||
2365 | if (mpd->next_page != page->index) | ||
2366 | mpd->first_page = page->index; | 2316 | mpd->first_page = page->index; |
2367 | mpd->next_page = page->index + 1; | 2317 | mpd->next_page = page->index + 1; |
2368 | logical = (sector_t) page->index << | ||
2369 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2370 | |||
2371 | /* Add all dirty buffers to mpd */ | 2318 | /* Add all dirty buffers to mpd */ |
2319 | lblk = ((ext4_lblk_t)page->index) << | ||
2320 | (PAGE_CACHE_SHIFT - blkbits); | ||
2372 | head = page_buffers(page); | 2321 | head = page_buffers(page); |
2373 | bh = head; | 2322 | if (!add_page_bufs_to_extent(mpd, head, head, lblk)) |
2374 | do { | 2323 | goto out; |
2375 | BUG_ON(buffer_locked(bh)); | 2324 | /* So far everything mapped? Submit the page for IO. */ |
2376 | /* | 2325 | if (mpd->map.m_len == 0) { |
2377 | * We need to try to allocate unmapped blocks | 2326 | err = mpage_submit_page(mpd, page); |
2378 | * in the same page. Otherwise we won't make | 2327 | if (err < 0) |
2379 | * progress with the page in ext4_writepage | ||
2380 | */ | ||
2381 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2382 | mpage_add_bh_to_extent(mpd, logical, | ||
2383 | bh->b_state); | ||
2384 | if (mpd->io_done) | ||
2385 | goto ret_extent_tail; | ||
2386 | } else if (buffer_dirty(bh) && | ||
2387 | buffer_mapped(bh)) { | ||
2388 | /* | ||
2389 | * mapped dirty buffer. We need to | ||
2390 | * update the b_state because we look | ||
2391 | * at b_state in mpage_da_map_blocks. | ||
2392 | * We don't update b_size because if we | ||
2393 | * find an unmapped buffer_head later | ||
2394 | * we need to use the b_state flag of | ||
2395 | * that buffer_head. | ||
2396 | */ | ||
2397 | if (mpd->b_size == 0) | ||
2398 | mpd->b_state = | ||
2399 | bh->b_state & BH_FLAGS; | ||
2400 | } | ||
2401 | logical++; | ||
2402 | } while ((bh = bh->b_this_page) != head); | ||
2403 | |||
2404 | if (nr_to_write > 0) { | ||
2405 | nr_to_write--; | ||
2406 | if (nr_to_write == 0 && | ||
2407 | wbc->sync_mode == WB_SYNC_NONE) | ||
2408 | /* | ||
2409 | * We stop writing back only if we are | ||
2410 | * not doing integrity sync. In case of | ||
2411 | * integrity sync we have to keep going | ||
2412 | * because someone may be concurrently | ||
2413 | * dirtying pages, and we might have | ||
2414 | * synced a lot of newly appeared dirty | ||
2415 | * pages, but have not synced all of the | ||
2416 | * old dirty pages. | ||
2417 | */ | ||
2418 | goto out; | 2328 | goto out; |
2419 | } | 2329 | } |
2330 | |||
2331 | /* | ||
2332 | * Accumulated enough dirty pages? This doesn't apply | ||
2333 | * to WB_SYNC_ALL mode. For integrity sync we have to | ||
2334 | * keep going because someone may be concurrently | ||
2335 | * dirtying pages, and we might have synced a lot of | ||
2336 | * newly appeared dirty pages, but have not synced all | ||
2337 | * of the old dirty pages. | ||
2338 | */ | ||
2339 | if (mpd->wbc->sync_mode == WB_SYNC_NONE && | ||
2340 | mpd->next_page - mpd->first_page >= | ||
2341 | mpd->wbc->nr_to_write) | ||
2342 | goto out; | ||
2420 | } | 2343 | } |
2421 | pagevec_release(&pvec); | 2344 | pagevec_release(&pvec); |
2422 | cond_resched(); | 2345 | cond_resched(); |
2423 | } | 2346 | } |
2424 | return 0; | 2347 | return 0; |
2425 | ret_extent_tail: | ||
2426 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2427 | out: | 2348 | out: |
2428 | pagevec_release(&pvec); | 2349 | pagevec_release(&pvec); |
2429 | cond_resched(); | 2350 | return err; |
2430 | return ret; | ||
2431 | } | 2351 | } |
2432 | 2352 | ||
2353 | static int __writepage(struct page *page, struct writeback_control *wbc, | ||
2354 | void *data) | ||
2355 | { | ||
2356 | struct address_space *mapping = data; | ||
2357 | int ret = ext4_writepage(page, wbc); | ||
2358 | mapping_set_error(mapping, ret); | ||
2359 | return ret; | ||
2360 | } | ||
2433 | 2361 | ||
2434 | static int ext4_da_writepages(struct address_space *mapping, | 2362 | static int ext4_writepages(struct address_space *mapping, |
2435 | struct writeback_control *wbc) | 2363 | struct writeback_control *wbc) |
2436 | { | 2364 | { |
2437 | pgoff_t index; | 2365 | pgoff_t writeback_index = 0; |
2366 | long nr_to_write = wbc->nr_to_write; | ||
2438 | int range_whole = 0; | 2367 | int range_whole = 0; |
2368 | int cycled = 1; | ||
2439 | handle_t *handle = NULL; | 2369 | handle_t *handle = NULL; |
2440 | struct mpage_da_data mpd; | 2370 | struct mpage_da_data mpd; |
2441 | struct inode *inode = mapping->host; | 2371 | struct inode *inode = mapping->host; |
2442 | int pages_written = 0; | 2372 | int needed_blocks, rsv_blocks = 0, ret = 0; |
2443 | unsigned int max_pages; | ||
2444 | int range_cyclic, cycled = 1, io_done = 0; | ||
2445 | int needed_blocks, ret = 0; | ||
2446 | long desired_nr_to_write, nr_to_writebump = 0; | ||
2447 | loff_t range_start = wbc->range_start; | ||
2448 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2373 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2449 | pgoff_t done_index = 0; | 2374 | bool done; |
2450 | pgoff_t end; | ||
2451 | struct blk_plug plug; | 2375 | struct blk_plug plug; |
2376 | bool give_up_on_write = false; | ||
2452 | 2377 | ||
2453 | trace_ext4_da_writepages(inode, wbc); | 2378 | trace_ext4_writepages(inode, wbc); |
2454 | 2379 | ||
2455 | /* | 2380 | /* |
2456 | * No pages to write? This is mainly a kludge to avoid starting | 2381 | * No pages to write? This is mainly a kludge to avoid starting |
@@ -2460,164 +2385,165 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2460 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) | 2385 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
2461 | return 0; | 2386 | return 0; |
2462 | 2387 | ||
2388 | if (ext4_should_journal_data(inode)) { | ||
2389 | struct blk_plug plug; | ||
2390 | int ret; | ||
2391 | |||
2392 | blk_start_plug(&plug); | ||
2393 | ret = write_cache_pages(mapping, wbc, __writepage, mapping); | ||
2394 | blk_finish_plug(&plug); | ||
2395 | return ret; | ||
2396 | } | ||
2397 | |||
2463 | /* | 2398 | /* |
2464 | * If the filesystem has aborted, it is read-only, so return | 2399 | * If the filesystem has aborted, it is read-only, so return |
2465 | * right away instead of dumping stack traces later on that | 2400 | * right away instead of dumping stack traces later on that |
2466 | * will obscure the real source of the problem. We test | 2401 | * will obscure the real source of the problem. We test |
2467 | * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because | 2402 | * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because |
2468 | * the latter could be true if the filesystem is mounted | 2403 | * the latter could be true if the filesystem is mounted |
2469 | * read-only, and in that case, ext4_da_writepages should | 2404 | * read-only, and in that case, ext4_writepages should |
2470 | * *never* be called, so if that ever happens, we would want | 2405 | * *never* be called, so if that ever happens, we would want |
2471 | * the stack trace. | 2406 | * the stack trace. |
2472 | */ | 2407 | */ |
2473 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) | 2408 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) |
2474 | return -EROFS; | 2409 | return -EROFS; |
2475 | 2410 | ||
2411 | if (ext4_should_dioread_nolock(inode)) { | ||
2412 | /* | ||
2413 | * We may need to convert upto one extent per block in | ||
2414 | * the page and we may dirty the inode. | ||
2415 | */ | ||
2416 | rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); | ||
2417 | } | ||
2418 | |||
2419 | /* | ||
2420 | * If we have inline data and arrive here, it means that | ||
2421 | * we will soon create the block for the 1st page, so | ||
2422 | * we'd better clear the inline data here. | ||
2423 | */ | ||
2424 | if (ext4_has_inline_data(inode)) { | ||
2425 | /* Just inode will be modified... */ | ||
2426 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); | ||
2427 | if (IS_ERR(handle)) { | ||
2428 | ret = PTR_ERR(handle); | ||
2429 | goto out_writepages; | ||
2430 | } | ||
2431 | BUG_ON(ext4_test_inode_state(inode, | ||
2432 | EXT4_STATE_MAY_INLINE_DATA)); | ||
2433 | ext4_destroy_inline_data(handle, inode); | ||
2434 | ext4_journal_stop(handle); | ||
2435 | } | ||
2436 | |||
2476 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 2437 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
2477 | range_whole = 1; | 2438 | range_whole = 1; |
2478 | 2439 | ||
2479 | range_cyclic = wbc->range_cyclic; | ||
2480 | if (wbc->range_cyclic) { | 2440 | if (wbc->range_cyclic) { |
2481 | index = mapping->writeback_index; | 2441 | writeback_index = mapping->writeback_index; |
2482 | if (index) | 2442 | if (writeback_index) |
2483 | cycled = 0; | 2443 | cycled = 0; |
2484 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2444 | mpd.first_page = writeback_index; |
2485 | wbc->range_end = LLONG_MAX; | 2445 | mpd.last_page = -1; |
2486 | wbc->range_cyclic = 0; | ||
2487 | end = -1; | ||
2488 | } else { | 2446 | } else { |
2489 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2447 | mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT; |
2490 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2448 | mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT; |
2491 | } | ||
2492 | |||
2493 | /* | ||
2494 | * This works around two forms of stupidity. The first is in | ||
2495 | * the writeback code, which caps the maximum number of pages | ||
2496 | * written to be 1024 pages. This is wrong on multiple | ||
2497 | * levels; different architectues have a different page size, | ||
2498 | * which changes the maximum amount of data which gets | ||
2499 | * written. Secondly, 4 megabytes is way too small. XFS | ||
2500 | * forces this value to be 16 megabytes by multiplying | ||
2501 | * nr_to_write parameter by four, and then relies on its | ||
2502 | * allocator to allocate larger extents to make them | ||
2503 | * contiguous. Unfortunately this brings us to the second | ||
2504 | * stupidity, which is that ext4's mballoc code only allocates | ||
2505 | * at most 2048 blocks. So we force contiguous writes up to | ||
2506 | * the number of dirty blocks in the inode, or | ||
2507 | * sbi->max_writeback_mb_bump whichever is smaller. | ||
2508 | */ | ||
2509 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); | ||
2510 | if (!range_cyclic && range_whole) { | ||
2511 | if (wbc->nr_to_write == LONG_MAX) | ||
2512 | desired_nr_to_write = wbc->nr_to_write; | ||
2513 | else | ||
2514 | desired_nr_to_write = wbc->nr_to_write * 8; | ||
2515 | } else | ||
2516 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, | ||
2517 | max_pages); | ||
2518 | if (desired_nr_to_write > max_pages) | ||
2519 | desired_nr_to_write = max_pages; | ||
2520 | |||
2521 | if (wbc->nr_to_write < desired_nr_to_write) { | ||
2522 | nr_to_writebump = desired_nr_to_write - wbc->nr_to_write; | ||
2523 | wbc->nr_to_write = desired_nr_to_write; | ||
2524 | } | 2449 | } |
2525 | 2450 | ||
2451 | mpd.inode = inode; | ||
2452 | mpd.wbc = wbc; | ||
2453 | ext4_io_submit_init(&mpd.io_submit, wbc); | ||
2526 | retry: | 2454 | retry: |
2527 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2455 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
2528 | tag_pages_for_writeback(mapping, index, end); | 2456 | tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page); |
2529 | 2457 | done = false; | |
2530 | blk_start_plug(&plug); | 2458 | blk_start_plug(&plug); |
2531 | while (!ret && wbc->nr_to_write > 0) { | 2459 | while (!done && mpd.first_page <= mpd.last_page) { |
2460 | /* For each extent of pages we use new io_end */ | ||
2461 | mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); | ||
2462 | if (!mpd.io_submit.io_end) { | ||
2463 | ret = -ENOMEM; | ||
2464 | break; | ||
2465 | } | ||
2532 | 2466 | ||
2533 | /* | 2467 | /* |
2534 | * we insert one extent at a time. So we need | 2468 | * We have two constraints: We find one extent to map and we |
2535 | * credit needed for single extent allocation. | 2469 | * must always write out whole page (makes a difference when |
2536 | * journalled mode is currently not supported | 2470 | * blocksize < pagesize) so that we don't block on IO when we |
2537 | * by delalloc | 2471 | * try to write out the rest of the page. Journalled mode is |
2472 | * not supported by delalloc. | ||
2538 | */ | 2473 | */ |
2539 | BUG_ON(ext4_should_journal_data(inode)); | 2474 | BUG_ON(ext4_should_journal_data(inode)); |
2540 | needed_blocks = ext4_da_writepages_trans_blocks(inode); | 2475 | needed_blocks = ext4_da_writepages_trans_blocks(inode); |
2541 | 2476 | ||
2542 | /* start a new transaction*/ | 2477 | /* start a new transaction */ |
2543 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, | 2478 | handle = ext4_journal_start_with_reserve(inode, |
2544 | needed_blocks); | 2479 | EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks); |
2545 | if (IS_ERR(handle)) { | 2480 | if (IS_ERR(handle)) { |
2546 | ret = PTR_ERR(handle); | 2481 | ret = PTR_ERR(handle); |
2547 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " | 2482 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " |
2548 | "%ld pages, ino %lu; err %d", __func__, | 2483 | "%ld pages, ino %lu; err %d", __func__, |
2549 | wbc->nr_to_write, inode->i_ino, ret); | 2484 | wbc->nr_to_write, inode->i_ino, ret); |
2550 | blk_finish_plug(&plug); | 2485 | /* Release allocated io_end */ |
2551 | goto out_writepages; | 2486 | ext4_put_io_end(mpd.io_submit.io_end); |
2487 | break; | ||
2552 | } | 2488 | } |
2553 | 2489 | ||
2554 | /* | 2490 | trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc); |
2555 | * Now call write_cache_pages_da() to find the next | 2491 | ret = mpage_prepare_extent_to_map(&mpd); |
2556 | * contiguous region of logical blocks that need | 2492 | if (!ret) { |
2557 | * blocks to be allocated by ext4 and submit them. | 2493 | if (mpd.map.m_len) |
2558 | */ | 2494 | ret = mpage_map_and_submit_extent(handle, &mpd, |
2559 | ret = write_cache_pages_da(handle, mapping, | 2495 | &give_up_on_write); |
2560 | wbc, &mpd, &done_index); | 2496 | else { |
2561 | /* | 2497 | /* |
2562 | * If we have a contiguous extent of pages and we | 2498 | * We scanned the whole range (or exhausted |
2563 | * haven't done the I/O yet, map the blocks and submit | 2499 | * nr_to_write), submitted what was mapped and |
2564 | * them for I/O. | 2500 | * didn't find anything needing mapping. We are |
2565 | */ | 2501 | * done. |
2566 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 2502 | */ |
2567 | mpage_da_map_and_submit(&mpd); | 2503 | done = true; |
2568 | ret = MPAGE_DA_EXTENT_TAIL; | 2504 | } |
2569 | } | 2505 | } |
2570 | trace_ext4_da_write_pages(inode, &mpd); | ||
2571 | wbc->nr_to_write -= mpd.pages_written; | ||
2572 | |||
2573 | ext4_journal_stop(handle); | 2506 | ext4_journal_stop(handle); |
2574 | 2507 | /* Submit prepared bio */ | |
2575 | if ((mpd.retval == -ENOSPC) && sbi->s_journal) { | 2508 | ext4_io_submit(&mpd.io_submit); |
2576 | /* commit the transaction which would | 2509 | /* Unlock pages we didn't use */ |
2510 | mpage_release_unused_pages(&mpd, give_up_on_write); | ||
2511 | /* Drop our io_end reference we got from init */ | ||
2512 | ext4_put_io_end(mpd.io_submit.io_end); | ||
2513 | |||
2514 | if (ret == -ENOSPC && sbi->s_journal) { | ||
2515 | /* | ||
2516 | * Commit the transaction which would | ||
2577 | * free blocks released in the transaction | 2517 | * free blocks released in the transaction |
2578 | * and try again | 2518 | * and try again |
2579 | */ | 2519 | */ |
2580 | jbd2_journal_force_commit_nested(sbi->s_journal); | 2520 | jbd2_journal_force_commit_nested(sbi->s_journal); |
2581 | ret = 0; | 2521 | ret = 0; |
2582 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 2522 | continue; |
2583 | /* | 2523 | } |
2584 | * Got one extent now try with rest of the pages. | 2524 | /* Fatal error - ENOMEM, EIO... */ |
2585 | * If mpd.retval is set -EIO, journal is aborted. | 2525 | if (ret) |
2586 | * So we don't need to write any more. | ||
2587 | */ | ||
2588 | pages_written += mpd.pages_written; | ||
2589 | ret = mpd.retval; | ||
2590 | io_done = 1; | ||
2591 | } else if (wbc->nr_to_write) | ||
2592 | /* | ||
2593 | * There is no more writeout needed | ||
2594 | * or we requested for a noblocking writeout | ||
2595 | * and we found the device congested | ||
2596 | */ | ||
2597 | break; | 2526 | break; |
2598 | } | 2527 | } |
2599 | blk_finish_plug(&plug); | 2528 | blk_finish_plug(&plug); |
2600 | if (!io_done && !cycled) { | 2529 | if (!ret && !cycled) { |
2601 | cycled = 1; | 2530 | cycled = 1; |
2602 | index = 0; | 2531 | mpd.last_page = writeback_index - 1; |
2603 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2532 | mpd.first_page = 0; |
2604 | wbc->range_end = mapping->writeback_index - 1; | ||
2605 | goto retry; | 2533 | goto retry; |
2606 | } | 2534 | } |
2607 | 2535 | ||
2608 | /* Update index */ | 2536 | /* Update index */ |
2609 | wbc->range_cyclic = range_cyclic; | ||
2610 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 2537 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
2611 | /* | 2538 | /* |
2612 | * set the writeback_index so that range_cyclic | 2539 | * Set the writeback_index so that range_cyclic |
2613 | * mode will write it back later | 2540 | * mode will write it back later |
2614 | */ | 2541 | */ |
2615 | mapping->writeback_index = done_index; | 2542 | mapping->writeback_index = mpd.first_page; |
2616 | 2543 | ||
2617 | out_writepages: | 2544 | out_writepages: |
2618 | wbc->nr_to_write -= nr_to_writebump; | 2545 | trace_ext4_writepages_result(inode, wbc, ret, |
2619 | wbc->range_start = range_start; | 2546 | nr_to_write - wbc->nr_to_write); |
2620 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | ||
2621 | return ret; | 2547 | return ret; |
2622 | } | 2548 | } |
2623 | 2549 | ||
@@ -2829,7 +2755,8 @@ static int ext4_da_write_end(struct file *file, | |||
2829 | return ret ? ret : copied; | 2755 | return ret ? ret : copied; |
2830 | } | 2756 | } |
2831 | 2757 | ||
2832 | static void ext4_da_invalidatepage(struct page *page, unsigned long offset) | 2758 | static void ext4_da_invalidatepage(struct page *page, unsigned int offset, |
2759 | unsigned int length) | ||
2833 | { | 2760 | { |
2834 | /* | 2761 | /* |
2835 | * Drop reserved blocks | 2762 | * Drop reserved blocks |
@@ -2838,10 +2765,10 @@ static void ext4_da_invalidatepage(struct page *page, unsigned long offset) | |||
2838 | if (!page_has_buffers(page)) | 2765 | if (!page_has_buffers(page)) |
2839 | goto out; | 2766 | goto out; |
2840 | 2767 | ||
2841 | ext4_da_page_release_reservation(page, offset); | 2768 | ext4_da_page_release_reservation(page, offset, length); |
2842 | 2769 | ||
2843 | out: | 2770 | out: |
2844 | ext4_invalidatepage(page, offset); | 2771 | ext4_invalidatepage(page, offset, length); |
2845 | 2772 | ||
2846 | return; | 2773 | return; |
2847 | } | 2774 | } |
@@ -2864,7 +2791,7 @@ int ext4_alloc_da_blocks(struct inode *inode) | |||
2864 | * laptop_mode, not even desirable). However, to do otherwise | 2791 | * laptop_mode, not even desirable). However, to do otherwise |
2865 | * would require replicating code paths in: | 2792 | * would require replicating code paths in: |
2866 | * | 2793 | * |
2867 | * ext4_da_writepages() -> | 2794 | * ext4_writepages() -> |
2868 | * write_cache_pages() ---> (via passed in callback function) | 2795 | * write_cache_pages() ---> (via passed in callback function) |
2869 | * __mpage_da_writepage() --> | 2796 | * __mpage_da_writepage() --> |
2870 | * mpage_add_bh_to_extent() | 2797 | * mpage_add_bh_to_extent() |
@@ -2989,37 +2916,40 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
2989 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 2916 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
2990 | } | 2917 | } |
2991 | 2918 | ||
2992 | static void ext4_invalidatepage(struct page *page, unsigned long offset) | 2919 | static void ext4_invalidatepage(struct page *page, unsigned int offset, |
2920 | unsigned int length) | ||
2993 | { | 2921 | { |
2994 | trace_ext4_invalidatepage(page, offset); | 2922 | trace_ext4_invalidatepage(page, offset, length); |
2995 | 2923 | ||
2996 | /* No journalling happens on data buffers when this function is used */ | 2924 | /* No journalling happens on data buffers when this function is used */ |
2997 | WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); | 2925 | WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); |
2998 | 2926 | ||
2999 | block_invalidatepage(page, offset); | 2927 | block_invalidatepage(page, offset, length); |
3000 | } | 2928 | } |
3001 | 2929 | ||
3002 | static int __ext4_journalled_invalidatepage(struct page *page, | 2930 | static int __ext4_journalled_invalidatepage(struct page *page, |
3003 | unsigned long offset) | 2931 | unsigned int offset, |
2932 | unsigned int length) | ||
3004 | { | 2933 | { |
3005 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 2934 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3006 | 2935 | ||
3007 | trace_ext4_journalled_invalidatepage(page, offset); | 2936 | trace_ext4_journalled_invalidatepage(page, offset, length); |
3008 | 2937 | ||
3009 | /* | 2938 | /* |
3010 | * If it's a full truncate we just forget about the pending dirtying | 2939 | * If it's a full truncate we just forget about the pending dirtying |
3011 | */ | 2940 | */ |
3012 | if (offset == 0) | 2941 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
3013 | ClearPageChecked(page); | 2942 | ClearPageChecked(page); |
3014 | 2943 | ||
3015 | return jbd2_journal_invalidatepage(journal, page, offset); | 2944 | return jbd2_journal_invalidatepage(journal, page, offset, length); |
3016 | } | 2945 | } |
3017 | 2946 | ||
3018 | /* Wrapper for aops... */ | 2947 | /* Wrapper for aops... */ |
3019 | static void ext4_journalled_invalidatepage(struct page *page, | 2948 | static void ext4_journalled_invalidatepage(struct page *page, |
3020 | unsigned long offset) | 2949 | unsigned int offset, |
2950 | unsigned int length) | ||
3021 | { | 2951 | { |
3022 | WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0); | 2952 | WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0); |
3023 | } | 2953 | } |
3024 | 2954 | ||
3025 | static int ext4_releasepage(struct page *page, gfp_t wait) | 2955 | static int ext4_releasepage(struct page *page, gfp_t wait) |
@@ -3067,9 +2997,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3067 | struct inode *inode = file_inode(iocb->ki_filp); | 2997 | struct inode *inode = file_inode(iocb->ki_filp); |
3068 | ext4_io_end_t *io_end = iocb->private; | 2998 | ext4_io_end_t *io_end = iocb->private; |
3069 | 2999 | ||
3070 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3000 | /* if not async direct IO just return */ |
3071 | if (!io_end || !size) | 3001 | if (!io_end) { |
3072 | goto out; | 3002 | inode_dio_done(inode); |
3003 | if (is_async) | ||
3004 | aio_complete(iocb, ret, 0); | ||
3005 | return; | ||
3006 | } | ||
3073 | 3007 | ||
3074 | ext_debug("ext4_end_io_dio(): io_end 0x%p " | 3008 | ext_debug("ext4_end_io_dio(): io_end 0x%p " |
3075 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", | 3009 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", |
@@ -3077,25 +3011,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3077 | size); | 3011 | size); |
3078 | 3012 | ||
3079 | iocb->private = NULL; | 3013 | iocb->private = NULL; |
3080 | |||
3081 | /* if not aio dio with unwritten extents, just free io and return */ | ||
3082 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
3083 | ext4_free_io_end(io_end); | ||
3084 | out: | ||
3085 | inode_dio_done(inode); | ||
3086 | if (is_async) | ||
3087 | aio_complete(iocb, ret, 0); | ||
3088 | return; | ||
3089 | } | ||
3090 | |||
3091 | io_end->offset = offset; | 3014 | io_end->offset = offset; |
3092 | io_end->size = size; | 3015 | io_end->size = size; |
3093 | if (is_async) { | 3016 | if (is_async) { |
3094 | io_end->iocb = iocb; | 3017 | io_end->iocb = iocb; |
3095 | io_end->result = ret; | 3018 | io_end->result = ret; |
3096 | } | 3019 | } |
3097 | 3020 | ext4_put_io_end_defer(io_end); | |
3098 | ext4_add_complete_io(io_end); | ||
3099 | } | 3021 | } |
3100 | 3022 | ||
3101 | /* | 3023 | /* |
@@ -3129,6 +3051,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3129 | get_block_t *get_block_func = NULL; | 3051 | get_block_t *get_block_func = NULL; |
3130 | int dio_flags = 0; | 3052 | int dio_flags = 0; |
3131 | loff_t final_size = offset + count; | 3053 | loff_t final_size = offset + count; |
3054 | ext4_io_end_t *io_end = NULL; | ||
3132 | 3055 | ||
3133 | /* Use the old path for reads and writes beyond i_size. */ | 3056 | /* Use the old path for reads and writes beyond i_size. */ |
3134 | if (rw != WRITE || final_size > inode->i_size) | 3057 | if (rw != WRITE || final_size > inode->i_size) |
@@ -3136,11 +3059,18 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3136 | 3059 | ||
3137 | BUG_ON(iocb->private == NULL); | 3060 | BUG_ON(iocb->private == NULL); |
3138 | 3061 | ||
3062 | /* | ||
3063 | * Make all waiters for direct IO properly wait also for extent | ||
3064 | * conversion. This also disallows race between truncate() and | ||
3065 | * overwrite DIO as i_dio_count needs to be incremented under i_mutex. | ||
3066 | */ | ||
3067 | if (rw == WRITE) | ||
3068 | atomic_inc(&inode->i_dio_count); | ||
3069 | |||
3139 | /* If we do a overwrite dio, i_mutex locking can be released */ | 3070 | /* If we do a overwrite dio, i_mutex locking can be released */ |
3140 | overwrite = *((int *)iocb->private); | 3071 | overwrite = *((int *)iocb->private); |
3141 | 3072 | ||
3142 | if (overwrite) { | 3073 | if (overwrite) { |
3143 | atomic_inc(&inode->i_dio_count); | ||
3144 | down_read(&EXT4_I(inode)->i_data_sem); | 3074 | down_read(&EXT4_I(inode)->i_data_sem); |
3145 | mutex_unlock(&inode->i_mutex); | 3075 | mutex_unlock(&inode->i_mutex); |
3146 | } | 3076 | } |
@@ -3167,13 +3097,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3167 | iocb->private = NULL; | 3097 | iocb->private = NULL; |
3168 | ext4_inode_aio_set(inode, NULL); | 3098 | ext4_inode_aio_set(inode, NULL); |
3169 | if (!is_sync_kiocb(iocb)) { | 3099 | if (!is_sync_kiocb(iocb)) { |
3170 | ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); | 3100 | io_end = ext4_init_io_end(inode, GFP_NOFS); |
3171 | if (!io_end) { | 3101 | if (!io_end) { |
3172 | ret = -ENOMEM; | 3102 | ret = -ENOMEM; |
3173 | goto retake_lock; | 3103 | goto retake_lock; |
3174 | } | 3104 | } |
3175 | io_end->flag |= EXT4_IO_END_DIRECT; | 3105 | io_end->flag |= EXT4_IO_END_DIRECT; |
3176 | iocb->private = io_end; | 3106 | /* |
3107 | * Grab reference for DIO. Will be dropped in ext4_end_io_dio() | ||
3108 | */ | ||
3109 | iocb->private = ext4_get_io_end(io_end); | ||
3177 | /* | 3110 | /* |
3178 | * we save the io structure for current async direct | 3111 | * we save the io structure for current async direct |
3179 | * IO, so that later ext4_map_blocks() could flag the | 3112 | * IO, so that later ext4_map_blocks() could flag the |
@@ -3197,33 +3130,42 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3197 | NULL, | 3130 | NULL, |
3198 | dio_flags); | 3131 | dio_flags); |
3199 | 3132 | ||
3200 | if (iocb->private) | ||
3201 | ext4_inode_aio_set(inode, NULL); | ||
3202 | /* | 3133 | /* |
3203 | * The io_end structure takes a reference to the inode, that | 3134 | * Put our reference to io_end. This can free the io_end structure e.g. |
3204 | * structure needs to be destroyed and the reference to the | 3135 | * in sync IO case or in case of error. It can even perform extent |
3205 | * inode need to be dropped, when IO is complete, even with 0 | 3136 | * conversion if all bios we submitted finished before we got here. |
3206 | * byte write, or failed. | 3137 | * Note that in that case iocb->private can be already set to NULL |
3207 | * | 3138 | * here. |
3208 | * In the successful AIO DIO case, the io_end structure will | ||
3209 | * be destroyed and the reference to the inode will be dropped | ||
3210 | * after the end_io call back function is called. | ||
3211 | * | ||
3212 | * In the case there is 0 byte write, or error case, since VFS | ||
3213 | * direct IO won't invoke the end_io call back function, we | ||
3214 | * need to free the end_io structure here. | ||
3215 | */ | 3139 | */ |
3216 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3140 | if (io_end) { |
3217 | ext4_free_io_end(iocb->private); | 3141 | ext4_inode_aio_set(inode, NULL); |
3218 | iocb->private = NULL; | 3142 | ext4_put_io_end(io_end); |
3219 | } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | 3143 | /* |
3144 | * When no IO was submitted ext4_end_io_dio() was not | ||
3145 | * called so we have to put iocb's reference. | ||
3146 | */ | ||
3147 | if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) { | ||
3148 | WARN_ON(iocb->private != io_end); | ||
3149 | WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); | ||
3150 | WARN_ON(io_end->iocb); | ||
3151 | /* | ||
3152 | * Generic code already did inode_dio_done() so we | ||
3153 | * have to clear EXT4_IO_END_DIRECT to not do it for | ||
3154 | * the second time. | ||
3155 | */ | ||
3156 | io_end->flag = 0; | ||
3157 | ext4_put_io_end(io_end); | ||
3158 | iocb->private = NULL; | ||
3159 | } | ||
3160 | } | ||
3161 | if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | ||
3220 | EXT4_STATE_DIO_UNWRITTEN)) { | 3162 | EXT4_STATE_DIO_UNWRITTEN)) { |
3221 | int err; | 3163 | int err; |
3222 | /* | 3164 | /* |
3223 | * for non AIO case, since the IO is already | 3165 | * for non AIO case, since the IO is already |
3224 | * completed, we could do the conversion right here | 3166 | * completed, we could do the conversion right here |
3225 | */ | 3167 | */ |
3226 | err = ext4_convert_unwritten_extents(inode, | 3168 | err = ext4_convert_unwritten_extents(NULL, inode, |
3227 | offset, ret); | 3169 | offset, ret); |
3228 | if (err < 0) | 3170 | if (err < 0) |
3229 | ret = err; | 3171 | ret = err; |
@@ -3231,9 +3173,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3231 | } | 3173 | } |
3232 | 3174 | ||
3233 | retake_lock: | 3175 | retake_lock: |
3176 | if (rw == WRITE) | ||
3177 | inode_dio_done(inode); | ||
3234 | /* take i_mutex locking again if we do a ovewrite dio */ | 3178 | /* take i_mutex locking again if we do a ovewrite dio */ |
3235 | if (overwrite) { | 3179 | if (overwrite) { |
3236 | inode_dio_done(inode); | ||
3237 | up_read(&EXT4_I(inode)->i_data_sem); | 3180 | up_read(&EXT4_I(inode)->i_data_sem); |
3238 | mutex_lock(&inode->i_mutex); | 3181 | mutex_lock(&inode->i_mutex); |
3239 | } | 3182 | } |
@@ -3292,6 +3235,7 @@ static const struct address_space_operations ext4_aops = { | |||
3292 | .readpage = ext4_readpage, | 3235 | .readpage = ext4_readpage, |
3293 | .readpages = ext4_readpages, | 3236 | .readpages = ext4_readpages, |
3294 | .writepage = ext4_writepage, | 3237 | .writepage = ext4_writepage, |
3238 | .writepages = ext4_writepages, | ||
3295 | .write_begin = ext4_write_begin, | 3239 | .write_begin = ext4_write_begin, |
3296 | .write_end = ext4_write_end, | 3240 | .write_end = ext4_write_end, |
3297 | .bmap = ext4_bmap, | 3241 | .bmap = ext4_bmap, |
@@ -3307,6 +3251,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
3307 | .readpage = ext4_readpage, | 3251 | .readpage = ext4_readpage, |
3308 | .readpages = ext4_readpages, | 3252 | .readpages = ext4_readpages, |
3309 | .writepage = ext4_writepage, | 3253 | .writepage = ext4_writepage, |
3254 | .writepages = ext4_writepages, | ||
3310 | .write_begin = ext4_write_begin, | 3255 | .write_begin = ext4_write_begin, |
3311 | .write_end = ext4_journalled_write_end, | 3256 | .write_end = ext4_journalled_write_end, |
3312 | .set_page_dirty = ext4_journalled_set_page_dirty, | 3257 | .set_page_dirty = ext4_journalled_set_page_dirty, |
@@ -3322,7 +3267,7 @@ static const struct address_space_operations ext4_da_aops = { | |||
3322 | .readpage = ext4_readpage, | 3267 | .readpage = ext4_readpage, |
3323 | .readpages = ext4_readpages, | 3268 | .readpages = ext4_readpages, |
3324 | .writepage = ext4_writepage, | 3269 | .writepage = ext4_writepage, |
3325 | .writepages = ext4_da_writepages, | 3270 | .writepages = ext4_writepages, |
3326 | .write_begin = ext4_da_write_begin, | 3271 | .write_begin = ext4_da_write_begin, |
3327 | .write_end = ext4_da_write_end, | 3272 | .write_end = ext4_da_write_end, |
3328 | .bmap = ext4_bmap, | 3273 | .bmap = ext4_bmap, |
@@ -3355,89 +3300,56 @@ void ext4_set_aops(struct inode *inode) | |||
3355 | inode->i_mapping->a_ops = &ext4_aops; | 3300 | inode->i_mapping->a_ops = &ext4_aops; |
3356 | } | 3301 | } |
3357 | 3302 | ||
3358 | |||
3359 | /* | 3303 | /* |
3360 | * ext4_discard_partial_page_buffers() | 3304 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
3361 | * Wrapper function for ext4_discard_partial_page_buffers_no_lock. | 3305 | * up to the end of the block which corresponds to `from'. |
3362 | * This function finds and locks the page containing the offset | 3306 | * This required during truncate. We need to physically zero the tail end |
3363 | * "from" and passes it to ext4_discard_partial_page_buffers_no_lock. | 3307 | * of that block so it doesn't yield old data if the file is later grown. |
3364 | * Calling functions that already have the page locked should call | ||
3365 | * ext4_discard_partial_page_buffers_no_lock directly. | ||
3366 | */ | 3308 | */ |
3367 | int ext4_discard_partial_page_buffers(handle_t *handle, | 3309 | int ext4_block_truncate_page(handle_t *handle, |
3368 | struct address_space *mapping, loff_t from, | 3310 | struct address_space *mapping, loff_t from) |
3369 | loff_t length, int flags) | ||
3370 | { | 3311 | { |
3312 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3313 | unsigned length; | ||
3314 | unsigned blocksize; | ||
3371 | struct inode *inode = mapping->host; | 3315 | struct inode *inode = mapping->host; |
3372 | struct page *page; | ||
3373 | int err = 0; | ||
3374 | 3316 | ||
3375 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | 3317 | blocksize = inode->i_sb->s_blocksize; |
3376 | mapping_gfp_mask(mapping) & ~__GFP_FS); | 3318 | length = blocksize - (offset & (blocksize - 1)); |
3377 | if (!page) | ||
3378 | return -ENOMEM; | ||
3379 | |||
3380 | err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page, | ||
3381 | from, length, flags); | ||
3382 | 3319 | ||
3383 | unlock_page(page); | 3320 | return ext4_block_zero_page_range(handle, mapping, from, length); |
3384 | page_cache_release(page); | ||
3385 | return err; | ||
3386 | } | 3321 | } |
3387 | 3322 | ||
3388 | /* | 3323 | /* |
3389 | * ext4_discard_partial_page_buffers_no_lock() | 3324 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' |
3390 | * Zeros a page range of length 'length' starting from offset 'from'. | 3325 | * starting from file offset 'from'. The range to be zero'd must |
3391 | * Buffer heads that correspond to the block aligned regions of the | 3326 | * be contained with in one block. If the specified range exceeds |
3392 | * zeroed range will be unmapped. Unblock aligned regions | 3327 | * the end of the block it will be shortened to end of the block |
3393 | * will have the corresponding buffer head mapped if needed so that | 3328 | * that cooresponds to 'from' |
3394 | * that region of the page can be updated with the partial zero out. | ||
3395 | * | ||
3396 | * This function assumes that the page has already been locked. The | ||
3397 | * The range to be discarded must be contained with in the given page. | ||
3398 | * If the specified range exceeds the end of the page it will be shortened | ||
3399 | * to the end of the page that corresponds to 'from'. This function is | ||
3400 | * appropriate for updating a page and it buffer heads to be unmapped and | ||
3401 | * zeroed for blocks that have been either released, or are going to be | ||
3402 | * released. | ||
3403 | * | ||
3404 | * handle: The journal handle | ||
3405 | * inode: The files inode | ||
3406 | * page: A locked page that contains the offset "from" | ||
3407 | * from: The starting byte offset (from the beginning of the file) | ||
3408 | * to begin discarding | ||
3409 | * len: The length of bytes to discard | ||
3410 | * flags: Optional flags that may be used: | ||
3411 | * | ||
3412 | * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED | ||
3413 | * Only zero the regions of the page whose buffer heads | ||
3414 | * have already been unmapped. This flag is appropriate | ||
3415 | * for updating the contents of a page whose blocks may | ||
3416 | * have already been released, and we only want to zero | ||
3417 | * out the regions that correspond to those released blocks. | ||
3418 | * | ||
3419 | * Returns zero on success or negative on failure. | ||
3420 | */ | 3329 | */ |
3421 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | 3330 | int ext4_block_zero_page_range(handle_t *handle, |
3422 | struct inode *inode, struct page *page, loff_t from, | 3331 | struct address_space *mapping, loff_t from, loff_t length) |
3423 | loff_t length, int flags) | ||
3424 | { | 3332 | { |
3425 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3333 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
3426 | unsigned int offset = from & (PAGE_CACHE_SIZE-1); | 3334 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3427 | unsigned int blocksize, max, pos; | 3335 | unsigned blocksize, max, pos; |
3428 | ext4_lblk_t iblock; | 3336 | ext4_lblk_t iblock; |
3337 | struct inode *inode = mapping->host; | ||
3429 | struct buffer_head *bh; | 3338 | struct buffer_head *bh; |
3339 | struct page *page; | ||
3430 | int err = 0; | 3340 | int err = 0; |
3431 | 3341 | ||
3432 | blocksize = inode->i_sb->s_blocksize; | 3342 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, |
3433 | max = PAGE_CACHE_SIZE - offset; | 3343 | mapping_gfp_mask(mapping) & ~__GFP_FS); |
3344 | if (!page) | ||
3345 | return -ENOMEM; | ||
3434 | 3346 | ||
3435 | if (index != page->index) | 3347 | blocksize = inode->i_sb->s_blocksize; |
3436 | return -EINVAL; | 3348 | max = blocksize - (offset & (blocksize - 1)); |
3437 | 3349 | ||
3438 | /* | 3350 | /* |
3439 | * correct length if it does not fall between | 3351 | * correct length if it does not fall between |
3440 | * 'from' and the end of the page | 3352 | * 'from' and the end of the block |
3441 | */ | 3353 | */ |
3442 | if (length > max || length < 0) | 3354 | if (length > max || length < 0) |
3443 | length = max; | 3355 | length = max; |
@@ -3455,106 +3367,91 @@ static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | |||
3455 | iblock++; | 3367 | iblock++; |
3456 | pos += blocksize; | 3368 | pos += blocksize; |
3457 | } | 3369 | } |
3458 | 3370 | if (buffer_freed(bh)) { | |
3459 | pos = offset; | 3371 | BUFFER_TRACE(bh, "freed: skip"); |
3460 | while (pos < offset + length) { | 3372 | goto unlock; |
3461 | unsigned int end_of_block, range_to_discard; | 3373 | } |
3462 | 3374 | if (!buffer_mapped(bh)) { | |
3463 | err = 0; | 3375 | BUFFER_TRACE(bh, "unmapped"); |
3464 | 3376 | ext4_get_block(inode, iblock, bh, 0); | |
3465 | /* The length of space left to zero and unmap */ | 3377 | /* unmapped? It's a hole - nothing to do */ |
3466 | range_to_discard = offset + length - pos; | ||
3467 | |||
3468 | /* The length of space until the end of the block */ | ||
3469 | end_of_block = blocksize - (pos & (blocksize-1)); | ||
3470 | |||
3471 | /* | ||
3472 | * Do not unmap or zero past end of block | ||
3473 | * for this buffer head | ||
3474 | */ | ||
3475 | if (range_to_discard > end_of_block) | ||
3476 | range_to_discard = end_of_block; | ||
3477 | |||
3478 | |||
3479 | /* | ||
3480 | * Skip this buffer head if we are only zeroing unampped | ||
3481 | * regions of the page | ||
3482 | */ | ||
3483 | if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED && | ||
3484 | buffer_mapped(bh)) | ||
3485 | goto next; | ||
3486 | |||
3487 | /* If the range is block aligned, unmap */ | ||
3488 | if (range_to_discard == blocksize) { | ||
3489 | clear_buffer_dirty(bh); | ||
3490 | bh->b_bdev = NULL; | ||
3491 | clear_buffer_mapped(bh); | ||
3492 | clear_buffer_req(bh); | ||
3493 | clear_buffer_new(bh); | ||
3494 | clear_buffer_delay(bh); | ||
3495 | clear_buffer_unwritten(bh); | ||
3496 | clear_buffer_uptodate(bh); | ||
3497 | zero_user(page, pos, range_to_discard); | ||
3498 | BUFFER_TRACE(bh, "Buffer discarded"); | ||
3499 | goto next; | ||
3500 | } | ||
3501 | |||
3502 | /* | ||
3503 | * If this block is not completely contained in the range | ||
3504 | * to be discarded, then it is not going to be released. Because | ||
3505 | * we need to keep this block, we need to make sure this part | ||
3506 | * of the page is uptodate before we modify it by writeing | ||
3507 | * partial zeros on it. | ||
3508 | */ | ||
3509 | if (!buffer_mapped(bh)) { | 3378 | if (!buffer_mapped(bh)) { |
3510 | /* | 3379 | BUFFER_TRACE(bh, "still unmapped"); |
3511 | * Buffer head must be mapped before we can read | 3380 | goto unlock; |
3512 | * from the block | ||
3513 | */ | ||
3514 | BUFFER_TRACE(bh, "unmapped"); | ||
3515 | ext4_get_block(inode, iblock, bh, 0); | ||
3516 | /* unmapped? It's a hole - nothing to do */ | ||
3517 | if (!buffer_mapped(bh)) { | ||
3518 | BUFFER_TRACE(bh, "still unmapped"); | ||
3519 | goto next; | ||
3520 | } | ||
3521 | } | 3381 | } |
3382 | } | ||
3522 | 3383 | ||
3523 | /* Ok, it's mapped. Make sure it's up-to-date */ | 3384 | /* Ok, it's mapped. Make sure it's up-to-date */ |
3524 | if (PageUptodate(page)) | 3385 | if (PageUptodate(page)) |
3525 | set_buffer_uptodate(bh); | 3386 | set_buffer_uptodate(bh); |
3526 | 3387 | ||
3527 | if (!buffer_uptodate(bh)) { | 3388 | if (!buffer_uptodate(bh)) { |
3528 | err = -EIO; | 3389 | err = -EIO; |
3529 | ll_rw_block(READ, 1, &bh); | 3390 | ll_rw_block(READ, 1, &bh); |
3530 | wait_on_buffer(bh); | 3391 | wait_on_buffer(bh); |
3531 | /* Uhhuh. Read error. Complain and punt.*/ | 3392 | /* Uhhuh. Read error. Complain and punt. */ |
3532 | if (!buffer_uptodate(bh)) | 3393 | if (!buffer_uptodate(bh)) |
3533 | goto next; | 3394 | goto unlock; |
3534 | } | 3395 | } |
3396 | if (ext4_should_journal_data(inode)) { | ||
3397 | BUFFER_TRACE(bh, "get write access"); | ||
3398 | err = ext4_journal_get_write_access(handle, bh); | ||
3399 | if (err) | ||
3400 | goto unlock; | ||
3401 | } | ||
3402 | zero_user(page, offset, length); | ||
3403 | BUFFER_TRACE(bh, "zeroed end of block"); | ||
3535 | 3404 | ||
3536 | if (ext4_should_journal_data(inode)) { | 3405 | if (ext4_should_journal_data(inode)) { |
3537 | BUFFER_TRACE(bh, "get write access"); | 3406 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
3538 | err = ext4_journal_get_write_access(handle, bh); | 3407 | } else { |
3539 | if (err) | 3408 | err = 0; |
3540 | goto next; | 3409 | mark_buffer_dirty(bh); |
3541 | } | 3410 | if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) |
3411 | err = ext4_jbd2_file_inode(handle, inode); | ||
3412 | } | ||
3542 | 3413 | ||
3543 | zero_user(page, pos, range_to_discard); | 3414 | unlock: |
3415 | unlock_page(page); | ||
3416 | page_cache_release(page); | ||
3417 | return err; | ||
3418 | } | ||
3544 | 3419 | ||
3545 | err = 0; | 3420 | int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
3546 | if (ext4_should_journal_data(inode)) { | 3421 | loff_t lstart, loff_t length) |
3547 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 3422 | { |
3548 | } else | 3423 | struct super_block *sb = inode->i_sb; |
3549 | mark_buffer_dirty(bh); | 3424 | struct address_space *mapping = inode->i_mapping; |
3425 | unsigned partial_start, partial_end; | ||
3426 | ext4_fsblk_t start, end; | ||
3427 | loff_t byte_end = (lstart + length - 1); | ||
3428 | int err = 0; | ||
3550 | 3429 | ||
3551 | BUFFER_TRACE(bh, "Partial buffer zeroed"); | 3430 | partial_start = lstart & (sb->s_blocksize - 1); |
3552 | next: | 3431 | partial_end = byte_end & (sb->s_blocksize - 1); |
3553 | bh = bh->b_this_page; | ||
3554 | iblock++; | ||
3555 | pos += range_to_discard; | ||
3556 | } | ||
3557 | 3432 | ||
3433 | start = lstart >> sb->s_blocksize_bits; | ||
3434 | end = byte_end >> sb->s_blocksize_bits; | ||
3435 | |||
3436 | /* Handle partial zero within the single block */ | ||
3437 | if (start == end && | ||
3438 | (partial_start || (partial_end != sb->s_blocksize - 1))) { | ||
3439 | err = ext4_block_zero_page_range(handle, mapping, | ||
3440 | lstart, length); | ||
3441 | return err; | ||
3442 | } | ||
3443 | /* Handle partial zero out on the start of the range */ | ||
3444 | if (partial_start) { | ||
3445 | err = ext4_block_zero_page_range(handle, mapping, | ||
3446 | lstart, sb->s_blocksize); | ||
3447 | if (err) | ||
3448 | return err; | ||
3449 | } | ||
3450 | /* Handle partial zero out on the end of the range */ | ||
3451 | if (partial_end != sb->s_blocksize - 1) | ||
3452 | err = ext4_block_zero_page_range(handle, mapping, | ||
3453 | byte_end - partial_end, | ||
3454 | partial_end + 1); | ||
3558 | return err; | 3455 | return err; |
3559 | } | 3456 | } |
3560 | 3457 | ||
@@ -3580,14 +3477,12 @@ int ext4_can_truncate(struct inode *inode) | |||
3580 | * Returns: 0 on success or negative on failure | 3477 | * Returns: 0 on success or negative on failure |
3581 | */ | 3478 | */ |
3582 | 3479 | ||
3583 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | 3480 | int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) |
3584 | { | 3481 | { |
3585 | struct inode *inode = file_inode(file); | ||
3586 | struct super_block *sb = inode->i_sb; | 3482 | struct super_block *sb = inode->i_sb; |
3587 | ext4_lblk_t first_block, stop_block; | 3483 | ext4_lblk_t first_block, stop_block; |
3588 | struct address_space *mapping = inode->i_mapping; | 3484 | struct address_space *mapping = inode->i_mapping; |
3589 | loff_t first_page, last_page, page_len; | 3485 | loff_t first_block_offset, last_block_offset; |
3590 | loff_t first_page_offset, last_page_offset; | ||
3591 | handle_t *handle; | 3486 | handle_t *handle; |
3592 | unsigned int credits; | 3487 | unsigned int credits; |
3593 | int ret = 0; | 3488 | int ret = 0; |
@@ -3638,23 +3533,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
3638 | offset; | 3533 | offset; |
3639 | } | 3534 | } |
3640 | 3535 | ||
3641 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 3536 | first_block_offset = round_up(offset, sb->s_blocksize); |
3642 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | 3537 | last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; |
3643 | 3538 | ||
3644 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | 3539 | /* Now release the pages and zero block aligned part of pages*/ |
3645 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | 3540 | if (last_block_offset > first_block_offset) |
3646 | 3541 | truncate_pagecache_range(inode, first_block_offset, | |
3647 | /* Now release the pages */ | 3542 | last_block_offset); |
3648 | if (last_page_offset > first_page_offset) { | ||
3649 | truncate_pagecache_range(inode, first_page_offset, | ||
3650 | last_page_offset - 1); | ||
3651 | } | ||
3652 | 3543 | ||
3653 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | 3544 | /* Wait all existing dio workers, newcomers will block on i_mutex */ |
3654 | ext4_inode_block_unlocked_dio(inode); | 3545 | ext4_inode_block_unlocked_dio(inode); |
3655 | ret = ext4_flush_unwritten_io(inode); | ||
3656 | if (ret) | ||
3657 | goto out_dio; | ||
3658 | inode_dio_wait(inode); | 3546 | inode_dio_wait(inode); |
3659 | 3547 | ||
3660 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3548 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
@@ -3668,66 +3556,10 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
3668 | goto out_dio; | 3556 | goto out_dio; |
3669 | } | 3557 | } |
3670 | 3558 | ||
3671 | /* | 3559 | ret = ext4_zero_partial_blocks(handle, inode, offset, |
3672 | * Now we need to zero out the non-page-aligned data in the | 3560 | length); |
3673 | * pages at the start and tail of the hole, and unmap the | 3561 | if (ret) |
3674 | * buffer heads for the block aligned regions of the page that | 3562 | goto out_stop; |
3675 | * were completely zeroed. | ||
3676 | */ | ||
3677 | if (first_page > last_page) { | ||
3678 | /* | ||
3679 | * If the file space being truncated is contained | ||
3680 | * within a page just zero out and unmap the middle of | ||
3681 | * that page | ||
3682 | */ | ||
3683 | ret = ext4_discard_partial_page_buffers(handle, | ||
3684 | mapping, offset, length, 0); | ||
3685 | |||
3686 | if (ret) | ||
3687 | goto out_stop; | ||
3688 | } else { | ||
3689 | /* | ||
3690 | * zero out and unmap the partial page that contains | ||
3691 | * the start of the hole | ||
3692 | */ | ||
3693 | page_len = first_page_offset - offset; | ||
3694 | if (page_len > 0) { | ||
3695 | ret = ext4_discard_partial_page_buffers(handle, mapping, | ||
3696 | offset, page_len, 0); | ||
3697 | if (ret) | ||
3698 | goto out_stop; | ||
3699 | } | ||
3700 | |||
3701 | /* | ||
3702 | * zero out and unmap the partial page that contains | ||
3703 | * the end of the hole | ||
3704 | */ | ||
3705 | page_len = offset + length - last_page_offset; | ||
3706 | if (page_len > 0) { | ||
3707 | ret = ext4_discard_partial_page_buffers(handle, mapping, | ||
3708 | last_page_offset, page_len, 0); | ||
3709 | if (ret) | ||
3710 | goto out_stop; | ||
3711 | } | ||
3712 | } | ||
3713 | |||
3714 | /* | ||
3715 | * If i_size is contained in the last page, we need to | ||
3716 | * unmap and zero the partial page after i_size | ||
3717 | */ | ||
3718 | if (inode->i_size >> PAGE_CACHE_SHIFT == last_page && | ||
3719 | inode->i_size % PAGE_CACHE_SIZE != 0) { | ||
3720 | page_len = PAGE_CACHE_SIZE - | ||
3721 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
3722 | |||
3723 | if (page_len > 0) { | ||
3724 | ret = ext4_discard_partial_page_buffers(handle, | ||
3725 | mapping, inode->i_size, page_len, 0); | ||
3726 | |||
3727 | if (ret) | ||
3728 | goto out_stop; | ||
3729 | } | ||
3730 | } | ||
3731 | 3563 | ||
3732 | first_block = (offset + sb->s_blocksize - 1) >> | 3564 | first_block = (offset + sb->s_blocksize - 1) >> |
3733 | EXT4_BLOCK_SIZE_BITS(sb); | 3565 | EXT4_BLOCK_SIZE_BITS(sb); |
@@ -3803,7 +3635,6 @@ void ext4_truncate(struct inode *inode) | |||
3803 | unsigned int credits; | 3635 | unsigned int credits; |
3804 | handle_t *handle; | 3636 | handle_t *handle; |
3805 | struct address_space *mapping = inode->i_mapping; | 3637 | struct address_space *mapping = inode->i_mapping; |
3806 | loff_t page_len; | ||
3807 | 3638 | ||
3808 | /* | 3639 | /* |
3809 | * There is a possibility that we're either freeing the inode | 3640 | * There is a possibility that we're either freeing the inode |
@@ -3830,12 +3661,6 @@ void ext4_truncate(struct inode *inode) | |||
3830 | return; | 3661 | return; |
3831 | } | 3662 | } |
3832 | 3663 | ||
3833 | /* | ||
3834 | * finish any pending end_io work so we won't run the risk of | ||
3835 | * converting any truncated blocks to initialized later | ||
3836 | */ | ||
3837 | ext4_flush_unwritten_io(inode); | ||
3838 | |||
3839 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3664 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3840 | credits = ext4_writepage_trans_blocks(inode); | 3665 | credits = ext4_writepage_trans_blocks(inode); |
3841 | else | 3666 | else |
@@ -3847,14 +3672,8 @@ void ext4_truncate(struct inode *inode) | |||
3847 | return; | 3672 | return; |
3848 | } | 3673 | } |
3849 | 3674 | ||
3850 | if (inode->i_size % PAGE_CACHE_SIZE != 0) { | 3675 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) |
3851 | page_len = PAGE_CACHE_SIZE - | 3676 | ext4_block_truncate_page(handle, mapping, inode->i_size); |
3852 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
3853 | |||
3854 | if (ext4_discard_partial_page_buffers(handle, | ||
3855 | mapping, inode->i_size, page_len, 0)) | ||
3856 | goto out_stop; | ||
3857 | } | ||
3858 | 3677 | ||
3859 | /* | 3678 | /* |
3860 | * We add the inode to the orphan list, so that if this | 3679 | * We add the inode to the orphan list, so that if this |
@@ -4623,7 +4442,8 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) | |||
4623 | inode->i_size >> PAGE_CACHE_SHIFT); | 4442 | inode->i_size >> PAGE_CACHE_SHIFT); |
4624 | if (!page) | 4443 | if (!page) |
4625 | return; | 4444 | return; |
4626 | ret = __ext4_journalled_invalidatepage(page, offset); | 4445 | ret = __ext4_journalled_invalidatepage(page, offset, |
4446 | PAGE_CACHE_SIZE - offset); | ||
4627 | unlock_page(page); | 4447 | unlock_page(page); |
4628 | page_cache_release(page); | 4448 | page_cache_release(page); |
4629 | if (ret != -EBUSY) | 4449 | if (ret != -EBUSY) |
@@ -4805,7 +4625,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
4805 | struct kstat *stat) | 4625 | struct kstat *stat) |
4806 | { | 4626 | { |
4807 | struct inode *inode; | 4627 | struct inode *inode; |
4808 | unsigned long delalloc_blocks; | 4628 | unsigned long long delalloc_blocks; |
4809 | 4629 | ||
4810 | inode = dentry->d_inode; | 4630 | inode = dentry->d_inode; |
4811 | generic_fillattr(inode, stat); | 4631 | generic_fillattr(inode, stat); |
@@ -4823,15 +4643,16 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
4823 | delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), | 4643 | delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), |
4824 | EXT4_I(inode)->i_reserved_data_blocks); | 4644 | EXT4_I(inode)->i_reserved_data_blocks); |
4825 | 4645 | ||
4826 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | 4646 | stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); |
4827 | return 0; | 4647 | return 0; |
4828 | } | 4648 | } |
4829 | 4649 | ||
4830 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 4650 | static int ext4_index_trans_blocks(struct inode *inode, int lblocks, |
4651 | int pextents) | ||
4831 | { | 4652 | { |
4832 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 4653 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
4833 | return ext4_ind_trans_blocks(inode, nrblocks, chunk); | 4654 | return ext4_ind_trans_blocks(inode, lblocks); |
4834 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); | 4655 | return ext4_ext_index_trans_blocks(inode, pextents); |
4835 | } | 4656 | } |
4836 | 4657 | ||
4837 | /* | 4658 | /* |
@@ -4845,7 +4666,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4845 | * | 4666 | * |
4846 | * Also account for superblock, inode, quota and xattr blocks | 4667 | * Also account for superblock, inode, quota and xattr blocks |
4847 | */ | 4668 | */ |
4848 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 4669 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, |
4670 | int pextents) | ||
4849 | { | 4671 | { |
4850 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 4672 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
4851 | int gdpblocks; | 4673 | int gdpblocks; |
@@ -4853,14 +4675,10 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4853 | int ret = 0; | 4675 | int ret = 0; |
4854 | 4676 | ||
4855 | /* | 4677 | /* |
4856 | * How many index blocks need to touch to modify nrblocks? | 4678 | * How many index blocks need to touch to map @lblocks logical blocks |
4857 | * The "Chunk" flag indicating whether the nrblocks is | 4679 | * to @pextents physical extents? |
4858 | * physically contiguous on disk | ||
4859 | * | ||
4860 | * For Direct IO and fallocate, they calls get_block to allocate | ||
4861 | * one single extent at a time, so they could set the "Chunk" flag | ||
4862 | */ | 4680 | */ |
4863 | idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); | 4681 | idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents); |
4864 | 4682 | ||
4865 | ret = idxblocks; | 4683 | ret = idxblocks; |
4866 | 4684 | ||
@@ -4868,12 +4686,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4868 | * Now let's see how many group bitmaps and group descriptors need | 4686 | * Now let's see how many group bitmaps and group descriptors need |
4869 | * to account | 4687 | * to account |
4870 | */ | 4688 | */ |
4871 | groups = idxblocks; | 4689 | groups = idxblocks + pextents; |
4872 | if (chunk) | ||
4873 | groups += 1; | ||
4874 | else | ||
4875 | groups += nrblocks; | ||
4876 | |||
4877 | gdpblocks = groups; | 4690 | gdpblocks = groups; |
4878 | if (groups > ngroups) | 4691 | if (groups > ngroups) |
4879 | groups = ngroups; | 4692 | groups = ngroups; |
@@ -4904,7 +4717,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
4904 | int bpp = ext4_journal_blocks_per_page(inode); | 4717 | int bpp = ext4_journal_blocks_per_page(inode); |
4905 | int ret; | 4718 | int ret; |
4906 | 4719 | ||
4907 | ret = ext4_meta_trans_blocks(inode, bpp, 0); | 4720 | ret = ext4_meta_trans_blocks(inode, bpp, bpp); |
4908 | 4721 | ||
4909 | /* Account for data blocks for journalled mode */ | 4722 | /* Account for data blocks for journalled mode */ |
4910 | if (ext4_should_journal_data(inode)) | 4723 | if (ext4_should_journal_data(inode)) |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 9491ac0590f7..c0427e2f6648 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -77,8 +77,10 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) | |||
77 | memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); | 77 | memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); |
78 | memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags)); | 78 | memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags)); |
79 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); | 79 | memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); |
80 | memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree)); | 80 | ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); |
81 | memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr)); | 81 | ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); |
82 | ext4_es_lru_del(inode1); | ||
83 | ext4_es_lru_del(inode2); | ||
82 | 84 | ||
83 | isize = i_size_read(inode1); | 85 | isize = i_size_read(inode1); |
84 | i_size_write(inode1, i_size_read(inode2)); | 86 | i_size_write(inode1, i_size_read(inode2)); |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index def84082a9a9..4bbbf13bd743 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2105,6 +2105,7 @@ repeat: | |||
2105 | group = ac->ac_g_ex.fe_group; | 2105 | group = ac->ac_g_ex.fe_group; |
2106 | 2106 | ||
2107 | for (i = 0; i < ngroups; group++, i++) { | 2107 | for (i = 0; i < ngroups; group++, i++) { |
2108 | cond_resched(); | ||
2108 | /* | 2109 | /* |
2109 | * Artificially restricted ngroups for non-extent | 2110 | * Artificially restricted ngroups for non-extent |
2110 | * files makes group > ngroups possible on first loop. | 2111 | * files makes group > ngroups possible on first loop. |
@@ -4405,17 +4406,20 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4405 | repeat: | 4406 | repeat: |
4406 | /* allocate space in core */ | 4407 | /* allocate space in core */ |
4407 | *errp = ext4_mb_regular_allocator(ac); | 4408 | *errp = ext4_mb_regular_allocator(ac); |
4408 | if (*errp) { | 4409 | if (*errp) |
4409 | ext4_discard_allocated_blocks(ac); | 4410 | goto discard_and_exit; |
4410 | goto errout; | ||
4411 | } | ||
4412 | 4411 | ||
4413 | /* as we've just preallocated more space than | 4412 | /* as we've just preallocated more space than |
4414 | * user requested orinally, we store allocated | 4413 | * user requested originally, we store allocated |
4415 | * space in a special descriptor */ | 4414 | * space in a special descriptor */ |
4416 | if (ac->ac_status == AC_STATUS_FOUND && | 4415 | if (ac->ac_status == AC_STATUS_FOUND && |
4417 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) | 4416 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) |
4418 | ext4_mb_new_preallocation(ac); | 4417 | *errp = ext4_mb_new_preallocation(ac); |
4418 | if (*errp) { | ||
4419 | discard_and_exit: | ||
4420 | ext4_discard_allocated_blocks(ac); | ||
4421 | goto errout; | ||
4422 | } | ||
4419 | } | 4423 | } |
4420 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4424 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4421 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); | 4425 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); |
@@ -4612,10 +4616,11 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4612 | BUG_ON(bh && (count > 1)); | 4616 | BUG_ON(bh && (count > 1)); |
4613 | 4617 | ||
4614 | for (i = 0; i < count; i++) { | 4618 | for (i = 0; i < count; i++) { |
4619 | cond_resched(); | ||
4615 | if (!bh) | 4620 | if (!bh) |
4616 | tbh = sb_find_get_block(inode->i_sb, | 4621 | tbh = sb_find_get_block(inode->i_sb, |
4617 | block + i); | 4622 | block + i); |
4618 | if (unlikely(!tbh)) | 4623 | if (!tbh) |
4619 | continue; | 4624 | continue; |
4620 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4625 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
4621 | inode, tbh, block + i); | 4626 | inode, tbh, block + i); |
@@ -4735,11 +4740,16 @@ do_more: | |||
4735 | * blocks being freed are metadata. these blocks shouldn't | 4740 | * blocks being freed are metadata. these blocks shouldn't |
4736 | * be used until this transaction is committed | 4741 | * be used until this transaction is committed |
4737 | */ | 4742 | */ |
4743 | retry: | ||
4738 | new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); | 4744 | new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); |
4739 | if (!new_entry) { | 4745 | if (!new_entry) { |
4740 | ext4_mb_unload_buddy(&e4b); | 4746 | /* |
4741 | err = -ENOMEM; | 4747 | * We use a retry loop because |
4742 | goto error_return; | 4748 | * ext4_free_blocks() is not allowed to fail. |
4749 | */ | ||
4750 | cond_resched(); | ||
4751 | congestion_wait(BLK_RW_ASYNC, HZ/50); | ||
4752 | goto retry; | ||
4743 | } | 4753 | } |
4744 | new_entry->efd_start_cluster = bit; | 4754 | new_entry->efd_start_cluster = bit; |
4745 | new_entry->efd_group = block_group; | 4755 | new_entry->efd_group = block_group; |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 3dcbf364022f..e86dddbd8296 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -912,7 +912,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
912 | struct page *pagep[2] = {NULL, NULL}; | 912 | struct page *pagep[2] = {NULL, NULL}; |
913 | handle_t *handle; | 913 | handle_t *handle; |
914 | ext4_lblk_t orig_blk_offset; | 914 | ext4_lblk_t orig_blk_offset; |
915 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; | ||
916 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 915 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
917 | unsigned int w_flags = 0; | 916 | unsigned int w_flags = 0; |
918 | unsigned int tmp_data_size, data_size, replaced_size; | 917 | unsigned int tmp_data_size, data_size, replaced_size; |
@@ -940,8 +939,6 @@ again: | |||
940 | orig_blk_offset = orig_page_offset * blocks_per_page + | 939 | orig_blk_offset = orig_page_offset * blocks_per_page + |
941 | data_offset_in_page; | 940 | data_offset_in_page; |
942 | 941 | ||
943 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; | ||
944 | |||
945 | /* Calculate data_size */ | 942 | /* Calculate data_size */ |
946 | if ((orig_blk_offset + block_len_in_page - 1) == | 943 | if ((orig_blk_offset + block_len_in_page - 1) == |
947 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { | 944 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6653fc35ecb7..35f55a0dbc4b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -918,11 +918,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
918 | bh->b_data, bh->b_size, | 918 | bh->b_data, bh->b_size, |
919 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) | 919 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) |
920 | + ((char *)de - bh->b_data))) { | 920 | + ((char *)de - bh->b_data))) { |
921 | /* On error, skip the f_pos to the next block. */ | 921 | /* silently ignore the rest of the block */ |
922 | dir_file->f_pos = (dir_file->f_pos | | 922 | break; |
923 | (dir->i_sb->s_blocksize - 1)) + 1; | ||
924 | brelse(bh); | ||
925 | return count; | ||
926 | } | 923 | } |
927 | ext4fs_dirhash(de->name, de->name_len, hinfo); | 924 | ext4fs_dirhash(de->name, de->name_len, hinfo); |
928 | if ((hinfo->hash < start_hash) || | 925 | if ((hinfo->hash < start_hash) || |
@@ -2299,6 +2296,45 @@ retry: | |||
2299 | return err; | 2296 | return err; |
2300 | } | 2297 | } |
2301 | 2298 | ||
2299 | static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
2300 | { | ||
2301 | handle_t *handle; | ||
2302 | struct inode *inode; | ||
2303 | int err, retries = 0; | ||
2304 | |||
2305 | dquot_initialize(dir); | ||
2306 | |||
2307 | retry: | ||
2308 | inode = ext4_new_inode_start_handle(dir, mode, | ||
2309 | NULL, 0, NULL, | ||
2310 | EXT4_HT_DIR, | ||
2311 | EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + | ||
2312 | 4 + EXT4_XATTR_TRANS_BLOCKS); | ||
2313 | handle = ext4_journal_current_handle(); | ||
2314 | err = PTR_ERR(inode); | ||
2315 | if (!IS_ERR(inode)) { | ||
2316 | inode->i_op = &ext4_file_inode_operations; | ||
2317 | inode->i_fop = &ext4_file_operations; | ||
2318 | ext4_set_aops(inode); | ||
2319 | d_tmpfile(dentry, inode); | ||
2320 | err = ext4_orphan_add(handle, inode); | ||
2321 | if (err) | ||
2322 | goto err_drop_inode; | ||
2323 | mark_inode_dirty(inode); | ||
2324 | unlock_new_inode(inode); | ||
2325 | } | ||
2326 | if (handle) | ||
2327 | ext4_journal_stop(handle); | ||
2328 | if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) | ||
2329 | goto retry; | ||
2330 | return err; | ||
2331 | err_drop_inode: | ||
2332 | ext4_journal_stop(handle); | ||
2333 | unlock_new_inode(inode); | ||
2334 | iput(inode); | ||
2335 | return err; | ||
2336 | } | ||
2337 | |||
2302 | struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, | 2338 | struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, |
2303 | struct ext4_dir_entry_2 *de, | 2339 | struct ext4_dir_entry_2 *de, |
2304 | int blocksize, int csum_size, | 2340 | int blocksize, int csum_size, |
@@ -2906,7 +2942,7 @@ static int ext4_link(struct dentry *old_dentry, | |||
2906 | retry: | 2942 | retry: |
2907 | handle = ext4_journal_start(dir, EXT4_HT_DIR, | 2943 | handle = ext4_journal_start(dir, EXT4_HT_DIR, |
2908 | (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 2944 | (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + |
2909 | EXT4_INDEX_EXTRA_TRANS_BLOCKS)); | 2945 | EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1); |
2910 | if (IS_ERR(handle)) | 2946 | if (IS_ERR(handle)) |
2911 | return PTR_ERR(handle); | 2947 | return PTR_ERR(handle); |
2912 | 2948 | ||
@@ -2920,6 +2956,11 @@ retry: | |||
2920 | err = ext4_add_entry(handle, dentry, inode); | 2956 | err = ext4_add_entry(handle, dentry, inode); |
2921 | if (!err) { | 2957 | if (!err) { |
2922 | ext4_mark_inode_dirty(handle, inode); | 2958 | ext4_mark_inode_dirty(handle, inode); |
2959 | /* this can happen only for tmpfile being | ||
2960 | * linked the first time | ||
2961 | */ | ||
2962 | if (inode->i_nlink == 1) | ||
2963 | ext4_orphan_del(handle, inode); | ||
2923 | d_instantiate(dentry, inode); | 2964 | d_instantiate(dentry, inode); |
2924 | } else { | 2965 | } else { |
2925 | drop_nlink(inode); | 2966 | drop_nlink(inode); |
@@ -3172,6 +3213,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
3172 | .mkdir = ext4_mkdir, | 3213 | .mkdir = ext4_mkdir, |
3173 | .rmdir = ext4_rmdir, | 3214 | .rmdir = ext4_rmdir, |
3174 | .mknod = ext4_mknod, | 3215 | .mknod = ext4_mknod, |
3216 | .tmpfile = ext4_tmpfile, | ||
3175 | .rename = ext4_rename, | 3217 | .rename = ext4_rename, |
3176 | .setattr = ext4_setattr, | 3218 | .setattr = ext4_setattr, |
3177 | .setxattr = generic_setxattr, | 3219 | .setxattr = generic_setxattr, |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 4acf1f78881b..6625d210fb45 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
28 | #include <linux/ratelimit.h> | ||
28 | 29 | ||
29 | #include "ext4_jbd2.h" | 30 | #include "ext4_jbd2.h" |
30 | #include "xattr.h" | 31 | #include "xattr.h" |
@@ -46,46 +47,121 @@ void ext4_exit_pageio(void) | |||
46 | } | 47 | } |
47 | 48 | ||
48 | /* | 49 | /* |
49 | * This function is called by ext4_evict_inode() to make sure there is | 50 | * Print an buffer I/O error compatible with the fs/buffer.c. This |
50 | * no more pending I/O completion work left to do. | 51 | * provides compatibility with dmesg scrapers that look for a specific |
52 | * buffer I/O error message. We really need a unified error reporting | ||
53 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
54 | * probably not going to happen in my lifetime, due to LKML politics... | ||
51 | */ | 55 | */ |
52 | void ext4_ioend_shutdown(struct inode *inode) | 56 | static void buffer_io_error(struct buffer_head *bh) |
57 | { | ||
58 | char b[BDEVNAME_SIZE]; | ||
59 | printk_ratelimited(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | ||
60 | bdevname(bh->b_bdev, b), | ||
61 | (unsigned long long)bh->b_blocknr); | ||
62 | } | ||
63 | |||
64 | static void ext4_finish_bio(struct bio *bio) | ||
53 | { | 65 | { |
54 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | 66 | int i; |
67 | int error = !test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
55 | 68 | ||
56 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); | 69 | for (i = 0; i < bio->bi_vcnt; i++) { |
57 | /* | 70 | struct bio_vec *bvec = &bio->bi_io_vec[i]; |
58 | * We need to make sure the work structure is finished being | 71 | struct page *page = bvec->bv_page; |
59 | * used before we let the inode get destroyed. | 72 | struct buffer_head *bh, *head; |
60 | */ | 73 | unsigned bio_start = bvec->bv_offset; |
61 | if (work_pending(&EXT4_I(inode)->i_unwritten_work)) | 74 | unsigned bio_end = bio_start + bvec->bv_len; |
62 | cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); | 75 | unsigned under_io = 0; |
76 | unsigned long flags; | ||
77 | |||
78 | if (!page) | ||
79 | continue; | ||
80 | |||
81 | if (error) { | ||
82 | SetPageError(page); | ||
83 | set_bit(AS_EIO, &page->mapping->flags); | ||
84 | } | ||
85 | bh = head = page_buffers(page); | ||
86 | /* | ||
87 | * We check all buffers in the page under BH_Uptodate_Lock | ||
88 | * to avoid races with other end io clearing async_write flags | ||
89 | */ | ||
90 | local_irq_save(flags); | ||
91 | bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | ||
92 | do { | ||
93 | if (bh_offset(bh) < bio_start || | ||
94 | bh_offset(bh) + bh->b_size > bio_end) { | ||
95 | if (buffer_async_write(bh)) | ||
96 | under_io++; | ||
97 | continue; | ||
98 | } | ||
99 | clear_buffer_async_write(bh); | ||
100 | if (error) | ||
101 | buffer_io_error(bh); | ||
102 | } while ((bh = bh->b_this_page) != head); | ||
103 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | ||
104 | local_irq_restore(flags); | ||
105 | if (!under_io) | ||
106 | end_page_writeback(page); | ||
107 | } | ||
108 | } | ||
109 | |||
110 | static void ext4_release_io_end(ext4_io_end_t *io_end) | ||
111 | { | ||
112 | struct bio *bio, *next_bio; | ||
113 | |||
114 | BUG_ON(!list_empty(&io_end->list)); | ||
115 | BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); | ||
116 | WARN_ON(io_end->handle); | ||
117 | |||
118 | if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) | ||
119 | wake_up_all(ext4_ioend_wq(io_end->inode)); | ||
120 | |||
121 | for (bio = io_end->bio; bio; bio = next_bio) { | ||
122 | next_bio = bio->bi_private; | ||
123 | ext4_finish_bio(bio); | ||
124 | bio_put(bio); | ||
125 | } | ||
126 | if (io_end->flag & EXT4_IO_END_DIRECT) | ||
127 | inode_dio_done(io_end->inode); | ||
128 | if (io_end->iocb) | ||
129 | aio_complete(io_end->iocb, io_end->result, 0); | ||
130 | kmem_cache_free(io_end_cachep, io_end); | ||
63 | } | 131 | } |
64 | 132 | ||
65 | void ext4_free_io_end(ext4_io_end_t *io) | 133 | static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) |
66 | { | 134 | { |
67 | BUG_ON(!io); | 135 | struct inode *inode = io_end->inode; |
68 | BUG_ON(!list_empty(&io->list)); | ||
69 | BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); | ||
70 | 136 | ||
71 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) | 137 | io_end->flag &= ~EXT4_IO_END_UNWRITTEN; |
72 | wake_up_all(ext4_ioend_wq(io->inode)); | 138 | /* Wake up anyone waiting on unwritten extent conversion */ |
73 | kmem_cache_free(io_end_cachep, io); | 139 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) |
140 | wake_up_all(ext4_ioend_wq(inode)); | ||
74 | } | 141 | } |
75 | 142 | ||
76 | /* check a range of space and convert unwritten extents to written. */ | 143 | /* |
144 | * Check a range of space and convert unwritten extents to written. Note that | ||
145 | * we are protected from truncate touching same part of extent tree by the | ||
146 | * fact that truncate code waits for all DIO to finish (thus exclusion from | ||
147 | * direct IO is achieved) and also waits for PageWriteback bits. Thus we | ||
148 | * cannot get to ext4_ext_truncate() before all IOs overlapping that range are | ||
149 | * completed (happens from ext4_free_ioend()). | ||
150 | */ | ||
77 | static int ext4_end_io(ext4_io_end_t *io) | 151 | static int ext4_end_io(ext4_io_end_t *io) |
78 | { | 152 | { |
79 | struct inode *inode = io->inode; | 153 | struct inode *inode = io->inode; |
80 | loff_t offset = io->offset; | 154 | loff_t offset = io->offset; |
81 | ssize_t size = io->size; | 155 | ssize_t size = io->size; |
156 | handle_t *handle = io->handle; | ||
82 | int ret = 0; | 157 | int ret = 0; |
83 | 158 | ||
84 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | 159 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
85 | "list->prev 0x%p\n", | 160 | "list->prev 0x%p\n", |
86 | io, inode->i_ino, io->list.next, io->list.prev); | 161 | io, inode->i_ino, io->list.next, io->list.prev); |
87 | 162 | ||
88 | ret = ext4_convert_unwritten_extents(inode, offset, size); | 163 | io->handle = NULL; /* Following call will use up the handle */ |
164 | ret = ext4_convert_unwritten_extents(handle, inode, offset, size); | ||
89 | if (ret < 0) { | 165 | if (ret < 0) { |
90 | ext4_msg(inode->i_sb, KERN_EMERG, | 166 | ext4_msg(inode->i_sb, KERN_EMERG, |
91 | "failed to convert unwritten extents to written " | 167 | "failed to convert unwritten extents to written " |
@@ -93,30 +169,22 @@ static int ext4_end_io(ext4_io_end_t *io) | |||
93 | "(inode %lu, offset %llu, size %zd, error %d)", | 169 | "(inode %lu, offset %llu, size %zd, error %d)", |
94 | inode->i_ino, offset, size, ret); | 170 | inode->i_ino, offset, size, ret); |
95 | } | 171 | } |
96 | /* Wake up anyone waiting on unwritten extent conversion */ | 172 | ext4_clear_io_unwritten_flag(io); |
97 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) | 173 | ext4_release_io_end(io); |
98 | wake_up_all(ext4_ioend_wq(inode)); | ||
99 | if (io->flag & EXT4_IO_END_DIRECT) | ||
100 | inode_dio_done(inode); | ||
101 | if (io->iocb) | ||
102 | aio_complete(io->iocb, io->result, 0); | ||
103 | return ret; | 174 | return ret; |
104 | } | 175 | } |
105 | 176 | ||
106 | static void dump_completed_IO(struct inode *inode) | 177 | static void dump_completed_IO(struct inode *inode, struct list_head *head) |
107 | { | 178 | { |
108 | #ifdef EXT4FS_DEBUG | 179 | #ifdef EXT4FS_DEBUG |
109 | struct list_head *cur, *before, *after; | 180 | struct list_head *cur, *before, *after; |
110 | ext4_io_end_t *io, *io0, *io1; | 181 | ext4_io_end_t *io, *io0, *io1; |
111 | 182 | ||
112 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { | 183 | if (list_empty(head)) |
113 | ext4_debug("inode %lu completed_io list is empty\n", | ||
114 | inode->i_ino); | ||
115 | return; | 184 | return; |
116 | } | ||
117 | 185 | ||
118 | ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino); | 186 | ext4_debug("Dump inode %lu completed io list\n", inode->i_ino); |
119 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) { | 187 | list_for_each_entry(io, head, list) { |
120 | cur = &io->list; | 188 | cur = &io->list; |
121 | before = cur->prev; | 189 | before = cur->prev; |
122 | io0 = container_of(before, ext4_io_end_t, list); | 190 | io0 = container_of(before, ext4_io_end_t, list); |
@@ -130,23 +198,30 @@ static void dump_completed_IO(struct inode *inode) | |||
130 | } | 198 | } |
131 | 199 | ||
132 | /* Add the io_end to per-inode completed end_io list. */ | 200 | /* Add the io_end to per-inode completed end_io list. */ |
133 | void ext4_add_complete_io(ext4_io_end_t *io_end) | 201 | static void ext4_add_complete_io(ext4_io_end_t *io_end) |
134 | { | 202 | { |
135 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); | 203 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); |
136 | struct workqueue_struct *wq; | 204 | struct workqueue_struct *wq; |
137 | unsigned long flags; | 205 | unsigned long flags; |
138 | 206 | ||
139 | BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); | 207 | BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); |
140 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | ||
141 | |||
142 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 208 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
143 | if (list_empty(&ei->i_completed_io_list)) | 209 | if (io_end->handle) { |
144 | queue_work(wq, &ei->i_unwritten_work); | 210 | wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq; |
145 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | 211 | if (list_empty(&ei->i_rsv_conversion_list)) |
212 | queue_work(wq, &ei->i_rsv_conversion_work); | ||
213 | list_add_tail(&io_end->list, &ei->i_rsv_conversion_list); | ||
214 | } else { | ||
215 | wq = EXT4_SB(io_end->inode->i_sb)->unrsv_conversion_wq; | ||
216 | if (list_empty(&ei->i_unrsv_conversion_list)) | ||
217 | queue_work(wq, &ei->i_unrsv_conversion_work); | ||
218 | list_add_tail(&io_end->list, &ei->i_unrsv_conversion_list); | ||
219 | } | ||
146 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 220 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
147 | } | 221 | } |
148 | 222 | ||
149 | static int ext4_do_flush_completed_IO(struct inode *inode) | 223 | static int ext4_do_flush_completed_IO(struct inode *inode, |
224 | struct list_head *head) | ||
150 | { | 225 | { |
151 | ext4_io_end_t *io; | 226 | ext4_io_end_t *io; |
152 | struct list_head unwritten; | 227 | struct list_head unwritten; |
@@ -155,8 +230,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode) | |||
155 | int err, ret = 0; | 230 | int err, ret = 0; |
156 | 231 | ||
157 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 232 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
158 | dump_completed_IO(inode); | 233 | dump_completed_IO(inode, head); |
159 | list_replace_init(&ei->i_completed_io_list, &unwritten); | 234 | list_replace_init(head, &unwritten); |
160 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 235 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
161 | 236 | ||
162 | while (!list_empty(&unwritten)) { | 237 | while (!list_empty(&unwritten)) { |
@@ -167,30 +242,25 @@ static int ext4_do_flush_completed_IO(struct inode *inode) | |||
167 | err = ext4_end_io(io); | 242 | err = ext4_end_io(io); |
168 | if (unlikely(!ret && err)) | 243 | if (unlikely(!ret && err)) |
169 | ret = err; | 244 | ret = err; |
170 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
171 | ext4_free_io_end(io); | ||
172 | } | 245 | } |
173 | return ret; | 246 | return ret; |
174 | } | 247 | } |
175 | 248 | ||
176 | /* | 249 | /* |
177 | * work on completed aio dio IO, to convert unwritten extents to extents | 250 | * work on completed IO, to convert unwritten extents to extents |
178 | */ | 251 | */ |
179 | void ext4_end_io_work(struct work_struct *work) | 252 | void ext4_end_io_rsv_work(struct work_struct *work) |
180 | { | 253 | { |
181 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, | 254 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, |
182 | i_unwritten_work); | 255 | i_rsv_conversion_work); |
183 | ext4_do_flush_completed_IO(&ei->vfs_inode); | 256 | ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list); |
184 | } | 257 | } |
185 | 258 | ||
186 | int ext4_flush_unwritten_io(struct inode *inode) | 259 | void ext4_end_io_unrsv_work(struct work_struct *work) |
187 | { | 260 | { |
188 | int ret; | 261 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, |
189 | WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && | 262 | i_unrsv_conversion_work); |
190 | !(inode->i_state & I_FREEING)); | 263 | ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_unrsv_conversion_list); |
191 | ret = ext4_do_flush_completed_IO(inode); | ||
192 | ext4_unwritten_wait(inode); | ||
193 | return ret; | ||
194 | } | 264 | } |
195 | 265 | ||
196 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | 266 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) |
@@ -200,83 +270,59 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | |||
200 | atomic_inc(&EXT4_I(inode)->i_ioend_count); | 270 | atomic_inc(&EXT4_I(inode)->i_ioend_count); |
201 | io->inode = inode; | 271 | io->inode = inode; |
202 | INIT_LIST_HEAD(&io->list); | 272 | INIT_LIST_HEAD(&io->list); |
273 | atomic_set(&io->count, 1); | ||
203 | } | 274 | } |
204 | return io; | 275 | return io; |
205 | } | 276 | } |
206 | 277 | ||
207 | /* | 278 | void ext4_put_io_end_defer(ext4_io_end_t *io_end) |
208 | * Print an buffer I/O error compatible with the fs/buffer.c. This | ||
209 | * provides compatibility with dmesg scrapers that look for a specific | ||
210 | * buffer I/O error message. We really need a unified error reporting | ||
211 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
212 | * probably not going to happen in my lifetime, due to LKML politics... | ||
213 | */ | ||
214 | static void buffer_io_error(struct buffer_head *bh) | ||
215 | { | 279 | { |
216 | char b[BDEVNAME_SIZE]; | 280 | if (atomic_dec_and_test(&io_end->count)) { |
217 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | 281 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) { |
218 | bdevname(bh->b_bdev, b), | 282 | ext4_release_io_end(io_end); |
219 | (unsigned long long)bh->b_blocknr); | 283 | return; |
284 | } | ||
285 | ext4_add_complete_io(io_end); | ||
286 | } | ||
220 | } | 287 | } |
221 | 288 | ||
289 | int ext4_put_io_end(ext4_io_end_t *io_end) | ||
290 | { | ||
291 | int err = 0; | ||
292 | |||
293 | if (atomic_dec_and_test(&io_end->count)) { | ||
294 | if (io_end->flag & EXT4_IO_END_UNWRITTEN) { | ||
295 | err = ext4_convert_unwritten_extents(io_end->handle, | ||
296 | io_end->inode, io_end->offset, | ||
297 | io_end->size); | ||
298 | io_end->handle = NULL; | ||
299 | ext4_clear_io_unwritten_flag(io_end); | ||
300 | } | ||
301 | ext4_release_io_end(io_end); | ||
302 | } | ||
303 | return err; | ||
304 | } | ||
305 | |||
306 | ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) | ||
307 | { | ||
308 | atomic_inc(&io_end->count); | ||
309 | return io_end; | ||
310 | } | ||
311 | |||
312 | /* BIO completion function for page writeback */ | ||
222 | static void ext4_end_bio(struct bio *bio, int error) | 313 | static void ext4_end_bio(struct bio *bio, int error) |
223 | { | 314 | { |
224 | ext4_io_end_t *io_end = bio->bi_private; | 315 | ext4_io_end_t *io_end = bio->bi_private; |
225 | struct inode *inode; | ||
226 | int i; | ||
227 | int blocksize; | ||
228 | sector_t bi_sector = bio->bi_sector; | 316 | sector_t bi_sector = bio->bi_sector; |
229 | 317 | ||
230 | BUG_ON(!io_end); | 318 | BUG_ON(!io_end); |
231 | inode = io_end->inode; | ||
232 | blocksize = 1 << inode->i_blkbits; | ||
233 | bio->bi_private = NULL; | ||
234 | bio->bi_end_io = NULL; | 319 | bio->bi_end_io = NULL; |
235 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 320 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) |
236 | error = 0; | 321 | error = 0; |
237 | for (i = 0; i < bio->bi_vcnt; i++) { | ||
238 | struct bio_vec *bvec = &bio->bi_io_vec[i]; | ||
239 | struct page *page = bvec->bv_page; | ||
240 | struct buffer_head *bh, *head; | ||
241 | unsigned bio_start = bvec->bv_offset; | ||
242 | unsigned bio_end = bio_start + bvec->bv_len; | ||
243 | unsigned under_io = 0; | ||
244 | unsigned long flags; | ||
245 | |||
246 | if (!page) | ||
247 | continue; | ||
248 | |||
249 | if (error) { | ||
250 | SetPageError(page); | ||
251 | set_bit(AS_EIO, &page->mapping->flags); | ||
252 | } | ||
253 | bh = head = page_buffers(page); | ||
254 | /* | ||
255 | * We check all buffers in the page under BH_Uptodate_Lock | ||
256 | * to avoid races with other end io clearing async_write flags | ||
257 | */ | ||
258 | local_irq_save(flags); | ||
259 | bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | ||
260 | do { | ||
261 | if (bh_offset(bh) < bio_start || | ||
262 | bh_offset(bh) + blocksize > bio_end) { | ||
263 | if (buffer_async_write(bh)) | ||
264 | under_io++; | ||
265 | continue; | ||
266 | } | ||
267 | clear_buffer_async_write(bh); | ||
268 | if (error) | ||
269 | buffer_io_error(bh); | ||
270 | } while ((bh = bh->b_this_page) != head); | ||
271 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | ||
272 | local_irq_restore(flags); | ||
273 | if (!under_io) | ||
274 | end_page_writeback(page); | ||
275 | } | ||
276 | bio_put(bio); | ||
277 | 322 | ||
278 | if (error) { | 323 | if (error) { |
279 | io_end->flag |= EXT4_IO_END_ERROR; | 324 | struct inode *inode = io_end->inode; |
325 | |||
280 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " | 326 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " |
281 | "(offset %llu size %ld starting block %llu)", | 327 | "(offset %llu size %ld starting block %llu)", |
282 | inode->i_ino, | 328 | inode->i_ino, |
@@ -286,12 +332,23 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
286 | bi_sector >> (inode->i_blkbits - 9)); | 332 | bi_sector >> (inode->i_blkbits - 9)); |
287 | } | 333 | } |
288 | 334 | ||
289 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 335 | if (io_end->flag & EXT4_IO_END_UNWRITTEN) { |
290 | ext4_free_io_end(io_end); | 336 | /* |
291 | return; | 337 | * Link bio into list hanging from io_end. We have to do it |
338 | * atomically as bio completions can be racing against each | ||
339 | * other. | ||
340 | */ | ||
341 | bio->bi_private = xchg(&io_end->bio, bio); | ||
342 | ext4_put_io_end_defer(io_end); | ||
343 | } else { | ||
344 | /* | ||
345 | * Drop io_end reference early. Inode can get freed once | ||
346 | * we finish the bio. | ||
347 | */ | ||
348 | ext4_put_io_end_defer(io_end); | ||
349 | ext4_finish_bio(bio); | ||
350 | bio_put(bio); | ||
292 | } | 351 | } |
293 | |||
294 | ext4_add_complete_io(io_end); | ||
295 | } | 352 | } |
296 | 353 | ||
297 | void ext4_io_submit(struct ext4_io_submit *io) | 354 | void ext4_io_submit(struct ext4_io_submit *io) |
@@ -305,43 +362,38 @@ void ext4_io_submit(struct ext4_io_submit *io) | |||
305 | bio_put(io->io_bio); | 362 | bio_put(io->io_bio); |
306 | } | 363 | } |
307 | io->io_bio = NULL; | 364 | io->io_bio = NULL; |
308 | io->io_op = 0; | 365 | } |
366 | |||
367 | void ext4_io_submit_init(struct ext4_io_submit *io, | ||
368 | struct writeback_control *wbc) | ||
369 | { | ||
370 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
371 | io->io_bio = NULL; | ||
309 | io->io_end = NULL; | 372 | io->io_end = NULL; |
310 | } | 373 | } |
311 | 374 | ||
312 | static int io_submit_init(struct ext4_io_submit *io, | 375 | static int io_submit_init_bio(struct ext4_io_submit *io, |
313 | struct inode *inode, | 376 | struct buffer_head *bh) |
314 | struct writeback_control *wbc, | ||
315 | struct buffer_head *bh) | ||
316 | { | 377 | { |
317 | ext4_io_end_t *io_end; | ||
318 | struct page *page = bh->b_page; | ||
319 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | 378 | int nvecs = bio_get_nr_vecs(bh->b_bdev); |
320 | struct bio *bio; | 379 | struct bio *bio; |
321 | 380 | ||
322 | io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
323 | if (!io_end) | ||
324 | return -ENOMEM; | ||
325 | bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); | 381 | bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); |
382 | if (!bio) | ||
383 | return -ENOMEM; | ||
326 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 384 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
327 | bio->bi_bdev = bh->b_bdev; | 385 | bio->bi_bdev = bh->b_bdev; |
328 | bio->bi_private = io->io_end = io_end; | ||
329 | bio->bi_end_io = ext4_end_bio; | 386 | bio->bi_end_io = ext4_end_bio; |
330 | 387 | bio->bi_private = ext4_get_io_end(io->io_end); | |
331 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | ||
332 | |||
333 | io->io_bio = bio; | 388 | io->io_bio = bio; |
334 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
335 | io->io_next_block = bh->b_blocknr; | 389 | io->io_next_block = bh->b_blocknr; |
336 | return 0; | 390 | return 0; |
337 | } | 391 | } |
338 | 392 | ||
339 | static int io_submit_add_bh(struct ext4_io_submit *io, | 393 | static int io_submit_add_bh(struct ext4_io_submit *io, |
340 | struct inode *inode, | 394 | struct inode *inode, |
341 | struct writeback_control *wbc, | ||
342 | struct buffer_head *bh) | 395 | struct buffer_head *bh) |
343 | { | 396 | { |
344 | ext4_io_end_t *io_end; | ||
345 | int ret; | 397 | int ret; |
346 | 398 | ||
347 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { | 399 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { |
@@ -349,18 +401,14 @@ submit_and_retry: | |||
349 | ext4_io_submit(io); | 401 | ext4_io_submit(io); |
350 | } | 402 | } |
351 | if (io->io_bio == NULL) { | 403 | if (io->io_bio == NULL) { |
352 | ret = io_submit_init(io, inode, wbc, bh); | 404 | ret = io_submit_init_bio(io, bh); |
353 | if (ret) | 405 | if (ret) |
354 | return ret; | 406 | return ret; |
355 | } | 407 | } |
356 | io_end = io->io_end; | ||
357 | if (test_clear_buffer_uninit(bh)) | ||
358 | ext4_set_io_unwritten_flag(inode, io_end); | ||
359 | io->io_end->size += bh->b_size; | ||
360 | io->io_next_block++; | ||
361 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | 408 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); |
362 | if (ret != bh->b_size) | 409 | if (ret != bh->b_size) |
363 | goto submit_and_retry; | 410 | goto submit_and_retry; |
411 | io->io_next_block++; | ||
364 | return 0; | 412 | return 0; |
365 | } | 413 | } |
366 | 414 | ||
@@ -432,7 +480,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
432 | do { | 480 | do { |
433 | if (!buffer_async_write(bh)) | 481 | if (!buffer_async_write(bh)) |
434 | continue; | 482 | continue; |
435 | ret = io_submit_add_bh(io, inode, wbc, bh); | 483 | ret = io_submit_add_bh(io, inode, bh); |
436 | if (ret) { | 484 | if (ret) { |
437 | /* | 485 | /* |
438 | * We only get here on ENOMEM. Not much else | 486 | * We only get here on ENOMEM. Not much else |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b27c96d01965..c5adbb318a90 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -79,12 +79,20 @@ static int verify_group_input(struct super_block *sb, | |||
79 | ext4_fsblk_t end = start + input->blocks_count; | 79 | ext4_fsblk_t end = start + input->blocks_count; |
80 | ext4_group_t group = input->group; | 80 | ext4_group_t group = input->group; |
81 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; | 81 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; |
82 | unsigned overhead = ext4_group_overhead_blocks(sb, group); | 82 | unsigned overhead; |
83 | ext4_fsblk_t metaend = start + overhead; | 83 | ext4_fsblk_t metaend; |
84 | struct buffer_head *bh = NULL; | 84 | struct buffer_head *bh = NULL; |
85 | ext4_grpblk_t free_blocks_count, offset; | 85 | ext4_grpblk_t free_blocks_count, offset; |
86 | int err = -EINVAL; | 86 | int err = -EINVAL; |
87 | 87 | ||
88 | if (group != sbi->s_groups_count) { | ||
89 | ext4_warning(sb, "Cannot add at group %u (only %u groups)", | ||
90 | input->group, sbi->s_groups_count); | ||
91 | return -EINVAL; | ||
92 | } | ||
93 | |||
94 | overhead = ext4_group_overhead_blocks(sb, group); | ||
95 | metaend = start + overhead; | ||
88 | input->free_blocks_count = free_blocks_count = | 96 | input->free_blocks_count = free_blocks_count = |
89 | input->blocks_count - 2 - overhead - sbi->s_itb_per_group; | 97 | input->blocks_count - 2 - overhead - sbi->s_itb_per_group; |
90 | 98 | ||
@@ -96,10 +104,7 @@ static int verify_group_input(struct super_block *sb, | |||
96 | free_blocks_count, input->reserved_blocks); | 104 | free_blocks_count, input->reserved_blocks); |
97 | 105 | ||
98 | ext4_get_group_no_and_offset(sb, start, NULL, &offset); | 106 | ext4_get_group_no_and_offset(sb, start, NULL, &offset); |
99 | if (group != sbi->s_groups_count) | 107 | if (offset != 0) |
100 | ext4_warning(sb, "Cannot add at group %u (only %u groups)", | ||
101 | input->group, sbi->s_groups_count); | ||
102 | else if (offset != 0) | ||
103 | ext4_warning(sb, "Last group not full"); | 108 | ext4_warning(sb, "Last group not full"); |
104 | else if (input->reserved_blocks > input->blocks_count / 5) | 109 | else if (input->reserved_blocks > input->blocks_count / 5) |
105 | ext4_warning(sb, "Reserved blocks too high (%u)", | 110 | ext4_warning(sb, "Reserved blocks too high (%u)", |
@@ -1551,11 +1556,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
1551 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? | 1556 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? |
1552 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; | 1557 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; |
1553 | struct inode *inode = NULL; | 1558 | struct inode *inode = NULL; |
1554 | int gdb_off, gdb_num; | 1559 | int gdb_off; |
1555 | int err; | 1560 | int err; |
1556 | __u16 bg_flags = 0; | 1561 | __u16 bg_flags = 0; |
1557 | 1562 | ||
1558 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | ||
1559 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); | 1563 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); |
1560 | 1564 | ||
1561 | if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, | 1565 | if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, |
@@ -1656,12 +1660,10 @@ errout: | |||
1656 | err = err2; | 1660 | err = err2; |
1657 | 1661 | ||
1658 | if (!err) { | 1662 | if (!err) { |
1659 | ext4_fsblk_t first_block; | ||
1660 | first_block = ext4_group_first_block_no(sb, 0); | ||
1661 | if (test_opt(sb, DEBUG)) | 1663 | if (test_opt(sb, DEBUG)) |
1662 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " | 1664 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " |
1663 | "blocks\n", ext4_blocks_count(es)); | 1665 | "blocks\n", ext4_blocks_count(es)); |
1664 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block, | 1666 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, |
1665 | (char *)es, sizeof(struct ext4_super_block), 0); | 1667 | (char *)es, sizeof(struct ext4_super_block), 0); |
1666 | } | 1668 | } |
1667 | return err; | 1669 | return err; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94cc84db7c9a..b59373b625e9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -69,6 +69,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
69 | static void ext4_clear_journal_err(struct super_block *sb, | 69 | static void ext4_clear_journal_err(struct super_block *sb, |
70 | struct ext4_super_block *es); | 70 | struct ext4_super_block *es); |
71 | static int ext4_sync_fs(struct super_block *sb, int wait); | 71 | static int ext4_sync_fs(struct super_block *sb, int wait); |
72 | static int ext4_sync_fs_nojournal(struct super_block *sb, int wait); | ||
72 | static int ext4_remount(struct super_block *sb, int *flags, char *data); | 73 | static int ext4_remount(struct super_block *sb, int *flags, char *data); |
73 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); | 74 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); |
74 | static int ext4_unfreeze(struct super_block *sb); | 75 | static int ext4_unfreeze(struct super_block *sb); |
@@ -398,6 +399,11 @@ static void ext4_handle_error(struct super_block *sb) | |||
398 | } | 399 | } |
399 | if (test_opt(sb, ERRORS_RO)) { | 400 | if (test_opt(sb, ERRORS_RO)) { |
400 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 401 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
402 | /* | ||
403 | * Make sure updated value of ->s_mount_flags will be visible | ||
404 | * before ->s_flags update | ||
405 | */ | ||
406 | smp_wmb(); | ||
401 | sb->s_flags |= MS_RDONLY; | 407 | sb->s_flags |= MS_RDONLY; |
402 | } | 408 | } |
403 | if (test_opt(sb, ERRORS_PANIC)) | 409 | if (test_opt(sb, ERRORS_PANIC)) |
@@ -422,9 +428,9 @@ void __ext4_error(struct super_block *sb, const char *function, | |||
422 | ext4_handle_error(sb); | 428 | ext4_handle_error(sb); |
423 | } | 429 | } |
424 | 430 | ||
425 | void ext4_error_inode(struct inode *inode, const char *function, | 431 | void __ext4_error_inode(struct inode *inode, const char *function, |
426 | unsigned int line, ext4_fsblk_t block, | 432 | unsigned int line, ext4_fsblk_t block, |
427 | const char *fmt, ...) | 433 | const char *fmt, ...) |
428 | { | 434 | { |
429 | va_list args; | 435 | va_list args; |
430 | struct va_format vaf; | 436 | struct va_format vaf; |
@@ -451,9 +457,9 @@ void ext4_error_inode(struct inode *inode, const char *function, | |||
451 | ext4_handle_error(inode->i_sb); | 457 | ext4_handle_error(inode->i_sb); |
452 | } | 458 | } |
453 | 459 | ||
454 | void ext4_error_file(struct file *file, const char *function, | 460 | void __ext4_error_file(struct file *file, const char *function, |
455 | unsigned int line, ext4_fsblk_t block, | 461 | unsigned int line, ext4_fsblk_t block, |
456 | const char *fmt, ...) | 462 | const char *fmt, ...) |
457 | { | 463 | { |
458 | va_list args; | 464 | va_list args; |
459 | struct va_format vaf; | 465 | struct va_format vaf; |
@@ -570,8 +576,13 @@ void __ext4_abort(struct super_block *sb, const char *function, | |||
570 | 576 | ||
571 | if ((sb->s_flags & MS_RDONLY) == 0) { | 577 | if ((sb->s_flags & MS_RDONLY) == 0) { |
572 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 578 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
573 | sb->s_flags |= MS_RDONLY; | ||
574 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; | 579 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; |
580 | /* | ||
581 | * Make sure updated value of ->s_mount_flags will be visible | ||
582 | * before ->s_flags update | ||
583 | */ | ||
584 | smp_wmb(); | ||
585 | sb->s_flags |= MS_RDONLY; | ||
575 | if (EXT4_SB(sb)->s_journal) | 586 | if (EXT4_SB(sb)->s_journal) |
576 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | 587 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); |
577 | save_error_info(sb, function, line); | 588 | save_error_info(sb, function, line); |
@@ -580,7 +591,8 @@ void __ext4_abort(struct super_block *sb, const char *function, | |||
580 | panic("EXT4-fs panic from previous error\n"); | 591 | panic("EXT4-fs panic from previous error\n"); |
581 | } | 592 | } |
582 | 593 | ||
583 | void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) | 594 | void __ext4_msg(struct super_block *sb, |
595 | const char *prefix, const char *fmt, ...) | ||
584 | { | 596 | { |
585 | struct va_format vaf; | 597 | struct va_format vaf; |
586 | va_list args; | 598 | va_list args; |
@@ -750,8 +762,10 @@ static void ext4_put_super(struct super_block *sb) | |||
750 | ext4_unregister_li_request(sb); | 762 | ext4_unregister_li_request(sb); |
751 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | 763 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
752 | 764 | ||
753 | flush_workqueue(sbi->dio_unwritten_wq); | 765 | flush_workqueue(sbi->unrsv_conversion_wq); |
754 | destroy_workqueue(sbi->dio_unwritten_wq); | 766 | flush_workqueue(sbi->rsv_conversion_wq); |
767 | destroy_workqueue(sbi->unrsv_conversion_wq); | ||
768 | destroy_workqueue(sbi->rsv_conversion_wq); | ||
755 | 769 | ||
756 | if (sbi->s_journal) { | 770 | if (sbi->s_journal) { |
757 | err = jbd2_journal_destroy(sbi->s_journal); | 771 | err = jbd2_journal_destroy(sbi->s_journal); |
@@ -760,7 +774,7 @@ static void ext4_put_super(struct super_block *sb) | |||
760 | ext4_abort(sb, "Couldn't clean up the journal"); | 774 | ext4_abort(sb, "Couldn't clean up the journal"); |
761 | } | 775 | } |
762 | 776 | ||
763 | ext4_es_unregister_shrinker(sb); | 777 | ext4_es_unregister_shrinker(sbi); |
764 | del_timer(&sbi->s_err_report); | 778 | del_timer(&sbi->s_err_report); |
765 | ext4_release_system_zone(sb); | 779 | ext4_release_system_zone(sb); |
766 | ext4_mb_release(sb); | 780 | ext4_mb_release(sb); |
@@ -849,6 +863,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
849 | rwlock_init(&ei->i_es_lock); | 863 | rwlock_init(&ei->i_es_lock); |
850 | INIT_LIST_HEAD(&ei->i_es_lru); | 864 | INIT_LIST_HEAD(&ei->i_es_lru); |
851 | ei->i_es_lru_nr = 0; | 865 | ei->i_es_lru_nr = 0; |
866 | ei->i_touch_when = 0; | ||
852 | ei->i_reserved_data_blocks = 0; | 867 | ei->i_reserved_data_blocks = 0; |
853 | ei->i_reserved_meta_blocks = 0; | 868 | ei->i_reserved_meta_blocks = 0; |
854 | ei->i_allocated_meta_blocks = 0; | 869 | ei->i_allocated_meta_blocks = 0; |
@@ -859,13 +874,15 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
859 | ei->i_reserved_quota = 0; | 874 | ei->i_reserved_quota = 0; |
860 | #endif | 875 | #endif |
861 | ei->jinode = NULL; | 876 | ei->jinode = NULL; |
862 | INIT_LIST_HEAD(&ei->i_completed_io_list); | 877 | INIT_LIST_HEAD(&ei->i_rsv_conversion_list); |
878 | INIT_LIST_HEAD(&ei->i_unrsv_conversion_list); | ||
863 | spin_lock_init(&ei->i_completed_io_lock); | 879 | spin_lock_init(&ei->i_completed_io_lock); |
864 | ei->i_sync_tid = 0; | 880 | ei->i_sync_tid = 0; |
865 | ei->i_datasync_tid = 0; | 881 | ei->i_datasync_tid = 0; |
866 | atomic_set(&ei->i_ioend_count, 0); | 882 | atomic_set(&ei->i_ioend_count, 0); |
867 | atomic_set(&ei->i_unwritten, 0); | 883 | atomic_set(&ei->i_unwritten, 0); |
868 | INIT_WORK(&ei->i_unwritten_work, ext4_end_io_work); | 884 | INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); |
885 | INIT_WORK(&ei->i_unrsv_conversion_work, ext4_end_io_unrsv_work); | ||
869 | 886 | ||
870 | return &ei->vfs_inode; | 887 | return &ei->vfs_inode; |
871 | } | 888 | } |
@@ -1093,6 +1110,7 @@ static const struct super_operations ext4_nojournal_sops = { | |||
1093 | .dirty_inode = ext4_dirty_inode, | 1110 | .dirty_inode = ext4_dirty_inode, |
1094 | .drop_inode = ext4_drop_inode, | 1111 | .drop_inode = ext4_drop_inode, |
1095 | .evict_inode = ext4_evict_inode, | 1112 | .evict_inode = ext4_evict_inode, |
1113 | .sync_fs = ext4_sync_fs_nojournal, | ||
1096 | .put_super = ext4_put_super, | 1114 | .put_super = ext4_put_super, |
1097 | .statfs = ext4_statfs, | 1115 | .statfs = ext4_statfs, |
1098 | .remount_fs = ext4_remount, | 1116 | .remount_fs = ext4_remount, |
@@ -1341,7 +1359,7 @@ static const struct mount_opts { | |||
1341 | {Opt_delalloc, EXT4_MOUNT_DELALLOC, | 1359 | {Opt_delalloc, EXT4_MOUNT_DELALLOC, |
1342 | MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, | 1360 | MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, |
1343 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, | 1361 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, |
1344 | MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT}, | 1362 | MOPT_EXT4_ONLY | MOPT_CLEAR}, |
1345 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, | 1363 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, |
1346 | MOPT_EXT4_ONLY | MOPT_SET}, | 1364 | MOPT_EXT4_ONLY | MOPT_SET}, |
1347 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | | 1365 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | |
@@ -1684,12 +1702,6 @@ static inline void ext4_show_quota_options(struct seq_file *seq, | |||
1684 | 1702 | ||
1685 | if (sbi->s_qf_names[GRPQUOTA]) | 1703 | if (sbi->s_qf_names[GRPQUOTA]) |
1686 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); | 1704 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); |
1687 | |||
1688 | if (test_opt(sb, USRQUOTA)) | ||
1689 | seq_puts(seq, ",usrquota"); | ||
1690 | |||
1691 | if (test_opt(sb, GRPQUOTA)) | ||
1692 | seq_puts(seq, ",grpquota"); | ||
1693 | #endif | 1705 | #endif |
1694 | } | 1706 | } |
1695 | 1707 | ||
@@ -1908,7 +1920,6 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1908 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1920 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1909 | struct ext4_group_desc *gdp = NULL; | 1921 | struct ext4_group_desc *gdp = NULL; |
1910 | ext4_group_t flex_group; | 1922 | ext4_group_t flex_group; |
1911 | unsigned int groups_per_flex = 0; | ||
1912 | int i, err; | 1923 | int i, err; |
1913 | 1924 | ||
1914 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1925 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
@@ -1916,7 +1927,6 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
1916 | sbi->s_log_groups_per_flex = 0; | 1927 | sbi->s_log_groups_per_flex = 0; |
1917 | return 1; | 1928 | return 1; |
1918 | } | 1929 | } |
1919 | groups_per_flex = 1U << sbi->s_log_groups_per_flex; | ||
1920 | 1930 | ||
1921 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); | 1931 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); |
1922 | if (err) | 1932 | if (err) |
@@ -2164,19 +2174,22 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2164 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); | 2174 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); |
2165 | dquot_initialize(inode); | 2175 | dquot_initialize(inode); |
2166 | if (inode->i_nlink) { | 2176 | if (inode->i_nlink) { |
2167 | ext4_msg(sb, KERN_DEBUG, | 2177 | if (test_opt(sb, DEBUG)) |
2168 | "%s: truncating inode %lu to %lld bytes", | 2178 | ext4_msg(sb, KERN_DEBUG, |
2169 | __func__, inode->i_ino, inode->i_size); | 2179 | "%s: truncating inode %lu to %lld bytes", |
2180 | __func__, inode->i_ino, inode->i_size); | ||
2170 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", | 2181 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
2171 | inode->i_ino, inode->i_size); | 2182 | inode->i_ino, inode->i_size); |
2172 | mutex_lock(&inode->i_mutex); | 2183 | mutex_lock(&inode->i_mutex); |
2184 | truncate_inode_pages(inode->i_mapping, inode->i_size); | ||
2173 | ext4_truncate(inode); | 2185 | ext4_truncate(inode); |
2174 | mutex_unlock(&inode->i_mutex); | 2186 | mutex_unlock(&inode->i_mutex); |
2175 | nr_truncates++; | 2187 | nr_truncates++; |
2176 | } else { | 2188 | } else { |
2177 | ext4_msg(sb, KERN_DEBUG, | 2189 | if (test_opt(sb, DEBUG)) |
2178 | "%s: deleting unreferenced inode %lu", | 2190 | ext4_msg(sb, KERN_DEBUG, |
2179 | __func__, inode->i_ino); | 2191 | "%s: deleting unreferenced inode %lu", |
2192 | __func__, inode->i_ino); | ||
2180 | jbd_debug(2, "deleting unreferenced inode %lu\n", | 2193 | jbd_debug(2, "deleting unreferenced inode %lu\n", |
2181 | inode->i_ino); | 2194 | inode->i_ino); |
2182 | nr_orphans++; | 2195 | nr_orphans++; |
@@ -2377,7 +2390,10 @@ struct ext4_attr { | |||
2377 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); | 2390 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); |
2378 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, | 2391 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, |
2379 | const char *, size_t); | 2392 | const char *, size_t); |
2380 | int offset; | 2393 | union { |
2394 | int offset; | ||
2395 | int deprecated_val; | ||
2396 | } u; | ||
2381 | }; | 2397 | }; |
2382 | 2398 | ||
2383 | static int parse_strtoull(const char *buf, | 2399 | static int parse_strtoull(const char *buf, |
@@ -2446,7 +2462,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
2446 | static ssize_t sbi_ui_show(struct ext4_attr *a, | 2462 | static ssize_t sbi_ui_show(struct ext4_attr *a, |
2447 | struct ext4_sb_info *sbi, char *buf) | 2463 | struct ext4_sb_info *sbi, char *buf) |
2448 | { | 2464 | { |
2449 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); | 2465 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); |
2450 | 2466 | ||
2451 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); | 2467 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); |
2452 | } | 2468 | } |
@@ -2455,7 +2471,7 @@ static ssize_t sbi_ui_store(struct ext4_attr *a, | |||
2455 | struct ext4_sb_info *sbi, | 2471 | struct ext4_sb_info *sbi, |
2456 | const char *buf, size_t count) | 2472 | const char *buf, size_t count) |
2457 | { | 2473 | { |
2458 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); | 2474 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); |
2459 | unsigned long t; | 2475 | unsigned long t; |
2460 | int ret; | 2476 | int ret; |
2461 | 2477 | ||
@@ -2504,12 +2520,20 @@ static ssize_t trigger_test_error(struct ext4_attr *a, | |||
2504 | return count; | 2520 | return count; |
2505 | } | 2521 | } |
2506 | 2522 | ||
2523 | static ssize_t sbi_deprecated_show(struct ext4_attr *a, | ||
2524 | struct ext4_sb_info *sbi, char *buf) | ||
2525 | { | ||
2526 | return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); | ||
2527 | } | ||
2528 | |||
2507 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ | 2529 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ |
2508 | static struct ext4_attr ext4_attr_##_name = { \ | 2530 | static struct ext4_attr ext4_attr_##_name = { \ |
2509 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | 2531 | .attr = {.name = __stringify(_name), .mode = _mode }, \ |
2510 | .show = _show, \ | 2532 | .show = _show, \ |
2511 | .store = _store, \ | 2533 | .store = _store, \ |
2512 | .offset = offsetof(struct ext4_sb_info, _elname), \ | 2534 | .u = { \ |
2535 | .offset = offsetof(struct ext4_sb_info, _elname),\ | ||
2536 | }, \ | ||
2513 | } | 2537 | } |
2514 | #define EXT4_ATTR(name, mode, show, store) \ | 2538 | #define EXT4_ATTR(name, mode, show, store) \ |
2515 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | 2539 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) |
@@ -2520,6 +2544,14 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | |||
2520 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ | 2544 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ |
2521 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) | 2545 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) |
2522 | #define ATTR_LIST(name) &ext4_attr_##name.attr | 2546 | #define ATTR_LIST(name) &ext4_attr_##name.attr |
2547 | #define EXT4_DEPRECATED_ATTR(_name, _val) \ | ||
2548 | static struct ext4_attr ext4_attr_##_name = { \ | ||
2549 | .attr = {.name = __stringify(_name), .mode = 0444 }, \ | ||
2550 | .show = sbi_deprecated_show, \ | ||
2551 | .u = { \ | ||
2552 | .deprecated_val = _val, \ | ||
2553 | }, \ | ||
2554 | } | ||
2523 | 2555 | ||
2524 | EXT4_RO_ATTR(delayed_allocation_blocks); | 2556 | EXT4_RO_ATTR(delayed_allocation_blocks); |
2525 | EXT4_RO_ATTR(session_write_kbytes); | 2557 | EXT4_RO_ATTR(session_write_kbytes); |
@@ -2534,7 +2566,7 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); | |||
2534 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); | 2566 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); |
2535 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); | 2567 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); |
2536 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); | 2568 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); |
2537 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); | 2569 | EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); |
2538 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); | 2570 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); |
2539 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); | 2571 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); |
2540 | 2572 | ||
@@ -3451,7 +3483,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3451 | } | 3483 | } |
3452 | if (test_opt(sb, DIOREAD_NOLOCK)) { | 3484 | if (test_opt(sb, DIOREAD_NOLOCK)) { |
3453 | ext4_msg(sb, KERN_ERR, "can't mount with " | 3485 | ext4_msg(sb, KERN_ERR, "can't mount with " |
3454 | "both data=journal and delalloc"); | 3486 | "both data=journal and dioread_nolock"); |
3455 | goto failed_mount; | 3487 | goto failed_mount; |
3456 | } | 3488 | } |
3457 | if (test_opt(sb, DELALLOC)) | 3489 | if (test_opt(sb, DELALLOC)) |
@@ -3586,10 +3618,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3586 | sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); | 3618 | sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); |
3587 | sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); | 3619 | sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); |
3588 | 3620 | ||
3589 | /* Do we have standard group size of blocksize * 8 blocks ? */ | ||
3590 | if (sbi->s_blocks_per_group == blocksize << 3) | ||
3591 | set_opt2(sb, STD_GROUP_SIZE); | ||
3592 | |||
3593 | for (i = 0; i < 4; i++) | 3621 | for (i = 0; i < 4; i++) |
3594 | sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); | 3622 | sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); |
3595 | sbi->s_def_hash_version = es->s_def_hash_version; | 3623 | sbi->s_def_hash_version = es->s_def_hash_version; |
@@ -3659,6 +3687,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3659 | goto failed_mount; | 3687 | goto failed_mount; |
3660 | } | 3688 | } |
3661 | 3689 | ||
3690 | /* Do we have standard group size of clustersize * 8 blocks ? */ | ||
3691 | if (sbi->s_blocks_per_group == clustersize << 3) | ||
3692 | set_opt2(sb, STD_GROUP_SIZE); | ||
3693 | |||
3662 | /* | 3694 | /* |
3663 | * Test whether we have more sectors than will fit in sector_t, | 3695 | * Test whether we have more sectors than will fit in sector_t, |
3664 | * and whether the max offset is addressable by the page cache. | 3696 | * and whether the max offset is addressable by the page cache. |
@@ -3763,7 +3795,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3763 | sbi->s_err_report.data = (unsigned long) sb; | 3795 | sbi->s_err_report.data = (unsigned long) sb; |
3764 | 3796 | ||
3765 | /* Register extent status tree shrinker */ | 3797 | /* Register extent status tree shrinker */ |
3766 | ext4_es_register_shrinker(sb); | 3798 | ext4_es_register_shrinker(sbi); |
3767 | 3799 | ||
3768 | err = percpu_counter_init(&sbi->s_freeclusters_counter, | 3800 | err = percpu_counter_init(&sbi->s_freeclusters_counter, |
3769 | ext4_count_free_clusters(sb)); | 3801 | ext4_count_free_clusters(sb)); |
@@ -3787,7 +3819,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3787 | } | 3819 | } |
3788 | 3820 | ||
3789 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3821 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
3790 | sbi->s_max_writeback_mb_bump = 128; | ||
3791 | sbi->s_extent_max_zeroout_kb = 32; | 3822 | sbi->s_extent_max_zeroout_kb = 32; |
3792 | 3823 | ||
3793 | /* | 3824 | /* |
@@ -3915,12 +3946,20 @@ no_journal: | |||
3915 | * The maximum number of concurrent works can be high and | 3946 | * The maximum number of concurrent works can be high and |
3916 | * concurrency isn't really necessary. Limit it to 1. | 3947 | * concurrency isn't really necessary. Limit it to 1. |
3917 | */ | 3948 | */ |
3918 | EXT4_SB(sb)->dio_unwritten_wq = | 3949 | EXT4_SB(sb)->rsv_conversion_wq = |
3919 | alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); | 3950 | alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); |
3920 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3951 | if (!EXT4_SB(sb)->rsv_conversion_wq) { |
3921 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3952 | printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); |
3922 | ret = -ENOMEM; | 3953 | ret = -ENOMEM; |
3923 | goto failed_mount_wq; | 3954 | goto failed_mount4; |
3955 | } | ||
3956 | |||
3957 | EXT4_SB(sb)->unrsv_conversion_wq = | ||
3958 | alloc_workqueue("ext4-unrsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); | ||
3959 | if (!EXT4_SB(sb)->unrsv_conversion_wq) { | ||
3960 | printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); | ||
3961 | ret = -ENOMEM; | ||
3962 | goto failed_mount4; | ||
3924 | } | 3963 | } |
3925 | 3964 | ||
3926 | /* | 3965 | /* |
@@ -4074,14 +4113,17 @@ failed_mount4a: | |||
4074 | sb->s_root = NULL; | 4113 | sb->s_root = NULL; |
4075 | failed_mount4: | 4114 | failed_mount4: |
4076 | ext4_msg(sb, KERN_ERR, "mount failed"); | 4115 | ext4_msg(sb, KERN_ERR, "mount failed"); |
4077 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); | 4116 | if (EXT4_SB(sb)->rsv_conversion_wq) |
4117 | destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); | ||
4118 | if (EXT4_SB(sb)->unrsv_conversion_wq) | ||
4119 | destroy_workqueue(EXT4_SB(sb)->unrsv_conversion_wq); | ||
4078 | failed_mount_wq: | 4120 | failed_mount_wq: |
4079 | if (sbi->s_journal) { | 4121 | if (sbi->s_journal) { |
4080 | jbd2_journal_destroy(sbi->s_journal); | 4122 | jbd2_journal_destroy(sbi->s_journal); |
4081 | sbi->s_journal = NULL; | 4123 | sbi->s_journal = NULL; |
4082 | } | 4124 | } |
4083 | failed_mount3: | 4125 | failed_mount3: |
4084 | ext4_es_unregister_shrinker(sb); | 4126 | ext4_es_unregister_shrinker(sbi); |
4085 | del_timer(&sbi->s_err_report); | 4127 | del_timer(&sbi->s_err_report); |
4086 | if (sbi->s_flex_groups) | 4128 | if (sbi->s_flex_groups) |
4087 | ext4_kvfree(sbi->s_flex_groups); | 4129 | ext4_kvfree(sbi->s_flex_groups); |
@@ -4517,19 +4559,52 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
4517 | { | 4559 | { |
4518 | int ret = 0; | 4560 | int ret = 0; |
4519 | tid_t target; | 4561 | tid_t target; |
4562 | bool needs_barrier = false; | ||
4520 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4563 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4521 | 4564 | ||
4522 | trace_ext4_sync_fs(sb, wait); | 4565 | trace_ext4_sync_fs(sb, wait); |
4523 | flush_workqueue(sbi->dio_unwritten_wq); | 4566 | flush_workqueue(sbi->rsv_conversion_wq); |
4567 | flush_workqueue(sbi->unrsv_conversion_wq); | ||
4524 | /* | 4568 | /* |
4525 | * Writeback quota in non-journalled quota case - journalled quota has | 4569 | * Writeback quota in non-journalled quota case - journalled quota has |
4526 | * no dirty dquots | 4570 | * no dirty dquots |
4527 | */ | 4571 | */ |
4528 | dquot_writeback_dquots(sb, -1); | 4572 | dquot_writeback_dquots(sb, -1); |
4573 | /* | ||
4574 | * Data writeback is possible w/o journal transaction, so barrier must | ||
4575 | * being sent at the end of the function. But we can skip it if | ||
4576 | * transaction_commit will do it for us. | ||
4577 | */ | ||
4578 | target = jbd2_get_latest_transaction(sbi->s_journal); | ||
4579 | if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && | ||
4580 | !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) | ||
4581 | needs_barrier = true; | ||
4582 | |||
4529 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { | 4583 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { |
4530 | if (wait) | 4584 | if (wait) |
4531 | jbd2_log_wait_commit(sbi->s_journal, target); | 4585 | ret = jbd2_log_wait_commit(sbi->s_journal, target); |
4532 | } | 4586 | } |
4587 | if (needs_barrier) { | ||
4588 | int err; | ||
4589 | err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); | ||
4590 | if (!ret) | ||
4591 | ret = err; | ||
4592 | } | ||
4593 | |||
4594 | return ret; | ||
4595 | } | ||
4596 | |||
4597 | static int ext4_sync_fs_nojournal(struct super_block *sb, int wait) | ||
4598 | { | ||
4599 | int ret = 0; | ||
4600 | |||
4601 | trace_ext4_sync_fs(sb, wait); | ||
4602 | flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq); | ||
4603 | flush_workqueue(EXT4_SB(sb)->unrsv_conversion_wq); | ||
4604 | dquot_writeback_dquots(sb, -1); | ||
4605 | if (wait && test_opt(sb, BARRIER)) | ||
4606 | ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); | ||
4607 | |||
4533 | return ret; | 4608 | return ret; |
4534 | } | 4609 | } |
4535 | 4610 | ||
@@ -4652,6 +4727,21 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4652 | goto restore_opts; | 4727 | goto restore_opts; |
4653 | } | 4728 | } |
4654 | 4729 | ||
4730 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
4731 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { | ||
4732 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
4733 | "both data=journal and delalloc"); | ||
4734 | err = -EINVAL; | ||
4735 | goto restore_opts; | ||
4736 | } | ||
4737 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
4738 | ext4_msg(sb, KERN_ERR, "can't mount with " | ||
4739 | "both data=journal and dioread_nolock"); | ||
4740 | err = -EINVAL; | ||
4741 | goto restore_opts; | ||
4742 | } | ||
4743 | } | ||
4744 | |||
4655 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) | 4745 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) |
4656 | ext4_abort(sb, "Abort forced by user"); | 4746 | ext4_abort(sb, "Abort forced by user"); |
4657 | 4747 | ||
@@ -5406,6 +5496,7 @@ static void __exit ext4_exit_fs(void) | |||
5406 | kset_unregister(ext4_kset); | 5496 | kset_unregister(ext4_kset); |
5407 | ext4_exit_system_zone(); | 5497 | ext4_exit_system_zone(); |
5408 | ext4_exit_pageio(); | 5498 | ext4_exit_pageio(); |
5499 | ext4_exit_es(); | ||
5409 | } | 5500 | } |
5410 | 5501 | ||
5411 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 5502 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index fd27e7e6326e..e06e0995e00f 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig | |||
@@ -51,3 +51,15 @@ config F2FS_FS_POSIX_ACL | |||
51 | Linux website <http://acl.bestbits.at/>. | 51 | Linux website <http://acl.bestbits.at/>. |
52 | 52 | ||
53 | If you don't know what Access Control Lists are, say N | 53 | If you don't know what Access Control Lists are, say N |
54 | |||
55 | config F2FS_FS_SECURITY | ||
56 | bool "F2FS Security Labels" | ||
57 | depends on F2FS_FS_XATTR | ||
58 | help | ||
59 | Security labels provide an access control facility to support Linux | ||
60 | Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO | ||
61 | Linux. This option enables an extended attribute handler for file | ||
62 | security labels in the f2fs filesystem, so that it requires enabling | ||
63 | the extended attribute support in advance. | ||
64 | |||
65 | If you are not using a security module, say N. | ||
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 44abc2f286e0..b7826ec1b470 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c | |||
@@ -250,7 +250,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |||
250 | } | 250 | } |
251 | } | 251 | } |
252 | 252 | ||
253 | error = f2fs_setxattr(inode, name_index, "", value, size); | 253 | error = f2fs_setxattr(inode, name_index, "", value, size, NULL); |
254 | 254 | ||
255 | kfree(value); | 255 | kfree(value); |
256 | if (!error) | 256 | if (!error) |
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b1de01da1a40..66a6b85a51d8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c | |||
@@ -357,8 +357,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, | |||
357 | unsigned long blk_size = sbi->blocksize; | 357 | unsigned long blk_size = sbi->blocksize; |
358 | struct f2fs_checkpoint *cp_block; | 358 | struct f2fs_checkpoint *cp_block; |
359 | unsigned long long cur_version = 0, pre_version = 0; | 359 | unsigned long long cur_version = 0, pre_version = 0; |
360 | unsigned int crc = 0; | ||
361 | size_t crc_offset; | 360 | size_t crc_offset; |
361 | __u32 crc = 0; | ||
362 | 362 | ||
363 | /* Read the 1st cp block in this CP pack */ | 363 | /* Read the 1st cp block in this CP pack */ |
364 | cp_page_1 = get_meta_page(sbi, cp_addr); | 364 | cp_page_1 = get_meta_page(sbi, cp_addr); |
@@ -369,7 +369,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, | |||
369 | if (crc_offset >= blk_size) | 369 | if (crc_offset >= blk_size) |
370 | goto invalid_cp1; | 370 | goto invalid_cp1; |
371 | 371 | ||
372 | crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); | 372 | crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); |
373 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) | 373 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) |
374 | goto invalid_cp1; | 374 | goto invalid_cp1; |
375 | 375 | ||
@@ -384,7 +384,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, | |||
384 | if (crc_offset >= blk_size) | 384 | if (crc_offset >= blk_size) |
385 | goto invalid_cp2; | 385 | goto invalid_cp2; |
386 | 386 | ||
387 | crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); | 387 | crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); |
388 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) | 388 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) |
389 | goto invalid_cp2; | 389 | goto invalid_cp2; |
390 | 390 | ||
@@ -450,13 +450,30 @@ fail_no_cp: | |||
450 | return -EINVAL; | 450 | return -EINVAL; |
451 | } | 451 | } |
452 | 452 | ||
453 | void set_dirty_dir_page(struct inode *inode, struct page *page) | 453 | static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) |
454 | { | 454 | { |
455 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 455 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
456 | struct list_head *head = &sbi->dir_inode_list; | 456 | struct list_head *head = &sbi->dir_inode_list; |
457 | struct dir_inode_entry *new; | ||
458 | struct list_head *this; | 457 | struct list_head *this; |
459 | 458 | ||
459 | list_for_each(this, head) { | ||
460 | struct dir_inode_entry *entry; | ||
461 | entry = list_entry(this, struct dir_inode_entry, list); | ||
462 | if (entry->inode == inode) | ||
463 | return -EEXIST; | ||
464 | } | ||
465 | list_add_tail(&new->list, head); | ||
466 | #ifdef CONFIG_F2FS_STAT_FS | ||
467 | sbi->n_dirty_dirs++; | ||
468 | #endif | ||
469 | return 0; | ||
470 | } | ||
471 | |||
472 | void set_dirty_dir_page(struct inode *inode, struct page *page) | ||
473 | { | ||
474 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
475 | struct dir_inode_entry *new; | ||
476 | |||
460 | if (!S_ISDIR(inode->i_mode)) | 477 | if (!S_ISDIR(inode->i_mode)) |
461 | return; | 478 | return; |
462 | retry: | 479 | retry: |
@@ -469,23 +486,31 @@ retry: | |||
469 | INIT_LIST_HEAD(&new->list); | 486 | INIT_LIST_HEAD(&new->list); |
470 | 487 | ||
471 | spin_lock(&sbi->dir_inode_lock); | 488 | spin_lock(&sbi->dir_inode_lock); |
472 | list_for_each(this, head) { | 489 | if (__add_dirty_inode(inode, new)) |
473 | struct dir_inode_entry *entry; | 490 | kmem_cache_free(inode_entry_slab, new); |
474 | entry = list_entry(this, struct dir_inode_entry, list); | ||
475 | if (entry->inode == inode) { | ||
476 | kmem_cache_free(inode_entry_slab, new); | ||
477 | goto out; | ||
478 | } | ||
479 | } | ||
480 | list_add_tail(&new->list, head); | ||
481 | sbi->n_dirty_dirs++; | ||
482 | 491 | ||
483 | BUG_ON(!S_ISDIR(inode->i_mode)); | ||
484 | out: | ||
485 | inc_page_count(sbi, F2FS_DIRTY_DENTS); | 492 | inc_page_count(sbi, F2FS_DIRTY_DENTS); |
486 | inode_inc_dirty_dents(inode); | 493 | inode_inc_dirty_dents(inode); |
487 | SetPagePrivate(page); | 494 | SetPagePrivate(page); |
495 | spin_unlock(&sbi->dir_inode_lock); | ||
496 | } | ||
488 | 497 | ||
498 | void add_dirty_dir_inode(struct inode *inode) | ||
499 | { | ||
500 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
501 | struct dir_inode_entry *new; | ||
502 | retry: | ||
503 | new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); | ||
504 | if (!new) { | ||
505 | cond_resched(); | ||
506 | goto retry; | ||
507 | } | ||
508 | new->inode = inode; | ||
509 | INIT_LIST_HEAD(&new->list); | ||
510 | |||
511 | spin_lock(&sbi->dir_inode_lock); | ||
512 | if (__add_dirty_inode(inode, new)) | ||
513 | kmem_cache_free(inode_entry_slab, new); | ||
489 | spin_unlock(&sbi->dir_inode_lock); | 514 | spin_unlock(&sbi->dir_inode_lock); |
490 | } | 515 | } |
491 | 516 | ||
@@ -499,8 +524,10 @@ void remove_dirty_dir_inode(struct inode *inode) | |||
499 | return; | 524 | return; |
500 | 525 | ||
501 | spin_lock(&sbi->dir_inode_lock); | 526 | spin_lock(&sbi->dir_inode_lock); |
502 | if (atomic_read(&F2FS_I(inode)->dirty_dents)) | 527 | if (atomic_read(&F2FS_I(inode)->dirty_dents)) { |
503 | goto out; | 528 | spin_unlock(&sbi->dir_inode_lock); |
529 | return; | ||
530 | } | ||
504 | 531 | ||
505 | list_for_each(this, head) { | 532 | list_for_each(this, head) { |
506 | struct dir_inode_entry *entry; | 533 | struct dir_inode_entry *entry; |
@@ -508,12 +535,38 @@ void remove_dirty_dir_inode(struct inode *inode) | |||
508 | if (entry->inode == inode) { | 535 | if (entry->inode == inode) { |
509 | list_del(&entry->list); | 536 | list_del(&entry->list); |
510 | kmem_cache_free(inode_entry_slab, entry); | 537 | kmem_cache_free(inode_entry_slab, entry); |
538 | #ifdef CONFIG_F2FS_STAT_FS | ||
511 | sbi->n_dirty_dirs--; | 539 | sbi->n_dirty_dirs--; |
540 | #endif | ||
541 | break; | ||
542 | } | ||
543 | } | ||
544 | spin_unlock(&sbi->dir_inode_lock); | ||
545 | |||
546 | /* Only from the recovery routine */ | ||
547 | if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { | ||
548 | clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); | ||
549 | iput(inode); | ||
550 | } | ||
551 | } | ||
552 | |||
553 | struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) | ||
554 | { | ||
555 | struct list_head *head = &sbi->dir_inode_list; | ||
556 | struct list_head *this; | ||
557 | struct inode *inode = NULL; | ||
558 | |||
559 | spin_lock(&sbi->dir_inode_lock); | ||
560 | list_for_each(this, head) { | ||
561 | struct dir_inode_entry *entry; | ||
562 | entry = list_entry(this, struct dir_inode_entry, list); | ||
563 | if (entry->inode->i_ino == ino) { | ||
564 | inode = entry->inode; | ||
512 | break; | 565 | break; |
513 | } | 566 | } |
514 | } | 567 | } |
515 | out: | ||
516 | spin_unlock(&sbi->dir_inode_lock); | 568 | spin_unlock(&sbi->dir_inode_lock); |
569 | return inode; | ||
517 | } | 570 | } |
518 | 571 | ||
519 | void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) | 572 | void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) |
@@ -595,7 +648,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
595 | block_t start_blk; | 648 | block_t start_blk; |
596 | struct page *cp_page; | 649 | struct page *cp_page; |
597 | unsigned int data_sum_blocks, orphan_blocks; | 650 | unsigned int data_sum_blocks, orphan_blocks; |
598 | unsigned int crc32 = 0; | 651 | __u32 crc32 = 0; |
599 | void *kaddr; | 652 | void *kaddr; |
600 | int i; | 653 | int i; |
601 | 654 | ||
@@ -664,8 +717,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
664 | get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); | 717 | get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); |
665 | 718 | ||
666 | crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); | 719 | crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); |
667 | *(__le32 *)((unsigned char *)ckpt + | 720 | *((__le32 *)((unsigned char *)ckpt + |
668 | le32_to_cpu(ckpt->checksum_offset)) | 721 | le32_to_cpu(ckpt->checksum_offset))) |
669 | = cpu_to_le32(crc32); | 722 | = cpu_to_le32(crc32); |
670 | 723 | ||
671 | start_blk = __start_cp_addr(sbi); | 724 | start_blk = __start_cp_addr(sbi); |
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 91ff93b0b0f4..035f9a345cdf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -68,7 +68,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | |||
68 | struct buffer_head *bh_result) | 68 | struct buffer_head *bh_result) |
69 | { | 69 | { |
70 | struct f2fs_inode_info *fi = F2FS_I(inode); | 70 | struct f2fs_inode_info *fi = F2FS_I(inode); |
71 | #ifdef CONFIG_F2FS_STAT_FS | ||
71 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 72 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
73 | #endif | ||
72 | pgoff_t start_fofs, end_fofs; | 74 | pgoff_t start_fofs, end_fofs; |
73 | block_t start_blkaddr; | 75 | block_t start_blkaddr; |
74 | 76 | ||
@@ -78,7 +80,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | |||
78 | return 0; | 80 | return 0; |
79 | } | 81 | } |
80 | 82 | ||
83 | #ifdef CONFIG_F2FS_STAT_FS | ||
81 | sbi->total_hit_ext++; | 84 | sbi->total_hit_ext++; |
85 | #endif | ||
82 | start_fofs = fi->ext.fofs; | 86 | start_fofs = fi->ext.fofs; |
83 | end_fofs = fi->ext.fofs + fi->ext.len - 1; | 87 | end_fofs = fi->ext.fofs + fi->ext.len - 1; |
84 | start_blkaddr = fi->ext.blk_addr; | 88 | start_blkaddr = fi->ext.blk_addr; |
@@ -96,7 +100,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | |||
96 | else | 100 | else |
97 | bh_result->b_size = UINT_MAX; | 101 | bh_result->b_size = UINT_MAX; |
98 | 102 | ||
103 | #ifdef CONFIG_F2FS_STAT_FS | ||
99 | sbi->read_hit_ext++; | 104 | sbi->read_hit_ext++; |
105 | #endif | ||
100 | read_unlock(&fi->ext.ext_lock); | 106 | read_unlock(&fi->ext.ext_lock); |
101 | return 1; | 107 | return 1; |
102 | } | 108 | } |
@@ -199,7 +205,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | |||
199 | if (dn.data_blkaddr == NEW_ADDR) | 205 | if (dn.data_blkaddr == NEW_ADDR) |
200 | return ERR_PTR(-EINVAL); | 206 | return ERR_PTR(-EINVAL); |
201 | 207 | ||
202 | page = grab_cache_page(mapping, index); | 208 | page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); |
203 | if (!page) | 209 | if (!page) |
204 | return ERR_PTR(-ENOMEM); | 210 | return ERR_PTR(-ENOMEM); |
205 | 211 | ||
@@ -233,18 +239,23 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) | |||
233 | struct page *page; | 239 | struct page *page; |
234 | int err; | 240 | int err; |
235 | 241 | ||
242 | repeat: | ||
243 | page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); | ||
244 | if (!page) | ||
245 | return ERR_PTR(-ENOMEM); | ||
246 | |||
236 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 247 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
237 | err = get_dnode_of_data(&dn, index, LOOKUP_NODE); | 248 | err = get_dnode_of_data(&dn, index, LOOKUP_NODE); |
238 | if (err) | 249 | if (err) { |
250 | f2fs_put_page(page, 1); | ||
239 | return ERR_PTR(err); | 251 | return ERR_PTR(err); |
252 | } | ||
240 | f2fs_put_dnode(&dn); | 253 | f2fs_put_dnode(&dn); |
241 | 254 | ||
242 | if (dn.data_blkaddr == NULL_ADDR) | 255 | if (dn.data_blkaddr == NULL_ADDR) { |
256 | f2fs_put_page(page, 1); | ||
243 | return ERR_PTR(-ENOENT); | 257 | return ERR_PTR(-ENOENT); |
244 | repeat: | 258 | } |
245 | page = grab_cache_page(mapping, index); | ||
246 | if (!page) | ||
247 | return ERR_PTR(-ENOMEM); | ||
248 | 259 | ||
249 | if (PageUptodate(page)) | 260 | if (PageUptodate(page)) |
250 | return page; | 261 | return page; |
@@ -274,9 +285,10 @@ repeat: | |||
274 | * | 285 | * |
275 | * Also, caller should grab and release a mutex by calling mutex_lock_op() and | 286 | * Also, caller should grab and release a mutex by calling mutex_lock_op() and |
276 | * mutex_unlock_op(). | 287 | * mutex_unlock_op(). |
288 | * Note that, npage is set only by make_empty_dir. | ||
277 | */ | 289 | */ |
278 | struct page *get_new_data_page(struct inode *inode, pgoff_t index, | 290 | struct page *get_new_data_page(struct inode *inode, |
279 | bool new_i_size) | 291 | struct page *npage, pgoff_t index, bool new_i_size) |
280 | { | 292 | { |
281 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 293 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
282 | struct address_space *mapping = inode->i_mapping; | 294 | struct address_space *mapping = inode->i_mapping; |
@@ -284,18 +296,20 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, | |||
284 | struct dnode_of_data dn; | 296 | struct dnode_of_data dn; |
285 | int err; | 297 | int err; |
286 | 298 | ||
287 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 299 | set_new_dnode(&dn, inode, npage, npage, 0); |
288 | err = get_dnode_of_data(&dn, index, ALLOC_NODE); | 300 | err = get_dnode_of_data(&dn, index, ALLOC_NODE); |
289 | if (err) | 301 | if (err) |
290 | return ERR_PTR(err); | 302 | return ERR_PTR(err); |
291 | 303 | ||
292 | if (dn.data_blkaddr == NULL_ADDR) { | 304 | if (dn.data_blkaddr == NULL_ADDR) { |
293 | if (reserve_new_block(&dn)) { | 305 | if (reserve_new_block(&dn)) { |
294 | f2fs_put_dnode(&dn); | 306 | if (!npage) |
307 | f2fs_put_dnode(&dn); | ||
295 | return ERR_PTR(-ENOSPC); | 308 | return ERR_PTR(-ENOSPC); |
296 | } | 309 | } |
297 | } | 310 | } |
298 | f2fs_put_dnode(&dn); | 311 | if (!npage) |
312 | f2fs_put_dnode(&dn); | ||
299 | repeat: | 313 | repeat: |
300 | page = grab_cache_page(mapping, index); | 314 | page = grab_cache_page(mapping, index); |
301 | if (!page) | 315 | if (!page) |
@@ -325,6 +339,8 @@ repeat: | |||
325 | if (new_i_size && | 339 | if (new_i_size && |
326 | i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { | 340 | i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { |
327 | i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); | 341 | i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); |
342 | /* Only the directory inode sets new_i_size */ | ||
343 | set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); | ||
328 | mark_inode_dirty_sync(inode); | 344 | mark_inode_dirty_sync(inode); |
329 | } | 345 | } |
330 | return page; | 346 | return page; |
@@ -481,8 +497,9 @@ int do_write_data_page(struct page *page) | |||
481 | * If current allocation needs SSR, | 497 | * If current allocation needs SSR, |
482 | * it had better in-place writes for updated data. | 498 | * it had better in-place writes for updated data. |
483 | */ | 499 | */ |
484 | if (old_blk_addr != NEW_ADDR && !is_cold_data(page) && | 500 | if (unlikely(old_blk_addr != NEW_ADDR && |
485 | need_inplace_update(inode)) { | 501 | !is_cold_data(page) && |
502 | need_inplace_update(inode))) { | ||
486 | rewrite_data_page(F2FS_SB(inode->i_sb), page, | 503 | rewrite_data_page(F2FS_SB(inode->i_sb), page, |
487 | old_blk_addr); | 504 | old_blk_addr); |
488 | } else { | 505 | } else { |
@@ -684,6 +701,27 @@ err: | |||
684 | return err; | 701 | return err; |
685 | } | 702 | } |
686 | 703 | ||
704 | static int f2fs_write_end(struct file *file, | ||
705 | struct address_space *mapping, | ||
706 | loff_t pos, unsigned len, unsigned copied, | ||
707 | struct page *page, void *fsdata) | ||
708 | { | ||
709 | struct inode *inode = page->mapping->host; | ||
710 | |||
711 | SetPageUptodate(page); | ||
712 | set_page_dirty(page); | ||
713 | |||
714 | if (pos + copied > i_size_read(inode)) { | ||
715 | i_size_write(inode, pos + copied); | ||
716 | mark_inode_dirty(inode); | ||
717 | update_inode_page(inode); | ||
718 | } | ||
719 | |||
720 | unlock_page(page); | ||
721 | page_cache_release(page); | ||
722 | return copied; | ||
723 | } | ||
724 | |||
687 | static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | 725 | static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, |
688 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) | 726 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) |
689 | { | 727 | { |
@@ -698,7 +736,8 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | |||
698 | get_data_block_ro); | 736 | get_data_block_ro); |
699 | } | 737 | } |
700 | 738 | ||
701 | static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) | 739 | static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, |
740 | unsigned int length) | ||
702 | { | 741 | { |
703 | struct inode *inode = page->mapping->host; | 742 | struct inode *inode = page->mapping->host; |
704 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 743 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
@@ -740,7 +779,7 @@ const struct address_space_operations f2fs_dblock_aops = { | |||
740 | .writepage = f2fs_write_data_page, | 779 | .writepage = f2fs_write_data_page, |
741 | .writepages = f2fs_write_data_pages, | 780 | .writepages = f2fs_write_data_pages, |
742 | .write_begin = f2fs_write_begin, | 781 | .write_begin = f2fs_write_begin, |
743 | .write_end = nobh_write_end, | 782 | .write_end = f2fs_write_end, |
744 | .set_page_dirty = f2fs_set_data_page_dirty, | 783 | .set_page_dirty = f2fs_set_data_page_dirty, |
745 | .invalidatepage = f2fs_invalidate_data_page, | 784 | .invalidatepage = f2fs_invalidate_data_page, |
746 | .releasepage = f2fs_release_data_page, | 785 | .releasepage = f2fs_release_data_page, |
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8d9943786c31..0d6c6aafb235 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c | |||
@@ -175,12 +175,12 @@ get_cache: | |||
175 | 175 | ||
176 | static int stat_show(struct seq_file *s, void *v) | 176 | static int stat_show(struct seq_file *s, void *v) |
177 | { | 177 | { |
178 | struct f2fs_stat_info *si, *next; | 178 | struct f2fs_stat_info *si; |
179 | int i = 0; | 179 | int i = 0; |
180 | int j; | 180 | int j; |
181 | 181 | ||
182 | mutex_lock(&f2fs_stat_mutex); | 182 | mutex_lock(&f2fs_stat_mutex); |
183 | list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) { | 183 | list_for_each_entry(si, &f2fs_stat_list, stat_list) { |
184 | char devname[BDEVNAME_SIZE]; | 184 | char devname[BDEVNAME_SIZE]; |
185 | 185 | ||
186 | update_general_status(si->sbi); | 186 | update_general_status(si->sbi); |
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1ac6b93036b7..62f0d5977c64 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include "f2fs.h" | 13 | #include "f2fs.h" |
14 | #include "node.h" | 14 | #include "node.h" |
15 | #include "acl.h" | 15 | #include "acl.h" |
16 | #include "xattr.h" | ||
16 | 17 | ||
17 | static unsigned long dir_blocks(struct inode *inode) | 18 | static unsigned long dir_blocks(struct inode *inode) |
18 | { | 19 | { |
@@ -215,9 +216,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, | |||
215 | 216 | ||
216 | struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) | 217 | struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) |
217 | { | 218 | { |
218 | struct page *page = NULL; | 219 | struct page *page; |
219 | struct f2fs_dir_entry *de = NULL; | 220 | struct f2fs_dir_entry *de; |
220 | struct f2fs_dentry_block *dentry_blk = NULL; | 221 | struct f2fs_dentry_block *dentry_blk; |
221 | 222 | ||
222 | page = get_lock_data_page(dir, 0); | 223 | page = get_lock_data_page(dir, 0); |
223 | if (IS_ERR(page)) | 224 | if (IS_ERR(page)) |
@@ -264,15 +265,10 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, | |||
264 | f2fs_put_page(page, 1); | 265 | f2fs_put_page(page, 1); |
265 | } | 266 | } |
266 | 267 | ||
267 | void init_dent_inode(const struct qstr *name, struct page *ipage) | 268 | static void init_dent_inode(const struct qstr *name, struct page *ipage) |
268 | { | 269 | { |
269 | struct f2fs_node *rn; | 270 | struct f2fs_node *rn; |
270 | 271 | ||
271 | if (IS_ERR(ipage)) | ||
272 | return; | ||
273 | |||
274 | wait_on_page_writeback(ipage); | ||
275 | |||
276 | /* copy name info. to this inode page */ | 272 | /* copy name info. to this inode page */ |
277 | rn = (struct f2fs_node *)page_address(ipage); | 273 | rn = (struct f2fs_node *)page_address(ipage); |
278 | rn->i.i_namelen = cpu_to_le32(name->len); | 274 | rn->i.i_namelen = cpu_to_le32(name->len); |
@@ -280,14 +276,15 @@ void init_dent_inode(const struct qstr *name, struct page *ipage) | |||
280 | set_page_dirty(ipage); | 276 | set_page_dirty(ipage); |
281 | } | 277 | } |
282 | 278 | ||
283 | static int make_empty_dir(struct inode *inode, struct inode *parent) | 279 | static int make_empty_dir(struct inode *inode, |
280 | struct inode *parent, struct page *page) | ||
284 | { | 281 | { |
285 | struct page *dentry_page; | 282 | struct page *dentry_page; |
286 | struct f2fs_dentry_block *dentry_blk; | 283 | struct f2fs_dentry_block *dentry_blk; |
287 | struct f2fs_dir_entry *de; | 284 | struct f2fs_dir_entry *de; |
288 | void *kaddr; | 285 | void *kaddr; |
289 | 286 | ||
290 | dentry_page = get_new_data_page(inode, 0, true); | 287 | dentry_page = get_new_data_page(inode, page, 0, true); |
291 | if (IS_ERR(dentry_page)) | 288 | if (IS_ERR(dentry_page)) |
292 | return PTR_ERR(dentry_page); | 289 | return PTR_ERR(dentry_page); |
293 | 290 | ||
@@ -317,63 +314,76 @@ static int make_empty_dir(struct inode *inode, struct inode *parent) | |||
317 | return 0; | 314 | return 0; |
318 | } | 315 | } |
319 | 316 | ||
320 | static int init_inode_metadata(struct inode *inode, | 317 | static struct page *init_inode_metadata(struct inode *inode, |
321 | struct inode *dir, const struct qstr *name) | 318 | struct inode *dir, const struct qstr *name) |
322 | { | 319 | { |
320 | struct page *page; | ||
321 | int err; | ||
322 | |||
323 | if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { | 323 | if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { |
324 | int err; | 324 | page = new_inode_page(inode, name); |
325 | err = new_inode_page(inode, name); | 325 | if (IS_ERR(page)) |
326 | if (err) | 326 | return page; |
327 | return err; | ||
328 | 327 | ||
329 | if (S_ISDIR(inode->i_mode)) { | 328 | if (S_ISDIR(inode->i_mode)) { |
330 | err = make_empty_dir(inode, dir); | 329 | err = make_empty_dir(inode, dir, page); |
331 | if (err) { | 330 | if (err) |
332 | remove_inode_page(inode); | 331 | goto error; |
333 | return err; | ||
334 | } | ||
335 | } | 332 | } |
336 | 333 | ||
337 | err = f2fs_init_acl(inode, dir); | 334 | err = f2fs_init_acl(inode, dir); |
338 | if (err) { | 335 | if (err) |
339 | remove_inode_page(inode); | 336 | goto error; |
340 | return err; | 337 | |
341 | } | 338 | err = f2fs_init_security(inode, dir, name, page); |
339 | if (err) | ||
340 | goto error; | ||
341 | |||
342 | wait_on_page_writeback(page); | ||
342 | } else { | 343 | } else { |
343 | struct page *ipage; | 344 | page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); |
344 | ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); | 345 | if (IS_ERR(page)) |
345 | if (IS_ERR(ipage)) | 346 | return page; |
346 | return PTR_ERR(ipage); | 347 | |
347 | set_cold_node(inode, ipage); | 348 | wait_on_page_writeback(page); |
348 | init_dent_inode(name, ipage); | 349 | set_cold_node(inode, page); |
349 | f2fs_put_page(ipage, 1); | ||
350 | } | 350 | } |
351 | |||
352 | init_dent_inode(name, page); | ||
353 | |||
354 | /* | ||
355 | * This file should be checkpointed during fsync. | ||
356 | * We lost i_pino from now on. | ||
357 | */ | ||
351 | if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { | 358 | if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { |
359 | file_lost_pino(inode); | ||
352 | inc_nlink(inode); | 360 | inc_nlink(inode); |
353 | update_inode_page(inode); | ||
354 | } | 361 | } |
355 | return 0; | 362 | return page; |
363 | |||
364 | error: | ||
365 | f2fs_put_page(page, 1); | ||
366 | remove_inode_page(inode); | ||
367 | return ERR_PTR(err); | ||
356 | } | 368 | } |
357 | 369 | ||
358 | static void update_parent_metadata(struct inode *dir, struct inode *inode, | 370 | static void update_parent_metadata(struct inode *dir, struct inode *inode, |
359 | unsigned int current_depth) | 371 | unsigned int current_depth) |
360 | { | 372 | { |
361 | bool need_dir_update = false; | ||
362 | |||
363 | if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { | 373 | if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { |
364 | if (S_ISDIR(inode->i_mode)) { | 374 | if (S_ISDIR(inode->i_mode)) { |
365 | inc_nlink(dir); | 375 | inc_nlink(dir); |
366 | need_dir_update = true; | 376 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); |
367 | } | 377 | } |
368 | clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); | 378 | clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); |
369 | } | 379 | } |
370 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 380 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
371 | if (F2FS_I(dir)->i_current_depth != current_depth) { | 381 | if (F2FS_I(dir)->i_current_depth != current_depth) { |
372 | F2FS_I(dir)->i_current_depth = current_depth; | 382 | F2FS_I(dir)->i_current_depth = current_depth; |
373 | need_dir_update = true; | 383 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); |
374 | } | 384 | } |
375 | 385 | ||
376 | if (need_dir_update) | 386 | if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) |
377 | update_inode_page(dir); | 387 | update_inode_page(dir); |
378 | else | 388 | else |
379 | mark_inode_dirty(dir); | 389 | mark_inode_dirty(dir); |
@@ -423,6 +433,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in | |||
423 | struct page *dentry_page = NULL; | 433 | struct page *dentry_page = NULL; |
424 | struct f2fs_dentry_block *dentry_blk = NULL; | 434 | struct f2fs_dentry_block *dentry_blk = NULL; |
425 | int slots = GET_DENTRY_SLOTS(namelen); | 435 | int slots = GET_DENTRY_SLOTS(namelen); |
436 | struct page *page; | ||
426 | int err = 0; | 437 | int err = 0; |
427 | int i; | 438 | int i; |
428 | 439 | ||
@@ -448,7 +459,7 @@ start: | |||
448 | bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); | 459 | bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); |
449 | 460 | ||
450 | for (block = bidx; block <= (bidx + nblock - 1); block++) { | 461 | for (block = bidx; block <= (bidx + nblock - 1); block++) { |
451 | dentry_page = get_new_data_page(dir, block, true); | 462 | dentry_page = get_new_data_page(dir, NULL, block, true); |
452 | if (IS_ERR(dentry_page)) | 463 | if (IS_ERR(dentry_page)) |
453 | return PTR_ERR(dentry_page); | 464 | return PTR_ERR(dentry_page); |
454 | 465 | ||
@@ -465,12 +476,13 @@ start: | |||
465 | ++level; | 476 | ++level; |
466 | goto start; | 477 | goto start; |
467 | add_dentry: | 478 | add_dentry: |
468 | err = init_inode_metadata(inode, dir, name); | ||
469 | if (err) | ||
470 | goto fail; | ||
471 | |||
472 | wait_on_page_writeback(dentry_page); | 479 | wait_on_page_writeback(dentry_page); |
473 | 480 | ||
481 | page = init_inode_metadata(inode, dir, name); | ||
482 | if (IS_ERR(page)) { | ||
483 | err = PTR_ERR(page); | ||
484 | goto fail; | ||
485 | } | ||
474 | de = &dentry_blk->dentry[bit_pos]; | 486 | de = &dentry_blk->dentry[bit_pos]; |
475 | de->hash_code = dentry_hash; | 487 | de->hash_code = dentry_hash; |
476 | de->name_len = cpu_to_le16(namelen); | 488 | de->name_len = cpu_to_le16(namelen); |
@@ -481,11 +493,14 @@ add_dentry: | |||
481 | test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); | 493 | test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); |
482 | set_page_dirty(dentry_page); | 494 | set_page_dirty(dentry_page); |
483 | 495 | ||
484 | update_parent_metadata(dir, inode, current_depth); | 496 | /* we don't need to mark_inode_dirty now */ |
485 | |||
486 | /* update parent inode number before releasing dentry page */ | ||
487 | F2FS_I(inode)->i_pino = dir->i_ino; | 497 | F2FS_I(inode)->i_pino = dir->i_ino; |
498 | update_inode(inode, page); | ||
499 | f2fs_put_page(page, 1); | ||
500 | |||
501 | update_parent_metadata(dir, inode, current_depth); | ||
488 | fail: | 502 | fail: |
503 | clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); | ||
489 | kunmap(dentry_page); | 504 | kunmap(dentry_page); |
490 | f2fs_put_page(dentry_page, 1); | 505 | f2fs_put_page(dentry_page, 1); |
491 | return err; | 506 | return err; |
@@ -591,34 +606,26 @@ bool f2fs_empty_dir(struct inode *dir) | |||
591 | return true; | 606 | return true; |
592 | } | 607 | } |
593 | 608 | ||
594 | static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) | 609 | static int f2fs_readdir(struct file *file, struct dir_context *ctx) |
595 | { | 610 | { |
596 | unsigned long pos = file->f_pos; | ||
597 | struct inode *inode = file_inode(file); | 611 | struct inode *inode = file_inode(file); |
598 | unsigned long npages = dir_blocks(inode); | 612 | unsigned long npages = dir_blocks(inode); |
599 | unsigned char *types = NULL; | 613 | unsigned int bit_pos = 0; |
600 | unsigned int bit_pos = 0, start_bit_pos = 0; | ||
601 | int over = 0; | ||
602 | struct f2fs_dentry_block *dentry_blk = NULL; | 614 | struct f2fs_dentry_block *dentry_blk = NULL; |
603 | struct f2fs_dir_entry *de = NULL; | 615 | struct f2fs_dir_entry *de = NULL; |
604 | struct page *dentry_page = NULL; | 616 | struct page *dentry_page = NULL; |
605 | unsigned int n = 0; | 617 | unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); |
606 | unsigned char d_type = DT_UNKNOWN; | 618 | unsigned char d_type = DT_UNKNOWN; |
607 | int slots; | ||
608 | 619 | ||
609 | types = f2fs_filetype_table; | 620 | bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); |
610 | bit_pos = (pos % NR_DENTRY_IN_BLOCK); | ||
611 | n = (pos / NR_DENTRY_IN_BLOCK); | ||
612 | 621 | ||
613 | for ( ; n < npages; n++) { | 622 | for ( ; n < npages; n++) { |
614 | dentry_page = get_lock_data_page(inode, n); | 623 | dentry_page = get_lock_data_page(inode, n); |
615 | if (IS_ERR(dentry_page)) | 624 | if (IS_ERR(dentry_page)) |
616 | continue; | 625 | continue; |
617 | 626 | ||
618 | start_bit_pos = bit_pos; | ||
619 | dentry_blk = kmap(dentry_page); | 627 | dentry_blk = kmap(dentry_page); |
620 | while (bit_pos < NR_DENTRY_IN_BLOCK) { | 628 | while (bit_pos < NR_DENTRY_IN_BLOCK) { |
621 | d_type = DT_UNKNOWN; | ||
622 | bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, | 629 | bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, |
623 | NR_DENTRY_IN_BLOCK, | 630 | NR_DENTRY_IN_BLOCK, |
624 | bit_pos); | 631 | bit_pos); |
@@ -626,28 +633,26 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
626 | break; | 633 | break; |
627 | 634 | ||
628 | de = &dentry_blk->dentry[bit_pos]; | 635 | de = &dentry_blk->dentry[bit_pos]; |
629 | if (types && de->file_type < F2FS_FT_MAX) | 636 | if (de->file_type < F2FS_FT_MAX) |
630 | d_type = types[de->file_type]; | 637 | d_type = f2fs_filetype_table[de->file_type]; |
631 | 638 | else | |
632 | over = filldir(dirent, | 639 | d_type = DT_UNKNOWN; |
640 | if (!dir_emit(ctx, | ||
633 | dentry_blk->filename[bit_pos], | 641 | dentry_blk->filename[bit_pos], |
634 | le16_to_cpu(de->name_len), | 642 | le16_to_cpu(de->name_len), |
635 | (n * NR_DENTRY_IN_BLOCK) + bit_pos, | 643 | le32_to_cpu(de->ino), d_type)) |
636 | le32_to_cpu(de->ino), d_type); | 644 | goto stop; |
637 | if (over) { | 645 | |
638 | file->f_pos += bit_pos - start_bit_pos; | 646 | bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); |
639 | goto success; | 647 | ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos; |
640 | } | ||
641 | slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); | ||
642 | bit_pos += slots; | ||
643 | } | 648 | } |
644 | bit_pos = 0; | 649 | bit_pos = 0; |
645 | file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK; | 650 | ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; |
646 | kunmap(dentry_page); | 651 | kunmap(dentry_page); |
647 | f2fs_put_page(dentry_page, 1); | 652 | f2fs_put_page(dentry_page, 1); |
648 | dentry_page = NULL; | 653 | dentry_page = NULL; |
649 | } | 654 | } |
650 | success: | 655 | stop: |
651 | if (dentry_page && !IS_ERR(dentry_page)) { | 656 | if (dentry_page && !IS_ERR(dentry_page)) { |
652 | kunmap(dentry_page); | 657 | kunmap(dentry_page); |
653 | f2fs_put_page(dentry_page, 1); | 658 | f2fs_put_page(dentry_page, 1); |
@@ -659,7 +664,7 @@ success: | |||
659 | const struct file_operations f2fs_dir_operations = { | 664 | const struct file_operations f2fs_dir_operations = { |
660 | .llseek = generic_file_llseek, | 665 | .llseek = generic_file_llseek, |
661 | .read = generic_read_dir, | 666 | .read = generic_read_dir, |
662 | .readdir = f2fs_readdir, | 667 | .iterate = f2fs_readdir, |
663 | .fsync = f2fs_sync_file, | 668 | .fsync = f2fs_sync_file, |
664 | .unlocked_ioctl = f2fs_ioctl, | 669 | .unlocked_ioctl = f2fs_ioctl, |
665 | }; | 670 | }; |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 20aab02f2a42..467d42d65c48 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -37,21 +37,35 @@ | |||
37 | typecheck(unsigned long long, b) && \ | 37 | typecheck(unsigned long long, b) && \ |
38 | ((long long)((a) - (b)) > 0)) | 38 | ((long long)((a) - (b)) > 0)) |
39 | 39 | ||
40 | typedef u64 block_t; | 40 | typedef u32 block_t; /* |
41 | * should not change u32, since it is the on-disk block | ||
42 | * address format, __le32. | ||
43 | */ | ||
41 | typedef u32 nid_t; | 44 | typedef u32 nid_t; |
42 | 45 | ||
43 | struct f2fs_mount_info { | 46 | struct f2fs_mount_info { |
44 | unsigned int opt; | 47 | unsigned int opt; |
45 | }; | 48 | }; |
46 | 49 | ||
47 | static inline __u32 f2fs_crc32(void *buff, size_t len) | 50 | #define CRCPOLY_LE 0xedb88320 |
51 | |||
52 | static inline __u32 f2fs_crc32(void *buf, size_t len) | ||
48 | { | 53 | { |
49 | return crc32_le(F2FS_SUPER_MAGIC, buff, len); | 54 | unsigned char *p = (unsigned char *)buf; |
55 | __u32 crc = F2FS_SUPER_MAGIC; | ||
56 | int i; | ||
57 | |||
58 | while (len--) { | ||
59 | crc ^= *p++; | ||
60 | for (i = 0; i < 8; i++) | ||
61 | crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); | ||
62 | } | ||
63 | return crc; | ||
50 | } | 64 | } |
51 | 65 | ||
52 | static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size) | 66 | static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size) |
53 | { | 67 | { |
54 | return f2fs_crc32(buff, buff_size) == blk_crc; | 68 | return f2fs_crc32(buf, buf_size) == blk_crc; |
55 | } | 69 | } |
56 | 70 | ||
57 | /* | 71 | /* |
@@ -148,7 +162,7 @@ struct extent_info { | |||
148 | * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. | 162 | * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. |
149 | */ | 163 | */ |
150 | #define FADVISE_COLD_BIT 0x01 | 164 | #define FADVISE_COLD_BIT 0x01 |
151 | #define FADVISE_CP_BIT 0x02 | 165 | #define FADVISE_LOST_PINO_BIT 0x02 |
152 | 166 | ||
153 | struct f2fs_inode_info { | 167 | struct f2fs_inode_info { |
154 | struct inode vfs_inode; /* serve a vfs inode */ | 168 | struct inode vfs_inode; /* serve a vfs inode */ |
@@ -369,7 +383,6 @@ struct f2fs_sb_info { | |||
369 | /* for directory inode management */ | 383 | /* for directory inode management */ |
370 | struct list_head dir_inode_list; /* dir inode list */ | 384 | struct list_head dir_inode_list; /* dir inode list */ |
371 | spinlock_t dir_inode_lock; /* for dir inode list lock */ | 385 | spinlock_t dir_inode_lock; /* for dir inode list lock */ |
372 | unsigned int n_dirty_dirs; /* # of dir inodes */ | ||
373 | 386 | ||
374 | /* basic file system units */ | 387 | /* basic file system units */ |
375 | unsigned int log_sectors_per_block; /* log2 sectors per block */ | 388 | unsigned int log_sectors_per_block; /* log2 sectors per block */ |
@@ -406,12 +419,15 @@ struct f2fs_sb_info { | |||
406 | * for stat information. | 419 | * for stat information. |
407 | * one is for the LFS mode, and the other is for the SSR mode. | 420 | * one is for the LFS mode, and the other is for the SSR mode. |
408 | */ | 421 | */ |
422 | #ifdef CONFIG_F2FS_STAT_FS | ||
409 | struct f2fs_stat_info *stat_info; /* FS status information */ | 423 | struct f2fs_stat_info *stat_info; /* FS status information */ |
410 | unsigned int segment_count[2]; /* # of allocated segments */ | 424 | unsigned int segment_count[2]; /* # of allocated segments */ |
411 | unsigned int block_count[2]; /* # of allocated blocks */ | 425 | unsigned int block_count[2]; /* # of allocated blocks */ |
412 | unsigned int last_victim[2]; /* last victim segment # */ | ||
413 | int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ | 426 | int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ |
414 | int bg_gc; /* background gc calls */ | 427 | int bg_gc; /* background gc calls */ |
428 | unsigned int n_dirty_dirs; /* # of dir inodes */ | ||
429 | #endif | ||
430 | unsigned int last_victim[2]; /* last victim segment # */ | ||
415 | spinlock_t stat_lock; /* lock for stat operations */ | 431 | spinlock_t stat_lock; /* lock for stat operations */ |
416 | }; | 432 | }; |
417 | 433 | ||
@@ -495,9 +511,17 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) | |||
495 | 511 | ||
496 | static inline void mutex_lock_all(struct f2fs_sb_info *sbi) | 512 | static inline void mutex_lock_all(struct f2fs_sb_info *sbi) |
497 | { | 513 | { |
498 | int i = 0; | 514 | int i; |
499 | for (; i < NR_GLOBAL_LOCKS; i++) | 515 | |
500 | mutex_lock(&sbi->fs_lock[i]); | 516 | for (i = 0; i < NR_GLOBAL_LOCKS; i++) { |
517 | /* | ||
518 | * This is the only time we take multiple fs_lock[] | ||
519 | * instances; the order is immaterial since we | ||
520 | * always hold cp_mutex, which serializes multiple | ||
521 | * such operations. | ||
522 | */ | ||
523 | mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); | ||
524 | } | ||
501 | } | 525 | } |
502 | 526 | ||
503 | static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) | 527 | static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) |
@@ -843,9 +867,12 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) | |||
843 | /* used for f2fs_inode_info->flags */ | 867 | /* used for f2fs_inode_info->flags */ |
844 | enum { | 868 | enum { |
845 | FI_NEW_INODE, /* indicate newly allocated inode */ | 869 | FI_NEW_INODE, /* indicate newly allocated inode */ |
870 | FI_DIRTY_INODE, /* indicate inode is dirty or not */ | ||
846 | FI_INC_LINK, /* need to increment i_nlink */ | 871 | FI_INC_LINK, /* need to increment i_nlink */ |
847 | FI_ACL_MODE, /* indicate acl mode */ | 872 | FI_ACL_MODE, /* indicate acl mode */ |
848 | FI_NO_ALLOC, /* should not allocate any blocks */ | 873 | FI_NO_ALLOC, /* should not allocate any blocks */ |
874 | FI_UPDATE_DIR, /* should update inode block for consistency */ | ||
875 | FI_DELAY_IPUT, /* used for the recovery */ | ||
849 | }; | 876 | }; |
850 | 877 | ||
851 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) | 878 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) |
@@ -878,14 +905,21 @@ static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) | |||
878 | return 0; | 905 | return 0; |
879 | } | 906 | } |
880 | 907 | ||
908 | static inline int f2fs_readonly(struct super_block *sb) | ||
909 | { | ||
910 | return sb->s_flags & MS_RDONLY; | ||
911 | } | ||
912 | |||
881 | /* | 913 | /* |
882 | * file.c | 914 | * file.c |
883 | */ | 915 | */ |
884 | int f2fs_sync_file(struct file *, loff_t, loff_t, int); | 916 | int f2fs_sync_file(struct file *, loff_t, loff_t, int); |
885 | void truncate_data_blocks(struct dnode_of_data *); | 917 | void truncate_data_blocks(struct dnode_of_data *); |
886 | void f2fs_truncate(struct inode *); | 918 | void f2fs_truncate(struct inode *); |
919 | int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | ||
887 | int f2fs_setattr(struct dentry *, struct iattr *); | 920 | int f2fs_setattr(struct dentry *, struct iattr *); |
888 | int truncate_hole(struct inode *, pgoff_t, pgoff_t); | 921 | int truncate_hole(struct inode *, pgoff_t, pgoff_t); |
922 | int truncate_data_blocks_range(struct dnode_of_data *, int); | ||
889 | long f2fs_ioctl(struct file *, unsigned int, unsigned long); | 923 | long f2fs_ioctl(struct file *, unsigned int, unsigned long); |
890 | long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); | 924 | long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); |
891 | 925 | ||
@@ -913,7 +947,6 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); | |||
913 | ino_t f2fs_inode_by_name(struct inode *, struct qstr *); | 947 | ino_t f2fs_inode_by_name(struct inode *, struct qstr *); |
914 | void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, | 948 | void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, |
915 | struct page *, struct inode *); | 949 | struct page *, struct inode *); |
916 | void init_dent_inode(const struct qstr *, struct page *); | ||
917 | int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); | 950 | int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); |
918 | void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); | 951 | void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); |
919 | int f2fs_make_empty(struct inode *, struct inode *); | 952 | int f2fs_make_empty(struct inode *, struct inode *); |
@@ -948,8 +981,8 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); | |||
948 | int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); | 981 | int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); |
949 | int truncate_inode_blocks(struct inode *, pgoff_t); | 982 | int truncate_inode_blocks(struct inode *, pgoff_t); |
950 | int remove_inode_page(struct inode *); | 983 | int remove_inode_page(struct inode *); |
951 | int new_inode_page(struct inode *, const struct qstr *); | 984 | struct page *new_inode_page(struct inode *, const struct qstr *); |
952 | struct page *new_node_page(struct dnode_of_data *, unsigned int); | 985 | struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); |
953 | void ra_node_page(struct f2fs_sb_info *, nid_t); | 986 | void ra_node_page(struct f2fs_sb_info *, nid_t); |
954 | struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); | 987 | struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); |
955 | struct page *get_node_page_ra(struct page *, int); | 988 | struct page *get_node_page_ra(struct page *, int); |
@@ -974,7 +1007,6 @@ void destroy_node_manager_caches(void); | |||
974 | */ | 1007 | */ |
975 | void f2fs_balance_fs(struct f2fs_sb_info *); | 1008 | void f2fs_balance_fs(struct f2fs_sb_info *); |
976 | void invalidate_blocks(struct f2fs_sb_info *, block_t); | 1009 | void invalidate_blocks(struct f2fs_sb_info *, block_t); |
977 | void locate_dirty_segment(struct f2fs_sb_info *, unsigned int); | ||
978 | void clear_prefree_segments(struct f2fs_sb_info *); | 1010 | void clear_prefree_segments(struct f2fs_sb_info *); |
979 | int npages_for_summary_flush(struct f2fs_sb_info *); | 1011 | int npages_for_summary_flush(struct f2fs_sb_info *); |
980 | void allocate_new_segments(struct f2fs_sb_info *); | 1012 | void allocate_new_segments(struct f2fs_sb_info *); |
@@ -1011,7 +1043,9 @@ void remove_orphan_inode(struct f2fs_sb_info *, nid_t); | |||
1011 | int recover_orphan_inodes(struct f2fs_sb_info *); | 1043 | int recover_orphan_inodes(struct f2fs_sb_info *); |
1012 | int get_valid_checkpoint(struct f2fs_sb_info *); | 1044 | int get_valid_checkpoint(struct f2fs_sb_info *); |
1013 | void set_dirty_dir_page(struct inode *, struct page *); | 1045 | void set_dirty_dir_page(struct inode *, struct page *); |
1046 | void add_dirty_dir_inode(struct inode *); | ||
1014 | void remove_dirty_dir_inode(struct inode *); | 1047 | void remove_dirty_dir_inode(struct inode *); |
1048 | struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); | ||
1015 | void sync_dirty_dir_inodes(struct f2fs_sb_info *); | 1049 | void sync_dirty_dir_inodes(struct f2fs_sb_info *); |
1016 | void write_checkpoint(struct f2fs_sb_info *, bool); | 1050 | void write_checkpoint(struct f2fs_sb_info *, bool); |
1017 | void init_orphan_info(struct f2fs_sb_info *); | 1051 | void init_orphan_info(struct f2fs_sb_info *); |
@@ -1025,7 +1059,7 @@ int reserve_new_block(struct dnode_of_data *); | |||
1025 | void update_extent_cache(block_t, struct dnode_of_data *); | 1059 | void update_extent_cache(block_t, struct dnode_of_data *); |
1026 | struct page *find_data_page(struct inode *, pgoff_t, bool); | 1060 | struct page *find_data_page(struct inode *, pgoff_t, bool); |
1027 | struct page *get_lock_data_page(struct inode *, pgoff_t); | 1061 | struct page *get_lock_data_page(struct inode *, pgoff_t); |
1028 | struct page *get_new_data_page(struct inode *, pgoff_t, bool); | 1062 | struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); |
1029 | int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); | 1063 | int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); |
1030 | int do_write_data_page(struct page *); | 1064 | int do_write_data_page(struct page *); |
1031 | 1065 | ||
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1cae864f8dfc..d2d2b7dbdcc1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -63,9 +63,10 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
63 | f2fs_put_dnode(&dn); | 63 | f2fs_put_dnode(&dn); |
64 | mutex_unlock_op(sbi, ilock); | 64 | mutex_unlock_op(sbi, ilock); |
65 | 65 | ||
66 | file_update_time(vma->vm_file); | ||
66 | lock_page(page); | 67 | lock_page(page); |
67 | if (page->mapping != inode->i_mapping || | 68 | if (page->mapping != inode->i_mapping || |
68 | page_offset(page) >= i_size_read(inode) || | 69 | page_offset(page) > i_size_read(inode) || |
69 | !PageUptodate(page)) { | 70 | !PageUptodate(page)) { |
70 | unlock_page(page); | 71 | unlock_page(page); |
71 | err = -EFAULT; | 72 | err = -EFAULT; |
@@ -76,10 +77,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
76 | * check to see if the page is mapped already (no holes) | 77 | * check to see if the page is mapped already (no holes) |
77 | */ | 78 | */ |
78 | if (PageMappedToDisk(page)) | 79 | if (PageMappedToDisk(page)) |
79 | goto out; | 80 | goto mapped; |
80 | |||
81 | /* fill the page */ | ||
82 | wait_on_page_writeback(page); | ||
83 | 81 | ||
84 | /* page is wholly or partially inside EOF */ | 82 | /* page is wholly or partially inside EOF */ |
85 | if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { | 83 | if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { |
@@ -90,7 +88,9 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
90 | set_page_dirty(page); | 88 | set_page_dirty(page); |
91 | SetPageUptodate(page); | 89 | SetPageUptodate(page); |
92 | 90 | ||
93 | file_update_time(vma->vm_file); | 91 | mapped: |
92 | /* fill the page */ | ||
93 | wait_on_page_writeback(page); | ||
94 | out: | 94 | out: |
95 | sb_end_pagefault(inode->i_sb); | 95 | sb_end_pagefault(inode->i_sb); |
96 | return block_page_mkwrite_return(err); | 96 | return block_page_mkwrite_return(err); |
@@ -102,6 +102,24 @@ static const struct vm_operations_struct f2fs_file_vm_ops = { | |||
102 | .remap_pages = generic_file_remap_pages, | 102 | .remap_pages = generic_file_remap_pages, |
103 | }; | 103 | }; |
104 | 104 | ||
105 | static int get_parent_ino(struct inode *inode, nid_t *pino) | ||
106 | { | ||
107 | struct dentry *dentry; | ||
108 | |||
109 | inode = igrab(inode); | ||
110 | dentry = d_find_any_alias(inode); | ||
111 | iput(inode); | ||
112 | if (!dentry) | ||
113 | return 0; | ||
114 | |||
115 | inode = igrab(dentry->d_parent->d_inode); | ||
116 | dput(dentry); | ||
117 | |||
118 | *pino = inode->i_ino; | ||
119 | iput(inode); | ||
120 | return 1; | ||
121 | } | ||
122 | |||
105 | int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | 123 | int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) |
106 | { | 124 | { |
107 | struct inode *inode = file->f_mapping->host; | 125 | struct inode *inode = file->f_mapping->host; |
@@ -114,7 +132,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
114 | .for_reclaim = 0, | 132 | .for_reclaim = 0, |
115 | }; | 133 | }; |
116 | 134 | ||
117 | if (inode->i_sb->s_flags & MS_RDONLY) | 135 | if (f2fs_readonly(inode->i_sb)) |
118 | return 0; | 136 | return 0; |
119 | 137 | ||
120 | trace_f2fs_sync_file_enter(inode); | 138 | trace_f2fs_sync_file_enter(inode); |
@@ -134,7 +152,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
134 | 152 | ||
135 | if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) | 153 | if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) |
136 | need_cp = true; | 154 | need_cp = true; |
137 | else if (is_cp_file(inode)) | 155 | else if (file_wrong_pino(inode)) |
138 | need_cp = true; | 156 | need_cp = true; |
139 | else if (!space_for_roll_forward(sbi)) | 157 | else if (!space_for_roll_forward(sbi)) |
140 | need_cp = true; | 158 | need_cp = true; |
@@ -142,11 +160,23 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
142 | need_cp = true; | 160 | need_cp = true; |
143 | 161 | ||
144 | if (need_cp) { | 162 | if (need_cp) { |
163 | nid_t pino; | ||
164 | |||
145 | /* all the dirty node pages should be flushed for POR */ | 165 | /* all the dirty node pages should be flushed for POR */ |
146 | ret = f2fs_sync_fs(inode->i_sb, 1); | 166 | ret = f2fs_sync_fs(inode->i_sb, 1); |
167 | if (file_wrong_pino(inode) && inode->i_nlink == 1 && | ||
168 | get_parent_ino(inode, &pino)) { | ||
169 | F2FS_I(inode)->i_pino = pino; | ||
170 | file_got_pino(inode); | ||
171 | mark_inode_dirty_sync(inode); | ||
172 | ret = f2fs_write_inode(inode, NULL); | ||
173 | if (ret) | ||
174 | goto out; | ||
175 | } | ||
147 | } else { | 176 | } else { |
148 | /* if there is no written node page, write its inode page */ | 177 | /* if there is no written node page, write its inode page */ |
149 | while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { | 178 | while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { |
179 | mark_inode_dirty_sync(inode); | ||
150 | ret = f2fs_write_inode(inode, NULL); | 180 | ret = f2fs_write_inode(inode, NULL); |
151 | if (ret) | 181 | if (ret) |
152 | goto out; | 182 | goto out; |
@@ -168,7 +198,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
168 | return 0; | 198 | return 0; |
169 | } | 199 | } |
170 | 200 | ||
171 | static int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | 201 | int truncate_data_blocks_range(struct dnode_of_data *dn, int count) |
172 | { | 202 | { |
173 | int nr_free = 0, ofs = dn->ofs_in_node; | 203 | int nr_free = 0, ofs = dn->ofs_in_node; |
174 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 204 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); |
@@ -185,10 +215,10 @@ static int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | |||
185 | 215 | ||
186 | update_extent_cache(NULL_ADDR, dn); | 216 | update_extent_cache(NULL_ADDR, dn); |
187 | invalidate_blocks(sbi, blkaddr); | 217 | invalidate_blocks(sbi, blkaddr); |
188 | dec_valid_block_count(sbi, dn->inode, 1); | ||
189 | nr_free++; | 218 | nr_free++; |
190 | } | 219 | } |
191 | if (nr_free) { | 220 | if (nr_free) { |
221 | dec_valid_block_count(sbi, dn->inode, nr_free); | ||
192 | set_page_dirty(dn->node_page); | 222 | set_page_dirty(dn->node_page); |
193 | sync_inode_page(dn); | 223 | sync_inode_page(dn); |
194 | } | 224 | } |
@@ -291,7 +321,7 @@ void f2fs_truncate(struct inode *inode) | |||
291 | } | 321 | } |
292 | } | 322 | } |
293 | 323 | ||
294 | static int f2fs_getattr(struct vfsmount *mnt, | 324 | int f2fs_getattr(struct vfsmount *mnt, |
295 | struct dentry *dentry, struct kstat *stat) | 325 | struct dentry *dentry, struct kstat *stat) |
296 | { | 326 | { |
297 | struct inode *inode = dentry->d_inode; | 327 | struct inode *inode = dentry->d_inode; |
@@ -387,7 +417,7 @@ static void fill_zero(struct inode *inode, pgoff_t index, | |||
387 | f2fs_balance_fs(sbi); | 417 | f2fs_balance_fs(sbi); |
388 | 418 | ||
389 | ilock = mutex_lock_op(sbi); | 419 | ilock = mutex_lock_op(sbi); |
390 | page = get_new_data_page(inode, index, false); | 420 | page = get_new_data_page(inode, NULL, index, false); |
391 | mutex_unlock_op(sbi, ilock); | 421 | mutex_unlock_op(sbi, ilock); |
392 | 422 | ||
393 | if (!IS_ERR(page)) { | 423 | if (!IS_ERR(page)) { |
@@ -575,10 +605,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
575 | int ret; | 605 | int ret; |
576 | 606 | ||
577 | switch (cmd) { | 607 | switch (cmd) { |
578 | case FS_IOC_GETFLAGS: | 608 | case F2FS_IOC_GETFLAGS: |
579 | flags = fi->i_flags & FS_FL_USER_VISIBLE; | 609 | flags = fi->i_flags & FS_FL_USER_VISIBLE; |
580 | return put_user(flags, (int __user *) arg); | 610 | return put_user(flags, (int __user *) arg); |
581 | case FS_IOC_SETFLAGS: | 611 | case F2FS_IOC_SETFLAGS: |
582 | { | 612 | { |
583 | unsigned int oldflags; | 613 | unsigned int oldflags; |
584 | 614 | ||
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 14961593e93c..35f9b1a196aa 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c | |||
@@ -76,7 +76,9 @@ static int gc_thread_func(void *data) | |||
76 | else | 76 | else |
77 | wait_ms = increase_sleep_time(wait_ms); | 77 | wait_ms = increase_sleep_time(wait_ms); |
78 | 78 | ||
79 | #ifdef CONFIG_F2FS_STAT_FS | ||
79 | sbi->bg_gc++; | 80 | sbi->bg_gc++; |
81 | #endif | ||
80 | 82 | ||
81 | /* if return value is not zero, no victim was selected */ | 83 | /* if return value is not zero, no victim was selected */ |
82 | if (f2fs_gc(sbi)) | 84 | if (f2fs_gc(sbi)) |
@@ -89,23 +91,28 @@ int start_gc_thread(struct f2fs_sb_info *sbi) | |||
89 | { | 91 | { |
90 | struct f2fs_gc_kthread *gc_th; | 92 | struct f2fs_gc_kthread *gc_th; |
91 | dev_t dev = sbi->sb->s_bdev->bd_dev; | 93 | dev_t dev = sbi->sb->s_bdev->bd_dev; |
94 | int err = 0; | ||
92 | 95 | ||
93 | if (!test_opt(sbi, BG_GC)) | 96 | if (!test_opt(sbi, BG_GC)) |
94 | return 0; | 97 | goto out; |
95 | gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); | 98 | gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); |
96 | if (!gc_th) | 99 | if (!gc_th) { |
97 | return -ENOMEM; | 100 | err = -ENOMEM; |
101 | goto out; | ||
102 | } | ||
98 | 103 | ||
99 | sbi->gc_thread = gc_th; | 104 | sbi->gc_thread = gc_th; |
100 | init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); | 105 | init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); |
101 | sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, | 106 | sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, |
102 | "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); | 107 | "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); |
103 | if (IS_ERR(gc_th->f2fs_gc_task)) { | 108 | if (IS_ERR(gc_th->f2fs_gc_task)) { |
109 | err = PTR_ERR(gc_th->f2fs_gc_task); | ||
104 | kfree(gc_th); | 110 | kfree(gc_th); |
105 | sbi->gc_thread = NULL; | 111 | sbi->gc_thread = NULL; |
106 | return -ENOMEM; | ||
107 | } | 112 | } |
108 | return 0; | 113 | |
114 | out: | ||
115 | return err; | ||
109 | } | 116 | } |
110 | 117 | ||
111 | void stop_gc_thread(struct f2fs_sb_info *sbi) | 118 | void stop_gc_thread(struct f2fs_sb_info *sbi) |
@@ -234,14 +241,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, | |||
234 | { | 241 | { |
235 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 242 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
236 | struct victim_sel_policy p; | 243 | struct victim_sel_policy p; |
237 | unsigned int secno; | 244 | unsigned int secno, max_cost; |
238 | int nsearched = 0; | 245 | int nsearched = 0; |
239 | 246 | ||
240 | p.alloc_mode = alloc_mode; | 247 | p.alloc_mode = alloc_mode; |
241 | select_policy(sbi, gc_type, type, &p); | 248 | select_policy(sbi, gc_type, type, &p); |
242 | 249 | ||
243 | p.min_segno = NULL_SEGNO; | 250 | p.min_segno = NULL_SEGNO; |
244 | p.min_cost = get_max_cost(sbi, &p); | 251 | p.min_cost = max_cost = get_max_cost(sbi, &p); |
245 | 252 | ||
246 | mutex_lock(&dirty_i->seglist_lock); | 253 | mutex_lock(&dirty_i->seglist_lock); |
247 | 254 | ||
@@ -280,7 +287,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, | |||
280 | p.min_cost = cost; | 287 | p.min_cost = cost; |
281 | } | 288 | } |
282 | 289 | ||
283 | if (cost == get_max_cost(sbi, &p)) | 290 | if (cost == max_cost) |
284 | continue; | 291 | continue; |
285 | 292 | ||
286 | if (nsearched++ >= MAX_VICTIM_SEARCH) { | 293 | if (nsearched++ >= MAX_VICTIM_SEARCH) { |
@@ -288,8 +295,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, | |||
288 | break; | 295 | break; |
289 | } | 296 | } |
290 | } | 297 | } |
291 | got_it: | ||
292 | if (p.min_segno != NULL_SEGNO) { | 298 | if (p.min_segno != NULL_SEGNO) { |
299 | got_it: | ||
293 | if (p.alloc_mode == LFS) { | 300 | if (p.alloc_mode == LFS) { |
294 | secno = GET_SECNO(sbi, p.min_segno); | 301 | secno = GET_SECNO(sbi, p.min_segno); |
295 | if (gc_type == FG_GC) | 302 | if (gc_type == FG_GC) |
@@ -314,28 +321,21 @@ static const struct victim_selection default_v_ops = { | |||
314 | 321 | ||
315 | static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) | 322 | static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) |
316 | { | 323 | { |
317 | struct list_head *this; | ||
318 | struct inode_entry *ie; | 324 | struct inode_entry *ie; |
319 | 325 | ||
320 | list_for_each(this, ilist) { | 326 | list_for_each_entry(ie, ilist, list) |
321 | ie = list_entry(this, struct inode_entry, list); | ||
322 | if (ie->inode->i_ino == ino) | 327 | if (ie->inode->i_ino == ino) |
323 | return ie->inode; | 328 | return ie->inode; |
324 | } | ||
325 | return NULL; | 329 | return NULL; |
326 | } | 330 | } |
327 | 331 | ||
328 | static void add_gc_inode(struct inode *inode, struct list_head *ilist) | 332 | static void add_gc_inode(struct inode *inode, struct list_head *ilist) |
329 | { | 333 | { |
330 | struct list_head *this; | 334 | struct inode_entry *new_ie; |
331 | struct inode_entry *new_ie, *ie; | ||
332 | 335 | ||
333 | list_for_each(this, ilist) { | 336 | if (inode == find_gc_inode(inode->i_ino, ilist)) { |
334 | ie = list_entry(this, struct inode_entry, list); | 337 | iput(inode); |
335 | if (ie->inode == inode) { | 338 | return; |
336 | iput(inode); | ||
337 | return; | ||
338 | } | ||
339 | } | 339 | } |
340 | repeat: | 340 | repeat: |
341 | new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); | 341 | new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); |
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 91ac7f9d88ee..2b2d45d19e3e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c | |||
@@ -109,12 +109,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) | |||
109 | ret = do_read_inode(inode); | 109 | ret = do_read_inode(inode); |
110 | if (ret) | 110 | if (ret) |
111 | goto bad_inode; | 111 | goto bad_inode; |
112 | |||
113 | if (!sbi->por_doing && inode->i_nlink == 0) { | ||
114 | ret = -ENOENT; | ||
115 | goto bad_inode; | ||
116 | } | ||
117 | |||
118 | make_now: | 112 | make_now: |
119 | if (ino == F2FS_NODE_INO(sbi)) { | 113 | if (ino == F2FS_NODE_INO(sbi)) { |
120 | inode->i_mapping->a_ops = &f2fs_node_aops; | 114 | inode->i_mapping->a_ops = &f2fs_node_aops; |
@@ -130,8 +124,7 @@ make_now: | |||
130 | inode->i_op = &f2fs_dir_inode_operations; | 124 | inode->i_op = &f2fs_dir_inode_operations; |
131 | inode->i_fop = &f2fs_dir_operations; | 125 | inode->i_fop = &f2fs_dir_operations; |
132 | inode->i_mapping->a_ops = &f2fs_dblock_aops; | 126 | inode->i_mapping->a_ops = &f2fs_dblock_aops; |
133 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE | | 127 | mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); |
134 | __GFP_ZERO); | ||
135 | } else if (S_ISLNK(inode->i_mode)) { | 128 | } else if (S_ISLNK(inode->i_mode)) { |
136 | inode->i_op = &f2fs_symlink_inode_operations; | 129 | inode->i_op = &f2fs_symlink_inode_operations; |
137 | inode->i_mapping->a_ops = &f2fs_dblock_aops; | 130 | inode->i_mapping->a_ops = &f2fs_dblock_aops; |
@@ -199,6 +192,7 @@ void update_inode(struct inode *inode, struct page *node_page) | |||
199 | 192 | ||
200 | set_cold_node(inode, node_page); | 193 | set_cold_node(inode, node_page); |
201 | set_page_dirty(node_page); | 194 | set_page_dirty(node_page); |
195 | clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); | ||
202 | } | 196 | } |
203 | 197 | ||
204 | int update_inode_page(struct inode *inode) | 198 | int update_inode_page(struct inode *inode) |
@@ -224,6 +218,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
224 | inode->i_ino == F2FS_META_INO(sbi)) | 218 | inode->i_ino == F2FS_META_INO(sbi)) |
225 | return 0; | 219 | return 0; |
226 | 220 | ||
221 | if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) | ||
222 | return 0; | ||
223 | |||
227 | if (wbc) | 224 | if (wbc) |
228 | f2fs_balance_fs(sbi); | 225 | f2fs_balance_fs(sbi); |
229 | 226 | ||
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 47abc9722b17..64c07169df05 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c | |||
@@ -112,7 +112,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, | |||
112 | int count = le32_to_cpu(sbi->raw_super->extension_count); | 112 | int count = le32_to_cpu(sbi->raw_super->extension_count); |
113 | for (i = 0; i < count; i++) { | 113 | for (i = 0; i < count; i++) { |
114 | if (is_multimedia_file(name, extlist[i])) { | 114 | if (is_multimedia_file(name, extlist[i])) { |
115 | set_cold_file(inode); | 115 | file_set_cold(inode); |
116 | break; | 116 | break; |
117 | } | 117 | } |
118 | } | 118 | } |
@@ -149,8 +149,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
149 | 149 | ||
150 | alloc_nid_done(sbi, ino); | 150 | alloc_nid_done(sbi, ino); |
151 | 151 | ||
152 | if (!sbi->por_doing) | 152 | d_instantiate(dentry, inode); |
153 | d_instantiate(dentry, inode); | ||
154 | unlock_new_inode(inode); | 153 | unlock_new_inode(inode); |
155 | return 0; | 154 | return 0; |
156 | out: | 155 | out: |
@@ -173,7 +172,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, | |||
173 | f2fs_balance_fs(sbi); | 172 | f2fs_balance_fs(sbi); |
174 | 173 | ||
175 | inode->i_ctime = CURRENT_TIME; | 174 | inode->i_ctime = CURRENT_TIME; |
176 | atomic_inc(&inode->i_count); | 175 | ihold(inode); |
177 | 176 | ||
178 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); | 177 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); |
179 | ilock = mutex_lock_op(sbi); | 178 | ilock = mutex_lock_op(sbi); |
@@ -182,17 +181,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, | |||
182 | if (err) | 181 | if (err) |
183 | goto out; | 182 | goto out; |
184 | 183 | ||
185 | /* | ||
186 | * This file should be checkpointed during fsync. | ||
187 | * We lost i_pino from now on. | ||
188 | */ | ||
189 | set_cp_file(inode); | ||
190 | |||
191 | d_instantiate(dentry, inode); | 184 | d_instantiate(dentry, inode); |
192 | return 0; | 185 | return 0; |
193 | out: | 186 | out: |
194 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | 187 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); |
195 | make_bad_inode(inode); | ||
196 | iput(inode); | 188 | iput(inode); |
197 | return err; | 189 | return err; |
198 | } | 190 | } |
@@ -498,6 +490,7 @@ const struct inode_operations f2fs_dir_inode_operations = { | |||
498 | .rmdir = f2fs_rmdir, | 490 | .rmdir = f2fs_rmdir, |
499 | .mknod = f2fs_mknod, | 491 | .mknod = f2fs_mknod, |
500 | .rename = f2fs_rename, | 492 | .rename = f2fs_rename, |
493 | .getattr = f2fs_getattr, | ||
501 | .setattr = f2fs_setattr, | 494 | .setattr = f2fs_setattr, |
502 | .get_acl = f2fs_get_acl, | 495 | .get_acl = f2fs_get_acl, |
503 | #ifdef CONFIG_F2FS_FS_XATTR | 496 | #ifdef CONFIG_F2FS_FS_XATTR |
@@ -512,6 +505,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { | |||
512 | .readlink = generic_readlink, | 505 | .readlink = generic_readlink, |
513 | .follow_link = page_follow_link_light, | 506 | .follow_link = page_follow_link_light, |
514 | .put_link = page_put_link, | 507 | .put_link = page_put_link, |
508 | .getattr = f2fs_getattr, | ||
515 | .setattr = f2fs_setattr, | 509 | .setattr = f2fs_setattr, |
516 | #ifdef CONFIG_F2FS_FS_XATTR | 510 | #ifdef CONFIG_F2FS_FS_XATTR |
517 | .setxattr = generic_setxattr, | 511 | .setxattr = generic_setxattr, |
@@ -522,6 +516,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { | |||
522 | }; | 516 | }; |
523 | 517 | ||
524 | const struct inode_operations f2fs_special_inode_operations = { | 518 | const struct inode_operations f2fs_special_inode_operations = { |
519 | .getattr = f2fs_getattr, | ||
525 | .setattr = f2fs_setattr, | 520 | .setattr = f2fs_setattr, |
526 | .get_acl = f2fs_get_acl, | 521 | .get_acl = f2fs_get_acl, |
527 | #ifdef CONFIG_F2FS_FS_XATTR | 522 | #ifdef CONFIG_F2FS_FS_XATTR |
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3df43b4efd89..b418aee09573 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -408,10 +408,13 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) | |||
408 | level = get_node_path(index, offset, noffset); | 408 | level = get_node_path(index, offset, noffset); |
409 | 409 | ||
410 | nids[0] = dn->inode->i_ino; | 410 | nids[0] = dn->inode->i_ino; |
411 | npage[0] = get_node_page(sbi, nids[0]); | 411 | npage[0] = dn->inode_page; |
412 | if (IS_ERR(npage[0])) | ||
413 | return PTR_ERR(npage[0]); | ||
414 | 412 | ||
413 | if (!npage[0]) { | ||
414 | npage[0] = get_node_page(sbi, nids[0]); | ||
415 | if (IS_ERR(npage[0])) | ||
416 | return PTR_ERR(npage[0]); | ||
417 | } | ||
415 | parent = npage[0]; | 418 | parent = npage[0]; |
416 | if (level != 0) | 419 | if (level != 0) |
417 | nids[1] = get_nid(parent, offset[0], true); | 420 | nids[1] = get_nid(parent, offset[0], true); |
@@ -430,7 +433,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) | |||
430 | } | 433 | } |
431 | 434 | ||
432 | dn->nid = nids[i]; | 435 | dn->nid = nids[i]; |
433 | npage[i] = new_node_page(dn, noffset[i]); | 436 | npage[i] = new_node_page(dn, noffset[i], NULL); |
434 | if (IS_ERR(npage[i])) { | 437 | if (IS_ERR(npage[i])) { |
435 | alloc_nid_failed(sbi, nids[i]); | 438 | alloc_nid_failed(sbi, nids[i]); |
436 | err = PTR_ERR(npage[i]); | 439 | err = PTR_ERR(npage[i]); |
@@ -803,22 +806,19 @@ int remove_inode_page(struct inode *inode) | |||
803 | return 0; | 806 | return 0; |
804 | } | 807 | } |
805 | 808 | ||
806 | int new_inode_page(struct inode *inode, const struct qstr *name) | 809 | struct page *new_inode_page(struct inode *inode, const struct qstr *name) |
807 | { | 810 | { |
808 | struct page *page; | ||
809 | struct dnode_of_data dn; | 811 | struct dnode_of_data dn; |
810 | 812 | ||
811 | /* allocate inode page for new inode */ | 813 | /* allocate inode page for new inode */ |
812 | set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); | 814 | set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); |
813 | page = new_node_page(&dn, 0); | 815 | |
814 | init_dent_inode(name, page); | 816 | /* caller should f2fs_put_page(page, 1); */ |
815 | if (IS_ERR(page)) | 817 | return new_node_page(&dn, 0, NULL); |
816 | return PTR_ERR(page); | ||
817 | f2fs_put_page(page, 1); | ||
818 | return 0; | ||
819 | } | 818 | } |
820 | 819 | ||
821 | struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) | 820 | struct page *new_node_page(struct dnode_of_data *dn, |
821 | unsigned int ofs, struct page *ipage) | ||
822 | { | 822 | { |
823 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 823 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); |
824 | struct address_space *mapping = sbi->node_inode->i_mapping; | 824 | struct address_space *mapping = sbi->node_inode->i_mapping; |
@@ -851,7 +851,10 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) | |||
851 | set_cold_node(dn->inode, page); | 851 | set_cold_node(dn->inode, page); |
852 | 852 | ||
853 | dn->node_page = page; | 853 | dn->node_page = page; |
854 | sync_inode_page(dn); | 854 | if (ipage) |
855 | update_inode(dn->inode, ipage); | ||
856 | else | ||
857 | sync_inode_page(dn); | ||
855 | set_page_dirty(page); | 858 | set_page_dirty(page); |
856 | if (ofs == 0) | 859 | if (ofs == 0) |
857 | inc_valid_inode_count(sbi); | 860 | inc_valid_inode_count(sbi); |
@@ -1205,7 +1208,8 @@ static int f2fs_set_node_page_dirty(struct page *page) | |||
1205 | return 0; | 1208 | return 0; |
1206 | } | 1209 | } |
1207 | 1210 | ||
1208 | static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) | 1211 | static void f2fs_invalidate_node_page(struct page *page, unsigned int offset, |
1212 | unsigned int length) | ||
1209 | { | 1213 | { |
1210 | struct inode *inode = page->mapping->host; | 1214 | struct inode *inode = page->mapping->host; |
1211 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 1215 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
@@ -1492,9 +1496,10 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | |||
1492 | new_ni = old_ni; | 1496 | new_ni = old_ni; |
1493 | new_ni.ino = ino; | 1497 | new_ni.ino = ino; |
1494 | 1498 | ||
1499 | if (!inc_valid_node_count(sbi, NULL, 1)) | ||
1500 | WARN_ON(1); | ||
1495 | set_node_addr(sbi, &new_ni, NEW_ADDR); | 1501 | set_node_addr(sbi, &new_ni, NEW_ADDR); |
1496 | inc_valid_inode_count(sbi); | 1502 | inc_valid_inode_count(sbi); |
1497 | |||
1498 | f2fs_put_page(ipage, 1); | 1503 | f2fs_put_page(ipage, 1); |
1499 | return 0; | 1504 | return 0; |
1500 | } | 1505 | } |
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 0a2d72f0024d..c65fb4f4230f 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h | |||
@@ -275,25 +275,27 @@ static inline nid_t get_nid(struct page *p, int off, bool i) | |||
275 | * - Mark cold node blocks in their node footer | 275 | * - Mark cold node blocks in their node footer |
276 | * - Mark cold data pages in page cache | 276 | * - Mark cold data pages in page cache |
277 | */ | 277 | */ |
278 | static inline int is_cold_file(struct inode *inode) | 278 | static inline int is_file(struct inode *inode, int type) |
279 | { | 279 | { |
280 | return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT; | 280 | return F2FS_I(inode)->i_advise & type; |
281 | } | 281 | } |
282 | 282 | ||
283 | static inline void set_cold_file(struct inode *inode) | 283 | static inline void set_file(struct inode *inode, int type) |
284 | { | 284 | { |
285 | F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT; | 285 | F2FS_I(inode)->i_advise |= type; |
286 | } | 286 | } |
287 | 287 | ||
288 | static inline int is_cp_file(struct inode *inode) | 288 | static inline void clear_file(struct inode *inode, int type) |
289 | { | 289 | { |
290 | return F2FS_I(inode)->i_advise & FADVISE_CP_BIT; | 290 | F2FS_I(inode)->i_advise &= ~type; |
291 | } | 291 | } |
292 | 292 | ||
293 | static inline void set_cp_file(struct inode *inode) | 293 | #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) |
294 | { | 294 | #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) |
295 | F2FS_I(inode)->i_advise |= FADVISE_CP_BIT; | 295 | #define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) |
296 | } | 296 | #define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) |
297 | #define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) | ||
298 | #define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) | ||
297 | 299 | ||
298 | static inline int is_cold_data(struct page *page) | 300 | static inline int is_cold_data(struct page *page) |
299 | { | 301 | { |
@@ -310,29 +312,16 @@ static inline void clear_cold_data(struct page *page) | |||
310 | ClearPageChecked(page); | 312 | ClearPageChecked(page); |
311 | } | 313 | } |
312 | 314 | ||
313 | static inline int is_cold_node(struct page *page) | 315 | static inline int is_node(struct page *page, int type) |
314 | { | 316 | { |
315 | void *kaddr = page_address(page); | 317 | void *kaddr = page_address(page); |
316 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; | 318 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; |
317 | unsigned int flag = le32_to_cpu(rn->footer.flag); | 319 | return le32_to_cpu(rn->footer.flag) & (1 << type); |
318 | return flag & (0x1 << COLD_BIT_SHIFT); | ||
319 | } | 320 | } |
320 | 321 | ||
321 | static inline unsigned char is_fsync_dnode(struct page *page) | 322 | #define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) |
322 | { | 323 | #define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) |
323 | void *kaddr = page_address(page); | 324 | #define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) |
324 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; | ||
325 | unsigned int flag = le32_to_cpu(rn->footer.flag); | ||
326 | return flag & (0x1 << FSYNC_BIT_SHIFT); | ||
327 | } | ||
328 | |||
329 | static inline unsigned char is_dent_dnode(struct page *page) | ||
330 | { | ||
331 | void *kaddr = page_address(page); | ||
332 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; | ||
333 | unsigned int flag = le32_to_cpu(rn->footer.flag); | ||
334 | return flag & (0x1 << DENT_BIT_SHIFT); | ||
335 | } | ||
336 | 325 | ||
337 | static inline void set_cold_node(struct inode *inode, struct page *page) | 326 | static inline void set_cold_node(struct inode *inode, struct page *page) |
338 | { | 327 | { |
@@ -346,26 +335,15 @@ static inline void set_cold_node(struct inode *inode, struct page *page) | |||
346 | rn->footer.flag = cpu_to_le32(flag); | 335 | rn->footer.flag = cpu_to_le32(flag); |
347 | } | 336 | } |
348 | 337 | ||
349 | static inline void set_fsync_mark(struct page *page, int mark) | 338 | static inline void set_mark(struct page *page, int mark, int type) |
350 | { | 339 | { |
351 | void *kaddr = page_address(page); | 340 | struct f2fs_node *rn = (struct f2fs_node *)page_address(page); |
352 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; | ||
353 | unsigned int flag = le32_to_cpu(rn->footer.flag); | ||
354 | if (mark) | ||
355 | flag |= (0x1 << FSYNC_BIT_SHIFT); | ||
356 | else | ||
357 | flag &= ~(0x1 << FSYNC_BIT_SHIFT); | ||
358 | rn->footer.flag = cpu_to_le32(flag); | ||
359 | } | ||
360 | |||
361 | static inline void set_dentry_mark(struct page *page, int mark) | ||
362 | { | ||
363 | void *kaddr = page_address(page); | ||
364 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; | ||
365 | unsigned int flag = le32_to_cpu(rn->footer.flag); | 341 | unsigned int flag = le32_to_cpu(rn->footer.flag); |
366 | if (mark) | 342 | if (mark) |
367 | flag |= (0x1 << DENT_BIT_SHIFT); | 343 | flag |= (0x1 << type); |
368 | else | 344 | else |
369 | flag &= ~(0x1 << DENT_BIT_SHIFT); | 345 | flag &= ~(0x1 << type); |
370 | rn->footer.flag = cpu_to_le32(flag); | 346 | rn->footer.flag = cpu_to_le32(flag); |
371 | } | 347 | } |
348 | #define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) | ||
349 | #define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) | ||
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 60c8a5097058..d56d951c2253 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c | |||
@@ -40,36 +40,54 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, | |||
40 | 40 | ||
41 | static int recover_dentry(struct page *ipage, struct inode *inode) | 41 | static int recover_dentry(struct page *ipage, struct inode *inode) |
42 | { | 42 | { |
43 | struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage); | 43 | void *kaddr = page_address(ipage); |
44 | struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; | ||
44 | struct f2fs_inode *raw_inode = &(raw_node->i); | 45 | struct f2fs_inode *raw_inode = &(raw_node->i); |
45 | struct qstr name; | 46 | nid_t pino = le32_to_cpu(raw_inode->i_pino); |
46 | struct f2fs_dir_entry *de; | 47 | struct f2fs_dir_entry *de; |
48 | struct qstr name; | ||
47 | struct page *page; | 49 | struct page *page; |
48 | struct inode *dir; | 50 | struct inode *dir, *einode; |
49 | int err = 0; | 51 | int err = 0; |
50 | 52 | ||
51 | if (!is_dent_dnode(ipage)) | 53 | dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); |
52 | goto out; | 54 | if (!dir) { |
53 | 55 | dir = f2fs_iget(inode->i_sb, pino); | |
54 | dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino)); | 56 | if (IS_ERR(dir)) { |
55 | if (IS_ERR(dir)) { | 57 | err = PTR_ERR(dir); |
56 | err = PTR_ERR(dir); | 58 | goto out; |
57 | goto out; | 59 | } |
60 | set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); | ||
61 | add_dirty_dir_inode(dir); | ||
58 | } | 62 | } |
59 | 63 | ||
60 | name.len = le32_to_cpu(raw_inode->i_namelen); | 64 | name.len = le32_to_cpu(raw_inode->i_namelen); |
61 | name.name = raw_inode->i_name; | 65 | name.name = raw_inode->i_name; |
62 | 66 | retry: | |
63 | de = f2fs_find_entry(dir, &name, &page); | 67 | de = f2fs_find_entry(dir, &name, &page); |
64 | if (de) { | 68 | if (de && inode->i_ino == le32_to_cpu(de->ino)) { |
65 | kunmap(page); | 69 | kunmap(page); |
66 | f2fs_put_page(page, 0); | 70 | f2fs_put_page(page, 0); |
67 | } else { | 71 | goto out; |
68 | err = __f2fs_add_link(dir, &name, inode); | 72 | } |
73 | if (de) { | ||
74 | einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); | ||
75 | if (IS_ERR(einode)) { | ||
76 | WARN_ON(1); | ||
77 | if (PTR_ERR(einode) == -ENOENT) | ||
78 | err = -EEXIST; | ||
79 | goto out; | ||
80 | } | ||
81 | f2fs_delete_entry(de, page, einode); | ||
82 | iput(einode); | ||
83 | goto retry; | ||
69 | } | 84 | } |
70 | iput(dir); | 85 | err = __f2fs_add_link(dir, &name, inode); |
71 | out: | 86 | out: |
72 | kunmap(ipage); | 87 | f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " |
88 | "ino = %x, name = %s, dir = %lx, err = %d", | ||
89 | ino_of_node(ipage), raw_inode->i_name, | ||
90 | IS_ERR(dir) ? 0 : dir->i_ino, err); | ||
73 | return err; | 91 | return err; |
74 | } | 92 | } |
75 | 93 | ||
@@ -79,6 +97,9 @@ static int recover_inode(struct inode *inode, struct page *node_page) | |||
79 | struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; | 97 | struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; |
80 | struct f2fs_inode *raw_inode = &(raw_node->i); | 98 | struct f2fs_inode *raw_inode = &(raw_node->i); |
81 | 99 | ||
100 | if (!IS_INODE(node_page)) | ||
101 | return 0; | ||
102 | |||
82 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 103 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
83 | i_size_write(inode, le64_to_cpu(raw_inode->i_size)); | 104 | i_size_write(inode, le64_to_cpu(raw_inode->i_size)); |
84 | inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); | 105 | inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); |
@@ -88,7 +109,12 @@ static int recover_inode(struct inode *inode, struct page *node_page) | |||
88 | inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); | 109 | inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); |
89 | inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); | 110 | inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); |
90 | 111 | ||
91 | return recover_dentry(node_page, inode); | 112 | if (is_dent_dnode(node_page)) |
113 | return recover_dentry(node_page, inode); | ||
114 | |||
115 | f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", | ||
116 | ino_of_node(node_page), raw_inode->i_name); | ||
117 | return 0; | ||
92 | } | 118 | } |
93 | 119 | ||
94 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | 120 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) |
@@ -119,14 +145,13 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
119 | lock_page(page); | 145 | lock_page(page); |
120 | 146 | ||
121 | if (cp_ver != cpver_of_node(page)) | 147 | if (cp_ver != cpver_of_node(page)) |
122 | goto unlock_out; | 148 | break; |
123 | 149 | ||
124 | if (!is_fsync_dnode(page)) | 150 | if (!is_fsync_dnode(page)) |
125 | goto next; | 151 | goto next; |
126 | 152 | ||
127 | entry = get_fsync_inode(head, ino_of_node(page)); | 153 | entry = get_fsync_inode(head, ino_of_node(page)); |
128 | if (entry) { | 154 | if (entry) { |
129 | entry->blkaddr = blkaddr; | ||
130 | if (IS_INODE(page) && is_dent_dnode(page)) | 155 | if (IS_INODE(page) && is_dent_dnode(page)) |
131 | set_inode_flag(F2FS_I(entry->inode), | 156 | set_inode_flag(F2FS_I(entry->inode), |
132 | FI_INC_LINK); | 157 | FI_INC_LINK); |
@@ -134,48 +159,40 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
134 | if (IS_INODE(page) && is_dent_dnode(page)) { | 159 | if (IS_INODE(page) && is_dent_dnode(page)) { |
135 | err = recover_inode_page(sbi, page); | 160 | err = recover_inode_page(sbi, page); |
136 | if (err) | 161 | if (err) |
137 | goto unlock_out; | 162 | break; |
138 | } | 163 | } |
139 | 164 | ||
140 | /* add this fsync inode to the list */ | 165 | /* add this fsync inode to the list */ |
141 | entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); | 166 | entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); |
142 | if (!entry) { | 167 | if (!entry) { |
143 | err = -ENOMEM; | 168 | err = -ENOMEM; |
144 | goto unlock_out; | 169 | break; |
145 | } | 170 | } |
146 | 171 | ||
147 | entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); | 172 | entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); |
148 | if (IS_ERR(entry->inode)) { | 173 | if (IS_ERR(entry->inode)) { |
149 | err = PTR_ERR(entry->inode); | 174 | err = PTR_ERR(entry->inode); |
150 | kmem_cache_free(fsync_entry_slab, entry); | 175 | kmem_cache_free(fsync_entry_slab, entry); |
151 | goto unlock_out; | 176 | break; |
152 | } | 177 | } |
153 | |||
154 | list_add_tail(&entry->list, head); | 178 | list_add_tail(&entry->list, head); |
155 | entry->blkaddr = blkaddr; | ||
156 | } | ||
157 | if (IS_INODE(page)) { | ||
158 | err = recover_inode(entry->inode, page); | ||
159 | if (err == -ENOENT) { | ||
160 | goto next; | ||
161 | } else if (err) { | ||
162 | err = -EINVAL; | ||
163 | goto unlock_out; | ||
164 | } | ||
165 | } | 179 | } |
180 | entry->blkaddr = blkaddr; | ||
181 | |||
182 | err = recover_inode(entry->inode, page); | ||
183 | if (err && err != -ENOENT) | ||
184 | break; | ||
166 | next: | 185 | next: |
167 | /* check next segment */ | 186 | /* check next segment */ |
168 | blkaddr = next_blkaddr_of_node(page); | 187 | blkaddr = next_blkaddr_of_node(page); |
169 | } | 188 | } |
170 | unlock_out: | ||
171 | unlock_page(page); | 189 | unlock_page(page); |
172 | out: | 190 | out: |
173 | __free_pages(page, 0); | 191 | __free_pages(page, 0); |
174 | return err; | 192 | return err; |
175 | } | 193 | } |
176 | 194 | ||
177 | static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, | 195 | static void destroy_fsync_dnodes(struct list_head *head) |
178 | struct list_head *head) | ||
179 | { | 196 | { |
180 | struct fsync_inode_entry *entry, *tmp; | 197 | struct fsync_inode_entry *entry, *tmp; |
181 | 198 | ||
@@ -186,15 +203,15 @@ static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, | |||
186 | } | 203 | } |
187 | } | 204 | } |
188 | 205 | ||
189 | static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, | 206 | static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, |
190 | block_t blkaddr) | 207 | block_t blkaddr, struct dnode_of_data *dn) |
191 | { | 208 | { |
192 | struct seg_entry *sentry; | 209 | struct seg_entry *sentry; |
193 | unsigned int segno = GET_SEGNO(sbi, blkaddr); | 210 | unsigned int segno = GET_SEGNO(sbi, blkaddr); |
194 | unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & | 211 | unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & |
195 | (sbi->blocks_per_seg - 1); | 212 | (sbi->blocks_per_seg - 1); |
196 | struct f2fs_summary sum; | 213 | struct f2fs_summary sum; |
197 | nid_t ino; | 214 | nid_t ino, nid; |
198 | void *kaddr; | 215 | void *kaddr; |
199 | struct inode *inode; | 216 | struct inode *inode; |
200 | struct page *node_page; | 217 | struct page *node_page; |
@@ -203,7 +220,7 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, | |||
203 | 220 | ||
204 | sentry = get_seg_entry(sbi, segno); | 221 | sentry = get_seg_entry(sbi, segno); |
205 | if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) | 222 | if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) |
206 | return; | 223 | return 0; |
207 | 224 | ||
208 | /* Get the previous summary */ | 225 | /* Get the previous summary */ |
209 | for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { | 226 | for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { |
@@ -222,20 +239,39 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, | |||
222 | f2fs_put_page(sum_page, 1); | 239 | f2fs_put_page(sum_page, 1); |
223 | } | 240 | } |
224 | 241 | ||
242 | /* Use the locked dnode page and inode */ | ||
243 | nid = le32_to_cpu(sum.nid); | ||
244 | if (dn->inode->i_ino == nid) { | ||
245 | struct dnode_of_data tdn = *dn; | ||
246 | tdn.nid = nid; | ||
247 | tdn.node_page = dn->inode_page; | ||
248 | tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); | ||
249 | truncate_data_blocks_range(&tdn, 1); | ||
250 | return 0; | ||
251 | } else if (dn->nid == nid) { | ||
252 | struct dnode_of_data tdn = *dn; | ||
253 | tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); | ||
254 | truncate_data_blocks_range(&tdn, 1); | ||
255 | return 0; | ||
256 | } | ||
257 | |||
225 | /* Get the node page */ | 258 | /* Get the node page */ |
226 | node_page = get_node_page(sbi, le32_to_cpu(sum.nid)); | 259 | node_page = get_node_page(sbi, nid); |
260 | if (IS_ERR(node_page)) | ||
261 | return PTR_ERR(node_page); | ||
227 | bidx = start_bidx_of_node(ofs_of_node(node_page)) + | 262 | bidx = start_bidx_of_node(ofs_of_node(node_page)) + |
228 | le16_to_cpu(sum.ofs_in_node); | 263 | le16_to_cpu(sum.ofs_in_node); |
229 | ino = ino_of_node(node_page); | 264 | ino = ino_of_node(node_page); |
230 | f2fs_put_page(node_page, 1); | 265 | f2fs_put_page(node_page, 1); |
231 | 266 | ||
232 | /* Deallocate previous index in the node page */ | 267 | /* Deallocate previous index in the node page */ |
233 | inode = f2fs_iget(sbi->sb, ino); | 268 | inode = f2fs_iget(sbi->sb, ino); |
234 | if (IS_ERR(inode)) | 269 | if (IS_ERR(inode)) |
235 | return; | 270 | return PTR_ERR(inode); |
236 | 271 | ||
237 | truncate_hole(inode, bidx, bidx + 1); | 272 | truncate_hole(inode, bidx, bidx + 1); |
238 | iput(inode); | 273 | iput(inode); |
274 | return 0; | ||
239 | } | 275 | } |
240 | 276 | ||
241 | static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | 277 | static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, |
@@ -245,7 +281,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
245 | struct dnode_of_data dn; | 281 | struct dnode_of_data dn; |
246 | struct f2fs_summary sum; | 282 | struct f2fs_summary sum; |
247 | struct node_info ni; | 283 | struct node_info ni; |
248 | int err = 0; | 284 | int err = 0, recovered = 0; |
249 | int ilock; | 285 | int ilock; |
250 | 286 | ||
251 | start = start_bidx_of_node(ofs_of_node(page)); | 287 | start = start_bidx_of_node(ofs_of_node(page)); |
@@ -283,13 +319,16 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
283 | } | 319 | } |
284 | 320 | ||
285 | /* Check the previous node page having this index */ | 321 | /* Check the previous node page having this index */ |
286 | check_index_in_prev_nodes(sbi, dest); | 322 | err = check_index_in_prev_nodes(sbi, dest, &dn); |
323 | if (err) | ||
324 | goto err; | ||
287 | 325 | ||
288 | set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); | 326 | set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); |
289 | 327 | ||
290 | /* write dummy data page */ | 328 | /* write dummy data page */ |
291 | recover_data_page(sbi, NULL, &sum, src, dest); | 329 | recover_data_page(sbi, NULL, &sum, src, dest); |
292 | update_extent_cache(dest, &dn); | 330 | update_extent_cache(dest, &dn); |
331 | recovered++; | ||
293 | } | 332 | } |
294 | dn.ofs_in_node++; | 333 | dn.ofs_in_node++; |
295 | } | 334 | } |
@@ -305,9 +344,14 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
305 | set_page_dirty(dn.node_page); | 344 | set_page_dirty(dn.node_page); |
306 | 345 | ||
307 | recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); | 346 | recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); |
347 | err: | ||
308 | f2fs_put_dnode(&dn); | 348 | f2fs_put_dnode(&dn); |
309 | mutex_unlock_op(sbi, ilock); | 349 | mutex_unlock_op(sbi, ilock); |
310 | return 0; | 350 | |
351 | f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " | ||
352 | "recovered_data = %d blocks, err = %d", | ||
353 | inode->i_ino, recovered, err); | ||
354 | return err; | ||
311 | } | 355 | } |
312 | 356 | ||
313 | static int recover_data(struct f2fs_sb_info *sbi, | 357 | static int recover_data(struct f2fs_sb_info *sbi, |
@@ -340,7 +384,7 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
340 | lock_page(page); | 384 | lock_page(page); |
341 | 385 | ||
342 | if (cp_ver != cpver_of_node(page)) | 386 | if (cp_ver != cpver_of_node(page)) |
343 | goto unlock_out; | 387 | break; |
344 | 388 | ||
345 | entry = get_fsync_inode(head, ino_of_node(page)); | 389 | entry = get_fsync_inode(head, ino_of_node(page)); |
346 | if (!entry) | 390 | if (!entry) |
@@ -348,7 +392,7 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
348 | 392 | ||
349 | err = do_recover_data(sbi, entry->inode, page, blkaddr); | 393 | err = do_recover_data(sbi, entry->inode, page, blkaddr); |
350 | if (err) | 394 | if (err) |
351 | goto out; | 395 | break; |
352 | 396 | ||
353 | if (entry->blkaddr == blkaddr) { | 397 | if (entry->blkaddr == blkaddr) { |
354 | iput(entry->inode); | 398 | iput(entry->inode); |
@@ -359,7 +403,6 @@ next: | |||
359 | /* check next segment */ | 403 | /* check next segment */ |
360 | blkaddr = next_blkaddr_of_node(page); | 404 | blkaddr = next_blkaddr_of_node(page); |
361 | } | 405 | } |
362 | unlock_out: | ||
363 | unlock_page(page); | 406 | unlock_page(page); |
364 | out: | 407 | out: |
365 | __free_pages(page, 0); | 408 | __free_pages(page, 0); |
@@ -382,6 +425,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) | |||
382 | INIT_LIST_HEAD(&inode_list); | 425 | INIT_LIST_HEAD(&inode_list); |
383 | 426 | ||
384 | /* step #1: find fsynced inode numbers */ | 427 | /* step #1: find fsynced inode numbers */ |
428 | sbi->por_doing = 1; | ||
385 | err = find_fsync_dnodes(sbi, &inode_list); | 429 | err = find_fsync_dnodes(sbi, &inode_list); |
386 | if (err) | 430 | if (err) |
387 | goto out; | 431 | goto out; |
@@ -390,13 +434,13 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) | |||
390 | goto out; | 434 | goto out; |
391 | 435 | ||
392 | /* step #2: recover data */ | 436 | /* step #2: recover data */ |
393 | sbi->por_doing = 1; | ||
394 | err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); | 437 | err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); |
395 | sbi->por_doing = 0; | ||
396 | BUG_ON(!list_empty(&inode_list)); | 438 | BUG_ON(!list_empty(&inode_list)); |
397 | out: | 439 | out: |
398 | destroy_fsync_dnodes(sbi, &inode_list); | 440 | destroy_fsync_dnodes(&inode_list); |
399 | kmem_cache_destroy(fsync_entry_slab); | 441 | kmem_cache_destroy(fsync_entry_slab); |
400 | write_checkpoint(sbi, false); | 442 | sbi->por_doing = 0; |
443 | if (!err) | ||
444 | write_checkpoint(sbi, false); | ||
401 | return err; | 445 | return err; |
402 | } | 446 | } |
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d8e84e49a5c3..a86d125a9885 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -94,7 +94,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, | |||
94 | * Adding dirty entry into seglist is not critical operation. | 94 | * Adding dirty entry into seglist is not critical operation. |
95 | * If a given segment is one of current working segments, it won't be added. | 95 | * If a given segment is one of current working segments, it won't be added. |
96 | */ | 96 | */ |
97 | void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) | 97 | static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) |
98 | { | 98 | { |
99 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 99 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
100 | unsigned short valid_blocks; | 100 | unsigned short valid_blocks; |
@@ -126,17 +126,16 @@ void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) | |||
126 | static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) | 126 | static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) |
127 | { | 127 | { |
128 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 128 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
129 | unsigned int segno, offset = 0; | 129 | unsigned int segno = -1; |
130 | unsigned int total_segs = TOTAL_SEGS(sbi); | 130 | unsigned int total_segs = TOTAL_SEGS(sbi); |
131 | 131 | ||
132 | mutex_lock(&dirty_i->seglist_lock); | 132 | mutex_lock(&dirty_i->seglist_lock); |
133 | while (1) { | 133 | while (1) { |
134 | segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, | 134 | segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, |
135 | offset); | 135 | segno + 1); |
136 | if (segno >= total_segs) | 136 | if (segno >= total_segs) |
137 | break; | 137 | break; |
138 | __set_test_and_free(sbi, segno); | 138 | __set_test_and_free(sbi, segno); |
139 | offset = segno + 1; | ||
140 | } | 139 | } |
141 | mutex_unlock(&dirty_i->seglist_lock); | 140 | mutex_unlock(&dirty_i->seglist_lock); |
142 | } | 141 | } |
@@ -144,17 +143,16 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) | |||
144 | void clear_prefree_segments(struct f2fs_sb_info *sbi) | 143 | void clear_prefree_segments(struct f2fs_sb_info *sbi) |
145 | { | 144 | { |
146 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 145 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
147 | unsigned int segno, offset = 0; | 146 | unsigned int segno = -1; |
148 | unsigned int total_segs = TOTAL_SEGS(sbi); | 147 | unsigned int total_segs = TOTAL_SEGS(sbi); |
149 | 148 | ||
150 | mutex_lock(&dirty_i->seglist_lock); | 149 | mutex_lock(&dirty_i->seglist_lock); |
151 | while (1) { | 150 | while (1) { |
152 | segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, | 151 | segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, |
153 | offset); | 152 | segno + 1); |
154 | if (segno >= total_segs) | 153 | if (segno >= total_segs) |
155 | break; | 154 | break; |
156 | 155 | ||
157 | offset = segno + 1; | ||
158 | if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) | 156 | if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) |
159 | dirty_i->nr_dirty[PRE]--; | 157 | dirty_i->nr_dirty[PRE]--; |
160 | 158 | ||
@@ -257,11 +255,11 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) | |||
257 | * This function should be resided under the curseg_mutex lock | 255 | * This function should be resided under the curseg_mutex lock |
258 | */ | 256 | */ |
259 | static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, | 257 | static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, |
260 | struct f2fs_summary *sum, unsigned short offset) | 258 | struct f2fs_summary *sum) |
261 | { | 259 | { |
262 | struct curseg_info *curseg = CURSEG_I(sbi, type); | 260 | struct curseg_info *curseg = CURSEG_I(sbi, type); |
263 | void *addr = curseg->sum_blk; | 261 | void *addr = curseg->sum_blk; |
264 | addr += offset * sizeof(struct f2fs_summary); | 262 | addr += curseg->next_blkoff * sizeof(struct f2fs_summary); |
265 | memcpy(addr, sum, sizeof(struct f2fs_summary)); | 263 | memcpy(addr, sum, sizeof(struct f2fs_summary)); |
266 | return; | 264 | return; |
267 | } | 265 | } |
@@ -311,64 +309,14 @@ static void write_sum_page(struct f2fs_sb_info *sbi, | |||
311 | f2fs_put_page(page, 1); | 309 | f2fs_put_page(page, 1); |
312 | } | 310 | } |
313 | 311 | ||
314 | static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, int type) | ||
315 | { | ||
316 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | ||
317 | unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE]; | ||
318 | unsigned int segno; | ||
319 | unsigned int ofs = 0; | ||
320 | |||
321 | /* | ||
322 | * If there is not enough reserved sections, | ||
323 | * we should not reuse prefree segments. | ||
324 | */ | ||
325 | if (has_not_enough_free_secs(sbi, 0)) | ||
326 | return NULL_SEGNO; | ||
327 | |||
328 | /* | ||
329 | * NODE page should not reuse prefree segment, | ||
330 | * since those information is used for SPOR. | ||
331 | */ | ||
332 | if (IS_NODESEG(type)) | ||
333 | return NULL_SEGNO; | ||
334 | next: | ||
335 | segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs); | ||
336 | ofs += sbi->segs_per_sec; | ||
337 | |||
338 | if (segno < TOTAL_SEGS(sbi)) { | ||
339 | int i; | ||
340 | |||
341 | /* skip intermediate segments in a section */ | ||
342 | if (segno % sbi->segs_per_sec) | ||
343 | goto next; | ||
344 | |||
345 | /* skip if the section is currently used */ | ||
346 | if (sec_usage_check(sbi, GET_SECNO(sbi, segno))) | ||
347 | goto next; | ||
348 | |||
349 | /* skip if whole section is not prefree */ | ||
350 | for (i = 1; i < sbi->segs_per_sec; i++) | ||
351 | if (!test_bit(segno + i, prefree_segmap)) | ||
352 | goto next; | ||
353 | |||
354 | /* skip if whole section was not free at the last checkpoint */ | ||
355 | for (i = 0; i < sbi->segs_per_sec; i++) | ||
356 | if (get_seg_entry(sbi, segno + i)->ckpt_valid_blocks) | ||
357 | goto next; | ||
358 | |||
359 | return segno; | ||
360 | } | ||
361 | return NULL_SEGNO; | ||
362 | } | ||
363 | |||
364 | static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) | 312 | static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) |
365 | { | 313 | { |
366 | struct curseg_info *curseg = CURSEG_I(sbi, type); | 314 | struct curseg_info *curseg = CURSEG_I(sbi, type); |
367 | unsigned int segno = curseg->segno; | 315 | unsigned int segno = curseg->segno + 1; |
368 | struct free_segmap_info *free_i = FREE_I(sbi); | 316 | struct free_segmap_info *free_i = FREE_I(sbi); |
369 | 317 | ||
370 | if (segno + 1 < TOTAL_SEGS(sbi) && (segno + 1) % sbi->segs_per_sec) | 318 | if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) |
371 | return !test_bit(segno + 1, free_i->free_segmap); | 319 | return !test_bit(segno, free_i->free_segmap); |
372 | return 0; | 320 | return 0; |
373 | } | 321 | } |
374 | 322 | ||
@@ -495,7 +443,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) | |||
495 | int dir = ALLOC_LEFT; | 443 | int dir = ALLOC_LEFT; |
496 | 444 | ||
497 | write_sum_page(sbi, curseg->sum_blk, | 445 | write_sum_page(sbi, curseg->sum_blk, |
498 | GET_SUM_BLOCK(sbi, curseg->segno)); | 446 | GET_SUM_BLOCK(sbi, segno)); |
499 | if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) | 447 | if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) |
500 | dir = ALLOC_RIGHT; | 448 | dir = ALLOC_RIGHT; |
501 | 449 | ||
@@ -599,11 +547,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, | |||
599 | goto out; | 547 | goto out; |
600 | } | 548 | } |
601 | 549 | ||
602 | curseg->next_segno = check_prefree_segments(sbi, type); | 550 | if (type == CURSEG_WARM_NODE) |
603 | |||
604 | if (curseg->next_segno != NULL_SEGNO) | ||
605 | change_curseg(sbi, type, false); | ||
606 | else if (type == CURSEG_WARM_NODE) | ||
607 | new_curseg(sbi, type, false); | 551 | new_curseg(sbi, type, false); |
608 | else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) | 552 | else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) |
609 | new_curseg(sbi, type, false); | 553 | new_curseg(sbi, type, false); |
@@ -612,7 +556,10 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, | |||
612 | else | 556 | else |
613 | new_curseg(sbi, type, false); | 557 | new_curseg(sbi, type, false); |
614 | out: | 558 | out: |
559 | #ifdef CONFIG_F2FS_STAT_FS | ||
615 | sbi->segment_count[curseg->alloc_type]++; | 560 | sbi->segment_count[curseg->alloc_type]++; |
561 | #endif | ||
562 | return; | ||
616 | } | 563 | } |
617 | 564 | ||
618 | void allocate_new_segments(struct f2fs_sb_info *sbi) | 565 | void allocate_new_segments(struct f2fs_sb_info *sbi) |
@@ -795,7 +742,7 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type) | |||
795 | 742 | ||
796 | if (S_ISDIR(inode->i_mode)) | 743 | if (S_ISDIR(inode->i_mode)) |
797 | return CURSEG_HOT_DATA; | 744 | return CURSEG_HOT_DATA; |
798 | else if (is_cold_data(page) || is_cold_file(inode)) | 745 | else if (is_cold_data(page) || file_is_cold(inode)) |
799 | return CURSEG_COLD_DATA; | 746 | return CURSEG_COLD_DATA; |
800 | else | 747 | else |
801 | return CURSEG_WARM_DATA; | 748 | return CURSEG_WARM_DATA; |
@@ -844,11 +791,13 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, | |||
844 | * because, this function updates a summary entry in the | 791 | * because, this function updates a summary entry in the |
845 | * current summary block. | 792 | * current summary block. |
846 | */ | 793 | */ |
847 | __add_sum_entry(sbi, type, sum, curseg->next_blkoff); | 794 | __add_sum_entry(sbi, type, sum); |
848 | 795 | ||
849 | mutex_lock(&sit_i->sentry_lock); | 796 | mutex_lock(&sit_i->sentry_lock); |
850 | __refresh_next_blkoff(sbi, curseg); | 797 | __refresh_next_blkoff(sbi, curseg); |
798 | #ifdef CONFIG_F2FS_STAT_FS | ||
851 | sbi->block_count[curseg->alloc_type]++; | 799 | sbi->block_count[curseg->alloc_type]++; |
800 | #endif | ||
852 | 801 | ||
853 | /* | 802 | /* |
854 | * SIT information should be updated before segment allocation, | 803 | * SIT information should be updated before segment allocation, |
@@ -943,7 +892,7 @@ void recover_data_page(struct f2fs_sb_info *sbi, | |||
943 | 892 | ||
944 | curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & | 893 | curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & |
945 | (sbi->blocks_per_seg - 1); | 894 | (sbi->blocks_per_seg - 1); |
946 | __add_sum_entry(sbi, type, sum, curseg->next_blkoff); | 895 | __add_sum_entry(sbi, type, sum); |
947 | 896 | ||
948 | refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); | 897 | refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); |
949 | 898 | ||
@@ -980,7 +929,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, | |||
980 | } | 929 | } |
981 | curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & | 930 | curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & |
982 | (sbi->blocks_per_seg - 1); | 931 | (sbi->blocks_per_seg - 1); |
983 | __add_sum_entry(sbi, type, sum, curseg->next_blkoff); | 932 | __add_sum_entry(sbi, type, sum); |
984 | 933 | ||
985 | /* change the current log to the next block addr in advance */ | 934 | /* change the current log to the next block addr in advance */ |
986 | if (next_segno != segno) { | 935 | if (next_segno != segno) { |
@@ -1579,13 +1528,13 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) | |||
1579 | { | 1528 | { |
1580 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 1529 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
1581 | struct free_segmap_info *free_i = FREE_I(sbi); | 1530 | struct free_segmap_info *free_i = FREE_I(sbi); |
1582 | unsigned int segno = 0, offset = 0; | 1531 | unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); |
1583 | unsigned short valid_blocks; | 1532 | unsigned short valid_blocks; |
1584 | 1533 | ||
1585 | while (segno < TOTAL_SEGS(sbi)) { | 1534 | while (1) { |
1586 | /* find dirty segment based on free segmap */ | 1535 | /* find dirty segment based on free segmap */ |
1587 | segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset); | 1536 | segno = find_next_inuse(free_i, total_segs, offset); |
1588 | if (segno >= TOTAL_SEGS(sbi)) | 1537 | if (segno >= total_segs) |
1589 | break; | 1538 | break; |
1590 | offset = segno + 1; | 1539 | offset = segno + 1; |
1591 | valid_blocks = get_valid_blocks(sbi, segno, 0); | 1540 | valid_blocks = get_valid_blocks(sbi, segno, 0); |
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8555f7df82c7..75c7dc363e92 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -34,7 +34,7 @@ | |||
34 | static struct kmem_cache *f2fs_inode_cachep; | 34 | static struct kmem_cache *f2fs_inode_cachep; |
35 | 35 | ||
36 | enum { | 36 | enum { |
37 | Opt_gc_background_off, | 37 | Opt_gc_background, |
38 | Opt_disable_roll_forward, | 38 | Opt_disable_roll_forward, |
39 | Opt_discard, | 39 | Opt_discard, |
40 | Opt_noheap, | 40 | Opt_noheap, |
@@ -46,7 +46,7 @@ enum { | |||
46 | }; | 46 | }; |
47 | 47 | ||
48 | static match_table_t f2fs_tokens = { | 48 | static match_table_t f2fs_tokens = { |
49 | {Opt_gc_background_off, "background_gc_off"}, | 49 | {Opt_gc_background, "background_gc=%s"}, |
50 | {Opt_disable_roll_forward, "disable_roll_forward"}, | 50 | {Opt_disable_roll_forward, "disable_roll_forward"}, |
51 | {Opt_discard, "discard"}, | 51 | {Opt_discard, "discard"}, |
52 | {Opt_noheap, "no_heap"}, | 52 | {Opt_noheap, "no_heap"}, |
@@ -76,6 +76,91 @@ static void init_once(void *foo) | |||
76 | inode_init_once(&fi->vfs_inode); | 76 | inode_init_once(&fi->vfs_inode); |
77 | } | 77 | } |
78 | 78 | ||
79 | static int parse_options(struct super_block *sb, char *options) | ||
80 | { | ||
81 | struct f2fs_sb_info *sbi = F2FS_SB(sb); | ||
82 | substring_t args[MAX_OPT_ARGS]; | ||
83 | char *p, *name; | ||
84 | int arg = 0; | ||
85 | |||
86 | if (!options) | ||
87 | return 0; | ||
88 | |||
89 | while ((p = strsep(&options, ",")) != NULL) { | ||
90 | int token; | ||
91 | if (!*p) | ||
92 | continue; | ||
93 | /* | ||
94 | * Initialize args struct so we know whether arg was | ||
95 | * found; some options take optional arguments. | ||
96 | */ | ||
97 | args[0].to = args[0].from = NULL; | ||
98 | token = match_token(p, f2fs_tokens, args); | ||
99 | |||
100 | switch (token) { | ||
101 | case Opt_gc_background: | ||
102 | name = match_strdup(&args[0]); | ||
103 | |||
104 | if (!name) | ||
105 | return -ENOMEM; | ||
106 | if (!strncmp(name, "on", 2)) | ||
107 | set_opt(sbi, BG_GC); | ||
108 | else if (!strncmp(name, "off", 3)) | ||
109 | clear_opt(sbi, BG_GC); | ||
110 | else { | ||
111 | kfree(name); | ||
112 | return -EINVAL; | ||
113 | } | ||
114 | kfree(name); | ||
115 | break; | ||
116 | case Opt_disable_roll_forward: | ||
117 | set_opt(sbi, DISABLE_ROLL_FORWARD); | ||
118 | break; | ||
119 | case Opt_discard: | ||
120 | set_opt(sbi, DISCARD); | ||
121 | break; | ||
122 | case Opt_noheap: | ||
123 | set_opt(sbi, NOHEAP); | ||
124 | break; | ||
125 | #ifdef CONFIG_F2FS_FS_XATTR | ||
126 | case Opt_nouser_xattr: | ||
127 | clear_opt(sbi, XATTR_USER); | ||
128 | break; | ||
129 | #else | ||
130 | case Opt_nouser_xattr: | ||
131 | f2fs_msg(sb, KERN_INFO, | ||
132 | "nouser_xattr options not supported"); | ||
133 | break; | ||
134 | #endif | ||
135 | #ifdef CONFIG_F2FS_FS_POSIX_ACL | ||
136 | case Opt_noacl: | ||
137 | clear_opt(sbi, POSIX_ACL); | ||
138 | break; | ||
139 | #else | ||
140 | case Opt_noacl: | ||
141 | f2fs_msg(sb, KERN_INFO, "noacl options not supported"); | ||
142 | break; | ||
143 | #endif | ||
144 | case Opt_active_logs: | ||
145 | if (args->from && match_int(args, &arg)) | ||
146 | return -EINVAL; | ||
147 | if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) | ||
148 | return -EINVAL; | ||
149 | sbi->active_logs = arg; | ||
150 | break; | ||
151 | case Opt_disable_ext_identify: | ||
152 | set_opt(sbi, DISABLE_EXT_IDENTIFY); | ||
153 | break; | ||
154 | default: | ||
155 | f2fs_msg(sb, KERN_ERR, | ||
156 | "Unrecognized mount option \"%s\" or missing value", | ||
157 | p); | ||
158 | return -EINVAL; | ||
159 | } | ||
160 | } | ||
161 | return 0; | ||
162 | } | ||
163 | |||
79 | static struct inode *f2fs_alloc_inode(struct super_block *sb) | 164 | static struct inode *f2fs_alloc_inode(struct super_block *sb) |
80 | { | 165 | { |
81 | struct f2fs_inode_info *fi; | 166 | struct f2fs_inode_info *fi; |
@@ -112,6 +197,17 @@ static int f2fs_drop_inode(struct inode *inode) | |||
112 | return generic_drop_inode(inode); | 197 | return generic_drop_inode(inode); |
113 | } | 198 | } |
114 | 199 | ||
200 | /* | ||
201 | * f2fs_dirty_inode() is called from __mark_inode_dirty() | ||
202 | * | ||
203 | * We should call set_dirty_inode to write the dirty inode through write_inode. | ||
204 | */ | ||
205 | static void f2fs_dirty_inode(struct inode *inode, int flags) | ||
206 | { | ||
207 | set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); | ||
208 | return; | ||
209 | } | ||
210 | |||
115 | static void f2fs_i_callback(struct rcu_head *head) | 211 | static void f2fs_i_callback(struct rcu_head *head) |
116 | { | 212 | { |
117 | struct inode *inode = container_of(head, struct inode, i_rcu); | 213 | struct inode *inode = container_of(head, struct inode, i_rcu); |
@@ -170,7 +266,7 @@ static int f2fs_freeze(struct super_block *sb) | |||
170 | { | 266 | { |
171 | int err; | 267 | int err; |
172 | 268 | ||
173 | if (sb->s_flags & MS_RDONLY) | 269 | if (f2fs_readonly(sb)) |
174 | return 0; | 270 | return 0; |
175 | 271 | ||
176 | err = f2fs_sync_fs(sb, 1); | 272 | err = f2fs_sync_fs(sb, 1); |
@@ -214,10 +310,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) | |||
214 | { | 310 | { |
215 | struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); | 311 | struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); |
216 | 312 | ||
217 | if (test_opt(sbi, BG_GC)) | 313 | if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) |
218 | seq_puts(seq, ",background_gc_on"); | 314 | seq_printf(seq, ",background_gc=%s", "on"); |
219 | else | 315 | else |
220 | seq_puts(seq, ",background_gc_off"); | 316 | seq_printf(seq, ",background_gc=%s", "off"); |
221 | if (test_opt(sbi, DISABLE_ROLL_FORWARD)) | 317 | if (test_opt(sbi, DISABLE_ROLL_FORWARD)) |
222 | seq_puts(seq, ",disable_roll_forward"); | 318 | seq_puts(seq, ",disable_roll_forward"); |
223 | if (test_opt(sbi, DISCARD)) | 319 | if (test_opt(sbi, DISCARD)) |
@@ -244,11 +340,64 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) | |||
244 | return 0; | 340 | return 0; |
245 | } | 341 | } |
246 | 342 | ||
343 | static int f2fs_remount(struct super_block *sb, int *flags, char *data) | ||
344 | { | ||
345 | struct f2fs_sb_info *sbi = F2FS_SB(sb); | ||
346 | struct f2fs_mount_info org_mount_opt; | ||
347 | int err, active_logs; | ||
348 | |||
349 | /* | ||
350 | * Save the old mount options in case we | ||
351 | * need to restore them. | ||
352 | */ | ||
353 | org_mount_opt = sbi->mount_opt; | ||
354 | active_logs = sbi->active_logs; | ||
355 | |||
356 | /* parse mount options */ | ||
357 | err = parse_options(sb, data); | ||
358 | if (err) | ||
359 | goto restore_opts; | ||
360 | |||
361 | /* | ||
362 | * Previous and new state of filesystem is RO, | ||
363 | * so no point in checking GC conditions. | ||
364 | */ | ||
365 | if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) | ||
366 | goto skip; | ||
367 | |||
368 | /* | ||
369 | * We stop the GC thread if FS is mounted as RO | ||
370 | * or if background_gc = off is passed in mount | ||
371 | * option. Also sync the filesystem. | ||
372 | */ | ||
373 | if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { | ||
374 | if (sbi->gc_thread) { | ||
375 | stop_gc_thread(sbi); | ||
376 | f2fs_sync_fs(sb, 1); | ||
377 | } | ||
378 | } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { | ||
379 | err = start_gc_thread(sbi); | ||
380 | if (err) | ||
381 | goto restore_opts; | ||
382 | } | ||
383 | skip: | ||
384 | /* Update the POSIXACL Flag */ | ||
385 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | ||
386 | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); | ||
387 | return 0; | ||
388 | |||
389 | restore_opts: | ||
390 | sbi->mount_opt = org_mount_opt; | ||
391 | sbi->active_logs = active_logs; | ||
392 | return err; | ||
393 | } | ||
394 | |||
247 | static struct super_operations f2fs_sops = { | 395 | static struct super_operations f2fs_sops = { |
248 | .alloc_inode = f2fs_alloc_inode, | 396 | .alloc_inode = f2fs_alloc_inode, |
249 | .drop_inode = f2fs_drop_inode, | 397 | .drop_inode = f2fs_drop_inode, |
250 | .destroy_inode = f2fs_destroy_inode, | 398 | .destroy_inode = f2fs_destroy_inode, |
251 | .write_inode = f2fs_write_inode, | 399 | .write_inode = f2fs_write_inode, |
400 | .dirty_inode = f2fs_dirty_inode, | ||
252 | .show_options = f2fs_show_options, | 401 | .show_options = f2fs_show_options, |
253 | .evict_inode = f2fs_evict_inode, | 402 | .evict_inode = f2fs_evict_inode, |
254 | .put_super = f2fs_put_super, | 403 | .put_super = f2fs_put_super, |
@@ -256,6 +405,7 @@ static struct super_operations f2fs_sops = { | |||
256 | .freeze_fs = f2fs_freeze, | 405 | .freeze_fs = f2fs_freeze, |
257 | .unfreeze_fs = f2fs_unfreeze, | 406 | .unfreeze_fs = f2fs_unfreeze, |
258 | .statfs = f2fs_statfs, | 407 | .statfs = f2fs_statfs, |
408 | .remount_fs = f2fs_remount, | ||
259 | }; | 409 | }; |
260 | 410 | ||
261 | static struct inode *f2fs_nfs_get_inode(struct super_block *sb, | 411 | static struct inode *f2fs_nfs_get_inode(struct super_block *sb, |
@@ -303,79 +453,6 @@ static const struct export_operations f2fs_export_ops = { | |||
303 | .get_parent = f2fs_get_parent, | 453 | .get_parent = f2fs_get_parent, |
304 | }; | 454 | }; |
305 | 455 | ||
306 | static int parse_options(struct super_block *sb, struct f2fs_sb_info *sbi, | ||
307 | char *options) | ||
308 | { | ||
309 | substring_t args[MAX_OPT_ARGS]; | ||
310 | char *p; | ||
311 | int arg = 0; | ||
312 | |||
313 | if (!options) | ||
314 | return 0; | ||
315 | |||
316 | while ((p = strsep(&options, ",")) != NULL) { | ||
317 | int token; | ||
318 | if (!*p) | ||
319 | continue; | ||
320 | /* | ||
321 | * Initialize args struct so we know whether arg was | ||
322 | * found; some options take optional arguments. | ||
323 | */ | ||
324 | args[0].to = args[0].from = NULL; | ||
325 | token = match_token(p, f2fs_tokens, args); | ||
326 | |||
327 | switch (token) { | ||
328 | case Opt_gc_background_off: | ||
329 | clear_opt(sbi, BG_GC); | ||
330 | break; | ||
331 | case Opt_disable_roll_forward: | ||
332 | set_opt(sbi, DISABLE_ROLL_FORWARD); | ||
333 | break; | ||
334 | case Opt_discard: | ||
335 | set_opt(sbi, DISCARD); | ||
336 | break; | ||
337 | case Opt_noheap: | ||
338 | set_opt(sbi, NOHEAP); | ||
339 | break; | ||
340 | #ifdef CONFIG_F2FS_FS_XATTR | ||
341 | case Opt_nouser_xattr: | ||
342 | clear_opt(sbi, XATTR_USER); | ||
343 | break; | ||
344 | #else | ||
345 | case Opt_nouser_xattr: | ||
346 | f2fs_msg(sb, KERN_INFO, | ||
347 | "nouser_xattr options not supported"); | ||
348 | break; | ||
349 | #endif | ||
350 | #ifdef CONFIG_F2FS_FS_POSIX_ACL | ||
351 | case Opt_noacl: | ||
352 | clear_opt(sbi, POSIX_ACL); | ||
353 | break; | ||
354 | #else | ||
355 | case Opt_noacl: | ||
356 | f2fs_msg(sb, KERN_INFO, "noacl options not supported"); | ||
357 | break; | ||
358 | #endif | ||
359 | case Opt_active_logs: | ||
360 | if (args->from && match_int(args, &arg)) | ||
361 | return -EINVAL; | ||
362 | if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) | ||
363 | return -EINVAL; | ||
364 | sbi->active_logs = arg; | ||
365 | break; | ||
366 | case Opt_disable_ext_identify: | ||
367 | set_opt(sbi, DISABLE_EXT_IDENTIFY); | ||
368 | break; | ||
369 | default: | ||
370 | f2fs_msg(sb, KERN_ERR, | ||
371 | "Unrecognized mount option \"%s\" or missing value", | ||
372 | p); | ||
373 | return -EINVAL; | ||
374 | } | ||
375 | } | ||
376 | return 0; | ||
377 | } | ||
378 | |||
379 | static loff_t max_file_size(unsigned bits) | 456 | static loff_t max_file_size(unsigned bits) |
380 | { | 457 | { |
381 | loff_t result = ADDRS_PER_INODE; | 458 | loff_t result = ADDRS_PER_INODE; |
@@ -541,6 +618,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
541 | if (err) | 618 | if (err) |
542 | goto free_sb_buf; | 619 | goto free_sb_buf; |
543 | } | 620 | } |
621 | sb->s_fs_info = sbi; | ||
544 | /* init some FS parameters */ | 622 | /* init some FS parameters */ |
545 | sbi->active_logs = NR_CURSEG_TYPE; | 623 | sbi->active_logs = NR_CURSEG_TYPE; |
546 | 624 | ||
@@ -553,7 +631,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
553 | set_opt(sbi, POSIX_ACL); | 631 | set_opt(sbi, POSIX_ACL); |
554 | #endif | 632 | #endif |
555 | /* parse mount options */ | 633 | /* parse mount options */ |
556 | err = parse_options(sb, sbi, (char *)data); | 634 | err = parse_options(sb, (char *)data); |
557 | if (err) | 635 | if (err) |
558 | goto free_sb_buf; | 636 | goto free_sb_buf; |
559 | 637 | ||
@@ -565,7 +643,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
565 | sb->s_xattr = f2fs_xattr_handlers; | 643 | sb->s_xattr = f2fs_xattr_handlers; |
566 | sb->s_export_op = &f2fs_export_ops; | 644 | sb->s_export_op = &f2fs_export_ops; |
567 | sb->s_magic = F2FS_SUPER_MAGIC; | 645 | sb->s_magic = F2FS_SUPER_MAGIC; |
568 | sb->s_fs_info = sbi; | ||
569 | sb->s_time_gran = 1; | 646 | sb->s_time_gran = 1; |
570 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 647 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
571 | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); | 648 | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); |
@@ -674,10 +751,16 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
674 | "Cannot recover all fsync data errno=%ld", err); | 751 | "Cannot recover all fsync data errno=%ld", err); |
675 | } | 752 | } |
676 | 753 | ||
677 | /* After POR, we can run background GC thread */ | 754 | /* |
678 | err = start_gc_thread(sbi); | 755 | * If filesystem is not mounted as read-only then |
679 | if (err) | 756 | * do start the gc_thread. |
680 | goto fail; | 757 | */ |
758 | if (!(sb->s_flags & MS_RDONLY)) { | ||
759 | /* After POR, we can run background GC thread.*/ | ||
760 | err = start_gc_thread(sbi); | ||
761 | if (err) | ||
762 | goto fail; | ||
763 | } | ||
681 | 764 | ||
682 | err = f2fs_build_stats(sbi); | 765 | err = f2fs_build_stats(sbi); |
683 | if (err) | 766 | if (err) |
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 0b02dce31356..3ab07ecd86ca 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c | |||
@@ -20,6 +20,7 @@ | |||
20 | */ | 20 | */ |
21 | #include <linux/rwsem.h> | 21 | #include <linux/rwsem.h> |
22 | #include <linux/f2fs_fs.h> | 22 | #include <linux/f2fs_fs.h> |
23 | #include <linux/security.h> | ||
23 | #include "f2fs.h" | 24 | #include "f2fs.h" |
24 | #include "xattr.h" | 25 | #include "xattr.h" |
25 | 26 | ||
@@ -43,6 +44,10 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, | |||
43 | prefix = XATTR_TRUSTED_PREFIX; | 44 | prefix = XATTR_TRUSTED_PREFIX; |
44 | prefix_len = XATTR_TRUSTED_PREFIX_LEN; | 45 | prefix_len = XATTR_TRUSTED_PREFIX_LEN; |
45 | break; | 46 | break; |
47 | case F2FS_XATTR_INDEX_SECURITY: | ||
48 | prefix = XATTR_SECURITY_PREFIX; | ||
49 | prefix_len = XATTR_SECURITY_PREFIX_LEN; | ||
50 | break; | ||
46 | default: | 51 | default: |
47 | return -EINVAL; | 52 | return -EINVAL; |
48 | } | 53 | } |
@@ -50,7 +55,7 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, | |||
50 | total_len = prefix_len + name_len + 1; | 55 | total_len = prefix_len + name_len + 1; |
51 | if (list && total_len <= list_size) { | 56 | if (list && total_len <= list_size) { |
52 | memcpy(list, prefix, prefix_len); | 57 | memcpy(list, prefix, prefix_len); |
53 | memcpy(list+prefix_len, name, name_len); | 58 | memcpy(list + prefix_len, name, name_len); |
54 | list[prefix_len + name_len] = '\0'; | 59 | list[prefix_len + name_len] = '\0'; |
55 | } | 60 | } |
56 | return total_len; | 61 | return total_len; |
@@ -70,13 +75,14 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, | |||
70 | if (!capable(CAP_SYS_ADMIN)) | 75 | if (!capable(CAP_SYS_ADMIN)) |
71 | return -EPERM; | 76 | return -EPERM; |
72 | break; | 77 | break; |
78 | case F2FS_XATTR_INDEX_SECURITY: | ||
79 | break; | ||
73 | default: | 80 | default: |
74 | return -EINVAL; | 81 | return -EINVAL; |
75 | } | 82 | } |
76 | if (strcmp(name, "") == 0) | 83 | if (strcmp(name, "") == 0) |
77 | return -EINVAL; | 84 | return -EINVAL; |
78 | return f2fs_getxattr(dentry->d_inode, type, name, | 85 | return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); |
79 | buffer, size); | ||
80 | } | 86 | } |
81 | 87 | ||
82 | static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, | 88 | static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, |
@@ -93,13 +99,15 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, | |||
93 | if (!capable(CAP_SYS_ADMIN)) | 99 | if (!capable(CAP_SYS_ADMIN)) |
94 | return -EPERM; | 100 | return -EPERM; |
95 | break; | 101 | break; |
102 | case F2FS_XATTR_INDEX_SECURITY: | ||
103 | break; | ||
96 | default: | 104 | default: |
97 | return -EINVAL; | 105 | return -EINVAL; |
98 | } | 106 | } |
99 | if (strcmp(name, "") == 0) | 107 | if (strcmp(name, "") == 0) |
100 | return -EINVAL; | 108 | return -EINVAL; |
101 | 109 | ||
102 | return f2fs_setxattr(dentry->d_inode, type, name, value, size); | 110 | return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); |
103 | } | 111 | } |
104 | 112 | ||
105 | static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, | 113 | static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, |
@@ -145,6 +153,31 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, | |||
145 | return 0; | 153 | return 0; |
146 | } | 154 | } |
147 | 155 | ||
156 | #ifdef CONFIG_F2FS_FS_SECURITY | ||
157 | static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, | ||
158 | void *page) | ||
159 | { | ||
160 | const struct xattr *xattr; | ||
161 | int err = 0; | ||
162 | |||
163 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { | ||
164 | err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, | ||
165 | xattr->name, xattr->value, | ||
166 | xattr->value_len, (struct page *)page); | ||
167 | if (err < 0) | ||
168 | break; | ||
169 | } | ||
170 | return err; | ||
171 | } | ||
172 | |||
173 | int f2fs_init_security(struct inode *inode, struct inode *dir, | ||
174 | const struct qstr *qstr, struct page *ipage) | ||
175 | { | ||
176 | return security_inode_init_security(inode, dir, qstr, | ||
177 | &f2fs_initxattrs, ipage); | ||
178 | } | ||
179 | #endif | ||
180 | |||
148 | const struct xattr_handler f2fs_xattr_user_handler = { | 181 | const struct xattr_handler f2fs_xattr_user_handler = { |
149 | .prefix = XATTR_USER_PREFIX, | 182 | .prefix = XATTR_USER_PREFIX, |
150 | .flags = F2FS_XATTR_INDEX_USER, | 183 | .flags = F2FS_XATTR_INDEX_USER, |
@@ -169,6 +202,14 @@ const struct xattr_handler f2fs_xattr_advise_handler = { | |||
169 | .set = f2fs_xattr_advise_set, | 202 | .set = f2fs_xattr_advise_set, |
170 | }; | 203 | }; |
171 | 204 | ||
205 | const struct xattr_handler f2fs_xattr_security_handler = { | ||
206 | .prefix = XATTR_SECURITY_PREFIX, | ||
207 | .flags = F2FS_XATTR_INDEX_SECURITY, | ||
208 | .list = f2fs_xattr_generic_list, | ||
209 | .get = f2fs_xattr_generic_get, | ||
210 | .set = f2fs_xattr_generic_set, | ||
211 | }; | ||
212 | |||
172 | static const struct xattr_handler *f2fs_xattr_handler_map[] = { | 213 | static const struct xattr_handler *f2fs_xattr_handler_map[] = { |
173 | [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, | 214 | [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, |
174 | #ifdef CONFIG_F2FS_FS_POSIX_ACL | 215 | #ifdef CONFIG_F2FS_FS_POSIX_ACL |
@@ -176,6 +217,9 @@ static const struct xattr_handler *f2fs_xattr_handler_map[] = { | |||
176 | [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, | 217 | [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, |
177 | #endif | 218 | #endif |
178 | [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, | 219 | [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, |
220 | #ifdef CONFIG_F2FS_FS_SECURITY | ||
221 | [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler, | ||
222 | #endif | ||
179 | [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, | 223 | [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, |
180 | }; | 224 | }; |
181 | 225 | ||
@@ -186,6 +230,9 @@ const struct xattr_handler *f2fs_xattr_handlers[] = { | |||
186 | &f2fs_xattr_acl_default_handler, | 230 | &f2fs_xattr_acl_default_handler, |
187 | #endif | 231 | #endif |
188 | &f2fs_xattr_trusted_handler, | 232 | &f2fs_xattr_trusted_handler, |
233 | #ifdef CONFIG_F2FS_FS_SECURITY | ||
234 | &f2fs_xattr_security_handler, | ||
235 | #endif | ||
189 | &f2fs_xattr_advise_handler, | 236 | &f2fs_xattr_advise_handler, |
190 | NULL, | 237 | NULL, |
191 | }; | 238 | }; |
@@ -218,6 +265,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, | |||
218 | return -ENODATA; | 265 | return -ENODATA; |
219 | 266 | ||
220 | page = get_node_page(sbi, fi->i_xattr_nid); | 267 | page = get_node_page(sbi, fi->i_xattr_nid); |
268 | if (IS_ERR(page)) | ||
269 | return PTR_ERR(page); | ||
221 | base_addr = page_address(page); | 270 | base_addr = page_address(page); |
222 | 271 | ||
223 | list_for_each_xattr(entry, base_addr) { | 272 | list_for_each_xattr(entry, base_addr) { |
@@ -268,6 +317,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
268 | return 0; | 317 | return 0; |
269 | 318 | ||
270 | page = get_node_page(sbi, fi->i_xattr_nid); | 319 | page = get_node_page(sbi, fi->i_xattr_nid); |
320 | if (IS_ERR(page)) | ||
321 | return PTR_ERR(page); | ||
271 | base_addr = page_address(page); | 322 | base_addr = page_address(page); |
272 | 323 | ||
273 | list_for_each_xattr(entry, base_addr) { | 324 | list_for_each_xattr(entry, base_addr) { |
@@ -296,7 +347,7 @@ cleanup: | |||
296 | } | 347 | } |
297 | 348 | ||
298 | int f2fs_setxattr(struct inode *inode, int name_index, const char *name, | 349 | int f2fs_setxattr(struct inode *inode, int name_index, const char *name, |
299 | const void *value, size_t value_len) | 350 | const void *value, size_t value_len, struct page *ipage) |
300 | { | 351 | { |
301 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 352 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
302 | struct f2fs_inode_info *fi = F2FS_I(inode); | 353 | struct f2fs_inode_info *fi = F2FS_I(inode); |
@@ -335,7 +386,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, | |||
335 | set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); | 386 | set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); |
336 | mark_inode_dirty(inode); | 387 | mark_inode_dirty(inode); |
337 | 388 | ||
338 | page = new_node_page(&dn, XATTR_NODE_OFFSET); | 389 | page = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); |
339 | if (IS_ERR(page)) { | 390 | if (IS_ERR(page)) { |
340 | alloc_nid_failed(sbi, fi->i_xattr_nid); | 391 | alloc_nid_failed(sbi, fi->i_xattr_nid); |
341 | fi->i_xattr_nid = 0; | 392 | fi->i_xattr_nid = 0; |
@@ -435,7 +486,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, | |||
435 | inode->i_ctime = CURRENT_TIME; | 486 | inode->i_ctime = CURRENT_TIME; |
436 | clear_inode_flag(fi, FI_ACL_MODE); | 487 | clear_inode_flag(fi, FI_ACL_MODE); |
437 | } | 488 | } |
438 | update_inode_page(inode); | 489 | if (ipage) |
490 | update_inode(inode, ipage); | ||
491 | else | ||
492 | update_inode_page(inode); | ||
439 | mutex_unlock_op(sbi, ilock); | 493 | mutex_unlock_op(sbi, ilock); |
440 | 494 | ||
441 | return 0; | 495 | return 0; |
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 49c9558305e3..3c0817bef25d 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h | |||
@@ -112,21 +112,19 @@ extern const struct xattr_handler f2fs_xattr_trusted_handler; | |||
112 | extern const struct xattr_handler f2fs_xattr_acl_access_handler; | 112 | extern const struct xattr_handler f2fs_xattr_acl_access_handler; |
113 | extern const struct xattr_handler f2fs_xattr_acl_default_handler; | 113 | extern const struct xattr_handler f2fs_xattr_acl_default_handler; |
114 | extern const struct xattr_handler f2fs_xattr_advise_handler; | 114 | extern const struct xattr_handler f2fs_xattr_advise_handler; |
115 | extern const struct xattr_handler f2fs_xattr_security_handler; | ||
115 | 116 | ||
116 | extern const struct xattr_handler *f2fs_xattr_handlers[]; | 117 | extern const struct xattr_handler *f2fs_xattr_handlers[]; |
117 | 118 | ||
118 | extern int f2fs_setxattr(struct inode *inode, int name_index, const char *name, | 119 | extern int f2fs_setxattr(struct inode *, int, const char *, |
119 | const void *value, size_t value_len); | 120 | const void *, size_t, struct page *); |
120 | extern int f2fs_getxattr(struct inode *inode, int name_index, const char *name, | 121 | extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); |
121 | void *buffer, size_t buffer_size); | 122 | extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); |
122 | extern ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, | ||
123 | size_t buffer_size); | ||
124 | |||
125 | #else | 123 | #else |
126 | 124 | ||
127 | #define f2fs_xattr_handlers NULL | 125 | #define f2fs_xattr_handlers NULL |
128 | static inline int f2fs_setxattr(struct inode *inode, int name_index, | 126 | static inline int f2fs_setxattr(struct inode *inode, int name_index, |
129 | const char *name, const void *value, size_t value_len) | 127 | const char *name, const void *value, size_t value_len) |
130 | { | 128 | { |
131 | return -EOPNOTSUPP; | 129 | return -EOPNOTSUPP; |
132 | } | 130 | } |
@@ -142,4 +140,14 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, | |||
142 | } | 140 | } |
143 | #endif | 141 | #endif |
144 | 142 | ||
143 | #ifdef CONFIG_F2FS_FS_SECURITY | ||
144 | extern int f2fs_init_security(struct inode *, struct inode *, | ||
145 | const struct qstr *, struct page *); | ||
146 | #else | ||
147 | static inline int f2fs_init_security(struct inode *inode, struct inode *dir, | ||
148 | const struct qstr *qstr, struct page *ipage) | ||
149 | { | ||
150 | return 0; | ||
151 | } | ||
152 | #endif | ||
145 | #endif /* __F2FS_XATTR_H__ */ | 153 | #endif /* __F2FS_XATTR_H__ */ |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 7a6f02caf286..3963ede84eb0 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -543,6 +543,7 @@ end_of_dir: | |||
543 | EXPORT_SYMBOL_GPL(fat_search_long); | 543 | EXPORT_SYMBOL_GPL(fat_search_long); |
544 | 544 | ||
545 | struct fat_ioctl_filldir_callback { | 545 | struct fat_ioctl_filldir_callback { |
546 | struct dir_context ctx; | ||
546 | void __user *dirent; | 547 | void __user *dirent; |
547 | int result; | 548 | int result; |
548 | /* for dir ioctl */ | 549 | /* for dir ioctl */ |
@@ -552,8 +553,9 @@ struct fat_ioctl_filldir_callback { | |||
552 | int short_len; | 553 | int short_len; |
553 | }; | 554 | }; |
554 | 555 | ||
555 | static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, | 556 | static int __fat_readdir(struct inode *inode, struct file *file, |
556 | filldir_t filldir, int short_only, int both) | 557 | struct dir_context *ctx, int short_only, |
558 | struct fat_ioctl_filldir_callback *both) | ||
557 | { | 559 | { |
558 | struct super_block *sb = inode->i_sb; | 560 | struct super_block *sb = inode->i_sb; |
559 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | 561 | struct msdos_sb_info *sbi = MSDOS_SB(sb); |
@@ -564,27 +566,20 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, | |||
564 | unsigned char bufname[FAT_MAX_SHORT_SIZE]; | 566 | unsigned char bufname[FAT_MAX_SHORT_SIZE]; |
565 | int isvfat = sbi->options.isvfat; | 567 | int isvfat = sbi->options.isvfat; |
566 | const char *fill_name = NULL; | 568 | const char *fill_name = NULL; |
567 | unsigned long inum; | 569 | int fake_offset = 0; |
568 | unsigned long lpos, dummy, *furrfu = &lpos; | ||
569 | loff_t cpos; | 570 | loff_t cpos; |
570 | int short_len = 0, fill_len = 0; | 571 | int short_len = 0, fill_len = 0; |
571 | int ret = 0; | 572 | int ret = 0; |
572 | 573 | ||
573 | mutex_lock(&sbi->s_lock); | 574 | mutex_lock(&sbi->s_lock); |
574 | 575 | ||
575 | cpos = filp->f_pos; | 576 | cpos = ctx->pos; |
576 | /* Fake . and .. for the root directory. */ | 577 | /* Fake . and .. for the root directory. */ |
577 | if (inode->i_ino == MSDOS_ROOT_INO) { | 578 | if (inode->i_ino == MSDOS_ROOT_INO) { |
578 | while (cpos < 2) { | 579 | if (!dir_emit_dots(file, ctx)) |
579 | if (filldir(dirent, "..", cpos+1, cpos, | 580 | goto out; |
580 | MSDOS_ROOT_INO, DT_DIR) < 0) | 581 | if (ctx->pos == 2) { |
581 | goto out; | 582 | fake_offset = 1; |
582 | cpos++; | ||
583 | filp->f_pos++; | ||
584 | } | ||
585 | if (cpos == 2) { | ||
586 | dummy = 2; | ||
587 | furrfu = &dummy; | ||
588 | cpos = 0; | 583 | cpos = 0; |
589 | } | 584 | } |
590 | } | 585 | } |
@@ -619,7 +614,7 @@ parse_record: | |||
619 | int status = fat_parse_long(inode, &cpos, &bh, &de, | 614 | int status = fat_parse_long(inode, &cpos, &bh, &de, |
620 | &unicode, &nr_slots); | 615 | &unicode, &nr_slots); |
621 | if (status < 0) { | 616 | if (status < 0) { |
622 | filp->f_pos = cpos; | 617 | ctx->pos = cpos; |
623 | ret = status; | 618 | ret = status; |
624 | goto out; | 619 | goto out; |
625 | } else if (status == PARSE_INVALID) | 620 | } else if (status == PARSE_INVALID) |
@@ -639,6 +634,19 @@ parse_record: | |||
639 | /* !both && !short_only, so we don't need shortname. */ | 634 | /* !both && !short_only, so we don't need shortname. */ |
640 | if (!both) | 635 | if (!both) |
641 | goto start_filldir; | 636 | goto start_filldir; |
637 | |||
638 | short_len = fat_parse_short(sb, de, bufname, | ||
639 | sbi->options.dotsOK); | ||
640 | if (short_len == 0) | ||
641 | goto record_end; | ||
642 | /* hack for fat_ioctl_filldir() */ | ||
643 | both->longname = fill_name; | ||
644 | both->long_len = fill_len; | ||
645 | both->shortname = bufname; | ||
646 | both->short_len = short_len; | ||
647 | fill_name = NULL; | ||
648 | fill_len = 0; | ||
649 | goto start_filldir; | ||
642 | } | 650 | } |
643 | } | 651 | } |
644 | 652 | ||
@@ -646,28 +654,21 @@ parse_record: | |||
646 | if (short_len == 0) | 654 | if (short_len == 0) |
647 | goto record_end; | 655 | goto record_end; |
648 | 656 | ||
649 | if (nr_slots) { | 657 | fill_name = bufname; |
650 | /* hack for fat_ioctl_filldir() */ | 658 | fill_len = short_len; |
651 | struct fat_ioctl_filldir_callback *p = dirent; | ||
652 | |||
653 | p->longname = fill_name; | ||
654 | p->long_len = fill_len; | ||
655 | p->shortname = bufname; | ||
656 | p->short_len = short_len; | ||
657 | fill_name = NULL; | ||
658 | fill_len = 0; | ||
659 | } else { | ||
660 | fill_name = bufname; | ||
661 | fill_len = short_len; | ||
662 | } | ||
663 | 659 | ||
664 | start_filldir: | 660 | start_filldir: |
665 | lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); | 661 | if (!fake_offset) |
666 | if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) | 662 | ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); |
667 | inum = inode->i_ino; | 663 | |
668 | else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) { | 664 | if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) { |
669 | inum = parent_ino(filp->f_path.dentry); | 665 | if (!dir_emit_dot(file, ctx)) |
666 | goto fill_failed; | ||
667 | } else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) { | ||
668 | if (!dir_emit_dotdot(file, ctx)) | ||
669 | goto fill_failed; | ||
670 | } else { | 670 | } else { |
671 | unsigned long inum; | ||
671 | loff_t i_pos = fat_make_i_pos(sb, bh, de); | 672 | loff_t i_pos = fat_make_i_pos(sb, bh, de); |
672 | struct inode *tmp = fat_iget(sb, i_pos); | 673 | struct inode *tmp = fat_iget(sb, i_pos); |
673 | if (tmp) { | 674 | if (tmp) { |
@@ -675,18 +676,17 @@ start_filldir: | |||
675 | iput(tmp); | 676 | iput(tmp); |
676 | } else | 677 | } else |
677 | inum = iunique(sb, MSDOS_ROOT_INO); | 678 | inum = iunique(sb, MSDOS_ROOT_INO); |
679 | if (!dir_emit(ctx, fill_name, fill_len, inum, | ||
680 | (de->attr & ATTR_DIR) ? DT_DIR : DT_REG)) | ||
681 | goto fill_failed; | ||
678 | } | 682 | } |
679 | 683 | ||
680 | if (filldir(dirent, fill_name, fill_len, *furrfu, inum, | ||
681 | (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0) | ||
682 | goto fill_failed; | ||
683 | |||
684 | record_end: | 684 | record_end: |
685 | furrfu = &lpos; | 685 | fake_offset = 0; |
686 | filp->f_pos = cpos; | 686 | ctx->pos = cpos; |
687 | goto get_new; | 687 | goto get_new; |
688 | end_of_dir: | 688 | end_of_dir: |
689 | filp->f_pos = cpos; | 689 | ctx->pos = cpos; |
690 | fill_failed: | 690 | fill_failed: |
691 | brelse(bh); | 691 | brelse(bh); |
692 | if (unicode) | 692 | if (unicode) |
@@ -696,10 +696,9 @@ out: | |||
696 | return ret; | 696 | return ret; |
697 | } | 697 | } |
698 | 698 | ||
699 | static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir) | 699 | static int fat_readdir(struct file *file, struct dir_context *ctx) |
700 | { | 700 | { |
701 | struct inode *inode = file_inode(filp); | 701 | return __fat_readdir(file_inode(file), file, ctx, 0, NULL); |
702 | return __fat_readdir(inode, filp, dirent, filldir, 0, 0); | ||
703 | } | 702 | } |
704 | 703 | ||
705 | #define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \ | 704 | #define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \ |
@@ -755,20 +754,25 @@ efault: \ | |||
755 | 754 | ||
756 | FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent) | 755 | FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent) |
757 | 756 | ||
758 | static int fat_ioctl_readdir(struct inode *inode, struct file *filp, | 757 | static int fat_ioctl_readdir(struct inode *inode, struct file *file, |
759 | void __user *dirent, filldir_t filldir, | 758 | void __user *dirent, filldir_t filldir, |
760 | int short_only, int both) | 759 | int short_only, int both) |
761 | { | 760 | { |
762 | struct fat_ioctl_filldir_callback buf; | 761 | struct fat_ioctl_filldir_callback buf = { |
762 | .ctx.actor = filldir, | ||
763 | .dirent = dirent | ||
764 | }; | ||
763 | int ret; | 765 | int ret; |
764 | 766 | ||
765 | buf.dirent = dirent; | 767 | buf.dirent = dirent; |
766 | buf.result = 0; | 768 | buf.result = 0; |
767 | mutex_lock(&inode->i_mutex); | 769 | mutex_lock(&inode->i_mutex); |
770 | buf.ctx.pos = file->f_pos; | ||
768 | ret = -ENOENT; | 771 | ret = -ENOENT; |
769 | if (!IS_DEADDIR(inode)) { | 772 | if (!IS_DEADDIR(inode)) { |
770 | ret = __fat_readdir(inode, filp, &buf, filldir, | 773 | ret = __fat_readdir(inode, file, &buf.ctx, |
771 | short_only, both); | 774 | short_only, both ? &buf : NULL); |
775 | file->f_pos = buf.ctx.pos; | ||
772 | } | 776 | } |
773 | mutex_unlock(&inode->i_mutex); | 777 | mutex_unlock(&inode->i_mutex); |
774 | if (ret >= 0) | 778 | if (ret >= 0) |
@@ -854,7 +858,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, | |||
854 | const struct file_operations fat_dir_operations = { | 858 | const struct file_operations fat_dir_operations = { |
855 | .llseek = generic_file_llseek, | 859 | .llseek = generic_file_llseek, |
856 | .read = generic_read_dir, | 860 | .read = generic_read_dir, |
857 | .readdir = fat_readdir, | 861 | .iterate = fat_readdir, |
858 | .unlocked_ioctl = fat_dir_ioctl, | 862 | .unlocked_ioctl = fat_dir_ioctl, |
859 | #ifdef CONFIG_COMPAT | 863 | #ifdef CONFIG_COMPAT |
860 | .compat_ioctl = fat_compat_dir_ioctl, | 864 | .compat_ioctl = fat_compat_dir_ioctl, |
diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 21664fcf3616..4241e6f39e86 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h | |||
@@ -86,6 +86,7 @@ struct msdos_sb_info { | |||
86 | const void *dir_ops; /* Opaque; default directory operations */ | 86 | const void *dir_ops; /* Opaque; default directory operations */ |
87 | int dir_per_block; /* dir entries per block */ | 87 | int dir_per_block; /* dir entries per block */ |
88 | int dir_per_block_bits; /* log2(dir_per_block) */ | 88 | int dir_per_block_bits; /* log2(dir_per_block) */ |
89 | unsigned int vol_id; /*volume ID*/ | ||
89 | 90 | ||
90 | int fatent_shift; | 91 | int fatent_shift; |
91 | struct fatent_operations *fatent_ops; | 92 | struct fatent_operations *fatent_ops; |
diff --git a/fs/fat/file.c b/fs/fat/file.c index b0b632e50ddb..9b104f543056 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
@@ -114,6 +114,12 @@ out: | |||
114 | return err; | 114 | return err; |
115 | } | 115 | } |
116 | 116 | ||
117 | static int fat_ioctl_get_volume_id(struct inode *inode, u32 __user *user_attr) | ||
118 | { | ||
119 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); | ||
120 | return put_user(sbi->vol_id, user_attr); | ||
121 | } | ||
122 | |||
117 | long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 123 | long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
118 | { | 124 | { |
119 | struct inode *inode = file_inode(filp); | 125 | struct inode *inode = file_inode(filp); |
@@ -124,6 +130,8 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
124 | return fat_ioctl_get_attributes(inode, user_attr); | 130 | return fat_ioctl_get_attributes(inode, user_attr); |
125 | case FAT_IOCTL_SET_ATTRIBUTES: | 131 | case FAT_IOCTL_SET_ATTRIBUTES: |
126 | return fat_ioctl_set_attributes(filp, user_attr); | 132 | return fat_ioctl_set_attributes(filp, user_attr); |
133 | case FAT_IOCTL_GET_VOLUME_ID: | ||
134 | return fat_ioctl_get_volume_id(inode, user_attr); | ||
127 | default: | 135 | default: |
128 | return -ENOTTY; /* Inappropriate ioctl for device */ | 136 | return -ENOTTY; /* Inappropriate ioctl for device */ |
129 | } | 137 | } |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5d4513cb1b3c..11b51bb55b42 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -1415,6 +1415,18 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, | |||
1415 | brelse(fsinfo_bh); | 1415 | brelse(fsinfo_bh); |
1416 | } | 1416 | } |
1417 | 1417 | ||
1418 | /* interpret volume ID as a little endian 32 bit integer */ | ||
1419 | if (sbi->fat_bits == 32) | ||
1420 | sbi->vol_id = (((u32)b->fat32.vol_id[0]) | | ||
1421 | ((u32)b->fat32.vol_id[1] << 8) | | ||
1422 | ((u32)b->fat32.vol_id[2] << 16) | | ||
1423 | ((u32)b->fat32.vol_id[3] << 24)); | ||
1424 | else /* fat 16 or 12 */ | ||
1425 | sbi->vol_id = (((u32)b->fat16.vol_id[0]) | | ||
1426 | ((u32)b->fat16.vol_id[1] << 8) | | ||
1427 | ((u32)b->fat16.vol_id[2] << 16) | | ||
1428 | ((u32)b->fat16.vol_id[3] << 24)); | ||
1429 | |||
1418 | sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry); | 1430 | sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry); |
1419 | sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; | 1431 | sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; |
1420 | 1432 | ||
diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 359d307b5507..628e22a5a543 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c | |||
@@ -30,7 +30,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) | |||
30 | va_start(args, fmt); | 30 | va_start(args, fmt); |
31 | vaf.fmt = fmt; | 31 | vaf.fmt = fmt; |
32 | vaf.va = &args; | 32 | vaf.va = &args; |
33 | printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf); | 33 | fat_msg(sb, KERN_ERR, "error, %pV", &vaf); |
34 | va_end(args); | 34 | va_end(args); |
35 | } | 35 | } |
36 | 36 | ||
@@ -38,8 +38,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) | |||
38 | panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id); | 38 | panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id); |
39 | else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) { | 39 | else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) { |
40 | sb->s_flags |= MS_RDONLY; | 40 | sb->s_flags |= MS_RDONLY; |
41 | printk(KERN_ERR "FAT-fs (%s): Filesystem has been " | 41 | fat_msg(sb, KERN_ERR, "Filesystem has been set read-only"); |
42 | "set read-only\n", sb->s_id); | ||
43 | } | 42 | } |
44 | } | 43 | } |
45 | EXPORT_SYMBOL_GPL(__fat_fs_error); | 44 | EXPORT_SYMBOL_GPL(__fat_fs_error); |
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 081b759cff83..a783b0e1272a 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c | |||
@@ -148,8 +148,7 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len, | |||
148 | * that the existing dentry can be used. The msdos fs routines will | 148 | * that the existing dentry can be used. The msdos fs routines will |
149 | * return ENOENT or EINVAL as appropriate. | 149 | * return ENOENT or EINVAL as appropriate. |
150 | */ | 150 | */ |
151 | static int msdos_hash(const struct dentry *dentry, const struct inode *inode, | 151 | static int msdos_hash(const struct dentry *dentry, struct qstr *qstr) |
152 | struct qstr *qstr) | ||
153 | { | 152 | { |
154 | struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; | 153 | struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; |
155 | unsigned char msdos_name[MSDOS_NAME]; | 154 | unsigned char msdos_name[MSDOS_NAME]; |
@@ -165,8 +164,7 @@ static int msdos_hash(const struct dentry *dentry, const struct inode *inode, | |||
165 | * Compare two msdos names. If either of the names are invalid, | 164 | * Compare two msdos names. If either of the names are invalid, |
166 | * we fall back to doing the standard name comparison. | 165 | * we fall back to doing the standard name comparison. |
167 | */ | 166 | */ |
168 | static int msdos_cmp(const struct dentry *parent, const struct inode *pinode, | 167 | static int msdos_cmp(const struct dentry *parent, const struct dentry *dentry, |
169 | const struct dentry *dentry, const struct inode *inode, | ||
170 | unsigned int len, const char *str, const struct qstr *name) | 168 | unsigned int len, const char *str, const struct qstr *name) |
171 | { | 169 | { |
172 | struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options; | 170 | struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options; |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 2da952036a3d..6df8d3d885e5 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -107,8 +107,7 @@ static unsigned int vfat_striptail_len(const struct qstr *qstr) | |||
107 | * that the existing dentry can be used. The vfat fs routines will | 107 | * that the existing dentry can be used. The vfat fs routines will |
108 | * return ENOENT or EINVAL as appropriate. | 108 | * return ENOENT or EINVAL as appropriate. |
109 | */ | 109 | */ |
110 | static int vfat_hash(const struct dentry *dentry, const struct inode *inode, | 110 | static int vfat_hash(const struct dentry *dentry, struct qstr *qstr) |
111 | struct qstr *qstr) | ||
112 | { | 111 | { |
113 | qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); | 112 | qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); |
114 | return 0; | 113 | return 0; |
@@ -120,8 +119,7 @@ static int vfat_hash(const struct dentry *dentry, const struct inode *inode, | |||
120 | * that the existing dentry can be used. The vfat fs routines will | 119 | * that the existing dentry can be used. The vfat fs routines will |
121 | * return ENOENT or EINVAL as appropriate. | 120 | * return ENOENT or EINVAL as appropriate. |
122 | */ | 121 | */ |
123 | static int vfat_hashi(const struct dentry *dentry, const struct inode *inode, | 122 | static int vfat_hashi(const struct dentry *dentry, struct qstr *qstr) |
124 | struct qstr *qstr) | ||
125 | { | 123 | { |
126 | struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io; | 124 | struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io; |
127 | const unsigned char *name; | 125 | const unsigned char *name; |
@@ -142,8 +140,7 @@ static int vfat_hashi(const struct dentry *dentry, const struct inode *inode, | |||
142 | /* | 140 | /* |
143 | * Case insensitive compare of two vfat names. | 141 | * Case insensitive compare of two vfat names. |
144 | */ | 142 | */ |
145 | static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode, | 143 | static int vfat_cmpi(const struct dentry *parent, const struct dentry *dentry, |
146 | const struct dentry *dentry, const struct inode *inode, | ||
147 | unsigned int len, const char *str, const struct qstr *name) | 144 | unsigned int len, const char *str, const struct qstr *name) |
148 | { | 145 | { |
149 | struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; | 146 | struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; |
@@ -162,8 +159,7 @@ static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode, | |||
162 | /* | 159 | /* |
163 | * Case sensitive compare of two vfat names. | 160 | * Case sensitive compare of two vfat names. |
164 | */ | 161 | */ |
165 | static int vfat_cmp(const struct dentry *parent, const struct inode *pinode, | 162 | static int vfat_cmp(const struct dentry *parent, const struct dentry *dentry, |
166 | const struct dentry *dentry, const struct inode *inode, | ||
167 | unsigned int len, const char *str, const struct qstr *name) | 163 | unsigned int len, const char *str, const struct qstr *name) |
168 | { | 164 | { |
169 | unsigned int alen, blen; | 165 | unsigned int alen, blen; |
diff --git a/fs/fcntl.c b/fs/fcntl.c index 6599222536eb..65343c3741ff 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -730,14 +730,14 @@ static int __init fcntl_init(void) | |||
730 | * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY | 730 | * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY |
731 | * is defined as O_NONBLOCK on some platforms and not on others. | 731 | * is defined as O_NONBLOCK on some platforms and not on others. |
732 | */ | 732 | */ |
733 | BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( | 733 | BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( |
734 | O_RDONLY | O_WRONLY | O_RDWR | | 734 | O_RDONLY | O_WRONLY | O_RDWR | |
735 | O_CREAT | O_EXCL | O_NOCTTY | | 735 | O_CREAT | O_EXCL | O_NOCTTY | |
736 | O_TRUNC | O_APPEND | /* O_NONBLOCK | */ | 736 | O_TRUNC | O_APPEND | /* O_NONBLOCK | */ |
737 | __O_SYNC | O_DSYNC | FASYNC | | 737 | __O_SYNC | O_DSYNC | FASYNC | |
738 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | | 738 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | |
739 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | | 739 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | |
740 | __FMODE_EXEC | O_PATH | 740 | __FMODE_EXEC | O_PATH | __O_TMPFILE |
741 | )); | 741 | )); |
742 | 742 | ||
743 | fasync_cache = kmem_cache_create("fasync_cache", | 743 | fasync_cache = kmem_cache_create("fasync_cache", |
diff --git a/fs/file_table.c b/fs/file_table.c index 485dc0eddd67..b44e4c559786 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -227,7 +227,7 @@ static void __fput(struct file *file) | |||
227 | { | 227 | { |
228 | struct dentry *dentry = file->f_path.dentry; | 228 | struct dentry *dentry = file->f_path.dentry; |
229 | struct vfsmount *mnt = file->f_path.mnt; | 229 | struct vfsmount *mnt = file->f_path.mnt; |
230 | struct inode *inode = dentry->d_inode; | 230 | struct inode *inode = file->f_inode; |
231 | 231 | ||
232 | might_sleep(); | 232 | might_sleep(); |
233 | 233 | ||
@@ -265,18 +265,15 @@ static void __fput(struct file *file) | |||
265 | mntput(mnt); | 265 | mntput(mnt); |
266 | } | 266 | } |
267 | 267 | ||
268 | static DEFINE_SPINLOCK(delayed_fput_lock); | 268 | static LLIST_HEAD(delayed_fput_list); |
269 | static LIST_HEAD(delayed_fput_list); | ||
270 | static void delayed_fput(struct work_struct *unused) | 269 | static void delayed_fput(struct work_struct *unused) |
271 | { | 270 | { |
272 | LIST_HEAD(head); | 271 | struct llist_node *node = llist_del_all(&delayed_fput_list); |
273 | spin_lock_irq(&delayed_fput_lock); | 272 | struct llist_node *next; |
274 | list_splice_init(&delayed_fput_list, &head); | 273 | |
275 | spin_unlock_irq(&delayed_fput_lock); | 274 | for (; node; node = next) { |
276 | while (!list_empty(&head)) { | 275 | next = llist_next(node); |
277 | struct file *f = list_first_entry(&head, struct file, f_u.fu_list); | 276 | __fput(llist_entry(node, struct file, f_u.fu_llist)); |
278 | list_del_init(&f->f_u.fu_list); | ||
279 | __fput(f); | ||
280 | } | 277 | } |
281 | } | 278 | } |
282 | 279 | ||
@@ -306,18 +303,22 @@ void fput(struct file *file) | |||
306 | { | 303 | { |
307 | if (atomic_long_dec_and_test(&file->f_count)) { | 304 | if (atomic_long_dec_and_test(&file->f_count)) { |
308 | struct task_struct *task = current; | 305 | struct task_struct *task = current; |
309 | unsigned long flags; | ||
310 | 306 | ||
311 | file_sb_list_del(file); | 307 | file_sb_list_del(file); |
312 | if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { | 308 | if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { |
313 | init_task_work(&file->f_u.fu_rcuhead, ____fput); | 309 | init_task_work(&file->f_u.fu_rcuhead, ____fput); |
314 | if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) | 310 | if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) |
315 | return; | 311 | return; |
312 | /* | ||
313 | * After this task has run exit_task_work(), | ||
314 | * task_work_add() will fail. free_ipc_ns()-> | ||
315 | * shm_destroy() can do this. Fall through to delayed | ||
316 | * fput to avoid leaking *file. | ||
317 | */ | ||
316 | } | 318 | } |
317 | spin_lock_irqsave(&delayed_fput_lock, flags); | 319 | |
318 | list_add(&file->f_u.fu_list, &delayed_fput_list); | 320 | if (llist_add(&file->f_u.fu_llist, &delayed_fput_list)) |
319 | schedule_work(&delayed_fput_work); | 321 | schedule_work(&delayed_fput_work); |
320 | spin_unlock_irqrestore(&delayed_fput_lock, flags); | ||
321 | } | 322 | } |
322 | } | 323 | } |
323 | 324 | ||
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 664b07a53870..25d4099a4aea 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c | |||
@@ -49,7 +49,7 @@ | |||
49 | 49 | ||
50 | 50 | ||
51 | static struct dentry * vxfs_lookup(struct inode *, struct dentry *, unsigned int); | 51 | static struct dentry * vxfs_lookup(struct inode *, struct dentry *, unsigned int); |
52 | static int vxfs_readdir(struct file *, void *, filldir_t); | 52 | static int vxfs_readdir(struct file *, struct dir_context *); |
53 | 53 | ||
54 | const struct inode_operations vxfs_dir_inode_ops = { | 54 | const struct inode_operations vxfs_dir_inode_ops = { |
55 | .lookup = vxfs_lookup, | 55 | .lookup = vxfs_lookup, |
@@ -58,7 +58,7 @@ const struct inode_operations vxfs_dir_inode_ops = { | |||
58 | const struct file_operations vxfs_dir_operations = { | 58 | const struct file_operations vxfs_dir_operations = { |
59 | .llseek = generic_file_llseek, | 59 | .llseek = generic_file_llseek, |
60 | .read = generic_read_dir, | 60 | .read = generic_read_dir, |
61 | .readdir = vxfs_readdir, | 61 | .iterate = vxfs_readdir, |
62 | }; | 62 | }; |
63 | 63 | ||
64 | 64 | ||
@@ -235,7 +235,7 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags) | |||
235 | * Zero. | 235 | * Zero. |
236 | */ | 236 | */ |
237 | static int | 237 | static int |
238 | vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | 238 | vxfs_readdir(struct file *fp, struct dir_context *ctx) |
239 | { | 239 | { |
240 | struct inode *ip = file_inode(fp); | 240 | struct inode *ip = file_inode(fp); |
241 | struct super_block *sbp = ip->i_sb; | 241 | struct super_block *sbp = ip->i_sb; |
@@ -243,20 +243,17 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
243 | u_long page, npages, block, pblocks, nblocks, offset; | 243 | u_long page, npages, block, pblocks, nblocks, offset; |
244 | loff_t pos; | 244 | loff_t pos; |
245 | 245 | ||
246 | switch ((long)fp->f_pos) { | 246 | if (ctx->pos == 0) { |
247 | case 0: | 247 | if (!dir_emit_dot(fp, ctx)) |
248 | if (filler(retp, ".", 1, fp->f_pos, ip->i_ino, DT_DIR) < 0) | 248 | return 0; |
249 | goto out; | 249 | ctx->pos = 1; |
250 | fp->f_pos++; | ||
251 | /* fallthrough */ | ||
252 | case 1: | ||
253 | if (filler(retp, "..", 2, fp->f_pos, VXFS_INO(ip)->vii_dotdot, DT_DIR) < 0) | ||
254 | goto out; | ||
255 | fp->f_pos++; | ||
256 | /* fallthrough */ | ||
257 | } | 250 | } |
258 | 251 | if (ctx->pos == 1) { | |
259 | pos = fp->f_pos - 2; | 252 | if (!dir_emit(ctx, "..", 2, VXFS_INO(ip)->vii_dotdot, DT_DIR)) |
253 | return 0; | ||
254 | ctx->pos = 2; | ||
255 | } | ||
256 | pos = ctx->pos - 2; | ||
260 | 257 | ||
261 | if (pos > VXFS_DIRROUND(ip->i_size)) | 258 | if (pos > VXFS_DIRROUND(ip->i_size)) |
262 | return 0; | 259 | return 0; |
@@ -270,16 +267,16 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
270 | block = (u_long)(pos >> sbp->s_blocksize_bits) % pblocks; | 267 | block = (u_long)(pos >> sbp->s_blocksize_bits) % pblocks; |
271 | 268 | ||
272 | for (; page < npages; page++, block = 0) { | 269 | for (; page < npages; page++, block = 0) { |
273 | caddr_t kaddr; | 270 | char *kaddr; |
274 | struct page *pp; | 271 | struct page *pp; |
275 | 272 | ||
276 | pp = vxfs_get_page(ip->i_mapping, page); | 273 | pp = vxfs_get_page(ip->i_mapping, page); |
277 | if (IS_ERR(pp)) | 274 | if (IS_ERR(pp)) |
278 | continue; | 275 | continue; |
279 | kaddr = (caddr_t)page_address(pp); | 276 | kaddr = (char *)page_address(pp); |
280 | 277 | ||
281 | for (; block <= nblocks && block <= pblocks; block++) { | 278 | for (; block <= nblocks && block <= pblocks; block++) { |
282 | caddr_t baddr, limit; | 279 | char *baddr, *limit; |
283 | struct vxfs_dirblk *dbp; | 280 | struct vxfs_dirblk *dbp; |
284 | struct vxfs_direct *de; | 281 | struct vxfs_direct *de; |
285 | 282 | ||
@@ -292,21 +289,18 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
292 | (kaddr + offset) : | 289 | (kaddr + offset) : |
293 | (baddr + VXFS_DIRBLKOV(dbp))); | 290 | (baddr + VXFS_DIRBLKOV(dbp))); |
294 | 291 | ||
295 | for (; (caddr_t)de <= limit; de = vxfs_next_entry(de)) { | 292 | for (; (char *)de <= limit; de = vxfs_next_entry(de)) { |
296 | int over; | ||
297 | |||
298 | if (!de->d_reclen) | 293 | if (!de->d_reclen) |
299 | break; | 294 | break; |
300 | if (!de->d_ino) | 295 | if (!de->d_ino) |
301 | continue; | 296 | continue; |
302 | 297 | ||
303 | offset = (caddr_t)de - kaddr; | 298 | offset = (char *)de - kaddr; |
304 | over = filler(retp, de->d_name, de->d_namelen, | 299 | ctx->pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2; |
305 | ((page << PAGE_CACHE_SHIFT) | offset) + 2, | 300 | if (!dir_emit(ctx, de->d_name, de->d_namelen, |
306 | de->d_ino, DT_UNKNOWN); | 301 | de->d_ino, DT_UNKNOWN)) { |
307 | if (over) { | ||
308 | vxfs_put_page(pp); | 302 | vxfs_put_page(pp); |
309 | goto done; | 303 | return 0; |
310 | } | 304 | } |
311 | } | 305 | } |
312 | offset = 0; | 306 | offset = 0; |
@@ -314,9 +308,6 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
314 | vxfs_put_page(pp); | 308 | vxfs_put_page(pp); |
315 | offset = 0; | 309 | offset = 0; |
316 | } | 310 | } |
317 | 311 | ctx->pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2; | |
318 | done: | ||
319 | fp->f_pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2; | ||
320 | out: | ||
321 | return 0; | 312 | return 0; |
322 | } | 313 | } |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3be57189efd5..68851ff2fd41 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -45,6 +45,7 @@ struct wb_writeback_work { | |||
45 | unsigned int for_kupdate:1; | 45 | unsigned int for_kupdate:1; |
46 | unsigned int range_cyclic:1; | 46 | unsigned int range_cyclic:1; |
47 | unsigned int for_background:1; | 47 | unsigned int for_background:1; |
48 | unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ | ||
48 | enum wb_reason reason; /* why was writeback initiated? */ | 49 | enum wb_reason reason; /* why was writeback initiated? */ |
49 | 50 | ||
50 | struct list_head list; /* pending work list */ | 51 | struct list_head list; /* pending work list */ |
@@ -443,9 +444,11 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
443 | /* | 444 | /* |
444 | * Make sure to wait on the data before writing out the metadata. | 445 | * Make sure to wait on the data before writing out the metadata. |
445 | * This is important for filesystems that modify metadata on data | 446 | * This is important for filesystems that modify metadata on data |
446 | * I/O completion. | 447 | * I/O completion. We don't do it for sync(2) writeback because it has a |
448 | * separate, external IO completion path and ->sync_fs for guaranteeing | ||
449 | * inode metadata is written back correctly. | ||
447 | */ | 450 | */ |
448 | if (wbc->sync_mode == WB_SYNC_ALL) { | 451 | if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) { |
449 | int err = filemap_fdatawait(mapping); | 452 | int err = filemap_fdatawait(mapping); |
450 | if (ret == 0) | 453 | if (ret == 0) |
451 | ret = err; | 454 | ret = err; |
@@ -578,6 +581,7 @@ static long writeback_sb_inodes(struct super_block *sb, | |||
578 | .tagged_writepages = work->tagged_writepages, | 581 | .tagged_writepages = work->tagged_writepages, |
579 | .for_kupdate = work->for_kupdate, | 582 | .for_kupdate = work->for_kupdate, |
580 | .for_background = work->for_background, | 583 | .for_background = work->for_background, |
584 | .for_sync = work->for_sync, | ||
581 | .range_cyclic = work->range_cyclic, | 585 | .range_cyclic = work->range_cyclic, |
582 | .range_start = 0, | 586 | .range_start = 0, |
583 | .range_end = LLONG_MAX, | 587 | .range_end = LLONG_MAX, |
@@ -959,7 +963,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
959 | /* | 963 | /* |
960 | * Retrieve work items and do the writeback they describe | 964 | * Retrieve work items and do the writeback they describe |
961 | */ | 965 | */ |
962 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | 966 | static long wb_do_writeback(struct bdi_writeback *wb) |
963 | { | 967 | { |
964 | struct backing_dev_info *bdi = wb->bdi; | 968 | struct backing_dev_info *bdi = wb->bdi; |
965 | struct wb_writeback_work *work; | 969 | struct wb_writeback_work *work; |
@@ -967,12 +971,6 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
967 | 971 | ||
968 | set_bit(BDI_writeback_running, &wb->bdi->state); | 972 | set_bit(BDI_writeback_running, &wb->bdi->state); |
969 | while ((work = get_next_work_item(bdi)) != NULL) { | 973 | while ((work = get_next_work_item(bdi)) != NULL) { |
970 | /* | ||
971 | * Override sync mode, in case we must wait for completion | ||
972 | * because this thread is exiting now. | ||
973 | */ | ||
974 | if (force_wait) | ||
975 | work->sync_mode = WB_SYNC_ALL; | ||
976 | 974 | ||
977 | trace_writeback_exec(bdi, work); | 975 | trace_writeback_exec(bdi, work); |
978 | 976 | ||
@@ -1021,7 +1019,7 @@ void bdi_writeback_workfn(struct work_struct *work) | |||
1021 | * rescuer as work_list needs to be drained. | 1019 | * rescuer as work_list needs to be drained. |
1022 | */ | 1020 | */ |
1023 | do { | 1021 | do { |
1024 | pages_written = wb_do_writeback(wb, 0); | 1022 | pages_written = wb_do_writeback(wb); |
1025 | trace_writeback_pages_written(pages_written); | 1023 | trace_writeback_pages_written(pages_written); |
1026 | } while (!list_empty(&bdi->work_list)); | 1024 | } while (!list_empty(&bdi->work_list)); |
1027 | } else { | 1025 | } else { |
@@ -1362,6 +1360,7 @@ void sync_inodes_sb(struct super_block *sb) | |||
1362 | .range_cyclic = 0, | 1360 | .range_cyclic = 0, |
1363 | .done = &done, | 1361 | .done = &done, |
1364 | .reason = WB_REASON_SYNC, | 1362 | .reason = WB_REASON_SYNC, |
1363 | .for_sync = 1, | ||
1365 | }; | 1364 | }; |
1366 | 1365 | ||
1367 | /* Nothing to do? */ | 1366 | /* Nothing to do? */ |
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index b52aed1dca97..f7cff367db7f 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c | |||
@@ -115,7 +115,7 @@ struct fscache_cache *fscache_select_cache_for_object( | |||
115 | struct fscache_object, cookie_link); | 115 | struct fscache_object, cookie_link); |
116 | 116 | ||
117 | cache = object->cache; | 117 | cache = object->cache; |
118 | if (object->state >= FSCACHE_OBJECT_DYING || | 118 | if (fscache_object_is_dying(object) || |
119 | test_bit(FSCACHE_IOERROR, &cache->flags)) | 119 | test_bit(FSCACHE_IOERROR, &cache->flags)) |
120 | cache = NULL; | 120 | cache = NULL; |
121 | 121 | ||
@@ -224,8 +224,10 @@ int fscache_add_cache(struct fscache_cache *cache, | |||
224 | BUG_ON(!ifsdef); | 224 | BUG_ON(!ifsdef); |
225 | 225 | ||
226 | cache->flags = 0; | 226 | cache->flags = 0; |
227 | ifsdef->event_mask = ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); | 227 | ifsdef->event_mask = |
228 | ifsdef->state = FSCACHE_OBJECT_ACTIVE; | 228 | ((1 << NR_FSCACHE_OBJECT_EVENTS) - 1) & |
229 | ~(1 << FSCACHE_OBJECT_EV_CLEARED); | ||
230 | __set_bit(FSCACHE_OBJECT_IS_AVAILABLE, &ifsdef->flags); | ||
229 | 231 | ||
230 | if (!tagname) | 232 | if (!tagname) |
231 | tagname = cache->identifier; | 233 | tagname = cache->identifier; |
@@ -330,25 +332,25 @@ static void fscache_withdraw_all_objects(struct fscache_cache *cache, | |||
330 | { | 332 | { |
331 | struct fscache_object *object; | 333 | struct fscache_object *object; |
332 | 334 | ||
333 | spin_lock(&cache->object_list_lock); | ||
334 | |||
335 | while (!list_empty(&cache->object_list)) { | 335 | while (!list_empty(&cache->object_list)) { |
336 | object = list_entry(cache->object_list.next, | 336 | spin_lock(&cache->object_list_lock); |
337 | struct fscache_object, cache_link); | ||
338 | list_move_tail(&object->cache_link, dying_objects); | ||
339 | 337 | ||
340 | _debug("withdraw %p", object->cookie); | 338 | if (!list_empty(&cache->object_list)) { |
339 | object = list_entry(cache->object_list.next, | ||
340 | struct fscache_object, cache_link); | ||
341 | list_move_tail(&object->cache_link, dying_objects); | ||
341 | 342 | ||
342 | spin_lock(&object->lock); | 343 | _debug("withdraw %p", object->cookie); |
343 | spin_unlock(&cache->object_list_lock); | 344 | |
344 | fscache_raise_event(object, FSCACHE_OBJECT_EV_WITHDRAW); | 345 | /* This must be done under object_list_lock to prevent |
345 | spin_unlock(&object->lock); | 346 | * a race with fscache_drop_object(). |
347 | */ | ||
348 | fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); | ||
349 | } | ||
346 | 350 | ||
351 | spin_unlock(&cache->object_list_lock); | ||
347 | cond_resched(); | 352 | cond_resched(); |
348 | spin_lock(&cache->object_list_lock); | ||
349 | } | 353 | } |
350 | |||
351 | spin_unlock(&cache->object_list_lock); | ||
352 | } | 354 | } |
353 | 355 | ||
354 | /** | 356 | /** |
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index e2cba1f60c21..0e91a3c9fdb2 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c | |||
@@ -95,6 +95,11 @@ struct fscache_cookie *__fscache_acquire_cookie( | |||
95 | atomic_set(&cookie->usage, 1); | 95 | atomic_set(&cookie->usage, 1); |
96 | atomic_set(&cookie->n_children, 0); | 96 | atomic_set(&cookie->n_children, 0); |
97 | 97 | ||
98 | /* We keep the active count elevated until relinquishment to prevent an | ||
99 | * attempt to wake up every time the object operations queue quiesces. | ||
100 | */ | ||
101 | atomic_set(&cookie->n_active, 1); | ||
102 | |||
98 | atomic_inc(&parent->usage); | 103 | atomic_inc(&parent->usage); |
99 | atomic_inc(&parent->n_children); | 104 | atomic_inc(&parent->n_children); |
100 | 105 | ||
@@ -177,7 +182,6 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) | |||
177 | 182 | ||
178 | cookie->flags = | 183 | cookie->flags = |
179 | (1 << FSCACHE_COOKIE_LOOKING_UP) | | 184 | (1 << FSCACHE_COOKIE_LOOKING_UP) | |
180 | (1 << FSCACHE_COOKIE_CREATING) | | ||
181 | (1 << FSCACHE_COOKIE_NO_DATA_YET); | 185 | (1 << FSCACHE_COOKIE_NO_DATA_YET); |
182 | 186 | ||
183 | /* ask the cache to allocate objects for this cookie and its parent | 187 | /* ask the cache to allocate objects for this cookie and its parent |
@@ -205,7 +209,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) | |||
205 | 209 | ||
206 | /* initiate the process of looking up all the objects in the chain | 210 | /* initiate the process of looking up all the objects in the chain |
207 | * (done by fscache_initialise_object()) */ | 211 | * (done by fscache_initialise_object()) */ |
208 | fscache_enqueue_object(object); | 212 | fscache_raise_event(object, FSCACHE_OBJECT_EV_NEW_CHILD); |
209 | 213 | ||
210 | spin_unlock(&cookie->lock); | 214 | spin_unlock(&cookie->lock); |
211 | 215 | ||
@@ -285,7 +289,7 @@ static int fscache_alloc_object(struct fscache_cache *cache, | |||
285 | 289 | ||
286 | object_already_extant: | 290 | object_already_extant: |
287 | ret = -ENOBUFS; | 291 | ret = -ENOBUFS; |
288 | if (object->state >= FSCACHE_OBJECT_DYING) { | 292 | if (fscache_object_is_dead(object)) { |
289 | spin_unlock(&cookie->lock); | 293 | spin_unlock(&cookie->lock); |
290 | goto error; | 294 | goto error; |
291 | } | 295 | } |
@@ -321,7 +325,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, | |||
321 | ret = -EEXIST; | 325 | ret = -EEXIST; |
322 | hlist_for_each_entry(p, &cookie->backing_objects, cookie_link) { | 326 | hlist_for_each_entry(p, &cookie->backing_objects, cookie_link) { |
323 | if (p->cache == object->cache) { | 327 | if (p->cache == object->cache) { |
324 | if (p->state >= FSCACHE_OBJECT_DYING) | 328 | if (fscache_object_is_dying(p)) |
325 | ret = -ENOBUFS; | 329 | ret = -ENOBUFS; |
326 | goto cant_attach_object; | 330 | goto cant_attach_object; |
327 | } | 331 | } |
@@ -332,7 +336,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, | |||
332 | hlist_for_each_entry(p, &cookie->parent->backing_objects, | 336 | hlist_for_each_entry(p, &cookie->parent->backing_objects, |
333 | cookie_link) { | 337 | cookie_link) { |
334 | if (p->cache == object->cache) { | 338 | if (p->cache == object->cache) { |
335 | if (p->state >= FSCACHE_OBJECT_DYING) { | 339 | if (fscache_object_is_dying(p)) { |
336 | ret = -ENOBUFS; | 340 | ret = -ENOBUFS; |
337 | spin_unlock(&cookie->parent->lock); | 341 | spin_unlock(&cookie->parent->lock); |
338 | goto cant_attach_object; | 342 | goto cant_attach_object; |
@@ -400,7 +404,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie) | |||
400 | object = hlist_entry(cookie->backing_objects.first, | 404 | object = hlist_entry(cookie->backing_objects.first, |
401 | struct fscache_object, | 405 | struct fscache_object, |
402 | cookie_link); | 406 | cookie_link); |
403 | if (object->state < FSCACHE_OBJECT_DYING) | 407 | if (fscache_object_is_live(object)) |
404 | fscache_raise_event( | 408 | fscache_raise_event( |
405 | object, FSCACHE_OBJECT_EV_INVALIDATE); | 409 | object, FSCACHE_OBJECT_EV_INVALIDATE); |
406 | } | 410 | } |
@@ -467,9 +471,7 @@ EXPORT_SYMBOL(__fscache_update_cookie); | |||
467 | */ | 471 | */ |
468 | void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) | 472 | void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) |
469 | { | 473 | { |
470 | struct fscache_cache *cache; | ||
471 | struct fscache_object *object; | 474 | struct fscache_object *object; |
472 | unsigned long event; | ||
473 | 475 | ||
474 | fscache_stat(&fscache_n_relinquishes); | 476 | fscache_stat(&fscache_n_relinquishes); |
475 | if (retire) | 477 | if (retire) |
@@ -481,8 +483,11 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) | |||
481 | return; | 483 | return; |
482 | } | 484 | } |
483 | 485 | ||
484 | _enter("%p{%s,%p},%d", | 486 | _enter("%p{%s,%p,%d},%d", |
485 | cookie, cookie->def->name, cookie->netfs_data, retire); | 487 | cookie, cookie->def->name, cookie->netfs_data, |
488 | atomic_read(&cookie->n_active), retire); | ||
489 | |||
490 | ASSERTCMP(atomic_read(&cookie->n_active), >, 0); | ||
486 | 491 | ||
487 | if (atomic_read(&cookie->n_children) != 0) { | 492 | if (atomic_read(&cookie->n_children) != 0) { |
488 | printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n", | 493 | printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n", |
@@ -490,62 +495,28 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) | |||
490 | BUG(); | 495 | BUG(); |
491 | } | 496 | } |
492 | 497 | ||
493 | /* wait for the cookie to finish being instantiated (or to fail) */ | 498 | /* No further netfs-accessing operations on this cookie permitted */ |
494 | if (test_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) { | 499 | set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags); |
495 | fscache_stat(&fscache_n_relinquishes_waitcrt); | 500 | if (retire) |
496 | wait_on_bit(&cookie->flags, FSCACHE_COOKIE_CREATING, | 501 | set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags); |
497 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
498 | } | ||
499 | |||
500 | event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; | ||
501 | 502 | ||
502 | try_again: | ||
503 | spin_lock(&cookie->lock); | 503 | spin_lock(&cookie->lock); |
504 | 504 | hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { | |
505 | /* break links with all the active objects */ | 505 | fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); |
506 | while (!hlist_empty(&cookie->backing_objects)) { | ||
507 | int n_reads; | ||
508 | object = hlist_entry(cookie->backing_objects.first, | ||
509 | struct fscache_object, | ||
510 | cookie_link); | ||
511 | |||
512 | _debug("RELEASE OBJ%x", object->debug_id); | ||
513 | |||
514 | set_bit(FSCACHE_COOKIE_WAITING_ON_READS, &cookie->flags); | ||
515 | n_reads = atomic_read(&object->n_reads); | ||
516 | if (n_reads) { | ||
517 | int n_ops = object->n_ops; | ||
518 | int n_in_progress = object->n_in_progress; | ||
519 | spin_unlock(&cookie->lock); | ||
520 | printk(KERN_ERR "FS-Cache:" | ||
521 | " Cookie '%s' still has %d outstanding reads (%d,%d)\n", | ||
522 | cookie->def->name, | ||
523 | n_reads, n_ops, n_in_progress); | ||
524 | wait_on_bit(&cookie->flags, FSCACHE_COOKIE_WAITING_ON_READS, | ||
525 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
526 | printk("Wait finished\n"); | ||
527 | goto try_again; | ||
528 | } | ||
529 | |||
530 | /* detach each cache object from the object cookie */ | ||
531 | spin_lock(&object->lock); | ||
532 | hlist_del_init(&object->cookie_link); | ||
533 | |||
534 | cache = object->cache; | ||
535 | object->cookie = NULL; | ||
536 | fscache_raise_event(object, event); | ||
537 | spin_unlock(&object->lock); | ||
538 | |||
539 | if (atomic_dec_and_test(&cookie->usage)) | ||
540 | /* the cookie refcount shouldn't be reduced to 0 yet */ | ||
541 | BUG(); | ||
542 | } | 506 | } |
507 | spin_unlock(&cookie->lock); | ||
543 | 508 | ||
544 | /* detach pointers back to the netfs */ | 509 | /* Wait for cessation of activity requiring access to the netfs (when |
510 | * n_active reaches 0). | ||
511 | */ | ||
512 | if (!atomic_dec_and_test(&cookie->n_active)) | ||
513 | wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, | ||
514 | TASK_UNINTERRUPTIBLE); | ||
515 | |||
516 | /* Clear pointers back to the netfs */ | ||
545 | cookie->netfs_data = NULL; | 517 | cookie->netfs_data = NULL; |
546 | cookie->def = NULL; | 518 | cookie->def = NULL; |
547 | 519 | BUG_ON(cookie->stores.rnode); | |
548 | spin_unlock(&cookie->lock); | ||
549 | 520 | ||
550 | if (cookie->parent) { | 521 | if (cookie->parent) { |
551 | ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); | 522 | ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); |
@@ -553,7 +524,7 @@ try_again: | |||
553 | atomic_dec(&cookie->parent->n_children); | 524 | atomic_dec(&cookie->parent->n_children); |
554 | } | 525 | } |
555 | 526 | ||
556 | /* finally dispose of the cookie */ | 527 | /* Dispose of the netfs's link to the cookie */ |
557 | ASSERTCMP(atomic_read(&cookie->usage), >, 0); | 528 | ASSERTCMP(atomic_read(&cookie->usage), >, 0); |
558 | fscache_cookie_put(cookie); | 529 | fscache_cookie_put(cookie); |
559 | 530 | ||
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c index f5b4baee7352..10a2ade0bdf8 100644 --- a/fs/fscache/fsdef.c +++ b/fs/fscache/fsdef.c | |||
@@ -55,6 +55,7 @@ static struct fscache_cookie_def fscache_fsdef_index_def = { | |||
55 | 55 | ||
56 | struct fscache_cookie fscache_fsdef_index = { | 56 | struct fscache_cookie fscache_fsdef_index = { |
57 | .usage = ATOMIC_INIT(1), | 57 | .usage = ATOMIC_INIT(1), |
58 | .n_active = ATOMIC_INIT(1), | ||
58 | .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), | 59 | .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), |
59 | .backing_objects = HLIST_HEAD_INIT, | 60 | .backing_objects = HLIST_HEAD_INIT, |
60 | .def = &fscache_fsdef_index_def, | 61 | .def = &fscache_fsdef_index_def, |
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index ee38fef4be51..12d505bedb5c 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h | |||
@@ -93,14 +93,11 @@ static inline bool fscache_object_congested(void) | |||
93 | 93 | ||
94 | extern int fscache_wait_bit(void *); | 94 | extern int fscache_wait_bit(void *); |
95 | extern int fscache_wait_bit_interruptible(void *); | 95 | extern int fscache_wait_bit_interruptible(void *); |
96 | extern int fscache_wait_atomic_t(atomic_t *); | ||
96 | 97 | ||
97 | /* | 98 | /* |
98 | * object.c | 99 | * object.c |
99 | */ | 100 | */ |
100 | extern const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5]; | ||
101 | |||
102 | extern void fscache_withdrawing_object(struct fscache_cache *, | ||
103 | struct fscache_object *); | ||
104 | extern void fscache_enqueue_object(struct fscache_object *); | 101 | extern void fscache_enqueue_object(struct fscache_object *); |
105 | 102 | ||
106 | /* | 103 | /* |
@@ -110,8 +107,10 @@ extern void fscache_enqueue_object(struct fscache_object *); | |||
110 | extern const struct file_operations fscache_objlist_fops; | 107 | extern const struct file_operations fscache_objlist_fops; |
111 | 108 | ||
112 | extern void fscache_objlist_add(struct fscache_object *); | 109 | extern void fscache_objlist_add(struct fscache_object *); |
110 | extern void fscache_objlist_remove(struct fscache_object *); | ||
113 | #else | 111 | #else |
114 | #define fscache_objlist_add(object) do {} while(0) | 112 | #define fscache_objlist_add(object) do {} while(0) |
113 | #define fscache_objlist_remove(object) do {} while(0) | ||
115 | #endif | 114 | #endif |
116 | 115 | ||
117 | /* | 116 | /* |
@@ -291,6 +290,10 @@ static inline void fscache_raise_event(struct fscache_object *object, | |||
291 | unsigned event) | 290 | unsigned event) |
292 | { | 291 | { |
293 | BUG_ON(event >= NR_FSCACHE_OBJECT_EVENTS); | 292 | BUG_ON(event >= NR_FSCACHE_OBJECT_EVENTS); |
293 | #if 0 | ||
294 | printk("*** fscache_raise_event(OBJ%d{%lx},%x)\n", | ||
295 | object->debug_id, object->event_mask, (1 << event)); | ||
296 | #endif | ||
294 | if (!test_and_set_bit(event, &object->events) && | 297 | if (!test_and_set_bit(event, &object->events) && |
295 | test_bit(event, &object->event_mask)) | 298 | test_bit(event, &object->event_mask)) |
296 | fscache_enqueue_object(object); | 299 | fscache_enqueue_object(object); |
diff --git a/fs/fscache/main.c b/fs/fscache/main.c index f9d856773f79..7c27907e650c 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c | |||
@@ -205,7 +205,6 @@ int fscache_wait_bit(void *flags) | |||
205 | schedule(); | 205 | schedule(); |
206 | return 0; | 206 | return 0; |
207 | } | 207 | } |
208 | EXPORT_SYMBOL(fscache_wait_bit); | ||
209 | 208 | ||
210 | /* | 209 | /* |
211 | * wait_on_bit() sleep function for interruptible waiting | 210 | * wait_on_bit() sleep function for interruptible waiting |
@@ -215,4 +214,12 @@ int fscache_wait_bit_interruptible(void *flags) | |||
215 | schedule(); | 214 | schedule(); |
216 | return signal_pending(current); | 215 | return signal_pending(current); |
217 | } | 216 | } |
218 | EXPORT_SYMBOL(fscache_wait_bit_interruptible); | 217 | |
218 | /* | ||
219 | * wait_on_atomic_t() sleep function for uninterruptible waiting | ||
220 | */ | ||
221 | int fscache_wait_atomic_t(atomic_t *p) | ||
222 | { | ||
223 | schedule(); | ||
224 | return 0; | ||
225 | } | ||
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index e028b8eb1c40..b1bb6117473a 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c | |||
@@ -40,6 +40,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) | |||
40 | /* initialise the primary index cookie */ | 40 | /* initialise the primary index cookie */ |
41 | atomic_set(&netfs->primary_index->usage, 1); | 41 | atomic_set(&netfs->primary_index->usage, 1); |
42 | atomic_set(&netfs->primary_index->n_children, 0); | 42 | atomic_set(&netfs->primary_index->n_children, 0); |
43 | atomic_set(&netfs->primary_index->n_active, 1); | ||
43 | 44 | ||
44 | netfs->primary_index->def = &fscache_fsdef_netfs_def; | 45 | netfs->primary_index->def = &fscache_fsdef_netfs_def; |
45 | netfs->primary_index->parent = &fscache_fsdef_index; | 46 | netfs->primary_index->parent = &fscache_fsdef_index; |
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index f27c89d17885..e1959efad64f 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c | |||
@@ -70,13 +70,10 @@ void fscache_objlist_add(struct fscache_object *obj) | |||
70 | write_unlock(&fscache_object_list_lock); | 70 | write_unlock(&fscache_object_list_lock); |
71 | } | 71 | } |
72 | 72 | ||
73 | /** | 73 | /* |
74 | * fscache_object_destroy - Note that a cache object is about to be destroyed | 74 | * Remove an object from the object list. |
75 | * @object: The object to be destroyed | ||
76 | * | ||
77 | * Note the imminent destruction and deallocation of a cache object record. | ||
78 | */ | 75 | */ |
79 | void fscache_object_destroy(struct fscache_object *obj) | 76 | void fscache_objlist_remove(struct fscache_object *obj) |
80 | { | 77 | { |
81 | write_lock(&fscache_object_list_lock); | 78 | write_lock(&fscache_object_list_lock); |
82 | 79 | ||
@@ -85,7 +82,6 @@ void fscache_object_destroy(struct fscache_object *obj) | |||
85 | 82 | ||
86 | write_unlock(&fscache_object_list_lock); | 83 | write_unlock(&fscache_object_list_lock); |
87 | } | 84 | } |
88 | EXPORT_SYMBOL(fscache_object_destroy); | ||
89 | 85 | ||
90 | /* | 86 | /* |
91 | * find the object in the tree on or after the specified index | 87 | * find the object in the tree on or after the specified index |
@@ -166,15 +162,14 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
166 | { | 162 | { |
167 | struct fscache_objlist_data *data = m->private; | 163 | struct fscache_objlist_data *data = m->private; |
168 | struct fscache_object *obj = v; | 164 | struct fscache_object *obj = v; |
165 | struct fscache_cookie *cookie; | ||
169 | unsigned long config = data->config; | 166 | unsigned long config = data->config; |
170 | uint16_t keylen, auxlen; | ||
171 | char _type[3], *type; | 167 | char _type[3], *type; |
172 | bool no_cookie; | ||
173 | u8 *buf = data->buf, *p; | 168 | u8 *buf = data->buf, *p; |
174 | 169 | ||
175 | if ((unsigned long) v == 1) { | 170 | if ((unsigned long) v == 1) { |
176 | seq_puts(m, "OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS" | 171 | seq_puts(m, "OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS" |
177 | " EM EV F S" | 172 | " EM EV FL S" |
178 | " | NETFS_COOKIE_DEF TY FL NETFS_DATA"); | 173 | " | NETFS_COOKIE_DEF TY FL NETFS_DATA"); |
179 | if (config & (FSCACHE_OBJLIST_CONFIG_KEY | | 174 | if (config & (FSCACHE_OBJLIST_CONFIG_KEY | |
180 | FSCACHE_OBJLIST_CONFIG_AUX)) | 175 | FSCACHE_OBJLIST_CONFIG_AUX)) |
@@ -193,7 +188,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
193 | 188 | ||
194 | if ((unsigned long) v == 2) { | 189 | if ((unsigned long) v == 2) { |
195 | seq_puts(m, "======== ======== ==== ===== === === === == =====" | 190 | seq_puts(m, "======== ======== ==== ===== === === === == =====" |
196 | " == == = =" | 191 | " == == == =" |
197 | " | ================ == == ================"); | 192 | " | ================ == == ================"); |
198 | if (config & (FSCACHE_OBJLIST_CONFIG_KEY | | 193 | if (config & (FSCACHE_OBJLIST_CONFIG_KEY | |
199 | FSCACHE_OBJLIST_CONFIG_AUX)) | 194 | FSCACHE_OBJLIST_CONFIG_AUX)) |
@@ -216,10 +211,11 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
216 | } \ | 211 | } \ |
217 | } while(0) | 212 | } while(0) |
218 | 213 | ||
214 | cookie = obj->cookie; | ||
219 | if (~config) { | 215 | if (~config) { |
220 | FILTER(obj->cookie, | 216 | FILTER(cookie->def, |
221 | COOKIE, NOCOOKIE); | 217 | COOKIE, NOCOOKIE); |
222 | FILTER(obj->state != FSCACHE_OBJECT_ACTIVE || | 218 | FILTER(fscache_object_is_active(obj) || |
223 | obj->n_ops != 0 || | 219 | obj->n_ops != 0 || |
224 | obj->n_obj_ops != 0 || | 220 | obj->n_obj_ops != 0 || |
225 | obj->flags || | 221 | obj->flags || |
@@ -235,10 +231,10 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
235 | } | 231 | } |
236 | 232 | ||
237 | seq_printf(m, | 233 | seq_printf(m, |
238 | "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1x | ", | 234 | "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %2lx %1x | ", |
239 | obj->debug_id, | 235 | obj->debug_id, |
240 | obj->parent ? obj->parent->debug_id : -1, | 236 | obj->parent ? obj->parent->debug_id : -1, |
241 | fscache_object_states_short[obj->state], | 237 | obj->state->short_name, |
242 | obj->n_children, | 238 | obj->n_children, |
243 | obj->n_ops, | 239 | obj->n_ops, |
244 | obj->n_obj_ops, | 240 | obj->n_obj_ops, |
@@ -250,48 +246,40 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
250 | obj->flags, | 246 | obj->flags, |
251 | work_busy(&obj->work)); | 247 | work_busy(&obj->work)); |
252 | 248 | ||
253 | no_cookie = true; | 249 | if (fscache_use_cookie(obj)) { |
254 | keylen = auxlen = 0; | 250 | uint16_t keylen = 0, auxlen = 0; |
255 | if (obj->cookie) { | ||
256 | spin_lock(&obj->lock); | ||
257 | if (obj->cookie) { | ||
258 | switch (obj->cookie->def->type) { | ||
259 | case 0: | ||
260 | type = "IX"; | ||
261 | break; | ||
262 | case 1: | ||
263 | type = "DT"; | ||
264 | break; | ||
265 | default: | ||
266 | sprintf(_type, "%02u", | ||
267 | obj->cookie->def->type); | ||
268 | type = _type; | ||
269 | break; | ||
270 | } | ||
271 | 251 | ||
272 | seq_printf(m, "%-16s %s %2lx %16p", | 252 | switch (cookie->def->type) { |
273 | obj->cookie->def->name, | 253 | case 0: |
274 | type, | 254 | type = "IX"; |
275 | obj->cookie->flags, | 255 | break; |
276 | obj->cookie->netfs_data); | 256 | case 1: |
277 | 257 | type = "DT"; | |
278 | if (obj->cookie->def->get_key && | 258 | break; |
279 | config & FSCACHE_OBJLIST_CONFIG_KEY) | 259 | default: |
280 | keylen = obj->cookie->def->get_key( | 260 | sprintf(_type, "%02u", cookie->def->type); |
281 | obj->cookie->netfs_data, | 261 | type = _type; |
282 | buf, 400); | 262 | break; |
283 | |||
284 | if (obj->cookie->def->get_aux && | ||
285 | config & FSCACHE_OBJLIST_CONFIG_AUX) | ||
286 | auxlen = obj->cookie->def->get_aux( | ||
287 | obj->cookie->netfs_data, | ||
288 | buf + keylen, 512 - keylen); | ||
289 | |||
290 | no_cookie = false; | ||
291 | } | 263 | } |
292 | spin_unlock(&obj->lock); | ||
293 | 264 | ||
294 | if (!no_cookie && (keylen > 0 || auxlen > 0)) { | 265 | seq_printf(m, "%-16s %s %2lx %16p", |
266 | cookie->def->name, | ||
267 | type, | ||
268 | cookie->flags, | ||
269 | cookie->netfs_data); | ||
270 | |||
271 | if (cookie->def->get_key && | ||
272 | config & FSCACHE_OBJLIST_CONFIG_KEY) | ||
273 | keylen = cookie->def->get_key(cookie->netfs_data, | ||
274 | buf, 400); | ||
275 | |||
276 | if (cookie->def->get_aux && | ||
277 | config & FSCACHE_OBJLIST_CONFIG_AUX) | ||
278 | auxlen = cookie->def->get_aux(cookie->netfs_data, | ||
279 | buf + keylen, 512 - keylen); | ||
280 | fscache_unuse_cookie(obj); | ||
281 | |||
282 | if (keylen > 0 || auxlen > 0) { | ||
295 | seq_printf(m, " "); | 283 | seq_printf(m, " "); |
296 | for (p = buf; keylen > 0; keylen--) | 284 | for (p = buf; keylen > 0; keylen--) |
297 | seq_printf(m, "%02x", *p++); | 285 | seq_printf(m, "%02x", *p++); |
@@ -302,12 +290,11 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
302 | seq_printf(m, "%02x", *p++); | 290 | seq_printf(m, "%02x", *p++); |
303 | } | 291 | } |
304 | } | 292 | } |
305 | } | ||
306 | 293 | ||
307 | if (no_cookie) | ||
308 | seq_printf(m, "<no_cookie>\n"); | ||
309 | else | ||
310 | seq_printf(m, "\n"); | 294 | seq_printf(m, "\n"); |
295 | } else { | ||
296 | seq_printf(m, "<no_netfs>\n"); | ||
297 | } | ||
311 | return 0; | 298 | return 0; |
312 | } | 299 | } |
313 | 300 | ||
diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 50d41c180211..86d75a60b20c 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c | |||
@@ -15,52 +15,131 @@ | |||
15 | #define FSCACHE_DEBUG_LEVEL COOKIE | 15 | #define FSCACHE_DEBUG_LEVEL COOKIE |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/prefetch.h> | ||
18 | #include "internal.h" | 19 | #include "internal.h" |
19 | 20 | ||
20 | const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { | 21 | static const struct fscache_state *fscache_abort_initialisation(struct fscache_object *, int); |
21 | [FSCACHE_OBJECT_INIT] = "OBJECT_INIT", | 22 | static const struct fscache_state *fscache_kill_dependents(struct fscache_object *, int); |
22 | [FSCACHE_OBJECT_LOOKING_UP] = "OBJECT_LOOKING_UP", | 23 | static const struct fscache_state *fscache_drop_object(struct fscache_object *, int); |
23 | [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", | 24 | static const struct fscache_state *fscache_initialise_object(struct fscache_object *, int); |
24 | [FSCACHE_OBJECT_AVAILABLE] = "OBJECT_AVAILABLE", | 25 | static const struct fscache_state *fscache_invalidate_object(struct fscache_object *, int); |
25 | [FSCACHE_OBJECT_ACTIVE] = "OBJECT_ACTIVE", | 26 | static const struct fscache_state *fscache_jumpstart_dependents(struct fscache_object *, int); |
26 | [FSCACHE_OBJECT_INVALIDATING] = "OBJECT_INVALIDATING", | 27 | static const struct fscache_state *fscache_kill_object(struct fscache_object *, int); |
27 | [FSCACHE_OBJECT_UPDATING] = "OBJECT_UPDATING", | 28 | static const struct fscache_state *fscache_lookup_failure(struct fscache_object *, int); |
28 | [FSCACHE_OBJECT_DYING] = "OBJECT_DYING", | 29 | static const struct fscache_state *fscache_look_up_object(struct fscache_object *, int); |
29 | [FSCACHE_OBJECT_LC_DYING] = "OBJECT_LC_DYING", | 30 | static const struct fscache_state *fscache_object_available(struct fscache_object *, int); |
30 | [FSCACHE_OBJECT_ABORT_INIT] = "OBJECT_ABORT_INIT", | 31 | static const struct fscache_state *fscache_parent_ready(struct fscache_object *, int); |
31 | [FSCACHE_OBJECT_RELEASING] = "OBJECT_RELEASING", | 32 | static const struct fscache_state *fscache_update_object(struct fscache_object *, int); |
32 | [FSCACHE_OBJECT_RECYCLING] = "OBJECT_RECYCLING", | 33 | |
33 | [FSCACHE_OBJECT_WITHDRAWING] = "OBJECT_WITHDRAWING", | 34 | #define __STATE_NAME(n) fscache_osm_##n |
34 | [FSCACHE_OBJECT_DEAD] = "OBJECT_DEAD", | 35 | #define STATE(n) (&__STATE_NAME(n)) |
36 | |||
37 | /* | ||
38 | * Define a work state. Work states are execution states. No event processing | ||
39 | * is performed by them. The function attached to a work state returns a | ||
40 | * pointer indicating the next state to which the state machine should | ||
41 | * transition. Returning NO_TRANSIT repeats the current state, but goes back | ||
42 | * to the scheduler first. | ||
43 | */ | ||
44 | #define WORK_STATE(n, sn, f) \ | ||
45 | const struct fscache_state __STATE_NAME(n) = { \ | ||
46 | .name = #n, \ | ||
47 | .short_name = sn, \ | ||
48 | .work = f \ | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * Returns from work states. | ||
53 | */ | ||
54 | #define transit_to(state) ({ prefetch(&STATE(state)->work); STATE(state); }) | ||
55 | |||
56 | #define NO_TRANSIT ((struct fscache_state *)NULL) | ||
57 | |||
58 | /* | ||
59 | * Define a wait state. Wait states are event processing states. No execution | ||
60 | * is performed by them. Wait states are just tables of "if event X occurs, | ||
61 | * clear it and transition to state Y". The dispatcher returns to the | ||
62 | * scheduler if none of the events in which the wait state has an interest are | ||
63 | * currently pending. | ||
64 | */ | ||
65 | #define WAIT_STATE(n, sn, ...) \ | ||
66 | const struct fscache_state __STATE_NAME(n) = { \ | ||
67 | .name = #n, \ | ||
68 | .short_name = sn, \ | ||
69 | .work = NULL, \ | ||
70 | .transitions = { __VA_ARGS__, { 0, NULL } } \ | ||
71 | } | ||
72 | |||
73 | #define TRANSIT_TO(state, emask) \ | ||
74 | { .events = (emask), .transit_to = STATE(state) } | ||
75 | |||
76 | /* | ||
77 | * The object state machine. | ||
78 | */ | ||
79 | static WORK_STATE(INIT_OBJECT, "INIT", fscache_initialise_object); | ||
80 | static WORK_STATE(PARENT_READY, "PRDY", fscache_parent_ready); | ||
81 | static WORK_STATE(ABORT_INIT, "ABRT", fscache_abort_initialisation); | ||
82 | static WORK_STATE(LOOK_UP_OBJECT, "LOOK", fscache_look_up_object); | ||
83 | static WORK_STATE(CREATE_OBJECT, "CRTO", fscache_look_up_object); | ||
84 | static WORK_STATE(OBJECT_AVAILABLE, "AVBL", fscache_object_available); | ||
85 | static WORK_STATE(JUMPSTART_DEPS, "JUMP", fscache_jumpstart_dependents); | ||
86 | |||
87 | static WORK_STATE(INVALIDATE_OBJECT, "INVL", fscache_invalidate_object); | ||
88 | static WORK_STATE(UPDATE_OBJECT, "UPDT", fscache_update_object); | ||
89 | |||
90 | static WORK_STATE(LOOKUP_FAILURE, "LCFL", fscache_lookup_failure); | ||
91 | static WORK_STATE(KILL_OBJECT, "KILL", fscache_kill_object); | ||
92 | static WORK_STATE(KILL_DEPENDENTS, "KDEP", fscache_kill_dependents); | ||
93 | static WORK_STATE(DROP_OBJECT, "DROP", fscache_drop_object); | ||
94 | static WORK_STATE(OBJECT_DEAD, "DEAD", (void*)2UL); | ||
95 | |||
96 | static WAIT_STATE(WAIT_FOR_INIT, "?INI", | ||
97 | TRANSIT_TO(INIT_OBJECT, 1 << FSCACHE_OBJECT_EV_NEW_CHILD)); | ||
98 | |||
99 | static WAIT_STATE(WAIT_FOR_PARENT, "?PRN", | ||
100 | TRANSIT_TO(PARENT_READY, 1 << FSCACHE_OBJECT_EV_PARENT_READY)); | ||
101 | |||
102 | static WAIT_STATE(WAIT_FOR_CMD, "?CMD", | ||
103 | TRANSIT_TO(INVALIDATE_OBJECT, 1 << FSCACHE_OBJECT_EV_INVALIDATE), | ||
104 | TRANSIT_TO(UPDATE_OBJECT, 1 << FSCACHE_OBJECT_EV_UPDATE), | ||
105 | TRANSIT_TO(JUMPSTART_DEPS, 1 << FSCACHE_OBJECT_EV_NEW_CHILD)); | ||
106 | |||
107 | static WAIT_STATE(WAIT_FOR_CLEARANCE, "?CLR", | ||
108 | TRANSIT_TO(KILL_OBJECT, 1 << FSCACHE_OBJECT_EV_CLEARED)); | ||
109 | |||
110 | /* | ||
111 | * Out-of-band event transition tables. These are for handling unexpected | ||
112 | * events, such as an I/O error. If an OOB event occurs, the state machine | ||
113 | * clears and disables the event and forces a transition to the nominated work | ||
114 | * state (acurrently executing work states will complete first). | ||
115 | * | ||
116 | * In such a situation, object->state remembers the state the machine should | ||
117 | * have been in/gone to and returning NO_TRANSIT returns to that. | ||
118 | */ | ||
119 | static const struct fscache_transition fscache_osm_init_oob[] = { | ||
120 | TRANSIT_TO(ABORT_INIT, | ||
121 | (1 << FSCACHE_OBJECT_EV_ERROR) | | ||
122 | (1 << FSCACHE_OBJECT_EV_KILL)), | ||
123 | { 0, NULL } | ||
124 | }; | ||
125 | |||
126 | static const struct fscache_transition fscache_osm_lookup_oob[] = { | ||
127 | TRANSIT_TO(LOOKUP_FAILURE, | ||
128 | (1 << FSCACHE_OBJECT_EV_ERROR) | | ||
129 | (1 << FSCACHE_OBJECT_EV_KILL)), | ||
130 | { 0, NULL } | ||
35 | }; | 131 | }; |
36 | EXPORT_SYMBOL(fscache_object_states); | 132 | |
37 | 133 | static const struct fscache_transition fscache_osm_run_oob[] = { | |
38 | const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { | 134 | TRANSIT_TO(KILL_OBJECT, |
39 | [FSCACHE_OBJECT_INIT] = "INIT", | 135 | (1 << FSCACHE_OBJECT_EV_ERROR) | |
40 | [FSCACHE_OBJECT_LOOKING_UP] = "LOOK", | 136 | (1 << FSCACHE_OBJECT_EV_KILL)), |
41 | [FSCACHE_OBJECT_CREATING] = "CRTN", | 137 | { 0, NULL } |
42 | [FSCACHE_OBJECT_AVAILABLE] = "AVBL", | ||
43 | [FSCACHE_OBJECT_ACTIVE] = "ACTV", | ||
44 | [FSCACHE_OBJECT_INVALIDATING] = "INVL", | ||
45 | [FSCACHE_OBJECT_UPDATING] = "UPDT", | ||
46 | [FSCACHE_OBJECT_DYING] = "DYNG", | ||
47 | [FSCACHE_OBJECT_LC_DYING] = "LCDY", | ||
48 | [FSCACHE_OBJECT_ABORT_INIT] = "ABTI", | ||
49 | [FSCACHE_OBJECT_RELEASING] = "RELS", | ||
50 | [FSCACHE_OBJECT_RECYCLING] = "RCYC", | ||
51 | [FSCACHE_OBJECT_WITHDRAWING] = "WTHD", | ||
52 | [FSCACHE_OBJECT_DEAD] = "DEAD", | ||
53 | }; | 138 | }; |
54 | 139 | ||
55 | static int fscache_get_object(struct fscache_object *); | 140 | static int fscache_get_object(struct fscache_object *); |
56 | static void fscache_put_object(struct fscache_object *); | 141 | static void fscache_put_object(struct fscache_object *); |
57 | static void fscache_initialise_object(struct fscache_object *); | 142 | static bool fscache_enqueue_dependents(struct fscache_object *, int); |
58 | static void fscache_lookup_object(struct fscache_object *); | ||
59 | static void fscache_object_available(struct fscache_object *); | ||
60 | static void fscache_invalidate_object(struct fscache_object *); | ||
61 | static void fscache_release_object(struct fscache_object *); | ||
62 | static void fscache_withdraw_object(struct fscache_object *); | ||
63 | static void fscache_enqueue_dependents(struct fscache_object *); | ||
64 | static void fscache_dequeue_object(struct fscache_object *); | 143 | static void fscache_dequeue_object(struct fscache_object *); |
65 | 144 | ||
66 | /* | 145 | /* |
@@ -75,295 +154,116 @@ static inline void fscache_done_parent_op(struct fscache_object *object) | |||
75 | object->debug_id, parent->debug_id, parent->n_ops); | 154 | object->debug_id, parent->debug_id, parent->n_ops); |
76 | 155 | ||
77 | spin_lock_nested(&parent->lock, 1); | 156 | spin_lock_nested(&parent->lock, 1); |
78 | parent->n_ops--; | ||
79 | parent->n_obj_ops--; | 157 | parent->n_obj_ops--; |
158 | parent->n_ops--; | ||
80 | if (parent->n_ops == 0) | 159 | if (parent->n_ops == 0) |
81 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); | 160 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); |
82 | spin_unlock(&parent->lock); | 161 | spin_unlock(&parent->lock); |
83 | } | 162 | } |
84 | 163 | ||
85 | /* | 164 | /* |
86 | * Notify netfs of invalidation completion. | 165 | * Object state machine dispatcher. |
87 | */ | 166 | */ |
88 | static inline void fscache_invalidation_complete(struct fscache_cookie *cookie) | 167 | static void fscache_object_sm_dispatcher(struct fscache_object *object) |
89 | { | 168 | { |
90 | if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) | 169 | const struct fscache_transition *t; |
91 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); | 170 | const struct fscache_state *state, *new_state; |
92 | } | 171 | unsigned long events, event_mask; |
93 | 172 | int event = -1; | |
94 | /* | ||
95 | * process events that have been sent to an object's state machine | ||
96 | * - initiates parent lookup | ||
97 | * - does object lookup | ||
98 | * - does object creation | ||
99 | * - does object recycling and retirement | ||
100 | * - does object withdrawal | ||
101 | */ | ||
102 | static void fscache_object_state_machine(struct fscache_object *object) | ||
103 | { | ||
104 | enum fscache_object_state new_state; | ||
105 | struct fscache_cookie *cookie; | ||
106 | int event; | ||
107 | 173 | ||
108 | ASSERT(object != NULL); | 174 | ASSERT(object != NULL); |
109 | 175 | ||
110 | _enter("{OBJ%x,%s,%lx}", | 176 | _enter("{OBJ%x,%s,%lx}", |
111 | object->debug_id, fscache_object_states[object->state], | 177 | object->debug_id, object->state->name, object->events); |
112 | object->events); | 178 | |
113 | 179 | event_mask = object->event_mask; | |
114 | switch (object->state) { | 180 | restart: |
115 | /* wait for the parent object to become ready */ | 181 | object->event_mask = 0; /* Mask normal event handling */ |
116 | case FSCACHE_OBJECT_INIT: | 182 | state = object->state; |
117 | object->event_mask = | 183 | restart_masked: |
118 | FSCACHE_OBJECT_EVENTS_MASK & | 184 | events = object->events; |
119 | ~(1 << FSCACHE_OBJECT_EV_CLEARED); | 185 | |
120 | fscache_initialise_object(object); | 186 | /* Handle any out-of-band events (typically an error) */ |
121 | goto done; | 187 | if (events & object->oob_event_mask) { |
122 | 188 | _debug("{OBJ%x} oob %lx", | |
123 | /* look up the object metadata on disk */ | 189 | object->debug_id, events & object->oob_event_mask); |
124 | case FSCACHE_OBJECT_LOOKING_UP: | 190 | for (t = object->oob_table; t->events; t++) { |
125 | fscache_lookup_object(object); | 191 | if (events & t->events) { |
126 | goto lookup_transit; | 192 | state = t->transit_to; |
127 | 193 | ASSERT(state->work != NULL); | |
128 | /* create the object metadata on disk */ | 194 | event = fls(events & t->events) - 1; |
129 | case FSCACHE_OBJECT_CREATING: | 195 | __clear_bit(event, &object->oob_event_mask); |
130 | fscache_lookup_object(object); | 196 | clear_bit(event, &object->events); |
131 | goto lookup_transit; | 197 | goto execute_work_state; |
132 | 198 | } | |
133 | /* handle an object becoming available; start pending | ||
134 | * operations and queue dependent operations for processing */ | ||
135 | case FSCACHE_OBJECT_AVAILABLE: | ||
136 | fscache_object_available(object); | ||
137 | goto active_transit; | ||
138 | |||
139 | /* normal running state */ | ||
140 | case FSCACHE_OBJECT_ACTIVE: | ||
141 | goto active_transit; | ||
142 | |||
143 | /* Invalidate an object on disk */ | ||
144 | case FSCACHE_OBJECT_INVALIDATING: | ||
145 | clear_bit(FSCACHE_OBJECT_EV_INVALIDATE, &object->events); | ||
146 | fscache_stat(&fscache_n_invalidates_run); | ||
147 | fscache_stat(&fscache_n_cop_invalidate_object); | ||
148 | fscache_invalidate_object(object); | ||
149 | fscache_stat_d(&fscache_n_cop_invalidate_object); | ||
150 | fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); | ||
151 | goto active_transit; | ||
152 | |||
153 | /* update the object metadata on disk */ | ||
154 | case FSCACHE_OBJECT_UPDATING: | ||
155 | clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); | ||
156 | fscache_stat(&fscache_n_updates_run); | ||
157 | fscache_stat(&fscache_n_cop_update_object); | ||
158 | object->cache->ops->update_object(object); | ||
159 | fscache_stat_d(&fscache_n_cop_update_object); | ||
160 | goto active_transit; | ||
161 | |||
162 | /* handle an object dying during lookup or creation */ | ||
163 | case FSCACHE_OBJECT_LC_DYING: | ||
164 | object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); | ||
165 | fscache_stat(&fscache_n_cop_lookup_complete); | ||
166 | object->cache->ops->lookup_complete(object); | ||
167 | fscache_stat_d(&fscache_n_cop_lookup_complete); | ||
168 | |||
169 | spin_lock(&object->lock); | ||
170 | object->state = FSCACHE_OBJECT_DYING; | ||
171 | cookie = object->cookie; | ||
172 | if (cookie) { | ||
173 | if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, | ||
174 | &cookie->flags)) | ||
175 | wake_up_bit(&cookie->flags, | ||
176 | FSCACHE_COOKIE_LOOKING_UP); | ||
177 | if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, | ||
178 | &cookie->flags)) | ||
179 | wake_up_bit(&cookie->flags, | ||
180 | FSCACHE_COOKIE_CREATING); | ||
181 | } | 199 | } |
182 | spin_unlock(&object->lock); | 200 | } |
183 | 201 | ||
184 | fscache_done_parent_op(object); | 202 | /* Wait states are just transition tables */ |
203 | if (!state->work) { | ||
204 | if (events & event_mask) { | ||
205 | for (t = state->transitions; t->events; t++) { | ||
206 | if (events & t->events) { | ||
207 | new_state = t->transit_to; | ||
208 | event = fls(events & t->events) - 1; | ||
209 | clear_bit(event, &object->events); | ||
210 | _debug("{OBJ%x} ev %d: %s -> %s", | ||
211 | object->debug_id, event, | ||
212 | state->name, new_state->name); | ||
213 | object->state = state = new_state; | ||
214 | goto execute_work_state; | ||
215 | } | ||
216 | } | ||
185 | 217 | ||
186 | /* wait for completion of all active operations on this object | 218 | /* The event mask didn't include all the tabled bits */ |
187 | * and the death of all child objects of this object */ | 219 | BUG(); |
188 | case FSCACHE_OBJECT_DYING: | ||
189 | dying: | ||
190 | clear_bit(FSCACHE_OBJECT_EV_CLEARED, &object->events); | ||
191 | spin_lock(&object->lock); | ||
192 | _debug("dying OBJ%x {%d,%d}", | ||
193 | object->debug_id, object->n_ops, object->n_children); | ||
194 | if (object->n_ops == 0 && object->n_children == 0) { | ||
195 | object->event_mask &= | ||
196 | ~(1 << FSCACHE_OBJECT_EV_CLEARED); | ||
197 | object->event_mask |= | ||
198 | (1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
199 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
200 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
201 | (1 << FSCACHE_OBJECT_EV_ERROR); | ||
202 | } else { | ||
203 | object->event_mask &= | ||
204 | ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
205 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
206 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
207 | (1 << FSCACHE_OBJECT_EV_ERROR)); | ||
208 | object->event_mask |= | ||
209 | 1 << FSCACHE_OBJECT_EV_CLEARED; | ||
210 | } | 220 | } |
211 | spin_unlock(&object->lock); | 221 | /* Randomly woke up */ |
212 | fscache_enqueue_dependents(object); | 222 | goto unmask_events; |
213 | fscache_start_operations(object); | ||
214 | goto terminal_transit; | ||
215 | |||
216 | /* handle an abort during initialisation */ | ||
217 | case FSCACHE_OBJECT_ABORT_INIT: | ||
218 | _debug("handle abort init %lx", object->events); | ||
219 | object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); | ||
220 | |||
221 | spin_lock(&object->lock); | ||
222 | fscache_dequeue_object(object); | ||
223 | |||
224 | object->state = FSCACHE_OBJECT_DYING; | ||
225 | if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, | ||
226 | &object->cookie->flags)) | ||
227 | wake_up_bit(&object->cookie->flags, | ||
228 | FSCACHE_COOKIE_CREATING); | ||
229 | spin_unlock(&object->lock); | ||
230 | goto dying; | ||
231 | |||
232 | /* handle the netfs releasing an object and possibly marking it | ||
233 | * obsolete too */ | ||
234 | case FSCACHE_OBJECT_RELEASING: | ||
235 | case FSCACHE_OBJECT_RECYCLING: | ||
236 | object->event_mask &= | ||
237 | ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
238 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
239 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
240 | (1 << FSCACHE_OBJECT_EV_ERROR)); | ||
241 | fscache_release_object(object); | ||
242 | spin_lock(&object->lock); | ||
243 | object->state = FSCACHE_OBJECT_DEAD; | ||
244 | spin_unlock(&object->lock); | ||
245 | fscache_stat(&fscache_n_object_dead); | ||
246 | goto terminal_transit; | ||
247 | |||
248 | /* handle the parent cache of this object being withdrawn from | ||
249 | * active service */ | ||
250 | case FSCACHE_OBJECT_WITHDRAWING: | ||
251 | object->event_mask &= | ||
252 | ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
253 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
254 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
255 | (1 << FSCACHE_OBJECT_EV_ERROR)); | ||
256 | fscache_withdraw_object(object); | ||
257 | spin_lock(&object->lock); | ||
258 | object->state = FSCACHE_OBJECT_DEAD; | ||
259 | spin_unlock(&object->lock); | ||
260 | fscache_stat(&fscache_n_object_dead); | ||
261 | goto terminal_transit; | ||
262 | |||
263 | /* complain about the object being woken up once it is | ||
264 | * deceased */ | ||
265 | case FSCACHE_OBJECT_DEAD: | ||
266 | printk(KERN_ERR "FS-Cache:" | ||
267 | " Unexpected event in dead state %lx\n", | ||
268 | object->events & object->event_mask); | ||
269 | BUG(); | ||
270 | |||
271 | default: | ||
272 | printk(KERN_ERR "FS-Cache: Unknown object state %u\n", | ||
273 | object->state); | ||
274 | BUG(); | ||
275 | } | ||
276 | |||
277 | /* determine the transition from a lookup state */ | ||
278 | lookup_transit: | ||
279 | event = fls(object->events & object->event_mask) - 1; | ||
280 | switch (event) { | ||
281 | case FSCACHE_OBJECT_EV_WITHDRAW: | ||
282 | case FSCACHE_OBJECT_EV_RETIRE: | ||
283 | case FSCACHE_OBJECT_EV_RELEASE: | ||
284 | case FSCACHE_OBJECT_EV_ERROR: | ||
285 | new_state = FSCACHE_OBJECT_LC_DYING; | ||
286 | goto change_state; | ||
287 | case FSCACHE_OBJECT_EV_INVALIDATE: | ||
288 | new_state = FSCACHE_OBJECT_INVALIDATING; | ||
289 | goto change_state; | ||
290 | case FSCACHE_OBJECT_EV_REQUEUE: | ||
291 | goto done; | ||
292 | case -1: | ||
293 | goto done; /* sleep until event */ | ||
294 | default: | ||
295 | goto unsupported_event; | ||
296 | } | 223 | } |
297 | 224 | ||
298 | /* determine the transition from an active state */ | 225 | execute_work_state: |
299 | active_transit: | 226 | _debug("{OBJ%x} exec %s", object->debug_id, state->name); |
300 | event = fls(object->events & object->event_mask) - 1; | ||
301 | switch (event) { | ||
302 | case FSCACHE_OBJECT_EV_WITHDRAW: | ||
303 | case FSCACHE_OBJECT_EV_RETIRE: | ||
304 | case FSCACHE_OBJECT_EV_RELEASE: | ||
305 | case FSCACHE_OBJECT_EV_ERROR: | ||
306 | new_state = FSCACHE_OBJECT_DYING; | ||
307 | goto change_state; | ||
308 | case FSCACHE_OBJECT_EV_INVALIDATE: | ||
309 | new_state = FSCACHE_OBJECT_INVALIDATING; | ||
310 | goto change_state; | ||
311 | case FSCACHE_OBJECT_EV_UPDATE: | ||
312 | new_state = FSCACHE_OBJECT_UPDATING; | ||
313 | goto change_state; | ||
314 | case -1: | ||
315 | new_state = FSCACHE_OBJECT_ACTIVE; | ||
316 | goto change_state; /* sleep until event */ | ||
317 | default: | ||
318 | goto unsupported_event; | ||
319 | } | ||
320 | 227 | ||
321 | /* determine the transition from a terminal state */ | 228 | new_state = state->work(object, event); |
322 | terminal_transit: | 229 | event = -1; |
323 | event = fls(object->events & object->event_mask) - 1; | 230 | if (new_state == NO_TRANSIT) { |
324 | switch (event) { | 231 | _debug("{OBJ%x} %s notrans", object->debug_id, state->name); |
325 | case FSCACHE_OBJECT_EV_WITHDRAW: | 232 | fscache_enqueue_object(object); |
326 | new_state = FSCACHE_OBJECT_WITHDRAWING; | 233 | event_mask = object->oob_event_mask; |
327 | goto change_state; | 234 | goto unmask_events; |
328 | case FSCACHE_OBJECT_EV_RETIRE: | ||
329 | new_state = FSCACHE_OBJECT_RECYCLING; | ||
330 | goto change_state; | ||
331 | case FSCACHE_OBJECT_EV_RELEASE: | ||
332 | new_state = FSCACHE_OBJECT_RELEASING; | ||
333 | goto change_state; | ||
334 | case FSCACHE_OBJECT_EV_ERROR: | ||
335 | new_state = FSCACHE_OBJECT_WITHDRAWING; | ||
336 | goto change_state; | ||
337 | case FSCACHE_OBJECT_EV_CLEARED: | ||
338 | new_state = FSCACHE_OBJECT_DYING; | ||
339 | goto change_state; | ||
340 | case -1: | ||
341 | goto done; /* sleep until event */ | ||
342 | default: | ||
343 | goto unsupported_event; | ||
344 | } | 235 | } |
345 | 236 | ||
346 | change_state: | 237 | _debug("{OBJ%x} %s -> %s", |
347 | spin_lock(&object->lock); | 238 | object->debug_id, state->name, new_state->name); |
348 | object->state = new_state; | 239 | object->state = state = new_state; |
349 | spin_unlock(&object->lock); | ||
350 | 240 | ||
351 | done: | 241 | if (state->work) { |
352 | _leave(" [->%s]", fscache_object_states[object->state]); | 242 | if (unlikely(state->work == ((void *)2UL))) { |
353 | return; | 243 | _leave(" [dead]"); |
244 | return; | ||
245 | } | ||
246 | goto restart_masked; | ||
247 | } | ||
354 | 248 | ||
355 | unsupported_event: | 249 | /* Transited to wait state */ |
356 | printk(KERN_ERR "FS-Cache:" | 250 | event_mask = object->oob_event_mask; |
357 | " Unsupported event %d [%lx/%lx] in state %s\n", | 251 | for (t = state->transitions; t->events; t++) |
358 | event, object->events, object->event_mask, | 252 | event_mask |= t->events; |
359 | fscache_object_states[object->state]); | 253 | |
360 | BUG(); | 254 | unmask_events: |
255 | object->event_mask = event_mask; | ||
256 | smp_mb(); | ||
257 | events = object->events; | ||
258 | if (events & event_mask) | ||
259 | goto restart; | ||
260 | _leave(" [msk %lx]", event_mask); | ||
361 | } | 261 | } |
362 | 262 | ||
363 | /* | 263 | /* |
364 | * execute an object | 264 | * execute an object |
365 | */ | 265 | */ |
366 | void fscache_object_work_func(struct work_struct *work) | 266 | static void fscache_object_work_func(struct work_struct *work) |
367 | { | 267 | { |
368 | struct fscache_object *object = | 268 | struct fscache_object *object = |
369 | container_of(work, struct fscache_object, work); | 269 | container_of(work, struct fscache_object, work); |
@@ -372,14 +272,70 @@ void fscache_object_work_func(struct work_struct *work) | |||
372 | _enter("{OBJ%x}", object->debug_id); | 272 | _enter("{OBJ%x}", object->debug_id); |
373 | 273 | ||
374 | start = jiffies; | 274 | start = jiffies; |
375 | fscache_object_state_machine(object); | 275 | fscache_object_sm_dispatcher(object); |
376 | fscache_hist(fscache_objs_histogram, start); | 276 | fscache_hist(fscache_objs_histogram, start); |
377 | if (object->events & object->event_mask) | ||
378 | fscache_enqueue_object(object); | ||
379 | clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
380 | fscache_put_object(object); | 277 | fscache_put_object(object); |
381 | } | 278 | } |
382 | EXPORT_SYMBOL(fscache_object_work_func); | 279 | |
280 | /** | ||
281 | * fscache_object_init - Initialise a cache object description | ||
282 | * @object: Object description | ||
283 | * @cookie: Cookie object will be attached to | ||
284 | * @cache: Cache in which backing object will be found | ||
285 | * | ||
286 | * Initialise a cache object description to its basic values. | ||
287 | * | ||
288 | * See Documentation/filesystems/caching/backend-api.txt for a complete | ||
289 | * description. | ||
290 | */ | ||
291 | void fscache_object_init(struct fscache_object *object, | ||
292 | struct fscache_cookie *cookie, | ||
293 | struct fscache_cache *cache) | ||
294 | { | ||
295 | const struct fscache_transition *t; | ||
296 | |||
297 | atomic_inc(&cache->object_count); | ||
298 | |||
299 | object->state = STATE(WAIT_FOR_INIT); | ||
300 | object->oob_table = fscache_osm_init_oob; | ||
301 | object->flags = 1 << FSCACHE_OBJECT_IS_LIVE; | ||
302 | spin_lock_init(&object->lock); | ||
303 | INIT_LIST_HEAD(&object->cache_link); | ||
304 | INIT_HLIST_NODE(&object->cookie_link); | ||
305 | INIT_WORK(&object->work, fscache_object_work_func); | ||
306 | INIT_LIST_HEAD(&object->dependents); | ||
307 | INIT_LIST_HEAD(&object->dep_link); | ||
308 | INIT_LIST_HEAD(&object->pending_ops); | ||
309 | object->n_children = 0; | ||
310 | object->n_ops = object->n_in_progress = object->n_exclusive = 0; | ||
311 | object->events = 0; | ||
312 | object->store_limit = 0; | ||
313 | object->store_limit_l = 0; | ||
314 | object->cache = cache; | ||
315 | object->cookie = cookie; | ||
316 | object->parent = NULL; | ||
317 | |||
318 | object->oob_event_mask = 0; | ||
319 | for (t = object->oob_table; t->events; t++) | ||
320 | object->oob_event_mask |= t->events; | ||
321 | object->event_mask = object->oob_event_mask; | ||
322 | for (t = object->state->transitions; t->events; t++) | ||
323 | object->event_mask |= t->events; | ||
324 | } | ||
325 | EXPORT_SYMBOL(fscache_object_init); | ||
326 | |||
327 | /* | ||
328 | * Abort object initialisation before we start it. | ||
329 | */ | ||
330 | static const struct fscache_state *fscache_abort_initialisation(struct fscache_object *object, | ||
331 | int event) | ||
332 | { | ||
333 | _enter("{OBJ%x},%d", object->debug_id, event); | ||
334 | |||
335 | object->oob_event_mask = 0; | ||
336 | fscache_dequeue_object(object); | ||
337 | return transit_to(KILL_OBJECT); | ||
338 | } | ||
383 | 339 | ||
384 | /* | 340 | /* |
385 | * initialise an object | 341 | * initialise an object |
@@ -387,130 +343,136 @@ EXPORT_SYMBOL(fscache_object_work_func); | |||
387 | * immediately to do a creation | 343 | * immediately to do a creation |
388 | * - we may need to start the process of creating a parent and we need to wait | 344 | * - we may need to start the process of creating a parent and we need to wait |
389 | * for the parent's lookup and creation to complete if it's not there yet | 345 | * for the parent's lookup and creation to complete if it's not there yet |
390 | * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the | ||
391 | * leaf-most cookies of the object and all its children | ||
392 | */ | 346 | */ |
393 | static void fscache_initialise_object(struct fscache_object *object) | 347 | static const struct fscache_state *fscache_initialise_object(struct fscache_object *object, |
348 | int event) | ||
394 | { | 349 | { |
395 | struct fscache_object *parent; | 350 | struct fscache_object *parent; |
351 | bool success; | ||
396 | 352 | ||
397 | _enter(""); | 353 | _enter("{OBJ%x},%d", object->debug_id, event); |
398 | ASSERT(object->cookie != NULL); | ||
399 | ASSERT(object->cookie->parent != NULL); | ||
400 | |||
401 | if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) | | ||
402 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
403 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
404 | (1 << FSCACHE_OBJECT_EV_WITHDRAW))) { | ||
405 | _debug("abort init %lx", object->events); | ||
406 | spin_lock(&object->lock); | ||
407 | object->state = FSCACHE_OBJECT_ABORT_INIT; | ||
408 | spin_unlock(&object->lock); | ||
409 | return; | ||
410 | } | ||
411 | 354 | ||
412 | spin_lock(&object->cookie->lock); | 355 | ASSERT(list_empty(&object->dep_link)); |
413 | spin_lock_nested(&object->cookie->parent->lock, 1); | ||
414 | 356 | ||
415 | parent = object->parent; | 357 | parent = object->parent; |
416 | if (!parent) { | 358 | if (!parent) { |
417 | _debug("no parent"); | 359 | _leave(" [no parent]"); |
418 | set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); | 360 | return transit_to(DROP_OBJECT); |
419 | } else { | 361 | } |
420 | spin_lock(&object->lock); | ||
421 | spin_lock_nested(&parent->lock, 1); | ||
422 | _debug("parent %s", fscache_object_states[parent->state]); | ||
423 | |||
424 | if (parent->state >= FSCACHE_OBJECT_DYING) { | ||
425 | _debug("bad parent"); | ||
426 | set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); | ||
427 | } else if (parent->state < FSCACHE_OBJECT_AVAILABLE) { | ||
428 | _debug("wait"); | ||
429 | |||
430 | /* we may get woken up in this state by child objects | ||
431 | * binding on to us, so we need to make sure we don't | ||
432 | * add ourself to the list multiple times */ | ||
433 | if (list_empty(&object->dep_link)) { | ||
434 | fscache_stat(&fscache_n_cop_grab_object); | ||
435 | object->cache->ops->grab_object(object); | ||
436 | fscache_stat_d(&fscache_n_cop_grab_object); | ||
437 | list_add(&object->dep_link, | ||
438 | &parent->dependents); | ||
439 | |||
440 | /* fscache_acquire_non_index_cookie() uses this | ||
441 | * to wake the chain up */ | ||
442 | if (parent->state == FSCACHE_OBJECT_INIT) | ||
443 | fscache_enqueue_object(parent); | ||
444 | } | ||
445 | } else { | ||
446 | _debug("go"); | ||
447 | parent->n_ops++; | ||
448 | parent->n_obj_ops++; | ||
449 | object->lookup_jif = jiffies; | ||
450 | object->state = FSCACHE_OBJECT_LOOKING_UP; | ||
451 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
452 | } | ||
453 | 362 | ||
454 | spin_unlock(&parent->lock); | 363 | _debug("parent: %s of:%lx", parent->state->name, parent->flags); |
455 | spin_unlock(&object->lock); | 364 | |
365 | if (fscache_object_is_dying(parent)) { | ||
366 | _leave(" [bad parent]"); | ||
367 | return transit_to(DROP_OBJECT); | ||
456 | } | 368 | } |
457 | 369 | ||
458 | spin_unlock(&object->cookie->parent->lock); | 370 | if (fscache_object_is_available(parent)) { |
459 | spin_unlock(&object->cookie->lock); | 371 | _leave(" [ready]"); |
372 | return transit_to(PARENT_READY); | ||
373 | } | ||
374 | |||
375 | _debug("wait"); | ||
376 | |||
377 | spin_lock(&parent->lock); | ||
378 | fscache_stat(&fscache_n_cop_grab_object); | ||
379 | success = false; | ||
380 | if (fscache_object_is_live(parent) && | ||
381 | object->cache->ops->grab_object(object)) { | ||
382 | list_add(&object->dep_link, &parent->dependents); | ||
383 | success = true; | ||
384 | } | ||
385 | fscache_stat_d(&fscache_n_cop_grab_object); | ||
386 | spin_unlock(&parent->lock); | ||
387 | if (!success) { | ||
388 | _leave(" [grab failed]"); | ||
389 | return transit_to(DROP_OBJECT); | ||
390 | } | ||
391 | |||
392 | /* fscache_acquire_non_index_cookie() uses this | ||
393 | * to wake the chain up */ | ||
394 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_NEW_CHILD); | ||
395 | _leave(" [wait]"); | ||
396 | return transit_to(WAIT_FOR_PARENT); | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * Once the parent object is ready, we should kick off our lookup op. | ||
401 | */ | ||
402 | static const struct fscache_state *fscache_parent_ready(struct fscache_object *object, | ||
403 | int event) | ||
404 | { | ||
405 | struct fscache_object *parent = object->parent; | ||
406 | |||
407 | _enter("{OBJ%x},%d", object->debug_id, event); | ||
408 | |||
409 | ASSERT(parent != NULL); | ||
410 | |||
411 | spin_lock(&parent->lock); | ||
412 | parent->n_ops++; | ||
413 | parent->n_obj_ops++; | ||
414 | object->lookup_jif = jiffies; | ||
415 | spin_unlock(&parent->lock); | ||
416 | |||
460 | _leave(""); | 417 | _leave(""); |
418 | return transit_to(LOOK_UP_OBJECT); | ||
461 | } | 419 | } |
462 | 420 | ||
463 | /* | 421 | /* |
464 | * look an object up in the cache from which it was allocated | 422 | * look an object up in the cache from which it was allocated |
465 | * - we hold an "access lock" on the parent object, so the parent object cannot | 423 | * - we hold an "access lock" on the parent object, so the parent object cannot |
466 | * be withdrawn by either party till we've finished | 424 | * be withdrawn by either party till we've finished |
467 | * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the | ||
468 | * leaf-most cookies of the object and all its children | ||
469 | */ | 425 | */ |
470 | static void fscache_lookup_object(struct fscache_object *object) | 426 | static const struct fscache_state *fscache_look_up_object(struct fscache_object *object, |
427 | int event) | ||
471 | { | 428 | { |
472 | struct fscache_cookie *cookie = object->cookie; | 429 | struct fscache_cookie *cookie = object->cookie; |
473 | struct fscache_object *parent; | 430 | struct fscache_object *parent = object->parent; |
474 | int ret; | 431 | int ret; |
475 | 432 | ||
476 | _enter(""); | 433 | _enter("{OBJ%x},%d", object->debug_id, event); |
434 | |||
435 | object->oob_table = fscache_osm_lookup_oob; | ||
477 | 436 | ||
478 | parent = object->parent; | ||
479 | ASSERT(parent != NULL); | 437 | ASSERT(parent != NULL); |
480 | ASSERTCMP(parent->n_ops, >, 0); | 438 | ASSERTCMP(parent->n_ops, >, 0); |
481 | ASSERTCMP(parent->n_obj_ops, >, 0); | 439 | ASSERTCMP(parent->n_obj_ops, >, 0); |
482 | 440 | ||
483 | /* make sure the parent is still available */ | 441 | /* make sure the parent is still available */ |
484 | ASSERTCMP(parent->state, >=, FSCACHE_OBJECT_AVAILABLE); | 442 | ASSERT(fscache_object_is_available(parent)); |
485 | 443 | ||
486 | if (parent->state >= FSCACHE_OBJECT_DYING || | 444 | if (fscache_object_is_dying(parent) || |
487 | test_bit(FSCACHE_IOERROR, &object->cache->flags)) { | 445 | test_bit(FSCACHE_IOERROR, &object->cache->flags) || |
488 | _debug("unavailable"); | 446 | !fscache_use_cookie(object)) { |
489 | set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); | 447 | _leave(" [unavailable]"); |
490 | _leave(""); | 448 | return transit_to(LOOKUP_FAILURE); |
491 | return; | ||
492 | } | 449 | } |
493 | 450 | ||
494 | _debug("LOOKUP \"%s/%s\" in \"%s\"", | 451 | _debug("LOOKUP \"%s\" in \"%s\"", |
495 | parent->cookie->def->name, cookie->def->name, | 452 | cookie->def->name, object->cache->tag->name); |
496 | object->cache->tag->name); | ||
497 | 453 | ||
498 | fscache_stat(&fscache_n_object_lookups); | 454 | fscache_stat(&fscache_n_object_lookups); |
499 | fscache_stat(&fscache_n_cop_lookup_object); | 455 | fscache_stat(&fscache_n_cop_lookup_object); |
500 | ret = object->cache->ops->lookup_object(object); | 456 | ret = object->cache->ops->lookup_object(object); |
501 | fscache_stat_d(&fscache_n_cop_lookup_object); | 457 | fscache_stat_d(&fscache_n_cop_lookup_object); |
502 | 458 | ||
503 | if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events)) | 459 | fscache_unuse_cookie(object); |
504 | set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); | ||
505 | 460 | ||
506 | if (ret == -ETIMEDOUT) { | 461 | if (ret == -ETIMEDOUT) { |
507 | /* probably stuck behind another object, so move this one to | 462 | /* probably stuck behind another object, so move this one to |
508 | * the back of the queue */ | 463 | * the back of the queue */ |
509 | fscache_stat(&fscache_n_object_lookups_timed_out); | 464 | fscache_stat(&fscache_n_object_lookups_timed_out); |
510 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | 465 | _leave(" [timeout]"); |
466 | return NO_TRANSIT; | ||
511 | } | 467 | } |
512 | 468 | ||
513 | _leave(""); | 469 | if (ret < 0) { |
470 | _leave(" [error]"); | ||
471 | return transit_to(LOOKUP_FAILURE); | ||
472 | } | ||
473 | |||
474 | _leave(" [ok]"); | ||
475 | return transit_to(OBJECT_AVAILABLE); | ||
514 | } | 476 | } |
515 | 477 | ||
516 | /** | 478 | /** |
@@ -524,32 +486,20 @@ void fscache_object_lookup_negative(struct fscache_object *object) | |||
524 | { | 486 | { |
525 | struct fscache_cookie *cookie = object->cookie; | 487 | struct fscache_cookie *cookie = object->cookie; |
526 | 488 | ||
527 | _enter("{OBJ%x,%s}", | 489 | _enter("{OBJ%x,%s}", object->debug_id, object->state->name); |
528 | object->debug_id, fscache_object_states[object->state]); | ||
529 | 490 | ||
530 | spin_lock(&object->lock); | 491 | if (!test_and_set_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { |
531 | if (object->state == FSCACHE_OBJECT_LOOKING_UP) { | ||
532 | fscache_stat(&fscache_n_object_lookups_negative); | 492 | fscache_stat(&fscache_n_object_lookups_negative); |
533 | 493 | ||
534 | /* transit here to allow write requests to begin stacking up | 494 | /* Allow write requests to begin stacking up and read requests to begin |
535 | * and read requests to begin returning ENODATA */ | 495 | * returning ENODATA. |
536 | object->state = FSCACHE_OBJECT_CREATING; | 496 | */ |
537 | spin_unlock(&object->lock); | ||
538 | |||
539 | set_bit(FSCACHE_COOKIE_PENDING_FILL, &cookie->flags); | ||
540 | set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); | 497 | set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); |
541 | 498 | ||
542 | _debug("wake up lookup %p", &cookie->flags); | 499 | _debug("wake up lookup %p", &cookie->flags); |
543 | smp_mb__before_clear_bit(); | 500 | clear_bit_unlock(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); |
544 | clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); | ||
545 | smp_mb__after_clear_bit(); | ||
546 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); | 501 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); |
547 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
548 | } else { | ||
549 | ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING); | ||
550 | spin_unlock(&object->lock); | ||
551 | } | 502 | } |
552 | |||
553 | _leave(""); | 503 | _leave(""); |
554 | } | 504 | } |
555 | EXPORT_SYMBOL(fscache_object_lookup_negative); | 505 | EXPORT_SYMBOL(fscache_object_lookup_negative); |
@@ -568,38 +518,26 @@ void fscache_obtained_object(struct fscache_object *object) | |||
568 | { | 518 | { |
569 | struct fscache_cookie *cookie = object->cookie; | 519 | struct fscache_cookie *cookie = object->cookie; |
570 | 520 | ||
571 | _enter("{OBJ%x,%s}", | 521 | _enter("{OBJ%x,%s}", object->debug_id, object->state->name); |
572 | object->debug_id, fscache_object_states[object->state]); | ||
573 | 522 | ||
574 | /* if we were still looking up, then we must have a positive lookup | 523 | /* if we were still looking up, then we must have a positive lookup |
575 | * result, in which case there may be data available */ | 524 | * result, in which case there may be data available */ |
576 | spin_lock(&object->lock); | 525 | if (!test_and_set_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { |
577 | if (object->state == FSCACHE_OBJECT_LOOKING_UP) { | ||
578 | fscache_stat(&fscache_n_object_lookups_positive); | 526 | fscache_stat(&fscache_n_object_lookups_positive); |
579 | 527 | ||
580 | clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); | 528 | /* We do (presumably) have data */ |
529 | clear_bit_unlock(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); | ||
581 | 530 | ||
582 | object->state = FSCACHE_OBJECT_AVAILABLE; | 531 | /* Allow write requests to begin stacking up and read requests |
583 | spin_unlock(&object->lock); | 532 | * to begin shovelling data. |
584 | 533 | */ | |
585 | smp_mb__before_clear_bit(); | 534 | clear_bit_unlock(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); |
586 | clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); | ||
587 | smp_mb__after_clear_bit(); | ||
588 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); | 535 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); |
589 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
590 | } else { | 536 | } else { |
591 | ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING); | ||
592 | fscache_stat(&fscache_n_object_created); | 537 | fscache_stat(&fscache_n_object_created); |
593 | |||
594 | object->state = FSCACHE_OBJECT_AVAILABLE; | ||
595 | spin_unlock(&object->lock); | ||
596 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
597 | smp_wmb(); | ||
598 | } | 538 | } |
599 | 539 | ||
600 | if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) | 540 | set_bit(FSCACHE_OBJECT_IS_AVAILABLE, &object->flags); |
601 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_CREATING); | ||
602 | |||
603 | _leave(""); | 541 | _leave(""); |
604 | } | 542 | } |
605 | EXPORT_SYMBOL(fscache_obtained_object); | 543 | EXPORT_SYMBOL(fscache_obtained_object); |
@@ -607,15 +545,14 @@ EXPORT_SYMBOL(fscache_obtained_object); | |||
607 | /* | 545 | /* |
608 | * handle an object that has just become available | 546 | * handle an object that has just become available |
609 | */ | 547 | */ |
610 | static void fscache_object_available(struct fscache_object *object) | 548 | static const struct fscache_state *fscache_object_available(struct fscache_object *object, |
549 | int event) | ||
611 | { | 550 | { |
612 | _enter("{OBJ%x}", object->debug_id); | 551 | _enter("{OBJ%x},%d", object->debug_id, event); |
613 | 552 | ||
614 | spin_lock(&object->lock); | 553 | object->oob_table = fscache_osm_run_oob; |
615 | 554 | ||
616 | if (object->cookie && | 555 | spin_lock(&object->lock); |
617 | test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags)) | ||
618 | wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING); | ||
619 | 556 | ||
620 | fscache_done_parent_op(object); | 557 | fscache_done_parent_op(object); |
621 | if (object->n_in_progress == 0) { | 558 | if (object->n_in_progress == 0) { |
@@ -631,130 +568,158 @@ static void fscache_object_available(struct fscache_object *object) | |||
631 | fscache_stat(&fscache_n_cop_lookup_complete); | 568 | fscache_stat(&fscache_n_cop_lookup_complete); |
632 | object->cache->ops->lookup_complete(object); | 569 | object->cache->ops->lookup_complete(object); |
633 | fscache_stat_d(&fscache_n_cop_lookup_complete); | 570 | fscache_stat_d(&fscache_n_cop_lookup_complete); |
634 | fscache_enqueue_dependents(object); | ||
635 | 571 | ||
636 | fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif); | 572 | fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif); |
637 | fscache_stat(&fscache_n_object_avail); | 573 | fscache_stat(&fscache_n_object_avail); |
638 | 574 | ||
639 | _leave(""); | 575 | _leave(""); |
576 | return transit_to(JUMPSTART_DEPS); | ||
640 | } | 577 | } |
641 | 578 | ||
642 | /* | 579 | /* |
643 | * drop an object's attachments | 580 | * Wake up this object's dependent objects now that we've become available. |
644 | */ | 581 | */ |
645 | static void fscache_drop_object(struct fscache_object *object) | 582 | static const struct fscache_state *fscache_jumpstart_dependents(struct fscache_object *object, |
583 | int event) | ||
646 | { | 584 | { |
647 | struct fscache_object *parent = object->parent; | 585 | _enter("{OBJ%x},%d", object->debug_id, event); |
648 | struct fscache_cache *cache = object->cache; | ||
649 | 586 | ||
650 | _enter("{OBJ%x,%d}", object->debug_id, object->n_children); | 587 | if (!fscache_enqueue_dependents(object, FSCACHE_OBJECT_EV_PARENT_READY)) |
588 | return NO_TRANSIT; /* Not finished; requeue */ | ||
589 | return transit_to(WAIT_FOR_CMD); | ||
590 | } | ||
651 | 591 | ||
652 | ASSERTCMP(object->cookie, ==, NULL); | 592 | /* |
653 | ASSERT(hlist_unhashed(&object->cookie_link)); | 593 | * Handle lookup or creation failute. |
594 | */ | ||
595 | static const struct fscache_state *fscache_lookup_failure(struct fscache_object *object, | ||
596 | int event) | ||
597 | { | ||
598 | struct fscache_cookie *cookie; | ||
654 | 599 | ||
655 | spin_lock(&cache->object_list_lock); | 600 | _enter("{OBJ%x},%d", object->debug_id, event); |
656 | list_del_init(&object->cache_link); | ||
657 | spin_unlock(&cache->object_list_lock); | ||
658 | 601 | ||
659 | fscache_stat(&fscache_n_cop_drop_object); | 602 | object->oob_event_mask = 0; |
660 | cache->ops->drop_object(object); | ||
661 | fscache_stat_d(&fscache_n_cop_drop_object); | ||
662 | 603 | ||
663 | if (parent) { | 604 | fscache_stat(&fscache_n_cop_lookup_complete); |
664 | _debug("release parent OBJ%x {%d}", | 605 | object->cache->ops->lookup_complete(object); |
665 | parent->debug_id, parent->n_children); | 606 | fscache_stat_d(&fscache_n_cop_lookup_complete); |
666 | 607 | ||
667 | spin_lock(&parent->lock); | 608 | cookie = object->cookie; |
668 | parent->n_children--; | 609 | set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); |
669 | if (parent->n_children == 0) | 610 | if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) |
670 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); | 611 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); |
671 | spin_unlock(&parent->lock); | 612 | |
672 | object->parent = NULL; | 613 | fscache_done_parent_op(object); |
614 | return transit_to(KILL_OBJECT); | ||
615 | } | ||
616 | |||
617 | /* | ||
618 | * Wait for completion of all active operations on this object and the death of | ||
619 | * all child objects of this object. | ||
620 | */ | ||
621 | static const struct fscache_state *fscache_kill_object(struct fscache_object *object, | ||
622 | int event) | ||
623 | { | ||
624 | _enter("{OBJ%x,%d,%d},%d", | ||
625 | object->debug_id, object->n_ops, object->n_children, event); | ||
626 | |||
627 | clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); | ||
628 | object->oob_event_mask = 0; | ||
629 | |||
630 | if (list_empty(&object->dependents) && | ||
631 | object->n_ops == 0 && | ||
632 | object->n_children == 0) | ||
633 | return transit_to(DROP_OBJECT); | ||
634 | |||
635 | if (object->n_in_progress == 0) { | ||
636 | spin_lock(&object->lock); | ||
637 | if (object->n_ops > 0 && object->n_in_progress == 0) | ||
638 | fscache_start_operations(object); | ||
639 | spin_unlock(&object->lock); | ||
673 | } | 640 | } |
674 | 641 | ||
675 | /* this just shifts the object release to the work processor */ | 642 | if (!list_empty(&object->dependents)) |
676 | fscache_put_object(object); | 643 | return transit_to(KILL_DEPENDENTS); |
677 | 644 | ||
678 | _leave(""); | 645 | return transit_to(WAIT_FOR_CLEARANCE); |
679 | } | 646 | } |
680 | 647 | ||
681 | /* | 648 | /* |
682 | * release or recycle an object that the netfs has discarded | 649 | * Kill dependent objects. |
683 | */ | 650 | */ |
684 | static void fscache_release_object(struct fscache_object *object) | 651 | static const struct fscache_state *fscache_kill_dependents(struct fscache_object *object, |
652 | int event) | ||
685 | { | 653 | { |
686 | _enter(""); | 654 | _enter("{OBJ%x},%d", object->debug_id, event); |
687 | 655 | ||
688 | fscache_drop_object(object); | 656 | if (!fscache_enqueue_dependents(object, FSCACHE_OBJECT_EV_KILL)) |
657 | return NO_TRANSIT; /* Not finished */ | ||
658 | return transit_to(WAIT_FOR_CLEARANCE); | ||
689 | } | 659 | } |
690 | 660 | ||
691 | /* | 661 | /* |
692 | * withdraw an object from active service | 662 | * Drop an object's attachments |
693 | */ | 663 | */ |
694 | static void fscache_withdraw_object(struct fscache_object *object) | 664 | static const struct fscache_state *fscache_drop_object(struct fscache_object *object, |
665 | int event) | ||
695 | { | 666 | { |
696 | struct fscache_cookie *cookie; | 667 | struct fscache_object *parent = object->parent; |
697 | bool detached; | 668 | struct fscache_cookie *cookie = object->cookie; |
669 | struct fscache_cache *cache = object->cache; | ||
670 | bool awaken = false; | ||
698 | 671 | ||
699 | _enter(""); | 672 | _enter("{OBJ%x,%d},%d", object->debug_id, object->n_children, event); |
700 | 673 | ||
701 | spin_lock(&object->lock); | 674 | ASSERT(cookie != NULL); |
702 | cookie = object->cookie; | 675 | ASSERT(!hlist_unhashed(&object->cookie_link)); |
703 | if (cookie) { | ||
704 | /* need to get the cookie lock before the object lock, starting | ||
705 | * from the object pointer */ | ||
706 | atomic_inc(&cookie->usage); | ||
707 | spin_unlock(&object->lock); | ||
708 | 676 | ||
709 | detached = false; | 677 | /* Make sure the cookie no longer points here and that the netfs isn't |
710 | spin_lock(&cookie->lock); | 678 | * waiting for us. |
711 | spin_lock(&object->lock); | 679 | */ |
680 | spin_lock(&cookie->lock); | ||
681 | hlist_del_init(&object->cookie_link); | ||
682 | if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) | ||
683 | awaken = true; | ||
684 | spin_unlock(&cookie->lock); | ||
712 | 685 | ||
713 | if (object->cookie == cookie) { | 686 | if (awaken) |
714 | hlist_del_init(&object->cookie_link); | 687 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); |
715 | object->cookie = NULL; | ||
716 | fscache_invalidation_complete(cookie); | ||
717 | detached = true; | ||
718 | } | ||
719 | spin_unlock(&cookie->lock); | ||
720 | fscache_cookie_put(cookie); | ||
721 | if (detached) | ||
722 | fscache_cookie_put(cookie); | ||
723 | } | ||
724 | 688 | ||
689 | /* Prevent a race with our last child, which has to signal EV_CLEARED | ||
690 | * before dropping our spinlock. | ||
691 | */ | ||
692 | spin_lock(&object->lock); | ||
725 | spin_unlock(&object->lock); | 693 | spin_unlock(&object->lock); |
726 | 694 | ||
727 | fscache_drop_object(object); | 695 | /* Discard from the cache's collection of objects */ |
728 | } | 696 | spin_lock(&cache->object_list_lock); |
697 | list_del_init(&object->cache_link); | ||
698 | spin_unlock(&cache->object_list_lock); | ||
729 | 699 | ||
730 | /* | 700 | fscache_stat(&fscache_n_cop_drop_object); |
731 | * withdraw an object from active service at the behest of the cache | 701 | cache->ops->drop_object(object); |
732 | * - need break the links to a cached object cookie | 702 | fscache_stat_d(&fscache_n_cop_drop_object); |
733 | * - called under two situations: | ||
734 | * (1) recycler decides to reclaim an in-use object | ||
735 | * (2) a cache is unmounted | ||
736 | * - have to take care as the cookie can be being relinquished by the netfs | ||
737 | * simultaneously | ||
738 | * - the object is pinned by the caller holding a refcount on it | ||
739 | */ | ||
740 | void fscache_withdrawing_object(struct fscache_cache *cache, | ||
741 | struct fscache_object *object) | ||
742 | { | ||
743 | bool enqueue = false; | ||
744 | 703 | ||
745 | _enter(",OBJ%x", object->debug_id); | 704 | /* The parent object wants to know when all it dependents have gone */ |
705 | if (parent) { | ||
706 | _debug("release parent OBJ%x {%d}", | ||
707 | parent->debug_id, parent->n_children); | ||
746 | 708 | ||
747 | spin_lock(&object->lock); | 709 | spin_lock(&parent->lock); |
748 | if (object->state < FSCACHE_OBJECT_WITHDRAWING) { | 710 | parent->n_children--; |
749 | object->state = FSCACHE_OBJECT_WITHDRAWING; | 711 | if (parent->n_children == 0) |
750 | enqueue = true; | 712 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); |
713 | spin_unlock(&parent->lock); | ||
714 | object->parent = NULL; | ||
751 | } | 715 | } |
752 | spin_unlock(&object->lock); | ||
753 | 716 | ||
754 | if (enqueue) | 717 | /* this just shifts the object release to the work processor */ |
755 | fscache_enqueue_object(object); | 718 | fscache_put_object(object); |
719 | fscache_stat(&fscache_n_object_dead); | ||
756 | 720 | ||
757 | _leave(""); | 721 | _leave(""); |
722 | return transit_to(OBJECT_DEAD); | ||
758 | } | 723 | } |
759 | 724 | ||
760 | /* | 725 | /* |
@@ -771,7 +736,7 @@ static int fscache_get_object(struct fscache_object *object) | |||
771 | } | 736 | } |
772 | 737 | ||
773 | /* | 738 | /* |
774 | * discard a ref on a work item | 739 | * Discard a ref on an object |
775 | */ | 740 | */ |
776 | static void fscache_put_object(struct fscache_object *object) | 741 | static void fscache_put_object(struct fscache_object *object) |
777 | { | 742 | { |
@@ -780,6 +745,22 @@ static void fscache_put_object(struct fscache_object *object) | |||
780 | fscache_stat_d(&fscache_n_cop_put_object); | 745 | fscache_stat_d(&fscache_n_cop_put_object); |
781 | } | 746 | } |
782 | 747 | ||
748 | /** | ||
749 | * fscache_object_destroy - Note that a cache object is about to be destroyed | ||
750 | * @object: The object to be destroyed | ||
751 | * | ||
752 | * Note the imminent destruction and deallocation of a cache object record. | ||
753 | */ | ||
754 | void fscache_object_destroy(struct fscache_object *object) | ||
755 | { | ||
756 | fscache_objlist_remove(object); | ||
757 | |||
758 | /* We can get rid of the cookie now */ | ||
759 | fscache_cookie_put(object->cookie); | ||
760 | object->cookie = NULL; | ||
761 | } | ||
762 | EXPORT_SYMBOL(fscache_object_destroy); | ||
763 | |||
783 | /* | 764 | /* |
784 | * enqueue an object for metadata-type processing | 765 | * enqueue an object for metadata-type processing |
785 | */ | 766 | */ |
@@ -803,7 +784,7 @@ void fscache_enqueue_object(struct fscache_object *object) | |||
803 | 784 | ||
804 | /** | 785 | /** |
805 | * fscache_object_sleep_till_congested - Sleep until object wq is congested | 786 | * fscache_object_sleep_till_congested - Sleep until object wq is congested |
806 | * @timoutp: Scheduler sleep timeout | 787 | * @timeoutp: Scheduler sleep timeout |
807 | * | 788 | * |
808 | * Allow an object handler to sleep until the object workqueue is congested. | 789 | * Allow an object handler to sleep until the object workqueue is congested. |
809 | * | 790 | * |
@@ -831,18 +812,21 @@ bool fscache_object_sleep_till_congested(signed long *timeoutp) | |||
831 | EXPORT_SYMBOL_GPL(fscache_object_sleep_till_congested); | 812 | EXPORT_SYMBOL_GPL(fscache_object_sleep_till_congested); |
832 | 813 | ||
833 | /* | 814 | /* |
834 | * enqueue the dependents of an object for metadata-type processing | 815 | * Enqueue the dependents of an object for metadata-type processing. |
835 | * - the caller must hold the object's lock | 816 | * |
836 | * - this may cause an already locked object to wind up being processed again | 817 | * If we don't manage to finish the list before the scheduler wants to run |
818 | * again then return false immediately. We return true if the list was | ||
819 | * cleared. | ||
837 | */ | 820 | */ |
838 | static void fscache_enqueue_dependents(struct fscache_object *object) | 821 | static bool fscache_enqueue_dependents(struct fscache_object *object, int event) |
839 | { | 822 | { |
840 | struct fscache_object *dep; | 823 | struct fscache_object *dep; |
824 | bool ret = true; | ||
841 | 825 | ||
842 | _enter("{OBJ%x}", object->debug_id); | 826 | _enter("{OBJ%x}", object->debug_id); |
843 | 827 | ||
844 | if (list_empty(&object->dependents)) | 828 | if (list_empty(&object->dependents)) |
845 | return; | 829 | return true; |
846 | 830 | ||
847 | spin_lock(&object->lock); | 831 | spin_lock(&object->lock); |
848 | 832 | ||
@@ -851,23 +835,23 @@ static void fscache_enqueue_dependents(struct fscache_object *object) | |||
851 | struct fscache_object, dep_link); | 835 | struct fscache_object, dep_link); |
852 | list_del_init(&dep->dep_link); | 836 | list_del_init(&dep->dep_link); |
853 | 837 | ||
854 | 838 | fscache_raise_event(dep, event); | |
855 | /* sort onto appropriate lists */ | ||
856 | fscache_enqueue_object(dep); | ||
857 | fscache_put_object(dep); | 839 | fscache_put_object(dep); |
858 | 840 | ||
859 | if (!list_empty(&object->dependents)) | 841 | if (!list_empty(&object->dependents) && need_resched()) { |
860 | cond_resched_lock(&object->lock); | 842 | ret = false; |
843 | break; | ||
844 | } | ||
861 | } | 845 | } |
862 | 846 | ||
863 | spin_unlock(&object->lock); | 847 | spin_unlock(&object->lock); |
848 | return ret; | ||
864 | } | 849 | } |
865 | 850 | ||
866 | /* | 851 | /* |
867 | * remove an object from whatever queue it's waiting on | 852 | * remove an object from whatever queue it's waiting on |
868 | * - the caller must hold object->lock | ||
869 | */ | 853 | */ |
870 | void fscache_dequeue_object(struct fscache_object *object) | 854 | static void fscache_dequeue_object(struct fscache_object *object) |
871 | { | 855 | { |
872 | _enter("{OBJ%x}", object->debug_id); | 856 | _enter("{OBJ%x}", object->debug_id); |
873 | 857 | ||
@@ -886,7 +870,10 @@ void fscache_dequeue_object(struct fscache_object *object) | |||
886 | * @data: The auxiliary data for the object | 870 | * @data: The auxiliary data for the object |
887 | * @datalen: The size of the auxiliary data | 871 | * @datalen: The size of the auxiliary data |
888 | * | 872 | * |
889 | * This function consults the netfs about the coherency state of an object | 873 | * This function consults the netfs about the coherency state of an object. |
874 | * The caller must be holding a ref on cookie->n_active (held by | ||
875 | * fscache_look_up_object() on behalf of the cache backend during object lookup | ||
876 | * and creation). | ||
890 | */ | 877 | */ |
891 | enum fscache_checkaux fscache_check_aux(struct fscache_object *object, | 878 | enum fscache_checkaux fscache_check_aux(struct fscache_object *object, |
892 | const void *data, uint16_t datalen) | 879 | const void *data, uint16_t datalen) |
@@ -927,12 +914,23 @@ EXPORT_SYMBOL(fscache_check_aux); | |||
927 | /* | 914 | /* |
928 | * Asynchronously invalidate an object. | 915 | * Asynchronously invalidate an object. |
929 | */ | 916 | */ |
930 | static void fscache_invalidate_object(struct fscache_object *object) | 917 | static const struct fscache_state *_fscache_invalidate_object(struct fscache_object *object, |
918 | int event) | ||
931 | { | 919 | { |
932 | struct fscache_operation *op; | 920 | struct fscache_operation *op; |
933 | struct fscache_cookie *cookie = object->cookie; | 921 | struct fscache_cookie *cookie = object->cookie; |
934 | 922 | ||
935 | _enter("{OBJ%x}", object->debug_id); | 923 | _enter("{OBJ%x},%d", object->debug_id, event); |
924 | |||
925 | /* We're going to need the cookie. If the cookie is not available then | ||
926 | * retire the object instead. | ||
927 | */ | ||
928 | if (!fscache_use_cookie(object)) { | ||
929 | ASSERT(object->cookie->stores.rnode == NULL); | ||
930 | set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags); | ||
931 | _leave(" [no cookie]"); | ||
932 | return transit_to(KILL_OBJECT); | ||
933 | } | ||
936 | 934 | ||
937 | /* Reject any new read/write ops and abort any that are pending. */ | 935 | /* Reject any new read/write ops and abort any that are pending. */ |
938 | fscache_invalidate_writes(cookie); | 936 | fscache_invalidate_writes(cookie); |
@@ -941,14 +939,13 @@ static void fscache_invalidate_object(struct fscache_object *object) | |||
941 | 939 | ||
942 | /* Now we have to wait for in-progress reads and writes */ | 940 | /* Now we have to wait for in-progress reads and writes */ |
943 | op = kzalloc(sizeof(*op), GFP_KERNEL); | 941 | op = kzalloc(sizeof(*op), GFP_KERNEL); |
944 | if (!op) { | 942 | if (!op) |
945 | fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); | 943 | goto nomem; |
946 | _leave(" [ENOMEM]"); | ||
947 | return; | ||
948 | } | ||
949 | 944 | ||
950 | fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); | 945 | fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); |
951 | op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); | 946 | op->flags = FSCACHE_OP_ASYNC | |
947 | (1 << FSCACHE_OP_EXCLUSIVE) | | ||
948 | (1 << FSCACHE_OP_UNUSE_COOKIE); | ||
952 | 949 | ||
953 | spin_lock(&cookie->lock); | 950 | spin_lock(&cookie->lock); |
954 | if (fscache_submit_exclusive_op(object, op) < 0) | 951 | if (fscache_submit_exclusive_op(object, op) < 0) |
@@ -965,13 +962,50 @@ static void fscache_invalidate_object(struct fscache_object *object) | |||
965 | /* We can allow read and write requests to come in once again. They'll | 962 | /* We can allow read and write requests to come in once again. They'll |
966 | * queue up behind our exclusive invalidation operation. | 963 | * queue up behind our exclusive invalidation operation. |
967 | */ | 964 | */ |
968 | fscache_invalidation_complete(cookie); | 965 | if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) |
969 | _leave(""); | 966 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); |
970 | return; | 967 | _leave(" [ok]"); |
968 | return transit_to(UPDATE_OBJECT); | ||
969 | |||
970 | nomem: | ||
971 | clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); | ||
972 | fscache_unuse_cookie(object); | ||
973 | _leave(" [ENOMEM]"); | ||
974 | return transit_to(KILL_OBJECT); | ||
971 | 975 | ||
972 | submit_op_failed: | 976 | submit_op_failed: |
977 | clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); | ||
973 | spin_unlock(&cookie->lock); | 978 | spin_unlock(&cookie->lock); |
974 | kfree(op); | 979 | kfree(op); |
975 | fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); | ||
976 | _leave(" [EIO]"); | 980 | _leave(" [EIO]"); |
981 | return transit_to(KILL_OBJECT); | ||
982 | } | ||
983 | |||
984 | static const struct fscache_state *fscache_invalidate_object(struct fscache_object *object, | ||
985 | int event) | ||
986 | { | ||
987 | const struct fscache_state *s; | ||
988 | |||
989 | fscache_stat(&fscache_n_invalidates_run); | ||
990 | fscache_stat(&fscache_n_cop_invalidate_object); | ||
991 | s = _fscache_invalidate_object(object, event); | ||
992 | fscache_stat_d(&fscache_n_cop_invalidate_object); | ||
993 | return s; | ||
994 | } | ||
995 | |||
996 | /* | ||
997 | * Asynchronously update an object. | ||
998 | */ | ||
999 | static const struct fscache_state *fscache_update_object(struct fscache_object *object, | ||
1000 | int event) | ||
1001 | { | ||
1002 | _enter("{OBJ%x},%d", object->debug_id, event); | ||
1003 | |||
1004 | fscache_stat(&fscache_n_updates_run); | ||
1005 | fscache_stat(&fscache_n_cop_update_object); | ||
1006 | object->cache->ops->update_object(object); | ||
1007 | fscache_stat_d(&fscache_n_cop_update_object); | ||
1008 | |||
1009 | _leave(""); | ||
1010 | return transit_to(WAIT_FOR_CMD); | ||
977 | } | 1011 | } |
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 762a9ec4ffa4..318071aca217 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c | |||
@@ -35,7 +35,7 @@ void fscache_enqueue_operation(struct fscache_operation *op) | |||
35 | 35 | ||
36 | ASSERT(list_empty(&op->pend_link)); | 36 | ASSERT(list_empty(&op->pend_link)); |
37 | ASSERT(op->processor != NULL); | 37 | ASSERT(op->processor != NULL); |
38 | ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); | 38 | ASSERT(fscache_object_is_available(op->object)); |
39 | ASSERTCMP(atomic_read(&op->usage), >, 0); | 39 | ASSERTCMP(atomic_read(&op->usage), >, 0); |
40 | ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); | 40 | ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); |
41 | 41 | ||
@@ -119,7 +119,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object, | |||
119 | /* need to issue a new write op after this */ | 119 | /* need to issue a new write op after this */ |
120 | clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); | 120 | clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); |
121 | ret = 0; | 121 | ret = 0; |
122 | } else if (object->state == FSCACHE_OBJECT_CREATING) { | 122 | } else if (test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { |
123 | op->object = object; | 123 | op->object = object; |
124 | object->n_ops++; | 124 | object->n_ops++; |
125 | object->n_exclusive++; /* reads and writes must wait */ | 125 | object->n_exclusive++; /* reads and writes must wait */ |
@@ -144,7 +144,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object, | |||
144 | */ | 144 | */ |
145 | static void fscache_report_unexpected_submission(struct fscache_object *object, | 145 | static void fscache_report_unexpected_submission(struct fscache_object *object, |
146 | struct fscache_operation *op, | 146 | struct fscache_operation *op, |
147 | unsigned long ostate) | 147 | const struct fscache_state *ostate) |
148 | { | 148 | { |
149 | static bool once_only; | 149 | static bool once_only; |
150 | struct fscache_operation *p; | 150 | struct fscache_operation *p; |
@@ -155,11 +155,8 @@ static void fscache_report_unexpected_submission(struct fscache_object *object, | |||
155 | once_only = true; | 155 | once_only = true; |
156 | 156 | ||
157 | kdebug("unexpected submission OP%x [OBJ%x %s]", | 157 | kdebug("unexpected submission OP%x [OBJ%x %s]", |
158 | op->debug_id, object->debug_id, | 158 | op->debug_id, object->debug_id, object->state->name); |
159 | fscache_object_states[object->state]); | 159 | kdebug("objstate=%s [%s]", object->state->name, ostate->name); |
160 | kdebug("objstate=%s [%s]", | ||
161 | fscache_object_states[object->state], | ||
162 | fscache_object_states[ostate]); | ||
163 | kdebug("objflags=%lx", object->flags); | 160 | kdebug("objflags=%lx", object->flags); |
164 | kdebug("objevent=%lx [%lx]", object->events, object->event_mask); | 161 | kdebug("objevent=%lx [%lx]", object->events, object->event_mask); |
165 | kdebug("ops=%u inp=%u exc=%u", | 162 | kdebug("ops=%u inp=%u exc=%u", |
@@ -190,7 +187,7 @@ static void fscache_report_unexpected_submission(struct fscache_object *object, | |||
190 | int fscache_submit_op(struct fscache_object *object, | 187 | int fscache_submit_op(struct fscache_object *object, |
191 | struct fscache_operation *op) | 188 | struct fscache_operation *op) |
192 | { | 189 | { |
193 | unsigned long ostate; | 190 | const struct fscache_state *ostate; |
194 | int ret; | 191 | int ret; |
195 | 192 | ||
196 | _enter("{OBJ%x OP%x},{%u}", | 193 | _enter("{OBJ%x OP%x},{%u}", |
@@ -226,16 +223,14 @@ int fscache_submit_op(struct fscache_object *object, | |||
226 | fscache_run_op(object, op); | 223 | fscache_run_op(object, op); |
227 | } | 224 | } |
228 | ret = 0; | 225 | ret = 0; |
229 | } else if (object->state == FSCACHE_OBJECT_CREATING) { | 226 | } else if (test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { |
230 | op->object = object; | 227 | op->object = object; |
231 | object->n_ops++; | 228 | object->n_ops++; |
232 | atomic_inc(&op->usage); | 229 | atomic_inc(&op->usage); |
233 | list_add_tail(&op->pend_link, &object->pending_ops); | 230 | list_add_tail(&op->pend_link, &object->pending_ops); |
234 | fscache_stat(&fscache_n_op_pend); | 231 | fscache_stat(&fscache_n_op_pend); |
235 | ret = 0; | 232 | ret = 0; |
236 | } else if (object->state == FSCACHE_OBJECT_DYING || | 233 | } else if (fscache_object_is_dying(object)) { |
237 | object->state == FSCACHE_OBJECT_LC_DYING || | ||
238 | object->state == FSCACHE_OBJECT_WITHDRAWING) { | ||
239 | fscache_stat(&fscache_n_op_rejected); | 234 | fscache_stat(&fscache_n_op_rejected); |
240 | op->state = FSCACHE_OP_ST_CANCELLED; | 235 | op->state = FSCACHE_OP_ST_CANCELLED; |
241 | ret = -ENOBUFS; | 236 | ret = -ENOBUFS; |
@@ -265,8 +260,8 @@ void fscache_abort_object(struct fscache_object *object) | |||
265 | } | 260 | } |
266 | 261 | ||
267 | /* | 262 | /* |
268 | * jump start the operation processing on an object | 263 | * Jump start the operation processing on an object. The caller must hold |
269 | * - caller must hold object->lock | 264 | * object->lock. |
270 | */ | 265 | */ |
271 | void fscache_start_operations(struct fscache_object *object) | 266 | void fscache_start_operations(struct fscache_object *object) |
272 | { | 267 | { |
@@ -428,14 +423,10 @@ void fscache_put_operation(struct fscache_operation *op) | |||
428 | 423 | ||
429 | object = op->object; | 424 | object = op->object; |
430 | 425 | ||
431 | if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) { | 426 | if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) |
432 | if (atomic_dec_and_test(&object->n_reads)) { | 427 | atomic_dec(&object->n_reads); |
433 | clear_bit(FSCACHE_COOKIE_WAITING_ON_READS, | 428 | if (test_bit(FSCACHE_OP_UNUSE_COOKIE, &op->flags)) |
434 | &object->cookie->flags); | 429 | fscache_unuse_cookie(object); |
435 | wake_up_bit(&object->cookie->flags, | ||
436 | FSCACHE_COOKIE_WAITING_ON_READS); | ||
437 | } | ||
438 | } | ||
439 | 430 | ||
440 | /* now... we may get called with the object spinlock held, so we | 431 | /* now... we may get called with the object spinlock held, so we |
441 | * complete the cleanup here only if we can immediately acquire the | 432 | * complete the cleanup here only if we can immediately acquire the |
diff --git a/fs/fscache/page.c b/fs/fscache/page.c index ff000e52072d..d479ab3c63e4 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c | |||
@@ -109,7 +109,7 @@ page_busy: | |||
109 | * allocator as the work threads writing to the cache may all end up | 109 | * allocator as the work threads writing to the cache may all end up |
110 | * sleeping on memory allocation, so we may need to impose a timeout | 110 | * sleeping on memory allocation, so we may need to impose a timeout |
111 | * too. */ | 111 | * too. */ |
112 | if (!(gfp & __GFP_WAIT)) { | 112 | if (!(gfp & __GFP_WAIT) || !(gfp & __GFP_FS)) { |
113 | fscache_stat(&fscache_n_store_vmscan_busy); | 113 | fscache_stat(&fscache_n_store_vmscan_busy); |
114 | return false; | 114 | return false; |
115 | } | 115 | } |
@@ -163,10 +163,12 @@ static void fscache_attr_changed_op(struct fscache_operation *op) | |||
163 | 163 | ||
164 | fscache_stat(&fscache_n_attr_changed_calls); | 164 | fscache_stat(&fscache_n_attr_changed_calls); |
165 | 165 | ||
166 | if (fscache_object_is_active(object)) { | 166 | if (fscache_object_is_active(object) && |
167 | fscache_use_cookie(object)) { | ||
167 | fscache_stat(&fscache_n_cop_attr_changed); | 168 | fscache_stat(&fscache_n_cop_attr_changed); |
168 | ret = object->cache->ops->attr_changed(object); | 169 | ret = object->cache->ops->attr_changed(object); |
169 | fscache_stat_d(&fscache_n_cop_attr_changed); | 170 | fscache_stat_d(&fscache_n_cop_attr_changed); |
171 | fscache_unuse_cookie(object); | ||
170 | if (ret < 0) | 172 | if (ret < 0) |
171 | fscache_abort_object(object); | 173 | fscache_abort_object(object); |
172 | } | 174 | } |
@@ -233,7 +235,7 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) | |||
233 | 235 | ||
234 | _enter("{OP%x}", op->op.debug_id); | 236 | _enter("{OP%x}", op->op.debug_id); |
235 | 237 | ||
236 | ASSERTCMP(op->n_pages, ==, 0); | 238 | ASSERTCMP(atomic_read(&op->n_pages), ==, 0); |
237 | 239 | ||
238 | fscache_hist(fscache_retrieval_histogram, op->start_time); | 240 | fscache_hist(fscache_retrieval_histogram, op->start_time); |
239 | if (op->context) | 241 | if (op->context) |
@@ -246,6 +248,7 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) | |||
246 | * allocate a retrieval op | 248 | * allocate a retrieval op |
247 | */ | 249 | */ |
248 | static struct fscache_retrieval *fscache_alloc_retrieval( | 250 | static struct fscache_retrieval *fscache_alloc_retrieval( |
251 | struct fscache_cookie *cookie, | ||
249 | struct address_space *mapping, | 252 | struct address_space *mapping, |
250 | fscache_rw_complete_t end_io_func, | 253 | fscache_rw_complete_t end_io_func, |
251 | void *context) | 254 | void *context) |
@@ -260,7 +263,10 @@ static struct fscache_retrieval *fscache_alloc_retrieval( | |||
260 | } | 263 | } |
261 | 264 | ||
262 | fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op); | 265 | fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op); |
263 | op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING); | 266 | atomic_inc(&cookie->n_active); |
267 | op->op.flags = FSCACHE_OP_MYTHREAD | | ||
268 | (1UL << FSCACHE_OP_WAITING) | | ||
269 | (1UL << FSCACHE_OP_UNUSE_COOKIE); | ||
264 | op->mapping = mapping; | 270 | op->mapping = mapping; |
265 | op->end_io_func = end_io_func; | 271 | op->end_io_func = end_io_func; |
266 | op->context = context; | 272 | op->context = context; |
@@ -310,7 +316,7 @@ static void fscache_do_cancel_retrieval(struct fscache_operation *_op) | |||
310 | struct fscache_retrieval *op = | 316 | struct fscache_retrieval *op = |
311 | container_of(_op, struct fscache_retrieval, op); | 317 | container_of(_op, struct fscache_retrieval, op); |
312 | 318 | ||
313 | op->n_pages = 0; | 319 | atomic_set(&op->n_pages, 0); |
314 | } | 320 | } |
315 | 321 | ||
316 | /* | 322 | /* |
@@ -394,12 +400,13 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, | |||
394 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | 400 | if (fscache_wait_for_deferred_lookup(cookie) < 0) |
395 | return -ERESTARTSYS; | 401 | return -ERESTARTSYS; |
396 | 402 | ||
397 | op = fscache_alloc_retrieval(page->mapping, end_io_func, context); | 403 | op = fscache_alloc_retrieval(cookie, page->mapping, |
404 | end_io_func,context); | ||
398 | if (!op) { | 405 | if (!op) { |
399 | _leave(" = -ENOMEM"); | 406 | _leave(" = -ENOMEM"); |
400 | return -ENOMEM; | 407 | return -ENOMEM; |
401 | } | 408 | } |
402 | op->n_pages = 1; | 409 | atomic_set(&op->n_pages, 1); |
403 | 410 | ||
404 | spin_lock(&cookie->lock); | 411 | spin_lock(&cookie->lock); |
405 | 412 | ||
@@ -408,7 +415,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, | |||
408 | object = hlist_entry(cookie->backing_objects.first, | 415 | object = hlist_entry(cookie->backing_objects.first, |
409 | struct fscache_object, cookie_link); | 416 | struct fscache_object, cookie_link); |
410 | 417 | ||
411 | ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); | 418 | ASSERT(test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)); |
412 | 419 | ||
413 | atomic_inc(&object->n_reads); | 420 | atomic_inc(&object->n_reads); |
414 | __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); | 421 | __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); |
@@ -465,6 +472,7 @@ nobufs_unlock_dec: | |||
465 | atomic_dec(&object->n_reads); | 472 | atomic_dec(&object->n_reads); |
466 | nobufs_unlock: | 473 | nobufs_unlock: |
467 | spin_unlock(&cookie->lock); | 474 | spin_unlock(&cookie->lock); |
475 | atomic_dec(&cookie->n_active); | ||
468 | kfree(op); | 476 | kfree(op); |
469 | nobufs: | 477 | nobufs: |
470 | fscache_stat(&fscache_n_retrievals_nobufs); | 478 | fscache_stat(&fscache_n_retrievals_nobufs); |
@@ -522,10 +530,10 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, | |||
522 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | 530 | if (fscache_wait_for_deferred_lookup(cookie) < 0) |
523 | return -ERESTARTSYS; | 531 | return -ERESTARTSYS; |
524 | 532 | ||
525 | op = fscache_alloc_retrieval(mapping, end_io_func, context); | 533 | op = fscache_alloc_retrieval(cookie, mapping, end_io_func, context); |
526 | if (!op) | 534 | if (!op) |
527 | return -ENOMEM; | 535 | return -ENOMEM; |
528 | op->n_pages = *nr_pages; | 536 | atomic_set(&op->n_pages, *nr_pages); |
529 | 537 | ||
530 | spin_lock(&cookie->lock); | 538 | spin_lock(&cookie->lock); |
531 | 539 | ||
@@ -589,6 +597,7 @@ nobufs_unlock_dec: | |||
589 | atomic_dec(&object->n_reads); | 597 | atomic_dec(&object->n_reads); |
590 | nobufs_unlock: | 598 | nobufs_unlock: |
591 | spin_unlock(&cookie->lock); | 599 | spin_unlock(&cookie->lock); |
600 | atomic_dec(&cookie->n_active); | ||
592 | kfree(op); | 601 | kfree(op); |
593 | nobufs: | 602 | nobufs: |
594 | fscache_stat(&fscache_n_retrievals_nobufs); | 603 | fscache_stat(&fscache_n_retrievals_nobufs); |
@@ -631,10 +640,10 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, | |||
631 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | 640 | if (fscache_wait_for_deferred_lookup(cookie) < 0) |
632 | return -ERESTARTSYS; | 641 | return -ERESTARTSYS; |
633 | 642 | ||
634 | op = fscache_alloc_retrieval(page->mapping, NULL, NULL); | 643 | op = fscache_alloc_retrieval(cookie, page->mapping, NULL, NULL); |
635 | if (!op) | 644 | if (!op) |
636 | return -ENOMEM; | 645 | return -ENOMEM; |
637 | op->n_pages = 1; | 646 | atomic_set(&op->n_pages, 1); |
638 | 647 | ||
639 | spin_lock(&cookie->lock); | 648 | spin_lock(&cookie->lock); |
640 | 649 | ||
@@ -675,6 +684,7 @@ error: | |||
675 | 684 | ||
676 | nobufs_unlock: | 685 | nobufs_unlock: |
677 | spin_unlock(&cookie->lock); | 686 | spin_unlock(&cookie->lock); |
687 | atomic_dec(&cookie->n_active); | ||
678 | kfree(op); | 688 | kfree(op); |
679 | nobufs: | 689 | nobufs: |
680 | fscache_stat(&fscache_n_allocs_nobufs); | 690 | fscache_stat(&fscache_n_allocs_nobufs); |
@@ -729,8 +739,9 @@ static void fscache_write_op(struct fscache_operation *_op) | |||
729 | */ | 739 | */ |
730 | spin_unlock(&object->lock); | 740 | spin_unlock(&object->lock); |
731 | fscache_op_complete(&op->op, false); | 741 | fscache_op_complete(&op->op, false); |
732 | _leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}", | 742 | _leave(" [cancel] op{f=%lx s=%u} obj{s=%s f=%lx}", |
733 | _op->flags, _op->state, object->state, object->flags); | 743 | _op->flags, _op->state, object->state->short_name, |
744 | object->flags); | ||
734 | return; | 745 | return; |
735 | } | 746 | } |
736 | 747 | ||
@@ -796,11 +807,16 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) | |||
796 | 807 | ||
797 | _enter(""); | 808 | _enter(""); |
798 | 809 | ||
799 | while (spin_lock(&cookie->stores_lock), | 810 | for (;;) { |
800 | n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, | 811 | spin_lock(&cookie->stores_lock); |
801 | ARRAY_SIZE(results), | 812 | n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, |
802 | FSCACHE_COOKIE_PENDING_TAG), | 813 | ARRAY_SIZE(results), |
803 | n > 0) { | 814 | FSCACHE_COOKIE_PENDING_TAG); |
815 | if (n == 0) { | ||
816 | spin_unlock(&cookie->stores_lock); | ||
817 | break; | ||
818 | } | ||
819 | |||
804 | for (i = n - 1; i >= 0; i--) { | 820 | for (i = n - 1; i >= 0; i--) { |
805 | page = results[i]; | 821 | page = results[i]; |
806 | radix_tree_delete(&cookie->stores, page->index); | 822 | radix_tree_delete(&cookie->stores, page->index); |
@@ -812,7 +828,6 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) | |||
812 | page_cache_release(results[i]); | 828 | page_cache_release(results[i]); |
813 | } | 829 | } |
814 | 830 | ||
815 | spin_unlock(&cookie->stores_lock); | ||
816 | _leave(""); | 831 | _leave(""); |
817 | } | 832 | } |
818 | 833 | ||
@@ -829,14 +844,12 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) | |||
829 | * (1) negative lookup, object not yet created (FSCACHE_COOKIE_CREATING is | 844 | * (1) negative lookup, object not yet created (FSCACHE_COOKIE_CREATING is |
830 | * set) | 845 | * set) |
831 | * | 846 | * |
832 | * (a) no writes yet (set FSCACHE_COOKIE_PENDING_FILL and queue deferred | 847 | * (a) no writes yet |
833 | * fill op) | ||
834 | * | 848 | * |
835 | * (b) writes deferred till post-creation (mark page for writing and | 849 | * (b) writes deferred till post-creation (mark page for writing and |
836 | * return immediately) | 850 | * return immediately) |
837 | * | 851 | * |
838 | * (2) negative lookup, object created, initial fill being made from netfs | 852 | * (2) negative lookup, object created, initial fill being made from netfs |
839 | * (FSCACHE_COOKIE_INITIAL_FILL is set) | ||
840 | * | 853 | * |
841 | * (a) fill point not yet reached this page (mark page for writing and | 854 | * (a) fill point not yet reached this page (mark page for writing and |
842 | * return) | 855 | * return) |
@@ -873,7 +886,9 @@ int __fscache_write_page(struct fscache_cookie *cookie, | |||
873 | 886 | ||
874 | fscache_operation_init(&op->op, fscache_write_op, | 887 | fscache_operation_init(&op->op, fscache_write_op, |
875 | fscache_release_write_op); | 888 | fscache_release_write_op); |
876 | op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); | 889 | op->op.flags = FSCACHE_OP_ASYNC | |
890 | (1 << FSCACHE_OP_WAITING) | | ||
891 | (1 << FSCACHE_OP_UNUSE_COOKIE); | ||
877 | 892 | ||
878 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); | 893 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); |
879 | if (ret < 0) | 894 | if (ret < 0) |
@@ -919,6 +934,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, | |||
919 | op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); | 934 | op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); |
920 | op->store_limit = object->store_limit; | 935 | op->store_limit = object->store_limit; |
921 | 936 | ||
937 | atomic_inc(&cookie->n_active); | ||
922 | if (fscache_submit_op(object, &op->op) < 0) | 938 | if (fscache_submit_op(object, &op->op) < 0) |
923 | goto submit_failed; | 939 | goto submit_failed; |
924 | 940 | ||
@@ -945,6 +961,7 @@ already_pending: | |||
945 | return 0; | 961 | return 0; |
946 | 962 | ||
947 | submit_failed: | 963 | submit_failed: |
964 | atomic_dec(&cookie->n_active); | ||
948 | spin_lock(&cookie->stores_lock); | 965 | spin_lock(&cookie->stores_lock); |
949 | radix_tree_delete(&cookie->stores, page->index); | 966 | radix_tree_delete(&cookie->stores, page->index); |
950 | spin_unlock(&cookie->stores_lock); | 967 | spin_unlock(&cookie->stores_lock); |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index f3f783dc4f75..72a5d5b04494 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -14,7 +14,7 @@ | |||
14 | #include <linux/namei.h> | 14 | #include <linux/namei.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | 16 | ||
17 | static bool fuse_use_readdirplus(struct inode *dir, struct file *filp) | 17 | static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx) |
18 | { | 18 | { |
19 | struct fuse_conn *fc = get_fuse_conn(dir); | 19 | struct fuse_conn *fc = get_fuse_conn(dir); |
20 | struct fuse_inode *fi = get_fuse_inode(dir); | 20 | struct fuse_inode *fi = get_fuse_inode(dir); |
@@ -25,7 +25,7 @@ static bool fuse_use_readdirplus(struct inode *dir, struct file *filp) | |||
25 | return true; | 25 | return true; |
26 | if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) | 26 | if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) |
27 | return true; | 27 | return true; |
28 | if (filp->f_pos == 0) | 28 | if (ctx->pos == 0) |
29 | return true; | 29 | return true; |
30 | return false; | 30 | return false; |
31 | } | 31 | } |
@@ -1165,25 +1165,23 @@ static int fuse_permission(struct inode *inode, int mask) | |||
1165 | } | 1165 | } |
1166 | 1166 | ||
1167 | static int parse_dirfile(char *buf, size_t nbytes, struct file *file, | 1167 | static int parse_dirfile(char *buf, size_t nbytes, struct file *file, |
1168 | void *dstbuf, filldir_t filldir) | 1168 | struct dir_context *ctx) |
1169 | { | 1169 | { |
1170 | while (nbytes >= FUSE_NAME_OFFSET) { | 1170 | while (nbytes >= FUSE_NAME_OFFSET) { |
1171 | struct fuse_dirent *dirent = (struct fuse_dirent *) buf; | 1171 | struct fuse_dirent *dirent = (struct fuse_dirent *) buf; |
1172 | size_t reclen = FUSE_DIRENT_SIZE(dirent); | 1172 | size_t reclen = FUSE_DIRENT_SIZE(dirent); |
1173 | int over; | ||
1174 | if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) | 1173 | if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) |
1175 | return -EIO; | 1174 | return -EIO; |
1176 | if (reclen > nbytes) | 1175 | if (reclen > nbytes) |
1177 | break; | 1176 | break; |
1178 | 1177 | ||
1179 | over = filldir(dstbuf, dirent->name, dirent->namelen, | 1178 | if (!dir_emit(ctx, dirent->name, dirent->namelen, |
1180 | file->f_pos, dirent->ino, dirent->type); | 1179 | dirent->ino, dirent->type)) |
1181 | if (over) | ||
1182 | break; | 1180 | break; |
1183 | 1181 | ||
1184 | buf += reclen; | 1182 | buf += reclen; |
1185 | nbytes -= reclen; | 1183 | nbytes -= reclen; |
1186 | file->f_pos = dirent->off; | 1184 | ctx->pos = dirent->off; |
1187 | } | 1185 | } |
1188 | 1186 | ||
1189 | return 0; | 1187 | return 0; |
@@ -1225,30 +1223,46 @@ static int fuse_direntplus_link(struct file *file, | |||
1225 | if (name.name[1] == '.' && name.len == 2) | 1223 | if (name.name[1] == '.' && name.len == 2) |
1226 | return 0; | 1224 | return 0; |
1227 | } | 1225 | } |
1226 | |||
1227 | if (invalid_nodeid(o->nodeid)) | ||
1228 | return -EIO; | ||
1229 | if (!fuse_valid_type(o->attr.mode)) | ||
1230 | return -EIO; | ||
1231 | |||
1228 | fc = get_fuse_conn(dir); | 1232 | fc = get_fuse_conn(dir); |
1229 | 1233 | ||
1230 | name.hash = full_name_hash(name.name, name.len); | 1234 | name.hash = full_name_hash(name.name, name.len); |
1231 | dentry = d_lookup(parent, &name); | 1235 | dentry = d_lookup(parent, &name); |
1232 | if (dentry && dentry->d_inode) { | 1236 | if (dentry) { |
1233 | inode = dentry->d_inode; | 1237 | inode = dentry->d_inode; |
1234 | if (get_node_id(inode) == o->nodeid) { | 1238 | if (!inode) { |
1239 | d_drop(dentry); | ||
1240 | } else if (get_node_id(inode) != o->nodeid || | ||
1241 | ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { | ||
1242 | err = d_invalidate(dentry); | ||
1243 | if (err) | ||
1244 | goto out; | ||
1245 | } else if (is_bad_inode(inode)) { | ||
1246 | err = -EIO; | ||
1247 | goto out; | ||
1248 | } else { | ||
1235 | struct fuse_inode *fi; | 1249 | struct fuse_inode *fi; |
1236 | fi = get_fuse_inode(inode); | 1250 | fi = get_fuse_inode(inode); |
1237 | spin_lock(&fc->lock); | 1251 | spin_lock(&fc->lock); |
1238 | fi->nlookup++; | 1252 | fi->nlookup++; |
1239 | spin_unlock(&fc->lock); | 1253 | spin_unlock(&fc->lock); |
1240 | 1254 | ||
1255 | fuse_change_attributes(inode, &o->attr, | ||
1256 | entry_attr_timeout(o), | ||
1257 | attr_version); | ||
1258 | |||
1241 | /* | 1259 | /* |
1242 | * The other branch to 'found' comes via fuse_iget() | 1260 | * The other branch to 'found' comes via fuse_iget() |
1243 | * which bumps nlookup inside | 1261 | * which bumps nlookup inside |
1244 | */ | 1262 | */ |
1245 | goto found; | 1263 | goto found; |
1246 | } | 1264 | } |
1247 | err = d_invalidate(dentry); | ||
1248 | if (err) | ||
1249 | goto out; | ||
1250 | dput(dentry); | 1265 | dput(dentry); |
1251 | dentry = NULL; | ||
1252 | } | 1266 | } |
1253 | 1267 | ||
1254 | dentry = d_alloc(parent, &name); | 1268 | dentry = d_alloc(parent, &name); |
@@ -1261,30 +1275,35 @@ static int fuse_direntplus_link(struct file *file, | |||
1261 | if (!inode) | 1275 | if (!inode) |
1262 | goto out; | 1276 | goto out; |
1263 | 1277 | ||
1264 | alias = d_materialise_unique(dentry, inode); | 1278 | if (S_ISDIR(inode->i_mode)) { |
1265 | err = PTR_ERR(alias); | 1279 | mutex_lock(&fc->inst_mutex); |
1266 | if (IS_ERR(alias)) | 1280 | alias = fuse_d_add_directory(dentry, inode); |
1267 | goto out; | 1281 | mutex_unlock(&fc->inst_mutex); |
1282 | err = PTR_ERR(alias); | ||
1283 | if (IS_ERR(alias)) { | ||
1284 | iput(inode); | ||
1285 | goto out; | ||
1286 | } | ||
1287 | } else { | ||
1288 | alias = d_splice_alias(inode, dentry); | ||
1289 | } | ||
1290 | |||
1268 | if (alias) { | 1291 | if (alias) { |
1269 | dput(dentry); | 1292 | dput(dentry); |
1270 | dentry = alias; | 1293 | dentry = alias; |
1271 | } | 1294 | } |
1272 | 1295 | ||
1273 | found: | 1296 | found: |
1274 | fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o), | ||
1275 | attr_version); | ||
1276 | |||
1277 | fuse_change_entry_timeout(dentry, o); | 1297 | fuse_change_entry_timeout(dentry, o); |
1278 | 1298 | ||
1279 | err = 0; | 1299 | err = 0; |
1280 | out: | 1300 | out: |
1281 | if (dentry) | 1301 | dput(dentry); |
1282 | dput(dentry); | ||
1283 | return err; | 1302 | return err; |
1284 | } | 1303 | } |
1285 | 1304 | ||
1286 | static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, | 1305 | static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, |
1287 | void *dstbuf, filldir_t filldir, u64 attr_version) | 1306 | struct dir_context *ctx, u64 attr_version) |
1288 | { | 1307 | { |
1289 | struct fuse_direntplus *direntplus; | 1308 | struct fuse_direntplus *direntplus; |
1290 | struct fuse_dirent *dirent; | 1309 | struct fuse_dirent *dirent; |
@@ -1309,10 +1328,9 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, | |||
1309 | we need to send a FORGET for each of those | 1328 | we need to send a FORGET for each of those |
1310 | which we did not link. | 1329 | which we did not link. |
1311 | */ | 1330 | */ |
1312 | over = filldir(dstbuf, dirent->name, dirent->namelen, | 1331 | over = !dir_emit(ctx, dirent->name, dirent->namelen, |
1313 | file->f_pos, dirent->ino, | 1332 | dirent->ino, dirent->type); |
1314 | dirent->type); | 1333 | ctx->pos = dirent->off; |
1315 | file->f_pos = dirent->off; | ||
1316 | } | 1334 | } |
1317 | 1335 | ||
1318 | buf += reclen; | 1336 | buf += reclen; |
@@ -1326,7 +1344,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, | |||
1326 | return 0; | 1344 | return 0; |
1327 | } | 1345 | } |
1328 | 1346 | ||
1329 | static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | 1347 | static int fuse_readdir(struct file *file, struct dir_context *ctx) |
1330 | { | 1348 | { |
1331 | int plus, err; | 1349 | int plus, err; |
1332 | size_t nbytes; | 1350 | size_t nbytes; |
@@ -1349,17 +1367,17 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | |||
1349 | return -ENOMEM; | 1367 | return -ENOMEM; |
1350 | } | 1368 | } |
1351 | 1369 | ||
1352 | plus = fuse_use_readdirplus(inode, file); | 1370 | plus = fuse_use_readdirplus(inode, ctx); |
1353 | req->out.argpages = 1; | 1371 | req->out.argpages = 1; |
1354 | req->num_pages = 1; | 1372 | req->num_pages = 1; |
1355 | req->pages[0] = page; | 1373 | req->pages[0] = page; |
1356 | req->page_descs[0].length = PAGE_SIZE; | 1374 | req->page_descs[0].length = PAGE_SIZE; |
1357 | if (plus) { | 1375 | if (plus) { |
1358 | attr_version = fuse_get_attr_version(fc); | 1376 | attr_version = fuse_get_attr_version(fc); |
1359 | fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, | 1377 | fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, |
1360 | FUSE_READDIRPLUS); | 1378 | FUSE_READDIRPLUS); |
1361 | } else { | 1379 | } else { |
1362 | fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, | 1380 | fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, |
1363 | FUSE_READDIR); | 1381 | FUSE_READDIR); |
1364 | } | 1382 | } |
1365 | fuse_request_send(fc, req); | 1383 | fuse_request_send(fc, req); |
@@ -1369,11 +1387,11 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | |||
1369 | if (!err) { | 1387 | if (!err) { |
1370 | if (plus) { | 1388 | if (plus) { |
1371 | err = parse_dirplusfile(page_address(page), nbytes, | 1389 | err = parse_dirplusfile(page_address(page), nbytes, |
1372 | file, dstbuf, filldir, | 1390 | file, ctx, |
1373 | attr_version); | 1391 | attr_version); |
1374 | } else { | 1392 | } else { |
1375 | err = parse_dirfile(page_address(page), nbytes, file, | 1393 | err = parse_dirfile(page_address(page), nbytes, file, |
1376 | dstbuf, filldir); | 1394 | ctx); |
1377 | } | 1395 | } |
1378 | } | 1396 | } |
1379 | 1397 | ||
@@ -1886,7 +1904,7 @@ static const struct inode_operations fuse_dir_inode_operations = { | |||
1886 | static const struct file_operations fuse_dir_operations = { | 1904 | static const struct file_operations fuse_dir_operations = { |
1887 | .llseek = generic_file_llseek, | 1905 | .llseek = generic_file_llseek, |
1888 | .read = generic_read_dir, | 1906 | .read = generic_read_dir, |
1889 | .readdir = fuse_readdir, | 1907 | .iterate = fuse_readdir, |
1890 | .open = fuse_dir_open, | 1908 | .open = fuse_dir_open, |
1891 | .release = fuse_dir_release, | 1909 | .release = fuse_dir_release, |
1892 | .fsync = fuse_dir_fsync, | 1910 | .fsync = fuse_dir_fsync, |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 35f281033142..5c121fe19c5f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -548,8 +548,7 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) | |||
548 | res = io->bytes < 0 ? io->size : io->bytes; | 548 | res = io->bytes < 0 ? io->size : io->bytes; |
549 | 549 | ||
550 | if (!is_sync_kiocb(io->iocb)) { | 550 | if (!is_sync_kiocb(io->iocb)) { |
551 | struct path *path = &io->iocb->ki_filp->f_path; | 551 | struct inode *inode = file_inode(io->iocb->ki_filp); |
552 | struct inode *inode = path->dentry->d_inode; | ||
553 | struct fuse_conn *fc = get_fuse_conn(inode); | 552 | struct fuse_conn *fc = get_fuse_conn(inode); |
554 | struct fuse_inode *fi = get_fuse_inode(inode); | 553 | struct fuse_inode *fi = get_fuse_inode(inode); |
555 | 554 | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9a0cdde14a08..0b578598c6ac 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -785,7 +785,7 @@ static const struct super_operations fuse_super_operations = { | |||
785 | static void sanitize_global_limit(unsigned *limit) | 785 | static void sanitize_global_limit(unsigned *limit) |
786 | { | 786 | { |
787 | if (*limit == 0) | 787 | if (*limit == 0) |
788 | *limit = ((num_physpages << PAGE_SHIFT) >> 13) / | 788 | *limit = ((totalram_pages << PAGE_SHIFT) >> 13) / |
789 | sizeof(struct fuse_req); | 789 | sizeof(struct fuse_req); |
790 | 790 | ||
791 | if (*limit >= 1 << 16) | 791 | if (*limit >= 1 << 16) |
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 5a376ab81feb..90c6a8faaecb 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
@@ -20,13 +20,12 @@ config GFS2_FS | |||
20 | be found here: http://sources.redhat.com/cluster | 20 | be found here: http://sources.redhat.com/cluster |
21 | 21 | ||
22 | The "nolock" lock module is now built in to GFS2 by default. If | 22 | The "nolock" lock module is now built in to GFS2 by default. If |
23 | you want to use the DLM, be sure to enable HOTPLUG and IPv4/6 | 23 | you want to use the DLM, be sure to enable IPv4/6 networking. |
24 | networking. | ||
25 | 24 | ||
26 | config GFS2_FS_LOCKING_DLM | 25 | config GFS2_FS_LOCKING_DLM |
27 | bool "GFS2 DLM locking" | 26 | bool "GFS2 DLM locking" |
28 | depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \ | 27 | depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \ |
29 | HOTPLUG && CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS) | 28 | CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS) |
30 | help | 29 | help |
31 | Multiple node locking module for GFS2 | 30 | Multiple node locking module for GFS2 |
32 | 31 | ||
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 0bad69ed6336..ee48ad37d9c0 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -110,7 +110,7 @@ static int gfs2_writepage_common(struct page *page, | |||
110 | /* Is the page fully outside i_size? (truncate in progress) */ | 110 | /* Is the page fully outside i_size? (truncate in progress) */ |
111 | offset = i_size & (PAGE_CACHE_SIZE-1); | 111 | offset = i_size & (PAGE_CACHE_SIZE-1); |
112 | if (page->index > end_index || (page->index == end_index && !offset)) { | 112 | if (page->index > end_index || (page->index == end_index && !offset)) { |
113 | page->mapping->a_ops->invalidatepage(page, 0); | 113 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); |
114 | goto out; | 114 | goto out; |
115 | } | 115 | } |
116 | return 1; | 116 | return 1; |
@@ -299,7 +299,8 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, | |||
299 | 299 | ||
300 | /* Is the page fully outside i_size? (truncate in progress) */ | 300 | /* Is the page fully outside i_size? (truncate in progress) */ |
301 | if (page->index > end_index || (page->index == end_index && !offset)) { | 301 | if (page->index > end_index || (page->index == end_index && !offset)) { |
302 | page->mapping->a_ops->invalidatepage(page, 0); | 302 | page->mapping->a_ops->invalidatepage(page, 0, |
303 | PAGE_CACHE_SIZE); | ||
303 | unlock_page(page); | 304 | unlock_page(page); |
304 | continue; | 305 | continue; |
305 | } | 306 | } |
@@ -943,27 +944,33 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) | |||
943 | unlock_buffer(bh); | 944 | unlock_buffer(bh); |
944 | } | 945 | } |
945 | 946 | ||
946 | static void gfs2_invalidatepage(struct page *page, unsigned long offset) | 947 | static void gfs2_invalidatepage(struct page *page, unsigned int offset, |
948 | unsigned int length) | ||
947 | { | 949 | { |
948 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); | 950 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); |
951 | unsigned int stop = offset + length; | ||
952 | int partial_page = (offset || length < PAGE_CACHE_SIZE); | ||
949 | struct buffer_head *bh, *head; | 953 | struct buffer_head *bh, *head; |
950 | unsigned long pos = 0; | 954 | unsigned long pos = 0; |
951 | 955 | ||
952 | BUG_ON(!PageLocked(page)); | 956 | BUG_ON(!PageLocked(page)); |
953 | if (offset == 0) | 957 | if (!partial_page) |
954 | ClearPageChecked(page); | 958 | ClearPageChecked(page); |
955 | if (!page_has_buffers(page)) | 959 | if (!page_has_buffers(page)) |
956 | goto out; | 960 | goto out; |
957 | 961 | ||
958 | bh = head = page_buffers(page); | 962 | bh = head = page_buffers(page); |
959 | do { | 963 | do { |
964 | if (pos + bh->b_size > stop) | ||
965 | return; | ||
966 | |||
960 | if (offset <= pos) | 967 | if (offset <= pos) |
961 | gfs2_discard(sdp, bh); | 968 | gfs2_discard(sdp, bh); |
962 | pos += bh->b_size; | 969 | pos += bh->b_size; |
963 | bh = bh->b_this_page; | 970 | bh = bh->b_this_page; |
964 | } while (bh != head); | 971 | } while (bh != head); |
965 | out: | 972 | out: |
966 | if (offset == 0) | 973 | if (!partial_page) |
967 | try_to_release_page(page, 0); | 974 | try_to_release_page(page, 0); |
968 | } | 975 | } |
969 | 976 | ||
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 93b5809c20bb..5e2f56fccf6b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -1232,7 +1232,9 @@ static int do_grow(struct inode *inode, u64 size) | |||
1232 | unstuff = 1; | 1232 | unstuff = 1; |
1233 | } | 1233 | } |
1234 | 1234 | ||
1235 | error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0); | 1235 | error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT + |
1236 | (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ? | ||
1237 | 0 : RES_QUOTA), 0); | ||
1236 | if (error) | 1238 | if (error) |
1237 | goto do_grow_release; | 1239 | goto do_grow_release; |
1238 | 1240 | ||
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 4fddb3c22d25..f2448ab2aac5 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c | |||
@@ -109,8 +109,7 @@ fail: | |||
109 | return 0; | 109 | return 0; |
110 | } | 110 | } |
111 | 111 | ||
112 | static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode, | 112 | static int gfs2_dhash(const struct dentry *dentry, struct qstr *str) |
113 | struct qstr *str) | ||
114 | { | 113 | { |
115 | str->hash = gfs2_disk_hash(str->name, str->len); | 114 | str->hash = gfs2_disk_hash(str->name, str->len); |
116 | return 0; | 115 | return 0; |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index b631c9043460..0cb4c1557f20 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -1125,13 +1125,14 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
1125 | if (IS_ERR(hc)) | 1125 | if (IS_ERR(hc)) |
1126 | return PTR_ERR(hc); | 1126 | return PTR_ERR(hc); |
1127 | 1127 | ||
1128 | h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS | __GFP_NOWARN); | 1128 | hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS | __GFP_NOWARN); |
1129 | if (hc2 == NULL) | 1129 | if (hc2 == NULL) |
1130 | hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL); | 1130 | hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL); |
1131 | 1131 | ||
1132 | if (!hc2) | 1132 | if (!hc2) |
1133 | return -ENOMEM; | 1133 | return -ENOMEM; |
1134 | 1134 | ||
1135 | h = hc2; | ||
1135 | error = gfs2_meta_inode_buffer(dip, &dibh); | 1136 | error = gfs2_meta_inode_buffer(dip, &dibh); |
1136 | if (error) | 1137 | if (error) |
1137 | goto out_kfree; | 1138 | goto out_kfree; |
@@ -1212,9 +1213,7 @@ static int compare_dents(const void *a, const void *b) | |||
1212 | /** | 1213 | /** |
1213 | * do_filldir_main - read out directory entries | 1214 | * do_filldir_main - read out directory entries |
1214 | * @dip: The GFS2 inode | 1215 | * @dip: The GFS2 inode |
1215 | * @offset: The offset in the file to read from | 1216 | * @ctx: what to feed the entries to |
1216 | * @opaque: opaque data to pass to filldir | ||
1217 | * @filldir: The function to pass entries to | ||
1218 | * @darr: an array of struct gfs2_dirent pointers to read | 1217 | * @darr: an array of struct gfs2_dirent pointers to read |
1219 | * @entries: the number of entries in darr | 1218 | * @entries: the number of entries in darr |
1220 | * @copied: pointer to int that's non-zero if a entry has been copied out | 1219 | * @copied: pointer to int that's non-zero if a entry has been copied out |
@@ -1224,11 +1223,10 @@ static int compare_dents(const void *a, const void *b) | |||
1224 | * the possibility that they will fall into different readdir buffers or | 1223 | * the possibility that they will fall into different readdir buffers or |
1225 | * that someone will want to seek to that location. | 1224 | * that someone will want to seek to that location. |
1226 | * | 1225 | * |
1227 | * Returns: errno, >0 on exception from filldir | 1226 | * Returns: errno, >0 if the actor tells you to stop |
1228 | */ | 1227 | */ |
1229 | 1228 | ||
1230 | static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | 1229 | static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx, |
1231 | void *opaque, filldir_t filldir, | ||
1232 | const struct gfs2_dirent **darr, u32 entries, | 1230 | const struct gfs2_dirent **darr, u32 entries, |
1233 | int *copied) | 1231 | int *copied) |
1234 | { | 1232 | { |
@@ -1236,7 +1234,6 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | |||
1236 | u64 off, off_next; | 1234 | u64 off, off_next; |
1237 | unsigned int x, y; | 1235 | unsigned int x, y; |
1238 | int run = 0; | 1236 | int run = 0; |
1239 | int error = 0; | ||
1240 | 1237 | ||
1241 | sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL); | 1238 | sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL); |
1242 | 1239 | ||
@@ -1253,9 +1250,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | |||
1253 | off_next = be32_to_cpu(dent_next->de_hash); | 1250 | off_next = be32_to_cpu(dent_next->de_hash); |
1254 | off_next = gfs2_disk_hash2offset(off_next); | 1251 | off_next = gfs2_disk_hash2offset(off_next); |
1255 | 1252 | ||
1256 | if (off < *offset) | 1253 | if (off < ctx->pos) |
1257 | continue; | 1254 | continue; |
1258 | *offset = off; | 1255 | ctx->pos = off; |
1259 | 1256 | ||
1260 | if (off_next == off) { | 1257 | if (off_next == off) { |
1261 | if (*copied && !run) | 1258 | if (*copied && !run) |
@@ -1264,26 +1261,25 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | |||
1264 | } else | 1261 | } else |
1265 | run = 0; | 1262 | run = 0; |
1266 | } else { | 1263 | } else { |
1267 | if (off < *offset) | 1264 | if (off < ctx->pos) |
1268 | continue; | 1265 | continue; |
1269 | *offset = off; | 1266 | ctx->pos = off; |
1270 | } | 1267 | } |
1271 | 1268 | ||
1272 | error = filldir(opaque, (const char *)(dent + 1), | 1269 | if (!dir_emit(ctx, (const char *)(dent + 1), |
1273 | be16_to_cpu(dent->de_name_len), | 1270 | be16_to_cpu(dent->de_name_len), |
1274 | off, be64_to_cpu(dent->de_inum.no_addr), | 1271 | be64_to_cpu(dent->de_inum.no_addr), |
1275 | be16_to_cpu(dent->de_type)); | 1272 | be16_to_cpu(dent->de_type))) |
1276 | if (error) | ||
1277 | return 1; | 1273 | return 1; |
1278 | 1274 | ||
1279 | *copied = 1; | 1275 | *copied = 1; |
1280 | } | 1276 | } |
1281 | 1277 | ||
1282 | /* Increment the *offset by one, so the next time we come into the | 1278 | /* Increment the ctx->pos by one, so the next time we come into the |
1283 | do_filldir fxn, we get the next entry instead of the last one in the | 1279 | do_filldir fxn, we get the next entry instead of the last one in the |
1284 | current leaf */ | 1280 | current leaf */ |
1285 | 1281 | ||
1286 | (*offset)++; | 1282 | ctx->pos++; |
1287 | 1283 | ||
1288 | return 0; | 1284 | return 0; |
1289 | } | 1285 | } |
@@ -1307,8 +1303,8 @@ static void gfs2_free_sort_buffer(void *ptr) | |||
1307 | kfree(ptr); | 1303 | kfree(ptr); |
1308 | } | 1304 | } |
1309 | 1305 | ||
1310 | static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, | 1306 | static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, |
1311 | filldir_t filldir, int *copied, unsigned *depth, | 1307 | int *copied, unsigned *depth, |
1312 | u64 leaf_no) | 1308 | u64 leaf_no) |
1313 | { | 1309 | { |
1314 | struct gfs2_inode *ip = GFS2_I(inode); | 1310 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -1386,8 +1382,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, | |||
1386 | } while(lfn); | 1382 | } while(lfn); |
1387 | 1383 | ||
1388 | BUG_ON(entries2 != entries); | 1384 | BUG_ON(entries2 != entries); |
1389 | error = do_filldir_main(ip, offset, opaque, filldir, darr, | 1385 | error = do_filldir_main(ip, ctx, darr, entries, copied); |
1390 | entries, copied); | ||
1391 | out_free: | 1386 | out_free: |
1392 | for(i = 0; i < leaf; i++) | 1387 | for(i = 0; i < leaf; i++) |
1393 | brelse(larr[i]); | 1388 | brelse(larr[i]); |
@@ -1446,15 +1441,13 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, | |||
1446 | /** | 1441 | /** |
1447 | * dir_e_read - Reads the entries from a directory into a filldir buffer | 1442 | * dir_e_read - Reads the entries from a directory into a filldir buffer |
1448 | * @dip: dinode pointer | 1443 | * @dip: dinode pointer |
1449 | * @offset: the hash of the last entry read shifted to the right once | 1444 | * @ctx: actor to feed the entries to |
1450 | * @opaque: buffer for the filldir function to fill | ||
1451 | * @filldir: points to the filldir function to use | ||
1452 | * | 1445 | * |
1453 | * Returns: errno | 1446 | * Returns: errno |
1454 | */ | 1447 | */ |
1455 | 1448 | ||
1456 | static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | 1449 | static int dir_e_read(struct inode *inode, struct dir_context *ctx, |
1457 | filldir_t filldir, struct file_ra_state *f_ra) | 1450 | struct file_ra_state *f_ra) |
1458 | { | 1451 | { |
1459 | struct gfs2_inode *dip = GFS2_I(inode); | 1452 | struct gfs2_inode *dip = GFS2_I(inode); |
1460 | u32 hsize, len = 0; | 1453 | u32 hsize, len = 0; |
@@ -1465,7 +1458,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
1465 | unsigned depth = 0; | 1458 | unsigned depth = 0; |
1466 | 1459 | ||
1467 | hsize = 1 << dip->i_depth; | 1460 | hsize = 1 << dip->i_depth; |
1468 | hash = gfs2_dir_offset2hash(*offset); | 1461 | hash = gfs2_dir_offset2hash(ctx->pos); |
1469 | index = hash >> (32 - dip->i_depth); | 1462 | index = hash >> (32 - dip->i_depth); |
1470 | 1463 | ||
1471 | if (dip->i_hash_cache == NULL) | 1464 | if (dip->i_hash_cache == NULL) |
@@ -1477,7 +1470,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
1477 | gfs2_dir_readahead(inode, hsize, index, f_ra); | 1470 | gfs2_dir_readahead(inode, hsize, index, f_ra); |
1478 | 1471 | ||
1479 | while (index < hsize) { | 1472 | while (index < hsize) { |
1480 | error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, | 1473 | error = gfs2_dir_read_leaf(inode, ctx, |
1481 | &copied, &depth, | 1474 | &copied, &depth, |
1482 | be64_to_cpu(lp[index])); | 1475 | be64_to_cpu(lp[index])); |
1483 | if (error) | 1476 | if (error) |
@@ -1492,8 +1485,8 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
1492 | return error; | 1485 | return error; |
1493 | } | 1486 | } |
1494 | 1487 | ||
1495 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | 1488 | int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, |
1496 | filldir_t filldir, struct file_ra_state *f_ra) | 1489 | struct file_ra_state *f_ra) |
1497 | { | 1490 | { |
1498 | struct gfs2_inode *dip = GFS2_I(inode); | 1491 | struct gfs2_inode *dip = GFS2_I(inode); |
1499 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 1492 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
@@ -1507,7 +1500,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | |||
1507 | return 0; | 1500 | return 0; |
1508 | 1501 | ||
1509 | if (dip->i_diskflags & GFS2_DIF_EXHASH) | 1502 | if (dip->i_diskflags & GFS2_DIF_EXHASH) |
1510 | return dir_e_read(inode, offset, opaque, filldir, f_ra); | 1503 | return dir_e_read(inode, ctx, f_ra); |
1511 | 1504 | ||
1512 | if (!gfs2_is_stuffed(dip)) { | 1505 | if (!gfs2_is_stuffed(dip)) { |
1513 | gfs2_consist_inode(dip); | 1506 | gfs2_consist_inode(dip); |
@@ -1539,7 +1532,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | |||
1539 | error = -EIO; | 1532 | error = -EIO; |
1540 | goto out; | 1533 | goto out; |
1541 | } | 1534 | } |
1542 | error = do_filldir_main(dip, offset, opaque, filldir, darr, | 1535 | error = do_filldir_main(dip, ctx, darr, |
1543 | dip->i_entries, &copied); | 1536 | dip->i_entries, &copied); |
1544 | out: | 1537 | out: |
1545 | kfree(darr); | 1538 | kfree(darr); |
@@ -1555,9 +1548,9 @@ out: | |||
1555 | 1548 | ||
1556 | /** | 1549 | /** |
1557 | * gfs2_dir_search - Search a directory | 1550 | * gfs2_dir_search - Search a directory |
1558 | * @dip: The GFS2 inode | 1551 | * @dip: The GFS2 dir inode |
1559 | * @filename: | 1552 | * @name: The name we are looking up |
1560 | * @inode: | 1553 | * @fail_on_exist: Fail if the name exists rather than looking it up |
1561 | * | 1554 | * |
1562 | * This routine searches a directory for a file or another directory. | 1555 | * This routine searches a directory for a file or another directory. |
1563 | * Assumes a glock is held on dip. | 1556 | * Assumes a glock is held on dip. |
@@ -1565,22 +1558,25 @@ out: | |||
1565 | * Returns: errno | 1558 | * Returns: errno |
1566 | */ | 1559 | */ |
1567 | 1560 | ||
1568 | struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) | 1561 | struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name, |
1562 | bool fail_on_exist) | ||
1569 | { | 1563 | { |
1570 | struct buffer_head *bh; | 1564 | struct buffer_head *bh; |
1571 | struct gfs2_dirent *dent; | 1565 | struct gfs2_dirent *dent; |
1572 | struct inode *inode; | 1566 | u64 addr, formal_ino; |
1567 | u16 dtype; | ||
1573 | 1568 | ||
1574 | dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); | 1569 | dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); |
1575 | if (dent) { | 1570 | if (dent) { |
1576 | if (IS_ERR(dent)) | 1571 | if (IS_ERR(dent)) |
1577 | return ERR_CAST(dent); | 1572 | return ERR_CAST(dent); |
1578 | inode = gfs2_inode_lookup(dir->i_sb, | 1573 | dtype = be16_to_cpu(dent->de_type); |
1579 | be16_to_cpu(dent->de_type), | 1574 | addr = be64_to_cpu(dent->de_inum.no_addr); |
1580 | be64_to_cpu(dent->de_inum.no_addr), | 1575 | formal_ino = be64_to_cpu(dent->de_inum.no_formal_ino); |
1581 | be64_to_cpu(dent->de_inum.no_formal_ino), 0); | ||
1582 | brelse(bh); | 1576 | brelse(bh); |
1583 | return inode; | 1577 | if (fail_on_exist) |
1578 | return ERR_PTR(-EEXIST); | ||
1579 | return gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0); | ||
1584 | } | 1580 | } |
1585 | return ERR_PTR(-ENOENT); | 1581 | return ERR_PTR(-ENOENT); |
1586 | } | 1582 | } |
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 98c960beab35..4f03bbd1873f 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
@@ -18,14 +18,15 @@ struct gfs2_inode; | |||
18 | struct gfs2_inum; | 18 | struct gfs2_inum; |
19 | 19 | ||
20 | extern struct inode *gfs2_dir_search(struct inode *dir, | 20 | extern struct inode *gfs2_dir_search(struct inode *dir, |
21 | const struct qstr *filename); | 21 | const struct qstr *filename, |
22 | bool fail_on_exist); | ||
22 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, | 23 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, |
23 | const struct gfs2_inode *ip); | 24 | const struct gfs2_inode *ip); |
24 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, | 25 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, |
25 | const struct gfs2_inode *ip); | 26 | const struct gfs2_inode *ip); |
26 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); | 27 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); |
27 | extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | 28 | extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, |
28 | filldir_t filldir, struct file_ra_state *f_ra); | 29 | struct file_ra_state *f_ra); |
29 | extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | 30 | extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, |
30 | const struct gfs2_inode *nip, unsigned int new_type); | 31 | const struct gfs2_inode *nip, unsigned int new_type); |
31 | 32 | ||
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 9973df4ff565..8b9b3775e2e7 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c | |||
@@ -64,6 +64,7 @@ static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len, | |||
64 | } | 64 | } |
65 | 65 | ||
66 | struct get_name_filldir { | 66 | struct get_name_filldir { |
67 | struct dir_context ctx; | ||
67 | struct gfs2_inum_host inum; | 68 | struct gfs2_inum_host inum; |
68 | char *name; | 69 | char *name; |
69 | }; | 70 | }; |
@@ -88,9 +89,11 @@ static int gfs2_get_name(struct dentry *parent, char *name, | |||
88 | struct inode *dir = parent->d_inode; | 89 | struct inode *dir = parent->d_inode; |
89 | struct inode *inode = child->d_inode; | 90 | struct inode *inode = child->d_inode; |
90 | struct gfs2_inode *dip, *ip; | 91 | struct gfs2_inode *dip, *ip; |
91 | struct get_name_filldir gnfd; | 92 | struct get_name_filldir gnfd = { |
93 | .ctx.actor = get_name_filldir, | ||
94 | .name = name | ||
95 | }; | ||
92 | struct gfs2_holder gh; | 96 | struct gfs2_holder gh; |
93 | u64 offset = 0; | ||
94 | int error; | 97 | int error; |
95 | struct file_ra_state f_ra = { .start = 0 }; | 98 | struct file_ra_state f_ra = { .start = 0 }; |
96 | 99 | ||
@@ -106,13 +109,12 @@ static int gfs2_get_name(struct dentry *parent, char *name, | |||
106 | *name = 0; | 109 | *name = 0; |
107 | gnfd.inum.no_addr = ip->i_no_addr; | 110 | gnfd.inum.no_addr = ip->i_no_addr; |
108 | gnfd.inum.no_formal_ino = ip->i_no_formal_ino; | 111 | gnfd.inum.no_formal_ino = ip->i_no_formal_ino; |
109 | gnfd.name = name; | ||
110 | 112 | ||
111 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); | 113 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); |
112 | if (error) | 114 | if (error) |
113 | return error; | 115 | return error; |
114 | 116 | ||
115 | error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir, &f_ra); | 117 | error = gfs2_dir_read(dir, &gnfd.ctx, &f_ra); |
116 | 118 | ||
117 | gfs2_glock_dq_uninit(&gh); | 119 | gfs2_glock_dq_uninit(&gh); |
118 | 120 | ||
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index ad0dc38d87ab..72c3866a7320 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -82,35 +82,28 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence) | |||
82 | } | 82 | } |
83 | 83 | ||
84 | /** | 84 | /** |
85 | * gfs2_readdir - Read directory entries from a directory | 85 | * gfs2_readdir - Iterator for a directory |
86 | * @file: The directory to read from | 86 | * @file: The directory to read from |
87 | * @dirent: Buffer for dirents | 87 | * @ctx: What to feed directory entries to |
88 | * @filldir: Function used to do the copying | ||
89 | * | 88 | * |
90 | * Returns: errno | 89 | * Returns: errno |
91 | */ | 90 | */ |
92 | 91 | ||
93 | static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | 92 | static int gfs2_readdir(struct file *file, struct dir_context *ctx) |
94 | { | 93 | { |
95 | struct inode *dir = file->f_mapping->host; | 94 | struct inode *dir = file->f_mapping->host; |
96 | struct gfs2_inode *dip = GFS2_I(dir); | 95 | struct gfs2_inode *dip = GFS2_I(dir); |
97 | struct gfs2_holder d_gh; | 96 | struct gfs2_holder d_gh; |
98 | u64 offset = file->f_pos; | ||
99 | int error; | 97 | int error; |
100 | 98 | ||
101 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | 99 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
102 | error = gfs2_glock_nq(&d_gh); | 100 | if (error) |
103 | if (error) { | ||
104 | gfs2_holder_uninit(&d_gh); | ||
105 | return error; | 101 | return error; |
106 | } | ||
107 | 102 | ||
108 | error = gfs2_dir_read(dir, &offset, dirent, filldir, &file->f_ra); | 103 | error = gfs2_dir_read(dir, ctx, &file->f_ra); |
109 | 104 | ||
110 | gfs2_glock_dq_uninit(&d_gh); | 105 | gfs2_glock_dq_uninit(&d_gh); |
111 | 106 | ||
112 | file->f_pos = offset; | ||
113 | |||
114 | return error; | 107 | return error; |
115 | } | 108 | } |
116 | 109 | ||
@@ -538,21 +531,30 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
538 | } | 531 | } |
539 | 532 | ||
540 | /** | 533 | /** |
541 | * gfs2_open - open a file | 534 | * gfs2_open_common - This is common to open and atomic_open |
542 | * @inode: the inode to open | 535 | * @inode: The inode being opened |
543 | * @file: the struct file for this opening | 536 | * @file: The file being opened |
544 | * | 537 | * |
545 | * Returns: errno | 538 | * This maybe called under a glock or not depending upon how it has |
539 | * been called. We must always be called under a glock for regular | ||
540 | * files, however. For other file types, it does not matter whether | ||
541 | * we hold the glock or not. | ||
542 | * | ||
543 | * Returns: Error code or 0 for success | ||
546 | */ | 544 | */ |
547 | 545 | ||
548 | static int gfs2_open(struct inode *inode, struct file *file) | 546 | int gfs2_open_common(struct inode *inode, struct file *file) |
549 | { | 547 | { |
550 | struct gfs2_inode *ip = GFS2_I(inode); | ||
551 | struct gfs2_holder i_gh; | ||
552 | struct gfs2_file *fp; | 548 | struct gfs2_file *fp; |
553 | int error; | 549 | int ret; |
550 | |||
551 | if (S_ISREG(inode->i_mode)) { | ||
552 | ret = generic_file_open(inode, file); | ||
553 | if (ret) | ||
554 | return ret; | ||
555 | } | ||
554 | 556 | ||
555 | fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL); | 557 | fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS); |
556 | if (!fp) | 558 | if (!fp) |
557 | return -ENOMEM; | 559 | return -ENOMEM; |
558 | 560 | ||
@@ -560,29 +562,43 @@ static int gfs2_open(struct inode *inode, struct file *file) | |||
560 | 562 | ||
561 | gfs2_assert_warn(GFS2_SB(inode), !file->private_data); | 563 | gfs2_assert_warn(GFS2_SB(inode), !file->private_data); |
562 | file->private_data = fp; | 564 | file->private_data = fp; |
565 | return 0; | ||
566 | } | ||
567 | |||
568 | /** | ||
569 | * gfs2_open - open a file | ||
570 | * @inode: the inode to open | ||
571 | * @file: the struct file for this opening | ||
572 | * | ||
573 | * After atomic_open, this function is only used for opening files | ||
574 | * which are already cached. We must still get the glock for regular | ||
575 | * files to ensure that we have the file size uptodate for the large | ||
576 | * file check which is in the common code. That is only an issue for | ||
577 | * regular files though. | ||
578 | * | ||
579 | * Returns: errno | ||
580 | */ | ||
581 | |||
582 | static int gfs2_open(struct inode *inode, struct file *file) | ||
583 | { | ||
584 | struct gfs2_inode *ip = GFS2_I(inode); | ||
585 | struct gfs2_holder i_gh; | ||
586 | int error; | ||
587 | bool need_unlock = false; | ||
563 | 588 | ||
564 | if (S_ISREG(ip->i_inode.i_mode)) { | 589 | if (S_ISREG(ip->i_inode.i_mode)) { |
565 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, | 590 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, |
566 | &i_gh); | 591 | &i_gh); |
567 | if (error) | 592 | if (error) |
568 | goto fail; | 593 | return error; |
594 | need_unlock = true; | ||
595 | } | ||
569 | 596 | ||
570 | if (!(file->f_flags & O_LARGEFILE) && | 597 | error = gfs2_open_common(inode, file); |
571 | i_size_read(inode) > MAX_NON_LFS) { | ||
572 | error = -EOVERFLOW; | ||
573 | goto fail_gunlock; | ||
574 | } | ||
575 | 598 | ||
599 | if (need_unlock) | ||
576 | gfs2_glock_dq_uninit(&i_gh); | 600 | gfs2_glock_dq_uninit(&i_gh); |
577 | } | ||
578 | 601 | ||
579 | return 0; | ||
580 | |||
581 | fail_gunlock: | ||
582 | gfs2_glock_dq_uninit(&i_gh); | ||
583 | fail: | ||
584 | file->private_data = NULL; | ||
585 | kfree(fp); | ||
586 | return error; | 602 | return error; |
587 | } | 603 | } |
588 | 604 | ||
@@ -896,7 +912,7 @@ out_uninit: | |||
896 | * cluster; until we do, disable leases (by just returning -EINVAL), | 912 | * cluster; until we do, disable leases (by just returning -EINVAL), |
897 | * unless the administrator has requested purely local locking. | 913 | * unless the administrator has requested purely local locking. |
898 | * | 914 | * |
899 | * Locking: called under lock_flocks | 915 | * Locking: called under i_lock |
900 | * | 916 | * |
901 | * Returns: errno | 917 | * Returns: errno |
902 | */ | 918 | */ |
@@ -1048,7 +1064,7 @@ const struct file_operations gfs2_file_fops = { | |||
1048 | }; | 1064 | }; |
1049 | 1065 | ||
1050 | const struct file_operations gfs2_dir_fops = { | 1066 | const struct file_operations gfs2_dir_fops = { |
1051 | .readdir = gfs2_readdir, | 1067 | .iterate = gfs2_readdir, |
1052 | .unlocked_ioctl = gfs2_ioctl, | 1068 | .unlocked_ioctl = gfs2_ioctl, |
1053 | .open = gfs2_open, | 1069 | .open = gfs2_open, |
1054 | .release = gfs2_release, | 1070 | .release = gfs2_release, |
@@ -1078,7 +1094,7 @@ const struct file_operations gfs2_file_fops_nolock = { | |||
1078 | }; | 1094 | }; |
1079 | 1095 | ||
1080 | const struct file_operations gfs2_dir_fops_nolock = { | 1096 | const struct file_operations gfs2_dir_fops_nolock = { |
1081 | .readdir = gfs2_readdir, | 1097 | .iterate = gfs2_readdir, |
1082 | .unlocked_ioctl = gfs2_ioctl, | 1098 | .unlocked_ioctl = gfs2_ioctl, |
1083 | .open = gfs2_open, | 1099 | .open = gfs2_open, |
1084 | .release = gfs2_release, | 1100 | .release = gfs2_release, |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index c66e99c97571..5f2e5224c51c 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -54,7 +54,6 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) | |||
54 | struct gfs2_bufdata *bd, *tmp; | 54 | struct gfs2_bufdata *bd, *tmp; |
55 | struct buffer_head *bh; | 55 | struct buffer_head *bh; |
56 | const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock); | 56 | const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock); |
57 | sector_t blocknr; | ||
58 | 57 | ||
59 | gfs2_log_lock(sdp); | 58 | gfs2_log_lock(sdp); |
60 | spin_lock(&sdp->sd_ail_lock); | 59 | spin_lock(&sdp->sd_ail_lock); |
@@ -65,13 +64,6 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) | |||
65 | continue; | 64 | continue; |
66 | gfs2_ail_error(gl, bh); | 65 | gfs2_ail_error(gl, bh); |
67 | } | 66 | } |
68 | blocknr = bh->b_blocknr; | ||
69 | bh->b_private = NULL; | ||
70 | gfs2_remove_from_ail(bd); /* drops ref on bh */ | ||
71 | |||
72 | bd->bd_bh = NULL; | ||
73 | bd->bd_blkno = blocknr; | ||
74 | |||
75 | gfs2_trans_add_revoke(sdp, bd); | 67 | gfs2_trans_add_revoke(sdp, bd); |
76 | } | 68 | } |
77 | GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); | 69 | GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 62b484e4a9e4..bbb2715171cd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -313,7 +313,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
313 | goto out; | 313 | goto out; |
314 | } | 314 | } |
315 | 315 | ||
316 | inode = gfs2_dir_search(dir, name); | 316 | inode = gfs2_dir_search(dir, name, false); |
317 | if (IS_ERR(inode)) | 317 | if (IS_ERR(inode)) |
318 | error = PTR_ERR(inode); | 318 | error = PTR_ERR(inode); |
319 | out: | 319 | out: |
@@ -346,17 +346,6 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
346 | if (!dip->i_inode.i_nlink) | 346 | if (!dip->i_inode.i_nlink) |
347 | return -ENOENT; | 347 | return -ENOENT; |
348 | 348 | ||
349 | error = gfs2_dir_check(&dip->i_inode, name, NULL); | ||
350 | switch (error) { | ||
351 | case -ENOENT: | ||
352 | error = 0; | ||
353 | break; | ||
354 | case 0: | ||
355 | return -EEXIST; | ||
356 | default: | ||
357 | return error; | ||
358 | } | ||
359 | |||
360 | if (dip->i_entries == (u32)-1) | 349 | if (dip->i_entries == (u32)-1) |
361 | return -EFBIG; | 350 | return -EFBIG; |
362 | if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) | 351 | if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) |
@@ -546,6 +535,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, | |||
546 | * gfs2_create_inode - Create a new inode | 535 | * gfs2_create_inode - Create a new inode |
547 | * @dir: The parent directory | 536 | * @dir: The parent directory |
548 | * @dentry: The new dentry | 537 | * @dentry: The new dentry |
538 | * @file: If non-NULL, the file which is being opened | ||
549 | * @mode: The permissions on the new inode | 539 | * @mode: The permissions on the new inode |
550 | * @dev: For device nodes, this is the device number | 540 | * @dev: For device nodes, this is the device number |
551 | * @symname: For symlinks, this is the link destination | 541 | * @symname: For symlinks, this is the link destination |
@@ -555,8 +545,9 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, | |||
555 | */ | 545 | */ |
556 | 546 | ||
557 | static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | 547 | static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, |
548 | struct file *file, | ||
558 | umode_t mode, dev_t dev, const char *symname, | 549 | umode_t mode, dev_t dev, const char *symname, |
559 | unsigned int size, int excl) | 550 | unsigned int size, int excl, int *opened) |
560 | { | 551 | { |
561 | const struct qstr *name = &dentry->d_name; | 552 | const struct qstr *name = &dentry->d_name; |
562 | struct gfs2_holder ghs[2]; | 553 | struct gfs2_holder ghs[2]; |
@@ -564,6 +555,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
564 | struct gfs2_inode *dip = GFS2_I(dir), *ip; | 555 | struct gfs2_inode *dip = GFS2_I(dir), *ip; |
565 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 556 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
566 | struct gfs2_glock *io_gl; | 557 | struct gfs2_glock *io_gl; |
558 | struct dentry *d; | ||
567 | int error; | 559 | int error; |
568 | u32 aflags = 0; | 560 | u32 aflags = 0; |
569 | int arq; | 561 | int arq; |
@@ -584,15 +576,30 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
584 | goto fail; | 576 | goto fail; |
585 | 577 | ||
586 | error = create_ok(dip, name, mode); | 578 | error = create_ok(dip, name, mode); |
587 | if ((error == -EEXIST) && S_ISREG(mode) && !excl) { | ||
588 | inode = gfs2_lookupi(dir, &dentry->d_name, 0); | ||
589 | gfs2_glock_dq_uninit(ghs); | ||
590 | d_instantiate(dentry, inode); | ||
591 | return IS_ERR(inode) ? PTR_ERR(inode) : 0; | ||
592 | } | ||
593 | if (error) | 579 | if (error) |
594 | goto fail_gunlock; | 580 | goto fail_gunlock; |
595 | 581 | ||
582 | inode = gfs2_dir_search(dir, &dentry->d_name, !S_ISREG(mode) || excl); | ||
583 | error = PTR_ERR(inode); | ||
584 | if (!IS_ERR(inode)) { | ||
585 | d = d_splice_alias(inode, dentry); | ||
586 | error = 0; | ||
587 | if (file && !IS_ERR(d)) { | ||
588 | if (d == NULL) | ||
589 | d = dentry; | ||
590 | if (S_ISREG(inode->i_mode)) | ||
591 | error = finish_open(file, d, gfs2_open_common, opened); | ||
592 | else | ||
593 | error = finish_no_open(file, d); | ||
594 | } | ||
595 | gfs2_glock_dq_uninit(ghs); | ||
596 | if (IS_ERR(d)) | ||
597 | return PTR_RET(d); | ||
598 | return error; | ||
599 | } else if (error != -ENOENT) { | ||
600 | goto fail_gunlock; | ||
601 | } | ||
602 | |||
596 | arq = error = gfs2_diradd_alloc_required(dir, name); | 603 | arq = error = gfs2_diradd_alloc_required(dir, name); |
597 | if (error < 0) | 604 | if (error < 0) |
598 | goto fail_gunlock; | 605 | goto fail_gunlock; |
@@ -686,10 +693,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
686 | goto fail_gunlock3; | 693 | goto fail_gunlock3; |
687 | 694 | ||
688 | mark_inode_dirty(inode); | 695 | mark_inode_dirty(inode); |
696 | d_instantiate(dentry, inode); | ||
697 | if (file) | ||
698 | error = finish_open(file, dentry, gfs2_open_common, opened); | ||
689 | gfs2_glock_dq_uninit(ghs); | 699 | gfs2_glock_dq_uninit(ghs); |
690 | gfs2_glock_dq_uninit(ghs + 1); | 700 | gfs2_glock_dq_uninit(ghs + 1); |
691 | d_instantiate(dentry, inode); | 701 | return error; |
692 | return 0; | ||
693 | 702 | ||
694 | fail_gunlock3: | 703 | fail_gunlock3: |
695 | gfs2_glock_dq_uninit(ghs + 1); | 704 | gfs2_glock_dq_uninit(ghs + 1); |
@@ -729,36 +738,56 @@ fail: | |||
729 | static int gfs2_create(struct inode *dir, struct dentry *dentry, | 738 | static int gfs2_create(struct inode *dir, struct dentry *dentry, |
730 | umode_t mode, bool excl) | 739 | umode_t mode, bool excl) |
731 | { | 740 | { |
732 | return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl); | 741 | return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl, NULL); |
733 | } | 742 | } |
734 | 743 | ||
735 | /** | 744 | /** |
736 | * gfs2_lookup - Look up a filename in a directory and return its inode | 745 | * __gfs2_lookup - Look up a filename in a directory and return its inode |
737 | * @dir: The directory inode | 746 | * @dir: The directory inode |
738 | * @dentry: The dentry of the new inode | 747 | * @dentry: The dentry of the new inode |
739 | * @nd: passed from Linux VFS, ignored by us | 748 | * @file: File to be opened |
749 | * @opened: atomic_open flags | ||
740 | * | 750 | * |
741 | * Called by the VFS layer. Lock dir and call gfs2_lookupi() | ||
742 | * | 751 | * |
743 | * Returns: errno | 752 | * Returns: errno |
744 | */ | 753 | */ |
745 | 754 | ||
746 | static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, | 755 | static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, |
747 | unsigned int flags) | 756 | struct file *file, int *opened) |
748 | { | 757 | { |
749 | struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0); | 758 | struct inode *inode; |
750 | if (inode && !IS_ERR(inode)) { | 759 | struct dentry *d; |
751 | struct gfs2_glock *gl = GFS2_I(inode)->i_gl; | 760 | struct gfs2_holder gh; |
752 | struct gfs2_holder gh; | 761 | struct gfs2_glock *gl; |
753 | int error; | 762 | int error; |
754 | error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); | 763 | |
755 | if (error) { | 764 | inode = gfs2_lookupi(dir, &dentry->d_name, 0); |
756 | iput(inode); | 765 | if (!inode) |
757 | return ERR_PTR(error); | 766 | return NULL; |
758 | } | 767 | if (IS_ERR(inode)) |
759 | gfs2_glock_dq_uninit(&gh); | 768 | return ERR_CAST(inode); |
769 | |||
770 | gl = GFS2_I(inode)->i_gl; | ||
771 | error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); | ||
772 | if (error) { | ||
773 | iput(inode); | ||
774 | return ERR_PTR(error); | ||
760 | } | 775 | } |
761 | return d_splice_alias(inode, dentry); | 776 | |
777 | d = d_splice_alias(inode, dentry); | ||
778 | if (file && S_ISREG(inode->i_mode)) | ||
779 | error = finish_open(file, dentry, gfs2_open_common, opened); | ||
780 | |||
781 | gfs2_glock_dq_uninit(&gh); | ||
782 | if (error) | ||
783 | return ERR_PTR(error); | ||
784 | return d; | ||
785 | } | ||
786 | |||
787 | static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, | ||
788 | unsigned flags) | ||
789 | { | ||
790 | return __gfs2_lookup(dir, dentry, NULL, NULL); | ||
762 | } | 791 | } |
763 | 792 | ||
764 | /** | 793 | /** |
@@ -1076,7 +1105,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, | |||
1076 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) | 1105 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) |
1077 | return -ENAMETOOLONG; | 1106 | return -ENAMETOOLONG; |
1078 | 1107 | ||
1079 | return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size, 0); | 1108 | return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0, NULL); |
1080 | } | 1109 | } |
1081 | 1110 | ||
1082 | /** | 1111 | /** |
@@ -1092,7 +1121,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
1092 | { | 1121 | { |
1093 | struct gfs2_sbd *sdp = GFS2_SB(dir); | 1122 | struct gfs2_sbd *sdp = GFS2_SB(dir); |
1094 | unsigned dsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); | 1123 | unsigned dsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); |
1095 | return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, dsize, 0); | 1124 | return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0, NULL); |
1096 | } | 1125 | } |
1097 | 1126 | ||
1098 | /** | 1127 | /** |
@@ -1107,7 +1136,43 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
1107 | static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, | 1136 | static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, |
1108 | dev_t dev) | 1137 | dev_t dev) |
1109 | { | 1138 | { |
1110 | return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0); | 1139 | return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0, NULL); |
1140 | } | ||
1141 | |||
1142 | /** | ||
1143 | * gfs2_atomic_open - Atomically open a file | ||
1144 | * @dir: The directory | ||
1145 | * @dentry: The proposed new entry | ||
1146 | * @file: The proposed new struct file | ||
1147 | * @flags: open flags | ||
1148 | * @mode: File mode | ||
1149 | * @opened: Flag to say whether the file has been opened or not | ||
1150 | * | ||
1151 | * Returns: error code or 0 for success | ||
1152 | */ | ||
1153 | |||
1154 | static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, | ||
1155 | struct file *file, unsigned flags, | ||
1156 | umode_t mode, int *opened) | ||
1157 | { | ||
1158 | struct dentry *d; | ||
1159 | bool excl = !!(flags & O_EXCL); | ||
1160 | |||
1161 | d = __gfs2_lookup(dir, dentry, file, opened); | ||
1162 | if (IS_ERR(d)) | ||
1163 | return PTR_ERR(d); | ||
1164 | if (d == NULL) | ||
1165 | d = dentry; | ||
1166 | if (d->d_inode) { | ||
1167 | if (!(*opened & FILE_OPENED)) | ||
1168 | return finish_no_open(file, d); | ||
1169 | return 0; | ||
1170 | } | ||
1171 | |||
1172 | if (!(flags & O_CREAT)) | ||
1173 | return -ENOENT; | ||
1174 | |||
1175 | return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl, opened); | ||
1111 | } | 1176 | } |
1112 | 1177 | ||
1113 | /* | 1178 | /* |
@@ -1787,6 +1852,7 @@ const struct inode_operations gfs2_dir_iops = { | |||
1787 | .removexattr = gfs2_removexattr, | 1852 | .removexattr = gfs2_removexattr, |
1788 | .fiemap = gfs2_fiemap, | 1853 | .fiemap = gfs2_fiemap, |
1789 | .get_acl = gfs2_get_acl, | 1854 | .get_acl = gfs2_get_acl, |
1855 | .atomic_open = gfs2_atomic_open, | ||
1790 | }; | 1856 | }; |
1791 | 1857 | ||
1792 | const struct inode_operations gfs2_symlink_iops = { | 1858 | const struct inode_operations gfs2_symlink_iops = { |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index c53c7477f6da..ba4d9492d422 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -109,6 +109,7 @@ extern int gfs2_permission(struct inode *inode, int mask); | |||
109 | extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr); | 109 | extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr); |
110 | extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); | 110 | extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); |
111 | extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); | 111 | extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); |
112 | extern int gfs2_open_common(struct inode *inode, struct file *file); | ||
112 | 113 | ||
113 | extern const struct inode_operations gfs2_file_iops; | 114 | extern const struct inode_operations gfs2_file_iops; |
114 | extern const struct inode_operations gfs2_dir_iops; | 115 | extern const struct inode_operations gfs2_dir_iops; |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index b404f4853034..610613fb65b5 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -211,15 +211,16 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
211 | static int gfs2_ail1_empty(struct gfs2_sbd *sdp) | 211 | static int gfs2_ail1_empty(struct gfs2_sbd *sdp) |
212 | { | 212 | { |
213 | struct gfs2_trans *tr, *s; | 213 | struct gfs2_trans *tr, *s; |
214 | int oldest_tr = 1; | ||
214 | int ret; | 215 | int ret; |
215 | 216 | ||
216 | spin_lock(&sdp->sd_ail_lock); | 217 | spin_lock(&sdp->sd_ail_lock); |
217 | list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { | 218 | list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { |
218 | gfs2_ail1_empty_one(sdp, tr); | 219 | gfs2_ail1_empty_one(sdp, tr); |
219 | if (list_empty(&tr->tr_ail1_list)) | 220 | if (list_empty(&tr->tr_ail1_list) && oldest_tr) |
220 | list_move(&tr->tr_list, &sdp->sd_ail2_list); | 221 | list_move(&tr->tr_list, &sdp->sd_ail2_list); |
221 | else | 222 | else |
222 | break; | 223 | oldest_tr = 0; |
223 | } | 224 | } |
224 | ret = list_empty(&sdp->sd_ail1_list); | 225 | ret = list_empty(&sdp->sd_ail1_list); |
225 | spin_unlock(&sdp->sd_ail_lock); | 226 | spin_unlock(&sdp->sd_ail_lock); |
@@ -317,7 +318,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) | |||
317 | 318 | ||
318 | int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) | 319 | int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) |
319 | { | 320 | { |
320 | unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize); | 321 | unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); |
321 | unsigned wanted = blks + reserved_blks; | 322 | unsigned wanted = blks + reserved_blks; |
322 | DEFINE_WAIT(wait); | 323 | DEFINE_WAIT(wait); |
323 | int did_wait = 0; | 324 | int did_wait = 0; |
@@ -545,6 +546,76 @@ void gfs2_ordered_del_inode(struct gfs2_inode *ip) | |||
545 | spin_unlock(&sdp->sd_ordered_lock); | 546 | spin_unlock(&sdp->sd_ordered_lock); |
546 | } | 547 | } |
547 | 548 | ||
549 | void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) | ||
550 | { | ||
551 | struct buffer_head *bh = bd->bd_bh; | ||
552 | struct gfs2_glock *gl = bd->bd_gl; | ||
553 | |||
554 | gfs2_remove_from_ail(bd); | ||
555 | bd->bd_bh = NULL; | ||
556 | bh->b_private = NULL; | ||
557 | bd->bd_blkno = bh->b_blocknr; | ||
558 | bd->bd_ops = &gfs2_revoke_lops; | ||
559 | sdp->sd_log_num_revoke++; | ||
560 | atomic_inc(&gl->gl_revokes); | ||
561 | set_bit(GLF_LFLUSH, &gl->gl_flags); | ||
562 | list_add(&bd->bd_list, &sdp->sd_log_le_revoke); | ||
563 | } | ||
564 | |||
565 | void gfs2_write_revokes(struct gfs2_sbd *sdp) | ||
566 | { | ||
567 | struct gfs2_trans *tr; | ||
568 | struct gfs2_bufdata *bd, *tmp; | ||
569 | int have_revokes = 0; | ||
570 | int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); | ||
571 | |||
572 | gfs2_ail1_empty(sdp); | ||
573 | spin_lock(&sdp->sd_ail_lock); | ||
574 | list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) { | ||
575 | list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) { | ||
576 | if (list_empty(&bd->bd_list)) { | ||
577 | have_revokes = 1; | ||
578 | goto done; | ||
579 | } | ||
580 | } | ||
581 | } | ||
582 | done: | ||
583 | spin_unlock(&sdp->sd_ail_lock); | ||
584 | if (have_revokes == 0) | ||
585 | return; | ||
586 | while (sdp->sd_log_num_revoke > max_revokes) | ||
587 | max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
588 | max_revokes -= sdp->sd_log_num_revoke; | ||
589 | if (!sdp->sd_log_num_revoke) { | ||
590 | atomic_dec(&sdp->sd_log_blks_free); | ||
591 | /* If no blocks have been reserved, we need to also | ||
592 | * reserve a block for the header */ | ||
593 | if (!sdp->sd_log_blks_reserved) | ||
594 | atomic_dec(&sdp->sd_log_blks_free); | ||
595 | } | ||
596 | gfs2_log_lock(sdp); | ||
597 | spin_lock(&sdp->sd_ail_lock); | ||
598 | list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) { | ||
599 | list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) { | ||
600 | if (max_revokes == 0) | ||
601 | goto out_of_blocks; | ||
602 | if (!list_empty(&bd->bd_list)) | ||
603 | continue; | ||
604 | gfs2_add_revoke(sdp, bd); | ||
605 | max_revokes--; | ||
606 | } | ||
607 | } | ||
608 | out_of_blocks: | ||
609 | spin_unlock(&sdp->sd_ail_lock); | ||
610 | gfs2_log_unlock(sdp); | ||
611 | |||
612 | if (!sdp->sd_log_num_revoke) { | ||
613 | atomic_inc(&sdp->sd_log_blks_free); | ||
614 | if (!sdp->sd_log_blks_reserved) | ||
615 | atomic_inc(&sdp->sd_log_blks_free); | ||
616 | } | ||
617 | } | ||
618 | |||
548 | /** | 619 | /** |
549 | * log_write_header - Get and initialize a journal header buffer | 620 | * log_write_header - Get and initialize a journal header buffer |
550 | * @sdp: The GFS2 superblock | 621 | * @sdp: The GFS2 superblock |
@@ -562,7 +633,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) | |||
562 | lh = page_address(page); | 633 | lh = page_address(page); |
563 | clear_page(lh); | 634 | clear_page(lh); |
564 | 635 | ||
565 | gfs2_ail1_empty(sdp); | ||
566 | tail = current_tail(sdp); | 636 | tail = current_tail(sdp); |
567 | 637 | ||
568 | lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); | 638 | lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); |
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 3566f35915e0..37216634f0aa 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h | |||
@@ -72,5 +72,7 @@ extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) | |||
72 | extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); | 72 | extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); |
73 | extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); | 73 | extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); |
74 | extern int gfs2_logd(void *data); | 74 | extern int gfs2_logd(void *data); |
75 | extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); | ||
76 | extern void gfs2_write_revokes(struct gfs2_sbd *sdp); | ||
75 | 77 | ||
76 | #endif /* __LOG_DOT_H__ */ | 78 | #endif /* __LOG_DOT_H__ */ |
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 6c33d7b6e0c4..17c5b5d7dc88 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/gfs2_ondisk.h> | 16 | #include <linux/gfs2_ondisk.h> |
17 | #include <linux/bio.h> | 17 | #include <linux/bio.h> |
18 | #include <linux/fs.h> | 18 | #include <linux/fs.h> |
19 | #include <linux/list_sort.h> | ||
19 | 20 | ||
20 | #include "gfs2.h" | 21 | #include "gfs2.h" |
21 | #include "incore.h" | 22 | #include "incore.h" |
@@ -401,6 +402,20 @@ static void gfs2_check_magic(struct buffer_head *bh) | |||
401 | kunmap_atomic(kaddr); | 402 | kunmap_atomic(kaddr); |
402 | } | 403 | } |
403 | 404 | ||
405 | static int blocknr_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
406 | { | ||
407 | struct gfs2_bufdata *bda, *bdb; | ||
408 | |||
409 | bda = list_entry(a, struct gfs2_bufdata, bd_list); | ||
410 | bdb = list_entry(b, struct gfs2_bufdata, bd_list); | ||
411 | |||
412 | if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) | ||
413 | return -1; | ||
414 | if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr) | ||
415 | return 1; | ||
416 | return 0; | ||
417 | } | ||
418 | |||
404 | static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, | 419 | static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, |
405 | unsigned int total, struct list_head *blist, | 420 | unsigned int total, struct list_head *blist, |
406 | bool is_databuf) | 421 | bool is_databuf) |
@@ -413,6 +428,7 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, | |||
413 | __be64 *ptr; | 428 | __be64 *ptr; |
414 | 429 | ||
415 | gfs2_log_lock(sdp); | 430 | gfs2_log_lock(sdp); |
431 | list_sort(NULL, blist, blocknr_cmp); | ||
416 | bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list); | 432 | bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list); |
417 | while(total) { | 433 | while(total) { |
418 | num = total; | 434 | num = total; |
@@ -590,6 +606,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) | |||
590 | struct page *page; | 606 | struct page *page; |
591 | unsigned int length; | 607 | unsigned int length; |
592 | 608 | ||
609 | gfs2_write_revokes(sdp); | ||
593 | if (!sdp->sd_log_num_revoke) | 610 | if (!sdp->sd_log_num_revoke) |
594 | return; | 611 | return; |
595 | 612 | ||
@@ -836,10 +853,6 @@ const struct gfs2_log_operations gfs2_revoke_lops = { | |||
836 | .lo_name = "revoke", | 853 | .lo_name = "revoke", |
837 | }; | 854 | }; |
838 | 855 | ||
839 | const struct gfs2_log_operations gfs2_rg_lops = { | ||
840 | .lo_name = "rg", | ||
841 | }; | ||
842 | |||
843 | const struct gfs2_log_operations gfs2_databuf_lops = { | 856 | const struct gfs2_log_operations gfs2_databuf_lops = { |
844 | .lo_before_commit = databuf_lo_before_commit, | 857 | .lo_before_commit = databuf_lo_before_commit, |
845 | .lo_after_commit = databuf_lo_after_commit, | 858 | .lo_after_commit = databuf_lo_after_commit, |
@@ -851,7 +864,6 @@ const struct gfs2_log_operations gfs2_databuf_lops = { | |||
851 | const struct gfs2_log_operations *gfs2_log_ops[] = { | 864 | const struct gfs2_log_operations *gfs2_log_ops[] = { |
852 | &gfs2_databuf_lops, | 865 | &gfs2_databuf_lops, |
853 | &gfs2_buf_lops, | 866 | &gfs2_buf_lops, |
854 | &gfs2_rg_lops, | ||
855 | &gfs2_revoke_lops, | 867 | &gfs2_revoke_lops, |
856 | NULL, | 868 | NULL, |
857 | }; | 869 | }; |
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 87e062e05c92..9ca2e6438419 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h | |||
@@ -23,7 +23,6 @@ | |||
23 | extern const struct gfs2_log_operations gfs2_glock_lops; | 23 | extern const struct gfs2_log_operations gfs2_glock_lops; |
24 | extern const struct gfs2_log_operations gfs2_buf_lops; | 24 | extern const struct gfs2_log_operations gfs2_buf_lops; |
25 | extern const struct gfs2_log_operations gfs2_revoke_lops; | 25 | extern const struct gfs2_log_operations gfs2_revoke_lops; |
26 | extern const struct gfs2_log_operations gfs2_rg_lops; | ||
27 | extern const struct gfs2_log_operations gfs2_databuf_lops; | 26 | extern const struct gfs2_log_operations gfs2_databuf_lops; |
28 | 27 | ||
29 | extern const struct gfs2_log_operations *gfs2_log_ops[]; | 28 | extern const struct gfs2_log_operations *gfs2_log_ops[]; |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 1a89afb68472..0da390686c08 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -296,10 +296,6 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
296 | if (bd) { | 296 | if (bd) { |
297 | spin_lock(&sdp->sd_ail_lock); | 297 | spin_lock(&sdp->sd_ail_lock); |
298 | if (bd->bd_tr) { | 298 | if (bd->bd_tr) { |
299 | gfs2_remove_from_ail(bd); | ||
300 | bh->b_private = NULL; | ||
301 | bd->bd_bh = NULL; | ||
302 | bd->bd_blkno = bh->b_blocknr; | ||
303 | gfs2_trans_add_revoke(sdp, bd); | 299 | gfs2_trans_add_revoke(sdp, bd); |
304 | } | 300 | } |
305 | spin_unlock(&sdp->sd_ail_lock); | 301 | spin_unlock(&sdp->sd_ail_lock); |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 60ede2a0f43f..0262c190b6f9 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -916,16 +916,16 @@ static int init_threads(struct gfs2_sbd *sdp, int undo) | |||
916 | goto fail_quotad; | 916 | goto fail_quotad; |
917 | 917 | ||
918 | p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); | 918 | p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); |
919 | error = IS_ERR(p); | 919 | if (IS_ERR(p)) { |
920 | if (error) { | 920 | error = PTR_ERR(p); |
921 | fs_err(sdp, "can't start logd thread: %d\n", error); | 921 | fs_err(sdp, "can't start logd thread: %d\n", error); |
922 | return error; | 922 | return error; |
923 | } | 923 | } |
924 | sdp->sd_logd_process = p; | 924 | sdp->sd_logd_process = p; |
925 | 925 | ||
926 | p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); | 926 | p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); |
927 | error = IS_ERR(p); | 927 | if (IS_ERR(p)) { |
928 | if (error) { | 928 | error = PTR_ERR(p); |
929 | fs_err(sdp, "can't start quotad thread: %d\n", error); | 929 | fs_err(sdp, "can't start quotad thread: %d\n", error); |
930 | goto fail; | 930 | goto fail; |
931 | } | 931 | } |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c253b13722e8..3768c2f40e43 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -1154,11 +1154,6 @@ int gfs2_quota_sync(struct super_block *sb, int type) | |||
1154 | return error; | 1154 | return error; |
1155 | } | 1155 | } |
1156 | 1156 | ||
1157 | static int gfs2_quota_sync_timeo(struct super_block *sb, int type) | ||
1158 | { | ||
1159 | return gfs2_quota_sync(sb, type); | ||
1160 | } | ||
1161 | |||
1162 | int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid) | 1157 | int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid) |
1163 | { | 1158 | { |
1164 | struct gfs2_quota_data *qd; | 1159 | struct gfs2_quota_data *qd; |
@@ -1414,7 +1409,7 @@ int gfs2_quotad(void *data) | |||
1414 | &tune->gt_statfs_quantum); | 1409 | &tune->gt_statfs_quantum); |
1415 | 1410 | ||
1416 | /* Update quota file */ | 1411 | /* Update quota file */ |
1417 | quotad_check_timeo(sdp, "sync", gfs2_quota_sync_timeo, t, | 1412 | quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, |
1418 | "ad_timeo, &tune->gt_quota_quantum); | 1413 | "ad_timeo, &tune->gt_quota_quantum); |
1419 | 1414 | ||
1420 | /* Check for & recover partially truncated inodes */ | 1415 | /* Check for & recover partially truncated inodes */ |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9809156e3d04..69317435faa7 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -1288,13 +1288,15 @@ int gfs2_fitrim(struct file *filp, void __user *argp) | |||
1288 | minlen = max_t(u64, r.minlen, | 1288 | minlen = max_t(u64, r.minlen, |
1289 | q->limits.discard_granularity) >> bs_shift; | 1289 | q->limits.discard_granularity) >> bs_shift; |
1290 | 1290 | ||
1291 | if (end <= start || minlen > sdp->sd_max_rg_data) | ||
1292 | return -EINVAL; | ||
1293 | |||
1291 | rgd = gfs2_blk2rgrpd(sdp, start, 0); | 1294 | rgd = gfs2_blk2rgrpd(sdp, start, 0); |
1292 | rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0); | 1295 | rgd_end = gfs2_blk2rgrpd(sdp, end, 0); |
1293 | 1296 | ||
1294 | if (end <= start || | 1297 | if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end)) |
1295 | minlen > sdp->sd_max_rg_data || | 1298 | && (start > rgd_end->rd_data0 + rgd_end->rd_data)) |
1296 | start > rgd_end->rd_data0 + rgd_end->rd_data) | 1299 | return -EINVAL; /* start is beyond the end of the fs */ |
1297 | return -EINVAL; | ||
1298 | 1300 | ||
1299 | while (1) { | 1301 | while (1) { |
1300 | 1302 | ||
@@ -1336,7 +1338,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) | |||
1336 | } | 1338 | } |
1337 | 1339 | ||
1338 | out: | 1340 | out: |
1339 | r.len = trimmed << 9; | 1341 | r.len = trimmed << bs_shift; |
1340 | if (copy_to_user(argp, &r, sizeof(r))) | 1342 | if (copy_to_user(argp, &r, sizeof(r))) |
1341 | return -EFAULT; | 1343 | return -EFAULT; |
1342 | 1344 | ||
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 7374907742a8..2b20d7046bf3 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c | |||
@@ -270,19 +270,12 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) | |||
270 | 270 | ||
271 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) | 271 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) |
272 | { | 272 | { |
273 | struct gfs2_glock *gl = bd->bd_gl; | ||
274 | struct gfs2_trans *tr = current->journal_info; | 273 | struct gfs2_trans *tr = current->journal_info; |
275 | 274 | ||
276 | BUG_ON(!list_empty(&bd->bd_list)); | 275 | BUG_ON(!list_empty(&bd->bd_list)); |
277 | BUG_ON(!list_empty(&bd->bd_ail_st_list)); | 276 | gfs2_add_revoke(sdp, bd); |
278 | BUG_ON(!list_empty(&bd->bd_ail_gl_list)); | ||
279 | bd->bd_ops = &gfs2_revoke_lops; | ||
280 | tr->tr_touched = 1; | 277 | tr->tr_touched = 1; |
281 | tr->tr_num_revoke++; | 278 | tr->tr_num_revoke++; |
282 | sdp->sd_log_num_revoke++; | ||
283 | atomic_inc(&gl->gl_revokes); | ||
284 | set_bit(GLF_LFLUSH, &gl->gl_flags); | ||
285 | list_add(&bd->bd_list, &sdp->sd_log_le_revoke); | ||
286 | } | 279 | } |
287 | 280 | ||
288 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) | 281 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) |
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index e0101b6fb0d7..145566851e7a 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c | |||
@@ -51,9 +51,9 @@ done: | |||
51 | /* | 51 | /* |
52 | * hfs_readdir | 52 | * hfs_readdir |
53 | */ | 53 | */ |
54 | static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 54 | static int hfs_readdir(struct file *file, struct dir_context *ctx) |
55 | { | 55 | { |
56 | struct inode *inode = file_inode(filp); | 56 | struct inode *inode = file_inode(file); |
57 | struct super_block *sb = inode->i_sb; | 57 | struct super_block *sb = inode->i_sb; |
58 | int len, err; | 58 | int len, err; |
59 | char strbuf[HFS_MAX_NAMELEN]; | 59 | char strbuf[HFS_MAX_NAMELEN]; |
@@ -62,7 +62,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
62 | struct hfs_readdir_data *rd; | 62 | struct hfs_readdir_data *rd; |
63 | u16 type; | 63 | u16 type; |
64 | 64 | ||
65 | if (filp->f_pos >= inode->i_size) | 65 | if (ctx->pos >= inode->i_size) |
66 | return 0; | 66 | return 0; |
67 | 67 | ||
68 | err = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); | 68 | err = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); |
@@ -73,14 +73,13 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
73 | if (err) | 73 | if (err) |
74 | goto out; | 74 | goto out; |
75 | 75 | ||
76 | switch ((u32)filp->f_pos) { | 76 | if (ctx->pos == 0) { |
77 | case 0: | ||
78 | /* This is completely artificial... */ | 77 | /* This is completely artificial... */ |
79 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR)) | 78 | if (!dir_emit_dot(file, ctx)) |
80 | goto out; | 79 | goto out; |
81 | filp->f_pos++; | 80 | ctx->pos = 1; |
82 | /* fall through */ | 81 | } |
83 | case 1: | 82 | if (ctx->pos == 1) { |
84 | if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { | 83 | if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { |
85 | err = -EIO; | 84 | err = -EIO; |
86 | goto out; | 85 | goto out; |
@@ -97,18 +96,16 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
97 | // err = -EIO; | 96 | // err = -EIO; |
98 | // goto out; | 97 | // goto out; |
99 | //} | 98 | //} |
100 | if (filldir(dirent, "..", 2, 1, | 99 | if (!dir_emit(ctx, "..", 2, |
101 | be32_to_cpu(entry.thread.ParID), DT_DIR)) | 100 | be32_to_cpu(entry.thread.ParID), DT_DIR)) |
102 | goto out; | 101 | goto out; |
103 | filp->f_pos++; | 102 | ctx->pos = 2; |
104 | /* fall through */ | ||
105 | default: | ||
106 | if (filp->f_pos >= inode->i_size) | ||
107 | goto out; | ||
108 | err = hfs_brec_goto(&fd, filp->f_pos - 1); | ||
109 | if (err) | ||
110 | goto out; | ||
111 | } | 103 | } |
104 | if (ctx->pos >= inode->i_size) | ||
105 | goto out; | ||
106 | err = hfs_brec_goto(&fd, ctx->pos - 1); | ||
107 | if (err) | ||
108 | goto out; | ||
112 | 109 | ||
113 | for (;;) { | 110 | for (;;) { |
114 | if (be32_to_cpu(fd.key->cat.ParID) != inode->i_ino) { | 111 | if (be32_to_cpu(fd.key->cat.ParID) != inode->i_ino) { |
@@ -131,7 +128,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
131 | err = -EIO; | 128 | err = -EIO; |
132 | goto out; | 129 | goto out; |
133 | } | 130 | } |
134 | if (filldir(dirent, strbuf, len, filp->f_pos, | 131 | if (!dir_emit(ctx, strbuf, len, |
135 | be32_to_cpu(entry.dir.DirID), DT_DIR)) | 132 | be32_to_cpu(entry.dir.DirID), DT_DIR)) |
136 | break; | 133 | break; |
137 | } else if (type == HFS_CDR_FIL) { | 134 | } else if (type == HFS_CDR_FIL) { |
@@ -140,7 +137,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
140 | err = -EIO; | 137 | err = -EIO; |
141 | goto out; | 138 | goto out; |
142 | } | 139 | } |
143 | if (filldir(dirent, strbuf, len, filp->f_pos, | 140 | if (!dir_emit(ctx, strbuf, len, |
144 | be32_to_cpu(entry.file.FlNum), DT_REG)) | 141 | be32_to_cpu(entry.file.FlNum), DT_REG)) |
145 | break; | 142 | break; |
146 | } else { | 143 | } else { |
@@ -148,22 +145,22 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
148 | err = -EIO; | 145 | err = -EIO; |
149 | goto out; | 146 | goto out; |
150 | } | 147 | } |
151 | filp->f_pos++; | 148 | ctx->pos++; |
152 | if (filp->f_pos >= inode->i_size) | 149 | if (ctx->pos >= inode->i_size) |
153 | goto out; | 150 | goto out; |
154 | err = hfs_brec_goto(&fd, 1); | 151 | err = hfs_brec_goto(&fd, 1); |
155 | if (err) | 152 | if (err) |
156 | goto out; | 153 | goto out; |
157 | } | 154 | } |
158 | rd = filp->private_data; | 155 | rd = file->private_data; |
159 | if (!rd) { | 156 | if (!rd) { |
160 | rd = kmalloc(sizeof(struct hfs_readdir_data), GFP_KERNEL); | 157 | rd = kmalloc(sizeof(struct hfs_readdir_data), GFP_KERNEL); |
161 | if (!rd) { | 158 | if (!rd) { |
162 | err = -ENOMEM; | 159 | err = -ENOMEM; |
163 | goto out; | 160 | goto out; |
164 | } | 161 | } |
165 | filp->private_data = rd; | 162 | file->private_data = rd; |
166 | rd->file = filp; | 163 | rd->file = file; |
167 | list_add(&rd->list, &HFS_I(inode)->open_dir_list); | 164 | list_add(&rd->list, &HFS_I(inode)->open_dir_list); |
168 | } | 165 | } |
169 | memcpy(&rd->key, &fd.key, sizeof(struct hfs_cat_key)); | 166 | memcpy(&rd->key, &fd.key, sizeof(struct hfs_cat_key)); |
@@ -306,7 +303,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
306 | 303 | ||
307 | const struct file_operations hfs_dir_operations = { | 304 | const struct file_operations hfs_dir_operations = { |
308 | .read = generic_read_dir, | 305 | .read = generic_read_dir, |
309 | .readdir = hfs_readdir, | 306 | .iterate = hfs_readdir, |
310 | .llseek = generic_file_llseek, | 307 | .llseek = generic_file_llseek, |
311 | .release = hfs_dir_release, | 308 | .release = hfs_dir_release, |
312 | }; | 309 | }; |
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index a73b11839a41..0524cda47a6e 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h | |||
@@ -229,13 +229,10 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *); | |||
229 | /* string.c */ | 229 | /* string.c */ |
230 | extern const struct dentry_operations hfs_dentry_operations; | 230 | extern const struct dentry_operations hfs_dentry_operations; |
231 | 231 | ||
232 | extern int hfs_hash_dentry(const struct dentry *, const struct inode *, | 232 | extern int hfs_hash_dentry(const struct dentry *, struct qstr *); |
233 | struct qstr *); | ||
234 | extern int hfs_strcmp(const unsigned char *, unsigned int, | 233 | extern int hfs_strcmp(const unsigned char *, unsigned int, |
235 | const unsigned char *, unsigned int); | 234 | const unsigned char *, unsigned int); |
236 | extern int hfs_compare_dentry(const struct dentry *parent, | 235 | extern int hfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
237 | const struct inode *pinode, | ||
238 | const struct dentry *dentry, const struct inode *inode, | ||
239 | unsigned int len, const char *str, const struct qstr *name); | 236 | unsigned int len, const char *str, const struct qstr *name); |
240 | 237 | ||
241 | /* trans.c */ | 238 | /* trans.c */ |
diff --git a/fs/hfs/string.c b/fs/hfs/string.c index 495a976a3cc9..85b610c3909f 100644 --- a/fs/hfs/string.c +++ b/fs/hfs/string.c | |||
@@ -51,8 +51,7 @@ static unsigned char caseorder[256] = { | |||
51 | /* | 51 | /* |
52 | * Hash a string to an integer in a case-independent way | 52 | * Hash a string to an integer in a case-independent way |
53 | */ | 53 | */ |
54 | int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, | 54 | int hfs_hash_dentry(const struct dentry *dentry, struct qstr *this) |
55 | struct qstr *this) | ||
56 | { | 55 | { |
57 | const unsigned char *name = this->name; | 56 | const unsigned char *name = this->name; |
58 | unsigned int hash, len = this->len; | 57 | unsigned int hash, len = this->len; |
@@ -93,8 +92,7 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1, | |||
93 | * Test for equality of two strings in the HFS filename character ordering. | 92 | * Test for equality of two strings in the HFS filename character ordering. |
94 | * return 1 on failure and 0 on success | 93 | * return 1 on failure and 0 on success |
95 | */ | 94 | */ |
96 | int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode, | 95 | int hfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
97 | const struct dentry *dentry, const struct inode *inode, | ||
98 | unsigned int len, const char *str, const struct qstr *name) | 96 | unsigned int len, const char *str, const struct qstr *name) |
99 | { | 97 | { |
100 | const unsigned char *n1, *n2; | 98 | const unsigned char *n1, *n2; |
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index a37ac934732f..d8ce4bd17fc5 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
@@ -121,9 +121,9 @@ fail: | |||
121 | return ERR_PTR(err); | 121 | return ERR_PTR(err); |
122 | } | 122 | } |
123 | 123 | ||
124 | static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | 124 | static int hfsplus_readdir(struct file *file, struct dir_context *ctx) |
125 | { | 125 | { |
126 | struct inode *inode = file_inode(filp); | 126 | struct inode *inode = file_inode(file); |
127 | struct super_block *sb = inode->i_sb; | 127 | struct super_block *sb = inode->i_sb; |
128 | int len, err; | 128 | int len, err; |
129 | char strbuf[HFSPLUS_MAX_STRLEN + 1]; | 129 | char strbuf[HFSPLUS_MAX_STRLEN + 1]; |
@@ -132,7 +132,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
132 | struct hfsplus_readdir_data *rd; | 132 | struct hfsplus_readdir_data *rd; |
133 | u16 type; | 133 | u16 type; |
134 | 134 | ||
135 | if (filp->f_pos >= inode->i_size) | 135 | if (file->f_pos >= inode->i_size) |
136 | return 0; | 136 | return 0; |
137 | 137 | ||
138 | err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); | 138 | err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
@@ -143,14 +143,13 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
143 | if (err) | 143 | if (err) |
144 | goto out; | 144 | goto out; |
145 | 145 | ||
146 | switch ((u32)filp->f_pos) { | 146 | if (ctx->pos == 0) { |
147 | case 0: | ||
148 | /* This is completely artificial... */ | 147 | /* This is completely artificial... */ |
149 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR)) | 148 | if (!dir_emit_dot(file, ctx)) |
150 | goto out; | 149 | goto out; |
151 | filp->f_pos++; | 150 | ctx->pos = 1; |
152 | /* fall through */ | 151 | } |
153 | case 1: | 152 | if (ctx->pos == 1) { |
154 | if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { | 153 | if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { |
155 | err = -EIO; | 154 | err = -EIO; |
156 | goto out; | 155 | goto out; |
@@ -168,19 +167,16 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
168 | err = -EIO; | 167 | err = -EIO; |
169 | goto out; | 168 | goto out; |
170 | } | 169 | } |
171 | if (filldir(dirent, "..", 2, 1, | 170 | if (!dir_emit(ctx, "..", 2, |
172 | be32_to_cpu(entry.thread.parentID), DT_DIR)) | 171 | be32_to_cpu(entry.thread.parentID), DT_DIR)) |
173 | goto out; | 172 | goto out; |
174 | filp->f_pos++; | 173 | ctx->pos = 2; |
175 | /* fall through */ | ||
176 | default: | ||
177 | if (filp->f_pos >= inode->i_size) | ||
178 | goto out; | ||
179 | err = hfs_brec_goto(&fd, filp->f_pos - 1); | ||
180 | if (err) | ||
181 | goto out; | ||
182 | } | 174 | } |
183 | 175 | if (ctx->pos >= inode->i_size) | |
176 | goto out; | ||
177 | err = hfs_brec_goto(&fd, ctx->pos - 1); | ||
178 | if (err) | ||
179 | goto out; | ||
184 | for (;;) { | 180 | for (;;) { |
185 | if (be32_to_cpu(fd.key->cat.parent) != inode->i_ino) { | 181 | if (be32_to_cpu(fd.key->cat.parent) != inode->i_ino) { |
186 | pr_err("walked past end of dir\n"); | 182 | pr_err("walked past end of dir\n"); |
@@ -211,7 +207,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
211 | HFSPLUS_SB(sb)->hidden_dir->i_ino == | 207 | HFSPLUS_SB(sb)->hidden_dir->i_ino == |
212 | be32_to_cpu(entry.folder.id)) | 208 | be32_to_cpu(entry.folder.id)) |
213 | goto next; | 209 | goto next; |
214 | if (filldir(dirent, strbuf, len, filp->f_pos, | 210 | if (!dir_emit(ctx, strbuf, len, |
215 | be32_to_cpu(entry.folder.id), DT_DIR)) | 211 | be32_to_cpu(entry.folder.id), DT_DIR)) |
216 | break; | 212 | break; |
217 | } else if (type == HFSPLUS_FILE) { | 213 | } else if (type == HFSPLUS_FILE) { |
@@ -220,7 +216,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
220 | err = -EIO; | 216 | err = -EIO; |
221 | goto out; | 217 | goto out; |
222 | } | 218 | } |
223 | if (filldir(dirent, strbuf, len, filp->f_pos, | 219 | if (!dir_emit(ctx, strbuf, len, |
224 | be32_to_cpu(entry.file.id), DT_REG)) | 220 | be32_to_cpu(entry.file.id), DT_REG)) |
225 | break; | 221 | break; |
226 | } else { | 222 | } else { |
@@ -229,22 +225,22 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
229 | goto out; | 225 | goto out; |
230 | } | 226 | } |
231 | next: | 227 | next: |
232 | filp->f_pos++; | 228 | ctx->pos++; |
233 | if (filp->f_pos >= inode->i_size) | 229 | if (ctx->pos >= inode->i_size) |
234 | goto out; | 230 | goto out; |
235 | err = hfs_brec_goto(&fd, 1); | 231 | err = hfs_brec_goto(&fd, 1); |
236 | if (err) | 232 | if (err) |
237 | goto out; | 233 | goto out; |
238 | } | 234 | } |
239 | rd = filp->private_data; | 235 | rd = file->private_data; |
240 | if (!rd) { | 236 | if (!rd) { |
241 | rd = kmalloc(sizeof(struct hfsplus_readdir_data), GFP_KERNEL); | 237 | rd = kmalloc(sizeof(struct hfsplus_readdir_data), GFP_KERNEL); |
242 | if (!rd) { | 238 | if (!rd) { |
243 | err = -ENOMEM; | 239 | err = -ENOMEM; |
244 | goto out; | 240 | goto out; |
245 | } | 241 | } |
246 | filp->private_data = rd; | 242 | file->private_data = rd; |
247 | rd->file = filp; | 243 | rd->file = file; |
248 | list_add(&rd->list, &HFSPLUS_I(inode)->open_dir_list); | 244 | list_add(&rd->list, &HFSPLUS_I(inode)->open_dir_list); |
249 | } | 245 | } |
250 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); | 246 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); |
@@ -538,7 +534,7 @@ const struct inode_operations hfsplus_dir_inode_operations = { | |||
538 | const struct file_operations hfsplus_dir_operations = { | 534 | const struct file_operations hfsplus_dir_operations = { |
539 | .fsync = hfsplus_file_fsync, | 535 | .fsync = hfsplus_file_fsync, |
540 | .read = generic_read_dir, | 536 | .read = generic_read_dir, |
541 | .readdir = hfsplus_readdir, | 537 | .iterate = hfsplus_readdir, |
542 | .unlocked_ioctl = hfsplus_ioctl, | 538 | .unlocked_ioctl = hfsplus_ioctl, |
543 | .llseek = generic_file_llseek, | 539 | .llseek = generic_file_llseek, |
544 | .release = hfsplus_dir_release, | 540 | .release = hfsplus_dir_release, |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 60b0a3388b26..ede79317cfb8 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
@@ -495,11 +495,8 @@ int hfsplus_uni2asc(struct super_block *, | |||
495 | const struct hfsplus_unistr *, char *, int *); | 495 | const struct hfsplus_unistr *, char *, int *); |
496 | int hfsplus_asc2uni(struct super_block *, | 496 | int hfsplus_asc2uni(struct super_block *, |
497 | struct hfsplus_unistr *, int, const char *, int); | 497 | struct hfsplus_unistr *, int, const char *, int); |
498 | int hfsplus_hash_dentry(const struct dentry *dentry, | 498 | int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str); |
499 | const struct inode *inode, struct qstr *str); | 499 | int hfsplus_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
500 | int hfsplus_compare_dentry(const struct dentry *parent, | ||
501 | const struct inode *pinode, | ||
502 | const struct dentry *dentry, const struct inode *inode, | ||
503 | unsigned int len, const char *str, const struct qstr *name); | 500 | unsigned int len, const char *str, const struct qstr *name); |
504 | 501 | ||
505 | /* wrapper.c */ | 502 | /* wrapper.c */ |
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index 2c2e47dcfdd8..e8ef121a4d8b 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c | |||
@@ -334,8 +334,7 @@ int hfsplus_asc2uni(struct super_block *sb, | |||
334 | * Composed unicode characters are decomposed and case-folding is performed | 334 | * Composed unicode characters are decomposed and case-folding is performed |
335 | * if the appropriate bits are (un)set on the superblock. | 335 | * if the appropriate bits are (un)set on the superblock. |
336 | */ | 336 | */ |
337 | int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, | 337 | int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str) |
338 | struct qstr *str) | ||
339 | { | 338 | { |
340 | struct super_block *sb = dentry->d_sb; | 339 | struct super_block *sb = dentry->d_sb; |
341 | const char *astr; | 340 | const char *astr; |
@@ -386,9 +385,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, | |||
386 | * Composed unicode characters are decomposed and case-folding is performed | 385 | * Composed unicode characters are decomposed and case-folding is performed |
387 | * if the appropriate bits are (un)set on the superblock. | 386 | * if the appropriate bits are (un)set on the superblock. |
388 | */ | 387 | */ |
389 | int hfsplus_compare_dentry(const struct dentry *parent, | 388 | int hfsplus_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
390 | const struct inode *pinode, | ||
391 | const struct dentry *dentry, const struct inode *inode, | ||
392 | unsigned int len, const char *str, const struct qstr *name) | 389 | unsigned int len, const char *str, const struct qstr *name) |
393 | { | 390 | { |
394 | struct super_block *sb = parent->d_sb; | 391 | struct super_block *sb = parent->d_sb; |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 32f35f187989..cddb05217512 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -277,7 +277,7 @@ static const struct super_operations hostfs_sbops = { | |||
277 | .show_options = hostfs_show_options, | 277 | .show_options = hostfs_show_options, |
278 | }; | 278 | }; |
279 | 279 | ||
280 | int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) | 280 | int hostfs_readdir(struct file *file, struct dir_context *ctx) |
281 | { | 281 | { |
282 | void *dir; | 282 | void *dir; |
283 | char *name; | 283 | char *name; |
@@ -292,12 +292,11 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) | |||
292 | __putname(name); | 292 | __putname(name); |
293 | if (dir == NULL) | 293 | if (dir == NULL) |
294 | return -error; | 294 | return -error; |
295 | next = file->f_pos; | 295 | next = ctx->pos; |
296 | while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) { | 296 | while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) { |
297 | error = (*filldir)(ent, name, len, file->f_pos, | 297 | if (!dir_emit(ctx, name, len, ino, type)) |
298 | ino, type); | 298 | break; |
299 | if (error) break; | 299 | ctx->pos = next; |
300 | file->f_pos = next; | ||
301 | } | 300 | } |
302 | close_dir(dir); | 301 | close_dir(dir); |
303 | return 0; | 302 | return 0; |
@@ -393,7 +392,7 @@ static const struct file_operations hostfs_file_fops = { | |||
393 | 392 | ||
394 | static const struct file_operations hostfs_dir_fops = { | 393 | static const struct file_operations hostfs_dir_fops = { |
395 | .llseek = generic_file_llseek, | 394 | .llseek = generic_file_llseek, |
396 | .readdir = hostfs_readdir, | 395 | .iterate = hostfs_readdir, |
397 | .read = generic_read_dir, | 396 | .read = generic_read_dir, |
398 | }; | 397 | }; |
399 | 398 | ||
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c index f49d1498aa2e..4d0a1afa058c 100644 --- a/fs/hpfs/buffer.c +++ b/fs/hpfs/buffer.c | |||
@@ -7,8 +7,37 @@ | |||
7 | */ | 7 | */ |
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
10 | #include <linux/blkdev.h> | ||
10 | #include "hpfs_fn.h" | 11 | #include "hpfs_fn.h" |
11 | 12 | ||
13 | void hpfs_prefetch_sectors(struct super_block *s, unsigned secno, int n) | ||
14 | { | ||
15 | struct buffer_head *bh; | ||
16 | struct blk_plug plug; | ||
17 | |||
18 | if (n <= 0 || unlikely(secno >= hpfs_sb(s)->sb_fs_size)) | ||
19 | return; | ||
20 | |||
21 | bh = sb_find_get_block(s, secno); | ||
22 | if (bh) { | ||
23 | if (buffer_uptodate(bh)) { | ||
24 | brelse(bh); | ||
25 | return; | ||
26 | } | ||
27 | brelse(bh); | ||
28 | }; | ||
29 | |||
30 | blk_start_plug(&plug); | ||
31 | while (n > 0) { | ||
32 | if (unlikely(secno >= hpfs_sb(s)->sb_fs_size)) | ||
33 | break; | ||
34 | sb_breadahead(s, secno); | ||
35 | secno++; | ||
36 | n--; | ||
37 | } | ||
38 | blk_finish_plug(&plug); | ||
39 | } | ||
40 | |||
12 | /* Map a sector into a buffer and return pointers to it and to the buffer. */ | 41 | /* Map a sector into a buffer and return pointers to it and to the buffer. */ |
13 | 42 | ||
14 | void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp, | 43 | void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp, |
@@ -18,6 +47,8 @@ void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head | |||
18 | 47 | ||
19 | hpfs_lock_assert(s); | 48 | hpfs_lock_assert(s); |
20 | 49 | ||
50 | hpfs_prefetch_sectors(s, secno, ahead); | ||
51 | |||
21 | cond_resched(); | 52 | cond_resched(); |
22 | 53 | ||
23 | *bhp = bh = sb_bread(s, secno); | 54 | *bhp = bh = sb_bread(s, secno); |
@@ -67,6 +98,8 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe | |||
67 | return NULL; | 98 | return NULL; |
68 | } | 99 | } |
69 | 100 | ||
101 | hpfs_prefetch_sectors(s, secno, 4 + ahead); | ||
102 | |||
70 | qbh->data = data = kmalloc(2048, GFP_NOFS); | 103 | qbh->data = data = kmalloc(2048, GFP_NOFS); |
71 | if (!data) { | 104 | if (!data) { |
72 | printk("HPFS: hpfs_map_4sectors: out of memory\n"); | 105 | printk("HPFS: hpfs_map_4sectors: out of memory\n"); |
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c index 05d4816e4e77..fa27980f2229 100644 --- a/fs/hpfs/dentry.c +++ b/fs/hpfs/dentry.c | |||
@@ -12,8 +12,7 @@ | |||
12 | * Note: the dentry argument is the parent dentry. | 12 | * Note: the dentry argument is the parent dentry. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode, | 15 | static int hpfs_hash_dentry(const struct dentry *dentry, struct qstr *qstr) |
16 | struct qstr *qstr) | ||
17 | { | 16 | { |
18 | unsigned long hash; | 17 | unsigned long hash; |
19 | int i; | 18 | int i; |
@@ -35,9 +34,7 @@ static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *ino | |||
35 | return 0; | 34 | return 0; |
36 | } | 35 | } |
37 | 36 | ||
38 | static int hpfs_compare_dentry(const struct dentry *parent, | 37 | static int hpfs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
39 | const struct inode *pinode, | ||
40 | const struct dentry *dentry, const struct inode *inode, | ||
41 | unsigned int len, const char *str, const struct qstr *name) | 38 | unsigned int len, const char *str, const struct qstr *name) |
42 | { | 39 | { |
43 | unsigned al = len; | 40 | unsigned al = len; |
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 834ac13c04b7..292b1acb9b81 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c | |||
@@ -57,14 +57,14 @@ fail: | |||
57 | return -ESPIPE; | 57 | return -ESPIPE; |
58 | } | 58 | } |
59 | 59 | ||
60 | static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 60 | static int hpfs_readdir(struct file *file, struct dir_context *ctx) |
61 | { | 61 | { |
62 | struct inode *inode = file_inode(filp); | 62 | struct inode *inode = file_inode(file); |
63 | struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); | 63 | struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); |
64 | struct quad_buffer_head qbh; | 64 | struct quad_buffer_head qbh; |
65 | struct hpfs_dirent *de; | 65 | struct hpfs_dirent *de; |
66 | int lc; | 66 | int lc; |
67 | long old_pos; | 67 | loff_t next_pos; |
68 | unsigned char *tempname; | 68 | unsigned char *tempname; |
69 | int c1, c2 = 0; | 69 | int c1, c2 = 0; |
70 | int ret = 0; | 70 | int ret = 0; |
@@ -105,11 +105,11 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
105 | } | 105 | } |
106 | } | 106 | } |
107 | lc = hpfs_sb(inode->i_sb)->sb_lowercase; | 107 | lc = hpfs_sb(inode->i_sb)->sb_lowercase; |
108 | if (filp->f_pos == 12) { /* diff -r requires this (note, that diff -r */ | 108 | if (ctx->pos == 12) { /* diff -r requires this (note, that diff -r */ |
109 | filp->f_pos = 13; /* also fails on msdos filesystem in 2.0) */ | 109 | ctx->pos = 13; /* also fails on msdos filesystem in 2.0) */ |
110 | goto out; | 110 | goto out; |
111 | } | 111 | } |
112 | if (filp->f_pos == 13) { | 112 | if (ctx->pos == 13) { |
113 | ret = -ENOENT; | 113 | ret = -ENOENT; |
114 | goto out; | 114 | goto out; |
115 | } | 115 | } |
@@ -120,33 +120,34 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
120 | accepted by filldir, but what can I do? | 120 | accepted by filldir, but what can I do? |
121 | maybe killall -9 ls helps */ | 121 | maybe killall -9 ls helps */ |
122 | if (hpfs_sb(inode->i_sb)->sb_chk) | 122 | if (hpfs_sb(inode->i_sb)->sb_chk) |
123 | if (hpfs_stop_cycles(inode->i_sb, filp->f_pos, &c1, &c2, "hpfs_readdir")) { | 123 | if (hpfs_stop_cycles(inode->i_sb, ctx->pos, &c1, &c2, "hpfs_readdir")) { |
124 | ret = -EFSERROR; | 124 | ret = -EFSERROR; |
125 | goto out; | 125 | goto out; |
126 | } | 126 | } |
127 | if (filp->f_pos == 12) | 127 | if (ctx->pos == 12) |
128 | goto out; | 128 | goto out; |
129 | if (filp->f_pos == 3 || filp->f_pos == 4 || filp->f_pos == 5) { | 129 | if (ctx->pos == 3 || ctx->pos == 4 || ctx->pos == 5) { |
130 | printk("HPFS: warning: pos==%d\n",(int)filp->f_pos); | 130 | printk("HPFS: warning: pos==%d\n",(int)ctx->pos); |
131 | goto out; | 131 | goto out; |
132 | } | 132 | } |
133 | if (filp->f_pos == 0) { | 133 | if (ctx->pos == 0) { |
134 | if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino, DT_DIR) < 0) | 134 | if (!dir_emit_dot(file, ctx)) |
135 | goto out; | 135 | goto out; |
136 | filp->f_pos = 11; | 136 | ctx->pos = 11; |
137 | } | 137 | } |
138 | if (filp->f_pos == 11) { | 138 | if (ctx->pos == 11) { |
139 | if (filldir(dirent, "..", 2, filp->f_pos, hpfs_inode->i_parent_dir, DT_DIR) < 0) | 139 | if (!dir_emit(ctx, "..", 2, hpfs_inode->i_parent_dir, DT_DIR)) |
140 | goto out; | 140 | goto out; |
141 | filp->f_pos = 1; | 141 | ctx->pos = 1; |
142 | } | 142 | } |
143 | if (filp->f_pos == 1) { | 143 | if (ctx->pos == 1) { |
144 | filp->f_pos = ((loff_t) hpfs_de_as_down_as_possible(inode->i_sb, hpfs_inode->i_dno) << 4) + 1; | 144 | ctx->pos = ((loff_t) hpfs_de_as_down_as_possible(inode->i_sb, hpfs_inode->i_dno) << 4) + 1; |
145 | hpfs_add_pos(inode, &filp->f_pos); | 145 | hpfs_add_pos(inode, &file->f_pos); |
146 | filp->f_version = inode->i_version; | 146 | file->f_version = inode->i_version; |
147 | } | 147 | } |
148 | old_pos = filp->f_pos; | 148 | next_pos = ctx->pos; |
149 | if (!(de = map_pos_dirent(inode, &filp->f_pos, &qbh))) { | 149 | if (!(de = map_pos_dirent(inode, &next_pos, &qbh))) { |
150 | ctx->pos = next_pos; | ||
150 | ret = -EIOERROR; | 151 | ret = -EIOERROR; |
151 | goto out; | 152 | goto out; |
152 | } | 153 | } |
@@ -154,20 +155,21 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
154 | if (hpfs_sb(inode->i_sb)->sb_chk) { | 155 | if (hpfs_sb(inode->i_sb)->sb_chk) { |
155 | if (de->first && !de->last && (de->namelen != 2 | 156 | if (de->first && !de->last && (de->namelen != 2 |
156 | || de ->name[0] != 1 || de->name[1] != 1)) | 157 | || de ->name[0] != 1 || de->name[1] != 1)) |
157 | hpfs_error(inode->i_sb, "hpfs_readdir: bad ^A^A entry; pos = %08lx", old_pos); | 158 | hpfs_error(inode->i_sb, "hpfs_readdir: bad ^A^A entry; pos = %08lx", (unsigned long)ctx->pos); |
158 | if (de->last && (de->namelen != 1 || de ->name[0] != 255)) | 159 | if (de->last && (de->namelen != 1 || de ->name[0] != 255)) |
159 | hpfs_error(inode->i_sb, "hpfs_readdir: bad \\377 entry; pos = %08lx", old_pos); | 160 | hpfs_error(inode->i_sb, "hpfs_readdir: bad \\377 entry; pos = %08lx", (unsigned long)ctx->pos); |
160 | } | 161 | } |
161 | hpfs_brelse4(&qbh); | 162 | hpfs_brelse4(&qbh); |
163 | ctx->pos = next_pos; | ||
162 | goto again; | 164 | goto again; |
163 | } | 165 | } |
164 | tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3); | 166 | tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3); |
165 | if (filldir(dirent, tempname, de->namelen, old_pos, le32_to_cpu(de->fnode), DT_UNKNOWN) < 0) { | 167 | if (!dir_emit(ctx, tempname, de->namelen, le32_to_cpu(de->fnode), DT_UNKNOWN)) { |
166 | filp->f_pos = old_pos; | ||
167 | if (tempname != de->name) kfree(tempname); | 168 | if (tempname != de->name) kfree(tempname); |
168 | hpfs_brelse4(&qbh); | 169 | hpfs_brelse4(&qbh); |
169 | goto out; | 170 | goto out; |
170 | } | 171 | } |
172 | ctx->pos = next_pos; | ||
171 | if (tempname != de->name) kfree(tempname); | 173 | if (tempname != de->name) kfree(tempname); |
172 | hpfs_brelse4(&qbh); | 174 | hpfs_brelse4(&qbh); |
173 | } | 175 | } |
@@ -322,7 +324,7 @@ const struct file_operations hpfs_dir_ops = | |||
322 | { | 324 | { |
323 | .llseek = hpfs_dir_lseek, | 325 | .llseek = hpfs_dir_lseek, |
324 | .read = generic_read_dir, | 326 | .read = generic_read_dir, |
325 | .readdir = hpfs_readdir, | 327 | .iterate = hpfs_readdir, |
326 | .release = hpfs_dir_release, | 328 | .release = hpfs_dir_release, |
327 | .fsync = hpfs_file_fsync, | 329 | .fsync = hpfs_file_fsync, |
328 | }; | 330 | }; |
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index e4ba5fe4c3b5..4e9dabcf1f4c 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c | |||
@@ -7,6 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include "hpfs_fn.h" | 9 | #include "hpfs_fn.h" |
10 | #include <linux/mpage.h> | ||
10 | 11 | ||
11 | #define BLOCKS(size) (((size) + 511) >> 9) | 12 | #define BLOCKS(size) (((size) + 511) >> 9) |
12 | 13 | ||
@@ -34,7 +35,7 @@ int hpfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
34 | * so we must ignore such errors. | 35 | * so we must ignore such errors. |
35 | */ | 36 | */ |
36 | 37 | ||
37 | static secno hpfs_bmap(struct inode *inode, unsigned file_secno) | 38 | static secno hpfs_bmap(struct inode *inode, unsigned file_secno, unsigned *n_secs) |
38 | { | 39 | { |
39 | struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); | 40 | struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); |
40 | unsigned n, disk_secno; | 41 | unsigned n, disk_secno; |
@@ -42,11 +43,20 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno) | |||
42 | struct buffer_head *bh; | 43 | struct buffer_head *bh; |
43 | if (BLOCKS(hpfs_i(inode)->mmu_private) <= file_secno) return 0; | 44 | if (BLOCKS(hpfs_i(inode)->mmu_private) <= file_secno) return 0; |
44 | n = file_secno - hpfs_inode->i_file_sec; | 45 | n = file_secno - hpfs_inode->i_file_sec; |
45 | if (n < hpfs_inode->i_n_secs) return hpfs_inode->i_disk_sec + n; | 46 | if (n < hpfs_inode->i_n_secs) { |
47 | *n_secs = hpfs_inode->i_n_secs - n; | ||
48 | return hpfs_inode->i_disk_sec + n; | ||
49 | } | ||
46 | if (!(fnode = hpfs_map_fnode(inode->i_sb, inode->i_ino, &bh))) return 0; | 50 | if (!(fnode = hpfs_map_fnode(inode->i_sb, inode->i_ino, &bh))) return 0; |
47 | disk_secno = hpfs_bplus_lookup(inode->i_sb, inode, &fnode->btree, file_secno, bh); | 51 | disk_secno = hpfs_bplus_lookup(inode->i_sb, inode, &fnode->btree, file_secno, bh); |
48 | if (disk_secno == -1) return 0; | 52 | if (disk_secno == -1) return 0; |
49 | if (hpfs_chk_sectors(inode->i_sb, disk_secno, 1, "bmap")) return 0; | 53 | if (hpfs_chk_sectors(inode->i_sb, disk_secno, 1, "bmap")) return 0; |
54 | n = file_secno - hpfs_inode->i_file_sec; | ||
55 | if (n < hpfs_inode->i_n_secs) { | ||
56 | *n_secs = hpfs_inode->i_n_secs - n; | ||
57 | return hpfs_inode->i_disk_sec + n; | ||
58 | } | ||
59 | *n_secs = 1; | ||
50 | return disk_secno; | 60 | return disk_secno; |
51 | } | 61 | } |
52 | 62 | ||
@@ -67,10 +77,14 @@ static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_he | |||
67 | { | 77 | { |
68 | int r; | 78 | int r; |
69 | secno s; | 79 | secno s; |
80 | unsigned n_secs; | ||
70 | hpfs_lock(inode->i_sb); | 81 | hpfs_lock(inode->i_sb); |
71 | s = hpfs_bmap(inode, iblock); | 82 | s = hpfs_bmap(inode, iblock, &n_secs); |
72 | if (s) { | 83 | if (s) { |
84 | if (bh_result->b_size >> 9 < n_secs) | ||
85 | n_secs = bh_result->b_size >> 9; | ||
73 | map_bh(bh_result, inode->i_sb, s); | 86 | map_bh(bh_result, inode->i_sb, s); |
87 | bh_result->b_size = n_secs << 9; | ||
74 | goto ret_0; | 88 | goto ret_0; |
75 | } | 89 | } |
76 | if (!create) goto ret_0; | 90 | if (!create) goto ret_0; |
@@ -95,14 +109,26 @@ static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_he | |||
95 | return r; | 109 | return r; |
96 | } | 110 | } |
97 | 111 | ||
112 | static int hpfs_readpage(struct file *file, struct page *page) | ||
113 | { | ||
114 | return mpage_readpage(page, hpfs_get_block); | ||
115 | } | ||
116 | |||
98 | static int hpfs_writepage(struct page *page, struct writeback_control *wbc) | 117 | static int hpfs_writepage(struct page *page, struct writeback_control *wbc) |
99 | { | 118 | { |
100 | return block_write_full_page(page,hpfs_get_block, wbc); | 119 | return block_write_full_page(page, hpfs_get_block, wbc); |
101 | } | 120 | } |
102 | 121 | ||
103 | static int hpfs_readpage(struct file *file, struct page *page) | 122 | static int hpfs_readpages(struct file *file, struct address_space *mapping, |
123 | struct list_head *pages, unsigned nr_pages) | ||
124 | { | ||
125 | return mpage_readpages(mapping, pages, nr_pages, hpfs_get_block); | ||
126 | } | ||
127 | |||
128 | static int hpfs_writepages(struct address_space *mapping, | ||
129 | struct writeback_control *wbc) | ||
104 | { | 130 | { |
105 | return block_read_full_page(page,hpfs_get_block); | 131 | return mpage_writepages(mapping, wbc, hpfs_get_block); |
106 | } | 132 | } |
107 | 133 | ||
108 | static void hpfs_write_failed(struct address_space *mapping, loff_t to) | 134 | static void hpfs_write_failed(struct address_space *mapping, loff_t to) |
@@ -161,6 +187,8 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) | |||
161 | const struct address_space_operations hpfs_aops = { | 187 | const struct address_space_operations hpfs_aops = { |
162 | .readpage = hpfs_readpage, | 188 | .readpage = hpfs_readpage, |
163 | .writepage = hpfs_writepage, | 189 | .writepage = hpfs_writepage, |
190 | .readpages = hpfs_readpages, | ||
191 | .writepages = hpfs_writepages, | ||
164 | .write_begin = hpfs_write_begin, | 192 | .write_begin = hpfs_write_begin, |
165 | .write_end = hpfs_write_end, | 193 | .write_end = hpfs_write_end, |
166 | .bmap = _hpfs_bmap | 194 | .bmap = _hpfs_bmap |
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index b7ae286646b5..1b398636e990 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h | |||
@@ -27,8 +27,9 @@ | |||
27 | #define ALLOC_FWD_MAX 128 | 27 | #define ALLOC_FWD_MAX 128 |
28 | #define ALLOC_M 1 | 28 | #define ALLOC_M 1 |
29 | #define FNODE_RD_AHEAD 16 | 29 | #define FNODE_RD_AHEAD 16 |
30 | #define ANODE_RD_AHEAD 16 | 30 | #define ANODE_RD_AHEAD 0 |
31 | #define DNODE_RD_AHEAD 4 | 31 | #define DNODE_RD_AHEAD 72 |
32 | #define COUNT_RD_AHEAD 62 | ||
32 | 33 | ||
33 | #define FREE_DNODES_ADD 58 | 34 | #define FREE_DNODES_ADD 58 |
34 | #define FREE_DNODES_DEL 29 | 35 | #define FREE_DNODES_DEL 29 |
@@ -207,6 +208,7 @@ void hpfs_remove_fnode(struct super_block *, fnode_secno fno); | |||
207 | 208 | ||
208 | /* buffer.c */ | 209 | /* buffer.c */ |
209 | 210 | ||
211 | void hpfs_prefetch_sectors(struct super_block *, unsigned, int); | ||
210 | void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int); | 212 | void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int); |
211 | void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **); | 213 | void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **); |
212 | void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int); | 214 | void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int); |
@@ -271,6 +273,7 @@ void hpfs_evict_inode(struct inode *); | |||
271 | 273 | ||
272 | __le32 *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); | 274 | __le32 *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); |
273 | __le32 *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); | 275 | __le32 *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); |
276 | void hpfs_prefetch_bitmap(struct super_block *, unsigned); | ||
274 | unsigned char *hpfs_load_code_page(struct super_block *, secno); | 277 | unsigned char *hpfs_load_code_page(struct super_block *, secno); |
275 | __le32 *hpfs_load_bitmap_directory(struct super_block *, secno bmp); | 278 | __le32 *hpfs_load_bitmap_directory(struct super_block *, secno bmp); |
276 | struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); | 279 | struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); |
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index 4acb19d78359..3aa66ae1031e 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c | |||
@@ -17,7 +17,9 @@ __le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, | |||
17 | struct quad_buffer_head *qbh, char *id) | 17 | struct quad_buffer_head *qbh, char *id) |
18 | { | 18 | { |
19 | secno sec; | 19 | secno sec; |
20 | if (hpfs_sb(s)->sb_chk) if (bmp_block * 16384 > hpfs_sb(s)->sb_fs_size) { | 20 | __le32 *ret; |
21 | unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; | ||
22 | if (hpfs_sb(s)->sb_chk) if (bmp_block >= n_bands) { | ||
21 | hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); | 23 | hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); |
22 | return NULL; | 24 | return NULL; |
23 | } | 25 | } |
@@ -26,7 +28,23 @@ __le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, | |||
26 | hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id); | 28 | hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id); |
27 | return NULL; | 29 | return NULL; |
28 | } | 30 | } |
29 | return hpfs_map_4sectors(s, sec, qbh, 4); | 31 | ret = hpfs_map_4sectors(s, sec, qbh, 4); |
32 | if (ret) hpfs_prefetch_bitmap(s, bmp_block + 1); | ||
33 | return ret; | ||
34 | } | ||
35 | |||
36 | void hpfs_prefetch_bitmap(struct super_block *s, unsigned bmp_block) | ||
37 | { | ||
38 | unsigned to_prefetch, next_prefetch; | ||
39 | unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; | ||
40 | if (unlikely(bmp_block >= n_bands)) | ||
41 | return; | ||
42 | to_prefetch = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]); | ||
43 | if (unlikely(bmp_block + 1 >= n_bands)) | ||
44 | next_prefetch = 0; | ||
45 | else | ||
46 | next_prefetch = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block + 1]); | ||
47 | hpfs_prefetch_sectors(s, to_prefetch, 4 + 4 * (to_prefetch + 4 == next_prefetch)); | ||
30 | } | 48 | } |
31 | 49 | ||
32 | /* | 50 | /* |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index a0617e706957..4334cda8dba1 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -121,7 +121,7 @@ unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) | |||
121 | unsigned long *bits; | 121 | unsigned long *bits; |
122 | unsigned count; | 122 | unsigned count; |
123 | 123 | ||
124 | bits = hpfs_map_4sectors(s, secno, &qbh, 4); | 124 | bits = hpfs_map_4sectors(s, secno, &qbh, 0); |
125 | if (!bits) | 125 | if (!bits) |
126 | return 0; | 126 | return 0; |
127 | count = bitmap_weight(bits, 2048 * BITS_PER_BYTE); | 127 | count = bitmap_weight(bits, 2048 * BITS_PER_BYTE); |
@@ -134,8 +134,13 @@ static unsigned count_bitmaps(struct super_block *s) | |||
134 | unsigned n, count, n_bands; | 134 | unsigned n, count, n_bands; |
135 | n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; | 135 | n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; |
136 | count = 0; | 136 | count = 0; |
137 | for (n = 0; n < n_bands; n++) | 137 | for (n = 0; n < COUNT_RD_AHEAD; n++) { |
138 | hpfs_prefetch_bitmap(s, n); | ||
139 | } | ||
140 | for (n = 0; n < n_bands; n++) { | ||
141 | hpfs_prefetch_bitmap(s, n + COUNT_RD_AHEAD); | ||
138 | count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n])); | 142 | count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n])); |
143 | } | ||
139 | return count; | 144 | return count; |
140 | } | 145 | } |
141 | 146 | ||
@@ -558,7 +563,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) | |||
558 | sbi->sb_cp_table = NULL; | 563 | sbi->sb_cp_table = NULL; |
559 | sbi->sb_c_bitmap = -1; | 564 | sbi->sb_c_bitmap = -1; |
560 | sbi->sb_max_fwd_alloc = 0xffffff; | 565 | sbi->sb_max_fwd_alloc = 0xffffff; |
561 | 566 | ||
567 | if (sbi->sb_fs_size >= 0x80000000) { | ||
568 | hpfs_error(s, "invalid size in superblock: %08x", | ||
569 | (unsigned)sbi->sb_fs_size); | ||
570 | goto bail4; | ||
571 | } | ||
572 | |||
562 | /* Load bitmap directory */ | 573 | /* Load bitmap directory */ |
563 | if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps)))) | 574 | if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps)))) |
564 | goto bail4; | 575 | goto bail4; |
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index cd3e38972c86..4338ff32959d 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c | |||
@@ -69,7 +69,7 @@ static char *dentry_name(struct dentry *dentry, int extra) | |||
69 | struct dentry *parent; | 69 | struct dentry *parent; |
70 | char *root, *name; | 70 | char *root, *name; |
71 | const char *seg_name; | 71 | const char *seg_name; |
72 | int len, seg_len; | 72 | int len, seg_len, root_len; |
73 | 73 | ||
74 | len = 0; | 74 | len = 0; |
75 | parent = dentry; | 75 | parent = dentry; |
@@ -81,7 +81,8 @@ static char *dentry_name(struct dentry *dentry, int extra) | |||
81 | } | 81 | } |
82 | 82 | ||
83 | root = "proc"; | 83 | root = "proc"; |
84 | len += strlen(root); | 84 | root_len = strlen(root); |
85 | len += root_len; | ||
85 | name = kmalloc(len + extra + 1, GFP_KERNEL); | 86 | name = kmalloc(len + extra + 1, GFP_KERNEL); |
86 | if (name == NULL) | 87 | if (name == NULL) |
87 | return NULL; | 88 | return NULL; |
@@ -91,7 +92,7 @@ static char *dentry_name(struct dentry *dentry, int extra) | |||
91 | while (parent->d_parent != parent) { | 92 | while (parent->d_parent != parent) { |
92 | if (is_pid(parent)) { | 93 | if (is_pid(parent)) { |
93 | seg_name = "pid"; | 94 | seg_name = "pid"; |
94 | seg_len = strlen("pid"); | 95 | seg_len = strlen(seg_name); |
95 | } | 96 | } |
96 | else { | 97 | else { |
97 | seg_name = parent->d_name.name; | 98 | seg_name = parent->d_name.name; |
@@ -100,10 +101,10 @@ static char *dentry_name(struct dentry *dentry, int extra) | |||
100 | 101 | ||
101 | len -= seg_len + 1; | 102 | len -= seg_len + 1; |
102 | name[len] = '/'; | 103 | name[len] = '/'; |
103 | strncpy(&name[len + 1], seg_name, seg_len); | 104 | memcpy(&name[len + 1], seg_name, seg_len); |
104 | parent = parent->d_parent; | 105 | parent = parent->d_parent; |
105 | } | 106 | } |
106 | strncpy(name, root, strlen(root)); | 107 | memcpy(name, root, root_len); |
107 | return name; | 108 | return name; |
108 | } | 109 | } |
109 | 110 | ||
@@ -542,8 +543,8 @@ static const struct file_operations hppfs_file_fops = { | |||
542 | }; | 543 | }; |
543 | 544 | ||
544 | struct hppfs_dirent { | 545 | struct hppfs_dirent { |
545 | void *vfs_dirent; | 546 | struct dir_context ctx; |
546 | filldir_t filldir; | 547 | struct dir_context *caller; |
547 | struct dentry *dentry; | 548 | struct dentry *dentry; |
548 | }; | 549 | }; |
549 | 550 | ||
@@ -555,34 +556,29 @@ static int hppfs_filldir(void *d, const char *name, int size, | |||
555 | if (file_removed(dirent->dentry, name)) | 556 | if (file_removed(dirent->dentry, name)) |
556 | return 0; | 557 | return 0; |
557 | 558 | ||
558 | return (*dirent->filldir)(dirent->vfs_dirent, name, size, offset, | 559 | dirent->caller->pos = dirent->ctx.pos; |
559 | inode, type); | 560 | return !dir_emit(dirent->caller, name, size, inode, type); |
560 | } | 561 | } |
561 | 562 | ||
562 | static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) | 563 | static int hppfs_readdir(struct file *file, struct dir_context *ctx) |
563 | { | 564 | { |
564 | struct hppfs_private *data = file->private_data; | 565 | struct hppfs_private *data = file->private_data; |
565 | struct file *proc_file = data->proc_file; | 566 | struct file *proc_file = data->proc_file; |
566 | int (*readdir)(struct file *, void *, filldir_t); | 567 | struct hppfs_dirent d = { |
567 | struct hppfs_dirent dirent = ((struct hppfs_dirent) | 568 | .ctx.actor = hppfs_filldir, |
568 | { .vfs_dirent = ent, | 569 | .caller = ctx, |
569 | .filldir = filldir, | 570 | .dentry = file->f_path.dentry |
570 | .dentry = file->f_path.dentry | 571 | }; |
571 | }); | ||
572 | int err; | 572 | int err; |
573 | 573 | proc_file->f_pos = ctx->pos; | |
574 | readdir = file_inode(proc_file)->i_fop->readdir; | 574 | err = iterate_dir(proc_file, &d.ctx); |
575 | 575 | ctx->pos = d.ctx.pos; | |
576 | proc_file->f_pos = file->f_pos; | ||
577 | err = (*readdir)(proc_file, &dirent, hppfs_filldir); | ||
578 | file->f_pos = proc_file->f_pos; | ||
579 | |||
580 | return err; | 576 | return err; |
581 | } | 577 | } |
582 | 578 | ||
583 | static const struct file_operations hppfs_dir_fops = { | 579 | static const struct file_operations hppfs_dir_fops = { |
584 | .owner = NULL, | 580 | .owner = NULL, |
585 | .readdir = hppfs_readdir, | 581 | .iterate = hppfs_readdir, |
586 | .open = hppfs_dir_open, | 582 | .open = hppfs_dir_open, |
587 | .llseek = default_llseek, | 583 | .llseek = default_llseek, |
588 | .release = hppfs_release, | 584 | .release = hppfs_release, |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a3f868ae3fd4..34423978b170 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -463,6 +463,14 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, | |||
463 | return inode; | 463 | return inode; |
464 | } | 464 | } |
465 | 465 | ||
466 | /* | ||
467 | * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never | ||
468 | * be taken from reclaim -- unlike regular filesystems. This needs an | ||
469 | * annotation because huge_pmd_share() does an allocation under | ||
470 | * i_mmap_mutex. | ||
471 | */ | ||
472 | struct lock_class_key hugetlbfs_i_mmap_mutex_key; | ||
473 | |||
466 | static struct inode *hugetlbfs_get_inode(struct super_block *sb, | 474 | static struct inode *hugetlbfs_get_inode(struct super_block *sb, |
467 | struct inode *dir, | 475 | struct inode *dir, |
468 | umode_t mode, dev_t dev) | 476 | umode_t mode, dev_t dev) |
@@ -474,6 +482,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, | |||
474 | struct hugetlbfs_inode_info *info; | 482 | struct hugetlbfs_inode_info *info; |
475 | inode->i_ino = get_next_ino(); | 483 | inode->i_ino = get_next_ino(); |
476 | inode_init_owner(inode, dir, mode); | 484 | inode_init_owner(inode, dir, mode); |
485 | lockdep_set_class(&inode->i_mapping->i_mmap_mutex, | ||
486 | &hugetlbfs_i_mmap_mutex_key); | ||
477 | inode->i_mapping->a_ops = &hugetlbfs_aops; | 487 | inode->i_mapping->a_ops = &hugetlbfs_aops; |
478 | inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; | 488 | inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; |
479 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 489 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
diff --git a/fs/inode.c b/fs/inode.c index 00d5fc3b86e1..d6dfb09c8280 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -333,8 +333,10 @@ EXPORT_SYMBOL(set_nlink); | |||
333 | */ | 333 | */ |
334 | void inc_nlink(struct inode *inode) | 334 | void inc_nlink(struct inode *inode) |
335 | { | 335 | { |
336 | if (WARN_ON(inode->i_nlink == 0)) | 336 | if (unlikely(inode->i_nlink == 0)) { |
337 | WARN_ON(!(inode->i_state & I_LINKABLE)); | ||
337 | atomic_long_dec(&inode->i_sb->s_remove_count); | 338 | atomic_long_dec(&inode->i_sb->s_remove_count); |
339 | } | ||
338 | 340 | ||
339 | inode->__i_nlink++; | 341 | inode->__i_nlink++; |
340 | } | 342 | } |
diff --git a/fs/internal.h b/fs/internal.h index 68121584ae37..7c5f01cf619d 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -96,11 +96,12 @@ struct open_flags { | |||
96 | umode_t mode; | 96 | umode_t mode; |
97 | int acc_mode; | 97 | int acc_mode; |
98 | int intent; | 98 | int intent; |
99 | int lookup_flags; | ||
99 | }; | 100 | }; |
100 | extern struct file *do_filp_open(int dfd, struct filename *pathname, | 101 | extern struct file *do_filp_open(int dfd, struct filename *pathname, |
101 | const struct open_flags *op, int flags); | 102 | const struct open_flags *op); |
102 | extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, | 103 | extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, |
103 | const char *, const struct open_flags *, int lookup_flags); | 104 | const char *, const struct open_flags *); |
104 | 105 | ||
105 | extern long do_handle_open(int mountdirfd, | 106 | extern long do_handle_open(int mountdirfd, |
106 | struct file_handle __user *ufh, int open_flag); | 107 | struct file_handle __user *ufh, int open_flag); |
@@ -130,6 +131,7 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); | |||
130 | * read_write.c | 131 | * read_write.c |
131 | */ | 132 | */ |
132 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); | 133 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); |
134 | extern int rw_verify_area(int, struct file *, const loff_t *, size_t); | ||
133 | 135 | ||
134 | /* | 136 | /* |
135 | * splice.c | 137 | * splice.c |
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index a7d5c3c3d4e6..b943cbd963bb 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c | |||
@@ -78,8 +78,8 @@ int get_acorn_filename(struct iso_directory_record *de, | |||
78 | /* | 78 | /* |
79 | * This should _really_ be cleaned up some day.. | 79 | * This should _really_ be cleaned up some day.. |
80 | */ | 80 | */ |
81 | static int do_isofs_readdir(struct inode *inode, struct file *filp, | 81 | static int do_isofs_readdir(struct inode *inode, struct file *file, |
82 | void *dirent, filldir_t filldir, | 82 | struct dir_context *ctx, |
83 | char *tmpname, struct iso_directory_record *tmpde) | 83 | char *tmpname, struct iso_directory_record *tmpde) |
84 | { | 84 | { |
85 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); | 85 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); |
@@ -94,10 +94,10 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
94 | struct iso_directory_record *de; | 94 | struct iso_directory_record *de; |
95 | struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb); | 95 | struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb); |
96 | 96 | ||
97 | offset = filp->f_pos & (bufsize - 1); | 97 | offset = ctx->pos & (bufsize - 1); |
98 | block = filp->f_pos >> bufbits; | 98 | block = ctx->pos >> bufbits; |
99 | 99 | ||
100 | while (filp->f_pos < inode->i_size) { | 100 | while (ctx->pos < inode->i_size) { |
101 | int de_len; | 101 | int de_len; |
102 | 102 | ||
103 | if (!bh) { | 103 | if (!bh) { |
@@ -108,7 +108,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
108 | 108 | ||
109 | de = (struct iso_directory_record *) (bh->b_data + offset); | 109 | de = (struct iso_directory_record *) (bh->b_data + offset); |
110 | 110 | ||
111 | de_len = *(unsigned char *) de; | 111 | de_len = *(unsigned char *)de; |
112 | 112 | ||
113 | /* | 113 | /* |
114 | * If the length byte is zero, we should move on to the next | 114 | * If the length byte is zero, we should move on to the next |
@@ -119,8 +119,8 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
119 | if (de_len == 0) { | 119 | if (de_len == 0) { |
120 | brelse(bh); | 120 | brelse(bh); |
121 | bh = NULL; | 121 | bh = NULL; |
122 | filp->f_pos = (filp->f_pos + ISOFS_BLOCK_SIZE) & ~(ISOFS_BLOCK_SIZE - 1); | 122 | ctx->pos = (ctx->pos + ISOFS_BLOCK_SIZE) & ~(ISOFS_BLOCK_SIZE - 1); |
123 | block = filp->f_pos >> bufbits; | 123 | block = ctx->pos >> bufbits; |
124 | offset = 0; | 124 | offset = 0; |
125 | continue; | 125 | continue; |
126 | } | 126 | } |
@@ -164,16 +164,16 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
164 | 164 | ||
165 | if (de->flags[-sbi->s_high_sierra] & 0x80) { | 165 | if (de->flags[-sbi->s_high_sierra] & 0x80) { |
166 | first_de = 0; | 166 | first_de = 0; |
167 | filp->f_pos += de_len; | 167 | ctx->pos += de_len; |
168 | continue; | 168 | continue; |
169 | } | 169 | } |
170 | first_de = 1; | 170 | first_de = 1; |
171 | 171 | ||
172 | /* Handle the case of the '.' directory */ | 172 | /* Handle the case of the '.' directory */ |
173 | if (de->name_len[0] == 1 && de->name[0] == 0) { | 173 | if (de->name_len[0] == 1 && de->name[0] == 0) { |
174 | if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino, DT_DIR) < 0) | 174 | if (!dir_emit_dot(file, ctx)) |
175 | break; | 175 | break; |
176 | filp->f_pos += de_len; | 176 | ctx->pos += de_len; |
177 | continue; | 177 | continue; |
178 | } | 178 | } |
179 | 179 | ||
@@ -181,10 +181,9 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
181 | 181 | ||
182 | /* Handle the case of the '..' directory */ | 182 | /* Handle the case of the '..' directory */ |
183 | if (de->name_len[0] == 1 && de->name[0] == 1) { | 183 | if (de->name_len[0] == 1 && de->name[0] == 1) { |
184 | inode_number = parent_ino(filp->f_path.dentry); | 184 | if (!dir_emit_dotdot(file, ctx)) |
185 | if (filldir(dirent, "..", 2, filp->f_pos, inode_number, DT_DIR) < 0) | ||
186 | break; | 185 | break; |
187 | filp->f_pos += de_len; | 186 | ctx->pos += de_len; |
188 | continue; | 187 | continue; |
189 | } | 188 | } |
190 | 189 | ||
@@ -198,7 +197,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
198 | if ((sbi->s_hide && (de->flags[-sbi->s_high_sierra] & 1)) || | 197 | if ((sbi->s_hide && (de->flags[-sbi->s_high_sierra] & 1)) || |
199 | (!sbi->s_showassoc && | 198 | (!sbi->s_showassoc && |
200 | (de->flags[-sbi->s_high_sierra] & 4))) { | 199 | (de->flags[-sbi->s_high_sierra] & 4))) { |
201 | filp->f_pos += de_len; | 200 | ctx->pos += de_len; |
202 | continue; | 201 | continue; |
203 | } | 202 | } |
204 | 203 | ||
@@ -230,10 +229,10 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
230 | } | 229 | } |
231 | } | 230 | } |
232 | if (len > 0) { | 231 | if (len > 0) { |
233 | if (filldir(dirent, p, len, filp->f_pos, inode_number, DT_UNKNOWN) < 0) | 232 | if (!dir_emit(ctx, p, len, inode_number, DT_UNKNOWN)) |
234 | break; | 233 | break; |
235 | } | 234 | } |
236 | filp->f_pos += de_len; | 235 | ctx->pos += de_len; |
237 | 236 | ||
238 | continue; | 237 | continue; |
239 | } | 238 | } |
@@ -247,13 +246,12 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
247 | * handling split directory entries.. The real work is done by | 246 | * handling split directory entries.. The real work is done by |
248 | * "do_isofs_readdir()". | 247 | * "do_isofs_readdir()". |
249 | */ | 248 | */ |
250 | static int isofs_readdir(struct file *filp, | 249 | static int isofs_readdir(struct file *file, struct dir_context *ctx) |
251 | void *dirent, filldir_t filldir) | ||
252 | { | 250 | { |
253 | int result; | 251 | int result; |
254 | char *tmpname; | 252 | char *tmpname; |
255 | struct iso_directory_record *tmpde; | 253 | struct iso_directory_record *tmpde; |
256 | struct inode *inode = file_inode(filp); | 254 | struct inode *inode = file_inode(file); |
257 | 255 | ||
258 | tmpname = (char *)__get_free_page(GFP_KERNEL); | 256 | tmpname = (char *)__get_free_page(GFP_KERNEL); |
259 | if (tmpname == NULL) | 257 | if (tmpname == NULL) |
@@ -261,7 +259,7 @@ static int isofs_readdir(struct file *filp, | |||
261 | 259 | ||
262 | tmpde = (struct iso_directory_record *) (tmpname+1024); | 260 | tmpde = (struct iso_directory_record *) (tmpname+1024); |
263 | 261 | ||
264 | result = do_isofs_readdir(inode, filp, dirent, filldir, tmpname, tmpde); | 262 | result = do_isofs_readdir(inode, file, ctx, tmpname, tmpde); |
265 | 263 | ||
266 | free_page((unsigned long) tmpname); | 264 | free_page((unsigned long) tmpname); |
267 | return result; | 265 | return result; |
@@ -271,7 +269,7 @@ const struct file_operations isofs_dir_operations = | |||
271 | { | 269 | { |
272 | .llseek = generic_file_llseek, | 270 | .llseek = generic_file_llseek, |
273 | .read = generic_read_dir, | 271 | .read = generic_read_dir, |
274 | .readdir = isofs_readdir, | 272 | .iterate = isofs_readdir, |
275 | }; | 273 | }; |
276 | 274 | ||
277 | /* | 275 | /* |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d9b8aebdeb22..c348d6d88624 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -28,31 +28,23 @@ | |||
28 | 28 | ||
29 | #define BEQUIET | 29 | #define BEQUIET |
30 | 30 | ||
31 | static int isofs_hashi(const struct dentry *parent, const struct inode *inode, | 31 | static int isofs_hashi(const struct dentry *parent, struct qstr *qstr); |
32 | struct qstr *qstr); | 32 | static int isofs_hash(const struct dentry *parent, struct qstr *qstr); |
33 | static int isofs_hash(const struct dentry *parent, const struct inode *inode, | ||
34 | struct qstr *qstr); | ||
35 | static int isofs_dentry_cmpi(const struct dentry *parent, | 33 | static int isofs_dentry_cmpi(const struct dentry *parent, |
36 | const struct inode *pinode, | 34 | const struct dentry *dentry, |
37 | const struct dentry *dentry, const struct inode *inode, | ||
38 | unsigned int len, const char *str, const struct qstr *name); | 35 | unsigned int len, const char *str, const struct qstr *name); |
39 | static int isofs_dentry_cmp(const struct dentry *parent, | 36 | static int isofs_dentry_cmp(const struct dentry *parent, |
40 | const struct inode *pinode, | 37 | const struct dentry *dentry, |
41 | const struct dentry *dentry, const struct inode *inode, | ||
42 | unsigned int len, const char *str, const struct qstr *name); | 38 | unsigned int len, const char *str, const struct qstr *name); |
43 | 39 | ||
44 | #ifdef CONFIG_JOLIET | 40 | #ifdef CONFIG_JOLIET |
45 | static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode, | 41 | static int isofs_hashi_ms(const struct dentry *parent, struct qstr *qstr); |
46 | struct qstr *qstr); | 42 | static int isofs_hash_ms(const struct dentry *parent, struct qstr *qstr); |
47 | static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode, | ||
48 | struct qstr *qstr); | ||
49 | static int isofs_dentry_cmpi_ms(const struct dentry *parent, | 43 | static int isofs_dentry_cmpi_ms(const struct dentry *parent, |
50 | const struct inode *pinode, | 44 | const struct dentry *dentry, |
51 | const struct dentry *dentry, const struct inode *inode, | ||
52 | unsigned int len, const char *str, const struct qstr *name); | 45 | unsigned int len, const char *str, const struct qstr *name); |
53 | static int isofs_dentry_cmp_ms(const struct dentry *parent, | 46 | static int isofs_dentry_cmp_ms(const struct dentry *parent, |
54 | const struct inode *pinode, | 47 | const struct dentry *dentry, |
55 | const struct dentry *dentry, const struct inode *inode, | ||
56 | unsigned int len, const char *str, const struct qstr *name); | 48 | unsigned int len, const char *str, const struct qstr *name); |
57 | #endif | 49 | #endif |
58 | 50 | ||
@@ -265,30 +257,26 @@ static int isofs_dentry_cmp_common( | |||
265 | } | 257 | } |
266 | 258 | ||
267 | static int | 259 | static int |
268 | isofs_hash(const struct dentry *dentry, const struct inode *inode, | 260 | isofs_hash(const struct dentry *dentry, struct qstr *qstr) |
269 | struct qstr *qstr) | ||
270 | { | 261 | { |
271 | return isofs_hash_common(dentry, qstr, 0); | 262 | return isofs_hash_common(dentry, qstr, 0); |
272 | } | 263 | } |
273 | 264 | ||
274 | static int | 265 | static int |
275 | isofs_hashi(const struct dentry *dentry, const struct inode *inode, | 266 | isofs_hashi(const struct dentry *dentry, struct qstr *qstr) |
276 | struct qstr *qstr) | ||
277 | { | 267 | { |
278 | return isofs_hashi_common(dentry, qstr, 0); | 268 | return isofs_hashi_common(dentry, qstr, 0); |
279 | } | 269 | } |
280 | 270 | ||
281 | static int | 271 | static int |
282 | isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode, | 272 | isofs_dentry_cmp(const struct dentry *parent, const struct dentry *dentry, |
283 | const struct dentry *dentry, const struct inode *inode, | ||
284 | unsigned int len, const char *str, const struct qstr *name) | 273 | unsigned int len, const char *str, const struct qstr *name) |
285 | { | 274 | { |
286 | return isofs_dentry_cmp_common(len, str, name, 0, 0); | 275 | return isofs_dentry_cmp_common(len, str, name, 0, 0); |
287 | } | 276 | } |
288 | 277 | ||
289 | static int | 278 | static int |
290 | isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, | 279 | isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, |
291 | const struct dentry *dentry, const struct inode *inode, | ||
292 | unsigned int len, const char *str, const struct qstr *name) | 280 | unsigned int len, const char *str, const struct qstr *name) |
293 | { | 281 | { |
294 | return isofs_dentry_cmp_common(len, str, name, 0, 1); | 282 | return isofs_dentry_cmp_common(len, str, name, 0, 1); |
@@ -296,30 +284,26 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode, | |||
296 | 284 | ||
297 | #ifdef CONFIG_JOLIET | 285 | #ifdef CONFIG_JOLIET |
298 | static int | 286 | static int |
299 | isofs_hash_ms(const struct dentry *dentry, const struct inode *inode, | 287 | isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) |
300 | struct qstr *qstr) | ||
301 | { | 288 | { |
302 | return isofs_hash_common(dentry, qstr, 1); | 289 | return isofs_hash_common(dentry, qstr, 1); |
303 | } | 290 | } |
304 | 291 | ||
305 | static int | 292 | static int |
306 | isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode, | 293 | isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr) |
307 | struct qstr *qstr) | ||
308 | { | 294 | { |
309 | return isofs_hashi_common(dentry, qstr, 1); | 295 | return isofs_hashi_common(dentry, qstr, 1); |
310 | } | 296 | } |
311 | 297 | ||
312 | static int | 298 | static int |
313 | isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode, | 299 | isofs_dentry_cmp_ms(const struct dentry *parent, const struct dentry *dentry, |
314 | const struct dentry *dentry, const struct inode *inode, | ||
315 | unsigned int len, const char *str, const struct qstr *name) | 300 | unsigned int len, const char *str, const struct qstr *name) |
316 | { | 301 | { |
317 | return isofs_dentry_cmp_common(len, str, name, 1, 0); | 302 | return isofs_dentry_cmp_common(len, str, name, 1, 0); |
318 | } | 303 | } |
319 | 304 | ||
320 | static int | 305 | static int |
321 | isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode, | 306 | isofs_dentry_cmpi_ms(const struct dentry *parent, const struct dentry *dentry, |
322 | const struct dentry *dentry, const struct inode *inode, | ||
323 | unsigned int len, const char *str, const struct qstr *name) | 307 | unsigned int len, const char *str, const struct qstr *name) |
324 | { | 308 | { |
325 | return isofs_dentry_cmp_common(len, str, name, 1, 1); | 309 | return isofs_dentry_cmp_common(len, str, name, 1, 1); |
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index c167028844ed..95295640d9c8 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c | |||
@@ -37,8 +37,7 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen) | |||
37 | 37 | ||
38 | qstr.name = compare; | 38 | qstr.name = compare; |
39 | qstr.len = dlen; | 39 | qstr.len = dlen; |
40 | return dentry->d_op->d_compare(NULL, NULL, NULL, NULL, | 40 | return dentry->d_op->d_compare(NULL, NULL, dentry->d_name.len, dentry->d_name.name, &qstr); |
41 | dentry->d_name.len, dentry->d_name.name, &qstr); | ||
42 | } | 41 | } |
43 | 42 | ||
44 | /* | 43 | /* |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index e3e255c0a509..be0c39b66fe0 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -2019,16 +2019,20 @@ zap_buffer_unlocked: | |||
2019 | * void journal_invalidatepage() - invalidate a journal page | 2019 | * void journal_invalidatepage() - invalidate a journal page |
2020 | * @journal: journal to use for flush | 2020 | * @journal: journal to use for flush |
2021 | * @page: page to flush | 2021 | * @page: page to flush |
2022 | * @offset: length of page to invalidate. | 2022 | * @offset: offset of the range to invalidate |
2023 | * @length: length of the range to invalidate | ||
2023 | * | 2024 | * |
2024 | * Reap page buffers containing data after offset in page. | 2025 | * Reap page buffers containing data in specified range in page. |
2025 | */ | 2026 | */ |
2026 | void journal_invalidatepage(journal_t *journal, | 2027 | void journal_invalidatepage(journal_t *journal, |
2027 | struct page *page, | 2028 | struct page *page, |
2028 | unsigned long offset) | 2029 | unsigned int offset, |
2030 | unsigned int length) | ||
2029 | { | 2031 | { |
2030 | struct buffer_head *head, *bh, *next; | 2032 | struct buffer_head *head, *bh, *next; |
2033 | unsigned int stop = offset + length; | ||
2031 | unsigned int curr_off = 0; | 2034 | unsigned int curr_off = 0; |
2035 | int partial_page = (offset || length < PAGE_CACHE_SIZE); | ||
2032 | int may_free = 1; | 2036 | int may_free = 1; |
2033 | 2037 | ||
2034 | if (!PageLocked(page)) | 2038 | if (!PageLocked(page)) |
@@ -2036,6 +2040,8 @@ void journal_invalidatepage(journal_t *journal, | |||
2036 | if (!page_has_buffers(page)) | 2040 | if (!page_has_buffers(page)) |
2037 | return; | 2041 | return; |
2038 | 2042 | ||
2043 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | ||
2044 | |||
2039 | /* We will potentially be playing with lists other than just the | 2045 | /* We will potentially be playing with lists other than just the |
2040 | * data lists (especially for journaled data mode), so be | 2046 | * data lists (especially for journaled data mode), so be |
2041 | * cautious in our locking. */ | 2047 | * cautious in our locking. */ |
@@ -2045,11 +2051,14 @@ void journal_invalidatepage(journal_t *journal, | |||
2045 | unsigned int next_off = curr_off + bh->b_size; | 2051 | unsigned int next_off = curr_off + bh->b_size; |
2046 | next = bh->b_this_page; | 2052 | next = bh->b_this_page; |
2047 | 2053 | ||
2054 | if (next_off > stop) | ||
2055 | return; | ||
2056 | |||
2048 | if (offset <= curr_off) { | 2057 | if (offset <= curr_off) { |
2049 | /* This block is wholly outside the truncation point */ | 2058 | /* This block is wholly outside the truncation point */ |
2050 | lock_buffer(bh); | 2059 | lock_buffer(bh); |
2051 | may_free &= journal_unmap_buffer(journal, bh, | 2060 | may_free &= journal_unmap_buffer(journal, bh, |
2052 | offset > 0); | 2061 | partial_page); |
2053 | unlock_buffer(bh); | 2062 | unlock_buffer(bh); |
2054 | } | 2063 | } |
2055 | curr_off = next_off; | 2064 | curr_off = next_off; |
@@ -2057,7 +2066,7 @@ void journal_invalidatepage(journal_t *journal, | |||
2057 | 2066 | ||
2058 | } while (bh != head); | 2067 | } while (bh != head); |
2059 | 2068 | ||
2060 | if (!offset) { | 2069 | if (!partial_page) { |
2061 | if (may_free && try_to_free_buffers(page)) | 2070 | if (may_free && try_to_free_buffers(page)) |
2062 | J_ASSERT(!page_has_buffers(page)); | 2071 | J_ASSERT(!page_has_buffers(page)); |
2063 | } | 2072 | } |
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig index 69a48c2944da..5a9f5534d57b 100644 --- a/fs/jbd2/Kconfig +++ b/fs/jbd2/Kconfig | |||
@@ -20,7 +20,7 @@ config JBD2 | |||
20 | 20 | ||
21 | config JBD2_DEBUG | 21 | config JBD2_DEBUG |
22 | bool "JBD2 (ext4) debugging support" | 22 | bool "JBD2 (ext4) debugging support" |
23 | depends on JBD2 && DEBUG_FS | 23 | depends on JBD2 |
24 | help | 24 | help |
25 | If you are using the ext4 journaled file system (or | 25 | If you are using the ext4 journaled file system (or |
26 | potentially any other filesystem/device using JBD2), this option | 26 | potentially any other filesystem/device using JBD2), this option |
@@ -29,7 +29,7 @@ config JBD2_DEBUG | |||
29 | By default, the debugging output will be turned off. | 29 | By default, the debugging output will be turned off. |
30 | 30 | ||
31 | If you select Y here, then you will be able to turn on debugging | 31 | If you select Y here, then you will be able to turn on debugging |
32 | with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a | 32 | with "echo N > /sys/module/jbd2/parameters/jbd2_debug", where N is a |
33 | number between 1 and 5. The higher the number, the more debugging | 33 | number between 1 and 5. The higher the number, the more debugging |
34 | output is generated. To turn debugging off again, do | 34 | output is generated. To turn debugging off again, do |
35 | "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug". | 35 | "echo 0 > /sys/module/jbd2/parameters/jbd2_debug". |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index c78841ee81cf..7f34f4716165 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -120,8 +120,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
120 | int nblocks, space_left; | 120 | int nblocks, space_left; |
121 | /* assert_spin_locked(&journal->j_state_lock); */ | 121 | /* assert_spin_locked(&journal->j_state_lock); */ |
122 | 122 | ||
123 | nblocks = jbd_space_needed(journal); | 123 | nblocks = jbd2_space_needed(journal); |
124 | while (__jbd2_log_space_left(journal) < nblocks) { | 124 | while (jbd2_log_space_left(journal) < nblocks) { |
125 | if (journal->j_flags & JBD2_ABORT) | 125 | if (journal->j_flags & JBD2_ABORT) |
126 | return; | 126 | return; |
127 | write_unlock(&journal->j_state_lock); | 127 | write_unlock(&journal->j_state_lock); |
@@ -140,8 +140,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
140 | */ | 140 | */ |
141 | write_lock(&journal->j_state_lock); | 141 | write_lock(&journal->j_state_lock); |
142 | spin_lock(&journal->j_list_lock); | 142 | spin_lock(&journal->j_list_lock); |
143 | nblocks = jbd_space_needed(journal); | 143 | nblocks = jbd2_space_needed(journal); |
144 | space_left = __jbd2_log_space_left(journal); | 144 | space_left = jbd2_log_space_left(journal); |
145 | if (space_left < nblocks) { | 145 | if (space_left < nblocks) { |
146 | int chkpt = journal->j_checkpoint_transactions != NULL; | 146 | int chkpt = journal->j_checkpoint_transactions != NULL; |
147 | tid_t tid = 0; | 147 | tid_t tid = 0; |
@@ -156,7 +156,15 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
156 | /* We were able to recover space; yay! */ | 156 | /* We were able to recover space; yay! */ |
157 | ; | 157 | ; |
158 | } else if (tid) { | 158 | } else if (tid) { |
159 | /* | ||
160 | * jbd2_journal_commit_transaction() may want | ||
161 | * to take the checkpoint_mutex if JBD2_FLUSHED | ||
162 | * is set. So we need to temporarily drop it. | ||
163 | */ | ||
164 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
159 | jbd2_log_wait_commit(journal, tid); | 165 | jbd2_log_wait_commit(journal, tid); |
166 | write_lock(&journal->j_state_lock); | ||
167 | continue; | ||
160 | } else { | 168 | } else { |
161 | printk(KERN_ERR "%s: needed %d blocks and " | 169 | printk(KERN_ERR "%s: needed %d blocks and " |
162 | "only had %d space available\n", | 170 | "only had %d space available\n", |
@@ -625,10 +633,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
625 | 633 | ||
626 | __jbd2_journal_drop_transaction(journal, transaction); | 634 | __jbd2_journal_drop_transaction(journal, transaction); |
627 | jbd2_journal_free_transaction(transaction); | 635 | jbd2_journal_free_transaction(transaction); |
628 | |||
629 | /* Just in case anybody was waiting for more transactions to be | ||
630 | checkpointed... */ | ||
631 | wake_up(&journal->j_wait_logspace); | ||
632 | ret = 1; | 636 | ret = 1; |
633 | out: | 637 | out: |
634 | return ret; | 638 | return ret; |
@@ -690,9 +694,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
690 | J_ASSERT(transaction->t_state == T_FINISHED); | 694 | J_ASSERT(transaction->t_state == T_FINISHED); |
691 | J_ASSERT(transaction->t_buffers == NULL); | 695 | J_ASSERT(transaction->t_buffers == NULL); |
692 | J_ASSERT(transaction->t_forget == NULL); | 696 | J_ASSERT(transaction->t_forget == NULL); |
693 | J_ASSERT(transaction->t_iobuf_list == NULL); | ||
694 | J_ASSERT(transaction->t_shadow_list == NULL); | 697 | J_ASSERT(transaction->t_shadow_list == NULL); |
695 | J_ASSERT(transaction->t_log_list == NULL); | ||
696 | J_ASSERT(transaction->t_checkpoint_list == NULL); | 698 | J_ASSERT(transaction->t_checkpoint_list == NULL); |
697 | J_ASSERT(transaction->t_checkpoint_io_list == NULL); | 699 | J_ASSERT(transaction->t_checkpoint_io_list == NULL); |
698 | J_ASSERT(atomic_read(&transaction->t_updates) == 0); | 700 | J_ASSERT(atomic_read(&transaction->t_updates) == 0); |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0f53946f13c1..559bec1a37b4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -30,15 +30,22 @@ | |||
30 | #include <trace/events/jbd2.h> | 30 | #include <trace/events/jbd2.h> |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * Default IO end handler for temporary BJ_IO buffer_heads. | 33 | * IO end handler for temporary buffer_heads handling writes to the journal. |
34 | */ | 34 | */ |
35 | static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) | 35 | static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) |
36 | { | 36 | { |
37 | struct buffer_head *orig_bh = bh->b_private; | ||
38 | |||
37 | BUFFER_TRACE(bh, ""); | 39 | BUFFER_TRACE(bh, ""); |
38 | if (uptodate) | 40 | if (uptodate) |
39 | set_buffer_uptodate(bh); | 41 | set_buffer_uptodate(bh); |
40 | else | 42 | else |
41 | clear_buffer_uptodate(bh); | 43 | clear_buffer_uptodate(bh); |
44 | if (orig_bh) { | ||
45 | clear_bit_unlock(BH_Shadow, &orig_bh->b_state); | ||
46 | smp_mb__after_clear_bit(); | ||
47 | wake_up_bit(&orig_bh->b_state, BH_Shadow); | ||
48 | } | ||
42 | unlock_buffer(bh); | 49 | unlock_buffer(bh); |
43 | } | 50 | } |
44 | 51 | ||
@@ -85,8 +92,7 @@ nope: | |||
85 | __brelse(bh); | 92 | __brelse(bh); |
86 | } | 93 | } |
87 | 94 | ||
88 | static void jbd2_commit_block_csum_set(journal_t *j, | 95 | static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) |
89 | struct journal_head *descriptor) | ||
90 | { | 96 | { |
91 | struct commit_header *h; | 97 | struct commit_header *h; |
92 | __u32 csum; | 98 | __u32 csum; |
@@ -94,12 +100,11 @@ static void jbd2_commit_block_csum_set(journal_t *j, | |||
94 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 100 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
95 | return; | 101 | return; |
96 | 102 | ||
97 | h = (struct commit_header *)(jh2bh(descriptor)->b_data); | 103 | h = (struct commit_header *)(bh->b_data); |
98 | h->h_chksum_type = 0; | 104 | h->h_chksum_type = 0; |
99 | h->h_chksum_size = 0; | 105 | h->h_chksum_size = 0; |
100 | h->h_chksum[0] = 0; | 106 | h->h_chksum[0] = 0; |
101 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | 107 | csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); |
102 | j->j_blocksize); | ||
103 | h->h_chksum[0] = cpu_to_be32(csum); | 108 | h->h_chksum[0] = cpu_to_be32(csum); |
104 | } | 109 | } |
105 | 110 | ||
@@ -116,7 +121,6 @@ static int journal_submit_commit_record(journal_t *journal, | |||
116 | struct buffer_head **cbh, | 121 | struct buffer_head **cbh, |
117 | __u32 crc32_sum) | 122 | __u32 crc32_sum) |
118 | { | 123 | { |
119 | struct journal_head *descriptor; | ||
120 | struct commit_header *tmp; | 124 | struct commit_header *tmp; |
121 | struct buffer_head *bh; | 125 | struct buffer_head *bh; |
122 | int ret; | 126 | int ret; |
@@ -127,12 +131,10 @@ static int journal_submit_commit_record(journal_t *journal, | |||
127 | if (is_journal_aborted(journal)) | 131 | if (is_journal_aborted(journal)) |
128 | return 0; | 132 | return 0; |
129 | 133 | ||
130 | descriptor = jbd2_journal_get_descriptor_buffer(journal); | 134 | bh = jbd2_journal_get_descriptor_buffer(journal); |
131 | if (!descriptor) | 135 | if (!bh) |
132 | return 1; | 136 | return 1; |
133 | 137 | ||
134 | bh = jh2bh(descriptor); | ||
135 | |||
136 | tmp = (struct commit_header *)bh->b_data; | 138 | tmp = (struct commit_header *)bh->b_data; |
137 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 139 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
138 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); | 140 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); |
@@ -146,9 +148,9 @@ static int journal_submit_commit_record(journal_t *journal, | |||
146 | tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; | 148 | tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; |
147 | tmp->h_chksum[0] = cpu_to_be32(crc32_sum); | 149 | tmp->h_chksum[0] = cpu_to_be32(crc32_sum); |
148 | } | 150 | } |
149 | jbd2_commit_block_csum_set(journal, descriptor); | 151 | jbd2_commit_block_csum_set(journal, bh); |
150 | 152 | ||
151 | JBUFFER_TRACE(descriptor, "submit commit block"); | 153 | BUFFER_TRACE(bh, "submit commit block"); |
152 | lock_buffer(bh); | 154 | lock_buffer(bh); |
153 | clear_buffer_dirty(bh); | 155 | clear_buffer_dirty(bh); |
154 | set_buffer_uptodate(bh); | 156 | set_buffer_uptodate(bh); |
@@ -180,7 +182,6 @@ static int journal_wait_on_commit_record(journal_t *journal, | |||
180 | if (unlikely(!buffer_uptodate(bh))) | 182 | if (unlikely(!buffer_uptodate(bh))) |
181 | ret = -EIO; | 183 | ret = -EIO; |
182 | put_bh(bh); /* One for getblk() */ | 184 | put_bh(bh); /* One for getblk() */ |
183 | jbd2_journal_put_journal_head(bh2jh(bh)); | ||
184 | 185 | ||
185 | return ret; | 186 | return ret; |
186 | } | 187 | } |
@@ -321,7 +322,7 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, | |||
321 | } | 322 | } |
322 | 323 | ||
323 | static void jbd2_descr_block_csum_set(journal_t *j, | 324 | static void jbd2_descr_block_csum_set(journal_t *j, |
324 | struct journal_head *descriptor) | 325 | struct buffer_head *bh) |
325 | { | 326 | { |
326 | struct jbd2_journal_block_tail *tail; | 327 | struct jbd2_journal_block_tail *tail; |
327 | __u32 csum; | 328 | __u32 csum; |
@@ -329,12 +330,10 @@ static void jbd2_descr_block_csum_set(journal_t *j, | |||
329 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 330 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
330 | return; | 331 | return; |
331 | 332 | ||
332 | tail = (struct jbd2_journal_block_tail *) | 333 | tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - |
333 | (jh2bh(descriptor)->b_data + j->j_blocksize - | ||
334 | sizeof(struct jbd2_journal_block_tail)); | 334 | sizeof(struct jbd2_journal_block_tail)); |
335 | tail->t_checksum = 0; | 335 | tail->t_checksum = 0; |
336 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | 336 | csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); |
337 | j->j_blocksize); | ||
338 | tail->t_checksum = cpu_to_be32(csum); | 337 | tail->t_checksum = cpu_to_be32(csum); |
339 | } | 338 | } |
340 | 339 | ||
@@ -343,20 +342,21 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, | |||
343 | { | 342 | { |
344 | struct page *page = bh->b_page; | 343 | struct page *page = bh->b_page; |
345 | __u8 *addr; | 344 | __u8 *addr; |
346 | __u32 csum; | 345 | __u32 csum32; |
347 | 346 | ||
348 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 347 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
349 | return; | 348 | return; |
350 | 349 | ||
351 | sequence = cpu_to_be32(sequence); | 350 | sequence = cpu_to_be32(sequence); |
352 | addr = kmap_atomic(page); | 351 | addr = kmap_atomic(page); |
353 | csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, | 352 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, |
354 | sizeof(sequence)); | 353 | sizeof(sequence)); |
355 | csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data), | 354 | csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data), |
356 | bh->b_size); | 355 | bh->b_size); |
357 | kunmap_atomic(addr); | 356 | kunmap_atomic(addr); |
358 | 357 | ||
359 | tag->t_checksum = cpu_to_be32(csum); | 358 | /* We only have space to store the lower 16 bits of the crc32c. */ |
359 | tag->t_checksum = cpu_to_be16(csum32); | ||
360 | } | 360 | } |
361 | /* | 361 | /* |
362 | * jbd2_journal_commit_transaction | 362 | * jbd2_journal_commit_transaction |
@@ -368,7 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
368 | { | 368 | { |
369 | struct transaction_stats_s stats; | 369 | struct transaction_stats_s stats; |
370 | transaction_t *commit_transaction; | 370 | transaction_t *commit_transaction; |
371 | struct journal_head *jh, *new_jh, *descriptor; | 371 | struct journal_head *jh; |
372 | struct buffer_head *descriptor; | ||
372 | struct buffer_head **wbuf = journal->j_wbuf; | 373 | struct buffer_head **wbuf = journal->j_wbuf; |
373 | int bufs; | 374 | int bufs; |
374 | int flags; | 375 | int flags; |
@@ -392,6 +393,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
392 | tid_t first_tid; | 393 | tid_t first_tid; |
393 | int update_tail; | 394 | int update_tail; |
394 | int csum_size = 0; | 395 | int csum_size = 0; |
396 | LIST_HEAD(io_bufs); | ||
397 | LIST_HEAD(log_bufs); | ||
395 | 398 | ||
396 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 399 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
397 | csum_size = sizeof(struct jbd2_journal_block_tail); | 400 | csum_size = sizeof(struct jbd2_journal_block_tail); |
@@ -424,13 +427,13 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
424 | J_ASSERT(journal->j_committing_transaction == NULL); | 427 | J_ASSERT(journal->j_committing_transaction == NULL); |
425 | 428 | ||
426 | commit_transaction = journal->j_running_transaction; | 429 | commit_transaction = journal->j_running_transaction; |
427 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | ||
428 | 430 | ||
429 | trace_jbd2_start_commit(journal, commit_transaction); | 431 | trace_jbd2_start_commit(journal, commit_transaction); |
430 | jbd_debug(1, "JBD2: starting commit of transaction %d\n", | 432 | jbd_debug(1, "JBD2: starting commit of transaction %d\n", |
431 | commit_transaction->t_tid); | 433 | commit_transaction->t_tid); |
432 | 434 | ||
433 | write_lock(&journal->j_state_lock); | 435 | write_lock(&journal->j_state_lock); |
436 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | ||
434 | commit_transaction->t_state = T_LOCKED; | 437 | commit_transaction->t_state = T_LOCKED; |
435 | 438 | ||
436 | trace_jbd2_commit_locking(journal, commit_transaction); | 439 | trace_jbd2_commit_locking(journal, commit_transaction); |
@@ -520,6 +523,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
520 | */ | 523 | */ |
521 | jbd2_journal_switch_revoke_table(journal); | 524 | jbd2_journal_switch_revoke_table(journal); |
522 | 525 | ||
526 | /* | ||
527 | * Reserved credits cannot be claimed anymore, free them | ||
528 | */ | ||
529 | atomic_sub(atomic_read(&journal->j_reserved_credits), | ||
530 | &commit_transaction->t_outstanding_credits); | ||
531 | |||
523 | trace_jbd2_commit_flushing(journal, commit_transaction); | 532 | trace_jbd2_commit_flushing(journal, commit_transaction); |
524 | stats.run.rs_flushing = jiffies; | 533 | stats.run.rs_flushing = jiffies; |
525 | stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, | 534 | stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, |
@@ -533,7 +542,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
533 | wake_up(&journal->j_wait_transaction_locked); | 542 | wake_up(&journal->j_wait_transaction_locked); |
534 | write_unlock(&journal->j_state_lock); | 543 | write_unlock(&journal->j_state_lock); |
535 | 544 | ||
536 | jbd_debug(3, "JBD2: commit phase 2\n"); | 545 | jbd_debug(3, "JBD2: commit phase 2a\n"); |
537 | 546 | ||
538 | /* | 547 | /* |
539 | * Now start flushing things to disk, in the order they appear | 548 | * Now start flushing things to disk, in the order they appear |
@@ -545,10 +554,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
545 | 554 | ||
546 | blk_start_plug(&plug); | 555 | blk_start_plug(&plug); |
547 | jbd2_journal_write_revoke_records(journal, commit_transaction, | 556 | jbd2_journal_write_revoke_records(journal, commit_transaction, |
548 | WRITE_SYNC); | 557 | &log_bufs, WRITE_SYNC); |
549 | blk_finish_plug(&plug); | 558 | blk_finish_plug(&plug); |
550 | 559 | ||
551 | jbd_debug(3, "JBD2: commit phase 2\n"); | 560 | jbd_debug(3, "JBD2: commit phase 2b\n"); |
552 | 561 | ||
553 | /* | 562 | /* |
554 | * Way to go: we have now written out all of the data for a | 563 | * Way to go: we have now written out all of the data for a |
@@ -571,8 +580,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
571 | atomic_read(&commit_transaction->t_outstanding_credits)); | 580 | atomic_read(&commit_transaction->t_outstanding_credits)); |
572 | 581 | ||
573 | err = 0; | 582 | err = 0; |
574 | descriptor = NULL; | ||
575 | bufs = 0; | 583 | bufs = 0; |
584 | descriptor = NULL; | ||
576 | blk_start_plug(&plug); | 585 | blk_start_plug(&plug); |
577 | while (commit_transaction->t_buffers) { | 586 | while (commit_transaction->t_buffers) { |
578 | 587 | ||
@@ -604,8 +613,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
604 | record the metadata buffer. */ | 613 | record the metadata buffer. */ |
605 | 614 | ||
606 | if (!descriptor) { | 615 | if (!descriptor) { |
607 | struct buffer_head *bh; | ||
608 | |||
609 | J_ASSERT (bufs == 0); | 616 | J_ASSERT (bufs == 0); |
610 | 617 | ||
611 | jbd_debug(4, "JBD2: get descriptor\n"); | 618 | jbd_debug(4, "JBD2: get descriptor\n"); |
@@ -616,26 +623,26 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
616 | continue; | 623 | continue; |
617 | } | 624 | } |
618 | 625 | ||
619 | bh = jh2bh(descriptor); | ||
620 | jbd_debug(4, "JBD2: got buffer %llu (%p)\n", | 626 | jbd_debug(4, "JBD2: got buffer %llu (%p)\n", |
621 | (unsigned long long)bh->b_blocknr, bh->b_data); | 627 | (unsigned long long)descriptor->b_blocknr, |
622 | header = (journal_header_t *)&bh->b_data[0]; | 628 | descriptor->b_data); |
629 | header = (journal_header_t *)descriptor->b_data; | ||
623 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 630 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
624 | header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); | 631 | header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); |
625 | header->h_sequence = cpu_to_be32(commit_transaction->t_tid); | 632 | header->h_sequence = cpu_to_be32(commit_transaction->t_tid); |
626 | 633 | ||
627 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 634 | tagp = &descriptor->b_data[sizeof(journal_header_t)]; |
628 | space_left = bh->b_size - sizeof(journal_header_t); | 635 | space_left = descriptor->b_size - |
636 | sizeof(journal_header_t); | ||
629 | first_tag = 1; | 637 | first_tag = 1; |
630 | set_buffer_jwrite(bh); | 638 | set_buffer_jwrite(descriptor); |
631 | set_buffer_dirty(bh); | 639 | set_buffer_dirty(descriptor); |
632 | wbuf[bufs++] = bh; | 640 | wbuf[bufs++] = descriptor; |
633 | 641 | ||
634 | /* Record it so that we can wait for IO | 642 | /* Record it so that we can wait for IO |
635 | completion later */ | 643 | completion later */ |
636 | BUFFER_TRACE(bh, "ph3: file as descriptor"); | 644 | BUFFER_TRACE(descriptor, "ph3: file as descriptor"); |
637 | jbd2_journal_file_buffer(descriptor, commit_transaction, | 645 | jbd2_file_log_bh(&log_bufs, descriptor); |
638 | BJ_LogCtl); | ||
639 | } | 646 | } |
640 | 647 | ||
641 | /* Where is the buffer to be written? */ | 648 | /* Where is the buffer to be written? */ |
@@ -658,29 +665,22 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
658 | 665 | ||
659 | /* Bump b_count to prevent truncate from stumbling over | 666 | /* Bump b_count to prevent truncate from stumbling over |
660 | the shadowed buffer! @@@ This can go if we ever get | 667 | the shadowed buffer! @@@ This can go if we ever get |
661 | rid of the BJ_IO/BJ_Shadow pairing of buffers. */ | 668 | rid of the shadow pairing of buffers. */ |
662 | atomic_inc(&jh2bh(jh)->b_count); | 669 | atomic_inc(&jh2bh(jh)->b_count); |
663 | 670 | ||
664 | /* Make a temporary IO buffer with which to write it out | ||
665 | (this will requeue both the metadata buffer and the | ||
666 | temporary IO buffer). new_bh goes on BJ_IO*/ | ||
667 | |||
668 | set_bit(BH_JWrite, &jh2bh(jh)->b_state); | ||
669 | /* | 671 | /* |
670 | * akpm: jbd2_journal_write_metadata_buffer() sets | 672 | * Make a temporary IO buffer with which to write it out |
671 | * new_bh->b_transaction to commit_transaction. | 673 | * (this will requeue the metadata buffer to BJ_Shadow). |
672 | * We need to clean this up before we release new_bh | ||
673 | * (which is of type BJ_IO) | ||
674 | */ | 674 | */ |
675 | set_bit(BH_JWrite, &jh2bh(jh)->b_state); | ||
675 | JBUFFER_TRACE(jh, "ph3: write metadata"); | 676 | JBUFFER_TRACE(jh, "ph3: write metadata"); |
676 | flags = jbd2_journal_write_metadata_buffer(commit_transaction, | 677 | flags = jbd2_journal_write_metadata_buffer(commit_transaction, |
677 | jh, &new_jh, blocknr); | 678 | jh, &wbuf[bufs], blocknr); |
678 | if (flags < 0) { | 679 | if (flags < 0) { |
679 | jbd2_journal_abort(journal, flags); | 680 | jbd2_journal_abort(journal, flags); |
680 | continue; | 681 | continue; |
681 | } | 682 | } |
682 | set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); | 683 | jbd2_file_log_bh(&io_bufs, wbuf[bufs]); |
683 | wbuf[bufs++] = jh2bh(new_jh); | ||
684 | 684 | ||
685 | /* Record the new block's tag in the current descriptor | 685 | /* Record the new block's tag in the current descriptor |
686 | buffer */ | 686 | buffer */ |
@@ -694,10 +694,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
694 | tag = (journal_block_tag_t *) tagp; | 694 | tag = (journal_block_tag_t *) tagp; |
695 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); | 695 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); |
696 | tag->t_flags = cpu_to_be16(tag_flag); | 696 | tag->t_flags = cpu_to_be16(tag_flag); |
697 | jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), | 697 | jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], |
698 | commit_transaction->t_tid); | 698 | commit_transaction->t_tid); |
699 | tagp += tag_bytes; | 699 | tagp += tag_bytes; |
700 | space_left -= tag_bytes; | 700 | space_left -= tag_bytes; |
701 | bufs++; | ||
701 | 702 | ||
702 | if (first_tag) { | 703 | if (first_tag) { |
703 | memcpy (tagp, journal->j_uuid, 16); | 704 | memcpy (tagp, journal->j_uuid, 16); |
@@ -809,7 +810,7 @@ start_journal_io: | |||
809 | the log. Before we can commit it, wait for the IO so far to | 810 | the log. Before we can commit it, wait for the IO so far to |
810 | complete. Control buffers being written are on the | 811 | complete. Control buffers being written are on the |
811 | transaction's t_log_list queue, and metadata buffers are on | 812 | transaction's t_log_list queue, and metadata buffers are on |
812 | the t_iobuf_list queue. | 813 | the io_bufs list. |
813 | 814 | ||
814 | Wait for the buffers in reverse order. That way we are | 815 | Wait for the buffers in reverse order. That way we are |
815 | less likely to be woken up until all IOs have completed, and | 816 | less likely to be woken up until all IOs have completed, and |
@@ -818,47 +819,33 @@ start_journal_io: | |||
818 | 819 | ||
819 | jbd_debug(3, "JBD2: commit phase 3\n"); | 820 | jbd_debug(3, "JBD2: commit phase 3\n"); |
820 | 821 | ||
821 | /* | 822 | while (!list_empty(&io_bufs)) { |
822 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 823 | struct buffer_head *bh = list_entry(io_bufs.prev, |
823 | * See __journal_try_to_free_buffer. | 824 | struct buffer_head, |
824 | */ | 825 | b_assoc_buffers); |
825 | wait_for_iobuf: | ||
826 | while (commit_transaction->t_iobuf_list != NULL) { | ||
827 | struct buffer_head *bh; | ||
828 | 826 | ||
829 | jh = commit_transaction->t_iobuf_list->b_tprev; | 827 | wait_on_buffer(bh); |
830 | bh = jh2bh(jh); | 828 | cond_resched(); |
831 | if (buffer_locked(bh)) { | ||
832 | wait_on_buffer(bh); | ||
833 | goto wait_for_iobuf; | ||
834 | } | ||
835 | if (cond_resched()) | ||
836 | goto wait_for_iobuf; | ||
837 | 829 | ||
838 | if (unlikely(!buffer_uptodate(bh))) | 830 | if (unlikely(!buffer_uptodate(bh))) |
839 | err = -EIO; | 831 | err = -EIO; |
840 | 832 | jbd2_unfile_log_bh(bh); | |
841 | clear_buffer_jwrite(bh); | ||
842 | |||
843 | JBUFFER_TRACE(jh, "ph4: unfile after journal write"); | ||
844 | jbd2_journal_unfile_buffer(journal, jh); | ||
845 | 833 | ||
846 | /* | 834 | /* |
847 | * ->t_iobuf_list should contain only dummy buffer_heads | 835 | * The list contains temporary buffer heads created by |
848 | * which were created by jbd2_journal_write_metadata_buffer(). | 836 | * jbd2_journal_write_metadata_buffer(). |
849 | */ | 837 | */ |
850 | BUFFER_TRACE(bh, "dumping temporary bh"); | 838 | BUFFER_TRACE(bh, "dumping temporary bh"); |
851 | jbd2_journal_put_journal_head(jh); | ||
852 | __brelse(bh); | 839 | __brelse(bh); |
853 | J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); | 840 | J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); |
854 | free_buffer_head(bh); | 841 | free_buffer_head(bh); |
855 | 842 | ||
856 | /* We also have to unlock and free the corresponding | 843 | /* We also have to refile the corresponding shadowed buffer */ |
857 | shadowed buffer */ | ||
858 | jh = commit_transaction->t_shadow_list->b_tprev; | 844 | jh = commit_transaction->t_shadow_list->b_tprev; |
859 | bh = jh2bh(jh); | 845 | bh = jh2bh(jh); |
860 | clear_bit(BH_JWrite, &bh->b_state); | 846 | clear_buffer_jwrite(bh); |
861 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); | 847 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); |
848 | J_ASSERT_BH(bh, !buffer_shadow(bh)); | ||
862 | 849 | ||
863 | /* The metadata is now released for reuse, but we need | 850 | /* The metadata is now released for reuse, but we need |
864 | to remember it against this transaction so that when | 851 | to remember it against this transaction so that when |
@@ -866,14 +853,6 @@ wait_for_iobuf: | |||
866 | required. */ | 853 | required. */ |
867 | JBUFFER_TRACE(jh, "file as BJ_Forget"); | 854 | JBUFFER_TRACE(jh, "file as BJ_Forget"); |
868 | jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); | 855 | jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); |
869 | /* | ||
870 | * Wake up any transactions which were waiting for this IO to | ||
871 | * complete. The barrier must be here so that changes by | ||
872 | * jbd2_journal_file_buffer() take effect before wake_up_bit() | ||
873 | * does the waitqueue check. | ||
874 | */ | ||
875 | smp_mb(); | ||
876 | wake_up_bit(&bh->b_state, BH_Unshadow); | ||
877 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); | 856 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); |
878 | __brelse(bh); | 857 | __brelse(bh); |
879 | } | 858 | } |
@@ -883,26 +862,19 @@ wait_for_iobuf: | |||
883 | jbd_debug(3, "JBD2: commit phase 4\n"); | 862 | jbd_debug(3, "JBD2: commit phase 4\n"); |
884 | 863 | ||
885 | /* Here we wait for the revoke record and descriptor record buffers */ | 864 | /* Here we wait for the revoke record and descriptor record buffers */ |
886 | wait_for_ctlbuf: | 865 | while (!list_empty(&log_bufs)) { |
887 | while (commit_transaction->t_log_list != NULL) { | ||
888 | struct buffer_head *bh; | 866 | struct buffer_head *bh; |
889 | 867 | ||
890 | jh = commit_transaction->t_log_list->b_tprev; | 868 | bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers); |
891 | bh = jh2bh(jh); | 869 | wait_on_buffer(bh); |
892 | if (buffer_locked(bh)) { | 870 | cond_resched(); |
893 | wait_on_buffer(bh); | ||
894 | goto wait_for_ctlbuf; | ||
895 | } | ||
896 | if (cond_resched()) | ||
897 | goto wait_for_ctlbuf; | ||
898 | 871 | ||
899 | if (unlikely(!buffer_uptodate(bh))) | 872 | if (unlikely(!buffer_uptodate(bh))) |
900 | err = -EIO; | 873 | err = -EIO; |
901 | 874 | ||
902 | BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); | 875 | BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); |
903 | clear_buffer_jwrite(bh); | 876 | clear_buffer_jwrite(bh); |
904 | jbd2_journal_unfile_buffer(journal, jh); | 877 | jbd2_unfile_log_bh(bh); |
905 | jbd2_journal_put_journal_head(jh); | ||
906 | __brelse(bh); /* One for getblk */ | 878 | __brelse(bh); /* One for getblk */ |
907 | /* AKPM: bforget here */ | 879 | /* AKPM: bforget here */ |
908 | } | 880 | } |
@@ -952,9 +924,7 @@ wait_for_iobuf: | |||
952 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); | 924 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
953 | J_ASSERT(commit_transaction->t_buffers == NULL); | 925 | J_ASSERT(commit_transaction->t_buffers == NULL); |
954 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); | 926 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); |
955 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); | ||
956 | J_ASSERT(commit_transaction->t_shadow_list == NULL); | 927 | J_ASSERT(commit_transaction->t_shadow_list == NULL); |
957 | J_ASSERT(commit_transaction->t_log_list == NULL); | ||
958 | 928 | ||
959 | restart_loop: | 929 | restart_loop: |
960 | /* | 930 | /* |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 95457576e434..02c7ad9d7a41 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -103,6 +103,24 @@ EXPORT_SYMBOL(jbd2_inode_cache); | |||
103 | static void __journal_abort_soft (journal_t *journal, int errno); | 103 | static void __journal_abort_soft (journal_t *journal, int errno); |
104 | static int jbd2_journal_create_slab(size_t slab_size); | 104 | static int jbd2_journal_create_slab(size_t slab_size); |
105 | 105 | ||
106 | #ifdef CONFIG_JBD2_DEBUG | ||
107 | void __jbd2_debug(int level, const char *file, const char *func, | ||
108 | unsigned int line, const char *fmt, ...) | ||
109 | { | ||
110 | struct va_format vaf; | ||
111 | va_list args; | ||
112 | |||
113 | if (level > jbd2_journal_enable_debug) | ||
114 | return; | ||
115 | va_start(args, fmt); | ||
116 | vaf.fmt = fmt; | ||
117 | vaf.va = &args; | ||
118 | printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf); | ||
119 | va_end(args); | ||
120 | } | ||
121 | EXPORT_SYMBOL(__jbd2_debug); | ||
122 | #endif | ||
123 | |||
106 | /* Checksumming functions */ | 124 | /* Checksumming functions */ |
107 | int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) | 125 | int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) |
108 | { | 126 | { |
@@ -310,14 +328,12 @@ static void journal_kill_thread(journal_t *journal) | |||
310 | * | 328 | * |
311 | * If the source buffer has already been modified by a new transaction | 329 | * If the source buffer has already been modified by a new transaction |
312 | * since we took the last commit snapshot, we use the frozen copy of | 330 | * since we took the last commit snapshot, we use the frozen copy of |
313 | * that data for IO. If we end up using the existing buffer_head's data | 331 | * that data for IO. If we end up using the existing buffer_head's data |
314 | * for the write, then we *have* to lock the buffer to prevent anyone | 332 | * for the write, then we have to make sure nobody modifies it while the |
315 | * else from using and possibly modifying it while the IO is in | 333 | * IO is in progress. do_get_write_access() handles this. |
316 | * progress. | ||
317 | * | 334 | * |
318 | * The function returns a pointer to the buffer_heads to be used for IO. | 335 | * The function returns a pointer to the buffer_head to be used for IO. |
319 | * | 336 | * |
320 | * We assume that the journal has already been locked in this function. | ||
321 | * | 337 | * |
322 | * Return value: | 338 | * Return value: |
323 | * <0: Error | 339 | * <0: Error |
@@ -330,15 +346,14 @@ static void journal_kill_thread(journal_t *journal) | |||
330 | 346 | ||
331 | int jbd2_journal_write_metadata_buffer(transaction_t *transaction, | 347 | int jbd2_journal_write_metadata_buffer(transaction_t *transaction, |
332 | struct journal_head *jh_in, | 348 | struct journal_head *jh_in, |
333 | struct journal_head **jh_out, | 349 | struct buffer_head **bh_out, |
334 | unsigned long long blocknr) | 350 | sector_t blocknr) |
335 | { | 351 | { |
336 | int need_copy_out = 0; | 352 | int need_copy_out = 0; |
337 | int done_copy_out = 0; | 353 | int done_copy_out = 0; |
338 | int do_escape = 0; | 354 | int do_escape = 0; |
339 | char *mapped_data; | 355 | char *mapped_data; |
340 | struct buffer_head *new_bh; | 356 | struct buffer_head *new_bh; |
341 | struct journal_head *new_jh; | ||
342 | struct page *new_page; | 357 | struct page *new_page; |
343 | unsigned int new_offset; | 358 | unsigned int new_offset; |
344 | struct buffer_head *bh_in = jh2bh(jh_in); | 359 | struct buffer_head *bh_in = jh2bh(jh_in); |
@@ -368,14 +383,13 @@ retry_alloc: | |||
368 | 383 | ||
369 | /* keep subsequent assertions sane */ | 384 | /* keep subsequent assertions sane */ |
370 | atomic_set(&new_bh->b_count, 1); | 385 | atomic_set(&new_bh->b_count, 1); |
371 | new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ | ||
372 | 386 | ||
387 | jbd_lock_bh_state(bh_in); | ||
388 | repeat: | ||
373 | /* | 389 | /* |
374 | * If a new transaction has already done a buffer copy-out, then | 390 | * If a new transaction has already done a buffer copy-out, then |
375 | * we use that version of the data for the commit. | 391 | * we use that version of the data for the commit. |
376 | */ | 392 | */ |
377 | jbd_lock_bh_state(bh_in); | ||
378 | repeat: | ||
379 | if (jh_in->b_frozen_data) { | 393 | if (jh_in->b_frozen_data) { |
380 | done_copy_out = 1; | 394 | done_copy_out = 1; |
381 | new_page = virt_to_page(jh_in->b_frozen_data); | 395 | new_page = virt_to_page(jh_in->b_frozen_data); |
@@ -415,7 +429,7 @@ repeat: | |||
415 | jbd_unlock_bh_state(bh_in); | 429 | jbd_unlock_bh_state(bh_in); |
416 | tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); | 430 | tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); |
417 | if (!tmp) { | 431 | if (!tmp) { |
418 | jbd2_journal_put_journal_head(new_jh); | 432 | brelse(new_bh); |
419 | return -ENOMEM; | 433 | return -ENOMEM; |
420 | } | 434 | } |
421 | jbd_lock_bh_state(bh_in); | 435 | jbd_lock_bh_state(bh_in); |
@@ -426,7 +440,7 @@ repeat: | |||
426 | 440 | ||
427 | jh_in->b_frozen_data = tmp; | 441 | jh_in->b_frozen_data = tmp; |
428 | mapped_data = kmap_atomic(new_page); | 442 | mapped_data = kmap_atomic(new_page); |
429 | memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); | 443 | memcpy(tmp, mapped_data + new_offset, bh_in->b_size); |
430 | kunmap_atomic(mapped_data); | 444 | kunmap_atomic(mapped_data); |
431 | 445 | ||
432 | new_page = virt_to_page(tmp); | 446 | new_page = virt_to_page(tmp); |
@@ -452,14 +466,14 @@ repeat: | |||
452 | } | 466 | } |
453 | 467 | ||
454 | set_bh_page(new_bh, new_page, new_offset); | 468 | set_bh_page(new_bh, new_page, new_offset); |
455 | new_jh->b_transaction = NULL; | 469 | new_bh->b_size = bh_in->b_size; |
456 | new_bh->b_size = jh2bh(jh_in)->b_size; | 470 | new_bh->b_bdev = journal->j_dev; |
457 | new_bh->b_bdev = transaction->t_journal->j_dev; | ||
458 | new_bh->b_blocknr = blocknr; | 471 | new_bh->b_blocknr = blocknr; |
472 | new_bh->b_private = bh_in; | ||
459 | set_buffer_mapped(new_bh); | 473 | set_buffer_mapped(new_bh); |
460 | set_buffer_dirty(new_bh); | 474 | set_buffer_dirty(new_bh); |
461 | 475 | ||
462 | *jh_out = new_jh; | 476 | *bh_out = new_bh; |
463 | 477 | ||
464 | /* | 478 | /* |
465 | * The to-be-written buffer needs to get moved to the io queue, | 479 | * The to-be-written buffer needs to get moved to the io queue, |
@@ -470,11 +484,9 @@ repeat: | |||
470 | spin_lock(&journal->j_list_lock); | 484 | spin_lock(&journal->j_list_lock); |
471 | __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); | 485 | __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); |
472 | spin_unlock(&journal->j_list_lock); | 486 | spin_unlock(&journal->j_list_lock); |
487 | set_buffer_shadow(bh_in); | ||
473 | jbd_unlock_bh_state(bh_in); | 488 | jbd_unlock_bh_state(bh_in); |
474 | 489 | ||
475 | JBUFFER_TRACE(new_jh, "file as BJ_IO"); | ||
476 | jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); | ||
477 | |||
478 | return do_escape | (done_copy_out << 1); | 490 | return do_escape | (done_copy_out << 1); |
479 | } | 491 | } |
480 | 492 | ||
@@ -484,35 +496,6 @@ repeat: | |||
484 | */ | 496 | */ |
485 | 497 | ||
486 | /* | 498 | /* |
487 | * __jbd2_log_space_left: Return the number of free blocks left in the journal. | ||
488 | * | ||
489 | * Called with the journal already locked. | ||
490 | * | ||
491 | * Called under j_state_lock | ||
492 | */ | ||
493 | |||
494 | int __jbd2_log_space_left(journal_t *journal) | ||
495 | { | ||
496 | int left = journal->j_free; | ||
497 | |||
498 | /* assert_spin_locked(&journal->j_state_lock); */ | ||
499 | |||
500 | /* | ||
501 | * Be pessimistic here about the number of those free blocks which | ||
502 | * might be required for log descriptor control blocks. | ||
503 | */ | ||
504 | |||
505 | #define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ | ||
506 | |||
507 | left -= MIN_LOG_RESERVED_BLOCKS; | ||
508 | |||
509 | if (left <= 0) | ||
510 | return 0; | ||
511 | left -= (left >> 3); | ||
512 | return left; | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * Called with j_state_lock locked for writing. | 499 | * Called with j_state_lock locked for writing. |
517 | * Returns true if a transaction commit was started. | 500 | * Returns true if a transaction commit was started. |
518 | */ | 501 | */ |
@@ -564,20 +547,17 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid) | |||
564 | } | 547 | } |
565 | 548 | ||
566 | /* | 549 | /* |
567 | * Force and wait upon a commit if the calling process is not within | 550 | * Force and wait any uncommitted transactions. We can only force the running |
568 | * transaction. This is used for forcing out undo-protected data which contains | 551 | * transaction if we don't have an active handle, otherwise, we will deadlock. |
569 | * bitmaps, when the fs is running out of space. | 552 | * Returns: <0 in case of error, |
570 | * | 553 | * 0 if nothing to commit, |
571 | * We can only force the running transaction if we don't have an active handle; | 554 | * 1 if transaction was successfully committed. |
572 | * otherwise, we will deadlock. | ||
573 | * | ||
574 | * Returns true if a transaction was started. | ||
575 | */ | 555 | */ |
576 | int jbd2_journal_force_commit_nested(journal_t *journal) | 556 | static int __jbd2_journal_force_commit(journal_t *journal) |
577 | { | 557 | { |
578 | transaction_t *transaction = NULL; | 558 | transaction_t *transaction = NULL; |
579 | tid_t tid; | 559 | tid_t tid; |
580 | int need_to_start = 0; | 560 | int need_to_start = 0, ret = 0; |
581 | 561 | ||
582 | read_lock(&journal->j_state_lock); | 562 | read_lock(&journal->j_state_lock); |
583 | if (journal->j_running_transaction && !current->journal_info) { | 563 | if (journal->j_running_transaction && !current->journal_info) { |
@@ -588,16 +568,53 @@ int jbd2_journal_force_commit_nested(journal_t *journal) | |||
588 | transaction = journal->j_committing_transaction; | 568 | transaction = journal->j_committing_transaction; |
589 | 569 | ||
590 | if (!transaction) { | 570 | if (!transaction) { |
571 | /* Nothing to commit */ | ||
591 | read_unlock(&journal->j_state_lock); | 572 | read_unlock(&journal->j_state_lock); |
592 | return 0; /* Nothing to retry */ | 573 | return 0; |
593 | } | 574 | } |
594 | |||
595 | tid = transaction->t_tid; | 575 | tid = transaction->t_tid; |
596 | read_unlock(&journal->j_state_lock); | 576 | read_unlock(&journal->j_state_lock); |
597 | if (need_to_start) | 577 | if (need_to_start) |
598 | jbd2_log_start_commit(journal, tid); | 578 | jbd2_log_start_commit(journal, tid); |
599 | jbd2_log_wait_commit(journal, tid); | 579 | ret = jbd2_log_wait_commit(journal, tid); |
600 | return 1; | 580 | if (!ret) |
581 | ret = 1; | ||
582 | |||
583 | return ret; | ||
584 | } | ||
585 | |||
586 | /** | ||
587 | * Force and wait upon a commit if the calling process is not within | ||
588 | * transaction. This is used for forcing out undo-protected data which contains | ||
589 | * bitmaps, when the fs is running out of space. | ||
590 | * | ||
591 | * @journal: journal to force | ||
592 | * Returns true if progress was made. | ||
593 | */ | ||
594 | int jbd2_journal_force_commit_nested(journal_t *journal) | ||
595 | { | ||
596 | int ret; | ||
597 | |||
598 | ret = __jbd2_journal_force_commit(journal); | ||
599 | return ret > 0; | ||
600 | } | ||
601 | |||
602 | /** | ||
603 | * int journal_force_commit() - force any uncommitted transactions | ||
604 | * @journal: journal to force | ||
605 | * | ||
606 | * Caller want unconditional commit. We can only force the running transaction | ||
607 | * if we don't have an active handle, otherwise, we will deadlock. | ||
608 | */ | ||
609 | int jbd2_journal_force_commit(journal_t *journal) | ||
610 | { | ||
611 | int ret; | ||
612 | |||
613 | J_ASSERT(!current->journal_info); | ||
614 | ret = __jbd2_journal_force_commit(journal); | ||
615 | if (ret > 0) | ||
616 | ret = 0; | ||
617 | return ret; | ||
601 | } | 618 | } |
602 | 619 | ||
603 | /* | 620 | /* |
@@ -798,7 +815,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, | |||
798 | * But we don't bother doing that, so there will be coherency problems with | 815 | * But we don't bother doing that, so there will be coherency problems with |
799 | * mmaps of blockdevs which hold live JBD-controlled filesystems. | 816 | * mmaps of blockdevs which hold live JBD-controlled filesystems. |
800 | */ | 817 | */ |
801 | struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) | 818 | struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) |
802 | { | 819 | { |
803 | struct buffer_head *bh; | 820 | struct buffer_head *bh; |
804 | unsigned long long blocknr; | 821 | unsigned long long blocknr; |
@@ -817,7 +834,7 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) | |||
817 | set_buffer_uptodate(bh); | 834 | set_buffer_uptodate(bh); |
818 | unlock_buffer(bh); | 835 | unlock_buffer(bh); |
819 | BUFFER_TRACE(bh, "return this buffer"); | 836 | BUFFER_TRACE(bh, "return this buffer"); |
820 | return jbd2_journal_add_journal_head(bh); | 837 | return bh; |
821 | } | 838 | } |
822 | 839 | ||
823 | /* | 840 | /* |
@@ -1062,11 +1079,10 @@ static journal_t * journal_init_common (void) | |||
1062 | return NULL; | 1079 | return NULL; |
1063 | 1080 | ||
1064 | init_waitqueue_head(&journal->j_wait_transaction_locked); | 1081 | init_waitqueue_head(&journal->j_wait_transaction_locked); |
1065 | init_waitqueue_head(&journal->j_wait_logspace); | ||
1066 | init_waitqueue_head(&journal->j_wait_done_commit); | 1082 | init_waitqueue_head(&journal->j_wait_done_commit); |
1067 | init_waitqueue_head(&journal->j_wait_checkpoint); | ||
1068 | init_waitqueue_head(&journal->j_wait_commit); | 1083 | init_waitqueue_head(&journal->j_wait_commit); |
1069 | init_waitqueue_head(&journal->j_wait_updates); | 1084 | init_waitqueue_head(&journal->j_wait_updates); |
1085 | init_waitqueue_head(&journal->j_wait_reserved); | ||
1070 | mutex_init(&journal->j_barrier); | 1086 | mutex_init(&journal->j_barrier); |
1071 | mutex_init(&journal->j_checkpoint_mutex); | 1087 | mutex_init(&journal->j_checkpoint_mutex); |
1072 | spin_lock_init(&journal->j_revoke_lock); | 1088 | spin_lock_init(&journal->j_revoke_lock); |
@@ -1076,6 +1092,7 @@ static journal_t * journal_init_common (void) | |||
1076 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); | 1092 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); |
1077 | journal->j_min_batch_time = 0; | 1093 | journal->j_min_batch_time = 0; |
1078 | journal->j_max_batch_time = 15000; /* 15ms */ | 1094 | journal->j_max_batch_time = 15000; /* 15ms */ |
1095 | atomic_set(&journal->j_reserved_credits, 0); | ||
1079 | 1096 | ||
1080 | /* The journal is marked for error until we succeed with recovery! */ | 1097 | /* The journal is marked for error until we succeed with recovery! */ |
1081 | journal->j_flags = JBD2_ABORT; | 1098 | journal->j_flags = JBD2_ABORT; |
@@ -1318,6 +1335,7 @@ static int journal_reset(journal_t *journal) | |||
1318 | static void jbd2_write_superblock(journal_t *journal, int write_op) | 1335 | static void jbd2_write_superblock(journal_t *journal, int write_op) |
1319 | { | 1336 | { |
1320 | struct buffer_head *bh = journal->j_sb_buffer; | 1337 | struct buffer_head *bh = journal->j_sb_buffer; |
1338 | journal_superblock_t *sb = journal->j_superblock; | ||
1321 | int ret; | 1339 | int ret; |
1322 | 1340 | ||
1323 | trace_jbd2_write_superblock(journal, write_op); | 1341 | trace_jbd2_write_superblock(journal, write_op); |
@@ -1339,6 +1357,7 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) | |||
1339 | clear_buffer_write_io_error(bh); | 1357 | clear_buffer_write_io_error(bh); |
1340 | set_buffer_uptodate(bh); | 1358 | set_buffer_uptodate(bh); |
1341 | } | 1359 | } |
1360 | jbd2_superblock_csum_set(journal, sb); | ||
1342 | get_bh(bh); | 1361 | get_bh(bh); |
1343 | bh->b_end_io = end_buffer_write_sync; | 1362 | bh->b_end_io = end_buffer_write_sync; |
1344 | ret = submit_bh(write_op, bh); | 1363 | ret = submit_bh(write_op, bh); |
@@ -1435,7 +1454,6 @@ void jbd2_journal_update_sb_errno(journal_t *journal) | |||
1435 | jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", | 1454 | jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", |
1436 | journal->j_errno); | 1455 | journal->j_errno); |
1437 | sb->s_errno = cpu_to_be32(journal->j_errno); | 1456 | sb->s_errno = cpu_to_be32(journal->j_errno); |
1438 | jbd2_superblock_csum_set(journal, sb); | ||
1439 | read_unlock(&journal->j_state_lock); | 1457 | read_unlock(&journal->j_state_lock); |
1440 | 1458 | ||
1441 | jbd2_write_superblock(journal, WRITE_SYNC); | 1459 | jbd2_write_superblock(journal, WRITE_SYNC); |
@@ -2325,13 +2343,13 @@ static struct journal_head *journal_alloc_journal_head(void) | |||
2325 | #ifdef CONFIG_JBD2_DEBUG | 2343 | #ifdef CONFIG_JBD2_DEBUG |
2326 | atomic_inc(&nr_journal_heads); | 2344 | atomic_inc(&nr_journal_heads); |
2327 | #endif | 2345 | #endif |
2328 | ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); | 2346 | ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); |
2329 | if (!ret) { | 2347 | if (!ret) { |
2330 | jbd_debug(1, "out of memory for journal_head\n"); | 2348 | jbd_debug(1, "out of memory for journal_head\n"); |
2331 | pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__); | 2349 | pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__); |
2332 | while (!ret) { | 2350 | while (!ret) { |
2333 | yield(); | 2351 | yield(); |
2334 | ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); | 2352 | ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); |
2335 | } | 2353 | } |
2336 | } | 2354 | } |
2337 | return ret; | 2355 | return ret; |
@@ -2393,10 +2411,8 @@ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) | |||
2393 | struct journal_head *new_jh = NULL; | 2411 | struct journal_head *new_jh = NULL; |
2394 | 2412 | ||
2395 | repeat: | 2413 | repeat: |
2396 | if (!buffer_jbd(bh)) { | 2414 | if (!buffer_jbd(bh)) |
2397 | new_jh = journal_alloc_journal_head(); | 2415 | new_jh = journal_alloc_journal_head(); |
2398 | memset(new_jh, 0, sizeof(*new_jh)); | ||
2399 | } | ||
2400 | 2416 | ||
2401 | jbd_lock_bh_journal_head(bh); | 2417 | jbd_lock_bh_journal_head(bh); |
2402 | if (buffer_jbd(bh)) { | 2418 | if (buffer_jbd(bh)) { |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 626846bac32f..d4851464b57e 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -399,18 +399,17 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) | |||
399 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, | 399 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, |
400 | void *buf, __u32 sequence) | 400 | void *buf, __u32 sequence) |
401 | { | 401 | { |
402 | __u32 provided, calculated; | 402 | __u32 csum32; |
403 | 403 | ||
404 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 404 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
405 | return 1; | 405 | return 1; |
406 | 406 | ||
407 | sequence = cpu_to_be32(sequence); | 407 | sequence = cpu_to_be32(sequence); |
408 | calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, | 408 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, |
409 | sizeof(sequence)); | 409 | sizeof(sequence)); |
410 | calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize); | 410 | csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); |
411 | provided = be32_to_cpu(tag->t_checksum); | ||
412 | 411 | ||
413 | return provided == cpu_to_be32(calculated); | 412 | return tag->t_checksum == cpu_to_be16(csum32); |
414 | } | 413 | } |
415 | 414 | ||
416 | static int do_one_pass(journal_t *journal, | 415 | static int do_one_pass(journal_t *journal, |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index f30b80b4ce8b..198c9c10276d 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -122,9 +122,10 @@ struct jbd2_revoke_table_s | |||
122 | 122 | ||
123 | #ifdef __KERNEL__ | 123 | #ifdef __KERNEL__ |
124 | static void write_one_revoke_record(journal_t *, transaction_t *, | 124 | static void write_one_revoke_record(journal_t *, transaction_t *, |
125 | struct journal_head **, int *, | 125 | struct list_head *, |
126 | struct buffer_head **, int *, | ||
126 | struct jbd2_revoke_record_s *, int); | 127 | struct jbd2_revoke_record_s *, int); |
127 | static void flush_descriptor(journal_t *, struct journal_head *, int, int); | 128 | static void flush_descriptor(journal_t *, struct buffer_head *, int, int); |
128 | #endif | 129 | #endif |
129 | 130 | ||
130 | /* Utility functions to maintain the revoke table */ | 131 | /* Utility functions to maintain the revoke table */ |
@@ -531,9 +532,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) | |||
531 | */ | 532 | */ |
532 | void jbd2_journal_write_revoke_records(journal_t *journal, | 533 | void jbd2_journal_write_revoke_records(journal_t *journal, |
533 | transaction_t *transaction, | 534 | transaction_t *transaction, |
535 | struct list_head *log_bufs, | ||
534 | int write_op) | 536 | int write_op) |
535 | { | 537 | { |
536 | struct journal_head *descriptor; | 538 | struct buffer_head *descriptor; |
537 | struct jbd2_revoke_record_s *record; | 539 | struct jbd2_revoke_record_s *record; |
538 | struct jbd2_revoke_table_s *revoke; | 540 | struct jbd2_revoke_table_s *revoke; |
539 | struct list_head *hash_list; | 541 | struct list_head *hash_list; |
@@ -553,7 +555,7 @@ void jbd2_journal_write_revoke_records(journal_t *journal, | |||
553 | while (!list_empty(hash_list)) { | 555 | while (!list_empty(hash_list)) { |
554 | record = (struct jbd2_revoke_record_s *) | 556 | record = (struct jbd2_revoke_record_s *) |
555 | hash_list->next; | 557 | hash_list->next; |
556 | write_one_revoke_record(journal, transaction, | 558 | write_one_revoke_record(journal, transaction, log_bufs, |
557 | &descriptor, &offset, | 559 | &descriptor, &offset, |
558 | record, write_op); | 560 | record, write_op); |
559 | count++; | 561 | count++; |
@@ -573,13 +575,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal, | |||
573 | 575 | ||
574 | static void write_one_revoke_record(journal_t *journal, | 576 | static void write_one_revoke_record(journal_t *journal, |
575 | transaction_t *transaction, | 577 | transaction_t *transaction, |
576 | struct journal_head **descriptorp, | 578 | struct list_head *log_bufs, |
579 | struct buffer_head **descriptorp, | ||
577 | int *offsetp, | 580 | int *offsetp, |
578 | struct jbd2_revoke_record_s *record, | 581 | struct jbd2_revoke_record_s *record, |
579 | int write_op) | 582 | int write_op) |
580 | { | 583 | { |
581 | int csum_size = 0; | 584 | int csum_size = 0; |
582 | struct journal_head *descriptor; | 585 | struct buffer_head *descriptor; |
583 | int offset; | 586 | int offset; |
584 | journal_header_t *header; | 587 | journal_header_t *header; |
585 | 588 | ||
@@ -609,26 +612,26 @@ static void write_one_revoke_record(journal_t *journal, | |||
609 | descriptor = jbd2_journal_get_descriptor_buffer(journal); | 612 | descriptor = jbd2_journal_get_descriptor_buffer(journal); |
610 | if (!descriptor) | 613 | if (!descriptor) |
611 | return; | 614 | return; |
612 | header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; | 615 | header = (journal_header_t *)descriptor->b_data; |
613 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 616 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
614 | header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); | 617 | header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); |
615 | header->h_sequence = cpu_to_be32(transaction->t_tid); | 618 | header->h_sequence = cpu_to_be32(transaction->t_tid); |
616 | 619 | ||
617 | /* Record it so that we can wait for IO completion later */ | 620 | /* Record it so that we can wait for IO completion later */ |
618 | JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); | 621 | BUFFER_TRACE(descriptor, "file in log_bufs"); |
619 | jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl); | 622 | jbd2_file_log_bh(log_bufs, descriptor); |
620 | 623 | ||
621 | offset = sizeof(jbd2_journal_revoke_header_t); | 624 | offset = sizeof(jbd2_journal_revoke_header_t); |
622 | *descriptorp = descriptor; | 625 | *descriptorp = descriptor; |
623 | } | 626 | } |
624 | 627 | ||
625 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { | 628 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { |
626 | * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) = | 629 | * ((__be64 *)(&descriptor->b_data[offset])) = |
627 | cpu_to_be64(record->blocknr); | 630 | cpu_to_be64(record->blocknr); |
628 | offset += 8; | 631 | offset += 8; |
629 | 632 | ||
630 | } else { | 633 | } else { |
631 | * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = | 634 | * ((__be32 *)(&descriptor->b_data[offset])) = |
632 | cpu_to_be32(record->blocknr); | 635 | cpu_to_be32(record->blocknr); |
633 | offset += 4; | 636 | offset += 4; |
634 | } | 637 | } |
@@ -636,8 +639,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
636 | *offsetp = offset; | 639 | *offsetp = offset; |
637 | } | 640 | } |
638 | 641 | ||
639 | static void jbd2_revoke_csum_set(journal_t *j, | 642 | static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) |
640 | struct journal_head *descriptor) | ||
641 | { | 643 | { |
642 | struct jbd2_journal_revoke_tail *tail; | 644 | struct jbd2_journal_revoke_tail *tail; |
643 | __u32 csum; | 645 | __u32 csum; |
@@ -645,12 +647,10 @@ static void jbd2_revoke_csum_set(journal_t *j, | |||
645 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 647 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
646 | return; | 648 | return; |
647 | 649 | ||
648 | tail = (struct jbd2_journal_revoke_tail *) | 650 | tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - |
649 | (jh2bh(descriptor)->b_data + j->j_blocksize - | ||
650 | sizeof(struct jbd2_journal_revoke_tail)); | 651 | sizeof(struct jbd2_journal_revoke_tail)); |
651 | tail->r_checksum = 0; | 652 | tail->r_checksum = 0; |
652 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | 653 | csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); |
653 | j->j_blocksize); | ||
654 | tail->r_checksum = cpu_to_be32(csum); | 654 | tail->r_checksum = cpu_to_be32(csum); |
655 | } | 655 | } |
656 | 656 | ||
@@ -662,25 +662,24 @@ static void jbd2_revoke_csum_set(journal_t *j, | |||
662 | */ | 662 | */ |
663 | 663 | ||
664 | static void flush_descriptor(journal_t *journal, | 664 | static void flush_descriptor(journal_t *journal, |
665 | struct journal_head *descriptor, | 665 | struct buffer_head *descriptor, |
666 | int offset, int write_op) | 666 | int offset, int write_op) |
667 | { | 667 | { |
668 | jbd2_journal_revoke_header_t *header; | 668 | jbd2_journal_revoke_header_t *header; |
669 | struct buffer_head *bh = jh2bh(descriptor); | ||
670 | 669 | ||
671 | if (is_journal_aborted(journal)) { | 670 | if (is_journal_aborted(journal)) { |
672 | put_bh(bh); | 671 | put_bh(descriptor); |
673 | return; | 672 | return; |
674 | } | 673 | } |
675 | 674 | ||
676 | header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; | 675 | header = (jbd2_journal_revoke_header_t *)descriptor->b_data; |
677 | header->r_count = cpu_to_be32(offset); | 676 | header->r_count = cpu_to_be32(offset); |
678 | jbd2_revoke_csum_set(journal, descriptor); | 677 | jbd2_revoke_csum_set(journal, descriptor); |
679 | 678 | ||
680 | set_buffer_jwrite(bh); | 679 | set_buffer_jwrite(descriptor); |
681 | BUFFER_TRACE(bh, "write"); | 680 | BUFFER_TRACE(descriptor, "write"); |
682 | set_buffer_dirty(bh); | 681 | set_buffer_dirty(descriptor); |
683 | write_dirty_buffer(bh, write_op); | 682 | write_dirty_buffer(descriptor, write_op); |
684 | } | 683 | } |
685 | #endif | 684 | #endif |
686 | 685 | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 10f524c59ea8..7aa9a32573bb 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -89,7 +89,8 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
89 | transaction->t_expires = jiffies + journal->j_commit_interval; | 89 | transaction->t_expires = jiffies + journal->j_commit_interval; |
90 | spin_lock_init(&transaction->t_handle_lock); | 90 | spin_lock_init(&transaction->t_handle_lock); |
91 | atomic_set(&transaction->t_updates, 0); | 91 | atomic_set(&transaction->t_updates, 0); |
92 | atomic_set(&transaction->t_outstanding_credits, 0); | 92 | atomic_set(&transaction->t_outstanding_credits, |
93 | atomic_read(&journal->j_reserved_credits)); | ||
93 | atomic_set(&transaction->t_handle_count, 0); | 94 | atomic_set(&transaction->t_handle_count, 0); |
94 | INIT_LIST_HEAD(&transaction->t_inode_list); | 95 | INIT_LIST_HEAD(&transaction->t_inode_list); |
95 | INIT_LIST_HEAD(&transaction->t_private_list); | 96 | INIT_LIST_HEAD(&transaction->t_private_list); |
@@ -141,6 +142,112 @@ static inline void update_t_max_wait(transaction_t *transaction, | |||
141 | } | 142 | } |
142 | 143 | ||
143 | /* | 144 | /* |
145 | * Wait until running transaction passes T_LOCKED state. Also starts the commit | ||
146 | * if needed. The function expects running transaction to exist and releases | ||
147 | * j_state_lock. | ||
148 | */ | ||
149 | static void wait_transaction_locked(journal_t *journal) | ||
150 | __releases(journal->j_state_lock) | ||
151 | { | ||
152 | DEFINE_WAIT(wait); | ||
153 | int need_to_start; | ||
154 | tid_t tid = journal->j_running_transaction->t_tid; | ||
155 | |||
156 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, | ||
157 | TASK_UNINTERRUPTIBLE); | ||
158 | need_to_start = !tid_geq(journal->j_commit_request, tid); | ||
159 | read_unlock(&journal->j_state_lock); | ||
160 | if (need_to_start) | ||
161 | jbd2_log_start_commit(journal, tid); | ||
162 | schedule(); | ||
163 | finish_wait(&journal->j_wait_transaction_locked, &wait); | ||
164 | } | ||
165 | |||
166 | static void sub_reserved_credits(journal_t *journal, int blocks) | ||
167 | { | ||
168 | atomic_sub(blocks, &journal->j_reserved_credits); | ||
169 | wake_up(&journal->j_wait_reserved); | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Wait until we can add credits for handle to the running transaction. Called | ||
174 | * with j_state_lock held for reading. Returns 0 if handle joined the running | ||
175 | * transaction. Returns 1 if we had to wait, j_state_lock is dropped, and | ||
176 | * caller must retry. | ||
177 | */ | ||
178 | static int add_transaction_credits(journal_t *journal, int blocks, | ||
179 | int rsv_blocks) | ||
180 | { | ||
181 | transaction_t *t = journal->j_running_transaction; | ||
182 | int needed; | ||
183 | int total = blocks + rsv_blocks; | ||
184 | |||
185 | /* | ||
186 | * If the current transaction is locked down for commit, wait | ||
187 | * for the lock to be released. | ||
188 | */ | ||
189 | if (t->t_state == T_LOCKED) { | ||
190 | wait_transaction_locked(journal); | ||
191 | return 1; | ||
192 | } | ||
193 | |||
194 | /* | ||
195 | * If there is not enough space left in the log to write all | ||
196 | * potential buffers requested by this operation, we need to | ||
197 | * stall pending a log checkpoint to free some more log space. | ||
198 | */ | ||
199 | needed = atomic_add_return(total, &t->t_outstanding_credits); | ||
200 | if (needed > journal->j_max_transaction_buffers) { | ||
201 | /* | ||
202 | * If the current transaction is already too large, | ||
203 | * then start to commit it: we can then go back and | ||
204 | * attach this handle to a new transaction. | ||
205 | */ | ||
206 | atomic_sub(total, &t->t_outstanding_credits); | ||
207 | wait_transaction_locked(journal); | ||
208 | return 1; | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * The commit code assumes that it can get enough log space | ||
213 | * without forcing a checkpoint. This is *critical* for | ||
214 | * correctness: a checkpoint of a buffer which is also | ||
215 | * associated with a committing transaction creates a deadlock, | ||
216 | * so commit simply cannot force through checkpoints. | ||
217 | * | ||
218 | * We must therefore ensure the necessary space in the journal | ||
219 | * *before* starting to dirty potentially checkpointed buffers | ||
220 | * in the new transaction. | ||
221 | */ | ||
222 | if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) { | ||
223 | atomic_sub(total, &t->t_outstanding_credits); | ||
224 | read_unlock(&journal->j_state_lock); | ||
225 | write_lock(&journal->j_state_lock); | ||
226 | if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) | ||
227 | __jbd2_log_wait_for_space(journal); | ||
228 | write_unlock(&journal->j_state_lock); | ||
229 | return 1; | ||
230 | } | ||
231 | |||
232 | /* No reservation? We are done... */ | ||
233 | if (!rsv_blocks) | ||
234 | return 0; | ||
235 | |||
236 | needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits); | ||
237 | /* We allow at most half of a transaction to be reserved */ | ||
238 | if (needed > journal->j_max_transaction_buffers / 2) { | ||
239 | sub_reserved_credits(journal, rsv_blocks); | ||
240 | atomic_sub(total, &t->t_outstanding_credits); | ||
241 | read_unlock(&journal->j_state_lock); | ||
242 | wait_event(journal->j_wait_reserved, | ||
243 | atomic_read(&journal->j_reserved_credits) + rsv_blocks | ||
244 | <= journal->j_max_transaction_buffers / 2); | ||
245 | return 1; | ||
246 | } | ||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | /* | ||
144 | * start_this_handle: Given a handle, deal with any locking or stalling | 251 | * start_this_handle: Given a handle, deal with any locking or stalling |
145 | * needed to make sure that there is enough journal space for the handle | 252 | * needed to make sure that there is enough journal space for the handle |
146 | * to begin. Attach the handle to a transaction and set up the | 253 | * to begin. Attach the handle to a transaction and set up the |
@@ -151,18 +258,24 @@ static int start_this_handle(journal_t *journal, handle_t *handle, | |||
151 | gfp_t gfp_mask) | 258 | gfp_t gfp_mask) |
152 | { | 259 | { |
153 | transaction_t *transaction, *new_transaction = NULL; | 260 | transaction_t *transaction, *new_transaction = NULL; |
154 | tid_t tid; | 261 | int blocks = handle->h_buffer_credits; |
155 | int needed, need_to_start; | 262 | int rsv_blocks = 0; |
156 | int nblocks = handle->h_buffer_credits; | ||
157 | unsigned long ts = jiffies; | 263 | unsigned long ts = jiffies; |
158 | 264 | ||
159 | if (nblocks > journal->j_max_transaction_buffers) { | 265 | /* |
266 | * 1/2 of transaction can be reserved so we can practically handle | ||
267 | * only 1/2 of maximum transaction size per operation | ||
268 | */ | ||
269 | if (WARN_ON(blocks > journal->j_max_transaction_buffers / 2)) { | ||
160 | printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", | 270 | printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", |
161 | current->comm, nblocks, | 271 | current->comm, blocks, |
162 | journal->j_max_transaction_buffers); | 272 | journal->j_max_transaction_buffers / 2); |
163 | return -ENOSPC; | 273 | return -ENOSPC; |
164 | } | 274 | } |
165 | 275 | ||
276 | if (handle->h_rsv_handle) | ||
277 | rsv_blocks = handle->h_rsv_handle->h_buffer_credits; | ||
278 | |||
166 | alloc_transaction: | 279 | alloc_transaction: |
167 | if (!journal->j_running_transaction) { | 280 | if (!journal->j_running_transaction) { |
168 | new_transaction = kmem_cache_zalloc(transaction_cache, | 281 | new_transaction = kmem_cache_zalloc(transaction_cache, |
@@ -199,8 +312,12 @@ repeat: | |||
199 | return -EROFS; | 312 | return -EROFS; |
200 | } | 313 | } |
201 | 314 | ||
202 | /* Wait on the journal's transaction barrier if necessary */ | 315 | /* |
203 | if (journal->j_barrier_count) { | 316 | * Wait on the journal's transaction barrier if necessary. Specifically |
317 | * we allow reserved handles to proceed because otherwise commit could | ||
318 | * deadlock on page writeback not being able to complete. | ||
319 | */ | ||
320 | if (!handle->h_reserved && journal->j_barrier_count) { | ||
204 | read_unlock(&journal->j_state_lock); | 321 | read_unlock(&journal->j_state_lock); |
205 | wait_event(journal->j_wait_transaction_locked, | 322 | wait_event(journal->j_wait_transaction_locked, |
206 | journal->j_barrier_count == 0); | 323 | journal->j_barrier_count == 0); |
@@ -213,7 +330,7 @@ repeat: | |||
213 | goto alloc_transaction; | 330 | goto alloc_transaction; |
214 | write_lock(&journal->j_state_lock); | 331 | write_lock(&journal->j_state_lock); |
215 | if (!journal->j_running_transaction && | 332 | if (!journal->j_running_transaction && |
216 | !journal->j_barrier_count) { | 333 | (handle->h_reserved || !journal->j_barrier_count)) { |
217 | jbd2_get_transaction(journal, new_transaction); | 334 | jbd2_get_transaction(journal, new_transaction); |
218 | new_transaction = NULL; | 335 | new_transaction = NULL; |
219 | } | 336 | } |
@@ -223,85 +340,18 @@ repeat: | |||
223 | 340 | ||
224 | transaction = journal->j_running_transaction; | 341 | transaction = journal->j_running_transaction; |
225 | 342 | ||
226 | /* | 343 | if (!handle->h_reserved) { |
227 | * If the current transaction is locked down for commit, wait for the | 344 | /* We may have dropped j_state_lock - restart in that case */ |
228 | * lock to be released. | 345 | if (add_transaction_credits(journal, blocks, rsv_blocks)) |
229 | */ | 346 | goto repeat; |
230 | if (transaction->t_state == T_LOCKED) { | 347 | } else { |
231 | DEFINE_WAIT(wait); | ||
232 | |||
233 | prepare_to_wait(&journal->j_wait_transaction_locked, | ||
234 | &wait, TASK_UNINTERRUPTIBLE); | ||
235 | read_unlock(&journal->j_state_lock); | ||
236 | schedule(); | ||
237 | finish_wait(&journal->j_wait_transaction_locked, &wait); | ||
238 | goto repeat; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * If there is not enough space left in the log to write all potential | ||
243 | * buffers requested by this operation, we need to stall pending a log | ||
244 | * checkpoint to free some more log space. | ||
245 | */ | ||
246 | needed = atomic_add_return(nblocks, | ||
247 | &transaction->t_outstanding_credits); | ||
248 | |||
249 | if (needed > journal->j_max_transaction_buffers) { | ||
250 | /* | 348 | /* |
251 | * If the current transaction is already too large, then start | 349 | * We have handle reserved so we are allowed to join T_LOCKED |
252 | * to commit it: we can then go back and attach this handle to | 350 | * transaction and we don't have to check for transaction size |
253 | * a new transaction. | 351 | * and journal space. |
254 | */ | 352 | */ |
255 | DEFINE_WAIT(wait); | 353 | sub_reserved_credits(journal, blocks); |
256 | 354 | handle->h_reserved = 0; | |
257 | jbd_debug(2, "Handle %p starting new commit...\n", handle); | ||
258 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
259 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, | ||
260 | TASK_UNINTERRUPTIBLE); | ||
261 | tid = transaction->t_tid; | ||
262 | need_to_start = !tid_geq(journal->j_commit_request, tid); | ||
263 | read_unlock(&journal->j_state_lock); | ||
264 | if (need_to_start) | ||
265 | jbd2_log_start_commit(journal, tid); | ||
266 | schedule(); | ||
267 | finish_wait(&journal->j_wait_transaction_locked, &wait); | ||
268 | goto repeat; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * The commit code assumes that it can get enough log space | ||
273 | * without forcing a checkpoint. This is *critical* for | ||
274 | * correctness: a checkpoint of a buffer which is also | ||
275 | * associated with a committing transaction creates a deadlock, | ||
276 | * so commit simply cannot force through checkpoints. | ||
277 | * | ||
278 | * We must therefore ensure the necessary space in the journal | ||
279 | * *before* starting to dirty potentially checkpointed buffers | ||
280 | * in the new transaction. | ||
281 | * | ||
282 | * The worst part is, any transaction currently committing can | ||
283 | * reduce the free space arbitrarily. Be careful to account for | ||
284 | * those buffers when checkpointing. | ||
285 | */ | ||
286 | |||
287 | /* | ||
288 | * @@@ AKPM: This seems rather over-defensive. We're giving commit | ||
289 | * a _lot_ of headroom: 1/4 of the journal plus the size of | ||
290 | * the committing transaction. Really, we only need to give it | ||
291 | * committing_transaction->t_outstanding_credits plus "enough" for | ||
292 | * the log control blocks. | ||
293 | * Also, this test is inconsistent with the matching one in | ||
294 | * jbd2_journal_extend(). | ||
295 | */ | ||
296 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { | ||
297 | jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); | ||
298 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
299 | read_unlock(&journal->j_state_lock); | ||
300 | write_lock(&journal->j_state_lock); | ||
301 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) | ||
302 | __jbd2_log_wait_for_space(journal); | ||
303 | write_unlock(&journal->j_state_lock); | ||
304 | goto repeat; | ||
305 | } | 355 | } |
306 | 356 | ||
307 | /* OK, account for the buffers that this operation expects to | 357 | /* OK, account for the buffers that this operation expects to |
@@ -309,15 +359,16 @@ repeat: | |||
309 | */ | 359 | */ |
310 | update_t_max_wait(transaction, ts); | 360 | update_t_max_wait(transaction, ts); |
311 | handle->h_transaction = transaction; | 361 | handle->h_transaction = transaction; |
312 | handle->h_requested_credits = nblocks; | 362 | handle->h_requested_credits = blocks; |
313 | handle->h_start_jiffies = jiffies; | 363 | handle->h_start_jiffies = jiffies; |
314 | atomic_inc(&transaction->t_updates); | 364 | atomic_inc(&transaction->t_updates); |
315 | atomic_inc(&transaction->t_handle_count); | 365 | atomic_inc(&transaction->t_handle_count); |
316 | jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", | 366 | jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n", |
317 | handle, nblocks, | 367 | handle, blocks, |
318 | atomic_read(&transaction->t_outstanding_credits), | 368 | atomic_read(&transaction->t_outstanding_credits), |
319 | __jbd2_log_space_left(journal)); | 369 | jbd2_log_space_left(journal)); |
320 | read_unlock(&journal->j_state_lock); | 370 | read_unlock(&journal->j_state_lock); |
371 | current->journal_info = handle; | ||
321 | 372 | ||
322 | lock_map_acquire(&handle->h_lockdep_map); | 373 | lock_map_acquire(&handle->h_lockdep_map); |
323 | jbd2_journal_free_transaction(new_transaction); | 374 | jbd2_journal_free_transaction(new_transaction); |
@@ -348,16 +399,21 @@ static handle_t *new_handle(int nblocks) | |||
348 | * | 399 | * |
349 | * We make sure that the transaction can guarantee at least nblocks of | 400 | * We make sure that the transaction can guarantee at least nblocks of |
350 | * modified buffers in the log. We block until the log can guarantee | 401 | * modified buffers in the log. We block until the log can guarantee |
351 | * that much space. | 402 | * that much space. Additionally, if rsv_blocks > 0, we also create another |
352 | * | 403 | * handle with rsv_blocks reserved blocks in the journal. This handle is |
353 | * This function is visible to journal users (like ext3fs), so is not | 404 | * is stored in h_rsv_handle. It is not attached to any particular transaction |
354 | * called with the journal already locked. | 405 | * and thus doesn't block transaction commit. If the caller uses this reserved |
406 | * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop() | ||
407 | * on the parent handle will dispose the reserved one. Reserved handle has to | ||
408 | * be converted to a normal handle using jbd2_journal_start_reserved() before | ||
409 | * it can be used. | ||
355 | * | 410 | * |
356 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value | 411 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value |
357 | * on failure. | 412 | * on failure. |
358 | */ | 413 | */ |
359 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask, | 414 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks, |
360 | unsigned int type, unsigned int line_no) | 415 | gfp_t gfp_mask, unsigned int type, |
416 | unsigned int line_no) | ||
361 | { | 417 | { |
362 | handle_t *handle = journal_current_handle(); | 418 | handle_t *handle = journal_current_handle(); |
363 | int err; | 419 | int err; |
@@ -374,13 +430,24 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask, | |||
374 | handle = new_handle(nblocks); | 430 | handle = new_handle(nblocks); |
375 | if (!handle) | 431 | if (!handle) |
376 | return ERR_PTR(-ENOMEM); | 432 | return ERR_PTR(-ENOMEM); |
433 | if (rsv_blocks) { | ||
434 | handle_t *rsv_handle; | ||
377 | 435 | ||
378 | current->journal_info = handle; | 436 | rsv_handle = new_handle(rsv_blocks); |
437 | if (!rsv_handle) { | ||
438 | jbd2_free_handle(handle); | ||
439 | return ERR_PTR(-ENOMEM); | ||
440 | } | ||
441 | rsv_handle->h_reserved = 1; | ||
442 | rsv_handle->h_journal = journal; | ||
443 | handle->h_rsv_handle = rsv_handle; | ||
444 | } | ||
379 | 445 | ||
380 | err = start_this_handle(journal, handle, gfp_mask); | 446 | err = start_this_handle(journal, handle, gfp_mask); |
381 | if (err < 0) { | 447 | if (err < 0) { |
448 | if (handle->h_rsv_handle) | ||
449 | jbd2_free_handle(handle->h_rsv_handle); | ||
382 | jbd2_free_handle(handle); | 450 | jbd2_free_handle(handle); |
383 | current->journal_info = NULL; | ||
384 | return ERR_PTR(err); | 451 | return ERR_PTR(err); |
385 | } | 452 | } |
386 | handle->h_type = type; | 453 | handle->h_type = type; |
@@ -395,10 +462,65 @@ EXPORT_SYMBOL(jbd2__journal_start); | |||
395 | 462 | ||
396 | handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | 463 | handle_t *jbd2_journal_start(journal_t *journal, int nblocks) |
397 | { | 464 | { |
398 | return jbd2__journal_start(journal, nblocks, GFP_NOFS, 0, 0); | 465 | return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0); |
399 | } | 466 | } |
400 | EXPORT_SYMBOL(jbd2_journal_start); | 467 | EXPORT_SYMBOL(jbd2_journal_start); |
401 | 468 | ||
469 | void jbd2_journal_free_reserved(handle_t *handle) | ||
470 | { | ||
471 | journal_t *journal = handle->h_journal; | ||
472 | |||
473 | WARN_ON(!handle->h_reserved); | ||
474 | sub_reserved_credits(journal, handle->h_buffer_credits); | ||
475 | jbd2_free_handle(handle); | ||
476 | } | ||
477 | EXPORT_SYMBOL(jbd2_journal_free_reserved); | ||
478 | |||
479 | /** | ||
480 | * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle | ||
481 | * @handle: handle to start | ||
482 | * | ||
483 | * Start handle that has been previously reserved with jbd2_journal_reserve(). | ||
484 | * This attaches @handle to the running transaction (or creates one if there's | ||
485 | * not transaction running). Unlike jbd2_journal_start() this function cannot | ||
486 | * block on journal commit, checkpointing, or similar stuff. It can block on | ||
487 | * memory allocation or frozen journal though. | ||
488 | * | ||
489 | * Return 0 on success, non-zero on error - handle is freed in that case. | ||
490 | */ | ||
491 | int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, | ||
492 | unsigned int line_no) | ||
493 | { | ||
494 | journal_t *journal = handle->h_journal; | ||
495 | int ret = -EIO; | ||
496 | |||
497 | if (WARN_ON(!handle->h_reserved)) { | ||
498 | /* Someone passed in normal handle? Just stop it. */ | ||
499 | jbd2_journal_stop(handle); | ||
500 | return ret; | ||
501 | } | ||
502 | /* | ||
503 | * Usefulness of mixing of reserved and unreserved handles is | ||
504 | * questionable. So far nobody seems to need it so just error out. | ||
505 | */ | ||
506 | if (WARN_ON(current->journal_info)) { | ||
507 | jbd2_journal_free_reserved(handle); | ||
508 | return ret; | ||
509 | } | ||
510 | |||
511 | handle->h_journal = NULL; | ||
512 | /* | ||
513 | * GFP_NOFS is here because callers are likely from writeback or | ||
514 | * similarly constrained call sites | ||
515 | */ | ||
516 | ret = start_this_handle(journal, handle, GFP_NOFS); | ||
517 | if (ret < 0) | ||
518 | jbd2_journal_free_reserved(handle); | ||
519 | handle->h_type = type; | ||
520 | handle->h_line_no = line_no; | ||
521 | return ret; | ||
522 | } | ||
523 | EXPORT_SYMBOL(jbd2_journal_start_reserved); | ||
402 | 524 | ||
403 | /** | 525 | /** |
404 | * int jbd2_journal_extend() - extend buffer credits. | 526 | * int jbd2_journal_extend() - extend buffer credits. |
@@ -423,49 +545,53 @@ EXPORT_SYMBOL(jbd2_journal_start); | |||
423 | int jbd2_journal_extend(handle_t *handle, int nblocks) | 545 | int jbd2_journal_extend(handle_t *handle, int nblocks) |
424 | { | 546 | { |
425 | transaction_t *transaction = handle->h_transaction; | 547 | transaction_t *transaction = handle->h_transaction; |
426 | journal_t *journal = transaction->t_journal; | 548 | journal_t *journal; |
427 | int result; | 549 | int result; |
428 | int wanted; | 550 | int wanted; |
429 | 551 | ||
430 | result = -EIO; | 552 | WARN_ON(!transaction); |
431 | if (is_handle_aborted(handle)) | 553 | if (is_handle_aborted(handle)) |
432 | goto out; | 554 | return -EROFS; |
555 | journal = transaction->t_journal; | ||
433 | 556 | ||
434 | result = 1; | 557 | result = 1; |
435 | 558 | ||
436 | read_lock(&journal->j_state_lock); | 559 | read_lock(&journal->j_state_lock); |
437 | 560 | ||
438 | /* Don't extend a locked-down transaction! */ | 561 | /* Don't extend a locked-down transaction! */ |
439 | if (handle->h_transaction->t_state != T_RUNNING) { | 562 | if (transaction->t_state != T_RUNNING) { |
440 | jbd_debug(3, "denied handle %p %d blocks: " | 563 | jbd_debug(3, "denied handle %p %d blocks: " |
441 | "transaction not running\n", handle, nblocks); | 564 | "transaction not running\n", handle, nblocks); |
442 | goto error_out; | 565 | goto error_out; |
443 | } | 566 | } |
444 | 567 | ||
445 | spin_lock(&transaction->t_handle_lock); | 568 | spin_lock(&transaction->t_handle_lock); |
446 | wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks; | 569 | wanted = atomic_add_return(nblocks, |
570 | &transaction->t_outstanding_credits); | ||
447 | 571 | ||
448 | if (wanted > journal->j_max_transaction_buffers) { | 572 | if (wanted > journal->j_max_transaction_buffers) { |
449 | jbd_debug(3, "denied handle %p %d blocks: " | 573 | jbd_debug(3, "denied handle %p %d blocks: " |
450 | "transaction too large\n", handle, nblocks); | 574 | "transaction too large\n", handle, nblocks); |
575 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
451 | goto unlock; | 576 | goto unlock; |
452 | } | 577 | } |
453 | 578 | ||
454 | if (wanted > __jbd2_log_space_left(journal)) { | 579 | if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) > |
580 | jbd2_log_space_left(journal)) { | ||
455 | jbd_debug(3, "denied handle %p %d blocks: " | 581 | jbd_debug(3, "denied handle %p %d blocks: " |
456 | "insufficient log space\n", handle, nblocks); | 582 | "insufficient log space\n", handle, nblocks); |
583 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
457 | goto unlock; | 584 | goto unlock; |
458 | } | 585 | } |
459 | 586 | ||
460 | trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, | 587 | trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, |
461 | handle->h_transaction->t_tid, | 588 | transaction->t_tid, |
462 | handle->h_type, handle->h_line_no, | 589 | handle->h_type, handle->h_line_no, |
463 | handle->h_buffer_credits, | 590 | handle->h_buffer_credits, |
464 | nblocks); | 591 | nblocks); |
465 | 592 | ||
466 | handle->h_buffer_credits += nblocks; | 593 | handle->h_buffer_credits += nblocks; |
467 | handle->h_requested_credits += nblocks; | 594 | handle->h_requested_credits += nblocks; |
468 | atomic_add(nblocks, &transaction->t_outstanding_credits); | ||
469 | result = 0; | 595 | result = 0; |
470 | 596 | ||
471 | jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); | 597 | jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); |
@@ -473,7 +599,6 @@ unlock: | |||
473 | spin_unlock(&transaction->t_handle_lock); | 599 | spin_unlock(&transaction->t_handle_lock); |
474 | error_out: | 600 | error_out: |
475 | read_unlock(&journal->j_state_lock); | 601 | read_unlock(&journal->j_state_lock); |
476 | out: | ||
477 | return result; | 602 | return result; |
478 | } | 603 | } |
479 | 604 | ||
@@ -490,19 +615,22 @@ out: | |||
490 | * to a running handle, a call to jbd2_journal_restart will commit the | 615 | * to a running handle, a call to jbd2_journal_restart will commit the |
491 | * handle's transaction so far and reattach the handle to a new | 616 | * handle's transaction so far and reattach the handle to a new |
492 | * transaction capabable of guaranteeing the requested number of | 617 | * transaction capabable of guaranteeing the requested number of |
493 | * credits. | 618 | * credits. We preserve reserved handle if there's any attached to the |
619 | * passed in handle. | ||
494 | */ | 620 | */ |
495 | int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) | 621 | int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) |
496 | { | 622 | { |
497 | transaction_t *transaction = handle->h_transaction; | 623 | transaction_t *transaction = handle->h_transaction; |
498 | journal_t *journal = transaction->t_journal; | 624 | journal_t *journal; |
499 | tid_t tid; | 625 | tid_t tid; |
500 | int need_to_start, ret; | 626 | int need_to_start, ret; |
501 | 627 | ||
628 | WARN_ON(!transaction); | ||
502 | /* If we've had an abort of any type, don't even think about | 629 | /* If we've had an abort of any type, don't even think about |
503 | * actually doing the restart! */ | 630 | * actually doing the restart! */ |
504 | if (is_handle_aborted(handle)) | 631 | if (is_handle_aborted(handle)) |
505 | return 0; | 632 | return 0; |
633 | journal = transaction->t_journal; | ||
506 | 634 | ||
507 | /* | 635 | /* |
508 | * First unlink the handle from its current transaction, and start the | 636 | * First unlink the handle from its current transaction, and start the |
@@ -515,12 +643,18 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) | |||
515 | spin_lock(&transaction->t_handle_lock); | 643 | spin_lock(&transaction->t_handle_lock); |
516 | atomic_sub(handle->h_buffer_credits, | 644 | atomic_sub(handle->h_buffer_credits, |
517 | &transaction->t_outstanding_credits); | 645 | &transaction->t_outstanding_credits); |
646 | if (handle->h_rsv_handle) { | ||
647 | sub_reserved_credits(journal, | ||
648 | handle->h_rsv_handle->h_buffer_credits); | ||
649 | } | ||
518 | if (atomic_dec_and_test(&transaction->t_updates)) | 650 | if (atomic_dec_and_test(&transaction->t_updates)) |
519 | wake_up(&journal->j_wait_updates); | 651 | wake_up(&journal->j_wait_updates); |
652 | tid = transaction->t_tid; | ||
520 | spin_unlock(&transaction->t_handle_lock); | 653 | spin_unlock(&transaction->t_handle_lock); |
654 | handle->h_transaction = NULL; | ||
655 | current->journal_info = NULL; | ||
521 | 656 | ||
522 | jbd_debug(2, "restarting handle %p\n", handle); | 657 | jbd_debug(2, "restarting handle %p\n", handle); |
523 | tid = transaction->t_tid; | ||
524 | need_to_start = !tid_geq(journal->j_commit_request, tid); | 658 | need_to_start = !tid_geq(journal->j_commit_request, tid); |
525 | read_unlock(&journal->j_state_lock); | 659 | read_unlock(&journal->j_state_lock); |
526 | if (need_to_start) | 660 | if (need_to_start) |
@@ -557,6 +691,14 @@ void jbd2_journal_lock_updates(journal_t *journal) | |||
557 | write_lock(&journal->j_state_lock); | 691 | write_lock(&journal->j_state_lock); |
558 | ++journal->j_barrier_count; | 692 | ++journal->j_barrier_count; |
559 | 693 | ||
694 | /* Wait until there are no reserved handles */ | ||
695 | if (atomic_read(&journal->j_reserved_credits)) { | ||
696 | write_unlock(&journal->j_state_lock); | ||
697 | wait_event(journal->j_wait_reserved, | ||
698 | atomic_read(&journal->j_reserved_credits) == 0); | ||
699 | write_lock(&journal->j_state_lock); | ||
700 | } | ||
701 | |||
560 | /* Wait until there are no running updates */ | 702 | /* Wait until there are no running updates */ |
561 | while (1) { | 703 | while (1) { |
562 | transaction_t *transaction = journal->j_running_transaction; | 704 | transaction_t *transaction = journal->j_running_transaction; |
@@ -619,6 +761,12 @@ static void warn_dirty_buffer(struct buffer_head *bh) | |||
619 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); | 761 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); |
620 | } | 762 | } |
621 | 763 | ||
764 | static int sleep_on_shadow_bh(void *word) | ||
765 | { | ||
766 | io_schedule(); | ||
767 | return 0; | ||
768 | } | ||
769 | |||
622 | /* | 770 | /* |
623 | * If the buffer is already part of the current transaction, then there | 771 | * If the buffer is already part of the current transaction, then there |
624 | * is nothing we need to do. If it is already part of a prior | 772 | * is nothing we need to do. If it is already part of a prior |
@@ -634,17 +782,16 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, | |||
634 | int force_copy) | 782 | int force_copy) |
635 | { | 783 | { |
636 | struct buffer_head *bh; | 784 | struct buffer_head *bh; |
637 | transaction_t *transaction; | 785 | transaction_t *transaction = handle->h_transaction; |
638 | journal_t *journal; | 786 | journal_t *journal; |
639 | int error; | 787 | int error; |
640 | char *frozen_buffer = NULL; | 788 | char *frozen_buffer = NULL; |
641 | int need_copy = 0; | 789 | int need_copy = 0; |
642 | unsigned long start_lock, time_lock; | 790 | unsigned long start_lock, time_lock; |
643 | 791 | ||
792 | WARN_ON(!transaction); | ||
644 | if (is_handle_aborted(handle)) | 793 | if (is_handle_aborted(handle)) |
645 | return -EROFS; | 794 | return -EROFS; |
646 | |||
647 | transaction = handle->h_transaction; | ||
648 | journal = transaction->t_journal; | 795 | journal = transaction->t_journal; |
649 | 796 | ||
650 | jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); | 797 | jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); |
@@ -754,41 +901,29 @@ repeat: | |||
754 | * journaled. If the primary copy is already going to | 901 | * journaled. If the primary copy is already going to |
755 | * disk then we cannot do copy-out here. */ | 902 | * disk then we cannot do copy-out here. */ |
756 | 903 | ||
757 | if (jh->b_jlist == BJ_Shadow) { | 904 | if (buffer_shadow(bh)) { |
758 | DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); | ||
759 | wait_queue_head_t *wqh; | ||
760 | |||
761 | wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); | ||
762 | |||
763 | JBUFFER_TRACE(jh, "on shadow: sleep"); | 905 | JBUFFER_TRACE(jh, "on shadow: sleep"); |
764 | jbd_unlock_bh_state(bh); | 906 | jbd_unlock_bh_state(bh); |
765 | /* commit wakes up all shadow buffers after IO */ | 907 | wait_on_bit(&bh->b_state, BH_Shadow, |
766 | for ( ; ; ) { | 908 | sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE); |
767 | prepare_to_wait(wqh, &wait.wait, | ||
768 | TASK_UNINTERRUPTIBLE); | ||
769 | if (jh->b_jlist != BJ_Shadow) | ||
770 | break; | ||
771 | schedule(); | ||
772 | } | ||
773 | finish_wait(wqh, &wait.wait); | ||
774 | goto repeat; | 909 | goto repeat; |
775 | } | 910 | } |
776 | 911 | ||
777 | /* Only do the copy if the currently-owning transaction | 912 | /* |
778 | * still needs it. If it is on the Forget list, the | 913 | * Only do the copy if the currently-owning transaction still |
779 | * committing transaction is past that stage. The | 914 | * needs it. If buffer isn't on BJ_Metadata list, the |
780 | * buffer had better remain locked during the kmalloc, | 915 | * committing transaction is past that stage (here we use the |
781 | * but that should be true --- we hold the journal lock | 916 | * fact that BH_Shadow is set under bh_state lock together with |
782 | * still and the buffer is already on the BUF_JOURNAL | 917 | * refiling to BJ_Shadow list and at this point we know the |
783 | * list so won't be flushed. | 918 | * buffer doesn't have BH_Shadow set). |
784 | * | 919 | * |
785 | * Subtle point, though: if this is a get_undo_access, | 920 | * Subtle point, though: if this is a get_undo_access, |
786 | * then we will be relying on the frozen_data to contain | 921 | * then we will be relying on the frozen_data to contain |
787 | * the new value of the committed_data record after the | 922 | * the new value of the committed_data record after the |
788 | * transaction, so we HAVE to force the frozen_data copy | 923 | * transaction, so we HAVE to force the frozen_data copy |
789 | * in that case. */ | 924 | * in that case. |
790 | 925 | */ | |
791 | if (jh->b_jlist != BJ_Forget || force_copy) { | 926 | if (jh->b_jlist == BJ_Metadata || force_copy) { |
792 | JBUFFER_TRACE(jh, "generate frozen data"); | 927 | JBUFFER_TRACE(jh, "generate frozen data"); |
793 | if (!frozen_buffer) { | 928 | if (!frozen_buffer) { |
794 | JBUFFER_TRACE(jh, "allocate memory for buffer"); | 929 | JBUFFER_TRACE(jh, "allocate memory for buffer"); |
@@ -915,14 +1050,16 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) | |||
915 | int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | 1050 | int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) |
916 | { | 1051 | { |
917 | transaction_t *transaction = handle->h_transaction; | 1052 | transaction_t *transaction = handle->h_transaction; |
918 | journal_t *journal = transaction->t_journal; | 1053 | journal_t *journal; |
919 | struct journal_head *jh = jbd2_journal_add_journal_head(bh); | 1054 | struct journal_head *jh = jbd2_journal_add_journal_head(bh); |
920 | int err; | 1055 | int err; |
921 | 1056 | ||
922 | jbd_debug(5, "journal_head %p\n", jh); | 1057 | jbd_debug(5, "journal_head %p\n", jh); |
1058 | WARN_ON(!transaction); | ||
923 | err = -EROFS; | 1059 | err = -EROFS; |
924 | if (is_handle_aborted(handle)) | 1060 | if (is_handle_aborted(handle)) |
925 | goto out; | 1061 | goto out; |
1062 | journal = transaction->t_journal; | ||
926 | err = 0; | 1063 | err = 0; |
927 | 1064 | ||
928 | JBUFFER_TRACE(jh, "entry"); | 1065 | JBUFFER_TRACE(jh, "entry"); |
@@ -1128,12 +1265,14 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh, | |||
1128 | int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | 1265 | int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) |
1129 | { | 1266 | { |
1130 | transaction_t *transaction = handle->h_transaction; | 1267 | transaction_t *transaction = handle->h_transaction; |
1131 | journal_t *journal = transaction->t_journal; | 1268 | journal_t *journal; |
1132 | struct journal_head *jh; | 1269 | struct journal_head *jh; |
1133 | int ret = 0; | 1270 | int ret = 0; |
1134 | 1271 | ||
1272 | WARN_ON(!transaction); | ||
1135 | if (is_handle_aborted(handle)) | 1273 | if (is_handle_aborted(handle)) |
1136 | goto out; | 1274 | return -EROFS; |
1275 | journal = transaction->t_journal; | ||
1137 | jh = jbd2_journal_grab_journal_head(bh); | 1276 | jh = jbd2_journal_grab_journal_head(bh); |
1138 | if (!jh) { | 1277 | if (!jh) { |
1139 | ret = -EUCLEAN; | 1278 | ret = -EUCLEAN; |
@@ -1227,7 +1366,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1227 | 1366 | ||
1228 | JBUFFER_TRACE(jh, "file as BJ_Metadata"); | 1367 | JBUFFER_TRACE(jh, "file as BJ_Metadata"); |
1229 | spin_lock(&journal->j_list_lock); | 1368 | spin_lock(&journal->j_list_lock); |
1230 | __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); | 1369 | __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata); |
1231 | spin_unlock(&journal->j_list_lock); | 1370 | spin_unlock(&journal->j_list_lock); |
1232 | out_unlock_bh: | 1371 | out_unlock_bh: |
1233 | jbd_unlock_bh_state(bh); | 1372 | jbd_unlock_bh_state(bh); |
@@ -1258,12 +1397,17 @@ out: | |||
1258 | int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | 1397 | int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) |
1259 | { | 1398 | { |
1260 | transaction_t *transaction = handle->h_transaction; | 1399 | transaction_t *transaction = handle->h_transaction; |
1261 | journal_t *journal = transaction->t_journal; | 1400 | journal_t *journal; |
1262 | struct journal_head *jh; | 1401 | struct journal_head *jh; |
1263 | int drop_reserve = 0; | 1402 | int drop_reserve = 0; |
1264 | int err = 0; | 1403 | int err = 0; |
1265 | int was_modified = 0; | 1404 | int was_modified = 0; |
1266 | 1405 | ||
1406 | WARN_ON(!transaction); | ||
1407 | if (is_handle_aborted(handle)) | ||
1408 | return -EROFS; | ||
1409 | journal = transaction->t_journal; | ||
1410 | |||
1267 | BUFFER_TRACE(bh, "entry"); | 1411 | BUFFER_TRACE(bh, "entry"); |
1268 | 1412 | ||
1269 | jbd_lock_bh_state(bh); | 1413 | jbd_lock_bh_state(bh); |
@@ -1290,7 +1434,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1290 | */ | 1434 | */ |
1291 | jh->b_modified = 0; | 1435 | jh->b_modified = 0; |
1292 | 1436 | ||
1293 | if (jh->b_transaction == handle->h_transaction) { | 1437 | if (jh->b_transaction == transaction) { |
1294 | J_ASSERT_JH(jh, !jh->b_frozen_data); | 1438 | J_ASSERT_JH(jh, !jh->b_frozen_data); |
1295 | 1439 | ||
1296 | /* If we are forgetting a buffer which is already part | 1440 | /* If we are forgetting a buffer which is already part |
@@ -1385,19 +1529,21 @@ drop: | |||
1385 | int jbd2_journal_stop(handle_t *handle) | 1529 | int jbd2_journal_stop(handle_t *handle) |
1386 | { | 1530 | { |
1387 | transaction_t *transaction = handle->h_transaction; | 1531 | transaction_t *transaction = handle->h_transaction; |
1388 | journal_t *journal = transaction->t_journal; | 1532 | journal_t *journal; |
1389 | int err, wait_for_commit = 0; | 1533 | int err = 0, wait_for_commit = 0; |
1390 | tid_t tid; | 1534 | tid_t tid; |
1391 | pid_t pid; | 1535 | pid_t pid; |
1392 | 1536 | ||
1537 | if (!transaction) | ||
1538 | goto free_and_exit; | ||
1539 | journal = transaction->t_journal; | ||
1540 | |||
1393 | J_ASSERT(journal_current_handle() == handle); | 1541 | J_ASSERT(journal_current_handle() == handle); |
1394 | 1542 | ||
1395 | if (is_handle_aborted(handle)) | 1543 | if (is_handle_aborted(handle)) |
1396 | err = -EIO; | 1544 | err = -EIO; |
1397 | else { | 1545 | else |
1398 | J_ASSERT(atomic_read(&transaction->t_updates) > 0); | 1546 | J_ASSERT(atomic_read(&transaction->t_updates) > 0); |
1399 | err = 0; | ||
1400 | } | ||
1401 | 1547 | ||
1402 | if (--handle->h_ref > 0) { | 1548 | if (--handle->h_ref > 0) { |
1403 | jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, | 1549 | jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, |
@@ -1407,7 +1553,7 @@ int jbd2_journal_stop(handle_t *handle) | |||
1407 | 1553 | ||
1408 | jbd_debug(4, "Handle %p going down\n", handle); | 1554 | jbd_debug(4, "Handle %p going down\n", handle); |
1409 | trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev, | 1555 | trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev, |
1410 | handle->h_transaction->t_tid, | 1556 | transaction->t_tid, |
1411 | handle->h_type, handle->h_line_no, | 1557 | handle->h_type, handle->h_line_no, |
1412 | jiffies - handle->h_start_jiffies, | 1558 | jiffies - handle->h_start_jiffies, |
1413 | handle->h_sync, handle->h_requested_credits, | 1559 | handle->h_sync, handle->h_requested_credits, |
@@ -1518,33 +1664,13 @@ int jbd2_journal_stop(handle_t *handle) | |||
1518 | 1664 | ||
1519 | lock_map_release(&handle->h_lockdep_map); | 1665 | lock_map_release(&handle->h_lockdep_map); |
1520 | 1666 | ||
1667 | if (handle->h_rsv_handle) | ||
1668 | jbd2_journal_free_reserved(handle->h_rsv_handle); | ||
1669 | free_and_exit: | ||
1521 | jbd2_free_handle(handle); | 1670 | jbd2_free_handle(handle); |
1522 | return err; | 1671 | return err; |
1523 | } | 1672 | } |
1524 | 1673 | ||
1525 | /** | ||
1526 | * int jbd2_journal_force_commit() - force any uncommitted transactions | ||
1527 | * @journal: journal to force | ||
1528 | * | ||
1529 | * For synchronous operations: force any uncommitted transactions | ||
1530 | * to disk. May seem kludgy, but it reuses all the handle batching | ||
1531 | * code in a very simple manner. | ||
1532 | */ | ||
1533 | int jbd2_journal_force_commit(journal_t *journal) | ||
1534 | { | ||
1535 | handle_t *handle; | ||
1536 | int ret; | ||
1537 | |||
1538 | handle = jbd2_journal_start(journal, 1); | ||
1539 | if (IS_ERR(handle)) { | ||
1540 | ret = PTR_ERR(handle); | ||
1541 | } else { | ||
1542 | handle->h_sync = 1; | ||
1543 | ret = jbd2_journal_stop(handle); | ||
1544 | } | ||
1545 | return ret; | ||
1546 | } | ||
1547 | |||
1548 | /* | 1674 | /* |
1549 | * | 1675 | * |
1550 | * List management code snippets: various functions for manipulating the | 1676 | * List management code snippets: various functions for manipulating the |
@@ -1601,10 +1727,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) | |||
1601 | * Remove a buffer from the appropriate transaction list. | 1727 | * Remove a buffer from the appropriate transaction list. |
1602 | * | 1728 | * |
1603 | * Note that this function can *change* the value of | 1729 | * Note that this function can *change* the value of |
1604 | * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, | 1730 | * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or |
1605 | * t_log_list or t_reserved_list. If the caller is holding onto a copy of one | 1731 | * t_reserved_list. If the caller is holding onto a copy of one of these |
1606 | * of these pointers, it could go bad. Generally the caller needs to re-read | 1732 | * pointers, it could go bad. Generally the caller needs to re-read the |
1607 | * the pointer from the transaction_t. | 1733 | * pointer from the transaction_t. |
1608 | * | 1734 | * |
1609 | * Called under j_list_lock. | 1735 | * Called under j_list_lock. |
1610 | */ | 1736 | */ |
@@ -1634,15 +1760,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
1634 | case BJ_Forget: | 1760 | case BJ_Forget: |
1635 | list = &transaction->t_forget; | 1761 | list = &transaction->t_forget; |
1636 | break; | 1762 | break; |
1637 | case BJ_IO: | ||
1638 | list = &transaction->t_iobuf_list; | ||
1639 | break; | ||
1640 | case BJ_Shadow: | 1763 | case BJ_Shadow: |
1641 | list = &transaction->t_shadow_list; | 1764 | list = &transaction->t_shadow_list; |
1642 | break; | 1765 | break; |
1643 | case BJ_LogCtl: | ||
1644 | list = &transaction->t_log_list; | ||
1645 | break; | ||
1646 | case BJ_Reserved: | 1766 | case BJ_Reserved: |
1647 | list = &transaction->t_reserved_list; | 1767 | list = &transaction->t_reserved_list; |
1648 | break; | 1768 | break; |
@@ -2034,18 +2154,23 @@ zap_buffer_unlocked: | |||
2034 | * void jbd2_journal_invalidatepage() | 2154 | * void jbd2_journal_invalidatepage() |
2035 | * @journal: journal to use for flush... | 2155 | * @journal: journal to use for flush... |
2036 | * @page: page to flush | 2156 | * @page: page to flush |
2037 | * @offset: length of page to invalidate. | 2157 | * @offset: start of the range to invalidate |
2158 | * @length: length of the range to invalidate | ||
2038 | * | 2159 | * |
2039 | * Reap page buffers containing data after offset in page. Can return -EBUSY | 2160 | * Reap page buffers containing data after in the specified range in page. |
2040 | * if buffers are part of the committing transaction and the page is straddling | 2161 | * Can return -EBUSY if buffers are part of the committing transaction and |
2041 | * i_size. Caller then has to wait for current commit and try again. | 2162 | * the page is straddling i_size. Caller then has to wait for current commit |
2163 | * and try again. | ||
2042 | */ | 2164 | */ |
2043 | int jbd2_journal_invalidatepage(journal_t *journal, | 2165 | int jbd2_journal_invalidatepage(journal_t *journal, |
2044 | struct page *page, | 2166 | struct page *page, |
2045 | unsigned long offset) | 2167 | unsigned int offset, |
2168 | unsigned int length) | ||
2046 | { | 2169 | { |
2047 | struct buffer_head *head, *bh, *next; | 2170 | struct buffer_head *head, *bh, *next; |
2171 | unsigned int stop = offset + length; | ||
2048 | unsigned int curr_off = 0; | 2172 | unsigned int curr_off = 0; |
2173 | int partial_page = (offset || length < PAGE_CACHE_SIZE); | ||
2049 | int may_free = 1; | 2174 | int may_free = 1; |
2050 | int ret = 0; | 2175 | int ret = 0; |
2051 | 2176 | ||
@@ -2054,6 +2179,8 @@ int jbd2_journal_invalidatepage(journal_t *journal, | |||
2054 | if (!page_has_buffers(page)) | 2179 | if (!page_has_buffers(page)) |
2055 | return 0; | 2180 | return 0; |
2056 | 2181 | ||
2182 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | ||
2183 | |||
2057 | /* We will potentially be playing with lists other than just the | 2184 | /* We will potentially be playing with lists other than just the |
2058 | * data lists (especially for journaled data mode), so be | 2185 | * data lists (especially for journaled data mode), so be |
2059 | * cautious in our locking. */ | 2186 | * cautious in our locking. */ |
@@ -2063,10 +2190,13 @@ int jbd2_journal_invalidatepage(journal_t *journal, | |||
2063 | unsigned int next_off = curr_off + bh->b_size; | 2190 | unsigned int next_off = curr_off + bh->b_size; |
2064 | next = bh->b_this_page; | 2191 | next = bh->b_this_page; |
2065 | 2192 | ||
2193 | if (next_off > stop) | ||
2194 | return 0; | ||
2195 | |||
2066 | if (offset <= curr_off) { | 2196 | if (offset <= curr_off) { |
2067 | /* This block is wholly outside the truncation point */ | 2197 | /* This block is wholly outside the truncation point */ |
2068 | lock_buffer(bh); | 2198 | lock_buffer(bh); |
2069 | ret = journal_unmap_buffer(journal, bh, offset > 0); | 2199 | ret = journal_unmap_buffer(journal, bh, partial_page); |
2070 | unlock_buffer(bh); | 2200 | unlock_buffer(bh); |
2071 | if (ret < 0) | 2201 | if (ret < 0) |
2072 | return ret; | 2202 | return ret; |
@@ -2077,7 +2207,7 @@ int jbd2_journal_invalidatepage(journal_t *journal, | |||
2077 | 2207 | ||
2078 | } while (bh != head); | 2208 | } while (bh != head); |
2079 | 2209 | ||
2080 | if (!offset) { | 2210 | if (!partial_page) { |
2081 | if (may_free && try_to_free_buffers(page)) | 2211 | if (may_free && try_to_free_buffers(page)) |
2082 | J_ASSERT(!page_has_buffers(page)); | 2212 | J_ASSERT(!page_has_buffers(page)); |
2083 | } | 2213 | } |
@@ -2138,15 +2268,9 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
2138 | case BJ_Forget: | 2268 | case BJ_Forget: |
2139 | list = &transaction->t_forget; | 2269 | list = &transaction->t_forget; |
2140 | break; | 2270 | break; |
2141 | case BJ_IO: | ||
2142 | list = &transaction->t_iobuf_list; | ||
2143 | break; | ||
2144 | case BJ_Shadow: | 2271 | case BJ_Shadow: |
2145 | list = &transaction->t_shadow_list; | 2272 | list = &transaction->t_shadow_list; |
2146 | break; | 2273 | break; |
2147 | case BJ_LogCtl: | ||
2148 | list = &transaction->t_log_list; | ||
2149 | break; | ||
2150 | case BJ_Reserved: | 2274 | case BJ_Reserved: |
2151 | list = &transaction->t_reserved_list; | 2275 | list = &transaction->t_reserved_list; |
2152 | break; | 2276 | break; |
@@ -2248,10 +2372,12 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) | |||
2248 | int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) | 2372 | int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) |
2249 | { | 2373 | { |
2250 | transaction_t *transaction = handle->h_transaction; | 2374 | transaction_t *transaction = handle->h_transaction; |
2251 | journal_t *journal = transaction->t_journal; | 2375 | journal_t *journal; |
2252 | 2376 | ||
2377 | WARN_ON(!transaction); | ||
2253 | if (is_handle_aborted(handle)) | 2378 | if (is_handle_aborted(handle)) |
2254 | return -EIO; | 2379 | return -EROFS; |
2380 | journal = transaction->t_journal; | ||
2255 | 2381 | ||
2256 | jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, | 2382 | jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, |
2257 | transaction->t_tid); | 2383 | transaction->t_tid); |
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index acd46a4160cb..e3aac222472e 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c | |||
@@ -22,7 +22,7 @@ | |||
22 | #include <linux/time.h> | 22 | #include <linux/time.h> |
23 | #include "nodelist.h" | 23 | #include "nodelist.h" |
24 | 24 | ||
25 | static int jffs2_readdir (struct file *, void *, filldir_t); | 25 | static int jffs2_readdir (struct file *, struct dir_context *); |
26 | 26 | ||
27 | static int jffs2_create (struct inode *,struct dentry *,umode_t, | 27 | static int jffs2_create (struct inode *,struct dentry *,umode_t, |
28 | bool); | 28 | bool); |
@@ -40,7 +40,7 @@ static int jffs2_rename (struct inode *, struct dentry *, | |||
40 | const struct file_operations jffs2_dir_operations = | 40 | const struct file_operations jffs2_dir_operations = |
41 | { | 41 | { |
42 | .read = generic_read_dir, | 42 | .read = generic_read_dir, |
43 | .readdir = jffs2_readdir, | 43 | .iterate = jffs2_readdir, |
44 | .unlocked_ioctl=jffs2_ioctl, | 44 | .unlocked_ioctl=jffs2_ioctl, |
45 | .fsync = jffs2_fsync, | 45 | .fsync = jffs2_fsync, |
46 | .llseek = generic_file_llseek, | 46 | .llseek = generic_file_llseek, |
@@ -114,60 +114,40 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, | |||
114 | /***********************************************************************/ | 114 | /***********************************************************************/ |
115 | 115 | ||
116 | 116 | ||
117 | static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir) | 117 | static int jffs2_readdir(struct file *file, struct dir_context *ctx) |
118 | { | 118 | { |
119 | struct jffs2_inode_info *f; | 119 | struct inode *inode = file_inode(file); |
120 | struct inode *inode = file_inode(filp); | 120 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); |
121 | struct jffs2_full_dirent *fd; | 121 | struct jffs2_full_dirent *fd; |
122 | unsigned long offset, curofs; | 122 | unsigned long curofs = 1; |
123 | 123 | ||
124 | jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n", | 124 | jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n", inode->i_ino); |
125 | file_inode(filp)->i_ino); | ||
126 | 125 | ||
127 | f = JFFS2_INODE_INFO(inode); | 126 | if (!dir_emit_dots(file, ctx)) |
128 | 127 | return 0; | |
129 | offset = filp->f_pos; | ||
130 | |||
131 | if (offset == 0) { | ||
132 | jffs2_dbg(1, "Dirent 0: \".\", ino #%lu\n", inode->i_ino); | ||
133 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
134 | goto out; | ||
135 | offset++; | ||
136 | } | ||
137 | if (offset == 1) { | ||
138 | unsigned long pino = parent_ino(filp->f_path.dentry); | ||
139 | jffs2_dbg(1, "Dirent 1: \"..\", ino #%lu\n", pino); | ||
140 | if (filldir(dirent, "..", 2, 1, pino, DT_DIR) < 0) | ||
141 | goto out; | ||
142 | offset++; | ||
143 | } | ||
144 | 128 | ||
145 | curofs=1; | ||
146 | mutex_lock(&f->sem); | 129 | mutex_lock(&f->sem); |
147 | for (fd = f->dents; fd; fd = fd->next) { | 130 | for (fd = f->dents; fd; fd = fd->next) { |
148 | |||
149 | curofs++; | 131 | curofs++; |
150 | /* First loop: curofs = 2; offset = 2 */ | 132 | /* First loop: curofs = 2; pos = 2 */ |
151 | if (curofs < offset) { | 133 | if (curofs < ctx->pos) { |
152 | jffs2_dbg(2, "Skipping dirent: \"%s\", ino #%u, type %d, because curofs %ld < offset %ld\n", | 134 | jffs2_dbg(2, "Skipping dirent: \"%s\", ino #%u, type %d, because curofs %ld < offset %ld\n", |
153 | fd->name, fd->ino, fd->type, curofs, offset); | 135 | fd->name, fd->ino, fd->type, curofs, (unsigned long)ctx->pos); |
154 | continue; | 136 | continue; |
155 | } | 137 | } |
156 | if (!fd->ino) { | 138 | if (!fd->ino) { |
157 | jffs2_dbg(2, "Skipping deletion dirent \"%s\"\n", | 139 | jffs2_dbg(2, "Skipping deletion dirent \"%s\"\n", |
158 | fd->name); | 140 | fd->name); |
159 | offset++; | 141 | ctx->pos++; |
160 | continue; | 142 | continue; |
161 | } | 143 | } |
162 | jffs2_dbg(2, "Dirent %ld: \"%s\", ino #%u, type %d\n", | 144 | jffs2_dbg(2, "Dirent %ld: \"%s\", ino #%u, type %d\n", |
163 | offset, fd->name, fd->ino, fd->type); | 145 | (unsigned long)ctx->pos, fd->name, fd->ino, fd->type); |
164 | if (filldir(dirent, fd->name, strlen(fd->name), offset, fd->ino, fd->type) < 0) | 146 | if (!dir_emit(ctx, fd->name, strlen(fd->name), fd->ino, fd->type)) |
165 | break; | 147 | break; |
166 | offset++; | 148 | ctx->pos++; |
167 | } | 149 | } |
168 | mutex_unlock(&f->sem); | 150 | mutex_unlock(&f->sem); |
169 | out: | ||
170 | filp->f_pos = offset; | ||
171 | return 0; | 151 | return 0; |
172 | } | 152 | } |
173 | 153 | ||
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 9a55f53be5ff..370d7b6c5942 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c | |||
@@ -346,8 +346,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | |||
346 | printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", | 346 | printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", |
347 | (unsigned long long) blkno, | 347 | (unsigned long long) blkno, |
348 | (unsigned long long) nblocks); | 348 | (unsigned long long) nblocks); |
349 | jfs_error(ip->i_sb, | 349 | jfs_error(ip->i_sb, "block to be freed is outside the map\n"); |
350 | "dbFree: block to be freed is outside the map"); | ||
351 | return -EIO; | 350 | return -EIO; |
352 | } | 351 | } |
353 | 352 | ||
@@ -384,7 +383,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) | |||
384 | 383 | ||
385 | /* free the blocks. */ | 384 | /* free the blocks. */ |
386 | if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { | 385 | if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { |
387 | jfs_error(ip->i_sb, "dbFree: error in block map\n"); | 386 | jfs_error(ip->i_sb, "error in block map\n"); |
388 | release_metapage(mp); | 387 | release_metapage(mp); |
389 | IREAD_UNLOCK(ipbmap); | 388 | IREAD_UNLOCK(ipbmap); |
390 | return (rc); | 389 | return (rc); |
@@ -441,8 +440,7 @@ dbUpdatePMap(struct inode *ipbmap, | |||
441 | printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", | 440 | printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", |
442 | (unsigned long long) blkno, | 441 | (unsigned long long) blkno, |
443 | (unsigned long long) nblocks); | 442 | (unsigned long long) nblocks); |
444 | jfs_error(ipbmap->i_sb, | 443 | jfs_error(ipbmap->i_sb, "blocks are outside the map\n"); |
445 | "dbUpdatePMap: blocks are outside the map"); | ||
446 | return -EIO; | 444 | return -EIO; |
447 | } | 445 | } |
448 | 446 | ||
@@ -726,7 +724,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) | |||
726 | 724 | ||
727 | /* the hint should be within the map */ | 725 | /* the hint should be within the map */ |
728 | if (hint >= mapSize) { | 726 | if (hint >= mapSize) { |
729 | jfs_error(ip->i_sb, "dbAlloc: the hint is outside the map"); | 727 | jfs_error(ip->i_sb, "the hint is outside the map\n"); |
730 | return -EIO; | 728 | return -EIO; |
731 | } | 729 | } |
732 | 730 | ||
@@ -1057,8 +1055,7 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) | |||
1057 | bmp = sbi->bmap; | 1055 | bmp = sbi->bmap; |
1058 | if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) { | 1056 | if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) { |
1059 | IREAD_UNLOCK(ipbmap); | 1057 | IREAD_UNLOCK(ipbmap); |
1060 | jfs_error(ip->i_sb, | 1058 | jfs_error(ip->i_sb, "the block is outside the filesystem\n"); |
1061 | "dbExtend: the block is outside the filesystem"); | ||
1062 | return -EIO; | 1059 | return -EIO; |
1063 | } | 1060 | } |
1064 | 1061 | ||
@@ -1134,8 +1131,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
1134 | u32 mask; | 1131 | u32 mask; |
1135 | 1132 | ||
1136 | if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { | 1133 | if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { |
1137 | jfs_error(bmp->db_ipbmap->i_sb, | 1134 | jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); |
1138 | "dbAllocNext: Corrupt dmap page"); | ||
1139 | return -EIO; | 1135 | return -EIO; |
1140 | } | 1136 | } |
1141 | 1137 | ||
@@ -1265,8 +1261,7 @@ dbAllocNear(struct bmap * bmp, | |||
1265 | s8 *leaf; | 1261 | s8 *leaf; |
1266 | 1262 | ||
1267 | if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { | 1263 | if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { |
1268 | jfs_error(bmp->db_ipbmap->i_sb, | 1264 | jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); |
1269 | "dbAllocNear: Corrupt dmap page"); | ||
1270 | return -EIO; | 1265 | return -EIO; |
1271 | } | 1266 | } |
1272 | 1267 | ||
@@ -1381,8 +1376,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
1381 | */ | 1376 | */ |
1382 | if (l2nb > bmp->db_agl2size) { | 1377 | if (l2nb > bmp->db_agl2size) { |
1383 | jfs_error(bmp->db_ipbmap->i_sb, | 1378 | jfs_error(bmp->db_ipbmap->i_sb, |
1384 | "dbAllocAG: allocation request is larger than the " | 1379 | "allocation request is larger than the allocation group size\n"); |
1385 | "allocation group size"); | ||
1386 | return -EIO; | 1380 | return -EIO; |
1387 | } | 1381 | } |
1388 | 1382 | ||
@@ -1417,7 +1411,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
1417 | (unsigned long long) blkno, | 1411 | (unsigned long long) blkno, |
1418 | (unsigned long long) nblocks); | 1412 | (unsigned long long) nblocks); |
1419 | jfs_error(bmp->db_ipbmap->i_sb, | 1413 | jfs_error(bmp->db_ipbmap->i_sb, |
1420 | "dbAllocAG: dbAllocCtl failed in free AG"); | 1414 | "dbAllocCtl failed in free AG\n"); |
1421 | } | 1415 | } |
1422 | return (rc); | 1416 | return (rc); |
1423 | } | 1417 | } |
@@ -1433,8 +1427,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
1433 | budmin = dcp->budmin; | 1427 | budmin = dcp->budmin; |
1434 | 1428 | ||
1435 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { | 1429 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { |
1436 | jfs_error(bmp->db_ipbmap->i_sb, | 1430 | jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); |
1437 | "dbAllocAG: Corrupt dmapctl page"); | ||
1438 | release_metapage(mp); | 1431 | release_metapage(mp); |
1439 | return -EIO; | 1432 | return -EIO; |
1440 | } | 1433 | } |
@@ -1475,7 +1468,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
1475 | } | 1468 | } |
1476 | if (n == 4) { | 1469 | if (n == 4) { |
1477 | jfs_error(bmp->db_ipbmap->i_sb, | 1470 | jfs_error(bmp->db_ipbmap->i_sb, |
1478 | "dbAllocAG: failed descending stree"); | 1471 | "failed descending stree\n"); |
1479 | release_metapage(mp); | 1472 | release_metapage(mp); |
1480 | return -EIO; | 1473 | return -EIO; |
1481 | } | 1474 | } |
@@ -1515,8 +1508,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
1515 | &blkno))) { | 1508 | &blkno))) { |
1516 | if (rc == -ENOSPC) { | 1509 | if (rc == -ENOSPC) { |
1517 | jfs_error(bmp->db_ipbmap->i_sb, | 1510 | jfs_error(bmp->db_ipbmap->i_sb, |
1518 | "dbAllocAG: control page " | 1511 | "control page inconsistent\n"); |
1519 | "inconsistent"); | ||
1520 | return -EIO; | 1512 | return -EIO; |
1521 | } | 1513 | } |
1522 | return (rc); | 1514 | return (rc); |
@@ -1528,7 +1520,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) | |||
1528 | rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); | 1520 | rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); |
1529 | if (rc == -ENOSPC) { | 1521 | if (rc == -ENOSPC) { |
1530 | jfs_error(bmp->db_ipbmap->i_sb, | 1522 | jfs_error(bmp->db_ipbmap->i_sb, |
1531 | "dbAllocAG: unable to allocate blocks"); | 1523 | "unable to allocate blocks\n"); |
1532 | rc = -EIO; | 1524 | rc = -EIO; |
1533 | } | 1525 | } |
1534 | return (rc); | 1526 | return (rc); |
@@ -1587,8 +1579,7 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) | |||
1587 | */ | 1579 | */ |
1588 | rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); | 1580 | rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); |
1589 | if (rc == -ENOSPC) { | 1581 | if (rc == -ENOSPC) { |
1590 | jfs_error(bmp->db_ipbmap->i_sb, | 1582 | jfs_error(bmp->db_ipbmap->i_sb, "unable to allocate blocks\n"); |
1591 | "dbAllocAny: unable to allocate blocks"); | ||
1592 | return -EIO; | 1583 | return -EIO; |
1593 | } | 1584 | } |
1594 | return (rc); | 1585 | return (rc); |
@@ -1652,8 +1643,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) | |||
1652 | range_cnt = min_t(u64, max_ranges + 1, 32 * 1024); | 1643 | range_cnt = min_t(u64, max_ranges + 1, 32 * 1024); |
1653 | totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS); | 1644 | totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS); |
1654 | if (totrim == NULL) { | 1645 | if (totrim == NULL) { |
1655 | jfs_error(bmp->db_ipbmap->i_sb, | 1646 | jfs_error(bmp->db_ipbmap->i_sb, "no memory for trim array\n"); |
1656 | "dbDiscardAG: no memory for trim array"); | ||
1657 | IWRITE_UNLOCK(ipbmap); | 1647 | IWRITE_UNLOCK(ipbmap); |
1658 | return 0; | 1648 | return 0; |
1659 | } | 1649 | } |
@@ -1682,8 +1672,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) | |||
1682 | nblocks = 1 << l2nb; | 1672 | nblocks = 1 << l2nb; |
1683 | } else { | 1673 | } else { |
1684 | /* Trim any already allocated blocks */ | 1674 | /* Trim any already allocated blocks */ |
1685 | jfs_error(bmp->db_ipbmap->i_sb, | 1675 | jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n"); |
1686 | "dbDiscardAG: -EIO"); | ||
1687 | break; | 1676 | break; |
1688 | } | 1677 | } |
1689 | 1678 | ||
@@ -1761,7 +1750,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) | |||
1761 | 1750 | ||
1762 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { | 1751 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { |
1763 | jfs_error(bmp->db_ipbmap->i_sb, | 1752 | jfs_error(bmp->db_ipbmap->i_sb, |
1764 | "dbFindCtl: Corrupt dmapctl page"); | 1753 | "Corrupt dmapctl page\n"); |
1765 | release_metapage(mp); | 1754 | release_metapage(mp); |
1766 | return -EIO; | 1755 | return -EIO; |
1767 | } | 1756 | } |
@@ -1782,7 +1771,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) | |||
1782 | if (rc) { | 1771 | if (rc) { |
1783 | if (lev != level) { | 1772 | if (lev != level) { |
1784 | jfs_error(bmp->db_ipbmap->i_sb, | 1773 | jfs_error(bmp->db_ipbmap->i_sb, |
1785 | "dbFindCtl: dmap inconsistent"); | 1774 | "dmap inconsistent\n"); |
1786 | return -EIO; | 1775 | return -EIO; |
1787 | } | 1776 | } |
1788 | return -ENOSPC; | 1777 | return -ENOSPC; |
@@ -1906,7 +1895,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | |||
1906 | if (dp->tree.stree[ROOT] != L2BPERDMAP) { | 1895 | if (dp->tree.stree[ROOT] != L2BPERDMAP) { |
1907 | release_metapage(mp); | 1896 | release_metapage(mp); |
1908 | jfs_error(bmp->db_ipbmap->i_sb, | 1897 | jfs_error(bmp->db_ipbmap->i_sb, |
1909 | "dbAllocCtl: the dmap is not all free"); | 1898 | "the dmap is not all free\n"); |
1910 | rc = -EIO; | 1899 | rc = -EIO; |
1911 | goto backout; | 1900 | goto backout; |
1912 | } | 1901 | } |
@@ -1953,7 +1942,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | |||
1953 | * to indicate that we have leaked blocks. | 1942 | * to indicate that we have leaked blocks. |
1954 | */ | 1943 | */ |
1955 | jfs_error(bmp->db_ipbmap->i_sb, | 1944 | jfs_error(bmp->db_ipbmap->i_sb, |
1956 | "dbAllocCtl: I/O Error: Block Leakage."); | 1945 | "I/O Error: Block Leakage\n"); |
1957 | continue; | 1946 | continue; |
1958 | } | 1947 | } |
1959 | dp = (struct dmap *) mp->data; | 1948 | dp = (struct dmap *) mp->data; |
@@ -1965,8 +1954,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) | |||
1965 | * to indicate that we have leaked blocks. | 1954 | * to indicate that we have leaked blocks. |
1966 | */ | 1955 | */ |
1967 | release_metapage(mp); | 1956 | release_metapage(mp); |
1968 | jfs_error(bmp->db_ipbmap->i_sb, | 1957 | jfs_error(bmp->db_ipbmap->i_sb, "Block Leakage\n"); |
1969 | "dbAllocCtl: Block Leakage."); | ||
1970 | continue; | 1958 | continue; |
1971 | } | 1959 | } |
1972 | 1960 | ||
@@ -2263,8 +2251,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
2263 | for (; nwords > 0; nwords -= nw) { | 2251 | for (; nwords > 0; nwords -= nw) { |
2264 | if (leaf[word] < BUDMIN) { | 2252 | if (leaf[word] < BUDMIN) { |
2265 | jfs_error(bmp->db_ipbmap->i_sb, | 2253 | jfs_error(bmp->db_ipbmap->i_sb, |
2266 | "dbAllocBits: leaf page " | 2254 | "leaf page corrupt\n"); |
2267 | "corrupt"); | ||
2268 | break; | 2255 | break; |
2269 | } | 2256 | } |
2270 | 2257 | ||
@@ -2536,8 +2523,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) | |||
2536 | dcp = (struct dmapctl *) mp->data; | 2523 | dcp = (struct dmapctl *) mp->data; |
2537 | 2524 | ||
2538 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { | 2525 | if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { |
2539 | jfs_error(bmp->db_ipbmap->i_sb, | 2526 | jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); |
2540 | "dbAdjCtl: Corrupt dmapctl page"); | ||
2541 | release_metapage(mp); | 2527 | release_metapage(mp); |
2542 | return -EIO; | 2528 | return -EIO; |
2543 | } | 2529 | } |
@@ -2638,8 +2624,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) | |||
2638 | assert(level == bmp->db_maxlevel); | 2624 | assert(level == bmp->db_maxlevel); |
2639 | if (bmp->db_maxfreebud != oldroot) { | 2625 | if (bmp->db_maxfreebud != oldroot) { |
2640 | jfs_error(bmp->db_ipbmap->i_sb, | 2626 | jfs_error(bmp->db_ipbmap->i_sb, |
2641 | "dbAdjCtl: the maximum free buddy is " | 2627 | "the maximum free buddy is not the old root\n"); |
2642 | "not the old root"); | ||
2643 | } | 2628 | } |
2644 | bmp->db_maxfreebud = dcp->stree[ROOT]; | 2629 | bmp->db_maxfreebud = dcp->stree[ROOT]; |
2645 | } | 2630 | } |
@@ -3481,7 +3466,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | |||
3481 | p = BMAPBLKNO + nbperpage; /* L2 page */ | 3466 | p = BMAPBLKNO + nbperpage; /* L2 page */ |
3482 | l2mp = read_metapage(ipbmap, p, PSIZE, 0); | 3467 | l2mp = read_metapage(ipbmap, p, PSIZE, 0); |
3483 | if (!l2mp) { | 3468 | if (!l2mp) { |
3484 | jfs_error(ipbmap->i_sb, "dbExtendFS: L2 page could not be read"); | 3469 | jfs_error(ipbmap->i_sb, "L2 page could not be read\n"); |
3485 | return -EIO; | 3470 | return -EIO; |
3486 | } | 3471 | } |
3487 | l2dcp = (struct dmapctl *) l2mp->data; | 3472 | l2dcp = (struct dmapctl *) l2mp->data; |
@@ -3646,8 +3631,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) | |||
3646 | } | 3631 | } |
3647 | } /* for each L1 in a L2 */ | 3632 | } /* for each L1 in a L2 */ |
3648 | 3633 | ||
3649 | jfs_error(ipbmap->i_sb, | 3634 | jfs_error(ipbmap->i_sb, "function has not returned as expected\n"); |
3650 | "dbExtendFS: function has not returned as expected"); | ||
3651 | errout: | 3635 | errout: |
3652 | if (l0mp) | 3636 | if (l0mp) |
3653 | release_metapage(l0mp); | 3637 | release_metapage(l0mp); |
@@ -3717,7 +3701,7 @@ void dbFinalizeBmap(struct inode *ipbmap) | |||
3717 | } | 3701 | } |
3718 | if (bmp->db_agpref >= bmp->db_numag) { | 3702 | if (bmp->db_agpref >= bmp->db_numag) { |
3719 | jfs_error(ipbmap->i_sb, | 3703 | jfs_error(ipbmap->i_sb, |
3720 | "cannot find ag with average freespace"); | 3704 | "cannot find ag with average freespace\n"); |
3721 | } | 3705 | } |
3722 | } | 3706 | } |
3723 | 3707 | ||
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 0ddbeceafc62..8743ba9c6742 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c | |||
@@ -124,21 +124,21 @@ struct dtsplit { | |||
124 | #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) | 124 | #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) |
125 | 125 | ||
126 | /* get page buffer for specified block address */ | 126 | /* get page buffer for specified block address */ |
127 | #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ | 127 | #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ |
128 | {\ | 128 | do { \ |
129 | BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ | 129 | BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot); \ |
130 | if (!(RC))\ | 130 | if (!(RC)) { \ |
131 | {\ | 131 | if (((P)->header.nextindex > \ |
132 | if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ | 132 | (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \ |
133 | ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ | 133 | ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) { \ |
134 | {\ | 134 | BT_PUTPAGE(MP); \ |
135 | BT_PUTPAGE(MP);\ | 135 | jfs_error((IP)->i_sb, \ |
136 | jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\ | 136 | "DT_GETPAGE: dtree page corrupt\n"); \ |
137 | MP = NULL;\ | 137 | MP = NULL; \ |
138 | RC = -EIO;\ | 138 | RC = -EIO; \ |
139 | }\ | 139 | } \ |
140 | }\ | 140 | } \ |
141 | } | 141 | } while (0) |
142 | 142 | ||
143 | /* for consistency */ | 143 | /* for consistency */ |
144 | #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) | 144 | #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) |
@@ -776,7 +776,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, | |||
776 | /* Something's corrupted, mark filesystem dirty so | 776 | /* Something's corrupted, mark filesystem dirty so |
777 | * chkdsk will fix it. | 777 | * chkdsk will fix it. |
778 | */ | 778 | */ |
779 | jfs_error(sb, "stack overrun in dtSearch!"); | 779 | jfs_error(sb, "stack overrun!\n"); |
780 | BT_STACK_DUMP(btstack); | 780 | BT_STACK_DUMP(btstack); |
781 | rc = -EIO; | 781 | rc = -EIO; |
782 | goto out; | 782 | goto out; |
@@ -3002,9 +3002,9 @@ static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) | |||
3002 | * return: offset = (pn, index) of start entry | 3002 | * return: offset = (pn, index) of start entry |
3003 | * of next jfs_readdir()/dtRead() | 3003 | * of next jfs_readdir()/dtRead() |
3004 | */ | 3004 | */ |
3005 | int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 3005 | int jfs_readdir(struct file *file, struct dir_context *ctx) |
3006 | { | 3006 | { |
3007 | struct inode *ip = file_inode(filp); | 3007 | struct inode *ip = file_inode(file); |
3008 | struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; | 3008 | struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; |
3009 | int rc = 0; | 3009 | int rc = 0; |
3010 | loff_t dtpos; /* legacy OS/2 style position */ | 3010 | loff_t dtpos; /* legacy OS/2 style position */ |
@@ -3033,7 +3033,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
3033 | int overflow, fix_page, page_fixed = 0; | 3033 | int overflow, fix_page, page_fixed = 0; |
3034 | static int unique_pos = 2; /* If we can't fix broken index */ | 3034 | static int unique_pos = 2; /* If we can't fix broken index */ |
3035 | 3035 | ||
3036 | if (filp->f_pos == DIREND) | 3036 | if (ctx->pos == DIREND) |
3037 | return 0; | 3037 | return 0; |
3038 | 3038 | ||
3039 | if (DO_INDEX(ip)) { | 3039 | if (DO_INDEX(ip)) { |
@@ -3045,7 +3045,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
3045 | */ | 3045 | */ |
3046 | do_index = 1; | 3046 | do_index = 1; |
3047 | 3047 | ||
3048 | dir_index = (u32) filp->f_pos; | 3048 | dir_index = (u32) ctx->pos; |
3049 | 3049 | ||
3050 | if (dir_index > 1) { | 3050 | if (dir_index > 1) { |
3051 | struct dir_table_slot dirtab_slot; | 3051 | struct dir_table_slot dirtab_slot; |
@@ -3053,25 +3053,25 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
3053 | if (dtEmpty(ip) || | 3053 | if (dtEmpty(ip) || |
3054 | (dir_index >= JFS_IP(ip)->next_index)) { | 3054 | (dir_index >= JFS_IP(ip)->next_index)) { |
3055 | /* Stale position. Directory has shrunk */ | 3055 | /* Stale position. Directory has shrunk */ |
3056 | filp->f_pos = DIREND; | 3056 | ctx->pos = DIREND; |
3057 | return 0; | 3057 | return 0; |
3058 | } | 3058 | } |
3059 | repeat: | 3059 | repeat: |
3060 | rc = read_index(ip, dir_index, &dirtab_slot); | 3060 | rc = read_index(ip, dir_index, &dirtab_slot); |
3061 | if (rc) { | 3061 | if (rc) { |
3062 | filp->f_pos = DIREND; | 3062 | ctx->pos = DIREND; |
3063 | return rc; | 3063 | return rc; |
3064 | } | 3064 | } |
3065 | if (dirtab_slot.flag == DIR_INDEX_FREE) { | 3065 | if (dirtab_slot.flag == DIR_INDEX_FREE) { |
3066 | if (loop_count++ > JFS_IP(ip)->next_index) { | 3066 | if (loop_count++ > JFS_IP(ip)->next_index) { |
3067 | jfs_err("jfs_readdir detected " | 3067 | jfs_err("jfs_readdir detected " |
3068 | "infinite loop!"); | 3068 | "infinite loop!"); |
3069 | filp->f_pos = DIREND; | 3069 | ctx->pos = DIREND; |
3070 | return 0; | 3070 | return 0; |
3071 | } | 3071 | } |
3072 | dir_index = le32_to_cpu(dirtab_slot.addr2); | 3072 | dir_index = le32_to_cpu(dirtab_slot.addr2); |
3073 | if (dir_index == -1) { | 3073 | if (dir_index == -1) { |
3074 | filp->f_pos = DIREND; | 3074 | ctx->pos = DIREND; |
3075 | return 0; | 3075 | return 0; |
3076 | } | 3076 | } |
3077 | goto repeat; | 3077 | goto repeat; |
@@ -3080,13 +3080,13 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
3080 | index = dirtab_slot.slot; | 3080 | index = dirtab_slot.slot; |
3081 | DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 3081 | DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
3082 | if (rc) { | 3082 | if (rc) { |
3083 | filp->f_pos = DIREND; | 3083 | ctx->pos = DIREND; |
3084 | return 0; | 3084 | return 0; |
3085 | } | 3085 | } |
3086 | if (p->header.flag & BT_INTERNAL) { | 3086 | if (p->header.flag & BT_INTERNAL) { |
3087 | jfs_err("jfs_readdir: bad index table"); | 3087 | jfs_err("jfs_readdir: bad index table"); |
3088 | DT_PUTPAGE(mp); | 3088 | DT_PUTPAGE(mp); |
3089 | filp->f_pos = -1; | 3089 | ctx->pos = -1; |
3090 | return 0; | 3090 | return 0; |
3091 | } | 3091 | } |
3092 | } else { | 3092 | } else { |
@@ -3094,23 +3094,22 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
3094 | /* | 3094 | /* |
3095 | * self "." | 3095 | * self "." |
3096 | */ | 3096 | */ |
3097 | filp->f_pos = 0; | 3097 | ctx->pos = 0; |
3098 | if (filldir(dirent, ".", 1, 0, ip->i_ino, | 3098 | if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) |
3099 | DT_DIR)) | ||
3100 | return 0; | 3099 | return 0; |
3101 | } | 3100 | } |
3102 | /* | 3101 | /* |
3103 | * parent ".." | 3102 | * parent ".." |
3104 | */ | 3103 | */ |
3105 | filp->f_pos = 1; | 3104 | ctx->pos = 1; |
3106 | if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) | 3105 | if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) |
3107 | return 0; | 3106 | return 0; |
3108 | 3107 | ||
3109 | /* | 3108 | /* |
3110 | * Find first entry of left-most leaf | 3109 | * Find first entry of left-most leaf |
3111 | */ | 3110 | */ |
3112 | if (dtEmpty(ip)) { | 3111 | if (dtEmpty(ip)) { |
3113 | filp->f_pos = DIREND; | 3112 | ctx->pos = DIREND; |
3114 | return 0; | 3113 | return 0; |
3115 | } | 3114 | } |
3116 | 3115 | ||
@@ -3128,23 +3127,19 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
3128 | * pn > 0: Real entries, pn=1 -> leftmost page | 3127 | * pn > 0: Real entries, pn=1 -> leftmost page |
3129 | * pn = index = -1: No more entries | 3128 | * pn = index = -1: No more entries |
3130 | */ | 3129 | */ |
3131 | dtpos = filp->f_pos; | 3130 | dtpos = ctx->pos; |
3132 | if (dtpos == 0) { | 3131 | if (dtpos == 0) { |
3133 | /* build "." entry */ | 3132 | /* build "." entry */ |
3134 | 3133 | if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) | |
3135 | if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, | ||
3136 | DT_DIR)) | ||
3137 | return 0; | 3134 | return 0; |
3138 | dtoffset->index = 1; | 3135 | dtoffset->index = 1; |
3139 | filp->f_pos = dtpos; | 3136 | ctx->pos = dtpos; |
3140 | } | 3137 | } |
3141 | 3138 | ||
3142 | if (dtoffset->pn == 0) { | 3139 | if (dtoffset->pn == 0) { |
3143 | if (dtoffset->index == 1) { | 3140 | if (dtoffset->index == 1) { |
3144 | /* build ".." entry */ | 3141 | /* build ".." entry */ |
3145 | 3142 | if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) | |
3146 | if (filldir(dirent, "..", 2, filp->f_pos, | ||
3147 | PARENT(ip), DT_DIR)) | ||
3148 | return 0; | 3143 | return 0; |
3149 | } else { | 3144 | } else { |
3150 | jfs_err("jfs_readdir called with " | 3145 | jfs_err("jfs_readdir called with " |
@@ -3152,18 +3147,18 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
3152 | } | 3147 | } |
3153 | dtoffset->pn = 1; | 3148 | dtoffset->pn = 1; |
3154 | dtoffset->index = 0; | 3149 | dtoffset->index = 0; |
3155 | filp->f_pos = dtpos; | 3150 | ctx->pos = dtpos; |
3156 | } | 3151 | } |
3157 | 3152 | ||
3158 | if (dtEmpty(ip)) { | 3153 | if (dtEmpty(ip)) { |
3159 | filp->f_pos = DIREND; | 3154 | ctx->pos = DIREND; |
3160 | return 0; | 3155 | return 0; |
3161 | } | 3156 | } |
3162 | 3157 | ||
3163 | if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) { | 3158 | if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) { |
3164 | jfs_err("jfs_readdir: unexpected rc = %d " | 3159 | jfs_err("jfs_readdir: unexpected rc = %d " |
3165 | "from dtReadNext", rc); | 3160 | "from dtReadNext", rc); |
3166 | filp->f_pos = DIREND; | 3161 | ctx->pos = DIREND; |
3167 | return 0; | 3162 | return 0; |
3168 | } | 3163 | } |
3169 | /* get start leaf page and index */ | 3164 | /* get start leaf page and index */ |
@@ -3171,7 +3166,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
3171 | 3166 | ||
3172 | /* offset beyond directory eof ? */ | 3167 | /* offset beyond directory eof ? */ |
3173 | if (bn < 0) { | 3168 | if (bn < 0) { |
3174 | filp->f_pos = DIREND; | 3169 | ctx->pos = DIREND; |
3175 | return 0; | 3170 | return 0; |
3176 | } | 3171 | } |
3177 | } | 3172 | } |
@@ -3180,7 +3175,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
3180 | if (dirent_buf == 0) { | 3175 | if (dirent_buf == 0) { |
3181 | DT_PUTPAGE(mp); | 3176 | DT_PUTPAGE(mp); |
3182 | jfs_warn("jfs_readdir: __get_free_page failed!"); | 3177 | jfs_warn("jfs_readdir: __get_free_page failed!"); |
3183 | filp->f_pos = DIREND; | 3178 | ctx->pos = DIREND; |
3184 | return -ENOMEM; | 3179 | return -ENOMEM; |
3185 | } | 3180 | } |
3186 | 3181 | ||
@@ -3252,8 +3247,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
3252 | /* Sanity Check */ | 3247 | /* Sanity Check */ |
3253 | if (d_namleft == 0) { | 3248 | if (d_namleft == 0) { |
3254 | jfs_error(ip->i_sb, | 3249 | jfs_error(ip->i_sb, |
3255 | "JFS:Dtree error: ino = " | 3250 | "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n", |
3256 | "%ld, bn=%Ld, index = %d", | ||
3257 | (long)ip->i_ino, | 3251 | (long)ip->i_ino, |
3258 | (long long)bn, | 3252 | (long long)bn, |
3259 | i); | 3253 | i); |
@@ -3295,9 +3289,9 @@ skip_one: | |||
3295 | 3289 | ||
3296 | jfs_dirent = (struct jfs_dirent *) dirent_buf; | 3290 | jfs_dirent = (struct jfs_dirent *) dirent_buf; |
3297 | while (jfs_dirents--) { | 3291 | while (jfs_dirents--) { |
3298 | filp->f_pos = jfs_dirent->position; | 3292 | ctx->pos = jfs_dirent->position; |
3299 | if (filldir(dirent, jfs_dirent->name, | 3293 | if (!dir_emit(ctx, jfs_dirent->name, |
3300 | jfs_dirent->name_len, filp->f_pos, | 3294 | jfs_dirent->name_len, |
3301 | jfs_dirent->ino, DT_UNKNOWN)) | 3295 | jfs_dirent->ino, DT_UNKNOWN)) |
3302 | goto out; | 3296 | goto out; |
3303 | jfs_dirent = next_jfs_dirent(jfs_dirent); | 3297 | jfs_dirent = next_jfs_dirent(jfs_dirent); |
@@ -3309,7 +3303,7 @@ skip_one: | |||
3309 | } | 3303 | } |
3310 | 3304 | ||
3311 | if (!overflow && (bn == 0)) { | 3305 | if (!overflow && (bn == 0)) { |
3312 | filp->f_pos = DIREND; | 3306 | ctx->pos = DIREND; |
3313 | break; | 3307 | break; |
3314 | } | 3308 | } |
3315 | 3309 | ||
@@ -3373,7 +3367,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack) | |||
3373 | */ | 3367 | */ |
3374 | if (BT_STACK_FULL(btstack)) { | 3368 | if (BT_STACK_FULL(btstack)) { |
3375 | DT_PUTPAGE(mp); | 3369 | DT_PUTPAGE(mp); |
3376 | jfs_error(ip->i_sb, "dtReadFirst: btstack overrun"); | 3370 | jfs_error(ip->i_sb, "btstack overrun\n"); |
3377 | BT_STACK_DUMP(btstack); | 3371 | BT_STACK_DUMP(btstack); |
3378 | return -EIO; | 3372 | return -EIO; |
3379 | } | 3373 | } |
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h index 2545bb317235..fd4169e6e698 100644 --- a/fs/jfs/jfs_dtree.h +++ b/fs/jfs/jfs_dtree.h | |||
@@ -265,5 +265,5 @@ extern int dtDelete(tid_t tid, struct inode *ip, struct component_name * key, | |||
265 | extern int dtModify(tid_t tid, struct inode *ip, struct component_name * key, | 265 | extern int dtModify(tid_t tid, struct inode *ip, struct component_name * key, |
266 | ino_t * orig_ino, ino_t new_ino, int flag); | 266 | ino_t * orig_ino, ino_t new_ino, int flag); |
267 | 267 | ||
268 | extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir); | 268 | extern int jfs_readdir(struct file *file, struct dir_context *ctx); |
269 | #endif /* !_H_JFS_DTREE */ | 269 | #endif /* !_H_JFS_DTREE */ |
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index e5fe8506ed16..2ae7d59ab10a 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c | |||
@@ -388,7 +388,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) | |||
388 | 388 | ||
389 | if ((rc == 0) && xlen) { | 389 | if ((rc == 0) && xlen) { |
390 | if (xlen != nbperpage) { | 390 | if (xlen != nbperpage) { |
391 | jfs_error(ip->i_sb, "extHint: corrupt xtree"); | 391 | jfs_error(ip->i_sb, "corrupt xtree\n"); |
392 | rc = -EIO; | 392 | rc = -EIO; |
393 | } | 393 | } |
394 | XADaddress(xp, xaddr); | 394 | XADaddress(xp, xaddr); |
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index f7e042b63ddb..f321986e73d2 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
@@ -386,7 +386,7 @@ int diRead(struct inode *ip) | |||
386 | dp += rel_inode; | 386 | dp += rel_inode; |
387 | 387 | ||
388 | if (ip->i_ino != le32_to_cpu(dp->di_number)) { | 388 | if (ip->i_ino != le32_to_cpu(dp->di_number)) { |
389 | jfs_error(ip->i_sb, "diRead: i_ino != di_number"); | 389 | jfs_error(ip->i_sb, "i_ino != di_number\n"); |
390 | rc = -EIO; | 390 | rc = -EIO; |
391 | } else if (le32_to_cpu(dp->di_nlink) == 0) | 391 | } else if (le32_to_cpu(dp->di_nlink) == 0) |
392 | rc = -ESTALE; | 392 | rc = -ESTALE; |
@@ -625,7 +625,7 @@ int diWrite(tid_t tid, struct inode *ip) | |||
625 | if (!addressPXD(&(jfs_ip->ixpxd)) || | 625 | if (!addressPXD(&(jfs_ip->ixpxd)) || |
626 | (lengthPXD(&(jfs_ip->ixpxd)) != | 626 | (lengthPXD(&(jfs_ip->ixpxd)) != |
627 | JFS_IP(ipimap)->i_imap->im_nbperiext)) { | 627 | JFS_IP(ipimap)->i_imap->im_nbperiext)) { |
628 | jfs_error(ip->i_sb, "diWrite: ixpxd invalid"); | 628 | jfs_error(ip->i_sb, "ixpxd invalid\n"); |
629 | return -EIO; | 629 | return -EIO; |
630 | } | 630 | } |
631 | 631 | ||
@@ -893,8 +893,7 @@ int diFree(struct inode *ip) | |||
893 | if (iagno >= imap->im_nextiag) { | 893 | if (iagno >= imap->im_nextiag) { |
894 | print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, | 894 | print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, |
895 | imap, 32, 0); | 895 | imap, 32, 0); |
896 | jfs_error(ip->i_sb, | 896 | jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n", |
897 | "diFree: inum = %d, iagno = %d, nextiag = %d", | ||
898 | (uint) inum, iagno, imap->im_nextiag); | 897 | (uint) inum, iagno, imap->im_nextiag); |
899 | return -EIO; | 898 | return -EIO; |
900 | } | 899 | } |
@@ -930,15 +929,14 @@ int diFree(struct inode *ip) | |||
930 | mask = HIGHORDER >> bitno; | 929 | mask = HIGHORDER >> bitno; |
931 | 930 | ||
932 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { | 931 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { |
933 | jfs_error(ip->i_sb, | 932 | jfs_error(ip->i_sb, "wmap shows inode already free\n"); |
934 | "diFree: wmap shows inode already free"); | ||
935 | } | 933 | } |
936 | 934 | ||
937 | if (!addressPXD(&iagp->inoext[extno])) { | 935 | if (!addressPXD(&iagp->inoext[extno])) { |
938 | release_metapage(mp); | 936 | release_metapage(mp); |
939 | IREAD_UNLOCK(ipimap); | 937 | IREAD_UNLOCK(ipimap); |
940 | AG_UNLOCK(imap, agno); | 938 | AG_UNLOCK(imap, agno); |
941 | jfs_error(ip->i_sb, "diFree: invalid inoext"); | 939 | jfs_error(ip->i_sb, "invalid inoext\n"); |
942 | return -EIO; | 940 | return -EIO; |
943 | } | 941 | } |
944 | 942 | ||
@@ -950,7 +948,7 @@ int diFree(struct inode *ip) | |||
950 | release_metapage(mp); | 948 | release_metapage(mp); |
951 | IREAD_UNLOCK(ipimap); | 949 | IREAD_UNLOCK(ipimap); |
952 | AG_UNLOCK(imap, agno); | 950 | AG_UNLOCK(imap, agno); |
953 | jfs_error(ip->i_sb, "diFree: numfree > numinos"); | 951 | jfs_error(ip->i_sb, "numfree > numinos\n"); |
954 | return -EIO; | 952 | return -EIO; |
955 | } | 953 | } |
956 | /* | 954 | /* |
@@ -1199,7 +1197,7 @@ int diFree(struct inode *ip) | |||
1199 | * for the inode being freed. | 1197 | * for the inode being freed. |
1200 | */ | 1198 | */ |
1201 | if (iagp->pmap[extno] != 0) { | 1199 | if (iagp->pmap[extno] != 0) { |
1202 | jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); | 1200 | jfs_error(ip->i_sb, "the pmap does not show inode free\n"); |
1203 | } | 1201 | } |
1204 | iagp->wmap[extno] = 0; | 1202 | iagp->wmap[extno] = 0; |
1205 | PXDlength(&iagp->inoext[extno], 0); | 1203 | PXDlength(&iagp->inoext[extno], 0); |
@@ -1518,8 +1516,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) | |||
1518 | release_metapage(mp); | 1516 | release_metapage(mp); |
1519 | AG_UNLOCK(imap, agno); | 1517 | AG_UNLOCK(imap, agno); |
1520 | jfs_error(ip->i_sb, | 1518 | jfs_error(ip->i_sb, |
1521 | "diAlloc: can't find free bit " | 1519 | "can't find free bit in wmap\n"); |
1522 | "in wmap"); | ||
1523 | return -EIO; | 1520 | return -EIO; |
1524 | } | 1521 | } |
1525 | 1522 | ||
@@ -1660,7 +1657,7 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) | |||
1660 | numinos = imap->im_agctl[agno].numinos; | 1657 | numinos = imap->im_agctl[agno].numinos; |
1661 | 1658 | ||
1662 | if (numfree > numinos) { | 1659 | if (numfree > numinos) { |
1663 | jfs_error(ip->i_sb, "diAllocAG: numfree > numinos"); | 1660 | jfs_error(ip->i_sb, "numfree > numinos\n"); |
1664 | return -EIO; | 1661 | return -EIO; |
1665 | } | 1662 | } |
1666 | 1663 | ||
@@ -1811,8 +1808,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
1811 | if (!iagp->nfreeinos) { | 1808 | if (!iagp->nfreeinos) { |
1812 | IREAD_UNLOCK(imap->im_ipimap); | 1809 | IREAD_UNLOCK(imap->im_ipimap); |
1813 | release_metapage(mp); | 1810 | release_metapage(mp); |
1814 | jfs_error(ip->i_sb, | 1811 | jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n"); |
1815 | "diAllocIno: nfreeinos = 0, but iag on freelist"); | ||
1816 | return -EIO; | 1812 | return -EIO; |
1817 | } | 1813 | } |
1818 | 1814 | ||
@@ -1824,7 +1820,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
1824 | IREAD_UNLOCK(imap->im_ipimap); | 1820 | IREAD_UNLOCK(imap->im_ipimap); |
1825 | release_metapage(mp); | 1821 | release_metapage(mp); |
1826 | jfs_error(ip->i_sb, | 1822 | jfs_error(ip->i_sb, |
1827 | "diAllocIno: free inode not found in summary map"); | 1823 | "free inode not found in summary map\n"); |
1828 | return -EIO; | 1824 | return -EIO; |
1829 | } | 1825 | } |
1830 | 1826 | ||
@@ -1839,7 +1835,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
1839 | if (rem >= EXTSPERSUM) { | 1835 | if (rem >= EXTSPERSUM) { |
1840 | IREAD_UNLOCK(imap->im_ipimap); | 1836 | IREAD_UNLOCK(imap->im_ipimap); |
1841 | release_metapage(mp); | 1837 | release_metapage(mp); |
1842 | jfs_error(ip->i_sb, "diAllocIno: no free extent found"); | 1838 | jfs_error(ip->i_sb, "no free extent found\n"); |
1843 | return -EIO; | 1839 | return -EIO; |
1844 | } | 1840 | } |
1845 | extno = (sword << L2EXTSPERSUM) + rem; | 1841 | extno = (sword << L2EXTSPERSUM) + rem; |
@@ -1850,7 +1846,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) | |||
1850 | if (rem >= INOSPEREXT) { | 1846 | if (rem >= INOSPEREXT) { |
1851 | IREAD_UNLOCK(imap->im_ipimap); | 1847 | IREAD_UNLOCK(imap->im_ipimap); |
1852 | release_metapage(mp); | 1848 | release_metapage(mp); |
1853 | jfs_error(ip->i_sb, "diAllocIno: free inode not found"); | 1849 | jfs_error(ip->i_sb, "free inode not found\n"); |
1854 | return -EIO; | 1850 | return -EIO; |
1855 | } | 1851 | } |
1856 | 1852 | ||
@@ -1936,7 +1932,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | |||
1936 | IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); | 1932 | IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); |
1937 | if ((rc = diIAGRead(imap, iagno, &mp))) { | 1933 | if ((rc = diIAGRead(imap, iagno, &mp))) { |
1938 | IREAD_UNLOCK(imap->im_ipimap); | 1934 | IREAD_UNLOCK(imap->im_ipimap); |
1939 | jfs_error(ip->i_sb, "diAllocExt: error reading iag"); | 1935 | jfs_error(ip->i_sb, "error reading iag\n"); |
1940 | return rc; | 1936 | return rc; |
1941 | } | 1937 | } |
1942 | iagp = (struct iag *) mp->data; | 1938 | iagp = (struct iag *) mp->data; |
@@ -1948,8 +1944,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | |||
1948 | if (sword >= SMAPSZ) { | 1944 | if (sword >= SMAPSZ) { |
1949 | release_metapage(mp); | 1945 | release_metapage(mp); |
1950 | IREAD_UNLOCK(imap->im_ipimap); | 1946 | IREAD_UNLOCK(imap->im_ipimap); |
1951 | jfs_error(ip->i_sb, | 1947 | jfs_error(ip->i_sb, "free ext summary map not found\n"); |
1952 | "diAllocExt: free ext summary map not found"); | ||
1953 | return -EIO; | 1948 | return -EIO; |
1954 | } | 1949 | } |
1955 | if (~iagp->extsmap[sword]) | 1950 | if (~iagp->extsmap[sword]) |
@@ -1962,7 +1957,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) | |||
1962 | if (rem >= EXTSPERSUM) { | 1957 | if (rem >= EXTSPERSUM) { |
1963 | release_metapage(mp); | 1958 | release_metapage(mp); |
1964 | IREAD_UNLOCK(imap->im_ipimap); | 1959 | IREAD_UNLOCK(imap->im_ipimap); |
1965 | jfs_error(ip->i_sb, "diAllocExt: free extent not found"); | 1960 | jfs_error(ip->i_sb, "free extent not found\n"); |
1966 | return -EIO; | 1961 | return -EIO; |
1967 | } | 1962 | } |
1968 | extno = (sword << L2EXTSPERSUM) + rem; | 1963 | extno = (sword << L2EXTSPERSUM) + rem; |
@@ -2081,8 +2076,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) | |||
2081 | if (bmp) | 2076 | if (bmp) |
2082 | release_metapage(bmp); | 2077 | release_metapage(bmp); |
2083 | 2078 | ||
2084 | jfs_error(imap->im_ipimap->i_sb, | 2079 | jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n"); |
2085 | "diAllocBit: iag inconsistent"); | ||
2086 | return -EIO; | 2080 | return -EIO; |
2087 | } | 2081 | } |
2088 | 2082 | ||
@@ -2189,7 +2183,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | |||
2189 | /* better have free extents. | 2183 | /* better have free extents. |
2190 | */ | 2184 | */ |
2191 | if (!iagp->nfreeexts) { | 2185 | if (!iagp->nfreeexts) { |
2192 | jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents"); | 2186 | jfs_error(imap->im_ipimap->i_sb, "no free extents\n"); |
2193 | return -EIO; | 2187 | return -EIO; |
2194 | } | 2188 | } |
2195 | 2189 | ||
@@ -2261,7 +2255,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | |||
2261 | } | 2255 | } |
2262 | if (ciagp == NULL) { | 2256 | if (ciagp == NULL) { |
2263 | jfs_error(imap->im_ipimap->i_sb, | 2257 | jfs_error(imap->im_ipimap->i_sb, |
2264 | "diNewExt: ciagp == NULL"); | 2258 | "ciagp == NULL\n"); |
2265 | rc = -EIO; | 2259 | rc = -EIO; |
2266 | goto error_out; | 2260 | goto error_out; |
2267 | } | 2261 | } |
@@ -2498,7 +2492,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
2498 | IWRITE_UNLOCK(ipimap); | 2492 | IWRITE_UNLOCK(ipimap); |
2499 | IAGFREE_UNLOCK(imap); | 2493 | IAGFREE_UNLOCK(imap); |
2500 | jfs_error(imap->im_ipimap->i_sb, | 2494 | jfs_error(imap->im_ipimap->i_sb, |
2501 | "diNewIAG: ipimap->i_size is wrong"); | 2495 | "ipimap->i_size is wrong\n"); |
2502 | return -EIO; | 2496 | return -EIO; |
2503 | } | 2497 | } |
2504 | 2498 | ||
@@ -2758,8 +2752,7 @@ diUpdatePMap(struct inode *ipimap, | |||
2758 | iagno = INOTOIAG(inum); | 2752 | iagno = INOTOIAG(inum); |
2759 | /* make sure that the iag is contained within the map */ | 2753 | /* make sure that the iag is contained within the map */ |
2760 | if (iagno >= imap->im_nextiag) { | 2754 | if (iagno >= imap->im_nextiag) { |
2761 | jfs_error(ipimap->i_sb, | 2755 | jfs_error(ipimap->i_sb, "the iag is outside the map\n"); |
2762 | "diUpdatePMap: the iag is outside the map"); | ||
2763 | return -EIO; | 2756 | return -EIO; |
2764 | } | 2757 | } |
2765 | /* read the iag */ | 2758 | /* read the iag */ |
@@ -2788,13 +2781,13 @@ diUpdatePMap(struct inode *ipimap, | |||
2788 | */ | 2781 | */ |
2789 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { | 2782 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { |
2790 | jfs_error(ipimap->i_sb, | 2783 | jfs_error(ipimap->i_sb, |
2791 | "diUpdatePMap: inode %ld not marked as " | 2784 | "inode %ld not marked as allocated in wmap!\n", |
2792 | "allocated in wmap!", inum); | 2785 | inum); |
2793 | } | 2786 | } |
2794 | if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { | 2787 | if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { |
2795 | jfs_error(ipimap->i_sb, | 2788 | jfs_error(ipimap->i_sb, |
2796 | "diUpdatePMap: inode %ld not marked as " | 2789 | "inode %ld not marked as allocated in pmap!\n", |
2797 | "allocated in pmap!", inum); | 2790 | inum); |
2798 | } | 2791 | } |
2799 | /* update the bitmap for the extent of the freed inode */ | 2792 | /* update the bitmap for the extent of the freed inode */ |
2800 | iagp->pmap[extno] &= cpu_to_le32(~mask); | 2793 | iagp->pmap[extno] &= cpu_to_le32(~mask); |
@@ -2809,15 +2802,13 @@ diUpdatePMap(struct inode *ipimap, | |||
2809 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { | 2802 | if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { |
2810 | release_metapage(mp); | 2803 | release_metapage(mp); |
2811 | jfs_error(ipimap->i_sb, | 2804 | jfs_error(ipimap->i_sb, |
2812 | "diUpdatePMap: the inode is not allocated in " | 2805 | "the inode is not allocated in the working map\n"); |
2813 | "the working map"); | ||
2814 | return -EIO; | 2806 | return -EIO; |
2815 | } | 2807 | } |
2816 | if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { | 2808 | if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { |
2817 | release_metapage(mp); | 2809 | release_metapage(mp); |
2818 | jfs_error(ipimap->i_sb, | 2810 | jfs_error(ipimap->i_sb, |
2819 | "diUpdatePMap: the inode is not free in the " | 2811 | "the inode is not free in the persistent map\n"); |
2820 | "persistent map"); | ||
2821 | return -EIO; | 2812 | return -EIO; |
2822 | } | 2813 | } |
2823 | /* update the bitmap for the extent of the allocated inode */ | 2814 | /* update the bitmap for the extent of the allocated inode */ |
@@ -2909,8 +2900,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) | |||
2909 | iagp = (struct iag *) bp->data; | 2900 | iagp = (struct iag *) bp->data; |
2910 | if (le32_to_cpu(iagp->iagnum) != i) { | 2901 | if (le32_to_cpu(iagp->iagnum) != i) { |
2911 | release_metapage(bp); | 2902 | release_metapage(bp); |
2912 | jfs_error(ipimap->i_sb, | 2903 | jfs_error(ipimap->i_sb, "unexpected value of iagnum\n"); |
2913 | "diExtendFs: unexpected value of iagnum"); | ||
2914 | return -EIO; | 2904 | return -EIO; |
2915 | } | 2905 | } |
2916 | 2906 | ||
@@ -2986,8 +2976,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) | |||
2986 | 2976 | ||
2987 | if (xnuminos != atomic_read(&imap->im_numinos) || | 2977 | if (xnuminos != atomic_read(&imap->im_numinos) || |
2988 | xnumfree != atomic_read(&imap->im_numfree)) { | 2978 | xnumfree != atomic_read(&imap->im_numfree)) { |
2989 | jfs_error(ipimap->i_sb, | 2979 | jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n"); |
2990 | "diExtendFs: numinos or numfree incorrect"); | ||
2991 | return -EIO; | 2980 | return -EIO; |
2992 | } | 2981 | } |
2993 | 2982 | ||
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 6740d34cd82b..d165cde0c68d 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c | |||
@@ -571,9 +571,10 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask) | |||
571 | return ret; | 571 | return ret; |
572 | } | 572 | } |
573 | 573 | ||
574 | static void metapage_invalidatepage(struct page *page, unsigned long offset) | 574 | static void metapage_invalidatepage(struct page *page, unsigned int offset, |
575 | unsigned int length) | ||
575 | { | 576 | { |
576 | BUG_ON(offset); | 577 | BUG_ON(offset || length < PAGE_CACHE_SIZE); |
577 | 578 | ||
578 | BUG_ON(PageWriteback(page)); | 579 | BUG_ON(PageWriteback(page)); |
579 | 580 | ||
@@ -646,7 +647,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, | |||
646 | if (mp) { | 647 | if (mp) { |
647 | if (mp->logical_size != size) { | 648 | if (mp->logical_size != size) { |
648 | jfs_error(inode->i_sb, | 649 | jfs_error(inode->i_sb, |
649 | "__get_metapage: mp->logical_size != size"); | 650 | "get_mp->logical_size != size\n"); |
650 | jfs_err("logical_size = %d, size = %d", | 651 | jfs_err("logical_size = %d, size = %d", |
651 | mp->logical_size, size); | 652 | mp->logical_size, size); |
652 | dump_stack(); | 653 | dump_stack(); |
@@ -657,8 +658,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, | |||
657 | if (test_bit(META_discard, &mp->flag)) { | 658 | if (test_bit(META_discard, &mp->flag)) { |
658 | if (!new) { | 659 | if (!new) { |
659 | jfs_error(inode->i_sb, | 660 | jfs_error(inode->i_sb, |
660 | "__get_metapage: using a " | 661 | "using a discarded metapage\n"); |
661 | "discarded metapage"); | ||
662 | discard_metapage(mp); | 662 | discard_metapage(mp); |
663 | goto unlock; | 663 | goto unlock; |
664 | } | 664 | } |
diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h index 884fc21ab8ee..04847b8d3070 100644 --- a/fs/jfs/jfs_superblock.h +++ b/fs/jfs/jfs_superblock.h | |||
@@ -108,6 +108,7 @@ struct jfs_superblock { | |||
108 | 108 | ||
109 | extern int readSuper(struct super_block *, struct buffer_head **); | 109 | extern int readSuper(struct super_block *, struct buffer_head **); |
110 | extern int updateSuper(struct super_block *, uint); | 110 | extern int updateSuper(struct super_block *, uint); |
111 | __printf(2, 3) | ||
111 | extern void jfs_error(struct super_block *, const char *, ...); | 112 | extern void jfs_error(struct super_block *, const char *, ...); |
112 | extern int jfs_mount(struct super_block *); | 113 | extern int jfs_mount(struct super_block *); |
113 | extern int jfs_mount_rw(struct super_block *, int); | 114 | extern int jfs_mount_rw(struct super_block *, int); |
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 5fcc02eaa64c..564c4f279ac6 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
@@ -2684,7 +2684,7 @@ void txAbort(tid_t tid, int dirty) | |||
2684 | * mark filesystem dirty | 2684 | * mark filesystem dirty |
2685 | */ | 2685 | */ |
2686 | if (dirty) | 2686 | if (dirty) |
2687 | jfs_error(tblk->sb, "txAbort"); | 2687 | jfs_error(tblk->sb, "\n"); |
2688 | 2688 | ||
2689 | return; | 2689 | return; |
2690 | } | 2690 | } |
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 6c50871e6220..5ad7748860ce 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c | |||
@@ -64,22 +64,23 @@ | |||
64 | 64 | ||
65 | /* get page buffer for specified block address */ | 65 | /* get page buffer for specified block address */ |
66 | /* ToDo: Replace this ugly macro with a function */ | 66 | /* ToDo: Replace this ugly macro with a function */ |
67 | #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ | 67 | #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ |
68 | {\ | 68 | do { \ |
69 | BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ | 69 | BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot); \ |
70 | if (!(RC))\ | 70 | if (!(RC)) { \ |
71 | {\ | 71 | if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \ |
72 | if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ | 72 | (le16_to_cpu((P)->header.nextindex) > \ |
73 | (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ | 73 | le16_to_cpu((P)->header.maxentry)) || \ |
74 | (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ | 74 | (le16_to_cpu((P)->header.maxentry) > \ |
75 | {\ | 75 | (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \ |
76 | jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ | 76 | jfs_error((IP)->i_sb, \ |
77 | BT_PUTPAGE(MP);\ | 77 | "XT_GETPAGE: xtree page corrupt\n"); \ |
78 | MP = NULL;\ | 78 | BT_PUTPAGE(MP); \ |
79 | RC = -EIO;\ | 79 | MP = NULL; \ |
80 | }\ | 80 | RC = -EIO; \ |
81 | }\ | 81 | } \ |
82 | } | 82 | } \ |
83 | } while (0) | ||
83 | 84 | ||
84 | /* for consistency */ | 85 | /* for consistency */ |
85 | #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) | 86 | #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) |
@@ -499,7 +500,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, | |||
499 | 500 | ||
500 | /* push (bn, index) of the parent page/entry */ | 501 | /* push (bn, index) of the parent page/entry */ |
501 | if (BT_STACK_FULL(btstack)) { | 502 | if (BT_STACK_FULL(btstack)) { |
502 | jfs_error(ip->i_sb, "stack overrun in xtSearch!"); | 503 | jfs_error(ip->i_sb, "stack overrun!\n"); |
503 | XT_PUTPAGE(mp); | 504 | XT_PUTPAGE(mp); |
504 | return -EIO; | 505 | return -EIO; |
505 | } | 506 | } |
@@ -1385,7 +1386,7 @@ int xtExtend(tid_t tid, /* transaction id */ | |||
1385 | 1386 | ||
1386 | if (cmp != 0) { | 1387 | if (cmp != 0) { |
1387 | XT_PUTPAGE(mp); | 1388 | XT_PUTPAGE(mp); |
1388 | jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); | 1389 | jfs_error(ip->i_sb, "xtSearch did not find extent\n"); |
1389 | return -EIO; | 1390 | return -EIO; |
1390 | } | 1391 | } |
1391 | 1392 | ||
@@ -1393,7 +1394,7 @@ int xtExtend(tid_t tid, /* transaction id */ | |||
1393 | xad = &p->xad[index]; | 1394 | xad = &p->xad[index]; |
1394 | if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { | 1395 | if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { |
1395 | XT_PUTPAGE(mp); | 1396 | XT_PUTPAGE(mp); |
1396 | jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); | 1397 | jfs_error(ip->i_sb, "extension is not contiguous\n"); |
1397 | return -EIO; | 1398 | return -EIO; |
1398 | } | 1399 | } |
1399 | 1400 | ||
@@ -1552,7 +1553,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", | |||
1552 | 1553 | ||
1553 | if (cmp != 0) { | 1554 | if (cmp != 0) { |
1554 | XT_PUTPAGE(mp); | 1555 | XT_PUTPAGE(mp); |
1555 | jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); | 1556 | jfs_error(ip->i_sb, "couldn't find extent\n"); |
1556 | return -EIO; | 1557 | return -EIO; |
1557 | } | 1558 | } |
1558 | 1559 | ||
@@ -1560,8 +1561,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", | |||
1560 | nextindex = le16_to_cpu(p->header.nextindex); | 1561 | nextindex = le16_to_cpu(p->header.nextindex); |
1561 | if (index != nextindex - 1) { | 1562 | if (index != nextindex - 1) { |
1562 | XT_PUTPAGE(mp); | 1563 | XT_PUTPAGE(mp); |
1563 | jfs_error(ip->i_sb, | 1564 | jfs_error(ip->i_sb, "the entry found is not the last entry\n"); |
1564 | "xtTailgate: the entry found is not the last entry"); | ||
1565 | return -EIO; | 1565 | return -EIO; |
1566 | } | 1566 | } |
1567 | 1567 | ||
@@ -1734,7 +1734,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) | |||
1734 | 1734 | ||
1735 | if (cmp != 0) { | 1735 | if (cmp != 0) { |
1736 | XT_PUTPAGE(mp); | 1736 | XT_PUTPAGE(mp); |
1737 | jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); | 1737 | jfs_error(ip->i_sb, "Could not find extent\n"); |
1738 | return -EIO; | 1738 | return -EIO; |
1739 | } | 1739 | } |
1740 | 1740 | ||
@@ -1758,7 +1758,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) | |||
1758 | (nxoff + nxlen > xoff + xlen)) { | 1758 | (nxoff + nxlen > xoff + xlen)) { |
1759 | XT_PUTPAGE(mp); | 1759 | XT_PUTPAGE(mp); |
1760 | jfs_error(ip->i_sb, | 1760 | jfs_error(ip->i_sb, |
1761 | "xtUpdate: nXAD in not completely contained within XAD"); | 1761 | "nXAD in not completely contained within XAD\n"); |
1762 | return -EIO; | 1762 | return -EIO; |
1763 | } | 1763 | } |
1764 | 1764 | ||
@@ -1907,7 +1907,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) | |||
1907 | 1907 | ||
1908 | if (xoff >= nxoff) { | 1908 | if (xoff >= nxoff) { |
1909 | XT_PUTPAGE(mp); | 1909 | XT_PUTPAGE(mp); |
1910 | jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); | 1910 | jfs_error(ip->i_sb, "xoff >= nxoff\n"); |
1911 | return -EIO; | 1911 | return -EIO; |
1912 | } | 1912 | } |
1913 | /* #endif _JFS_WIP_COALESCE */ | 1913 | /* #endif _JFS_WIP_COALESCE */ |
@@ -2048,14 +2048,13 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) | |||
2048 | 2048 | ||
2049 | if (cmp != 0) { | 2049 | if (cmp != 0) { |
2050 | XT_PUTPAGE(mp); | 2050 | XT_PUTPAGE(mp); |
2051 | jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); | 2051 | jfs_error(ip->i_sb, "xtSearch failed\n"); |
2052 | return -EIO; | 2052 | return -EIO; |
2053 | } | 2053 | } |
2054 | 2054 | ||
2055 | if (index0 != index) { | 2055 | if (index0 != index) { |
2056 | XT_PUTPAGE(mp); | 2056 | XT_PUTPAGE(mp); |
2057 | jfs_error(ip->i_sb, | 2057 | jfs_error(ip->i_sb, "unexpected value of index\n"); |
2058 | "xtUpdate: unexpected value of index"); | ||
2059 | return -EIO; | 2058 | return -EIO; |
2060 | } | 2059 | } |
2061 | } | 2060 | } |
@@ -3650,7 +3649,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) | |||
3650 | getChild: | 3649 | getChild: |
3651 | /* save current parent entry for the child page */ | 3650 | /* save current parent entry for the child page */ |
3652 | if (BT_STACK_FULL(&btstack)) { | 3651 | if (BT_STACK_FULL(&btstack)) { |
3653 | jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); | 3652 | jfs_error(ip->i_sb, "stack overrun!\n"); |
3654 | XT_PUTPAGE(mp); | 3653 | XT_PUTPAGE(mp); |
3655 | return -EIO; | 3654 | return -EIO; |
3656 | } | 3655 | } |
@@ -3751,8 +3750,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | |||
3751 | 3750 | ||
3752 | if (cmp != 0) { | 3751 | if (cmp != 0) { |
3753 | XT_PUTPAGE(mp); | 3752 | XT_PUTPAGE(mp); |
3754 | jfs_error(ip->i_sb, | 3753 | jfs_error(ip->i_sb, "did not find extent\n"); |
3755 | "xtTruncate_pmap: did not find extent"); | ||
3756 | return -EIO; | 3754 | return -EIO; |
3757 | } | 3755 | } |
3758 | } else { | 3756 | } else { |
@@ -3851,7 +3849,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) | |||
3851 | getChild: | 3849 | getChild: |
3852 | /* save current parent entry for the child page */ | 3850 | /* save current parent entry for the child page */ |
3853 | if (BT_STACK_FULL(&btstack)) { | 3851 | if (BT_STACK_FULL(&btstack)) { |
3854 | jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); | 3852 | jfs_error(ip->i_sb, "stack overrun!\n"); |
3855 | XT_PUTPAGE(mp); | 3853 | XT_PUTPAGE(mp); |
3856 | return -EIO; | 3854 | return -EIO; |
3857 | } | 3855 | } |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 3b91a7ad6086..aa8a3370631b 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -1176,7 +1176,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1176 | if (!S_ISDIR(old_ip->i_mode) && new_ip) | 1176 | if (!S_ISDIR(old_ip->i_mode) && new_ip) |
1177 | IWRITE_UNLOCK(new_ip); | 1177 | IWRITE_UNLOCK(new_ip); |
1178 | jfs_error(new_ip->i_sb, | 1178 | jfs_error(new_ip->i_sb, |
1179 | "jfs_rename: new_ip->i_nlink != 0"); | 1179 | "new_ip->i_nlink != 0\n"); |
1180 | return -EIO; | 1180 | return -EIO; |
1181 | } | 1181 | } |
1182 | tblk = tid_to_tblock(tid); | 1182 | tblk = tid_to_tblock(tid); |
@@ -1529,7 +1529,7 @@ const struct inode_operations jfs_dir_inode_operations = { | |||
1529 | 1529 | ||
1530 | const struct file_operations jfs_dir_operations = { | 1530 | const struct file_operations jfs_dir_operations = { |
1531 | .read = generic_read_dir, | 1531 | .read = generic_read_dir, |
1532 | .readdir = jfs_readdir, | 1532 | .iterate = jfs_readdir, |
1533 | .fsync = jfs_fsync, | 1533 | .fsync = jfs_fsync, |
1534 | .unlocked_ioctl = jfs_ioctl, | 1534 | .unlocked_ioctl = jfs_ioctl, |
1535 | #ifdef CONFIG_COMPAT | 1535 | #ifdef CONFIG_COMPAT |
@@ -1538,8 +1538,7 @@ const struct file_operations jfs_dir_operations = { | |||
1538 | .llseek = generic_file_llseek, | 1538 | .llseek = generic_file_llseek, |
1539 | }; | 1539 | }; |
1540 | 1540 | ||
1541 | static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode, | 1541 | static int jfs_ci_hash(const struct dentry *dir, struct qstr *this) |
1542 | struct qstr *this) | ||
1543 | { | 1542 | { |
1544 | unsigned long hash; | 1543 | unsigned long hash; |
1545 | int i; | 1544 | int i; |
@@ -1552,9 +1551,7 @@ static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode, | |||
1552 | return 0; | 1551 | return 0; |
1553 | } | 1552 | } |
1554 | 1553 | ||
1555 | static int jfs_ci_compare(const struct dentry *parent, | 1554 | static int jfs_ci_compare(const struct dentry *parent, const struct dentry *dentry, |
1556 | const struct inode *pinode, | ||
1557 | const struct dentry *dentry, const struct inode *inode, | ||
1558 | unsigned int len, const char *str, const struct qstr *name) | 1555 | unsigned int len, const char *str, const struct qstr *name) |
1559 | { | 1556 | { |
1560 | int i, result = 1; | 1557 | int i, result = 1; |
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 8d0c1c7c0820..90b3bc21e9b0 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c | |||
@@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) | |||
530 | goto resume; | 530 | goto resume; |
531 | 531 | ||
532 | error_out: | 532 | error_out: |
533 | jfs_error(sb, "jfs_extendfs"); | 533 | jfs_error(sb, "\n"); |
534 | 534 | ||
535 | resume: | 535 | resume: |
536 | /* | 536 | /* |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 788e0a9c1fb0..6669aa2042c3 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -92,16 +92,20 @@ static void jfs_handle_error(struct super_block *sb) | |||
92 | /* nothing is done for continue beyond marking the superblock dirty */ | 92 | /* nothing is done for continue beyond marking the superblock dirty */ |
93 | } | 93 | } |
94 | 94 | ||
95 | void jfs_error(struct super_block *sb, const char * function, ...) | 95 | void jfs_error(struct super_block *sb, const char *fmt, ...) |
96 | { | 96 | { |
97 | static char error_buf[256]; | 97 | struct va_format vaf; |
98 | va_list args; | 98 | va_list args; |
99 | 99 | ||
100 | va_start(args, function); | 100 | va_start(args, fmt); |
101 | vsnprintf(error_buf, sizeof(error_buf), function, args); | 101 | |
102 | va_end(args); | 102 | vaf.fmt = fmt; |
103 | vaf.va = &args; | ||
103 | 104 | ||
104 | pr_err("ERROR: (device %s): %s\n", sb->s_id, error_buf); | 105 | pr_err("ERROR: (device %s): %pf: %pV\n", |
106 | sb->s_id, __builtin_return_address(0), &vaf); | ||
107 | |||
108 | va_end(args); | ||
105 | 109 | ||
106 | jfs_handle_error(sb); | 110 | jfs_handle_error(sb); |
107 | } | 111 | } |
@@ -617,7 +621,7 @@ static int jfs_freeze(struct super_block *sb) | |||
617 | txQuiesce(sb); | 621 | txQuiesce(sb); |
618 | rc = lmLogShutdown(log); | 622 | rc = lmLogShutdown(log); |
619 | if (rc) { | 623 | if (rc) { |
620 | jfs_error(sb, "jfs_freeze: lmLogShutdown failed"); | 624 | jfs_error(sb, "lmLogShutdown failed\n"); |
621 | 625 | ||
622 | /* let operations fail rather than hang */ | 626 | /* let operations fail rather than hang */ |
623 | txResume(sb); | 627 | txResume(sb); |
@@ -646,12 +650,12 @@ static int jfs_unfreeze(struct super_block *sb) | |||
646 | if (!(sb->s_flags & MS_RDONLY)) { | 650 | if (!(sb->s_flags & MS_RDONLY)) { |
647 | rc = updateSuper(sb, FM_MOUNT); | 651 | rc = updateSuper(sb, FM_MOUNT); |
648 | if (rc) { | 652 | if (rc) { |
649 | jfs_error(sb, "jfs_unfreeze: updateSuper failed"); | 653 | jfs_error(sb, "updateSuper failed\n"); |
650 | goto out; | 654 | goto out; |
651 | } | 655 | } |
652 | rc = lmLogInit(log); | 656 | rc = lmLogInit(log); |
653 | if (rc) | 657 | if (rc) |
654 | jfs_error(sb, "jfs_unfreeze: lmLogInit failed"); | 658 | jfs_error(sb, "lmLogInit failed\n"); |
655 | out: | 659 | out: |
656 | txResume(sb); | 660 | txResume(sb); |
657 | } | 661 | } |
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 42d67f9757bf..d3472f4cd530 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c | |||
@@ -382,7 +382,7 @@ static int ea_read(struct inode *ip, struct jfs_ea_list *ealist) | |||
382 | 382 | ||
383 | nbytes = sizeDXD(&ji->ea); | 383 | nbytes = sizeDXD(&ji->ea); |
384 | if (!nbytes) { | 384 | if (!nbytes) { |
385 | jfs_error(sb, "ea_read: nbytes is 0"); | 385 | jfs_error(sb, "nbytes is 0\n"); |
386 | return -EIO; | 386 | return -EIO; |
387 | } | 387 | } |
388 | 388 | ||
@@ -482,7 +482,7 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) | |||
482 | current_blocks = 0; | 482 | current_blocks = 0; |
483 | } else { | 483 | } else { |
484 | if (!(ji->ea.flag & DXD_EXTENT)) { | 484 | if (!(ji->ea.flag & DXD_EXTENT)) { |
485 | jfs_error(sb, "ea_get: invalid ea.flag)"); | 485 | jfs_error(sb, "invalid ea.flag\n"); |
486 | return -EIO; | 486 | return -EIO; |
487 | } | 487 | } |
488 | current_blocks = (ea_size + sb->s_blocksize - 1) >> | 488 | current_blocks = (ea_size + sb->s_blocksize - 1) >> |
@@ -1089,8 +1089,8 @@ int jfs_removexattr(struct dentry *dentry, const char *name) | |||
1089 | } | 1089 | } |
1090 | 1090 | ||
1091 | #ifdef CONFIG_JFS_SECURITY | 1091 | #ifdef CONFIG_JFS_SECURITY |
1092 | int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, | 1092 | static int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
1093 | void *fs_info) | 1093 | void *fs_info) |
1094 | { | 1094 | { |
1095 | const struct xattr *xattr; | 1095 | const struct xattr *xattr; |
1096 | tid_t *tid = fs_info; | 1096 | tid_t *tid = fs_info; |
diff --git a/fs/libfs.c b/fs/libfs.c index 916da8c4158b..3a3a9b53bf5a 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -61,7 +61,8 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned | |||
61 | 61 | ||
62 | if (dentry->d_name.len > NAME_MAX) | 62 | if (dentry->d_name.len > NAME_MAX) |
63 | return ERR_PTR(-ENAMETOOLONG); | 63 | return ERR_PTR(-ENAMETOOLONG); |
64 | d_set_d_op(dentry, &simple_dentry_operations); | 64 | if (!dentry->d_sb->s_d_op) |
65 | d_set_d_op(dentry, &simple_dentry_operations); | ||
65 | d_add(dentry, NULL); | 66 | d_add(dentry, NULL); |
66 | return NULL; | 67 | return NULL; |
67 | } | 68 | } |
@@ -135,60 +136,40 @@ static inline unsigned char dt_type(struct inode *inode) | |||
135 | * both impossible due to the lock on directory. | 136 | * both impossible due to the lock on directory. |
136 | */ | 137 | */ |
137 | 138 | ||
138 | int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) | 139 | int dcache_readdir(struct file *file, struct dir_context *ctx) |
139 | { | 140 | { |
140 | struct dentry *dentry = filp->f_path.dentry; | 141 | struct dentry *dentry = file->f_path.dentry; |
141 | struct dentry *cursor = filp->private_data; | 142 | struct dentry *cursor = file->private_data; |
142 | struct list_head *p, *q = &cursor->d_u.d_child; | 143 | struct list_head *p, *q = &cursor->d_u.d_child; |
143 | ino_t ino; | ||
144 | int i = filp->f_pos; | ||
145 | 144 | ||
146 | switch (i) { | 145 | if (!dir_emit_dots(file, ctx)) |
147 | case 0: | 146 | return 0; |
148 | ino = dentry->d_inode->i_ino; | 147 | spin_lock(&dentry->d_lock); |
149 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | 148 | if (ctx->pos == 2) |
150 | break; | 149 | list_move(q, &dentry->d_subdirs); |
151 | filp->f_pos++; | 150 | |
152 | i++; | 151 | for (p = q->next; p != &dentry->d_subdirs; p = p->next) { |
153 | /* fallthrough */ | 152 | struct dentry *next = list_entry(p, struct dentry, d_u.d_child); |
154 | case 1: | 153 | spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); |
155 | ino = parent_ino(dentry); | 154 | if (!simple_positive(next)) { |
156 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | 155 | spin_unlock(&next->d_lock); |
157 | break; | 156 | continue; |
158 | filp->f_pos++; | 157 | } |
159 | i++; | ||
160 | /* fallthrough */ | ||
161 | default: | ||
162 | spin_lock(&dentry->d_lock); | ||
163 | if (filp->f_pos == 2) | ||
164 | list_move(q, &dentry->d_subdirs); | ||
165 | |||
166 | for (p=q->next; p != &dentry->d_subdirs; p=p->next) { | ||
167 | struct dentry *next; | ||
168 | next = list_entry(p, struct dentry, d_u.d_child); | ||
169 | spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); | ||
170 | if (!simple_positive(next)) { | ||
171 | spin_unlock(&next->d_lock); | ||
172 | continue; | ||
173 | } | ||
174 | 158 | ||
175 | spin_unlock(&next->d_lock); | 159 | spin_unlock(&next->d_lock); |
176 | spin_unlock(&dentry->d_lock); | 160 | spin_unlock(&dentry->d_lock); |
177 | if (filldir(dirent, next->d_name.name, | 161 | if (!dir_emit(ctx, next->d_name.name, next->d_name.len, |
178 | next->d_name.len, filp->f_pos, | 162 | next->d_inode->i_ino, dt_type(next->d_inode))) |
179 | next->d_inode->i_ino, | 163 | return 0; |
180 | dt_type(next->d_inode)) < 0) | 164 | spin_lock(&dentry->d_lock); |
181 | return 0; | 165 | spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); |
182 | spin_lock(&dentry->d_lock); | 166 | /* next is still alive */ |
183 | spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); | 167 | list_move(q, p); |
184 | /* next is still alive */ | 168 | spin_unlock(&next->d_lock); |
185 | list_move(q, p); | 169 | p = q; |
186 | spin_unlock(&next->d_lock); | 170 | ctx->pos++; |
187 | p = q; | ||
188 | filp->f_pos++; | ||
189 | } | ||
190 | spin_unlock(&dentry->d_lock); | ||
191 | } | 171 | } |
172 | spin_unlock(&dentry->d_lock); | ||
192 | return 0; | 173 | return 0; |
193 | } | 174 | } |
194 | 175 | ||
@@ -202,7 +183,7 @@ const struct file_operations simple_dir_operations = { | |||
202 | .release = dcache_dir_close, | 183 | .release = dcache_dir_close, |
203 | .llseek = dcache_dir_lseek, | 184 | .llseek = dcache_dir_lseek, |
204 | .read = generic_read_dir, | 185 | .read = generic_read_dir, |
205 | .readdir = dcache_readdir, | 186 | .iterate = dcache_readdir, |
206 | .fsync = noop_fsync, | 187 | .fsync = noop_fsync, |
207 | }; | 188 | }; |
208 | 189 | ||
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 01bfe7662751..41e491b8e5d7 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c | |||
@@ -64,12 +64,17 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) | |||
64 | nlm_init->protocol, nlm_version, | 64 | nlm_init->protocol, nlm_version, |
65 | nlm_init->hostname, nlm_init->noresvport, | 65 | nlm_init->hostname, nlm_init->noresvport, |
66 | nlm_init->net); | 66 | nlm_init->net); |
67 | if (host == NULL) { | 67 | if (host == NULL) |
68 | lockd_down(nlm_init->net); | 68 | goto out_nohost; |
69 | return ERR_PTR(-ENOLCK); | 69 | if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL) |
70 | } | 70 | goto out_nobind; |
71 | 71 | ||
72 | return host; | 72 | return host; |
73 | out_nobind: | ||
74 | nlmclnt_release_host(host); | ||
75 | out_nohost: | ||
76 | lockd_down(nlm_init->net); | ||
77 | return ERR_PTR(-ENOLCK); | ||
73 | } | 78 | } |
74 | EXPORT_SYMBOL_GPL(nlmclnt_init); | 79 | EXPORT_SYMBOL_GPL(nlmclnt_init); |
75 | 80 | ||
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 9760ecb9b60f..acd394716349 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
@@ -125,14 +125,15 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) | |||
125 | { | 125 | { |
126 | struct nlm_args *argp = &req->a_args; | 126 | struct nlm_args *argp = &req->a_args; |
127 | struct nlm_lock *lock = &argp->lock; | 127 | struct nlm_lock *lock = &argp->lock; |
128 | char *nodename = req->a_host->h_rpcclnt->cl_nodename; | ||
128 | 129 | ||
129 | nlmclnt_next_cookie(&argp->cookie); | 130 | nlmclnt_next_cookie(&argp->cookie); |
130 | memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh)); | 131 | memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh)); |
131 | lock->caller = utsname()->nodename; | 132 | lock->caller = nodename; |
132 | lock->oh.data = req->a_owner; | 133 | lock->oh.data = req->a_owner; |
133 | lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", | 134 | lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", |
134 | (unsigned int)fl->fl_u.nfs_fl.owner->pid, | 135 | (unsigned int)fl->fl_u.nfs_fl.owner->pid, |
135 | utsname()->nodename); | 136 | nodename); |
136 | lock->svid = fl->fl_u.nfs_fl.owner->pid; | 137 | lock->svid = fl->fl_u.nfs_fl.owner->pid; |
137 | lock->fl.fl_start = fl->fl_start; | 138 | lock->fl.fl_start = fl->fl_start; |
138 | lock->fl.fl_end = fl->fl_end; | 139 | lock->fl.fl_end = fl->fl_end; |
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index a2aa97d45670..10d6c41aecad 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -305,7 +305,7 @@ static int lockd_start_svc(struct svc_serv *serv) | |||
305 | svc_sock_update_bufs(serv); | 305 | svc_sock_update_bufs(serv); |
306 | serv->sv_maxconn = nlm_max_connections; | 306 | serv->sv_maxconn = nlm_max_connections; |
307 | 307 | ||
308 | nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name); | 308 | nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name); |
309 | if (IS_ERR(nlmsvc_task)) { | 309 | if (IS_ERR(nlmsvc_task)) { |
310 | error = PTR_ERR(nlmsvc_task); | 310 | error = PTR_ERR(nlmsvc_task); |
311 | printk(KERN_WARNING | 311 | printk(KERN_WARNING |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e703318c41df..e066a3902973 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -276,7 +276,7 @@ static int nlmsvc_unlink_block(struct nlm_block *block) | |||
276 | dprintk("lockd: unlinking block %p...\n", block); | 276 | dprintk("lockd: unlinking block %p...\n", block); |
277 | 277 | ||
278 | /* Remove block from list */ | 278 | /* Remove block from list */ |
279 | status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl); | 279 | status = posix_unblock_lock(&block->b_call->a_args.lock.fl); |
280 | nlmsvc_remove_block(block); | 280 | nlmsvc_remove_block(block); |
281 | return status; | 281 | return status; |
282 | } | 282 | } |
@@ -744,8 +744,20 @@ static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2) | |||
744 | return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; | 744 | return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; |
745 | } | 745 | } |
746 | 746 | ||
747 | /* | ||
748 | * Since NLM uses two "keys" for tracking locks, we need to hash them down | ||
749 | * to one for the blocked_hash. Here, we're just xor'ing the host address | ||
750 | * with the pid in order to create a key value for picking a hash bucket. | ||
751 | */ | ||
752 | static unsigned long | ||
753 | nlmsvc_owner_key(struct file_lock *fl) | ||
754 | { | ||
755 | return (unsigned long)fl->fl_owner ^ (unsigned long)fl->fl_pid; | ||
756 | } | ||
757 | |||
747 | const struct lock_manager_operations nlmsvc_lock_operations = { | 758 | const struct lock_manager_operations nlmsvc_lock_operations = { |
748 | .lm_compare_owner = nlmsvc_same_owner, | 759 | .lm_compare_owner = nlmsvc_same_owner, |
760 | .lm_owner_key = nlmsvc_owner_key, | ||
749 | .lm_notify = nlmsvc_notify_blocked, | 761 | .lm_notify = nlmsvc_notify_blocked, |
750 | .lm_grant = nlmsvc_grant_deferred, | 762 | .lm_grant = nlmsvc_grant_deferred, |
751 | }; | 763 | }; |
@@ -939,6 +951,7 @@ nlmsvc_retry_blocked(void) | |||
939 | unsigned long timeout = MAX_SCHEDULE_TIMEOUT; | 951 | unsigned long timeout = MAX_SCHEDULE_TIMEOUT; |
940 | struct nlm_block *block; | 952 | struct nlm_block *block; |
941 | 953 | ||
954 | spin_lock(&nlm_blocked_lock); | ||
942 | while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { | 955 | while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { |
943 | block = list_entry(nlm_blocked.next, struct nlm_block, b_list); | 956 | block = list_entry(nlm_blocked.next, struct nlm_block, b_list); |
944 | 957 | ||
@@ -948,6 +961,7 @@ nlmsvc_retry_blocked(void) | |||
948 | timeout = block->b_when - jiffies; | 961 | timeout = block->b_when - jiffies; |
949 | break; | 962 | break; |
950 | } | 963 | } |
964 | spin_unlock(&nlm_blocked_lock); | ||
951 | 965 | ||
952 | dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", | 966 | dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", |
953 | block, block->b_when); | 967 | block, block->b_when); |
@@ -957,7 +971,9 @@ nlmsvc_retry_blocked(void) | |||
957 | retry_deferred_block(block); | 971 | retry_deferred_block(block); |
958 | } else | 972 | } else |
959 | nlmsvc_grant_blocked(block); | 973 | nlmsvc_grant_blocked(block); |
974 | spin_lock(&nlm_blocked_lock); | ||
960 | } | 975 | } |
976 | spin_unlock(&nlm_blocked_lock); | ||
961 | 977 | ||
962 | return timeout; | 978 | return timeout; |
963 | } | 979 | } |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 97e87415b145..dc5c75930f0f 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
@@ -169,7 +169,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, | |||
169 | 169 | ||
170 | again: | 170 | again: |
171 | file->f_locks = 0; | 171 | file->f_locks = 0; |
172 | lock_flocks(); /* protects i_flock list */ | 172 | spin_lock(&inode->i_lock); |
173 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { | 173 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { |
174 | if (fl->fl_lmops != &nlmsvc_lock_operations) | 174 | if (fl->fl_lmops != &nlmsvc_lock_operations) |
175 | continue; | 175 | continue; |
@@ -181,7 +181,7 @@ again: | |||
181 | if (match(lockhost, host)) { | 181 | if (match(lockhost, host)) { |
182 | struct file_lock lock = *fl; | 182 | struct file_lock lock = *fl; |
183 | 183 | ||
184 | unlock_flocks(); | 184 | spin_unlock(&inode->i_lock); |
185 | lock.fl_type = F_UNLCK; | 185 | lock.fl_type = F_UNLCK; |
186 | lock.fl_start = 0; | 186 | lock.fl_start = 0; |
187 | lock.fl_end = OFFSET_MAX; | 187 | lock.fl_end = OFFSET_MAX; |
@@ -193,7 +193,7 @@ again: | |||
193 | goto again; | 193 | goto again; |
194 | } | 194 | } |
195 | } | 195 | } |
196 | unlock_flocks(); | 196 | spin_unlock(&inode->i_lock); |
197 | 197 | ||
198 | return 0; | 198 | return 0; |
199 | } | 199 | } |
@@ -228,14 +228,14 @@ nlm_file_inuse(struct nlm_file *file) | |||
228 | if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) | 228 | if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) |
229 | return 1; | 229 | return 1; |
230 | 230 | ||
231 | lock_flocks(); | 231 | spin_lock(&inode->i_lock); |
232 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { | 232 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { |
233 | if (fl->fl_lmops == &nlmsvc_lock_operations) { | 233 | if (fl->fl_lmops == &nlmsvc_lock_operations) { |
234 | unlock_flocks(); | 234 | spin_unlock(&inode->i_lock); |
235 | return 1; | 235 | return 1; |
236 | } | 236 | } |
237 | } | 237 | } |
238 | unlock_flocks(); | 238 | spin_unlock(&inode->i_lock); |
239 | file->f_locks = 0; | 239 | file->f_locks = 0; |
240 | return 0; | 240 | return 0; |
241 | } | 241 | } |
diff --git a/fs/locks.c b/fs/locks.c index cb424a4fed71..b27a3005d78d 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -126,6 +126,9 @@ | |||
126 | #include <linux/time.h> | 126 | #include <linux/time.h> |
127 | #include <linux/rcupdate.h> | 127 | #include <linux/rcupdate.h> |
128 | #include <linux/pid_namespace.h> | 128 | #include <linux/pid_namespace.h> |
129 | #include <linux/hashtable.h> | ||
130 | #include <linux/percpu.h> | ||
131 | #include <linux/lglock.h> | ||
129 | 132 | ||
130 | #include <asm/uaccess.h> | 133 | #include <asm/uaccess.h> |
131 | 134 | ||
@@ -153,30 +156,53 @@ int lease_break_time = 45; | |||
153 | #define for_each_lock(inode, lockp) \ | 156 | #define for_each_lock(inode, lockp) \ |
154 | for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) | 157 | for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) |
155 | 158 | ||
156 | static LIST_HEAD(file_lock_list); | 159 | /* |
157 | static LIST_HEAD(blocked_list); | 160 | * The global file_lock_list is only used for displaying /proc/locks, so we |
158 | static DEFINE_SPINLOCK(file_lock_lock); | 161 | * keep a list on each CPU, with each list protected by its own spinlock via |
162 | * the file_lock_lglock. Note that alterations to the list also require that | ||
163 | * the relevant i_lock is held. | ||
164 | */ | ||
165 | DEFINE_STATIC_LGLOCK(file_lock_lglock); | ||
166 | static DEFINE_PER_CPU(struct hlist_head, file_lock_list); | ||
159 | 167 | ||
160 | /* | 168 | /* |
161 | * Protects the two list heads above, plus the inode->i_flock list | 169 | * The blocked_hash is used to find POSIX lock loops for deadlock detection. |
170 | * It is protected by blocked_lock_lock. | ||
171 | * | ||
172 | * We hash locks by lockowner in order to optimize searching for the lock a | ||
173 | * particular lockowner is waiting on. | ||
174 | * | ||
175 | * FIXME: make this value scale via some heuristic? We generally will want more | ||
176 | * buckets when we have more lockowners holding locks, but that's a little | ||
177 | * difficult to determine without knowing what the workload will look like. | ||
162 | */ | 178 | */ |
163 | void lock_flocks(void) | 179 | #define BLOCKED_HASH_BITS 7 |
164 | { | 180 | static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); |
165 | spin_lock(&file_lock_lock); | ||
166 | } | ||
167 | EXPORT_SYMBOL_GPL(lock_flocks); | ||
168 | 181 | ||
169 | void unlock_flocks(void) | 182 | /* |
170 | { | 183 | * This lock protects the blocked_hash. Generally, if you're accessing it, you |
171 | spin_unlock(&file_lock_lock); | 184 | * want to be holding this lock. |
172 | } | 185 | * |
173 | EXPORT_SYMBOL_GPL(unlock_flocks); | 186 | * In addition, it also protects the fl->fl_block list, and the fl->fl_next |
187 | * pointer for file_lock structures that are acting as lock requests (in | ||
188 | * contrast to those that are acting as records of acquired locks). | ||
189 | * | ||
190 | * Note that when we acquire this lock in order to change the above fields, | ||
191 | * we often hold the i_lock as well. In certain cases, when reading the fields | ||
192 | * protected by this lock, we can skip acquiring it iff we already hold the | ||
193 | * i_lock. | ||
194 | * | ||
195 | * In particular, adding an entry to the fl_block list requires that you hold | ||
196 | * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting | ||
197 | * an entry from the list however only requires the file_lock_lock. | ||
198 | */ | ||
199 | static DEFINE_SPINLOCK(blocked_lock_lock); | ||
174 | 200 | ||
175 | static struct kmem_cache *filelock_cache __read_mostly; | 201 | static struct kmem_cache *filelock_cache __read_mostly; |
176 | 202 | ||
177 | static void locks_init_lock_heads(struct file_lock *fl) | 203 | static void locks_init_lock_heads(struct file_lock *fl) |
178 | { | 204 | { |
179 | INIT_LIST_HEAD(&fl->fl_link); | 205 | INIT_HLIST_NODE(&fl->fl_link); |
180 | INIT_LIST_HEAD(&fl->fl_block); | 206 | INIT_LIST_HEAD(&fl->fl_block); |
181 | init_waitqueue_head(&fl->fl_wait); | 207 | init_waitqueue_head(&fl->fl_wait); |
182 | } | 208 | } |
@@ -210,7 +236,7 @@ void locks_free_lock(struct file_lock *fl) | |||
210 | { | 236 | { |
211 | BUG_ON(waitqueue_active(&fl->fl_wait)); | 237 | BUG_ON(waitqueue_active(&fl->fl_wait)); |
212 | BUG_ON(!list_empty(&fl->fl_block)); | 238 | BUG_ON(!list_empty(&fl->fl_block)); |
213 | BUG_ON(!list_empty(&fl->fl_link)); | 239 | BUG_ON(!hlist_unhashed(&fl->fl_link)); |
214 | 240 | ||
215 | locks_release_private(fl); | 241 | locks_release_private(fl); |
216 | kmem_cache_free(filelock_cache, fl); | 242 | kmem_cache_free(filelock_cache, fl); |
@@ -484,47 +510,118 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) | |||
484 | return fl1->fl_owner == fl2->fl_owner; | 510 | return fl1->fl_owner == fl2->fl_owner; |
485 | } | 511 | } |
486 | 512 | ||
513 | /* Must be called with the i_lock held! */ | ||
514 | static inline void | ||
515 | locks_insert_global_locks(struct file_lock *fl) | ||
516 | { | ||
517 | lg_local_lock(&file_lock_lglock); | ||
518 | fl->fl_link_cpu = smp_processor_id(); | ||
519 | hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list)); | ||
520 | lg_local_unlock(&file_lock_lglock); | ||
521 | } | ||
522 | |||
523 | /* Must be called with the i_lock held! */ | ||
524 | static inline void | ||
525 | locks_delete_global_locks(struct file_lock *fl) | ||
526 | { | ||
527 | /* | ||
528 | * Avoid taking lock if already unhashed. This is safe since this check | ||
529 | * is done while holding the i_lock, and new insertions into the list | ||
530 | * also require that it be held. | ||
531 | */ | ||
532 | if (hlist_unhashed(&fl->fl_link)) | ||
533 | return; | ||
534 | lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu); | ||
535 | hlist_del_init(&fl->fl_link); | ||
536 | lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu); | ||
537 | } | ||
538 | |||
539 | static unsigned long | ||
540 | posix_owner_key(struct file_lock *fl) | ||
541 | { | ||
542 | if (fl->fl_lmops && fl->fl_lmops->lm_owner_key) | ||
543 | return fl->fl_lmops->lm_owner_key(fl); | ||
544 | return (unsigned long)fl->fl_owner; | ||
545 | } | ||
546 | |||
547 | static inline void | ||
548 | locks_insert_global_blocked(struct file_lock *waiter) | ||
549 | { | ||
550 | hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); | ||
551 | } | ||
552 | |||
553 | static inline void | ||
554 | locks_delete_global_blocked(struct file_lock *waiter) | ||
555 | { | ||
556 | hash_del(&waiter->fl_link); | ||
557 | } | ||
558 | |||
487 | /* Remove waiter from blocker's block list. | 559 | /* Remove waiter from blocker's block list. |
488 | * When blocker ends up pointing to itself then the list is empty. | 560 | * When blocker ends up pointing to itself then the list is empty. |
561 | * | ||
562 | * Must be called with blocked_lock_lock held. | ||
489 | */ | 563 | */ |
490 | static void __locks_delete_block(struct file_lock *waiter) | 564 | static void __locks_delete_block(struct file_lock *waiter) |
491 | { | 565 | { |
566 | locks_delete_global_blocked(waiter); | ||
492 | list_del_init(&waiter->fl_block); | 567 | list_del_init(&waiter->fl_block); |
493 | list_del_init(&waiter->fl_link); | ||
494 | waiter->fl_next = NULL; | 568 | waiter->fl_next = NULL; |
495 | } | 569 | } |
496 | 570 | ||
497 | /* | 571 | static void locks_delete_block(struct file_lock *waiter) |
498 | */ | ||
499 | void locks_delete_block(struct file_lock *waiter) | ||
500 | { | 572 | { |
501 | lock_flocks(); | 573 | spin_lock(&blocked_lock_lock); |
502 | __locks_delete_block(waiter); | 574 | __locks_delete_block(waiter); |
503 | unlock_flocks(); | 575 | spin_unlock(&blocked_lock_lock); |
504 | } | 576 | } |
505 | EXPORT_SYMBOL(locks_delete_block); | ||
506 | 577 | ||
507 | /* Insert waiter into blocker's block list. | 578 | /* Insert waiter into blocker's block list. |
508 | * We use a circular list so that processes can be easily woken up in | 579 | * We use a circular list so that processes can be easily woken up in |
509 | * the order they blocked. The documentation doesn't require this but | 580 | * the order they blocked. The documentation doesn't require this but |
510 | * it seems like the reasonable thing to do. | 581 | * it seems like the reasonable thing to do. |
582 | * | ||
583 | * Must be called with both the i_lock and blocked_lock_lock held. The fl_block | ||
584 | * list itself is protected by the file_lock_list, but by ensuring that the | ||
585 | * i_lock is also held on insertions we can avoid taking the blocked_lock_lock | ||
586 | * in some cases when we see that the fl_block list is empty. | ||
511 | */ | 587 | */ |
512 | static void locks_insert_block(struct file_lock *blocker, | 588 | static void __locks_insert_block(struct file_lock *blocker, |
513 | struct file_lock *waiter) | 589 | struct file_lock *waiter) |
514 | { | 590 | { |
515 | BUG_ON(!list_empty(&waiter->fl_block)); | 591 | BUG_ON(!list_empty(&waiter->fl_block)); |
516 | list_add_tail(&waiter->fl_block, &blocker->fl_block); | ||
517 | waiter->fl_next = blocker; | 592 | waiter->fl_next = blocker; |
593 | list_add_tail(&waiter->fl_block, &blocker->fl_block); | ||
518 | if (IS_POSIX(blocker)) | 594 | if (IS_POSIX(blocker)) |
519 | list_add(&waiter->fl_link, &blocked_list); | 595 | locks_insert_global_blocked(waiter); |
520 | } | 596 | } |
521 | 597 | ||
522 | /* Wake up processes blocked waiting for blocker. | 598 | /* Must be called with i_lock held. */ |
523 | * If told to wait then schedule the processes until the block list | 599 | static void locks_insert_block(struct file_lock *blocker, |
524 | * is empty, otherwise empty the block list ourselves. | 600 | struct file_lock *waiter) |
601 | { | ||
602 | spin_lock(&blocked_lock_lock); | ||
603 | __locks_insert_block(blocker, waiter); | ||
604 | spin_unlock(&blocked_lock_lock); | ||
605 | } | ||
606 | |||
607 | /* | ||
608 | * Wake up processes blocked waiting for blocker. | ||
609 | * | ||
610 | * Must be called with the inode->i_lock held! | ||
525 | */ | 611 | */ |
526 | static void locks_wake_up_blocks(struct file_lock *blocker) | 612 | static void locks_wake_up_blocks(struct file_lock *blocker) |
527 | { | 613 | { |
614 | /* | ||
615 | * Avoid taking global lock if list is empty. This is safe since new | ||
616 | * blocked requests are only added to the list under the i_lock, and | ||
617 | * the i_lock is always held here. Note that removal from the fl_block | ||
618 | * list does not require the i_lock, so we must recheck list_empty() | ||
619 | * after acquiring the blocked_lock_lock. | ||
620 | */ | ||
621 | if (list_empty(&blocker->fl_block)) | ||
622 | return; | ||
623 | |||
624 | spin_lock(&blocked_lock_lock); | ||
528 | while (!list_empty(&blocker->fl_block)) { | 625 | while (!list_empty(&blocker->fl_block)) { |
529 | struct file_lock *waiter; | 626 | struct file_lock *waiter; |
530 | 627 | ||
@@ -536,20 +633,23 @@ static void locks_wake_up_blocks(struct file_lock *blocker) | |||
536 | else | 633 | else |
537 | wake_up(&waiter->fl_wait); | 634 | wake_up(&waiter->fl_wait); |
538 | } | 635 | } |
636 | spin_unlock(&blocked_lock_lock); | ||
539 | } | 637 | } |
540 | 638 | ||
541 | /* Insert file lock fl into an inode's lock list at the position indicated | 639 | /* Insert file lock fl into an inode's lock list at the position indicated |
542 | * by pos. At the same time add the lock to the global file lock list. | 640 | * by pos. At the same time add the lock to the global file lock list. |
641 | * | ||
642 | * Must be called with the i_lock held! | ||
543 | */ | 643 | */ |
544 | static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) | 644 | static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) |
545 | { | 645 | { |
546 | list_add(&fl->fl_link, &file_lock_list); | ||
547 | |||
548 | fl->fl_nspid = get_pid(task_tgid(current)); | 646 | fl->fl_nspid = get_pid(task_tgid(current)); |
549 | 647 | ||
550 | /* insert into file's list */ | 648 | /* insert into file's list */ |
551 | fl->fl_next = *pos; | 649 | fl->fl_next = *pos; |
552 | *pos = fl; | 650 | *pos = fl; |
651 | |||
652 | locks_insert_global_locks(fl); | ||
553 | } | 653 | } |
554 | 654 | ||
555 | /* | 655 | /* |
@@ -557,14 +657,17 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) | |||
557 | * Wake up processes that are blocked waiting for this lock, | 657 | * Wake up processes that are blocked waiting for this lock, |
558 | * notify the FS that the lock has been cleared and | 658 | * notify the FS that the lock has been cleared and |
559 | * finally free the lock. | 659 | * finally free the lock. |
660 | * | ||
661 | * Must be called with the i_lock held! | ||
560 | */ | 662 | */ |
561 | static void locks_delete_lock(struct file_lock **thisfl_p) | 663 | static void locks_delete_lock(struct file_lock **thisfl_p) |
562 | { | 664 | { |
563 | struct file_lock *fl = *thisfl_p; | 665 | struct file_lock *fl = *thisfl_p; |
564 | 666 | ||
667 | locks_delete_global_locks(fl); | ||
668 | |||
565 | *thisfl_p = fl->fl_next; | 669 | *thisfl_p = fl->fl_next; |
566 | fl->fl_next = NULL; | 670 | fl->fl_next = NULL; |
567 | list_del_init(&fl->fl_link); | ||
568 | 671 | ||
569 | if (fl->fl_nspid) { | 672 | if (fl->fl_nspid) { |
570 | put_pid(fl->fl_nspid); | 673 | put_pid(fl->fl_nspid); |
@@ -625,8 +728,9 @@ void | |||
625 | posix_test_lock(struct file *filp, struct file_lock *fl) | 728 | posix_test_lock(struct file *filp, struct file_lock *fl) |
626 | { | 729 | { |
627 | struct file_lock *cfl; | 730 | struct file_lock *cfl; |
731 | struct inode *inode = file_inode(filp); | ||
628 | 732 | ||
629 | lock_flocks(); | 733 | spin_lock(&inode->i_lock); |
630 | for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { | 734 | for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { |
631 | if (!IS_POSIX(cfl)) | 735 | if (!IS_POSIX(cfl)) |
632 | continue; | 736 | continue; |
@@ -639,7 +743,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) | |||
639 | fl->fl_pid = pid_vnr(cfl->fl_nspid); | 743 | fl->fl_pid = pid_vnr(cfl->fl_nspid); |
640 | } else | 744 | } else |
641 | fl->fl_type = F_UNLCK; | 745 | fl->fl_type = F_UNLCK; |
642 | unlock_flocks(); | 746 | spin_unlock(&inode->i_lock); |
643 | return; | 747 | return; |
644 | } | 748 | } |
645 | EXPORT_SYMBOL(posix_test_lock); | 749 | EXPORT_SYMBOL(posix_test_lock); |
@@ -676,13 +780,14 @@ static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) | |||
676 | { | 780 | { |
677 | struct file_lock *fl; | 781 | struct file_lock *fl; |
678 | 782 | ||
679 | list_for_each_entry(fl, &blocked_list, fl_link) { | 783 | hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) { |
680 | if (posix_same_owner(fl, block_fl)) | 784 | if (posix_same_owner(fl, block_fl)) |
681 | return fl->fl_next; | 785 | return fl->fl_next; |
682 | } | 786 | } |
683 | return NULL; | 787 | return NULL; |
684 | } | 788 | } |
685 | 789 | ||
790 | /* Must be called with the blocked_lock_lock held! */ | ||
686 | static int posix_locks_deadlock(struct file_lock *caller_fl, | 791 | static int posix_locks_deadlock(struct file_lock *caller_fl, |
687 | struct file_lock *block_fl) | 792 | struct file_lock *block_fl) |
688 | { | 793 | { |
@@ -718,7 +823,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
718 | return -ENOMEM; | 823 | return -ENOMEM; |
719 | } | 824 | } |
720 | 825 | ||
721 | lock_flocks(); | 826 | spin_lock(&inode->i_lock); |
722 | if (request->fl_flags & FL_ACCESS) | 827 | if (request->fl_flags & FL_ACCESS) |
723 | goto find_conflict; | 828 | goto find_conflict; |
724 | 829 | ||
@@ -748,9 +853,9 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
748 | * give it the opportunity to lock the file. | 853 | * give it the opportunity to lock the file. |
749 | */ | 854 | */ |
750 | if (found) { | 855 | if (found) { |
751 | unlock_flocks(); | 856 | spin_unlock(&inode->i_lock); |
752 | cond_resched(); | 857 | cond_resched(); |
753 | lock_flocks(); | 858 | spin_lock(&inode->i_lock); |
754 | } | 859 | } |
755 | 860 | ||
756 | find_conflict: | 861 | find_conflict: |
@@ -777,7 +882,7 @@ find_conflict: | |||
777 | error = 0; | 882 | error = 0; |
778 | 883 | ||
779 | out: | 884 | out: |
780 | unlock_flocks(); | 885 | spin_unlock(&inode->i_lock); |
781 | if (new_fl) | 886 | if (new_fl) |
782 | locks_free_lock(new_fl); | 887 | locks_free_lock(new_fl); |
783 | return error; | 888 | return error; |
@@ -791,7 +896,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
791 | struct file_lock *left = NULL; | 896 | struct file_lock *left = NULL; |
792 | struct file_lock *right = NULL; | 897 | struct file_lock *right = NULL; |
793 | struct file_lock **before; | 898 | struct file_lock **before; |
794 | int error, added = 0; | 899 | int error; |
900 | bool added = false; | ||
795 | 901 | ||
796 | /* | 902 | /* |
797 | * We may need two file_lock structures for this operation, | 903 | * We may need two file_lock structures for this operation, |
@@ -806,7 +912,12 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
806 | new_fl2 = locks_alloc_lock(); | 912 | new_fl2 = locks_alloc_lock(); |
807 | } | 913 | } |
808 | 914 | ||
809 | lock_flocks(); | 915 | spin_lock(&inode->i_lock); |
916 | /* | ||
917 | * New lock request. Walk all POSIX locks and look for conflicts. If | ||
918 | * there are any, either return error or put the request on the | ||
919 | * blocker's list of waiters and the global blocked_hash. | ||
920 | */ | ||
810 | if (request->fl_type != F_UNLCK) { | 921 | if (request->fl_type != F_UNLCK) { |
811 | for_each_lock(inode, before) { | 922 | for_each_lock(inode, before) { |
812 | fl = *before; | 923 | fl = *before; |
@@ -819,11 +930,17 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
819 | error = -EAGAIN; | 930 | error = -EAGAIN; |
820 | if (!(request->fl_flags & FL_SLEEP)) | 931 | if (!(request->fl_flags & FL_SLEEP)) |
821 | goto out; | 932 | goto out; |
933 | /* | ||
934 | * Deadlock detection and insertion into the blocked | ||
935 | * locks list must be done while holding the same lock! | ||
936 | */ | ||
822 | error = -EDEADLK; | 937 | error = -EDEADLK; |
823 | if (posix_locks_deadlock(request, fl)) | 938 | spin_lock(&blocked_lock_lock); |
824 | goto out; | 939 | if (likely(!posix_locks_deadlock(request, fl))) { |
825 | error = FILE_LOCK_DEFERRED; | 940 | error = FILE_LOCK_DEFERRED; |
826 | locks_insert_block(fl, request); | 941 | __locks_insert_block(fl, request); |
942 | } | ||
943 | spin_unlock(&blocked_lock_lock); | ||
827 | goto out; | 944 | goto out; |
828 | } | 945 | } |
829 | } | 946 | } |
@@ -845,7 +962,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
845 | before = &fl->fl_next; | 962 | before = &fl->fl_next; |
846 | } | 963 | } |
847 | 964 | ||
848 | /* Process locks with this owner. */ | 965 | /* Process locks with this owner. */ |
849 | while ((fl = *before) && posix_same_owner(request, fl)) { | 966 | while ((fl = *before) && posix_same_owner(request, fl)) { |
850 | /* Detect adjacent or overlapping regions (if same lock type) | 967 | /* Detect adjacent or overlapping regions (if same lock type) |
851 | */ | 968 | */ |
@@ -880,7 +997,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
880 | continue; | 997 | continue; |
881 | } | 998 | } |
882 | request = fl; | 999 | request = fl; |
883 | added = 1; | 1000 | added = true; |
884 | } | 1001 | } |
885 | else { | 1002 | else { |
886 | /* Processing for different lock types is a bit | 1003 | /* Processing for different lock types is a bit |
@@ -891,7 +1008,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
891 | if (fl->fl_start > request->fl_end) | 1008 | if (fl->fl_start > request->fl_end) |
892 | break; | 1009 | break; |
893 | if (request->fl_type == F_UNLCK) | 1010 | if (request->fl_type == F_UNLCK) |
894 | added = 1; | 1011 | added = true; |
895 | if (fl->fl_start < request->fl_start) | 1012 | if (fl->fl_start < request->fl_start) |
896 | left = fl; | 1013 | left = fl; |
897 | /* If the next lock in the list has a higher end | 1014 | /* If the next lock in the list has a higher end |
@@ -921,7 +1038,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
921 | locks_release_private(fl); | 1038 | locks_release_private(fl); |
922 | locks_copy_private(fl, request); | 1039 | locks_copy_private(fl, request); |
923 | request = fl; | 1040 | request = fl; |
924 | added = 1; | 1041 | added = true; |
925 | } | 1042 | } |
926 | } | 1043 | } |
927 | /* Go on to next lock. | 1044 | /* Go on to next lock. |
@@ -931,10 +1048,9 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
931 | } | 1048 | } |
932 | 1049 | ||
933 | /* | 1050 | /* |
934 | * The above code only modifies existing locks in case of | 1051 | * The above code only modifies existing locks in case of merging or |
935 | * merging or replacing. If new lock(s) need to be inserted | 1052 | * replacing. If new lock(s) need to be inserted all modifications are |
936 | * all modifications are done bellow this, so it's safe yet to | 1053 | * done below this, so it's safe yet to bail out. |
937 | * bail out. | ||
938 | */ | 1054 | */ |
939 | error = -ENOLCK; /* "no luck" */ | 1055 | error = -ENOLCK; /* "no luck" */ |
940 | if (right && left == right && !new_fl2) | 1056 | if (right && left == right && !new_fl2) |
@@ -974,7 +1090,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
974 | locks_wake_up_blocks(left); | 1090 | locks_wake_up_blocks(left); |
975 | } | 1091 | } |
976 | out: | 1092 | out: |
977 | unlock_flocks(); | 1093 | spin_unlock(&inode->i_lock); |
978 | /* | 1094 | /* |
979 | * Free any unused locks. | 1095 | * Free any unused locks. |
980 | */ | 1096 | */ |
@@ -1049,14 +1165,14 @@ int locks_mandatory_locked(struct inode *inode) | |||
1049 | /* | 1165 | /* |
1050 | * Search the lock list for this inode for any POSIX locks. | 1166 | * Search the lock list for this inode for any POSIX locks. |
1051 | */ | 1167 | */ |
1052 | lock_flocks(); | 1168 | spin_lock(&inode->i_lock); |
1053 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1169 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
1054 | if (!IS_POSIX(fl)) | 1170 | if (!IS_POSIX(fl)) |
1055 | continue; | 1171 | continue; |
1056 | if (fl->fl_owner != owner) | 1172 | if (fl->fl_owner != owner) |
1057 | break; | 1173 | break; |
1058 | } | 1174 | } |
1059 | unlock_flocks(); | 1175 | spin_unlock(&inode->i_lock); |
1060 | return fl ? -EAGAIN : 0; | 1176 | return fl ? -EAGAIN : 0; |
1061 | } | 1177 | } |
1062 | 1178 | ||
@@ -1199,7 +1315,7 @@ int __break_lease(struct inode *inode, unsigned int mode) | |||
1199 | if (IS_ERR(new_fl)) | 1315 | if (IS_ERR(new_fl)) |
1200 | return PTR_ERR(new_fl); | 1316 | return PTR_ERR(new_fl); |
1201 | 1317 | ||
1202 | lock_flocks(); | 1318 | spin_lock(&inode->i_lock); |
1203 | 1319 | ||
1204 | time_out_leases(inode); | 1320 | time_out_leases(inode); |
1205 | 1321 | ||
@@ -1249,11 +1365,11 @@ restart: | |||
1249 | break_time++; | 1365 | break_time++; |
1250 | } | 1366 | } |
1251 | locks_insert_block(flock, new_fl); | 1367 | locks_insert_block(flock, new_fl); |
1252 | unlock_flocks(); | 1368 | spin_unlock(&inode->i_lock); |
1253 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | 1369 | error = wait_event_interruptible_timeout(new_fl->fl_wait, |
1254 | !new_fl->fl_next, break_time); | 1370 | !new_fl->fl_next, break_time); |
1255 | lock_flocks(); | 1371 | spin_lock(&inode->i_lock); |
1256 | __locks_delete_block(new_fl); | 1372 | locks_delete_block(new_fl); |
1257 | if (error >= 0) { | 1373 | if (error >= 0) { |
1258 | if (error == 0) | 1374 | if (error == 0) |
1259 | time_out_leases(inode); | 1375 | time_out_leases(inode); |
@@ -1270,7 +1386,7 @@ restart: | |||
1270 | } | 1386 | } |
1271 | 1387 | ||
1272 | out: | 1388 | out: |
1273 | unlock_flocks(); | 1389 | spin_unlock(&inode->i_lock); |
1274 | locks_free_lock(new_fl); | 1390 | locks_free_lock(new_fl); |
1275 | return error; | 1391 | return error; |
1276 | } | 1392 | } |
@@ -1323,9 +1439,10 @@ EXPORT_SYMBOL(lease_get_mtime); | |||
1323 | int fcntl_getlease(struct file *filp) | 1439 | int fcntl_getlease(struct file *filp) |
1324 | { | 1440 | { |
1325 | struct file_lock *fl; | 1441 | struct file_lock *fl; |
1442 | struct inode *inode = file_inode(filp); | ||
1326 | int type = F_UNLCK; | 1443 | int type = F_UNLCK; |
1327 | 1444 | ||
1328 | lock_flocks(); | 1445 | spin_lock(&inode->i_lock); |
1329 | time_out_leases(file_inode(filp)); | 1446 | time_out_leases(file_inode(filp)); |
1330 | for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); | 1447 | for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl); |
1331 | fl = fl->fl_next) { | 1448 | fl = fl->fl_next) { |
@@ -1334,11 +1451,11 @@ int fcntl_getlease(struct file *filp) | |||
1334 | break; | 1451 | break; |
1335 | } | 1452 | } |
1336 | } | 1453 | } |
1337 | unlock_flocks(); | 1454 | spin_unlock(&inode->i_lock); |
1338 | return type; | 1455 | return type; |
1339 | } | 1456 | } |
1340 | 1457 | ||
1341 | int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) | 1458 | static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) |
1342 | { | 1459 | { |
1343 | struct file_lock *fl, **before, **my_before = NULL, *lease; | 1460 | struct file_lock *fl, **before, **my_before = NULL, *lease; |
1344 | struct dentry *dentry = filp->f_path.dentry; | 1461 | struct dentry *dentry = filp->f_path.dentry; |
@@ -1351,7 +1468,7 @@ int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) | |||
1351 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | 1468 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) |
1352 | goto out; | 1469 | goto out; |
1353 | if ((arg == F_WRLCK) | 1470 | if ((arg == F_WRLCK) |
1354 | && ((dentry->d_count > 1) | 1471 | && ((d_count(dentry) > 1) |
1355 | || (atomic_read(&inode->i_count) > 1))) | 1472 | || (atomic_read(&inode->i_count) > 1))) |
1356 | goto out; | 1473 | goto out; |
1357 | 1474 | ||
@@ -1403,7 +1520,7 @@ out: | |||
1403 | return error; | 1520 | return error; |
1404 | } | 1521 | } |
1405 | 1522 | ||
1406 | int generic_delete_lease(struct file *filp, struct file_lock **flp) | 1523 | static int generic_delete_lease(struct file *filp, struct file_lock **flp) |
1407 | { | 1524 | { |
1408 | struct file_lock *fl, **before; | 1525 | struct file_lock *fl, **before; |
1409 | struct dentry *dentry = filp->f_path.dentry; | 1526 | struct dentry *dentry = filp->f_path.dentry; |
@@ -1428,7 +1545,7 @@ int generic_delete_lease(struct file *filp, struct file_lock **flp) | |||
1428 | * The (input) flp->fl_lmops->lm_break function is required | 1545 | * The (input) flp->fl_lmops->lm_break function is required |
1429 | * by break_lease(). | 1546 | * by break_lease(). |
1430 | * | 1547 | * |
1431 | * Called with file_lock_lock held. | 1548 | * Called with inode->i_lock held. |
1432 | */ | 1549 | */ |
1433 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | 1550 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) |
1434 | { | 1551 | { |
@@ -1497,11 +1614,12 @@ static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) | |||
1497 | 1614 | ||
1498 | int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) | 1615 | int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) |
1499 | { | 1616 | { |
1617 | struct inode *inode = file_inode(filp); | ||
1500 | int error; | 1618 | int error; |
1501 | 1619 | ||
1502 | lock_flocks(); | 1620 | spin_lock(&inode->i_lock); |
1503 | error = __vfs_setlease(filp, arg, lease); | 1621 | error = __vfs_setlease(filp, arg, lease); |
1504 | unlock_flocks(); | 1622 | spin_unlock(&inode->i_lock); |
1505 | 1623 | ||
1506 | return error; | 1624 | return error; |
1507 | } | 1625 | } |
@@ -1519,6 +1637,7 @@ static int do_fcntl_delete_lease(struct file *filp) | |||
1519 | static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) | 1637 | static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) |
1520 | { | 1638 | { |
1521 | struct file_lock *fl, *ret; | 1639 | struct file_lock *fl, *ret; |
1640 | struct inode *inode = file_inode(filp); | ||
1522 | struct fasync_struct *new; | 1641 | struct fasync_struct *new; |
1523 | int error; | 1642 | int error; |
1524 | 1643 | ||
@@ -1532,10 +1651,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) | |||
1532 | return -ENOMEM; | 1651 | return -ENOMEM; |
1533 | } | 1652 | } |
1534 | ret = fl; | 1653 | ret = fl; |
1535 | lock_flocks(); | 1654 | spin_lock(&inode->i_lock); |
1536 | error = __vfs_setlease(filp, arg, &ret); | 1655 | error = __vfs_setlease(filp, arg, &ret); |
1537 | if (error) { | 1656 | if (error) { |
1538 | unlock_flocks(); | 1657 | spin_unlock(&inode->i_lock); |
1539 | locks_free_lock(fl); | 1658 | locks_free_lock(fl); |
1540 | goto out_free_fasync; | 1659 | goto out_free_fasync; |
1541 | } | 1660 | } |
@@ -1552,7 +1671,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) | |||
1552 | new = NULL; | 1671 | new = NULL; |
1553 | 1672 | ||
1554 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); | 1673 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); |
1555 | unlock_flocks(); | 1674 | spin_unlock(&inode->i_lock); |
1556 | 1675 | ||
1557 | out_free_fasync: | 1676 | out_free_fasync: |
1558 | if (new) | 1677 | if (new) |
@@ -2076,7 +2195,7 @@ void locks_remove_flock(struct file *filp) | |||
2076 | fl.fl_ops->fl_release_private(&fl); | 2195 | fl.fl_ops->fl_release_private(&fl); |
2077 | } | 2196 | } |
2078 | 2197 | ||
2079 | lock_flocks(); | 2198 | spin_lock(&inode->i_lock); |
2080 | before = &inode->i_flock; | 2199 | before = &inode->i_flock; |
2081 | 2200 | ||
2082 | while ((fl = *before) != NULL) { | 2201 | while ((fl = *before) != NULL) { |
@@ -2094,30 +2213,28 @@ void locks_remove_flock(struct file *filp) | |||
2094 | } | 2213 | } |
2095 | before = &fl->fl_next; | 2214 | before = &fl->fl_next; |
2096 | } | 2215 | } |
2097 | unlock_flocks(); | 2216 | spin_unlock(&inode->i_lock); |
2098 | } | 2217 | } |
2099 | 2218 | ||
2100 | /** | 2219 | /** |
2101 | * posix_unblock_lock - stop waiting for a file lock | 2220 | * posix_unblock_lock - stop waiting for a file lock |
2102 | * @filp: how the file was opened | ||
2103 | * @waiter: the lock which was waiting | 2221 | * @waiter: the lock which was waiting |
2104 | * | 2222 | * |
2105 | * lockd needs to block waiting for locks. | 2223 | * lockd needs to block waiting for locks. |
2106 | */ | 2224 | */ |
2107 | int | 2225 | int |
2108 | posix_unblock_lock(struct file *filp, struct file_lock *waiter) | 2226 | posix_unblock_lock(struct file_lock *waiter) |
2109 | { | 2227 | { |
2110 | int status = 0; | 2228 | int status = 0; |
2111 | 2229 | ||
2112 | lock_flocks(); | 2230 | spin_lock(&blocked_lock_lock); |
2113 | if (waiter->fl_next) | 2231 | if (waiter->fl_next) |
2114 | __locks_delete_block(waiter); | 2232 | __locks_delete_block(waiter); |
2115 | else | 2233 | else |
2116 | status = -ENOENT; | 2234 | status = -ENOENT; |
2117 | unlock_flocks(); | 2235 | spin_unlock(&blocked_lock_lock); |
2118 | return status; | 2236 | return status; |
2119 | } | 2237 | } |
2120 | |||
2121 | EXPORT_SYMBOL(posix_unblock_lock); | 2238 | EXPORT_SYMBOL(posix_unblock_lock); |
2122 | 2239 | ||
2123 | /** | 2240 | /** |
@@ -2140,6 +2257,11 @@ EXPORT_SYMBOL_GPL(vfs_cancel_lock); | |||
2140 | #include <linux/proc_fs.h> | 2257 | #include <linux/proc_fs.h> |
2141 | #include <linux/seq_file.h> | 2258 | #include <linux/seq_file.h> |
2142 | 2259 | ||
2260 | struct locks_iterator { | ||
2261 | int li_cpu; | ||
2262 | loff_t li_pos; | ||
2263 | }; | ||
2264 | |||
2143 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, | 2265 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, |
2144 | loff_t id, char *pfx) | 2266 | loff_t id, char *pfx) |
2145 | { | 2267 | { |
@@ -2213,37 +2335,41 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, | |||
2213 | 2335 | ||
2214 | static int locks_show(struct seq_file *f, void *v) | 2336 | static int locks_show(struct seq_file *f, void *v) |
2215 | { | 2337 | { |
2338 | struct locks_iterator *iter = f->private; | ||
2216 | struct file_lock *fl, *bfl; | 2339 | struct file_lock *fl, *bfl; |
2217 | 2340 | ||
2218 | fl = list_entry(v, struct file_lock, fl_link); | 2341 | fl = hlist_entry(v, struct file_lock, fl_link); |
2219 | 2342 | ||
2220 | lock_get_status(f, fl, *((loff_t *)f->private), ""); | 2343 | lock_get_status(f, fl, iter->li_pos, ""); |
2221 | 2344 | ||
2222 | list_for_each_entry(bfl, &fl->fl_block, fl_block) | 2345 | list_for_each_entry(bfl, &fl->fl_block, fl_block) |
2223 | lock_get_status(f, bfl, *((loff_t *)f->private), " ->"); | 2346 | lock_get_status(f, bfl, iter->li_pos, " ->"); |
2224 | 2347 | ||
2225 | return 0; | 2348 | return 0; |
2226 | } | 2349 | } |
2227 | 2350 | ||
2228 | static void *locks_start(struct seq_file *f, loff_t *pos) | 2351 | static void *locks_start(struct seq_file *f, loff_t *pos) |
2229 | { | 2352 | { |
2230 | loff_t *p = f->private; | 2353 | struct locks_iterator *iter = f->private; |
2231 | 2354 | ||
2232 | lock_flocks(); | 2355 | iter->li_pos = *pos + 1; |
2233 | *p = (*pos + 1); | 2356 | lg_global_lock(&file_lock_lglock); |
2234 | return seq_list_start(&file_lock_list, *pos); | 2357 | spin_lock(&blocked_lock_lock); |
2358 | return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos); | ||
2235 | } | 2359 | } |
2236 | 2360 | ||
2237 | static void *locks_next(struct seq_file *f, void *v, loff_t *pos) | 2361 | static void *locks_next(struct seq_file *f, void *v, loff_t *pos) |
2238 | { | 2362 | { |
2239 | loff_t *p = f->private; | 2363 | struct locks_iterator *iter = f->private; |
2240 | ++*p; | 2364 | |
2241 | return seq_list_next(v, &file_lock_list, pos); | 2365 | ++iter->li_pos; |
2366 | return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos); | ||
2242 | } | 2367 | } |
2243 | 2368 | ||
2244 | static void locks_stop(struct seq_file *f, void *v) | 2369 | static void locks_stop(struct seq_file *f, void *v) |
2245 | { | 2370 | { |
2246 | unlock_flocks(); | 2371 | spin_unlock(&blocked_lock_lock); |
2372 | lg_global_unlock(&file_lock_lglock); | ||
2247 | } | 2373 | } |
2248 | 2374 | ||
2249 | static const struct seq_operations locks_seq_operations = { | 2375 | static const struct seq_operations locks_seq_operations = { |
@@ -2255,7 +2381,8 @@ static const struct seq_operations locks_seq_operations = { | |||
2255 | 2381 | ||
2256 | static int locks_open(struct inode *inode, struct file *filp) | 2382 | static int locks_open(struct inode *inode, struct file *filp) |
2257 | { | 2383 | { |
2258 | return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t)); | 2384 | return seq_open_private(filp, &locks_seq_operations, |
2385 | sizeof(struct locks_iterator)); | ||
2259 | } | 2386 | } |
2260 | 2387 | ||
2261 | static const struct file_operations proc_locks_operations = { | 2388 | static const struct file_operations proc_locks_operations = { |
@@ -2290,7 +2417,8 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len) | |||
2290 | { | 2417 | { |
2291 | struct file_lock *fl; | 2418 | struct file_lock *fl; |
2292 | int result = 1; | 2419 | int result = 1; |
2293 | lock_flocks(); | 2420 | |
2421 | spin_lock(&inode->i_lock); | ||
2294 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2422 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
2295 | if (IS_POSIX(fl)) { | 2423 | if (IS_POSIX(fl)) { |
2296 | if (fl->fl_type == F_RDLCK) | 2424 | if (fl->fl_type == F_RDLCK) |
@@ -2307,7 +2435,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len) | |||
2307 | result = 0; | 2435 | result = 0; |
2308 | break; | 2436 | break; |
2309 | } | 2437 | } |
2310 | unlock_flocks(); | 2438 | spin_unlock(&inode->i_lock); |
2311 | return result; | 2439 | return result; |
2312 | } | 2440 | } |
2313 | 2441 | ||
@@ -2330,7 +2458,8 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) | |||
2330 | { | 2458 | { |
2331 | struct file_lock *fl; | 2459 | struct file_lock *fl; |
2332 | int result = 1; | 2460 | int result = 1; |
2333 | lock_flocks(); | 2461 | |
2462 | spin_lock(&inode->i_lock); | ||
2334 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2463 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
2335 | if (IS_POSIX(fl)) { | 2464 | if (IS_POSIX(fl)) { |
2336 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) | 2465 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) |
@@ -2345,7 +2474,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) | |||
2345 | result = 0; | 2474 | result = 0; |
2346 | break; | 2475 | break; |
2347 | } | 2476 | } |
2348 | unlock_flocks(); | 2477 | spin_unlock(&inode->i_lock); |
2349 | return result; | 2478 | return result; |
2350 | } | 2479 | } |
2351 | 2480 | ||
@@ -2353,9 +2482,16 @@ EXPORT_SYMBOL(lock_may_write); | |||
2353 | 2482 | ||
2354 | static int __init filelock_init(void) | 2483 | static int __init filelock_init(void) |
2355 | { | 2484 | { |
2485 | int i; | ||
2486 | |||
2356 | filelock_cache = kmem_cache_create("file_lock_cache", | 2487 | filelock_cache = kmem_cache_create("file_lock_cache", |
2357 | sizeof(struct file_lock), 0, SLAB_PANIC, NULL); | 2488 | sizeof(struct file_lock), 0, SLAB_PANIC, NULL); |
2358 | 2489 | ||
2490 | lg_lock_init(&file_lock_lglock, "file_lock_lglock"); | ||
2491 | |||
2492 | for_each_possible_cpu(i) | ||
2493 | INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i)); | ||
2494 | |||
2359 | return 0; | 2495 | return 0; |
2360 | } | 2496 | } |
2361 | 2497 | ||
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index b82751082112..6bdc347008f5 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c | |||
@@ -281,17 +281,23 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
281 | 281 | ||
282 | /* FIXME: readdir currently has it's own dir_walk code. I don't see a good | 282 | /* FIXME: readdir currently has it's own dir_walk code. I don't see a good |
283 | * way to combine the two copies */ | 283 | * way to combine the two copies */ |
284 | #define IMPLICIT_NODES 2 | 284 | static int logfs_readdir(struct file *file, struct dir_context *ctx) |
285 | static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) | ||
286 | { | 285 | { |
287 | struct inode *dir = file_inode(file); | 286 | struct inode *dir = file_inode(file); |
288 | loff_t pos = file->f_pos - IMPLICIT_NODES; | 287 | loff_t pos; |
289 | struct page *page; | 288 | struct page *page; |
290 | struct logfs_disk_dentry *dd; | 289 | struct logfs_disk_dentry *dd; |
291 | int full; | ||
292 | 290 | ||
291 | if (ctx->pos < 0) | ||
292 | return -EINVAL; | ||
293 | |||
294 | if (!dir_emit_dots(file, ctx)) | ||
295 | return 0; | ||
296 | |||
297 | pos = ctx->pos - 2; | ||
293 | BUG_ON(pos < 0); | 298 | BUG_ON(pos < 0); |
294 | for (;; pos++) { | 299 | for (;; pos++, ctx->pos++) { |
300 | bool full; | ||
295 | if (beyond_eof(dir, pos)) | 301 | if (beyond_eof(dir, pos)) |
296 | break; | 302 | break; |
297 | if (!logfs_exist_block(dir, pos)) { | 303 | if (!logfs_exist_block(dir, pos)) { |
@@ -306,42 +312,17 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) | |||
306 | dd = kmap(page); | 312 | dd = kmap(page); |
307 | BUG_ON(dd->namelen == 0); | 313 | BUG_ON(dd->namelen == 0); |
308 | 314 | ||
309 | full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen), | 315 | full = !dir_emit(ctx, (char *)dd->name, |
310 | pos, be64_to_cpu(dd->ino), dd->type); | 316 | be16_to_cpu(dd->namelen), |
317 | be64_to_cpu(dd->ino), dd->type); | ||
311 | kunmap(page); | 318 | kunmap(page); |
312 | page_cache_release(page); | 319 | page_cache_release(page); |
313 | if (full) | 320 | if (full) |
314 | break; | 321 | break; |
315 | } | 322 | } |
316 | |||
317 | file->f_pos = pos + IMPLICIT_NODES; | ||
318 | return 0; | 323 | return 0; |
319 | } | 324 | } |
320 | 325 | ||
321 | static int logfs_readdir(struct file *file, void *buf, filldir_t filldir) | ||
322 | { | ||
323 | struct inode *inode = file_inode(file); | ||
324 | ino_t pino = parent_ino(file->f_dentry); | ||
325 | int err; | ||
326 | |||
327 | if (file->f_pos < 0) | ||
328 | return -EINVAL; | ||
329 | |||
330 | if (file->f_pos == 0) { | ||
331 | if (filldir(buf, ".", 1, 1, inode->i_ino, DT_DIR) < 0) | ||
332 | return 0; | ||
333 | file->f_pos++; | ||
334 | } | ||
335 | if (file->f_pos == 1) { | ||
336 | if (filldir(buf, "..", 2, 2, pino, DT_DIR) < 0) | ||
337 | return 0; | ||
338 | file->f_pos++; | ||
339 | } | ||
340 | |||
341 | err = __logfs_readdir(file, buf, filldir); | ||
342 | return err; | ||
343 | } | ||
344 | |||
345 | static void logfs_set_name(struct logfs_disk_dentry *dd, struct qstr *name) | 326 | static void logfs_set_name(struct logfs_disk_dentry *dd, struct qstr *name) |
346 | { | 327 | { |
347 | dd->namelen = cpu_to_be16(name->len); | 328 | dd->namelen = cpu_to_be16(name->len); |
@@ -814,7 +795,7 @@ const struct inode_operations logfs_dir_iops = { | |||
814 | const struct file_operations logfs_dir_fops = { | 795 | const struct file_operations logfs_dir_fops = { |
815 | .fsync = logfs_fsync, | 796 | .fsync = logfs_fsync, |
816 | .unlocked_ioctl = logfs_ioctl, | 797 | .unlocked_ioctl = logfs_ioctl, |
817 | .readdir = logfs_readdir, | 798 | .iterate = logfs_readdir, |
818 | .read = generic_read_dir, | 799 | .read = generic_read_dir, |
819 | .llseek = default_llseek, | 800 | .llseek = default_llseek, |
820 | }; | 801 | }; |
diff --git a/fs/logfs/file.c b/fs/logfs/file.c index c2219a6dd3c8..57914fc32b62 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c | |||
@@ -159,7 +159,8 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc) | |||
159 | return __logfs_writepage(page); | 159 | return __logfs_writepage(page); |
160 | } | 160 | } |
161 | 161 | ||
162 | static void logfs_invalidatepage(struct page *page, unsigned long offset) | 162 | static void logfs_invalidatepage(struct page *page, unsigned int offset, |
163 | unsigned int length) | ||
163 | { | 164 | { |
164 | struct logfs_block *block = logfs_block(page); | 165 | struct logfs_block *block = logfs_block(page); |
165 | 166 | ||
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 038da0991794..d448a777166b 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c | |||
@@ -884,7 +884,8 @@ static struct logfs_area *alloc_area(struct super_block *sb) | |||
884 | return area; | 884 | return area; |
885 | } | 885 | } |
886 | 886 | ||
887 | static void map_invalidatepage(struct page *page, unsigned long l) | 887 | static void map_invalidatepage(struct page *page, unsigned int o, |
888 | unsigned int l) | ||
888 | { | 889 | { |
889 | return; | 890 | return; |
890 | } | 891 | } |
diff --git a/fs/minix/dir.c b/fs/minix/dir.c index a9ed6f36e6ea..dfaf6fa9b7b5 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c | |||
@@ -16,12 +16,12 @@ | |||
16 | typedef struct minix_dir_entry minix_dirent; | 16 | typedef struct minix_dir_entry minix_dirent; |
17 | typedef struct minix3_dir_entry minix3_dirent; | 17 | typedef struct minix3_dir_entry minix3_dirent; |
18 | 18 | ||
19 | static int minix_readdir(struct file *, void *, filldir_t); | 19 | static int minix_readdir(struct file *, struct dir_context *); |
20 | 20 | ||
21 | const struct file_operations minix_dir_operations = { | 21 | const struct file_operations minix_dir_operations = { |
22 | .llseek = generic_file_llseek, | 22 | .llseek = generic_file_llseek, |
23 | .read = generic_read_dir, | 23 | .read = generic_read_dir, |
24 | .readdir = minix_readdir, | 24 | .iterate = minix_readdir, |
25 | .fsync = generic_file_fsync, | 25 | .fsync = generic_file_fsync, |
26 | }; | 26 | }; |
27 | 27 | ||
@@ -82,22 +82,23 @@ static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi) | |||
82 | return (void*)((char*)de + sbi->s_dirsize); | 82 | return (void*)((char*)de + sbi->s_dirsize); |
83 | } | 83 | } |
84 | 84 | ||
85 | static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) | 85 | static int minix_readdir(struct file *file, struct dir_context *ctx) |
86 | { | 86 | { |
87 | unsigned long pos = filp->f_pos; | 87 | struct inode *inode = file_inode(file); |
88 | struct inode *inode = file_inode(filp); | ||
89 | struct super_block *sb = inode->i_sb; | 88 | struct super_block *sb = inode->i_sb; |
90 | unsigned offset = pos & ~PAGE_CACHE_MASK; | ||
91 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | ||
92 | unsigned long npages = dir_pages(inode); | ||
93 | struct minix_sb_info *sbi = minix_sb(sb); | 89 | struct minix_sb_info *sbi = minix_sb(sb); |
94 | unsigned chunk_size = sbi->s_dirsize; | 90 | unsigned chunk_size = sbi->s_dirsize; |
95 | char *name; | 91 | unsigned long npages = dir_pages(inode); |
96 | __u32 inumber; | 92 | unsigned long pos = ctx->pos; |
93 | unsigned offset; | ||
94 | unsigned long n; | ||
97 | 95 | ||
98 | pos = (pos + chunk_size-1) & ~(chunk_size-1); | 96 | ctx->pos = pos = ALIGN(pos, chunk_size); |
99 | if (pos >= inode->i_size) | 97 | if (pos >= inode->i_size) |
100 | goto done; | 98 | return 0; |
99 | |||
100 | offset = pos & ~PAGE_CACHE_MASK; | ||
101 | n = pos >> PAGE_CACHE_SHIFT; | ||
101 | 102 | ||
102 | for ( ; n < npages; n++, offset = 0) { | 103 | for ( ; n < npages; n++, offset = 0) { |
103 | char *p, *kaddr, *limit; | 104 | char *p, *kaddr, *limit; |
@@ -109,6 +110,8 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
109 | p = kaddr+offset; | 110 | p = kaddr+offset; |
110 | limit = kaddr + minix_last_byte(inode, n) - chunk_size; | 111 | limit = kaddr + minix_last_byte(inode, n) - chunk_size; |
111 | for ( ; p <= limit; p = minix_next_entry(p, sbi)) { | 112 | for ( ; p <= limit; p = minix_next_entry(p, sbi)) { |
113 | const char *name; | ||
114 | __u32 inumber; | ||
112 | if (sbi->s_version == MINIX_V3) { | 115 | if (sbi->s_version == MINIX_V3) { |
113 | minix3_dirent *de3 = (minix3_dirent *)p; | 116 | minix3_dirent *de3 = (minix3_dirent *)p; |
114 | name = de3->name; | 117 | name = de3->name; |
@@ -119,24 +122,17 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
119 | inumber = de->inode; | 122 | inumber = de->inode; |
120 | } | 123 | } |
121 | if (inumber) { | 124 | if (inumber) { |
122 | int over; | ||
123 | |||
124 | unsigned l = strnlen(name, sbi->s_namelen); | 125 | unsigned l = strnlen(name, sbi->s_namelen); |
125 | offset = p - kaddr; | 126 | if (!dir_emit(ctx, name, l, |
126 | over = filldir(dirent, name, l, | 127 | inumber, DT_UNKNOWN)) { |
127 | (n << PAGE_CACHE_SHIFT) | offset, | ||
128 | inumber, DT_UNKNOWN); | ||
129 | if (over) { | ||
130 | dir_put_page(page); | 128 | dir_put_page(page); |
131 | goto done; | 129 | return 0; |
132 | } | 130 | } |
133 | } | 131 | } |
132 | ctx->pos += chunk_size; | ||
134 | } | 133 | } |
135 | dir_put_page(page); | 134 | dir_put_page(page); |
136 | } | 135 | } |
137 | |||
138 | done: | ||
139 | filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; | ||
140 | return 0; | 136 | return 0; |
141 | } | 137 | } |
142 | 138 | ||
diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 0db73d9dd668..cd950e2331b6 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c | |||
@@ -54,6 +54,18 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, | |||
54 | return error; | 54 | return error; |
55 | } | 55 | } |
56 | 56 | ||
57 | static int minix_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
58 | { | ||
59 | int error; | ||
60 | struct inode *inode = minix_new_inode(dir, mode, &error); | ||
61 | if (inode) { | ||
62 | minix_set_inode(inode, 0); | ||
63 | mark_inode_dirty(inode); | ||
64 | d_tmpfile(dentry, inode); | ||
65 | } | ||
66 | return error; | ||
67 | } | ||
68 | |||
57 | static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode, | 69 | static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode, |
58 | bool excl) | 70 | bool excl) |
59 | { | 71 | { |
@@ -254,4 +266,5 @@ const struct inode_operations minix_dir_inode_operations = { | |||
254 | .mknod = minix_mknod, | 266 | .mknod = minix_mknod, |
255 | .rename = minix_rename, | 267 | .rename = minix_rename, |
256 | .getattr = minix_getattr, | 268 | .getattr = minix_getattr, |
269 | .tmpfile = minix_tmpfile, | ||
257 | }; | 270 | }; |
diff --git a/fs/namei.c b/fs/namei.c index 9ed9361223c0..89a612e392eb 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1352,7 +1352,7 @@ static int lookup_fast(struct nameidata *nd, | |||
1352 | */ | 1352 | */ |
1353 | if (nd->flags & LOOKUP_RCU) { | 1353 | if (nd->flags & LOOKUP_RCU) { |
1354 | unsigned seq; | 1354 | unsigned seq; |
1355 | dentry = __d_lookup_rcu(parent, &nd->last, &seq, nd->inode); | 1355 | dentry = __d_lookup_rcu(parent, &nd->last, &seq); |
1356 | if (!dentry) | 1356 | if (!dentry) |
1357 | goto unlazy; | 1357 | goto unlazy; |
1358 | 1358 | ||
@@ -1787,8 +1787,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
1787 | struct dentry *parent = nd->path.dentry; | 1787 | struct dentry *parent = nd->path.dentry; |
1788 | nd->flags &= ~LOOKUP_JUMPED; | 1788 | nd->flags &= ~LOOKUP_JUMPED; |
1789 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { | 1789 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { |
1790 | err = parent->d_op->d_hash(parent, nd->inode, | 1790 | err = parent->d_op->d_hash(parent, &this); |
1791 | &this); | ||
1792 | if (err < 0) | 1791 | if (err < 0) |
1793 | break; | 1792 | break; |
1794 | } | 1793 | } |
@@ -2121,7 +2120,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) | |||
2121 | * to use its own hash.. | 2120 | * to use its own hash.. |
2122 | */ | 2121 | */ |
2123 | if (base->d_flags & DCACHE_OP_HASH) { | 2122 | if (base->d_flags & DCACHE_OP_HASH) { |
2124 | int err = base->d_op->d_hash(base, base->d_inode, &this); | 2123 | int err = base->d_op->d_hash(base, &this); |
2125 | if (err < 0) | 2124 | if (err < 0) |
2126 | return ERR_PTR(err); | 2125 | return ERR_PTR(err); |
2127 | } | 2126 | } |
@@ -2690,28 +2689,10 @@ static int do_last(struct nameidata *nd, struct path *path, | |||
2690 | nd->flags &= ~LOOKUP_PARENT; | 2689 | nd->flags &= ~LOOKUP_PARENT; |
2691 | nd->flags |= op->intent; | 2690 | nd->flags |= op->intent; |
2692 | 2691 | ||
2693 | switch (nd->last_type) { | 2692 | if (nd->last_type != LAST_NORM) { |
2694 | case LAST_DOTDOT: | ||
2695 | case LAST_DOT: | ||
2696 | error = handle_dots(nd, nd->last_type); | 2693 | error = handle_dots(nd, nd->last_type); |
2697 | if (error) | 2694 | if (error) |
2698 | return error; | 2695 | return error; |
2699 | /* fallthrough */ | ||
2700 | case LAST_ROOT: | ||
2701 | error = complete_walk(nd); | ||
2702 | if (error) | ||
2703 | return error; | ||
2704 | audit_inode(name, nd->path.dentry, 0); | ||
2705 | if (open_flag & O_CREAT) { | ||
2706 | error = -EISDIR; | ||
2707 | goto out; | ||
2708 | } | ||
2709 | goto finish_open; | ||
2710 | case LAST_BIND: | ||
2711 | error = complete_walk(nd); | ||
2712 | if (error) | ||
2713 | return error; | ||
2714 | audit_inode(name, dir, 0); | ||
2715 | goto finish_open; | 2696 | goto finish_open; |
2716 | } | 2697 | } |
2717 | 2698 | ||
@@ -2841,19 +2822,19 @@ finish_lookup: | |||
2841 | } | 2822 | } |
2842 | nd->inode = inode; | 2823 | nd->inode = inode; |
2843 | /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ | 2824 | /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ |
2825 | finish_open: | ||
2844 | error = complete_walk(nd); | 2826 | error = complete_walk(nd); |
2845 | if (error) { | 2827 | if (error) { |
2846 | path_put(&save_parent); | 2828 | path_put(&save_parent); |
2847 | return error; | 2829 | return error; |
2848 | } | 2830 | } |
2831 | audit_inode(name, nd->path.dentry, 0); | ||
2849 | error = -EISDIR; | 2832 | error = -EISDIR; |
2850 | if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) | 2833 | if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) |
2851 | goto out; | 2834 | goto out; |
2852 | error = -ENOTDIR; | 2835 | error = -ENOTDIR; |
2853 | if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode)) | 2836 | if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode)) |
2854 | goto out; | 2837 | goto out; |
2855 | audit_inode(name, nd->path.dentry, 0); | ||
2856 | finish_open: | ||
2857 | if (!S_ISREG(nd->inode->i_mode)) | 2838 | if (!S_ISREG(nd->inode->i_mode)) |
2858 | will_truncate = false; | 2839 | will_truncate = false; |
2859 | 2840 | ||
@@ -2920,6 +2901,67 @@ stale_open: | |||
2920 | goto retry_lookup; | 2901 | goto retry_lookup; |
2921 | } | 2902 | } |
2922 | 2903 | ||
2904 | static int do_tmpfile(int dfd, struct filename *pathname, | ||
2905 | struct nameidata *nd, int flags, | ||
2906 | const struct open_flags *op, | ||
2907 | struct file *file, int *opened) | ||
2908 | { | ||
2909 | static const struct qstr name = QSTR_INIT("/", 1); | ||
2910 | struct dentry *dentry, *child; | ||
2911 | struct inode *dir; | ||
2912 | int error = path_lookupat(dfd, pathname->name, | ||
2913 | flags | LOOKUP_DIRECTORY, nd); | ||
2914 | if (unlikely(error)) | ||
2915 | return error; | ||
2916 | error = mnt_want_write(nd->path.mnt); | ||
2917 | if (unlikely(error)) | ||
2918 | goto out; | ||
2919 | /* we want directory to be writable */ | ||
2920 | error = inode_permission(nd->inode, MAY_WRITE | MAY_EXEC); | ||
2921 | if (error) | ||
2922 | goto out2; | ||
2923 | dentry = nd->path.dentry; | ||
2924 | dir = dentry->d_inode; | ||
2925 | if (!dir->i_op->tmpfile) { | ||
2926 | error = -EOPNOTSUPP; | ||
2927 | goto out2; | ||
2928 | } | ||
2929 | child = d_alloc(dentry, &name); | ||
2930 | if (unlikely(!child)) { | ||
2931 | error = -ENOMEM; | ||
2932 | goto out2; | ||
2933 | } | ||
2934 | nd->flags &= ~LOOKUP_DIRECTORY; | ||
2935 | nd->flags |= op->intent; | ||
2936 | dput(nd->path.dentry); | ||
2937 | nd->path.dentry = child; | ||
2938 | error = dir->i_op->tmpfile(dir, nd->path.dentry, op->mode); | ||
2939 | if (error) | ||
2940 | goto out2; | ||
2941 | audit_inode(pathname, nd->path.dentry, 0); | ||
2942 | error = may_open(&nd->path, op->acc_mode, op->open_flag); | ||
2943 | if (error) | ||
2944 | goto out2; | ||
2945 | file->f_path.mnt = nd->path.mnt; | ||
2946 | error = finish_open(file, nd->path.dentry, NULL, opened); | ||
2947 | if (error) | ||
2948 | goto out2; | ||
2949 | error = open_check_o_direct(file); | ||
2950 | if (error) { | ||
2951 | fput(file); | ||
2952 | } else if (!(op->open_flag & O_EXCL)) { | ||
2953 | struct inode *inode = file_inode(file); | ||
2954 | spin_lock(&inode->i_lock); | ||
2955 | inode->i_state |= I_LINKABLE; | ||
2956 | spin_unlock(&inode->i_lock); | ||
2957 | } | ||
2958 | out2: | ||
2959 | mnt_drop_write(nd->path.mnt); | ||
2960 | out: | ||
2961 | path_put(&nd->path); | ||
2962 | return error; | ||
2963 | } | ||
2964 | |||
2923 | static struct file *path_openat(int dfd, struct filename *pathname, | 2965 | static struct file *path_openat(int dfd, struct filename *pathname, |
2924 | struct nameidata *nd, const struct open_flags *op, int flags) | 2966 | struct nameidata *nd, const struct open_flags *op, int flags) |
2925 | { | 2967 | { |
@@ -2935,6 +2977,11 @@ static struct file *path_openat(int dfd, struct filename *pathname, | |||
2935 | 2977 | ||
2936 | file->f_flags = op->open_flag; | 2978 | file->f_flags = op->open_flag; |
2937 | 2979 | ||
2980 | if (unlikely(file->f_flags & __O_TMPFILE)) { | ||
2981 | error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened); | ||
2982 | goto out; | ||
2983 | } | ||
2984 | |||
2938 | error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base); | 2985 | error = path_init(dfd, pathname->name, flags | LOOKUP_PARENT, nd, &base); |
2939 | if (unlikely(error)) | 2986 | if (unlikely(error)) |
2940 | goto out; | 2987 | goto out; |
@@ -2987,9 +3034,10 @@ out: | |||
2987 | } | 3034 | } |
2988 | 3035 | ||
2989 | struct file *do_filp_open(int dfd, struct filename *pathname, | 3036 | struct file *do_filp_open(int dfd, struct filename *pathname, |
2990 | const struct open_flags *op, int flags) | 3037 | const struct open_flags *op) |
2991 | { | 3038 | { |
2992 | struct nameidata nd; | 3039 | struct nameidata nd; |
3040 | int flags = op->lookup_flags; | ||
2993 | struct file *filp; | 3041 | struct file *filp; |
2994 | 3042 | ||
2995 | filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); | 3043 | filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); |
@@ -3001,17 +3049,16 @@ struct file *do_filp_open(int dfd, struct filename *pathname, | |||
3001 | } | 3049 | } |
3002 | 3050 | ||
3003 | struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, | 3051 | struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, |
3004 | const char *name, const struct open_flags *op, int flags) | 3052 | const char *name, const struct open_flags *op) |
3005 | { | 3053 | { |
3006 | struct nameidata nd; | 3054 | struct nameidata nd; |
3007 | struct file *file; | 3055 | struct file *file; |
3008 | struct filename filename = { .name = name }; | 3056 | struct filename filename = { .name = name }; |
3057 | int flags = op->lookup_flags | LOOKUP_ROOT; | ||
3009 | 3058 | ||
3010 | nd.root.mnt = mnt; | 3059 | nd.root.mnt = mnt; |
3011 | nd.root.dentry = dentry; | 3060 | nd.root.dentry = dentry; |
3012 | 3061 | ||
3013 | flags |= LOOKUP_ROOT; | ||
3014 | |||
3015 | if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) | 3062 | if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) |
3016 | return ERR_PTR(-ELOOP); | 3063 | return ERR_PTR(-ELOOP); |
3017 | 3064 | ||
@@ -3586,12 +3633,18 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de | |||
3586 | 3633 | ||
3587 | mutex_lock(&inode->i_mutex); | 3634 | mutex_lock(&inode->i_mutex); |
3588 | /* Make sure we don't allow creating hardlink to an unlinked file */ | 3635 | /* Make sure we don't allow creating hardlink to an unlinked file */ |
3589 | if (inode->i_nlink == 0) | 3636 | if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) |
3590 | error = -ENOENT; | 3637 | error = -ENOENT; |
3591 | else if (max_links && inode->i_nlink >= max_links) | 3638 | else if (max_links && inode->i_nlink >= max_links) |
3592 | error = -EMLINK; | 3639 | error = -EMLINK; |
3593 | else | 3640 | else |
3594 | error = dir->i_op->link(old_dentry, dir, new_dentry); | 3641 | error = dir->i_op->link(old_dentry, dir, new_dentry); |
3642 | |||
3643 | if (!error && (inode->i_state & I_LINKABLE)) { | ||
3644 | spin_lock(&inode->i_lock); | ||
3645 | inode->i_state &= ~I_LINKABLE; | ||
3646 | spin_unlock(&inode->i_lock); | ||
3647 | } | ||
3595 | mutex_unlock(&inode->i_mutex); | 3648 | mutex_unlock(&inode->i_mutex); |
3596 | if (!error) | 3649 | if (!error) |
3597 | fsnotify_link(dir, inode, new_dentry); | 3650 | fsnotify_link(dir, inode, new_dentry); |
@@ -3618,15 +3671,11 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, | |||
3618 | if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) | 3671 | if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) |
3619 | return -EINVAL; | 3672 | return -EINVAL; |
3620 | /* | 3673 | /* |
3621 | * To use null names we require CAP_DAC_READ_SEARCH | 3674 | * Using empty names is equivalent to using AT_SYMLINK_FOLLOW |
3622 | * This ensures that not everyone will be able to create | 3675 | * on /proc/self/fd/<fd>. |
3623 | * handlink using the passed filedescriptor. | ||
3624 | */ | 3676 | */ |
3625 | if (flags & AT_EMPTY_PATH) { | 3677 | if (flags & AT_EMPTY_PATH) |
3626 | if (!capable(CAP_DAC_READ_SEARCH)) | ||
3627 | return -ENOENT; | ||
3628 | how = LOOKUP_EMPTY; | 3678 | how = LOOKUP_EMPTY; |
3629 | } | ||
3630 | 3679 | ||
3631 | if (flags & AT_SYMLINK_FOLLOW) | 3680 | if (flags & AT_SYMLINK_FOLLOW) |
3632 | how |= LOOKUP_FOLLOW; | 3681 | how |= LOOKUP_FOLLOW; |
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 6792ce11f2bf..3be047474bfc 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
@@ -23,12 +23,12 @@ | |||
23 | 23 | ||
24 | #include "ncp_fs.h" | 24 | #include "ncp_fs.h" |
25 | 25 | ||
26 | static void ncp_read_volume_list(struct file *, void *, filldir_t, | 26 | static void ncp_read_volume_list(struct file *, struct dir_context *, |
27 | struct ncp_cache_control *); | 27 | struct ncp_cache_control *); |
28 | static void ncp_do_readdir(struct file *, void *, filldir_t, | 28 | static void ncp_do_readdir(struct file *, struct dir_context *, |
29 | struct ncp_cache_control *); | 29 | struct ncp_cache_control *); |
30 | 30 | ||
31 | static int ncp_readdir(struct file *, void *, filldir_t); | 31 | static int ncp_readdir(struct file *, struct dir_context *); |
32 | 32 | ||
33 | static int ncp_create(struct inode *, struct dentry *, umode_t, bool); | 33 | static int ncp_create(struct inode *, struct dentry *, umode_t, bool); |
34 | static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int); | 34 | static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int); |
@@ -49,7 +49,7 @@ const struct file_operations ncp_dir_operations = | |||
49 | { | 49 | { |
50 | .llseek = generic_file_llseek, | 50 | .llseek = generic_file_llseek, |
51 | .read = generic_read_dir, | 51 | .read = generic_read_dir, |
52 | .readdir = ncp_readdir, | 52 | .iterate = ncp_readdir, |
53 | .unlocked_ioctl = ncp_ioctl, | 53 | .unlocked_ioctl = ncp_ioctl, |
54 | #ifdef CONFIG_COMPAT | 54 | #ifdef CONFIG_COMPAT |
55 | .compat_ioctl = ncp_compat_ioctl, | 55 | .compat_ioctl = ncp_compat_ioctl, |
@@ -73,10 +73,8 @@ const struct inode_operations ncp_dir_inode_operations = | |||
73 | * Dentry operations routines | 73 | * Dentry operations routines |
74 | */ | 74 | */ |
75 | static int ncp_lookup_validate(struct dentry *, unsigned int); | 75 | static int ncp_lookup_validate(struct dentry *, unsigned int); |
76 | static int ncp_hash_dentry(const struct dentry *, const struct inode *, | 76 | static int ncp_hash_dentry(const struct dentry *, struct qstr *); |
77 | struct qstr *); | 77 | static int ncp_compare_dentry(const struct dentry *, const struct dentry *, |
78 | static int ncp_compare_dentry(const struct dentry *, const struct inode *, | ||
79 | const struct dentry *, const struct inode *, | ||
80 | unsigned int, const char *, const struct qstr *); | 78 | unsigned int, const char *, const struct qstr *); |
81 | static int ncp_delete_dentry(const struct dentry *); | 79 | static int ncp_delete_dentry(const struct dentry *); |
82 | 80 | ||
@@ -119,11 +117,19 @@ static inline int ncp_case_sensitive(const struct inode *i) | |||
119 | /* | 117 | /* |
120 | * Note: leave the hash unchanged if the directory | 118 | * Note: leave the hash unchanged if the directory |
121 | * is case-sensitive. | 119 | * is case-sensitive. |
120 | * | ||
121 | * Accessing the parent inode can be racy under RCU pathwalking. | ||
122 | * Use ACCESS_ONCE() to make sure we use _one_ particular inode, | ||
123 | * the callers will handle races. | ||
122 | */ | 124 | */ |
123 | static int | 125 | static int |
124 | ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode, | 126 | ncp_hash_dentry(const struct dentry *dentry, struct qstr *this) |
125 | struct qstr *this) | ||
126 | { | 127 | { |
128 | struct inode *inode = ACCESS_ONCE(dentry->d_inode); | ||
129 | |||
130 | if (!inode) | ||
131 | return 0; | ||
132 | |||
127 | if (!ncp_case_sensitive(inode)) { | 133 | if (!ncp_case_sensitive(inode)) { |
128 | struct super_block *sb = dentry->d_sb; | 134 | struct super_block *sb = dentry->d_sb; |
129 | struct nls_table *t; | 135 | struct nls_table *t; |
@@ -140,14 +146,24 @@ ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode, | |||
140 | return 0; | 146 | return 0; |
141 | } | 147 | } |
142 | 148 | ||
149 | /* | ||
150 | * Accessing the parent inode can be racy under RCU pathwalking. | ||
151 | * Use ACCESS_ONCE() to make sure we use _one_ particular inode, | ||
152 | * the callers will handle races. | ||
153 | */ | ||
143 | static int | 154 | static int |
144 | ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode, | 155 | ncp_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
145 | const struct dentry *dentry, const struct inode *inode, | ||
146 | unsigned int len, const char *str, const struct qstr *name) | 156 | unsigned int len, const char *str, const struct qstr *name) |
147 | { | 157 | { |
158 | struct inode *pinode; | ||
159 | |||
148 | if (len != name->len) | 160 | if (len != name->len) |
149 | return 1; | 161 | return 1; |
150 | 162 | ||
163 | pinode = ACCESS_ONCE(parent->d_inode); | ||
164 | if (!pinode) | ||
165 | return 1; | ||
166 | |||
151 | if (ncp_case_sensitive(pinode)) | 167 | if (ncp_case_sensitive(pinode)) |
152 | return strncmp(str, name->name, len); | 168 | return strncmp(str, name->name, len); |
153 | 169 | ||
@@ -424,9 +440,9 @@ static time_t ncp_obtain_mtime(struct dentry *dentry) | |||
424 | return ncp_date_dos2unix(i.modifyTime, i.modifyDate); | 440 | return ncp_date_dos2unix(i.modifyTime, i.modifyDate); |
425 | } | 441 | } |
426 | 442 | ||
427 | static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | 443 | static int ncp_readdir(struct file *file, struct dir_context *ctx) |
428 | { | 444 | { |
429 | struct dentry *dentry = filp->f_path.dentry; | 445 | struct dentry *dentry = file->f_path.dentry; |
430 | struct inode *inode = dentry->d_inode; | 446 | struct inode *inode = dentry->d_inode; |
431 | struct page *page = NULL; | 447 | struct page *page = NULL; |
432 | struct ncp_server *server = NCP_SERVER(inode); | 448 | struct ncp_server *server = NCP_SERVER(inode); |
@@ -440,7 +456,7 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
440 | 456 | ||
441 | DDPRINTK("ncp_readdir: reading %s/%s, pos=%d\n", | 457 | DDPRINTK("ncp_readdir: reading %s/%s, pos=%d\n", |
442 | dentry->d_parent->d_name.name, dentry->d_name.name, | 458 | dentry->d_parent->d_name.name, dentry->d_name.name, |
443 | (int) filp->f_pos); | 459 | (int) ctx->pos); |
444 | 460 | ||
445 | result = -EIO; | 461 | result = -EIO; |
446 | /* Do not generate '.' and '..' when server is dead. */ | 462 | /* Do not generate '.' and '..' when server is dead. */ |
@@ -448,16 +464,8 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
448 | goto out; | 464 | goto out; |
449 | 465 | ||
450 | result = 0; | 466 | result = 0; |
451 | if (filp->f_pos == 0) { | 467 | if (!dir_emit_dots(file, ctx)) |
452 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR)) | 468 | goto out; |
453 | goto out; | ||
454 | filp->f_pos = 1; | ||
455 | } | ||
456 | if (filp->f_pos == 1) { | ||
457 | if (filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR)) | ||
458 | goto out; | ||
459 | filp->f_pos = 2; | ||
460 | } | ||
461 | 469 | ||
462 | page = grab_cache_page(&inode->i_data, 0); | 470 | page = grab_cache_page(&inode->i_data, 0); |
463 | if (!page) | 471 | if (!page) |
@@ -469,7 +477,7 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
469 | if (!PageUptodate(page) || !ctl.head.eof) | 477 | if (!PageUptodate(page) || !ctl.head.eof) |
470 | goto init_cache; | 478 | goto init_cache; |
471 | 479 | ||
472 | if (filp->f_pos == 2) { | 480 | if (ctx->pos == 2) { |
473 | if (jiffies - ctl.head.time >= NCP_MAX_AGE(server)) | 481 | if (jiffies - ctl.head.time >= NCP_MAX_AGE(server)) |
474 | goto init_cache; | 482 | goto init_cache; |
475 | 483 | ||
@@ -479,10 +487,10 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
479 | goto init_cache; | 487 | goto init_cache; |
480 | } | 488 | } |
481 | 489 | ||
482 | if (filp->f_pos > ctl.head.end) | 490 | if (ctx->pos > ctl.head.end) |
483 | goto finished; | 491 | goto finished; |
484 | 492 | ||
485 | ctl.fpos = filp->f_pos + (NCP_DIRCACHE_START - 2); | 493 | ctl.fpos = ctx->pos + (NCP_DIRCACHE_START - 2); |
486 | ctl.ofs = ctl.fpos / NCP_DIRCACHE_SIZE; | 494 | ctl.ofs = ctl.fpos / NCP_DIRCACHE_SIZE; |
487 | ctl.idx = ctl.fpos % NCP_DIRCACHE_SIZE; | 495 | ctl.idx = ctl.fpos % NCP_DIRCACHE_SIZE; |
488 | 496 | ||
@@ -497,21 +505,21 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
497 | } | 505 | } |
498 | while (ctl.idx < NCP_DIRCACHE_SIZE) { | 506 | while (ctl.idx < NCP_DIRCACHE_SIZE) { |
499 | struct dentry *dent; | 507 | struct dentry *dent; |
500 | int res; | 508 | bool over; |
501 | 509 | ||
502 | dent = ncp_dget_fpos(ctl.cache->dentry[ctl.idx], | 510 | dent = ncp_dget_fpos(ctl.cache->dentry[ctl.idx], |
503 | dentry, filp->f_pos); | 511 | dentry, ctx->pos); |
504 | if (!dent) | 512 | if (!dent) |
505 | goto invalid_cache; | 513 | goto invalid_cache; |
506 | res = filldir(dirent, dent->d_name.name, | 514 | over = !dir_emit(ctx, dent->d_name.name, |
507 | dent->d_name.len, filp->f_pos, | 515 | dent->d_name.len, |
508 | dent->d_inode->i_ino, DT_UNKNOWN); | 516 | dent->d_inode->i_ino, DT_UNKNOWN); |
509 | dput(dent); | 517 | dput(dent); |
510 | if (res) | 518 | if (over) |
511 | goto finished; | 519 | goto finished; |
512 | filp->f_pos += 1; | 520 | ctx->pos += 1; |
513 | ctl.idx += 1; | 521 | ctl.idx += 1; |
514 | if (filp->f_pos > ctl.head.end) | 522 | if (ctx->pos > ctl.head.end) |
515 | goto finished; | 523 | goto finished; |
516 | } | 524 | } |
517 | if (ctl.page) { | 525 | if (ctl.page) { |
@@ -548,9 +556,9 @@ init_cache: | |||
548 | ctl.valid = 1; | 556 | ctl.valid = 1; |
549 | read_really: | 557 | read_really: |
550 | if (ncp_is_server_root(inode)) { | 558 | if (ncp_is_server_root(inode)) { |
551 | ncp_read_volume_list(filp, dirent, filldir, &ctl); | 559 | ncp_read_volume_list(file, ctx, &ctl); |
552 | } else { | 560 | } else { |
553 | ncp_do_readdir(filp, dirent, filldir, &ctl); | 561 | ncp_do_readdir(file, ctx, &ctl); |
554 | } | 562 | } |
555 | ctl.head.end = ctl.fpos - 1; | 563 | ctl.head.end = ctl.fpos - 1; |
556 | ctl.head.eof = ctl.valid; | 564 | ctl.head.eof = ctl.valid; |
@@ -573,11 +581,11 @@ out: | |||
573 | } | 581 | } |
574 | 582 | ||
575 | static int | 583 | static int |
576 | ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 584 | ncp_fill_cache(struct file *file, struct dir_context *ctx, |
577 | struct ncp_cache_control *ctrl, struct ncp_entry_info *entry, | 585 | struct ncp_cache_control *ctrl, struct ncp_entry_info *entry, |
578 | int inval_childs) | 586 | int inval_childs) |
579 | { | 587 | { |
580 | struct dentry *newdent, *dentry = filp->f_path.dentry; | 588 | struct dentry *newdent, *dentry = file->f_path.dentry; |
581 | struct inode *dir = dentry->d_inode; | 589 | struct inode *dir = dentry->d_inode; |
582 | struct ncp_cache_control ctl = *ctrl; | 590 | struct ncp_cache_control ctl = *ctrl; |
583 | struct qstr qname; | 591 | struct qstr qname; |
@@ -666,15 +674,13 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | |||
666 | end_advance: | 674 | end_advance: |
667 | if (!valid) | 675 | if (!valid) |
668 | ctl.valid = 0; | 676 | ctl.valid = 0; |
669 | if (!ctl.filled && (ctl.fpos == filp->f_pos)) { | 677 | if (!ctl.filled && (ctl.fpos == ctx->pos)) { |
670 | if (!ino) | ||
671 | ino = find_inode_number(dentry, &qname); | ||
672 | if (!ino) | 678 | if (!ino) |
673 | ino = iunique(dir->i_sb, 2); | 679 | ino = iunique(dir->i_sb, 2); |
674 | ctl.filled = filldir(dirent, qname.name, qname.len, | 680 | ctl.filled = !dir_emit(ctx, qname.name, qname.len, |
675 | filp->f_pos, ino, DT_UNKNOWN); | 681 | ino, DT_UNKNOWN); |
676 | if (!ctl.filled) | 682 | if (!ctl.filled) |
677 | filp->f_pos += 1; | 683 | ctx->pos += 1; |
678 | } | 684 | } |
679 | ctl.fpos += 1; | 685 | ctl.fpos += 1; |
680 | ctl.idx += 1; | 686 | ctl.idx += 1; |
@@ -683,10 +689,10 @@ end_advance: | |||
683 | } | 689 | } |
684 | 690 | ||
685 | static void | 691 | static void |
686 | ncp_read_volume_list(struct file *filp, void *dirent, filldir_t filldir, | 692 | ncp_read_volume_list(struct file *file, struct dir_context *ctx, |
687 | struct ncp_cache_control *ctl) | 693 | struct ncp_cache_control *ctl) |
688 | { | 694 | { |
689 | struct dentry *dentry = filp->f_path.dentry; | 695 | struct dentry *dentry = file->f_path.dentry; |
690 | struct inode *inode = dentry->d_inode; | 696 | struct inode *inode = dentry->d_inode; |
691 | struct ncp_server *server = NCP_SERVER(inode); | 697 | struct ncp_server *server = NCP_SERVER(inode); |
692 | struct ncp_volume_info info; | 698 | struct ncp_volume_info info; |
@@ -694,7 +700,7 @@ ncp_read_volume_list(struct file *filp, void *dirent, filldir_t filldir, | |||
694 | int i; | 700 | int i; |
695 | 701 | ||
696 | DPRINTK("ncp_read_volume_list: pos=%ld\n", | 702 | DPRINTK("ncp_read_volume_list: pos=%ld\n", |
697 | (unsigned long) filp->f_pos); | 703 | (unsigned long) ctx->pos); |
698 | 704 | ||
699 | for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) { | 705 | for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) { |
700 | int inval_dentry; | 706 | int inval_dentry; |
@@ -715,16 +721,16 @@ ncp_read_volume_list(struct file *filp, void *dirent, filldir_t filldir, | |||
715 | } | 721 | } |
716 | inval_dentry = ncp_update_known_namespace(server, entry.i.volNumber, NULL); | 722 | inval_dentry = ncp_update_known_namespace(server, entry.i.volNumber, NULL); |
717 | entry.volume = entry.i.volNumber; | 723 | entry.volume = entry.i.volNumber; |
718 | if (!ncp_fill_cache(filp, dirent, filldir, ctl, &entry, inval_dentry)) | 724 | if (!ncp_fill_cache(file, ctx, ctl, &entry, inval_dentry)) |
719 | return; | 725 | return; |
720 | } | 726 | } |
721 | } | 727 | } |
722 | 728 | ||
723 | static void | 729 | static void |
724 | ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir, | 730 | ncp_do_readdir(struct file *file, struct dir_context *ctx, |
725 | struct ncp_cache_control *ctl) | 731 | struct ncp_cache_control *ctl) |
726 | { | 732 | { |
727 | struct dentry *dentry = filp->f_path.dentry; | 733 | struct dentry *dentry = file->f_path.dentry; |
728 | struct inode *dir = dentry->d_inode; | 734 | struct inode *dir = dentry->d_inode; |
729 | struct ncp_server *server = NCP_SERVER(dir); | 735 | struct ncp_server *server = NCP_SERVER(dir); |
730 | struct nw_search_sequence seq; | 736 | struct nw_search_sequence seq; |
@@ -736,7 +742,7 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir, | |||
736 | 742 | ||
737 | DPRINTK("ncp_do_readdir: %s/%s, fpos=%ld\n", | 743 | DPRINTK("ncp_do_readdir: %s/%s, fpos=%ld\n", |
738 | dentry->d_parent->d_name.name, dentry->d_name.name, | 744 | dentry->d_parent->d_name.name, dentry->d_name.name, |
739 | (unsigned long) filp->f_pos); | 745 | (unsigned long) ctx->pos); |
740 | PPRINTK("ncp_do_readdir: init %s, volnum=%d, dirent=%u\n", | 746 | PPRINTK("ncp_do_readdir: init %s, volnum=%d, dirent=%u\n", |
741 | dentry->d_name.name, NCP_FINFO(dir)->volNumber, | 747 | dentry->d_name.name, NCP_FINFO(dir)->volNumber, |
742 | NCP_FINFO(dir)->dirEntNum); | 748 | NCP_FINFO(dir)->dirEntNum); |
@@ -778,7 +784,7 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir, | |||
778 | rpl += onerpl; | 784 | rpl += onerpl; |
779 | rpls -= onerpl; | 785 | rpls -= onerpl; |
780 | entry.volume = entry.i.volNumber; | 786 | entry.volume = entry.i.volNumber; |
781 | if (!ncp_fill_cache(filp, dirent, filldir, ctl, &entry, 0)) | 787 | if (!ncp_fill_cache(file, ctx, ctl, &entry, 0)) |
782 | break; | 788 | break; |
783 | } | 789 | } |
784 | } while (more); | 790 | } while (more); |
@@ -1131,17 +1137,6 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1131 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, | 1137 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, |
1132 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name); | 1138 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name); |
1133 | 1139 | ||
1134 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) { | ||
1135 | /* | ||
1136 | * fail with EBUSY if there are still references to this | ||
1137 | * directory. | ||
1138 | */ | ||
1139 | dentry_unhash(new_dentry); | ||
1140 | error = -EBUSY; | ||
1141 | if (!d_unhashed(new_dentry)) | ||
1142 | goto out; | ||
1143 | } | ||
1144 | |||
1145 | ncp_age_dentry(server, old_dentry); | 1140 | ncp_age_dentry(server, old_dentry); |
1146 | ncp_age_dentry(server, new_dentry); | 1141 | ncp_age_dentry(server, new_dentry); |
1147 | 1142 | ||
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 26910c8154da..4659da67e7f6 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -403,18 +403,24 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options) | |||
403 | switch (optval) { | 403 | switch (optval) { |
404 | case 'u': | 404 | case 'u': |
405 | data->uid = make_kuid(current_user_ns(), optint); | 405 | data->uid = make_kuid(current_user_ns(), optint); |
406 | if (!uid_valid(data->uid)) | 406 | if (!uid_valid(data->uid)) { |
407 | ret = -EINVAL; | ||
407 | goto err; | 408 | goto err; |
409 | } | ||
408 | break; | 410 | break; |
409 | case 'g': | 411 | case 'g': |
410 | data->gid = make_kgid(current_user_ns(), optint); | 412 | data->gid = make_kgid(current_user_ns(), optint); |
411 | if (!gid_valid(data->gid)) | 413 | if (!gid_valid(data->gid)) { |
414 | ret = -EINVAL; | ||
412 | goto err; | 415 | goto err; |
416 | } | ||
413 | break; | 417 | break; |
414 | case 'o': | 418 | case 'o': |
415 | data->mounted_uid = make_kuid(current_user_ns(), optint); | 419 | data->mounted_uid = make_kuid(current_user_ns(), optint); |
416 | if (!uid_valid(data->mounted_uid)) | 420 | if (!uid_valid(data->mounted_uid)) { |
421 | ret = -EINVAL; | ||
417 | goto err; | 422 | goto err; |
423 | } | ||
418 | break; | 424 | break; |
419 | case 'm': | 425 | case 'm': |
420 | data->file_mode = optint; | 426 | data->file_mode = optint; |
@@ -891,6 +897,10 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) | |||
891 | if (!server) /* How this could happen? */ | 897 | if (!server) /* How this could happen? */ |
892 | goto out; | 898 | goto out; |
893 | 899 | ||
900 | result = -EPERM; | ||
901 | if (IS_DEADDIR(dentry->d_inode)) | ||
902 | goto out; | ||
903 | |||
894 | /* ageing the dentry to force validation */ | 904 | /* ageing the dentry to force validation */ |
895 | ncp_age_dentry(server, dentry); | 905 | ncp_age_dentry(server, dentry); |
896 | 906 | ||
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index ee24df5af1f9..3c5dd55d284c 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c | |||
@@ -117,7 +117,7 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma) | |||
117 | return -EINVAL; | 117 | return -EINVAL; |
118 | /* we do not support files bigger than 4GB... We eventually | 118 | /* we do not support files bigger than 4GB... We eventually |
119 | supports just 4GB... */ | 119 | supports just 4GB... */ |
120 | if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff | 120 | if (vma_pages(vma) + vma->vm_pgoff |
121 | > (1U << (32 - PAGE_SHIFT))) | 121 | > (1U << (32 - PAGE_SHIFT))) |
122 | return -EFBIG; | 122 | return -EFBIG; |
123 | 123 | ||
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 13ca196385f5..b5e80b0af315 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -104,6 +104,15 @@ config NFS_V4_1 | |||
104 | 104 | ||
105 | If unsure, say N. | 105 | If unsure, say N. |
106 | 106 | ||
107 | config NFS_V4_2 | ||
108 | bool "NFS client support for NFSv4.2" | ||
109 | depends on NFS_V4_1 | ||
110 | help | ||
111 | This option enables support for minor version 2 of the NFSv4 protocol | ||
112 | in the kernel's NFS client. | ||
113 | |||
114 | If unsure, say N. | ||
115 | |||
107 | config PNFS_FILE_LAYOUT | 116 | config PNFS_FILE_LAYOUT |
108 | tristate | 117 | tristate |
109 | depends on NFS_V4_1 | 118 | depends on NFS_V4_1 |
@@ -131,6 +140,11 @@ config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN | |||
131 | If the NFS client is unchanged from the upstream kernel, this | 140 | If the NFS client is unchanged from the upstream kernel, this |
132 | option should be set to the default "kernel.org". | 141 | option should be set to the default "kernel.org". |
133 | 142 | ||
143 | config NFS_V4_SECURITY_LABEL | ||
144 | bool | ||
145 | depends on NFS_V4_2 && SECURITY | ||
146 | default y | ||
147 | |||
134 | config ROOT_NFS | 148 | config ROOT_NFS |
135 | bool "Root file system on NFS" | 149 | bool "Root file system on NFS" |
136 | depends on NFS_FS=y && IP_PNP | 150 | depends on NFS_FS=y && IP_PNP |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index cce2c057bd2d..e0bb048e9576 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -6,8 +6,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o | |||
6 | 6 | ||
7 | nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ | 7 | nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ |
8 | direct.o pagelist.o read.o symlink.o unlink.o \ | 8 | direct.o pagelist.o read.o symlink.o unlink.o \ |
9 | write.o namespace.o mount_clnt.o \ | 9 | write.o namespace.o mount_clnt.o |
10 | dns_resolve.o cache_lib.o | ||
11 | nfs-$(CONFIG_ROOT_NFS) += nfsroot.o | 10 | nfs-$(CONFIG_ROOT_NFS) += nfsroot.o |
12 | nfs-$(CONFIG_SYSCTL) += sysctl.o | 11 | nfs-$(CONFIG_SYSCTL) += sysctl.o |
13 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o | 12 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o |
@@ -22,7 +21,8 @@ nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o | |||
22 | obj-$(CONFIG_NFS_V4) += nfsv4.o | 21 | obj-$(CONFIG_NFS_V4) += nfsv4.o |
23 | nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ | 22 | nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ |
24 | delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ | 23 | delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ |
25 | nfs4namespace.o nfs4getroot.o nfs4client.o | 24 | nfs4namespace.o nfs4getroot.o nfs4client.o dns_resolve.o |
25 | nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o | ||
26 | nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o | 26 | nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o |
27 | nfsv4-$(CONFIG_NFS_V4_1) += nfs4session.o pnfs.o pnfs_dev.o | 27 | nfsv4-$(CONFIG_NFS_V4_1) += nfs4session.o pnfs.o pnfs_dev.o |
28 | 28 | ||
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 434b93ec0970..e242bbf72972 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -1089,9 +1089,10 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, | |||
1089 | dev->pgbase = 0; | 1089 | dev->pgbase = 0; |
1090 | dev->pglen = PAGE_SIZE * max_pages; | 1090 | dev->pglen = PAGE_SIZE * max_pages; |
1091 | dev->mincount = 0; | 1091 | dev->mincount = 0; |
1092 | dev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | ||
1092 | 1093 | ||
1093 | dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); | 1094 | dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); |
1094 | rc = nfs4_proc_getdeviceinfo(server, dev); | 1095 | rc = nfs4_proc_getdeviceinfo(server, dev, NULL); |
1095 | dprintk("%s getdevice info returns %d\n", __func__, rc); | 1096 | dprintk("%s getdevice info returns %d\n", __func__, rc); |
1096 | if (rc) { | 1097 | if (rc) { |
1097 | rv = ERR_PTR(rc); | 1098 | rv = ERR_PTR(rc); |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index cff089a412c7..67cd73213168 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -211,7 +211,6 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, | |||
211 | struct svc_rqst *rqstp; | 211 | struct svc_rqst *rqstp; |
212 | int (*callback_svc)(void *vrqstp); | 212 | int (*callback_svc)(void *vrqstp); |
213 | struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; | 213 | struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; |
214 | char svc_name[12]; | ||
215 | int ret; | 214 | int ret; |
216 | 215 | ||
217 | nfs_callback_bc_serv(minorversion, xprt, serv); | 216 | nfs_callback_bc_serv(minorversion, xprt, serv); |
@@ -235,10 +234,10 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, | |||
235 | 234 | ||
236 | svc_sock_update_bufs(serv); | 235 | svc_sock_update_bufs(serv); |
237 | 236 | ||
238 | sprintf(svc_name, "nfsv4.%u-svc", minorversion); | ||
239 | cb_info->serv = serv; | 237 | cb_info->serv = serv; |
240 | cb_info->rqst = rqstp; | 238 | cb_info->rqst = rqstp; |
241 | cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name); | 239 | cb_info->task = kthread_run(callback_svc, cb_info->rqst, |
240 | "nfsv4.%u-svc", minorversion); | ||
242 | if (IS_ERR(cb_info->task)) { | 241 | if (IS_ERR(cb_info->task)) { |
243 | ret = PTR_ERR(cb_info->task); | 242 | ret = PTR_ERR(cb_info->task); |
244 | svc_exit_thread(cb_info->rqst); | 243 | svc_exit_thread(cb_info->rqst); |
@@ -282,6 +281,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n | |||
282 | ret = nfs4_callback_up_net(serv, net); | 281 | ret = nfs4_callback_up_net(serv, net); |
283 | break; | 282 | break; |
284 | case 1: | 283 | case 1: |
284 | case 2: | ||
285 | ret = nfs41_callback_up_net(serv, net); | 285 | ret = nfs41_callback_up_net(serv, net); |
286 | break; | 286 | break; |
287 | default: | 287 | default: |
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index efd54f0a4c46..84326e9fb47a 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h | |||
@@ -32,6 +32,8 @@ enum nfs4_callback_opnum { | |||
32 | OP_CB_WANTS_CANCELLED = 12, | 32 | OP_CB_WANTS_CANCELLED = 12, |
33 | OP_CB_NOTIFY_LOCK = 13, | 33 | OP_CB_NOTIFY_LOCK = 13, |
34 | OP_CB_NOTIFY_DEVICEID = 14, | 34 | OP_CB_NOTIFY_DEVICEID = 14, |
35 | /* Callback operations new to NFSv4.2 */ | ||
36 | OP_CB_OFFLOAD = 15, | ||
35 | OP_CB_ILLEGAL = 10044, | 37 | OP_CB_ILLEGAL = 10044, |
36 | }; | 38 | }; |
37 | 39 | ||
@@ -39,6 +41,7 @@ struct cb_process_state { | |||
39 | __be32 drc_status; | 41 | __be32 drc_status; |
40 | struct nfs_client *clp; | 42 | struct nfs_client *clp; |
41 | u32 slotid; | 43 | u32 slotid; |
44 | u32 minorversion; | ||
42 | struct net *net; | 45 | struct net *net; |
43 | }; | 46 | }; |
44 | 47 | ||
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 0bc27684ebfa..e6ebc4c38c81 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -406,7 +406,8 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
406 | int i; | 406 | int i; |
407 | __be32 status = htonl(NFS4ERR_BADSESSION); | 407 | __be32 status = htonl(NFS4ERR_BADSESSION); |
408 | 408 | ||
409 | clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid); | 409 | clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, |
410 | &args->csa_sessionid, cps->minorversion); | ||
410 | if (clp == NULL) | 411 | if (clp == NULL) |
411 | goto out; | 412 | goto out; |
412 | 413 | ||
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index a35582c9d444..f4ccfe6521ec 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
@@ -166,9 +166,9 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound | |||
166 | if (unlikely(p == NULL)) | 166 | if (unlikely(p == NULL)) |
167 | return htonl(NFS4ERR_RESOURCE); | 167 | return htonl(NFS4ERR_RESOURCE); |
168 | hdr->minorversion = ntohl(*p++); | 168 | hdr->minorversion = ntohl(*p++); |
169 | /* Check minor version is zero or one. */ | 169 | /* Check for minor version support */ |
170 | if (hdr->minorversion <= 1) { | 170 | if (hdr->minorversion <= NFS4_MAX_MINOR_VERSION) { |
171 | hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ | 171 | hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 and v4.2 */ |
172 | } else { | 172 | } else { |
173 | pr_warn_ratelimited("NFS: %s: NFSv4 server callback with " | 173 | pr_warn_ratelimited("NFS: %s: NFSv4 server callback with " |
174 | "illegal minor version %u!\n", | 174 | "illegal minor version %u!\n", |
@@ -786,6 +786,26 @@ static void nfs4_cb_free_slot(struct cb_process_state *cps) | |||
786 | } | 786 | } |
787 | #endif /* CONFIG_NFS_V4_1 */ | 787 | #endif /* CONFIG_NFS_V4_1 */ |
788 | 788 | ||
789 | #ifdef CONFIG_NFS_V4_2 | ||
790 | static __be32 | ||
791 | preprocess_nfs42_op(int nop, unsigned int op_nr, struct callback_op **op) | ||
792 | { | ||
793 | __be32 status = preprocess_nfs41_op(nop, op_nr, op); | ||
794 | if (status != htonl(NFS4ERR_OP_ILLEGAL)) | ||
795 | return status; | ||
796 | |||
797 | if (op_nr == OP_CB_OFFLOAD) | ||
798 | return htonl(NFS4ERR_NOTSUPP); | ||
799 | return htonl(NFS4ERR_OP_ILLEGAL); | ||
800 | } | ||
801 | #else /* CONFIG_NFS_V4_2 */ | ||
802 | static __be32 | ||
803 | preprocess_nfs42_op(int nop, unsigned int op_nr, struct callback_op **op) | ||
804 | { | ||
805 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); | ||
806 | } | ||
807 | #endif /* CONFIG_NFS_V4_2 */ | ||
808 | |||
789 | static __be32 | 809 | static __be32 |
790 | preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) | 810 | preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) |
791 | { | 811 | { |
@@ -801,8 +821,7 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) | |||
801 | return htonl(NFS_OK); | 821 | return htonl(NFS_OK); |
802 | } | 822 | } |
803 | 823 | ||
804 | static __be32 process_op(uint32_t minorversion, int nop, | 824 | static __be32 process_op(int nop, struct svc_rqst *rqstp, |
805 | struct svc_rqst *rqstp, | ||
806 | struct xdr_stream *xdr_in, void *argp, | 825 | struct xdr_stream *xdr_in, void *argp, |
807 | struct xdr_stream *xdr_out, void *resp, | 826 | struct xdr_stream *xdr_out, void *resp, |
808 | struct cb_process_state *cps) | 827 | struct cb_process_state *cps) |
@@ -819,10 +838,22 @@ static __be32 process_op(uint32_t minorversion, int nop, | |||
819 | return status; | 838 | return status; |
820 | 839 | ||
821 | dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", | 840 | dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", |
822 | __func__, minorversion, nop, op_nr); | 841 | __func__, cps->minorversion, nop, op_nr); |
842 | |||
843 | switch (cps->minorversion) { | ||
844 | case 0: | ||
845 | status = preprocess_nfs4_op(op_nr, &op); | ||
846 | break; | ||
847 | case 1: | ||
848 | status = preprocess_nfs41_op(nop, op_nr, &op); | ||
849 | break; | ||
850 | case 2: | ||
851 | status = preprocess_nfs42_op(nop, op_nr, &op); | ||
852 | break; | ||
853 | default: | ||
854 | status = htonl(NFS4ERR_MINOR_VERS_MISMATCH); | ||
855 | } | ||
823 | 856 | ||
824 | status = minorversion ? preprocess_nfs41_op(nop, op_nr, &op) : | ||
825 | preprocess_nfs4_op(op_nr, &op); | ||
826 | if (status == htonl(NFS4ERR_OP_ILLEGAL)) | 857 | if (status == htonl(NFS4ERR_OP_ILLEGAL)) |
827 | op_nr = OP_CB_ILLEGAL; | 858 | op_nr = OP_CB_ILLEGAL; |
828 | if (status) | 859 | if (status) |
@@ -885,14 +916,15 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
885 | return rpc_drop_reply; | 916 | return rpc_drop_reply; |
886 | } | 917 | } |
887 | 918 | ||
919 | cps.minorversion = hdr_arg.minorversion; | ||
888 | hdr_res.taglen = hdr_arg.taglen; | 920 | hdr_res.taglen = hdr_arg.taglen; |
889 | hdr_res.tag = hdr_arg.tag; | 921 | hdr_res.tag = hdr_arg.tag; |
890 | if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) | 922 | if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) |
891 | return rpc_system_err; | 923 | return rpc_system_err; |
892 | 924 | ||
893 | while (status == 0 && nops != hdr_arg.nops) { | 925 | while (status == 0 && nops != hdr_arg.nops) { |
894 | status = process_op(hdr_arg.minorversion, nops, rqstp, | 926 | status = process_op(nops, rqstp, &xdr_in, |
895 | &xdr_in, argp, &xdr_out, resp, &cps); | 927 | argp, &xdr_out, resp, &cps); |
896 | nops++; | 928 | nops++; |
897 | } | 929 | } |
898 | 930 | ||
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index c513b0cc835f..340b1eff0267 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -753,8 +753,6 @@ static int nfs_init_server(struct nfs_server *server, | |||
753 | data->timeo, data->retrans); | 753 | data->timeo, data->retrans); |
754 | if (data->flags & NFS_MOUNT_NORESVPORT) | 754 | if (data->flags & NFS_MOUNT_NORESVPORT) |
755 | set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); | 755 | set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); |
756 | if (server->options & NFS_OPTION_MIGRATION) | ||
757 | set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); | ||
758 | 756 | ||
759 | /* Allocate or find a client reference we can use */ | 757 | /* Allocate or find a client reference we can use */ |
760 | clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); | 758 | clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); |
@@ -1076,7 +1074,7 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, | |||
1076 | } | 1074 | } |
1077 | 1075 | ||
1078 | if (!(fattr->valid & NFS_ATTR_FATTR)) { | 1076 | if (!(fattr->valid & NFS_ATTR_FATTR)) { |
1079 | error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr); | 1077 | error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr, NULL); |
1080 | if (error < 0) { | 1078 | if (error < 0) { |
1081 | dprintk("nfs_create_server: getattr error = %d\n", -error); | 1079 | dprintk("nfs_create_server: getattr error = %d\n", -error); |
1082 | goto error; | 1080 | goto error; |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 57db3244f4d9..7ec4814e298d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -73,20 +73,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ | |||
73 | if (inode->i_flock == NULL) | 73 | if (inode->i_flock == NULL) |
74 | goto out; | 74 | goto out; |
75 | 75 | ||
76 | /* Protect inode->i_flock using the file locks lock */ | 76 | /* Protect inode->i_flock using the i_lock */ |
77 | lock_flocks(); | 77 | spin_lock(&inode->i_lock); |
78 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 78 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
79 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) | 79 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) |
80 | continue; | 80 | continue; |
81 | if (nfs_file_open_context(fl->fl_file) != ctx) | 81 | if (nfs_file_open_context(fl->fl_file) != ctx) |
82 | continue; | 82 | continue; |
83 | unlock_flocks(); | 83 | spin_unlock(&inode->i_lock); |
84 | status = nfs4_lock_delegation_recall(fl, state, stateid); | 84 | status = nfs4_lock_delegation_recall(fl, state, stateid); |
85 | if (status < 0) | 85 | if (status < 0) |
86 | goto out; | 86 | goto out; |
87 | lock_flocks(); | 87 | spin_lock(&inode->i_lock); |
88 | } | 88 | } |
89 | unlock_flocks(); | 89 | spin_unlock(&inode->i_lock); |
90 | out: | 90 | out: |
91 | return status; | 91 | return status; |
92 | } | 92 | } |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e093e73178b7..e474ca2b2bfe 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/pagevec.h> | 33 | #include <linux/pagevec.h> |
34 | #include <linux/namei.h> | 34 | #include <linux/namei.h> |
35 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
36 | #include <linux/swap.h> | ||
36 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
37 | #include <linux/kmemleak.h> | 38 | #include <linux/kmemleak.h> |
38 | #include <linux/xattr.h> | 39 | #include <linux/xattr.h> |
@@ -46,7 +47,7 @@ | |||
46 | 47 | ||
47 | static int nfs_opendir(struct inode *, struct file *); | 48 | static int nfs_opendir(struct inode *, struct file *); |
48 | static int nfs_closedir(struct inode *, struct file *); | 49 | static int nfs_closedir(struct inode *, struct file *); |
49 | static int nfs_readdir(struct file *, void *, filldir_t); | 50 | static int nfs_readdir(struct file *, struct dir_context *); |
50 | static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); | 51 | static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); |
51 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); | 52 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); |
52 | static void nfs_readdir_clear_array(struct page*); | 53 | static void nfs_readdir_clear_array(struct page*); |
@@ -54,7 +55,7 @@ static void nfs_readdir_clear_array(struct page*); | |||
54 | const struct file_operations nfs_dir_operations = { | 55 | const struct file_operations nfs_dir_operations = { |
55 | .llseek = nfs_llseek_dir, | 56 | .llseek = nfs_llseek_dir, |
56 | .read = generic_read_dir, | 57 | .read = generic_read_dir, |
57 | .readdir = nfs_readdir, | 58 | .iterate = nfs_readdir, |
58 | .open = nfs_opendir, | 59 | .open = nfs_opendir, |
59 | .release = nfs_closedir, | 60 | .release = nfs_closedir, |
60 | .fsync = nfs_fsync_dir, | 61 | .fsync = nfs_fsync_dir, |
@@ -147,6 +148,7 @@ typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int); | |||
147 | typedef struct { | 148 | typedef struct { |
148 | struct file *file; | 149 | struct file *file; |
149 | struct page *page; | 150 | struct page *page; |
151 | struct dir_context *ctx; | ||
150 | unsigned long page_index; | 152 | unsigned long page_index; |
151 | u64 *dir_cookie; | 153 | u64 *dir_cookie; |
152 | u64 last_cookie; | 154 | u64 last_cookie; |
@@ -252,7 +254,7 @@ out: | |||
252 | static | 254 | static |
253 | int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) | 255 | int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) |
254 | { | 256 | { |
255 | loff_t diff = desc->file->f_pos - desc->current_index; | 257 | loff_t diff = desc->ctx->pos - desc->current_index; |
256 | unsigned int index; | 258 | unsigned int index; |
257 | 259 | ||
258 | if (diff < 0) | 260 | if (diff < 0) |
@@ -289,7 +291,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des | |||
289 | || (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))) { | 291 | || (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))) { |
290 | ctx->duped = 0; | 292 | ctx->duped = 0; |
291 | ctx->attr_gencount = nfsi->attr_gencount; | 293 | ctx->attr_gencount = nfsi->attr_gencount; |
292 | } else if (new_pos < desc->file->f_pos) { | 294 | } else if (new_pos < desc->ctx->pos) { |
293 | if (ctx->duped > 0 | 295 | if (ctx->duped > 0 |
294 | && ctx->dup_cookie == *desc->dir_cookie) { | 296 | && ctx->dup_cookie == *desc->dir_cookie) { |
295 | if (printk_ratelimit()) { | 297 | if (printk_ratelimit()) { |
@@ -307,7 +309,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des | |||
307 | ctx->dup_cookie = *desc->dir_cookie; | 309 | ctx->dup_cookie = *desc->dir_cookie; |
308 | ctx->duped = -1; | 310 | ctx->duped = -1; |
309 | } | 311 | } |
310 | desc->file->f_pos = new_pos; | 312 | desc->ctx->pos = new_pos; |
311 | desc->cache_entry_index = i; | 313 | desc->cache_entry_index = i; |
312 | return 0; | 314 | return 0; |
313 | } | 315 | } |
@@ -405,13 +407,13 @@ different: | |||
405 | } | 407 | } |
406 | 408 | ||
407 | static | 409 | static |
408 | bool nfs_use_readdirplus(struct inode *dir, struct file *filp) | 410 | bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) |
409 | { | 411 | { |
410 | if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) | 412 | if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) |
411 | return false; | 413 | return false; |
412 | if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags)) | 414 | if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags)) |
413 | return true; | 415 | return true; |
414 | if (filp->f_pos == 0) | 416 | if (ctx->pos == 0) |
415 | return true; | 417 | return true; |
416 | return false; | 418 | return false; |
417 | } | 419 | } |
@@ -435,6 +437,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |||
435 | struct dentry *alias; | 437 | struct dentry *alias; |
436 | struct inode *dir = parent->d_inode; | 438 | struct inode *dir = parent->d_inode; |
437 | struct inode *inode; | 439 | struct inode *inode; |
440 | int status; | ||
438 | 441 | ||
439 | if (filename.name[0] == '.') { | 442 | if (filename.name[0] == '.') { |
440 | if (filename.len == 1) | 443 | if (filename.len == 1) |
@@ -447,7 +450,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |||
447 | dentry = d_lookup(parent, &filename); | 450 | dentry = d_lookup(parent, &filename); |
448 | if (dentry != NULL) { | 451 | if (dentry != NULL) { |
449 | if (nfs_same_file(dentry, entry)) { | 452 | if (nfs_same_file(dentry, entry)) { |
450 | nfs_refresh_inode(dentry->d_inode, entry->fattr); | 453 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
454 | status = nfs_refresh_inode(dentry->d_inode, entry->fattr); | ||
455 | if (!status) | ||
456 | nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label); | ||
451 | goto out; | 457 | goto out; |
452 | } else { | 458 | } else { |
453 | if (d_invalidate(dentry) != 0) | 459 | if (d_invalidate(dentry) != 0) |
@@ -460,7 +466,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |||
460 | if (dentry == NULL) | 466 | if (dentry == NULL) |
461 | return; | 467 | return; |
462 | 468 | ||
463 | inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); | 469 | inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label); |
464 | if (IS_ERR(inode)) | 470 | if (IS_ERR(inode)) |
465 | goto out; | 471 | goto out; |
466 | 472 | ||
@@ -585,10 +591,16 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, | |||
585 | if (entry.fh == NULL || entry.fattr == NULL) | 591 | if (entry.fh == NULL || entry.fattr == NULL) |
586 | goto out; | 592 | goto out; |
587 | 593 | ||
594 | entry.label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); | ||
595 | if (IS_ERR(entry.label)) { | ||
596 | status = PTR_ERR(entry.label); | ||
597 | goto out; | ||
598 | } | ||
599 | |||
588 | array = nfs_readdir_get_array(page); | 600 | array = nfs_readdir_get_array(page); |
589 | if (IS_ERR(array)) { | 601 | if (IS_ERR(array)) { |
590 | status = PTR_ERR(array); | 602 | status = PTR_ERR(array); |
591 | goto out; | 603 | goto out_label_free; |
592 | } | 604 | } |
593 | memset(array, 0, sizeof(struct nfs_cache_array)); | 605 | memset(array, 0, sizeof(struct nfs_cache_array)); |
594 | array->eof_index = -1; | 606 | array->eof_index = -1; |
@@ -614,6 +626,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, | |||
614 | nfs_readdir_free_large_page(pages_ptr, pages, array_size); | 626 | nfs_readdir_free_large_page(pages_ptr, pages, array_size); |
615 | out_release_array: | 627 | out_release_array: |
616 | nfs_readdir_release_array(page); | 628 | nfs_readdir_release_array(page); |
629 | out_label_free: | ||
630 | nfs4_label_free(entry.label); | ||
617 | out: | 631 | out: |
618 | nfs_free_fattr(entry.fattr); | 632 | nfs_free_fattr(entry.fattr); |
619 | nfs_free_fhandle(entry.fh); | 633 | nfs_free_fhandle(entry.fh); |
@@ -702,8 +716,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) | |||
702 | * Once we've found the start of the dirent within a page: fill 'er up... | 716 | * Once we've found the start of the dirent within a page: fill 'er up... |
703 | */ | 717 | */ |
704 | static | 718 | static |
705 | int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | 719 | int nfs_do_filldir(nfs_readdir_descriptor_t *desc) |
706 | filldir_t filldir) | ||
707 | { | 720 | { |
708 | struct file *file = desc->file; | 721 | struct file *file = desc->file; |
709 | int i = 0; | 722 | int i = 0; |
@@ -721,13 +734,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
721 | struct nfs_cache_array_entry *ent; | 734 | struct nfs_cache_array_entry *ent; |
722 | 735 | ||
723 | ent = &array->array[i]; | 736 | ent = &array->array[i]; |
724 | if (filldir(dirent, ent->string.name, ent->string.len, | 737 | if (!dir_emit(desc->ctx, ent->string.name, ent->string.len, |
725 | file->f_pos, nfs_compat_user_ino64(ent->ino), | 738 | nfs_compat_user_ino64(ent->ino), ent->d_type)) { |
726 | ent->d_type) < 0) { | ||
727 | desc->eof = 1; | 739 | desc->eof = 1; |
728 | break; | 740 | break; |
729 | } | 741 | } |
730 | file->f_pos++; | 742 | desc->ctx->pos++; |
731 | if (i < (array->size-1)) | 743 | if (i < (array->size-1)) |
732 | *desc->dir_cookie = array->array[i+1].cookie; | 744 | *desc->dir_cookie = array->array[i+1].cookie; |
733 | else | 745 | else |
@@ -759,8 +771,7 @@ out: | |||
759 | * directory in the page cache by the time we get here. | 771 | * directory in the page cache by the time we get here. |
760 | */ | 772 | */ |
761 | static inline | 773 | static inline |
762 | int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | 774 | int uncached_readdir(nfs_readdir_descriptor_t *desc) |
763 | filldir_t filldir) | ||
764 | { | 775 | { |
765 | struct page *page = NULL; | 776 | struct page *page = NULL; |
766 | int status; | 777 | int status; |
@@ -785,7 +796,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
785 | if (status < 0) | 796 | if (status < 0) |
786 | goto out_release; | 797 | goto out_release; |
787 | 798 | ||
788 | status = nfs_do_filldir(desc, dirent, filldir); | 799 | status = nfs_do_filldir(desc); |
789 | 800 | ||
790 | out: | 801 | out: |
791 | dfprintk(DIRCACHE, "NFS: %s: returns %d\n", | 802 | dfprintk(DIRCACHE, "NFS: %s: returns %d\n", |
@@ -800,35 +811,37 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
800 | last cookie cache takes care of the common case of reading the | 811 | last cookie cache takes care of the common case of reading the |
801 | whole directory. | 812 | whole directory. |
802 | */ | 813 | */ |
803 | static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 814 | static int nfs_readdir(struct file *file, struct dir_context *ctx) |
804 | { | 815 | { |
805 | struct dentry *dentry = filp->f_path.dentry; | 816 | struct dentry *dentry = file->f_path.dentry; |
806 | struct inode *inode = dentry->d_inode; | 817 | struct inode *inode = dentry->d_inode; |
807 | nfs_readdir_descriptor_t my_desc, | 818 | nfs_readdir_descriptor_t my_desc, |
808 | *desc = &my_desc; | 819 | *desc = &my_desc; |
809 | struct nfs_open_dir_context *dir_ctx = filp->private_data; | 820 | struct nfs_open_dir_context *dir_ctx = file->private_data; |
810 | int res; | 821 | int res = 0; |
811 | 822 | ||
812 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", | 823 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", |
813 | dentry->d_parent->d_name.name, dentry->d_name.name, | 824 | dentry->d_parent->d_name.name, dentry->d_name.name, |
814 | (long long)filp->f_pos); | 825 | (long long)ctx->pos); |
815 | nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); | 826 | nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); |
816 | 827 | ||
817 | /* | 828 | /* |
818 | * filp->f_pos points to the dirent entry number. | 829 | * ctx->pos points to the dirent entry number. |
819 | * *desc->dir_cookie has the cookie for the next entry. We have | 830 | * *desc->dir_cookie has the cookie for the next entry. We have |
820 | * to either find the entry with the appropriate number or | 831 | * to either find the entry with the appropriate number or |
821 | * revalidate the cookie. | 832 | * revalidate the cookie. |
822 | */ | 833 | */ |
823 | memset(desc, 0, sizeof(*desc)); | 834 | memset(desc, 0, sizeof(*desc)); |
824 | 835 | ||
825 | desc->file = filp; | 836 | desc->file = file; |
837 | desc->ctx = ctx; | ||
826 | desc->dir_cookie = &dir_ctx->dir_cookie; | 838 | desc->dir_cookie = &dir_ctx->dir_cookie; |
827 | desc->decode = NFS_PROTO(inode)->decode_dirent; | 839 | desc->decode = NFS_PROTO(inode)->decode_dirent; |
828 | desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0; | 840 | desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; |
829 | 841 | ||
830 | nfs_block_sillyrename(dentry); | 842 | nfs_block_sillyrename(dentry); |
831 | res = nfs_revalidate_mapping(inode, filp->f_mapping); | 843 | if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) |
844 | res = nfs_revalidate_mapping(inode, file->f_mapping); | ||
832 | if (res < 0) | 845 | if (res < 0) |
833 | goto out; | 846 | goto out; |
834 | 847 | ||
@@ -840,7 +853,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
840 | /* This means either end of directory */ | 853 | /* This means either end of directory */ |
841 | if (*desc->dir_cookie && desc->eof == 0) { | 854 | if (*desc->dir_cookie && desc->eof == 0) { |
842 | /* Or that the server has 'lost' a cookie */ | 855 | /* Or that the server has 'lost' a cookie */ |
843 | res = uncached_readdir(desc, dirent, filldir); | 856 | res = uncached_readdir(desc); |
844 | if (res == 0) | 857 | if (res == 0) |
845 | continue; | 858 | continue; |
846 | } | 859 | } |
@@ -857,7 +870,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
857 | if (res < 0) | 870 | if (res < 0) |
858 | break; | 871 | break; |
859 | 872 | ||
860 | res = nfs_do_filldir(desc, dirent, filldir); | 873 | res = nfs_do_filldir(desc); |
861 | if (res < 0) | 874 | if (res < 0) |
862 | break; | 875 | break; |
863 | } while (!desc->eof); | 876 | } while (!desc->eof); |
@@ -1040,6 +1053,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) | |||
1040 | struct dentry *parent; | 1053 | struct dentry *parent; |
1041 | struct nfs_fh *fhandle = NULL; | 1054 | struct nfs_fh *fhandle = NULL; |
1042 | struct nfs_fattr *fattr = NULL; | 1055 | struct nfs_fattr *fattr = NULL; |
1056 | struct nfs4_label *label = NULL; | ||
1043 | int error; | 1057 | int error; |
1044 | 1058 | ||
1045 | if (flags & LOOKUP_RCU) | 1059 | if (flags & LOOKUP_RCU) |
@@ -1082,7 +1096,11 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) | |||
1082 | if (fhandle == NULL || fattr == NULL) | 1096 | if (fhandle == NULL || fattr == NULL) |
1083 | goto out_error; | 1097 | goto out_error; |
1084 | 1098 | ||
1085 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1099 | label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); |
1100 | if (IS_ERR(label)) | ||
1101 | goto out_error; | ||
1102 | |||
1103 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); | ||
1086 | if (error) | 1104 | if (error) |
1087 | goto out_bad; | 1105 | goto out_bad; |
1088 | if (nfs_compare_fh(NFS_FH(inode), fhandle)) | 1106 | if (nfs_compare_fh(NFS_FH(inode), fhandle)) |
@@ -1090,8 +1108,12 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) | |||
1090 | if ((error = nfs_refresh_inode(inode, fattr)) != 0) | 1108 | if ((error = nfs_refresh_inode(inode, fattr)) != 0) |
1091 | goto out_bad; | 1109 | goto out_bad; |
1092 | 1110 | ||
1111 | nfs_setsecurity(inode, fattr, label); | ||
1112 | |||
1093 | nfs_free_fattr(fattr); | 1113 | nfs_free_fattr(fattr); |
1094 | nfs_free_fhandle(fhandle); | 1114 | nfs_free_fhandle(fhandle); |
1115 | nfs4_label_free(label); | ||
1116 | |||
1095 | out_set_verifier: | 1117 | out_set_verifier: |
1096 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 1118 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
1097 | out_valid: | 1119 | out_valid: |
@@ -1108,6 +1130,7 @@ out_zap_parent: | |||
1108 | out_bad: | 1130 | out_bad: |
1109 | nfs_free_fattr(fattr); | 1131 | nfs_free_fattr(fattr); |
1110 | nfs_free_fhandle(fhandle); | 1132 | nfs_free_fhandle(fhandle); |
1133 | nfs4_label_free(label); | ||
1111 | nfs_mark_for_revalidate(dir); | 1134 | nfs_mark_for_revalidate(dir); |
1112 | if (inode && S_ISDIR(inode->i_mode)) { | 1135 | if (inode && S_ISDIR(inode->i_mode)) { |
1113 | /* Purge readdir caches. */ | 1136 | /* Purge readdir caches. */ |
@@ -1128,6 +1151,7 @@ out_zap_parent: | |||
1128 | out_error: | 1151 | out_error: |
1129 | nfs_free_fattr(fattr); | 1152 | nfs_free_fattr(fattr); |
1130 | nfs_free_fhandle(fhandle); | 1153 | nfs_free_fhandle(fhandle); |
1154 | nfs4_label_free(label); | ||
1131 | dput(parent); | 1155 | dput(parent); |
1132 | dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n", | 1156 | dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n", |
1133 | __func__, dentry->d_parent->d_name.name, | 1157 | __func__, dentry->d_parent->d_name.name, |
@@ -1256,6 +1280,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in | |||
1256 | struct inode *inode = NULL; | 1280 | struct inode *inode = NULL; |
1257 | struct nfs_fh *fhandle = NULL; | 1281 | struct nfs_fh *fhandle = NULL; |
1258 | struct nfs_fattr *fattr = NULL; | 1282 | struct nfs_fattr *fattr = NULL; |
1283 | struct nfs4_label *label = NULL; | ||
1259 | int error; | 1284 | int error; |
1260 | 1285 | ||
1261 | dfprintk(VFS, "NFS: lookup(%s/%s)\n", | 1286 | dfprintk(VFS, "NFS: lookup(%s/%s)\n", |
@@ -1282,17 +1307,21 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in | |||
1282 | if (fhandle == NULL || fattr == NULL) | 1307 | if (fhandle == NULL || fattr == NULL) |
1283 | goto out; | 1308 | goto out; |
1284 | 1309 | ||
1310 | label = nfs4_label_alloc(NFS_SERVER(dir), GFP_NOWAIT); | ||
1311 | if (IS_ERR(label)) | ||
1312 | goto out; | ||
1313 | |||
1285 | parent = dentry->d_parent; | 1314 | parent = dentry->d_parent; |
1286 | /* Protect against concurrent sillydeletes */ | 1315 | /* Protect against concurrent sillydeletes */ |
1287 | nfs_block_sillyrename(parent); | 1316 | nfs_block_sillyrename(parent); |
1288 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1317 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); |
1289 | if (error == -ENOENT) | 1318 | if (error == -ENOENT) |
1290 | goto no_entry; | 1319 | goto no_entry; |
1291 | if (error < 0) { | 1320 | if (error < 0) { |
1292 | res = ERR_PTR(error); | 1321 | res = ERR_PTR(error); |
1293 | goto out_unblock_sillyrename; | 1322 | goto out_unblock_sillyrename; |
1294 | } | 1323 | } |
1295 | inode = nfs_fhget(dentry->d_sb, fhandle, fattr); | 1324 | inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); |
1296 | res = ERR_CAST(inode); | 1325 | res = ERR_CAST(inode); |
1297 | if (IS_ERR(res)) | 1326 | if (IS_ERR(res)) |
1298 | goto out_unblock_sillyrename; | 1327 | goto out_unblock_sillyrename; |
@@ -1310,6 +1339,7 @@ no_entry: | |||
1310 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 1339 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
1311 | out_unblock_sillyrename: | 1340 | out_unblock_sillyrename: |
1312 | nfs_unblock_sillyrename(parent); | 1341 | nfs_unblock_sillyrename(parent); |
1342 | nfs4_label_free(label); | ||
1313 | out: | 1343 | out: |
1314 | nfs_free_fattr(fattr); | 1344 | nfs_free_fattr(fattr); |
1315 | nfs_free_fhandle(fhandle); | 1345 | nfs_free_fhandle(fhandle); |
@@ -1357,18 +1387,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx, | |||
1357 | { | 1387 | { |
1358 | int err; | 1388 | int err; |
1359 | 1389 | ||
1360 | if (ctx->dentry != dentry) { | ||
1361 | dput(ctx->dentry); | ||
1362 | ctx->dentry = dget(dentry); | ||
1363 | } | ||
1364 | |||
1365 | /* If the open_intent is for execute, we have an extra check to make */ | ||
1366 | if (ctx->mode & FMODE_EXEC) { | ||
1367 | err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags); | ||
1368 | if (err < 0) | ||
1369 | goto out; | ||
1370 | } | ||
1371 | |||
1372 | err = finish_open(file, dentry, do_open, opened); | 1390 | err = finish_open(file, dentry, do_open, opened); |
1373 | if (err) | 1391 | if (err) |
1374 | goto out; | 1392 | goto out; |
@@ -1427,13 +1445,13 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, | |||
1427 | 1445 | ||
1428 | nfs_block_sillyrename(dentry->d_parent); | 1446 | nfs_block_sillyrename(dentry->d_parent); |
1429 | inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); | 1447 | inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); |
1430 | d_drop(dentry); | 1448 | nfs_unblock_sillyrename(dentry->d_parent); |
1431 | if (IS_ERR(inode)) { | 1449 | if (IS_ERR(inode)) { |
1432 | nfs_unblock_sillyrename(dentry->d_parent); | ||
1433 | put_nfs_open_context(ctx); | 1450 | put_nfs_open_context(ctx); |
1434 | err = PTR_ERR(inode); | 1451 | err = PTR_ERR(inode); |
1435 | switch (err) { | 1452 | switch (err) { |
1436 | case -ENOENT: | 1453 | case -ENOENT: |
1454 | d_drop(dentry); | ||
1437 | d_add(dentry, NULL); | 1455 | d_add(dentry, NULL); |
1438 | break; | 1456 | break; |
1439 | case -EISDIR: | 1457 | case -EISDIR: |
@@ -1449,16 +1467,8 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, | |||
1449 | } | 1467 | } |
1450 | goto out; | 1468 | goto out; |
1451 | } | 1469 | } |
1452 | res = d_add_unique(dentry, inode); | ||
1453 | if (res != NULL) | ||
1454 | dentry = res; | ||
1455 | |||
1456 | nfs_unblock_sillyrename(dentry->d_parent); | ||
1457 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1458 | |||
1459 | err = nfs_finish_open(ctx, dentry, file, open_flags, opened); | ||
1460 | 1470 | ||
1461 | dput(res); | 1471 | err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened); |
1462 | out: | 1472 | out: |
1463 | return err; | 1473 | return err; |
1464 | 1474 | ||
@@ -1528,7 +1538,8 @@ no_open: | |||
1528 | * Code common to create, mkdir, and mknod. | 1538 | * Code common to create, mkdir, and mknod. |
1529 | */ | 1539 | */ |
1530 | int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, | 1540 | int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, |
1531 | struct nfs_fattr *fattr) | 1541 | struct nfs_fattr *fattr, |
1542 | struct nfs4_label *label) | ||
1532 | { | 1543 | { |
1533 | struct dentry *parent = dget_parent(dentry); | 1544 | struct dentry *parent = dget_parent(dentry); |
1534 | struct inode *dir = parent->d_inode; | 1545 | struct inode *dir = parent->d_inode; |
@@ -1541,18 +1552,18 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, | |||
1541 | if (dentry->d_inode) | 1552 | if (dentry->d_inode) |
1542 | goto out; | 1553 | goto out; |
1543 | if (fhandle->size == 0) { | 1554 | if (fhandle->size == 0) { |
1544 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1555 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL); |
1545 | if (error) | 1556 | if (error) |
1546 | goto out_error; | 1557 | goto out_error; |
1547 | } | 1558 | } |
1548 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 1559 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
1549 | if (!(fattr->valid & NFS_ATTR_FATTR)) { | 1560 | if (!(fattr->valid & NFS_ATTR_FATTR)) { |
1550 | struct nfs_server *server = NFS_SB(dentry->d_sb); | 1561 | struct nfs_server *server = NFS_SB(dentry->d_sb); |
1551 | error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); | 1562 | error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr, NULL); |
1552 | if (error < 0) | 1563 | if (error < 0) |
1553 | goto out_error; | 1564 | goto out_error; |
1554 | } | 1565 | } |
1555 | inode = nfs_fhget(dentry->d_sb, fhandle, fattr); | 1566 | inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); |
1556 | error = PTR_ERR(inode); | 1567 | error = PTR_ERR(inode); |
1557 | if (IS_ERR(inode)) | 1568 | if (IS_ERR(inode)) |
1558 | goto out_error; | 1569 | goto out_error; |
@@ -1721,7 +1732,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) | |||
1721 | dir->i_ino, dentry->d_name.name); | 1732 | dir->i_ino, dentry->d_name.name); |
1722 | 1733 | ||
1723 | spin_lock(&dentry->d_lock); | 1734 | spin_lock(&dentry->d_lock); |
1724 | if (dentry->d_count > 1) { | 1735 | if (d_count(dentry) > 1) { |
1725 | spin_unlock(&dentry->d_lock); | 1736 | spin_unlock(&dentry->d_lock); |
1726 | /* Start asynchronous writeout of the inode */ | 1737 | /* Start asynchronous writeout of the inode */ |
1727 | write_inode_now(dentry->d_inode, 0); | 1738 | write_inode_now(dentry->d_inode, 0); |
@@ -1759,7 +1770,6 @@ EXPORT_SYMBOL_GPL(nfs_unlink); | |||
1759 | */ | 1770 | */ |
1760 | int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) | 1771 | int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) |
1761 | { | 1772 | { |
1762 | struct pagevec lru_pvec; | ||
1763 | struct page *page; | 1773 | struct page *page; |
1764 | char *kaddr; | 1774 | char *kaddr; |
1765 | struct iattr attr; | 1775 | struct iattr attr; |
@@ -1799,11 +1809,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) | |||
1799 | * No big deal if we can't add this page to the page cache here. | 1809 | * No big deal if we can't add this page to the page cache here. |
1800 | * READLINK will get the missing page from the server if needed. | 1810 | * READLINK will get the missing page from the server if needed. |
1801 | */ | 1811 | */ |
1802 | pagevec_init(&lru_pvec, 0); | 1812 | if (!add_to_page_cache_lru(page, dentry->d_inode->i_mapping, 0, |
1803 | if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, | ||
1804 | GFP_KERNEL)) { | 1813 | GFP_KERNEL)) { |
1805 | pagevec_add(&lru_pvec, page); | ||
1806 | pagevec_lru_add_file(&lru_pvec); | ||
1807 | SetPageUptodate(page); | 1814 | SetPageUptodate(page); |
1808 | unlock_page(page); | 1815 | unlock_page(page); |
1809 | } else | 1816 | } else |
@@ -1870,7 +1877,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1870 | dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", | 1877 | dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", |
1871 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, | 1878 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, |
1872 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name, | 1879 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name, |
1873 | new_dentry->d_count); | 1880 | d_count(new_dentry)); |
1874 | 1881 | ||
1875 | /* | 1882 | /* |
1876 | * For non-directories, check whether the target is busy and if so, | 1883 | * For non-directories, check whether the target is busy and if so, |
@@ -1888,7 +1895,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1888 | rehash = new_dentry; | 1895 | rehash = new_dentry; |
1889 | } | 1896 | } |
1890 | 1897 | ||
1891 | if (new_dentry->d_count > 2) { | 1898 | if (d_count(new_dentry) > 2) { |
1892 | int err; | 1899 | int err; |
1893 | 1900 | ||
1894 | /* copy the target dentry's name */ | 1901 | /* copy the target dentry's name */ |
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 945527092295..fc0f95ec7358 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c | |||
@@ -29,7 +29,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, | |||
29 | kfree(ip_addr); | 29 | kfree(ip_addr); |
30 | return ret; | 30 | return ret; |
31 | } | 31 | } |
32 | EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); | ||
33 | 32 | ||
34 | #else | 33 | #else |
35 | 34 | ||
@@ -351,7 +350,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, | |||
351 | ret = -ESRCH; | 350 | ret = -ESRCH; |
352 | return ret; | 351 | return ret; |
353 | } | 352 | } |
354 | EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); | ||
355 | 353 | ||
356 | static struct cache_detail nfs_dns_resolve_template = { | 354 | static struct cache_detail nfs_dns_resolve_template = { |
357 | .owner = THIS_MODULE, | 355 | .owner = THIS_MODULE, |
@@ -396,6 +394,21 @@ void nfs_dns_resolver_cache_destroy(struct net *net) | |||
396 | cache_destroy_net(nn->nfs_dns_resolve, net); | 394 | cache_destroy_net(nn->nfs_dns_resolve, net); |
397 | } | 395 | } |
398 | 396 | ||
397 | static int nfs4_dns_net_init(struct net *net) | ||
398 | { | ||
399 | return nfs_dns_resolver_cache_init(net); | ||
400 | } | ||
401 | |||
402 | static void nfs4_dns_net_exit(struct net *net) | ||
403 | { | ||
404 | nfs_dns_resolver_cache_destroy(net); | ||
405 | } | ||
406 | |||
407 | static struct pernet_operations nfs4_dns_resolver_ops = { | ||
408 | .init = nfs4_dns_net_init, | ||
409 | .exit = nfs4_dns_net_exit, | ||
410 | }; | ||
411 | |||
399 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, | 412 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, |
400 | void *ptr) | 413 | void *ptr) |
401 | { | 414 | { |
@@ -432,11 +445,24 @@ static struct notifier_block nfs_dns_resolver_block = { | |||
432 | 445 | ||
433 | int nfs_dns_resolver_init(void) | 446 | int nfs_dns_resolver_init(void) |
434 | { | 447 | { |
435 | return rpc_pipefs_notifier_register(&nfs_dns_resolver_block); | 448 | int err; |
449 | |||
450 | err = register_pernet_subsys(&nfs4_dns_resolver_ops); | ||
451 | if (err < 0) | ||
452 | goto out; | ||
453 | err = rpc_pipefs_notifier_register(&nfs_dns_resolver_block); | ||
454 | if (err < 0) | ||
455 | goto out1; | ||
456 | return 0; | ||
457 | out1: | ||
458 | unregister_pernet_subsys(&nfs4_dns_resolver_ops); | ||
459 | out: | ||
460 | return err; | ||
436 | } | 461 | } |
437 | 462 | ||
438 | void nfs_dns_resolver_destroy(void) | 463 | void nfs_dns_resolver_destroy(void) |
439 | { | 464 | { |
440 | rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block); | 465 | rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block); |
466 | unregister_pernet_subsys(&nfs4_dns_resolver_ops); | ||
441 | } | 467 | } |
442 | #endif | 468 | #endif |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a87a44f84113..94e94bd11aae 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -451,11 +451,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, | |||
451 | * - Called if either PG_private or PG_fscache is set on the page | 451 | * - Called if either PG_private or PG_fscache is set on the page |
452 | * - Caller holds page lock | 452 | * - Caller holds page lock |
453 | */ | 453 | */ |
454 | static void nfs_invalidate_page(struct page *page, unsigned long offset) | 454 | static void nfs_invalidate_page(struct page *page, unsigned int offset, |
455 | unsigned int length) | ||
455 | { | 456 | { |
456 | dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); | 457 | dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n", |
458 | page, offset, length); | ||
457 | 459 | ||
458 | if (offset != 0) | 460 | if (offset != 0 || length < PAGE_CACHE_SIZE) |
459 | return; | 461 | return; |
460 | /* Cancel any unstarted writes on this page */ | 462 | /* Cancel any unstarted writes on this page */ |
461 | nfs_wb_page_cancel(page_file_mapping(page)->host, page); | 463 | nfs_wb_page_cancel(page_file_mapping(page)->host, page); |
@@ -493,6 +495,35 @@ static int nfs_release_page(struct page *page, gfp_t gfp) | |||
493 | return nfs_fscache_release_page(page, gfp); | 495 | return nfs_fscache_release_page(page, gfp); |
494 | } | 496 | } |
495 | 497 | ||
498 | static void nfs_check_dirty_writeback(struct page *page, | ||
499 | bool *dirty, bool *writeback) | ||
500 | { | ||
501 | struct nfs_inode *nfsi; | ||
502 | struct address_space *mapping = page_file_mapping(page); | ||
503 | |||
504 | if (!mapping || PageSwapCache(page)) | ||
505 | return; | ||
506 | |||
507 | /* | ||
508 | * Check if an unstable page is currently being committed and | ||
509 | * if so, have the VM treat it as if the page is under writeback | ||
510 | * so it will not block due to pages that will shortly be freeable. | ||
511 | */ | ||
512 | nfsi = NFS_I(mapping->host); | ||
513 | if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { | ||
514 | *writeback = true; | ||
515 | return; | ||
516 | } | ||
517 | |||
518 | /* | ||
519 | * If PagePrivate() is set, then the page is not freeable and as the | ||
520 | * inode is not being committed, it's not going to be cleaned in the | ||
521 | * near future so treat it as dirty | ||
522 | */ | ||
523 | if (PagePrivate(page)) | ||
524 | *dirty = true; | ||
525 | } | ||
526 | |||
496 | /* | 527 | /* |
497 | * Attempt to clear the private state associated with a page when an error | 528 | * Attempt to clear the private state associated with a page when an error |
498 | * occurs that requires the cached contents of an inode to be written back or | 529 | * occurs that requires the cached contents of an inode to be written back or |
@@ -540,6 +571,7 @@ const struct address_space_operations nfs_file_aops = { | |||
540 | .direct_IO = nfs_direct_IO, | 571 | .direct_IO = nfs_direct_IO, |
541 | .migratepage = nfs_migrate_page, | 572 | .migratepage = nfs_migrate_page, |
542 | .launder_page = nfs_launder_page, | 573 | .launder_page = nfs_launder_page, |
574 | .is_dirty_writeback = nfs_check_dirty_writeback, | ||
543 | .error_remove_page = generic_error_remove_page, | 575 | .error_remove_page = generic_error_remove_page, |
544 | #ifdef CONFIG_NFS_SWAP | 576 | #ifdef CONFIG_NFS_SWAP |
545 | .swap_activate = nfs_swap_activate, | 577 | .swap_activate = nfs_swap_activate, |
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 44efaa8c5f78..66984a9aafaa 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c | |||
@@ -95,7 +95,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, | |||
95 | goto out; | 95 | goto out; |
96 | } | 96 | } |
97 | 97 | ||
98 | inode = nfs_fhget(sb, mntfh, fsinfo.fattr); | 98 | inode = nfs_fhget(sb, mntfh, fsinfo.fattr, NULL); |
99 | if (IS_ERR(inode)) { | 99 | if (IS_ERR(inode)) { |
100 | dprintk("nfs_get_root: get root inode failed\n"); | 100 | dprintk("nfs_get_root: get root inode failed\n"); |
101 | ret = ERR_CAST(inode); | 101 | ret = ERR_CAST(inode); |
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index c516da5873fd..c2c4163d5683 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -262,29 +262,42 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen, | |||
262 | return desclen; | 262 | return desclen; |
263 | } | 263 | } |
264 | 264 | ||
265 | static ssize_t nfs_idmap_request_key(struct key_type *key_type, | 265 | static struct key *nfs_idmap_request_key(const char *name, size_t namelen, |
266 | const char *name, size_t namelen, | 266 | const char *type, struct idmap *idmap) |
267 | const char *type, void *data, | ||
268 | size_t data_size, struct idmap *idmap) | ||
269 | { | 267 | { |
270 | const struct cred *saved_cred; | ||
271 | struct key *rkey; | ||
272 | char *desc; | 268 | char *desc; |
273 | struct user_key_payload *payload; | 269 | struct key *rkey; |
274 | ssize_t ret; | 270 | ssize_t ret; |
275 | 271 | ||
276 | ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc); | 272 | ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc); |
277 | if (ret <= 0) | 273 | if (ret <= 0) |
278 | goto out; | 274 | return ERR_PTR(ret); |
275 | |||
276 | rkey = request_key(&key_type_id_resolver, desc, ""); | ||
277 | if (IS_ERR(rkey)) { | ||
278 | mutex_lock(&idmap->idmap_mutex); | ||
279 | rkey = request_key_with_auxdata(&key_type_id_resolver_legacy, | ||
280 | desc, "", 0, idmap); | ||
281 | mutex_unlock(&idmap->idmap_mutex); | ||
282 | } | ||
283 | |||
284 | kfree(desc); | ||
285 | return rkey; | ||
286 | } | ||
287 | |||
288 | static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, | ||
289 | const char *type, void *data, | ||
290 | size_t data_size, struct idmap *idmap) | ||
291 | { | ||
292 | const struct cred *saved_cred; | ||
293 | struct key *rkey; | ||
294 | struct user_key_payload *payload; | ||
295 | ssize_t ret; | ||
279 | 296 | ||
280 | saved_cred = override_creds(id_resolver_cache); | 297 | saved_cred = override_creds(id_resolver_cache); |
281 | if (idmap) | 298 | rkey = nfs_idmap_request_key(name, namelen, type, idmap); |
282 | rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap); | ||
283 | else | ||
284 | rkey = request_key(&key_type_id_resolver, desc, ""); | ||
285 | revert_creds(saved_cred); | 299 | revert_creds(saved_cred); |
286 | 300 | ||
287 | kfree(desc); | ||
288 | if (IS_ERR(rkey)) { | 301 | if (IS_ERR(rkey)) { |
289 | ret = PTR_ERR(rkey); | 302 | ret = PTR_ERR(rkey); |
290 | goto out; | 303 | goto out; |
@@ -316,23 +329,6 @@ out: | |||
316 | return ret; | 329 | return ret; |
317 | } | 330 | } |
318 | 331 | ||
319 | static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, | ||
320 | const char *type, void *data, | ||
321 | size_t data_size, struct idmap *idmap) | ||
322 | { | ||
323 | ssize_t ret = nfs_idmap_request_key(&key_type_id_resolver, | ||
324 | name, namelen, type, data, | ||
325 | data_size, NULL); | ||
326 | if (ret < 0) { | ||
327 | mutex_lock(&idmap->idmap_mutex); | ||
328 | ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, | ||
329 | name, namelen, type, data, | ||
330 | data_size, idmap); | ||
331 | mutex_unlock(&idmap->idmap_mutex); | ||
332 | } | ||
333 | return ret; | ||
334 | } | ||
335 | |||
336 | /* ID -> Name */ | 332 | /* ID -> Name */ |
337 | static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, | 333 | static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, |
338 | size_t buflen, struct idmap *idmap) | 334 | size_t buflen, struct idmap *idmap) |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c1c7a9d78722..941246f2b43d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -48,7 +48,6 @@ | |||
48 | #include "iostat.h" | 48 | #include "iostat.h" |
49 | #include "internal.h" | 49 | #include "internal.h" |
50 | #include "fscache.h" | 50 | #include "fscache.h" |
51 | #include "dns_resolve.h" | ||
52 | #include "pnfs.h" | 51 | #include "pnfs.h" |
53 | #include "nfs.h" | 52 | #include "nfs.h" |
54 | #include "netns.h" | 53 | #include "netns.h" |
@@ -79,7 +78,7 @@ int nfs_wait_bit_killable(void *word) | |||
79 | { | 78 | { |
80 | if (fatal_signal_pending(current)) | 79 | if (fatal_signal_pending(current)) |
81 | return -ERESTARTSYS; | 80 | return -ERESTARTSYS; |
82 | freezable_schedule(); | 81 | freezable_schedule_unsafe(); |
83 | return 0; | 82 | return 0; |
84 | } | 83 | } |
85 | EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); | 84 | EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); |
@@ -162,11 +161,19 @@ static void nfs_zap_caches_locked(struct inode *inode) | |||
162 | 161 | ||
163 | memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); | 162 | memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); |
164 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { | 163 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { |
165 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; | ||
166 | nfs_fscache_invalidate(inode); | 164 | nfs_fscache_invalidate(inode); |
167 | } else { | 165 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR |
168 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; | 166 | | NFS_INO_INVALID_LABEL |
169 | } | 167 | | NFS_INO_INVALID_DATA |
168 | | NFS_INO_INVALID_ACCESS | ||
169 | | NFS_INO_INVALID_ACL | ||
170 | | NFS_INO_REVAL_PAGECACHE; | ||
171 | } else | ||
172 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR | ||
173 | | NFS_INO_INVALID_LABEL | ||
174 | | NFS_INO_INVALID_ACCESS | ||
175 | | NFS_INO_INVALID_ACL | ||
176 | | NFS_INO_REVAL_PAGECACHE; | ||
170 | } | 177 | } |
171 | 178 | ||
172 | void nfs_zap_caches(struct inode *inode) | 179 | void nfs_zap_caches(struct inode *inode) |
@@ -257,12 +264,72 @@ nfs_init_locked(struct inode *inode, void *opaque) | |||
257 | return 0; | 264 | return 0; |
258 | } | 265 | } |
259 | 266 | ||
267 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
268 | void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, | ||
269 | struct nfs4_label *label) | ||
270 | { | ||
271 | int error; | ||
272 | |||
273 | if (label == NULL) | ||
274 | return; | ||
275 | |||
276 | if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL) == 0) | ||
277 | return; | ||
278 | |||
279 | if (NFS_SERVER(inode)->nfs_client->cl_minorversion < 2) | ||
280 | return; | ||
281 | |||
282 | if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) { | ||
283 | error = security_inode_notifysecctx(inode, label->label, | ||
284 | label->len); | ||
285 | if (error) | ||
286 | printk(KERN_ERR "%s() %s %d " | ||
287 | "security_inode_notifysecctx() %d\n", | ||
288 | __func__, | ||
289 | (char *)label->label, | ||
290 | label->len, error); | ||
291 | } | ||
292 | } | ||
293 | |||
294 | struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) | ||
295 | { | ||
296 | struct nfs4_label *label = NULL; | ||
297 | int minor_version = server->nfs_client->cl_minorversion; | ||
298 | |||
299 | if (minor_version < 2) | ||
300 | return label; | ||
301 | |||
302 | if (!(server->caps & NFS_CAP_SECURITY_LABEL)) | ||
303 | return label; | ||
304 | |||
305 | label = kzalloc(sizeof(struct nfs4_label), flags); | ||
306 | if (label == NULL) | ||
307 | return ERR_PTR(-ENOMEM); | ||
308 | |||
309 | label->label = kzalloc(NFS4_MAXLABELLEN, flags); | ||
310 | if (label->label == NULL) { | ||
311 | kfree(label); | ||
312 | return ERR_PTR(-ENOMEM); | ||
313 | } | ||
314 | label->len = NFS4_MAXLABELLEN; | ||
315 | |||
316 | return label; | ||
317 | } | ||
318 | EXPORT_SYMBOL_GPL(nfs4_label_alloc); | ||
319 | #else | ||
320 | void inline nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, | ||
321 | struct nfs4_label *label) | ||
322 | { | ||
323 | } | ||
324 | #endif | ||
325 | EXPORT_SYMBOL_GPL(nfs_setsecurity); | ||
326 | |||
260 | /* | 327 | /* |
261 | * This is our front-end to iget that looks up inodes by file handle | 328 | * This is our front-end to iget that looks up inodes by file handle |
262 | * instead of inode number. | 329 | * instead of inode number. |
263 | */ | 330 | */ |
264 | struct inode * | 331 | struct inode * |
265 | nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | 332 | nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label) |
266 | { | 333 | { |
267 | struct nfs_find_desc desc = { | 334 | struct nfs_find_desc desc = { |
268 | .fh = fh, | 335 | .fh = fh, |
@@ -384,6 +451,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
384 | */ | 451 | */ |
385 | inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); | 452 | inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); |
386 | } | 453 | } |
454 | |||
455 | nfs_setsecurity(inode, fattr, label); | ||
456 | |||
387 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); | 457 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); |
388 | nfsi->attrtimeo_timestamp = now; | 458 | nfsi->attrtimeo_timestamp = now; |
389 | nfsi->access_cache = RB_ROOT; | 459 | nfsi->access_cache = RB_ROOT; |
@@ -449,7 +519,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
449 | NFS_PROTO(inode)->return_delegation(inode); | 519 | NFS_PROTO(inode)->return_delegation(inode); |
450 | error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); | 520 | error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); |
451 | if (error == 0) | 521 | if (error == 0) |
452 | nfs_refresh_inode(inode, fattr); | 522 | error = nfs_refresh_inode(inode, fattr); |
453 | nfs_free_fattr(fattr); | 523 | nfs_free_fattr(fattr); |
454 | out: | 524 | out: |
455 | return error; | 525 | return error; |
@@ -713,16 +783,23 @@ EXPORT_SYMBOL_GPL(put_nfs_open_context); | |||
713 | * Ensure that mmap has a recent RPC credential for use when writing out | 783 | * Ensure that mmap has a recent RPC credential for use when writing out |
714 | * shared pages | 784 | * shared pages |
715 | */ | 785 | */ |
716 | void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) | 786 | void nfs_inode_attach_open_context(struct nfs_open_context *ctx) |
717 | { | 787 | { |
718 | struct inode *inode = file_inode(filp); | 788 | struct inode *inode = ctx->dentry->d_inode; |
719 | struct nfs_inode *nfsi = NFS_I(inode); | 789 | struct nfs_inode *nfsi = NFS_I(inode); |
720 | 790 | ||
721 | filp->private_data = get_nfs_open_context(ctx); | ||
722 | spin_lock(&inode->i_lock); | 791 | spin_lock(&inode->i_lock); |
723 | list_add(&ctx->list, &nfsi->open_files); | 792 | list_add(&ctx->list, &nfsi->open_files); |
724 | spin_unlock(&inode->i_lock); | 793 | spin_unlock(&inode->i_lock); |
725 | } | 794 | } |
795 | EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); | ||
796 | |||
797 | void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) | ||
798 | { | ||
799 | filp->private_data = get_nfs_open_context(ctx); | ||
800 | if (list_empty(&ctx->list)) | ||
801 | nfs_inode_attach_open_context(ctx); | ||
802 | } | ||
726 | EXPORT_SYMBOL_GPL(nfs_file_set_open_context); | 803 | EXPORT_SYMBOL_GPL(nfs_file_set_open_context); |
727 | 804 | ||
728 | /* | 805 | /* |
@@ -748,10 +825,11 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c | |||
748 | 825 | ||
749 | static void nfs_file_clear_open_context(struct file *filp) | 826 | static void nfs_file_clear_open_context(struct file *filp) |
750 | { | 827 | { |
751 | struct inode *inode = file_inode(filp); | ||
752 | struct nfs_open_context *ctx = nfs_file_open_context(filp); | 828 | struct nfs_open_context *ctx = nfs_file_open_context(filp); |
753 | 829 | ||
754 | if (ctx) { | 830 | if (ctx) { |
831 | struct inode *inode = ctx->dentry->d_inode; | ||
832 | |||
755 | filp->private_data = NULL; | 833 | filp->private_data = NULL; |
756 | spin_lock(&inode->i_lock); | 834 | spin_lock(&inode->i_lock); |
757 | list_move_tail(&ctx->list, &NFS_I(inode)->open_files); | 835 | list_move_tail(&ctx->list, &NFS_I(inode)->open_files); |
@@ -790,6 +868,7 @@ int | |||
790 | __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | 868 | __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) |
791 | { | 869 | { |
792 | int status = -ESTALE; | 870 | int status = -ESTALE; |
871 | struct nfs4_label *label = NULL; | ||
793 | struct nfs_fattr *fattr = NULL; | 872 | struct nfs_fattr *fattr = NULL; |
794 | struct nfs_inode *nfsi = NFS_I(inode); | 873 | struct nfs_inode *nfsi = NFS_I(inode); |
795 | 874 | ||
@@ -807,7 +886,14 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
807 | goto out; | 886 | goto out; |
808 | 887 | ||
809 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); | 888 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); |
810 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr); | 889 | |
890 | label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); | ||
891 | if (IS_ERR(label)) { | ||
892 | status = PTR_ERR(label); | ||
893 | goto out; | ||
894 | } | ||
895 | |||
896 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, label); | ||
811 | if (status != 0) { | 897 | if (status != 0) { |
812 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", | 898 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", |
813 | inode->i_sb->s_id, | 899 | inode->i_sb->s_id, |
@@ -817,7 +903,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
817 | if (!S_ISDIR(inode->i_mode)) | 903 | if (!S_ISDIR(inode->i_mode)) |
818 | set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); | 904 | set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); |
819 | } | 905 | } |
820 | goto out; | 906 | goto err_out; |
821 | } | 907 | } |
822 | 908 | ||
823 | status = nfs_refresh_inode(inode, fattr); | 909 | status = nfs_refresh_inode(inode, fattr); |
@@ -825,7 +911,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
825 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", | 911 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", |
826 | inode->i_sb->s_id, | 912 | inode->i_sb->s_id, |
827 | (long long)NFS_FILEID(inode), status); | 913 | (long long)NFS_FILEID(inode), status); |
828 | goto out; | 914 | goto err_out; |
829 | } | 915 | } |
830 | 916 | ||
831 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) | 917 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) |
@@ -835,7 +921,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
835 | inode->i_sb->s_id, | 921 | inode->i_sb->s_id, |
836 | (long long)NFS_FILEID(inode)); | 922 | (long long)NFS_FILEID(inode)); |
837 | 923 | ||
838 | out: | 924 | err_out: |
925 | nfs4_label_free(label); | ||
926 | out: | ||
839 | nfs_free_fattr(fattr); | 927 | nfs_free_fattr(fattr); |
840 | return status; | 928 | return status; |
841 | } | 929 | } |
@@ -847,7 +935,7 @@ int nfs_attribute_timeout(struct inode *inode) | |||
847 | return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); | 935 | return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); |
848 | } | 936 | } |
849 | 937 | ||
850 | static int nfs_attribute_cache_expired(struct inode *inode) | 938 | int nfs_attribute_cache_expired(struct inode *inode) |
851 | { | 939 | { |
852 | if (nfs_have_delegated_attributes(inode)) | 940 | if (nfs_have_delegated_attributes(inode)) |
853 | return 0; | 941 | return 0; |
@@ -863,7 +951,8 @@ static int nfs_attribute_cache_expired(struct inode *inode) | |||
863 | */ | 951 | */ |
864 | int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | 952 | int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) |
865 | { | 953 | { |
866 | if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR) | 954 | if (!(NFS_I(inode)->cache_validity & |
955 | (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) | ||
867 | && !nfs_attribute_cache_expired(inode)) | 956 | && !nfs_attribute_cache_expired(inode)) |
868 | return NFS_STALE(inode) ? -ESTALE : 0; | 957 | return NFS_STALE(inode) ? -ESTALE : 0; |
869 | return __nfs_revalidate_inode(server, inode); | 958 | return __nfs_revalidate_inode(server, inode); |
@@ -873,9 +962,15 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode); | |||
873 | static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) | 962 | static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) |
874 | { | 963 | { |
875 | struct nfs_inode *nfsi = NFS_I(inode); | 964 | struct nfs_inode *nfsi = NFS_I(inode); |
876 | 965 | int ret; | |
966 | |||
877 | if (mapping->nrpages != 0) { | 967 | if (mapping->nrpages != 0) { |
878 | int ret = invalidate_inode_pages2(mapping); | 968 | if (S_ISREG(inode->i_mode)) { |
969 | ret = nfs_sync_mapping(mapping); | ||
970 | if (ret < 0) | ||
971 | return ret; | ||
972 | } | ||
973 | ret = invalidate_inode_pages2(mapping); | ||
879 | if (ret < 0) | 974 | if (ret < 0) |
880 | return ret; | 975 | return ret; |
881 | } | 976 | } |
@@ -1243,6 +1338,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1243 | spin_lock(&inode->i_lock); | 1338 | spin_lock(&inode->i_lock); |
1244 | status = nfs_post_op_update_inode_locked(inode, fattr); | 1339 | status = nfs_post_op_update_inode_locked(inode, fattr); |
1245 | spin_unlock(&inode->i_lock); | 1340 | spin_unlock(&inode->i_lock); |
1341 | |||
1246 | return status; | 1342 | return status; |
1247 | } | 1343 | } |
1248 | EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); | 1344 | EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); |
@@ -1483,7 +1579,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1483 | inode->i_blocks = fattr->du.nfs2.blocks; | 1579 | inode->i_blocks = fattr->du.nfs2.blocks; |
1484 | 1580 | ||
1485 | /* Update attrtimeo value if we're out of the unstable period */ | 1581 | /* Update attrtimeo value if we're out of the unstable period */ |
1486 | if (invalid & NFS_INO_INVALID_ATTR) { | 1582 | if (invalid & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) { |
1487 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); | 1583 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); |
1488 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); | 1584 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); |
1489 | nfsi->attrtimeo_timestamp = now; | 1585 | nfsi->attrtimeo_timestamp = now; |
@@ -1496,6 +1592,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1496 | } | 1592 | } |
1497 | } | 1593 | } |
1498 | invalid &= ~NFS_INO_INVALID_ATTR; | 1594 | invalid &= ~NFS_INO_INVALID_ATTR; |
1595 | invalid &= ~NFS_INO_INVALID_LABEL; | ||
1499 | /* Don't invalidate the data if we were to blame */ | 1596 | /* Don't invalidate the data if we were to blame */ |
1500 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) | 1597 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) |
1501 | || S_ISLNK(inode->i_mode))) | 1598 | || S_ISLNK(inode->i_mode))) |
@@ -1638,12 +1735,11 @@ EXPORT_SYMBOL_GPL(nfs_net_id); | |||
1638 | static int nfs_net_init(struct net *net) | 1735 | static int nfs_net_init(struct net *net) |
1639 | { | 1736 | { |
1640 | nfs_clients_init(net); | 1737 | nfs_clients_init(net); |
1641 | return nfs_dns_resolver_cache_init(net); | 1738 | return 0; |
1642 | } | 1739 | } |
1643 | 1740 | ||
1644 | static void nfs_net_exit(struct net *net) | 1741 | static void nfs_net_exit(struct net *net) |
1645 | { | 1742 | { |
1646 | nfs_dns_resolver_cache_destroy(net); | ||
1647 | nfs_cleanup_cb_ident_idr(net); | 1743 | nfs_cleanup_cb_ident_idr(net); |
1648 | } | 1744 | } |
1649 | 1745 | ||
@@ -1661,10 +1757,6 @@ static int __init init_nfs_fs(void) | |||
1661 | { | 1757 | { |
1662 | int err; | 1758 | int err; |
1663 | 1759 | ||
1664 | err = nfs_dns_resolver_init(); | ||
1665 | if (err < 0) | ||
1666 | goto out10;; | ||
1667 | |||
1668 | err = register_pernet_subsys(&nfs_net_ops); | 1760 | err = register_pernet_subsys(&nfs_net_ops); |
1669 | if (err < 0) | 1761 | if (err < 0) |
1670 | goto out9; | 1762 | goto out9; |
@@ -1730,8 +1822,6 @@ out7: | |||
1730 | out8: | 1822 | out8: |
1731 | unregister_pernet_subsys(&nfs_net_ops); | 1823 | unregister_pernet_subsys(&nfs_net_ops); |
1732 | out9: | 1824 | out9: |
1733 | nfs_dns_resolver_destroy(); | ||
1734 | out10: | ||
1735 | return err; | 1825 | return err; |
1736 | } | 1826 | } |
1737 | 1827 | ||
@@ -1744,7 +1834,6 @@ static void __exit exit_nfs_fs(void) | |||
1744 | nfs_destroy_nfspagecache(); | 1834 | nfs_destroy_nfspagecache(); |
1745 | nfs_fscache_unregister(); | 1835 | nfs_fscache_unregister(); |
1746 | unregister_pernet_subsys(&nfs_net_ops); | 1836 | unregister_pernet_subsys(&nfs_net_ops); |
1747 | nfs_dns_resolver_destroy(); | ||
1748 | #ifdef CONFIG_PROC_FS | 1837 | #ifdef CONFIG_PROC_FS |
1749 | rpc_proc_unregister(&init_net, "nfs"); | 1838 | rpc_proc_unregister(&init_net, "nfs"); |
1750 | #endif | 1839 | #endif |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 91e59a39fc08..3c8373f90ab3 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -165,7 +165,7 @@ extern void nfs_free_client(struct nfs_client *); | |||
165 | extern struct nfs_client *nfs4_find_client_ident(struct net *, int); | 165 | extern struct nfs_client *nfs4_find_client_ident(struct net *, int); |
166 | extern struct nfs_client * | 166 | extern struct nfs_client * |
167 | nfs4_find_client_sessionid(struct net *, const struct sockaddr *, | 167 | nfs4_find_client_sessionid(struct net *, const struct sockaddr *, |
168 | struct nfs4_sessionid *); | 168 | struct nfs4_sessionid *, u32); |
169 | extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, | 169 | extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, |
170 | struct nfs_subversion *); | 170 | struct nfs_subversion *); |
171 | extern struct nfs_server *nfs4_create_server( | 171 | extern struct nfs_server *nfs4_create_server( |
@@ -255,6 +255,7 @@ extern int nfs4_decode_dirent(struct xdr_stream *, | |||
255 | #ifdef CONFIG_NFS_V4_1 | 255 | #ifdef CONFIG_NFS_V4_1 |
256 | extern const u32 nfs41_maxread_overhead; | 256 | extern const u32 nfs41_maxread_overhead; |
257 | extern const u32 nfs41_maxwrite_overhead; | 257 | extern const u32 nfs41_maxwrite_overhead; |
258 | extern const u32 nfs41_maxgetdevinfo_overhead; | ||
258 | #endif | 259 | #endif |
259 | 260 | ||
260 | /* nfs4proc.c */ | 261 | /* nfs4proc.c */ |
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 91a6faf811ac..99a45283b9ee 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c | |||
@@ -139,7 +139,10 @@ struct mnt_fhstatus { | |||
139 | * nfs_mount - Obtain an NFS file handle for the given host and path | 139 | * nfs_mount - Obtain an NFS file handle for the given host and path |
140 | * @info: pointer to mount request arguments | 140 | * @info: pointer to mount request arguments |
141 | * | 141 | * |
142 | * Uses default timeout parameters specified by underlying transport. | 142 | * Uses default timeout parameters specified by underlying transport. On |
143 | * successful return, the auth_flavs list and auth_flav_len will be populated | ||
144 | * with the list from the server or a faked-up list if the server didn't | ||
145 | * provide one. | ||
143 | */ | 146 | */ |
144 | int nfs_mount(struct nfs_mount_request *info) | 147 | int nfs_mount(struct nfs_mount_request *info) |
145 | { | 148 | { |
@@ -195,6 +198,15 @@ int nfs_mount(struct nfs_mount_request *info) | |||
195 | dprintk("NFS: MNT request succeeded\n"); | 198 | dprintk("NFS: MNT request succeeded\n"); |
196 | status = 0; | 199 | status = 0; |
197 | 200 | ||
201 | /* | ||
202 | * If the server didn't provide a flavor list, allow the | ||
203 | * client to try any flavor. | ||
204 | */ | ||
205 | if (info->version != NFS_MNT3_VERSION || *info->auth_flav_len == 0) { | ||
206 | dprintk("NFS: Faking up auth_flavs list\n"); | ||
207 | info->auth_flavs[0] = RPC_AUTH_NULL; | ||
208 | *info->auth_flav_len = 1; | ||
209 | } | ||
198 | out: | 210 | out: |
199 | return status; | 211 | return status; |
200 | 212 | ||
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index fc8dc20fdeb9..348b535cd786 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
@@ -280,7 +280,7 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, | |||
280 | struct dentry *parent = dget_parent(dentry); | 280 | struct dentry *parent = dget_parent(dentry); |
281 | 281 | ||
282 | /* Look it up again to get its attributes */ | 282 | /* Look it up again to get its attributes */ |
283 | err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr); | 283 | err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr, NULL); |
284 | dput(parent); | 284 | dput(parent); |
285 | if (err != 0) | 285 | if (err != 0) |
286 | return ERR_PTR(err); | 286 | return ERR_PTR(err); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 43ea96ced28c..f5c84c3efbca 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -33,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) | |||
33 | res = rpc_call_sync(clnt, msg, flags); | 33 | res = rpc_call_sync(clnt, msg, flags); |
34 | if (res != -EJUKEBOX) | 34 | if (res != -EJUKEBOX) |
35 | break; | 35 | break; |
36 | freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); | 36 | freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME); |
37 | res = -ERESTARTSYS; | 37 | res = -ERESTARTSYS; |
38 | } while (!fatal_signal_pending(current)); | 38 | } while (!fatal_signal_pending(current)); |
39 | return res; | 39 | return res; |
@@ -98,7 +98,7 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
98 | */ | 98 | */ |
99 | static int | 99 | static int |
100 | nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | 100 | nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, |
101 | struct nfs_fattr *fattr) | 101 | struct nfs_fattr *fattr, struct nfs4_label *label) |
102 | { | 102 | { |
103 | struct rpc_message msg = { | 103 | struct rpc_message msg = { |
104 | .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], | 104 | .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], |
@@ -143,7 +143,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
143 | 143 | ||
144 | static int | 144 | static int |
145 | nfs3_proc_lookup(struct inode *dir, struct qstr *name, | 145 | nfs3_proc_lookup(struct inode *dir, struct qstr *name, |
146 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 146 | struct nfs_fh *fhandle, struct nfs_fattr *fattr, |
147 | struct nfs4_label *label) | ||
147 | { | 148 | { |
148 | struct nfs3_diropargs arg = { | 149 | struct nfs3_diropargs arg = { |
149 | .fh = NFS_FH(dir), | 150 | .fh = NFS_FH(dir), |
@@ -300,7 +301,7 @@ static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_ | |||
300 | status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); | 301 | status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); |
301 | nfs_post_op_update_inode(dir, data->res.dir_attr); | 302 | nfs_post_op_update_inode(dir, data->res.dir_attr); |
302 | if (status == 0) | 303 | if (status == 0) |
303 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | 304 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); |
304 | return status; | 305 | return status; |
305 | } | 306 | } |
306 | 307 | ||
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a1dd768d0a35..ee81e354bce7 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -194,7 +194,7 @@ struct nfs4_state_recovery_ops { | |||
194 | int (*recover_lock)(struct nfs4_state *, struct file_lock *); | 194 | int (*recover_lock)(struct nfs4_state *, struct file_lock *); |
195 | int (*establish_clid)(struct nfs_client *, struct rpc_cred *); | 195 | int (*establish_clid)(struct nfs_client *, struct rpc_cred *); |
196 | struct rpc_cred * (*get_clid_cred)(struct nfs_client *); | 196 | struct rpc_cred * (*get_clid_cred)(struct nfs_client *); |
197 | int (*reclaim_complete)(struct nfs_client *); | 197 | int (*reclaim_complete)(struct nfs_client *, struct rpc_cred *); |
198 | int (*detect_trunking)(struct nfs_client *, struct nfs_client **, | 198 | int (*detect_trunking)(struct nfs_client *, struct nfs_client **, |
199 | struct rpc_cred *); | 199 | struct rpc_cred *); |
200 | }; | 200 | }; |
@@ -303,10 +303,10 @@ is_ds_client(struct nfs_client *clp) | |||
303 | extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; | 303 | extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; |
304 | 304 | ||
305 | extern const u32 nfs4_fattr_bitmap[3]; | 305 | extern const u32 nfs4_fattr_bitmap[3]; |
306 | extern const u32 nfs4_statfs_bitmap[2]; | 306 | extern const u32 nfs4_statfs_bitmap[3]; |
307 | extern const u32 nfs4_pathconf_bitmap[2]; | 307 | extern const u32 nfs4_pathconf_bitmap[3]; |
308 | extern const u32 nfs4_fsinfo_bitmap[3]; | 308 | extern const u32 nfs4_fsinfo_bitmap[3]; |
309 | extern const u32 nfs4_fs_locations_bitmap[2]; | 309 | extern const u32 nfs4_fs_locations_bitmap[3]; |
310 | 310 | ||
311 | void nfs4_free_client(struct nfs_client *); | 311 | void nfs4_free_client(struct nfs_client *); |
312 | 312 | ||
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 4cbad5d6b276..90dce91dd5b5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c | |||
@@ -66,6 +66,11 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) | |||
66 | if (err) | 66 | if (err) |
67 | goto error; | 67 | goto error; |
68 | 68 | ||
69 | if (cl_init->minorversion > NFS4_MAX_MINOR_VERSION) { | ||
70 | err = -EINVAL; | ||
71 | goto error; | ||
72 | } | ||
73 | |||
69 | spin_lock_init(&clp->cl_lock); | 74 | spin_lock_init(&clp->cl_lock); |
70 | INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); | 75 | INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); |
71 | rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); | 76 | rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); |
@@ -562,14 +567,14 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr, | |||
562 | */ | 567 | */ |
563 | struct nfs_client * | 568 | struct nfs_client * |
564 | nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, | 569 | nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, |
565 | struct nfs4_sessionid *sid) | 570 | struct nfs4_sessionid *sid, u32 minorversion) |
566 | { | 571 | { |
567 | struct nfs_client *clp; | 572 | struct nfs_client *clp; |
568 | struct nfs_net *nn = net_generic(net, nfs_net_id); | 573 | struct nfs_net *nn = net_generic(net, nfs_net_id); |
569 | 574 | ||
570 | spin_lock(&nn->nfs_client_lock); | 575 | spin_lock(&nn->nfs_client_lock); |
571 | list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { | 576 | list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { |
572 | if (nfs4_cb_match_client(addr, clp, 1) == false) | 577 | if (nfs4_cb_match_client(addr, clp, minorversion) == false) |
573 | continue; | 578 | continue; |
574 | 579 | ||
575 | if (!nfs4_has_session(clp)) | 580 | if (!nfs4_has_session(clp)) |
@@ -592,7 +597,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, | |||
592 | 597 | ||
593 | struct nfs_client * | 598 | struct nfs_client * |
594 | nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, | 599 | nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, |
595 | struct nfs4_sessionid *sid) | 600 | struct nfs4_sessionid *sid, u32 minorversion) |
596 | { | 601 | { |
597 | return NULL; | 602 | return NULL; |
598 | } | 603 | } |
@@ -626,6 +631,8 @@ static int nfs4_set_client(struct nfs_server *server, | |||
626 | 631 | ||
627 | if (server->flags & NFS_MOUNT_NORESVPORT) | 632 | if (server->flags & NFS_MOUNT_NORESVPORT) |
628 | set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); | 633 | set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); |
634 | if (server->options & NFS_OPTION_MIGRATION) | ||
635 | set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); | ||
629 | 636 | ||
630 | /* Allocate or find a client reference we can use */ | 637 | /* Allocate or find a client reference we can use */ |
631 | clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); | 638 | clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); |
@@ -730,7 +737,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, | |||
730 | return -ENOMEM; | 737 | return -ENOMEM; |
731 | 738 | ||
732 | /* We must ensure the session is initialised first */ | 739 | /* We must ensure the session is initialised first */ |
733 | error = nfs4_init_session(server); | 740 | error = nfs4_init_session(server->nfs_client); |
734 | if (error < 0) | 741 | if (error < 0) |
735 | goto out; | 742 | goto out; |
736 | 743 | ||
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 13e6bb3e3fe5..e5b804dd944c 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c | |||
@@ -69,7 +69,6 @@ nfs4_file_open(struct inode *inode, struct file *filp) | |||
69 | goto out_drop; | 69 | goto out_drop; |
70 | } | 70 | } |
71 | } | 71 | } |
72 | iput(inode); | ||
73 | if (inode != dentry->d_inode) | 72 | if (inode != dentry->d_inode) |
74 | goto out_drop; | 73 | goto out_drop; |
75 | 74 | ||
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 22d10623f5ee..17ed87ef9de8 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -643,7 +643,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
643 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, | 643 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, |
644 | NFS_SERVER(lo->plh_inode)->nfs_client, id); | 644 | NFS_SERVER(lo->plh_inode)->nfs_client, id); |
645 | if (d == NULL) { | 645 | if (d == NULL) { |
646 | dsaddr = filelayout_get_device_info(lo->plh_inode, id, gfp_flags); | 646 | dsaddr = filelayout_get_device_info(lo->plh_inode, id, |
647 | lo->plh_lc_cred, gfp_flags); | ||
647 | if (dsaddr == NULL) | 648 | if (dsaddr == NULL) |
648 | goto out; | 649 | goto out; |
649 | } else | 650 | } else |
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 235ff952d3c8..cebd20e7e923 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h | |||
@@ -150,6 +150,7 @@ struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, | |||
150 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 150 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
151 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 151 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
152 | struct nfs4_file_layout_dsaddr * | 152 | struct nfs4_file_layout_dsaddr * |
153 | filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); | 153 | filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, |
154 | struct rpc_cred *cred, gfp_t gfp_flags); | ||
154 | 155 | ||
155 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ | 156 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ |
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 661a0f611215..95604f64cab8 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -668,7 +668,10 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl | |||
668 | * of available devices, and return it. | 668 | * of available devices, and return it. |
669 | */ | 669 | */ |
670 | struct nfs4_file_layout_dsaddr * | 670 | struct nfs4_file_layout_dsaddr * |
671 | filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) | 671 | filelayout_get_device_info(struct inode *inode, |
672 | struct nfs4_deviceid *dev_id, | ||
673 | struct rpc_cred *cred, | ||
674 | gfp_t gfp_flags) | ||
672 | { | 675 | { |
673 | struct pnfs_device *pdev = NULL; | 676 | struct pnfs_device *pdev = NULL; |
674 | u32 max_resp_sz; | 677 | u32 max_resp_sz; |
@@ -708,8 +711,9 @@ filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gf | |||
708 | pdev->pgbase = 0; | 711 | pdev->pgbase = 0; |
709 | pdev->pglen = max_resp_sz; | 712 | pdev->pglen = max_resp_sz; |
710 | pdev->mincount = 0; | 713 | pdev->mincount = 0; |
714 | pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | ||
711 | 715 | ||
712 | rc = nfs4_proc_getdeviceinfo(server, pdev); | 716 | rc = nfs4_proc_getdeviceinfo(server, pdev, cred); |
713 | dprintk("%s getdevice info returns %d\n", __func__, rc); | 717 | dprintk("%s getdevice info returns %d\n", __func__, rc); |
714 | if (rc) | 718 | if (rc) |
715 | goto out_free; | 719 | goto out_free; |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d7ba5616989c..108a774095f7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -77,15 +77,68 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data); | |||
77 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 77 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
78 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); | 78 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); |
79 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); | 79 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); |
80 | static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); | 80 | static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); |
81 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); | 81 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); |
82 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 82 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
83 | struct nfs_fattr *fattr, struct iattr *sattr, | 83 | struct nfs_fattr *fattr, struct iattr *sattr, |
84 | struct nfs4_state *state); | 84 | struct nfs4_state *state, struct nfs4_label *ilabel, |
85 | struct nfs4_label *olabel); | ||
85 | #ifdef CONFIG_NFS_V4_1 | 86 | #ifdef CONFIG_NFS_V4_1 |
86 | static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *); | 87 | static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, |
87 | static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *); | 88 | struct rpc_cred *); |
89 | static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *, | ||
90 | struct rpc_cred *); | ||
88 | #endif | 91 | #endif |
92 | |||
93 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
94 | static inline struct nfs4_label * | ||
95 | nfs4_label_init_security(struct inode *dir, struct dentry *dentry, | ||
96 | struct iattr *sattr, struct nfs4_label *label) | ||
97 | { | ||
98 | int err; | ||
99 | |||
100 | if (label == NULL) | ||
101 | return NULL; | ||
102 | |||
103 | if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0) | ||
104 | return NULL; | ||
105 | |||
106 | if (NFS_SERVER(dir)->nfs_client->cl_minorversion < 2) | ||
107 | return NULL; | ||
108 | |||
109 | err = security_dentry_init_security(dentry, sattr->ia_mode, | ||
110 | &dentry->d_name, (void **)&label->label, &label->len); | ||
111 | if (err == 0) | ||
112 | return label; | ||
113 | |||
114 | return NULL; | ||
115 | } | ||
116 | static inline void | ||
117 | nfs4_label_release_security(struct nfs4_label *label) | ||
118 | { | ||
119 | if (label) | ||
120 | security_release_secctx(label->label, label->len); | ||
121 | } | ||
122 | static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label) | ||
123 | { | ||
124 | if (label) | ||
125 | return server->attr_bitmask; | ||
126 | |||
127 | return server->attr_bitmask_nl; | ||
128 | } | ||
129 | #else | ||
130 | static inline struct nfs4_label * | ||
131 | nfs4_label_init_security(struct inode *dir, struct dentry *dentry, | ||
132 | struct iattr *sattr, struct nfs4_label *l) | ||
133 | { return NULL; } | ||
134 | static inline void | ||
135 | nfs4_label_release_security(struct nfs4_label *label) | ||
136 | { return; } | ||
137 | static inline u32 * | ||
138 | nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label) | ||
139 | { return server->attr_bitmask; } | ||
140 | #endif | ||
141 | |||
89 | /* Prevent leaks of NFSv4 errors into userland */ | 142 | /* Prevent leaks of NFSv4 errors into userland */ |
90 | static int nfs4_map_errors(int err) | 143 | static int nfs4_map_errors(int err) |
91 | { | 144 | { |
@@ -134,7 +187,10 @@ const u32 nfs4_fattr_bitmap[3] = { | |||
134 | | FATTR4_WORD1_SPACE_USED | 187 | | FATTR4_WORD1_SPACE_USED |
135 | | FATTR4_WORD1_TIME_ACCESS | 188 | | FATTR4_WORD1_TIME_ACCESS |
136 | | FATTR4_WORD1_TIME_METADATA | 189 | | FATTR4_WORD1_TIME_METADATA |
137 | | FATTR4_WORD1_TIME_MODIFY | 190 | | FATTR4_WORD1_TIME_MODIFY, |
191 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
192 | FATTR4_WORD2_SECURITY_LABEL | ||
193 | #endif | ||
138 | }; | 194 | }; |
139 | 195 | ||
140 | static const u32 nfs4_pnfs_open_bitmap[3] = { | 196 | static const u32 nfs4_pnfs_open_bitmap[3] = { |
@@ -161,7 +217,7 @@ static const u32 nfs4_open_noattr_bitmap[3] = { | |||
161 | | FATTR4_WORD0_FILEID, | 217 | | FATTR4_WORD0_FILEID, |
162 | }; | 218 | }; |
163 | 219 | ||
164 | const u32 nfs4_statfs_bitmap[2] = { | 220 | const u32 nfs4_statfs_bitmap[3] = { |
165 | FATTR4_WORD0_FILES_AVAIL | 221 | FATTR4_WORD0_FILES_AVAIL |
166 | | FATTR4_WORD0_FILES_FREE | 222 | | FATTR4_WORD0_FILES_FREE |
167 | | FATTR4_WORD0_FILES_TOTAL, | 223 | | FATTR4_WORD0_FILES_TOTAL, |
@@ -170,7 +226,7 @@ const u32 nfs4_statfs_bitmap[2] = { | |||
170 | | FATTR4_WORD1_SPACE_TOTAL | 226 | | FATTR4_WORD1_SPACE_TOTAL |
171 | }; | 227 | }; |
172 | 228 | ||
173 | const u32 nfs4_pathconf_bitmap[2] = { | 229 | const u32 nfs4_pathconf_bitmap[3] = { |
174 | FATTR4_WORD0_MAXLINK | 230 | FATTR4_WORD0_MAXLINK |
175 | | FATTR4_WORD0_MAXNAME, | 231 | | FATTR4_WORD0_MAXNAME, |
176 | 0 | 232 | 0 |
@@ -185,7 +241,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE | |||
185 | FATTR4_WORD2_LAYOUT_BLKSIZE | 241 | FATTR4_WORD2_LAYOUT_BLKSIZE |
186 | }; | 242 | }; |
187 | 243 | ||
188 | const u32 nfs4_fs_locations_bitmap[2] = { | 244 | const u32 nfs4_fs_locations_bitmap[3] = { |
189 | FATTR4_WORD0_TYPE | 245 | FATTR4_WORD0_TYPE |
190 | | FATTR4_WORD0_CHANGE | 246 | | FATTR4_WORD0_CHANGE |
191 | | FATTR4_WORD0_SIZE | 247 | | FATTR4_WORD0_SIZE |
@@ -201,7 +257,7 @@ const u32 nfs4_fs_locations_bitmap[2] = { | |||
201 | | FATTR4_WORD1_TIME_ACCESS | 257 | | FATTR4_WORD1_TIME_ACCESS |
202 | | FATTR4_WORD1_TIME_METADATA | 258 | | FATTR4_WORD1_TIME_METADATA |
203 | | FATTR4_WORD1_TIME_MODIFY | 259 | | FATTR4_WORD1_TIME_MODIFY |
204 | | FATTR4_WORD1_MOUNTED_ON_FILEID | 260 | | FATTR4_WORD1_MOUNTED_ON_FILEID, |
205 | }; | 261 | }; |
206 | 262 | ||
207 | static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, | 263 | static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, |
@@ -268,7 +324,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) | |||
268 | *timeout = NFS4_POLL_RETRY_MIN; | 324 | *timeout = NFS4_POLL_RETRY_MIN; |
269 | if (*timeout > NFS4_POLL_RETRY_MAX) | 325 | if (*timeout > NFS4_POLL_RETRY_MAX) |
270 | *timeout = NFS4_POLL_RETRY_MAX; | 326 | *timeout = NFS4_POLL_RETRY_MAX; |
271 | freezable_schedule_timeout_killable(*timeout); | 327 | freezable_schedule_timeout_killable_unsafe(*timeout); |
272 | if (fatal_signal_pending(current)) | 328 | if (fatal_signal_pending(current)) |
273 | res = -ERESTARTSYS; | 329 | res = -ERESTARTSYS; |
274 | *timeout <<= 1; | 330 | *timeout <<= 1; |
@@ -762,6 +818,7 @@ struct nfs4_opendata { | |||
762 | struct nfs4_string owner_name; | 818 | struct nfs4_string owner_name; |
763 | struct nfs4_string group_name; | 819 | struct nfs4_string group_name; |
764 | struct nfs_fattr f_attr; | 820 | struct nfs_fattr f_attr; |
821 | struct nfs4_label *f_label; | ||
765 | struct dentry *dir; | 822 | struct dentry *dir; |
766 | struct dentry *dentry; | 823 | struct dentry *dentry; |
767 | struct nfs4_state_owner *owner; | 824 | struct nfs4_state_owner *owner; |
@@ -807,6 +864,7 @@ nfs4_map_atomic_open_claim(struct nfs_server *server, | |||
807 | static void nfs4_init_opendata_res(struct nfs4_opendata *p) | 864 | static void nfs4_init_opendata_res(struct nfs4_opendata *p) |
808 | { | 865 | { |
809 | p->o_res.f_attr = &p->f_attr; | 866 | p->o_res.f_attr = &p->f_attr; |
867 | p->o_res.f_label = p->f_label; | ||
810 | p->o_res.seqid = p->o_arg.seqid; | 868 | p->o_res.seqid = p->o_arg.seqid; |
811 | p->c_res.seqid = p->c_arg.seqid; | 869 | p->c_res.seqid = p->c_arg.seqid; |
812 | p->o_res.server = p->o_arg.server; | 870 | p->o_res.server = p->o_arg.server; |
@@ -818,6 +876,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) | |||
818 | static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | 876 | static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, |
819 | struct nfs4_state_owner *sp, fmode_t fmode, int flags, | 877 | struct nfs4_state_owner *sp, fmode_t fmode, int flags, |
820 | const struct iattr *attrs, | 878 | const struct iattr *attrs, |
879 | struct nfs4_label *label, | ||
821 | enum open_claim_type4 claim, | 880 | enum open_claim_type4 claim, |
822 | gfp_t gfp_mask) | 881 | gfp_t gfp_mask) |
823 | { | 882 | { |
@@ -829,9 +888,14 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
829 | p = kzalloc(sizeof(*p), gfp_mask); | 888 | p = kzalloc(sizeof(*p), gfp_mask); |
830 | if (p == NULL) | 889 | if (p == NULL) |
831 | goto err; | 890 | goto err; |
891 | |||
892 | p->f_label = nfs4_label_alloc(server, gfp_mask); | ||
893 | if (IS_ERR(p->f_label)) | ||
894 | goto err_free_p; | ||
895 | |||
832 | p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); | 896 | p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); |
833 | if (p->o_arg.seqid == NULL) | 897 | if (p->o_arg.seqid == NULL) |
834 | goto err_free; | 898 | goto err_free_label; |
835 | nfs_sb_active(dentry->d_sb); | 899 | nfs_sb_active(dentry->d_sb); |
836 | p->dentry = dget(dentry); | 900 | p->dentry = dget(dentry); |
837 | p->dir = parent; | 901 | p->dir = parent; |
@@ -852,8 +916,9 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
852 | p->o_arg.id.uniquifier = sp->so_seqid.owner_id; | 916 | p->o_arg.id.uniquifier = sp->so_seqid.owner_id; |
853 | p->o_arg.name = &dentry->d_name; | 917 | p->o_arg.name = &dentry->d_name; |
854 | p->o_arg.server = server; | 918 | p->o_arg.server = server; |
855 | p->o_arg.bitmask = server->attr_bitmask; | 919 | p->o_arg.bitmask = nfs4_bitmask(server, label); |
856 | p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; | 920 | p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; |
921 | p->o_arg.label = label; | ||
857 | p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); | 922 | p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); |
858 | switch (p->o_arg.claim) { | 923 | switch (p->o_arg.claim) { |
859 | case NFS4_OPEN_CLAIM_NULL: | 924 | case NFS4_OPEN_CLAIM_NULL: |
@@ -884,7 +949,10 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, | |||
884 | nfs4_init_opendata_res(p); | 949 | nfs4_init_opendata_res(p); |
885 | kref_init(&p->kref); | 950 | kref_init(&p->kref); |
886 | return p; | 951 | return p; |
887 | err_free: | 952 | |
953 | err_free_label: | ||
954 | nfs4_label_free(p->f_label); | ||
955 | err_free_p: | ||
888 | kfree(p); | 956 | kfree(p); |
889 | err: | 957 | err: |
890 | dput(parent); | 958 | dput(parent); |
@@ -901,6 +969,9 @@ static void nfs4_opendata_free(struct kref *kref) | |||
901 | if (p->state != NULL) | 969 | if (p->state != NULL) |
902 | nfs4_put_open_state(p->state); | 970 | nfs4_put_open_state(p->state); |
903 | nfs4_put_state_owner(p->owner); | 971 | nfs4_put_state_owner(p->owner); |
972 | |||
973 | nfs4_label_free(p->f_label); | ||
974 | |||
904 | dput(p->dir); | 975 | dput(p->dir); |
905 | dput(p->dentry); | 976 | dput(p->dentry); |
906 | nfs_sb_deactive(sb); | 977 | nfs_sb_deactive(sb); |
@@ -1179,6 +1250,8 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) | |||
1179 | if (ret) | 1250 | if (ret) |
1180 | goto err; | 1251 | goto err; |
1181 | 1252 | ||
1253 | nfs_setsecurity(inode, &data->f_attr, data->f_label); | ||
1254 | |||
1182 | if (data->o_res.delegation_type != 0) | 1255 | if (data->o_res.delegation_type != 0) |
1183 | nfs4_opendata_check_deleg(data, state); | 1256 | nfs4_opendata_check_deleg(data, state); |
1184 | update_open_stateid(state, &data->o_res.stateid, NULL, | 1257 | update_open_stateid(state, &data->o_res.stateid, NULL, |
@@ -1205,7 +1278,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) | |||
1205 | ret = -EAGAIN; | 1278 | ret = -EAGAIN; |
1206 | if (!(data->f_attr.valid & NFS_ATTR_FATTR)) | 1279 | if (!(data->f_attr.valid & NFS_ATTR_FATTR)) |
1207 | goto err; | 1280 | goto err; |
1208 | inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); | 1281 | inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr, data->f_label); |
1209 | ret = PTR_ERR(inode); | 1282 | ret = PTR_ERR(inode); |
1210 | if (IS_ERR(inode)) | 1283 | if (IS_ERR(inode)) |
1211 | goto err; | 1284 | goto err; |
@@ -1258,7 +1331,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context | |||
1258 | struct nfs4_opendata *opendata; | 1331 | struct nfs4_opendata *opendata; |
1259 | 1332 | ||
1260 | opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, | 1333 | opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, |
1261 | NULL, claim, GFP_NOFS); | 1334 | NULL, NULL, claim, GFP_NOFS); |
1262 | if (opendata == NULL) | 1335 | if (opendata == NULL) |
1263 | return ERR_PTR(-ENOMEM); | 1336 | return ERR_PTR(-ENOMEM); |
1264 | opendata->state = state; | 1337 | opendata->state = state; |
@@ -1784,7 +1857,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) | |||
1784 | return status; | 1857 | return status; |
1785 | } | 1858 | } |
1786 | if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) | 1859 | if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) |
1787 | _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr); | 1860 | _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); |
1788 | return 0; | 1861 | return 0; |
1789 | } | 1862 | } |
1790 | 1863 | ||
@@ -1855,18 +1928,30 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state) | |||
1855 | { | 1928 | { |
1856 | struct nfs_server *server = NFS_SERVER(state->inode); | 1929 | struct nfs_server *server = NFS_SERVER(state->inode); |
1857 | nfs4_stateid *stateid = &state->stateid; | 1930 | nfs4_stateid *stateid = &state->stateid; |
1858 | int status; | 1931 | struct nfs_delegation *delegation; |
1932 | struct rpc_cred *cred = NULL; | ||
1933 | int status = -NFS4ERR_BAD_STATEID; | ||
1859 | 1934 | ||
1860 | /* If a state reset has been done, test_stateid is unneeded */ | 1935 | /* If a state reset has been done, test_stateid is unneeded */ |
1861 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) | 1936 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) |
1862 | return; | 1937 | return; |
1863 | 1938 | ||
1864 | status = nfs41_test_stateid(server, stateid); | 1939 | /* Get the delegation credential for use by test/free_stateid */ |
1940 | rcu_read_lock(); | ||
1941 | delegation = rcu_dereference(NFS_I(state->inode)->delegation); | ||
1942 | if (delegation != NULL && | ||
1943 | nfs4_stateid_match(&delegation->stateid, stateid)) { | ||
1944 | cred = get_rpccred(delegation->cred); | ||
1945 | rcu_read_unlock(); | ||
1946 | status = nfs41_test_stateid(server, stateid, cred); | ||
1947 | } else | ||
1948 | rcu_read_unlock(); | ||
1949 | |||
1865 | if (status != NFS_OK) { | 1950 | if (status != NFS_OK) { |
1866 | /* Free the stateid unless the server explicitly | 1951 | /* Free the stateid unless the server explicitly |
1867 | * informs us the stateid is unrecognized. */ | 1952 | * informs us the stateid is unrecognized. */ |
1868 | if (status != -NFS4ERR_BAD_STATEID) | 1953 | if (status != -NFS4ERR_BAD_STATEID) |
1869 | nfs41_free_stateid(server, stateid); | 1954 | nfs41_free_stateid(server, stateid, cred); |
1870 | nfs_remove_bad_delegation(state->inode); | 1955 | nfs_remove_bad_delegation(state->inode); |
1871 | 1956 | ||
1872 | write_seqlock(&state->seqlock); | 1957 | write_seqlock(&state->seqlock); |
@@ -1874,6 +1959,9 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state) | |||
1874 | write_sequnlock(&state->seqlock); | 1959 | write_sequnlock(&state->seqlock); |
1875 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | 1960 | clear_bit(NFS_DELEGATED_STATE, &state->flags); |
1876 | } | 1961 | } |
1962 | |||
1963 | if (cred != NULL) | ||
1964 | put_rpccred(cred); | ||
1877 | } | 1965 | } |
1878 | 1966 | ||
1879 | /** | 1967 | /** |
@@ -1888,6 +1976,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) | |||
1888 | { | 1976 | { |
1889 | struct nfs_server *server = NFS_SERVER(state->inode); | 1977 | struct nfs_server *server = NFS_SERVER(state->inode); |
1890 | nfs4_stateid *stateid = &state->open_stateid; | 1978 | nfs4_stateid *stateid = &state->open_stateid; |
1979 | struct rpc_cred *cred = state->owner->so_cred; | ||
1891 | int status; | 1980 | int status; |
1892 | 1981 | ||
1893 | /* If a state reset has been done, test_stateid is unneeded */ | 1982 | /* If a state reset has been done, test_stateid is unneeded */ |
@@ -1896,12 +1985,12 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) | |||
1896 | (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) | 1985 | (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) |
1897 | return -NFS4ERR_BAD_STATEID; | 1986 | return -NFS4ERR_BAD_STATEID; |
1898 | 1987 | ||
1899 | status = nfs41_test_stateid(server, stateid); | 1988 | status = nfs41_test_stateid(server, stateid, cred); |
1900 | if (status != NFS_OK) { | 1989 | if (status != NFS_OK) { |
1901 | /* Free the stateid unless the server explicitly | 1990 | /* Free the stateid unless the server explicitly |
1902 | * informs us the stateid is unrecognized. */ | 1991 | * informs us the stateid is unrecognized. */ |
1903 | if (status != -NFS4ERR_BAD_STATEID) | 1992 | if (status != -NFS4ERR_BAD_STATEID) |
1904 | nfs41_free_stateid(server, stateid); | 1993 | nfs41_free_stateid(server, stateid, cred); |
1905 | 1994 | ||
1906 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | 1995 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); |
1907 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | 1996 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); |
@@ -1942,10 +2031,11 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct | |||
1942 | static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | 2031 | static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, |
1943 | fmode_t fmode, | 2032 | fmode_t fmode, |
1944 | int flags, | 2033 | int flags, |
1945 | struct nfs4_state **res) | 2034 | struct nfs_open_context *ctx) |
1946 | { | 2035 | { |
1947 | struct nfs4_state_owner *sp = opendata->owner; | 2036 | struct nfs4_state_owner *sp = opendata->owner; |
1948 | struct nfs_server *server = sp->so_server; | 2037 | struct nfs_server *server = sp->so_server; |
2038 | struct dentry *dentry; | ||
1949 | struct nfs4_state *state; | 2039 | struct nfs4_state *state; |
1950 | unsigned int seq; | 2040 | unsigned int seq; |
1951 | int ret; | 2041 | int ret; |
@@ -1963,13 +2053,31 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | |||
1963 | if (server->caps & NFS_CAP_POSIX_LOCK) | 2053 | if (server->caps & NFS_CAP_POSIX_LOCK) |
1964 | set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); | 2054 | set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); |
1965 | 2055 | ||
2056 | dentry = opendata->dentry; | ||
2057 | if (dentry->d_inode == NULL) { | ||
2058 | /* FIXME: Is this d_drop() ever needed? */ | ||
2059 | d_drop(dentry); | ||
2060 | dentry = d_add_unique(dentry, igrab(state->inode)); | ||
2061 | if (dentry == NULL) { | ||
2062 | dentry = opendata->dentry; | ||
2063 | } else if (dentry != ctx->dentry) { | ||
2064 | dput(ctx->dentry); | ||
2065 | ctx->dentry = dget(dentry); | ||
2066 | } | ||
2067 | nfs_set_verifier(dentry, | ||
2068 | nfs_save_change_attribute(opendata->dir->d_inode)); | ||
2069 | } | ||
2070 | |||
1966 | ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags); | 2071 | ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags); |
1967 | if (ret != 0) | 2072 | if (ret != 0) |
1968 | goto out; | 2073 | goto out; |
1969 | 2074 | ||
1970 | if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | 2075 | ctx->state = state; |
1971 | nfs4_schedule_stateid_recovery(server, state); | 2076 | if (dentry->d_inode == state->inode) { |
1972 | *res = state; | 2077 | nfs_inode_attach_open_context(ctx); |
2078 | if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | ||
2079 | nfs4_schedule_stateid_recovery(server, state); | ||
2080 | } | ||
1973 | out: | 2081 | out: |
1974 | return ret; | 2082 | return ret; |
1975 | } | 2083 | } |
@@ -1978,19 +2086,21 @@ out: | |||
1978 | * Returns a referenced nfs4_state | 2086 | * Returns a referenced nfs4_state |
1979 | */ | 2087 | */ |
1980 | static int _nfs4_do_open(struct inode *dir, | 2088 | static int _nfs4_do_open(struct inode *dir, |
1981 | struct dentry *dentry, | 2089 | struct nfs_open_context *ctx, |
1982 | fmode_t fmode, | ||
1983 | int flags, | 2090 | int flags, |
1984 | struct iattr *sattr, | 2091 | struct iattr *sattr, |
1985 | struct rpc_cred *cred, | 2092 | struct nfs4_label *label) |
1986 | struct nfs4_state **res, | ||
1987 | struct nfs4_threshold **ctx_th) | ||
1988 | { | 2093 | { |
1989 | struct nfs4_state_owner *sp; | 2094 | struct nfs4_state_owner *sp; |
1990 | struct nfs4_state *state = NULL; | 2095 | struct nfs4_state *state = NULL; |
1991 | struct nfs_server *server = NFS_SERVER(dir); | 2096 | struct nfs_server *server = NFS_SERVER(dir); |
1992 | struct nfs4_opendata *opendata; | 2097 | struct nfs4_opendata *opendata; |
2098 | struct dentry *dentry = ctx->dentry; | ||
2099 | struct rpc_cred *cred = ctx->cred; | ||
2100 | struct nfs4_threshold **ctx_th = &ctx->mdsthreshold; | ||
2101 | fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); | ||
1993 | enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; | 2102 | enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; |
2103 | struct nfs4_label *olabel = NULL; | ||
1994 | int status; | 2104 | int status; |
1995 | 2105 | ||
1996 | /* Protect against reboot recovery conflicts */ | 2106 | /* Protect against reboot recovery conflicts */ |
@@ -2009,22 +2119,31 @@ static int _nfs4_do_open(struct inode *dir, | |||
2009 | if (dentry->d_inode) | 2119 | if (dentry->d_inode) |
2010 | claim = NFS4_OPEN_CLAIM_FH; | 2120 | claim = NFS4_OPEN_CLAIM_FH; |
2011 | opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, | 2121 | opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, |
2012 | claim, GFP_KERNEL); | 2122 | label, claim, GFP_KERNEL); |
2013 | if (opendata == NULL) | 2123 | if (opendata == NULL) |
2014 | goto err_put_state_owner; | 2124 | goto err_put_state_owner; |
2015 | 2125 | ||
2126 | if (label) { | ||
2127 | olabel = nfs4_label_alloc(server, GFP_KERNEL); | ||
2128 | if (IS_ERR(olabel)) { | ||
2129 | status = PTR_ERR(olabel); | ||
2130 | goto err_opendata_put; | ||
2131 | } | ||
2132 | } | ||
2133 | |||
2016 | if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { | 2134 | if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { |
2017 | opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); | 2135 | opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); |
2018 | if (!opendata->f_attr.mdsthreshold) | 2136 | if (!opendata->f_attr.mdsthreshold) |
2019 | goto err_opendata_put; | 2137 | goto err_free_label; |
2020 | opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; | 2138 | opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; |
2021 | } | 2139 | } |
2022 | if (dentry->d_inode != NULL) | 2140 | if (dentry->d_inode != NULL) |
2023 | opendata->state = nfs4_get_open_state(dentry->d_inode, sp); | 2141 | opendata->state = nfs4_get_open_state(dentry->d_inode, sp); |
2024 | 2142 | ||
2025 | status = _nfs4_open_and_get_state(opendata, fmode, flags, &state); | 2143 | status = _nfs4_open_and_get_state(opendata, fmode, flags, ctx); |
2026 | if (status != 0) | 2144 | if (status != 0) |
2027 | goto err_opendata_put; | 2145 | goto err_free_label; |
2146 | state = ctx->state; | ||
2028 | 2147 | ||
2029 | if ((opendata->o_arg.open_flags & O_EXCL) && | 2148 | if ((opendata->o_arg.open_flags & O_EXCL) && |
2030 | (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { | 2149 | (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { |
@@ -2033,10 +2152,12 @@ static int _nfs4_do_open(struct inode *dir, | |||
2033 | nfs_fattr_init(opendata->o_res.f_attr); | 2152 | nfs_fattr_init(opendata->o_res.f_attr); |
2034 | status = nfs4_do_setattr(state->inode, cred, | 2153 | status = nfs4_do_setattr(state->inode, cred, |
2035 | opendata->o_res.f_attr, sattr, | 2154 | opendata->o_res.f_attr, sattr, |
2036 | state); | 2155 | state, label, olabel); |
2037 | if (status == 0) | 2156 | if (status == 0) { |
2038 | nfs_setattr_update_inode(state->inode, sattr); | 2157 | nfs_setattr_update_inode(state->inode, sattr); |
2039 | nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); | 2158 | nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); |
2159 | nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); | ||
2160 | } | ||
2040 | } | 2161 | } |
2041 | 2162 | ||
2042 | if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) | 2163 | if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) |
@@ -2045,38 +2166,37 @@ static int _nfs4_do_open(struct inode *dir, | |||
2045 | kfree(opendata->f_attr.mdsthreshold); | 2166 | kfree(opendata->f_attr.mdsthreshold); |
2046 | opendata->f_attr.mdsthreshold = NULL; | 2167 | opendata->f_attr.mdsthreshold = NULL; |
2047 | 2168 | ||
2169 | nfs4_label_free(olabel); | ||
2170 | |||
2048 | nfs4_opendata_put(opendata); | 2171 | nfs4_opendata_put(opendata); |
2049 | nfs4_put_state_owner(sp); | 2172 | nfs4_put_state_owner(sp); |
2050 | *res = state; | ||
2051 | return 0; | 2173 | return 0; |
2174 | err_free_label: | ||
2175 | nfs4_label_free(olabel); | ||
2052 | err_opendata_put: | 2176 | err_opendata_put: |
2053 | kfree(opendata->f_attr.mdsthreshold); | 2177 | kfree(opendata->f_attr.mdsthreshold); |
2054 | nfs4_opendata_put(opendata); | 2178 | nfs4_opendata_put(opendata); |
2055 | err_put_state_owner: | 2179 | err_put_state_owner: |
2056 | nfs4_put_state_owner(sp); | 2180 | nfs4_put_state_owner(sp); |
2057 | out_err: | 2181 | out_err: |
2058 | *res = NULL; | ||
2059 | return status; | 2182 | return status; |
2060 | } | 2183 | } |
2061 | 2184 | ||
2062 | 2185 | ||
2063 | static struct nfs4_state *nfs4_do_open(struct inode *dir, | 2186 | static struct nfs4_state *nfs4_do_open(struct inode *dir, |
2064 | struct dentry *dentry, | 2187 | struct nfs_open_context *ctx, |
2065 | fmode_t fmode, | ||
2066 | int flags, | 2188 | int flags, |
2067 | struct iattr *sattr, | 2189 | struct iattr *sattr, |
2068 | struct rpc_cred *cred, | 2190 | struct nfs4_label *label) |
2069 | struct nfs4_threshold **ctx_th) | ||
2070 | { | 2191 | { |
2071 | struct nfs_server *server = NFS_SERVER(dir); | 2192 | struct nfs_server *server = NFS_SERVER(dir); |
2072 | struct nfs4_exception exception = { }; | 2193 | struct nfs4_exception exception = { }; |
2073 | struct nfs4_state *res; | 2194 | struct nfs4_state *res; |
2074 | int status; | 2195 | int status; |
2075 | 2196 | ||
2076 | fmode &= FMODE_READ|FMODE_WRITE|FMODE_EXEC; | ||
2077 | do { | 2197 | do { |
2078 | status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, | 2198 | status = _nfs4_do_open(dir, ctx, flags, sattr, label); |
2079 | &res, ctx_th); | 2199 | res = ctx->state; |
2080 | if (status == 0) | 2200 | if (status == 0) |
2081 | break; | 2201 | break; |
2082 | /* NOTE: BAD_SEQID means the server and client disagree about the | 2202 | /* NOTE: BAD_SEQID means the server and client disagree about the |
@@ -2122,7 +2242,8 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, | |||
2122 | 2242 | ||
2123 | static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 2243 | static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
2124 | struct nfs_fattr *fattr, struct iattr *sattr, | 2244 | struct nfs_fattr *fattr, struct iattr *sattr, |
2125 | struct nfs4_state *state) | 2245 | struct nfs4_state *state, struct nfs4_label *ilabel, |
2246 | struct nfs4_label *olabel) | ||
2126 | { | 2247 | { |
2127 | struct nfs_server *server = NFS_SERVER(inode); | 2248 | struct nfs_server *server = NFS_SERVER(inode); |
2128 | struct nfs_setattrargs arg = { | 2249 | struct nfs_setattrargs arg = { |
@@ -2130,9 +2251,11 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
2130 | .iap = sattr, | 2251 | .iap = sattr, |
2131 | .server = server, | 2252 | .server = server, |
2132 | .bitmask = server->attr_bitmask, | 2253 | .bitmask = server->attr_bitmask, |
2254 | .label = ilabel, | ||
2133 | }; | 2255 | }; |
2134 | struct nfs_setattrres res = { | 2256 | struct nfs_setattrres res = { |
2135 | .fattr = fattr, | 2257 | .fattr = fattr, |
2258 | .label = olabel, | ||
2136 | .server = server, | 2259 | .server = server, |
2137 | }; | 2260 | }; |
2138 | struct rpc_message msg = { | 2261 | struct rpc_message msg = { |
@@ -2146,6 +2269,10 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
2146 | bool truncate; | 2269 | bool truncate; |
2147 | int status; | 2270 | int status; |
2148 | 2271 | ||
2272 | arg.bitmask = nfs4_bitmask(server, ilabel); | ||
2273 | if (ilabel) | ||
2274 | arg.bitmask = nfs4_bitmask(server, olabel); | ||
2275 | |||
2149 | nfs_fattr_init(fattr); | 2276 | nfs_fattr_init(fattr); |
2150 | 2277 | ||
2151 | /* Servers should only apply open mode checks for file size changes */ | 2278 | /* Servers should only apply open mode checks for file size changes */ |
@@ -2172,7 +2299,8 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
2172 | 2299 | ||
2173 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 2300 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
2174 | struct nfs_fattr *fattr, struct iattr *sattr, | 2301 | struct nfs_fattr *fattr, struct iattr *sattr, |
2175 | struct nfs4_state *state) | 2302 | struct nfs4_state *state, struct nfs4_label *ilabel, |
2303 | struct nfs4_label *olabel) | ||
2176 | { | 2304 | { |
2177 | struct nfs_server *server = NFS_SERVER(inode); | 2305 | struct nfs_server *server = NFS_SERVER(inode); |
2178 | struct nfs4_exception exception = { | 2306 | struct nfs4_exception exception = { |
@@ -2181,7 +2309,7 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
2181 | }; | 2309 | }; |
2182 | int err; | 2310 | int err; |
2183 | do { | 2311 | do { |
2184 | err = _nfs4_do_setattr(inode, cred, fattr, sattr, state); | 2312 | err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel); |
2185 | switch (err) { | 2313 | switch (err) { |
2186 | case -NFS4ERR_OPENMODE: | 2314 | case -NFS4ERR_OPENMODE: |
2187 | if (!(sattr->ia_valid & ATTR_SIZE)) { | 2315 | if (!(sattr->ia_valid & ATTR_SIZE)) { |
@@ -2426,14 +2554,18 @@ static struct inode * | |||
2426 | nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) | 2554 | nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) |
2427 | { | 2555 | { |
2428 | struct nfs4_state *state; | 2556 | struct nfs4_state *state; |
2557 | struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL; | ||
2558 | |||
2559 | label = nfs4_label_init_security(dir, ctx->dentry, attr, &l); | ||
2429 | 2560 | ||
2430 | /* Protect against concurrent sillydeletes */ | 2561 | /* Protect against concurrent sillydeletes */ |
2431 | state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, | 2562 | state = nfs4_do_open(dir, ctx, open_flags, attr, label); |
2432 | ctx->cred, &ctx->mdsthreshold); | 2563 | |
2564 | nfs4_label_release_security(label); | ||
2565 | |||
2433 | if (IS_ERR(state)) | 2566 | if (IS_ERR(state)) |
2434 | return ERR_CAST(state); | 2567 | return ERR_CAST(state); |
2435 | ctx->state = state; | 2568 | return state->inode; |
2436 | return igrab(state->inode); | ||
2437 | } | 2569 | } |
2438 | 2570 | ||
2439 | static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) | 2571 | static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) |
@@ -2489,7 +2621,17 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f | |||
2489 | server->caps |= NFS_CAP_CTIME; | 2621 | server->caps |= NFS_CAP_CTIME; |
2490 | if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) | 2622 | if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) |
2491 | server->caps |= NFS_CAP_MTIME; | 2623 | server->caps |= NFS_CAP_MTIME; |
2624 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
2625 | if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL) | ||
2626 | server->caps |= NFS_CAP_SECURITY_LABEL; | ||
2627 | #endif | ||
2628 | memcpy(server->attr_bitmask_nl, res.attr_bitmask, | ||
2629 | sizeof(server->attr_bitmask)); | ||
2492 | 2630 | ||
2631 | if (server->caps & NFS_CAP_SECURITY_LABEL) { | ||
2632 | server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
2633 | res.attr_bitmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
2634 | } | ||
2493 | memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); | 2635 | memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); |
2494 | server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; | 2636 | server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; |
2495 | server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; | 2637 | server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; |
@@ -2515,8 +2657,9 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) | |||
2515 | static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, | 2657 | static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, |
2516 | struct nfs_fsinfo *info) | 2658 | struct nfs_fsinfo *info) |
2517 | { | 2659 | { |
2660 | u32 bitmask[3]; | ||
2518 | struct nfs4_lookup_root_arg args = { | 2661 | struct nfs4_lookup_root_arg args = { |
2519 | .bitmask = nfs4_fattr_bitmap, | 2662 | .bitmask = bitmask, |
2520 | }; | 2663 | }; |
2521 | struct nfs4_lookup_res res = { | 2664 | struct nfs4_lookup_res res = { |
2522 | .server = server, | 2665 | .server = server, |
@@ -2529,6 +2672,13 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2529 | .rpc_resp = &res, | 2672 | .rpc_resp = &res, |
2530 | }; | 2673 | }; |
2531 | 2674 | ||
2675 | bitmask[0] = nfs4_fattr_bitmap[0]; | ||
2676 | bitmask[1] = nfs4_fattr_bitmap[1]; | ||
2677 | /* | ||
2678 | * Process the label in the upcoming getfattr | ||
2679 | */ | ||
2680 | bitmask[2] = nfs4_fattr_bitmap[2] & ~FATTR4_WORD2_SECURITY_LABEL; | ||
2681 | |||
2532 | nfs_fattr_init(info->fattr); | 2682 | nfs_fattr_init(info->fattr); |
2533 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); | 2683 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
2534 | } | 2684 | } |
@@ -2648,6 +2798,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, | |||
2648 | { | 2798 | { |
2649 | int error; | 2799 | int error; |
2650 | struct nfs_fattr *fattr = info->fattr; | 2800 | struct nfs_fattr *fattr = info->fattr; |
2801 | struct nfs4_label *label = NULL; | ||
2651 | 2802 | ||
2652 | error = nfs4_server_capabilities(server, mntfh); | 2803 | error = nfs4_server_capabilities(server, mntfh); |
2653 | if (error < 0) { | 2804 | if (error < 0) { |
@@ -2655,16 +2806,23 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, | |||
2655 | return error; | 2806 | return error; |
2656 | } | 2807 | } |
2657 | 2808 | ||
2658 | error = nfs4_proc_getattr(server, mntfh, fattr); | 2809 | label = nfs4_label_alloc(server, GFP_KERNEL); |
2810 | if (IS_ERR(label)) | ||
2811 | return PTR_ERR(label); | ||
2812 | |||
2813 | error = nfs4_proc_getattr(server, mntfh, fattr, label); | ||
2659 | if (error < 0) { | 2814 | if (error < 0) { |
2660 | dprintk("nfs4_get_root: getattr error = %d\n", -error); | 2815 | dprintk("nfs4_get_root: getattr error = %d\n", -error); |
2661 | return error; | 2816 | goto err_free_label; |
2662 | } | 2817 | } |
2663 | 2818 | ||
2664 | if (fattr->valid & NFS_ATTR_FATTR_FSID && | 2819 | if (fattr->valid & NFS_ATTR_FATTR_FSID && |
2665 | !nfs_fsid_equal(&server->fsid, &fattr->fsid)) | 2820 | !nfs_fsid_equal(&server->fsid, &fattr->fsid)) |
2666 | memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); | 2821 | memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); |
2667 | 2822 | ||
2823 | err_free_label: | ||
2824 | nfs4_label_free(label); | ||
2825 | |||
2668 | return error; | 2826 | return error; |
2669 | } | 2827 | } |
2670 | 2828 | ||
@@ -2711,7 +2869,8 @@ out: | |||
2711 | return status; | 2869 | return status; |
2712 | } | 2870 | } |
2713 | 2871 | ||
2714 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 2872 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, |
2873 | struct nfs_fattr *fattr, struct nfs4_label *label) | ||
2715 | { | 2874 | { |
2716 | struct nfs4_getattr_arg args = { | 2875 | struct nfs4_getattr_arg args = { |
2717 | .fh = fhandle, | 2876 | .fh = fhandle, |
@@ -2719,6 +2878,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2719 | }; | 2878 | }; |
2720 | struct nfs4_getattr_res res = { | 2879 | struct nfs4_getattr_res res = { |
2721 | .fattr = fattr, | 2880 | .fattr = fattr, |
2881 | .label = label, | ||
2722 | .server = server, | 2882 | .server = server, |
2723 | }; | 2883 | }; |
2724 | struct rpc_message msg = { | 2884 | struct rpc_message msg = { |
@@ -2726,18 +2886,21 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2726 | .rpc_argp = &args, | 2886 | .rpc_argp = &args, |
2727 | .rpc_resp = &res, | 2887 | .rpc_resp = &res, |
2728 | }; | 2888 | }; |
2729 | 2889 | ||
2890 | args.bitmask = nfs4_bitmask(server, label); | ||
2891 | |||
2730 | nfs_fattr_init(fattr); | 2892 | nfs_fattr_init(fattr); |
2731 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); | 2893 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
2732 | } | 2894 | } |
2733 | 2895 | ||
2734 | static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 2896 | static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, |
2897 | struct nfs_fattr *fattr, struct nfs4_label *label) | ||
2735 | { | 2898 | { |
2736 | struct nfs4_exception exception = { }; | 2899 | struct nfs4_exception exception = { }; |
2737 | int err; | 2900 | int err; |
2738 | do { | 2901 | do { |
2739 | err = nfs4_handle_exception(server, | 2902 | err = nfs4_handle_exception(server, |
2740 | _nfs4_proc_getattr(server, fhandle, fattr), | 2903 | _nfs4_proc_getattr(server, fhandle, fattr, label), |
2741 | &exception); | 2904 | &exception); |
2742 | } while (exception.retry); | 2905 | } while (exception.retry); |
2743 | return err; | 2906 | return err; |
@@ -2767,6 +2930,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
2767 | struct inode *inode = dentry->d_inode; | 2930 | struct inode *inode = dentry->d_inode; |
2768 | struct rpc_cred *cred = NULL; | 2931 | struct rpc_cred *cred = NULL; |
2769 | struct nfs4_state *state = NULL; | 2932 | struct nfs4_state *state = NULL; |
2933 | struct nfs4_label *label = NULL; | ||
2770 | int status; | 2934 | int status; |
2771 | 2935 | ||
2772 | if (pnfs_ld_layoutret_on_setattr(inode)) | 2936 | if (pnfs_ld_layoutret_on_setattr(inode)) |
@@ -2793,15 +2957,22 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
2793 | } | 2957 | } |
2794 | } | 2958 | } |
2795 | 2959 | ||
2796 | status = nfs4_do_setattr(inode, cred, fattr, sattr, state); | 2960 | label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); |
2797 | if (status == 0) | 2961 | if (IS_ERR(label)) |
2962 | return PTR_ERR(label); | ||
2963 | |||
2964 | status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label); | ||
2965 | if (status == 0) { | ||
2798 | nfs_setattr_update_inode(inode, sattr); | 2966 | nfs_setattr_update_inode(inode, sattr); |
2967 | nfs_setsecurity(inode, fattr, label); | ||
2968 | } | ||
2969 | nfs4_label_free(label); | ||
2799 | return status; | 2970 | return status; |
2800 | } | 2971 | } |
2801 | 2972 | ||
2802 | static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, | 2973 | static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, |
2803 | const struct qstr *name, struct nfs_fh *fhandle, | 2974 | const struct qstr *name, struct nfs_fh *fhandle, |
2804 | struct nfs_fattr *fattr) | 2975 | struct nfs_fattr *fattr, struct nfs4_label *label) |
2805 | { | 2976 | { |
2806 | struct nfs_server *server = NFS_SERVER(dir); | 2977 | struct nfs_server *server = NFS_SERVER(dir); |
2807 | int status; | 2978 | int status; |
@@ -2813,6 +2984,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, | |||
2813 | struct nfs4_lookup_res res = { | 2984 | struct nfs4_lookup_res res = { |
2814 | .server = server, | 2985 | .server = server, |
2815 | .fattr = fattr, | 2986 | .fattr = fattr, |
2987 | .label = label, | ||
2816 | .fh = fhandle, | 2988 | .fh = fhandle, |
2817 | }; | 2989 | }; |
2818 | struct rpc_message msg = { | 2990 | struct rpc_message msg = { |
@@ -2821,6 +2993,8 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, | |||
2821 | .rpc_resp = &res, | 2993 | .rpc_resp = &res, |
2822 | }; | 2994 | }; |
2823 | 2995 | ||
2996 | args.bitmask = nfs4_bitmask(server, label); | ||
2997 | |||
2824 | nfs_fattr_init(fattr); | 2998 | nfs_fattr_init(fattr); |
2825 | 2999 | ||
2826 | dprintk("NFS call lookup %s\n", name->name); | 3000 | dprintk("NFS call lookup %s\n", name->name); |
@@ -2839,13 +3013,13 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) | |||
2839 | 3013 | ||
2840 | static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, | 3014 | static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, |
2841 | struct qstr *name, struct nfs_fh *fhandle, | 3015 | struct qstr *name, struct nfs_fh *fhandle, |
2842 | struct nfs_fattr *fattr) | 3016 | struct nfs_fattr *fattr, struct nfs4_label *label) |
2843 | { | 3017 | { |
2844 | struct nfs4_exception exception = { }; | 3018 | struct nfs4_exception exception = { }; |
2845 | struct rpc_clnt *client = *clnt; | 3019 | struct rpc_clnt *client = *clnt; |
2846 | int err; | 3020 | int err; |
2847 | do { | 3021 | do { |
2848 | err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr); | 3022 | err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label); |
2849 | switch (err) { | 3023 | switch (err) { |
2850 | case -NFS4ERR_BADNAME: | 3024 | case -NFS4ERR_BADNAME: |
2851 | err = -ENOENT; | 3025 | err = -ENOENT; |
@@ -2879,12 +3053,13 @@ out: | |||
2879 | } | 3053 | } |
2880 | 3054 | ||
2881 | static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, | 3055 | static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, |
2882 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 3056 | struct nfs_fh *fhandle, struct nfs_fattr *fattr, |
3057 | struct nfs4_label *label) | ||
2883 | { | 3058 | { |
2884 | int status; | 3059 | int status; |
2885 | struct rpc_clnt *client = NFS_CLIENT(dir); | 3060 | struct rpc_clnt *client = NFS_CLIENT(dir); |
2886 | 3061 | ||
2887 | status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr); | 3062 | status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, label); |
2888 | if (client != NFS_CLIENT(dir)) { | 3063 | if (client != NFS_CLIENT(dir)) { |
2889 | rpc_shutdown_client(client); | 3064 | rpc_shutdown_client(client); |
2890 | nfs_fixup_secinfo_attributes(fattr); | 3065 | nfs_fixup_secinfo_attributes(fattr); |
@@ -2896,15 +3071,13 @@ struct rpc_clnt * | |||
2896 | nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name, | 3071 | nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name, |
2897 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 3072 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) |
2898 | { | 3073 | { |
3074 | struct rpc_clnt *client = NFS_CLIENT(dir); | ||
2899 | int status; | 3075 | int status; |
2900 | struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir)); | ||
2901 | 3076 | ||
2902 | status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr); | 3077 | status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL); |
2903 | if (status < 0) { | 3078 | if (status < 0) |
2904 | rpc_shutdown_client(client); | ||
2905 | return ERR_PTR(status); | 3079 | return ERR_PTR(status); |
2906 | } | 3080 | return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; |
2907 | return client; | ||
2908 | } | 3081 | } |
2909 | 3082 | ||
2910 | static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) | 3083 | static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) |
@@ -2924,7 +3097,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry | |||
2924 | .rpc_cred = entry->cred, | 3097 | .rpc_cred = entry->cred, |
2925 | }; | 3098 | }; |
2926 | int mode = entry->mask; | 3099 | int mode = entry->mask; |
2927 | int status; | 3100 | int status = 0; |
2928 | 3101 | ||
2929 | /* | 3102 | /* |
2930 | * Determine which access bits we want to ask for... | 3103 | * Determine which access bits we want to ask for... |
@@ -3029,6 +3202,7 @@ static int | |||
3029 | nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 3202 | nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
3030 | int flags) | 3203 | int flags) |
3031 | { | 3204 | { |
3205 | struct nfs4_label l, *ilabel = NULL; | ||
3032 | struct nfs_open_context *ctx; | 3206 | struct nfs_open_context *ctx; |
3033 | struct nfs4_state *state; | 3207 | struct nfs4_state *state; |
3034 | int status = 0; | 3208 | int status = 0; |
@@ -3037,19 +3211,16 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
3037 | if (IS_ERR(ctx)) | 3211 | if (IS_ERR(ctx)) |
3038 | return PTR_ERR(ctx); | 3212 | return PTR_ERR(ctx); |
3039 | 3213 | ||
3214 | ilabel = nfs4_label_init_security(dir, dentry, sattr, &l); | ||
3215 | |||
3040 | sattr->ia_mode &= ~current_umask(); | 3216 | sattr->ia_mode &= ~current_umask(); |
3041 | state = nfs4_do_open(dir, dentry, ctx->mode, | 3217 | state = nfs4_do_open(dir, ctx, flags, sattr, ilabel); |
3042 | flags, sattr, ctx->cred, | ||
3043 | &ctx->mdsthreshold); | ||
3044 | d_drop(dentry); | ||
3045 | if (IS_ERR(state)) { | 3218 | if (IS_ERR(state)) { |
3046 | status = PTR_ERR(state); | 3219 | status = PTR_ERR(state); |
3047 | goto out; | 3220 | goto out; |
3048 | } | 3221 | } |
3049 | d_add(dentry, igrab(state->inode)); | ||
3050 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
3051 | ctx->state = state; | ||
3052 | out: | 3222 | out: |
3223 | nfs4_label_release_security(ilabel); | ||
3053 | put_nfs_open_context(ctx); | 3224 | put_nfs_open_context(ctx); |
3054 | return status; | 3225 | return status; |
3055 | } | 3226 | } |
@@ -3098,6 +3269,8 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) | |||
3098 | res->server = server; | 3269 | res->server = server; |
3099 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; | 3270 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; |
3100 | nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); | 3271 | nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); |
3272 | |||
3273 | nfs_fattr_init(res->dir_attr); | ||
3101 | } | 3274 | } |
3102 | 3275 | ||
3103 | static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) | 3276 | static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) |
@@ -3173,7 +3346,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, | |||
3173 | .rpc_resp = &res, | 3346 | .rpc_resp = &res, |
3174 | }; | 3347 | }; |
3175 | int status = -ENOMEM; | 3348 | int status = -ENOMEM; |
3176 | 3349 | ||
3177 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); | 3350 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
3178 | if (!status) { | 3351 | if (!status) { |
3179 | update_changeattr(old_dir, &res.old_cinfo); | 3352 | update_changeattr(old_dir, &res.old_cinfo); |
@@ -3207,6 +3380,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * | |||
3207 | }; | 3380 | }; |
3208 | struct nfs4_link_res res = { | 3381 | struct nfs4_link_res res = { |
3209 | .server = server, | 3382 | .server = server, |
3383 | .label = NULL, | ||
3210 | }; | 3384 | }; |
3211 | struct rpc_message msg = { | 3385 | struct rpc_message msg = { |
3212 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], | 3386 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], |
@@ -3219,11 +3393,24 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * | |||
3219 | if (res.fattr == NULL) | 3393 | if (res.fattr == NULL) |
3220 | goto out; | 3394 | goto out; |
3221 | 3395 | ||
3396 | res.label = nfs4_label_alloc(server, GFP_KERNEL); | ||
3397 | if (IS_ERR(res.label)) { | ||
3398 | status = PTR_ERR(res.label); | ||
3399 | goto out; | ||
3400 | } | ||
3401 | arg.bitmask = nfs4_bitmask(server, res.label); | ||
3402 | |||
3222 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); | 3403 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
3223 | if (!status) { | 3404 | if (!status) { |
3224 | update_changeattr(dir, &res.cinfo); | 3405 | update_changeattr(dir, &res.cinfo); |
3225 | nfs_post_op_update_inode(inode, res.fattr); | 3406 | status = nfs_post_op_update_inode(inode, res.fattr); |
3407 | if (!status) | ||
3408 | nfs_setsecurity(inode, res.fattr, res.label); | ||
3226 | } | 3409 | } |
3410 | |||
3411 | |||
3412 | nfs4_label_free(res.label); | ||
3413 | |||
3227 | out: | 3414 | out: |
3228 | nfs_free_fattr(res.fattr); | 3415 | nfs_free_fattr(res.fattr); |
3229 | return status; | 3416 | return status; |
@@ -3247,6 +3434,7 @@ struct nfs4_createdata { | |||
3247 | struct nfs4_create_res res; | 3434 | struct nfs4_create_res res; |
3248 | struct nfs_fh fh; | 3435 | struct nfs_fh fh; |
3249 | struct nfs_fattr fattr; | 3436 | struct nfs_fattr fattr; |
3437 | struct nfs4_label *label; | ||
3250 | }; | 3438 | }; |
3251 | 3439 | ||
3252 | static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, | 3440 | static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, |
@@ -3258,6 +3446,10 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, | |||
3258 | if (data != NULL) { | 3446 | if (data != NULL) { |
3259 | struct nfs_server *server = NFS_SERVER(dir); | 3447 | struct nfs_server *server = NFS_SERVER(dir); |
3260 | 3448 | ||
3449 | data->label = nfs4_label_alloc(server, GFP_KERNEL); | ||
3450 | if (IS_ERR(data->label)) | ||
3451 | goto out_free; | ||
3452 | |||
3261 | data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; | 3453 | data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; |
3262 | data->msg.rpc_argp = &data->arg; | 3454 | data->msg.rpc_argp = &data->arg; |
3263 | data->msg.rpc_resp = &data->res; | 3455 | data->msg.rpc_resp = &data->res; |
@@ -3266,13 +3458,17 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, | |||
3266 | data->arg.name = name; | 3458 | data->arg.name = name; |
3267 | data->arg.attrs = sattr; | 3459 | data->arg.attrs = sattr; |
3268 | data->arg.ftype = ftype; | 3460 | data->arg.ftype = ftype; |
3269 | data->arg.bitmask = server->attr_bitmask; | 3461 | data->arg.bitmask = nfs4_bitmask(server, data->label); |
3270 | data->res.server = server; | 3462 | data->res.server = server; |
3271 | data->res.fh = &data->fh; | 3463 | data->res.fh = &data->fh; |
3272 | data->res.fattr = &data->fattr; | 3464 | data->res.fattr = &data->fattr; |
3465 | data->res.label = data->label; | ||
3273 | nfs_fattr_init(data->res.fattr); | 3466 | nfs_fattr_init(data->res.fattr); |
3274 | } | 3467 | } |
3275 | return data; | 3468 | return data; |
3469 | out_free: | ||
3470 | kfree(data); | ||
3471 | return NULL; | ||
3276 | } | 3472 | } |
3277 | 3473 | ||
3278 | static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) | 3474 | static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) |
@@ -3281,18 +3477,20 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ | |||
3281 | &data->arg.seq_args, &data->res.seq_res, 1); | 3477 | &data->arg.seq_args, &data->res.seq_res, 1); |
3282 | if (status == 0) { | 3478 | if (status == 0) { |
3283 | update_changeattr(dir, &data->res.dir_cinfo); | 3479 | update_changeattr(dir, &data->res.dir_cinfo); |
3284 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | 3480 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label); |
3285 | } | 3481 | } |
3286 | return status; | 3482 | return status; |
3287 | } | 3483 | } |
3288 | 3484 | ||
3289 | static void nfs4_free_createdata(struct nfs4_createdata *data) | 3485 | static void nfs4_free_createdata(struct nfs4_createdata *data) |
3290 | { | 3486 | { |
3487 | nfs4_label_free(data->label); | ||
3291 | kfree(data); | 3488 | kfree(data); |
3292 | } | 3489 | } |
3293 | 3490 | ||
3294 | static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, | 3491 | static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, |
3295 | struct page *page, unsigned int len, struct iattr *sattr) | 3492 | struct page *page, unsigned int len, struct iattr *sattr, |
3493 | struct nfs4_label *label) | ||
3296 | { | 3494 | { |
3297 | struct nfs4_createdata *data; | 3495 | struct nfs4_createdata *data; |
3298 | int status = -ENAMETOOLONG; | 3496 | int status = -ENAMETOOLONG; |
@@ -3308,6 +3506,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, | |||
3308 | data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK]; | 3506 | data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK]; |
3309 | data->arg.u.symlink.pages = &page; | 3507 | data->arg.u.symlink.pages = &page; |
3310 | data->arg.u.symlink.len = len; | 3508 | data->arg.u.symlink.len = len; |
3509 | data->arg.label = label; | ||
3311 | 3510 | ||
3312 | status = nfs4_do_create(dir, dentry, data); | 3511 | status = nfs4_do_create(dir, dentry, data); |
3313 | 3512 | ||
@@ -3320,18 +3519,24 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, | |||
3320 | struct page *page, unsigned int len, struct iattr *sattr) | 3519 | struct page *page, unsigned int len, struct iattr *sattr) |
3321 | { | 3520 | { |
3322 | struct nfs4_exception exception = { }; | 3521 | struct nfs4_exception exception = { }; |
3522 | struct nfs4_label l, *label = NULL; | ||
3323 | int err; | 3523 | int err; |
3524 | |||
3525 | label = nfs4_label_init_security(dir, dentry, sattr, &l); | ||
3526 | |||
3324 | do { | 3527 | do { |
3325 | err = nfs4_handle_exception(NFS_SERVER(dir), | 3528 | err = nfs4_handle_exception(NFS_SERVER(dir), |
3326 | _nfs4_proc_symlink(dir, dentry, page, | 3529 | _nfs4_proc_symlink(dir, dentry, page, |
3327 | len, sattr), | 3530 | len, sattr, label), |
3328 | &exception); | 3531 | &exception); |
3329 | } while (exception.retry); | 3532 | } while (exception.retry); |
3533 | |||
3534 | nfs4_label_release_security(label); | ||
3330 | return err; | 3535 | return err; |
3331 | } | 3536 | } |
3332 | 3537 | ||
3333 | static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, | 3538 | static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, |
3334 | struct iattr *sattr) | 3539 | struct iattr *sattr, struct nfs4_label *label) |
3335 | { | 3540 | { |
3336 | struct nfs4_createdata *data; | 3541 | struct nfs4_createdata *data; |
3337 | int status = -ENOMEM; | 3542 | int status = -ENOMEM; |
@@ -3340,6 +3545,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, | |||
3340 | if (data == NULL) | 3545 | if (data == NULL) |
3341 | goto out; | 3546 | goto out; |
3342 | 3547 | ||
3548 | data->arg.label = label; | ||
3343 | status = nfs4_do_create(dir, dentry, data); | 3549 | status = nfs4_do_create(dir, dentry, data); |
3344 | 3550 | ||
3345 | nfs4_free_createdata(data); | 3551 | nfs4_free_createdata(data); |
@@ -3351,14 +3557,19 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, | |||
3351 | struct iattr *sattr) | 3557 | struct iattr *sattr) |
3352 | { | 3558 | { |
3353 | struct nfs4_exception exception = { }; | 3559 | struct nfs4_exception exception = { }; |
3560 | struct nfs4_label l, *label = NULL; | ||
3354 | int err; | 3561 | int err; |
3355 | 3562 | ||
3563 | label = nfs4_label_init_security(dir, dentry, sattr, &l); | ||
3564 | |||
3356 | sattr->ia_mode &= ~current_umask(); | 3565 | sattr->ia_mode &= ~current_umask(); |
3357 | do { | 3566 | do { |
3358 | err = nfs4_handle_exception(NFS_SERVER(dir), | 3567 | err = nfs4_handle_exception(NFS_SERVER(dir), |
3359 | _nfs4_proc_mkdir(dir, dentry, sattr), | 3568 | _nfs4_proc_mkdir(dir, dentry, sattr, label), |
3360 | &exception); | 3569 | &exception); |
3361 | } while (exception.retry); | 3570 | } while (exception.retry); |
3571 | nfs4_label_release_security(label); | ||
3572 | |||
3362 | return err; | 3573 | return err; |
3363 | } | 3574 | } |
3364 | 3575 | ||
@@ -3416,7 +3627,7 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
3416 | } | 3627 | } |
3417 | 3628 | ||
3418 | static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, | 3629 | static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, |
3419 | struct iattr *sattr, dev_t rdev) | 3630 | struct iattr *sattr, struct nfs4_label *label, dev_t rdev) |
3420 | { | 3631 | { |
3421 | struct nfs4_createdata *data; | 3632 | struct nfs4_createdata *data; |
3422 | int mode = sattr->ia_mode; | 3633 | int mode = sattr->ia_mode; |
@@ -3441,7 +3652,8 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, | |||
3441 | status = -EINVAL; | 3652 | status = -EINVAL; |
3442 | goto out_free; | 3653 | goto out_free; |
3443 | } | 3654 | } |
3444 | 3655 | ||
3656 | data->arg.label = label; | ||
3445 | status = nfs4_do_create(dir, dentry, data); | 3657 | status = nfs4_do_create(dir, dentry, data); |
3446 | out_free: | 3658 | out_free: |
3447 | nfs4_free_createdata(data); | 3659 | nfs4_free_createdata(data); |
@@ -3453,14 +3665,20 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, | |||
3453 | struct iattr *sattr, dev_t rdev) | 3665 | struct iattr *sattr, dev_t rdev) |
3454 | { | 3666 | { |
3455 | struct nfs4_exception exception = { }; | 3667 | struct nfs4_exception exception = { }; |
3668 | struct nfs4_label l, *label = NULL; | ||
3456 | int err; | 3669 | int err; |
3457 | 3670 | ||
3671 | label = nfs4_label_init_security(dir, dentry, sattr, &l); | ||
3672 | |||
3458 | sattr->ia_mode &= ~current_umask(); | 3673 | sattr->ia_mode &= ~current_umask(); |
3459 | do { | 3674 | do { |
3460 | err = nfs4_handle_exception(NFS_SERVER(dir), | 3675 | err = nfs4_handle_exception(NFS_SERVER(dir), |
3461 | _nfs4_proc_mknod(dir, dentry, sattr, rdev), | 3676 | _nfs4_proc_mknod(dir, dentry, sattr, label, rdev), |
3462 | &exception); | 3677 | &exception); |
3463 | } while (exception.retry); | 3678 | } while (exception.retry); |
3679 | |||
3680 | nfs4_label_release_security(label); | ||
3681 | |||
3464 | return err; | 3682 | return err; |
3465 | } | 3683 | } |
3466 | 3684 | ||
@@ -4187,6 +4405,155 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen | |||
4187 | return err; | 4405 | return err; |
4188 | } | 4406 | } |
4189 | 4407 | ||
4408 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
4409 | static int _nfs4_get_security_label(struct inode *inode, void *buf, | ||
4410 | size_t buflen) | ||
4411 | { | ||
4412 | struct nfs_server *server = NFS_SERVER(inode); | ||
4413 | struct nfs_fattr fattr; | ||
4414 | struct nfs4_label label = {0, 0, buflen, buf}; | ||
4415 | |||
4416 | u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; | ||
4417 | struct nfs4_getattr_arg args = { | ||
4418 | .fh = NFS_FH(inode), | ||
4419 | .bitmask = bitmask, | ||
4420 | }; | ||
4421 | struct nfs4_getattr_res res = { | ||
4422 | .fattr = &fattr, | ||
4423 | .label = &label, | ||
4424 | .server = server, | ||
4425 | }; | ||
4426 | struct rpc_message msg = { | ||
4427 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR], | ||
4428 | .rpc_argp = &args, | ||
4429 | .rpc_resp = &res, | ||
4430 | }; | ||
4431 | int ret; | ||
4432 | |||
4433 | nfs_fattr_init(&fattr); | ||
4434 | |||
4435 | ret = rpc_call_sync(server->client, &msg, 0); | ||
4436 | if (ret) | ||
4437 | return ret; | ||
4438 | if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) | ||
4439 | return -ENOENT; | ||
4440 | if (buflen < label.len) | ||
4441 | return -ERANGE; | ||
4442 | return 0; | ||
4443 | } | ||
4444 | |||
4445 | static int nfs4_get_security_label(struct inode *inode, void *buf, | ||
4446 | size_t buflen) | ||
4447 | { | ||
4448 | struct nfs4_exception exception = { }; | ||
4449 | int err; | ||
4450 | |||
4451 | if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) | ||
4452 | return -EOPNOTSUPP; | ||
4453 | |||
4454 | do { | ||
4455 | err = nfs4_handle_exception(NFS_SERVER(inode), | ||
4456 | _nfs4_get_security_label(inode, buf, buflen), | ||
4457 | &exception); | ||
4458 | } while (exception.retry); | ||
4459 | return err; | ||
4460 | } | ||
4461 | |||
4462 | static int _nfs4_do_set_security_label(struct inode *inode, | ||
4463 | struct nfs4_label *ilabel, | ||
4464 | struct nfs_fattr *fattr, | ||
4465 | struct nfs4_label *olabel) | ||
4466 | { | ||
4467 | |||
4468 | struct iattr sattr = {0}; | ||
4469 | struct nfs_server *server = NFS_SERVER(inode); | ||
4470 | const u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; | ||
4471 | struct nfs_setattrargs args = { | ||
4472 | .fh = NFS_FH(inode), | ||
4473 | .iap = &sattr, | ||
4474 | .server = server, | ||
4475 | .bitmask = bitmask, | ||
4476 | .label = ilabel, | ||
4477 | }; | ||
4478 | struct nfs_setattrres res = { | ||
4479 | .fattr = fattr, | ||
4480 | .label = olabel, | ||
4481 | .server = server, | ||
4482 | }; | ||
4483 | struct rpc_message msg = { | ||
4484 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], | ||
4485 | .rpc_argp = &args, | ||
4486 | .rpc_resp = &res, | ||
4487 | }; | ||
4488 | int status; | ||
4489 | |||
4490 | nfs4_stateid_copy(&args.stateid, &zero_stateid); | ||
4491 | |||
4492 | status = rpc_call_sync(server->client, &msg, 0); | ||
4493 | if (status) | ||
4494 | dprintk("%s failed: %d\n", __func__, status); | ||
4495 | |||
4496 | return status; | ||
4497 | } | ||
4498 | |||
4499 | static int nfs4_do_set_security_label(struct inode *inode, | ||
4500 | struct nfs4_label *ilabel, | ||
4501 | struct nfs_fattr *fattr, | ||
4502 | struct nfs4_label *olabel) | ||
4503 | { | ||
4504 | struct nfs4_exception exception = { }; | ||
4505 | int err; | ||
4506 | |||
4507 | do { | ||
4508 | err = nfs4_handle_exception(NFS_SERVER(inode), | ||
4509 | _nfs4_do_set_security_label(inode, ilabel, | ||
4510 | fattr, olabel), | ||
4511 | &exception); | ||
4512 | } while (exception.retry); | ||
4513 | return err; | ||
4514 | } | ||
4515 | |||
4516 | static int | ||
4517 | nfs4_set_security_label(struct dentry *dentry, const void *buf, size_t buflen) | ||
4518 | { | ||
4519 | struct nfs4_label ilabel, *olabel = NULL; | ||
4520 | struct nfs_fattr fattr; | ||
4521 | struct rpc_cred *cred; | ||
4522 | struct inode *inode = dentry->d_inode; | ||
4523 | int status; | ||
4524 | |||
4525 | if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) | ||
4526 | return -EOPNOTSUPP; | ||
4527 | |||
4528 | nfs_fattr_init(&fattr); | ||
4529 | |||
4530 | ilabel.pi = 0; | ||
4531 | ilabel.lfs = 0; | ||
4532 | ilabel.label = (char *)buf; | ||
4533 | ilabel.len = buflen; | ||
4534 | |||
4535 | cred = rpc_lookup_cred(); | ||
4536 | if (IS_ERR(cred)) | ||
4537 | return PTR_ERR(cred); | ||
4538 | |||
4539 | olabel = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); | ||
4540 | if (IS_ERR(olabel)) { | ||
4541 | status = -PTR_ERR(olabel); | ||
4542 | goto out; | ||
4543 | } | ||
4544 | |||
4545 | status = nfs4_do_set_security_label(inode, &ilabel, &fattr, olabel); | ||
4546 | if (status == 0) | ||
4547 | nfs_setsecurity(inode, &fattr, olabel); | ||
4548 | |||
4549 | nfs4_label_free(olabel); | ||
4550 | out: | ||
4551 | put_rpccred(cred); | ||
4552 | return status; | ||
4553 | } | ||
4554 | #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ | ||
4555 | |||
4556 | |||
4190 | static int | 4557 | static int |
4191 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) | 4558 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) |
4192 | { | 4559 | { |
@@ -4345,7 +4712,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | |||
4345 | /* cb_client4 */ | 4712 | /* cb_client4 */ |
4346 | rcu_read_lock(); | 4713 | rcu_read_lock(); |
4347 | setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, | 4714 | setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, |
4348 | sizeof(setclientid.sc_netid), | 4715 | sizeof(setclientid.sc_netid), "%s", |
4349 | rpc_peeraddr2str(clp->cl_rpcclient, | 4716 | rpc_peeraddr2str(clp->cl_rpcclient, |
4350 | RPC_DISPLAY_NETID)); | 4717 | RPC_DISPLAY_NETID)); |
4351 | rcu_read_unlock(); | 4718 | rcu_read_unlock(); |
@@ -4528,7 +4895,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 | |||
4528 | static unsigned long | 4895 | static unsigned long |
4529 | nfs4_set_lock_task_retry(unsigned long timeout) | 4896 | nfs4_set_lock_task_retry(unsigned long timeout) |
4530 | { | 4897 | { |
4531 | freezable_schedule_timeout_killable(timeout); | 4898 | freezable_schedule_timeout_killable_unsafe(timeout); |
4532 | timeout <<= 1; | 4899 | timeout <<= 1; |
4533 | if (timeout > NFS4_LOCK_MAXTIMEOUT) | 4900 | if (timeout > NFS4_LOCK_MAXTIMEOUT) |
4534 | return NFS4_LOCK_MAXTIMEOUT; | 4901 | return NFS4_LOCK_MAXTIMEOUT; |
@@ -5056,13 +5423,18 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) | |||
5056 | 5423 | ||
5057 | list_for_each_entry(lsp, &state->lock_states, ls_locks) { | 5424 | list_for_each_entry(lsp, &state->lock_states, ls_locks) { |
5058 | if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { | 5425 | if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { |
5059 | status = nfs41_test_stateid(server, &lsp->ls_stateid); | 5426 | struct rpc_cred *cred = lsp->ls_state->owner->so_cred; |
5427 | |||
5428 | status = nfs41_test_stateid(server, | ||
5429 | &lsp->ls_stateid, | ||
5430 | cred); | ||
5060 | if (status != NFS_OK) { | 5431 | if (status != NFS_OK) { |
5061 | /* Free the stateid unless the server | 5432 | /* Free the stateid unless the server |
5062 | * informs us the stateid is unrecognized. */ | 5433 | * informs us the stateid is unrecognized. */ |
5063 | if (status != -NFS4ERR_BAD_STATEID) | 5434 | if (status != -NFS4ERR_BAD_STATEID) |
5064 | nfs41_free_stateid(server, | 5435 | nfs41_free_stateid(server, |
5065 | &lsp->ls_stateid); | 5436 | &lsp->ls_stateid, |
5437 | cred); | ||
5066 | clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); | 5438 | clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); |
5067 | ret = status; | 5439 | ret = status; |
5068 | } | 5440 | } |
@@ -5295,6 +5667,53 @@ static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list, | |||
5295 | return len; | 5667 | return len; |
5296 | } | 5668 | } |
5297 | 5669 | ||
5670 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
5671 | static inline int nfs4_server_supports_labels(struct nfs_server *server) | ||
5672 | { | ||
5673 | return server->caps & NFS_CAP_SECURITY_LABEL; | ||
5674 | } | ||
5675 | |||
5676 | static int nfs4_xattr_set_nfs4_label(struct dentry *dentry, const char *key, | ||
5677 | const void *buf, size_t buflen, | ||
5678 | int flags, int type) | ||
5679 | { | ||
5680 | if (security_ismaclabel(key)) | ||
5681 | return nfs4_set_security_label(dentry, buf, buflen); | ||
5682 | |||
5683 | return -EOPNOTSUPP; | ||
5684 | } | ||
5685 | |||
5686 | static int nfs4_xattr_get_nfs4_label(struct dentry *dentry, const char *key, | ||
5687 | void *buf, size_t buflen, int type) | ||
5688 | { | ||
5689 | if (security_ismaclabel(key)) | ||
5690 | return nfs4_get_security_label(dentry->d_inode, buf, buflen); | ||
5691 | return -EOPNOTSUPP; | ||
5692 | } | ||
5693 | |||
5694 | static size_t nfs4_xattr_list_nfs4_label(struct dentry *dentry, char *list, | ||
5695 | size_t list_len, const char *name, | ||
5696 | size_t name_len, int type) | ||
5697 | { | ||
5698 | size_t len = 0; | ||
5699 | |||
5700 | if (nfs_server_capable(dentry->d_inode, NFS_CAP_SECURITY_LABEL)) { | ||
5701 | len = security_inode_listsecurity(dentry->d_inode, NULL, 0); | ||
5702 | if (list && len <= list_len) | ||
5703 | security_inode_listsecurity(dentry->d_inode, list, len); | ||
5704 | } | ||
5705 | return len; | ||
5706 | } | ||
5707 | |||
5708 | static const struct xattr_handler nfs4_xattr_nfs4_label_handler = { | ||
5709 | .prefix = XATTR_SECURITY_PREFIX, | ||
5710 | .list = nfs4_xattr_list_nfs4_label, | ||
5711 | .get = nfs4_xattr_get_nfs4_label, | ||
5712 | .set = nfs4_xattr_set_nfs4_label, | ||
5713 | }; | ||
5714 | #endif | ||
5715 | |||
5716 | |||
5298 | /* | 5717 | /* |
5299 | * nfs_fhget will use either the mounted_on_fileid or the fileid | 5718 | * nfs_fhget will use either the mounted_on_fileid or the fileid |
5300 | */ | 5719 | */ |
@@ -5318,7 +5737,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, | |||
5318 | struct page *page) | 5737 | struct page *page) |
5319 | { | 5738 | { |
5320 | struct nfs_server *server = NFS_SERVER(dir); | 5739 | struct nfs_server *server = NFS_SERVER(dir); |
5321 | u32 bitmask[2] = { | 5740 | u32 bitmask[3] = { |
5322 | [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, | 5741 | [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, |
5323 | }; | 5742 | }; |
5324 | struct nfs4_fs_locations_arg args = { | 5743 | struct nfs4_fs_locations_arg args = { |
@@ -5505,7 +5924,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) | |||
5505 | struct nfs41_exchange_id_args args = { | 5924 | struct nfs41_exchange_id_args args = { |
5506 | .verifier = &verifier, | 5925 | .verifier = &verifier, |
5507 | .client = clp, | 5926 | .client = clp, |
5508 | .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, | 5927 | .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | |
5928 | EXCHGID4_FLAG_BIND_PRINC_STATEID, | ||
5509 | }; | 5929 | }; |
5510 | struct nfs41_exchange_id_res res = { | 5930 | struct nfs41_exchange_id_res res = { |
5511 | 0 | 5931 | 0 |
@@ -5762,17 +6182,14 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) | |||
5762 | */ | 6182 | */ |
5763 | static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) | 6183 | static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) |
5764 | { | 6184 | { |
5765 | struct nfs4_session *session = args->client->cl_session; | 6185 | unsigned int max_rqst_sz, max_resp_sz; |
5766 | unsigned int mxrqst_sz = session->fc_target_max_rqst_sz, | 6186 | |
5767 | mxresp_sz = session->fc_target_max_resp_sz; | 6187 | max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead; |
6188 | max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead; | ||
5768 | 6189 | ||
5769 | if (mxrqst_sz == 0) | ||
5770 | mxrqst_sz = NFS_MAX_FILE_IO_SIZE; | ||
5771 | if (mxresp_sz == 0) | ||
5772 | mxresp_sz = NFS_MAX_FILE_IO_SIZE; | ||
5773 | /* Fore channel attributes */ | 6190 | /* Fore channel attributes */ |
5774 | args->fc_attrs.max_rqst_sz = mxrqst_sz; | 6191 | args->fc_attrs.max_rqst_sz = max_rqst_sz; |
5775 | args->fc_attrs.max_resp_sz = mxresp_sz; | 6192 | args->fc_attrs.max_resp_sz = max_resp_sz; |
5776 | args->fc_attrs.max_ops = NFS4_MAX_OPS; | 6193 | args->fc_attrs.max_ops = NFS4_MAX_OPS; |
5777 | args->fc_attrs.max_reqs = max_session_slots; | 6194 | args->fc_attrs.max_reqs = max_session_slots; |
5778 | 6195 | ||
@@ -6159,12 +6576,14 @@ static const struct rpc_call_ops nfs4_reclaim_complete_call_ops = { | |||
6159 | /* | 6576 | /* |
6160 | * Issue a global reclaim complete. | 6577 | * Issue a global reclaim complete. |
6161 | */ | 6578 | */ |
6162 | static int nfs41_proc_reclaim_complete(struct nfs_client *clp) | 6579 | static int nfs41_proc_reclaim_complete(struct nfs_client *clp, |
6580 | struct rpc_cred *cred) | ||
6163 | { | 6581 | { |
6164 | struct nfs4_reclaim_complete_data *calldata; | 6582 | struct nfs4_reclaim_complete_data *calldata; |
6165 | struct rpc_task *task; | 6583 | struct rpc_task *task; |
6166 | struct rpc_message msg = { | 6584 | struct rpc_message msg = { |
6167 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE], | 6585 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE], |
6586 | .rpc_cred = cred, | ||
6168 | }; | 6587 | }; |
6169 | struct rpc_task_setup task_setup_data = { | 6588 | struct rpc_task_setup task_setup_data = { |
6170 | .rpc_client = clp->cl_rpcclient, | 6589 | .rpc_client = clp->cl_rpcclient, |
@@ -6348,6 +6767,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) | |||
6348 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], | 6767 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], |
6349 | .rpc_argp = &lgp->args, | 6768 | .rpc_argp = &lgp->args, |
6350 | .rpc_resp = &lgp->res, | 6769 | .rpc_resp = &lgp->res, |
6770 | .rpc_cred = lgp->cred, | ||
6351 | }; | 6771 | }; |
6352 | struct rpc_task_setup task_setup_data = { | 6772 | struct rpc_task_setup task_setup_data = { |
6353 | .rpc_client = server->client, | 6773 | .rpc_client = server->client, |
@@ -6451,6 +6871,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | |||
6451 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], | 6871 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], |
6452 | .rpc_argp = &lrp->args, | 6872 | .rpc_argp = &lrp->args, |
6453 | .rpc_resp = &lrp->res, | 6873 | .rpc_resp = &lrp->res, |
6874 | .rpc_cred = lrp->cred, | ||
6454 | }; | 6875 | }; |
6455 | struct rpc_task_setup task_setup_data = { | 6876 | struct rpc_task_setup task_setup_data = { |
6456 | .rpc_client = lrp->clp->cl_rpcclient, | 6877 | .rpc_client = lrp->clp->cl_rpcclient, |
@@ -6520,7 +6941,9 @@ int nfs4_proc_getdevicelist(struct nfs_server *server, | |||
6520 | EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); | 6941 | EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); |
6521 | 6942 | ||
6522 | static int | 6943 | static int |
6523 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | 6944 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, |
6945 | struct pnfs_device *pdev, | ||
6946 | struct rpc_cred *cred) | ||
6524 | { | 6947 | { |
6525 | struct nfs4_getdeviceinfo_args args = { | 6948 | struct nfs4_getdeviceinfo_args args = { |
6526 | .pdev = pdev, | 6949 | .pdev = pdev, |
@@ -6532,6 +6955,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | |||
6532 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO], | 6955 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO], |
6533 | .rpc_argp = &args, | 6956 | .rpc_argp = &args, |
6534 | .rpc_resp = &res, | 6957 | .rpc_resp = &res, |
6958 | .rpc_cred = cred, | ||
6535 | }; | 6959 | }; |
6536 | int status; | 6960 | int status; |
6537 | 6961 | ||
@@ -6542,14 +6966,16 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | |||
6542 | return status; | 6966 | return status; |
6543 | } | 6967 | } |
6544 | 6968 | ||
6545 | int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | 6969 | int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
6970 | struct pnfs_device *pdev, | ||
6971 | struct rpc_cred *cred) | ||
6546 | { | 6972 | { |
6547 | struct nfs4_exception exception = { }; | 6973 | struct nfs4_exception exception = { }; |
6548 | int err; | 6974 | int err; |
6549 | 6975 | ||
6550 | do { | 6976 | do { |
6551 | err = nfs4_handle_exception(server, | 6977 | err = nfs4_handle_exception(server, |
6552 | _nfs4_proc_getdeviceinfo(server, pdev), | 6978 | _nfs4_proc_getdeviceinfo(server, pdev, cred), |
6553 | &exception); | 6979 | &exception); |
6554 | } while (exception.retry); | 6980 | } while (exception.retry); |
6555 | return err; | 6981 | return err; |
@@ -6733,7 +7159,9 @@ out: | |||
6733 | return err; | 7159 | return err; |
6734 | } | 7160 | } |
6735 | 7161 | ||
6736 | static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) | 7162 | static int _nfs41_test_stateid(struct nfs_server *server, |
7163 | nfs4_stateid *stateid, | ||
7164 | struct rpc_cred *cred) | ||
6737 | { | 7165 | { |
6738 | int status; | 7166 | int status; |
6739 | struct nfs41_test_stateid_args args = { | 7167 | struct nfs41_test_stateid_args args = { |
@@ -6744,6 +7172,7 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) | |||
6744 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], | 7172 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], |
6745 | .rpc_argp = &args, | 7173 | .rpc_argp = &args, |
6746 | .rpc_resp = &res, | 7174 | .rpc_resp = &res, |
7175 | .rpc_cred = cred, | ||
6747 | }; | 7176 | }; |
6748 | 7177 | ||
6749 | dprintk("NFS call test_stateid %p\n", stateid); | 7178 | dprintk("NFS call test_stateid %p\n", stateid); |
@@ -6764,17 +7193,20 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) | |||
6764 | * | 7193 | * |
6765 | * @server: server / transport on which to perform the operation | 7194 | * @server: server / transport on which to perform the operation |
6766 | * @stateid: state ID to test | 7195 | * @stateid: state ID to test |
7196 | * @cred: credential | ||
6767 | * | 7197 | * |
6768 | * Returns NFS_OK if the server recognizes that "stateid" is valid. | 7198 | * Returns NFS_OK if the server recognizes that "stateid" is valid. |
6769 | * Otherwise a negative NFS4ERR value is returned if the operation | 7199 | * Otherwise a negative NFS4ERR value is returned if the operation |
6770 | * failed or the state ID is not currently valid. | 7200 | * failed or the state ID is not currently valid. |
6771 | */ | 7201 | */ |
6772 | static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) | 7202 | static int nfs41_test_stateid(struct nfs_server *server, |
7203 | nfs4_stateid *stateid, | ||
7204 | struct rpc_cred *cred) | ||
6773 | { | 7205 | { |
6774 | struct nfs4_exception exception = { }; | 7206 | struct nfs4_exception exception = { }; |
6775 | int err; | 7207 | int err; |
6776 | do { | 7208 | do { |
6777 | err = _nfs41_test_stateid(server, stateid); | 7209 | err = _nfs41_test_stateid(server, stateid, cred); |
6778 | if (err != -NFS4ERR_DELAY) | 7210 | if (err != -NFS4ERR_DELAY) |
6779 | break; | 7211 | break; |
6780 | nfs4_handle_exception(server, err, &exception); | 7212 | nfs4_handle_exception(server, err, &exception); |
@@ -6823,10 +7255,12 @@ const struct rpc_call_ops nfs41_free_stateid_ops = { | |||
6823 | 7255 | ||
6824 | static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, | 7256 | static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, |
6825 | nfs4_stateid *stateid, | 7257 | nfs4_stateid *stateid, |
7258 | struct rpc_cred *cred, | ||
6826 | bool privileged) | 7259 | bool privileged) |
6827 | { | 7260 | { |
6828 | struct rpc_message msg = { | 7261 | struct rpc_message msg = { |
6829 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], | 7262 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], |
7263 | .rpc_cred = cred, | ||
6830 | }; | 7264 | }; |
6831 | struct rpc_task_setup task_setup = { | 7265 | struct rpc_task_setup task_setup = { |
6832 | .rpc_client = server->client, | 7266 | .rpc_client = server->client, |
@@ -6859,16 +7293,19 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, | |||
6859 | * | 7293 | * |
6860 | * @server: server / transport on which to perform the operation | 7294 | * @server: server / transport on which to perform the operation |
6861 | * @stateid: state ID to release | 7295 | * @stateid: state ID to release |
7296 | * @cred: credential | ||
6862 | * | 7297 | * |
6863 | * Returns NFS_OK if the server freed "stateid". Otherwise a | 7298 | * Returns NFS_OK if the server freed "stateid". Otherwise a |
6864 | * negative NFS4ERR value is returned. | 7299 | * negative NFS4ERR value is returned. |
6865 | */ | 7300 | */ |
6866 | static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) | 7301 | static int nfs41_free_stateid(struct nfs_server *server, |
7302 | nfs4_stateid *stateid, | ||
7303 | struct rpc_cred *cred) | ||
6867 | { | 7304 | { |
6868 | struct rpc_task *task; | 7305 | struct rpc_task *task; |
6869 | int ret; | 7306 | int ret; |
6870 | 7307 | ||
6871 | task = _nfs41_free_stateid(server, stateid, true); | 7308 | task = _nfs41_free_stateid(server, stateid, cred, true); |
6872 | if (IS_ERR(task)) | 7309 | if (IS_ERR(task)) |
6873 | return PTR_ERR(task); | 7310 | return PTR_ERR(task); |
6874 | ret = rpc_wait_for_completion_task(task); | 7311 | ret = rpc_wait_for_completion_task(task); |
@@ -6881,8 +7318,9 @@ static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) | |||
6881 | static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) | 7318 | static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) |
6882 | { | 7319 | { |
6883 | struct rpc_task *task; | 7320 | struct rpc_task *task; |
7321 | struct rpc_cred *cred = lsp->ls_state->owner->so_cred; | ||
6884 | 7322 | ||
6885 | task = _nfs41_free_stateid(server, &lsp->ls_stateid, false); | 7323 | task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); |
6886 | nfs4_free_lock_state(server, lsp); | 7324 | nfs4_free_lock_state(server, lsp); |
6887 | if (IS_ERR(task)) | 7325 | if (IS_ERR(task)) |
6888 | return PTR_ERR(task); | 7326 | return PTR_ERR(task); |
@@ -7004,11 +7442,33 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | |||
7004 | }; | 7442 | }; |
7005 | #endif | 7443 | #endif |
7006 | 7444 | ||
7445 | #if defined(CONFIG_NFS_V4_2) | ||
7446 | static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | ||
7447 | .minor_version = 2, | ||
7448 | .init_caps = NFS_CAP_READDIRPLUS | ||
7449 | | NFS_CAP_ATOMIC_OPEN | ||
7450 | | NFS_CAP_CHANGE_ATTR | ||
7451 | | NFS_CAP_POSIX_LOCK | ||
7452 | | NFS_CAP_STATEID_NFSV41 | ||
7453 | | NFS_CAP_ATOMIC_OPEN_V1, | ||
7454 | .call_sync = nfs4_call_sync_sequence, | ||
7455 | .match_stateid = nfs41_match_stateid, | ||
7456 | .find_root_sec = nfs41_find_root_sec, | ||
7457 | .free_lock_state = nfs41_free_lock_state, | ||
7458 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, | ||
7459 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, | ||
7460 | .state_renewal_ops = &nfs41_state_renewal_ops, | ||
7461 | }; | ||
7462 | #endif | ||
7463 | |||
7007 | const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { | 7464 | const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { |
7008 | [0] = &nfs_v4_0_minor_ops, | 7465 | [0] = &nfs_v4_0_minor_ops, |
7009 | #if defined(CONFIG_NFS_V4_1) | 7466 | #if defined(CONFIG_NFS_V4_1) |
7010 | [1] = &nfs_v4_1_minor_ops, | 7467 | [1] = &nfs_v4_1_minor_ops, |
7011 | #endif | 7468 | #endif |
7469 | #if defined(CONFIG_NFS_V4_2) | ||
7470 | [2] = &nfs_v4_2_minor_ops, | ||
7471 | #endif | ||
7012 | }; | 7472 | }; |
7013 | 7473 | ||
7014 | const struct inode_operations nfs4_dir_inode_operations = { | 7474 | const struct inode_operations nfs4_dir_inode_operations = { |
@@ -7108,6 +7568,9 @@ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { | |||
7108 | 7568 | ||
7109 | const struct xattr_handler *nfs4_xattr_handlers[] = { | 7569 | const struct xattr_handler *nfs4_xattr_handlers[] = { |
7110 | &nfs4_xattr_nfs4_acl_handler, | 7570 | &nfs4_xattr_nfs4_acl_handler, |
7571 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
7572 | &nfs4_xattr_nfs4_label_handler, | ||
7573 | #endif | ||
7111 | NULL | 7574 | NULL |
7112 | }; | 7575 | }; |
7113 | 7576 | ||
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index c4e225e4a9af..36e21cb29d65 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c | |||
@@ -478,48 +478,12 @@ static int nfs41_check_session_ready(struct nfs_client *clp) | |||
478 | return 0; | 478 | return 0; |
479 | } | 479 | } |
480 | 480 | ||
481 | int nfs4_init_session(struct nfs_server *server) | 481 | int nfs4_init_session(struct nfs_client *clp) |
482 | { | 482 | { |
483 | struct nfs_client *clp = server->nfs_client; | ||
484 | struct nfs4_session *session; | ||
485 | unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE; | ||
486 | unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE; | ||
487 | |||
488 | if (!nfs4_has_session(clp)) | 483 | if (!nfs4_has_session(clp)) |
489 | return 0; | 484 | return 0; |
490 | 485 | ||
491 | if (server->rsize != 0) | 486 | clear_bit(NFS4_SESSION_INITING, &clp->cl_session->session_state); |
492 | target_max_resp_sz = server->rsize; | ||
493 | target_max_resp_sz += nfs41_maxread_overhead; | ||
494 | |||
495 | if (server->wsize != 0) | ||
496 | target_max_rqst_sz = server->wsize; | ||
497 | target_max_rqst_sz += nfs41_maxwrite_overhead; | ||
498 | |||
499 | session = clp->cl_session; | ||
500 | spin_lock(&clp->cl_lock); | ||
501 | if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { | ||
502 | /* Initialise targets and channel attributes */ | ||
503 | session->fc_target_max_rqst_sz = target_max_rqst_sz; | ||
504 | session->fc_attrs.max_rqst_sz = target_max_rqst_sz; | ||
505 | session->fc_target_max_resp_sz = target_max_resp_sz; | ||
506 | session->fc_attrs.max_resp_sz = target_max_resp_sz; | ||
507 | } else { | ||
508 | /* Just adjust the targets */ | ||
509 | if (target_max_rqst_sz > session->fc_target_max_rqst_sz) { | ||
510 | session->fc_target_max_rqst_sz = target_max_rqst_sz; | ||
511 | set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); | ||
512 | } | ||
513 | if (target_max_resp_sz > session->fc_target_max_resp_sz) { | ||
514 | session->fc_target_max_resp_sz = target_max_resp_sz; | ||
515 | set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); | ||
516 | } | ||
517 | } | ||
518 | spin_unlock(&clp->cl_lock); | ||
519 | |||
520 | if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) | ||
521 | nfs4_schedule_lease_recovery(clp); | ||
522 | |||
523 | return nfs41_check_session_ready(clp); | 487 | return nfs41_check_session_ready(clp); |
524 | } | 488 | } |
525 | 489 | ||
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index ff7d9f0f8a65..3a153d82b90c 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h | |||
@@ -66,9 +66,6 @@ struct nfs4_session { | |||
66 | struct nfs4_channel_attrs bc_attrs; | 66 | struct nfs4_channel_attrs bc_attrs; |
67 | struct nfs4_slot_table bc_slot_table; | 67 | struct nfs4_slot_table bc_slot_table; |
68 | struct nfs_client *clp; | 68 | struct nfs_client *clp; |
69 | /* Create session arguments */ | ||
70 | unsigned int fc_target_max_rqst_sz; | ||
71 | unsigned int fc_target_max_resp_sz; | ||
72 | }; | 69 | }; |
73 | 70 | ||
74 | enum nfs4_session_state { | 71 | enum nfs4_session_state { |
@@ -89,7 +86,7 @@ extern int nfs4_setup_session_slot_tables(struct nfs4_session *ses); | |||
89 | 86 | ||
90 | extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); | 87 | extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); |
91 | extern void nfs4_destroy_session(struct nfs4_session *session); | 88 | extern void nfs4_destroy_session(struct nfs4_session *session); |
92 | extern int nfs4_init_session(struct nfs_server *server); | 89 | extern int nfs4_init_session(struct nfs_client *clp); |
93 | extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); | 90 | extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); |
94 | 91 | ||
95 | extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); | 92 | extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); |
@@ -122,7 +119,7 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp) | |||
122 | 119 | ||
123 | #else /* defined(CONFIG_NFS_V4_1) */ | 120 | #else /* defined(CONFIG_NFS_V4_1) */ |
124 | 121 | ||
125 | static inline int nfs4_init_session(struct nfs_server *server) | 122 | static inline int nfs4_init_session(struct nfs_client *clp) |
126 | { | 123 | { |
127 | return 0; | 124 | return 0; |
128 | } | 125 | } |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1fab140764c4..e22862f13564 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -228,19 +228,8 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) | |||
228 | return status; | 228 | return status; |
229 | } | 229 | } |
230 | 230 | ||
231 | /* | 231 | static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl) |
232 | * Back channel returns NFS4ERR_DELAY for new requests when | ||
233 | * NFS4_SESSION_DRAINING is set so there is no work to be done when draining | ||
234 | * is ended. | ||
235 | */ | ||
236 | static void nfs4_end_drain_session(struct nfs_client *clp) | ||
237 | { | 232 | { |
238 | struct nfs4_session *ses = clp->cl_session; | ||
239 | struct nfs4_slot_table *tbl; | ||
240 | |||
241 | if (ses == NULL) | ||
242 | return; | ||
243 | tbl = &ses->fc_slot_table; | ||
244 | if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { | 233 | if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { |
245 | spin_lock(&tbl->slot_tbl_lock); | 234 | spin_lock(&tbl->slot_tbl_lock); |
246 | nfs41_wake_slot_table(tbl); | 235 | nfs41_wake_slot_table(tbl); |
@@ -248,6 +237,16 @@ static void nfs4_end_drain_session(struct nfs_client *clp) | |||
248 | } | 237 | } |
249 | } | 238 | } |
250 | 239 | ||
240 | static void nfs4_end_drain_session(struct nfs_client *clp) | ||
241 | { | ||
242 | struct nfs4_session *ses = clp->cl_session; | ||
243 | |||
244 | if (ses != NULL) { | ||
245 | nfs4_end_drain_slot_table(&ses->bc_slot_table); | ||
246 | nfs4_end_drain_slot_table(&ses->fc_slot_table); | ||
247 | } | ||
248 | } | ||
249 | |||
251 | /* | 250 | /* |
252 | * Signal state manager thread if session fore channel is drained | 251 | * Signal state manager thread if session fore channel is drained |
253 | */ | 252 | */ |
@@ -1194,7 +1193,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) | |||
1194 | snprintf(buf, sizeof(buf), "%s-manager", | 1193 | snprintf(buf, sizeof(buf), "%s-manager", |
1195 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); | 1194 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); |
1196 | rcu_read_unlock(); | 1195 | rcu_read_unlock(); |
1197 | task = kthread_run(nfs4_run_state_manager, clp, buf); | 1196 | task = kthread_run(nfs4_run_state_manager, clp, "%s", buf); |
1198 | if (IS_ERR(task)) { | 1197 | if (IS_ERR(task)) { |
1199 | printk(KERN_ERR "%s: kthread_run: %ld\n", | 1198 | printk(KERN_ERR "%s: kthread_run: %ld\n", |
1200 | __func__, PTR_ERR(task)); | 1199 | __func__, PTR_ERR(task)); |
@@ -1373,13 +1372,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ | |||
1373 | /* Guard against delegation returns and new lock/unlock calls */ | 1372 | /* Guard against delegation returns and new lock/unlock calls */ |
1374 | down_write(&nfsi->rwsem); | 1373 | down_write(&nfsi->rwsem); |
1375 | /* Protect inode->i_flock using the BKL */ | 1374 | /* Protect inode->i_flock using the BKL */ |
1376 | lock_flocks(); | 1375 | spin_lock(&inode->i_lock); |
1377 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1376 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
1378 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) | 1377 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) |
1379 | continue; | 1378 | continue; |
1380 | if (nfs_file_open_context(fl->fl_file)->state != state) | 1379 | if (nfs_file_open_context(fl->fl_file)->state != state) |
1381 | continue; | 1380 | continue; |
1382 | unlock_flocks(); | 1381 | spin_unlock(&inode->i_lock); |
1383 | status = ops->recover_lock(state, fl); | 1382 | status = ops->recover_lock(state, fl); |
1384 | switch (status) { | 1383 | switch (status) { |
1385 | case 0: | 1384 | case 0: |
@@ -1406,9 +1405,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ | |||
1406 | /* kill_proc(fl->fl_pid, SIGLOST, 1); */ | 1405 | /* kill_proc(fl->fl_pid, SIGLOST, 1); */ |
1407 | status = 0; | 1406 | status = 0; |
1408 | } | 1407 | } |
1409 | lock_flocks(); | 1408 | spin_lock(&inode->i_lock); |
1410 | } | 1409 | } |
1411 | unlock_flocks(); | 1410 | spin_unlock(&inode->i_lock); |
1412 | out: | 1411 | out: |
1413 | up_write(&nfsi->rwsem); | 1412 | up_write(&nfsi->rwsem); |
1414 | return status; | 1413 | return status; |
@@ -1563,11 +1562,12 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) | |||
1563 | } | 1562 | } |
1564 | 1563 | ||
1565 | static void nfs4_reclaim_complete(struct nfs_client *clp, | 1564 | static void nfs4_reclaim_complete(struct nfs_client *clp, |
1566 | const struct nfs4_state_recovery_ops *ops) | 1565 | const struct nfs4_state_recovery_ops *ops, |
1566 | struct rpc_cred *cred) | ||
1567 | { | 1567 | { |
1568 | /* Notify the server we're done reclaiming our state */ | 1568 | /* Notify the server we're done reclaiming our state */ |
1569 | if (ops->reclaim_complete) | 1569 | if (ops->reclaim_complete) |
1570 | (void)ops->reclaim_complete(clp); | 1570 | (void)ops->reclaim_complete(clp, cred); |
1571 | } | 1571 | } |
1572 | 1572 | ||
1573 | static void nfs4_clear_reclaim_server(struct nfs_server *server) | 1573 | static void nfs4_clear_reclaim_server(struct nfs_server *server) |
@@ -1612,9 +1612,15 @@ static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp) | |||
1612 | 1612 | ||
1613 | static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) | 1613 | static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) |
1614 | { | 1614 | { |
1615 | const struct nfs4_state_recovery_ops *ops; | ||
1616 | struct rpc_cred *cred; | ||
1617 | |||
1615 | if (!nfs4_state_clear_reclaim_reboot(clp)) | 1618 | if (!nfs4_state_clear_reclaim_reboot(clp)) |
1616 | return; | 1619 | return; |
1617 | nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops); | 1620 | ops = clp->cl_mvops->reboot_recovery_ops; |
1621 | cred = ops->get_clid_cred(clp); | ||
1622 | nfs4_reclaim_complete(clp, ops, cred); | ||
1623 | put_rpccred(cred); | ||
1618 | } | 1624 | } |
1619 | 1625 | ||
1620 | static void nfs_delegation_clear_all(struct nfs_client *clp) | 1626 | static void nfs_delegation_clear_all(struct nfs_client *clp) |
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index a5e1a3026d48..5dbe2d269210 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include "delegation.h" | 9 | #include "delegation.h" |
10 | #include "internal.h" | 10 | #include "internal.h" |
11 | #include "nfs4_fs.h" | 11 | #include "nfs4_fs.h" |
12 | #include "dns_resolve.h" | ||
12 | #include "pnfs.h" | 13 | #include "pnfs.h" |
13 | #include "nfs.h" | 14 | #include "nfs.h" |
14 | 15 | ||
@@ -331,18 +332,24 @@ static int __init init_nfs_v4(void) | |||
331 | { | 332 | { |
332 | int err; | 333 | int err; |
333 | 334 | ||
334 | err = nfs_idmap_init(); | 335 | err = nfs_dns_resolver_init(); |
335 | if (err) | 336 | if (err) |
336 | goto out; | 337 | goto out; |
337 | 338 | ||
338 | err = nfs4_register_sysctl(); | 339 | err = nfs_idmap_init(); |
339 | if (err) | 340 | if (err) |
340 | goto out1; | 341 | goto out1; |
341 | 342 | ||
343 | err = nfs4_register_sysctl(); | ||
344 | if (err) | ||
345 | goto out2; | ||
346 | |||
342 | register_nfs_version(&nfs_v4); | 347 | register_nfs_version(&nfs_v4); |
343 | return 0; | 348 | return 0; |
344 | out1: | 349 | out2: |
345 | nfs_idmap_quit(); | 350 | nfs_idmap_quit(); |
351 | out1: | ||
352 | nfs_dns_resolver_destroy(); | ||
346 | out: | 353 | out: |
347 | return err; | 354 | return err; |
348 | } | 355 | } |
@@ -352,6 +359,7 @@ static void __exit exit_nfs_v4(void) | |||
352 | unregister_nfs_version(&nfs_v4); | 359 | unregister_nfs_version(&nfs_v4); |
353 | nfs4_unregister_sysctl(); | 360 | nfs4_unregister_sysctl(); |
354 | nfs_idmap_quit(); | 361 | nfs_idmap_quit(); |
362 | nfs_dns_resolver_destroy(); | ||
355 | } | 363 | } |
356 | 364 | ||
357 | MODULE_LICENSE("GPL"); | 365 | MODULE_LICENSE("GPL"); |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4be8d135ed61..3850b018815f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -102,12 +102,23 @@ static int nfs4_stat_to_errno(int); | |||
102 | #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) | 102 | #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) |
103 | #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) | 103 | #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) |
104 | #define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) | 104 | #define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) |
105 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
106 | /* PI(4 bytes) + LFS(4 bytes) + 1(for null terminator?) + MAXLABELLEN */ | ||
107 | #define nfs4_label_maxsz (4 + 4 + 1 + XDR_QUADLEN(NFS4_MAXLABELLEN)) | ||
108 | #define encode_readdir_space 24 | ||
109 | #define encode_readdir_bitmask_sz 3 | ||
110 | #else | ||
111 | #define nfs4_label_maxsz 0 | ||
112 | #define encode_readdir_space 20 | ||
113 | #define encode_readdir_bitmask_sz 2 | ||
114 | #endif | ||
105 | /* We support only one layout type per file system */ | 115 | /* We support only one layout type per file system */ |
106 | #define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) | 116 | #define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) |
107 | /* This is based on getfattr, which uses the most attributes: */ | 117 | /* This is based on getfattr, which uses the most attributes: */ |
108 | #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ | 118 | #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ |
109 | 3 + 3 + 3 + nfs4_owner_maxsz + \ | 119 | 3 + 3 + 3 + nfs4_owner_maxsz + \ |
110 | nfs4_group_maxsz + decode_mdsthreshold_maxsz)) | 120 | nfs4_group_maxsz + nfs4_label_maxsz + \ |
121 | decode_mdsthreshold_maxsz)) | ||
111 | #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ | 122 | #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ |
112 | nfs4_fattr_value_maxsz) | 123 | nfs4_fattr_value_maxsz) |
113 | #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) | 124 | #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) |
@@ -115,6 +126,7 @@ static int nfs4_stat_to_errno(int); | |||
115 | 1 + 2 + 1 + \ | 126 | 1 + 2 + 1 + \ |
116 | nfs4_owner_maxsz + \ | 127 | nfs4_owner_maxsz + \ |
117 | nfs4_group_maxsz + \ | 128 | nfs4_group_maxsz + \ |
129 | nfs4_label_maxsz + \ | ||
118 | 4 + 4) | 130 | 4 + 4) |
119 | #define encode_savefh_maxsz (op_encode_hdr_maxsz) | 131 | #define encode_savefh_maxsz (op_encode_hdr_maxsz) |
120 | #define decode_savefh_maxsz (op_decode_hdr_maxsz) | 132 | #define decode_savefh_maxsz (op_decode_hdr_maxsz) |
@@ -192,9 +204,11 @@ static int nfs4_stat_to_errno(int); | |||
192 | encode_stateid_maxsz + 3) | 204 | encode_stateid_maxsz + 3) |
193 | #define decode_read_maxsz (op_decode_hdr_maxsz + 2) | 205 | #define decode_read_maxsz (op_decode_hdr_maxsz + 2) |
194 | #define encode_readdir_maxsz (op_encode_hdr_maxsz + \ | 206 | #define encode_readdir_maxsz (op_encode_hdr_maxsz + \ |
195 | 2 + encode_verifier_maxsz + 5) | 207 | 2 + encode_verifier_maxsz + 5 + \ |
208 | nfs4_label_maxsz) | ||
196 | #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ | 209 | #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ |
197 | decode_verifier_maxsz) | 210 | decode_verifier_maxsz + \ |
211 | nfs4_label_maxsz + nfs4_fattr_maxsz) | ||
198 | #define encode_readlink_maxsz (op_encode_hdr_maxsz) | 212 | #define encode_readlink_maxsz (op_encode_hdr_maxsz) |
199 | #define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) | 213 | #define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) |
200 | #define encode_write_maxsz (op_encode_hdr_maxsz + \ | 214 | #define encode_write_maxsz (op_encode_hdr_maxsz + \ |
@@ -853,6 +867,12 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | |||
853 | decode_sequence_maxsz + | 867 | decode_sequence_maxsz + |
854 | decode_putfh_maxsz) * | 868 | decode_putfh_maxsz) * |
855 | XDR_UNIT); | 869 | XDR_UNIT); |
870 | |||
871 | const u32 nfs41_maxgetdevinfo_overhead = ((RPC_MAX_REPHEADER_WITH_AUTH + | ||
872 | compound_decode_hdr_maxsz + | ||
873 | decode_sequence_maxsz) * | ||
874 | XDR_UNIT); | ||
875 | EXPORT_SYMBOL_GPL(nfs41_maxgetdevinfo_overhead); | ||
856 | #endif /* CONFIG_NFS_V4_1 */ | 876 | #endif /* CONFIG_NFS_V4_1 */ |
857 | 877 | ||
858 | static const umode_t nfs_type2fmt[] = { | 878 | static const umode_t nfs_type2fmt[] = { |
@@ -968,7 +988,9 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve | |||
968 | encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); | 988 | encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); |
969 | } | 989 | } |
970 | 990 | ||
971 | static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) | 991 | static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, |
992 | const struct nfs4_label *label, | ||
993 | const struct nfs_server *server) | ||
972 | { | 994 | { |
973 | char owner_name[IDMAP_NAMESZ]; | 995 | char owner_name[IDMAP_NAMESZ]; |
974 | char owner_group[IDMAP_NAMESZ]; | 996 | char owner_group[IDMAP_NAMESZ]; |
@@ -977,17 +999,19 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
977 | __be32 *p; | 999 | __be32 *p; |
978 | __be32 *q; | 1000 | __be32 *q; |
979 | int len; | 1001 | int len; |
1002 | uint32_t bmval_len = 2; | ||
980 | uint32_t bmval0 = 0; | 1003 | uint32_t bmval0 = 0; |
981 | uint32_t bmval1 = 0; | 1004 | uint32_t bmval1 = 0; |
1005 | uint32_t bmval2 = 0; | ||
982 | 1006 | ||
983 | /* | 1007 | /* |
984 | * We reserve enough space to write the entire attribute buffer at once. | 1008 | * We reserve enough space to write the entire attribute buffer at once. |
985 | * In the worst-case, this would be | 1009 | * In the worst-case, this would be |
986 | * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) | 1010 | * 16(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) |
987 | * = 36 bytes, plus any contribution from variable-length fields | 1011 | * = 40 bytes, plus any contribution from variable-length fields |
988 | * such as owner/group. | 1012 | * such as owner/group. |
989 | */ | 1013 | */ |
990 | len = 16; | 1014 | len = 8; |
991 | 1015 | ||
992 | /* Sigh */ | 1016 | /* Sigh */ |
993 | if (iap->ia_valid & ATTR_SIZE) | 1017 | if (iap->ia_valid & ATTR_SIZE) |
@@ -1025,15 +1049,22 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
1025 | len += 16; | 1049 | len += 16; |
1026 | else if (iap->ia_valid & ATTR_MTIME) | 1050 | else if (iap->ia_valid & ATTR_MTIME) |
1027 | len += 4; | 1051 | len += 4; |
1052 | if (label) { | ||
1053 | len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2); | ||
1054 | bmval_len = 3; | ||
1055 | } | ||
1056 | |||
1057 | len += bmval_len << 2; | ||
1028 | p = reserve_space(xdr, len); | 1058 | p = reserve_space(xdr, len); |
1029 | 1059 | ||
1030 | /* | 1060 | /* |
1031 | * We write the bitmap length now, but leave the bitmap and the attribute | 1061 | * We write the bitmap length now, but leave the bitmap and the attribute |
1032 | * buffer length to be backfilled at the end of this routine. | 1062 | * buffer length to be backfilled at the end of this routine. |
1033 | */ | 1063 | */ |
1034 | *p++ = cpu_to_be32(2); | 1064 | *p++ = cpu_to_be32(bmval_len); |
1035 | q = p; | 1065 | q = p; |
1036 | p += 3; | 1066 | /* Skip bitmap entries + attrlen */ |
1067 | p += bmval_len + 1; | ||
1037 | 1068 | ||
1038 | if (iap->ia_valid & ATTR_SIZE) { | 1069 | if (iap->ia_valid & ATTR_SIZE) { |
1039 | bmval0 |= FATTR4_WORD0_SIZE; | 1070 | bmval0 |= FATTR4_WORD0_SIZE; |
@@ -1071,6 +1102,13 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
1071 | bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; | 1102 | bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; |
1072 | *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); | 1103 | *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); |
1073 | } | 1104 | } |
1105 | if (label) { | ||
1106 | bmval2 |= FATTR4_WORD2_SECURITY_LABEL; | ||
1107 | *p++ = cpu_to_be32(label->lfs); | ||
1108 | *p++ = cpu_to_be32(label->pi); | ||
1109 | *p++ = cpu_to_be32(label->len); | ||
1110 | p = xdr_encode_opaque_fixed(p, label->label, label->len); | ||
1111 | } | ||
1074 | 1112 | ||
1075 | /* | 1113 | /* |
1076 | * Now we backfill the bitmap and the attribute buffer length. | 1114 | * Now we backfill the bitmap and the attribute buffer length. |
@@ -1080,9 +1118,11 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
1080 | len, ((char *)p - (char *)q) + 4); | 1118 | len, ((char *)p - (char *)q) + 4); |
1081 | BUG(); | 1119 | BUG(); |
1082 | } | 1120 | } |
1083 | len = (char *)p - (char *)q - 12; | ||
1084 | *q++ = htonl(bmval0); | 1121 | *q++ = htonl(bmval0); |
1085 | *q++ = htonl(bmval1); | 1122 | *q++ = htonl(bmval1); |
1123 | if (bmval_len == 3) | ||
1124 | *q++ = htonl(bmval2); | ||
1125 | len = (char *)p - (char *)(q + 1); | ||
1086 | *q = htonl(len); | 1126 | *q = htonl(len); |
1087 | 1127 | ||
1088 | /* out: */ | 1128 | /* out: */ |
@@ -1136,7 +1176,7 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * | |||
1136 | } | 1176 | } |
1137 | 1177 | ||
1138 | encode_string(xdr, create->name->len, create->name->name); | 1178 | encode_string(xdr, create->name->len, create->name->name); |
1139 | encode_attrs(xdr, create->attrs, create->server); | 1179 | encode_attrs(xdr, create->attrs, create->label, create->server); |
1140 | } | 1180 | } |
1141 | 1181 | ||
1142 | static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) | 1182 | static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) |
@@ -1188,8 +1228,10 @@ encode_getattr_three(struct xdr_stream *xdr, | |||
1188 | 1228 | ||
1189 | static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) | 1229 | static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) |
1190 | { | 1230 | { |
1191 | encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], | 1231 | encode_getattr_three(xdr, bitmask[0] & nfs4_fattr_bitmap[0], |
1192 | bitmask[1] & nfs4_fattr_bitmap[1], hdr); | 1232 | bitmask[1] & nfs4_fattr_bitmap[1], |
1233 | bitmask[2] & nfs4_fattr_bitmap[2], | ||
1234 | hdr); | ||
1193 | } | 1235 | } |
1194 | 1236 | ||
1195 | static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, | 1237 | static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, |
@@ -1367,11 +1409,11 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op | |||
1367 | switch(arg->createmode) { | 1409 | switch(arg->createmode) { |
1368 | case NFS4_CREATE_UNCHECKED: | 1410 | case NFS4_CREATE_UNCHECKED: |
1369 | *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); | 1411 | *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); |
1370 | encode_attrs(xdr, arg->u.attrs, arg->server); | 1412 | encode_attrs(xdr, arg->u.attrs, arg->label, arg->server); |
1371 | break; | 1413 | break; |
1372 | case NFS4_CREATE_GUARDED: | 1414 | case NFS4_CREATE_GUARDED: |
1373 | *p = cpu_to_be32(NFS4_CREATE_GUARDED); | 1415 | *p = cpu_to_be32(NFS4_CREATE_GUARDED); |
1374 | encode_attrs(xdr, arg->u.attrs, arg->server); | 1416 | encode_attrs(xdr, arg->u.attrs, arg->label, arg->server); |
1375 | break; | 1417 | break; |
1376 | case NFS4_CREATE_EXCLUSIVE: | 1418 | case NFS4_CREATE_EXCLUSIVE: |
1377 | *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); | 1419 | *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); |
@@ -1381,7 +1423,7 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op | |||
1381 | *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); | 1423 | *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); |
1382 | encode_nfs4_verifier(xdr, &arg->u.verifier); | 1424 | encode_nfs4_verifier(xdr, &arg->u.verifier); |
1383 | dummy.ia_valid = 0; | 1425 | dummy.ia_valid = 0; |
1384 | encode_attrs(xdr, &dummy, arg->server); | 1426 | encode_attrs(xdr, &dummy, arg->label, arg->server); |
1385 | } | 1427 | } |
1386 | } | 1428 | } |
1387 | 1429 | ||
@@ -1532,7 +1574,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, | |||
1532 | 1574 | ||
1533 | static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) | 1575 | static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) |
1534 | { | 1576 | { |
1535 | uint32_t attrs[2] = { | 1577 | uint32_t attrs[3] = { |
1536 | FATTR4_WORD0_RDATTR_ERROR, | 1578 | FATTR4_WORD0_RDATTR_ERROR, |
1537 | FATTR4_WORD1_MOUNTED_ON_FILEID, | 1579 | FATTR4_WORD1_MOUNTED_ON_FILEID, |
1538 | }; | 1580 | }; |
@@ -1555,20 +1597,26 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg | |||
1555 | encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); | 1597 | encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); |
1556 | encode_uint64(xdr, readdir->cookie); | 1598 | encode_uint64(xdr, readdir->cookie); |
1557 | encode_nfs4_verifier(xdr, &readdir->verifier); | 1599 | encode_nfs4_verifier(xdr, &readdir->verifier); |
1558 | p = reserve_space(xdr, 20); | 1600 | p = reserve_space(xdr, encode_readdir_space); |
1559 | *p++ = cpu_to_be32(dircount); | 1601 | *p++ = cpu_to_be32(dircount); |
1560 | *p++ = cpu_to_be32(readdir->count); | 1602 | *p++ = cpu_to_be32(readdir->count); |
1561 | *p++ = cpu_to_be32(2); | 1603 | *p++ = cpu_to_be32(encode_readdir_bitmask_sz); |
1562 | |||
1563 | *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); | 1604 | *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); |
1564 | *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); | 1605 | *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); |
1606 | if (encode_readdir_bitmask_sz > 2) { | ||
1607 | if (hdr->minorversion > 1) | ||
1608 | attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; | ||
1609 | p++, *p++ = cpu_to_be32(attrs[2] & readdir->bitmask[2]); | ||
1610 | } | ||
1565 | memcpy(verf, readdir->verifier.data, sizeof(verf)); | 1611 | memcpy(verf, readdir->verifier.data, sizeof(verf)); |
1566 | dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", | 1612 | |
1613 | dprintk("%s: cookie = %llu, verifier = %08x:%08x, bitmap = %08x:%08x:%08x\n", | ||
1567 | __func__, | 1614 | __func__, |
1568 | (unsigned long long)readdir->cookie, | 1615 | (unsigned long long)readdir->cookie, |
1569 | verf[0], verf[1], | 1616 | verf[0], verf[1], |
1570 | attrs[0] & readdir->bitmask[0], | 1617 | attrs[0] & readdir->bitmask[0], |
1571 | attrs[1] & readdir->bitmask[1]); | 1618 | attrs[1] & readdir->bitmask[1], |
1619 | attrs[2] & readdir->bitmask[2]); | ||
1572 | } | 1620 | } |
1573 | 1621 | ||
1574 | static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) | 1622 | static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) |
@@ -1627,7 +1675,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs | |||
1627 | { | 1675 | { |
1628 | encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); | 1676 | encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); |
1629 | encode_nfs4_stateid(xdr, &arg->stateid); | 1677 | encode_nfs4_stateid(xdr, &arg->stateid); |
1630 | encode_attrs(xdr, arg->iap, server); | 1678 | encode_attrs(xdr, arg->iap, arg->label, server); |
1631 | } | 1679 | } |
1632 | 1680 | ||
1633 | static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) | 1681 | static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) |
@@ -1889,7 +1937,7 @@ encode_getdeviceinfo(struct xdr_stream *xdr, | |||
1889 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, | 1937 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, |
1890 | NFS4_DEVICEID4_SIZE); | 1938 | NFS4_DEVICEID4_SIZE); |
1891 | *p++ = cpu_to_be32(args->pdev->layout_type); | 1939 | *p++ = cpu_to_be32(args->pdev->layout_type); |
1892 | *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ | 1940 | *p++ = cpu_to_be32(args->pdev->maxcount); /* gdia_maxcount */ |
1893 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ | 1941 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ |
1894 | } | 1942 | } |
1895 | 1943 | ||
@@ -4038,6 +4086,56 @@ static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap, | |||
4038 | return status; | 4086 | return status; |
4039 | } | 4087 | } |
4040 | 4088 | ||
4089 | static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, | ||
4090 | struct nfs4_label *label) | ||
4091 | { | ||
4092 | uint32_t pi = 0; | ||
4093 | uint32_t lfs = 0; | ||
4094 | __u32 len; | ||
4095 | __be32 *p; | ||
4096 | int status = 0; | ||
4097 | |||
4098 | if (unlikely(bitmap[2] & (FATTR4_WORD2_SECURITY_LABEL - 1U))) | ||
4099 | return -EIO; | ||
4100 | if (likely(bitmap[2] & FATTR4_WORD2_SECURITY_LABEL)) { | ||
4101 | p = xdr_inline_decode(xdr, 4); | ||
4102 | if (unlikely(!p)) | ||
4103 | goto out_overflow; | ||
4104 | lfs = be32_to_cpup(p++); | ||
4105 | p = xdr_inline_decode(xdr, 4); | ||
4106 | if (unlikely(!p)) | ||
4107 | goto out_overflow; | ||
4108 | pi = be32_to_cpup(p++); | ||
4109 | p = xdr_inline_decode(xdr, 4); | ||
4110 | if (unlikely(!p)) | ||
4111 | goto out_overflow; | ||
4112 | len = be32_to_cpup(p++); | ||
4113 | p = xdr_inline_decode(xdr, len); | ||
4114 | if (unlikely(!p)) | ||
4115 | goto out_overflow; | ||
4116 | if (len < NFS4_MAXLABELLEN) { | ||
4117 | if (label) { | ||
4118 | memcpy(label->label, p, len); | ||
4119 | label->len = len; | ||
4120 | label->pi = pi; | ||
4121 | label->lfs = lfs; | ||
4122 | status = NFS_ATTR_FATTR_V4_SECURITY_LABEL; | ||
4123 | } | ||
4124 | bitmap[2] &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
4125 | } else | ||
4126 | printk(KERN_WARNING "%s: label too long (%u)!\n", | ||
4127 | __func__, len); | ||
4128 | } | ||
4129 | if (label && label->label) | ||
4130 | dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, | ||
4131 | (char *)label->label, label->len, label->pi, label->lfs); | ||
4132 | return status; | ||
4133 | |||
4134 | out_overflow: | ||
4135 | print_overflow_msg(__func__, xdr); | ||
4136 | return -EIO; | ||
4137 | } | ||
4138 | |||
4041 | static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) | 4139 | static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) |
4042 | { | 4140 | { |
4043 | int status = 0; | 4141 | int status = 0; |
@@ -4380,7 +4478,7 @@ out_overflow: | |||
4380 | 4478 | ||
4381 | static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | 4479 | static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, |
4382 | struct nfs_fattr *fattr, struct nfs_fh *fh, | 4480 | struct nfs_fattr *fattr, struct nfs_fh *fh, |
4383 | struct nfs4_fs_locations *fs_loc, | 4481 | struct nfs4_fs_locations *fs_loc, struct nfs4_label *label, |
4384 | const struct nfs_server *server) | 4482 | const struct nfs_server *server) |
4385 | { | 4483 | { |
4386 | int status; | 4484 | int status; |
@@ -4488,6 +4586,13 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | |||
4488 | if (status < 0) | 4586 | if (status < 0) |
4489 | goto xdr_error; | 4587 | goto xdr_error; |
4490 | 4588 | ||
4589 | if (label) { | ||
4590 | status = decode_attr_security_label(xdr, bitmap, label); | ||
4591 | if (status < 0) | ||
4592 | goto xdr_error; | ||
4593 | fattr->valid |= status; | ||
4594 | } | ||
4595 | |||
4491 | xdr_error: | 4596 | xdr_error: |
4492 | dprintk("%s: xdr returned %d\n", __func__, -status); | 4597 | dprintk("%s: xdr returned %d\n", __func__, -status); |
4493 | return status; | 4598 | return status; |
@@ -4495,7 +4600,7 @@ xdr_error: | |||
4495 | 4600 | ||
4496 | static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, | 4601 | static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, |
4497 | struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, | 4602 | struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, |
4498 | const struct nfs_server *server) | 4603 | struct nfs4_label *label, const struct nfs_server *server) |
4499 | { | 4604 | { |
4500 | unsigned int savep; | 4605 | unsigned int savep; |
4501 | uint32_t attrlen, | 4606 | uint32_t attrlen, |
@@ -4514,7 +4619,8 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat | |||
4514 | if (status < 0) | 4619 | if (status < 0) |
4515 | goto xdr_error; | 4620 | goto xdr_error; |
4516 | 4621 | ||
4517 | status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server); | 4622 | status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, |
4623 | label, server); | ||
4518 | if (status < 0) | 4624 | if (status < 0) |
4519 | goto xdr_error; | 4625 | goto xdr_error; |
4520 | 4626 | ||
@@ -4524,10 +4630,16 @@ xdr_error: | |||
4524 | return status; | 4630 | return status; |
4525 | } | 4631 | } |
4526 | 4632 | ||
4633 | static int decode_getfattr_label(struct xdr_stream *xdr, struct nfs_fattr *fattr, | ||
4634 | struct nfs4_label *label, const struct nfs_server *server) | ||
4635 | { | ||
4636 | return decode_getfattr_generic(xdr, fattr, NULL, NULL, label, server); | ||
4637 | } | ||
4638 | |||
4527 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, | 4639 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, |
4528 | const struct nfs_server *server) | 4640 | const struct nfs_server *server) |
4529 | { | 4641 | { |
4530 | return decode_getfattr_generic(xdr, fattr, NULL, NULL, server); | 4642 | return decode_getfattr_generic(xdr, fattr, NULL, NULL, NULL, server); |
4531 | } | 4643 | } |
4532 | 4644 | ||
4533 | /* | 4645 | /* |
@@ -5919,7 +6031,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5919 | status = decode_getfh(xdr, res->fh); | 6031 | status = decode_getfh(xdr, res->fh); |
5920 | if (status) | 6032 | if (status) |
5921 | goto out; | 6033 | goto out; |
5922 | status = decode_getfattr(xdr, res->fattr, res->server); | 6034 | status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); |
5923 | out: | 6035 | out: |
5924 | return status; | 6036 | return status; |
5925 | } | 6037 | } |
@@ -5945,7 +6057,8 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, | |||
5945 | goto out; | 6057 | goto out; |
5946 | status = decode_getfh(xdr, res->fh); | 6058 | status = decode_getfh(xdr, res->fh); |
5947 | if (status == 0) | 6059 | if (status == 0) |
5948 | status = decode_getfattr(xdr, res->fattr, res->server); | 6060 | status = decode_getfattr_label(xdr, res->fattr, |
6061 | res->label, res->server); | ||
5949 | out: | 6062 | out: |
5950 | return status; | 6063 | return status; |
5951 | } | 6064 | } |
@@ -6036,7 +6149,7 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6036 | status = decode_restorefh(xdr); | 6149 | status = decode_restorefh(xdr); |
6037 | if (status) | 6150 | if (status) |
6038 | goto out; | 6151 | goto out; |
6039 | decode_getfattr(xdr, res->fattr, res->server); | 6152 | decode_getfattr_label(xdr, res->fattr, res->label, res->server); |
6040 | out: | 6153 | out: |
6041 | return status; | 6154 | return status; |
6042 | } | 6155 | } |
@@ -6065,7 +6178,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6065 | status = decode_getfh(xdr, res->fh); | 6178 | status = decode_getfh(xdr, res->fh); |
6066 | if (status) | 6179 | if (status) |
6067 | goto out; | 6180 | goto out; |
6068 | decode_getfattr(xdr, res->fattr, res->server); | 6181 | decode_getfattr_label(xdr, res->fattr, res->label, res->server); |
6069 | out: | 6182 | out: |
6070 | return status; | 6183 | return status; |
6071 | } | 6184 | } |
@@ -6097,7 +6210,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6097 | status = decode_putfh(xdr); | 6210 | status = decode_putfh(xdr); |
6098 | if (status) | 6211 | if (status) |
6099 | goto out; | 6212 | goto out; |
6100 | status = decode_getfattr(xdr, res->fattr, res->server); | 6213 | status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); |
6101 | out: | 6214 | out: |
6102 | return status; | 6215 | return status; |
6103 | } | 6216 | } |
@@ -6230,7 +6343,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
6230 | goto out; | 6343 | goto out; |
6231 | if (res->access_request) | 6344 | if (res->access_request) |
6232 | decode_access(xdr, &res->access_supported, &res->access_result); | 6345 | decode_access(xdr, &res->access_supported, &res->access_result); |
6233 | decode_getfattr(xdr, res->f_attr, res->server); | 6346 | decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server); |
6234 | out: | 6347 | out: |
6235 | return status; | 6348 | return status; |
6236 | } | 6349 | } |
@@ -6307,7 +6420,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, | |||
6307 | status = decode_setattr(xdr); | 6420 | status = decode_setattr(xdr); |
6308 | if (status) | 6421 | if (status) |
6309 | goto out; | 6422 | goto out; |
6310 | decode_getfattr(xdr, res->fattr, res->server); | 6423 | decode_getfattr_label(xdr, res->fattr, res->label, res->server); |
6311 | out: | 6424 | out: |
6312 | return status; | 6425 | return status; |
6313 | } | 6426 | } |
@@ -6696,7 +6809,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, | |||
6696 | xdr_enter_page(xdr, PAGE_SIZE); | 6809 | xdr_enter_page(xdr, PAGE_SIZE); |
6697 | status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, | 6810 | status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, |
6698 | NULL, res->fs_locations, | 6811 | NULL, res->fs_locations, |
6699 | res->fs_locations->server); | 6812 | NULL, res->fs_locations->server); |
6700 | out: | 6813 | out: |
6701 | return status; | 6814 | return status; |
6702 | } | 6815 | } |
@@ -7109,7 +7222,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | |||
7109 | goto out_overflow; | 7222 | goto out_overflow; |
7110 | 7223 | ||
7111 | if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, | 7224 | if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, |
7112 | NULL, entry->server) < 0) | 7225 | NULL, entry->label, entry->server) < 0) |
7113 | goto out_overflow; | 7226 | goto out_overflow; |
7114 | if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) | 7227 | if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) |
7115 | entry->ino = entry->fattr->mounted_on_fileid; | 7228 | entry->ino = entry->fattr->mounted_on_fileid; |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index a9ebd817278b..e4f9cbfec67b 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -613,8 +613,10 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | |||
613 | pd.pgbase = 0; | 613 | pd.pgbase = 0; |
614 | pd.pglen = PAGE_SIZE; | 614 | pd.pglen = PAGE_SIZE; |
615 | pd.mincount = 0; | 615 | pd.mincount = 0; |
616 | pd.maxcount = PAGE_SIZE; | ||
616 | 617 | ||
617 | err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd); | 618 | err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd, |
619 | pnfslay->plh_lc_cred); | ||
618 | dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); | 620 | dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); |
619 | if (err) | 621 | if (err) |
620 | goto err_out; | 622 | goto err_out; |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c5bd758e5637..3a3a79d6bf15 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -360,7 +360,7 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) | |||
360 | } | 360 | } |
361 | EXPORT_SYMBOL_GPL(pnfs_put_lseg); | 361 | EXPORT_SYMBOL_GPL(pnfs_put_lseg); |
362 | 362 | ||
363 | static inline u64 | 363 | static u64 |
364 | end_offset(u64 start, u64 len) | 364 | end_offset(u64 start, u64 len) |
365 | { | 365 | { |
366 | u64 end; | 366 | u64 end; |
@@ -376,9 +376,9 @@ end_offset(u64 start, u64 len) | |||
376 | * start2 end2 | 376 | * start2 end2 |
377 | * [----------------) | 377 | * [----------------) |
378 | */ | 378 | */ |
379 | static inline int | 379 | static bool |
380 | lo_seg_contained(struct pnfs_layout_range *l1, | 380 | pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, |
381 | struct pnfs_layout_range *l2) | 381 | const struct pnfs_layout_range *l2) |
382 | { | 382 | { |
383 | u64 start1 = l1->offset; | 383 | u64 start1 = l1->offset; |
384 | u64 end1 = end_offset(start1, l1->length); | 384 | u64 end1 = end_offset(start1, l1->length); |
@@ -395,9 +395,9 @@ lo_seg_contained(struct pnfs_layout_range *l1, | |||
395 | * start2 end2 | 395 | * start2 end2 |
396 | * [----------------) | 396 | * [----------------) |
397 | */ | 397 | */ |
398 | static inline int | 398 | static bool |
399 | lo_seg_intersecting(struct pnfs_layout_range *l1, | 399 | pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, |
400 | struct pnfs_layout_range *l2) | 400 | const struct pnfs_layout_range *l2) |
401 | { | 401 | { |
402 | u64 start1 = l1->offset; | 402 | u64 start1 = l1->offset; |
403 | u64 end1 = end_offset(start1, l1->length); | 403 | u64 end1 = end_offset(start1, l1->length); |
@@ -409,12 +409,12 @@ lo_seg_intersecting(struct pnfs_layout_range *l1, | |||
409 | } | 409 | } |
410 | 410 | ||
411 | static bool | 411 | static bool |
412 | should_free_lseg(struct pnfs_layout_range *lseg_range, | 412 | should_free_lseg(const struct pnfs_layout_range *lseg_range, |
413 | struct pnfs_layout_range *recall_range) | 413 | const struct pnfs_layout_range *recall_range) |
414 | { | 414 | { |
415 | return (recall_range->iomode == IOMODE_ANY || | 415 | return (recall_range->iomode == IOMODE_ANY || |
416 | lseg_range->iomode == recall_range->iomode) && | 416 | lseg_range->iomode == recall_range->iomode) && |
417 | lo_seg_intersecting(lseg_range, recall_range); | 417 | pnfs_lseg_range_intersecting(lseg_range, recall_range); |
418 | } | 418 | } |
419 | 419 | ||
420 | static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, | 420 | static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, |
@@ -766,6 +766,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
766 | lgp->args.inode = ino; | 766 | lgp->args.inode = ino; |
767 | lgp->args.ctx = get_nfs_open_context(ctx); | 767 | lgp->args.ctx = get_nfs_open_context(ctx); |
768 | lgp->gfp_flags = gfp_flags; | 768 | lgp->gfp_flags = gfp_flags; |
769 | lgp->cred = lo->plh_lc_cred; | ||
769 | 770 | ||
770 | /* Synchronously retrieve layout information from server and | 771 | /* Synchronously retrieve layout information from server and |
771 | * store in lseg. | 772 | * store in lseg. |
@@ -860,6 +861,7 @@ _pnfs_return_layout(struct inode *ino) | |||
860 | lrp->args.inode = ino; | 861 | lrp->args.inode = ino; |
861 | lrp->args.layout = lo; | 862 | lrp->args.layout = lo; |
862 | lrp->clp = NFS_SERVER(ino)->nfs_client; | 863 | lrp->clp = NFS_SERVER(ino)->nfs_client; |
864 | lrp->cred = lo->plh_lc_cred; | ||
863 | 865 | ||
864 | status = nfs4_proc_layoutreturn(lrp); | 866 | status = nfs4_proc_layoutreturn(lrp); |
865 | out: | 867 | out: |
@@ -984,8 +986,8 @@ out: | |||
984 | * are seen first. | 986 | * are seen first. |
985 | */ | 987 | */ |
986 | static s64 | 988 | static s64 |
987 | cmp_layout(struct pnfs_layout_range *l1, | 989 | pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1, |
988 | struct pnfs_layout_range *l2) | 990 | const struct pnfs_layout_range *l2) |
989 | { | 991 | { |
990 | s64 d; | 992 | s64 d; |
991 | 993 | ||
@@ -1012,7 +1014,7 @@ pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, | |||
1012 | dprintk("%s:Begin\n", __func__); | 1014 | dprintk("%s:Begin\n", __func__); |
1013 | 1015 | ||
1014 | list_for_each_entry(lp, &lo->plh_segs, pls_list) { | 1016 | list_for_each_entry(lp, &lo->plh_segs, pls_list) { |
1015 | if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) | 1017 | if (pnfs_lseg_range_cmp(&lseg->pls_range, &lp->pls_range) > 0) |
1016 | continue; | 1018 | continue; |
1017 | list_add_tail(&lseg->pls_list, &lp->pls_list); | 1019 | list_add_tail(&lseg->pls_list, &lp->pls_list); |
1018 | dprintk("%s: inserted lseg %p " | 1020 | dprintk("%s: inserted lseg %p " |
@@ -1050,7 +1052,7 @@ alloc_init_layout_hdr(struct inode *ino, | |||
1050 | INIT_LIST_HEAD(&lo->plh_segs); | 1052 | INIT_LIST_HEAD(&lo->plh_segs); |
1051 | INIT_LIST_HEAD(&lo->plh_bulk_destroy); | 1053 | INIT_LIST_HEAD(&lo->plh_bulk_destroy); |
1052 | lo->plh_inode = ino; | 1054 | lo->plh_inode = ino; |
1053 | lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); | 1055 | lo->plh_lc_cred = get_rpccred(ctx->cred); |
1054 | return lo; | 1056 | return lo; |
1055 | } | 1057 | } |
1056 | 1058 | ||
@@ -1091,21 +1093,21 @@ out_existing: | |||
1091 | * READ READ true | 1093 | * READ READ true |
1092 | * READ RW true | 1094 | * READ RW true |
1093 | */ | 1095 | */ |
1094 | static int | 1096 | static bool |
1095 | is_matching_lseg(struct pnfs_layout_range *ls_range, | 1097 | pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, |
1096 | struct pnfs_layout_range *range) | 1098 | const struct pnfs_layout_range *range) |
1097 | { | 1099 | { |
1098 | struct pnfs_layout_range range1; | 1100 | struct pnfs_layout_range range1; |
1099 | 1101 | ||
1100 | if ((range->iomode == IOMODE_RW && | 1102 | if ((range->iomode == IOMODE_RW && |
1101 | ls_range->iomode != IOMODE_RW) || | 1103 | ls_range->iomode != IOMODE_RW) || |
1102 | !lo_seg_intersecting(ls_range, range)) | 1104 | !pnfs_lseg_range_intersecting(ls_range, range)) |
1103 | return 0; | 1105 | return 0; |
1104 | 1106 | ||
1105 | /* range1 covers only the first byte in the range */ | 1107 | /* range1 covers only the first byte in the range */ |
1106 | range1 = *range; | 1108 | range1 = *range; |
1107 | range1.length = 1; | 1109 | range1.length = 1; |
1108 | return lo_seg_contained(ls_range, &range1); | 1110 | return pnfs_lseg_range_contained(ls_range, &range1); |
1109 | } | 1111 | } |
1110 | 1112 | ||
1111 | /* | 1113 | /* |
@@ -1121,7 +1123,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, | |||
1121 | 1123 | ||
1122 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { | 1124 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { |
1123 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && | 1125 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && |
1124 | is_matching_lseg(&lseg->pls_range, range)) { | 1126 | pnfs_lseg_range_match(&lseg->pls_range, range)) { |
1125 | ret = pnfs_get_lseg(lseg); | 1127 | ret = pnfs_get_lseg(lseg); |
1126 | break; | 1128 | break; |
1127 | } | 1129 | } |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f5f8a470a647..a4f41810a7f4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -149,9 +149,10 @@ struct pnfs_device { | |||
149 | struct nfs4_deviceid dev_id; | 149 | struct nfs4_deviceid dev_id; |
150 | unsigned int layout_type; | 150 | unsigned int layout_type; |
151 | unsigned int mincount; | 151 | unsigned int mincount; |
152 | unsigned int maxcount; /* gdia_maxcount */ | ||
152 | struct page **pages; | 153 | struct page **pages; |
153 | unsigned int pgbase; | 154 | unsigned int pgbase; |
154 | unsigned int pglen; | 155 | unsigned int pglen; /* reply buffer length */ |
155 | }; | 156 | }; |
156 | 157 | ||
157 | #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 | 158 | #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 |
@@ -170,7 +171,8 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, | |||
170 | const struct nfs_fh *fh, | 171 | const struct nfs_fh *fh, |
171 | struct pnfs_devicelist *devlist); | 172 | struct pnfs_devicelist *devlist); |
172 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 173 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
173 | struct pnfs_device *dev); | 174 | struct pnfs_device *dev, |
175 | struct rpc_cred *cred); | ||
174 | extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); | 176 | extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); |
175 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | 177 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); |
176 | 178 | ||
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index fc8de9016acf..c041c41f7a52 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -98,7 +98,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
98 | */ | 98 | */ |
99 | static int | 99 | static int |
100 | nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | 100 | nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, |
101 | struct nfs_fattr *fattr) | 101 | struct nfs_fattr *fattr, struct nfs4_label *label) |
102 | { | 102 | { |
103 | struct rpc_message msg = { | 103 | struct rpc_message msg = { |
104 | .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], | 104 | .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], |
@@ -146,7 +146,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
146 | 146 | ||
147 | static int | 147 | static int |
148 | nfs_proc_lookup(struct inode *dir, struct qstr *name, | 148 | nfs_proc_lookup(struct inode *dir, struct qstr *name, |
149 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 149 | struct nfs_fh *fhandle, struct nfs_fattr *fattr, |
150 | struct nfs4_label *label) | ||
150 | { | 151 | { |
151 | struct nfs_diropargs arg = { | 152 | struct nfs_diropargs arg = { |
152 | .fh = NFS_FH(dir), | 153 | .fh = NFS_FH(dir), |
@@ -243,7 +244,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
243 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 244 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); |
244 | nfs_mark_for_revalidate(dir); | 245 | nfs_mark_for_revalidate(dir); |
245 | if (status == 0) | 246 | if (status == 0) |
246 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | 247 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); |
247 | nfs_free_createdata(data); | 248 | nfs_free_createdata(data); |
248 | out: | 249 | out: |
249 | dprintk("NFS reply create: %d\n", status); | 250 | dprintk("NFS reply create: %d\n", status); |
@@ -290,7 +291,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
290 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 291 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); |
291 | } | 292 | } |
292 | if (status == 0) | 293 | if (status == 0) |
293 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | 294 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); |
294 | nfs_free_createdata(data); | 295 | nfs_free_createdata(data); |
295 | out: | 296 | out: |
296 | dprintk("NFS reply mknod: %d\n", status); | 297 | dprintk("NFS reply mknod: %d\n", status); |
@@ -442,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, | |||
442 | * should fill in the data with a LOOKUP call on the wire. | 443 | * should fill in the data with a LOOKUP call on the wire. |
443 | */ | 444 | */ |
444 | if (status == 0) | 445 | if (status == 0) |
445 | status = nfs_instantiate(dentry, fh, fattr); | 446 | status = nfs_instantiate(dentry, fh, fattr, NULL); |
446 | 447 | ||
447 | out_free: | 448 | out_free: |
448 | nfs_free_fattr(fattr); | 449 | nfs_free_fattr(fattr); |
@@ -471,7 +472,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) | |||
471 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); | 472 | status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); |
472 | nfs_mark_for_revalidate(dir); | 473 | nfs_mark_for_revalidate(dir); |
473 | if (status == 0) | 474 | if (status == 0) |
474 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); | 475 | status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); |
475 | nfs_free_createdata(data); | 476 | nfs_free_createdata(data); |
476 | out: | 477 | out: |
477 | dprintk("NFS reply mkdir: %d\n", status); | 478 | dprintk("NFS reply mkdir: %d\n", status); |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2d7525fbcf25..f6db66d8f647 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -269,7 +269,7 @@ static match_table_t nfs_local_lock_tokens = { | |||
269 | 269 | ||
270 | enum { | 270 | enum { |
271 | Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, | 271 | Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, |
272 | Opt_vers_4_1, | 272 | Opt_vers_4_1, Opt_vers_4_2, |
273 | 273 | ||
274 | Opt_vers_err | 274 | Opt_vers_err |
275 | }; | 275 | }; |
@@ -280,6 +280,7 @@ static match_table_t nfs_vers_tokens = { | |||
280 | { Opt_vers_4, "4" }, | 280 | { Opt_vers_4, "4" }, |
281 | { Opt_vers_4_0, "4.0" }, | 281 | { Opt_vers_4_0, "4.0" }, |
282 | { Opt_vers_4_1, "4.1" }, | 282 | { Opt_vers_4_1, "4.1" }, |
283 | { Opt_vers_4_2, "4.2" }, | ||
283 | 284 | ||
284 | { Opt_vers_err, NULL } | 285 | { Opt_vers_err, NULL } |
285 | }; | 286 | }; |
@@ -832,6 +833,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) | |||
832 | seq_printf(m, "\n\tnfsv4:\t"); | 833 | seq_printf(m, "\n\tnfsv4:\t"); |
833 | seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); | 834 | seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); |
834 | seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); | 835 | seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); |
836 | seq_printf(m, ",bm2=0x%x", nfss->attr_bitmask[2]); | ||
835 | seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); | 837 | seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); |
836 | show_sessions(m, nfss); | 838 | show_sessions(m, nfss); |
837 | show_pnfs(m, nfss); | 839 | show_pnfs(m, nfss); |
@@ -1097,6 +1099,10 @@ static int nfs_parse_version_string(char *string, | |||
1097 | mnt->version = 4; | 1099 | mnt->version = 4; |
1098 | mnt->minorversion = 1; | 1100 | mnt->minorversion = 1; |
1099 | break; | 1101 | break; |
1102 | case Opt_vers_4_2: | ||
1103 | mnt->version = 4; | ||
1104 | mnt->minorversion = 2; | ||
1105 | break; | ||
1100 | default: | 1106 | default: |
1101 | return 0; | 1107 | return 0; |
1102 | } | 1108 | } |
@@ -1608,29 +1614,13 @@ out_security_failure: | |||
1608 | } | 1614 | } |
1609 | 1615 | ||
1610 | /* | 1616 | /* |
1611 | * Select a security flavor for this mount. The selected flavor | 1617 | * Ensure that the specified authtype in args->auth_flavors[0] is supported by |
1612 | * is planted in args->auth_flavors[0]. | 1618 | * the server. Returns 0 if it's ok, and -EACCES if not. |
1613 | * | ||
1614 | * Returns 0 on success, -EACCES on failure. | ||
1615 | */ | 1619 | */ |
1616 | static int nfs_select_flavor(struct nfs_parsed_mount_data *args, | 1620 | static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args, |
1617 | struct nfs_mount_request *request) | 1621 | rpc_authflavor_t *server_authlist, unsigned int count) |
1618 | { | 1622 | { |
1619 | unsigned int i, count = *(request->auth_flav_len); | 1623 | unsigned int i; |
1620 | rpc_authflavor_t flavor; | ||
1621 | |||
1622 | /* | ||
1623 | * The NFSv2 MNT operation does not return a flavor list. | ||
1624 | */ | ||
1625 | if (args->mount_server.version != NFS_MNT3_VERSION) | ||
1626 | goto out_default; | ||
1627 | |||
1628 | /* | ||
1629 | * Certain releases of Linux's mountd return an empty | ||
1630 | * flavor list in some cases. | ||
1631 | */ | ||
1632 | if (count == 0) | ||
1633 | goto out_default; | ||
1634 | 1624 | ||
1635 | /* | 1625 | /* |
1636 | * If the sec= mount option is used, the specified flavor or AUTH_NULL | 1626 | * If the sec= mount option is used, the specified flavor or AUTH_NULL |
@@ -1640,60 +1630,19 @@ static int nfs_select_flavor(struct nfs_parsed_mount_data *args, | |||
1640 | * means that the server will ignore the rpc creds, so any flavor | 1630 | * means that the server will ignore the rpc creds, so any flavor |
1641 | * can be used. | 1631 | * can be used. |
1642 | */ | 1632 | */ |
1643 | if (args->auth_flavors[0] != RPC_AUTH_MAXFLAVOR) { | ||
1644 | for (i = 0; i < count; i++) { | ||
1645 | if (args->auth_flavors[0] == request->auth_flavs[i] || | ||
1646 | request->auth_flavs[i] == RPC_AUTH_NULL) | ||
1647 | goto out; | ||
1648 | } | ||
1649 | dfprintk(MOUNT, "NFS: auth flavor %d not supported by server\n", | ||
1650 | args->auth_flavors[0]); | ||
1651 | goto out_err; | ||
1652 | } | ||
1653 | |||
1654 | /* | ||
1655 | * RFC 2623, section 2.7 suggests we SHOULD prefer the | ||
1656 | * flavor listed first. However, some servers list | ||
1657 | * AUTH_NULL first. Avoid ever choosing AUTH_NULL. | ||
1658 | */ | ||
1659 | for (i = 0; i < count; i++) { | ||
1660 | struct rpcsec_gss_info info; | ||
1661 | |||
1662 | flavor = request->auth_flavs[i]; | ||
1663 | switch (flavor) { | ||
1664 | case RPC_AUTH_UNIX: | ||
1665 | goto out_set; | ||
1666 | case RPC_AUTH_NULL: | ||
1667 | continue; | ||
1668 | default: | ||
1669 | if (rpcauth_get_gssinfo(flavor, &info) == 0) | ||
1670 | goto out_set; | ||
1671 | } | ||
1672 | } | ||
1673 | |||
1674 | /* | ||
1675 | * As a last chance, see if the server list contains AUTH_NULL - | ||
1676 | * if it does, use the default flavor. | ||
1677 | */ | ||
1678 | for (i = 0; i < count; i++) { | 1633 | for (i = 0; i < count; i++) { |
1679 | if (request->auth_flavs[i] == RPC_AUTH_NULL) | 1634 | if (args->auth_flavors[0] == server_authlist[i] || |
1680 | goto out_default; | 1635 | server_authlist[i] == RPC_AUTH_NULL) |
1636 | goto out; | ||
1681 | } | 1637 | } |
1682 | 1638 | ||
1683 | dfprintk(MOUNT, "NFS: no auth flavors in common with server\n"); | 1639 | dfprintk(MOUNT, "NFS: auth flavor %u not supported by server\n", |
1684 | goto out_err; | 1640 | args->auth_flavors[0]); |
1641 | return -EACCES; | ||
1685 | 1642 | ||
1686 | out_default: | ||
1687 | /* use default if flavor not already set */ | ||
1688 | flavor = (args->auth_flavors[0] == RPC_AUTH_MAXFLAVOR) ? | ||
1689 | RPC_AUTH_UNIX : args->auth_flavors[0]; | ||
1690 | out_set: | ||
1691 | args->auth_flavors[0] = flavor; | ||
1692 | out: | 1643 | out: |
1693 | dfprintk(MOUNT, "NFS: using auth flavor %d\n", args->auth_flavors[0]); | 1644 | dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]); |
1694 | return 0; | 1645 | return 0; |
1695 | out_err: | ||
1696 | return -EACCES; | ||
1697 | } | 1646 | } |
1698 | 1647 | ||
1699 | /* | 1648 | /* |
@@ -1701,10 +1650,10 @@ out_err: | |||
1701 | * corresponding to the provided path. | 1650 | * corresponding to the provided path. |
1702 | */ | 1651 | */ |
1703 | static int nfs_request_mount(struct nfs_parsed_mount_data *args, | 1652 | static int nfs_request_mount(struct nfs_parsed_mount_data *args, |
1704 | struct nfs_fh *root_fh) | 1653 | struct nfs_fh *root_fh, |
1654 | rpc_authflavor_t *server_authlist, | ||
1655 | unsigned int *server_authlist_len) | ||
1705 | { | 1656 | { |
1706 | rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; | ||
1707 | unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); | ||
1708 | struct nfs_mount_request request = { | 1657 | struct nfs_mount_request request = { |
1709 | .sap = (struct sockaddr *) | 1658 | .sap = (struct sockaddr *) |
1710 | &args->mount_server.address, | 1659 | &args->mount_server.address, |
@@ -1712,7 +1661,7 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, | |||
1712 | .protocol = args->mount_server.protocol, | 1661 | .protocol = args->mount_server.protocol, |
1713 | .fh = root_fh, | 1662 | .fh = root_fh, |
1714 | .noresvport = args->flags & NFS_MOUNT_NORESVPORT, | 1663 | .noresvport = args->flags & NFS_MOUNT_NORESVPORT, |
1715 | .auth_flav_len = &server_authlist_len, | 1664 | .auth_flav_len = server_authlist_len, |
1716 | .auth_flavs = server_authlist, | 1665 | .auth_flavs = server_authlist, |
1717 | .net = args->net, | 1666 | .net = args->net, |
1718 | }; | 1667 | }; |
@@ -1756,24 +1705,92 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, | |||
1756 | return status; | 1705 | return status; |
1757 | } | 1706 | } |
1758 | 1707 | ||
1759 | return nfs_select_flavor(args, &request); | 1708 | return 0; |
1760 | } | 1709 | } |
1761 | 1710 | ||
1762 | struct dentry *nfs_try_mount(int flags, const char *dev_name, | 1711 | static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info, |
1763 | struct nfs_mount_info *mount_info, | 1712 | struct nfs_subversion *nfs_mod) |
1764 | struct nfs_subversion *nfs_mod) | ||
1765 | { | 1713 | { |
1766 | int status; | 1714 | int status; |
1767 | struct nfs_server *server; | 1715 | unsigned int i; |
1716 | bool tried_auth_unix = false; | ||
1717 | bool auth_null_in_list = false; | ||
1718 | struct nfs_server *server = ERR_PTR(-EACCES); | ||
1719 | struct nfs_parsed_mount_data *args = mount_info->parsed; | ||
1720 | rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS]; | ||
1721 | unsigned int authlist_len = ARRAY_SIZE(authlist); | ||
1722 | |||
1723 | status = nfs_request_mount(args, mount_info->mntfh, authlist, | ||
1724 | &authlist_len); | ||
1725 | if (status) | ||
1726 | return ERR_PTR(status); | ||
1768 | 1727 | ||
1769 | if (mount_info->parsed->need_mount) { | 1728 | /* |
1770 | status = nfs_request_mount(mount_info->parsed, mount_info->mntfh); | 1729 | * Was a sec= authflavor specified in the options? First, verify |
1730 | * whether the server supports it, and then just try to use it if so. | ||
1731 | */ | ||
1732 | if (args->auth_flavors[0] != RPC_AUTH_MAXFLAVOR) { | ||
1733 | status = nfs_verify_authflavor(args, authlist, authlist_len); | ||
1734 | dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]); | ||
1771 | if (status) | 1735 | if (status) |
1772 | return ERR_PTR(status); | 1736 | return ERR_PTR(status); |
1737 | return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); | ||
1738 | } | ||
1739 | |||
1740 | /* | ||
1741 | * No sec= option was provided. RFC 2623, section 2.7 suggests we | ||
1742 | * SHOULD prefer the flavor listed first. However, some servers list | ||
1743 | * AUTH_NULL first. Avoid ever choosing AUTH_NULL. | ||
1744 | */ | ||
1745 | for (i = 0; i < authlist_len; ++i) { | ||
1746 | rpc_authflavor_t flavor; | ||
1747 | struct rpcsec_gss_info info; | ||
1748 | |||
1749 | flavor = authlist[i]; | ||
1750 | switch (flavor) { | ||
1751 | case RPC_AUTH_UNIX: | ||
1752 | tried_auth_unix = true; | ||
1753 | break; | ||
1754 | case RPC_AUTH_NULL: | ||
1755 | auth_null_in_list = true; | ||
1756 | continue; | ||
1757 | default: | ||
1758 | if (rpcauth_get_gssinfo(flavor, &info) != 0) | ||
1759 | continue; | ||
1760 | /* Fallthrough */ | ||
1761 | } | ||
1762 | dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); | ||
1763 | args->auth_flavors[0] = flavor; | ||
1764 | server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); | ||
1765 | if (!IS_ERR(server)) | ||
1766 | return server; | ||
1773 | } | 1767 | } |
1774 | 1768 | ||
1775 | /* Get a volume representation */ | 1769 | /* |
1776 | server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); | 1770 | * Nothing we tried so far worked. At this point, give up if we've |
1771 | * already tried AUTH_UNIX or if the server's list doesn't contain | ||
1772 | * AUTH_NULL | ||
1773 | */ | ||
1774 | if (tried_auth_unix || !auth_null_in_list) | ||
1775 | return server; | ||
1776 | |||
1777 | /* Last chance! Try AUTH_UNIX */ | ||
1778 | dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX); | ||
1779 | args->auth_flavors[0] = RPC_AUTH_UNIX; | ||
1780 | return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); | ||
1781 | } | ||
1782 | |||
1783 | struct dentry *nfs_try_mount(int flags, const char *dev_name, | ||
1784 | struct nfs_mount_info *mount_info, | ||
1785 | struct nfs_subversion *nfs_mod) | ||
1786 | { | ||
1787 | struct nfs_server *server; | ||
1788 | |||
1789 | if (mount_info->parsed->need_mount) | ||
1790 | server = nfs_try_mount_request(mount_info, nfs_mod); | ||
1791 | else | ||
1792 | server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); | ||
1793 | |||
1777 | if (IS_ERR(server)) | 1794 | if (IS_ERR(server)) |
1778 | return ERR_CAST(server); | 1795 | return ERR_CAST(server); |
1779 | 1796 | ||
@@ -2412,7 +2429,21 @@ static int nfs_bdi_register(struct nfs_server *server) | |||
2412 | int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, | 2429 | int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, |
2413 | struct nfs_mount_info *mount_info) | 2430 | struct nfs_mount_info *mount_info) |
2414 | { | 2431 | { |
2415 | return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); | 2432 | int error; |
2433 | unsigned long kflags = 0, kflags_out = 0; | ||
2434 | if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) | ||
2435 | kflags |= SECURITY_LSM_NATIVE_LABELS; | ||
2436 | |||
2437 | error = security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts, | ||
2438 | kflags, &kflags_out); | ||
2439 | if (error) | ||
2440 | goto err; | ||
2441 | |||
2442 | if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && | ||
2443 | !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) | ||
2444 | NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; | ||
2445 | err: | ||
2446 | return error; | ||
2416 | } | 2447 | } |
2417 | EXPORT_SYMBOL_GPL(nfs_set_sb_security); | 2448 | EXPORT_SYMBOL_GPL(nfs_set_sb_security); |
2418 | 2449 | ||
@@ -2447,6 +2478,10 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, | |||
2447 | if (server->flags & NFS_MOUNT_NOAC) | 2478 | if (server->flags & NFS_MOUNT_NOAC) |
2448 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | 2479 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; |
2449 | 2480 | ||
2481 | if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL) | ||
2482 | if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS) | ||
2483 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2484 | |||
2450 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2485 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2451 | s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); | 2486 | s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); |
2452 | if (IS_ERR(s)) { | 2487 | if (IS_ERR(s)) { |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 1f1f38f0c5d5..60395ad3a2e4 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
@@ -479,7 +479,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) | |||
479 | 479 | ||
480 | dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", | 480 | dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", |
481 | dentry->d_parent->d_name.name, dentry->d_name.name, | 481 | dentry->d_parent->d_name.name, dentry->d_name.name, |
482 | dentry->d_count); | 482 | d_count(dentry)); |
483 | nfs_inc_stats(dir, NFSIOS_SILLYRENAME); | 483 | nfs_inc_stats(dir, NFSIOS_SILLYRENAME); |
484 | 484 | ||
485 | /* | 485 | /* |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a2c7c28049d5..f1bdb7254776 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -888,6 +888,28 @@ out: | |||
888 | return PageUptodate(page) != 0; | 888 | return PageUptodate(page) != 0; |
889 | } | 889 | } |
890 | 890 | ||
891 | /* If we know the page is up to date, and we're not using byte range locks (or | ||
892 | * if we have the whole file locked for writing), it may be more efficient to | ||
893 | * extend the write to cover the entire page in order to avoid fragmentation | ||
894 | * inefficiencies. | ||
895 | * | ||
896 | * If the file is opened for synchronous writes or if we have a write delegation | ||
897 | * from the server then we can just skip the rest of the checks. | ||
898 | */ | ||
899 | static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) | ||
900 | { | ||
901 | if (file->f_flags & O_DSYNC) | ||
902 | return 0; | ||
903 | if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) | ||
904 | return 1; | ||
905 | if (nfs_write_pageuptodate(page, inode) && (inode->i_flock == NULL || | ||
906 | (inode->i_flock->fl_start == 0 && | ||
907 | inode->i_flock->fl_end == OFFSET_MAX && | ||
908 | inode->i_flock->fl_type != F_RDLCK))) | ||
909 | return 1; | ||
910 | return 0; | ||
911 | } | ||
912 | |||
891 | /* | 913 | /* |
892 | * Update and possibly write a cached page of an NFS file. | 914 | * Update and possibly write a cached page of an NFS file. |
893 | * | 915 | * |
@@ -908,14 +930,7 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
908 | file->f_path.dentry->d_name.name, count, | 930 | file->f_path.dentry->d_name.name, count, |
909 | (long long)(page_file_offset(page) + offset)); | 931 | (long long)(page_file_offset(page) + offset)); |
910 | 932 | ||
911 | /* If we're not using byte range locks, and we know the page | 933 | if (nfs_can_extend_write(file, page, inode)) { |
912 | * is up to date, it may be more efficient to extend the write | ||
913 | * to cover the entire page in order to avoid fragmentation | ||
914 | * inefficiencies. | ||
915 | */ | ||
916 | if (nfs_write_pageuptodate(page, inode) && | ||
917 | inode->i_flock == NULL && | ||
918 | !(file->f_flags & O_DSYNC)) { | ||
919 | count = max(count + offset, nfs_page_length(page)); | 934 | count = max(count + offset, nfs_page_length(page)); |
920 | offset = 0; | 935 | offset = 0; |
921 | } | 936 | } |
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 430b6872806f..dc8f1ef665ce 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -81,6 +81,22 @@ config NFSD_V4 | |||
81 | 81 | ||
82 | If unsure, say N. | 82 | If unsure, say N. |
83 | 83 | ||
84 | config NFSD_V4_SECURITY_LABEL | ||
85 | bool "Provide Security Label support for NFSv4 server" | ||
86 | depends on NFSD_V4 && SECURITY | ||
87 | help | ||
88 | |||
89 | Say Y here if you want enable fine-grained security label attribute | ||
90 | support for NFS version 4. Security labels allow security modules like | ||
91 | SELinux and Smack to label files to facilitate enforcement of their policies. | ||
92 | Without this an NFSv4 mount will have the same label on each file. | ||
93 | |||
94 | If you do not wish to enable fine-grained security labels SELinux or | ||
95 | Smack policies on NFSv4 files, say N. | ||
96 | |||
97 | WARNING: there is still a chance of backwards-incompatible protocol changes. | ||
98 | For now we recommend "Y" only for developers and testers." | ||
99 | |||
84 | config NFSD_FAULT_INJECTION | 100 | config NFSD_FAULT_INJECTION |
85 | bool "NFS server manual fault injection" | 101 | bool "NFS server manual fault injection" |
86 | depends on NFSD_V4 && DEBUG_KERNEL | 102 | depends on NFSD_V4 && DEBUG_KERNEL |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 27d74a294515..419572f33b72 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -42,6 +42,36 @@ | |||
42 | #include "current_stateid.h" | 42 | #include "current_stateid.h" |
43 | #include "netns.h" | 43 | #include "netns.h" |
44 | 44 | ||
45 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
46 | #include <linux/security.h> | ||
47 | |||
48 | static inline void | ||
49 | nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) | ||
50 | { | ||
51 | struct inode *inode = resfh->fh_dentry->d_inode; | ||
52 | int status; | ||
53 | |||
54 | mutex_lock(&inode->i_mutex); | ||
55 | status = security_inode_setsecctx(resfh->fh_dentry, | ||
56 | label->data, label->len); | ||
57 | mutex_unlock(&inode->i_mutex); | ||
58 | |||
59 | if (status) | ||
60 | /* | ||
61 | * XXX: We should really fail the whole open, but we may | ||
62 | * already have created a new file, so it may be too | ||
63 | * late. For now this seems the least of evils: | ||
64 | */ | ||
65 | bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
66 | |||
67 | return; | ||
68 | } | ||
69 | #else | ||
70 | static inline void | ||
71 | nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) | ||
72 | { } | ||
73 | #endif | ||
74 | |||
45 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 75 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
46 | 76 | ||
47 | static u32 nfsd_attrmask[] = { | 77 | static u32 nfsd_attrmask[] = { |
@@ -239,6 +269,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru | |||
239 | (u32 *)open->op_verf.data, | 269 | (u32 *)open->op_verf.data, |
240 | &open->op_truncate, &open->op_created); | 270 | &open->op_truncate, &open->op_created); |
241 | 271 | ||
272 | if (!status && open->op_label.len) | ||
273 | nfsd4_security_inode_setsecctx(resfh, &open->op_label, open->op_bmval); | ||
274 | |||
242 | /* | 275 | /* |
243 | * Following rfc 3530 14.2.16, use the returned bitmask | 276 | * Following rfc 3530 14.2.16, use the returned bitmask |
244 | * to indicate which attributes we used to store the | 277 | * to indicate which attributes we used to store the |
@@ -263,7 +296,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru | |||
263 | 296 | ||
264 | nfsd4_set_open_owner_reply_cache(cstate, open, resfh); | 297 | nfsd4_set_open_owner_reply_cache(cstate, open, resfh); |
265 | accmode = NFSD_MAY_NOP; | 298 | accmode = NFSD_MAY_NOP; |
266 | if (open->op_created) | 299 | if (open->op_created || |
300 | open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) | ||
267 | accmode |= NFSD_MAY_OWNER_OVERRIDE; | 301 | accmode |= NFSD_MAY_OWNER_OVERRIDE; |
268 | status = do_open_permission(rqstp, resfh, open, accmode); | 302 | status = do_open_permission(rqstp, resfh, open, accmode); |
269 | set_change_info(&open->op_cinfo, current_fh); | 303 | set_change_info(&open->op_cinfo, current_fh); |
@@ -637,6 +671,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
637 | if (status) | 671 | if (status) |
638 | goto out; | 672 | goto out; |
639 | 673 | ||
674 | if (create->cr_label.len) | ||
675 | nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); | ||
676 | |||
640 | if (create->cr_acl != NULL) | 677 | if (create->cr_acl != NULL) |
641 | do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, | 678 | do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, |
642 | create->cr_bmval); | 679 | create->cr_bmval); |
@@ -916,6 +953,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
916 | setattr->sa_acl); | 953 | setattr->sa_acl); |
917 | if (status) | 954 | if (status) |
918 | goto out; | 955 | goto out; |
956 | if (setattr->sa_label.len) | ||
957 | status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, | ||
958 | &setattr->sa_label); | ||
959 | if (status) | ||
960 | goto out; | ||
919 | status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, | 961 | status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, |
920 | 0, (time_t)0); | 962 | 0, (time_t)0); |
921 | out: | 963 | out: |
@@ -1251,7 +1293,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
1251 | * According to RFC3010, this takes precedence over all other errors. | 1293 | * According to RFC3010, this takes precedence over all other errors. |
1252 | */ | 1294 | */ |
1253 | status = nfserr_minor_vers_mismatch; | 1295 | status = nfserr_minor_vers_mismatch; |
1254 | if (args->minorversion > nfsd_supported_minorversion) | 1296 | if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0) |
1255 | goto out; | 1297 | goto out; |
1256 | 1298 | ||
1257 | status = nfs41_check_op_ordering(args); | 1299 | status = nfs41_check_op_ordering(args); |
@@ -1482,7 +1524,7 @@ static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | |||
1482 | static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) | 1524 | static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) |
1483 | { | 1525 | { |
1484 | return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ | 1526 | return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ |
1485 | 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\ | 1527 | 1 + 1 + 2 + /* eir_flags, spr_how, spo_must_enforce & _allow */\ |
1486 | 2 + /*eir_server_owner.so_minor_id */\ | 1528 | 2 + /*eir_server_owner.so_minor_id */\ |
1487 | /* eir_server_owner.so_major_id<> */\ | 1529 | /* eir_server_owner.so_major_id<> */\ |
1488 | XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ | 1530 | XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ |
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4e9a21db867a..105a3b080d12 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -240,11 +240,16 @@ struct name_list { | |||
240 | struct list_head list; | 240 | struct list_head list; |
241 | }; | 241 | }; |
242 | 242 | ||
243 | struct nfs4_dir_ctx { | ||
244 | struct dir_context ctx; | ||
245 | struct list_head names; | ||
246 | }; | ||
247 | |||
243 | static int | 248 | static int |
244 | nfsd4_build_namelist(void *arg, const char *name, int namlen, | 249 | nfsd4_build_namelist(void *arg, const char *name, int namlen, |
245 | loff_t offset, u64 ino, unsigned int d_type) | 250 | loff_t offset, u64 ino, unsigned int d_type) |
246 | { | 251 | { |
247 | struct list_head *names = arg; | 252 | struct nfs4_dir_ctx *ctx = arg; |
248 | struct name_list *entry; | 253 | struct name_list *entry; |
249 | 254 | ||
250 | if (namlen != HEXDIR_LEN - 1) | 255 | if (namlen != HEXDIR_LEN - 1) |
@@ -254,7 +259,7 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen, | |||
254 | return -ENOMEM; | 259 | return -ENOMEM; |
255 | memcpy(entry->name, name, HEXDIR_LEN - 1); | 260 | memcpy(entry->name, name, HEXDIR_LEN - 1); |
256 | entry->name[HEXDIR_LEN - 1] = '\0'; | 261 | entry->name[HEXDIR_LEN - 1] = '\0'; |
257 | list_add(&entry->list, names); | 262 | list_add(&entry->list, &ctx->names); |
258 | return 0; | 263 | return 0; |
259 | } | 264 | } |
260 | 265 | ||
@@ -263,7 +268,10 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) | |||
263 | { | 268 | { |
264 | const struct cred *original_cred; | 269 | const struct cred *original_cred; |
265 | struct dentry *dir = nn->rec_file->f_path.dentry; | 270 | struct dentry *dir = nn->rec_file->f_path.dentry; |
266 | LIST_HEAD(names); | 271 | struct nfs4_dir_ctx ctx = { |
272 | .ctx.actor = nfsd4_build_namelist, | ||
273 | .names = LIST_HEAD_INIT(ctx.names) | ||
274 | }; | ||
267 | int status; | 275 | int status; |
268 | 276 | ||
269 | status = nfs4_save_creds(&original_cred); | 277 | status = nfs4_save_creds(&original_cred); |
@@ -276,11 +284,11 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) | |||
276 | return status; | 284 | return status; |
277 | } | 285 | } |
278 | 286 | ||
279 | status = vfs_readdir(nn->rec_file, nfsd4_build_namelist, &names); | 287 | status = iterate_dir(nn->rec_file, &ctx.ctx); |
280 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); | 288 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); |
281 | while (!list_empty(&names)) { | 289 | while (!list_empty(&ctx.names)) { |
282 | struct name_list *entry; | 290 | struct name_list *entry; |
283 | entry = list_entry(names.next, struct name_list, list); | 291 | entry = list_entry(ctx.names.next, struct name_list, list); |
284 | if (!status) { | 292 | if (!status) { |
285 | struct dentry *dentry; | 293 | struct dentry *dentry; |
286 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); | 294 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 316ec843dec2..43f42290e5df 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -97,19 +97,20 @@ nfs4_lock_state(void) | |||
97 | 97 | ||
98 | static void free_session(struct nfsd4_session *); | 98 | static void free_session(struct nfsd4_session *); |
99 | 99 | ||
100 | void nfsd4_put_session(struct nfsd4_session *ses) | 100 | static bool is_session_dead(struct nfsd4_session *ses) |
101 | { | 101 | { |
102 | atomic_dec(&ses->se_ref); | 102 | return ses->se_flags & NFS4_SESSION_DEAD; |
103 | } | 103 | } |
104 | 104 | ||
105 | static bool is_session_dead(struct nfsd4_session *ses) | 105 | void nfsd4_put_session(struct nfsd4_session *ses) |
106 | { | 106 | { |
107 | return ses->se_flags & NFS4_SESSION_DEAD; | 107 | if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) |
108 | free_session(ses); | ||
108 | } | 109 | } |
109 | 110 | ||
110 | static __be32 mark_session_dead_locked(struct nfsd4_session *ses) | 111 | static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) |
111 | { | 112 | { |
112 | if (atomic_read(&ses->se_ref)) | 113 | if (atomic_read(&ses->se_ref) > ref_held_by_me) |
113 | return nfserr_jukebox; | 114 | return nfserr_jukebox; |
114 | ses->se_flags |= NFS4_SESSION_DEAD; | 115 | ses->se_flags |= NFS4_SESSION_DEAD; |
115 | return nfs_ok; | 116 | return nfs_ok; |
@@ -364,19 +365,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) | |||
364 | } | 365 | } |
365 | 366 | ||
366 | static struct nfs4_delegation * | 367 | static struct nfs4_delegation * |
367 | alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type) | 368 | alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) |
368 | { | 369 | { |
369 | struct nfs4_delegation *dp; | 370 | struct nfs4_delegation *dp; |
370 | struct nfs4_file *fp = stp->st_file; | 371 | struct nfs4_file *fp = stp->st_file; |
371 | 372 | ||
372 | dprintk("NFSD alloc_init_deleg\n"); | 373 | dprintk("NFSD alloc_init_deleg\n"); |
373 | /* | ||
374 | * Major work on the lease subsystem (for example, to support | ||
375 | * calbacks on stat) will be required before we can support | ||
376 | * write delegations properly. | ||
377 | */ | ||
378 | if (type != NFS4_OPEN_DELEGATE_READ) | ||
379 | return NULL; | ||
380 | if (fp->fi_had_conflict) | 374 | if (fp->fi_had_conflict) |
381 | return NULL; | 375 | return NULL; |
382 | if (num_delegations > max_delegations) | 376 | if (num_delegations > max_delegations) |
@@ -397,7 +391,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv | |||
397 | INIT_LIST_HEAD(&dp->dl_recall_lru); | 391 | INIT_LIST_HEAD(&dp->dl_recall_lru); |
398 | get_nfs4_file(fp); | 392 | get_nfs4_file(fp); |
399 | dp->dl_file = fp; | 393 | dp->dl_file = fp; |
400 | dp->dl_type = type; | 394 | dp->dl_type = NFS4_OPEN_DELEGATE_READ; |
401 | fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); | 395 | fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); |
402 | dp->dl_time = 0; | 396 | dp->dl_time = 0; |
403 | atomic_set(&dp->dl_count, 1); | 397 | atomic_set(&dp->dl_count, 1); |
@@ -1188,6 +1182,9 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) | |||
1188 | target->cr_gid = source->cr_gid; | 1182 | target->cr_gid = source->cr_gid; |
1189 | target->cr_group_info = source->cr_group_info; | 1183 | target->cr_group_info = source->cr_group_info; |
1190 | get_group_info(target->cr_group_info); | 1184 | get_group_info(target->cr_group_info); |
1185 | target->cr_gss_mech = source->cr_gss_mech; | ||
1186 | if (source->cr_gss_mech) | ||
1187 | gss_mech_get(source->cr_gss_mech); | ||
1191 | return 0; | 1188 | return 0; |
1192 | } | 1189 | } |
1193 | 1190 | ||
@@ -1262,6 +1259,33 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) | |||
1262 | return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); | 1259 | return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); |
1263 | } | 1260 | } |
1264 | 1261 | ||
1262 | static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp) | ||
1263 | { | ||
1264 | struct svc_cred *cr = &rqstp->rq_cred; | ||
1265 | u32 service; | ||
1266 | |||
1267 | if (!cr->cr_gss_mech) | ||
1268 | return false; | ||
1269 | service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor); | ||
1270 | return service == RPC_GSS_SVC_INTEGRITY || | ||
1271 | service == RPC_GSS_SVC_PRIVACY; | ||
1272 | } | ||
1273 | |||
1274 | static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) | ||
1275 | { | ||
1276 | struct svc_cred *cr = &rqstp->rq_cred; | ||
1277 | |||
1278 | if (!cl->cl_mach_cred) | ||
1279 | return true; | ||
1280 | if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech) | ||
1281 | return false; | ||
1282 | if (!svc_rqst_integrity_protected(rqstp)) | ||
1283 | return false; | ||
1284 | if (!cr->cr_principal) | ||
1285 | return false; | ||
1286 | return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); | ||
1287 | } | ||
1288 | |||
1265 | static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) | 1289 | static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) |
1266 | { | 1290 | { |
1267 | static u32 current_clientid = 1; | 1291 | static u32 current_clientid = 1; |
@@ -1639,16 +1663,16 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, | |||
1639 | if (exid->flags & ~EXCHGID4_FLAG_MASK_A) | 1663 | if (exid->flags & ~EXCHGID4_FLAG_MASK_A) |
1640 | return nfserr_inval; | 1664 | return nfserr_inval; |
1641 | 1665 | ||
1642 | /* Currently only support SP4_NONE */ | ||
1643 | switch (exid->spa_how) { | 1666 | switch (exid->spa_how) { |
1667 | case SP4_MACH_CRED: | ||
1668 | if (!svc_rqst_integrity_protected(rqstp)) | ||
1669 | return nfserr_inval; | ||
1644 | case SP4_NONE: | 1670 | case SP4_NONE: |
1645 | break; | 1671 | break; |
1646 | default: /* checked by xdr code */ | 1672 | default: /* checked by xdr code */ |
1647 | WARN_ON_ONCE(1); | 1673 | WARN_ON_ONCE(1); |
1648 | case SP4_SSV: | 1674 | case SP4_SSV: |
1649 | return nfserr_encr_alg_unsupp; | 1675 | return nfserr_encr_alg_unsupp; |
1650 | case SP4_MACH_CRED: | ||
1651 | return nfserr_serverfault; /* no excuse :-/ */ | ||
1652 | } | 1676 | } |
1653 | 1677 | ||
1654 | /* Cases below refer to rfc 5661 section 18.35.4: */ | 1678 | /* Cases below refer to rfc 5661 section 18.35.4: */ |
@@ -1663,6 +1687,10 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, | |||
1663 | status = nfserr_inval; | 1687 | status = nfserr_inval; |
1664 | goto out; | 1688 | goto out; |
1665 | } | 1689 | } |
1690 | if (!mach_creds_match(conf, rqstp)) { | ||
1691 | status = nfserr_wrong_cred; | ||
1692 | goto out; | ||
1693 | } | ||
1666 | if (!creds_match) { /* case 9 */ | 1694 | if (!creds_match) { /* case 9 */ |
1667 | status = nfserr_perm; | 1695 | status = nfserr_perm; |
1668 | goto out; | 1696 | goto out; |
@@ -1709,7 +1737,8 @@ out_new: | |||
1709 | status = nfserr_jukebox; | 1737 | status = nfserr_jukebox; |
1710 | goto out; | 1738 | goto out; |
1711 | } | 1739 | } |
1712 | new->cl_minorversion = 1; | 1740 | new->cl_minorversion = cstate->minorversion; |
1741 | new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); | ||
1713 | 1742 | ||
1714 | gen_clid(new, nn); | 1743 | gen_clid(new, nn); |
1715 | add_to_unconfirmed(new); | 1744 | add_to_unconfirmed(new); |
@@ -1839,6 +1868,24 @@ static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) | |||
1839 | return nfs_ok; | 1868 | return nfs_ok; |
1840 | } | 1869 | } |
1841 | 1870 | ||
1871 | static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs) | ||
1872 | { | ||
1873 | switch (cbs->flavor) { | ||
1874 | case RPC_AUTH_NULL: | ||
1875 | case RPC_AUTH_UNIX: | ||
1876 | return nfs_ok; | ||
1877 | default: | ||
1878 | /* | ||
1879 | * GSS case: the spec doesn't allow us to return this | ||
1880 | * error. But it also doesn't allow us not to support | ||
1881 | * GSS. | ||
1882 | * I'd rather this fail hard than return some error the | ||
1883 | * client might think it can already handle: | ||
1884 | */ | ||
1885 | return nfserr_encr_alg_unsupp; | ||
1886 | } | ||
1887 | } | ||
1888 | |||
1842 | __be32 | 1889 | __be32 |
1843 | nfsd4_create_session(struct svc_rqst *rqstp, | 1890 | nfsd4_create_session(struct svc_rqst *rqstp, |
1844 | struct nfsd4_compound_state *cstate, | 1891 | struct nfsd4_compound_state *cstate, |
@@ -1854,6 +1901,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1854 | 1901 | ||
1855 | if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) | 1902 | if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) |
1856 | return nfserr_inval; | 1903 | return nfserr_inval; |
1904 | status = nfsd4_check_cb_sec(&cr_ses->cb_sec); | ||
1905 | if (status) | ||
1906 | return status; | ||
1857 | status = check_forechannel_attrs(&cr_ses->fore_channel, nn); | 1907 | status = check_forechannel_attrs(&cr_ses->fore_channel, nn); |
1858 | if (status) | 1908 | if (status) |
1859 | return status; | 1909 | return status; |
@@ -1874,6 +1924,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1874 | WARN_ON_ONCE(conf && unconf); | 1924 | WARN_ON_ONCE(conf && unconf); |
1875 | 1925 | ||
1876 | if (conf) { | 1926 | if (conf) { |
1927 | status = nfserr_wrong_cred; | ||
1928 | if (!mach_creds_match(conf, rqstp)) | ||
1929 | goto out_free_conn; | ||
1877 | cs_slot = &conf->cl_cs_slot; | 1930 | cs_slot = &conf->cl_cs_slot; |
1878 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); | 1931 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); |
1879 | if (status == nfserr_replay_cache) { | 1932 | if (status == nfserr_replay_cache) { |
@@ -1890,6 +1943,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1890 | status = nfserr_clid_inuse; | 1943 | status = nfserr_clid_inuse; |
1891 | goto out_free_conn; | 1944 | goto out_free_conn; |
1892 | } | 1945 | } |
1946 | status = nfserr_wrong_cred; | ||
1947 | if (!mach_creds_match(unconf, rqstp)) | ||
1948 | goto out_free_conn; | ||
1893 | cs_slot = &unconf->cl_cs_slot; | 1949 | cs_slot = &unconf->cl_cs_slot; |
1894 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); | 1950 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); |
1895 | if (status) { | 1951 | if (status) { |
@@ -1957,7 +2013,11 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state | |||
1957 | { | 2013 | { |
1958 | struct nfsd4_session *session = cstate->session; | 2014 | struct nfsd4_session *session = cstate->session; |
1959 | struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); | 2015 | struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); |
2016 | __be32 status; | ||
1960 | 2017 | ||
2018 | status = nfsd4_check_cb_sec(&bc->bc_cb_sec); | ||
2019 | if (status) | ||
2020 | return status; | ||
1961 | spin_lock(&nn->client_lock); | 2021 | spin_lock(&nn->client_lock); |
1962 | session->se_cb_prog = bc->bc_cb_program; | 2022 | session->se_cb_prog = bc->bc_cb_program; |
1963 | session->se_cb_sec = bc->bc_cb_sec; | 2023 | session->se_cb_sec = bc->bc_cb_sec; |
@@ -1986,6 +2046,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, | |||
1986 | status = nfserr_badsession; | 2046 | status = nfserr_badsession; |
1987 | if (!session) | 2047 | if (!session) |
1988 | goto out; | 2048 | goto out; |
2049 | status = nfserr_wrong_cred; | ||
2050 | if (!mach_creds_match(session->se_client, rqstp)) | ||
2051 | goto out; | ||
1989 | status = nfsd4_map_bcts_dir(&bcts->dir); | 2052 | status = nfsd4_map_bcts_dir(&bcts->dir); |
1990 | if (status) | 2053 | if (status) |
1991 | goto out; | 2054 | goto out; |
@@ -2014,6 +2077,7 @@ nfsd4_destroy_session(struct svc_rqst *r, | |||
2014 | { | 2077 | { |
2015 | struct nfsd4_session *ses; | 2078 | struct nfsd4_session *ses; |
2016 | __be32 status; | 2079 | __be32 status; |
2080 | int ref_held_by_me = 0; | ||
2017 | struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); | 2081 | struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); |
2018 | 2082 | ||
2019 | nfs4_lock_state(); | 2083 | nfs4_lock_state(); |
@@ -2021,6 +2085,7 @@ nfsd4_destroy_session(struct svc_rqst *r, | |||
2021 | if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { | 2085 | if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { |
2022 | if (!nfsd4_last_compound_op(r)) | 2086 | if (!nfsd4_last_compound_op(r)) |
2023 | goto out; | 2087 | goto out; |
2088 | ref_held_by_me++; | ||
2024 | } | 2089 | } |
2025 | dump_sessionid(__func__, &sessionid->sessionid); | 2090 | dump_sessionid(__func__, &sessionid->sessionid); |
2026 | spin_lock(&nn->client_lock); | 2091 | spin_lock(&nn->client_lock); |
@@ -2028,17 +2093,22 @@ nfsd4_destroy_session(struct svc_rqst *r, | |||
2028 | status = nfserr_badsession; | 2093 | status = nfserr_badsession; |
2029 | if (!ses) | 2094 | if (!ses) |
2030 | goto out_client_lock; | 2095 | goto out_client_lock; |
2031 | status = mark_session_dead_locked(ses); | 2096 | status = nfserr_wrong_cred; |
2032 | if (status) | 2097 | if (!mach_creds_match(ses->se_client, r)) |
2033 | goto out_client_lock; | 2098 | goto out_client_lock; |
2099 | nfsd4_get_session_locked(ses); | ||
2100 | status = mark_session_dead_locked(ses, 1 + ref_held_by_me); | ||
2101 | if (status) | ||
2102 | goto out_put_session; | ||
2034 | unhash_session(ses); | 2103 | unhash_session(ses); |
2035 | spin_unlock(&nn->client_lock); | 2104 | spin_unlock(&nn->client_lock); |
2036 | 2105 | ||
2037 | nfsd4_probe_callback_sync(ses->se_client); | 2106 | nfsd4_probe_callback_sync(ses->se_client); |
2038 | 2107 | ||
2039 | spin_lock(&nn->client_lock); | 2108 | spin_lock(&nn->client_lock); |
2040 | free_session(ses); | ||
2041 | status = nfs_ok; | 2109 | status = nfs_ok; |
2110 | out_put_session: | ||
2111 | nfsd4_put_session(ses); | ||
2042 | out_client_lock: | 2112 | out_client_lock: |
2043 | spin_unlock(&nn->client_lock); | 2113 | spin_unlock(&nn->client_lock); |
2044 | out: | 2114 | out: |
@@ -2058,26 +2128,31 @@ static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_s | |||
2058 | return NULL; | 2128 | return NULL; |
2059 | } | 2129 | } |
2060 | 2130 | ||
2061 | static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) | 2131 | static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) |
2062 | { | 2132 | { |
2063 | struct nfs4_client *clp = ses->se_client; | 2133 | struct nfs4_client *clp = ses->se_client; |
2064 | struct nfsd4_conn *c; | 2134 | struct nfsd4_conn *c; |
2135 | __be32 status = nfs_ok; | ||
2065 | int ret; | 2136 | int ret; |
2066 | 2137 | ||
2067 | spin_lock(&clp->cl_lock); | 2138 | spin_lock(&clp->cl_lock); |
2068 | c = __nfsd4_find_conn(new->cn_xprt, ses); | 2139 | c = __nfsd4_find_conn(new->cn_xprt, ses); |
2069 | if (c) { | 2140 | if (c) |
2070 | spin_unlock(&clp->cl_lock); | 2141 | goto out_free; |
2071 | free_conn(new); | 2142 | status = nfserr_conn_not_bound_to_session; |
2072 | return; | 2143 | if (clp->cl_mach_cred) |
2073 | } | 2144 | goto out_free; |
2074 | __nfsd4_hash_conn(new, ses); | 2145 | __nfsd4_hash_conn(new, ses); |
2075 | spin_unlock(&clp->cl_lock); | 2146 | spin_unlock(&clp->cl_lock); |
2076 | ret = nfsd4_register_conn(new); | 2147 | ret = nfsd4_register_conn(new); |
2077 | if (ret) | 2148 | if (ret) |
2078 | /* oops; xprt is already down: */ | 2149 | /* oops; xprt is already down: */ |
2079 | nfsd4_conn_lost(&new->cn_xpt_user); | 2150 | nfsd4_conn_lost(&new->cn_xpt_user); |
2080 | return; | 2151 | return nfs_ok; |
2152 | out_free: | ||
2153 | spin_unlock(&clp->cl_lock); | ||
2154 | free_conn(new); | ||
2155 | return status; | ||
2081 | } | 2156 | } |
2082 | 2157 | ||
2083 | static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) | 2158 | static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) |
@@ -2169,8 +2244,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, | |||
2169 | if (status) | 2244 | if (status) |
2170 | goto out_put_session; | 2245 | goto out_put_session; |
2171 | 2246 | ||
2172 | nfsd4_sequence_check_conn(conn, session); | 2247 | status = nfsd4_sequence_check_conn(conn, session); |
2173 | conn = NULL; | 2248 | conn = NULL; |
2249 | if (status) | ||
2250 | goto out_put_session; | ||
2174 | 2251 | ||
2175 | /* Success! bump slot seqid */ | 2252 | /* Success! bump slot seqid */ |
2176 | slot->sl_seqid = seq->seqid; | 2253 | slot->sl_seqid = seq->seqid; |
@@ -2232,7 +2309,10 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta | |||
2232 | status = nfserr_stale_clientid; | 2309 | status = nfserr_stale_clientid; |
2233 | goto out; | 2310 | goto out; |
2234 | } | 2311 | } |
2235 | 2312 | if (!mach_creds_match(clp, rqstp)) { | |
2313 | status = nfserr_wrong_cred; | ||
2314 | goto out; | ||
2315 | } | ||
2236 | expire_client(clp); | 2316 | expire_client(clp); |
2237 | out: | 2317 | out: |
2238 | nfs4_unlock_state(); | 2318 | nfs4_unlock_state(); |
@@ -2645,13 +2725,13 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) | |||
2645 | 2725 | ||
2646 | list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); | 2726 | list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); |
2647 | 2727 | ||
2648 | /* only place dl_time is set. protected by lock_flocks*/ | 2728 | /* Only place dl_time is set; protected by i_lock: */ |
2649 | dp->dl_time = get_seconds(); | 2729 | dp->dl_time = get_seconds(); |
2650 | 2730 | ||
2651 | nfsd4_cb_recall(dp); | 2731 | nfsd4_cb_recall(dp); |
2652 | } | 2732 | } |
2653 | 2733 | ||
2654 | /* Called from break_lease() with lock_flocks() held. */ | 2734 | /* Called from break_lease() with i_lock held. */ |
2655 | static void nfsd_break_deleg_cb(struct file_lock *fl) | 2735 | static void nfsd_break_deleg_cb(struct file_lock *fl) |
2656 | { | 2736 | { |
2657 | struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; | 2737 | struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; |
@@ -2940,13 +3020,13 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f | |||
2940 | return fl; | 3020 | return fl; |
2941 | } | 3021 | } |
2942 | 3022 | ||
2943 | static int nfs4_setlease(struct nfs4_delegation *dp, int flag) | 3023 | static int nfs4_setlease(struct nfs4_delegation *dp) |
2944 | { | 3024 | { |
2945 | struct nfs4_file *fp = dp->dl_file; | 3025 | struct nfs4_file *fp = dp->dl_file; |
2946 | struct file_lock *fl; | 3026 | struct file_lock *fl; |
2947 | int status; | 3027 | int status; |
2948 | 3028 | ||
2949 | fl = nfs4_alloc_init_lease(dp, flag); | 3029 | fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); |
2950 | if (!fl) | 3030 | if (!fl) |
2951 | return -ENOMEM; | 3031 | return -ENOMEM; |
2952 | fl->fl_file = find_readable_file(fp); | 3032 | fl->fl_file = find_readable_file(fp); |
@@ -2964,12 +3044,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) | |||
2964 | return 0; | 3044 | return 0; |
2965 | } | 3045 | } |
2966 | 3046 | ||
2967 | static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) | 3047 | static int nfs4_set_delegation(struct nfs4_delegation *dp) |
2968 | { | 3048 | { |
2969 | struct nfs4_file *fp = dp->dl_file; | 3049 | struct nfs4_file *fp = dp->dl_file; |
2970 | 3050 | ||
2971 | if (!fp->fi_lease) | 3051 | if (!fp->fi_lease) |
2972 | return nfs4_setlease(dp, flag); | 3052 | return nfs4_setlease(dp); |
2973 | spin_lock(&recall_lock); | 3053 | spin_lock(&recall_lock); |
2974 | if (fp->fi_had_conflict) { | 3054 | if (fp->fi_had_conflict) { |
2975 | spin_unlock(&recall_lock); | 3055 | spin_unlock(&recall_lock); |
@@ -3005,6 +3085,9 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) | |||
3005 | 3085 | ||
3006 | /* | 3086 | /* |
3007 | * Attempt to hand out a delegation. | 3087 | * Attempt to hand out a delegation. |
3088 | * | ||
3089 | * Note we don't support write delegations, and won't until the vfs has | ||
3090 | * proper support for them. | ||
3008 | */ | 3091 | */ |
3009 | static void | 3092 | static void |
3010 | nfs4_open_delegation(struct net *net, struct svc_fh *fh, | 3093 | nfs4_open_delegation(struct net *net, struct svc_fh *fh, |
@@ -3013,39 +3096,45 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, | |||
3013 | struct nfs4_delegation *dp; | 3096 | struct nfs4_delegation *dp; |
3014 | struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); | 3097 | struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); |
3015 | int cb_up; | 3098 | int cb_up; |
3016 | int status = 0, flag = 0; | 3099 | int status = 0; |
3017 | 3100 | ||
3018 | cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); | 3101 | cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); |
3019 | flag = NFS4_OPEN_DELEGATE_NONE; | ||
3020 | open->op_recall = 0; | 3102 | open->op_recall = 0; |
3021 | switch (open->op_claim_type) { | 3103 | switch (open->op_claim_type) { |
3022 | case NFS4_OPEN_CLAIM_PREVIOUS: | 3104 | case NFS4_OPEN_CLAIM_PREVIOUS: |
3023 | if (!cb_up) | 3105 | if (!cb_up) |
3024 | open->op_recall = 1; | 3106 | open->op_recall = 1; |
3025 | flag = open->op_delegate_type; | 3107 | if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ) |
3026 | if (flag == NFS4_OPEN_DELEGATE_NONE) | 3108 | goto out_no_deleg; |
3027 | goto out; | ||
3028 | break; | 3109 | break; |
3029 | case NFS4_OPEN_CLAIM_NULL: | 3110 | case NFS4_OPEN_CLAIM_NULL: |
3030 | /* Let's not give out any delegations till everyone's | 3111 | /* |
3031 | * had the chance to reclaim theirs.... */ | 3112 | * Let's not give out any delegations till everyone's |
3113 | * had the chance to reclaim theirs.... | ||
3114 | */ | ||
3032 | if (locks_in_grace(net)) | 3115 | if (locks_in_grace(net)) |
3033 | goto out; | 3116 | goto out_no_deleg; |
3034 | if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) | 3117 | if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) |
3035 | goto out; | 3118 | goto out_no_deleg; |
3119 | /* | ||
3120 | * Also, if the file was opened for write or | ||
3121 | * create, there's a good chance the client's | ||
3122 | * about to write to it, resulting in an | ||
3123 | * immediate recall (since we don't support | ||
3124 | * write delegations): | ||
3125 | */ | ||
3036 | if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) | 3126 | if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) |
3037 | flag = NFS4_OPEN_DELEGATE_WRITE; | 3127 | goto out_no_deleg; |
3038 | else | 3128 | if (open->op_create == NFS4_OPEN_CREATE) |
3039 | flag = NFS4_OPEN_DELEGATE_READ; | 3129 | goto out_no_deleg; |
3040 | break; | 3130 | break; |
3041 | default: | 3131 | default: |
3042 | goto out; | 3132 | goto out_no_deleg; |
3043 | } | 3133 | } |
3044 | 3134 | dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); | |
3045 | dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag); | ||
3046 | if (dp == NULL) | 3135 | if (dp == NULL) |
3047 | goto out_no_deleg; | 3136 | goto out_no_deleg; |
3048 | status = nfs4_set_delegation(dp, flag); | 3137 | status = nfs4_set_delegation(dp); |
3049 | if (status) | 3138 | if (status) |
3050 | goto out_free; | 3139 | goto out_free; |
3051 | 3140 | ||
@@ -3053,24 +3142,23 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, | |||
3053 | 3142 | ||
3054 | dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", | 3143 | dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", |
3055 | STATEID_VAL(&dp->dl_stid.sc_stateid)); | 3144 | STATEID_VAL(&dp->dl_stid.sc_stateid)); |
3056 | out: | 3145 | open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; |
3057 | open->op_delegate_type = flag; | ||
3058 | if (flag == NFS4_OPEN_DELEGATE_NONE) { | ||
3059 | if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && | ||
3060 | open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) | ||
3061 | dprintk("NFSD: WARNING: refusing delegation reclaim\n"); | ||
3062 | |||
3063 | /* 4.1 client asking for a delegation? */ | ||
3064 | if (open->op_deleg_want) | ||
3065 | nfsd4_open_deleg_none_ext(open, status); | ||
3066 | } | ||
3067 | return; | 3146 | return; |
3068 | out_free: | 3147 | out_free: |
3069 | unhash_stid(&dp->dl_stid); | 3148 | unhash_stid(&dp->dl_stid); |
3070 | nfs4_put_delegation(dp); | 3149 | nfs4_put_delegation(dp); |
3071 | out_no_deleg: | 3150 | out_no_deleg: |
3072 | flag = NFS4_OPEN_DELEGATE_NONE; | 3151 | open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; |
3073 | goto out; | 3152 | if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && |
3153 | open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { | ||
3154 | dprintk("NFSD: WARNING: refusing delegation reclaim\n"); | ||
3155 | open->op_recall = 1; | ||
3156 | } | ||
3157 | |||
3158 | /* 4.1 client asking for a delegation? */ | ||
3159 | if (open->op_deleg_want) | ||
3160 | nfsd4_open_deleg_none_ext(open, status); | ||
3161 | return; | ||
3074 | } | 3162 | } |
3075 | 3163 | ||
3076 | static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, | 3164 | static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, |
@@ -3427,7 +3515,7 @@ grace_disallows_io(struct net *net, struct inode *inode) | |||
3427 | /* Returns true iff a is later than b: */ | 3515 | /* Returns true iff a is later than b: */ |
3428 | static bool stateid_generation_after(stateid_t *a, stateid_t *b) | 3516 | static bool stateid_generation_after(stateid_t *a, stateid_t *b) |
3429 | { | 3517 | { |
3430 | return (s32)a->si_generation - (s32)b->si_generation > 0; | 3518 | return (s32)(a->si_generation - b->si_generation) > 0; |
3431 | } | 3519 | } |
3432 | 3520 | ||
3433 | static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) | 3521 | static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) |
@@ -4435,7 +4523,6 @@ __be32 | |||
4435 | nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 4523 | nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
4436 | struct nfsd4_locku *locku) | 4524 | struct nfsd4_locku *locku) |
4437 | { | 4525 | { |
4438 | struct nfs4_lockowner *lo; | ||
4439 | struct nfs4_ol_stateid *stp; | 4526 | struct nfs4_ol_stateid *stp; |
4440 | struct file *filp = NULL; | 4527 | struct file *filp = NULL; |
4441 | struct file_lock *file_lock = NULL; | 4528 | struct file_lock *file_lock = NULL; |
@@ -4468,10 +4555,9 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4468 | status = nfserr_jukebox; | 4555 | status = nfserr_jukebox; |
4469 | goto out; | 4556 | goto out; |
4470 | } | 4557 | } |
4471 | lo = lockowner(stp->st_stateowner); | ||
4472 | locks_init_lock(file_lock); | 4558 | locks_init_lock(file_lock); |
4473 | file_lock->fl_type = F_UNLCK; | 4559 | file_lock->fl_type = F_UNLCK; |
4474 | file_lock->fl_owner = (fl_owner_t)lo; | 4560 | file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); |
4475 | file_lock->fl_pid = current->tgid; | 4561 | file_lock->fl_pid = current->tgid; |
4476 | file_lock->fl_file = filp; | 4562 | file_lock->fl_file = filp; |
4477 | file_lock->fl_flags = FL_POSIX; | 4563 | file_lock->fl_flags = FL_POSIX; |
@@ -4490,11 +4576,6 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
4490 | update_stateid(&stp->st_stid.sc_stateid); | 4576 | update_stateid(&stp->st_stid.sc_stateid); |
4491 | memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); | 4577 | memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); |
4492 | 4578 | ||
4493 | if (nfsd4_has_session(cstate) && !check_for_locks(stp->st_file, lo)) { | ||
4494 | WARN_ON_ONCE(cstate->replay_owner); | ||
4495 | release_lockowner(lo); | ||
4496 | } | ||
4497 | |||
4498 | out: | 4579 | out: |
4499 | nfsd4_bump_seqid(cstate, status); | 4580 | nfsd4_bump_seqid(cstate, status); |
4500 | if (!cstate->replay_owner) | 4581 | if (!cstate->replay_owner) |
@@ -4520,7 +4601,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) | |||
4520 | struct inode *inode = filp->fi_inode; | 4601 | struct inode *inode = filp->fi_inode; |
4521 | int status = 0; | 4602 | int status = 0; |
4522 | 4603 | ||
4523 | lock_flocks(); | 4604 | spin_lock(&inode->i_lock); |
4524 | for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { | 4605 | for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { |
4525 | if ((*flpp)->fl_owner == (fl_owner_t)lowner) { | 4606 | if ((*flpp)->fl_owner == (fl_owner_t)lowner) { |
4526 | status = 1; | 4607 | status = 1; |
@@ -4528,7 +4609,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) | |||
4528 | } | 4609 | } |
4529 | } | 4610 | } |
4530 | out: | 4611 | out: |
4531 | unlock_flocks(); | 4612 | spin_unlock(&inode->i_lock); |
4532 | return status; | 4613 | return status; |
4533 | } | 4614 | } |
4534 | 4615 | ||
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6cd86e0fe450..c2a4701d7286 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -55,6 +55,11 @@ | |||
55 | #include "cache.h" | 55 | #include "cache.h" |
56 | #include "netns.h" | 56 | #include "netns.h" |
57 | 57 | ||
58 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
59 | #include <linux/security.h> | ||
60 | #endif | ||
61 | |||
62 | |||
58 | #define NFSDDBG_FACILITY NFSDDBG_XDR | 63 | #define NFSDDBG_FACILITY NFSDDBG_XDR |
59 | 64 | ||
60 | /* | 65 | /* |
@@ -134,6 +139,19 @@ xdr_error: \ | |||
134 | } \ | 139 | } \ |
135 | } while (0) | 140 | } while (0) |
136 | 141 | ||
142 | static void next_decode_page(struct nfsd4_compoundargs *argp) | ||
143 | { | ||
144 | argp->pagelist++; | ||
145 | argp->p = page_address(argp->pagelist[0]); | ||
146 | if (argp->pagelen < PAGE_SIZE) { | ||
147 | argp->end = argp->p + (argp->pagelen>>2); | ||
148 | argp->pagelen = 0; | ||
149 | } else { | ||
150 | argp->end = argp->p + (PAGE_SIZE>>2); | ||
151 | argp->pagelen -= PAGE_SIZE; | ||
152 | } | ||
153 | } | ||
154 | |||
137 | static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) | 155 | static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) |
138 | { | 156 | { |
139 | /* We want more bytes than seem to be available. | 157 | /* We want more bytes than seem to be available. |
@@ -161,16 +179,7 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) | |||
161 | * guarantee p points to at least nbytes bytes. | 179 | * guarantee p points to at least nbytes bytes. |
162 | */ | 180 | */ |
163 | memcpy(p, argp->p, avail); | 181 | memcpy(p, argp->p, avail); |
164 | /* step to next page */ | 182 | next_decode_page(argp); |
165 | argp->p = page_address(argp->pagelist[0]); | ||
166 | argp->pagelist++; | ||
167 | if (argp->pagelen < PAGE_SIZE) { | ||
168 | argp->end = argp->p + (argp->pagelen>>2); | ||
169 | argp->pagelen = 0; | ||
170 | } else { | ||
171 | argp->end = argp->p + (PAGE_SIZE>>2); | ||
172 | argp->pagelen -= PAGE_SIZE; | ||
173 | } | ||
174 | memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); | 183 | memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); |
175 | argp->p += XDR_QUADLEN(nbytes - avail); | 184 | argp->p += XDR_QUADLEN(nbytes - avail); |
176 | return p; | 185 | return p; |
@@ -242,7 +251,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) | |||
242 | 251 | ||
243 | static __be32 | 252 | static __be32 |
244 | nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | 253 | nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, |
245 | struct iattr *iattr, struct nfs4_acl **acl) | 254 | struct iattr *iattr, struct nfs4_acl **acl, |
255 | struct xdr_netobj *label) | ||
246 | { | 256 | { |
247 | int expected_len, len = 0; | 257 | int expected_len, len = 0; |
248 | u32 dummy32; | 258 | u32 dummy32; |
@@ -380,6 +390,32 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | |||
380 | goto xdr_error; | 390 | goto xdr_error; |
381 | } | 391 | } |
382 | } | 392 | } |
393 | |||
394 | label->len = 0; | ||
395 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
396 | if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { | ||
397 | READ_BUF(4); | ||
398 | len += 4; | ||
399 | READ32(dummy32); /* lfs: we don't use it */ | ||
400 | READ_BUF(4); | ||
401 | len += 4; | ||
402 | READ32(dummy32); /* pi: we don't use it either */ | ||
403 | READ_BUF(4); | ||
404 | len += 4; | ||
405 | READ32(dummy32); | ||
406 | READ_BUF(dummy32); | ||
407 | if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) | ||
408 | return nfserr_badlabel; | ||
409 | len += (XDR_QUADLEN(dummy32) << 2); | ||
410 | READMEM(buf, dummy32); | ||
411 | label->data = kzalloc(dummy32 + 1, GFP_KERNEL); | ||
412 | if (!label->data) | ||
413 | return nfserr_jukebox; | ||
414 | defer_free(argp, kfree, label->data); | ||
415 | memcpy(label->data, buf, dummy32); | ||
416 | } | ||
417 | #endif | ||
418 | |||
383 | if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 | 419 | if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 |
384 | || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 | 420 | || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 |
385 | || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) | 421 | || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) |
@@ -428,7 +464,11 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ | |||
428 | /* callback_sec_params4 */ | 464 | /* callback_sec_params4 */ |
429 | READ_BUF(4); | 465 | READ_BUF(4); |
430 | READ32(nr_secflavs); | 466 | READ32(nr_secflavs); |
431 | cbs->flavor = (u32)(-1); | 467 | if (nr_secflavs) |
468 | cbs->flavor = (u32)(-1); | ||
469 | else | ||
470 | /* Is this legal? Be generous, take it to mean AUTH_NONE: */ | ||
471 | cbs->flavor = 0; | ||
432 | for (i = 0; i < nr_secflavs; ++i) { | 472 | for (i = 0; i < nr_secflavs; ++i) { |
433 | READ_BUF(4); | 473 | READ_BUF(4); |
434 | READ32(dummy); | 474 | READ32(dummy); |
@@ -576,7 +616,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create | |||
576 | return status; | 616 | return status; |
577 | 617 | ||
578 | status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, | 618 | status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, |
579 | &create->cr_acl); | 619 | &create->cr_acl, &create->cr_label); |
580 | if (status) | 620 | if (status) |
581 | goto out; | 621 | goto out; |
582 | 622 | ||
@@ -827,7 +867,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | |||
827 | case NFS4_CREATE_UNCHECKED: | 867 | case NFS4_CREATE_UNCHECKED: |
828 | case NFS4_CREATE_GUARDED: | 868 | case NFS4_CREATE_GUARDED: |
829 | status = nfsd4_decode_fattr(argp, open->op_bmval, | 869 | status = nfsd4_decode_fattr(argp, open->op_bmval, |
830 | &open->op_iattr, &open->op_acl); | 870 | &open->op_iattr, &open->op_acl, &open->op_label); |
831 | if (status) | 871 | if (status) |
832 | goto out; | 872 | goto out; |
833 | break; | 873 | break; |
@@ -841,7 +881,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | |||
841 | READ_BUF(NFS4_VERIFIER_SIZE); | 881 | READ_BUF(NFS4_VERIFIER_SIZE); |
842 | COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); | 882 | COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); |
843 | status = nfsd4_decode_fattr(argp, open->op_bmval, | 883 | status = nfsd4_decode_fattr(argp, open->op_bmval, |
844 | &open->op_iattr, &open->op_acl); | 884 | &open->op_iattr, &open->op_acl, &open->op_label); |
845 | if (status) | 885 | if (status) |
846 | goto out; | 886 | goto out; |
847 | break; | 887 | break; |
@@ -1063,7 +1103,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta | |||
1063 | if (status) | 1103 | if (status) |
1064 | return status; | 1104 | return status; |
1065 | return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, | 1105 | return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, |
1066 | &setattr->sa_acl); | 1106 | &setattr->sa_acl, &setattr->sa_label); |
1067 | } | 1107 | } |
1068 | 1108 | ||
1069 | static __be32 | 1109 | static __be32 |
@@ -1567,6 +1607,7 @@ struct nfsd4_minorversion_ops { | |||
1567 | static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { | 1607 | static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { |
1568 | [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, | 1608 | [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, |
1569 | [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, | 1609 | [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, |
1610 | [2] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, | ||
1570 | }; | 1611 | }; |
1571 | 1612 | ||
1572 | static __be32 | 1613 | static __be32 |
@@ -1953,6 +1994,36 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace, | |||
1953 | FATTR4_WORD0_RDATTR_ERROR) | 1994 | FATTR4_WORD0_RDATTR_ERROR) |
1954 | #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID | 1995 | #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID |
1955 | 1996 | ||
1997 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
1998 | static inline __be32 | ||
1999 | nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) | ||
2000 | { | ||
2001 | __be32 *p = *pp; | ||
2002 | |||
2003 | if (*buflen < ((XDR_QUADLEN(len) << 2) + 4 + 4 + 4)) | ||
2004 | return nfserr_resource; | ||
2005 | |||
2006 | /* | ||
2007 | * For now we use a 0 here to indicate the null translation; in | ||
2008 | * the future we may place a call to translation code here. | ||
2009 | */ | ||
2010 | if ((*buflen -= 8) < 0) | ||
2011 | return nfserr_resource; | ||
2012 | |||
2013 | WRITE32(0); /* lfs */ | ||
2014 | WRITE32(0); /* pi */ | ||
2015 | p = xdr_encode_opaque(p, context, len); | ||
2016 | *buflen -= (XDR_QUADLEN(len) << 2) + 4; | ||
2017 | |||
2018 | *pp = p; | ||
2019 | return 0; | ||
2020 | } | ||
2021 | #else | ||
2022 | static inline __be32 | ||
2023 | nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) | ||
2024 | { return 0; } | ||
2025 | #endif | ||
2026 | |||
1956 | static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) | 2027 | static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) |
1957 | { | 2028 | { |
1958 | /* As per referral draft: */ | 2029 | /* As per referral draft: */ |
@@ -2012,6 +2083,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
2012 | int err; | 2083 | int err; |
2013 | int aclsupport = 0; | 2084 | int aclsupport = 0; |
2014 | struct nfs4_acl *acl = NULL; | 2085 | struct nfs4_acl *acl = NULL; |
2086 | void *context = NULL; | ||
2087 | int contextlen; | ||
2088 | bool contextsupport = false; | ||
2015 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | 2089 | struct nfsd4_compoundres *resp = rqstp->rq_resp; |
2016 | u32 minorversion = resp->cstate.minorversion; | 2090 | u32 minorversion = resp->cstate.minorversion; |
2017 | struct path path = { | 2091 | struct path path = { |
@@ -2065,6 +2139,21 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
2065 | } | 2139 | } |
2066 | } | 2140 | } |
2067 | 2141 | ||
2142 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
2143 | if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || | ||
2144 | bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { | ||
2145 | err = security_inode_getsecctx(dentry->d_inode, | ||
2146 | &context, &contextlen); | ||
2147 | contextsupport = (err == 0); | ||
2148 | if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { | ||
2149 | if (err == -EOPNOTSUPP) | ||
2150 | bmval2 &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
2151 | else if (err) | ||
2152 | goto out_nfserr; | ||
2153 | } | ||
2154 | } | ||
2155 | #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ | ||
2156 | |||
2068 | if (bmval2) { | 2157 | if (bmval2) { |
2069 | if ((buflen -= 16) < 0) | 2158 | if ((buflen -= 16) < 0) |
2070 | goto out_resource; | 2159 | goto out_resource; |
@@ -2093,6 +2182,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
2093 | 2182 | ||
2094 | if (!aclsupport) | 2183 | if (!aclsupport) |
2095 | word0 &= ~FATTR4_WORD0_ACL; | 2184 | word0 &= ~FATTR4_WORD0_ACL; |
2185 | if (!contextsupport) | ||
2186 | word2 &= ~FATTR4_WORD2_SECURITY_LABEL; | ||
2096 | if (!word2) { | 2187 | if (!word2) { |
2097 | if ((buflen -= 12) < 0) | 2188 | if ((buflen -= 12) < 0) |
2098 | goto out_resource; | 2189 | goto out_resource; |
@@ -2400,6 +2491,12 @@ out_acl: | |||
2400 | get_parent_attributes(exp, &stat); | 2491 | get_parent_attributes(exp, &stat); |
2401 | WRITE64(stat.ino); | 2492 | WRITE64(stat.ino); |
2402 | } | 2493 | } |
2494 | if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { | ||
2495 | status = nfsd4_encode_security_label(rqstp, context, | ||
2496 | contextlen, &p, &buflen); | ||
2497 | if (status) | ||
2498 | goto out; | ||
2499 | } | ||
2403 | if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { | 2500 | if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { |
2404 | WRITE32(3); | 2501 | WRITE32(3); |
2405 | WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); | 2502 | WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); |
@@ -2412,6 +2509,10 @@ out_acl: | |||
2412 | status = nfs_ok; | 2509 | status = nfs_ok; |
2413 | 2510 | ||
2414 | out: | 2511 | out: |
2512 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
2513 | if (context) | ||
2514 | security_release_secctx(context, contextlen); | ||
2515 | #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ | ||
2415 | kfree(acl); | 2516 | kfree(acl); |
2416 | if (fhp == &tempfh) | 2517 | if (fhp == &tempfh) |
2417 | fh_put(&tempfh); | 2518 | fh_put(&tempfh); |
@@ -3176,16 +3277,18 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 | |||
3176 | { | 3277 | { |
3177 | __be32 *p; | 3278 | __be32 *p; |
3178 | 3279 | ||
3179 | RESERVE_SPACE(12); | 3280 | RESERVE_SPACE(16); |
3180 | if (nfserr) { | 3281 | if (nfserr) { |
3181 | WRITE32(2); | 3282 | WRITE32(3); |
3283 | WRITE32(0); | ||
3182 | WRITE32(0); | 3284 | WRITE32(0); |
3183 | WRITE32(0); | 3285 | WRITE32(0); |
3184 | } | 3286 | } |
3185 | else { | 3287 | else { |
3186 | WRITE32(2); | 3288 | WRITE32(3); |
3187 | WRITE32(setattr->sa_bmval[0]); | 3289 | WRITE32(setattr->sa_bmval[0]); |
3188 | WRITE32(setattr->sa_bmval[1]); | 3290 | WRITE32(setattr->sa_bmval[1]); |
3291 | WRITE32(setattr->sa_bmval[2]); | ||
3189 | } | 3292 | } |
3190 | ADJUST_ARGS(); | 3293 | ADJUST_ARGS(); |
3191 | return nfserr; | 3294 | return nfserr; |
@@ -3226,6 +3329,14 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w | |||
3226 | return nfserr; | 3329 | return nfserr; |
3227 | } | 3330 | } |
3228 | 3331 | ||
3332 | static const u32 nfs4_minimal_spo_must_enforce[2] = { | ||
3333 | [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | | ||
3334 | 1 << (OP_EXCHANGE_ID - 32) | | ||
3335 | 1 << (OP_CREATE_SESSION - 32) | | ||
3336 | 1 << (OP_DESTROY_SESSION - 32) | | ||
3337 | 1 << (OP_DESTROY_CLIENTID - 32) | ||
3338 | }; | ||
3339 | |||
3229 | static __be32 | 3340 | static __be32 |
3230 | nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, | 3341 | nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, |
3231 | struct nfsd4_exchange_id *exid) | 3342 | struct nfsd4_exchange_id *exid) |
@@ -3249,7 +3360,8 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
3249 | 8 /* eir_clientid */ + | 3360 | 8 /* eir_clientid */ + |
3250 | 4 /* eir_sequenceid */ + | 3361 | 4 /* eir_sequenceid */ + |
3251 | 4 /* eir_flags */ + | 3362 | 4 /* eir_flags */ + |
3252 | 4 /* spr_how (SP4_NONE) */ + | 3363 | 4 /* spr_how */ + |
3364 | 8 /* spo_must_enforce, spo_must_allow */ + | ||
3253 | 8 /* so_minor_id */ + | 3365 | 8 /* so_minor_id */ + |
3254 | 4 /* so_major_id.len */ + | 3366 | 4 /* so_major_id.len */ + |
3255 | (XDR_QUADLEN(major_id_sz) * 4) + | 3367 | (XDR_QUADLEN(major_id_sz) * 4) + |
@@ -3261,9 +3373,21 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
3261 | WRITE32(exid->seqid); | 3373 | WRITE32(exid->seqid); |
3262 | WRITE32(exid->flags); | 3374 | WRITE32(exid->flags); |
3263 | 3375 | ||
3264 | /* state_protect4_r. Currently only support SP4_NONE */ | ||
3265 | BUG_ON(exid->spa_how != SP4_NONE); | ||
3266 | WRITE32(exid->spa_how); | 3376 | WRITE32(exid->spa_how); |
3377 | switch (exid->spa_how) { | ||
3378 | case SP4_NONE: | ||
3379 | break; | ||
3380 | case SP4_MACH_CRED: | ||
3381 | /* spo_must_enforce bitmap: */ | ||
3382 | WRITE32(2); | ||
3383 | WRITE32(nfs4_minimal_spo_must_enforce[0]); | ||
3384 | WRITE32(nfs4_minimal_spo_must_enforce[1]); | ||
3385 | /* empty spo_must_allow bitmap: */ | ||
3386 | WRITE32(0); | ||
3387 | break; | ||
3388 | default: | ||
3389 | WARN_ON_ONCE(1); | ||
3390 | } | ||
3267 | 3391 | ||
3268 | /* The server_owner struct */ | 3392 | /* The server_owner struct */ |
3269 | WRITE64(minor_id); /* Minor id */ | 3393 | WRITE64(minor_id); /* Minor id */ |
@@ -3635,13 +3759,17 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo | |||
3635 | iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; | 3759 | iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; |
3636 | BUG_ON(iov->iov_len > PAGE_SIZE); | 3760 | BUG_ON(iov->iov_len > PAGE_SIZE); |
3637 | if (nfsd4_has_session(cs)) { | 3761 | if (nfsd4_has_session(cs)) { |
3762 | struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); | ||
3763 | struct nfs4_client *clp = cs->session->se_client; | ||
3638 | if (cs->status != nfserr_replay_cache) { | 3764 | if (cs->status != nfserr_replay_cache) { |
3639 | nfsd4_store_cache_entry(resp); | 3765 | nfsd4_store_cache_entry(resp); |
3640 | cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; | 3766 | cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; |
3641 | } | 3767 | } |
3642 | /* Renew the clientid on success and on replay */ | 3768 | /* Renew the clientid on success and on replay */ |
3643 | put_client_renew(cs->session->se_client); | 3769 | spin_lock(&nn->client_lock); |
3644 | nfsd4_put_session(cs->session); | 3770 | nfsd4_put_session(cs->session); |
3771 | spin_unlock(&nn->client_lock); | ||
3772 | put_client_renew(clp); | ||
3645 | } | 3773 | } |
3646 | return 1; | 3774 | return 1; |
3647 | } | 3775 | } |
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 07a473fd49bc..30f34ab02137 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
@@ -24,7 +24,7 @@ | |||
24 | /* | 24 | /* |
25 | * nfsd version | 25 | * nfsd version |
26 | */ | 26 | */ |
27 | #define NFSD_SUPPORTED_MINOR_VERSION 1 | 27 | #define NFSD_SUPPORTED_MINOR_VERSION 2 |
28 | /* | 28 | /* |
29 | * Maximum blocksizes supported by daemon under various circumstances. | 29 | * Maximum blocksizes supported by daemon under various circumstances. |
30 | */ | 30 | */ |
@@ -53,7 +53,6 @@ struct readdir_cd { | |||
53 | extern struct svc_program nfsd_program; | 53 | extern struct svc_program nfsd_program; |
54 | extern struct svc_version nfsd_version2, nfsd_version3, | 54 | extern struct svc_version nfsd_version2, nfsd_version3, |
55 | nfsd_version4; | 55 | nfsd_version4; |
56 | extern u32 nfsd_supported_minorversion; | ||
57 | extern struct mutex nfsd_mutex; | 56 | extern struct mutex nfsd_mutex; |
58 | extern spinlock_t nfsd_drc_lock; | 57 | extern spinlock_t nfsd_drc_lock; |
59 | extern unsigned long nfsd_drc_max_mem; | 58 | extern unsigned long nfsd_drc_max_mem; |
@@ -243,6 +242,12 @@ void nfsd_lockd_shutdown(void); | |||
243 | #define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG) | 242 | #define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG) |
244 | #define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT) | 243 | #define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT) |
245 | #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) | 244 | #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) |
245 | #define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) | ||
246 | #define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) | ||
247 | #define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) | ||
248 | #define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) | ||
249 | #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) | ||
250 | #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) | ||
246 | 251 | ||
247 | /* error codes for internal use */ | 252 | /* error codes for internal use */ |
248 | /* if a request fails due to kmalloc failure, it gets dropped. | 253 | /* if a request fails due to kmalloc failure, it gets dropped. |
@@ -322,6 +327,13 @@ void nfsd_lockd_shutdown(void); | |||
322 | #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ | 327 | #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ |
323 | (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) | 328 | (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) |
324 | 329 | ||
330 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
331 | #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ | ||
332 | (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) | ||
333 | #else | ||
334 | #define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 | ||
335 | #endif | ||
336 | |||
325 | static inline u32 nfsd_suppattrs0(u32 minorversion) | 337 | static inline u32 nfsd_suppattrs0(u32 minorversion) |
326 | { | 338 | { |
327 | return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 | 339 | return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 |
@@ -336,8 +348,11 @@ static inline u32 nfsd_suppattrs1(u32 minorversion) | |||
336 | 348 | ||
337 | static inline u32 nfsd_suppattrs2(u32 minorversion) | 349 | static inline u32 nfsd_suppattrs2(u32 minorversion) |
338 | { | 350 | { |
339 | return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2 | 351 | switch (minorversion) { |
340 | : NFSD4_SUPPORTED_ATTRS_WORD2; | 352 | default: return NFSD4_2_SUPPORTED_ATTRS_WORD2; |
353 | case 1: return NFSD4_1_SUPPORTED_ATTRS_WORD2; | ||
354 | case 0: return NFSD4_SUPPORTED_ATTRS_WORD2; | ||
355 | } | ||
341 | } | 356 | } |
342 | 357 | ||
343 | /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ | 358 | /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ |
@@ -350,7 +365,11 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) | |||
350 | #define NFSD_WRITEABLE_ATTRS_WORD1 \ | 365 | #define NFSD_WRITEABLE_ATTRS_WORD1 \ |
351 | (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | 366 | (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ |
352 | | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) | 367 | | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) |
368 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
369 | #define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL | ||
370 | #else | ||
353 | #define NFSD_WRITEABLE_ATTRS_WORD2 0 | 371 | #define NFSD_WRITEABLE_ATTRS_WORD2 0 |
372 | #endif | ||
354 | 373 | ||
355 | #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ | 374 | #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ |
356 | NFSD_WRITEABLE_ATTRS_WORD0 | 375 | NFSD_WRITEABLE_ATTRS_WORD0 |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 262df5ccbf59..760c85a6f534 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -116,7 +116,10 @@ struct svc_program nfsd_program = { | |||
116 | 116 | ||
117 | }; | 117 | }; |
118 | 118 | ||
119 | u32 nfsd_supported_minorversion; | 119 | static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { |
120 | [0] = 1, | ||
121 | [1] = 1, | ||
122 | }; | ||
120 | 123 | ||
121 | int nfsd_vers(int vers, enum vers_op change) | 124 | int nfsd_vers(int vers, enum vers_op change) |
122 | { | 125 | { |
@@ -151,15 +154,13 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change) | |||
151 | return -1; | 154 | return -1; |
152 | switch(change) { | 155 | switch(change) { |
153 | case NFSD_SET: | 156 | case NFSD_SET: |
154 | nfsd_supported_minorversion = minorversion; | 157 | nfsd_supported_minorversions[minorversion] = true; |
155 | break; | 158 | break; |
156 | case NFSD_CLEAR: | 159 | case NFSD_CLEAR: |
157 | if (minorversion == 0) | 160 | nfsd_supported_minorversions[minorversion] = false; |
158 | return -1; | ||
159 | nfsd_supported_minorversion = minorversion - 1; | ||
160 | break; | 161 | break; |
161 | case NFSD_TEST: | 162 | case NFSD_TEST: |
162 | return minorversion <= nfsd_supported_minorversion; | 163 | return nfsd_supported_minorversions[minorversion]; |
163 | case NFSD_AVAIL: | 164 | case NFSD_AVAIL: |
164 | return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; | 165 | return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; |
165 | } | 166 | } |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 274e2a114e05..424d8f5f2317 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -246,6 +246,7 @@ struct nfs4_client { | |||
246 | nfs4_verifier cl_verifier; /* generated by client */ | 246 | nfs4_verifier cl_verifier; /* generated by client */ |
247 | time_t cl_time; /* time of last lease renewal */ | 247 | time_t cl_time; /* time of last lease renewal */ |
248 | struct sockaddr_storage cl_addr; /* client ipaddress */ | 248 | struct sockaddr_storage cl_addr; /* client ipaddress */ |
249 | bool cl_mach_cred; /* SP4_MACH_CRED in force */ | ||
249 | struct svc_cred cl_cred; /* setclientid principal */ | 250 | struct svc_cred cl_cred; /* setclientid principal */ |
250 | clientid_t cl_clientid; /* generated by server */ | 251 | clientid_t cl_clientid; /* generated by server */ |
251 | nfs4_verifier cl_confirm; /* generated by server */ | 252 | nfs4_verifier cl_confirm; /* generated by server */ |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 84ce601d8063..c827acb0e943 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
29 | #include <linux/exportfs.h> | 29 | #include <linux/exportfs.h> |
30 | #include <linux/writeback.h> | 30 | #include <linux/writeback.h> |
31 | #include <linux/security.h> | ||
31 | 32 | ||
32 | #ifdef CONFIG_NFSD_V3 | 33 | #ifdef CONFIG_NFSD_V3 |
33 | #include "xdr3.h" | 34 | #include "xdr3.h" |
@@ -621,6 +622,33 @@ int nfsd4_is_junction(struct dentry *dentry) | |||
621 | return 0; | 622 | return 0; |
622 | return 1; | 623 | return 1; |
623 | } | 624 | } |
625 | #ifdef CONFIG_NFSD_V4_SECURITY_LABEL | ||
626 | __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, | ||
627 | struct xdr_netobj *label) | ||
628 | { | ||
629 | __be32 error; | ||
630 | int host_error; | ||
631 | struct dentry *dentry; | ||
632 | |||
633 | error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); | ||
634 | if (error) | ||
635 | return error; | ||
636 | |||
637 | dentry = fhp->fh_dentry; | ||
638 | |||
639 | mutex_lock(&dentry->d_inode->i_mutex); | ||
640 | host_error = security_inode_setsecctx(dentry, label->data, label->len); | ||
641 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
642 | return nfserrno(host_error); | ||
643 | } | ||
644 | #else | ||
645 | __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, | ||
646 | struct xdr_netobj *label) | ||
647 | { | ||
648 | return nfserr_notsupp; | ||
649 | } | ||
650 | #endif | ||
651 | |||
624 | #endif /* defined(CONFIG_NFSD_V4) */ | 652 | #endif /* defined(CONFIG_NFSD_V4) */ |
625 | 653 | ||
626 | #ifdef CONFIG_NFSD_V3 | 654 | #ifdef CONFIG_NFSD_V3 |
@@ -802,9 +830,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, | |||
802 | flags = O_WRONLY|O_LARGEFILE; | 830 | flags = O_WRONLY|O_LARGEFILE; |
803 | } | 831 | } |
804 | *filp = dentry_open(&path, flags, current_cred()); | 832 | *filp = dentry_open(&path, flags, current_cred()); |
805 | if (IS_ERR(*filp)) | 833 | if (IS_ERR(*filp)) { |
806 | host_err = PTR_ERR(*filp); | 834 | host_err = PTR_ERR(*filp); |
807 | else { | 835 | *filp = NULL; |
836 | } else { | ||
808 | host_err = ima_file_check(*filp, may_flags); | 837 | host_err = ima_file_check(*filp, may_flags); |
809 | 838 | ||
810 | if (may_flags & NFSD_MAY_64BIT_COOKIE) | 839 | if (may_flags & NFSD_MAY_64BIT_COOKIE) |
@@ -1912,6 +1941,7 @@ struct buffered_dirent { | |||
1912 | }; | 1941 | }; |
1913 | 1942 | ||
1914 | struct readdir_data { | 1943 | struct readdir_data { |
1944 | struct dir_context ctx; | ||
1915 | char *dirent; | 1945 | char *dirent; |
1916 | size_t used; | 1946 | size_t used; |
1917 | int full; | 1947 | int full; |
@@ -1943,13 +1973,15 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, | |||
1943 | static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, | 1973 | static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, |
1944 | struct readdir_cd *cdp, loff_t *offsetp) | 1974 | struct readdir_cd *cdp, loff_t *offsetp) |
1945 | { | 1975 | { |
1946 | struct readdir_data buf; | ||
1947 | struct buffered_dirent *de; | 1976 | struct buffered_dirent *de; |
1948 | int host_err; | 1977 | int host_err; |
1949 | int size; | 1978 | int size; |
1950 | loff_t offset; | 1979 | loff_t offset; |
1980 | struct readdir_data buf = { | ||
1981 | .ctx.actor = nfsd_buffered_filldir, | ||
1982 | .dirent = (void *)__get_free_page(GFP_KERNEL) | ||
1983 | }; | ||
1951 | 1984 | ||
1952 | buf.dirent = (void *)__get_free_page(GFP_KERNEL); | ||
1953 | if (!buf.dirent) | 1985 | if (!buf.dirent) |
1954 | return nfserrno(-ENOMEM); | 1986 | return nfserrno(-ENOMEM); |
1955 | 1987 | ||
@@ -1963,7 +1995,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, | |||
1963 | buf.used = 0; | 1995 | buf.used = 0; |
1964 | buf.full = 0; | 1996 | buf.full = 0; |
1965 | 1997 | ||
1966 | host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); | 1998 | host_err = iterate_dir(file, &buf.ctx); |
1967 | if (buf.full) | 1999 | if (buf.full) |
1968 | host_err = 0; | 2000 | host_err = 0; |
1969 | 2001 | ||
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 5b5894159f22..a4be2e389670 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h | |||
@@ -39,7 +39,6 @@ | |||
39 | typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); | 39 | typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); |
40 | 40 | ||
41 | /* nfsd/vfs.c */ | 41 | /* nfsd/vfs.c */ |
42 | int fh_lock_parent(struct svc_fh *, struct dentry *); | ||
43 | int nfsd_racache_init(int); | 42 | int nfsd_racache_init(int); |
44 | void nfsd_racache_shutdown(void); | 43 | void nfsd_racache_shutdown(void); |
45 | int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | 44 | int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, |
@@ -56,6 +55,8 @@ int nfsd_mountpoint(struct dentry *, struct svc_export *); | |||
56 | __be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, | 55 | __be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, |
57 | struct nfs4_acl *); | 56 | struct nfs4_acl *); |
58 | int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); | 57 | int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); |
58 | __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, | ||
59 | struct xdr_netobj *); | ||
59 | #endif /* CONFIG_NFSD_V4 */ | 60 | #endif /* CONFIG_NFSD_V4 */ |
60 | __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, | 61 | __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, |
61 | char *name, int len, struct iattr *attrs, | 62 | char *name, int len, struct iattr *attrs, |
@@ -92,17 +93,13 @@ __be32 nfsd_remove(struct svc_rqst *, | |||
92 | struct svc_fh *, char *, int); | 93 | struct svc_fh *, char *, int); |
93 | __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, | 94 | __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, |
94 | char *name, int len); | 95 | char *name, int len); |
95 | int nfsd_truncate(struct svc_rqst *, struct svc_fh *, | ||
96 | unsigned long size); | ||
97 | __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, | 96 | __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, |
98 | loff_t *, struct readdir_cd *, filldir_t); | 97 | loff_t *, struct readdir_cd *, filldir_t); |
99 | __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, | 98 | __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, |
100 | struct kstatfs *, int access); | 99 | struct kstatfs *, int access); |
101 | 100 | ||
102 | int nfsd_notify_change(struct inode *, struct iattr *); | ||
103 | __be32 nfsd_permission(struct svc_rqst *, struct svc_export *, | 101 | __be32 nfsd_permission(struct svc_rqst *, struct svc_export *, |
104 | struct dentry *, int); | 102 | struct dentry *, int); |
105 | int nfsd_sync_dir(struct dentry *dp); | ||
106 | 103 | ||
107 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) | 104 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) |
108 | struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); | 105 | struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); |
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 3b271d2092b6..b3ed6446ed8e 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "state.h" | 40 | #include "state.h" |
41 | #include "nfsd.h" | 41 | #include "nfsd.h" |
42 | 42 | ||
43 | #define NFSD4_MAX_SEC_LABEL_LEN 2048 | ||
43 | #define NFSD4_MAX_TAGLEN 128 | 44 | #define NFSD4_MAX_TAGLEN 128 |
44 | #define XDR_LEN(n) (((n) + 3) & ~3) | 45 | #define XDR_LEN(n) (((n) + 3) & ~3) |
45 | 46 | ||
@@ -118,6 +119,7 @@ struct nfsd4_create { | |||
118 | struct iattr cr_iattr; /* request */ | 119 | struct iattr cr_iattr; /* request */ |
119 | struct nfsd4_change_info cr_cinfo; /* response */ | 120 | struct nfsd4_change_info cr_cinfo; /* response */ |
120 | struct nfs4_acl *cr_acl; | 121 | struct nfs4_acl *cr_acl; |
122 | struct xdr_netobj cr_label; | ||
121 | }; | 123 | }; |
122 | #define cr_linklen u.link.namelen | 124 | #define cr_linklen u.link.namelen |
123 | #define cr_linkname u.link.name | 125 | #define cr_linkname u.link.name |
@@ -246,6 +248,7 @@ struct nfsd4_open { | |||
246 | struct nfs4_file *op_file; /* used during processing */ | 248 | struct nfs4_file *op_file; /* used during processing */ |
247 | struct nfs4_ol_stateid *op_stp; /* used during processing */ | 249 | struct nfs4_ol_stateid *op_stp; /* used during processing */ |
248 | struct nfs4_acl *op_acl; | 250 | struct nfs4_acl *op_acl; |
251 | struct xdr_netobj op_label; | ||
249 | }; | 252 | }; |
250 | #define op_iattr iattr | 253 | #define op_iattr iattr |
251 | 254 | ||
@@ -330,6 +333,7 @@ struct nfsd4_setattr { | |||
330 | u32 sa_bmval[3]; /* request */ | 333 | u32 sa_bmval[3]; /* request */ |
331 | struct iattr sa_iattr; /* request */ | 334 | struct iattr sa_iattr; /* request */ |
332 | struct nfs4_acl *sa_acl; | 335 | struct nfs4_acl *sa_acl; |
336 | struct xdr_netobj sa_label; | ||
333 | }; | 337 | }; |
334 | 338 | ||
335 | struct nfsd4_setclientid { | 339 | struct nfsd4_setclientid { |
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index eed4d7b26249..741fd02e0444 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c | |||
@@ -398,6 +398,69 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, | |||
398 | } | 398 | } |
399 | 399 | ||
400 | /** | 400 | /** |
401 | * nilfs_palloc_count_desc_blocks - count descriptor blocks number | ||
402 | * @inode: inode of metadata file using this allocator | ||
403 | * @desc_blocks: descriptor blocks number [out] | ||
404 | */ | ||
405 | static int nilfs_palloc_count_desc_blocks(struct inode *inode, | ||
406 | unsigned long *desc_blocks) | ||
407 | { | ||
408 | unsigned long blknum; | ||
409 | int ret; | ||
410 | |||
411 | ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum); | ||
412 | if (likely(!ret)) | ||
413 | *desc_blocks = DIV_ROUND_UP( | ||
414 | blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block); | ||
415 | return ret; | ||
416 | } | ||
417 | |||
418 | /** | ||
419 | * nilfs_palloc_mdt_file_can_grow - check potential opportunity for | ||
420 | * MDT file growing | ||
421 | * @inode: inode of metadata file using this allocator | ||
422 | * @desc_blocks: known current descriptor blocks count | ||
423 | */ | ||
424 | static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode, | ||
425 | unsigned long desc_blocks) | ||
426 | { | ||
427 | return (nilfs_palloc_groups_per_desc_block(inode) * desc_blocks) < | ||
428 | nilfs_palloc_groups_count(inode); | ||
429 | } | ||
430 | |||
431 | /** | ||
432 | * nilfs_palloc_count_max_entries - count max number of entries that can be | ||
433 | * described by descriptor blocks count | ||
434 | * @inode: inode of metadata file using this allocator | ||
435 | * @nused: current number of used entries | ||
436 | * @nmaxp: max number of entries [out] | ||
437 | */ | ||
438 | int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) | ||
439 | { | ||
440 | unsigned long desc_blocks = 0; | ||
441 | u64 entries_per_desc_block, nmax; | ||
442 | int err; | ||
443 | |||
444 | err = nilfs_palloc_count_desc_blocks(inode, &desc_blocks); | ||
445 | if (unlikely(err)) | ||
446 | return err; | ||
447 | |||
448 | entries_per_desc_block = (u64)nilfs_palloc_entries_per_group(inode) * | ||
449 | nilfs_palloc_groups_per_desc_block(inode); | ||
450 | nmax = entries_per_desc_block * desc_blocks; | ||
451 | |||
452 | if (nused == nmax && | ||
453 | nilfs_palloc_mdt_file_can_grow(inode, desc_blocks)) | ||
454 | nmax += entries_per_desc_block; | ||
455 | |||
456 | if (nused > nmax) | ||
457 | return -ERANGE; | ||
458 | |||
459 | *nmaxp = nmax; | ||
460 | return 0; | ||
461 | } | ||
462 | |||
463 | /** | ||
401 | * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object | 464 | * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object |
402 | * @inode: inode of metadata file using this allocator | 465 | * @inode: inode of metadata file using this allocator |
403 | * @req: nilfs_palloc_req structure exchanged for the allocation | 466 | * @req: nilfs_palloc_req structure exchanged for the allocation |
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index fb7238100548..4bd6451b5703 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h | |||
@@ -48,6 +48,8 @@ int nilfs_palloc_get_entry_block(struct inode *, __u64, int, | |||
48 | void *nilfs_palloc_block_get_entry(const struct inode *, __u64, | 48 | void *nilfs_palloc_block_get_entry(const struct inode *, __u64, |
49 | const struct buffer_head *, void *); | 49 | const struct buffer_head *, void *); |
50 | 50 | ||
51 | int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); | ||
52 | |||
51 | /** | 53 | /** |
52 | * nilfs_palloc_req - persistent allocator request and reply | 54 | * nilfs_palloc_req - persistent allocator request and reply |
53 | * @pr_entry_nr: entry number (vblocknr or inode number) | 55 | * @pr_entry_nr: entry number (vblocknr or inode number) |
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index f30b017740a7..197a63e9d102 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c | |||
@@ -256,22 +256,18 @@ static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) | |||
256 | de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; | 256 | de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; |
257 | } | 257 | } |
258 | 258 | ||
259 | static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 259 | static int nilfs_readdir(struct file *file, struct dir_context *ctx) |
260 | { | 260 | { |
261 | loff_t pos = filp->f_pos; | 261 | loff_t pos = ctx->pos; |
262 | struct inode *inode = file_inode(filp); | 262 | struct inode *inode = file_inode(file); |
263 | struct super_block *sb = inode->i_sb; | 263 | struct super_block *sb = inode->i_sb; |
264 | unsigned int offset = pos & ~PAGE_CACHE_MASK; | 264 | unsigned int offset = pos & ~PAGE_CACHE_MASK; |
265 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | 265 | unsigned long n = pos >> PAGE_CACHE_SHIFT; |
266 | unsigned long npages = dir_pages(inode); | 266 | unsigned long npages = dir_pages(inode); |
267 | /* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */ | 267 | /* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */ |
268 | unsigned char *types = NULL; | ||
269 | int ret; | ||
270 | 268 | ||
271 | if (pos > inode->i_size - NILFS_DIR_REC_LEN(1)) | 269 | if (pos > inode->i_size - NILFS_DIR_REC_LEN(1)) |
272 | goto success; | 270 | return 0; |
273 | |||
274 | types = nilfs_filetype_table; | ||
275 | 271 | ||
276 | for ( ; n < npages; n++, offset = 0) { | 272 | for ( ; n < npages; n++, offset = 0) { |
277 | char *kaddr, *limit; | 273 | char *kaddr, *limit; |
@@ -281,9 +277,8 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
281 | if (IS_ERR(page)) { | 277 | if (IS_ERR(page)) { |
282 | nilfs_error(sb, __func__, "bad page in #%lu", | 278 | nilfs_error(sb, __func__, "bad page in #%lu", |
283 | inode->i_ino); | 279 | inode->i_ino); |
284 | filp->f_pos += PAGE_CACHE_SIZE - offset; | 280 | ctx->pos += PAGE_CACHE_SIZE - offset; |
285 | ret = -EIO; | 281 | return -EIO; |
286 | goto done; | ||
287 | } | 282 | } |
288 | kaddr = page_address(page); | 283 | kaddr = page_address(page); |
289 | de = (struct nilfs_dir_entry *)(kaddr + offset); | 284 | de = (struct nilfs_dir_entry *)(kaddr + offset); |
@@ -293,35 +288,28 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
293 | if (de->rec_len == 0) { | 288 | if (de->rec_len == 0) { |
294 | nilfs_error(sb, __func__, | 289 | nilfs_error(sb, __func__, |
295 | "zero-length directory entry"); | 290 | "zero-length directory entry"); |
296 | ret = -EIO; | ||
297 | nilfs_put_page(page); | 291 | nilfs_put_page(page); |
298 | goto done; | 292 | return -EIO; |
299 | } | 293 | } |
300 | if (de->inode) { | 294 | if (de->inode) { |
301 | int over; | 295 | unsigned char t; |
302 | unsigned char d_type = DT_UNKNOWN; | ||
303 | 296 | ||
304 | if (types && de->file_type < NILFS_FT_MAX) | 297 | if (de->file_type < NILFS_FT_MAX) |
305 | d_type = types[de->file_type]; | 298 | t = nilfs_filetype_table[de->file_type]; |
299 | else | ||
300 | t = DT_UNKNOWN; | ||
306 | 301 | ||
307 | offset = (char *)de - kaddr; | 302 | if (!dir_emit(ctx, de->name, de->name_len, |
308 | over = filldir(dirent, de->name, de->name_len, | 303 | le64_to_cpu(de->inode), t)) { |
309 | (n<<PAGE_CACHE_SHIFT) | offset, | ||
310 | le64_to_cpu(de->inode), d_type); | ||
311 | if (over) { | ||
312 | nilfs_put_page(page); | 304 | nilfs_put_page(page); |
313 | goto success; | 305 | return 0; |
314 | } | 306 | } |
315 | } | 307 | } |
316 | filp->f_pos += nilfs_rec_len_from_disk(de->rec_len); | 308 | ctx->pos += nilfs_rec_len_from_disk(de->rec_len); |
317 | } | 309 | } |
318 | nilfs_put_page(page); | 310 | nilfs_put_page(page); |
319 | } | 311 | } |
320 | 312 | return 0; | |
321 | success: | ||
322 | ret = 0; | ||
323 | done: | ||
324 | return ret; | ||
325 | } | 313 | } |
326 | 314 | ||
327 | /* | 315 | /* |
@@ -678,7 +666,7 @@ not_empty: | |||
678 | const struct file_operations nilfs_dir_operations = { | 666 | const struct file_operations nilfs_dir_operations = { |
679 | .llseek = generic_file_llseek, | 667 | .llseek = generic_file_llseek, |
680 | .read = generic_read_dir, | 668 | .read = generic_read_dir, |
681 | .readdir = nilfs_readdir, | 669 | .iterate = nilfs_readdir, |
682 | .unlocked_ioctl = nilfs_ioctl, | 670 | .unlocked_ioctl = nilfs_ioctl, |
683 | #ifdef CONFIG_COMPAT | 671 | #ifdef CONFIG_COMPAT |
684 | .compat_ioctl = nilfs_compat_ioctl, | 672 | .compat_ioctl = nilfs_compat_ioctl, |
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index d8e65bde083c..6548c7851b48 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c | |||
@@ -160,6 +160,28 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, | |||
160 | } | 160 | } |
161 | 161 | ||
162 | /** | 162 | /** |
163 | * nilfs_ifile_count_free_inodes - calculate free inodes count | ||
164 | * @ifile: ifile inode | ||
165 | * @nmaxinodes: current maximum of available inodes count [out] | ||
166 | * @nfreeinodes: free inodes count [out] | ||
167 | */ | ||
168 | int nilfs_ifile_count_free_inodes(struct inode *ifile, | ||
169 | u64 *nmaxinodes, u64 *nfreeinodes) | ||
170 | { | ||
171 | u64 nused; | ||
172 | int err; | ||
173 | |||
174 | *nmaxinodes = 0; | ||
175 | *nfreeinodes = 0; | ||
176 | |||
177 | nused = atomic64_read(&NILFS_I(ifile)->i_root->inodes_count); | ||
178 | err = nilfs_palloc_count_max_entries(ifile, nused, nmaxinodes); | ||
179 | if (likely(!err)) | ||
180 | *nfreeinodes = *nmaxinodes - nused; | ||
181 | return err; | ||
182 | } | ||
183 | |||
184 | /** | ||
163 | * nilfs_ifile_read - read or get ifile inode | 185 | * nilfs_ifile_read - read or get ifile inode |
164 | * @sb: super block instance | 186 | * @sb: super block instance |
165 | * @root: root object | 187 | * @root: root object |
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 59b6f2b51df6..679674d13372 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h | |||
@@ -49,6 +49,8 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); | |||
49 | int nilfs_ifile_delete_inode(struct inode *, ino_t); | 49 | int nilfs_ifile_delete_inode(struct inode *, ino_t); |
50 | int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); | 50 | int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); |
51 | 51 | ||
52 | int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *); | ||
53 | |||
52 | int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, | 54 | int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, |
53 | size_t inode_size, struct nilfs_inode *raw_inode, | 55 | size_t inode_size, struct nilfs_inode *raw_inode, |
54 | struct inode **inodep); | 56 | struct inode **inodep); |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index bccfec8343c5..b1a5277cfd18 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -54,7 +54,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n) | |||
54 | 54 | ||
55 | inode_add_bytes(inode, (1 << inode->i_blkbits) * n); | 55 | inode_add_bytes(inode, (1 << inode->i_blkbits) * n); |
56 | if (root) | 56 | if (root) |
57 | atomic_add(n, &root->blocks_count); | 57 | atomic64_add(n, &root->blocks_count); |
58 | } | 58 | } |
59 | 59 | ||
60 | void nilfs_inode_sub_blocks(struct inode *inode, int n) | 60 | void nilfs_inode_sub_blocks(struct inode *inode, int n) |
@@ -63,7 +63,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n) | |||
63 | 63 | ||
64 | inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); | 64 | inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); |
65 | if (root) | 65 | if (root) |
66 | atomic_sub(n, &root->blocks_count); | 66 | atomic64_sub(n, &root->blocks_count); |
67 | } | 67 | } |
68 | 68 | ||
69 | /** | 69 | /** |
@@ -369,7 +369,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) | |||
369 | goto failed_ifile_create_inode; | 369 | goto failed_ifile_create_inode; |
370 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ | 370 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ |
371 | 371 | ||
372 | atomic_inc(&root->inodes_count); | 372 | atomic64_inc(&root->inodes_count); |
373 | inode_init_owner(inode, dir, mode); | 373 | inode_init_owner(inode, dir, mode); |
374 | inode->i_ino = ino; | 374 | inode->i_ino = ino; |
375 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 375 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
@@ -801,7 +801,7 @@ void nilfs_evict_inode(struct inode *inode) | |||
801 | 801 | ||
802 | ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); | 802 | ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); |
803 | if (!ret) | 803 | if (!ret) |
804 | atomic_dec(&ii->i_root->inodes_count); | 804 | atomic64_dec(&ii->i_root->inodes_count); |
805 | 805 | ||
806 | nilfs_clear_inode(inode); | 806 | nilfs_clear_inode(inode); |
807 | 807 | ||
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a5752a589932..bd88a7461063 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c | |||
@@ -835,9 +835,9 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) | |||
835 | raw_cp->cp_snapshot_list.ssl_next = 0; | 835 | raw_cp->cp_snapshot_list.ssl_next = 0; |
836 | raw_cp->cp_snapshot_list.ssl_prev = 0; | 836 | raw_cp->cp_snapshot_list.ssl_prev = 0; |
837 | raw_cp->cp_inodes_count = | 837 | raw_cp->cp_inodes_count = |
838 | cpu_to_le64(atomic_read(&sci->sc_root->inodes_count)); | 838 | cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count)); |
839 | raw_cp->cp_blocks_count = | 839 | raw_cp->cp_blocks_count = |
840 | cpu_to_le64(atomic_read(&sci->sc_root->blocks_count)); | 840 | cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count)); |
841 | raw_cp->cp_nblk_inc = | 841 | raw_cp->cp_nblk_inc = |
842 | cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); | 842 | cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); |
843 | raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); | 843 | raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c7d1f9f18b09..af3ba0478cdf 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -554,8 +554,10 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, | |||
554 | if (err) | 554 | if (err) |
555 | goto failed_bh; | 555 | goto failed_bh; |
556 | 556 | ||
557 | atomic_set(&root->inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); | 557 | atomic64_set(&root->inodes_count, |
558 | atomic_set(&root->blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); | 558 | le64_to_cpu(raw_cp->cp_inodes_count)); |
559 | atomic64_set(&root->blocks_count, | ||
560 | le64_to_cpu(raw_cp->cp_blocks_count)); | ||
559 | 561 | ||
560 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); | 562 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); |
561 | 563 | ||
@@ -609,6 +611,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
609 | unsigned long overhead; | 611 | unsigned long overhead; |
610 | unsigned long nrsvblocks; | 612 | unsigned long nrsvblocks; |
611 | sector_t nfreeblocks; | 613 | sector_t nfreeblocks; |
614 | u64 nmaxinodes, nfreeinodes; | ||
612 | int err; | 615 | int err; |
613 | 616 | ||
614 | /* | 617 | /* |
@@ -633,14 +636,34 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
633 | if (unlikely(err)) | 636 | if (unlikely(err)) |
634 | return err; | 637 | return err; |
635 | 638 | ||
639 | err = nilfs_ifile_count_free_inodes(root->ifile, | ||
640 | &nmaxinodes, &nfreeinodes); | ||
641 | if (unlikely(err)) { | ||
642 | printk(KERN_WARNING | ||
643 | "NILFS warning: fail to count free inodes: err %d.\n", | ||
644 | err); | ||
645 | if (err == -ERANGE) { | ||
646 | /* | ||
647 | * If nilfs_palloc_count_max_entries() returns | ||
648 | * -ERANGE error code then we simply treat | ||
649 | * curent inodes count as maximum possible and | ||
650 | * zero as free inodes value. | ||
651 | */ | ||
652 | nmaxinodes = atomic64_read(&root->inodes_count); | ||
653 | nfreeinodes = 0; | ||
654 | err = 0; | ||
655 | } else | ||
656 | return err; | ||
657 | } | ||
658 | |||
636 | buf->f_type = NILFS_SUPER_MAGIC; | 659 | buf->f_type = NILFS_SUPER_MAGIC; |
637 | buf->f_bsize = sb->s_blocksize; | 660 | buf->f_bsize = sb->s_blocksize; |
638 | buf->f_blocks = blocks - overhead; | 661 | buf->f_blocks = blocks - overhead; |
639 | buf->f_bfree = nfreeblocks; | 662 | buf->f_bfree = nfreeblocks; |
640 | buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? | 663 | buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? |
641 | (buf->f_bfree - nrsvblocks) : 0; | 664 | (buf->f_bfree - nrsvblocks) : 0; |
642 | buf->f_files = atomic_read(&root->inodes_count); | 665 | buf->f_files = nmaxinodes; |
643 | buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ | 666 | buf->f_ffree = nfreeinodes; |
644 | buf->f_namelen = NILFS_NAME_LEN; | 667 | buf->f_namelen = NILFS_NAME_LEN; |
645 | buf->f_fsid.val[0] = (u32)id; | 668 | buf->f_fsid.val[0] = (u32)id; |
646 | buf->f_fsid.val[1] = (u32)(id >> 32); | 669 | buf->f_fsid.val[1] = (u32)(id >> 32); |
@@ -973,7 +996,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, | |||
973 | 996 | ||
974 | static int nilfs_tree_was_touched(struct dentry *root_dentry) | 997 | static int nilfs_tree_was_touched(struct dentry *root_dentry) |
975 | { | 998 | { |
976 | return root_dentry->d_count > 1; | 999 | return d_count(root_dentry) > 1; |
977 | } | 1000 | } |
978 | 1001 | ||
979 | /** | 1002 | /** |
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 41e6a04a561f..94c451ce6d24 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -764,8 +764,8 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) | |||
764 | new->ifile = NULL; | 764 | new->ifile = NULL; |
765 | new->nilfs = nilfs; | 765 | new->nilfs = nilfs; |
766 | atomic_set(&new->count, 1); | 766 | atomic_set(&new->count, 1); |
767 | atomic_set(&new->inodes_count, 0); | 767 | atomic64_set(&new->inodes_count, 0); |
768 | atomic_set(&new->blocks_count, 0); | 768 | atomic64_set(&new->blocks_count, 0); |
769 | 769 | ||
770 | rb_link_node(&new->rb_node, parent, p); | 770 | rb_link_node(&new->rb_node, parent, p); |
771 | rb_insert_color(&new->rb_node, &nilfs->ns_cptree); | 771 | rb_insert_color(&new->rb_node, &nilfs->ns_cptree); |
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index be1267a34cea..de8cc53b4a5c 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h | |||
@@ -241,8 +241,8 @@ struct nilfs_root { | |||
241 | struct the_nilfs *nilfs; | 241 | struct the_nilfs *nilfs; |
242 | struct inode *ifile; | 242 | struct inode *ifile; |
243 | 243 | ||
244 | atomic_t inodes_count; | 244 | atomic64_t inodes_count; |
245 | atomic_t blocks_count; | 245 | atomic64_t blocks_count; |
246 | }; | 246 | }; |
247 | 247 | ||
248 | /* Special checkpoint number */ | 248 | /* Special checkpoint number */ |
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 2bfe6dc413a0..1fedd5f7ccc4 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c | |||
@@ -31,7 +31,6 @@ int dir_notify_enable __read_mostly = 1; | |||
31 | static struct kmem_cache *dnotify_struct_cache __read_mostly; | 31 | static struct kmem_cache *dnotify_struct_cache __read_mostly; |
32 | static struct kmem_cache *dnotify_mark_cache __read_mostly; | 32 | static struct kmem_cache *dnotify_mark_cache __read_mostly; |
33 | static struct fsnotify_group *dnotify_group __read_mostly; | 33 | static struct fsnotify_group *dnotify_group __read_mostly; |
34 | static DEFINE_MUTEX(dnotify_mark_mutex); | ||
35 | 34 | ||
36 | /* | 35 | /* |
37 | * dnotify will attach one of these to each inode (i_fsnotify_marks) which | 36 | * dnotify will attach one of these to each inode (i_fsnotify_marks) which |
@@ -183,7 +182,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) | |||
183 | return; | 182 | return; |
184 | dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); | 183 | dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); |
185 | 184 | ||
186 | mutex_lock(&dnotify_mark_mutex); | 185 | mutex_lock(&dnotify_group->mark_mutex); |
187 | 186 | ||
188 | spin_lock(&fsn_mark->lock); | 187 | spin_lock(&fsn_mark->lock); |
189 | prev = &dn_mark->dn; | 188 | prev = &dn_mark->dn; |
@@ -199,11 +198,12 @@ void dnotify_flush(struct file *filp, fl_owner_t id) | |||
199 | 198 | ||
200 | spin_unlock(&fsn_mark->lock); | 199 | spin_unlock(&fsn_mark->lock); |
201 | 200 | ||
202 | /* nothing else could have found us thanks to the dnotify_mark_mutex */ | 201 | /* nothing else could have found us thanks to the dnotify_groups |
202 | mark_mutex */ | ||
203 | if (dn_mark->dn == NULL) | 203 | if (dn_mark->dn == NULL) |
204 | fsnotify_destroy_mark(fsn_mark, dnotify_group); | 204 | fsnotify_destroy_mark_locked(fsn_mark, dnotify_group); |
205 | 205 | ||
206 | mutex_unlock(&dnotify_mark_mutex); | 206 | mutex_unlock(&dnotify_group->mark_mutex); |
207 | 207 | ||
208 | fsnotify_put_mark(fsn_mark); | 208 | fsnotify_put_mark(fsn_mark); |
209 | } | 209 | } |
@@ -326,7 +326,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
326 | new_dn_mark->dn = NULL; | 326 | new_dn_mark->dn = NULL; |
327 | 327 | ||
328 | /* this is needed to prevent the fcntl/close race described below */ | 328 | /* this is needed to prevent the fcntl/close race described below */ |
329 | mutex_lock(&dnotify_mark_mutex); | 329 | mutex_lock(&dnotify_group->mark_mutex); |
330 | 330 | ||
331 | /* add the new_fsn_mark or find an old one. */ | 331 | /* add the new_fsn_mark or find an old one. */ |
332 | fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); | 332 | fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); |
@@ -334,7 +334,8 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
334 | dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); | 334 | dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); |
335 | spin_lock(&fsn_mark->lock); | 335 | spin_lock(&fsn_mark->lock); |
336 | } else { | 336 | } else { |
337 | fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, NULL, 0); | 337 | fsnotify_add_mark_locked(new_fsn_mark, dnotify_group, inode, |
338 | NULL, 0); | ||
338 | spin_lock(&new_fsn_mark->lock); | 339 | spin_lock(&new_fsn_mark->lock); |
339 | fsn_mark = new_fsn_mark; | 340 | fsn_mark = new_fsn_mark; |
340 | dn_mark = new_dn_mark; | 341 | dn_mark = new_dn_mark; |
@@ -348,9 +349,9 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
348 | 349 | ||
349 | /* if (f != filp) means that we lost a race and another task/thread | 350 | /* if (f != filp) means that we lost a race and another task/thread |
350 | * actually closed the fd we are still playing with before we grabbed | 351 | * actually closed the fd we are still playing with before we grabbed |
351 | * the dnotify_mark_mutex and fsn_mark->lock. Since closing the fd is the | 352 | * the dnotify_groups mark_mutex and fsn_mark->lock. Since closing the |
352 | * only time we clean up the marks we need to get our mark off | 353 | * fd is the only time we clean up the marks we need to get our mark |
353 | * the list. */ | 354 | * off the list. */ |
354 | if (f != filp) { | 355 | if (f != filp) { |
355 | /* if we added ourselves, shoot ourselves, it's possible that | 356 | /* if we added ourselves, shoot ourselves, it's possible that |
356 | * the flush actually did shoot this fsn_mark. That's fine too | 357 | * the flush actually did shoot this fsn_mark. That's fine too |
@@ -385,9 +386,9 @@ out: | |||
385 | spin_unlock(&fsn_mark->lock); | 386 | spin_unlock(&fsn_mark->lock); |
386 | 387 | ||
387 | if (destroy) | 388 | if (destroy) |
388 | fsnotify_destroy_mark(fsn_mark, dnotify_group); | 389 | fsnotify_destroy_mark_locked(fsn_mark, dnotify_group); |
389 | 390 | ||
390 | mutex_unlock(&dnotify_mark_mutex); | 391 | mutex_unlock(&dnotify_group->mark_mutex); |
391 | fsnotify_put_mark(fsn_mark); | 392 | fsnotify_put_mark(fsn_mark); |
392 | out_err: | 393 | out_err: |
393 | if (new_fsn_mark) | 394 | if (new_fsn_mark) |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6c80083a984f..e44cb6427df3 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -122,6 +122,7 @@ static int fill_event_metadata(struct fsnotify_group *group, | |||
122 | metadata->event_len = FAN_EVENT_METADATA_LEN; | 122 | metadata->event_len = FAN_EVENT_METADATA_LEN; |
123 | metadata->metadata_len = FAN_EVENT_METADATA_LEN; | 123 | metadata->metadata_len = FAN_EVENT_METADATA_LEN; |
124 | metadata->vers = FANOTIFY_METADATA_VERSION; | 124 | metadata->vers = FANOTIFY_METADATA_VERSION; |
125 | metadata->reserved = 0; | ||
125 | metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; | 126 | metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; |
126 | metadata->pid = pid_vnr(event->tgid); | 127 | metadata->pid = pid_vnr(event->tgid); |
127 | if (unlikely(event->mask & FAN_Q_OVERFLOW)) | 128 | if (unlikely(event->mask & FAN_Q_OVERFLOW)) |
@@ -399,9 +400,6 @@ static int fanotify_release(struct inode *ignored, struct file *file) | |||
399 | wake_up(&group->fanotify_data.access_waitq); | 400 | wake_up(&group->fanotify_data.access_waitq); |
400 | #endif | 401 | #endif |
401 | 402 | ||
402 | if (file->f_flags & FASYNC) | ||
403 | fsnotify_fasync(-1, file, 0); | ||
404 | |||
405 | /* matches the fanotify_init->fsnotify_alloc_group */ | 403 | /* matches the fanotify_init->fsnotify_alloc_group */ |
406 | fsnotify_destroy_group(group); | 404 | fsnotify_destroy_group(group); |
407 | 405 | ||
@@ -526,14 +524,18 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, | |||
526 | __u32 removed; | 524 | __u32 removed; |
527 | int destroy_mark; | 525 | int destroy_mark; |
528 | 526 | ||
527 | mutex_lock(&group->mark_mutex); | ||
529 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); | 528 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); |
530 | if (!fsn_mark) | 529 | if (!fsn_mark) { |
530 | mutex_unlock(&group->mark_mutex); | ||
531 | return -ENOENT; | 531 | return -ENOENT; |
532 | } | ||
532 | 533 | ||
533 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, | 534 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, |
534 | &destroy_mark); | 535 | &destroy_mark); |
535 | if (destroy_mark) | 536 | if (destroy_mark) |
536 | fsnotify_destroy_mark(fsn_mark, group); | 537 | fsnotify_destroy_mark_locked(fsn_mark, group); |
538 | mutex_unlock(&group->mark_mutex); | ||
537 | 539 | ||
538 | fsnotify_put_mark(fsn_mark); | 540 | fsnotify_put_mark(fsn_mark); |
539 | if (removed & real_mount(mnt)->mnt_fsnotify_mask) | 541 | if (removed & real_mount(mnt)->mnt_fsnotify_mask) |
@@ -550,14 +552,19 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, | |||
550 | __u32 removed; | 552 | __u32 removed; |
551 | int destroy_mark; | 553 | int destroy_mark; |
552 | 554 | ||
555 | mutex_lock(&group->mark_mutex); | ||
553 | fsn_mark = fsnotify_find_inode_mark(group, inode); | 556 | fsn_mark = fsnotify_find_inode_mark(group, inode); |
554 | if (!fsn_mark) | 557 | if (!fsn_mark) { |
558 | mutex_unlock(&group->mark_mutex); | ||
555 | return -ENOENT; | 559 | return -ENOENT; |
560 | } | ||
556 | 561 | ||
557 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, | 562 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, |
558 | &destroy_mark); | 563 | &destroy_mark); |
559 | if (destroy_mark) | 564 | if (destroy_mark) |
560 | fsnotify_destroy_mark(fsn_mark, group); | 565 | fsnotify_destroy_mark_locked(fsn_mark, group); |
566 | mutex_unlock(&group->mark_mutex); | ||
567 | |||
561 | /* matches the fsnotify_find_inode_mark() */ | 568 | /* matches the fsnotify_find_inode_mark() */ |
562 | fsnotify_put_mark(fsn_mark); | 569 | fsnotify_put_mark(fsn_mark); |
563 | if (removed & inode->i_fsnotify_mask) | 570 | if (removed & inode->i_fsnotify_mask) |
@@ -593,35 +600,55 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, | |||
593 | return mask & ~oldmask; | 600 | return mask & ~oldmask; |
594 | } | 601 | } |
595 | 602 | ||
603 | static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, | ||
604 | struct inode *inode, | ||
605 | struct vfsmount *mnt) | ||
606 | { | ||
607 | struct fsnotify_mark *mark; | ||
608 | int ret; | ||
609 | |||
610 | if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) | ||
611 | return ERR_PTR(-ENOSPC); | ||
612 | |||
613 | mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); | ||
614 | if (!mark) | ||
615 | return ERR_PTR(-ENOMEM); | ||
616 | |||
617 | fsnotify_init_mark(mark, fanotify_free_mark); | ||
618 | ret = fsnotify_add_mark_locked(mark, group, inode, mnt, 0); | ||
619 | if (ret) { | ||
620 | fsnotify_put_mark(mark); | ||
621 | return ERR_PTR(ret); | ||
622 | } | ||
623 | |||
624 | return mark; | ||
625 | } | ||
626 | |||
627 | |||
596 | static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, | 628 | static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, |
597 | struct vfsmount *mnt, __u32 mask, | 629 | struct vfsmount *mnt, __u32 mask, |
598 | unsigned int flags) | 630 | unsigned int flags) |
599 | { | 631 | { |
600 | struct fsnotify_mark *fsn_mark; | 632 | struct fsnotify_mark *fsn_mark; |
601 | __u32 added; | 633 | __u32 added; |
602 | int ret = 0; | ||
603 | 634 | ||
635 | mutex_lock(&group->mark_mutex); | ||
604 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); | 636 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); |
605 | if (!fsn_mark) { | 637 | if (!fsn_mark) { |
606 | if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) | 638 | fsn_mark = fanotify_add_new_mark(group, NULL, mnt); |
607 | return -ENOSPC; | 639 | if (IS_ERR(fsn_mark)) { |
608 | 640 | mutex_unlock(&group->mark_mutex); | |
609 | fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); | 641 | return PTR_ERR(fsn_mark); |
610 | if (!fsn_mark) | 642 | } |
611 | return -ENOMEM; | ||
612 | |||
613 | fsnotify_init_mark(fsn_mark, fanotify_free_mark); | ||
614 | ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0); | ||
615 | if (ret) | ||
616 | goto err; | ||
617 | } | 643 | } |
618 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); | 644 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); |
645 | mutex_unlock(&group->mark_mutex); | ||
619 | 646 | ||
620 | if (added & ~real_mount(mnt)->mnt_fsnotify_mask) | 647 | if (added & ~real_mount(mnt)->mnt_fsnotify_mask) |
621 | fsnotify_recalc_vfsmount_mask(mnt); | 648 | fsnotify_recalc_vfsmount_mask(mnt); |
622 | err: | 649 | |
623 | fsnotify_put_mark(fsn_mark); | 650 | fsnotify_put_mark(fsn_mark); |
624 | return ret; | 651 | return 0; |
625 | } | 652 | } |
626 | 653 | ||
627 | static int fanotify_add_inode_mark(struct fsnotify_group *group, | 654 | static int fanotify_add_inode_mark(struct fsnotify_group *group, |
@@ -630,7 +657,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, | |||
630 | { | 657 | { |
631 | struct fsnotify_mark *fsn_mark; | 658 | struct fsnotify_mark *fsn_mark; |
632 | __u32 added; | 659 | __u32 added; |
633 | int ret = 0; | ||
634 | 660 | ||
635 | pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); | 661 | pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); |
636 | 662 | ||
@@ -644,27 +670,23 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, | |||
644 | (atomic_read(&inode->i_writecount) > 0)) | 670 | (atomic_read(&inode->i_writecount) > 0)) |
645 | return 0; | 671 | return 0; |
646 | 672 | ||
673 | mutex_lock(&group->mark_mutex); | ||
647 | fsn_mark = fsnotify_find_inode_mark(group, inode); | 674 | fsn_mark = fsnotify_find_inode_mark(group, inode); |
648 | if (!fsn_mark) { | 675 | if (!fsn_mark) { |
649 | if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) | 676 | fsn_mark = fanotify_add_new_mark(group, inode, NULL); |
650 | return -ENOSPC; | 677 | if (IS_ERR(fsn_mark)) { |
651 | 678 | mutex_unlock(&group->mark_mutex); | |
652 | fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); | 679 | return PTR_ERR(fsn_mark); |
653 | if (!fsn_mark) | 680 | } |
654 | return -ENOMEM; | ||
655 | |||
656 | fsnotify_init_mark(fsn_mark, fanotify_free_mark); | ||
657 | ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0); | ||
658 | if (ret) | ||
659 | goto err; | ||
660 | } | 681 | } |
661 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); | 682 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); |
683 | mutex_unlock(&group->mark_mutex); | ||
662 | 684 | ||
663 | if (added & ~inode->i_fsnotify_mask) | 685 | if (added & ~inode->i_fsnotify_mask) |
664 | fsnotify_recalc_inode_mask(inode); | 686 | fsnotify_recalc_inode_mask(inode); |
665 | err: | 687 | |
666 | fsnotify_put_mark(fsn_mark); | 688 | fsnotify_put_mark(fsn_mark); |
667 | return ret; | 689 | return 0; |
668 | } | 690 | } |
669 | 691 | ||
670 | /* fanotify syscalls */ | 692 | /* fanotify syscalls */ |
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 959815c1e017..60f954a891ab 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -636,7 +636,8 @@ static int inotify_new_watch(struct fsnotify_group *group, | |||
636 | goto out_err; | 636 | goto out_err; |
637 | 637 | ||
638 | /* we are on the idr, now get on the inode */ | 638 | /* we are on the idr, now get on the inode */ |
639 | ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0); | 639 | ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode, |
640 | NULL, 0); | ||
640 | if (ret) { | 641 | if (ret) { |
641 | /* we failed to get on the inode, get off the idr */ | 642 | /* we failed to get on the inode, get off the idr */ |
642 | inotify_remove_from_idr(group, tmp_i_mark); | 643 | inotify_remove_from_idr(group, tmp_i_mark); |
@@ -660,19 +661,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod | |||
660 | { | 661 | { |
661 | int ret = 0; | 662 | int ret = 0; |
662 | 663 | ||
663 | retry: | 664 | mutex_lock(&group->mark_mutex); |
664 | /* try to update and existing watch with the new arg */ | 665 | /* try to update and existing watch with the new arg */ |
665 | ret = inotify_update_existing_watch(group, inode, arg); | 666 | ret = inotify_update_existing_watch(group, inode, arg); |
666 | /* no mark present, try to add a new one */ | 667 | /* no mark present, try to add a new one */ |
667 | if (ret == -ENOENT) | 668 | if (ret == -ENOENT) |
668 | ret = inotify_new_watch(group, inode, arg); | 669 | ret = inotify_new_watch(group, inode, arg); |
669 | /* | 670 | mutex_unlock(&group->mark_mutex); |
670 | * inotify_new_watch could race with another thread which did an | ||
671 | * inotify_new_watch between the update_existing and the add watch | ||
672 | * here, go back and try to update an existing mark again. | ||
673 | */ | ||
674 | if (ret == -EEXIST) | ||
675 | goto retry; | ||
676 | 671 | ||
677 | return ret; | 672 | return ret; |
678 | } | 673 | } |
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index fc6b49bf7360..923fe4a5f503 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c | |||
@@ -20,28 +20,29 @@ | |||
20 | * fsnotify inode mark locking/lifetime/and refcnting | 20 | * fsnotify inode mark locking/lifetime/and refcnting |
21 | * | 21 | * |
22 | * REFCNT: | 22 | * REFCNT: |
23 | * The mark->refcnt tells how many "things" in the kernel currently are | 23 | * The group->recnt and mark->refcnt tell how many "things" in the kernel |
24 | * referencing this object. The object typically will live inside the kernel | 24 | * currently are referencing the objects. Both kind of objects typically will |
25 | * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task | 25 | * live inside the kernel with a refcnt of 2, one for its creation and one for |
26 | * which can find this object holding the appropriete locks, can take a reference | 26 | * the reference a group and a mark hold to each other. |
27 | * and the object itself is guaranteed to survive until the reference is dropped. | 27 | * If you are holding the appropriate locks, you can take a reference and the |
28 | * object itself is guaranteed to survive until the reference is dropped. | ||
28 | * | 29 | * |
29 | * LOCKING: | 30 | * LOCKING: |
30 | * There are 3 spinlocks involved with fsnotify inode marks and they MUST | 31 | * There are 3 locks involved with fsnotify inode marks and they MUST be taken |
31 | * be taken in order as follows: | 32 | * in order as follows: |
32 | * | 33 | * |
34 | * group->mark_mutex | ||
33 | * mark->lock | 35 | * mark->lock |
34 | * group->mark_lock | ||
35 | * inode->i_lock | 36 | * inode->i_lock |
36 | * | 37 | * |
37 | * mark->lock protects 2 things, mark->group and mark->inode. You must hold | 38 | * group->mark_mutex protects the marks_list anchored inside a given group and |
38 | * that lock to dereference either of these things (they could be NULL even with | 39 | * each mark is hooked via the g_list. It also protects the groups private |
39 | * the lock) | 40 | * data (i.e group limits). |
40 | * | 41 | |
41 | * group->mark_lock protects the marks_list anchored inside a given group | 42 | * mark->lock protects the marks attributes like its masks and flags. |
42 | * and each mark is hooked via the g_list. It also sorta protects the | 43 | * Furthermore it protects the access to a reference of the group that the mark |
43 | * free_g_list, which when used is anchored by a private list on the stack of the | 44 | * is assigned to as well as the access to a reference of the inode/vfsmount |
44 | * task which held the group->mark_lock. | 45 | * that is being watched by the mark. |
45 | * | 46 | * |
46 | * inode->i_lock protects the i_fsnotify_marks list anchored inside a | 47 | * inode->i_lock protects the i_fsnotify_marks list anchored inside a |
47 | * given inode and each mark is hooked via the i_list. (and sorta the | 48 | * given inode and each mark is hooked via the i_list. (and sorta the |
@@ -64,18 +65,11 @@ | |||
64 | * inode. We take i_lock and walk the i_fsnotify_marks safely. For each | 65 | * inode. We take i_lock and walk the i_fsnotify_marks safely. For each |
65 | * mark on the list we take a reference (so the mark can't disappear under us). | 66 | * mark on the list we take a reference (so the mark can't disappear under us). |
66 | * We remove that mark form the inode's list of marks and we add this mark to a | 67 | * We remove that mark form the inode's list of marks and we add this mark to a |
67 | * private list anchored on the stack using i_free_list; At this point we no | 68 | * private list anchored on the stack using i_free_list; we walk i_free_list |
68 | * longer fear anything finding the mark using the inode's list of marks. | 69 | * and before we destroy the mark we make sure that we dont race with a |
69 | * | 70 | * concurrent destroy_group by getting a ref to the marks group and taking the |
70 | * We can safely and locklessly run the private list on the stack of everything | 71 | * groups mutex. |
71 | * we just unattached from the original inode. For each mark on the private list | 72 | |
72 | * we grab the mark-> and can thus dereference mark->group and mark->inode. If | ||
73 | * we see the group and inode are not NULL we take those locks. Now holding all | ||
74 | * 3 locks we can completely remove the mark from other tasks finding it in the | ||
75 | * future. Remember, 10 things might already be referencing this mark, but they | ||
76 | * better be holding a ref. We drop our reference we took before we unhooked it | ||
77 | * from the inode. When the ref hits 0 we can free the mark. | ||
78 | * | ||
79 | * Very similarly for freeing by group, except we use free_g_list. | 73 | * Very similarly for freeing by group, except we use free_g_list. |
80 | * | 74 | * |
81 | * This has the very interesting property of being able to run concurrently with | 75 | * This has the very interesting property of being able to run concurrently with |
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index fa9c05f97af4..d267ea6aa1a0 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c | |||
@@ -1372,7 +1372,7 @@ retry_writepage: | |||
1372 | * The page may have dirty, unmapped buffers. Make them | 1372 | * The page may have dirty, unmapped buffers. Make them |
1373 | * freeable here, so the page does not leak. | 1373 | * freeable here, so the page does not leak. |
1374 | */ | 1374 | */ |
1375 | block_invalidatepage(page, 0); | 1375 | block_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
1376 | unlock_page(page); | 1376 | unlock_page(page); |
1377 | ntfs_debug("Write outside i_size - truncated?"); | 1377 | ntfs_debug("Write outside i_size - truncated?"); |
1378 | return 0; | 1378 | return 0; |
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index aa411c3f20e9..9e38dafa3bc7 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c | |||
@@ -1004,13 +1004,11 @@ dir_err_out: | |||
1004 | /** | 1004 | /** |
1005 | * ntfs_filldir - ntfs specific filldir method | 1005 | * ntfs_filldir - ntfs specific filldir method |
1006 | * @vol: current ntfs volume | 1006 | * @vol: current ntfs volume |
1007 | * @fpos: position in the directory | ||
1008 | * @ndir: ntfs inode of current directory | 1007 | * @ndir: ntfs inode of current directory |
1009 | * @ia_page: page in which the index allocation buffer @ie is in resides | 1008 | * @ia_page: page in which the index allocation buffer @ie is in resides |
1010 | * @ie: current index entry | 1009 | * @ie: current index entry |
1011 | * @name: buffer to use for the converted name | 1010 | * @name: buffer to use for the converted name |
1012 | * @dirent: vfs filldir callback context | 1011 | * @actor: what to feed the entries to |
1013 | * @filldir: vfs filldir callback | ||
1014 | * | 1012 | * |
1015 | * Convert the Unicode @name to the loaded NLS and pass it to the @filldir | 1013 | * Convert the Unicode @name to the loaded NLS and pass it to the @filldir |
1016 | * callback. | 1014 | * callback. |
@@ -1024,12 +1022,12 @@ dir_err_out: | |||
1024 | * retake the lock if we are returning a non-zero value as ntfs_readdir() | 1022 | * retake the lock if we are returning a non-zero value as ntfs_readdir() |
1025 | * would need to drop the lock immediately anyway. | 1023 | * would need to drop the lock immediately anyway. |
1026 | */ | 1024 | */ |
1027 | static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, | 1025 | static inline int ntfs_filldir(ntfs_volume *vol, |
1028 | ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie, | 1026 | ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie, |
1029 | u8 *name, void *dirent, filldir_t filldir) | 1027 | u8 *name, struct dir_context *actor) |
1030 | { | 1028 | { |
1031 | unsigned long mref; | 1029 | unsigned long mref; |
1032 | int name_len, rc; | 1030 | int name_len; |
1033 | unsigned dt_type; | 1031 | unsigned dt_type; |
1034 | FILE_NAME_TYPE_FLAGS name_type; | 1032 | FILE_NAME_TYPE_FLAGS name_type; |
1035 | 1033 | ||
@@ -1068,13 +1066,14 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, | |||
1068 | if (ia_page) | 1066 | if (ia_page) |
1069 | unlock_page(ia_page); | 1067 | unlock_page(ia_page); |
1070 | ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode " | 1068 | ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode " |
1071 | "0x%lx, DT_%s.", name, name_len, fpos, mref, | 1069 | "0x%lx, DT_%s.", name, name_len, actor->pos, mref, |
1072 | dt_type == DT_DIR ? "DIR" : "REG"); | 1070 | dt_type == DT_DIR ? "DIR" : "REG"); |
1073 | rc = filldir(dirent, name, name_len, fpos, mref, dt_type); | 1071 | if (!dir_emit(actor, name, name_len, mref, dt_type)) |
1072 | return 1; | ||
1074 | /* Relock the page but not if we are aborting ->readdir. */ | 1073 | /* Relock the page but not if we are aborting ->readdir. */ |
1075 | if (!rc && ia_page) | 1074 | if (ia_page) |
1076 | lock_page(ia_page); | 1075 | lock_page(ia_page); |
1077 | return rc; | 1076 | return 0; |
1078 | } | 1077 | } |
1079 | 1078 | ||
1080 | /* | 1079 | /* |
@@ -1097,11 +1096,11 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, | |||
1097 | * removes them again after the write is complete after which it | 1096 | * removes them again after the write is complete after which it |
1098 | * unlocks the page. | 1097 | * unlocks the page. |
1099 | */ | 1098 | */ |
1100 | static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 1099 | static int ntfs_readdir(struct file *file, struct dir_context *actor) |
1101 | { | 1100 | { |
1102 | s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; | 1101 | s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; |
1103 | loff_t fpos, i_size; | 1102 | loff_t i_size; |
1104 | struct inode *bmp_vi, *vdir = file_inode(filp); | 1103 | struct inode *bmp_vi, *vdir = file_inode(file); |
1105 | struct super_block *sb = vdir->i_sb; | 1104 | struct super_block *sb = vdir->i_sb; |
1106 | ntfs_inode *ndir = NTFS_I(vdir); | 1105 | ntfs_inode *ndir = NTFS_I(vdir); |
1107 | ntfs_volume *vol = NTFS_SB(sb); | 1106 | ntfs_volume *vol = NTFS_SB(sb); |
@@ -1116,33 +1115,16 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
1116 | u8 *kaddr, *bmp, *index_end; | 1115 | u8 *kaddr, *bmp, *index_end; |
1117 | ntfs_attr_search_ctx *ctx; | 1116 | ntfs_attr_search_ctx *ctx; |
1118 | 1117 | ||
1119 | fpos = filp->f_pos; | ||
1120 | ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.", | 1118 | ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.", |
1121 | vdir->i_ino, fpos); | 1119 | vdir->i_ino, actor->pos); |
1122 | rc = err = 0; | 1120 | rc = err = 0; |
1123 | /* Are we at end of dir yet? */ | 1121 | /* Are we at end of dir yet? */ |
1124 | i_size = i_size_read(vdir); | 1122 | i_size = i_size_read(vdir); |
1125 | if (fpos >= i_size + vol->mft_record_size) | 1123 | if (actor->pos >= i_size + vol->mft_record_size) |
1126 | goto done; | 1124 | return 0; |
1127 | /* Emulate . and .. for all directories. */ | 1125 | /* Emulate . and .. for all directories. */ |
1128 | if (!fpos) { | 1126 | if (!dir_emit_dots(file, actor)) |
1129 | ntfs_debug("Calling filldir for . with len 1, fpos 0x0, " | 1127 | return 0; |
1130 | "inode 0x%lx, DT_DIR.", vdir->i_ino); | ||
1131 | rc = filldir(dirent, ".", 1, fpos, vdir->i_ino, DT_DIR); | ||
1132 | if (rc) | ||
1133 | goto done; | ||
1134 | fpos++; | ||
1135 | } | ||
1136 | if (fpos == 1) { | ||
1137 | ntfs_debug("Calling filldir for .. with len 2, fpos 0x1, " | ||
1138 | "inode 0x%lx, DT_DIR.", | ||
1139 | (unsigned long)parent_ino(filp->f_path.dentry)); | ||
1140 | rc = filldir(dirent, "..", 2, fpos, | ||
1141 | parent_ino(filp->f_path.dentry), DT_DIR); | ||
1142 | if (rc) | ||
1143 | goto done; | ||
1144 | fpos++; | ||
1145 | } | ||
1146 | m = NULL; | 1128 | m = NULL; |
1147 | ctx = NULL; | 1129 | ctx = NULL; |
1148 | /* | 1130 | /* |
@@ -1155,7 +1137,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
1155 | goto err_out; | 1137 | goto err_out; |
1156 | } | 1138 | } |
1157 | /* Are we jumping straight into the index allocation attribute? */ | 1139 | /* Are we jumping straight into the index allocation attribute? */ |
1158 | if (fpos >= vol->mft_record_size) | 1140 | if (actor->pos >= vol->mft_record_size) |
1159 | goto skip_index_root; | 1141 | goto skip_index_root; |
1160 | /* Get hold of the mft record for the directory. */ | 1142 | /* Get hold of the mft record for the directory. */ |
1161 | m = map_mft_record(ndir); | 1143 | m = map_mft_record(ndir); |
@@ -1170,7 +1152,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
1170 | goto err_out; | 1152 | goto err_out; |
1171 | } | 1153 | } |
1172 | /* Get the offset into the index root attribute. */ | 1154 | /* Get the offset into the index root attribute. */ |
1173 | ir_pos = (s64)fpos; | 1155 | ir_pos = (s64)actor->pos; |
1174 | /* Find the index root attribute in the mft record. */ | 1156 | /* Find the index root attribute in the mft record. */ |
1175 | err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, | 1157 | err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, |
1176 | 0, ctx); | 1158 | 0, ctx); |
@@ -1226,10 +1208,9 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
1226 | if (ir_pos > (u8*)ie - (u8*)ir) | 1208 | if (ir_pos > (u8*)ie - (u8*)ir) |
1227 | continue; | 1209 | continue; |
1228 | /* Advance the position even if going to skip the entry. */ | 1210 | /* Advance the position even if going to skip the entry. */ |
1229 | fpos = (u8*)ie - (u8*)ir; | 1211 | actor->pos = (u8*)ie - (u8*)ir; |
1230 | /* Submit the name to the filldir callback. */ | 1212 | /* Submit the name to the filldir callback. */ |
1231 | rc = ntfs_filldir(vol, fpos, ndir, NULL, ie, name, dirent, | 1213 | rc = ntfs_filldir(vol, ndir, NULL, ie, name, actor); |
1232 | filldir); | ||
1233 | if (rc) { | 1214 | if (rc) { |
1234 | kfree(ir); | 1215 | kfree(ir); |
1235 | goto abort; | 1216 | goto abort; |
@@ -1242,12 +1223,12 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
1242 | if (!NInoIndexAllocPresent(ndir)) | 1223 | if (!NInoIndexAllocPresent(ndir)) |
1243 | goto EOD; | 1224 | goto EOD; |
1244 | /* Advance fpos to the beginning of the index allocation. */ | 1225 | /* Advance fpos to the beginning of the index allocation. */ |
1245 | fpos = vol->mft_record_size; | 1226 | actor->pos = vol->mft_record_size; |
1246 | skip_index_root: | 1227 | skip_index_root: |
1247 | kaddr = NULL; | 1228 | kaddr = NULL; |
1248 | prev_ia_pos = -1LL; | 1229 | prev_ia_pos = -1LL; |
1249 | /* Get the offset into the index allocation attribute. */ | 1230 | /* Get the offset into the index allocation attribute. */ |
1250 | ia_pos = (s64)fpos - vol->mft_record_size; | 1231 | ia_pos = (s64)actor->pos - vol->mft_record_size; |
1251 | ia_mapping = vdir->i_mapping; | 1232 | ia_mapping = vdir->i_mapping; |
1252 | ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino); | 1233 | ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino); |
1253 | bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); | 1234 | bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); |
@@ -1409,7 +1390,7 @@ find_next_index_buffer: | |||
1409 | if (ia_pos - ia_start > (u8*)ie - (u8*)ia) | 1390 | if (ia_pos - ia_start > (u8*)ie - (u8*)ia) |
1410 | continue; | 1391 | continue; |
1411 | /* Advance the position even if going to skip the entry. */ | 1392 | /* Advance the position even if going to skip the entry. */ |
1412 | fpos = (u8*)ie - (u8*)ia + | 1393 | actor->pos = (u8*)ie - (u8*)ia + |
1413 | (sle64_to_cpu(ia->index_block_vcn) << | 1394 | (sle64_to_cpu(ia->index_block_vcn) << |
1414 | ndir->itype.index.vcn_size_bits) + | 1395 | ndir->itype.index.vcn_size_bits) + |
1415 | vol->mft_record_size; | 1396 | vol->mft_record_size; |
@@ -1419,8 +1400,7 @@ find_next_index_buffer: | |||
1419 | * before returning, unless a non-zero value is returned in | 1400 | * before returning, unless a non-zero value is returned in |
1420 | * which case the page is left unlocked. | 1401 | * which case the page is left unlocked. |
1421 | */ | 1402 | */ |
1422 | rc = ntfs_filldir(vol, fpos, ndir, ia_page, ie, name, dirent, | 1403 | rc = ntfs_filldir(vol, ndir, ia_page, ie, name, actor); |
1423 | filldir); | ||
1424 | if (rc) { | 1404 | if (rc) { |
1425 | /* @ia_page is already unlocked in this case. */ | 1405 | /* @ia_page is already unlocked in this case. */ |
1426 | ntfs_unmap_page(ia_page); | 1406 | ntfs_unmap_page(ia_page); |
@@ -1439,18 +1419,9 @@ unm_EOD: | |||
1439 | iput(bmp_vi); | 1419 | iput(bmp_vi); |
1440 | EOD: | 1420 | EOD: |
1441 | /* We are finished, set fpos to EOD. */ | 1421 | /* We are finished, set fpos to EOD. */ |
1442 | fpos = i_size + vol->mft_record_size; | 1422 | actor->pos = i_size + vol->mft_record_size; |
1443 | abort: | 1423 | abort: |
1444 | kfree(name); | 1424 | kfree(name); |
1445 | done: | ||
1446 | #ifdef DEBUG | ||
1447 | if (!rc) | ||
1448 | ntfs_debug("EOD, fpos 0x%llx, returning 0.", fpos); | ||
1449 | else | ||
1450 | ntfs_debug("filldir returned %i, fpos 0x%llx, returning 0.", | ||
1451 | rc, fpos); | ||
1452 | #endif | ||
1453 | filp->f_pos = fpos; | ||
1454 | return 0; | 1425 | return 0; |
1455 | err_out: | 1426 | err_out: |
1456 | if (bmp_page) { | 1427 | if (bmp_page) { |
@@ -1471,7 +1442,6 @@ iput_err_out: | |||
1471 | if (!err) | 1442 | if (!err) |
1472 | err = -EIO; | 1443 | err = -EIO; |
1473 | ntfs_debug("Failed. Returning error code %i.", -err); | 1444 | ntfs_debug("Failed. Returning error code %i.", -err); |
1474 | filp->f_pos = fpos; | ||
1475 | return err; | 1445 | return err; |
1476 | } | 1446 | } |
1477 | 1447 | ||
@@ -1571,7 +1541,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end, | |||
1571 | const struct file_operations ntfs_dir_ops = { | 1541 | const struct file_operations ntfs_dir_ops = { |
1572 | .llseek = generic_file_llseek, /* Seek inside directory. */ | 1542 | .llseek = generic_file_llseek, /* Seek inside directory. */ |
1573 | .read = generic_read_dir, /* Return -EISDIR. */ | 1543 | .read = generic_read_dir, /* Return -EISDIR. */ |
1574 | .readdir = ntfs_readdir, /* Read directory contents. */ | 1544 | .iterate = ntfs_readdir, /* Read directory contents. */ |
1575 | #ifdef NTFS_RW | 1545 | #ifdef NTFS_RW |
1576 | .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ | 1546 | .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ |
1577 | /*.aio_fsync = ,*/ /* Sync all outstanding async | 1547 | /*.aio_fsync = ,*/ /* Sync all outstanding async |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b8a9d87231b1..17e6bdde96c5 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -5655,7 +5655,7 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
5655 | &ref_tree, NULL); | 5655 | &ref_tree, NULL); |
5656 | if (ret) { | 5656 | if (ret) { |
5657 | mlog_errno(ret); | 5657 | mlog_errno(ret); |
5658 | goto out; | 5658 | goto bail; |
5659 | } | 5659 | } |
5660 | 5660 | ||
5661 | ret = ocfs2_prepare_refcount_change_for_del(inode, | 5661 | ret = ocfs2_prepare_refcount_change_for_del(inode, |
@@ -5666,7 +5666,7 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
5666 | &extra_blocks); | 5666 | &extra_blocks); |
5667 | if (ret < 0) { | 5667 | if (ret < 0) { |
5668 | mlog_errno(ret); | 5668 | mlog_errno(ret); |
5669 | goto out; | 5669 | goto bail; |
5670 | } | 5670 | } |
5671 | } | 5671 | } |
5672 | 5672 | ||
@@ -5674,7 +5674,7 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
5674 | extra_blocks); | 5674 | extra_blocks); |
5675 | if (ret) { | 5675 | if (ret) { |
5676 | mlog_errno(ret); | 5676 | mlog_errno(ret); |
5677 | return ret; | 5677 | goto bail; |
5678 | } | 5678 | } |
5679 | 5679 | ||
5680 | mutex_lock(&tl_inode->i_mutex); | 5680 | mutex_lock(&tl_inode->i_mutex); |
@@ -5734,7 +5734,7 @@ out_commit: | |||
5734 | ocfs2_commit_trans(osb, handle); | 5734 | ocfs2_commit_trans(osb, handle); |
5735 | out: | 5735 | out: |
5736 | mutex_unlock(&tl_inode->i_mutex); | 5736 | mutex_unlock(&tl_inode->i_mutex); |
5737 | 5737 | bail: | |
5738 | if (meta_ac) | 5738 | if (meta_ac) |
5739 | ocfs2_free_alloc_context(meta_ac); | 5739 | ocfs2_free_alloc_context(meta_ac); |
5740 | 5740 | ||
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 20dfec72e903..2abf97b2a592 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -603,11 +603,12 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, | |||
603 | * from ext3. PageChecked() bits have been removed as OCFS2 does not | 603 | * from ext3. PageChecked() bits have been removed as OCFS2 does not |
604 | * do journalled data. | 604 | * do journalled data. |
605 | */ | 605 | */ |
606 | static void ocfs2_invalidatepage(struct page *page, unsigned long offset) | 606 | static void ocfs2_invalidatepage(struct page *page, unsigned int offset, |
607 | unsigned int length) | ||
607 | { | 608 | { |
608 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; | 609 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; |
609 | 610 | ||
610 | jbd2_journal_invalidatepage(journal, page, offset); | 611 | jbd2_journal_invalidatepage(journal, page, offset, length); |
611 | } | 612 | } |
612 | 613 | ||
613 | static int ocfs2_releasepage(struct page *page, gfp_t wait) | 614 | static int ocfs2_releasepage(struct page *page, gfp_t wait) |
@@ -1756,7 +1757,7 @@ try_again: | |||
1756 | goto out; | 1757 | goto out; |
1757 | } else if (ret == 1) { | 1758 | } else if (ret == 1) { |
1758 | clusters_need = wc->w_clen; | 1759 | clusters_need = wc->w_clen; |
1759 | ret = ocfs2_refcount_cow(inode, filp, di_bh, | 1760 | ret = ocfs2_refcount_cow(inode, di_bh, |
1760 | wc->w_cpos, wc->w_clen, UINT_MAX); | 1761 | wc->w_cpos, wc->w_clen, UINT_MAX); |
1761 | if (ret) { | 1762 | if (ret) { |
1762 | mlog_errno(ret); | 1763 | mlog_errno(ret); |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 42252bf64b51..5c1c864e81cc 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -176,7 +176,7 @@ static void o2hb_dead_threshold_set(unsigned int threshold) | |||
176 | } | 176 | } |
177 | } | 177 | } |
178 | 178 | ||
179 | static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) | 179 | static int o2hb_global_heartbeat_mode_set(unsigned int hb_mode) |
180 | { | 180 | { |
181 | int ret = -1; | 181 | int ret = -1; |
182 | 182 | ||
@@ -500,7 +500,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg, | |||
500 | } | 500 | } |
501 | 501 | ||
502 | atomic_inc(&write_wc->wc_num_reqs); | 502 | atomic_inc(&write_wc->wc_num_reqs); |
503 | submit_bio(WRITE, bio); | 503 | submit_bio(WRITE_SYNC, bio); |
504 | 504 | ||
505 | status = 0; | 505 | status = 0; |
506 | bail: | 506 | bail: |
@@ -2271,7 +2271,7 @@ ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, | |||
2271 | if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) | 2271 | if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) |
2272 | continue; | 2272 | continue; |
2273 | 2273 | ||
2274 | ret = o2hb_global_hearbeat_mode_set(i); | 2274 | ret = o2hb_global_heartbeat_mode_set(i); |
2275 | if (!ret) | 2275 | if (!ret) |
2276 | printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", | 2276 | printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", |
2277 | o2hb_heartbeat_mode_desc[i]); | 2277 | o2hb_heartbeat_mode_desc[i]); |
@@ -2304,7 +2304,7 @@ static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { | |||
2304 | NULL, | 2304 | NULL, |
2305 | }; | 2305 | }; |
2306 | 2306 | ||
2307 | static struct configfs_item_operations o2hb_hearbeat_group_item_ops = { | 2307 | static struct configfs_item_operations o2hb_heartbeat_group_item_ops = { |
2308 | .show_attribute = o2hb_heartbeat_group_show, | 2308 | .show_attribute = o2hb_heartbeat_group_show, |
2309 | .store_attribute = o2hb_heartbeat_group_store, | 2309 | .store_attribute = o2hb_heartbeat_group_store, |
2310 | }; | 2310 | }; |
@@ -2316,7 +2316,7 @@ static struct configfs_group_operations o2hb_heartbeat_group_group_ops = { | |||
2316 | 2316 | ||
2317 | static struct config_item_type o2hb_heartbeat_group_type = { | 2317 | static struct config_item_type o2hb_heartbeat_group_type = { |
2318 | .ct_group_ops = &o2hb_heartbeat_group_group_ops, | 2318 | .ct_group_ops = &o2hb_heartbeat_group_group_ops, |
2319 | .ct_item_ops = &o2hb_hearbeat_group_item_ops, | 2319 | .ct_item_ops = &o2hb_heartbeat_group_item_ops, |
2320 | .ct_attrs = o2hb_heartbeat_group_attrs, | 2320 | .ct_attrs = o2hb_heartbeat_group_attrs, |
2321 | .ct_owner = THIS_MODULE, | 2321 | .ct_owner = THIS_MODULE, |
2322 | }; | 2322 | }; |
@@ -2389,6 +2389,9 @@ static int o2hb_region_pin(const char *region_uuid) | |||
2389 | assert_spin_locked(&o2hb_live_lock); | 2389 | assert_spin_locked(&o2hb_live_lock); |
2390 | 2390 | ||
2391 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | 2391 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { |
2392 | if (reg->hr_item_dropped) | ||
2393 | continue; | ||
2394 | |||
2392 | uuid = config_item_name(®->hr_item); | 2395 | uuid = config_item_name(®->hr_item); |
2393 | 2396 | ||
2394 | /* local heartbeat */ | 2397 | /* local heartbeat */ |
@@ -2439,6 +2442,9 @@ static void o2hb_region_unpin(const char *region_uuid) | |||
2439 | assert_spin_locked(&o2hb_live_lock); | 2442 | assert_spin_locked(&o2hb_live_lock); |
2440 | 2443 | ||
2441 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | 2444 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { |
2445 | if (reg->hr_item_dropped) | ||
2446 | continue; | ||
2447 | |||
2442 | uuid = config_item_name(®->hr_item); | 2448 | uuid = config_item_name(®->hr_item); |
2443 | if (region_uuid) { | 2449 | if (region_uuid) { |
2444 | if (strcmp(region_uuid, uuid)) | 2450 | if (strcmp(region_uuid, uuid)) |
@@ -2654,6 +2660,9 @@ int o2hb_get_all_regions(char *region_uuids, u8 max_regions) | |||
2654 | 2660 | ||
2655 | p = region_uuids; | 2661 | p = region_uuids; |
2656 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | 2662 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { |
2663 | if (reg->hr_item_dropped) | ||
2664 | continue; | ||
2665 | |||
2657 | mlog(0, "Region: %s\n", config_item_name(®->hr_item)); | 2666 | mlog(0, "Region: %s\n", config_item_name(®->hr_item)); |
2658 | if (numregs < max_regions) { | 2667 | if (numregs < max_regions) { |
2659 | memcpy(p, config_item_name(®->hr_item), | 2668 | memcpy(p, config_item_name(®->hr_item), |
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index c19897d0fe14..1ec141e758d7 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c | |||
@@ -264,7 +264,7 @@ void o2quo_hb_still_up(u8 node) | |||
264 | /* This is analogous to hb_up. as a node's connection comes up we delay the | 264 | /* This is analogous to hb_up. as a node's connection comes up we delay the |
265 | * quorum decision until we see it heartbeating. the hold will be droped in | 265 | * quorum decision until we see it heartbeating. the hold will be droped in |
266 | * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if | 266 | * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if |
267 | * it's already heartbeating we we might be dropping a hold that conn_up got. | 267 | * it's already heartbeating we might be dropping a hold that conn_up got. |
268 | * */ | 268 | * */ |
269 | void o2quo_conn_up(u8 node) | 269 | void o2quo_conn_up(u8 node) |
270 | { | 270 | { |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index aa88bd8bcedc..d644dc611425 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -406,6 +406,9 @@ static void sc_kref_release(struct kref *kref) | |||
406 | sc->sc_node = NULL; | 406 | sc->sc_node = NULL; |
407 | 407 | ||
408 | o2net_debug_del_sc(sc); | 408 | o2net_debug_del_sc(sc); |
409 | |||
410 | if (sc->sc_page) | ||
411 | __free_page(sc->sc_page); | ||
409 | kfree(sc); | 412 | kfree(sc); |
410 | } | 413 | } |
411 | 414 | ||
@@ -630,19 +633,19 @@ static void o2net_state_change(struct sock *sk) | |||
630 | state_change = sc->sc_state_change; | 633 | state_change = sc->sc_state_change; |
631 | 634 | ||
632 | switch(sk->sk_state) { | 635 | switch(sk->sk_state) { |
633 | /* ignore connecting sockets as they make progress */ | 636 | /* ignore connecting sockets as they make progress */ |
634 | case TCP_SYN_SENT: | 637 | case TCP_SYN_SENT: |
635 | case TCP_SYN_RECV: | 638 | case TCP_SYN_RECV: |
636 | break; | 639 | break; |
637 | case TCP_ESTABLISHED: | 640 | case TCP_ESTABLISHED: |
638 | o2net_sc_queue_work(sc, &sc->sc_connect_work); | 641 | o2net_sc_queue_work(sc, &sc->sc_connect_work); |
639 | break; | 642 | break; |
640 | default: | 643 | default: |
641 | printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT | 644 | printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT |
642 | " shutdown, state %d\n", | 645 | " shutdown, state %d\n", |
643 | SC_NODEF_ARGS(sc), sk->sk_state); | 646 | SC_NODEF_ARGS(sc), sk->sk_state); |
644 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 647 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
645 | break; | 648 | break; |
646 | } | 649 | } |
647 | out: | 650 | out: |
648 | read_unlock(&sk->sk_callback_lock); | 651 | read_unlock(&sk->sk_callback_lock); |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f1e1aed8f638..30544ce8e9f7 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -1761,11 +1761,10 @@ bail: | |||
1761 | 1761 | ||
1762 | static int ocfs2_dir_foreach_blk_id(struct inode *inode, | 1762 | static int ocfs2_dir_foreach_blk_id(struct inode *inode, |
1763 | u64 *f_version, | 1763 | u64 *f_version, |
1764 | loff_t *f_pos, void *priv, | 1764 | struct dir_context *ctx) |
1765 | filldir_t filldir, int *filldir_err) | ||
1766 | { | 1765 | { |
1767 | int ret, i, filldir_ret; | 1766 | int ret, i; |
1768 | unsigned long offset = *f_pos; | 1767 | unsigned long offset = ctx->pos; |
1769 | struct buffer_head *di_bh = NULL; | 1768 | struct buffer_head *di_bh = NULL; |
1770 | struct ocfs2_dinode *di; | 1769 | struct ocfs2_dinode *di; |
1771 | struct ocfs2_inline_data *data; | 1770 | struct ocfs2_inline_data *data; |
@@ -1781,8 +1780,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, | |||
1781 | di = (struct ocfs2_dinode *)di_bh->b_data; | 1780 | di = (struct ocfs2_dinode *)di_bh->b_data; |
1782 | data = &di->id2.i_data; | 1781 | data = &di->id2.i_data; |
1783 | 1782 | ||
1784 | while (*f_pos < i_size_read(inode)) { | 1783 | while (ctx->pos < i_size_read(inode)) { |
1785 | revalidate: | ||
1786 | /* If the dir block has changed since the last call to | 1784 | /* If the dir block has changed since the last call to |
1787 | * readdir(2), then we might be pointing to an invalid | 1785 | * readdir(2), then we might be pointing to an invalid |
1788 | * dirent right now. Scan from the start of the block | 1786 | * dirent right now. Scan from the start of the block |
@@ -1802,50 +1800,31 @@ revalidate: | |||
1802 | break; | 1800 | break; |
1803 | i += le16_to_cpu(de->rec_len); | 1801 | i += le16_to_cpu(de->rec_len); |
1804 | } | 1802 | } |
1805 | *f_pos = offset = i; | 1803 | ctx->pos = offset = i; |
1806 | *f_version = inode->i_version; | 1804 | *f_version = inode->i_version; |
1807 | } | 1805 | } |
1808 | 1806 | ||
1809 | de = (struct ocfs2_dir_entry *) (data->id_data + *f_pos); | 1807 | de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos); |
1810 | if (!ocfs2_check_dir_entry(inode, de, di_bh, *f_pos)) { | 1808 | if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) { |
1811 | /* On error, skip the f_pos to the end. */ | 1809 | /* On error, skip the f_pos to the end. */ |
1812 | *f_pos = i_size_read(inode); | 1810 | ctx->pos = i_size_read(inode); |
1813 | goto out; | 1811 | break; |
1814 | } | 1812 | } |
1815 | offset += le16_to_cpu(de->rec_len); | 1813 | offset += le16_to_cpu(de->rec_len); |
1816 | if (le64_to_cpu(de->inode)) { | 1814 | if (le64_to_cpu(de->inode)) { |
1817 | /* We might block in the next section | ||
1818 | * if the data destination is | ||
1819 | * currently swapped out. So, use a | ||
1820 | * version stamp to detect whether or | ||
1821 | * not the directory has been modified | ||
1822 | * during the copy operation. | ||
1823 | */ | ||
1824 | u64 version = *f_version; | ||
1825 | unsigned char d_type = DT_UNKNOWN; | 1815 | unsigned char d_type = DT_UNKNOWN; |
1826 | 1816 | ||
1827 | if (de->file_type < OCFS2_FT_MAX) | 1817 | if (de->file_type < OCFS2_FT_MAX) |
1828 | d_type = ocfs2_filetype_table[de->file_type]; | 1818 | d_type = ocfs2_filetype_table[de->file_type]; |
1829 | 1819 | ||
1830 | filldir_ret = filldir(priv, de->name, | 1820 | if (!dir_emit(ctx, de->name, de->name_len, |
1831 | de->name_len, | 1821 | le64_to_cpu(de->inode), d_type)) |
1832 | *f_pos, | 1822 | goto out; |
1833 | le64_to_cpu(de->inode), | ||
1834 | d_type); | ||
1835 | if (filldir_ret) { | ||
1836 | if (filldir_err) | ||
1837 | *filldir_err = filldir_ret; | ||
1838 | break; | ||
1839 | } | ||
1840 | if (version != *f_version) | ||
1841 | goto revalidate; | ||
1842 | } | 1823 | } |
1843 | *f_pos += le16_to_cpu(de->rec_len); | 1824 | ctx->pos += le16_to_cpu(de->rec_len); |
1844 | } | 1825 | } |
1845 | |||
1846 | out: | 1826 | out: |
1847 | brelse(di_bh); | 1827 | brelse(di_bh); |
1848 | |||
1849 | return 0; | 1828 | return 0; |
1850 | } | 1829 | } |
1851 | 1830 | ||
@@ -1855,27 +1834,26 @@ out: | |||
1855 | */ | 1834 | */ |
1856 | static int ocfs2_dir_foreach_blk_el(struct inode *inode, | 1835 | static int ocfs2_dir_foreach_blk_el(struct inode *inode, |
1857 | u64 *f_version, | 1836 | u64 *f_version, |
1858 | loff_t *f_pos, void *priv, | 1837 | struct dir_context *ctx, |
1859 | filldir_t filldir, int *filldir_err) | 1838 | bool persist) |
1860 | { | 1839 | { |
1861 | int error = 0; | ||
1862 | unsigned long offset, blk, last_ra_blk = 0; | 1840 | unsigned long offset, blk, last_ra_blk = 0; |
1863 | int i, stored; | 1841 | int i; |
1864 | struct buffer_head * bh, * tmp; | 1842 | struct buffer_head * bh, * tmp; |
1865 | struct ocfs2_dir_entry * de; | 1843 | struct ocfs2_dir_entry * de; |
1866 | struct super_block * sb = inode->i_sb; | 1844 | struct super_block * sb = inode->i_sb; |
1867 | unsigned int ra_sectors = 16; | 1845 | unsigned int ra_sectors = 16; |
1846 | int stored = 0; | ||
1868 | 1847 | ||
1869 | stored = 0; | ||
1870 | bh = NULL; | 1848 | bh = NULL; |
1871 | 1849 | ||
1872 | offset = (*f_pos) & (sb->s_blocksize - 1); | 1850 | offset = ctx->pos & (sb->s_blocksize - 1); |
1873 | 1851 | ||
1874 | while (!error && !stored && *f_pos < i_size_read(inode)) { | 1852 | while (ctx->pos < i_size_read(inode)) { |
1875 | blk = (*f_pos) >> sb->s_blocksize_bits; | 1853 | blk = ctx->pos >> sb->s_blocksize_bits; |
1876 | if (ocfs2_read_dir_block(inode, blk, &bh, 0)) { | 1854 | if (ocfs2_read_dir_block(inode, blk, &bh, 0)) { |
1877 | /* Skip the corrupt dirblock and keep trying */ | 1855 | /* Skip the corrupt dirblock and keep trying */ |
1878 | *f_pos += sb->s_blocksize - offset; | 1856 | ctx->pos += sb->s_blocksize - offset; |
1879 | continue; | 1857 | continue; |
1880 | } | 1858 | } |
1881 | 1859 | ||
@@ -1897,7 +1875,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, | |||
1897 | ra_sectors = 8; | 1875 | ra_sectors = 8; |
1898 | } | 1876 | } |
1899 | 1877 | ||
1900 | revalidate: | ||
1901 | /* If the dir block has changed since the last call to | 1878 | /* If the dir block has changed since the last call to |
1902 | * readdir(2), then we might be pointing to an invalid | 1879 | * readdir(2), then we might be pointing to an invalid |
1903 | * dirent right now. Scan from the start of the block | 1880 | * dirent right now. Scan from the start of the block |
@@ -1917,93 +1894,64 @@ revalidate: | |||
1917 | i += le16_to_cpu(de->rec_len); | 1894 | i += le16_to_cpu(de->rec_len); |
1918 | } | 1895 | } |
1919 | offset = i; | 1896 | offset = i; |
1920 | *f_pos = ((*f_pos) & ~(sb->s_blocksize - 1)) | 1897 | ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) |
1921 | | offset; | 1898 | | offset; |
1922 | *f_version = inode->i_version; | 1899 | *f_version = inode->i_version; |
1923 | } | 1900 | } |
1924 | 1901 | ||
1925 | while (!error && *f_pos < i_size_read(inode) | 1902 | while (ctx->pos < i_size_read(inode) |
1926 | && offset < sb->s_blocksize) { | 1903 | && offset < sb->s_blocksize) { |
1927 | de = (struct ocfs2_dir_entry *) (bh->b_data + offset); | 1904 | de = (struct ocfs2_dir_entry *) (bh->b_data + offset); |
1928 | if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { | 1905 | if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { |
1929 | /* On error, skip the f_pos to the | 1906 | /* On error, skip the f_pos to the |
1930 | next block. */ | 1907 | next block. */ |
1931 | *f_pos = ((*f_pos) | (sb->s_blocksize - 1)) + 1; | 1908 | ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; |
1932 | brelse(bh); | 1909 | brelse(bh); |
1933 | goto out; | 1910 | continue; |
1934 | } | 1911 | } |
1935 | offset += le16_to_cpu(de->rec_len); | ||
1936 | if (le64_to_cpu(de->inode)) { | 1912 | if (le64_to_cpu(de->inode)) { |
1937 | /* We might block in the next section | ||
1938 | * if the data destination is | ||
1939 | * currently swapped out. So, use a | ||
1940 | * version stamp to detect whether or | ||
1941 | * not the directory has been modified | ||
1942 | * during the copy operation. | ||
1943 | */ | ||
1944 | unsigned long version = *f_version; | ||
1945 | unsigned char d_type = DT_UNKNOWN; | 1913 | unsigned char d_type = DT_UNKNOWN; |
1946 | 1914 | ||
1947 | if (de->file_type < OCFS2_FT_MAX) | 1915 | if (de->file_type < OCFS2_FT_MAX) |
1948 | d_type = ocfs2_filetype_table[de->file_type]; | 1916 | d_type = ocfs2_filetype_table[de->file_type]; |
1949 | error = filldir(priv, de->name, | 1917 | if (!dir_emit(ctx, de->name, |
1950 | de->name_len, | 1918 | de->name_len, |
1951 | *f_pos, | ||
1952 | le64_to_cpu(de->inode), | 1919 | le64_to_cpu(de->inode), |
1953 | d_type); | 1920 | d_type)) { |
1954 | if (error) { | 1921 | brelse(bh); |
1955 | if (filldir_err) | 1922 | return 0; |
1956 | *filldir_err = error; | ||
1957 | break; | ||
1958 | } | 1923 | } |
1959 | if (version != *f_version) | 1924 | stored++; |
1960 | goto revalidate; | ||
1961 | stored ++; | ||
1962 | } | 1925 | } |
1963 | *f_pos += le16_to_cpu(de->rec_len); | 1926 | offset += le16_to_cpu(de->rec_len); |
1927 | ctx->pos += le16_to_cpu(de->rec_len); | ||
1964 | } | 1928 | } |
1965 | offset = 0; | 1929 | offset = 0; |
1966 | brelse(bh); | 1930 | brelse(bh); |
1967 | bh = NULL; | 1931 | bh = NULL; |
1932 | if (!persist && stored) | ||
1933 | break; | ||
1968 | } | 1934 | } |
1969 | 1935 | return 0; | |
1970 | stored = 0; | ||
1971 | out: | ||
1972 | return stored; | ||
1973 | } | 1936 | } |
1974 | 1937 | ||
1975 | static int ocfs2_dir_foreach_blk(struct inode *inode, u64 *f_version, | 1938 | static int ocfs2_dir_foreach_blk(struct inode *inode, u64 *f_version, |
1976 | loff_t *f_pos, void *priv, filldir_t filldir, | 1939 | struct dir_context *ctx, |
1977 | int *filldir_err) | 1940 | bool persist) |
1978 | { | 1941 | { |
1979 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 1942 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
1980 | return ocfs2_dir_foreach_blk_id(inode, f_version, f_pos, priv, | 1943 | return ocfs2_dir_foreach_blk_id(inode, f_version, ctx); |
1981 | filldir, filldir_err); | 1944 | return ocfs2_dir_foreach_blk_el(inode, f_version, ctx, persist); |
1982 | |||
1983 | return ocfs2_dir_foreach_blk_el(inode, f_version, f_pos, priv, filldir, | ||
1984 | filldir_err); | ||
1985 | } | 1945 | } |
1986 | 1946 | ||
1987 | /* | 1947 | /* |
1988 | * This is intended to be called from inside other kernel functions, | 1948 | * This is intended to be called from inside other kernel functions, |
1989 | * so we fake some arguments. | 1949 | * so we fake some arguments. |
1990 | */ | 1950 | */ |
1991 | int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv, | 1951 | int ocfs2_dir_foreach(struct inode *inode, struct dir_context *ctx) |
1992 | filldir_t filldir) | ||
1993 | { | 1952 | { |
1994 | int ret = 0, filldir_err = 0; | ||
1995 | u64 version = inode->i_version; | 1953 | u64 version = inode->i_version; |
1996 | 1954 | ocfs2_dir_foreach_blk(inode, &version, ctx, true); | |
1997 | while (*f_pos < i_size_read(inode)) { | ||
1998 | ret = ocfs2_dir_foreach_blk(inode, &version, f_pos, priv, | ||
1999 | filldir, &filldir_err); | ||
2000 | if (ret || filldir_err) | ||
2001 | break; | ||
2002 | } | ||
2003 | |||
2004 | if (ret > 0) | ||
2005 | ret = -EIO; | ||
2006 | |||
2007 | return 0; | 1955 | return 0; |
2008 | } | 1956 | } |
2009 | 1957 | ||
@@ -2011,15 +1959,15 @@ int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv, | |||
2011 | * ocfs2_readdir() | 1959 | * ocfs2_readdir() |
2012 | * | 1960 | * |
2013 | */ | 1961 | */ |
2014 | int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) | 1962 | int ocfs2_readdir(struct file *file, struct dir_context *ctx) |
2015 | { | 1963 | { |
2016 | int error = 0; | 1964 | int error = 0; |
2017 | struct inode *inode = file_inode(filp); | 1965 | struct inode *inode = file_inode(file); |
2018 | int lock_level = 0; | 1966 | int lock_level = 0; |
2019 | 1967 | ||
2020 | trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); | 1968 | trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); |
2021 | 1969 | ||
2022 | error = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level); | 1970 | error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level); |
2023 | if (lock_level && error >= 0) { | 1971 | if (lock_level && error >= 0) { |
2024 | /* We release EX lock which used to update atime | 1972 | /* We release EX lock which used to update atime |
2025 | * and get PR lock again to reduce contention | 1973 | * and get PR lock again to reduce contention |
@@ -2035,8 +1983,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
2035 | goto bail_nolock; | 1983 | goto bail_nolock; |
2036 | } | 1984 | } |
2037 | 1985 | ||
2038 | error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos, | 1986 | error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false); |
2039 | dirent, filldir, NULL); | ||
2040 | 1987 | ||
2041 | ocfs2_inode_unlock(inode, lock_level); | 1988 | ocfs2_inode_unlock(inode, lock_level); |
2042 | if (error) | 1989 | if (error) |
@@ -2120,6 +2067,7 @@ bail: | |||
2120 | } | 2067 | } |
2121 | 2068 | ||
2122 | struct ocfs2_empty_dir_priv { | 2069 | struct ocfs2_empty_dir_priv { |
2070 | struct dir_context ctx; | ||
2123 | unsigned seen_dot; | 2071 | unsigned seen_dot; |
2124 | unsigned seen_dot_dot; | 2072 | unsigned seen_dot_dot; |
2125 | unsigned seen_other; | 2073 | unsigned seen_other; |
@@ -2204,10 +2152,9 @@ out: | |||
2204 | int ocfs2_empty_dir(struct inode *inode) | 2152 | int ocfs2_empty_dir(struct inode *inode) |
2205 | { | 2153 | { |
2206 | int ret; | 2154 | int ret; |
2207 | loff_t start = 0; | 2155 | struct ocfs2_empty_dir_priv priv = { |
2208 | struct ocfs2_empty_dir_priv priv; | 2156 | .ctx.actor = ocfs2_empty_dir_filldir, |
2209 | 2157 | }; | |
2210 | memset(&priv, 0, sizeof(priv)); | ||
2211 | 2158 | ||
2212 | if (ocfs2_dir_indexed(inode)) { | 2159 | if (ocfs2_dir_indexed(inode)) { |
2213 | ret = ocfs2_empty_dir_dx(inode, &priv); | 2160 | ret = ocfs2_empty_dir_dx(inode, &priv); |
@@ -2219,7 +2166,7 @@ int ocfs2_empty_dir(struct inode *inode) | |||
2219 | */ | 2166 | */ |
2220 | } | 2167 | } |
2221 | 2168 | ||
2222 | ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir); | 2169 | ret = ocfs2_dir_foreach(inode, &priv.ctx); |
2223 | if (ret) | 2170 | if (ret) |
2224 | mlog_errno(ret); | 2171 | mlog_errno(ret); |
2225 | 2172 | ||
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h index e683f3deb645..f0344b75b14d 100644 --- a/fs/ocfs2/dir.h +++ b/fs/ocfs2/dir.h | |||
@@ -92,9 +92,8 @@ int ocfs2_find_files_on_disk(const char *name, | |||
92 | struct ocfs2_dir_lookup_result *res); | 92 | struct ocfs2_dir_lookup_result *res); |
93 | int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, | 93 | int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, |
94 | int namelen, u64 *blkno); | 94 | int namelen, u64 *blkno); |
95 | int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir); | 95 | int ocfs2_readdir(struct file *file, struct dir_context *ctx); |
96 | int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv, | 96 | int ocfs2_dir_foreach(struct inode *inode, struct dir_context *ctx); |
97 | filldir_t filldir); | ||
98 | int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | 97 | int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, |
99 | struct inode *dir, | 98 | struct inode *dir, |
100 | struct buffer_head *parent_fe_bh, | 99 | struct buffer_head *parent_fe_bh, |
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 975810b98492..47e67c2d228f 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
@@ -178,6 +178,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, | |||
178 | lock->ml.node); | 178 | lock->ml.node); |
179 | } | 179 | } |
180 | } else { | 180 | } else { |
181 | status = DLM_NORMAL; | ||
181 | dlm_lock_get(lock); | 182 | dlm_lock_get(lock); |
182 | list_add_tail(&lock->list, &res->blocked); | 183 | list_add_tail(&lock->list, &res->blocked); |
183 | kick_thread = 1; | 184 | kick_thread = 1; |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index e68588e6b1e8..773bd32bfd8c 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -55,9 +55,6 @@ | |||
55 | static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); | 55 | static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); |
56 | 56 | ||
57 | static int dlm_recovery_thread(void *data); | 57 | static int dlm_recovery_thread(void *data); |
58 | void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); | ||
59 | int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); | ||
60 | void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); | ||
61 | static int dlm_do_recovery(struct dlm_ctxt *dlm); | 58 | static int dlm_do_recovery(struct dlm_ctxt *dlm); |
62 | 59 | ||
63 | static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); | 60 | static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); |
@@ -789,7 +786,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
789 | u8 dead_node) | 786 | u8 dead_node) |
790 | { | 787 | { |
791 | struct dlm_lock_request lr; | 788 | struct dlm_lock_request lr; |
792 | enum dlm_status ret; | 789 | int ret; |
793 | 790 | ||
794 | mlog(0, "\n"); | 791 | mlog(0, "\n"); |
795 | 792 | ||
@@ -802,7 +799,6 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
802 | lr.dead_node = dead_node; | 799 | lr.dead_node = dead_node; |
803 | 800 | ||
804 | // send message | 801 | // send message |
805 | ret = DLM_NOLOCKMGR; | ||
806 | ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, | 802 | ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, |
807 | &lr, sizeof(lr), request_from, NULL); | 803 | &lr, sizeof(lr), request_from, NULL); |
808 | 804 | ||
@@ -2696,6 +2692,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, | |||
2696 | dlm->name, br->node_idx, br->dead_node, | 2692 | dlm->name, br->node_idx, br->dead_node, |
2697 | dlm->reco.dead_node, dlm->reco.new_master); | 2693 | dlm->reco.dead_node, dlm->reco.new_master); |
2698 | spin_unlock(&dlm->spinlock); | 2694 | spin_unlock(&dlm->spinlock); |
2695 | dlm_put(dlm); | ||
2699 | return -EAGAIN; | 2696 | return -EAGAIN; |
2700 | } | 2697 | } |
2701 | spin_unlock(&dlm->spinlock); | 2698 | spin_unlock(&dlm->spinlock); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ff54014a24ec..3261d71319ee 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -370,7 +370,7 @@ static int ocfs2_cow_file_pos(struct inode *inode, | |||
370 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) | 370 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) |
371 | goto out; | 371 | goto out; |
372 | 372 | ||
373 | return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); | 373 | return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); |
374 | 374 | ||
375 | out: | 375 | out: |
376 | return status; | 376 | return status; |
@@ -899,7 +899,7 @@ static int ocfs2_zero_extend_get_range(struct inode *inode, | |||
899 | zero_clusters = last_cpos - zero_cpos; | 899 | zero_clusters = last_cpos - zero_cpos; |
900 | 900 | ||
901 | if (needs_cow) { | 901 | if (needs_cow) { |
902 | rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, | 902 | rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, |
903 | zero_clusters, UINT_MAX); | 903 | zero_clusters, UINT_MAX); |
904 | if (rc) { | 904 | if (rc) { |
905 | mlog_errno(rc); | 905 | mlog_errno(rc); |
@@ -2078,7 +2078,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | |||
2078 | 2078 | ||
2079 | *meta_level = 1; | 2079 | *meta_level = 1; |
2080 | 2080 | ||
2081 | ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); | 2081 | ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); |
2082 | if (ret) | 2082 | if (ret) |
2083 | mlog_errno(ret); | 2083 | mlog_errno(ret); |
2084 | out: | 2084 | out: |
@@ -2646,17 +2646,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence) | |||
2646 | goto out; | 2646 | goto out; |
2647 | } | 2647 | } |
2648 | 2648 | ||
2649 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | 2649 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
2650 | ret = -EINVAL; | ||
2651 | if (!ret && offset > inode->i_sb->s_maxbytes) | ||
2652 | ret = -EINVAL; | ||
2653 | if (ret) | ||
2654 | goto out; | ||
2655 | |||
2656 | if (offset != file->f_pos) { | ||
2657 | file->f_pos = offset; | ||
2658 | file->f_version = 0; | ||
2659 | } | ||
2660 | 2650 | ||
2661 | out: | 2651 | out: |
2662 | mutex_unlock(&inode->i_mutex); | 2652 | mutex_unlock(&inode->i_mutex); |
@@ -2712,7 +2702,7 @@ const struct file_operations ocfs2_fops = { | |||
2712 | const struct file_operations ocfs2_dops = { | 2702 | const struct file_operations ocfs2_dops = { |
2713 | .llseek = generic_file_llseek, | 2703 | .llseek = generic_file_llseek, |
2714 | .read = generic_read_dir, | 2704 | .read = generic_read_dir, |
2715 | .readdir = ocfs2_readdir, | 2705 | .iterate = ocfs2_readdir, |
2716 | .fsync = ocfs2_sync_file, | 2706 | .fsync = ocfs2_sync_file, |
2717 | .release = ocfs2_dir_release, | 2707 | .release = ocfs2_dir_release, |
2718 | .open = ocfs2_dir_open, | 2708 | .open = ocfs2_dir_open, |
@@ -2759,7 +2749,7 @@ const struct file_operations ocfs2_fops_no_plocks = { | |||
2759 | const struct file_operations ocfs2_dops_no_plocks = { | 2749 | const struct file_operations ocfs2_dops_no_plocks = { |
2760 | .llseek = generic_file_llseek, | 2750 | .llseek = generic_file_llseek, |
2761 | .read = generic_read_dir, | 2751 | .read = generic_read_dir, |
2762 | .readdir = ocfs2_readdir, | 2752 | .iterate = ocfs2_readdir, |
2763 | .fsync = ocfs2_sync_file, | 2753 | .fsync = ocfs2_sync_file, |
2764 | .release = ocfs2_dir_release, | 2754 | .release = ocfs2_dir_release, |
2765 | .open = ocfs2_dir_open, | 2755 | .open = ocfs2_dir_open, |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 8eccfabcd12e..242170d83971 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -1941,6 +1941,7 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb) | |||
1941 | } | 1941 | } |
1942 | 1942 | ||
1943 | struct ocfs2_orphan_filldir_priv { | 1943 | struct ocfs2_orphan_filldir_priv { |
1944 | struct dir_context ctx; | ||
1944 | struct inode *head; | 1945 | struct inode *head; |
1945 | struct ocfs2_super *osb; | 1946 | struct ocfs2_super *osb; |
1946 | }; | 1947 | }; |
@@ -1977,11 +1978,11 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
1977 | { | 1978 | { |
1978 | int status; | 1979 | int status; |
1979 | struct inode *orphan_dir_inode = NULL; | 1980 | struct inode *orphan_dir_inode = NULL; |
1980 | struct ocfs2_orphan_filldir_priv priv; | 1981 | struct ocfs2_orphan_filldir_priv priv = { |
1981 | loff_t pos = 0; | 1982 | .ctx.actor = ocfs2_orphan_filldir, |
1982 | 1983 | .osb = osb, | |
1983 | priv.osb = osb; | 1984 | .head = *head |
1984 | priv.head = *head; | 1985 | }; |
1985 | 1986 | ||
1986 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 1987 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
1987 | ORPHAN_DIR_SYSTEM_INODE, | 1988 | ORPHAN_DIR_SYSTEM_INODE, |
@@ -1999,8 +2000,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
1999 | goto out; | 2000 | goto out; |
2000 | } | 2001 | } |
2001 | 2002 | ||
2002 | status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv, | 2003 | status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx); |
2003 | ocfs2_orphan_filldir); | ||
2004 | if (status) { | 2004 | if (status) { |
2005 | mlog_errno(status); | 2005 | mlog_errno(status); |
2006 | goto out_cluster; | 2006 | goto out_cluster; |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index a3385b63ff5e..0a992737dcaf 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -200,7 +200,6 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb); | |||
200 | 200 | ||
201 | static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb) | 201 | static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb) |
202 | { | 202 | { |
203 | atomic_set(&osb->needs_checkpoint, 1); | ||
204 | wake_up(&osb->checkpoint_event); | 203 | wake_up(&osb->checkpoint_event); |
205 | } | 204 | } |
206 | 205 | ||
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index f1fc172175b6..452068b45749 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
@@ -69,7 +69,7 @@ static int __ocfs2_move_extent(handle_t *handle, | |||
69 | u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); | 69 | u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); |
70 | u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); | 70 | u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); |
71 | 71 | ||
72 | ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos, | 72 | ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos, |
73 | p_cpos, new_p_cpos, len); | 73 | p_cpos, new_p_cpos, len); |
74 | if (ret) { | 74 | if (ret) { |
75 | mlog_errno(ret); | 75 | mlog_errno(ret); |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b4a5cdf9dbc5..be3f8676a438 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -522,7 +522,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, | |||
522 | 522 | ||
523 | fe->i_last_eb_blk = 0; | 523 | fe->i_last_eb_blk = 0; |
524 | strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); | 524 | strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); |
525 | le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL); | 525 | fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL); |
526 | fe->i_atime = fe->i_ctime = fe->i_mtime = | 526 | fe->i_atime = fe->i_ctime = fe->i_mtime = |
527 | cpu_to_le64(CURRENT_TIME.tv_sec); | 527 | cpu_to_le64(CURRENT_TIME.tv_sec); |
528 | fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = | 528 | fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = |
@@ -773,7 +773,7 @@ static int ocfs2_remote_dentry_delete(struct dentry *dentry) | |||
773 | return ret; | 773 | return ret; |
774 | } | 774 | } |
775 | 775 | ||
776 | static inline int inode_is_unlinkable(struct inode *inode) | 776 | static inline int ocfs2_inode_is_unlinkable(struct inode *inode) |
777 | { | 777 | { |
778 | if (S_ISDIR(inode->i_mode)) { | 778 | if (S_ISDIR(inode->i_mode)) { |
779 | if (inode->i_nlink == 2) | 779 | if (inode->i_nlink == 2) |
@@ -791,6 +791,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
791 | { | 791 | { |
792 | int status; | 792 | int status; |
793 | int child_locked = 0; | 793 | int child_locked = 0; |
794 | bool is_unlinkable = false; | ||
794 | struct inode *inode = dentry->d_inode; | 795 | struct inode *inode = dentry->d_inode; |
795 | struct inode *orphan_dir = NULL; | 796 | struct inode *orphan_dir = NULL; |
796 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | 797 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
@@ -865,7 +866,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
865 | goto leave; | 866 | goto leave; |
866 | } | 867 | } |
867 | 868 | ||
868 | if (inode_is_unlinkable(inode)) { | 869 | if (ocfs2_inode_is_unlinkable(inode)) { |
869 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | 870 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, |
870 | OCFS2_I(inode)->ip_blkno, | 871 | OCFS2_I(inode)->ip_blkno, |
871 | orphan_name, &orphan_insert); | 872 | orphan_name, &orphan_insert); |
@@ -873,6 +874,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
873 | mlog_errno(status); | 874 | mlog_errno(status); |
874 | goto leave; | 875 | goto leave; |
875 | } | 876 | } |
877 | is_unlinkable = true; | ||
876 | } | 878 | } |
877 | 879 | ||
878 | handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb)); | 880 | handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb)); |
@@ -892,15 +894,6 @@ static int ocfs2_unlink(struct inode *dir, | |||
892 | 894 | ||
893 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 895 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
894 | 896 | ||
895 | if (inode_is_unlinkable(inode)) { | ||
896 | status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name, | ||
897 | &orphan_insert, orphan_dir); | ||
898 | if (status < 0) { | ||
899 | mlog_errno(status); | ||
900 | goto leave; | ||
901 | } | ||
902 | } | ||
903 | |||
904 | /* delete the name from the parent dir */ | 897 | /* delete the name from the parent dir */ |
905 | status = ocfs2_delete_entry(handle, dir, &lookup); | 898 | status = ocfs2_delete_entry(handle, dir, &lookup); |
906 | if (status < 0) { | 899 | if (status < 0) { |
@@ -923,6 +916,14 @@ static int ocfs2_unlink(struct inode *dir, | |||
923 | mlog_errno(status); | 916 | mlog_errno(status); |
924 | if (S_ISDIR(inode->i_mode)) | 917 | if (S_ISDIR(inode->i_mode)) |
925 | inc_nlink(dir); | 918 | inc_nlink(dir); |
919 | goto leave; | ||
920 | } | ||
921 | |||
922 | if (is_unlinkable) { | ||
923 | status = ocfs2_orphan_add(osb, handle, inode, fe_bh, | ||
924 | orphan_name, &orphan_insert, orphan_dir); | ||
925 | if (status < 0) | ||
926 | mlog_errno(status); | ||
926 | } | 927 | } |
927 | 928 | ||
928 | leave: | 929 | leave: |
@@ -2012,6 +2013,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
2012 | goto leave; | 2013 | goto leave; |
2013 | } | 2014 | } |
2014 | 2015 | ||
2016 | /* | ||
2017 | * We're going to journal the change of i_flags and i_orphaned_slot. | ||
2018 | * It's safe anyway, though some callers may duplicate the journaling. | ||
2019 | * Journaling within the func just make the logic look more | ||
2020 | * straightforward. | ||
2021 | */ | ||
2022 | status = ocfs2_journal_access_di(handle, | ||
2023 | INODE_CACHE(inode), | ||
2024 | fe_bh, | ||
2025 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2026 | if (status < 0) { | ||
2027 | mlog_errno(status); | ||
2028 | goto leave; | ||
2029 | } | ||
2030 | |||
2015 | /* we're a cluster, and nlink can change on disk from | 2031 | /* we're a cluster, and nlink can change on disk from |
2016 | * underneath us... */ | 2032 | * underneath us... */ |
2017 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; | 2033 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; |
@@ -2026,25 +2042,10 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
2026 | orphan_dir_bh, lookup); | 2042 | orphan_dir_bh, lookup); |
2027 | if (status < 0) { | 2043 | if (status < 0) { |
2028 | mlog_errno(status); | 2044 | mlog_errno(status); |
2029 | goto leave; | 2045 | goto rollback; |
2030 | } | ||
2031 | |||
2032 | /* | ||
2033 | * We're going to journal the change of i_flags and i_orphaned_slot. | ||
2034 | * It's safe anyway, though some callers may duplicate the journaling. | ||
2035 | * Journaling within the func just make the logic look more | ||
2036 | * straightforward. | ||
2037 | */ | ||
2038 | status = ocfs2_journal_access_di(handle, | ||
2039 | INODE_CACHE(inode), | ||
2040 | fe_bh, | ||
2041 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2042 | if (status < 0) { | ||
2043 | mlog_errno(status); | ||
2044 | goto leave; | ||
2045 | } | 2046 | } |
2046 | 2047 | ||
2047 | le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); | 2048 | fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL); |
2048 | OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; | 2049 | OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; |
2049 | 2050 | ||
2050 | /* Record which orphan dir our inode now resides | 2051 | /* Record which orphan dir our inode now resides |
@@ -2057,11 +2058,16 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
2057 | trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno, | 2058 | trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno, |
2058 | osb->slot_num); | 2059 | osb->slot_num); |
2059 | 2060 | ||
2061 | rollback: | ||
2062 | if (status < 0) { | ||
2063 | if (S_ISDIR(inode->i_mode)) | ||
2064 | ocfs2_add_links_count(orphan_fe, -1); | ||
2065 | set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe)); | ||
2066 | } | ||
2067 | |||
2060 | leave: | 2068 | leave: |
2061 | brelse(orphan_dir_bh); | 2069 | brelse(orphan_dir_bh); |
2062 | 2070 | ||
2063 | if (status) | ||
2064 | mlog_errno(status); | ||
2065 | return status; | 2071 | return status; |
2066 | } | 2072 | } |
2067 | 2073 | ||
@@ -2434,7 +2440,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | |||
2434 | } | 2440 | } |
2435 | 2441 | ||
2436 | di = (struct ocfs2_dinode *)di_bh->b_data; | 2442 | di = (struct ocfs2_dinode *)di_bh->b_data; |
2437 | le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); | 2443 | di->i_flags &= ~cpu_to_le32(OCFS2_ORPHANED_FL); |
2438 | di->i_orphaned_slot = 0; | 2444 | di->i_orphaned_slot = 0; |
2439 | set_nlink(inode, 1); | 2445 | set_nlink(inode, 1); |
2440 | ocfs2_set_links_count(di, inode->i_nlink); | 2446 | ocfs2_set_links_count(di, inode->i_nlink); |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d355e6e36b36..3a903470c794 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -347,7 +347,6 @@ struct ocfs2_super | |||
347 | struct task_struct *recovery_thread_task; | 347 | struct task_struct *recovery_thread_task; |
348 | int disable_recovery; | 348 | int disable_recovery; |
349 | wait_queue_head_t checkpoint_event; | 349 | wait_queue_head_t checkpoint_event; |
350 | atomic_t needs_checkpoint; | ||
351 | struct ocfs2_journal *journal; | 350 | struct ocfs2_journal *journal; |
352 | unsigned long osb_commit_interval; | 351 | unsigned long osb_commit_interval; |
353 | 352 | ||
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 998b17eda09d..a70d604593b6 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -49,7 +49,6 @@ | |||
49 | 49 | ||
50 | struct ocfs2_cow_context { | 50 | struct ocfs2_cow_context { |
51 | struct inode *inode; | 51 | struct inode *inode; |
52 | struct file *file; | ||
53 | u32 cow_start; | 52 | u32 cow_start; |
54 | u32 cow_len; | 53 | u32 cow_len; |
55 | struct ocfs2_extent_tree data_et; | 54 | struct ocfs2_extent_tree data_et; |
@@ -66,7 +65,7 @@ struct ocfs2_cow_context { | |||
66 | u32 *num_clusters, | 65 | u32 *num_clusters, |
67 | unsigned int *extent_flags); | 66 | unsigned int *extent_flags); |
68 | int (*cow_duplicate_clusters)(handle_t *handle, | 67 | int (*cow_duplicate_clusters)(handle_t *handle, |
69 | struct file *file, | 68 | struct inode *inode, |
70 | u32 cpos, u32 old_cluster, | 69 | u32 cpos, u32 old_cluster, |
71 | u32 new_cluster, u32 new_len); | 70 | u32 new_cluster, u32 new_len); |
72 | }; | 71 | }; |
@@ -2922,14 +2921,12 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) | |||
2922 | } | 2921 | } |
2923 | 2922 | ||
2924 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, | 2923 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, |
2925 | struct file *file, | 2924 | struct inode *inode, |
2926 | u32 cpos, u32 old_cluster, | 2925 | u32 cpos, u32 old_cluster, |
2927 | u32 new_cluster, u32 new_len) | 2926 | u32 new_cluster, u32 new_len) |
2928 | { | 2927 | { |
2929 | int ret = 0, partial; | 2928 | int ret = 0, partial; |
2930 | struct inode *inode = file_inode(file); | 2929 | struct super_block *sb = inode->i_sb; |
2931 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); | ||
2932 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | ||
2933 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 2930 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
2934 | struct page *page; | 2931 | struct page *page; |
2935 | pgoff_t page_index; | 2932 | pgoff_t page_index; |
@@ -2965,6 +2962,11 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2965 | to = map_end & (PAGE_CACHE_SIZE - 1); | 2962 | to = map_end & (PAGE_CACHE_SIZE - 1); |
2966 | 2963 | ||
2967 | page = find_or_create_page(mapping, page_index, GFP_NOFS); | 2964 | page = find_or_create_page(mapping, page_index, GFP_NOFS); |
2965 | if (!page) { | ||
2966 | ret = -ENOMEM; | ||
2967 | mlog_errno(ret); | ||
2968 | break; | ||
2969 | } | ||
2968 | 2970 | ||
2969 | /* | 2971 | /* |
2970 | * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page | 2972 | * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page |
@@ -2973,13 +2975,6 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2973 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) | 2975 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) |
2974 | BUG_ON(PageDirty(page)); | 2976 | BUG_ON(PageDirty(page)); |
2975 | 2977 | ||
2976 | if (PageReadahead(page)) { | ||
2977 | page_cache_async_readahead(mapping, | ||
2978 | &file->f_ra, file, | ||
2979 | page, page_index, | ||
2980 | readahead_pages); | ||
2981 | } | ||
2982 | |||
2983 | if (!PageUptodate(page)) { | 2978 | if (!PageUptodate(page)) { |
2984 | ret = block_read_full_page(page, ocfs2_get_block); | 2979 | ret = block_read_full_page(page, ocfs2_get_block); |
2985 | if (ret) { | 2980 | if (ret) { |
@@ -2999,7 +2994,8 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2999 | } | 2994 | } |
3000 | } | 2995 | } |
3001 | 2996 | ||
3002 | ocfs2_map_and_dirty_page(inode, handle, from, to, | 2997 | ocfs2_map_and_dirty_page(inode, |
2998 | handle, from, to, | ||
3003 | page, 0, &new_block); | 2999 | page, 0, &new_block); |
3004 | mark_page_accessed(page); | 3000 | mark_page_accessed(page); |
3005 | unlock: | 3001 | unlock: |
@@ -3015,12 +3011,11 @@ unlock: | |||
3015 | } | 3011 | } |
3016 | 3012 | ||
3017 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, | 3013 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, |
3018 | struct file *file, | 3014 | struct inode *inode, |
3019 | u32 cpos, u32 old_cluster, | 3015 | u32 cpos, u32 old_cluster, |
3020 | u32 new_cluster, u32 new_len) | 3016 | u32 new_cluster, u32 new_len) |
3021 | { | 3017 | { |
3022 | int ret = 0; | 3018 | int ret = 0; |
3023 | struct inode *inode = file_inode(file); | ||
3024 | struct super_block *sb = inode->i_sb; | 3019 | struct super_block *sb = inode->i_sb; |
3025 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); | 3020 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); |
3026 | int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); | 3021 | int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); |
@@ -3145,7 +3140,7 @@ static int ocfs2_replace_clusters(handle_t *handle, | |||
3145 | 3140 | ||
3146 | /*If the old clusters is unwritten, no need to duplicate. */ | 3141 | /*If the old clusters is unwritten, no need to duplicate. */ |
3147 | if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { | 3142 | if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { |
3148 | ret = context->cow_duplicate_clusters(handle, context->file, | 3143 | ret = context->cow_duplicate_clusters(handle, context->inode, |
3149 | cpos, old, new, len); | 3144 | cpos, old, new, len); |
3150 | if (ret) { | 3145 | if (ret) { |
3151 | mlog_errno(ret); | 3146 | mlog_errno(ret); |
@@ -3423,35 +3418,12 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) | |||
3423 | return ret; | 3418 | return ret; |
3424 | } | 3419 | } |
3425 | 3420 | ||
3426 | static void ocfs2_readahead_for_cow(struct inode *inode, | ||
3427 | struct file *file, | ||
3428 | u32 start, u32 len) | ||
3429 | { | ||
3430 | struct address_space *mapping; | ||
3431 | pgoff_t index; | ||
3432 | unsigned long num_pages; | ||
3433 | int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
3434 | |||
3435 | if (!file) | ||
3436 | return; | ||
3437 | |||
3438 | mapping = file->f_mapping; | ||
3439 | num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT; | ||
3440 | if (!num_pages) | ||
3441 | num_pages = 1; | ||
3442 | |||
3443 | index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT; | ||
3444 | page_cache_sync_readahead(mapping, &file->f_ra, file, | ||
3445 | index, num_pages); | ||
3446 | } | ||
3447 | |||
3448 | /* | 3421 | /* |
3449 | * Starting at cpos, try to CoW write_len clusters. Don't CoW | 3422 | * Starting at cpos, try to CoW write_len clusters. Don't CoW |
3450 | * past max_cpos. This will stop when it runs into a hole or an | 3423 | * past max_cpos. This will stop when it runs into a hole or an |
3451 | * unrefcounted extent. | 3424 | * unrefcounted extent. |
3452 | */ | 3425 | */ |
3453 | static int ocfs2_refcount_cow_hunk(struct inode *inode, | 3426 | static int ocfs2_refcount_cow_hunk(struct inode *inode, |
3454 | struct file *file, | ||
3455 | struct buffer_head *di_bh, | 3427 | struct buffer_head *di_bh, |
3456 | u32 cpos, u32 write_len, u32 max_cpos) | 3428 | u32 cpos, u32 write_len, u32 max_cpos) |
3457 | { | 3429 | { |
@@ -3480,8 +3452,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
3480 | 3452 | ||
3481 | BUG_ON(cow_len == 0); | 3453 | BUG_ON(cow_len == 0); |
3482 | 3454 | ||
3483 | ocfs2_readahead_for_cow(inode, file, cow_start, cow_len); | ||
3484 | |||
3485 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); | 3455 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); |
3486 | if (!context) { | 3456 | if (!context) { |
3487 | ret = -ENOMEM; | 3457 | ret = -ENOMEM; |
@@ -3503,7 +3473,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
3503 | context->ref_root_bh = ref_root_bh; | 3473 | context->ref_root_bh = ref_root_bh; |
3504 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; | 3474 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; |
3505 | context->get_clusters = ocfs2_di_get_clusters; | 3475 | context->get_clusters = ocfs2_di_get_clusters; |
3506 | context->file = file; | ||
3507 | 3476 | ||
3508 | ocfs2_init_dinode_extent_tree(&context->data_et, | 3477 | ocfs2_init_dinode_extent_tree(&context->data_et, |
3509 | INODE_CACHE(inode), di_bh); | 3478 | INODE_CACHE(inode), di_bh); |
@@ -3532,7 +3501,6 @@ out: | |||
3532 | * clusters between cpos and cpos+write_len are safe to modify. | 3501 | * clusters between cpos and cpos+write_len are safe to modify. |
3533 | */ | 3502 | */ |
3534 | int ocfs2_refcount_cow(struct inode *inode, | 3503 | int ocfs2_refcount_cow(struct inode *inode, |
3535 | struct file *file, | ||
3536 | struct buffer_head *di_bh, | 3504 | struct buffer_head *di_bh, |
3537 | u32 cpos, u32 write_len, u32 max_cpos) | 3505 | u32 cpos, u32 write_len, u32 max_cpos) |
3538 | { | 3506 | { |
@@ -3552,7 +3520,7 @@ int ocfs2_refcount_cow(struct inode *inode, | |||
3552 | num_clusters = write_len; | 3520 | num_clusters = write_len; |
3553 | 3521 | ||
3554 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { | 3522 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { |
3555 | ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, | 3523 | ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, |
3556 | num_clusters, max_cpos); | 3524 | num_clusters, max_cpos); |
3557 | if (ret) { | 3525 | if (ret) { |
3558 | mlog_errno(ret); | 3526 | mlog_errno(ret); |
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 7754608c83a4..6422bbcdb525 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h | |||
@@ -53,7 +53,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, | |||
53 | int *credits, | 53 | int *credits, |
54 | int *ref_blocks); | 54 | int *ref_blocks); |
55 | int ocfs2_refcount_cow(struct inode *inode, | 55 | int ocfs2_refcount_cow(struct inode *inode, |
56 | struct file *filep, struct buffer_head *di_bh, | 56 | struct buffer_head *di_bh, |
57 | u32 cpos, u32 write_len, u32 max_cpos); | 57 | u32 cpos, u32 write_len, u32 max_cpos); |
58 | 58 | ||
59 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, | 59 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, |
@@ -85,11 +85,11 @@ int ocfs2_refcount_cow_xattr(struct inode *inode, | |||
85 | u32 cpos, u32 write_len, | 85 | u32 cpos, u32 write_len, |
86 | struct ocfs2_post_refcount *post); | 86 | struct ocfs2_post_refcount *post); |
87 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, | 87 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, |
88 | struct file *file, | 88 | struct inode *inode, |
89 | u32 cpos, u32 old_cluster, | 89 | u32 cpos, u32 old_cluster, |
90 | u32 new_cluster, u32 new_len); | 90 | u32 new_cluster, u32 new_len); |
91 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, | 91 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, |
92 | struct file *file, | 92 | struct inode *inode, |
93 | u32 cpos, u32 old_cluster, | 93 | u32 cpos, u32 old_cluster, |
94 | u32 new_cluster, u32 new_len); | 94 | u32 new_cluster, u32 new_len); |
95 | int ocfs2_cow_sync_writeback(struct super_block *sb, | 95 | int ocfs2_cow_sync_writeback(struct super_block *sb, |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index b7e74b580c0f..5397c07ce608 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -1422,7 +1422,7 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
1422 | int status; | 1422 | int status; |
1423 | /* there is a really tiny chance the journal calls could fail, | 1423 | /* there is a really tiny chance the journal calls could fail, |
1424 | * but we wouldn't want inconsistent blocks in *any* case. */ | 1424 | * but we wouldn't want inconsistent blocks in *any* case. */ |
1425 | u64 fe_ptr, bg_ptr, prev_bg_ptr; | 1425 | u64 bg_ptr, prev_bg_ptr; |
1426 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; | 1426 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; |
1427 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; | 1427 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; |
1428 | struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; | 1428 | struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; |
@@ -1437,51 +1437,44 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
1437 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | 1437 | (unsigned long long)le64_to_cpu(bg->bg_blkno), |
1438 | (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); | 1438 | (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); |
1439 | 1439 | ||
1440 | fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); | ||
1441 | bg_ptr = le64_to_cpu(bg->bg_next_group); | 1440 | bg_ptr = le64_to_cpu(bg->bg_next_group); |
1442 | prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); | 1441 | prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); |
1443 | 1442 | ||
1444 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), | 1443 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), |
1445 | prev_bg_bh, | 1444 | prev_bg_bh, |
1446 | OCFS2_JOURNAL_ACCESS_WRITE); | 1445 | OCFS2_JOURNAL_ACCESS_WRITE); |
1447 | if (status < 0) { | 1446 | if (status < 0) |
1448 | mlog_errno(status); | 1447 | goto out; |
1449 | goto out_rollback; | ||
1450 | } | ||
1451 | 1448 | ||
1452 | prev_bg->bg_next_group = bg->bg_next_group; | 1449 | prev_bg->bg_next_group = bg->bg_next_group; |
1453 | ocfs2_journal_dirty(handle, prev_bg_bh); | 1450 | ocfs2_journal_dirty(handle, prev_bg_bh); |
1454 | 1451 | ||
1455 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), | 1452 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), |
1456 | bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); | 1453 | bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
1457 | if (status < 0) { | 1454 | if (status < 0) |
1458 | mlog_errno(status); | 1455 | goto out_rollback_prev_bg; |
1459 | goto out_rollback; | ||
1460 | } | ||
1461 | 1456 | ||
1462 | bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; | 1457 | bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; |
1463 | ocfs2_journal_dirty(handle, bg_bh); | 1458 | ocfs2_journal_dirty(handle, bg_bh); |
1464 | 1459 | ||
1465 | status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), | 1460 | status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), |
1466 | fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); | 1461 | fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
1467 | if (status < 0) { | 1462 | if (status < 0) |
1468 | mlog_errno(status); | 1463 | goto out_rollback_bg; |
1469 | goto out_rollback; | ||
1470 | } | ||
1471 | 1464 | ||
1472 | fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; | 1465 | fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; |
1473 | ocfs2_journal_dirty(handle, fe_bh); | 1466 | ocfs2_journal_dirty(handle, fe_bh); |
1474 | 1467 | ||
1475 | out_rollback: | 1468 | out: |
1476 | if (status < 0) { | 1469 | if (status < 0) |
1477 | fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); | ||
1478 | bg->bg_next_group = cpu_to_le64(bg_ptr); | ||
1479 | prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); | ||
1480 | } | ||
1481 | |||
1482 | if (status) | ||
1483 | mlog_errno(status); | 1470 | mlog_errno(status); |
1484 | return status; | 1471 | return status; |
1472 | |||
1473 | out_rollback_bg: | ||
1474 | bg->bg_next_group = cpu_to_le64(bg_ptr); | ||
1475 | out_rollback_prev_bg: | ||
1476 | prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); | ||
1477 | goto out; | ||
1485 | } | 1478 | } |
1486 | 1479 | ||
1487 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, | 1480 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 01b85165552b..854d80955bf8 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -286,10 +286,9 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) | |||
286 | spin_unlock(&osb->osb_lock); | 286 | spin_unlock(&osb->osb_lock); |
287 | 287 | ||
288 | out += snprintf(buf + out, len - out, | 288 | out += snprintf(buf + out, len - out, |
289 | "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", | 289 | "%10s => Pid: %d Interval: %lu\n", "Commit", |
290 | (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), | 290 | (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), |
291 | osb->osb_commit_interval, | 291 | osb->osb_commit_interval); |
292 | atomic_read(&osb->needs_checkpoint)); | ||
293 | 292 | ||
294 | out += snprintf(buf + out, len - out, | 293 | out += snprintf(buf + out, len - out, |
295 | "%10s => State: %d TxnId: %lu NumTxns: %d\n", | 294 | "%10s => State: %d TxnId: %lu NumTxns: %d\n", |
@@ -2154,7 +2153,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2154 | } | 2153 | } |
2155 | 2154 | ||
2156 | init_waitqueue_head(&osb->checkpoint_event); | 2155 | init_waitqueue_head(&osb->checkpoint_event); |
2157 | atomic_set(&osb->needs_checkpoint, 0); | ||
2158 | 2156 | ||
2159 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 2157 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
2160 | 2158 | ||
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2e3ea308c144..317ef0abccbb 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -2751,7 +2751,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, | |||
2751 | { | 2751 | { |
2752 | int ret; | 2752 | int ret; |
2753 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 2753 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
2754 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
2755 | struct ocfs2_xa_loc loc; | 2754 | struct ocfs2_xa_loc loc; |
2756 | 2755 | ||
2757 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) | 2756 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) |
@@ -2759,13 +2758,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, | |||
2759 | 2758 | ||
2760 | down_write(&oi->ip_alloc_sem); | 2759 | down_write(&oi->ip_alloc_sem); |
2761 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { | 2760 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { |
2762 | if (!ocfs2_xattr_has_space_inline(inode, di)) { | ||
2763 | ret = -ENOSPC; | ||
2764 | goto out; | ||
2765 | } | ||
2766 | } | ||
2767 | |||
2768 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { | ||
2769 | ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); | 2761 | ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); |
2770 | if (ret) { | 2762 | if (ret) { |
2771 | if (ret != -ENOSPC) | 2763 | if (ret != -ENOSPC) |
@@ -6499,6 +6491,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) | |||
6499 | } | 6491 | } |
6500 | 6492 | ||
6501 | new_oi = OCFS2_I(args->new_inode); | 6493 | new_oi = OCFS2_I(args->new_inode); |
6494 | /* | ||
6495 | * Adjust extent record count to reserve space for extended attribute. | ||
6496 | * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). | ||
6497 | */ | ||
6498 | if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && | ||
6499 | !(ocfs2_inode_is_fast_symlink(args->new_inode))) { | ||
6500 | struct ocfs2_extent_list *el = &new_di->id2.i_list; | ||
6501 | le16_add_cpu(&el->l_count, -(inline_size / | ||
6502 | sizeof(struct ocfs2_extent_rec))); | ||
6503 | } | ||
6502 | spin_lock(&new_oi->ip_lock); | 6504 | spin_lock(&new_oi->ip_lock); |
6503 | new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; | 6505 | new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; |
6504 | new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); | 6506 | new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); |
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index acbaebcad3a8..1b8e9e8405b2 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c | |||
@@ -327,26 +327,23 @@ int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header, | |||
327 | return is_bad; | 327 | return is_bad; |
328 | } | 328 | } |
329 | 329 | ||
330 | static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir, | 330 | static bool omfs_fill_chain(struct inode *dir, struct dir_context *ctx, |
331 | u64 fsblock, int hindex) | 331 | u64 fsblock, int hindex) |
332 | { | 332 | { |
333 | struct inode *dir = file_inode(filp); | ||
334 | struct buffer_head *bh; | ||
335 | struct omfs_inode *oi; | ||
336 | u64 self; | ||
337 | int res = 0; | ||
338 | unsigned char d_type; | ||
339 | |||
340 | /* follow chain in this bucket */ | 333 | /* follow chain in this bucket */ |
341 | while (fsblock != ~0) { | 334 | while (fsblock != ~0) { |
342 | bh = omfs_bread(dir->i_sb, fsblock); | 335 | struct buffer_head *bh = omfs_bread(dir->i_sb, fsblock); |
336 | struct omfs_inode *oi; | ||
337 | u64 self; | ||
338 | unsigned char d_type; | ||
339 | |||
343 | if (!bh) | 340 | if (!bh) |
344 | goto out; | 341 | return true; |
345 | 342 | ||
346 | oi = (struct omfs_inode *) bh->b_data; | 343 | oi = (struct omfs_inode *) bh->b_data; |
347 | if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, fsblock)) { | 344 | if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, fsblock)) { |
348 | brelse(bh); | 345 | brelse(bh); |
349 | goto out; | 346 | return true; |
350 | } | 347 | } |
351 | 348 | ||
352 | self = fsblock; | 349 | self = fsblock; |
@@ -361,15 +358,16 @@ static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir, | |||
361 | 358 | ||
362 | d_type = (oi->i_type == OMFS_DIR) ? DT_DIR : DT_REG; | 359 | d_type = (oi->i_type == OMFS_DIR) ? DT_DIR : DT_REG; |
363 | 360 | ||
364 | res = filldir(dirent, oi->i_name, strnlen(oi->i_name, | 361 | if (!dir_emit(ctx, oi->i_name, |
365 | OMFS_NAMELEN), filp->f_pos, self, d_type); | 362 | strnlen(oi->i_name, OMFS_NAMELEN), |
363 | self, d_type)) { | ||
364 | brelse(bh); | ||
365 | return false; | ||
366 | } | ||
366 | brelse(bh); | 367 | brelse(bh); |
367 | if (res < 0) | 368 | ctx->pos++; |
368 | break; | ||
369 | filp->f_pos++; | ||
370 | } | 369 | } |
371 | out: | 370 | return true; |
372 | return res; | ||
373 | } | 371 | } |
374 | 372 | ||
375 | static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, | 373 | static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
@@ -403,60 +401,44 @@ out: | |||
403 | return err; | 401 | return err; |
404 | } | 402 | } |
405 | 403 | ||
406 | static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 404 | static int omfs_readdir(struct file *file, struct dir_context *ctx) |
407 | { | 405 | { |
408 | struct inode *dir = file_inode(filp); | 406 | struct inode *dir = file_inode(file); |
409 | struct buffer_head *bh; | 407 | struct buffer_head *bh; |
410 | loff_t offset, res; | 408 | __be64 *p; |
411 | unsigned int hchain, hindex; | 409 | unsigned int hchain, hindex; |
412 | int nbuckets; | 410 | int nbuckets; |
413 | u64 fsblock; | 411 | |
414 | int ret = -EINVAL; | 412 | if (ctx->pos >> 32) |
415 | 413 | return -EINVAL; | |
416 | if (filp->f_pos >> 32) | 414 | |
417 | goto success; | 415 | if (ctx->pos < 1 << 20) { |
418 | 416 | if (!dir_emit_dots(file, ctx)) | |
419 | switch ((unsigned long) filp->f_pos) { | 417 | return 0; |
420 | case 0: | 418 | ctx->pos = 1 << 20; |
421 | if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0) | ||
422 | goto success; | ||
423 | filp->f_pos++; | ||
424 | /* fall through */ | ||
425 | case 1: | ||
426 | if (filldir(dirent, "..", 2, 1, | ||
427 | parent_ino(filp->f_dentry), DT_DIR) < 0) | ||
428 | goto success; | ||
429 | filp->f_pos = 1 << 20; | ||
430 | /* fall through */ | ||
431 | } | 419 | } |
432 | 420 | ||
433 | nbuckets = (dir->i_size - OMFS_DIR_START) / 8; | 421 | nbuckets = (dir->i_size - OMFS_DIR_START) / 8; |
434 | 422 | ||
435 | /* high 12 bits store bucket + 1 and low 20 bits store hash index */ | 423 | /* high 12 bits store bucket + 1 and low 20 bits store hash index */ |
436 | hchain = (filp->f_pos >> 20) - 1; | 424 | hchain = (ctx->pos >> 20) - 1; |
437 | hindex = filp->f_pos & 0xfffff; | 425 | hindex = ctx->pos & 0xfffff; |
438 | 426 | ||
439 | bh = omfs_bread(dir->i_sb, dir->i_ino); | 427 | bh = omfs_bread(dir->i_sb, dir->i_ino); |
440 | if (!bh) | 428 | if (!bh) |
441 | goto out; | 429 | return -EINVAL; |
442 | 430 | ||
443 | offset = OMFS_DIR_START + hchain * 8; | 431 | p = (__be64 *)(bh->b_data + OMFS_DIR_START) + hchain; |
444 | 432 | ||
445 | for (; hchain < nbuckets; hchain++, offset += 8) { | 433 | for (; hchain < nbuckets; hchain++) { |
446 | fsblock = be64_to_cpu(*((__be64 *) &bh->b_data[offset])); | 434 | __u64 fsblock = be64_to_cpu(*p++); |
447 | 435 | if (!omfs_fill_chain(dir, ctx, fsblock, hindex)) | |
448 | res = omfs_fill_chain(filp, dirent, filldir, fsblock, hindex); | ||
449 | hindex = 0; | ||
450 | if (res < 0) | ||
451 | break; | 436 | break; |
452 | 437 | hindex = 0; | |
453 | filp->f_pos = (hchain+2) << 20; | 438 | ctx->pos = (hchain+2) << 20; |
454 | } | 439 | } |
455 | brelse(bh); | 440 | brelse(bh); |
456 | success: | 441 | return 0; |
457 | ret = 0; | ||
458 | out: | ||
459 | return ret; | ||
460 | } | 442 | } |
461 | 443 | ||
462 | const struct inode_operations omfs_dir_inops = { | 444 | const struct inode_operations omfs_dir_inops = { |
@@ -470,6 +452,6 @@ const struct inode_operations omfs_dir_inops = { | |||
470 | 452 | ||
471 | const struct file_operations omfs_dir_operations = { | 453 | const struct file_operations omfs_dir_operations = { |
472 | .read = generic_read_dir, | 454 | .read = generic_read_dir, |
473 | .readdir = omfs_readdir, | 455 | .iterate = omfs_readdir, |
474 | .llseek = generic_file_llseek, | 456 | .llseek = generic_file_llseek, |
475 | }; | 457 | }; |
@@ -823,7 +823,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o | |||
823 | int lookup_flags = 0; | 823 | int lookup_flags = 0; |
824 | int acc_mode; | 824 | int acc_mode; |
825 | 825 | ||
826 | if (flags & O_CREAT) | 826 | if (flags & (O_CREAT | __O_TMPFILE)) |
827 | op->mode = (mode & S_IALLUGO) | S_IFREG; | 827 | op->mode = (mode & S_IALLUGO) | S_IFREG; |
828 | else | 828 | else |
829 | op->mode = 0; | 829 | op->mode = 0; |
@@ -840,11 +840,17 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o | |||
840 | if (flags & __O_SYNC) | 840 | if (flags & __O_SYNC) |
841 | flags |= O_DSYNC; | 841 | flags |= O_DSYNC; |
842 | 842 | ||
843 | /* | 843 | if (flags & __O_TMPFILE) { |
844 | * If we have O_PATH in the open flag. Then we | 844 | if ((flags & O_TMPFILE_MASK) != O_TMPFILE) |
845 | * cannot have anything other than the below set of flags | 845 | return -EINVAL; |
846 | */ | 846 | acc_mode = MAY_OPEN | ACC_MODE(flags); |
847 | if (flags & O_PATH) { | 847 | if (!(acc_mode & MAY_WRITE)) |
848 | return -EINVAL; | ||
849 | } else if (flags & O_PATH) { | ||
850 | /* | ||
851 | * If we have O_PATH in the open flag. Then we | ||
852 | * cannot have anything other than the below set of flags | ||
853 | */ | ||
848 | flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; | 854 | flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; |
849 | acc_mode = 0; | 855 | acc_mode = 0; |
850 | } else { | 856 | } else { |
@@ -876,7 +882,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o | |||
876 | lookup_flags |= LOOKUP_DIRECTORY; | 882 | lookup_flags |= LOOKUP_DIRECTORY; |
877 | if (!(flags & O_NOFOLLOW)) | 883 | if (!(flags & O_NOFOLLOW)) |
878 | lookup_flags |= LOOKUP_FOLLOW; | 884 | lookup_flags |= LOOKUP_FOLLOW; |
879 | return lookup_flags; | 885 | op->lookup_flags = lookup_flags; |
886 | return 0; | ||
880 | } | 887 | } |
881 | 888 | ||
882 | /** | 889 | /** |
@@ -893,8 +900,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o | |||
893 | struct file *file_open_name(struct filename *name, int flags, umode_t mode) | 900 | struct file *file_open_name(struct filename *name, int flags, umode_t mode) |
894 | { | 901 | { |
895 | struct open_flags op; | 902 | struct open_flags op; |
896 | int lookup = build_open_flags(flags, mode, &op); | 903 | int err = build_open_flags(flags, mode, &op); |
897 | return do_filp_open(AT_FDCWD, name, &op, lookup); | 904 | return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op); |
898 | } | 905 | } |
899 | 906 | ||
900 | /** | 907 | /** |
@@ -919,37 +926,43 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, | |||
919 | const char *filename, int flags) | 926 | const char *filename, int flags) |
920 | { | 927 | { |
921 | struct open_flags op; | 928 | struct open_flags op; |
922 | int lookup = build_open_flags(flags, 0, &op); | 929 | int err = build_open_flags(flags, 0, &op); |
930 | if (err) | ||
931 | return ERR_PTR(err); | ||
923 | if (flags & O_CREAT) | 932 | if (flags & O_CREAT) |
924 | return ERR_PTR(-EINVAL); | 933 | return ERR_PTR(-EINVAL); |
925 | if (!filename && (flags & O_DIRECTORY)) | 934 | if (!filename && (flags & O_DIRECTORY)) |
926 | if (!dentry->d_inode->i_op->lookup) | 935 | if (!dentry->d_inode->i_op->lookup) |
927 | return ERR_PTR(-ENOTDIR); | 936 | return ERR_PTR(-ENOTDIR); |
928 | return do_file_open_root(dentry, mnt, filename, &op, lookup); | 937 | return do_file_open_root(dentry, mnt, filename, &op); |
929 | } | 938 | } |
930 | EXPORT_SYMBOL(file_open_root); | 939 | EXPORT_SYMBOL(file_open_root); |
931 | 940 | ||
932 | long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) | 941 | long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) |
933 | { | 942 | { |
934 | struct open_flags op; | 943 | struct open_flags op; |
935 | int lookup = build_open_flags(flags, mode, &op); | 944 | int fd = build_open_flags(flags, mode, &op); |
936 | struct filename *tmp = getname(filename); | 945 | struct filename *tmp; |
937 | int fd = PTR_ERR(tmp); | 946 | |
938 | 947 | if (fd) | |
939 | if (!IS_ERR(tmp)) { | 948 | return fd; |
940 | fd = get_unused_fd_flags(flags); | 949 | |
941 | if (fd >= 0) { | 950 | tmp = getname(filename); |
942 | struct file *f = do_filp_open(dfd, tmp, &op, lookup); | 951 | if (IS_ERR(tmp)) |
943 | if (IS_ERR(f)) { | 952 | return PTR_ERR(tmp); |
944 | put_unused_fd(fd); | 953 | |
945 | fd = PTR_ERR(f); | 954 | fd = get_unused_fd_flags(flags); |
946 | } else { | 955 | if (fd >= 0) { |
947 | fsnotify_open(f); | 956 | struct file *f = do_filp_open(dfd, tmp, &op); |
948 | fd_install(fd, f); | 957 | if (IS_ERR(f)) { |
949 | } | 958 | put_unused_fd(fd); |
959 | fd = PTR_ERR(f); | ||
960 | } else { | ||
961 | fsnotify_open(f); | ||
962 | fd_install(fd, f); | ||
950 | } | 963 | } |
951 | putname(tmp); | ||
952 | } | 964 | } |
965 | putname(tmp); | ||
953 | return fd; | 966 | return fd; |
954 | } | 967 | } |
955 | 968 | ||
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 75885ffde44e..8c0ceb8dd1f7 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c | |||
@@ -162,11 +162,11 @@ static const struct file_operations openpromfs_prop_ops = { | |||
162 | .release = seq_release, | 162 | .release = seq_release, |
163 | }; | 163 | }; |
164 | 164 | ||
165 | static int openpromfs_readdir(struct file *, void *, filldir_t); | 165 | static int openpromfs_readdir(struct file *, struct dir_context *); |
166 | 166 | ||
167 | static const struct file_operations openprom_operations = { | 167 | static const struct file_operations openprom_operations = { |
168 | .read = generic_read_dir, | 168 | .read = generic_read_dir, |
169 | .readdir = openpromfs_readdir, | 169 | .iterate = openpromfs_readdir, |
170 | .llseek = generic_file_llseek, | 170 | .llseek = generic_file_llseek, |
171 | }; | 171 | }; |
172 | 172 | ||
@@ -260,71 +260,64 @@ found: | |||
260 | return NULL; | 260 | return NULL; |
261 | } | 261 | } |
262 | 262 | ||
263 | static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | 263 | static int openpromfs_readdir(struct file *file, struct dir_context *ctx) |
264 | { | 264 | { |
265 | struct inode *inode = file_inode(filp); | 265 | struct inode *inode = file_inode(file); |
266 | struct op_inode_info *oi = OP_I(inode); | 266 | struct op_inode_info *oi = OP_I(inode); |
267 | struct device_node *dp = oi->u.node; | 267 | struct device_node *dp = oi->u.node; |
268 | struct device_node *child; | 268 | struct device_node *child; |
269 | struct property *prop; | 269 | struct property *prop; |
270 | unsigned int ino; | ||
271 | int i; | 270 | int i; |
272 | 271 | ||
273 | mutex_lock(&op_mutex); | 272 | mutex_lock(&op_mutex); |
274 | 273 | ||
275 | ino = inode->i_ino; | 274 | if (ctx->pos == 0) { |
276 | i = filp->f_pos; | 275 | if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR)) |
277 | switch (i) { | ||
278 | case 0: | ||
279 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
280 | goto out; | 276 | goto out; |
281 | i++; | 277 | ctx->pos = 1; |
282 | filp->f_pos++; | 278 | } |
283 | /* fall thru */ | 279 | if (ctx->pos == 1) { |
284 | case 1: | 280 | if (!dir_emit(ctx, "..", 2, |
285 | if (filldir(dirent, "..", 2, i, | ||
286 | (dp->parent == NULL ? | 281 | (dp->parent == NULL ? |
287 | OPENPROM_ROOT_INO : | 282 | OPENPROM_ROOT_INO : |
288 | dp->parent->unique_id), DT_DIR) < 0) | 283 | dp->parent->unique_id), DT_DIR)) |
289 | goto out; | 284 | goto out; |
290 | i++; | 285 | ctx->pos = 2; |
291 | filp->f_pos++; | 286 | } |
292 | /* fall thru */ | 287 | i = ctx->pos - 2; |
293 | default: | ||
294 | i -= 2; | ||
295 | |||
296 | /* First, the children nodes as directories. */ | ||
297 | child = dp->child; | ||
298 | while (i && child) { | ||
299 | child = child->sibling; | ||
300 | i--; | ||
301 | } | ||
302 | while (child) { | ||
303 | if (filldir(dirent, | ||
304 | child->path_component_name, | ||
305 | strlen(child->path_component_name), | ||
306 | filp->f_pos, child->unique_id, DT_DIR) < 0) | ||
307 | goto out; | ||
308 | |||
309 | filp->f_pos++; | ||
310 | child = child->sibling; | ||
311 | } | ||
312 | 288 | ||
313 | /* Next, the properties as files. */ | 289 | /* First, the children nodes as directories. */ |
314 | prop = dp->properties; | 290 | child = dp->child; |
315 | while (i && prop) { | 291 | while (i && child) { |
316 | prop = prop->next; | 292 | child = child->sibling; |
317 | i--; | 293 | i--; |
318 | } | 294 | } |
319 | while (prop) { | 295 | while (child) { |
320 | if (filldir(dirent, prop->name, strlen(prop->name), | 296 | if (!dir_emit(ctx, |
321 | filp->f_pos, prop->unique_id, DT_REG) < 0) | 297 | child->path_component_name, |
322 | goto out; | 298 | strlen(child->path_component_name), |
299 | child->unique_id, DT_DIR)) | ||
300 | goto out; | ||
323 | 301 | ||
324 | filp->f_pos++; | 302 | ctx->pos++; |
325 | prop = prop->next; | 303 | child = child->sibling; |
326 | } | 304 | } |
305 | |||
306 | /* Next, the properties as files. */ | ||
307 | prop = dp->properties; | ||
308 | while (i && prop) { | ||
309 | prop = prop->next; | ||
310 | i--; | ||
327 | } | 311 | } |
312 | while (prop) { | ||
313 | if (!dir_emit(ctx, prop->name, strlen(prop->name), | ||
314 | prop->unique_id, DT_REG)) | ||
315 | goto out; | ||
316 | |||
317 | ctx->pos++; | ||
318 | prop = prop->next; | ||
319 | } | ||
320 | |||
328 | out: | 321 | out: |
329 | mutex_unlock(&op_mutex); | 322 | mutex_unlock(&op_mutex); |
330 | return 0; | 323 | return 0; |
diff --git a/fs/proc/base.c b/fs/proc/base.c index c3834dad09b3..1485e38daaa3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1681,46 +1681,34 @@ const struct dentry_operations pid_dentry_operations = | |||
1681 | * reported by readdir in sync with the inode numbers reported | 1681 | * reported by readdir in sync with the inode numbers reported |
1682 | * by stat. | 1682 | * by stat. |
1683 | */ | 1683 | */ |
1684 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 1684 | bool proc_fill_cache(struct file *file, struct dir_context *ctx, |
1685 | const char *name, int len, | 1685 | const char *name, int len, |
1686 | instantiate_t instantiate, struct task_struct *task, const void *ptr) | 1686 | instantiate_t instantiate, struct task_struct *task, const void *ptr) |
1687 | { | 1687 | { |
1688 | struct dentry *child, *dir = filp->f_path.dentry; | 1688 | struct dentry *child, *dir = file->f_path.dentry; |
1689 | struct qstr qname = QSTR_INIT(name, len); | ||
1689 | struct inode *inode; | 1690 | struct inode *inode; |
1690 | struct qstr qname; | 1691 | unsigned type; |
1691 | ino_t ino = 0; | 1692 | ino_t ino; |
1692 | unsigned type = DT_UNKNOWN; | ||
1693 | |||
1694 | qname.name = name; | ||
1695 | qname.len = len; | ||
1696 | qname.hash = full_name_hash(name, len); | ||
1697 | 1693 | ||
1698 | child = d_lookup(dir, &qname); | 1694 | child = d_hash_and_lookup(dir, &qname); |
1699 | if (!child) { | 1695 | if (!child) { |
1700 | struct dentry *new; | 1696 | child = d_alloc(dir, &qname); |
1701 | new = d_alloc(dir, &qname); | 1697 | if (!child) |
1702 | if (new) { | 1698 | goto end_instantiate; |
1703 | child = instantiate(dir->d_inode, new, task, ptr); | 1699 | if (instantiate(dir->d_inode, child, task, ptr) < 0) { |
1704 | if (child) | 1700 | dput(child); |
1705 | dput(new); | 1701 | goto end_instantiate; |
1706 | else | ||
1707 | child = new; | ||
1708 | } | 1702 | } |
1709 | } | 1703 | } |
1710 | if (!child || IS_ERR(child) || !child->d_inode) | ||
1711 | goto end_instantiate; | ||
1712 | inode = child->d_inode; | 1704 | inode = child->d_inode; |
1713 | if (inode) { | 1705 | ino = inode->i_ino; |
1714 | ino = inode->i_ino; | 1706 | type = inode->i_mode >> 12; |
1715 | type = inode->i_mode >> 12; | ||
1716 | } | ||
1717 | dput(child); | 1707 | dput(child); |
1708 | return dir_emit(ctx, name, len, ino, type); | ||
1709 | |||
1718 | end_instantiate: | 1710 | end_instantiate: |
1719 | if (!ino) | 1711 | return dir_emit(ctx, name, len, 1, DT_UNKNOWN); |
1720 | ino = find_inode_number(dir, &qname); | ||
1721 | if (!ino) | ||
1722 | ino = 1; | ||
1723 | return filldir(dirent, name, len, filp->f_pos, ino, type); | ||
1724 | } | 1712 | } |
1725 | 1713 | ||
1726 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1714 | #ifdef CONFIG_CHECKPOINT_RESTORE |
@@ -1846,7 +1834,7 @@ struct map_files_info { | |||
1846 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ | 1834 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ |
1847 | }; | 1835 | }; |
1848 | 1836 | ||
1849 | static struct dentry * | 1837 | static int |
1850 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | 1838 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, |
1851 | struct task_struct *task, const void *ptr) | 1839 | struct task_struct *task, const void *ptr) |
1852 | { | 1840 | { |
@@ -1856,7 +1844,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | |||
1856 | 1844 | ||
1857 | inode = proc_pid_make_inode(dir->i_sb, task); | 1845 | inode = proc_pid_make_inode(dir->i_sb, task); |
1858 | if (!inode) | 1846 | if (!inode) |
1859 | return ERR_PTR(-ENOENT); | 1847 | return -ENOENT; |
1860 | 1848 | ||
1861 | ei = PROC_I(inode); | 1849 | ei = PROC_I(inode); |
1862 | ei->op.proc_get_link = proc_map_files_get_link; | 1850 | ei->op.proc_get_link = proc_map_files_get_link; |
@@ -1873,7 +1861,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | |||
1873 | d_set_d_op(dentry, &tid_map_files_dentry_operations); | 1861 | d_set_d_op(dentry, &tid_map_files_dentry_operations); |
1874 | d_add(dentry, inode); | 1862 | d_add(dentry, inode); |
1875 | 1863 | ||
1876 | return NULL; | 1864 | return 0; |
1877 | } | 1865 | } |
1878 | 1866 | ||
1879 | static struct dentry *proc_map_files_lookup(struct inode *dir, | 1867 | static struct dentry *proc_map_files_lookup(struct inode *dir, |
@@ -1882,23 +1870,23 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, | |||
1882 | unsigned long vm_start, vm_end; | 1870 | unsigned long vm_start, vm_end; |
1883 | struct vm_area_struct *vma; | 1871 | struct vm_area_struct *vma; |
1884 | struct task_struct *task; | 1872 | struct task_struct *task; |
1885 | struct dentry *result; | 1873 | int result; |
1886 | struct mm_struct *mm; | 1874 | struct mm_struct *mm; |
1887 | 1875 | ||
1888 | result = ERR_PTR(-EPERM); | 1876 | result = -EPERM; |
1889 | if (!capable(CAP_SYS_ADMIN)) | 1877 | if (!capable(CAP_SYS_ADMIN)) |
1890 | goto out; | 1878 | goto out; |
1891 | 1879 | ||
1892 | result = ERR_PTR(-ENOENT); | 1880 | result = -ENOENT; |
1893 | task = get_proc_task(dir); | 1881 | task = get_proc_task(dir); |
1894 | if (!task) | 1882 | if (!task) |
1895 | goto out; | 1883 | goto out; |
1896 | 1884 | ||
1897 | result = ERR_PTR(-EACCES); | 1885 | result = -EACCES; |
1898 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 1886 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
1899 | goto out_put_task; | 1887 | goto out_put_task; |
1900 | 1888 | ||
1901 | result = ERR_PTR(-ENOENT); | 1889 | result = -ENOENT; |
1902 | if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) | 1890 | if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) |
1903 | goto out_put_task; | 1891 | goto out_put_task; |
1904 | 1892 | ||
@@ -1921,7 +1909,7 @@ out_no_vma: | |||
1921 | out_put_task: | 1909 | out_put_task: |
1922 | put_task_struct(task); | 1910 | put_task_struct(task); |
1923 | out: | 1911 | out: |
1924 | return result; | 1912 | return ERR_PTR(result); |
1925 | } | 1913 | } |
1926 | 1914 | ||
1927 | static const struct inode_operations proc_map_files_inode_operations = { | 1915 | static const struct inode_operations proc_map_files_inode_operations = { |
@@ -1931,14 +1919,15 @@ static const struct inode_operations proc_map_files_inode_operations = { | |||
1931 | }; | 1919 | }; |
1932 | 1920 | ||
1933 | static int | 1921 | static int |
1934 | proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | 1922 | proc_map_files_readdir(struct file *file, struct dir_context *ctx) |
1935 | { | 1923 | { |
1936 | struct dentry *dentry = filp->f_path.dentry; | ||
1937 | struct inode *inode = dentry->d_inode; | ||
1938 | struct vm_area_struct *vma; | 1924 | struct vm_area_struct *vma; |
1939 | struct task_struct *task; | 1925 | struct task_struct *task; |
1940 | struct mm_struct *mm; | 1926 | struct mm_struct *mm; |
1941 | ino_t ino; | 1927 | unsigned long nr_files, pos, i; |
1928 | struct flex_array *fa = NULL; | ||
1929 | struct map_files_info info; | ||
1930 | struct map_files_info *p; | ||
1942 | int ret; | 1931 | int ret; |
1943 | 1932 | ||
1944 | ret = -EPERM; | 1933 | ret = -EPERM; |
@@ -1946,7 +1935,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
1946 | goto out; | 1935 | goto out; |
1947 | 1936 | ||
1948 | ret = -ENOENT; | 1937 | ret = -ENOENT; |
1949 | task = get_proc_task(inode); | 1938 | task = get_proc_task(file_inode(file)); |
1950 | if (!task) | 1939 | if (!task) |
1951 | goto out; | 1940 | goto out; |
1952 | 1941 | ||
@@ -1955,91 +1944,73 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
1955 | goto out_put_task; | 1944 | goto out_put_task; |
1956 | 1945 | ||
1957 | ret = 0; | 1946 | ret = 0; |
1958 | switch (filp->f_pos) { | 1947 | if (!dir_emit_dots(file, ctx)) |
1959 | case 0: | 1948 | goto out_put_task; |
1960 | ino = inode->i_ino; | ||
1961 | if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) | ||
1962 | goto out_put_task; | ||
1963 | filp->f_pos++; | ||
1964 | case 1: | ||
1965 | ino = parent_ino(dentry); | ||
1966 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
1967 | goto out_put_task; | ||
1968 | filp->f_pos++; | ||
1969 | default: | ||
1970 | { | ||
1971 | unsigned long nr_files, pos, i; | ||
1972 | struct flex_array *fa = NULL; | ||
1973 | struct map_files_info info; | ||
1974 | struct map_files_info *p; | ||
1975 | |||
1976 | mm = get_task_mm(task); | ||
1977 | if (!mm) | ||
1978 | goto out_put_task; | ||
1979 | down_read(&mm->mmap_sem); | ||
1980 | 1949 | ||
1981 | nr_files = 0; | 1950 | mm = get_task_mm(task); |
1951 | if (!mm) | ||
1952 | goto out_put_task; | ||
1953 | down_read(&mm->mmap_sem); | ||
1982 | 1954 | ||
1983 | /* | 1955 | nr_files = 0; |
1984 | * We need two passes here: | ||
1985 | * | ||
1986 | * 1) Collect vmas of mapped files with mmap_sem taken | ||
1987 | * 2) Release mmap_sem and instantiate entries | ||
1988 | * | ||
1989 | * otherwise we get lockdep complained, since filldir() | ||
1990 | * routine might require mmap_sem taken in might_fault(). | ||
1991 | */ | ||
1992 | 1956 | ||
1993 | for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { | 1957 | /* |
1994 | if (vma->vm_file && ++pos > filp->f_pos) | 1958 | * We need two passes here: |
1995 | nr_files++; | 1959 | * |
1996 | } | 1960 | * 1) Collect vmas of mapped files with mmap_sem taken |
1961 | * 2) Release mmap_sem and instantiate entries | ||
1962 | * | ||
1963 | * otherwise we get lockdep complained, since filldir() | ||
1964 | * routine might require mmap_sem taken in might_fault(). | ||
1965 | */ | ||
1997 | 1966 | ||
1998 | if (nr_files) { | 1967 | for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { |
1999 | fa = flex_array_alloc(sizeof(info), nr_files, | 1968 | if (vma->vm_file && ++pos > ctx->pos) |
2000 | GFP_KERNEL); | 1969 | nr_files++; |
2001 | if (!fa || flex_array_prealloc(fa, 0, nr_files, | 1970 | } |
2002 | GFP_KERNEL)) { | 1971 | |
2003 | ret = -ENOMEM; | 1972 | if (nr_files) { |
2004 | if (fa) | 1973 | fa = flex_array_alloc(sizeof(info), nr_files, |
2005 | flex_array_free(fa); | 1974 | GFP_KERNEL); |
2006 | up_read(&mm->mmap_sem); | 1975 | if (!fa || flex_array_prealloc(fa, 0, nr_files, |
2007 | mmput(mm); | 1976 | GFP_KERNEL)) { |
2008 | goto out_put_task; | 1977 | ret = -ENOMEM; |
2009 | } | 1978 | if (fa) |
2010 | for (i = 0, vma = mm->mmap, pos = 2; vma; | 1979 | flex_array_free(fa); |
2011 | vma = vma->vm_next) { | 1980 | up_read(&mm->mmap_sem); |
2012 | if (!vma->vm_file) | 1981 | mmput(mm); |
2013 | continue; | 1982 | goto out_put_task; |
2014 | if (++pos <= filp->f_pos) | ||
2015 | continue; | ||
2016 | |||
2017 | info.mode = vma->vm_file->f_mode; | ||
2018 | info.len = snprintf(info.name, | ||
2019 | sizeof(info.name), "%lx-%lx", | ||
2020 | vma->vm_start, vma->vm_end); | ||
2021 | if (flex_array_put(fa, i++, &info, GFP_KERNEL)) | ||
2022 | BUG(); | ||
2023 | } | ||
2024 | } | 1983 | } |
2025 | up_read(&mm->mmap_sem); | 1984 | for (i = 0, vma = mm->mmap, pos = 2; vma; |
2026 | 1985 | vma = vma->vm_next) { | |
2027 | for (i = 0; i < nr_files; i++) { | 1986 | if (!vma->vm_file) |
2028 | p = flex_array_get(fa, i); | 1987 | continue; |
2029 | ret = proc_fill_cache(filp, dirent, filldir, | 1988 | if (++pos <= ctx->pos) |
2030 | p->name, p->len, | 1989 | continue; |
2031 | proc_map_files_instantiate, | 1990 | |
2032 | task, | 1991 | info.mode = vma->vm_file->f_mode; |
2033 | (void *)(unsigned long)p->mode); | 1992 | info.len = snprintf(info.name, |
2034 | if (ret) | 1993 | sizeof(info.name), "%lx-%lx", |
2035 | break; | 1994 | vma->vm_start, vma->vm_end); |
2036 | filp->f_pos++; | 1995 | if (flex_array_put(fa, i++, &info, GFP_KERNEL)) |
1996 | BUG(); | ||
2037 | } | 1997 | } |
2038 | if (fa) | ||
2039 | flex_array_free(fa); | ||
2040 | mmput(mm); | ||
2041 | } | 1998 | } |
1999 | up_read(&mm->mmap_sem); | ||
2000 | |||
2001 | for (i = 0; i < nr_files; i++) { | ||
2002 | p = flex_array_get(fa, i); | ||
2003 | if (!proc_fill_cache(file, ctx, | ||
2004 | p->name, p->len, | ||
2005 | proc_map_files_instantiate, | ||
2006 | task, | ||
2007 | (void *)(unsigned long)p->mode)) | ||
2008 | break; | ||
2009 | ctx->pos++; | ||
2042 | } | 2010 | } |
2011 | if (fa) | ||
2012 | flex_array_free(fa); | ||
2013 | mmput(mm); | ||
2043 | 2014 | ||
2044 | out_put_task: | 2015 | out_put_task: |
2045 | put_task_struct(task); | 2016 | put_task_struct(task); |
@@ -2049,7 +2020,7 @@ out: | |||
2049 | 2020 | ||
2050 | static const struct file_operations proc_map_files_operations = { | 2021 | static const struct file_operations proc_map_files_operations = { |
2051 | .read = generic_read_dir, | 2022 | .read = generic_read_dir, |
2052 | .readdir = proc_map_files_readdir, | 2023 | .iterate = proc_map_files_readdir, |
2053 | .llseek = default_llseek, | 2024 | .llseek = default_llseek, |
2054 | }; | 2025 | }; |
2055 | 2026 | ||
@@ -2152,13 +2123,12 @@ static const struct file_operations proc_timers_operations = { | |||
2152 | }; | 2123 | }; |
2153 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | 2124 | #endif /* CONFIG_CHECKPOINT_RESTORE */ |
2154 | 2125 | ||
2155 | static struct dentry *proc_pident_instantiate(struct inode *dir, | 2126 | static int proc_pident_instantiate(struct inode *dir, |
2156 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 2127 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
2157 | { | 2128 | { |
2158 | const struct pid_entry *p = ptr; | 2129 | const struct pid_entry *p = ptr; |
2159 | struct inode *inode; | 2130 | struct inode *inode; |
2160 | struct proc_inode *ei; | 2131 | struct proc_inode *ei; |
2161 | struct dentry *error = ERR_PTR(-ENOENT); | ||
2162 | 2132 | ||
2163 | inode = proc_pid_make_inode(dir->i_sb, task); | 2133 | inode = proc_pid_make_inode(dir->i_sb, task); |
2164 | if (!inode) | 2134 | if (!inode) |
@@ -2177,9 +2147,9 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, | |||
2177 | d_add(dentry, inode); | 2147 | d_add(dentry, inode); |
2178 | /* Close the race of the process dying before we return the dentry */ | 2148 | /* Close the race of the process dying before we return the dentry */ |
2179 | if (pid_revalidate(dentry, 0)) | 2149 | if (pid_revalidate(dentry, 0)) |
2180 | error = NULL; | 2150 | return 0; |
2181 | out: | 2151 | out: |
2182 | return error; | 2152 | return -ENOENT; |
2183 | } | 2153 | } |
2184 | 2154 | ||
2185 | static struct dentry *proc_pident_lookup(struct inode *dir, | 2155 | static struct dentry *proc_pident_lookup(struct inode *dir, |
@@ -2187,11 +2157,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
2187 | const struct pid_entry *ents, | 2157 | const struct pid_entry *ents, |
2188 | unsigned int nents) | 2158 | unsigned int nents) |
2189 | { | 2159 | { |
2190 | struct dentry *error; | 2160 | int error; |
2191 | struct task_struct *task = get_proc_task(dir); | 2161 | struct task_struct *task = get_proc_task(dir); |
2192 | const struct pid_entry *p, *last; | 2162 | const struct pid_entry *p, *last; |
2193 | 2163 | ||
2194 | error = ERR_PTR(-ENOENT); | 2164 | error = -ENOENT; |
2195 | 2165 | ||
2196 | if (!task) | 2166 | if (!task) |
2197 | goto out_no_task; | 2167 | goto out_no_task; |
@@ -2214,70 +2184,33 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
2214 | out: | 2184 | out: |
2215 | put_task_struct(task); | 2185 | put_task_struct(task); |
2216 | out_no_task: | 2186 | out_no_task: |
2217 | return error; | 2187 | return ERR_PTR(error); |
2218 | } | ||
2219 | |||
2220 | static int proc_pident_fill_cache(struct file *filp, void *dirent, | ||
2221 | filldir_t filldir, struct task_struct *task, const struct pid_entry *p) | ||
2222 | { | ||
2223 | return proc_fill_cache(filp, dirent, filldir, p->name, p->len, | ||
2224 | proc_pident_instantiate, task, p); | ||
2225 | } | 2188 | } |
2226 | 2189 | ||
2227 | static int proc_pident_readdir(struct file *filp, | 2190 | static int proc_pident_readdir(struct file *file, struct dir_context *ctx, |
2228 | void *dirent, filldir_t filldir, | ||
2229 | const struct pid_entry *ents, unsigned int nents) | 2191 | const struct pid_entry *ents, unsigned int nents) |
2230 | { | 2192 | { |
2231 | int i; | 2193 | struct task_struct *task = get_proc_task(file_inode(file)); |
2232 | struct dentry *dentry = filp->f_path.dentry; | 2194 | const struct pid_entry *p; |
2233 | struct inode *inode = dentry->d_inode; | ||
2234 | struct task_struct *task = get_proc_task(inode); | ||
2235 | const struct pid_entry *p, *last; | ||
2236 | ino_t ino; | ||
2237 | int ret; | ||
2238 | 2195 | ||
2239 | ret = -ENOENT; | ||
2240 | if (!task) | 2196 | if (!task) |
2241 | goto out_no_task; | 2197 | return -ENOENT; |
2242 | 2198 | ||
2243 | ret = 0; | 2199 | if (!dir_emit_dots(file, ctx)) |
2244 | i = filp->f_pos; | 2200 | goto out; |
2245 | switch (i) { | 2201 | |
2246 | case 0: | 2202 | if (ctx->pos >= nents + 2) |
2247 | ino = inode->i_ino; | 2203 | goto out; |
2248 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
2249 | goto out; | ||
2250 | i++; | ||
2251 | filp->f_pos++; | ||
2252 | /* fall through */ | ||
2253 | case 1: | ||
2254 | ino = parent_ino(dentry); | ||
2255 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | ||
2256 | goto out; | ||
2257 | i++; | ||
2258 | filp->f_pos++; | ||
2259 | /* fall through */ | ||
2260 | default: | ||
2261 | i -= 2; | ||
2262 | if (i >= nents) { | ||
2263 | ret = 1; | ||
2264 | goto out; | ||
2265 | } | ||
2266 | p = ents + i; | ||
2267 | last = &ents[nents - 1]; | ||
2268 | while (p <= last) { | ||
2269 | if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) | ||
2270 | goto out; | ||
2271 | filp->f_pos++; | ||
2272 | p++; | ||
2273 | } | ||
2274 | } | ||
2275 | 2204 | ||
2276 | ret = 1; | 2205 | for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) { |
2206 | if (!proc_fill_cache(file, ctx, p->name, p->len, | ||
2207 | proc_pident_instantiate, task, p)) | ||
2208 | break; | ||
2209 | ctx->pos++; | ||
2210 | } | ||
2277 | out: | 2211 | out: |
2278 | put_task_struct(task); | 2212 | put_task_struct(task); |
2279 | out_no_task: | 2213 | return 0; |
2280 | return ret; | ||
2281 | } | 2214 | } |
2282 | 2215 | ||
2283 | #ifdef CONFIG_SECURITY | 2216 | #ifdef CONFIG_SECURITY |
@@ -2362,16 +2295,15 @@ static const struct pid_entry attr_dir_stuff[] = { | |||
2362 | REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), | 2295 | REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), |
2363 | }; | 2296 | }; |
2364 | 2297 | ||
2365 | static int proc_attr_dir_readdir(struct file * filp, | 2298 | static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) |
2366 | void * dirent, filldir_t filldir) | ||
2367 | { | 2299 | { |
2368 | return proc_pident_readdir(filp,dirent,filldir, | 2300 | return proc_pident_readdir(file, ctx, |
2369 | attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); | 2301 | attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); |
2370 | } | 2302 | } |
2371 | 2303 | ||
2372 | static const struct file_operations proc_attr_dir_operations = { | 2304 | static const struct file_operations proc_attr_dir_operations = { |
2373 | .read = generic_read_dir, | 2305 | .read = generic_read_dir, |
2374 | .readdir = proc_attr_dir_readdir, | 2306 | .iterate = proc_attr_dir_readdir, |
2375 | .llseek = default_llseek, | 2307 | .llseek = default_llseek, |
2376 | }; | 2308 | }; |
2377 | 2309 | ||
@@ -2725,16 +2657,15 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2725 | #endif | 2657 | #endif |
2726 | }; | 2658 | }; |
2727 | 2659 | ||
2728 | static int proc_tgid_base_readdir(struct file * filp, | 2660 | static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) |
2729 | void * dirent, filldir_t filldir) | ||
2730 | { | 2661 | { |
2731 | return proc_pident_readdir(filp,dirent,filldir, | 2662 | return proc_pident_readdir(file, ctx, |
2732 | tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); | 2663 | tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); |
2733 | } | 2664 | } |
2734 | 2665 | ||
2735 | static const struct file_operations proc_tgid_base_operations = { | 2666 | static const struct file_operations proc_tgid_base_operations = { |
2736 | .read = generic_read_dir, | 2667 | .read = generic_read_dir, |
2737 | .readdir = proc_tgid_base_readdir, | 2668 | .iterate = proc_tgid_base_readdir, |
2738 | .llseek = default_llseek, | 2669 | .llseek = default_llseek, |
2739 | }; | 2670 | }; |
2740 | 2671 | ||
@@ -2836,11 +2767,10 @@ void proc_flush_task(struct task_struct *task) | |||
2836 | } | 2767 | } |
2837 | } | 2768 | } |
2838 | 2769 | ||
2839 | static struct dentry *proc_pid_instantiate(struct inode *dir, | 2770 | static int proc_pid_instantiate(struct inode *dir, |
2840 | struct dentry * dentry, | 2771 | struct dentry * dentry, |
2841 | struct task_struct *task, const void *ptr) | 2772 | struct task_struct *task, const void *ptr) |
2842 | { | 2773 | { |
2843 | struct dentry *error = ERR_PTR(-ENOENT); | ||
2844 | struct inode *inode; | 2774 | struct inode *inode; |
2845 | 2775 | ||
2846 | inode = proc_pid_make_inode(dir->i_sb, task); | 2776 | inode = proc_pid_make_inode(dir->i_sb, task); |
@@ -2860,14 +2790,14 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, | |||
2860 | d_add(dentry, inode); | 2790 | d_add(dentry, inode); |
2861 | /* Close the race of the process dying before we return the dentry */ | 2791 | /* Close the race of the process dying before we return the dentry */ |
2862 | if (pid_revalidate(dentry, 0)) | 2792 | if (pid_revalidate(dentry, 0)) |
2863 | error = NULL; | 2793 | return 0; |
2864 | out: | 2794 | out: |
2865 | return error; | 2795 | return -ENOENT; |
2866 | } | 2796 | } |
2867 | 2797 | ||
2868 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) | 2798 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) |
2869 | { | 2799 | { |
2870 | struct dentry *result = NULL; | 2800 | int result = 0; |
2871 | struct task_struct *task; | 2801 | struct task_struct *task; |
2872 | unsigned tgid; | 2802 | unsigned tgid; |
2873 | struct pid_namespace *ns; | 2803 | struct pid_namespace *ns; |
@@ -2888,7 +2818,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign | |||
2888 | result = proc_pid_instantiate(dir, dentry, task, NULL); | 2818 | result = proc_pid_instantiate(dir, dentry, task, NULL); |
2889 | put_task_struct(task); | 2819 | put_task_struct(task); |
2890 | out: | 2820 | out: |
2891 | return result; | 2821 | return ERR_PTR(result); |
2892 | } | 2822 | } |
2893 | 2823 | ||
2894 | /* | 2824 | /* |
@@ -2936,58 +2866,42 @@ retry: | |||
2936 | 2866 | ||
2937 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) | 2867 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) |
2938 | 2868 | ||
2939 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
2940 | struct tgid_iter iter) | ||
2941 | { | ||
2942 | char name[PROC_NUMBUF]; | ||
2943 | int len = snprintf(name, sizeof(name), "%d", iter.tgid); | ||
2944 | return proc_fill_cache(filp, dirent, filldir, name, len, | ||
2945 | proc_pid_instantiate, iter.task, NULL); | ||
2946 | } | ||
2947 | |||
2948 | static int fake_filldir(void *buf, const char *name, int namelen, | ||
2949 | loff_t offset, u64 ino, unsigned d_type) | ||
2950 | { | ||
2951 | return 0; | ||
2952 | } | ||
2953 | |||
2954 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 2869 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
2955 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 2870 | int proc_pid_readdir(struct file *file, struct dir_context *ctx) |
2956 | { | 2871 | { |
2957 | struct tgid_iter iter; | 2872 | struct tgid_iter iter; |
2958 | struct pid_namespace *ns; | 2873 | struct pid_namespace *ns = file->f_dentry->d_sb->s_fs_info; |
2959 | filldir_t __filldir; | 2874 | loff_t pos = ctx->pos; |
2960 | loff_t pos = filp->f_pos; | ||
2961 | 2875 | ||
2962 | if (pos >= PID_MAX_LIMIT + TGID_OFFSET) | 2876 | if (pos >= PID_MAX_LIMIT + TGID_OFFSET) |
2963 | goto out; | 2877 | return 0; |
2964 | 2878 | ||
2965 | if (pos == TGID_OFFSET - 1) { | 2879 | if (pos == TGID_OFFSET - 1) { |
2966 | if (proc_fill_cache(filp, dirent, filldir, "self", 4, | 2880 | struct inode *inode = ns->proc_self->d_inode; |
2967 | NULL, NULL, NULL) < 0) | 2881 | if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) |
2968 | goto out; | 2882 | return 0; |
2969 | iter.tgid = 0; | 2883 | iter.tgid = 0; |
2970 | } else { | 2884 | } else { |
2971 | iter.tgid = pos - TGID_OFFSET; | 2885 | iter.tgid = pos - TGID_OFFSET; |
2972 | } | 2886 | } |
2973 | iter.task = NULL; | 2887 | iter.task = NULL; |
2974 | ns = filp->f_dentry->d_sb->s_fs_info; | ||
2975 | for (iter = next_tgid(ns, iter); | 2888 | for (iter = next_tgid(ns, iter); |
2976 | iter.task; | 2889 | iter.task; |
2977 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 2890 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
2978 | if (has_pid_permissions(ns, iter.task, 2)) | 2891 | char name[PROC_NUMBUF]; |
2979 | __filldir = filldir; | 2892 | int len; |
2980 | else | 2893 | if (!has_pid_permissions(ns, iter.task, 2)) |
2981 | __filldir = fake_filldir; | 2894 | continue; |
2982 | 2895 | ||
2983 | filp->f_pos = iter.tgid + TGID_OFFSET; | 2896 | len = snprintf(name, sizeof(name), "%d", iter.tgid); |
2984 | if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { | 2897 | ctx->pos = iter.tgid + TGID_OFFSET; |
2898 | if (!proc_fill_cache(file, ctx, name, len, | ||
2899 | proc_pid_instantiate, iter.task, NULL)) { | ||
2985 | put_task_struct(iter.task); | 2900 | put_task_struct(iter.task); |
2986 | goto out; | 2901 | return 0; |
2987 | } | 2902 | } |
2988 | } | 2903 | } |
2989 | filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; | 2904 | ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; |
2990 | out: | ||
2991 | return 0; | 2905 | return 0; |
2992 | } | 2906 | } |
2993 | 2907 | ||
@@ -3075,11 +2989,10 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3075 | #endif | 2989 | #endif |
3076 | }; | 2990 | }; |
3077 | 2991 | ||
3078 | static int proc_tid_base_readdir(struct file * filp, | 2992 | static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) |
3079 | void * dirent, filldir_t filldir) | ||
3080 | { | 2993 | { |
3081 | return proc_pident_readdir(filp,dirent,filldir, | 2994 | return proc_pident_readdir(file, ctx, |
3082 | tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); | 2995 | tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); |
3083 | } | 2996 | } |
3084 | 2997 | ||
3085 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 2998 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) |
@@ -3090,7 +3003,7 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den | |||
3090 | 3003 | ||
3091 | static const struct file_operations proc_tid_base_operations = { | 3004 | static const struct file_operations proc_tid_base_operations = { |
3092 | .read = generic_read_dir, | 3005 | .read = generic_read_dir, |
3093 | .readdir = proc_tid_base_readdir, | 3006 | .iterate = proc_tid_base_readdir, |
3094 | .llseek = default_llseek, | 3007 | .llseek = default_llseek, |
3095 | }; | 3008 | }; |
3096 | 3009 | ||
@@ -3100,10 +3013,9 @@ static const struct inode_operations proc_tid_base_inode_operations = { | |||
3100 | .setattr = proc_setattr, | 3013 | .setattr = proc_setattr, |
3101 | }; | 3014 | }; |
3102 | 3015 | ||
3103 | static struct dentry *proc_task_instantiate(struct inode *dir, | 3016 | static int proc_task_instantiate(struct inode *dir, |
3104 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 3017 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
3105 | { | 3018 | { |
3106 | struct dentry *error = ERR_PTR(-ENOENT); | ||
3107 | struct inode *inode; | 3019 | struct inode *inode; |
3108 | inode = proc_pid_make_inode(dir->i_sb, task); | 3020 | inode = proc_pid_make_inode(dir->i_sb, task); |
3109 | 3021 | ||
@@ -3122,14 +3034,14 @@ static struct dentry *proc_task_instantiate(struct inode *dir, | |||
3122 | d_add(dentry, inode); | 3034 | d_add(dentry, inode); |
3123 | /* Close the race of the process dying before we return the dentry */ | 3035 | /* Close the race of the process dying before we return the dentry */ |
3124 | if (pid_revalidate(dentry, 0)) | 3036 | if (pid_revalidate(dentry, 0)) |
3125 | error = NULL; | 3037 | return 0; |
3126 | out: | 3038 | out: |
3127 | return error; | 3039 | return -ENOENT; |
3128 | } | 3040 | } |
3129 | 3041 | ||
3130 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) | 3042 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) |
3131 | { | 3043 | { |
3132 | struct dentry *result = ERR_PTR(-ENOENT); | 3044 | int result = -ENOENT; |
3133 | struct task_struct *task; | 3045 | struct task_struct *task; |
3134 | struct task_struct *leader = get_proc_task(dir); | 3046 | struct task_struct *leader = get_proc_task(dir); |
3135 | unsigned tid; | 3047 | unsigned tid; |
@@ -3159,7 +3071,7 @@ out_drop_task: | |||
3159 | out: | 3071 | out: |
3160 | put_task_struct(leader); | 3072 | put_task_struct(leader); |
3161 | out_no_task: | 3073 | out_no_task: |
3162 | return result; | 3074 | return ERR_PTR(result); |
3163 | } | 3075 | } |
3164 | 3076 | ||
3165 | /* | 3077 | /* |
@@ -3231,30 +3143,16 @@ static struct task_struct *next_tid(struct task_struct *start) | |||
3231 | return pos; | 3143 | return pos; |
3232 | } | 3144 | } |
3233 | 3145 | ||
3234 | static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
3235 | struct task_struct *task, int tid) | ||
3236 | { | ||
3237 | char name[PROC_NUMBUF]; | ||
3238 | int len = snprintf(name, sizeof(name), "%d", tid); | ||
3239 | return proc_fill_cache(filp, dirent, filldir, name, len, | ||
3240 | proc_task_instantiate, task, NULL); | ||
3241 | } | ||
3242 | |||
3243 | /* for the /proc/TGID/task/ directories */ | 3146 | /* for the /proc/TGID/task/ directories */ |
3244 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) | 3147 | static int proc_task_readdir(struct file *file, struct dir_context *ctx) |
3245 | { | 3148 | { |
3246 | struct dentry *dentry = filp->f_path.dentry; | ||
3247 | struct inode *inode = dentry->d_inode; | ||
3248 | struct task_struct *leader = NULL; | 3149 | struct task_struct *leader = NULL; |
3249 | struct task_struct *task; | 3150 | struct task_struct *task = get_proc_task(file_inode(file)); |
3250 | int retval = -ENOENT; | ||
3251 | ino_t ino; | ||
3252 | int tid; | ||
3253 | struct pid_namespace *ns; | 3151 | struct pid_namespace *ns; |
3152 | int tid; | ||
3254 | 3153 | ||
3255 | task = get_proc_task(inode); | ||
3256 | if (!task) | 3154 | if (!task) |
3257 | goto out_no_task; | 3155 | return -ENOENT; |
3258 | rcu_read_lock(); | 3156 | rcu_read_lock(); |
3259 | if (pid_alive(task)) { | 3157 | if (pid_alive(task)) { |
3260 | leader = task->group_leader; | 3158 | leader = task->group_leader; |
@@ -3263,46 +3161,36 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi | |||
3263 | rcu_read_unlock(); | 3161 | rcu_read_unlock(); |
3264 | put_task_struct(task); | 3162 | put_task_struct(task); |
3265 | if (!leader) | 3163 | if (!leader) |
3266 | goto out_no_task; | 3164 | return -ENOENT; |
3267 | retval = 0; | ||
3268 | 3165 | ||
3269 | switch ((unsigned long)filp->f_pos) { | 3166 | if (!dir_emit_dots(file, ctx)) |
3270 | case 0: | 3167 | goto out; |
3271 | ino = inode->i_ino; | ||
3272 | if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) | ||
3273 | goto out; | ||
3274 | filp->f_pos++; | ||
3275 | /* fall through */ | ||
3276 | case 1: | ||
3277 | ino = parent_ino(dentry); | ||
3278 | if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) | ||
3279 | goto out; | ||
3280 | filp->f_pos++; | ||
3281 | /* fall through */ | ||
3282 | } | ||
3283 | 3168 | ||
3284 | /* f_version caches the tgid value that the last readdir call couldn't | 3169 | /* f_version caches the tgid value that the last readdir call couldn't |
3285 | * return. lseek aka telldir automagically resets f_version to 0. | 3170 | * return. lseek aka telldir automagically resets f_version to 0. |
3286 | */ | 3171 | */ |
3287 | ns = filp->f_dentry->d_sb->s_fs_info; | 3172 | ns = file->f_dentry->d_sb->s_fs_info; |
3288 | tid = (int)filp->f_version; | 3173 | tid = (int)file->f_version; |
3289 | filp->f_version = 0; | 3174 | file->f_version = 0; |
3290 | for (task = first_tid(leader, tid, filp->f_pos - 2, ns); | 3175 | for (task = first_tid(leader, tid, ctx->pos - 2, ns); |
3291 | task; | 3176 | task; |
3292 | task = next_tid(task), filp->f_pos++) { | 3177 | task = next_tid(task), ctx->pos++) { |
3178 | char name[PROC_NUMBUF]; | ||
3179 | int len; | ||
3293 | tid = task_pid_nr_ns(task, ns); | 3180 | tid = task_pid_nr_ns(task, ns); |
3294 | if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { | 3181 | len = snprintf(name, sizeof(name), "%d", tid); |
3182 | if (!proc_fill_cache(file, ctx, name, len, | ||
3183 | proc_task_instantiate, task, NULL)) { | ||
3295 | /* returning this tgid failed, save it as the first | 3184 | /* returning this tgid failed, save it as the first |
3296 | * pid for the next readir call */ | 3185 | * pid for the next readir call */ |
3297 | filp->f_version = (u64)tid; | 3186 | file->f_version = (u64)tid; |
3298 | put_task_struct(task); | 3187 | put_task_struct(task); |
3299 | break; | 3188 | break; |
3300 | } | 3189 | } |
3301 | } | 3190 | } |
3302 | out: | 3191 | out: |
3303 | put_task_struct(leader); | 3192 | put_task_struct(leader); |
3304 | out_no_task: | 3193 | return 0; |
3305 | return retval; | ||
3306 | } | 3194 | } |
3307 | 3195 | ||
3308 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 3196 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
@@ -3328,6 +3216,6 @@ static const struct inode_operations proc_task_inode_operations = { | |||
3328 | 3216 | ||
3329 | static const struct file_operations proc_task_operations = { | 3217 | static const struct file_operations proc_task_operations = { |
3330 | .read = generic_read_dir, | 3218 | .read = generic_read_dir, |
3331 | .readdir = proc_task_readdir, | 3219 | .iterate = proc_task_readdir, |
3332 | .llseek = default_llseek, | 3220 | .llseek = default_llseek, |
3333 | }; | 3221 | }; |
diff --git a/fs/proc/fd.c b/fs/proc/fd.c index d7a4a28ef630..75f2890abbd8 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c | |||
@@ -167,11 +167,10 @@ static int proc_fd_link(struct dentry *dentry, struct path *path) | |||
167 | return ret; | 167 | return ret; |
168 | } | 168 | } |
169 | 169 | ||
170 | static struct dentry * | 170 | static int |
171 | proc_fd_instantiate(struct inode *dir, struct dentry *dentry, | 171 | proc_fd_instantiate(struct inode *dir, struct dentry *dentry, |
172 | struct task_struct *task, const void *ptr) | 172 | struct task_struct *task, const void *ptr) |
173 | { | 173 | { |
174 | struct dentry *error = ERR_PTR(-ENOENT); | ||
175 | unsigned fd = (unsigned long)ptr; | 174 | unsigned fd = (unsigned long)ptr; |
176 | struct proc_inode *ei; | 175 | struct proc_inode *ei; |
177 | struct inode *inode; | 176 | struct inode *inode; |
@@ -194,9 +193,9 @@ proc_fd_instantiate(struct inode *dir, struct dentry *dentry, | |||
194 | 193 | ||
195 | /* Close the race of the process dying before we return the dentry */ | 194 | /* Close the race of the process dying before we return the dentry */ |
196 | if (tid_fd_revalidate(dentry, 0)) | 195 | if (tid_fd_revalidate(dentry, 0)) |
197 | error = NULL; | 196 | return 0; |
198 | out: | 197 | out: |
199 | return error; | 198 | return -ENOENT; |
200 | } | 199 | } |
201 | 200 | ||
202 | static struct dentry *proc_lookupfd_common(struct inode *dir, | 201 | static struct dentry *proc_lookupfd_common(struct inode *dir, |
@@ -204,7 +203,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, | |||
204 | instantiate_t instantiate) | 203 | instantiate_t instantiate) |
205 | { | 204 | { |
206 | struct task_struct *task = get_proc_task(dir); | 205 | struct task_struct *task = get_proc_task(dir); |
207 | struct dentry *result = ERR_PTR(-ENOENT); | 206 | int result = -ENOENT; |
208 | unsigned fd = name_to_int(dentry); | 207 | unsigned fd = name_to_int(dentry); |
209 | 208 | ||
210 | if (!task) | 209 | if (!task) |
@@ -216,77 +215,61 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, | |||
216 | out: | 215 | out: |
217 | put_task_struct(task); | 216 | put_task_struct(task); |
218 | out_no_task: | 217 | out_no_task: |
219 | return result; | 218 | return ERR_PTR(result); |
220 | } | 219 | } |
221 | 220 | ||
222 | static int proc_readfd_common(struct file * filp, void * dirent, | 221 | static int proc_readfd_common(struct file *file, struct dir_context *ctx, |
223 | filldir_t filldir, instantiate_t instantiate) | 222 | instantiate_t instantiate) |
224 | { | 223 | { |
225 | struct dentry *dentry = filp->f_path.dentry; | 224 | struct task_struct *p = get_proc_task(file_inode(file)); |
226 | struct inode *inode = dentry->d_inode; | ||
227 | struct task_struct *p = get_proc_task(inode); | ||
228 | struct files_struct *files; | 225 | struct files_struct *files; |
229 | unsigned int fd, ino; | 226 | unsigned int fd; |
230 | int retval; | ||
231 | 227 | ||
232 | retval = -ENOENT; | ||
233 | if (!p) | 228 | if (!p) |
234 | goto out_no_task; | 229 | return -ENOENT; |
235 | retval = 0; | ||
236 | |||
237 | fd = filp->f_pos; | ||
238 | switch (fd) { | ||
239 | case 0: | ||
240 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
241 | goto out; | ||
242 | filp->f_pos++; | ||
243 | case 1: | ||
244 | ino = parent_ino(dentry); | ||
245 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
246 | goto out; | ||
247 | filp->f_pos++; | ||
248 | default: | ||
249 | files = get_files_struct(p); | ||
250 | if (!files) | ||
251 | goto out; | ||
252 | rcu_read_lock(); | ||
253 | for (fd = filp->f_pos - 2; | ||
254 | fd < files_fdtable(files)->max_fds; | ||
255 | fd++, filp->f_pos++) { | ||
256 | char name[PROC_NUMBUF]; | ||
257 | int len; | ||
258 | int rv; | ||
259 | |||
260 | if (!fcheck_files(files, fd)) | ||
261 | continue; | ||
262 | rcu_read_unlock(); | ||
263 | 230 | ||
264 | len = snprintf(name, sizeof(name), "%d", fd); | 231 | if (!dir_emit_dots(file, ctx)) |
265 | rv = proc_fill_cache(filp, dirent, filldir, | 232 | goto out; |
266 | name, len, instantiate, p, | 233 | if (!dir_emit_dots(file, ctx)) |
267 | (void *)(unsigned long)fd); | 234 | goto out; |
268 | if (rv < 0) | 235 | files = get_files_struct(p); |
269 | goto out_fd_loop; | 236 | if (!files) |
270 | rcu_read_lock(); | 237 | goto out; |
271 | } | 238 | |
272 | rcu_read_unlock(); | 239 | rcu_read_lock(); |
273 | out_fd_loop: | 240 | for (fd = ctx->pos - 2; |
274 | put_files_struct(files); | 241 | fd < files_fdtable(files)->max_fds; |
242 | fd++, ctx->pos++) { | ||
243 | char name[PROC_NUMBUF]; | ||
244 | int len; | ||
245 | |||
246 | if (!fcheck_files(files, fd)) | ||
247 | continue; | ||
248 | rcu_read_unlock(); | ||
249 | |||
250 | len = snprintf(name, sizeof(name), "%d", fd); | ||
251 | if (!proc_fill_cache(file, ctx, | ||
252 | name, len, instantiate, p, | ||
253 | (void *)(unsigned long)fd)) | ||
254 | goto out_fd_loop; | ||
255 | rcu_read_lock(); | ||
275 | } | 256 | } |
257 | rcu_read_unlock(); | ||
258 | out_fd_loop: | ||
259 | put_files_struct(files); | ||
276 | out: | 260 | out: |
277 | put_task_struct(p); | 261 | put_task_struct(p); |
278 | out_no_task: | 262 | return 0; |
279 | return retval; | ||
280 | } | 263 | } |
281 | 264 | ||
282 | static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) | 265 | static int proc_readfd(struct file *file, struct dir_context *ctx) |
283 | { | 266 | { |
284 | return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); | 267 | return proc_readfd_common(file, ctx, proc_fd_instantiate); |
285 | } | 268 | } |
286 | 269 | ||
287 | const struct file_operations proc_fd_operations = { | 270 | const struct file_operations proc_fd_operations = { |
288 | .read = generic_read_dir, | 271 | .read = generic_read_dir, |
289 | .readdir = proc_readfd, | 272 | .iterate = proc_readfd, |
290 | .llseek = default_llseek, | 273 | .llseek = default_llseek, |
291 | }; | 274 | }; |
292 | 275 | ||
@@ -316,11 +299,10 @@ const struct inode_operations proc_fd_inode_operations = { | |||
316 | .setattr = proc_setattr, | 299 | .setattr = proc_setattr, |
317 | }; | 300 | }; |
318 | 301 | ||
319 | static struct dentry * | 302 | static int |
320 | proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, | 303 | proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, |
321 | struct task_struct *task, const void *ptr) | 304 | struct task_struct *task, const void *ptr) |
322 | { | 305 | { |
323 | struct dentry *error = ERR_PTR(-ENOENT); | ||
324 | unsigned fd = (unsigned long)ptr; | 306 | unsigned fd = (unsigned long)ptr; |
325 | struct proc_inode *ei; | 307 | struct proc_inode *ei; |
326 | struct inode *inode; | 308 | struct inode *inode; |
@@ -340,9 +322,9 @@ proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, | |||
340 | 322 | ||
341 | /* Close the race of the process dying before we return the dentry */ | 323 | /* Close the race of the process dying before we return the dentry */ |
342 | if (tid_fd_revalidate(dentry, 0)) | 324 | if (tid_fd_revalidate(dentry, 0)) |
343 | error = NULL; | 325 | return 0; |
344 | out: | 326 | out: |
345 | return error; | 327 | return -ENOENT; |
346 | } | 328 | } |
347 | 329 | ||
348 | static struct dentry * | 330 | static struct dentry * |
@@ -351,9 +333,9 @@ proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags) | |||
351 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); | 333 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); |
352 | } | 334 | } |
353 | 335 | ||
354 | static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) | 336 | static int proc_readfdinfo(struct file *file, struct dir_context *ctx) |
355 | { | 337 | { |
356 | return proc_readfd_common(filp, dirent, filldir, | 338 | return proc_readfd_common(file, ctx, |
357 | proc_fdinfo_instantiate); | 339 | proc_fdinfo_instantiate); |
358 | } | 340 | } |
359 | 341 | ||
@@ -364,6 +346,6 @@ const struct inode_operations proc_fdinfo_inode_operations = { | |||
364 | 346 | ||
365 | const struct file_operations proc_fdinfo_operations = { | 347 | const struct file_operations proc_fdinfo_operations = { |
366 | .read = generic_read_dir, | 348 | .read = generic_read_dir, |
367 | .readdir = proc_readfdinfo, | 349 | .iterate = proc_readfdinfo, |
368 | .llseek = default_llseek, | 350 | .llseek = default_llseek, |
369 | }; | 351 | }; |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index a2596afffae6..94441a407337 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -233,76 +233,52 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, | |||
233 | * value of the readdir() call, as long as it's non-negative | 233 | * value of the readdir() call, as long as it's non-negative |
234 | * for success.. | 234 | * for success.. |
235 | */ | 235 | */ |
236 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, | 236 | int proc_readdir_de(struct proc_dir_entry *de, struct file *file, |
237 | filldir_t filldir) | 237 | struct dir_context *ctx) |
238 | { | 238 | { |
239 | unsigned int ino; | ||
240 | int i; | 239 | int i; |
241 | struct inode *inode = file_inode(filp); | ||
242 | int ret = 0; | ||
243 | |||
244 | ino = inode->i_ino; | ||
245 | i = filp->f_pos; | ||
246 | switch (i) { | ||
247 | case 0: | ||
248 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
249 | goto out; | ||
250 | i++; | ||
251 | filp->f_pos++; | ||
252 | /* fall through */ | ||
253 | case 1: | ||
254 | if (filldir(dirent, "..", 2, i, | ||
255 | parent_ino(filp->f_path.dentry), | ||
256 | DT_DIR) < 0) | ||
257 | goto out; | ||
258 | i++; | ||
259 | filp->f_pos++; | ||
260 | /* fall through */ | ||
261 | default: | ||
262 | spin_lock(&proc_subdir_lock); | ||
263 | de = de->subdir; | ||
264 | i -= 2; | ||
265 | for (;;) { | ||
266 | if (!de) { | ||
267 | ret = 1; | ||
268 | spin_unlock(&proc_subdir_lock); | ||
269 | goto out; | ||
270 | } | ||
271 | if (!i) | ||
272 | break; | ||
273 | de = de->next; | ||
274 | i--; | ||
275 | } | ||
276 | 240 | ||
277 | do { | 241 | if (!dir_emit_dots(file, ctx)) |
278 | struct proc_dir_entry *next; | 242 | return 0; |
279 | 243 | ||
280 | /* filldir passes info to user space */ | 244 | spin_lock(&proc_subdir_lock); |
281 | pde_get(de); | 245 | de = de->subdir; |
282 | spin_unlock(&proc_subdir_lock); | 246 | i = ctx->pos - 2; |
283 | if (filldir(dirent, de->name, de->namelen, filp->f_pos, | 247 | for (;;) { |
284 | de->low_ino, de->mode >> 12) < 0) { | 248 | if (!de) { |
285 | pde_put(de); | ||
286 | goto out; | ||
287 | } | ||
288 | spin_lock(&proc_subdir_lock); | ||
289 | filp->f_pos++; | ||
290 | next = de->next; | ||
291 | pde_put(de); | ||
292 | de = next; | ||
293 | } while (de); | ||
294 | spin_unlock(&proc_subdir_lock); | 249 | spin_unlock(&proc_subdir_lock); |
250 | return 0; | ||
251 | } | ||
252 | if (!i) | ||
253 | break; | ||
254 | de = de->next; | ||
255 | i--; | ||
295 | } | 256 | } |
296 | ret = 1; | 257 | |
297 | out: | 258 | do { |
298 | return ret; | 259 | struct proc_dir_entry *next; |
260 | pde_get(de); | ||
261 | spin_unlock(&proc_subdir_lock); | ||
262 | if (!dir_emit(ctx, de->name, de->namelen, | ||
263 | de->low_ino, de->mode >> 12)) { | ||
264 | pde_put(de); | ||
265 | return 0; | ||
266 | } | ||
267 | spin_lock(&proc_subdir_lock); | ||
268 | ctx->pos++; | ||
269 | next = de->next; | ||
270 | pde_put(de); | ||
271 | de = next; | ||
272 | } while (de); | ||
273 | spin_unlock(&proc_subdir_lock); | ||
274 | return 0; | ||
299 | } | 275 | } |
300 | 276 | ||
301 | int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) | 277 | int proc_readdir(struct file *file, struct dir_context *ctx) |
302 | { | 278 | { |
303 | struct inode *inode = file_inode(filp); | 279 | struct inode *inode = file_inode(file); |
304 | 280 | ||
305 | return proc_readdir_de(PDE(inode), filp, dirent, filldir); | 281 | return proc_readdir_de(PDE(inode), file, ctx); |
306 | } | 282 | } |
307 | 283 | ||
308 | /* | 284 | /* |
@@ -313,7 +289,7 @@ int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
313 | static const struct file_operations proc_dir_operations = { | 289 | static const struct file_operations proc_dir_operations = { |
314 | .llseek = generic_file_llseek, | 290 | .llseek = generic_file_llseek, |
315 | .read = generic_read_dir, | 291 | .read = generic_read_dir, |
316 | .readdir = proc_readdir, | 292 | .iterate = proc_readdir, |
317 | }; | 293 | }; |
318 | 294 | ||
319 | /* | 295 | /* |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index d600fb098b6a..651d09a11dde 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -165,14 +165,14 @@ extern int proc_setattr(struct dentry *, struct iattr *); | |||
165 | extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *); | 165 | extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *); |
166 | extern int pid_revalidate(struct dentry *, unsigned int); | 166 | extern int pid_revalidate(struct dentry *, unsigned int); |
167 | extern int pid_delete_dentry(const struct dentry *); | 167 | extern int pid_delete_dentry(const struct dentry *); |
168 | extern int proc_pid_readdir(struct file *, void *, filldir_t); | 168 | extern int proc_pid_readdir(struct file *, struct dir_context *); |
169 | extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int); | 169 | extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int); |
170 | extern loff_t mem_lseek(struct file *, loff_t, int); | 170 | extern loff_t mem_lseek(struct file *, loff_t, int); |
171 | 171 | ||
172 | /* Lookups */ | 172 | /* Lookups */ |
173 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | 173 | typedef int instantiate_t(struct inode *, struct dentry *, |
174 | struct task_struct *, const void *); | 174 | struct task_struct *, const void *); |
175 | extern int proc_fill_cache(struct file *, void *, filldir_t, const char *, int, | 175 | extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int, |
176 | instantiate_t, struct task_struct *, const void *); | 176 | instantiate_t, struct task_struct *, const void *); |
177 | 177 | ||
178 | /* | 178 | /* |
@@ -183,8 +183,8 @@ extern spinlock_t proc_subdir_lock; | |||
183 | extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); | 183 | extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); |
184 | extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, | 184 | extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, |
185 | struct dentry *); | 185 | struct dentry *); |
186 | extern int proc_readdir(struct file *, void *, filldir_t); | 186 | extern int proc_readdir(struct file *, struct dir_context *); |
187 | extern int proc_readdir_de(struct proc_dir_entry *, struct file *, void *, filldir_t); | 187 | extern int proc_readdir_de(struct proc_dir_entry *, struct file *, struct dir_context *); |
188 | 188 | ||
189 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) | 189 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) |
190 | { | 190 | { |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 0a22194e5d58..06ea155e1a59 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -408,7 +408,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) | |||
408 | prpsinfo.pr_zomb = 0; | 408 | prpsinfo.pr_zomb = 0; |
409 | 409 | ||
410 | strcpy(prpsinfo.pr_fname, "vmlinux"); | 410 | strcpy(prpsinfo.pr_fname, "vmlinux"); |
411 | strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ); | 411 | strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs)); |
412 | 412 | ||
413 | nhdr->p_filesz += notesize(¬es[1]); | 413 | nhdr->p_filesz += notesize(¬es[1]); |
414 | bufp = storenote(¬es[1], bufp); | 414 | bufp = storenote(¬es[1], bufp); |
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 54bdc6701e9f..49a7fff2e83a 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
@@ -187,13 +187,12 @@ static const struct inode_operations proc_ns_link_inode_operations = { | |||
187 | .setattr = proc_setattr, | 187 | .setattr = proc_setattr, |
188 | }; | 188 | }; |
189 | 189 | ||
190 | static struct dentry *proc_ns_instantiate(struct inode *dir, | 190 | static int proc_ns_instantiate(struct inode *dir, |
191 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 191 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
192 | { | 192 | { |
193 | const struct proc_ns_operations *ns_ops = ptr; | 193 | const struct proc_ns_operations *ns_ops = ptr; |
194 | struct inode *inode; | 194 | struct inode *inode; |
195 | struct proc_inode *ei; | 195 | struct proc_inode *ei; |
196 | struct dentry *error = ERR_PTR(-ENOENT); | ||
197 | 196 | ||
198 | inode = proc_pid_make_inode(dir->i_sb, task); | 197 | inode = proc_pid_make_inode(dir->i_sb, task); |
199 | if (!inode) | 198 | if (!inode) |
@@ -208,90 +207,52 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, | |||
208 | d_add(dentry, inode); | 207 | d_add(dentry, inode); |
209 | /* Close the race of the process dying before we return the dentry */ | 208 | /* Close the race of the process dying before we return the dentry */ |
210 | if (pid_revalidate(dentry, 0)) | 209 | if (pid_revalidate(dentry, 0)) |
211 | error = NULL; | 210 | return 0; |
212 | out: | 211 | out: |
213 | return error; | 212 | return -ENOENT; |
214 | } | ||
215 | |||
216 | static int proc_ns_fill_cache(struct file *filp, void *dirent, | ||
217 | filldir_t filldir, struct task_struct *task, | ||
218 | const struct proc_ns_operations *ops) | ||
219 | { | ||
220 | return proc_fill_cache(filp, dirent, filldir, | ||
221 | ops->name, strlen(ops->name), | ||
222 | proc_ns_instantiate, task, ops); | ||
223 | } | 213 | } |
224 | 214 | ||
225 | static int proc_ns_dir_readdir(struct file *filp, void *dirent, | 215 | static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx) |
226 | filldir_t filldir) | ||
227 | { | 216 | { |
228 | int i; | 217 | struct task_struct *task = get_proc_task(file_inode(file)); |
229 | struct dentry *dentry = filp->f_path.dentry; | ||
230 | struct inode *inode = dentry->d_inode; | ||
231 | struct task_struct *task = get_proc_task(inode); | ||
232 | const struct proc_ns_operations **entry, **last; | 218 | const struct proc_ns_operations **entry, **last; |
233 | ino_t ino; | ||
234 | int ret; | ||
235 | 219 | ||
236 | ret = -ENOENT; | ||
237 | if (!task) | 220 | if (!task) |
238 | goto out_no_task; | 221 | return -ENOENT; |
239 | 222 | ||
240 | ret = 0; | 223 | if (!dir_emit_dots(file, ctx)) |
241 | i = filp->f_pos; | 224 | goto out; |
242 | switch (i) { | 225 | if (ctx->pos >= 2 + ARRAY_SIZE(ns_entries)) |
243 | case 0: | 226 | goto out; |
244 | ino = inode->i_ino; | 227 | entry = ns_entries + (ctx->pos - 2); |
245 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | 228 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; |
246 | goto out; | 229 | while (entry <= last) { |
247 | i++; | 230 | const struct proc_ns_operations *ops = *entry; |
248 | filp->f_pos++; | 231 | if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name), |
249 | /* fall through */ | 232 | proc_ns_instantiate, task, ops)) |
250 | case 1: | 233 | break; |
251 | ino = parent_ino(dentry); | 234 | ctx->pos++; |
252 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | 235 | entry++; |
253 | goto out; | ||
254 | i++; | ||
255 | filp->f_pos++; | ||
256 | /* fall through */ | ||
257 | default: | ||
258 | i -= 2; | ||
259 | if (i >= ARRAY_SIZE(ns_entries)) { | ||
260 | ret = 1; | ||
261 | goto out; | ||
262 | } | ||
263 | entry = ns_entries + i; | ||
264 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; | ||
265 | while (entry <= last) { | ||
266 | if (proc_ns_fill_cache(filp, dirent, filldir, | ||
267 | task, *entry) < 0) | ||
268 | goto out; | ||
269 | filp->f_pos++; | ||
270 | entry++; | ||
271 | } | ||
272 | } | 236 | } |
273 | |||
274 | ret = 1; | ||
275 | out: | 237 | out: |
276 | put_task_struct(task); | 238 | put_task_struct(task); |
277 | out_no_task: | 239 | return 0; |
278 | return ret; | ||
279 | } | 240 | } |
280 | 241 | ||
281 | const struct file_operations proc_ns_dir_operations = { | 242 | const struct file_operations proc_ns_dir_operations = { |
282 | .read = generic_read_dir, | 243 | .read = generic_read_dir, |
283 | .readdir = proc_ns_dir_readdir, | 244 | .iterate = proc_ns_dir_readdir, |
284 | }; | 245 | }; |
285 | 246 | ||
286 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, | 247 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, |
287 | struct dentry *dentry, unsigned int flags) | 248 | struct dentry *dentry, unsigned int flags) |
288 | { | 249 | { |
289 | struct dentry *error; | 250 | int error; |
290 | struct task_struct *task = get_proc_task(dir); | 251 | struct task_struct *task = get_proc_task(dir); |
291 | const struct proc_ns_operations **entry, **last; | 252 | const struct proc_ns_operations **entry, **last; |
292 | unsigned int len = dentry->d_name.len; | 253 | unsigned int len = dentry->d_name.len; |
293 | 254 | ||
294 | error = ERR_PTR(-ENOENT); | 255 | error = -ENOENT; |
295 | 256 | ||
296 | if (!task) | 257 | if (!task) |
297 | goto out_no_task; | 258 | goto out_no_task; |
@@ -310,7 +271,7 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, | |||
310 | out: | 271 | out: |
311 | put_task_struct(task); | 272 | put_task_struct(task); |
312 | out_no_task: | 273 | out_no_task: |
313 | return error; | 274 | return ERR_PTR(error); |
314 | } | 275 | } |
315 | 276 | ||
316 | const struct inode_operations proc_ns_dir_inode_operations = { | 277 | const struct inode_operations proc_ns_dir_inode_operations = { |
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 986e83220d56..4677bb7dc7c2 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
@@ -160,16 +160,15 @@ const struct inode_operations proc_net_inode_operations = { | |||
160 | .getattr = proc_tgid_net_getattr, | 160 | .getattr = proc_tgid_net_getattr, |
161 | }; | 161 | }; |
162 | 162 | ||
163 | static int proc_tgid_net_readdir(struct file *filp, void *dirent, | 163 | static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx) |
164 | filldir_t filldir) | ||
165 | { | 164 | { |
166 | int ret; | 165 | int ret; |
167 | struct net *net; | 166 | struct net *net; |
168 | 167 | ||
169 | ret = -EINVAL; | 168 | ret = -EINVAL; |
170 | net = get_proc_task_net(file_inode(filp)); | 169 | net = get_proc_task_net(file_inode(file)); |
171 | if (net != NULL) { | 170 | if (net != NULL) { |
172 | ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); | 171 | ret = proc_readdir_de(net->proc_net, file, ctx); |
173 | put_net(net); | 172 | put_net(net); |
174 | } | 173 | } |
175 | return ret; | 174 | return ret; |
@@ -178,7 +177,7 @@ static int proc_tgid_net_readdir(struct file *filp, void *dirent, | |||
178 | const struct file_operations proc_net_operations = { | 177 | const struct file_operations proc_net_operations = { |
179 | .llseek = generic_file_llseek, | 178 | .llseek = generic_file_llseek, |
180 | .read = generic_read_dir, | 179 | .read = generic_read_dir, |
181 | .readdir = proc_tgid_net_readdir, | 180 | .iterate = proc_tgid_net_readdir, |
182 | }; | 181 | }; |
183 | 182 | ||
184 | static __net_init int proc_net_ns_init(struct net *net) | 183 | static __net_init int proc_net_ns_init(struct net *net) |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index ac05f33a0dde..71290463a1d3 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -573,12 +573,12 @@ out: | |||
573 | return ret; | 573 | return ret; |
574 | } | 574 | } |
575 | 575 | ||
576 | static int proc_sys_fill_cache(struct file *filp, void *dirent, | 576 | static bool proc_sys_fill_cache(struct file *file, |
577 | filldir_t filldir, | 577 | struct dir_context *ctx, |
578 | struct ctl_table_header *head, | 578 | struct ctl_table_header *head, |
579 | struct ctl_table *table) | 579 | struct ctl_table *table) |
580 | { | 580 | { |
581 | struct dentry *child, *dir = filp->f_path.dentry; | 581 | struct dentry *child, *dir = file->f_path.dentry; |
582 | struct inode *inode; | 582 | struct inode *inode; |
583 | struct qstr qname; | 583 | struct qstr qname; |
584 | ino_t ino = 0; | 584 | ino_t ino = 0; |
@@ -595,38 +595,38 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, | |||
595 | inode = proc_sys_make_inode(dir->d_sb, head, table); | 595 | inode = proc_sys_make_inode(dir->d_sb, head, table); |
596 | if (!inode) { | 596 | if (!inode) { |
597 | dput(child); | 597 | dput(child); |
598 | return -ENOMEM; | 598 | return false; |
599 | } else { | 599 | } else { |
600 | d_set_d_op(child, &proc_sys_dentry_operations); | 600 | d_set_d_op(child, &proc_sys_dentry_operations); |
601 | d_add(child, inode); | 601 | d_add(child, inode); |
602 | } | 602 | } |
603 | } else { | 603 | } else { |
604 | return -ENOMEM; | 604 | return false; |
605 | } | 605 | } |
606 | } | 606 | } |
607 | inode = child->d_inode; | 607 | inode = child->d_inode; |
608 | ino = inode->i_ino; | 608 | ino = inode->i_ino; |
609 | type = inode->i_mode >> 12; | 609 | type = inode->i_mode >> 12; |
610 | dput(child); | 610 | dput(child); |
611 | return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); | 611 | return dir_emit(ctx, qname.name, qname.len, ino, type); |
612 | } | 612 | } |
613 | 613 | ||
614 | static int proc_sys_link_fill_cache(struct file *filp, void *dirent, | 614 | static bool proc_sys_link_fill_cache(struct file *file, |
615 | filldir_t filldir, | 615 | struct dir_context *ctx, |
616 | struct ctl_table_header *head, | 616 | struct ctl_table_header *head, |
617 | struct ctl_table *table) | 617 | struct ctl_table *table) |
618 | { | 618 | { |
619 | int err, ret = 0; | 619 | bool ret = true; |
620 | head = sysctl_head_grab(head); | 620 | head = sysctl_head_grab(head); |
621 | 621 | ||
622 | if (S_ISLNK(table->mode)) { | 622 | if (S_ISLNK(table->mode)) { |
623 | /* It is not an error if we can not follow the link ignore it */ | 623 | /* It is not an error if we can not follow the link ignore it */ |
624 | err = sysctl_follow_link(&head, &table, current->nsproxy); | 624 | int err = sysctl_follow_link(&head, &table, current->nsproxy); |
625 | if (err) | 625 | if (err) |
626 | goto out; | 626 | goto out; |
627 | } | 627 | } |
628 | 628 | ||
629 | ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); | 629 | ret = proc_sys_fill_cache(file, ctx, head, table); |
630 | out: | 630 | out: |
631 | sysctl_head_finish(head); | 631 | sysctl_head_finish(head); |
632 | return ret; | 632 | return ret; |
@@ -634,67 +634,50 @@ out: | |||
634 | 634 | ||
635 | static int scan(struct ctl_table_header *head, ctl_table *table, | 635 | static int scan(struct ctl_table_header *head, ctl_table *table, |
636 | unsigned long *pos, struct file *file, | 636 | unsigned long *pos, struct file *file, |
637 | void *dirent, filldir_t filldir) | 637 | struct dir_context *ctx) |
638 | { | 638 | { |
639 | int res; | 639 | bool res; |
640 | 640 | ||
641 | if ((*pos)++ < file->f_pos) | 641 | if ((*pos)++ < ctx->pos) |
642 | return 0; | 642 | return true; |
643 | 643 | ||
644 | if (unlikely(S_ISLNK(table->mode))) | 644 | if (unlikely(S_ISLNK(table->mode))) |
645 | res = proc_sys_link_fill_cache(file, dirent, filldir, head, table); | 645 | res = proc_sys_link_fill_cache(file, ctx, head, table); |
646 | else | 646 | else |
647 | res = proc_sys_fill_cache(file, dirent, filldir, head, table); | 647 | res = proc_sys_fill_cache(file, ctx, head, table); |
648 | 648 | ||
649 | if (res == 0) | 649 | if (res) |
650 | file->f_pos = *pos; | 650 | ctx->pos = *pos; |
651 | 651 | ||
652 | return res; | 652 | return res; |
653 | } | 653 | } |
654 | 654 | ||
655 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | 655 | static int proc_sys_readdir(struct file *file, struct dir_context *ctx) |
656 | { | 656 | { |
657 | struct dentry *dentry = filp->f_path.dentry; | 657 | struct ctl_table_header *head = grab_header(file_inode(file)); |
658 | struct inode *inode = dentry->d_inode; | ||
659 | struct ctl_table_header *head = grab_header(inode); | ||
660 | struct ctl_table_header *h = NULL; | 658 | struct ctl_table_header *h = NULL; |
661 | struct ctl_table *entry; | 659 | struct ctl_table *entry; |
662 | struct ctl_dir *ctl_dir; | 660 | struct ctl_dir *ctl_dir; |
663 | unsigned long pos; | 661 | unsigned long pos; |
664 | int ret = -EINVAL; | ||
665 | 662 | ||
666 | if (IS_ERR(head)) | 663 | if (IS_ERR(head)) |
667 | return PTR_ERR(head); | 664 | return PTR_ERR(head); |
668 | 665 | ||
669 | ctl_dir = container_of(head, struct ctl_dir, header); | 666 | ctl_dir = container_of(head, struct ctl_dir, header); |
670 | 667 | ||
671 | ret = 0; | 668 | if (!dir_emit_dots(file, ctx)) |
672 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ | 669 | return 0; |
673 | if (filp->f_pos == 0) { | 670 | |
674 | if (filldir(dirent, ".", 1, filp->f_pos, | ||
675 | inode->i_ino, DT_DIR) < 0) | ||
676 | goto out; | ||
677 | filp->f_pos++; | ||
678 | } | ||
679 | if (filp->f_pos == 1) { | ||
680 | if (filldir(dirent, "..", 2, filp->f_pos, | ||
681 | parent_ino(dentry), DT_DIR) < 0) | ||
682 | goto out; | ||
683 | filp->f_pos++; | ||
684 | } | ||
685 | pos = 2; | 671 | pos = 2; |
686 | 672 | ||
687 | for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { | 673 | for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { |
688 | ret = scan(h, entry, &pos, filp, dirent, filldir); | 674 | if (!scan(h, entry, &pos, file, ctx)) { |
689 | if (ret) { | ||
690 | sysctl_head_finish(h); | 675 | sysctl_head_finish(h); |
691 | break; | 676 | break; |
692 | } | 677 | } |
693 | } | 678 | } |
694 | ret = 1; | ||
695 | out: | ||
696 | sysctl_head_finish(head); | 679 | sysctl_head_finish(head); |
697 | return ret; | 680 | return 0; |
698 | } | 681 | } |
699 | 682 | ||
700 | static int proc_sys_permission(struct inode *inode, int mask) | 683 | static int proc_sys_permission(struct inode *inode, int mask) |
@@ -769,7 +752,7 @@ static const struct file_operations proc_sys_file_operations = { | |||
769 | 752 | ||
770 | static const struct file_operations proc_sys_dir_file_operations = { | 753 | static const struct file_operations proc_sys_dir_file_operations = { |
771 | .read = generic_read_dir, | 754 | .read = generic_read_dir, |
772 | .readdir = proc_sys_readdir, | 755 | .iterate = proc_sys_readdir, |
773 | .llseek = generic_file_llseek, | 756 | .llseek = generic_file_llseek, |
774 | }; | 757 | }; |
775 | 758 | ||
@@ -813,15 +796,16 @@ static int sysctl_is_seen(struct ctl_table_header *p) | |||
813 | return res; | 796 | return res; |
814 | } | 797 | } |
815 | 798 | ||
816 | static int proc_sys_compare(const struct dentry *parent, | 799 | static int proc_sys_compare(const struct dentry *parent, const struct dentry *dentry, |
817 | const struct inode *pinode, | ||
818 | const struct dentry *dentry, const struct inode *inode, | ||
819 | unsigned int len, const char *str, const struct qstr *name) | 800 | unsigned int len, const char *str, const struct qstr *name) |
820 | { | 801 | { |
821 | struct ctl_table_header *head; | 802 | struct ctl_table_header *head; |
803 | struct inode *inode; | ||
804 | |||
822 | /* Although proc doesn't have negative dentries, rcu-walk means | 805 | /* Although proc doesn't have negative dentries, rcu-walk means |
823 | * that inode here can be NULL */ | 806 | * that inode here can be NULL */ |
824 | /* AV: can it, indeed? */ | 807 | /* AV: can it, indeed? */ |
808 | inode = ACCESS_ONCE(dentry->d_inode); | ||
825 | if (!inode) | 809 | if (!inode) |
826 | return 1; | 810 | return 1; |
827 | if (name->len != len) | 811 | if (name->len != len) |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 41a6ea93f486..229e366598da 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -202,21 +202,14 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr | |||
202 | return proc_pid_lookup(dir, dentry, flags); | 202 | return proc_pid_lookup(dir, dentry, flags); |
203 | } | 203 | } |
204 | 204 | ||
205 | static int proc_root_readdir(struct file * filp, | 205 | static int proc_root_readdir(struct file *file, struct dir_context *ctx) |
206 | void * dirent, filldir_t filldir) | ||
207 | { | 206 | { |
208 | unsigned int nr = filp->f_pos; | 207 | if (ctx->pos < FIRST_PROCESS_ENTRY) { |
209 | int ret; | 208 | proc_readdir(file, ctx); |
210 | 209 | ctx->pos = FIRST_PROCESS_ENTRY; | |
211 | if (nr < FIRST_PROCESS_ENTRY) { | ||
212 | int error = proc_readdir(filp, dirent, filldir); | ||
213 | if (error <= 0) | ||
214 | return error; | ||
215 | filp->f_pos = FIRST_PROCESS_ENTRY; | ||
216 | } | 210 | } |
217 | 211 | ||
218 | ret = proc_pid_readdir(filp, dirent, filldir); | 212 | return proc_pid_readdir(file, ctx); |
219 | return ret; | ||
220 | } | 213 | } |
221 | 214 | ||
222 | /* | 215 | /* |
@@ -226,7 +219,7 @@ static int proc_root_readdir(struct file * filp, | |||
226 | */ | 219 | */ |
227 | static const struct file_operations proc_root_operations = { | 220 | static const struct file_operations proc_root_operations = { |
228 | .read = generic_read_dir, | 221 | .read = generic_read_dir, |
229 | .readdir = proc_root_readdir, | 222 | .iterate = proc_root_readdir, |
230 | .llseek = default_llseek, | 223 | .llseek = default_llseek, |
231 | }; | 224 | }; |
232 | 225 | ||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3e636d864d56..107d026f5d6e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/rmap.h> | 11 | #include <linux/rmap.h> |
12 | #include <linux/swap.h> | 12 | #include <linux/swap.h> |
13 | #include <linux/swapops.h> | 13 | #include <linux/swapops.h> |
14 | #include <linux/mmu_notifier.h> | ||
14 | 15 | ||
15 | #include <asm/elf.h> | 16 | #include <asm/elf.h> |
16 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
@@ -688,10 +689,66 @@ const struct file_operations proc_tid_smaps_operations = { | |||
688 | .release = seq_release_private, | 689 | .release = seq_release_private, |
689 | }; | 690 | }; |
690 | 691 | ||
692 | /* | ||
693 | * We do not want to have constant page-shift bits sitting in | ||
694 | * pagemap entries and are about to reuse them some time soon. | ||
695 | * | ||
696 | * Here's the "migration strategy": | ||
697 | * 1. when the system boots these bits remain what they are, | ||
698 | * but a warning about future change is printed in log; | ||
699 | * 2. once anyone clears soft-dirty bits via clear_refs file, | ||
700 | * these flag is set to denote, that user is aware of the | ||
701 | * new API and those page-shift bits change their meaning. | ||
702 | * The respective warning is printed in dmesg; | ||
703 | * 3. In a couple of releases we will remove all the mentions | ||
704 | * of page-shift in pagemap entries. | ||
705 | */ | ||
706 | |||
707 | static bool soft_dirty_cleared __read_mostly; | ||
708 | |||
709 | enum clear_refs_types { | ||
710 | CLEAR_REFS_ALL = 1, | ||
711 | CLEAR_REFS_ANON, | ||
712 | CLEAR_REFS_MAPPED, | ||
713 | CLEAR_REFS_SOFT_DIRTY, | ||
714 | CLEAR_REFS_LAST, | ||
715 | }; | ||
716 | |||
717 | struct clear_refs_private { | ||
718 | struct vm_area_struct *vma; | ||
719 | enum clear_refs_types type; | ||
720 | }; | ||
721 | |||
722 | static inline void clear_soft_dirty(struct vm_area_struct *vma, | ||
723 | unsigned long addr, pte_t *pte) | ||
724 | { | ||
725 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
726 | /* | ||
727 | * The soft-dirty tracker uses #PF-s to catch writes | ||
728 | * to pages, so write-protect the pte as well. See the | ||
729 | * Documentation/vm/soft-dirty.txt for full description | ||
730 | * of how soft-dirty works. | ||
731 | */ | ||
732 | pte_t ptent = *pte; | ||
733 | |||
734 | if (pte_present(ptent)) { | ||
735 | ptent = pte_wrprotect(ptent); | ||
736 | ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); | ||
737 | } else if (is_swap_pte(ptent)) { | ||
738 | ptent = pte_swp_clear_soft_dirty(ptent); | ||
739 | } else if (pte_file(ptent)) { | ||
740 | ptent = pte_file_clear_soft_dirty(ptent); | ||
741 | } | ||
742 | |||
743 | set_pte_at(vma->vm_mm, addr, pte, ptent); | ||
744 | #endif | ||
745 | } | ||
746 | |||
691 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | 747 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, |
692 | unsigned long end, struct mm_walk *walk) | 748 | unsigned long end, struct mm_walk *walk) |
693 | { | 749 | { |
694 | struct vm_area_struct *vma = walk->private; | 750 | struct clear_refs_private *cp = walk->private; |
751 | struct vm_area_struct *vma = cp->vma; | ||
695 | pte_t *pte, ptent; | 752 | pte_t *pte, ptent; |
696 | spinlock_t *ptl; | 753 | spinlock_t *ptl; |
697 | struct page *page; | 754 | struct page *page; |
@@ -703,6 +760,12 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
703 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 760 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
704 | for (; addr != end; pte++, addr += PAGE_SIZE) { | 761 | for (; addr != end; pte++, addr += PAGE_SIZE) { |
705 | ptent = *pte; | 762 | ptent = *pte; |
763 | |||
764 | if (cp->type == CLEAR_REFS_SOFT_DIRTY) { | ||
765 | clear_soft_dirty(vma, addr, pte); | ||
766 | continue; | ||
767 | } | ||
768 | |||
706 | if (!pte_present(ptent)) | 769 | if (!pte_present(ptent)) |
707 | continue; | 770 | continue; |
708 | 771 | ||
@@ -719,10 +782,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
719 | return 0; | 782 | return 0; |
720 | } | 783 | } |
721 | 784 | ||
722 | #define CLEAR_REFS_ALL 1 | ||
723 | #define CLEAR_REFS_ANON 2 | ||
724 | #define CLEAR_REFS_MAPPED 3 | ||
725 | |||
726 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | 785 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
727 | size_t count, loff_t *ppos) | 786 | size_t count, loff_t *ppos) |
728 | { | 787 | { |
@@ -730,7 +789,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
730 | char buffer[PROC_NUMBUF]; | 789 | char buffer[PROC_NUMBUF]; |
731 | struct mm_struct *mm; | 790 | struct mm_struct *mm; |
732 | struct vm_area_struct *vma; | 791 | struct vm_area_struct *vma; |
733 | int type; | 792 | enum clear_refs_types type; |
793 | int itype; | ||
734 | int rv; | 794 | int rv; |
735 | 795 | ||
736 | memset(buffer, 0, sizeof(buffer)); | 796 | memset(buffer, 0, sizeof(buffer)); |
@@ -738,23 +798,37 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
738 | count = sizeof(buffer) - 1; | 798 | count = sizeof(buffer) - 1; |
739 | if (copy_from_user(buffer, buf, count)) | 799 | if (copy_from_user(buffer, buf, count)) |
740 | return -EFAULT; | 800 | return -EFAULT; |
741 | rv = kstrtoint(strstrip(buffer), 10, &type); | 801 | rv = kstrtoint(strstrip(buffer), 10, &itype); |
742 | if (rv < 0) | 802 | if (rv < 0) |
743 | return rv; | 803 | return rv; |
744 | if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) | 804 | type = (enum clear_refs_types)itype; |
805 | if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) | ||
745 | return -EINVAL; | 806 | return -EINVAL; |
807 | |||
808 | if (type == CLEAR_REFS_SOFT_DIRTY) { | ||
809 | soft_dirty_cleared = true; | ||
810 | pr_warn_once("The pagemap bits 55-60 has changed their meaning! " | ||
811 | "See the linux/Documentation/vm/pagemap.txt for details.\n"); | ||
812 | } | ||
813 | |||
746 | task = get_proc_task(file_inode(file)); | 814 | task = get_proc_task(file_inode(file)); |
747 | if (!task) | 815 | if (!task) |
748 | return -ESRCH; | 816 | return -ESRCH; |
749 | mm = get_task_mm(task); | 817 | mm = get_task_mm(task); |
750 | if (mm) { | 818 | if (mm) { |
819 | struct clear_refs_private cp = { | ||
820 | .type = type, | ||
821 | }; | ||
751 | struct mm_walk clear_refs_walk = { | 822 | struct mm_walk clear_refs_walk = { |
752 | .pmd_entry = clear_refs_pte_range, | 823 | .pmd_entry = clear_refs_pte_range, |
753 | .mm = mm, | 824 | .mm = mm, |
825 | .private = &cp, | ||
754 | }; | 826 | }; |
755 | down_read(&mm->mmap_sem); | 827 | down_read(&mm->mmap_sem); |
828 | if (type == CLEAR_REFS_SOFT_DIRTY) | ||
829 | mmu_notifier_invalidate_range_start(mm, 0, -1); | ||
756 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 830 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
757 | clear_refs_walk.private = vma; | 831 | cp.vma = vma; |
758 | if (is_vm_hugetlb_page(vma)) | 832 | if (is_vm_hugetlb_page(vma)) |
759 | continue; | 833 | continue; |
760 | /* | 834 | /* |
@@ -773,6 +847,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
773 | walk_page_range(vma->vm_start, vma->vm_end, | 847 | walk_page_range(vma->vm_start, vma->vm_end, |
774 | &clear_refs_walk); | 848 | &clear_refs_walk); |
775 | } | 849 | } |
850 | if (type == CLEAR_REFS_SOFT_DIRTY) | ||
851 | mmu_notifier_invalidate_range_end(mm, 0, -1); | ||
776 | flush_tlb_mm(mm); | 852 | flush_tlb_mm(mm); |
777 | up_read(&mm->mmap_sem); | 853 | up_read(&mm->mmap_sem); |
778 | mmput(mm); | 854 | mmput(mm); |
@@ -792,14 +868,15 @@ typedef struct { | |||
792 | } pagemap_entry_t; | 868 | } pagemap_entry_t; |
793 | 869 | ||
794 | struct pagemapread { | 870 | struct pagemapread { |
795 | int pos, len; | 871 | int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ |
796 | pagemap_entry_t *buffer; | 872 | pagemap_entry_t *buffer; |
873 | bool v2; | ||
797 | }; | 874 | }; |
798 | 875 | ||
799 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | 876 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) |
800 | #define PAGEMAP_WALK_MASK (PMD_MASK) | 877 | #define PAGEMAP_WALK_MASK (PMD_MASK) |
801 | 878 | ||
802 | #define PM_ENTRY_BYTES sizeof(u64) | 879 | #define PM_ENTRY_BYTES sizeof(pagemap_entry_t) |
803 | #define PM_STATUS_BITS 3 | 880 | #define PM_STATUS_BITS 3 |
804 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) | 881 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) |
805 | #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) | 882 | #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) |
@@ -807,14 +884,17 @@ struct pagemapread { | |||
807 | #define PM_PSHIFT_BITS 6 | 884 | #define PM_PSHIFT_BITS 6 |
808 | #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) | 885 | #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) |
809 | #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) | 886 | #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) |
810 | #define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) | 887 | #define __PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) |
811 | #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) | 888 | #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) |
812 | #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) | 889 | #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) |
890 | /* in "new" pagemap pshift bits are occupied with more status bits */ | ||
891 | #define PM_STATUS2(v2, x) (__PM_PSHIFT(v2 ? x : PAGE_SHIFT)) | ||
813 | 892 | ||
893 | #define __PM_SOFT_DIRTY (1LL) | ||
814 | #define PM_PRESENT PM_STATUS(4LL) | 894 | #define PM_PRESENT PM_STATUS(4LL) |
815 | #define PM_SWAP PM_STATUS(2LL) | 895 | #define PM_SWAP PM_STATUS(2LL) |
816 | #define PM_FILE PM_STATUS(1LL) | 896 | #define PM_FILE PM_STATUS(1LL) |
817 | #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) | 897 | #define PM_NOT_PRESENT(v2) PM_STATUS2(v2, 0) |
818 | #define PM_END_OF_BUFFER 1 | 898 | #define PM_END_OF_BUFFER 1 |
819 | 899 | ||
820 | static inline pagemap_entry_t make_pme(u64 val) | 900 | static inline pagemap_entry_t make_pme(u64 val) |
@@ -837,7 +917,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, | |||
837 | struct pagemapread *pm = walk->private; | 917 | struct pagemapread *pm = walk->private; |
838 | unsigned long addr; | 918 | unsigned long addr; |
839 | int err = 0; | 919 | int err = 0; |
840 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | 920 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
841 | 921 | ||
842 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 922 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
843 | err = add_to_pagemap(addr, &pme, pm); | 923 | err = add_to_pagemap(addr, &pme, pm); |
@@ -847,38 +927,43 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, | |||
847 | return err; | 927 | return err; |
848 | } | 928 | } |
849 | 929 | ||
850 | static void pte_to_pagemap_entry(pagemap_entry_t *pme, | 930 | static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
851 | struct vm_area_struct *vma, unsigned long addr, pte_t pte) | 931 | struct vm_area_struct *vma, unsigned long addr, pte_t pte) |
852 | { | 932 | { |
853 | u64 frame, flags; | 933 | u64 frame, flags; |
854 | struct page *page = NULL; | 934 | struct page *page = NULL; |
935 | int flags2 = 0; | ||
855 | 936 | ||
856 | if (pte_present(pte)) { | 937 | if (pte_present(pte)) { |
857 | frame = pte_pfn(pte); | 938 | frame = pte_pfn(pte); |
858 | flags = PM_PRESENT; | 939 | flags = PM_PRESENT; |
859 | page = vm_normal_page(vma, addr, pte); | 940 | page = vm_normal_page(vma, addr, pte); |
860 | } else if (is_swap_pte(pte)) { | 941 | } else if (is_swap_pte(pte)) { |
861 | swp_entry_t entry = pte_to_swp_entry(pte); | 942 | swp_entry_t entry; |
862 | 943 | if (pte_swp_soft_dirty(pte)) | |
944 | flags2 |= __PM_SOFT_DIRTY; | ||
945 | entry = pte_to_swp_entry(pte); | ||
863 | frame = swp_type(entry) | | 946 | frame = swp_type(entry) | |
864 | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); | 947 | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); |
865 | flags = PM_SWAP; | 948 | flags = PM_SWAP; |
866 | if (is_migration_entry(entry)) | 949 | if (is_migration_entry(entry)) |
867 | page = migration_entry_to_page(entry); | 950 | page = migration_entry_to_page(entry); |
868 | } else { | 951 | } else { |
869 | *pme = make_pme(PM_NOT_PRESENT); | 952 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
870 | return; | 953 | return; |
871 | } | 954 | } |
872 | 955 | ||
873 | if (page && !PageAnon(page)) | 956 | if (page && !PageAnon(page)) |
874 | flags |= PM_FILE; | 957 | flags |= PM_FILE; |
958 | if (pte_soft_dirty(pte)) | ||
959 | flags2 |= __PM_SOFT_DIRTY; | ||
875 | 960 | ||
876 | *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags); | 961 | *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); |
877 | } | 962 | } |
878 | 963 | ||
879 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 964 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
880 | static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | 965 | static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
881 | pmd_t pmd, int offset) | 966 | pmd_t pmd, int offset, int pmd_flags2) |
882 | { | 967 | { |
883 | /* | 968 | /* |
884 | * Currently pmd for thp is always present because thp can not be | 969 | * Currently pmd for thp is always present because thp can not be |
@@ -887,13 +972,13 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | |||
887 | */ | 972 | */ |
888 | if (pmd_present(pmd)) | 973 | if (pmd_present(pmd)) |
889 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | 974 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) |
890 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); | 975 | | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); |
891 | else | 976 | else |
892 | *pme = make_pme(PM_NOT_PRESENT); | 977 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
893 | } | 978 | } |
894 | #else | 979 | #else |
895 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | 980 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
896 | pmd_t pmd, int offset) | 981 | pmd_t pmd, int offset, int pmd_flags2) |
897 | { | 982 | { |
898 | } | 983 | } |
899 | #endif | 984 | #endif |
@@ -905,17 +990,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
905 | struct pagemapread *pm = walk->private; | 990 | struct pagemapread *pm = walk->private; |
906 | pte_t *pte; | 991 | pte_t *pte; |
907 | int err = 0; | 992 | int err = 0; |
908 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | 993 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
909 | 994 | ||
910 | /* find the first VMA at or above 'addr' */ | 995 | /* find the first VMA at or above 'addr' */ |
911 | vma = find_vma(walk->mm, addr); | 996 | vma = find_vma(walk->mm, addr); |
912 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { | 997 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { |
998 | int pmd_flags2; | ||
999 | |||
1000 | pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); | ||
913 | for (; addr != end; addr += PAGE_SIZE) { | 1001 | for (; addr != end; addr += PAGE_SIZE) { |
914 | unsigned long offset; | 1002 | unsigned long offset; |
915 | 1003 | ||
916 | offset = (addr & ~PAGEMAP_WALK_MASK) >> | 1004 | offset = (addr & ~PAGEMAP_WALK_MASK) >> |
917 | PAGE_SHIFT; | 1005 | PAGE_SHIFT; |
918 | thp_pmd_to_pagemap_entry(&pme, *pmd, offset); | 1006 | thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2); |
919 | err = add_to_pagemap(addr, &pme, pm); | 1007 | err = add_to_pagemap(addr, &pme, pm); |
920 | if (err) | 1008 | if (err) |
921 | break; | 1009 | break; |
@@ -932,7 +1020,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
932 | * and need a new, higher one */ | 1020 | * and need a new, higher one */ |
933 | if (vma && (addr >= vma->vm_end)) { | 1021 | if (vma && (addr >= vma->vm_end)) { |
934 | vma = find_vma(walk->mm, addr); | 1022 | vma = find_vma(walk->mm, addr); |
935 | pme = make_pme(PM_NOT_PRESENT); | 1023 | pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
936 | } | 1024 | } |
937 | 1025 | ||
938 | /* check that 'vma' actually covers this address, | 1026 | /* check that 'vma' actually covers this address, |
@@ -940,7 +1028,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
940 | if (vma && (vma->vm_start <= addr) && | 1028 | if (vma && (vma->vm_start <= addr) && |
941 | !is_vm_hugetlb_page(vma)) { | 1029 | !is_vm_hugetlb_page(vma)) { |
942 | pte = pte_offset_map(pmd, addr); | 1030 | pte = pte_offset_map(pmd, addr); |
943 | pte_to_pagemap_entry(&pme, vma, addr, *pte); | 1031 | pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); |
944 | /* unmap before userspace copy */ | 1032 | /* unmap before userspace copy */ |
945 | pte_unmap(pte); | 1033 | pte_unmap(pte); |
946 | } | 1034 | } |
@@ -955,14 +1043,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
955 | } | 1043 | } |
956 | 1044 | ||
957 | #ifdef CONFIG_HUGETLB_PAGE | 1045 | #ifdef CONFIG_HUGETLB_PAGE |
958 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, | 1046 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
959 | pte_t pte, int offset) | 1047 | pte_t pte, int offset) |
960 | { | 1048 | { |
961 | if (pte_present(pte)) | 1049 | if (pte_present(pte)) |
962 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | 1050 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) |
963 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); | 1051 | | PM_STATUS2(pm->v2, 0) | PM_PRESENT); |
964 | else | 1052 | else |
965 | *pme = make_pme(PM_NOT_PRESENT); | 1053 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
966 | } | 1054 | } |
967 | 1055 | ||
968 | /* This function walks within one hugetlb entry in the single call */ | 1056 | /* This function walks within one hugetlb entry in the single call */ |
@@ -976,7 +1064,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
976 | 1064 | ||
977 | for (; addr != end; addr += PAGE_SIZE) { | 1065 | for (; addr != end; addr += PAGE_SIZE) { |
978 | int offset = (addr & ~hmask) >> PAGE_SHIFT; | 1066 | int offset = (addr & ~hmask) >> PAGE_SHIFT; |
979 | huge_pte_to_pagemap_entry(&pme, *pte, offset); | 1067 | huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); |
980 | err = add_to_pagemap(addr, &pme, pm); | 1068 | err = add_to_pagemap(addr, &pme, pm); |
981 | if (err) | 1069 | if (err) |
982 | return err; | 1070 | return err; |
@@ -1038,8 +1126,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
1038 | if (!count) | 1126 | if (!count) |
1039 | goto out_task; | 1127 | goto out_task; |
1040 | 1128 | ||
1041 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); | 1129 | pm.v2 = soft_dirty_cleared; |
1042 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); | 1130 | pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); |
1131 | pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY); | ||
1043 | ret = -ENOMEM; | 1132 | ret = -ENOMEM; |
1044 | if (!pm.buffer) | 1133 | if (!pm.buffer) |
1045 | goto out_task; | 1134 | goto out_task; |
@@ -1110,9 +1199,18 @@ out: | |||
1110 | return ret; | 1199 | return ret; |
1111 | } | 1200 | } |
1112 | 1201 | ||
1202 | static int pagemap_open(struct inode *inode, struct file *file) | ||
1203 | { | ||
1204 | pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " | ||
1205 | "to stop being page-shift some time soon. See the " | ||
1206 | "linux/Documentation/vm/pagemap.txt for details.\n"); | ||
1207 | return 0; | ||
1208 | } | ||
1209 | |||
1113 | const struct file_operations proc_pagemap_operations = { | 1210 | const struct file_operations proc_pagemap_operations = { |
1114 | .llseek = mem_lseek, /* borrow this */ | 1211 | .llseek = mem_lseek, /* borrow this */ |
1115 | .read = pagemap_read, | 1212 | .read = pagemap_read, |
1213 | .open = pagemap_open, | ||
1116 | }; | 1214 | }; |
1117 | #endif /* CONFIG_PROC_PAGE_MONITOR */ | 1215 | #endif /* CONFIG_PROC_PAGE_MONITOR */ |
1118 | 1216 | ||
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 9610ac772d7e..061894625903 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c | |||
@@ -20,8 +20,7 @@ static int uptime_proc_show(struct seq_file *m, void *v) | |||
20 | for_each_possible_cpu(i) | 20 | for_each_possible_cpu(i) |
21 | idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; | 21 | idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; |
22 | 22 | ||
23 | do_posix_clock_monotonic_gettime(&uptime); | 23 | get_monotonic_boottime(&uptime); |
24 | monotonic_to_bootbased(&uptime); | ||
25 | nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; | 24 | nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; |
26 | idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); | 25 | idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); |
27 | idle.tv_nsec = rem; | 26 | idle.tv_nsec = rem; |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 17f7e080d7ff..a1a16eb97c7b 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/crash_dump.h> | 21 | #include <linux/crash_dump.h> |
22 | #include <linux/list.h> | 22 | #include <linux/list.h> |
23 | #include <linux/vmalloc.h> | ||
23 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
24 | #include <asm/io.h> | 25 | #include <asm/io.h> |
25 | #include "internal.h" | 26 | #include "internal.h" |
@@ -32,6 +33,10 @@ static LIST_HEAD(vmcore_list); | |||
32 | /* Stores the pointer to the buffer containing kernel elf core headers. */ | 33 | /* Stores the pointer to the buffer containing kernel elf core headers. */ |
33 | static char *elfcorebuf; | 34 | static char *elfcorebuf; |
34 | static size_t elfcorebuf_sz; | 35 | static size_t elfcorebuf_sz; |
36 | static size_t elfcorebuf_sz_orig; | ||
37 | |||
38 | static char *elfnotes_buf; | ||
39 | static size_t elfnotes_sz; | ||
35 | 40 | ||
36 | /* Total size of vmcore file. */ | 41 | /* Total size of vmcore file. */ |
37 | static u64 vmcore_size; | 42 | static u64 vmcore_size; |
@@ -118,27 +123,6 @@ static ssize_t read_from_oldmem(char *buf, size_t count, | |||
118 | return read; | 123 | return read; |
119 | } | 124 | } |
120 | 125 | ||
121 | /* Maps vmcore file offset to respective physical address in memroy. */ | ||
122 | static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list, | ||
123 | struct vmcore **m_ptr) | ||
124 | { | ||
125 | struct vmcore *m; | ||
126 | u64 paddr; | ||
127 | |||
128 | list_for_each_entry(m, vc_list, list) { | ||
129 | u64 start, end; | ||
130 | start = m->offset; | ||
131 | end = m->offset + m->size - 1; | ||
132 | if (offset >= start && offset <= end) { | ||
133 | paddr = m->paddr + offset - start; | ||
134 | *m_ptr = m; | ||
135 | return paddr; | ||
136 | } | ||
137 | } | ||
138 | *m_ptr = NULL; | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | /* Read from the ELF header and then the crash dump. On error, negative value is | 126 | /* Read from the ELF header and then the crash dump. On error, negative value is |
143 | * returned otherwise number of bytes read are returned. | 127 | * returned otherwise number of bytes read are returned. |
144 | */ | 128 | */ |
@@ -147,8 +131,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
147 | { | 131 | { |
148 | ssize_t acc = 0, tmp; | 132 | ssize_t acc = 0, tmp; |
149 | size_t tsz; | 133 | size_t tsz; |
150 | u64 start, nr_bytes; | 134 | u64 start; |
151 | struct vmcore *curr_m = NULL; | 135 | struct vmcore *m = NULL; |
152 | 136 | ||
153 | if (buflen == 0 || *fpos >= vmcore_size) | 137 | if (buflen == 0 || *fpos >= vmcore_size) |
154 | return 0; | 138 | return 0; |
@@ -159,9 +143,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
159 | 143 | ||
160 | /* Read ELF core header */ | 144 | /* Read ELF core header */ |
161 | if (*fpos < elfcorebuf_sz) { | 145 | if (*fpos < elfcorebuf_sz) { |
162 | tsz = elfcorebuf_sz - *fpos; | 146 | tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); |
163 | if (buflen < tsz) | ||
164 | tsz = buflen; | ||
165 | if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) | 147 | if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) |
166 | return -EFAULT; | 148 | return -EFAULT; |
167 | buflen -= tsz; | 149 | buflen -= tsz; |
@@ -174,39 +156,161 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
174 | return acc; | 156 | return acc; |
175 | } | 157 | } |
176 | 158 | ||
177 | start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m); | 159 | /* Read Elf note segment */ |
178 | if (!curr_m) | 160 | if (*fpos < elfcorebuf_sz + elfnotes_sz) { |
179 | return -EINVAL; | 161 | void *kaddr; |
180 | |||
181 | while (buflen) { | ||
182 | tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK)); | ||
183 | 162 | ||
184 | /* Calculate left bytes in current memory segment. */ | 163 | tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); |
185 | nr_bytes = (curr_m->size - (start - curr_m->paddr)); | 164 | kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; |
186 | if (tsz > nr_bytes) | 165 | if (copy_to_user(buffer, kaddr, tsz)) |
187 | tsz = nr_bytes; | 166 | return -EFAULT; |
188 | |||
189 | tmp = read_from_oldmem(buffer, tsz, &start, 1); | ||
190 | if (tmp < 0) | ||
191 | return tmp; | ||
192 | buflen -= tsz; | 167 | buflen -= tsz; |
193 | *fpos += tsz; | 168 | *fpos += tsz; |
194 | buffer += tsz; | 169 | buffer += tsz; |
195 | acc += tsz; | 170 | acc += tsz; |
196 | if (start >= (curr_m->paddr + curr_m->size)) { | 171 | |
197 | if (curr_m->list.next == &vmcore_list) | 172 | /* leave now if filled buffer already */ |
198 | return acc; /*EOF*/ | 173 | if (buflen == 0) |
199 | curr_m = list_entry(curr_m->list.next, | 174 | return acc; |
200 | struct vmcore, list); | 175 | } |
201 | start = curr_m->paddr; | 176 | |
177 | list_for_each_entry(m, &vmcore_list, list) { | ||
178 | if (*fpos < m->offset + m->size) { | ||
179 | tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); | ||
180 | start = m->paddr + *fpos - m->offset; | ||
181 | tmp = read_from_oldmem(buffer, tsz, &start, 1); | ||
182 | if (tmp < 0) | ||
183 | return tmp; | ||
184 | buflen -= tsz; | ||
185 | *fpos += tsz; | ||
186 | buffer += tsz; | ||
187 | acc += tsz; | ||
188 | |||
189 | /* leave now if filled buffer already */ | ||
190 | if (buflen == 0) | ||
191 | return acc; | ||
202 | } | 192 | } |
203 | } | 193 | } |
194 | |||
204 | return acc; | 195 | return acc; |
205 | } | 196 | } |
206 | 197 | ||
198 | /** | ||
199 | * alloc_elfnotes_buf - allocate buffer for ELF note segment in | ||
200 | * vmalloc memory | ||
201 | * | ||
202 | * @notes_sz: size of buffer | ||
203 | * | ||
204 | * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap | ||
205 | * the buffer to user-space by means of remap_vmalloc_range(). | ||
206 | * | ||
207 | * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is | ||
208 | * disabled and there's no need to allow users to mmap the buffer. | ||
209 | */ | ||
210 | static inline char *alloc_elfnotes_buf(size_t notes_sz) | ||
211 | { | ||
212 | #ifdef CONFIG_MMU | ||
213 | return vmalloc_user(notes_sz); | ||
214 | #else | ||
215 | return vzalloc(notes_sz); | ||
216 | #endif | ||
217 | } | ||
218 | |||
219 | /* | ||
220 | * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is | ||
221 | * essential for mmap_vmcore() in order to map physically | ||
222 | * non-contiguous objects (ELF header, ELF note segment and memory | ||
223 | * regions in the 1st kernel pointed to by PT_LOAD entries) into | ||
224 | * virtually contiguous user-space in ELF layout. | ||
225 | */ | ||
226 | #if defined(CONFIG_MMU) && !defined(CONFIG_S390) | ||
227 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | ||
228 | { | ||
229 | size_t size = vma->vm_end - vma->vm_start; | ||
230 | u64 start, end, len, tsz; | ||
231 | struct vmcore *m; | ||
232 | |||
233 | start = (u64)vma->vm_pgoff << PAGE_SHIFT; | ||
234 | end = start + size; | ||
235 | |||
236 | if (size > vmcore_size || end > vmcore_size) | ||
237 | return -EINVAL; | ||
238 | |||
239 | if (vma->vm_flags & (VM_WRITE | VM_EXEC)) | ||
240 | return -EPERM; | ||
241 | |||
242 | vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); | ||
243 | vma->vm_flags |= VM_MIXEDMAP; | ||
244 | |||
245 | len = 0; | ||
246 | |||
247 | if (start < elfcorebuf_sz) { | ||
248 | u64 pfn; | ||
249 | |||
250 | tsz = min(elfcorebuf_sz - (size_t)start, size); | ||
251 | pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT; | ||
252 | if (remap_pfn_range(vma, vma->vm_start, pfn, tsz, | ||
253 | vma->vm_page_prot)) | ||
254 | return -EAGAIN; | ||
255 | size -= tsz; | ||
256 | start += tsz; | ||
257 | len += tsz; | ||
258 | |||
259 | if (size == 0) | ||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | if (start < elfcorebuf_sz + elfnotes_sz) { | ||
264 | void *kaddr; | ||
265 | |||
266 | tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); | ||
267 | kaddr = elfnotes_buf + start - elfcorebuf_sz; | ||
268 | if (remap_vmalloc_range_partial(vma, vma->vm_start + len, | ||
269 | kaddr, tsz)) | ||
270 | goto fail; | ||
271 | size -= tsz; | ||
272 | start += tsz; | ||
273 | len += tsz; | ||
274 | |||
275 | if (size == 0) | ||
276 | return 0; | ||
277 | } | ||
278 | |||
279 | list_for_each_entry(m, &vmcore_list, list) { | ||
280 | if (start < m->offset + m->size) { | ||
281 | u64 paddr = 0; | ||
282 | |||
283 | tsz = min_t(size_t, m->offset + m->size - start, size); | ||
284 | paddr = m->paddr + start - m->offset; | ||
285 | if (remap_pfn_range(vma, vma->vm_start + len, | ||
286 | paddr >> PAGE_SHIFT, tsz, | ||
287 | vma->vm_page_prot)) | ||
288 | goto fail; | ||
289 | size -= tsz; | ||
290 | start += tsz; | ||
291 | len += tsz; | ||
292 | |||
293 | if (size == 0) | ||
294 | return 0; | ||
295 | } | ||
296 | } | ||
297 | |||
298 | return 0; | ||
299 | fail: | ||
300 | do_munmap(vma->vm_mm, vma->vm_start, len); | ||
301 | return -EAGAIN; | ||
302 | } | ||
303 | #else | ||
304 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | ||
305 | { | ||
306 | return -ENOSYS; | ||
307 | } | ||
308 | #endif | ||
309 | |||
207 | static const struct file_operations proc_vmcore_operations = { | 310 | static const struct file_operations proc_vmcore_operations = { |
208 | .read = read_vmcore, | 311 | .read = read_vmcore, |
209 | .llseek = default_llseek, | 312 | .llseek = default_llseek, |
313 | .mmap = mmap_vmcore, | ||
210 | }; | 314 | }; |
211 | 315 | ||
212 | static struct vmcore* __init get_new_element(void) | 316 | static struct vmcore* __init get_new_element(void) |
@@ -214,61 +318,40 @@ static struct vmcore* __init get_new_element(void) | |||
214 | return kzalloc(sizeof(struct vmcore), GFP_KERNEL); | 318 | return kzalloc(sizeof(struct vmcore), GFP_KERNEL); |
215 | } | 319 | } |
216 | 320 | ||
217 | static u64 __init get_vmcore_size_elf64(char *elfptr) | 321 | static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz, |
322 | struct list_head *vc_list) | ||
218 | { | 323 | { |
219 | int i; | ||
220 | u64 size; | ||
221 | Elf64_Ehdr *ehdr_ptr; | ||
222 | Elf64_Phdr *phdr_ptr; | ||
223 | |||
224 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | ||
225 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); | ||
226 | size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr)); | ||
227 | for (i = 0; i < ehdr_ptr->e_phnum; i++) { | ||
228 | size += phdr_ptr->p_memsz; | ||
229 | phdr_ptr++; | ||
230 | } | ||
231 | return size; | ||
232 | } | ||
233 | |||
234 | static u64 __init get_vmcore_size_elf32(char *elfptr) | ||
235 | { | ||
236 | int i; | ||
237 | u64 size; | 324 | u64 size; |
238 | Elf32_Ehdr *ehdr_ptr; | 325 | struct vmcore *m; |
239 | Elf32_Phdr *phdr_ptr; | ||
240 | 326 | ||
241 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 327 | size = elfsz + elfnotesegsz; |
242 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); | 328 | list_for_each_entry(m, vc_list, list) { |
243 | size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr)); | 329 | size += m->size; |
244 | for (i = 0; i < ehdr_ptr->e_phnum; i++) { | ||
245 | size += phdr_ptr->p_memsz; | ||
246 | phdr_ptr++; | ||
247 | } | 330 | } |
248 | return size; | 331 | return size; |
249 | } | 332 | } |
250 | 333 | ||
251 | /* Merges all the PT_NOTE headers into one. */ | 334 | /** |
252 | static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | 335 | * update_note_header_size_elf64 - update p_memsz member of each PT_NOTE entry |
253 | struct list_head *vc_list) | 336 | * |
337 | * @ehdr_ptr: ELF header | ||
338 | * | ||
339 | * This function updates p_memsz member of each PT_NOTE entry in the | ||
340 | * program header table pointed to by @ehdr_ptr to real size of ELF | ||
341 | * note segment. | ||
342 | */ | ||
343 | static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) | ||
254 | { | 344 | { |
255 | int i, nr_ptnote=0, rc=0; | 345 | int i, rc=0; |
256 | char *tmp; | 346 | Elf64_Phdr *phdr_ptr; |
257 | Elf64_Ehdr *ehdr_ptr; | ||
258 | Elf64_Phdr phdr, *phdr_ptr; | ||
259 | Elf64_Nhdr *nhdr_ptr; | 347 | Elf64_Nhdr *nhdr_ptr; |
260 | u64 phdr_sz = 0, note_off; | ||
261 | 348 | ||
262 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 349 | phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); |
263 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); | ||
264 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 350 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
265 | int j; | ||
266 | void *notes_section; | 351 | void *notes_section; |
267 | struct vmcore *new; | ||
268 | u64 offset, max_sz, sz, real_sz = 0; | 352 | u64 offset, max_sz, sz, real_sz = 0; |
269 | if (phdr_ptr->p_type != PT_NOTE) | 353 | if (phdr_ptr->p_type != PT_NOTE) |
270 | continue; | 354 | continue; |
271 | nr_ptnote++; | ||
272 | max_sz = phdr_ptr->p_memsz; | 355 | max_sz = phdr_ptr->p_memsz; |
273 | offset = phdr_ptr->p_offset; | 356 | offset = phdr_ptr->p_offset; |
274 | notes_section = kmalloc(max_sz, GFP_KERNEL); | 357 | notes_section = kmalloc(max_sz, GFP_KERNEL); |
@@ -280,7 +363,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
280 | return rc; | 363 | return rc; |
281 | } | 364 | } |
282 | nhdr_ptr = notes_section; | 365 | nhdr_ptr = notes_section; |
283 | for (j = 0; j < max_sz; j += sz) { | 366 | while (real_sz < max_sz) { |
284 | if (nhdr_ptr->n_namesz == 0) | 367 | if (nhdr_ptr->n_namesz == 0) |
285 | break; | 368 | break; |
286 | sz = sizeof(Elf64_Nhdr) + | 369 | sz = sizeof(Elf64_Nhdr) + |
@@ -289,26 +372,122 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
289 | real_sz += sz; | 372 | real_sz += sz; |
290 | nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); | 373 | nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); |
291 | } | 374 | } |
292 | |||
293 | /* Add this contiguous chunk of notes section to vmcore list.*/ | ||
294 | new = get_new_element(); | ||
295 | if (!new) { | ||
296 | kfree(notes_section); | ||
297 | return -ENOMEM; | ||
298 | } | ||
299 | new->paddr = phdr_ptr->p_offset; | ||
300 | new->size = real_sz; | ||
301 | list_add_tail(&new->list, vc_list); | ||
302 | phdr_sz += real_sz; | ||
303 | kfree(notes_section); | 375 | kfree(notes_section); |
376 | phdr_ptr->p_memsz = real_sz; | ||
377 | } | ||
378 | |||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | /** | ||
383 | * get_note_number_and_size_elf64 - get the number of PT_NOTE program | ||
384 | * headers and sum of real size of their ELF note segment headers and | ||
385 | * data. | ||
386 | * | ||
387 | * @ehdr_ptr: ELF header | ||
388 | * @nr_ptnote: buffer for the number of PT_NOTE program headers | ||
389 | * @sz_ptnote: buffer for size of unique PT_NOTE program header | ||
390 | * | ||
391 | * This function is used to merge multiple PT_NOTE program headers | ||
392 | * into a unique single one. The resulting unique entry will have | ||
393 | * @sz_ptnote in its phdr->p_mem. | ||
394 | * | ||
395 | * It is assumed that program headers with PT_NOTE type pointed to by | ||
396 | * @ehdr_ptr has already been updated by update_note_header_size_elf64 | ||
397 | * and each of PT_NOTE program headers has actual ELF note segment | ||
398 | * size in its p_memsz member. | ||
399 | */ | ||
400 | static int __init get_note_number_and_size_elf64(const Elf64_Ehdr *ehdr_ptr, | ||
401 | int *nr_ptnote, u64 *sz_ptnote) | ||
402 | { | ||
403 | int i; | ||
404 | Elf64_Phdr *phdr_ptr; | ||
405 | |||
406 | *nr_ptnote = *sz_ptnote = 0; | ||
407 | |||
408 | phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); | ||
409 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
410 | if (phdr_ptr->p_type != PT_NOTE) | ||
411 | continue; | ||
412 | *nr_ptnote += 1; | ||
413 | *sz_ptnote += phdr_ptr->p_memsz; | ||
414 | } | ||
415 | |||
416 | return 0; | ||
417 | } | ||
418 | |||
419 | /** | ||
420 | * copy_notes_elf64 - copy ELF note segments in a given buffer | ||
421 | * | ||
422 | * @ehdr_ptr: ELF header | ||
423 | * @notes_buf: buffer into which ELF note segments are copied | ||
424 | * | ||
425 | * This function is used to copy ELF note segment in the 1st kernel | ||
426 | * into the buffer @notes_buf in the 2nd kernel. It is assumed that | ||
427 | * size of the buffer @notes_buf is equal to or larger than sum of the | ||
428 | * real ELF note segment headers and data. | ||
429 | * | ||
430 | * It is assumed that program headers with PT_NOTE type pointed to by | ||
431 | * @ehdr_ptr has already been updated by update_note_header_size_elf64 | ||
432 | * and each of PT_NOTE program headers has actual ELF note segment | ||
433 | * size in its p_memsz member. | ||
434 | */ | ||
435 | static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf) | ||
436 | { | ||
437 | int i, rc=0; | ||
438 | Elf64_Phdr *phdr_ptr; | ||
439 | |||
440 | phdr_ptr = (Elf64_Phdr*)(ehdr_ptr + 1); | ||
441 | |||
442 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
443 | u64 offset; | ||
444 | if (phdr_ptr->p_type != PT_NOTE) | ||
445 | continue; | ||
446 | offset = phdr_ptr->p_offset; | ||
447 | rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); | ||
448 | if (rc < 0) | ||
449 | return rc; | ||
450 | notes_buf += phdr_ptr->p_memsz; | ||
304 | } | 451 | } |
305 | 452 | ||
453 | return 0; | ||
454 | } | ||
455 | |||
456 | /* Merges all the PT_NOTE headers into one. */ | ||
457 | static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | ||
458 | char **notes_buf, size_t *notes_sz) | ||
459 | { | ||
460 | int i, nr_ptnote=0, rc=0; | ||
461 | char *tmp; | ||
462 | Elf64_Ehdr *ehdr_ptr; | ||
463 | Elf64_Phdr phdr; | ||
464 | u64 phdr_sz = 0, note_off; | ||
465 | |||
466 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | ||
467 | |||
468 | rc = update_note_header_size_elf64(ehdr_ptr); | ||
469 | if (rc < 0) | ||
470 | return rc; | ||
471 | |||
472 | rc = get_note_number_and_size_elf64(ehdr_ptr, &nr_ptnote, &phdr_sz); | ||
473 | if (rc < 0) | ||
474 | return rc; | ||
475 | |||
476 | *notes_sz = roundup(phdr_sz, PAGE_SIZE); | ||
477 | *notes_buf = alloc_elfnotes_buf(*notes_sz); | ||
478 | if (!*notes_buf) | ||
479 | return -ENOMEM; | ||
480 | |||
481 | rc = copy_notes_elf64(ehdr_ptr, *notes_buf); | ||
482 | if (rc < 0) | ||
483 | return rc; | ||
484 | |||
306 | /* Prepare merged PT_NOTE program header. */ | 485 | /* Prepare merged PT_NOTE program header. */ |
307 | phdr.p_type = PT_NOTE; | 486 | phdr.p_type = PT_NOTE; |
308 | phdr.p_flags = 0; | 487 | phdr.p_flags = 0; |
309 | note_off = sizeof(Elf64_Ehdr) + | 488 | note_off = sizeof(Elf64_Ehdr) + |
310 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); | 489 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); |
311 | phdr.p_offset = note_off; | 490 | phdr.p_offset = roundup(note_off, PAGE_SIZE); |
312 | phdr.p_vaddr = phdr.p_paddr = 0; | 491 | phdr.p_vaddr = phdr.p_paddr = 0; |
313 | phdr.p_filesz = phdr.p_memsz = phdr_sz; | 492 | phdr.p_filesz = phdr.p_memsz = phdr_sz; |
314 | phdr.p_align = 0; | 493 | phdr.p_align = 0; |
@@ -322,6 +501,8 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
322 | i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); | 501 | i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); |
323 | *elfsz = *elfsz - i; | 502 | *elfsz = *elfsz - i; |
324 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); | 503 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); |
504 | memset(elfptr + *elfsz, 0, i); | ||
505 | *elfsz = roundup(*elfsz, PAGE_SIZE); | ||
325 | 506 | ||
326 | /* Modify e_phnum to reflect merged headers. */ | 507 | /* Modify e_phnum to reflect merged headers. */ |
327 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; | 508 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; |
@@ -329,27 +510,27 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
329 | return 0; | 510 | return 0; |
330 | } | 511 | } |
331 | 512 | ||
332 | /* Merges all the PT_NOTE headers into one. */ | 513 | /** |
333 | static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | 514 | * update_note_header_size_elf32 - update p_memsz member of each PT_NOTE entry |
334 | struct list_head *vc_list) | 515 | * |
516 | * @ehdr_ptr: ELF header | ||
517 | * | ||
518 | * This function updates p_memsz member of each PT_NOTE entry in the | ||
519 | * program header table pointed to by @ehdr_ptr to real size of ELF | ||
520 | * note segment. | ||
521 | */ | ||
522 | static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) | ||
335 | { | 523 | { |
336 | int i, nr_ptnote=0, rc=0; | 524 | int i, rc=0; |
337 | char *tmp; | 525 | Elf32_Phdr *phdr_ptr; |
338 | Elf32_Ehdr *ehdr_ptr; | ||
339 | Elf32_Phdr phdr, *phdr_ptr; | ||
340 | Elf32_Nhdr *nhdr_ptr; | 526 | Elf32_Nhdr *nhdr_ptr; |
341 | u64 phdr_sz = 0, note_off; | ||
342 | 527 | ||
343 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 528 | phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); |
344 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); | ||
345 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 529 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
346 | int j; | ||
347 | void *notes_section; | 530 | void *notes_section; |
348 | struct vmcore *new; | ||
349 | u64 offset, max_sz, sz, real_sz = 0; | 531 | u64 offset, max_sz, sz, real_sz = 0; |
350 | if (phdr_ptr->p_type != PT_NOTE) | 532 | if (phdr_ptr->p_type != PT_NOTE) |
351 | continue; | 533 | continue; |
352 | nr_ptnote++; | ||
353 | max_sz = phdr_ptr->p_memsz; | 534 | max_sz = phdr_ptr->p_memsz; |
354 | offset = phdr_ptr->p_offset; | 535 | offset = phdr_ptr->p_offset; |
355 | notes_section = kmalloc(max_sz, GFP_KERNEL); | 536 | notes_section = kmalloc(max_sz, GFP_KERNEL); |
@@ -361,7 +542,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
361 | return rc; | 542 | return rc; |
362 | } | 543 | } |
363 | nhdr_ptr = notes_section; | 544 | nhdr_ptr = notes_section; |
364 | for (j = 0; j < max_sz; j += sz) { | 545 | while (real_sz < max_sz) { |
365 | if (nhdr_ptr->n_namesz == 0) | 546 | if (nhdr_ptr->n_namesz == 0) |
366 | break; | 547 | break; |
367 | sz = sizeof(Elf32_Nhdr) + | 548 | sz = sizeof(Elf32_Nhdr) + |
@@ -370,26 +551,122 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
370 | real_sz += sz; | 551 | real_sz += sz; |
371 | nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); | 552 | nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); |
372 | } | 553 | } |
373 | |||
374 | /* Add this contiguous chunk of notes section to vmcore list.*/ | ||
375 | new = get_new_element(); | ||
376 | if (!new) { | ||
377 | kfree(notes_section); | ||
378 | return -ENOMEM; | ||
379 | } | ||
380 | new->paddr = phdr_ptr->p_offset; | ||
381 | new->size = real_sz; | ||
382 | list_add_tail(&new->list, vc_list); | ||
383 | phdr_sz += real_sz; | ||
384 | kfree(notes_section); | 554 | kfree(notes_section); |
555 | phdr_ptr->p_memsz = real_sz; | ||
556 | } | ||
557 | |||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | /** | ||
562 | * get_note_number_and_size_elf32 - get the number of PT_NOTE program | ||
563 | * headers and sum of real size of their ELF note segment headers and | ||
564 | * data. | ||
565 | * | ||
566 | * @ehdr_ptr: ELF header | ||
567 | * @nr_ptnote: buffer for the number of PT_NOTE program headers | ||
568 | * @sz_ptnote: buffer for size of unique PT_NOTE program header | ||
569 | * | ||
570 | * This function is used to merge multiple PT_NOTE program headers | ||
571 | * into a unique single one. The resulting unique entry will have | ||
572 | * @sz_ptnote in its phdr->p_mem. | ||
573 | * | ||
574 | * It is assumed that program headers with PT_NOTE type pointed to by | ||
575 | * @ehdr_ptr has already been updated by update_note_header_size_elf32 | ||
576 | * and each of PT_NOTE program headers has actual ELF note segment | ||
577 | * size in its p_memsz member. | ||
578 | */ | ||
579 | static int __init get_note_number_and_size_elf32(const Elf32_Ehdr *ehdr_ptr, | ||
580 | int *nr_ptnote, u64 *sz_ptnote) | ||
581 | { | ||
582 | int i; | ||
583 | Elf32_Phdr *phdr_ptr; | ||
584 | |||
585 | *nr_ptnote = *sz_ptnote = 0; | ||
586 | |||
587 | phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); | ||
588 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
589 | if (phdr_ptr->p_type != PT_NOTE) | ||
590 | continue; | ||
591 | *nr_ptnote += 1; | ||
592 | *sz_ptnote += phdr_ptr->p_memsz; | ||
593 | } | ||
594 | |||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | /** | ||
599 | * copy_notes_elf32 - copy ELF note segments in a given buffer | ||
600 | * | ||
601 | * @ehdr_ptr: ELF header | ||
602 | * @notes_buf: buffer into which ELF note segments are copied | ||
603 | * | ||
604 | * This function is used to copy ELF note segment in the 1st kernel | ||
605 | * into the buffer @notes_buf in the 2nd kernel. It is assumed that | ||
606 | * size of the buffer @notes_buf is equal to or larger than sum of the | ||
607 | * real ELF note segment headers and data. | ||
608 | * | ||
609 | * It is assumed that program headers with PT_NOTE type pointed to by | ||
610 | * @ehdr_ptr has already been updated by update_note_header_size_elf32 | ||
611 | * and each of PT_NOTE program headers has actual ELF note segment | ||
612 | * size in its p_memsz member. | ||
613 | */ | ||
614 | static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf) | ||
615 | { | ||
616 | int i, rc=0; | ||
617 | Elf32_Phdr *phdr_ptr; | ||
618 | |||
619 | phdr_ptr = (Elf32_Phdr*)(ehdr_ptr + 1); | ||
620 | |||
621 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
622 | u64 offset; | ||
623 | if (phdr_ptr->p_type != PT_NOTE) | ||
624 | continue; | ||
625 | offset = phdr_ptr->p_offset; | ||
626 | rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); | ||
627 | if (rc < 0) | ||
628 | return rc; | ||
629 | notes_buf += phdr_ptr->p_memsz; | ||
385 | } | 630 | } |
386 | 631 | ||
632 | return 0; | ||
633 | } | ||
634 | |||
635 | /* Merges all the PT_NOTE headers into one. */ | ||
636 | static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | ||
637 | char **notes_buf, size_t *notes_sz) | ||
638 | { | ||
639 | int i, nr_ptnote=0, rc=0; | ||
640 | char *tmp; | ||
641 | Elf32_Ehdr *ehdr_ptr; | ||
642 | Elf32_Phdr phdr; | ||
643 | u64 phdr_sz = 0, note_off; | ||
644 | |||
645 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | ||
646 | |||
647 | rc = update_note_header_size_elf32(ehdr_ptr); | ||
648 | if (rc < 0) | ||
649 | return rc; | ||
650 | |||
651 | rc = get_note_number_and_size_elf32(ehdr_ptr, &nr_ptnote, &phdr_sz); | ||
652 | if (rc < 0) | ||
653 | return rc; | ||
654 | |||
655 | *notes_sz = roundup(phdr_sz, PAGE_SIZE); | ||
656 | *notes_buf = alloc_elfnotes_buf(*notes_sz); | ||
657 | if (!*notes_buf) | ||
658 | return -ENOMEM; | ||
659 | |||
660 | rc = copy_notes_elf32(ehdr_ptr, *notes_buf); | ||
661 | if (rc < 0) | ||
662 | return rc; | ||
663 | |||
387 | /* Prepare merged PT_NOTE program header. */ | 664 | /* Prepare merged PT_NOTE program header. */ |
388 | phdr.p_type = PT_NOTE; | 665 | phdr.p_type = PT_NOTE; |
389 | phdr.p_flags = 0; | 666 | phdr.p_flags = 0; |
390 | note_off = sizeof(Elf32_Ehdr) + | 667 | note_off = sizeof(Elf32_Ehdr) + |
391 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); | 668 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); |
392 | phdr.p_offset = note_off; | 669 | phdr.p_offset = roundup(note_off, PAGE_SIZE); |
393 | phdr.p_vaddr = phdr.p_paddr = 0; | 670 | phdr.p_vaddr = phdr.p_paddr = 0; |
394 | phdr.p_filesz = phdr.p_memsz = phdr_sz; | 671 | phdr.p_filesz = phdr.p_memsz = phdr_sz; |
395 | phdr.p_align = 0; | 672 | phdr.p_align = 0; |
@@ -403,6 +680,8 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
403 | i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); | 680 | i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); |
404 | *elfsz = *elfsz - i; | 681 | *elfsz = *elfsz - i; |
405 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); | 682 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); |
683 | memset(elfptr + *elfsz, 0, i); | ||
684 | *elfsz = roundup(*elfsz, PAGE_SIZE); | ||
406 | 685 | ||
407 | /* Modify e_phnum to reflect merged headers. */ | 686 | /* Modify e_phnum to reflect merged headers. */ |
408 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; | 687 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; |
@@ -414,6 +693,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
414 | * the new offset fields of exported program headers. */ | 693 | * the new offset fields of exported program headers. */ |
415 | static int __init process_ptload_program_headers_elf64(char *elfptr, | 694 | static int __init process_ptload_program_headers_elf64(char *elfptr, |
416 | size_t elfsz, | 695 | size_t elfsz, |
696 | size_t elfnotes_sz, | ||
417 | struct list_head *vc_list) | 697 | struct list_head *vc_list) |
418 | { | 698 | { |
419 | int i; | 699 | int i; |
@@ -425,32 +705,38 @@ static int __init process_ptload_program_headers_elf64(char *elfptr, | |||
425 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 705 | ehdr_ptr = (Elf64_Ehdr *)elfptr; |
426 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ | 706 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ |
427 | 707 | ||
428 | /* First program header is PT_NOTE header. */ | 708 | /* Skip Elf header, program headers and Elf note segment. */ |
429 | vmcore_off = sizeof(Elf64_Ehdr) + | 709 | vmcore_off = elfsz + elfnotes_sz; |
430 | (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) + | ||
431 | phdr_ptr->p_memsz; /* Note sections */ | ||
432 | 710 | ||
433 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 711 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
712 | u64 paddr, start, end, size; | ||
713 | |||
434 | if (phdr_ptr->p_type != PT_LOAD) | 714 | if (phdr_ptr->p_type != PT_LOAD) |
435 | continue; | 715 | continue; |
436 | 716 | ||
717 | paddr = phdr_ptr->p_offset; | ||
718 | start = rounddown(paddr, PAGE_SIZE); | ||
719 | end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); | ||
720 | size = end - start; | ||
721 | |||
437 | /* Add this contiguous chunk of memory to vmcore list.*/ | 722 | /* Add this contiguous chunk of memory to vmcore list.*/ |
438 | new = get_new_element(); | 723 | new = get_new_element(); |
439 | if (!new) | 724 | if (!new) |
440 | return -ENOMEM; | 725 | return -ENOMEM; |
441 | new->paddr = phdr_ptr->p_offset; | 726 | new->paddr = start; |
442 | new->size = phdr_ptr->p_memsz; | 727 | new->size = size; |
443 | list_add_tail(&new->list, vc_list); | 728 | list_add_tail(&new->list, vc_list); |
444 | 729 | ||
445 | /* Update the program header offset. */ | 730 | /* Update the program header offset. */ |
446 | phdr_ptr->p_offset = vmcore_off; | 731 | phdr_ptr->p_offset = vmcore_off + (paddr - start); |
447 | vmcore_off = vmcore_off + phdr_ptr->p_memsz; | 732 | vmcore_off = vmcore_off + size; |
448 | } | 733 | } |
449 | return 0; | 734 | return 0; |
450 | } | 735 | } |
451 | 736 | ||
452 | static int __init process_ptload_program_headers_elf32(char *elfptr, | 737 | static int __init process_ptload_program_headers_elf32(char *elfptr, |
453 | size_t elfsz, | 738 | size_t elfsz, |
739 | size_t elfnotes_sz, | ||
454 | struct list_head *vc_list) | 740 | struct list_head *vc_list) |
455 | { | 741 | { |
456 | int i; | 742 | int i; |
@@ -462,43 +748,44 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, | |||
462 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 748 | ehdr_ptr = (Elf32_Ehdr *)elfptr; |
463 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ | 749 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ |
464 | 750 | ||
465 | /* First program header is PT_NOTE header. */ | 751 | /* Skip Elf header, program headers and Elf note segment. */ |
466 | vmcore_off = sizeof(Elf32_Ehdr) + | 752 | vmcore_off = elfsz + elfnotes_sz; |
467 | (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) + | ||
468 | phdr_ptr->p_memsz; /* Note sections */ | ||
469 | 753 | ||
470 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 754 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
755 | u64 paddr, start, end, size; | ||
756 | |||
471 | if (phdr_ptr->p_type != PT_LOAD) | 757 | if (phdr_ptr->p_type != PT_LOAD) |
472 | continue; | 758 | continue; |
473 | 759 | ||
760 | paddr = phdr_ptr->p_offset; | ||
761 | start = rounddown(paddr, PAGE_SIZE); | ||
762 | end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); | ||
763 | size = end - start; | ||
764 | |||
474 | /* Add this contiguous chunk of memory to vmcore list.*/ | 765 | /* Add this contiguous chunk of memory to vmcore list.*/ |
475 | new = get_new_element(); | 766 | new = get_new_element(); |
476 | if (!new) | 767 | if (!new) |
477 | return -ENOMEM; | 768 | return -ENOMEM; |
478 | new->paddr = phdr_ptr->p_offset; | 769 | new->paddr = start; |
479 | new->size = phdr_ptr->p_memsz; | 770 | new->size = size; |
480 | list_add_tail(&new->list, vc_list); | 771 | list_add_tail(&new->list, vc_list); |
481 | 772 | ||
482 | /* Update the program header offset */ | 773 | /* Update the program header offset */ |
483 | phdr_ptr->p_offset = vmcore_off; | 774 | phdr_ptr->p_offset = vmcore_off + (paddr - start); |
484 | vmcore_off = vmcore_off + phdr_ptr->p_memsz; | 775 | vmcore_off = vmcore_off + size; |
485 | } | 776 | } |
486 | return 0; | 777 | return 0; |
487 | } | 778 | } |
488 | 779 | ||
489 | /* Sets offset fields of vmcore elements. */ | 780 | /* Sets offset fields of vmcore elements. */ |
490 | static void __init set_vmcore_list_offsets_elf64(char *elfptr, | 781 | static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz, |
491 | struct list_head *vc_list) | 782 | struct list_head *vc_list) |
492 | { | 783 | { |
493 | loff_t vmcore_off; | 784 | loff_t vmcore_off; |
494 | Elf64_Ehdr *ehdr_ptr; | ||
495 | struct vmcore *m; | 785 | struct vmcore *m; |
496 | 786 | ||
497 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 787 | /* Skip Elf header, program headers and Elf note segment. */ |
498 | 788 | vmcore_off = elfsz + elfnotes_sz; | |
499 | /* Skip Elf header and program headers. */ | ||
500 | vmcore_off = sizeof(Elf64_Ehdr) + | ||
501 | (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr); | ||
502 | 789 | ||
503 | list_for_each_entry(m, vc_list, list) { | 790 | list_for_each_entry(m, vc_list, list) { |
504 | m->offset = vmcore_off; | 791 | m->offset = vmcore_off; |
@@ -506,24 +793,12 @@ static void __init set_vmcore_list_offsets_elf64(char *elfptr, | |||
506 | } | 793 | } |
507 | } | 794 | } |
508 | 795 | ||
509 | /* Sets offset fields of vmcore elements. */ | 796 | static void free_elfcorebuf(void) |
510 | static void __init set_vmcore_list_offsets_elf32(char *elfptr, | ||
511 | struct list_head *vc_list) | ||
512 | { | 797 | { |
513 | loff_t vmcore_off; | 798 | free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig)); |
514 | Elf32_Ehdr *ehdr_ptr; | 799 | elfcorebuf = NULL; |
515 | struct vmcore *m; | 800 | vfree(elfnotes_buf); |
516 | 801 | elfnotes_buf = NULL; | |
517 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | ||
518 | |||
519 | /* Skip Elf header and program headers. */ | ||
520 | vmcore_off = sizeof(Elf32_Ehdr) + | ||
521 | (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr); | ||
522 | |||
523 | list_for_each_entry(m, vc_list, list) { | ||
524 | m->offset = vmcore_off; | ||
525 | vmcore_off += m->size; | ||
526 | } | ||
527 | } | 802 | } |
528 | 803 | ||
529 | static int __init parse_crash_elf64_headers(void) | 804 | static int __init parse_crash_elf64_headers(void) |
@@ -554,31 +829,32 @@ static int __init parse_crash_elf64_headers(void) | |||
554 | } | 829 | } |
555 | 830 | ||
556 | /* Read in all elf headers. */ | 831 | /* Read in all elf headers. */ |
557 | elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr); | 832 | elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) + |
558 | elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); | 833 | ehdr.e_phnum * sizeof(Elf64_Phdr); |
834 | elfcorebuf_sz = elfcorebuf_sz_orig; | ||
835 | elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
836 | get_order(elfcorebuf_sz_orig)); | ||
559 | if (!elfcorebuf) | 837 | if (!elfcorebuf) |
560 | return -ENOMEM; | 838 | return -ENOMEM; |
561 | addr = elfcorehdr_addr; | 839 | addr = elfcorehdr_addr; |
562 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); | 840 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); |
563 | if (rc < 0) { | 841 | if (rc < 0) |
564 | kfree(elfcorebuf); | 842 | goto fail; |
565 | return rc; | ||
566 | } | ||
567 | 843 | ||
568 | /* Merge all PT_NOTE headers into one. */ | 844 | /* Merge all PT_NOTE headers into one. */ |
569 | rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list); | 845 | rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, |
570 | if (rc) { | 846 | &elfnotes_buf, &elfnotes_sz); |
571 | kfree(elfcorebuf); | 847 | if (rc) |
572 | return rc; | 848 | goto fail; |
573 | } | ||
574 | rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, | 849 | rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, |
575 | &vmcore_list); | 850 | elfnotes_sz, &vmcore_list); |
576 | if (rc) { | 851 | if (rc) |
577 | kfree(elfcorebuf); | 852 | goto fail; |
578 | return rc; | 853 | set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); |
579 | } | ||
580 | set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list); | ||
581 | return 0; | 854 | return 0; |
855 | fail: | ||
856 | free_elfcorebuf(); | ||
857 | return rc; | ||
582 | } | 858 | } |
583 | 859 | ||
584 | static int __init parse_crash_elf32_headers(void) | 860 | static int __init parse_crash_elf32_headers(void) |
@@ -609,31 +885,31 @@ static int __init parse_crash_elf32_headers(void) | |||
609 | } | 885 | } |
610 | 886 | ||
611 | /* Read in all elf headers. */ | 887 | /* Read in all elf headers. */ |
612 | elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); | 888 | elfcorebuf_sz_orig = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); |
613 | elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); | 889 | elfcorebuf_sz = elfcorebuf_sz_orig; |
890 | elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
891 | get_order(elfcorebuf_sz_orig)); | ||
614 | if (!elfcorebuf) | 892 | if (!elfcorebuf) |
615 | return -ENOMEM; | 893 | return -ENOMEM; |
616 | addr = elfcorehdr_addr; | 894 | addr = elfcorehdr_addr; |
617 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); | 895 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); |
618 | if (rc < 0) { | 896 | if (rc < 0) |
619 | kfree(elfcorebuf); | 897 | goto fail; |
620 | return rc; | ||
621 | } | ||
622 | 898 | ||
623 | /* Merge all PT_NOTE headers into one. */ | 899 | /* Merge all PT_NOTE headers into one. */ |
624 | rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list); | 900 | rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, |
625 | if (rc) { | 901 | &elfnotes_buf, &elfnotes_sz); |
626 | kfree(elfcorebuf); | 902 | if (rc) |
627 | return rc; | 903 | goto fail; |
628 | } | ||
629 | rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, | 904 | rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, |
630 | &vmcore_list); | 905 | elfnotes_sz, &vmcore_list); |
631 | if (rc) { | 906 | if (rc) |
632 | kfree(elfcorebuf); | 907 | goto fail; |
633 | return rc; | 908 | set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); |
634 | } | ||
635 | set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list); | ||
636 | return 0; | 909 | return 0; |
910 | fail: | ||
911 | free_elfcorebuf(); | ||
912 | return rc; | ||
637 | } | 913 | } |
638 | 914 | ||
639 | static int __init parse_crash_elf_headers(void) | 915 | static int __init parse_crash_elf_headers(void) |
@@ -655,20 +931,19 @@ static int __init parse_crash_elf_headers(void) | |||
655 | rc = parse_crash_elf64_headers(); | 931 | rc = parse_crash_elf64_headers(); |
656 | if (rc) | 932 | if (rc) |
657 | return rc; | 933 | return rc; |
658 | |||
659 | /* Determine vmcore size. */ | ||
660 | vmcore_size = get_vmcore_size_elf64(elfcorebuf); | ||
661 | } else if (e_ident[EI_CLASS] == ELFCLASS32) { | 934 | } else if (e_ident[EI_CLASS] == ELFCLASS32) { |
662 | rc = parse_crash_elf32_headers(); | 935 | rc = parse_crash_elf32_headers(); |
663 | if (rc) | 936 | if (rc) |
664 | return rc; | 937 | return rc; |
665 | |||
666 | /* Determine vmcore size. */ | ||
667 | vmcore_size = get_vmcore_size_elf32(elfcorebuf); | ||
668 | } else { | 938 | } else { |
669 | pr_warn("Warning: Core image elf header is not sane\n"); | 939 | pr_warn("Warning: Core image elf header is not sane\n"); |
670 | return -EINVAL; | 940 | return -EINVAL; |
671 | } | 941 | } |
942 | |||
943 | /* Determine vmcore size. */ | ||
944 | vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz, | ||
945 | &vmcore_list); | ||
946 | |||
672 | return 0; | 947 | return 0; |
673 | } | 948 | } |
674 | 949 | ||
@@ -711,7 +986,6 @@ void vmcore_cleanup(void) | |||
711 | list_del(&m->list); | 986 | list_del(&m->list); |
712 | kfree(m); | 987 | kfree(m); |
713 | } | 988 | } |
714 | kfree(elfcorebuf); | 989 | free_elfcorebuf(); |
715 | elfcorebuf = NULL; | ||
716 | } | 990 | } |
717 | EXPORT_SYMBOL_GPL(vmcore_cleanup); | 991 | EXPORT_SYMBOL_GPL(vmcore_cleanup); |
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index 43b12807a51d..76a4eeb92982 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c | |||
@@ -44,7 +44,7 @@ static void notrace pstore_ftrace_call(unsigned long ip, | |||
44 | rec.parent_ip = parent_ip; | 44 | rec.parent_ip = parent_ip; |
45 | pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); | 45 | pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); |
46 | psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, | 46 | psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, |
47 | sizeof(rec), psinfo); | 47 | 0, sizeof(rec), psinfo); |
48 | 48 | ||
49 | local_irq_restore(flags); | 49 | local_irq_restore(flags); |
50 | } | 50 | } |
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index e4bcb2cf055a..71bf5f4ae84c 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c | |||
@@ -178,6 +178,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) | |||
178 | if (p->psi->erase) | 178 | if (p->psi->erase) |
179 | p->psi->erase(p->type, p->id, p->count, | 179 | p->psi->erase(p->type, p->id, p->count, |
180 | dentry->d_inode->i_ctime, p->psi); | 180 | dentry->d_inode->i_ctime, p->psi); |
181 | else | ||
182 | return -EPERM; | ||
181 | 183 | ||
182 | return simple_unlink(dir, dentry); | 184 | return simple_unlink(dir, dentry); |
183 | } | 185 | } |
@@ -324,6 +326,15 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, | |||
324 | case PSTORE_TYPE_MCE: | 326 | case PSTORE_TYPE_MCE: |
325 | sprintf(name, "mce-%s-%lld", psname, id); | 327 | sprintf(name, "mce-%s-%lld", psname, id); |
326 | break; | 328 | break; |
329 | case PSTORE_TYPE_PPC_RTAS: | ||
330 | sprintf(name, "rtas-%s-%lld", psname, id); | ||
331 | break; | ||
332 | case PSTORE_TYPE_PPC_OF: | ||
333 | sprintf(name, "powerpc-ofw-%s-%lld", psname, id); | ||
334 | break; | ||
335 | case PSTORE_TYPE_PPC_COMMON: | ||
336 | sprintf(name, "powerpc-common-%s-%lld", psname, id); | ||
337 | break; | ||
327 | case PSTORE_TYPE_UNKNOWN: | 338 | case PSTORE_TYPE_UNKNOWN: |
328 | sprintf(name, "unknown-%s-%lld", psname, id); | 339 | sprintf(name, "unknown-%s-%lld", psname, id); |
329 | break; | 340 | break; |
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 86d1038b5a12..422962ae9fc2 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
@@ -159,7 +159,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
159 | break; | 159 | break; |
160 | 160 | ||
161 | ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, | 161 | ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, |
162 | oopscount, hsize + len, psinfo); | 162 | oopscount, hsize, hsize + len, psinfo); |
163 | if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) | 163 | if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) |
164 | pstore_new_entry = 1; | 164 | pstore_new_entry = 1; |
165 | 165 | ||
@@ -196,7 +196,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) | |||
196 | spin_lock_irqsave(&psinfo->buf_lock, flags); | 196 | spin_lock_irqsave(&psinfo->buf_lock, flags); |
197 | } | 197 | } |
198 | memcpy(psinfo->buf, s, c); | 198 | memcpy(psinfo->buf, s, c); |
199 | psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, c, psinfo); | 199 | psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo); |
200 | spin_unlock_irqrestore(&psinfo->buf_lock, flags); | 200 | spin_unlock_irqrestore(&psinfo->buf_lock, flags); |
201 | s += c; | 201 | s += c; |
202 | c = e - s; | 202 | c = e - s; |
@@ -221,9 +221,11 @@ static void pstore_register_console(void) {} | |||
221 | static int pstore_write_compat(enum pstore_type_id type, | 221 | static int pstore_write_compat(enum pstore_type_id type, |
222 | enum kmsg_dump_reason reason, | 222 | enum kmsg_dump_reason reason, |
223 | u64 *id, unsigned int part, int count, | 223 | u64 *id, unsigned int part, int count, |
224 | size_t size, struct pstore_info *psi) | 224 | size_t hsize, size_t size, |
225 | struct pstore_info *psi) | ||
225 | { | 226 | { |
226 | return psi->write_buf(type, reason, id, part, psinfo->buf, size, psi); | 227 | return psi->write_buf(type, reason, id, part, psinfo->buf, hsize, |
228 | size, psi); | ||
227 | } | 229 | } |
228 | 230 | ||
229 | /* | 231 | /* |
@@ -239,17 +241,15 @@ int pstore_register(struct pstore_info *psi) | |||
239 | { | 241 | { |
240 | struct module *owner = psi->owner; | 242 | struct module *owner = psi->owner; |
241 | 243 | ||
244 | if (backend && strcmp(backend, psi->name)) | ||
245 | return -EPERM; | ||
246 | |||
242 | spin_lock(&pstore_lock); | 247 | spin_lock(&pstore_lock); |
243 | if (psinfo) { | 248 | if (psinfo) { |
244 | spin_unlock(&pstore_lock); | 249 | spin_unlock(&pstore_lock); |
245 | return -EBUSY; | 250 | return -EBUSY; |
246 | } | 251 | } |
247 | 252 | ||
248 | if (backend && strcmp(backend, psi->name)) { | ||
249 | spin_unlock(&pstore_lock); | ||
250 | return -EINVAL; | ||
251 | } | ||
252 | |||
253 | if (!psi->write) | 253 | if (!psi->write) |
254 | psi->write = pstore_write_compat; | 254 | psi->write = pstore_write_compat; |
255 | psinfo = psi; | 255 | psinfo = psi; |
@@ -274,6 +274,9 @@ int pstore_register(struct pstore_info *psi) | |||
274 | add_timer(&pstore_timer); | 274 | add_timer(&pstore_timer); |
275 | } | 275 | } |
276 | 276 | ||
277 | pr_info("pstore: Registered %s as persistent store backend\n", | ||
278 | psi->name); | ||
279 | |||
277 | return 0; | 280 | return 0; |
278 | } | 281 | } |
279 | EXPORT_SYMBOL_GPL(pstore_register); | 282 | EXPORT_SYMBOL_GPL(pstore_register); |
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 1376e5a8f0d6..a6119f9469e2 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c | |||
@@ -195,7 +195,8 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) | |||
195 | static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, | 195 | static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, |
196 | enum kmsg_dump_reason reason, | 196 | enum kmsg_dump_reason reason, |
197 | u64 *id, unsigned int part, | 197 | u64 *id, unsigned int part, |
198 | const char *buf, size_t size, | 198 | const char *buf, |
199 | size_t hsize, size_t size, | ||
199 | struct pstore_info *psi) | 200 | struct pstore_info *psi) |
200 | { | 201 | { |
201 | struct ramoops_context *cxt = psi->data; | 202 | struct ramoops_context *cxt = psi->data; |
@@ -399,8 +400,6 @@ static int ramoops_probe(struct platform_device *pdev) | |||
399 | goto fail_out; | 400 | goto fail_out; |
400 | } | 401 | } |
401 | 402 | ||
402 | if (!is_power_of_2(pdata->mem_size)) | ||
403 | pdata->mem_size = rounddown_pow_of_two(pdata->mem_size); | ||
404 | if (!is_power_of_2(pdata->record_size)) | 403 | if (!is_power_of_2(pdata->record_size)) |
405 | pdata->record_size = rounddown_pow_of_two(pdata->record_size); | 404 | pdata->record_size = rounddown_pow_of_two(pdata->record_size); |
406 | if (!is_power_of_2(pdata->console_size)) | 405 | if (!is_power_of_2(pdata->console_size)) |
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 59337326e288..de272d426763 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c | |||
@@ -46,7 +46,7 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz) | |||
46 | } | 46 | } |
47 | 47 | ||
48 | /* increase and wrap the start pointer, returning the old value */ | 48 | /* increase and wrap the start pointer, returning the old value */ |
49 | static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) | 49 | static size_t buffer_start_add_atomic(struct persistent_ram_zone *prz, size_t a) |
50 | { | 50 | { |
51 | int old; | 51 | int old; |
52 | int new; | 52 | int new; |
@@ -62,7 +62,7 @@ static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) | |||
62 | } | 62 | } |
63 | 63 | ||
64 | /* increase the size counter until it hits the max size */ | 64 | /* increase the size counter until it hits the max size */ |
65 | static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a) | 65 | static void buffer_size_add_atomic(struct persistent_ram_zone *prz, size_t a) |
66 | { | 66 | { |
67 | size_t old; | 67 | size_t old; |
68 | size_t new; | 68 | size_t new; |
@@ -78,6 +78,53 @@ static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a) | |||
78 | } while (atomic_cmpxchg(&prz->buffer->size, old, new) != old); | 78 | } while (atomic_cmpxchg(&prz->buffer->size, old, new) != old); |
79 | } | 79 | } |
80 | 80 | ||
81 | static DEFINE_RAW_SPINLOCK(buffer_lock); | ||
82 | |||
83 | /* increase and wrap the start pointer, returning the old value */ | ||
84 | static size_t buffer_start_add_locked(struct persistent_ram_zone *prz, size_t a) | ||
85 | { | ||
86 | int old; | ||
87 | int new; | ||
88 | unsigned long flags; | ||
89 | |||
90 | raw_spin_lock_irqsave(&buffer_lock, flags); | ||
91 | |||
92 | old = atomic_read(&prz->buffer->start); | ||
93 | new = old + a; | ||
94 | while (unlikely(new > prz->buffer_size)) | ||
95 | new -= prz->buffer_size; | ||
96 | atomic_set(&prz->buffer->start, new); | ||
97 | |||
98 | raw_spin_unlock_irqrestore(&buffer_lock, flags); | ||
99 | |||
100 | return old; | ||
101 | } | ||
102 | |||
103 | /* increase the size counter until it hits the max size */ | ||
104 | static void buffer_size_add_locked(struct persistent_ram_zone *prz, size_t a) | ||
105 | { | ||
106 | size_t old; | ||
107 | size_t new; | ||
108 | unsigned long flags; | ||
109 | |||
110 | raw_spin_lock_irqsave(&buffer_lock, flags); | ||
111 | |||
112 | old = atomic_read(&prz->buffer->size); | ||
113 | if (old == prz->buffer_size) | ||
114 | goto exit; | ||
115 | |||
116 | new = old + a; | ||
117 | if (new > prz->buffer_size) | ||
118 | new = prz->buffer_size; | ||
119 | atomic_set(&prz->buffer->size, new); | ||
120 | |||
121 | exit: | ||
122 | raw_spin_unlock_irqrestore(&buffer_lock, flags); | ||
123 | } | ||
124 | |||
125 | static size_t (*buffer_start_add)(struct persistent_ram_zone *, size_t) = buffer_start_add_atomic; | ||
126 | static void (*buffer_size_add)(struct persistent_ram_zone *, size_t) = buffer_size_add_atomic; | ||
127 | |||
81 | static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, | 128 | static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, |
82 | uint8_t *data, size_t len, uint8_t *ecc) | 129 | uint8_t *data, size_t len, uint8_t *ecc) |
83 | { | 130 | { |
@@ -372,6 +419,9 @@ static void *persistent_ram_iomap(phys_addr_t start, size_t size) | |||
372 | return NULL; | 419 | return NULL; |
373 | } | 420 | } |
374 | 421 | ||
422 | buffer_start_add = buffer_start_add_locked; | ||
423 | buffer_size_add = buffer_size_add_locked; | ||
424 | |||
375 | return ioremap(start, size); | 425 | return ioremap(start, size); |
376 | } | 426 | } |
377 | 427 | ||
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 28ce014b3cef..b218f965817b 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c | |||
@@ -14,9 +14,9 @@ | |||
14 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
15 | #include "qnx4.h" | 15 | #include "qnx4.h" |
16 | 16 | ||
17 | static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) | 17 | static int qnx4_readdir(struct file *file, struct dir_context *ctx) |
18 | { | 18 | { |
19 | struct inode *inode = file_inode(filp); | 19 | struct inode *inode = file_inode(file); |
20 | unsigned int offset; | 20 | unsigned int offset; |
21 | struct buffer_head *bh; | 21 | struct buffer_head *bh; |
22 | struct qnx4_inode_entry *de; | 22 | struct qnx4_inode_entry *de; |
@@ -26,48 +26,44 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
26 | int size; | 26 | int size; |
27 | 27 | ||
28 | QNX4DEBUG((KERN_INFO "qnx4_readdir:i_size = %ld\n", (long) inode->i_size)); | 28 | QNX4DEBUG((KERN_INFO "qnx4_readdir:i_size = %ld\n", (long) inode->i_size)); |
29 | QNX4DEBUG((KERN_INFO "filp->f_pos = %ld\n", (long) filp->f_pos)); | 29 | QNX4DEBUG((KERN_INFO "pos = %ld\n", (long) ctx->pos)); |
30 | 30 | ||
31 | while (filp->f_pos < inode->i_size) { | 31 | while (ctx->pos < inode->i_size) { |
32 | blknum = qnx4_block_map( inode, filp->f_pos >> QNX4_BLOCK_SIZE_BITS ); | 32 | blknum = qnx4_block_map(inode, ctx->pos >> QNX4_BLOCK_SIZE_BITS); |
33 | bh = sb_bread(inode->i_sb, blknum); | 33 | bh = sb_bread(inode->i_sb, blknum); |
34 | if(bh==NULL) { | 34 | if (bh == NULL) { |
35 | printk(KERN_ERR "qnx4_readdir: bread failed (%ld)\n", blknum); | 35 | printk(KERN_ERR "qnx4_readdir: bread failed (%ld)\n", blknum); |
36 | break; | 36 | return 0; |
37 | } | 37 | } |
38 | ix = (int)(filp->f_pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK; | 38 | ix = (ctx->pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK; |
39 | while (ix < QNX4_INODES_PER_BLOCK) { | 39 | for (; ix < QNX4_INODES_PER_BLOCK; ix++, ctx->pos += QNX4_DIR_ENTRY_SIZE) { |
40 | offset = ix * QNX4_DIR_ENTRY_SIZE; | 40 | offset = ix * QNX4_DIR_ENTRY_SIZE; |
41 | de = (struct qnx4_inode_entry *) (bh->b_data + offset); | 41 | de = (struct qnx4_inode_entry *) (bh->b_data + offset); |
42 | size = strlen(de->di_fname); | 42 | if (!de->di_fname[0]) |
43 | if (size) { | 43 | continue; |
44 | if ( !( de->di_status & QNX4_FILE_LINK ) && size > QNX4_SHORT_NAME_MAX ) | 44 | if (!(de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) |
45 | size = QNX4_SHORT_NAME_MAX; | 45 | continue; |
46 | else if ( size > QNX4_NAME_MAX ) | 46 | if (!(de->di_status & QNX4_FILE_LINK)) |
47 | size = QNX4_NAME_MAX; | 47 | size = QNX4_SHORT_NAME_MAX; |
48 | 48 | else | |
49 | if ( ( de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK) ) != 0 ) { | 49 | size = QNX4_NAME_MAX; |
50 | QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname)); | 50 | size = strnlen(de->di_fname, size); |
51 | if ( ( de->di_status & QNX4_FILE_LINK ) == 0 ) | 51 | QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname)); |
52 | ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; | 52 | if (!(de->di_status & QNX4_FILE_LINK)) |
53 | else { | 53 | ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; |
54 | le = (struct qnx4_link_info*)de; | 54 | else { |
55 | ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) * | 55 | le = (struct qnx4_link_info*)de; |
56 | QNX4_INODES_PER_BLOCK + | 56 | ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) * |
57 | le->dl_inode_ndx; | 57 | QNX4_INODES_PER_BLOCK + |
58 | } | 58 | le->dl_inode_ndx; |
59 | if (filldir(dirent, de->di_fname, size, filp->f_pos, ino, DT_UNKNOWN) < 0) { | 59 | } |
60 | brelse(bh); | 60 | if (!dir_emit(ctx, de->di_fname, size, ino, DT_UNKNOWN)) { |
61 | goto out; | 61 | brelse(bh); |
62 | } | 62 | return 0; |
63 | } | ||
64 | } | 63 | } |
65 | ix++; | ||
66 | filp->f_pos += QNX4_DIR_ENTRY_SIZE; | ||
67 | } | 64 | } |
68 | brelse(bh); | 65 | brelse(bh); |
69 | } | 66 | } |
70 | out: | ||
71 | return 0; | 67 | return 0; |
72 | } | 68 | } |
73 | 69 | ||
@@ -75,7 +71,7 @@ const struct file_operations qnx4_dir_operations = | |||
75 | { | 71 | { |
76 | .llseek = generic_file_llseek, | 72 | .llseek = generic_file_llseek, |
77 | .read = generic_read_dir, | 73 | .read = generic_read_dir, |
78 | .readdir = qnx4_readdir, | 74 | .iterate = qnx4_readdir, |
79 | .fsync = generic_file_fsync, | 75 | .fsync = generic_file_fsync, |
80 | }; | 76 | }; |
81 | 77 | ||
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c index afa6be6fc397..15b7d92ed60d 100644 --- a/fs/qnx6/dir.c +++ b/fs/qnx6/dir.c | |||
@@ -65,8 +65,8 @@ static struct qnx6_long_filename *qnx6_longname(struct super_block *sb, | |||
65 | 65 | ||
66 | static int qnx6_dir_longfilename(struct inode *inode, | 66 | static int qnx6_dir_longfilename(struct inode *inode, |
67 | struct qnx6_long_dir_entry *de, | 67 | struct qnx6_long_dir_entry *de, |
68 | void *dirent, loff_t pos, | 68 | struct dir_context *ctx, |
69 | unsigned de_inode, filldir_t filldir) | 69 | unsigned de_inode) |
70 | { | 70 | { |
71 | struct qnx6_long_filename *lf; | 71 | struct qnx6_long_filename *lf; |
72 | struct super_block *s = inode->i_sb; | 72 | struct super_block *s = inode->i_sb; |
@@ -104,8 +104,7 @@ static int qnx6_dir_longfilename(struct inode *inode, | |||
104 | 104 | ||
105 | QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n", | 105 | QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n", |
106 | lf_size, lf->lf_fname, de_inode)); | 106 | lf_size, lf->lf_fname, de_inode)); |
107 | if (filldir(dirent, lf->lf_fname, lf_size, pos, de_inode, | 107 | if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) { |
108 | DT_UNKNOWN) < 0) { | ||
109 | qnx6_put_page(page); | 108 | qnx6_put_page(page); |
110 | return 0; | 109 | return 0; |
111 | } | 110 | } |
@@ -115,18 +114,19 @@ static int qnx6_dir_longfilename(struct inode *inode, | |||
115 | return 1; | 114 | return 1; |
116 | } | 115 | } |
117 | 116 | ||
118 | static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) | 117 | static int qnx6_readdir(struct file *file, struct dir_context *ctx) |
119 | { | 118 | { |
120 | struct inode *inode = file_inode(filp); | 119 | struct inode *inode = file_inode(file); |
121 | struct super_block *s = inode->i_sb; | 120 | struct super_block *s = inode->i_sb; |
122 | struct qnx6_sb_info *sbi = QNX6_SB(s); | 121 | struct qnx6_sb_info *sbi = QNX6_SB(s); |
123 | loff_t pos = filp->f_pos & ~(QNX6_DIR_ENTRY_SIZE - 1); | 122 | loff_t pos = ctx->pos & ~(QNX6_DIR_ENTRY_SIZE - 1); |
124 | unsigned long npages = dir_pages(inode); | 123 | unsigned long npages = dir_pages(inode); |
125 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | 124 | unsigned long n = pos >> PAGE_CACHE_SHIFT; |
126 | unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE; | 125 | unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE; |
127 | bool done = false; | 126 | bool done = false; |
128 | 127 | ||
129 | if (filp->f_pos >= inode->i_size) | 128 | ctx->pos = pos; |
129 | if (ctx->pos >= inode->i_size) | ||
130 | return 0; | 130 | return 0; |
131 | 131 | ||
132 | for ( ; !done && n < npages; n++, start = 0) { | 132 | for ( ; !done && n < npages; n++, start = 0) { |
@@ -137,11 +137,11 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
137 | 137 | ||
138 | if (IS_ERR(page)) { | 138 | if (IS_ERR(page)) { |
139 | printk(KERN_ERR "qnx6_readdir: read failed\n"); | 139 | printk(KERN_ERR "qnx6_readdir: read failed\n"); |
140 | filp->f_pos = (n + 1) << PAGE_CACHE_SHIFT; | 140 | ctx->pos = (n + 1) << PAGE_CACHE_SHIFT; |
141 | return PTR_ERR(page); | 141 | return PTR_ERR(page); |
142 | } | 142 | } |
143 | de = ((struct qnx6_dir_entry *)page_address(page)) + start; | 143 | de = ((struct qnx6_dir_entry *)page_address(page)) + start; |
144 | for (; i < limit; i++, de++, pos += QNX6_DIR_ENTRY_SIZE) { | 144 | for (; i < limit; i++, de++, ctx->pos += QNX6_DIR_ENTRY_SIZE) { |
145 | int size = de->de_size; | 145 | int size = de->de_size; |
146 | u32 no_inode = fs32_to_cpu(sbi, de->de_inode); | 146 | u32 no_inode = fs32_to_cpu(sbi, de->de_inode); |
147 | 147 | ||
@@ -154,8 +154,7 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
154 | structure / block */ | 154 | structure / block */ |
155 | if (!qnx6_dir_longfilename(inode, | 155 | if (!qnx6_dir_longfilename(inode, |
156 | (struct qnx6_long_dir_entry *)de, | 156 | (struct qnx6_long_dir_entry *)de, |
157 | dirent, pos, no_inode, | 157 | ctx, no_inode)) { |
158 | filldir)) { | ||
159 | done = true; | 158 | done = true; |
160 | break; | 159 | break; |
161 | } | 160 | } |
@@ -163,9 +162,8 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
163 | QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s" | 162 | QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s" |
164 | " inode:%u\n", size, de->de_fname, | 163 | " inode:%u\n", size, de->de_fname, |
165 | no_inode)); | 164 | no_inode)); |
166 | if (filldir(dirent, de->de_fname, size, | 165 | if (!dir_emit(ctx, de->de_fname, size, |
167 | pos, no_inode, DT_UNKNOWN) | 166 | no_inode, DT_UNKNOWN)) { |
168 | < 0) { | ||
169 | done = true; | 167 | done = true; |
170 | break; | 168 | break; |
171 | } | 169 | } |
@@ -173,7 +171,6 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
173 | } | 171 | } |
174 | qnx6_put_page(page); | 172 | qnx6_put_page(page); |
175 | } | 173 | } |
176 | filp->f_pos = pos; | ||
177 | return 0; | 174 | return 0; |
178 | } | 175 | } |
179 | 176 | ||
@@ -282,7 +279,7 @@ found: | |||
282 | const struct file_operations qnx6_dir_operations = { | 279 | const struct file_operations qnx6_dir_operations = { |
283 | .llseek = generic_file_llseek, | 280 | .llseek = generic_file_llseek, |
284 | .read = generic_read_dir, | 281 | .read = generic_read_dir, |
285 | .readdir = qnx6_readdir, | 282 | .iterate = qnx6_readdir, |
286 | .fsync = generic_file_fsync, | 283 | .fsync = generic_file_fsync, |
287 | }; | 284 | }; |
288 | 285 | ||
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 3e64169ef527..fbad622841f9 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -2585,7 +2585,7 @@ static int do_proc_dqstats(struct ctl_table *table, int write, | |||
2585 | return proc_dointvec(table, write, buffer, lenp, ppos); | 2585 | return proc_dointvec(table, write, buffer, lenp, ppos); |
2586 | } | 2586 | } |
2587 | 2587 | ||
2588 | static ctl_table fs_dqstats_table[] = { | 2588 | static struct ctl_table fs_dqstats_table[] = { |
2589 | { | 2589 | { |
2590 | .procname = "lookups", | 2590 | .procname = "lookups", |
2591 | .data = &dqstats.stat[DQST_LOOKUPS], | 2591 | .data = &dqstats.stat[DQST_LOOKUPS], |
@@ -2654,7 +2654,7 @@ static ctl_table fs_dqstats_table[] = { | |||
2654 | { }, | 2654 | { }, |
2655 | }; | 2655 | }; |
2656 | 2656 | ||
2657 | static ctl_table fs_table[] = { | 2657 | static struct ctl_table fs_table[] = { |
2658 | { | 2658 | { |
2659 | .procname = "quota", | 2659 | .procname = "quota", |
2660 | .mode = 0555, | 2660 | .mode = 0555, |
@@ -2663,7 +2663,7 @@ static ctl_table fs_table[] = { | |||
2663 | { }, | 2663 | { }, |
2664 | }; | 2664 | }; |
2665 | 2665 | ||
2666 | static ctl_table sys_table[] = { | 2666 | static struct ctl_table sys_table[] = { |
2667 | { | 2667 | { |
2668 | .procname = "fs", | 2668 | .procname = "fs", |
2669 | .mode = 0555, | 2669 | .mode = 0555, |
diff --git a/fs/read_write.c b/fs/read_write.c index 2cefa417be34..122a3846d9e1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -41,8 +41,19 @@ static inline int unsigned_offsets(struct file *file) | |||
41 | return file->f_mode & FMODE_UNSIGNED_OFFSET; | 41 | return file->f_mode & FMODE_UNSIGNED_OFFSET; |
42 | } | 42 | } |
43 | 43 | ||
44 | static loff_t lseek_execute(struct file *file, struct inode *inode, | 44 | /** |
45 | loff_t offset, loff_t maxsize) | 45 | * vfs_setpos - update the file offset for lseek |
46 | * @file: file structure in question | ||
47 | * @offset: file offset to seek to | ||
48 | * @maxsize: maximum file size | ||
49 | * | ||
50 | * This is a low-level filesystem helper for updating the file offset to | ||
51 | * the value specified by @offset if the given offset is valid and it is | ||
52 | * not equal to the current file offset. | ||
53 | * | ||
54 | * Return the specified offset on success and -EINVAL on invalid offset. | ||
55 | */ | ||
56 | loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) | ||
46 | { | 57 | { |
47 | if (offset < 0 && !unsigned_offsets(file)) | 58 | if (offset < 0 && !unsigned_offsets(file)) |
48 | return -EINVAL; | 59 | return -EINVAL; |
@@ -55,6 +66,7 @@ static loff_t lseek_execute(struct file *file, struct inode *inode, | |||
55 | } | 66 | } |
56 | return offset; | 67 | return offset; |
57 | } | 68 | } |
69 | EXPORT_SYMBOL(vfs_setpos); | ||
58 | 70 | ||
59 | /** | 71 | /** |
60 | * generic_file_llseek_size - generic llseek implementation for regular files | 72 | * generic_file_llseek_size - generic llseek implementation for regular files |
@@ -76,8 +88,6 @@ loff_t | |||
76 | generic_file_llseek_size(struct file *file, loff_t offset, int whence, | 88 | generic_file_llseek_size(struct file *file, loff_t offset, int whence, |
77 | loff_t maxsize, loff_t eof) | 89 | loff_t maxsize, loff_t eof) |
78 | { | 90 | { |
79 | struct inode *inode = file->f_mapping->host; | ||
80 | |||
81 | switch (whence) { | 91 | switch (whence) { |
82 | case SEEK_END: | 92 | case SEEK_END: |
83 | offset += eof; | 93 | offset += eof; |
@@ -97,8 +107,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, | |||
97 | * like SEEK_SET. | 107 | * like SEEK_SET. |
98 | */ | 108 | */ |
99 | spin_lock(&file->f_lock); | 109 | spin_lock(&file->f_lock); |
100 | offset = lseek_execute(file, inode, file->f_pos + offset, | 110 | offset = vfs_setpos(file, file->f_pos + offset, maxsize); |
101 | maxsize); | ||
102 | spin_unlock(&file->f_lock); | 111 | spin_unlock(&file->f_lock); |
103 | return offset; | 112 | return offset; |
104 | case SEEK_DATA: | 113 | case SEEK_DATA: |
@@ -120,7 +129,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, | |||
120 | break; | 129 | break; |
121 | } | 130 | } |
122 | 131 | ||
123 | return lseek_execute(file, inode, offset, maxsize); | 132 | return vfs_setpos(file, offset, maxsize); |
124 | } | 133 | } |
125 | EXPORT_SYMBOL(generic_file_llseek_size); | 134 | EXPORT_SYMBOL(generic_file_llseek_size); |
126 | 135 | ||
@@ -145,6 +154,26 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) | |||
145 | EXPORT_SYMBOL(generic_file_llseek); | 154 | EXPORT_SYMBOL(generic_file_llseek); |
146 | 155 | ||
147 | /** | 156 | /** |
157 | * fixed_size_llseek - llseek implementation for fixed-sized devices | ||
158 | * @file: file structure to seek on | ||
159 | * @offset: file offset to seek to | ||
160 | * @whence: type of seek | ||
161 | * @size: size of the file | ||
162 | * | ||
163 | */ | ||
164 | loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) | ||
165 | { | ||
166 | switch (whence) { | ||
167 | case SEEK_SET: case SEEK_CUR: case SEEK_END: | ||
168 | return generic_file_llseek_size(file, offset, whence, | ||
169 | size, size); | ||
170 | default: | ||
171 | return -EINVAL; | ||
172 | } | ||
173 | } | ||
174 | EXPORT_SYMBOL(fixed_size_llseek); | ||
175 | |||
176 | /** | ||
148 | * noop_llseek - No Operation Performed llseek implementation | 177 | * noop_llseek - No Operation Performed llseek implementation |
149 | * @file: file structure to seek on | 178 | * @file: file structure to seek on |
150 | * @offset: file offset to seek to | 179 | * @offset: file offset to seek to |
@@ -296,7 +325,7 @@ out_putf: | |||
296 | * them to something that fits in "int" so that others | 325 | * them to something that fits in "int" so that others |
297 | * won't have to do range checks all the time. | 326 | * won't have to do range checks all the time. |
298 | */ | 327 | */ |
299 | int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) | 328 | int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) |
300 | { | 329 | { |
301 | struct inode *inode; | 330 | struct inode *inode; |
302 | loff_t pos; | 331 | loff_t pos; |
@@ -477,7 +506,8 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) | |||
477 | if (f.file) { | 506 | if (f.file) { |
478 | loff_t pos = file_pos_read(f.file); | 507 | loff_t pos = file_pos_read(f.file); |
479 | ret = vfs_read(f.file, buf, count, &pos); | 508 | ret = vfs_read(f.file, buf, count, &pos); |
480 | file_pos_write(f.file, pos); | 509 | if (ret >= 0) |
510 | file_pos_write(f.file, pos); | ||
481 | fdput(f); | 511 | fdput(f); |
482 | } | 512 | } |
483 | return ret; | 513 | return ret; |
@@ -492,7 +522,8 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, | |||
492 | if (f.file) { | 522 | if (f.file) { |
493 | loff_t pos = file_pos_read(f.file); | 523 | loff_t pos = file_pos_read(f.file); |
494 | ret = vfs_write(f.file, buf, count, &pos); | 524 | ret = vfs_write(f.file, buf, count, &pos); |
495 | file_pos_write(f.file, pos); | 525 | if (ret >= 0) |
526 | file_pos_write(f.file, pos); | ||
496 | fdput(f); | 527 | fdput(f); |
497 | } | 528 | } |
498 | 529 | ||
@@ -780,7 +811,8 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | |||
780 | if (f.file) { | 811 | if (f.file) { |
781 | loff_t pos = file_pos_read(f.file); | 812 | loff_t pos = file_pos_read(f.file); |
782 | ret = vfs_readv(f.file, vec, vlen, &pos); | 813 | ret = vfs_readv(f.file, vec, vlen, &pos); |
783 | file_pos_write(f.file, pos); | 814 | if (ret >= 0) |
815 | file_pos_write(f.file, pos); | ||
784 | fdput(f); | 816 | fdput(f); |
785 | } | 817 | } |
786 | 818 | ||
@@ -799,7 +831,8 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, | |||
799 | if (f.file) { | 831 | if (f.file) { |
800 | loff_t pos = file_pos_read(f.file); | 832 | loff_t pos = file_pos_read(f.file); |
801 | ret = vfs_writev(f.file, vec, vlen, &pos); | 833 | ret = vfs_writev(f.file, vec, vlen, &pos); |
802 | file_pos_write(f.file, pos); | 834 | if (ret >= 0) |
835 | file_pos_write(f.file, pos); | ||
803 | fdput(f); | 836 | fdput(f); |
804 | } | 837 | } |
805 | 838 | ||
@@ -959,7 +992,8 @@ COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd, | |||
959 | return -EBADF; | 992 | return -EBADF; |
960 | pos = f.file->f_pos; | 993 | pos = f.file->f_pos; |
961 | ret = compat_readv(f.file, vec, vlen, &pos); | 994 | ret = compat_readv(f.file, vec, vlen, &pos); |
962 | f.file->f_pos = pos; | 995 | if (ret >= 0) |
996 | f.file->f_pos = pos; | ||
963 | fdput(f); | 997 | fdput(f); |
964 | return ret; | 998 | return ret; |
965 | } | 999 | } |
@@ -1025,7 +1059,8 @@ COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd, | |||
1025 | return -EBADF; | 1059 | return -EBADF; |
1026 | pos = f.file->f_pos; | 1060 | pos = f.file->f_pos; |
1027 | ret = compat_writev(f.file, vec, vlen, &pos); | 1061 | ret = compat_writev(f.file, vec, vlen, &pos); |
1028 | f.file->f_pos = pos; | 1062 | if (ret >= 0) |
1063 | f.file->f_pos = pos; | ||
1029 | fdput(f); | 1064 | fdput(f); |
1030 | return ret; | 1065 | return ret; |
1031 | } | 1066 | } |
@@ -1129,7 +1164,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
1129 | if (in.file->f_flags & O_NONBLOCK) | 1164 | if (in.file->f_flags & O_NONBLOCK) |
1130 | fl = SPLICE_F_NONBLOCK; | 1165 | fl = SPLICE_F_NONBLOCK; |
1131 | #endif | 1166 | #endif |
1167 | file_start_write(out.file); | ||
1132 | retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); | 1168 | retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); |
1169 | file_end_write(out.file); | ||
1133 | 1170 | ||
1134 | if (retval > 0) { | 1171 | if (retval > 0) { |
1135 | add_rchar(current, retval); | 1172 | add_rchar(current, retval); |
diff --git a/fs/readdir.c b/fs/readdir.c index fee38e04fae4..93d71e574310 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -20,11 +20,11 @@ | |||
20 | 20 | ||
21 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
22 | 22 | ||
23 | int vfs_readdir(struct file *file, filldir_t filler, void *buf) | 23 | int iterate_dir(struct file *file, struct dir_context *ctx) |
24 | { | 24 | { |
25 | struct inode *inode = file_inode(file); | 25 | struct inode *inode = file_inode(file); |
26 | int res = -ENOTDIR; | 26 | int res = -ENOTDIR; |
27 | if (!file->f_op || !file->f_op->readdir) | 27 | if (!file->f_op || !file->f_op->iterate) |
28 | goto out; | 28 | goto out; |
29 | 29 | ||
30 | res = security_file_permission(file, MAY_READ); | 30 | res = security_file_permission(file, MAY_READ); |
@@ -37,15 +37,16 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf) | |||
37 | 37 | ||
38 | res = -ENOENT; | 38 | res = -ENOENT; |
39 | if (!IS_DEADDIR(inode)) { | 39 | if (!IS_DEADDIR(inode)) { |
40 | res = file->f_op->readdir(file, buf, filler); | 40 | ctx->pos = file->f_pos; |
41 | res = file->f_op->iterate(file, ctx); | ||
42 | file->f_pos = ctx->pos; | ||
41 | file_accessed(file); | 43 | file_accessed(file); |
42 | } | 44 | } |
43 | mutex_unlock(&inode->i_mutex); | 45 | mutex_unlock(&inode->i_mutex); |
44 | out: | 46 | out: |
45 | return res; | 47 | return res; |
46 | } | 48 | } |
47 | 49 | EXPORT_SYMBOL(iterate_dir); | |
48 | EXPORT_SYMBOL(vfs_readdir); | ||
49 | 50 | ||
50 | /* | 51 | /* |
51 | * Traditional linux readdir() handling.. | 52 | * Traditional linux readdir() handling.. |
@@ -66,6 +67,7 @@ struct old_linux_dirent { | |||
66 | }; | 67 | }; |
67 | 68 | ||
68 | struct readdir_callback { | 69 | struct readdir_callback { |
70 | struct dir_context ctx; | ||
69 | struct old_linux_dirent __user * dirent; | 71 | struct old_linux_dirent __user * dirent; |
70 | int result; | 72 | int result; |
71 | }; | 73 | }; |
@@ -73,7 +75,7 @@ struct readdir_callback { | |||
73 | static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, | 75 | static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, |
74 | u64 ino, unsigned int d_type) | 76 | u64 ino, unsigned int d_type) |
75 | { | 77 | { |
76 | struct readdir_callback * buf = (struct readdir_callback *) __buf; | 78 | struct readdir_callback *buf = (struct readdir_callback *) __buf; |
77 | struct old_linux_dirent __user * dirent; | 79 | struct old_linux_dirent __user * dirent; |
78 | unsigned long d_ino; | 80 | unsigned long d_ino; |
79 | 81 | ||
@@ -107,15 +109,15 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, | |||
107 | { | 109 | { |
108 | int error; | 110 | int error; |
109 | struct fd f = fdget(fd); | 111 | struct fd f = fdget(fd); |
110 | struct readdir_callback buf; | 112 | struct readdir_callback buf = { |
113 | .ctx.actor = fillonedir, | ||
114 | .dirent = dirent | ||
115 | }; | ||
111 | 116 | ||
112 | if (!f.file) | 117 | if (!f.file) |
113 | return -EBADF; | 118 | return -EBADF; |
114 | 119 | ||
115 | buf.result = 0; | 120 | error = iterate_dir(f.file, &buf.ctx); |
116 | buf.dirent = dirent; | ||
117 | |||
118 | error = vfs_readdir(f.file, fillonedir, &buf); | ||
119 | if (buf.result) | 121 | if (buf.result) |
120 | error = buf.result; | 122 | error = buf.result; |
121 | 123 | ||
@@ -137,6 +139,7 @@ struct linux_dirent { | |||
137 | }; | 139 | }; |
138 | 140 | ||
139 | struct getdents_callback { | 141 | struct getdents_callback { |
142 | struct dir_context ctx; | ||
140 | struct linux_dirent __user * current_dir; | 143 | struct linux_dirent __user * current_dir; |
141 | struct linux_dirent __user * previous; | 144 | struct linux_dirent __user * previous; |
142 | int count; | 145 | int count; |
@@ -191,7 +194,11 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, | |||
191 | { | 194 | { |
192 | struct fd f; | 195 | struct fd f; |
193 | struct linux_dirent __user * lastdirent; | 196 | struct linux_dirent __user * lastdirent; |
194 | struct getdents_callback buf; | 197 | struct getdents_callback buf = { |
198 | .ctx.actor = filldir, | ||
199 | .count = count, | ||
200 | .current_dir = dirent | ||
201 | }; | ||
195 | int error; | 202 | int error; |
196 | 203 | ||
197 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 204 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
@@ -201,17 +208,12 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, | |||
201 | if (!f.file) | 208 | if (!f.file) |
202 | return -EBADF; | 209 | return -EBADF; |
203 | 210 | ||
204 | buf.current_dir = dirent; | 211 | error = iterate_dir(f.file, &buf.ctx); |
205 | buf.previous = NULL; | ||
206 | buf.count = count; | ||
207 | buf.error = 0; | ||
208 | |||
209 | error = vfs_readdir(f.file, filldir, &buf); | ||
210 | if (error >= 0) | 212 | if (error >= 0) |
211 | error = buf.error; | 213 | error = buf.error; |
212 | lastdirent = buf.previous; | 214 | lastdirent = buf.previous; |
213 | if (lastdirent) { | 215 | if (lastdirent) { |
214 | if (put_user(f.file->f_pos, &lastdirent->d_off)) | 216 | if (put_user(buf.ctx.pos, &lastdirent->d_off)) |
215 | error = -EFAULT; | 217 | error = -EFAULT; |
216 | else | 218 | else |
217 | error = count - buf.count; | 219 | error = count - buf.count; |
@@ -221,6 +223,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, | |||
221 | } | 223 | } |
222 | 224 | ||
223 | struct getdents_callback64 { | 225 | struct getdents_callback64 { |
226 | struct dir_context ctx; | ||
224 | struct linux_dirent64 __user * current_dir; | 227 | struct linux_dirent64 __user * current_dir; |
225 | struct linux_dirent64 __user * previous; | 228 | struct linux_dirent64 __user * previous; |
226 | int count; | 229 | int count; |
@@ -271,7 +274,11 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, | |||
271 | { | 274 | { |
272 | struct fd f; | 275 | struct fd f; |
273 | struct linux_dirent64 __user * lastdirent; | 276 | struct linux_dirent64 __user * lastdirent; |
274 | struct getdents_callback64 buf; | 277 | struct getdents_callback64 buf = { |
278 | .ctx.actor = filldir64, | ||
279 | .count = count, | ||
280 | .current_dir = dirent | ||
281 | }; | ||
275 | int error; | 282 | int error; |
276 | 283 | ||
277 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 284 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
@@ -281,17 +288,12 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, | |||
281 | if (!f.file) | 288 | if (!f.file) |
282 | return -EBADF; | 289 | return -EBADF; |
283 | 290 | ||
284 | buf.current_dir = dirent; | 291 | error = iterate_dir(f.file, &buf.ctx); |
285 | buf.previous = NULL; | ||
286 | buf.count = count; | ||
287 | buf.error = 0; | ||
288 | |||
289 | error = vfs_readdir(f.file, filldir64, &buf); | ||
290 | if (error >= 0) | 292 | if (error >= 0) |
291 | error = buf.error; | 293 | error = buf.error; |
292 | lastdirent = buf.previous; | 294 | lastdirent = buf.previous; |
293 | if (lastdirent) { | 295 | if (lastdirent) { |
294 | typeof(lastdirent->d_off) d_off = f.file->f_pos; | 296 | typeof(lastdirent->d_off) d_off = buf.ctx.pos; |
295 | if (__put_user(d_off, &lastdirent->d_off)) | 297 | if (__put_user(d_off, &lastdirent->d_off)) |
296 | error = -EFAULT; | 298 | error = -EFAULT; |
297 | else | 299 | else |
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 6c2d136561cb..03e4ca5624d6 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c | |||
@@ -13,14 +13,14 @@ | |||
13 | 13 | ||
14 | extern const struct reiserfs_key MIN_KEY; | 14 | extern const struct reiserfs_key MIN_KEY; |
15 | 15 | ||
16 | static int reiserfs_readdir(struct file *, void *, filldir_t); | 16 | static int reiserfs_readdir(struct file *, struct dir_context *); |
17 | static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, | 17 | static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, |
18 | int datasync); | 18 | int datasync); |
19 | 19 | ||
20 | const struct file_operations reiserfs_dir_operations = { | 20 | const struct file_operations reiserfs_dir_operations = { |
21 | .llseek = generic_file_llseek, | 21 | .llseek = generic_file_llseek, |
22 | .read = generic_read_dir, | 22 | .read = generic_read_dir, |
23 | .readdir = reiserfs_readdir, | 23 | .iterate = reiserfs_readdir, |
24 | .fsync = reiserfs_dir_fsync, | 24 | .fsync = reiserfs_dir_fsync, |
25 | .unlocked_ioctl = reiserfs_ioctl, | 25 | .unlocked_ioctl = reiserfs_ioctl, |
26 | #ifdef CONFIG_COMPAT | 26 | #ifdef CONFIG_COMPAT |
@@ -50,18 +50,15 @@ static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, | |||
50 | 50 | ||
51 | #define store_ih(where,what) copy_item_head (where, what) | 51 | #define store_ih(where,what) copy_item_head (where, what) |
52 | 52 | ||
53 | static inline bool is_privroot_deh(struct dentry *dir, | 53 | static inline bool is_privroot_deh(struct inode *dir, struct reiserfs_de_head *deh) |
54 | struct reiserfs_de_head *deh) | ||
55 | { | 54 | { |
56 | struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; | 55 | struct dentry *privroot = REISERFS_SB(dir->i_sb)->priv_root; |
57 | return (dir == dir->d_parent && privroot->d_inode && | 56 | return (privroot->d_inode && |
58 | deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); | 57 | deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); |
59 | } | 58 | } |
60 | 59 | ||
61 | int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | 60 | int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) |
62 | filldir_t filldir, loff_t *pos) | ||
63 | { | 61 | { |
64 | struct inode *inode = dentry->d_inode; | ||
65 | struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ | 62 | struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ |
66 | INITIALIZE_PATH(path_to_entry); | 63 | INITIALIZE_PATH(path_to_entry); |
67 | struct buffer_head *bh; | 64 | struct buffer_head *bh; |
@@ -81,7 +78,7 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | |||
81 | 78 | ||
82 | /* form key for search the next directory entry using f_pos field of | 79 | /* form key for search the next directory entry using f_pos field of |
83 | file structure */ | 80 | file structure */ |
84 | make_cpu_key(&pos_key, inode, *pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3); | 81 | make_cpu_key(&pos_key, inode, ctx->pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3); |
85 | next_pos = cpu_key_k_offset(&pos_key); | 82 | next_pos = cpu_key_k_offset(&pos_key); |
86 | 83 | ||
87 | path_to_entry.reada = PATH_READA; | 84 | path_to_entry.reada = PATH_READA; |
@@ -126,7 +123,6 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | |||
126 | entry_num++, deh++) { | 123 | entry_num++, deh++) { |
127 | int d_reclen; | 124 | int d_reclen; |
128 | char *d_name; | 125 | char *d_name; |
129 | off_t d_off; | ||
130 | ino_t d_ino; | 126 | ino_t d_ino; |
131 | 127 | ||
132 | if (!de_visible(deh)) | 128 | if (!de_visible(deh)) |
@@ -155,11 +151,10 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | |||
155 | } | 151 | } |
156 | 152 | ||
157 | /* Ignore the .reiserfs_priv entry */ | 153 | /* Ignore the .reiserfs_priv entry */ |
158 | if (is_privroot_deh(dentry, deh)) | 154 | if (is_privroot_deh(inode, deh)) |
159 | continue; | 155 | continue; |
160 | 156 | ||
161 | d_off = deh_offset(deh); | 157 | ctx->pos = deh_offset(deh); |
162 | *pos = d_off; | ||
163 | d_ino = deh_objectid(deh); | 158 | d_ino = deh_objectid(deh); |
164 | if (d_reclen <= 32) { | 159 | if (d_reclen <= 32) { |
165 | local_buf = small_buf; | 160 | local_buf = small_buf; |
@@ -187,9 +182,9 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | |||
187 | * the write lock here for other waiters | 182 | * the write lock here for other waiters |
188 | */ | 183 | */ |
189 | reiserfs_write_unlock(inode->i_sb); | 184 | reiserfs_write_unlock(inode->i_sb); |
190 | if (filldir | 185 | if (!dir_emit |
191 | (dirent, local_buf, d_reclen, d_off, d_ino, | 186 | (ctx, local_buf, d_reclen, d_ino, |
192 | DT_UNKNOWN) < 0) { | 187 | DT_UNKNOWN)) { |
193 | reiserfs_write_lock(inode->i_sb); | 188 | reiserfs_write_lock(inode->i_sb); |
194 | if (local_buf != small_buf) { | 189 | if (local_buf != small_buf) { |
195 | kfree(local_buf); | 190 | kfree(local_buf); |
@@ -237,7 +232,7 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | |||
237 | } /* while */ | 232 | } /* while */ |
238 | 233 | ||
239 | end: | 234 | end: |
240 | *pos = next_pos; | 235 | ctx->pos = next_pos; |
241 | pathrelse(&path_to_entry); | 236 | pathrelse(&path_to_entry); |
242 | reiserfs_check_path(&path_to_entry); | 237 | reiserfs_check_path(&path_to_entry); |
243 | out: | 238 | out: |
@@ -245,10 +240,9 @@ out: | |||
245 | return ret; | 240 | return ret; |
246 | } | 241 | } |
247 | 242 | ||
248 | static int reiserfs_readdir(struct file *file, void *dirent, filldir_t filldir) | 243 | static int reiserfs_readdir(struct file *file, struct dir_context *ctx) |
249 | { | 244 | { |
250 | struct dentry *dentry = file->f_path.dentry; | 245 | return reiserfs_readdir_inode(file_inode(file), ctx); |
251 | return reiserfs_readdir_dentry(dentry, dirent, filldir, &file->f_pos); | ||
252 | } | 246 | } |
253 | 247 | ||
254 | /* compose directory item containing "." and ".." entries (entries are | 248 | /* compose directory item containing "." and ".." entries (entries are |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index f844533792ee..0048cc16a6a8 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -2975,16 +2975,19 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) | |||
2975 | } | 2975 | } |
2976 | 2976 | ||
2977 | /* clm -- taken from fs/buffer.c:block_invalidate_page */ | 2977 | /* clm -- taken from fs/buffer.c:block_invalidate_page */ |
2978 | static void reiserfs_invalidatepage(struct page *page, unsigned long offset) | 2978 | static void reiserfs_invalidatepage(struct page *page, unsigned int offset, |
2979 | unsigned int length) | ||
2979 | { | 2980 | { |
2980 | struct buffer_head *head, *bh, *next; | 2981 | struct buffer_head *head, *bh, *next; |
2981 | struct inode *inode = page->mapping->host; | 2982 | struct inode *inode = page->mapping->host; |
2982 | unsigned int curr_off = 0; | 2983 | unsigned int curr_off = 0; |
2984 | unsigned int stop = offset + length; | ||
2985 | int partial_page = (offset || length < PAGE_CACHE_SIZE); | ||
2983 | int ret = 1; | 2986 | int ret = 1; |
2984 | 2987 | ||
2985 | BUG_ON(!PageLocked(page)); | 2988 | BUG_ON(!PageLocked(page)); |
2986 | 2989 | ||
2987 | if (offset == 0) | 2990 | if (!partial_page) |
2988 | ClearPageChecked(page); | 2991 | ClearPageChecked(page); |
2989 | 2992 | ||
2990 | if (!page_has_buffers(page)) | 2993 | if (!page_has_buffers(page)) |
@@ -2996,6 +2999,9 @@ static void reiserfs_invalidatepage(struct page *page, unsigned long offset) | |||
2996 | unsigned int next_off = curr_off + bh->b_size; | 2999 | unsigned int next_off = curr_off + bh->b_size; |
2997 | next = bh->b_this_page; | 3000 | next = bh->b_this_page; |
2998 | 3001 | ||
3002 | if (next_off > stop) | ||
3003 | goto out; | ||
3004 | |||
2999 | /* | 3005 | /* |
3000 | * is this block fully invalidated? | 3006 | * is this block fully invalidated? |
3001 | */ | 3007 | */ |
@@ -3014,7 +3020,7 @@ static void reiserfs_invalidatepage(struct page *page, unsigned long offset) | |||
3014 | * The get_block cached value has been unconditionally invalidated, | 3020 | * The get_block cached value has been unconditionally invalidated, |
3015 | * so real IO is not possible anymore. | 3021 | * so real IO is not possible anymore. |
3016 | */ | 3022 | */ |
3017 | if (!offset && ret) { | 3023 | if (!partial_page && ret) { |
3018 | ret = try_to_release_page(page, 0); | 3024 | ret = try_to_release_page(page, 0); |
3019 | /* maybe should BUG_ON(!ret); - neilb */ | 3025 | /* maybe should BUG_ON(!ret); - neilb */ |
3020 | } | 3026 | } |
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 33532f79b4f7..a958444a75fc 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c | |||
@@ -19,12 +19,13 @@ | |||
19 | /* | 19 | /* |
20 | * LOCKING: | 20 | * LOCKING: |
21 | * | 21 | * |
22 | * We rely on new Alexander Viro's super-block locking. | 22 | * These guys are evicted from procfs as the very first step in ->kill_sb(). |
23 | * | 23 | * |
24 | */ | 24 | */ |
25 | 25 | ||
26 | static int show_version(struct seq_file *m, struct super_block *sb) | 26 | static int show_version(struct seq_file *m, void *unused) |
27 | { | 27 | { |
28 | struct super_block *sb = m->private; | ||
28 | char *format; | 29 | char *format; |
29 | 30 | ||
30 | if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) { | 31 | if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) { |
@@ -66,8 +67,9 @@ static int show_version(struct seq_file *m, struct super_block *sb) | |||
66 | #define DJP( x ) le32_to_cpu( jp -> x ) | 67 | #define DJP( x ) le32_to_cpu( jp -> x ) |
67 | #define JF( x ) ( r -> s_journal -> x ) | 68 | #define JF( x ) ( r -> s_journal -> x ) |
68 | 69 | ||
69 | static int show_super(struct seq_file *m, struct super_block *sb) | 70 | static int show_super(struct seq_file *m, void *unused) |
70 | { | 71 | { |
72 | struct super_block *sb = m->private; | ||
71 | struct reiserfs_sb_info *r = REISERFS_SB(sb); | 73 | struct reiserfs_sb_info *r = REISERFS_SB(sb); |
72 | 74 | ||
73 | seq_printf(m, "state: \t%s\n" | 75 | seq_printf(m, "state: \t%s\n" |
@@ -128,8 +130,9 @@ static int show_super(struct seq_file *m, struct super_block *sb) | |||
128 | return 0; | 130 | return 0; |
129 | } | 131 | } |
130 | 132 | ||
131 | static int show_per_level(struct seq_file *m, struct super_block *sb) | 133 | static int show_per_level(struct seq_file *m, void *unused) |
132 | { | 134 | { |
135 | struct super_block *sb = m->private; | ||
133 | struct reiserfs_sb_info *r = REISERFS_SB(sb); | 136 | struct reiserfs_sb_info *r = REISERFS_SB(sb); |
134 | int level; | 137 | int level; |
135 | 138 | ||
@@ -186,8 +189,9 @@ static int show_per_level(struct seq_file *m, struct super_block *sb) | |||
186 | return 0; | 189 | return 0; |
187 | } | 190 | } |
188 | 191 | ||
189 | static int show_bitmap(struct seq_file *m, struct super_block *sb) | 192 | static int show_bitmap(struct seq_file *m, void *unused) |
190 | { | 193 | { |
194 | struct super_block *sb = m->private; | ||
191 | struct reiserfs_sb_info *r = REISERFS_SB(sb); | 195 | struct reiserfs_sb_info *r = REISERFS_SB(sb); |
192 | 196 | ||
193 | seq_printf(m, "free_block: %lu\n" | 197 | seq_printf(m, "free_block: %lu\n" |
@@ -218,8 +222,9 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb) | |||
218 | return 0; | 222 | return 0; |
219 | } | 223 | } |
220 | 224 | ||
221 | static int show_on_disk_super(struct seq_file *m, struct super_block *sb) | 225 | static int show_on_disk_super(struct seq_file *m, void *unused) |
222 | { | 226 | { |
227 | struct super_block *sb = m->private; | ||
223 | struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); | 228 | struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); |
224 | struct reiserfs_super_block *rs = sb_info->s_rs; | 229 | struct reiserfs_super_block *rs = sb_info->s_rs; |
225 | int hash_code = DFL(s_hash_function_code); | 230 | int hash_code = DFL(s_hash_function_code); |
@@ -261,8 +266,9 @@ static int show_on_disk_super(struct seq_file *m, struct super_block *sb) | |||
261 | return 0; | 266 | return 0; |
262 | } | 267 | } |
263 | 268 | ||
264 | static int show_oidmap(struct seq_file *m, struct super_block *sb) | 269 | static int show_oidmap(struct seq_file *m, void *unused) |
265 | { | 270 | { |
271 | struct super_block *sb = m->private; | ||
266 | struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); | 272 | struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); |
267 | struct reiserfs_super_block *rs = sb_info->s_rs; | 273 | struct reiserfs_super_block *rs = sb_info->s_rs; |
268 | unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize); | 274 | unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize); |
@@ -291,8 +297,9 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb) | |||
291 | return 0; | 297 | return 0; |
292 | } | 298 | } |
293 | 299 | ||
294 | static int show_journal(struct seq_file *m, struct super_block *sb) | 300 | static int show_journal(struct seq_file *m, void *unused) |
295 | { | 301 | { |
302 | struct super_block *sb = m->private; | ||
296 | struct reiserfs_sb_info *r = REISERFS_SB(sb); | 303 | struct reiserfs_sb_info *r = REISERFS_SB(sb); |
297 | struct reiserfs_super_block *rs = r->s_rs; | 304 | struct reiserfs_super_block *rs = r->s_rs; |
298 | struct journal_params *jp = &rs->s_v1.s_journal; | 305 | struct journal_params *jp = &rs->s_v1.s_journal; |
@@ -383,92 +390,24 @@ static int show_journal(struct seq_file *m, struct super_block *sb) | |||
383 | return 0; | 390 | return 0; |
384 | } | 391 | } |
385 | 392 | ||
386 | /* iterator */ | ||
387 | static int test_sb(struct super_block *sb, void *data) | ||
388 | { | ||
389 | return data == sb; | ||
390 | } | ||
391 | |||
392 | static int set_sb(struct super_block *sb, void *data) | ||
393 | { | ||
394 | return -ENOENT; | ||
395 | } | ||
396 | |||
397 | struct reiserfs_seq_private { | ||
398 | struct super_block *sb; | ||
399 | int (*show) (struct seq_file *, struct super_block *); | ||
400 | }; | ||
401 | |||
402 | static void *r_start(struct seq_file *m, loff_t * pos) | ||
403 | { | ||
404 | struct reiserfs_seq_private *priv = m->private; | ||
405 | loff_t l = *pos; | ||
406 | |||
407 | if (l) | ||
408 | return NULL; | ||
409 | |||
410 | if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb))) | ||
411 | return NULL; | ||
412 | |||
413 | up_write(&priv->sb->s_umount); | ||
414 | return priv->sb; | ||
415 | } | ||
416 | |||
417 | static void *r_next(struct seq_file *m, void *v, loff_t * pos) | ||
418 | { | ||
419 | ++*pos; | ||
420 | if (v) | ||
421 | deactivate_super(v); | ||
422 | return NULL; | ||
423 | } | ||
424 | |||
425 | static void r_stop(struct seq_file *m, void *v) | ||
426 | { | ||
427 | if (v) | ||
428 | deactivate_super(v); | ||
429 | } | ||
430 | |||
431 | static int r_show(struct seq_file *m, void *v) | ||
432 | { | ||
433 | struct reiserfs_seq_private *priv = m->private; | ||
434 | return priv->show(m, v); | ||
435 | } | ||
436 | |||
437 | static const struct seq_operations r_ops = { | ||
438 | .start = r_start, | ||
439 | .next = r_next, | ||
440 | .stop = r_stop, | ||
441 | .show = r_show, | ||
442 | }; | ||
443 | |||
444 | static int r_open(struct inode *inode, struct file *file) | 393 | static int r_open(struct inode *inode, struct file *file) |
445 | { | 394 | { |
446 | struct reiserfs_seq_private *priv; | 395 | return single_open(file, PDE_DATA(inode), |
447 | int ret = seq_open_private(file, &r_ops, | 396 | proc_get_parent_data(inode)); |
448 | sizeof(struct reiserfs_seq_private)); | ||
449 | |||
450 | if (!ret) { | ||
451 | struct seq_file *m = file->private_data; | ||
452 | priv = m->private; | ||
453 | priv->sb = proc_get_parent_data(inode); | ||
454 | priv->show = PDE_DATA(inode); | ||
455 | } | ||
456 | return ret; | ||
457 | } | 397 | } |
458 | 398 | ||
459 | static const struct file_operations r_file_operations = { | 399 | static const struct file_operations r_file_operations = { |
460 | .open = r_open, | 400 | .open = r_open, |
461 | .read = seq_read, | 401 | .read = seq_read, |
462 | .llseek = seq_lseek, | 402 | .llseek = seq_lseek, |
463 | .release = seq_release_private, | 403 | .release = single_release, |
464 | .owner = THIS_MODULE, | ||
465 | }; | 404 | }; |
466 | 405 | ||
467 | static struct proc_dir_entry *proc_info_root = NULL; | 406 | static struct proc_dir_entry *proc_info_root = NULL; |
468 | static const char proc_info_root_name[] = "fs/reiserfs"; | 407 | static const char proc_info_root_name[] = "fs/reiserfs"; |
469 | 408 | ||
470 | static void add_file(struct super_block *sb, char *name, | 409 | static void add_file(struct super_block *sb, char *name, |
471 | int (*func) (struct seq_file *, struct super_block *)) | 410 | int (*func) (struct seq_file *, void *)) |
472 | { | 411 | { |
473 | proc_create_data(name, 0, REISERFS_SB(sb)->procdir, | 412 | proc_create_data(name, 0, REISERFS_SB(sb)->procdir, |
474 | &r_file_operations, func); | 413 | &r_file_operations, func); |
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 157e474ab303..3df5ce6c724d 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h | |||
@@ -2709,7 +2709,7 @@ extern const struct inode_operations reiserfs_dir_inode_operations; | |||
2709 | extern const struct inode_operations reiserfs_symlink_inode_operations; | 2709 | extern const struct inode_operations reiserfs_symlink_inode_operations; |
2710 | extern const struct inode_operations reiserfs_special_inode_operations; | 2710 | extern const struct inode_operations reiserfs_special_inode_operations; |
2711 | extern const struct file_operations reiserfs_dir_operations; | 2711 | extern const struct file_operations reiserfs_dir_operations; |
2712 | int reiserfs_readdir_dentry(struct dentry *, void *, filldir_t, loff_t *); | 2712 | int reiserfs_readdir_inode(struct inode *, struct dir_context *); |
2713 | 2713 | ||
2714 | /* tail_conversion.c */ | 2714 | /* tail_conversion.c */ |
2715 | int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, | 2715 | int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f8a23c3078f8..e2e202a07b31 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -499,6 +499,7 @@ int remove_save_link(struct inode *inode, int truncate) | |||
499 | static void reiserfs_kill_sb(struct super_block *s) | 499 | static void reiserfs_kill_sb(struct super_block *s) |
500 | { | 500 | { |
501 | if (REISERFS_SB(s)) { | 501 | if (REISERFS_SB(s)) { |
502 | reiserfs_proc_info_done(s); | ||
502 | /* | 503 | /* |
503 | * Force any pending inode evictions to occur now. Any | 504 | * Force any pending inode evictions to occur now. Any |
504 | * inodes to be removed that have extended attributes | 505 | * inodes to be removed that have extended attributes |
@@ -554,8 +555,6 @@ static void reiserfs_put_super(struct super_block *s) | |||
554 | REISERFS_SB(s)->reserved_blocks); | 555 | REISERFS_SB(s)->reserved_blocks); |
555 | } | 556 | } |
556 | 557 | ||
557 | reiserfs_proc_info_done(s); | ||
558 | |||
559 | reiserfs_write_unlock(s); | 558 | reiserfs_write_unlock(s); |
560 | mutex_destroy(&REISERFS_SB(s)->lock); | 559 | mutex_destroy(&REISERFS_SB(s)->lock); |
561 | kfree(s->s_fs_info); | 560 | kfree(s->s_fs_info); |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 821bcf70e467..c69cdd749f09 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -171,6 +171,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags) | |||
171 | * modifying extended attributes. This includes operations such as permissions | 171 | * modifying extended attributes. This includes operations such as permissions |
172 | * or ownership changes, object deletions, etc. */ | 172 | * or ownership changes, object deletions, etc. */ |
173 | struct reiserfs_dentry_buf { | 173 | struct reiserfs_dentry_buf { |
174 | struct dir_context ctx; | ||
174 | struct dentry *xadir; | 175 | struct dentry *xadir; |
175 | int count; | 176 | int count; |
176 | struct dentry *dentries[8]; | 177 | struct dentry *dentries[8]; |
@@ -223,9 +224,8 @@ static int reiserfs_for_each_xattr(struct inode *inode, | |||
223 | { | 224 | { |
224 | struct dentry *dir; | 225 | struct dentry *dir; |
225 | int i, err = 0; | 226 | int i, err = 0; |
226 | loff_t pos = 0; | ||
227 | struct reiserfs_dentry_buf buf = { | 227 | struct reiserfs_dentry_buf buf = { |
228 | .count = 0, | 228 | .ctx.actor = fill_with_dentries, |
229 | }; | 229 | }; |
230 | 230 | ||
231 | /* Skip out, an xattr has no xattrs associated with it */ | 231 | /* Skip out, an xattr has no xattrs associated with it */ |
@@ -249,29 +249,27 @@ static int reiserfs_for_each_xattr(struct inode *inode, | |||
249 | reiserfs_write_lock(inode->i_sb); | 249 | reiserfs_write_lock(inode->i_sb); |
250 | 250 | ||
251 | buf.xadir = dir; | 251 | buf.xadir = dir; |
252 | err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos); | 252 | while (1) { |
253 | while ((err == 0 || err == -ENOSPC) && buf.count) { | 253 | err = reiserfs_readdir_inode(dir->d_inode, &buf.ctx); |
254 | err = 0; | 254 | if (err) |
255 | 255 | break; | |
256 | for (i = 0; i < buf.count && buf.dentries[i]; i++) { | 256 | if (!buf.count) |
257 | int lerr = 0; | 257 | break; |
258 | for (i = 0; !err && i < buf.count && buf.dentries[i]; i++) { | ||
258 | struct dentry *dentry = buf.dentries[i]; | 259 | struct dentry *dentry = buf.dentries[i]; |
259 | 260 | ||
260 | if (err == 0 && !S_ISDIR(dentry->d_inode->i_mode)) | 261 | if (!S_ISDIR(dentry->d_inode->i_mode)) |
261 | lerr = action(dentry, data); | 262 | err = action(dentry, data); |
262 | 263 | ||
263 | dput(dentry); | 264 | dput(dentry); |
264 | buf.dentries[i] = NULL; | 265 | buf.dentries[i] = NULL; |
265 | err = lerr ?: err; | ||
266 | } | 266 | } |
267 | if (err) | ||
268 | break; | ||
267 | buf.count = 0; | 269 | buf.count = 0; |
268 | if (!err) | ||
269 | err = reiserfs_readdir_dentry(dir, &buf, | ||
270 | fill_with_dentries, &pos); | ||
271 | } | 270 | } |
272 | mutex_unlock(&dir->d_inode->i_mutex); | 271 | mutex_unlock(&dir->d_inode->i_mutex); |
273 | 272 | ||
274 | /* Clean up after a failed readdir */ | ||
275 | cleanup_dentry_buf(&buf); | 273 | cleanup_dentry_buf(&buf); |
276 | 274 | ||
277 | if (!err) { | 275 | if (!err) { |
@@ -800,6 +798,7 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name) | |||
800 | } | 798 | } |
801 | 799 | ||
802 | struct listxattr_buf { | 800 | struct listxattr_buf { |
801 | struct dir_context ctx; | ||
803 | size_t size; | 802 | size_t size; |
804 | size_t pos; | 803 | size_t pos; |
805 | char *buf; | 804 | char *buf; |
@@ -845,8 +844,8 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) | |||
845 | { | 844 | { |
846 | struct dentry *dir; | 845 | struct dentry *dir; |
847 | int err = 0; | 846 | int err = 0; |
848 | loff_t pos = 0; | ||
849 | struct listxattr_buf buf = { | 847 | struct listxattr_buf buf = { |
848 | .ctx.actor = listxattr_filler, | ||
850 | .dentry = dentry, | 849 | .dentry = dentry, |
851 | .buf = buffer, | 850 | .buf = buffer, |
852 | .size = buffer ? size : 0, | 851 | .size = buffer ? size : 0, |
@@ -868,7 +867,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) | |||
868 | } | 867 | } |
869 | 868 | ||
870 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); | 869 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); |
871 | err = reiserfs_readdir_dentry(dir, &buf, listxattr_filler, &pos); | 870 | err = reiserfs_readdir_inode(dir->d_inode, &buf.ctx); |
872 | mutex_unlock(&dir->d_inode->i_mutex); | 871 | mutex_unlock(&dir->d_inode->i_mutex); |
873 | 872 | ||
874 | if (!err) | 873 | if (!err) |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 15cbc41ee365..ff1d3d42e72a 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -145,19 +145,18 @@ static const struct address_space_operations romfs_aops = { | |||
145 | /* | 145 | /* |
146 | * read the entries from a directory | 146 | * read the entries from a directory |
147 | */ | 147 | */ |
148 | static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 148 | static int romfs_readdir(struct file *file, struct dir_context *ctx) |
149 | { | 149 | { |
150 | struct inode *i = file_inode(filp); | 150 | struct inode *i = file_inode(file); |
151 | struct romfs_inode ri; | 151 | struct romfs_inode ri; |
152 | unsigned long offset, maxoff; | 152 | unsigned long offset, maxoff; |
153 | int j, ino, nextfh; | 153 | int j, ino, nextfh; |
154 | int stored = 0; | ||
155 | char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ | 154 | char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ |
156 | int ret; | 155 | int ret; |
157 | 156 | ||
158 | maxoff = romfs_maxsize(i->i_sb); | 157 | maxoff = romfs_maxsize(i->i_sb); |
159 | 158 | ||
160 | offset = filp->f_pos; | 159 | offset = ctx->pos; |
161 | if (!offset) { | 160 | if (!offset) { |
162 | offset = i->i_ino & ROMFH_MASK; | 161 | offset = i->i_ino & ROMFH_MASK; |
163 | ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); | 162 | ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); |
@@ -170,10 +169,10 @@ static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
170 | for (;;) { | 169 | for (;;) { |
171 | if (!offset || offset >= maxoff) { | 170 | if (!offset || offset >= maxoff) { |
172 | offset = maxoff; | 171 | offset = maxoff; |
173 | filp->f_pos = offset; | 172 | ctx->pos = offset; |
174 | goto out; | 173 | goto out; |
175 | } | 174 | } |
176 | filp->f_pos = offset; | 175 | ctx->pos = offset; |
177 | 176 | ||
178 | /* Fetch inode info */ | 177 | /* Fetch inode info */ |
179 | ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); | 178 | ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); |
@@ -194,16 +193,14 @@ static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
194 | nextfh = be32_to_cpu(ri.next); | 193 | nextfh = be32_to_cpu(ri.next); |
195 | if ((nextfh & ROMFH_TYPE) == ROMFH_HRD) | 194 | if ((nextfh & ROMFH_TYPE) == ROMFH_HRD) |
196 | ino = be32_to_cpu(ri.spec); | 195 | ino = be32_to_cpu(ri.spec); |
197 | if (filldir(dirent, fsname, j, offset, ino, | 196 | if (!dir_emit(ctx, fsname, j, ino, |
198 | romfs_dtype_table[nextfh & ROMFH_TYPE]) < 0) | 197 | romfs_dtype_table[nextfh & ROMFH_TYPE])) |
199 | goto out; | 198 | goto out; |
200 | 199 | ||
201 | stored++; | ||
202 | offset = nextfh & ROMFH_MASK; | 200 | offset = nextfh & ROMFH_MASK; |
203 | } | 201 | } |
204 | |||
205 | out: | 202 | out: |
206 | return stored; | 203 | return 0; |
207 | } | 204 | } |
208 | 205 | ||
209 | /* | 206 | /* |
@@ -281,7 +278,7 @@ error: | |||
281 | 278 | ||
282 | static const struct file_operations romfs_dir_operations = { | 279 | static const struct file_operations romfs_dir_operations = { |
283 | .read = generic_read_dir, | 280 | .read = generic_read_dir, |
284 | .readdir = romfs_readdir, | 281 | .iterate = romfs_readdir, |
285 | .llseek = default_llseek, | 282 | .llseek = default_llseek, |
286 | }; | 283 | }; |
287 | 284 | ||
diff --git a/fs/select.c b/fs/select.c index 8c1c96c27062..35d4adc749d9 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -27,6 +27,8 @@ | |||
27 | #include <linux/rcupdate.h> | 27 | #include <linux/rcupdate.h> |
28 | #include <linux/hrtimer.h> | 28 | #include <linux/hrtimer.h> |
29 | #include <linux/sched/rt.h> | 29 | #include <linux/sched/rt.h> |
30 | #include <linux/freezer.h> | ||
31 | #include <net/busy_poll.h> | ||
30 | 32 | ||
31 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
32 | 34 | ||
@@ -236,7 +238,8 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, | |||
236 | 238 | ||
237 | set_current_state(state); | 239 | set_current_state(state); |
238 | if (!pwq->triggered) | 240 | if (!pwq->triggered) |
239 | rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); | 241 | rc = freezable_schedule_hrtimeout_range(expires, slack, |
242 | HRTIMER_MODE_ABS); | ||
240 | __set_current_state(TASK_RUNNING); | 243 | __set_current_state(TASK_RUNNING); |
241 | 244 | ||
242 | /* | 245 | /* |
@@ -384,9 +387,10 @@ get_max: | |||
384 | #define POLLEX_SET (POLLPRI) | 387 | #define POLLEX_SET (POLLPRI) |
385 | 388 | ||
386 | static inline void wait_key_set(poll_table *wait, unsigned long in, | 389 | static inline void wait_key_set(poll_table *wait, unsigned long in, |
387 | unsigned long out, unsigned long bit) | 390 | unsigned long out, unsigned long bit, |
391 | unsigned int ll_flag) | ||
388 | { | 392 | { |
389 | wait->_key = POLLEX_SET; | 393 | wait->_key = POLLEX_SET | ll_flag; |
390 | if (in & bit) | 394 | if (in & bit) |
391 | wait->_key |= POLLIN_SET; | 395 | wait->_key |= POLLIN_SET; |
392 | if (out & bit) | 396 | if (out & bit) |
@@ -400,6 +404,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
400 | poll_table *wait; | 404 | poll_table *wait; |
401 | int retval, i, timed_out = 0; | 405 | int retval, i, timed_out = 0; |
402 | unsigned long slack = 0; | 406 | unsigned long slack = 0; |
407 | unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; | ||
408 | unsigned long busy_end = 0; | ||
403 | 409 | ||
404 | rcu_read_lock(); | 410 | rcu_read_lock(); |
405 | retval = max_select_fd(n, fds); | 411 | retval = max_select_fd(n, fds); |
@@ -422,6 +428,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
422 | retval = 0; | 428 | retval = 0; |
423 | for (;;) { | 429 | for (;;) { |
424 | unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; | 430 | unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; |
431 | bool can_busy_loop = false; | ||
425 | 432 | ||
426 | inp = fds->in; outp = fds->out; exp = fds->ex; | 433 | inp = fds->in; outp = fds->out; exp = fds->ex; |
427 | rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; | 434 | rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; |
@@ -449,7 +456,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
449 | f_op = f.file->f_op; | 456 | f_op = f.file->f_op; |
450 | mask = DEFAULT_POLLMASK; | 457 | mask = DEFAULT_POLLMASK; |
451 | if (f_op && f_op->poll) { | 458 | if (f_op && f_op->poll) { |
452 | wait_key_set(wait, in, out, bit); | 459 | wait_key_set(wait, in, out, |
460 | bit, busy_flag); | ||
453 | mask = (*f_op->poll)(f.file, wait); | 461 | mask = (*f_op->poll)(f.file, wait); |
454 | } | 462 | } |
455 | fdput(f); | 463 | fdput(f); |
@@ -468,6 +476,18 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
468 | retval++; | 476 | retval++; |
469 | wait->_qproc = NULL; | 477 | wait->_qproc = NULL; |
470 | } | 478 | } |
479 | /* got something, stop busy polling */ | ||
480 | if (retval) { | ||
481 | can_busy_loop = false; | ||
482 | busy_flag = 0; | ||
483 | |||
484 | /* | ||
485 | * only remember a returned | ||
486 | * POLL_BUSY_LOOP if we asked for it | ||
487 | */ | ||
488 | } else if (busy_flag & mask) | ||
489 | can_busy_loop = true; | ||
490 | |||
471 | } | 491 | } |
472 | } | 492 | } |
473 | if (res_in) | 493 | if (res_in) |
@@ -486,6 +506,17 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
486 | break; | 506 | break; |
487 | } | 507 | } |
488 | 508 | ||
509 | /* only if found POLL_BUSY_LOOP sockets && not out of time */ | ||
510 | if (can_busy_loop && !need_resched()) { | ||
511 | if (!busy_end) { | ||
512 | busy_end = busy_loop_end_time(); | ||
513 | continue; | ||
514 | } | ||
515 | if (!busy_loop_timeout(busy_end)) | ||
516 | continue; | ||
517 | } | ||
518 | busy_flag = 0; | ||
519 | |||
489 | /* | 520 | /* |
490 | * If this is the first loop and we have a timeout | 521 | * If this is the first loop and we have a timeout |
491 | * given, then we convert to ktime_t and set the to | 522 | * given, then we convert to ktime_t and set the to |
@@ -717,7 +748,9 @@ struct poll_list { | |||
717 | * pwait poll_table will be used by the fd-provided poll handler for waiting, | 748 | * pwait poll_table will be used by the fd-provided poll handler for waiting, |
718 | * if pwait->_qproc is non-NULL. | 749 | * if pwait->_qproc is non-NULL. |
719 | */ | 750 | */ |
720 | static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) | 751 | static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, |
752 | bool *can_busy_poll, | ||
753 | unsigned int busy_flag) | ||
721 | { | 754 | { |
722 | unsigned int mask; | 755 | unsigned int mask; |
723 | int fd; | 756 | int fd; |
@@ -731,7 +764,10 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) | |||
731 | mask = DEFAULT_POLLMASK; | 764 | mask = DEFAULT_POLLMASK; |
732 | if (f.file->f_op && f.file->f_op->poll) { | 765 | if (f.file->f_op && f.file->f_op->poll) { |
733 | pwait->_key = pollfd->events|POLLERR|POLLHUP; | 766 | pwait->_key = pollfd->events|POLLERR|POLLHUP; |
767 | pwait->_key |= busy_flag; | ||
734 | mask = f.file->f_op->poll(f.file, pwait); | 768 | mask = f.file->f_op->poll(f.file, pwait); |
769 | if (mask & busy_flag) | ||
770 | *can_busy_poll = true; | ||
735 | } | 771 | } |
736 | /* Mask out unneeded events. */ | 772 | /* Mask out unneeded events. */ |
737 | mask &= pollfd->events | POLLERR | POLLHUP; | 773 | mask &= pollfd->events | POLLERR | POLLHUP; |
@@ -750,6 +786,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
750 | ktime_t expire, *to = NULL; | 786 | ktime_t expire, *to = NULL; |
751 | int timed_out = 0, count = 0; | 787 | int timed_out = 0, count = 0; |
752 | unsigned long slack = 0; | 788 | unsigned long slack = 0; |
789 | unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; | ||
790 | unsigned long busy_end = 0; | ||
753 | 791 | ||
754 | /* Optimise the no-wait case */ | 792 | /* Optimise the no-wait case */ |
755 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { | 793 | if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { |
@@ -762,6 +800,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
762 | 800 | ||
763 | for (;;) { | 801 | for (;;) { |
764 | struct poll_list *walk; | 802 | struct poll_list *walk; |
803 | bool can_busy_loop = false; | ||
765 | 804 | ||
766 | for (walk = list; walk != NULL; walk = walk->next) { | 805 | for (walk = list; walk != NULL; walk = walk->next) { |
767 | struct pollfd * pfd, * pfd_end; | 806 | struct pollfd * pfd, * pfd_end; |
@@ -776,9 +815,13 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
776 | * this. They'll get immediately deregistered | 815 | * this. They'll get immediately deregistered |
777 | * when we break out and return. | 816 | * when we break out and return. |
778 | */ | 817 | */ |
779 | if (do_pollfd(pfd, pt)) { | 818 | if (do_pollfd(pfd, pt, &can_busy_loop, |
819 | busy_flag)) { | ||
780 | count++; | 820 | count++; |
781 | pt->_qproc = NULL; | 821 | pt->_qproc = NULL; |
822 | /* found something, stop busy polling */ | ||
823 | busy_flag = 0; | ||
824 | can_busy_loop = false; | ||
782 | } | 825 | } |
783 | } | 826 | } |
784 | } | 827 | } |
@@ -795,6 +838,17 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
795 | if (count || timed_out) | 838 | if (count || timed_out) |
796 | break; | 839 | break; |
797 | 840 | ||
841 | /* only if found POLL_BUSY_LOOP sockets && not out of time */ | ||
842 | if (can_busy_loop && !need_resched()) { | ||
843 | if (!busy_end) { | ||
844 | busy_end = busy_loop_end_time(); | ||
845 | continue; | ||
846 | } | ||
847 | if (!busy_loop_timeout(busy_end)) | ||
848 | continue; | ||
849 | } | ||
850 | busy_flag = 0; | ||
851 | |||
798 | /* | 852 | /* |
799 | * If this is the first loop and we have a timeout | 853 | * If this is the first loop and we have a timeout |
800 | * given, then we convert to ktime_t and set the to | 854 | * given, then we convert to ktime_t and set the to |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 774c1eb7f1c9..3135c2525c76 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -921,3 +921,57 @@ struct hlist_node *seq_hlist_next_rcu(void *v, | |||
921 | return rcu_dereference(node->next); | 921 | return rcu_dereference(node->next); |
922 | } | 922 | } |
923 | EXPORT_SYMBOL(seq_hlist_next_rcu); | 923 | EXPORT_SYMBOL(seq_hlist_next_rcu); |
924 | |||
925 | /** | ||
926 | * seq_hlist_start_precpu - start an iteration of a percpu hlist array | ||
927 | * @head: pointer to percpu array of struct hlist_heads | ||
928 | * @cpu: pointer to cpu "cursor" | ||
929 | * @pos: start position of sequence | ||
930 | * | ||
931 | * Called at seq_file->op->start(). | ||
932 | */ | ||
933 | struct hlist_node * | ||
934 | seq_hlist_start_percpu(struct hlist_head __percpu *head, int *cpu, loff_t pos) | ||
935 | { | ||
936 | struct hlist_node *node; | ||
937 | |||
938 | for_each_possible_cpu(*cpu) { | ||
939 | hlist_for_each(node, per_cpu_ptr(head, *cpu)) { | ||
940 | if (pos-- == 0) | ||
941 | return node; | ||
942 | } | ||
943 | } | ||
944 | return NULL; | ||
945 | } | ||
946 | EXPORT_SYMBOL(seq_hlist_start_percpu); | ||
947 | |||
948 | /** | ||
949 | * seq_hlist_next_percpu - move to the next position of the percpu hlist array | ||
950 | * @v: pointer to current hlist_node | ||
951 | * @head: pointer to percpu array of struct hlist_heads | ||
952 | * @cpu: pointer to cpu "cursor" | ||
953 | * @pos: start position of sequence | ||
954 | * | ||
955 | * Called at seq_file->op->next(). | ||
956 | */ | ||
957 | struct hlist_node * | ||
958 | seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, | ||
959 | int *cpu, loff_t *pos) | ||
960 | { | ||
961 | struct hlist_node *node = v; | ||
962 | |||
963 | ++*pos; | ||
964 | |||
965 | if (node->next) | ||
966 | return node->next; | ||
967 | |||
968 | for (*cpu = cpumask_next(*cpu, cpu_possible_mask); *cpu < nr_cpu_ids; | ||
969 | *cpu = cpumask_next(*cpu, cpu_possible_mask)) { | ||
970 | struct hlist_head *bucket = per_cpu_ptr(head, *cpu); | ||
971 | |||
972 | if (!hlist_empty(bucket)) | ||
973 | return bucket->first; | ||
974 | } | ||
975 | return NULL; | ||
976 | } | ||
977 | EXPORT_SYMBOL(seq_hlist_next_percpu); | ||
diff --git a/fs/splice.c b/fs/splice.c index d37431dd60a1..3b7ee656f3aa 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1098,27 +1098,13 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
1098 | { | 1098 | { |
1099 | ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, | 1099 | ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, |
1100 | loff_t *, size_t, unsigned int); | 1100 | loff_t *, size_t, unsigned int); |
1101 | int ret; | ||
1102 | |||
1103 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | ||
1104 | return -EBADF; | ||
1105 | |||
1106 | if (unlikely(out->f_flags & O_APPEND)) | ||
1107 | return -EINVAL; | ||
1108 | |||
1109 | ret = rw_verify_area(WRITE, out, ppos, len); | ||
1110 | if (unlikely(ret < 0)) | ||
1111 | return ret; | ||
1112 | 1101 | ||
1113 | if (out->f_op && out->f_op->splice_write) | 1102 | if (out->f_op && out->f_op->splice_write) |
1114 | splice_write = out->f_op->splice_write; | 1103 | splice_write = out->f_op->splice_write; |
1115 | else | 1104 | else |
1116 | splice_write = default_file_splice_write; | 1105 | splice_write = default_file_splice_write; |
1117 | 1106 | ||
1118 | file_start_write(out); | 1107 | return splice_write(pipe, out, ppos, len, flags); |
1119 | ret = splice_write(pipe, out, ppos, len, flags); | ||
1120 | file_end_write(out); | ||
1121 | return ret; | ||
1122 | } | 1108 | } |
1123 | 1109 | ||
1124 | /* | 1110 | /* |
@@ -1307,6 +1293,16 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
1307 | }; | 1293 | }; |
1308 | long ret; | 1294 | long ret; |
1309 | 1295 | ||
1296 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | ||
1297 | return -EBADF; | ||
1298 | |||
1299 | if (unlikely(out->f_flags & O_APPEND)) | ||
1300 | return -EINVAL; | ||
1301 | |||
1302 | ret = rw_verify_area(WRITE, out, opos, len); | ||
1303 | if (unlikely(ret < 0)) | ||
1304 | return ret; | ||
1305 | |||
1310 | ret = splice_direct_to_actor(in, &sd, direct_splice_actor); | 1306 | ret = splice_direct_to_actor(in, &sd, direct_splice_actor); |
1311 | if (ret > 0) | 1307 | if (ret > 0) |
1312 | *ppos = sd.pos; | 1308 | *ppos = sd.pos; |
@@ -1362,7 +1358,19 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1362 | offset = out->f_pos; | 1358 | offset = out->f_pos; |
1363 | } | 1359 | } |
1364 | 1360 | ||
1361 | if (unlikely(!(out->f_mode & FMODE_WRITE))) | ||
1362 | return -EBADF; | ||
1363 | |||
1364 | if (unlikely(out->f_flags & O_APPEND)) | ||
1365 | return -EINVAL; | ||
1366 | |||
1367 | ret = rw_verify_area(WRITE, out, &offset, len); | ||
1368 | if (unlikely(ret < 0)) | ||
1369 | return ret; | ||
1370 | |||
1371 | file_start_write(out); | ||
1365 | ret = do_splice_from(ipipe, out, &offset, len, flags); | 1372 | ret = do_splice_from(ipipe, out, &offset, len, flags); |
1373 | file_end_write(out); | ||
1366 | 1374 | ||
1367 | if (!off_out) | 1375 | if (!off_out) |
1368 | out->f_pos = offset; | 1376 | out->f_pos = offset; |
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index 57dc70ebbb19..f7f527bf8c10 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c | |||
@@ -100,7 +100,7 @@ static int get_dir_index_using_offset(struct super_block *sb, | |||
100 | } | 100 | } |
101 | 101 | ||
102 | 102 | ||
103 | static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | 103 | static int squashfs_readdir(struct file *file, struct dir_context *ctx) |
104 | { | 104 | { |
105 | struct inode *inode = file_inode(file); | 105 | struct inode *inode = file_inode(file); |
106 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | 106 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; |
@@ -127,11 +127,11 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
127 | * It also means that the external f_pos is offset by 3 from the | 127 | * It also means that the external f_pos is offset by 3 from the |
128 | * on-disk directory f_pos. | 128 | * on-disk directory f_pos. |
129 | */ | 129 | */ |
130 | while (file->f_pos < 3) { | 130 | while (ctx->pos < 3) { |
131 | char *name; | 131 | char *name; |
132 | int i_ino; | 132 | int i_ino; |
133 | 133 | ||
134 | if (file->f_pos == 0) { | 134 | if (ctx->pos == 0) { |
135 | name = "."; | 135 | name = "."; |
136 | size = 1; | 136 | size = 1; |
137 | i_ino = inode->i_ino; | 137 | i_ino = inode->i_ino; |
@@ -141,24 +141,18 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
141 | i_ino = squashfs_i(inode)->parent; | 141 | i_ino = squashfs_i(inode)->parent; |
142 | } | 142 | } |
143 | 143 | ||
144 | TRACE("Calling filldir(%p, %s, %d, %lld, %d, %d)\n", | 144 | if (!dir_emit(ctx, name, size, i_ino, |
145 | dirent, name, size, file->f_pos, i_ino, | 145 | squashfs_filetype_table[1])) |
146 | squashfs_filetype_table[1]); | ||
147 | |||
148 | if (filldir(dirent, name, size, file->f_pos, i_ino, | ||
149 | squashfs_filetype_table[1]) < 0) { | ||
150 | TRACE("Filldir returned less than 0\n"); | ||
151 | goto finish; | 146 | goto finish; |
152 | } | ||
153 | 147 | ||
154 | file->f_pos += size; | 148 | ctx->pos += size; |
155 | } | 149 | } |
156 | 150 | ||
157 | length = get_dir_index_using_offset(inode->i_sb, &block, &offset, | 151 | length = get_dir_index_using_offset(inode->i_sb, &block, &offset, |
158 | squashfs_i(inode)->dir_idx_start, | 152 | squashfs_i(inode)->dir_idx_start, |
159 | squashfs_i(inode)->dir_idx_offset, | 153 | squashfs_i(inode)->dir_idx_offset, |
160 | squashfs_i(inode)->dir_idx_cnt, | 154 | squashfs_i(inode)->dir_idx_cnt, |
161 | file->f_pos); | 155 | ctx->pos); |
162 | 156 | ||
163 | while (length < i_size_read(inode)) { | 157 | while (length < i_size_read(inode)) { |
164 | /* | 158 | /* |
@@ -198,7 +192,7 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
198 | 192 | ||
199 | length += sizeof(*dire) + size; | 193 | length += sizeof(*dire) + size; |
200 | 194 | ||
201 | if (file->f_pos >= length) | 195 | if (ctx->pos >= length) |
202 | continue; | 196 | continue; |
203 | 197 | ||
204 | dire->name[size] = '\0'; | 198 | dire->name[size] = '\0'; |
@@ -206,22 +200,12 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
206 | ((short) le16_to_cpu(dire->inode_number)); | 200 | ((short) le16_to_cpu(dire->inode_number)); |
207 | type = le16_to_cpu(dire->type); | 201 | type = le16_to_cpu(dire->type); |
208 | 202 | ||
209 | TRACE("Calling filldir(%p, %s, %d, %lld, %x:%x, %d, %d)" | 203 | if (!dir_emit(ctx, dire->name, size, |
210 | "\n", dirent, dire->name, size, | ||
211 | file->f_pos, | ||
212 | le32_to_cpu(dirh.start_block), | ||
213 | le16_to_cpu(dire->offset), | ||
214 | inode_number, | ||
215 | squashfs_filetype_table[type]); | ||
216 | |||
217 | if (filldir(dirent, dire->name, size, file->f_pos, | ||
218 | inode_number, | 204 | inode_number, |
219 | squashfs_filetype_table[type]) < 0) { | 205 | squashfs_filetype_table[type])) |
220 | TRACE("Filldir returned less than 0\n"); | ||
221 | goto finish; | 206 | goto finish; |
222 | } | ||
223 | 207 | ||
224 | file->f_pos = length; | 208 | ctx->pos = length; |
225 | } | 209 | } |
226 | } | 210 | } |
227 | 211 | ||
@@ -238,6 +222,6 @@ failed_read: | |||
238 | 222 | ||
239 | const struct file_operations squashfs_dir_ops = { | 223 | const struct file_operations squashfs_dir_ops = { |
240 | .read = generic_read_dir, | 224 | .read = generic_read_dir, |
241 | .readdir = squashfs_readdir, | 225 | .iterate = squashfs_readdir, |
242 | .llseek = default_llseek, | 226 | .llseek = default_llseek, |
243 | }; | 227 | }; |
diff --git a/fs/super.c b/fs/super.c index 7465d4364208..68307c029228 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -336,19 +336,19 @@ EXPORT_SYMBOL(deactivate_super); | |||
336 | * and want to turn it into a full-blown active reference. grab_super() | 336 | * and want to turn it into a full-blown active reference. grab_super() |
337 | * is called with sb_lock held and drops it. Returns 1 in case of | 337 | * is called with sb_lock held and drops it. Returns 1 in case of |
338 | * success, 0 if we had failed (superblock contents was already dead or | 338 | * success, 0 if we had failed (superblock contents was already dead or |
339 | * dying when grab_super() had been called). | 339 | * dying when grab_super() had been called). Note that this is only |
340 | * called for superblocks not in rundown mode (== ones still on ->fs_supers | ||
341 | * of their type), so increment of ->s_count is OK here. | ||
340 | */ | 342 | */ |
341 | static int grab_super(struct super_block *s) __releases(sb_lock) | 343 | static int grab_super(struct super_block *s) __releases(sb_lock) |
342 | { | 344 | { |
343 | if (atomic_inc_not_zero(&s->s_active)) { | ||
344 | spin_unlock(&sb_lock); | ||
345 | return 1; | ||
346 | } | ||
347 | /* it's going away */ | ||
348 | s->s_count++; | 345 | s->s_count++; |
349 | spin_unlock(&sb_lock); | 346 | spin_unlock(&sb_lock); |
350 | /* wait for it to die */ | ||
351 | down_write(&s->s_umount); | 347 | down_write(&s->s_umount); |
348 | if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { | ||
349 | put_super(s); | ||
350 | return 1; | ||
351 | } | ||
352 | up_write(&s->s_umount); | 352 | up_write(&s->s_umount); |
353 | put_super(s); | 353 | put_super(s); |
354 | return 0; | 354 | return 0; |
@@ -463,11 +463,6 @@ retry: | |||
463 | destroy_super(s); | 463 | destroy_super(s); |
464 | s = NULL; | 464 | s = NULL; |
465 | } | 465 | } |
466 | down_write(&old->s_umount); | ||
467 | if (unlikely(!(old->s_flags & MS_BORN))) { | ||
468 | deactivate_locked_super(old); | ||
469 | goto retry; | ||
470 | } | ||
471 | return old; | 466 | return old; |
472 | } | 467 | } |
473 | } | 468 | } |
@@ -660,10 +655,10 @@ restart: | |||
660 | if (hlist_unhashed(&sb->s_instances)) | 655 | if (hlist_unhashed(&sb->s_instances)) |
661 | continue; | 656 | continue; |
662 | if (sb->s_bdev == bdev) { | 657 | if (sb->s_bdev == bdev) { |
663 | if (grab_super(sb)) /* drops sb_lock */ | 658 | if (!grab_super(sb)) |
664 | return sb; | ||
665 | else | ||
666 | goto restart; | 659 | goto restart; |
660 | up_write(&sb->s_umount); | ||
661 | return sb; | ||
667 | } | 662 | } |
668 | } | 663 | } |
669 | spin_unlock(&sb_lock); | 664 | spin_unlock(&sb_lock); |
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index e8e0e71b29d5..e068e744dbdd 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -74,7 +74,7 @@ static int sysfs_sd_compare(const struct sysfs_dirent *left, | |||
74 | } | 74 | } |
75 | 75 | ||
76 | /** | 76 | /** |
77 | * sysfs_link_subling - link sysfs_dirent into sibling rbtree | 77 | * sysfs_link_sibling - link sysfs_dirent into sibling rbtree |
78 | * @sd: sysfs_dirent of interest | 78 | * @sd: sysfs_dirent of interest |
79 | * | 79 | * |
80 | * Link @sd into its sibling rbtree which starts from | 80 | * Link @sd into its sibling rbtree which starts from |
@@ -998,68 +998,38 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, | |||
998 | return pos; | 998 | return pos; |
999 | } | 999 | } |
1000 | 1000 | ||
1001 | static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | 1001 | static int sysfs_readdir(struct file *file, struct dir_context *ctx) |
1002 | { | 1002 | { |
1003 | struct dentry *dentry = filp->f_path.dentry; | 1003 | struct dentry *dentry = file->f_path.dentry; |
1004 | struct sysfs_dirent * parent_sd = dentry->d_fsdata; | 1004 | struct sysfs_dirent * parent_sd = dentry->d_fsdata; |
1005 | struct sysfs_dirent *pos = filp->private_data; | 1005 | struct sysfs_dirent *pos = file->private_data; |
1006 | enum kobj_ns_type type; | 1006 | enum kobj_ns_type type; |
1007 | const void *ns; | 1007 | const void *ns; |
1008 | ino_t ino; | ||
1009 | loff_t off; | ||
1010 | 1008 | ||
1011 | type = sysfs_ns_type(parent_sd); | 1009 | type = sysfs_ns_type(parent_sd); |
1012 | ns = sysfs_info(dentry->d_sb)->ns[type]; | 1010 | ns = sysfs_info(dentry->d_sb)->ns[type]; |
1013 | 1011 | ||
1014 | if (filp->f_pos == 0) { | 1012 | if (!dir_emit_dots(file, ctx)) |
1015 | ino = parent_sd->s_ino; | 1013 | return 0; |
1016 | if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) | ||
1017 | filp->f_pos++; | ||
1018 | else | ||
1019 | return 0; | ||
1020 | } | ||
1021 | if (filp->f_pos == 1) { | ||
1022 | if (parent_sd->s_parent) | ||
1023 | ino = parent_sd->s_parent->s_ino; | ||
1024 | else | ||
1025 | ino = parent_sd->s_ino; | ||
1026 | if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) | ||
1027 | filp->f_pos++; | ||
1028 | else | ||
1029 | return 0; | ||
1030 | } | ||
1031 | mutex_lock(&sysfs_mutex); | 1014 | mutex_lock(&sysfs_mutex); |
1032 | off = filp->f_pos; | 1015 | for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos); |
1033 | for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); | ||
1034 | pos; | 1016 | pos; |
1035 | pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) { | 1017 | pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) { |
1036 | const char * name; | 1018 | const char *name = pos->s_name; |
1037 | unsigned int type; | 1019 | unsigned int type = dt_type(pos); |
1038 | int len, ret; | 1020 | int len = strlen(name); |
1039 | 1021 | ino_t ino = pos->s_ino; | |
1040 | name = pos->s_name; | 1022 | ctx->pos = pos->s_hash; |
1041 | len = strlen(name); | 1023 | file->private_data = sysfs_get(pos); |
1042 | ino = pos->s_ino; | ||
1043 | type = dt_type(pos); | ||
1044 | off = filp->f_pos = pos->s_hash; | ||
1045 | filp->private_data = sysfs_get(pos); | ||
1046 | 1024 | ||
1047 | mutex_unlock(&sysfs_mutex); | 1025 | mutex_unlock(&sysfs_mutex); |
1048 | ret = filldir(dirent, name, len, off, ino, type); | 1026 | if (!dir_emit(ctx, name, len, ino, type)) |
1027 | return 0; | ||
1049 | mutex_lock(&sysfs_mutex); | 1028 | mutex_lock(&sysfs_mutex); |
1050 | if (ret < 0) | ||
1051 | break; | ||
1052 | } | 1029 | } |
1053 | mutex_unlock(&sysfs_mutex); | 1030 | mutex_unlock(&sysfs_mutex); |
1054 | 1031 | file->private_data = NULL; | |
1055 | /* don't reference last entry if its refcount is dropped */ | 1032 | ctx->pos = INT_MAX; |
1056 | if (!pos) { | ||
1057 | filp->private_data = NULL; | ||
1058 | |||
1059 | /* EOF and not changed as 0 or 1 in read/write path */ | ||
1060 | if (off == filp->f_pos && off > 1) | ||
1061 | filp->f_pos = INT_MAX; | ||
1062 | } | ||
1063 | return 0; | 1033 | return 0; |
1064 | } | 1034 | } |
1065 | 1035 | ||
@@ -1077,7 +1047,7 @@ static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) | |||
1077 | 1047 | ||
1078 | const struct file_operations sysfs_dir_operations = { | 1048 | const struct file_operations sysfs_dir_operations = { |
1079 | .read = generic_read_dir, | 1049 | .read = generic_read_dir, |
1080 | .readdir = sysfs_readdir, | 1050 | .iterate = sysfs_readdir, |
1081 | .release = sysfs_dir_release, | 1051 | .release = sysfs_dir_release, |
1082 | .llseek = sysfs_dir_llseek, | 1052 | .llseek = sysfs_dir_llseek, |
1083 | }; | 1053 | }; |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 602f56db0442..d2bb7ed8fa74 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -449,10 +449,12 @@ void sysfs_notify_dirent(struct sysfs_dirent *sd) | |||
449 | 449 | ||
450 | spin_lock_irqsave(&sysfs_open_dirent_lock, flags); | 450 | spin_lock_irqsave(&sysfs_open_dirent_lock, flags); |
451 | 451 | ||
452 | od = sd->s_attr.open; | 452 | if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) { |
453 | if (od) { | 453 | od = sd->s_attr.open; |
454 | atomic_inc(&od->event); | 454 | if (od) { |
455 | wake_up_interruptible(&od->poll); | 455 | atomic_inc(&od->event); |
456 | wake_up_interruptible(&od->poll); | ||
457 | } | ||
456 | } | 458 | } |
457 | 459 | ||
458 | spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); | 460 | spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); |
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index aec3d5c98c94..09a1a25cd145 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c | |||
@@ -20,38 +20,64 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, | |||
20 | const struct attribute_group *grp) | 20 | const struct attribute_group *grp) |
21 | { | 21 | { |
22 | struct attribute *const* attr; | 22 | struct attribute *const* attr; |
23 | int i; | 23 | struct bin_attribute *const* bin_attr; |
24 | 24 | ||
25 | for (i = 0, attr = grp->attrs; *attr; i++, attr++) | 25 | if (grp->attrs) |
26 | sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); | 26 | for (attr = grp->attrs; *attr; attr++) |
27 | sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); | ||
28 | if (grp->bin_attrs) | ||
29 | for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) | ||
30 | sysfs_remove_bin_file(kobj, *bin_attr); | ||
27 | } | 31 | } |
28 | 32 | ||
29 | static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, | 33 | static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, |
30 | const struct attribute_group *grp, int update) | 34 | const struct attribute_group *grp, int update) |
31 | { | 35 | { |
32 | struct attribute *const* attr; | 36 | struct attribute *const* attr; |
37 | struct bin_attribute *const* bin_attr; | ||
33 | int error = 0, i; | 38 | int error = 0, i; |
34 | 39 | ||
35 | for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { | 40 | if (grp->attrs) { |
36 | umode_t mode = 0; | 41 | for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { |
42 | umode_t mode = 0; | ||
43 | |||
44 | /* | ||
45 | * In update mode, we're changing the permissions or | ||
46 | * visibility. Do this by first removing then | ||
47 | * re-adding (if required) the file. | ||
48 | */ | ||
49 | if (update) | ||
50 | sysfs_hash_and_remove(dir_sd, NULL, | ||
51 | (*attr)->name); | ||
52 | if (grp->is_visible) { | ||
53 | mode = grp->is_visible(kobj, *attr, i); | ||
54 | if (!mode) | ||
55 | continue; | ||
56 | } | ||
57 | error = sysfs_add_file_mode(dir_sd, *attr, | ||
58 | SYSFS_KOBJ_ATTR, | ||
59 | (*attr)->mode | mode); | ||
60 | if (unlikely(error)) | ||
61 | break; | ||
62 | } | ||
63 | if (error) { | ||
64 | remove_files(dir_sd, kobj, grp); | ||
65 | goto exit; | ||
66 | } | ||
67 | } | ||
37 | 68 | ||
38 | /* in update mode, we're changing the permissions or | 69 | if (grp->bin_attrs) { |
39 | * visibility. Do this by first removing then | 70 | for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { |
40 | * re-adding (if required) the file */ | 71 | if (update) |
41 | if (update) | 72 | sysfs_remove_bin_file(kobj, *bin_attr); |
42 | sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); | 73 | error = sysfs_create_bin_file(kobj, *bin_attr); |
43 | if (grp->is_visible) { | 74 | if (error) |
44 | mode = grp->is_visible(kobj, *attr, i); | 75 | break; |
45 | if (!mode) | ||
46 | continue; | ||
47 | } | 76 | } |
48 | error = sysfs_add_file_mode(dir_sd, *attr, SYSFS_KOBJ_ATTR, | 77 | if (error) |
49 | (*attr)->mode | mode); | 78 | remove_files(dir_sd, kobj, grp); |
50 | if (unlikely(error)) | ||
51 | break; | ||
52 | } | 79 | } |
53 | if (error) | 80 | exit: |
54 | remove_files(dir_sd, kobj, grp); | ||
55 | return error; | 81 | return error; |
56 | } | 82 | } |
57 | 83 | ||
@@ -67,8 +93,8 @@ static int internal_create_group(struct kobject *kobj, int update, | |||
67 | /* Updates may happen before the object has been instantiated */ | 93 | /* Updates may happen before the object has been instantiated */ |
68 | if (unlikely(update && !kobj->sd)) | 94 | if (unlikely(update && !kobj->sd)) |
69 | return -EINVAL; | 95 | return -EINVAL; |
70 | if (!grp->attrs) { | 96 | if (!grp->attrs && !grp->bin_attrs) { |
71 | WARN(1, "sysfs: attrs not set by subsystem for group: %s/%s\n", | 97 | WARN(1, "sysfs: (bin_)attrs not set by subsystem for group: %s/%s\n", |
72 | kobj->name, grp->name ? "" : grp->name); | 98 | kobj->name, grp->name ? "" : grp->name); |
73 | return -EINVAL; | 99 | return -EINVAL; |
74 | } | 100 | } |
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 0ce3ccf7f401..3e2837a633ed 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c | |||
@@ -24,8 +24,6 @@ | |||
24 | #include <linux/security.h> | 24 | #include <linux/security.h> |
25 | #include "sysfs.h" | 25 | #include "sysfs.h" |
26 | 26 | ||
27 | extern struct super_block * sysfs_sb; | ||
28 | |||
29 | static const struct address_space_operations sysfs_aops = { | 27 | static const struct address_space_operations sysfs_aops = { |
30 | .readpage = simple_readpage, | 28 | .readpage = simple_readpage, |
31 | .write_begin = simple_write_begin, | 29 | .write_begin = simple_write_begin, |
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 3799e8dac3eb..d42291d08215 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c | |||
@@ -18,12 +18,12 @@ | |||
18 | #include <linux/swap.h> | 18 | #include <linux/swap.h> |
19 | #include "sysv.h" | 19 | #include "sysv.h" |
20 | 20 | ||
21 | static int sysv_readdir(struct file *, void *, filldir_t); | 21 | static int sysv_readdir(struct file *, struct dir_context *); |
22 | 22 | ||
23 | const struct file_operations sysv_dir_operations = { | 23 | const struct file_operations sysv_dir_operations = { |
24 | .llseek = generic_file_llseek, | 24 | .llseek = generic_file_llseek, |
25 | .read = generic_read_dir, | 25 | .read = generic_read_dir, |
26 | .readdir = sysv_readdir, | 26 | .iterate = sysv_readdir, |
27 | .fsync = generic_file_fsync, | 27 | .fsync = generic_file_fsync, |
28 | }; | 28 | }; |
29 | 29 | ||
@@ -65,18 +65,21 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n) | |||
65 | return page; | 65 | return page; |
66 | } | 66 | } |
67 | 67 | ||
68 | static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) | 68 | static int sysv_readdir(struct file *file, struct dir_context *ctx) |
69 | { | 69 | { |
70 | unsigned long pos = filp->f_pos; | 70 | unsigned long pos = ctx->pos; |
71 | struct inode *inode = file_inode(filp); | 71 | struct inode *inode = file_inode(file); |
72 | struct super_block *sb = inode->i_sb; | 72 | struct super_block *sb = inode->i_sb; |
73 | unsigned offset = pos & ~PAGE_CACHE_MASK; | ||
74 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | ||
75 | unsigned long npages = dir_pages(inode); | 73 | unsigned long npages = dir_pages(inode); |
74 | unsigned offset; | ||
75 | unsigned long n; | ||
76 | 76 | ||
77 | pos = (pos + SYSV_DIRSIZE-1) & ~(SYSV_DIRSIZE-1); | 77 | ctx->pos = pos = (pos + SYSV_DIRSIZE-1) & ~(SYSV_DIRSIZE-1); |
78 | if (pos >= inode->i_size) | 78 | if (pos >= inode->i_size) |
79 | goto done; | 79 | return 0; |
80 | |||
81 | offset = pos & ~PAGE_CACHE_MASK; | ||
82 | n = pos >> PAGE_CACHE_SHIFT; | ||
80 | 83 | ||
81 | for ( ; n < npages; n++, offset = 0) { | 84 | for ( ; n < npages; n++, offset = 0) { |
82 | char *kaddr, *limit; | 85 | char *kaddr, *limit; |
@@ -88,29 +91,21 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
88 | kaddr = (char *)page_address(page); | 91 | kaddr = (char *)page_address(page); |
89 | de = (struct sysv_dir_entry *)(kaddr+offset); | 92 | de = (struct sysv_dir_entry *)(kaddr+offset); |
90 | limit = kaddr + PAGE_CACHE_SIZE - SYSV_DIRSIZE; | 93 | limit = kaddr + PAGE_CACHE_SIZE - SYSV_DIRSIZE; |
91 | for ( ;(char*)de <= limit; de++) { | 94 | for ( ;(char*)de <= limit; de++, ctx->pos += sizeof(*de)) { |
92 | char *name = de->name; | 95 | char *name = de->name; |
93 | int over; | ||
94 | 96 | ||
95 | if (!de->inode) | 97 | if (!de->inode) |
96 | continue; | 98 | continue; |
97 | 99 | ||
98 | offset = (char *)de - kaddr; | 100 | if (!dir_emit(ctx, name, strnlen(name,SYSV_NAMELEN), |
99 | |||
100 | over = filldir(dirent, name, strnlen(name,SYSV_NAMELEN), | ||
101 | ((loff_t)n<<PAGE_CACHE_SHIFT) | offset, | ||
102 | fs16_to_cpu(SYSV_SB(sb), de->inode), | 101 | fs16_to_cpu(SYSV_SB(sb), de->inode), |
103 | DT_UNKNOWN); | 102 | DT_UNKNOWN)) { |
104 | if (over) { | ||
105 | dir_put_page(page); | 103 | dir_put_page(page); |
106 | goto done; | 104 | return 0; |
107 | } | 105 | } |
108 | } | 106 | } |
109 | dir_put_page(page); | 107 | dir_put_page(page); |
110 | } | 108 | } |
111 | |||
112 | done: | ||
113 | filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset; | ||
114 | return 0; | 109 | return 0; |
115 | } | 110 | } |
116 | 111 | ||
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 1c0d5f264767..731b2bbcaab3 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c | |||
@@ -27,8 +27,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) | |||
27 | return err; | 27 | return err; |
28 | } | 28 | } |
29 | 29 | ||
30 | static int sysv_hash(const struct dentry *dentry, const struct inode *inode, | 30 | static int sysv_hash(const struct dentry *dentry, struct qstr *qstr) |
31 | struct qstr *qstr) | ||
32 | { | 31 | { |
33 | /* Truncate the name in place, avoids having to define a compare | 32 | /* Truncate the name in place, avoids having to define a compare |
34 | function. */ | 33 | function. */ |
diff --git a/fs/timerfd.c b/fs/timerfd.c index 32b644f03690..929312180dd0 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * | 8 | * |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/alarmtimer.h> | ||
11 | #include <linux/file.h> | 12 | #include <linux/file.h> |
12 | #include <linux/poll.h> | 13 | #include <linux/poll.h> |
13 | #include <linux/init.h> | 14 | #include <linux/init.h> |
@@ -26,7 +27,10 @@ | |||
26 | #include <linux/rcupdate.h> | 27 | #include <linux/rcupdate.h> |
27 | 28 | ||
28 | struct timerfd_ctx { | 29 | struct timerfd_ctx { |
29 | struct hrtimer tmr; | 30 | union { |
31 | struct hrtimer tmr; | ||
32 | struct alarm alarm; | ||
33 | } t; | ||
30 | ktime_t tintv; | 34 | ktime_t tintv; |
31 | ktime_t moffs; | 35 | ktime_t moffs; |
32 | wait_queue_head_t wqh; | 36 | wait_queue_head_t wqh; |
@@ -41,14 +45,19 @@ struct timerfd_ctx { | |||
41 | static LIST_HEAD(cancel_list); | 45 | static LIST_HEAD(cancel_list); |
42 | static DEFINE_SPINLOCK(cancel_lock); | 46 | static DEFINE_SPINLOCK(cancel_lock); |
43 | 47 | ||
48 | static inline bool isalarm(struct timerfd_ctx *ctx) | ||
49 | { | ||
50 | return ctx->clockid == CLOCK_REALTIME_ALARM || | ||
51 | ctx->clockid == CLOCK_BOOTTIME_ALARM; | ||
52 | } | ||
53 | |||
44 | /* | 54 | /* |
45 | * This gets called when the timer event triggers. We set the "expired" | 55 | * This gets called when the timer event triggers. We set the "expired" |
46 | * flag, but we do not re-arm the timer (in case it's necessary, | 56 | * flag, but we do not re-arm the timer (in case it's necessary, |
47 | * tintv.tv64 != 0) until the timer is accessed. | 57 | * tintv.tv64 != 0) until the timer is accessed. |
48 | */ | 58 | */ |
49 | static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | 59 | static void timerfd_triggered(struct timerfd_ctx *ctx) |
50 | { | 60 | { |
51 | struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr); | ||
52 | unsigned long flags; | 61 | unsigned long flags; |
53 | 62 | ||
54 | spin_lock_irqsave(&ctx->wqh.lock, flags); | 63 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
@@ -56,10 +65,25 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | |||
56 | ctx->ticks++; | 65 | ctx->ticks++; |
57 | wake_up_locked(&ctx->wqh); | 66 | wake_up_locked(&ctx->wqh); |
58 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); | 67 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
68 | } | ||
59 | 69 | ||
70 | static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | ||
71 | { | ||
72 | struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, | ||
73 | t.tmr); | ||
74 | timerfd_triggered(ctx); | ||
60 | return HRTIMER_NORESTART; | 75 | return HRTIMER_NORESTART; |
61 | } | 76 | } |
62 | 77 | ||
78 | static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm, | ||
79 | ktime_t now) | ||
80 | { | ||
81 | struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx, | ||
82 | t.alarm); | ||
83 | timerfd_triggered(ctx); | ||
84 | return ALARMTIMER_NORESTART; | ||
85 | } | ||
86 | |||
63 | /* | 87 | /* |
64 | * Called when the clock was set to cancel the timers in the cancel | 88 | * Called when the clock was set to cancel the timers in the cancel |
65 | * list. This will wake up processes waiting on these timers. The | 89 | * list. This will wake up processes waiting on these timers. The |
@@ -107,8 +131,9 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx) | |||
107 | 131 | ||
108 | static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) | 132 | static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) |
109 | { | 133 | { |
110 | if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && | 134 | if ((ctx->clockid == CLOCK_REALTIME || |
111 | (flags & TFD_TIMER_CANCEL_ON_SET)) { | 135 | ctx->clockid == CLOCK_REALTIME_ALARM) && |
136 | (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { | ||
112 | if (!ctx->might_cancel) { | 137 | if (!ctx->might_cancel) { |
113 | ctx->might_cancel = true; | 138 | ctx->might_cancel = true; |
114 | spin_lock(&cancel_lock); | 139 | spin_lock(&cancel_lock); |
@@ -124,7 +149,11 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) | |||
124 | { | 149 | { |
125 | ktime_t remaining; | 150 | ktime_t remaining; |
126 | 151 | ||
127 | remaining = hrtimer_expires_remaining(&ctx->tmr); | 152 | if (isalarm(ctx)) |
153 | remaining = alarm_expires_remaining(&ctx->t.alarm); | ||
154 | else | ||
155 | remaining = hrtimer_expires_remaining(&ctx->t.tmr); | ||
156 | |||
128 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; | 157 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; |
129 | } | 158 | } |
130 | 159 | ||
@@ -142,11 +171,28 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, | |||
142 | ctx->expired = 0; | 171 | ctx->expired = 0; |
143 | ctx->ticks = 0; | 172 | ctx->ticks = 0; |
144 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); | 173 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); |
145 | hrtimer_init(&ctx->tmr, clockid, htmode); | 174 | |
146 | hrtimer_set_expires(&ctx->tmr, texp); | 175 | if (isalarm(ctx)) { |
147 | ctx->tmr.function = timerfd_tmrproc; | 176 | alarm_init(&ctx->t.alarm, |
177 | ctx->clockid == CLOCK_REALTIME_ALARM ? | ||
178 | ALARM_REALTIME : ALARM_BOOTTIME, | ||
179 | timerfd_alarmproc); | ||
180 | } else { | ||
181 | hrtimer_init(&ctx->t.tmr, clockid, htmode); | ||
182 | hrtimer_set_expires(&ctx->t.tmr, texp); | ||
183 | ctx->t.tmr.function = timerfd_tmrproc; | ||
184 | } | ||
185 | |||
148 | if (texp.tv64 != 0) { | 186 | if (texp.tv64 != 0) { |
149 | hrtimer_start(&ctx->tmr, texp, htmode); | 187 | if (isalarm(ctx)) { |
188 | if (flags & TFD_TIMER_ABSTIME) | ||
189 | alarm_start(&ctx->t.alarm, texp); | ||
190 | else | ||
191 | alarm_start_relative(&ctx->t.alarm, texp); | ||
192 | } else { | ||
193 | hrtimer_start(&ctx->t.tmr, texp, htmode); | ||
194 | } | ||
195 | |||
150 | if (timerfd_canceled(ctx)) | 196 | if (timerfd_canceled(ctx)) |
151 | return -ECANCELED; | 197 | return -ECANCELED; |
152 | } | 198 | } |
@@ -158,7 +204,11 @@ static int timerfd_release(struct inode *inode, struct file *file) | |||
158 | struct timerfd_ctx *ctx = file->private_data; | 204 | struct timerfd_ctx *ctx = file->private_data; |
159 | 205 | ||
160 | timerfd_remove_cancel(ctx); | 206 | timerfd_remove_cancel(ctx); |
161 | hrtimer_cancel(&ctx->tmr); | 207 | |
208 | if (isalarm(ctx)) | ||
209 | alarm_cancel(&ctx->t.alarm); | ||
210 | else | ||
211 | hrtimer_cancel(&ctx->t.tmr); | ||
162 | kfree_rcu(ctx, rcu); | 212 | kfree_rcu(ctx, rcu); |
163 | return 0; | 213 | return 0; |
164 | } | 214 | } |
@@ -215,9 +265,15 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, | |||
215 | * callback to avoid DoS attacks specifying a very | 265 | * callback to avoid DoS attacks specifying a very |
216 | * short timer period. | 266 | * short timer period. |
217 | */ | 267 | */ |
218 | ticks += hrtimer_forward_now(&ctx->tmr, | 268 | if (isalarm(ctx)) { |
219 | ctx->tintv) - 1; | 269 | ticks += alarm_forward_now( |
220 | hrtimer_restart(&ctx->tmr); | 270 | &ctx->t.alarm, ctx->tintv) - 1; |
271 | alarm_restart(&ctx->t.alarm); | ||
272 | } else { | ||
273 | ticks += hrtimer_forward_now(&ctx->t.tmr, | ||
274 | ctx->tintv) - 1; | ||
275 | hrtimer_restart(&ctx->t.tmr); | ||
276 | } | ||
221 | } | 277 | } |
222 | ctx->expired = 0; | 278 | ctx->expired = 0; |
223 | ctx->ticks = 0; | 279 | ctx->ticks = 0; |
@@ -259,7 +315,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) | |||
259 | 315 | ||
260 | if ((flags & ~TFD_CREATE_FLAGS) || | 316 | if ((flags & ~TFD_CREATE_FLAGS) || |
261 | (clockid != CLOCK_MONOTONIC && | 317 | (clockid != CLOCK_MONOTONIC && |
262 | clockid != CLOCK_REALTIME)) | 318 | clockid != CLOCK_REALTIME && |
319 | clockid != CLOCK_REALTIME_ALARM && | ||
320 | clockid != CLOCK_BOOTTIME_ALARM)) | ||
263 | return -EINVAL; | 321 | return -EINVAL; |
264 | 322 | ||
265 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); | 323 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); |
@@ -268,7 +326,15 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) | |||
268 | 326 | ||
269 | init_waitqueue_head(&ctx->wqh); | 327 | init_waitqueue_head(&ctx->wqh); |
270 | ctx->clockid = clockid; | 328 | ctx->clockid = clockid; |
271 | hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); | 329 | |
330 | if (isalarm(ctx)) | ||
331 | alarm_init(&ctx->t.alarm, | ||
332 | ctx->clockid == CLOCK_REALTIME_ALARM ? | ||
333 | ALARM_REALTIME : ALARM_BOOTTIME, | ||
334 | timerfd_alarmproc); | ||
335 | else | ||
336 | hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); | ||
337 | |||
272 | ctx->moffs = ktime_get_monotonic_offset(); | 338 | ctx->moffs = ktime_get_monotonic_offset(); |
273 | 339 | ||
274 | ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, | 340 | ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, |
@@ -305,8 +371,14 @@ static int do_timerfd_settime(int ufd, int flags, | |||
305 | */ | 371 | */ |
306 | for (;;) { | 372 | for (;;) { |
307 | spin_lock_irq(&ctx->wqh.lock); | 373 | spin_lock_irq(&ctx->wqh.lock); |
308 | if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) | 374 | |
309 | break; | 375 | if (isalarm(ctx)) { |
376 | if (alarm_try_to_cancel(&ctx->t.alarm) >= 0) | ||
377 | break; | ||
378 | } else { | ||
379 | if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0) | ||
380 | break; | ||
381 | } | ||
310 | spin_unlock_irq(&ctx->wqh.lock); | 382 | spin_unlock_irq(&ctx->wqh.lock); |
311 | cpu_relax(); | 383 | cpu_relax(); |
312 | } | 384 | } |
@@ -317,8 +389,12 @@ static int do_timerfd_settime(int ufd, int flags, | |||
317 | * We do not update "ticks" and "expired" since the timer will be | 389 | * We do not update "ticks" and "expired" since the timer will be |
318 | * re-programmed again in the following timerfd_setup() call. | 390 | * re-programmed again in the following timerfd_setup() call. |
319 | */ | 391 | */ |
320 | if (ctx->expired && ctx->tintv.tv64) | 392 | if (ctx->expired && ctx->tintv.tv64) { |
321 | hrtimer_forward_now(&ctx->tmr, ctx->tintv); | 393 | if (isalarm(ctx)) |
394 | alarm_forward_now(&ctx->t.alarm, ctx->tintv); | ||
395 | else | ||
396 | hrtimer_forward_now(&ctx->t.tmr, ctx->tintv); | ||
397 | } | ||
322 | 398 | ||
323 | old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); | 399 | old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
324 | old->it_interval = ktime_to_timespec(ctx->tintv); | 400 | old->it_interval = ktime_to_timespec(ctx->tintv); |
@@ -345,9 +421,18 @@ static int do_timerfd_gettime(int ufd, struct itimerspec *t) | |||
345 | spin_lock_irq(&ctx->wqh.lock); | 421 | spin_lock_irq(&ctx->wqh.lock); |
346 | if (ctx->expired && ctx->tintv.tv64) { | 422 | if (ctx->expired && ctx->tintv.tv64) { |
347 | ctx->expired = 0; | 423 | ctx->expired = 0; |
348 | ctx->ticks += | 424 | |
349 | hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; | 425 | if (isalarm(ctx)) { |
350 | hrtimer_restart(&ctx->tmr); | 426 | ctx->ticks += |
427 | alarm_forward_now( | ||
428 | &ctx->t.alarm, ctx->tintv) - 1; | ||
429 | alarm_restart(&ctx->t.alarm); | ||
430 | } else { | ||
431 | ctx->ticks += | ||
432 | hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) | ||
433 | - 1; | ||
434 | hrtimer_restart(&ctx->t.tmr); | ||
435 | } | ||
351 | } | 436 | } |
352 | t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); | 437 | t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
353 | t->it_interval = ktime_to_timespec(ctx->tintv); | 438 | t->it_interval = ktime_to_timespec(ctx->tintv); |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 605af512aec2..6b4947f75af7 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -346,19 +346,18 @@ static unsigned int vfs_dent_type(uint8_t type) | |||
346 | * This means that UBIFS cannot support NFS which requires full | 346 | * This means that UBIFS cannot support NFS which requires full |
347 | * 'seekdir()'/'telldir()' support. | 347 | * 'seekdir()'/'telldir()' support. |
348 | */ | 348 | */ |
349 | static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | 349 | static int ubifs_readdir(struct file *file, struct dir_context *ctx) |
350 | { | 350 | { |
351 | int err, over = 0; | 351 | int err; |
352 | loff_t pos = file->f_pos; | ||
353 | struct qstr nm; | 352 | struct qstr nm; |
354 | union ubifs_key key; | 353 | union ubifs_key key; |
355 | struct ubifs_dent_node *dent; | 354 | struct ubifs_dent_node *dent; |
356 | struct inode *dir = file_inode(file); | 355 | struct inode *dir = file_inode(file); |
357 | struct ubifs_info *c = dir->i_sb->s_fs_info; | 356 | struct ubifs_info *c = dir->i_sb->s_fs_info; |
358 | 357 | ||
359 | dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, pos); | 358 | dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos); |
360 | 359 | ||
361 | if (pos > UBIFS_S_KEY_HASH_MASK || pos == 2) | 360 | if (ctx->pos > UBIFS_S_KEY_HASH_MASK || ctx->pos == 2) |
362 | /* | 361 | /* |
363 | * The directory was seek'ed to a senseless position or there | 362 | * The directory was seek'ed to a senseless position or there |
364 | * are no more entries. | 363 | * are no more entries. |
@@ -384,19 +383,9 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
384 | file->f_version = 1; | 383 | file->f_version = 1; |
385 | 384 | ||
386 | /* File positions 0 and 1 correspond to "." and ".." */ | 385 | /* File positions 0 and 1 correspond to "." and ".." */ |
387 | if (pos == 0) { | 386 | if (ctx->pos < 2) { |
388 | ubifs_assert(!file->private_data); | ||
389 | over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR); | ||
390 | if (over) | ||
391 | return 0; | ||
392 | file->f_pos = pos = 1; | ||
393 | } | ||
394 | |||
395 | if (pos == 1) { | ||
396 | ubifs_assert(!file->private_data); | 387 | ubifs_assert(!file->private_data); |
397 | over = filldir(dirent, "..", 2, 1, | 388 | if (!dir_emit_dots(file, ctx)) |
398 | parent_ino(file->f_path.dentry), DT_DIR); | ||
399 | if (over) | ||
400 | return 0; | 389 | return 0; |
401 | 390 | ||
402 | /* Find the first entry in TNC and save it */ | 391 | /* Find the first entry in TNC and save it */ |
@@ -408,7 +397,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
408 | goto out; | 397 | goto out; |
409 | } | 398 | } |
410 | 399 | ||
411 | file->f_pos = pos = key_hash_flash(c, &dent->key); | 400 | ctx->pos = key_hash_flash(c, &dent->key); |
412 | file->private_data = dent; | 401 | file->private_data = dent; |
413 | } | 402 | } |
414 | 403 | ||
@@ -416,16 +405,16 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
416 | if (!dent) { | 405 | if (!dent) { |
417 | /* | 406 | /* |
418 | * The directory was seek'ed to and is now readdir'ed. | 407 | * The directory was seek'ed to and is now readdir'ed. |
419 | * Find the entry corresponding to @pos or the closest one. | 408 | * Find the entry corresponding to @ctx->pos or the closest one. |
420 | */ | 409 | */ |
421 | dent_key_init_hash(c, &key, dir->i_ino, pos); | 410 | dent_key_init_hash(c, &key, dir->i_ino, ctx->pos); |
422 | nm.name = NULL; | 411 | nm.name = NULL; |
423 | dent = ubifs_tnc_next_ent(c, &key, &nm); | 412 | dent = ubifs_tnc_next_ent(c, &key, &nm); |
424 | if (IS_ERR(dent)) { | 413 | if (IS_ERR(dent)) { |
425 | err = PTR_ERR(dent); | 414 | err = PTR_ERR(dent); |
426 | goto out; | 415 | goto out; |
427 | } | 416 | } |
428 | file->f_pos = pos = key_hash_flash(c, &dent->key); | 417 | ctx->pos = key_hash_flash(c, &dent->key); |
429 | file->private_data = dent; | 418 | file->private_data = dent; |
430 | } | 419 | } |
431 | 420 | ||
@@ -437,10 +426,9 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
437 | ubifs_inode(dir)->creat_sqnum); | 426 | ubifs_inode(dir)->creat_sqnum); |
438 | 427 | ||
439 | nm.len = le16_to_cpu(dent->nlen); | 428 | nm.len = le16_to_cpu(dent->nlen); |
440 | over = filldir(dirent, dent->name, nm.len, pos, | 429 | if (!dir_emit(ctx, dent->name, nm.len, |
441 | le64_to_cpu(dent->inum), | 430 | le64_to_cpu(dent->inum), |
442 | vfs_dent_type(dent->type)); | 431 | vfs_dent_type(dent->type))) |
443 | if (over) | ||
444 | return 0; | 432 | return 0; |
445 | 433 | ||
446 | /* Switch to the next entry */ | 434 | /* Switch to the next entry */ |
@@ -453,17 +441,9 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
453 | } | 441 | } |
454 | 442 | ||
455 | kfree(file->private_data); | 443 | kfree(file->private_data); |
456 | file->f_pos = pos = key_hash_flash(c, &dent->key); | 444 | ctx->pos = key_hash_flash(c, &dent->key); |
457 | file->private_data = dent; | 445 | file->private_data = dent; |
458 | cond_resched(); | 446 | cond_resched(); |
459 | |||
460 | if (file->f_version == 0) | ||
461 | /* | ||
462 | * The file was seek'ed meanwhile, lets return and start | ||
463 | * reading direntries from the new position on the next | ||
464 | * invocation. | ||
465 | */ | ||
466 | return 0; | ||
467 | } | 447 | } |
468 | 448 | ||
469 | out: | 449 | out: |
@@ -475,15 +455,10 @@ out: | |||
475 | kfree(file->private_data); | 455 | kfree(file->private_data); |
476 | file->private_data = NULL; | 456 | file->private_data = NULL; |
477 | /* 2 is a special value indicating that there are no more direntries */ | 457 | /* 2 is a special value indicating that there are no more direntries */ |
478 | file->f_pos = 2; | 458 | ctx->pos = 2; |
479 | return 0; | 459 | return 0; |
480 | } | 460 | } |
481 | 461 | ||
482 | static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence) | ||
483 | { | ||
484 | return generic_file_llseek(file, offset, whence); | ||
485 | } | ||
486 | |||
487 | /* Free saved readdir() state when the directory is closed */ | 462 | /* Free saved readdir() state when the directory is closed */ |
488 | static int ubifs_dir_release(struct inode *dir, struct file *file) | 463 | static int ubifs_dir_release(struct inode *dir, struct file *file) |
489 | { | 464 | { |
@@ -1201,10 +1176,10 @@ const struct inode_operations ubifs_dir_inode_operations = { | |||
1201 | }; | 1176 | }; |
1202 | 1177 | ||
1203 | const struct file_operations ubifs_dir_operations = { | 1178 | const struct file_operations ubifs_dir_operations = { |
1204 | .llseek = ubifs_dir_llseek, | 1179 | .llseek = generic_file_llseek, |
1205 | .release = ubifs_dir_release, | 1180 | .release = ubifs_dir_release, |
1206 | .read = generic_read_dir, | 1181 | .read = generic_read_dir, |
1207 | .readdir = ubifs_readdir, | 1182 | .iterate = ubifs_readdir, |
1208 | .fsync = ubifs_fsync, | 1183 | .fsync = ubifs_fsync, |
1209 | .unlocked_ioctl = ubifs_ioctl, | 1184 | .unlocked_ioctl = ubifs_ioctl, |
1210 | #ifdef CONFIG_COMPAT | 1185 | #ifdef CONFIG_COMPAT |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 14374530784c..123c79b7261e 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -1277,13 +1277,14 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) | |||
1277 | return err; | 1277 | return err; |
1278 | } | 1278 | } |
1279 | 1279 | ||
1280 | static void ubifs_invalidatepage(struct page *page, unsigned long offset) | 1280 | static void ubifs_invalidatepage(struct page *page, unsigned int offset, |
1281 | unsigned int length) | ||
1281 | { | 1282 | { |
1282 | struct inode *inode = page->mapping->host; | 1283 | struct inode *inode = page->mapping->host; |
1283 | struct ubifs_info *c = inode->i_sb->s_fs_info; | 1284 | struct ubifs_info *c = inode->i_sb->s_fs_info; |
1284 | 1285 | ||
1285 | ubifs_assert(PagePrivate(page)); | 1286 | ubifs_assert(PagePrivate(page)); |
1286 | if (offset) | 1287 | if (offset || length < PAGE_CACHE_SIZE) |
1287 | /* Partial page remains dirty */ | 1288 | /* Partial page remains dirty */ |
1288 | return; | 1289 | return; |
1289 | 1290 | ||
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f21acf0ef01f..879b9976c12b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -1412,7 +1412,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1412 | 1412 | ||
1413 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", | 1413 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", |
1414 | c->vi.ubi_num, c->vi.vol_id, c->vi.name, | 1414 | c->vi.ubi_num, c->vi.vol_id, c->vi.name, |
1415 | c->ro_mount ? ", R/O mode" : NULL); | 1415 | c->ro_mount ? ", R/O mode" : ""); |
1416 | x = (long long)c->main_lebs * c->leb_size; | 1416 | x = (long long)c->main_lebs * c->leb_size; |
1417 | y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; | 1417 | y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; |
1418 | ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", | 1418 | ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", |
diff --git a/fs/udf/dir.c b/fs/udf/dir.c index b3e93f5e17c3..a012c51caffd 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c | |||
@@ -35,14 +35,16 @@ | |||
35 | #include "udf_i.h" | 35 | #include "udf_i.h" |
36 | #include "udf_sb.h" | 36 | #include "udf_sb.h" |
37 | 37 | ||
38 | static int do_udf_readdir(struct inode *dir, struct file *filp, | 38 | |
39 | filldir_t filldir, void *dirent) | 39 | static int udf_readdir(struct file *file, struct dir_context *ctx) |
40 | { | 40 | { |
41 | struct inode *dir = file_inode(file); | ||
42 | struct udf_inode_info *iinfo = UDF_I(dir); | ||
41 | struct udf_fileident_bh fibh = { .sbh = NULL, .ebh = NULL}; | 43 | struct udf_fileident_bh fibh = { .sbh = NULL, .ebh = NULL}; |
42 | struct fileIdentDesc *fi = NULL; | 44 | struct fileIdentDesc *fi = NULL; |
43 | struct fileIdentDesc cfi; | 45 | struct fileIdentDesc cfi; |
44 | int block, iblock; | 46 | int block, iblock; |
45 | loff_t nf_pos = (filp->f_pos - 1) << 2; | 47 | loff_t nf_pos; |
46 | int flen; | 48 | int flen; |
47 | unsigned char *fname = NULL; | 49 | unsigned char *fname = NULL; |
48 | unsigned char *nameptr; | 50 | unsigned char *nameptr; |
@@ -54,10 +56,14 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
54 | uint32_t elen; | 56 | uint32_t elen; |
55 | sector_t offset; | 57 | sector_t offset; |
56 | int i, num, ret = 0; | 58 | int i, num, ret = 0; |
57 | unsigned int dt_type; | ||
58 | struct extent_position epos = { NULL, 0, {0, 0} }; | 59 | struct extent_position epos = { NULL, 0, {0, 0} }; |
59 | struct udf_inode_info *iinfo; | ||
60 | 60 | ||
61 | if (ctx->pos == 0) { | ||
62 | if (!dir_emit_dot(file, ctx)) | ||
63 | return 0; | ||
64 | ctx->pos = 1; | ||
65 | } | ||
66 | nf_pos = (ctx->pos - 1) << 2; | ||
61 | if (nf_pos >= size) | 67 | if (nf_pos >= size) |
62 | goto out; | 68 | goto out; |
63 | 69 | ||
@@ -71,7 +77,6 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
71 | nf_pos = udf_ext0_offset(dir); | 77 | nf_pos = udf_ext0_offset(dir); |
72 | 78 | ||
73 | fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1); | 79 | fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1); |
74 | iinfo = UDF_I(dir); | ||
75 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { | 80 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { |
76 | if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits, | 81 | if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits, |
77 | &epos, &eloc, &elen, &offset) | 82 | &epos, &eloc, &elen, &offset) |
@@ -116,7 +121,9 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
116 | } | 121 | } |
117 | 122 | ||
118 | while (nf_pos < size) { | 123 | while (nf_pos < size) { |
119 | filp->f_pos = (nf_pos >> 2) + 1; | 124 | struct kernel_lb_addr tloc; |
125 | |||
126 | ctx->pos = (nf_pos >> 2) + 1; | ||
120 | 127 | ||
121 | fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, | 128 | fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, |
122 | &elen, &offset); | 129 | &elen, &offset); |
@@ -155,24 +162,22 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
155 | } | 162 | } |
156 | 163 | ||
157 | if (cfi.fileCharacteristics & FID_FILE_CHAR_PARENT) { | 164 | if (cfi.fileCharacteristics & FID_FILE_CHAR_PARENT) { |
158 | iblock = parent_ino(filp->f_path.dentry); | 165 | if (!dir_emit_dotdot(file, ctx)) |
159 | flen = 2; | 166 | goto out; |
160 | memcpy(fname, "..", flen); | 167 | continue; |
161 | dt_type = DT_DIR; | ||
162 | } else { | ||
163 | struct kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation); | ||
164 | |||
165 | iblock = udf_get_lb_pblock(dir->i_sb, &tloc, 0); | ||
166 | flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); | ||
167 | dt_type = DT_UNKNOWN; | ||
168 | } | 168 | } |
169 | 169 | ||
170 | if (flen && filldir(dirent, fname, flen, filp->f_pos, | 170 | flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); |
171 | iblock, dt_type) < 0) | 171 | if (!flen) |
172 | continue; | ||
173 | |||
174 | tloc = lelb_to_cpu(cfi.icb.extLocation); | ||
175 | iblock = udf_get_lb_pblock(dir->i_sb, &tloc, 0); | ||
176 | if (!dir_emit(ctx, fname, flen, iblock, DT_UNKNOWN)) | ||
172 | goto out; | 177 | goto out; |
173 | } /* end while */ | 178 | } /* end while */ |
174 | 179 | ||
175 | filp->f_pos = (nf_pos >> 2) + 1; | 180 | ctx->pos = (nf_pos >> 2) + 1; |
176 | 181 | ||
177 | out: | 182 | out: |
178 | if (fibh.sbh != fibh.ebh) | 183 | if (fibh.sbh != fibh.ebh) |
@@ -184,27 +189,11 @@ out: | |||
184 | return ret; | 189 | return ret; |
185 | } | 190 | } |
186 | 191 | ||
187 | static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
188 | { | ||
189 | struct inode *dir = file_inode(filp); | ||
190 | int result; | ||
191 | |||
192 | if (filp->f_pos == 0) { | ||
193 | if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < 0) { | ||
194 | return 0; | ||
195 | } | ||
196 | filp->f_pos++; | ||
197 | } | ||
198 | |||
199 | result = do_udf_readdir(dir, filp, filldir, dirent); | ||
200 | return result; | ||
201 | } | ||
202 | |||
203 | /* readdir and lookup functions */ | 192 | /* readdir and lookup functions */ |
204 | const struct file_operations udf_dir_operations = { | 193 | const struct file_operations udf_dir_operations = { |
205 | .llseek = generic_file_llseek, | 194 | .llseek = generic_file_llseek, |
206 | .read = generic_read_dir, | 195 | .read = generic_read_dir, |
207 | .readdir = udf_readdir, | 196 | .iterate = udf_readdir, |
208 | .unlocked_ioctl = udf_ioctl, | 197 | .unlocked_ioctl = udf_ioctl, |
209 | .fsync = generic_file_fsync, | 198 | .fsync = generic_file_fsync, |
210 | }; | 199 | }; |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 102c072c6bbf..5f6fc17d6bc5 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -594,6 +594,29 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
594 | return 0; | 594 | return 0; |
595 | } | 595 | } |
596 | 596 | ||
597 | static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
598 | { | ||
599 | struct inode *inode; | ||
600 | struct udf_inode_info *iinfo; | ||
601 | int err; | ||
602 | |||
603 | inode = udf_new_inode(dir, mode, &err); | ||
604 | if (!inode) | ||
605 | return err; | ||
606 | |||
607 | iinfo = UDF_I(inode); | ||
608 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | ||
609 | inode->i_data.a_ops = &udf_adinicb_aops; | ||
610 | else | ||
611 | inode->i_data.a_ops = &udf_aops; | ||
612 | inode->i_op = &udf_file_inode_operations; | ||
613 | inode->i_fop = &udf_file_operations; | ||
614 | mark_inode_dirty(inode); | ||
615 | |||
616 | d_tmpfile(dentry, inode); | ||
617 | return 0; | ||
618 | } | ||
619 | |||
597 | static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, | 620 | static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, |
598 | dev_t rdev) | 621 | dev_t rdev) |
599 | { | 622 | { |
@@ -1311,6 +1334,7 @@ const struct inode_operations udf_dir_inode_operations = { | |||
1311 | .rmdir = udf_rmdir, | 1334 | .rmdir = udf_rmdir, |
1312 | .mknod = udf_mknod, | 1335 | .mknod = udf_mknod, |
1313 | .rename = udf_rename, | 1336 | .rename = udf_rename, |
1337 | .tmpfile = udf_tmpfile, | ||
1314 | }; | 1338 | }; |
1315 | const struct inode_operations udf_symlink_inode_operations = { | 1339 | const struct inode_operations udf_symlink_inode_operations = { |
1316 | .readlink = generic_readlink, | 1340 | .readlink = generic_readlink, |
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 3a75ca09c506..0ecc2cebed8f 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c | |||
@@ -430,16 +430,16 @@ ufs_validate_entry(struct super_block *sb, char *base, | |||
430 | * This is blatantly stolen from ext2fs | 430 | * This is blatantly stolen from ext2fs |
431 | */ | 431 | */ |
432 | static int | 432 | static int |
433 | ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 433 | ufs_readdir(struct file *file, struct dir_context *ctx) |
434 | { | 434 | { |
435 | loff_t pos = filp->f_pos; | 435 | loff_t pos = ctx->pos; |
436 | struct inode *inode = file_inode(filp); | 436 | struct inode *inode = file_inode(file); |
437 | struct super_block *sb = inode->i_sb; | 437 | struct super_block *sb = inode->i_sb; |
438 | unsigned int offset = pos & ~PAGE_CACHE_MASK; | 438 | unsigned int offset = pos & ~PAGE_CACHE_MASK; |
439 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | 439 | unsigned long n = pos >> PAGE_CACHE_SHIFT; |
440 | unsigned long npages = ufs_dir_pages(inode); | 440 | unsigned long npages = ufs_dir_pages(inode); |
441 | unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); | 441 | unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); |
442 | int need_revalidate = filp->f_version != inode->i_version; | 442 | int need_revalidate = file->f_version != inode->i_version; |
443 | unsigned flags = UFS_SB(sb)->s_flags; | 443 | unsigned flags = UFS_SB(sb)->s_flags; |
444 | 444 | ||
445 | UFSD("BEGIN\n"); | 445 | UFSD("BEGIN\n"); |
@@ -457,16 +457,16 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
457 | ufs_error(sb, __func__, | 457 | ufs_error(sb, __func__, |
458 | "bad page in #%lu", | 458 | "bad page in #%lu", |
459 | inode->i_ino); | 459 | inode->i_ino); |
460 | filp->f_pos += PAGE_CACHE_SIZE - offset; | 460 | ctx->pos += PAGE_CACHE_SIZE - offset; |
461 | return -EIO; | 461 | return -EIO; |
462 | } | 462 | } |
463 | kaddr = page_address(page); | 463 | kaddr = page_address(page); |
464 | if (unlikely(need_revalidate)) { | 464 | if (unlikely(need_revalidate)) { |
465 | if (offset) { | 465 | if (offset) { |
466 | offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask); | 466 | offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask); |
467 | filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset; | 467 | ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset; |
468 | } | 468 | } |
469 | filp->f_version = inode->i_version; | 469 | file->f_version = inode->i_version; |
470 | need_revalidate = 0; | 470 | need_revalidate = 0; |
471 | } | 471 | } |
472 | de = (struct ufs_dir_entry *)(kaddr+offset); | 472 | de = (struct ufs_dir_entry *)(kaddr+offset); |
@@ -479,11 +479,8 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
479 | return -EIO; | 479 | return -EIO; |
480 | } | 480 | } |
481 | if (de->d_ino) { | 481 | if (de->d_ino) { |
482 | int over; | ||
483 | unsigned char d_type = DT_UNKNOWN; | 482 | unsigned char d_type = DT_UNKNOWN; |
484 | 483 | ||
485 | offset = (char *)de - kaddr; | ||
486 | |||
487 | UFSD("filldir(%s,%u)\n", de->d_name, | 484 | UFSD("filldir(%s,%u)\n", de->d_name, |
488 | fs32_to_cpu(sb, de->d_ino)); | 485 | fs32_to_cpu(sb, de->d_ino)); |
489 | UFSD("namlen %u\n", ufs_get_de_namlen(sb, de)); | 486 | UFSD("namlen %u\n", ufs_get_de_namlen(sb, de)); |
@@ -491,16 +488,15 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
491 | if ((flags & UFS_DE_MASK) == UFS_DE_44BSD) | 488 | if ((flags & UFS_DE_MASK) == UFS_DE_44BSD) |
492 | d_type = de->d_u.d_44.d_type; | 489 | d_type = de->d_u.d_44.d_type; |
493 | 490 | ||
494 | over = filldir(dirent, de->d_name, | 491 | if (!dir_emit(ctx, de->d_name, |
495 | ufs_get_de_namlen(sb, de), | 492 | ufs_get_de_namlen(sb, de), |
496 | (n<<PAGE_CACHE_SHIFT) | offset, | 493 | fs32_to_cpu(sb, de->d_ino), |
497 | fs32_to_cpu(sb, de->d_ino), d_type); | 494 | d_type)) { |
498 | if (over) { | ||
499 | ufs_put_page(page); | 495 | ufs_put_page(page); |
500 | return 0; | 496 | return 0; |
501 | } | 497 | } |
502 | } | 498 | } |
503 | filp->f_pos += fs16_to_cpu(sb, de->d_reclen); | 499 | ctx->pos += fs16_to_cpu(sb, de->d_reclen); |
504 | } | 500 | } |
505 | ufs_put_page(page); | 501 | ufs_put_page(page); |
506 | } | 502 | } |
@@ -660,7 +656,7 @@ not_empty: | |||
660 | 656 | ||
661 | const struct file_operations ufs_dir_operations = { | 657 | const struct file_operations ufs_dir_operations = { |
662 | .read = generic_read_dir, | 658 | .read = generic_read_dir, |
663 | .readdir = ufs_readdir, | 659 | .iterate = ufs_readdir, |
664 | .fsync = generic_file_fsync, | 660 | .fsync = generic_file_fsync, |
665 | .llseek = generic_file_llseek, | 661 | .llseek = generic_file_llseek, |
666 | }; | 662 | }; |
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 6313b69b6644..4a4508023a3c 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -71,6 +71,7 @@ xfs-y += xfs_alloc.o \ | |||
71 | xfs_dir2_sf.o \ | 71 | xfs_dir2_sf.o \ |
72 | xfs_ialloc.o \ | 72 | xfs_ialloc.o \ |
73 | xfs_ialloc_btree.o \ | 73 | xfs_ialloc_btree.o \ |
74 | xfs_icreate_item.o \ | ||
74 | xfs_inode.o \ | 75 | xfs_inode.o \ |
75 | xfs_log_recover.o \ | 76 | xfs_log_recover.o \ |
76 | xfs_mount.o \ | 77 | xfs_mount.o \ |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 5673bcfda2f0..71596e57283a 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -175,6 +175,7 @@ xfs_alloc_compute_diff( | |||
175 | xfs_agblock_t wantbno, /* target starting block */ | 175 | xfs_agblock_t wantbno, /* target starting block */ |
176 | xfs_extlen_t wantlen, /* target length */ | 176 | xfs_extlen_t wantlen, /* target length */ |
177 | xfs_extlen_t alignment, /* target alignment */ | 177 | xfs_extlen_t alignment, /* target alignment */ |
178 | char userdata, /* are we allocating data? */ | ||
178 | xfs_agblock_t freebno, /* freespace's starting block */ | 179 | xfs_agblock_t freebno, /* freespace's starting block */ |
179 | xfs_extlen_t freelen, /* freespace's length */ | 180 | xfs_extlen_t freelen, /* freespace's length */ |
180 | xfs_agblock_t *newbnop) /* result: best start block from free */ | 181 | xfs_agblock_t *newbnop) /* result: best start block from free */ |
@@ -189,7 +190,14 @@ xfs_alloc_compute_diff( | |||
189 | ASSERT(freelen >= wantlen); | 190 | ASSERT(freelen >= wantlen); |
190 | freeend = freebno + freelen; | 191 | freeend = freebno + freelen; |
191 | wantend = wantbno + wantlen; | 192 | wantend = wantbno + wantlen; |
192 | if (freebno >= wantbno) { | 193 | /* |
194 | * We want to allocate from the start of a free extent if it is past | ||
195 | * the desired block or if we are allocating user data and the free | ||
196 | * extent is before desired block. The second case is there to allow | ||
197 | * for contiguous allocation from the remaining free space if the file | ||
198 | * grows in the short term. | ||
199 | */ | ||
200 | if (freebno >= wantbno || (userdata && freeend < wantend)) { | ||
193 | if ((newbno1 = roundup(freebno, alignment)) >= freeend) | 201 | if ((newbno1 = roundup(freebno, alignment)) >= freeend) |
194 | newbno1 = NULLAGBLOCK; | 202 | newbno1 = NULLAGBLOCK; |
195 | } else if (freeend >= wantend && alignment > 1) { | 203 | } else if (freeend >= wantend && alignment > 1) { |
@@ -805,7 +813,8 @@ xfs_alloc_find_best_extent( | |||
805 | xfs_alloc_fix_len(args); | 813 | xfs_alloc_fix_len(args); |
806 | 814 | ||
807 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 815 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
808 | args->alignment, *sbnoa, | 816 | args->alignment, |
817 | args->userdata, *sbnoa, | ||
809 | *slena, &new); | 818 | *slena, &new); |
810 | 819 | ||
811 | /* | 820 | /* |
@@ -976,7 +985,8 @@ restart: | |||
976 | if (args->len < blen) | 985 | if (args->len < blen) |
977 | continue; | 986 | continue; |
978 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 987 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
979 | args->alignment, ltbnoa, ltlena, <new); | 988 | args->alignment, args->userdata, ltbnoa, |
989 | ltlena, <new); | ||
980 | if (ltnew != NULLAGBLOCK && | 990 | if (ltnew != NULLAGBLOCK && |
981 | (args->len > blen || ltdiff < bdiff)) { | 991 | (args->len > blen || ltdiff < bdiff)) { |
982 | bdiff = ltdiff; | 992 | bdiff = ltdiff; |
@@ -1128,7 +1138,8 @@ restart: | |||
1128 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 1138 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
1129 | xfs_alloc_fix_len(args); | 1139 | xfs_alloc_fix_len(args); |
1130 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 1140 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
1131 | args->alignment, ltbnoa, ltlena, <new); | 1141 | args->alignment, args->userdata, ltbnoa, |
1142 | ltlena, <new); | ||
1132 | 1143 | ||
1133 | error = xfs_alloc_find_best_extent(args, | 1144 | error = xfs_alloc_find_best_extent(args, |
1134 | &bno_cur_lt, &bno_cur_gt, | 1145 | &bno_cur_lt, &bno_cur_gt, |
@@ -1144,7 +1155,8 @@ restart: | |||
1144 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); | 1155 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); |
1145 | xfs_alloc_fix_len(args); | 1156 | xfs_alloc_fix_len(args); |
1146 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 1157 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
1147 | args->alignment, gtbnoa, gtlena, >new); | 1158 | args->alignment, args->userdata, gtbnoa, |
1159 | gtlena, >new); | ||
1148 | 1160 | ||
1149 | error = xfs_alloc_find_best_extent(args, | 1161 | error = xfs_alloc_find_best_extent(args, |
1150 | &bno_cur_gt, &bno_cur_lt, | 1162 | &bno_cur_gt, &bno_cur_lt, |
@@ -1203,7 +1215,7 @@ restart: | |||
1203 | } | 1215 | } |
1204 | rlen = args->len; | 1216 | rlen = args->len; |
1205 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, | 1217 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, |
1206 | ltbnoa, ltlena, <new); | 1218 | args->userdata, ltbnoa, ltlena, <new); |
1207 | ASSERT(ltnew >= ltbno); | 1219 | ASSERT(ltnew >= ltbno); |
1208 | ASSERT(ltnew + rlen <= ltbnoa + ltlena); | 1220 | ASSERT(ltnew + rlen <= ltbnoa + ltlena); |
1209 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | 1221 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 41a695048be7..596ec71da00e 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -843,10 +843,12 @@ xfs_cluster_write( | |||
843 | STATIC void | 843 | STATIC void |
844 | xfs_vm_invalidatepage( | 844 | xfs_vm_invalidatepage( |
845 | struct page *page, | 845 | struct page *page, |
846 | unsigned long offset) | 846 | unsigned int offset, |
847 | unsigned int length) | ||
847 | { | 848 | { |
848 | trace_xfs_invalidatepage(page->mapping->host, page, offset); | 849 | trace_xfs_invalidatepage(page->mapping->host, page, offset, |
849 | block_invalidatepage(page, offset); | 850 | length); |
851 | block_invalidatepage(page, offset, length); | ||
850 | } | 852 | } |
851 | 853 | ||
852 | /* | 854 | /* |
@@ -910,7 +912,7 @@ next_buffer: | |||
910 | 912 | ||
911 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 913 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
912 | out_invalidate: | 914 | out_invalidate: |
913 | xfs_vm_invalidatepage(page, 0); | 915 | xfs_vm_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
914 | return; | 916 | return; |
915 | } | 917 | } |
916 | 918 | ||
@@ -940,7 +942,7 @@ xfs_vm_writepage( | |||
940 | int count = 0; | 942 | int count = 0; |
941 | int nonblocking = 0; | 943 | int nonblocking = 0; |
942 | 944 | ||
943 | trace_xfs_writepage(inode, page, 0); | 945 | trace_xfs_writepage(inode, page, 0, 0); |
944 | 946 | ||
945 | ASSERT(page_has_buffers(page)); | 947 | ASSERT(page_has_buffers(page)); |
946 | 948 | ||
@@ -1171,7 +1173,7 @@ xfs_vm_releasepage( | |||
1171 | { | 1173 | { |
1172 | int delalloc, unwritten; | 1174 | int delalloc, unwritten; |
1173 | 1175 | ||
1174 | trace_xfs_releasepage(page->mapping->host, page, 0); | 1176 | trace_xfs_releasepage(page->mapping->host, page, 0, 0); |
1175 | 1177 | ||
1176 | xfs_count_page_state(page, &delalloc, &unwritten); | 1178 | xfs_count_page_state(page, &delalloc, &unwritten); |
1177 | 1179 | ||
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 31d3cd129269..b800fbcafc7f 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
@@ -690,6 +690,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) | |||
690 | sf = (xfs_attr_shortform_t *)tmpbuffer; | 690 | sf = (xfs_attr_shortform_t *)tmpbuffer; |
691 | 691 | ||
692 | xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); | 692 | xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); |
693 | xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK); | ||
694 | |||
693 | bp = NULL; | 695 | bp = NULL; |
694 | error = xfs_da_grow_inode(args, &blkno); | 696 | error = xfs_da_grow_inode(args, &blkno); |
695 | if (error) { | 697 | if (error) { |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 89042848f9ec..05c698ccb238 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -1161,6 +1161,24 @@ xfs_bmap_extents_to_btree( | |||
1161 | * since the file data needs to get logged so things will stay consistent. | 1161 | * since the file data needs to get logged so things will stay consistent. |
1162 | * (The bmap-level manipulations are ok, though). | 1162 | * (The bmap-level manipulations are ok, though). |
1163 | */ | 1163 | */ |
1164 | void | ||
1165 | xfs_bmap_local_to_extents_empty( | ||
1166 | struct xfs_inode *ip, | ||
1167 | int whichfork) | ||
1168 | { | ||
1169 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); | ||
1170 | |||
1171 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); | ||
1172 | ASSERT(ifp->if_bytes == 0); | ||
1173 | ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); | ||
1174 | |||
1175 | xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); | ||
1176 | ifp->if_flags &= ~XFS_IFINLINE; | ||
1177 | ifp->if_flags |= XFS_IFEXTENTS; | ||
1178 | XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); | ||
1179 | } | ||
1180 | |||
1181 | |||
1164 | STATIC int /* error */ | 1182 | STATIC int /* error */ |
1165 | xfs_bmap_local_to_extents( | 1183 | xfs_bmap_local_to_extents( |
1166 | xfs_trans_t *tp, /* transaction pointer */ | 1184 | xfs_trans_t *tp, /* transaction pointer */ |
@@ -1174,9 +1192,12 @@ xfs_bmap_local_to_extents( | |||
1174 | struct xfs_inode *ip, | 1192 | struct xfs_inode *ip, |
1175 | struct xfs_ifork *ifp)) | 1193 | struct xfs_ifork *ifp)) |
1176 | { | 1194 | { |
1177 | int error; /* error return value */ | 1195 | int error = 0; |
1178 | int flags; /* logging flags returned */ | 1196 | int flags; /* logging flags returned */ |
1179 | xfs_ifork_t *ifp; /* inode fork pointer */ | 1197 | xfs_ifork_t *ifp; /* inode fork pointer */ |
1198 | xfs_alloc_arg_t args; /* allocation arguments */ | ||
1199 | xfs_buf_t *bp; /* buffer for extent block */ | ||
1200 | xfs_bmbt_rec_host_t *ep; /* extent record pointer */ | ||
1180 | 1201 | ||
1181 | /* | 1202 | /* |
1182 | * We don't want to deal with the case of keeping inode data inline yet. | 1203 | * We don't want to deal with the case of keeping inode data inline yet. |
@@ -1185,68 +1206,65 @@ xfs_bmap_local_to_extents( | |||
1185 | ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); | 1206 | ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); |
1186 | ifp = XFS_IFORK_PTR(ip, whichfork); | 1207 | ifp = XFS_IFORK_PTR(ip, whichfork); |
1187 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); | 1208 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); |
1209 | |||
1210 | if (!ifp->if_bytes) { | ||
1211 | xfs_bmap_local_to_extents_empty(ip, whichfork); | ||
1212 | flags = XFS_ILOG_CORE; | ||
1213 | goto done; | ||
1214 | } | ||
1215 | |||
1188 | flags = 0; | 1216 | flags = 0; |
1189 | error = 0; | 1217 | error = 0; |
1190 | if (ifp->if_bytes) { | 1218 | ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == |
1191 | xfs_alloc_arg_t args; /* allocation arguments */ | 1219 | XFS_IFINLINE); |
1192 | xfs_buf_t *bp; /* buffer for extent block */ | 1220 | memset(&args, 0, sizeof(args)); |
1193 | xfs_bmbt_rec_host_t *ep;/* extent record pointer */ | 1221 | args.tp = tp; |
1194 | 1222 | args.mp = ip->i_mount; | |
1195 | ASSERT((ifp->if_flags & | 1223 | args.firstblock = *firstblock; |
1196 | (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); | 1224 | /* |
1197 | memset(&args, 0, sizeof(args)); | 1225 | * Allocate a block. We know we need only one, since the |
1198 | args.tp = tp; | 1226 | * file currently fits in an inode. |
1199 | args.mp = ip->i_mount; | 1227 | */ |
1200 | args.firstblock = *firstblock; | 1228 | if (*firstblock == NULLFSBLOCK) { |
1201 | /* | 1229 | args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); |
1202 | * Allocate a block. We know we need only one, since the | 1230 | args.type = XFS_ALLOCTYPE_START_BNO; |
1203 | * file currently fits in an inode. | ||
1204 | */ | ||
1205 | if (*firstblock == NULLFSBLOCK) { | ||
1206 | args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); | ||
1207 | args.type = XFS_ALLOCTYPE_START_BNO; | ||
1208 | } else { | ||
1209 | args.fsbno = *firstblock; | ||
1210 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | ||
1211 | } | ||
1212 | args.total = total; | ||
1213 | args.minlen = args.maxlen = args.prod = 1; | ||
1214 | error = xfs_alloc_vextent(&args); | ||
1215 | if (error) | ||
1216 | goto done; | ||
1217 | |||
1218 | /* Can't fail, the space was reserved. */ | ||
1219 | ASSERT(args.fsbno != NULLFSBLOCK); | ||
1220 | ASSERT(args.len == 1); | ||
1221 | *firstblock = args.fsbno; | ||
1222 | bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); | ||
1223 | |||
1224 | /* initialise the block and copy the data */ | ||
1225 | init_fn(tp, bp, ip, ifp); | ||
1226 | |||
1227 | /* account for the change in fork size and log everything */ | ||
1228 | xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); | ||
1229 | xfs_bmap_forkoff_reset(args.mp, ip, whichfork); | ||
1230 | xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); | ||
1231 | xfs_iext_add(ifp, 0, 1); | ||
1232 | ep = xfs_iext_get_ext(ifp, 0); | ||
1233 | xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); | ||
1234 | trace_xfs_bmap_post_update(ip, 0, | ||
1235 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0, | ||
1236 | _THIS_IP_); | ||
1237 | XFS_IFORK_NEXT_SET(ip, whichfork, 1); | ||
1238 | ip->i_d.di_nblocks = 1; | ||
1239 | xfs_trans_mod_dquot_byino(tp, ip, | ||
1240 | XFS_TRANS_DQ_BCOUNT, 1L); | ||
1241 | flags |= xfs_ilog_fext(whichfork); | ||
1242 | } else { | 1231 | } else { |
1243 | ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); | 1232 | args.fsbno = *firstblock; |
1244 | xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); | 1233 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
1245 | } | 1234 | } |
1246 | ifp->if_flags &= ~XFS_IFINLINE; | 1235 | args.total = total; |
1247 | ifp->if_flags |= XFS_IFEXTENTS; | 1236 | args.minlen = args.maxlen = args.prod = 1; |
1248 | XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); | 1237 | error = xfs_alloc_vextent(&args); |
1238 | if (error) | ||
1239 | goto done; | ||
1240 | |||
1241 | /* Can't fail, the space was reserved. */ | ||
1242 | ASSERT(args.fsbno != NULLFSBLOCK); | ||
1243 | ASSERT(args.len == 1); | ||
1244 | *firstblock = args.fsbno; | ||
1245 | bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); | ||
1246 | |||
1247 | /* initialise the block and copy the data */ | ||
1248 | init_fn(tp, bp, ip, ifp); | ||
1249 | |||
1250 | /* account for the change in fork size and log everything */ | ||
1251 | xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); | ||
1252 | xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); | ||
1253 | xfs_bmap_local_to_extents_empty(ip, whichfork); | ||
1249 | flags |= XFS_ILOG_CORE; | 1254 | flags |= XFS_ILOG_CORE; |
1255 | |||
1256 | xfs_iext_add(ifp, 0, 1); | ||
1257 | ep = xfs_iext_get_ext(ifp, 0); | ||
1258 | xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); | ||
1259 | trace_xfs_bmap_post_update(ip, 0, | ||
1260 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0, | ||
1261 | _THIS_IP_); | ||
1262 | XFS_IFORK_NEXT_SET(ip, whichfork, 1); | ||
1263 | ip->i_d.di_nblocks = 1; | ||
1264 | xfs_trans_mod_dquot_byino(tp, ip, | ||
1265 | XFS_TRANS_DQ_BCOUNT, 1L); | ||
1266 | flags |= xfs_ilog_fext(whichfork); | ||
1267 | |||
1250 | done: | 1268 | done: |
1251 | *logflagsp = flags; | 1269 | *logflagsp = flags; |
1252 | return error; | 1270 | return error; |
@@ -1323,25 +1341,6 @@ xfs_bmap_add_attrfork_extents( | |||
1323 | } | 1341 | } |
1324 | 1342 | ||
1325 | /* | 1343 | /* |
1326 | * Block initialisation function for local to extent format conversion. | ||
1327 | * | ||
1328 | * This shouldn't actually be called by anyone, so make sure debug kernels cause | ||
1329 | * a noticable failure. | ||
1330 | */ | ||
1331 | STATIC void | ||
1332 | xfs_bmap_local_to_extents_init_fn( | ||
1333 | struct xfs_trans *tp, | ||
1334 | struct xfs_buf *bp, | ||
1335 | struct xfs_inode *ip, | ||
1336 | struct xfs_ifork *ifp) | ||
1337 | { | ||
1338 | ASSERT(0); | ||
1339 | bp->b_ops = &xfs_bmbt_buf_ops; | ||
1340 | memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); | ||
1341 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); | ||
1342 | } | ||
1343 | |||
1344 | /* | ||
1345 | * Called from xfs_bmap_add_attrfork to handle local format files. Each | 1344 | * Called from xfs_bmap_add_attrfork to handle local format files. Each |
1346 | * different data fork content type needs a different callout to do the | 1345 | * different data fork content type needs a different callout to do the |
1347 | * conversion. Some are basic and only require special block initialisation | 1346 | * conversion. Some are basic and only require special block initialisation |
@@ -1381,9 +1380,9 @@ xfs_bmap_add_attrfork_local( | |||
1381 | flags, XFS_DATA_FORK, | 1380 | flags, XFS_DATA_FORK, |
1382 | xfs_symlink_local_to_remote); | 1381 | xfs_symlink_local_to_remote); |
1383 | 1382 | ||
1384 | return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, | 1383 | /* should only be called for types that support local format data */ |
1385 | XFS_DATA_FORK, | 1384 | ASSERT(0); |
1386 | xfs_bmap_local_to_extents_init_fn); | 1385 | return EFSCORRUPTED; |
1387 | } | 1386 | } |
1388 | 1387 | ||
1389 | /* | 1388 | /* |
@@ -4907,20 +4906,19 @@ xfs_bmapi_write( | |||
4907 | orig_mval = mval; | 4906 | orig_mval = mval; |
4908 | orig_nmap = *nmap; | 4907 | orig_nmap = *nmap; |
4909 | #endif | 4908 | #endif |
4909 | whichfork = (flags & XFS_BMAPI_ATTRFORK) ? | ||
4910 | XFS_ATTR_FORK : XFS_DATA_FORK; | ||
4910 | 4911 | ||
4911 | ASSERT(*nmap >= 1); | 4912 | ASSERT(*nmap >= 1); |
4912 | ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); | 4913 | ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); |
4913 | ASSERT(!(flags & XFS_BMAPI_IGSTATE)); | 4914 | ASSERT(!(flags & XFS_BMAPI_IGSTATE)); |
4914 | ASSERT(tp != NULL); | 4915 | ASSERT(tp != NULL); |
4915 | ASSERT(len > 0); | 4916 | ASSERT(len > 0); |
4916 | 4917 | ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); | |
4917 | whichfork = (flags & XFS_BMAPI_ATTRFORK) ? | ||
4918 | XFS_ATTR_FORK : XFS_DATA_FORK; | ||
4919 | 4918 | ||
4920 | if (unlikely(XFS_TEST_ERROR( | 4919 | if (unlikely(XFS_TEST_ERROR( |
4921 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | 4920 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && |
4922 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && | 4921 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), |
4923 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL), | ||
4924 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { | 4922 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { |
4925 | XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); | 4923 | XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); |
4926 | return XFS_ERROR(EFSCORRUPTED); | 4924 | return XFS_ERROR(EFSCORRUPTED); |
@@ -4933,37 +4931,6 @@ xfs_bmapi_write( | |||
4933 | 4931 | ||
4934 | XFS_STATS_INC(xs_blk_mapw); | 4932 | XFS_STATS_INC(xs_blk_mapw); |
4935 | 4933 | ||
4936 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { | ||
4937 | /* | ||
4938 | * XXX (dgc): This assumes we are only called for inodes that | ||
4939 | * contain content neutral data in local format. Anything that | ||
4940 | * contains caller-specific data in local format that needs | ||
4941 | * transformation to move to a block format needs to do the | ||
4942 | * conversion to extent format itself. | ||
4943 | * | ||
4944 | * Directory data forks and attribute forks handle this | ||
4945 | * themselves, but with the addition of metadata verifiers every | ||
4946 | * data fork in local format now contains caller specific data | ||
4947 | * and as such conversion through this function is likely to be | ||
4948 | * broken. | ||
4949 | * | ||
4950 | * The only likely user of this branch is for remote symlinks, | ||
4951 | * but we cannot overwrite the data fork contents of the symlink | ||
4952 | * (EEXIST occurs higher up the stack) and so it will never go | ||
4953 | * from local format to extent format here. Hence I don't think | ||
4954 | * this branch is ever executed intentionally and we should | ||
4955 | * consider removing it and asserting that xfs_bmapi_write() | ||
4956 | * cannot be called directly on local format forks. i.e. callers | ||
4957 | * are completely responsible for local to extent format | ||
4958 | * conversion, not xfs_bmapi_write(). | ||
4959 | */ | ||
4960 | error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, | ||
4961 | &bma.logflags, whichfork, | ||
4962 | xfs_bmap_local_to_extents_init_fn); | ||
4963 | if (error) | ||
4964 | goto error0; | ||
4965 | } | ||
4966 | |||
4967 | if (*firstblock == NULLFSBLOCK) { | 4934 | if (*firstblock == NULLFSBLOCK) { |
4968 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) | 4935 | if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) |
4969 | bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; | 4936 | bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; |
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 5f469c3516eb..1cf1292d29b7 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -172,6 +172,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, | |||
172 | #endif | 172 | #endif |
173 | 173 | ||
174 | int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); | 174 | int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); |
175 | void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); | ||
175 | void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len, | 176 | void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len, |
176 | struct xfs_bmap_free *flist, struct xfs_mount *mp); | 177 | struct xfs_bmap_free *flist, struct xfs_mount *mp); |
177 | void xfs_bmap_cancel(struct xfs_bmap_free *flist); | 178 | void xfs_bmap_cancel(struct xfs_bmap_free *flist); |
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 70c43d9f72c1..1b726d626941 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h | |||
@@ -196,6 +196,8 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; | |||
196 | #define XFS_BMDR_SPACE_CALC(nrecs) \ | 196 | #define XFS_BMDR_SPACE_CALC(nrecs) \ |
197 | (int)(sizeof(xfs_bmdr_block_t) + \ | 197 | (int)(sizeof(xfs_bmdr_block_t) + \ |
198 | ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) | 198 | ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) |
199 | #define XFS_BMAP_BMDR_SPACE(bb) \ | ||
200 | (XFS_BMDR_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs))) | ||
199 | 201 | ||
200 | /* | 202 | /* |
201 | * Maximum number of bmap btree levels. | 203 | * Maximum number of bmap btree levels. |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 4ec431777048..bfc4e0c26fd3 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -140,6 +140,16 @@ xfs_buf_item_size( | |||
140 | 140 | ||
141 | ASSERT(bip->bli_flags & XFS_BLI_LOGGED); | 141 | ASSERT(bip->bli_flags & XFS_BLI_LOGGED); |
142 | 142 | ||
143 | if (bip->bli_flags & XFS_BLI_ORDERED) { | ||
144 | /* | ||
145 | * The buffer has been logged just to order it. | ||
146 | * It is not being included in the transaction | ||
147 | * commit, so no vectors are used at all. | ||
148 | */ | ||
149 | trace_xfs_buf_item_size_ordered(bip); | ||
150 | return XFS_LOG_VEC_ORDERED; | ||
151 | } | ||
152 | |||
143 | /* | 153 | /* |
144 | * the vector count is based on the number of buffer vectors we have | 154 | * the vector count is based on the number of buffer vectors we have |
145 | * dirty bits in. This will only be greater than one when we have a | 155 | * dirty bits in. This will only be greater than one when we have a |
@@ -212,6 +222,7 @@ xfs_buf_item_format_segment( | |||
212 | goto out; | 222 | goto out; |
213 | } | 223 | } |
214 | 224 | ||
225 | |||
215 | /* | 226 | /* |
216 | * Fill in an iovec for each set of contiguous chunks. | 227 | * Fill in an iovec for each set of contiguous chunks. |
217 | */ | 228 | */ |
@@ -299,18 +310,36 @@ xfs_buf_item_format( | |||
299 | 310 | ||
300 | /* | 311 | /* |
301 | * If it is an inode buffer, transfer the in-memory state to the | 312 | * If it is an inode buffer, transfer the in-memory state to the |
302 | * format flags and clear the in-memory state. We do not transfer | 313 | * format flags and clear the in-memory state. |
314 | * | ||
315 | * For buffer based inode allocation, we do not transfer | ||
303 | * this state if the inode buffer allocation has not yet been committed | 316 | * this state if the inode buffer allocation has not yet been committed |
304 | * to the log as setting the XFS_BLI_INODE_BUF flag will prevent | 317 | * to the log as setting the XFS_BLI_INODE_BUF flag will prevent |
305 | * correct replay of the inode allocation. | 318 | * correct replay of the inode allocation. |
319 | * | ||
320 | * For icreate item based inode allocation, the buffers aren't written | ||
321 | * to the journal during allocation, and hence we should always tag the | ||
322 | * buffer as an inode buffer so that the correct unlinked list replay | ||
323 | * occurs during recovery. | ||
306 | */ | 324 | */ |
307 | if (bip->bli_flags & XFS_BLI_INODE_BUF) { | 325 | if (bip->bli_flags & XFS_BLI_INODE_BUF) { |
308 | if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && | 326 | if (xfs_sb_version_hascrc(&lip->li_mountp->m_sb) || |
327 | !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && | ||
309 | xfs_log_item_in_current_chkpt(lip))) | 328 | xfs_log_item_in_current_chkpt(lip))) |
310 | bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; | 329 | bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; |
311 | bip->bli_flags &= ~XFS_BLI_INODE_BUF; | 330 | bip->bli_flags &= ~XFS_BLI_INODE_BUF; |
312 | } | 331 | } |
313 | 332 | ||
333 | if ((bip->bli_flags & (XFS_BLI_ORDERED|XFS_BLI_STALE)) == | ||
334 | XFS_BLI_ORDERED) { | ||
335 | /* | ||
336 | * The buffer has been logged just to order it. It is not being | ||
337 | * included in the transaction commit, so don't format it. | ||
338 | */ | ||
339 | trace_xfs_buf_item_format_ordered(bip); | ||
340 | return; | ||
341 | } | ||
342 | |||
314 | for (i = 0; i < bip->bli_format_count; i++) { | 343 | for (i = 0; i < bip->bli_format_count; i++) { |
315 | vecp = xfs_buf_item_format_segment(bip, vecp, offset, | 344 | vecp = xfs_buf_item_format_segment(bip, vecp, offset, |
316 | &bip->bli_formats[i]); | 345 | &bip->bli_formats[i]); |
@@ -340,6 +369,7 @@ xfs_buf_item_pin( | |||
340 | 369 | ||
341 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | 370 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
342 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || | 371 | ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || |
372 | (bip->bli_flags & XFS_BLI_ORDERED) || | ||
343 | (bip->bli_flags & XFS_BLI_STALE)); | 373 | (bip->bli_flags & XFS_BLI_STALE)); |
344 | 374 | ||
345 | trace_xfs_buf_item_pin(bip); | 375 | trace_xfs_buf_item_pin(bip); |
@@ -512,8 +542,9 @@ xfs_buf_item_unlock( | |||
512 | { | 542 | { |
513 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); | 543 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); |
514 | struct xfs_buf *bp = bip->bli_buf; | 544 | struct xfs_buf *bp = bip->bli_buf; |
515 | int aborted, clean, i; | 545 | bool clean; |
516 | uint hold; | 546 | bool aborted; |
547 | int flags; | ||
517 | 548 | ||
518 | /* Clear the buffer's association with this transaction. */ | 549 | /* Clear the buffer's association with this transaction. */ |
519 | bp->b_transp = NULL; | 550 | bp->b_transp = NULL; |
@@ -524,23 +555,21 @@ xfs_buf_item_unlock( | |||
524 | * (cancelled) buffers at unpin time, but we'll never go through the | 555 | * (cancelled) buffers at unpin time, but we'll never go through the |
525 | * pin/unpin cycle if we abort inside commit. | 556 | * pin/unpin cycle if we abort inside commit. |
526 | */ | 557 | */ |
527 | aborted = (lip->li_flags & XFS_LI_ABORTED) != 0; | 558 | aborted = (lip->li_flags & XFS_LI_ABORTED) ? true : false; |
528 | |||
529 | /* | 559 | /* |
530 | * Before possibly freeing the buf item, determine if we should | 560 | * Before possibly freeing the buf item, copy the per-transaction state |
531 | * release the buffer at the end of this routine. | 561 | * so we can reference it safely later after clearing it from the |
562 | * buffer log item. | ||
532 | */ | 563 | */ |
533 | hold = bip->bli_flags & XFS_BLI_HOLD; | 564 | flags = bip->bli_flags; |
534 | 565 | bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); | |
535 | /* Clear the per transaction state. */ | ||
536 | bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD); | ||
537 | 566 | ||
538 | /* | 567 | /* |
539 | * If the buf item is marked stale, then don't do anything. We'll | 568 | * If the buf item is marked stale, then don't do anything. We'll |
540 | * unlock the buffer and free the buf item when the buffer is unpinned | 569 | * unlock the buffer and free the buf item when the buffer is unpinned |
541 | * for the last time. | 570 | * for the last time. |
542 | */ | 571 | */ |
543 | if (bip->bli_flags & XFS_BLI_STALE) { | 572 | if (flags & XFS_BLI_STALE) { |
544 | trace_xfs_buf_item_unlock_stale(bip); | 573 | trace_xfs_buf_item_unlock_stale(bip); |
545 | ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); | 574 | ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); |
546 | if (!aborted) { | 575 | if (!aborted) { |
@@ -557,13 +586,19 @@ xfs_buf_item_unlock( | |||
557 | * be the only reference to the buf item, so we free it anyway | 586 | * be the only reference to the buf item, so we free it anyway |
558 | * regardless of whether it is dirty or not. A dirty abort implies a | 587 | * regardless of whether it is dirty or not. A dirty abort implies a |
559 | * shutdown, anyway. | 588 | * shutdown, anyway. |
589 | * | ||
590 | * Ordered buffers are dirty but may have no recorded changes, so ensure | ||
591 | * we only release clean items here. | ||
560 | */ | 592 | */ |
561 | clean = 1; | 593 | clean = (flags & XFS_BLI_DIRTY) ? false : true; |
562 | for (i = 0; i < bip->bli_format_count; i++) { | 594 | if (clean) { |
563 | if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, | 595 | int i; |
564 | bip->bli_formats[i].blf_map_size)) { | 596 | for (i = 0; i < bip->bli_format_count; i++) { |
565 | clean = 0; | 597 | if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, |
566 | break; | 598 | bip->bli_formats[i].blf_map_size)) { |
599 | clean = false; | ||
600 | break; | ||
601 | } | ||
567 | } | 602 | } |
568 | } | 603 | } |
569 | if (clean) | 604 | if (clean) |
@@ -576,7 +611,7 @@ xfs_buf_item_unlock( | |||
576 | } else | 611 | } else |
577 | atomic_dec(&bip->bli_refcount); | 612 | atomic_dec(&bip->bli_refcount); |
578 | 613 | ||
579 | if (!hold) | 614 | if (!(flags & XFS_BLI_HOLD)) |
580 | xfs_buf_relse(bp); | 615 | xfs_buf_relse(bp); |
581 | } | 616 | } |
582 | 617 | ||
@@ -842,12 +877,6 @@ xfs_buf_item_log( | |||
842 | struct xfs_buf *bp = bip->bli_buf; | 877 | struct xfs_buf *bp = bip->bli_buf; |
843 | 878 | ||
844 | /* | 879 | /* |
845 | * Mark the item as having some dirty data for | ||
846 | * quick reference in xfs_buf_item_dirty. | ||
847 | */ | ||
848 | bip->bli_flags |= XFS_BLI_DIRTY; | ||
849 | |||
850 | /* | ||
851 | * walk each buffer segment and mark them dirty appropriately. | 880 | * walk each buffer segment and mark them dirty appropriately. |
852 | */ | 881 | */ |
853 | start = 0; | 882 | start = 0; |
@@ -873,7 +902,7 @@ xfs_buf_item_log( | |||
873 | 902 | ||
874 | 903 | ||
875 | /* | 904 | /* |
876 | * Return 1 if the buffer has some data that has been logged (at any | 905 | * Return 1 if the buffer has been logged or ordered in a transaction (at any |
877 | * point, not just the current transaction) and 0 if not. | 906 | * point, not just the current transaction) and 0 if not. |
878 | */ | 907 | */ |
879 | uint | 908 | uint |
@@ -907,11 +936,11 @@ void | |||
907 | xfs_buf_item_relse( | 936 | xfs_buf_item_relse( |
908 | xfs_buf_t *bp) | 937 | xfs_buf_t *bp) |
909 | { | 938 | { |
910 | xfs_buf_log_item_t *bip; | 939 | xfs_buf_log_item_t *bip = bp->b_fspriv; |
911 | 940 | ||
912 | trace_xfs_buf_item_relse(bp, _RET_IP_); | 941 | trace_xfs_buf_item_relse(bp, _RET_IP_); |
942 | ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); | ||
913 | 943 | ||
914 | bip = bp->b_fspriv; | ||
915 | bp->b_fspriv = bip->bli_item.li_bio_list; | 944 | bp->b_fspriv = bip->bli_item.li_bio_list; |
916 | if (bp->b_fspriv == NULL) | 945 | if (bp->b_fspriv == NULL) |
917 | bp->b_iodone = NULL; | 946 | bp->b_iodone = NULL; |
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 2573d2a75fc8..0f1c247dc680 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h | |||
@@ -120,6 +120,7 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf) | |||
120 | #define XFS_BLI_INODE_ALLOC_BUF 0x10 | 120 | #define XFS_BLI_INODE_ALLOC_BUF 0x10 |
121 | #define XFS_BLI_STALE_INODE 0x20 | 121 | #define XFS_BLI_STALE_INODE 0x20 |
122 | #define XFS_BLI_INODE_BUF 0x40 | 122 | #define XFS_BLI_INODE_BUF 0x40 |
123 | #define XFS_BLI_ORDERED 0x80 | ||
123 | 124 | ||
124 | #define XFS_BLI_FLAGS \ | 125 | #define XFS_BLI_FLAGS \ |
125 | { XFS_BLI_HOLD, "HOLD" }, \ | 126 | { XFS_BLI_HOLD, "HOLD" }, \ |
@@ -128,7 +129,8 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf) | |||
128 | { XFS_BLI_LOGGED, "LOGGED" }, \ | 129 | { XFS_BLI_LOGGED, "LOGGED" }, \ |
129 | { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ | 130 | { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ |
130 | { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ | 131 | { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ |
131 | { XFS_BLI_INODE_BUF, "INODE_BUF" } | 132 | { XFS_BLI_INODE_BUF, "INODE_BUF" }, \ |
133 | { XFS_BLI_ORDERED, "ORDERED" } | ||
132 | 134 | ||
133 | 135 | ||
134 | #ifdef __KERNEL__ | 136 | #ifdef __KERNEL__ |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index c407e1ccff43..e36445ceaf80 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -24,6 +24,9 @@ | |||
24 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
25 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
26 | #include "xfs_bmap_btree.h" | 26 | #include "xfs_bmap_btree.h" |
27 | #include "xfs_alloc_btree.h" | ||
28 | #include "xfs_ialloc_btree.h" | ||
29 | #include "xfs_btree.h" | ||
27 | #include "xfs_dinode.h" | 30 | #include "xfs_dinode.h" |
28 | #include "xfs_inode.h" | 31 | #include "xfs_inode.h" |
29 | #include "xfs_inode_item.h" | 32 | #include "xfs_inode_item.h" |
@@ -182,7 +185,7 @@ xfs_swap_extents_check_format( | |||
182 | */ | 185 | */ |
183 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { | 186 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { |
184 | if (XFS_IFORK_BOFF(ip) && | 187 | if (XFS_IFORK_BOFF(ip) && |
185 | tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) | 188 | XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) |
186 | return EINVAL; | 189 | return EINVAL; |
187 | if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= | 190 | if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= |
188 | XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) | 191 | XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) |
@@ -192,9 +195,8 @@ xfs_swap_extents_check_format( | |||
192 | /* Reciprocal target->temp btree format checks */ | 195 | /* Reciprocal target->temp btree format checks */ |
193 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { | 196 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { |
194 | if (XFS_IFORK_BOFF(tip) && | 197 | if (XFS_IFORK_BOFF(tip) && |
195 | ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) | 198 | XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) |
196 | return EINVAL; | 199 | return EINVAL; |
197 | |||
198 | if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= | 200 | if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= |
199 | XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) | 201 | XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) |
200 | return EINVAL; | 202 | return EINVAL; |
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index f7a0e95d197a..e5869b50dc41 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h | |||
@@ -39,6 +39,9 @@ typedef struct xfs_timestamp { | |||
39 | * There is a very similar struct icdinode in xfs_inode which matches the | 39 | * There is a very similar struct icdinode in xfs_inode which matches the |
40 | * layout of the first 96 bytes of this structure, but is kept in native | 40 | * layout of the first 96 bytes of this structure, but is kept in native |
41 | * format instead of big endian. | 41 | * format instead of big endian. |
42 | * | ||
43 | * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed | ||
44 | * padding field for v3 inodes. | ||
42 | */ | 45 | */ |
43 | typedef struct xfs_dinode { | 46 | typedef struct xfs_dinode { |
44 | __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ | 47 | __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ |
@@ -132,9 +135,6 @@ typedef enum xfs_dinode_fmt { | |||
132 | #define XFS_LITINO(mp, version) \ | 135 | #define XFS_LITINO(mp, version) \ |
133 | ((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version))) | 136 | ((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version))) |
134 | 137 | ||
135 | #define XFS_BROOT_SIZE_ADJ(ip) \ | ||
136 | (XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t)) | ||
137 | |||
138 | /* | 138 | /* |
139 | * Inode data & attribute fork sizes, per inode. | 139 | * Inode data & attribute fork sizes, per inode. |
140 | */ | 140 | */ |
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index b26a50f9921d..8f023dee404d 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
@@ -368,10 +368,8 @@ xfs_dir_removename( | |||
368 | int | 368 | int |
369 | xfs_readdir( | 369 | xfs_readdir( |
370 | xfs_inode_t *dp, | 370 | xfs_inode_t *dp, |
371 | void *dirent, | 371 | struct dir_context *ctx, |
372 | size_t bufsize, | 372 | size_t bufsize) |
373 | xfs_off_t *offset, | ||
374 | filldir_t filldir) | ||
375 | { | 373 | { |
376 | int rval; /* return value */ | 374 | int rval; /* return value */ |
377 | int v; /* type-checking value */ | 375 | int v; /* type-checking value */ |
@@ -385,14 +383,13 @@ xfs_readdir( | |||
385 | XFS_STATS_INC(xs_dir_getdents); | 383 | XFS_STATS_INC(xs_dir_getdents); |
386 | 384 | ||
387 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 385 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) |
388 | rval = xfs_dir2_sf_getdents(dp, dirent, offset, filldir); | 386 | rval = xfs_dir2_sf_getdents(dp, ctx); |
389 | else if ((rval = xfs_dir2_isblock(NULL, dp, &v))) | 387 | else if ((rval = xfs_dir2_isblock(NULL, dp, &v))) |
390 | ; | 388 | ; |
391 | else if (v) | 389 | else if (v) |
392 | rval = xfs_dir2_block_getdents(dp, dirent, offset, filldir); | 390 | rval = xfs_dir2_block_getdents(dp, ctx); |
393 | else | 391 | else |
394 | rval = xfs_dir2_leaf_getdents(dp, dirent, bufsize, offset, | 392 | rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize); |
395 | filldir); | ||
396 | return rval; | 393 | return rval; |
397 | } | 394 | } |
398 | 395 | ||
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index e59f5fc816fe..5e7fbd72cf52 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "xfs_dinode.h" | 29 | #include "xfs_dinode.h" |
30 | #include "xfs_inode.h" | 30 | #include "xfs_inode.h" |
31 | #include "xfs_inode_item.h" | 31 | #include "xfs_inode_item.h" |
32 | #include "xfs_bmap.h" | ||
32 | #include "xfs_buf_item.h" | 33 | #include "xfs_buf_item.h" |
33 | #include "xfs_dir2.h" | 34 | #include "xfs_dir2.h" |
34 | #include "xfs_dir2_format.h" | 35 | #include "xfs_dir2_format.h" |
@@ -569,9 +570,7 @@ xfs_dir2_block_addname( | |||
569 | int /* error */ | 570 | int /* error */ |
570 | xfs_dir2_block_getdents( | 571 | xfs_dir2_block_getdents( |
571 | xfs_inode_t *dp, /* incore inode */ | 572 | xfs_inode_t *dp, /* incore inode */ |
572 | void *dirent, | 573 | struct dir_context *ctx) |
573 | xfs_off_t *offset, | ||
574 | filldir_t filldir) | ||
575 | { | 574 | { |
576 | xfs_dir2_data_hdr_t *hdr; /* block header */ | 575 | xfs_dir2_data_hdr_t *hdr; /* block header */ |
577 | struct xfs_buf *bp; /* buffer for block */ | 576 | struct xfs_buf *bp; /* buffer for block */ |
@@ -589,7 +588,7 @@ xfs_dir2_block_getdents( | |||
589 | /* | 588 | /* |
590 | * If the block number in the offset is out of range, we're done. | 589 | * If the block number in the offset is out of range, we're done. |
591 | */ | 590 | */ |
592 | if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk) | 591 | if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk) |
593 | return 0; | 592 | return 0; |
594 | 593 | ||
595 | error = xfs_dir3_block_read(NULL, dp, &bp); | 594 | error = xfs_dir3_block_read(NULL, dp, &bp); |
@@ -600,7 +599,7 @@ xfs_dir2_block_getdents( | |||
600 | * Extract the byte offset we start at from the seek pointer. | 599 | * Extract the byte offset we start at from the seek pointer. |
601 | * We'll skip entries before this. | 600 | * We'll skip entries before this. |
602 | */ | 601 | */ |
603 | wantoff = xfs_dir2_dataptr_to_off(mp, *offset); | 602 | wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos); |
604 | hdr = bp->b_addr; | 603 | hdr = bp->b_addr; |
605 | xfs_dir3_data_check(dp, bp); | 604 | xfs_dir3_data_check(dp, bp); |
606 | /* | 605 | /* |
@@ -639,13 +638,12 @@ xfs_dir2_block_getdents( | |||
639 | cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, | 638 | cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, |
640 | (char *)dep - (char *)hdr); | 639 | (char *)dep - (char *)hdr); |
641 | 640 | ||
641 | ctx->pos = cook & 0x7fffffff; | ||
642 | /* | 642 | /* |
643 | * If it didn't fit, set the final offset to here & return. | 643 | * If it didn't fit, set the final offset to here & return. |
644 | */ | 644 | */ |
645 | if (filldir(dirent, (char *)dep->name, dep->namelen, | 645 | if (!dir_emit(ctx, (char *)dep->name, dep->namelen, |
646 | cook & 0x7fffffff, be64_to_cpu(dep->inumber), | 646 | be64_to_cpu(dep->inumber), DT_UNKNOWN)) { |
647 | DT_UNKNOWN)) { | ||
648 | *offset = cook & 0x7fffffff; | ||
649 | xfs_trans_brelse(NULL, bp); | 647 | xfs_trans_brelse(NULL, bp); |
650 | return 0; | 648 | return 0; |
651 | } | 649 | } |
@@ -655,7 +653,7 @@ xfs_dir2_block_getdents( | |||
655 | * Reached the end of the block. | 653 | * Reached the end of the block. |
656 | * Set the offset to a non-existent block 1 and return. | 654 | * Set the offset to a non-existent block 1 and return. |
657 | */ | 655 | */ |
658 | *offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & | 656 | ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & |
659 | 0x7fffffff; | 657 | 0x7fffffff; |
660 | xfs_trans_brelse(NULL, bp); | 658 | xfs_trans_brelse(NULL, bp); |
661 | return 0; | 659 | return 0; |
@@ -1167,13 +1165,15 @@ xfs_dir2_sf_to_block( | |||
1167 | __be16 *tagp; /* end of data entry */ | 1165 | __be16 *tagp; /* end of data entry */ |
1168 | xfs_trans_t *tp; /* transaction pointer */ | 1166 | xfs_trans_t *tp; /* transaction pointer */ |
1169 | struct xfs_name name; | 1167 | struct xfs_name name; |
1168 | struct xfs_ifork *ifp; | ||
1170 | 1169 | ||
1171 | trace_xfs_dir2_sf_to_block(args); | 1170 | trace_xfs_dir2_sf_to_block(args); |
1172 | 1171 | ||
1173 | dp = args->dp; | 1172 | dp = args->dp; |
1174 | tp = args->trans; | 1173 | tp = args->trans; |
1175 | mp = dp->i_mount; | 1174 | mp = dp->i_mount; |
1176 | ASSERT(dp->i_df.if_flags & XFS_IFINLINE); | 1175 | ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); |
1176 | ASSERT(ifp->if_flags & XFS_IFINLINE); | ||
1177 | /* | 1177 | /* |
1178 | * Bomb out if the shortform directory is way too short. | 1178 | * Bomb out if the shortform directory is way too short. |
1179 | */ | 1179 | */ |
@@ -1182,22 +1182,23 @@ xfs_dir2_sf_to_block( | |||
1182 | return XFS_ERROR(EIO); | 1182 | return XFS_ERROR(EIO); |
1183 | } | 1183 | } |
1184 | 1184 | ||
1185 | oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; | 1185 | oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data; |
1186 | 1186 | ||
1187 | ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); | 1187 | ASSERT(ifp->if_bytes == dp->i_d.di_size); |
1188 | ASSERT(dp->i_df.if_u1.if_data != NULL); | 1188 | ASSERT(ifp->if_u1.if_data != NULL); |
1189 | ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); | 1189 | ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); |
1190 | ASSERT(dp->i_d.di_nextents == 0); | ||
1190 | 1191 | ||
1191 | /* | 1192 | /* |
1192 | * Copy the directory into a temporary buffer. | 1193 | * Copy the directory into a temporary buffer. |
1193 | * Then pitch the incore inode data so we can make extents. | 1194 | * Then pitch the incore inode data so we can make extents. |
1194 | */ | 1195 | */ |
1195 | sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP); | 1196 | sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP); |
1196 | memcpy(sfp, oldsfp, dp->i_df.if_bytes); | 1197 | memcpy(sfp, oldsfp, ifp->if_bytes); |
1197 | 1198 | ||
1198 | xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); | 1199 | xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); |
1200 | xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK); | ||
1199 | dp->i_d.di_size = 0; | 1201 | dp->i_d.di_size = 0; |
1200 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); | ||
1201 | 1202 | ||
1202 | /* | 1203 | /* |
1203 | * Add block 0 to the inode. | 1204 | * Add block 0 to the inode. |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index da71a1819d78..2aed25cae04d 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
@@ -1108,6 +1108,7 @@ xfs_dir2_leaf_readbuf( | |||
1108 | struct xfs_mount *mp = dp->i_mount; | 1108 | struct xfs_mount *mp = dp->i_mount; |
1109 | struct xfs_buf *bp = *bpp; | 1109 | struct xfs_buf *bp = *bpp; |
1110 | struct xfs_bmbt_irec *map = mip->map; | 1110 | struct xfs_bmbt_irec *map = mip->map; |
1111 | struct blk_plug plug; | ||
1111 | int error = 0; | 1112 | int error = 0; |
1112 | int length; | 1113 | int length; |
1113 | int i; | 1114 | int i; |
@@ -1236,6 +1237,7 @@ xfs_dir2_leaf_readbuf( | |||
1236 | /* | 1237 | /* |
1237 | * Do we need more readahead? | 1238 | * Do we need more readahead? |
1238 | */ | 1239 | */ |
1240 | blk_start_plug(&plug); | ||
1239 | for (mip->ra_index = mip->ra_offset = i = 0; | 1241 | for (mip->ra_index = mip->ra_offset = i = 0; |
1240 | mip->ra_want > mip->ra_current && i < mip->map_blocks; | 1242 | mip->ra_want > mip->ra_current && i < mip->map_blocks; |
1241 | i += mp->m_dirblkfsbs) { | 1243 | i += mp->m_dirblkfsbs) { |
@@ -1287,6 +1289,7 @@ xfs_dir2_leaf_readbuf( | |||
1287 | } | 1289 | } |
1288 | } | 1290 | } |
1289 | } | 1291 | } |
1292 | blk_finish_plug(&plug); | ||
1290 | 1293 | ||
1291 | out: | 1294 | out: |
1292 | *bpp = bp; | 1295 | *bpp = bp; |
@@ -1300,10 +1303,8 @@ out: | |||
1300 | int /* error */ | 1303 | int /* error */ |
1301 | xfs_dir2_leaf_getdents( | 1304 | xfs_dir2_leaf_getdents( |
1302 | xfs_inode_t *dp, /* incore directory inode */ | 1305 | xfs_inode_t *dp, /* incore directory inode */ |
1303 | void *dirent, | 1306 | struct dir_context *ctx, |
1304 | size_t bufsize, | 1307 | size_t bufsize) |
1305 | xfs_off_t *offset, | ||
1306 | filldir_t filldir) | ||
1307 | { | 1308 | { |
1308 | struct xfs_buf *bp = NULL; /* data block buffer */ | 1309 | struct xfs_buf *bp = NULL; /* data block buffer */ |
1309 | xfs_dir2_data_hdr_t *hdr; /* data block header */ | 1310 | xfs_dir2_data_hdr_t *hdr; /* data block header */ |
@@ -1322,7 +1323,7 @@ xfs_dir2_leaf_getdents( | |||
1322 | * If the offset is at or past the largest allowed value, | 1323 | * If the offset is at or past the largest allowed value, |
1323 | * give up right away. | 1324 | * give up right away. |
1324 | */ | 1325 | */ |
1325 | if (*offset >= XFS_DIR2_MAX_DATAPTR) | 1326 | if (ctx->pos >= XFS_DIR2_MAX_DATAPTR) |
1326 | return 0; | 1327 | return 0; |
1327 | 1328 | ||
1328 | mp = dp->i_mount; | 1329 | mp = dp->i_mount; |
@@ -1343,7 +1344,7 @@ xfs_dir2_leaf_getdents( | |||
1343 | * Inside the loop we keep the main offset value as a byte offset | 1344 | * Inside the loop we keep the main offset value as a byte offset |
1344 | * in the directory file. | 1345 | * in the directory file. |
1345 | */ | 1346 | */ |
1346 | curoff = xfs_dir2_dataptr_to_byte(mp, *offset); | 1347 | curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos); |
1347 | 1348 | ||
1348 | /* | 1349 | /* |
1349 | * Force this conversion through db so we truncate the offset | 1350 | * Force this conversion through db so we truncate the offset |
@@ -1444,8 +1445,8 @@ xfs_dir2_leaf_getdents( | |||
1444 | dep = (xfs_dir2_data_entry_t *)ptr; | 1445 | dep = (xfs_dir2_data_entry_t *)ptr; |
1445 | length = xfs_dir2_data_entsize(dep->namelen); | 1446 | length = xfs_dir2_data_entsize(dep->namelen); |
1446 | 1447 | ||
1447 | if (filldir(dirent, (char *)dep->name, dep->namelen, | 1448 | ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; |
1448 | xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, | 1449 | if (!dir_emit(ctx, (char *)dep->name, dep->namelen, |
1449 | be64_to_cpu(dep->inumber), DT_UNKNOWN)) | 1450 | be64_to_cpu(dep->inumber), DT_UNKNOWN)) |
1450 | break; | 1451 | break; |
1451 | 1452 | ||
@@ -1462,9 +1463,9 @@ xfs_dir2_leaf_getdents( | |||
1462 | * All done. Set output offset value to current offset. | 1463 | * All done. Set output offset value to current offset. |
1463 | */ | 1464 | */ |
1464 | if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR)) | 1465 | if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR)) |
1465 | *offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; | 1466 | ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; |
1466 | else | 1467 | else |
1467 | *offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; | 1468 | ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; |
1468 | kmem_free(map_info); | 1469 | kmem_free(map_info); |
1469 | if (bp) | 1470 | if (bp) |
1470 | xfs_trans_brelse(NULL, bp); | 1471 | xfs_trans_brelse(NULL, bp); |
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h index 7cf573c88aad..0511cda4a712 100644 --- a/fs/xfs/xfs_dir2_priv.h +++ b/fs/xfs/xfs_dir2_priv.h | |||
@@ -33,8 +33,8 @@ extern int xfs_dir_cilookup_result(struct xfs_da_args *args, | |||
33 | extern const struct xfs_buf_ops xfs_dir3_block_buf_ops; | 33 | extern const struct xfs_buf_ops xfs_dir3_block_buf_ops; |
34 | 34 | ||
35 | extern int xfs_dir2_block_addname(struct xfs_da_args *args); | 35 | extern int xfs_dir2_block_addname(struct xfs_da_args *args); |
36 | extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, | 36 | extern int xfs_dir2_block_getdents(struct xfs_inode *dp, |
37 | xfs_off_t *offset, filldir_t filldir); | 37 | struct dir_context *ctx); |
38 | extern int xfs_dir2_block_lookup(struct xfs_da_args *args); | 38 | extern int xfs_dir2_block_lookup(struct xfs_da_args *args); |
39 | extern int xfs_dir2_block_removename(struct xfs_da_args *args); | 39 | extern int xfs_dir2_block_removename(struct xfs_da_args *args); |
40 | extern int xfs_dir2_block_replace(struct xfs_da_args *args); | 40 | extern int xfs_dir2_block_replace(struct xfs_da_args *args); |
@@ -91,8 +91,8 @@ extern void xfs_dir3_leaf_compact(struct xfs_da_args *args, | |||
91 | extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, | 91 | extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, |
92 | struct xfs_dir2_leaf_entry *ents, int *indexp, | 92 | struct xfs_dir2_leaf_entry *ents, int *indexp, |
93 | int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); | 93 | int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); |
94 | extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, | 94 | extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, struct dir_context *ctx, |
95 | size_t bufsize, xfs_off_t *offset, filldir_t filldir); | 95 | size_t bufsize); |
96 | extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, | 96 | extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, |
97 | struct xfs_buf **bpp, __uint16_t magic); | 97 | struct xfs_buf **bpp, __uint16_t magic); |
98 | extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, | 98 | extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, |
@@ -153,8 +153,7 @@ extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp, | |||
153 | int size, xfs_dir2_sf_hdr_t *sfhp); | 153 | int size, xfs_dir2_sf_hdr_t *sfhp); |
154 | extern int xfs_dir2_sf_addname(struct xfs_da_args *args); | 154 | extern int xfs_dir2_sf_addname(struct xfs_da_args *args); |
155 | extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); | 155 | extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); |
156 | extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent, | 156 | extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, struct dir_context *ctx); |
157 | xfs_off_t *offset, filldir_t filldir); | ||
158 | extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); | 157 | extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); |
159 | extern int xfs_dir2_sf_removename(struct xfs_da_args *args); | 158 | extern int xfs_dir2_sf_removename(struct xfs_da_args *args); |
160 | extern int xfs_dir2_sf_replace(struct xfs_da_args *args); | 159 | extern int xfs_dir2_sf_replace(struct xfs_da_args *args); |
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 6157424dbf8f..97676a347da1 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c | |||
@@ -768,9 +768,7 @@ xfs_dir2_sf_create( | |||
768 | int /* error */ | 768 | int /* error */ |
769 | xfs_dir2_sf_getdents( | 769 | xfs_dir2_sf_getdents( |
770 | xfs_inode_t *dp, /* incore directory inode */ | 770 | xfs_inode_t *dp, /* incore directory inode */ |
771 | void *dirent, | 771 | struct dir_context *ctx) |
772 | xfs_off_t *offset, | ||
773 | filldir_t filldir) | ||
774 | { | 772 | { |
775 | int i; /* shortform entry number */ | 773 | int i; /* shortform entry number */ |
776 | xfs_mount_t *mp; /* filesystem mount point */ | 774 | xfs_mount_t *mp; /* filesystem mount point */ |
@@ -802,7 +800,7 @@ xfs_dir2_sf_getdents( | |||
802 | /* | 800 | /* |
803 | * If the block number in the offset is out of range, we're done. | 801 | * If the block number in the offset is out of range, we're done. |
804 | */ | 802 | */ |
805 | if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk) | 803 | if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk) |
806 | return 0; | 804 | return 0; |
807 | 805 | ||
808 | /* | 806 | /* |
@@ -819,22 +817,20 @@ xfs_dir2_sf_getdents( | |||
819 | /* | 817 | /* |
820 | * Put . entry unless we're starting past it. | 818 | * Put . entry unless we're starting past it. |
821 | */ | 819 | */ |
822 | if (*offset <= dot_offset) { | 820 | if (ctx->pos <= dot_offset) { |
823 | if (filldir(dirent, ".", 1, dot_offset & 0x7fffffff, dp->i_ino, DT_DIR)) { | 821 | ctx->pos = dot_offset & 0x7fffffff; |
824 | *offset = dot_offset & 0x7fffffff; | 822 | if (!dir_emit(ctx, ".", 1, dp->i_ino, DT_DIR)) |
825 | return 0; | 823 | return 0; |
826 | } | ||
827 | } | 824 | } |
828 | 825 | ||
829 | /* | 826 | /* |
830 | * Put .. entry unless we're starting past it. | 827 | * Put .. entry unless we're starting past it. |
831 | */ | 828 | */ |
832 | if (*offset <= dotdot_offset) { | 829 | if (ctx->pos <= dotdot_offset) { |
833 | ino = xfs_dir2_sf_get_parent_ino(sfp); | 830 | ino = xfs_dir2_sf_get_parent_ino(sfp); |
834 | if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { | 831 | ctx->pos = dotdot_offset & 0x7fffffff; |
835 | *offset = dotdot_offset & 0x7fffffff; | 832 | if (!dir_emit(ctx, "..", 2, ino, DT_DIR)) |
836 | return 0; | 833 | return 0; |
837 | } | ||
838 | } | 834 | } |
839 | 835 | ||
840 | /* | 836 | /* |
@@ -845,21 +841,20 @@ xfs_dir2_sf_getdents( | |||
845 | off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, | 841 | off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, |
846 | xfs_dir2_sf_get_offset(sfep)); | 842 | xfs_dir2_sf_get_offset(sfep)); |
847 | 843 | ||
848 | if (*offset > off) { | 844 | if (ctx->pos > off) { |
849 | sfep = xfs_dir2_sf_nextentry(sfp, sfep); | 845 | sfep = xfs_dir2_sf_nextentry(sfp, sfep); |
850 | continue; | 846 | continue; |
851 | } | 847 | } |
852 | 848 | ||
853 | ino = xfs_dir2_sfe_get_ino(sfp, sfep); | 849 | ino = xfs_dir2_sfe_get_ino(sfp, sfep); |
854 | if (filldir(dirent, (char *)sfep->name, sfep->namelen, | 850 | ctx->pos = off & 0x7fffffff; |
855 | off & 0x7fffffff, ino, DT_UNKNOWN)) { | 851 | if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, |
856 | *offset = off & 0x7fffffff; | 852 | ino, DT_UNKNOWN)) |
857 | return 0; | 853 | return 0; |
858 | } | ||
859 | sfep = xfs_dir2_sf_nextentry(sfp, sfep); | 854 | sfep = xfs_dir2_sf_nextentry(sfp, sfep); |
860 | } | 855 | } |
861 | 856 | ||
862 | *offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & | 857 | ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & |
863 | 0x7fffffff; | 858 | 0x7fffffff; |
864 | return 0; | 859 | return 0; |
865 | } | 860 | } |
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 044e97a33c8d..0adf27ecf3f1 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
@@ -570,13 +570,13 @@ xfs_qm_dqtobp( | |||
570 | xfs_buf_t **O_bpp, | 570 | xfs_buf_t **O_bpp, |
571 | uint flags) | 571 | uint flags) |
572 | { | 572 | { |
573 | xfs_bmbt_irec_t map; | 573 | struct xfs_bmbt_irec map; |
574 | int nmaps = 1, error; | 574 | int nmaps = 1, error; |
575 | xfs_buf_t *bp; | 575 | struct xfs_buf *bp; |
576 | xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); | 576 | struct xfs_inode *quotip = xfs_dq_to_quota_inode(dqp); |
577 | xfs_mount_t *mp = dqp->q_mount; | 577 | struct xfs_mount *mp = dqp->q_mount; |
578 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); | 578 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); |
579 | xfs_trans_t *tp = (tpp ? *tpp : NULL); | 579 | struct xfs_trans *tp = (tpp ? *tpp : NULL); |
580 | 580 | ||
581 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; | 581 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; |
582 | 582 | ||
@@ -804,7 +804,7 @@ xfs_qm_dqget( | |||
804 | xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ | 804 | xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ |
805 | { | 805 | { |
806 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 806 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
807 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); | 807 | struct radix_tree_root *tree = xfs_dquot_tree(qi, type); |
808 | struct xfs_dquot *dqp; | 808 | struct xfs_dquot *dqp; |
809 | int error; | 809 | int error; |
810 | 810 | ||
@@ -936,6 +936,7 @@ xfs_qm_dqput_final( | |||
936 | { | 936 | { |
937 | struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; | 937 | struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; |
938 | struct xfs_dquot *gdqp; | 938 | struct xfs_dquot *gdqp; |
939 | struct xfs_dquot *pdqp; | ||
939 | 940 | ||
940 | trace_xfs_dqput_free(dqp); | 941 | trace_xfs_dqput_free(dqp); |
941 | 942 | ||
@@ -949,21 +950,29 @@ xfs_qm_dqput_final( | |||
949 | 950 | ||
950 | /* | 951 | /* |
951 | * If we just added a udquot to the freelist, then we want to release | 952 | * If we just added a udquot to the freelist, then we want to release |
952 | * the gdquot reference that it (probably) has. Otherwise it'll keep | 953 | * the gdquot/pdquot reference that it (probably) has. Otherwise it'll |
953 | * the gdquot from getting reclaimed. | 954 | * keep the gdquot/pdquot from getting reclaimed. |
954 | */ | 955 | */ |
955 | gdqp = dqp->q_gdquot; | 956 | gdqp = dqp->q_gdquot; |
956 | if (gdqp) { | 957 | if (gdqp) { |
957 | xfs_dqlock(gdqp); | 958 | xfs_dqlock(gdqp); |
958 | dqp->q_gdquot = NULL; | 959 | dqp->q_gdquot = NULL; |
959 | } | 960 | } |
961 | |||
962 | pdqp = dqp->q_pdquot; | ||
963 | if (pdqp) { | ||
964 | xfs_dqlock(pdqp); | ||
965 | dqp->q_pdquot = NULL; | ||
966 | } | ||
960 | xfs_dqunlock(dqp); | 967 | xfs_dqunlock(dqp); |
961 | 968 | ||
962 | /* | 969 | /* |
963 | * If we had a group quota hint, release it now. | 970 | * If we had a group/project quota hint, release it now. |
964 | */ | 971 | */ |
965 | if (gdqp) | 972 | if (gdqp) |
966 | xfs_qm_dqput(gdqp); | 973 | xfs_qm_dqput(gdqp); |
974 | if (pdqp) | ||
975 | xfs_qm_dqput(pdqp); | ||
967 | } | 976 | } |
968 | 977 | ||
969 | /* | 978 | /* |
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 4f0ebfc43cc9..55abbca2883d 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h | |||
@@ -53,6 +53,7 @@ typedef struct xfs_dquot { | |||
53 | xfs_fileoff_t q_fileoffset; /* offset in quotas file */ | 53 | xfs_fileoff_t q_fileoffset; /* offset in quotas file */ |
54 | 54 | ||
55 | struct xfs_dquot*q_gdquot; /* group dquot, hint only */ | 55 | struct xfs_dquot*q_gdquot; /* group dquot, hint only */ |
56 | struct xfs_dquot*q_pdquot; /* project dquot, hint only */ | ||
56 | xfs_disk_dquot_t q_core; /* actual usage & quotas */ | 57 | xfs_disk_dquot_t q_core; /* actual usage & quotas */ |
57 | xfs_dq_logitem_t q_logitem; /* dquot log item */ | 58 | xfs_dq_logitem_t q_logitem; /* dquot log item */ |
58 | xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ | 59 | xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ |
@@ -118,8 +119,9 @@ static inline int xfs_this_quota_on(struct xfs_mount *mp, int type) | |||
118 | case XFS_DQ_USER: | 119 | case XFS_DQ_USER: |
119 | return XFS_IS_UQUOTA_ON(mp); | 120 | return XFS_IS_UQUOTA_ON(mp); |
120 | case XFS_DQ_GROUP: | 121 | case XFS_DQ_GROUP: |
122 | return XFS_IS_GQUOTA_ON(mp); | ||
121 | case XFS_DQ_PROJ: | 123 | case XFS_DQ_PROJ: |
122 | return XFS_IS_OQUOTA_ON(mp); | 124 | return XFS_IS_PQUOTA_ON(mp); |
123 | default: | 125 | default: |
124 | return 0; | 126 | return 0; |
125 | } | 127 | } |
@@ -131,8 +133,9 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) | |||
131 | case XFS_DQ_USER: | 133 | case XFS_DQ_USER: |
132 | return ip->i_udquot; | 134 | return ip->i_udquot; |
133 | case XFS_DQ_GROUP: | 135 | case XFS_DQ_GROUP: |
134 | case XFS_DQ_PROJ: | ||
135 | return ip->i_gdquot; | 136 | return ip->i_gdquot; |
137 | case XFS_DQ_PROJ: | ||
138 | return ip->i_pdquot; | ||
136 | default: | 139 | default: |
137 | return NULL; | 140 | return NULL; |
138 | } | 141 | } |
@@ -143,10 +146,6 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) | |||
143 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) | 146 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) |
144 | #define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) | 147 | #define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) |
145 | #define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) | 148 | #define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) |
146 | #define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo) | ||
147 | #define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \ | ||
148 | XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ | ||
149 | XFS_DQ_TO_QINF(dqp)->qi_gquotaip) | ||
150 | 149 | ||
151 | extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, | 150 | extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint, |
152 | uint, struct xfs_dquot **); | 151 | uint, struct xfs_dquot **); |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a5f2042aec8b..de3dc98f4e8f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -906,11 +906,10 @@ xfs_file_release( | |||
906 | 906 | ||
907 | STATIC int | 907 | STATIC int |
908 | xfs_file_readdir( | 908 | xfs_file_readdir( |
909 | struct file *filp, | 909 | struct file *file, |
910 | void *dirent, | 910 | struct dir_context *ctx) |
911 | filldir_t filldir) | ||
912 | { | 911 | { |
913 | struct inode *inode = file_inode(filp); | 912 | struct inode *inode = file_inode(file); |
914 | xfs_inode_t *ip = XFS_I(inode); | 913 | xfs_inode_t *ip = XFS_I(inode); |
915 | int error; | 914 | int error; |
916 | size_t bufsize; | 915 | size_t bufsize; |
@@ -929,8 +928,7 @@ xfs_file_readdir( | |||
929 | */ | 928 | */ |
930 | bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); | 929 | bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); |
931 | 930 | ||
932 | error = xfs_readdir(ip, dirent, bufsize, | 931 | error = xfs_readdir(ip, ctx, bufsize); |
933 | (xfs_off_t *)&filp->f_pos, filldir); | ||
934 | if (error) | 932 | if (error) |
935 | return -error; | 933 | return -error; |
936 | return 0; | 934 | return 0; |
@@ -1270,8 +1268,7 @@ xfs_seek_data( | |||
1270 | } | 1268 | } |
1271 | 1269 | ||
1272 | out: | 1270 | out: |
1273 | if (offset != file->f_pos) | 1271 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
1274 | file->f_pos = offset; | ||
1275 | 1272 | ||
1276 | out_unlock: | 1273 | out_unlock: |
1277 | xfs_iunlock_map_shared(ip, lock); | 1274 | xfs_iunlock_map_shared(ip, lock); |
@@ -1379,8 +1376,7 @@ out: | |||
1379 | * situation in particular. | 1376 | * situation in particular. |
1380 | */ | 1377 | */ |
1381 | offset = min_t(loff_t, offset, isize); | 1378 | offset = min_t(loff_t, offset, isize); |
1382 | if (offset != file->f_pos) | 1379 | offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); |
1383 | file->f_pos = offset; | ||
1384 | 1380 | ||
1385 | out_unlock: | 1381 | out_unlock: |
1386 | xfs_iunlock_map_shared(ip, lock); | 1382 | xfs_iunlock_map_shared(ip, lock); |
@@ -1432,7 +1428,7 @@ const struct file_operations xfs_file_operations = { | |||
1432 | const struct file_operations xfs_dir_file_operations = { | 1428 | const struct file_operations xfs_dir_file_operations = { |
1433 | .open = xfs_dir_open, | 1429 | .open = xfs_dir_open, |
1434 | .read = generic_read_dir, | 1430 | .read = generic_read_dir, |
1435 | .readdir = xfs_file_readdir, | 1431 | .iterate = xfs_file_readdir, |
1436 | .llseek = generic_file_llseek, | 1432 | .llseek = generic_file_llseek, |
1437 | .unlocked_ioctl = xfs_file_ioctl, | 1433 | .unlocked_ioctl = xfs_file_ioctl, |
1438 | #ifdef CONFIG_COMPAT | 1434 | #ifdef CONFIG_COMPAT |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 3c3644ea825b..614eb0cc3608 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -176,7 +176,7 @@ xfs_growfs_data_private( | |||
176 | if (!bp) | 176 | if (!bp) |
177 | return EIO; | 177 | return EIO; |
178 | if (bp->b_error) { | 178 | if (bp->b_error) { |
179 | int error = bp->b_error; | 179 | error = bp->b_error; |
180 | xfs_buf_relse(bp); | 180 | xfs_buf_relse(bp); |
181 | return error; | 181 | return error; |
182 | } | 182 | } |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index c8f5ae1debf2..7a0c17d7ec09 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include "xfs_bmap.h" | 38 | #include "xfs_bmap.h" |
39 | #include "xfs_cksum.h" | 39 | #include "xfs_cksum.h" |
40 | #include "xfs_buf_item.h" | 40 | #include "xfs_buf_item.h" |
41 | #include "xfs_icreate_item.h" | ||
41 | 42 | ||
42 | 43 | ||
43 | /* | 44 | /* |
@@ -150,12 +151,16 @@ xfs_check_agi_freecount( | |||
150 | #endif | 151 | #endif |
151 | 152 | ||
152 | /* | 153 | /* |
153 | * Initialise a new set of inodes. | 154 | * Initialise a new set of inodes. When called without a transaction context |
155 | * (e.g. from recovery) we initiate a delayed write of the inode buffers rather | ||
156 | * than logging them (which in a transaction context puts them into the AIL | ||
157 | * for writeback rather than the xfsbufd queue). | ||
154 | */ | 158 | */ |
155 | STATIC int | 159 | int |
156 | xfs_ialloc_inode_init( | 160 | xfs_ialloc_inode_init( |
157 | struct xfs_mount *mp, | 161 | struct xfs_mount *mp, |
158 | struct xfs_trans *tp, | 162 | struct xfs_trans *tp, |
163 | struct list_head *buffer_list, | ||
159 | xfs_agnumber_t agno, | 164 | xfs_agnumber_t agno, |
160 | xfs_agblock_t agbno, | 165 | xfs_agblock_t agbno, |
161 | xfs_agblock_t length, | 166 | xfs_agblock_t length, |
@@ -208,6 +213,18 @@ xfs_ialloc_inode_init( | |||
208 | version = 3; | 213 | version = 3; |
209 | ino = XFS_AGINO_TO_INO(mp, agno, | 214 | ino = XFS_AGINO_TO_INO(mp, agno, |
210 | XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); | 215 | XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); |
216 | |||
217 | /* | ||
218 | * log the initialisation that is about to take place as an | ||
219 | * logical operation. This means the transaction does not | ||
220 | * need to log the physical changes to the inode buffers as log | ||
221 | * recovery will know what initialisation is actually needed. | ||
222 | * Hence we only need to log the buffers as "ordered" buffers so | ||
223 | * they track in the AIL as if they were physically logged. | ||
224 | */ | ||
225 | if (tp) | ||
226 | xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp), | ||
227 | mp->m_sb.sb_inodesize, length, gen); | ||
211 | } else if (xfs_sb_version_hasnlink(&mp->m_sb)) | 228 | } else if (xfs_sb_version_hasnlink(&mp->m_sb)) |
212 | version = 2; | 229 | version = 2; |
213 | else | 230 | else |
@@ -223,13 +240,8 @@ xfs_ialloc_inode_init( | |||
223 | XBF_UNMAPPED); | 240 | XBF_UNMAPPED); |
224 | if (!fbuf) | 241 | if (!fbuf) |
225 | return ENOMEM; | 242 | return ENOMEM; |
226 | /* | 243 | |
227 | * Initialize all inodes in this buffer and then log them. | 244 | /* Initialize the inode buffers and log them appropriately. */ |
228 | * | ||
229 | * XXX: It would be much better if we had just one transaction | ||
230 | * to log a whole cluster of inodes instead of all the | ||
231 | * individual transactions causing a lot of log traffic. | ||
232 | */ | ||
233 | fbuf->b_ops = &xfs_inode_buf_ops; | 245 | fbuf->b_ops = &xfs_inode_buf_ops; |
234 | xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); | 246 | xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); |
235 | for (i = 0; i < ninodes; i++) { | 247 | for (i = 0; i < ninodes; i++) { |
@@ -247,18 +259,39 @@ xfs_ialloc_inode_init( | |||
247 | ino++; | 259 | ino++; |
248 | uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); | 260 | uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); |
249 | xfs_dinode_calc_crc(mp, free); | 261 | xfs_dinode_calc_crc(mp, free); |
250 | } else { | 262 | } else if (tp) { |
251 | /* just log the inode core */ | 263 | /* just log the inode core */ |
252 | xfs_trans_log_buf(tp, fbuf, ioffset, | 264 | xfs_trans_log_buf(tp, fbuf, ioffset, |
253 | ioffset + isize - 1); | 265 | ioffset + isize - 1); |
254 | } | 266 | } |
255 | } | 267 | } |
256 | if (version == 3) { | 268 | |
257 | /* need to log the entire buffer */ | 269 | if (tp) { |
258 | xfs_trans_log_buf(tp, fbuf, 0, | 270 | /* |
259 | BBTOB(fbuf->b_length) - 1); | 271 | * Mark the buffer as an inode allocation buffer so it |
272 | * sticks in AIL at the point of this allocation | ||
273 | * transaction. This ensures the they are on disk before | ||
274 | * the tail of the log can be moved past this | ||
275 | * transaction (i.e. by preventing relogging from moving | ||
276 | * it forward in the log). | ||
277 | */ | ||
278 | xfs_trans_inode_alloc_buf(tp, fbuf); | ||
279 | if (version == 3) { | ||
280 | /* | ||
281 | * Mark the buffer as ordered so that they are | ||
282 | * not physically logged in the transaction but | ||
283 | * still tracked in the AIL as part of the | ||
284 | * transaction and pin the log appropriately. | ||
285 | */ | ||
286 | xfs_trans_ordered_buf(tp, fbuf); | ||
287 | xfs_trans_log_buf(tp, fbuf, 0, | ||
288 | BBTOB(fbuf->b_length) - 1); | ||
289 | } | ||
290 | } else { | ||
291 | fbuf->b_flags |= XBF_DONE; | ||
292 | xfs_buf_delwri_queue(fbuf, buffer_list); | ||
293 | xfs_buf_relse(fbuf); | ||
260 | } | 294 | } |
261 | xfs_trans_inode_alloc_buf(tp, fbuf); | ||
262 | } | 295 | } |
263 | return 0; | 296 | return 0; |
264 | } | 297 | } |
@@ -303,7 +336,7 @@ xfs_ialloc_ag_alloc( | |||
303 | * First try to allocate inodes contiguous with the last-allocated | 336 | * First try to allocate inodes contiguous with the last-allocated |
304 | * chunk of inodes. If the filesystem is striped, this will fill | 337 | * chunk of inodes. If the filesystem is striped, this will fill |
305 | * an entire stripe unit with inodes. | 338 | * an entire stripe unit with inodes. |
306 | */ | 339 | */ |
307 | agi = XFS_BUF_TO_AGI(agbp); | 340 | agi = XFS_BUF_TO_AGI(agbp); |
308 | newino = be32_to_cpu(agi->agi_newino); | 341 | newino = be32_to_cpu(agi->agi_newino); |
309 | agno = be32_to_cpu(agi->agi_seqno); | 342 | agno = be32_to_cpu(agi->agi_seqno); |
@@ -402,7 +435,7 @@ xfs_ialloc_ag_alloc( | |||
402 | * rather than a linear progression to prevent the next generation | 435 | * rather than a linear progression to prevent the next generation |
403 | * number from being easily guessable. | 436 | * number from being easily guessable. |
404 | */ | 437 | */ |
405 | error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, | 438 | error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno, |
406 | args.len, prandom_u32()); | 439 | args.len, prandom_u32()); |
407 | 440 | ||
408 | if (error) | 441 | if (error) |
@@ -615,8 +648,7 @@ xfs_ialloc_get_rec( | |||
615 | struct xfs_btree_cur *cur, | 648 | struct xfs_btree_cur *cur, |
616 | xfs_agino_t agino, | 649 | xfs_agino_t agino, |
617 | xfs_inobt_rec_incore_t *rec, | 650 | xfs_inobt_rec_incore_t *rec, |
618 | int *done, | 651 | int *done) |
619 | int left) | ||
620 | { | 652 | { |
621 | int error; | 653 | int error; |
622 | int i; | 654 | int i; |
@@ -724,12 +756,12 @@ xfs_dialloc_ag( | |||
724 | pag->pagl_leftrec != NULLAGINO && | 756 | pag->pagl_leftrec != NULLAGINO && |
725 | pag->pagl_rightrec != NULLAGINO) { | 757 | pag->pagl_rightrec != NULLAGINO) { |
726 | error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, | 758 | error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, |
727 | &trec, &doneleft, 1); | 759 | &trec, &doneleft); |
728 | if (error) | 760 | if (error) |
729 | goto error1; | 761 | goto error1; |
730 | 762 | ||
731 | error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, | 763 | error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, |
732 | &rec, &doneright, 0); | 764 | &rec, &doneright); |
733 | if (error) | 765 | if (error) |
734 | goto error1; | 766 | goto error1; |
735 | } else { | 767 | } else { |
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index c8da3df271e6..68c07320f096 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h | |||
@@ -150,6 +150,14 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, | |||
150 | int xfs_inobt_get_rec(struct xfs_btree_cur *cur, | 150 | int xfs_inobt_get_rec(struct xfs_btree_cur *cur, |
151 | xfs_inobt_rec_incore_t *rec, int *stat); | 151 | xfs_inobt_rec_incore_t *rec, int *stat); |
152 | 152 | ||
153 | /* | ||
154 | * Inode chunk initialisation routine | ||
155 | */ | ||
156 | int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, | ||
157 | struct list_head *buffer_list, | ||
158 | xfs_agnumber_t agno, xfs_agblock_t agbno, | ||
159 | xfs_agblock_t length, unsigned int gen); | ||
160 | |||
153 | extern const struct xfs_buf_ops xfs_agi_buf_ops; | 161 | extern const struct xfs_buf_ops xfs_agi_buf_ops; |
154 | 162 | ||
155 | #endif /* __XFS_IALLOC_H__ */ | 163 | #endif /* __XFS_IALLOC_H__ */ |
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 96e344e3e927..3f90e1ceb8d6 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
@@ -335,7 +335,9 @@ xfs_iget_cache_miss( | |||
335 | iflags = XFS_INEW; | 335 | iflags = XFS_INEW; |
336 | if (flags & XFS_IGET_DONTCACHE) | 336 | if (flags & XFS_IGET_DONTCACHE) |
337 | iflags |= XFS_IDONTCACHE; | 337 | iflags |= XFS_IDONTCACHE; |
338 | ip->i_udquot = ip->i_gdquot = NULL; | 338 | ip->i_udquot = NULL; |
339 | ip->i_gdquot = NULL; | ||
340 | ip->i_pdquot = NULL; | ||
339 | xfs_iflags_set(ip, iflags); | 341 | xfs_iflags_set(ip, iflags); |
340 | 342 | ||
341 | /* insert the new inode */ | 343 | /* insert the new inode */ |
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index e0f138c70a2f..a01afbb3909a 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h | |||
@@ -40,7 +40,6 @@ void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); | |||
40 | int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); | 40 | int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); |
41 | void xfs_eofblocks_worker(struct work_struct *); | 41 | void xfs_eofblocks_worker(struct work_struct *); |
42 | 42 | ||
43 | int xfs_sync_inode_grab(struct xfs_inode *ip); | ||
44 | int xfs_inode_ag_iterator(struct xfs_mount *mp, | 43 | int xfs_inode_ag_iterator(struct xfs_mount *mp, |
45 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, | 44 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, |
46 | int flags, void *args), | 45 | int flags, void *args), |
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c new file mode 100644 index 000000000000..7716a4e7375e --- /dev/null +++ b/fs/xfs/xfs_icreate_item.c | |||
@@ -0,0 +1,195 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2008-2010, 2013 Dave Chinner | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_types.h" | ||
21 | #include "xfs_bit.h" | ||
22 | #include "xfs_log.h" | ||
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | ||
25 | #include "xfs_buf_item.h" | ||
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_ag.h" | ||
28 | #include "xfs_dir2.h" | ||
29 | #include "xfs_mount.h" | ||
30 | #include "xfs_trans_priv.h" | ||
31 | #include "xfs_bmap_btree.h" | ||
32 | #include "xfs_alloc_btree.h" | ||
33 | #include "xfs_ialloc_btree.h" | ||
34 | #include "xfs_attr_sf.h" | ||
35 | #include "xfs_dinode.h" | ||
36 | #include "xfs_inode.h" | ||
37 | #include "xfs_inode_item.h" | ||
38 | #include "xfs_btree.h" | ||
39 | #include "xfs_ialloc.h" | ||
40 | #include "xfs_error.h" | ||
41 | #include "xfs_icreate_item.h" | ||
42 | |||
43 | kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ | ||
44 | |||
45 | static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip) | ||
46 | { | ||
47 | return container_of(lip, struct xfs_icreate_item, ic_item); | ||
48 | } | ||
49 | |||
50 | /* | ||
51 | * This returns the number of iovecs needed to log the given inode item. | ||
52 | * | ||
53 | * We only need one iovec for the icreate log structure. | ||
54 | */ | ||
55 | STATIC uint | ||
56 | xfs_icreate_item_size( | ||
57 | struct xfs_log_item *lip) | ||
58 | { | ||
59 | return 1; | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * This is called to fill in the vector of log iovecs for the | ||
64 | * given inode create log item. | ||
65 | */ | ||
66 | STATIC void | ||
67 | xfs_icreate_item_format( | ||
68 | struct xfs_log_item *lip, | ||
69 | struct xfs_log_iovec *log_vector) | ||
70 | { | ||
71 | struct xfs_icreate_item *icp = ICR_ITEM(lip); | ||
72 | |||
73 | log_vector->i_addr = (xfs_caddr_t)&icp->ic_format; | ||
74 | log_vector->i_len = sizeof(struct xfs_icreate_log); | ||
75 | log_vector->i_type = XLOG_REG_TYPE_ICREATE; | ||
76 | } | ||
77 | |||
78 | |||
79 | /* Pinning has no meaning for the create item, so just return. */ | ||
80 | STATIC void | ||
81 | xfs_icreate_item_pin( | ||
82 | struct xfs_log_item *lip) | ||
83 | { | ||
84 | } | ||
85 | |||
86 | |||
87 | /* pinning has no meaning for the create item, so just return. */ | ||
88 | STATIC void | ||
89 | xfs_icreate_item_unpin( | ||
90 | struct xfs_log_item *lip, | ||
91 | int remove) | ||
92 | { | ||
93 | } | ||
94 | |||
95 | STATIC void | ||
96 | xfs_icreate_item_unlock( | ||
97 | struct xfs_log_item *lip) | ||
98 | { | ||
99 | struct xfs_icreate_item *icp = ICR_ITEM(lip); | ||
100 | |||
101 | if (icp->ic_item.li_flags & XFS_LI_ABORTED) | ||
102 | kmem_zone_free(xfs_icreate_zone, icp); | ||
103 | return; | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * Because we have ordered buffers being tracked in the AIL for the inode | ||
108 | * creation, we don't need the create item after this. Hence we can free | ||
109 | * the log item and return -1 to tell the caller we're done with the item. | ||
110 | */ | ||
111 | STATIC xfs_lsn_t | ||
112 | xfs_icreate_item_committed( | ||
113 | struct xfs_log_item *lip, | ||
114 | xfs_lsn_t lsn) | ||
115 | { | ||
116 | struct xfs_icreate_item *icp = ICR_ITEM(lip); | ||
117 | |||
118 | kmem_zone_free(xfs_icreate_zone, icp); | ||
119 | return (xfs_lsn_t)-1; | ||
120 | } | ||
121 | |||
122 | /* item can never get into the AIL */ | ||
123 | STATIC uint | ||
124 | xfs_icreate_item_push( | ||
125 | struct xfs_log_item *lip, | ||
126 | struct list_head *buffer_list) | ||
127 | { | ||
128 | ASSERT(0); | ||
129 | return XFS_ITEM_SUCCESS; | ||
130 | } | ||
131 | |||
132 | /* Ordered buffers do the dependency tracking here, so this does nothing. */ | ||
133 | STATIC void | ||
134 | xfs_icreate_item_committing( | ||
135 | struct xfs_log_item *lip, | ||
136 | xfs_lsn_t lsn) | ||
137 | { | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * This is the ops vector shared by all buf log items. | ||
142 | */ | ||
143 | static struct xfs_item_ops xfs_icreate_item_ops = { | ||
144 | .iop_size = xfs_icreate_item_size, | ||
145 | .iop_format = xfs_icreate_item_format, | ||
146 | .iop_pin = xfs_icreate_item_pin, | ||
147 | .iop_unpin = xfs_icreate_item_unpin, | ||
148 | .iop_push = xfs_icreate_item_push, | ||
149 | .iop_unlock = xfs_icreate_item_unlock, | ||
150 | .iop_committed = xfs_icreate_item_committed, | ||
151 | .iop_committing = xfs_icreate_item_committing, | ||
152 | }; | ||
153 | |||
154 | |||
155 | /* | ||
156 | * Initialize the inode log item for a newly allocated (in-core) inode. | ||
157 | * | ||
158 | * Inode extents can only reside within an AG. Hence specify the starting | ||
159 | * block for the inode chunk by offset within an AG as well as the | ||
160 | * length of the allocated extent. | ||
161 | * | ||
162 | * This joins the item to the transaction and marks it dirty so | ||
163 | * that we don't need a separate call to do this, nor does the | ||
164 | * caller need to know anything about the icreate item. | ||
165 | */ | ||
166 | void | ||
167 | xfs_icreate_log( | ||
168 | struct xfs_trans *tp, | ||
169 | xfs_agnumber_t agno, | ||
170 | xfs_agblock_t agbno, | ||
171 | unsigned int count, | ||
172 | unsigned int inode_size, | ||
173 | xfs_agblock_t length, | ||
174 | unsigned int generation) | ||
175 | { | ||
176 | struct xfs_icreate_item *icp; | ||
177 | |||
178 | icp = kmem_zone_zalloc(xfs_icreate_zone, KM_SLEEP); | ||
179 | |||
180 | xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, | ||
181 | &xfs_icreate_item_ops); | ||
182 | |||
183 | icp->ic_format.icl_type = XFS_LI_ICREATE; | ||
184 | icp->ic_format.icl_size = 1; /* single vector */ | ||
185 | icp->ic_format.icl_ag = cpu_to_be32(agno); | ||
186 | icp->ic_format.icl_agbno = cpu_to_be32(agbno); | ||
187 | icp->ic_format.icl_count = cpu_to_be32(count); | ||
188 | icp->ic_format.icl_isize = cpu_to_be32(inode_size); | ||
189 | icp->ic_format.icl_length = cpu_to_be32(length); | ||
190 | icp->ic_format.icl_gen = cpu_to_be32(generation); | ||
191 | |||
192 | xfs_trans_add_item(tp, &icp->ic_item); | ||
193 | tp->t_flags |= XFS_TRANS_DIRTY; | ||
194 | icp->ic_item.li_desc->lid_flags |= XFS_LID_DIRTY; | ||
195 | } | ||
diff --git a/fs/xfs/xfs_icreate_item.h b/fs/xfs/xfs_icreate_item.h new file mode 100644 index 000000000000..88ba8aa0bc41 --- /dev/null +++ b/fs/xfs/xfs_icreate_item.h | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2008-2010, Dave Chinner | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef XFS_ICREATE_ITEM_H | ||
19 | #define XFS_ICREATE_ITEM_H 1 | ||
20 | |||
21 | /* | ||
22 | * on disk log item structure | ||
23 | * | ||
24 | * Log recovery assumes the first two entries are the type and size and they fit | ||
25 | * in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so | ||
26 | * decoding can be done correctly. | ||
27 | */ | ||
28 | struct xfs_icreate_log { | ||
29 | __uint16_t icl_type; /* type of log format structure */ | ||
30 | __uint16_t icl_size; /* size of log format structure */ | ||
31 | __be32 icl_ag; /* ag being allocated in */ | ||
32 | __be32 icl_agbno; /* start block of inode range */ | ||
33 | __be32 icl_count; /* number of inodes to initialise */ | ||
34 | __be32 icl_isize; /* size of inodes */ | ||
35 | __be32 icl_length; /* length of extent to initialise */ | ||
36 | __be32 icl_gen; /* inode generation number to use */ | ||
37 | }; | ||
38 | |||
39 | /* in memory log item structure */ | ||
40 | struct xfs_icreate_item { | ||
41 | struct xfs_log_item ic_item; | ||
42 | struct xfs_icreate_log ic_format; | ||
43 | }; | ||
44 | |||
45 | extern kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ | ||
46 | |||
47 | void xfs_icreate_log(struct xfs_trans *tp, xfs_agnumber_t agno, | ||
48 | xfs_agblock_t agbno, unsigned int count, | ||
49 | unsigned int inode_size, xfs_agblock_t length, | ||
50 | unsigned int generation); | ||
51 | |||
52 | #endif /* XFS_ICREATE_ITEM_H */ | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7f7be5f98f52..bb262c25c8de 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -896,7 +896,6 @@ xfs_dinode_to_disk( | |||
896 | to->di_projid_lo = cpu_to_be16(from->di_projid_lo); | 896 | to->di_projid_lo = cpu_to_be16(from->di_projid_lo); |
897 | to->di_projid_hi = cpu_to_be16(from->di_projid_hi); | 897 | to->di_projid_hi = cpu_to_be16(from->di_projid_hi); |
898 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | 898 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); |
899 | to->di_flushiter = cpu_to_be16(from->di_flushiter); | ||
900 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); | 899 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); |
901 | to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); | 900 | to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); |
902 | to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); | 901 | to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); |
@@ -924,6 +923,9 @@ xfs_dinode_to_disk( | |||
924 | to->di_lsn = cpu_to_be64(from->di_lsn); | 923 | to->di_lsn = cpu_to_be64(from->di_lsn); |
925 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); | 924 | memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); |
926 | uuid_copy(&to->di_uuid, &from->di_uuid); | 925 | uuid_copy(&to->di_uuid, &from->di_uuid); |
926 | to->di_flushiter = 0; | ||
927 | } else { | ||
928 | to->di_flushiter = cpu_to_be16(from->di_flushiter); | ||
927 | } | 929 | } |
928 | } | 930 | } |
929 | 931 | ||
@@ -1028,6 +1030,15 @@ xfs_dinode_calc_crc( | |||
1028 | 1030 | ||
1029 | /* | 1031 | /* |
1030 | * Read the disk inode attributes into the in-core inode structure. | 1032 | * Read the disk inode attributes into the in-core inode structure. |
1033 | * | ||
1034 | * For version 5 superblocks, if we are initialising a new inode and we are not | ||
1035 | * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new | ||
1036 | * inode core with a random generation number. If we are keeping inodes around, | ||
1037 | * we need to read the inode cluster to get the existing generation number off | ||
1038 | * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode | ||
1039 | * format) then log recovery is dependent on the di_flushiter field being | ||
1040 | * initialised from the current on-disk value and hence we must also read the | ||
1041 | * inode off disk. | ||
1031 | */ | 1042 | */ |
1032 | int | 1043 | int |
1033 | xfs_iread( | 1044 | xfs_iread( |
@@ -1047,6 +1058,23 @@ xfs_iread( | |||
1047 | if (error) | 1058 | if (error) |
1048 | return error; | 1059 | return error; |
1049 | 1060 | ||
1061 | /* shortcut IO on inode allocation if possible */ | ||
1062 | if ((iget_flags & XFS_IGET_CREATE) && | ||
1063 | xfs_sb_version_hascrc(&mp->m_sb) && | ||
1064 | !(mp->m_flags & XFS_MOUNT_IKEEP)) { | ||
1065 | /* initialise the on-disk inode core */ | ||
1066 | memset(&ip->i_d, 0, sizeof(ip->i_d)); | ||
1067 | ip->i_d.di_magic = XFS_DINODE_MAGIC; | ||
1068 | ip->i_d.di_gen = prandom_u32(); | ||
1069 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
1070 | ip->i_d.di_version = 3; | ||
1071 | ip->i_d.di_ino = ip->i_ino; | ||
1072 | uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); | ||
1073 | } else | ||
1074 | ip->i_d.di_version = 2; | ||
1075 | return 0; | ||
1076 | } | ||
1077 | |||
1050 | /* | 1078 | /* |
1051 | * Get pointers to the on-disk inode and the buffer containing it. | 1079 | * Get pointers to the on-disk inode and the buffer containing it. |
1052 | */ | 1080 | */ |
@@ -1133,17 +1161,16 @@ xfs_iread( | |||
1133 | xfs_buf_set_ref(bp, XFS_INO_REF); | 1161 | xfs_buf_set_ref(bp, XFS_INO_REF); |
1134 | 1162 | ||
1135 | /* | 1163 | /* |
1136 | * Use xfs_trans_brelse() to release the buffer containing the | 1164 | * Use xfs_trans_brelse() to release the buffer containing the on-disk |
1137 | * on-disk inode, because it was acquired with xfs_trans_read_buf() | 1165 | * inode, because it was acquired with xfs_trans_read_buf() in |
1138 | * in xfs_imap_to_bp() above. If tp is NULL, this is just a normal | 1166 | * xfs_imap_to_bp() above. If tp is NULL, this is just a normal |
1139 | * brelse(). If we're within a transaction, then xfs_trans_brelse() | 1167 | * brelse(). If we're within a transaction, then xfs_trans_brelse() |
1140 | * will only release the buffer if it is not dirty within the | 1168 | * will only release the buffer if it is not dirty within the |
1141 | * transaction. It will be OK to release the buffer in this case, | 1169 | * transaction. It will be OK to release the buffer in this case, |
1142 | * because inodes on disk are never destroyed and we will be | 1170 | * because inodes on disk are never destroyed and we will be locking the |
1143 | * locking the new in-core inode before putting it in the hash | 1171 | * new in-core inode before putting it in the cache where other |
1144 | * table where other processes can find it. Thus we don't have | 1172 | * processes can find it. Thus we don't have to worry about the inode |
1145 | * to worry about the inode being changed just because we released | 1173 | * being changed just because we released the buffer. |
1146 | * the buffer. | ||
1147 | */ | 1174 | */ |
1148 | out_brelse: | 1175 | out_brelse: |
1149 | xfs_trans_brelse(tp, bp); | 1176 | xfs_trans_brelse(tp, bp); |
@@ -2028,8 +2055,6 @@ xfs_ifree( | |||
2028 | int error; | 2055 | int error; |
2029 | int delete; | 2056 | int delete; |
2030 | xfs_ino_t first_ino; | 2057 | xfs_ino_t first_ino; |
2031 | xfs_dinode_t *dip; | ||
2032 | xfs_buf_t *ibp; | ||
2033 | 2058 | ||
2034 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 2059 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
2035 | ASSERT(ip->i_d.di_nlink == 0); | 2060 | ASSERT(ip->i_d.di_nlink == 0); |
@@ -2042,14 +2067,13 @@ xfs_ifree( | |||
2042 | * Pull the on-disk inode from the AGI unlinked list. | 2067 | * Pull the on-disk inode from the AGI unlinked list. |
2043 | */ | 2068 | */ |
2044 | error = xfs_iunlink_remove(tp, ip); | 2069 | error = xfs_iunlink_remove(tp, ip); |
2045 | if (error != 0) { | 2070 | if (error) |
2046 | return error; | 2071 | return error; |
2047 | } | ||
2048 | 2072 | ||
2049 | error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); | 2073 | error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); |
2050 | if (error != 0) { | 2074 | if (error) |
2051 | return error; | 2075 | return error; |
2052 | } | 2076 | |
2053 | ip->i_d.di_mode = 0; /* mark incore inode as free */ | 2077 | ip->i_d.di_mode = 0; /* mark incore inode as free */ |
2054 | ip->i_d.di_flags = 0; | 2078 | ip->i_d.di_flags = 0; |
2055 | ip->i_d.di_dmevmask = 0; | 2079 | ip->i_d.di_dmevmask = 0; |
@@ -2061,31 +2085,10 @@ xfs_ifree( | |||
2061 | * by reincarnations of this inode. | 2085 | * by reincarnations of this inode. |
2062 | */ | 2086 | */ |
2063 | ip->i_d.di_gen++; | 2087 | ip->i_d.di_gen++; |
2064 | |||
2065 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2088 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
2066 | 2089 | ||
2067 | error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp, | 2090 | if (delete) |
2068 | 0, 0); | ||
2069 | if (error) | ||
2070 | return error; | ||
2071 | |||
2072 | /* | ||
2073 | * Clear the on-disk di_mode. This is to prevent xfs_bulkstat | ||
2074 | * from picking up this inode when it is reclaimed (its incore state | ||
2075 | * initialzed but not flushed to disk yet). The in-core di_mode is | ||
2076 | * already cleared and a corresponding transaction logged. | ||
2077 | * The hack here just synchronizes the in-core to on-disk | ||
2078 | * di_mode value in advance before the actual inode sync to disk. | ||
2079 | * This is OK because the inode is already unlinked and would never | ||
2080 | * change its di_mode again for this inode generation. | ||
2081 | * This is a temporary hack that would require a proper fix | ||
2082 | * in the future. | ||
2083 | */ | ||
2084 | dip->di_mode = 0; | ||
2085 | |||
2086 | if (delete) { | ||
2087 | error = xfs_ifree_cluster(ip, tp, first_ino); | 2091 | error = xfs_ifree_cluster(ip, tp, first_ino); |
2088 | } | ||
2089 | 2092 | ||
2090 | return error; | 2093 | return error; |
2091 | } | 2094 | } |
@@ -2160,8 +2163,8 @@ xfs_iroot_realloc( | |||
2160 | np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, | 2163 | np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, |
2161 | (int)new_size); | 2164 | (int)new_size); |
2162 | ifp->if_broot_bytes = (int)new_size; | 2165 | ifp->if_broot_bytes = (int)new_size; |
2163 | ASSERT(ifp->if_broot_bytes <= | 2166 | ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= |
2164 | XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip)); | 2167 | XFS_IFORK_SIZE(ip, whichfork)); |
2165 | memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); | 2168 | memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); |
2166 | return; | 2169 | return; |
2167 | } | 2170 | } |
@@ -2214,8 +2217,9 @@ xfs_iroot_realloc( | |||
2214 | kmem_free(ifp->if_broot); | 2217 | kmem_free(ifp->if_broot); |
2215 | ifp->if_broot = new_broot; | 2218 | ifp->if_broot = new_broot; |
2216 | ifp->if_broot_bytes = (int)new_size; | 2219 | ifp->if_broot_bytes = (int)new_size; |
2217 | ASSERT(ifp->if_broot_bytes <= | 2220 | if (ifp->if_broot) |
2218 | XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip)); | 2221 | ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= |
2222 | XFS_IFORK_SIZE(ip, whichfork)); | ||
2219 | return; | 2223 | return; |
2220 | } | 2224 | } |
2221 | 2225 | ||
@@ -2526,9 +2530,8 @@ xfs_iflush_fork( | |||
2526 | if ((iip->ili_fields & brootflag[whichfork]) && | 2530 | if ((iip->ili_fields & brootflag[whichfork]) && |
2527 | (ifp->if_broot_bytes > 0)) { | 2531 | (ifp->if_broot_bytes > 0)) { |
2528 | ASSERT(ifp->if_broot != NULL); | 2532 | ASSERT(ifp->if_broot != NULL); |
2529 | ASSERT(ifp->if_broot_bytes <= | 2533 | ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= |
2530 | (XFS_IFORK_SIZE(ip, whichfork) + | 2534 | XFS_IFORK_SIZE(ip, whichfork)); |
2531 | XFS_BROOT_SIZE_ADJ(ip))); | ||
2532 | xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, | 2535 | xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, |
2533 | (xfs_bmdr_block_t *)cp, | 2536 | (xfs_bmdr_block_t *)cp, |
2534 | XFS_DFORK_SIZE(dip, mp, whichfork)); | 2537 | XFS_DFORK_SIZE(dip, mp, whichfork)); |
@@ -2886,12 +2889,18 @@ xfs_iflush_int( | |||
2886 | __func__, ip->i_ino, ip->i_d.di_forkoff, ip); | 2889 | __func__, ip->i_ino, ip->i_d.di_forkoff, ip); |
2887 | goto corrupt_out; | 2890 | goto corrupt_out; |
2888 | } | 2891 | } |
2892 | |||
2889 | /* | 2893 | /* |
2890 | * bump the flush iteration count, used to detect flushes which | 2894 | * Inode item log recovery for v1/v2 inodes are dependent on the |
2891 | * postdate a log record during recovery. This is redundant as we now | 2895 | * di_flushiter count for correct sequencing. We bump the flush |
2892 | * log every change and hence this can't happen. Still, it doesn't hurt. | 2896 | * iteration count so we can detect flushes which postdate a log record |
2897 | * during recovery. This is redundant as we now log every change and | ||
2898 | * hence this can't happen but we need to still do it to ensure | ||
2899 | * backwards compatibility with old kernels that predate logging all | ||
2900 | * inode changes. | ||
2893 | */ | 2901 | */ |
2894 | ip->i_d.di_flushiter++; | 2902 | if (ip->i_d.di_version < 3) |
2903 | ip->i_d.di_flushiter++; | ||
2895 | 2904 | ||
2896 | /* | 2905 | /* |
2897 | * Copy the dirty parts of the inode into the on-disk | 2906 | * Copy the dirty parts of the inode into the on-disk |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 91129794aaec..b55fd347ab5b 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -250,6 +250,7 @@ typedef struct xfs_inode { | |||
250 | struct xfs_mount *i_mount; /* fs mount struct ptr */ | 250 | struct xfs_mount *i_mount; /* fs mount struct ptr */ |
251 | struct xfs_dquot *i_udquot; /* user dquot */ | 251 | struct xfs_dquot *i_udquot; /* user dquot */ |
252 | struct xfs_dquot *i_gdquot; /* group dquot */ | 252 | struct xfs_dquot *i_gdquot; /* group dquot */ |
253 | struct xfs_dquot *i_pdquot; /* project dquot */ | ||
253 | 254 | ||
254 | /* Inode location stuff */ | 255 | /* Inode location stuff */ |
255 | xfs_ino_t i_ino; /* inode number (agno/agino)*/ | 256 | xfs_ino_t i_ino; /* inode number (agno/agino)*/ |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 5e999680094a..6e2bca5d44d6 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -248,7 +248,7 @@ xfs_open_by_handle( | |||
248 | goto out_dput; | 248 | goto out_dput; |
249 | } | 249 | } |
250 | 250 | ||
251 | fd = get_unused_fd(); | 251 | fd = get_unused_fd_flags(0); |
252 | if (fd < 0) { | 252 | if (fd < 0) { |
253 | error = fd; | 253 | error = fd; |
254 | goto out_dput; | 254 | goto out_dput; |
@@ -928,7 +928,7 @@ xfs_ioctl_setattr( | |||
928 | struct xfs_trans *tp; | 928 | struct xfs_trans *tp; |
929 | unsigned int lock_flags = 0; | 929 | unsigned int lock_flags = 0; |
930 | struct xfs_dquot *udqp = NULL; | 930 | struct xfs_dquot *udqp = NULL; |
931 | struct xfs_dquot *gdqp = NULL; | 931 | struct xfs_dquot *pdqp = NULL; |
932 | struct xfs_dquot *olddquot = NULL; | 932 | struct xfs_dquot *olddquot = NULL; |
933 | int code; | 933 | int code; |
934 | 934 | ||
@@ -957,7 +957,7 @@ xfs_ioctl_setattr( | |||
957 | if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { | 957 | if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { |
958 | code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, | 958 | code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, |
959 | ip->i_d.di_gid, fa->fsx_projid, | 959 | ip->i_d.di_gid, fa->fsx_projid, |
960 | XFS_QMOPT_PQUOTA, &udqp, &gdqp); | 960 | XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); |
961 | if (code) | 961 | if (code) |
962 | return code; | 962 | return code; |
963 | } | 963 | } |
@@ -994,8 +994,8 @@ xfs_ioctl_setattr( | |||
994 | XFS_IS_PQUOTA_ON(mp) && | 994 | XFS_IS_PQUOTA_ON(mp) && |
995 | xfs_get_projid(ip) != fa->fsx_projid) { | 995 | xfs_get_projid(ip) != fa->fsx_projid) { |
996 | ASSERT(tp); | 996 | ASSERT(tp); |
997 | code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, | 997 | code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, |
998 | capable(CAP_FOWNER) ? | 998 | pdqp, capable(CAP_FOWNER) ? |
999 | XFS_QMOPT_FORCE_RES : 0); | 999 | XFS_QMOPT_FORCE_RES : 0); |
1000 | if (code) /* out of quota */ | 1000 | if (code) /* out of quota */ |
1001 | goto error_return; | 1001 | goto error_return; |
@@ -1113,7 +1113,7 @@ xfs_ioctl_setattr( | |||
1113 | if (xfs_get_projid(ip) != fa->fsx_projid) { | 1113 | if (xfs_get_projid(ip) != fa->fsx_projid) { |
1114 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { | 1114 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { |
1115 | olddquot = xfs_qm_vop_chown(tp, ip, | 1115 | olddquot = xfs_qm_vop_chown(tp, ip, |
1116 | &ip->i_gdquot, gdqp); | 1116 | &ip->i_pdquot, pdqp); |
1117 | } | 1117 | } |
1118 | xfs_set_projid(ip, fa->fsx_projid); | 1118 | xfs_set_projid(ip, fa->fsx_projid); |
1119 | 1119 | ||
@@ -1160,13 +1160,13 @@ xfs_ioctl_setattr( | |||
1160 | */ | 1160 | */ |
1161 | xfs_qm_dqrele(olddquot); | 1161 | xfs_qm_dqrele(olddquot); |
1162 | xfs_qm_dqrele(udqp); | 1162 | xfs_qm_dqrele(udqp); |
1163 | xfs_qm_dqrele(gdqp); | 1163 | xfs_qm_dqrele(pdqp); |
1164 | 1164 | ||
1165 | return code; | 1165 | return code; |
1166 | 1166 | ||
1167 | error_return: | 1167 | error_return: |
1168 | xfs_qm_dqrele(udqp); | 1168 | xfs_qm_dqrele(udqp); |
1169 | xfs_qm_dqrele(gdqp); | 1169 | xfs_qm_dqrele(pdqp); |
1170 | xfs_trans_cancel(tp, 0); | 1170 | xfs_trans_cancel(tp, 0); |
1171 | if (lock_flags) | 1171 | if (lock_flags) |
1172 | xfs_iunlock(ip, lock_flags); | 1172 | xfs_iunlock(ip, lock_flags); |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 8f8aaee7f379..6a7096422295 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -284,6 +284,15 @@ xfs_iomap_eof_want_preallocate( | |||
284 | return 0; | 284 | return 0; |
285 | 285 | ||
286 | /* | 286 | /* |
287 | * If the file is smaller than the minimum prealloc and we are using | ||
288 | * dynamic preallocation, don't do any preallocation at all as it is | ||
289 | * likely this is the only write to the file that is going to be done. | ||
290 | */ | ||
291 | if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) && | ||
292 | XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)) | ||
293 | return 0; | ||
294 | |||
295 | /* | ||
287 | * If there are any real blocks past eof, then don't | 296 | * If there are any real blocks past eof, then don't |
288 | * do any speculative allocation. | 297 | * do any speculative allocation. |
289 | */ | 298 | */ |
@@ -345,6 +354,10 @@ xfs_iomap_eof_prealloc_initial_size( | |||
345 | if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) | 354 | if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) |
346 | return 0; | 355 | return 0; |
347 | 356 | ||
357 | /* If the file is small, then use the minimum prealloc */ | ||
358 | if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign)) | ||
359 | return 0; | ||
360 | |||
348 | /* | 361 | /* |
349 | * As we write multiple pages, the offset will always align to the | 362 | * As we write multiple pages, the offset will always align to the |
350 | * start of a page and hence point to a hole at EOF. i.e. if the size is | 363 | * start of a page and hence point to a hole at EOF. i.e. if the size is |
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ca9ecaa81112..96dda62d497b 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -467,9 +467,6 @@ xfs_setattr_mode( | |||
467 | ASSERT(tp); | 467 | ASSERT(tp); |
468 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 468 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
469 | 469 | ||
470 | if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) | ||
471 | mode &= ~S_ISGID; | ||
472 | |||
473 | ip->i_d.di_mode &= S_IFMT; | 470 | ip->i_d.di_mode &= S_IFMT; |
474 | ip->i_d.di_mode |= mode & ~S_IFMT; | 471 | ip->i_d.di_mode |= mode & ~S_IFMT; |
475 | 472 | ||
@@ -495,15 +492,18 @@ xfs_setattr_nonsize( | |||
495 | 492 | ||
496 | trace_xfs_setattr(ip); | 493 | trace_xfs_setattr(ip); |
497 | 494 | ||
498 | if (mp->m_flags & XFS_MOUNT_RDONLY) | 495 | /* If acls are being inherited, we already have this checked */ |
499 | return XFS_ERROR(EROFS); | 496 | if (!(flags & XFS_ATTR_NOACL)) { |
497 | if (mp->m_flags & XFS_MOUNT_RDONLY) | ||
498 | return XFS_ERROR(EROFS); | ||
500 | 499 | ||
501 | if (XFS_FORCED_SHUTDOWN(mp)) | 500 | if (XFS_FORCED_SHUTDOWN(mp)) |
502 | return XFS_ERROR(EIO); | 501 | return XFS_ERROR(EIO); |
503 | 502 | ||
504 | error = -inode_change_ok(inode, iattr); | 503 | error = -inode_change_ok(inode, iattr); |
505 | if (error) | 504 | if (error) |
506 | return XFS_ERROR(error); | 505 | return XFS_ERROR(error); |
506 | } | ||
507 | 507 | ||
508 | ASSERT((mask & ATTR_SIZE) == 0); | 508 | ASSERT((mask & ATTR_SIZE) == 0); |
509 | 509 | ||
@@ -539,7 +539,7 @@ xfs_setattr_nonsize( | |||
539 | ASSERT(udqp == NULL); | 539 | ASSERT(udqp == NULL); |
540 | ASSERT(gdqp == NULL); | 540 | ASSERT(gdqp == NULL); |
541 | error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), | 541 | error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), |
542 | qflags, &udqp, &gdqp); | 542 | qflags, &udqp, &gdqp, NULL); |
543 | if (error) | 543 | if (error) |
544 | return error; | 544 | return error; |
545 | } | 545 | } |
@@ -575,7 +575,7 @@ xfs_setattr_nonsize( | |||
575 | (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { | 575 | (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { |
576 | ASSERT(tp); | 576 | ASSERT(tp); |
577 | error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, | 577 | error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, |
578 | capable(CAP_FOWNER) ? | 578 | NULL, capable(CAP_FOWNER) ? |
579 | XFS_QMOPT_FORCE_RES : 0); | 579 | XFS_QMOPT_FORCE_RES : 0); |
580 | if (error) /* out of quota */ | 580 | if (error) /* out of quota */ |
581 | goto out_trans_cancel; | 581 | goto out_trans_cancel; |
@@ -987,7 +987,8 @@ xfs_fiemap_format( | |||
987 | if (bmv->bmv_oflags & BMV_OF_PREALLOC) | 987 | if (bmv->bmv_oflags & BMV_OF_PREALLOC) |
988 | fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; | 988 | fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; |
989 | else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { | 989 | else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { |
990 | fiemap_flags |= FIEMAP_EXTENT_DELALLOC; | 990 | fiemap_flags |= (FIEMAP_EXTENT_DELALLOC | |
991 | FIEMAP_EXTENT_UNKNOWN); | ||
991 | physical = 0; /* no block yet */ | 992 | physical = 0; /* no block yet */ |
992 | } | 993 | } |
993 | if (bmv->bmv_oflags & BMV_OF_LAST) | 994 | if (bmv->bmv_oflags & BMV_OF_LAST) |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2ea7d402188d..b93e14b86754 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -43,7 +43,7 @@ xfs_internal_inum( | |||
43 | { | 43 | { |
44 | return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || | 44 | return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino || |
45 | (xfs_sb_version_hasquota(&mp->m_sb) && | 45 | (xfs_sb_version_hasquota(&mp->m_sb) && |
46 | (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); | 46 | xfs_is_quota_inode(&mp->m_sb, ino))); |
47 | } | 47 | } |
48 | 48 | ||
49 | /* | 49 | /* |
@@ -221,7 +221,6 @@ xfs_bulkstat( | |||
221 | char __user *ubufp; /* pointer into user's buffer */ | 221 | char __user *ubufp; /* pointer into user's buffer */ |
222 | int ubelem; /* spaces used in user's buffer */ | 222 | int ubelem; /* spaces used in user's buffer */ |
223 | int ubused; /* bytes used by formatter */ | 223 | int ubused; /* bytes used by formatter */ |
224 | xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ | ||
225 | 224 | ||
226 | /* | 225 | /* |
227 | * Get the last inode value, see if there's nothing to do. | 226 | * Get the last inode value, see if there's nothing to do. |
@@ -263,7 +262,6 @@ xfs_bulkstat( | |||
263 | rval = 0; | 262 | rval = 0; |
264 | while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { | 263 | while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { |
265 | cond_resched(); | 264 | cond_resched(); |
266 | bp = NULL; | ||
267 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); | 265 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); |
268 | if (error) { | 266 | if (error) { |
269 | /* | 267 | /* |
@@ -383,11 +381,13 @@ xfs_bulkstat( | |||
383 | * Also start read-ahead now for this chunk. | 381 | * Also start read-ahead now for this chunk. |
384 | */ | 382 | */ |
385 | if (r.ir_freecount < XFS_INODES_PER_CHUNK) { | 383 | if (r.ir_freecount < XFS_INODES_PER_CHUNK) { |
384 | struct blk_plug plug; | ||
386 | /* | 385 | /* |
387 | * Loop over all clusters in the next chunk. | 386 | * Loop over all clusters in the next chunk. |
388 | * Do a readahead if there are any allocated | 387 | * Do a readahead if there are any allocated |
389 | * inodes in that cluster. | 388 | * inodes in that cluster. |
390 | */ | 389 | */ |
390 | blk_start_plug(&plug); | ||
391 | agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); | 391 | agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); |
392 | for (chunkidx = 0; | 392 | for (chunkidx = 0; |
393 | chunkidx < XFS_INODES_PER_CHUNK; | 393 | chunkidx < XFS_INODES_PER_CHUNK; |
@@ -399,6 +399,7 @@ xfs_bulkstat( | |||
399 | agbno, nbcluster, | 399 | agbno, nbcluster, |
400 | &xfs_inode_buf_ops); | 400 | &xfs_inode_buf_ops); |
401 | } | 401 | } |
402 | blk_finish_plug(&plug); | ||
402 | irbp->ir_startino = r.ir_startino; | 403 | irbp->ir_startino = r.ir_startino; |
403 | irbp->ir_freecount = r.ir_freecount; | 404 | irbp->ir_freecount = r.ir_freecount; |
404 | irbp->ir_free = r.ir_free; | 405 | irbp->ir_free = r.ir_free; |
@@ -433,27 +434,7 @@ xfs_bulkstat( | |||
433 | irbp->ir_freecount < XFS_INODES_PER_CHUNK; | 434 | irbp->ir_freecount < XFS_INODES_PER_CHUNK; |
434 | chunkidx++, clustidx++, agino++) { | 435 | chunkidx++, clustidx++, agino++) { |
435 | ASSERT(chunkidx < XFS_INODES_PER_CHUNK); | 436 | ASSERT(chunkidx < XFS_INODES_PER_CHUNK); |
436 | /* | 437 | |
437 | * Recompute agbno if this is the | ||
438 | * first inode of the cluster. | ||
439 | * | ||
440 | * Careful with clustidx. There can be | ||
441 | * multiple clusters per chunk, a single | ||
442 | * cluster per chunk or a cluster that has | ||
443 | * inodes represented from several different | ||
444 | * chunks (if blocksize is large). | ||
445 | * | ||
446 | * Because of this, the starting clustidx is | ||
447 | * initialized to zero in this loop but must | ||
448 | * later be reset after reading in the cluster | ||
449 | * buffer. | ||
450 | */ | ||
451 | if ((chunkidx & (nicluster - 1)) == 0) { | ||
452 | agbno = XFS_AGINO_TO_AGBNO(mp, | ||
453 | irbp->ir_startino) + | ||
454 | ((chunkidx & nimask) >> | ||
455 | mp->m_sb.sb_inopblog); | ||
456 | } | ||
457 | ino = XFS_AGINO_TO_INO(mp, agno, agino); | 438 | ino = XFS_AGINO_TO_INO(mp, agno, agino); |
458 | /* | 439 | /* |
459 | * Skip if this inode is free. | 440 | * Skip if this inode is free. |
@@ -499,10 +480,6 @@ xfs_bulkstat( | |||
499 | 480 | ||
500 | cond_resched(); | 481 | cond_resched(); |
501 | } | 482 | } |
502 | |||
503 | if (bp) | ||
504 | xfs_buf_relse(bp); | ||
505 | |||
506 | /* | 483 | /* |
507 | * Set up for the next loop iteration. | 484 | * Set up for the next loop iteration. |
508 | */ | 485 | */ |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b345a7c85153..d852a2b3e1fd 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -1963,6 +1963,10 @@ xlog_write_calc_vec_length( | |||
1963 | headers++; | 1963 | headers++; |
1964 | 1964 | ||
1965 | for (lv = log_vector; lv; lv = lv->lv_next) { | 1965 | for (lv = log_vector; lv; lv = lv->lv_next) { |
1966 | /* we don't write ordered log vectors */ | ||
1967 | if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) | ||
1968 | continue; | ||
1969 | |||
1966 | headers += lv->lv_niovecs; | 1970 | headers += lv->lv_niovecs; |
1967 | 1971 | ||
1968 | for (i = 0; i < lv->lv_niovecs; i++) { | 1972 | for (i = 0; i < lv->lv_niovecs; i++) { |
@@ -2216,7 +2220,7 @@ xlog_write( | |||
2216 | index = 0; | 2220 | index = 0; |
2217 | lv = log_vector; | 2221 | lv = log_vector; |
2218 | vecp = lv->lv_iovecp; | 2222 | vecp = lv->lv_iovecp; |
2219 | while (lv && index < lv->lv_niovecs) { | 2223 | while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { |
2220 | void *ptr; | 2224 | void *ptr; |
2221 | int log_offset; | 2225 | int log_offset; |
2222 | 2226 | ||
@@ -2236,13 +2240,22 @@ xlog_write( | |||
2236 | * This loop writes out as many regions as can fit in the amount | 2240 | * This loop writes out as many regions as can fit in the amount |
2237 | * of space which was allocated by xlog_state_get_iclog_space(). | 2241 | * of space which was allocated by xlog_state_get_iclog_space(). |
2238 | */ | 2242 | */ |
2239 | while (lv && index < lv->lv_niovecs) { | 2243 | while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { |
2240 | struct xfs_log_iovec *reg = &vecp[index]; | 2244 | struct xfs_log_iovec *reg; |
2241 | struct xlog_op_header *ophdr; | 2245 | struct xlog_op_header *ophdr; |
2242 | int start_rec_copy; | 2246 | int start_rec_copy; |
2243 | int copy_len; | 2247 | int copy_len; |
2244 | int copy_off; | 2248 | int copy_off; |
2249 | bool ordered = false; | ||
2250 | |||
2251 | /* ordered log vectors have no regions to write */ | ||
2252 | if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { | ||
2253 | ASSERT(lv->lv_niovecs == 0); | ||
2254 | ordered = true; | ||
2255 | goto next_lv; | ||
2256 | } | ||
2245 | 2257 | ||
2258 | reg = &vecp[index]; | ||
2246 | ASSERT(reg->i_len % sizeof(__int32_t) == 0); | 2259 | ASSERT(reg->i_len % sizeof(__int32_t) == 0); |
2247 | ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); | 2260 | ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); |
2248 | 2261 | ||
@@ -2302,12 +2315,13 @@ xlog_write( | |||
2302 | break; | 2315 | break; |
2303 | 2316 | ||
2304 | if (++index == lv->lv_niovecs) { | 2317 | if (++index == lv->lv_niovecs) { |
2318 | next_lv: | ||
2305 | lv = lv->lv_next; | 2319 | lv = lv->lv_next; |
2306 | index = 0; | 2320 | index = 0; |
2307 | if (lv) | 2321 | if (lv) |
2308 | vecp = lv->lv_iovecp; | 2322 | vecp = lv->lv_iovecp; |
2309 | } | 2323 | } |
2310 | if (record_cnt == 0) { | 2324 | if (record_cnt == 0 && ordered == false) { |
2311 | if (!lv) | 2325 | if (!lv) |
2312 | return 0; | 2326 | return 0; |
2313 | break; | 2327 | break; |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 5caee96059df..fb630e496c12 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -88,7 +88,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) | |||
88 | #define XLOG_REG_TYPE_UNMOUNT 17 | 88 | #define XLOG_REG_TYPE_UNMOUNT 17 |
89 | #define XLOG_REG_TYPE_COMMIT 18 | 89 | #define XLOG_REG_TYPE_COMMIT 18 |
90 | #define XLOG_REG_TYPE_TRANSHDR 19 | 90 | #define XLOG_REG_TYPE_TRANSHDR 19 |
91 | #define XLOG_REG_TYPE_MAX 19 | 91 | #define XLOG_REG_TYPE_ICREATE 20 |
92 | #define XLOG_REG_TYPE_MAX 20 | ||
92 | 93 | ||
93 | typedef struct xfs_log_iovec { | 94 | typedef struct xfs_log_iovec { |
94 | void *i_addr; /* beginning address of region */ | 95 | void *i_addr; /* beginning address of region */ |
@@ -105,6 +106,8 @@ struct xfs_log_vec { | |||
105 | int lv_buf_len; /* size of formatted buffer */ | 106 | int lv_buf_len; /* size of formatted buffer */ |
106 | }; | 107 | }; |
107 | 108 | ||
109 | #define XFS_LOG_VEC_ORDERED (-1) | ||
110 | |||
108 | /* | 111 | /* |
109 | * Structure used to pass callback function and the function's argument | 112 | * Structure used to pass callback function and the function's argument |
110 | * to the log manager. | 113 | * to the log manager. |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index d0833b54e55d..02b9cf3f8252 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -127,6 +127,7 @@ xlog_cil_prepare_log_vecs( | |||
127 | int index; | 127 | int index; |
128 | int len = 0; | 128 | int len = 0; |
129 | uint niovecs; | 129 | uint niovecs; |
130 | bool ordered = false; | ||
130 | 131 | ||
131 | /* Skip items which aren't dirty in this transaction. */ | 132 | /* Skip items which aren't dirty in this transaction. */ |
132 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) | 133 | if (!(lidp->lid_flags & XFS_LID_DIRTY)) |
@@ -137,14 +138,30 @@ xlog_cil_prepare_log_vecs( | |||
137 | if (!niovecs) | 138 | if (!niovecs) |
138 | continue; | 139 | continue; |
139 | 140 | ||
141 | /* | ||
142 | * Ordered items need to be tracked but we do not wish to write | ||
143 | * them. We need a logvec to track the object, but we do not | ||
144 | * need an iovec or buffer to be allocated for copying data. | ||
145 | */ | ||
146 | if (niovecs == XFS_LOG_VEC_ORDERED) { | ||
147 | ordered = true; | ||
148 | niovecs = 0; | ||
149 | } | ||
150 | |||
140 | new_lv = kmem_zalloc(sizeof(*new_lv) + | 151 | new_lv = kmem_zalloc(sizeof(*new_lv) + |
141 | niovecs * sizeof(struct xfs_log_iovec), | 152 | niovecs * sizeof(struct xfs_log_iovec), |
142 | KM_SLEEP|KM_NOFS); | 153 | KM_SLEEP|KM_NOFS); |
143 | 154 | ||
155 | new_lv->lv_item = lidp->lid_item; | ||
156 | new_lv->lv_niovecs = niovecs; | ||
157 | if (ordered) { | ||
158 | /* track as an ordered logvec */ | ||
159 | new_lv->lv_buf_len = XFS_LOG_VEC_ORDERED; | ||
160 | goto next; | ||
161 | } | ||
162 | |||
144 | /* The allocated iovec region lies beyond the log vector. */ | 163 | /* The allocated iovec region lies beyond the log vector. */ |
145 | new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; | 164 | new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; |
146 | new_lv->lv_niovecs = niovecs; | ||
147 | new_lv->lv_item = lidp->lid_item; | ||
148 | 165 | ||
149 | /* build the vector array and calculate it's length */ | 166 | /* build the vector array and calculate it's length */ |
150 | IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp); | 167 | IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp); |
@@ -165,6 +182,7 @@ xlog_cil_prepare_log_vecs( | |||
165 | } | 182 | } |
166 | ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len); | 183 | ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len); |
167 | 184 | ||
185 | next: | ||
168 | if (!ret_lv) | 186 | if (!ret_lv) |
169 | ret_lv = new_lv; | 187 | ret_lv = new_lv; |
170 | else | 188 | else |
@@ -191,8 +209,18 @@ xfs_cil_prepare_item( | |||
191 | 209 | ||
192 | if (old) { | 210 | if (old) { |
193 | /* existing lv on log item, space used is a delta */ | 211 | /* existing lv on log item, space used is a delta */ |
194 | ASSERT(!list_empty(&lv->lv_item->li_cil)); | 212 | ASSERT((old->lv_buf && old->lv_buf_len && old->lv_niovecs) || |
195 | ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); | 213 | old->lv_buf_len == XFS_LOG_VEC_ORDERED); |
214 | |||
215 | /* | ||
216 | * If the new item is ordered, keep the old one that is already | ||
217 | * tracking dirty or ordered regions | ||
218 | */ | ||
219 | if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { | ||
220 | ASSERT(!lv->lv_buf); | ||
221 | kmem_free(lv); | ||
222 | return; | ||
223 | } | ||
196 | 224 | ||
197 | *len += lv->lv_buf_len - old->lv_buf_len; | 225 | *len += lv->lv_buf_len - old->lv_buf_len; |
198 | *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; | 226 | *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; |
@@ -201,10 +229,11 @@ xfs_cil_prepare_item( | |||
201 | } else { | 229 | } else { |
202 | /* new lv, must pin the log item */ | 230 | /* new lv, must pin the log item */ |
203 | ASSERT(!lv->lv_item->li_lv); | 231 | ASSERT(!lv->lv_item->li_lv); |
204 | ASSERT(list_empty(&lv->lv_item->li_cil)); | ||
205 | 232 | ||
206 | *len += lv->lv_buf_len; | 233 | if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) { |
207 | *diff_iovecs += lv->lv_niovecs; | 234 | *len += lv->lv_buf_len; |
235 | *diff_iovecs += lv->lv_niovecs; | ||
236 | } | ||
208 | IOP_PIN(lv->lv_item); | 237 | IOP_PIN(lv->lv_item); |
209 | 238 | ||
210 | } | 239 | } |
@@ -259,18 +288,24 @@ xlog_cil_insert_items( | |||
259 | * We can do this safely because the context can't checkpoint until we | 288 | * We can do this safely because the context can't checkpoint until we |
260 | * are done so it doesn't matter exactly how we update the CIL. | 289 | * are done so it doesn't matter exactly how we update the CIL. |
261 | */ | 290 | */ |
262 | for (lv = log_vector; lv; lv = lv->lv_next) | ||
263 | xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); | ||
264 | |||
265 | /* account for space used by new iovec headers */ | ||
266 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
267 | |||
268 | spin_lock(&cil->xc_cil_lock); | 291 | spin_lock(&cil->xc_cil_lock); |
292 | for (lv = log_vector; lv; ) { | ||
293 | struct xfs_log_vec *next = lv->lv_next; | ||
269 | 294 | ||
270 | /* move the items to the tail of the CIL */ | 295 | ASSERT(lv->lv_item->li_lv || list_empty(&lv->lv_item->li_cil)); |
271 | for (lv = log_vector; lv; lv = lv->lv_next) | 296 | lv->lv_next = NULL; |
297 | |||
298 | /* | ||
299 | * xfs_cil_prepare_item() may free the lv, so move the item on | ||
300 | * the CIL first. | ||
301 | */ | ||
272 | list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); | 302 | list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); |
303 | xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); | ||
304 | lv = next; | ||
305 | } | ||
273 | 306 | ||
307 | /* account for space used by new iovec headers */ | ||
308 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
274 | ctx->nvecs += diff_iovecs; | 309 | ctx->nvecs += diff_iovecs; |
275 | 310 | ||
276 | /* | 311 | /* |
@@ -381,9 +416,7 @@ xlog_cil_push( | |||
381 | struct xfs_cil_ctx *new_ctx; | 416 | struct xfs_cil_ctx *new_ctx; |
382 | struct xlog_in_core *commit_iclog; | 417 | struct xlog_in_core *commit_iclog; |
383 | struct xlog_ticket *tic; | 418 | struct xlog_ticket *tic; |
384 | int num_lv; | ||
385 | int num_iovecs; | 419 | int num_iovecs; |
386 | int len; | ||
387 | int error = 0; | 420 | int error = 0; |
388 | struct xfs_trans_header thdr; | 421 | struct xfs_trans_header thdr; |
389 | struct xfs_log_iovec lhdr; | 422 | struct xfs_log_iovec lhdr; |
@@ -428,12 +461,9 @@ xlog_cil_push( | |||
428 | * side which is currently locked out by the flush lock. | 461 | * side which is currently locked out by the flush lock. |
429 | */ | 462 | */ |
430 | lv = NULL; | 463 | lv = NULL; |
431 | num_lv = 0; | ||
432 | num_iovecs = 0; | 464 | num_iovecs = 0; |
433 | len = 0; | ||
434 | while (!list_empty(&cil->xc_cil)) { | 465 | while (!list_empty(&cil->xc_cil)) { |
435 | struct xfs_log_item *item; | 466 | struct xfs_log_item *item; |
436 | int i; | ||
437 | 467 | ||
438 | item = list_first_entry(&cil->xc_cil, | 468 | item = list_first_entry(&cil->xc_cil, |
439 | struct xfs_log_item, li_cil); | 469 | struct xfs_log_item, li_cil); |
@@ -444,11 +474,7 @@ xlog_cil_push( | |||
444 | lv->lv_next = item->li_lv; | 474 | lv->lv_next = item->li_lv; |
445 | lv = item->li_lv; | 475 | lv = item->li_lv; |
446 | item->li_lv = NULL; | 476 | item->li_lv = NULL; |
447 | |||
448 | num_lv++; | ||
449 | num_iovecs += lv->lv_niovecs; | 477 | num_iovecs += lv->lv_niovecs; |
450 | for (i = 0; i < lv->lv_niovecs; i++) | ||
451 | len += lv->lv_iovecp[i].i_len; | ||
452 | } | 478 | } |
453 | 479 | ||
454 | /* | 480 | /* |
@@ -701,6 +727,7 @@ xfs_log_commit_cil( | |||
701 | if (commit_lsn) | 727 | if (commit_lsn) |
702 | *commit_lsn = log->l_cilp->xc_ctx->sequence; | 728 | *commit_lsn = log->l_cilp->xc_ctx->sequence; |
703 | 729 | ||
730 | /* xlog_cil_insert_items() destroys log_vector list */ | ||
704 | xlog_cil_insert_items(log, log_vector, tp->t_ticket); | 731 | xlog_cil_insert_items(log, log_vector, tp->t_ticket); |
705 | 732 | ||
706 | /* check we didn't blow the reservation */ | 733 | /* check we didn't blow the reservation */ |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7cf5e4eafe28..7681b19aa5dc 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include "xfs_cksum.h" | 45 | #include "xfs_cksum.h" |
46 | #include "xfs_trace.h" | 46 | #include "xfs_trace.h" |
47 | #include "xfs_icache.h" | 47 | #include "xfs_icache.h" |
48 | #include "xfs_icreate_item.h" | ||
48 | 49 | ||
49 | /* Need all the magic numbers and buffer ops structures from these headers */ | 50 | /* Need all the magic numbers and buffer ops structures from these headers */ |
50 | #include "xfs_symlink.h" | 51 | #include "xfs_symlink.h" |
@@ -1617,7 +1618,10 @@ xlog_recover_add_to_trans( | |||
1617 | * form the cancelled buffer table. Hence they have tobe done last. | 1618 | * form the cancelled buffer table. Hence they have tobe done last. |
1618 | * | 1619 | * |
1619 | * 3. Inode allocation buffers must be replayed before inode items that | 1620 | * 3. Inode allocation buffers must be replayed before inode items that |
1620 | * read the buffer and replay changes into it. | 1621 | * read the buffer and replay changes into it. For filesystems using the |
1622 | * ICREATE transactions, this means XFS_LI_ICREATE objects need to get | ||
1623 | * treated the same as inode allocation buffers as they create and | ||
1624 | * initialise the buffers directly. | ||
1621 | * | 1625 | * |
1622 | * 4. Inode unlink buffers must be replayed after inode items are replayed. | 1626 | * 4. Inode unlink buffers must be replayed after inode items are replayed. |
1623 | * This ensures that inodes are completely flushed to the inode buffer | 1627 | * This ensures that inodes are completely flushed to the inode buffer |
@@ -1632,10 +1636,17 @@ xlog_recover_add_to_trans( | |||
1632 | * from all the other buffers and move them to last. | 1636 | * from all the other buffers and move them to last. |
1633 | * | 1637 | * |
1634 | * Hence, 4 lists, in order from head to tail: | 1638 | * Hence, 4 lists, in order from head to tail: |
1635 | * - buffer_list for all buffers except cancelled/inode unlink buffers | 1639 | * - buffer_list for all buffers except cancelled/inode unlink buffers |
1636 | * - item_list for all non-buffer items | 1640 | * - item_list for all non-buffer items |
1637 | * - inode_buffer_list for inode unlink buffers | 1641 | * - inode_buffer_list for inode unlink buffers |
1638 | * - cancel_list for the cancelled buffers | 1642 | * - cancel_list for the cancelled buffers |
1643 | * | ||
1644 | * Note that we add objects to the tail of the lists so that first-to-last | ||
1645 | * ordering is preserved within the lists. Adding objects to the head of the | ||
1646 | * list means when we traverse from the head we walk them in last-to-first | ||
1647 | * order. For cancelled buffers and inode unlink buffers this doesn't matter, | ||
1648 | * but for all other items there may be specific ordering that we need to | ||
1649 | * preserve. | ||
1639 | */ | 1650 | */ |
1640 | STATIC int | 1651 | STATIC int |
1641 | xlog_recover_reorder_trans( | 1652 | xlog_recover_reorder_trans( |
@@ -1655,6 +1666,9 @@ xlog_recover_reorder_trans( | |||
1655 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 1666 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
1656 | 1667 | ||
1657 | switch (ITEM_TYPE(item)) { | 1668 | switch (ITEM_TYPE(item)) { |
1669 | case XFS_LI_ICREATE: | ||
1670 | list_move_tail(&item->ri_list, &buffer_list); | ||
1671 | break; | ||
1658 | case XFS_LI_BUF: | 1672 | case XFS_LI_BUF: |
1659 | if (buf_f->blf_flags & XFS_BLF_CANCEL) { | 1673 | if (buf_f->blf_flags & XFS_BLF_CANCEL) { |
1660 | trace_xfs_log_recover_item_reorder_head(log, | 1674 | trace_xfs_log_recover_item_reorder_head(log, |
@@ -2578,8 +2592,16 @@ xlog_recover_inode_pass2( | |||
2578 | goto error; | 2592 | goto error; |
2579 | } | 2593 | } |
2580 | 2594 | ||
2581 | /* Skip replay when the on disk inode is newer than the log one */ | 2595 | /* |
2582 | if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { | 2596 | * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes |
2597 | * are transactional and if ordering is necessary we can determine that | ||
2598 | * more accurately by the LSN field in the V3 inode core. Don't trust | ||
2599 | * the inode versions as we might be changing them here - use the | ||
2600 | * superblock flag to determine whether we need to look at di_flushiter | ||
2601 | * to skip replay when the on disk inode is newer than the log one | ||
2602 | */ | ||
2603 | if (!xfs_sb_version_hascrc(&mp->m_sb) && | ||
2604 | dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { | ||
2583 | /* | 2605 | /* |
2584 | * Deal with the wrap case, DI_MAX_FLUSH is less | 2606 | * Deal with the wrap case, DI_MAX_FLUSH is less |
2585 | * than smaller numbers | 2607 | * than smaller numbers |
@@ -2594,6 +2616,7 @@ xlog_recover_inode_pass2( | |||
2594 | goto error; | 2616 | goto error; |
2595 | } | 2617 | } |
2596 | } | 2618 | } |
2619 | |||
2597 | /* Take the opportunity to reset the flush iteration count */ | 2620 | /* Take the opportunity to reset the flush iteration count */ |
2598 | dicp->di_flushiter = 0; | 2621 | dicp->di_flushiter = 0; |
2599 | 2622 | ||
@@ -2982,6 +3005,93 @@ xlog_recover_efd_pass2( | |||
2982 | } | 3005 | } |
2983 | 3006 | ||
2984 | /* | 3007 | /* |
3008 | * This routine is called when an inode create format structure is found in a | ||
3009 | * committed transaction in the log. Its purpose is to initialise the inodes | ||
3010 | * being allocated on disk. This requires us to get inode cluster buffers that | ||
3011 | * match the range to be initialised, stamped with inode templates and written | ||
3012 | * by delayed write so that subsequent modifications will hit the cached buffer | ||
3013 | * and only need writing out at the end of recovery. | ||
3014 | */ | ||
3015 | STATIC int | ||
3016 | xlog_recover_do_icreate_pass2( | ||
3017 | struct xlog *log, | ||
3018 | struct list_head *buffer_list, | ||
3019 | xlog_recover_item_t *item) | ||
3020 | { | ||
3021 | struct xfs_mount *mp = log->l_mp; | ||
3022 | struct xfs_icreate_log *icl; | ||
3023 | xfs_agnumber_t agno; | ||
3024 | xfs_agblock_t agbno; | ||
3025 | unsigned int count; | ||
3026 | unsigned int isize; | ||
3027 | xfs_agblock_t length; | ||
3028 | |||
3029 | icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; | ||
3030 | if (icl->icl_type != XFS_LI_ICREATE) { | ||
3031 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); | ||
3032 | return EINVAL; | ||
3033 | } | ||
3034 | |||
3035 | if (icl->icl_size != 1) { | ||
3036 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); | ||
3037 | return EINVAL; | ||
3038 | } | ||
3039 | |||
3040 | agno = be32_to_cpu(icl->icl_ag); | ||
3041 | if (agno >= mp->m_sb.sb_agcount) { | ||
3042 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); | ||
3043 | return EINVAL; | ||
3044 | } | ||
3045 | agbno = be32_to_cpu(icl->icl_agbno); | ||
3046 | if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { | ||
3047 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); | ||
3048 | return EINVAL; | ||
3049 | } | ||
3050 | isize = be32_to_cpu(icl->icl_isize); | ||
3051 | if (isize != mp->m_sb.sb_inodesize) { | ||
3052 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); | ||
3053 | return EINVAL; | ||
3054 | } | ||
3055 | count = be32_to_cpu(icl->icl_count); | ||
3056 | if (!count) { | ||
3057 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); | ||
3058 | return EINVAL; | ||
3059 | } | ||
3060 | length = be32_to_cpu(icl->icl_length); | ||
3061 | if (!length || length >= mp->m_sb.sb_agblocks) { | ||
3062 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); | ||
3063 | return EINVAL; | ||
3064 | } | ||
3065 | |||
3066 | /* existing allocation is fixed value */ | ||
3067 | ASSERT(count == XFS_IALLOC_INODES(mp)); | ||
3068 | ASSERT(length == XFS_IALLOC_BLOCKS(mp)); | ||
3069 | if (count != XFS_IALLOC_INODES(mp) || | ||
3070 | length != XFS_IALLOC_BLOCKS(mp)) { | ||
3071 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); | ||
3072 | return EINVAL; | ||
3073 | } | ||
3074 | |||
3075 | /* | ||
3076 | * Inode buffers can be freed. Do not replay the inode initialisation as | ||
3077 | * we could be overwriting something written after this inode buffer was | ||
3078 | * cancelled. | ||
3079 | * | ||
3080 | * XXX: we need to iterate all buffers and only init those that are not | ||
3081 | * cancelled. I think that a more fine grained factoring of | ||
3082 | * xfs_ialloc_inode_init may be appropriate here to enable this to be | ||
3083 | * done easily. | ||
3084 | */ | ||
3085 | if (xlog_check_buffer_cancelled(log, | ||
3086 | XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0)) | ||
3087 | return 0; | ||
3088 | |||
3089 | xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length, | ||
3090 | be32_to_cpu(icl->icl_gen)); | ||
3091 | return 0; | ||
3092 | } | ||
3093 | |||
3094 | /* | ||
2985 | * Free up any resources allocated by the transaction | 3095 | * Free up any resources allocated by the transaction |
2986 | * | 3096 | * |
2987 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | 3097 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. |
@@ -3023,6 +3133,7 @@ xlog_recover_commit_pass1( | |||
3023 | case XFS_LI_EFI: | 3133 | case XFS_LI_EFI: |
3024 | case XFS_LI_EFD: | 3134 | case XFS_LI_EFD: |
3025 | case XFS_LI_DQUOT: | 3135 | case XFS_LI_DQUOT: |
3136 | case XFS_LI_ICREATE: | ||
3026 | /* nothing to do in pass 1 */ | 3137 | /* nothing to do in pass 1 */ |
3027 | return 0; | 3138 | return 0; |
3028 | default: | 3139 | default: |
@@ -3053,6 +3164,8 @@ xlog_recover_commit_pass2( | |||
3053 | return xlog_recover_efd_pass2(log, item); | 3164 | return xlog_recover_efd_pass2(log, item); |
3054 | case XFS_LI_DQUOT: | 3165 | case XFS_LI_DQUOT: |
3055 | return xlog_recover_dquot_pass2(log, buffer_list, item); | 3166 | return xlog_recover_dquot_pass2(log, buffer_list, item); |
3167 | case XFS_LI_ICREATE: | ||
3168 | return xlog_recover_do_icreate_pass2(log, buffer_list, item); | ||
3056 | case XFS_LI_QUOTAOFF: | 3169 | case XFS_LI_QUOTAOFF: |
3057 | /* nothing to do in pass2 */ | 3170 | /* nothing to do in pass2 */ |
3058 | return 0; | 3171 | return 0; |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index e8e310c05097..2b0ba3581656 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -336,6 +336,14 @@ xfs_mount_validate_sb( | |||
336 | return XFS_ERROR(EWRONGFS); | 336 | return XFS_ERROR(EWRONGFS); |
337 | } | 337 | } |
338 | 338 | ||
339 | if ((sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) && | ||
340 | (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | | ||
341 | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))) { | ||
342 | xfs_notice(mp, | ||
343 | "Super block has XFS_OQUOTA bits along with XFS_PQUOTA and/or XFS_GQUOTA bits.\n"); | ||
344 | return XFS_ERROR(EFSCORRUPTED); | ||
345 | } | ||
346 | |||
339 | /* | 347 | /* |
340 | * Version 5 superblock feature mask validation. Reject combinations the | 348 | * Version 5 superblock feature mask validation. Reject combinations the |
341 | * kernel cannot support up front before checking anything else. For | 349 | * kernel cannot support up front before checking anything else. For |
@@ -561,6 +569,18 @@ out_unwind: | |||
561 | return error; | 569 | return error; |
562 | } | 570 | } |
563 | 571 | ||
572 | static void | ||
573 | xfs_sb_quota_from_disk(struct xfs_sb *sbp) | ||
574 | { | ||
575 | if (sbp->sb_qflags & XFS_OQUOTA_ENFD) | ||
576 | sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? | ||
577 | XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD; | ||
578 | if (sbp->sb_qflags & XFS_OQUOTA_CHKD) | ||
579 | sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? | ||
580 | XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; | ||
581 | sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); | ||
582 | } | ||
583 | |||
564 | void | 584 | void |
565 | xfs_sb_from_disk( | 585 | xfs_sb_from_disk( |
566 | struct xfs_sb *to, | 586 | struct xfs_sb *to, |
@@ -622,6 +642,35 @@ xfs_sb_from_disk( | |||
622 | to->sb_lsn = be64_to_cpu(from->sb_lsn); | 642 | to->sb_lsn = be64_to_cpu(from->sb_lsn); |
623 | } | 643 | } |
624 | 644 | ||
645 | static inline void | ||
646 | xfs_sb_quota_to_disk( | ||
647 | xfs_dsb_t *to, | ||
648 | xfs_sb_t *from, | ||
649 | __int64_t *fields) | ||
650 | { | ||
651 | __uint16_t qflags = from->sb_qflags; | ||
652 | |||
653 | if (*fields & XFS_SB_QFLAGS) { | ||
654 | /* | ||
655 | * The in-core version of sb_qflags does not have | ||
656 | * XFS_OQUOTA_* flags, whereas the on-disk version | ||
657 | * does. So, convert incore XFS_{PG}QUOTA_* flags | ||
658 | * to on-disk XFS_OQUOTA_* flags. | ||
659 | */ | ||
660 | qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | | ||
661 | XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); | ||
662 | |||
663 | if (from->sb_qflags & | ||
664 | (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) | ||
665 | qflags |= XFS_OQUOTA_ENFD; | ||
666 | if (from->sb_qflags & | ||
667 | (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) | ||
668 | qflags |= XFS_OQUOTA_CHKD; | ||
669 | to->sb_qflags = cpu_to_be16(qflags); | ||
670 | *fields &= ~XFS_SB_QFLAGS; | ||
671 | } | ||
672 | } | ||
673 | |||
625 | /* | 674 | /* |
626 | * Copy in core superblock to ondisk one. | 675 | * Copy in core superblock to ondisk one. |
627 | * | 676 | * |
@@ -643,6 +692,7 @@ xfs_sb_to_disk( | |||
643 | if (!fields) | 692 | if (!fields) |
644 | return; | 693 | return; |
645 | 694 | ||
695 | xfs_sb_quota_to_disk(to, from, &fields); | ||
646 | while (fields) { | 696 | while (fields) { |
647 | f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); | 697 | f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); |
648 | first = xfs_sb_info[f].offset; | 698 | first = xfs_sb_info[f].offset; |
@@ -835,6 +885,7 @@ reread: | |||
835 | */ | 885 | */ |
836 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); | 886 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); |
837 | 887 | ||
888 | xfs_sb_quota_from_disk(&mp->m_sb); | ||
838 | /* | 889 | /* |
839 | * We must be able to do sector-sized and sector-aligned IO. | 890 | * We must be able to do sector-sized and sector-aligned IO. |
840 | */ | 891 | */ |
@@ -987,42 +1038,27 @@ xfs_update_alignment(xfs_mount_t *mp) | |||
987 | */ | 1038 | */ |
988 | if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || | 1039 | if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || |
989 | (BBTOB(mp->m_swidth) & mp->m_blockmask)) { | 1040 | (BBTOB(mp->m_swidth) & mp->m_blockmask)) { |
990 | if (mp->m_flags & XFS_MOUNT_RETERR) { | 1041 | xfs_warn(mp, |
991 | xfs_warn(mp, "alignment check failed: " | 1042 | "alignment check failed: sunit/swidth vs. blocksize(%d)", |
992 | "(sunit/swidth vs. blocksize)"); | 1043 | sbp->sb_blocksize); |
993 | return XFS_ERROR(EINVAL); | 1044 | return XFS_ERROR(EINVAL); |
994 | } | ||
995 | mp->m_dalign = mp->m_swidth = 0; | ||
996 | } else { | 1045 | } else { |
997 | /* | 1046 | /* |
998 | * Convert the stripe unit and width to FSBs. | 1047 | * Convert the stripe unit and width to FSBs. |
999 | */ | 1048 | */ |
1000 | mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); | 1049 | mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); |
1001 | if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { | 1050 | if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { |
1002 | if (mp->m_flags & XFS_MOUNT_RETERR) { | ||
1003 | xfs_warn(mp, "alignment check failed: " | ||
1004 | "(sunit/swidth vs. ag size)"); | ||
1005 | return XFS_ERROR(EINVAL); | ||
1006 | } | ||
1007 | xfs_warn(mp, | 1051 | xfs_warn(mp, |
1008 | "stripe alignment turned off: sunit(%d)/swidth(%d) " | 1052 | "alignment check failed: sunit/swidth vs. agsize(%d)", |
1009 | "incompatible with agsize(%d)", | 1053 | sbp->sb_agblocks); |
1010 | mp->m_dalign, mp->m_swidth, | 1054 | return XFS_ERROR(EINVAL); |
1011 | sbp->sb_agblocks); | ||
1012 | |||
1013 | mp->m_dalign = 0; | ||
1014 | mp->m_swidth = 0; | ||
1015 | } else if (mp->m_dalign) { | 1055 | } else if (mp->m_dalign) { |
1016 | mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); | 1056 | mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); |
1017 | } else { | 1057 | } else { |
1018 | if (mp->m_flags & XFS_MOUNT_RETERR) { | 1058 | xfs_warn(mp, |
1019 | xfs_warn(mp, "alignment check failed: " | 1059 | "alignment check failed: sunit(%d) less than bsize(%d)", |
1020 | "sunit(%d) less than bsize(%d)", | 1060 | mp->m_dalign, sbp->sb_blocksize); |
1021 | mp->m_dalign, | 1061 | return XFS_ERROR(EINVAL); |
1022 | mp->m_blockmask +1); | ||
1023 | return XFS_ERROR(EINVAL); | ||
1024 | } | ||
1025 | mp->m_swidth = 0; | ||
1026 | } | 1062 | } |
1027 | } | 1063 | } |
1028 | 1064 | ||
@@ -1039,6 +1075,10 @@ xfs_update_alignment(xfs_mount_t *mp) | |||
1039 | sbp->sb_width = mp->m_swidth; | 1075 | sbp->sb_width = mp->m_swidth; |
1040 | mp->m_update_flags |= XFS_SB_WIDTH; | 1076 | mp->m_update_flags |= XFS_SB_WIDTH; |
1041 | } | 1077 | } |
1078 | } else { | ||
1079 | xfs_warn(mp, | ||
1080 | "cannot change alignment: superblock does not support data alignment"); | ||
1081 | return XFS_ERROR(EINVAL); | ||
1042 | } | 1082 | } |
1043 | } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && | 1083 | } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && |
1044 | xfs_sb_version_hasdalign(&mp->m_sb)) { | 1084 | xfs_sb_version_hasdalign(&mp->m_sb)) { |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b004cecdfb04..4e374d4a9189 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -192,8 +192,6 @@ typedef struct xfs_mount { | |||
192 | xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */ | 192 | xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */ |
193 | xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */ | 193 | xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */ |
194 | uint m_chsize; /* size of next field */ | 194 | uint m_chsize; /* size of next field */ |
195 | struct xfs_chash *m_chash; /* fs private inode per-cluster | ||
196 | * hash table */ | ||
197 | atomic_t m_active_trans; /* number trans frozen */ | 195 | atomic_t m_active_trans; /* number trans frozen */ |
198 | #ifdef HAVE_PERCPU_SB | 196 | #ifdef HAVE_PERCPU_SB |
199 | xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ | 197 | xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ |
@@ -229,8 +227,6 @@ typedef struct xfs_mount { | |||
229 | operations, typically for | 227 | operations, typically for |
230 | disk errors in metadata */ | 228 | disk errors in metadata */ |
231 | #define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */ | 229 | #define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */ |
232 | #define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to | ||
233 | user */ | ||
234 | #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment | 230 | #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment |
235 | allocations */ | 231 | allocations */ |
236 | #define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */ | 232 | #define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */ |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b75c9bb6e71e..d320794d03ce 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -70,7 +70,7 @@ xfs_qm_dquot_walk( | |||
70 | void *data) | 70 | void *data) |
71 | { | 71 | { |
72 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 72 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
73 | struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); | 73 | struct radix_tree_root *tree = xfs_dquot_tree(qi, type); |
74 | uint32_t next_index; | 74 | uint32_t next_index; |
75 | int last_error = 0; | 75 | int last_error = 0; |
76 | int skipped; | 76 | int skipped; |
@@ -137,6 +137,7 @@ xfs_qm_dqpurge( | |||
137 | struct xfs_mount *mp = dqp->q_mount; | 137 | struct xfs_mount *mp = dqp->q_mount; |
138 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 138 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
139 | struct xfs_dquot *gdqp = NULL; | 139 | struct xfs_dquot *gdqp = NULL; |
140 | struct xfs_dquot *pdqp = NULL; | ||
140 | 141 | ||
141 | xfs_dqlock(dqp); | 142 | xfs_dqlock(dqp); |
142 | if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { | 143 | if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { |
@@ -145,8 +146,7 @@ xfs_qm_dqpurge( | |||
145 | } | 146 | } |
146 | 147 | ||
147 | /* | 148 | /* |
148 | * If this quota has a group hint attached, prepare for releasing it | 149 | * If this quota has a hint attached, prepare for releasing it now. |
149 | * now. | ||
150 | */ | 150 | */ |
151 | gdqp = dqp->q_gdquot; | 151 | gdqp = dqp->q_gdquot; |
152 | if (gdqp) { | 152 | if (gdqp) { |
@@ -154,6 +154,12 @@ xfs_qm_dqpurge( | |||
154 | dqp->q_gdquot = NULL; | 154 | dqp->q_gdquot = NULL; |
155 | } | 155 | } |
156 | 156 | ||
157 | pdqp = dqp->q_pdquot; | ||
158 | if (pdqp) { | ||
159 | xfs_dqlock(pdqp); | ||
160 | dqp->q_pdquot = NULL; | ||
161 | } | ||
162 | |||
157 | dqp->dq_flags |= XFS_DQ_FREEING; | 163 | dqp->dq_flags |= XFS_DQ_FREEING; |
158 | 164 | ||
159 | xfs_dqflock(dqp); | 165 | xfs_dqflock(dqp); |
@@ -189,7 +195,7 @@ xfs_qm_dqpurge( | |||
189 | xfs_dqfunlock(dqp); | 195 | xfs_dqfunlock(dqp); |
190 | xfs_dqunlock(dqp); | 196 | xfs_dqunlock(dqp); |
191 | 197 | ||
192 | radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), | 198 | radix_tree_delete(xfs_dquot_tree(qi, dqp->q_core.d_flags), |
193 | be32_to_cpu(dqp->q_core.d_id)); | 199 | be32_to_cpu(dqp->q_core.d_id)); |
194 | qi->qi_dquots--; | 200 | qi->qi_dquots--; |
195 | 201 | ||
@@ -208,6 +214,8 @@ xfs_qm_dqpurge( | |||
208 | 214 | ||
209 | if (gdqp) | 215 | if (gdqp) |
210 | xfs_qm_dqput(gdqp); | 216 | xfs_qm_dqput(gdqp); |
217 | if (pdqp) | ||
218 | xfs_qm_dqput(pdqp); | ||
211 | return 0; | 219 | return 0; |
212 | } | 220 | } |
213 | 221 | ||
@@ -299,8 +307,10 @@ xfs_qm_mount_quotas( | |||
299 | */ | 307 | */ |
300 | if (!XFS_IS_UQUOTA_ON(mp)) | 308 | if (!XFS_IS_UQUOTA_ON(mp)) |
301 | mp->m_qflags &= ~XFS_UQUOTA_CHKD; | 309 | mp->m_qflags &= ~XFS_UQUOTA_CHKD; |
302 | if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) | 310 | if (!XFS_IS_GQUOTA_ON(mp)) |
303 | mp->m_qflags &= ~XFS_OQUOTA_CHKD; | 311 | mp->m_qflags &= ~XFS_GQUOTA_CHKD; |
312 | if (!XFS_IS_PQUOTA_ON(mp)) | ||
313 | mp->m_qflags &= ~XFS_PQUOTA_CHKD; | ||
304 | 314 | ||
305 | write_changes: | 315 | write_changes: |
306 | /* | 316 | /* |
@@ -362,6 +372,10 @@ xfs_qm_unmount_quotas( | |||
362 | IRELE(mp->m_quotainfo->qi_gquotaip); | 372 | IRELE(mp->m_quotainfo->qi_gquotaip); |
363 | mp->m_quotainfo->qi_gquotaip = NULL; | 373 | mp->m_quotainfo->qi_gquotaip = NULL; |
364 | } | 374 | } |
375 | if (mp->m_quotainfo->qi_pquotaip) { | ||
376 | IRELE(mp->m_quotainfo->qi_pquotaip); | ||
377 | mp->m_quotainfo->qi_pquotaip = NULL; | ||
378 | } | ||
365 | } | 379 | } |
366 | } | 380 | } |
367 | 381 | ||
@@ -408,7 +422,10 @@ xfs_qm_dqattach_one( | |||
408 | * be reclaimed as long as we have a ref from inode and we | 422 | * be reclaimed as long as we have a ref from inode and we |
409 | * hold the ilock. | 423 | * hold the ilock. |
410 | */ | 424 | */ |
411 | dqp = udqhint->q_gdquot; | 425 | if (type == XFS_DQ_GROUP) |
426 | dqp = udqhint->q_gdquot; | ||
427 | else | ||
428 | dqp = udqhint->q_pdquot; | ||
412 | if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { | 429 | if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { |
413 | ASSERT(*IO_idqpp == NULL); | 430 | ASSERT(*IO_idqpp == NULL); |
414 | 431 | ||
@@ -451,28 +468,42 @@ xfs_qm_dqattach_one( | |||
451 | 468 | ||
452 | 469 | ||
453 | /* | 470 | /* |
454 | * Given a udquot and gdquot, attach a ptr to the group dquot in the | 471 | * Given a udquot and group/project type, attach the group/project |
455 | * udquot as a hint for future lookups. | 472 | * dquot pointer to the udquot as a hint for future lookups. |
456 | */ | 473 | */ |
457 | STATIC void | 474 | STATIC void |
458 | xfs_qm_dqattach_grouphint( | 475 | xfs_qm_dqattach_hint( |
459 | xfs_dquot_t *udq, | 476 | struct xfs_inode *ip, |
460 | xfs_dquot_t *gdq) | 477 | int type) |
461 | { | 478 | { |
462 | xfs_dquot_t *tmp; | 479 | struct xfs_dquot **dqhintp; |
480 | struct xfs_dquot *dqp; | ||
481 | struct xfs_dquot *udq = ip->i_udquot; | ||
482 | |||
483 | ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); | ||
463 | 484 | ||
464 | xfs_dqlock(udq); | 485 | xfs_dqlock(udq); |
465 | 486 | ||
466 | tmp = udq->q_gdquot; | 487 | if (type == XFS_DQ_GROUP) { |
467 | if (tmp) { | 488 | dqp = ip->i_gdquot; |
468 | if (tmp == gdq) | 489 | dqhintp = &udq->q_gdquot; |
490 | } else { | ||
491 | dqp = ip->i_pdquot; | ||
492 | dqhintp = &udq->q_pdquot; | ||
493 | } | ||
494 | |||
495 | if (*dqhintp) { | ||
496 | struct xfs_dquot *tmp; | ||
497 | |||
498 | if (*dqhintp == dqp) | ||
469 | goto done; | 499 | goto done; |
470 | 500 | ||
471 | udq->q_gdquot = NULL; | 501 | tmp = *dqhintp; |
502 | *dqhintp = NULL; | ||
472 | xfs_qm_dqrele(tmp); | 503 | xfs_qm_dqrele(tmp); |
473 | } | 504 | } |
474 | 505 | ||
475 | udq->q_gdquot = xfs_qm_dqhold(gdq); | 506 | *dqhintp = xfs_qm_dqhold(dqp); |
476 | done: | 507 | done: |
477 | xfs_dqunlock(udq); | 508 | xfs_dqunlock(udq); |
478 | } | 509 | } |
@@ -489,8 +520,7 @@ xfs_qm_need_dqattach( | |||
489 | return false; | 520 | return false; |
490 | if (!XFS_NOT_DQATTACHED(mp, ip)) | 521 | if (!XFS_NOT_DQATTACHED(mp, ip)) |
491 | return false; | 522 | return false; |
492 | if (ip->i_ino == mp->m_sb.sb_uquotino || | 523 | if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) |
493 | ip->i_ino == mp->m_sb.sb_gquotino) | ||
494 | return false; | 524 | return false; |
495 | return true; | 525 | return true; |
496 | } | 526 | } |
@@ -526,12 +556,8 @@ xfs_qm_dqattach_locked( | |||
526 | } | 556 | } |
527 | 557 | ||
528 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 558 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
529 | if (XFS_IS_OQUOTA_ON(mp)) { | 559 | if (XFS_IS_GQUOTA_ON(mp)) { |
530 | error = XFS_IS_GQUOTA_ON(mp) ? | 560 | error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, |
531 | xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, | ||
532 | flags & XFS_QMOPT_DQALLOC, | ||
533 | ip->i_udquot, &ip->i_gdquot) : | ||
534 | xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, | ||
535 | flags & XFS_QMOPT_DQALLOC, | 561 | flags & XFS_QMOPT_DQALLOC, |
536 | ip->i_udquot, &ip->i_gdquot); | 562 | ip->i_udquot, &ip->i_gdquot); |
537 | /* | 563 | /* |
@@ -543,14 +569,28 @@ xfs_qm_dqattach_locked( | |||
543 | nquotas++; | 569 | nquotas++; |
544 | } | 570 | } |
545 | 571 | ||
572 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
573 | if (XFS_IS_PQUOTA_ON(mp)) { | ||
574 | error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, | ||
575 | flags & XFS_QMOPT_DQALLOC, | ||
576 | ip->i_udquot, &ip->i_pdquot); | ||
577 | /* | ||
578 | * Don't worry about the udquot that we may have | ||
579 | * attached above. It'll get detached, if not already. | ||
580 | */ | ||
581 | if (error) | ||
582 | goto done; | ||
583 | nquotas++; | ||
584 | } | ||
585 | |||
546 | /* | 586 | /* |
547 | * Attach this group quota to the user quota as a hint. | 587 | * Attach this group/project quota to the user quota as a hint. |
548 | * This WON'T, in general, result in a thrash. | 588 | * This WON'T, in general, result in a thrash. |
549 | */ | 589 | */ |
550 | if (nquotas == 2) { | 590 | if (nquotas > 1 && ip->i_udquot) { |
551 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 591 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
552 | ASSERT(ip->i_udquot); | 592 | ASSERT(ip->i_gdquot || !XFS_IS_GQUOTA_ON(mp)); |
553 | ASSERT(ip->i_gdquot); | 593 | ASSERT(ip->i_pdquot || !XFS_IS_PQUOTA_ON(mp)); |
554 | 594 | ||
555 | /* | 595 | /* |
556 | * We do not have i_udquot locked at this point, but this check | 596 | * We do not have i_udquot locked at this point, but this check |
@@ -559,7 +599,10 @@ xfs_qm_dqattach_locked( | |||
559 | * succeed in general. | 599 | * succeed in general. |
560 | */ | 600 | */ |
561 | if (ip->i_udquot->q_gdquot != ip->i_gdquot) | 601 | if (ip->i_udquot->q_gdquot != ip->i_gdquot) |
562 | xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); | 602 | xfs_qm_dqattach_hint(ip, XFS_DQ_GROUP); |
603 | |||
604 | if (ip->i_udquot->q_pdquot != ip->i_pdquot) | ||
605 | xfs_qm_dqattach_hint(ip, XFS_DQ_PROJ); | ||
563 | } | 606 | } |
564 | 607 | ||
565 | done: | 608 | done: |
@@ -567,8 +610,10 @@ xfs_qm_dqattach_locked( | |||
567 | if (!error) { | 610 | if (!error) { |
568 | if (XFS_IS_UQUOTA_ON(mp)) | 611 | if (XFS_IS_UQUOTA_ON(mp)) |
569 | ASSERT(ip->i_udquot); | 612 | ASSERT(ip->i_udquot); |
570 | if (XFS_IS_OQUOTA_ON(mp)) | 613 | if (XFS_IS_GQUOTA_ON(mp)) |
571 | ASSERT(ip->i_gdquot); | 614 | ASSERT(ip->i_gdquot); |
615 | if (XFS_IS_PQUOTA_ON(mp)) | ||
616 | ASSERT(ip->i_pdquot); | ||
572 | } | 617 | } |
573 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 618 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
574 | #endif | 619 | #endif |
@@ -601,13 +646,12 @@ void | |||
601 | xfs_qm_dqdetach( | 646 | xfs_qm_dqdetach( |
602 | xfs_inode_t *ip) | 647 | xfs_inode_t *ip) |
603 | { | 648 | { |
604 | if (!(ip->i_udquot || ip->i_gdquot)) | 649 | if (!(ip->i_udquot || ip->i_gdquot || ip->i_pdquot)) |
605 | return; | 650 | return; |
606 | 651 | ||
607 | trace_xfs_dquot_dqdetach(ip); | 652 | trace_xfs_dquot_dqdetach(ip); |
608 | 653 | ||
609 | ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); | 654 | ASSERT(!xfs_is_quota_inode(&ip->i_mount->m_sb, ip->i_ino)); |
610 | ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); | ||
611 | if (ip->i_udquot) { | 655 | if (ip->i_udquot) { |
612 | xfs_qm_dqrele(ip->i_udquot); | 656 | xfs_qm_dqrele(ip->i_udquot); |
613 | ip->i_udquot = NULL; | 657 | ip->i_udquot = NULL; |
@@ -616,6 +660,10 @@ xfs_qm_dqdetach( | |||
616 | xfs_qm_dqrele(ip->i_gdquot); | 660 | xfs_qm_dqrele(ip->i_gdquot); |
617 | ip->i_gdquot = NULL; | 661 | ip->i_gdquot = NULL; |
618 | } | 662 | } |
663 | if (ip->i_pdquot) { | ||
664 | xfs_qm_dqrele(ip->i_pdquot); | ||
665 | ip->i_pdquot = NULL; | ||
666 | } | ||
619 | } | 667 | } |
620 | 668 | ||
621 | int | 669 | int |
@@ -660,6 +708,7 @@ xfs_qm_init_quotainfo( | |||
660 | 708 | ||
661 | INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); | 709 | INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); |
662 | INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); | 710 | INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); |
711 | INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); | ||
663 | mutex_init(&qinf->qi_tree_lock); | 712 | mutex_init(&qinf->qi_tree_lock); |
664 | 713 | ||
665 | INIT_LIST_HEAD(&qinf->qi_lru_list); | 714 | INIT_LIST_HEAD(&qinf->qi_lru_list); |
@@ -761,6 +810,10 @@ xfs_qm_destroy_quotainfo( | |||
761 | IRELE(qi->qi_gquotaip); | 810 | IRELE(qi->qi_gquotaip); |
762 | qi->qi_gquotaip = NULL; | 811 | qi->qi_gquotaip = NULL; |
763 | } | 812 | } |
813 | if (qi->qi_pquotaip) { | ||
814 | IRELE(qi->qi_pquotaip); | ||
815 | qi->qi_pquotaip = NULL; | ||
816 | } | ||
764 | mutex_destroy(&qi->qi_quotaofflock); | 817 | mutex_destroy(&qi->qi_quotaofflock); |
765 | kmem_free(qi); | 818 | kmem_free(qi); |
766 | mp->m_quotainfo = NULL; | 819 | mp->m_quotainfo = NULL; |
@@ -1152,7 +1205,7 @@ xfs_qm_dqusage_adjust( | |||
1152 | * rootino must have its resources accounted for, not so with the quota | 1205 | * rootino must have its resources accounted for, not so with the quota |
1153 | * inodes. | 1206 | * inodes. |
1154 | */ | 1207 | */ |
1155 | if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { | 1208 | if (xfs_is_quota_inode(&mp->m_sb, ino)) { |
1156 | *res = BULKSTAT_RV_NOTHING; | 1209 | *res = BULKSTAT_RV_NOTHING; |
1157 | return XFS_ERROR(EINVAL); | 1210 | return XFS_ERROR(EINVAL); |
1158 | } | 1211 | } |
@@ -1262,19 +1315,21 @@ int | |||
1262 | xfs_qm_quotacheck( | 1315 | xfs_qm_quotacheck( |
1263 | xfs_mount_t *mp) | 1316 | xfs_mount_t *mp) |
1264 | { | 1317 | { |
1265 | int done, count, error, error2; | 1318 | int done, count, error, error2; |
1266 | xfs_ino_t lastino; | 1319 | xfs_ino_t lastino; |
1267 | size_t structsz; | 1320 | size_t structsz; |
1268 | xfs_inode_t *uip, *gip; | 1321 | uint flags; |
1269 | uint flags; | 1322 | LIST_HEAD (buffer_list); |
1270 | LIST_HEAD (buffer_list); | 1323 | struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip; |
1324 | struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip; | ||
1325 | struct xfs_inode *pip = mp->m_quotainfo->qi_pquotaip; | ||
1271 | 1326 | ||
1272 | count = INT_MAX; | 1327 | count = INT_MAX; |
1273 | structsz = 1; | 1328 | structsz = 1; |
1274 | lastino = 0; | 1329 | lastino = 0; |
1275 | flags = 0; | 1330 | flags = 0; |
1276 | 1331 | ||
1277 | ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); | 1332 | ASSERT(uip || gip || pip); |
1278 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1333 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
1279 | 1334 | ||
1280 | xfs_notice(mp, "Quotacheck needed: Please wait."); | 1335 | xfs_notice(mp, "Quotacheck needed: Please wait."); |
@@ -1284,7 +1339,6 @@ xfs_qm_quotacheck( | |||
1284 | * their counters to zero. We need a clean slate. | 1339 | * their counters to zero. We need a clean slate. |
1285 | * We don't log our changes till later. | 1340 | * We don't log our changes till later. |
1286 | */ | 1341 | */ |
1287 | uip = mp->m_quotainfo->qi_uquotaip; | ||
1288 | if (uip) { | 1342 | if (uip) { |
1289 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA, | 1343 | error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA, |
1290 | &buffer_list); | 1344 | &buffer_list); |
@@ -1293,14 +1347,20 @@ xfs_qm_quotacheck( | |||
1293 | flags |= XFS_UQUOTA_CHKD; | 1347 | flags |= XFS_UQUOTA_CHKD; |
1294 | } | 1348 | } |
1295 | 1349 | ||
1296 | gip = mp->m_quotainfo->qi_gquotaip; | ||
1297 | if (gip) { | 1350 | if (gip) { |
1298 | error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? | 1351 | error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA, |
1299 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, | ||
1300 | &buffer_list); | 1352 | &buffer_list); |
1301 | if (error) | 1353 | if (error) |
1302 | goto error_return; | 1354 | goto error_return; |
1303 | flags |= XFS_OQUOTA_CHKD; | 1355 | flags |= XFS_GQUOTA_CHKD; |
1356 | } | ||
1357 | |||
1358 | if (pip) { | ||
1359 | error = xfs_qm_dqiterate(mp, pip, XFS_QMOPT_PQUOTA, | ||
1360 | &buffer_list); | ||
1361 | if (error) | ||
1362 | goto error_return; | ||
1363 | flags |= XFS_PQUOTA_CHKD; | ||
1304 | } | 1364 | } |
1305 | 1365 | ||
1306 | do { | 1366 | do { |
@@ -1395,15 +1455,14 @@ STATIC int | |||
1395 | xfs_qm_init_quotainos( | 1455 | xfs_qm_init_quotainos( |
1396 | xfs_mount_t *mp) | 1456 | xfs_mount_t *mp) |
1397 | { | 1457 | { |
1398 | xfs_inode_t *uip, *gip; | 1458 | struct xfs_inode *uip = NULL; |
1399 | int error; | 1459 | struct xfs_inode *gip = NULL; |
1400 | __int64_t sbflags; | 1460 | struct xfs_inode *pip = NULL; |
1401 | uint flags; | 1461 | int error; |
1462 | __int64_t sbflags = 0; | ||
1463 | uint flags = 0; | ||
1402 | 1464 | ||
1403 | ASSERT(mp->m_quotainfo); | 1465 | ASSERT(mp->m_quotainfo); |
1404 | uip = gip = NULL; | ||
1405 | sbflags = 0; | ||
1406 | flags = 0; | ||
1407 | 1466 | ||
1408 | /* | 1467 | /* |
1409 | * Get the uquota and gquota inodes | 1468 | * Get the uquota and gquota inodes |
@@ -1412,19 +1471,27 @@ xfs_qm_init_quotainos( | |||
1412 | if (XFS_IS_UQUOTA_ON(mp) && | 1471 | if (XFS_IS_UQUOTA_ON(mp) && |
1413 | mp->m_sb.sb_uquotino != NULLFSINO) { | 1472 | mp->m_sb.sb_uquotino != NULLFSINO) { |
1414 | ASSERT(mp->m_sb.sb_uquotino > 0); | 1473 | ASSERT(mp->m_sb.sb_uquotino > 0); |
1415 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, | 1474 | error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, |
1416 | 0, 0, &uip))) | 1475 | 0, 0, &uip); |
1476 | if (error) | ||
1417 | return XFS_ERROR(error); | 1477 | return XFS_ERROR(error); |
1418 | } | 1478 | } |
1419 | if (XFS_IS_OQUOTA_ON(mp) && | 1479 | if (XFS_IS_GQUOTA_ON(mp) && |
1420 | mp->m_sb.sb_gquotino != NULLFSINO) { | 1480 | mp->m_sb.sb_gquotino != NULLFSINO) { |
1421 | ASSERT(mp->m_sb.sb_gquotino > 0); | 1481 | ASSERT(mp->m_sb.sb_gquotino > 0); |
1422 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, | 1482 | error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, |
1423 | 0, 0, &gip))) { | 1483 | 0, 0, &gip); |
1424 | if (uip) | 1484 | if (error) |
1425 | IRELE(uip); | 1485 | goto error_rele; |
1426 | return XFS_ERROR(error); | 1486 | } |
1427 | } | 1487 | /* XXX: Use gquotino for now */ |
1488 | if (XFS_IS_PQUOTA_ON(mp) && | ||
1489 | mp->m_sb.sb_gquotino != NULLFSINO) { | ||
1490 | ASSERT(mp->m_sb.sb_gquotino > 0); | ||
1491 | error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, | ||
1492 | 0, 0, &pip); | ||
1493 | if (error) | ||
1494 | goto error_rele; | ||
1428 | } | 1495 | } |
1429 | } else { | 1496 | } else { |
1430 | flags |= XFS_QMOPT_SBVERSION; | 1497 | flags |= XFS_QMOPT_SBVERSION; |
@@ -1433,36 +1500,52 @@ xfs_qm_init_quotainos( | |||
1433 | } | 1500 | } |
1434 | 1501 | ||
1435 | /* | 1502 | /* |
1436 | * Create the two inodes, if they don't exist already. The changes | 1503 | * Create the three inodes, if they don't exist already. The changes |
1437 | * made above will get added to a transaction and logged in one of | 1504 | * made above will get added to a transaction and logged in one of |
1438 | * the qino_alloc calls below. If the device is readonly, | 1505 | * the qino_alloc calls below. If the device is readonly, |
1439 | * temporarily switch to read-write to do this. | 1506 | * temporarily switch to read-write to do this. |
1440 | */ | 1507 | */ |
1441 | if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { | 1508 | if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { |
1442 | if ((error = xfs_qm_qino_alloc(mp, &uip, | 1509 | error = xfs_qm_qino_alloc(mp, &uip, |
1443 | sbflags | XFS_SB_UQUOTINO, | 1510 | sbflags | XFS_SB_UQUOTINO, |
1444 | flags | XFS_QMOPT_UQUOTA))) | 1511 | flags | XFS_QMOPT_UQUOTA); |
1445 | return XFS_ERROR(error); | 1512 | if (error) |
1513 | goto error_rele; | ||
1446 | 1514 | ||
1447 | flags &= ~XFS_QMOPT_SBVERSION; | 1515 | flags &= ~XFS_QMOPT_SBVERSION; |
1448 | } | 1516 | } |
1449 | if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { | 1517 | if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) { |
1450 | flags |= (XFS_IS_GQUOTA_ON(mp) ? | ||
1451 | XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); | ||
1452 | error = xfs_qm_qino_alloc(mp, &gip, | 1518 | error = xfs_qm_qino_alloc(mp, &gip, |
1453 | sbflags | XFS_SB_GQUOTINO, flags); | 1519 | sbflags | XFS_SB_GQUOTINO, |
1454 | if (error) { | 1520 | flags | XFS_QMOPT_GQUOTA); |
1455 | if (uip) | 1521 | if (error) |
1456 | IRELE(uip); | 1522 | goto error_rele; |
1457 | 1523 | ||
1458 | return XFS_ERROR(error); | 1524 | flags &= ~XFS_QMOPT_SBVERSION; |
1459 | } | 1525 | } |
1526 | if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) { | ||
1527 | /* XXX: Use XFS_SB_GQUOTINO for now */ | ||
1528 | error = xfs_qm_qino_alloc(mp, &pip, | ||
1529 | sbflags | XFS_SB_GQUOTINO, | ||
1530 | flags | XFS_QMOPT_PQUOTA); | ||
1531 | if (error) | ||
1532 | goto error_rele; | ||
1460 | } | 1533 | } |
1461 | 1534 | ||
1462 | mp->m_quotainfo->qi_uquotaip = uip; | 1535 | mp->m_quotainfo->qi_uquotaip = uip; |
1463 | mp->m_quotainfo->qi_gquotaip = gip; | 1536 | mp->m_quotainfo->qi_gquotaip = gip; |
1537 | mp->m_quotainfo->qi_pquotaip = pip; | ||
1464 | 1538 | ||
1465 | return 0; | 1539 | return 0; |
1540 | |||
1541 | error_rele: | ||
1542 | if (uip) | ||
1543 | IRELE(uip); | ||
1544 | if (gip) | ||
1545 | IRELE(gip); | ||
1546 | if (pip) | ||
1547 | IRELE(pip); | ||
1548 | return XFS_ERROR(error); | ||
1466 | } | 1549 | } |
1467 | 1550 | ||
1468 | STATIC void | 1551 | STATIC void |
@@ -1473,7 +1556,7 @@ xfs_qm_dqfree_one( | |||
1473 | struct xfs_quotainfo *qi = mp->m_quotainfo; | 1556 | struct xfs_quotainfo *qi = mp->m_quotainfo; |
1474 | 1557 | ||
1475 | mutex_lock(&qi->qi_tree_lock); | 1558 | mutex_lock(&qi->qi_tree_lock); |
1476 | radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), | 1559 | radix_tree_delete(xfs_dquot_tree(qi, dqp->q_core.d_flags), |
1477 | be32_to_cpu(dqp->q_core.d_id)); | 1560 | be32_to_cpu(dqp->q_core.d_id)); |
1478 | 1561 | ||
1479 | qi->qi_dquots--; | 1562 | qi->qi_dquots--; |
@@ -1656,10 +1739,13 @@ xfs_qm_vop_dqalloc( | |||
1656 | prid_t prid, | 1739 | prid_t prid, |
1657 | uint flags, | 1740 | uint flags, |
1658 | struct xfs_dquot **O_udqpp, | 1741 | struct xfs_dquot **O_udqpp, |
1659 | struct xfs_dquot **O_gdqpp) | 1742 | struct xfs_dquot **O_gdqpp, |
1743 | struct xfs_dquot **O_pdqpp) | ||
1660 | { | 1744 | { |
1661 | struct xfs_mount *mp = ip->i_mount; | 1745 | struct xfs_mount *mp = ip->i_mount; |
1662 | struct xfs_dquot *uq, *gq; | 1746 | struct xfs_dquot *uq = NULL; |
1747 | struct xfs_dquot *gq = NULL; | ||
1748 | struct xfs_dquot *pq = NULL; | ||
1663 | int error; | 1749 | int error; |
1664 | uint lockflags; | 1750 | uint lockflags; |
1665 | 1751 | ||
@@ -1684,7 +1770,6 @@ xfs_qm_vop_dqalloc( | |||
1684 | } | 1770 | } |
1685 | } | 1771 | } |
1686 | 1772 | ||
1687 | uq = gq = NULL; | ||
1688 | if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { | 1773 | if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { |
1689 | if (ip->i_d.di_uid != uid) { | 1774 | if (ip->i_d.di_uid != uid) { |
1690 | /* | 1775 | /* |
@@ -1697,11 +1782,12 @@ xfs_qm_vop_dqalloc( | |||
1697 | * holding ilock. | 1782 | * holding ilock. |
1698 | */ | 1783 | */ |
1699 | xfs_iunlock(ip, lockflags); | 1784 | xfs_iunlock(ip, lockflags); |
1700 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, | 1785 | error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, |
1701 | XFS_DQ_USER, | 1786 | XFS_DQ_USER, |
1702 | XFS_QMOPT_DQALLOC | | 1787 | XFS_QMOPT_DQALLOC | |
1703 | XFS_QMOPT_DOWARN, | 1788 | XFS_QMOPT_DOWARN, |
1704 | &uq))) { | 1789 | &uq); |
1790 | if (error) { | ||
1705 | ASSERT(error != ENOENT); | 1791 | ASSERT(error != ENOENT); |
1706 | return error; | 1792 | return error; |
1707 | } | 1793 | } |
@@ -1723,15 +1809,14 @@ xfs_qm_vop_dqalloc( | |||
1723 | if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { | 1809 | if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { |
1724 | if (ip->i_d.di_gid != gid) { | 1810 | if (ip->i_d.di_gid != gid) { |
1725 | xfs_iunlock(ip, lockflags); | 1811 | xfs_iunlock(ip, lockflags); |
1726 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, | 1812 | error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, |
1727 | XFS_DQ_GROUP, | 1813 | XFS_DQ_GROUP, |
1728 | XFS_QMOPT_DQALLOC | | 1814 | XFS_QMOPT_DQALLOC | |
1729 | XFS_QMOPT_DOWARN, | 1815 | XFS_QMOPT_DOWARN, |
1730 | &gq))) { | 1816 | &gq); |
1731 | if (uq) | 1817 | if (error) { |
1732 | xfs_qm_dqrele(uq); | ||
1733 | ASSERT(error != ENOENT); | 1818 | ASSERT(error != ENOENT); |
1734 | return error; | 1819 | goto error_rele; |
1735 | } | 1820 | } |
1736 | xfs_dqunlock(gq); | 1821 | xfs_dqunlock(gq); |
1737 | lockflags = XFS_ILOCK_SHARED; | 1822 | lockflags = XFS_ILOCK_SHARED; |
@@ -1740,25 +1825,25 @@ xfs_qm_vop_dqalloc( | |||
1740 | ASSERT(ip->i_gdquot); | 1825 | ASSERT(ip->i_gdquot); |
1741 | gq = xfs_qm_dqhold(ip->i_gdquot); | 1826 | gq = xfs_qm_dqhold(ip->i_gdquot); |
1742 | } | 1827 | } |
1743 | } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { | 1828 | } |
1829 | if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { | ||
1744 | if (xfs_get_projid(ip) != prid) { | 1830 | if (xfs_get_projid(ip) != prid) { |
1745 | xfs_iunlock(ip, lockflags); | 1831 | xfs_iunlock(ip, lockflags); |
1746 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, | 1832 | error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, |
1747 | XFS_DQ_PROJ, | 1833 | XFS_DQ_PROJ, |
1748 | XFS_QMOPT_DQALLOC | | 1834 | XFS_QMOPT_DQALLOC | |
1749 | XFS_QMOPT_DOWARN, | 1835 | XFS_QMOPT_DOWARN, |
1750 | &gq))) { | 1836 | &pq); |
1751 | if (uq) | 1837 | if (error) { |
1752 | xfs_qm_dqrele(uq); | ||
1753 | ASSERT(error != ENOENT); | 1838 | ASSERT(error != ENOENT); |
1754 | return (error); | 1839 | goto error_rele; |
1755 | } | 1840 | } |
1756 | xfs_dqunlock(gq); | 1841 | xfs_dqunlock(pq); |
1757 | lockflags = XFS_ILOCK_SHARED; | 1842 | lockflags = XFS_ILOCK_SHARED; |
1758 | xfs_ilock(ip, lockflags); | 1843 | xfs_ilock(ip, lockflags); |
1759 | } else { | 1844 | } else { |
1760 | ASSERT(ip->i_gdquot); | 1845 | ASSERT(ip->i_pdquot); |
1761 | gq = xfs_qm_dqhold(ip->i_gdquot); | 1846 | pq = xfs_qm_dqhold(ip->i_pdquot); |
1762 | } | 1847 | } |
1763 | } | 1848 | } |
1764 | if (uq) | 1849 | if (uq) |
@@ -1773,7 +1858,18 @@ xfs_qm_vop_dqalloc( | |||
1773 | *O_gdqpp = gq; | 1858 | *O_gdqpp = gq; |
1774 | else if (gq) | 1859 | else if (gq) |
1775 | xfs_qm_dqrele(gq); | 1860 | xfs_qm_dqrele(gq); |
1861 | if (O_pdqpp) | ||
1862 | *O_pdqpp = pq; | ||
1863 | else if (pq) | ||
1864 | xfs_qm_dqrele(pq); | ||
1776 | return 0; | 1865 | return 0; |
1866 | |||
1867 | error_rele: | ||
1868 | if (gq) | ||
1869 | xfs_qm_dqrele(gq); | ||
1870 | if (uq) | ||
1871 | xfs_qm_dqrele(uq); | ||
1872 | return error; | ||
1777 | } | 1873 | } |
1778 | 1874 | ||
1779 | /* | 1875 | /* |
@@ -1821,29 +1917,34 @@ xfs_qm_vop_chown( | |||
1821 | */ | 1917 | */ |
1822 | int | 1918 | int |
1823 | xfs_qm_vop_chown_reserve( | 1919 | xfs_qm_vop_chown_reserve( |
1824 | xfs_trans_t *tp, | 1920 | struct xfs_trans *tp, |
1825 | xfs_inode_t *ip, | 1921 | struct xfs_inode *ip, |
1826 | xfs_dquot_t *udqp, | 1922 | struct xfs_dquot *udqp, |
1827 | xfs_dquot_t *gdqp, | 1923 | struct xfs_dquot *gdqp, |
1828 | uint flags) | 1924 | struct xfs_dquot *pdqp, |
1925 | uint flags) | ||
1829 | { | 1926 | { |
1830 | xfs_mount_t *mp = ip->i_mount; | 1927 | struct xfs_mount *mp = ip->i_mount; |
1831 | uint delblks, blkflags, prjflags = 0; | 1928 | uint delblks, blkflags, prjflags = 0; |
1832 | xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; | 1929 | struct xfs_dquot *udq_unres = NULL; |
1833 | int error; | 1930 | struct xfs_dquot *gdq_unres = NULL; |
1931 | struct xfs_dquot *pdq_unres = NULL; | ||
1932 | struct xfs_dquot *udq_delblks = NULL; | ||
1933 | struct xfs_dquot *gdq_delblks = NULL; | ||
1934 | struct xfs_dquot *pdq_delblks = NULL; | ||
1935 | int error; | ||
1834 | 1936 | ||
1835 | 1937 | ||
1836 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | 1938 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
1837 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1939 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
1838 | 1940 | ||
1839 | delblks = ip->i_delayed_blks; | 1941 | delblks = ip->i_delayed_blks; |
1840 | delblksudq = delblksgdq = unresudq = unresgdq = NULL; | ||
1841 | blkflags = XFS_IS_REALTIME_INODE(ip) ? | 1942 | blkflags = XFS_IS_REALTIME_INODE(ip) ? |
1842 | XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; | 1943 | XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; |
1843 | 1944 | ||
1844 | if (XFS_IS_UQUOTA_ON(mp) && udqp && | 1945 | if (XFS_IS_UQUOTA_ON(mp) && udqp && |
1845 | ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { | 1946 | ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { |
1846 | delblksudq = udqp; | 1947 | udq_delblks = udqp; |
1847 | /* | 1948 | /* |
1848 | * If there are delayed allocation blocks, then we have to | 1949 | * If there are delayed allocation blocks, then we have to |
1849 | * unreserve those from the old dquot, and add them to the | 1950 | * unreserve those from the old dquot, and add them to the |
@@ -1851,29 +1952,34 @@ xfs_qm_vop_chown_reserve( | |||
1851 | */ | 1952 | */ |
1852 | if (delblks) { | 1953 | if (delblks) { |
1853 | ASSERT(ip->i_udquot); | 1954 | ASSERT(ip->i_udquot); |
1854 | unresudq = ip->i_udquot; | 1955 | udq_unres = ip->i_udquot; |
1855 | } | 1956 | } |
1856 | } | 1957 | } |
1857 | if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { | 1958 | if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp && |
1858 | if (XFS_IS_PQUOTA_ON(ip->i_mount) && | 1959 | ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id)) { |
1859 | xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) | 1960 | gdq_delblks = gdqp; |
1860 | prjflags = XFS_QMOPT_ENOSPC; | 1961 | if (delblks) { |
1861 | 1962 | ASSERT(ip->i_gdquot); | |
1862 | if (prjflags || | 1963 | gdq_unres = ip->i_gdquot; |
1863 | (XFS_IS_GQUOTA_ON(ip->i_mount) && | ||
1864 | ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { | ||
1865 | delblksgdq = gdqp; | ||
1866 | if (delblks) { | ||
1867 | ASSERT(ip->i_gdquot); | ||
1868 | unresgdq = ip->i_gdquot; | ||
1869 | } | ||
1870 | } | 1964 | } |
1871 | } | 1965 | } |
1872 | 1966 | ||
1873 | if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, | 1967 | if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp && |
1874 | delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, | 1968 | xfs_get_projid(ip) != be32_to_cpu(pdqp->q_core.d_id)) { |
1875 | flags | blkflags | prjflags))) | 1969 | prjflags = XFS_QMOPT_ENOSPC; |
1876 | return (error); | 1970 | pdq_delblks = pdqp; |
1971 | if (delblks) { | ||
1972 | ASSERT(ip->i_pdquot); | ||
1973 | pdq_unres = ip->i_pdquot; | ||
1974 | } | ||
1975 | } | ||
1976 | |||
1977 | error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, | ||
1978 | udq_delblks, gdq_delblks, pdq_delblks, | ||
1979 | ip->i_d.di_nblocks, 1, | ||
1980 | flags | blkflags | prjflags); | ||
1981 | if (error) | ||
1982 | return error; | ||
1877 | 1983 | ||
1878 | /* | 1984 | /* |
1879 | * Do the delayed blks reservations/unreservations now. Since, these | 1985 | * Do the delayed blks reservations/unreservations now. Since, these |
@@ -1885,15 +1991,17 @@ xfs_qm_vop_chown_reserve( | |||
1885 | /* | 1991 | /* |
1886 | * Do the reservations first. Unreservation can't fail. | 1992 | * Do the reservations first. Unreservation can't fail. |
1887 | */ | 1993 | */ |
1888 | ASSERT(delblksudq || delblksgdq); | 1994 | ASSERT(udq_delblks || gdq_delblks || pdq_delblks); |
1889 | ASSERT(unresudq || unresgdq); | 1995 | ASSERT(udq_unres || gdq_unres || pdq_unres); |
1890 | if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, | 1996 | error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, |
1891 | delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, | 1997 | udq_delblks, gdq_delblks, pdq_delblks, |
1892 | flags | blkflags | prjflags))) | 1998 | (xfs_qcnt_t)delblks, 0, |
1893 | return (error); | 1999 | flags | blkflags | prjflags); |
2000 | if (error) | ||
2001 | return error; | ||
1894 | xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, | 2002 | xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, |
1895 | unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, | 2003 | udq_unres, gdq_unres, pdq_unres, |
1896 | blkflags); | 2004 | -((xfs_qcnt_t)delblks), 0, blkflags); |
1897 | } | 2005 | } |
1898 | 2006 | ||
1899 | return (0); | 2007 | return (0); |
@@ -1932,7 +2040,8 @@ xfs_qm_vop_create_dqattach( | |||
1932 | struct xfs_trans *tp, | 2040 | struct xfs_trans *tp, |
1933 | struct xfs_inode *ip, | 2041 | struct xfs_inode *ip, |
1934 | struct xfs_dquot *udqp, | 2042 | struct xfs_dquot *udqp, |
1935 | struct xfs_dquot *gdqp) | 2043 | struct xfs_dquot *gdqp, |
2044 | struct xfs_dquot *pdqp) | ||
1936 | { | 2045 | { |
1937 | struct xfs_mount *mp = tp->t_mountp; | 2046 | struct xfs_mount *mp = tp->t_mountp; |
1938 | 2047 | ||
@@ -1952,13 +2061,18 @@ xfs_qm_vop_create_dqattach( | |||
1952 | } | 2061 | } |
1953 | if (gdqp) { | 2062 | if (gdqp) { |
1954 | ASSERT(ip->i_gdquot == NULL); | 2063 | ASSERT(ip->i_gdquot == NULL); |
1955 | ASSERT(XFS_IS_OQUOTA_ON(mp)); | 2064 | ASSERT(XFS_IS_GQUOTA_ON(mp)); |
1956 | ASSERT((XFS_IS_GQUOTA_ON(mp) ? | 2065 | ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); |
1957 | ip->i_d.di_gid : xfs_get_projid(ip)) == | ||
1958 | be32_to_cpu(gdqp->q_core.d_id)); | ||
1959 | |||
1960 | ip->i_gdquot = xfs_qm_dqhold(gdqp); | 2066 | ip->i_gdquot = xfs_qm_dqhold(gdqp); |
1961 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); | 2067 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); |
1962 | } | 2068 | } |
2069 | if (pdqp) { | ||
2070 | ASSERT(ip->i_pdquot == NULL); | ||
2071 | ASSERT(XFS_IS_PQUOTA_ON(mp)); | ||
2072 | ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); | ||
2073 | |||
2074 | ip->i_pdquot = xfs_qm_dqhold(pdqp); | ||
2075 | xfs_trans_mod_dquot(tp, pdqp, XFS_TRANS_DQ_ICOUNT, 1); | ||
2076 | } | ||
1963 | } | 2077 | } |
1964 | 2078 | ||
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 5d16a6e6900f..579d6a02a5b6 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h | |||
@@ -44,9 +44,11 @@ extern struct kmem_zone *xfs_qm_dqtrxzone; | |||
44 | typedef struct xfs_quotainfo { | 44 | typedef struct xfs_quotainfo { |
45 | struct radix_tree_root qi_uquota_tree; | 45 | struct radix_tree_root qi_uquota_tree; |
46 | struct radix_tree_root qi_gquota_tree; | 46 | struct radix_tree_root qi_gquota_tree; |
47 | struct radix_tree_root qi_pquota_tree; | ||
47 | struct mutex qi_tree_lock; | 48 | struct mutex qi_tree_lock; |
48 | xfs_inode_t *qi_uquotaip; /* user quota inode */ | 49 | struct xfs_inode *qi_uquotaip; /* user quota inode */ |
49 | xfs_inode_t *qi_gquotaip; /* group quota inode */ | 50 | struct xfs_inode *qi_gquotaip; /* group quota inode */ |
51 | struct xfs_inode *qi_pquotaip; /* project quota inode */ | ||
50 | struct list_head qi_lru_list; | 52 | struct list_head qi_lru_list; |
51 | struct mutex qi_lru_lock; | 53 | struct mutex qi_lru_lock; |
52 | int qi_lru_count; | 54 | int qi_lru_count; |
@@ -69,30 +71,66 @@ typedef struct xfs_quotainfo { | |||
69 | struct shrinker qi_shrinker; | 71 | struct shrinker qi_shrinker; |
70 | } xfs_quotainfo_t; | 72 | } xfs_quotainfo_t; |
71 | 73 | ||
72 | #define XFS_DQUOT_TREE(qi, type) \ | 74 | static inline struct radix_tree_root * |
73 | ((type & XFS_DQ_USER) ? \ | 75 | xfs_dquot_tree( |
74 | &((qi)->qi_uquota_tree) : \ | 76 | struct xfs_quotainfo *qi, |
75 | &((qi)->qi_gquota_tree)) | 77 | int type) |
78 | { | ||
79 | switch (type) { | ||
80 | case XFS_DQ_USER: | ||
81 | return &qi->qi_uquota_tree; | ||
82 | case XFS_DQ_GROUP: | ||
83 | return &qi->qi_gquota_tree; | ||
84 | case XFS_DQ_PROJ: | ||
85 | return &qi->qi_pquota_tree; | ||
86 | default: | ||
87 | ASSERT(0); | ||
88 | } | ||
89 | return NULL; | ||
90 | } | ||
76 | 91 | ||
92 | static inline struct xfs_inode * | ||
93 | xfs_dq_to_quota_inode(struct xfs_dquot *dqp) | ||
94 | { | ||
95 | switch (dqp->dq_flags & XFS_DQ_ALLTYPES) { | ||
96 | case XFS_DQ_USER: | ||
97 | return dqp->q_mount->m_quotainfo->qi_uquotaip; | ||
98 | case XFS_DQ_GROUP: | ||
99 | return dqp->q_mount->m_quotainfo->qi_gquotaip; | ||
100 | case XFS_DQ_PROJ: | ||
101 | return dqp->q_mount->m_quotainfo->qi_pquotaip; | ||
102 | default: | ||
103 | ASSERT(0); | ||
104 | } | ||
105 | return NULL; | ||
106 | } | ||
77 | 107 | ||
78 | extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp, | 108 | extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp, |
79 | unsigned int nbblks); | 109 | unsigned int nbblks); |
80 | extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); | 110 | extern void xfs_trans_mod_dquot(struct xfs_trans *, |
81 | extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, | 111 | struct xfs_dquot *, uint, long); |
82 | xfs_dquot_t *, xfs_dquot_t *, long, long, uint); | 112 | extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, |
83 | extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); | 113 | struct xfs_mount *, struct xfs_dquot *, |
84 | extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); | 114 | struct xfs_dquot *, struct xfs_dquot *, |
115 | long, long, uint); | ||
116 | extern void xfs_trans_dqjoin(struct xfs_trans *, struct xfs_dquot *); | ||
117 | extern void xfs_trans_log_dquot(struct xfs_trans *, struct xfs_dquot *); | ||
85 | 118 | ||
86 | /* | 119 | /* |
87 | * We keep the usr and grp dquots separately so that locking will be easier | 120 | * We keep the usr, grp, and prj dquots separately so that locking will be |
88 | * to do at commit time. All transactions that we know of at this point | 121 | * easier to do at commit time. All transactions that we know of at this point |
89 | * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. | 122 | * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. |
90 | */ | 123 | */ |
124 | enum { | ||
125 | XFS_QM_TRANS_USR = 0, | ||
126 | XFS_QM_TRANS_GRP, | ||
127 | XFS_QM_TRANS_PRJ, | ||
128 | XFS_QM_TRANS_DQTYPES | ||
129 | }; | ||
91 | #define XFS_QM_TRANS_MAXDQS 2 | 130 | #define XFS_QM_TRANS_MAXDQS 2 |
92 | typedef struct xfs_dquot_acct { | 131 | struct xfs_dquot_acct { |
93 | xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; | 132 | struct xfs_dqtrx dqs[XFS_QM_TRANS_DQTYPES][XFS_QM_TRANS_MAXDQS]; |
94 | xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; | 133 | }; |
95 | } xfs_dquot_acct_t; | ||
96 | 134 | ||
97 | /* | 135 | /* |
98 | * Users are allowed to have a usage exceeding their softlimit for | 136 | * Users are allowed to have a usage exceeding their softlimit for |
@@ -106,22 +144,23 @@ typedef struct xfs_dquot_acct { | |||
106 | #define XFS_QM_IWARNLIMIT 5 | 144 | #define XFS_QM_IWARNLIMIT 5 |
107 | #define XFS_QM_RTBWARNLIMIT 5 | 145 | #define XFS_QM_RTBWARNLIMIT 5 |
108 | 146 | ||
109 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); | 147 | extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); |
110 | extern int xfs_qm_quotacheck(xfs_mount_t *); | 148 | extern int xfs_qm_quotacheck(struct xfs_mount *); |
111 | extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); | 149 | extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t); |
112 | 150 | ||
113 | /* dquot stuff */ | 151 | /* dquot stuff */ |
114 | extern void xfs_qm_dqpurge_all(xfs_mount_t *, uint); | 152 | extern void xfs_qm_dqpurge_all(struct xfs_mount *, uint); |
115 | extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); | 153 | extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint); |
116 | 154 | ||
117 | /* quota ops */ | 155 | /* quota ops */ |
118 | extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); | 156 | extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); |
119 | extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, | 157 | extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, |
120 | fs_disk_quota_t *); | 158 | uint, struct fs_disk_quota *); |
121 | extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, | 159 | extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, |
122 | fs_disk_quota_t *); | 160 | struct fs_disk_quota *); |
123 | extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); | 161 | extern int xfs_qm_scall_getqstat(struct xfs_mount *, |
124 | extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); | 162 | struct fs_quota_stat *); |
125 | extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); | 163 | extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint); |
164 | extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint); | ||
126 | 165 | ||
127 | #endif /* __XFS_QM_H__ */ | 166 | #endif /* __XFS_QM_H__ */ |
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 2d02eac1c9a8..437a52d91f6d 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c | |||
@@ -112,16 +112,16 @@ xfs_qm_newmount( | |||
112 | 112 | ||
113 | if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || | 113 | if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || |
114 | (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || | 114 | (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || |
115 | (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || | ||
116 | (!pquotaondisk && XFS_IS_PQUOTA_ON(mp)) || | ||
117 | (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || | 115 | (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || |
118 | (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && | 116 | (!gquotaondisk && XFS_IS_GQUOTA_ON(mp)) || |
117 | (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || | ||
118 | (!pquotaondisk && XFS_IS_PQUOTA_ON(mp))) && | ||
119 | xfs_dev_is_read_only(mp, "changing quota state")) { | 119 | xfs_dev_is_read_only(mp, "changing quota state")) { |
120 | xfs_warn(mp, "please mount with%s%s%s%s.", | 120 | xfs_warn(mp, "please mount with%s%s%s%s.", |
121 | (!quotaondisk ? "out quota" : ""), | 121 | (!quotaondisk ? "out quota" : ""), |
122 | (uquotaondisk ? " usrquota" : ""), | 122 | (uquotaondisk ? " usrquota" : ""), |
123 | (pquotaondisk ? " prjquota" : ""), | 123 | (gquotaondisk ? " grpquota" : ""), |
124 | (gquotaondisk ? " grpquota" : "")); | 124 | (pquotaondisk ? " prjquota" : "")); |
125 | return XFS_ERROR(EPERM); | 125 | return XFS_ERROR(EPERM); |
126 | } | 126 | } |
127 | 127 | ||
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 6cdf6ffc36a1..e4f8b2d6f38b 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c | |||
@@ -117,11 +117,12 @@ xfs_qm_scall_quotaoff( | |||
117 | } | 117 | } |
118 | if (flags & XFS_GQUOTA_ACCT) { | 118 | if (flags & XFS_GQUOTA_ACCT) { |
119 | dqtype |= XFS_QMOPT_GQUOTA; | 119 | dqtype |= XFS_QMOPT_GQUOTA; |
120 | flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); | 120 | flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD); |
121 | inactivate_flags |= XFS_GQUOTA_ACTIVE; | 121 | inactivate_flags |= XFS_GQUOTA_ACTIVE; |
122 | } else if (flags & XFS_PQUOTA_ACCT) { | 122 | } |
123 | if (flags & XFS_PQUOTA_ACCT) { | ||
123 | dqtype |= XFS_QMOPT_PQUOTA; | 124 | dqtype |= XFS_QMOPT_PQUOTA; |
124 | flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); | 125 | flags |= (XFS_PQUOTA_CHKD | XFS_PQUOTA_ENFD); |
125 | inactivate_flags |= XFS_PQUOTA_ACTIVE; | 126 | inactivate_flags |= XFS_PQUOTA_ACTIVE; |
126 | } | 127 | } |
127 | 128 | ||
@@ -198,10 +199,9 @@ xfs_qm_scall_quotaoff( | |||
198 | } | 199 | } |
199 | 200 | ||
200 | /* | 201 | /* |
201 | * If quotas is completely disabled, close shop. | 202 | * If all quotas are completely turned off, close shop. |
202 | */ | 203 | */ |
203 | if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || | 204 | if (mp->m_qflags == 0) { |
204 | ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { | ||
205 | mutex_unlock(&q->qi_quotaofflock); | 205 | mutex_unlock(&q->qi_quotaofflock); |
206 | xfs_qm_destroy_quotainfo(mp); | 206 | xfs_qm_destroy_quotainfo(mp); |
207 | return (0); | 207 | return (0); |
@@ -214,10 +214,14 @@ xfs_qm_scall_quotaoff( | |||
214 | IRELE(q->qi_uquotaip); | 214 | IRELE(q->qi_uquotaip); |
215 | q->qi_uquotaip = NULL; | 215 | q->qi_uquotaip = NULL; |
216 | } | 216 | } |
217 | if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) { | 217 | if ((dqtype & XFS_QMOPT_GQUOTA) && q->qi_gquotaip) { |
218 | IRELE(q->qi_gquotaip); | 218 | IRELE(q->qi_gquotaip); |
219 | q->qi_gquotaip = NULL; | 219 | q->qi_gquotaip = NULL; |
220 | } | 220 | } |
221 | if ((dqtype & XFS_QMOPT_PQUOTA) && q->qi_pquotaip) { | ||
222 | IRELE(q->qi_pquotaip); | ||
223 | q->qi_pquotaip = NULL; | ||
224 | } | ||
221 | 225 | ||
222 | out_unlock: | 226 | out_unlock: |
223 | mutex_unlock(&q->qi_quotaofflock); | 227 | mutex_unlock(&q->qi_quotaofflock); |
@@ -335,14 +339,14 @@ xfs_qm_scall_quotaon( | |||
335 | * quota acct on ondisk without m_qflags' knowing. | 339 | * quota acct on ondisk without m_qflags' knowing. |
336 | */ | 340 | */ |
337 | if (((flags & XFS_UQUOTA_ACCT) == 0 && | 341 | if (((flags & XFS_UQUOTA_ACCT) == 0 && |
338 | (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && | 342 | (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && |
339 | (flags & XFS_UQUOTA_ENFD)) | 343 | (flags & XFS_UQUOTA_ENFD)) || |
340 | || | 344 | ((flags & XFS_GQUOTA_ACCT) == 0 && |
345 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && | ||
346 | (flags & XFS_GQUOTA_ENFD)) || | ||
341 | ((flags & XFS_PQUOTA_ACCT) == 0 && | 347 | ((flags & XFS_PQUOTA_ACCT) == 0 && |
342 | (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && | 348 | (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && |
343 | (flags & XFS_GQUOTA_ACCT) == 0 && | 349 | (flags & XFS_PQUOTA_ENFD))) { |
344 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && | ||
345 | (flags & XFS_OQUOTA_ENFD))) { | ||
346 | xfs_debug(mp, | 350 | xfs_debug(mp, |
347 | "%s: Can't enforce without acct, flags=%x sbflags=%x\n", | 351 | "%s: Can't enforce without acct, flags=%x sbflags=%x\n", |
348 | __func__, flags, mp->m_sb.sb_qflags); | 352 | __func__, flags, mp->m_sb.sb_qflags); |
@@ -407,11 +411,11 @@ xfs_qm_scall_getqstat( | |||
407 | struct fs_quota_stat *out) | 411 | struct fs_quota_stat *out) |
408 | { | 412 | { |
409 | struct xfs_quotainfo *q = mp->m_quotainfo; | 413 | struct xfs_quotainfo *q = mp->m_quotainfo; |
410 | struct xfs_inode *uip, *gip; | 414 | struct xfs_inode *uip = NULL; |
411 | bool tempuqip, tempgqip; | 415 | struct xfs_inode *gip = NULL; |
416 | bool tempuqip = false; | ||
417 | bool tempgqip = false; | ||
412 | 418 | ||
413 | uip = gip = NULL; | ||
414 | tempuqip = tempgqip = false; | ||
415 | memset(out, 0, sizeof(fs_quota_stat_t)); | 419 | memset(out, 0, sizeof(fs_quota_stat_t)); |
416 | 420 | ||
417 | out->qs_version = FS_QSTAT_VERSION; | 421 | out->qs_version = FS_QSTAT_VERSION; |
@@ -776,9 +780,12 @@ xfs_qm_scall_getquota( | |||
776 | * gets turned off. No need to confuse the user level code, | 780 | * gets turned off. No need to confuse the user level code, |
777 | * so return zeroes in that case. | 781 | * so return zeroes in that case. |
778 | */ | 782 | */ |
779 | if ((!XFS_IS_UQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_USER) || | 783 | if ((!XFS_IS_UQUOTA_ENFORCED(mp) && |
780 | (!XFS_IS_OQUOTA_ENFORCED(mp) && | 784 | dqp->q_core.d_flags == XFS_DQ_USER) || |
781 | (dqp->q_core.d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { | 785 | (!XFS_IS_GQUOTA_ENFORCED(mp) && |
786 | dqp->q_core.d_flags == XFS_DQ_GROUP) || | ||
787 | (!XFS_IS_PQUOTA_ENFORCED(mp) && | ||
788 | dqp->q_core.d_flags == XFS_DQ_PROJ)) { | ||
782 | dst->d_btimer = 0; | 789 | dst->d_btimer = 0; |
783 | dst->d_itimer = 0; | 790 | dst->d_itimer = 0; |
784 | dst->d_rtbtimer = 0; | 791 | dst->d_rtbtimer = 0; |
@@ -786,8 +793,8 @@ xfs_qm_scall_getquota( | |||
786 | 793 | ||
787 | #ifdef DEBUG | 794 | #ifdef DEBUG |
788 | if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || | 795 | if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || |
789 | (XFS_IS_OQUOTA_ENFORCED(mp) && | 796 | (XFS_IS_GQUOTA_ENFORCED(mp) && dst->d_flags == FS_GROUP_QUOTA) || |
790 | (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && | 797 | (XFS_IS_PQUOTA_ENFORCED(mp) && dst->d_flags == FS_PROJ_QUOTA)) && |
791 | dst->d_id != 0) { | 798 | dst->d_id != 0) { |
792 | if ((dst->d_bcount > dst->d_blk_softlimit) && | 799 | if ((dst->d_bcount > dst->d_blk_softlimit) && |
793 | (dst->d_blk_softlimit > 0)) { | 800 | (dst->d_blk_softlimit > 0)) { |
@@ -833,16 +840,16 @@ xfs_qm_export_flags( | |||
833 | uflags = 0; | 840 | uflags = 0; |
834 | if (flags & XFS_UQUOTA_ACCT) | 841 | if (flags & XFS_UQUOTA_ACCT) |
835 | uflags |= FS_QUOTA_UDQ_ACCT; | 842 | uflags |= FS_QUOTA_UDQ_ACCT; |
836 | if (flags & XFS_PQUOTA_ACCT) | ||
837 | uflags |= FS_QUOTA_PDQ_ACCT; | ||
838 | if (flags & XFS_GQUOTA_ACCT) | 843 | if (flags & XFS_GQUOTA_ACCT) |
839 | uflags |= FS_QUOTA_GDQ_ACCT; | 844 | uflags |= FS_QUOTA_GDQ_ACCT; |
845 | if (flags & XFS_PQUOTA_ACCT) | ||
846 | uflags |= FS_QUOTA_PDQ_ACCT; | ||
840 | if (flags & XFS_UQUOTA_ENFD) | 847 | if (flags & XFS_UQUOTA_ENFD) |
841 | uflags |= FS_QUOTA_UDQ_ENFD; | 848 | uflags |= FS_QUOTA_UDQ_ENFD; |
842 | if (flags & (XFS_OQUOTA_ENFD)) { | 849 | if (flags & XFS_GQUOTA_ENFD) |
843 | uflags |= (flags & XFS_GQUOTA_ACCT) ? | 850 | uflags |= FS_QUOTA_GDQ_ENFD; |
844 | FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD; | 851 | if (flags & XFS_PQUOTA_ENFD) |
845 | } | 852 | uflags |= FS_QUOTA_PDQ_ENFD; |
846 | return (uflags); | 853 | return (uflags); |
847 | } | 854 | } |
848 | 855 | ||
@@ -856,9 +863,11 @@ xfs_dqrele_inode( | |||
856 | { | 863 | { |
857 | /* skip quota inodes */ | 864 | /* skip quota inodes */ |
858 | if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || | 865 | if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || |
859 | ip == ip->i_mount->m_quotainfo->qi_gquotaip) { | 866 | ip == ip->i_mount->m_quotainfo->qi_gquotaip || |
867 | ip == ip->i_mount->m_quotainfo->qi_pquotaip) { | ||
860 | ASSERT(ip->i_udquot == NULL); | 868 | ASSERT(ip->i_udquot == NULL); |
861 | ASSERT(ip->i_gdquot == NULL); | 869 | ASSERT(ip->i_gdquot == NULL); |
870 | ASSERT(ip->i_pdquot == NULL); | ||
862 | return 0; | 871 | return 0; |
863 | } | 872 | } |
864 | 873 | ||
@@ -867,10 +876,14 @@ xfs_dqrele_inode( | |||
867 | xfs_qm_dqrele(ip->i_udquot); | 876 | xfs_qm_dqrele(ip->i_udquot); |
868 | ip->i_udquot = NULL; | 877 | ip->i_udquot = NULL; |
869 | } | 878 | } |
870 | if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { | 879 | if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) { |
871 | xfs_qm_dqrele(ip->i_gdquot); | 880 | xfs_qm_dqrele(ip->i_gdquot); |
872 | ip->i_gdquot = NULL; | 881 | ip->i_gdquot = NULL; |
873 | } | 882 | } |
883 | if ((flags & XFS_PQUOTA_ACCT) && ip->i_pdquot) { | ||
884 | xfs_qm_dqrele(ip->i_pdquot); | ||
885 | ip->i_pdquot = NULL; | ||
886 | } | ||
874 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 887 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
875 | return 0; | 888 | return 0; |
876 | } | 889 | } |
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index c38068f26c55..b14f42c714b6 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h | |||
@@ -108,11 +108,28 @@ typedef struct xfs_dqblk { | |||
108 | { XFS_DQ_FREEING, "FREEING" } | 108 | { XFS_DQ_FREEING, "FREEING" } |
109 | 109 | ||
110 | /* | 110 | /* |
111 | * In the worst case, when both user and group quotas are on, | 111 | * We have the possibility of all three quota types being active at once, and |
112 | * we can have a max of three dquots changing in a single transaction. | 112 | * hence free space modification requires modification of all three current |
113 | * dquots in a single transaction. For this case we need to have a reservation | ||
114 | * of at least 3 dquots. | ||
115 | * | ||
116 | * However, a chmod operation can change both UID and GID in a single | ||
117 | * transaction, resulting in requiring {old, new} x {uid, gid} dquots to be | ||
118 | * modified. Hence for this case we need to reserve space for at least 4 dquots. | ||
119 | * | ||
120 | * And in the worst case, there's a rename operation that can be modifying up to | ||
121 | * 4 inodes with dquots attached to them. In reality, the only inodes that can | ||
122 | * have their dquots modified are the source and destination directory inodes | ||
123 | * due to directory name creation and removal. That can require space allocation | ||
124 | * and/or freeing on both directory inodes, and hence all three dquots on each | ||
125 | * inode can be modified. And if the directories are world writeable, all the | ||
126 | * dquots can be unique and so 6 dquots can be modified.... | ||
127 | * | ||
128 | * And, of course, we also need to take into account the dquot log format item | ||
129 | * used to describe each dquot. | ||
113 | */ | 130 | */ |
114 | #define XFS_DQUOT_LOGRES(mp) (sizeof(xfs_disk_dquot_t) * 3) | 131 | #define XFS_DQUOT_LOGRES(mp) \ |
115 | 132 | ((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6) | |
116 | 133 | ||
117 | /* | 134 | /* |
118 | * These are the structures used to lay out dquots and quotaoff | 135 | * These are the structures used to lay out dquots and quotaoff |
@@ -161,30 +178,42 @@ typedef struct xfs_qoff_logformat { | |||
161 | #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ | 178 | #define XFS_GQUOTA_ACCT 0x0040 /* group quota accounting ON */ |
162 | 179 | ||
163 | /* | 180 | /* |
181 | * Conversion to and from the combined OQUOTA flag (if necessary) | ||
182 | * is done only in xfs_sb_qflags_to_disk() and xfs_sb_qflags_from_disk() | ||
183 | */ | ||
184 | #define XFS_GQUOTA_ENFD 0x0080 /* group quota limits enforced */ | ||
185 | #define XFS_GQUOTA_CHKD 0x0100 /* quotacheck run on group quotas */ | ||
186 | #define XFS_PQUOTA_ENFD 0x0200 /* project quota limits enforced */ | ||
187 | #define XFS_PQUOTA_CHKD 0x0400 /* quotacheck run on project quotas */ | ||
188 | |||
189 | /* | ||
164 | * Quota Accounting/Enforcement flags | 190 | * Quota Accounting/Enforcement flags |
165 | */ | 191 | */ |
166 | #define XFS_ALL_QUOTA_ACCT \ | 192 | #define XFS_ALL_QUOTA_ACCT \ |
167 | (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) | 193 | (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) |
168 | #define XFS_ALL_QUOTA_ENFD (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) | 194 | #define XFS_ALL_QUOTA_ENFD \ |
169 | #define XFS_ALL_QUOTA_CHKD (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) | 195 | (XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD) |
196 | #define XFS_ALL_QUOTA_CHKD \ | ||
197 | (XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD) | ||
170 | 198 | ||
171 | #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) | 199 | #define XFS_IS_QUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) |
172 | #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) | 200 | #define XFS_IS_UQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_UQUOTA_ACCT) |
173 | #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) | 201 | #define XFS_IS_PQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_PQUOTA_ACCT) |
174 | #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) | 202 | #define XFS_IS_GQUOTA_RUNNING(mp) ((mp)->m_qflags & XFS_GQUOTA_ACCT) |
175 | #define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) | 203 | #define XFS_IS_UQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_UQUOTA_ENFD) |
176 | #define XFS_IS_OQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_OQUOTA_ENFD) | 204 | #define XFS_IS_GQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_GQUOTA_ENFD) |
205 | #define XFS_IS_PQUOTA_ENFORCED(mp) ((mp)->m_qflags & XFS_PQUOTA_ENFD) | ||
177 | 206 | ||
178 | /* | 207 | /* |
179 | * Incore only flags for quotaoff - these bits get cleared when quota(s) | 208 | * Incore only flags for quotaoff - these bits get cleared when quota(s) |
180 | * are in the process of getting turned off. These flags are in m_qflags but | 209 | * are in the process of getting turned off. These flags are in m_qflags but |
181 | * never in sb_qflags. | 210 | * never in sb_qflags. |
182 | */ | 211 | */ |
183 | #define XFS_UQUOTA_ACTIVE 0x0100 /* uquotas are being turned off */ | 212 | #define XFS_UQUOTA_ACTIVE 0x1000 /* uquotas are being turned off */ |
184 | #define XFS_PQUOTA_ACTIVE 0x0200 /* pquotas are being turned off */ | 213 | #define XFS_GQUOTA_ACTIVE 0x2000 /* gquotas are being turned off */ |
185 | #define XFS_GQUOTA_ACTIVE 0x0400 /* gquotas are being turned off */ | 214 | #define XFS_PQUOTA_ACTIVE 0x4000 /* pquotas are being turned off */ |
186 | #define XFS_ALL_QUOTA_ACTIVE \ | 215 | #define XFS_ALL_QUOTA_ACTIVE \ |
187 | (XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) | 216 | (XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE) |
188 | 217 | ||
189 | /* | 218 | /* |
190 | * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees | 219 | * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees |
@@ -259,33 +288,24 @@ typedef struct xfs_qoff_logformat { | |||
259 | * we didn't have the inode locked, the appropriate dquot(s) will be | 288 | * we didn't have the inode locked, the appropriate dquot(s) will be |
260 | * attached atomically. | 289 | * attached atomically. |
261 | */ | 290 | */ |
262 | #define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\ | 291 | #define XFS_NOT_DQATTACHED(mp, ip) \ |
263 | (ip)->i_udquot == NULL) || \ | 292 | ((XFS_IS_UQUOTA_ON(mp) && (ip)->i_udquot == NULL) || \ |
264 | (XFS_IS_OQUOTA_ON(mp) && \ | 293 | (XFS_IS_GQUOTA_ON(mp) && (ip)->i_gdquot == NULL) || \ |
265 | (ip)->i_gdquot == NULL)) | 294 | (XFS_IS_PQUOTA_ON(mp) && (ip)->i_pdquot == NULL)) |
266 | 295 | ||
267 | #define XFS_QM_NEED_QUOTACHECK(mp) \ | 296 | #define XFS_QM_NEED_QUOTACHECK(mp) \ |
268 | ((XFS_IS_UQUOTA_ON(mp) && \ | 297 | ((XFS_IS_UQUOTA_ON(mp) && \ |
269 | (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ | 298 | (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ |
270 | (XFS_IS_GQUOTA_ON(mp) && \ | 299 | (XFS_IS_GQUOTA_ON(mp) && \ |
271 | ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ | 300 | (mp->m_sb.sb_qflags & XFS_GQUOTA_CHKD) == 0) || \ |
272 | (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \ | ||
273 | (XFS_IS_PQUOTA_ON(mp) && \ | 301 | (XFS_IS_PQUOTA_ON(mp) && \ |
274 | ((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ | 302 | (mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD) == 0)) |
275 | (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT)))) | ||
276 | |||
277 | #define XFS_MOUNT_QUOTA_SET1 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ | ||
278 | XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ | ||
279 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) | ||
280 | |||
281 | #define XFS_MOUNT_QUOTA_SET2 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ | ||
282 | XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ | ||
283 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) | ||
284 | 303 | ||
285 | #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ | 304 | #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ |
286 | XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ | 305 | XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ |
287 | XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\ | 306 | XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\ |
288 | XFS_GQUOTA_ACCT) | 307 | XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|\ |
308 | XFS_PQUOTA_CHKD) | ||
289 | 309 | ||
290 | 310 | ||
291 | /* | 311 | /* |
@@ -318,17 +338,18 @@ extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *, | |||
318 | struct xfs_inode *, long, long, uint); | 338 | struct xfs_inode *, long, long, uint); |
319 | extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, | 339 | extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, |
320 | struct xfs_mount *, struct xfs_dquot *, | 340 | struct xfs_mount *, struct xfs_dquot *, |
321 | struct xfs_dquot *, long, long, uint); | 341 | struct xfs_dquot *, struct xfs_dquot *, long, long, uint); |
322 | 342 | ||
323 | extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint, | 343 | extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint, |
324 | struct xfs_dquot **, struct xfs_dquot **); | 344 | struct xfs_dquot **, struct xfs_dquot **, struct xfs_dquot **); |
325 | extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, | 345 | extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, |
326 | struct xfs_dquot *, struct xfs_dquot *); | 346 | struct xfs_dquot *, struct xfs_dquot *, struct xfs_dquot *); |
327 | extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); | 347 | extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); |
328 | extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *, | 348 | extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *, |
329 | struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *); | 349 | struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *); |
330 | extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *, | 350 | extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *, |
331 | struct xfs_dquot *, struct xfs_dquot *, uint); | 351 | struct xfs_dquot *, struct xfs_dquot *, |
352 | struct xfs_dquot *, uint); | ||
332 | extern int xfs_qm_dqattach(struct xfs_inode *, uint); | 353 | extern int xfs_qm_dqattach(struct xfs_inode *, uint); |
333 | extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint); | 354 | extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint); |
334 | extern void xfs_qm_dqdetach(struct xfs_inode *); | 355 | extern void xfs_qm_dqdetach(struct xfs_inode *); |
@@ -342,10 +363,12 @@ extern void xfs_qm_unmount_quotas(struct xfs_mount *); | |||
342 | #else | 363 | #else |
343 | static inline int | 364 | static inline int |
344 | xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, | 365 | xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, |
345 | uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp) | 366 | uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp, |
367 | struct xfs_dquot **pdqp) | ||
346 | { | 368 | { |
347 | *udqp = NULL; | 369 | *udqp = NULL; |
348 | *gdqp = NULL; | 370 | *gdqp = NULL; |
371 | *pdqp = NULL; | ||
349 | return 0; | 372 | return 0; |
350 | } | 373 | } |
351 | #define xfs_trans_dup_dqinfo(tp, tp2) | 374 | #define xfs_trans_dup_dqinfo(tp, tp2) |
@@ -360,14 +383,15 @@ static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, | |||
360 | } | 383 | } |
361 | static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, | 384 | static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, |
362 | struct xfs_mount *mp, struct xfs_dquot *udqp, | 385 | struct xfs_mount *mp, struct xfs_dquot *udqp, |
363 | struct xfs_dquot *gdqp, long nblks, long nions, uint flags) | 386 | struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, |
387 | long nblks, long nions, uint flags) | ||
364 | { | 388 | { |
365 | return 0; | 389 | return 0; |
366 | } | 390 | } |
367 | #define xfs_qm_vop_create_dqattach(tp, ip, u, g) | 391 | #define xfs_qm_vop_create_dqattach(tp, ip, u, g, p) |
368 | #define xfs_qm_vop_rename_dqattach(it) (0) | 392 | #define xfs_qm_vop_rename_dqattach(it) (0) |
369 | #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) | 393 | #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) |
370 | #define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0) | 394 | #define xfs_qm_vop_chown_reserve(tp, ip, u, g, p, fl) (0) |
371 | #define xfs_qm_dqattach(ip, fl) (0) | 395 | #define xfs_qm_dqattach(ip, fl) (0) |
372 | #define xfs_qm_dqattach_locked(ip, fl) (0) | 396 | #define xfs_qm_dqattach_locked(ip, fl) (0) |
373 | #define xfs_qm_dqdetach(ip) | 397 | #define xfs_qm_dqdetach(ip) |
@@ -381,8 +405,8 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, | |||
381 | 405 | ||
382 | #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ | 406 | #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ |
383 | xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags) | 407 | xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags) |
384 | #define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \ | 408 | #define xfs_trans_reserve_quota(tp, mp, ud, gd, pd, nb, ni, f) \ |
385 | xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ | 409 | xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, pd, nb, ni, \ |
386 | f | XFS_QMOPT_RES_REGBLKS) | 410 | f | XFS_QMOPT_RES_REGBLKS) |
387 | 411 | ||
388 | extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, | 412 | extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, |
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 71926d630527..20e30f93b0c7 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c | |||
@@ -75,8 +75,10 @@ xfs_fs_set_xstate( | |||
75 | flags |= XFS_GQUOTA_ACCT; | 75 | flags |= XFS_GQUOTA_ACCT; |
76 | if (uflags & FS_QUOTA_UDQ_ENFD) | 76 | if (uflags & FS_QUOTA_UDQ_ENFD) |
77 | flags |= XFS_UQUOTA_ENFD; | 77 | flags |= XFS_UQUOTA_ENFD; |
78 | if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD)) | 78 | if (uflags & FS_QUOTA_GDQ_ENFD) |
79 | flags |= XFS_OQUOTA_ENFD; | 79 | flags |= XFS_GQUOTA_ENFD; |
80 | if (uflags & FS_QUOTA_PDQ_ENFD) | ||
81 | flags |= XFS_PQUOTA_ENFD; | ||
80 | 82 | ||
81 | switch (op) { | 83 | switch (op) { |
82 | case Q_XQUOTAON: | 84 | case Q_XQUOTAON: |
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 2de58a85833c..78f9e70b80c7 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h | |||
@@ -618,6 +618,12 @@ xfs_sb_has_incompat_log_feature( | |||
618 | return (sbp->sb_features_log_incompat & feature) != 0; | 618 | return (sbp->sb_features_log_incompat & feature) != 0; |
619 | } | 619 | } |
620 | 620 | ||
621 | static inline bool | ||
622 | xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) | ||
623 | { | ||
624 | return (ino == sbp->sb_uquotino || ino == sbp->sb_gquotino); | ||
625 | } | ||
626 | |||
621 | /* | 627 | /* |
622 | * end of superblock version macros | 628 | * end of superblock version macros |
623 | */ | 629 | */ |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3033ba5e9762..1d68ffcdeaa7 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include "xfs_inode_item.h" | 51 | #include "xfs_inode_item.h" |
52 | #include "xfs_icache.h" | 52 | #include "xfs_icache.h" |
53 | #include "xfs_trace.h" | 53 | #include "xfs_trace.h" |
54 | #include "xfs_icreate_item.h" | ||
54 | 55 | ||
55 | #include <linux/namei.h> | 56 | #include <linux/namei.h> |
56 | #include <linux/init.h> | 57 | #include <linux/init.h> |
@@ -359,17 +360,17 @@ xfs_parseargs( | |||
359 | } else if (!strcmp(this_char, MNTOPT_PQUOTA) || | 360 | } else if (!strcmp(this_char, MNTOPT_PQUOTA) || |
360 | !strcmp(this_char, MNTOPT_PRJQUOTA)) { | 361 | !strcmp(this_char, MNTOPT_PRJQUOTA)) { |
361 | mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | | 362 | mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | |
362 | XFS_OQUOTA_ENFD); | 363 | XFS_PQUOTA_ENFD); |
363 | } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { | 364 | } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { |
364 | mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); | 365 | mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); |
365 | mp->m_qflags &= ~XFS_OQUOTA_ENFD; | 366 | mp->m_qflags &= ~XFS_PQUOTA_ENFD; |
366 | } else if (!strcmp(this_char, MNTOPT_GQUOTA) || | 367 | } else if (!strcmp(this_char, MNTOPT_GQUOTA) || |
367 | !strcmp(this_char, MNTOPT_GRPQUOTA)) { | 368 | !strcmp(this_char, MNTOPT_GRPQUOTA)) { |
368 | mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | | 369 | mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | |
369 | XFS_OQUOTA_ENFD); | 370 | XFS_GQUOTA_ENFD); |
370 | } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { | 371 | } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { |
371 | mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); | 372 | mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); |
372 | mp->m_qflags &= ~XFS_OQUOTA_ENFD; | 373 | mp->m_qflags &= ~XFS_GQUOTA_ENFD; |
373 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { | 374 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { |
374 | xfs_warn(mp, | 375 | xfs_warn(mp, |
375 | "delaylog is the default now, option is deprecated."); | 376 | "delaylog is the default now, option is deprecated."); |
@@ -439,20 +440,15 @@ xfs_parseargs( | |||
439 | } | 440 | } |
440 | 441 | ||
441 | done: | 442 | done: |
442 | if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { | 443 | if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) { |
443 | /* | 444 | /* |
444 | * At this point the superblock has not been read | 445 | * At this point the superblock has not been read |
445 | * in, therefore we do not know the block size. | 446 | * in, therefore we do not know the block size. |
446 | * Before the mount call ends we will convert | 447 | * Before the mount call ends we will convert |
447 | * these to FSBs. | 448 | * these to FSBs. |
448 | */ | 449 | */ |
449 | if (dsunit) { | 450 | mp->m_dalign = dsunit; |
450 | mp->m_dalign = dsunit; | 451 | mp->m_swidth = dswidth; |
451 | mp->m_flags |= XFS_MOUNT_RETERR; | ||
452 | } | ||
453 | |||
454 | if (dswidth) | ||
455 | mp->m_swidth = dswidth; | ||
456 | } | 452 | } |
457 | 453 | ||
458 | if (mp->m_logbufs != -1 && | 454 | if (mp->m_logbufs != -1 && |
@@ -563,12 +559,12 @@ xfs_showargs( | |||
563 | /* Either project or group quotas can be active, not both */ | 559 | /* Either project or group quotas can be active, not both */ |
564 | 560 | ||
565 | if (mp->m_qflags & XFS_PQUOTA_ACCT) { | 561 | if (mp->m_qflags & XFS_PQUOTA_ACCT) { |
566 | if (mp->m_qflags & XFS_OQUOTA_ENFD) | 562 | if (mp->m_qflags & XFS_PQUOTA_ENFD) |
567 | seq_puts(m, "," MNTOPT_PRJQUOTA); | 563 | seq_puts(m, "," MNTOPT_PRJQUOTA); |
568 | else | 564 | else |
569 | seq_puts(m, "," MNTOPT_PQUOTANOENF); | 565 | seq_puts(m, "," MNTOPT_PQUOTANOENF); |
570 | } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { | 566 | } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { |
571 | if (mp->m_qflags & XFS_OQUOTA_ENFD) | 567 | if (mp->m_qflags & XFS_GQUOTA_ENFD) |
572 | seq_puts(m, "," MNTOPT_GRPQUOTA); | 568 | seq_puts(m, "," MNTOPT_GRPQUOTA); |
573 | else | 569 | else |
574 | seq_puts(m, "," MNTOPT_GQUOTANOENF); | 570 | seq_puts(m, "," MNTOPT_GQUOTANOENF); |
@@ -1136,8 +1132,8 @@ xfs_fs_statfs( | |||
1136 | spin_unlock(&mp->m_sb_lock); | 1132 | spin_unlock(&mp->m_sb_lock); |
1137 | 1133 | ||
1138 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && | 1134 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && |
1139 | ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == | 1135 | ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == |
1140 | (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) | 1136 | (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) |
1141 | xfs_qm_statvfs(ip, statp); | 1137 | xfs_qm_statvfs(ip, statp); |
1142 | return 0; | 1138 | return 0; |
1143 | } | 1139 | } |
@@ -1481,6 +1477,10 @@ xfs_fs_fill_super( | |||
1481 | sb->s_time_gran = 1; | 1477 | sb->s_time_gran = 1; |
1482 | set_posix_acl_flag(sb); | 1478 | set_posix_acl_flag(sb); |
1483 | 1479 | ||
1480 | /* version 5 superblocks support inode version counters. */ | ||
1481 | if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) | ||
1482 | sb->s_flags |= MS_I_VERSION; | ||
1483 | |||
1484 | error = xfs_mountfs(mp); | 1484 | error = xfs_mountfs(mp); |
1485 | if (error) | 1485 | if (error) |
1486 | goto out_filestream_unmount; | 1486 | goto out_filestream_unmount; |
@@ -1655,9 +1655,15 @@ xfs_init_zones(void) | |||
1655 | KM_ZONE_SPREAD, NULL); | 1655 | KM_ZONE_SPREAD, NULL); |
1656 | if (!xfs_ili_zone) | 1656 | if (!xfs_ili_zone) |
1657 | goto out_destroy_inode_zone; | 1657 | goto out_destroy_inode_zone; |
1658 | xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item), | ||
1659 | "xfs_icr"); | ||
1660 | if (!xfs_icreate_zone) | ||
1661 | goto out_destroy_ili_zone; | ||
1658 | 1662 | ||
1659 | return 0; | 1663 | return 0; |
1660 | 1664 | ||
1665 | out_destroy_ili_zone: | ||
1666 | kmem_zone_destroy(xfs_ili_zone); | ||
1661 | out_destroy_inode_zone: | 1667 | out_destroy_inode_zone: |
1662 | kmem_zone_destroy(xfs_inode_zone); | 1668 | kmem_zone_destroy(xfs_inode_zone); |
1663 | out_destroy_efi_zone: | 1669 | out_destroy_efi_zone: |
@@ -1696,6 +1702,7 @@ xfs_destroy_zones(void) | |||
1696 | * destroy caches. | 1702 | * destroy caches. |
1697 | */ | 1703 | */ |
1698 | rcu_barrier(); | 1704 | rcu_barrier(); |
1705 | kmem_zone_destroy(xfs_icreate_zone); | ||
1699 | kmem_zone_destroy(xfs_ili_zone); | 1706 | kmem_zone_destroy(xfs_ili_zone); |
1700 | kmem_zone_destroy(xfs_inode_zone); | 1707 | kmem_zone_destroy(xfs_inode_zone); |
1701 | kmem_zone_destroy(xfs_efi_zone); | 1708 | kmem_zone_destroy(xfs_efi_zone); |
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 195a403e1522..f4895b662fcb 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c | |||
@@ -358,7 +358,9 @@ xfs_symlink( | |||
358 | int n; | 358 | int n; |
359 | xfs_buf_t *bp; | 359 | xfs_buf_t *bp; |
360 | prid_t prid; | 360 | prid_t prid; |
361 | struct xfs_dquot *udqp, *gdqp; | 361 | struct xfs_dquot *udqp = NULL; |
362 | struct xfs_dquot *gdqp = NULL; | ||
363 | struct xfs_dquot *pdqp = NULL; | ||
362 | uint resblks; | 364 | uint resblks; |
363 | 365 | ||
364 | *ipp = NULL; | 366 | *ipp = NULL; |
@@ -385,7 +387,7 @@ xfs_symlink( | |||
385 | * Make sure that we have allocated dquot(s) on disk. | 387 | * Make sure that we have allocated dquot(s) on disk. |
386 | */ | 388 | */ |
387 | error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, | 389 | error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, |
388 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); | 390 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); |
389 | if (error) | 391 | if (error) |
390 | goto std_return; | 392 | goto std_return; |
391 | 393 | ||
@@ -426,7 +428,8 @@ xfs_symlink( | |||
426 | /* | 428 | /* |
427 | * Reserve disk quota : blocks and inode. | 429 | * Reserve disk quota : blocks and inode. |
428 | */ | 430 | */ |
429 | error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); | 431 | error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, |
432 | pdqp, resblks, 1, 0); | ||
430 | if (error) | 433 | if (error) |
431 | goto error_return; | 434 | goto error_return; |
432 | 435 | ||
@@ -464,7 +467,7 @@ xfs_symlink( | |||
464 | /* | 467 | /* |
465 | * Also attach the dquot(s) to it, if applicable. | 468 | * Also attach the dquot(s) to it, if applicable. |
466 | */ | 469 | */ |
467 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); | 470 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); |
468 | 471 | ||
469 | if (resblks) | 472 | if (resblks) |
470 | resblks -= XFS_IALLOC_SPACE_RES(mp); | 473 | resblks -= XFS_IALLOC_SPACE_RES(mp); |
@@ -562,6 +565,7 @@ xfs_symlink( | |||
562 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 565 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
563 | xfs_qm_dqrele(udqp); | 566 | xfs_qm_dqrele(udqp); |
564 | xfs_qm_dqrele(gdqp); | 567 | xfs_qm_dqrele(gdqp); |
568 | xfs_qm_dqrele(pdqp); | ||
565 | 569 | ||
566 | *ipp = ip; | 570 | *ipp = ip; |
567 | return 0; | 571 | return 0; |
@@ -575,6 +579,7 @@ xfs_symlink( | |||
575 | xfs_trans_cancel(tp, cancel_flags); | 579 | xfs_trans_cancel(tp, cancel_flags); |
576 | xfs_qm_dqrele(udqp); | 580 | xfs_qm_dqrele(udqp); |
577 | xfs_qm_dqrele(gdqp); | 581 | xfs_qm_dqrele(gdqp); |
582 | xfs_qm_dqrele(pdqp); | ||
578 | 583 | ||
579 | if (unlock_dp_on_error) | 584 | if (unlock_dp_on_error) |
580 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 585 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
@@ -585,7 +590,7 @@ xfs_symlink( | |||
585 | /* | 590 | /* |
586 | * Free a symlink that has blocks associated with it. | 591 | * Free a symlink that has blocks associated with it. |
587 | */ | 592 | */ |
588 | int | 593 | STATIC int |
589 | xfs_inactive_symlink_rmt( | 594 | xfs_inactive_symlink_rmt( |
590 | xfs_inode_t *ip, | 595 | xfs_inode_t *ip, |
591 | xfs_trans_t **tpp) | 596 | xfs_trans_t **tpp) |
@@ -606,7 +611,7 @@ xfs_inactive_symlink_rmt( | |||
606 | 611 | ||
607 | tp = *tpp; | 612 | tp = *tpp; |
608 | mp = ip->i_mount; | 613 | mp = ip->i_mount; |
609 | ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip)); | 614 | ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS); |
610 | /* | 615 | /* |
611 | * We're freeing a symlink that has some | 616 | * We're freeing a symlink that has some |
612 | * blocks allocated to it. Free the | 617 | * blocks allocated to it. Free the |
@@ -720,3 +725,47 @@ xfs_inactive_symlink_rmt( | |||
720 | error0: | 725 | error0: |
721 | return error; | 726 | return error; |
722 | } | 727 | } |
728 | |||
729 | /* | ||
730 | * xfs_inactive_symlink - free a symlink | ||
731 | */ | ||
732 | int | ||
733 | xfs_inactive_symlink( | ||
734 | struct xfs_inode *ip, | ||
735 | struct xfs_trans **tp) | ||
736 | { | ||
737 | struct xfs_mount *mp = ip->i_mount; | ||
738 | int pathlen; | ||
739 | |||
740 | trace_xfs_inactive_symlink(ip); | ||
741 | |||
742 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
743 | |||
744 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
745 | return XFS_ERROR(EIO); | ||
746 | |||
747 | /* | ||
748 | * Zero length symlinks _can_ exist. | ||
749 | */ | ||
750 | pathlen = (int)ip->i_d.di_size; | ||
751 | if (!pathlen) | ||
752 | return 0; | ||
753 | |||
754 | if (pathlen < 0 || pathlen > MAXPATHLEN) { | ||
755 | xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)", | ||
756 | __func__, (unsigned long long)ip->i_ino, pathlen); | ||
757 | ASSERT(0); | ||
758 | return XFS_ERROR(EFSCORRUPTED); | ||
759 | } | ||
760 | |||
761 | if (ip->i_df.if_flags & XFS_IFINLINE) { | ||
762 | if (ip->i_df.if_bytes > 0) | ||
763 | xfs_idata_realloc(ip, -(ip->i_df.if_bytes), | ||
764 | XFS_DATA_FORK); | ||
765 | ASSERT(ip->i_df.if_bytes == 0); | ||
766 | return 0; | ||
767 | } | ||
768 | |||
769 | /* remove the remote symlink */ | ||
770 | return xfs_inactive_symlink_rmt(ip, tp); | ||
771 | } | ||
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index b39398d2097c..374394880c01 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h | |||
@@ -60,7 +60,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; | |||
60 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, | 60 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, |
61 | const char *target_path, umode_t mode, struct xfs_inode **ipp); | 61 | const char *target_path, umode_t mode, struct xfs_inode **ipp); |
62 | int xfs_readlink(struct xfs_inode *ip, char *link); | 62 | int xfs_readlink(struct xfs_inode *ip, char *link); |
63 | int xfs_inactive_symlink_rmt(struct xfs_inode *ip, struct xfs_trans **tpp); | 63 | int xfs_inactive_symlink(struct xfs_inode *ip, struct xfs_trans **tpp); |
64 | 64 | ||
65 | #endif /* __KERNEL__ */ | 65 | #endif /* __KERNEL__ */ |
66 | #endif /* __XFS_SYMLINK_H */ | 66 | #endif /* __XFS_SYMLINK_H */ |
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 2801b5ce6cdb..1743b9f8e23d 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c | |||
@@ -25,11 +25,11 @@ static struct ctl_table_header *xfs_table_header; | |||
25 | #ifdef CONFIG_PROC_FS | 25 | #ifdef CONFIG_PROC_FS |
26 | STATIC int | 26 | STATIC int |
27 | xfs_stats_clear_proc_handler( | 27 | xfs_stats_clear_proc_handler( |
28 | ctl_table *ctl, | 28 | struct ctl_table *ctl, |
29 | int write, | 29 | int write, |
30 | void __user *buffer, | 30 | void __user *buffer, |
31 | size_t *lenp, | 31 | size_t *lenp, |
32 | loff_t *ppos) | 32 | loff_t *ppos) |
33 | { | 33 | { |
34 | int c, ret, *valp = ctl->data; | 34 | int c, ret, *valp = ctl->data; |
35 | __uint32_t vn_active; | 35 | __uint32_t vn_active; |
@@ -55,11 +55,11 @@ xfs_stats_clear_proc_handler( | |||
55 | 55 | ||
56 | STATIC int | 56 | STATIC int |
57 | xfs_panic_mask_proc_handler( | 57 | xfs_panic_mask_proc_handler( |
58 | ctl_table *ctl, | 58 | struct ctl_table *ctl, |
59 | int write, | 59 | int write, |
60 | void __user *buffer, | 60 | void __user *buffer, |
61 | size_t *lenp, | 61 | size_t *lenp, |
62 | loff_t *ppos) | 62 | loff_t *ppos) |
63 | { | 63 | { |
64 | int ret, *valp = ctl->data; | 64 | int ret, *valp = ctl->data; |
65 | 65 | ||
@@ -74,7 +74,7 @@ xfs_panic_mask_proc_handler( | |||
74 | } | 74 | } |
75 | #endif /* CONFIG_PROC_FS */ | 75 | #endif /* CONFIG_PROC_FS */ |
76 | 76 | ||
77 | static ctl_table xfs_table[] = { | 77 | static struct ctl_table xfs_table[] = { |
78 | { | 78 | { |
79 | .procname = "irix_sgid_inherit", | 79 | .procname = "irix_sgid_inherit", |
80 | .data = &xfs_params.sgid_inherit.val, | 80 | .data = &xfs_params.sgid_inherit.val, |
@@ -227,7 +227,7 @@ static ctl_table xfs_table[] = { | |||
227 | {} | 227 | {} |
228 | }; | 228 | }; |
229 | 229 | ||
230 | static ctl_table xfs_dir_table[] = { | 230 | static struct ctl_table xfs_dir_table[] = { |
231 | { | 231 | { |
232 | .procname = "xfs", | 232 | .procname = "xfs", |
233 | .mode = 0555, | 233 | .mode = 0555, |
@@ -236,7 +236,7 @@ static ctl_table xfs_dir_table[] = { | |||
236 | {} | 236 | {} |
237 | }; | 237 | }; |
238 | 238 | ||
239 | static ctl_table xfs_root_table[] = { | 239 | static struct ctl_table xfs_root_table[] = { |
240 | { | 240 | { |
241 | .procname = "fs", | 241 | .procname = "fs", |
242 | .mode = 0555, | 242 | .mode = 0555, |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index aa4db3307d36..47910e638c18 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -486,9 +486,12 @@ DEFINE_EVENT(xfs_buf_item_class, name, \ | |||
486 | TP_PROTO(struct xfs_buf_log_item *bip), \ | 486 | TP_PROTO(struct xfs_buf_log_item *bip), \ |
487 | TP_ARGS(bip)) | 487 | TP_ARGS(bip)) |
488 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); | 488 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); |
489 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_ordered); | ||
489 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); | 490 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); |
490 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); | 491 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); |
492 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_ordered); | ||
491 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); | 493 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); |
494 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered); | ||
492 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); | 495 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); |
493 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); | 496 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); |
494 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); | 497 | DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); |
@@ -508,6 +511,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin); | |||
508 | DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); | 511 | DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); |
509 | DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); | 512 | DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); |
510 | DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); | 513 | DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); |
514 | DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered); | ||
511 | 515 | ||
512 | DECLARE_EVENT_CLASS(xfs_lock_class, | 516 | DECLARE_EVENT_CLASS(xfs_lock_class, |
513 | TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, | 517 | TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, |
@@ -571,6 +575,7 @@ DEFINE_INODE_EVENT(xfs_iget_miss); | |||
571 | DEFINE_INODE_EVENT(xfs_getattr); | 575 | DEFINE_INODE_EVENT(xfs_getattr); |
572 | DEFINE_INODE_EVENT(xfs_setattr); | 576 | DEFINE_INODE_EVENT(xfs_setattr); |
573 | DEFINE_INODE_EVENT(xfs_readlink); | 577 | DEFINE_INODE_EVENT(xfs_readlink); |
578 | DEFINE_INODE_EVENT(xfs_inactive_symlink); | ||
574 | DEFINE_INODE_EVENT(xfs_alloc_file_space); | 579 | DEFINE_INODE_EVENT(xfs_alloc_file_space); |
575 | DEFINE_INODE_EVENT(xfs_free_file_space); | 580 | DEFINE_INODE_EVENT(xfs_free_file_space); |
576 | DEFINE_INODE_EVENT(xfs_readdir); | 581 | DEFINE_INODE_EVENT(xfs_readdir); |
@@ -974,14 +979,16 @@ DEFINE_RW_EVENT(xfs_file_splice_read); | |||
974 | DEFINE_RW_EVENT(xfs_file_splice_write); | 979 | DEFINE_RW_EVENT(xfs_file_splice_write); |
975 | 980 | ||
976 | DECLARE_EVENT_CLASS(xfs_page_class, | 981 | DECLARE_EVENT_CLASS(xfs_page_class, |
977 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off), | 982 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off, |
978 | TP_ARGS(inode, page, off), | 983 | unsigned int len), |
984 | TP_ARGS(inode, page, off, len), | ||
979 | TP_STRUCT__entry( | 985 | TP_STRUCT__entry( |
980 | __field(dev_t, dev) | 986 | __field(dev_t, dev) |
981 | __field(xfs_ino_t, ino) | 987 | __field(xfs_ino_t, ino) |
982 | __field(pgoff_t, pgoff) | 988 | __field(pgoff_t, pgoff) |
983 | __field(loff_t, size) | 989 | __field(loff_t, size) |
984 | __field(unsigned long, offset) | 990 | __field(unsigned long, offset) |
991 | __field(unsigned int, length) | ||
985 | __field(int, delalloc) | 992 | __field(int, delalloc) |
986 | __field(int, unwritten) | 993 | __field(int, unwritten) |
987 | ), | 994 | ), |
@@ -995,24 +1002,27 @@ DECLARE_EVENT_CLASS(xfs_page_class, | |||
995 | __entry->pgoff = page_offset(page); | 1002 | __entry->pgoff = page_offset(page); |
996 | __entry->size = i_size_read(inode); | 1003 | __entry->size = i_size_read(inode); |
997 | __entry->offset = off; | 1004 | __entry->offset = off; |
1005 | __entry->length = len; | ||
998 | __entry->delalloc = delalloc; | 1006 | __entry->delalloc = delalloc; |
999 | __entry->unwritten = unwritten; | 1007 | __entry->unwritten = unwritten; |
1000 | ), | 1008 | ), |
1001 | TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " | 1009 | TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " |
1002 | "delalloc %d unwritten %d", | 1010 | "length %x delalloc %d unwritten %d", |
1003 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1011 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1004 | __entry->ino, | 1012 | __entry->ino, |
1005 | __entry->pgoff, | 1013 | __entry->pgoff, |
1006 | __entry->size, | 1014 | __entry->size, |
1007 | __entry->offset, | 1015 | __entry->offset, |
1016 | __entry->length, | ||
1008 | __entry->delalloc, | 1017 | __entry->delalloc, |
1009 | __entry->unwritten) | 1018 | __entry->unwritten) |
1010 | ) | 1019 | ) |
1011 | 1020 | ||
1012 | #define DEFINE_PAGE_EVENT(name) \ | 1021 | #define DEFINE_PAGE_EVENT(name) \ |
1013 | DEFINE_EVENT(xfs_page_class, name, \ | 1022 | DEFINE_EVENT(xfs_page_class, name, \ |
1014 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ | 1023 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off, \ |
1015 | TP_ARGS(inode, page, off)) | 1024 | unsigned int len), \ |
1025 | TP_ARGS(inode, page, off, len)) | ||
1016 | DEFINE_PAGE_EVENT(xfs_writepage); | 1026 | DEFINE_PAGE_EVENT(xfs_writepage); |
1017 | DEFINE_PAGE_EVENT(xfs_releasepage); | 1027 | DEFINE_PAGE_EVENT(xfs_releasepage); |
1018 | DEFINE_PAGE_EVENT(xfs_invalidatepage); | 1028 | DEFINE_PAGE_EVENT(xfs_invalidatepage); |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 2fd7c1ff1d21..35a229981354 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -234,71 +234,93 @@ xfs_calc_remove_reservation( | |||
234 | } | 234 | } |
235 | 235 | ||
236 | /* | 236 | /* |
237 | * For symlink we can modify: | 237 | * For create, break it in to the two cases that the transaction |
238 | * covers. We start with the modify case - allocation done by modification | ||
239 | * of the state of existing inodes - and the allocation case. | ||
240 | */ | ||
241 | |||
242 | /* | ||
243 | * For create we can modify: | ||
238 | * the parent directory inode: inode size | 244 | * the parent directory inode: inode size |
239 | * the new inode: inode size | 245 | * the new inode: inode size |
240 | * the inode btree entry: 1 block | 246 | * the inode btree entry: block size |
247 | * the superblock for the nlink flag: sector size | ||
241 | * the directory btree: (max depth + v2) * dir block size | 248 | * the directory btree: (max depth + v2) * dir block size |
242 | * the directory inode's bmap btree: (max depth + v2) * block size | 249 | * the directory inode's bmap btree: (max depth + v2) * block size |
243 | * the blocks for the symlink: 1 kB | 250 | */ |
244 | * Or in the first xact we allocate some inodes giving: | 251 | STATIC uint |
252 | xfs_calc_create_resv_modify( | ||
253 | struct xfs_mount *mp) | ||
254 | { | ||
255 | return xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + | ||
256 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | ||
257 | (uint)XFS_FSB_TO_B(mp, 1) + | ||
258 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); | ||
259 | } | ||
260 | |||
261 | /* | ||
262 | * For create we can allocate some inodes giving: | ||
245 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize | 263 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize |
264 | * the superblock for the nlink flag: sector size | ||
246 | * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize | 265 | * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize |
247 | * the inode btree: max depth * blocksize | 266 | * the inode btree: max depth * blocksize |
248 | * the allocation btrees: 2 trees * (2 * max depth - 1) * block size | 267 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
249 | */ | 268 | */ |
250 | STATIC uint | 269 | STATIC uint |
251 | xfs_calc_symlink_reservation( | 270 | xfs_calc_create_resv_alloc( |
271 | struct xfs_mount *mp) | ||
272 | { | ||
273 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | ||
274 | mp->m_sb.sb_sectsize + | ||
275 | xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) + | ||
276 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + | ||
277 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
278 | XFS_FSB_TO_B(mp, 1)); | ||
279 | } | ||
280 | |||
281 | STATIC uint | ||
282 | __xfs_calc_create_reservation( | ||
252 | struct xfs_mount *mp) | 283 | struct xfs_mount *mp) |
253 | { | 284 | { |
254 | return XFS_DQUOT_LOGRES(mp) + | 285 | return XFS_DQUOT_LOGRES(mp) + |
255 | MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + | 286 | MAX(xfs_calc_create_resv_alloc(mp), |
256 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + | 287 | xfs_calc_create_resv_modify(mp)); |
257 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), | ||
258 | XFS_FSB_TO_B(mp, 1)) + | ||
259 | xfs_calc_buf_res(1, 1024)), | ||
260 | (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | ||
261 | xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), | ||
262 | XFS_FSB_TO_B(mp, 1)) + | ||
263 | xfs_calc_buf_res(mp->m_in_maxlevels, | ||
264 | XFS_FSB_TO_B(mp, 1)) + | ||
265 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
266 | XFS_FSB_TO_B(mp, 1)))); | ||
267 | } | 288 | } |
268 | 289 | ||
269 | /* | 290 | /* |
270 | * For create we can modify: | 291 | * For icreate we can allocate some inodes giving: |
271 | * the parent directory inode: inode size | ||
272 | * the new inode: inode size | ||
273 | * the inode btree entry: block size | ||
274 | * the superblock for the nlink flag: sector size | ||
275 | * the directory btree: (max depth + v2) * dir block size | ||
276 | * the directory inode's bmap btree: (max depth + v2) * block size | ||
277 | * Or in the first xact we allocate some inodes giving: | ||
278 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize | 292 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize |
279 | * the superblock for the nlink flag: sector size | 293 | * the superblock for the nlink flag: sector size |
280 | * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize | ||
281 | * the inode btree: max depth * blocksize | 294 | * the inode btree: max depth * blocksize |
282 | * the allocation btrees: 2 trees * (max depth - 1) * block size | 295 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
283 | */ | 296 | */ |
284 | STATIC uint | 297 | STATIC uint |
285 | xfs_calc_create_reservation( | 298 | xfs_calc_icreate_resv_alloc( |
286 | struct xfs_mount *mp) | 299 | struct xfs_mount *mp) |
287 | { | 300 | { |
301 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | ||
302 | mp->m_sb.sb_sectsize + | ||
303 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + | ||
304 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | ||
305 | XFS_FSB_TO_B(mp, 1)); | ||
306 | } | ||
307 | |||
308 | STATIC uint | ||
309 | xfs_calc_icreate_reservation(xfs_mount_t *mp) | ||
310 | { | ||
288 | return XFS_DQUOT_LOGRES(mp) + | 311 | return XFS_DQUOT_LOGRES(mp) + |
289 | MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + | 312 | MAX(xfs_calc_icreate_resv_alloc(mp), |
290 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | 313 | xfs_calc_create_resv_modify(mp)); |
291 | (uint)XFS_FSB_TO_B(mp, 1) + | 314 | } |
292 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), | 315 | |
293 | XFS_FSB_TO_B(mp, 1))), | 316 | STATIC uint |
294 | (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | 317 | xfs_calc_create_reservation( |
295 | mp->m_sb.sb_sectsize + | 318 | struct xfs_mount *mp) |
296 | xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), | 319 | { |
297 | XFS_FSB_TO_B(mp, 1)) + | 320 | if (xfs_sb_version_hascrc(&mp->m_sb)) |
298 | xfs_calc_buf_res(mp->m_in_maxlevels, | 321 | return xfs_calc_icreate_reservation(mp); |
299 | XFS_FSB_TO_B(mp, 1)) + | 322 | return __xfs_calc_create_reservation(mp); |
300 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), | 323 | |
301 | XFS_FSB_TO_B(mp, 1)))); | ||
302 | } | 324 | } |
303 | 325 | ||
304 | /* | 326 | /* |
@@ -311,6 +333,20 @@ xfs_calc_mkdir_reservation( | |||
311 | return xfs_calc_create_reservation(mp); | 333 | return xfs_calc_create_reservation(mp); |
312 | } | 334 | } |
313 | 335 | ||
336 | |||
337 | /* | ||
338 | * Making a new symplink is the same as creating a new file, but | ||
339 | * with the added blocks for remote symlink data which can be up to 1kB in | ||
340 | * length (MAXPATHLEN). | ||
341 | */ | ||
342 | STATIC uint | ||
343 | xfs_calc_symlink_reservation( | ||
344 | struct xfs_mount *mp) | ||
345 | { | ||
346 | return xfs_calc_create_reservation(mp) + | ||
347 | xfs_calc_buf_res(1, MAXPATHLEN); | ||
348 | } | ||
349 | |||
314 | /* | 350 | /* |
315 | * In freeing an inode we can modify: | 351 | * In freeing an inode we can modify: |
316 | * the inode being freed: inode size | 352 | * the inode being freed: inode size |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index a44dba5b2cdb..2b4946393e30 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -48,6 +48,7 @@ typedef struct xfs_trans_header { | |||
48 | #define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */ | 48 | #define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */ |
49 | #define XFS_LI_DQUOT 0x123d | 49 | #define XFS_LI_DQUOT 0x123d |
50 | #define XFS_LI_QUOTAOFF 0x123e | 50 | #define XFS_LI_QUOTAOFF 0x123e |
51 | #define XFS_LI_ICREATE 0x123f | ||
51 | 52 | ||
52 | #define XFS_LI_TYPE_DESC \ | 53 | #define XFS_LI_TYPE_DESC \ |
53 | { XFS_LI_EFI, "XFS_LI_EFI" }, \ | 54 | { XFS_LI_EFI, "XFS_LI_EFI" }, \ |
@@ -107,7 +108,8 @@ typedef struct xfs_trans_header { | |||
107 | #define XFS_TRANS_SWAPEXT 40 | 108 | #define XFS_TRANS_SWAPEXT 40 |
108 | #define XFS_TRANS_SB_COUNT 41 | 109 | #define XFS_TRANS_SB_COUNT 41 |
109 | #define XFS_TRANS_CHECKPOINT 42 | 110 | #define XFS_TRANS_CHECKPOINT 42 |
110 | #define XFS_TRANS_TYPE_MAX 42 | 111 | #define XFS_TRANS_ICREATE 43 |
112 | #define XFS_TRANS_TYPE_MAX 43 | ||
111 | /* new transaction types need to be reflected in xfs_logprint(8) */ | 113 | /* new transaction types need to be reflected in xfs_logprint(8) */ |
112 | 114 | ||
113 | #define XFS_TRANS_TYPES \ | 115 | #define XFS_TRANS_TYPES \ |
@@ -210,23 +212,18 @@ struct xfs_log_item_desc { | |||
210 | /* | 212 | /* |
211 | * Per-extent log reservation for the allocation btree changes | 213 | * Per-extent log reservation for the allocation btree changes |
212 | * involved in freeing or allocating an extent. | 214 | * involved in freeing or allocating an extent. |
213 | * 2 trees * (2 blocks/level * max depth - 1) * block size | 215 | * 2 trees * (2 blocks/level * max depth - 1) |
214 | */ | 216 | */ |
215 | #define XFS_ALLOCFREE_LOG_RES(mp,nx) \ | ||
216 | ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1))) | ||
217 | #define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \ | 217 | #define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \ |
218 | ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1))) | 218 | ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1))) |
219 | 219 | ||
220 | /* | 220 | /* |
221 | * Per-directory log reservation for any directory change. | 221 | * Per-directory log reservation for any directory change. |
222 | * dir blocks: (1 btree block per level + data block + free block) * dblock size | 222 | * dir blocks: (1 btree block per level + data block + free block) |
223 | * bmap btree: (levels + 2) * max depth * block size | 223 | * bmap btree: (levels + 2) * max depth |
224 | * v2 directory blocks can be fragmented below the dirblksize down to the fsb | 224 | * v2 directory blocks can be fragmented below the dirblksize down to the fsb |
225 | * size, so account for that in the DAENTER macros. | 225 | * size, so account for that in the DAENTER macros. |
226 | */ | 226 | */ |
227 | #define XFS_DIROP_LOG_RES(mp) \ | ||
228 | (XFS_FSB_TO_B(mp, XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK)) + \ | ||
229 | (XFS_FSB_TO_B(mp, XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1))) | ||
230 | #define XFS_DIROP_LOG_COUNT(mp) \ | 227 | #define XFS_DIROP_LOG_COUNT(mp) \ |
231 | (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \ | 228 | (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \ |
232 | XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) | 229 | XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) |
@@ -503,6 +500,7 @@ void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *); | |||
503 | void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); | 500 | void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); |
504 | void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); | 501 | void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); |
505 | void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); | 502 | void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); |
503 | void xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *); | ||
506 | void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); | 504 | void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); |
507 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); | 505 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); |
508 | void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); | 506 | void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 73a5fa457e16..aa5a04b844d6 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -397,7 +397,6 @@ shutdown_abort: | |||
397 | return XFS_ERROR(EIO); | 397 | return XFS_ERROR(EIO); |
398 | } | 398 | } |
399 | 399 | ||
400 | |||
401 | /* | 400 | /* |
402 | * Release the buffer bp which was previously acquired with one of the | 401 | * Release the buffer bp which was previously acquired with one of the |
403 | * xfs_trans_... buffer allocation routines if the buffer has not | 402 | * xfs_trans_... buffer allocation routines if the buffer has not |
@@ -603,8 +602,14 @@ xfs_trans_log_buf(xfs_trans_t *tp, | |||
603 | 602 | ||
604 | tp->t_flags |= XFS_TRANS_DIRTY; | 603 | tp->t_flags |= XFS_TRANS_DIRTY; |
605 | bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; | 604 | bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; |
606 | bip->bli_flags |= XFS_BLI_LOGGED; | 605 | |
607 | xfs_buf_item_log(bip, first, last); | 606 | /* |
607 | * If we have an ordered buffer we are not logging any dirty range but | ||
608 | * it still needs to be marked dirty and that it has been logged. | ||
609 | */ | ||
610 | bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED; | ||
611 | if (!(bip->bli_flags & XFS_BLI_ORDERED)) | ||
612 | xfs_buf_item_log(bip, first, last); | ||
608 | } | 613 | } |
609 | 614 | ||
610 | 615 | ||
@@ -757,6 +762,29 @@ xfs_trans_inode_alloc_buf( | |||
757 | } | 762 | } |
758 | 763 | ||
759 | /* | 764 | /* |
765 | * Mark the buffer as ordered for this transaction. This means | ||
766 | * that the contents of the buffer are not recorded in the transaction | ||
767 | * but it is tracked in the AIL as though it was. This allows us | ||
768 | * to record logical changes in transactions rather than the physical | ||
769 | * changes we make to the buffer without changing writeback ordering | ||
770 | * constraints of metadata buffers. | ||
771 | */ | ||
772 | void | ||
773 | xfs_trans_ordered_buf( | ||
774 | struct xfs_trans *tp, | ||
775 | struct xfs_buf *bp) | ||
776 | { | ||
777 | struct xfs_buf_log_item *bip = bp->b_fspriv; | ||
778 | |||
779 | ASSERT(bp->b_transp == tp); | ||
780 | ASSERT(bip != NULL); | ||
781 | ASSERT(atomic_read(&bip->bli_refcount) > 0); | ||
782 | |||
783 | bip->bli_flags |= XFS_BLI_ORDERED; | ||
784 | trace_xfs_buf_item_ordered(bip); | ||
785 | } | ||
786 | |||
787 | /* | ||
760 | * Set the type of the buffer for log recovery so that it can correctly identify | 788 | * Set the type of the buffer for log recovery so that it can correctly identify |
761 | * and hence attach the correct buffer ops to the buffer after replay. | 789 | * and hence attach the correct buffer ops to the buffer after replay. |
762 | */ | 790 | */ |
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index fec75d023703..61407a847b86 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c | |||
@@ -103,8 +103,6 @@ xfs_trans_dup_dqinfo( | |||
103 | return; | 103 | return; |
104 | 104 | ||
105 | xfs_trans_alloc_dqinfo(ntp); | 105 | xfs_trans_alloc_dqinfo(ntp); |
106 | oqa = otp->t_dqinfo->dqa_usrdquots; | ||
107 | nqa = ntp->t_dqinfo->dqa_usrdquots; | ||
108 | 106 | ||
109 | /* | 107 | /* |
110 | * Because the quota blk reservation is carried forward, | 108 | * Because the quota blk reservation is carried forward, |
@@ -113,7 +111,9 @@ xfs_trans_dup_dqinfo( | |||
113 | if(otp->t_flags & XFS_TRANS_DQ_DIRTY) | 111 | if(otp->t_flags & XFS_TRANS_DQ_DIRTY) |
114 | ntp->t_flags |= XFS_TRANS_DQ_DIRTY; | 112 | ntp->t_flags |= XFS_TRANS_DQ_DIRTY; |
115 | 113 | ||
116 | for (j = 0; j < 2; j++) { | 114 | for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { |
115 | oqa = otp->t_dqinfo->dqs[j]; | ||
116 | nqa = ntp->t_dqinfo->dqs[j]; | ||
117 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { | 117 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { |
118 | if (oqa[i].qt_dquot == NULL) | 118 | if (oqa[i].qt_dquot == NULL) |
119 | break; | 119 | break; |
@@ -138,8 +138,6 @@ xfs_trans_dup_dqinfo( | |||
138 | oq->qt_ino_res = oq->qt_ino_res_used; | 138 | oq->qt_ino_res = oq->qt_ino_res_used; |
139 | 139 | ||
140 | } | 140 | } |
141 | oqa = otp->t_dqinfo->dqa_grpdquots; | ||
142 | nqa = ntp->t_dqinfo->dqa_grpdquots; | ||
143 | } | 141 | } |
144 | } | 142 | } |
145 | 143 | ||
@@ -157,8 +155,7 @@ xfs_trans_mod_dquot_byino( | |||
157 | 155 | ||
158 | if (!XFS_IS_QUOTA_RUNNING(mp) || | 156 | if (!XFS_IS_QUOTA_RUNNING(mp) || |
159 | !XFS_IS_QUOTA_ON(mp) || | 157 | !XFS_IS_QUOTA_ON(mp) || |
160 | ip->i_ino == mp->m_sb.sb_uquotino || | 158 | xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) |
161 | ip->i_ino == mp->m_sb.sb_gquotino) | ||
162 | return; | 159 | return; |
163 | 160 | ||
164 | if (tp->t_dqinfo == NULL) | 161 | if (tp->t_dqinfo == NULL) |
@@ -166,20 +163,28 @@ xfs_trans_mod_dquot_byino( | |||
166 | 163 | ||
167 | if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) | 164 | if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) |
168 | (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); | 165 | (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); |
169 | if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot) | 166 | if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) |
170 | (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); | 167 | (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); |
168 | if (XFS_IS_PQUOTA_ON(mp) && ip->i_pdquot) | ||
169 | (void) xfs_trans_mod_dquot(tp, ip->i_pdquot, field, delta); | ||
171 | } | 170 | } |
172 | 171 | ||
173 | STATIC xfs_dqtrx_t * | 172 | STATIC struct xfs_dqtrx * |
174 | xfs_trans_get_dqtrx( | 173 | xfs_trans_get_dqtrx( |
175 | xfs_trans_t *tp, | 174 | struct xfs_trans *tp, |
176 | xfs_dquot_t *dqp) | 175 | struct xfs_dquot *dqp) |
177 | { | 176 | { |
178 | int i; | 177 | int i; |
179 | xfs_dqtrx_t *qa; | 178 | struct xfs_dqtrx *qa; |
180 | 179 | ||
181 | qa = XFS_QM_ISUDQ(dqp) ? | 180 | if (XFS_QM_ISUDQ(dqp)) |
182 | tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots; | 181 | qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_USR]; |
182 | else if (XFS_QM_ISGDQ(dqp)) | ||
183 | qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_GRP]; | ||
184 | else if (XFS_QM_ISPDQ(dqp)) | ||
185 | qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_PRJ]; | ||
186 | else | ||
187 | return NULL; | ||
183 | 188 | ||
184 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { | 189 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { |
185 | if (qa[i].qt_dquot == NULL || | 190 | if (qa[i].qt_dquot == NULL || |
@@ -292,11 +297,10 @@ xfs_trans_mod_dquot( | |||
292 | 297 | ||
293 | 298 | ||
294 | /* | 299 | /* |
295 | * Given an array of dqtrx structures, lock all the dquots associated | 300 | * Given an array of dqtrx structures, lock all the dquots associated and join |
296 | * and join them to the transaction, provided they have been modified. | 301 | * them to the transaction, provided they have been modified. We know that the |
297 | * We know that the highest number of dquots (of one type - usr OR grp), | 302 | * highest number of dquots of one type - usr, grp OR prj - involved in a |
298 | * involved in a transaction is 2 and that both usr and grp combined - 3. | 303 | * transaction is 2 so we don't need to make this very generic. |
299 | * So, we don't attempt to make this very generic. | ||
300 | */ | 304 | */ |
301 | STATIC void | 305 | STATIC void |
302 | xfs_trans_dqlockedjoin( | 306 | xfs_trans_dqlockedjoin( |
@@ -339,12 +343,10 @@ xfs_trans_apply_dquot_deltas( | |||
339 | return; | 343 | return; |
340 | 344 | ||
341 | ASSERT(tp->t_dqinfo); | 345 | ASSERT(tp->t_dqinfo); |
342 | qa = tp->t_dqinfo->dqa_usrdquots; | 346 | for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { |
343 | for (j = 0; j < 2; j++) { | 347 | qa = tp->t_dqinfo->dqs[j]; |
344 | if (qa[0].qt_dquot == NULL) { | 348 | if (qa[0].qt_dquot == NULL) |
345 | qa = tp->t_dqinfo->dqa_grpdquots; | ||
346 | continue; | 349 | continue; |
347 | } | ||
348 | 350 | ||
349 | /* | 351 | /* |
350 | * Lock all of the dquots and join them to the transaction. | 352 | * Lock all of the dquots and join them to the transaction. |
@@ -495,10 +497,6 @@ xfs_trans_apply_dquot_deltas( | |||
495 | ASSERT(dqp->q_res_rtbcount >= | 497 | ASSERT(dqp->q_res_rtbcount >= |
496 | be64_to_cpu(dqp->q_core.d_rtbcount)); | 498 | be64_to_cpu(dqp->q_core.d_rtbcount)); |
497 | } | 499 | } |
498 | /* | ||
499 | * Do the group quotas next | ||
500 | */ | ||
501 | qa = tp->t_dqinfo->dqa_grpdquots; | ||
502 | } | 500 | } |
503 | } | 501 | } |
504 | 502 | ||
@@ -521,9 +519,9 @@ xfs_trans_unreserve_and_mod_dquots( | |||
521 | if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) | 519 | if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) |
522 | return; | 520 | return; |
523 | 521 | ||
524 | qa = tp->t_dqinfo->dqa_usrdquots; | 522 | for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { |
523 | qa = tp->t_dqinfo->dqs[j]; | ||
525 | 524 | ||
526 | for (j = 0; j < 2; j++) { | ||
527 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { | 525 | for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { |
528 | qtrx = &qa[i]; | 526 | qtrx = &qa[i]; |
529 | /* | 527 | /* |
@@ -565,7 +563,6 @@ xfs_trans_unreserve_and_mod_dquots( | |||
565 | xfs_dqunlock(dqp); | 563 | xfs_dqunlock(dqp); |
566 | 564 | ||
567 | } | 565 | } |
568 | qa = tp->t_dqinfo->dqa_grpdquots; | ||
569 | } | 566 | } |
570 | } | 567 | } |
571 | 568 | ||
@@ -640,8 +637,8 @@ xfs_trans_dqresv( | |||
640 | if ((flags & XFS_QMOPT_FORCE_RES) == 0 && | 637 | if ((flags & XFS_QMOPT_FORCE_RES) == 0 && |
641 | dqp->q_core.d_id && | 638 | dqp->q_core.d_id && |
642 | ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || | 639 | ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || |
643 | (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && | 640 | (XFS_IS_GQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISGDQ(dqp)) || |
644 | (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { | 641 | (XFS_IS_PQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISPDQ(dqp)))) { |
645 | if (nblks > 0) { | 642 | if (nblks > 0) { |
646 | /* | 643 | /* |
647 | * dquot is locked already. See if we'd go over the | 644 | * dquot is locked already. See if we'd go over the |
@@ -736,8 +733,8 @@ error_return: | |||
736 | 733 | ||
737 | /* | 734 | /* |
738 | * Given dquot(s), make disk block and/or inode reservations against them. | 735 | * Given dquot(s), make disk block and/or inode reservations against them. |
739 | * The fact that this does the reservation against both the usr and | 736 | * The fact that this does the reservation against user, group and |
740 | * grp/prj quotas is important, because this follows a both-or-nothing | 737 | * project quotas is important, because this follows a all-or-nothing |
741 | * approach. | 738 | * approach. |
742 | * | 739 | * |
743 | * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. | 740 | * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. |
@@ -748,15 +745,16 @@ error_return: | |||
748 | */ | 745 | */ |
749 | int | 746 | int |
750 | xfs_trans_reserve_quota_bydquots( | 747 | xfs_trans_reserve_quota_bydquots( |
751 | xfs_trans_t *tp, | 748 | struct xfs_trans *tp, |
752 | xfs_mount_t *mp, | 749 | struct xfs_mount *mp, |
753 | xfs_dquot_t *udqp, | 750 | struct xfs_dquot *udqp, |
754 | xfs_dquot_t *gdqp, | 751 | struct xfs_dquot *gdqp, |
755 | long nblks, | 752 | struct xfs_dquot *pdqp, |
756 | long ninos, | 753 | long nblks, |
757 | uint flags) | 754 | long ninos, |
755 | uint flags) | ||
758 | { | 756 | { |
759 | int resvd = 0, error; | 757 | int error; |
760 | 758 | ||
761 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) | 759 | if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) |
762 | return 0; | 760 | return 0; |
@@ -771,28 +769,34 @@ xfs_trans_reserve_quota_bydquots( | |||
771 | (flags & ~XFS_QMOPT_ENOSPC)); | 769 | (flags & ~XFS_QMOPT_ENOSPC)); |
772 | if (error) | 770 | if (error) |
773 | return error; | 771 | return error; |
774 | resvd = 1; | ||
775 | } | 772 | } |
776 | 773 | ||
777 | if (gdqp) { | 774 | if (gdqp) { |
778 | error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); | 775 | error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); |
779 | if (error) { | 776 | if (error) |
780 | /* | 777 | goto unwind_usr; |
781 | * can't do it, so backout previous reservation | 778 | } |
782 | */ | 779 | |
783 | if (resvd) { | 780 | if (pdqp) { |
784 | flags |= XFS_QMOPT_FORCE_RES; | 781 | error = xfs_trans_dqresv(tp, mp, pdqp, nblks, ninos, flags); |
785 | xfs_trans_dqresv(tp, mp, udqp, | 782 | if (error) |
786 | -nblks, -ninos, flags); | 783 | goto unwind_grp; |
787 | } | ||
788 | return error; | ||
789 | } | ||
790 | } | 784 | } |
791 | 785 | ||
792 | /* | 786 | /* |
793 | * Didn't change anything critical, so, no need to log | 787 | * Didn't change anything critical, so, no need to log |
794 | */ | 788 | */ |
795 | return 0; | 789 | return 0; |
790 | |||
791 | unwind_grp: | ||
792 | flags |= XFS_QMOPT_FORCE_RES; | ||
793 | if (gdqp) | ||
794 | xfs_trans_dqresv(tp, mp, gdqp, -nblks, -ninos, flags); | ||
795 | unwind_usr: | ||
796 | flags |= XFS_QMOPT_FORCE_RES; | ||
797 | if (udqp) | ||
798 | xfs_trans_dqresv(tp, mp, udqp, -nblks, -ninos, flags); | ||
799 | return error; | ||
796 | } | 800 | } |
797 | 801 | ||
798 | 802 | ||
@@ -816,8 +820,7 @@ xfs_trans_reserve_quota_nblks( | |||
816 | if (XFS_IS_PQUOTA_ON(mp)) | 820 | if (XFS_IS_PQUOTA_ON(mp)) |
817 | flags |= XFS_QMOPT_ENOSPC; | 821 | flags |= XFS_QMOPT_ENOSPC; |
818 | 822 | ||
819 | ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); | 823 | ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino)); |
820 | ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); | ||
821 | 824 | ||
822 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 825 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
823 | ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == | 826 | ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == |
@@ -830,6 +833,7 @@ xfs_trans_reserve_quota_nblks( | |||
830 | */ | 833 | */ |
831 | return xfs_trans_reserve_quota_bydquots(tp, mp, | 834 | return xfs_trans_reserve_quota_bydquots(tp, mp, |
832 | ip->i_udquot, ip->i_gdquot, | 835 | ip->i_udquot, ip->i_gdquot, |
836 | ip->i_pdquot, | ||
833 | nblks, ninos, flags); | 837 | nblks, ninos, flags); |
834 | } | 838 | } |
835 | 839 | ||
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index ac6d567704db..53dfe46f3680 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -112,6 +112,17 @@ xfs_trans_log_inode( | |||
112 | ASSERT(ip->i_itemp != NULL); | 112 | ASSERT(ip->i_itemp != NULL); |
113 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 113 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
114 | 114 | ||
115 | /* | ||
116 | * First time we log the inode in a transaction, bump the inode change | ||
117 | * counter if it is configured for this to occur. | ||
118 | */ | ||
119 | if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && | ||
120 | IS_I_VERSION(VFS_I(ip))) { | ||
121 | inode_inc_iversion(VFS_I(ip)); | ||
122 | ip->i_d.di_changecount = VFS_I(ip)->i_version; | ||
123 | flags |= XFS_ILOG_CORE; | ||
124 | } | ||
125 | |||
115 | tp->t_flags |= XFS_TRANS_DIRTY; | 126 | tp->t_flags |= XFS_TRANS_DIRTY; |
116 | ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY; | 127 | ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY; |
117 | 128 | ||
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0176bb21f09a..dc730ac272be 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -322,18 +322,9 @@ xfs_inactive( | |||
322 | xfs_trans_ijoin(tp, ip, 0); | 322 | xfs_trans_ijoin(tp, ip, 0); |
323 | 323 | ||
324 | if (S_ISLNK(ip->i_d.di_mode)) { | 324 | if (S_ISLNK(ip->i_d.di_mode)) { |
325 | /* | 325 | error = xfs_inactive_symlink(ip, &tp); |
326 | * Zero length symlinks _can_ exist. | 326 | if (error) |
327 | */ | 327 | goto out_cancel; |
328 | if (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) { | ||
329 | error = xfs_inactive_symlink_rmt(ip, &tp); | ||
330 | if (error) | ||
331 | goto out_cancel; | ||
332 | } else if (ip->i_df.if_bytes > 0) { | ||
333 | xfs_idata_realloc(ip, -(ip->i_df.if_bytes), | ||
334 | XFS_DATA_FORK); | ||
335 | ASSERT(ip->i_df.if_bytes == 0); | ||
336 | } | ||
337 | } else if (truncate) { | 328 | } else if (truncate) { |
338 | ip->i_d.di_size = 0; | 329 | ip->i_d.di_size = 0; |
339 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 330 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
@@ -498,6 +489,7 @@ xfs_create( | |||
498 | prid_t prid; | 489 | prid_t prid; |
499 | struct xfs_dquot *udqp = NULL; | 490 | struct xfs_dquot *udqp = NULL; |
500 | struct xfs_dquot *gdqp = NULL; | 491 | struct xfs_dquot *gdqp = NULL; |
492 | struct xfs_dquot *pdqp = NULL; | ||
501 | uint resblks; | 493 | uint resblks; |
502 | uint log_res; | 494 | uint log_res; |
503 | uint log_count; | 495 | uint log_count; |
@@ -516,7 +508,8 @@ xfs_create( | |||
516 | * Make sure that we have allocated dquot(s) on disk. | 508 | * Make sure that we have allocated dquot(s) on disk. |
517 | */ | 509 | */ |
518 | error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, | 510 | error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, |
519 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); | 511 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, |
512 | &udqp, &gdqp, &pdqp); | ||
520 | if (error) | 513 | if (error) |
521 | return error; | 514 | return error; |
522 | 515 | ||
@@ -568,7 +561,8 @@ xfs_create( | |||
568 | /* | 561 | /* |
569 | * Reserve disk quota and the inode. | 562 | * Reserve disk quota and the inode. |
570 | */ | 563 | */ |
571 | error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); | 564 | error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, |
565 | pdqp, resblks, 1, 0); | ||
572 | if (error) | 566 | if (error) |
573 | goto out_trans_cancel; | 567 | goto out_trans_cancel; |
574 | 568 | ||
@@ -632,7 +626,7 @@ xfs_create( | |||
632 | * These ids of the inode couldn't have changed since the new | 626 | * These ids of the inode couldn't have changed since the new |
633 | * inode has been locked ever since it was created. | 627 | * inode has been locked ever since it was created. |
634 | */ | 628 | */ |
635 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); | 629 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); |
636 | 630 | ||
637 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 631 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
638 | if (error) | 632 | if (error) |
@@ -644,6 +638,7 @@ xfs_create( | |||
644 | 638 | ||
645 | xfs_qm_dqrele(udqp); | 639 | xfs_qm_dqrele(udqp); |
646 | xfs_qm_dqrele(gdqp); | 640 | xfs_qm_dqrele(gdqp); |
641 | xfs_qm_dqrele(pdqp); | ||
647 | 642 | ||
648 | *ipp = ip; | 643 | *ipp = ip; |
649 | return 0; | 644 | return 0; |
@@ -665,6 +660,7 @@ xfs_create( | |||
665 | 660 | ||
666 | xfs_qm_dqrele(udqp); | 661 | xfs_qm_dqrele(udqp); |
667 | xfs_qm_dqrele(gdqp); | 662 | xfs_qm_dqrele(gdqp); |
663 | xfs_qm_dqrele(pdqp); | ||
668 | 664 | ||
669 | if (unlock_dp_on_error) | 665 | if (unlock_dp_on_error) |
670 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 666 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
@@ -1577,7 +1573,7 @@ xfs_free_file_space( | |||
1577 | } | 1573 | } |
1578 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 1574 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
1579 | error = xfs_trans_reserve_quota(tp, mp, | 1575 | error = xfs_trans_reserve_quota(tp, mp, |
1580 | ip->i_udquot, ip->i_gdquot, | 1576 | ip->i_udquot, ip->i_gdquot, ip->i_pdquot, |
1581 | resblks, 0, XFS_QMOPT_RES_REGBLKS); | 1577 | resblks, 0, XFS_QMOPT_RES_REGBLKS); |
1582 | if (error) | 1578 | if (error) |
1583 | goto error1; | 1579 | goto error1; |
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 5163022d9808..38c67c34d73f 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h | |||
@@ -31,8 +31,7 @@ int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, | |||
31 | struct xfs_inode *ip); | 31 | struct xfs_inode *ip); |
32 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, | 32 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, |
33 | struct xfs_name *target_name); | 33 | struct xfs_name *target_name); |
34 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, | 34 | int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, size_t bufsize); |
35 | xfs_off_t *offset, filldir_t filldir); | ||
36 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, | 35 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, |
37 | const char *target_path, umode_t mode, struct xfs_inode **ipp); | 36 | const char *target_path, umode_t mode, struct xfs_inode **ipp); |
38 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); | 37 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); |