aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c21
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/vfs_inode.c126
-rw-r--r--fs/9p/vfs_super.c39
-rw-r--r--fs/afs/file.c18
-rw-r--r--fs/autofs4/expire.c2
-rw-r--r--fs/btrfs/inode.c21
-rw-r--r--fs/buffer.c7
-rw-r--r--fs/compat.c17
-rw-r--r--fs/exec.c67
-rw-r--r--fs/ext2/namei.c4
-rw-r--r--fs/ext3/Kconfig32
-rw-r--r--fs/ext3/super.c40
-rw-r--r--fs/gfs2/sys.c20
-rw-r--r--fs/hugetlbfs/inode.c20
-rw-r--r--fs/jffs2/wbuf.c10
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/nfs/direct.c20
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/nfs/read.c6
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/nilfs2/btnode.c2
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/nilfs2/the_nilfs.h2
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c46
-rw-r--r--fs/notify/inotify/inotify_user.c254
-rw-r--r--fs/notify/notification.c11
-rw-r--r--fs/ocfs2/alloc.c49
-rw-r--r--fs/ocfs2/aops.c69
-rw-r--r--fs/ocfs2/dcache.c46
-rw-r--r--fs/ocfs2/dcache.h3
-rw-r--r--fs/ocfs2/dlm/dlmast.c1
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c4
-rw-r--r--fs/ocfs2/file.c5
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/journal.h19
-rw-r--r--fs/ocfs2/ocfs2.h22
-rw-r--r--fs/ocfs2/ocfs2_lockid.h1
-rw-r--r--fs/ocfs2/quota.h1
-rw-r--r--fs/ocfs2/quota_global.c144
-rw-r--r--fs/ocfs2/quota_local.c110
-rw-r--r--fs/ocfs2/stack_o2cb.c3
-rw-r--r--fs/ocfs2/super.c34
-rw-r--r--fs/ocfs2/xattr.c3
-rw-r--r--fs/proc/base.c19
-rw-r--r--fs/select.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c13
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h1
-rw-r--r--fs/xfs/xfs_attr.c8
-rw-r--r--fs/xfs/xfs_bmap.c2
-rw-r--r--fs/xfs/xfs_btree.c4
-rw-r--r--fs/xfs/xfs_da_btree.c6
-rw-r--r--fs/xfs/xfs_dir2.c2
-rw-r--r--fs/xfs/xfs_fsops.c20
-rw-r--r--fs/xfs/xfs_iget.c113
-rw-r--r--fs/xfs/xfs_inode.c10
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c4
61 files changed, 971 insertions, 562 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 332b5ff02fec..f7003cfac63d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -76,7 +76,7 @@ static const match_table_t tokens = {
76 * Return 0 upon success, -ERRNO upon failure. 76 * Return 0 upon success, -ERRNO upon failure.
77 */ 77 */
78 78
79static int v9fs_parse_options(struct v9fs_session_info *v9ses) 79static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
80{ 80{
81 char *options; 81 char *options;
82 substring_t args[MAX_OPT_ARGS]; 82 substring_t args[MAX_OPT_ARGS];
@@ -90,10 +90,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses)
90 v9ses->debug = 0; 90 v9ses->debug = 0;
91 v9ses->cache = 0; 91 v9ses->cache = 0;
92 92
93 if (!v9ses->options) 93 if (!opts)
94 return 0; 94 return 0;
95 95
96 options = kstrdup(v9ses->options, GFP_KERNEL); 96 options = kstrdup(opts, GFP_KERNEL);
97 if (!options) { 97 if (!options) {
98 P9_DPRINTK(P9_DEBUG_ERROR, 98 P9_DPRINTK(P9_DEBUG_ERROR,
99 "failed to allocate copy of option string\n"); 99 "failed to allocate copy of option string\n");
@@ -206,24 +206,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
206 v9ses->uid = ~0; 206 v9ses->uid = ~0;
207 v9ses->dfltuid = V9FS_DEFUID; 207 v9ses->dfltuid = V9FS_DEFUID;
208 v9ses->dfltgid = V9FS_DEFGID; 208 v9ses->dfltgid = V9FS_DEFGID;
209 if (data) {
210 v9ses->options = kstrdup(data, GFP_KERNEL);
211 if (!v9ses->options) {
212 P9_DPRINTK(P9_DEBUG_ERROR,
213 "failed to allocate copy of option string\n");
214 retval = -ENOMEM;
215 goto error;
216 }
217 }
218 209
219 rc = v9fs_parse_options(v9ses); 210 rc = v9fs_parse_options(v9ses, data);
220 if (rc < 0) { 211 if (rc < 0) {
221 retval = rc; 212 retval = rc;
222 goto error; 213 goto error;
223 } 214 }
224 215
225 v9ses->clnt = p9_client_create(dev_name, v9ses->options); 216 v9ses->clnt = p9_client_create(dev_name, data);
226
227 if (IS_ERR(v9ses->clnt)) { 217 if (IS_ERR(v9ses->clnt)) {
228 retval = PTR_ERR(v9ses->clnt); 218 retval = PTR_ERR(v9ses->clnt);
229 v9ses->clnt = NULL; 219 v9ses->clnt = NULL;
@@ -280,7 +270,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
280 270
281 __putname(v9ses->uname); 271 __putname(v9ses->uname);
282 __putname(v9ses->aname); 272 __putname(v9ses->aname);
283 kfree(v9ses->options);
284} 273}
285 274
286/** 275/**
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a7d567192998..38762bf102a9 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -85,7 +85,6 @@ struct v9fs_session_info {
85 unsigned int afid; 85 unsigned int afid;
86 unsigned int cache; 86 unsigned int cache;
87 87
88 char *options; /* copy of mount options */
89 char *uname; /* user name to mount as */ 88 char *uname; /* user name to mount as */
90 char *aname; /* name of remote hierarchy being mounted */ 89 char *aname; /* name of remote hierarchy being mounted */
91 unsigned int maxdata; /* max data for client interface */ 90 unsigned int maxdata; /* max data for client interface */
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 81f8bbf12f9f..06a223d50a81 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -171,7 +171,6 @@ int v9fs_uflags2omode(int uflags, int extended)
171 171
172/** 172/**
173 * v9fs_blank_wstat - helper function to setup a 9P stat structure 173 * v9fs_blank_wstat - helper function to setup a 9P stat structure
174 * @v9ses: 9P session info (for determining extended mode)
175 * @wstat: structure to initialize 174 * @wstat: structure to initialize
176 * 175 *
177 */ 176 */
@@ -207,65 +206,72 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
207 206
208struct inode *v9fs_get_inode(struct super_block *sb, int mode) 207struct inode *v9fs_get_inode(struct super_block *sb, int mode)
209{ 208{
209 int err;
210 struct inode *inode; 210 struct inode *inode;
211 struct v9fs_session_info *v9ses = sb->s_fs_info; 211 struct v9fs_session_info *v9ses = sb->s_fs_info;
212 212
213 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); 213 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
214 214
215 inode = new_inode(sb); 215 inode = new_inode(sb);
216 if (inode) { 216 if (!inode) {
217 inode->i_mode = mode;
218 inode->i_uid = current_fsuid();
219 inode->i_gid = current_fsgid();
220 inode->i_blocks = 0;
221 inode->i_rdev = 0;
222 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
223 inode->i_mapping->a_ops = &v9fs_addr_operations;
224
225 switch (mode & S_IFMT) {
226 case S_IFIFO:
227 case S_IFBLK:
228 case S_IFCHR:
229 case S_IFSOCK:
230 if (!v9fs_extended(v9ses)) {
231 P9_DPRINTK(P9_DEBUG_ERROR,
232 "special files without extended mode\n");
233 return ERR_PTR(-EINVAL);
234 }
235 init_special_inode(inode, inode->i_mode,
236 inode->i_rdev);
237 break;
238 case S_IFREG:
239 inode->i_op = &v9fs_file_inode_operations;
240 inode->i_fop = &v9fs_file_operations;
241 break;
242 case S_IFLNK:
243 if (!v9fs_extended(v9ses)) {
244 P9_DPRINTK(P9_DEBUG_ERROR,
245 "extended modes used w/o 9P2000.u\n");
246 return ERR_PTR(-EINVAL);
247 }
248 inode->i_op = &v9fs_symlink_inode_operations;
249 break;
250 case S_IFDIR:
251 inc_nlink(inode);
252 if (v9fs_extended(v9ses))
253 inode->i_op = &v9fs_dir_inode_operations_ext;
254 else
255 inode->i_op = &v9fs_dir_inode_operations;
256 inode->i_fop = &v9fs_dir_operations;
257 break;
258 default:
259 P9_DPRINTK(P9_DEBUG_ERROR,
260 "BAD mode 0x%x S_IFMT 0x%x\n",
261 mode, mode & S_IFMT);
262 return ERR_PTR(-EINVAL);
263 }
264 } else {
265 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); 217 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
266 return ERR_PTR(-ENOMEM); 218 return ERR_PTR(-ENOMEM);
267 } 219 }
220
221 inode->i_mode = mode;
222 inode->i_uid = current_fsuid();
223 inode->i_gid = current_fsgid();
224 inode->i_blocks = 0;
225 inode->i_rdev = 0;
226 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
227 inode->i_mapping->a_ops = &v9fs_addr_operations;
228
229 switch (mode & S_IFMT) {
230 case S_IFIFO:
231 case S_IFBLK:
232 case S_IFCHR:
233 case S_IFSOCK:
234 if (!v9fs_extended(v9ses)) {
235 P9_DPRINTK(P9_DEBUG_ERROR,
236 "special files without extended mode\n");
237 err = -EINVAL;
238 goto error;
239 }
240 init_special_inode(inode, inode->i_mode, inode->i_rdev);
241 break;
242 case S_IFREG:
243 inode->i_op = &v9fs_file_inode_operations;
244 inode->i_fop = &v9fs_file_operations;
245 break;
246 case S_IFLNK:
247 if (!v9fs_extended(v9ses)) {
248 P9_DPRINTK(P9_DEBUG_ERROR,
249 "extended modes used w/o 9P2000.u\n");
250 err = -EINVAL;
251 goto error;
252 }
253 inode->i_op = &v9fs_symlink_inode_operations;
254 break;
255 case S_IFDIR:
256 inc_nlink(inode);
257 if (v9fs_extended(v9ses))
258 inode->i_op = &v9fs_dir_inode_operations_ext;
259 else
260 inode->i_op = &v9fs_dir_inode_operations;
261 inode->i_fop = &v9fs_dir_operations;
262 break;
263 default:
264 P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
265 mode, mode & S_IFMT);
266 err = -EINVAL;
267 goto error;
268 }
269
268 return inode; 270 return inode;
271
272error:
273 iput(inode);
274 return ERR_PTR(err);
269} 275}
270 276
271/* 277/*
@@ -338,30 +344,25 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
338 344
339 ret = NULL; 345 ret = NULL;
340 st = p9_client_stat(fid); 346 st = p9_client_stat(fid);
341 if (IS_ERR(st)) { 347 if (IS_ERR(st))
342 err = PTR_ERR(st); 348 return ERR_CAST(st);
343 st = NULL;
344 goto error;
345 }
346 349
347 umode = p9mode2unixmode(v9ses, st->mode); 350 umode = p9mode2unixmode(v9ses, st->mode);
348 ret = v9fs_get_inode(sb, umode); 351 ret = v9fs_get_inode(sb, umode);
349 if (IS_ERR(ret)) { 352 if (IS_ERR(ret)) {
350 err = PTR_ERR(ret); 353 err = PTR_ERR(ret);
351 ret = NULL;
352 goto error; 354 goto error;
353 } 355 }
354 356
355 v9fs_stat2inode(st, ret, sb); 357 v9fs_stat2inode(st, ret, sb);
356 ret->i_ino = v9fs_qid2ino(&st->qid); 358 ret->i_ino = v9fs_qid2ino(&st->qid);
359 p9stat_free(st);
357 kfree(st); 360 kfree(st);
358 return ret; 361 return ret;
359 362
360error: 363error:
364 p9stat_free(st);
361 kfree(st); 365 kfree(st);
362 if (ret)
363 iput(ret);
364
365 return ERR_PTR(err); 366 return ERR_PTR(err);
366} 367}
367 368
@@ -403,9 +404,9 @@ v9fs_open_created(struct inode *inode, struct file *file)
403 * @v9ses: session information 404 * @v9ses: session information
404 * @dir: directory that dentry is being created in 405 * @dir: directory that dentry is being created in
405 * @dentry: dentry that is being created 406 * @dentry: dentry that is being created
407 * @extension: 9p2000.u extension string to support devices, etc.
406 * @perm: create permissions 408 * @perm: create permissions
407 * @mode: open mode 409 * @mode: open mode
408 * @extension: 9p2000.u extension string to support devices, etc.
409 * 410 *
410 */ 411 */
411static struct p9_fid * 412static struct p9_fid *
@@ -470,7 +471,10 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
470 dentry->d_op = &v9fs_dentry_operations; 471 dentry->d_op = &v9fs_dentry_operations;
471 472
472 d_instantiate(dentry, inode); 473 d_instantiate(dentry, inode);
473 v9fs_fid_add(dentry, fid); 474 err = v9fs_fid_add(dentry, fid);
475 if (err < 0)
476 goto error;
477
474 return ofid; 478 return ofid;
475 479
476error: 480error:
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 38d695d66a0b..8961f1a8f668 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -81,7 +81,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
81 81
82static void 82static void
83v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, 83v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
84 int flags) 84 int flags, void *data)
85{ 85{
86 sb->s_maxbytes = MAX_LFS_FILESIZE; 86 sb->s_maxbytes = MAX_LFS_FILESIZE;
87 sb->s_blocksize_bits = fls(v9ses->maxdata - 1); 87 sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
@@ -91,6 +91,8 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
91 91
92 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 92 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
93 MS_NOATIME; 93 MS_NOATIME;
94
95 save_mount_options(sb, data);
94} 96}
95 97
96/** 98/**
@@ -113,14 +115,11 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
113 struct v9fs_session_info *v9ses = NULL; 115 struct v9fs_session_info *v9ses = NULL;
114 struct p9_wstat *st = NULL; 116 struct p9_wstat *st = NULL;
115 int mode = S_IRWXUGO | S_ISVTX; 117 int mode = S_IRWXUGO | S_ISVTX;
116 uid_t uid = current_fsuid();
117 gid_t gid = current_fsgid();
118 struct p9_fid *fid; 118 struct p9_fid *fid;
119 int retval = 0; 119 int retval = 0;
120 120
121 P9_DPRINTK(P9_DEBUG_VFS, " \n"); 121 P9_DPRINTK(P9_DEBUG_VFS, " \n");
122 122
123 st = NULL;
124 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); 123 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
125 if (!v9ses) 124 if (!v9ses)
126 return -ENOMEM; 125 return -ENOMEM;
@@ -142,7 +141,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
142 retval = PTR_ERR(sb); 141 retval = PTR_ERR(sb);
143 goto free_stat; 142 goto free_stat;
144 } 143 }
145 v9fs_fill_super(sb, v9ses, flags); 144 v9fs_fill_super(sb, v9ses, flags, data);
146 145
147 inode = v9fs_get_inode(sb, S_IFDIR | mode); 146 inode = v9fs_get_inode(sb, S_IFDIR | mode);
148 if (IS_ERR(inode)) { 147 if (IS_ERR(inode)) {
@@ -150,9 +149,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
150 goto release_sb; 149 goto release_sb;
151 } 150 }
152 151
153 inode->i_uid = uid;
154 inode->i_gid = gid;
155
156 root = d_alloc_root(inode); 152 root = d_alloc_root(inode);
157 if (!root) { 153 if (!root) {
158 iput(inode); 154 iput(inode);
@@ -173,10 +169,8 @@ P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
173 simple_set_mnt(mnt, sb); 169 simple_set_mnt(mnt, sb);
174 return 0; 170 return 0;
175 171
176release_sb:
177 deactivate_locked_super(sb);
178
179free_stat: 172free_stat:
173 p9stat_free(st);
180 kfree(st); 174 kfree(st);
181 175
182clunk_fid: 176clunk_fid:
@@ -185,7 +179,12 @@ clunk_fid:
185close_session: 179close_session:
186 v9fs_session_close(v9ses); 180 v9fs_session_close(v9ses);
187 kfree(v9ses); 181 kfree(v9ses);
182 return retval;
188 183
184release_sb:
185 p9stat_free(st);
186 kfree(st);
187 deactivate_locked_super(sb);
189 return retval; 188 return retval;
190} 189}
191 190
@@ -207,24 +206,10 @@ static void v9fs_kill_super(struct super_block *s)
207 206
208 v9fs_session_close(v9ses); 207 v9fs_session_close(v9ses);
209 kfree(v9ses); 208 kfree(v9ses);
209 s->s_fs_info = NULL;
210 P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n"); 210 P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n");
211} 211}
212 212
213/**
214 * v9fs_show_options - Show mount options in /proc/mounts
215 * @m: seq_file to write to
216 * @mnt: mount descriptor
217 *
218 */
219
220static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
221{
222 struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
223
224 seq_printf(m, "%s", v9ses->options);
225 return 0;
226}
227
228static void 213static void
229v9fs_umount_begin(struct super_block *sb) 214v9fs_umount_begin(struct super_block *sb)
230{ 215{
@@ -237,7 +222,7 @@ v9fs_umount_begin(struct super_block *sb)
237static const struct super_operations v9fs_super_ops = { 222static const struct super_operations v9fs_super_ops = {
238 .statfs = simple_statfs, 223 .statfs = simple_statfs,
239 .clear_inode = v9fs_clear_inode, 224 .clear_inode = v9fs_clear_inode,
240 .show_options = v9fs_show_options, 225 .show_options = generic_show_options,
241 .umount_begin = v9fs_umount_begin, 226 .umount_begin = v9fs_umount_begin,
242}; 227};
243 228
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 0149dab365e7..681c2a7b013f 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -134,9 +134,16 @@ static int afs_readpage(struct file *file, struct page *page)
134 134
135 inode = page->mapping->host; 135 inode = page->mapping->host;
136 136
137 ASSERT(file != NULL); 137 if (file) {
138 key = file->private_data; 138 key = file->private_data;
139 ASSERT(key != NULL); 139 ASSERT(key != NULL);
140 } else {
141 key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
142 if (IS_ERR(key)) {
143 ret = PTR_ERR(key);
144 goto error_nokey;
145 }
146 }
140 147
141 _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); 148 _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index);
142 149
@@ -207,12 +214,17 @@ static int afs_readpage(struct file *file, struct page *page)
207 unlock_page(page); 214 unlock_page(page);
208 } 215 }
209 216
217 if (!file)
218 key_put(key);
210 _leave(" = 0"); 219 _leave(" = 0");
211 return 0; 220 return 0;
212 221
213error: 222error:
214 SetPageError(page); 223 SetPageError(page);
215 unlock_page(page); 224 unlock_page(page);
225 if (!file)
226 key_put(key);
227error_nokey:
216 _leave(" = %d", ret); 228 _leave(" = %d", ret);
217 return ret; 229 return ret;
218} 230}
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index aa39ae83f019..3da18d453488 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -77,7 +77,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
77 } 77 }
78 78
79 /* Update the expiry counter if fs is busy */ 79 /* Update the expiry counter if fs is busy */
80 if (!may_umount_tree(mnt)) { 80 if (!may_umount_tree(path.mnt)) {
81 struct autofs_info *ino = autofs4_dentry_ino(top); 81 struct autofs_info *ino = autofs4_dentry_ino(top);
82 ino->last_used = jiffies; 82 ino->last_used = jiffies;
83 goto done; 83 goto done;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 272b9b2bea86..59cba180fe83 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3099,8 +3099,12 @@ static void inode_tree_add(struct inode *inode)
3099{ 3099{
3100 struct btrfs_root *root = BTRFS_I(inode)->root; 3100 struct btrfs_root *root = BTRFS_I(inode)->root;
3101 struct btrfs_inode *entry; 3101 struct btrfs_inode *entry;
3102 struct rb_node **p = &root->inode_tree.rb_node; 3102 struct rb_node **p;
3103 struct rb_node *parent = NULL; 3103 struct rb_node *parent;
3104
3105again:
3106 p = &root->inode_tree.rb_node;
3107 parent = NULL;
3104 3108
3105 spin_lock(&root->inode_lock); 3109 spin_lock(&root->inode_lock);
3106 while (*p) { 3110 while (*p) {
@@ -3108,13 +3112,16 @@ static void inode_tree_add(struct inode *inode)
3108 entry = rb_entry(parent, struct btrfs_inode, rb_node); 3112 entry = rb_entry(parent, struct btrfs_inode, rb_node);
3109 3113
3110 if (inode->i_ino < entry->vfs_inode.i_ino) 3114 if (inode->i_ino < entry->vfs_inode.i_ino)
3111 p = &(*p)->rb_left; 3115 p = &parent->rb_left;
3112 else if (inode->i_ino > entry->vfs_inode.i_ino) 3116 else if (inode->i_ino > entry->vfs_inode.i_ino)
3113 p = &(*p)->rb_right; 3117 p = &parent->rb_right;
3114 else { 3118 else {
3115 WARN_ON(!(entry->vfs_inode.i_state & 3119 WARN_ON(!(entry->vfs_inode.i_state &
3116 (I_WILL_FREE | I_FREEING | I_CLEAR))); 3120 (I_WILL_FREE | I_FREEING | I_CLEAR)));
3117 break; 3121 rb_erase(parent, &root->inode_tree);
3122 RB_CLEAR_NODE(parent);
3123 spin_unlock(&root->inode_lock);
3124 goto again;
3118 } 3125 }
3119 } 3126 }
3120 rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); 3127 rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
@@ -3126,12 +3133,12 @@ static void inode_tree_del(struct inode *inode)
3126{ 3133{
3127 struct btrfs_root *root = BTRFS_I(inode)->root; 3134 struct btrfs_root *root = BTRFS_I(inode)->root;
3128 3135
3136 spin_lock(&root->inode_lock);
3129 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { 3137 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
3130 spin_lock(&root->inode_lock);
3131 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); 3138 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
3132 spin_unlock(&root->inode_lock);
3133 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3139 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3134 } 3140 }
3141 spin_unlock(&root->inode_lock);
3135} 3142}
3136 3143
3137static noinline void init_btrfs_i(struct inode *inode) 3144static noinline void init_btrfs_i(struct inode *inode)
diff --git a/fs/buffer.c b/fs/buffer.c
index a3ef091a45bd..28f320fac4d4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1165,8 +1165,11 @@ void mark_buffer_dirty(struct buffer_head *bh)
1165 1165
1166 if (!test_set_buffer_dirty(bh)) { 1166 if (!test_set_buffer_dirty(bh)) {
1167 struct page *page = bh->b_page; 1167 struct page *page = bh->b_page;
1168 if (!TestSetPageDirty(page)) 1168 if (!TestSetPageDirty(page)) {
1169 __set_page_dirty(page, page_mapping(page), 0); 1169 struct address_space *mapping = page_mapping(page);
1170 if (mapping)
1171 __set_page_dirty(page, mapping, 0);
1172 }
1170 } 1173 }
1171} 1174}
1172 1175
diff --git a/fs/compat.c b/fs/compat.c
index 94502dab972a..6d6f98fe64a0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1485,20 +1485,15 @@ int compat_do_execve(char * filename,
1485 if (!bprm) 1485 if (!bprm)
1486 goto out_files; 1486 goto out_files;
1487 1487
1488 retval = -ERESTARTNOINTR; 1488 retval = prepare_bprm_creds(bprm);
1489 if (mutex_lock_interruptible(&current->cred_guard_mutex)) 1489 if (retval)
1490 goto out_free; 1490 goto out_free;
1491 current->in_execve = 1;
1492
1493 retval = -ENOMEM;
1494 bprm->cred = prepare_exec_creds();
1495 if (!bprm->cred)
1496 goto out_unlock;
1497 1491
1498 retval = check_unsafe_exec(bprm); 1492 retval = check_unsafe_exec(bprm);
1499 if (retval < 0) 1493 if (retval < 0)
1500 goto out_unlock; 1494 goto out_free;
1501 clear_in_exec = retval; 1495 clear_in_exec = retval;
1496 current->in_execve = 1;
1502 1497
1503 file = open_exec(filename); 1498 file = open_exec(filename);
1504 retval = PTR_ERR(file); 1499 retval = PTR_ERR(file);
@@ -1547,7 +1542,6 @@ int compat_do_execve(char * filename,
1547 /* execve succeeded */ 1542 /* execve succeeded */
1548 current->fs->in_exec = 0; 1543 current->fs->in_exec = 0;
1549 current->in_execve = 0; 1544 current->in_execve = 0;
1550 mutex_unlock(&current->cred_guard_mutex);
1551 acct_update_integrals(current); 1545 acct_update_integrals(current);
1552 free_bprm(bprm); 1546 free_bprm(bprm);
1553 if (displaced) 1547 if (displaced)
@@ -1567,10 +1561,7 @@ out_file:
1567out_unmark: 1561out_unmark:
1568 if (clear_in_exec) 1562 if (clear_in_exec)
1569 current->fs->in_exec = 0; 1563 current->fs->in_exec = 0;
1570
1571out_unlock:
1572 current->in_execve = 0; 1564 current->in_execve = 0;
1573 mutex_unlock(&current->cred_guard_mutex);
1574 1565
1575out_free: 1566out_free:
1576 free_bprm(bprm); 1567 free_bprm(bprm);
diff --git a/fs/exec.c b/fs/exec.c
index 4a8849e45b21..172ceb6edde4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -678,8 +678,8 @@ exit:
678} 678}
679EXPORT_SYMBOL(open_exec); 679EXPORT_SYMBOL(open_exec);
680 680
681int kernel_read(struct file *file, unsigned long offset, 681int kernel_read(struct file *file, loff_t offset,
682 char *addr, unsigned long count) 682 char *addr, unsigned long count)
683{ 683{
684 mm_segment_t old_fs; 684 mm_segment_t old_fs;
685 loff_t pos = offset; 685 loff_t pos = offset;
@@ -1016,6 +1016,35 @@ out:
1016EXPORT_SYMBOL(flush_old_exec); 1016EXPORT_SYMBOL(flush_old_exec);
1017 1017
1018/* 1018/*
1019 * Prepare credentials and lock ->cred_guard_mutex.
1020 * install_exec_creds() commits the new creds and drops the lock.
1021 * Or, if exec fails before, free_bprm() should release ->cred and
1022 * unlock.
1023 */
1024int prepare_bprm_creds(struct linux_binprm *bprm)
1025{
1026 if (mutex_lock_interruptible(&current->cred_guard_mutex))
1027 return -ERESTARTNOINTR;
1028
1029 bprm->cred = prepare_exec_creds();
1030 if (likely(bprm->cred))
1031 return 0;
1032
1033 mutex_unlock(&current->cred_guard_mutex);
1034 return -ENOMEM;
1035}
1036
1037void free_bprm(struct linux_binprm *bprm)
1038{
1039 free_arg_pages(bprm);
1040 if (bprm->cred) {
1041 mutex_unlock(&current->cred_guard_mutex);
1042 abort_creds(bprm->cred);
1043 }
1044 kfree(bprm);
1045}
1046
1047/*
1019 * install the new credentials for this executable 1048 * install the new credentials for this executable
1020 */ 1049 */
1021void install_exec_creds(struct linux_binprm *bprm) 1050void install_exec_creds(struct linux_binprm *bprm)
@@ -1024,12 +1053,13 @@ void install_exec_creds(struct linux_binprm *bprm)
1024 1053
1025 commit_creds(bprm->cred); 1054 commit_creds(bprm->cred);
1026 bprm->cred = NULL; 1055 bprm->cred = NULL;
1027 1056 /*
1028 /* cred_guard_mutex must be held at least to this point to prevent 1057 * cred_guard_mutex must be held at least to this point to prevent
1029 * ptrace_attach() from altering our determination of the task's 1058 * ptrace_attach() from altering our determination of the task's
1030 * credentials; any time after this it may be unlocked */ 1059 * credentials; any time after this it may be unlocked.
1031 1060 */
1032 security_bprm_committed_creds(bprm); 1061 security_bprm_committed_creds(bprm);
1062 mutex_unlock(&current->cred_guard_mutex);
1033} 1063}
1034EXPORT_SYMBOL(install_exec_creds); 1064EXPORT_SYMBOL(install_exec_creds);
1035 1065
@@ -1246,14 +1276,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1246 1276
1247EXPORT_SYMBOL(search_binary_handler); 1277EXPORT_SYMBOL(search_binary_handler);
1248 1278
1249void free_bprm(struct linux_binprm *bprm)
1250{
1251 free_arg_pages(bprm);
1252 if (bprm->cred)
1253 abort_creds(bprm->cred);
1254 kfree(bprm);
1255}
1256
1257/* 1279/*
1258 * sys_execve() executes a new program. 1280 * sys_execve() executes a new program.
1259 */ 1281 */
@@ -1277,20 +1299,15 @@ int do_execve(char * filename,
1277 if (!bprm) 1299 if (!bprm)
1278 goto out_files; 1300 goto out_files;
1279 1301
1280 retval = -ERESTARTNOINTR; 1302 retval = prepare_bprm_creds(bprm);
1281 if (mutex_lock_interruptible(&current->cred_guard_mutex)) 1303 if (retval)
1282 goto out_free; 1304 goto out_free;
1283 current->in_execve = 1;
1284
1285 retval = -ENOMEM;
1286 bprm->cred = prepare_exec_creds();
1287 if (!bprm->cred)
1288 goto out_unlock;
1289 1305
1290 retval = check_unsafe_exec(bprm); 1306 retval = check_unsafe_exec(bprm);
1291 if (retval < 0) 1307 if (retval < 0)
1292 goto out_unlock; 1308 goto out_free;
1293 clear_in_exec = retval; 1309 clear_in_exec = retval;
1310 current->in_execve = 1;
1294 1311
1295 file = open_exec(filename); 1312 file = open_exec(filename);
1296 retval = PTR_ERR(file); 1313 retval = PTR_ERR(file);
@@ -1340,7 +1357,6 @@ int do_execve(char * filename,
1340 /* execve succeeded */ 1357 /* execve succeeded */
1341 current->fs->in_exec = 0; 1358 current->fs->in_exec = 0;
1342 current->in_execve = 0; 1359 current->in_execve = 0;
1343 mutex_unlock(&current->cred_guard_mutex);
1344 acct_update_integrals(current); 1360 acct_update_integrals(current);
1345 free_bprm(bprm); 1361 free_bprm(bprm);
1346 if (displaced) 1362 if (displaced)
@@ -1360,10 +1376,7 @@ out_file:
1360out_unmark: 1376out_unmark:
1361 if (clear_in_exec) 1377 if (clear_in_exec)
1362 current->fs->in_exec = 0; 1378 current->fs->in_exec = 0;
1363
1364out_unlock:
1365 current->in_execve = 0; 1379 current->in_execve = 0;
1366 mutex_unlock(&current->cred_guard_mutex);
1367 1380
1368out_free: 1381out_free:
1369 free_bprm(bprm); 1382 free_bprm(bprm);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index e1dedb0f7873..78d9b925fc94 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -362,6 +362,10 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
362 if (dir_de) { 362 if (dir_de) {
363 if (old_dir != new_dir) 363 if (old_dir != new_dir)
364 ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0); 364 ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
365 else {
366 kunmap(dir_page);
367 page_cache_release(dir_page);
368 }
365 inode_dec_link_count(old_dir); 369 inode_dec_link_count(old_dir);
366 } 370 }
367 return 0; 371 return 0;
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig
index fb3c1a21b135..522b15498f45 100644
--- a/fs/ext3/Kconfig
+++ b/fs/ext3/Kconfig
@@ -29,23 +29,25 @@ config EXT3_FS
29 module will be called ext3. 29 module will be called ext3.
30 30
31config EXT3_DEFAULTS_TO_ORDERED 31config EXT3_DEFAULTS_TO_ORDERED
32 bool "Default to 'data=ordered' in ext3 (legacy option)" 32 bool "Default to 'data=ordered' in ext3"
33 depends on EXT3_FS 33 depends on EXT3_FS
34 help 34 help
35 If a filesystem does not explicitly specify a data ordering 35 The journal mode options for ext3 have different tradeoffs
36 mode, and the journal capability allowed it, ext3 used to 36 between when data is guaranteed to be on disk and
37 historically default to 'data=ordered'. 37 performance. The use of "data=writeback" can cause
38 38 unwritten data to appear in files after a system crash or
39 That was a rather unfortunate choice, because it leads to all 39 power failure, which can be a security issue. However,
40 kinds of latency problems, and the 'data=writeback' mode is more 40 "data=ordered" mode can also result in major performance
41 appropriate these days. 41 problems, including seconds-long delays before an fsync()
42 42 call returns. For details, see:
43 You should probably always answer 'n' here, and if you really 43
44 want to use 'data=ordered' mode, set it in the filesystem itself 44 http://ext4.wiki.kernel.org/index.php/Ext3_data_mode_tradeoffs
45 with 'tune2fs -o journal_data_ordered'. 45
46 46 If you have been historically happy with ext3's performance,
47 But if you really want to enable the legacy default, you can do 47 data=ordered mode will be a safe choice and you should
48 so by answering 'y' to this question. 48 answer 'y' here. If you understand the reliability and data
49 privacy issues of data=writeback and are willing to make
50 that trade off, answer 'n'.
49 51
50config EXT3_FS_XATTR 52config EXT3_FS_XATTR
51 bool "Ext3 extended attributes" 53 bool "Ext3 extended attributes"
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 524b349c6299..a8d80a7f1105 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -543,6 +543,19 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl
543#endif 543#endif
544} 544}
545 545
546static char *data_mode_string(unsigned long mode)
547{
548 switch (mode) {
549 case EXT3_MOUNT_JOURNAL_DATA:
550 return "journal";
551 case EXT3_MOUNT_ORDERED_DATA:
552 return "ordered";
553 case EXT3_MOUNT_WRITEBACK_DATA:
554 return "writeback";
555 }
556 return "unknown";
557}
558
546/* 559/*
547 * Show an option if 560 * Show an option if
548 * - it's set to a non-default value OR 561 * - it's set to a non-default value OR
@@ -616,13 +629,8 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
616 if (test_opt(sb, NOBH)) 629 if (test_opt(sb, NOBH))
617 seq_puts(seq, ",nobh"); 630 seq_puts(seq, ",nobh");
618 631
619 if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) 632 seq_printf(seq, ",data=%s", data_mode_string(sbi->s_mount_opt &
620 seq_puts(seq, ",data=journal"); 633 EXT3_MOUNT_DATA_FLAGS));
621 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
622 seq_puts(seq, ",data=ordered");
623 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
624 seq_puts(seq, ",data=writeback");
625
626 if (test_opt(sb, DATA_ERR_ABORT)) 634 if (test_opt(sb, DATA_ERR_ABORT))
627 seq_puts(seq, ",data_err=abort"); 635 seq_puts(seq, ",data_err=abort");
628 636
@@ -1024,12 +1032,18 @@ static int parse_options (char *options, struct super_block *sb,
1024 datacheck: 1032 datacheck:
1025 if (is_remount) { 1033 if (is_remount) {
1026 if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) 1034 if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
1027 != data_opt) { 1035 == data_opt)
1028 printk(KERN_ERR 1036 break;
1029 "EXT3-fs: cannot change data " 1037 printk(KERN_ERR
1030 "mode on remount\n"); 1038 "EXT3-fs (device %s): Cannot change "
1031 return 0; 1039 "data mode on remount. The filesystem "
1032 } 1040 "is mounted in data=%s mode and you "
1041 "try to remount it in data=%s mode.\n",
1042 sb->s_id,
1043 data_mode_string(sbi->s_mount_opt &
1044 EXT3_MOUNT_DATA_FLAGS),
1045 data_mode_string(data_opt));
1046 return 0;
1033 } else { 1047 } else {
1034 sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; 1048 sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS;
1035 sbi->s_mount_opt |= data_opt; 1049 sbi->s_mount_opt |= data_opt;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 23419dc3027b..a7cbfbd340c7 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -386,16 +386,16 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
386#define GDLM_ATTR(_name,_mode,_show,_store) \ 386#define GDLM_ATTR(_name,_mode,_show,_store) \
387static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) 387static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
388 388
389GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 389GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
390GDLM_ATTR(block, 0644, block_show, block_store); 390GDLM_ATTR(block, 0644, block_show, block_store);
391GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 391GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
392GDLM_ATTR(id, 0444, lkid_show, NULL); 392GDLM_ATTR(id, 0444, lkid_show, NULL);
393GDLM_ATTR(jid, 0444, jid_show, NULL); 393GDLM_ATTR(jid, 0444, jid_show, NULL);
394GDLM_ATTR(first, 0444, lkfirst_show, NULL); 394GDLM_ATTR(first, 0444, lkfirst_show, NULL);
395GDLM_ATTR(first_done, 0444, first_done_show, NULL); 395GDLM_ATTR(first_done, 0444, first_done_show, NULL);
396GDLM_ATTR(recover, 0200, NULL, recover_store); 396GDLM_ATTR(recover, 0600, NULL, recover_store);
397GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); 397GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
398GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); 398GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
399 399
400static struct attribute *lock_module_attrs[] = { 400static struct attribute *lock_module_attrs[] = {
401 &gdlm_attr_proto_name.attr, 401 &gdlm_attr_proto_name.attr,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 941c8425c10b..cb88dac8ccaa 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -935,26 +935,28 @@ static int can_do_hugetlb_shm(void)
935 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); 935 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
936} 936}
937 937
938struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) 938struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
939 struct user_struct **user)
939{ 940{
940 int error = -ENOMEM; 941 int error = -ENOMEM;
941 int unlock_shm = 0;
942 struct file *file; 942 struct file *file;
943 struct inode *inode; 943 struct inode *inode;
944 struct dentry *dentry, *root; 944 struct dentry *dentry, *root;
945 struct qstr quick_string; 945 struct qstr quick_string;
946 struct user_struct *user = current_user();
947 946
947 *user = NULL;
948 if (!hugetlbfs_vfsmount) 948 if (!hugetlbfs_vfsmount)
949 return ERR_PTR(-ENOENT); 949 return ERR_PTR(-ENOENT);
950 950
951 if (!can_do_hugetlb_shm()) { 951 if (!can_do_hugetlb_shm()) {
952 if (user_shm_lock(size, user)) { 952 *user = current_user();
953 unlock_shm = 1; 953 if (user_shm_lock(size, *user)) {
954 WARN_ONCE(1, 954 WARN_ONCE(1,
955 "Using mlock ulimits for SHM_HUGETLB deprecated\n"); 955 "Using mlock ulimits for SHM_HUGETLB deprecated\n");
956 } else 956 } else {
957 *user = NULL;
957 return ERR_PTR(-EPERM); 958 return ERR_PTR(-EPERM);
959 }
958 } 960 }
959 961
960 root = hugetlbfs_vfsmount->mnt_root; 962 root = hugetlbfs_vfsmount->mnt_root;
@@ -996,8 +998,10 @@ out_inode:
996out_dentry: 998out_dentry:
997 dput(dentry); 999 dput(dentry);
998out_shm_unlock: 1000out_shm_unlock:
999 if (unlock_shm) 1001 if (*user) {
1000 user_shm_unlock(size, user); 1002 user_shm_unlock(size, *user);
1003 *user = NULL;
1004 }
1001 return ERR_PTR(error); 1005 return ERR_PTR(error);
1002} 1006}
1003 1007
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index d9a721e6db70..5ef7bac265e5 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1268,10 +1268,20 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) {
1268 if (!c->wbuf) 1268 if (!c->wbuf)
1269 return -ENOMEM; 1269 return -ENOMEM;
1270 1270
1271#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
1272 c->wbuf_verify = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1273 if (!c->wbuf_verify) {
1274 kfree(c->wbuf);
1275 return -ENOMEM;
1276 }
1277#endif
1271 return 0; 1278 return 0;
1272} 1279}
1273 1280
1274void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) { 1281void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) {
1282#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
1283 kfree(c->wbuf_verify);
1284#endif
1275 kfree(c->wbuf); 1285 kfree(c->wbuf);
1276} 1286}
1277 1287
diff --git a/fs/libfs.c b/fs/libfs.c
index ddfa89948c3f..dcec3d3ea64f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -217,7 +217,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
217 return PTR_ERR(s); 217 return PTR_ERR(s);
218 218
219 s->s_flags = MS_NOUSER; 219 s->s_flags = MS_NOUSER;
220 s->s_maxbytes = ~0ULL; 220 s->s_maxbytes = MAX_LFS_FILESIZE;
221 s->s_blocksize = PAGE_SIZE; 221 s->s_blocksize = PAGE_SIZE;
222 s->s_blocksize_bits = PAGE_SHIFT; 222 s->s_blocksize_bits = PAGE_SHIFT;
223 s->s_magic = magic; 223 s->s_magic = magic;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 489fc01a3204..e4e089a8f294 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata)
255 255
256 if (put_dreq(dreq)) 256 if (put_dreq(dreq))
257 nfs_direct_complete(dreq); 257 nfs_direct_complete(dreq);
258 nfs_readdata_release(calldata); 258 nfs_readdata_free(data);
259} 259}
260 260
261static const struct rpc_call_ops nfs_read_direct_ops = { 261static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
314 data->npages, 1, 0, data->pagevec, NULL); 314 data->npages, 1, 0, data->pagevec, NULL);
315 up_read(&current->mm->mmap_sem); 315 up_read(&current->mm->mmap_sem);
316 if (result < 0) { 316 if (result < 0) {
317 nfs_readdata_release(data); 317 nfs_readdata_free(data);
318 break; 318 break;
319 } 319 }
320 if ((unsigned)result < data->npages) { 320 if ((unsigned)result < data->npages) {
321 bytes = result * PAGE_SIZE; 321 bytes = result * PAGE_SIZE;
322 if (bytes <= pgbase) { 322 if (bytes <= pgbase) {
323 nfs_direct_release_pages(data->pagevec, result); 323 nfs_direct_release_pages(data->pagevec, result);
324 nfs_readdata_release(data); 324 nfs_readdata_free(data);
325 break; 325 break;
326 } 326 }
327 bytes -= pgbase; 327 bytes -= pgbase;
@@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
334 data->inode = inode; 334 data->inode = inode;
335 data->cred = msg.rpc_cred; 335 data->cred = msg.rpc_cred;
336 data->args.fh = NFS_FH(inode); 336 data->args.fh = NFS_FH(inode);
337 data->args.context = get_nfs_open_context(ctx); 337 data->args.context = ctx;
338 data->args.offset = pos; 338 data->args.offset = pos;
339 data->args.pgbase = pgbase; 339 data->args.pgbase = pgbase;
340 data->args.pages = data->pagevec; 340 data->args.pages = data->pagevec;
@@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
441 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); 441 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
442 list_del(&data->pages); 442 list_del(&data->pages);
443 nfs_direct_release_pages(data->pagevec, data->npages); 443 nfs_direct_release_pages(data->pagevec, data->npages);
444 nfs_writedata_release(data); 444 nfs_writedata_free(data);
445 } 445 }
446} 446}
447 447
@@ -534,7 +534,7 @@ static void nfs_direct_commit_release(void *calldata)
534 534
535 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); 535 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
536 nfs_direct_write_complete(dreq, data->inode); 536 nfs_direct_write_complete(dreq, data->inode);
537 nfs_commitdata_release(calldata); 537 nfs_commit_free(data);
538} 538}
539 539
540static const struct rpc_call_ops nfs_commit_direct_ops = { 540static const struct rpc_call_ops nfs_commit_direct_ops = {
@@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
570 data->args.fh = NFS_FH(data->inode); 570 data->args.fh = NFS_FH(data->inode);
571 data->args.offset = 0; 571 data->args.offset = 0;
572 data->args.count = 0; 572 data->args.count = 0;
573 data->args.context = get_nfs_open_context(dreq->ctx); 573 data->args.context = dreq->ctx;
574 data->res.count = 0; 574 data->res.count = 0;
575 data->res.fattr = &data->fattr; 575 data->res.fattr = &data->fattr;
576 data->res.verf = &data->verf; 576 data->res.verf = &data->verf;
@@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
734 data->npages, 0, 0, data->pagevec, NULL); 734 data->npages, 0, 0, data->pagevec, NULL);
735 up_read(&current->mm->mmap_sem); 735 up_read(&current->mm->mmap_sem);
736 if (result < 0) { 736 if (result < 0) {
737 nfs_writedata_release(data); 737 nfs_writedata_free(data);
738 break; 738 break;
739 } 739 }
740 if ((unsigned)result < data->npages) { 740 if ((unsigned)result < data->npages) {
741 bytes = result * PAGE_SIZE; 741 bytes = result * PAGE_SIZE;
742 if (bytes <= pgbase) { 742 if (bytes <= pgbase) {
743 nfs_direct_release_pages(data->pagevec, result); 743 nfs_direct_release_pages(data->pagevec, result);
744 nfs_writedata_release(data); 744 nfs_writedata_free(data);
745 break; 745 break;
746 } 746 }
747 bytes -= pgbase; 747 bytes -= pgbase;
@@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
756 data->inode = inode; 756 data->inode = inode;
757 data->cred = msg.rpc_cred; 757 data->cred = msg.rpc_cred;
758 data->args.fh = NFS_FH(inode); 758 data->args.fh = NFS_FH(inode);
759 data->args.context = get_nfs_open_context(ctx); 759 data->args.context = ctx;
760 data->args.offset = pos; 760 data->args.offset = pos;
761 data->args.pgbase = pgbase; 761 data->args.pgbase = pgbase;
762 data->args.pages = data->pagevec; 762 data->args.pages = data->pagevec;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 65ca8c18476f..1434080aefeb 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1250,8 +1250,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
1250 continue; 1250 continue;
1251 } 1251 }
1252 /* Initialize or reset the session */ 1252 /* Initialize or reset the session */
1253 if (nfs4_has_session(clp) && 1253 if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)
1254 test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) { 1254 && nfs4_has_session(clp)) {
1255 if (clp->cl_cons_state == NFS_CS_SESSION_INITING) 1255 if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
1256 status = nfs4_initialize_session(clp); 1256 status = nfs4_initialize_session(clp);
1257 else 1257 else
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 73ea5e8d66ce..12c9e66d3f1d 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -60,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
60 return p; 60 return p;
61} 61}
62 62
63static void nfs_readdata_free(struct nfs_read_data *p) 63void nfs_readdata_free(struct nfs_read_data *p)
64{ 64{
65 if (p && (p->pagevec != &p->page_array[0])) 65 if (p && (p->pagevec != &p->page_array[0]))
66 kfree(p->pagevec); 66 kfree(p->pagevec);
67 mempool_free(p, nfs_rdata_mempool); 67 mempool_free(p, nfs_rdata_mempool);
68} 68}
69 69
70void nfs_readdata_release(void *data) 70static void nfs_readdata_release(struct nfs_read_data *rdata)
71{ 71{
72 struct nfs_read_data *rdata = data;
73
74 put_nfs_open_context(rdata->args.context); 72 put_nfs_open_context(rdata->args.context);
75 nfs_readdata_free(rdata); 73 nfs_readdata_free(rdata);
76} 74}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0a0a2ff767c3..a34fae21fe10 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -87,17 +87,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
87 return p; 87 return p;
88} 88}
89 89
90static void nfs_writedata_free(struct nfs_write_data *p) 90void nfs_writedata_free(struct nfs_write_data *p)
91{ 91{
92 if (p && (p->pagevec != &p->page_array[0])) 92 if (p && (p->pagevec != &p->page_array[0]))
93 kfree(p->pagevec); 93 kfree(p->pagevec);
94 mempool_free(p, nfs_wdata_mempool); 94 mempool_free(p, nfs_wdata_mempool);
95} 95}
96 96
97void nfs_writedata_release(void *data) 97static void nfs_writedata_release(struct nfs_write_data *wdata)
98{ 98{
99 struct nfs_write_data *wdata = data;
100
101 put_nfs_open_context(wdata->args.context); 99 put_nfs_open_context(wdata->args.context);
102 nfs_writedata_free(wdata); 100 nfs_writedata_free(wdata);
103} 101}
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 7e0b61be212e..c668bca579c1 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -209,6 +209,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
209 * We cannot call radix_tree_preload for the kernels older 209 * We cannot call radix_tree_preload for the kernels older
210 * than 2.6.23, because it is not exported for modules. 210 * than 2.6.23, because it is not exported for modules.
211 */ 211 */
212retry:
212 err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); 213 err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
213 if (err) 214 if (err)
214 goto failed_unlock; 215 goto failed_unlock;
@@ -219,7 +220,6 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
219 (unsigned long long)oldkey, 220 (unsigned long long)oldkey,
220 (unsigned long long)newkey); 221 (unsigned long long)newkey);
221 222
222retry:
223 spin_lock_irq(&btnc->tree_lock); 223 spin_lock_irq(&btnc->tree_lock);
224 err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); 224 err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
225 spin_unlock_irq(&btnc->tree_lock); 225 spin_unlock_irq(&btnc->tree_lock);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8e2ec43b18f4..151964f0de4c 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -416,8 +416,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
416 if (unlikely(err)) 416 if (unlikely(err))
417 goto failed; 417 goto failed;
418 418
419 down_read(&nilfs->ns_segctor_sem);
419 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, 420 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
420 &bh_cp); 421 &bh_cp);
422 up_read(&nilfs->ns_segctor_sem);
421 if (unlikely(err)) { 423 if (unlikely(err)) {
422 if (err == -ENOENT || err == -EINVAL) { 424 if (err == -ENOENT || err == -EINVAL) {
423 printk(KERN_ERR 425 printk(KERN_ERR
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index e8adbffc626f..1b9caafb8662 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -253,7 +253,7 @@ nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
253 253
254static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) 254static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
255{ 255{
256 if (!atomic_dec_and_test(&sbi->s_count)) 256 if (atomic_dec_and_test(&sbi->s_count))
257 kfree(sbi); 257 kfree(sbi);
258} 258}
259 259
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 47cd258fd24d..c9ee67b442e1 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -62,13 +62,14 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
62 event_priv->wd = wd; 62 event_priv->wd = wd;
63 63
64 ret = fsnotify_add_notify_event(group, event, fsn_event_priv); 64 ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
65 /* EEXIST is not an error */ 65 if (ret) {
66 if (ret == -EEXIST)
67 ret = 0;
68
69 /* did event_priv get attached? */
70 if (list_empty(&fsn_event_priv->event_list))
71 inotify_free_event_priv(fsn_event_priv); 66 inotify_free_event_priv(fsn_event_priv);
67 /* EEXIST says we tail matched, EOVERFLOW isn't something
68 * to report up the stack. */
69 if ((ret == -EEXIST) ||
70 (ret == -EOVERFLOW))
71 ret = 0;
72 }
72 73
73 /* 74 /*
74 * If we hold the entry until after the event is on the queue 75 * If we hold the entry until after the event is on the queue
@@ -104,16 +105,45 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
104 return send; 105 return send;
105} 106}
106 107
108/*
109 * This is NEVER supposed to be called. Inotify marks should either have been
110 * removed from the idr when the watch was removed or in the
111 * fsnotify_destroy_mark_by_group() call when the inotify instance was being
112 * torn down. This is only called if the idr is about to be freed but there
113 * are still marks in it.
114 */
107static int idr_callback(int id, void *p, void *data) 115static int idr_callback(int id, void *p, void *data)
108{ 116{
109 BUG(); 117 struct fsnotify_mark_entry *entry;
118 struct inotify_inode_mark_entry *ientry;
119 static bool warned = false;
120
121 if (warned)
122 return 0;
123
124 warned = false;
125 entry = p;
126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
127
128 WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in "
129 "idr. Probably leaking memory\n", id, p, data);
130
131 /*
132 * I'm taking the liberty of assuming that the mark in question is a
133 * valid address and I'm dereferencing it. This might help to figure
134 * out why we got here and the panic is no worse than the original
135 * BUG() that was here.
136 */
137 if (entry)
138 printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n",
139 entry->group, entry->inode, ientry->wd);
110 return 0; 140 return 0;
111} 141}
112 142
113static void inotify_free_group_priv(struct fsnotify_group *group) 143static void inotify_free_group_priv(struct fsnotify_group *group)
114{ 144{
115 /* ideally the idr is empty and we won't hit the BUG in teh callback */ 145 /* ideally the idr is empty and we won't hit the BUG in teh callback */
116 idr_for_each(&group->inotify_data.idr, idr_callback, NULL); 146 idr_for_each(&group->inotify_data.idr, idr_callback, group);
117 idr_remove_all(&group->inotify_data.idr); 147 idr_remove_all(&group->inotify_data.idr);
118 idr_destroy(&group->inotify_data.idr); 148 idr_destroy(&group->inotify_data.idr);
119} 149}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index f30d9bbc2e1b..dcd2040d330c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -47,9 +47,6 @@
47 47
48static struct vfsmount *inotify_mnt __read_mostly; 48static struct vfsmount *inotify_mnt __read_mostly;
49 49
50/* this just sits here and wastes global memory. used to just pad userspace messages with zeros */
51static struct inotify_event nul_inotify_event;
52
53/* these are configurable via /proc/sys/fs/inotify/ */ 50/* these are configurable via /proc/sys/fs/inotify/ */
54static int inotify_max_user_instances __read_mostly; 51static int inotify_max_user_instances __read_mostly;
55static int inotify_max_queued_events __read_mostly; 52static int inotify_max_queued_events __read_mostly;
@@ -157,7 +154,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
157 154
158 event = fsnotify_peek_notify_event(group); 155 event = fsnotify_peek_notify_event(group);
159 156
160 event_size += roundup(event->name_len, event_size); 157 if (event->name_len)
158 event_size += roundup(event->name_len + 1, event_size);
161 159
162 if (event_size > count) 160 if (event_size > count)
163 return ERR_PTR(-EINVAL); 161 return ERR_PTR(-EINVAL);
@@ -183,7 +181,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
183 struct fsnotify_event_private_data *fsn_priv; 181 struct fsnotify_event_private_data *fsn_priv;
184 struct inotify_event_private_data *priv; 182 struct inotify_event_private_data *priv;
185 size_t event_size = sizeof(struct inotify_event); 183 size_t event_size = sizeof(struct inotify_event);
186 size_t name_len; 184 size_t name_len = 0;
187 185
188 /* we get the inotify watch descriptor from the event private data */ 186 /* we get the inotify watch descriptor from the event private data */
189 spin_lock(&event->lock); 187 spin_lock(&event->lock);
@@ -199,8 +197,12 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
199 inotify_free_event_priv(fsn_priv); 197 inotify_free_event_priv(fsn_priv);
200 } 198 }
201 199
202 /* round up event->name_len so it is a multiple of event_size */ 200 /*
203 name_len = roundup(event->name_len, event_size); 201 * round up event->name_len so it is a multiple of event_size
202 * plus an extra byte for the terminating '\0'.
203 */
204 if (event->name_len)
205 name_len = roundup(event->name_len + 1, event_size);
204 inotify_event.len = name_len; 206 inotify_event.len = name_len;
205 207
206 inotify_event.mask = inotify_mask_to_arg(event->mask); 208 inotify_event.mask = inotify_mask_to_arg(event->mask);
@@ -224,8 +226,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
224 return -EFAULT; 226 return -EFAULT;
225 buf += event->name_len; 227 buf += event->name_len;
226 228
227 /* fill userspace with 0's from nul_inotify_event */ 229 /* fill userspace with 0's */
228 if (copy_to_user(buf, &nul_inotify_event, len_to_zero)) 230 if (clear_user(buf, len_to_zero))
229 return -EFAULT; 231 return -EFAULT;
230 buf += len_to_zero; 232 buf += len_to_zero;
231 event_size += name_len; 233 event_size += name_len;
@@ -326,8 +328,9 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
326 list_for_each_entry(holder, &group->notification_list, event_list) { 328 list_for_each_entry(holder, &group->notification_list, event_list) {
327 event = holder->event; 329 event = holder->event;
328 send_len += sizeof(struct inotify_event); 330 send_len += sizeof(struct inotify_event);
329 send_len += roundup(event->name_len, 331 if (event->name_len)
330 sizeof(struct inotify_event)); 332 send_len += roundup(event->name_len + 1,
333 sizeof(struct inotify_event));
331 } 334 }
332 mutex_unlock(&group->notification_mutex); 335 mutex_unlock(&group->notification_mutex);
333 ret = put_user(send_len, (int __user *) p); 336 ret = put_user(send_len, (int __user *) p);
@@ -364,20 +367,53 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
364 return error; 367 return error;
365} 368}
366 369
370/*
371 * Remove the mark from the idr (if present) and drop the reference
372 * on the mark because it was in the idr.
373 */
367static void inotify_remove_from_idr(struct fsnotify_group *group, 374static void inotify_remove_from_idr(struct fsnotify_group *group,
368 struct inotify_inode_mark_entry *ientry) 375 struct inotify_inode_mark_entry *ientry)
369{ 376{
370 struct idr *idr; 377 struct idr *idr;
378 struct fsnotify_mark_entry *entry;
379 struct inotify_inode_mark_entry *found_ientry;
380 int wd;
371 381
372 spin_lock(&group->inotify_data.idr_lock); 382 spin_lock(&group->inotify_data.idr_lock);
373 idr = &group->inotify_data.idr; 383 idr = &group->inotify_data.idr;
374 idr_remove(idr, ientry->wd); 384 wd = ientry->wd;
375 spin_unlock(&group->inotify_data.idr_lock); 385
386 if (wd == -1)
387 goto out;
388
389 entry = idr_find(&group->inotify_data.idr, wd);
390 if (unlikely(!entry))
391 goto out;
392
393 found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
394 if (unlikely(found_ientry != ientry)) {
395 /* We found an entry in the idr with the right wd, but it's
396 * not the entry we were told to remove. eparis seriously
397 * fucked up somewhere. */
398 WARN_ON(1);
399 ientry->wd = -1;
400 goto out;
401 }
402
403 /* One ref for being in the idr, one ref held by the caller */
404 BUG_ON(atomic_read(&entry->refcnt) < 2);
405
406 idr_remove(idr, wd);
376 ientry->wd = -1; 407 ientry->wd = -1;
408
409 /* removed from the idr, drop that ref */
410 fsnotify_put_mark(entry);
411out:
412 spin_unlock(&group->inotify_data.idr_lock);
377} 413}
414
378/* 415/*
379 * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the 416 * Send IN_IGNORED for this wd, remove this wd from the idr.
380 * internal reference help on the mark because it is in the idr.
381 */ 417 */
382void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, 418void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
383 struct fsnotify_group *group) 419 struct fsnotify_group *group)
@@ -386,6 +422,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
386 struct fsnotify_event *ignored_event; 422 struct fsnotify_event *ignored_event;
387 struct inotify_event_private_data *event_priv; 423 struct inotify_event_private_data *event_priv;
388 struct fsnotify_event_private_data *fsn_event_priv; 424 struct fsnotify_event_private_data *fsn_event_priv;
425 int ret;
389 426
390 ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, 427 ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
391 FSNOTIFY_EVENT_NONE, NULL, 0, 428 FSNOTIFY_EVENT_NONE, NULL, 0,
@@ -404,10 +441,8 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
404 fsn_event_priv->group = group; 441 fsn_event_priv->group = group;
405 event_priv->wd = ientry->wd; 442 event_priv->wd = ientry->wd;
406 443
407 fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); 444 ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
408 445 if (ret)
409 /* did the private data get added? */
410 if (list_empty(&fsn_event_priv->event_list))
411 inotify_free_event_priv(fsn_event_priv); 446 inotify_free_event_priv(fsn_event_priv);
412 447
413skip_send_ignore: 448skip_send_ignore:
@@ -418,9 +453,6 @@ skip_send_ignore:
418 /* remove this entry from the idr */ 453 /* remove this entry from the idr */
419 inotify_remove_from_idr(group, ientry); 454 inotify_remove_from_idr(group, ientry);
420 455
421 /* removed from idr, drop that reference */
422 fsnotify_put_mark(entry);
423
424 atomic_dec(&group->inotify_data.user->inotify_watches); 456 atomic_dec(&group->inotify_data.user->inotify_watches);
425} 457}
426 458
@@ -432,80 +464,29 @@ static void inotify_free_mark(struct fsnotify_mark_entry *entry)
432 kmem_cache_free(inotify_inode_mark_cachep, ientry); 464 kmem_cache_free(inotify_inode_mark_cachep, ientry);
433} 465}
434 466
435static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) 467static int inotify_update_existing_watch(struct fsnotify_group *group,
468 struct inode *inode,
469 u32 arg)
436{ 470{
437 struct fsnotify_mark_entry *entry = NULL; 471 struct fsnotify_mark_entry *entry;
438 struct inotify_inode_mark_entry *ientry; 472 struct inotify_inode_mark_entry *ientry;
439 struct inotify_inode_mark_entry *tmp_ientry;
440 int ret = 0;
441 int add = (arg & IN_MASK_ADD);
442 __u32 mask;
443 __u32 old_mask, new_mask; 473 __u32 old_mask, new_mask;
474 __u32 mask;
475 int add = (arg & IN_MASK_ADD);
476 int ret;
444 477
445 /* don't allow invalid bits: we don't want flags set */ 478 /* don't allow invalid bits: we don't want flags set */
446 mask = inotify_arg_to_mask(arg); 479 mask = inotify_arg_to_mask(arg);
447 if (unlikely(!mask)) 480 if (unlikely(!mask))
448 return -EINVAL; 481 return -EINVAL;
449 482
450 tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
451 if (unlikely(!tmp_ientry))
452 return -ENOMEM;
453 /* we set the mask at the end after attaching it */
454 fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
455 tmp_ientry->wd = -1;
456
457find_entry:
458 spin_lock(&inode->i_lock); 483 spin_lock(&inode->i_lock);
459 entry = fsnotify_find_mark_entry(group, inode); 484 entry = fsnotify_find_mark_entry(group, inode);
460 spin_unlock(&inode->i_lock); 485 spin_unlock(&inode->i_lock);
461 if (entry) { 486 if (!entry)
462 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 487 return -ENOENT;
463 } else {
464 ret = -ENOSPC;
465 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
466 goto out_err;
467retry:
468 ret = -ENOMEM;
469 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
470 goto out_err;
471
472 spin_lock(&group->inotify_data.idr_lock);
473 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
474 group->inotify_data.last_wd,
475 &tmp_ientry->wd);
476 spin_unlock(&group->inotify_data.idr_lock);
477 if (ret) {
478 if (ret == -EAGAIN)
479 goto retry;
480 goto out_err;
481 }
482 488
483 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); 489 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
484 if (ret) {
485 inotify_remove_from_idr(group, tmp_ientry);
486 if (ret == -EEXIST)
487 goto find_entry;
488 goto out_err;
489 }
490
491 /* tmp_ientry has been added to the inode, so we are all set up.
492 * now we just need to make sure tmp_ientry doesn't get freed and
493 * we need to set up entry and ientry so the generic code can
494 * do its thing. */
495 ientry = tmp_ientry;
496 entry = &ientry->fsn_entry;
497 tmp_ientry = NULL;
498
499 atomic_inc(&group->inotify_data.user->inotify_watches);
500
501 /* update the idr hint */
502 group->inotify_data.last_wd = ientry->wd;
503
504 /* we put the mark on the idr, take a reference */
505 fsnotify_get_mark(entry);
506 }
507
508 ret = ientry->wd;
509 490
510 spin_lock(&entry->lock); 491 spin_lock(&entry->lock);
511 492
@@ -537,18 +518,107 @@ retry:
537 fsnotify_recalc_group_mask(group); 518 fsnotify_recalc_group_mask(group);
538 } 519 }
539 520
540 /* this either matches fsnotify_find_mark_entry, or init_mark_entry 521 /* return the wd */
541 * depending on which path we took... */ 522 ret = ientry->wd;
523
524 /* match the get from fsnotify_find_mark_entry() */
542 fsnotify_put_mark(entry); 525 fsnotify_put_mark(entry);
543 526
527 return ret;
528}
529
530static int inotify_new_watch(struct fsnotify_group *group,
531 struct inode *inode,
532 u32 arg)
533{
534 struct inotify_inode_mark_entry *tmp_ientry;
535 __u32 mask;
536 int ret;
537
538 /* don't allow invalid bits: we don't want flags set */
539 mask = inotify_arg_to_mask(arg);
540 if (unlikely(!mask))
541 return -EINVAL;
542
543 tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
544 if (unlikely(!tmp_ientry))
545 return -ENOMEM;
546
547 fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
548 tmp_ientry->fsn_entry.mask = mask;
549 tmp_ientry->wd = -1;
550
551 ret = -ENOSPC;
552 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
553 goto out_err;
554retry:
555 ret = -ENOMEM;
556 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
557 goto out_err;
558
559 spin_lock(&group->inotify_data.idr_lock);
560 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
561 group->inotify_data.last_wd,
562 &tmp_ientry->wd);
563 spin_unlock(&group->inotify_data.idr_lock);
564 if (ret) {
565 /* idr was out of memory allocate and try again */
566 if (ret == -EAGAIN)
567 goto retry;
568 goto out_err;
569 }
570
571 /* we put the mark on the idr, take a reference */
572 fsnotify_get_mark(&tmp_ientry->fsn_entry);
573
574 /* we are on the idr, now get on the inode */
575 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
576 if (ret) {
577 /* we failed to get on the inode, get off the idr */
578 inotify_remove_from_idr(group, tmp_ientry);
579 goto out_err;
580 }
581
582 /* update the idr hint, who cares about races, it's just a hint */
583 group->inotify_data.last_wd = tmp_ientry->wd;
584
585 /* increment the number of watches the user has */
586 atomic_inc(&group->inotify_data.user->inotify_watches);
587
588 /* return the watch descriptor for this new entry */
589 ret = tmp_ientry->wd;
590
591 /* match the ref from fsnotify_init_markentry() */
592 fsnotify_put_mark(&tmp_ientry->fsn_entry);
593
594 /* if this mark added a new event update the group mask */
595 if (mask & ~group->mask)
596 fsnotify_recalc_group_mask(group);
597
544out_err: 598out_err:
545 /* could be an error, could be that we found an existing mark */ 599 if (ret < 0)
546 if (tmp_ientry) {
547 /* on the idr but didn't make it on the inode */
548 if (tmp_ientry->wd != -1)
549 inotify_remove_from_idr(group, tmp_ientry);
550 kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); 600 kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry);
551 } 601
602 return ret;
603}
604
605static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
606{
607 int ret = 0;
608
609retry:
610 /* try to update and existing watch with the new arg */
611 ret = inotify_update_existing_watch(group, inode, arg);
612 /* no mark present, try to add a new one */
613 if (ret == -ENOENT)
614 ret = inotify_new_watch(group, inode, arg);
615 /*
616 * inotify_new_watch could race with another thread which did an
617 * inotify_new_watch between the update_existing and the add watch
618 * here, go back and try to update an existing mark again.
619 */
620 if (ret == -EEXIST)
621 goto retry;
552 622
553 return ret; 623 return ret;
554} 624}
@@ -568,7 +638,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
568 638
569 spin_lock_init(&group->inotify_data.idr_lock); 639 spin_lock_init(&group->inotify_data.idr_lock);
570 idr_init(&group->inotify_data.idr); 640 idr_init(&group->inotify_data.idr);
571 group->inotify_data.last_wd = 0; 641 group->inotify_data.last_wd = 1;
572 group->inotify_data.user = user; 642 group->inotify_data.user = user;
573 group->inotify_data.fa = NULL; 643 group->inotify_data.fa = NULL;
574 644
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 521368574e97..3816d5750dd5 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -153,6 +153,10 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
153 return true; 153 return true;
154 break; 154 break;
155 case (FSNOTIFY_EVENT_NONE): 155 case (FSNOTIFY_EVENT_NONE):
156 if (old->mask & FS_Q_OVERFLOW)
157 return true;
158 else if (old->mask & FS_IN_IGNORED)
159 return false;
156 return false; 160 return false;
157 }; 161 };
158 } 162 }
@@ -171,9 +175,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
171 struct list_head *list = &group->notification_list; 175 struct list_head *list = &group->notification_list;
172 struct fsnotify_event_holder *last_holder; 176 struct fsnotify_event_holder *last_holder;
173 struct fsnotify_event *last_event; 177 struct fsnotify_event *last_event;
174 178 int ret = 0;
175 /* easy to tell if priv was attached to the event */
176 INIT_LIST_HEAD(&priv->event_list);
177 179
178 /* 180 /*
179 * There is one fsnotify_event_holder embedded inside each fsnotify_event. 181 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
@@ -194,6 +196,7 @@ alloc_holder:
194 196
195 if (group->q_len >= group->max_events) { 197 if (group->q_len >= group->max_events) {
196 event = &q_overflow_event; 198 event = &q_overflow_event;
199 ret = -EOVERFLOW;
197 /* sorry, no private data on the overflow event */ 200 /* sorry, no private data on the overflow event */
198 priv = NULL; 201 priv = NULL;
199 } 202 }
@@ -235,7 +238,7 @@ alloc_holder:
235 mutex_unlock(&group->notification_mutex); 238 mutex_unlock(&group->notification_mutex);
236 239
237 wake_up(&group->notification_waitq); 240 wake_up(&group->notification_waitq);
238 return 0; 241 return ret;
239} 242}
240 243
241/* 244/*
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9edcde4974aa..ab513ddaeff2 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
1914 * immediately to their right. 1914 * immediately to their right.
1915 */ 1915 */
1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); 1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
1917 if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { 1917 if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
1918 BUG_ON(right_child_el->l_tree_depth);
1918 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); 1919 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
1919 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); 1920 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
1920 } 1921 }
@@ -2476,15 +2477,37 @@ out_ret_path:
2476 return ret; 2477 return ret;
2477} 2478}
2478 2479
2479static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, 2480static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2480 struct ocfs2_path *path) 2481 int subtree_index, struct ocfs2_path *path)
2481{ 2482{
2482 int i, idx; 2483 int i, idx, ret;
2483 struct ocfs2_extent_rec *rec; 2484 struct ocfs2_extent_rec *rec;
2484 struct ocfs2_extent_list *el; 2485 struct ocfs2_extent_list *el;
2485 struct ocfs2_extent_block *eb; 2486 struct ocfs2_extent_block *eb;
2486 u32 range; 2487 u32 range;
2487 2488
2489 /*
2490 * In normal tree rotation process, we will never touch the
2491 * tree branch above subtree_index and ocfs2_extend_rotate_transaction
2492 * doesn't reserve the credits for them either.
2493 *
2494 * But we do have a special case here which will update the rightmost
2495 * records for all the bh in the path.
2496 * So we have to allocate extra credits and access them.
2497 */
2498 ret = ocfs2_extend_trans(handle,
2499 handle->h_buffer_credits + subtree_index);
2500 if (ret) {
2501 mlog_errno(ret);
2502 goto out;
2503 }
2504
2505 ret = ocfs2_journal_access_path(inode, handle, path);
2506 if (ret) {
2507 mlog_errno(ret);
2508 goto out;
2509 }
2510
2488 /* Path should always be rightmost. */ 2511 /* Path should always be rightmost. */
2489 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; 2512 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
2490 BUG_ON(eb->h_next_leaf_blk != 0ULL); 2513 BUG_ON(eb->h_next_leaf_blk != 0ULL);
@@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2505 2528
2506 ocfs2_journal_dirty(handle, path->p_node[i].bh); 2529 ocfs2_journal_dirty(handle, path->p_node[i].bh);
2507 } 2530 }
2531out:
2532 return ret;
2508} 2533}
2509 2534
2510static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, 2535static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
@@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2717 if (del_right_subtree) { 2742 if (del_right_subtree) {
2718 ocfs2_unlink_subtree(inode, handle, left_path, right_path, 2743 ocfs2_unlink_subtree(inode, handle, left_path, right_path,
2719 subtree_index, dealloc); 2744 subtree_index, dealloc);
2720 ocfs2_update_edge_lengths(inode, handle, left_path); 2745 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
2746 left_path);
2747 if (ret) {
2748 mlog_errno(ret);
2749 goto out;
2750 }
2721 2751
2722 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2752 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2723 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 2753 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3034 3064
3035 ocfs2_unlink_subtree(inode, handle, left_path, path, 3065 ocfs2_unlink_subtree(inode, handle, left_path, path,
3036 subtree_index, dealloc); 3066 subtree_index, dealloc);
3037 ocfs2_update_edge_lengths(inode, handle, left_path); 3067 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
3068 left_path);
3069 if (ret) {
3070 mlog_errno(ret);
3071 goto out;
3072 }
3038 3073
3039 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 3074 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
3040 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 3075 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@@ -6816,7 +6851,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6816 } 6851 }
6817 status = 0; 6852 status = 0;
6818bail: 6853bail:
6819 6854 brelse(last_eb_bh);
6820 mlog_exit(status); 6855 mlog_exit(status);
6821 return status; 6856 return status;
6822} 6857}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b2c52b3a1484..8a1e61545f41 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
193 (unsigned long long)OCFS2_I(inode)->ip_blkno); 193 (unsigned long long)OCFS2_I(inode)->ip_blkno);
194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); 194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
195 dump_stack(); 195 dump_stack();
196 goto bail;
196 } 197 }
197 198
198 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 199 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
@@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc {
894 */ 895 */
895 unsigned c_new; 896 unsigned c_new;
896 unsigned c_unwritten; 897 unsigned c_unwritten;
898 unsigned c_needs_zero;
897}; 899};
898 900
899static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
900{
901 return d->c_new || d->c_unwritten;
902}
903
904struct ocfs2_write_ctxt { 901struct ocfs2_write_ctxt {
905 /* Logical cluster position / len of write */ 902 /* Logical cluster position / len of write */
906 u32 w_cpos; 903 u32 w_cpos;
907 u32 w_clen; 904 u32 w_clen;
908 905
906 /* First cluster allocated in a nonsparse extend */
907 u32 w_first_new_cpos;
908
909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; 909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
910 910
911 /* 911 /*
@@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
983 return -ENOMEM; 983 return -ENOMEM;
984 984
985 wc->w_cpos = pos >> osb->s_clustersize_bits; 985 wc->w_cpos = pos >> osb->s_clustersize_bits;
986 wc->w_first_new_cpos = UINT_MAX;
986 cend = (pos + len - 1) >> osb->s_clustersize_bits; 987 cend = (pos + len - 1) >> osb->s_clustersize_bits;
987 wc->w_clen = cend - wc->w_cpos + 1; 988 wc->w_clen = cend - wc->w_cpos + 1;
988 get_bh(di_bh); 989 get_bh(di_bh);
@@ -1217,20 +1218,18 @@ out:
1217 */ 1218 */
1218static int ocfs2_write_cluster(struct address_space *mapping, 1219static int ocfs2_write_cluster(struct address_space *mapping,
1219 u32 phys, unsigned int unwritten, 1220 u32 phys, unsigned int unwritten,
1221 unsigned int should_zero,
1220 struct ocfs2_alloc_context *data_ac, 1222 struct ocfs2_alloc_context *data_ac,
1221 struct ocfs2_alloc_context *meta_ac, 1223 struct ocfs2_alloc_context *meta_ac,
1222 struct ocfs2_write_ctxt *wc, u32 cpos, 1224 struct ocfs2_write_ctxt *wc, u32 cpos,
1223 loff_t user_pos, unsigned user_len) 1225 loff_t user_pos, unsigned user_len)
1224{ 1226{
1225 int ret, i, new, should_zero = 0; 1227 int ret, i, new;
1226 u64 v_blkno, p_blkno; 1228 u64 v_blkno, p_blkno;
1227 struct inode *inode = mapping->host; 1229 struct inode *inode = mapping->host;
1228 struct ocfs2_extent_tree et; 1230 struct ocfs2_extent_tree et;
1229 1231
1230 new = phys == 0 ? 1 : 0; 1232 new = phys == 0 ? 1 : 0;
1231 if (new || unwritten)
1232 should_zero = 1;
1233
1234 if (new) { 1233 if (new) {
1235 u32 tmp_pos; 1234 u32 tmp_pos;
1236 1235
@@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1301 if (tmpret) { 1300 if (tmpret) {
1302 mlog_errno(tmpret); 1301 mlog_errno(tmpret);
1303 if (ret == 0) 1302 if (ret == 0)
1304 tmpret = ret; 1303 ret = tmpret;
1305 } 1304 }
1306 } 1305 }
1307 1306
@@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
1341 local_len = osb->s_clustersize - cluster_off; 1340 local_len = osb->s_clustersize - cluster_off;
1342 1341
1343 ret = ocfs2_write_cluster(mapping, desc->c_phys, 1342 ret = ocfs2_write_cluster(mapping, desc->c_phys,
1344 desc->c_unwritten, data_ac, meta_ac, 1343 desc->c_unwritten,
1344 desc->c_needs_zero,
1345 data_ac, meta_ac,
1345 wc, desc->c_cpos, pos, local_len); 1346 wc, desc->c_cpos, pos, local_len);
1346 if (ret) { 1347 if (ret) {
1347 mlog_errno(ret); 1348 mlog_errno(ret);
@@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
1391 * newly allocated cluster. 1392 * newly allocated cluster.
1392 */ 1393 */
1393 desc = &wc->w_desc[0]; 1394 desc = &wc->w_desc[0];
1394 if (ocfs2_should_zero_cluster(desc)) 1395 if (desc->c_needs_zero)
1395 ocfs2_figure_cluster_boundaries(osb, 1396 ocfs2_figure_cluster_boundaries(osb,
1396 desc->c_cpos, 1397 desc->c_cpos,
1397 &wc->w_target_from, 1398 &wc->w_target_from,
1398 NULL); 1399 NULL);
1399 1400
1400 desc = &wc->w_desc[wc->w_clen - 1]; 1401 desc = &wc->w_desc[wc->w_clen - 1];
1401 if (ocfs2_should_zero_cluster(desc)) 1402 if (desc->c_needs_zero)
1402 ocfs2_figure_cluster_boundaries(osb, 1403 ocfs2_figure_cluster_boundaries(osb,
1403 desc->c_cpos, 1404 desc->c_cpos,
1404 NULL, 1405 NULL,
@@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
1466 phys++; 1467 phys++;
1467 } 1468 }
1468 1469
1470 /*
1471 * If w_first_new_cpos is < UINT_MAX, we have a non-sparse
1472 * file that got extended. w_first_new_cpos tells us
1473 * where the newly allocated clusters are so we can
1474 * zero them.
1475 */
1476 if (desc->c_cpos >= wc->w_first_new_cpos) {
1477 BUG_ON(phys == 0);
1478 desc->c_needs_zero = 1;
1479 }
1480
1469 desc->c_phys = phys; 1481 desc->c_phys = phys;
1470 if (phys == 0) { 1482 if (phys == 0) {
1471 desc->c_new = 1; 1483 desc->c_new = 1;
1484 desc->c_needs_zero = 1;
1472 *clusters_to_alloc = *clusters_to_alloc + 1; 1485 *clusters_to_alloc = *clusters_to_alloc + 1;
1473 } 1486 }
1474 if (ext_flags & OCFS2_EXT_UNWRITTEN) 1487
1488 if (ext_flags & OCFS2_EXT_UNWRITTEN) {
1475 desc->c_unwritten = 1; 1489 desc->c_unwritten = 1;
1490 desc->c_needs_zero = 1;
1491 }
1476 1492
1477 num_clusters--; 1493 num_clusters--;
1478 } 1494 }
@@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1632 if (newsize <= i_size_read(inode)) 1648 if (newsize <= i_size_read(inode))
1633 return 0; 1649 return 0;
1634 1650
1635 ret = ocfs2_extend_no_holes(inode, newsize, newsize - len); 1651 ret = ocfs2_extend_no_holes(inode, newsize, pos);
1636 if (ret) 1652 if (ret)
1637 mlog_errno(ret); 1653 mlog_errno(ret);
1638 1654
1655 wc->w_first_new_cpos =
1656 ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
1657
1639 return ret; 1658 return ret;
1640} 1659}
1641 1660
@@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1644 struct page **pagep, void **fsdata, 1663 struct page **pagep, void **fsdata,
1645 struct buffer_head *di_bh, struct page *mmap_page) 1664 struct buffer_head *di_bh, struct page *mmap_page)
1646{ 1665{
1647 int ret, credits = OCFS2_INODE_UPDATE_CREDITS; 1666 int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
1648 unsigned int clusters_to_alloc, extents_to_split; 1667 unsigned int clusters_to_alloc, extents_to_split;
1649 struct ocfs2_write_ctxt *wc; 1668 struct ocfs2_write_ctxt *wc;
1650 struct inode *inode = mapping->host; 1669 struct inode *inode = mapping->host;
@@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1722 1741
1723 } 1742 }
1724 1743
1725 ocfs2_set_target_boundaries(osb, wc, pos, len, 1744 /*
1726 clusters_to_alloc + extents_to_split); 1745 * We have to zero sparse allocated clusters, unwritten extent clusters,
1746 * and non-sparse clusters we just extended. For non-sparse writes,
1747 * we know zeros will only be needed in the first and/or last cluster.
1748 */
1749 if (clusters_to_alloc || extents_to_split ||
1750 (wc->w_clen && (wc->w_desc[0].c_needs_zero ||
1751 wc->w_desc[wc->w_clen - 1].c_needs_zero)))
1752 cluster_of_pages = 1;
1753 else
1754 cluster_of_pages = 0;
1755
1756 ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
1727 1757
1728 handle = ocfs2_start_trans(osb, credits); 1758 handle = ocfs2_start_trans(osb, credits);
1729 if (IS_ERR(handle)) { 1759 if (IS_ERR(handle)) {
@@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1756 * extent. 1786 * extent.
1757 */ 1787 */
1758 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, 1788 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
1759 clusters_to_alloc + extents_to_split, 1789 cluster_of_pages, mmap_page);
1760 mmap_page);
1761 if (ret) { 1790 if (ret) {
1762 mlog_errno(ret); 1791 mlog_errno(ret);
1763 goto out_quota; 1792 goto out_quota;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b574431a031d..b4957c7d9fe2 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -85,6 +85,17 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
85 goto bail; 85 goto bail;
86 } 86 }
87 87
88 /*
89 * If the last lookup failed to create dentry lock, let us
90 * redo it.
91 */
92 if (!dentry->d_fsdata) {
93 mlog(0, "Inode %llu doesn't have dentry lock, "
94 "returning false\n",
95 (unsigned long long)OCFS2_I(inode)->ip_blkno);
96 goto bail;
97 }
98
88 ret = 1; 99 ret = 1;
89 100
90bail: 101bail:
@@ -310,22 +321,19 @@ out_attach:
310 return ret; 321 return ret;
311} 322}
312 323
313static DEFINE_SPINLOCK(dentry_list_lock); 324DEFINE_SPINLOCK(dentry_list_lock);
314 325
315/* We limit the number of dentry locks to drop in one go. We have 326/* We limit the number of dentry locks to drop in one go. We have
316 * this limit so that we don't starve other users of ocfs2_wq. */ 327 * this limit so that we don't starve other users of ocfs2_wq. */
317#define DL_INODE_DROP_COUNT 64 328#define DL_INODE_DROP_COUNT 64
318 329
319/* Drop inode references from dentry locks */ 330/* Drop inode references from dentry locks */
320void ocfs2_drop_dl_inodes(struct work_struct *work) 331static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
321{ 332{
322 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
323 dentry_lock_work);
324 struct ocfs2_dentry_lock *dl; 333 struct ocfs2_dentry_lock *dl;
325 int drop_count = DL_INODE_DROP_COUNT;
326 334
327 spin_lock(&dentry_list_lock); 335 spin_lock(&dentry_list_lock);
328 while (osb->dentry_lock_list && drop_count--) { 336 while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
329 dl = osb->dentry_lock_list; 337 dl = osb->dentry_lock_list;
330 osb->dentry_lock_list = dl->dl_next; 338 osb->dentry_lock_list = dl->dl_next;
331 spin_unlock(&dentry_list_lock); 339 spin_unlock(&dentry_list_lock);
@@ -333,11 +341,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work)
333 kfree(dl); 341 kfree(dl);
334 spin_lock(&dentry_list_lock); 342 spin_lock(&dentry_list_lock);
335 } 343 }
336 if (osb->dentry_lock_list) 344 spin_unlock(&dentry_list_lock);
345}
346
347void ocfs2_drop_dl_inodes(struct work_struct *work)
348{
349 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
350 dentry_lock_work);
351
352 __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
353 /*
354 * Don't queue dropping if umount is in progress. We flush the
355 * list in ocfs2_dismount_volume
356 */
357 spin_lock(&dentry_list_lock);
358 if (osb->dentry_lock_list &&
359 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
337 queue_work(ocfs2_wq, &osb->dentry_lock_work); 360 queue_work(ocfs2_wq, &osb->dentry_lock_work);
338 spin_unlock(&dentry_list_lock); 361 spin_unlock(&dentry_list_lock);
339} 362}
340 363
364/* Flush the whole work queue */
365void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
366{
367 __ocfs2_drop_dl_inodes(osb, -1);
368}
369
341/* 370/*
342 * ocfs2_dentry_iput() and friends. 371 * ocfs2_dentry_iput() and friends.
343 * 372 *
@@ -368,7 +397,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
368 /* We leave dropping of inode reference to ocfs2_wq as that can 397 /* We leave dropping of inode reference to ocfs2_wq as that can
369 * possibly lead to inode deletion which gets tricky */ 398 * possibly lead to inode deletion which gets tricky */
370 spin_lock(&dentry_list_lock); 399 spin_lock(&dentry_list_lock);
371 if (!osb->dentry_lock_list) 400 if (!osb->dentry_lock_list &&
401 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
372 queue_work(ocfs2_wq, &osb->dentry_lock_work); 402 queue_work(ocfs2_wq, &osb->dentry_lock_work);
373 dl->dl_next = osb->dentry_lock_list; 403 dl->dl_next = osb->dentry_lock_list;
374 osb->dentry_lock_list = dl; 404 osb->dentry_lock_list = dl;
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index faa12e75f98d..f5dd1789acf1 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -49,10 +49,13 @@ struct ocfs2_dentry_lock {
49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, 49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
50 u64 parent_blkno); 50 u64 parent_blkno);
51 51
52extern spinlock_t dentry_list_lock;
53
52void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 54void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
53 struct ocfs2_dentry_lock *dl); 55 struct ocfs2_dentry_lock *dl);
54 56
55void ocfs2_drop_dl_inodes(struct work_struct *work); 57void ocfs2_drop_dl_inodes(struct work_struct *work);
58void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
56 59
57struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, 60struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
58 int skip_unhashed); 61 int skip_unhashed);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index d07ddbe4b283..81eff8e58322 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
103 lock->ast_pending, lock->ml.type); 103 lock->ast_pending, lock->ml.type);
104 BUG(); 104 BUG();
105 } 105 }
106 BUG_ON(!list_empty(&lock->ast_list));
107 if (lock->ast_pending) 106 if (lock->ast_pending)
108 mlog(0, "lock has an ast getting flushed right now\n"); 107 mlog(0, "lock has an ast getting flushed right now\n");
109 108
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index bcb9260c3735..43e6e3280569 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
1118 1118
1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", 1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
1120 dlm->name, res->lockname.len, res->lockname.name, 1120 dlm->name, res->lockname.len, res->lockname.name,
1121 orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery", 1121 orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
1122 send_to); 1122 send_to);
1123 1123
1124 /* send it */ 1124 /* send it */
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index fcf879ed6930..756f5b0998e0 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -122,7 +122,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
122 * that still has AST's pending... */ 122 * that still has AST's pending... */
123 in_use = !list_empty(&lock->ast_list); 123 in_use = !list_empty(&lock->ast_list);
124 spin_unlock(&dlm->ast_lock); 124 spin_unlock(&dlm->ast_lock);
125 if (in_use) { 125 if (in_use && !(flags & LKM_CANCEL)) {
126 mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " 126 mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
127 "while waiting for an ast!", res->lockname.len, 127 "while waiting for an ast!", res->lockname.len,
128 res->lockname.name); 128 res->lockname.name);
@@ -131,7 +131,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
131 131
132 spin_lock(&res->spinlock); 132 spin_lock(&res->spinlock);
133 if (res->state & DLM_LOCK_RES_IN_PROGRESS) { 133 if (res->state & DLM_LOCK_RES_IN_PROGRESS) {
134 if (master_node) { 134 if (master_node && !(flags & LKM_CANCEL)) {
135 mlog(ML_ERROR, "lockres in progress!\n"); 135 mlog(ML_ERROR, "lockres in progress!\n");
136 spin_unlock(&res->spinlock); 136 spin_unlock(&res->spinlock);
137 return DLM_FORWARD; 137 return DLM_FORWARD;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 62442e413a00..aa501d3f93f1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1851,6 +1851,7 @@ relock:
1851 if (ret) 1851 if (ret)
1852 goto out_dio; 1852 goto out_dio;
1853 1853
1854 count = ocount;
1854 ret = generic_write_checks(file, ppos, &count, 1855 ret = generic_write_checks(file, ppos, &count,
1855 S_ISBLK(inode->i_mode)); 1856 S_ISBLK(inode->i_mode));
1856 if (ret) 1857 if (ret)
@@ -1918,8 +1919,10 @@ out_sems:
1918 1919
1919 mutex_unlock(&inode->i_mutex); 1920 mutex_unlock(&inode->i_mutex);
1920 1921
1922 if (written)
1923 ret = written;
1921 mlog_exit(ret); 1924 mlog_exit(ret);
1922 return written ? written : ret; 1925 return ret;
1923} 1926}
1924 1927
1925static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, 1928static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f033760ecbea..c48b93ac6b65 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1954 os->os_osb = osb; 1954 os->os_osb = osb;
1955 os->os_count = 0; 1955 os->os_count = 0;
1956 os->os_seqno = 0; 1956 os->os_seqno = 0;
1957 os->os_scantime = CURRENT_TIME;
1958 mutex_init(&os->os_lock); 1957 mutex_init(&os->os_lock);
1959 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); 1958 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
1959}
1960 1960
1961void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
1962{
1963 struct ocfs2_orphan_scan *os;
1964
1965 os = &osb->osb_orphan_scan;
1966 os->os_scantime = CURRENT_TIME;
1961 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) 1967 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
1962 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1968 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1963 else { 1969 else {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 5432c7f79cc6..2c3222aec622 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
145 145
146/* Exported only for the journal struct init code in super.c. Do not call. */ 146/* Exported only for the journal struct init code in super.c. Do not call. */
147void ocfs2_orphan_scan_init(struct ocfs2_super *osb); 147void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); 149void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
149void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); 150void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
150 151
@@ -329,20 +330,27 @@ int ocfs2_journal_dirty(handle_t *handle,
329/* extended attribute block update */ 330/* extended attribute block update */
330#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 331#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
331 332
333/* Update of a single quota block */
334#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1
335
332/* global quotafile inode update, data block */ 336/* global quotafile inode update, data block */
333#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 337#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
338 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
334 339
340#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS
335/* 341/*
336 * The two writes below can accidentally see global info dirty due 342 * The two writes below can accidentally see global info dirty due
337 * to set_info() quotactl so make them prepared for the writes. 343 * to set_info() quotactl so make them prepared for the writes.
338 */ 344 */
339/* quota data block, global info */ 345/* quota data block, global info */
340/* Write to local quota file */ 346/* Write to local quota file */
341#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1) 347#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
348 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
342 349
343/* global quota data block, local quota data block, global quota inode, 350/* global quota data block, local quota data block, global quota inode,
344 * global quota info */ 351 * global quota info */
345#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3) 352#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
353 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
346 354
347static inline int ocfs2_quota_trans_credits(struct super_block *sb) 355static inline int ocfs2_quota_trans_credits(struct super_block *sb)
348{ 356{
@@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb)
355 return credits; 363 return credits;
356} 364}
357 365
358/* Number of credits needed for removing quota structure from file */
359int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
360/* Number of credits needed for initialization of new quota structure */
361int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
362
363/* group extend. inode update and last group update. */ 366/* group extend. inode update and last group update. */
364#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 367#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
365 368
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c9345ebb8493..39e1d5a39505 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -224,10 +224,12 @@ enum ocfs2_mount_options
224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ 224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
225}; 225};
226 226
227#define OCFS2_OSB_SOFT_RO 0x0001 227#define OCFS2_OSB_SOFT_RO 0x0001
228#define OCFS2_OSB_HARD_RO 0x0002 228#define OCFS2_OSB_HARD_RO 0x0002
229#define OCFS2_OSB_ERROR_FS 0x0004 229#define OCFS2_OSB_ERROR_FS 0x0004
230#define OCFS2_DEFAULT_ATIME_QUANTUM 60 230#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008
231
232#define OCFS2_DEFAULT_ATIME_QUANTUM 60
231 233
232struct ocfs2_journal; 234struct ocfs2_journal;
233struct ocfs2_slot_info; 235struct ocfs2_slot_info;
@@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
490 spin_unlock(&osb->osb_lock); 492 spin_unlock(&osb->osb_lock);
491} 493}
492 494
495
496static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
497 unsigned long flag)
498{
499 unsigned long ret;
500
501 spin_lock(&osb->osb_lock);
502 ret = osb->osb_flags & flag;
503 spin_unlock(&osb->osb_lock);
504 return ret;
505}
506
493static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, 507static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
494 int hard) 508 int hard)
495{ 509{
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index fcdba091af3d..c212cf5a2bdf 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -108,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = {
108 [OCFS2_LOCK_TYPE_OPEN] = "Open", 108 [OCFS2_LOCK_TYPE_OPEN] = "Open",
109 [OCFS2_LOCK_TYPE_FLOCK] = "Flock", 109 [OCFS2_LOCK_TYPE_FLOCK] = "Flock",
110 [OCFS2_LOCK_TYPE_QINFO] = "Quota", 110 [OCFS2_LOCK_TYPE_QINFO] = "Quota",
111 [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync",
111 [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", 112 [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",
112}; 113};
113 114
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 7365e2e08706..3fb96fcd4c81 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo {
50 unsigned int dqi_chunks; /* Number of chunks in local quota file */ 50 unsigned int dqi_chunks; /* Number of chunks in local quota file */
51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ 51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
52 unsigned int dqi_syncms; /* How often should we sync with other nodes */ 52 unsigned int dqi_syncms; /* How often should we sync with other nodes */
53 unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
54 struct list_head dqi_chunk; /* List of chunks */ 53 struct list_head dqi_chunk; /* List of chunks */
55 struct inode *dqi_gqinode; /* Global quota file inode */ 54 struct inode *dqi_gqinode; /* Global quota file inode */
56 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ 55 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index edfa60cd155c..44f2a5e1d042 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -23,6 +23,7 @@
23#include "sysfile.h" 23#include "sysfile.h"
24#include "dlmglue.h" 24#include "dlmglue.h"
25#include "uptodate.h" 25#include "uptodate.h"
26#include "super.h"
26#include "quota.h" 27#include "quota.h"
27 28
28static struct workqueue_struct *ocfs2_quota_wq = NULL; 29static struct workqueue_struct *ocfs2_quota_wq = NULL;
@@ -69,6 +70,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace); 70 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
70 d->dqb_btime = cpu_to_le64(m->dqb_btime); 71 d->dqb_btime = cpu_to_le64(m->dqb_btime);
71 d->dqb_itime = cpu_to_le64(m->dqb_itime); 72 d->dqb_itime = cpu_to_le64(m->dqb_itime);
73 d->dqb_pad1 = d->dqb_pad2 = 0;
72} 74}
73 75
74static int ocfs2_global_is_id(void *dp, struct dquot *dquot) 76static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
@@ -113,6 +115,15 @@ int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
113 int rc = 0; 115 int rc = 0;
114 struct buffer_head *tmp = *bh; 116 struct buffer_head *tmp = *bh;
115 117
118 if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) {
119 ocfs2_error(inode->i_sb,
120 "Quota file %llu is probably corrupted! Requested "
121 "to read block %Lu but file has size only %Lu\n",
122 (unsigned long long)OCFS2_I(inode)->ip_blkno,
123 (unsigned long long)v_block,
124 (unsigned long long)i_size_read(inode));
125 return -EIO;
126 }
116 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, 127 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0,
117 ocfs2_validate_quota_block); 128 ocfs2_validate_quota_block);
118 if (rc) 129 if (rc)
@@ -211,14 +222,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
211 222
212 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); 223 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
213 if (gqinode->i_size < off + len) { 224 if (gqinode->i_size < off + len) {
214 down_write(&OCFS2_I(gqinode)->ip_alloc_sem); 225 loff_t rounded_end =
215 err = ocfs2_extend_no_holes(gqinode, off + len, off); 226 ocfs2_align_bytes_to_blocks(sb, off + len);
216 up_write(&OCFS2_I(gqinode)->ip_alloc_sem); 227
217 if (err < 0) 228 /* Space is already allocated in ocfs2_global_read_dquot() */
218 goto out;
219 err = ocfs2_simple_size_update(gqinode, 229 err = ocfs2_simple_size_update(gqinode,
220 oinfo->dqi_gqi_bh, 230 oinfo->dqi_gqi_bh,
221 off + len); 231 rounded_end);
222 if (err < 0) 232 if (err < 0)
223 goto out; 233 goto out;
224 new = 1; 234 new = 1;
@@ -234,7 +244,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
234 } 244 }
235 if (err) { 245 if (err) {
236 mlog_errno(err); 246 mlog_errno(err);
237 return err; 247 goto out;
238 } 248 }
239 lock_buffer(bh); 249 lock_buffer(bh);
240 if (new) 250 if (new)
@@ -342,7 +352,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 352 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 353 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); 354 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
345 oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
346 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); 355 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
347 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); 356 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
348 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); 357 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@@ -352,7 +361,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
352 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); 361 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
353 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); 362 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
354 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 363 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
355 oinfo->dqi_syncjiff); 364 msecs_to_jiffies(oinfo->dqi_syncms));
356 365
357out_err: 366out_err:
358 mlog_exit(status); 367 mlog_exit(status);
@@ -402,13 +411,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
402 return err; 411 return err;
403} 412}
404 413
414static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
415{
416 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
417
418 /*
419 * We may need to allocate tree blocks and a leaf block but not the
420 * root block
421 */
422 return oinfo->dqi_gi.dqi_qtree_depth;
423}
424
425static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
426{
427 /* We modify all the allocated blocks, tree root, and info block */
428 return (ocfs2_global_qinit_alloc(sb, type) + 2) *
429 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS;
430}
431
405/* Read in information from global quota file and acquire a reference to it. 432/* Read in information from global quota file and acquire a reference to it.
406 * dquot_acquire() has already started the transaction and locked quota file */ 433 * dquot_acquire() has already started the transaction and locked quota file */
407int ocfs2_global_read_dquot(struct dquot *dquot) 434int ocfs2_global_read_dquot(struct dquot *dquot)
408{ 435{
409 int err, err2, ex = 0; 436 int err, err2, ex = 0;
410 struct ocfs2_mem_dqinfo *info = 437 struct super_block *sb = dquot->dq_sb;
411 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 438 int type = dquot->dq_type;
439 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
440 struct ocfs2_super *osb = OCFS2_SB(sb);
441 struct inode *gqinode = info->dqi_gqinode;
442 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
443 handle_t *handle = NULL;
412 444
413 err = ocfs2_qinfo_lock(info, 0); 445 err = ocfs2_qinfo_lock(info, 0);
414 if (err < 0) 446 if (err < 0)
@@ -419,14 +451,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
419 OCFS2_DQUOT(dquot)->dq_use_count++; 451 OCFS2_DQUOT(dquot)->dq_use_count++;
420 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; 452 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
421 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; 453 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
454 ocfs2_qinfo_unlock(info, 0);
455
422 if (!dquot->dq_off) { /* No real quota entry? */ 456 if (!dquot->dq_off) { /* No real quota entry? */
423 /* Upgrade to exclusive lock for allocation */
424 ocfs2_qinfo_unlock(info, 0);
425 err = ocfs2_qinfo_lock(info, 1);
426 if (err < 0)
427 goto out_qlock;
428 ex = 1; 457 ex = 1;
458 /*
459 * Add blocks to quota file before we start a transaction since
460 * locking allocators ranks above a transaction start
461 */
462 WARN_ON(journal_current_handle());
463 down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
464 err = ocfs2_extend_no_holes(gqinode,
465 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
466 gqinode->i_size);
467 up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
468 if (err < 0)
469 goto out;
429 } 470 }
471
472 handle = ocfs2_start_trans(osb,
473 ocfs2_calc_global_qinit_credits(sb, type));
474 if (IS_ERR(handle)) {
475 err = PTR_ERR(handle);
476 goto out;
477 }
478 err = ocfs2_qinfo_lock(info, ex);
479 if (err < 0)
480 goto out_trans;
430 err = qtree_write_dquot(&info->dqi_gi, dquot); 481 err = qtree_write_dquot(&info->dqi_gi, dquot);
431 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { 482 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
432 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); 483 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
@@ -438,6 +489,9 @@ out_qlock:
438 ocfs2_qinfo_unlock(info, 1); 489 ocfs2_qinfo_unlock(info, 1);
439 else 490 else
440 ocfs2_qinfo_unlock(info, 0); 491 ocfs2_qinfo_unlock(info, 0);
492out_trans:
493 if (handle)
494 ocfs2_commit_trans(osb, handle);
441out: 495out:
442 if (err < 0) 496 if (err < 0)
443 mlog_errno(err); 497 mlog_errno(err);
@@ -607,7 +661,7 @@ static void qsync_work_fn(struct work_struct *work)
607 661
608 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); 662 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
609 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 663 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
610 oinfo->dqi_syncjiff); 664 msecs_to_jiffies(oinfo->dqi_syncms));
611} 665}
612 666
613/* 667/*
@@ -635,20 +689,18 @@ out:
635 return status; 689 return status;
636} 690}
637 691
638int ocfs2_calc_qdel_credits(struct super_block *sb, int type) 692static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
639{ 693{
640 struct ocfs2_mem_dqinfo *oinfo; 694 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
641 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, 695 /*
642 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; 696 * We modify tree, leaf block, global info, local chunk header,
643 697 * global and local inode; OCFS2_QINFO_WRITE_CREDITS already
644 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) 698 * accounts for inode update
645 return 0; 699 */
646 700 return (oinfo->dqi_gi.dqi_qtree_depth + 2) *
647 oinfo = sb_dqinfo(sb, type)->dqi_priv; 701 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS +
648 /* We modify tree, leaf block, global info, local chunk header, 702 OCFS2_QINFO_WRITE_CREDITS +
649 * global and local inode */ 703 OCFS2_INODE_UPDATE_CREDITS;
650 return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
651 2 * OCFS2_INODE_UPDATE_CREDITS;
652} 704}
653 705
654static int ocfs2_release_dquot(struct dquot *dquot) 706static int ocfs2_release_dquot(struct dquot *dquot)
@@ -680,33 +732,10 @@ out:
680 return status; 732 return status;
681} 733}
682 734
683int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
684{
685 struct ocfs2_mem_dqinfo *oinfo;
686 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
687 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
688 struct ocfs2_dinode *lfe, *gfe;
689
690 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
691 return 0;
692
693 oinfo = sb_dqinfo(sb, type)->dqi_priv;
694 gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
695 lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
696 /* We can extend local file + global file. In local file we
697 * can modify info, chunk header block and dquot block. In
698 * global file we can modify info, tree and leaf block */
699 return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
700 ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
701 3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
702}
703
704static int ocfs2_acquire_dquot(struct dquot *dquot) 735static int ocfs2_acquire_dquot(struct dquot *dquot)
705{ 736{
706 handle_t *handle;
707 struct ocfs2_mem_dqinfo *oinfo = 737 struct ocfs2_mem_dqinfo *oinfo =
708 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 738 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
709 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
710 int status = 0; 739 int status = 0;
711 740
712 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 741 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
@@ -715,16 +744,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
715 status = ocfs2_lock_global_qf(oinfo, 1); 744 status = ocfs2_lock_global_qf(oinfo, 1);
716 if (status < 0) 745 if (status < 0)
717 goto out; 746 goto out;
718 handle = ocfs2_start_trans(osb,
719 ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
720 if (IS_ERR(handle)) {
721 status = PTR_ERR(handle);
722 mlog_errno(status);
723 goto out_ilock;
724 }
725 status = dquot_acquire(dquot); 747 status = dquot_acquire(dquot);
726 ocfs2_commit_trans(osb, handle);
727out_ilock:
728 ocfs2_unlock_global_qf(oinfo, 1); 748 ocfs2_unlock_global_qf(oinfo, 1);
729out: 749out:
730 mlog_exit(status); 750 mlog_exit(status);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 5a460fa82553..bdb09cb6e1fe 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -20,6 +20,7 @@
20#include "sysfile.h" 20#include "sysfile.h"
21#include "dlmglue.h" 21#include "dlmglue.h"
22#include "quota.h" 22#include "quota.h"
23#include "uptodate.h"
23 24
24/* Number of local quota structures per block */ 25/* Number of local quota structures per block */
25static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) 26static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
100 handle_t *handle; 101 handle_t *handle;
101 int status; 102 int status;
102 103
103 handle = ocfs2_start_trans(OCFS2_SB(sb), 1); 104 handle = ocfs2_start_trans(OCFS2_SB(sb),
105 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
104 if (IS_ERR(handle)) { 106 if (IS_ERR(handle)) {
105 status = PTR_ERR(handle); 107 status = PTR_ERR(handle);
106 mlog_errno(status); 108 mlog_errno(status);
@@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
610 goto out_bh; 612 goto out_bh;
611 /* Mark quota file as clean if we are recovering quota file of 613 /* Mark quota file as clean if we are recovering quota file of
612 * some other node. */ 614 * some other node. */
613 handle = ocfs2_start_trans(osb, 1); 615 handle = ocfs2_start_trans(osb,
616 OCFS2_LOCAL_QINFO_WRITE_CREDITS);
614 if (IS_ERR(handle)) { 617 if (IS_ERR(handle)) {
615 status = PTR_ERR(handle); 618 status = PTR_ERR(handle);
616 mlog_errno(status); 619 mlog_errno(status);
@@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
940 struct ocfs2_local_disk_chunk *dchunk; 943 struct ocfs2_local_disk_chunk *dchunk;
941 int status; 944 int status;
942 handle_t *handle; 945 handle_t *handle;
943 struct buffer_head *bh = NULL; 946 struct buffer_head *bh = NULL, *dbh = NULL;
944 u64 p_blkno; 947 u64 p_blkno;
945 948
946 /* We are protected by dqio_sem so no locking needed */ 949 /* We are protected by dqio_sem so no locking needed */
@@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
964 mlog_errno(status); 967 mlog_errno(status);
965 goto out; 968 goto out;
966 } 969 }
970 /* Local quota info and two new blocks we initialize */
971 handle = ocfs2_start_trans(OCFS2_SB(sb),
972 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
973 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
974 if (IS_ERR(handle)) {
975 status = PTR_ERR(handle);
976 mlog_errno(status);
977 goto out;
978 }
967 979
980 /* Initialize chunk header */
968 down_read(&OCFS2_I(lqinode)->ip_alloc_sem); 981 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
969 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, 982 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
970 &p_blkno, NULL, NULL); 983 &p_blkno, NULL, NULL);
971 up_read(&OCFS2_I(lqinode)->ip_alloc_sem); 984 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
972 if (status < 0) { 985 if (status < 0) {
973 mlog_errno(status); 986 mlog_errno(status);
974 goto out; 987 goto out_trans;
975 } 988 }
976 bh = sb_getblk(sb, p_blkno); 989 bh = sb_getblk(sb, p_blkno);
977 if (!bh) { 990 if (!bh) {
978 status = -ENOMEM; 991 status = -ENOMEM;
979 mlog_errno(status); 992 mlog_errno(status);
980 goto out; 993 goto out_trans;
981 } 994 }
982 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; 995 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
983 996 ocfs2_set_new_buffer_uptodate(lqinode, bh);
984 handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
985 if (IS_ERR(handle)) {
986 status = PTR_ERR(handle);
987 mlog_errno(status);
988 goto out;
989 }
990
991 status = ocfs2_journal_access_dq(handle, lqinode, bh, 997 status = ocfs2_journal_access_dq(handle, lqinode, bh,
992 OCFS2_JOURNAL_ACCESS_WRITE); 998 OCFS2_JOURNAL_ACCESS_CREATE);
993 if (status < 0) { 999 if (status < 0) {
994 mlog_errno(status); 1000 mlog_errno(status);
995 goto out_trans; 1001 goto out_trans;
@@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
999 memset(dchunk->dqc_bitmap, 0, 1005 memset(dchunk->dqc_bitmap, 0,
1000 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - 1006 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
1001 OCFS2_QBLK_RESERVED_SPACE); 1007 OCFS2_QBLK_RESERVED_SPACE);
1002 set_buffer_uptodate(bh);
1003 unlock_buffer(bh); 1008 unlock_buffer(bh);
1004 status = ocfs2_journal_dirty(handle, bh); 1009 status = ocfs2_journal_dirty(handle, bh);
1005 if (status < 0) { 1010 if (status < 0) {
@@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1007 goto out_trans; 1012 goto out_trans;
1008 } 1013 }
1009 1014
1015 /* Initialize new block with structures */
1016 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1017 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
1018 &p_blkno, NULL, NULL);
1019 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1020 if (status < 0) {
1021 mlog_errno(status);
1022 goto out_trans;
1023 }
1024 dbh = sb_getblk(sb, p_blkno);
1025 if (!dbh) {
1026 status = -ENOMEM;
1027 mlog_errno(status);
1028 goto out_trans;
1029 }
1030 ocfs2_set_new_buffer_uptodate(lqinode, dbh);
1031 status = ocfs2_journal_access_dq(handle, lqinode, dbh,
1032 OCFS2_JOURNAL_ACCESS_CREATE);
1033 if (status < 0) {
1034 mlog_errno(status);
1035 goto out_trans;
1036 }
1037 lock_buffer(dbh);
1038 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
1039 unlock_buffer(dbh);
1040 status = ocfs2_journal_dirty(handle, dbh);
1041 if (status < 0) {
1042 mlog_errno(status);
1043 goto out_trans;
1044 }
1045
1046 /* Update local quotafile info */
1010 oinfo->dqi_blocks += 2; 1047 oinfo->dqi_blocks += 2;
1011 oinfo->dqi_chunks++; 1048 oinfo->dqi_chunks++;
1012 status = ocfs2_local_write_info(sb, type); 1049 status = ocfs2_local_write_info(sb, type);
@@ -1031,6 +1068,7 @@ out_trans:
1031 ocfs2_commit_trans(OCFS2_SB(sb), handle); 1068 ocfs2_commit_trans(OCFS2_SB(sb), handle);
1032out: 1069out:
1033 brelse(bh); 1070 brelse(bh);
1071 brelse(dbh);
1034 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); 1072 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
1035 return ERR_PTR(status); 1073 return ERR_PTR(status);
1036} 1074}
@@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1048 struct ocfs2_local_disk_chunk *dchunk; 1086 struct ocfs2_local_disk_chunk *dchunk;
1049 int epb = ol_quota_entries_per_block(sb); 1087 int epb = ol_quota_entries_per_block(sb);
1050 unsigned int chunk_blocks; 1088 unsigned int chunk_blocks;
1089 struct buffer_head *bh;
1090 u64 p_blkno;
1051 int status; 1091 int status;
1052 handle_t *handle; 1092 handle_t *handle;
1053 1093
@@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1075 mlog_errno(status); 1115 mlog_errno(status);
1076 goto out; 1116 goto out;
1077 } 1117 }
1078 handle = ocfs2_start_trans(OCFS2_SB(sb), 2); 1118
1119 /* Get buffer from the just added block */
1120 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1121 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
1122 &p_blkno, NULL, NULL);
1123 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1124 if (status < 0) {
1125 mlog_errno(status);
1126 goto out;
1127 }
1128 bh = sb_getblk(sb, p_blkno);
1129 if (!bh) {
1130 status = -ENOMEM;
1131 mlog_errno(status);
1132 goto out;
1133 }
1134 ocfs2_set_new_buffer_uptodate(lqinode, bh);
1135
1136 /* Local quota info, chunk header and the new block we initialize */
1137 handle = ocfs2_start_trans(OCFS2_SB(sb),
1138 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
1139 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
1079 if (IS_ERR(handle)) { 1140 if (IS_ERR(handle)) {
1080 status = PTR_ERR(handle); 1141 status = PTR_ERR(handle);
1081 mlog_errno(status); 1142 mlog_errno(status);
1082 goto out; 1143 goto out;
1083 } 1144 }
1145 /* Zero created block */
1146 status = ocfs2_journal_access_dq(handle, lqinode, bh,
1147 OCFS2_JOURNAL_ACCESS_CREATE);
1148 if (status < 0) {
1149 mlog_errno(status);
1150 goto out_trans;
1151 }
1152 lock_buffer(bh);
1153 memset(bh->b_data, 0, sb->s_blocksize);
1154 unlock_buffer(bh);
1155 status = ocfs2_journal_dirty(handle, bh);
1156 if (status < 0) {
1157 mlog_errno(status);
1158 goto out_trans;
1159 }
1160 /* Update chunk header */
1084 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, 1161 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
1085 OCFS2_JOURNAL_ACCESS_WRITE); 1162 OCFS2_JOURNAL_ACCESS_WRITE);
1086 if (status < 0) { 1163 if (status < 0) {
@@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1097 mlog_errno(status); 1174 mlog_errno(status);
1098 goto out_trans; 1175 goto out_trans;
1099 } 1176 }
1177 /* Update file header */
1100 oinfo->dqi_blocks++; 1178 oinfo->dqi_blocks++;
1101 status = ocfs2_local_write_info(sb, type); 1179 status = ocfs2_local_write_info(sb, type);
1102 if (status < 0) { 1180 if (status < 0) {
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 3f661376a2de..e49c41050264 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -17,6 +17,7 @@
17 * General Public License for more details. 17 * General Public License for more details.
18 */ 18 */
19 19
20#include <linux/kernel.h>
20#include <linux/crc32.h> 21#include <linux/crc32.h>
21#include <linux/module.h> 22#include <linux/module.h>
22 23
@@ -153,7 +154,7 @@ static int status_map[] = {
153 154
154static int dlm_status_to_errno(enum dlm_status status) 155static int dlm_status_to_errno(enum dlm_status status)
155{ 156{
156 BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); 157 BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map));
157 158
158 return status_map[status]; 159 return status_map[status];
159} 160}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7efb349fb9bd..a3f8871d21fd 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
777 } 777 }
778 di = (struct ocfs2_dinode *) (*bh)->b_data; 778 di = (struct ocfs2_dinode *) (*bh)->b_data;
779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); 779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
780 spin_lock_init(&stats->b_lock);
780 status = ocfs2_verify_volume(di, *bh, blksize, stats); 781 status = ocfs2_verify_volume(di, *bh, blksize, stats);
781 if (status >= 0) 782 if (status >= 0)
782 goto bail; 783 goto bail;
@@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1182 wake_up(&osb->osb_mount_event); 1183 wake_up(&osb->osb_mount_event);
1183 1184
1184 /* Start this when the mount is almost sure of being successful */ 1185 /* Start this when the mount is almost sure of being successful */
1185 ocfs2_orphan_scan_init(osb); 1186 ocfs2_orphan_scan_start(osb);
1186 1187
1187 mlog_exit(status); 1188 mlog_exit(status);
1188 return status; 1189 return status;
@@ -1213,14 +1214,31 @@ static int ocfs2_get_sb(struct file_system_type *fs_type,
1213 mnt); 1214 mnt);
1214} 1215}
1215 1216
1217static void ocfs2_kill_sb(struct super_block *sb)
1218{
1219 struct ocfs2_super *osb = OCFS2_SB(sb);
1220
1221 /* Failed mount? */
1222 if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED)
1223 goto out;
1224
1225 /* Prevent further queueing of inode drop events */
1226 spin_lock(&dentry_list_lock);
1227 ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
1228 spin_unlock(&dentry_list_lock);
1229 /* Wait for work to finish and/or remove it */
1230 cancel_work_sync(&osb->dentry_lock_work);
1231out:
1232 kill_block_super(sb);
1233}
1234
1216static struct file_system_type ocfs2_fs_type = { 1235static struct file_system_type ocfs2_fs_type = {
1217 .owner = THIS_MODULE, 1236 .owner = THIS_MODULE,
1218 .name = "ocfs2", 1237 .name = "ocfs2",
1219 .get_sb = ocfs2_get_sb, /* is this called when we mount 1238 .get_sb = ocfs2_get_sb, /* is this called when we mount
1220 * the fs? */ 1239 * the fs? */
1221 .kill_sb = kill_block_super, /* set to the generic one 1240 .kill_sb = ocfs2_kill_sb,
1222 * right now, but do we 1241
1223 * need to change that? */
1224 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1242 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
1225 .next = NULL 1243 .next = NULL
1226}; 1244};
@@ -1819,6 +1837,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1819 1837
1820 debugfs_remove(osb->osb_ctxt); 1838 debugfs_remove(osb->osb_ctxt);
1821 1839
1840 /*
1841 * Flush inode dropping work queue so that deletes are
1842 * performed while the filesystem is still working
1843 */
1844 ocfs2_drop_all_dl_inodes(osb);
1845
1822 /* Orphan scan should be stopped as early as possible */ 1846 /* Orphan scan should be stopped as early as possible */
1823 ocfs2_orphan_scan_stop(osb); 1847 ocfs2_orphan_scan_stop(osb);
1824 1848
@@ -1981,6 +2005,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
1981 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 2005 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
1982 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 2006 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1983 2007
2008 ocfs2_orphan_scan_init(osb);
2009
1984 status = ocfs2_recovery_init(osb); 2010 status = ocfs2_recovery_init(osb);
1985 if (status) { 2011 if (status) {
1986 mlog(ML_ERROR, "Unable to initialize recovery state\n"); 2012 mlog(ML_ERROR, "Unable to initialize recovery state\n");
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index ba320e250747..d1a27cda984f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode,
1052 struct ocfs2_xattr_block *xb; 1052 struct ocfs2_xattr_block *xb;
1053 struct ocfs2_xattr_value_root *xv; 1053 struct ocfs2_xattr_value_root *xv;
1054 size_t size; 1054 size_t size;
1055 int ret = -ENODATA, name_offset, name_len, block_off, i; 1055 int ret = -ENODATA, name_offset, name_len, i;
1056 int uninitialized_var(block_off);
1056 1057
1057 xs->bucket = ocfs2_xattr_bucket_new(inode); 1058 xs->bucket = ocfs2_xattr_bucket_new(inode);
1058 if (!xs->bucket) { 1059 if (!xs->bucket) {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 175db258942f..6f742f6658a9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1003,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
1003 1003
1004 if (!task) 1004 if (!task)
1005 return -ESRCH; 1005 return -ESRCH;
1006 task_lock(task); 1006 oom_adjust = task->oomkilladj;
1007 if (task->mm)
1008 oom_adjust = task->mm->oom_adj;
1009 else
1010 oom_adjust = OOM_DISABLE;
1011 task_unlock(task);
1012 put_task_struct(task); 1007 put_task_struct(task);
1013 1008
1014 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 1009 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1037,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1037 task = get_proc_task(file->f_path.dentry->d_inode); 1032 task = get_proc_task(file->f_path.dentry->d_inode);
1038 if (!task) 1033 if (!task)
1039 return -ESRCH; 1034 return -ESRCH;
1040 task_lock(task); 1035 if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
1041 if (!task->mm) {
1042 task_unlock(task);
1043 put_task_struct(task);
1044 return -EINVAL;
1045 }
1046 if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1047 task_unlock(task);
1048 put_task_struct(task); 1036 put_task_struct(task);
1049 return -EACCES; 1037 return -EACCES;
1050 } 1038 }
1051 task->mm->oom_adj = oom_adjust; 1039 task->oomkilladj = oom_adjust;
1052 task_unlock(task);
1053 put_task_struct(task); 1040 put_task_struct(task);
1054 if (end - buffer == 0) 1041 if (end - buffer == 0)
1055 return -EIO; 1042 return -EIO;
diff --git a/fs/select.c b/fs/select.c
index d870237e42c7..8084834e123e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -110,6 +110,7 @@ void poll_initwait(struct poll_wqueues *pwq)
110{ 110{
111 init_poll_funcptr(&pwq->pt, __pollwait); 111 init_poll_funcptr(&pwq->pt, __pollwait);
112 pwq->polling_task = current; 112 pwq->polling_task = current;
113 pwq->triggered = 0;
113 pwq->error = 0; 114 pwq->error = 0;
114 pwq->table = NULL; 115 pwq->table = NULL;
115 pwq->inline_index = 0; 116 pwq->inline_index = 0;
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 0c93c7ef3d18..965df1227d64 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -770,7 +770,7 @@ xfs_buf_associate_memory(
770 bp->b_pages = NULL; 770 bp->b_pages = NULL;
771 bp->b_addr = mem; 771 bp->b_addr = mem;
772 772
773 rval = _xfs_buf_get_pages(bp, page_count, 0); 773 rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
774 if (rval) 774 if (rval)
775 return rval; 775 return rval;
776 776
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 0882d166239a..eafcc7c18706 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -619,7 +619,7 @@ xfs_file_compat_ioctl(
619 case XFS_IOC_GETVERSION_32: 619 case XFS_IOC_GETVERSION_32:
620 cmd = _NATIVE_IOC(cmd, long); 620 cmd = _NATIVE_IOC(cmd, long);
621 return xfs_file_ioctl(filp, cmd, p); 621 return xfs_file_ioctl(filp, cmd, p);
622 case XFS_IOC_SWAPEXT: { 622 case XFS_IOC_SWAPEXT_32: {
623 struct xfs_swapext sxp; 623 struct xfs_swapext sxp;
624 struct compat_xfs_swapext __user *sxu = arg; 624 struct compat_xfs_swapext __user *sxu = arg;
625 625
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index b619d6b8ca43..98ef624d9baf 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -708,6 +708,16 @@ xfs_reclaim_inode(
708 return 0; 708 return 0;
709} 709}
710 710
711void
712__xfs_inode_set_reclaim_tag(
713 struct xfs_perag *pag,
714 struct xfs_inode *ip)
715{
716 radix_tree_tag_set(&pag->pag_ici_root,
717 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
718 XFS_ICI_RECLAIM_TAG);
719}
720
711/* 721/*
712 * We set the inode flag atomically with the radix tree tag. 722 * We set the inode flag atomically with the radix tree tag.
713 * Once we get tag lookups on the radix tree, this inode flag 723 * Once we get tag lookups on the radix tree, this inode flag
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag(
722 732
723 read_lock(&pag->pag_ici_lock); 733 read_lock(&pag->pag_ici_lock);
724 spin_lock(&ip->i_flags_lock); 734 spin_lock(&ip->i_flags_lock);
725 radix_tree_tag_set(&pag->pag_ici_root, 735 __xfs_inode_set_reclaim_tag(pag, ip);
726 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
727 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 736 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
728 spin_unlock(&ip->i_flags_lock); 737 spin_unlock(&ip->i_flags_lock);
729 read_unlock(&pag->pag_ici_lock); 738 read_unlock(&pag->pag_ici_lock);
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 2a10301c99c7..59120602588a 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
49 49
50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
51void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
51void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); 52void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
52void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, 53void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
53 struct xfs_inode *ip); 54 struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index db15feb906ff..4ece1906bd41 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2010,7 +2010,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, 2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2013 blkcnt, XFS_BUF_LOCK, &bp); 2013 blkcnt,
2014 XFS_BUF_LOCK | XBF_DONT_BLOCK,
2015 &bp);
2014 if (error) 2016 if (error)
2015 return(error); 2017 return(error);
2016 2018
@@ -2141,8 +2143,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2141 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 2143 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2142 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 2144 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2143 2145
2144 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, 2146 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt,
2145 blkcnt, XFS_BUF_LOCK); 2147 XFS_BUF_LOCK | XBF_DONT_BLOCK);
2146 ASSERT(bp); 2148 ASSERT(bp);
2147 ASSERT(!XFS_BUF_GETERROR(bp)); 2149 ASSERT(!XFS_BUF_GETERROR(bp));
2148 2150
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 7928b9983c1d..8ee5b5a76a2a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -6009,7 +6009,7 @@ xfs_getbmap(
6009 */ 6009 */
6010 error = ENOMEM; 6010 error = ENOMEM;
6011 subnex = 16; 6011 subnex = 16;
6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); 6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
6013 if (!map) 6013 if (!map)
6014 goto out_unlock_ilock; 6014 goto out_unlock_ilock;
6015 6015
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index e9df99574829..26717388acf5 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -120,8 +120,8 @@ xfs_btree_check_sblock(
120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121 if (bp) 121 if (bp)
122 xfs_buftrace("SBTREE ERROR", bp); 122 xfs_buftrace("SBTREE ERROR", bp);
123 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW, 123 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
124 cur->bc_mp); 124 XFS_ERRLEVEL_LOW, cur->bc_mp, block);
125 return XFS_ERROR(EFSCORRUPTED); 125 return XFS_ERROR(EFSCORRUPTED);
126 } 126 }
127 return 0; 127 return 0;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9ff6e57a5075..2847bbc1c534 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2201,7 +2201,7 @@ kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */
2201xfs_da_state_t * 2201xfs_da_state_t *
2202xfs_da_state_alloc(void) 2202xfs_da_state_alloc(void)
2203{ 2203{
2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP); 2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2205} 2205}
2206 2206
2207/* 2207/*
@@ -2261,9 +2261,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2261 int off; 2261 int off;
2262 2262
2263 if (nbuf == 1) 2263 if (nbuf == 1)
2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP); 2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
2265 else 2265 else
2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP); 2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
2267 dabuf->dirty = 0; 2267 dabuf->dirty = 0;
2268#ifdef XFS_DABUF_DEBUG 2268#ifdef XFS_DABUF_DEBUG
2269 dabuf->ra = ra; 2269 dabuf->ra = ra;
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index c657bec6d951..bb1d58eb3982 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -256,7 +256,7 @@ xfs_dir_cilookup_result(
256 !(args->op_flags & XFS_DA_OP_CILOOKUP)) 256 !(args->op_flags & XFS_DA_OP_CILOOKUP))
257 return EEXIST; 257 return EEXIST;
258 258
259 args->value = kmem_alloc(len, KM_MAYFAIL); 259 args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
260 if (!args->value) 260 if (!args->value)
261 return ENOMEM; 261 return ENOMEM;
262 262
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cbd451bb4848..2d0b3e1da9e6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -167,17 +167,25 @@ xfs_growfs_data_private(
167 new = nb - mp->m_sb.sb_dblocks; 167 new = nb - mp->m_sb.sb_dblocks;
168 oagcount = mp->m_sb.sb_agcount; 168 oagcount = mp->m_sb.sb_agcount;
169 if (nagcount > oagcount) { 169 if (nagcount > oagcount) {
170 void *new_perag, *old_perag;
171
170 xfs_filestream_flush(mp); 172 xfs_filestream_flush(mp);
173
174 new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
175 KM_MAYFAIL);
176 if (!new_perag)
177 return XFS_ERROR(ENOMEM);
178
171 down_write(&mp->m_peraglock); 179 down_write(&mp->m_peraglock);
172 mp->m_perag = kmem_realloc(mp->m_perag, 180 memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
173 sizeof(xfs_perag_t) * nagcount, 181 old_perag = mp->m_perag;
174 sizeof(xfs_perag_t) * oagcount, 182 mp->m_perag = new_perag;
175 KM_SLEEP); 183
176 memset(&mp->m_perag[oagcount], 0,
177 (nagcount - oagcount) * sizeof(xfs_perag_t));
178 mp->m_flags |= XFS_MOUNT_32BITINODES; 184 mp->m_flags |= XFS_MOUNT_32BITINODES;
179 nagimax = xfs_initialize_perag(mp, nagcount); 185 nagimax = xfs_initialize_perag(mp, nagcount);
180 up_write(&mp->m_peraglock); 186 up_write(&mp->m_peraglock);
187
188 kmem_free(old_perag);
181 } 189 }
182 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 190 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
183 tp->t_flags |= XFS_TRANS_RESERVE; 191 tp->t_flags |= XFS_TRANS_RESERVE;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 34ec86923f7e..ecbf8b4d2e2e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -191,80 +191,82 @@ xfs_iget_cache_hit(
191 int flags, 191 int flags,
192 int lock_flags) __releases(pag->pag_ici_lock) 192 int lock_flags) __releases(pag->pag_ici_lock)
193{ 193{
194 struct inode *inode = VFS_I(ip);
194 struct xfs_mount *mp = ip->i_mount; 195 struct xfs_mount *mp = ip->i_mount;
195 int error = EAGAIN; 196 int error;
197
198 spin_lock(&ip->i_flags_lock);
196 199
197 /* 200 /*
198 * If INEW is set this inode is being set up 201 * If we are racing with another cache hit that is currently
199 * If IRECLAIM is set this inode is being torn down 202 * instantiating this inode or currently recycling it out of
200 * Pause and try again. 203 * reclaimabe state, wait for the initialisation to complete
204 * before continuing.
205 *
206 * XXX(hch): eventually we should do something equivalent to
207 * wait_on_inode to wait for these flags to be cleared
208 * instead of polling for it.
201 */ 209 */
202 if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { 210 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
203 XFS_STATS_INC(xs_ig_frecycle); 211 XFS_STATS_INC(xs_ig_frecycle);
212 error = EAGAIN;
204 goto out_error; 213 goto out_error;
205 } 214 }
206 215
207 /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ 216 /*
208 if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { 217 * If lookup is racing with unlink return an error immediately.
209 218 */
210 /* 219 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
211 * If lookup is racing with unlink, then we should return an 220 error = ENOENT;
212 * error immediately so we don't remove it from the reclaim 221 goto out_error;
213 * list and potentially leak the inode. 222 }
214 */
215 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
216 error = ENOENT;
217 goto out_error;
218 }
219 223
224 /*
225 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
226 * Need to carefully get it back into useable state.
227 */
228 if (ip->i_flags & XFS_IRECLAIMABLE) {
220 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 229 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
221 230
222 /* 231 /*
223 * We need to re-initialise the VFS inode as it has been 232 * We need to set XFS_INEW atomically with clearing the
224 * 'freed' by the VFS. Do this here so we can deal with 233 * reclaimable tag so that we do have an indicator of the
225 * errors cleanly, then tag it so it can be set up correctly 234 * inode still being initialized.
226 * later.
227 */ 235 */
228 if (inode_init_always(mp->m_super, VFS_I(ip))) { 236 ip->i_flags |= XFS_INEW;
229 error = ENOMEM; 237 ip->i_flags &= ~XFS_IRECLAIMABLE;
230 goto out_error; 238 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
231 }
232 239
233 /* 240 spin_unlock(&ip->i_flags_lock);
234 * We must set the XFS_INEW flag before clearing the 241 read_unlock(&pag->pag_ici_lock);
235 * XFS_IRECLAIMABLE flag so that if a racing lookup does
236 * not find the XFS_IRECLAIMABLE above but has the igrab()
237 * below succeed we can safely check XFS_INEW to detect
238 * that this inode is still being initialised.
239 */
240 xfs_iflags_set(ip, XFS_INEW);
241 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
242 242
243 /* clear the radix tree reclaim flag as well. */ 243 error = -inode_init_always(mp->m_super, inode);
244 __xfs_inode_clear_reclaim_tag(mp, pag, ip); 244 if (error) {
245 } else if (!igrab(VFS_I(ip))) { 245 /*
246 * Re-initializing the inode failed, and we are in deep
247 * trouble. Try to re-add it to the reclaim list.
248 */
249 read_lock(&pag->pag_ici_lock);
250 spin_lock(&ip->i_flags_lock);
251
252 ip->i_flags &= ~XFS_INEW;
253 ip->i_flags |= XFS_IRECLAIMABLE;
254 __xfs_inode_set_reclaim_tag(pag, ip);
255 goto out_error;
256 }
257 inode->i_state = I_LOCK|I_NEW;
258 } else {
246 /* If the VFS inode is being torn down, pause and try again. */ 259 /* If the VFS inode is being torn down, pause and try again. */
247 XFS_STATS_INC(xs_ig_frecycle); 260 if (!igrab(inode)) {
248 goto out_error; 261 error = EAGAIN;
249 } else if (xfs_iflags_test(ip, XFS_INEW)) { 262 goto out_error;
250 /* 263 }
251 * We are racing with another cache hit that is
252 * currently recycling this inode out of the XFS_IRECLAIMABLE
253 * state. Wait for the initialisation to complete before
254 * continuing.
255 */
256 wait_on_inode(VFS_I(ip));
257 }
258 264
259 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { 265 /* We've got a live one. */
260 error = ENOENT; 266 spin_unlock(&ip->i_flags_lock);
261 iput(VFS_I(ip)); 267 read_unlock(&pag->pag_ici_lock);
262 goto out_error;
263 } 268 }
264 269
265 /* We've got a live one. */
266 read_unlock(&pag->pag_ici_lock);
267
268 if (lock_flags != 0) 270 if (lock_flags != 0)
269 xfs_ilock(ip, lock_flags); 271 xfs_ilock(ip, lock_flags);
270 272
@@ -274,6 +276,7 @@ xfs_iget_cache_hit(
274 return 0; 276 return 0;
275 277
276out_error: 278out_error:
279 spin_unlock(&ip->i_flags_lock);
277 read_unlock(&pag->pag_ici_lock); 280 read_unlock(&pag->pag_ici_lock);
278 return error; 281 return error;
279} 282}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1f22d65fed0a..da428b3fe0f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -343,6 +343,16 @@ xfs_iformat(
343 return XFS_ERROR(EFSCORRUPTED); 343 return XFS_ERROR(EFSCORRUPTED);
344 } 344 }
345 345
346 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
347 !ip->i_mount->m_rtdev_targp)) {
348 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
349 "corrupt dinode %Lu, has realtime flag set.",
350 ip->i_ino);
351 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
352 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
353 return XFS_ERROR(EFSCORRUPTED);
354 }
355
346 switch (ip->i_d.di_mode & S_IFMT) { 356 switch (ip->i_d.di_mode & S_IFMT) {
347 case S_IFIFO: 357 case S_IFIFO:
348 case S_IFCHR: 358 case S_IFCHR:
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3750f04ede0b..9dbdff3ea484 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3180,7 +3180,7 @@ try_again:
3180STATIC void 3180STATIC void
3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) 3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3182{ 3182{
3183 ASSERT(spin_is_locked(&log->l_icloglock)); 3183 assert_spin_locked(&log->l_icloglock);
3184 3184
3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3186 xlog_state_switch_iclogs(log, iclog, 0); 3186 xlog_state_switch_iclogs(log, iclog, 0);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c4eca5ed5dab..492d75bae2bf 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -538,7 +538,9 @@ xfs_readlink_bmap(
538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
540 540
541 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); 541 bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt),
542 XBF_LOCK | XBF_MAPPED |
543 XBF_DONT_BLOCK);
542 error = XFS_BUF_GETERROR(bp); 544 error = XFS_BUF_GETERROR(bp);
543 if (error) { 545 if (error) {
544 xfs_ioerror_alert("xfs_readlink", 546 xfs_ioerror_alert("xfs_readlink",