aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-09-02 15:42:56 -0400
committerIngo Molnar <mingo@elte.hu>2009-09-02 15:42:59 -0400
commitf76bd108e5031202bb40849306f98c4afebe4ef6 (patch)
tree455e14a23642a82513eebd4e4f7d3b76842869cd /fs
parentcd6feeeafddbef6abfe4d90fb26e42fd844d34ed (diff)
parenteced1dfcfcf6b0a35e925d73916a9d8e36ab5457 (diff)
Merge branch 'perfcounters/urgent' into perfcounters/core
Merge reason: We are going to modify a place modified by perfcounters/urgent. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c21
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/vfs_inode.c126
-rw-r--r--fs/9p/vfs_super.c39
-rw-r--r--fs/afs/file.c18
-rw-r--r--fs/btrfs/inode.c21
-rw-r--r--fs/buffer.c7
-rw-r--r--fs/exec.c4
-rw-r--r--fs/ext3/Kconfig32
-rw-r--r--fs/ext3/super.c40
-rw-r--r--fs/gfs2/sys.c20
-rw-r--r--fs/hugetlbfs/inode.c20
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/nilfs2/the_nilfs.h2
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c46
-rw-r--r--fs/notify/inotify/inotify_user.c238
-rw-r--r--fs/notify/notification.c11
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c4
-rw-r--r--fs/ocfs2/ocfs2_lockid.h1
-rw-r--r--fs/ocfs2/quota_global.c10
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/proc/base.c19
-rw-r--r--fs/select.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c13
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h1
-rw-r--r--fs/xfs/xfs_iget.c113
29 files changed, 478 insertions, 346 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 332b5ff02fec..f7003cfac63d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -76,7 +76,7 @@ static const match_table_t tokens = {
76 * Return 0 upon success, -ERRNO upon failure. 76 * Return 0 upon success, -ERRNO upon failure.
77 */ 77 */
78 78
79static int v9fs_parse_options(struct v9fs_session_info *v9ses) 79static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
80{ 80{
81 char *options; 81 char *options;
82 substring_t args[MAX_OPT_ARGS]; 82 substring_t args[MAX_OPT_ARGS];
@@ -90,10 +90,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses)
90 v9ses->debug = 0; 90 v9ses->debug = 0;
91 v9ses->cache = 0; 91 v9ses->cache = 0;
92 92
93 if (!v9ses->options) 93 if (!opts)
94 return 0; 94 return 0;
95 95
96 options = kstrdup(v9ses->options, GFP_KERNEL); 96 options = kstrdup(opts, GFP_KERNEL);
97 if (!options) { 97 if (!options) {
98 P9_DPRINTK(P9_DEBUG_ERROR, 98 P9_DPRINTK(P9_DEBUG_ERROR,
99 "failed to allocate copy of option string\n"); 99 "failed to allocate copy of option string\n");
@@ -206,24 +206,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
206 v9ses->uid = ~0; 206 v9ses->uid = ~0;
207 v9ses->dfltuid = V9FS_DEFUID; 207 v9ses->dfltuid = V9FS_DEFUID;
208 v9ses->dfltgid = V9FS_DEFGID; 208 v9ses->dfltgid = V9FS_DEFGID;
209 if (data) {
210 v9ses->options = kstrdup(data, GFP_KERNEL);
211 if (!v9ses->options) {
212 P9_DPRINTK(P9_DEBUG_ERROR,
213 "failed to allocate copy of option string\n");
214 retval = -ENOMEM;
215 goto error;
216 }
217 }
218 209
219 rc = v9fs_parse_options(v9ses); 210 rc = v9fs_parse_options(v9ses, data);
220 if (rc < 0) { 211 if (rc < 0) {
221 retval = rc; 212 retval = rc;
222 goto error; 213 goto error;
223 } 214 }
224 215
225 v9ses->clnt = p9_client_create(dev_name, v9ses->options); 216 v9ses->clnt = p9_client_create(dev_name, data);
226
227 if (IS_ERR(v9ses->clnt)) { 217 if (IS_ERR(v9ses->clnt)) {
228 retval = PTR_ERR(v9ses->clnt); 218 retval = PTR_ERR(v9ses->clnt);
229 v9ses->clnt = NULL; 219 v9ses->clnt = NULL;
@@ -280,7 +270,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
280 270
281 __putname(v9ses->uname); 271 __putname(v9ses->uname);
282 __putname(v9ses->aname); 272 __putname(v9ses->aname);
283 kfree(v9ses->options);
284} 273}
285 274
286/** 275/**
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a7d567192998..38762bf102a9 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -85,7 +85,6 @@ struct v9fs_session_info {
85 unsigned int afid; 85 unsigned int afid;
86 unsigned int cache; 86 unsigned int cache;
87 87
88 char *options; /* copy of mount options */
89 char *uname; /* user name to mount as */ 88 char *uname; /* user name to mount as */
90 char *aname; /* name of remote hierarchy being mounted */ 89 char *aname; /* name of remote hierarchy being mounted */
91 unsigned int maxdata; /* max data for client interface */ 90 unsigned int maxdata; /* max data for client interface */
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 81f8bbf12f9f..06a223d50a81 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -171,7 +171,6 @@ int v9fs_uflags2omode(int uflags, int extended)
171 171
172/** 172/**
173 * v9fs_blank_wstat - helper function to setup a 9P stat structure 173 * v9fs_blank_wstat - helper function to setup a 9P stat structure
174 * @v9ses: 9P session info (for determining extended mode)
175 * @wstat: structure to initialize 174 * @wstat: structure to initialize
176 * 175 *
177 */ 176 */
@@ -207,65 +206,72 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
207 206
208struct inode *v9fs_get_inode(struct super_block *sb, int mode) 207struct inode *v9fs_get_inode(struct super_block *sb, int mode)
209{ 208{
209 int err;
210 struct inode *inode; 210 struct inode *inode;
211 struct v9fs_session_info *v9ses = sb->s_fs_info; 211 struct v9fs_session_info *v9ses = sb->s_fs_info;
212 212
213 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); 213 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
214 214
215 inode = new_inode(sb); 215 inode = new_inode(sb);
216 if (inode) { 216 if (!inode) {
217 inode->i_mode = mode;
218 inode->i_uid = current_fsuid();
219 inode->i_gid = current_fsgid();
220 inode->i_blocks = 0;
221 inode->i_rdev = 0;
222 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
223 inode->i_mapping->a_ops = &v9fs_addr_operations;
224
225 switch (mode & S_IFMT) {
226 case S_IFIFO:
227 case S_IFBLK:
228 case S_IFCHR:
229 case S_IFSOCK:
230 if (!v9fs_extended(v9ses)) {
231 P9_DPRINTK(P9_DEBUG_ERROR,
232 "special files without extended mode\n");
233 return ERR_PTR(-EINVAL);
234 }
235 init_special_inode(inode, inode->i_mode,
236 inode->i_rdev);
237 break;
238 case S_IFREG:
239 inode->i_op = &v9fs_file_inode_operations;
240 inode->i_fop = &v9fs_file_operations;
241 break;
242 case S_IFLNK:
243 if (!v9fs_extended(v9ses)) {
244 P9_DPRINTK(P9_DEBUG_ERROR,
245 "extended modes used w/o 9P2000.u\n");
246 return ERR_PTR(-EINVAL);
247 }
248 inode->i_op = &v9fs_symlink_inode_operations;
249 break;
250 case S_IFDIR:
251 inc_nlink(inode);
252 if (v9fs_extended(v9ses))
253 inode->i_op = &v9fs_dir_inode_operations_ext;
254 else
255 inode->i_op = &v9fs_dir_inode_operations;
256 inode->i_fop = &v9fs_dir_operations;
257 break;
258 default:
259 P9_DPRINTK(P9_DEBUG_ERROR,
260 "BAD mode 0x%x S_IFMT 0x%x\n",
261 mode, mode & S_IFMT);
262 return ERR_PTR(-EINVAL);
263 }
264 } else {
265 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); 217 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
266 return ERR_PTR(-ENOMEM); 218 return ERR_PTR(-ENOMEM);
267 } 219 }
220
221 inode->i_mode = mode;
222 inode->i_uid = current_fsuid();
223 inode->i_gid = current_fsgid();
224 inode->i_blocks = 0;
225 inode->i_rdev = 0;
226 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
227 inode->i_mapping->a_ops = &v9fs_addr_operations;
228
229 switch (mode & S_IFMT) {
230 case S_IFIFO:
231 case S_IFBLK:
232 case S_IFCHR:
233 case S_IFSOCK:
234 if (!v9fs_extended(v9ses)) {
235 P9_DPRINTK(P9_DEBUG_ERROR,
236 "special files without extended mode\n");
237 err = -EINVAL;
238 goto error;
239 }
240 init_special_inode(inode, inode->i_mode, inode->i_rdev);
241 break;
242 case S_IFREG:
243 inode->i_op = &v9fs_file_inode_operations;
244 inode->i_fop = &v9fs_file_operations;
245 break;
246 case S_IFLNK:
247 if (!v9fs_extended(v9ses)) {
248 P9_DPRINTK(P9_DEBUG_ERROR,
249 "extended modes used w/o 9P2000.u\n");
250 err = -EINVAL;
251 goto error;
252 }
253 inode->i_op = &v9fs_symlink_inode_operations;
254 break;
255 case S_IFDIR:
256 inc_nlink(inode);
257 if (v9fs_extended(v9ses))
258 inode->i_op = &v9fs_dir_inode_operations_ext;
259 else
260 inode->i_op = &v9fs_dir_inode_operations;
261 inode->i_fop = &v9fs_dir_operations;
262 break;
263 default:
264 P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
265 mode, mode & S_IFMT);
266 err = -EINVAL;
267 goto error;
268 }
269
268 return inode; 270 return inode;
271
272error:
273 iput(inode);
274 return ERR_PTR(err);
269} 275}
270 276
271/* 277/*
@@ -338,30 +344,25 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
338 344
339 ret = NULL; 345 ret = NULL;
340 st = p9_client_stat(fid); 346 st = p9_client_stat(fid);
341 if (IS_ERR(st)) { 347 if (IS_ERR(st))
342 err = PTR_ERR(st); 348 return ERR_CAST(st);
343 st = NULL;
344 goto error;
345 }
346 349
347 umode = p9mode2unixmode(v9ses, st->mode); 350 umode = p9mode2unixmode(v9ses, st->mode);
348 ret = v9fs_get_inode(sb, umode); 351 ret = v9fs_get_inode(sb, umode);
349 if (IS_ERR(ret)) { 352 if (IS_ERR(ret)) {
350 err = PTR_ERR(ret); 353 err = PTR_ERR(ret);
351 ret = NULL;
352 goto error; 354 goto error;
353 } 355 }
354 356
355 v9fs_stat2inode(st, ret, sb); 357 v9fs_stat2inode(st, ret, sb);
356 ret->i_ino = v9fs_qid2ino(&st->qid); 358 ret->i_ino = v9fs_qid2ino(&st->qid);
359 p9stat_free(st);
357 kfree(st); 360 kfree(st);
358 return ret; 361 return ret;
359 362
360error: 363error:
364 p9stat_free(st);
361 kfree(st); 365 kfree(st);
362 if (ret)
363 iput(ret);
364
365 return ERR_PTR(err); 366 return ERR_PTR(err);
366} 367}
367 368
@@ -403,9 +404,9 @@ v9fs_open_created(struct inode *inode, struct file *file)
403 * @v9ses: session information 404 * @v9ses: session information
404 * @dir: directory that dentry is being created in 405 * @dir: directory that dentry is being created in
405 * @dentry: dentry that is being created 406 * @dentry: dentry that is being created
407 * @extension: 9p2000.u extension string to support devices, etc.
406 * @perm: create permissions 408 * @perm: create permissions
407 * @mode: open mode 409 * @mode: open mode
408 * @extension: 9p2000.u extension string to support devices, etc.
409 * 410 *
410 */ 411 */
411static struct p9_fid * 412static struct p9_fid *
@@ -470,7 +471,10 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
470 dentry->d_op = &v9fs_dentry_operations; 471 dentry->d_op = &v9fs_dentry_operations;
471 472
472 d_instantiate(dentry, inode); 473 d_instantiate(dentry, inode);
473 v9fs_fid_add(dentry, fid); 474 err = v9fs_fid_add(dentry, fid);
475 if (err < 0)
476 goto error;
477
474 return ofid; 478 return ofid;
475 479
476error: 480error:
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 38d695d66a0b..8961f1a8f668 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -81,7 +81,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
81 81
82static void 82static void
83v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, 83v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
84 int flags) 84 int flags, void *data)
85{ 85{
86 sb->s_maxbytes = MAX_LFS_FILESIZE; 86 sb->s_maxbytes = MAX_LFS_FILESIZE;
87 sb->s_blocksize_bits = fls(v9ses->maxdata - 1); 87 sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
@@ -91,6 +91,8 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
91 91
92 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 92 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
93 MS_NOATIME; 93 MS_NOATIME;
94
95 save_mount_options(sb, data);
94} 96}
95 97
96/** 98/**
@@ -113,14 +115,11 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
113 struct v9fs_session_info *v9ses = NULL; 115 struct v9fs_session_info *v9ses = NULL;
114 struct p9_wstat *st = NULL; 116 struct p9_wstat *st = NULL;
115 int mode = S_IRWXUGO | S_ISVTX; 117 int mode = S_IRWXUGO | S_ISVTX;
116 uid_t uid = current_fsuid();
117 gid_t gid = current_fsgid();
118 struct p9_fid *fid; 118 struct p9_fid *fid;
119 int retval = 0; 119 int retval = 0;
120 120
121 P9_DPRINTK(P9_DEBUG_VFS, " \n"); 121 P9_DPRINTK(P9_DEBUG_VFS, " \n");
122 122
123 st = NULL;
124 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); 123 v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
125 if (!v9ses) 124 if (!v9ses)
126 return -ENOMEM; 125 return -ENOMEM;
@@ -142,7 +141,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
142 retval = PTR_ERR(sb); 141 retval = PTR_ERR(sb);
143 goto free_stat; 142 goto free_stat;
144 } 143 }
145 v9fs_fill_super(sb, v9ses, flags); 144 v9fs_fill_super(sb, v9ses, flags, data);
146 145
147 inode = v9fs_get_inode(sb, S_IFDIR | mode); 146 inode = v9fs_get_inode(sb, S_IFDIR | mode);
148 if (IS_ERR(inode)) { 147 if (IS_ERR(inode)) {
@@ -150,9 +149,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
150 goto release_sb; 149 goto release_sb;
151 } 150 }
152 151
153 inode->i_uid = uid;
154 inode->i_gid = gid;
155
156 root = d_alloc_root(inode); 152 root = d_alloc_root(inode);
157 if (!root) { 153 if (!root) {
158 iput(inode); 154 iput(inode);
@@ -173,10 +169,8 @@ P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
173 simple_set_mnt(mnt, sb); 169 simple_set_mnt(mnt, sb);
174 return 0; 170 return 0;
175 171
176release_sb:
177 deactivate_locked_super(sb);
178
179free_stat: 172free_stat:
173 p9stat_free(st);
180 kfree(st); 174 kfree(st);
181 175
182clunk_fid: 176clunk_fid:
@@ -185,7 +179,12 @@ clunk_fid:
185close_session: 179close_session:
186 v9fs_session_close(v9ses); 180 v9fs_session_close(v9ses);
187 kfree(v9ses); 181 kfree(v9ses);
182 return retval;
188 183
184release_sb:
185 p9stat_free(st);
186 kfree(st);
187 deactivate_locked_super(sb);
189 return retval; 188 return retval;
190} 189}
191 190
@@ -207,24 +206,10 @@ static void v9fs_kill_super(struct super_block *s)
207 206
208 v9fs_session_close(v9ses); 207 v9fs_session_close(v9ses);
209 kfree(v9ses); 208 kfree(v9ses);
209 s->s_fs_info = NULL;
210 P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n"); 210 P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n");
211} 211}
212 212
213/**
214 * v9fs_show_options - Show mount options in /proc/mounts
215 * @m: seq_file to write to
216 * @mnt: mount descriptor
217 *
218 */
219
220static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
221{
222 struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
223
224 seq_printf(m, "%s", v9ses->options);
225 return 0;
226}
227
228static void 213static void
229v9fs_umount_begin(struct super_block *sb) 214v9fs_umount_begin(struct super_block *sb)
230{ 215{
@@ -237,7 +222,7 @@ v9fs_umount_begin(struct super_block *sb)
237static const struct super_operations v9fs_super_ops = { 222static const struct super_operations v9fs_super_ops = {
238 .statfs = simple_statfs, 223 .statfs = simple_statfs,
239 .clear_inode = v9fs_clear_inode, 224 .clear_inode = v9fs_clear_inode,
240 .show_options = v9fs_show_options, 225 .show_options = generic_show_options,
241 .umount_begin = v9fs_umount_begin, 226 .umount_begin = v9fs_umount_begin,
242}; 227};
243 228
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 0149dab365e7..681c2a7b013f 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -134,9 +134,16 @@ static int afs_readpage(struct file *file, struct page *page)
134 134
135 inode = page->mapping->host; 135 inode = page->mapping->host;
136 136
137 ASSERT(file != NULL); 137 if (file) {
138 key = file->private_data; 138 key = file->private_data;
139 ASSERT(key != NULL); 139 ASSERT(key != NULL);
140 } else {
141 key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
142 if (IS_ERR(key)) {
143 ret = PTR_ERR(key);
144 goto error_nokey;
145 }
146 }
140 147
141 _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); 148 _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index);
142 149
@@ -207,12 +214,17 @@ static int afs_readpage(struct file *file, struct page *page)
207 unlock_page(page); 214 unlock_page(page);
208 } 215 }
209 216
217 if (!file)
218 key_put(key);
210 _leave(" = 0"); 219 _leave(" = 0");
211 return 0; 220 return 0;
212 221
213error: 222error:
214 SetPageError(page); 223 SetPageError(page);
215 unlock_page(page); 224 unlock_page(page);
225 if (!file)
226 key_put(key);
227error_nokey:
216 _leave(" = %d", ret); 228 _leave(" = %d", ret);
217 return ret; 229 return ret;
218} 230}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 272b9b2bea86..59cba180fe83 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3099,8 +3099,12 @@ static void inode_tree_add(struct inode *inode)
3099{ 3099{
3100 struct btrfs_root *root = BTRFS_I(inode)->root; 3100 struct btrfs_root *root = BTRFS_I(inode)->root;
3101 struct btrfs_inode *entry; 3101 struct btrfs_inode *entry;
3102 struct rb_node **p = &root->inode_tree.rb_node; 3102 struct rb_node **p;
3103 struct rb_node *parent = NULL; 3103 struct rb_node *parent;
3104
3105again:
3106 p = &root->inode_tree.rb_node;
3107 parent = NULL;
3104 3108
3105 spin_lock(&root->inode_lock); 3109 spin_lock(&root->inode_lock);
3106 while (*p) { 3110 while (*p) {
@@ -3108,13 +3112,16 @@ static void inode_tree_add(struct inode *inode)
3108 entry = rb_entry(parent, struct btrfs_inode, rb_node); 3112 entry = rb_entry(parent, struct btrfs_inode, rb_node);
3109 3113
3110 if (inode->i_ino < entry->vfs_inode.i_ino) 3114 if (inode->i_ino < entry->vfs_inode.i_ino)
3111 p = &(*p)->rb_left; 3115 p = &parent->rb_left;
3112 else if (inode->i_ino > entry->vfs_inode.i_ino) 3116 else if (inode->i_ino > entry->vfs_inode.i_ino)
3113 p = &(*p)->rb_right; 3117 p = &parent->rb_right;
3114 else { 3118 else {
3115 WARN_ON(!(entry->vfs_inode.i_state & 3119 WARN_ON(!(entry->vfs_inode.i_state &
3116 (I_WILL_FREE | I_FREEING | I_CLEAR))); 3120 (I_WILL_FREE | I_FREEING | I_CLEAR)));
3117 break; 3121 rb_erase(parent, &root->inode_tree);
3122 RB_CLEAR_NODE(parent);
3123 spin_unlock(&root->inode_lock);
3124 goto again;
3118 } 3125 }
3119 } 3126 }
3120 rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); 3127 rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
@@ -3126,12 +3133,12 @@ static void inode_tree_del(struct inode *inode)
3126{ 3133{
3127 struct btrfs_root *root = BTRFS_I(inode)->root; 3134 struct btrfs_root *root = BTRFS_I(inode)->root;
3128 3135
3136 spin_lock(&root->inode_lock);
3129 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { 3137 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
3130 spin_lock(&root->inode_lock);
3131 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); 3138 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
3132 spin_unlock(&root->inode_lock);
3133 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); 3139 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3134 } 3140 }
3141 spin_unlock(&root->inode_lock);
3135} 3142}
3136 3143
3137static noinline void init_btrfs_i(struct inode *inode) 3144static noinline void init_btrfs_i(struct inode *inode)
diff --git a/fs/buffer.c b/fs/buffer.c
index a3ef091a45bd..28f320fac4d4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1165,8 +1165,11 @@ void mark_buffer_dirty(struct buffer_head *bh)
1165 1165
1166 if (!test_set_buffer_dirty(bh)) { 1166 if (!test_set_buffer_dirty(bh)) {
1167 struct page *page = bh->b_page; 1167 struct page *page = bh->b_page;
1168 if (!TestSetPageDirty(page)) 1168 if (!TestSetPageDirty(page)) {
1169 __set_page_dirty(page, page_mapping(page), 0); 1169 struct address_space *mapping = page_mapping(page);
1170 if (mapping)
1171 __set_page_dirty(page, mapping, 0);
1172 }
1170 } 1173 }
1171} 1174}
1172 1175
diff --git a/fs/exec.c b/fs/exec.c
index 4a8849e45b21..fb4f3cdda78c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -678,8 +678,8 @@ exit:
678} 678}
679EXPORT_SYMBOL(open_exec); 679EXPORT_SYMBOL(open_exec);
680 680
681int kernel_read(struct file *file, unsigned long offset, 681int kernel_read(struct file *file, loff_t offset,
682 char *addr, unsigned long count) 682 char *addr, unsigned long count)
683{ 683{
684 mm_segment_t old_fs; 684 mm_segment_t old_fs;
685 loff_t pos = offset; 685 loff_t pos = offset;
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig
index fb3c1a21b135..522b15498f45 100644
--- a/fs/ext3/Kconfig
+++ b/fs/ext3/Kconfig
@@ -29,23 +29,25 @@ config EXT3_FS
29 module will be called ext3. 29 module will be called ext3.
30 30
31config EXT3_DEFAULTS_TO_ORDERED 31config EXT3_DEFAULTS_TO_ORDERED
32 bool "Default to 'data=ordered' in ext3 (legacy option)" 32 bool "Default to 'data=ordered' in ext3"
33 depends on EXT3_FS 33 depends on EXT3_FS
34 help 34 help
35 If a filesystem does not explicitly specify a data ordering 35 The journal mode options for ext3 have different tradeoffs
36 mode, and the journal capability allowed it, ext3 used to 36 between when data is guaranteed to be on disk and
37 historically default to 'data=ordered'. 37 performance. The use of "data=writeback" can cause
38 38 unwritten data to appear in files after an system crash or
39 That was a rather unfortunate choice, because it leads to all 39 power failure, which can be a security issue. However,
40 kinds of latency problems, and the 'data=writeback' mode is more 40 "data=ordered" mode can also result in major performance
41 appropriate these days. 41 problems, including seconds-long delays before an fsync()
42 42 call returns. For details, see:
43 You should probably always answer 'n' here, and if you really 43
44 want to use 'data=ordered' mode, set it in the filesystem itself 44 http://ext4.wiki.kernel.org/index.php/Ext3_data_mode_tradeoffs
45 with 'tune2fs -o journal_data_ordered'. 45
46 46 If you have been historically happy with ext3's performance,
47 But if you really want to enable the legacy default, you can do 47 data=ordered mode will be a safe choice and you should
48 so by answering 'y' to this question. 48 answer 'y' here. If you understand the reliability and data
49 privacy issues of data=writeback and are willing to make
50 that trade off, answer 'n'.
49 51
50config EXT3_FS_XATTR 52config EXT3_FS_XATTR
51 bool "Ext3 extended attributes" 53 bool "Ext3 extended attributes"
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 524b349c6299..a8d80a7f1105 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -543,6 +543,19 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl
543#endif 543#endif
544} 544}
545 545
546static char *data_mode_string(unsigned long mode)
547{
548 switch (mode) {
549 case EXT3_MOUNT_JOURNAL_DATA:
550 return "journal";
551 case EXT3_MOUNT_ORDERED_DATA:
552 return "ordered";
553 case EXT3_MOUNT_WRITEBACK_DATA:
554 return "writeback";
555 }
556 return "unknown";
557}
558
546/* 559/*
547 * Show an option if 560 * Show an option if
548 * - it's set to a non-default value OR 561 * - it's set to a non-default value OR
@@ -616,13 +629,8 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
616 if (test_opt(sb, NOBH)) 629 if (test_opt(sb, NOBH))
617 seq_puts(seq, ",nobh"); 630 seq_puts(seq, ",nobh");
618 631
619 if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) 632 seq_printf(seq, ",data=%s", data_mode_string(sbi->s_mount_opt &
620 seq_puts(seq, ",data=journal"); 633 EXT3_MOUNT_DATA_FLAGS));
621 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
622 seq_puts(seq, ",data=ordered");
623 else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
624 seq_puts(seq, ",data=writeback");
625
626 if (test_opt(sb, DATA_ERR_ABORT)) 634 if (test_opt(sb, DATA_ERR_ABORT))
627 seq_puts(seq, ",data_err=abort"); 635 seq_puts(seq, ",data_err=abort");
628 636
@@ -1024,12 +1032,18 @@ static int parse_options (char *options, struct super_block *sb,
1024 datacheck: 1032 datacheck:
1025 if (is_remount) { 1033 if (is_remount) {
1026 if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) 1034 if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
1027 != data_opt) { 1035 == data_opt)
1028 printk(KERN_ERR 1036 break;
1029 "EXT3-fs: cannot change data " 1037 printk(KERN_ERR
1030 "mode on remount\n"); 1038 "EXT3-fs (device %s): Cannot change "
1031 return 0; 1039 "data mode on remount. The filesystem "
1032 } 1040 "is mounted in data=%s mode and you "
1041 "try to remount it in data=%s mode.\n",
1042 sb->s_id,
1043 data_mode_string(sbi->s_mount_opt &
1044 EXT3_MOUNT_DATA_FLAGS),
1045 data_mode_string(data_opt));
1046 return 0;
1033 } else { 1047 } else {
1034 sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; 1048 sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS;
1035 sbi->s_mount_opt |= data_opt; 1049 sbi->s_mount_opt |= data_opt;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 23419dc3027b..a7cbfbd340c7 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -386,16 +386,16 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
386#define GDLM_ATTR(_name,_mode,_show,_store) \ 386#define GDLM_ATTR(_name,_mode,_show,_store) \
387static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) 387static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
388 388
389GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 389GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
390GDLM_ATTR(block, 0644, block_show, block_store); 390GDLM_ATTR(block, 0644, block_show, block_store);
391GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 391GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
392GDLM_ATTR(id, 0444, lkid_show, NULL); 392GDLM_ATTR(id, 0444, lkid_show, NULL);
393GDLM_ATTR(jid, 0444, jid_show, NULL); 393GDLM_ATTR(jid, 0444, jid_show, NULL);
394GDLM_ATTR(first, 0444, lkfirst_show, NULL); 394GDLM_ATTR(first, 0444, lkfirst_show, NULL);
395GDLM_ATTR(first_done, 0444, first_done_show, NULL); 395GDLM_ATTR(first_done, 0444, first_done_show, NULL);
396GDLM_ATTR(recover, 0200, NULL, recover_store); 396GDLM_ATTR(recover, 0600, NULL, recover_store);
397GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); 397GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
398GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); 398GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
399 399
400static struct attribute *lock_module_attrs[] = { 400static struct attribute *lock_module_attrs[] = {
401 &gdlm_attr_proto_name.attr, 401 &gdlm_attr_proto_name.attr,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 941c8425c10b..cb88dac8ccaa 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -935,26 +935,28 @@ static int can_do_hugetlb_shm(void)
935 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); 935 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
936} 936}
937 937
938struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) 938struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
939 struct user_struct **user)
939{ 940{
940 int error = -ENOMEM; 941 int error = -ENOMEM;
941 int unlock_shm = 0;
942 struct file *file; 942 struct file *file;
943 struct inode *inode; 943 struct inode *inode;
944 struct dentry *dentry, *root; 944 struct dentry *dentry, *root;
945 struct qstr quick_string; 945 struct qstr quick_string;
946 struct user_struct *user = current_user();
947 946
947 *user = NULL;
948 if (!hugetlbfs_vfsmount) 948 if (!hugetlbfs_vfsmount)
949 return ERR_PTR(-ENOENT); 949 return ERR_PTR(-ENOENT);
950 950
951 if (!can_do_hugetlb_shm()) { 951 if (!can_do_hugetlb_shm()) {
952 if (user_shm_lock(size, user)) { 952 *user = current_user();
953 unlock_shm = 1; 953 if (user_shm_lock(size, *user)) {
954 WARN_ONCE(1, 954 WARN_ONCE(1,
955 "Using mlock ulimits for SHM_HUGETLB deprecated\n"); 955 "Using mlock ulimits for SHM_HUGETLB deprecated\n");
956 } else 956 } else {
957 *user = NULL;
957 return ERR_PTR(-EPERM); 958 return ERR_PTR(-EPERM);
959 }
958 } 960 }
959 961
960 root = hugetlbfs_vfsmount->mnt_root; 962 root = hugetlbfs_vfsmount->mnt_root;
@@ -996,8 +998,10 @@ out_inode:
996out_dentry: 998out_dentry:
997 dput(dentry); 999 dput(dentry);
998out_shm_unlock: 1000out_shm_unlock:
999 if (unlock_shm) 1001 if (*user) {
1000 user_shm_unlock(size, user); 1002 user_shm_unlock(size, *user);
1003 *user = NULL;
1004 }
1001 return ERR_PTR(error); 1005 return ERR_PTR(error);
1002} 1006}
1003 1007
diff --git a/fs/libfs.c b/fs/libfs.c
index ddfa89948c3f..dcec3d3ea64f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -217,7 +217,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
217 return PTR_ERR(s); 217 return PTR_ERR(s);
218 218
219 s->s_flags = MS_NOUSER; 219 s->s_flags = MS_NOUSER;
220 s->s_maxbytes = ~0ULL; 220 s->s_maxbytes = MAX_LFS_FILESIZE;
221 s->s_blocksize = PAGE_SIZE; 221 s->s_blocksize = PAGE_SIZE;
222 s->s_blocksize_bits = PAGE_SHIFT; 222 s->s_blocksize_bits = PAGE_SHIFT;
223 s->s_magic = magic; 223 s->s_magic = magic;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 65ca8c18476f..1434080aefeb 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1250,8 +1250,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
1250 continue; 1250 continue;
1251 } 1251 }
1252 /* Initialize or reset the session */ 1252 /* Initialize or reset the session */
1253 if (nfs4_has_session(clp) && 1253 if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)
1254 test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) { 1254 && nfs4_has_session(clp)) {
1255 if (clp->cl_cons_state == NFS_CS_SESSION_INITING) 1255 if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
1256 status = nfs4_initialize_session(clp); 1256 status = nfs4_initialize_session(clp);
1257 else 1257 else
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8e2ec43b18f4..151964f0de4c 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -416,8 +416,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
416 if (unlikely(err)) 416 if (unlikely(err))
417 goto failed; 417 goto failed;
418 418
419 down_read(&nilfs->ns_segctor_sem);
419 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, 420 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
420 &bh_cp); 421 &bh_cp);
422 up_read(&nilfs->ns_segctor_sem);
421 if (unlikely(err)) { 423 if (unlikely(err)) {
422 if (err == -ENOENT || err == -EINVAL) { 424 if (err == -ENOENT || err == -EINVAL) {
423 printk(KERN_ERR 425 printk(KERN_ERR
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index e8adbffc626f..1b9caafb8662 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -253,7 +253,7 @@ nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
253 253
254static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) 254static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
255{ 255{
256 if (!atomic_dec_and_test(&sbi->s_count)) 256 if (atomic_dec_and_test(&sbi->s_count))
257 kfree(sbi); 257 kfree(sbi);
258} 258}
259 259
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 47cd258fd24d..c9ee67b442e1 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -62,13 +62,14 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
62 event_priv->wd = wd; 62 event_priv->wd = wd;
63 63
64 ret = fsnotify_add_notify_event(group, event, fsn_event_priv); 64 ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
65 /* EEXIST is not an error */ 65 if (ret) {
66 if (ret == -EEXIST)
67 ret = 0;
68
69 /* did event_priv get attached? */
70 if (list_empty(&fsn_event_priv->event_list))
71 inotify_free_event_priv(fsn_event_priv); 66 inotify_free_event_priv(fsn_event_priv);
67 /* EEXIST says we tail matched, EOVERFLOW isn't something
68 * to report up the stack. */
69 if ((ret == -EEXIST) ||
70 (ret == -EOVERFLOW))
71 ret = 0;
72 }
72 73
73 /* 74 /*
74 * If we hold the entry until after the event is on the queue 75 * If we hold the entry until after the event is on the queue
@@ -104,16 +105,45 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
104 return send; 105 return send;
105} 106}
106 107
108/*
109 * This is NEVER supposed to be called. Inotify marks should either have been
110 * removed from the idr when the watch was removed or in the
111 * fsnotify_destroy_mark_by_group() call when the inotify instance was being
112 * torn down. This is only called if the idr is about to be freed but there
113 * are still marks in it.
114 */
107static int idr_callback(int id, void *p, void *data) 115static int idr_callback(int id, void *p, void *data)
108{ 116{
109 BUG(); 117 struct fsnotify_mark_entry *entry;
118 struct inotify_inode_mark_entry *ientry;
119 static bool warned = false;
120
121 if (warned)
122 return 0;
123
124 warned = false;
125 entry = p;
126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
127
128 WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in "
129 "idr. Probably leaking memory\n", id, p, data);
130
131 /*
132 * I'm taking the liberty of assuming that the mark in question is a
133 * valid address and I'm dereferencing it. This might help to figure
134 * out why we got here and the panic is no worse than the original
135 * BUG() that was here.
136 */
137 if (entry)
138 printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n",
139 entry->group, entry->inode, ientry->wd);
110 return 0; 140 return 0;
111} 141}
112 142
113static void inotify_free_group_priv(struct fsnotify_group *group) 143static void inotify_free_group_priv(struct fsnotify_group *group)
114{ 144{
115 /* ideally the idr is empty and we won't hit the BUG in teh callback */ 145 /* ideally the idr is empty and we won't hit the BUG in teh callback */
116 idr_for_each(&group->inotify_data.idr, idr_callback, NULL); 146 idr_for_each(&group->inotify_data.idr, idr_callback, group);
117 idr_remove_all(&group->inotify_data.idr); 147 idr_remove_all(&group->inotify_data.idr);
118 idr_destroy(&group->inotify_data.idr); 148 idr_destroy(&group->inotify_data.idr);
119} 149}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index f30d9bbc2e1b..0e781bc88d1e 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -47,9 +47,6 @@
47 47
48static struct vfsmount *inotify_mnt __read_mostly; 48static struct vfsmount *inotify_mnt __read_mostly;
49 49
50/* this just sits here and wastes global memory. used to just pad userspace messages with zeros */
51static struct inotify_event nul_inotify_event;
52
53/* these are configurable via /proc/sys/fs/inotify/ */ 50/* these are configurable via /proc/sys/fs/inotify/ */
54static int inotify_max_user_instances __read_mostly; 51static int inotify_max_user_instances __read_mostly;
55static int inotify_max_queued_events __read_mostly; 52static int inotify_max_queued_events __read_mostly;
@@ -199,8 +196,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
199 inotify_free_event_priv(fsn_priv); 196 inotify_free_event_priv(fsn_priv);
200 } 197 }
201 198
202 /* round up event->name_len so it is a multiple of event_size */ 199 /* round up event->name_len so it is a multiple of event_size
203 name_len = roundup(event->name_len, event_size); 200 * plus an extra byte for the terminating '\0'.
201 */
202 name_len = roundup(event->name_len + 1, event_size);
204 inotify_event.len = name_len; 203 inotify_event.len = name_len;
205 204
206 inotify_event.mask = inotify_mask_to_arg(event->mask); 205 inotify_event.mask = inotify_mask_to_arg(event->mask);
@@ -224,8 +223,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
224 return -EFAULT; 223 return -EFAULT;
225 buf += event->name_len; 224 buf += event->name_len;
226 225
227 /* fill userspace with 0's from nul_inotify_event */ 226 /* fill userspace with 0's */
228 if (copy_to_user(buf, &nul_inotify_event, len_to_zero)) 227 if (clear_user(buf, len_to_zero))
229 return -EFAULT; 228 return -EFAULT;
230 buf += len_to_zero; 229 buf += len_to_zero;
231 event_size += name_len; 230 event_size += name_len;
@@ -364,20 +363,53 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
364 return error; 363 return error;
365} 364}
366 365
366/*
367 * Remove the mark from the idr (if present) and drop the reference
368 * on the mark because it was in the idr.
369 */
367static void inotify_remove_from_idr(struct fsnotify_group *group, 370static void inotify_remove_from_idr(struct fsnotify_group *group,
368 struct inotify_inode_mark_entry *ientry) 371 struct inotify_inode_mark_entry *ientry)
369{ 372{
370 struct idr *idr; 373 struct idr *idr;
374 struct fsnotify_mark_entry *entry;
375 struct inotify_inode_mark_entry *found_ientry;
376 int wd;
371 377
372 spin_lock(&group->inotify_data.idr_lock); 378 spin_lock(&group->inotify_data.idr_lock);
373 idr = &group->inotify_data.idr; 379 idr = &group->inotify_data.idr;
374 idr_remove(idr, ientry->wd); 380 wd = ientry->wd;
375 spin_unlock(&group->inotify_data.idr_lock); 381
382 if (wd == -1)
383 goto out;
384
385 entry = idr_find(&group->inotify_data.idr, wd);
386 if (unlikely(!entry))
387 goto out;
388
389 found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
390 if (unlikely(found_ientry != ientry)) {
391 /* We found an entry in the idr with the right wd, but it's
392 * not the entry we were told to remove. eparis seriously
393 * fucked up somewhere. */
394 WARN_ON(1);
395 ientry->wd = -1;
396 goto out;
397 }
398
399 /* One ref for being in the idr, one ref held by the caller */
400 BUG_ON(atomic_read(&entry->refcnt) < 2);
401
402 idr_remove(idr, wd);
376 ientry->wd = -1; 403 ientry->wd = -1;
404
405 /* removed from the idr, drop that ref */
406 fsnotify_put_mark(entry);
407out:
408 spin_unlock(&group->inotify_data.idr_lock);
377} 409}
410
378/* 411/*
379 * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the 412 * Send IN_IGNORED for this wd, remove this wd from the idr.
380 * internal reference help on the mark because it is in the idr.
381 */ 413 */
382void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, 414void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
383 struct fsnotify_group *group) 415 struct fsnotify_group *group)
@@ -386,6 +418,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
386 struct fsnotify_event *ignored_event; 418 struct fsnotify_event *ignored_event;
387 struct inotify_event_private_data *event_priv; 419 struct inotify_event_private_data *event_priv;
388 struct fsnotify_event_private_data *fsn_event_priv; 420 struct fsnotify_event_private_data *fsn_event_priv;
421 int ret;
389 422
390 ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, 423 ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
391 FSNOTIFY_EVENT_NONE, NULL, 0, 424 FSNOTIFY_EVENT_NONE, NULL, 0,
@@ -404,10 +437,8 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
404 fsn_event_priv->group = group; 437 fsn_event_priv->group = group;
405 event_priv->wd = ientry->wd; 438 event_priv->wd = ientry->wd;
406 439
407 fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); 440 ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
408 441 if (ret)
409 /* did the private data get added? */
410 if (list_empty(&fsn_event_priv->event_list))
411 inotify_free_event_priv(fsn_event_priv); 442 inotify_free_event_priv(fsn_event_priv);
412 443
413skip_send_ignore: 444skip_send_ignore:
@@ -418,9 +449,6 @@ skip_send_ignore:
418 /* remove this entry from the idr */ 449 /* remove this entry from the idr */
419 inotify_remove_from_idr(group, ientry); 450 inotify_remove_from_idr(group, ientry);
420 451
421 /* removed from idr, drop that reference */
422 fsnotify_put_mark(entry);
423
424 atomic_dec(&group->inotify_data.user->inotify_watches); 452 atomic_dec(&group->inotify_data.user->inotify_watches);
425} 453}
426 454
@@ -432,80 +460,29 @@ static void inotify_free_mark(struct fsnotify_mark_entry *entry)
432 kmem_cache_free(inotify_inode_mark_cachep, ientry); 460 kmem_cache_free(inotify_inode_mark_cachep, ientry);
433} 461}
434 462
435static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) 463static int inotify_update_existing_watch(struct fsnotify_group *group,
464 struct inode *inode,
465 u32 arg)
436{ 466{
437 struct fsnotify_mark_entry *entry = NULL; 467 struct fsnotify_mark_entry *entry;
438 struct inotify_inode_mark_entry *ientry; 468 struct inotify_inode_mark_entry *ientry;
439 struct inotify_inode_mark_entry *tmp_ientry;
440 int ret = 0;
441 int add = (arg & IN_MASK_ADD);
442 __u32 mask;
443 __u32 old_mask, new_mask; 469 __u32 old_mask, new_mask;
470 __u32 mask;
471 int add = (arg & IN_MASK_ADD);
472 int ret;
444 473
445 /* don't allow invalid bits: we don't want flags set */ 474 /* don't allow invalid bits: we don't want flags set */
446 mask = inotify_arg_to_mask(arg); 475 mask = inotify_arg_to_mask(arg);
447 if (unlikely(!mask)) 476 if (unlikely(!mask))
448 return -EINVAL; 477 return -EINVAL;
449 478
450 tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
451 if (unlikely(!tmp_ientry))
452 return -ENOMEM;
453 /* we set the mask at the end after attaching it */
454 fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
455 tmp_ientry->wd = -1;
456
457find_entry:
458 spin_lock(&inode->i_lock); 479 spin_lock(&inode->i_lock);
459 entry = fsnotify_find_mark_entry(group, inode); 480 entry = fsnotify_find_mark_entry(group, inode);
460 spin_unlock(&inode->i_lock); 481 spin_unlock(&inode->i_lock);
461 if (entry) { 482 if (!entry)
462 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 483 return -ENOENT;
463 } else {
464 ret = -ENOSPC;
465 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
466 goto out_err;
467retry:
468 ret = -ENOMEM;
469 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
470 goto out_err;
471
472 spin_lock(&group->inotify_data.idr_lock);
473 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
474 group->inotify_data.last_wd,
475 &tmp_ientry->wd);
476 spin_unlock(&group->inotify_data.idr_lock);
477 if (ret) {
478 if (ret == -EAGAIN)
479 goto retry;
480 goto out_err;
481 }
482 484
483 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); 485 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
484 if (ret) {
485 inotify_remove_from_idr(group, tmp_ientry);
486 if (ret == -EEXIST)
487 goto find_entry;
488 goto out_err;
489 }
490
491 /* tmp_ientry has been added to the inode, so we are all set up.
492 * now we just need to make sure tmp_ientry doesn't get freed and
493 * we need to set up entry and ientry so the generic code can
494 * do its thing. */
495 ientry = tmp_ientry;
496 entry = &ientry->fsn_entry;
497 tmp_ientry = NULL;
498
499 atomic_inc(&group->inotify_data.user->inotify_watches);
500
501 /* update the idr hint */
502 group->inotify_data.last_wd = ientry->wd;
503
504 /* we put the mark on the idr, take a reference */
505 fsnotify_get_mark(entry);
506 }
507
508 ret = ientry->wd;
509 486
510 spin_lock(&entry->lock); 487 spin_lock(&entry->lock);
511 488
@@ -537,18 +514,103 @@ retry:
537 fsnotify_recalc_group_mask(group); 514 fsnotify_recalc_group_mask(group);
538 } 515 }
539 516
540 /* this either matches fsnotify_find_mark_entry, or init_mark_entry 517 /* return the wd */
541 * depending on which path we took... */ 518 ret = ientry->wd;
519
520 /* match the get from fsnotify_find_mark_entry() */
542 fsnotify_put_mark(entry); 521 fsnotify_put_mark(entry);
543 522
523 return ret;
524}
525
526static int inotify_new_watch(struct fsnotify_group *group,
527 struct inode *inode,
528 u32 arg)
529{
530 struct inotify_inode_mark_entry *tmp_ientry;
531 __u32 mask;
532 int ret;
533
534 /* don't allow invalid bits: we don't want flags set */
535 mask = inotify_arg_to_mask(arg);
536 if (unlikely(!mask))
537 return -EINVAL;
538
539 tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
540 if (unlikely(!tmp_ientry))
541 return -ENOMEM;
542
543 fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
544 tmp_ientry->fsn_entry.mask = mask;
545 tmp_ientry->wd = -1;
546
547 ret = -ENOSPC;
548 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
549 goto out_err;
550retry:
551 ret = -ENOMEM;
552 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
553 goto out_err;
554
555 spin_lock(&group->inotify_data.idr_lock);
556 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
557 group->inotify_data.last_wd,
558 &tmp_ientry->wd);
559 spin_unlock(&group->inotify_data.idr_lock);
560 if (ret) {
561 /* idr was out of memory allocate and try again */
562 if (ret == -EAGAIN)
563 goto retry;
564 goto out_err;
565 }
566
567 /* we put the mark on the idr, take a reference */
568 fsnotify_get_mark(&tmp_ientry->fsn_entry);
569
570 /* we are on the idr, now get on the inode */
571 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
572 if (ret) {
573 /* we failed to get on the inode, get off the idr */
574 inotify_remove_from_idr(group, tmp_ientry);
575 goto out_err;
576 }
577
578 /* update the idr hint, who cares about races, it's just a hint */
579 group->inotify_data.last_wd = tmp_ientry->wd;
580
581 /* increment the number of watches the user has */
582 atomic_inc(&group->inotify_data.user->inotify_watches);
583
584 /* return the watch descriptor for this new entry */
585 ret = tmp_ientry->wd;
586
587 /* match the ref from fsnotify_init_markentry() */
588 fsnotify_put_mark(&tmp_ientry->fsn_entry);
589
544out_err: 590out_err:
545 /* could be an error, could be that we found an existing mark */ 591 if (ret < 0)
546 if (tmp_ientry) {
547 /* on the idr but didn't make it on the inode */
548 if (tmp_ientry->wd != -1)
549 inotify_remove_from_idr(group, tmp_ientry);
550 kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); 592 kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry);
551 } 593
594 return ret;
595}
596
597static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
598{
599 int ret = 0;
600
601retry:
602 /* try to update and existing watch with the new arg */
603 ret = inotify_update_existing_watch(group, inode, arg);
604 /* no mark present, try to add a new one */
605 if (ret == -ENOENT)
606 ret = inotify_new_watch(group, inode, arg);
607 /*
608 * inotify_new_watch could race with another thread which did an
609 * inotify_new_watch between the update_existing and the add watch
610 * here, go back and try to update an existing mark again.
611 */
612 if (ret == -EEXIST)
613 goto retry;
552 614
553 return ret; 615 return ret;
554} 616}
@@ -568,7 +630,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
568 630
569 spin_lock_init(&group->inotify_data.idr_lock); 631 spin_lock_init(&group->inotify_data.idr_lock);
570 idr_init(&group->inotify_data.idr); 632 idr_init(&group->inotify_data.idr);
571 group->inotify_data.last_wd = 0; 633 group->inotify_data.last_wd = 1;
572 group->inotify_data.user = user; 634 group->inotify_data.user = user;
573 group->inotify_data.fa = NULL; 635 group->inotify_data.fa = NULL;
574 636
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 521368574e97..3816d5750dd5 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -153,6 +153,10 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
153 return true; 153 return true;
154 break; 154 break;
155 case (FSNOTIFY_EVENT_NONE): 155 case (FSNOTIFY_EVENT_NONE):
156 if (old->mask & FS_Q_OVERFLOW)
157 return true;
158 else if (old->mask & FS_IN_IGNORED)
159 return false;
156 return false; 160 return false;
157 }; 161 };
158 } 162 }
@@ -171,9 +175,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
171 struct list_head *list = &group->notification_list; 175 struct list_head *list = &group->notification_list;
172 struct fsnotify_event_holder *last_holder; 176 struct fsnotify_event_holder *last_holder;
173 struct fsnotify_event *last_event; 177 struct fsnotify_event *last_event;
174 178 int ret = 0;
175 /* easy to tell if priv was attached to the event */
176 INIT_LIST_HEAD(&priv->event_list);
177 179
178 /* 180 /*
179 * There is one fsnotify_event_holder embedded inside each fsnotify_event. 181 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
@@ -194,6 +196,7 @@ alloc_holder:
194 196
195 if (group->q_len >= group->max_events) { 197 if (group->q_len >= group->max_events) {
196 event = &q_overflow_event; 198 event = &q_overflow_event;
199 ret = -EOVERFLOW;
197 /* sorry, no private data on the overflow event */ 200 /* sorry, no private data on the overflow event */
198 priv = NULL; 201 priv = NULL;
199 } 202 }
@@ -235,7 +238,7 @@ alloc_holder:
235 mutex_unlock(&group->notification_mutex); 238 mutex_unlock(&group->notification_mutex);
236 239
237 wake_up(&group->notification_waitq); 240 wake_up(&group->notification_waitq);
238 return 0; 241 return ret;
239} 242}
240 243
241/* 244/*
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f9a3e8942669..ab513ddaeff2 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6851,7 +6851,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6851 } 6851 }
6852 status = 0; 6852 status = 0;
6853bail: 6853bail:
6854 6854 brelse(last_eb_bh);
6855 mlog_exit(status); 6855 mlog_exit(status);
6856 return status; 6856 return status;
6857} 6857}
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index fcf879ed6930..756f5b0998e0 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -122,7 +122,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
122 * that still has AST's pending... */ 122 * that still has AST's pending... */
123 in_use = !list_empty(&lock->ast_list); 123 in_use = !list_empty(&lock->ast_list);
124 spin_unlock(&dlm->ast_lock); 124 spin_unlock(&dlm->ast_lock);
125 if (in_use) { 125 if (in_use && !(flags & LKM_CANCEL)) {
126 mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " 126 mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
127 "while waiting for an ast!", res->lockname.len, 127 "while waiting for an ast!", res->lockname.len,
128 res->lockname.name); 128 res->lockname.name);
@@ -131,7 +131,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
131 131
132 spin_lock(&res->spinlock); 132 spin_lock(&res->spinlock);
133 if (res->state & DLM_LOCK_RES_IN_PROGRESS) { 133 if (res->state & DLM_LOCK_RES_IN_PROGRESS) {
134 if (master_node) { 134 if (master_node && !(flags & LKM_CANCEL)) {
135 mlog(ML_ERROR, "lockres in progress!\n"); 135 mlog(ML_ERROR, "lockres in progress!\n");
136 spin_unlock(&res->spinlock); 136 spin_unlock(&res->spinlock);
137 return DLM_FORWARD; 137 return DLM_FORWARD;
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index fcdba091af3d..c212cf5a2bdf 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -108,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = {
108 [OCFS2_LOCK_TYPE_OPEN] = "Open", 108 [OCFS2_LOCK_TYPE_OPEN] = "Open",
109 [OCFS2_LOCK_TYPE_FLOCK] = "Flock", 109 [OCFS2_LOCK_TYPE_FLOCK] = "Flock",
110 [OCFS2_LOCK_TYPE_QINFO] = "Quota", 110 [OCFS2_LOCK_TYPE_QINFO] = "Quota",
111 [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync",
111 [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", 112 [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",
112}; 113};
113 114
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index bf7742d0ee3b..44f2a5e1d042 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -23,6 +23,7 @@
23#include "sysfile.h" 23#include "sysfile.h"
24#include "dlmglue.h" 24#include "dlmglue.h"
25#include "uptodate.h" 25#include "uptodate.h"
26#include "super.h"
26#include "quota.h" 27#include "quota.h"
27 28
28static struct workqueue_struct *ocfs2_quota_wq = NULL; 29static struct workqueue_struct *ocfs2_quota_wq = NULL;
@@ -114,6 +115,15 @@ int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
114 int rc = 0; 115 int rc = 0;
115 struct buffer_head *tmp = *bh; 116 struct buffer_head *tmp = *bh;
116 117
118 if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) {
119 ocfs2_error(inode->i_sb,
120 "Quota file %llu is probably corrupted! Requested "
121 "to read block %Lu but file has size only %Lu\n",
122 (unsigned long long)OCFS2_I(inode)->ip_blkno,
123 (unsigned long long)v_block,
124 (unsigned long long)i_size_read(inode));
125 return -EIO;
126 }
117 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, 127 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0,
118 ocfs2_validate_quota_block); 128 ocfs2_validate_quota_block);
119 if (rc) 129 if (rc)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index b0ee0fdf799a..a3f8871d21fd 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1218,13 +1218,17 @@ static void ocfs2_kill_sb(struct super_block *sb)
1218{ 1218{
1219 struct ocfs2_super *osb = OCFS2_SB(sb); 1219 struct ocfs2_super *osb = OCFS2_SB(sb);
1220 1220
1221 /* Failed mount? */
1222 if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED)
1223 goto out;
1224
1221 /* Prevent further queueing of inode drop events */ 1225 /* Prevent further queueing of inode drop events */
1222 spin_lock(&dentry_list_lock); 1226 spin_lock(&dentry_list_lock);
1223 ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); 1227 ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
1224 spin_unlock(&dentry_list_lock); 1228 spin_unlock(&dentry_list_lock);
1225 /* Wait for work to finish and/or remove it */ 1229 /* Wait for work to finish and/or remove it */
1226 cancel_work_sync(&osb->dentry_lock_work); 1230 cancel_work_sync(&osb->dentry_lock_work);
1227 1231out:
1228 kill_block_super(sb); 1232 kill_block_super(sb);
1229} 1233}
1230 1234
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 175db258942f..6f742f6658a9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1003,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
1003 1003
1004 if (!task) 1004 if (!task)
1005 return -ESRCH; 1005 return -ESRCH;
1006 task_lock(task); 1006 oom_adjust = task->oomkilladj;
1007 if (task->mm)
1008 oom_adjust = task->mm->oom_adj;
1009 else
1010 oom_adjust = OOM_DISABLE;
1011 task_unlock(task);
1012 put_task_struct(task); 1007 put_task_struct(task);
1013 1008
1014 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 1009 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1037,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1037 task = get_proc_task(file->f_path.dentry->d_inode); 1032 task = get_proc_task(file->f_path.dentry->d_inode);
1038 if (!task) 1033 if (!task)
1039 return -ESRCH; 1034 return -ESRCH;
1040 task_lock(task); 1035 if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
1041 if (!task->mm) {
1042 task_unlock(task);
1043 put_task_struct(task);
1044 return -EINVAL;
1045 }
1046 if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1047 task_unlock(task);
1048 put_task_struct(task); 1036 put_task_struct(task);
1049 return -EACCES; 1037 return -EACCES;
1050 } 1038 }
1051 task->mm->oom_adj = oom_adjust; 1039 task->oomkilladj = oom_adjust;
1052 task_unlock(task);
1053 put_task_struct(task); 1040 put_task_struct(task);
1054 if (end - buffer == 0) 1041 if (end - buffer == 0)
1055 return -EIO; 1042 return -EIO;
diff --git a/fs/select.c b/fs/select.c
index d870237e42c7..8084834e123e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -110,6 +110,7 @@ void poll_initwait(struct poll_wqueues *pwq)
110{ 110{
111 init_poll_funcptr(&pwq->pt, __pollwait); 111 init_poll_funcptr(&pwq->pt, __pollwait);
112 pwq->polling_task = current; 112 pwq->polling_task = current;
113 pwq->triggered = 0;
113 pwq->error = 0; 114 pwq->error = 0;
114 pwq->table = NULL; 115 pwq->table = NULL;
115 pwq->inline_index = 0; 116 pwq->inline_index = 0;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index b619d6b8ca43..98ef624d9baf 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -708,6 +708,16 @@ xfs_reclaim_inode(
708 return 0; 708 return 0;
709} 709}
710 710
711void
712__xfs_inode_set_reclaim_tag(
713 struct xfs_perag *pag,
714 struct xfs_inode *ip)
715{
716 radix_tree_tag_set(&pag->pag_ici_root,
717 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
718 XFS_ICI_RECLAIM_TAG);
719}
720
711/* 721/*
712 * We set the inode flag atomically with the radix tree tag. 722 * We set the inode flag atomically with the radix tree tag.
713 * Once we get tag lookups on the radix tree, this inode flag 723 * Once we get tag lookups on the radix tree, this inode flag
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag(
722 732
723 read_lock(&pag->pag_ici_lock); 733 read_lock(&pag->pag_ici_lock);
724 spin_lock(&ip->i_flags_lock); 734 spin_lock(&ip->i_flags_lock);
725 radix_tree_tag_set(&pag->pag_ici_root, 735 __xfs_inode_set_reclaim_tag(pag, ip);
726 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
727 __xfs_iflags_set(ip, XFS_IRECLAIMABLE); 736 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
728 spin_unlock(&ip->i_flags_lock); 737 spin_unlock(&ip->i_flags_lock);
729 read_unlock(&pag->pag_ici_lock); 738 read_unlock(&pag->pag_ici_lock);
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 2a10301c99c7..59120602588a 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
49 49
50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
51void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
51void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); 52void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
52void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, 53void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
53 struct xfs_inode *ip); 54 struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 34ec86923f7e..ecbf8b4d2e2e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -191,80 +191,82 @@ xfs_iget_cache_hit(
191 int flags, 191 int flags,
192 int lock_flags) __releases(pag->pag_ici_lock) 192 int lock_flags) __releases(pag->pag_ici_lock)
193{ 193{
194 struct inode *inode = VFS_I(ip);
194 struct xfs_mount *mp = ip->i_mount; 195 struct xfs_mount *mp = ip->i_mount;
195 int error = EAGAIN; 196 int error;
197
198 spin_lock(&ip->i_flags_lock);
196 199
197 /* 200 /*
198 * If INEW is set this inode is being set up 201 * If we are racing with another cache hit that is currently
199 * If IRECLAIM is set this inode is being torn down 202 * instantiating this inode or currently recycling it out of
200 * Pause and try again. 203 * reclaimabe state, wait for the initialisation to complete
204 * before continuing.
205 *
206 * XXX(hch): eventually we should do something equivalent to
207 * wait_on_inode to wait for these flags to be cleared
208 * instead of polling for it.
201 */ 209 */
202 if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { 210 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
203 XFS_STATS_INC(xs_ig_frecycle); 211 XFS_STATS_INC(xs_ig_frecycle);
212 error = EAGAIN;
204 goto out_error; 213 goto out_error;
205 } 214 }
206 215
207 /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ 216 /*
208 if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { 217 * If lookup is racing with unlink return an error immediately.
209 218 */
210 /* 219 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
211 * If lookup is racing with unlink, then we should return an 220 error = ENOENT;
212 * error immediately so we don't remove it from the reclaim 221 goto out_error;
213 * list and potentially leak the inode. 222 }
214 */
215 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
216 error = ENOENT;
217 goto out_error;
218 }
219 223
224 /*
225 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
226 * Need to carefully get it back into useable state.
227 */
228 if (ip->i_flags & XFS_IRECLAIMABLE) {
220 xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); 229 xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
221 230
222 /* 231 /*
223 * We need to re-initialise the VFS inode as it has been 232 * We need to set XFS_INEW atomically with clearing the
224 * 'freed' by the VFS. Do this here so we can deal with 233 * reclaimable tag so that we do have an indicator of the
225 * errors cleanly, then tag it so it can be set up correctly 234 * inode still being initialized.
226 * later.
227 */ 235 */
228 if (inode_init_always(mp->m_super, VFS_I(ip))) { 236 ip->i_flags |= XFS_INEW;
229 error = ENOMEM; 237 ip->i_flags &= ~XFS_IRECLAIMABLE;
230 goto out_error; 238 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
231 }
232 239
233 /* 240 spin_unlock(&ip->i_flags_lock);
234 * We must set the XFS_INEW flag before clearing the 241 read_unlock(&pag->pag_ici_lock);
235 * XFS_IRECLAIMABLE flag so that if a racing lookup does
236 * not find the XFS_IRECLAIMABLE above but has the igrab()
237 * below succeed we can safely check XFS_INEW to detect
238 * that this inode is still being initialised.
239 */
240 xfs_iflags_set(ip, XFS_INEW);
241 xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
242 242
243 /* clear the radix tree reclaim flag as well. */ 243 error = -inode_init_always(mp->m_super, inode);
244 __xfs_inode_clear_reclaim_tag(mp, pag, ip); 244 if (error) {
245 } else if (!igrab(VFS_I(ip))) { 245 /*
246 * Re-initializing the inode failed, and we are in deep
247 * trouble. Try to re-add it to the reclaim list.
248 */
249 read_lock(&pag->pag_ici_lock);
250 spin_lock(&ip->i_flags_lock);
251
252 ip->i_flags &= ~XFS_INEW;
253 ip->i_flags |= XFS_IRECLAIMABLE;
254 __xfs_inode_set_reclaim_tag(pag, ip);
255 goto out_error;
256 }
257 inode->i_state = I_LOCK|I_NEW;
258 } else {
246 /* If the VFS inode is being torn down, pause and try again. */ 259 /* If the VFS inode is being torn down, pause and try again. */
247 XFS_STATS_INC(xs_ig_frecycle); 260 if (!igrab(inode)) {
248 goto out_error; 261 error = EAGAIN;
249 } else if (xfs_iflags_test(ip, XFS_INEW)) { 262 goto out_error;
250 /* 263 }
251 * We are racing with another cache hit that is
252 * currently recycling this inode out of the XFS_IRECLAIMABLE
253 * state. Wait for the initialisation to complete before
254 * continuing.
255 */
256 wait_on_inode(VFS_I(ip));
257 }
258 264
259 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { 265 /* We've got a live one. */
260 error = ENOENT; 266 spin_unlock(&ip->i_flags_lock);
261 iput(VFS_I(ip)); 267 read_unlock(&pag->pag_ici_lock);
262 goto out_error;
263 } 268 }
264 269
265 /* We've got a live one. */
266 read_unlock(&pag->pag_ici_lock);
267
268 if (lock_flags != 0) 270 if (lock_flags != 0)
269 xfs_ilock(ip, lock_flags); 271 xfs_ilock(ip, lock_flags);
270 272
@@ -274,6 +276,7 @@ xfs_iget_cache_hit(
274 return 0; 276 return 0;
275 277
276out_error: 278out_error:
279 spin_unlock(&ip->i_flags_lock);
277 read_unlock(&pag->pag_ici_lock); 280 read_unlock(&pag->pag_ici_lock);
278 return error; 281 return error;
279} 282}