diff options
Diffstat (limited to 'fs')
156 files changed, 3393 insertions, 1614 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 358563689064..6406f896bf95 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c | |||
@@ -242,7 +242,8 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) | |||
242 | } | 242 | } |
243 | kfree(wnames); | 243 | kfree(wnames); |
244 | fid_out: | 244 | fid_out: |
245 | v9fs_fid_add(dentry, fid); | 245 | if (!IS_ERR(fid)) |
246 | v9fs_fid_add(dentry, fid); | ||
246 | err_out: | 247 | err_out: |
247 | up_read(&v9ses->rename_sem); | 248 | up_read(&v9ses->rename_sem); |
248 | return fid; | 249 | return fid; |
diff --git a/fs/afs/cell.c b/fs/afs/cell.c index ffea35c63879..0d5eeadf6121 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c | |||
@@ -31,21 +31,20 @@ static struct afs_cell *afs_cell_root; | |||
31 | * allocate a cell record and fill in its name, VL server address list and | 31 | * allocate a cell record and fill in its name, VL server address list and |
32 | * allocate an anonymous key | 32 | * allocate an anonymous key |
33 | */ | 33 | */ |
34 | static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) | 34 | static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen, |
35 | char *vllist) | ||
35 | { | 36 | { |
36 | struct afs_cell *cell; | 37 | struct afs_cell *cell; |
37 | struct key *key; | 38 | struct key *key; |
38 | size_t namelen; | ||
39 | char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; | 39 | char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; |
40 | char *dvllist = NULL, *_vllist = NULL; | 40 | char *dvllist = NULL, *_vllist = NULL; |
41 | char delimiter = ':'; | 41 | char delimiter = ':'; |
42 | int ret; | 42 | int ret; |
43 | 43 | ||
44 | _enter("%s,%s", name, vllist); | 44 | _enter("%*.*s,%s", namelen, namelen, name ?: "", vllist); |
45 | 45 | ||
46 | BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ | 46 | BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ |
47 | 47 | ||
48 | namelen = strlen(name); | ||
49 | if (namelen > AFS_MAXCELLNAME) { | 48 | if (namelen > AFS_MAXCELLNAME) { |
50 | _leave(" = -ENAMETOOLONG"); | 49 | _leave(" = -ENAMETOOLONG"); |
51 | return ERR_PTR(-ENAMETOOLONG); | 50 | return ERR_PTR(-ENAMETOOLONG); |
@@ -73,6 +72,10 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) | |||
73 | if (!vllist || strlen(vllist) < 7) { | 72 | if (!vllist || strlen(vllist) < 7) { |
74 | ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL); | 73 | ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL); |
75 | if (ret < 0) { | 74 | if (ret < 0) { |
75 | if (ret == -ENODATA || ret == -EAGAIN || ret == -ENOKEY) | ||
76 | /* translate these errors into something | ||
77 | * userspace might understand */ | ||
78 | ret = -EDESTADDRREQ; | ||
76 | _leave(" = %d", ret); | 79 | _leave(" = %d", ret); |
77 | return ERR_PTR(ret); | 80 | return ERR_PTR(ret); |
78 | } | 81 | } |
@@ -138,26 +141,29 @@ error: | |||
138 | } | 141 | } |
139 | 142 | ||
140 | /* | 143 | /* |
141 | * create a cell record | 144 | * afs_cell_create() - create a cell record |
142 | * - "name" is the name of the cell | 145 | * @name: is the name of the cell. |
143 | * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format | 146 | * @namesz: is the strlen of the cell name. |
147 | * @vllist: is a colon separated list of IP addresses in "a.b.c.d" format. | ||
148 | * @retref: is T to return the cell reference when the cell exists. | ||
144 | */ | 149 | */ |
145 | struct afs_cell *afs_cell_create(const char *name, char *vllist) | 150 | struct afs_cell *afs_cell_create(const char *name, unsigned namesz, |
151 | char *vllist, bool retref) | ||
146 | { | 152 | { |
147 | struct afs_cell *cell; | 153 | struct afs_cell *cell; |
148 | int ret; | 154 | int ret; |
149 | 155 | ||
150 | _enter("%s,%s", name, vllist); | 156 | _enter("%*.*s,%s", namesz, namesz, name ?: "", vllist); |
151 | 157 | ||
152 | down_write(&afs_cells_sem); | 158 | down_write(&afs_cells_sem); |
153 | read_lock(&afs_cells_lock); | 159 | read_lock(&afs_cells_lock); |
154 | list_for_each_entry(cell, &afs_cells, link) { | 160 | list_for_each_entry(cell, &afs_cells, link) { |
155 | if (strcasecmp(cell->name, name) == 0) | 161 | if (strncasecmp(cell->name, name, namesz) == 0) |
156 | goto duplicate_name; | 162 | goto duplicate_name; |
157 | } | 163 | } |
158 | read_unlock(&afs_cells_lock); | 164 | read_unlock(&afs_cells_lock); |
159 | 165 | ||
160 | cell = afs_cell_alloc(name, vllist); | 166 | cell = afs_cell_alloc(name, namesz, vllist); |
161 | if (IS_ERR(cell)) { | 167 | if (IS_ERR(cell)) { |
162 | _leave(" = %ld", PTR_ERR(cell)); | 168 | _leave(" = %ld", PTR_ERR(cell)); |
163 | up_write(&afs_cells_sem); | 169 | up_write(&afs_cells_sem); |
@@ -197,8 +203,18 @@ error: | |||
197 | return ERR_PTR(ret); | 203 | return ERR_PTR(ret); |
198 | 204 | ||
199 | duplicate_name: | 205 | duplicate_name: |
206 | if (retref && !IS_ERR(cell)) | ||
207 | afs_get_cell(cell); | ||
208 | |||
200 | read_unlock(&afs_cells_lock); | 209 | read_unlock(&afs_cells_lock); |
201 | up_write(&afs_cells_sem); | 210 | up_write(&afs_cells_sem); |
211 | |||
212 | if (retref) { | ||
213 | _leave(" = %p", cell); | ||
214 | return cell; | ||
215 | } | ||
216 | |||
217 | _leave(" = -EEXIST"); | ||
202 | return ERR_PTR(-EEXIST); | 218 | return ERR_PTR(-EEXIST); |
203 | } | 219 | } |
204 | 220 | ||
@@ -229,7 +245,7 @@ int afs_cell_init(char *rootcell) | |||
229 | *cp++ = 0; | 245 | *cp++ = 0; |
230 | 246 | ||
231 | /* allocate a cell record for the root cell */ | 247 | /* allocate a cell record for the root cell */ |
232 | new_root = afs_cell_create(rootcell, cp); | 248 | new_root = afs_cell_create(rootcell, strlen(rootcell), cp, false); |
233 | if (IS_ERR(new_root)) { | 249 | if (IS_ERR(new_root)) { |
234 | _leave(" = %ld", PTR_ERR(new_root)); | 250 | _leave(" = %ld", PTR_ERR(new_root)); |
235 | return PTR_ERR(new_root); | 251 | return PTR_ERR(new_root); |
@@ -249,11 +265,12 @@ int afs_cell_init(char *rootcell) | |||
249 | /* | 265 | /* |
250 | * lookup a cell record | 266 | * lookup a cell record |
251 | */ | 267 | */ |
252 | struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz) | 268 | struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz, |
269 | bool dns_cell) | ||
253 | { | 270 | { |
254 | struct afs_cell *cell; | 271 | struct afs_cell *cell; |
255 | 272 | ||
256 | _enter("\"%*.*s\",", namesz, namesz, name ? name : ""); | 273 | _enter("\"%*.*s\",", namesz, namesz, name ?: ""); |
257 | 274 | ||
258 | down_read(&afs_cells_sem); | 275 | down_read(&afs_cells_sem); |
259 | read_lock(&afs_cells_lock); | 276 | read_lock(&afs_cells_lock); |
@@ -267,6 +284,8 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz) | |||
267 | } | 284 | } |
268 | } | 285 | } |
269 | cell = ERR_PTR(-ENOENT); | 286 | cell = ERR_PTR(-ENOENT); |
287 | if (dns_cell) | ||
288 | goto create_cell; | ||
270 | found: | 289 | found: |
271 | ; | 290 | ; |
272 | } else { | 291 | } else { |
@@ -289,6 +308,15 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz) | |||
289 | up_read(&afs_cells_sem); | 308 | up_read(&afs_cells_sem); |
290 | _leave(" = %p", cell); | 309 | _leave(" = %p", cell); |
291 | return cell; | 310 | return cell; |
311 | |||
312 | create_cell: | ||
313 | read_unlock(&afs_cells_lock); | ||
314 | up_read(&afs_cells_sem); | ||
315 | |||
316 | cell = afs_cell_create(name, namesz, NULL, true); | ||
317 | |||
318 | _leave(" = %p", cell); | ||
319 | return cell; | ||
292 | } | 320 | } |
293 | 321 | ||
294 | #if 0 | 322 | #if 0 |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index b42d5cc1d6d2..0d38c09bd55e 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -477,6 +477,40 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry, | |||
477 | } | 477 | } |
478 | 478 | ||
479 | /* | 479 | /* |
480 | * Try to auto mount the mountpoint with pseudo directory, if the autocell | ||
481 | * operation is set. | ||
482 | */ | ||
483 | static struct inode *afs_try_auto_mntpt( | ||
484 | int ret, struct dentry *dentry, struct inode *dir, struct key *key, | ||
485 | struct afs_fid *fid) | ||
486 | { | ||
487 | const char *devname = dentry->d_name.name; | ||
488 | struct afs_vnode *vnode = AFS_FS_I(dir); | ||
489 | struct inode *inode; | ||
490 | |||
491 | _enter("%d, %p{%s}, {%x:%u}, %p", | ||
492 | ret, dentry, devname, vnode->fid.vid, vnode->fid.vnode, key); | ||
493 | |||
494 | if (ret != -ENOENT || | ||
495 | !test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) | ||
496 | goto out; | ||
497 | |||
498 | inode = afs_iget_autocell(dir, devname, strlen(devname), key); | ||
499 | if (IS_ERR(inode)) { | ||
500 | ret = PTR_ERR(inode); | ||
501 | goto out; | ||
502 | } | ||
503 | |||
504 | *fid = AFS_FS_I(inode)->fid; | ||
505 | _leave("= %p", inode); | ||
506 | return inode; | ||
507 | |||
508 | out: | ||
509 | _leave("= %d", ret); | ||
510 | return ERR_PTR(ret); | ||
511 | } | ||
512 | |||
513 | /* | ||
480 | * look up an entry in a directory | 514 | * look up an entry in a directory |
481 | */ | 515 | */ |
482 | static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | 516 | static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, |
@@ -520,6 +554,13 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | |||
520 | 554 | ||
521 | ret = afs_do_lookup(dir, dentry, &fid, key); | 555 | ret = afs_do_lookup(dir, dentry, &fid, key); |
522 | if (ret < 0) { | 556 | if (ret < 0) { |
557 | inode = afs_try_auto_mntpt(ret, dentry, dir, key, &fid); | ||
558 | if (!IS_ERR(inode)) { | ||
559 | key_put(key); | ||
560 | goto success; | ||
561 | } | ||
562 | |||
563 | ret = PTR_ERR(inode); | ||
523 | key_put(key); | 564 | key_put(key); |
524 | if (ret == -ENOENT) { | 565 | if (ret == -ENOENT) { |
525 | d_add(dentry, NULL); | 566 | d_add(dentry, NULL); |
@@ -539,6 +580,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | |||
539 | return ERR_CAST(inode); | 580 | return ERR_CAST(inode); |
540 | } | 581 | } |
541 | 582 | ||
583 | success: | ||
542 | dentry->d_op = &afs_fs_dentry_operations; | 584 | dentry->d_op = &afs_fs_dentry_operations; |
543 | 585 | ||
544 | d_add(dentry, inode); | 586 | d_add(dentry, inode); |
@@ -696,8 +738,9 @@ static int afs_d_delete(struct dentry *dentry) | |||
696 | goto zap; | 738 | goto zap; |
697 | 739 | ||
698 | if (dentry->d_inode && | 740 | if (dentry->d_inode && |
699 | test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags)) | 741 | (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags) || |
700 | goto zap; | 742 | test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(dentry->d_inode)->flags))) |
743 | goto zap; | ||
701 | 744 | ||
702 | _leave(" = 0 [keep]"); | 745 | _leave(" = 0 [keep]"); |
703 | return 0; | 746 | return 0; |
diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 320ffef11574..0747339011c3 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c | |||
@@ -19,6 +19,8 @@ | |||
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/pagemap.h> | 20 | #include <linux/pagemap.h> |
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/mount.h> | ||
23 | #include <linux/namei.h> | ||
22 | #include "internal.h" | 24 | #include "internal.h" |
23 | 25 | ||
24 | struct afs_iget_data { | 26 | struct afs_iget_data { |
@@ -102,6 +104,16 @@ static int afs_iget5_test(struct inode *inode, void *opaque) | |||
102 | } | 104 | } |
103 | 105 | ||
104 | /* | 106 | /* |
107 | * iget5() comparator for inode created by autocell operations | ||
108 | * | ||
109 | * These pseudo inodes don't match anything. | ||
110 | */ | ||
111 | static int afs_iget5_autocell_test(struct inode *inode, void *opaque) | ||
112 | { | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | /* | ||
105 | * iget5() inode initialiser | 117 | * iget5() inode initialiser |
106 | */ | 118 | */ |
107 | static int afs_iget5_set(struct inode *inode, void *opaque) | 119 | static int afs_iget5_set(struct inode *inode, void *opaque) |
@@ -118,6 +130,67 @@ static int afs_iget5_set(struct inode *inode, void *opaque) | |||
118 | } | 130 | } |
119 | 131 | ||
120 | /* | 132 | /* |
133 | * inode retrieval for autocell | ||
134 | */ | ||
135 | struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, | ||
136 | int namesz, struct key *key) | ||
137 | { | ||
138 | struct afs_iget_data data; | ||
139 | struct afs_super_info *as; | ||
140 | struct afs_vnode *vnode; | ||
141 | struct super_block *sb; | ||
142 | struct inode *inode; | ||
143 | static atomic_t afs_autocell_ino; | ||
144 | |||
145 | _enter("{%x:%u},%*.*s,", | ||
146 | AFS_FS_I(dir)->fid.vid, AFS_FS_I(dir)->fid.vnode, | ||
147 | namesz, namesz, dev_name ?: ""); | ||
148 | |||
149 | sb = dir->i_sb; | ||
150 | as = sb->s_fs_info; | ||
151 | data.volume = as->volume; | ||
152 | data.fid.vid = as->volume->vid; | ||
153 | data.fid.unique = 0; | ||
154 | data.fid.vnode = 0; | ||
155 | |||
156 | inode = iget5_locked(sb, atomic_inc_return(&afs_autocell_ino), | ||
157 | afs_iget5_autocell_test, afs_iget5_set, | ||
158 | &data); | ||
159 | if (!inode) { | ||
160 | _leave(" = -ENOMEM"); | ||
161 | return ERR_PTR(-ENOMEM); | ||
162 | } | ||
163 | |||
164 | _debug("GOT INODE %p { ino=%lu, vl=%x, vn=%x, u=%x }", | ||
165 | inode, inode->i_ino, data.fid.vid, data.fid.vnode, | ||
166 | data.fid.unique); | ||
167 | |||
168 | vnode = AFS_FS_I(inode); | ||
169 | |||
170 | /* there shouldn't be an existing inode */ | ||
171 | BUG_ON(!(inode->i_state & I_NEW)); | ||
172 | |||
173 | inode->i_size = 0; | ||
174 | inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; | ||
175 | inode->i_op = &afs_autocell_inode_operations; | ||
176 | inode->i_nlink = 2; | ||
177 | inode->i_uid = 0; | ||
178 | inode->i_gid = 0; | ||
179 | inode->i_ctime.tv_sec = get_seconds(); | ||
180 | inode->i_ctime.tv_nsec = 0; | ||
181 | inode->i_atime = inode->i_mtime = inode->i_ctime; | ||
182 | inode->i_blocks = 0; | ||
183 | inode->i_version = 0; | ||
184 | inode->i_generation = 0; | ||
185 | |||
186 | set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); | ||
187 | inode->i_flags |= S_NOATIME; | ||
188 | unlock_new_inode(inode); | ||
189 | _leave(" = %p", inode); | ||
190 | return inode; | ||
191 | } | ||
192 | |||
193 | /* | ||
121 | * inode retrieval | 194 | * inode retrieval |
122 | */ | 195 | */ |
123 | struct inode *afs_iget(struct super_block *sb, struct key *key, | 196 | struct inode *afs_iget(struct super_block *sb, struct key *key, |
@@ -314,6 +387,19 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
314 | } | 387 | } |
315 | 388 | ||
316 | /* | 389 | /* |
390 | * discard an AFS inode | ||
391 | */ | ||
392 | int afs_drop_inode(struct inode *inode) | ||
393 | { | ||
394 | _enter(""); | ||
395 | |||
396 | if (test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(inode)->flags)) | ||
397 | return generic_delete_inode(inode); | ||
398 | else | ||
399 | return generic_drop_inode(inode); | ||
400 | } | ||
401 | |||
402 | /* | ||
317 | * clear an AFS inode | 403 | * clear an AFS inode |
318 | */ | 404 | */ |
319 | void afs_evict_inode(struct inode *inode) | 405 | void afs_evict_inode(struct inode *inode) |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8679089ce9a1..cca8eef736fc 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -42,6 +42,7 @@ typedef enum { | |||
42 | struct afs_mount_params { | 42 | struct afs_mount_params { |
43 | bool rwpath; /* T if the parent should be considered R/W */ | 43 | bool rwpath; /* T if the parent should be considered R/W */ |
44 | bool force; /* T to force cell type */ | 44 | bool force; /* T to force cell type */ |
45 | bool autocell; /* T if set auto mount operation */ | ||
45 | afs_voltype_t type; /* type of volume requested */ | 46 | afs_voltype_t type; /* type of volume requested */ |
46 | int volnamesz; /* size of volume name */ | 47 | int volnamesz; /* size of volume name */ |
47 | const char *volname; /* name of volume to mount */ | 48 | const char *volname; /* name of volume to mount */ |
@@ -358,6 +359,8 @@ struct afs_vnode { | |||
358 | #define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */ | 359 | #define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */ |
359 | #define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */ | 360 | #define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */ |
360 | #define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */ | 361 | #define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */ |
362 | #define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */ | ||
363 | #define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */ | ||
361 | 364 | ||
362 | long acl_order; /* ACL check count (callback break count) */ | 365 | long acl_order; /* ACL check count (callback break count) */ |
363 | 366 | ||
@@ -468,8 +471,8 @@ extern struct list_head afs_proc_cells; | |||
468 | 471 | ||
469 | #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) | 472 | #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) |
470 | extern int afs_cell_init(char *); | 473 | extern int afs_cell_init(char *); |
471 | extern struct afs_cell *afs_cell_create(const char *, char *); | 474 | extern struct afs_cell *afs_cell_create(const char *, unsigned, char *, bool); |
472 | extern struct afs_cell *afs_cell_lookup(const char *, unsigned); | 475 | extern struct afs_cell *afs_cell_lookup(const char *, unsigned, bool); |
473 | extern struct afs_cell *afs_grab_cell(struct afs_cell *); | 476 | extern struct afs_cell *afs_grab_cell(struct afs_cell *); |
474 | extern void afs_put_cell(struct afs_cell *); | 477 | extern void afs_put_cell(struct afs_cell *); |
475 | extern void afs_cell_purge(void); | 478 | extern void afs_cell_purge(void); |
@@ -558,6 +561,8 @@ extern int afs_fs_release_lock(struct afs_server *, struct key *, | |||
558 | /* | 561 | /* |
559 | * inode.c | 562 | * inode.c |
560 | */ | 563 | */ |
564 | extern struct inode *afs_iget_autocell(struct inode *, const char *, int, | ||
565 | struct key *); | ||
561 | extern struct inode *afs_iget(struct super_block *, struct key *, | 566 | extern struct inode *afs_iget(struct super_block *, struct key *, |
562 | struct afs_fid *, struct afs_file_status *, | 567 | struct afs_fid *, struct afs_file_status *, |
563 | struct afs_callback *); | 568 | struct afs_callback *); |
@@ -566,6 +571,7 @@ extern int afs_validate(struct afs_vnode *, struct key *); | |||
566 | extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 571 | extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
567 | extern int afs_setattr(struct dentry *, struct iattr *); | 572 | extern int afs_setattr(struct dentry *, struct iattr *); |
568 | extern void afs_evict_inode(struct inode *); | 573 | extern void afs_evict_inode(struct inode *); |
574 | extern int afs_drop_inode(struct inode *); | ||
569 | 575 | ||
570 | /* | 576 | /* |
571 | * main.c | 577 | * main.c |
@@ -581,6 +587,7 @@ extern int afs_abort_to_error(u32); | |||
581 | * mntpt.c | 587 | * mntpt.c |
582 | */ | 588 | */ |
583 | extern const struct inode_operations afs_mntpt_inode_operations; | 589 | extern const struct inode_operations afs_mntpt_inode_operations; |
590 | extern const struct inode_operations afs_autocell_inode_operations; | ||
584 | extern const struct file_operations afs_mntpt_file_operations; | 591 | extern const struct file_operations afs_mntpt_file_operations; |
585 | 592 | ||
586 | extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); | 593 | extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); |
@@ -752,12 +759,6 @@ extern unsigned afs_debug; | |||
752 | #define dbgprintk(FMT,...) \ | 759 | #define dbgprintk(FMT,...) \ |
753 | printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__) | 760 | printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__) |
754 | 761 | ||
755 | /* make sure we maintain the format strings, even when debugging is disabled */ | ||
756 | static inline __attribute__((format(printf,1,2))) | ||
757 | void _dbprintk(const char *fmt, ...) | ||
758 | { | ||
759 | } | ||
760 | |||
761 | #define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) | 762 | #define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) |
762 | #define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) | 763 | #define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) |
763 | #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) | 764 | #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) |
@@ -792,9 +793,9 @@ do { \ | |||
792 | } while (0) | 793 | } while (0) |
793 | 794 | ||
794 | #else | 795 | #else |
795 | #define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) | 796 | #define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__) |
796 | #define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) | 797 | #define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) |
797 | #define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) | 798 | #define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__) |
798 | #endif | 799 | #endif |
799 | 800 | ||
800 | /* | 801 | /* |
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index a9e23039ea34..6d552686c498 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c | |||
@@ -38,6 +38,11 @@ const struct inode_operations afs_mntpt_inode_operations = { | |||
38 | .getattr = afs_getattr, | 38 | .getattr = afs_getattr, |
39 | }; | 39 | }; |
40 | 40 | ||
41 | const struct inode_operations afs_autocell_inode_operations = { | ||
42 | .follow_link = afs_mntpt_follow_link, | ||
43 | .getattr = afs_getattr, | ||
44 | }; | ||
45 | |||
41 | static LIST_HEAD(afs_vfsmounts); | 46 | static LIST_HEAD(afs_vfsmounts); |
42 | static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); | 47 | static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); |
43 | 48 | ||
@@ -136,20 +141,16 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | |||
136 | { | 141 | { |
137 | struct afs_super_info *super; | 142 | struct afs_super_info *super; |
138 | struct vfsmount *mnt; | 143 | struct vfsmount *mnt; |
144 | struct afs_vnode *vnode; | ||
139 | struct page *page; | 145 | struct page *page; |
140 | size_t size; | 146 | char *devname, *options; |
141 | char *buf, *devname, *options; | 147 | bool rwpath = false; |
142 | int ret; | 148 | int ret; |
143 | 149 | ||
144 | _enter("{%s}", mntpt->d_name.name); | 150 | _enter("{%s}", mntpt->d_name.name); |
145 | 151 | ||
146 | BUG_ON(!mntpt->d_inode); | 152 | BUG_ON(!mntpt->d_inode); |
147 | 153 | ||
148 | ret = -EINVAL; | ||
149 | size = mntpt->d_inode->i_size; | ||
150 | if (size > PAGE_SIZE - 1) | ||
151 | goto error_no_devname; | ||
152 | |||
153 | ret = -ENOMEM; | 154 | ret = -ENOMEM; |
154 | devname = (char *) get_zeroed_page(GFP_KERNEL); | 155 | devname = (char *) get_zeroed_page(GFP_KERNEL); |
155 | if (!devname) | 156 | if (!devname) |
@@ -159,28 +160,59 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | |||
159 | if (!options) | 160 | if (!options) |
160 | goto error_no_options; | 161 | goto error_no_options; |
161 | 162 | ||
162 | /* read the contents of the AFS special symlink */ | 163 | vnode = AFS_FS_I(mntpt->d_inode); |
163 | page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); | 164 | if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { |
164 | if (IS_ERR(page)) { | 165 | /* if the directory is a pseudo directory, use the d_name */ |
165 | ret = PTR_ERR(page); | 166 | static const char afs_root_cell[] = ":root.cell."; |
166 | goto error_no_page; | 167 | unsigned size = mntpt->d_name.len; |
168 | |||
169 | ret = -ENOENT; | ||
170 | if (size < 2 || size > AFS_MAXCELLNAME) | ||
171 | goto error_no_page; | ||
172 | |||
173 | if (mntpt->d_name.name[0] == '.') { | ||
174 | devname[0] = '#'; | ||
175 | memcpy(devname + 1, mntpt->d_name.name, size - 1); | ||
176 | memcpy(devname + size, afs_root_cell, | ||
177 | sizeof(afs_root_cell)); | ||
178 | rwpath = true; | ||
179 | } else { | ||
180 | devname[0] = '%'; | ||
181 | memcpy(devname + 1, mntpt->d_name.name, size); | ||
182 | memcpy(devname + size + 1, afs_root_cell, | ||
183 | sizeof(afs_root_cell)); | ||
184 | } | ||
185 | } else { | ||
186 | /* read the contents of the AFS special symlink */ | ||
187 | loff_t size = i_size_read(mntpt->d_inode); | ||
188 | char *buf; | ||
189 | |||
190 | ret = -EINVAL; | ||
191 | if (size > PAGE_SIZE - 1) | ||
192 | goto error_no_page; | ||
193 | |||
194 | page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); | ||
195 | if (IS_ERR(page)) { | ||
196 | ret = PTR_ERR(page); | ||
197 | goto error_no_page; | ||
198 | } | ||
199 | |||
200 | ret = -EIO; | ||
201 | if (PageError(page)) | ||
202 | goto error; | ||
203 | |||
204 | buf = kmap_atomic(page, KM_USER0); | ||
205 | memcpy(devname, buf, size); | ||
206 | kunmap_atomic(buf, KM_USER0); | ||
207 | page_cache_release(page); | ||
208 | page = NULL; | ||
167 | } | 209 | } |
168 | 210 | ||
169 | ret = -EIO; | ||
170 | if (PageError(page)) | ||
171 | goto error; | ||
172 | |||
173 | buf = kmap_atomic(page, KM_USER0); | ||
174 | memcpy(devname, buf, size); | ||
175 | kunmap_atomic(buf, KM_USER0); | ||
176 | page_cache_release(page); | ||
177 | page = NULL; | ||
178 | |||
179 | /* work out what options we want */ | 211 | /* work out what options we want */ |
180 | super = AFS_FS_S(mntpt->d_sb); | 212 | super = AFS_FS_S(mntpt->d_sb); |
181 | memcpy(options, "cell=", 5); | 213 | memcpy(options, "cell=", 5); |
182 | strcpy(options + 5, super->volume->cell->name); | 214 | strcpy(options + 5, super->volume->cell->name); |
183 | if (super->volume->type == AFSVL_RWVOL) | 215 | if (super->volume->type == AFSVL_RWVOL || rwpath) |
184 | strcat(options, ",rwpath"); | 216 | strcat(options, ",rwpath"); |
185 | 217 | ||
186 | /* try and do the mount */ | 218 | /* try and do the mount */ |
diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 852739d262a9..096b23f821a1 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c | |||
@@ -294,7 +294,7 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, | |||
294 | if (strcmp(kbuf, "add") == 0) { | 294 | if (strcmp(kbuf, "add") == 0) { |
295 | struct afs_cell *cell; | 295 | struct afs_cell *cell; |
296 | 296 | ||
297 | cell = afs_cell_create(name, args); | 297 | cell = afs_cell_create(name, strlen(name), args, false); |
298 | if (IS_ERR(cell)) { | 298 | if (IS_ERR(cell)) { |
299 | ret = PTR_ERR(cell); | 299 | ret = PTR_ERR(cell); |
300 | goto done; | 300 | goto done; |
diff --git a/fs/afs/super.c b/fs/afs/super.c index 9cf80f02da16..77e1e5a61154 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -16,6 +16,7 @@ | |||
16 | 16 | ||
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/mount.h> | ||
19 | #include <linux/init.h> | 20 | #include <linux/init.h> |
20 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
21 | #include <linux/smp_lock.h> | 22 | #include <linux/smp_lock.h> |
@@ -48,6 +49,7 @@ struct file_system_type afs_fs_type = { | |||
48 | static const struct super_operations afs_super_ops = { | 49 | static const struct super_operations afs_super_ops = { |
49 | .statfs = afs_statfs, | 50 | .statfs = afs_statfs, |
50 | .alloc_inode = afs_alloc_inode, | 51 | .alloc_inode = afs_alloc_inode, |
52 | .drop_inode = afs_drop_inode, | ||
51 | .destroy_inode = afs_destroy_inode, | 53 | .destroy_inode = afs_destroy_inode, |
52 | .evict_inode = afs_evict_inode, | 54 | .evict_inode = afs_evict_inode, |
53 | .put_super = afs_put_super, | 55 | .put_super = afs_put_super, |
@@ -62,12 +64,14 @@ enum { | |||
62 | afs_opt_cell, | 64 | afs_opt_cell, |
63 | afs_opt_rwpath, | 65 | afs_opt_rwpath, |
64 | afs_opt_vol, | 66 | afs_opt_vol, |
67 | afs_opt_autocell, | ||
65 | }; | 68 | }; |
66 | 69 | ||
67 | static const match_table_t afs_options_list = { | 70 | static const match_table_t afs_options_list = { |
68 | { afs_opt_cell, "cell=%s" }, | 71 | { afs_opt_cell, "cell=%s" }, |
69 | { afs_opt_rwpath, "rwpath" }, | 72 | { afs_opt_rwpath, "rwpath" }, |
70 | { afs_opt_vol, "vol=%s" }, | 73 | { afs_opt_vol, "vol=%s" }, |
74 | { afs_opt_autocell, "autocell" }, | ||
71 | { afs_no_opt, NULL }, | 75 | { afs_no_opt, NULL }, |
72 | }; | 76 | }; |
73 | 77 | ||
@@ -151,7 +155,8 @@ static int afs_parse_options(struct afs_mount_params *params, | |||
151 | switch (token) { | 155 | switch (token) { |
152 | case afs_opt_cell: | 156 | case afs_opt_cell: |
153 | cell = afs_cell_lookup(args[0].from, | 157 | cell = afs_cell_lookup(args[0].from, |
154 | args[0].to - args[0].from); | 158 | args[0].to - args[0].from, |
159 | false); | ||
155 | if (IS_ERR(cell)) | 160 | if (IS_ERR(cell)) |
156 | return PTR_ERR(cell); | 161 | return PTR_ERR(cell); |
157 | afs_put_cell(params->cell); | 162 | afs_put_cell(params->cell); |
@@ -166,6 +171,10 @@ static int afs_parse_options(struct afs_mount_params *params, | |||
166 | *devname = args[0].from; | 171 | *devname = args[0].from; |
167 | break; | 172 | break; |
168 | 173 | ||
174 | case afs_opt_autocell: | ||
175 | params->autocell = 1; | ||
176 | break; | ||
177 | |||
169 | default: | 178 | default: |
170 | printk(KERN_ERR "kAFS:" | 179 | printk(KERN_ERR "kAFS:" |
171 | " Unknown or invalid mount option: '%s'\n", p); | 180 | " Unknown or invalid mount option: '%s'\n", p); |
@@ -252,10 +261,10 @@ static int afs_parse_device_name(struct afs_mount_params *params, | |||
252 | 261 | ||
253 | /* lookup the cell record */ | 262 | /* lookup the cell record */ |
254 | if (cellname || !params->cell) { | 263 | if (cellname || !params->cell) { |
255 | cell = afs_cell_lookup(cellname, cellnamesz); | 264 | cell = afs_cell_lookup(cellname, cellnamesz, true); |
256 | if (IS_ERR(cell)) { | 265 | if (IS_ERR(cell)) { |
257 | printk(KERN_ERR "kAFS: unable to lookup cell '%s'\n", | 266 | printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", |
258 | cellname ?: ""); | 267 | cellnamesz, cellnamesz, cellname ?: ""); |
259 | return PTR_ERR(cell); | 268 | return PTR_ERR(cell); |
260 | } | 269 | } |
261 | afs_put_cell(params->cell); | 270 | afs_put_cell(params->cell); |
@@ -321,6 +330,9 @@ static int afs_fill_super(struct super_block *sb, void *data) | |||
321 | if (IS_ERR(inode)) | 330 | if (IS_ERR(inode)) |
322 | goto error_inode; | 331 | goto error_inode; |
323 | 332 | ||
333 | if (params->autocell) | ||
334 | set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); | ||
335 | |||
324 | ret = -ENOMEM; | 336 | ret = -ENOMEM; |
325 | root = d_alloc_root(inode); | 337 | root = d_alloc_root(inode); |
326 | if (!root) | 338 | if (!root) |
diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 52e59bf4aa5f..f024d8aaddef 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c | |||
@@ -55,12 +55,6 @@ static unsigned int bad_file_poll(struct file *filp, poll_table *wait) | |||
55 | return POLLERR; | 55 | return POLLERR; |
56 | } | 56 | } |
57 | 57 | ||
58 | static int bad_file_ioctl (struct inode *inode, struct file *filp, | ||
59 | unsigned int cmd, unsigned long arg) | ||
60 | { | ||
61 | return -EIO; | ||
62 | } | ||
63 | |||
64 | static long bad_file_unlocked_ioctl(struct file *file, unsigned cmd, | 58 | static long bad_file_unlocked_ioctl(struct file *file, unsigned cmd, |
65 | unsigned long arg) | 59 | unsigned long arg) |
66 | { | 60 | { |
@@ -159,7 +153,6 @@ static const struct file_operations bad_file_ops = | |||
159 | .aio_write = bad_file_aio_write, | 153 | .aio_write = bad_file_aio_write, |
160 | .readdir = bad_file_readdir, | 154 | .readdir = bad_file_readdir, |
161 | .poll = bad_file_poll, | 155 | .poll = bad_file_poll, |
162 | .ioctl = bad_file_ioctl, | ||
163 | .unlocked_ioctl = bad_file_unlocked_ioctl, | 156 | .unlocked_ioctl = bad_file_unlocked_ioctl, |
164 | .compat_ioctl = bad_file_compat_ioctl, | 157 | .compat_ioctl = bad_file_compat_ioctl, |
165 | .mmap = bad_file_mmap, | 158 | .mmap = bad_file_mmap, |
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9e60fd201716..a7528b913936 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -108,7 +108,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
108 | Node *fmt; | 108 | Node *fmt; |
109 | struct file * interp_file = NULL; | 109 | struct file * interp_file = NULL; |
110 | char iname[BINPRM_BUF_SIZE]; | 110 | char iname[BINPRM_BUF_SIZE]; |
111 | char *iname_addr = iname; | 111 | const char *iname_addr = iname; |
112 | int retval; | 112 | int retval; |
113 | int fd_binary = -1; | 113 | int fd_binary = -1; |
114 | 114 | ||
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index aca9d55afb22..396a9884591f 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c | |||
@@ -16,7 +16,8 @@ | |||
16 | 16 | ||
17 | static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) | 17 | static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) |
18 | { | 18 | { |
19 | char *cp, *i_name, *i_arg; | 19 | const char *i_arg, *i_name; |
20 | char *cp; | ||
20 | struct file *file; | 21 | struct file *file; |
21 | char interp[BINPRM_BUF_SIZE]; | 22 | char interp[BINPRM_BUF_SIZE]; |
22 | int retval; | 23 | int retval; |
diff --git a/fs/buffer.c b/fs/buffer.c index 50efa339e051..3e7dca279d1c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) | |||
770 | spin_unlock(lock); | 770 | spin_unlock(lock); |
771 | /* | 771 | /* |
772 | * Ensure any pending I/O completes so that | 772 | * Ensure any pending I/O completes so that |
773 | * ll_rw_block() actually writes the current | 773 | * write_dirty_buffer() actually writes the |
774 | * contents - it is a noop if I/O is still in | 774 | * current contents - it is a noop if I/O is |
775 | * flight on potentially older contents. | 775 | * still in flight on potentially older |
776 | * contents. | ||
776 | */ | 777 | */ |
777 | ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); | 778 | write_dirty_buffer(bh, WRITE_SYNC_PLUG); |
778 | 779 | ||
779 | /* | 780 | /* |
780 | * Kick off IO for the previous mapping. Note | 781 | * Kick off IO for the previous mapping. Note |
@@ -2912,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh) | |||
2912 | BUG_ON(buffer_unwritten(bh)); | 2913 | BUG_ON(buffer_unwritten(bh)); |
2913 | 2914 | ||
2914 | /* | 2915 | /* |
2915 | * Mask in barrier bit for a write (could be either a WRITE or a | ||
2916 | * WRITE_SYNC | ||
2917 | */ | ||
2918 | if (buffer_ordered(bh) && (rw & WRITE)) | ||
2919 | rw |= WRITE_BARRIER; | ||
2920 | |||
2921 | /* | ||
2922 | * Only clear out a write error when rewriting | 2916 | * Only clear out a write error when rewriting |
2923 | */ | 2917 | */ |
2924 | if (test_set_buffer_req(bh) && (rw & WRITE)) | 2918 | if (test_set_buffer_req(bh) && (rw & WRITE)) |
@@ -2956,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh); | |||
2956 | 2950 | ||
2957 | /** | 2951 | /** |
2958 | * ll_rw_block: low-level access to block devices (DEPRECATED) | 2952 | * ll_rw_block: low-level access to block devices (DEPRECATED) |
2959 | * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) | 2953 | * @rw: whether to %READ or %WRITE or maybe %READA (readahead) |
2960 | * @nr: number of &struct buffer_heads in the array | 2954 | * @nr: number of &struct buffer_heads in the array |
2961 | * @bhs: array of pointers to &struct buffer_head | 2955 | * @bhs: array of pointers to &struct buffer_head |
2962 | * | 2956 | * |
2963 | * ll_rw_block() takes an array of pointers to &struct buffer_heads, and | 2957 | * ll_rw_block() takes an array of pointers to &struct buffer_heads, and |
2964 | * requests an I/O operation on them, either a %READ or a %WRITE. The third | 2958 | * requests an I/O operation on them, either a %READ or a %WRITE. The third |
2965 | * %SWRITE is like %WRITE only we make sure that the *current* data in buffers | 2959 | * %READA option is described in the documentation for generic_make_request() |
2966 | * are sent to disk. The fourth %READA option is described in the documentation | 2960 | * which ll_rw_block() calls. |
2967 | * for generic_make_request() which ll_rw_block() calls. | ||
2968 | * | 2961 | * |
2969 | * This function drops any buffer that it cannot get a lock on (with the | 2962 | * This function drops any buffer that it cannot get a lock on (with the |
2970 | * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be | 2963 | * BH_Lock state bit), any buffer that appears to be clean when doing a write |
2971 | * clean when doing a write request, and any buffer that appears to be | 2964 | * request, and any buffer that appears to be up-to-date when doing read |
2972 | * up-to-date when doing read request. Further it marks as clean buffers that | 2965 | * request. Further it marks as clean buffers that are processed for |
2973 | * are processed for writing (the buffer cache won't assume that they are | 2966 | * writing (the buffer cache won't assume that they are actually clean |
2974 | * actually clean until the buffer gets unlocked). | 2967 | * until the buffer gets unlocked). |
2975 | * | 2968 | * |
2976 | * ll_rw_block sets b_end_io to simple completion handler that marks | 2969 | * ll_rw_block sets b_end_io to simple completion handler that marks |
2977 | * the buffer up-to-date (if approriate), unlocks the buffer and wakes | 2970 | * the buffer up-to-date (if approriate), unlocks the buffer and wakes |
@@ -2987,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) | |||
2987 | for (i = 0; i < nr; i++) { | 2980 | for (i = 0; i < nr; i++) { |
2988 | struct buffer_head *bh = bhs[i]; | 2981 | struct buffer_head *bh = bhs[i]; |
2989 | 2982 | ||
2990 | if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) | 2983 | if (!trylock_buffer(bh)) |
2991 | lock_buffer(bh); | ||
2992 | else if (!trylock_buffer(bh)) | ||
2993 | continue; | 2984 | continue; |
2994 | 2985 | if (rw == WRITE) { | |
2995 | if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || | ||
2996 | rw == SWRITE_SYNC_PLUG) { | ||
2997 | if (test_clear_buffer_dirty(bh)) { | 2986 | if (test_clear_buffer_dirty(bh)) { |
2998 | bh->b_end_io = end_buffer_write_sync; | 2987 | bh->b_end_io = end_buffer_write_sync; |
2999 | get_bh(bh); | 2988 | get_bh(bh); |
3000 | if (rw == SWRITE_SYNC) | 2989 | submit_bh(WRITE, bh); |
3001 | submit_bh(WRITE_SYNC, bh); | ||
3002 | else | ||
3003 | submit_bh(WRITE, bh); | ||
3004 | continue; | 2990 | continue; |
3005 | } | 2991 | } |
3006 | } else { | 2992 | } else { |
@@ -3016,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) | |||
3016 | } | 3002 | } |
3017 | EXPORT_SYMBOL(ll_rw_block); | 3003 | EXPORT_SYMBOL(ll_rw_block); |
3018 | 3004 | ||
3005 | void write_dirty_buffer(struct buffer_head *bh, int rw) | ||
3006 | { | ||
3007 | lock_buffer(bh); | ||
3008 | if (!test_clear_buffer_dirty(bh)) { | ||
3009 | unlock_buffer(bh); | ||
3010 | return; | ||
3011 | } | ||
3012 | bh->b_end_io = end_buffer_write_sync; | ||
3013 | get_bh(bh); | ||
3014 | submit_bh(rw, bh); | ||
3015 | } | ||
3016 | EXPORT_SYMBOL(write_dirty_buffer); | ||
3017 | |||
3019 | /* | 3018 | /* |
3020 | * For a data-integrity writeout, we need to wait upon any in-progress I/O | 3019 | * For a data-integrity writeout, we need to wait upon any in-progress I/O |
3021 | * and then start new I/O and then wait upon it. The caller must have a ref on | 3020 | * and then start new I/O and then wait upon it. The caller must have a ref on |
3022 | * the buffer_head. | 3021 | * the buffer_head. |
3023 | */ | 3022 | */ |
3024 | int sync_dirty_buffer(struct buffer_head *bh) | 3023 | int __sync_dirty_buffer(struct buffer_head *bh, int rw) |
3025 | { | 3024 | { |
3026 | int ret = 0; | 3025 | int ret = 0; |
3027 | 3026 | ||
@@ -3030,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh) | |||
3030 | if (test_clear_buffer_dirty(bh)) { | 3029 | if (test_clear_buffer_dirty(bh)) { |
3031 | get_bh(bh); | 3030 | get_bh(bh); |
3032 | bh->b_end_io = end_buffer_write_sync; | 3031 | bh->b_end_io = end_buffer_write_sync; |
3033 | ret = submit_bh(WRITE_SYNC, bh); | 3032 | ret = submit_bh(rw, bh); |
3034 | wait_on_buffer(bh); | 3033 | wait_on_buffer(bh); |
3035 | if (buffer_eopnotsupp(bh)) { | 3034 | if (buffer_eopnotsupp(bh)) { |
3036 | clear_buffer_eopnotsupp(bh); | 3035 | clear_buffer_eopnotsupp(bh); |
@@ -3043,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh) | |||
3043 | } | 3042 | } |
3044 | return ret; | 3043 | return ret; |
3045 | } | 3044 | } |
3045 | EXPORT_SYMBOL(__sync_dirty_buffer); | ||
3046 | |||
3047 | int sync_dirty_buffer(struct buffer_head *bh) | ||
3048 | { | ||
3049 | return __sync_dirty_buffer(bh, WRITE_SYNC); | ||
3050 | } | ||
3046 | EXPORT_SYMBOL(sync_dirty_buffer); | 3051 | EXPORT_SYMBOL(sync_dirty_buffer); |
3047 | 3052 | ||
3048 | /* | 3053 | /* |
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index a8cd821226da..bd6bc1bde2d7 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h | |||
@@ -267,13 +267,6 @@ do { \ | |||
267 | #define dbgprintk(FMT, ...) \ | 267 | #define dbgprintk(FMT, ...) \ |
268 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) | 268 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) |
269 | 269 | ||
270 | /* make sure we maintain the format strings, even when debugging is disabled */ | ||
271 | static inline void _dbprintk(const char *fmt, ...) | ||
272 | __attribute__((format(printf, 1, 2))); | ||
273 | static inline void _dbprintk(const char *fmt, ...) | ||
274 | { | ||
275 | } | ||
276 | |||
277 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 270 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
278 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 271 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
279 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) | 272 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) |
@@ -304,9 +297,9 @@ do { \ | |||
304 | } while (0) | 297 | } while (0) |
305 | 298 | ||
306 | #else | 299 | #else |
307 | #define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 300 | #define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
308 | #define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 301 | #define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
309 | #define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) | 302 | #define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) |
310 | #endif | 303 | #endif |
311 | 304 | ||
312 | #if 1 /* defined(__KDEBUGALL) */ | 305 | #if 1 /* defined(__KDEBUGALL) */ |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 5598a0d02295..4cfce1ee31fa 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page) | |||
87 | 87 | ||
88 | /* dirty the head */ | 88 | /* dirty the head */ |
89 | spin_lock(&inode->i_lock); | 89 | spin_lock(&inode->i_lock); |
90 | if (ci->i_wrbuffer_ref_head == 0) | 90 | if (ci->i_head_snapc == NULL) |
91 | ci->i_head_snapc = ceph_get_snap_context(snapc); | 91 | ci->i_head_snapc = ceph_get_snap_context(snapc); |
92 | ++ci->i_wrbuffer_ref_head; | 92 | ++ci->i_wrbuffer_ref_head; |
93 | if (ci->i_wrbuffer_ref == 0) | 93 | if (ci->i_wrbuffer_ref == 0) |
@@ -105,13 +105,7 @@ static int ceph_set_page_dirty(struct page *page) | |||
105 | spin_lock_irq(&mapping->tree_lock); | 105 | spin_lock_irq(&mapping->tree_lock); |
106 | if (page->mapping) { /* Race with truncate? */ | 106 | if (page->mapping) { /* Race with truncate? */ |
107 | WARN_ON_ONCE(!PageUptodate(page)); | 107 | WARN_ON_ONCE(!PageUptodate(page)); |
108 | 108 | account_page_dirtied(page, page->mapping); | |
109 | if (mapping_cap_account_dirty(mapping)) { | ||
110 | __inc_zone_page_state(page, NR_FILE_DIRTY); | ||
111 | __inc_bdi_stat(mapping->backing_dev_info, | ||
112 | BDI_RECLAIMABLE); | ||
113 | task_io_account_write(PAGE_CACHE_SIZE); | ||
114 | } | ||
115 | radix_tree_tag_set(&mapping->page_tree, | 109 | radix_tree_tag_set(&mapping->page_tree, |
116 | page_index(page), PAGECACHE_TAG_DIRTY); | 110 | page_index(page), PAGECACHE_TAG_DIRTY); |
117 | 111 | ||
@@ -352,7 +346,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode, | |||
352 | break; | 346 | break; |
353 | } | 347 | } |
354 | } | 348 | } |
355 | if (!snapc && ci->i_head_snapc) { | 349 | if (!snapc && ci->i_wrbuffer_ref_head) { |
356 | snapc = ceph_get_snap_context(ci->i_head_snapc); | 350 | snapc = ceph_get_snap_context(ci->i_head_snapc); |
357 | dout(" head snapc %p has %d dirty pages\n", | 351 | dout(" head snapc %p has %d dirty pages\n", |
358 | snapc, ci->i_wrbuffer_ref_head); | 352 | snapc, ci->i_wrbuffer_ref_head); |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 582e0b2caf8a..a2d002cbdec2 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
@@ -376,7 +376,7 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed) | |||
376 | 376 | ||
377 | th = get_ticket_handler(ac, service); | 377 | th = get_ticket_handler(ac, service); |
378 | 378 | ||
379 | if (!th) { | 379 | if (IS_ERR(th)) { |
380 | *pneed |= service; | 380 | *pneed |= service; |
381 | continue; | 381 | continue; |
382 | } | 382 | } |
@@ -399,6 +399,9 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, | |||
399 | struct ceph_x_ticket_handler *th = | 399 | struct ceph_x_ticket_handler *th = |
400 | get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); | 400 | get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); |
401 | 401 | ||
402 | if (IS_ERR(th)) | ||
403 | return PTR_ERR(th); | ||
404 | |||
402 | ceph_x_validate_tickets(ac, &need); | 405 | ceph_x_validate_tickets(ac, &need); |
403 | 406 | ||
404 | dout("build_request want %x have %x need %x\n", | 407 | dout("build_request want %x have %x need %x\n", |
@@ -450,7 +453,6 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, | |||
450 | return -ERANGE; | 453 | return -ERANGE; |
451 | head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY); | 454 | head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY); |
452 | 455 | ||
453 | BUG_ON(!th); | ||
454 | ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); | 456 | ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); |
455 | if (ret) | 457 | if (ret) |
456 | return ret; | 458 | return ret; |
@@ -505,7 +507,8 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, | |||
505 | 507 | ||
506 | case CEPHX_GET_PRINCIPAL_SESSION_KEY: | 508 | case CEPHX_GET_PRINCIPAL_SESSION_KEY: |
507 | th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); | 509 | th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); |
508 | BUG_ON(!th); | 510 | if (IS_ERR(th)) |
511 | return PTR_ERR(th); | ||
509 | ret = ceph_x_proc_ticket_reply(ac, &th->session_key, | 512 | ret = ceph_x_proc_ticket_reply(ac, &th->session_key, |
510 | buf + sizeof(*head), end); | 513 | buf + sizeof(*head), end); |
511 | break; | 514 | break; |
@@ -563,8 +566,8 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, | |||
563 | void *end = p + sizeof(au->reply_buf); | 566 | void *end = p + sizeof(au->reply_buf); |
564 | 567 | ||
565 | th = get_ticket_handler(ac, au->service); | 568 | th = get_ticket_handler(ac, au->service); |
566 | if (!th) | 569 | if (IS_ERR(th)) |
567 | return -EIO; /* hrm! */ | 570 | return PTR_ERR(th); |
568 | ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); | 571 | ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); |
569 | if (ret < 0) | 572 | if (ret < 0) |
570 | return ret; | 573 | return ret; |
@@ -626,7 +629,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, | |||
626 | struct ceph_x_ticket_handler *th; | 629 | struct ceph_x_ticket_handler *th; |
627 | 630 | ||
628 | th = get_ticket_handler(ac, peer_type); | 631 | th = get_ticket_handler(ac, peer_type); |
629 | if (th && !IS_ERR(th)) | 632 | if (!IS_ERR(th)) |
630 | remove_ticket_handler(ac, th); | 633 | remove_ticket_handler(ac, th); |
631 | } | 634 | } |
632 | 635 | ||
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7bf182b03973..a2069b6680ae 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1082,6 +1082,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1082 | gid_t gid; | 1082 | gid_t gid; |
1083 | struct ceph_mds_session *session; | 1083 | struct ceph_mds_session *session; |
1084 | u64 xattr_version = 0; | 1084 | u64 xattr_version = 0; |
1085 | struct ceph_buffer *xattr_blob = NULL; | ||
1085 | int delayed = 0; | 1086 | int delayed = 0; |
1086 | u64 flush_tid = 0; | 1087 | u64 flush_tid = 0; |
1087 | int i; | 1088 | int i; |
@@ -1142,6 +1143,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1142 | for (i = 0; i < CEPH_CAP_BITS; i++) | 1143 | for (i = 0; i < CEPH_CAP_BITS; i++) |
1143 | if (flushing & (1 << i)) | 1144 | if (flushing & (1 << i)) |
1144 | ci->i_cap_flush_tid[i] = flush_tid; | 1145 | ci->i_cap_flush_tid[i] = flush_tid; |
1146 | |||
1147 | follows = ci->i_head_snapc->seq; | ||
1148 | } else { | ||
1149 | follows = 0; | ||
1145 | } | 1150 | } |
1146 | 1151 | ||
1147 | keep = cap->implemented; | 1152 | keep = cap->implemented; |
@@ -1155,14 +1160,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1155 | mtime = inode->i_mtime; | 1160 | mtime = inode->i_mtime; |
1156 | atime = inode->i_atime; | 1161 | atime = inode->i_atime; |
1157 | time_warp_seq = ci->i_time_warp_seq; | 1162 | time_warp_seq = ci->i_time_warp_seq; |
1158 | follows = ci->i_snap_realm->cached_context->seq; | ||
1159 | uid = inode->i_uid; | 1163 | uid = inode->i_uid; |
1160 | gid = inode->i_gid; | 1164 | gid = inode->i_gid; |
1161 | mode = inode->i_mode; | 1165 | mode = inode->i_mode; |
1162 | 1166 | ||
1163 | if (dropping & CEPH_CAP_XATTR_EXCL) { | 1167 | if (flushing & CEPH_CAP_XATTR_EXCL) { |
1164 | __ceph_build_xattrs_blob(ci); | 1168 | __ceph_build_xattrs_blob(ci); |
1165 | xattr_version = ci->i_xattrs.version + 1; | 1169 | xattr_blob = ci->i_xattrs.blob; |
1170 | xattr_version = ci->i_xattrs.version; | ||
1166 | } | 1171 | } |
1167 | 1172 | ||
1168 | spin_unlock(&inode->i_lock); | 1173 | spin_unlock(&inode->i_lock); |
@@ -1170,9 +1175,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1170 | ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, | 1175 | ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, |
1171 | op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, | 1176 | op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, |
1172 | size, max_size, &mtime, &atime, time_warp_seq, | 1177 | size, max_size, &mtime, &atime, time_warp_seq, |
1173 | uid, gid, mode, | 1178 | uid, gid, mode, xattr_version, xattr_blob, |
1174 | xattr_version, | ||
1175 | (flushing & CEPH_CAP_XATTR_EXCL) ? ci->i_xattrs.blob : NULL, | ||
1176 | follows); | 1179 | follows); |
1177 | if (ret < 0) { | 1180 | if (ret < 0) { |
1178 | dout("error sending cap msg, must requeue %p\n", inode); | 1181 | dout("error sending cap msg, must requeue %p\n", inode); |
@@ -1282,7 +1285,7 @@ retry: | |||
1282 | &capsnap->mtime, &capsnap->atime, | 1285 | &capsnap->mtime, &capsnap->atime, |
1283 | capsnap->time_warp_seq, | 1286 | capsnap->time_warp_seq, |
1284 | capsnap->uid, capsnap->gid, capsnap->mode, | 1287 | capsnap->uid, capsnap->gid, capsnap->mode, |
1285 | 0, NULL, | 1288 | capsnap->xattr_version, capsnap->xattr_blob, |
1286 | capsnap->follows); | 1289 | capsnap->follows); |
1287 | 1290 | ||
1288 | next_follows = capsnap->follows + 1; | 1291 | next_follows = capsnap->follows + 1; |
@@ -1332,7 +1335,11 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1332 | ceph_cap_string(was | mask)); | 1335 | ceph_cap_string(was | mask)); |
1333 | ci->i_dirty_caps |= mask; | 1336 | ci->i_dirty_caps |= mask; |
1334 | if (was == 0) { | 1337 | if (was == 0) { |
1335 | dout(" inode %p now dirty\n", &ci->vfs_inode); | 1338 | if (!ci->i_head_snapc) |
1339 | ci->i_head_snapc = ceph_get_snap_context( | ||
1340 | ci->i_snap_realm->cached_context); | ||
1341 | dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode, | ||
1342 | ci->i_head_snapc); | ||
1336 | BUG_ON(!list_empty(&ci->i_dirty_item)); | 1343 | BUG_ON(!list_empty(&ci->i_dirty_item)); |
1337 | spin_lock(&mdsc->cap_dirty_lock); | 1344 | spin_lock(&mdsc->cap_dirty_lock); |
1338 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); | 1345 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); |
@@ -2190,7 +2197,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2190 | 2197 | ||
2191 | if (ci->i_head_snapc == snapc) { | 2198 | if (ci->i_head_snapc == snapc) { |
2192 | ci->i_wrbuffer_ref_head -= nr; | 2199 | ci->i_wrbuffer_ref_head -= nr; |
2193 | if (!ci->i_wrbuffer_ref_head) { | 2200 | if (ci->i_wrbuffer_ref_head == 0 && |
2201 | ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { | ||
2202 | BUG_ON(!ci->i_head_snapc); | ||
2194 | ceph_put_snap_context(ci->i_head_snapc); | 2203 | ceph_put_snap_context(ci->i_head_snapc); |
2195 | ci->i_head_snapc = NULL; | 2204 | ci->i_head_snapc = NULL; |
2196 | } | 2205 | } |
@@ -2483,6 +2492,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
2483 | dout(" inode %p now clean\n", inode); | 2492 | dout(" inode %p now clean\n", inode); |
2484 | BUG_ON(!list_empty(&ci->i_dirty_item)); | 2493 | BUG_ON(!list_empty(&ci->i_dirty_item)); |
2485 | drop = 1; | 2494 | drop = 1; |
2495 | if (ci->i_wrbuffer_ref_head == 0) { | ||
2496 | BUG_ON(!ci->i_head_snapc); | ||
2497 | ceph_put_snap_context(ci->i_head_snapc); | ||
2498 | ci->i_head_snapc = NULL; | ||
2499 | } | ||
2486 | } else { | 2500 | } else { |
2487 | BUG_ON(list_empty(&ci->i_dirty_item)); | 2501 | BUG_ON(list_empty(&ci->i_dirty_item)); |
2488 | } | 2502 | } |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 360c4f22718d..6fd8b20a8611 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -171,6 +171,8 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
171 | } else if (req->r_dentry) { | 171 | } else if (req->r_dentry) { |
172 | path = ceph_mdsc_build_path(req->r_dentry, &pathlen, | 172 | path = ceph_mdsc_build_path(req->r_dentry, &pathlen, |
173 | &pathbase, 0); | 173 | &pathbase, 0); |
174 | if (IS_ERR(path)) | ||
175 | path = NULL; | ||
174 | spin_lock(&req->r_dentry->d_lock); | 176 | spin_lock(&req->r_dentry->d_lock); |
175 | seq_printf(s, " #%llx/%.*s (%s)", | 177 | seq_printf(s, " #%llx/%.*s (%s)", |
176 | ceph_ino(req->r_dentry->d_parent->d_inode), | 178 | ceph_ino(req->r_dentry->d_parent->d_inode), |
@@ -187,6 +189,8 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
187 | if (req->r_old_dentry) { | 189 | if (req->r_old_dentry) { |
188 | path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, | 190 | path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, |
189 | &pathbase, 0); | 191 | &pathbase, 0); |
192 | if (IS_ERR(path)) | ||
193 | path = NULL; | ||
190 | spin_lock(&req->r_old_dentry->d_lock); | 194 | spin_lock(&req->r_old_dentry->d_lock); |
191 | seq_printf(s, " #%llx/%.*s (%s)", | 195 | seq_printf(s, " #%llx/%.*s (%s)", |
192 | ceph_ino(req->r_old_dentry->d_parent->d_inode), | 196 | ceph_ino(req->r_old_dentry->d_parent->d_inode), |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 67bbb41d5526..6e4f43ff23ec 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -46,7 +46,7 @@ int ceph_init_dentry(struct dentry *dentry) | |||
46 | else | 46 | else |
47 | dentry->d_op = &ceph_snap_dentry_ops; | 47 | dentry->d_op = &ceph_snap_dentry_ops; |
48 | 48 | ||
49 | di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS); | 49 | di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); |
50 | if (!di) | 50 | if (!di) |
51 | return -ENOMEM; /* oh well */ | 51 | return -ENOMEM; /* oh well */ |
52 | 52 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5d893d31e399..e7cca414da03 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -677,6 +677,7 @@ static int fill_inode(struct inode *inode, | |||
677 | if (ci->i_files == 0 && ci->i_subdirs == 0 && | 677 | if (ci->i_files == 0 && ci->i_subdirs == 0 && |
678 | ceph_snap(inode) == CEPH_NOSNAP && | 678 | ceph_snap(inode) == CEPH_NOSNAP && |
679 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && | 679 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && |
680 | (issued & CEPH_CAP_FILE_EXCL) == 0 && | ||
680 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | 681 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { |
681 | dout(" marking %p complete (empty)\n", inode); | 682 | dout(" marking %p complete (empty)\n", inode); |
682 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 683 | ci->i_ceph_flags |= CEPH_I_COMPLETE; |
@@ -1229,11 +1230,11 @@ retry_lookup: | |||
1229 | in = dn->d_inode; | 1230 | in = dn->d_inode; |
1230 | } else { | 1231 | } else { |
1231 | in = ceph_get_inode(parent->d_sb, vino); | 1232 | in = ceph_get_inode(parent->d_sb, vino); |
1232 | if (in == NULL) { | 1233 | if (IS_ERR(in)) { |
1233 | dout("new_inode badness\n"); | 1234 | dout("new_inode badness\n"); |
1234 | d_delete(dn); | 1235 | d_delete(dn); |
1235 | dput(dn); | 1236 | dput(dn); |
1236 | err = -ENOMEM; | 1237 | err = PTR_ERR(in); |
1237 | goto out; | 1238 | goto out; |
1238 | } | 1239 | } |
1239 | dn = splice_dentry(dn, in, NULL); | 1240 | dn = splice_dentry(dn, in, NULL); |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ae85af06454f..ff4e753aae92 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -82,7 +82,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
82 | length = fl->fl_end - fl->fl_start + 1; | 82 | length = fl->fl_end - fl->fl_start + 1; |
83 | 83 | ||
84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | 84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
85 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | 85 | (u64)fl->fl_pid, |
86 | (u64)(unsigned long)fl->fl_nspid, | ||
86 | lock_cmd, fl->fl_start, | 87 | lock_cmd, fl->fl_start, |
87 | length, wait); | 88 | length, wait); |
88 | if (!err) { | 89 | if (!err) { |
@@ -92,7 +93,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
92 | /* undo! This should only happen if the kernel detects | 93 | /* undo! This should only happen if the kernel detects |
93 | * local deadlock. */ | 94 | * local deadlock. */ |
94 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | 95 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
95 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | 96 | (u64)fl->fl_pid, |
97 | (u64)(unsigned long)fl->fl_nspid, | ||
96 | CEPH_LOCK_UNLOCK, fl->fl_start, | 98 | CEPH_LOCK_UNLOCK, fl->fl_start, |
97 | length, 0); | 99 | length, 0); |
98 | dout("got %d on posix_lock_file, undid lock", err); | 100 | dout("got %d on posix_lock_file, undid lock", err); |
@@ -132,7 +134,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
132 | length = fl->fl_end - fl->fl_start + 1; | 134 | length = fl->fl_end - fl->fl_start + 1; |
133 | 135 | ||
134 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | 136 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, |
135 | file, (u64)fl->fl_pid, (u64)fl->fl_nspid, | 137 | file, (u64)fl->fl_pid, |
138 | (u64)(unsigned long)fl->fl_nspid, | ||
136 | lock_cmd, fl->fl_start, | 139 | lock_cmd, fl->fl_start, |
137 | length, wait); | 140 | length, wait); |
138 | if (!err) { | 141 | if (!err) { |
@@ -141,7 +144,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
141 | ceph_lock_message(CEPH_LOCK_FLOCK, | 144 | ceph_lock_message(CEPH_LOCK_FLOCK, |
142 | CEPH_MDS_OP_SETFILELOCK, | 145 | CEPH_MDS_OP_SETFILELOCK, |
143 | file, (u64)fl->fl_pid, | 146 | file, (u64)fl->fl_pid, |
144 | (u64)fl->fl_nspid, | 147 | (u64)(unsigned long)fl->fl_nspid, |
145 | CEPH_LOCK_UNLOCK, fl->fl_start, | 148 | CEPH_LOCK_UNLOCK, fl->fl_start, |
146 | length, 0); | 149 | length, 0); |
147 | dout("got %d on flock_lock_file_wait, undid lock", err); | 150 | dout("got %d on flock_lock_file_wait, undid lock", err); |
@@ -235,7 +238,8 @@ int lock_to_ceph_filelock(struct file_lock *lock, | |||
235 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | 238 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); |
236 | cephlock->client = cpu_to_le64(0); | 239 | cephlock->client = cpu_to_le64(0); |
237 | cephlock->pid = cpu_to_le64(lock->fl_pid); | 240 | cephlock->pid = cpu_to_le64(lock->fl_pid); |
238 | cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid); | 241 | cephlock->pid_namespace = |
242 | cpu_to_le64((u64)(unsigned long)lock->fl_nspid); | ||
239 | 243 | ||
240 | switch (lock->fl_type) { | 244 | switch (lock->fl_type) { |
241 | case F_RDLCK: | 245 | case F_RDLCK: |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a75ddbf9fe37..f091b1351786 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -560,6 +560,13 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
560 | * | 560 | * |
561 | * Called under mdsc->mutex. | 561 | * Called under mdsc->mutex. |
562 | */ | 562 | */ |
563 | struct dentry *get_nonsnap_parent(struct dentry *dentry) | ||
564 | { | ||
565 | while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP) | ||
566 | dentry = dentry->d_parent; | ||
567 | return dentry; | ||
568 | } | ||
569 | |||
563 | static int __choose_mds(struct ceph_mds_client *mdsc, | 570 | static int __choose_mds(struct ceph_mds_client *mdsc, |
564 | struct ceph_mds_request *req) | 571 | struct ceph_mds_request *req) |
565 | { | 572 | { |
@@ -590,14 +597,29 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
590 | if (req->r_inode) { | 597 | if (req->r_inode) { |
591 | inode = req->r_inode; | 598 | inode = req->r_inode; |
592 | } else if (req->r_dentry) { | 599 | } else if (req->r_dentry) { |
593 | if (req->r_dentry->d_inode) { | 600 | struct inode *dir = req->r_dentry->d_parent->d_inode; |
601 | |||
602 | if (dir->i_sb != mdsc->client->sb) { | ||
603 | /* not this fs! */ | ||
604 | inode = req->r_dentry->d_inode; | ||
605 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { | ||
606 | /* direct snapped/virtual snapdir requests | ||
607 | * based on parent dir inode */ | ||
608 | struct dentry *dn = | ||
609 | get_nonsnap_parent(req->r_dentry->d_parent); | ||
610 | inode = dn->d_inode; | ||
611 | dout("__choose_mds using nonsnap parent %p\n", inode); | ||
612 | } else if (req->r_dentry->d_inode) { | ||
613 | /* dentry target */ | ||
594 | inode = req->r_dentry->d_inode; | 614 | inode = req->r_dentry->d_inode; |
595 | } else { | 615 | } else { |
596 | inode = req->r_dentry->d_parent->d_inode; | 616 | /* dir + name */ |
617 | inode = dir; | ||
597 | hash = req->r_dentry->d_name.hash; | 618 | hash = req->r_dentry->d_name.hash; |
598 | is_hash = true; | 619 | is_hash = true; |
599 | } | 620 | } |
600 | } | 621 | } |
622 | |||
601 | dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, | 623 | dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, |
602 | (int)hash, mode); | 624 | (int)hash, mode); |
603 | if (!inode) | 625 | if (!inode) |
@@ -2208,7 +2230,7 @@ static void handle_session(struct ceph_mds_session *session, | |||
2208 | pr_info("mds%d reconnect denied\n", session->s_mds); | 2230 | pr_info("mds%d reconnect denied\n", session->s_mds); |
2209 | remove_session_caps(session); | 2231 | remove_session_caps(session); |
2210 | wake = 1; /* for good measure */ | 2232 | wake = 1; /* for good measure */ |
2211 | complete_all(&mdsc->session_close_waiters); | 2233 | wake_up_all(&mdsc->session_close_wq); |
2212 | kick_requests(mdsc, mds); | 2234 | kick_requests(mdsc, mds); |
2213 | break; | 2235 | break; |
2214 | 2236 | ||
@@ -2302,7 +2324,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2302 | path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0); | 2324 | path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0); |
2303 | if (IS_ERR(path)) { | 2325 | if (IS_ERR(path)) { |
2304 | err = PTR_ERR(path); | 2326 | err = PTR_ERR(path); |
2305 | BUG_ON(err); | 2327 | goto out_dput; |
2306 | } | 2328 | } |
2307 | } else { | 2329 | } else { |
2308 | path = NULL; | 2330 | path = NULL; |
@@ -2310,7 +2332,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2310 | } | 2332 | } |
2311 | err = ceph_pagelist_encode_string(pagelist, path, pathlen); | 2333 | err = ceph_pagelist_encode_string(pagelist, path, pathlen); |
2312 | if (err) | 2334 | if (err) |
2313 | goto out; | 2335 | goto out_free; |
2314 | 2336 | ||
2315 | spin_lock(&inode->i_lock); | 2337 | spin_lock(&inode->i_lock); |
2316 | cap->seq = 0; /* reset cap seq */ | 2338 | cap->seq = 0; /* reset cap seq */ |
@@ -2354,8 +2376,9 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2354 | unlock_kernel(); | 2376 | unlock_kernel(); |
2355 | } | 2377 | } |
2356 | 2378 | ||
2357 | out: | 2379 | out_free: |
2358 | kfree(path); | 2380 | kfree(path); |
2381 | out_dput: | ||
2359 | dput(dentry); | 2382 | dput(dentry); |
2360 | return err; | 2383 | return err; |
2361 | } | 2384 | } |
@@ -2876,7 +2899,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2876 | return -ENOMEM; | 2899 | return -ENOMEM; |
2877 | 2900 | ||
2878 | init_completion(&mdsc->safe_umount_waiters); | 2901 | init_completion(&mdsc->safe_umount_waiters); |
2879 | init_completion(&mdsc->session_close_waiters); | 2902 | init_waitqueue_head(&mdsc->session_close_wq); |
2880 | INIT_LIST_HEAD(&mdsc->waiting_for_map); | 2903 | INIT_LIST_HEAD(&mdsc->waiting_for_map); |
2881 | mdsc->sessions = NULL; | 2904 | mdsc->sessions = NULL; |
2882 | mdsc->max_sessions = 0; | 2905 | mdsc->max_sessions = 0; |
@@ -3021,6 +3044,23 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
3021 | wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); | 3044 | wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); |
3022 | } | 3045 | } |
3023 | 3046 | ||
3047 | /* | ||
3048 | * true if all sessions are closed, or we force unmount | ||
3049 | */ | ||
3050 | bool done_closing_sessions(struct ceph_mds_client *mdsc) | ||
3051 | { | ||
3052 | int i, n = 0; | ||
3053 | |||
3054 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | ||
3055 | return true; | ||
3056 | |||
3057 | mutex_lock(&mdsc->mutex); | ||
3058 | for (i = 0; i < mdsc->max_sessions; i++) | ||
3059 | if (mdsc->sessions[i]) | ||
3060 | n++; | ||
3061 | mutex_unlock(&mdsc->mutex); | ||
3062 | return n == 0; | ||
3063 | } | ||
3024 | 3064 | ||
3025 | /* | 3065 | /* |
3026 | * called after sb is ro. | 3066 | * called after sb is ro. |
@@ -3029,45 +3069,32 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
3029 | { | 3069 | { |
3030 | struct ceph_mds_session *session; | 3070 | struct ceph_mds_session *session; |
3031 | int i; | 3071 | int i; |
3032 | int n; | ||
3033 | struct ceph_client *client = mdsc->client; | 3072 | struct ceph_client *client = mdsc->client; |
3034 | unsigned long started, timeout = client->mount_args->mount_timeout * HZ; | 3073 | unsigned long timeout = client->mount_args->mount_timeout * HZ; |
3035 | 3074 | ||
3036 | dout("close_sessions\n"); | 3075 | dout("close_sessions\n"); |
3037 | 3076 | ||
3038 | mutex_lock(&mdsc->mutex); | ||
3039 | |||
3040 | /* close sessions */ | 3077 | /* close sessions */ |
3041 | started = jiffies; | 3078 | mutex_lock(&mdsc->mutex); |
3042 | while (time_before(jiffies, started + timeout)) { | 3079 | for (i = 0; i < mdsc->max_sessions; i++) { |
3043 | dout("closing sessions\n"); | 3080 | session = __ceph_lookup_mds_session(mdsc, i); |
3044 | n = 0; | 3081 | if (!session) |
3045 | for (i = 0; i < mdsc->max_sessions; i++) { | 3082 | continue; |
3046 | session = __ceph_lookup_mds_session(mdsc, i); | ||
3047 | if (!session) | ||
3048 | continue; | ||
3049 | mutex_unlock(&mdsc->mutex); | ||
3050 | mutex_lock(&session->s_mutex); | ||
3051 | __close_session(mdsc, session); | ||
3052 | mutex_unlock(&session->s_mutex); | ||
3053 | ceph_put_mds_session(session); | ||
3054 | mutex_lock(&mdsc->mutex); | ||
3055 | n++; | ||
3056 | } | ||
3057 | if (n == 0) | ||
3058 | break; | ||
3059 | |||
3060 | if (client->mount_state == CEPH_MOUNT_SHUTDOWN) | ||
3061 | break; | ||
3062 | |||
3063 | dout("waiting for sessions to close\n"); | ||
3064 | mutex_unlock(&mdsc->mutex); | 3083 | mutex_unlock(&mdsc->mutex); |
3065 | wait_for_completion_timeout(&mdsc->session_close_waiters, | 3084 | mutex_lock(&session->s_mutex); |
3066 | timeout); | 3085 | __close_session(mdsc, session); |
3086 | mutex_unlock(&session->s_mutex); | ||
3087 | ceph_put_mds_session(session); | ||
3067 | mutex_lock(&mdsc->mutex); | 3088 | mutex_lock(&mdsc->mutex); |
3068 | } | 3089 | } |
3090 | mutex_unlock(&mdsc->mutex); | ||
3091 | |||
3092 | dout("waiting for sessions to close\n"); | ||
3093 | wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc), | ||
3094 | timeout); | ||
3069 | 3095 | ||
3070 | /* tear down remaining sessions */ | 3096 | /* tear down remaining sessions */ |
3097 | mutex_lock(&mdsc->mutex); | ||
3071 | for (i = 0; i < mdsc->max_sessions; i++) { | 3098 | for (i = 0; i < mdsc->max_sessions; i++) { |
3072 | if (mdsc->sessions[i]) { | 3099 | if (mdsc->sessions[i]) { |
3073 | session = get_session(mdsc->sessions[i]); | 3100 | session = get_session(mdsc->sessions[i]); |
@@ -3080,9 +3107,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
3080 | mutex_lock(&mdsc->mutex); | 3107 | mutex_lock(&mdsc->mutex); |
3081 | } | 3108 | } |
3082 | } | 3109 | } |
3083 | |||
3084 | WARN_ON(!list_empty(&mdsc->cap_delay_list)); | 3110 | WARN_ON(!list_empty(&mdsc->cap_delay_list)); |
3085 | |||
3086 | mutex_unlock(&mdsc->mutex); | 3111 | mutex_unlock(&mdsc->mutex); |
3087 | 3112 | ||
3088 | ceph_cleanup_empty_realms(mdsc); | 3113 | ceph_cleanup_empty_realms(mdsc); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index ab7e89f5e344..c98267ce6d2a 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -234,7 +234,8 @@ struct ceph_mds_client { | |||
234 | struct mutex mutex; /* all nested structures */ | 234 | struct mutex mutex; /* all nested structures */ |
235 | 235 | ||
236 | struct ceph_mdsmap *mdsmap; | 236 | struct ceph_mdsmap *mdsmap; |
237 | struct completion safe_umount_waiters, session_close_waiters; | 237 | struct completion safe_umount_waiters; |
238 | wait_queue_head_t session_close_wq; | ||
238 | struct list_head waiting_for_map; | 239 | struct list_head waiting_for_map; |
239 | 240 | ||
240 | struct ceph_mds_session **sessions; /* NULL for mds if no session */ | 241 | struct ceph_mds_session **sessions; /* NULL for mds if no session */ |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index bed6391e52c7..dfced1dacbcd 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -661,7 +661,7 @@ static int __send_request(struct ceph_osd_client *osdc, | |||
661 | reqhead->reassert_version = req->r_reassert_version; | 661 | reqhead->reassert_version = req->r_reassert_version; |
662 | 662 | ||
663 | req->r_stamp = jiffies; | 663 | req->r_stamp = jiffies; |
664 | list_move_tail(&osdc->req_lru, &req->r_req_lru_item); | 664 | list_move_tail(&req->r_req_lru_item, &osdc->req_lru); |
665 | 665 | ||
666 | ceph_msg_get(req->r_request); /* send consumes a ref */ | 666 | ceph_msg_get(req->r_request); /* send consumes a ref */ |
667 | ceph_con_send(&req->r_osd->o_con, req->r_request); | 667 | ceph_con_send(&req->r_osd->o_con, req->r_request); |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index c0b26b6badba..4868b9dcac5a 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -435,7 +435,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
435 | { | 435 | { |
436 | struct inode *inode = &ci->vfs_inode; | 436 | struct inode *inode = &ci->vfs_inode; |
437 | struct ceph_cap_snap *capsnap; | 437 | struct ceph_cap_snap *capsnap; |
438 | int used; | 438 | int used, dirty; |
439 | 439 | ||
440 | capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); | 440 | capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); |
441 | if (!capsnap) { | 441 | if (!capsnap) { |
@@ -445,6 +445,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
445 | 445 | ||
446 | spin_lock(&inode->i_lock); | 446 | spin_lock(&inode->i_lock); |
447 | used = __ceph_caps_used(ci); | 447 | used = __ceph_caps_used(ci); |
448 | dirty = __ceph_caps_dirty(ci); | ||
448 | if (__ceph_have_pending_cap_snap(ci)) { | 449 | if (__ceph_have_pending_cap_snap(ci)) { |
449 | /* there is no point in queuing multiple "pending" cap_snaps, | 450 | /* there is no point in queuing multiple "pending" cap_snaps, |
450 | as no new writes are allowed to start when pending, so any | 451 | as no new writes are allowed to start when pending, so any |
@@ -452,11 +453,15 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
452 | cap_snap. lucky us. */ | 453 | cap_snap. lucky us. */ |
453 | dout("queue_cap_snap %p already pending\n", inode); | 454 | dout("queue_cap_snap %p already pending\n", inode); |
454 | kfree(capsnap); | 455 | kfree(capsnap); |
455 | } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) { | 456 | } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) || |
457 | (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| | ||
458 | CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) { | ||
456 | struct ceph_snap_context *snapc = ci->i_head_snapc; | 459 | struct ceph_snap_context *snapc = ci->i_head_snapc; |
457 | 460 | ||
461 | dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode, | ||
462 | capsnap, snapc); | ||
458 | igrab(inode); | 463 | igrab(inode); |
459 | 464 | ||
460 | atomic_set(&capsnap->nref, 1); | 465 | atomic_set(&capsnap->nref, 1); |
461 | capsnap->ci = ci; | 466 | capsnap->ci = ci; |
462 | INIT_LIST_HEAD(&capsnap->ci_item); | 467 | INIT_LIST_HEAD(&capsnap->ci_item); |
@@ -464,15 +469,21 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
464 | 469 | ||
465 | capsnap->follows = snapc->seq - 1; | 470 | capsnap->follows = snapc->seq - 1; |
466 | capsnap->issued = __ceph_caps_issued(ci, NULL); | 471 | capsnap->issued = __ceph_caps_issued(ci, NULL); |
467 | capsnap->dirty = __ceph_caps_dirty(ci); | 472 | capsnap->dirty = dirty; |
468 | 473 | ||
469 | capsnap->mode = inode->i_mode; | 474 | capsnap->mode = inode->i_mode; |
470 | capsnap->uid = inode->i_uid; | 475 | capsnap->uid = inode->i_uid; |
471 | capsnap->gid = inode->i_gid; | 476 | capsnap->gid = inode->i_gid; |
472 | 477 | ||
473 | /* fixme? */ | 478 | if (dirty & CEPH_CAP_XATTR_EXCL) { |
474 | capsnap->xattr_blob = NULL; | 479 | __ceph_build_xattrs_blob(ci); |
475 | capsnap->xattr_len = 0; | 480 | capsnap->xattr_blob = |
481 | ceph_buffer_get(ci->i_xattrs.blob); | ||
482 | capsnap->xattr_version = ci->i_xattrs.version; | ||
483 | } else { | ||
484 | capsnap->xattr_blob = NULL; | ||
485 | capsnap->xattr_version = 0; | ||
486 | } | ||
476 | 487 | ||
477 | /* dirty page count moved from _head to this cap_snap; | 488 | /* dirty page count moved from _head to this cap_snap; |
478 | all subsequent writes page dirties occur _after_ this | 489 | all subsequent writes page dirties occur _after_ this |
@@ -480,7 +491,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
480 | capsnap->dirty_pages = ci->i_wrbuffer_ref_head; | 491 | capsnap->dirty_pages = ci->i_wrbuffer_ref_head; |
481 | ci->i_wrbuffer_ref_head = 0; | 492 | ci->i_wrbuffer_ref_head = 0; |
482 | capsnap->context = snapc; | 493 | capsnap->context = snapc; |
483 | ci->i_head_snapc = NULL; | 494 | ci->i_head_snapc = |
495 | ceph_get_snap_context(ci->i_snap_realm->cached_context); | ||
496 | dout(" new snapc is %p\n", ci->i_head_snapc); | ||
484 | list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); | 497 | list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); |
485 | 498 | ||
486 | if (used & CEPH_CAP_FILE_WR) { | 499 | if (used & CEPH_CAP_FILE_WR) { |
@@ -539,6 +552,41 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
539 | return 1; /* caller may want to ceph_flush_snaps */ | 552 | return 1; /* caller may want to ceph_flush_snaps */ |
540 | } | 553 | } |
541 | 554 | ||
555 | /* | ||
556 | * Queue cap_snaps for snap writeback for this realm and its children. | ||
557 | * Called under snap_rwsem, so realm topology won't change. | ||
558 | */ | ||
559 | static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) | ||
560 | { | ||
561 | struct ceph_inode_info *ci; | ||
562 | struct inode *lastinode = NULL; | ||
563 | struct ceph_snap_realm *child; | ||
564 | |||
565 | dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino); | ||
566 | |||
567 | spin_lock(&realm->inodes_with_caps_lock); | ||
568 | list_for_each_entry(ci, &realm->inodes_with_caps, | ||
569 | i_snap_realm_item) { | ||
570 | struct inode *inode = igrab(&ci->vfs_inode); | ||
571 | if (!inode) | ||
572 | continue; | ||
573 | spin_unlock(&realm->inodes_with_caps_lock); | ||
574 | if (lastinode) | ||
575 | iput(lastinode); | ||
576 | lastinode = inode; | ||
577 | ceph_queue_cap_snap(ci); | ||
578 | spin_lock(&realm->inodes_with_caps_lock); | ||
579 | } | ||
580 | spin_unlock(&realm->inodes_with_caps_lock); | ||
581 | if (lastinode) | ||
582 | iput(lastinode); | ||
583 | |||
584 | dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino); | ||
585 | list_for_each_entry(child, &realm->children, child_item) | ||
586 | queue_realm_cap_snaps(child); | ||
587 | |||
588 | dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); | ||
589 | } | ||
542 | 590 | ||
543 | /* | 591 | /* |
544 | * Parse and apply a snapblob "snap trace" from the MDS. This specifies | 592 | * Parse and apply a snapblob "snap trace" from the MDS. This specifies |
@@ -589,29 +637,8 @@ more: | |||
589 | * | 637 | * |
590 | * ...unless it's a snap deletion! | 638 | * ...unless it's a snap deletion! |
591 | */ | 639 | */ |
592 | if (!deletion) { | 640 | if (!deletion) |
593 | struct ceph_inode_info *ci; | 641 | queue_realm_cap_snaps(realm); |
594 | struct inode *lastinode = NULL; | ||
595 | |||
596 | spin_lock(&realm->inodes_with_caps_lock); | ||
597 | list_for_each_entry(ci, &realm->inodes_with_caps, | ||
598 | i_snap_realm_item) { | ||
599 | struct inode *inode = igrab(&ci->vfs_inode); | ||
600 | if (!inode) | ||
601 | continue; | ||
602 | spin_unlock(&realm->inodes_with_caps_lock); | ||
603 | if (lastinode) | ||
604 | iput(lastinode); | ||
605 | lastinode = inode; | ||
606 | ceph_queue_cap_snap(ci); | ||
607 | spin_lock(&realm->inodes_with_caps_lock); | ||
608 | } | ||
609 | spin_unlock(&realm->inodes_with_caps_lock); | ||
610 | if (lastinode) | ||
611 | iput(lastinode); | ||
612 | dout("update_snap_trace cap_snaps queued\n"); | ||
613 | } | ||
614 | |||
615 | } else { | 642 | } else { |
616 | dout("update_snap_trace %llx %p seq %lld unchanged\n", | 643 | dout("update_snap_trace %llx %p seq %lld unchanged\n", |
617 | realm->ino, realm, realm->seq); | 644 | realm->ino, realm, realm->seq); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2482d696f0de..c33897ae5725 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -216,8 +216,7 @@ struct ceph_cap_snap { | |||
216 | uid_t uid; | 216 | uid_t uid; |
217 | gid_t gid; | 217 | gid_t gid; |
218 | 218 | ||
219 | void *xattr_blob; | 219 | struct ceph_buffer *xattr_blob; |
220 | int xattr_len; | ||
221 | u64 xattr_version; | 220 | u64 xattr_version; |
222 | 221 | ||
223 | u64 size; | 222 | u64 size; |
@@ -229,8 +228,11 @@ struct ceph_cap_snap { | |||
229 | 228 | ||
230 | static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) | 229 | static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) |
231 | { | 230 | { |
232 | if (atomic_dec_and_test(&capsnap->nref)) | 231 | if (atomic_dec_and_test(&capsnap->nref)) { |
232 | if (capsnap->xattr_blob) | ||
233 | ceph_buffer_put(capsnap->xattr_blob); | ||
233 | kfree(capsnap); | 234 | kfree(capsnap); |
235 | } | ||
234 | } | 236 | } |
235 | 237 | ||
236 | /* | 238 | /* |
@@ -342,7 +344,8 @@ struct ceph_inode_info { | |||
342 | unsigned i_cap_exporting_issued; | 344 | unsigned i_cap_exporting_issued; |
343 | struct ceph_cap_reservation i_cap_migration_resv; | 345 | struct ceph_cap_reservation i_cap_migration_resv; |
344 | struct list_head i_cap_snaps; /* snapped state pending flush to mds */ | 346 | struct list_head i_cap_snaps; /* snapped state pending flush to mds */ |
345 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 */ | 347 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or |
348 | dirty|flushing caps */ | ||
346 | unsigned i_snap_caps; /* cap bits for snapped files */ | 349 | unsigned i_snap_caps; /* cap bits for snapped files */ |
347 | 350 | ||
348 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ | 351 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 097a2654c00f..9578af610b73 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -485,6 +485,7 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) | |||
485 | ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; | 485 | ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; |
486 | ci->i_xattrs.prealloc_blob = NULL; | 486 | ci->i_xattrs.prealloc_blob = NULL; |
487 | ci->i_xattrs.dirty = false; | 487 | ci->i_xattrs.dirty = false; |
488 | ci->i_xattrs.version++; | ||
488 | } | 489 | } |
489 | } | 490 | } |
490 | 491 | ||
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 917b7d449bb2..0da1debd499d 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig | |||
@@ -2,6 +2,8 @@ config CIFS | |||
2 | tristate "CIFS support (advanced network filesystem, SMBFS successor)" | 2 | tristate "CIFS support (advanced network filesystem, SMBFS successor)" |
3 | depends on INET | 3 | depends on INET |
4 | select NLS | 4 | select NLS |
5 | select CRYPTO_MD5 | ||
6 | select CRYPTO_ARC4 | ||
5 | help | 7 | help |
6 | This is the client VFS module for the Common Internet File System | 8 | This is the client VFS module for the Common Internet File System |
7 | (CIFS) protocol which is the successor to the Server Message Block | 9 | (CIFS) protocol which is the successor to the Server Message Block |
diff --git a/fs/cifs/README b/fs/cifs/README index a7081eeeb85d..7099a526f775 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -301,6 +301,16 @@ A partial list of the supported mount options follows: | |||
301 | gid Set the default gid for inodes (similar to above). | 301 | gid Set the default gid for inodes (similar to above). |
302 | file_mode If CIFS Unix extensions are not supported by the server | 302 | file_mode If CIFS Unix extensions are not supported by the server |
303 | this overrides the default mode for file inodes. | 303 | this overrides the default mode for file inodes. |
304 | fsc Enable local disk caching using FS-Cache (off by default). This | ||
305 | option could be useful to improve performance on a slow link, | ||
306 | heavily loaded server and/or network where reading from the | ||
307 | disk is faster than reading from the server (over the network). | ||
308 | This could also impact scalability positively as the | ||
309 | number of calls to the server is reduced. However, local | ||
310 | caching is not suitable for all workloads, e.g. read-once | ||
311 | type workloads. So, you need to carefully consider your | ||
312 | workload/scenario before using this option. Currently, local | ||
313 | disk caching is functional for CIFS files opened as read-only. | ||
304 | dir_mode If CIFS Unix extensions are not supported by the server | 314 | dir_mode If CIFS Unix extensions are not supported by the server |
305 | this overrides the default mode for directory inodes. | 315 | this overrides the default mode for directory inodes. |
306 | port attempt to contact the server on this tcp port, before | 316 | port attempt to contact the server on this tcp port, before |
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index cfd1ce34e0bc..21f0fbd86989 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c | |||
@@ -597,13 +597,13 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
597 | if (compare_oid(oid, oidlen, MSKRB5_OID, | 597 | if (compare_oid(oid, oidlen, MSKRB5_OID, |
598 | MSKRB5_OID_LEN)) | 598 | MSKRB5_OID_LEN)) |
599 | server->sec_mskerberos = true; | 599 | server->sec_mskerberos = true; |
600 | else if (compare_oid(oid, oidlen, KRB5U2U_OID, | 600 | if (compare_oid(oid, oidlen, KRB5U2U_OID, |
601 | KRB5U2U_OID_LEN)) | 601 | KRB5U2U_OID_LEN)) |
602 | server->sec_kerberosu2u = true; | 602 | server->sec_kerberosu2u = true; |
603 | else if (compare_oid(oid, oidlen, KRB5_OID, | 603 | if (compare_oid(oid, oidlen, KRB5_OID, |
604 | KRB5_OID_LEN)) | 604 | KRB5_OID_LEN)) |
605 | server->sec_kerberos = true; | 605 | server->sec_kerberos = true; |
606 | else if (compare_oid(oid, oidlen, NTLMSSP_OID, | 606 | if (compare_oid(oid, oidlen, NTLMSSP_OID, |
607 | NTLMSSP_OID_LEN)) | 607 | NTLMSSP_OID_LEN)) |
608 | server->sec_ntlmssp = true; | 608 | server->sec_ntlmssp = true; |
609 | 609 | ||
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 650638275a6f..7fe6b52df507 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h | |||
@@ -30,6 +30,8 @@ | |||
30 | * This is a compressed table of upper and lower case conversion. | 30 | * This is a compressed table of upper and lower case conversion. |
31 | * | 31 | * |
32 | */ | 32 | */ |
33 | #ifndef _CIFS_UNICODE_H | ||
34 | #define _CIFS_UNICODE_H | ||
33 | 35 | ||
34 | #include <asm/byteorder.h> | 36 | #include <asm/byteorder.h> |
35 | #include <linux/types.h> | 37 | #include <linux/types.h> |
@@ -67,8 +69,8 @@ extern const struct UniCaseRange CifsUniUpperRange[]; | |||
67 | #endif /* UNIUPR_NOUPPER */ | 69 | #endif /* UNIUPR_NOUPPER */ |
68 | 70 | ||
69 | #ifndef UNIUPR_NOLOWER | 71 | #ifndef UNIUPR_NOLOWER |
70 | extern signed char UniLowerTable[512]; | 72 | extern signed char CifsUniLowerTable[512]; |
71 | extern struct UniCaseRange UniLowerRange[]; | 73 | extern const struct UniCaseRange CifsUniLowerRange[]; |
72 | #endif /* UNIUPR_NOLOWER */ | 74 | #endif /* UNIUPR_NOLOWER */ |
73 | 75 | ||
74 | #ifdef __KERNEL__ | 76 | #ifdef __KERNEL__ |
@@ -337,15 +339,15 @@ UniStrupr(register wchar_t *upin) | |||
337 | * UniTolower: Convert a unicode character to lower case | 339 | * UniTolower: Convert a unicode character to lower case |
338 | */ | 340 | */ |
339 | static inline wchar_t | 341 | static inline wchar_t |
340 | UniTolower(wchar_t uc) | 342 | UniTolower(register wchar_t uc) |
341 | { | 343 | { |
342 | register struct UniCaseRange *rp; | 344 | register const struct UniCaseRange *rp; |
343 | 345 | ||
344 | if (uc < sizeof(UniLowerTable)) { | 346 | if (uc < sizeof(CifsUniLowerTable)) { |
345 | /* Latin characters */ | 347 | /* Latin characters */ |
346 | return uc + UniLowerTable[uc]; /* Use base tables */ | 348 | return uc + CifsUniLowerTable[uc]; /* Use base tables */ |
347 | } else { | 349 | } else { |
348 | rp = UniLowerRange; /* Use range tables */ | 350 | rp = CifsUniLowerRange; /* Use range tables */ |
349 | while (rp->start) { | 351 | while (rp->start) { |
350 | if (uc < rp->start) /* Before start of range */ | 352 | if (uc < rp->start) /* Before start of range */ |
351 | return uc; /* Uppercase = input */ | 353 | return uc; /* Uppercase = input */ |
@@ -374,3 +376,5 @@ UniStrlwr(register wchar_t *upin) | |||
374 | } | 376 | } |
375 | 377 | ||
376 | #endif | 378 | #endif |
379 | |||
380 | #endif /* _CIFS_UNICODE_H */ | ||
diff --git a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h index 18a9d978e519..0ac7c5a8633a 100644 --- a/fs/cifs/cifs_uniupr.h +++ b/fs/cifs/cifs_uniupr.h | |||
@@ -140,7 +140,7 @@ const struct UniCaseRange CifsUniUpperRange[] = { | |||
140 | /* | 140 | /* |
141 | * Latin lower case | 141 | * Latin lower case |
142 | */ | 142 | */ |
143 | static signed char CifsUniLowerTable[512] = { | 143 | signed char CifsUniLowerTable[512] = { |
144 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */ | 144 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */ |
145 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */ | 145 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */ |
146 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */ | 146 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */ |
@@ -242,12 +242,12 @@ static signed char UniCaseRangeLff20[27] = { | |||
242 | /* | 242 | /* |
243 | * Lower Case Range | 243 | * Lower Case Range |
244 | */ | 244 | */ |
245 | static const struct UniCaseRange CifsUniLowerRange[] = { | 245 | const struct UniCaseRange CifsUniLowerRange[] = { |
246 | 0x0380, 0x03ab, UniCaseRangeL0380, | 246 | {0x0380, 0x03ab, UniCaseRangeL0380}, |
247 | 0x0400, 0x042f, UniCaseRangeL0400, | 247 | {0x0400, 0x042f, UniCaseRangeL0400}, |
248 | 0x0490, 0x04cb, UniCaseRangeL0490, | 248 | {0x0490, 0x04cb, UniCaseRangeL0490}, |
249 | 0x1e00, 0x1ff7, UniCaseRangeL1e00, | 249 | {0x1e00, 0x1ff7, UniCaseRangeL1e00}, |
250 | 0xff20, 0xff3a, UniCaseRangeLff20, | 250 | {0xff20, 0xff3a, UniCaseRangeLff20}, |
251 | 0, 0, 0 | 251 | {0} |
252 | }; | 252 | }; |
253 | #endif | 253 | #endif |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 847628dfdc44..709f2296bdb4 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include "md5.h" | 27 | #include "md5.h" |
28 | #include "cifs_unicode.h" | 28 | #include "cifs_unicode.h" |
29 | #include "cifsproto.h" | 29 | #include "cifsproto.h" |
30 | #include "ntlmssp.h" | ||
30 | #include <linux/ctype.h> | 31 | #include <linux/ctype.h> |
31 | #include <linux/random.h> | 32 | #include <linux/random.h> |
32 | 33 | ||
@@ -42,21 +43,43 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, | |||
42 | unsigned char *p24); | 43 | unsigned char *p24); |
43 | 44 | ||
44 | static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, | 45 | static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, |
45 | const struct mac_key *key, char *signature) | 46 | struct TCP_Server_Info *server, char *signature) |
46 | { | 47 | { |
47 | struct MD5Context context; | 48 | int rc; |
48 | 49 | ||
49 | if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL)) | 50 | if (cifs_pdu == NULL || server == NULL || signature == NULL) |
50 | return -EINVAL; | 51 | return -EINVAL; |
51 | 52 | ||
52 | cifs_MD5_init(&context); | 53 | if (!server->ntlmssp.sdescmd5) { |
53 | cifs_MD5_update(&context, (char *)&key->data, key->len); | 54 | cERROR(1, |
54 | cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length); | 55 | "cifs_calculate_signature: can't generate signature\n"); |
56 | return -1; | ||
57 | } | ||
55 | 58 | ||
56 | cifs_MD5_final(signature, &context); | 59 | rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash); |
57 | return 0; | 60 | if (rc) { |
61 | cERROR(1, "cifs_calculate_signature: oould not init md5\n"); | ||
62 | return rc; | ||
63 | } | ||
64 | |||
65 | if (server->secType == RawNTLMSSP) | ||
66 | crypto_shash_update(&server->ntlmssp.sdescmd5->shash, | ||
67 | server->session_key.data.ntlmv2.key, | ||
68 | CIFS_NTLMV2_SESSKEY_SIZE); | ||
69 | else | ||
70 | crypto_shash_update(&server->ntlmssp.sdescmd5->shash, | ||
71 | (char *)&server->session_key.data, | ||
72 | server->session_key.len); | ||
73 | |||
74 | crypto_shash_update(&server->ntlmssp.sdescmd5->shash, | ||
75 | cifs_pdu->Protocol, cifs_pdu->smb_buf_length); | ||
76 | |||
77 | rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature); | ||
78 | |||
79 | return rc; | ||
58 | } | 80 | } |
59 | 81 | ||
82 | |||
60 | int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, | 83 | int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, |
61 | __u32 *pexpected_response_sequence_number) | 84 | __u32 *pexpected_response_sequence_number) |
62 | { | 85 | { |
@@ -78,8 +101,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, | |||
78 | server->sequence_number++; | 101 | server->sequence_number++; |
79 | spin_unlock(&GlobalMid_Lock); | 102 | spin_unlock(&GlobalMid_Lock); |
80 | 103 | ||
81 | rc = cifs_calculate_signature(cifs_pdu, &server->mac_signing_key, | 104 | rc = cifs_calculate_signature(cifs_pdu, server, smb_signature); |
82 | smb_signature); | ||
83 | if (rc) | 105 | if (rc) |
84 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); | 106 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); |
85 | else | 107 | else |
@@ -89,21 +111,39 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, | |||
89 | } | 111 | } |
90 | 112 | ||
91 | static int cifs_calc_signature2(const struct kvec *iov, int n_vec, | 113 | static int cifs_calc_signature2(const struct kvec *iov, int n_vec, |
92 | const struct mac_key *key, char *signature) | 114 | struct TCP_Server_Info *server, char *signature) |
93 | { | 115 | { |
94 | struct MD5Context context; | ||
95 | int i; | 116 | int i; |
117 | int rc; | ||
96 | 118 | ||
97 | if ((iov == NULL) || (signature == NULL) || (key == NULL)) | 119 | if (iov == NULL || server == NULL || signature == NULL) |
98 | return -EINVAL; | 120 | return -EINVAL; |
99 | 121 | ||
100 | cifs_MD5_init(&context); | 122 | if (!server->ntlmssp.sdescmd5) { |
101 | cifs_MD5_update(&context, (char *)&key->data, key->len); | 123 | cERROR(1, "cifs_calc_signature2: can't generate signature\n"); |
124 | return -1; | ||
125 | } | ||
126 | |||
127 | rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash); | ||
128 | if (rc) { | ||
129 | cERROR(1, "cifs_calc_signature2: oould not init md5\n"); | ||
130 | return rc; | ||
131 | } | ||
132 | |||
133 | if (server->secType == RawNTLMSSP) | ||
134 | crypto_shash_update(&server->ntlmssp.sdescmd5->shash, | ||
135 | server->session_key.data.ntlmv2.key, | ||
136 | CIFS_NTLMV2_SESSKEY_SIZE); | ||
137 | else | ||
138 | crypto_shash_update(&server->ntlmssp.sdescmd5->shash, | ||
139 | (char *)&server->session_key.data, | ||
140 | server->session_key.len); | ||
141 | |||
102 | for (i = 0; i < n_vec; i++) { | 142 | for (i = 0; i < n_vec; i++) { |
103 | if (iov[i].iov_len == 0) | 143 | if (iov[i].iov_len == 0) |
104 | continue; | 144 | continue; |
105 | if (iov[i].iov_base == NULL) { | 145 | if (iov[i].iov_base == NULL) { |
106 | cERROR(1, "null iovec entry"); | 146 | cERROR(1, "cifs_calc_signature2: null iovec entry"); |
107 | return -EIO; | 147 | return -EIO; |
108 | } | 148 | } |
109 | /* The first entry includes a length field (which does not get | 149 | /* The first entry includes a length field (which does not get |
@@ -111,18 +151,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, | |||
111 | if (i == 0) { | 151 | if (i == 0) { |
112 | if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ | 152 | if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ |
113 | break; /* nothing to sign or corrupt header */ | 153 | break; /* nothing to sign or corrupt header */ |
114 | cifs_MD5_update(&context, iov[0].iov_base+4, | 154 | crypto_shash_update(&server->ntlmssp.sdescmd5->shash, |
115 | iov[0].iov_len-4); | 155 | iov[i].iov_base + 4, iov[i].iov_len - 4); |
116 | } else | 156 | } else |
117 | cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len); | 157 | crypto_shash_update(&server->ntlmssp.sdescmd5->shash, |
158 | iov[i].iov_base, iov[i].iov_len); | ||
118 | } | 159 | } |
119 | 160 | ||
120 | cifs_MD5_final(signature, &context); | 161 | rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature); |
121 | 162 | ||
122 | return 0; | 163 | return rc; |
123 | } | 164 | } |
124 | 165 | ||
125 | |||
126 | int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | 166 | int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, |
127 | __u32 *pexpected_response_sequence_number) | 167 | __u32 *pexpected_response_sequence_number) |
128 | { | 168 | { |
@@ -145,8 +185,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | |||
145 | server->sequence_number++; | 185 | server->sequence_number++; |
146 | spin_unlock(&GlobalMid_Lock); | 186 | spin_unlock(&GlobalMid_Lock); |
147 | 187 | ||
148 | rc = cifs_calc_signature2(iov, n_vec, &server->mac_signing_key, | 188 | rc = cifs_calc_signature2(iov, n_vec, server, smb_signature); |
149 | smb_signature); | ||
150 | if (rc) | 189 | if (rc) |
151 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); | 190 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); |
152 | else | 191 | else |
@@ -156,14 +195,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | |||
156 | } | 195 | } |
157 | 196 | ||
158 | int cifs_verify_signature(struct smb_hdr *cifs_pdu, | 197 | int cifs_verify_signature(struct smb_hdr *cifs_pdu, |
159 | const struct mac_key *mac_key, | 198 | struct TCP_Server_Info *server, |
160 | __u32 expected_sequence_number) | 199 | __u32 expected_sequence_number) |
161 | { | 200 | { |
162 | unsigned int rc; | 201 | int rc; |
163 | char server_response_sig[8]; | 202 | char server_response_sig[8]; |
164 | char what_we_think_sig_should_be[20]; | 203 | char what_we_think_sig_should_be[20]; |
165 | 204 | ||
166 | if ((cifs_pdu == NULL) || (mac_key == NULL)) | 205 | if (cifs_pdu == NULL || server == NULL) |
167 | return -EINVAL; | 206 | return -EINVAL; |
168 | 207 | ||
169 | if (cifs_pdu->Command == SMB_COM_NEGOTIATE) | 208 | if (cifs_pdu->Command == SMB_COM_NEGOTIATE) |
@@ -192,7 +231,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, | |||
192 | cpu_to_le32(expected_sequence_number); | 231 | cpu_to_le32(expected_sequence_number); |
193 | cifs_pdu->Signature.Sequence.Reserved = 0; | 232 | cifs_pdu->Signature.Sequence.Reserved = 0; |
194 | 233 | ||
195 | rc = cifs_calculate_signature(cifs_pdu, mac_key, | 234 | rc = cifs_calculate_signature(cifs_pdu, server, |
196 | what_we_think_sig_should_be); | 235 | what_we_think_sig_should_be); |
197 | 236 | ||
198 | if (rc) | 237 | if (rc) |
@@ -209,7 +248,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, | |||
209 | } | 248 | } |
210 | 249 | ||
211 | /* We fill in key by putting in 40 byte array which was allocated by caller */ | 250 | /* We fill in key by putting in 40 byte array which was allocated by caller */ |
212 | int cifs_calculate_mac_key(struct mac_key *key, const char *rn, | 251 | int cifs_calculate_session_key(struct session_key *key, const char *rn, |
213 | const char *password) | 252 | const char *password) |
214 | { | 253 | { |
215 | char temp_key[16]; | 254 | char temp_key[16]; |
@@ -223,63 +262,6 @@ int cifs_calculate_mac_key(struct mac_key *key, const char *rn, | |||
223 | return 0; | 262 | return 0; |
224 | } | 263 | } |
225 | 264 | ||
226 | int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses, | ||
227 | const struct nls_table *nls_info) | ||
228 | { | ||
229 | char temp_hash[16]; | ||
230 | struct HMACMD5Context ctx; | ||
231 | char *ucase_buf; | ||
232 | __le16 *unicode_buf; | ||
233 | unsigned int i, user_name_len, dom_name_len; | ||
234 | |||
235 | if (ses == NULL) | ||
236 | return -EINVAL; | ||
237 | |||
238 | E_md4hash(ses->password, temp_hash); | ||
239 | |||
240 | hmac_md5_init_limK_to_64(temp_hash, 16, &ctx); | ||
241 | user_name_len = strlen(ses->userName); | ||
242 | if (user_name_len > MAX_USERNAME_SIZE) | ||
243 | return -EINVAL; | ||
244 | if (ses->domainName == NULL) | ||
245 | return -EINVAL; /* BB should we use CIFS_LINUX_DOM */ | ||
246 | dom_name_len = strlen(ses->domainName); | ||
247 | if (dom_name_len > MAX_USERNAME_SIZE) | ||
248 | return -EINVAL; | ||
249 | |||
250 | ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL); | ||
251 | if (ucase_buf == NULL) | ||
252 | return -ENOMEM; | ||
253 | unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL); | ||
254 | if (unicode_buf == NULL) { | ||
255 | kfree(ucase_buf); | ||
256 | return -ENOMEM; | ||
257 | } | ||
258 | |||
259 | for (i = 0; i < user_name_len; i++) | ||
260 | ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]]; | ||
261 | ucase_buf[i] = 0; | ||
262 | user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf, | ||
263 | MAX_USERNAME_SIZE*2, nls_info); | ||
264 | unicode_buf[user_name_len] = 0; | ||
265 | user_name_len++; | ||
266 | |||
267 | for (i = 0; i < dom_name_len; i++) | ||
268 | ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]]; | ||
269 | ucase_buf[i] = 0; | ||
270 | dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf, | ||
271 | MAX_USERNAME_SIZE*2, nls_info); | ||
272 | |||
273 | unicode_buf[user_name_len + dom_name_len] = 0; | ||
274 | hmac_md5_update((const unsigned char *) unicode_buf, | ||
275 | (user_name_len+dom_name_len)*2, &ctx); | ||
276 | |||
277 | hmac_md5_final(ses->server->ntlmv2_hash, &ctx); | ||
278 | kfree(ucase_buf); | ||
279 | kfree(unicode_buf); | ||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 265 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
284 | void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, | 266 | void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, |
285 | char *lnm_session_key) | 267 | char *lnm_session_key) |
@@ -324,38 +306,52 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, | |||
324 | { | 306 | { |
325 | int rc = 0; | 307 | int rc = 0; |
326 | int len; | 308 | int len; |
327 | char nt_hash[16]; | 309 | char nt_hash[CIFS_NTHASH_SIZE]; |
328 | struct HMACMD5Context *pctxt; | ||
329 | wchar_t *user; | 310 | wchar_t *user; |
330 | wchar_t *domain; | 311 | wchar_t *domain; |
312 | wchar_t *server; | ||
331 | 313 | ||
332 | pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL); | 314 | if (!ses->server->ntlmssp.sdeschmacmd5) { |
333 | 315 | cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); | |
334 | if (pctxt == NULL) | 316 | return -1; |
335 | return -ENOMEM; | 317 | } |
336 | 318 | ||
337 | /* calculate md4 hash of password */ | 319 | /* calculate md4 hash of password */ |
338 | E_md4hash(ses->password, nt_hash); | 320 | E_md4hash(ses->password, nt_hash); |
339 | 321 | ||
340 | /* convert Domainname to unicode and uppercase */ | 322 | crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash, |
341 | hmac_md5_init_limK_to_64(nt_hash, 16, pctxt); | 323 | CIFS_NTHASH_SIZE); |
324 | |||
325 | rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash); | ||
326 | if (rc) { | ||
327 | cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n"); | ||
328 | return rc; | ||
329 | } | ||
342 | 330 | ||
343 | /* convert ses->userName to unicode and uppercase */ | 331 | /* convert ses->userName to unicode and uppercase */ |
344 | len = strlen(ses->userName); | 332 | len = strlen(ses->userName); |
345 | user = kmalloc(2 + (len * 2), GFP_KERNEL); | 333 | user = kmalloc(2 + (len * 2), GFP_KERNEL); |
346 | if (user == NULL) | 334 | if (user == NULL) { |
335 | cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); | ||
336 | rc = -ENOMEM; | ||
347 | goto calc_exit_2; | 337 | goto calc_exit_2; |
338 | } | ||
348 | len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); | 339 | len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); |
349 | UniStrupr(user); | 340 | UniStrupr(user); |
350 | hmac_md5_update((char *)user, 2*len, pctxt); | 341 | |
342 | crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, | ||
343 | (char *)user, 2 * len); | ||
351 | 344 | ||
352 | /* convert ses->domainName to unicode and uppercase */ | 345 | /* convert ses->domainName to unicode and uppercase */ |
353 | if (ses->domainName) { | 346 | if (ses->domainName) { |
354 | len = strlen(ses->domainName); | 347 | len = strlen(ses->domainName); |
355 | 348 | ||
356 | domain = kmalloc(2 + (len * 2), GFP_KERNEL); | 349 | domain = kmalloc(2 + (len * 2), GFP_KERNEL); |
357 | if (domain == NULL) | 350 | if (domain == NULL) { |
351 | cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure"); | ||
352 | rc = -ENOMEM; | ||
358 | goto calc_exit_1; | 353 | goto calc_exit_1; |
354 | } | ||
359 | len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, | 355 | len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, |
360 | nls_cp); | 356 | nls_cp); |
361 | /* the following line was removed since it didn't work well | 357 | /* the following line was removed since it didn't work well |
@@ -363,65 +359,292 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, | |||
363 | Maybe converting the domain name earlier makes sense */ | 359 | Maybe converting the domain name earlier makes sense */ |
364 | /* UniStrupr(domain); */ | 360 | /* UniStrupr(domain); */ |
365 | 361 | ||
366 | hmac_md5_update((char *)domain, 2*len, pctxt); | 362 | crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, |
363 | (char *)domain, 2 * len); | ||
367 | 364 | ||
368 | kfree(domain); | 365 | kfree(domain); |
366 | } else if (ses->serverName) { | ||
367 | len = strlen(ses->serverName); | ||
368 | |||
369 | server = kmalloc(2 + (len * 2), GFP_KERNEL); | ||
370 | if (server == NULL) { | ||
371 | cERROR(1, "calc_ntlmv2_hash: server mem alloc failure"); | ||
372 | rc = -ENOMEM; | ||
373 | goto calc_exit_1; | ||
374 | } | ||
375 | len = cifs_strtoUCS((__le16 *)server, ses->serverName, len, | ||
376 | nls_cp); | ||
377 | /* the following line was removed since it didn't work well | ||
378 | with lower cased domain name that passed as an option. | ||
379 | Maybe converting the domain name earlier makes sense */ | ||
380 | /* UniStrupr(domain); */ | ||
381 | |||
382 | crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, | ||
383 | (char *)server, 2 * len); | ||
384 | |||
385 | kfree(server); | ||
369 | } | 386 | } |
387 | |||
388 | rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash, | ||
389 | ses->server->ntlmv2_hash); | ||
390 | |||
370 | calc_exit_1: | 391 | calc_exit_1: |
371 | kfree(user); | 392 | kfree(user); |
372 | calc_exit_2: | 393 | calc_exit_2: |
373 | /* BB FIXME what about bytes 24 through 40 of the signing key? | 394 | /* BB FIXME what about bytes 24 through 40 of the signing key? |
374 | compare with the NTLM example */ | 395 | compare with the NTLM example */ |
375 | hmac_md5_final(ses->server->ntlmv2_hash, pctxt); | ||
376 | 396 | ||
377 | kfree(pctxt); | ||
378 | return rc; | 397 | return rc; |
379 | } | 398 | } |
380 | 399 | ||
381 | void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, | 400 | static int |
382 | const struct nls_table *nls_cp) | 401 | find_domain_name(struct cifsSesInfo *ses) |
402 | { | ||
403 | int rc = 0; | ||
404 | unsigned int attrsize; | ||
405 | unsigned int type; | ||
406 | unsigned char *blobptr; | ||
407 | struct ntlmssp2_name *attrptr; | ||
408 | |||
409 | if (ses->server->tiblob) { | ||
410 | blobptr = ses->server->tiblob; | ||
411 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
412 | |||
413 | while ((type = attrptr->type) != 0) { | ||
414 | blobptr += 2; /* advance attr type */ | ||
415 | attrsize = attrptr->length; | ||
416 | blobptr += 2; /* advance attr size */ | ||
417 | if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { | ||
418 | if (!ses->domainName) { | ||
419 | ses->domainName = | ||
420 | kmalloc(attrptr->length + 1, | ||
421 | GFP_KERNEL); | ||
422 | if (!ses->domainName) | ||
423 | return -ENOMEM; | ||
424 | cifs_from_ucs2(ses->domainName, | ||
425 | (__le16 *)blobptr, | ||
426 | attrptr->length, | ||
427 | attrptr->length, | ||
428 | load_nls_default(), false); | ||
429 | } | ||
430 | } | ||
431 | blobptr += attrsize; /* advance attr value */ | ||
432 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
433 | } | ||
434 | } else { | ||
435 | ses->server->tilen = 2 * sizeof(struct ntlmssp2_name); | ||
436 | ses->server->tiblob = kmalloc(ses->server->tilen, GFP_KERNEL); | ||
437 | if (!ses->server->tiblob) { | ||
438 | ses->server->tilen = 0; | ||
439 | cERROR(1, "Challenge target info allocation failure"); | ||
440 | return -ENOMEM; | ||
441 | } | ||
442 | memset(ses->server->tiblob, 0x0, ses->server->tilen); | ||
443 | attrptr = (struct ntlmssp2_name *) ses->server->tiblob; | ||
444 | attrptr->type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE); | ||
445 | } | ||
446 | |||
447 | return rc; | ||
448 | } | ||
449 | |||
450 | static int | ||
451 | CalcNTLMv2_response(const struct TCP_Server_Info *server, | ||
452 | char *v2_session_response) | ||
383 | { | 453 | { |
384 | int rc; | 454 | int rc; |
455 | |||
456 | if (!server->ntlmssp.sdeschmacmd5) { | ||
457 | cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); | ||
458 | return -1; | ||
459 | } | ||
460 | |||
461 | crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash, | ||
462 | CIFS_HMAC_MD5_HASH_SIZE); | ||
463 | |||
464 | rc = crypto_shash_init(&server->ntlmssp.sdeschmacmd5->shash); | ||
465 | if (rc) { | ||
466 | cERROR(1, "CalcNTLMv2_response: could not init hmacmd5"); | ||
467 | return rc; | ||
468 | } | ||
469 | |||
470 | memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE, | ||
471 | server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE); | ||
472 | crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash, | ||
473 | v2_session_response + CIFS_SERVER_CHALLENGE_SIZE, | ||
474 | sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE); | ||
475 | |||
476 | if (server->tilen) | ||
477 | crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash, | ||
478 | server->tiblob, server->tilen); | ||
479 | |||
480 | rc = crypto_shash_final(&server->ntlmssp.sdeschmacmd5->shash, | ||
481 | v2_session_response); | ||
482 | |||
483 | return rc; | ||
484 | } | ||
485 | |||
486 | int | ||
487 | setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, | ||
488 | const struct nls_table *nls_cp) | ||
489 | { | ||
490 | int rc = 0; | ||
385 | struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf; | 491 | struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf; |
386 | struct HMACMD5Context context; | ||
387 | 492 | ||
388 | buf->blob_signature = cpu_to_le32(0x00000101); | 493 | buf->blob_signature = cpu_to_le32(0x00000101); |
389 | buf->reserved = 0; | 494 | buf->reserved = 0; |
390 | buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); | 495 | buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); |
391 | get_random_bytes(&buf->client_chal, sizeof(buf->client_chal)); | 496 | get_random_bytes(&buf->client_chal, sizeof(buf->client_chal)); |
392 | buf->reserved2 = 0; | 497 | buf->reserved2 = 0; |
393 | buf->names[0].type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE); | 498 | |
394 | buf->names[0].length = 0; | 499 | if (!ses->domainName) { |
395 | buf->names[1].type = 0; | 500 | rc = find_domain_name(ses); |
396 | buf->names[1].length = 0; | 501 | if (rc) { |
502 | cERROR(1, "could not get domain/server name rc %d", rc); | ||
503 | return rc; | ||
504 | } | ||
505 | } | ||
397 | 506 | ||
398 | /* calculate buf->ntlmv2_hash */ | 507 | /* calculate buf->ntlmv2_hash */ |
399 | rc = calc_ntlmv2_hash(ses, nls_cp); | 508 | rc = calc_ntlmv2_hash(ses, nls_cp); |
400 | if (rc) | 509 | if (rc) { |
401 | cERROR(1, "could not get v2 hash rc %d", rc); | 510 | cERROR(1, "could not get v2 hash rc %d", rc); |
402 | CalcNTLMv2_response(ses, resp_buf); | 511 | return rc; |
512 | } | ||
513 | rc = CalcNTLMv2_response(ses->server, resp_buf); | ||
514 | if (rc) { | ||
515 | cERROR(1, "could not get v2 hash rc %d", rc); | ||
516 | return rc; | ||
517 | } | ||
403 | 518 | ||
404 | /* now calculate the MAC key for NTLMv2 */ | 519 | if (!ses->server->ntlmssp.sdeschmacmd5) { |
405 | hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); | 520 | cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); |
406 | hmac_md5_update(resp_buf, 16, &context); | 521 | return -1; |
407 | hmac_md5_final(ses->server->mac_signing_key.data.ntlmv2.key, &context); | 522 | } |
408 | 523 | ||
409 | memcpy(&ses->server->mac_signing_key.data.ntlmv2.resp, resp_buf, | 524 | crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, |
410 | sizeof(struct ntlmv2_resp)); | 525 | ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); |
411 | ses->server->mac_signing_key.len = 16 + sizeof(struct ntlmv2_resp); | 526 | |
527 | rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash); | ||
528 | if (rc) { | ||
529 | cERROR(1, "setup_ntlmv2_rsp: could not init hmacmd5\n"); | ||
530 | return rc; | ||
531 | } | ||
532 | |||
533 | crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, | ||
534 | resp_buf, CIFS_HMAC_MD5_HASH_SIZE); | ||
535 | |||
536 | rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash, | ||
537 | ses->server->session_key.data.ntlmv2.key); | ||
538 | |||
539 | memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf, | ||
540 | sizeof(struct ntlmv2_resp)); | ||
541 | ses->server->session_key.len = 16 + sizeof(struct ntlmv2_resp); | ||
542 | |||
543 | return rc; | ||
412 | } | 544 | } |
413 | 545 | ||
414 | void CalcNTLMv2_response(const struct cifsSesInfo *ses, | 546 | int |
415 | char *v2_session_response) | 547 | calc_seckey(struct TCP_Server_Info *server) |
416 | { | 548 | { |
417 | struct HMACMD5Context context; | 549 | int rc; |
418 | /* rest of v2 struct already generated */ | 550 | unsigned char sec_key[CIFS_NTLMV2_SESSKEY_SIZE]; |
419 | memcpy(v2_session_response + 8, ses->server->cryptKey, 8); | 551 | struct crypto_blkcipher *tfm_arc4; |
420 | hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); | 552 | struct scatterlist sgin, sgout; |
553 | struct blkcipher_desc desc; | ||
554 | |||
555 | get_random_bytes(sec_key, CIFS_NTLMV2_SESSKEY_SIZE); | ||
556 | |||
557 | tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", | ||
558 | 0, CRYPTO_ALG_ASYNC); | ||
559 | if (!tfm_arc4 || IS_ERR(tfm_arc4)) { | ||
560 | cERROR(1, "could not allocate " "master crypto API arc4\n"); | ||
561 | return 1; | ||
562 | } | ||
563 | |||
564 | desc.tfm = tfm_arc4; | ||
565 | |||
566 | crypto_blkcipher_setkey(tfm_arc4, | ||
567 | server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE); | ||
568 | sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE); | ||
569 | sg_init_one(&sgout, server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE); | ||
570 | rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE); | ||
421 | 571 | ||
422 | hmac_md5_update(v2_session_response+8, | 572 | if (!rc) |
423 | sizeof(struct ntlmv2_resp) - 8, &context); | 573 | memcpy(server->session_key.data.ntlmv2.key, |
574 | sec_key, CIFS_NTLMV2_SESSKEY_SIZE); | ||
575 | |||
576 | crypto_free_blkcipher(tfm_arc4); | ||
577 | |||
578 | return 0; | ||
579 | } | ||
424 | 580 | ||
425 | hmac_md5_final(v2_session_response, &context); | 581 | void |
426 | /* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */ | 582 | cifs_crypto_shash_release(struct TCP_Server_Info *server) |
583 | { | ||
584 | if (server->ntlmssp.md5) | ||
585 | crypto_free_shash(server->ntlmssp.md5); | ||
586 | |||
587 | if (server->ntlmssp.hmacmd5) | ||
588 | crypto_free_shash(server->ntlmssp.hmacmd5); | ||
589 | |||
590 | kfree(server->ntlmssp.sdeschmacmd5); | ||
591 | |||
592 | kfree(server->ntlmssp.sdescmd5); | ||
593 | } | ||
594 | |||
595 | int | ||
596 | cifs_crypto_shash_allocate(struct TCP_Server_Info *server) | ||
597 | { | ||
598 | int rc; | ||
599 | unsigned int size; | ||
600 | |||
601 | server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); | ||
602 | if (!server->ntlmssp.hmacmd5 || | ||
603 | IS_ERR(server->ntlmssp.hmacmd5)) { | ||
604 | cERROR(1, "could not allocate crypto hmacmd5\n"); | ||
605 | return 1; | ||
606 | } | ||
607 | |||
608 | server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0); | ||
609 | if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) { | ||
610 | cERROR(1, "could not allocate crypto md5\n"); | ||
611 | rc = 1; | ||
612 | goto cifs_crypto_shash_allocate_ret1; | ||
613 | } | ||
614 | |||
615 | size = sizeof(struct shash_desc) + | ||
616 | crypto_shash_descsize(server->ntlmssp.hmacmd5); | ||
617 | server->ntlmssp.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); | ||
618 | if (!server->ntlmssp.sdeschmacmd5) { | ||
619 | cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n"); | ||
620 | rc = -ENOMEM; | ||
621 | goto cifs_crypto_shash_allocate_ret2; | ||
622 | } | ||
623 | server->ntlmssp.sdeschmacmd5->shash.tfm = server->ntlmssp.hmacmd5; | ||
624 | server->ntlmssp.sdeschmacmd5->shash.flags = 0x0; | ||
625 | |||
626 | |||
627 | size = sizeof(struct shash_desc) + | ||
628 | crypto_shash_descsize(server->ntlmssp.md5); | ||
629 | server->ntlmssp.sdescmd5 = kmalloc(size, GFP_KERNEL); | ||
630 | if (!server->ntlmssp.sdescmd5) { | ||
631 | cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n"); | ||
632 | rc = -ENOMEM; | ||
633 | goto cifs_crypto_shash_allocate_ret3; | ||
634 | } | ||
635 | server->ntlmssp.sdescmd5->shash.tfm = server->ntlmssp.md5; | ||
636 | server->ntlmssp.sdescmd5->shash.flags = 0x0; | ||
637 | |||
638 | return 0; | ||
639 | |||
640 | cifs_crypto_shash_allocate_ret3: | ||
641 | kfree(server->ntlmssp.sdeschmacmd5); | ||
642 | |||
643 | cifs_crypto_shash_allocate_ret2: | ||
644 | crypto_free_shash(server->ntlmssp.md5); | ||
645 | |||
646 | cifs_crypto_shash_allocate_ret1: | ||
647 | crypto_free_shash(server->ntlmssp.hmacmd5); | ||
648 | |||
649 | return rc; | ||
427 | } | 650 | } |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0cdfb8c32ac6..c9d0cfc086eb 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -25,6 +25,9 @@ | |||
25 | #include <linux/workqueue.h> | 25 | #include <linux/workqueue.h> |
26 | #include "cifs_fs_sb.h" | 26 | #include "cifs_fs_sb.h" |
27 | #include "cifsacl.h" | 27 | #include "cifsacl.h" |
28 | #include <crypto/internal/hash.h> | ||
29 | #include <linux/scatterlist.h> | ||
30 | |||
28 | /* | 31 | /* |
29 | * The sizes of various internal tables and strings | 32 | * The sizes of various internal tables and strings |
30 | */ | 33 | */ |
@@ -97,7 +100,7 @@ enum protocolEnum { | |||
97 | /* Netbios frames protocol not supported at this time */ | 100 | /* Netbios frames protocol not supported at this time */ |
98 | }; | 101 | }; |
99 | 102 | ||
100 | struct mac_key { | 103 | struct session_key { |
101 | unsigned int len; | 104 | unsigned int len; |
102 | union { | 105 | union { |
103 | char ntlm[CIFS_SESS_KEY_SIZE + 16]; | 106 | char ntlm[CIFS_SESS_KEY_SIZE + 16]; |
@@ -120,6 +123,21 @@ struct cifs_cred { | |||
120 | struct cifs_ace *aces; | 123 | struct cifs_ace *aces; |
121 | }; | 124 | }; |
122 | 125 | ||
126 | struct sdesc { | ||
127 | struct shash_desc shash; | ||
128 | char ctx[]; | ||
129 | }; | ||
130 | |||
131 | struct ntlmssp_auth { | ||
132 | __u32 client_flags; | ||
133 | __u32 server_flags; | ||
134 | unsigned char ciphertext[CIFS_CPHTXT_SIZE]; | ||
135 | struct crypto_shash *hmacmd5; | ||
136 | struct crypto_shash *md5; | ||
137 | struct sdesc *sdeschmacmd5; | ||
138 | struct sdesc *sdescmd5; | ||
139 | }; | ||
140 | |||
123 | /* | 141 | /* |
124 | ***************************************************************** | 142 | ***************************************************************** |
125 | * Except the CIFS PDUs themselves all the | 143 | * Except the CIFS PDUs themselves all the |
@@ -182,11 +200,14 @@ struct TCP_Server_Info { | |||
182 | /* 16th byte of RFC1001 workstation name is always null */ | 200 | /* 16th byte of RFC1001 workstation name is always null */ |
183 | char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; | 201 | char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; |
184 | __u32 sequence_number; /* needed for CIFS PDU signature */ | 202 | __u32 sequence_number; /* needed for CIFS PDU signature */ |
185 | struct mac_key mac_signing_key; | 203 | struct session_key session_key; |
186 | char ntlmv2_hash[16]; | 204 | char ntlmv2_hash[16]; |
187 | unsigned long lstrp; /* when we got last response from this server */ | 205 | unsigned long lstrp; /* when we got last response from this server */ |
188 | u16 dialect; /* dialect index that server chose */ | 206 | u16 dialect; /* dialect index that server chose */ |
189 | /* extended security flavors that server supports */ | 207 | /* extended security flavors that server supports */ |
208 | unsigned int tilen; /* length of the target info blob */ | ||
209 | unsigned char *tiblob; /* target info blob in challenge response */ | ||
210 | struct ntlmssp_auth ntlmssp; /* various keys, ciphers, flags */ | ||
190 | bool sec_kerberos; /* supports plain Kerberos */ | 211 | bool sec_kerberos; /* supports plain Kerberos */ |
191 | bool sec_mskerberos; /* supports legacy MS Kerberos */ | 212 | bool sec_mskerberos; /* supports legacy MS Kerberos */ |
192 | bool sec_kerberosu2u; /* supports U2U Kerberos */ | 213 | bool sec_kerberosu2u; /* supports U2U Kerberos */ |
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 14d036d8db11..320e0fd0ba7b 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h | |||
@@ -134,6 +134,12 @@ | |||
134 | * Size of the session key (crypto key encrypted with the password | 134 | * Size of the session key (crypto key encrypted with the password |
135 | */ | 135 | */ |
136 | #define CIFS_SESS_KEY_SIZE (24) | 136 | #define CIFS_SESS_KEY_SIZE (24) |
137 | #define CIFS_CLIENT_CHALLENGE_SIZE (8) | ||
138 | #define CIFS_SERVER_CHALLENGE_SIZE (8) | ||
139 | #define CIFS_HMAC_MD5_HASH_SIZE (16) | ||
140 | #define CIFS_CPHTXT_SIZE (16) | ||
141 | #define CIFS_NTLMV2_SESSKEY_SIZE (16) | ||
142 | #define CIFS_NTHASH_SIZE (16) | ||
137 | 143 | ||
138 | /* | 144 | /* |
139 | * Maximum user name length | 145 | * Maximum user name length |
@@ -663,7 +669,6 @@ struct ntlmv2_resp { | |||
663 | __le64 time; | 669 | __le64 time; |
664 | __u64 client_chal; /* random */ | 670 | __u64 client_chal; /* random */ |
665 | __u32 reserved2; | 671 | __u32 reserved2; |
666 | struct ntlmssp2_name names[2]; | ||
667 | /* array of name entries could follow ending in minimum 4 byte struct */ | 672 | /* array of name entries could follow ending in minimum 4 byte struct */ |
668 | } __attribute__((packed)); | 673 | } __attribute__((packed)); |
669 | 674 | ||
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1f5450814087..1378d9133844 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -361,15 +361,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); | |||
361 | extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, | 361 | extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, |
362 | __u32 *); | 362 | __u32 *); |
363 | extern int cifs_verify_signature(struct smb_hdr *, | 363 | extern int cifs_verify_signature(struct smb_hdr *, |
364 | const struct mac_key *mac_key, | 364 | struct TCP_Server_Info *server, |
365 | __u32 expected_sequence_number); | 365 | __u32 expected_sequence_number); |
366 | extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn, | 366 | extern int cifs_calculate_session_key(struct session_key *key, const char *rn, |
367 | const char *pass); | 367 | const char *pass); |
368 | extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, | 368 | extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *, |
369 | const struct nls_table *); | ||
370 | extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); | ||
371 | extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, | ||
372 | const struct nls_table *); | 369 | const struct nls_table *); |
370 | extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); | ||
371 | extern void cifs_crypto_shash_release(struct TCP_Server_Info *); | ||
372 | extern int calc_seckey(struct TCP_Server_Info *); | ||
373 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 373 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
374 | extern void calc_lanman_hash(const char *password, const char *cryptkey, | 374 | extern void calc_lanman_hash(const char *password, const char *cryptkey, |
375 | bool encrypt, char *lnm_session_key); | 375 | bool encrypt, char *lnm_session_key); |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c65c3419dd37..4bda920d1f75 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -604,11 +604,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) | |||
604 | else | 604 | else |
605 | rc = -EINVAL; | 605 | rc = -EINVAL; |
606 | 606 | ||
607 | if (server->sec_kerberos || server->sec_mskerberos) | 607 | if (server->secType == Kerberos) { |
608 | server->secType = Kerberos; | 608 | if (!server->sec_kerberos && |
609 | else if (server->sec_ntlmssp) | 609 | !server->sec_mskerberos) |
610 | server->secType = RawNTLMSSP; | 610 | rc = -EOPNOTSUPP; |
611 | else | 611 | } else if (server->secType == RawNTLMSSP) { |
612 | if (!server->sec_ntlmssp) | ||
613 | rc = -EOPNOTSUPP; | ||
614 | } else | ||
612 | rc = -EOPNOTSUPP; | 615 | rc = -EOPNOTSUPP; |
613 | } | 616 | } |
614 | } else | 617 | } else |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 95c2ea67edfb..ec0ea4a43bdb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -1673,7 +1673,9 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) | |||
1673 | MAX_USERNAME_SIZE)) | 1673 | MAX_USERNAME_SIZE)) |
1674 | continue; | 1674 | continue; |
1675 | if (strlen(vol->username) != 0 && | 1675 | if (strlen(vol->username) != 0 && |
1676 | strncmp(ses->password, vol->password, | 1676 | ses->password != NULL && |
1677 | strncmp(ses->password, | ||
1678 | vol->password ? vol->password : "", | ||
1677 | MAX_PASSWORD_SIZE)) | 1679 | MAX_PASSWORD_SIZE)) |
1678 | continue; | 1680 | continue; |
1679 | } | 1681 | } |
@@ -1706,6 +1708,7 @@ cifs_put_smb_ses(struct cifsSesInfo *ses) | |||
1706 | CIFSSMBLogoff(xid, ses); | 1708 | CIFSSMBLogoff(xid, ses); |
1707 | _FreeXid(xid); | 1709 | _FreeXid(xid); |
1708 | } | 1710 | } |
1711 | cifs_crypto_shash_release(server); | ||
1709 | sesInfoFree(ses); | 1712 | sesInfoFree(ses); |
1710 | cifs_put_tcp_session(server); | 1713 | cifs_put_tcp_session(server); |
1711 | } | 1714 | } |
@@ -1785,13 +1788,23 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | |||
1785 | ses->linux_uid = volume_info->linux_uid; | 1788 | ses->linux_uid = volume_info->linux_uid; |
1786 | ses->overrideSecFlg = volume_info->secFlg; | 1789 | ses->overrideSecFlg = volume_info->secFlg; |
1787 | 1790 | ||
1791 | rc = cifs_crypto_shash_allocate(server); | ||
1792 | if (rc) { | ||
1793 | cERROR(1, "could not setup hash structures rc %d", rc); | ||
1794 | goto get_ses_fail; | ||
1795 | } | ||
1796 | server->tilen = 0; | ||
1797 | server->tiblob = NULL; | ||
1798 | |||
1788 | mutex_lock(&ses->session_mutex); | 1799 | mutex_lock(&ses->session_mutex); |
1789 | rc = cifs_negotiate_protocol(xid, ses); | 1800 | rc = cifs_negotiate_protocol(xid, ses); |
1790 | if (!rc) | 1801 | if (!rc) |
1791 | rc = cifs_setup_session(xid, ses, volume_info->local_nls); | 1802 | rc = cifs_setup_session(xid, ses, volume_info->local_nls); |
1792 | mutex_unlock(&ses->session_mutex); | 1803 | mutex_unlock(&ses->session_mutex); |
1793 | if (rc) | 1804 | if (rc) { |
1805 | cifs_crypto_shash_release(ses->server); | ||
1794 | goto get_ses_fail; | 1806 | goto get_ses_fail; |
1807 | } | ||
1795 | 1808 | ||
1796 | /* success, put it on the list */ | 1809 | /* success, put it on the list */ |
1797 | write_lock(&cifs_tcp_ses_lock); | 1810 | write_lock(&cifs_tcp_ses_lock); |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 578d88c5b46e..f9ed0751cc12 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -305,8 +305,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
305 | full_path = build_path_from_dentry(direntry); | 305 | full_path = build_path_from_dentry(direntry); |
306 | if (full_path == NULL) { | 306 | if (full_path == NULL) { |
307 | rc = -ENOMEM; | 307 | rc = -ENOMEM; |
308 | FreeXid(xid); | 308 | goto cifs_create_out; |
309 | return rc; | ||
310 | } | 309 | } |
311 | 310 | ||
312 | if (oplockEnabled) | 311 | if (oplockEnabled) |
@@ -365,9 +364,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
365 | 364 | ||
366 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); | 365 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); |
367 | if (buf == NULL) { | 366 | if (buf == NULL) { |
368 | kfree(full_path); | 367 | rc = -ENOMEM; |
369 | FreeXid(xid); | 368 | goto cifs_create_out; |
370 | return -ENOMEM; | ||
371 | } | 369 | } |
372 | 370 | ||
373 | /* | 371 | /* |
@@ -496,6 +494,11 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
496 | struct cifsTconInfo *pTcon; | 494 | struct cifsTconInfo *pTcon; |
497 | char *full_path = NULL; | 495 | char *full_path = NULL; |
498 | struct inode *newinode = NULL; | 496 | struct inode *newinode = NULL; |
497 | int oplock = 0; | ||
498 | u16 fileHandle; | ||
499 | FILE_ALL_INFO *buf = NULL; | ||
500 | unsigned int bytes_written; | ||
501 | struct win_dev *pdev; | ||
499 | 502 | ||
500 | if (!old_valid_dev(device_number)) | 503 | if (!old_valid_dev(device_number)) |
501 | return -EINVAL; | 504 | return -EINVAL; |
@@ -506,9 +509,12 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
506 | pTcon = cifs_sb->tcon; | 509 | pTcon = cifs_sb->tcon; |
507 | 510 | ||
508 | full_path = build_path_from_dentry(direntry); | 511 | full_path = build_path_from_dentry(direntry); |
509 | if (full_path == NULL) | 512 | if (full_path == NULL) { |
510 | rc = -ENOMEM; | 513 | rc = -ENOMEM; |
511 | else if (pTcon->unix_ext) { | 514 | goto mknod_out; |
515 | } | ||
516 | |||
517 | if (pTcon->unix_ext) { | ||
512 | struct cifs_unix_set_info_args args = { | 518 | struct cifs_unix_set_info_args args = { |
513 | .mode = mode & ~current_umask(), | 519 | .mode = mode & ~current_umask(), |
514 | .ctime = NO_CHANGE_64, | 520 | .ctime = NO_CHANGE_64, |
@@ -527,87 +533,78 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
527 | cifs_sb->local_nls, | 533 | cifs_sb->local_nls, |
528 | cifs_sb->mnt_cifs_flags & | 534 | cifs_sb->mnt_cifs_flags & |
529 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 535 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
536 | if (rc) | ||
537 | goto mknod_out; | ||
530 | 538 | ||
531 | if (!rc) { | 539 | rc = cifs_get_inode_info_unix(&newinode, full_path, |
532 | rc = cifs_get_inode_info_unix(&newinode, full_path, | ||
533 | inode->i_sb, xid); | 540 | inode->i_sb, xid); |
534 | if (pTcon->nocase) | 541 | if (pTcon->nocase) |
535 | direntry->d_op = &cifs_ci_dentry_ops; | 542 | direntry->d_op = &cifs_ci_dentry_ops; |
536 | else | 543 | else |
537 | direntry->d_op = &cifs_dentry_ops; | 544 | direntry->d_op = &cifs_dentry_ops; |
538 | if (rc == 0) | ||
539 | d_instantiate(direntry, newinode); | ||
540 | } | ||
541 | } else { | ||
542 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { | ||
543 | int oplock = 0; | ||
544 | u16 fileHandle; | ||
545 | FILE_ALL_INFO *buf; | ||
546 | 545 | ||
547 | cFYI(1, "sfu compat create special file"); | 546 | if (rc == 0) |
547 | d_instantiate(direntry, newinode); | ||
548 | goto mknod_out; | ||
549 | } | ||
548 | 550 | ||
549 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); | 551 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) |
550 | if (buf == NULL) { | 552 | goto mknod_out; |
551 | kfree(full_path); | ||
552 | rc = -ENOMEM; | ||
553 | FreeXid(xid); | ||
554 | return rc; | ||
555 | } | ||
556 | 553 | ||
557 | rc = CIFSSMBOpen(xid, pTcon, full_path, | 554 | |
558 | FILE_CREATE, /* fail if exists */ | 555 | cFYI(1, "sfu compat create special file"); |
559 | GENERIC_WRITE /* BB would | 556 | |
560 | WRITE_OWNER | WRITE_DAC be better? */, | 557 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); |
561 | /* Create a file and set the | 558 | if (buf == NULL) { |
562 | file attribute to SYSTEM */ | 559 | kfree(full_path); |
563 | CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, | 560 | rc = -ENOMEM; |
564 | &fileHandle, &oplock, buf, | 561 | FreeXid(xid); |
565 | cifs_sb->local_nls, | 562 | return rc; |
566 | cifs_sb->mnt_cifs_flags & | ||
567 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
568 | |||
569 | /* BB FIXME - add handling for backlevel servers | ||
570 | which need legacy open and check for all | ||
571 | calls to SMBOpen for fallback to SMBLeagcyOpen */ | ||
572 | if (!rc) { | ||
573 | /* BB Do not bother to decode buf since no | ||
574 | local inode yet to put timestamps in, | ||
575 | but we can reuse it safely */ | ||
576 | unsigned int bytes_written; | ||
577 | struct win_dev *pdev; | ||
578 | pdev = (struct win_dev *)buf; | ||
579 | if (S_ISCHR(mode)) { | ||
580 | memcpy(pdev->type, "IntxCHR", 8); | ||
581 | pdev->major = | ||
582 | cpu_to_le64(MAJOR(device_number)); | ||
583 | pdev->minor = | ||
584 | cpu_to_le64(MINOR(device_number)); | ||
585 | rc = CIFSSMBWrite(xid, pTcon, | ||
586 | fileHandle, | ||
587 | sizeof(struct win_dev), | ||
588 | 0, &bytes_written, (char *)pdev, | ||
589 | NULL, 0); | ||
590 | } else if (S_ISBLK(mode)) { | ||
591 | memcpy(pdev->type, "IntxBLK", 8); | ||
592 | pdev->major = | ||
593 | cpu_to_le64(MAJOR(device_number)); | ||
594 | pdev->minor = | ||
595 | cpu_to_le64(MINOR(device_number)); | ||
596 | rc = CIFSSMBWrite(xid, pTcon, | ||
597 | fileHandle, | ||
598 | sizeof(struct win_dev), | ||
599 | 0, &bytes_written, (char *)pdev, | ||
600 | NULL, 0); | ||
601 | } /* else if(S_ISFIFO */ | ||
602 | CIFSSMBClose(xid, pTcon, fileHandle); | ||
603 | d_drop(direntry); | ||
604 | } | ||
605 | kfree(buf); | ||
606 | /* add code here to set EAs */ | ||
607 | } | ||
608 | } | 563 | } |
609 | 564 | ||
565 | /* FIXME: would WRITE_OWNER | WRITE_DAC be better? */ | ||
566 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_CREATE, | ||
567 | GENERIC_WRITE, CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, | ||
568 | &fileHandle, &oplock, buf, cifs_sb->local_nls, | ||
569 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
570 | if (rc) | ||
571 | goto mknod_out; | ||
572 | |||
573 | /* BB Do not bother to decode buf since no local inode yet to put | ||
574 | * timestamps in, but we can reuse it safely */ | ||
575 | |||
576 | pdev = (struct win_dev *)buf; | ||
577 | if (S_ISCHR(mode)) { | ||
578 | memcpy(pdev->type, "IntxCHR", 8); | ||
579 | pdev->major = | ||
580 | cpu_to_le64(MAJOR(device_number)); | ||
581 | pdev->minor = | ||
582 | cpu_to_le64(MINOR(device_number)); | ||
583 | rc = CIFSSMBWrite(xid, pTcon, | ||
584 | fileHandle, | ||
585 | sizeof(struct win_dev), | ||
586 | 0, &bytes_written, (char *)pdev, | ||
587 | NULL, 0); | ||
588 | } else if (S_ISBLK(mode)) { | ||
589 | memcpy(pdev->type, "IntxBLK", 8); | ||
590 | pdev->major = | ||
591 | cpu_to_le64(MAJOR(device_number)); | ||
592 | pdev->minor = | ||
593 | cpu_to_le64(MINOR(device_number)); | ||
594 | rc = CIFSSMBWrite(xid, pTcon, | ||
595 | fileHandle, | ||
596 | sizeof(struct win_dev), | ||
597 | 0, &bytes_written, (char *)pdev, | ||
598 | NULL, 0); | ||
599 | } /* else if (S_ISFIFO) */ | ||
600 | CIFSSMBClose(xid, pTcon, fileHandle); | ||
601 | d_drop(direntry); | ||
602 | |||
603 | /* FIXME: add code here to set EAs */ | ||
604 | |||
605 | mknod_out: | ||
610 | kfree(full_path); | 606 | kfree(full_path); |
607 | kfree(buf); | ||
611 | FreeXid(xid); | 608 | FreeXid(xid); |
612 | return rc; | 609 | return rc; |
613 | } | 610 | } |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index db11fdef0e92..de748c652d11 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -242,8 +242,7 @@ int cifs_open(struct inode *inode, struct file *file) | |||
242 | full_path = build_path_from_dentry(file->f_path.dentry); | 242 | full_path = build_path_from_dentry(file->f_path.dentry); |
243 | if (full_path == NULL) { | 243 | if (full_path == NULL) { |
244 | rc = -ENOMEM; | 244 | rc = -ENOMEM; |
245 | FreeXid(xid); | 245 | goto out; |
246 | return rc; | ||
247 | } | 246 | } |
248 | 247 | ||
249 | cFYI(1, "inode = 0x%p file flags are 0x%x for %s", | 248 | cFYI(1, "inode = 0x%p file flags are 0x%x for %s", |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 4bc47e5b5f29..86a164f08a74 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -834,7 +834,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) | |||
834 | xid, NULL); | 834 | xid, NULL); |
835 | 835 | ||
836 | if (!inode) | 836 | if (!inode) |
837 | return ERR_PTR(-ENOMEM); | 837 | return ERR_PTR(rc); |
838 | 838 | ||
839 | #ifdef CONFIG_CIFS_FSCACHE | 839 | #ifdef CONFIG_CIFS_FSCACHE |
840 | /* populate tcon->resource_id */ | 840 | /* populate tcon->resource_id */ |
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 49c9a4e75319..1db0f0746a5b 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h | |||
@@ -61,6 +61,19 @@ | |||
61 | #define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 | 61 | #define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 |
62 | #define NTLMSSP_NEGOTIATE_56 0x80000000 | 62 | #define NTLMSSP_NEGOTIATE_56 0x80000000 |
63 | 63 | ||
64 | /* Define AV Pair Field IDs */ | ||
65 | #define NTLMSSP_AV_EOL 0 | ||
66 | #define NTLMSSP_AV_NB_COMPUTER_NAME 1 | ||
67 | #define NTLMSSP_AV_NB_DOMAIN_NAME 2 | ||
68 | #define NTLMSSP_AV_DNS_COMPUTER_NAME 3 | ||
69 | #define NTLMSSP_AV_DNS_DOMAIN_NAME 4 | ||
70 | #define NTLMSSP_AV_DNS_TREE_NAME 5 | ||
71 | #define NTLMSSP_AV_FLAGS 6 | ||
72 | #define NTLMSSP_AV_TIMESTAMP 7 | ||
73 | #define NTLMSSP_AV_RESTRICTION 8 | ||
74 | #define NTLMSSP_AV_TARGET_NAME 9 | ||
75 | #define NTLMSSP_AV_CHANNEL_BINDINGS 10 | ||
76 | |||
64 | /* Although typedefs are not commonly used for structure definitions */ | 77 | /* Although typedefs are not commonly used for structure definitions */ |
65 | /* in the Linux kernel, in this particular case they are useful */ | 78 | /* in the Linux kernel, in this particular case they are useful */ |
66 | /* to more closely match the standards document for NTLMSSP from */ | 79 | /* to more closely match the standards document for NTLMSSP from */ |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 0a57cb7db5dd..795095f4eac6 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -383,6 +383,9 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft, | |||
383 | static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, | 383 | static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, |
384 | struct cifsSesInfo *ses) | 384 | struct cifsSesInfo *ses) |
385 | { | 385 | { |
386 | unsigned int tioffset; /* challeng message target info area */ | ||
387 | unsigned int tilen; /* challeng message target info area length */ | ||
388 | |||
386 | CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; | 389 | CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; |
387 | 390 | ||
388 | if (blob_len < sizeof(CHALLENGE_MESSAGE)) { | 391 | if (blob_len < sizeof(CHALLENGE_MESSAGE)) { |
@@ -405,6 +408,20 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, | |||
405 | /* BB spec says that if AvId field of MsvAvTimestamp is populated then | 408 | /* BB spec says that if AvId field of MsvAvTimestamp is populated then |
406 | we must set the MIC field of the AUTHENTICATE_MESSAGE */ | 409 | we must set the MIC field of the AUTHENTICATE_MESSAGE */ |
407 | 410 | ||
411 | ses->server->ntlmssp.server_flags = le32_to_cpu(pblob->NegotiateFlags); | ||
412 | |||
413 | tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); | ||
414 | tilen = cpu_to_le16(pblob->TargetInfoArray.Length); | ||
415 | ses->server->tilen = tilen; | ||
416 | if (tilen) { | ||
417 | ses->server->tiblob = kmalloc(tilen, GFP_KERNEL); | ||
418 | if (!ses->server->tiblob) { | ||
419 | cERROR(1, "Challenge target info allocation failure"); | ||
420 | return -ENOMEM; | ||
421 | } | ||
422 | memcpy(ses->server->tiblob, bcc_ptr + tioffset, tilen); | ||
423 | } | ||
424 | |||
408 | return 0; | 425 | return 0; |
409 | } | 426 | } |
410 | 427 | ||
@@ -425,12 +442,13 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, | |||
425 | /* BB is NTLMV2 session security format easier to use here? */ | 442 | /* BB is NTLMV2 session security format easier to use here? */ |
426 | flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | | 443 | flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | |
427 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | | 444 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | |
428 | NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM; | 445 | NTLMSSP_NEGOTIATE_NTLM; |
429 | if (ses->server->secMode & | 446 | if (ses->server->secMode & |
430 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | 447 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { |
431 | flags |= NTLMSSP_NEGOTIATE_SIGN; | 448 | flags |= NTLMSSP_NEGOTIATE_SIGN | |
432 | if (ses->server->secMode & SECMODE_SIGN_REQUIRED) | 449 | NTLMSSP_NEGOTIATE_KEY_XCH | |
433 | flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; | 450 | NTLMSSP_NEGOTIATE_EXTENDED_SEC; |
451 | } | ||
434 | 452 | ||
435 | sec_blob->NegotiateFlags |= cpu_to_le32(flags); | 453 | sec_blob->NegotiateFlags |= cpu_to_le32(flags); |
436 | 454 | ||
@@ -451,10 +469,12 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
451 | struct cifsSesInfo *ses, | 469 | struct cifsSesInfo *ses, |
452 | const struct nls_table *nls_cp, bool first) | 470 | const struct nls_table *nls_cp, bool first) |
453 | { | 471 | { |
472 | int rc; | ||
473 | unsigned int size; | ||
454 | AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; | 474 | AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; |
455 | __u32 flags; | 475 | __u32 flags; |
456 | unsigned char *tmp; | 476 | unsigned char *tmp; |
457 | char ntlm_session_key[CIFS_SESS_KEY_SIZE]; | 477 | struct ntlmv2_resp ntlmv2_response = {}; |
458 | 478 | ||
459 | memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); | 479 | memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); |
460 | sec_blob->MessageType = NtLmAuthenticate; | 480 | sec_blob->MessageType = NtLmAuthenticate; |
@@ -477,19 +497,25 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
477 | sec_blob->LmChallengeResponse.Length = 0; | 497 | sec_blob->LmChallengeResponse.Length = 0; |
478 | sec_blob->LmChallengeResponse.MaximumLength = 0; | 498 | sec_blob->LmChallengeResponse.MaximumLength = 0; |
479 | 499 | ||
480 | /* calculate session key, BB what about adding similar ntlmv2 path? */ | ||
481 | SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key); | ||
482 | if (first) | ||
483 | cifs_calculate_mac_key(&ses->server->mac_signing_key, | ||
484 | ntlm_session_key, ses->password); | ||
485 | |||
486 | memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE); | ||
487 | sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); | 500 | sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); |
488 | sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE); | 501 | rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp); |
489 | sec_blob->NtChallengeResponse.MaximumLength = | 502 | if (rc) { |
490 | cpu_to_le16(CIFS_SESS_KEY_SIZE); | 503 | cERROR(1, "error rc: %d during ntlmssp ntlmv2 setup", rc); |
504 | goto setup_ntlmv2_ret; | ||
505 | } | ||
506 | size = sizeof(struct ntlmv2_resp); | ||
507 | memcpy(tmp, (char *)&ntlmv2_response, size); | ||
508 | tmp += size; | ||
509 | if (ses->server->tilen > 0) { | ||
510 | memcpy(tmp, ses->server->tiblob, ses->server->tilen); | ||
511 | tmp += ses->server->tilen; | ||
512 | } else | ||
513 | ses->server->tilen = 0; | ||
491 | 514 | ||
492 | tmp += CIFS_SESS_KEY_SIZE; | 515 | sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + |
516 | ses->server->tilen); | ||
517 | sec_blob->NtChallengeResponse.MaximumLength = | ||
518 | cpu_to_le16(size + ses->server->tilen); | ||
493 | 519 | ||
494 | if (ses->domainName == NULL) { | 520 | if (ses->domainName == NULL) { |
495 | sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); | 521 | sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); |
@@ -501,7 +527,6 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
501 | len = cifs_strtoUCS((__le16 *)tmp, ses->domainName, | 527 | len = cifs_strtoUCS((__le16 *)tmp, ses->domainName, |
502 | MAX_USERNAME_SIZE, nls_cp); | 528 | MAX_USERNAME_SIZE, nls_cp); |
503 | len *= 2; /* unicode is 2 bytes each */ | 529 | len *= 2; /* unicode is 2 bytes each */ |
504 | len += 2; /* trailing null */ | ||
505 | sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); | 530 | sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); |
506 | sec_blob->DomainName.Length = cpu_to_le16(len); | 531 | sec_blob->DomainName.Length = cpu_to_le16(len); |
507 | sec_blob->DomainName.MaximumLength = cpu_to_le16(len); | 532 | sec_blob->DomainName.MaximumLength = cpu_to_le16(len); |
@@ -518,7 +543,6 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
518 | len = cifs_strtoUCS((__le16 *)tmp, ses->userName, | 543 | len = cifs_strtoUCS((__le16 *)tmp, ses->userName, |
519 | MAX_USERNAME_SIZE, nls_cp); | 544 | MAX_USERNAME_SIZE, nls_cp); |
520 | len *= 2; /* unicode is 2 bytes each */ | 545 | len *= 2; /* unicode is 2 bytes each */ |
521 | len += 2; /* trailing null */ | ||
522 | sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); | 546 | sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); |
523 | sec_blob->UserName.Length = cpu_to_le16(len); | 547 | sec_blob->UserName.Length = cpu_to_le16(len); |
524 | sec_blob->UserName.MaximumLength = cpu_to_le16(len); | 548 | sec_blob->UserName.MaximumLength = cpu_to_le16(len); |
@@ -530,9 +554,26 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
530 | sec_blob->WorkstationName.MaximumLength = 0; | 554 | sec_blob->WorkstationName.MaximumLength = 0; |
531 | tmp += 2; | 555 | tmp += 2; |
532 | 556 | ||
533 | sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); | 557 | if ((ses->server->ntlmssp.server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) && |
534 | sec_blob->SessionKey.Length = 0; | 558 | !calc_seckey(ses->server)) { |
535 | sec_blob->SessionKey.MaximumLength = 0; | 559 | memcpy(tmp, ses->server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE); |
560 | sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); | ||
561 | sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); | ||
562 | sec_blob->SessionKey.MaximumLength = | ||
563 | cpu_to_le16(CIFS_CPHTXT_SIZE); | ||
564 | tmp += CIFS_CPHTXT_SIZE; | ||
565 | } else { | ||
566 | sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); | ||
567 | sec_blob->SessionKey.Length = 0; | ||
568 | sec_blob->SessionKey.MaximumLength = 0; | ||
569 | } | ||
570 | |||
571 | ses->server->sequence_number = 0; | ||
572 | |||
573 | setup_ntlmv2_ret: | ||
574 | if (ses->server->tilen > 0) | ||
575 | kfree(ses->server->tiblob); | ||
576 | |||
536 | return tmp - pbuffer; | 577 | return tmp - pbuffer; |
537 | } | 578 | } |
538 | 579 | ||
@@ -546,15 +587,14 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB, | |||
546 | return; | 587 | return; |
547 | } | 588 | } |
548 | 589 | ||
549 | static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB, | 590 | static int setup_ntlmssp_auth_req(char *ntlmsspblob, |
550 | struct cifsSesInfo *ses, | 591 | struct cifsSesInfo *ses, |
551 | const struct nls_table *nls, bool first_time) | 592 | const struct nls_table *nls, bool first_time) |
552 | { | 593 | { |
553 | int bloblen; | 594 | int bloblen; |
554 | 595 | ||
555 | bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls, | 596 | bloblen = build_ntlmssp_auth_blob(ntlmsspblob, ses, nls, |
556 | first_time); | 597 | first_time); |
557 | pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen); | ||
558 | 598 | ||
559 | return bloblen; | 599 | return bloblen; |
560 | } | 600 | } |
@@ -690,7 +730,7 @@ ssetup_ntlmssp_authenticate: | |||
690 | 730 | ||
691 | if (first_time) /* should this be moved into common code | 731 | if (first_time) /* should this be moved into common code |
692 | with similar ntlmv2 path? */ | 732 | with similar ntlmv2 path? */ |
693 | cifs_calculate_mac_key(&ses->server->mac_signing_key, | 733 | cifs_calculate_session_key(&ses->server->session_key, |
694 | ntlm_session_key, ses->password); | 734 | ntlm_session_key, ses->password); |
695 | /* copy session key */ | 735 | /* copy session key */ |
696 | 736 | ||
@@ -729,12 +769,21 @@ ssetup_ntlmssp_authenticate: | |||
729 | cpu_to_le16(sizeof(struct ntlmv2_resp)); | 769 | cpu_to_le16(sizeof(struct ntlmv2_resp)); |
730 | 770 | ||
731 | /* calculate session key */ | 771 | /* calculate session key */ |
732 | setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); | 772 | rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); |
773 | if (rc) { | ||
774 | kfree(v2_sess_key); | ||
775 | goto ssetup_exit; | ||
776 | } | ||
733 | /* FIXME: calculate MAC key */ | 777 | /* FIXME: calculate MAC key */ |
734 | memcpy(bcc_ptr, (char *)v2_sess_key, | 778 | memcpy(bcc_ptr, (char *)v2_sess_key, |
735 | sizeof(struct ntlmv2_resp)); | 779 | sizeof(struct ntlmv2_resp)); |
736 | bcc_ptr += sizeof(struct ntlmv2_resp); | 780 | bcc_ptr += sizeof(struct ntlmv2_resp); |
737 | kfree(v2_sess_key); | 781 | kfree(v2_sess_key); |
782 | if (ses->server->tilen > 0) { | ||
783 | memcpy(bcc_ptr, ses->server->tiblob, | ||
784 | ses->server->tilen); | ||
785 | bcc_ptr += ses->server->tilen; | ||
786 | } | ||
738 | if (ses->capabilities & CAP_UNICODE) { | 787 | if (ses->capabilities & CAP_UNICODE) { |
739 | if (iov[0].iov_len % 2) { | 788 | if (iov[0].iov_len % 2) { |
740 | *bcc_ptr = 0; | 789 | *bcc_ptr = 0; |
@@ -765,15 +814,15 @@ ssetup_ntlmssp_authenticate: | |||
765 | } | 814 | } |
766 | /* bail out if key is too long */ | 815 | /* bail out if key is too long */ |
767 | if (msg->sesskey_len > | 816 | if (msg->sesskey_len > |
768 | sizeof(ses->server->mac_signing_key.data.krb5)) { | 817 | sizeof(ses->server->session_key.data.krb5)) { |
769 | cERROR(1, "Kerberos signing key too long (%u bytes)", | 818 | cERROR(1, "Kerberos signing key too long (%u bytes)", |
770 | msg->sesskey_len); | 819 | msg->sesskey_len); |
771 | rc = -EOVERFLOW; | 820 | rc = -EOVERFLOW; |
772 | goto ssetup_exit; | 821 | goto ssetup_exit; |
773 | } | 822 | } |
774 | if (first_time) { | 823 | if (first_time) { |
775 | ses->server->mac_signing_key.len = msg->sesskey_len; | 824 | ses->server->session_key.len = msg->sesskey_len; |
776 | memcpy(ses->server->mac_signing_key.data.krb5, | 825 | memcpy(ses->server->session_key.data.krb5, |
777 | msg->data, msg->sesskey_len); | 826 | msg->data, msg->sesskey_len); |
778 | } | 827 | } |
779 | pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; | 828 | pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; |
@@ -815,12 +864,28 @@ ssetup_ntlmssp_authenticate: | |||
815 | if (phase == NtLmNegotiate) { | 864 | if (phase == NtLmNegotiate) { |
816 | setup_ntlmssp_neg_req(pSMB, ses); | 865 | setup_ntlmssp_neg_req(pSMB, ses); |
817 | iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); | 866 | iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); |
867 | iov[1].iov_base = &pSMB->req.SecurityBlob[0]; | ||
818 | } else if (phase == NtLmAuthenticate) { | 868 | } else if (phase == NtLmAuthenticate) { |
819 | int blob_len; | 869 | int blob_len; |
820 | blob_len = setup_ntlmssp_auth_req(pSMB, ses, | 870 | char *ntlmsspblob; |
821 | nls_cp, | 871 | |
822 | first_time); | 872 | ntlmsspblob = kmalloc(5 * |
873 | sizeof(struct _AUTHENTICATE_MESSAGE), | ||
874 | GFP_KERNEL); | ||
875 | if (!ntlmsspblob) { | ||
876 | cERROR(1, "Can't allocate NTLMSSP"); | ||
877 | rc = -ENOMEM; | ||
878 | goto ssetup_exit; | ||
879 | } | ||
880 | |||
881 | blob_len = setup_ntlmssp_auth_req(ntlmsspblob, | ||
882 | ses, | ||
883 | nls_cp, | ||
884 | first_time); | ||
823 | iov[1].iov_len = blob_len; | 885 | iov[1].iov_len = blob_len; |
886 | iov[1].iov_base = ntlmsspblob; | ||
887 | pSMB->req.SecurityBlobLength = | ||
888 | cpu_to_le16(blob_len); | ||
824 | /* Make sure that we tell the server that we | 889 | /* Make sure that we tell the server that we |
825 | are using the uid that it just gave us back | 890 | are using the uid that it just gave us back |
826 | on the response (challenge) */ | 891 | on the response (challenge) */ |
@@ -830,7 +895,6 @@ ssetup_ntlmssp_authenticate: | |||
830 | rc = -ENOSYS; | 895 | rc = -ENOSYS; |
831 | goto ssetup_exit; | 896 | goto ssetup_exit; |
832 | } | 897 | } |
833 | iov[1].iov_base = &pSMB->req.SecurityBlob[0]; | ||
834 | /* unicode strings must be word aligned */ | 898 | /* unicode strings must be word aligned */ |
835 | if ((iov[0].iov_len + iov[1].iov_len) % 2) { | 899 | if ((iov[0].iov_len + iov[1].iov_len) % 2) { |
836 | *bcc_ptr = 0; | 900 | *bcc_ptr = 0; |
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 82f78c4d6978..e0588cdf4cc5 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
543 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | | 543 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | |
544 | SECMODE_SIGN_ENABLED))) { | 544 | SECMODE_SIGN_ENABLED))) { |
545 | rc = cifs_verify_signature(midQ->resp_buf, | 545 | rc = cifs_verify_signature(midQ->resp_buf, |
546 | &ses->server->mac_signing_key, | 546 | ses->server, |
547 | midQ->sequence_number+1); | 547 | midQ->sequence_number+1); |
548 | if (rc) { | 548 | if (rc) { |
549 | cERROR(1, "Unexpected SMB signature"); | 549 | cERROR(1, "Unexpected SMB signature"); |
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
731 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | | 731 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | |
732 | SECMODE_SIGN_ENABLED))) { | 732 | SECMODE_SIGN_ENABLED))) { |
733 | rc = cifs_verify_signature(out_buf, | 733 | rc = cifs_verify_signature(out_buf, |
734 | &ses->server->mac_signing_key, | 734 | ses->server, |
735 | midQ->sequence_number+1); | 735 | midQ->sequence_number+1); |
736 | if (rc) { | 736 | if (rc) { |
737 | cERROR(1, "Unexpected SMB signature"); | 737 | cERROR(1, "Unexpected SMB signature"); |
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, | |||
981 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | | 981 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | |
982 | SECMODE_SIGN_ENABLED))) { | 982 | SECMODE_SIGN_ENABLED))) { |
983 | rc = cifs_verify_signature(out_buf, | 983 | rc = cifs_verify_signature(out_buf, |
984 | &ses->server->mac_signing_key, | 984 | ses->server, |
985 | midQ->sequence_number+1); | 985 | midQ->sequence_number+1); |
986 | if (rc) { | 986 | if (rc) { |
987 | cERROR(1, "Unexpected SMB signature"); | 987 | cERROR(1, "Unexpected SMB signature"); |
diff --git a/fs/compat.c b/fs/compat.c index e6d5d70cf3cf..718c7062aec1 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -77,7 +77,8 @@ int compat_printk(const char *fmt, ...) | |||
77 | * Not all architectures have sys_utime, so implement this in terms | 77 | * Not all architectures have sys_utime, so implement this in terms |
78 | * of sys_utimes. | 78 | * of sys_utimes. |
79 | */ | 79 | */ |
80 | asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __user *t) | 80 | asmlinkage long compat_sys_utime(const char __user *filename, |
81 | struct compat_utimbuf __user *t) | ||
81 | { | 82 | { |
82 | struct timespec tv[2]; | 83 | struct timespec tv[2]; |
83 | 84 | ||
@@ -91,7 +92,7 @@ asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __ | |||
91 | return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); | 92 | return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); |
92 | } | 93 | } |
93 | 94 | ||
94 | asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, struct compat_timespec __user *t, int flags) | 95 | asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, struct compat_timespec __user *t, int flags) |
95 | { | 96 | { |
96 | struct timespec tv[2]; | 97 | struct timespec tv[2]; |
97 | 98 | ||
@@ -106,7 +107,7 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, st | |||
106 | return do_utimes(dfd, filename, t ? tv : NULL, flags); | 107 | return do_utimes(dfd, filename, t ? tv : NULL, flags); |
107 | } | 108 | } |
108 | 109 | ||
109 | asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, struct compat_timeval __user *t) | 110 | asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, struct compat_timeval __user *t) |
110 | { | 111 | { |
111 | struct timespec tv[2]; | 112 | struct timespec tv[2]; |
112 | 113 | ||
@@ -125,7 +126,7 @@ asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, st | |||
125 | return do_utimes(dfd, filename, t ? tv : NULL, 0); | 126 | return do_utimes(dfd, filename, t ? tv : NULL, 0); |
126 | } | 127 | } |
127 | 128 | ||
128 | asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval __user *t) | 129 | asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_timeval __user *t) |
129 | { | 130 | { |
130 | return compat_sys_futimesat(AT_FDCWD, filename, t); | 131 | return compat_sys_futimesat(AT_FDCWD, filename, t); |
131 | } | 132 | } |
@@ -169,7 +170,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) | |||
169 | return err; | 170 | return err; |
170 | } | 171 | } |
171 | 172 | ||
172 | asmlinkage long compat_sys_newstat(char __user * filename, | 173 | asmlinkage long compat_sys_newstat(const char __user * filename, |
173 | struct compat_stat __user *statbuf) | 174 | struct compat_stat __user *statbuf) |
174 | { | 175 | { |
175 | struct kstat stat; | 176 | struct kstat stat; |
@@ -181,7 +182,7 @@ asmlinkage long compat_sys_newstat(char __user * filename, | |||
181 | return cp_compat_stat(&stat, statbuf); | 182 | return cp_compat_stat(&stat, statbuf); |
182 | } | 183 | } |
183 | 184 | ||
184 | asmlinkage long compat_sys_newlstat(char __user * filename, | 185 | asmlinkage long compat_sys_newlstat(const char __user * filename, |
185 | struct compat_stat __user *statbuf) | 186 | struct compat_stat __user *statbuf) |
186 | { | 187 | { |
187 | struct kstat stat; | 188 | struct kstat stat; |
@@ -194,7 +195,8 @@ asmlinkage long compat_sys_newlstat(char __user * filename, | |||
194 | } | 195 | } |
195 | 196 | ||
196 | #ifndef __ARCH_WANT_STAT64 | 197 | #ifndef __ARCH_WANT_STAT64 |
197 | asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename, | 198 | asmlinkage long compat_sys_newfstatat(unsigned int dfd, |
199 | const char __user *filename, | ||
198 | struct compat_stat __user *statbuf, int flag) | 200 | struct compat_stat __user *statbuf, int flag) |
199 | { | 201 | { |
200 | struct kstat stat; | 202 | struct kstat stat; |
@@ -837,9 +839,10 @@ static int do_nfs4_super_data_conv(void *raw_data) | |||
837 | #define NCPFS_NAME "ncpfs" | 839 | #define NCPFS_NAME "ncpfs" |
838 | #define NFS4_NAME "nfs4" | 840 | #define NFS4_NAME "nfs4" |
839 | 841 | ||
840 | asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, | 842 | asmlinkage long compat_sys_mount(const char __user * dev_name, |
841 | char __user * type, unsigned long flags, | 843 | const char __user * dir_name, |
842 | void __user * data) | 844 | const char __user * type, unsigned long flags, |
845 | const void __user * data) | ||
843 | { | 846 | { |
844 | char *kernel_type; | 847 | char *kernel_type; |
845 | unsigned long data_page; | 848 | unsigned long data_page; |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 70227e0dc01d..03e59aa318eb 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -1699,8 +1699,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, | |||
1699 | goto out_fput; | 1699 | goto out_fput; |
1700 | } | 1700 | } |
1701 | 1701 | ||
1702 | if (!filp->f_op || | 1702 | if (!filp->f_op || !filp->f_op->unlocked_ioctl) |
1703 | (!filp->f_op->ioctl && !filp->f_op->unlocked_ioctl)) | ||
1704 | goto do_ioctl; | 1703 | goto do_ioctl; |
1705 | break; | 1704 | break; |
1706 | } | 1705 | } |
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index a53b130b366c..1e7a33028d33 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
@@ -80,7 +80,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, | |||
80 | } | 80 | } |
81 | } else { | 81 | } else { |
82 | inode = iget_locked(sb, CRAMINO(cramfs_inode)); | 82 | inode = iget_locked(sb, CRAMINO(cramfs_inode)); |
83 | if (inode) { | 83 | if (inode && (inode->i_state & I_NEW)) { |
84 | setup_inode(inode, cramfs_inode); | 84 | setup_inode(inode, cramfs_inode); |
85 | unlock_new_inode(inode); | 85 | unlock_new_inode(inode); |
86 | } | 86 | } |
diff --git a/fs/dcache.c b/fs/dcache.c index 166d35d56868..83293be48149 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci); | |||
1332 | * d_lookup - search for a dentry | 1332 | * d_lookup - search for a dentry |
1333 | * @parent: parent dentry | 1333 | * @parent: parent dentry |
1334 | * @name: qstr of name we wish to find | 1334 | * @name: qstr of name we wish to find |
1335 | * Returns: dentry, or NULL | ||
1335 | * | 1336 | * |
1336 | * Searches the children of the parent dentry for the name in question. If | 1337 | * d_lookup searches the children of the parent dentry for the name in |
1337 | * the dentry is found its reference count is incremented and the dentry | 1338 | * question. If the dentry is found its reference count is incremented and the |
1338 | * is returned. The caller must use dput to free the entry when it has | 1339 | * dentry is returned. The caller must use dput to free the entry when it has |
1339 | * finished using it. %NULL is returned on failure. | 1340 | * finished using it. %NULL is returned if the dentry does not exist. |
1340 | * | ||
1341 | * __d_lookup is dcache_lock free. The hash list is protected using RCU. | ||
1342 | * Memory barriers are used while updating and doing lockless traversal. | ||
1343 | * To avoid races with d_move while rename is happening, d_lock is used. | ||
1344 | * | ||
1345 | * Overflows in memcmp(), while d_move, are avoided by keeping the length | ||
1346 | * and name pointer in one structure pointed by d_qstr. | ||
1347 | * | ||
1348 | * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while | ||
1349 | * lookup is going on. | ||
1350 | * | ||
1351 | * The dentry unused LRU is not updated even if lookup finds the required dentry | ||
1352 | * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, | ||
1353 | * select_parent and __dget_locked. This laziness saves lookup from dcache_lock | ||
1354 | * acquisition. | ||
1355 | * | ||
1356 | * d_lookup() is protected against the concurrent renames in some unrelated | ||
1357 | * directory using the seqlockt_t rename_lock. | ||
1358 | */ | 1341 | */ |
1359 | |||
1360 | struct dentry * d_lookup(struct dentry * parent, struct qstr * name) | 1342 | struct dentry * d_lookup(struct dentry * parent, struct qstr * name) |
1361 | { | 1343 | { |
1362 | struct dentry * dentry = NULL; | 1344 | struct dentry * dentry = NULL; |
@@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) | |||
1372 | } | 1354 | } |
1373 | EXPORT_SYMBOL(d_lookup); | 1355 | EXPORT_SYMBOL(d_lookup); |
1374 | 1356 | ||
1357 | /* | ||
1358 | * __d_lookup - search for a dentry (racy) | ||
1359 | * @parent: parent dentry | ||
1360 | * @name: qstr of name we wish to find | ||
1361 | * Returns: dentry, or NULL | ||
1362 | * | ||
1363 | * __d_lookup is like d_lookup, however it may (rarely) return a | ||
1364 | * false-negative result due to unrelated rename activity. | ||
1365 | * | ||
1366 | * __d_lookup is slightly faster because it avoids the rename_lock read | ||
1367 | * seqlock; however, it must be used carefully, e.g. with a following | ||
1368 | * d_lookup in the case of failure. | ||
1369 | * | ||
1370 | * __d_lookup callers must be commented. | ||
1371 | */ | ||
1375 | struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | 1372 | struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) |
1376 | { | 1373 | { |
1377 | unsigned int len = name->len; | 1374 | unsigned int len = name->len; |
@@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1382 | struct hlist_node *node; | 1379 | struct hlist_node *node; |
1383 | struct dentry *dentry; | 1380 | struct dentry *dentry; |
1384 | 1381 | ||
1382 | /* | ||
1383 | * The hash list is protected using RCU. | ||
1384 | * | ||
1385 | * Take d_lock when comparing a candidate dentry, to avoid races | ||
1386 | * with d_move(). | ||
1387 | * | ||
1388 | * It is possible that concurrent renames can mess up our list | ||
1389 | * walk here and result in missing our dentry, resulting in the | ||
1390 | * false-negative result. d_lookup() protects against concurrent | ||
1391 | * renames using rename_lock seqlock. | ||
1392 | * | ||
1393 | * See Documentation/filesystems/dcache-locking.txt for more details. | ||
1394 | */ | ||
1385 | rcu_read_lock(); | 1395 | rcu_read_lock(); |
1386 | 1396 | ||
1387 | hlist_for_each_entry_rcu(dentry, node, head, d_hash) { | 1397 | hlist_for_each_entry_rcu(dentry, node, head, d_hash) { |
@@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1396 | 1406 | ||
1397 | /* | 1407 | /* |
1398 | * Recheck the dentry after taking the lock - d_move may have | 1408 | * Recheck the dentry after taking the lock - d_move may have |
1399 | * changed things. Don't bother checking the hash because we're | 1409 | * changed things. Don't bother checking the hash because |
1400 | * about to compare the whole name anyway. | 1410 | * we're about to compare the whole name anyway. |
1401 | */ | 1411 | */ |
1402 | if (dentry->d_parent != parent) | 1412 | if (dentry->d_parent != parent) |
1403 | goto next; | 1413 | goto next; |
@@ -1925,7 +1935,7 @@ static int prepend_path(const struct path *path, struct path *root, | |||
1925 | bool slash = false; | 1935 | bool slash = false; |
1926 | int error = 0; | 1936 | int error = 0; |
1927 | 1937 | ||
1928 | spin_lock(&vfsmount_lock); | 1938 | br_read_lock(vfsmount_lock); |
1929 | while (dentry != root->dentry || vfsmnt != root->mnt) { | 1939 | while (dentry != root->dentry || vfsmnt != root->mnt) { |
1930 | struct dentry * parent; | 1940 | struct dentry * parent; |
1931 | 1941 | ||
@@ -1954,7 +1964,7 @@ out: | |||
1954 | if (!error && !slash) | 1964 | if (!error && !slash) |
1955 | error = prepend(buffer, buflen, "/", 1); | 1965 | error = prepend(buffer, buflen, "/", 1); |
1956 | 1966 | ||
1957 | spin_unlock(&vfsmount_lock); | 1967 | br_read_unlock(vfsmount_lock); |
1958 | return error; | 1968 | return error; |
1959 | 1969 | ||
1960 | global_root: | 1970 | global_root: |
@@ -1976,7 +1986,7 @@ global_root: | |||
1976 | * __d_path - return the path of a dentry | 1986 | * __d_path - return the path of a dentry |
1977 | * @path: the dentry/vfsmount to report | 1987 | * @path: the dentry/vfsmount to report |
1978 | * @root: root vfsmnt/dentry (may be modified by this function) | 1988 | * @root: root vfsmnt/dentry (may be modified by this function) |
1979 | * @buffer: buffer to return value in | 1989 | * @buf: buffer to return value in |
1980 | * @buflen: buffer length | 1990 | * @buflen: buffer length |
1981 | * | 1991 | * |
1982 | * Convert a dentry into an ASCII path name. | 1992 | * Convert a dentry into an ASCII path name. |
@@ -2292,11 +2302,12 @@ int path_is_under(struct path *path1, struct path *path2) | |||
2292 | struct vfsmount *mnt = path1->mnt; | 2302 | struct vfsmount *mnt = path1->mnt; |
2293 | struct dentry *dentry = path1->dentry; | 2303 | struct dentry *dentry = path1->dentry; |
2294 | int res; | 2304 | int res; |
2295 | spin_lock(&vfsmount_lock); | 2305 | |
2306 | br_read_lock(vfsmount_lock); | ||
2296 | if (mnt != path2->mnt) { | 2307 | if (mnt != path2->mnt) { |
2297 | for (;;) { | 2308 | for (;;) { |
2298 | if (mnt->mnt_parent == mnt) { | 2309 | if (mnt->mnt_parent == mnt) { |
2299 | spin_unlock(&vfsmount_lock); | 2310 | br_read_unlock(vfsmount_lock); |
2300 | return 0; | 2311 | return 0; |
2301 | } | 2312 | } |
2302 | if (mnt->mnt_parent == path2->mnt) | 2313 | if (mnt->mnt_parent == path2->mnt) |
@@ -2306,7 +2317,7 @@ int path_is_under(struct path *path1, struct path *path2) | |||
2306 | dentry = mnt->mnt_mountpoint; | 2317 | dentry = mnt->mnt_mountpoint; |
2307 | } | 2318 | } |
2308 | res = is_subdir(dentry, path2->dentry); | 2319 | res = is_subdir(dentry, path2->dentry); |
2309 | spin_unlock(&vfsmount_lock); | 2320 | br_read_unlock(vfsmount_lock); |
2310 | return res; | 2321 | return res; |
2311 | } | 2322 | } |
2312 | EXPORT_SYMBOL(path_is_under); | 2323 | EXPORT_SYMBOL(path_is_under); |
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index a2e3b562e65d..cbadc1bee6e7 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -1793,7 +1793,7 @@ struct kmem_cache *ecryptfs_key_tfm_cache; | |||
1793 | static struct list_head key_tfm_list; | 1793 | static struct list_head key_tfm_list; |
1794 | struct mutex key_tfm_list_mutex; | 1794 | struct mutex key_tfm_list_mutex; |
1795 | 1795 | ||
1796 | int ecryptfs_init_crypto(void) | 1796 | int __init ecryptfs_init_crypto(void) |
1797 | { | 1797 | { |
1798 | mutex_init(&key_tfm_list_mutex); | 1798 | mutex_init(&key_tfm_list_mutex); |
1799 | INIT_LIST_HEAD(&key_tfm_list); | 1799 | INIT_LIST_HEAD(&key_tfm_list); |
@@ -2169,7 +2169,6 @@ int ecryptfs_encrypt_and_encode_filename( | |||
2169 | (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE | 2169 | (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE |
2170 | + encoded_name_no_prefix_size); | 2170 | + encoded_name_no_prefix_size); |
2171 | (*encoded_name)[(*encoded_name_size)] = '\0'; | 2171 | (*encoded_name)[(*encoded_name_size)] = '\0'; |
2172 | (*encoded_name_size)++; | ||
2173 | } else { | 2172 | } else { |
2174 | rc = -EOPNOTSUPP; | 2173 | rc = -EOPNOTSUPP; |
2175 | } | 2174 | } |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 6c55113e7222..3fbc94203380 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -349,7 +349,7 @@ out: | |||
349 | 349 | ||
350 | /** | 350 | /** |
351 | * ecryptfs_new_lower_dentry | 351 | * ecryptfs_new_lower_dentry |
352 | * @ename: The name of the new dentry. | 352 | * @name: The name of the new dentry. |
353 | * @lower_dir_dentry: Parent directory of the new dentry. | 353 | * @lower_dir_dentry: Parent directory of the new dentry. |
354 | * @nd: nameidata from last lookup. | 354 | * @nd: nameidata from last lookup. |
355 | * | 355 | * |
@@ -386,20 +386,19 @@ ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry, | |||
386 | * ecryptfs_lookup_one_lower | 386 | * ecryptfs_lookup_one_lower |
387 | * @ecryptfs_dentry: The eCryptfs dentry that we are looking up | 387 | * @ecryptfs_dentry: The eCryptfs dentry that we are looking up |
388 | * @lower_dir_dentry: lower parent directory | 388 | * @lower_dir_dentry: lower parent directory |
389 | * @name: lower file name | ||
389 | * | 390 | * |
390 | * Get the lower dentry from vfs. If lower dentry does not exist yet, | 391 | * Get the lower dentry from vfs. If lower dentry does not exist yet, |
391 | * create it. | 392 | * create it. |
392 | */ | 393 | */ |
393 | static struct dentry * | 394 | static struct dentry * |
394 | ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry, | 395 | ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry, |
395 | struct dentry *lower_dir_dentry) | 396 | struct dentry *lower_dir_dentry, struct qstr *name) |
396 | { | 397 | { |
397 | struct nameidata nd; | 398 | struct nameidata nd; |
398 | struct vfsmount *lower_mnt; | 399 | struct vfsmount *lower_mnt; |
399 | struct qstr *name; | ||
400 | int err; | 400 | int err; |
401 | 401 | ||
402 | name = &ecryptfs_dentry->d_name; | ||
403 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( | 402 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( |
404 | ecryptfs_dentry->d_parent)); | 403 | ecryptfs_dentry->d_parent)); |
405 | err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd); | 404 | err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd); |
@@ -434,6 +433,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
434 | size_t encrypted_and_encoded_name_size; | 433 | size_t encrypted_and_encoded_name_size; |
435 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; | 434 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; |
436 | struct dentry *lower_dir_dentry, *lower_dentry; | 435 | struct dentry *lower_dir_dentry, *lower_dentry; |
436 | struct qstr lower_name; | ||
437 | int rc = 0; | 437 | int rc = 0; |
438 | 438 | ||
439 | ecryptfs_dentry->d_op = &ecryptfs_dops; | 439 | ecryptfs_dentry->d_op = &ecryptfs_dops; |
@@ -444,9 +444,17 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
444 | goto out_d_drop; | 444 | goto out_d_drop; |
445 | } | 445 | } |
446 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); | 446 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); |
447 | 447 | lower_name.name = ecryptfs_dentry->d_name.name; | |
448 | lower_name.len = ecryptfs_dentry->d_name.len; | ||
449 | lower_name.hash = ecryptfs_dentry->d_name.hash; | ||
450 | if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { | ||
451 | rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, | ||
452 | &lower_name); | ||
453 | if (rc < 0) | ||
454 | goto out_d_drop; | ||
455 | } | ||
448 | lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, | 456 | lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, |
449 | lower_dir_dentry); | 457 | lower_dir_dentry, &lower_name); |
450 | if (IS_ERR(lower_dentry)) { | 458 | if (IS_ERR(lower_dentry)) { |
451 | rc = PTR_ERR(lower_dentry); | 459 | rc = PTR_ERR(lower_dentry); |
452 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " | 460 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " |
@@ -471,8 +479,17 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
471 | "filename; rc = [%d]\n", __func__, rc); | 479 | "filename; rc = [%d]\n", __func__, rc); |
472 | goto out_d_drop; | 480 | goto out_d_drop; |
473 | } | 481 | } |
482 | lower_name.name = encrypted_and_encoded_name; | ||
483 | lower_name.len = encrypted_and_encoded_name_size; | ||
484 | lower_name.hash = full_name_hash(lower_name.name, lower_name.len); | ||
485 | if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { | ||
486 | rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, | ||
487 | &lower_name); | ||
488 | if (rc < 0) | ||
489 | goto out_d_drop; | ||
490 | } | ||
474 | lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, | 491 | lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, |
475 | lower_dir_dentry); | 492 | lower_dir_dentry, &lower_name); |
476 | if (IS_ERR(lower_dentry)) { | 493 | if (IS_ERR(lower_dentry)) { |
477 | rc = PTR_ERR(lower_dentry); | 494 | rc = PTR_ERR(lower_dentry); |
478 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " | 495 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " |
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 89c5476506ef..73811cfa2ea4 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c | |||
@@ -515,6 +515,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, | |||
515 | if (!s) { | 515 | if (!s) { |
516 | printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " | 516 | printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " |
517 | "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); | 517 | "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); |
518 | rc = -ENOMEM; | ||
518 | goto out; | 519 | goto out; |
519 | } | 520 | } |
520 | s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; | 521 | s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; |
@@ -806,6 +807,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, | |||
806 | if (!s) { | 807 | if (!s) { |
807 | printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " | 808 | printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " |
808 | "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); | 809 | "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); |
810 | rc = -ENOMEM; | ||
809 | goto out; | 811 | goto out; |
810 | } | 812 | } |
811 | s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; | 813 | s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; |
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index d8c3a373aafa..0851ab6980f5 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c | |||
@@ -86,7 +86,7 @@ out: | |||
86 | return 0; | 86 | return 0; |
87 | } | 87 | } |
88 | 88 | ||
89 | int ecryptfs_init_kthread(void) | 89 | int __init ecryptfs_init_kthread(void) |
90 | { | 90 | { |
91 | int rc = 0; | 91 | int rc = 0; |
92 | 92 | ||
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index bcb68c0cb1f0..ab2248090515 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -473,7 +473,7 @@ sleep: | |||
473 | return rc; | 473 | return rc; |
474 | } | 474 | } |
475 | 475 | ||
476 | int ecryptfs_init_messaging(void) | 476 | int __init ecryptfs_init_messaging(void) |
477 | { | 477 | { |
478 | int i; | 478 | int i; |
479 | int rc = 0; | 479 | int rc = 0; |
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 3745f612bcd4..00208c3d7e92 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c | |||
@@ -500,7 +500,7 @@ static struct miscdevice ecryptfs_miscdev = { | |||
500 | * | 500 | * |
501 | * Returns zero on success; non-zero otherwise | 501 | * Returns zero on success; non-zero otherwise |
502 | */ | 502 | */ |
503 | int ecryptfs_init_ecryptfs_miscdev(void) | 503 | int __init ecryptfs_init_ecryptfs_miscdev(void) |
504 | { | 504 | { |
505 | int rc; | 505 | int rc; |
506 | 506 | ||
@@ -361,13 +361,13 @@ err: | |||
361 | /* | 361 | /* |
362 | * count() counts the number of strings in array ARGV. | 362 | * count() counts the number of strings in array ARGV. |
363 | */ | 363 | */ |
364 | static int count(char __user * __user * argv, int max) | 364 | static int count(const char __user * const __user * argv, int max) |
365 | { | 365 | { |
366 | int i = 0; | 366 | int i = 0; |
367 | 367 | ||
368 | if (argv != NULL) { | 368 | if (argv != NULL) { |
369 | for (;;) { | 369 | for (;;) { |
370 | char __user * p; | 370 | const char __user * p; |
371 | 371 | ||
372 | if (get_user(p, argv)) | 372 | if (get_user(p, argv)) |
373 | return -EFAULT; | 373 | return -EFAULT; |
@@ -387,7 +387,7 @@ static int count(char __user * __user * argv, int max) | |||
387 | * processes's memory to the new process's stack. The call to get_user_pages() | 387 | * processes's memory to the new process's stack. The call to get_user_pages() |
388 | * ensures the destination page is created and not swapped out. | 388 | * ensures the destination page is created and not swapped out. |
389 | */ | 389 | */ |
390 | static int copy_strings(int argc, char __user * __user * argv, | 390 | static int copy_strings(int argc, const char __user *const __user *argv, |
391 | struct linux_binprm *bprm) | 391 | struct linux_binprm *bprm) |
392 | { | 392 | { |
393 | struct page *kmapped_page = NULL; | 393 | struct page *kmapped_page = NULL; |
@@ -396,7 +396,7 @@ static int copy_strings(int argc, char __user * __user * argv, | |||
396 | int ret; | 396 | int ret; |
397 | 397 | ||
398 | while (argc-- > 0) { | 398 | while (argc-- > 0) { |
399 | char __user *str; | 399 | const char __user *str; |
400 | int len; | 400 | int len; |
401 | unsigned long pos; | 401 | unsigned long pos; |
402 | 402 | ||
@@ -470,12 +470,13 @@ out: | |||
470 | /* | 470 | /* |
471 | * Like copy_strings, but get argv and its values from kernel memory. | 471 | * Like copy_strings, but get argv and its values from kernel memory. |
472 | */ | 472 | */ |
473 | int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm) | 473 | int copy_strings_kernel(int argc, const char *const *argv, |
474 | struct linux_binprm *bprm) | ||
474 | { | 475 | { |
475 | int r; | 476 | int r; |
476 | mm_segment_t oldfs = get_fs(); | 477 | mm_segment_t oldfs = get_fs(); |
477 | set_fs(KERNEL_DS); | 478 | set_fs(KERNEL_DS); |
478 | r = copy_strings(argc, (char __user * __user *)argv, bprm); | 479 | r = copy_strings(argc, (const char __user *const __user *)argv, bprm); |
479 | set_fs(oldfs); | 480 | set_fs(oldfs); |
480 | return r; | 481 | return r; |
481 | } | 482 | } |
@@ -997,7 +998,7 @@ EXPORT_SYMBOL(flush_old_exec); | |||
997 | void setup_new_exec(struct linux_binprm * bprm) | 998 | void setup_new_exec(struct linux_binprm * bprm) |
998 | { | 999 | { |
999 | int i, ch; | 1000 | int i, ch; |
1000 | char * name; | 1001 | const char *name; |
1001 | char tcomm[sizeof(current->comm)]; | 1002 | char tcomm[sizeof(current->comm)]; |
1002 | 1003 | ||
1003 | arch_pick_mmap_layout(current->mm); | 1004 | arch_pick_mmap_layout(current->mm); |
@@ -1117,7 +1118,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) | |||
1117 | bprm->unsafe = tracehook_unsafe_exec(p); | 1118 | bprm->unsafe = tracehook_unsafe_exec(p); |
1118 | 1119 | ||
1119 | n_fs = 1; | 1120 | n_fs = 1; |
1120 | write_lock(&p->fs->lock); | 1121 | spin_lock(&p->fs->lock); |
1121 | rcu_read_lock(); | 1122 | rcu_read_lock(); |
1122 | for (t = next_thread(p); t != p; t = next_thread(t)) { | 1123 | for (t = next_thread(p); t != p; t = next_thread(t)) { |
1123 | if (t->fs == p->fs) | 1124 | if (t->fs == p->fs) |
@@ -1134,7 +1135,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) | |||
1134 | res = 1; | 1135 | res = 1; |
1135 | } | 1136 | } |
1136 | } | 1137 | } |
1137 | write_unlock(&p->fs->lock); | 1138 | spin_unlock(&p->fs->lock); |
1138 | 1139 | ||
1139 | return res; | 1140 | return res; |
1140 | } | 1141 | } |
@@ -1316,9 +1317,9 @@ EXPORT_SYMBOL(search_binary_handler); | |||
1316 | /* | 1317 | /* |
1317 | * sys_execve() executes a new program. | 1318 | * sys_execve() executes a new program. |
1318 | */ | 1319 | */ |
1319 | int do_execve(char * filename, | 1320 | int do_execve(const char * filename, |
1320 | char __user *__user *argv, | 1321 | const char __user *const __user *argv, |
1321 | char __user *__user *envp, | 1322 | const char __user *const __user *envp, |
1322 | struct pt_regs * regs) | 1323 | struct pt_regs * regs) |
1323 | { | 1324 | { |
1324 | struct linux_binprm *bprm; | 1325 | struct linux_binprm *bprm; |
diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1fa23f6ffba5..1736f2356388 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c | |||
@@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) | |||
250 | { | 250 | { |
251 | int i, err = 0; | 251 | int i, err = 0; |
252 | 252 | ||
253 | ll_rw_block(SWRITE, nr_bhs, bhs); | 253 | for (i = 0; i < nr_bhs; i++) |
254 | write_dirty_buffer(bhs[i], WRITE); | ||
255 | |||
254 | for (i = 0; i < nr_bhs; i++) { | 256 | for (i = 0; i < nr_bhs; i++) { |
255 | wait_on_buffer(bhs[i]); | 257 | wait_on_buffer(bhs[i]); |
256 | if (buffer_eopnotsupp(bhs[i])) { | 258 | if (buffer_eopnotsupp(bhs[i])) { |
diff --git a/fs/file_table.c b/fs/file_table.c index 2fc3b3c08911..a04bdd81c11c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -20,7 +20,9 @@ | |||
20 | #include <linux/cdev.h> | 20 | #include <linux/cdev.h> |
21 | #include <linux/fsnotify.h> | 21 | #include <linux/fsnotify.h> |
22 | #include <linux/sysctl.h> | 22 | #include <linux/sysctl.h> |
23 | #include <linux/lglock.h> | ||
23 | #include <linux/percpu_counter.h> | 24 | #include <linux/percpu_counter.h> |
25 | #include <linux/percpu.h> | ||
24 | #include <linux/ima.h> | 26 | #include <linux/ima.h> |
25 | 27 | ||
26 | #include <asm/atomic.h> | 28 | #include <asm/atomic.h> |
@@ -32,8 +34,8 @@ struct files_stat_struct files_stat = { | |||
32 | .max_files = NR_FILE | 34 | .max_files = NR_FILE |
33 | }; | 35 | }; |
34 | 36 | ||
35 | /* public. Not pretty! */ | 37 | DECLARE_LGLOCK(files_lglock); |
36 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); | 38 | DEFINE_LGLOCK(files_lglock); |
37 | 39 | ||
38 | /* SLAB cache for file structures */ | 40 | /* SLAB cache for file structures */ |
39 | static struct kmem_cache *filp_cachep __read_mostly; | 41 | static struct kmem_cache *filp_cachep __read_mostly; |
@@ -230,15 +232,6 @@ static void __fput(struct file *file) | |||
230 | might_sleep(); | 232 | might_sleep(); |
231 | 233 | ||
232 | fsnotify_close(file); | 234 | fsnotify_close(file); |
233 | |||
234 | /* | ||
235 | * fsnotify_create_event may have taken one or more references on this | ||
236 | * file. If it did so it left one reference for us to drop to make sure | ||
237 | * its calls to fput could not prematurely destroy the file. | ||
238 | */ | ||
239 | if (atomic_long_read(&file->f_count)) | ||
240 | return fput(file); | ||
241 | |||
242 | /* | 235 | /* |
243 | * The function eventpoll_release() should be the first called | 236 | * The function eventpoll_release() should be the first called |
244 | * in the file cleanup chain. | 237 | * in the file cleanup chain. |
@@ -258,7 +251,7 @@ static void __fput(struct file *file) | |||
258 | cdev_put(inode->i_cdev); | 251 | cdev_put(inode->i_cdev); |
259 | fops_put(file->f_op); | 252 | fops_put(file->f_op); |
260 | put_pid(file->f_owner.pid); | 253 | put_pid(file->f_owner.pid); |
261 | file_kill(file); | 254 | file_sb_list_del(file); |
262 | if (file->f_mode & FMODE_WRITE) | 255 | if (file->f_mode & FMODE_WRITE) |
263 | drop_file_write_access(file); | 256 | drop_file_write_access(file); |
264 | file->f_path.dentry = NULL; | 257 | file->f_path.dentry = NULL; |
@@ -337,41 +330,107 @@ struct file *fget_light(unsigned int fd, int *fput_needed) | |||
337 | return file; | 330 | return file; |
338 | } | 331 | } |
339 | 332 | ||
340 | |||
341 | void put_filp(struct file *file) | 333 | void put_filp(struct file *file) |
342 | { | 334 | { |
343 | if (atomic_long_dec_and_test(&file->f_count)) { | 335 | if (atomic_long_dec_and_test(&file->f_count)) { |
344 | security_file_free(file); | 336 | security_file_free(file); |
345 | file_kill(file); | 337 | file_sb_list_del(file); |
346 | file_free(file); | 338 | file_free(file); |
347 | } | 339 | } |
348 | } | 340 | } |
349 | 341 | ||
350 | void file_move(struct file *file, struct list_head *list) | 342 | static inline int file_list_cpu(struct file *file) |
351 | { | 343 | { |
352 | if (!list) | 344 | #ifdef CONFIG_SMP |
353 | return; | 345 | return file->f_sb_list_cpu; |
354 | file_list_lock(); | 346 | #else |
355 | list_move(&file->f_u.fu_list, list); | 347 | return smp_processor_id(); |
356 | file_list_unlock(); | 348 | #endif |
349 | } | ||
350 | |||
351 | /* helper for file_sb_list_add to reduce ifdefs */ | ||
352 | static inline void __file_sb_list_add(struct file *file, struct super_block *sb) | ||
353 | { | ||
354 | struct list_head *list; | ||
355 | #ifdef CONFIG_SMP | ||
356 | int cpu; | ||
357 | cpu = smp_processor_id(); | ||
358 | file->f_sb_list_cpu = cpu; | ||
359 | list = per_cpu_ptr(sb->s_files, cpu); | ||
360 | #else | ||
361 | list = &sb->s_files; | ||
362 | #endif | ||
363 | list_add(&file->f_u.fu_list, list); | ||
357 | } | 364 | } |
358 | 365 | ||
359 | void file_kill(struct file *file) | 366 | /** |
367 | * file_sb_list_add - add a file to the sb's file list | ||
368 | * @file: file to add | ||
369 | * @sb: sb to add it to | ||
370 | * | ||
371 | * Use this function to associate a file with the superblock of the inode it | ||
372 | * refers to. | ||
373 | */ | ||
374 | void file_sb_list_add(struct file *file, struct super_block *sb) | ||
375 | { | ||
376 | lg_local_lock(files_lglock); | ||
377 | __file_sb_list_add(file, sb); | ||
378 | lg_local_unlock(files_lglock); | ||
379 | } | ||
380 | |||
381 | /** | ||
382 | * file_sb_list_del - remove a file from the sb's file list | ||
383 | * @file: file to remove | ||
384 | * | ||
385 | * Use this function to remove a file from its superblock. The lock to take | ||
386 | * is found from the file itself, so no superblock argument is needed. | ||
387 | */ | ||
388 | void file_sb_list_del(struct file *file) | ||
360 | { | 389 | { |
361 | if (!list_empty(&file->f_u.fu_list)) { | 390 | if (!list_empty(&file->f_u.fu_list)) { |
362 | file_list_lock(); | 391 | lg_local_lock_cpu(files_lglock, file_list_cpu(file)); |
363 | list_del_init(&file->f_u.fu_list); | 392 | list_del_init(&file->f_u.fu_list); |
364 | file_list_unlock(); | 393 | lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); |
365 | } | 394 | } |
366 | } | 395 | } |
367 | 396 | ||
397 | #ifdef CONFIG_SMP | ||
398 | |||
399 | /* | ||
400 | * These macros iterate all files on all CPUs for a given superblock. | ||
401 | * files_lglock must be held globally. | ||
402 | */ | ||
403 | #define do_file_list_for_each_entry(__sb, __file) \ | ||
404 | { \ | ||
405 | int i; \ | ||
406 | for_each_possible_cpu(i) { \ | ||
407 | struct list_head *list; \ | ||
408 | list = per_cpu_ptr((__sb)->s_files, i); \ | ||
409 | list_for_each_entry((__file), list, f_u.fu_list) | ||
410 | |||
411 | #define while_file_list_for_each_entry \ | ||
412 | } \ | ||
413 | } | ||
414 | |||
415 | #else | ||
416 | |||
417 | #define do_file_list_for_each_entry(__sb, __file) \ | ||
418 | { \ | ||
419 | struct list_head *list; \ | ||
420 | list = &(sb)->s_files; \ | ||
421 | list_for_each_entry((__file), list, f_u.fu_list) | ||
422 | |||
423 | #define while_file_list_for_each_entry \ | ||
424 | } | ||
425 | |||
426 | #endif | ||
427 | |||
368 | int fs_may_remount_ro(struct super_block *sb) | 428 | int fs_may_remount_ro(struct super_block *sb) |
369 | { | 429 | { |
370 | struct file *file; | 430 | struct file *file; |
371 | |||
372 | /* Check that no files are currently opened for writing. */ | 431 | /* Check that no files are currently opened for writing. */ |
373 | file_list_lock(); | 432 | lg_global_lock(files_lglock); |
374 | list_for_each_entry(file, &sb->s_files, f_u.fu_list) { | 433 | do_file_list_for_each_entry(sb, file) { |
375 | struct inode *inode = file->f_path.dentry->d_inode; | 434 | struct inode *inode = file->f_path.dentry->d_inode; |
376 | 435 | ||
377 | /* File with pending delete? */ | 436 | /* File with pending delete? */ |
@@ -381,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb) | |||
381 | /* Writeable file? */ | 440 | /* Writeable file? */ |
382 | if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) | 441 | if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) |
383 | goto too_bad; | 442 | goto too_bad; |
384 | } | 443 | } while_file_list_for_each_entry; |
385 | file_list_unlock(); | 444 | lg_global_unlock(files_lglock); |
386 | return 1; /* Tis' cool bro. */ | 445 | return 1; /* Tis' cool bro. */ |
387 | too_bad: | 446 | too_bad: |
388 | file_list_unlock(); | 447 | lg_global_unlock(files_lglock); |
389 | return 0; | 448 | return 0; |
390 | } | 449 | } |
391 | 450 | ||
@@ -401,8 +460,8 @@ void mark_files_ro(struct super_block *sb) | |||
401 | struct file *f; | 460 | struct file *f; |
402 | 461 | ||
403 | retry: | 462 | retry: |
404 | file_list_lock(); | 463 | lg_global_lock(files_lglock); |
405 | list_for_each_entry(f, &sb->s_files, f_u.fu_list) { | 464 | do_file_list_for_each_entry(sb, f) { |
406 | struct vfsmount *mnt; | 465 | struct vfsmount *mnt; |
407 | if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) | 466 | if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) |
408 | continue; | 467 | continue; |
@@ -417,16 +476,13 @@ retry: | |||
417 | continue; | 476 | continue; |
418 | file_release_write(f); | 477 | file_release_write(f); |
419 | mnt = mntget(f->f_path.mnt); | 478 | mnt = mntget(f->f_path.mnt); |
420 | file_list_unlock(); | 479 | /* This can sleep, so we can't hold the spinlock. */ |
421 | /* | 480 | lg_global_unlock(files_lglock); |
422 | * This can sleep, so we can't hold | ||
423 | * the file_list_lock() spinlock. | ||
424 | */ | ||
425 | mnt_drop_write(mnt); | 481 | mnt_drop_write(mnt); |
426 | mntput(mnt); | 482 | mntput(mnt); |
427 | goto retry; | 483 | goto retry; |
428 | } | 484 | } while_file_list_for_each_entry; |
429 | file_list_unlock(); | 485 | lg_global_unlock(files_lglock); |
430 | } | 486 | } |
431 | 487 | ||
432 | void __init files_init(unsigned long mempages) | 488 | void __init files_init(unsigned long mempages) |
@@ -446,5 +502,6 @@ void __init files_init(unsigned long mempages) | |||
446 | if (files_stat.max_files < NR_FILE) | 502 | if (files_stat.max_files < NR_FILE) |
447 | files_stat.max_files = NR_FILE; | 503 | files_stat.max_files = NR_FILE; |
448 | files_defer_init(); | 504 | files_defer_init(); |
505 | lg_lock_init(files_lglock); | ||
449 | percpu_counter_init(&nr_files, 0); | 506 | percpu_counter_init(&nr_files, 0); |
450 | } | 507 | } |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 2f76c4a081a2..7d9d06ba184b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -68,7 +68,7 @@ int nr_pdflush_threads; | |||
68 | */ | 68 | */ |
69 | int writeback_in_progress(struct backing_dev_info *bdi) | 69 | int writeback_in_progress(struct backing_dev_info *bdi) |
70 | { | 70 | { |
71 | return !list_empty(&bdi->work_list); | 71 | return test_bit(BDI_writeback_running, &bdi->state); |
72 | } | 72 | } |
73 | 73 | ||
74 | static void bdi_queue_work(struct backing_dev_info *bdi, | 74 | static void bdi_queue_work(struct backing_dev_info *bdi, |
@@ -249,10 +249,18 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
249 | 249 | ||
250 | /* | 250 | /* |
251 | * Queue all expired dirty inodes for io, eldest first. | 251 | * Queue all expired dirty inodes for io, eldest first. |
252 | * Before | ||
253 | * newly dirtied b_dirty b_io b_more_io | ||
254 | * =============> gf edc BA | ||
255 | * After | ||
256 | * newly dirtied b_dirty b_io b_more_io | ||
257 | * =============> g fBAedc | ||
258 | * | | ||
259 | * +--> dequeue for IO | ||
252 | */ | 260 | */ |
253 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) | 261 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
254 | { | 262 | { |
255 | list_splice_init(&wb->b_more_io, wb->b_io.prev); | 263 | list_splice_init(&wb->b_more_io, &wb->b_io); |
256 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); | 264 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
257 | } | 265 | } |
258 | 266 | ||
@@ -363,62 +371,35 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
363 | spin_lock(&inode_lock); | 371 | spin_lock(&inode_lock); |
364 | inode->i_state &= ~I_SYNC; | 372 | inode->i_state &= ~I_SYNC; |
365 | if (!(inode->i_state & I_FREEING)) { | 373 | if (!(inode->i_state & I_FREEING)) { |
366 | if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { | 374 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
367 | /* | ||
368 | * More pages get dirtied by a fast dirtier. | ||
369 | */ | ||
370 | goto select_queue; | ||
371 | } else if (inode->i_state & I_DIRTY) { | ||
372 | /* | ||
373 | * At least XFS will redirty the inode during the | ||
374 | * writeback (delalloc) and on io completion (isize). | ||
375 | */ | ||
376 | redirty_tail(inode); | ||
377 | } else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
378 | /* | 375 | /* |
379 | * We didn't write back all the pages. nfs_writepages() | 376 | * We didn't write back all the pages. nfs_writepages() |
380 | * sometimes bales out without doing anything. Redirty | 377 | * sometimes bales out without doing anything. |
381 | * the inode; Move it from b_io onto b_more_io/b_dirty. | ||
382 | */ | 378 | */ |
383 | /* | 379 | inode->i_state |= I_DIRTY_PAGES; |
384 | * akpm: if the caller was the kupdate function we put | 380 | if (wbc->nr_to_write <= 0) { |
385 | * this inode at the head of b_dirty so it gets first | ||
386 | * consideration. Otherwise, move it to the tail, for | ||
387 | * the reasons described there. I'm not really sure | ||
388 | * how much sense this makes. Presumably I had a good | ||
389 | * reasons for doing it this way, and I'd rather not | ||
390 | * muck with it at present. | ||
391 | */ | ||
392 | if (wbc->for_kupdate) { | ||
393 | /* | 381 | /* |
394 | * For the kupdate function we move the inode | 382 | * slice used up: queue for next turn |
395 | * to b_more_io so it will get more writeout as | ||
396 | * soon as the queue becomes uncongested. | ||
397 | */ | 383 | */ |
398 | inode->i_state |= I_DIRTY_PAGES; | 384 | requeue_io(inode); |
399 | select_queue: | ||
400 | if (wbc->nr_to_write <= 0) { | ||
401 | /* | ||
402 | * slice used up: queue for next turn | ||
403 | */ | ||
404 | requeue_io(inode); | ||
405 | } else { | ||
406 | /* | ||
407 | * somehow blocked: retry later | ||
408 | */ | ||
409 | redirty_tail(inode); | ||
410 | } | ||
411 | } else { | 385 | } else { |
412 | /* | 386 | /* |
413 | * Otherwise fully redirty the inode so that | 387 | * Writeback blocked by something other than |
414 | * other inodes on this superblock will get some | 388 | * congestion. Delay the inode for some time to |
415 | * writeout. Otherwise heavy writing to one | 389 | * avoid spinning on the CPU (100% iowait) |
416 | * file would indefinitely suspend writeout of | 390 | * retrying writeback of the dirty page/inode |
417 | * all the other files. | 391 | * that cannot be performed immediately. |
418 | */ | 392 | */ |
419 | inode->i_state |= I_DIRTY_PAGES; | ||
420 | redirty_tail(inode); | 393 | redirty_tail(inode); |
421 | } | 394 | } |
395 | } else if (inode->i_state & I_DIRTY) { | ||
396 | /* | ||
397 | * Filesystems can dirty the inode during writeback | ||
398 | * operations, such as delayed allocation during | ||
399 | * submission or metadata updates after data IO | ||
400 | * completion. | ||
401 | */ | ||
402 | redirty_tail(inode); | ||
422 | } else if (atomic_read(&inode->i_count)) { | 403 | } else if (atomic_read(&inode->i_count)) { |
423 | /* | 404 | /* |
424 | * The inode is clean, inuse | 405 | * The inode is clean, inuse |
@@ -590,7 +571,7 @@ static inline bool over_bground_thresh(void) | |||
590 | { | 571 | { |
591 | unsigned long background_thresh, dirty_thresh; | 572 | unsigned long background_thresh, dirty_thresh; |
592 | 573 | ||
593 | get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); | 574 | global_dirty_limits(&background_thresh, &dirty_thresh); |
594 | 575 | ||
595 | return (global_page_state(NR_FILE_DIRTY) + | 576 | return (global_page_state(NR_FILE_DIRTY) + |
596 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); | 577 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); |
@@ -759,6 +740,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
759 | struct wb_writeback_work *work; | 740 | struct wb_writeback_work *work; |
760 | long wrote = 0; | 741 | long wrote = 0; |
761 | 742 | ||
743 | set_bit(BDI_writeback_running, &wb->bdi->state); | ||
762 | while ((work = get_next_work_item(bdi)) != NULL) { | 744 | while ((work = get_next_work_item(bdi)) != NULL) { |
763 | /* | 745 | /* |
764 | * Override sync mode, in case we must wait for completion | 746 | * Override sync mode, in case we must wait for completion |
@@ -785,6 +767,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
785 | * Check for periodic writeback, kupdated() style | 767 | * Check for periodic writeback, kupdated() style |
786 | */ | 768 | */ |
787 | wrote += wb_check_old_data_flush(wb); | 769 | wrote += wb_check_old_data_flush(wb); |
770 | clear_bit(BDI_writeback_running, &wb->bdi->state); | ||
788 | 771 | ||
789 | return wrote; | 772 | return wrote; |
790 | } | 773 | } |
diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 1ee40eb9a2c0..ed45a9cf5f3d 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c | |||
@@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) | |||
13 | { | 13 | { |
14 | struct path old_root; | 14 | struct path old_root; |
15 | 15 | ||
16 | write_lock(&fs->lock); | 16 | spin_lock(&fs->lock); |
17 | old_root = fs->root; | 17 | old_root = fs->root; |
18 | fs->root = *path; | 18 | fs->root = *path; |
19 | path_get(path); | 19 | path_get(path); |
20 | write_unlock(&fs->lock); | 20 | spin_unlock(&fs->lock); |
21 | if (old_root.dentry) | 21 | if (old_root.dentry) |
22 | path_put(&old_root); | 22 | path_put(&old_root); |
23 | } | 23 | } |
@@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) | |||
30 | { | 30 | { |
31 | struct path old_pwd; | 31 | struct path old_pwd; |
32 | 32 | ||
33 | write_lock(&fs->lock); | 33 | spin_lock(&fs->lock); |
34 | old_pwd = fs->pwd; | 34 | old_pwd = fs->pwd; |
35 | fs->pwd = *path; | 35 | fs->pwd = *path; |
36 | path_get(path); | 36 | path_get(path); |
37 | write_unlock(&fs->lock); | 37 | spin_unlock(&fs->lock); |
38 | 38 | ||
39 | if (old_pwd.dentry) | 39 | if (old_pwd.dentry) |
40 | path_put(&old_pwd); | 40 | path_put(&old_pwd); |
@@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) | |||
51 | task_lock(p); | 51 | task_lock(p); |
52 | fs = p->fs; | 52 | fs = p->fs; |
53 | if (fs) { | 53 | if (fs) { |
54 | write_lock(&fs->lock); | 54 | spin_lock(&fs->lock); |
55 | if (fs->root.dentry == old_root->dentry | 55 | if (fs->root.dentry == old_root->dentry |
56 | && fs->root.mnt == old_root->mnt) { | 56 | && fs->root.mnt == old_root->mnt) { |
57 | path_get(new_root); | 57 | path_get(new_root); |
@@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) | |||
64 | fs->pwd = *new_root; | 64 | fs->pwd = *new_root; |
65 | count++; | 65 | count++; |
66 | } | 66 | } |
67 | write_unlock(&fs->lock); | 67 | spin_unlock(&fs->lock); |
68 | } | 68 | } |
69 | task_unlock(p); | 69 | task_unlock(p); |
70 | } while_each_thread(g, p); | 70 | } while_each_thread(g, p); |
@@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk) | |||
87 | if (fs) { | 87 | if (fs) { |
88 | int kill; | 88 | int kill; |
89 | task_lock(tsk); | 89 | task_lock(tsk); |
90 | write_lock(&fs->lock); | 90 | spin_lock(&fs->lock); |
91 | tsk->fs = NULL; | 91 | tsk->fs = NULL; |
92 | kill = !--fs->users; | 92 | kill = !--fs->users; |
93 | write_unlock(&fs->lock); | 93 | spin_unlock(&fs->lock); |
94 | task_unlock(tsk); | 94 | task_unlock(tsk); |
95 | if (kill) | 95 | if (kill) |
96 | free_fs_struct(fs); | 96 | free_fs_struct(fs); |
@@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) | |||
104 | if (fs) { | 104 | if (fs) { |
105 | fs->users = 1; | 105 | fs->users = 1; |
106 | fs->in_exec = 0; | 106 | fs->in_exec = 0; |
107 | rwlock_init(&fs->lock); | 107 | spin_lock_init(&fs->lock); |
108 | fs->umask = old->umask; | 108 | fs->umask = old->umask; |
109 | get_fs_root_and_pwd(old, &fs->root, &fs->pwd); | 109 | get_fs_root_and_pwd(old, &fs->root, &fs->pwd); |
110 | } | 110 | } |
@@ -121,10 +121,10 @@ int unshare_fs_struct(void) | |||
121 | return -ENOMEM; | 121 | return -ENOMEM; |
122 | 122 | ||
123 | task_lock(current); | 123 | task_lock(current); |
124 | write_lock(&fs->lock); | 124 | spin_lock(&fs->lock); |
125 | kill = !--fs->users; | 125 | kill = !--fs->users; |
126 | current->fs = new_fs; | 126 | current->fs = new_fs; |
127 | write_unlock(&fs->lock); | 127 | spin_unlock(&fs->lock); |
128 | task_unlock(current); | 128 | task_unlock(current); |
129 | 129 | ||
130 | if (kill) | 130 | if (kill) |
@@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask); | |||
143 | /* to be mentioned only in INIT_TASK */ | 143 | /* to be mentioned only in INIT_TASK */ |
144 | struct fs_struct init_fs = { | 144 | struct fs_struct init_fs = { |
145 | .users = 1, | 145 | .users = 1, |
146 | .lock = __RW_LOCK_UNLOCKED(init_fs.lock), | 146 | .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), |
147 | .umask = 0022, | 147 | .umask = 0022, |
148 | }; | 148 | }; |
149 | 149 | ||
@@ -156,14 +156,14 @@ void daemonize_fs_struct(void) | |||
156 | 156 | ||
157 | task_lock(current); | 157 | task_lock(current); |
158 | 158 | ||
159 | write_lock(&init_fs.lock); | 159 | spin_lock(&init_fs.lock); |
160 | init_fs.users++; | 160 | init_fs.users++; |
161 | write_unlock(&init_fs.lock); | 161 | spin_unlock(&init_fs.lock); |
162 | 162 | ||
163 | write_lock(&fs->lock); | 163 | spin_lock(&fs->lock); |
164 | current->fs = &init_fs; | 164 | current->fs = &init_fs; |
165 | kill = !--fs->users; | 165 | kill = !--fs->users; |
166 | write_unlock(&fs->lock); | 166 | spin_unlock(&fs->lock); |
167 | 167 | ||
168 | task_unlock(current); | 168 | task_unlock(current); |
169 | if (kill) | 169 | if (kill) |
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 6a026441c5a6..f6aad48d38a8 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h | |||
@@ -321,17 +321,11 @@ void fscache_put_context(struct fscache_cookie *cookie, void *context) | |||
321 | #define dbgprintk(FMT, ...) \ | 321 | #define dbgprintk(FMT, ...) \ |
322 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) | 322 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) |
323 | 323 | ||
324 | /* make sure we maintain the format strings, even when debugging is disabled */ | ||
325 | static inline __attribute__((format(printf, 1, 2))) | ||
326 | void _dbprintk(const char *fmt, ...) | ||
327 | { | ||
328 | } | ||
329 | |||
330 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 324 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
331 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 325 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
332 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) | 326 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) |
333 | 327 | ||
334 | #define kjournal(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) | 328 | #define kjournal(FMT, ...) no_printk(FMT, ##__VA_ARGS__) |
335 | 329 | ||
336 | #ifdef __KDEBUG | 330 | #ifdef __KDEBUG |
337 | #define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) | 331 | #define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) |
@@ -358,9 +352,9 @@ do { \ | |||
358 | } while (0) | 352 | } while (0) |
359 | 353 | ||
360 | #else | 354 | #else |
361 | #define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 355 | #define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
362 | #define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 356 | #define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
363 | #define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) | 357 | #define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) |
364 | #endif | 358 | #endif |
365 | 359 | ||
366 | /* | 360 | /* |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 69ad053ffd78..d367af1514ef 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -276,7 +276,7 @@ static void flush_bg_queue(struct fuse_conn *fc) | |||
276 | * Called with fc->lock, unlocks it | 276 | * Called with fc->lock, unlocks it |
277 | */ | 277 | */ |
278 | static void request_end(struct fuse_conn *fc, struct fuse_req *req) | 278 | static void request_end(struct fuse_conn *fc, struct fuse_req *req) |
279 | __releases(&fc->lock) | 279 | __releases(fc->lock) |
280 | { | 280 | { |
281 | void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; | 281 | void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; |
282 | req->end = NULL; | 282 | req->end = NULL; |
@@ -306,8 +306,8 @@ __releases(&fc->lock) | |||
306 | 306 | ||
307 | static void wait_answer_interruptible(struct fuse_conn *fc, | 307 | static void wait_answer_interruptible(struct fuse_conn *fc, |
308 | struct fuse_req *req) | 308 | struct fuse_req *req) |
309 | __releases(&fc->lock) | 309 | __releases(fc->lock) |
310 | __acquires(&fc->lock) | 310 | __acquires(fc->lock) |
311 | { | 311 | { |
312 | if (signal_pending(current)) | 312 | if (signal_pending(current)) |
313 | return; | 313 | return; |
@@ -325,8 +325,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) | |||
325 | } | 325 | } |
326 | 326 | ||
327 | static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) | 327 | static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) |
328 | __releases(&fc->lock) | 328 | __releases(fc->lock) |
329 | __acquires(&fc->lock) | 329 | __acquires(fc->lock) |
330 | { | 330 | { |
331 | if (!fc->no_interrupt) { | 331 | if (!fc->no_interrupt) { |
332 | /* Any signal may interrupt this */ | 332 | /* Any signal may interrupt this */ |
@@ -905,8 +905,8 @@ static int request_pending(struct fuse_conn *fc) | |||
905 | 905 | ||
906 | /* Wait until a request is available on the pending list */ | 906 | /* Wait until a request is available on the pending list */ |
907 | static void request_wait(struct fuse_conn *fc) | 907 | static void request_wait(struct fuse_conn *fc) |
908 | __releases(&fc->lock) | 908 | __releases(fc->lock) |
909 | __acquires(&fc->lock) | 909 | __acquires(fc->lock) |
910 | { | 910 | { |
911 | DECLARE_WAITQUEUE(wait, current); | 911 | DECLARE_WAITQUEUE(wait, current); |
912 | 912 | ||
@@ -934,7 +934,7 @@ __acquires(&fc->lock) | |||
934 | */ | 934 | */ |
935 | static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, | 935 | static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, |
936 | size_t nbytes, struct fuse_req *req) | 936 | size_t nbytes, struct fuse_req *req) |
937 | __releases(&fc->lock) | 937 | __releases(fc->lock) |
938 | { | 938 | { |
939 | struct fuse_in_header ih; | 939 | struct fuse_in_header ih; |
940 | struct fuse_interrupt_in arg; | 940 | struct fuse_interrupt_in arg; |
@@ -1720,8 +1720,8 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) | |||
1720 | * This function releases and reacquires fc->lock | 1720 | * This function releases and reacquires fc->lock |
1721 | */ | 1721 | */ |
1722 | static void end_requests(struct fuse_conn *fc, struct list_head *head) | 1722 | static void end_requests(struct fuse_conn *fc, struct list_head *head) |
1723 | __releases(&fc->lock) | 1723 | __releases(fc->lock) |
1724 | __acquires(&fc->lock) | 1724 | __acquires(fc->lock) |
1725 | { | 1725 | { |
1726 | while (!list_empty(head)) { | 1726 | while (!list_empty(head)) { |
1727 | struct fuse_req *req; | 1727 | struct fuse_req *req; |
@@ -1744,8 +1744,8 @@ __acquires(&fc->lock) | |||
1744 | * locked). | 1744 | * locked). |
1745 | */ | 1745 | */ |
1746 | static void end_io_requests(struct fuse_conn *fc) | 1746 | static void end_io_requests(struct fuse_conn *fc) |
1747 | __releases(&fc->lock) | 1747 | __releases(fc->lock) |
1748 | __acquires(&fc->lock) | 1748 | __acquires(fc->lock) |
1749 | { | 1749 | { |
1750 | while (!list_empty(&fc->io)) { | 1750 | while (!list_empty(&fc->io)) { |
1751 | struct fuse_req *req = | 1751 | struct fuse_req *req = |
@@ -1769,6 +1769,16 @@ __acquires(&fc->lock) | |||
1769 | } | 1769 | } |
1770 | } | 1770 | } |
1771 | 1771 | ||
1772 | static void end_queued_requests(struct fuse_conn *fc) | ||
1773 | __releases(fc->lock) | ||
1774 | __acquires(fc->lock) | ||
1775 | { | ||
1776 | fc->max_background = UINT_MAX; | ||
1777 | flush_bg_queue(fc); | ||
1778 | end_requests(fc, &fc->pending); | ||
1779 | end_requests(fc, &fc->processing); | ||
1780 | } | ||
1781 | |||
1772 | /* | 1782 | /* |
1773 | * Abort all requests. | 1783 | * Abort all requests. |
1774 | * | 1784 | * |
@@ -1795,8 +1805,7 @@ void fuse_abort_conn(struct fuse_conn *fc) | |||
1795 | fc->connected = 0; | 1805 | fc->connected = 0; |
1796 | fc->blocked = 0; | 1806 | fc->blocked = 0; |
1797 | end_io_requests(fc); | 1807 | end_io_requests(fc); |
1798 | end_requests(fc, &fc->pending); | 1808 | end_queued_requests(fc); |
1799 | end_requests(fc, &fc->processing); | ||
1800 | wake_up_all(&fc->waitq); | 1809 | wake_up_all(&fc->waitq); |
1801 | wake_up_all(&fc->blocked_waitq); | 1810 | wake_up_all(&fc->blocked_waitq); |
1802 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); | 1811 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); |
@@ -1811,8 +1820,9 @@ int fuse_dev_release(struct inode *inode, struct file *file) | |||
1811 | if (fc) { | 1820 | if (fc) { |
1812 | spin_lock(&fc->lock); | 1821 | spin_lock(&fc->lock); |
1813 | fc->connected = 0; | 1822 | fc->connected = 0; |
1814 | end_requests(fc, &fc->pending); | 1823 | fc->blocked = 0; |
1815 | end_requests(fc, &fc->processing); | 1824 | end_queued_requests(fc); |
1825 | wake_up_all(&fc->blocked_waitq); | ||
1816 | spin_unlock(&fc->lock); | 1826 | spin_unlock(&fc->lock); |
1817 | fuse_conn_put(fc); | 1827 | fuse_conn_put(fc); |
1818 | } | 1828 | } |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 147c1f71bdb9..c8224587123f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -1144,8 +1144,8 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) | |||
1144 | 1144 | ||
1145 | /* Called under fc->lock, may release and reacquire it */ | 1145 | /* Called under fc->lock, may release and reacquire it */ |
1146 | static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) | 1146 | static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) |
1147 | __releases(&fc->lock) | 1147 | __releases(fc->lock) |
1148 | __acquires(&fc->lock) | 1148 | __acquires(fc->lock) |
1149 | { | 1149 | { |
1150 | struct fuse_inode *fi = get_fuse_inode(req->inode); | 1150 | struct fuse_inode *fi = get_fuse_inode(req->inode); |
1151 | loff_t size = i_size_read(req->inode); | 1151 | loff_t size = i_size_read(req->inode); |
@@ -1183,8 +1183,8 @@ __acquires(&fc->lock) | |||
1183 | * Called with fc->lock | 1183 | * Called with fc->lock |
1184 | */ | 1184 | */ |
1185 | void fuse_flush_writepages(struct inode *inode) | 1185 | void fuse_flush_writepages(struct inode *inode) |
1186 | __releases(&fc->lock) | 1186 | __releases(fc->lock) |
1187 | __acquires(&fc->lock) | 1187 | __acquires(fc->lock) |
1188 | { | 1188 | { |
1189 | struct fuse_conn *fc = get_fuse_conn(inode); | 1189 | struct fuse_conn *fc = get_fuse_conn(inode); |
1190 | struct fuse_inode *fi = get_fuse_inode(inode); | 1190 | struct fuse_inode *fi = get_fuse_inode(inode); |
diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 99800e564157..6bc9e3a5a693 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c | |||
@@ -94,6 +94,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, | |||
94 | if (error < 0) | 94 | if (error < 0) |
95 | goto failed; | 95 | goto failed; |
96 | inode->i_mode = mode; | 96 | inode->i_mode = mode; |
97 | inode->i_ctime = CURRENT_TIME; | ||
97 | if (error == 0) { | 98 | if (error == 0) { |
98 | posix_acl_release(acl); | 99 | posix_acl_release(acl); |
99 | acl = NULL; | 100 | acl = NULL; |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index dd1e55535a4e..f7dc9b5f9ef8 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -104,7 +104,7 @@ static char *__dentry_name(struct dentry *dentry, char *name) | |||
104 | __putname(name); | 104 | __putname(name); |
105 | return NULL; | 105 | return NULL; |
106 | } | 106 | } |
107 | strncpy(name, root, PATH_MAX); | 107 | strlcpy(name, root, PATH_MAX); |
108 | if (len > p - name) { | 108 | if (len > p - name) { |
109 | __putname(name); | 109 | __putname(name); |
110 | return NULL; | 110 | return NULL; |
@@ -876,7 +876,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
876 | char *path = dentry_name(dentry); | 876 | char *path = dentry_name(dentry); |
877 | int err = -ENOMEM; | 877 | int err = -ENOMEM; |
878 | if (path) { | 878 | if (path) { |
879 | int err = hostfs_do_readlink(path, link, PATH_MAX); | 879 | err = hostfs_do_readlink(path, link, PATH_MAX); |
880 | if (err == PATH_MAX) | 880 | if (err == PATH_MAX) |
881 | err = -E2BIG; | 881 | err = -E2BIG; |
882 | __putname(path); | 882 | __putname(path); |
diff --git a/fs/internal.h b/fs/internal.h index 6b706bc60a66..a6910e91cee8 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -9,6 +9,8 @@ | |||
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/lglock.h> | ||
13 | |||
12 | struct super_block; | 14 | struct super_block; |
13 | struct linux_binprm; | 15 | struct linux_binprm; |
14 | struct path; | 16 | struct path; |
@@ -70,7 +72,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); | |||
70 | 72 | ||
71 | extern void __init mnt_init(void); | 73 | extern void __init mnt_init(void); |
72 | 74 | ||
73 | extern spinlock_t vfsmount_lock; | 75 | DECLARE_BRLOCK(vfsmount_lock); |
76 | |||
74 | 77 | ||
75 | /* | 78 | /* |
76 | * fs_struct.c | 79 | * fs_struct.c |
@@ -80,6 +83,8 @@ extern void chroot_fs_refs(struct path *, struct path *); | |||
80 | /* | 83 | /* |
81 | * file_table.c | 84 | * file_table.c |
82 | */ | 85 | */ |
86 | extern void file_sb_list_add(struct file *f, struct super_block *sb); | ||
87 | extern void file_sb_list_del(struct file *f); | ||
83 | extern void mark_files_ro(struct super_block *); | 88 | extern void mark_files_ro(struct super_block *); |
84 | extern struct file *get_empty_filp(void); | 89 | extern struct file *get_empty_filp(void); |
85 | 90 | ||
diff --git a/fs/ioctl.c b/fs/ioctl.c index 2d140a713861..f855ea4fc888 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -29,7 +29,6 @@ | |||
29 | * @arg: command-specific argument for ioctl | 29 | * @arg: command-specific argument for ioctl |
30 | * | 30 | * |
31 | * Invokes filesystem specific ->unlocked_ioctl, if one exists; otherwise | 31 | * Invokes filesystem specific ->unlocked_ioctl, if one exists; otherwise |
32 | * invokes filesystem specific ->ioctl method. If neither method exists, | ||
33 | * returns -ENOTTY. | 32 | * returns -ENOTTY. |
34 | * | 33 | * |
35 | * Returns 0 on success, -errno on error. | 34 | * Returns 0 on success, -errno on error. |
@@ -39,21 +38,12 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd, | |||
39 | { | 38 | { |
40 | int error = -ENOTTY; | 39 | int error = -ENOTTY; |
41 | 40 | ||
42 | if (!filp->f_op) | 41 | if (!filp->f_op || !filp->f_op->unlocked_ioctl) |
43 | goto out; | 42 | goto out; |
44 | 43 | ||
45 | if (filp->f_op->unlocked_ioctl) { | 44 | error = filp->f_op->unlocked_ioctl(filp, cmd, arg); |
46 | error = filp->f_op->unlocked_ioctl(filp, cmd, arg); | 45 | if (error == -ENOIOCTLCMD) |
47 | if (error == -ENOIOCTLCMD) | 46 | error = -EINVAL; |
48 | error = -EINVAL; | ||
49 | goto out; | ||
50 | } else if (filp->f_op->ioctl) { | ||
51 | lock_kernel(); | ||
52 | error = filp->f_op->ioctl(filp->f_path.dentry->d_inode, | ||
53 | filp, cmd, arg); | ||
54 | unlock_kernel(); | ||
55 | } | ||
56 | |||
57 | out: | 47 | out: |
58 | return error; | 48 | return error; |
59 | } | 49 | } |
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index b0435dd0654d..05a38b9c4c0e 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c | |||
@@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | |||
254 | { | 254 | { |
255 | int i; | 255 | int i; |
256 | 256 | ||
257 | ll_rw_block(SWRITE, *batch_count, bhs); | 257 | for (i = 0; i < *batch_count; i++) |
258 | write_dirty_buffer(bhs[i], WRITE); | ||
259 | |||
258 | for (i = 0; i < *batch_count; i++) { | 260 | for (i = 0; i < *batch_count; i++) { |
259 | struct buffer_head *bh = bhs[i]; | 261 | struct buffer_head *bh = bhs[i]; |
260 | clear_buffer_jwrite(bh); | 262 | clear_buffer_jwrite(bh); |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 28a9ddaa0c49..95d8c11c929e 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal, | |||
119 | struct buffer_head *bh; | 119 | struct buffer_head *bh; |
120 | journal_header_t *header; | 120 | journal_header_t *header; |
121 | int ret; | 121 | int ret; |
122 | int barrier_done = 0; | ||
123 | 122 | ||
124 | if (is_journal_aborted(journal)) | 123 | if (is_journal_aborted(journal)) |
125 | return 0; | 124 | return 0; |
@@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal, | |||
137 | 136 | ||
138 | JBUFFER_TRACE(descriptor, "write commit block"); | 137 | JBUFFER_TRACE(descriptor, "write commit block"); |
139 | set_buffer_dirty(bh); | 138 | set_buffer_dirty(bh); |
139 | |||
140 | if (journal->j_flags & JFS_BARRIER) { | 140 | if (journal->j_flags & JFS_BARRIER) { |
141 | set_buffer_ordered(bh); | 141 | ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); |
142 | barrier_done = 1; | ||
143 | } | ||
144 | ret = sync_dirty_buffer(bh); | ||
145 | if (barrier_done) | ||
146 | clear_buffer_ordered(bh); | ||
147 | /* is it possible for another commit to fail at roughly | ||
148 | * the same time as this one? If so, we don't want to | ||
149 | * trust the barrier flag in the super, but instead want | ||
150 | * to remember if we sent a barrier request | ||
151 | */ | ||
152 | if (ret == -EOPNOTSUPP && barrier_done) { | ||
153 | char b[BDEVNAME_SIZE]; | ||
154 | 142 | ||
155 | printk(KERN_WARNING | 143 | /* |
156 | "JBD: barrier-based sync failed on %s - " | 144 | * Is it possible for another commit to fail at roughly |
157 | "disabling barriers\n", | 145 | * the same time as this one? If so, we don't want to |
158 | bdevname(journal->j_dev, b)); | 146 | * trust the barrier flag in the super, but instead want |
159 | spin_lock(&journal->j_state_lock); | 147 | * to remember if we sent a barrier request |
160 | journal->j_flags &= ~JFS_BARRIER; | 148 | */ |
161 | spin_unlock(&journal->j_state_lock); | 149 | if (ret == -EOPNOTSUPP) { |
150 | char b[BDEVNAME_SIZE]; | ||
162 | 151 | ||
163 | /* And try again, without the barrier */ | 152 | printk(KERN_WARNING |
164 | set_buffer_uptodate(bh); | 153 | "JBD: barrier-based sync failed on %s - " |
165 | set_buffer_dirty(bh); | 154 | "disabling barriers\n", |
155 | bdevname(journal->j_dev, b)); | ||
156 | spin_lock(&journal->j_state_lock); | ||
157 | journal->j_flags &= ~JFS_BARRIER; | ||
158 | spin_unlock(&journal->j_state_lock); | ||
159 | |||
160 | /* And try again, without the barrier */ | ||
161 | set_buffer_uptodate(bh); | ||
162 | set_buffer_dirty(bh); | ||
163 | ret = sync_dirty_buffer(bh); | ||
164 | } | ||
165 | } else { | ||
166 | ret = sync_dirty_buffer(bh); | 166 | ret = sync_dirty_buffer(bh); |
167 | } | 167 | } |
168 | |||
168 | put_bh(bh); /* One for getblk() */ | 169 | put_bh(bh); /* One for getblk() */ |
169 | journal_put_journal_head(descriptor); | 170 | journal_put_journal_head(descriptor); |
170 | 171 | ||
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f19ce94693d8..2c4b1f109da9 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait) | |||
1024 | if (wait) | 1024 | if (wait) |
1025 | sync_dirty_buffer(bh); | 1025 | sync_dirty_buffer(bh); |
1026 | else | 1026 | else |
1027 | ll_rw_block(SWRITE, 1, &bh); | 1027 | write_dirty_buffer(bh, WRITE); |
1028 | 1028 | ||
1029 | out: | 1029 | out: |
1030 | /* If we have just flushed the log (by marking s_start==0), then | 1030 | /* If we have just flushed the log (by marking s_start==0), then |
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index ad717328343a..d29018307e2e 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c | |||
@@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal, | |||
617 | set_buffer_jwrite(bh); | 617 | set_buffer_jwrite(bh); |
618 | BUFFER_TRACE(bh, "write"); | 618 | BUFFER_TRACE(bh, "write"); |
619 | set_buffer_dirty(bh); | 619 | set_buffer_dirty(bh); |
620 | ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); | 620 | write_dirty_buffer(bh, write_op); |
621 | } | 621 | } |
622 | #endif | 622 | #endif |
623 | 623 | ||
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1c23a0f4e8a3..5247e7ffdcb4 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
255 | { | 255 | { |
256 | int i; | 256 | int i; |
257 | 257 | ||
258 | ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); | 258 | for (i = 0; i < *batch_count; i++) |
259 | write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); | ||
260 | |||
259 | for (i = 0; i < *batch_count; i++) { | 261 | for (i = 0; i < *batch_count; i++) { |
260 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; | 262 | struct buffer_head *bh = journal->j_chkpt_bhs[i]; |
261 | clear_buffer_jwrite(bh); | 263 | clear_buffer_jwrite(bh); |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f52e5e8049f1..7c068c189d80 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal, | |||
101 | struct commit_header *tmp; | 101 | struct commit_header *tmp; |
102 | struct buffer_head *bh; | 102 | struct buffer_head *bh; |
103 | int ret; | 103 | int ret; |
104 | int barrier_done = 0; | ||
105 | struct timespec now = current_kernel_time(); | 104 | struct timespec now = current_kernel_time(); |
106 | 105 | ||
107 | if (is_journal_aborted(journal)) | 106 | if (is_journal_aborted(journal)) |
@@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal, | |||
136 | if (journal->j_flags & JBD2_BARRIER && | 135 | if (journal->j_flags & JBD2_BARRIER && |
137 | !JBD2_HAS_INCOMPAT_FEATURE(journal, | 136 | !JBD2_HAS_INCOMPAT_FEATURE(journal, |
138 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 137 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
139 | set_buffer_ordered(bh); | 138 | ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); |
140 | barrier_done = 1; | 139 | if (ret == -EOPNOTSUPP) { |
141 | } | 140 | printk(KERN_WARNING |
142 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | 141 | "JBD2: Disabling barriers on %s, " |
143 | if (barrier_done) | 142 | "not supported by device\n", journal->j_devname); |
144 | clear_buffer_ordered(bh); | 143 | write_lock(&journal->j_state_lock); |
145 | 144 | journal->j_flags &= ~JBD2_BARRIER; | |
146 | /* is it possible for another commit to fail at roughly | 145 | write_unlock(&journal->j_state_lock); |
147 | * the same time as this one? If so, we don't want to | ||
148 | * trust the barrier flag in the super, but instead want | ||
149 | * to remember if we sent a barrier request | ||
150 | */ | ||
151 | if (ret == -EOPNOTSUPP && barrier_done) { | ||
152 | printk(KERN_WARNING | ||
153 | "JBD2: Disabling barriers on %s, " | ||
154 | "not supported by device\n", journal->j_devname); | ||
155 | write_lock(&journal->j_state_lock); | ||
156 | journal->j_flags &= ~JBD2_BARRIER; | ||
157 | write_unlock(&journal->j_state_lock); | ||
158 | 146 | ||
159 | /* And try again, without the barrier */ | 147 | /* And try again, without the barrier */ |
160 | lock_buffer(bh); | 148 | lock_buffer(bh); |
161 | set_buffer_uptodate(bh); | 149 | set_buffer_uptodate(bh); |
162 | clear_buffer_dirty(bh); | 150 | clear_buffer_dirty(bh); |
151 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | ||
152 | } | ||
153 | } else { | ||
163 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | 154 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
164 | } | 155 | } |
165 | *cbh = bh; | 156 | *cbh = bh; |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ad5866aaf0f9..0e8014ea6b94 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1124 | set_buffer_uptodate(bh); | 1124 | set_buffer_uptodate(bh); |
1125 | } | 1125 | } |
1126 | } else | 1126 | } else |
1127 | ll_rw_block(SWRITE, 1, &bh); | 1127 | write_dirty_buffer(bh, WRITE); |
1128 | 1128 | ||
1129 | out: | 1129 | out: |
1130 | /* If we have just flushed the log (by marking s_start==0), then | 1130 | /* If we have just flushed the log (by marking s_start==0), then |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a360b06af2e3..9ad321fd63fd 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal, | |||
625 | set_buffer_jwrite(bh); | 625 | set_buffer_jwrite(bh); |
626 | BUFFER_TRACE(bh, "write"); | 626 | BUFFER_TRACE(bh, "write"); |
627 | set_buffer_dirty(bh); | 627 | set_buffer_dirty(bh); |
628 | ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); | 628 | write_dirty_buffer(bh, write_op); |
629 | } | 629 | } |
630 | #endif | 630 | #endif |
631 | 631 | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index d95cc9d0401d..f3479d6e0a83 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -82,6 +82,32 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
82 | */ | 82 | */ |
83 | 83 | ||
84 | /* | 84 | /* |
85 | * Update transaction's maximum wait time, if debugging is enabled. | ||
86 | * | ||
87 | * In order for t_max_wait to be reliable, it must be protected by a | ||
88 | * lock. But doing so will mean that start_this_handle() can not be | ||
89 | * run in parallel on SMP systems, which limits our scalability. So | ||
90 | * unless debugging is enabled, we no longer update t_max_wait, which | ||
91 | * means that maximum wait time reported by the jbd2_run_stats | ||
92 | * tracepoint will always be zero. | ||
93 | */ | ||
94 | static inline void update_t_max_wait(transaction_t *transaction) | ||
95 | { | ||
96 | #ifdef CONFIG_JBD2_DEBUG | ||
97 | unsigned long ts = jiffies; | ||
98 | |||
99 | if (jbd2_journal_enable_debug && | ||
100 | time_after(transaction->t_start, ts)) { | ||
101 | ts = jbd2_time_diff(ts, transaction->t_start); | ||
102 | spin_lock(&transaction->t_handle_lock); | ||
103 | if (ts > transaction->t_max_wait) | ||
104 | transaction->t_max_wait = ts; | ||
105 | spin_unlock(&transaction->t_handle_lock); | ||
106 | } | ||
107 | #endif | ||
108 | } | ||
109 | |||
110 | /* | ||
85 | * start_this_handle: Given a handle, deal with any locking or stalling | 111 | * start_this_handle: Given a handle, deal with any locking or stalling |
86 | * needed to make sure that there is enough journal space for the handle | 112 | * needed to make sure that there is enough journal space for the handle |
87 | * to begin. Attach the handle to a transaction and set up the | 113 | * to begin. Attach the handle to a transaction and set up the |
@@ -95,7 +121,6 @@ static int start_this_handle(journal_t *journal, handle_t *handle, | |||
95 | int needed; | 121 | int needed; |
96 | int nblocks = handle->h_buffer_credits; | 122 | int nblocks = handle->h_buffer_credits; |
97 | transaction_t *new_transaction = NULL; | 123 | transaction_t *new_transaction = NULL; |
98 | unsigned long ts = jiffies; | ||
99 | 124 | ||
100 | if (nblocks > journal->j_max_transaction_buffers) { | 125 | if (nblocks > journal->j_max_transaction_buffers) { |
101 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", | 126 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", |
@@ -241,25 +266,8 @@ repeat: | |||
241 | 266 | ||
242 | /* OK, account for the buffers that this operation expects to | 267 | /* OK, account for the buffers that this operation expects to |
243 | * use and add the handle to the running transaction. | 268 | * use and add the handle to the running transaction. |
244 | * | ||
245 | * In order for t_max_wait to be reliable, it must be | ||
246 | * protected by a lock. But doing so will mean that | ||
247 | * start_this_handle() can not be run in parallel on SMP | ||
248 | * systems, which limits our scalability. So we only enable | ||
249 | * it when debugging is enabled. We may want to use a | ||
250 | * separate flag, eventually, so we can enable this | ||
251 | * independently of debugging. | ||
252 | */ | 269 | */ |
253 | #ifdef CONFIG_JBD2_DEBUG | 270 | update_t_max_wait(transaction); |
254 | if (jbd2_journal_enable_debug && | ||
255 | time_after(transaction->t_start, ts)) { | ||
256 | ts = jbd2_time_diff(ts, transaction->t_start); | ||
257 | spin_lock(&transaction->t_handle_lock); | ||
258 | if (ts > transaction->t_max_wait) | ||
259 | transaction->t_max_wait = ts; | ||
260 | spin_unlock(&transaction->t_handle_lock); | ||
261 | } | ||
262 | #endif | ||
263 | handle->h_transaction = transaction; | 271 | handle->h_transaction = transaction; |
264 | atomic_inc(&transaction->t_updates); | 272 | atomic_inc(&transaction->t_updates); |
265 | atomic_inc(&transaction->t_handle_count); | 273 | atomic_inc(&transaction->t_handle_count); |
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 675cc49197fe..9777eb5b5522 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c | |||
@@ -824,7 +824,7 @@ const struct inode_operations logfs_dir_iops = { | |||
824 | }; | 824 | }; |
825 | const struct file_operations logfs_dir_fops = { | 825 | const struct file_operations logfs_dir_fops = { |
826 | .fsync = logfs_fsync, | 826 | .fsync = logfs_fsync, |
827 | .ioctl = logfs_ioctl, | 827 | .unlocked_ioctl = logfs_ioctl, |
828 | .readdir = logfs_readdir, | 828 | .readdir = logfs_readdir, |
829 | .read = generic_read_dir, | 829 | .read = generic_read_dir, |
830 | }; | 830 | }; |
diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 4dd0f7c06e39..e86376b87af1 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c | |||
@@ -181,9 +181,9 @@ static int logfs_releasepage(struct page *page, gfp_t only_xfs_uses_this) | |||
181 | } | 181 | } |
182 | 182 | ||
183 | 183 | ||
184 | int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, | 184 | long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
185 | unsigned long arg) | ||
186 | { | 185 | { |
186 | struct inode *inode = file->f_path.dentry->d_inode; | ||
187 | struct logfs_inode *li = logfs_inode(inode); | 187 | struct logfs_inode *li = logfs_inode(inode); |
188 | unsigned int oldflags, flags; | 188 | unsigned int oldflags, flags; |
189 | int err; | 189 | int err; |
@@ -255,7 +255,7 @@ const struct file_operations logfs_reg_fops = { | |||
255 | .aio_read = generic_file_aio_read, | 255 | .aio_read = generic_file_aio_read, |
256 | .aio_write = generic_file_aio_write, | 256 | .aio_write = generic_file_aio_write, |
257 | .fsync = logfs_fsync, | 257 | .fsync = logfs_fsync, |
258 | .ioctl = logfs_ioctl, | 258 | .unlocked_ioctl = logfs_ioctl, |
259 | .llseek = generic_file_llseek, | 259 | .llseek = generic_file_llseek, |
260 | .mmap = generic_file_readonly_mmap, | 260 | .mmap = generic_file_readonly_mmap, |
261 | .open = generic_file_open, | 261 | .open = generic_file_open, |
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 5e3b72077951..b8786264d243 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h | |||
@@ -504,8 +504,7 @@ extern const struct inode_operations logfs_reg_iops; | |||
504 | extern const struct file_operations logfs_reg_fops; | 504 | extern const struct file_operations logfs_reg_fops; |
505 | extern const struct address_space_operations logfs_reg_aops; | 505 | extern const struct address_space_operations logfs_reg_aops; |
506 | int logfs_readpage(struct file *file, struct page *page); | 506 | int logfs_readpage(struct file *file, struct page *page); |
507 | int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, | 507 | long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
508 | unsigned long arg); | ||
509 | int logfs_fsync(struct file *file, int datasync); | 508 | int logfs_fsync(struct file *file, int datasync); |
510 | 509 | ||
511 | /* gc.c */ | 510 | /* gc.c */ |
diff --git a/fs/mbcache.c b/fs/mbcache.c index cf4e6cdfd15b..93444747237b 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
@@ -80,6 +80,7 @@ struct mb_cache { | |||
80 | struct list_head c_cache_list; | 80 | struct list_head c_cache_list; |
81 | const char *c_name; | 81 | const char *c_name; |
82 | atomic_t c_entry_count; | 82 | atomic_t c_entry_count; |
83 | int c_max_entries; | ||
83 | int c_bucket_bits; | 84 | int c_bucket_bits; |
84 | struct kmem_cache *c_entry_cache; | 85 | struct kmem_cache *c_entry_cache; |
85 | struct list_head *c_block_hash; | 86 | struct list_head *c_block_hash; |
@@ -243,6 +244,12 @@ mb_cache_create(const char *name, int bucket_bits) | |||
243 | if (!cache->c_entry_cache) | 244 | if (!cache->c_entry_cache) |
244 | goto fail2; | 245 | goto fail2; |
245 | 246 | ||
247 | /* | ||
248 | * Set an upper limit on the number of cache entries so that the hash | ||
249 | * chains won't grow too long. | ||
250 | */ | ||
251 | cache->c_max_entries = bucket_count << 4; | ||
252 | |||
246 | spin_lock(&mb_cache_spinlock); | 253 | spin_lock(&mb_cache_spinlock); |
247 | list_add(&cache->c_cache_list, &mb_cache_list); | 254 | list_add(&cache->c_cache_list, &mb_cache_list); |
248 | spin_unlock(&mb_cache_spinlock); | 255 | spin_unlock(&mb_cache_spinlock); |
@@ -333,7 +340,6 @@ mb_cache_destroy(struct mb_cache *cache) | |||
333 | kfree(cache); | 340 | kfree(cache); |
334 | } | 341 | } |
335 | 342 | ||
336 | |||
337 | /* | 343 | /* |
338 | * mb_cache_entry_alloc() | 344 | * mb_cache_entry_alloc() |
339 | * | 345 | * |
@@ -345,17 +351,29 @@ mb_cache_destroy(struct mb_cache *cache) | |||
345 | struct mb_cache_entry * | 351 | struct mb_cache_entry * |
346 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) | 352 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) |
347 | { | 353 | { |
348 | struct mb_cache_entry *ce; | 354 | struct mb_cache_entry *ce = NULL; |
349 | 355 | ||
350 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); | 356 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { |
351 | if (ce) { | 357 | spin_lock(&mb_cache_spinlock); |
358 | if (!list_empty(&mb_cache_lru_list)) { | ||
359 | ce = list_entry(mb_cache_lru_list.next, | ||
360 | struct mb_cache_entry, e_lru_list); | ||
361 | list_del_init(&ce->e_lru_list); | ||
362 | __mb_cache_entry_unhash(ce); | ||
363 | } | ||
364 | spin_unlock(&mb_cache_spinlock); | ||
365 | } | ||
366 | if (!ce) { | ||
367 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); | ||
368 | if (!ce) | ||
369 | return NULL; | ||
352 | atomic_inc(&cache->c_entry_count); | 370 | atomic_inc(&cache->c_entry_count); |
353 | INIT_LIST_HEAD(&ce->e_lru_list); | 371 | INIT_LIST_HEAD(&ce->e_lru_list); |
354 | INIT_LIST_HEAD(&ce->e_block_list); | 372 | INIT_LIST_HEAD(&ce->e_block_list); |
355 | ce->e_cache = cache; | 373 | ce->e_cache = cache; |
356 | ce->e_used = 1 + MB_CACHE_WRITER; | ||
357 | ce->e_queued = 0; | 374 | ce->e_queued = 0; |
358 | } | 375 | } |
376 | ce->e_used = 1 + MB_CACHE_WRITER; | ||
359 | return ce; | 377 | return ce; |
360 | } | 378 | } |
361 | 379 | ||
diff --git a/fs/namei.c b/fs/namei.c index 17ea76bf2fbe..24896e833565 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -595,15 +595,16 @@ int follow_up(struct path *path) | |||
595 | { | 595 | { |
596 | struct vfsmount *parent; | 596 | struct vfsmount *parent; |
597 | struct dentry *mountpoint; | 597 | struct dentry *mountpoint; |
598 | spin_lock(&vfsmount_lock); | 598 | |
599 | br_read_lock(vfsmount_lock); | ||
599 | parent = path->mnt->mnt_parent; | 600 | parent = path->mnt->mnt_parent; |
600 | if (parent == path->mnt) { | 601 | if (parent == path->mnt) { |
601 | spin_unlock(&vfsmount_lock); | 602 | br_read_unlock(vfsmount_lock); |
602 | return 0; | 603 | return 0; |
603 | } | 604 | } |
604 | mntget(parent); | 605 | mntget(parent); |
605 | mountpoint = dget(path->mnt->mnt_mountpoint); | 606 | mountpoint = dget(path->mnt->mnt_mountpoint); |
606 | spin_unlock(&vfsmount_lock); | 607 | br_read_unlock(vfsmount_lock); |
607 | dput(path->dentry); | 608 | dput(path->dentry); |
608 | path->dentry = mountpoint; | 609 | path->dentry = mountpoint; |
609 | mntput(path->mnt); | 610 | mntput(path->mnt); |
@@ -686,6 +687,35 @@ static __always_inline void follow_dotdot(struct nameidata *nd) | |||
686 | } | 687 | } |
687 | 688 | ||
688 | /* | 689 | /* |
690 | * Allocate a dentry with name and parent, and perform a parent | ||
691 | * directory ->lookup on it. Returns the new dentry, or ERR_PTR | ||
692 | * on error. parent->d_inode->i_mutex must be held. d_lookup must | ||
693 | * have verified that no child exists while under i_mutex. | ||
694 | */ | ||
695 | static struct dentry *d_alloc_and_lookup(struct dentry *parent, | ||
696 | struct qstr *name, struct nameidata *nd) | ||
697 | { | ||
698 | struct inode *inode = parent->d_inode; | ||
699 | struct dentry *dentry; | ||
700 | struct dentry *old; | ||
701 | |||
702 | /* Don't create child dentry for a dead directory. */ | ||
703 | if (unlikely(IS_DEADDIR(inode))) | ||
704 | return ERR_PTR(-ENOENT); | ||
705 | |||
706 | dentry = d_alloc(parent, name); | ||
707 | if (unlikely(!dentry)) | ||
708 | return ERR_PTR(-ENOMEM); | ||
709 | |||
710 | old = inode->i_op->lookup(inode, dentry, nd); | ||
711 | if (unlikely(old)) { | ||
712 | dput(dentry); | ||
713 | dentry = old; | ||
714 | } | ||
715 | return dentry; | ||
716 | } | ||
717 | |||
718 | /* | ||
689 | * It's more convoluted than I'd like it to be, but... it's still fairly | 719 | * It's more convoluted than I'd like it to be, but... it's still fairly |
690 | * small and for now I'd prefer to have fast path as straight as possible. | 720 | * small and for now I'd prefer to have fast path as straight as possible. |
691 | * It _is_ time-critical. | 721 | * It _is_ time-critical. |
@@ -706,9 +736,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, | |||
706 | return err; | 736 | return err; |
707 | } | 737 | } |
708 | 738 | ||
739 | /* | ||
740 | * Rename seqlock is not required here because in the off chance | ||
741 | * of a false negative due to a concurrent rename, we're going to | ||
742 | * do the non-racy lookup, below. | ||
743 | */ | ||
709 | dentry = __d_lookup(nd->path.dentry, name); | 744 | dentry = __d_lookup(nd->path.dentry, name); |
710 | if (!dentry) | 745 | if (!dentry) |
711 | goto need_lookup; | 746 | goto need_lookup; |
747 | found: | ||
712 | if (dentry->d_op && dentry->d_op->d_revalidate) | 748 | if (dentry->d_op && dentry->d_op->d_revalidate) |
713 | goto need_revalidate; | 749 | goto need_revalidate; |
714 | done: | 750 | done: |
@@ -724,56 +760,28 @@ need_lookup: | |||
724 | mutex_lock(&dir->i_mutex); | 760 | mutex_lock(&dir->i_mutex); |
725 | /* | 761 | /* |
726 | * First re-do the cached lookup just in case it was created | 762 | * First re-do the cached lookup just in case it was created |
727 | * while we waited for the directory semaphore.. | 763 | * while we waited for the directory semaphore, or the first |
764 | * lookup failed due to an unrelated rename. | ||
728 | * | 765 | * |
729 | * FIXME! This could use version numbering or similar to | 766 | * This could use version numbering or similar to avoid unnecessary |
730 | * avoid unnecessary cache lookups. | 767 | * cache lookups, but then we'd have to do the first lookup in the |
731 | * | 768 | * non-racy way. However in the common case here, everything should |
732 | * The "dcache_lock" is purely to protect the RCU list walker | 769 | * be hot in cache, so would it be a big win? |
733 | * from concurrent renames at this point (we mustn't get false | ||
734 | * negatives from the RCU list walk here, unlike the optimistic | ||
735 | * fast walk). | ||
736 | * | ||
737 | * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup | ||
738 | */ | 770 | */ |
739 | dentry = d_lookup(parent, name); | 771 | dentry = d_lookup(parent, name); |
740 | if (!dentry) { | 772 | if (likely(!dentry)) { |
741 | struct dentry *new; | 773 | dentry = d_alloc_and_lookup(parent, name, nd); |
742 | |||
743 | /* Don't create child dentry for a dead directory. */ | ||
744 | dentry = ERR_PTR(-ENOENT); | ||
745 | if (IS_DEADDIR(dir)) | ||
746 | goto out_unlock; | ||
747 | |||
748 | new = d_alloc(parent, name); | ||
749 | dentry = ERR_PTR(-ENOMEM); | ||
750 | if (new) { | ||
751 | dentry = dir->i_op->lookup(dir, new, nd); | ||
752 | if (dentry) | ||
753 | dput(new); | ||
754 | else | ||
755 | dentry = new; | ||
756 | } | ||
757 | out_unlock: | ||
758 | mutex_unlock(&dir->i_mutex); | 774 | mutex_unlock(&dir->i_mutex); |
759 | if (IS_ERR(dentry)) | 775 | if (IS_ERR(dentry)) |
760 | goto fail; | 776 | goto fail; |
761 | goto done; | 777 | goto done; |
762 | } | 778 | } |
763 | |||
764 | /* | 779 | /* |
765 | * Uhhuh! Nasty case: the cache was re-populated while | 780 | * Uhhuh! Nasty case: the cache was re-populated while |
766 | * we waited on the semaphore. Need to revalidate. | 781 | * we waited on the semaphore. Need to revalidate. |
767 | */ | 782 | */ |
768 | mutex_unlock(&dir->i_mutex); | 783 | mutex_unlock(&dir->i_mutex); |
769 | if (dentry->d_op && dentry->d_op->d_revalidate) { | 784 | goto found; |
770 | dentry = do_revalidate(dentry, nd); | ||
771 | if (!dentry) | ||
772 | dentry = ERR_PTR(-ENOENT); | ||
773 | } | ||
774 | if (IS_ERR(dentry)) | ||
775 | goto fail; | ||
776 | goto done; | ||
777 | 785 | ||
778 | need_revalidate: | 786 | need_revalidate: |
779 | dentry = do_revalidate(dentry, nd); | 787 | dentry = do_revalidate(dentry, nd); |
@@ -1130,35 +1138,18 @@ static struct dentry *__lookup_hash(struct qstr *name, | |||
1130 | goto out; | 1138 | goto out; |
1131 | } | 1139 | } |
1132 | 1140 | ||
1133 | dentry = __d_lookup(base, name); | 1141 | /* |
1134 | 1142 | * Don't bother with __d_lookup: callers are for creat as | |
1135 | /* lockless __d_lookup may fail due to concurrent d_move() | 1143 | * well as unlink, so a lot of the time it would cost |
1136 | * in some unrelated directory, so try with d_lookup | 1144 | * a double lookup. |
1137 | */ | 1145 | */ |
1138 | if (!dentry) | 1146 | dentry = d_lookup(base, name); |
1139 | dentry = d_lookup(base, name); | ||
1140 | 1147 | ||
1141 | if (dentry && dentry->d_op && dentry->d_op->d_revalidate) | 1148 | if (dentry && dentry->d_op && dentry->d_op->d_revalidate) |
1142 | dentry = do_revalidate(dentry, nd); | 1149 | dentry = do_revalidate(dentry, nd); |
1143 | 1150 | ||
1144 | if (!dentry) { | 1151 | if (!dentry) |
1145 | struct dentry *new; | 1152 | dentry = d_alloc_and_lookup(base, name, nd); |
1146 | |||
1147 | /* Don't create child dentry for a dead directory. */ | ||
1148 | dentry = ERR_PTR(-ENOENT); | ||
1149 | if (IS_DEADDIR(inode)) | ||
1150 | goto out; | ||
1151 | |||
1152 | new = d_alloc(base, name); | ||
1153 | dentry = ERR_PTR(-ENOMEM); | ||
1154 | if (!new) | ||
1155 | goto out; | ||
1156 | dentry = inode->i_op->lookup(inode, new, nd); | ||
1157 | if (!dentry) | ||
1158 | dentry = new; | ||
1159 | else | ||
1160 | dput(new); | ||
1161 | } | ||
1162 | out: | 1153 | out: |
1163 | return dentry; | 1154 | return dentry; |
1164 | } | 1155 | } |
diff --git a/fs/namespace.c b/fs/namespace.c index 2e10cb19c5b0..a72eaabfe8f2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -11,6 +11,8 @@ | |||
11 | #include <linux/syscalls.h> | 11 | #include <linux/syscalls.h> |
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
14 | #include <linux/spinlock.h> | ||
15 | #include <linux/percpu.h> | ||
14 | #include <linux/smp_lock.h> | 16 | #include <linux/smp_lock.h> |
15 | #include <linux/init.h> | 17 | #include <linux/init.h> |
16 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
@@ -38,12 +40,10 @@ | |||
38 | #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) | 40 | #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) |
39 | #define HASH_SIZE (1UL << HASH_SHIFT) | 41 | #define HASH_SIZE (1UL << HASH_SHIFT) |
40 | 42 | ||
41 | /* spinlock for vfsmount related operations, in place of dcache_lock */ | ||
42 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); | ||
43 | |||
44 | static int event; | 43 | static int event; |
45 | static DEFINE_IDA(mnt_id_ida); | 44 | static DEFINE_IDA(mnt_id_ida); |
46 | static DEFINE_IDA(mnt_group_ida); | 45 | static DEFINE_IDA(mnt_group_ida); |
46 | static DEFINE_SPINLOCK(mnt_id_lock); | ||
47 | static int mnt_id_start = 0; | 47 | static int mnt_id_start = 0; |
48 | static int mnt_group_start = 1; | 48 | static int mnt_group_start = 1; |
49 | 49 | ||
@@ -55,6 +55,16 @@ static struct rw_semaphore namespace_sem; | |||
55 | struct kobject *fs_kobj; | 55 | struct kobject *fs_kobj; |
56 | EXPORT_SYMBOL_GPL(fs_kobj); | 56 | EXPORT_SYMBOL_GPL(fs_kobj); |
57 | 57 | ||
58 | /* | ||
59 | * vfsmount lock may be taken for read to prevent changes to the | ||
60 | * vfsmount hash, ie. during mountpoint lookups or walking back | ||
61 | * up the tree. | ||
62 | * | ||
63 | * It should be taken for write in all cases where the vfsmount | ||
64 | * tree or hash is modified or when a vfsmount structure is modified. | ||
65 | */ | ||
66 | DEFINE_BRLOCK(vfsmount_lock); | ||
67 | |||
58 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) | 68 | static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) |
59 | { | 69 | { |
60 | unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); | 70 | unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); |
@@ -65,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) | |||
65 | 75 | ||
66 | #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) | 76 | #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) |
67 | 77 | ||
68 | /* allocation is serialized by namespace_sem */ | 78 | /* |
79 | * allocation is serialized by namespace_sem, but we need the spinlock to | ||
80 | * serialize with freeing. | ||
81 | */ | ||
69 | static int mnt_alloc_id(struct vfsmount *mnt) | 82 | static int mnt_alloc_id(struct vfsmount *mnt) |
70 | { | 83 | { |
71 | int res; | 84 | int res; |
72 | 85 | ||
73 | retry: | 86 | retry: |
74 | ida_pre_get(&mnt_id_ida, GFP_KERNEL); | 87 | ida_pre_get(&mnt_id_ida, GFP_KERNEL); |
75 | spin_lock(&vfsmount_lock); | 88 | spin_lock(&mnt_id_lock); |
76 | res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); | 89 | res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); |
77 | if (!res) | 90 | if (!res) |
78 | mnt_id_start = mnt->mnt_id + 1; | 91 | mnt_id_start = mnt->mnt_id + 1; |
79 | spin_unlock(&vfsmount_lock); | 92 | spin_unlock(&mnt_id_lock); |
80 | if (res == -EAGAIN) | 93 | if (res == -EAGAIN) |
81 | goto retry; | 94 | goto retry; |
82 | 95 | ||
@@ -86,11 +99,11 @@ retry: | |||
86 | static void mnt_free_id(struct vfsmount *mnt) | 99 | static void mnt_free_id(struct vfsmount *mnt) |
87 | { | 100 | { |
88 | int id = mnt->mnt_id; | 101 | int id = mnt->mnt_id; |
89 | spin_lock(&vfsmount_lock); | 102 | spin_lock(&mnt_id_lock); |
90 | ida_remove(&mnt_id_ida, id); | 103 | ida_remove(&mnt_id_ida, id); |
91 | if (mnt_id_start > id) | 104 | if (mnt_id_start > id) |
92 | mnt_id_start = id; | 105 | mnt_id_start = id; |
93 | spin_unlock(&vfsmount_lock); | 106 | spin_unlock(&mnt_id_lock); |
94 | } | 107 | } |
95 | 108 | ||
96 | /* | 109 | /* |
@@ -348,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt) | |||
348 | { | 361 | { |
349 | int ret = 0; | 362 | int ret = 0; |
350 | 363 | ||
351 | spin_lock(&vfsmount_lock); | 364 | br_write_lock(vfsmount_lock); |
352 | mnt->mnt_flags |= MNT_WRITE_HOLD; | 365 | mnt->mnt_flags |= MNT_WRITE_HOLD; |
353 | /* | 366 | /* |
354 | * After storing MNT_WRITE_HOLD, we'll read the counters. This store | 367 | * After storing MNT_WRITE_HOLD, we'll read the counters. This store |
@@ -382,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt) | |||
382 | */ | 395 | */ |
383 | smp_wmb(); | 396 | smp_wmb(); |
384 | mnt->mnt_flags &= ~MNT_WRITE_HOLD; | 397 | mnt->mnt_flags &= ~MNT_WRITE_HOLD; |
385 | spin_unlock(&vfsmount_lock); | 398 | br_write_unlock(vfsmount_lock); |
386 | return ret; | 399 | return ret; |
387 | } | 400 | } |
388 | 401 | ||
389 | static void __mnt_unmake_readonly(struct vfsmount *mnt) | 402 | static void __mnt_unmake_readonly(struct vfsmount *mnt) |
390 | { | 403 | { |
391 | spin_lock(&vfsmount_lock); | 404 | br_write_lock(vfsmount_lock); |
392 | mnt->mnt_flags &= ~MNT_READONLY; | 405 | mnt->mnt_flags &= ~MNT_READONLY; |
393 | spin_unlock(&vfsmount_lock); | 406 | br_write_unlock(vfsmount_lock); |
394 | } | 407 | } |
395 | 408 | ||
396 | void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) | 409 | void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) |
@@ -414,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt) | |||
414 | /* | 427 | /* |
415 | * find the first or last mount at @dentry on vfsmount @mnt depending on | 428 | * find the first or last mount at @dentry on vfsmount @mnt depending on |
416 | * @dir. If @dir is set return the first mount else return the last mount. | 429 | * @dir. If @dir is set return the first mount else return the last mount. |
430 | * vfsmount_lock must be held for read or write. | ||
417 | */ | 431 | */ |
418 | struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, | 432 | struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, |
419 | int dir) | 433 | int dir) |
@@ -443,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, | |||
443 | struct vfsmount *lookup_mnt(struct path *path) | 457 | struct vfsmount *lookup_mnt(struct path *path) |
444 | { | 458 | { |
445 | struct vfsmount *child_mnt; | 459 | struct vfsmount *child_mnt; |
446 | spin_lock(&vfsmount_lock); | 460 | |
461 | br_read_lock(vfsmount_lock); | ||
447 | if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) | 462 | if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) |
448 | mntget(child_mnt); | 463 | mntget(child_mnt); |
449 | spin_unlock(&vfsmount_lock); | 464 | br_read_unlock(vfsmount_lock); |
450 | return child_mnt; | 465 | return child_mnt; |
451 | } | 466 | } |
452 | 467 | ||
@@ -455,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt) | |||
455 | return mnt->mnt_ns == current->nsproxy->mnt_ns; | 470 | return mnt->mnt_ns == current->nsproxy->mnt_ns; |
456 | } | 471 | } |
457 | 472 | ||
473 | /* | ||
474 | * vfsmount lock must be held for write | ||
475 | */ | ||
458 | static void touch_mnt_namespace(struct mnt_namespace *ns) | 476 | static void touch_mnt_namespace(struct mnt_namespace *ns) |
459 | { | 477 | { |
460 | if (ns) { | 478 | if (ns) { |
@@ -463,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns) | |||
463 | } | 481 | } |
464 | } | 482 | } |
465 | 483 | ||
484 | /* | ||
485 | * vfsmount lock must be held for write | ||
486 | */ | ||
466 | static void __touch_mnt_namespace(struct mnt_namespace *ns) | 487 | static void __touch_mnt_namespace(struct mnt_namespace *ns) |
467 | { | 488 | { |
468 | if (ns && ns->event != event) { | 489 | if (ns && ns->event != event) { |
@@ -471,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) | |||
471 | } | 492 | } |
472 | } | 493 | } |
473 | 494 | ||
495 | /* | ||
496 | * vfsmount lock must be held for write | ||
497 | */ | ||
474 | static void detach_mnt(struct vfsmount *mnt, struct path *old_path) | 498 | static void detach_mnt(struct vfsmount *mnt, struct path *old_path) |
475 | { | 499 | { |
476 | old_path->dentry = mnt->mnt_mountpoint; | 500 | old_path->dentry = mnt->mnt_mountpoint; |
@@ -482,6 +506,9 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) | |||
482 | old_path->dentry->d_mounted--; | 506 | old_path->dentry->d_mounted--; |
483 | } | 507 | } |
484 | 508 | ||
509 | /* | ||
510 | * vfsmount lock must be held for write | ||
511 | */ | ||
485 | void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, | 512 | void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, |
486 | struct vfsmount *child_mnt) | 513 | struct vfsmount *child_mnt) |
487 | { | 514 | { |
@@ -490,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, | |||
490 | dentry->d_mounted++; | 517 | dentry->d_mounted++; |
491 | } | 518 | } |
492 | 519 | ||
520 | /* | ||
521 | * vfsmount lock must be held for write | ||
522 | */ | ||
493 | static void attach_mnt(struct vfsmount *mnt, struct path *path) | 523 | static void attach_mnt(struct vfsmount *mnt, struct path *path) |
494 | { | 524 | { |
495 | mnt_set_mountpoint(path->mnt, path->dentry, mnt); | 525 | mnt_set_mountpoint(path->mnt, path->dentry, mnt); |
@@ -499,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) | |||
499 | } | 529 | } |
500 | 530 | ||
501 | /* | 531 | /* |
502 | * the caller must hold vfsmount_lock | 532 | * vfsmount lock must be held for write |
503 | */ | 533 | */ |
504 | static void commit_tree(struct vfsmount *mnt) | 534 | static void commit_tree(struct vfsmount *mnt) |
505 | { | 535 | { |
@@ -623,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt) | |||
623 | void mntput_no_expire(struct vfsmount *mnt) | 653 | void mntput_no_expire(struct vfsmount *mnt) |
624 | { | 654 | { |
625 | repeat: | 655 | repeat: |
626 | if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { | 656 | if (atomic_add_unless(&mnt->mnt_count, -1, 1)) |
627 | if (likely(!mnt->mnt_pinned)) { | 657 | return; |
628 | spin_unlock(&vfsmount_lock); | 658 | br_write_lock(vfsmount_lock); |
629 | __mntput(mnt); | 659 | if (!atomic_dec_and_test(&mnt->mnt_count)) { |
630 | return; | 660 | br_write_unlock(vfsmount_lock); |
631 | } | 661 | return; |
632 | atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); | ||
633 | mnt->mnt_pinned = 0; | ||
634 | spin_unlock(&vfsmount_lock); | ||
635 | acct_auto_close_mnt(mnt); | ||
636 | goto repeat; | ||
637 | } | 662 | } |
663 | if (likely(!mnt->mnt_pinned)) { | ||
664 | br_write_unlock(vfsmount_lock); | ||
665 | __mntput(mnt); | ||
666 | return; | ||
667 | } | ||
668 | atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); | ||
669 | mnt->mnt_pinned = 0; | ||
670 | br_write_unlock(vfsmount_lock); | ||
671 | acct_auto_close_mnt(mnt); | ||
672 | goto repeat; | ||
638 | } | 673 | } |
639 | |||
640 | EXPORT_SYMBOL(mntput_no_expire); | 674 | EXPORT_SYMBOL(mntput_no_expire); |
641 | 675 | ||
642 | void mnt_pin(struct vfsmount *mnt) | 676 | void mnt_pin(struct vfsmount *mnt) |
643 | { | 677 | { |
644 | spin_lock(&vfsmount_lock); | 678 | br_write_lock(vfsmount_lock); |
645 | mnt->mnt_pinned++; | 679 | mnt->mnt_pinned++; |
646 | spin_unlock(&vfsmount_lock); | 680 | br_write_unlock(vfsmount_lock); |
647 | } | 681 | } |
648 | 682 | ||
649 | EXPORT_SYMBOL(mnt_pin); | 683 | EXPORT_SYMBOL(mnt_pin); |
650 | 684 | ||
651 | void mnt_unpin(struct vfsmount *mnt) | 685 | void mnt_unpin(struct vfsmount *mnt) |
652 | { | 686 | { |
653 | spin_lock(&vfsmount_lock); | 687 | br_write_lock(vfsmount_lock); |
654 | if (mnt->mnt_pinned) { | 688 | if (mnt->mnt_pinned) { |
655 | atomic_inc(&mnt->mnt_count); | 689 | atomic_inc(&mnt->mnt_count); |
656 | mnt->mnt_pinned--; | 690 | mnt->mnt_pinned--; |
657 | } | 691 | } |
658 | spin_unlock(&vfsmount_lock); | 692 | br_write_unlock(vfsmount_lock); |
659 | } | 693 | } |
660 | 694 | ||
661 | EXPORT_SYMBOL(mnt_unpin); | 695 | EXPORT_SYMBOL(mnt_unpin); |
@@ -746,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p) | |||
746 | struct mnt_namespace *ns = p->ns; | 780 | struct mnt_namespace *ns = p->ns; |
747 | int res = 0; | 781 | int res = 0; |
748 | 782 | ||
749 | spin_lock(&vfsmount_lock); | 783 | br_read_lock(vfsmount_lock); |
750 | if (p->event != ns->event) { | 784 | if (p->event != ns->event) { |
751 | p->event = ns->event; | 785 | p->event = ns->event; |
752 | res = 1; | 786 | res = 1; |
753 | } | 787 | } |
754 | spin_unlock(&vfsmount_lock); | 788 | br_read_unlock(vfsmount_lock); |
755 | 789 | ||
756 | return res; | 790 | return res; |
757 | } | 791 | } |
@@ -952,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt) | |||
952 | int minimum_refs = 0; | 986 | int minimum_refs = 0; |
953 | struct vfsmount *p; | 987 | struct vfsmount *p; |
954 | 988 | ||
955 | spin_lock(&vfsmount_lock); | 989 | br_read_lock(vfsmount_lock); |
956 | for (p = mnt; p; p = next_mnt(p, mnt)) { | 990 | for (p = mnt; p; p = next_mnt(p, mnt)) { |
957 | actual_refs += atomic_read(&p->mnt_count); | 991 | actual_refs += atomic_read(&p->mnt_count); |
958 | minimum_refs += 2; | 992 | minimum_refs += 2; |
959 | } | 993 | } |
960 | spin_unlock(&vfsmount_lock); | 994 | br_read_unlock(vfsmount_lock); |
961 | 995 | ||
962 | if (actual_refs > minimum_refs) | 996 | if (actual_refs > minimum_refs) |
963 | return 0; | 997 | return 0; |
@@ -984,10 +1018,10 @@ int may_umount(struct vfsmount *mnt) | |||
984 | { | 1018 | { |
985 | int ret = 1; | 1019 | int ret = 1; |
986 | down_read(&namespace_sem); | 1020 | down_read(&namespace_sem); |
987 | spin_lock(&vfsmount_lock); | 1021 | br_read_lock(vfsmount_lock); |
988 | if (propagate_mount_busy(mnt, 2)) | 1022 | if (propagate_mount_busy(mnt, 2)) |
989 | ret = 0; | 1023 | ret = 0; |
990 | spin_unlock(&vfsmount_lock); | 1024 | br_read_unlock(vfsmount_lock); |
991 | up_read(&namespace_sem); | 1025 | up_read(&namespace_sem); |
992 | return ret; | 1026 | return ret; |
993 | } | 1027 | } |
@@ -1003,13 +1037,14 @@ void release_mounts(struct list_head *head) | |||
1003 | if (mnt->mnt_parent != mnt) { | 1037 | if (mnt->mnt_parent != mnt) { |
1004 | struct dentry *dentry; | 1038 | struct dentry *dentry; |
1005 | struct vfsmount *m; | 1039 | struct vfsmount *m; |
1006 | spin_lock(&vfsmount_lock); | 1040 | |
1041 | br_write_lock(vfsmount_lock); | ||
1007 | dentry = mnt->mnt_mountpoint; | 1042 | dentry = mnt->mnt_mountpoint; |
1008 | m = mnt->mnt_parent; | 1043 | m = mnt->mnt_parent; |
1009 | mnt->mnt_mountpoint = mnt->mnt_root; | 1044 | mnt->mnt_mountpoint = mnt->mnt_root; |
1010 | mnt->mnt_parent = mnt; | 1045 | mnt->mnt_parent = mnt; |
1011 | m->mnt_ghosts--; | 1046 | m->mnt_ghosts--; |
1012 | spin_unlock(&vfsmount_lock); | 1047 | br_write_unlock(vfsmount_lock); |
1013 | dput(dentry); | 1048 | dput(dentry); |
1014 | mntput(m); | 1049 | mntput(m); |
1015 | } | 1050 | } |
@@ -1017,6 +1052,10 @@ void release_mounts(struct list_head *head) | |||
1017 | } | 1052 | } |
1018 | } | 1053 | } |
1019 | 1054 | ||
1055 | /* | ||
1056 | * vfsmount lock must be held for write | ||
1057 | * namespace_sem must be held for write | ||
1058 | */ | ||
1020 | void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) | 1059 | void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) |
1021 | { | 1060 | { |
1022 | struct vfsmount *p; | 1061 | struct vfsmount *p; |
@@ -1107,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags) | |||
1107 | } | 1146 | } |
1108 | 1147 | ||
1109 | down_write(&namespace_sem); | 1148 | down_write(&namespace_sem); |
1110 | spin_lock(&vfsmount_lock); | 1149 | br_write_lock(vfsmount_lock); |
1111 | event++; | 1150 | event++; |
1112 | 1151 | ||
1113 | if (!(flags & MNT_DETACH)) | 1152 | if (!(flags & MNT_DETACH)) |
@@ -1119,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags) | |||
1119 | umount_tree(mnt, 1, &umount_list); | 1158 | umount_tree(mnt, 1, &umount_list); |
1120 | retval = 0; | 1159 | retval = 0; |
1121 | } | 1160 | } |
1122 | spin_unlock(&vfsmount_lock); | 1161 | br_write_unlock(vfsmount_lock); |
1123 | up_write(&namespace_sem); | 1162 | up_write(&namespace_sem); |
1124 | release_mounts(&umount_list); | 1163 | release_mounts(&umount_list); |
1125 | return retval; | 1164 | return retval; |
@@ -1231,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, | |||
1231 | q = clone_mnt(p, p->mnt_root, flag); | 1270 | q = clone_mnt(p, p->mnt_root, flag); |
1232 | if (!q) | 1271 | if (!q) |
1233 | goto Enomem; | 1272 | goto Enomem; |
1234 | spin_lock(&vfsmount_lock); | 1273 | br_write_lock(vfsmount_lock); |
1235 | list_add_tail(&q->mnt_list, &res->mnt_list); | 1274 | list_add_tail(&q->mnt_list, &res->mnt_list); |
1236 | attach_mnt(q, &path); | 1275 | attach_mnt(q, &path); |
1237 | spin_unlock(&vfsmount_lock); | 1276 | br_write_unlock(vfsmount_lock); |
1238 | } | 1277 | } |
1239 | } | 1278 | } |
1240 | return res; | 1279 | return res; |
1241 | Enomem: | 1280 | Enomem: |
1242 | if (res) { | 1281 | if (res) { |
1243 | LIST_HEAD(umount_list); | 1282 | LIST_HEAD(umount_list); |
1244 | spin_lock(&vfsmount_lock); | 1283 | br_write_lock(vfsmount_lock); |
1245 | umount_tree(res, 0, &umount_list); | 1284 | umount_tree(res, 0, &umount_list); |
1246 | spin_unlock(&vfsmount_lock); | 1285 | br_write_unlock(vfsmount_lock); |
1247 | release_mounts(&umount_list); | 1286 | release_mounts(&umount_list); |
1248 | } | 1287 | } |
1249 | return NULL; | 1288 | return NULL; |
@@ -1262,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt) | |||
1262 | { | 1301 | { |
1263 | LIST_HEAD(umount_list); | 1302 | LIST_HEAD(umount_list); |
1264 | down_write(&namespace_sem); | 1303 | down_write(&namespace_sem); |
1265 | spin_lock(&vfsmount_lock); | 1304 | br_write_lock(vfsmount_lock); |
1266 | umount_tree(mnt, 0, &umount_list); | 1305 | umount_tree(mnt, 0, &umount_list); |
1267 | spin_unlock(&vfsmount_lock); | 1306 | br_write_unlock(vfsmount_lock); |
1268 | up_write(&namespace_sem); | 1307 | up_write(&namespace_sem); |
1269 | release_mounts(&umount_list); | 1308 | release_mounts(&umount_list); |
1270 | } | 1309 | } |
@@ -1392,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
1392 | if (err) | 1431 | if (err) |
1393 | goto out_cleanup_ids; | 1432 | goto out_cleanup_ids; |
1394 | 1433 | ||
1395 | spin_lock(&vfsmount_lock); | 1434 | br_write_lock(vfsmount_lock); |
1396 | 1435 | ||
1397 | if (IS_MNT_SHARED(dest_mnt)) { | 1436 | if (IS_MNT_SHARED(dest_mnt)) { |
1398 | for (p = source_mnt; p; p = next_mnt(p, source_mnt)) | 1437 | for (p = source_mnt; p; p = next_mnt(p, source_mnt)) |
@@ -1411,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
1411 | list_del_init(&child->mnt_hash); | 1450 | list_del_init(&child->mnt_hash); |
1412 | commit_tree(child); | 1451 | commit_tree(child); |
1413 | } | 1452 | } |
1414 | spin_unlock(&vfsmount_lock); | 1453 | br_write_unlock(vfsmount_lock); |
1454 | |||
1415 | return 0; | 1455 | return 0; |
1416 | 1456 | ||
1417 | out_cleanup_ids: | 1457 | out_cleanup_ids: |
@@ -1444,13 +1484,30 @@ out_unlock: | |||
1444 | } | 1484 | } |
1445 | 1485 | ||
1446 | /* | 1486 | /* |
1487 | * Sanity check the flags to change_mnt_propagation. | ||
1488 | */ | ||
1489 | |||
1490 | static int flags_to_propagation_type(int flags) | ||
1491 | { | ||
1492 | int type = flags & ~MS_REC; | ||
1493 | |||
1494 | /* Fail if any non-propagation flags are set */ | ||
1495 | if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) | ||
1496 | return 0; | ||
1497 | /* Only one propagation flag should be set */ | ||
1498 | if (!is_power_of_2(type)) | ||
1499 | return 0; | ||
1500 | return type; | ||
1501 | } | ||
1502 | |||
1503 | /* | ||
1447 | * recursively change the type of the mountpoint. | 1504 | * recursively change the type of the mountpoint. |
1448 | */ | 1505 | */ |
1449 | static int do_change_type(struct path *path, int flag) | 1506 | static int do_change_type(struct path *path, int flag) |
1450 | { | 1507 | { |
1451 | struct vfsmount *m, *mnt = path->mnt; | 1508 | struct vfsmount *m, *mnt = path->mnt; |
1452 | int recurse = flag & MS_REC; | 1509 | int recurse = flag & MS_REC; |
1453 | int type = flag & ~MS_REC; | 1510 | int type; |
1454 | int err = 0; | 1511 | int err = 0; |
1455 | 1512 | ||
1456 | if (!capable(CAP_SYS_ADMIN)) | 1513 | if (!capable(CAP_SYS_ADMIN)) |
@@ -1459,6 +1516,10 @@ static int do_change_type(struct path *path, int flag) | |||
1459 | if (path->dentry != path->mnt->mnt_root) | 1516 | if (path->dentry != path->mnt->mnt_root) |
1460 | return -EINVAL; | 1517 | return -EINVAL; |
1461 | 1518 | ||
1519 | type = flags_to_propagation_type(flag); | ||
1520 | if (!type) | ||
1521 | return -EINVAL; | ||
1522 | |||
1462 | down_write(&namespace_sem); | 1523 | down_write(&namespace_sem); |
1463 | if (type == MS_SHARED) { | 1524 | if (type == MS_SHARED) { |
1464 | err = invent_group_ids(mnt, recurse); | 1525 | err = invent_group_ids(mnt, recurse); |
@@ -1466,10 +1527,10 @@ static int do_change_type(struct path *path, int flag) | |||
1466 | goto out_unlock; | 1527 | goto out_unlock; |
1467 | } | 1528 | } |
1468 | 1529 | ||
1469 | spin_lock(&vfsmount_lock); | 1530 | br_write_lock(vfsmount_lock); |
1470 | for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) | 1531 | for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) |
1471 | change_mnt_propagation(m, type); | 1532 | change_mnt_propagation(m, type); |
1472 | spin_unlock(&vfsmount_lock); | 1533 | br_write_unlock(vfsmount_lock); |
1473 | 1534 | ||
1474 | out_unlock: | 1535 | out_unlock: |
1475 | up_write(&namespace_sem); | 1536 | up_write(&namespace_sem); |
@@ -1513,9 +1574,10 @@ static int do_loopback(struct path *path, char *old_name, | |||
1513 | err = graft_tree(mnt, path); | 1574 | err = graft_tree(mnt, path); |
1514 | if (err) { | 1575 | if (err) { |
1515 | LIST_HEAD(umount_list); | 1576 | LIST_HEAD(umount_list); |
1516 | spin_lock(&vfsmount_lock); | 1577 | |
1578 | br_write_lock(vfsmount_lock); | ||
1517 | umount_tree(mnt, 0, &umount_list); | 1579 | umount_tree(mnt, 0, &umount_list); |
1518 | spin_unlock(&vfsmount_lock); | 1580 | br_write_unlock(vfsmount_lock); |
1519 | release_mounts(&umount_list); | 1581 | release_mounts(&umount_list); |
1520 | } | 1582 | } |
1521 | 1583 | ||
@@ -1568,16 +1630,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags, | |||
1568 | else | 1630 | else |
1569 | err = do_remount_sb(sb, flags, data, 0); | 1631 | err = do_remount_sb(sb, flags, data, 0); |
1570 | if (!err) { | 1632 | if (!err) { |
1571 | spin_lock(&vfsmount_lock); | 1633 | br_write_lock(vfsmount_lock); |
1572 | mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; | 1634 | mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; |
1573 | path->mnt->mnt_flags = mnt_flags; | 1635 | path->mnt->mnt_flags = mnt_flags; |
1574 | spin_unlock(&vfsmount_lock); | 1636 | br_write_unlock(vfsmount_lock); |
1575 | } | 1637 | } |
1576 | up_write(&sb->s_umount); | 1638 | up_write(&sb->s_umount); |
1577 | if (!err) { | 1639 | if (!err) { |
1578 | spin_lock(&vfsmount_lock); | 1640 | br_write_lock(vfsmount_lock); |
1579 | touch_mnt_namespace(path->mnt->mnt_ns); | 1641 | touch_mnt_namespace(path->mnt->mnt_ns); |
1580 | spin_unlock(&vfsmount_lock); | 1642 | br_write_unlock(vfsmount_lock); |
1581 | } | 1643 | } |
1582 | return err; | 1644 | return err; |
1583 | } | 1645 | } |
@@ -1754,7 +1816,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
1754 | return; | 1816 | return; |
1755 | 1817 | ||
1756 | down_write(&namespace_sem); | 1818 | down_write(&namespace_sem); |
1757 | spin_lock(&vfsmount_lock); | 1819 | br_write_lock(vfsmount_lock); |
1758 | 1820 | ||
1759 | /* extract from the expiration list every vfsmount that matches the | 1821 | /* extract from the expiration list every vfsmount that matches the |
1760 | * following criteria: | 1822 | * following criteria: |
@@ -1773,7 +1835,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
1773 | touch_mnt_namespace(mnt->mnt_ns); | 1835 | touch_mnt_namespace(mnt->mnt_ns); |
1774 | umount_tree(mnt, 1, &umounts); | 1836 | umount_tree(mnt, 1, &umounts); |
1775 | } | 1837 | } |
1776 | spin_unlock(&vfsmount_lock); | 1838 | br_write_unlock(vfsmount_lock); |
1777 | up_write(&namespace_sem); | 1839 | up_write(&namespace_sem); |
1778 | 1840 | ||
1779 | release_mounts(&umounts); | 1841 | release_mounts(&umounts); |
@@ -1830,6 +1892,8 @@ resume: | |||
1830 | /* | 1892 | /* |
1831 | * process a list of expirable mountpoints with the intent of discarding any | 1893 | * process a list of expirable mountpoints with the intent of discarding any |
1832 | * submounts of a specific parent mountpoint | 1894 | * submounts of a specific parent mountpoint |
1895 | * | ||
1896 | * vfsmount_lock must be held for write | ||
1833 | */ | 1897 | */ |
1834 | static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) | 1898 | static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) |
1835 | { | 1899 | { |
@@ -2048,9 +2112,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
2048 | kfree(new_ns); | 2112 | kfree(new_ns); |
2049 | return ERR_PTR(-ENOMEM); | 2113 | return ERR_PTR(-ENOMEM); |
2050 | } | 2114 | } |
2051 | spin_lock(&vfsmount_lock); | 2115 | br_write_lock(vfsmount_lock); |
2052 | list_add_tail(&new_ns->list, &new_ns->root->mnt_list); | 2116 | list_add_tail(&new_ns->list, &new_ns->root->mnt_list); |
2053 | spin_unlock(&vfsmount_lock); | 2117 | br_write_unlock(vfsmount_lock); |
2054 | 2118 | ||
2055 | /* | 2119 | /* |
2056 | * Second pass: switch the tsk->fs->* elements and mark new vfsmounts | 2120 | * Second pass: switch the tsk->fs->* elements and mark new vfsmounts |
@@ -2244,7 +2308,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2244 | goto out2; /* not attached */ | 2308 | goto out2; /* not attached */ |
2245 | /* make sure we can reach put_old from new_root */ | 2309 | /* make sure we can reach put_old from new_root */ |
2246 | tmp = old.mnt; | 2310 | tmp = old.mnt; |
2247 | spin_lock(&vfsmount_lock); | 2311 | br_write_lock(vfsmount_lock); |
2248 | if (tmp != new.mnt) { | 2312 | if (tmp != new.mnt) { |
2249 | for (;;) { | 2313 | for (;;) { |
2250 | if (tmp->mnt_parent == tmp) | 2314 | if (tmp->mnt_parent == tmp) |
@@ -2264,7 +2328,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2264 | /* mount new_root on / */ | 2328 | /* mount new_root on / */ |
2265 | attach_mnt(new.mnt, &root_parent); | 2329 | attach_mnt(new.mnt, &root_parent); |
2266 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 2330 | touch_mnt_namespace(current->nsproxy->mnt_ns); |
2267 | spin_unlock(&vfsmount_lock); | 2331 | br_write_unlock(vfsmount_lock); |
2268 | chroot_fs_refs(&root, &new); | 2332 | chroot_fs_refs(&root, &new); |
2269 | error = 0; | 2333 | error = 0; |
2270 | path_put(&root_parent); | 2334 | path_put(&root_parent); |
@@ -2279,7 +2343,7 @@ out1: | |||
2279 | out0: | 2343 | out0: |
2280 | return error; | 2344 | return error; |
2281 | out3: | 2345 | out3: |
2282 | spin_unlock(&vfsmount_lock); | 2346 | br_write_unlock(vfsmount_lock); |
2283 | goto out2; | 2347 | goto out2; |
2284 | } | 2348 | } |
2285 | 2349 | ||
@@ -2326,6 +2390,8 @@ void __init mnt_init(void) | |||
2326 | for (u = 0; u < HASH_SIZE; u++) | 2390 | for (u = 0; u < HASH_SIZE; u++) |
2327 | INIT_LIST_HEAD(&mount_hashtable[u]); | 2391 | INIT_LIST_HEAD(&mount_hashtable[u]); |
2328 | 2392 | ||
2393 | br_lock_init(vfsmount_lock); | ||
2394 | |||
2329 | err = sysfs_init(); | 2395 | err = sysfs_init(); |
2330 | if (err) | 2396 | if (err) |
2331 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", | 2397 | printk(KERN_WARNING "%s: sysfs_init error: %d\n", |
@@ -2344,9 +2410,9 @@ void put_mnt_ns(struct mnt_namespace *ns) | |||
2344 | if (!atomic_dec_and_test(&ns->count)) | 2410 | if (!atomic_dec_and_test(&ns->count)) |
2345 | return; | 2411 | return; |
2346 | down_write(&namespace_sem); | 2412 | down_write(&namespace_sem); |
2347 | spin_lock(&vfsmount_lock); | 2413 | br_write_lock(vfsmount_lock); |
2348 | umount_tree(ns->root, 0, &umount_list); | 2414 | umount_tree(ns->root, 0, &umount_list); |
2349 | spin_unlock(&vfsmount_lock); | 2415 | br_write_unlock(vfsmount_lock); |
2350 | up_write(&namespace_sem); | 2416 | up_write(&namespace_sem); |
2351 | release_mounts(&umount_list); | 2417 | release_mounts(&umount_list); |
2352 | kfree(ns); | 2418 | kfree(ns); |
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index cc1bb33b59b8..6c2aad49d731 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -63,7 +63,6 @@ config NFS_V3_ACL | |||
63 | config NFS_V4 | 63 | config NFS_V4 |
64 | bool "NFS client support for NFS version 4" | 64 | bool "NFS client support for NFS version 4" |
65 | depends on NFS_FS | 65 | depends on NFS_FS |
66 | select RPCSEC_GSS_KRB5 | ||
67 | help | 66 | help |
68 | This option enables support for version 4 of the NFS protocol | 67 | This option enables support for version 4 of the NFS protocol |
69 | (RFC 3530) in the kernel's NFS client. | 68 | (RFC 3530) in the kernel's NFS client. |
@@ -100,3 +99,20 @@ config NFS_FSCACHE | |||
100 | help | 99 | help |
101 | Say Y here if you want NFS data to be cached locally on disc through | 100 | Say Y here if you want NFS data to be cached locally on disc through |
102 | the general filesystem cache manager | 101 | the general filesystem cache manager |
102 | |||
103 | config NFS_USE_LEGACY_DNS | ||
104 | bool "Use the legacy NFS DNS resolver" | ||
105 | depends on NFS_V4 | ||
106 | help | ||
107 | The kernel now provides a method for translating a host name into an | ||
108 | IP address. Select Y here if you would rather use your own DNS | ||
109 | resolver script. | ||
110 | |||
111 | If unsure, say N | ||
112 | |||
113 | config NFS_USE_KERNEL_DNS | ||
114 | bool | ||
115 | depends on NFS_V4 && !NFS_USE_LEGACY_DNS | ||
116 | select DNS_RESOLVER | ||
117 | select KEYS | ||
118 | default y | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 36dfdae95123..e17b49e2eabd 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -45,7 +45,7 @@ unsigned short nfs_callback_tcpport; | |||
45 | unsigned short nfs_callback_tcpport6; | 45 | unsigned short nfs_callback_tcpport6; |
46 | #define NFS_CALLBACK_MAXPORTNR (65535U) | 46 | #define NFS_CALLBACK_MAXPORTNR (65535U) |
47 | 47 | ||
48 | static int param_set_portnr(const char *val, struct kernel_param *kp) | 48 | static int param_set_portnr(const char *val, const struct kernel_param *kp) |
49 | { | 49 | { |
50 | unsigned long num; | 50 | unsigned long num; |
51 | int ret; | 51 | int ret; |
@@ -58,11 +58,10 @@ static int param_set_portnr(const char *val, struct kernel_param *kp) | |||
58 | *((unsigned int *)kp->arg) = num; | 58 | *((unsigned int *)kp->arg) = num; |
59 | return 0; | 59 | return 0; |
60 | } | 60 | } |
61 | 61 | static struct kernel_param_ops param_ops_portnr = { | |
62 | static int param_get_portnr(char *buffer, struct kernel_param *kp) | 62 | .set = param_set_portnr, |
63 | { | 63 | .get = param_get_uint, |
64 | return param_get_uint(buffer, kp); | 64 | }; |
65 | } | ||
66 | #define param_check_portnr(name, p) __param_check(name, p, unsigned int); | 65 | #define param_check_portnr(name, p) __param_check(name, p, unsigned int); |
67 | 66 | ||
68 | module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); | 67 | module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 29539ceeb745..e257172d438c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -140,6 +140,13 @@ nfs_opendir(struct inode *inode, struct file *filp) | |||
140 | 140 | ||
141 | /* Call generic open code in order to cache credentials */ | 141 | /* Call generic open code in order to cache credentials */ |
142 | res = nfs_open(inode, filp); | 142 | res = nfs_open(inode, filp); |
143 | if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { | ||
144 | /* This is a mountpoint, so d_revalidate will never | ||
145 | * have been called, so we need to refresh the | ||
146 | * inode (for close-open consistency) ourselves. | ||
147 | */ | ||
148 | __nfs_revalidate_inode(NFS_SERVER(inode), inode); | ||
149 | } | ||
143 | return res; | 150 | return res; |
144 | } | 151 | } |
145 | 152 | ||
@@ -1103,7 +1110,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1103 | if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) | 1110 | if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) |
1104 | goto no_open_dput; | 1111 | goto no_open_dput; |
1105 | /* We can't create new files, or truncate existing ones here */ | 1112 | /* We can't create new files, or truncate existing ones here */ |
1106 | openflags &= ~(O_CREAT|O_TRUNC); | 1113 | openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); |
1107 | 1114 | ||
1108 | /* | 1115 | /* |
1109 | * Note: we're not holding inode->i_mutex and so may be racing with | 1116 | * Note: we're not holding inode->i_mutex and so may be racing with |
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 76fd235d0024..dba50a5625db 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c | |||
@@ -6,6 +6,29 @@ | |||
6 | * Resolves DNS hostnames into valid ip addresses | 6 | * Resolves DNS hostnames into valid ip addresses |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #ifdef CONFIG_NFS_USE_KERNEL_DNS | ||
10 | |||
11 | #include <linux/sunrpc/clnt.h> | ||
12 | #include <linux/dns_resolver.h> | ||
13 | |||
14 | ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | ||
15 | struct sockaddr *sa, size_t salen) | ||
16 | { | ||
17 | ssize_t ret; | ||
18 | char *ip_addr = NULL; | ||
19 | int ip_len; | ||
20 | |||
21 | ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); | ||
22 | if (ip_len > 0) | ||
23 | ret = rpc_pton(ip_addr, ip_len, sa, salen); | ||
24 | else | ||
25 | ret = -ESRCH; | ||
26 | kfree(ip_addr); | ||
27 | return ret; | ||
28 | } | ||
29 | |||
30 | #else | ||
31 | |||
9 | #include <linux/hash.h> | 32 | #include <linux/hash.h> |
10 | #include <linux/string.h> | 33 | #include <linux/string.h> |
11 | #include <linux/kmod.h> | 34 | #include <linux/kmod.h> |
@@ -346,3 +369,4 @@ void nfs_dns_resolver_destroy(void) | |||
346 | nfs_cache_unregister(&nfs_dns_resolve); | 369 | nfs_cache_unregister(&nfs_dns_resolve); |
347 | } | 370 | } |
348 | 371 | ||
372 | #endif | ||
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h index a3f0938babf7..199bb5543a91 100644 --- a/fs/nfs/dns_resolve.h +++ b/fs/nfs/dns_resolve.h | |||
@@ -6,8 +6,20 @@ | |||
6 | 6 | ||
7 | #define NFS_DNS_HOSTNAME_MAXLEN (128) | 7 | #define NFS_DNS_HOSTNAME_MAXLEN (128) |
8 | 8 | ||
9 | |||
10 | #ifdef CONFIG_NFS_USE_KERNEL_DNS | ||
11 | static inline int nfs_dns_resolver_init(void) | ||
12 | { | ||
13 | return 0; | ||
14 | } | ||
15 | |||
16 | static inline void nfs_dns_resolver_destroy(void) | ||
17 | {} | ||
18 | #else | ||
9 | extern int nfs_dns_resolver_init(void); | 19 | extern int nfs_dns_resolver_init(void); |
10 | extern void nfs_dns_resolver_destroy(void); | 20 | extern void nfs_dns_resolver_destroy(void); |
21 | #endif | ||
22 | |||
11 | extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | 23 | extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, |
12 | struct sockaddr *sa, size_t salen); | 24 | struct sockaddr *sa, size_t salen); |
13 | 25 | ||
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2d141a74ae82..eb51bd6201da 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -323,7 +323,7 @@ nfs_file_fsync(struct file *file, int datasync) | |||
323 | have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | 323 | have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); |
324 | if (have_error) | 324 | if (have_error) |
325 | ret = xchg(&ctx->error, 0); | 325 | ret = xchg(&ctx->error, 0); |
326 | if (!ret) | 326 | if (!ret && status < 0) |
327 | ret = status; | 327 | ret = status; |
328 | return ret; | 328 | return ret; |
329 | } | 329 | } |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ffbb98ddec3..089da5b5d20a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -2036,7 +2036,8 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | |||
2036 | struct rpc_cred *cred; | 2036 | struct rpc_cred *cred; |
2037 | struct nfs4_state *state; | 2037 | struct nfs4_state *state; |
2038 | struct dentry *res; | 2038 | struct dentry *res; |
2039 | fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); | 2039 | int open_flags = nd->intent.open.flags; |
2040 | fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); | ||
2040 | 2041 | ||
2041 | if (nd->flags & LOOKUP_CREATE) { | 2042 | if (nd->flags & LOOKUP_CREATE) { |
2042 | attr.ia_mode = nd->intent.open.create_mode; | 2043 | attr.ia_mode = nd->intent.open.create_mode; |
@@ -2044,8 +2045,9 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | |||
2044 | if (!IS_POSIXACL(dir)) | 2045 | if (!IS_POSIXACL(dir)) |
2045 | attr.ia_mode &= ~current_umask(); | 2046 | attr.ia_mode &= ~current_umask(); |
2046 | } else { | 2047 | } else { |
2048 | open_flags &= ~O_EXCL; | ||
2047 | attr.ia_valid = 0; | 2049 | attr.ia_valid = 0; |
2048 | BUG_ON(nd->intent.open.flags & O_CREAT); | 2050 | BUG_ON(open_flags & O_CREAT); |
2049 | } | 2051 | } |
2050 | 2052 | ||
2051 | cred = rpc_lookup_cred(); | 2053 | cred = rpc_lookup_cred(); |
@@ -2054,7 +2056,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | |||
2054 | parent = dentry->d_parent; | 2056 | parent = dentry->d_parent; |
2055 | /* Protect against concurrent sillydeletes */ | 2057 | /* Protect against concurrent sillydeletes */ |
2056 | nfs_block_sillyrename(parent); | 2058 | nfs_block_sillyrename(parent); |
2057 | state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred); | 2059 | state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); |
2058 | put_rpccred(cred); | 2060 | put_rpccred(cred); |
2059 | if (IS_ERR(state)) { | 2061 | if (IS_ERR(state)) { |
2060 | if (PTR_ERR(state) == -ENOENT) { | 2062 | if (PTR_ERR(state) == -ENOENT) { |
@@ -2273,8 +2275,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct | |||
2273 | out: | 2275 | out: |
2274 | if (page) | 2276 | if (page) |
2275 | __free_page(page); | 2277 | __free_page(page); |
2276 | if (locations) | 2278 | kfree(locations); |
2277 | kfree(locations); | ||
2278 | return status; | 2279 | return status; |
2279 | } | 2280 | } |
2280 | 2281 | ||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ee26316ad1f4..ec3966e4706b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -655,6 +655,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
655 | 655 | ||
656 | if (nfss->options & NFS_OPTION_FSCACHE) | 656 | if (nfss->options & NFS_OPTION_FSCACHE) |
657 | seq_printf(m, ",fsc"); | 657 | seq_printf(m, ",fsc"); |
658 | |||
659 | if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { | ||
660 | if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) | ||
661 | seq_printf(m, ",lookupcache=none"); | ||
662 | else | ||
663 | seq_printf(m, ",lookupcache=pos"); | ||
664 | } | ||
658 | } | 665 | } |
659 | 666 | ||
660 | /* | 667 | /* |
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 503b9da159a3..95932f523aef 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -69,7 +69,6 @@ config NFSD_V4 | |||
69 | depends on NFSD && PROC_FS && EXPERIMENTAL | 69 | depends on NFSD && PROC_FS && EXPERIMENTAL |
70 | select NFSD_V3 | 70 | select NFSD_V3 |
71 | select FS_POSIX_ACL | 71 | select FS_POSIX_ACL |
72 | select RPCSEC_GSS_KRB5 | ||
73 | help | 72 | help |
74 | This option enables support in your system's NFS server for | 73 | This option enables support in your system's NFS server for |
75 | version 4 of the NFS protocol (RFC 3530). | 74 | version 4 of the NFS protocol (RFC 3530). |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2e7357104cfd..cf0d2ffb3c84 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -440,7 +440,7 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { | |||
440 | 440 | ||
441 | static int nfs4_access_to_omode(u32 access) | 441 | static int nfs4_access_to_omode(u32 access) |
442 | { | 442 | { |
443 | switch (access) { | 443 | switch (access & NFS4_SHARE_ACCESS_BOTH) { |
444 | case NFS4_SHARE_ACCESS_READ: | 444 | case NFS4_SHARE_ACCESS_READ: |
445 | return O_RDONLY; | 445 | return O_RDONLY; |
446 | case NFS4_SHARE_ACCESS_WRITE: | 446 | case NFS4_SHARE_ACCESS_WRITE: |
@@ -2450,14 +2450,13 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, | |||
2450 | static __be32 | 2450 | static __be32 |
2451 | nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) | 2451 | nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) |
2452 | { | 2452 | { |
2453 | u32 op_share_access, new_access; | 2453 | u32 op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK; |
2454 | bool new_access; | ||
2454 | __be32 status; | 2455 | __be32 status; |
2455 | 2456 | ||
2456 | set_access(&new_access, stp->st_access_bmap); | 2457 | new_access = !test_bit(op_share_access, &stp->st_access_bmap); |
2457 | new_access = (~new_access) & open->op_share_access & ~NFS4_SHARE_WANT_MASK; | ||
2458 | |||
2459 | if (new_access) { | 2458 | if (new_access) { |
2460 | status = nfs4_get_vfs_file(rqstp, fp, cur_fh, new_access); | 2459 | status = nfs4_get_vfs_file(rqstp, fp, cur_fh, op_share_access); |
2461 | if (status) | 2460 | if (status) |
2462 | return status; | 2461 | return status; |
2463 | } | 2462 | } |
@@ -2470,7 +2469,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c | |||
2470 | return status; | 2469 | return status; |
2471 | } | 2470 | } |
2472 | /* remember the open */ | 2471 | /* remember the open */ |
2473 | op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK; | ||
2474 | __set_bit(op_share_access, &stp->st_access_bmap); | 2472 | __set_bit(op_share_access, &stp->st_access_bmap); |
2475 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); | 2473 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); |
2476 | 2474 | ||
@@ -2983,7 +2981,6 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, | |||
2983 | *filpp = find_readable_file(stp->st_file); | 2981 | *filpp = find_readable_file(stp->st_file); |
2984 | else | 2982 | else |
2985 | *filpp = find_writeable_file(stp->st_file); | 2983 | *filpp = find_writeable_file(stp->st_file); |
2986 | BUG_ON(!*filpp); /* assured by check_openmode */ | ||
2987 | } | 2984 | } |
2988 | } | 2985 | } |
2989 | status = nfs_ok; | 2986 | status = nfs_ok; |
@@ -3561,7 +3558,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3561 | struct nfs4_stateowner *open_sop = NULL; | 3558 | struct nfs4_stateowner *open_sop = NULL; |
3562 | struct nfs4_stateowner *lock_sop = NULL; | 3559 | struct nfs4_stateowner *lock_sop = NULL; |
3563 | struct nfs4_stateid *lock_stp; | 3560 | struct nfs4_stateid *lock_stp; |
3564 | struct file *filp; | 3561 | struct nfs4_file *fp; |
3562 | struct file *filp = NULL; | ||
3565 | struct file_lock file_lock; | 3563 | struct file_lock file_lock; |
3566 | struct file_lock conflock; | 3564 | struct file_lock conflock; |
3567 | __be32 status = 0; | 3565 | __be32 status = 0; |
@@ -3591,7 +3589,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3591 | * lock stateid. | 3589 | * lock stateid. |
3592 | */ | 3590 | */ |
3593 | struct nfs4_stateid *open_stp = NULL; | 3591 | struct nfs4_stateid *open_stp = NULL; |
3594 | struct nfs4_file *fp; | ||
3595 | 3592 | ||
3596 | status = nfserr_stale_clientid; | 3593 | status = nfserr_stale_clientid; |
3597 | if (!nfsd4_has_session(cstate) && | 3594 | if (!nfsd4_has_session(cstate) && |
@@ -3634,6 +3631,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3634 | if (status) | 3631 | if (status) |
3635 | goto out; | 3632 | goto out; |
3636 | lock_sop = lock->lk_replay_owner; | 3633 | lock_sop = lock->lk_replay_owner; |
3634 | fp = lock_stp->st_file; | ||
3637 | } | 3635 | } |
3638 | /* lock->lk_replay_owner and lock_stp have been created or found */ | 3636 | /* lock->lk_replay_owner and lock_stp have been created or found */ |
3639 | 3637 | ||
@@ -3648,13 +3646,19 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3648 | switch (lock->lk_type) { | 3646 | switch (lock->lk_type) { |
3649 | case NFS4_READ_LT: | 3647 | case NFS4_READ_LT: |
3650 | case NFS4_READW_LT: | 3648 | case NFS4_READW_LT: |
3651 | filp = find_readable_file(lock_stp->st_file); | 3649 | if (find_readable_file(lock_stp->st_file)) { |
3650 | nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_READ); | ||
3651 | filp = find_readable_file(lock_stp->st_file); | ||
3652 | } | ||
3652 | file_lock.fl_type = F_RDLCK; | 3653 | file_lock.fl_type = F_RDLCK; |
3653 | cmd = F_SETLK; | 3654 | cmd = F_SETLK; |
3654 | break; | 3655 | break; |
3655 | case NFS4_WRITE_LT: | 3656 | case NFS4_WRITE_LT: |
3656 | case NFS4_WRITEW_LT: | 3657 | case NFS4_WRITEW_LT: |
3657 | filp = find_writeable_file(lock_stp->st_file); | 3658 | if (find_writeable_file(lock_stp->st_file)) { |
3659 | nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_WRITE); | ||
3660 | filp = find_writeable_file(lock_stp->st_file); | ||
3661 | } | ||
3658 | file_lock.fl_type = F_WRLCK; | 3662 | file_lock.fl_type = F_WRLCK; |
3659 | cmd = F_SETLK; | 3663 | cmd = F_SETLK; |
3660 | break; | 3664 | break; |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 7731a75971dd..322518c88e4b 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -363,23 +363,23 @@ struct nfs4_file { | |||
363 | * at all? */ | 363 | * at all? */ |
364 | static inline struct file *find_writeable_file(struct nfs4_file *f) | 364 | static inline struct file *find_writeable_file(struct nfs4_file *f) |
365 | { | 365 | { |
366 | if (f->fi_fds[O_RDWR]) | 366 | if (f->fi_fds[O_WRONLY]) |
367 | return f->fi_fds[O_RDWR]; | 367 | return f->fi_fds[O_WRONLY]; |
368 | return f->fi_fds[O_WRONLY]; | 368 | return f->fi_fds[O_RDWR]; |
369 | } | 369 | } |
370 | 370 | ||
371 | static inline struct file *find_readable_file(struct nfs4_file *f) | 371 | static inline struct file *find_readable_file(struct nfs4_file *f) |
372 | { | 372 | { |
373 | if (f->fi_fds[O_RDWR]) | 373 | if (f->fi_fds[O_RDONLY]) |
374 | return f->fi_fds[O_RDWR]; | 374 | return f->fi_fds[O_RDONLY]; |
375 | return f->fi_fds[O_RDONLY]; | 375 | return f->fi_fds[O_RDWR]; |
376 | } | 376 | } |
377 | 377 | ||
378 | static inline struct file *find_any_file(struct nfs4_file *f) | 378 | static inline struct file *find_any_file(struct nfs4_file *f) |
379 | { | 379 | { |
380 | if (f->fi_fds[O_RDWR]) | 380 | if (f->fi_fds[O_RDWR]) |
381 | return f->fi_fds[O_RDWR]; | 381 | return f->fi_fds[O_RDWR]; |
382 | else if (f->fi_fds[O_RDWR]) | 382 | else if (f->fi_fds[O_WRONLY]) |
383 | return f->fi_fds[O_WRONLY]; | 383 | return f->fi_fds[O_WRONLY]; |
384 | else | 384 | else |
385 | return f->fi_fds[O_RDONLY]; | 385 | return f->fi_fds[O_RDONLY]; |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 96360a83cb91..661a6cf8e826 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -2033,15 +2033,17 @@ out: | |||
2033 | __be32 | 2033 | __be32 |
2034 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) | 2034 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) |
2035 | { | 2035 | { |
2036 | struct path path = { | ||
2037 | .mnt = fhp->fh_export->ex_path.mnt, | ||
2038 | .dentry = fhp->fh_dentry, | ||
2039 | }; | ||
2040 | __be32 err; | 2036 | __be32 err; |
2041 | 2037 | ||
2042 | err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); | 2038 | err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); |
2043 | if (!err && vfs_statfs(&path, stat)) | 2039 | if (!err) { |
2044 | err = nfserr_io; | 2040 | struct path path = { |
2041 | .mnt = fhp->fh_export->ex_path.mnt, | ||
2042 | .dentry = fhp->fh_dentry, | ||
2043 | }; | ||
2044 | if (vfs_statfs(&path, stat)) | ||
2045 | err = nfserr_io; | ||
2046 | } | ||
2045 | return err; | 2047 | return err; |
2046 | } | 2048 | } |
2047 | 2049 | ||
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1fa86b9df73b..922263393c76 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) | |||
175 | { | 175 | { |
176 | struct the_nilfs *nilfs = sbi->s_nilfs; | 176 | struct the_nilfs *nilfs = sbi->s_nilfs; |
177 | int err; | 177 | int err; |
178 | int barrier_done = 0; | ||
179 | 178 | ||
180 | if (nilfs_test_opt(sbi, BARRIER)) { | ||
181 | set_buffer_ordered(nilfs->ns_sbh[0]); | ||
182 | barrier_done = 1; | ||
183 | } | ||
184 | retry: | 179 | retry: |
185 | set_buffer_dirty(nilfs->ns_sbh[0]); | 180 | set_buffer_dirty(nilfs->ns_sbh[0]); |
186 | err = sync_dirty_buffer(nilfs->ns_sbh[0]); | 181 | |
187 | if (err == -EOPNOTSUPP && barrier_done) { | 182 | if (nilfs_test_opt(sbi, BARRIER)) { |
188 | nilfs_warning(sbi->s_super, __func__, | 183 | err = __sync_dirty_buffer(nilfs->ns_sbh[0], |
189 | "barrier-based sync failed. " | 184 | WRITE_SYNC | WRITE_BARRIER); |
190 | "disabling barriers\n"); | 185 | if (err == -EOPNOTSUPP) { |
191 | nilfs_clear_opt(sbi, BARRIER); | 186 | nilfs_warning(sbi->s_super, __func__, |
192 | barrier_done = 0; | 187 | "barrier-based sync failed. " |
193 | clear_buffer_ordered(nilfs->ns_sbh[0]); | 188 | "disabling barriers\n"); |
194 | goto retry; | 189 | nilfs_clear_opt(sbi, BARRIER); |
190 | goto retry; | ||
191 | } | ||
192 | } else { | ||
193 | err = sync_dirty_buffer(nilfs->ns_sbh[0]); | ||
195 | } | 194 | } |
195 | |||
196 | if (unlikely(err)) { | 196 | if (unlikely(err)) { |
197 | printk(KERN_ERR | 197 | printk(KERN_ERR |
198 | "NILFS: unable to write superblock (err=%d)\n", err); | 198 | "NILFS: unable to write superblock (err=%d)\n", err); |
@@ -400,9 +400,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) | |||
400 | list_add(&sbi->s_list, &nilfs->ns_supers); | 400 | list_add(&sbi->s_list, &nilfs->ns_supers); |
401 | up_write(&nilfs->ns_super_sem); | 401 | up_write(&nilfs->ns_super_sem); |
402 | 402 | ||
403 | err = -ENOMEM; | ||
403 | sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size); | 404 | sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size); |
404 | if (!sbi->s_ifile) | 405 | if (!sbi->s_ifile) |
405 | return -ENOMEM; | 406 | goto delist; |
406 | 407 | ||
407 | down_read(&nilfs->ns_segctor_sem); | 408 | down_read(&nilfs->ns_segctor_sem); |
408 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, | 409 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, |
@@ -433,6 +434,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) | |||
433 | nilfs_mdt_destroy(sbi->s_ifile); | 434 | nilfs_mdt_destroy(sbi->s_ifile); |
434 | sbi->s_ifile = NULL; | 435 | sbi->s_ifile = NULL; |
435 | 436 | ||
437 | delist: | ||
436 | down_write(&nilfs->ns_super_sem); | 438 | down_write(&nilfs->ns_super_sem); |
437 | list_del_init(&sbi->s_list); | 439 | list_del_init(&sbi->s_list); |
438 | up_write(&nilfs->ns_super_sem); | 440 | up_write(&nilfs->ns_super_sem); |
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 37de1f062d81..ba7c10c917fc 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -446,6 +446,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
446 | nilfs_mdt_destroy(nilfs->ns_cpfile); | 446 | nilfs_mdt_destroy(nilfs->ns_cpfile); |
447 | nilfs_mdt_destroy(nilfs->ns_sufile); | 447 | nilfs_mdt_destroy(nilfs->ns_sufile); |
448 | nilfs_mdt_destroy(nilfs->ns_dat); | 448 | nilfs_mdt_destroy(nilfs->ns_dat); |
449 | nilfs_mdt_destroy(nilfs->ns_gc_dat); | ||
449 | 450 | ||
450 | failed: | 451 | failed: |
451 | nilfs_clear_recovery_info(&ri); | 452 | nilfs_clear_recovery_info(&ri); |
@@ -608,11 +609,11 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, | |||
608 | return -EINVAL; | 609 | return -EINVAL; |
609 | } | 610 | } |
610 | 611 | ||
611 | if (swp) { | 612 | if (!valid[!swp]) |
612 | printk(KERN_WARNING "NILFS warning: broken superblock. " | 613 | printk(KERN_WARNING "NILFS warning: broken superblock. " |
613 | "using spare superblock.\n"); | 614 | "using spare superblock.\n"); |
615 | if (swp) | ||
614 | nilfs_swap_super_block(nilfs); | 616 | nilfs_swap_super_block(nilfs); |
615 | } | ||
616 | 617 | ||
617 | nilfs->ns_sbwcount = 0; | 618 | nilfs->ns_sbwcount = 0; |
618 | nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); | 619 | nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); |
@@ -775,6 +776,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
775 | start * sects_per_block, | 776 | start * sects_per_block, |
776 | nblocks * sects_per_block, | 777 | nblocks * sects_per_block, |
777 | GFP_NOFS, | 778 | GFP_NOFS, |
779 | BLKDEV_IFL_WAIT | | ||
778 | BLKDEV_IFL_BARRIER); | 780 | BLKDEV_IFL_BARRIER); |
779 | if (ret < 0) | 781 | if (ret < 0) |
780 | return ret; | 782 | return ret; |
@@ -785,7 +787,8 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
785 | ret = blkdev_issue_discard(nilfs->ns_bdev, | 787 | ret = blkdev_issue_discard(nilfs->ns_bdev, |
786 | start * sects_per_block, | 788 | start * sects_per_block, |
787 | nblocks * sects_per_block, | 789 | nblocks * sects_per_block, |
788 | GFP_NOFS, BLKDEV_IFL_BARRIER); | 790 | GFP_NOFS, |
791 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
789 | return ret; | 792 | return ret; |
790 | } | 793 | } |
791 | 794 | ||
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index eb8f73c9c131..85366c78cc37 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c | |||
@@ -17,9 +17,9 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) | |||
17 | old->data_type == new->data_type && | 17 | old->data_type == new->data_type && |
18 | old->tgid == new->tgid) { | 18 | old->tgid == new->tgid) { |
19 | switch (old->data_type) { | 19 | switch (old->data_type) { |
20 | case (FSNOTIFY_EVENT_FILE): | 20 | case (FSNOTIFY_EVENT_PATH): |
21 | if ((old->file->f_path.mnt == new->file->f_path.mnt) && | 21 | if ((old->path.mnt == new->path.mnt) && |
22 | (old->file->f_path.dentry == new->file->f_path.dentry)) | 22 | (old->path.dentry == new->path.dentry)) |
23 | return true; | 23 | return true; |
24 | case (FSNOTIFY_EVENT_NONE): | 24 | case (FSNOTIFY_EVENT_NONE): |
25 | return true; | 25 | return true; |
@@ -165,16 +165,13 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, | |||
165 | "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, | 165 | "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, |
166 | inode_mark, vfsmnt_mark, event_mask, data, data_type); | 166 | inode_mark, vfsmnt_mark, event_mask, data, data_type); |
167 | 167 | ||
168 | pr_debug("%s: group=%p vfsmount_mark=%p inode_mark=%p mask=%x\n", | ||
169 | __func__, group, vfsmnt_mark, inode_mark, event_mask); | ||
170 | |||
171 | /* sorry, fanotify only gives a damn about files and dirs */ | 168 | /* sorry, fanotify only gives a damn about files and dirs */ |
172 | if (!S_ISREG(to_tell->i_mode) && | 169 | if (!S_ISREG(to_tell->i_mode) && |
173 | !S_ISDIR(to_tell->i_mode)) | 170 | !S_ISDIR(to_tell->i_mode)) |
174 | return false; | 171 | return false; |
175 | 172 | ||
176 | /* if we don't have enough info to send an event to userspace say no */ | 173 | /* if we don't have enough info to send an event to userspace say no */ |
177 | if (data_type != FSNOTIFY_EVENT_FILE) | 174 | if (data_type != FSNOTIFY_EVENT_PATH) |
178 | return false; | 175 | return false; |
179 | 176 | ||
180 | if (inode_mark && vfsmnt_mark) { | 177 | if (inode_mark && vfsmnt_mark) { |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 25a3b4dfcf61..5ed8e58d7bfc 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -65,7 +65,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | |||
65 | if (client_fd < 0) | 65 | if (client_fd < 0) |
66 | return client_fd; | 66 | return client_fd; |
67 | 67 | ||
68 | if (event->data_type != FSNOTIFY_EVENT_FILE) { | 68 | if (event->data_type != FSNOTIFY_EVENT_PATH) { |
69 | WARN_ON(1); | 69 | WARN_ON(1); |
70 | put_unused_fd(client_fd); | 70 | put_unused_fd(client_fd); |
71 | return -EINVAL; | 71 | return -EINVAL; |
@@ -75,8 +75,8 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | |||
75 | * we need a new file handle for the userspace program so it can read even if it was | 75 | * we need a new file handle for the userspace program so it can read even if it was |
76 | * originally opened O_WRONLY. | 76 | * originally opened O_WRONLY. |
77 | */ | 77 | */ |
78 | dentry = dget(event->file->f_path.dentry); | 78 | dentry = dget(event->path.dentry); |
79 | mnt = mntget(event->file->f_path.mnt); | 79 | mnt = mntget(event->path.mnt); |
80 | /* it's possible this event was an overflow event. in that case dentry and mnt | 80 | /* it's possible this event was an overflow event. in that case dentry and mnt |
81 | * are NULL; That's fine, just don't call dentry open */ | 81 | * are NULL; That's fine, just don't call dentry open */ |
82 | if (dentry && mnt) | 82 | if (dentry && mnt) |
@@ -195,6 +195,14 @@ static int prepare_for_access_response(struct fsnotify_group *group, | |||
195 | re->fd = fd; | 195 | re->fd = fd; |
196 | 196 | ||
197 | mutex_lock(&group->fanotify_data.access_mutex); | 197 | mutex_lock(&group->fanotify_data.access_mutex); |
198 | |||
199 | if (group->fanotify_data.bypass_perm) { | ||
200 | mutex_unlock(&group->fanotify_data.access_mutex); | ||
201 | kmem_cache_free(fanotify_response_event_cache, re); | ||
202 | event->response = FAN_ALLOW; | ||
203 | return 0; | ||
204 | } | ||
205 | |||
198 | list_add_tail(&re->list, &group->fanotify_data.access_list); | 206 | list_add_tail(&re->list, &group->fanotify_data.access_list); |
199 | mutex_unlock(&group->fanotify_data.access_mutex); | 207 | mutex_unlock(&group->fanotify_data.access_mutex); |
200 | 208 | ||
@@ -364,9 +372,28 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t | |||
364 | static int fanotify_release(struct inode *ignored, struct file *file) | 372 | static int fanotify_release(struct inode *ignored, struct file *file) |
365 | { | 373 | { |
366 | struct fsnotify_group *group = file->private_data; | 374 | struct fsnotify_group *group = file->private_data; |
375 | struct fanotify_response_event *re, *lre; | ||
367 | 376 | ||
368 | pr_debug("%s: file=%p group=%p\n", __func__, file, group); | 377 | pr_debug("%s: file=%p group=%p\n", __func__, file, group); |
369 | 378 | ||
379 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
380 | mutex_lock(&group->fanotify_data.access_mutex); | ||
381 | |||
382 | group->fanotify_data.bypass_perm = true; | ||
383 | |||
384 | list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { | ||
385 | pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, | ||
386 | re, re->event); | ||
387 | |||
388 | list_del_init(&re->list); | ||
389 | re->event->response = FAN_ALLOW; | ||
390 | |||
391 | kmem_cache_free(fanotify_response_event_cache, re); | ||
392 | } | ||
393 | mutex_unlock(&group->fanotify_data.access_mutex); | ||
394 | |||
395 | wake_up(&group->fanotify_data.access_waitq); | ||
396 | #endif | ||
370 | /* matches the fanotify_init->fsnotify_alloc_group */ | 397 | /* matches the fanotify_init->fsnotify_alloc_group */ |
371 | fsnotify_put_group(group); | 398 | fsnotify_put_group(group); |
372 | 399 | ||
@@ -614,7 +641,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) | |||
614 | __func__, flags, event_f_flags); | 641 | __func__, flags, event_f_flags); |
615 | 642 | ||
616 | if (!capable(CAP_SYS_ADMIN)) | 643 | if (!capable(CAP_SYS_ADMIN)) |
617 | return -EACCES; | 644 | return -EPERM; |
618 | 645 | ||
619 | if (flags & ~FAN_ALL_INIT_FLAGS) | 646 | if (flags & ~FAN_ALL_INIT_FLAGS) |
620 | return -EINVAL; | 647 | return -EINVAL; |
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4d2a82c1ceb1..36802420d69a 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
@@ -84,7 +84,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) | |||
84 | } | 84 | } |
85 | 85 | ||
86 | /* Notify this dentry's parent about a child's events. */ | 86 | /* Notify this dentry's parent about a child's events. */ |
87 | void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask) | 87 | void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) |
88 | { | 88 | { |
89 | struct dentry *parent; | 89 | struct dentry *parent; |
90 | struct inode *p_inode; | 90 | struct inode *p_inode; |
@@ -92,7 +92,7 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask) | |||
92 | bool should_update_children = false; | 92 | bool should_update_children = false; |
93 | 93 | ||
94 | if (!dentry) | 94 | if (!dentry) |
95 | dentry = file->f_path.dentry; | 95 | dentry = path->dentry; |
96 | 96 | ||
97 | if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) | 97 | if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) |
98 | return; | 98 | return; |
@@ -124,8 +124,8 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask) | |||
124 | * specifies these are events which came from a child. */ | 124 | * specifies these are events which came from a child. */ |
125 | mask |= FS_EVENT_ON_CHILD; | 125 | mask |= FS_EVENT_ON_CHILD; |
126 | 126 | ||
127 | if (file) | 127 | if (path) |
128 | fsnotify(p_inode, mask, file, FSNOTIFY_EVENT_FILE, | 128 | fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, |
129 | dentry->d_name.name, 0); | 129 | dentry->d_name.name, 0); |
130 | else | 130 | else |
131 | fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, | 131 | fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, |
@@ -148,13 +148,14 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, | |||
148 | const unsigned char *file_name, | 148 | const unsigned char *file_name, |
149 | struct fsnotify_event **event) | 149 | struct fsnotify_event **event) |
150 | { | 150 | { |
151 | struct fsnotify_group *group = inode_mark->group; | 151 | struct fsnotify_group *group = NULL; |
152 | __u32 inode_test_mask = (mask & ~FS_EVENT_ON_CHILD); | 152 | __u32 inode_test_mask = 0; |
153 | __u32 vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD); | 153 | __u32 vfsmount_test_mask = 0; |
154 | 154 | ||
155 | pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p" | 155 | if (unlikely(!inode_mark && !vfsmount_mark)) { |
156 | " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell, | 156 | BUG(); |
157 | mnt, inode_mark, mask, data, data_is, cookie, *event); | 157 | return 0; |
158 | } | ||
158 | 159 | ||
159 | /* clear ignored on inode modification */ | 160 | /* clear ignored on inode modification */ |
160 | if (mask & FS_MODIFY) { | 161 | if (mask & FS_MODIFY) { |
@@ -168,18 +169,29 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, | |||
168 | 169 | ||
169 | /* does the inode mark tell us to do something? */ | 170 | /* does the inode mark tell us to do something? */ |
170 | if (inode_mark) { | 171 | if (inode_mark) { |
172 | group = inode_mark->group; | ||
173 | inode_test_mask = (mask & ~FS_EVENT_ON_CHILD); | ||
171 | inode_test_mask &= inode_mark->mask; | 174 | inode_test_mask &= inode_mark->mask; |
172 | inode_test_mask &= ~inode_mark->ignored_mask; | 175 | inode_test_mask &= ~inode_mark->ignored_mask; |
173 | } | 176 | } |
174 | 177 | ||
175 | /* does the vfsmount_mark tell us to do something? */ | 178 | /* does the vfsmount_mark tell us to do something? */ |
176 | if (vfsmount_mark) { | 179 | if (vfsmount_mark) { |
180 | vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD); | ||
181 | group = vfsmount_mark->group; | ||
177 | vfsmount_test_mask &= vfsmount_mark->mask; | 182 | vfsmount_test_mask &= vfsmount_mark->mask; |
178 | vfsmount_test_mask &= ~vfsmount_mark->ignored_mask; | 183 | vfsmount_test_mask &= ~vfsmount_mark->ignored_mask; |
179 | if (inode_mark) | 184 | if (inode_mark) |
180 | vfsmount_test_mask &= ~inode_mark->ignored_mask; | 185 | vfsmount_test_mask &= ~inode_mark->ignored_mask; |
181 | } | 186 | } |
182 | 187 | ||
188 | pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x inode_mark=%p" | ||
189 | " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" | ||
190 | " data=%p data_is=%d cookie=%d event=%p\n", | ||
191 | __func__, group, to_tell, mnt, mask, inode_mark, | ||
192 | inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, | ||
193 | data_is, cookie, *event); | ||
194 | |||
183 | if (!inode_test_mask && !vfsmount_test_mask) | 195 | if (!inode_test_mask && !vfsmount_test_mask) |
184 | return 0; | 196 | return 0; |
185 | 197 | ||
@@ -207,18 +219,17 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, | |||
207 | int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | 219 | int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, |
208 | const unsigned char *file_name, u32 cookie) | 220 | const unsigned char *file_name, u32 cookie) |
209 | { | 221 | { |
210 | struct hlist_node *inode_node, *vfsmount_node; | 222 | struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; |
211 | struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; | 223 | struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; |
212 | struct fsnotify_group *inode_group, *vfsmount_group; | 224 | struct fsnotify_group *inode_group, *vfsmount_group; |
213 | struct fsnotify_event *event = NULL; | 225 | struct fsnotify_event *event = NULL; |
214 | struct vfsmount *mnt; | 226 | struct vfsmount *mnt; |
215 | int idx, ret = 0; | 227 | int idx, ret = 0; |
216 | bool used_inode = false, used_vfsmount = false; | ||
217 | /* global tests shouldn't care about events on child only the specific event */ | 228 | /* global tests shouldn't care about events on child only the specific event */ |
218 | __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); | 229 | __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); |
219 | 230 | ||
220 | if (data_is == FSNOTIFY_EVENT_FILE) | 231 | if (data_is == FSNOTIFY_EVENT_PATH) |
221 | mnt = ((struct file *)data)->f_path.mnt; | 232 | mnt = ((struct path *)data)->mnt; |
222 | else | 233 | else |
223 | mnt = NULL; | 234 | mnt = NULL; |
224 | 235 | ||
@@ -238,57 +249,50 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
238 | (test_mask & to_tell->i_fsnotify_mask)) | 249 | (test_mask & to_tell->i_fsnotify_mask)) |
239 | inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, | 250 | inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, |
240 | &fsnotify_mark_srcu); | 251 | &fsnotify_mark_srcu); |
241 | else | ||
242 | inode_node = NULL; | ||
243 | 252 | ||
244 | if (mnt) { | 253 | if (mnt && ((mask & FS_MODIFY) || |
245 | if ((mask & FS_MODIFY) || | 254 | (test_mask & mnt->mnt_fsnotify_mask))) { |
246 | (test_mask & mnt->mnt_fsnotify_mask)) | 255 | vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, |
247 | vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, | 256 | &fsnotify_mark_srcu); |
248 | &fsnotify_mark_srcu); | 257 | inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, |
249 | else | 258 | &fsnotify_mark_srcu); |
250 | vfsmount_node = NULL; | ||
251 | } else { | ||
252 | mnt = NULL; | ||
253 | vfsmount_node = NULL; | ||
254 | } | 259 | } |
255 | 260 | ||
256 | while (inode_node || vfsmount_node) { | 261 | while (inode_node || vfsmount_node) { |
262 | inode_group = vfsmount_group = NULL; | ||
263 | |||
257 | if (inode_node) { | 264 | if (inode_node) { |
258 | inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), | 265 | inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), |
259 | struct fsnotify_mark, i.i_list); | 266 | struct fsnotify_mark, i.i_list); |
260 | inode_group = inode_mark->group; | 267 | inode_group = inode_mark->group; |
261 | } else | 268 | } |
262 | inode_group = (void *)-1; | ||
263 | 269 | ||
264 | if (vfsmount_node) { | 270 | if (vfsmount_node) { |
265 | vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu), | 271 | vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu), |
266 | struct fsnotify_mark, m.m_list); | 272 | struct fsnotify_mark, m.m_list); |
267 | vfsmount_group = vfsmount_mark->group; | 273 | vfsmount_group = vfsmount_mark->group; |
268 | } else | 274 | } |
269 | vfsmount_group = (void *)-1; | ||
270 | 275 | ||
271 | if (inode_group < vfsmount_group) { | 276 | if (inode_group > vfsmount_group) { |
272 | /* handle inode */ | 277 | /* handle inode */ |
273 | send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, | 278 | send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, |
274 | data_is, cookie, file_name, &event); | 279 | data_is, cookie, file_name, &event); |
275 | used_inode = true; | 280 | /* we didn't use the vfsmount_mark */ |
276 | } else if (vfsmount_group < inode_group) { | 281 | vfsmount_group = NULL; |
282 | } else if (vfsmount_group > inode_group) { | ||
277 | send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, | 283 | send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, |
278 | data_is, cookie, file_name, &event); | 284 | data_is, cookie, file_name, &event); |
279 | used_vfsmount = true; | 285 | inode_group = NULL; |
280 | } else { | 286 | } else { |
281 | send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, | 287 | send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, |
282 | mask, data, data_is, cookie, file_name, | 288 | mask, data, data_is, cookie, file_name, |
283 | &event); | 289 | &event); |
284 | used_vfsmount = true; | ||
285 | used_inode = true; | ||
286 | } | 290 | } |
287 | 291 | ||
288 | if (used_inode) | 292 | if (inode_group) |
289 | inode_node = srcu_dereference(inode_node->next, | 293 | inode_node = srcu_dereference(inode_node->next, |
290 | &fsnotify_mark_srcu); | 294 | &fsnotify_mark_srcu); |
291 | if (used_vfsmount) | 295 | if (vfsmount_group) |
292 | vfsmount_node = srcu_dereference(vfsmount_node->next, | 296 | vfsmount_node = srcu_dereference(vfsmount_node->next, |
293 | &fsnotify_mark_srcu); | 297 | &fsnotify_mark_srcu); |
294 | } | 298 | } |
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 5e73eeb2c697..a91b69a6a291 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c | |||
@@ -52,9 +52,9 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new | |||
52 | !strcmp(old->file_name, new->file_name)) | 52 | !strcmp(old->file_name, new->file_name)) |
53 | return true; | 53 | return true; |
54 | break; | 54 | break; |
55 | case (FSNOTIFY_EVENT_FILE): | 55 | case (FSNOTIFY_EVENT_PATH): |
56 | if ((old->file->f_path.mnt == new->file->f_path.mnt) && | 56 | if ((old->path.mnt == new->path.mnt) && |
57 | (old->file->f_path.dentry == new->file->f_path.dentry)) | 57 | (old->path.dentry == new->path.dentry)) |
58 | return true; | 58 | return true; |
59 | break; | 59 | break; |
60 | case (FSNOTIFY_EVENT_NONE): | 60 | case (FSNOTIFY_EVENT_NONE): |
@@ -147,10 +147,10 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode | |||
147 | __u32 mask, void *data, int data_type) | 147 | __u32 mask, void *data, int data_type) |
148 | { | 148 | { |
149 | if ((inode_mark->mask & FS_EXCL_UNLINK) && | 149 | if ((inode_mark->mask & FS_EXCL_UNLINK) && |
150 | (data_type == FSNOTIFY_EVENT_FILE)) { | 150 | (data_type == FSNOTIFY_EVENT_PATH)) { |
151 | struct file *file = data; | 151 | struct path *path = data; |
152 | 152 | ||
153 | if (d_unlinked(file->f_path.dentry)) | 153 | if (d_unlinked(path->dentry)) |
154 | return false; | 154 | return false; |
155 | } | 155 | } |
156 | 156 | ||
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index d6c435adc7a2..f39260f8f865 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
@@ -31,7 +31,6 @@ | |||
31 | * allocated and used. | 31 | * allocated and used. |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include <linux/file.h> | ||
35 | #include <linux/fs.h> | 34 | #include <linux/fs.h> |
36 | #include <linux/init.h> | 35 | #include <linux/init.h> |
37 | #include <linux/kernel.h> | 36 | #include <linux/kernel.h> |
@@ -90,8 +89,8 @@ void fsnotify_put_event(struct fsnotify_event *event) | |||
90 | if (atomic_dec_and_test(&event->refcnt)) { | 89 | if (atomic_dec_and_test(&event->refcnt)) { |
91 | pr_debug("%s: event=%p\n", __func__, event); | 90 | pr_debug("%s: event=%p\n", __func__, event); |
92 | 91 | ||
93 | if (event->data_type == FSNOTIFY_EVENT_FILE) | 92 | if (event->data_type == FSNOTIFY_EVENT_PATH) |
94 | fput(event->file); | 93 | path_put(&event->path); |
95 | 94 | ||
96 | BUG_ON(!list_empty(&event->private_data_list)); | 95 | BUG_ON(!list_empty(&event->private_data_list)); |
97 | 96 | ||
@@ -376,8 +375,8 @@ struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event) | |||
376 | } | 375 | } |
377 | } | 376 | } |
378 | event->tgid = get_pid(old_event->tgid); | 377 | event->tgid = get_pid(old_event->tgid); |
379 | if (event->data_type == FSNOTIFY_EVENT_FILE) | 378 | if (event->data_type == FSNOTIFY_EVENT_PATH) |
380 | get_file(event->file); | 379 | path_get(&event->path); |
381 | 380 | ||
382 | return event; | 381 | return event; |
383 | } | 382 | } |
@@ -424,22 +423,11 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, | |||
424 | event->data_type = data_type; | 423 | event->data_type = data_type; |
425 | 424 | ||
426 | switch (data_type) { | 425 | switch (data_type) { |
427 | case FSNOTIFY_EVENT_FILE: { | 426 | case FSNOTIFY_EVENT_PATH: { |
428 | event->file = data; | 427 | struct path *path = data; |
429 | /* | 428 | event->path.dentry = path->dentry; |
430 | * if this file is about to disappear hold an extra reference | 429 | event->path.mnt = path->mnt; |
431 | * until we return to __fput so we don't have to worry about | 430 | path_get(&event->path); |
432 | * future get/put destroying the file under us or generating | ||
433 | * additional events. Notice that we change f_mode without | ||
434 | * holding f_lock. This is safe since this is the only possible | ||
435 | * reference to this object in the kernel (it was about to be | ||
436 | * freed, remember?) | ||
437 | */ | ||
438 | if (!atomic_long_read(&event->file->f_count)) { | ||
439 | event->file->f_mode |= FMODE_NONOTIFY; | ||
440 | get_file(event->file); | ||
441 | } | ||
442 | get_file(event->file); | ||
443 | break; | 431 | break; |
444 | } | 432 | } |
445 | case FSNOTIFY_EVENT_INODE: | 433 | case FSNOTIFY_EVENT_INODE: |
@@ -447,7 +435,8 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, | |||
447 | break; | 435 | break; |
448 | case FSNOTIFY_EVENT_NONE: | 436 | case FSNOTIFY_EVENT_NONE: |
449 | event->inode = NULL; | 437 | event->inode = NULL; |
450 | event->file = NULL; | 438 | event->path.dentry = NULL; |
439 | event->path.mnt = NULL; | ||
451 | break; | 440 | break; |
452 | default: | 441 | default: |
453 | BUG(); | 442 | BUG(); |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index da702294d7e7..a76e0aa5cd3f 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -290,12 +290,30 @@ static int ocfs2_set_acl(handle_t *handle, | |||
290 | 290 | ||
291 | int ocfs2_check_acl(struct inode *inode, int mask) | 291 | int ocfs2_check_acl(struct inode *inode, int mask) |
292 | { | 292 | { |
293 | struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); | 293 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
294 | struct buffer_head *di_bh = NULL; | ||
295 | struct posix_acl *acl; | ||
296 | int ret = -EAGAIN; | ||
294 | 297 | ||
295 | if (IS_ERR(acl)) | 298 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) |
299 | return ret; | ||
300 | |||
301 | ret = ocfs2_read_inode_block(inode, &di_bh); | ||
302 | if (ret < 0) { | ||
303 | mlog_errno(ret); | ||
304 | return ret; | ||
305 | } | ||
306 | |||
307 | acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, di_bh); | ||
308 | |||
309 | brelse(di_bh); | ||
310 | |||
311 | if (IS_ERR(acl)) { | ||
312 | mlog_errno(PTR_ERR(acl)); | ||
296 | return PTR_ERR(acl); | 313 | return PTR_ERR(acl); |
314 | } | ||
297 | if (acl) { | 315 | if (acl) { |
298 | int ret = posix_acl_permission(inode, acl, mask); | 316 | ret = posix_acl_permission(inode, acl, mask); |
299 | posix_acl_release(acl); | 317 | posix_acl_release(acl); |
300 | return ret; | 318 | return ret; |
301 | } | 319 | } |
@@ -344,7 +362,7 @@ int ocfs2_init_acl(handle_t *handle, | |||
344 | { | 362 | { |
345 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 363 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
346 | struct posix_acl *acl = NULL; | 364 | struct posix_acl *acl = NULL; |
347 | int ret = 0; | 365 | int ret = 0, ret2; |
348 | mode_t mode; | 366 | mode_t mode; |
349 | 367 | ||
350 | if (!S_ISLNK(inode->i_mode)) { | 368 | if (!S_ISLNK(inode->i_mode)) { |
@@ -381,7 +399,12 @@ int ocfs2_init_acl(handle_t *handle, | |||
381 | mode = inode->i_mode; | 399 | mode = inode->i_mode; |
382 | ret = posix_acl_create_masq(clone, &mode); | 400 | ret = posix_acl_create_masq(clone, &mode); |
383 | if (ret >= 0) { | 401 | if (ret >= 0) { |
384 | ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); | 402 | ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); |
403 | if (ret2) { | ||
404 | mlog_errno(ret2); | ||
405 | ret = ret2; | ||
406 | goto cleanup; | ||
407 | } | ||
385 | if (ret > 0) { | 408 | if (ret > 0) { |
386 | ret = ocfs2_set_acl(handle, inode, | 409 | ret = ocfs2_set_acl(handle, inode, |
387 | di_bh, ACL_TYPE_ACCESS, | 410 | di_bh, ACL_TYPE_ACCESS, |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 215e12ce1d85..592fae5007d1 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -6672,7 +6672,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end, | |||
6672 | last_page_bytes = PAGE_ALIGN(end); | 6672 | last_page_bytes = PAGE_ALIGN(end); |
6673 | index = start >> PAGE_CACHE_SHIFT; | 6673 | index = start >> PAGE_CACHE_SHIFT; |
6674 | do { | 6674 | do { |
6675 | pages[numpages] = grab_cache_page(mapping, index); | 6675 | pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS); |
6676 | if (!pages[numpages]) { | 6676 | if (!pages[numpages]) { |
6677 | ret = -ENOMEM; | 6677 | ret = -ENOMEM; |
6678 | mlog_errno(ret); | 6678 | mlog_errno(ret); |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 7155c5a919d7..5cfeee118158 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -883,8 +883,8 @@ struct ocfs2_write_ctxt { | |||
883 | * out in so that future reads from that region will get | 883 | * out in so that future reads from that region will get |
884 | * zero's. | 884 | * zero's. |
885 | */ | 885 | */ |
886 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
887 | unsigned int w_num_pages; | 886 | unsigned int w_num_pages; |
887 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
888 | struct page *w_target_page; | 888 | struct page *w_target_page; |
889 | 889 | ||
890 | /* | 890 | /* |
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index ec6d12339593..c7ee03c22226 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c | |||
@@ -439,7 +439,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, | |||
439 | 439 | ||
440 | ocfs2_blockcheck_inc_failure(stats); | 440 | ocfs2_blockcheck_inc_failure(stats); |
441 | mlog(ML_ERROR, | 441 | mlog(ML_ERROR, |
442 | "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", | 442 | "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n", |
443 | (unsigned int)check.bc_crc32e, (unsigned int)crc); | 443 | (unsigned int)check.bc_crc32e, (unsigned int)crc); |
444 | 444 | ||
445 | /* Ok, try ECC fixups */ | 445 | /* Ok, try ECC fixups */ |
@@ -453,7 +453,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, | |||
453 | goto out; | 453 | goto out; |
454 | } | 454 | } |
455 | 455 | ||
456 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", | 456 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n", |
457 | (unsigned int)check.bc_crc32e, (unsigned int)crc); | 457 | (unsigned int)check.bc_crc32e, (unsigned int)crc); |
458 | 458 | ||
459 | rc = -EIO; | 459 | rc = -EIO; |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index aa75ca3f78da..1361997cf205 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -1759,6 +1759,7 @@ static int o2net_accept_one(struct socket *sock) | |||
1759 | struct sockaddr_in sin; | 1759 | struct sockaddr_in sin; |
1760 | struct socket *new_sock = NULL; | 1760 | struct socket *new_sock = NULL; |
1761 | struct o2nm_node *node = NULL; | 1761 | struct o2nm_node *node = NULL; |
1762 | struct o2nm_node *local_node = NULL; | ||
1762 | struct o2net_sock_container *sc = NULL; | 1763 | struct o2net_sock_container *sc = NULL; |
1763 | struct o2net_node *nn; | 1764 | struct o2net_node *nn; |
1764 | 1765 | ||
@@ -1796,11 +1797,15 @@ static int o2net_accept_one(struct socket *sock) | |||
1796 | goto out; | 1797 | goto out; |
1797 | } | 1798 | } |
1798 | 1799 | ||
1799 | if (o2nm_this_node() > node->nd_num) { | 1800 | if (o2nm_this_node() >= node->nd_num) { |
1800 | mlog(ML_NOTICE, "unexpected connect attempted from a lower " | 1801 | local_node = o2nm_get_node_by_num(o2nm_this_node()); |
1801 | "numbered node '%s' at " "%pI4:%d with num %u\n", | 1802 | mlog(ML_NOTICE, "unexpected connect attempt seen at node '%s' (" |
1802 | node->nd_name, &sin.sin_addr.s_addr, | 1803 | "%u, %pI4:%d) from node '%s' (%u, %pI4:%d)\n", |
1803 | ntohs(sin.sin_port), node->nd_num); | 1804 | local_node->nd_name, local_node->nd_num, |
1805 | &(local_node->nd_ipv4_address), | ||
1806 | ntohs(local_node->nd_ipv4_port), | ||
1807 | node->nd_name, node->nd_num, &sin.sin_addr.s_addr, | ||
1808 | ntohs(sin.sin_port)); | ||
1804 | ret = -EINVAL; | 1809 | ret = -EINVAL; |
1805 | goto out; | 1810 | goto out; |
1806 | } | 1811 | } |
@@ -1857,6 +1862,8 @@ out: | |||
1857 | sock_release(new_sock); | 1862 | sock_release(new_sock); |
1858 | if (node) | 1863 | if (node) |
1859 | o2nm_node_put(node); | 1864 | o2nm_node_put(node); |
1865 | if (local_node) | ||
1866 | o2nm_node_put(local_node); | ||
1860 | if (sc) | 1867 | if (sc) |
1861 | sc_put(sc); | 1868 | sc_put(sc); |
1862 | return ret; | 1869 | return ret; |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 94b97fc6a88e..ffb4c68dafa4 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -511,8 +511,6 @@ static void dlm_lockres_release(struct kref *kref) | |||
511 | 511 | ||
512 | atomic_dec(&dlm->res_cur_count); | 512 | atomic_dec(&dlm->res_cur_count); |
513 | 513 | ||
514 | dlm_put(dlm); | ||
515 | |||
516 | if (!hlist_unhashed(&res->hash_node) || | 514 | if (!hlist_unhashed(&res->hash_node) || |
517 | !list_empty(&res->granted) || | 515 | !list_empty(&res->granted) || |
518 | !list_empty(&res->converting) || | 516 | !list_empty(&res->converting) || |
@@ -585,8 +583,6 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
585 | res->migration_pending = 0; | 583 | res->migration_pending = 0; |
586 | res->inflight_locks = 0; | 584 | res->inflight_locks = 0; |
587 | 585 | ||
588 | /* put in dlm_lockres_release */ | ||
589 | dlm_grab(dlm); | ||
590 | res->dlm = dlm; | 586 | res->dlm = dlm; |
591 | 587 | ||
592 | kref_init(&res->refs); | 588 | kref_init(&res->refs); |
@@ -3050,8 +3046,6 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, | |||
3050 | /* check for pre-existing lock */ | 3046 | /* check for pre-existing lock */ |
3051 | spin_lock(&dlm->spinlock); | 3047 | spin_lock(&dlm->spinlock); |
3052 | res = __dlm_lookup_lockres(dlm, name, namelen, hash); | 3048 | res = __dlm_lookup_lockres(dlm, name, namelen, hash); |
3053 | spin_lock(&dlm->master_lock); | ||
3054 | |||
3055 | if (res) { | 3049 | if (res) { |
3056 | spin_lock(&res->spinlock); | 3050 | spin_lock(&res->spinlock); |
3057 | if (res->state & DLM_LOCK_RES_RECOVERING) { | 3051 | if (res->state & DLM_LOCK_RES_RECOVERING) { |
@@ -3069,14 +3063,15 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, | |||
3069 | spin_unlock(&res->spinlock); | 3063 | spin_unlock(&res->spinlock); |
3070 | } | 3064 | } |
3071 | 3065 | ||
3066 | spin_lock(&dlm->master_lock); | ||
3072 | /* ignore status. only nonzero status would BUG. */ | 3067 | /* ignore status. only nonzero status would BUG. */ |
3073 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, | 3068 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, |
3074 | name, namelen, | 3069 | name, namelen, |
3075 | migrate->new_master, | 3070 | migrate->new_master, |
3076 | migrate->master); | 3071 | migrate->master); |
3077 | 3072 | ||
3078 | unlock: | ||
3079 | spin_unlock(&dlm->master_lock); | 3073 | spin_unlock(&dlm->master_lock); |
3074 | unlock: | ||
3080 | spin_unlock(&dlm->spinlock); | 3075 | spin_unlock(&dlm->spinlock); |
3081 | 3076 | ||
3082 | if (oldmle) { | 3077 | if (oldmle) { |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 9dfaac73b36d..aaaffbcbe916 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -1997,6 +1997,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, | |||
1997 | struct list_head *queue; | 1997 | struct list_head *queue; |
1998 | struct dlm_lock *lock, *next; | 1998 | struct dlm_lock *lock, *next; |
1999 | 1999 | ||
2000 | assert_spin_locked(&dlm->spinlock); | ||
2001 | assert_spin_locked(&res->spinlock); | ||
2000 | res->state |= DLM_LOCK_RES_RECOVERING; | 2002 | res->state |= DLM_LOCK_RES_RECOVERING; |
2001 | if (!list_empty(&res->recovering)) { | 2003 | if (!list_empty(&res->recovering)) { |
2002 | mlog(0, | 2004 | mlog(0, |
@@ -2326,19 +2328,15 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2326 | /* zero the lvb if necessary */ | 2328 | /* zero the lvb if necessary */ |
2327 | dlm_revalidate_lvb(dlm, res, dead_node); | 2329 | dlm_revalidate_lvb(dlm, res, dead_node); |
2328 | if (res->owner == dead_node) { | 2330 | if (res->owner == dead_node) { |
2329 | if (res->state & DLM_LOCK_RES_DROPPING_REF) | 2331 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { |
2330 | mlog(0, "%s:%.*s: owned by " | 2332 | mlog(ML_NOTICE, "Ignore %.*s for " |
2331 | "dead node %u, this node was " | 2333 | "recovery as it is being freed\n", |
2332 | "dropping its ref when it died. " | 2334 | res->lockname.len, |
2333 | "continue, dropping the flag.\n", | 2335 | res->lockname.name); |
2334 | dlm->name, res->lockname.len, | 2336 | } else |
2335 | res->lockname.name, dead_node); | 2337 | dlm_move_lockres_to_recovery_list(dlm, |
2336 | 2338 | res); | |
2337 | /* the wake_up for this will happen when the | ||
2338 | * RECOVERING flag is dropped later */ | ||
2339 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | ||
2340 | 2339 | ||
2341 | dlm_move_lockres_to_recovery_list(dlm, res); | ||
2342 | } else if (res->owner == dlm->node_num) { | 2340 | } else if (res->owner == dlm->node_num) { |
2343 | dlm_free_dead_locks(dlm, res, dead_node); | 2341 | dlm_free_dead_locks(dlm, res, dead_node); |
2344 | __dlm_lockres_calc_usage(dlm, res); | 2342 | __dlm_lockres_calc_usage(dlm, res); |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d4f73ca68fe5..2211acf33d9b 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -92,19 +92,27 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res) | |||
92 | * truly ready to be freed. */ | 92 | * truly ready to be freed. */ |
93 | int __dlm_lockres_unused(struct dlm_lock_resource *res) | 93 | int __dlm_lockres_unused(struct dlm_lock_resource *res) |
94 | { | 94 | { |
95 | if (!__dlm_lockres_has_locks(res) && | 95 | int bit; |
96 | (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) { | 96 | |
97 | /* try not to scan the bitmap unless the first two | 97 | if (__dlm_lockres_has_locks(res)) |
98 | * conditions are already true */ | 98 | return 0; |
99 | int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | 99 | |
100 | if (bit >= O2NM_MAX_NODES) { | 100 | if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) |
101 | /* since the bit for dlm->node_num is not | 101 | return 0; |
102 | * set, inflight_locks better be zero */ | 102 | |
103 | BUG_ON(res->inflight_locks != 0); | 103 | if (res->state & DLM_LOCK_RES_RECOVERING) |
104 | return 1; | 104 | return 0; |
105 | } | 105 | |
106 | } | 106 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); |
107 | return 0; | 107 | if (bit < O2NM_MAX_NODES) |
108 | return 0; | ||
109 | |||
110 | /* | ||
111 | * since the bit for dlm->node_num is not set, inflight_locks better | ||
112 | * be zero | ||
113 | */ | ||
114 | BUG_ON(res->inflight_locks != 0); | ||
115 | return 1; | ||
108 | } | 116 | } |
109 | 117 | ||
110 | 118 | ||
@@ -152,45 +160,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
152 | spin_unlock(&dlm->spinlock); | 160 | spin_unlock(&dlm->spinlock); |
153 | } | 161 | } |
154 | 162 | ||
155 | static int dlm_purge_lockres(struct dlm_ctxt *dlm, | 163 | static void dlm_purge_lockres(struct dlm_ctxt *dlm, |
156 | struct dlm_lock_resource *res) | 164 | struct dlm_lock_resource *res) |
157 | { | 165 | { |
158 | int master; | 166 | int master; |
159 | int ret = 0; | 167 | int ret = 0; |
160 | 168 | ||
161 | spin_lock(&res->spinlock); | 169 | assert_spin_locked(&dlm->spinlock); |
162 | if (!__dlm_lockres_unused(res)) { | 170 | assert_spin_locked(&res->spinlock); |
163 | mlog(0, "%s:%.*s: tried to purge but not unused\n", | ||
164 | dlm->name, res->lockname.len, res->lockname.name); | ||
165 | __dlm_print_one_lock_resource(res); | ||
166 | spin_unlock(&res->spinlock); | ||
167 | BUG(); | ||
168 | } | ||
169 | |||
170 | if (res->state & DLM_LOCK_RES_MIGRATING) { | ||
171 | mlog(0, "%s:%.*s: Delay dropref as this lockres is " | ||
172 | "being remastered\n", dlm->name, res->lockname.len, | ||
173 | res->lockname.name); | ||
174 | /* Re-add the lockres to the end of the purge list */ | ||
175 | if (!list_empty(&res->purge)) { | ||
176 | list_del_init(&res->purge); | ||
177 | list_add_tail(&res->purge, &dlm->purge_list); | ||
178 | } | ||
179 | spin_unlock(&res->spinlock); | ||
180 | return 0; | ||
181 | } | ||
182 | 171 | ||
183 | master = (res->owner == dlm->node_num); | 172 | master = (res->owner == dlm->node_num); |
184 | 173 | ||
185 | if (!master) | ||
186 | res->state |= DLM_LOCK_RES_DROPPING_REF; | ||
187 | spin_unlock(&res->spinlock); | ||
188 | 174 | ||
189 | mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, | 175 | mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, |
190 | res->lockname.name, master); | 176 | res->lockname.name, master); |
191 | 177 | ||
192 | if (!master) { | 178 | if (!master) { |
179 | res->state |= DLM_LOCK_RES_DROPPING_REF; | ||
193 | /* drop spinlock... retake below */ | 180 | /* drop spinlock... retake below */ |
181 | spin_unlock(&res->spinlock); | ||
194 | spin_unlock(&dlm->spinlock); | 182 | spin_unlock(&dlm->spinlock); |
195 | 183 | ||
196 | spin_lock(&res->spinlock); | 184 | spin_lock(&res->spinlock); |
@@ -208,31 +196,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, | |||
208 | mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", | 196 | mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", |
209 | dlm->name, res->lockname.len, res->lockname.name, ret); | 197 | dlm->name, res->lockname.len, res->lockname.name, ret); |
210 | spin_lock(&dlm->spinlock); | 198 | spin_lock(&dlm->spinlock); |
199 | spin_lock(&res->spinlock); | ||
211 | } | 200 | } |
212 | 201 | ||
213 | spin_lock(&res->spinlock); | ||
214 | if (!list_empty(&res->purge)) { | 202 | if (!list_empty(&res->purge)) { |
215 | mlog(0, "removing lockres %.*s:%p from purgelist, " | 203 | mlog(0, "removing lockres %.*s:%p from purgelist, " |
216 | "master = %d\n", res->lockname.len, res->lockname.name, | 204 | "master = %d\n", res->lockname.len, res->lockname.name, |
217 | res, master); | 205 | res, master); |
218 | list_del_init(&res->purge); | 206 | list_del_init(&res->purge); |
219 | spin_unlock(&res->spinlock); | ||
220 | dlm_lockres_put(res); | 207 | dlm_lockres_put(res); |
221 | dlm->purge_count--; | 208 | dlm->purge_count--; |
222 | } else | 209 | } |
223 | spin_unlock(&res->spinlock); | 210 | |
211 | if (!__dlm_lockres_unused(res)) { | ||
212 | mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n", | ||
213 | dlm->name, res->lockname.len, res->lockname.name); | ||
214 | __dlm_print_one_lock_resource(res); | ||
215 | BUG(); | ||
216 | } | ||
224 | 217 | ||
225 | __dlm_unhash_lockres(res); | 218 | __dlm_unhash_lockres(res); |
226 | 219 | ||
227 | /* lockres is not in the hash now. drop the flag and wake up | 220 | /* lockres is not in the hash now. drop the flag and wake up |
228 | * any processes waiting in dlm_get_lock_resource. */ | 221 | * any processes waiting in dlm_get_lock_resource. */ |
229 | if (!master) { | 222 | if (!master) { |
230 | spin_lock(&res->spinlock); | ||
231 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | 223 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; |
232 | spin_unlock(&res->spinlock); | 224 | spin_unlock(&res->spinlock); |
233 | wake_up(&res->wq); | 225 | wake_up(&res->wq); |
234 | } | 226 | } else |
235 | return 0; | 227 | spin_unlock(&res->spinlock); |
236 | } | 228 | } |
237 | 229 | ||
238 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, | 230 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, |
@@ -251,17 +243,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
251 | lockres = list_entry(dlm->purge_list.next, | 243 | lockres = list_entry(dlm->purge_list.next, |
252 | struct dlm_lock_resource, purge); | 244 | struct dlm_lock_resource, purge); |
253 | 245 | ||
254 | /* Status of the lockres *might* change so double | ||
255 | * check. If the lockres is unused, holding the dlm | ||
256 | * spinlock will prevent people from getting and more | ||
257 | * refs on it -- there's no need to keep the lockres | ||
258 | * spinlock. */ | ||
259 | spin_lock(&lockres->spinlock); | 246 | spin_lock(&lockres->spinlock); |
260 | unused = __dlm_lockres_unused(lockres); | ||
261 | spin_unlock(&lockres->spinlock); | ||
262 | |||
263 | if (!unused) | ||
264 | continue; | ||
265 | 247 | ||
266 | purge_jiffies = lockres->last_used + | 248 | purge_jiffies = lockres->last_used + |
267 | msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); | 249 | msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); |
@@ -273,15 +255,29 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
273 | * in tail order, we can stop at the first | 255 | * in tail order, we can stop at the first |
274 | * unpurgable resource -- anyone added after | 256 | * unpurgable resource -- anyone added after |
275 | * him will have a greater last_used value */ | 257 | * him will have a greater last_used value */ |
258 | spin_unlock(&lockres->spinlock); | ||
276 | break; | 259 | break; |
277 | } | 260 | } |
278 | 261 | ||
262 | /* Status of the lockres *might* change so double | ||
263 | * check. If the lockres is unused, holding the dlm | ||
264 | * spinlock will prevent people from getting and more | ||
265 | * refs on it. */ | ||
266 | unused = __dlm_lockres_unused(lockres); | ||
267 | if (!unused || | ||
268 | (lockres->state & DLM_LOCK_RES_MIGRATING)) { | ||
269 | mlog(0, "lockres %s:%.*s: is in use or " | ||
270 | "being remastered, used %d, state %d\n", | ||
271 | dlm->name, lockres->lockname.len, | ||
272 | lockres->lockname.name, !unused, lockres->state); | ||
273 | list_move_tail(&dlm->purge_list, &lockres->purge); | ||
274 | spin_unlock(&lockres->spinlock); | ||
275 | continue; | ||
276 | } | ||
277 | |||
279 | dlm_lockres_get(lockres); | 278 | dlm_lockres_get(lockres); |
280 | 279 | ||
281 | /* This may drop and reacquire the dlm spinlock if it | 280 | dlm_purge_lockres(dlm, lockres); |
282 | * has to do migration. */ | ||
283 | if (dlm_purge_lockres(dlm, lockres)) | ||
284 | BUG(); | ||
285 | 281 | ||
286 | dlm_lockres_put(lockres); | 282 | dlm_lockres_put(lockres); |
287 | 283 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4331f57e9fde..9a74542e1a05 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/writeback.h> | 36 | #include <linux/writeback.h> |
37 | #include <linux/falloc.h> | 37 | #include <linux/falloc.h> |
38 | #include <linux/quotaops.h> | 38 | #include <linux/quotaops.h> |
39 | #include <linux/blkdev.h> | ||
39 | 40 | ||
40 | #define MLOG_MASK_PREFIX ML_INODE | 41 | #define MLOG_MASK_PREFIX ML_INODE |
41 | #include <cluster/masklog.h> | 42 | #include <cluster/masklog.h> |
@@ -63,12 +64,6 @@ | |||
63 | 64 | ||
64 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
65 | 66 | ||
66 | static int ocfs2_sync_inode(struct inode *inode) | ||
67 | { | ||
68 | filemap_fdatawrite(inode->i_mapping); | ||
69 | return sync_mapping_buffers(inode->i_mapping); | ||
70 | } | ||
71 | |||
72 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) | 67 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) |
73 | { | 68 | { |
74 | struct ocfs2_file_private *fp; | 69 | struct ocfs2_file_private *fp; |
@@ -186,12 +181,16 @@ static int ocfs2_sync_file(struct file *file, int datasync) | |||
186 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, | 181 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, |
187 | dentry->d_name.len, dentry->d_name.name); | 182 | dentry->d_name.len, dentry->d_name.name); |
188 | 183 | ||
189 | err = ocfs2_sync_inode(dentry->d_inode); | 184 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { |
190 | if (err) | 185 | /* |
191 | goto bail; | 186 | * We still have to flush drive's caches to get data to the |
192 | 187 | * platter | |
193 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 188 | */ |
189 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | ||
190 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, | ||
191 | NULL, BLKDEV_IFL_WAIT); | ||
194 | goto bail; | 192 | goto bail; |
193 | } | ||
195 | 194 | ||
196 | journal = osb->journal->j_journal; | 195 | journal = osb->journal->j_journal; |
197 | err = jbd2_journal_force_commit(journal); | 196 | err = jbd2_journal_force_commit(journal); |
@@ -774,7 +773,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | |||
774 | BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); | 773 | BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); |
775 | BUG_ON(abs_from & (inode->i_blkbits - 1)); | 774 | BUG_ON(abs_from & (inode->i_blkbits - 1)); |
776 | 775 | ||
777 | page = grab_cache_page(mapping, index); | 776 | page = find_or_create_page(mapping, index, GFP_NOFS); |
778 | if (!page) { | 777 | if (!page) { |
779 | ret = -ENOMEM; | 778 | ret = -ENOMEM; |
780 | mlog_errno(ret); | 779 | mlog_errno(ret); |
@@ -2306,17 +2305,6 @@ relock: | |||
2306 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, | 2305 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, |
2307 | ppos, count, ocount); | 2306 | ppos, count, ocount); |
2308 | if (written < 0) { | 2307 | if (written < 0) { |
2309 | /* | ||
2310 | * direct write may have instantiated a few | ||
2311 | * blocks outside i_size. Trim these off again. | ||
2312 | * Don't need i_size_read because we hold i_mutex. | ||
2313 | * | ||
2314 | * XXX(truncate): this looks buggy because ocfs2 did not | ||
2315 | * actually implement ->truncate. Take a look at | ||
2316 | * the new truncate sequence and update this accordingly | ||
2317 | */ | ||
2318 | if (*ppos + count > inode->i_size) | ||
2319 | truncate_setsize(inode, inode->i_size); | ||
2320 | ret = written; | 2308 | ret = written; |
2321 | goto out_dio; | 2309 | goto out_dio; |
2322 | } | 2310 | } |
@@ -2332,7 +2320,7 @@ out_dio: | |||
2332 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); | 2320 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); |
2333 | 2321 | ||
2334 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || | 2322 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || |
2335 | ((file->f_flags & O_DIRECT) && has_refcount)) { | 2323 | ((file->f_flags & O_DIRECT) && !direct_io)) { |
2336 | ret = filemap_fdatawrite_range(file->f_mapping, pos, | 2324 | ret = filemap_fdatawrite_range(file->f_mapping, pos, |
2337 | pos + count - 1); | 2325 | pos + count - 1); |
2338 | if (ret < 0) | 2326 | if (ret < 0) |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 0492464916b1..eece3e05d9d0 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -488,7 +488,11 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
488 | OCFS2_BH_IGNORE_CACHE); | 488 | OCFS2_BH_IGNORE_CACHE); |
489 | } else { | 489 | } else { |
490 | status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); | 490 | status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); |
491 | if (!status) | 491 | /* |
492 | * If buffer is in jbd, then its checksum may not have been | ||
493 | * computed as yet. | ||
494 | */ | ||
495 | if (!status && !buffer_jbd(bh)) | ||
492 | status = ocfs2_validate_inode_block(osb->sb, bh); | 496 | status = ocfs2_validate_inode_block(osb->sb, bh); |
493 | } | 497 | } |
494 | if (status < 0) { | 498 | if (status < 0) { |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 6de5a869db30..0bc477a3aeb8 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -46,27 +46,24 @@ struct ocfs2_inode_info | |||
46 | /* These fields are protected by ip_lock */ | 46 | /* These fields are protected by ip_lock */ |
47 | spinlock_t ip_lock; | 47 | spinlock_t ip_lock; |
48 | u32 ip_open_count; | 48 | u32 ip_open_count; |
49 | u32 ip_clusters; | ||
50 | struct list_head ip_io_markers; | 49 | struct list_head ip_io_markers; |
50 | u32 ip_clusters; | ||
51 | 51 | ||
52 | u16 ip_dyn_features; | ||
52 | struct mutex ip_io_mutex; | 53 | struct mutex ip_io_mutex; |
53 | |||
54 | u32 ip_flags; /* see below */ | 54 | u32 ip_flags; /* see below */ |
55 | u32 ip_attr; /* inode attributes */ | 55 | u32 ip_attr; /* inode attributes */ |
56 | u16 ip_dyn_features; | ||
57 | 56 | ||
58 | /* protected by recovery_lock. */ | 57 | /* protected by recovery_lock. */ |
59 | struct inode *ip_next_orphan; | 58 | struct inode *ip_next_orphan; |
60 | 59 | ||
61 | u32 ip_dir_start_lookup; | ||
62 | |||
63 | struct ocfs2_caching_info ip_metadata_cache; | 60 | struct ocfs2_caching_info ip_metadata_cache; |
64 | |||
65 | struct ocfs2_extent_map ip_extent_map; | 61 | struct ocfs2_extent_map ip_extent_map; |
66 | |||
67 | struct inode vfs_inode; | 62 | struct inode vfs_inode; |
68 | struct jbd2_inode ip_jinode; | 63 | struct jbd2_inode ip_jinode; |
69 | 64 | ||
65 | u32 ip_dir_start_lookup; | ||
66 | |||
70 | /* Only valid if the inode is the dir. */ | 67 | /* Only valid if the inode is the dir. */ |
71 | u32 ip_last_used_slot; | 68 | u32 ip_last_used_slot; |
72 | u64 ip_last_used_group; | 69 | u64 ip_last_used_group; |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7d9d9c132cef..7a4868196152 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -26,6 +26,26 @@ | |||
26 | 26 | ||
27 | #include <linux/ext2_fs.h> | 27 | #include <linux/ext2_fs.h> |
28 | 28 | ||
29 | #define o2info_from_user(a, b) \ | ||
30 | copy_from_user(&(a), (b), sizeof(a)) | ||
31 | #define o2info_to_user(a, b) \ | ||
32 | copy_to_user((typeof(a) __user *)b, &(a), sizeof(a)) | ||
33 | |||
34 | /* | ||
35 | * This call is void because we are already reporting an error that may | ||
36 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's | ||
37 | * just a best-effort to tell userspace that this request caused the error. | ||
38 | */ | ||
39 | static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq, | ||
40 | struct ocfs2_info_request __user *req) | ||
41 | { | ||
42 | kreq->ir_flags |= OCFS2_INFO_FL_ERROR; | ||
43 | (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); | ||
44 | } | ||
45 | |||
46 | #define o2info_set_request_error(a, b) \ | ||
47 | __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) | ||
48 | |||
29 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) | 49 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) |
30 | { | 50 | { |
31 | int status; | 51 | int status; |
@@ -109,6 +129,328 @@ bail: | |||
109 | return status; | 129 | return status; |
110 | } | 130 | } |
111 | 131 | ||
132 | int ocfs2_info_handle_blocksize(struct inode *inode, | ||
133 | struct ocfs2_info_request __user *req) | ||
134 | { | ||
135 | int status = -EFAULT; | ||
136 | struct ocfs2_info_blocksize oib; | ||
137 | |||
138 | if (o2info_from_user(oib, req)) | ||
139 | goto bail; | ||
140 | |||
141 | oib.ib_blocksize = inode->i_sb->s_blocksize; | ||
142 | oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
143 | |||
144 | if (o2info_to_user(oib, req)) | ||
145 | goto bail; | ||
146 | |||
147 | status = 0; | ||
148 | bail: | ||
149 | if (status) | ||
150 | o2info_set_request_error(oib, req); | ||
151 | |||
152 | return status; | ||
153 | } | ||
154 | |||
155 | int ocfs2_info_handle_clustersize(struct inode *inode, | ||
156 | struct ocfs2_info_request __user *req) | ||
157 | { | ||
158 | int status = -EFAULT; | ||
159 | struct ocfs2_info_clustersize oic; | ||
160 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
161 | |||
162 | if (o2info_from_user(oic, req)) | ||
163 | goto bail; | ||
164 | |||
165 | oic.ic_clustersize = osb->s_clustersize; | ||
166 | oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
167 | |||
168 | if (o2info_to_user(oic, req)) | ||
169 | goto bail; | ||
170 | |||
171 | status = 0; | ||
172 | bail: | ||
173 | if (status) | ||
174 | o2info_set_request_error(oic, req); | ||
175 | |||
176 | return status; | ||
177 | } | ||
178 | |||
179 | int ocfs2_info_handle_maxslots(struct inode *inode, | ||
180 | struct ocfs2_info_request __user *req) | ||
181 | { | ||
182 | int status = -EFAULT; | ||
183 | struct ocfs2_info_maxslots oim; | ||
184 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
185 | |||
186 | if (o2info_from_user(oim, req)) | ||
187 | goto bail; | ||
188 | |||
189 | oim.im_max_slots = osb->max_slots; | ||
190 | oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
191 | |||
192 | if (o2info_to_user(oim, req)) | ||
193 | goto bail; | ||
194 | |||
195 | status = 0; | ||
196 | bail: | ||
197 | if (status) | ||
198 | o2info_set_request_error(oim, req); | ||
199 | |||
200 | return status; | ||
201 | } | ||
202 | |||
203 | int ocfs2_info_handle_label(struct inode *inode, | ||
204 | struct ocfs2_info_request __user *req) | ||
205 | { | ||
206 | int status = -EFAULT; | ||
207 | struct ocfs2_info_label oil; | ||
208 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
209 | |||
210 | if (o2info_from_user(oil, req)) | ||
211 | goto bail; | ||
212 | |||
213 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); | ||
214 | oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
215 | |||
216 | if (o2info_to_user(oil, req)) | ||
217 | goto bail; | ||
218 | |||
219 | status = 0; | ||
220 | bail: | ||
221 | if (status) | ||
222 | o2info_set_request_error(oil, req); | ||
223 | |||
224 | return status; | ||
225 | } | ||
226 | |||
227 | int ocfs2_info_handle_uuid(struct inode *inode, | ||
228 | struct ocfs2_info_request __user *req) | ||
229 | { | ||
230 | int status = -EFAULT; | ||
231 | struct ocfs2_info_uuid oiu; | ||
232 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
233 | |||
234 | if (o2info_from_user(oiu, req)) | ||
235 | goto bail; | ||
236 | |||
237 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); | ||
238 | oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
239 | |||
240 | if (o2info_to_user(oiu, req)) | ||
241 | goto bail; | ||
242 | |||
243 | status = 0; | ||
244 | bail: | ||
245 | if (status) | ||
246 | o2info_set_request_error(oiu, req); | ||
247 | |||
248 | return status; | ||
249 | } | ||
250 | |||
251 | int ocfs2_info_handle_fs_features(struct inode *inode, | ||
252 | struct ocfs2_info_request __user *req) | ||
253 | { | ||
254 | int status = -EFAULT; | ||
255 | struct ocfs2_info_fs_features oif; | ||
256 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
257 | |||
258 | if (o2info_from_user(oif, req)) | ||
259 | goto bail; | ||
260 | |||
261 | oif.if_compat_features = osb->s_feature_compat; | ||
262 | oif.if_incompat_features = osb->s_feature_incompat; | ||
263 | oif.if_ro_compat_features = osb->s_feature_ro_compat; | ||
264 | oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
265 | |||
266 | if (o2info_to_user(oif, req)) | ||
267 | goto bail; | ||
268 | |||
269 | status = 0; | ||
270 | bail: | ||
271 | if (status) | ||
272 | o2info_set_request_error(oif, req); | ||
273 | |||
274 | return status; | ||
275 | } | ||
276 | |||
277 | int ocfs2_info_handle_journal_size(struct inode *inode, | ||
278 | struct ocfs2_info_request __user *req) | ||
279 | { | ||
280 | int status = -EFAULT; | ||
281 | struct ocfs2_info_journal_size oij; | ||
282 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
283 | |||
284 | if (o2info_from_user(oij, req)) | ||
285 | goto bail; | ||
286 | |||
287 | oij.ij_journal_size = osb->journal->j_inode->i_size; | ||
288 | |||
289 | oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
290 | |||
291 | if (o2info_to_user(oij, req)) | ||
292 | goto bail; | ||
293 | |||
294 | status = 0; | ||
295 | bail: | ||
296 | if (status) | ||
297 | o2info_set_request_error(oij, req); | ||
298 | |||
299 | return status; | ||
300 | } | ||
301 | |||
302 | int ocfs2_info_handle_unknown(struct inode *inode, | ||
303 | struct ocfs2_info_request __user *req) | ||
304 | { | ||
305 | int status = -EFAULT; | ||
306 | struct ocfs2_info_request oir; | ||
307 | |||
308 | if (o2info_from_user(oir, req)) | ||
309 | goto bail; | ||
310 | |||
311 | oir.ir_flags &= ~OCFS2_INFO_FL_FILLED; | ||
312 | |||
313 | if (o2info_to_user(oir, req)) | ||
314 | goto bail; | ||
315 | |||
316 | status = 0; | ||
317 | bail: | ||
318 | if (status) | ||
319 | o2info_set_request_error(oir, req); | ||
320 | |||
321 | return status; | ||
322 | } | ||
323 | |||
324 | /* | ||
325 | * Validate and distinguish OCFS2_IOC_INFO requests. | ||
326 | * | ||
327 | * - validate the magic number. | ||
328 | * - distinguish different requests. | ||
329 | * - validate size of different requests. | ||
330 | */ | ||
331 | int ocfs2_info_handle_request(struct inode *inode, | ||
332 | struct ocfs2_info_request __user *req) | ||
333 | { | ||
334 | int status = -EFAULT; | ||
335 | struct ocfs2_info_request oir; | ||
336 | |||
337 | if (o2info_from_user(oir, req)) | ||
338 | goto bail; | ||
339 | |||
340 | status = -EINVAL; | ||
341 | if (oir.ir_magic != OCFS2_INFO_MAGIC) | ||
342 | goto bail; | ||
343 | |||
344 | switch (oir.ir_code) { | ||
345 | case OCFS2_INFO_BLOCKSIZE: | ||
346 | if (oir.ir_size == sizeof(struct ocfs2_info_blocksize)) | ||
347 | status = ocfs2_info_handle_blocksize(inode, req); | ||
348 | break; | ||
349 | case OCFS2_INFO_CLUSTERSIZE: | ||
350 | if (oir.ir_size == sizeof(struct ocfs2_info_clustersize)) | ||
351 | status = ocfs2_info_handle_clustersize(inode, req); | ||
352 | break; | ||
353 | case OCFS2_INFO_MAXSLOTS: | ||
354 | if (oir.ir_size == sizeof(struct ocfs2_info_maxslots)) | ||
355 | status = ocfs2_info_handle_maxslots(inode, req); | ||
356 | break; | ||
357 | case OCFS2_INFO_LABEL: | ||
358 | if (oir.ir_size == sizeof(struct ocfs2_info_label)) | ||
359 | status = ocfs2_info_handle_label(inode, req); | ||
360 | break; | ||
361 | case OCFS2_INFO_UUID: | ||
362 | if (oir.ir_size == sizeof(struct ocfs2_info_uuid)) | ||
363 | status = ocfs2_info_handle_uuid(inode, req); | ||
364 | break; | ||
365 | case OCFS2_INFO_FS_FEATURES: | ||
366 | if (oir.ir_size == sizeof(struct ocfs2_info_fs_features)) | ||
367 | status = ocfs2_info_handle_fs_features(inode, req); | ||
368 | break; | ||
369 | case OCFS2_INFO_JOURNAL_SIZE: | ||
370 | if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) | ||
371 | status = ocfs2_info_handle_journal_size(inode, req); | ||
372 | break; | ||
373 | default: | ||
374 | status = ocfs2_info_handle_unknown(inode, req); | ||
375 | break; | ||
376 | } | ||
377 | |||
378 | bail: | ||
379 | return status; | ||
380 | } | ||
381 | |||
382 | int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx, | ||
383 | u64 *req_addr, int compat_flag) | ||
384 | { | ||
385 | int status = -EFAULT; | ||
386 | u64 __user *bp = NULL; | ||
387 | |||
388 | if (compat_flag) { | ||
389 | #ifdef CONFIG_COMPAT | ||
390 | /* | ||
391 | * pointer bp stores the base address of an array of pointers, | ||
392 | * which collects the addresses of all the separate requests. | ||
393 | */ | ||
394 | bp = (u64 __user *)(unsigned long)compat_ptr(info->oi_requests); | ||
395 | #else | ||
396 | BUG(); | ||
397 | #endif | ||
398 | } else | ||
399 | bp = (u64 __user *)(unsigned long)(info->oi_requests); | ||
400 | |||
401 | if (o2info_from_user(*req_addr, bp + idx)) | ||
402 | goto bail; | ||
403 | |||
404 | status = 0; | ||
405 | bail: | ||
406 | return status; | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * OCFS2_IOC_INFO handles an array of requests passed from userspace. | ||
411 | * | ||
412 | * ocfs2_info_handle() receives a large info aggregation, grabs and | ||
413 | * validates the request count from the header, then breaks it into small | ||
414 | * pieces so that specific handlers can later handle them one by one. | ||
415 | * | ||
416 | * The idea here is to make each separate request small enough to ensure | ||
417 | * better backward and forward compatibility, since a small request is | ||
418 | * less likely to be broken if the disk layout gets changed. | ||
419 | */ | ||
420 | int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, | ||
421 | int compat_flag) | ||
422 | { | ||
423 | int i, status = 0; | ||
424 | u64 req_addr; | ||
425 | struct ocfs2_info_request __user *reqp; | ||
426 | |||
427 | if ((info->oi_count > OCFS2_INFO_MAX_REQUEST) || | ||
428 | (!info->oi_requests)) { | ||
429 | status = -EINVAL; | ||
430 | goto bail; | ||
431 | } | ||
432 | |||
433 | for (i = 0; i < info->oi_count; i++) { | ||
434 | |||
435 | status = ocfs2_get_request_ptr(info, i, &req_addr, compat_flag); | ||
436 | if (status) | ||
437 | break; | ||
438 | |||
439 | reqp = (struct ocfs2_info_request *)(unsigned long)req_addr; | ||
440 | if (!reqp) { | ||
441 | status = -EINVAL; | ||
442 | goto bail; | ||
443 | } | ||
444 | |||
445 | status = ocfs2_info_handle_request(inode, reqp); | ||
446 | if (status) | ||
447 | break; | ||
448 | } | ||
449 | |||
450 | bail: | ||
451 | return status; | ||
452 | } | ||
453 | |||
112 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 454 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
113 | { | 455 | { |
114 | struct inode *inode = filp->f_path.dentry->d_inode; | 456 | struct inode *inode = filp->f_path.dentry->d_inode; |
@@ -120,6 +462,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
120 | struct reflink_arguments args; | 462 | struct reflink_arguments args; |
121 | const char *old_path, *new_path; | 463 | const char *old_path, *new_path; |
122 | bool preserve; | 464 | bool preserve; |
465 | struct ocfs2_info info; | ||
123 | 466 | ||
124 | switch (cmd) { | 467 | switch (cmd) { |
125 | case OCFS2_IOC_GETFLAGS: | 468 | case OCFS2_IOC_GETFLAGS: |
@@ -174,6 +517,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
174 | preserve = (args.preserve != 0); | 517 | preserve = (args.preserve != 0); |
175 | 518 | ||
176 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); | 519 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); |
520 | case OCFS2_IOC_INFO: | ||
521 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | ||
522 | sizeof(struct ocfs2_info))) | ||
523 | return -EFAULT; | ||
524 | |||
525 | return ocfs2_info_handle(inode, &info, 0); | ||
177 | default: | 526 | default: |
178 | return -ENOTTY; | 527 | return -ENOTTY; |
179 | } | 528 | } |
@@ -185,6 +534,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
185 | bool preserve; | 534 | bool preserve; |
186 | struct reflink_arguments args; | 535 | struct reflink_arguments args; |
187 | struct inode *inode = file->f_path.dentry->d_inode; | 536 | struct inode *inode = file->f_path.dentry->d_inode; |
537 | struct ocfs2_info info; | ||
188 | 538 | ||
189 | switch (cmd) { | 539 | switch (cmd) { |
190 | case OCFS2_IOC32_GETFLAGS: | 540 | case OCFS2_IOC32_GETFLAGS: |
@@ -209,6 +559,12 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
209 | 559 | ||
210 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), | 560 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), |
211 | compat_ptr(args.new_path), preserve); | 561 | compat_ptr(args.new_path), preserve); |
562 | case OCFS2_IOC_INFO: | ||
563 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | ||
564 | sizeof(struct ocfs2_info))) | ||
565 | return -EFAULT; | ||
566 | |||
567 | return ocfs2_info_handle(inode, &info, 1); | ||
212 | default: | 568 | default: |
213 | return -ENOIOCTLCMD; | 569 | return -ENOIOCTLCMD; |
214 | } | 570 | } |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9b57c0350ff9..faa2303dbf0a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -301,7 +301,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
301 | { | 301 | { |
302 | int status = 0; | 302 | int status = 0; |
303 | unsigned int flushed; | 303 | unsigned int flushed; |
304 | unsigned long old_id; | ||
305 | struct ocfs2_journal *journal = NULL; | 304 | struct ocfs2_journal *journal = NULL; |
306 | 305 | ||
307 | mlog_entry_void(); | 306 | mlog_entry_void(); |
@@ -326,7 +325,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
326 | goto finally; | 325 | goto finally; |
327 | } | 326 | } |
328 | 327 | ||
329 | old_id = ocfs2_inc_trans_id(journal); | 328 | ocfs2_inc_trans_id(journal); |
330 | 329 | ||
331 | flushed = atomic_read(&journal->j_num_trans); | 330 | flushed = atomic_read(&journal->j_num_trans); |
332 | atomic_set(&journal->j_num_trans, 0); | 331 | atomic_set(&journal->j_num_trans, 0); |
@@ -342,9 +341,6 @@ finally: | |||
342 | return status; | 341 | return status; |
343 | } | 342 | } |
344 | 343 | ||
345 | /* pass it NULL and it will allocate a new handle object for you. If | ||
346 | * you pass it a handle however, it may still return error, in which | ||
347 | * case it has free'd the passed handle for you. */ | ||
348 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | 344 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) |
349 | { | 345 | { |
350 | journal_t *journal = osb->journal->j_journal; | 346 | journal_t *journal = osb->journal->j_journal; |
@@ -1888,6 +1884,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) | |||
1888 | 1884 | ||
1889 | os = &osb->osb_orphan_scan; | 1885 | os = &osb->osb_orphan_scan; |
1890 | 1886 | ||
1887 | mlog(0, "Begin orphan scan\n"); | ||
1888 | |||
1891 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) | 1889 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) |
1892 | goto out; | 1890 | goto out; |
1893 | 1891 | ||
@@ -1920,6 +1918,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) | |||
1920 | unlock: | 1918 | unlock: |
1921 | ocfs2_orphan_scan_unlock(osb, seqno); | 1919 | ocfs2_orphan_scan_unlock(osb, seqno); |
1922 | out: | 1920 | out: |
1921 | mlog(0, "Orphan scan completed\n"); | ||
1923 | return; | 1922 | return; |
1924 | } | 1923 | } |
1925 | 1924 | ||
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index b5baaa8e710f..43e56b97f9c0 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -67,11 +67,12 @@ struct ocfs2_journal { | |||
67 | struct buffer_head *j_bh; /* Journal disk inode block */ | 67 | struct buffer_head *j_bh; /* Journal disk inode block */ |
68 | atomic_t j_num_trans; /* Number of transactions | 68 | atomic_t j_num_trans; /* Number of transactions |
69 | * currently in the system. */ | 69 | * currently in the system. */ |
70 | spinlock_t j_lock; | ||
70 | unsigned long j_trans_id; | 71 | unsigned long j_trans_id; |
71 | struct rw_semaphore j_trans_barrier; | 72 | struct rw_semaphore j_trans_barrier; |
72 | wait_queue_head_t j_checkpointed; | 73 | wait_queue_head_t j_checkpointed; |
73 | 74 | ||
74 | spinlock_t j_lock; | 75 | /* both fields protected by j_lock*/ |
75 | struct list_head j_la_cleanups; | 76 | struct list_head j_la_cleanups; |
76 | struct work_struct j_recovery_work; | 77 | struct work_struct j_recovery_work; |
77 | }; | 78 | }; |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index b04d6961c0d4..7e32db9c2c99 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -75,9 +75,11 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, | |||
75 | /* | 75 | /* |
76 | * Another node might have truncated while we were waiting on | 76 | * Another node might have truncated while we were waiting on |
77 | * cluster locks. | 77 | * cluster locks. |
78 | * We don't check size == 0 before the shift. This is borrowed | ||
79 | * from do_generic_file_read. | ||
78 | */ | 80 | */ |
79 | last_index = size >> PAGE_CACHE_SHIFT; | 81 | last_index = (size - 1) >> PAGE_CACHE_SHIFT; |
80 | if (page->index > last_index) { | 82 | if (unlikely(!size || page->index > last_index)) { |
81 | ret = -EINVAL; | 83 | ret = -EINVAL; |
82 | goto out; | 84 | goto out; |
83 | } | 85 | } |
@@ -108,7 +110,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, | |||
108 | * because the "write" would invalidate their data. | 110 | * because the "write" would invalidate their data. |
109 | */ | 111 | */ |
110 | if (page->index == last_index) | 112 | if (page->index == last_index) |
111 | len = size & ~PAGE_CACHE_MASK; | 113 | len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; |
112 | 114 | ||
113 | ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page, | 115 | ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page, |
114 | &fsdata, di_bh, page); | 116 | &fsdata, di_bh, page); |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index f171b51a74f7..a00dda2e4f16 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -472,32 +472,23 @@ leave: | |||
472 | return status; | 472 | return status; |
473 | } | 473 | } |
474 | 474 | ||
475 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, | 475 | static int __ocfs2_mknod_locked(struct inode *dir, |
476 | struct inode *dir, | 476 | struct inode *inode, |
477 | struct inode *inode, | 477 | dev_t dev, |
478 | dev_t dev, | 478 | struct buffer_head **new_fe_bh, |
479 | struct buffer_head **new_fe_bh, | 479 | struct buffer_head *parent_fe_bh, |
480 | struct buffer_head *parent_fe_bh, | 480 | handle_t *handle, |
481 | handle_t *handle, | 481 | struct ocfs2_alloc_context *inode_ac, |
482 | struct ocfs2_alloc_context *inode_ac) | 482 | u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit) |
483 | { | 483 | { |
484 | int status = 0; | 484 | int status = 0; |
485 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
485 | struct ocfs2_dinode *fe = NULL; | 486 | struct ocfs2_dinode *fe = NULL; |
486 | struct ocfs2_extent_list *fel; | 487 | struct ocfs2_extent_list *fel; |
487 | u64 suballoc_loc, fe_blkno = 0; | ||
488 | u16 suballoc_bit; | ||
489 | u16 feat; | 488 | u16 feat; |
490 | 489 | ||
491 | *new_fe_bh = NULL; | 490 | *new_fe_bh = NULL; |
492 | 491 | ||
493 | status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh, | ||
494 | inode_ac, &suballoc_loc, | ||
495 | &suballoc_bit, &fe_blkno); | ||
496 | if (status < 0) { | ||
497 | mlog_errno(status); | ||
498 | goto leave; | ||
499 | } | ||
500 | |||
501 | /* populate as many fields early on as possible - many of | 492 | /* populate as many fields early on as possible - many of |
502 | * these are used by the support functions here and in | 493 | * these are used by the support functions here and in |
503 | * callers. */ | 494 | * callers. */ |
@@ -591,6 +582,34 @@ leave: | |||
591 | return status; | 582 | return status; |
592 | } | 583 | } |
593 | 584 | ||
585 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, | ||
586 | struct inode *dir, | ||
587 | struct inode *inode, | ||
588 | dev_t dev, | ||
589 | struct buffer_head **new_fe_bh, | ||
590 | struct buffer_head *parent_fe_bh, | ||
591 | handle_t *handle, | ||
592 | struct ocfs2_alloc_context *inode_ac) | ||
593 | { | ||
594 | int status = 0; | ||
595 | u64 suballoc_loc, fe_blkno = 0; | ||
596 | u16 suballoc_bit; | ||
597 | |||
598 | *new_fe_bh = NULL; | ||
599 | |||
600 | status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh, | ||
601 | inode_ac, &suballoc_loc, | ||
602 | &suballoc_bit, &fe_blkno); | ||
603 | if (status < 0) { | ||
604 | mlog_errno(status); | ||
605 | return status; | ||
606 | } | ||
607 | |||
608 | return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, | ||
609 | parent_fe_bh, handle, inode_ac, | ||
610 | fe_blkno, suballoc_loc, suballoc_bit); | ||
611 | } | ||
612 | |||
594 | static int ocfs2_mkdir(struct inode *dir, | 613 | static int ocfs2_mkdir(struct inode *dir, |
595 | struct dentry *dentry, | 614 | struct dentry *dentry, |
596 | int mode) | 615 | int mode) |
@@ -1852,61 +1871,117 @@ bail: | |||
1852 | return status; | 1871 | return status; |
1853 | } | 1872 | } |
1854 | 1873 | ||
1855 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | 1874 | static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb, |
1856 | struct inode **ret_orphan_dir, | 1875 | struct inode **ret_orphan_dir, |
1857 | u64 blkno, | 1876 | struct buffer_head **ret_orphan_dir_bh) |
1858 | char *name, | ||
1859 | struct ocfs2_dir_lookup_result *lookup) | ||
1860 | { | 1877 | { |
1861 | struct inode *orphan_dir_inode; | 1878 | struct inode *orphan_dir_inode; |
1862 | struct buffer_head *orphan_dir_bh = NULL; | 1879 | struct buffer_head *orphan_dir_bh = NULL; |
1863 | int status = 0; | 1880 | int ret = 0; |
1864 | |||
1865 | status = ocfs2_blkno_stringify(blkno, name); | ||
1866 | if (status < 0) { | ||
1867 | mlog_errno(status); | ||
1868 | return status; | ||
1869 | } | ||
1870 | 1881 | ||
1871 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 1882 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
1872 | ORPHAN_DIR_SYSTEM_INODE, | 1883 | ORPHAN_DIR_SYSTEM_INODE, |
1873 | osb->slot_num); | 1884 | osb->slot_num); |
1874 | if (!orphan_dir_inode) { | 1885 | if (!orphan_dir_inode) { |
1875 | status = -ENOENT; | 1886 | ret = -ENOENT; |
1876 | mlog_errno(status); | 1887 | mlog_errno(ret); |
1877 | return status; | 1888 | return ret; |
1878 | } | 1889 | } |
1879 | 1890 | ||
1880 | mutex_lock(&orphan_dir_inode->i_mutex); | 1891 | mutex_lock(&orphan_dir_inode->i_mutex); |
1881 | 1892 | ||
1882 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 1893 | ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); |
1883 | if (status < 0) { | 1894 | if (ret < 0) { |
1884 | mlog_errno(status); | 1895 | mutex_unlock(&orphan_dir_inode->i_mutex); |
1885 | goto leave; | 1896 | iput(orphan_dir_inode); |
1897 | |||
1898 | mlog_errno(ret); | ||
1899 | return ret; | ||
1886 | } | 1900 | } |
1887 | 1901 | ||
1888 | status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, | 1902 | *ret_orphan_dir = orphan_dir_inode; |
1889 | orphan_dir_bh, name, | 1903 | *ret_orphan_dir_bh = orphan_dir_bh; |
1890 | OCFS2_ORPHAN_NAMELEN, lookup); | ||
1891 | if (status < 0) { | ||
1892 | ocfs2_inode_unlock(orphan_dir_inode, 1); | ||
1893 | 1904 | ||
1894 | mlog_errno(status); | 1905 | return 0; |
1895 | goto leave; | 1906 | } |
1907 | |||
1908 | static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode, | ||
1909 | struct buffer_head *orphan_dir_bh, | ||
1910 | u64 blkno, | ||
1911 | char *name, | ||
1912 | struct ocfs2_dir_lookup_result *lookup) | ||
1913 | { | ||
1914 | int ret; | ||
1915 | struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb); | ||
1916 | |||
1917 | ret = ocfs2_blkno_stringify(blkno, name); | ||
1918 | if (ret < 0) { | ||
1919 | mlog_errno(ret); | ||
1920 | return ret; | ||
1921 | } | ||
1922 | |||
1923 | ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, | ||
1924 | orphan_dir_bh, name, | ||
1925 | OCFS2_ORPHAN_NAMELEN, lookup); | ||
1926 | if (ret < 0) { | ||
1927 | mlog_errno(ret); | ||
1928 | return ret; | ||
1929 | } | ||
1930 | |||
1931 | return 0; | ||
1932 | } | ||
1933 | |||
1934 | /** | ||
1935 | * ocfs2_prepare_orphan_dir() - Prepare an orphan directory for | ||
1936 | * insertion of an orphan. | ||
1937 | * @osb: ocfs2 file system | ||
1938 | * @ret_orphan_dir: Orphan dir inode - returned locked! | ||
1939 | * @blkno: Actual block number of the inode to be inserted into orphan dir. | ||
1940 | * @lookup: dir lookup result, to be passed back into functions like | ||
1941 | * ocfs2_orphan_add | ||
1942 | * | ||
1943 | * Returns zero on success and the ret_orphan_dir, name and lookup | ||
1944 | * fields will be populated. | ||
1945 | * | ||
1946 | * Returns non-zero on failure. | ||
1947 | */ | ||
1948 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | ||
1949 | struct inode **ret_orphan_dir, | ||
1950 | u64 blkno, | ||
1951 | char *name, | ||
1952 | struct ocfs2_dir_lookup_result *lookup) | ||
1953 | { | ||
1954 | struct inode *orphan_dir_inode = NULL; | ||
1955 | struct buffer_head *orphan_dir_bh = NULL; | ||
1956 | int ret = 0; | ||
1957 | |||
1958 | ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode, | ||
1959 | &orphan_dir_bh); | ||
1960 | if (ret < 0) { | ||
1961 | mlog_errno(ret); | ||
1962 | return ret; | ||
1963 | } | ||
1964 | |||
1965 | ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh, | ||
1966 | blkno, name, lookup); | ||
1967 | if (ret < 0) { | ||
1968 | mlog_errno(ret); | ||
1969 | goto out; | ||
1896 | } | 1970 | } |
1897 | 1971 | ||
1898 | *ret_orphan_dir = orphan_dir_inode; | 1972 | *ret_orphan_dir = orphan_dir_inode; |
1899 | 1973 | ||
1900 | leave: | 1974 | out: |
1901 | if (status) { | 1975 | brelse(orphan_dir_bh); |
1976 | |||
1977 | if (ret) { | ||
1978 | ocfs2_inode_unlock(orphan_dir_inode, 1); | ||
1902 | mutex_unlock(&orphan_dir_inode->i_mutex); | 1979 | mutex_unlock(&orphan_dir_inode->i_mutex); |
1903 | iput(orphan_dir_inode); | 1980 | iput(orphan_dir_inode); |
1904 | } | 1981 | } |
1905 | 1982 | ||
1906 | brelse(orphan_dir_bh); | 1983 | mlog_exit(ret); |
1907 | 1984 | return ret; | |
1908 | mlog_exit(status); | ||
1909 | return status; | ||
1910 | } | 1985 | } |
1911 | 1986 | ||
1912 | static int ocfs2_orphan_add(struct ocfs2_super *osb, | 1987 | static int ocfs2_orphan_add(struct ocfs2_super *osb, |
@@ -2053,6 +2128,99 @@ leave: | |||
2053 | return status; | 2128 | return status; |
2054 | } | 2129 | } |
2055 | 2130 | ||
2131 | /** | ||
2132 | * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to receive a newly | ||
2133 | * allocated file. This is different from the typical 'add to orphan dir' | ||
2134 | * operation in that the inode does not yet exist. This is a problem because | ||
2135 | * the orphan dir stringifies the inode block number to come up with its | ||
2136 | * dirent. Obviously if the inode does not yet exist we have a chicken and egg | ||
2137 | * problem. This function works around it by calling deeper into the orphan | ||
2138 | * and suballoc code than other callers. Use this only by necessity. | ||
2139 | * @dir: The directory which this inode will ultimately wind up under - not the | ||
2140 | * orphan dir! | ||
2141 | * @dir_bh: buffer_head of the @dir inode block | ||
2142 | * @orphan_name: string of length (OCFS2_ORPHAN_NAMELEN + 1). Will be filled | ||
2143 | * with the string to be used for orphan dirent. Pass back to the orphan dir | ||
2144 | * code. | ||
2145 | * @ret_orphan_dir: orphan dir inode returned to be passed back into orphan | ||
2146 | * dir code. | ||
2147 | * @ret_di_blkno: block number where the new inode will be allocated. | ||
2148 | * @orphan_insert: Dir insert context to be passed back into orphan dir code. | ||
2149 | * @ret_inode_ac: Inode alloc context to be passed back to the allocator. | ||
2150 | * | ||
2151 | * Returns zero on success and the orphan_name, ret_orphan_dir, ret_di_blkno, | ||
2152 | * orphan_insert and ret_inode_ac fields will be populated. | ||
2153 | * | ||
2154 | * Returns non-zero on failure. | ||
2155 | */ | ||
2156 | static int ocfs2_prep_new_orphaned_file(struct inode *dir, | ||
2157 | struct buffer_head *dir_bh, | ||
2158 | char *orphan_name, | ||
2159 | struct inode **ret_orphan_dir, | ||
2160 | u64 *ret_di_blkno, | ||
2161 | struct ocfs2_dir_lookup_result *orphan_insert, | ||
2162 | struct ocfs2_alloc_context **ret_inode_ac) | ||
2163 | { | ||
2164 | int ret; | ||
2165 | u64 di_blkno; | ||
2166 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2167 | struct inode *orphan_dir = NULL; | ||
2168 | struct buffer_head *orphan_dir_bh = NULL; | ||
2169 | struct ocfs2_alloc_context *inode_ac = NULL; | ||
2170 | |||
2171 | ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir, &orphan_dir_bh); | ||
2172 | if (ret < 0) { | ||
2173 | mlog_errno(ret); | ||
2174 | return ret; | ||
2175 | } | ||
2176 | |||
2177 | /* reserve an inode spot */ | ||
2178 | ret = ocfs2_reserve_new_inode(osb, &inode_ac); | ||
2179 | if (ret < 0) { | ||
2180 | if (ret != -ENOSPC) | ||
2181 | mlog_errno(ret); | ||
2182 | goto out; | ||
2183 | } | ||
2184 | |||
2185 | ret = ocfs2_find_new_inode_loc(dir, dir_bh, inode_ac, | ||
2186 | &di_blkno); | ||
2187 | if (ret) { | ||
2188 | mlog_errno(ret); | ||
2189 | goto out; | ||
2190 | } | ||
2191 | |||
2192 | ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh, | ||
2193 | di_blkno, orphan_name, orphan_insert); | ||
2194 | if (ret < 0) { | ||
2195 | mlog_errno(ret); | ||
2196 | goto out; | ||
2197 | } | ||
2198 | |||
2199 | out: | ||
2200 | if (ret == 0) { | ||
2201 | *ret_orphan_dir = orphan_dir; | ||
2202 | *ret_di_blkno = di_blkno; | ||
2203 | *ret_inode_ac = inode_ac; | ||
2204 | /* | ||
2205 | * orphan_name and orphan_insert are already up to | ||
2206 | * date via prepare_orphan_dir | ||
2207 | */ | ||
2208 | } else { | ||
2209 | /* Unroll reserve_new_inode* */ | ||
2210 | if (inode_ac) | ||
2211 | ocfs2_free_alloc_context(inode_ac); | ||
2212 | |||
2213 | /* Unroll orphan dir locking */ | ||
2214 | mutex_unlock(&orphan_dir->i_mutex); | ||
2215 | ocfs2_inode_unlock(orphan_dir, 1); | ||
2216 | iput(orphan_dir); | ||
2217 | } | ||
2218 | |||
2219 | brelse(orphan_dir_bh); | ||
2220 | |||
2221 | return 0; | ||
2222 | } | ||
2223 | |||
2056 | int ocfs2_create_inode_in_orphan(struct inode *dir, | 2224 | int ocfs2_create_inode_in_orphan(struct inode *dir, |
2057 | int mode, | 2225 | int mode, |
2058 | struct inode **new_inode) | 2226 | struct inode **new_inode) |
@@ -2068,6 +2236,8 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2068 | struct buffer_head *new_di_bh = NULL; | 2236 | struct buffer_head *new_di_bh = NULL; |
2069 | struct ocfs2_alloc_context *inode_ac = NULL; | 2237 | struct ocfs2_alloc_context *inode_ac = NULL; |
2070 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; | 2238 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; |
2239 | u64 uninitialized_var(di_blkno), suballoc_loc; | ||
2240 | u16 suballoc_bit; | ||
2071 | 2241 | ||
2072 | status = ocfs2_inode_lock(dir, &parent_di_bh, 1); | 2242 | status = ocfs2_inode_lock(dir, &parent_di_bh, 1); |
2073 | if (status < 0) { | 2243 | if (status < 0) { |
@@ -2076,20 +2246,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2076 | return status; | 2246 | return status; |
2077 | } | 2247 | } |
2078 | 2248 | ||
2079 | /* | 2249 | status = ocfs2_prep_new_orphaned_file(dir, parent_di_bh, |
2080 | * We give the orphan dir the root blkno to fake an orphan name, | 2250 | orphan_name, &orphan_dir, |
2081 | * and allocate enough space for our insertion. | 2251 | &di_blkno, &orphan_insert, &inode_ac); |
2082 | */ | ||
2083 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | ||
2084 | osb->root_blkno, | ||
2085 | orphan_name, &orphan_insert); | ||
2086 | if (status < 0) { | ||
2087 | mlog_errno(status); | ||
2088 | goto leave; | ||
2089 | } | ||
2090 | |||
2091 | /* reserve an inode spot */ | ||
2092 | status = ocfs2_reserve_new_inode(osb, &inode_ac); | ||
2093 | if (status < 0) { | 2252 | if (status < 0) { |
2094 | if (status != -ENOSPC) | 2253 | if (status != -ENOSPC) |
2095 | mlog_errno(status); | 2254 | mlog_errno(status); |
@@ -2116,17 +2275,20 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2116 | goto leave; | 2275 | goto leave; |
2117 | did_quota_inode = 1; | 2276 | did_quota_inode = 1; |
2118 | 2277 | ||
2119 | inode->i_nlink = 0; | 2278 | status = ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac, |
2120 | /* do the real work now. */ | 2279 | &suballoc_loc, |
2121 | status = ocfs2_mknod_locked(osb, dir, inode, | 2280 | &suballoc_bit, di_blkno); |
2122 | 0, &new_di_bh, parent_di_bh, handle, | ||
2123 | inode_ac); | ||
2124 | if (status < 0) { | 2281 | if (status < 0) { |
2125 | mlog_errno(status); | 2282 | mlog_errno(status); |
2126 | goto leave; | 2283 | goto leave; |
2127 | } | 2284 | } |
2128 | 2285 | ||
2129 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name); | 2286 | inode->i_nlink = 0; |
2287 | /* do the real work now. */ | ||
2288 | status = __ocfs2_mknod_locked(dir, inode, | ||
2289 | 0, &new_di_bh, parent_di_bh, handle, | ||
2290 | inode_ac, di_blkno, suballoc_loc, | ||
2291 | suballoc_bit); | ||
2130 | if (status < 0) { | 2292 | if (status < 0) { |
2131 | mlog_errno(status); | 2293 | mlog_errno(status); |
2132 | goto leave; | 2294 | goto leave; |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index c67003b6b5a2..65739b3b3276 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -150,26 +150,33 @@ typedef void (*ocfs2_lock_callback)(int status, unsigned long data); | |||
150 | struct ocfs2_lock_res { | 150 | struct ocfs2_lock_res { |
151 | void *l_priv; | 151 | void *l_priv; |
152 | struct ocfs2_lock_res_ops *l_ops; | 152 | struct ocfs2_lock_res_ops *l_ops; |
153 | spinlock_t l_lock; | 153 | |
154 | 154 | ||
155 | struct list_head l_blocked_list; | 155 | struct list_head l_blocked_list; |
156 | struct list_head l_mask_waiters; | 156 | struct list_head l_mask_waiters; |
157 | 157 | ||
158 | enum ocfs2_lock_type l_type; | ||
159 | unsigned long l_flags; | 158 | unsigned long l_flags; |
160 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; | 159 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; |
161 | int l_level; | ||
162 | unsigned int l_ro_holders; | 160 | unsigned int l_ro_holders; |
163 | unsigned int l_ex_holders; | 161 | unsigned int l_ex_holders; |
164 | struct ocfs2_dlm_lksb l_lksb; | 162 | unsigned char l_level; |
163 | |||
164 | /* Data packed - type enum ocfs2_lock_type */ | ||
165 | unsigned char l_type; | ||
165 | 166 | ||
166 | /* used from AST/BAST funcs. */ | 167 | /* used from AST/BAST funcs. */ |
167 | enum ocfs2_ast_action l_action; | 168 | /* Data packed - enum type ocfs2_ast_action */ |
168 | enum ocfs2_unlock_action l_unlock_action; | 169 | unsigned char l_action; |
169 | int l_requested; | 170 | /* Data packed - enum type ocfs2_unlock_action */ |
170 | int l_blocking; | 171 | unsigned char l_unlock_action; |
172 | unsigned char l_requested; | ||
173 | unsigned char l_blocking; | ||
171 | unsigned int l_pending_gen; | 174 | unsigned int l_pending_gen; |
172 | 175 | ||
176 | spinlock_t l_lock; | ||
177 | |||
178 | struct ocfs2_dlm_lksb l_lksb; | ||
179 | |||
173 | wait_queue_head_t l_event; | 180 | wait_queue_head_t l_event; |
174 | 181 | ||
175 | struct list_head l_debug_list; | 182 | struct list_head l_debug_list; |
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 2d3420af1a83..9bc535499868 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h | |||
@@ -76,4 +76,99 @@ struct reflink_arguments { | |||
76 | }; | 76 | }; |
77 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) | 77 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) |
78 | 78 | ||
79 | /* The following definitions are dedicated to ocfs2_info_request ioctls. */ | ||
80 | #define OCFS2_INFO_MAX_REQUEST (50) | ||
81 | #define OCFS2_TEXT_UUID_LEN (OCFS2_VOL_UUID_LEN * 2) | ||
82 | |||
83 | /* Magic number of all requests */ | ||
84 | #define OCFS2_INFO_MAGIC (0x4F32494E) | ||
85 | |||
86 | /* | ||
87 | * Always try to separate info request into small pieces to | ||
88 | * guarantee backward and forward compatibility. | ||
89 | */ | ||
90 | struct ocfs2_info { | ||
91 | __u64 oi_requests; /* Array of __u64 pointers to requests */ | ||
92 | __u32 oi_count; /* Number of requests in info_requests */ | ||
93 | __u32 oi_pad; | ||
94 | }; | ||
95 | |||
96 | struct ocfs2_info_request { | ||
97 | /*00*/ __u32 ir_magic; /* Magic number */ | ||
98 | __u32 ir_code; /* Info request code */ | ||
99 | __u32 ir_size; /* Size of request */ | ||
100 | __u32 ir_flags; /* Request flags */ | ||
101 | /*10*/ /* Request specific fields */ | ||
102 | }; | ||
103 | |||
104 | struct ocfs2_info_clustersize { | ||
105 | struct ocfs2_info_request ic_req; | ||
106 | __u32 ic_clustersize; | ||
107 | __u32 ic_pad; | ||
108 | }; | ||
109 | |||
110 | struct ocfs2_info_blocksize { | ||
111 | struct ocfs2_info_request ib_req; | ||
112 | __u32 ib_blocksize; | ||
113 | __u32 ib_pad; | ||
114 | }; | ||
115 | |||
116 | struct ocfs2_info_maxslots { | ||
117 | struct ocfs2_info_request im_req; | ||
118 | __u32 im_max_slots; | ||
119 | __u32 im_pad; | ||
120 | }; | ||
121 | |||
122 | struct ocfs2_info_label { | ||
123 | struct ocfs2_info_request il_req; | ||
124 | __u8 il_label[OCFS2_MAX_VOL_LABEL_LEN]; | ||
125 | } __attribute__ ((packed)); | ||
126 | |||
127 | struct ocfs2_info_uuid { | ||
128 | struct ocfs2_info_request iu_req; | ||
129 | __u8 iu_uuid_str[OCFS2_TEXT_UUID_LEN + 1]; | ||
130 | } __attribute__ ((packed)); | ||
131 | |||
132 | struct ocfs2_info_fs_features { | ||
133 | struct ocfs2_info_request if_req; | ||
134 | __u32 if_compat_features; | ||
135 | __u32 if_incompat_features; | ||
136 | __u32 if_ro_compat_features; | ||
137 | __u32 if_pad; | ||
138 | }; | ||
139 | |||
140 | struct ocfs2_info_journal_size { | ||
141 | struct ocfs2_info_request ij_req; | ||
142 | __u64 ij_journal_size; | ||
143 | }; | ||
144 | |||
145 | /* Codes for ocfs2_info_request */ | ||
146 | enum ocfs2_info_type { | ||
147 | OCFS2_INFO_CLUSTERSIZE = 1, | ||
148 | OCFS2_INFO_BLOCKSIZE, | ||
149 | OCFS2_INFO_MAXSLOTS, | ||
150 | OCFS2_INFO_LABEL, | ||
151 | OCFS2_INFO_UUID, | ||
152 | OCFS2_INFO_FS_FEATURES, | ||
153 | OCFS2_INFO_JOURNAL_SIZE, | ||
154 | OCFS2_INFO_NUM_TYPES | ||
155 | }; | ||
156 | |||
157 | /* Flags for struct ocfs2_info_request */ | ||
158 | /* Filled by the caller */ | ||
159 | #define OCFS2_INFO_FL_NON_COHERENT (0x00000001) /* Cluster coherency not | ||
160 | required. This is a hint. | ||
161 | It is up to ocfs2 whether | ||
162 | the request can be fulfilled | ||
163 | without locking. */ | ||
164 | /* Filled by ocfs2 */ | ||
165 | #define OCFS2_INFO_FL_FILLED (0x40000000) /* Filesystem understood | ||
166 | this request and | ||
167 | filled in the answer */ | ||
168 | |||
169 | #define OCFS2_INFO_FL_ERROR (0x80000000) /* Error happened during | ||
170 | request handling. */ | ||
171 | |||
172 | #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) | ||
173 | |||
79 | #endif /* OCFS2_IOCTL_H */ | 174 | #endif /* OCFS2_IOCTL_H */ |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 47549f64224c..a120cfcf69bf 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -2437,16 +2437,26 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, | |||
2437 | len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + | 2437 | len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + |
2438 | le32_to_cpu(rec.r_clusters)) - cpos; | 2438 | le32_to_cpu(rec.r_clusters)) - cpos; |
2439 | /* | 2439 | /* |
2440 | * If the refcount rec already exist, cool. We just need | ||
2441 | * to check whether there is a split. Otherwise we just need | ||
2442 | * to increase the refcount. | ||
2443 | * If we will insert one, increases recs_add. | ||
2444 | * | ||
2445 | * We record all the records which will be inserted to the | 2440 | * We record all the records which will be inserted to the |
2446 | * same refcount block, so that we can tell exactly whether | 2441 | * same refcount block, so that we can tell exactly whether |
2447 | * we need a new refcount block or not. | 2442 | * we need a new refcount block or not. |
2443 | * | ||
2444 | * If we will insert a new one, this is easy and only happens | ||
2445 | * during adding refcounted flag to the extent, so we don't | ||
2446 | * have a chance of splitting. We just need one record. | ||
2447 | * | ||
2448 | * If the refcount rec already exists, that would be a little | ||
2449 | * complicated. We may have to: | ||
2450 | * 1) split at the beginning if the start pos isn't aligned. | ||
2451 | * we need 1 more record in this case. | ||
2452 | * 2) split at the end if the end pos isn't aligned. | ||
2453 | * we need 1 more record in this case. | ||
2454 | * 3) split in the middle because of file system fragmentation. | ||
2455 | * we need 2 more records in this case (we can't detect this | ||
2456 | * beforehand, so always think of the worst case). | ||
2448 | */ | 2457 | */ |
2449 | if (rec.r_refcount) { | 2458 | if (rec.r_refcount) { |
2459 | recs_add += 2; | ||
2450 | /* Check whether we need a split at the beginning. */ | 2460 | /* Check whether we need a split at the beginning. */ |
2451 | if (cpos == start_cpos && | 2461 | if (cpos == start_cpos && |
2452 | cpos != le64_to_cpu(rec.r_cpos)) | 2462 | cpos != le64_to_cpu(rec.r_cpos)) |
@@ -2954,7 +2964,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2954 | if (map_end & (PAGE_CACHE_SIZE - 1)) | 2964 | if (map_end & (PAGE_CACHE_SIZE - 1)) |
2955 | to = map_end & (PAGE_CACHE_SIZE - 1); | 2965 | to = map_end & (PAGE_CACHE_SIZE - 1); |
2956 | 2966 | ||
2957 | page = grab_cache_page(mapping, page_index); | 2967 | page = find_or_create_page(mapping, page_index, GFP_NOFS); |
2958 | 2968 | ||
2959 | /* | 2969 | /* |
2960 | * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page | 2970 | * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page |
@@ -3181,7 +3191,8 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, | |||
3181 | if (map_end > end) | 3191 | if (map_end > end) |
3182 | map_end = end; | 3192 | map_end = end; |
3183 | 3193 | ||
3184 | page = grab_cache_page(context->inode->i_mapping, page_index); | 3194 | page = find_or_create_page(context->inode->i_mapping, |
3195 | page_index, GFP_NOFS); | ||
3185 | BUG_ON(!page); | 3196 | BUG_ON(!page); |
3186 | 3197 | ||
3187 | wait_on_page_writeback(page); | 3198 | wait_on_page_writeback(page); |
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 29cba0eaa927..c8ce46f7d8e3 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h | |||
@@ -21,14 +21,14 @@ struct ocfs2_refcount_tree { | |||
21 | struct rb_node rf_node; | 21 | struct rb_node rf_node; |
22 | u64 rf_blkno; | 22 | u64 rf_blkno; |
23 | u32 rf_generation; | 23 | u32 rf_generation; |
24 | struct kref rf_getcnt; | ||
24 | struct rw_semaphore rf_sem; | 25 | struct rw_semaphore rf_sem; |
25 | struct ocfs2_lock_res rf_lockres; | 26 | struct ocfs2_lock_res rf_lockres; |
26 | struct kref rf_getcnt; | ||
27 | int rf_removed; | 27 | int rf_removed; |
28 | 28 | ||
29 | /* the following 4 fields are used by caching_info. */ | 29 | /* the following 4 fields are used by caching_info. */ |
30 | struct ocfs2_caching_info rf_ci; | ||
31 | spinlock_t rf_lock; | 30 | spinlock_t rf_lock; |
31 | struct ocfs2_caching_info rf_ci; | ||
32 | struct mutex rf_io_mutex; | 32 | struct mutex rf_io_mutex; |
33 | struct super_block *rf_sb; | 33 | struct super_block *rf_sb; |
34 | }; | 34 | }; |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index a8e6a95a353f..8a286f54dca1 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -57,11 +57,28 @@ struct ocfs2_suballoc_result { | |||
57 | u64 sr_bg_blkno; /* The bg we allocated from. Set | 57 | u64 sr_bg_blkno; /* The bg we allocated from. Set |
58 | to 0 when a block group is | 58 | to 0 when a block group is |
59 | contiguous. */ | 59 | contiguous. */ |
60 | u64 sr_bg_stable_blkno; /* | ||
61 | * Doesn't change, always | ||
62 | * set to target block | ||
63 | * group descriptor | ||
64 | * block. | ||
65 | */ | ||
60 | u64 sr_blkno; /* The first allocated block */ | 66 | u64 sr_blkno; /* The first allocated block */ |
61 | unsigned int sr_bit_offset; /* The bit in the bg */ | 67 | unsigned int sr_bit_offset; /* The bit in the bg */ |
62 | unsigned int sr_bits; /* How many bits we claimed */ | 68 | unsigned int sr_bits; /* How many bits we claimed */ |
63 | }; | 69 | }; |
64 | 70 | ||
71 | static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res) | ||
72 | { | ||
73 | if (res->sr_blkno == 0) | ||
74 | return 0; | ||
75 | |||
76 | if (res->sr_bg_blkno) | ||
77 | return res->sr_bg_blkno; | ||
78 | |||
79 | return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset); | ||
80 | } | ||
81 | |||
65 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); | 82 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); |
66 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); | 83 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); |
67 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); | 84 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); |
@@ -138,6 +155,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | |||
138 | brelse(ac->ac_bh); | 155 | brelse(ac->ac_bh); |
139 | ac->ac_bh = NULL; | 156 | ac->ac_bh = NULL; |
140 | ac->ac_resv = NULL; | 157 | ac->ac_resv = NULL; |
158 | if (ac->ac_find_loc_priv) { | ||
159 | kfree(ac->ac_find_loc_priv); | ||
160 | ac->ac_find_loc_priv = NULL; | ||
161 | } | ||
141 | } | 162 | } |
142 | 163 | ||
143 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | 164 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) |
@@ -1678,6 +1699,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
1678 | if (!ret) | 1699 | if (!ret) |
1679 | ocfs2_bg_discontig_fix_result(ac, gd, res); | 1700 | ocfs2_bg_discontig_fix_result(ac, gd, res); |
1680 | 1701 | ||
1702 | /* | ||
1703 | * sr_bg_blkno might have been changed by | ||
1704 | * ocfs2_bg_discontig_fix_result | ||
1705 | */ | ||
1706 | res->sr_bg_stable_blkno = group_bh->b_blocknr; | ||
1707 | |||
1708 | if (ac->ac_find_loc_only) | ||
1709 | goto out_loc_only; | ||
1710 | |||
1681 | ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, | 1711 | ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, |
1682 | res->sr_bits, | 1712 | res->sr_bits, |
1683 | le16_to_cpu(gd->bg_chain)); | 1713 | le16_to_cpu(gd->bg_chain)); |
@@ -1691,6 +1721,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
1691 | if (ret < 0) | 1721 | if (ret < 0) |
1692 | mlog_errno(ret); | 1722 | mlog_errno(ret); |
1693 | 1723 | ||
1724 | out_loc_only: | ||
1694 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); | 1725 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); |
1695 | 1726 | ||
1696 | out: | 1727 | out: |
@@ -1708,7 +1739,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1708 | { | 1739 | { |
1709 | int status; | 1740 | int status; |
1710 | u16 chain; | 1741 | u16 chain; |
1711 | u32 tmp_used; | ||
1712 | u64 next_group; | 1742 | u64 next_group; |
1713 | struct inode *alloc_inode = ac->ac_inode; | 1743 | struct inode *alloc_inode = ac->ac_inode; |
1714 | struct buffer_head *group_bh = NULL; | 1744 | struct buffer_head *group_bh = NULL; |
@@ -1770,6 +1800,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1770 | if (!status) | 1800 | if (!status) |
1771 | ocfs2_bg_discontig_fix_result(ac, bg, res); | 1801 | ocfs2_bg_discontig_fix_result(ac, bg, res); |
1772 | 1802 | ||
1803 | /* | ||
1804 | * sr_bg_blkno might have been changed by | ||
1805 | * ocfs2_bg_discontig_fix_result | ||
1806 | */ | ||
1807 | res->sr_bg_stable_blkno = group_bh->b_blocknr; | ||
1773 | 1808 | ||
1774 | /* | 1809 | /* |
1775 | * Keep track of previous block descriptor read. When | 1810 | * Keep track of previous block descriptor read. When |
@@ -1796,22 +1831,17 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1796 | } | 1831 | } |
1797 | } | 1832 | } |
1798 | 1833 | ||
1799 | /* Ok, claim our bits now: set the info on dinode, chainlist | 1834 | if (ac->ac_find_loc_only) |
1800 | * and then the group */ | 1835 | goto out_loc_only; |
1801 | status = ocfs2_journal_access_di(handle, | 1836 | |
1802 | INODE_CACHE(alloc_inode), | 1837 | status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, |
1803 | ac->ac_bh, | 1838 | ac->ac_bh, res->sr_bits, |
1804 | OCFS2_JOURNAL_ACCESS_WRITE); | 1839 | chain); |
1805 | if (status < 0) { | 1840 | if (status) { |
1806 | mlog_errno(status); | 1841 | mlog_errno(status); |
1807 | goto bail; | 1842 | goto bail; |
1808 | } | 1843 | } |
1809 | 1844 | ||
1810 | tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); | ||
1811 | fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used); | ||
1812 | le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits); | ||
1813 | ocfs2_journal_dirty(handle, ac->ac_bh); | ||
1814 | |||
1815 | status = ocfs2_block_group_set_bits(handle, | 1845 | status = ocfs2_block_group_set_bits(handle, |
1816 | alloc_inode, | 1846 | alloc_inode, |
1817 | bg, | 1847 | bg, |
@@ -1826,6 +1856,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1826 | mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, | 1856 | mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, |
1827 | (unsigned long long)le64_to_cpu(fe->i_blkno)); | 1857 | (unsigned long long)le64_to_cpu(fe->i_blkno)); |
1828 | 1858 | ||
1859 | out_loc_only: | ||
1829 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); | 1860 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); |
1830 | bail: | 1861 | bail: |
1831 | brelse(group_bh); | 1862 | brelse(group_bh); |
@@ -1845,6 +1876,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1845 | int status; | 1876 | int status; |
1846 | u16 victim, i; | 1877 | u16 victim, i; |
1847 | u16 bits_left = 0; | 1878 | u16 bits_left = 0; |
1879 | u64 hint = ac->ac_last_group; | ||
1848 | struct ocfs2_chain_list *cl; | 1880 | struct ocfs2_chain_list *cl; |
1849 | struct ocfs2_dinode *fe; | 1881 | struct ocfs2_dinode *fe; |
1850 | 1882 | ||
@@ -1872,7 +1904,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1872 | goto bail; | 1904 | goto bail; |
1873 | } | 1905 | } |
1874 | 1906 | ||
1875 | res->sr_bg_blkno = ac->ac_last_group; | 1907 | res->sr_bg_blkno = hint; |
1876 | if (res->sr_bg_blkno) { | 1908 | if (res->sr_bg_blkno) { |
1877 | /* Attempt to short-circuit the usual search mechanism | 1909 | /* Attempt to short-circuit the usual search mechanism |
1878 | * by jumping straight to the most recently used | 1910 | * by jumping straight to the most recently used |
@@ -1896,8 +1928,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1896 | 1928 | ||
1897 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, | 1929 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, |
1898 | res, &bits_left); | 1930 | res, &bits_left); |
1899 | if (!status) | 1931 | if (!status) { |
1932 | hint = ocfs2_group_from_res(res); | ||
1900 | goto set_hint; | 1933 | goto set_hint; |
1934 | } | ||
1901 | if (status < 0 && status != -ENOSPC) { | 1935 | if (status < 0 && status != -ENOSPC) { |
1902 | mlog_errno(status); | 1936 | mlog_errno(status); |
1903 | goto bail; | 1937 | goto bail; |
@@ -1920,8 +1954,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1920 | ac->ac_chain = i; | 1954 | ac->ac_chain = i; |
1921 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, | 1955 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, |
1922 | res, &bits_left); | 1956 | res, &bits_left); |
1923 | if (!status) | 1957 | if (!status) { |
1958 | hint = ocfs2_group_from_res(res); | ||
1924 | break; | 1959 | break; |
1960 | } | ||
1925 | if (status < 0 && status != -ENOSPC) { | 1961 | if (status < 0 && status != -ENOSPC) { |
1926 | mlog_errno(status); | 1962 | mlog_errno(status); |
1927 | goto bail; | 1963 | goto bail; |
@@ -1936,7 +1972,7 @@ set_hint: | |||
1936 | if (bits_left < min_bits) | 1972 | if (bits_left < min_bits) |
1937 | ac->ac_last_group = 0; | 1973 | ac->ac_last_group = 0; |
1938 | else | 1974 | else |
1939 | ac->ac_last_group = res->sr_bg_blkno; | 1975 | ac->ac_last_group = hint; |
1940 | } | 1976 | } |
1941 | 1977 | ||
1942 | bail: | 1978 | bail: |
@@ -2016,6 +2052,136 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir, | |||
2016 | OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; | 2052 | OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; |
2017 | } | 2053 | } |
2018 | 2054 | ||
2055 | int ocfs2_find_new_inode_loc(struct inode *dir, | ||
2056 | struct buffer_head *parent_fe_bh, | ||
2057 | struct ocfs2_alloc_context *ac, | ||
2058 | u64 *fe_blkno) | ||
2059 | { | ||
2060 | int ret; | ||
2061 | handle_t *handle = NULL; | ||
2062 | struct ocfs2_suballoc_result *res; | ||
2063 | |||
2064 | BUG_ON(!ac); | ||
2065 | BUG_ON(ac->ac_bits_given != 0); | ||
2066 | BUG_ON(ac->ac_bits_wanted != 1); | ||
2067 | BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); | ||
2068 | |||
2069 | res = kzalloc(sizeof(*res), GFP_NOFS); | ||
2070 | if (res == NULL) { | ||
2071 | ret = -ENOMEM; | ||
2072 | mlog_errno(ret); | ||
2073 | goto out; | ||
2074 | } | ||
2075 | |||
2076 | ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); | ||
2077 | |||
2078 | /* | ||
2079 | * The handle started here is for chain relink. Alternatively, | ||
2080 | * we could just disable relink for these calls. | ||
2081 | */ | ||
2082 | handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC); | ||
2083 | if (IS_ERR(handle)) { | ||
2084 | ret = PTR_ERR(handle); | ||
2085 | handle = NULL; | ||
2086 | mlog_errno(ret); | ||
2087 | goto out; | ||
2088 | } | ||
2089 | |||
2090 | /* | ||
2091 | * This will instruct ocfs2_claim_suballoc_bits and | ||
2092 | * ocfs2_search_one_group to search but save actual allocation | ||
2093 | * for later. | ||
2094 | */ | ||
2095 | ac->ac_find_loc_only = 1; | ||
2096 | |||
2097 | ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res); | ||
2098 | if (ret < 0) { | ||
2099 | mlog_errno(ret); | ||
2100 | goto out; | ||
2101 | } | ||
2102 | |||
2103 | ac->ac_find_loc_priv = res; | ||
2104 | *fe_blkno = res->sr_blkno; | ||
2105 | |||
2106 | out: | ||
2107 | if (handle) | ||
2108 | ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); | ||
2109 | |||
2110 | if (ret) | ||
2111 | kfree(res); | ||
2112 | |||
2113 | return ret; | ||
2114 | } | ||
2115 | |||
2116 | int ocfs2_claim_new_inode_at_loc(handle_t *handle, | ||
2117 | struct inode *dir, | ||
2118 | struct ocfs2_alloc_context *ac, | ||
2119 | u64 *suballoc_loc, | ||
2120 | u16 *suballoc_bit, | ||
2121 | u64 di_blkno) | ||
2122 | { | ||
2123 | int ret; | ||
2124 | u16 chain; | ||
2125 | struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv; | ||
2126 | struct buffer_head *bg_bh = NULL; | ||
2127 | struct ocfs2_group_desc *bg; | ||
2128 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data; | ||
2129 | |||
2130 | /* | ||
2131 | * Since di_blkno is being passed back in, we check for any | ||
2132 | * inconsistencies which may have happened between | ||
2133 | * calls. These are code bugs as di_blkno is not expected to | ||
2134 | * change once returned from ocfs2_find_new_inode_loc() | ||
2135 | */ | ||
2136 | BUG_ON(res->sr_blkno != di_blkno); | ||
2137 | |||
2138 | ret = ocfs2_read_group_descriptor(ac->ac_inode, di, | ||
2139 | res->sr_bg_stable_blkno, &bg_bh); | ||
2140 | if (ret) { | ||
2141 | mlog_errno(ret); | ||
2142 | goto out; | ||
2143 | } | ||
2144 | |||
2145 | bg = (struct ocfs2_group_desc *) bg_bh->b_data; | ||
2146 | chain = le16_to_cpu(bg->bg_chain); | ||
2147 | |||
2148 | ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle, | ||
2149 | ac->ac_bh, res->sr_bits, | ||
2150 | chain); | ||
2151 | if (ret) { | ||
2152 | mlog_errno(ret); | ||
2153 | goto out; | ||
2154 | } | ||
2155 | |||
2156 | ret = ocfs2_block_group_set_bits(handle, | ||
2157 | ac->ac_inode, | ||
2158 | bg, | ||
2159 | bg_bh, | ||
2160 | res->sr_bit_offset, | ||
2161 | res->sr_bits); | ||
2162 | if (ret < 0) { | ||
2163 | mlog_errno(ret); | ||
2164 | goto out; | ||
2165 | } | ||
2166 | |||
2167 | mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, | ||
2168 | (unsigned long long)di_blkno); | ||
2169 | |||
2170 | atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); | ||
2171 | |||
2172 | BUG_ON(res->sr_bits != 1); | ||
2173 | |||
2174 | *suballoc_loc = res->sr_bg_blkno; | ||
2175 | *suballoc_bit = res->sr_bit_offset; | ||
2176 | ac->ac_bits_given++; | ||
2177 | ocfs2_save_inode_ac_group(dir, ac); | ||
2178 | |||
2179 | out: | ||
2180 | brelse(bg_bh); | ||
2181 | |||
2182 | return ret; | ||
2183 | } | ||
2184 | |||
2019 | int ocfs2_claim_new_inode(handle_t *handle, | 2185 | int ocfs2_claim_new_inode(handle_t *handle, |
2020 | struct inode *dir, | 2186 | struct inode *dir, |
2021 | struct buffer_head *parent_fe_bh, | 2187 | struct buffer_head *parent_fe_bh, |
@@ -2567,7 +2733,8 @@ out: | |||
2567 | * suballoc_bit. | 2733 | * suballoc_bit. |
2568 | */ | 2734 | */ |
2569 | static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, | 2735 | static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, |
2570 | u16 *suballoc_slot, u16 *suballoc_bit) | 2736 | u16 *suballoc_slot, u64 *group_blkno, |
2737 | u16 *suballoc_bit) | ||
2571 | { | 2738 | { |
2572 | int status; | 2739 | int status; |
2573 | struct buffer_head *inode_bh = NULL; | 2740 | struct buffer_head *inode_bh = NULL; |
@@ -2604,6 +2771,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, | |||
2604 | *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); | 2771 | *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); |
2605 | if (suballoc_bit) | 2772 | if (suballoc_bit) |
2606 | *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); | 2773 | *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); |
2774 | if (group_blkno) | ||
2775 | *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc); | ||
2607 | 2776 | ||
2608 | bail: | 2777 | bail: |
2609 | brelse(inode_bh); | 2778 | brelse(inode_bh); |
@@ -2621,7 +2790,8 @@ bail: | |||
2621 | */ | 2790 | */ |
2622 | static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, | 2791 | static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, |
2623 | struct inode *suballoc, | 2792 | struct inode *suballoc, |
2624 | struct buffer_head *alloc_bh, u64 blkno, | 2793 | struct buffer_head *alloc_bh, |
2794 | u64 group_blkno, u64 blkno, | ||
2625 | u16 bit, int *res) | 2795 | u16 bit, int *res) |
2626 | { | 2796 | { |
2627 | struct ocfs2_dinode *alloc_di; | 2797 | struct ocfs2_dinode *alloc_di; |
@@ -2642,10 +2812,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, | |||
2642 | goto bail; | 2812 | goto bail; |
2643 | } | 2813 | } |
2644 | 2814 | ||
2645 | if (alloc_di->i_suballoc_loc) | 2815 | bg_blkno = group_blkno ? group_blkno : |
2646 | bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc); | 2816 | ocfs2_which_suballoc_group(blkno, bit); |
2647 | else | ||
2648 | bg_blkno = ocfs2_which_suballoc_group(blkno, bit); | ||
2649 | status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, | 2817 | status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, |
2650 | &group_bh); | 2818 | &group_bh); |
2651 | if (status < 0) { | 2819 | if (status < 0) { |
@@ -2680,6 +2848,7 @@ bail: | |||
2680 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | 2848 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) |
2681 | { | 2849 | { |
2682 | int status; | 2850 | int status; |
2851 | u64 group_blkno = 0; | ||
2683 | u16 suballoc_bit = 0, suballoc_slot = 0; | 2852 | u16 suballoc_bit = 0, suballoc_slot = 0; |
2684 | struct inode *inode_alloc_inode; | 2853 | struct inode *inode_alloc_inode; |
2685 | struct buffer_head *alloc_bh = NULL; | 2854 | struct buffer_head *alloc_bh = NULL; |
@@ -2687,7 +2856,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | |||
2687 | mlog_entry("blkno: %llu", (unsigned long long)blkno); | 2856 | mlog_entry("blkno: %llu", (unsigned long long)blkno); |
2688 | 2857 | ||
2689 | status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, | 2858 | status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, |
2690 | &suballoc_bit); | 2859 | &group_blkno, &suballoc_bit); |
2691 | if (status < 0) { | 2860 | if (status < 0) { |
2692 | mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); | 2861 | mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); |
2693 | goto bail; | 2862 | goto bail; |
@@ -2715,7 +2884,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | |||
2715 | } | 2884 | } |
2716 | 2885 | ||
2717 | status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, | 2886 | status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, |
2718 | blkno, suballoc_bit, res); | 2887 | group_blkno, blkno, suballoc_bit, res); |
2719 | if (status < 0) | 2888 | if (status < 0) |
2720 | mlog(ML_ERROR, "test suballoc bit failed %d\n", status); | 2889 | mlog(ML_ERROR, "test suballoc bit failed %d\n", status); |
2721 | 2890 | ||
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index a017dd3ee7d9..b8afabfeede4 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -56,6 +56,9 @@ struct ocfs2_alloc_context { | |||
56 | u64 ac_max_block; /* Highest block number to allocate. 0 is | 56 | u64 ac_max_block; /* Highest block number to allocate. 0 is |
57 | the same as ~0 - unlimited */ | 57 | the same as ~0 - unlimited */ |
58 | 58 | ||
59 | int ac_find_loc_only; /* hack for reflink operation ordering */ | ||
60 | struct ocfs2_suballoc_result *ac_find_loc_priv; /* */ | ||
61 | |||
59 | struct ocfs2_alloc_reservation *ac_resv; | 62 | struct ocfs2_alloc_reservation *ac_resv; |
60 | }; | 63 | }; |
61 | 64 | ||
@@ -197,4 +200,22 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, | |||
197 | struct ocfs2_alloc_context **meta_ac); | 200 | struct ocfs2_alloc_context **meta_ac); |
198 | 201 | ||
199 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); | 202 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); |
203 | |||
204 | |||
205 | |||
206 | /* | ||
207 | * The following two interfaces are for ocfs2_create_inode_in_orphan(). | ||
208 | */ | ||
209 | int ocfs2_find_new_inode_loc(struct inode *dir, | ||
210 | struct buffer_head *parent_fe_bh, | ||
211 | struct ocfs2_alloc_context *ac, | ||
212 | u64 *fe_blkno); | ||
213 | |||
214 | int ocfs2_claim_new_inode_at_loc(handle_t *handle, | ||
215 | struct inode *dir, | ||
216 | struct ocfs2_alloc_context *ac, | ||
217 | u64 *suballoc_loc, | ||
218 | u16 *suballoc_bit, | ||
219 | u64 di_blkno); | ||
220 | |||
200 | #endif /* _CHAINALLOC_H_ */ | 221 | #endif /* _CHAINALLOC_H_ */ |
@@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | |||
675 | f->f_path.mnt = mnt; | 675 | f->f_path.mnt = mnt; |
676 | f->f_pos = 0; | 676 | f->f_pos = 0; |
677 | f->f_op = fops_get(inode->i_fop); | 677 | f->f_op = fops_get(inode->i_fop); |
678 | file_move(f, &inode->i_sb->s_files); | 678 | file_sb_list_add(f, inode->i_sb); |
679 | 679 | ||
680 | error = security_dentry_open(f, cred); | 680 | error = security_dentry_open(f, cred); |
681 | if (error) | 681 | if (error) |
@@ -721,7 +721,7 @@ cleanup_all: | |||
721 | mnt_drop_write(mnt); | 721 | mnt_drop_write(mnt); |
722 | } | 722 | } |
723 | } | 723 | } |
724 | file_kill(f); | 724 | file_sb_list_del(f); |
725 | f->f_path.dentry = NULL; | 725 | f->f_path.dentry = NULL; |
726 | f->f_path.mnt = NULL; | 726 | f->f_path.mnt = NULL; |
727 | cleanup_file: | 727 | cleanup_file: |
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index d1b8a5c4bc0a..d513a07f44bb 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c | |||
@@ -182,7 +182,7 @@ int ibm_partition(struct parsed_partitions *state) | |||
182 | offset = (info->label_block + 1); | 182 | offset = (info->label_block + 1); |
183 | } else { | 183 | } else { |
184 | /* unlabeled disk */ | 184 | /* unlabeled disk */ |
185 | strlcat(tmp, sizeof(tmp), "(nonl)", PAGE_SIZE); | 185 | strlcat(state->pp_buf, "(nonl)", PAGE_SIZE); |
186 | size = i_size >> 9; | 186 | size = i_size >> 9; |
187 | offset = (info->label_block + 1); | 187 | offset = (info->label_block + 1); |
188 | } | 188 | } |
diff --git a/fs/pnode.c b/fs/pnode.c index 5cc564a83149..8066b8dd748f 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -126,6 +126,9 @@ static int do_make_slave(struct vfsmount *mnt) | |||
126 | return 0; | 126 | return 0; |
127 | } | 127 | } |
128 | 128 | ||
129 | /* | ||
130 | * vfsmount lock must be held for write | ||
131 | */ | ||
129 | void change_mnt_propagation(struct vfsmount *mnt, int type) | 132 | void change_mnt_propagation(struct vfsmount *mnt, int type) |
130 | { | 133 | { |
131 | if (type == MS_SHARED) { | 134 | if (type == MS_SHARED) { |
@@ -270,12 +273,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, | |||
270 | prev_src_mnt = child; | 273 | prev_src_mnt = child; |
271 | } | 274 | } |
272 | out: | 275 | out: |
273 | spin_lock(&vfsmount_lock); | 276 | br_write_lock(vfsmount_lock); |
274 | while (!list_empty(&tmp_list)) { | 277 | while (!list_empty(&tmp_list)) { |
275 | child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); | 278 | child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); |
276 | umount_tree(child, 0, &umount_list); | 279 | umount_tree(child, 0, &umount_list); |
277 | } | 280 | } |
278 | spin_unlock(&vfsmount_lock); | 281 | br_write_unlock(vfsmount_lock); |
279 | release_mounts(&umount_list); | 282 | release_mounts(&umount_list); |
280 | return ret; | 283 | return ret; |
281 | } | 284 | } |
@@ -296,6 +299,8 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) | |||
296 | * other mounts its parent propagates to. | 299 | * other mounts its parent propagates to. |
297 | * Check if any of these mounts that **do not have submounts** | 300 | * Check if any of these mounts that **do not have submounts** |
298 | * have more references than 'refcnt'. If so return busy. | 301 | * have more references than 'refcnt'. If so return busy. |
302 | * | ||
303 | * vfsmount lock must be held for read or write | ||
299 | */ | 304 | */ |
300 | int propagate_mount_busy(struct vfsmount *mnt, int refcnt) | 305 | int propagate_mount_busy(struct vfsmount *mnt, int refcnt) |
301 | { | 306 | { |
@@ -353,6 +358,8 @@ static void __propagate_umount(struct vfsmount *mnt) | |||
353 | * collect all mounts that receive propagation from the mount in @list, | 358 | * collect all mounts that receive propagation from the mount in @list, |
354 | * and return these additional mounts in the same list. | 359 | * and return these additional mounts in the same list. |
355 | * @list: the list of mounts to be unmounted. | 360 | * @list: the list of mounts to be unmounted. |
361 | * | ||
362 | * vfsmount lock must be held for write | ||
356 | */ | 363 | */ |
357 | int propagate_umount(struct list_head *list) | 364 | int propagate_umount(struct list_head *list) |
358 | { | 365 | { |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 23561cda7245..9c2b5f484879 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -214,8 +214,7 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne | |||
214 | { | 214 | { |
215 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | 215 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); |
216 | long rv = -ENOTTY; | 216 | long rv = -ENOTTY; |
217 | long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long); | 217 | long (*ioctl)(struct file *, unsigned int, unsigned long); |
218 | int (*ioctl)(struct inode *, struct file *, unsigned int, unsigned long); | ||
219 | 218 | ||
220 | spin_lock(&pde->pde_unload_lock); | 219 | spin_lock(&pde->pde_unload_lock); |
221 | if (!pde->proc_fops) { | 220 | if (!pde->proc_fops) { |
@@ -223,19 +222,11 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne | |||
223 | return rv; | 222 | return rv; |
224 | } | 223 | } |
225 | pde->pde_users++; | 224 | pde->pde_users++; |
226 | unlocked_ioctl = pde->proc_fops->unlocked_ioctl; | 225 | ioctl = pde->proc_fops->unlocked_ioctl; |
227 | ioctl = pde->proc_fops->ioctl; | ||
228 | spin_unlock(&pde->pde_unload_lock); | 226 | spin_unlock(&pde->pde_unload_lock); |
229 | 227 | ||
230 | if (unlocked_ioctl) { | 228 | if (ioctl) |
231 | rv = unlocked_ioctl(file, cmd, arg); | 229 | rv = ioctl(file, cmd, arg); |
232 | if (rv == -ENOIOCTLCMD) | ||
233 | rv = -EINVAL; | ||
234 | } else if (ioctl) { | ||
235 | WARN_ONCE(1, "Procfs ioctl handlers must use unlocked_ioctl, " | ||
236 | "%pf will be called without the Bkl held\n", ioctl); | ||
237 | rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg); | ||
238 | } | ||
239 | 230 | ||
240 | pde_users_dec(pde); | 231 | pde_users_dec(pde); |
241 | return rv; | 232 | return rv; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index aea1d3f1ffb5..271afc48b9a5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -210,6 +210,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
210 | int flags = vma->vm_flags; | 210 | int flags = vma->vm_flags; |
211 | unsigned long ino = 0; | 211 | unsigned long ino = 0; |
212 | unsigned long long pgoff = 0; | 212 | unsigned long long pgoff = 0; |
213 | unsigned long start; | ||
213 | dev_t dev = 0; | 214 | dev_t dev = 0; |
214 | int len; | 215 | int len; |
215 | 216 | ||
@@ -220,8 +221,14 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
220 | pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; | 221 | pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; |
221 | } | 222 | } |
222 | 223 | ||
224 | /* We don't show the stack guard page in /proc/maps */ | ||
225 | start = vma->vm_start; | ||
226 | if (vma->vm_flags & VM_GROWSDOWN) | ||
227 | if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) | ||
228 | start += PAGE_SIZE; | ||
229 | |||
223 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", | 230 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
224 | vma->vm_start, | 231 | start, |
225 | vma->vm_end, | 232 | vma->vm_end, |
226 | flags & VM_READ ? 'r' : '-', | 233 | flags & VM_READ ? 'r' : '-', |
227 | flags & VM_WRITE ? 'w' : '-', | 234 | flags & VM_WRITE ? 'w' : '-', |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ae35413dcbe1..caa758377d66 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -83,6 +83,7 @@ void reiserfs_evict_inode(struct inode *inode) | |||
83 | dquot_drop(inode); | 83 | dquot_drop(inode); |
84 | inode->i_blocks = 0; | 84 | inode->i_blocks = 0; |
85 | reiserfs_write_unlock_once(inode->i_sb, depth); | 85 | reiserfs_write_unlock_once(inode->i_sb, depth); |
86 | return; | ||
86 | 87 | ||
87 | no_delete: | 88 | no_delete: |
88 | end_writeback(inode); | 89 | end_writeback(inode); |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1ec952b1f036..812e2c05aa29 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb, | |||
2311 | /* flush out the real blocks */ | 2311 | /* flush out the real blocks */ |
2312 | for (i = 0; i < get_desc_trans_len(desc); i++) { | 2312 | for (i = 0; i < get_desc_trans_len(desc); i++) { |
2313 | set_buffer_dirty(real_blocks[i]); | 2313 | set_buffer_dirty(real_blocks[i]); |
2314 | ll_rw_block(SWRITE, 1, real_blocks + i); | 2314 | write_dirty_buffer(real_blocks[i], WRITE); |
2315 | } | 2315 | } |
2316 | for (i = 0; i < get_desc_trans_len(desc); i++) { | 2316 | for (i = 0; i < get_desc_trans_len(desc); i++) { |
2317 | wait_on_buffer(real_blocks[i]); | 2317 | wait_on_buffer(real_blocks[i]); |
@@ -68,7 +68,8 @@ int vfs_fstat(unsigned int fd, struct kstat *stat) | |||
68 | } | 68 | } |
69 | EXPORT_SYMBOL(vfs_fstat); | 69 | EXPORT_SYMBOL(vfs_fstat); |
70 | 70 | ||
71 | int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) | 71 | int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, |
72 | int flag) | ||
72 | { | 73 | { |
73 | struct path path; | 74 | struct path path; |
74 | int error = -EINVAL; | 75 | int error = -EINVAL; |
@@ -91,13 +92,13 @@ out: | |||
91 | } | 92 | } |
92 | EXPORT_SYMBOL(vfs_fstatat); | 93 | EXPORT_SYMBOL(vfs_fstatat); |
93 | 94 | ||
94 | int vfs_stat(char __user *name, struct kstat *stat) | 95 | int vfs_stat(const char __user *name, struct kstat *stat) |
95 | { | 96 | { |
96 | return vfs_fstatat(AT_FDCWD, name, stat, 0); | 97 | return vfs_fstatat(AT_FDCWD, name, stat, 0); |
97 | } | 98 | } |
98 | EXPORT_SYMBOL(vfs_stat); | 99 | EXPORT_SYMBOL(vfs_stat); |
99 | 100 | ||
100 | int vfs_lstat(char __user *name, struct kstat *stat) | 101 | int vfs_lstat(const char __user *name, struct kstat *stat) |
101 | { | 102 | { |
102 | return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); | 103 | return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); |
103 | } | 104 | } |
@@ -147,7 +148,8 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta | |||
147 | return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; | 148 | return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; |
148 | } | 149 | } |
149 | 150 | ||
150 | SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) | 151 | SYSCALL_DEFINE2(stat, const char __user *, filename, |
152 | struct __old_kernel_stat __user *, statbuf) | ||
151 | { | 153 | { |
152 | struct kstat stat; | 154 | struct kstat stat; |
153 | int error; | 155 | int error; |
@@ -159,7 +161,8 @@ SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user * | |||
159 | return cp_old_stat(&stat, statbuf); | 161 | return cp_old_stat(&stat, statbuf); |
160 | } | 162 | } |
161 | 163 | ||
162 | SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) | 164 | SYSCALL_DEFINE2(lstat, const char __user *, filename, |
165 | struct __old_kernel_stat __user *, statbuf) | ||
163 | { | 166 | { |
164 | struct kstat stat; | 167 | struct kstat stat; |
165 | int error; | 168 | int error; |
@@ -234,7 +237,8 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) | |||
234 | return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; | 237 | return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; |
235 | } | 238 | } |
236 | 239 | ||
237 | SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) | 240 | SYSCALL_DEFINE2(newstat, const char __user *, filename, |
241 | struct stat __user *, statbuf) | ||
238 | { | 242 | { |
239 | struct kstat stat; | 243 | struct kstat stat; |
240 | int error = vfs_stat(filename, &stat); | 244 | int error = vfs_stat(filename, &stat); |
@@ -244,7 +248,8 @@ SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) | |||
244 | return cp_new_stat(&stat, statbuf); | 248 | return cp_new_stat(&stat, statbuf); |
245 | } | 249 | } |
246 | 250 | ||
247 | SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) | 251 | SYSCALL_DEFINE2(newlstat, const char __user *, filename, |
252 | struct stat __user *, statbuf) | ||
248 | { | 253 | { |
249 | struct kstat stat; | 254 | struct kstat stat; |
250 | int error; | 255 | int error; |
@@ -257,7 +262,7 @@ SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf | |||
257 | } | 262 | } |
258 | 263 | ||
259 | #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) | 264 | #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) |
260 | SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename, | 265 | SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename, |
261 | struct stat __user *, statbuf, int, flag) | 266 | struct stat __user *, statbuf, int, flag) |
262 | { | 267 | { |
263 | struct kstat stat; | 268 | struct kstat stat; |
@@ -355,7 +360,8 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) | |||
355 | return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; | 360 | return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; |
356 | } | 361 | } |
357 | 362 | ||
358 | SYSCALL_DEFINE2(stat64, char __user *, filename, struct stat64 __user *, statbuf) | 363 | SYSCALL_DEFINE2(stat64, const char __user *, filename, |
364 | struct stat64 __user *, statbuf) | ||
359 | { | 365 | { |
360 | struct kstat stat; | 366 | struct kstat stat; |
361 | int error = vfs_stat(filename, &stat); | 367 | int error = vfs_stat(filename, &stat); |
@@ -366,7 +372,8 @@ SYSCALL_DEFINE2(stat64, char __user *, filename, struct stat64 __user *, statbuf | |||
366 | return error; | 372 | return error; |
367 | } | 373 | } |
368 | 374 | ||
369 | SYSCALL_DEFINE2(lstat64, char __user *, filename, struct stat64 __user *, statbuf) | 375 | SYSCALL_DEFINE2(lstat64, const char __user *, filename, |
376 | struct stat64 __user *, statbuf) | ||
370 | { | 377 | { |
371 | struct kstat stat; | 378 | struct kstat stat; |
372 | int error = vfs_lstat(filename, &stat); | 379 | int error = vfs_lstat(filename, &stat); |
@@ -388,7 +395,7 @@ SYSCALL_DEFINE2(fstat64, unsigned long, fd, struct stat64 __user *, statbuf) | |||
388 | return error; | 395 | return error; |
389 | } | 396 | } |
390 | 397 | ||
391 | SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, | 398 | SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, |
392 | struct stat64 __user *, statbuf, int, flag) | 399 | struct stat64 __user *, statbuf, int, flag) |
393 | { | 400 | { |
394 | struct kstat stat; | 401 | struct kstat stat; |
diff --git a/fs/super.c b/fs/super.c index 9674ab2c8718..8819e3a7ff20 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
54 | s = NULL; | 54 | s = NULL; |
55 | goto out; | 55 | goto out; |
56 | } | 56 | } |
57 | #ifdef CONFIG_SMP | ||
58 | s->s_files = alloc_percpu(struct list_head); | ||
59 | if (!s->s_files) { | ||
60 | security_sb_free(s); | ||
61 | kfree(s); | ||
62 | s = NULL; | ||
63 | goto out; | ||
64 | } else { | ||
65 | int i; | ||
66 | |||
67 | for_each_possible_cpu(i) | ||
68 | INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); | ||
69 | } | ||
70 | #else | ||
57 | INIT_LIST_HEAD(&s->s_files); | 71 | INIT_LIST_HEAD(&s->s_files); |
72 | #endif | ||
58 | INIT_LIST_HEAD(&s->s_instances); | 73 | INIT_LIST_HEAD(&s->s_instances); |
59 | INIT_HLIST_HEAD(&s->s_anon); | 74 | INIT_HLIST_HEAD(&s->s_anon); |
60 | INIT_LIST_HEAD(&s->s_inodes); | 75 | INIT_LIST_HEAD(&s->s_inodes); |
@@ -108,6 +123,9 @@ out: | |||
108 | */ | 123 | */ |
109 | static inline void destroy_super(struct super_block *s) | 124 | static inline void destroy_super(struct super_block *s) |
110 | { | 125 | { |
126 | #ifdef CONFIG_SMP | ||
127 | free_percpu(s->s_files); | ||
128 | #endif | ||
111 | security_sb_free(s); | 129 | security_sb_free(s); |
112 | kfree(s->s_subtype); | 130 | kfree(s->s_subtype); |
113 | kfree(s->s_options); | 131 | kfree(s->s_options); |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1b27b5688f62..da3fefe91a8f 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -340,7 +340,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
340 | char *p; | 340 | char *p; |
341 | 341 | ||
342 | p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); | 342 | p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); |
343 | if (p) | 343 | if (!IS_ERR(p)) |
344 | memmove(last_sysfs_file, p, strlen(p) + 1); | 344 | memmove(last_sysfs_file, p, strlen(p) + 1); |
345 | 345 | ||
346 | /* need attr_sd for attr and ops, its parent for kobj */ | 346 | /* need attr_sd for attr and ops, its parent for kobj */ |
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 048484fb10d2..46f7a807bbc1 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c | |||
@@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) | |||
114 | 114 | ||
115 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 115 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
116 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 116 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
117 | if (sb->s_flags & MS_SYNCHRONOUS) { | 117 | if (sb->s_flags & MS_SYNCHRONOUS) |
118 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 118 | ubh_sync_block(UCPI_UBH(ucpi)); |
119 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
120 | } | ||
121 | sb->s_dirt = 1; | 119 | sb->s_dirt = 1; |
122 | 120 | ||
123 | unlock_super (sb); | 121 | unlock_super (sb); |
@@ -207,10 +205,8 @@ do_more: | |||
207 | 205 | ||
208 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 206 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
209 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 207 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
210 | if (sb->s_flags & MS_SYNCHRONOUS) { | 208 | if (sb->s_flags & MS_SYNCHRONOUS) |
211 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 209 | ubh_sync_block(UCPI_UBH(ucpi)); |
212 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
213 | } | ||
214 | 210 | ||
215 | if (overflow) { | 211 | if (overflow) { |
216 | fragment += count; | 212 | fragment += count; |
@@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, | |||
558 | 554 | ||
559 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 555 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
560 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 556 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
561 | if (sb->s_flags & MS_SYNCHRONOUS) { | 557 | if (sb->s_flags & MS_SYNCHRONOUS) |
562 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 558 | ubh_sync_block(UCPI_UBH(ucpi)); |
563 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
564 | } | ||
565 | sb->s_dirt = 1; | 559 | sb->s_dirt = 1; |
566 | 560 | ||
567 | UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); | 561 | UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); |
@@ -680,10 +674,8 @@ cg_found: | |||
680 | succed: | 674 | succed: |
681 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 675 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
682 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 676 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
683 | if (sb->s_flags & MS_SYNCHRONOUS) { | 677 | if (sb->s_flags & MS_SYNCHRONOUS) |
684 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 678 | ubh_sync_block(UCPI_UBH(ucpi)); |
685 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
686 | } | ||
687 | sb->s_dirt = 1; | 679 | sb->s_dirt = 1; |
688 | 680 | ||
689 | result += cgno * uspi->s_fpg; | 681 | result += cgno * uspi->s_fpg; |
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 428017e018fe..2eabf04af3de 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c | |||
@@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode) | |||
113 | 113 | ||
114 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 114 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
115 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 115 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
116 | if (sb->s_flags & MS_SYNCHRONOUS) { | 116 | if (sb->s_flags & MS_SYNCHRONOUS) |
117 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 117 | ubh_sync_block(UCPI_UBH(ucpi)); |
118 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
119 | } | ||
120 | 118 | ||
121 | sb->s_dirt = 1; | 119 | sb->s_dirt = 1; |
122 | unlock_super (sb); | 120 | unlock_super (sb); |
@@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, | |||
156 | 154 | ||
157 | fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); | 155 | fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); |
158 | ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); | 156 | ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); |
159 | if (sb->s_flags & MS_SYNCHRONOUS) { | 157 | if (sb->s_flags & MS_SYNCHRONOUS) |
160 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 158 | ubh_sync_block(UCPI_UBH(ucpi)); |
161 | ubh_wait_on_buffer(UCPI_UBH(ucpi)); | ||
162 | } | ||
163 | 159 | ||
164 | UFSD("EXIT\n"); | 160 | UFSD("EXIT\n"); |
165 | } | 161 | } |
@@ -290,10 +286,8 @@ cg_found: | |||
290 | } | 286 | } |
291 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); | 287 | ubh_mark_buffer_dirty (USPI_UBH(uspi)); |
292 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); | 288 | ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); |
293 | if (sb->s_flags & MS_SYNCHRONOUS) { | 289 | if (sb->s_flags & MS_SYNCHRONOUS) |
294 | ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); | 290 | ubh_sync_block(UCPI_UBH(ucpi)); |
295 | ubh_wait_on_buffer (UCPI_UBH(ucpi)); | ||
296 | } | ||
297 | sb->s_dirt = 1; | 291 | sb->s_dirt = 1; |
298 | 292 | ||
299 | inode->i_ino = cg * uspi->s_ipg + bit; | 293 | inode->i_ino = cg * uspi->s_ipg + bit; |
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 34d5cb135320..a58f9155fc9a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c | |||
@@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) | |||
243 | ubh_bforget(ind_ubh); | 243 | ubh_bforget(ind_ubh); |
244 | ind_ubh = NULL; | 244 | ind_ubh = NULL; |
245 | } | 245 | } |
246 | if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { | 246 | if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) |
247 | ubh_ll_rw_block(SWRITE, ind_ubh); | 247 | ubh_sync_block(ind_ubh); |
248 | ubh_wait_on_buffer (ind_ubh); | ||
249 | } | ||
250 | ubh_brelse (ind_ubh); | 248 | ubh_brelse (ind_ubh); |
251 | 249 | ||
252 | UFSD("EXIT: ino %lu\n", inode->i_ino); | 250 | UFSD("EXIT: ino %lu\n", inode->i_ino); |
@@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) | |||
307 | ubh_bforget(dind_bh); | 305 | ubh_bforget(dind_bh); |
308 | dind_bh = NULL; | 306 | dind_bh = NULL; |
309 | } | 307 | } |
310 | if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { | 308 | if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) |
311 | ubh_ll_rw_block(SWRITE, dind_bh); | 309 | ubh_sync_block(dind_bh); |
312 | ubh_wait_on_buffer (dind_bh); | ||
313 | } | ||
314 | ubh_brelse (dind_bh); | 310 | ubh_brelse (dind_bh); |
315 | 311 | ||
316 | UFSD("EXIT: ino %lu\n", inode->i_ino); | 312 | UFSD("EXIT: ino %lu\n", inode->i_ino); |
@@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode) | |||
367 | ubh_bforget(tind_bh); | 363 | ubh_bforget(tind_bh); |
368 | tind_bh = NULL; | 364 | tind_bh = NULL; |
369 | } | 365 | } |
370 | if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { | 366 | if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) |
371 | ubh_ll_rw_block(SWRITE, tind_bh); | 367 | ubh_sync_block(tind_bh); |
372 | ubh_wait_on_buffer (tind_bh); | ||
373 | } | ||
374 | ubh_brelse (tind_bh); | 368 | ubh_brelse (tind_bh); |
375 | 369 | ||
376 | UFSD("EXIT: ino %lu\n", inode->i_ino); | 370 | UFSD("EXIT: ino %lu\n", inode->i_ino); |
diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 85a7fc9e4a4e..d2c36d53fe66 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c | |||
@@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) | |||
113 | } | 113 | } |
114 | } | 114 | } |
115 | 115 | ||
116 | void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) | 116 | void ubh_sync_block(struct ufs_buffer_head *ubh) |
117 | { | 117 | { |
118 | if (!ubh) | 118 | if (ubh) { |
119 | return; | 119 | unsigned i; |
120 | 120 | ||
121 | ll_rw_block(rw, ubh->count, ubh->bh); | 121 | for (i = 0; i < ubh->count; i++) |
122 | } | 122 | write_dirty_buffer(ubh->bh[i], WRITE); |
123 | 123 | ||
124 | void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) | 124 | for (i = 0; i < ubh->count; i++) |
125 | { | 125 | wait_on_buffer(ubh->bh[i]); |
126 | unsigned i; | 126 | } |
127 | if (!ubh) | ||
128 | return; | ||
129 | for ( i = 0; i < ubh->count; i++ ) | ||
130 | wait_on_buffer (ubh->bh[i]); | ||
131 | } | 127 | } |
132 | 128 | ||
133 | void ubh_bforget (struct ufs_buffer_head * ubh) | 129 | void ubh_bforget (struct ufs_buffer_head * ubh) |
diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 0466036912f1..9f8775ce381c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h | |||
@@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *); | |||
269 | extern void ubh_brelse_uspi (struct ufs_sb_private_info *); | 269 | extern void ubh_brelse_uspi (struct ufs_sb_private_info *); |
270 | extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); | 270 | extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); |
271 | extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); | 271 | extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); |
272 | extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); | 272 | extern void ubh_sync_block(struct ufs_buffer_head *); |
273 | extern void ubh_wait_on_buffer (struct ufs_buffer_head *); | ||
274 | extern void ubh_bforget (struct ufs_buffer_head *); | 273 | extern void ubh_bforget (struct ufs_buffer_head *); |
275 | extern int ubh_buffer_dirty (struct ufs_buffer_head *); | 274 | extern int ubh_buffer_dirty (struct ufs_buffer_head *); |
276 | #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) | 275 | #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) |
diff --git a/fs/utimes.c b/fs/utimes.c index e4c75db5d373..179b58690657 100644 --- a/fs/utimes.c +++ b/fs/utimes.c | |||
@@ -126,7 +126,8 @@ out: | |||
126 | * must be owner or have write permission. | 126 | * must be owner or have write permission. |
127 | * Else, update from *times, must be owner or super user. | 127 | * Else, update from *times, must be owner or super user. |
128 | */ | 128 | */ |
129 | long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags) | 129 | long do_utimes(int dfd, const char __user *filename, struct timespec *times, |
130 | int flags) | ||
130 | { | 131 | { |
131 | int error = -EINVAL; | 132 | int error = -EINVAL; |
132 | 133 | ||
@@ -170,7 +171,7 @@ out: | |||
170 | return error; | 171 | return error; |
171 | } | 172 | } |
172 | 173 | ||
173 | SYSCALL_DEFINE4(utimensat, int, dfd, char __user *, filename, | 174 | SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename, |
174 | struct timespec __user *, utimes, int, flags) | 175 | struct timespec __user *, utimes, int, flags) |
175 | { | 176 | { |
176 | struct timespec tstimes[2]; | 177 | struct timespec tstimes[2]; |
@@ -188,7 +189,7 @@ SYSCALL_DEFINE4(utimensat, int, dfd, char __user *, filename, | |||
188 | return do_utimes(dfd, filename, utimes ? tstimes : NULL, flags); | 189 | return do_utimes(dfd, filename, utimes ? tstimes : NULL, flags); |
189 | } | 190 | } |
190 | 191 | ||
191 | SYSCALL_DEFINE3(futimesat, int, dfd, char __user *, filename, | 192 | SYSCALL_DEFINE3(futimesat, int, dfd, const char __user *, filename, |
192 | struct timeval __user *, utimes) | 193 | struct timeval __user *, utimes) |
193 | { | 194 | { |
194 | struct timeval times[2]; | 195 | struct timeval times[2]; |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 15412fe15c3a..b552f816de15 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -852,8 +852,8 @@ xfs_convert_page( | |||
852 | SetPageUptodate(page); | 852 | SetPageUptodate(page); |
853 | 853 | ||
854 | if (count) { | 854 | if (count) { |
855 | wbc->nr_to_write--; | 855 | if (--wbc->nr_to_write <= 0 && |
856 | if (wbc->nr_to_write <= 0) | 856 | wbc->sync_mode == WB_SYNC_NONE) |
857 | done = 1; | 857 | done = 1; |
858 | } | 858 | } |
859 | xfs_start_page_writeback(page, !page_dirty, count); | 859 | xfs_start_page_writeback(page, !page_dirty, count); |
@@ -1068,7 +1068,7 @@ xfs_vm_writepage( | |||
1068 | * by themselves. | 1068 | * by themselves. |
1069 | */ | 1069 | */ |
1070 | if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) | 1070 | if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) |
1071 | goto out_fail; | 1071 | goto redirty; |
1072 | 1072 | ||
1073 | /* | 1073 | /* |
1074 | * We need a transaction if there are delalloc or unwritten buffers | 1074 | * We need a transaction if there are delalloc or unwritten buffers |
@@ -1080,7 +1080,7 @@ xfs_vm_writepage( | |||
1080 | */ | 1080 | */ |
1081 | xfs_count_page_state(page, &delalloc, &unwritten); | 1081 | xfs_count_page_state(page, &delalloc, &unwritten); |
1082 | if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) | 1082 | if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) |
1083 | goto out_fail; | 1083 | goto redirty; |
1084 | 1084 | ||
1085 | /* Is this page beyond the end of the file? */ | 1085 | /* Is this page beyond the end of the file? */ |
1086 | offset = i_size_read(inode); | 1086 | offset = i_size_read(inode); |
@@ -1245,12 +1245,15 @@ error: | |||
1245 | if (iohead) | 1245 | if (iohead) |
1246 | xfs_cancel_ioend(iohead); | 1246 | xfs_cancel_ioend(iohead); |
1247 | 1247 | ||
1248 | if (err == -EAGAIN) | ||
1249 | goto redirty; | ||
1250 | |||
1248 | xfs_aops_discard_page(page); | 1251 | xfs_aops_discard_page(page); |
1249 | ClearPageUptodate(page); | 1252 | ClearPageUptodate(page); |
1250 | unlock_page(page); | 1253 | unlock_page(page); |
1251 | return err; | 1254 | return err; |
1252 | 1255 | ||
1253 | out_fail: | 1256 | redirty: |
1254 | redirty_page_for_writepage(wbc, page); | 1257 | redirty_page_for_writepage(wbc, page); |
1255 | unlock_page(page); | 1258 | unlock_page(page); |
1256 | return 0; | 1259 | return 0; |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index ea79072f5210..d72cf2bb054a 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -440,12 +440,7 @@ _xfs_buf_find( | |||
440 | ASSERT(btp == bp->b_target); | 440 | ASSERT(btp == bp->b_target); |
441 | if (bp->b_file_offset == range_base && | 441 | if (bp->b_file_offset == range_base && |
442 | bp->b_buffer_length == range_length) { | 442 | bp->b_buffer_length == range_length) { |
443 | /* | ||
444 | * If we look at something, bring it to the | ||
445 | * front of the list for next time. | ||
446 | */ | ||
447 | atomic_inc(&bp->b_hold); | 443 | atomic_inc(&bp->b_hold); |
448 | list_move(&bp->b_hash_list, &hash->bh_list); | ||
449 | goto found; | 444 | goto found; |
450 | } | 445 | } |
451 | } | 446 | } |
@@ -1443,8 +1438,7 @@ xfs_alloc_bufhash( | |||
1443 | { | 1438 | { |
1444 | unsigned int i; | 1439 | unsigned int i; |
1445 | 1440 | ||
1446 | btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ | 1441 | btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */ |
1447 | btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; | ||
1448 | btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * | 1442 | btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * |
1449 | sizeof(xfs_bufhash_t)); | 1443 | sizeof(xfs_bufhash_t)); |
1450 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | 1444 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index d072e5ff923b..2a05614f0b92 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -137,7 +137,6 @@ typedef struct xfs_buftarg { | |||
137 | size_t bt_smask; | 137 | size_t bt_smask; |
138 | 138 | ||
139 | /* per device buffer hash table */ | 139 | /* per device buffer hash table */ |
140 | uint bt_hashmask; | ||
141 | uint bt_hashshift; | 140 | uint bt_hashshift; |
142 | xfs_bufhash_t *bt_hash; | 141 | xfs_bufhash_t *bt_hash; |
143 | 142 | ||
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 237f5ffb2ee8..4fec427b83ef 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -907,6 +907,13 @@ xfs_ioctl_setattr( | |||
907 | return XFS_ERROR(EIO); | 907 | return XFS_ERROR(EIO); |
908 | 908 | ||
909 | /* | 909 | /* |
910 | * Disallow 32bit project ids because on-disk structure | ||
911 | * is 16bit only. | ||
912 | */ | ||
913 | if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1)) | ||
914 | return XFS_ERROR(EINVAL); | ||
915 | |||
916 | /* | ||
910 | * If disk quotas is on, we make sure that the dquots do exist on disk, | 917 | * If disk quotas is on, we make sure that the dquots do exist on disk, |
911 | * before we start any other transactions. Trying to do this later | 918 | * before we start any other transactions. Trying to do this later |
912 | * is messy. We don't care to take a readlock to look at the ids | 919 | * is messy. We don't care to take a readlock to look at the ids |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 68be25dcd301..b1fc2a6bfe83 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -664,7 +664,7 @@ xfs_vn_fiemap( | |||
664 | fieinfo->fi_extents_max + 1; | 664 | fieinfo->fi_extents_max + 1; |
665 | bm.bmv_count = min_t(__s32, bm.bmv_count, | 665 | bm.bmv_count = min_t(__s32, bm.bmv_count, |
666 | (PAGE_SIZE * 16 / sizeof(struct getbmapx))); | 666 | (PAGE_SIZE * 16 / sizeof(struct getbmapx))); |
667 | bm.bmv_iflags = BMV_IF_PREALLOC; | 667 | bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES; |
668 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) | 668 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) |
669 | bm.bmv_iflags |= BMV_IF_ATTRFORK; | 669 | bm.bmv_iflags |= BMV_IF_ATTRFORK; |
670 | if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) | 670 | if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 15c35b62ff14..a4e07974955b 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1226,6 +1226,7 @@ xfs_fs_statfs( | |||
1226 | struct xfs_inode *ip = XFS_I(dentry->d_inode); | 1226 | struct xfs_inode *ip = XFS_I(dentry->d_inode); |
1227 | __uint64_t fakeinos, id; | 1227 | __uint64_t fakeinos, id; |
1228 | xfs_extlen_t lsize; | 1228 | xfs_extlen_t lsize; |
1229 | __int64_t ffree; | ||
1229 | 1230 | ||
1230 | statp->f_type = XFS_SB_MAGIC; | 1231 | statp->f_type = XFS_SB_MAGIC; |
1231 | statp->f_namelen = MAXNAMELEN - 1; | 1232 | statp->f_namelen = MAXNAMELEN - 1; |
@@ -1249,7 +1250,11 @@ xfs_fs_statfs( | |||
1249 | statp->f_files = min_t(typeof(statp->f_files), | 1250 | statp->f_files = min_t(typeof(statp->f_files), |
1250 | statp->f_files, | 1251 | statp->f_files, |
1251 | mp->m_maxicount); | 1252 | mp->m_maxicount); |
1252 | statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); | 1253 | |
1254 | /* make sure statp->f_ffree does not underflow */ | ||
1255 | ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); | ||
1256 | statp->f_ffree = max_t(__int64_t, ffree, 0); | ||
1257 | |||
1253 | spin_unlock(&mp->m_sb_lock); | 1258 | spin_unlock(&mp->m_sb_lock); |
1254 | 1259 | ||
1255 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || | 1260 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || |
@@ -1402,7 +1407,7 @@ xfs_fs_freeze( | |||
1402 | 1407 | ||
1403 | xfs_save_resvblks(mp); | 1408 | xfs_save_resvblks(mp); |
1404 | xfs_quiesce_attr(mp); | 1409 | xfs_quiesce_attr(mp); |
1405 | return -xfs_fs_log_dummy(mp); | 1410 | return -xfs_fs_log_dummy(mp, SYNC_WAIT); |
1406 | } | 1411 | } |
1407 | 1412 | ||
1408 | STATIC int | 1413 | STATIC int |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index dfcbd98d1599..d59c4a65d492 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "xfs_inode_item.h" | 34 | #include "xfs_inode_item.h" |
35 | #include "xfs_quota.h" | 35 | #include "xfs_quota.h" |
36 | #include "xfs_trace.h" | 36 | #include "xfs_trace.h" |
37 | #include "xfs_fsops.h" | ||
37 | 38 | ||
38 | #include <linux/kthread.h> | 39 | #include <linux/kthread.h> |
39 | #include <linux/freezer.h> | 40 | #include <linux/freezer.h> |
@@ -341,38 +342,6 @@ xfs_sync_attr( | |||
341 | } | 342 | } |
342 | 343 | ||
343 | STATIC int | 344 | STATIC int |
344 | xfs_commit_dummy_trans( | ||
345 | struct xfs_mount *mp, | ||
346 | uint flags) | ||
347 | { | ||
348 | struct xfs_inode *ip = mp->m_rootip; | ||
349 | struct xfs_trans *tp; | ||
350 | int error; | ||
351 | |||
352 | /* | ||
353 | * Put a dummy transaction in the log to tell recovery | ||
354 | * that all others are OK. | ||
355 | */ | ||
356 | tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); | ||
357 | error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); | ||
358 | if (error) { | ||
359 | xfs_trans_cancel(tp, 0); | ||
360 | return error; | ||
361 | } | ||
362 | |||
363 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
364 | |||
365 | xfs_trans_ijoin(tp, ip); | ||
366 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | ||
367 | error = xfs_trans_commit(tp, 0); | ||
368 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
369 | |||
370 | /* the log force ensures this transaction is pushed to disk */ | ||
371 | xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); | ||
372 | return error; | ||
373 | } | ||
374 | |||
375 | STATIC int | ||
376 | xfs_sync_fsdata( | 345 | xfs_sync_fsdata( |
377 | struct xfs_mount *mp) | 346 | struct xfs_mount *mp) |
378 | { | 347 | { |
@@ -432,7 +401,7 @@ xfs_quiesce_data( | |||
432 | 401 | ||
433 | /* mark the log as covered if needed */ | 402 | /* mark the log as covered if needed */ |
434 | if (xfs_log_need_covered(mp)) | 403 | if (xfs_log_need_covered(mp)) |
435 | error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT); | 404 | error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); |
436 | 405 | ||
437 | /* flush data-only devices */ | 406 | /* flush data-only devices */ |
438 | if (mp->m_rtdev_targp) | 407 | if (mp->m_rtdev_targp) |
@@ -563,7 +532,7 @@ xfs_flush_inodes( | |||
563 | /* | 532 | /* |
564 | * Every sync period we need to unpin all items, reclaim inodes and sync | 533 | * Every sync period we need to unpin all items, reclaim inodes and sync |
565 | * disk quotas. We might need to cover the log to indicate that the | 534 | * disk quotas. We might need to cover the log to indicate that the |
566 | * filesystem is idle. | 535 | * filesystem is idle and not frozen. |
567 | */ | 536 | */ |
568 | STATIC void | 537 | STATIC void |
569 | xfs_sync_worker( | 538 | xfs_sync_worker( |
@@ -577,8 +546,9 @@ xfs_sync_worker( | |||
577 | xfs_reclaim_inodes(mp, 0); | 546 | xfs_reclaim_inodes(mp, 0); |
578 | /* dgc: errors ignored here */ | 547 | /* dgc: errors ignored here */ |
579 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | 548 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); |
580 | if (xfs_log_need_covered(mp)) | 549 | if (mp->m_super->s_frozen == SB_UNFROZEN && |
581 | error = xfs_commit_dummy_trans(mp, 0); | 550 | xfs_log_need_covered(mp)) |
551 | error = xfs_fs_log_dummy(mp, 0); | ||
582 | } | 552 | } |
583 | mp->m_sync_seq++; | 553 | mp->m_sync_seq++; |
584 | wake_up(&mp->m_wait_single_sync_task); | 554 | wake_up(&mp->m_wait_single_sync_task); |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 23f14e595c18..f90dadd5a968 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -5533,12 +5533,24 @@ xfs_getbmap( | |||
5533 | map[i].br_startblock)) | 5533 | map[i].br_startblock)) |
5534 | goto out_free_map; | 5534 | goto out_free_map; |
5535 | 5535 | ||
5536 | nexleft--; | ||
5537 | bmv->bmv_offset = | 5536 | bmv->bmv_offset = |
5538 | out[cur_ext].bmv_offset + | 5537 | out[cur_ext].bmv_offset + |
5539 | out[cur_ext].bmv_length; | 5538 | out[cur_ext].bmv_length; |
5540 | bmv->bmv_length = | 5539 | bmv->bmv_length = |
5541 | max_t(__int64_t, 0, bmvend - bmv->bmv_offset); | 5540 | max_t(__int64_t, 0, bmvend - bmv->bmv_offset); |
5541 | |||
5542 | /* | ||
5543 | * In case we don't want to return the hole, | ||
5544 | * don't increase cur_ext so that we can reuse | ||
5545 | * it in the next loop. | ||
5546 | */ | ||
5547 | if ((iflags & BMV_IF_NO_HOLES) && | ||
5548 | map[i].br_startblock == HOLESTARTBLOCK) { | ||
5549 | memset(&out[cur_ext], 0, sizeof(out[cur_ext])); | ||
5550 | continue; | ||
5551 | } | ||
5552 | |||
5553 | nexleft--; | ||
5542 | bmv->bmv_entries++; | 5554 | bmv->bmv_entries++; |
5543 | cur_ext++; | 5555 | cur_ext++; |
5544 | } | 5556 | } |
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 7cf7220e7d5f..87c2e9d02288 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h | |||
@@ -114,8 +114,10 @@ struct getbmapx { | |||
114 | #define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ | 114 | #define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ |
115 | #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ | 115 | #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ |
116 | #define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ | 116 | #define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ |
117 | #define BMV_IF_NO_HOLES 0x10 /* Do not return holes */ | ||
117 | #define BMV_IF_VALID \ | 118 | #define BMV_IF_VALID \ |
118 | (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) | 119 | (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \ |
120 | BMV_IF_DELALLOC|BMV_IF_NO_HOLES) | ||
119 | 121 | ||
120 | /* bmv_oflags values - returned for each non-header segment */ | 122 | /* bmv_oflags values - returned for each non-header segment */ |
121 | #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ | 123 | #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index dbca5f5c37ba..43b1d5699335 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -604,31 +604,36 @@ out: | |||
604 | return 0; | 604 | return 0; |
605 | } | 605 | } |
606 | 606 | ||
607 | /* | ||
608 | * Dump a transaction into the log that contains no real change. This is needed | ||
609 | * to be able to make the log dirty or stamp the current tail LSN into the log | ||
610 | * during the covering operation. | ||
611 | * | ||
612 | * We cannot use an inode here for this - that will push dirty state back up | ||
613 | * into the VFS and then periodic inode flushing will prevent log covering from | ||
614 | * making progress. Hence we log a field in the superblock instead. | ||
615 | */ | ||
607 | int | 616 | int |
608 | xfs_fs_log_dummy( | 617 | xfs_fs_log_dummy( |
609 | xfs_mount_t *mp) | 618 | xfs_mount_t *mp, |
619 | int flags) | ||
610 | { | 620 | { |
611 | xfs_trans_t *tp; | 621 | xfs_trans_t *tp; |
612 | xfs_inode_t *ip; | ||
613 | int error; | 622 | int error; |
614 | 623 | ||
615 | tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); | 624 | tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); |
616 | error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); | 625 | error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, |
626 | XFS_DEFAULT_LOG_COUNT); | ||
617 | if (error) { | 627 | if (error) { |
618 | xfs_trans_cancel(tp, 0); | 628 | xfs_trans_cancel(tp, 0); |
619 | return error; | 629 | return error; |
620 | } | 630 | } |
621 | 631 | ||
622 | ip = mp->m_rootip; | 632 | /* log the UUID because it is an unchanging field */ |
623 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 633 | xfs_mod_sb(tp, XFS_SB_UUID); |
624 | 634 | if (flags & SYNC_WAIT) | |
625 | xfs_trans_ijoin(tp, ip); | 635 | xfs_trans_set_sync(tp); |
626 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 636 | return xfs_trans_commit(tp, 0); |
627 | xfs_trans_set_sync(tp); | ||
628 | error = xfs_trans_commit(tp, 0); | ||
629 | |||
630 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
631 | return error; | ||
632 | } | 637 | } |
633 | 638 | ||
634 | int | 639 | int |
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index 88435e0a77c9..a786c5212c1e 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h | |||
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); | |||
25 | extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, | 25 | extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, |
26 | xfs_fsop_resblks_t *outval); | 26 | xfs_fsop_resblks_t *outval); |
27 | extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); | 27 | extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); |
28 | extern int xfs_fs_log_dummy(xfs_mount_t *mp); | 28 | extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags); |
29 | 29 | ||
30 | #endif /* __XFS_FSOPS_H__ */ | 30 | #endif /* __XFS_FSOPS_H__ */ |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index abf80ae1e95b..5371d2dc360e 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -1213,7 +1213,6 @@ xfs_imap_lookup( | |||
1213 | struct xfs_inobt_rec_incore rec; | 1213 | struct xfs_inobt_rec_incore rec; |
1214 | struct xfs_btree_cur *cur; | 1214 | struct xfs_btree_cur *cur; |
1215 | struct xfs_buf *agbp; | 1215 | struct xfs_buf *agbp; |
1216 | xfs_agino_t startino; | ||
1217 | int error; | 1216 | int error; |
1218 | int i; | 1217 | int i; |
1219 | 1218 | ||
@@ -1227,13 +1226,13 @@ xfs_imap_lookup( | |||
1227 | } | 1226 | } |
1228 | 1227 | ||
1229 | /* | 1228 | /* |
1230 | * derive and lookup the exact inode record for the given agino. If the | 1229 | * Lookup the inode record for the given agino. If the record cannot be |
1231 | * record cannot be found, then it's an invalid inode number and we | 1230 | * found, then it's an invalid inode number and we should abort. Once |
1232 | * should abort. | 1231 | * we have a record, we need to ensure it contains the inode number |
1232 | * we are looking up. | ||
1233 | */ | 1233 | */ |
1234 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); | 1234 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); |
1235 | startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); | 1235 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); |
1236 | error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i); | ||
1237 | if (!error) { | 1236 | if (!error) { |
1238 | if (i) | 1237 | if (i) |
1239 | error = xfs_inobt_get_rec(cur, &rec, &i); | 1238 | error = xfs_inobt_get_rec(cur, &rec, &i); |
@@ -1246,6 +1245,11 @@ xfs_imap_lookup( | |||
1246 | if (error) | 1245 | if (error) |
1247 | return error; | 1246 | return error; |
1248 | 1247 | ||
1248 | /* check that the returned record contains the required inode */ | ||
1249 | if (rec.ir_startino > agino || | ||
1250 | rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) | ||
1251 | return EINVAL; | ||
1252 | |||
1249 | /* for untrusted inodes check it is allocated first */ | 1253 | /* for untrusted inodes check it is allocated first */ |
1250 | if ((flags & XFS_IGET_UNTRUSTED) && | 1254 | if ((flags & XFS_IGET_UNTRUSTED) && |
1251 | (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) | 1255 | (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 68415cb4f23c..34798f391c49 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1914,6 +1914,11 @@ xfs_iunlink_remove( | |||
1914 | return 0; | 1914 | return 0; |
1915 | } | 1915 | } |
1916 | 1916 | ||
1917 | /* | ||
1918 | * A big issue when freeing the inode cluster is that we _cannot_ skip any | ||
1919 | * inodes that are in memory - they all must be marked stale and attached to | ||
1920 | * the cluster buffer. | ||
1921 | */ | ||
1917 | STATIC void | 1922 | STATIC void |
1918 | xfs_ifree_cluster( | 1923 | xfs_ifree_cluster( |
1919 | xfs_inode_t *free_ip, | 1924 | xfs_inode_t *free_ip, |
@@ -1945,8 +1950,6 @@ xfs_ifree_cluster( | |||
1945 | } | 1950 | } |
1946 | 1951 | ||
1947 | for (j = 0; j < nbufs; j++, inum += ninodes) { | 1952 | for (j = 0; j < nbufs; j++, inum += ninodes) { |
1948 | int found = 0; | ||
1949 | |||
1950 | blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), | 1953 | blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), |
1951 | XFS_INO_TO_AGBNO(mp, inum)); | 1954 | XFS_INO_TO_AGBNO(mp, inum)); |
1952 | 1955 | ||
@@ -1965,7 +1968,9 @@ xfs_ifree_cluster( | |||
1965 | /* | 1968 | /* |
1966 | * Walk the inodes already attached to the buffer and mark them | 1969 | * Walk the inodes already attached to the buffer and mark them |
1967 | * stale. These will all have the flush locks held, so an | 1970 | * stale. These will all have the flush locks held, so an |
1968 | * in-memory inode walk can't lock them. | 1971 | * in-memory inode walk can't lock them. By marking them all |
1972 | * stale first, we will not attempt to lock them in the loop | ||
1973 | * below as the XFS_ISTALE flag will be set. | ||
1969 | */ | 1974 | */ |
1970 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 1975 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); |
1971 | while (lip) { | 1976 | while (lip) { |
@@ -1977,11 +1982,11 @@ xfs_ifree_cluster( | |||
1977 | &iip->ili_flush_lsn, | 1982 | &iip->ili_flush_lsn, |
1978 | &iip->ili_item.li_lsn); | 1983 | &iip->ili_item.li_lsn); |
1979 | xfs_iflags_set(iip->ili_inode, XFS_ISTALE); | 1984 | xfs_iflags_set(iip->ili_inode, XFS_ISTALE); |
1980 | found++; | ||
1981 | } | 1985 | } |
1982 | lip = lip->li_bio_list; | 1986 | lip = lip->li_bio_list; |
1983 | } | 1987 | } |
1984 | 1988 | ||
1989 | |||
1985 | /* | 1990 | /* |
1986 | * For each inode in memory attempt to add it to the inode | 1991 | * For each inode in memory attempt to add it to the inode |
1987 | * buffer and set it up for being staled on buffer IO | 1992 | * buffer and set it up for being staled on buffer IO |
@@ -1993,6 +1998,7 @@ xfs_ifree_cluster( | |||
1993 | * even trying to lock them. | 1998 | * even trying to lock them. |
1994 | */ | 1999 | */ |
1995 | for (i = 0; i < ninodes; i++) { | 2000 | for (i = 0; i < ninodes; i++) { |
2001 | retry: | ||
1996 | read_lock(&pag->pag_ici_lock); | 2002 | read_lock(&pag->pag_ici_lock); |
1997 | ip = radix_tree_lookup(&pag->pag_ici_root, | 2003 | ip = radix_tree_lookup(&pag->pag_ici_root, |
1998 | XFS_INO_TO_AGINO(mp, (inum + i))); | 2004 | XFS_INO_TO_AGINO(mp, (inum + i))); |
@@ -2003,38 +2009,36 @@ xfs_ifree_cluster( | |||
2003 | continue; | 2009 | continue; |
2004 | } | 2010 | } |
2005 | 2011 | ||
2006 | /* don't try to lock/unlock the current inode */ | 2012 | /* |
2013 | * Don't try to lock/unlock the current inode, but we | ||
2014 | * _cannot_ skip the other inodes that we did not find | ||
2015 | * in the list attached to the buffer and are not | ||
2016 | * already marked stale. If we can't lock it, back off | ||
2017 | * and retry. | ||
2018 | */ | ||
2007 | if (ip != free_ip && | 2019 | if (ip != free_ip && |
2008 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { | 2020 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { |
2009 | read_unlock(&pag->pag_ici_lock); | 2021 | read_unlock(&pag->pag_ici_lock); |
2010 | continue; | 2022 | delay(1); |
2023 | goto retry; | ||
2011 | } | 2024 | } |
2012 | read_unlock(&pag->pag_ici_lock); | 2025 | read_unlock(&pag->pag_ici_lock); |
2013 | 2026 | ||
2014 | if (!xfs_iflock_nowait(ip)) { | 2027 | xfs_iflock(ip); |
2015 | if (ip != free_ip) | ||
2016 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
2017 | continue; | ||
2018 | } | ||
2019 | |||
2020 | xfs_iflags_set(ip, XFS_ISTALE); | 2028 | xfs_iflags_set(ip, XFS_ISTALE); |
2021 | if (xfs_inode_clean(ip)) { | ||
2022 | ASSERT(ip != free_ip); | ||
2023 | xfs_ifunlock(ip); | ||
2024 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
2025 | continue; | ||
2026 | } | ||
2027 | 2029 | ||
2030 | /* | ||
2031 | * we don't need to attach clean inodes or those only | ||
2032 | * with unlogged changes (which we throw away, anyway). | ||
2033 | */ | ||
2028 | iip = ip->i_itemp; | 2034 | iip = ip->i_itemp; |
2029 | if (!iip) { | 2035 | if (!iip || xfs_inode_clean(ip)) { |
2030 | /* inode with unlogged changes only */ | ||
2031 | ASSERT(ip != free_ip); | 2036 | ASSERT(ip != free_ip); |
2032 | ip->i_update_core = 0; | 2037 | ip->i_update_core = 0; |
2033 | xfs_ifunlock(ip); | 2038 | xfs_ifunlock(ip); |
2034 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 2039 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
2035 | continue; | 2040 | continue; |
2036 | } | 2041 | } |
2037 | found++; | ||
2038 | 2042 | ||
2039 | iip->ili_last_fields = iip->ili_format.ilf_fields; | 2043 | iip->ili_last_fields = iip->ili_format.ilf_fields; |
2040 | iip->ili_format.ilf_fields = 0; | 2044 | iip->ili_format.ilf_fields = 0; |
@@ -2049,8 +2053,7 @@ xfs_ifree_cluster( | |||
2049 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 2053 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
2050 | } | 2054 | } |
2051 | 2055 | ||
2052 | if (found) | 2056 | xfs_trans_stale_inode_buf(tp, bp); |
2053 | xfs_trans_stale_inode_buf(tp, bp); | ||
2054 | xfs_trans_binval(tp, bp); | 2057 | xfs_trans_binval(tp, bp); |
2055 | } | 2058 | } |
2056 | 2059 | ||
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 925d572bf0f4..33f718f92a48 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -3015,7 +3015,8 @@ _xfs_log_force( | |||
3015 | 3015 | ||
3016 | XFS_STATS_INC(xs_log_force); | 3016 | XFS_STATS_INC(xs_log_force); |
3017 | 3017 | ||
3018 | xlog_cil_push(log, 1); | 3018 | if (log->l_cilp) |
3019 | xlog_cil_force(log); | ||
3019 | 3020 | ||
3020 | spin_lock(&log->l_icloglock); | 3021 | spin_lock(&log->l_icloglock); |
3021 | 3022 | ||
@@ -3167,7 +3168,7 @@ _xfs_log_force_lsn( | |||
3167 | XFS_STATS_INC(xs_log_force); | 3168 | XFS_STATS_INC(xs_log_force); |
3168 | 3169 | ||
3169 | if (log->l_cilp) { | 3170 | if (log->l_cilp) { |
3170 | lsn = xlog_cil_push_lsn(log, lsn); | 3171 | lsn = xlog_cil_force_lsn(log, lsn); |
3171 | if (lsn == NULLCOMMITLSN) | 3172 | if (lsn == NULLCOMMITLSN) |
3172 | return 0; | 3173 | return 0; |
3173 | } | 3174 | } |
@@ -3724,7 +3725,7 @@ xfs_log_force_umount( | |||
3724 | * call below. | 3725 | * call below. |
3725 | */ | 3726 | */ |
3726 | if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) | 3727 | if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) |
3727 | xlog_cil_push(log, 1); | 3728 | xlog_cil_force(log); |
3728 | 3729 | ||
3729 | /* | 3730 | /* |
3730 | * We must hold both the GRANT lock and the LOG lock, | 3731 | * We must hold both the GRANT lock and the LOG lock, |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 31e4ea2d19ac..ed575fb4b495 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -68,6 +68,7 @@ xlog_cil_init( | |||
68 | ctx->sequence = 1; | 68 | ctx->sequence = 1; |
69 | ctx->cil = cil; | 69 | ctx->cil = cil; |
70 | cil->xc_ctx = ctx; | 70 | cil->xc_ctx = ctx; |
71 | cil->xc_current_sequence = ctx->sequence; | ||
71 | 72 | ||
72 | cil->xc_log = log; | 73 | cil->xc_log = log; |
73 | log->l_cilp = cil; | 74 | log->l_cilp = cil; |
@@ -269,15 +270,10 @@ xlog_cil_insert( | |||
269 | static void | 270 | static void |
270 | xlog_cil_format_items( | 271 | xlog_cil_format_items( |
271 | struct log *log, | 272 | struct log *log, |
272 | struct xfs_log_vec *log_vector, | 273 | struct xfs_log_vec *log_vector) |
273 | struct xlog_ticket *ticket, | ||
274 | xfs_lsn_t *start_lsn) | ||
275 | { | 274 | { |
276 | struct xfs_log_vec *lv; | 275 | struct xfs_log_vec *lv; |
277 | 276 | ||
278 | if (start_lsn) | ||
279 | *start_lsn = log->l_cilp->xc_ctx->sequence; | ||
280 | |||
281 | ASSERT(log_vector); | 277 | ASSERT(log_vector); |
282 | for (lv = log_vector; lv; lv = lv->lv_next) { | 278 | for (lv = log_vector; lv; lv = lv->lv_next) { |
283 | void *ptr; | 279 | void *ptr; |
@@ -301,9 +297,24 @@ xlog_cil_format_items( | |||
301 | ptr += vec->i_len; | 297 | ptr += vec->i_len; |
302 | } | 298 | } |
303 | ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); | 299 | ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); |
300 | } | ||
301 | } | ||
302 | |||
303 | static void | ||
304 | xlog_cil_insert_items( | ||
305 | struct log *log, | ||
306 | struct xfs_log_vec *log_vector, | ||
307 | struct xlog_ticket *ticket, | ||
308 | xfs_lsn_t *start_lsn) | ||
309 | { | ||
310 | struct xfs_log_vec *lv; | ||
311 | |||
312 | if (start_lsn) | ||
313 | *start_lsn = log->l_cilp->xc_ctx->sequence; | ||
304 | 314 | ||
315 | ASSERT(log_vector); | ||
316 | for (lv = log_vector; lv; lv = lv->lv_next) | ||
305 | xlog_cil_insert(log, ticket, lv->lv_item, lv); | 317 | xlog_cil_insert(log, ticket, lv->lv_item, lv); |
306 | } | ||
307 | } | 318 | } |
308 | 319 | ||
309 | static void | 320 | static void |
@@ -321,80 +332,6 @@ xlog_cil_free_logvec( | |||
321 | } | 332 | } |
322 | 333 | ||
323 | /* | 334 | /* |
324 | * Commit a transaction with the given vector to the Committed Item List. | ||
325 | * | ||
326 | * To do this, we need to format the item, pin it in memory if required and | ||
327 | * account for the space used by the transaction. Once we have done that we | ||
328 | * need to release the unused reservation for the transaction, attach the | ||
329 | * transaction to the checkpoint context so we carry the busy extents through | ||
330 | * to checkpoint completion, and then unlock all the items in the transaction. | ||
331 | * | ||
332 | * For more specific information about the order of operations in | ||
333 | * xfs_log_commit_cil() please refer to the comments in | ||
334 | * xfs_trans_commit_iclog(). | ||
335 | * | ||
336 | * Called with the context lock already held in read mode to lock out | ||
337 | * background commit, returns without it held once background commits are | ||
338 | * allowed again. | ||
339 | */ | ||
340 | int | ||
341 | xfs_log_commit_cil( | ||
342 | struct xfs_mount *mp, | ||
343 | struct xfs_trans *tp, | ||
344 | struct xfs_log_vec *log_vector, | ||
345 | xfs_lsn_t *commit_lsn, | ||
346 | int flags) | ||
347 | { | ||
348 | struct log *log = mp->m_log; | ||
349 | int log_flags = 0; | ||
350 | int push = 0; | ||
351 | |||
352 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | ||
353 | log_flags = XFS_LOG_REL_PERM_RESERV; | ||
354 | |||
355 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
356 | xlog_cil_free_logvec(log_vector); | ||
357 | return XFS_ERROR(EIO); | ||
358 | } | ||
359 | |||
360 | /* lock out background commit */ | ||
361 | down_read(&log->l_cilp->xc_ctx_lock); | ||
362 | xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn); | ||
363 | |||
364 | /* check we didn't blow the reservation */ | ||
365 | if (tp->t_ticket->t_curr_res < 0) | ||
366 | xlog_print_tic_res(log->l_mp, tp->t_ticket); | ||
367 | |||
368 | /* attach the transaction to the CIL if it has any busy extents */ | ||
369 | if (!list_empty(&tp->t_busy)) { | ||
370 | spin_lock(&log->l_cilp->xc_cil_lock); | ||
371 | list_splice_init(&tp->t_busy, | ||
372 | &log->l_cilp->xc_ctx->busy_extents); | ||
373 | spin_unlock(&log->l_cilp->xc_cil_lock); | ||
374 | } | ||
375 | |||
376 | tp->t_commit_lsn = *commit_lsn; | ||
377 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); | ||
378 | xfs_trans_unreserve_and_mod_sb(tp); | ||
379 | |||
380 | /* check for background commit before unlock */ | ||
381 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
382 | push = 1; | ||
383 | up_read(&log->l_cilp->xc_ctx_lock); | ||
384 | |||
385 | /* | ||
386 | * We need to push CIL every so often so we don't cache more than we | ||
387 | * can fit in the log. The limit really is that a checkpoint can't be | ||
388 | * more than half the log (the current checkpoint is not allowed to | ||
389 | * overwrite the previous checkpoint), but commit latency and memory | ||
390 | * usage limit this to a smaller size in most cases. | ||
391 | */ | ||
392 | if (push) | ||
393 | xlog_cil_push(log, 0); | ||
394 | return 0; | ||
395 | } | ||
396 | |||
397 | /* | ||
398 | * Mark all items committed and clear busy extents. We free the log vector | 335 | * Mark all items committed and clear busy extents. We free the log vector |
399 | * chains in a separate pass so that we unpin the log items as quickly as | 336 | * chains in a separate pass so that we unpin the log items as quickly as |
400 | * possible. | 337 | * possible. |
@@ -427,13 +364,23 @@ xlog_cil_committed( | |||
427 | } | 364 | } |
428 | 365 | ||
429 | /* | 366 | /* |
430 | * Push the Committed Item List to the log. If the push_now flag is not set, | 367 | * Push the Committed Item List to the log. If @push_seq flag is zero, then it |
431 | * then it is a background flush and so we can chose to ignore it. | 368 | * is a background flush and so we can choose to ignore it. Otherwise, if the |
369 | * current sequence is the same as @push_seq we need to do a flush. If | ||
370 | * @push_seq is less than the current sequence, then it has already been | ||
371 | * flushed and we don't need to do anything - the caller will wait for it to | ||
372 | * complete if necessary. | ||
373 | * | ||
374 | * @push_seq is a value rather than a flag because that allows us to do an | ||
375 | * unlocked check of the sequence number for a match. Hence we can allow log | ||
376 | * forces to run racily and not issue pushes for the same sequence twice. If we | ||
377 | * get a race between multiple pushes for the same sequence they will block on | ||
378 | * the first one and then abort, hence avoiding needless pushes. | ||
432 | */ | 379 | */ |
433 | int | 380 | STATIC int |
434 | xlog_cil_push( | 381 | xlog_cil_push( |
435 | struct log *log, | 382 | struct log *log, |
436 | int push_now) | 383 | xfs_lsn_t push_seq) |
437 | { | 384 | { |
438 | struct xfs_cil *cil = log->l_cilp; | 385 | struct xfs_cil *cil = log->l_cilp; |
439 | struct xfs_log_vec *lv; | 386 | struct xfs_log_vec *lv; |
@@ -453,12 +400,14 @@ xlog_cil_push( | |||
453 | if (!cil) | 400 | if (!cil) |
454 | return 0; | 401 | return 0; |
455 | 402 | ||
403 | ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence); | ||
404 | |||
456 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | 405 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); |
457 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | 406 | new_ctx->ticket = xlog_cil_ticket_alloc(log); |
458 | 407 | ||
459 | /* lock out transaction commit, but don't block on background push */ | 408 | /* lock out transaction commit, but don't block on background push */ |
460 | if (!down_write_trylock(&cil->xc_ctx_lock)) { | 409 | if (!down_write_trylock(&cil->xc_ctx_lock)) { |
461 | if (!push_now) | 410 | if (!push_seq) |
462 | goto out_free_ticket; | 411 | goto out_free_ticket; |
463 | down_write(&cil->xc_ctx_lock); | 412 | down_write(&cil->xc_ctx_lock); |
464 | } | 413 | } |
@@ -469,7 +418,11 @@ xlog_cil_push( | |||
469 | goto out_skip; | 418 | goto out_skip; |
470 | 419 | ||
471 | /* check for spurious background flush */ | 420 | /* check for spurious background flush */ |
472 | if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) | 421 | if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) |
422 | goto out_skip; | ||
423 | |||
424 | /* check for a previously pushed sequence */ | ||
425 | if (push_seq < cil->xc_ctx->sequence) | ||
473 | goto out_skip; | 426 | goto out_skip; |
474 | 427 | ||
475 | /* | 428 | /* |
@@ -515,6 +468,13 @@ xlog_cil_push( | |||
515 | cil->xc_ctx = new_ctx; | 468 | cil->xc_ctx = new_ctx; |
516 | 469 | ||
517 | /* | 470 | /* |
471 | * mirror the new sequence into the cil structure so that we can do | ||
472 | * unlocked checks against the current sequence in log forces without | ||
473 | * risking deferencing a freed context pointer. | ||
474 | */ | ||
475 | cil->xc_current_sequence = new_ctx->sequence; | ||
476 | |||
477 | /* | ||
518 | * The switch is now done, so we can drop the context lock and move out | 478 | * The switch is now done, so we can drop the context lock and move out |
519 | * of a shared context. We can't just go straight to the commit record, | 479 | * of a shared context. We can't just go straight to the commit record, |
520 | * though - we need to synchronise with previous and future commits so | 480 | * though - we need to synchronise with previous and future commits so |
@@ -626,6 +586,102 @@ out_abort: | |||
626 | } | 586 | } |
627 | 587 | ||
628 | /* | 588 | /* |
589 | * Commit a transaction with the given vector to the Committed Item List. | ||
590 | * | ||
591 | * To do this, we need to format the item, pin it in memory if required and | ||
592 | * account for the space used by the transaction. Once we have done that we | ||
593 | * need to release the unused reservation for the transaction, attach the | ||
594 | * transaction to the checkpoint context so we carry the busy extents through | ||
595 | * to checkpoint completion, and then unlock all the items in the transaction. | ||
596 | * | ||
597 | * For more specific information about the order of operations in | ||
598 | * xfs_log_commit_cil() please refer to the comments in | ||
599 | * xfs_trans_commit_iclog(). | ||
600 | * | ||
601 | * Takes the context lock in read mode to lock out background commit | ||
602 | * and releases it before returning, at which point background commits | ||
603 | * are allowed again. | ||
604 | */ | ||
605 | int | ||
606 | xfs_log_commit_cil( | ||
607 | struct xfs_mount *mp, | ||
608 | struct xfs_trans *tp, | ||
609 | struct xfs_log_vec *log_vector, | ||
610 | xfs_lsn_t *commit_lsn, | ||
611 | int flags) | ||
612 | { | ||
613 | struct log *log = mp->m_log; | ||
614 | int log_flags = 0; | ||
615 | int push = 0; | ||
616 | |||
617 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | ||
618 | log_flags = XFS_LOG_REL_PERM_RESERV; | ||
619 | |||
620 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
621 | xlog_cil_free_logvec(log_vector); | ||
622 | return XFS_ERROR(EIO); | ||
623 | } | ||
624 | |||
625 | /* | ||
626 | * do all the hard work of formatting items (including memory | ||
627 | * allocation) outside the CIL context lock. This prevents stalling CIL | ||
628 | * pushes when we are low on memory and a transaction commit spends a | ||
629 | * lot of time in memory reclaim. | ||
630 | */ | ||
631 | xlog_cil_format_items(log, log_vector); | ||
632 | |||
633 | /* lock out background commit */ | ||
634 | down_read(&log->l_cilp->xc_ctx_lock); | ||
635 | xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn); | ||
636 | |||
637 | /* check we didn't blow the reservation */ | ||
638 | if (tp->t_ticket->t_curr_res < 0) | ||
639 | xlog_print_tic_res(log->l_mp, tp->t_ticket); | ||
640 | |||
641 | /* attach the transaction to the CIL if it has any busy extents */ | ||
642 | if (!list_empty(&tp->t_busy)) { | ||
643 | spin_lock(&log->l_cilp->xc_cil_lock); | ||
644 | list_splice_init(&tp->t_busy, | ||
645 | &log->l_cilp->xc_ctx->busy_extents); | ||
646 | spin_unlock(&log->l_cilp->xc_cil_lock); | ||
647 | } | ||
648 | |||
649 | tp->t_commit_lsn = *commit_lsn; | ||
650 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); | ||
651 | xfs_trans_unreserve_and_mod_sb(tp); | ||
652 | |||
653 | /* | ||
654 | * Once all the items of the transaction have been copied to the CIL, | ||
655 | * the items can be unlocked and freed. | ||
656 | * | ||
657 | * This needs to be done before we drop the CIL context lock because we | ||
658 | * have to update state in the log items and unlock them before they go | ||
659 | * to disk. If we don't, then the CIL checkpoint can race with us and | ||
660 | * we can run checkpoint completion before we've updated and unlocked | ||
661 | * the log items. This affects (at least) processing of stale buffers, | ||
662 | * inodes and EFIs. | ||
663 | */ | ||
664 | xfs_trans_free_items(tp, *commit_lsn, 0); | ||
665 | |||
666 | /* check for background commit before unlock */ | ||
667 | if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log)) | ||
668 | push = 1; | ||
669 | |||
670 | up_read(&log->l_cilp->xc_ctx_lock); | ||
671 | |||
672 | /* | ||
673 | * We need to push CIL every so often so we don't cache more than we | ||
674 | * can fit in the log. The limit really is that a checkpoint can't be | ||
675 | * more than half the log (the current checkpoint is not allowed to | ||
676 | * overwrite the previous checkpoint), but commit latency and memory | ||
677 | * usage limit this to a smaller size in most cases. | ||
678 | */ | ||
679 | if (push) | ||
680 | xlog_cil_push(log, 0); | ||
681 | return 0; | ||
682 | } | ||
683 | |||
684 | /* | ||
629 | * Conditionally push the CIL based on the sequence passed in. | 685 | * Conditionally push the CIL based on the sequence passed in. |
630 | * | 686 | * |
631 | * We only need to push if we haven't already pushed the sequence | 687 | * We only need to push if we haven't already pushed the sequence |
@@ -639,39 +695,34 @@ out_abort: | |||
639 | * commit lsn is there. It'll be empty, so this is broken for now. | 695 | * commit lsn is there. It'll be empty, so this is broken for now. |
640 | */ | 696 | */ |
641 | xfs_lsn_t | 697 | xfs_lsn_t |
642 | xlog_cil_push_lsn( | 698 | xlog_cil_force_lsn( |
643 | struct log *log, | 699 | struct log *log, |
644 | xfs_lsn_t push_seq) | 700 | xfs_lsn_t sequence) |
645 | { | 701 | { |
646 | struct xfs_cil *cil = log->l_cilp; | 702 | struct xfs_cil *cil = log->l_cilp; |
647 | struct xfs_cil_ctx *ctx; | 703 | struct xfs_cil_ctx *ctx; |
648 | xfs_lsn_t commit_lsn = NULLCOMMITLSN; | 704 | xfs_lsn_t commit_lsn = NULLCOMMITLSN; |
649 | 705 | ||
650 | restart: | 706 | ASSERT(sequence <= cil->xc_current_sequence); |
651 | down_write(&cil->xc_ctx_lock); | 707 | |
652 | ASSERT(push_seq <= cil->xc_ctx->sequence); | 708 | /* |
653 | 709 | * check to see if we need to force out the current context. | |
654 | /* check to see if we need to force out the current context */ | 710 | * xlog_cil_push() handles racing pushes for the same sequence, |
655 | if (push_seq == cil->xc_ctx->sequence) { | 711 | * so no need to deal with it here. |
656 | up_write(&cil->xc_ctx_lock); | 712 | */ |
657 | xlog_cil_push(log, 1); | 713 | if (sequence == cil->xc_current_sequence) |
658 | goto restart; | 714 | xlog_cil_push(log, sequence); |
659 | } | ||
660 | 715 | ||
661 | /* | 716 | /* |
662 | * See if we can find a previous sequence still committing. | 717 | * See if we can find a previous sequence still committing. |
663 | * We can drop the flush lock as soon as we have the cil lock | ||
664 | * because we are now only comparing contexts protected by | ||
665 | * the cil lock. | ||
666 | * | ||
667 | * We need to wait for all previous sequence commits to complete | 718 | * We need to wait for all previous sequence commits to complete |
668 | * before allowing the force of push_seq to go ahead. Hence block | 719 | * before allowing the force of push_seq to go ahead. Hence block |
669 | * on commits for those as well. | 720 | * on commits for those as well. |
670 | */ | 721 | */ |
722 | restart: | ||
671 | spin_lock(&cil->xc_cil_lock); | 723 | spin_lock(&cil->xc_cil_lock); |
672 | up_write(&cil->xc_ctx_lock); | ||
673 | list_for_each_entry(ctx, &cil->xc_committing, committing) { | 724 | list_for_each_entry(ctx, &cil->xc_committing, committing) { |
674 | if (ctx->sequence > push_seq) | 725 | if (ctx->sequence > sequence) |
675 | continue; | 726 | continue; |
676 | if (!ctx->commit_lsn) { | 727 | if (!ctx->commit_lsn) { |
677 | /* | 728 | /* |
@@ -681,7 +732,7 @@ restart: | |||
681 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); | 732 | sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); |
682 | goto restart; | 733 | goto restart; |
683 | } | 734 | } |
684 | if (ctx->sequence != push_seq) | 735 | if (ctx->sequence != sequence) |
685 | continue; | 736 | continue; |
686 | /* found it! */ | 737 | /* found it! */ |
687 | commit_lsn = ctx->commit_lsn; | 738 | commit_lsn = ctx->commit_lsn; |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 8c072618965c..ced52b98b322 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -422,6 +422,7 @@ struct xfs_cil { | |||
422 | struct rw_semaphore xc_ctx_lock; | 422 | struct rw_semaphore xc_ctx_lock; |
423 | struct list_head xc_committing; | 423 | struct list_head xc_committing; |
424 | sv_t xc_commit_wait; | 424 | sv_t xc_commit_wait; |
425 | xfs_lsn_t xc_current_sequence; | ||
425 | }; | 426 | }; |
426 | 427 | ||
427 | /* | 428 | /* |
@@ -562,8 +563,16 @@ int xlog_cil_init(struct log *log); | |||
562 | void xlog_cil_init_post_recovery(struct log *log); | 563 | void xlog_cil_init_post_recovery(struct log *log); |
563 | void xlog_cil_destroy(struct log *log); | 564 | void xlog_cil_destroy(struct log *log); |
564 | 565 | ||
565 | int xlog_cil_push(struct log *log, int push_now); | 566 | /* |
566 | xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); | 567 | * CIL force routines |
568 | */ | ||
569 | xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence); | ||
570 | |||
571 | static inline void | ||
572 | xlog_cil_force(struct log *log) | ||
573 | { | ||
574 | xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence); | ||
575 | } | ||
567 | 576 | ||
568 | /* | 577 | /* |
569 | * Unmount record type is used as a pseudo transaction type for the ticket. | 578 | * Unmount record type is used as a pseudo transaction type for the ticket. |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index fdca7416c754..1c47edaea0d2 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -1167,7 +1167,7 @@ xfs_trans_del_item( | |||
1167 | * Unlock all of the items of a transaction and free all the descriptors | 1167 | * Unlock all of the items of a transaction and free all the descriptors |
1168 | * of that transaction. | 1168 | * of that transaction. |
1169 | */ | 1169 | */ |
1170 | STATIC void | 1170 | void |
1171 | xfs_trans_free_items( | 1171 | xfs_trans_free_items( |
1172 | struct xfs_trans *tp, | 1172 | struct xfs_trans *tp, |
1173 | xfs_lsn_t commit_lsn, | 1173 | xfs_lsn_t commit_lsn, |
@@ -1653,9 +1653,6 @@ xfs_trans_commit_cil( | |||
1653 | return error; | 1653 | return error; |
1654 | 1654 | ||
1655 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1655 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1656 | |||
1657 | /* xfs_trans_free_items() unlocks them first */ | ||
1658 | xfs_trans_free_items(tp, *commit_lsn, 0); | ||
1659 | xfs_trans_free(tp); | 1656 | xfs_trans_free(tp); |
1660 | return 0; | 1657 | return 0; |
1661 | } | 1658 | } |
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index e2d93d8ead7b..62da86c90de5 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h | |||
@@ -25,7 +25,8 @@ struct xfs_trans; | |||
25 | 25 | ||
26 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); | 26 | void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); |
27 | void xfs_trans_del_item(struct xfs_log_item *); | 27 | void xfs_trans_del_item(struct xfs_log_item *); |
28 | 28 | void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, | |
29 | int flags); | ||
29 | void xfs_trans_item_committed(struct xfs_log_item *lip, | 30 | void xfs_trans_item_committed(struct xfs_log_item *lip, |
30 | xfs_lsn_t commit_lsn, int aborted); | 31 | xfs_lsn_t commit_lsn, int aborted); |
31 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); | 32 | void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 66d585c6917c..4c7c7bfb2b2f 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -2299,15 +2299,22 @@ xfs_alloc_file_space( | |||
2299 | e = allocatesize_fsb; | 2299 | e = allocatesize_fsb; |
2300 | } | 2300 | } |
2301 | 2301 | ||
2302 | /* | ||
2303 | * The transaction reservation is limited to a 32-bit block | ||
2304 | * count, hence we need to limit the number of blocks we are | ||
2305 | * trying to reserve to avoid an overflow. We can't allocate | ||
2306 | * more than @nimaps extents, and an extent is limited on disk | ||
2307 | * to MAXEXTLEN (21 bits), so use that to enforce the limit. | ||
2308 | */ | ||
2309 | resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); | ||
2302 | if (unlikely(rt)) { | 2310 | if (unlikely(rt)) { |
2303 | resrtextents = qblocks = (uint)(e - s); | 2311 | resrtextents = qblocks = resblks; |
2304 | resrtextents /= mp->m_sb.sb_rextsize; | 2312 | resrtextents /= mp->m_sb.sb_rextsize; |
2305 | resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); | 2313 | resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); |
2306 | quota_flag = XFS_QMOPT_RES_RTBLKS; | 2314 | quota_flag = XFS_QMOPT_RES_RTBLKS; |
2307 | } else { | 2315 | } else { |
2308 | resrtextents = 0; | 2316 | resrtextents = 0; |
2309 | resblks = qblocks = \ | 2317 | resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); |
2310 | XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); | ||
2311 | quota_flag = XFS_QMOPT_RES_REGBLKS; | 2318 | quota_flag = XFS_QMOPT_RES_REGBLKS; |
2312 | } | 2319 | } |
2313 | 2320 | ||