diff options
author | Chris Metcalf <cmetcalf@tilera.com> | 2010-08-13 19:59:15 -0400 |
---|---|---|
committer | Chris Metcalf <cmetcalf@tilera.com> | 2010-08-13 19:59:15 -0400 |
commit | 7d72e6fa56c4100b9669efe0044f77ed9eb785a1 (patch) | |
tree | 5e90bf4969809a1ab20b97432b85be20ccfaa1f4 /fs | |
parent | ba00376b0b13f234d839541a7b36a5bf5c2a4036 (diff) | |
parent | 2be1f3a73dd02e38e181cf5abacb3d45a6a2d6b8 (diff) |
Merge branch 'master' into for-linus
Diffstat (limited to 'fs')
112 files changed, 2320 insertions, 1013 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index d97c34a24f7a..c7c23eab9440 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -1263,10 +1263,19 @@ static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) | |||
1263 | return PTR_ERR(fid); | 1263 | return PTR_ERR(fid); |
1264 | 1264 | ||
1265 | retval = p9_client_setattr(fid, &p9attr); | 1265 | retval = p9_client_setattr(fid, &p9attr); |
1266 | if (retval >= 0) | 1266 | if (retval < 0) |
1267 | retval = inode_setattr(dentry->d_inode, iattr); | 1267 | return retval; |
1268 | 1268 | ||
1269 | return retval; | 1269 | if ((iattr->ia_valid & ATTR_SIZE) && |
1270 | iattr->ia_size != i_size_read(dentry->d_inode)) { | ||
1271 | retval = vmtruncate(dentry->d_inode, iattr->ia_size); | ||
1272 | if (retval) | ||
1273 | return retval; | ||
1274 | } | ||
1275 | |||
1276 | setattr_copy(dentry->d_inode, iattr); | ||
1277 | mark_inode_dirty(dentry->d_inode); | ||
1278 | return 0; | ||
1270 | } | 1279 | } |
1271 | 1280 | ||
1272 | /** | 1281 | /** |
diff --git a/fs/afs/cell.c b/fs/afs/cell.c index ffea35c63879..0d5eeadf6121 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c | |||
@@ -31,21 +31,20 @@ static struct afs_cell *afs_cell_root; | |||
31 | * allocate a cell record and fill in its name, VL server address list and | 31 | * allocate a cell record and fill in its name, VL server address list and |
32 | * allocate an anonymous key | 32 | * allocate an anonymous key |
33 | */ | 33 | */ |
34 | static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) | 34 | static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen, |
35 | char *vllist) | ||
35 | { | 36 | { |
36 | struct afs_cell *cell; | 37 | struct afs_cell *cell; |
37 | struct key *key; | 38 | struct key *key; |
38 | size_t namelen; | ||
39 | char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; | 39 | char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; |
40 | char *dvllist = NULL, *_vllist = NULL; | 40 | char *dvllist = NULL, *_vllist = NULL; |
41 | char delimiter = ':'; | 41 | char delimiter = ':'; |
42 | int ret; | 42 | int ret; |
43 | 43 | ||
44 | _enter("%s,%s", name, vllist); | 44 | _enter("%*.*s,%s", namelen, namelen, name ?: "", vllist); |
45 | 45 | ||
46 | BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ | 46 | BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ |
47 | 47 | ||
48 | namelen = strlen(name); | ||
49 | if (namelen > AFS_MAXCELLNAME) { | 48 | if (namelen > AFS_MAXCELLNAME) { |
50 | _leave(" = -ENAMETOOLONG"); | 49 | _leave(" = -ENAMETOOLONG"); |
51 | return ERR_PTR(-ENAMETOOLONG); | 50 | return ERR_PTR(-ENAMETOOLONG); |
@@ -73,6 +72,10 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) | |||
73 | if (!vllist || strlen(vllist) < 7) { | 72 | if (!vllist || strlen(vllist) < 7) { |
74 | ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL); | 73 | ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL); |
75 | if (ret < 0) { | 74 | if (ret < 0) { |
75 | if (ret == -ENODATA || ret == -EAGAIN || ret == -ENOKEY) | ||
76 | /* translate these errors into something | ||
77 | * userspace might understand */ | ||
78 | ret = -EDESTADDRREQ; | ||
76 | _leave(" = %d", ret); | 79 | _leave(" = %d", ret); |
77 | return ERR_PTR(ret); | 80 | return ERR_PTR(ret); |
78 | } | 81 | } |
@@ -138,26 +141,29 @@ error: | |||
138 | } | 141 | } |
139 | 142 | ||
140 | /* | 143 | /* |
141 | * create a cell record | 144 | * afs_cell_create() - create a cell record |
142 | * - "name" is the name of the cell | 145 | * @name: is the name of the cell. |
143 | * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format | 146 | * @namesz: is the strlen of the cell name. |
147 | * @vllist: is a colon separated list of IP addresses in "a.b.c.d" format. | ||
148 | * @retref: is T to return the cell reference when the cell exists. | ||
144 | */ | 149 | */ |
145 | struct afs_cell *afs_cell_create(const char *name, char *vllist) | 150 | struct afs_cell *afs_cell_create(const char *name, unsigned namesz, |
151 | char *vllist, bool retref) | ||
146 | { | 152 | { |
147 | struct afs_cell *cell; | 153 | struct afs_cell *cell; |
148 | int ret; | 154 | int ret; |
149 | 155 | ||
150 | _enter("%s,%s", name, vllist); | 156 | _enter("%*.*s,%s", namesz, namesz, name ?: "", vllist); |
151 | 157 | ||
152 | down_write(&afs_cells_sem); | 158 | down_write(&afs_cells_sem); |
153 | read_lock(&afs_cells_lock); | 159 | read_lock(&afs_cells_lock); |
154 | list_for_each_entry(cell, &afs_cells, link) { | 160 | list_for_each_entry(cell, &afs_cells, link) { |
155 | if (strcasecmp(cell->name, name) == 0) | 161 | if (strncasecmp(cell->name, name, namesz) == 0) |
156 | goto duplicate_name; | 162 | goto duplicate_name; |
157 | } | 163 | } |
158 | read_unlock(&afs_cells_lock); | 164 | read_unlock(&afs_cells_lock); |
159 | 165 | ||
160 | cell = afs_cell_alloc(name, vllist); | 166 | cell = afs_cell_alloc(name, namesz, vllist); |
161 | if (IS_ERR(cell)) { | 167 | if (IS_ERR(cell)) { |
162 | _leave(" = %ld", PTR_ERR(cell)); | 168 | _leave(" = %ld", PTR_ERR(cell)); |
163 | up_write(&afs_cells_sem); | 169 | up_write(&afs_cells_sem); |
@@ -197,8 +203,18 @@ error: | |||
197 | return ERR_PTR(ret); | 203 | return ERR_PTR(ret); |
198 | 204 | ||
199 | duplicate_name: | 205 | duplicate_name: |
206 | if (retref && !IS_ERR(cell)) | ||
207 | afs_get_cell(cell); | ||
208 | |||
200 | read_unlock(&afs_cells_lock); | 209 | read_unlock(&afs_cells_lock); |
201 | up_write(&afs_cells_sem); | 210 | up_write(&afs_cells_sem); |
211 | |||
212 | if (retref) { | ||
213 | _leave(" = %p", cell); | ||
214 | return cell; | ||
215 | } | ||
216 | |||
217 | _leave(" = -EEXIST"); | ||
202 | return ERR_PTR(-EEXIST); | 218 | return ERR_PTR(-EEXIST); |
203 | } | 219 | } |
204 | 220 | ||
@@ -229,7 +245,7 @@ int afs_cell_init(char *rootcell) | |||
229 | *cp++ = 0; | 245 | *cp++ = 0; |
230 | 246 | ||
231 | /* allocate a cell record for the root cell */ | 247 | /* allocate a cell record for the root cell */ |
232 | new_root = afs_cell_create(rootcell, cp); | 248 | new_root = afs_cell_create(rootcell, strlen(rootcell), cp, false); |
233 | if (IS_ERR(new_root)) { | 249 | if (IS_ERR(new_root)) { |
234 | _leave(" = %ld", PTR_ERR(new_root)); | 250 | _leave(" = %ld", PTR_ERR(new_root)); |
235 | return PTR_ERR(new_root); | 251 | return PTR_ERR(new_root); |
@@ -249,11 +265,12 @@ int afs_cell_init(char *rootcell) | |||
249 | /* | 265 | /* |
250 | * lookup a cell record | 266 | * lookup a cell record |
251 | */ | 267 | */ |
252 | struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz) | 268 | struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz, |
269 | bool dns_cell) | ||
253 | { | 270 | { |
254 | struct afs_cell *cell; | 271 | struct afs_cell *cell; |
255 | 272 | ||
256 | _enter("\"%*.*s\",", namesz, namesz, name ? name : ""); | 273 | _enter("\"%*.*s\",", namesz, namesz, name ?: ""); |
257 | 274 | ||
258 | down_read(&afs_cells_sem); | 275 | down_read(&afs_cells_sem); |
259 | read_lock(&afs_cells_lock); | 276 | read_lock(&afs_cells_lock); |
@@ -267,6 +284,8 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz) | |||
267 | } | 284 | } |
268 | } | 285 | } |
269 | cell = ERR_PTR(-ENOENT); | 286 | cell = ERR_PTR(-ENOENT); |
287 | if (dns_cell) | ||
288 | goto create_cell; | ||
270 | found: | 289 | found: |
271 | ; | 290 | ; |
272 | } else { | 291 | } else { |
@@ -289,6 +308,15 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz) | |||
289 | up_read(&afs_cells_sem); | 308 | up_read(&afs_cells_sem); |
290 | _leave(" = %p", cell); | 309 | _leave(" = %p", cell); |
291 | return cell; | 310 | return cell; |
311 | |||
312 | create_cell: | ||
313 | read_unlock(&afs_cells_lock); | ||
314 | up_read(&afs_cells_sem); | ||
315 | |||
316 | cell = afs_cell_create(name, namesz, NULL, true); | ||
317 | |||
318 | _leave(" = %p", cell); | ||
319 | return cell; | ||
292 | } | 320 | } |
293 | 321 | ||
294 | #if 0 | 322 | #if 0 |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index b42d5cc1d6d2..0d38c09bd55e 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -477,6 +477,40 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry, | |||
477 | } | 477 | } |
478 | 478 | ||
479 | /* | 479 | /* |
480 | * Try to auto mount the mountpoint with pseudo directory, if the autocell | ||
481 | * operation is set. | ||
482 | */ | ||
483 | static struct inode *afs_try_auto_mntpt( | ||
484 | int ret, struct dentry *dentry, struct inode *dir, struct key *key, | ||
485 | struct afs_fid *fid) | ||
486 | { | ||
487 | const char *devname = dentry->d_name.name; | ||
488 | struct afs_vnode *vnode = AFS_FS_I(dir); | ||
489 | struct inode *inode; | ||
490 | |||
491 | _enter("%d, %p{%s}, {%x:%u}, %p", | ||
492 | ret, dentry, devname, vnode->fid.vid, vnode->fid.vnode, key); | ||
493 | |||
494 | if (ret != -ENOENT || | ||
495 | !test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) | ||
496 | goto out; | ||
497 | |||
498 | inode = afs_iget_autocell(dir, devname, strlen(devname), key); | ||
499 | if (IS_ERR(inode)) { | ||
500 | ret = PTR_ERR(inode); | ||
501 | goto out; | ||
502 | } | ||
503 | |||
504 | *fid = AFS_FS_I(inode)->fid; | ||
505 | _leave("= %p", inode); | ||
506 | return inode; | ||
507 | |||
508 | out: | ||
509 | _leave("= %d", ret); | ||
510 | return ERR_PTR(ret); | ||
511 | } | ||
512 | |||
513 | /* | ||
480 | * look up an entry in a directory | 514 | * look up an entry in a directory |
481 | */ | 515 | */ |
482 | static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | 516 | static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, |
@@ -520,6 +554,13 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | |||
520 | 554 | ||
521 | ret = afs_do_lookup(dir, dentry, &fid, key); | 555 | ret = afs_do_lookup(dir, dentry, &fid, key); |
522 | if (ret < 0) { | 556 | if (ret < 0) { |
557 | inode = afs_try_auto_mntpt(ret, dentry, dir, key, &fid); | ||
558 | if (!IS_ERR(inode)) { | ||
559 | key_put(key); | ||
560 | goto success; | ||
561 | } | ||
562 | |||
563 | ret = PTR_ERR(inode); | ||
523 | key_put(key); | 564 | key_put(key); |
524 | if (ret == -ENOENT) { | 565 | if (ret == -ENOENT) { |
525 | d_add(dentry, NULL); | 566 | d_add(dentry, NULL); |
@@ -539,6 +580,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | |||
539 | return ERR_CAST(inode); | 580 | return ERR_CAST(inode); |
540 | } | 581 | } |
541 | 582 | ||
583 | success: | ||
542 | dentry->d_op = &afs_fs_dentry_operations; | 584 | dentry->d_op = &afs_fs_dentry_operations; |
543 | 585 | ||
544 | d_add(dentry, inode); | 586 | d_add(dentry, inode); |
@@ -696,8 +738,9 @@ static int afs_d_delete(struct dentry *dentry) | |||
696 | goto zap; | 738 | goto zap; |
697 | 739 | ||
698 | if (dentry->d_inode && | 740 | if (dentry->d_inode && |
699 | test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags)) | 741 | (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags) || |
700 | goto zap; | 742 | test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(dentry->d_inode)->flags))) |
743 | goto zap; | ||
701 | 744 | ||
702 | _leave(" = 0 [keep]"); | 745 | _leave(" = 0 [keep]"); |
703 | return 0; | 746 | return 0; |
diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 320ffef11574..0747339011c3 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c | |||
@@ -19,6 +19,8 @@ | |||
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/pagemap.h> | 20 | #include <linux/pagemap.h> |
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/mount.h> | ||
23 | #include <linux/namei.h> | ||
22 | #include "internal.h" | 24 | #include "internal.h" |
23 | 25 | ||
24 | struct afs_iget_data { | 26 | struct afs_iget_data { |
@@ -102,6 +104,16 @@ static int afs_iget5_test(struct inode *inode, void *opaque) | |||
102 | } | 104 | } |
103 | 105 | ||
104 | /* | 106 | /* |
107 | * iget5() comparator for inode created by autocell operations | ||
108 | * | ||
109 | * These pseudo inodes don't match anything. | ||
110 | */ | ||
111 | static int afs_iget5_autocell_test(struct inode *inode, void *opaque) | ||
112 | { | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | /* | ||
105 | * iget5() inode initialiser | 117 | * iget5() inode initialiser |
106 | */ | 118 | */ |
107 | static int afs_iget5_set(struct inode *inode, void *opaque) | 119 | static int afs_iget5_set(struct inode *inode, void *opaque) |
@@ -118,6 +130,67 @@ static int afs_iget5_set(struct inode *inode, void *opaque) | |||
118 | } | 130 | } |
119 | 131 | ||
120 | /* | 132 | /* |
133 | * inode retrieval for autocell | ||
134 | */ | ||
135 | struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, | ||
136 | int namesz, struct key *key) | ||
137 | { | ||
138 | struct afs_iget_data data; | ||
139 | struct afs_super_info *as; | ||
140 | struct afs_vnode *vnode; | ||
141 | struct super_block *sb; | ||
142 | struct inode *inode; | ||
143 | static atomic_t afs_autocell_ino; | ||
144 | |||
145 | _enter("{%x:%u},%*.*s,", | ||
146 | AFS_FS_I(dir)->fid.vid, AFS_FS_I(dir)->fid.vnode, | ||
147 | namesz, namesz, dev_name ?: ""); | ||
148 | |||
149 | sb = dir->i_sb; | ||
150 | as = sb->s_fs_info; | ||
151 | data.volume = as->volume; | ||
152 | data.fid.vid = as->volume->vid; | ||
153 | data.fid.unique = 0; | ||
154 | data.fid.vnode = 0; | ||
155 | |||
156 | inode = iget5_locked(sb, atomic_inc_return(&afs_autocell_ino), | ||
157 | afs_iget5_autocell_test, afs_iget5_set, | ||
158 | &data); | ||
159 | if (!inode) { | ||
160 | _leave(" = -ENOMEM"); | ||
161 | return ERR_PTR(-ENOMEM); | ||
162 | } | ||
163 | |||
164 | _debug("GOT INODE %p { ino=%lu, vl=%x, vn=%x, u=%x }", | ||
165 | inode, inode->i_ino, data.fid.vid, data.fid.vnode, | ||
166 | data.fid.unique); | ||
167 | |||
168 | vnode = AFS_FS_I(inode); | ||
169 | |||
170 | /* there shouldn't be an existing inode */ | ||
171 | BUG_ON(!(inode->i_state & I_NEW)); | ||
172 | |||
173 | inode->i_size = 0; | ||
174 | inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; | ||
175 | inode->i_op = &afs_autocell_inode_operations; | ||
176 | inode->i_nlink = 2; | ||
177 | inode->i_uid = 0; | ||
178 | inode->i_gid = 0; | ||
179 | inode->i_ctime.tv_sec = get_seconds(); | ||
180 | inode->i_ctime.tv_nsec = 0; | ||
181 | inode->i_atime = inode->i_mtime = inode->i_ctime; | ||
182 | inode->i_blocks = 0; | ||
183 | inode->i_version = 0; | ||
184 | inode->i_generation = 0; | ||
185 | |||
186 | set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); | ||
187 | inode->i_flags |= S_NOATIME; | ||
188 | unlock_new_inode(inode); | ||
189 | _leave(" = %p", inode); | ||
190 | return inode; | ||
191 | } | ||
192 | |||
193 | /* | ||
121 | * inode retrieval | 194 | * inode retrieval |
122 | */ | 195 | */ |
123 | struct inode *afs_iget(struct super_block *sb, struct key *key, | 196 | struct inode *afs_iget(struct super_block *sb, struct key *key, |
@@ -314,6 +387,19 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
314 | } | 387 | } |
315 | 388 | ||
316 | /* | 389 | /* |
390 | * discard an AFS inode | ||
391 | */ | ||
392 | int afs_drop_inode(struct inode *inode) | ||
393 | { | ||
394 | _enter(""); | ||
395 | |||
396 | if (test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(inode)->flags)) | ||
397 | return generic_delete_inode(inode); | ||
398 | else | ||
399 | return generic_drop_inode(inode); | ||
400 | } | ||
401 | |||
402 | /* | ||
317 | * clear an AFS inode | 403 | * clear an AFS inode |
318 | */ | 404 | */ |
319 | void afs_evict_inode(struct inode *inode) | 405 | void afs_evict_inode(struct inode *inode) |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8679089ce9a1..cca8eef736fc 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -42,6 +42,7 @@ typedef enum { | |||
42 | struct afs_mount_params { | 42 | struct afs_mount_params { |
43 | bool rwpath; /* T if the parent should be considered R/W */ | 43 | bool rwpath; /* T if the parent should be considered R/W */ |
44 | bool force; /* T to force cell type */ | 44 | bool force; /* T to force cell type */ |
45 | bool autocell; /* T if set auto mount operation */ | ||
45 | afs_voltype_t type; /* type of volume requested */ | 46 | afs_voltype_t type; /* type of volume requested */ |
46 | int volnamesz; /* size of volume name */ | 47 | int volnamesz; /* size of volume name */ |
47 | const char *volname; /* name of volume to mount */ | 48 | const char *volname; /* name of volume to mount */ |
@@ -358,6 +359,8 @@ struct afs_vnode { | |||
358 | #define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */ | 359 | #define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */ |
359 | #define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */ | 360 | #define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */ |
360 | #define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */ | 361 | #define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */ |
362 | #define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */ | ||
363 | #define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */ | ||
361 | 364 | ||
362 | long acl_order; /* ACL check count (callback break count) */ | 365 | long acl_order; /* ACL check count (callback break count) */ |
363 | 366 | ||
@@ -468,8 +471,8 @@ extern struct list_head afs_proc_cells; | |||
468 | 471 | ||
469 | #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) | 472 | #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) |
470 | extern int afs_cell_init(char *); | 473 | extern int afs_cell_init(char *); |
471 | extern struct afs_cell *afs_cell_create(const char *, char *); | 474 | extern struct afs_cell *afs_cell_create(const char *, unsigned, char *, bool); |
472 | extern struct afs_cell *afs_cell_lookup(const char *, unsigned); | 475 | extern struct afs_cell *afs_cell_lookup(const char *, unsigned, bool); |
473 | extern struct afs_cell *afs_grab_cell(struct afs_cell *); | 476 | extern struct afs_cell *afs_grab_cell(struct afs_cell *); |
474 | extern void afs_put_cell(struct afs_cell *); | 477 | extern void afs_put_cell(struct afs_cell *); |
475 | extern void afs_cell_purge(void); | 478 | extern void afs_cell_purge(void); |
@@ -558,6 +561,8 @@ extern int afs_fs_release_lock(struct afs_server *, struct key *, | |||
558 | /* | 561 | /* |
559 | * inode.c | 562 | * inode.c |
560 | */ | 563 | */ |
564 | extern struct inode *afs_iget_autocell(struct inode *, const char *, int, | ||
565 | struct key *); | ||
561 | extern struct inode *afs_iget(struct super_block *, struct key *, | 566 | extern struct inode *afs_iget(struct super_block *, struct key *, |
562 | struct afs_fid *, struct afs_file_status *, | 567 | struct afs_fid *, struct afs_file_status *, |
563 | struct afs_callback *); | 568 | struct afs_callback *); |
@@ -566,6 +571,7 @@ extern int afs_validate(struct afs_vnode *, struct key *); | |||
566 | extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 571 | extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
567 | extern int afs_setattr(struct dentry *, struct iattr *); | 572 | extern int afs_setattr(struct dentry *, struct iattr *); |
568 | extern void afs_evict_inode(struct inode *); | 573 | extern void afs_evict_inode(struct inode *); |
574 | extern int afs_drop_inode(struct inode *); | ||
569 | 575 | ||
570 | /* | 576 | /* |
571 | * main.c | 577 | * main.c |
@@ -581,6 +587,7 @@ extern int afs_abort_to_error(u32); | |||
581 | * mntpt.c | 587 | * mntpt.c |
582 | */ | 588 | */ |
583 | extern const struct inode_operations afs_mntpt_inode_operations; | 589 | extern const struct inode_operations afs_mntpt_inode_operations; |
590 | extern const struct inode_operations afs_autocell_inode_operations; | ||
584 | extern const struct file_operations afs_mntpt_file_operations; | 591 | extern const struct file_operations afs_mntpt_file_operations; |
585 | 592 | ||
586 | extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); | 593 | extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); |
@@ -752,12 +759,6 @@ extern unsigned afs_debug; | |||
752 | #define dbgprintk(FMT,...) \ | 759 | #define dbgprintk(FMT,...) \ |
753 | printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__) | 760 | printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__) |
754 | 761 | ||
755 | /* make sure we maintain the format strings, even when debugging is disabled */ | ||
756 | static inline __attribute__((format(printf,1,2))) | ||
757 | void _dbprintk(const char *fmt, ...) | ||
758 | { | ||
759 | } | ||
760 | |||
761 | #define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) | 762 | #define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) |
762 | #define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) | 763 | #define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) |
763 | #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) | 764 | #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) |
@@ -792,9 +793,9 @@ do { \ | |||
792 | } while (0) | 793 | } while (0) |
793 | 794 | ||
794 | #else | 795 | #else |
795 | #define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) | 796 | #define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__) |
796 | #define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) | 797 | #define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) |
797 | #define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) | 798 | #define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__) |
798 | #endif | 799 | #endif |
799 | 800 | ||
800 | /* | 801 | /* |
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index a9e23039ea34..6d552686c498 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c | |||
@@ -38,6 +38,11 @@ const struct inode_operations afs_mntpt_inode_operations = { | |||
38 | .getattr = afs_getattr, | 38 | .getattr = afs_getattr, |
39 | }; | 39 | }; |
40 | 40 | ||
41 | const struct inode_operations afs_autocell_inode_operations = { | ||
42 | .follow_link = afs_mntpt_follow_link, | ||
43 | .getattr = afs_getattr, | ||
44 | }; | ||
45 | |||
41 | static LIST_HEAD(afs_vfsmounts); | 46 | static LIST_HEAD(afs_vfsmounts); |
42 | static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); | 47 | static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); |
43 | 48 | ||
@@ -136,20 +141,16 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | |||
136 | { | 141 | { |
137 | struct afs_super_info *super; | 142 | struct afs_super_info *super; |
138 | struct vfsmount *mnt; | 143 | struct vfsmount *mnt; |
144 | struct afs_vnode *vnode; | ||
139 | struct page *page; | 145 | struct page *page; |
140 | size_t size; | 146 | char *devname, *options; |
141 | char *buf, *devname, *options; | 147 | bool rwpath = false; |
142 | int ret; | 148 | int ret; |
143 | 149 | ||
144 | _enter("{%s}", mntpt->d_name.name); | 150 | _enter("{%s}", mntpt->d_name.name); |
145 | 151 | ||
146 | BUG_ON(!mntpt->d_inode); | 152 | BUG_ON(!mntpt->d_inode); |
147 | 153 | ||
148 | ret = -EINVAL; | ||
149 | size = mntpt->d_inode->i_size; | ||
150 | if (size > PAGE_SIZE - 1) | ||
151 | goto error_no_devname; | ||
152 | |||
153 | ret = -ENOMEM; | 154 | ret = -ENOMEM; |
154 | devname = (char *) get_zeroed_page(GFP_KERNEL); | 155 | devname = (char *) get_zeroed_page(GFP_KERNEL); |
155 | if (!devname) | 156 | if (!devname) |
@@ -159,28 +160,59 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | |||
159 | if (!options) | 160 | if (!options) |
160 | goto error_no_options; | 161 | goto error_no_options; |
161 | 162 | ||
162 | /* read the contents of the AFS special symlink */ | 163 | vnode = AFS_FS_I(mntpt->d_inode); |
163 | page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); | 164 | if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { |
164 | if (IS_ERR(page)) { | 165 | /* if the directory is a pseudo directory, use the d_name */ |
165 | ret = PTR_ERR(page); | 166 | static const char afs_root_cell[] = ":root.cell."; |
166 | goto error_no_page; | 167 | unsigned size = mntpt->d_name.len; |
168 | |||
169 | ret = -ENOENT; | ||
170 | if (size < 2 || size > AFS_MAXCELLNAME) | ||
171 | goto error_no_page; | ||
172 | |||
173 | if (mntpt->d_name.name[0] == '.') { | ||
174 | devname[0] = '#'; | ||
175 | memcpy(devname + 1, mntpt->d_name.name, size - 1); | ||
176 | memcpy(devname + size, afs_root_cell, | ||
177 | sizeof(afs_root_cell)); | ||
178 | rwpath = true; | ||
179 | } else { | ||
180 | devname[0] = '%'; | ||
181 | memcpy(devname + 1, mntpt->d_name.name, size); | ||
182 | memcpy(devname + size + 1, afs_root_cell, | ||
183 | sizeof(afs_root_cell)); | ||
184 | } | ||
185 | } else { | ||
186 | /* read the contents of the AFS special symlink */ | ||
187 | loff_t size = i_size_read(mntpt->d_inode); | ||
188 | char *buf; | ||
189 | |||
190 | ret = -EINVAL; | ||
191 | if (size > PAGE_SIZE - 1) | ||
192 | goto error_no_page; | ||
193 | |||
194 | page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); | ||
195 | if (IS_ERR(page)) { | ||
196 | ret = PTR_ERR(page); | ||
197 | goto error_no_page; | ||
198 | } | ||
199 | |||
200 | ret = -EIO; | ||
201 | if (PageError(page)) | ||
202 | goto error; | ||
203 | |||
204 | buf = kmap_atomic(page, KM_USER0); | ||
205 | memcpy(devname, buf, size); | ||
206 | kunmap_atomic(buf, KM_USER0); | ||
207 | page_cache_release(page); | ||
208 | page = NULL; | ||
167 | } | 209 | } |
168 | 210 | ||
169 | ret = -EIO; | ||
170 | if (PageError(page)) | ||
171 | goto error; | ||
172 | |||
173 | buf = kmap_atomic(page, KM_USER0); | ||
174 | memcpy(devname, buf, size); | ||
175 | kunmap_atomic(buf, KM_USER0); | ||
176 | page_cache_release(page); | ||
177 | page = NULL; | ||
178 | |||
179 | /* work out what options we want */ | 211 | /* work out what options we want */ |
180 | super = AFS_FS_S(mntpt->d_sb); | 212 | super = AFS_FS_S(mntpt->d_sb); |
181 | memcpy(options, "cell=", 5); | 213 | memcpy(options, "cell=", 5); |
182 | strcpy(options + 5, super->volume->cell->name); | 214 | strcpy(options + 5, super->volume->cell->name); |
183 | if (super->volume->type == AFSVL_RWVOL) | 215 | if (super->volume->type == AFSVL_RWVOL || rwpath) |
184 | strcat(options, ",rwpath"); | 216 | strcat(options, ",rwpath"); |
185 | 217 | ||
186 | /* try and do the mount */ | 218 | /* try and do the mount */ |
diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 852739d262a9..096b23f821a1 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c | |||
@@ -294,7 +294,7 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, | |||
294 | if (strcmp(kbuf, "add") == 0) { | 294 | if (strcmp(kbuf, "add") == 0) { |
295 | struct afs_cell *cell; | 295 | struct afs_cell *cell; |
296 | 296 | ||
297 | cell = afs_cell_create(name, args); | 297 | cell = afs_cell_create(name, strlen(name), args, false); |
298 | if (IS_ERR(cell)) { | 298 | if (IS_ERR(cell)) { |
299 | ret = PTR_ERR(cell); | 299 | ret = PTR_ERR(cell); |
300 | goto done; | 300 | goto done; |
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 67cf810e0fd6..654d8fdbf01f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c | |||
@@ -100,6 +100,7 @@ int afs_open_socket(void) | |||
100 | ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); | 100 | ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); |
101 | if (ret < 0) { | 101 | if (ret < 0) { |
102 | sock_release(socket); | 102 | sock_release(socket); |
103 | destroy_workqueue(afs_async_calls); | ||
103 | _leave(" = %d [bind]", ret); | 104 | _leave(" = %d [bind]", ret); |
104 | return ret; | 105 | return ret; |
105 | } | 106 | } |
diff --git a/fs/afs/super.c b/fs/afs/super.c index 9cf80f02da16..77e1e5a61154 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -16,6 +16,7 @@ | |||
16 | 16 | ||
17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/mount.h> | ||
19 | #include <linux/init.h> | 20 | #include <linux/init.h> |
20 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
21 | #include <linux/smp_lock.h> | 22 | #include <linux/smp_lock.h> |
@@ -48,6 +49,7 @@ struct file_system_type afs_fs_type = { | |||
48 | static const struct super_operations afs_super_ops = { | 49 | static const struct super_operations afs_super_ops = { |
49 | .statfs = afs_statfs, | 50 | .statfs = afs_statfs, |
50 | .alloc_inode = afs_alloc_inode, | 51 | .alloc_inode = afs_alloc_inode, |
52 | .drop_inode = afs_drop_inode, | ||
51 | .destroy_inode = afs_destroy_inode, | 53 | .destroy_inode = afs_destroy_inode, |
52 | .evict_inode = afs_evict_inode, | 54 | .evict_inode = afs_evict_inode, |
53 | .put_super = afs_put_super, | 55 | .put_super = afs_put_super, |
@@ -62,12 +64,14 @@ enum { | |||
62 | afs_opt_cell, | 64 | afs_opt_cell, |
63 | afs_opt_rwpath, | 65 | afs_opt_rwpath, |
64 | afs_opt_vol, | 66 | afs_opt_vol, |
67 | afs_opt_autocell, | ||
65 | }; | 68 | }; |
66 | 69 | ||
67 | static const match_table_t afs_options_list = { | 70 | static const match_table_t afs_options_list = { |
68 | { afs_opt_cell, "cell=%s" }, | 71 | { afs_opt_cell, "cell=%s" }, |
69 | { afs_opt_rwpath, "rwpath" }, | 72 | { afs_opt_rwpath, "rwpath" }, |
70 | { afs_opt_vol, "vol=%s" }, | 73 | { afs_opt_vol, "vol=%s" }, |
74 | { afs_opt_autocell, "autocell" }, | ||
71 | { afs_no_opt, NULL }, | 75 | { afs_no_opt, NULL }, |
72 | }; | 76 | }; |
73 | 77 | ||
@@ -151,7 +155,8 @@ static int afs_parse_options(struct afs_mount_params *params, | |||
151 | switch (token) { | 155 | switch (token) { |
152 | case afs_opt_cell: | 156 | case afs_opt_cell: |
153 | cell = afs_cell_lookup(args[0].from, | 157 | cell = afs_cell_lookup(args[0].from, |
154 | args[0].to - args[0].from); | 158 | args[0].to - args[0].from, |
159 | false); | ||
155 | if (IS_ERR(cell)) | 160 | if (IS_ERR(cell)) |
156 | return PTR_ERR(cell); | 161 | return PTR_ERR(cell); |
157 | afs_put_cell(params->cell); | 162 | afs_put_cell(params->cell); |
@@ -166,6 +171,10 @@ static int afs_parse_options(struct afs_mount_params *params, | |||
166 | *devname = args[0].from; | 171 | *devname = args[0].from; |
167 | break; | 172 | break; |
168 | 173 | ||
174 | case afs_opt_autocell: | ||
175 | params->autocell = 1; | ||
176 | break; | ||
177 | |||
169 | default: | 178 | default: |
170 | printk(KERN_ERR "kAFS:" | 179 | printk(KERN_ERR "kAFS:" |
171 | " Unknown or invalid mount option: '%s'\n", p); | 180 | " Unknown or invalid mount option: '%s'\n", p); |
@@ -252,10 +261,10 @@ static int afs_parse_device_name(struct afs_mount_params *params, | |||
252 | 261 | ||
253 | /* lookup the cell record */ | 262 | /* lookup the cell record */ |
254 | if (cellname || !params->cell) { | 263 | if (cellname || !params->cell) { |
255 | cell = afs_cell_lookup(cellname, cellnamesz); | 264 | cell = afs_cell_lookup(cellname, cellnamesz, true); |
256 | if (IS_ERR(cell)) { | 265 | if (IS_ERR(cell)) { |
257 | printk(KERN_ERR "kAFS: unable to lookup cell '%s'\n", | 266 | printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", |
258 | cellname ?: ""); | 267 | cellnamesz, cellnamesz, cellname ?: ""); |
259 | return PTR_ERR(cell); | 268 | return PTR_ERR(cell); |
260 | } | 269 | } |
261 | afs_put_cell(params->cell); | 270 | afs_put_cell(params->cell); |
@@ -321,6 +330,9 @@ static int afs_fill_super(struct super_block *sb, void *data) | |||
321 | if (IS_ERR(inode)) | 330 | if (IS_ERR(inode)) |
322 | goto error_inode; | 331 | goto error_inode; |
323 | 332 | ||
333 | if (params->autocell) | ||
334 | set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); | ||
335 | |||
324 | ret = -ENOMEM; | 336 | ret = -ENOMEM; |
325 | root = d_alloc_root(inode); | 337 | root = d_alloc_root(inode); |
326 | if (!root) | 338 | if (!root) |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 48e056e70fd6..cb1bd38dc08c 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -204,8 +204,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) | |||
204 | } | 204 | } |
205 | 205 | ||
206 | /* Initialize expiry counter after successful mount */ | 206 | /* Initialize expiry counter after successful mount */ |
207 | if (ino) | 207 | ino->last_used = jiffies; |
208 | ino->last_used = jiffies; | ||
209 | 208 | ||
210 | spin_lock(&sbi->fs_lock); | 209 | spin_lock(&sbi->fs_lock); |
211 | ino->flags &= ~AUTOFS_INF_PENDING; | 210 | ino->flags &= ~AUTOFS_INF_PENDING; |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 66411463b734..50e8c8582faa 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -1340,10 +1340,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1340 | /* | 1340 | /* |
1341 | * hooks: /n/, see "layering violations". | 1341 | * hooks: /n/, see "layering violations". |
1342 | */ | 1342 | */ |
1343 | ret = devcgroup_inode_permission(bdev->bd_inode, perm); | 1343 | if (!for_part) { |
1344 | if (ret != 0) { | 1344 | ret = devcgroup_inode_permission(bdev->bd_inode, perm); |
1345 | bdput(bdev); | 1345 | if (ret != 0) { |
1346 | return ret; | 1346 | bdput(bdev); |
1347 | return ret; | ||
1348 | } | ||
1347 | } | 1349 | } |
1348 | 1350 | ||
1349 | restart: | 1351 | restart: |
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 24eb0d37241a..727caedcdd92 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c | |||
@@ -552,8 +552,7 @@ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) | |||
552 | */ | 552 | */ |
553 | static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) | 553 | static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) |
554 | { | 554 | { |
555 | struct fs_struct *fs; | 555 | struct path path; |
556 | struct dentry *dir; | ||
557 | const struct cred *saved_cred; | 556 | const struct cred *saved_cred; |
558 | int ret; | 557 | int ret; |
559 | 558 | ||
@@ -573,24 +572,21 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) | |||
573 | } | 572 | } |
574 | 573 | ||
575 | /* extract the directory dentry from the cwd */ | 574 | /* extract the directory dentry from the cwd */ |
576 | fs = current->fs; | 575 | get_fs_pwd(current->fs, &path); |
577 | read_lock(&fs->lock); | ||
578 | dir = dget(fs->pwd.dentry); | ||
579 | read_unlock(&fs->lock); | ||
580 | 576 | ||
581 | if (!S_ISDIR(dir->d_inode->i_mode)) | 577 | if (!S_ISDIR(path.dentry->d_inode->i_mode)) |
582 | goto notdir; | 578 | goto notdir; |
583 | 579 | ||
584 | cachefiles_begin_secure(cache, &saved_cred); | 580 | cachefiles_begin_secure(cache, &saved_cred); |
585 | ret = cachefiles_cull(cache, dir, args); | 581 | ret = cachefiles_cull(cache, path.dentry, args); |
586 | cachefiles_end_secure(cache, saved_cred); | 582 | cachefiles_end_secure(cache, saved_cred); |
587 | 583 | ||
588 | dput(dir); | 584 | path_put(&path); |
589 | _leave(" = %d", ret); | 585 | _leave(" = %d", ret); |
590 | return ret; | 586 | return ret; |
591 | 587 | ||
592 | notdir: | 588 | notdir: |
593 | dput(dir); | 589 | path_put(&path); |
594 | kerror("cull command requires dirfd to be a directory"); | 590 | kerror("cull command requires dirfd to be a directory"); |
595 | return -ENOTDIR; | 591 | return -ENOTDIR; |
596 | 592 | ||
@@ -628,8 +624,7 @@ inval: | |||
628 | */ | 624 | */ |
629 | static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) | 625 | static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) |
630 | { | 626 | { |
631 | struct fs_struct *fs; | 627 | struct path path; |
632 | struct dentry *dir; | ||
633 | const struct cred *saved_cred; | 628 | const struct cred *saved_cred; |
634 | int ret; | 629 | int ret; |
635 | 630 | ||
@@ -649,24 +644,21 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) | |||
649 | } | 644 | } |
650 | 645 | ||
651 | /* extract the directory dentry from the cwd */ | 646 | /* extract the directory dentry from the cwd */ |
652 | fs = current->fs; | 647 | get_fs_pwd(current->fs, &path); |
653 | read_lock(&fs->lock); | ||
654 | dir = dget(fs->pwd.dentry); | ||
655 | read_unlock(&fs->lock); | ||
656 | 648 | ||
657 | if (!S_ISDIR(dir->d_inode->i_mode)) | 649 | if (!S_ISDIR(path.dentry->d_inode->i_mode)) |
658 | goto notdir; | 650 | goto notdir; |
659 | 651 | ||
660 | cachefiles_begin_secure(cache, &saved_cred); | 652 | cachefiles_begin_secure(cache, &saved_cred); |
661 | ret = cachefiles_check_in_use(cache, dir, args); | 653 | ret = cachefiles_check_in_use(cache, path.dentry, args); |
662 | cachefiles_end_secure(cache, saved_cred); | 654 | cachefiles_end_secure(cache, saved_cred); |
663 | 655 | ||
664 | dput(dir); | 656 | path_put(&path); |
665 | //_leave(" = %d", ret); | 657 | //_leave(" = %d", ret); |
666 | return ret; | 658 | return ret; |
667 | 659 | ||
668 | notdir: | 660 | notdir: |
669 | dput(dir); | 661 | path_put(&path); |
670 | kerror("inuse command requires dirfd to be a directory"); | 662 | kerror("inuse command requires dirfd to be a directory"); |
671 | return -ENOTDIR; | 663 | return -ENOTDIR; |
672 | 664 | ||
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index a8cd821226da..bd6bc1bde2d7 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h | |||
@@ -267,13 +267,6 @@ do { \ | |||
267 | #define dbgprintk(FMT, ...) \ | 267 | #define dbgprintk(FMT, ...) \ |
268 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) | 268 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) |
269 | 269 | ||
270 | /* make sure we maintain the format strings, even when debugging is disabled */ | ||
271 | static inline void _dbprintk(const char *fmt, ...) | ||
272 | __attribute__((format(printf, 1, 2))); | ||
273 | static inline void _dbprintk(const char *fmt, ...) | ||
274 | { | ||
275 | } | ||
276 | |||
277 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 270 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
278 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 271 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
279 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) | 272 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) |
@@ -304,9 +297,9 @@ do { \ | |||
304 | } while (0) | 297 | } while (0) |
305 | 298 | ||
306 | #else | 299 | #else |
307 | #define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 300 | #define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
308 | #define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 301 | #define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
309 | #define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) | 302 | #define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) |
310 | #endif | 303 | #endif |
311 | 304 | ||
312 | #if 1 /* defined(__KDEBUGALL) */ | 305 | #if 1 /* defined(__KDEBUGALL) */ |
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 6a660e610be8..278e1172600d 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
@@ -6,7 +6,7 @@ ifneq ($(KERNELRELEASE),) | |||
6 | 6 | ||
7 | obj-$(CONFIG_CEPH_FS) += ceph.o | 7 | obj-$(CONFIG_CEPH_FS) += ceph.o |
8 | 8 | ||
9 | ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ | 9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ |
10 | export.o caps.o snap.o xattr.o \ | 10 | export.o caps.o snap.o xattr.o \ |
11 | messenger.o msgpool.o buffer.o pagelist.o \ | 11 | messenger.o msgpool.o buffer.o pagelist.o \ |
12 | mds_client.o mdsmap.o \ | 12 | mds_client.o mdsmap.o \ |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index d9c60b84949a..5598a0d02295 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -309,7 +309,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
309 | zero_user_segment(page, s, PAGE_CACHE_SIZE); | 309 | zero_user_segment(page, s, PAGE_CACHE_SIZE); |
310 | } | 310 | } |
311 | 311 | ||
312 | if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { | 312 | if (add_to_page_cache_lru(page, mapping, page->index, |
313 | GFP_NOFS)) { | ||
313 | page_cache_release(page); | 314 | page_cache_release(page); |
314 | dout("readpages %p add_to_page_cache failed %p\n", | 315 | dout("readpages %p add_to_page_cache failed %p\n", |
315 | inode, page); | 316 | inode, page); |
@@ -552,7 +553,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
552 | * page truncation thread, possibly losing some data that | 553 | * page truncation thread, possibly losing some data that |
553 | * raced its way in | 554 | * raced its way in |
554 | */ | 555 | */ |
555 | if ((issued & CEPH_CAP_FILE_CACHE) == 0) | 556 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) |
556 | generic_error_remove_page(inode->i_mapping, page); | 557 | generic_error_remove_page(inode->i_mapping, page); |
557 | 558 | ||
558 | unlock_page(page); | 559 | unlock_page(page); |
@@ -797,9 +798,12 @@ get_more_pages: | |||
797 | dout("%p will write page %p idx %lu\n", | 798 | dout("%p will write page %p idx %lu\n", |
798 | inode, page, page->index); | 799 | inode, page, page->index); |
799 | 800 | ||
800 | writeback_stat = atomic_long_inc_return(&client->writeback_count); | 801 | writeback_stat = |
801 | if (writeback_stat > CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) { | 802 | atomic_long_inc_return(&client->writeback_count); |
802 | set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); | 803 | if (writeback_stat > CONGESTION_ON_THRESH( |
804 | client->mount_args->congestion_kb)) { | ||
805 | set_bdi_congested(&client->backing_dev_info, | ||
806 | BLK_RW_ASYNC); | ||
803 | } | 807 | } |
804 | 808 | ||
805 | set_page_writeback(page); | 809 | set_page_writeback(page); |
@@ -1036,7 +1040,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, | |||
1036 | *pagep = page; | 1040 | *pagep = page; |
1037 | 1041 | ||
1038 | dout("write_begin file %p inode %p page %p %d~%d\n", file, | 1042 | dout("write_begin file %p inode %p page %p %d~%d\n", file, |
1039 | inode, page, (int)pos, (int)len); | 1043 | inode, page, (int)pos, (int)len); |
1040 | 1044 | ||
1041 | r = ceph_update_writeable_page(file, pos, len, page); | 1045 | r = ceph_update_writeable_page(file, pos, len, page); |
1042 | } while (r == -EAGAIN); | 1046 | } while (r == -EAGAIN); |
diff --git a/fs/ceph/armor.c b/fs/ceph/armor.c index 67b2c030924b..eb2a666b0be7 100644 --- a/fs/ceph/armor.c +++ b/fs/ceph/armor.c | |||
@@ -1,11 +1,15 @@ | |||
1 | 1 | ||
2 | #include <linux/errno.h> | 2 | #include <linux/errno.h> |
3 | 3 | ||
4 | int ceph_armor(char *dst, const char *src, const char *end); | ||
5 | int ceph_unarmor(char *dst, const char *src, const char *end); | ||
6 | |||
4 | /* | 7 | /* |
5 | * base64 encode/decode. | 8 | * base64 encode/decode. |
6 | */ | 9 | */ |
7 | 10 | ||
8 | const char *pem_key = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | 11 | static const char *pem_key = |
12 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | ||
9 | 13 | ||
10 | static int encode_bits(int c) | 14 | static int encode_bits(int c) |
11 | { | 15 | { |
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 89490beaf537..6d2e30600627 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c | |||
@@ -20,7 +20,7 @@ static u32 supported_protocols[] = { | |||
20 | CEPH_AUTH_CEPHX | 20 | CEPH_AUTH_CEPHX |
21 | }; | 21 | }; |
22 | 22 | ||
23 | int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) | 23 | static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) |
24 | { | 24 | { |
25 | switch (protocol) { | 25 | switch (protocol) { |
26 | case CEPH_AUTH_NONE: | 26 | case CEPH_AUTH_NONE: |
@@ -133,8 +133,8 @@ bad: | |||
133 | return -ERANGE; | 133 | return -ERANGE; |
134 | } | 134 | } |
135 | 135 | ||
136 | int ceph_build_auth_request(struct ceph_auth_client *ac, | 136 | static int ceph_build_auth_request(struct ceph_auth_client *ac, |
137 | void *msg_buf, size_t msg_len) | 137 | void *msg_buf, size_t msg_len) |
138 | { | 138 | { |
139 | struct ceph_mon_request_header *monhdr = msg_buf; | 139 | struct ceph_mon_request_header *monhdr = msg_buf; |
140 | void *p = monhdr + 1; | 140 | void *p = monhdr + 1; |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 6d44053ecff1..582e0b2caf8a 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
@@ -87,8 +87,8 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret, | |||
87 | /* | 87 | /* |
88 | * get existing (or insert new) ticket handler | 88 | * get existing (or insert new) ticket handler |
89 | */ | 89 | */ |
90 | struct ceph_x_ticket_handler *get_ticket_handler(struct ceph_auth_client *ac, | 90 | static struct ceph_x_ticket_handler * |
91 | int service) | 91 | get_ticket_handler(struct ceph_auth_client *ac, int service) |
92 | { | 92 | { |
93 | struct ceph_x_ticket_handler *th; | 93 | struct ceph_x_ticket_handler *th; |
94 | struct ceph_x_info *xi = ac->private; | 94 | struct ceph_x_info *xi = ac->private; |
@@ -429,7 +429,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, | |||
429 | auth->struct_v = 1; | 429 | auth->struct_v = 1; |
430 | auth->key = 0; | 430 | auth->key = 0; |
431 | for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) | 431 | for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) |
432 | auth->key ^= *u; | 432 | auth->key ^= *(__le64 *)u; |
433 | dout(" server_challenge %llx client_challenge %llx key %llx\n", | 433 | dout(" server_challenge %llx client_challenge %llx key %llx\n", |
434 | xi->server_challenge, le64_to_cpu(auth->client_challenge), | 434 | xi->server_challenge, le64_to_cpu(auth->client_challenge), |
435 | le64_to_cpu(auth->key)); | 435 | le64_to_cpu(auth->key)); |
diff --git a/fs/ceph/buffer.c b/fs/ceph/buffer.c index c67535d70aa6..cd39f17021de 100644 --- a/fs/ceph/buffer.c +++ b/fs/ceph/buffer.c | |||
@@ -47,22 +47,6 @@ void ceph_buffer_release(struct kref *kref) | |||
47 | kfree(b); | 47 | kfree(b); |
48 | } | 48 | } |
49 | 49 | ||
50 | int ceph_buffer_alloc(struct ceph_buffer *b, int len, gfp_t gfp) | ||
51 | { | ||
52 | b->vec.iov_base = kmalloc(len, gfp | __GFP_NOWARN); | ||
53 | if (b->vec.iov_base) { | ||
54 | b->is_vmalloc = false; | ||
55 | } else { | ||
56 | b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL); | ||
57 | b->is_vmalloc = true; | ||
58 | } | ||
59 | if (!b->vec.iov_base) | ||
60 | return -ENOMEM; | ||
61 | b->alloc_len = len; | ||
62 | b->vec.iov_len = len; | ||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) | 50 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) |
67 | { | 51 | { |
68 | size_t len; | 52 | size_t len; |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b81be9a56487..7bf182b03973 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -113,58 +113,41 @@ const char *ceph_cap_string(int caps) | |||
113 | return cap_str[i]; | 113 | return cap_str[i]; |
114 | } | 114 | } |
115 | 115 | ||
116 | /* | 116 | void ceph_caps_init(struct ceph_mds_client *mdsc) |
117 | * Cap reservations | ||
118 | * | ||
119 | * Maintain a global pool of preallocated struct ceph_caps, referenced | ||
120 | * by struct ceph_caps_reservations. This ensures that we preallocate | ||
121 | * memory needed to successfully process an MDS response. (If an MDS | ||
122 | * sends us cap information and we fail to process it, we will have | ||
123 | * problems due to the client and MDS being out of sync.) | ||
124 | * | ||
125 | * Reservations are 'owned' by a ceph_cap_reservation context. | ||
126 | */ | ||
127 | static spinlock_t caps_list_lock; | ||
128 | static struct list_head caps_list; /* unused (reserved or unreserved) */ | ||
129 | static int caps_total_count; /* total caps allocated */ | ||
130 | static int caps_use_count; /* in use */ | ||
131 | static int caps_reserve_count; /* unused, reserved */ | ||
132 | static int caps_avail_count; /* unused, unreserved */ | ||
133 | static int caps_min_count; /* keep at least this many (unreserved) */ | ||
134 | |||
135 | void __init ceph_caps_init(void) | ||
136 | { | 117 | { |
137 | INIT_LIST_HEAD(&caps_list); | 118 | INIT_LIST_HEAD(&mdsc->caps_list); |
138 | spin_lock_init(&caps_list_lock); | 119 | spin_lock_init(&mdsc->caps_list_lock); |
139 | } | 120 | } |
140 | 121 | ||
141 | void ceph_caps_finalize(void) | 122 | void ceph_caps_finalize(struct ceph_mds_client *mdsc) |
142 | { | 123 | { |
143 | struct ceph_cap *cap; | 124 | struct ceph_cap *cap; |
144 | 125 | ||
145 | spin_lock(&caps_list_lock); | 126 | spin_lock(&mdsc->caps_list_lock); |
146 | while (!list_empty(&caps_list)) { | 127 | while (!list_empty(&mdsc->caps_list)) { |
147 | cap = list_first_entry(&caps_list, struct ceph_cap, caps_item); | 128 | cap = list_first_entry(&mdsc->caps_list, |
129 | struct ceph_cap, caps_item); | ||
148 | list_del(&cap->caps_item); | 130 | list_del(&cap->caps_item); |
149 | kmem_cache_free(ceph_cap_cachep, cap); | 131 | kmem_cache_free(ceph_cap_cachep, cap); |
150 | } | 132 | } |
151 | caps_total_count = 0; | 133 | mdsc->caps_total_count = 0; |
152 | caps_avail_count = 0; | 134 | mdsc->caps_avail_count = 0; |
153 | caps_use_count = 0; | 135 | mdsc->caps_use_count = 0; |
154 | caps_reserve_count = 0; | 136 | mdsc->caps_reserve_count = 0; |
155 | caps_min_count = 0; | 137 | mdsc->caps_min_count = 0; |
156 | spin_unlock(&caps_list_lock); | 138 | spin_unlock(&mdsc->caps_list_lock); |
157 | } | 139 | } |
158 | 140 | ||
159 | void ceph_adjust_min_caps(int delta) | 141 | void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) |
160 | { | 142 | { |
161 | spin_lock(&caps_list_lock); | 143 | spin_lock(&mdsc->caps_list_lock); |
162 | caps_min_count += delta; | 144 | mdsc->caps_min_count += delta; |
163 | BUG_ON(caps_min_count < 0); | 145 | BUG_ON(mdsc->caps_min_count < 0); |
164 | spin_unlock(&caps_list_lock); | 146 | spin_unlock(&mdsc->caps_list_lock); |
165 | } | 147 | } |
166 | 148 | ||
167 | int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | 149 | int ceph_reserve_caps(struct ceph_mds_client *mdsc, |
150 | struct ceph_cap_reservation *ctx, int need) | ||
168 | { | 151 | { |
169 | int i; | 152 | int i; |
170 | struct ceph_cap *cap; | 153 | struct ceph_cap *cap; |
@@ -176,16 +159,17 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | |||
176 | dout("reserve caps ctx=%p need=%d\n", ctx, need); | 159 | dout("reserve caps ctx=%p need=%d\n", ctx, need); |
177 | 160 | ||
178 | /* first reserve any caps that are already allocated */ | 161 | /* first reserve any caps that are already allocated */ |
179 | spin_lock(&caps_list_lock); | 162 | spin_lock(&mdsc->caps_list_lock); |
180 | if (caps_avail_count >= need) | 163 | if (mdsc->caps_avail_count >= need) |
181 | have = need; | 164 | have = need; |
182 | else | 165 | else |
183 | have = caps_avail_count; | 166 | have = mdsc->caps_avail_count; |
184 | caps_avail_count -= have; | 167 | mdsc->caps_avail_count -= have; |
185 | caps_reserve_count += have; | 168 | mdsc->caps_reserve_count += have; |
186 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 169 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
187 | caps_avail_count); | 170 | mdsc->caps_reserve_count + |
188 | spin_unlock(&caps_list_lock); | 171 | mdsc->caps_avail_count); |
172 | spin_unlock(&mdsc->caps_list_lock); | ||
189 | 173 | ||
190 | for (i = have; i < need; i++) { | 174 | for (i = have; i < need; i++) { |
191 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); | 175 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); |
@@ -198,19 +182,20 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | |||
198 | } | 182 | } |
199 | BUG_ON(have + alloc != need); | 183 | BUG_ON(have + alloc != need); |
200 | 184 | ||
201 | spin_lock(&caps_list_lock); | 185 | spin_lock(&mdsc->caps_list_lock); |
202 | caps_total_count += alloc; | 186 | mdsc->caps_total_count += alloc; |
203 | caps_reserve_count += alloc; | 187 | mdsc->caps_reserve_count += alloc; |
204 | list_splice(&newcaps, &caps_list); | 188 | list_splice(&newcaps, &mdsc->caps_list); |
205 | 189 | ||
206 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 190 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
207 | caps_avail_count); | 191 | mdsc->caps_reserve_count + |
208 | spin_unlock(&caps_list_lock); | 192 | mdsc->caps_avail_count); |
193 | spin_unlock(&mdsc->caps_list_lock); | ||
209 | 194 | ||
210 | ctx->count = need; | 195 | ctx->count = need; |
211 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", | 196 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", |
212 | ctx, caps_total_count, caps_use_count, caps_reserve_count, | 197 | ctx, mdsc->caps_total_count, mdsc->caps_use_count, |
213 | caps_avail_count); | 198 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
214 | return 0; | 199 | return 0; |
215 | 200 | ||
216 | out_alloc_count: | 201 | out_alloc_count: |
@@ -220,26 +205,29 @@ out_alloc_count: | |||
220 | return ret; | 205 | return ret; |
221 | } | 206 | } |
222 | 207 | ||
223 | int ceph_unreserve_caps(struct ceph_cap_reservation *ctx) | 208 | int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
209 | struct ceph_cap_reservation *ctx) | ||
224 | { | 210 | { |
225 | dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); | 211 | dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); |
226 | if (ctx->count) { | 212 | if (ctx->count) { |
227 | spin_lock(&caps_list_lock); | 213 | spin_lock(&mdsc->caps_list_lock); |
228 | BUG_ON(caps_reserve_count < ctx->count); | 214 | BUG_ON(mdsc->caps_reserve_count < ctx->count); |
229 | caps_reserve_count -= ctx->count; | 215 | mdsc->caps_reserve_count -= ctx->count; |
230 | caps_avail_count += ctx->count; | 216 | mdsc->caps_avail_count += ctx->count; |
231 | ctx->count = 0; | 217 | ctx->count = 0; |
232 | dout("unreserve caps %d = %d used + %d resv + %d avail\n", | 218 | dout("unreserve caps %d = %d used + %d resv + %d avail\n", |
233 | caps_total_count, caps_use_count, caps_reserve_count, | 219 | mdsc->caps_total_count, mdsc->caps_use_count, |
234 | caps_avail_count); | 220 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
235 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 221 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
236 | caps_avail_count); | 222 | mdsc->caps_reserve_count + |
237 | spin_unlock(&caps_list_lock); | 223 | mdsc->caps_avail_count); |
224 | spin_unlock(&mdsc->caps_list_lock); | ||
238 | } | 225 | } |
239 | return 0; | 226 | return 0; |
240 | } | 227 | } |
241 | 228 | ||
242 | static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) | 229 | static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, |
230 | struct ceph_cap_reservation *ctx) | ||
243 | { | 231 | { |
244 | struct ceph_cap *cap = NULL; | 232 | struct ceph_cap *cap = NULL; |
245 | 233 | ||
@@ -247,71 +235,74 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) | |||
247 | if (!ctx) { | 235 | if (!ctx) { |
248 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); | 236 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); |
249 | if (cap) { | 237 | if (cap) { |
250 | caps_use_count++; | 238 | mdsc->caps_use_count++; |
251 | caps_total_count++; | 239 | mdsc->caps_total_count++; |
252 | } | 240 | } |
253 | return cap; | 241 | return cap; |
254 | } | 242 | } |
255 | 243 | ||
256 | spin_lock(&caps_list_lock); | 244 | spin_lock(&mdsc->caps_list_lock); |
257 | dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", | 245 | dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", |
258 | ctx, ctx->count, caps_total_count, caps_use_count, | 246 | ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, |
259 | caps_reserve_count, caps_avail_count); | 247 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
260 | BUG_ON(!ctx->count); | 248 | BUG_ON(!ctx->count); |
261 | BUG_ON(ctx->count > caps_reserve_count); | 249 | BUG_ON(ctx->count > mdsc->caps_reserve_count); |
262 | BUG_ON(list_empty(&caps_list)); | 250 | BUG_ON(list_empty(&mdsc->caps_list)); |
263 | 251 | ||
264 | ctx->count--; | 252 | ctx->count--; |
265 | caps_reserve_count--; | 253 | mdsc->caps_reserve_count--; |
266 | caps_use_count++; | 254 | mdsc->caps_use_count++; |
267 | 255 | ||
268 | cap = list_first_entry(&caps_list, struct ceph_cap, caps_item); | 256 | cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item); |
269 | list_del(&cap->caps_item); | 257 | list_del(&cap->caps_item); |
270 | 258 | ||
271 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 259 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
272 | caps_avail_count); | 260 | mdsc->caps_reserve_count + mdsc->caps_avail_count); |
273 | spin_unlock(&caps_list_lock); | 261 | spin_unlock(&mdsc->caps_list_lock); |
274 | return cap; | 262 | return cap; |
275 | } | 263 | } |
276 | 264 | ||
277 | void ceph_put_cap(struct ceph_cap *cap) | 265 | void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) |
278 | { | 266 | { |
279 | spin_lock(&caps_list_lock); | 267 | spin_lock(&mdsc->caps_list_lock); |
280 | dout("put_cap %p %d = %d used + %d resv + %d avail\n", | 268 | dout("put_cap %p %d = %d used + %d resv + %d avail\n", |
281 | cap, caps_total_count, caps_use_count, | 269 | cap, mdsc->caps_total_count, mdsc->caps_use_count, |
282 | caps_reserve_count, caps_avail_count); | 270 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
283 | caps_use_count--; | 271 | mdsc->caps_use_count--; |
284 | /* | 272 | /* |
285 | * Keep some preallocated caps around (ceph_min_count), to | 273 | * Keep some preallocated caps around (ceph_min_count), to |
286 | * avoid lots of free/alloc churn. | 274 | * avoid lots of free/alloc churn. |
287 | */ | 275 | */ |
288 | if (caps_avail_count >= caps_reserve_count + caps_min_count) { | 276 | if (mdsc->caps_avail_count >= mdsc->caps_reserve_count + |
289 | caps_total_count--; | 277 | mdsc->caps_min_count) { |
278 | mdsc->caps_total_count--; | ||
290 | kmem_cache_free(ceph_cap_cachep, cap); | 279 | kmem_cache_free(ceph_cap_cachep, cap); |
291 | } else { | 280 | } else { |
292 | caps_avail_count++; | 281 | mdsc->caps_avail_count++; |
293 | list_add(&cap->caps_item, &caps_list); | 282 | list_add(&cap->caps_item, &mdsc->caps_list); |
294 | } | 283 | } |
295 | 284 | ||
296 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 285 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
297 | caps_avail_count); | 286 | mdsc->caps_reserve_count + mdsc->caps_avail_count); |
298 | spin_unlock(&caps_list_lock); | 287 | spin_unlock(&mdsc->caps_list_lock); |
299 | } | 288 | } |
300 | 289 | ||
301 | void ceph_reservation_status(struct ceph_client *client, | 290 | void ceph_reservation_status(struct ceph_client *client, |
302 | int *total, int *avail, int *used, int *reserved, | 291 | int *total, int *avail, int *used, int *reserved, |
303 | int *min) | 292 | int *min) |
304 | { | 293 | { |
294 | struct ceph_mds_client *mdsc = &client->mdsc; | ||
295 | |||
305 | if (total) | 296 | if (total) |
306 | *total = caps_total_count; | 297 | *total = mdsc->caps_total_count; |
307 | if (avail) | 298 | if (avail) |
308 | *avail = caps_avail_count; | 299 | *avail = mdsc->caps_avail_count; |
309 | if (used) | 300 | if (used) |
310 | *used = caps_use_count; | 301 | *used = mdsc->caps_use_count; |
311 | if (reserved) | 302 | if (reserved) |
312 | *reserved = caps_reserve_count; | 303 | *reserved = mdsc->caps_reserve_count; |
313 | if (min) | 304 | if (min) |
314 | *min = caps_min_count; | 305 | *min = mdsc->caps_min_count; |
315 | } | 306 | } |
316 | 307 | ||
317 | /* | 308 | /* |
@@ -336,22 +327,29 @@ static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) | |||
336 | return NULL; | 327 | return NULL; |
337 | } | 328 | } |
338 | 329 | ||
330 | struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds) | ||
331 | { | ||
332 | struct ceph_cap *cap; | ||
333 | |||
334 | spin_lock(&ci->vfs_inode.i_lock); | ||
335 | cap = __get_cap_for_mds(ci, mds); | ||
336 | spin_unlock(&ci->vfs_inode.i_lock); | ||
337 | return cap; | ||
338 | } | ||
339 | |||
339 | /* | 340 | /* |
340 | * Return id of any MDS with a cap, preferably FILE_WR|WRBUFFER|EXCL, else | 341 | * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1. |
341 | * -1. | ||
342 | */ | 342 | */ |
343 | static int __ceph_get_cap_mds(struct ceph_inode_info *ci, u32 *mseq) | 343 | static int __ceph_get_cap_mds(struct ceph_inode_info *ci) |
344 | { | 344 | { |
345 | struct ceph_cap *cap; | 345 | struct ceph_cap *cap; |
346 | int mds = -1; | 346 | int mds = -1; |
347 | struct rb_node *p; | 347 | struct rb_node *p; |
348 | 348 | ||
349 | /* prefer mds with WR|WRBUFFER|EXCL caps */ | 349 | /* prefer mds with WR|BUFFER|EXCL caps */ |
350 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { | 350 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { |
351 | cap = rb_entry(p, struct ceph_cap, ci_node); | 351 | cap = rb_entry(p, struct ceph_cap, ci_node); |
352 | mds = cap->mds; | 352 | mds = cap->mds; |
353 | if (mseq) | ||
354 | *mseq = cap->mseq; | ||
355 | if (cap->issued & (CEPH_CAP_FILE_WR | | 353 | if (cap->issued & (CEPH_CAP_FILE_WR | |
356 | CEPH_CAP_FILE_BUFFER | | 354 | CEPH_CAP_FILE_BUFFER | |
357 | CEPH_CAP_FILE_EXCL)) | 355 | CEPH_CAP_FILE_EXCL)) |
@@ -364,7 +362,7 @@ int ceph_get_cap_mds(struct inode *inode) | |||
364 | { | 362 | { |
365 | int mds; | 363 | int mds; |
366 | spin_lock(&inode->i_lock); | 364 | spin_lock(&inode->i_lock); |
367 | mds = __ceph_get_cap_mds(ceph_inode(inode), NULL); | 365 | mds = __ceph_get_cap_mds(ceph_inode(inode)); |
368 | spin_unlock(&inode->i_lock); | 366 | spin_unlock(&inode->i_lock); |
369 | return mds; | 367 | return mds; |
370 | } | 368 | } |
@@ -483,8 +481,8 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, | |||
483 | * Each time we receive FILE_CACHE anew, we increment | 481 | * Each time we receive FILE_CACHE anew, we increment |
484 | * i_rdcache_gen. | 482 | * i_rdcache_gen. |
485 | */ | 483 | */ |
486 | if ((issued & CEPH_CAP_FILE_CACHE) && | 484 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && |
487 | (had & CEPH_CAP_FILE_CACHE) == 0) | 485 | (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) |
488 | ci->i_rdcache_gen++; | 486 | ci->i_rdcache_gen++; |
489 | 487 | ||
490 | /* | 488 | /* |
@@ -543,7 +541,7 @@ retry: | |||
543 | new_cap = NULL; | 541 | new_cap = NULL; |
544 | } else { | 542 | } else { |
545 | spin_unlock(&inode->i_lock); | 543 | spin_unlock(&inode->i_lock); |
546 | new_cap = get_cap(caps_reservation); | 544 | new_cap = get_cap(mdsc, caps_reservation); |
547 | if (new_cap == NULL) | 545 | if (new_cap == NULL) |
548 | return -ENOMEM; | 546 | return -ENOMEM; |
549 | goto retry; | 547 | goto retry; |
@@ -588,6 +586,7 @@ retry: | |||
588 | } else { | 586 | } else { |
589 | pr_err("ceph_add_cap: couldn't find snap realm %llx\n", | 587 | pr_err("ceph_add_cap: couldn't find snap realm %llx\n", |
590 | realmino); | 588 | realmino); |
589 | WARN_ON(!realm); | ||
591 | } | 590 | } |
592 | } | 591 | } |
593 | 592 | ||
@@ -831,7 +830,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) | |||
831 | { | 830 | { |
832 | int want = 0; | 831 | int want = 0; |
833 | int mode; | 832 | int mode; |
834 | for (mode = 0; mode < 4; mode++) | 833 | for (mode = 0; mode < CEPH_FILE_MODE_NUM; mode++) |
835 | if (ci->i_nr_by_mode[mode]) | 834 | if (ci->i_nr_by_mode[mode]) |
836 | want |= ceph_caps_for_mode(mode); | 835 | want |= ceph_caps_for_mode(mode); |
837 | return want; | 836 | return want; |
@@ -901,7 +900,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
901 | ci->i_auth_cap = NULL; | 900 | ci->i_auth_cap = NULL; |
902 | 901 | ||
903 | if (removed) | 902 | if (removed) |
904 | ceph_put_cap(cap); | 903 | ceph_put_cap(mdsc, cap); |
905 | 904 | ||
906 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { | 905 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { |
907 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 906 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
@@ -1197,6 +1196,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1197 | */ | 1196 | */ |
1198 | void __ceph_flush_snaps(struct ceph_inode_info *ci, | 1197 | void __ceph_flush_snaps(struct ceph_inode_info *ci, |
1199 | struct ceph_mds_session **psession) | 1198 | struct ceph_mds_session **psession) |
1199 | __releases(ci->vfs_inode->i_lock) | ||
1200 | __acquires(ci->vfs_inode->i_lock) | ||
1200 | { | 1201 | { |
1201 | struct inode *inode = &ci->vfs_inode; | 1202 | struct inode *inode = &ci->vfs_inode; |
1202 | int mds; | 1203 | int mds; |
@@ -1232,7 +1233,13 @@ retry: | |||
1232 | BUG_ON(capsnap->dirty == 0); | 1233 | BUG_ON(capsnap->dirty == 0); |
1233 | 1234 | ||
1234 | /* pick mds, take s_mutex */ | 1235 | /* pick mds, take s_mutex */ |
1235 | mds = __ceph_get_cap_mds(ci, &mseq); | 1236 | if (ci->i_auth_cap == NULL) { |
1237 | dout("no auth cap (migrating?), doing nothing\n"); | ||
1238 | goto out; | ||
1239 | } | ||
1240 | mds = ci->i_auth_cap->session->s_mds; | ||
1241 | mseq = ci->i_auth_cap->mseq; | ||
1242 | |||
1236 | if (session && session->s_mds != mds) { | 1243 | if (session && session->s_mds != mds) { |
1237 | dout("oops, wrong session %p mutex\n", session); | 1244 | dout("oops, wrong session %p mutex\n", session); |
1238 | mutex_unlock(&session->s_mutex); | 1245 | mutex_unlock(&session->s_mutex); |
@@ -1251,8 +1258,8 @@ retry: | |||
1251 | } | 1258 | } |
1252 | /* | 1259 | /* |
1253 | * if session == NULL, we raced against a cap | 1260 | * if session == NULL, we raced against a cap |
1254 | * deletion. retry, and we'll get a better | 1261 | * deletion or migration. retry, and we'll |
1255 | * @mds value next time. | 1262 | * get a better @mds value next time. |
1256 | */ | 1263 | */ |
1257 | spin_lock(&inode->i_lock); | 1264 | spin_lock(&inode->i_lock); |
1258 | goto retry; | 1265 | goto retry; |
@@ -1290,6 +1297,7 @@ retry: | |||
1290 | list_del_init(&ci->i_snap_flush_item); | 1297 | list_del_init(&ci->i_snap_flush_item); |
1291 | spin_unlock(&mdsc->snap_flush_lock); | 1298 | spin_unlock(&mdsc->snap_flush_lock); |
1292 | 1299 | ||
1300 | out: | ||
1293 | if (psession) | 1301 | if (psession) |
1294 | *psession = session; | 1302 | *psession = session; |
1295 | else if (session) { | 1303 | else if (session) { |
@@ -1435,7 +1443,6 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1435 | */ | 1443 | */ |
1436 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 1444 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
1437 | struct ceph_mds_session *session) | 1445 | struct ceph_mds_session *session) |
1438 | __releases(session->s_mutex) | ||
1439 | { | 1446 | { |
1440 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 1447 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); |
1441 | struct ceph_mds_client *mdsc = &client->mdsc; | 1448 | struct ceph_mds_client *mdsc = &client->mdsc; |
@@ -1510,11 +1517,13 @@ retry_locked: | |||
1510 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ | 1517 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ |
1511 | ci->i_rdcache_gen && /* may have cached pages */ | 1518 | ci->i_rdcache_gen && /* may have cached pages */ |
1512 | (file_wanted == 0 || /* no open files */ | 1519 | (file_wanted == 0 || /* no open files */ |
1513 | (revoking & CEPH_CAP_FILE_CACHE)) && /* or revoking cache */ | 1520 | (revoking & (CEPH_CAP_FILE_CACHE| |
1521 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ | ||
1514 | !tried_invalidate) { | 1522 | !tried_invalidate) { |
1515 | dout("check_caps trying to invalidate on %p\n", inode); | 1523 | dout("check_caps trying to invalidate on %p\n", inode); |
1516 | if (try_nonblocking_invalidate(inode) < 0) { | 1524 | if (try_nonblocking_invalidate(inode) < 0) { |
1517 | if (revoking & CEPH_CAP_FILE_CACHE) { | 1525 | if (revoking & (CEPH_CAP_FILE_CACHE| |
1526 | CEPH_CAP_FILE_LAZYIO)) { | ||
1518 | dout("check_caps queuing invalidate\n"); | 1527 | dout("check_caps queuing invalidate\n"); |
1519 | queue_invalidate = 1; | 1528 | queue_invalidate = 1; |
1520 | ci->i_rdcache_revoking = ci->i_rdcache_gen; | 1529 | ci->i_rdcache_revoking = ci->i_rdcache_gen; |
@@ -2250,8 +2259,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2250 | struct ceph_mds_session *session, | 2259 | struct ceph_mds_session *session, |
2251 | struct ceph_cap *cap, | 2260 | struct ceph_cap *cap, |
2252 | struct ceph_buffer *xattr_buf) | 2261 | struct ceph_buffer *xattr_buf) |
2253 | __releases(inode->i_lock) | 2262 | __releases(inode->i_lock) |
2254 | __releases(session->s_mutex) | ||
2255 | { | 2263 | { |
2256 | struct ceph_inode_info *ci = ceph_inode(inode); | 2264 | struct ceph_inode_info *ci = ceph_inode(inode); |
2257 | int mds = session->s_mds; | 2265 | int mds = session->s_mds; |
@@ -2278,6 +2286,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2278 | * will invalidate _after_ writeback.) | 2286 | * will invalidate _after_ writeback.) |
2279 | */ | 2287 | */ |
2280 | if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && | 2288 | if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && |
2289 | (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && | ||
2281 | !ci->i_wrbuffer_ref) { | 2290 | !ci->i_wrbuffer_ref) { |
2282 | if (try_nonblocking_invalidate(inode) == 0) { | 2291 | if (try_nonblocking_invalidate(inode) == 0) { |
2283 | revoked_rdcache = 1; | 2292 | revoked_rdcache = 1; |
@@ -2369,15 +2378,22 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2369 | 2378 | ||
2370 | /* revocation, grant, or no-op? */ | 2379 | /* revocation, grant, or no-op? */ |
2371 | if (cap->issued & ~newcaps) { | 2380 | if (cap->issued & ~newcaps) { |
2372 | dout("revocation: %s -> %s\n", ceph_cap_string(cap->issued), | 2381 | int revoking = cap->issued & ~newcaps; |
2373 | ceph_cap_string(newcaps)); | 2382 | |
2374 | if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) | 2383 | dout("revocation: %s -> %s (revoking %s)\n", |
2375 | writeback = 1; /* will delay ack */ | 2384 | ceph_cap_string(cap->issued), |
2376 | else if (dirty & ~newcaps) | 2385 | ceph_cap_string(newcaps), |
2377 | check_caps = 1; /* initiate writeback in check_caps */ | 2386 | ceph_cap_string(revoking)); |
2378 | else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || | 2387 | if (revoking & used & CEPH_CAP_FILE_BUFFER) |
2379 | revoked_rdcache) | 2388 | writeback = 1; /* initiate writeback; will delay ack */ |
2380 | check_caps = 2; /* send revoke ack in check_caps */ | 2389 | else if (revoking == CEPH_CAP_FILE_CACHE && |
2390 | (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && | ||
2391 | queue_invalidate) | ||
2392 | ; /* do nothing yet, invalidation will be queued */ | ||
2393 | else if (cap == ci->i_auth_cap) | ||
2394 | check_caps = 1; /* check auth cap only */ | ||
2395 | else | ||
2396 | check_caps = 2; /* check all caps */ | ||
2381 | cap->issued = newcaps; | 2397 | cap->issued = newcaps; |
2382 | cap->implemented |= newcaps; | 2398 | cap->implemented |= newcaps; |
2383 | } else if (cap->issued == newcaps) { | 2399 | } else if (cap->issued == newcaps) { |
@@ -2568,7 +2584,8 @@ static void handle_cap_trunc(struct inode *inode, | |||
2568 | * caller holds s_mutex | 2584 | * caller holds s_mutex |
2569 | */ | 2585 | */ |
2570 | static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | 2586 | static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, |
2571 | struct ceph_mds_session *session) | 2587 | struct ceph_mds_session *session, |
2588 | int *open_target_sessions) | ||
2572 | { | 2589 | { |
2573 | struct ceph_inode_info *ci = ceph_inode(inode); | 2590 | struct ceph_inode_info *ci = ceph_inode(inode); |
2574 | int mds = session->s_mds; | 2591 | int mds = session->s_mds; |
@@ -2600,6 +2617,12 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2600 | ci->i_cap_exporting_mds = mds; | 2617 | ci->i_cap_exporting_mds = mds; |
2601 | ci->i_cap_exporting_mseq = mseq; | 2618 | ci->i_cap_exporting_mseq = mseq; |
2602 | ci->i_cap_exporting_issued = cap->issued; | 2619 | ci->i_cap_exporting_issued = cap->issued; |
2620 | |||
2621 | /* | ||
2622 | * make sure we have open sessions with all possible | ||
2623 | * export targets, so that we get the matching IMPORT | ||
2624 | */ | ||
2625 | *open_target_sessions = 1; | ||
2603 | } | 2626 | } |
2604 | __ceph_remove_cap(cap); | 2627 | __ceph_remove_cap(cap); |
2605 | } | 2628 | } |
@@ -2675,6 +2698,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2675 | u64 size, max_size; | 2698 | u64 size, max_size; |
2676 | u64 tid; | 2699 | u64 tid; |
2677 | void *snaptrace; | 2700 | void *snaptrace; |
2701 | size_t snaptrace_len; | ||
2702 | void *flock; | ||
2703 | u32 flock_len; | ||
2704 | int open_target_sessions = 0; | ||
2678 | 2705 | ||
2679 | dout("handle_caps from mds%d\n", mds); | 2706 | dout("handle_caps from mds%d\n", mds); |
2680 | 2707 | ||
@@ -2683,7 +2710,6 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2683 | if (msg->front.iov_len < sizeof(*h)) | 2710 | if (msg->front.iov_len < sizeof(*h)) |
2684 | goto bad; | 2711 | goto bad; |
2685 | h = msg->front.iov_base; | 2712 | h = msg->front.iov_base; |
2686 | snaptrace = h + 1; | ||
2687 | op = le32_to_cpu(h->op); | 2713 | op = le32_to_cpu(h->op); |
2688 | vino.ino = le64_to_cpu(h->ino); | 2714 | vino.ino = le64_to_cpu(h->ino); |
2689 | vino.snap = CEPH_NOSNAP; | 2715 | vino.snap = CEPH_NOSNAP; |
@@ -2693,6 +2719,21 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2693 | size = le64_to_cpu(h->size); | 2719 | size = le64_to_cpu(h->size); |
2694 | max_size = le64_to_cpu(h->max_size); | 2720 | max_size = le64_to_cpu(h->max_size); |
2695 | 2721 | ||
2722 | snaptrace = h + 1; | ||
2723 | snaptrace_len = le32_to_cpu(h->snap_trace_len); | ||
2724 | |||
2725 | if (le16_to_cpu(msg->hdr.version) >= 2) { | ||
2726 | void *p, *end; | ||
2727 | |||
2728 | p = snaptrace + snaptrace_len; | ||
2729 | end = msg->front.iov_base + msg->front.iov_len; | ||
2730 | ceph_decode_32_safe(&p, end, flock_len, bad); | ||
2731 | flock = p; | ||
2732 | } else { | ||
2733 | flock = NULL; | ||
2734 | flock_len = 0; | ||
2735 | } | ||
2736 | |||
2696 | mutex_lock(&session->s_mutex); | 2737 | mutex_lock(&session->s_mutex); |
2697 | session->s_seq++; | 2738 | session->s_seq++; |
2698 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, | 2739 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, |
@@ -2714,7 +2755,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2714 | * along for the mds (who clearly thinks we still have this | 2755 | * along for the mds (who clearly thinks we still have this |
2715 | * cap). | 2756 | * cap). |
2716 | */ | 2757 | */ |
2717 | ceph_add_cap_releases(mdsc, session, -1); | 2758 | ceph_add_cap_releases(mdsc, session); |
2718 | ceph_send_cap_releases(mdsc, session); | 2759 | ceph_send_cap_releases(mdsc, session); |
2719 | goto done; | 2760 | goto done; |
2720 | } | 2761 | } |
@@ -2726,12 +2767,12 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2726 | goto done; | 2767 | goto done; |
2727 | 2768 | ||
2728 | case CEPH_CAP_OP_EXPORT: | 2769 | case CEPH_CAP_OP_EXPORT: |
2729 | handle_cap_export(inode, h, session); | 2770 | handle_cap_export(inode, h, session, &open_target_sessions); |
2730 | goto done; | 2771 | goto done; |
2731 | 2772 | ||
2732 | case CEPH_CAP_OP_IMPORT: | 2773 | case CEPH_CAP_OP_IMPORT: |
2733 | handle_cap_import(mdsc, inode, h, session, | 2774 | handle_cap_import(mdsc, inode, h, session, |
2734 | snaptrace, le32_to_cpu(h->snap_trace_len)); | 2775 | snaptrace, snaptrace_len); |
2735 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, | 2776 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, |
2736 | session); | 2777 | session); |
2737 | goto done_unlocked; | 2778 | goto done_unlocked; |
@@ -2773,6 +2814,8 @@ done: | |||
2773 | done_unlocked: | 2814 | done_unlocked: |
2774 | if (inode) | 2815 | if (inode) |
2775 | iput(inode); | 2816 | iput(inode); |
2817 | if (open_target_sessions) | ||
2818 | ceph_mdsc_open_export_target_sessions(mdsc, session); | ||
2776 | return; | 2819 | return; |
2777 | 2820 | ||
2778 | bad: | 2821 | bad: |
diff --git a/fs/ceph/ceph_frag.h b/fs/ceph/ceph_frag.h index 793f50cb7c22..5babb8e95352 100644 --- a/fs/ceph/ceph_frag.h +++ b/fs/ceph/ceph_frag.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _FS_CEPH_FRAG_H | 1 | #ifndef FS_CEPH_FRAG_H |
2 | #define _FS_CEPH_FRAG_H | 2 | #define FS_CEPH_FRAG_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * "Frags" are a way to describe a subset of a 32-bit number space, | 5 | * "Frags" are a way to describe a subset of a 32-bit number space, |
diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c index 79d76bc4303f..3ac6cc7c1156 100644 --- a/fs/ceph/ceph_fs.c +++ b/fs/ceph/ceph_fs.c | |||
@@ -29,46 +29,44 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout) | |||
29 | 29 | ||
30 | int ceph_flags_to_mode(int flags) | 30 | int ceph_flags_to_mode(int flags) |
31 | { | 31 | { |
32 | int mode; | ||
33 | |||
32 | #ifdef O_DIRECTORY /* fixme */ | 34 | #ifdef O_DIRECTORY /* fixme */ |
33 | if ((flags & O_DIRECTORY) == O_DIRECTORY) | 35 | if ((flags & O_DIRECTORY) == O_DIRECTORY) |
34 | return CEPH_FILE_MODE_PIN; | 36 | return CEPH_FILE_MODE_PIN; |
35 | #endif | 37 | #endif |
38 | if ((flags & O_APPEND) == O_APPEND) | ||
39 | flags |= O_WRONLY; | ||
40 | |||
41 | if ((flags & O_ACCMODE) == O_RDWR) | ||
42 | mode = CEPH_FILE_MODE_RDWR; | ||
43 | else if ((flags & O_ACCMODE) == O_WRONLY) | ||
44 | mode = CEPH_FILE_MODE_WR; | ||
45 | else | ||
46 | mode = CEPH_FILE_MODE_RD; | ||
47 | |||
36 | #ifdef O_LAZY | 48 | #ifdef O_LAZY |
37 | if (flags & O_LAZY) | 49 | if (flags & O_LAZY) |
38 | return CEPH_FILE_MODE_LAZY; | 50 | mode |= CEPH_FILE_MODE_LAZY; |
39 | #endif | 51 | #endif |
40 | if ((flags & O_APPEND) == O_APPEND) | ||
41 | flags |= O_WRONLY; | ||
42 | 52 | ||
43 | flags &= O_ACCMODE; | 53 | return mode; |
44 | if ((flags & O_RDWR) == O_RDWR) | ||
45 | return CEPH_FILE_MODE_RDWR; | ||
46 | if ((flags & O_WRONLY) == O_WRONLY) | ||
47 | return CEPH_FILE_MODE_WR; | ||
48 | return CEPH_FILE_MODE_RD; | ||
49 | } | 54 | } |
50 | 55 | ||
51 | int ceph_caps_for_mode(int mode) | 56 | int ceph_caps_for_mode(int mode) |
52 | { | 57 | { |
53 | switch (mode) { | 58 | int caps = CEPH_CAP_PIN; |
54 | case CEPH_FILE_MODE_PIN: | 59 | |
55 | return CEPH_CAP_PIN; | 60 | if (mode & CEPH_FILE_MODE_RD) |
56 | case CEPH_FILE_MODE_RD: | 61 | caps |= CEPH_CAP_FILE_SHARED | |
57 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
58 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; | 62 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; |
59 | case CEPH_FILE_MODE_RDWR: | 63 | if (mode & CEPH_FILE_MODE_WR) |
60 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | 64 | caps |= CEPH_CAP_FILE_EXCL | |
61 | CEPH_CAP_FILE_EXCL | | ||
62 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | | ||
63 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | ||
64 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | ||
65 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | ||
66 | case CEPH_FILE_MODE_WR: | ||
67 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
68 | CEPH_CAP_FILE_EXCL | | ||
69 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | 65 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | |
70 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | 66 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | |
71 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | 67 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; |
72 | } | 68 | if (mode & CEPH_FILE_MODE_LAZY) |
73 | return 0; | 69 | caps |= CEPH_CAP_FILE_LAZYIO; |
70 | |||
71 | return caps; | ||
74 | } | 72 | } |
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 2fa992eaf7da..d5619ac86711 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h | |||
@@ -9,27 +9,13 @@ | |||
9 | * LGPL2 | 9 | * LGPL2 |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #ifndef _FS_CEPH_CEPH_FS_H | 12 | #ifndef CEPH_FS_H |
13 | #define _FS_CEPH_CEPH_FS_H | 13 | #define CEPH_FS_H |
14 | 14 | ||
15 | #include "msgr.h" | 15 | #include "msgr.h" |
16 | #include "rados.h" | 16 | #include "rados.h" |
17 | 17 | ||
18 | /* | 18 | /* |
19 | * Ceph release version | ||
20 | */ | ||
21 | #define CEPH_VERSION_MAJOR 0 | ||
22 | #define CEPH_VERSION_MINOR 20 | ||
23 | #define CEPH_VERSION_PATCH 0 | ||
24 | |||
25 | #define _CEPH_STRINGIFY(x) #x | ||
26 | #define CEPH_STRINGIFY(x) _CEPH_STRINGIFY(x) | ||
27 | #define CEPH_MAKE_VERSION(x, y, z) CEPH_STRINGIFY(x) "." CEPH_STRINGIFY(y) \ | ||
28 | "." CEPH_STRINGIFY(z) | ||
29 | #define CEPH_VERSION CEPH_MAKE_VERSION(CEPH_VERSION_MAJOR, \ | ||
30 | CEPH_VERSION_MINOR, CEPH_VERSION_PATCH) | ||
31 | |||
32 | /* | ||
33 | * subprotocol versions. when specific messages types or high-level | 19 | * subprotocol versions. when specific messages types or high-level |
34 | * protocols change, bump the affected components. we keep rev | 20 | * protocols change, bump the affected components. we keep rev |
35 | * internal cluster protocols separately from the public, | 21 | * internal cluster protocols separately from the public, |
@@ -53,18 +39,10 @@ | |||
53 | /* | 39 | /* |
54 | * feature bits | 40 | * feature bits |
55 | */ | 41 | */ |
56 | #define CEPH_FEATURE_UID 1 | 42 | #define CEPH_FEATURE_UID (1<<0) |
57 | #define CEPH_FEATURE_NOSRCADDR 2 | 43 | #define CEPH_FEATURE_NOSRCADDR (1<<1) |
58 | #define CEPH_FEATURE_FLOCK 4 | 44 | #define CEPH_FEATURE_MONCLOCKCHECK (1<<2) |
59 | 45 | #define CEPH_FEATURE_FLOCK (1<<3) | |
60 | #define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
61 | #define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID | ||
62 | #define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK | ||
63 | #define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID | ||
64 | #define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
65 | #define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID | ||
66 | #define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
67 | #define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
68 | 46 | ||
69 | 47 | ||
70 | /* | 48 | /* |
@@ -96,6 +74,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | |||
96 | #define CEPH_CRYPTO_NONE 0x0 | 74 | #define CEPH_CRYPTO_NONE 0x0 |
97 | #define CEPH_CRYPTO_AES 0x1 | 75 | #define CEPH_CRYPTO_AES 0x1 |
98 | 76 | ||
77 | #define CEPH_AES_IV "cephsageyudagreg" | ||
78 | |||
99 | /* security/authentication protocols */ | 79 | /* security/authentication protocols */ |
100 | #define CEPH_AUTH_UNKNOWN 0x0 | 80 | #define CEPH_AUTH_UNKNOWN 0x0 |
101 | #define CEPH_AUTH_NONE 0x1 | 81 | #define CEPH_AUTH_NONE 0x1 |
@@ -275,6 +255,7 @@ extern const char *ceph_mds_state_name(int s); | |||
275 | #define CEPH_LOCK_IDFT 512 /* dir frag tree */ | 255 | #define CEPH_LOCK_IDFT 512 /* dir frag tree */ |
276 | #define CEPH_LOCK_INEST 1024 /* mds internal */ | 256 | #define CEPH_LOCK_INEST 1024 /* mds internal */ |
277 | #define CEPH_LOCK_IXATTR 2048 | 257 | #define CEPH_LOCK_IXATTR 2048 |
258 | #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ | ||
278 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ | 259 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ |
279 | 260 | ||
280 | /* client_session ops */ | 261 | /* client_session ops */ |
@@ -316,6 +297,8 @@ enum { | |||
316 | CEPH_MDS_OP_RMXATTR = 0x01106, | 297 | CEPH_MDS_OP_RMXATTR = 0x01106, |
317 | CEPH_MDS_OP_SETLAYOUT = 0x01107, | 298 | CEPH_MDS_OP_SETLAYOUT = 0x01107, |
318 | CEPH_MDS_OP_SETATTR = 0x01108, | 299 | CEPH_MDS_OP_SETATTR = 0x01108, |
300 | CEPH_MDS_OP_SETFILELOCK= 0x01109, | ||
301 | CEPH_MDS_OP_GETFILELOCK= 0x00110, | ||
319 | 302 | ||
320 | CEPH_MDS_OP_MKNOD = 0x01201, | 303 | CEPH_MDS_OP_MKNOD = 0x01201, |
321 | CEPH_MDS_OP_LINK = 0x01202, | 304 | CEPH_MDS_OP_LINK = 0x01202, |
@@ -386,6 +369,15 @@ union ceph_mds_request_args { | |||
386 | struct { | 369 | struct { |
387 | struct ceph_file_layout layout; | 370 | struct ceph_file_layout layout; |
388 | } __attribute__ ((packed)) setlayout; | 371 | } __attribute__ ((packed)) setlayout; |
372 | struct { | ||
373 | __u8 rule; /* currently fcntl or flock */ | ||
374 | __u8 type; /* shared, exclusive, remove*/ | ||
375 | __le64 pid; /* process id requesting the lock */ | ||
376 | __le64 pid_namespace; | ||
377 | __le64 start; /* initial location to lock */ | ||
378 | __le64 length; /* num bytes to lock from start */ | ||
379 | __u8 wait; /* will caller wait for lock to become available? */ | ||
380 | } __attribute__ ((packed)) filelock_change; | ||
389 | } __attribute__ ((packed)); | 381 | } __attribute__ ((packed)); |
390 | 382 | ||
391 | #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ | 383 | #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ |
@@ -480,6 +472,23 @@ struct ceph_mds_reply_dirfrag { | |||
480 | __le32 dist[]; | 472 | __le32 dist[]; |
481 | } __attribute__ ((packed)); | 473 | } __attribute__ ((packed)); |
482 | 474 | ||
475 | #define CEPH_LOCK_FCNTL 1 | ||
476 | #define CEPH_LOCK_FLOCK 2 | ||
477 | |||
478 | #define CEPH_LOCK_SHARED 1 | ||
479 | #define CEPH_LOCK_EXCL 2 | ||
480 | #define CEPH_LOCK_UNLOCK 4 | ||
481 | |||
482 | struct ceph_filelock { | ||
483 | __le64 start;/* file offset to start lock at */ | ||
484 | __le64 length; /* num bytes to lock; 0 for all following start */ | ||
485 | __le64 client; /* which client holds the lock */ | ||
486 | __le64 pid; /* process id holding the lock on the client */ | ||
487 | __le64 pid_namespace; | ||
488 | __u8 type; /* shared lock, exclusive lock, or unlock */ | ||
489 | } __attribute__ ((packed)); | ||
490 | |||
491 | |||
483 | /* file access modes */ | 492 | /* file access modes */ |
484 | #define CEPH_FILE_MODE_PIN 0 | 493 | #define CEPH_FILE_MODE_PIN 0 |
485 | #define CEPH_FILE_MODE_RD 1 | 494 | #define CEPH_FILE_MODE_RD 1 |
@@ -508,9 +517,10 @@ int ceph_flags_to_mode(int flags); | |||
508 | #define CEPH_CAP_SAUTH 2 | 517 | #define CEPH_CAP_SAUTH 2 |
509 | #define CEPH_CAP_SLINK 4 | 518 | #define CEPH_CAP_SLINK 4 |
510 | #define CEPH_CAP_SXATTR 6 | 519 | #define CEPH_CAP_SXATTR 6 |
511 | #define CEPH_CAP_SFILE 8 /* goes at the end (uses >2 cap bits) */ | 520 | #define CEPH_CAP_SFILE 8 |
521 | #define CEPH_CAP_SFLOCK 20 | ||
512 | 522 | ||
513 | #define CEPH_CAP_BITS 16 | 523 | #define CEPH_CAP_BITS 22 |
514 | 524 | ||
515 | /* composed values */ | 525 | /* composed values */ |
516 | #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) | 526 | #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) |
@@ -528,6 +538,9 @@ int ceph_flags_to_mode(int flags); | |||
528 | #define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE) | 538 | #define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE) |
529 | #define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE) | 539 | #define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE) |
530 | #define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE) | 540 | #define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE) |
541 | #define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK) | ||
542 | #define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK) | ||
543 | |||
531 | 544 | ||
532 | /* cap masks (for getattr) */ | 545 | /* cap masks (for getattr) */ |
533 | #define CEPH_STAT_CAP_INODE CEPH_CAP_PIN | 546 | #define CEPH_STAT_CAP_INODE CEPH_CAP_PIN |
@@ -563,7 +576,8 @@ int ceph_flags_to_mode(int flags); | |||
563 | CEPH_CAP_FILE_EXCL) | 576 | CEPH_CAP_FILE_EXCL) |
564 | #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) | 577 | #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) |
565 | #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ | 578 | #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ |
566 | CEPH_CAP_ANY_FILE_WR | CEPH_CAP_PIN) | 579 | CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \ |
580 | CEPH_CAP_PIN) | ||
567 | 581 | ||
568 | #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ | 582 | #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ |
569 | CEPH_LOCK_IXATTR) | 583 | CEPH_LOCK_IXATTR) |
@@ -653,12 +667,21 @@ struct ceph_mds_cap_reconnect { | |||
653 | __le64 cap_id; | 667 | __le64 cap_id; |
654 | __le32 wanted; | 668 | __le32 wanted; |
655 | __le32 issued; | 669 | __le32 issued; |
670 | __le64 snaprealm; | ||
671 | __le64 pathbase; /* base ino for our path to this ino */ | ||
672 | __le32 flock_len; /* size of flock state blob, if any */ | ||
673 | } __attribute__ ((packed)); | ||
674 | /* followed by flock blob */ | ||
675 | |||
676 | struct ceph_mds_cap_reconnect_v1 { | ||
677 | __le64 cap_id; | ||
678 | __le32 wanted; | ||
679 | __le32 issued; | ||
656 | __le64 size; | 680 | __le64 size; |
657 | struct ceph_timespec mtime, atime; | 681 | struct ceph_timespec mtime, atime; |
658 | __le64 snaprealm; | 682 | __le64 snaprealm; |
659 | __le64 pathbase; /* base ino for our path to this ino */ | 683 | __le64 pathbase; /* base ino for our path to this ino */ |
660 | } __attribute__ ((packed)); | 684 | } __attribute__ ((packed)); |
661 | /* followed by encoded string */ | ||
662 | 685 | ||
663 | struct ceph_mds_snaprealm_reconnect { | 686 | struct ceph_mds_snaprealm_reconnect { |
664 | __le64 ino; /* snap realm base */ | 687 | __le64 ino; /* snap realm base */ |
diff --git a/fs/ceph/ceph_hash.h b/fs/ceph/ceph_hash.h index 5ac470c433c9..d099c3f90236 100644 --- a/fs/ceph/ceph_hash.h +++ b/fs/ceph/ceph_hash.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _FS_CEPH_HASH_H | 1 | #ifndef FS_CEPH_HASH_H |
2 | #define _FS_CEPH_HASH_H | 2 | #define FS_CEPH_HASH_H |
3 | 3 | ||
4 | #define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ | 4 | #define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ |
5 | #define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ | 5 | #define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ |
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c index 7503aee828ce..c6179d3a26a2 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/ceph_strings.c | |||
@@ -28,6 +28,7 @@ const char *ceph_osd_op_name(int op) | |||
28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | 28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; |
29 | case CEPH_OSD_OP_ZERO: return "zero"; | 29 | case CEPH_OSD_OP_ZERO: return "zero"; |
30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | 30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; |
31 | case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||
31 | 32 | ||
32 | case CEPH_OSD_OP_APPEND: return "append"; | 33 | case CEPH_OSD_OP_APPEND: return "append"; |
33 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | 34 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; |
@@ -129,6 +130,8 @@ const char *ceph_mds_op_name(int op) | |||
129 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; | 130 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; |
130 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; | 131 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; |
131 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; | 132 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; |
133 | case CEPH_MDS_OP_SETFILELOCK: return "setfilelock"; | ||
134 | case CEPH_MDS_OP_GETFILELOCK: return "getfilelock"; | ||
132 | } | 135 | } |
133 | return "???"; | 136 | return "???"; |
134 | } | 137 | } |
diff --git a/fs/ceph/crush/crush.h b/fs/ceph/crush/crush.h index dcd7e7523700..97e435b191f4 100644 --- a/fs/ceph/crush/crush.h +++ b/fs/ceph/crush/crush.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _CRUSH_CRUSH_H | 1 | #ifndef CEPH_CRUSH_CRUSH_H |
2 | #define _CRUSH_CRUSH_H | 2 | #define CEPH_CRUSH_CRUSH_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | 5 | ||
diff --git a/fs/ceph/crush/hash.h b/fs/ceph/crush/hash.h index ff48e110e4bb..91e884230d5d 100644 --- a/fs/ceph/crush/hash.h +++ b/fs/ceph/crush/hash.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _CRUSH_HASH_H | 1 | #ifndef CEPH_CRUSH_HASH_H |
2 | #define _CRUSH_HASH_H | 2 | #define CEPH_CRUSH_HASH_H |
3 | 3 | ||
4 | #define CRUSH_HASH_RJENKINS1 0 | 4 | #define CRUSH_HASH_RJENKINS1 0 |
5 | 5 | ||
diff --git a/fs/ceph/crush/mapper.h b/fs/ceph/crush/mapper.h index 98e90046fd9f..c46b99c18bb0 100644 --- a/fs/ceph/crush/mapper.h +++ b/fs/ceph/crush/mapper.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _CRUSH_MAPPER_H | 1 | #ifndef CEPH_CRUSH_MAPPER_H |
2 | #define _CRUSH_MAPPER_H | 2 | #define CEPH_CRUSH_MAPPER_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * CRUSH functions for find rules and then mapping an input to an | 5 | * CRUSH functions for find rules and then mapping an input to an |
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index f704b3b62424..a3e627f63293 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c | |||
@@ -75,10 +75,11 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) | |||
75 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); | 75 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); |
76 | } | 76 | } |
77 | 77 | ||
78 | const u8 *aes_iv = "cephsageyudagreg"; | 78 | static const u8 *aes_iv = (u8 *)CEPH_AES_IV; |
79 | 79 | ||
80 | int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, | 80 | static int ceph_aes_encrypt(const void *key, int key_len, |
81 | const void *src, size_t src_len) | 81 | void *dst, size_t *dst_len, |
82 | const void *src, size_t src_len) | ||
82 | { | 83 | { |
83 | struct scatterlist sg_in[2], sg_out[1]; | 84 | struct scatterlist sg_in[2], sg_out[1]; |
84 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 85 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
@@ -126,9 +127,10 @@ int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, | |||
126 | return 0; | 127 | return 0; |
127 | } | 128 | } |
128 | 129 | ||
129 | int ceph_aes_encrypt2(const void *key, int key_len, void *dst, size_t *dst_len, | 130 | static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, |
130 | const void *src1, size_t src1_len, | 131 | size_t *dst_len, |
131 | const void *src2, size_t src2_len) | 132 | const void *src1, size_t src1_len, |
133 | const void *src2, size_t src2_len) | ||
132 | { | 134 | { |
133 | struct scatterlist sg_in[3], sg_out[1]; | 135 | struct scatterlist sg_in[3], sg_out[1]; |
134 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 136 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
@@ -179,8 +181,9 @@ int ceph_aes_encrypt2(const void *key, int key_len, void *dst, size_t *dst_len, | |||
179 | return 0; | 181 | return 0; |
180 | } | 182 | } |
181 | 183 | ||
182 | int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, | 184 | static int ceph_aes_decrypt(const void *key, int key_len, |
183 | const void *src, size_t src_len) | 185 | void *dst, size_t *dst_len, |
186 | const void *src, size_t src_len) | ||
184 | { | 187 | { |
185 | struct scatterlist sg_in[1], sg_out[2]; | 188 | struct scatterlist sg_in[1], sg_out[2]; |
186 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 189 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
@@ -238,10 +241,10 @@ int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, | |||
238 | return 0; | 241 | return 0; |
239 | } | 242 | } |
240 | 243 | ||
241 | int ceph_aes_decrypt2(const void *key, int key_len, | 244 | static int ceph_aes_decrypt2(const void *key, int key_len, |
242 | void *dst1, size_t *dst1_len, | 245 | void *dst1, size_t *dst1_len, |
243 | void *dst2, size_t *dst2_len, | 246 | void *dst2, size_t *dst2_len, |
244 | const void *src, size_t src_len) | 247 | const void *src, size_t src_len) |
245 | { | 248 | { |
246 | struct scatterlist sg_in[1], sg_out[3]; | 249 | struct scatterlist sg_in[1], sg_out[3]; |
247 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 250 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h index 40b502e6bd89..bdf38607323c 100644 --- a/fs/ceph/crypto.h +++ b/fs/ceph/crypto.h | |||
@@ -42,7 +42,7 @@ extern int ceph_encrypt2(struct ceph_crypto_key *secret, | |||
42 | const void *src2, size_t src2_len); | 42 | const void *src2, size_t src2_len); |
43 | 43 | ||
44 | /* armor.c */ | 44 | /* armor.c */ |
45 | extern int ceph_armor(char *dst, const void *src, const void *end); | 45 | extern int ceph_armor(char *dst, const char *src, const char *end); |
46 | extern int ceph_unarmor(void *dst, const char *src, const char *end); | 46 | extern int ceph_unarmor(char *dst, const char *src, const char *end); |
47 | 47 | ||
48 | #endif | 48 | #endif |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f2f5332ddbba..360c4f22718d 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -291,7 +291,7 @@ static int dentry_lru_show(struct seq_file *s, void *ptr) | |||
291 | return 0; | 291 | return 0; |
292 | } | 292 | } |
293 | 293 | ||
294 | #define DEFINE_SHOW_FUNC(name) \ | 294 | #define DEFINE_SHOW_FUNC(name) \ |
295 | static int name##_open(struct inode *inode, struct file *file) \ | 295 | static int name##_open(struct inode *inode, struct file *file) \ |
296 | { \ | 296 | { \ |
297 | struct seq_file *sf; \ | 297 | struct seq_file *sf; \ |
@@ -361,8 +361,8 @@ int ceph_debugfs_client_init(struct ceph_client *client) | |||
361 | int ret = 0; | 361 | int ret = 0; |
362 | char name[80]; | 362 | char name[80]; |
363 | 363 | ||
364 | snprintf(name, sizeof(name), FSID_FORMAT ".client%lld", | 364 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, |
365 | PR_FSID(&client->fsid), client->monc.auth->global_id); | 365 | client->monc.auth->global_id); |
366 | 366 | ||
367 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | 367 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); |
368 | if (!client->debugfs_dir) | 368 | if (!client->debugfs_dir) |
@@ -432,11 +432,12 @@ int ceph_debugfs_client_init(struct ceph_client *client) | |||
432 | if (!client->debugfs_caps) | 432 | if (!client->debugfs_caps) |
433 | goto out; | 433 | goto out; |
434 | 434 | ||
435 | client->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", | 435 | client->debugfs_congestion_kb = |
436 | 0600, | 436 | debugfs_create_file("writeback_congestion_kb", |
437 | client->debugfs_dir, | 437 | 0600, |
438 | client, | 438 | client->debugfs_dir, |
439 | &congestion_kb_fops); | 439 | client, |
440 | &congestion_kb_fops); | ||
440 | if (!client->debugfs_congestion_kb) | 441 | if (!client->debugfs_congestion_kb) |
441 | goto out; | 442 | goto out; |
442 | 443 | ||
@@ -466,7 +467,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) | |||
466 | debugfs_remove(client->debugfs_dir); | 467 | debugfs_remove(client->debugfs_dir); |
467 | } | 468 | } |
468 | 469 | ||
469 | #else // CONFIG_DEBUG_FS | 470 | #else /* CONFIG_DEBUG_FS */ |
470 | 471 | ||
471 | int __init ceph_debugfs_init(void) | 472 | int __init ceph_debugfs_init(void) |
472 | { | 473 | { |
@@ -486,4 +487,4 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) | |||
486 | { | 487 | { |
487 | } | 488 | } |
488 | 489 | ||
489 | #endif // CONFIG_DEBUG_FS | 490 | #endif /* CONFIG_DEBUG_FS */ |
diff --git a/fs/ceph/decode.h b/fs/ceph/decode.h index 65b3e022eaf5..3d25415afe63 100644 --- a/fs/ceph/decode.h +++ b/fs/ceph/decode.h | |||
@@ -99,11 +99,13 @@ static inline void ceph_encode_timespec(struct ceph_timespec *tv, | |||
99 | */ | 99 | */ |
100 | static inline void ceph_encode_addr(struct ceph_entity_addr *a) | 100 | static inline void ceph_encode_addr(struct ceph_entity_addr *a) |
101 | { | 101 | { |
102 | a->in_addr.ss_family = htons(a->in_addr.ss_family); | 102 | __be16 ss_family = htons(a->in_addr.ss_family); |
103 | a->in_addr.ss_family = *(__u16 *)&ss_family; | ||
103 | } | 104 | } |
104 | static inline void ceph_decode_addr(struct ceph_entity_addr *a) | 105 | static inline void ceph_decode_addr(struct ceph_entity_addr *a) |
105 | { | 106 | { |
106 | a->in_addr.ss_family = ntohs(a->in_addr.ss_family); | 107 | __be16 ss_family = *(__be16 *)&a->in_addr.ss_family; |
108 | a->in_addr.ss_family = ntohs(ss_family); | ||
107 | WARN_ON(a->in_addr.ss_family == 512); | 109 | WARN_ON(a->in_addr.ss_family == 512); |
108 | } | 110 | } |
109 | 111 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f94ed3c7f6a5..67bbb41d5526 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | const struct inode_operations ceph_dir_iops; | 28 | const struct inode_operations ceph_dir_iops; |
29 | const struct file_operations ceph_dir_fops; | 29 | const struct file_operations ceph_dir_fops; |
30 | struct dentry_operations ceph_dentry_ops; | 30 | const struct dentry_operations ceph_dentry_ops; |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * Initialize ceph dentry state. | 33 | * Initialize ceph dentry state. |
@@ -94,6 +94,8 @@ static unsigned fpos_off(loff_t p) | |||
94 | */ | 94 | */ |
95 | static int __dcache_readdir(struct file *filp, | 95 | static int __dcache_readdir(struct file *filp, |
96 | void *dirent, filldir_t filldir) | 96 | void *dirent, filldir_t filldir) |
97 | __releases(inode->i_lock) | ||
98 | __acquires(inode->i_lock) | ||
97 | { | 99 | { |
98 | struct inode *inode = filp->f_dentry->d_inode; | 100 | struct inode *inode = filp->f_dentry->d_inode; |
99 | struct ceph_file_info *fi = filp->private_data; | 101 | struct ceph_file_info *fi = filp->private_data; |
@@ -1239,16 +1241,16 @@ const struct inode_operations ceph_dir_iops = { | |||
1239 | .create = ceph_create, | 1241 | .create = ceph_create, |
1240 | }; | 1242 | }; |
1241 | 1243 | ||
1242 | struct dentry_operations ceph_dentry_ops = { | 1244 | const struct dentry_operations ceph_dentry_ops = { |
1243 | .d_revalidate = ceph_d_revalidate, | 1245 | .d_revalidate = ceph_d_revalidate, |
1244 | .d_release = ceph_dentry_release, | 1246 | .d_release = ceph_dentry_release, |
1245 | }; | 1247 | }; |
1246 | 1248 | ||
1247 | struct dentry_operations ceph_snapdir_dentry_ops = { | 1249 | const struct dentry_operations ceph_snapdir_dentry_ops = { |
1248 | .d_revalidate = ceph_snapdir_d_revalidate, | 1250 | .d_revalidate = ceph_snapdir_d_revalidate, |
1249 | .d_release = ceph_dentry_release, | 1251 | .d_release = ceph_dentry_release, |
1250 | }; | 1252 | }; |
1251 | 1253 | ||
1252 | struct dentry_operations ceph_snap_dentry_ops = { | 1254 | const struct dentry_operations ceph_snap_dentry_ops = { |
1253 | .d_release = ceph_dentry_release, | 1255 | .d_release = ceph_dentry_release, |
1254 | }; | 1256 | }; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7c08698fad3e..8c044a4f0457 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -317,7 +317,7 @@ void ceph_release_page_vector(struct page **pages, int num_pages) | |||
317 | /* | 317 | /* |
318 | * allocate a vector new pages | 318 | * allocate a vector new pages |
319 | */ | 319 | */ |
320 | struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | 320 | static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) |
321 | { | 321 | { |
322 | struct page **pages; | 322 | struct page **pages; |
323 | int i; | 323 | int i; |
@@ -665,7 +665,7 @@ more: | |||
665 | * throw out any page cache pages in this range. this | 665 | * throw out any page cache pages in this range. this |
666 | * may block. | 666 | * may block. |
667 | */ | 667 | */ |
668 | truncate_inode_pages_range(inode->i_mapping, pos, | 668 | truncate_inode_pages_range(inode->i_mapping, pos, |
669 | (pos+len) | (PAGE_CACHE_SIZE-1)); | 669 | (pos+len) | (PAGE_CACHE_SIZE-1)); |
670 | } else { | 670 | } else { |
671 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); | 671 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
@@ -740,28 +740,32 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
740 | unsigned long nr_segs, loff_t pos) | 740 | unsigned long nr_segs, loff_t pos) |
741 | { | 741 | { |
742 | struct file *filp = iocb->ki_filp; | 742 | struct file *filp = iocb->ki_filp; |
743 | struct ceph_file_info *fi = filp->private_data; | ||
743 | loff_t *ppos = &iocb->ki_pos; | 744 | loff_t *ppos = &iocb->ki_pos; |
744 | size_t len = iov->iov_len; | 745 | size_t len = iov->iov_len; |
745 | struct inode *inode = filp->f_dentry->d_inode; | 746 | struct inode *inode = filp->f_dentry->d_inode; |
746 | struct ceph_inode_info *ci = ceph_inode(inode); | 747 | struct ceph_inode_info *ci = ceph_inode(inode); |
747 | void *base = iov->iov_base; | 748 | void __user *base = iov->iov_base; |
748 | ssize_t ret; | 749 | ssize_t ret; |
749 | int got = 0; | 750 | int want, got = 0; |
750 | int checkeof = 0, read = 0; | 751 | int checkeof = 0, read = 0; |
751 | 752 | ||
752 | dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", | 753 | dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", |
753 | inode, ceph_vinop(inode), pos, (unsigned)len, inode); | 754 | inode, ceph_vinop(inode), pos, (unsigned)len, inode); |
754 | again: | 755 | again: |
755 | __ceph_do_pending_vmtruncate(inode); | 756 | __ceph_do_pending_vmtruncate(inode); |
756 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE, | 757 | if (fi->fmode & CEPH_FILE_MODE_LAZY) |
757 | &got, -1); | 758 | want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; |
759 | else | ||
760 | want = CEPH_CAP_FILE_CACHE; | ||
761 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); | ||
758 | if (ret < 0) | 762 | if (ret < 0) |
759 | goto out; | 763 | goto out; |
760 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", | 764 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", |
761 | inode, ceph_vinop(inode), pos, (unsigned)len, | 765 | inode, ceph_vinop(inode), pos, (unsigned)len, |
762 | ceph_cap_string(got)); | 766 | ceph_cap_string(got)); |
763 | 767 | ||
764 | if ((got & CEPH_CAP_FILE_CACHE) == 0 || | 768 | if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || |
765 | (iocb->ki_filp->f_flags & O_DIRECT) || | 769 | (iocb->ki_filp->f_flags & O_DIRECT) || |
766 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) | 770 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) |
767 | /* hmm, this isn't really async... */ | 771 | /* hmm, this isn't really async... */ |
@@ -807,11 +811,12 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
807 | unsigned long nr_segs, loff_t pos) | 811 | unsigned long nr_segs, loff_t pos) |
808 | { | 812 | { |
809 | struct file *file = iocb->ki_filp; | 813 | struct file *file = iocb->ki_filp; |
814 | struct ceph_file_info *fi = file->private_data; | ||
810 | struct inode *inode = file->f_dentry->d_inode; | 815 | struct inode *inode = file->f_dentry->d_inode; |
811 | struct ceph_inode_info *ci = ceph_inode(inode); | 816 | struct ceph_inode_info *ci = ceph_inode(inode); |
812 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 817 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; |
813 | loff_t endoff = pos + iov->iov_len; | 818 | loff_t endoff = pos + iov->iov_len; |
814 | int got = 0; | 819 | int want, got = 0; |
815 | int ret, err; | 820 | int ret, err; |
816 | 821 | ||
817 | if (ceph_snap(inode) != CEPH_NOSNAP) | 822 | if (ceph_snap(inode) != CEPH_NOSNAP) |
@@ -824,8 +829,11 @@ retry_snap: | |||
824 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", | 829 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", |
825 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 830 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
826 | inode->i_size); | 831 | inode->i_size); |
827 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER, | 832 | if (fi->fmode & CEPH_FILE_MODE_LAZY) |
828 | &got, endoff); | 833 | want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; |
834 | else | ||
835 | want = CEPH_CAP_FILE_BUFFER; | ||
836 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); | ||
829 | if (ret < 0) | 837 | if (ret < 0) |
830 | goto out; | 838 | goto out; |
831 | 839 | ||
@@ -833,7 +841,7 @@ retry_snap: | |||
833 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 841 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
834 | ceph_cap_string(got)); | 842 | ceph_cap_string(got)); |
835 | 843 | ||
836 | if ((got & CEPH_CAP_FILE_BUFFER) == 0 || | 844 | if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || |
837 | (iocb->ki_filp->f_flags & O_DIRECT) || | 845 | (iocb->ki_filp->f_flags & O_DIRECT) || |
838 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) { | 846 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) { |
839 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, | 847 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, |
@@ -930,6 +938,8 @@ const struct file_operations ceph_file_fops = { | |||
930 | .aio_write = ceph_aio_write, | 938 | .aio_write = ceph_aio_write, |
931 | .mmap = ceph_mmap, | 939 | .mmap = ceph_mmap, |
932 | .fsync = ceph_fsync, | 940 | .fsync = ceph_fsync, |
941 | .lock = ceph_lock, | ||
942 | .flock = ceph_flock, | ||
933 | .splice_read = generic_file_splice_read, | 943 | .splice_read = generic_file_splice_read, |
934 | .splice_write = generic_file_splice_write, | 944 | .splice_write = generic_file_splice_write, |
935 | .unlocked_ioctl = ceph_ioctl, | 945 | .unlocked_ioctl = ceph_ioctl, |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 389f9dbd9949..5d893d31e399 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -442,8 +442,9 @@ int ceph_fill_file_size(struct inode *inode, int issued, | |||
442 | * the file is either opened or mmaped | 442 | * the file is either opened or mmaped |
443 | */ | 443 | */ |
444 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_RD| | 444 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_RD| |
445 | CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER| | 445 | CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER| |
446 | CEPH_CAP_FILE_EXCL)) || | 446 | CEPH_CAP_FILE_EXCL| |
447 | CEPH_CAP_FILE_LAZYIO)) || | ||
447 | mapping_mapped(inode->i_mapping) || | 448 | mapping_mapped(inode->i_mapping) || |
448 | __ceph_caps_file_wanted(ci)) { | 449 | __ceph_caps_file_wanted(ci)) { |
449 | ci->i_truncate_pending++; | 450 | ci->i_truncate_pending++; |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index d085f07756b4..76e307d2aba1 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -143,6 +143,27 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
143 | return 0; | 143 | return 0; |
144 | } | 144 | } |
145 | 145 | ||
146 | static long ceph_ioctl_lazyio(struct file *file) | ||
147 | { | ||
148 | struct ceph_file_info *fi = file->private_data; | ||
149 | struct inode *inode = file->f_dentry->d_inode; | ||
150 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
151 | |||
152 | if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { | ||
153 | spin_lock(&inode->i_lock); | ||
154 | ci->i_nr_by_mode[fi->fmode]--; | ||
155 | fi->fmode |= CEPH_FILE_MODE_LAZY; | ||
156 | ci->i_nr_by_mode[fi->fmode]++; | ||
157 | spin_unlock(&inode->i_lock); | ||
158 | dout("ioctl_layzio: file %p marked lazy\n", file); | ||
159 | |||
160 | ceph_check_caps(ci, 0, NULL); | ||
161 | } else { | ||
162 | dout("ioctl_layzio: file %p already lazy\n", file); | ||
163 | } | ||
164 | return 0; | ||
165 | } | ||
166 | |||
146 | long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 167 | long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
147 | { | 168 | { |
148 | dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); | 169 | dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); |
@@ -155,6 +176,9 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
155 | 176 | ||
156 | case CEPH_IOC_GET_DATALOC: | 177 | case CEPH_IOC_GET_DATALOC: |
157 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); | 178 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); |
179 | |||
180 | case CEPH_IOC_LAZYIO: | ||
181 | return ceph_ioctl_lazyio(file); | ||
158 | } | 182 | } |
159 | return -ENOTTY; | 183 | return -ENOTTY; |
160 | } | 184 | } |
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index 25e4f1a9d059..88451a3b6857 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h | |||
@@ -37,4 +37,6 @@ struct ceph_ioctl_dataloc { | |||
37 | #define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \ | 37 | #define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \ |
38 | struct ceph_ioctl_dataloc) | 38 | struct ceph_ioctl_dataloc) |
39 | 39 | ||
40 | #define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4) | ||
41 | |||
40 | #endif | 42 | #endif |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c new file mode 100644 index 000000000000..ae85af06454f --- /dev/null +++ b/fs/ceph/locks.c | |||
@@ -0,0 +1,256 @@ | |||
1 | #include "ceph_debug.h" | ||
2 | |||
3 | #include <linux/file.h> | ||
4 | #include <linux/namei.h> | ||
5 | |||
6 | #include "super.h" | ||
7 | #include "mds_client.h" | ||
8 | #include "pagelist.h" | ||
9 | |||
10 | /** | ||
11 | * Implement fcntl and flock locking functions. | ||
12 | */ | ||
13 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | ||
14 | u64 pid, u64 pid_ns, | ||
15 | int cmd, u64 start, u64 length, u8 wait) | ||
16 | { | ||
17 | struct inode *inode = file->f_dentry->d_inode; | ||
18 | struct ceph_mds_client *mdsc = | ||
19 | &ceph_sb_to_client(inode->i_sb)->mdsc; | ||
20 | struct ceph_mds_request *req; | ||
21 | int err; | ||
22 | |||
23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | ||
24 | if (IS_ERR(req)) | ||
25 | return PTR_ERR(req); | ||
26 | req->r_inode = igrab(inode); | ||
27 | |||
28 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | ||
29 | "length: %llu, wait: %d, type`: %d", (int)lock_type, | ||
30 | (int)operation, pid, start, length, wait, cmd); | ||
31 | |||
32 | req->r_args.filelock_change.rule = lock_type; | ||
33 | req->r_args.filelock_change.type = cmd; | ||
34 | req->r_args.filelock_change.pid = cpu_to_le64(pid); | ||
35 | /* This should be adjusted, but I'm not sure if | ||
36 | namespaces actually get id numbers*/ | ||
37 | req->r_args.filelock_change.pid_namespace = | ||
38 | cpu_to_le64((u64)pid_ns); | ||
39 | req->r_args.filelock_change.start = cpu_to_le64(start); | ||
40 | req->r_args.filelock_change.length = cpu_to_le64(length); | ||
41 | req->r_args.filelock_change.wait = wait; | ||
42 | |||
43 | err = ceph_mdsc_do_request(mdsc, inode, req); | ||
44 | ceph_mdsc_put_request(req); | ||
45 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | ||
46 | "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, | ||
47 | (int)operation, pid, start, length, wait, cmd, err); | ||
48 | return err; | ||
49 | } | ||
50 | |||
51 | /** | ||
52 | * Attempt to set an fcntl lock. | ||
53 | * For now, this just goes away to the server. Later it may be more awesome. | ||
54 | */ | ||
55 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | ||
56 | { | ||
57 | u64 length; | ||
58 | u8 lock_cmd; | ||
59 | int err; | ||
60 | u8 wait = 0; | ||
61 | u16 op = CEPH_MDS_OP_SETFILELOCK; | ||
62 | |||
63 | fl->fl_nspid = get_pid(task_tgid(current)); | ||
64 | dout("ceph_lock, fl_pid:%d", fl->fl_pid); | ||
65 | |||
66 | /* set wait bit as appropriate, then make command as Ceph expects it*/ | ||
67 | if (F_SETLKW == cmd) | ||
68 | wait = 1; | ||
69 | if (F_GETLK == cmd) | ||
70 | op = CEPH_MDS_OP_GETFILELOCK; | ||
71 | |||
72 | if (F_RDLCK == fl->fl_type) | ||
73 | lock_cmd = CEPH_LOCK_SHARED; | ||
74 | else if (F_WRLCK == fl->fl_type) | ||
75 | lock_cmd = CEPH_LOCK_EXCL; | ||
76 | else | ||
77 | lock_cmd = CEPH_LOCK_UNLOCK; | ||
78 | |||
79 | if (LLONG_MAX == fl->fl_end) | ||
80 | length = 0; | ||
81 | else | ||
82 | length = fl->fl_end - fl->fl_start + 1; | ||
83 | |||
84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
85 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
86 | lock_cmd, fl->fl_start, | ||
87 | length, wait); | ||
88 | if (!err) { | ||
89 | dout("mds locked, locking locally"); | ||
90 | err = posix_lock_file(file, fl, NULL); | ||
91 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | ||
92 | /* undo! This should only happen if the kernel detects | ||
93 | * local deadlock. */ | ||
94 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
95 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
96 | CEPH_LOCK_UNLOCK, fl->fl_start, | ||
97 | length, 0); | ||
98 | dout("got %d on posix_lock_file, undid lock", err); | ||
99 | } | ||
100 | } else { | ||
101 | dout("mds returned error code %d", err); | ||
102 | } | ||
103 | return err; | ||
104 | } | ||
105 | |||
106 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | ||
107 | { | ||
108 | u64 length; | ||
109 | u8 lock_cmd; | ||
110 | int err; | ||
111 | u8 wait = 1; | ||
112 | |||
113 | fl->fl_nspid = get_pid(task_tgid(current)); | ||
114 | dout("ceph_flock, fl_pid:%d", fl->fl_pid); | ||
115 | |||
116 | /* set wait bit, then clear it out of cmd*/ | ||
117 | if (cmd & LOCK_NB) | ||
118 | wait = 0; | ||
119 | cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN); | ||
120 | /* set command sequence that Ceph wants to see: | ||
121 | shared lock, exclusive lock, or unlock */ | ||
122 | if (LOCK_SH == cmd) | ||
123 | lock_cmd = CEPH_LOCK_SHARED; | ||
124 | else if (LOCK_EX == cmd) | ||
125 | lock_cmd = CEPH_LOCK_EXCL; | ||
126 | else | ||
127 | lock_cmd = CEPH_LOCK_UNLOCK; | ||
128 | /* mds requires start and length rather than start and end */ | ||
129 | if (LLONG_MAX == fl->fl_end) | ||
130 | length = 0; | ||
131 | else | ||
132 | length = fl->fl_end - fl->fl_start + 1; | ||
133 | |||
134 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | ||
135 | file, (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
136 | lock_cmd, fl->fl_start, | ||
137 | length, wait); | ||
138 | if (!err) { | ||
139 | err = flock_lock_file_wait(file, fl); | ||
140 | if (err) { | ||
141 | ceph_lock_message(CEPH_LOCK_FLOCK, | ||
142 | CEPH_MDS_OP_SETFILELOCK, | ||
143 | file, (u64)fl->fl_pid, | ||
144 | (u64)fl->fl_nspid, | ||
145 | CEPH_LOCK_UNLOCK, fl->fl_start, | ||
146 | length, 0); | ||
147 | dout("got %d on flock_lock_file_wait, undid lock", err); | ||
148 | } | ||
149 | } else { | ||
150 | dout("mds error code %d", err); | ||
151 | } | ||
152 | return err; | ||
153 | } | ||
154 | |||
155 | /** | ||
156 | * Must be called with BKL already held. Fills in the passed | ||
157 | * counter variables, so you can prepare pagelist metadata before calling | ||
158 | * ceph_encode_locks. | ||
159 | */ | ||
160 | void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | ||
161 | { | ||
162 | struct file_lock *lock; | ||
163 | |||
164 | *fcntl_count = 0; | ||
165 | *flock_count = 0; | ||
166 | |||
167 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
168 | if (lock->fl_flags & FL_POSIX) | ||
169 | ++(*fcntl_count); | ||
170 | else if (lock->fl_flags & FL_FLOCK) | ||
171 | ++(*flock_count); | ||
172 | } | ||
173 | dout("counted %d flock locks and %d fcntl locks", | ||
174 | *flock_count, *fcntl_count); | ||
175 | } | ||
176 | |||
177 | /** | ||
178 | * Encode the flock and fcntl locks for the given inode into the pagelist. | ||
179 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, | ||
180 | * sequential flock locks. | ||
181 | * Must be called with BLK already held, and the lock numbers should have | ||
182 | * been gathered under the same lock holding window. | ||
183 | */ | ||
184 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | ||
185 | int num_fcntl_locks, int num_flock_locks) | ||
186 | { | ||
187 | struct file_lock *lock; | ||
188 | struct ceph_filelock cephlock; | ||
189 | int err = 0; | ||
190 | |||
191 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, | ||
192 | num_fcntl_locks); | ||
193 | err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32)); | ||
194 | if (err) | ||
195 | goto fail; | ||
196 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
197 | if (lock->fl_flags & FL_POSIX) { | ||
198 | err = lock_to_ceph_filelock(lock, &cephlock); | ||
199 | if (err) | ||
200 | goto fail; | ||
201 | err = ceph_pagelist_append(pagelist, &cephlock, | ||
202 | sizeof(struct ceph_filelock)); | ||
203 | } | ||
204 | if (err) | ||
205 | goto fail; | ||
206 | } | ||
207 | |||
208 | err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32)); | ||
209 | if (err) | ||
210 | goto fail; | ||
211 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
212 | if (lock->fl_flags & FL_FLOCK) { | ||
213 | err = lock_to_ceph_filelock(lock, &cephlock); | ||
214 | if (err) | ||
215 | goto fail; | ||
216 | err = ceph_pagelist_append(pagelist, &cephlock, | ||
217 | sizeof(struct ceph_filelock)); | ||
218 | } | ||
219 | if (err) | ||
220 | goto fail; | ||
221 | } | ||
222 | fail: | ||
223 | return err; | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | * Given a pointer to a lock, convert it to a ceph filelock | ||
228 | */ | ||
229 | int lock_to_ceph_filelock(struct file_lock *lock, | ||
230 | struct ceph_filelock *cephlock) | ||
231 | { | ||
232 | int err = 0; | ||
233 | |||
234 | cephlock->start = cpu_to_le64(lock->fl_start); | ||
235 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | ||
236 | cephlock->client = cpu_to_le64(0); | ||
237 | cephlock->pid = cpu_to_le64(lock->fl_pid); | ||
238 | cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid); | ||
239 | |||
240 | switch (lock->fl_type) { | ||
241 | case F_RDLCK: | ||
242 | cephlock->type = CEPH_LOCK_SHARED; | ||
243 | break; | ||
244 | case F_WRLCK: | ||
245 | cephlock->type = CEPH_LOCK_EXCL; | ||
246 | break; | ||
247 | case F_UNLCK: | ||
248 | cephlock->type = CEPH_LOCK_UNLOCK; | ||
249 | break; | ||
250 | default: | ||
251 | dout("Have unknown lock type %d", lock->fl_type); | ||
252 | err = -EINVAL; | ||
253 | } | ||
254 | |||
255 | return err; | ||
256 | } | ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index dd440bd438a9..a75ddbf9fe37 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/wait.h> | 3 | #include <linux/wait.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | #include <linux/sched.h> | 5 | #include <linux/sched.h> |
6 | #include <linux/smp_lock.h> | ||
6 | 7 | ||
7 | #include "mds_client.h" | 8 | #include "mds_client.h" |
8 | #include "mon_client.h" | 9 | #include "mon_client.h" |
@@ -37,6 +38,11 @@ | |||
37 | * are no longer valid. | 38 | * are no longer valid. |
38 | */ | 39 | */ |
39 | 40 | ||
41 | struct ceph_reconnect_state { | ||
42 | struct ceph_pagelist *pagelist; | ||
43 | bool flock; | ||
44 | }; | ||
45 | |||
40 | static void __wake_requests(struct ceph_mds_client *mdsc, | 46 | static void __wake_requests(struct ceph_mds_client *mdsc, |
41 | struct list_head *head); | 47 | struct list_head *head); |
42 | 48 | ||
@@ -449,7 +455,7 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
449 | kfree(req->r_path1); | 455 | kfree(req->r_path1); |
450 | kfree(req->r_path2); | 456 | kfree(req->r_path2); |
451 | put_request_session(req); | 457 | put_request_session(req); |
452 | ceph_unreserve_caps(&req->r_caps_reservation); | 458 | ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation); |
453 | kfree(req); | 459 | kfree(req); |
454 | } | 460 | } |
455 | 461 | ||
@@ -512,7 +518,8 @@ static void __register_request(struct ceph_mds_client *mdsc, | |||
512 | { | 518 | { |
513 | req->r_tid = ++mdsc->last_tid; | 519 | req->r_tid = ++mdsc->last_tid; |
514 | if (req->r_num_caps) | 520 | if (req->r_num_caps) |
515 | ceph_reserve_caps(&req->r_caps_reservation, req->r_num_caps); | 521 | ceph_reserve_caps(mdsc, &req->r_caps_reservation, |
522 | req->r_num_caps); | ||
516 | dout("__register_request %p tid %lld\n", req, req->r_tid); | 523 | dout("__register_request %p tid %lld\n", req, req->r_tid); |
517 | ceph_mdsc_get_request(req); | 524 | ceph_mdsc_get_request(req); |
518 | __insert_request(mdsc, req); | 525 | __insert_request(mdsc, req); |
@@ -704,6 +711,51 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
704 | } | 711 | } |
705 | 712 | ||
706 | /* | 713 | /* |
714 | * open sessions for any export targets for the given mds | ||
715 | * | ||
716 | * called under mdsc->mutex | ||
717 | */ | ||
718 | static void __open_export_target_sessions(struct ceph_mds_client *mdsc, | ||
719 | struct ceph_mds_session *session) | ||
720 | { | ||
721 | struct ceph_mds_info *mi; | ||
722 | struct ceph_mds_session *ts; | ||
723 | int i, mds = session->s_mds; | ||
724 | int target; | ||
725 | |||
726 | if (mds >= mdsc->mdsmap->m_max_mds) | ||
727 | return; | ||
728 | mi = &mdsc->mdsmap->m_info[mds]; | ||
729 | dout("open_export_target_sessions for mds%d (%d targets)\n", | ||
730 | session->s_mds, mi->num_export_targets); | ||
731 | |||
732 | for (i = 0; i < mi->num_export_targets; i++) { | ||
733 | target = mi->export_targets[i]; | ||
734 | ts = __ceph_lookup_mds_session(mdsc, target); | ||
735 | if (!ts) { | ||
736 | ts = register_session(mdsc, target); | ||
737 | if (IS_ERR(ts)) | ||
738 | return; | ||
739 | } | ||
740 | if (session->s_state == CEPH_MDS_SESSION_NEW || | ||
741 | session->s_state == CEPH_MDS_SESSION_CLOSING) | ||
742 | __open_session(mdsc, session); | ||
743 | else | ||
744 | dout(" mds%d target mds%d %p is %s\n", session->s_mds, | ||
745 | i, ts, session_state_name(ts->s_state)); | ||
746 | ceph_put_mds_session(ts); | ||
747 | } | ||
748 | } | ||
749 | |||
750 | void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, | ||
751 | struct ceph_mds_session *session) | ||
752 | { | ||
753 | mutex_lock(&mdsc->mutex); | ||
754 | __open_export_target_sessions(mdsc, session); | ||
755 | mutex_unlock(&mdsc->mutex); | ||
756 | } | ||
757 | |||
758 | /* | ||
707 | * session caps | 759 | * session caps |
708 | */ | 760 | */ |
709 | 761 | ||
@@ -764,7 +816,7 @@ static int iterate_session_caps(struct ceph_mds_session *session, | |||
764 | last_inode = NULL; | 816 | last_inode = NULL; |
765 | } | 817 | } |
766 | if (old_cap) { | 818 | if (old_cap) { |
767 | ceph_put_cap(old_cap); | 819 | ceph_put_cap(session->s_mdsc, old_cap); |
768 | old_cap = NULL; | 820 | old_cap = NULL; |
769 | } | 821 | } |
770 | 822 | ||
@@ -793,7 +845,7 @@ out: | |||
793 | if (last_inode) | 845 | if (last_inode) |
794 | iput(last_inode); | 846 | iput(last_inode); |
795 | if (old_cap) | 847 | if (old_cap) |
796 | ceph_put_cap(old_cap); | 848 | ceph_put_cap(session->s_mdsc, old_cap); |
797 | 849 | ||
798 | return ret; | 850 | return ret; |
799 | } | 851 | } |
@@ -1067,15 +1119,16 @@ static int trim_caps(struct ceph_mds_client *mdsc, | |||
1067 | * Called under s_mutex. | 1119 | * Called under s_mutex. |
1068 | */ | 1120 | */ |
1069 | int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | 1121 | int ceph_add_cap_releases(struct ceph_mds_client *mdsc, |
1070 | struct ceph_mds_session *session, | 1122 | struct ceph_mds_session *session) |
1071 | int extra) | ||
1072 | { | 1123 | { |
1073 | struct ceph_msg *msg; | 1124 | struct ceph_msg *msg, *partial = NULL; |
1074 | struct ceph_mds_cap_release *head; | 1125 | struct ceph_mds_cap_release *head; |
1075 | int err = -ENOMEM; | 1126 | int err = -ENOMEM; |
1127 | int extra = mdsc->client->mount_args->cap_release_safety; | ||
1128 | int num; | ||
1076 | 1129 | ||
1077 | if (extra < 0) | 1130 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, |
1078 | extra = mdsc->client->mount_args->cap_release_safety; | 1131 | extra); |
1079 | 1132 | ||
1080 | spin_lock(&session->s_cap_lock); | 1133 | spin_lock(&session->s_cap_lock); |
1081 | 1134 | ||
@@ -1084,9 +1137,14 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
1084 | struct ceph_msg, | 1137 | struct ceph_msg, |
1085 | list_head); | 1138 | list_head); |
1086 | head = msg->front.iov_base; | 1139 | head = msg->front.iov_base; |
1087 | extra += CEPH_CAPS_PER_RELEASE - le32_to_cpu(head->num); | 1140 | num = le32_to_cpu(head->num); |
1141 | if (num) { | ||
1142 | dout(" partial %p with (%d/%d)\n", msg, num, | ||
1143 | (int)CEPH_CAPS_PER_RELEASE); | ||
1144 | extra += CEPH_CAPS_PER_RELEASE - num; | ||
1145 | partial = msg; | ||
1146 | } | ||
1088 | } | 1147 | } |
1089 | |||
1090 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { | 1148 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { |
1091 | spin_unlock(&session->s_cap_lock); | 1149 | spin_unlock(&session->s_cap_lock); |
1092 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, | 1150 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, |
@@ -1103,19 +1161,14 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
1103 | session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; | 1161 | session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; |
1104 | } | 1162 | } |
1105 | 1163 | ||
1106 | if (!list_empty(&session->s_cap_releases)) { | 1164 | if (partial) { |
1107 | msg = list_first_entry(&session->s_cap_releases, | 1165 | head = partial->front.iov_base; |
1108 | struct ceph_msg, | 1166 | num = le32_to_cpu(head->num); |
1109 | list_head); | 1167 | dout(" queueing partial %p with %d/%d\n", partial, num, |
1110 | head = msg->front.iov_base; | 1168 | (int)CEPH_CAPS_PER_RELEASE); |
1111 | if (head->num) { | 1169 | list_move_tail(&partial->list_head, |
1112 | dout(" queueing non-full %p (%d)\n", msg, | 1170 | &session->s_cap_releases_done); |
1113 | le32_to_cpu(head->num)); | 1171 | session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num; |
1114 | list_move_tail(&msg->list_head, | ||
1115 | &session->s_cap_releases_done); | ||
1116 | session->s_num_cap_releases -= | ||
1117 | CEPH_CAPS_PER_RELEASE - le32_to_cpu(head->num); | ||
1118 | } | ||
1119 | } | 1172 | } |
1120 | err = 0; | 1173 | err = 0; |
1121 | spin_unlock(&session->s_cap_lock); | 1174 | spin_unlock(&session->s_cap_lock); |
@@ -1250,6 +1303,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
1250 | return ERR_PTR(-ENOMEM); | 1303 | return ERR_PTR(-ENOMEM); |
1251 | 1304 | ||
1252 | mutex_init(&req->r_fill_mutex); | 1305 | mutex_init(&req->r_fill_mutex); |
1306 | req->r_mdsc = mdsc; | ||
1253 | req->r_started = jiffies; | 1307 | req->r_started = jiffies; |
1254 | req->r_resend_mds = -1; | 1308 | req->r_resend_mds = -1; |
1255 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); | 1309 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); |
@@ -1580,6 +1634,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
1580 | 1634 | ||
1581 | req->r_mds = mds; | 1635 | req->r_mds = mds; |
1582 | req->r_attempts++; | 1636 | req->r_attempts++; |
1637 | if (req->r_inode) { | ||
1638 | struct ceph_cap *cap = | ||
1639 | ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds); | ||
1640 | |||
1641 | if (cap) | ||
1642 | req->r_sent_on_mseq = cap->mseq; | ||
1643 | else | ||
1644 | req->r_sent_on_mseq = -1; | ||
1645 | } | ||
1583 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, | 1646 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, |
1584 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); | 1647 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); |
1585 | 1648 | ||
@@ -1914,21 +1977,40 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1914 | result = le32_to_cpu(head->result); | 1977 | result = le32_to_cpu(head->result); |
1915 | 1978 | ||
1916 | /* | 1979 | /* |
1917 | * Tolerate 2 consecutive ESTALEs from the same mds. | 1980 | * Handle an ESTALE |
1918 | * FIXME: we should be looking at the cap migrate_seq. | 1981 | * if we're not talking to the authority, send to them |
1982 | * if the authority has changed while we weren't looking, | ||
1983 | * send to new authority | ||
1984 | * Otherwise we just have to return an ESTALE | ||
1919 | */ | 1985 | */ |
1920 | if (result == -ESTALE) { | 1986 | if (result == -ESTALE) { |
1921 | req->r_direct_mode = USE_AUTH_MDS; | 1987 | dout("got ESTALE on request %llu", req->r_tid); |
1922 | req->r_num_stale++; | 1988 | if (!req->r_inode) { |
1923 | if (req->r_num_stale <= 2) { | 1989 | /* do nothing; not an authority problem */ |
1990 | } else if (req->r_direct_mode != USE_AUTH_MDS) { | ||
1991 | dout("not using auth, setting for that now"); | ||
1992 | req->r_direct_mode = USE_AUTH_MDS; | ||
1924 | __do_request(mdsc, req); | 1993 | __do_request(mdsc, req); |
1925 | mutex_unlock(&mdsc->mutex); | 1994 | mutex_unlock(&mdsc->mutex); |
1926 | goto out; | 1995 | goto out; |
1996 | } else { | ||
1997 | struct ceph_inode_info *ci = ceph_inode(req->r_inode); | ||
1998 | struct ceph_cap *cap = | ||
1999 | ceph_get_cap_for_mds(ci, req->r_mds);; | ||
2000 | |||
2001 | dout("already using auth"); | ||
2002 | if ((!cap || cap != ci->i_auth_cap) || | ||
2003 | (cap->mseq != req->r_sent_on_mseq)) { | ||
2004 | dout("but cap changed, so resending"); | ||
2005 | __do_request(mdsc, req); | ||
2006 | mutex_unlock(&mdsc->mutex); | ||
2007 | goto out; | ||
2008 | } | ||
1927 | } | 2009 | } |
1928 | } else { | 2010 | dout("have to return ESTALE on request %llu", req->r_tid); |
1929 | req->r_num_stale = 0; | ||
1930 | } | 2011 | } |
1931 | 2012 | ||
2013 | |||
1932 | if (head->safe) { | 2014 | if (head->safe) { |
1933 | req->r_got_safe = true; | 2015 | req->r_got_safe = true; |
1934 | __unregister_request(mdsc, req); | 2016 | __unregister_request(mdsc, req); |
@@ -1985,7 +2067,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1985 | if (err == 0) { | 2067 | if (err == 0) { |
1986 | if (result == 0 && rinfo->dir_nr) | 2068 | if (result == 0 && rinfo->dir_nr) |
1987 | ceph_readdir_prepopulate(req, req->r_session); | 2069 | ceph_readdir_prepopulate(req, req->r_session); |
1988 | ceph_unreserve_caps(&req->r_caps_reservation); | 2070 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); |
1989 | } | 2071 | } |
1990 | mutex_unlock(&req->r_fill_mutex); | 2072 | mutex_unlock(&req->r_fill_mutex); |
1991 | 2073 | ||
@@ -2005,7 +2087,7 @@ out_err: | |||
2005 | } | 2087 | } |
2006 | mutex_unlock(&mdsc->mutex); | 2088 | mutex_unlock(&mdsc->mutex); |
2007 | 2089 | ||
2008 | ceph_add_cap_releases(mdsc, req->r_session, -1); | 2090 | ceph_add_cap_releases(mdsc, req->r_session); |
2009 | mutex_unlock(&session->s_mutex); | 2091 | mutex_unlock(&session->s_mutex); |
2010 | 2092 | ||
2011 | /* kick calling process */ | 2093 | /* kick calling process */ |
@@ -2193,9 +2275,14 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, | |||
2193 | static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | 2275 | static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, |
2194 | void *arg) | 2276 | void *arg) |
2195 | { | 2277 | { |
2196 | struct ceph_mds_cap_reconnect rec; | 2278 | union { |
2279 | struct ceph_mds_cap_reconnect v2; | ||
2280 | struct ceph_mds_cap_reconnect_v1 v1; | ||
2281 | } rec; | ||
2282 | size_t reclen; | ||
2197 | struct ceph_inode_info *ci; | 2283 | struct ceph_inode_info *ci; |
2198 | struct ceph_pagelist *pagelist = arg; | 2284 | struct ceph_reconnect_state *recon_state = arg; |
2285 | struct ceph_pagelist *pagelist = recon_state->pagelist; | ||
2199 | char *path; | 2286 | char *path; |
2200 | int pathlen, err; | 2287 | int pathlen, err; |
2201 | u64 pathbase; | 2288 | u64 pathbase; |
@@ -2228,17 +2315,44 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2228 | spin_lock(&inode->i_lock); | 2315 | spin_lock(&inode->i_lock); |
2229 | cap->seq = 0; /* reset cap seq */ | 2316 | cap->seq = 0; /* reset cap seq */ |
2230 | cap->issue_seq = 0; /* and issue_seq */ | 2317 | cap->issue_seq = 0; /* and issue_seq */ |
2231 | rec.cap_id = cpu_to_le64(cap->cap_id); | 2318 | |
2232 | rec.pathbase = cpu_to_le64(pathbase); | 2319 | if (recon_state->flock) { |
2233 | rec.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); | 2320 | rec.v2.cap_id = cpu_to_le64(cap->cap_id); |
2234 | rec.issued = cpu_to_le32(cap->issued); | 2321 | rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); |
2235 | rec.size = cpu_to_le64(inode->i_size); | 2322 | rec.v2.issued = cpu_to_le32(cap->issued); |
2236 | ceph_encode_timespec(&rec.mtime, &inode->i_mtime); | 2323 | rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); |
2237 | ceph_encode_timespec(&rec.atime, &inode->i_atime); | 2324 | rec.v2.pathbase = cpu_to_le64(pathbase); |
2238 | rec.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); | 2325 | rec.v2.flock_len = 0; |
2326 | reclen = sizeof(rec.v2); | ||
2327 | } else { | ||
2328 | rec.v1.cap_id = cpu_to_le64(cap->cap_id); | ||
2329 | rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); | ||
2330 | rec.v1.issued = cpu_to_le32(cap->issued); | ||
2331 | rec.v1.size = cpu_to_le64(inode->i_size); | ||
2332 | ceph_encode_timespec(&rec.v1.mtime, &inode->i_mtime); | ||
2333 | ceph_encode_timespec(&rec.v1.atime, &inode->i_atime); | ||
2334 | rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); | ||
2335 | rec.v1.pathbase = cpu_to_le64(pathbase); | ||
2336 | reclen = sizeof(rec.v1); | ||
2337 | } | ||
2239 | spin_unlock(&inode->i_lock); | 2338 | spin_unlock(&inode->i_lock); |
2240 | 2339 | ||
2241 | err = ceph_pagelist_append(pagelist, &rec, sizeof(rec)); | 2340 | if (recon_state->flock) { |
2341 | int num_fcntl_locks, num_flock_locks; | ||
2342 | |||
2343 | lock_kernel(); | ||
2344 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | ||
2345 | rec.v2.flock_len = (2*sizeof(u32) + | ||
2346 | (num_fcntl_locks+num_flock_locks) * | ||
2347 | sizeof(struct ceph_filelock)); | ||
2348 | |||
2349 | err = ceph_pagelist_append(pagelist, &rec, reclen); | ||
2350 | if (!err) | ||
2351 | err = ceph_encode_locks(inode, pagelist, | ||
2352 | num_fcntl_locks, | ||
2353 | num_flock_locks); | ||
2354 | unlock_kernel(); | ||
2355 | } | ||
2242 | 2356 | ||
2243 | out: | 2357 | out: |
2244 | kfree(path); | 2358 | kfree(path); |
@@ -2267,6 +2381,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2267 | int mds = session->s_mds; | 2381 | int mds = session->s_mds; |
2268 | int err = -ENOMEM; | 2382 | int err = -ENOMEM; |
2269 | struct ceph_pagelist *pagelist; | 2383 | struct ceph_pagelist *pagelist; |
2384 | struct ceph_reconnect_state recon_state; | ||
2270 | 2385 | ||
2271 | pr_info("mds%d reconnect start\n", mds); | 2386 | pr_info("mds%d reconnect start\n", mds); |
2272 | 2387 | ||
@@ -2301,7 +2416,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2301 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); | 2416 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); |
2302 | if (err) | 2417 | if (err) |
2303 | goto fail; | 2418 | goto fail; |
2304 | err = iterate_session_caps(session, encode_caps_cb, pagelist); | 2419 | |
2420 | recon_state.pagelist = pagelist; | ||
2421 | recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK; | ||
2422 | err = iterate_session_caps(session, encode_caps_cb, &recon_state); | ||
2305 | if (err < 0) | 2423 | if (err < 0) |
2306 | goto fail; | 2424 | goto fail; |
2307 | 2425 | ||
@@ -2326,6 +2444,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2326 | } | 2444 | } |
2327 | 2445 | ||
2328 | reply->pagelist = pagelist; | 2446 | reply->pagelist = pagelist; |
2447 | if (recon_state.flock) | ||
2448 | reply->hdr.version = cpu_to_le16(2); | ||
2329 | reply->hdr.data_len = cpu_to_le32(pagelist->length); | 2449 | reply->hdr.data_len = cpu_to_le32(pagelist->length); |
2330 | reply->nr_pages = calc_pages_for(0, pagelist->length); | 2450 | reply->nr_pages = calc_pages_for(0, pagelist->length); |
2331 | ceph_con_send(&session->s_con, reply); | 2451 | ceph_con_send(&session->s_con, reply); |
@@ -2376,9 +2496,11 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2376 | oldstate = ceph_mdsmap_get_state(oldmap, i); | 2496 | oldstate = ceph_mdsmap_get_state(oldmap, i); |
2377 | newstate = ceph_mdsmap_get_state(newmap, i); | 2497 | newstate = ceph_mdsmap_get_state(newmap, i); |
2378 | 2498 | ||
2379 | dout("check_new_map mds%d state %s -> %s (session %s)\n", | 2499 | dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n", |
2380 | i, ceph_mds_state_name(oldstate), | 2500 | i, ceph_mds_state_name(oldstate), |
2501 | ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", | ||
2381 | ceph_mds_state_name(newstate), | 2502 | ceph_mds_state_name(newstate), |
2503 | ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", | ||
2382 | session_state_name(s->s_state)); | 2504 | session_state_name(s->s_state)); |
2383 | 2505 | ||
2384 | if (memcmp(ceph_mdsmap_get_addr(oldmap, i), | 2506 | if (memcmp(ceph_mdsmap_get_addr(oldmap, i), |
@@ -2428,6 +2550,21 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2428 | wake_up_session_caps(s, 1); | 2550 | wake_up_session_caps(s, 1); |
2429 | } | 2551 | } |
2430 | } | 2552 | } |
2553 | |||
2554 | for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) { | ||
2555 | s = mdsc->sessions[i]; | ||
2556 | if (!s) | ||
2557 | continue; | ||
2558 | if (!ceph_mdsmap_is_laggy(newmap, i)) | ||
2559 | continue; | ||
2560 | if (s->s_state == CEPH_MDS_SESSION_OPEN || | ||
2561 | s->s_state == CEPH_MDS_SESSION_HUNG || | ||
2562 | s->s_state == CEPH_MDS_SESSION_CLOSING) { | ||
2563 | dout(" connecting to export targets of laggy mds%d\n", | ||
2564 | i); | ||
2565 | __open_export_target_sessions(mdsc, s); | ||
2566 | } | ||
2567 | } | ||
2431 | } | 2568 | } |
2432 | 2569 | ||
2433 | 2570 | ||
@@ -2715,7 +2852,7 @@ static void delayed_work(struct work_struct *work) | |||
2715 | send_renew_caps(mdsc, s); | 2852 | send_renew_caps(mdsc, s); |
2716 | else | 2853 | else |
2717 | ceph_con_keepalive(&s->s_con); | 2854 | ceph_con_keepalive(&s->s_con); |
2718 | ceph_add_cap_releases(mdsc, s, -1); | 2855 | ceph_add_cap_releases(mdsc, s); |
2719 | if (s->s_state == CEPH_MDS_SESSION_OPEN || | 2856 | if (s->s_state == CEPH_MDS_SESSION_OPEN || |
2720 | s->s_state == CEPH_MDS_SESSION_HUNG) | 2857 | s->s_state == CEPH_MDS_SESSION_HUNG) |
2721 | ceph_send_cap_releases(mdsc, s); | 2858 | ceph_send_cap_releases(mdsc, s); |
@@ -2764,6 +2901,9 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2764 | spin_lock_init(&mdsc->dentry_lru_lock); | 2901 | spin_lock_init(&mdsc->dentry_lru_lock); |
2765 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2902 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
2766 | 2903 | ||
2904 | ceph_caps_init(mdsc); | ||
2905 | ceph_adjust_min_caps(mdsc, client->min_caps); | ||
2906 | |||
2767 | return 0; | 2907 | return 0; |
2768 | } | 2908 | } |
2769 | 2909 | ||
@@ -2959,6 +3099,7 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | |||
2959 | if (mdsc->mdsmap) | 3099 | if (mdsc->mdsmap) |
2960 | ceph_mdsmap_destroy(mdsc->mdsmap); | 3100 | ceph_mdsmap_destroy(mdsc->mdsmap); |
2961 | kfree(mdsc->sessions); | 3101 | kfree(mdsc->sessions); |
3102 | ceph_caps_finalize(mdsc); | ||
2962 | } | 3103 | } |
2963 | 3104 | ||
2964 | 3105 | ||
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 952410c60d09..ab7e89f5e344 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -151,6 +151,7 @@ typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc, | |||
151 | struct ceph_mds_request { | 151 | struct ceph_mds_request { |
152 | u64 r_tid; /* transaction id */ | 152 | u64 r_tid; /* transaction id */ |
153 | struct rb_node r_node; | 153 | struct rb_node r_node; |
154 | struct ceph_mds_client *r_mdsc; | ||
154 | 155 | ||
155 | int r_op; /* mds op code */ | 156 | int r_op; /* mds op code */ |
156 | int r_mds; | 157 | int r_mds; |
@@ -207,8 +208,8 @@ struct ceph_mds_request { | |||
207 | 208 | ||
208 | int r_attempts; /* resend attempts */ | 209 | int r_attempts; /* resend attempts */ |
209 | int r_num_fwd; /* number of forward attempts */ | 210 | int r_num_fwd; /* number of forward attempts */ |
210 | int r_num_stale; | ||
211 | int r_resend_mds; /* mds to resend to next, if any*/ | 211 | int r_resend_mds; /* mds to resend to next, if any*/ |
212 | u32 r_sent_on_mseq; /* cap mseq request was sent at*/ | ||
212 | 213 | ||
213 | struct kref r_kref; | 214 | struct kref r_kref; |
214 | struct list_head r_wait; | 215 | struct list_head r_wait; |
@@ -267,6 +268,27 @@ struct ceph_mds_client { | |||
267 | spinlock_t cap_dirty_lock; /* protects above items */ | 268 | spinlock_t cap_dirty_lock; /* protects above items */ |
268 | wait_queue_head_t cap_flushing_wq; | 269 | wait_queue_head_t cap_flushing_wq; |
269 | 270 | ||
271 | /* | ||
272 | * Cap reservations | ||
273 | * | ||
274 | * Maintain a global pool of preallocated struct ceph_caps, referenced | ||
275 | * by struct ceph_caps_reservations. This ensures that we preallocate | ||
276 | * memory needed to successfully process an MDS response. (If an MDS | ||
277 | * sends us cap information and we fail to process it, we will have | ||
278 | * problems due to the client and MDS being out of sync.) | ||
279 | * | ||
280 | * Reservations are 'owned' by a ceph_cap_reservation context. | ||
281 | */ | ||
282 | spinlock_t caps_list_lock; | ||
283 | struct list_head caps_list; /* unused (reserved or | ||
284 | unreserved) */ | ||
285 | int caps_total_count; /* total caps allocated */ | ||
286 | int caps_use_count; /* in use */ | ||
287 | int caps_reserve_count; /* unused, reserved */ | ||
288 | int caps_avail_count; /* unused, unreserved */ | ||
289 | int caps_min_count; /* keep at least this many | ||
290 | (unreserved) */ | ||
291 | |||
270 | #ifdef CONFIG_DEBUG_FS | 292 | #ifdef CONFIG_DEBUG_FS |
271 | struct dentry *debugfs_file; | 293 | struct dentry *debugfs_file; |
272 | #endif | 294 | #endif |
@@ -324,8 +346,7 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) | |||
324 | } | 346 | } |
325 | 347 | ||
326 | extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | 348 | extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, |
327 | struct ceph_mds_session *session, | 349 | struct ceph_mds_session *session); |
328 | int extra); | ||
329 | extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, | 350 | extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, |
330 | struct ceph_mds_session *session); | 351 | struct ceph_mds_session *session); |
331 | 352 | ||
@@ -343,4 +364,7 @@ extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, | |||
343 | extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, | 364 | extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, |
344 | struct ceph_msg *msg); | 365 | struct ceph_msg *msg); |
345 | 366 | ||
367 | extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, | ||
368 | struct ceph_mds_session *session); | ||
369 | |||
346 | #endif | 370 | #endif |
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index c4c498e6dfef..040be6d1150b 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
@@ -85,6 +85,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
85 | struct ceph_entity_addr addr; | 85 | struct ceph_entity_addr addr; |
86 | u32 num_export_targets; | 86 | u32 num_export_targets; |
87 | void *pexport_targets = NULL; | 87 | void *pexport_targets = NULL; |
88 | struct ceph_timespec laggy_since; | ||
88 | 89 | ||
89 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); | 90 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); |
90 | global_id = ceph_decode_64(p); | 91 | global_id = ceph_decode_64(p); |
@@ -103,7 +104,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
103 | state_seq = ceph_decode_64(p); | 104 | state_seq = ceph_decode_64(p); |
104 | ceph_decode_copy(p, &addr, sizeof(addr)); | 105 | ceph_decode_copy(p, &addr, sizeof(addr)); |
105 | ceph_decode_addr(&addr); | 106 | ceph_decode_addr(&addr); |
106 | *p += sizeof(struct ceph_timespec); | 107 | ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); |
107 | *p += sizeof(u32); | 108 | *p += sizeof(u32); |
108 | ceph_decode_32_safe(p, end, namelen, bad); | 109 | ceph_decode_32_safe(p, end, namelen, bad); |
109 | *p += namelen; | 110 | *p += namelen; |
@@ -122,6 +123,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
122 | m->m_info[mds].global_id = global_id; | 123 | m->m_info[mds].global_id = global_id; |
123 | m->m_info[mds].state = state; | 124 | m->m_info[mds].state = state; |
124 | m->m_info[mds].addr = addr; | 125 | m->m_info[mds].addr = addr; |
126 | m->m_info[mds].laggy = | ||
127 | (laggy_since.tv_sec != 0 || | ||
128 | laggy_since.tv_nsec != 0); | ||
125 | m->m_info[mds].num_export_targets = num_export_targets; | 129 | m->m_info[mds].num_export_targets = num_export_targets; |
126 | if (num_export_targets) { | 130 | if (num_export_targets) { |
127 | m->m_info[mds].export_targets = | 131 | m->m_info[mds].export_targets = |
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h index eacc131aa5cb..4c5cb0880bba 100644 --- a/fs/ceph/mdsmap.h +++ b/fs/ceph/mdsmap.h | |||
@@ -13,6 +13,7 @@ struct ceph_mds_info { | |||
13 | struct ceph_entity_addr addr; | 13 | struct ceph_entity_addr addr; |
14 | s32 state; | 14 | s32 state; |
15 | int num_export_targets; | 15 | int num_export_targets; |
16 | bool laggy; | ||
16 | u32 *export_targets; | 17 | u32 *export_targets; |
17 | }; | 18 | }; |
18 | 19 | ||
@@ -47,6 +48,13 @@ static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) | |||
47 | return m->m_info[w].state; | 48 | return m->m_info[w].state; |
48 | } | 49 | } |
49 | 50 | ||
51 | static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) | ||
52 | { | ||
53 | if (w >= 0 && w < m->m_max_mds) | ||
54 | return m->m_info[w].laggy; | ||
55 | return false; | ||
56 | } | ||
57 | |||
50 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); | 58 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); |
51 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); | 59 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); |
52 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); | 60 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); |
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 15167b2daa55..2502d76fcec1 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
@@ -108,7 +108,7 @@ void ceph_msgr_exit(void) | |||
108 | destroy_workqueue(ceph_msgr_wq); | 108 | destroy_workqueue(ceph_msgr_wq); |
109 | } | 109 | } |
110 | 110 | ||
111 | void ceph_msgr_flush() | 111 | void ceph_msgr_flush(void) |
112 | { | 112 | { |
113 | flush_workqueue(ceph_msgr_wq); | 113 | flush_workqueue(ceph_msgr_wq); |
114 | } | 114 | } |
@@ -647,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
648 | con->connect_seq, global_seq, proto); | 648 | con->connect_seq, global_seq, proto); |
649 | 649 | ||
650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT); | 650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED); |
651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
653 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 653 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
@@ -1081,11 +1081,11 @@ static int process_banner(struct ceph_connection *con) | |||
1081 | sizeof(con->peer_addr)) != 0 && | 1081 | sizeof(con->peer_addr)) != 0 && |
1082 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && | 1082 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && |
1083 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { | 1083 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { |
1084 | pr_warning("wrong peer, want %s/%lld, got %s/%lld\n", | 1084 | pr_warning("wrong peer, want %s/%d, got %s/%d\n", |
1085 | pr_addr(&con->peer_addr.in_addr), | 1085 | pr_addr(&con->peer_addr.in_addr), |
1086 | le64_to_cpu(con->peer_addr.nonce), | 1086 | (int)le32_to_cpu(con->peer_addr.nonce), |
1087 | pr_addr(&con->actual_peer_addr.in_addr), | 1087 | pr_addr(&con->actual_peer_addr.in_addr), |
1088 | le64_to_cpu(con->actual_peer_addr.nonce)); | 1088 | (int)le32_to_cpu(con->actual_peer_addr.nonce)); |
1089 | con->error_msg = "wrong peer at address"; | 1089 | con->error_msg = "wrong peer at address"; |
1090 | return -1; | 1090 | return -1; |
1091 | } | 1091 | } |
@@ -1123,8 +1123,8 @@ static void fail_protocol(struct ceph_connection *con) | |||
1123 | 1123 | ||
1124 | static int process_connect(struct ceph_connection *con) | 1124 | static int process_connect(struct ceph_connection *con) |
1125 | { | 1125 | { |
1126 | u64 sup_feat = CEPH_FEATURE_SUPPORTED_CLIENT; | 1126 | u64 sup_feat = CEPH_FEATURE_SUPPORTED; |
1127 | u64 req_feat = CEPH_FEATURE_REQUIRED_CLIENT; | 1127 | u64 req_feat = CEPH_FEATURE_REQUIRED; |
1128 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1128 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
1129 | 1129 | ||
1130 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1130 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
@@ -1302,8 +1302,8 @@ static void process_ack(struct ceph_connection *con) | |||
1302 | 1302 | ||
1303 | 1303 | ||
1304 | static int read_partial_message_section(struct ceph_connection *con, | 1304 | static int read_partial_message_section(struct ceph_connection *con, |
1305 | struct kvec *section, unsigned int sec_len, | 1305 | struct kvec *section, |
1306 | u32 *crc) | 1306 | unsigned int sec_len, u32 *crc) |
1307 | { | 1307 | { |
1308 | int left; | 1308 | int left; |
1309 | int ret; | 1309 | int ret; |
@@ -1434,7 +1434,8 @@ static int read_partial_message(struct ceph_connection *con) | |||
1434 | 1434 | ||
1435 | /* middle */ | 1435 | /* middle */ |
1436 | if (m->middle) { | 1436 | if (m->middle) { |
1437 | ret = read_partial_message_section(con, &m->middle->vec, middle_len, | 1437 | ret = read_partial_message_section(con, &m->middle->vec, |
1438 | middle_len, | ||
1438 | &con->in_middle_crc); | 1439 | &con->in_middle_crc); |
1439 | if (ret <= 0) | 1440 | if (ret <= 0) |
1440 | return ret; | 1441 | return ret; |
@@ -1920,7 +1921,7 @@ out: | |||
1920 | /* | 1921 | /* |
1921 | * in case we faulted due to authentication, invalidate our | 1922 | * in case we faulted due to authentication, invalidate our |
1922 | * current tickets so that we can get new ones. | 1923 | * current tickets so that we can get new ones. |
1923 | */ | 1924 | */ |
1924 | if (con->auth_retry && con->ops->invalidate_authorizer) { | 1925 | if (con->auth_retry && con->ops->invalidate_authorizer) { |
1925 | dout("calling invalidate_authorizer()\n"); | 1926 | dout("calling invalidate_authorizer()\n"); |
1926 | con->ops->invalidate_authorizer(con); | 1927 | con->ops->invalidate_authorizer(con); |
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 54fe01c50706..b2a5a3e4a671 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c | |||
@@ -349,7 +349,7 @@ out: | |||
349 | } | 349 | } |
350 | 350 | ||
351 | /* | 351 | /* |
352 | * statfs | 352 | * generic requests (e.g., statfs, poolop) |
353 | */ | 353 | */ |
354 | static struct ceph_mon_generic_request *__lookup_generic_req( | 354 | static struct ceph_mon_generic_request *__lookup_generic_req( |
355 | struct ceph_mon_client *monc, u64 tid) | 355 | struct ceph_mon_client *monc, u64 tid) |
@@ -442,6 +442,35 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con, | |||
442 | return m; | 442 | return m; |
443 | } | 443 | } |
444 | 444 | ||
445 | static int do_generic_request(struct ceph_mon_client *monc, | ||
446 | struct ceph_mon_generic_request *req) | ||
447 | { | ||
448 | int err; | ||
449 | |||
450 | /* register request */ | ||
451 | mutex_lock(&monc->mutex); | ||
452 | req->tid = ++monc->last_tid; | ||
453 | req->request->hdr.tid = cpu_to_le64(req->tid); | ||
454 | __insert_generic_request(monc, req); | ||
455 | monc->num_generic_requests++; | ||
456 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | ||
457 | mutex_unlock(&monc->mutex); | ||
458 | |||
459 | err = wait_for_completion_interruptible(&req->completion); | ||
460 | |||
461 | mutex_lock(&monc->mutex); | ||
462 | rb_erase(&req->node, &monc->generic_request_tree); | ||
463 | monc->num_generic_requests--; | ||
464 | mutex_unlock(&monc->mutex); | ||
465 | |||
466 | if (!err) | ||
467 | err = req->result; | ||
468 | return err; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * statfs | ||
473 | */ | ||
445 | static void handle_statfs_reply(struct ceph_mon_client *monc, | 474 | static void handle_statfs_reply(struct ceph_mon_client *monc, |
446 | struct ceph_msg *msg) | 475 | struct ceph_msg *msg) |
447 | { | 476 | { |
@@ -468,7 +497,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc, | |||
468 | return; | 497 | return; |
469 | 498 | ||
470 | bad: | 499 | bad: |
471 | pr_err("corrupt generic reply, no tid\n"); | 500 | pr_err("corrupt generic reply, tid %llu\n", tid); |
472 | ceph_msg_dump(msg); | 501 | ceph_msg_dump(msg); |
473 | } | 502 | } |
474 | 503 | ||
@@ -487,6 +516,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | |||
487 | 516 | ||
488 | kref_init(&req->kref); | 517 | kref_init(&req->kref); |
489 | req->buf = buf; | 518 | req->buf = buf; |
519 | req->buf_len = sizeof(*buf); | ||
490 | init_completion(&req->completion); | 520 | init_completion(&req->completion); |
491 | 521 | ||
492 | err = -ENOMEM; | 522 | err = -ENOMEM; |
@@ -504,33 +534,134 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | |||
504 | h->monhdr.session_mon_tid = 0; | 534 | h->monhdr.session_mon_tid = 0; |
505 | h->fsid = monc->monmap->fsid; | 535 | h->fsid = monc->monmap->fsid; |
506 | 536 | ||
507 | /* register request */ | 537 | err = do_generic_request(monc, req); |
508 | mutex_lock(&monc->mutex); | ||
509 | req->tid = ++monc->last_tid; | ||
510 | req->request->hdr.tid = cpu_to_le64(req->tid); | ||
511 | __insert_generic_request(monc, req); | ||
512 | monc->num_generic_requests++; | ||
513 | mutex_unlock(&monc->mutex); | ||
514 | 538 | ||
515 | /* send request and wait */ | 539 | out: |
516 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | 540 | kref_put(&req->kref, release_generic_request); |
517 | err = wait_for_completion_interruptible(&req->completion); | 541 | return err; |
542 | } | ||
543 | |||
544 | /* | ||
545 | * pool ops | ||
546 | */ | ||
547 | static int get_poolop_reply_buf(const char *src, size_t src_len, | ||
548 | char *dst, size_t dst_len) | ||
549 | { | ||
550 | u32 buf_len; | ||
551 | |||
552 | if (src_len != sizeof(u32) + dst_len) | ||
553 | return -EINVAL; | ||
554 | |||
555 | buf_len = le32_to_cpu(*(u32 *)src); | ||
556 | if (buf_len != dst_len) | ||
557 | return -EINVAL; | ||
558 | |||
559 | memcpy(dst, src + sizeof(u32), dst_len); | ||
560 | return 0; | ||
561 | } | ||
562 | |||
563 | static void handle_poolop_reply(struct ceph_mon_client *monc, | ||
564 | struct ceph_msg *msg) | ||
565 | { | ||
566 | struct ceph_mon_generic_request *req; | ||
567 | struct ceph_mon_poolop_reply *reply = msg->front.iov_base; | ||
568 | u64 tid = le64_to_cpu(msg->hdr.tid); | ||
569 | |||
570 | if (msg->front.iov_len < sizeof(*reply)) | ||
571 | goto bad; | ||
572 | dout("handle_poolop_reply %p tid %llu\n", msg, tid); | ||
518 | 573 | ||
519 | mutex_lock(&monc->mutex); | 574 | mutex_lock(&monc->mutex); |
520 | rb_erase(&req->node, &monc->generic_request_tree); | 575 | req = __lookup_generic_req(monc, tid); |
521 | monc->num_generic_requests--; | 576 | if (req) { |
577 | if (req->buf_len && | ||
578 | get_poolop_reply_buf(msg->front.iov_base + sizeof(*reply), | ||
579 | msg->front.iov_len - sizeof(*reply), | ||
580 | req->buf, req->buf_len) < 0) { | ||
581 | mutex_unlock(&monc->mutex); | ||
582 | goto bad; | ||
583 | } | ||
584 | req->result = le32_to_cpu(reply->reply_code); | ||
585 | get_generic_request(req); | ||
586 | } | ||
522 | mutex_unlock(&monc->mutex); | 587 | mutex_unlock(&monc->mutex); |
588 | if (req) { | ||
589 | complete(&req->completion); | ||
590 | put_generic_request(req); | ||
591 | } | ||
592 | return; | ||
523 | 593 | ||
524 | if (!err) | 594 | bad: |
525 | err = req->result; | 595 | pr_err("corrupt generic reply, tid %llu\n", tid); |
596 | ceph_msg_dump(msg); | ||
597 | } | ||
598 | |||
599 | /* | ||
600 | * Do a synchronous pool op. | ||
601 | */ | ||
602 | int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op, | ||
603 | u32 pool, u64 snapid, | ||
604 | char *buf, int len) | ||
605 | { | ||
606 | struct ceph_mon_generic_request *req; | ||
607 | struct ceph_mon_poolop *h; | ||
608 | int err; | ||
609 | |||
610 | req = kzalloc(sizeof(*req), GFP_NOFS); | ||
611 | if (!req) | ||
612 | return -ENOMEM; | ||
613 | |||
614 | kref_init(&req->kref); | ||
615 | req->buf = buf; | ||
616 | req->buf_len = len; | ||
617 | init_completion(&req->completion); | ||
618 | |||
619 | err = -ENOMEM; | ||
620 | req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS); | ||
621 | if (!req->request) | ||
622 | goto out; | ||
623 | req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS); | ||
624 | if (!req->reply) | ||
625 | goto out; | ||
626 | |||
627 | /* fill out request */ | ||
628 | req->request->hdr.version = cpu_to_le16(2); | ||
629 | h = req->request->front.iov_base; | ||
630 | h->monhdr.have_version = 0; | ||
631 | h->monhdr.session_mon = cpu_to_le16(-1); | ||
632 | h->monhdr.session_mon_tid = 0; | ||
633 | h->fsid = monc->monmap->fsid; | ||
634 | h->pool = cpu_to_le32(pool); | ||
635 | h->op = cpu_to_le32(op); | ||
636 | h->auid = 0; | ||
637 | h->snapid = cpu_to_le64(snapid); | ||
638 | h->name_len = 0; | ||
639 | |||
640 | err = do_generic_request(monc, req); | ||
526 | 641 | ||
527 | out: | 642 | out: |
528 | kref_put(&req->kref, release_generic_request); | 643 | kref_put(&req->kref, release_generic_request); |
529 | return err; | 644 | return err; |
530 | } | 645 | } |
531 | 646 | ||
647 | int ceph_monc_create_snapid(struct ceph_mon_client *monc, | ||
648 | u32 pool, u64 *snapid) | ||
649 | { | ||
650 | return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, | ||
651 | pool, 0, (char *)snapid, sizeof(*snapid)); | ||
652 | |||
653 | } | ||
654 | |||
655 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||
656 | u32 pool, u64 snapid) | ||
657 | { | ||
658 | return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, | ||
659 | pool, snapid, 0, 0); | ||
660 | |||
661 | } | ||
662 | |||
532 | /* | 663 | /* |
533 | * Resend pending statfs requests. | 664 | * Resend pending generic requests. |
534 | */ | 665 | */ |
535 | static void __resend_generic_request(struct ceph_mon_client *monc) | 666 | static void __resend_generic_request(struct ceph_mon_client *monc) |
536 | { | 667 | { |
@@ -783,6 +914,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
783 | handle_statfs_reply(monc, msg); | 914 | handle_statfs_reply(monc, msg); |
784 | break; | 915 | break; |
785 | 916 | ||
917 | case CEPH_MSG_POOLOP_REPLY: | ||
918 | handle_poolop_reply(monc, msg); | ||
919 | break; | ||
920 | |||
786 | case CEPH_MSG_MON_MAP: | 921 | case CEPH_MSG_MON_MAP: |
787 | ceph_monc_handle_map(monc, msg); | 922 | ceph_monc_handle_map(monc, msg); |
788 | break; | 923 | break; |
@@ -820,6 +955,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, | |||
820 | case CEPH_MSG_MON_SUBSCRIBE_ACK: | 955 | case CEPH_MSG_MON_SUBSCRIBE_ACK: |
821 | m = ceph_msg_get(monc->m_subscribe_ack); | 956 | m = ceph_msg_get(monc->m_subscribe_ack); |
822 | break; | 957 | break; |
958 | case CEPH_MSG_POOLOP_REPLY: | ||
823 | case CEPH_MSG_STATFS_REPLY: | 959 | case CEPH_MSG_STATFS_REPLY: |
824 | return get_generic_reply(con, hdr, skip); | 960 | return get_generic_reply(con, hdr, skip); |
825 | case CEPH_MSG_AUTH_REPLY: | 961 | case CEPH_MSG_AUTH_REPLY: |
diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index 174d794321d0..8e396f2c0963 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h | |||
@@ -50,6 +50,7 @@ struct ceph_mon_generic_request { | |||
50 | struct rb_node node; | 50 | struct rb_node node; |
51 | int result; | 51 | int result; |
52 | void *buf; | 52 | void *buf; |
53 | int buf_len; | ||
53 | struct completion completion; | 54 | struct completion completion; |
54 | struct ceph_msg *request; /* original request */ | 55 | struct ceph_msg *request; /* original request */ |
55 | struct ceph_msg *reply; /* and reply */ | 56 | struct ceph_msg *reply; /* and reply */ |
@@ -111,6 +112,10 @@ extern int ceph_monc_open_session(struct ceph_mon_client *monc); | |||
111 | 112 | ||
112 | extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); | 113 | extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); |
113 | 114 | ||
115 | extern int ceph_monc_create_snapid(struct ceph_mon_client *monc, | ||
116 | u32 pool, u64 *snapid); | ||
114 | 117 | ||
118 | extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||
119 | u32 pool, u64 snapid); | ||
115 | 120 | ||
116 | #endif | 121 | #endif |
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h index 892a0298dfdf..680d3d648cac 100644 --- a/fs/ceph/msgr.h +++ b/fs/ceph/msgr.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef __MSGR_H | 1 | #ifndef CEPH_MSGR_H |
2 | #define __MSGR_H | 2 | #define CEPH_MSGR_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * Data types for message passing layer used by Ceph. | 5 | * Data types for message passing layer used by Ceph. |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index e38522347898..bed6391e52c7 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -1276,8 +1276,6 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
1276 | 1276 | ||
1277 | /* it may be a short read due to an object boundary */ | 1277 | /* it may be a short read due to an object boundary */ |
1278 | req->r_pages = pages; | 1278 | req->r_pages = pages; |
1279 | num_pages = calc_pages_for(off, *plen); | ||
1280 | req->r_num_pages = num_pages; | ||
1281 | 1279 | ||
1282 | dout("readpages final extent is %llu~%llu (%d pages)\n", | 1280 | dout("readpages final extent is %llu~%llu (%d pages)\n", |
1283 | off, *plen, req->r_num_pages); | 1281 | off, *plen, req->r_num_pages); |
@@ -1319,7 +1317,6 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1319 | 1317 | ||
1320 | /* it may be a short write due to an object boundary */ | 1318 | /* it may be a short write due to an object boundary */ |
1321 | req->r_pages = pages; | 1319 | req->r_pages = pages; |
1322 | req->r_num_pages = calc_pages_for(off, len); | ||
1323 | dout("writepages %llu~%llu (%d pages)\n", off, len, | 1320 | dout("writepages %llu~%llu (%d pages)\n", off, len, |
1324 | req->r_num_pages); | 1321 | req->r_num_pages); |
1325 | 1322 | ||
@@ -1476,8 +1473,8 @@ static void put_osd_con(struct ceph_connection *con) | |||
1476 | * authentication | 1473 | * authentication |
1477 | */ | 1474 | */ |
1478 | static int get_authorizer(struct ceph_connection *con, | 1475 | static int get_authorizer(struct ceph_connection *con, |
1479 | void **buf, int *len, int *proto, | 1476 | void **buf, int *len, int *proto, |
1480 | void **reply_buf, int *reply_len, int force_new) | 1477 | void **reply_buf, int *reply_len, int force_new) |
1481 | { | 1478 | { |
1482 | struct ceph_osd *o = con->private; | 1479 | struct ceph_osd *o = con->private; |
1483 | struct ceph_osd_client *osdc = o->o_osdc; | 1480 | struct ceph_osd_client *osdc = o->o_osdc; |
@@ -1497,7 +1494,7 @@ static int get_authorizer(struct ceph_connection *con, | |||
1497 | &o->o_authorizer_reply_buf, | 1494 | &o->o_authorizer_reply_buf, |
1498 | &o->o_authorizer_reply_buf_len); | 1495 | &o->o_authorizer_reply_buf_len); |
1499 | if (ret) | 1496 | if (ret) |
1500 | return ret; | 1497 | return ret; |
1501 | } | 1498 | } |
1502 | 1499 | ||
1503 | *proto = ac->protocol; | 1500 | *proto = ac->protocol; |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index 416d46adbf87..e31f118f1392 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
@@ -424,12 +424,30 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | |||
424 | kfree(pi); | 424 | kfree(pi); |
425 | } | 425 | } |
426 | 426 | ||
427 | void __decode_pool(void **p, struct ceph_pg_pool_info *pi) | 427 | static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) |
428 | { | 428 | { |
429 | unsigned n, m; | ||
430 | |||
429 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | 431 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); |
430 | calc_pg_masks(pi); | 432 | calc_pg_masks(pi); |
431 | *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); | 433 | |
434 | /* num_snaps * snap_info_t */ | ||
435 | n = le32_to_cpu(pi->v.num_snaps); | ||
436 | while (n--) { | ||
437 | ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + | ||
438 | sizeof(struct ceph_timespec), bad); | ||
439 | *p += sizeof(u64) + /* key */ | ||
440 | 1 + sizeof(u64) + /* u8, snapid */ | ||
441 | sizeof(struct ceph_timespec); | ||
442 | m = ceph_decode_32(p); /* snap name */ | ||
443 | *p += m; | ||
444 | } | ||
445 | |||
432 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; | 446 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; |
447 | return 0; | ||
448 | |||
449 | bad: | ||
450 | return -EINVAL; | ||
433 | } | 451 | } |
434 | 452 | ||
435 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) | 453 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) |
@@ -571,7 +589,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
571 | kfree(pi); | 589 | kfree(pi); |
572 | goto bad; | 590 | goto bad; |
573 | } | 591 | } |
574 | __decode_pool(p, pi); | 592 | err = __decode_pool(p, end, pi); |
593 | if (err < 0) | ||
594 | goto bad; | ||
575 | __insert_pg_pool(&map->pg_pools, pi); | 595 | __insert_pg_pool(&map->pg_pools, pi); |
576 | } | 596 | } |
577 | 597 | ||
@@ -760,7 +780,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
760 | pi->id = pool; | 780 | pi->id = pool; |
761 | __insert_pg_pool(&map->pg_pools, pi); | 781 | __insert_pg_pool(&map->pg_pools, pi); |
762 | } | 782 | } |
763 | __decode_pool(p, pi); | 783 | err = __decode_pool(p, end, pi); |
784 | if (err < 0) | ||
785 | goto bad; | ||
764 | } | 786 | } |
765 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) | 787 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) |
766 | goto bad; | 788 | goto bad; |
@@ -833,7 +855,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
833 | node)->pgid, pgid) <= 0) { | 855 | node)->pgid, pgid) <= 0) { |
834 | struct ceph_pg_mapping *cur = | 856 | struct ceph_pg_mapping *cur = |
835 | rb_entry(rbp, struct ceph_pg_mapping, node); | 857 | rb_entry(rbp, struct ceph_pg_mapping, node); |
836 | 858 | ||
837 | rbp = rb_next(rbp); | 859 | rbp = rb_next(rbp); |
838 | dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); | 860 | dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); |
839 | rb_erase(&cur->node, &map->pg_temp); | 861 | rb_erase(&cur->node, &map->pg_temp); |
@@ -1026,8 +1048,9 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1026 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, | 1048 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, |
1027 | pool->v.type, pool->v.size); | 1049 | pool->v.type, pool->v.size); |
1028 | if (ruleno < 0) { | 1050 | if (ruleno < 0) { |
1029 | pr_err("no crush rule pool %d type %d size %d\n", | 1051 | pr_err("no crush rule pool %d ruleset %d type %d size %d\n", |
1030 | poolid, pool->v.type, pool->v.size); | 1052 | poolid, pool->v.crush_ruleset, pool->v.type, |
1053 | pool->v.size); | ||
1031 | return NULL; | 1054 | return NULL; |
1032 | } | 1055 | } |
1033 | 1056 | ||
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index 8fcc023056c7..6d5247f2e81b 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef __RADOS_H | 1 | #ifndef CEPH_RADOS_H |
2 | #define __RADOS_H | 2 | #define CEPH_RADOS_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * Data types for the Ceph distributed object storage layer RADOS | 5 | * Data types for the Ceph distributed object storage layer RADOS |
@@ -203,6 +203,7 @@ enum { | |||
203 | CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, | 203 | CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, |
204 | 204 | ||
205 | CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, | 205 | CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, |
206 | CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14, | ||
206 | 207 | ||
207 | /** attrs **/ | 208 | /** attrs **/ |
208 | /* read */ | 209 | /* read */ |
@@ -272,6 +273,10 @@ static inline int ceph_osd_op_mode_modify(int op) | |||
272 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; | 273 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; |
273 | } | 274 | } |
274 | 275 | ||
276 | /* | ||
277 | * note that the following tmap stuff is also defined in the ceph librados.h | ||
278 | * any modification here needs to be updated there | ||
279 | */ | ||
275 | #define CEPH_OSD_TMAP_HDR 'h' | 280 | #define CEPH_OSD_TMAP_HDR 'h' |
276 | #define CEPH_OSD_TMAP_SET 's' | 281 | #define CEPH_OSD_TMAP_SET 's' |
277 | #define CEPH_OSD_TMAP_RM 'r' | 282 | #define CEPH_OSD_TMAP_RM 'r' |
@@ -297,6 +302,7 @@ enum { | |||
297 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ | 302 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ |
298 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ | 303 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ |
299 | CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ | 304 | CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ |
305 | CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ | ||
300 | }; | 306 | }; |
301 | 307 | ||
302 | enum { | 308 | enum { |
@@ -350,6 +356,9 @@ struct ceph_osd_op { | |||
350 | struct { | 356 | struct { |
351 | __le64 cookie, count; | 357 | __le64 cookie, count; |
352 | } __attribute__ ((packed)) pgls; | 358 | } __attribute__ ((packed)) pgls; |
359 | struct { | ||
360 | __le64 snapid; | ||
361 | } __attribute__ ((packed)) snap; | ||
353 | }; | 362 | }; |
354 | __le32 payload_len; | 363 | __le32 payload_len; |
355 | } __attribute__ ((packed)); | 364 | } __attribute__ ((packed)); |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fa87f51e38e1..9922628532b2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include "ceph_debug.h" | 2 | #include "ceph_debug.h" |
3 | 3 | ||
4 | #include <linux/backing-dev.h> | 4 | #include <linux/backing-dev.h> |
5 | #include <linux/ctype.h> | ||
5 | #include <linux/fs.h> | 6 | #include <linux/fs.h> |
6 | #include <linux/inet.h> | 7 | #include <linux/inet.h> |
7 | #include <linux/in6.h> | 8 | #include <linux/in6.h> |
@@ -101,12 +102,21 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
101 | } | 102 | } |
102 | 103 | ||
103 | 104 | ||
104 | static int ceph_syncfs(struct super_block *sb, int wait) | 105 | static int ceph_sync_fs(struct super_block *sb, int wait) |
105 | { | 106 | { |
106 | dout("sync_fs %d\n", wait); | 107 | struct ceph_client *client = ceph_sb_to_client(sb); |
108 | |||
109 | if (!wait) { | ||
110 | dout("sync_fs (non-blocking)\n"); | ||
111 | ceph_flush_dirty_caps(&client->mdsc); | ||
112 | dout("sync_fs (non-blocking) done\n"); | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | dout("sync_fs (blocking)\n"); | ||
107 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); | 117 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); |
108 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); | 118 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); |
109 | dout("sync_fs %d done\n", wait); | 119 | dout("sync_fs (blocking) done\n"); |
110 | return 0; | 120 | return 0; |
111 | } | 121 | } |
112 | 122 | ||
@@ -150,9 +160,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
150 | struct ceph_mount_args *args = client->mount_args; | 160 | struct ceph_mount_args *args = client->mount_args; |
151 | 161 | ||
152 | if (args->flags & CEPH_OPT_FSID) | 162 | if (args->flags & CEPH_OPT_FSID) |
153 | seq_printf(m, ",fsidmajor=%llu,fsidminor%llu", | 163 | seq_printf(m, ",fsid=%pU", &args->fsid); |
154 | le64_to_cpu(*(__le64 *)&args->fsid.fsid[0]), | ||
155 | le64_to_cpu(*(__le64 *)&args->fsid.fsid[8])); | ||
156 | if (args->flags & CEPH_OPT_NOSHARE) | 164 | if (args->flags & CEPH_OPT_NOSHARE) |
157 | seq_puts(m, ",noshare"); | 165 | seq_puts(m, ",noshare"); |
158 | if (args->flags & CEPH_OPT_DIRSTAT) | 166 | if (args->flags & CEPH_OPT_DIRSTAT) |
@@ -279,7 +287,7 @@ static const struct super_operations ceph_super_ops = { | |||
279 | .alloc_inode = ceph_alloc_inode, | 287 | .alloc_inode = ceph_alloc_inode, |
280 | .destroy_inode = ceph_destroy_inode, | 288 | .destroy_inode = ceph_destroy_inode, |
281 | .write_inode = ceph_write_inode, | 289 | .write_inode = ceph_write_inode, |
282 | .sync_fs = ceph_syncfs, | 290 | .sync_fs = ceph_sync_fs, |
283 | .put_super = ceph_put_super, | 291 | .put_super = ceph_put_super, |
284 | .show_options = ceph_show_options, | 292 | .show_options = ceph_show_options, |
285 | .statfs = ceph_statfs, | 293 | .statfs = ceph_statfs, |
@@ -322,9 +330,6 @@ const char *ceph_msg_type_name(int type) | |||
322 | * mount options | 330 | * mount options |
323 | */ | 331 | */ |
324 | enum { | 332 | enum { |
325 | Opt_fsidmajor, | ||
326 | Opt_fsidminor, | ||
327 | Opt_monport, | ||
328 | Opt_wsize, | 333 | Opt_wsize, |
329 | Opt_rsize, | 334 | Opt_rsize, |
330 | Opt_osdtimeout, | 335 | Opt_osdtimeout, |
@@ -339,6 +344,7 @@ enum { | |||
339 | Opt_congestion_kb, | 344 | Opt_congestion_kb, |
340 | Opt_last_int, | 345 | Opt_last_int, |
341 | /* int args above */ | 346 | /* int args above */ |
347 | Opt_fsid, | ||
342 | Opt_snapdirname, | 348 | Opt_snapdirname, |
343 | Opt_name, | 349 | Opt_name, |
344 | Opt_secret, | 350 | Opt_secret, |
@@ -355,9 +361,6 @@ enum { | |||
355 | }; | 361 | }; |
356 | 362 | ||
357 | static match_table_t arg_tokens = { | 363 | static match_table_t arg_tokens = { |
358 | {Opt_fsidmajor, "fsidmajor=%ld"}, | ||
359 | {Opt_fsidminor, "fsidminor=%ld"}, | ||
360 | {Opt_monport, "monport=%d"}, | ||
361 | {Opt_wsize, "wsize=%d"}, | 364 | {Opt_wsize, "wsize=%d"}, |
362 | {Opt_rsize, "rsize=%d"}, | 365 | {Opt_rsize, "rsize=%d"}, |
363 | {Opt_osdtimeout, "osdtimeout=%d"}, | 366 | {Opt_osdtimeout, "osdtimeout=%d"}, |
@@ -371,6 +374,7 @@ static match_table_t arg_tokens = { | |||
371 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | 374 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, |
372 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 375 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
373 | /* int args above */ | 376 | /* int args above */ |
377 | {Opt_fsid, "fsid=%s"}, | ||
374 | {Opt_snapdirname, "snapdirname=%s"}, | 378 | {Opt_snapdirname, "snapdirname=%s"}, |
375 | {Opt_name, "name=%s"}, | 379 | {Opt_name, "name=%s"}, |
376 | {Opt_secret, "secret=%s"}, | 380 | {Opt_secret, "secret=%s"}, |
@@ -386,6 +390,36 @@ static match_table_t arg_tokens = { | |||
386 | {-1, NULL} | 390 | {-1, NULL} |
387 | }; | 391 | }; |
388 | 392 | ||
393 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | ||
394 | { | ||
395 | int i = 0; | ||
396 | char tmp[3]; | ||
397 | int err = -EINVAL; | ||
398 | int d; | ||
399 | |||
400 | dout("parse_fsid '%s'\n", str); | ||
401 | tmp[2] = 0; | ||
402 | while (*str && i < 16) { | ||
403 | if (ispunct(*str)) { | ||
404 | str++; | ||
405 | continue; | ||
406 | } | ||
407 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | ||
408 | break; | ||
409 | tmp[0] = str[0]; | ||
410 | tmp[1] = str[1]; | ||
411 | if (sscanf(tmp, "%x", &d) < 1) | ||
412 | break; | ||
413 | fsid->fsid[i] = d & 0xff; | ||
414 | i++; | ||
415 | str += 2; | ||
416 | } | ||
417 | |||
418 | if (i == 16) | ||
419 | err = 0; | ||
420 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | ||
421 | return err; | ||
422 | } | ||
389 | 423 | ||
390 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, | 424 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, |
391 | const char *dev_name, | 425 | const char *dev_name, |
@@ -469,12 +503,6 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
469 | dout("got token %d\n", token); | 503 | dout("got token %d\n", token); |
470 | } | 504 | } |
471 | switch (token) { | 505 | switch (token) { |
472 | case Opt_fsidmajor: | ||
473 | *(__le64 *)&args->fsid.fsid[0] = cpu_to_le64(intval); | ||
474 | break; | ||
475 | case Opt_fsidminor: | ||
476 | *(__le64 *)&args->fsid.fsid[8] = cpu_to_le64(intval); | ||
477 | break; | ||
478 | case Opt_ip: | 506 | case Opt_ip: |
479 | err = ceph_parse_ips(argstr[0].from, | 507 | err = ceph_parse_ips(argstr[0].from, |
480 | argstr[0].to, | 508 | argstr[0].to, |
@@ -485,6 +513,11 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
485 | args->flags |= CEPH_OPT_MYIP; | 513 | args->flags |= CEPH_OPT_MYIP; |
486 | break; | 514 | break; |
487 | 515 | ||
516 | case Opt_fsid: | ||
517 | err = parse_fsid(argstr[0].from, &args->fsid); | ||
518 | if (err == 0) | ||
519 | args->flags |= CEPH_OPT_FSID; | ||
520 | break; | ||
488 | case Opt_snapdirname: | 521 | case Opt_snapdirname: |
489 | kfree(args->snapdir_name); | 522 | kfree(args->snapdir_name); |
490 | args->snapdir_name = kstrndup(argstr[0].from, | 523 | args->snapdir_name = kstrndup(argstr[0].from, |
@@ -515,6 +548,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
515 | case Opt_osdkeepalivetimeout: | 548 | case Opt_osdkeepalivetimeout: |
516 | args->osd_keepalive_timeout = intval; | 549 | args->osd_keepalive_timeout = intval; |
517 | break; | 550 | break; |
551 | case Opt_osd_idle_ttl: | ||
552 | args->osd_idle_ttl = intval; | ||
553 | break; | ||
518 | case Opt_mount_timeout: | 554 | case Opt_mount_timeout: |
519 | args->mount_timeout = intval; | 555 | args->mount_timeout = intval; |
520 | break; | 556 | break; |
@@ -630,7 +666,6 @@ static struct ceph_client *ceph_create_client(struct ceph_mount_args *args) | |||
630 | 666 | ||
631 | /* caps */ | 667 | /* caps */ |
632 | client->min_caps = args->max_readdir; | 668 | client->min_caps = args->max_readdir; |
633 | ceph_adjust_min_caps(client->min_caps); | ||
634 | 669 | ||
635 | /* subsystems */ | 670 | /* subsystems */ |
636 | err = ceph_monc_init(&client->monc, client); | 671 | err = ceph_monc_init(&client->monc, client); |
@@ -680,8 +715,6 @@ static void ceph_destroy_client(struct ceph_client *client) | |||
680 | 715 | ||
681 | ceph_monc_stop(&client->monc); | 716 | ceph_monc_stop(&client->monc); |
682 | 717 | ||
683 | ceph_adjust_min_caps(-client->min_caps); | ||
684 | |||
685 | ceph_debugfs_client_cleanup(client); | 718 | ceph_debugfs_client_cleanup(client); |
686 | destroy_workqueue(client->wb_wq); | 719 | destroy_workqueue(client->wb_wq); |
687 | destroy_workqueue(client->pg_inv_wq); | 720 | destroy_workqueue(client->pg_inv_wq); |
@@ -706,13 +739,13 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | |||
706 | { | 739 | { |
707 | if (client->have_fsid) { | 740 | if (client->have_fsid) { |
708 | if (ceph_fsid_compare(&client->fsid, fsid)) { | 741 | if (ceph_fsid_compare(&client->fsid, fsid)) { |
709 | pr_err("bad fsid, had " FSID_FORMAT " got " FSID_FORMAT, | 742 | pr_err("bad fsid, had %pU got %pU", |
710 | PR_FSID(&client->fsid), PR_FSID(fsid)); | 743 | &client->fsid, fsid); |
711 | return -1; | 744 | return -1; |
712 | } | 745 | } |
713 | } else { | 746 | } else { |
714 | pr_info("client%lld fsid " FSID_FORMAT "\n", | 747 | pr_info("client%lld fsid %pU\n", client->monc.auth->global_id, |
715 | client->monc.auth->global_id, PR_FSID(fsid)); | 748 | fsid); |
716 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | 749 | memcpy(&client->fsid, fsid, sizeof(*fsid)); |
717 | ceph_debugfs_client_init(client); | 750 | ceph_debugfs_client_init(client); |
718 | client->have_fsid = true; | 751 | client->have_fsid = true; |
@@ -1043,8 +1076,6 @@ static int __init init_ceph(void) | |||
1043 | if (ret) | 1076 | if (ret) |
1044 | goto out_msgr; | 1077 | goto out_msgr; |
1045 | 1078 | ||
1046 | ceph_caps_init(); | ||
1047 | |||
1048 | ret = register_filesystem(&ceph_fs_type); | 1079 | ret = register_filesystem(&ceph_fs_type); |
1049 | if (ret) | 1080 | if (ret) |
1050 | goto out_icache; | 1081 | goto out_icache; |
@@ -1069,7 +1100,6 @@ static void __exit exit_ceph(void) | |||
1069 | { | 1100 | { |
1070 | dout("exit_ceph\n"); | 1101 | dout("exit_ceph\n"); |
1071 | unregister_filesystem(&ceph_fs_type); | 1102 | unregister_filesystem(&ceph_fs_type); |
1072 | ceph_caps_finalize(); | ||
1073 | destroy_caches(); | 1103 | destroy_caches(); |
1074 | ceph_msgr_exit(); | 1104 | ceph_msgr_exit(); |
1075 | ceph_debugfs_cleanup(); | 1105 | ceph_debugfs_cleanup(); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 10a4a406e887..2482d696f0de 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -31,6 +31,12 @@ | |||
31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) | 31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * Supported features | ||
35 | */ | ||
/* parenthesized so that ~, & and similar operators apply to the whole mask */
#define CEPH_FEATURE_SUPPORTED	(CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK)
#define CEPH_FEATURE_REQUIRED	(CEPH_FEATURE_NOSRCADDR)
38 | |||
39 | /* | ||
34 | * mount options | 40 | * mount options |
35 | */ | 41 | */ |
36 | #define CEPH_OPT_FSID (1<<0) | 42 | #define CEPH_OPT_FSID (1<<0) |
@@ -560,11 +566,13 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci) | |||
560 | /* what the mds thinks we want */ | 566 | /* what the mds thinks we want */ |
561 | extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); | 567 | extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); |
562 | 568 | ||
563 | extern void ceph_caps_init(void); | 569 | extern void ceph_caps_init(struct ceph_mds_client *mdsc); |
564 | extern void ceph_caps_finalize(void); | 570 | extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); |
565 | extern void ceph_adjust_min_caps(int delta); | 571 | extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); |
566 | extern int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need); | 572 | extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, |
567 | extern int ceph_unreserve_caps(struct ceph_cap_reservation *ctx); | 573 | struct ceph_cap_reservation *ctx, int need); |
574 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | ||
575 | struct ceph_cap_reservation *ctx); | ||
568 | extern void ceph_reservation_status(struct ceph_client *client, | 576 | extern void ceph_reservation_status(struct ceph_client *client, |
569 | int *total, int *avail, int *used, | 577 | int *total, int *avail, int *used, |
570 | int *reserved, int *min); | 578 | int *reserved, int *min); |
@@ -738,13 +746,6 @@ extern struct kmem_cache *ceph_file_cachep; | |||
738 | extern const char *ceph_msg_type_name(int type); | 746 | extern const char *ceph_msg_type_name(int type); |
739 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | 747 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); |
740 | 748 | ||
741 | #define FSID_FORMAT "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" \ | ||
742 | "%02x%02x%02x%02x%02x%02x" | ||
743 | #define PR_FSID(f) (f)->fsid[0], (f)->fsid[1], (f)->fsid[2], (f)->fsid[3], \ | ||
744 | (f)->fsid[4], (f)->fsid[5], (f)->fsid[6], (f)->fsid[7], \ | ||
745 | (f)->fsid[8], (f)->fsid[9], (f)->fsid[10], (f)->fsid[11], \ | ||
746 | (f)->fsid[12], (f)->fsid[13], (f)->fsid[14], (f)->fsid[15] | ||
747 | |||
748 | /* inode.c */ | 749 | /* inode.c */ |
749 | extern const struct inode_operations ceph_file_iops; | 750 | extern const struct inode_operations ceph_file_iops; |
750 | 751 | ||
@@ -806,13 +807,16 @@ static inline void ceph_remove_cap(struct ceph_cap *cap) | |||
806 | __ceph_remove_cap(cap); | 807 | __ceph_remove_cap(cap); |
807 | spin_unlock(&inode->i_lock); | 808 | spin_unlock(&inode->i_lock); |
808 | } | 809 | } |
809 | extern void ceph_put_cap(struct ceph_cap *cap); | 810 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, |
811 | struct ceph_cap *cap); | ||
810 | 812 | ||
811 | extern void ceph_queue_caps_release(struct inode *inode); | 813 | extern void ceph_queue_caps_release(struct inode *inode); |
812 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); | 814 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); |
813 | extern int ceph_fsync(struct file *file, int datasync); | 815 | extern int ceph_fsync(struct file *file, int datasync); |
814 | extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, | 816 | extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, |
815 | struct ceph_mds_session *session); | 817 | struct ceph_mds_session *session); |
818 | extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, | ||
819 | int mds); | ||
816 | extern int ceph_get_cap_mds(struct inode *inode); | 820 | extern int ceph_get_cap_mds(struct inode *inode); |
817 | extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); | 821 | extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); |
818 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); | 822 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); |
@@ -857,7 +861,7 @@ extern void ceph_release_page_vector(struct page **pages, int num_pages); | |||
857 | /* dir.c */ | 861 | /* dir.c */ |
858 | extern const struct file_operations ceph_dir_fops; | 862 | extern const struct file_operations ceph_dir_fops; |
859 | extern const struct inode_operations ceph_dir_iops; | 863 | extern const struct inode_operations ceph_dir_iops; |
860 | extern struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, | 864 | extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, |
861 | ceph_snapdir_dentry_ops; | 865 | ceph_snapdir_dentry_ops; |
862 | 866 | ||
863 | extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); | 867 | extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); |
@@ -888,6 +892,14 @@ extern void ceph_debugfs_cleanup(void); | |||
888 | extern int ceph_debugfs_client_init(struct ceph_client *client); | 892 | extern int ceph_debugfs_client_init(struct ceph_client *client); |
889 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | 893 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); |
890 | 894 | ||
895 | /* locks.c */ | ||
896 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | ||
897 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | ||
898 | extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); | ||
899 | extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p, | ||
900 | int p_locks, int f_locks); | ||
901 | extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); | ||
902 | |||
891 | static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) | 903 | static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) |
892 | { | 904 | { |
893 | if (dentry && dentry->d_parent) | 905 | if (dentry && dentry->d_parent) |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 68aeebc69681..097a2654c00f 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -337,6 +337,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci) | |||
337 | } | 337 | } |
338 | 338 | ||
339 | static int __build_xattrs(struct inode *inode) | 339 | static int __build_xattrs(struct inode *inode) |
340 | __releases(inode->i_lock) | ||
341 | __acquires(inode->i_lock) | ||
340 | { | 342 | { |
341 | u32 namelen; | 343 | u32 namelen; |
342 | u32 numattr = 0; | 344 | u32 numattr = 0; |
diff --git a/fs/cifs/README b/fs/cifs/README index a7081eeeb85d..7099a526f775 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -301,6 +301,16 @@ A partial list of the supported mount options follows: | |||
301 | gid Set the default gid for inodes (similar to above). | 301 | gid Set the default gid for inodes (similar to above). |
302 | file_mode If CIFS Unix extensions are not supported by the server | 302 | file_mode If CIFS Unix extensions are not supported by the server |
303 | this overrides the default mode for file inodes. | 303 | this overrides the default mode for file inodes. |
304 | fsc Enable local disk caching using FS-Cache (off by default). This | ||
305 | option could be useful to improve performance on a slow link, | ||
306 | heavily loaded server and/or network where reading from the | ||
307 | disk is faster than reading from the server (over the network). | ||
308 | This could also impact scalability positively as the | ||
309 | number of calls to the server are reduced. However, local | ||
310 | caching is not suitable for all workloads for e.g. read-once | ||
311 | type workloads. So, you need to consider carefully your | ||
312 | workload/scenario before using this option. Currently, local | ||
313 | disk caching is functional for CIFS files opened as read-only. | ||
304 | dir_mode If CIFS Unix extensions are not supported by the server | 314 | dir_mode If CIFS Unix extensions are not supported by the server |
305 | this overrides the default mode for directory inodes. | 315 | this overrides the default mode for directory inodes. |
306 | port attempt to contact the server on this tcp port, before | 316 | port attempt to contact the server on this tcp port, before |
diff --git a/fs/dcache.c b/fs/dcache.c index 9f2c13417969..166d35d56868 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1905,48 +1905,30 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) | |||
1905 | } | 1905 | } |
1906 | 1906 | ||
1907 | /** | 1907 | /** |
1908 | * __d_path - return the path of a dentry | 1908 | * Prepend path string to a buffer |
1909 | * | ||
1909 | * @path: the dentry/vfsmount to report | 1910 | * @path: the dentry/vfsmount to report |
1910 | * @root: root vfsmnt/dentry (may be modified by this function) | 1911 | * @root: root vfsmnt/dentry (may be modified by this function) |
1911 | * @buffer: buffer to return value in | 1912 | * @buffer: pointer to the end of the buffer |
1912 | * @buflen: buffer length | 1913 | * @buflen: pointer to buffer length |
1913 | * | 1914 | * |
1914 | * Convert a dentry into an ASCII path name. If the entry has been deleted | 1915 | * Caller holds the dcache_lock. |
1915 | * the string " (deleted)" is appended. Note that this is ambiguous. | ||
1916 | * | ||
1917 | * Returns a pointer into the buffer or an error code if the | ||
1918 | * path was too long. | ||
1919 | * | ||
1920 | * "buflen" should be positive. Caller holds the dcache_lock. | ||
1921 | * | 1916 | * |
1922 | * If path is not reachable from the supplied root, then the value of | 1917 | * If path is not reachable from the supplied root, then the value of |
1923 | * root is changed (without modifying refcounts). | 1918 | * root is changed (without modifying refcounts). |
1924 | */ | 1919 | */ |
1925 | char *__d_path(const struct path *path, struct path *root, | 1920 | static int prepend_path(const struct path *path, struct path *root, |
1926 | char *buffer, int buflen) | 1921 | char **buffer, int *buflen) |
1927 | { | 1922 | { |
1928 | struct dentry *dentry = path->dentry; | 1923 | struct dentry *dentry = path->dentry; |
1929 | struct vfsmount *vfsmnt = path->mnt; | 1924 | struct vfsmount *vfsmnt = path->mnt; |
1930 | char *end = buffer + buflen; | 1925 | bool slash = false; |
1931 | char *retval; | 1926 | int error = 0; |
1932 | 1927 | ||
1933 | spin_lock(&vfsmount_lock); | 1928 | spin_lock(&vfsmount_lock); |
1934 | prepend(&end, &buflen, "\0", 1); | 1929 | while (dentry != root->dentry || vfsmnt != root->mnt) { |
1935 | if (d_unlinked(dentry) && | ||
1936 | (prepend(&end, &buflen, " (deleted)", 10) != 0)) | ||
1937 | goto Elong; | ||
1938 | |||
1939 | if (buflen < 1) | ||
1940 | goto Elong; | ||
1941 | /* Get '/' right */ | ||
1942 | retval = end-1; | ||
1943 | *retval = '/'; | ||
1944 | |||
1945 | for (;;) { | ||
1946 | struct dentry * parent; | 1930 | struct dentry * parent; |
1947 | 1931 | ||
1948 | if (dentry == root->dentry && vfsmnt == root->mnt) | ||
1949 | break; | ||
1950 | if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { | 1932 | if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { |
1951 | /* Global root? */ | 1933 | /* Global root? */ |
1952 | if (vfsmnt->mnt_parent == vfsmnt) { | 1934 | if (vfsmnt->mnt_parent == vfsmnt) { |
@@ -1958,28 +1940,88 @@ char *__d_path(const struct path *path, struct path *root, | |||
1958 | } | 1940 | } |
1959 | parent = dentry->d_parent; | 1941 | parent = dentry->d_parent; |
1960 | prefetch(parent); | 1942 | prefetch(parent); |
1961 | if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || | 1943 | error = prepend_name(buffer, buflen, &dentry->d_name); |
1962 | (prepend(&end, &buflen, "/", 1) != 0)) | 1944 | if (!error) |
1963 | goto Elong; | 1945 | error = prepend(buffer, buflen, "/", 1); |
1964 | retval = end; | 1946 | if (error) |
1947 | break; | ||
1948 | |||
1949 | slash = true; | ||
1965 | dentry = parent; | 1950 | dentry = parent; |
1966 | } | 1951 | } |
1967 | 1952 | ||
1968 | out: | 1953 | out: |
1954 | if (!error && !slash) | ||
1955 | error = prepend(buffer, buflen, "/", 1); | ||
1956 | |||
1969 | spin_unlock(&vfsmount_lock); | 1957 | spin_unlock(&vfsmount_lock); |
1970 | return retval; | 1958 | return error; |
1971 | 1959 | ||
1972 | global_root: | 1960 | global_root: |
1973 | retval += 1; /* hit the slash */ | 1961 | /* |
1974 | if (prepend_name(&retval, &buflen, &dentry->d_name) != 0) | 1962 | * Filesystems needing to implement special "root names" |
1975 | goto Elong; | 1963 | * should do so with ->d_dname() |
1964 | */ | ||
1965 | if (IS_ROOT(dentry) && | ||
1966 | (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) { | ||
1967 | WARN(1, "Root dentry has weird name <%.*s>\n", | ||
1968 | (int) dentry->d_name.len, dentry->d_name.name); | ||
1969 | } | ||
1976 | root->mnt = vfsmnt; | 1970 | root->mnt = vfsmnt; |
1977 | root->dentry = dentry; | 1971 | root->dentry = dentry; |
1978 | goto out; | 1972 | goto out; |
1973 | } | ||
1979 | 1974 | ||
1980 | Elong: | 1975 | /** |
1981 | retval = ERR_PTR(-ENAMETOOLONG); | 1976 | * __d_path - return the path of a dentry |
1982 | goto out; | 1977 | * @path: the dentry/vfsmount to report |
1978 | * @root: root vfsmnt/dentry (may be modified by this function) | ||
1979 | * @buffer: buffer to return value in | ||
1980 | * @buflen: buffer length | ||
1981 | * | ||
1982 | * Convert a dentry into an ASCII path name. | ||
1983 | * | ||
1984 | * Returns a pointer into the buffer or an error code if the | ||
1985 | * path was too long. | ||
1986 | * | ||
1987 | * "buflen" should be positive. Caller holds the dcache_lock. | ||
1988 | * | ||
1989 | * If path is not reachable from the supplied root, then the value of | ||
1990 | * root is changed (without modifying refcounts). | ||
1991 | */ | ||
1992 | char *__d_path(const struct path *path, struct path *root, | ||
1993 | char *buf, int buflen) | ||
1994 | { | ||
1995 | char *res = buf + buflen; | ||
1996 | int error; | ||
1997 | |||
1998 | prepend(&res, &buflen, "\0", 1); | ||
1999 | error = prepend_path(path, root, &res, &buflen); | ||
2000 | if (error) | ||
2001 | return ERR_PTR(error); | ||
2002 | |||
2003 | return res; | ||
2004 | } | ||
2005 | |||
2006 | /* | ||
2007 | * same as __d_path but appends "(deleted)" for unlinked files. | ||
2008 | */ | ||
2009 | static int path_with_deleted(const struct path *path, struct path *root, | ||
2010 | char **buf, int *buflen) | ||
2011 | { | ||
2012 | prepend(buf, buflen, "\0", 1); | ||
2013 | if (d_unlinked(path->dentry)) { | ||
2014 | int error = prepend(buf, buflen, " (deleted)", 10); | ||
2015 | if (error) | ||
2016 | return error; | ||
2017 | } | ||
2018 | |||
2019 | return prepend_path(path, root, buf, buflen); | ||
2020 | } | ||
2021 | |||
2022 | static int prepend_unreachable(char **buffer, int *buflen) | ||
2023 | { | ||
2024 | return prepend(buffer, buflen, "(unreachable)", 13); | ||
1983 | } | 2025 | } |
1984 | 2026 | ||
1985 | /** | 2027 | /** |
@@ -2000,9 +2042,10 @@ Elong: | |||
2000 | */ | 2042 | */ |
2001 | char *d_path(const struct path *path, char *buf, int buflen) | 2043 | char *d_path(const struct path *path, char *buf, int buflen) |
2002 | { | 2044 | { |
2003 | char *res; | 2045 | char *res = buf + buflen; |
2004 | struct path root; | 2046 | struct path root; |
2005 | struct path tmp; | 2047 | struct path tmp; |
2048 | int error; | ||
2006 | 2049 | ||
2007 | /* | 2050 | /* |
2008 | * We have various synthetic filesystems that never get mounted. On | 2051 | * We have various synthetic filesystems that never get mounted. On |
@@ -2014,19 +2057,51 @@ char *d_path(const struct path *path, char *buf, int buflen) | |||
2014 | if (path->dentry->d_op && path->dentry->d_op->d_dname) | 2057 | if (path->dentry->d_op && path->dentry->d_op->d_dname) |
2015 | return path->dentry->d_op->d_dname(path->dentry, buf, buflen); | 2058 | return path->dentry->d_op->d_dname(path->dentry, buf, buflen); |
2016 | 2059 | ||
2017 | read_lock(&current->fs->lock); | 2060 | get_fs_root(current->fs, &root); |
2018 | root = current->fs->root; | ||
2019 | path_get(&root); | ||
2020 | read_unlock(&current->fs->lock); | ||
2021 | spin_lock(&dcache_lock); | 2061 | spin_lock(&dcache_lock); |
2022 | tmp = root; | 2062 | tmp = root; |
2023 | res = __d_path(path, &tmp, buf, buflen); | 2063 | error = path_with_deleted(path, &tmp, &res, &buflen); |
2064 | if (error) | ||
2065 | res = ERR_PTR(error); | ||
2024 | spin_unlock(&dcache_lock); | 2066 | spin_unlock(&dcache_lock); |
2025 | path_put(&root); | 2067 | path_put(&root); |
2026 | return res; | 2068 | return res; |
2027 | } | 2069 | } |
2028 | EXPORT_SYMBOL(d_path); | 2070 | EXPORT_SYMBOL(d_path); |
2029 | 2071 | ||
2072 | /** | ||
2073 | * d_path_with_unreachable - return the path of a dentry | ||
2074 | * @path: path to report | ||
2075 | * @buf: buffer to return value in | ||
2076 | * @buflen: buffer length | ||
2077 | * | ||
2078 | * The difference from d_path() is that this prepends "(unreachable)" | ||
2079 | * to paths which are unreachable from the current process' root. | ||
2080 | */ | ||
2081 | char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) | ||
2082 | { | ||
2083 | char *res = buf + buflen; | ||
2084 | struct path root; | ||
2085 | struct path tmp; | ||
2086 | int error; | ||
2087 | |||
2088 | if (path->dentry->d_op && path->dentry->d_op->d_dname) | ||
2089 | return path->dentry->d_op->d_dname(path->dentry, buf, buflen); | ||
2090 | |||
2091 | get_fs_root(current->fs, &root); | ||
2092 | spin_lock(&dcache_lock); | ||
2093 | tmp = root; | ||
2094 | error = path_with_deleted(path, &tmp, &res, &buflen); | ||
2095 | if (!error && !path_equal(&tmp, &root)) | ||
2096 | error = prepend_unreachable(&res, &buflen); | ||
2097 | spin_unlock(&dcache_lock); | ||
2098 | path_put(&root); | ||
2099 | if (error) | ||
2100 | res = ERR_PTR(error); | ||
2101 | |||
2102 | return res; | ||
2103 | } | ||
2104 | |||
2030 | /* | 2105 | /* |
2031 | * Helper function for dentry_operations.d_dname() members | 2106 | * Helper function for dentry_operations.d_dname() members |
2032 | */ | 2107 | */ |
@@ -2129,27 +2204,30 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) | |||
2129 | if (!page) | 2204 | if (!page) |
2130 | return -ENOMEM; | 2205 | return -ENOMEM; |
2131 | 2206 | ||
2132 | read_lock(&current->fs->lock); | 2207 | get_fs_root_and_pwd(current->fs, &root, &pwd); |
2133 | pwd = current->fs->pwd; | ||
2134 | path_get(&pwd); | ||
2135 | root = current->fs->root; | ||
2136 | path_get(&root); | ||
2137 | read_unlock(&current->fs->lock); | ||
2138 | 2208 | ||
2139 | error = -ENOENT; | 2209 | error = -ENOENT; |
2140 | spin_lock(&dcache_lock); | 2210 | spin_lock(&dcache_lock); |
2141 | if (!d_unlinked(pwd.dentry)) { | 2211 | if (!d_unlinked(pwd.dentry)) { |
2142 | unsigned long len; | 2212 | unsigned long len; |
2143 | struct path tmp = root; | 2213 | struct path tmp = root; |
2144 | char * cwd; | 2214 | char *cwd = page + PAGE_SIZE; |
2215 | int buflen = PAGE_SIZE; | ||
2145 | 2216 | ||
2146 | cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE); | 2217 | prepend(&cwd, &buflen, "\0", 1); |
2218 | error = prepend_path(&pwd, &tmp, &cwd, &buflen); | ||
2147 | spin_unlock(&dcache_lock); | 2219 | spin_unlock(&dcache_lock); |
2148 | 2220 | ||
2149 | error = PTR_ERR(cwd); | 2221 | if (error) |
2150 | if (IS_ERR(cwd)) | ||
2151 | goto out; | 2222 | goto out; |
2152 | 2223 | ||
2224 | /* Unreachable from current root */ | ||
2225 | if (!path_equal(&tmp, &root)) { | ||
2226 | error = prepend_unreachable(&cwd, &buflen); | ||
2227 | if (error) | ||
2228 | goto out; | ||
2229 | } | ||
2230 | |||
2153 | error = -ERANGE; | 2231 | error = -ERANGE; |
2154 | len = PAGE_SIZE + page - cwd; | 2232 | len = PAGE_SIZE + page - cwd; |
2155 | if (len <= size) { | 2233 | if (len <= size) { |
diff --git a/fs/exofs/file.c b/fs/exofs/file.c index f9bfe2b501d5..68cb23e3bb98 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c | |||
@@ -30,9 +30,6 @@ | |||
30 | * along with exofs; if not, write to the Free Software | 30 | * along with exofs; if not, write to the Free Software |
31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
32 | */ | 32 | */ |
33 | |||
34 | #include <linux/buffer_head.h> | ||
35 | |||
36 | #include "exofs.h" | 33 | #include "exofs.h" |
37 | 34 | ||
38 | static int exofs_release_file(struct inode *inode, struct file *filp) | 35 | static int exofs_release_file(struct inode *inode, struct file *filp) |
@@ -40,19 +37,27 @@ static int exofs_release_file(struct inode *inode, struct file *filp) | |||
40 | return 0; | 37 | return 0; |
41 | } | 38 | } |
42 | 39 | ||
40 | /* exofs_file_fsync - flush the inode to disk | ||
41 | * | ||
42 | * Note, in exofs all metadata is written as part of inode, regardless. | ||
43 | * The writeout is synchronous | ||
44 | */ | ||
43 | static int exofs_file_fsync(struct file *filp, int datasync) | 45 | static int exofs_file_fsync(struct file *filp, int datasync) |
44 | { | 46 | { |
45 | int ret; | 47 | int ret; |
46 | struct address_space *mapping = filp->f_mapping; | 48 | struct inode *inode = filp->f_mapping->host; |
47 | struct inode *inode = mapping->host; | 49 | struct writeback_control wbc = { |
50 | .sync_mode = WB_SYNC_ALL, | ||
51 | .nr_to_write = 0, /* metadata-only; caller takes care of data */ | ||
52 | }; | ||
48 | struct super_block *sb; | 53 | struct super_block *sb; |
49 | 54 | ||
50 | ret = filemap_write_and_wait(mapping); | 55 | if (!(inode->i_state & I_DIRTY)) |
51 | if (ret) | 56 | return 0; |
52 | return ret; | 57 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
58 | return 0; | ||
53 | 59 | ||
54 | /* sync the inode attributes */ | 60 | ret = sync_inode(inode, &wbc); |
55 | ret = write_inode_now(inode, 1); | ||
56 | 61 | ||
57 | /* This is a good place to write the sb */ | 62 | /* This is a good place to write the sb */ |
58 | /* TODO: Sechedule an sb-sync on create */ | 63 | /* TODO: Sechedule an sb-sync on create */ |
@@ -65,9 +70,9 @@ static int exofs_file_fsync(struct file *filp, int datasync) | |||
65 | 70 | ||
66 | static int exofs_flush(struct file *file, fl_owner_t id) | 71 | static int exofs_flush(struct file *file, fl_owner_t id) |
67 | { | 72 | { |
68 | exofs_file_fsync(file, 1); | 73 | int ret = vfs_fsync(file, 0); |
69 | /* TODO: Flush the OSD target */ | 74 | /* TODO: Flush the OSD target */ |
70 | return 0; | 75 | return ret; |
71 | } | 76 | } |
72 | 77 | ||
73 | const struct file_operations exofs_file_operations = { | 78 | const struct file_operations exofs_file_operations = { |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 088cb476b68a..eb7368ebd8cd 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -32,9 +32,6 @@ | |||
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include <linux/slab.h> | 34 | #include <linux/slab.h> |
35 | #include <linux/writeback.h> | ||
36 | #include <linux/buffer_head.h> | ||
37 | #include <scsi/scsi_device.h> | ||
38 | 35 | ||
39 | #include "exofs.h" | 36 | #include "exofs.h" |
40 | 37 | ||
@@ -773,15 +770,13 @@ static int exofs_releasepage(struct page *page, gfp_t gfp) | |||
773 | { | 770 | { |
774 | EXOFS_DBGMSG("page 0x%lx\n", page->index); | 771 | EXOFS_DBGMSG("page 0x%lx\n", page->index); |
775 | WARN_ON(1); | 772 | WARN_ON(1); |
776 | return try_to_free_buffers(page); | 773 | return 0; |
777 | } | 774 | } |
778 | 775 | ||
779 | static void exofs_invalidatepage(struct page *page, unsigned long offset) | 776 | static void exofs_invalidatepage(struct page *page, unsigned long offset) |
780 | { | 777 | { |
781 | EXOFS_DBGMSG("page_has_buffers=>%d\n", page_has_buffers(page)); | 778 | EXOFS_DBGMSG("page 0x%lx offset 0x%lx\n", page->index, offset); |
782 | WARN_ON(1); | 779 | WARN_ON(1); |
783 | |||
784 | block_invalidatepage(page, offset); | ||
785 | } | 780 | } |
786 | 781 | ||
787 | const struct address_space_operations exofs_aops = { | 782 | const struct address_space_operations exofs_aops = { |
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index e2732203fa93..6550bf70e41d 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c | |||
@@ -305,8 +305,6 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) | |||
305 | struct _striping_info { | 305 | struct _striping_info { |
306 | u64 obj_offset; | 306 | u64 obj_offset; |
307 | u64 group_length; | 307 | u64 group_length; |
308 | u64 total_group_length; | ||
309 | u64 Major; | ||
310 | unsigned dev; | 308 | unsigned dev; |
311 | unsigned unit_off; | 309 | unsigned unit_off; |
312 | }; | 310 | }; |
@@ -343,8 +341,6 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, | |||
343 | (M * group_depth * stripe_unit); | 341 | (M * group_depth * stripe_unit); |
344 | 342 | ||
345 | si->group_length = T - H; | 343 | si->group_length = T - H; |
346 | si->total_group_length = T; | ||
347 | si->Major = M; | ||
348 | } | 344 | } |
349 | 345 | ||
350 | static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, | 346 | static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, |
@@ -392,20 +388,19 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, | |||
392 | } | 388 | } |
393 | 389 | ||
394 | static int _prepare_one_group(struct exofs_io_state *ios, u64 length, | 390 | static int _prepare_one_group(struct exofs_io_state *ios, u64 length, |
395 | struct _striping_info *si, unsigned first_comp) | 391 | struct _striping_info *si) |
396 | { | 392 | { |
397 | unsigned stripe_unit = ios->layout->stripe_unit; | 393 | unsigned stripe_unit = ios->layout->stripe_unit; |
398 | unsigned mirrors_p1 = ios->layout->mirrors_p1; | 394 | unsigned mirrors_p1 = ios->layout->mirrors_p1; |
399 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; | 395 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; |
400 | unsigned dev = si->dev; | 396 | unsigned dev = si->dev; |
401 | unsigned first_dev = dev - (dev % devs_in_group); | 397 | unsigned first_dev = dev - (dev % devs_in_group); |
402 | unsigned comp = first_comp + (dev - first_dev); | ||
403 | unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; | 398 | unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; |
404 | unsigned cur_pg = ios->pages_consumed; | 399 | unsigned cur_pg = ios->pages_consumed; |
405 | int ret = 0; | 400 | int ret = 0; |
406 | 401 | ||
407 | while (length) { | 402 | while (length) { |
408 | struct exofs_per_dev_state *per_dev = &ios->per_dev[comp]; | 403 | struct exofs_per_dev_state *per_dev = &ios->per_dev[dev]; |
409 | unsigned cur_len, page_off = 0; | 404 | unsigned cur_len, page_off = 0; |
410 | 405 | ||
411 | if (!per_dev->length) { | 406 | if (!per_dev->length) { |
@@ -424,11 +419,8 @@ static int _prepare_one_group(struct exofs_io_state *ios, u64 length, | |||
424 | cur_len = stripe_unit; | 419 | cur_len = stripe_unit; |
425 | } | 420 | } |
426 | 421 | ||
427 | if (max_comp < comp) | 422 | if (max_comp < dev) |
428 | max_comp = comp; | 423 | max_comp = dev; |
429 | |||
430 | dev += mirrors_p1; | ||
431 | dev = (dev % devs_in_group) + first_dev; | ||
432 | } else { | 424 | } else { |
433 | cur_len = stripe_unit; | 425 | cur_len = stripe_unit; |
434 | } | 426 | } |
@@ -440,8 +432,8 @@ static int _prepare_one_group(struct exofs_io_state *ios, u64 length, | |||
440 | if (unlikely(ret)) | 432 | if (unlikely(ret)) |
441 | goto out; | 433 | goto out; |
442 | 434 | ||
443 | comp += mirrors_p1; | 435 | dev += mirrors_p1; |
444 | comp = (comp % devs_in_group) + first_comp; | 436 | dev = (dev % devs_in_group) + first_dev; |
445 | 437 | ||
446 | length -= cur_len; | 438 | length -= cur_len; |
447 | } | 439 | } |
@@ -454,18 +446,15 @@ out: | |||
454 | static int _prepare_for_striping(struct exofs_io_state *ios) | 446 | static int _prepare_for_striping(struct exofs_io_state *ios) |
455 | { | 447 | { |
456 | u64 length = ios->length; | 448 | u64 length = ios->length; |
449 | u64 offset = ios->offset; | ||
457 | struct _striping_info si; | 450 | struct _striping_info si; |
458 | unsigned devs_in_group = ios->layout->group_width * | ||
459 | ios->layout->mirrors_p1; | ||
460 | unsigned first_comp = 0; | ||
461 | int ret = 0; | 451 | int ret = 0; |
462 | 452 | ||
463 | _calc_stripe_info(ios, ios->offset, &si); | ||
464 | |||
465 | if (!ios->pages) { | 453 | if (!ios->pages) { |
466 | if (ios->kern_buff) { | 454 | if (ios->kern_buff) { |
467 | struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; | 455 | struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; |
468 | 456 | ||
457 | _calc_stripe_info(ios, ios->offset, &si); | ||
469 | per_dev->offset = si.obj_offset; | 458 | per_dev->offset = si.obj_offset; |
470 | per_dev->dev = si.dev; | 459 | per_dev->dev = si.dev; |
471 | 460 | ||
@@ -479,26 +468,17 @@ static int _prepare_for_striping(struct exofs_io_state *ios) | |||
479 | } | 468 | } |
480 | 469 | ||
481 | while (length) { | 470 | while (length) { |
471 | _calc_stripe_info(ios, offset, &si); | ||
472 | |||
482 | if (length < si.group_length) | 473 | if (length < si.group_length) |
483 | si.group_length = length; | 474 | si.group_length = length; |
484 | 475 | ||
485 | ret = _prepare_one_group(ios, si.group_length, &si, first_comp); | 476 | ret = _prepare_one_group(ios, si.group_length, &si); |
486 | if (unlikely(ret)) | 477 | if (unlikely(ret)) |
487 | goto out; | 478 | goto out; |
488 | 479 | ||
480 | offset += si.group_length; | ||
489 | length -= si.group_length; | 481 | length -= si.group_length; |
490 | |||
491 | si.group_length = si.total_group_length; | ||
492 | si.unit_off = 0; | ||
493 | ++si.Major; | ||
494 | si.obj_offset = si.Major * ios->layout->stripe_unit * | ||
495 | ios->layout->group_depth; | ||
496 | |||
497 | si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group; | ||
498 | si.dev %= ios->layout->s_numdevs; | ||
499 | |||
500 | first_comp += devs_in_group; | ||
501 | first_comp %= ios->layout->s_numdevs; | ||
502 | } | 482 | } |
503 | 483 | ||
504 | out: | 484 | out: |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 32cfd61def5f..047e92fa3af8 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -31,7 +31,6 @@ | |||
31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include <linux/smp_lock.h> | ||
35 | #include <linux/string.h> | 34 | #include <linux/string.h> |
36 | #include <linux/parser.h> | 35 | #include <linux/parser.h> |
37 | #include <linux/vfs.h> | 36 | #include <linux/vfs.h> |
diff --git a/fs/fcntl.c b/fs/fcntl.c index 9d175d623aab..6769fd0f35b8 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -767,11 +767,22 @@ void kill_fasync(struct fasync_struct **fp, int sig, int band) | |||
767 | } | 767 | } |
768 | EXPORT_SYMBOL(kill_fasync); | 768 | EXPORT_SYMBOL(kill_fasync); |
769 | 769 | ||
770 | static int __init fasync_init(void) | 770 | static int __init fcntl_init(void) |
771 | { | 771 | { |
772 | /* please add new bits here to ensure allocation uniqueness */ | ||
773 | BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( | ||
774 | O_RDONLY | O_WRONLY | O_RDWR | | ||
775 | O_CREAT | O_EXCL | O_NOCTTY | | ||
776 | O_TRUNC | O_APPEND | O_NONBLOCK | | ||
777 | __O_SYNC | O_DSYNC | FASYNC | | ||
778 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | | ||
779 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | | ||
780 | FMODE_EXEC | ||
781 | )); | ||
782 | |||
772 | fasync_cache = kmem_cache_create("fasync_cache", | 783 | fasync_cache = kmem_cache_create("fasync_cache", |
773 | sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); | 784 | sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); |
774 | return 0; | 785 | return 0; |
775 | } | 786 | } |
776 | 787 | ||
777 | module_init(fasync_init) | 788 | module_init(fcntl_init) |
@@ -39,28 +39,27 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */ | |||
39 | */ | 39 | */ |
40 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); | 40 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); |
41 | 41 | ||
42 | static inline void * alloc_fdmem(unsigned int size) | 42 | static inline void *alloc_fdmem(unsigned int size) |
43 | { | 43 | { |
44 | if (size <= PAGE_SIZE) | 44 | void *data; |
45 | return kmalloc(size, GFP_KERNEL); | 45 | |
46 | else | 46 | data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN); |
47 | return vmalloc(size); | 47 | if (data != NULL) |
48 | return data; | ||
49 | |||
50 | return vmalloc(size); | ||
48 | } | 51 | } |
49 | 52 | ||
50 | static inline void free_fdarr(struct fdtable *fdt) | 53 | static void free_fdmem(void *ptr) |
51 | { | 54 | { |
52 | if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) | 55 | is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr); |
53 | kfree(fdt->fd); | ||
54 | else | ||
55 | vfree(fdt->fd); | ||
56 | } | 56 | } |
57 | 57 | ||
58 | static inline void free_fdset(struct fdtable *fdt) | 58 | static void __free_fdtable(struct fdtable *fdt) |
59 | { | 59 | { |
60 | if (fdt->max_fds <= (PAGE_SIZE * BITS_PER_BYTE / 2)) | 60 | free_fdmem(fdt->fd); |
61 | kfree(fdt->open_fds); | 61 | free_fdmem(fdt->open_fds); |
62 | else | 62 | kfree(fdt); |
63 | vfree(fdt->open_fds); | ||
64 | } | 63 | } |
65 | 64 | ||
66 | static void free_fdtable_work(struct work_struct *work) | 65 | static void free_fdtable_work(struct work_struct *work) |
@@ -75,9 +74,8 @@ static void free_fdtable_work(struct work_struct *work) | |||
75 | spin_unlock_bh(&f->lock); | 74 | spin_unlock_bh(&f->lock); |
76 | while(fdt) { | 75 | while(fdt) { |
77 | struct fdtable *next = fdt->next; | 76 | struct fdtable *next = fdt->next; |
78 | vfree(fdt->fd); | 77 | |
79 | free_fdset(fdt); | 78 | __free_fdtable(fdt); |
80 | kfree(fdt); | ||
81 | fdt = next; | 79 | fdt = next; |
82 | } | 80 | } |
83 | } | 81 | } |
@@ -98,7 +96,7 @@ void free_fdtable_rcu(struct rcu_head *rcu) | |||
98 | container_of(fdt, struct files_struct, fdtab)); | 96 | container_of(fdt, struct files_struct, fdtab)); |
99 | return; | 97 | return; |
100 | } | 98 | } |
101 | if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) { | 99 | if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { |
102 | kfree(fdt->fd); | 100 | kfree(fdt->fd); |
103 | kfree(fdt->open_fds); | 101 | kfree(fdt->open_fds); |
104 | kfree(fdt); | 102 | kfree(fdt); |
@@ -183,7 +181,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr) | |||
183 | return fdt; | 181 | return fdt; |
184 | 182 | ||
185 | out_arr: | 183 | out_arr: |
186 | free_fdarr(fdt); | 184 | free_fdmem(fdt->fd); |
187 | out_fdt: | 185 | out_fdt: |
188 | kfree(fdt); | 186 | kfree(fdt); |
189 | out: | 187 | out: |
@@ -213,9 +211,7 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
213 | * caller and alloc_fdtable(). Cheaper to catch it here... | 211 | * caller and alloc_fdtable(). Cheaper to catch it here... |
214 | */ | 212 | */ |
215 | if (unlikely(new_fdt->max_fds <= nr)) { | 213 | if (unlikely(new_fdt->max_fds <= nr)) { |
216 | free_fdarr(new_fdt); | 214 | __free_fdtable(new_fdt); |
217 | free_fdset(new_fdt); | ||
218 | kfree(new_fdt); | ||
219 | return -EMFILE; | 215 | return -EMFILE; |
220 | } | 216 | } |
221 | /* | 217 | /* |
@@ -231,9 +227,7 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
231 | free_fdtable(cur_fdt); | 227 | free_fdtable(cur_fdt); |
232 | } else { | 228 | } else { |
233 | /* Somebody else expanded, so undo our attempt */ | 229 | /* Somebody else expanded, so undo our attempt */ |
234 | free_fdarr(new_fdt); | 230 | __free_fdtable(new_fdt); |
235 | free_fdset(new_fdt); | ||
236 | kfree(new_fdt); | ||
237 | } | 231 | } |
238 | return 1; | 232 | return 1; |
239 | } | 233 | } |
@@ -323,11 +317,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
323 | while (unlikely(open_files > new_fdt->max_fds)) { | 317 | while (unlikely(open_files > new_fdt->max_fds)) { |
324 | spin_unlock(&oldf->file_lock); | 318 | spin_unlock(&oldf->file_lock); |
325 | 319 | ||
326 | if (new_fdt != &newf->fdtab) { | 320 | if (new_fdt != &newf->fdtab) |
327 | free_fdarr(new_fdt); | 321 | __free_fdtable(new_fdt); |
328 | free_fdset(new_fdt); | ||
329 | kfree(new_fdt); | ||
330 | } | ||
331 | 322 | ||
332 | new_fdt = alloc_fdtable(open_files - 1); | 323 | new_fdt = alloc_fdtable(open_files - 1); |
333 | if (!new_fdt) { | 324 | if (!new_fdt) { |
@@ -337,9 +328,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
337 | 328 | ||
338 | /* beyond sysctl_nr_open; nothing to do */ | 329 | /* beyond sysctl_nr_open; nothing to do */ |
339 | if (unlikely(new_fdt->max_fds < open_files)) { | 330 | if (unlikely(new_fdt->max_fds < open_files)) { |
340 | free_fdarr(new_fdt); | 331 | __free_fdtable(new_fdt); |
341 | free_fdset(new_fdt); | ||
342 | kfree(new_fdt); | ||
343 | *errorp = -EMFILE; | 332 | *errorp = -EMFILE; |
344 | goto out_release; | 333 | goto out_release; |
345 | } | 334 | } |
diff --git a/fs/file_table.c b/fs/file_table.c index b8a0bb63cbd7..edecd36fed9b 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -230,15 +230,6 @@ static void __fput(struct file *file) | |||
230 | might_sleep(); | 230 | might_sleep(); |
231 | 231 | ||
232 | fsnotify_close(file); | 232 | fsnotify_close(file); |
233 | |||
234 | /* | ||
235 | * fsnotify_create_event may have taken one or more references on this | ||
236 | * file. If it did so it left one reference for us to drop to make sure | ||
237 | * its calls to fput could not prematurely destroy the file. | ||
238 | */ | ||
239 | if (atomic_long_read(&file->f_count)) | ||
240 | return fput(file); | ||
241 | |||
242 | /* | 233 | /* |
243 | * The function eventpoll_release() should be the first called | 234 | * The function eventpoll_release() should be the first called |
244 | * in the file cleanup chain. | 235 | * in the file cleanup chain. |
@@ -298,11 +289,20 @@ struct file *fget(unsigned int fd) | |||
298 | EXPORT_SYMBOL(fget); | 289 | EXPORT_SYMBOL(fget); |
299 | 290 | ||
300 | /* | 291 | /* |
301 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. | 292 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. |
302 | * You can use this only if it is guranteed that the current task already | 293 | * |
303 | * holds a refcnt to that file. That check has to be done at fget() only | 294 | * You can use this instead of fget if you satisfy all of the following |
304 | * and a flag is returned to be passed to the corresponding fput_light(). | 295 | * conditions: |
305 | * There must not be a cloning between an fget_light/fput_light pair. | 296 | * 1) You must call fput_light before exiting the syscall and returning control |
297 | * to userspace (i.e. you cannot remember the returned struct file * after | ||
298 | * returning to userspace). | ||
299 | * 2) You must not call filp_close on the returned struct file * in between | ||
300 | * calls to fget_light and fput_light. | ||
301 | * 3) You must not clone the current task in between the calls to fget_light | ||
302 | * and fput_light. | ||
303 | * | ||
304 | * The fput_needed flag returned by fget_light should be passed to the | ||
305 | * corresponding fput_light. | ||
306 | */ | 306 | */ |
307 | struct file *fget_light(unsigned int fd, int *fput_needed) | 307 | struct file *fget_light(unsigned int fd, int *fput_needed) |
308 | { | 308 | { |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 2f76c4a081a2..7d9d06ba184b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -68,7 +68,7 @@ int nr_pdflush_threads; | |||
68 | */ | 68 | */ |
69 | int writeback_in_progress(struct backing_dev_info *bdi) | 69 | int writeback_in_progress(struct backing_dev_info *bdi) |
70 | { | 70 | { |
71 | return !list_empty(&bdi->work_list); | 71 | return test_bit(BDI_writeback_running, &bdi->state); |
72 | } | 72 | } |
73 | 73 | ||
74 | static void bdi_queue_work(struct backing_dev_info *bdi, | 74 | static void bdi_queue_work(struct backing_dev_info *bdi, |
@@ -249,10 +249,18 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
249 | 249 | ||
250 | /* | 250 | /* |
251 | * Queue all expired dirty inodes for io, eldest first. | 251 | * Queue all expired dirty inodes for io, eldest first. |
252 | * Before | ||
253 | * newly dirtied b_dirty b_io b_more_io | ||
254 | * =============> gf edc BA | ||
255 | * After | ||
256 | * newly dirtied b_dirty b_io b_more_io | ||
257 | * =============> g fBAedc | ||
258 | * | | ||
259 | * +--> dequeue for IO | ||
252 | */ | 260 | */ |
253 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) | 261 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
254 | { | 262 | { |
255 | list_splice_init(&wb->b_more_io, wb->b_io.prev); | 263 | list_splice_init(&wb->b_more_io, &wb->b_io); |
256 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); | 264 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
257 | } | 265 | } |
258 | 266 | ||
@@ -363,62 +371,35 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
363 | spin_lock(&inode_lock); | 371 | spin_lock(&inode_lock); |
364 | inode->i_state &= ~I_SYNC; | 372 | inode->i_state &= ~I_SYNC; |
365 | if (!(inode->i_state & I_FREEING)) { | 373 | if (!(inode->i_state & I_FREEING)) { |
366 | if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { | 374 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
367 | /* | ||
368 | * More pages get dirtied by a fast dirtier. | ||
369 | */ | ||
370 | goto select_queue; | ||
371 | } else if (inode->i_state & I_DIRTY) { | ||
372 | /* | ||
373 | * At least XFS will redirty the inode during the | ||
374 | * writeback (delalloc) and on io completion (isize). | ||
375 | */ | ||
376 | redirty_tail(inode); | ||
377 | } else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
378 | /* | 375 | /* |
379 | * We didn't write back all the pages. nfs_writepages() | 376 | * We didn't write back all the pages. nfs_writepages() |
380 | * sometimes bales out without doing anything. Redirty | 377 | * sometimes bales out without doing anything. |
381 | * the inode; Move it from b_io onto b_more_io/b_dirty. | ||
382 | */ | 378 | */ |
383 | /* | 379 | inode->i_state |= I_DIRTY_PAGES; |
384 | * akpm: if the caller was the kupdate function we put | 380 | if (wbc->nr_to_write <= 0) { |
385 | * this inode at the head of b_dirty so it gets first | ||
386 | * consideration. Otherwise, move it to the tail, for | ||
387 | * the reasons described there. I'm not really sure | ||
388 | * how much sense this makes. Presumably I had a good | ||
389 | * reasons for doing it this way, and I'd rather not | ||
390 | * muck with it at present. | ||
391 | */ | ||
392 | if (wbc->for_kupdate) { | ||
393 | /* | 381 | /* |
394 | * For the kupdate function we move the inode | 382 | * slice used up: queue for next turn |
395 | * to b_more_io so it will get more writeout as | ||
396 | * soon as the queue becomes uncongested. | ||
397 | */ | 383 | */ |
398 | inode->i_state |= I_DIRTY_PAGES; | 384 | requeue_io(inode); |
399 | select_queue: | ||
400 | if (wbc->nr_to_write <= 0) { | ||
401 | /* | ||
402 | * slice used up: queue for next turn | ||
403 | */ | ||
404 | requeue_io(inode); | ||
405 | } else { | ||
406 | /* | ||
407 | * somehow blocked: retry later | ||
408 | */ | ||
409 | redirty_tail(inode); | ||
410 | } | ||
411 | } else { | 385 | } else { |
412 | /* | 386 | /* |
413 | * Otherwise fully redirty the inode so that | 387 | * Writeback blocked by something other than |
414 | * other inodes on this superblock will get some | 388 | * congestion. Delay the inode for some time to |
415 | * writeout. Otherwise heavy writing to one | 389 | * avoid spinning on the CPU (100% iowait) |
416 | * file would indefinitely suspend writeout of | 390 | * retrying writeback of the dirty page/inode |
417 | * all the other files. | 391 | * that cannot be performed immediately. |
418 | */ | 392 | */ |
419 | inode->i_state |= I_DIRTY_PAGES; | ||
420 | redirty_tail(inode); | 393 | redirty_tail(inode); |
421 | } | 394 | } |
395 | } else if (inode->i_state & I_DIRTY) { | ||
396 | /* | ||
397 | * Filesystems can dirty the inode during writeback | ||
398 | * operations, such as delayed allocation during | ||
399 | * submission or metadata updates after data IO | ||
400 | * completion. | ||
401 | */ | ||
402 | redirty_tail(inode); | ||
422 | } else if (atomic_read(&inode->i_count)) { | 403 | } else if (atomic_read(&inode->i_count)) { |
423 | /* | 404 | /* |
424 | * The inode is clean, inuse | 405 | * The inode is clean, inuse |
@@ -590,7 +571,7 @@ static inline bool over_bground_thresh(void) | |||
590 | { | 571 | { |
591 | unsigned long background_thresh, dirty_thresh; | 572 | unsigned long background_thresh, dirty_thresh; |
592 | 573 | ||
593 | get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); | 574 | global_dirty_limits(&background_thresh, &dirty_thresh); |
594 | 575 | ||
595 | return (global_page_state(NR_FILE_DIRTY) + | 576 | return (global_page_state(NR_FILE_DIRTY) + |
596 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); | 577 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); |
@@ -759,6 +740,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
759 | struct wb_writeback_work *work; | 740 | struct wb_writeback_work *work; |
760 | long wrote = 0; | 741 | long wrote = 0; |
761 | 742 | ||
743 | set_bit(BDI_writeback_running, &wb->bdi->state); | ||
762 | while ((work = get_next_work_item(bdi)) != NULL) { | 744 | while ((work = get_next_work_item(bdi)) != NULL) { |
763 | /* | 745 | /* |
764 | * Override sync mode, in case we must wait for completion | 746 | * Override sync mode, in case we must wait for completion |
@@ -785,6 +767,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
785 | * Check for periodic writeback, kupdated() style | 767 | * Check for periodic writeback, kupdated() style |
786 | */ | 768 | */ |
787 | wrote += wb_check_old_data_flush(wb); | 769 | wrote += wb_check_old_data_flush(wb); |
770 | clear_bit(BDI_writeback_running, &wb->bdi->state); | ||
788 | 771 | ||
789 | return wrote; | 772 | return wrote; |
790 | } | 773 | } |
diff --git a/fs/fs_struct.c b/fs/fs_struct.c index eee059052db5..1ee40eb9a2c0 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c | |||
@@ -106,12 +106,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) | |||
106 | fs->in_exec = 0; | 106 | fs->in_exec = 0; |
107 | rwlock_init(&fs->lock); | 107 | rwlock_init(&fs->lock); |
108 | fs->umask = old->umask; | 108 | fs->umask = old->umask; |
109 | read_lock(&old->lock); | 109 | get_fs_root_and_pwd(old, &fs->root, &fs->pwd); |
110 | fs->root = old->root; | ||
111 | path_get(&old->root); | ||
112 | fs->pwd = old->pwd; | ||
113 | path_get(&old->pwd); | ||
114 | read_unlock(&old->lock); | ||
115 | } | 110 | } |
116 | return fs; | 111 | return fs; |
117 | } | 112 | } |
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 6a026441c5a6..f6aad48d38a8 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h | |||
@@ -321,17 +321,11 @@ void fscache_put_context(struct fscache_cookie *cookie, void *context) | |||
321 | #define dbgprintk(FMT, ...) \ | 321 | #define dbgprintk(FMT, ...) \ |
322 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) | 322 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) |
323 | 323 | ||
324 | /* make sure we maintain the format strings, even when debugging is disabled */ | ||
325 | static inline __attribute__((format(printf, 1, 2))) | ||
326 | void _dbprintk(const char *fmt, ...) | ||
327 | { | ||
328 | } | ||
329 | |||
330 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 324 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
331 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 325 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
332 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) | 326 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) |
333 | 327 | ||
334 | #define kjournal(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) | 328 | #define kjournal(FMT, ...) no_printk(FMT, ##__VA_ARGS__) |
335 | 329 | ||
336 | #ifdef __KDEBUG | 330 | #ifdef __KDEBUG |
337 | #define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) | 331 | #define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) |
@@ -358,9 +352,9 @@ do { \ | |||
358 | } while (0) | 352 | } while (0) |
359 | 353 | ||
360 | #else | 354 | #else |
361 | #define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 355 | #define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
362 | #define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 356 | #define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
363 | #define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) | 357 | #define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) |
364 | #endif | 358 | #endif |
365 | 359 | ||
366 | /* | 360 | /* |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 6b4dcd4f2943..5a44811b5027 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -722,7 +722,12 @@ root_found: | |||
722 | } | 722 | } |
723 | 723 | ||
724 | s->s_magic = ISOFS_SUPER_MAGIC; | 724 | s->s_magic = ISOFS_SUPER_MAGIC; |
725 | s->s_maxbytes = 0xffffffff; /* We can handle files up to 4 GB */ | 725 | |
726 | /* | ||
727 | * With multi-extent files, file size is only limited by the maximum | ||
728 | * size of a file system, which is 8 TB. | ||
729 | */ | ||
730 | s->s_maxbytes = 0x80000000000LL; | ||
726 | 731 | ||
727 | /* | 732 | /* |
728 | * The CDROM is read-only, has no nodes (devices) on it, and since | 733 | * The CDROM is read-only, has no nodes (devices) on it, and since |
diff --git a/fs/namei.c b/fs/namei.c index 13ff4abdbdca..17ea76bf2fbe 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -483,13 +483,8 @@ ok: | |||
483 | 483 | ||
484 | static __always_inline void set_root(struct nameidata *nd) | 484 | static __always_inline void set_root(struct nameidata *nd) |
485 | { | 485 | { |
486 | if (!nd->root.mnt) { | 486 | if (!nd->root.mnt) |
487 | struct fs_struct *fs = current->fs; | 487 | get_fs_root(current->fs, &nd->root); |
488 | read_lock(&fs->lock); | ||
489 | nd->root = fs->root; | ||
490 | path_get(&nd->root); | ||
491 | read_unlock(&fs->lock); | ||
492 | } | ||
493 | } | 488 | } |
494 | 489 | ||
495 | static int link_path_walk(const char *, struct nameidata *); | 490 | static int link_path_walk(const char *, struct nameidata *); |
@@ -1015,11 +1010,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei | |||
1015 | nd->path = nd->root; | 1010 | nd->path = nd->root; |
1016 | path_get(&nd->root); | 1011 | path_get(&nd->root); |
1017 | } else if (dfd == AT_FDCWD) { | 1012 | } else if (dfd == AT_FDCWD) { |
1018 | struct fs_struct *fs = current->fs; | 1013 | get_fs_pwd(current->fs, &nd->path); |
1019 | read_lock(&fs->lock); | ||
1020 | nd->path = fs->pwd; | ||
1021 | path_get(&fs->pwd); | ||
1022 | read_unlock(&fs->lock); | ||
1023 | } else { | 1014 | } else { |
1024 | struct dentry *dentry; | 1015 | struct dentry *dentry; |
1025 | 1016 | ||
diff --git a/fs/namespace.c b/fs/namespace.c index 66c4f7e781cb..2e10cb19c5b0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -788,7 +788,6 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) | |||
788 | { MNT_NOATIME, ",noatime" }, | 788 | { MNT_NOATIME, ",noatime" }, |
789 | { MNT_NODIRATIME, ",nodiratime" }, | 789 | { MNT_NODIRATIME, ",nodiratime" }, |
790 | { MNT_RELATIME, ",relatime" }, | 790 | { MNT_RELATIME, ",relatime" }, |
791 | { MNT_STRICTATIME, ",strictatime" }, | ||
792 | { 0, NULL } | 791 | { 0, NULL } |
793 | }; | 792 | }; |
794 | const struct proc_fs_info *fs_infop; | 793 | const struct proc_fs_info *fs_infop; |
@@ -2213,10 +2212,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2213 | goto out1; | 2212 | goto out1; |
2214 | } | 2213 | } |
2215 | 2214 | ||
2216 | read_lock(&current->fs->lock); | 2215 | get_fs_root(current->fs, &root); |
2217 | root = current->fs->root; | ||
2218 | path_get(&current->fs->root); | ||
2219 | read_unlock(&current->fs->lock); | ||
2220 | down_write(&namespace_sem); | 2216 | down_write(&namespace_sem); |
2221 | mutex_lock(&old.dentry->d_inode->i_mutex); | 2217 | mutex_lock(&old.dentry->d_inode->i_mutex); |
2222 | error = -EINVAL; | 2218 | error = -EINVAL; |
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index cc1bb33b59b8..26a510a7be09 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -100,3 +100,20 @@ config NFS_FSCACHE | |||
100 | help | 100 | help |
101 | Say Y here if you want NFS data to be cached locally on disc through | 101 | Say Y here if you want NFS data to be cached locally on disc through |
102 | the general filesystem cache manager | 102 | the general filesystem cache manager |
103 | |||
104 | config NFS_USE_LEGACY_DNS | ||
105 | bool "Use the legacy NFS DNS resolver" | ||
106 | depends on NFS_V4 | ||
107 | help | ||
108 | The kernel now provides a method for translating a host name into an | ||
109 | IP address. Select Y here if you would rather use your own DNS | ||
110 | resolver script. | ||
111 | |||
112 | If unsure, say N | ||
113 | |||
114 | config NFS_USE_KERNEL_DNS | ||
115 | bool | ||
116 | depends on NFS_V4 && !NFS_USE_LEGACY_DNS | ||
117 | select DNS_RESOLVER | ||
118 | select KEYS | ||
119 | default y | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 36dfdae95123..e17b49e2eabd 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -45,7 +45,7 @@ unsigned short nfs_callback_tcpport; | |||
45 | unsigned short nfs_callback_tcpport6; | 45 | unsigned short nfs_callback_tcpport6; |
46 | #define NFS_CALLBACK_MAXPORTNR (65535U) | 46 | #define NFS_CALLBACK_MAXPORTNR (65535U) |
47 | 47 | ||
48 | static int param_set_portnr(const char *val, struct kernel_param *kp) | 48 | static int param_set_portnr(const char *val, const struct kernel_param *kp) |
49 | { | 49 | { |
50 | unsigned long num; | 50 | unsigned long num; |
51 | int ret; | 51 | int ret; |
@@ -58,11 +58,10 @@ static int param_set_portnr(const char *val, struct kernel_param *kp) | |||
58 | *((unsigned int *)kp->arg) = num; | 58 | *((unsigned int *)kp->arg) = num; |
59 | return 0; | 59 | return 0; |
60 | } | 60 | } |
61 | 61 | static struct kernel_param_ops param_ops_portnr = { | |
62 | static int param_get_portnr(char *buffer, struct kernel_param *kp) | 62 | .set = param_set_portnr, |
63 | { | 63 | .get = param_get_uint, |
64 | return param_get_uint(buffer, kp); | 64 | }; |
65 | } | ||
66 | #define param_check_portnr(name, p) __param_check(name, p, unsigned int); | 65 | #define param_check_portnr(name, p) __param_check(name, p, unsigned int); |
67 | 66 | ||
68 | module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); | 67 | module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); |
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 76fd235d0024..dba50a5625db 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c | |||
@@ -6,6 +6,29 @@ | |||
6 | * Resolves DNS hostnames into valid ip addresses | 6 | * Resolves DNS hostnames into valid ip addresses |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #ifdef CONFIG_NFS_USE_KERNEL_DNS | ||
10 | |||
11 | #include <linux/sunrpc/clnt.h> | ||
12 | #include <linux/dns_resolver.h> | ||
13 | |||
14 | ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | ||
15 | struct sockaddr *sa, size_t salen) | ||
16 | { | ||
17 | ssize_t ret; | ||
18 | char *ip_addr = NULL; | ||
19 | int ip_len; | ||
20 | |||
21 | ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); | ||
22 | if (ip_len > 0) | ||
23 | ret = rpc_pton(ip_addr, ip_len, sa, salen); | ||
24 | else | ||
25 | ret = -ESRCH; | ||
26 | kfree(ip_addr); | ||
27 | return ret; | ||
28 | } | ||
29 | |||
30 | #else | ||
31 | |||
9 | #include <linux/hash.h> | 32 | #include <linux/hash.h> |
10 | #include <linux/string.h> | 33 | #include <linux/string.h> |
11 | #include <linux/kmod.h> | 34 | #include <linux/kmod.h> |
@@ -346,3 +369,4 @@ void nfs_dns_resolver_destroy(void) | |||
346 | nfs_cache_unregister(&nfs_dns_resolve); | 369 | nfs_cache_unregister(&nfs_dns_resolve); |
347 | } | 370 | } |
348 | 371 | ||
372 | #endif | ||
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h index a3f0938babf7..199bb5543a91 100644 --- a/fs/nfs/dns_resolve.h +++ b/fs/nfs/dns_resolve.h | |||
@@ -6,8 +6,20 @@ | |||
6 | 6 | ||
7 | #define NFS_DNS_HOSTNAME_MAXLEN (128) | 7 | #define NFS_DNS_HOSTNAME_MAXLEN (128) |
8 | 8 | ||
9 | |||
10 | #ifdef CONFIG_NFS_USE_KERNEL_DNS | ||
11 | static inline int nfs_dns_resolver_init(void) | ||
12 | { | ||
13 | return 0; | ||
14 | } | ||
15 | |||
16 | static inline void nfs_dns_resolver_destroy(void) | ||
17 | {} | ||
18 | #else | ||
9 | extern int nfs_dns_resolver_init(void); | 19 | extern int nfs_dns_resolver_init(void); |
10 | extern void nfs_dns_resolver_destroy(void); | 20 | extern void nfs_dns_resolver_destroy(void); |
21 | #endif | ||
22 | |||
11 | extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | 23 | extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, |
12 | struct sockaddr *sa, size_t salen); | 24 | struct sockaddr *sa, size_t salen); |
13 | 25 | ||
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index eb8f73c9c131..756566fe8449 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c | |||
@@ -17,9 +17,9 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) | |||
17 | old->data_type == new->data_type && | 17 | old->data_type == new->data_type && |
18 | old->tgid == new->tgid) { | 18 | old->tgid == new->tgid) { |
19 | switch (old->data_type) { | 19 | switch (old->data_type) { |
20 | case (FSNOTIFY_EVENT_FILE): | 20 | case (FSNOTIFY_EVENT_PATH): |
21 | if ((old->file->f_path.mnt == new->file->f_path.mnt) && | 21 | if ((old->path.mnt == new->path.mnt) && |
22 | (old->file->f_path.dentry == new->file->f_path.dentry)) | 22 | (old->path.dentry == new->path.dentry)) |
23 | return true; | 23 | return true; |
24 | case (FSNOTIFY_EVENT_NONE): | 24 | case (FSNOTIFY_EVENT_NONE): |
25 | return true; | 25 | return true; |
@@ -174,7 +174,7 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, | |||
174 | return false; | 174 | return false; |
175 | 175 | ||
176 | /* if we don't have enough info to send an event to userspace say no */ | 176 | /* if we don't have enough info to send an event to userspace say no */ |
177 | if (data_type != FSNOTIFY_EVENT_FILE) | 177 | if (data_type != FSNOTIFY_EVENT_PATH) |
178 | return false; | 178 | return false; |
179 | 179 | ||
180 | if (inode_mark && vfsmnt_mark) { | 180 | if (inode_mark && vfsmnt_mark) { |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 25a3b4dfcf61..032b837fcd11 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -65,7 +65,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | |||
65 | if (client_fd < 0) | 65 | if (client_fd < 0) |
66 | return client_fd; | 66 | return client_fd; |
67 | 67 | ||
68 | if (event->data_type != FSNOTIFY_EVENT_FILE) { | 68 | if (event->data_type != FSNOTIFY_EVENT_PATH) { |
69 | WARN_ON(1); | 69 | WARN_ON(1); |
70 | put_unused_fd(client_fd); | 70 | put_unused_fd(client_fd); |
71 | return -EINVAL; | 71 | return -EINVAL; |
@@ -75,8 +75,8 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | |||
75 | * we need a new file handle for the userspace program so it can read even if it was | 75 | * we need a new file handle for the userspace program so it can read even if it was |
76 | * originally opened O_WRONLY. | 76 | * originally opened O_WRONLY. |
77 | */ | 77 | */ |
78 | dentry = dget(event->file->f_path.dentry); | 78 | dentry = dget(event->path.dentry); |
79 | mnt = mntget(event->file->f_path.mnt); | 79 | mnt = mntget(event->path.mnt); |
80 | /* it's possible this event was an overflow event. in that case dentry and mnt | 80 | /* it's possible this event was an overflow event. in that case dentry and mnt |
81 | * are NULL; That's fine, just don't call dentry open */ | 81 | * are NULL; That's fine, just don't call dentry open */ |
82 | if (dentry && mnt) | 82 | if (dentry && mnt) |
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4d2a82c1ceb1..3970392b2722 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
@@ -84,7 +84,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) | |||
84 | } | 84 | } |
85 | 85 | ||
86 | /* Notify this dentry's parent about a child's events. */ | 86 | /* Notify this dentry's parent about a child's events. */ |
87 | void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask) | 87 | void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) |
88 | { | 88 | { |
89 | struct dentry *parent; | 89 | struct dentry *parent; |
90 | struct inode *p_inode; | 90 | struct inode *p_inode; |
@@ -92,7 +92,7 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask) | |||
92 | bool should_update_children = false; | 92 | bool should_update_children = false; |
93 | 93 | ||
94 | if (!dentry) | 94 | if (!dentry) |
95 | dentry = file->f_path.dentry; | 95 | dentry = path->dentry; |
96 | 96 | ||
97 | if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) | 97 | if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) |
98 | return; | 98 | return; |
@@ -124,8 +124,8 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask) | |||
124 | * specifies these are events which came from a child. */ | 124 | * specifies these are events which came from a child. */ |
125 | mask |= FS_EVENT_ON_CHILD; | 125 | mask |= FS_EVENT_ON_CHILD; |
126 | 126 | ||
127 | if (file) | 127 | if (path) |
128 | fsnotify(p_inode, mask, file, FSNOTIFY_EVENT_FILE, | 128 | fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, |
129 | dentry->d_name.name, 0); | 129 | dentry->d_name.name, 0); |
130 | else | 130 | else |
131 | fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, | 131 | fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, |
@@ -217,8 +217,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
217 | /* global tests shouldn't care about events on child only the specific event */ | 217 | /* global tests shouldn't care about events on child only the specific event */ |
218 | __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); | 218 | __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); |
219 | 219 | ||
220 | if (data_is == FSNOTIFY_EVENT_FILE) | 220 | if (data_is == FSNOTIFY_EVENT_PATH) |
221 | mnt = ((struct file *)data)->f_path.mnt; | 221 | mnt = ((struct path *)data)->mnt; |
222 | else | 222 | else |
223 | mnt = NULL; | 223 | mnt = NULL; |
224 | 224 | ||
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 5e73eeb2c697..a91b69a6a291 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c | |||
@@ -52,9 +52,9 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new | |||
52 | !strcmp(old->file_name, new->file_name)) | 52 | !strcmp(old->file_name, new->file_name)) |
53 | return true; | 53 | return true; |
54 | break; | 54 | break; |
55 | case (FSNOTIFY_EVENT_FILE): | 55 | case (FSNOTIFY_EVENT_PATH): |
56 | if ((old->file->f_path.mnt == new->file->f_path.mnt) && | 56 | if ((old->path.mnt == new->path.mnt) && |
57 | (old->file->f_path.dentry == new->file->f_path.dentry)) | 57 | (old->path.dentry == new->path.dentry)) |
58 | return true; | 58 | return true; |
59 | break; | 59 | break; |
60 | case (FSNOTIFY_EVENT_NONE): | 60 | case (FSNOTIFY_EVENT_NONE): |
@@ -147,10 +147,10 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode | |||
147 | __u32 mask, void *data, int data_type) | 147 | __u32 mask, void *data, int data_type) |
148 | { | 148 | { |
149 | if ((inode_mark->mask & FS_EXCL_UNLINK) && | 149 | if ((inode_mark->mask & FS_EXCL_UNLINK) && |
150 | (data_type == FSNOTIFY_EVENT_FILE)) { | 150 | (data_type == FSNOTIFY_EVENT_PATH)) { |
151 | struct file *file = data; | 151 | struct path *path = data; |
152 | 152 | ||
153 | if (d_unlinked(file->f_path.dentry)) | 153 | if (d_unlinked(path->dentry)) |
154 | return false; | 154 | return false; |
155 | } | 155 | } |
156 | 156 | ||
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index d6c435adc7a2..f39260f8f865 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
@@ -31,7 +31,6 @@ | |||
31 | * allocated and used. | 31 | * allocated and used. |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include <linux/file.h> | ||
35 | #include <linux/fs.h> | 34 | #include <linux/fs.h> |
36 | #include <linux/init.h> | 35 | #include <linux/init.h> |
37 | #include <linux/kernel.h> | 36 | #include <linux/kernel.h> |
@@ -90,8 +89,8 @@ void fsnotify_put_event(struct fsnotify_event *event) | |||
90 | if (atomic_dec_and_test(&event->refcnt)) { | 89 | if (atomic_dec_and_test(&event->refcnt)) { |
91 | pr_debug("%s: event=%p\n", __func__, event); | 90 | pr_debug("%s: event=%p\n", __func__, event); |
92 | 91 | ||
93 | if (event->data_type == FSNOTIFY_EVENT_FILE) | 92 | if (event->data_type == FSNOTIFY_EVENT_PATH) |
94 | fput(event->file); | 93 | path_put(&event->path); |
95 | 94 | ||
96 | BUG_ON(!list_empty(&event->private_data_list)); | 95 | BUG_ON(!list_empty(&event->private_data_list)); |
97 | 96 | ||
@@ -376,8 +375,8 @@ struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event) | |||
376 | } | 375 | } |
377 | } | 376 | } |
378 | event->tgid = get_pid(old_event->tgid); | 377 | event->tgid = get_pid(old_event->tgid); |
379 | if (event->data_type == FSNOTIFY_EVENT_FILE) | 378 | if (event->data_type == FSNOTIFY_EVENT_PATH) |
380 | get_file(event->file); | 379 | path_get(&event->path); |
381 | 380 | ||
382 | return event; | 381 | return event; |
383 | } | 382 | } |
@@ -424,22 +423,11 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, | |||
424 | event->data_type = data_type; | 423 | event->data_type = data_type; |
425 | 424 | ||
426 | switch (data_type) { | 425 | switch (data_type) { |
427 | case FSNOTIFY_EVENT_FILE: { | 426 | case FSNOTIFY_EVENT_PATH: { |
428 | event->file = data; | 427 | struct path *path = data; |
429 | /* | 428 | event->path.dentry = path->dentry; |
430 | * if this file is about to disappear hold an extra reference | 429 | event->path.mnt = path->mnt; |
431 | * until we return to __fput so we don't have to worry about | 430 | path_get(&event->path); |
432 | * future get/put destroying the file under us or generating | ||
433 | * additional events. Notice that we change f_mode without | ||
434 | * holding f_lock. This is safe since this is the only possible | ||
435 | * reference to this object in the kernel (it was about to be | ||
436 | * freed, remember?) | ||
437 | */ | ||
438 | if (!atomic_long_read(&event->file->f_count)) { | ||
439 | event->file->f_mode |= FMODE_NONOTIFY; | ||
440 | get_file(event->file); | ||
441 | } | ||
442 | get_file(event->file); | ||
443 | break; | 431 | break; |
444 | } | 432 | } |
445 | case FSNOTIFY_EVENT_INODE: | 433 | case FSNOTIFY_EVENT_INODE: |
@@ -447,7 +435,8 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, | |||
447 | break; | 435 | break; |
448 | case FSNOTIFY_EVENT_NONE: | 436 | case FSNOTIFY_EVENT_NONE: |
449 | event->inode = NULL; | 437 | event->inode = NULL; |
450 | event->file = NULL; | 438 | event->path.dentry = NULL; |
439 | event->path.mnt = NULL; | ||
451 | break; | 440 | break; |
452 | default: | 441 | default: |
453 | BUG(); | 442 | BUG(); |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index da702294d7e7..a76e0aa5cd3f 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -290,12 +290,30 @@ static int ocfs2_set_acl(handle_t *handle, | |||
290 | 290 | ||
291 | int ocfs2_check_acl(struct inode *inode, int mask) | 291 | int ocfs2_check_acl(struct inode *inode, int mask) |
292 | { | 292 | { |
293 | struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); | 293 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
294 | struct buffer_head *di_bh = NULL; | ||
295 | struct posix_acl *acl; | ||
296 | int ret = -EAGAIN; | ||
294 | 297 | ||
295 | if (IS_ERR(acl)) | 298 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) |
299 | return ret; | ||
300 | |||
301 | ret = ocfs2_read_inode_block(inode, &di_bh); | ||
302 | if (ret < 0) { | ||
303 | mlog_errno(ret); | ||
304 | return ret; | ||
305 | } | ||
306 | |||
307 | acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, di_bh); | ||
308 | |||
309 | brelse(di_bh); | ||
310 | |||
311 | if (IS_ERR(acl)) { | ||
312 | mlog_errno(PTR_ERR(acl)); | ||
296 | return PTR_ERR(acl); | 313 | return PTR_ERR(acl); |
314 | } | ||
297 | if (acl) { | 315 | if (acl) { |
298 | int ret = posix_acl_permission(inode, acl, mask); | 316 | ret = posix_acl_permission(inode, acl, mask); |
299 | posix_acl_release(acl); | 317 | posix_acl_release(acl); |
300 | return ret; | 318 | return ret; |
301 | } | 319 | } |
@@ -344,7 +362,7 @@ int ocfs2_init_acl(handle_t *handle, | |||
344 | { | 362 | { |
345 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 363 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
346 | struct posix_acl *acl = NULL; | 364 | struct posix_acl *acl = NULL; |
347 | int ret = 0; | 365 | int ret = 0, ret2; |
348 | mode_t mode; | 366 | mode_t mode; |
349 | 367 | ||
350 | if (!S_ISLNK(inode->i_mode)) { | 368 | if (!S_ISLNK(inode->i_mode)) { |
@@ -381,7 +399,12 @@ int ocfs2_init_acl(handle_t *handle, | |||
381 | mode = inode->i_mode; | 399 | mode = inode->i_mode; |
382 | ret = posix_acl_create_masq(clone, &mode); | 400 | ret = posix_acl_create_masq(clone, &mode); |
383 | if (ret >= 0) { | 401 | if (ret >= 0) { |
384 | ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); | 402 | ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); |
403 | if (ret2) { | ||
404 | mlog_errno(ret2); | ||
405 | ret = ret2; | ||
406 | goto cleanup; | ||
407 | } | ||
385 | if (ret > 0) { | 408 | if (ret > 0) { |
386 | ret = ocfs2_set_acl(handle, inode, | 409 | ret = ocfs2_set_acl(handle, inode, |
387 | di_bh, ACL_TYPE_ACCESS, | 410 | di_bh, ACL_TYPE_ACCESS, |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index aa75ca3f78da..1361997cf205 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -1759,6 +1759,7 @@ static int o2net_accept_one(struct socket *sock) | |||
1759 | struct sockaddr_in sin; | 1759 | struct sockaddr_in sin; |
1760 | struct socket *new_sock = NULL; | 1760 | struct socket *new_sock = NULL; |
1761 | struct o2nm_node *node = NULL; | 1761 | struct o2nm_node *node = NULL; |
1762 | struct o2nm_node *local_node = NULL; | ||
1762 | struct o2net_sock_container *sc = NULL; | 1763 | struct o2net_sock_container *sc = NULL; |
1763 | struct o2net_node *nn; | 1764 | struct o2net_node *nn; |
1764 | 1765 | ||
@@ -1796,11 +1797,15 @@ static int o2net_accept_one(struct socket *sock) | |||
1796 | goto out; | 1797 | goto out; |
1797 | } | 1798 | } |
1798 | 1799 | ||
1799 | if (o2nm_this_node() > node->nd_num) { | 1800 | if (o2nm_this_node() >= node->nd_num) { |
1800 | mlog(ML_NOTICE, "unexpected connect attempted from a lower " | 1801 | local_node = o2nm_get_node_by_num(o2nm_this_node()); |
1801 | "numbered node '%s' at " "%pI4:%d with num %u\n", | 1802 | mlog(ML_NOTICE, "unexpected connect attempt seen at node '%s' (" |
1802 | node->nd_name, &sin.sin_addr.s_addr, | 1803 | "%u, %pI4:%d) from node '%s' (%u, %pI4:%d)\n", |
1803 | ntohs(sin.sin_port), node->nd_num); | 1804 | local_node->nd_name, local_node->nd_num, |
1805 | &(local_node->nd_ipv4_address), | ||
1806 | ntohs(local_node->nd_ipv4_port), | ||
1807 | node->nd_name, node->nd_num, &sin.sin_addr.s_addr, | ||
1808 | ntohs(sin.sin_port)); | ||
1804 | ret = -EINVAL; | 1809 | ret = -EINVAL; |
1805 | goto out; | 1810 | goto out; |
1806 | } | 1811 | } |
@@ -1857,6 +1862,8 @@ out: | |||
1857 | sock_release(new_sock); | 1862 | sock_release(new_sock); |
1858 | if (node) | 1863 | if (node) |
1859 | o2nm_node_put(node); | 1864 | o2nm_node_put(node); |
1865 | if (local_node) | ||
1866 | o2nm_node_put(local_node); | ||
1860 | if (sc) | 1867 | if (sc) |
1861 | sc_put(sc); | 1868 | sc_put(sc); |
1862 | return ret; | 1869 | return ret; |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 94b97fc6a88e..ffb4c68dafa4 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -511,8 +511,6 @@ static void dlm_lockres_release(struct kref *kref) | |||
511 | 511 | ||
512 | atomic_dec(&dlm->res_cur_count); | 512 | atomic_dec(&dlm->res_cur_count); |
513 | 513 | ||
514 | dlm_put(dlm); | ||
515 | |||
516 | if (!hlist_unhashed(&res->hash_node) || | 514 | if (!hlist_unhashed(&res->hash_node) || |
517 | !list_empty(&res->granted) || | 515 | !list_empty(&res->granted) || |
518 | !list_empty(&res->converting) || | 516 | !list_empty(&res->converting) || |
@@ -585,8 +583,6 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
585 | res->migration_pending = 0; | 583 | res->migration_pending = 0; |
586 | res->inflight_locks = 0; | 584 | res->inflight_locks = 0; |
587 | 585 | ||
588 | /* put in dlm_lockres_release */ | ||
589 | dlm_grab(dlm); | ||
590 | res->dlm = dlm; | 586 | res->dlm = dlm; |
591 | 587 | ||
592 | kref_init(&res->refs); | 588 | kref_init(&res->refs); |
@@ -3050,8 +3046,6 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, | |||
3050 | /* check for pre-existing lock */ | 3046 | /* check for pre-existing lock */ |
3051 | spin_lock(&dlm->spinlock); | 3047 | spin_lock(&dlm->spinlock); |
3052 | res = __dlm_lookup_lockres(dlm, name, namelen, hash); | 3048 | res = __dlm_lookup_lockres(dlm, name, namelen, hash); |
3053 | spin_lock(&dlm->master_lock); | ||
3054 | |||
3055 | if (res) { | 3049 | if (res) { |
3056 | spin_lock(&res->spinlock); | 3050 | spin_lock(&res->spinlock); |
3057 | if (res->state & DLM_LOCK_RES_RECOVERING) { | 3051 | if (res->state & DLM_LOCK_RES_RECOVERING) { |
@@ -3069,14 +3063,15 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, | |||
3069 | spin_unlock(&res->spinlock); | 3063 | spin_unlock(&res->spinlock); |
3070 | } | 3064 | } |
3071 | 3065 | ||
3066 | spin_lock(&dlm->master_lock); | ||
3072 | /* ignore status. only nonzero status would BUG. */ | 3067 | /* ignore status. only nonzero status would BUG. */ |
3073 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, | 3068 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, |
3074 | name, namelen, | 3069 | name, namelen, |
3075 | migrate->new_master, | 3070 | migrate->new_master, |
3076 | migrate->master); | 3071 | migrate->master); |
3077 | 3072 | ||
3078 | unlock: | ||
3079 | spin_unlock(&dlm->master_lock); | 3073 | spin_unlock(&dlm->master_lock); |
3074 | unlock: | ||
3080 | spin_unlock(&dlm->spinlock); | 3075 | spin_unlock(&dlm->spinlock); |
3081 | 3076 | ||
3082 | if (oldmle) { | 3077 | if (oldmle) { |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 9dfaac73b36d..aaaffbcbe916 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -1997,6 +1997,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, | |||
1997 | struct list_head *queue; | 1997 | struct list_head *queue; |
1998 | struct dlm_lock *lock, *next; | 1998 | struct dlm_lock *lock, *next; |
1999 | 1999 | ||
2000 | assert_spin_locked(&dlm->spinlock); | ||
2001 | assert_spin_locked(&res->spinlock); | ||
2000 | res->state |= DLM_LOCK_RES_RECOVERING; | 2002 | res->state |= DLM_LOCK_RES_RECOVERING; |
2001 | if (!list_empty(&res->recovering)) { | 2003 | if (!list_empty(&res->recovering)) { |
2002 | mlog(0, | 2004 | mlog(0, |
@@ -2326,19 +2328,15 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2326 | /* zero the lvb if necessary */ | 2328 | /* zero the lvb if necessary */ |
2327 | dlm_revalidate_lvb(dlm, res, dead_node); | 2329 | dlm_revalidate_lvb(dlm, res, dead_node); |
2328 | if (res->owner == dead_node) { | 2330 | if (res->owner == dead_node) { |
2329 | if (res->state & DLM_LOCK_RES_DROPPING_REF) | 2331 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { |
2330 | mlog(0, "%s:%.*s: owned by " | 2332 | mlog(ML_NOTICE, "Ignore %.*s for " |
2331 | "dead node %u, this node was " | 2333 | "recovery as it is being freed\n", |
2332 | "dropping its ref when it died. " | 2334 | res->lockname.len, |
2333 | "continue, dropping the flag.\n", | 2335 | res->lockname.name); |
2334 | dlm->name, res->lockname.len, | 2336 | } else |
2335 | res->lockname.name, dead_node); | 2337 | dlm_move_lockres_to_recovery_list(dlm, |
2336 | 2338 | res); | |
2337 | /* the wake_up for this will happen when the | ||
2338 | * RECOVERING flag is dropped later */ | ||
2339 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | ||
2340 | 2339 | ||
2341 | dlm_move_lockres_to_recovery_list(dlm, res); | ||
2342 | } else if (res->owner == dlm->node_num) { | 2340 | } else if (res->owner == dlm->node_num) { |
2343 | dlm_free_dead_locks(dlm, res, dead_node); | 2341 | dlm_free_dead_locks(dlm, res, dead_node); |
2344 | __dlm_lockres_calc_usage(dlm, res); | 2342 | __dlm_lockres_calc_usage(dlm, res); |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d4f73ca68fe5..2211acf33d9b 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -92,19 +92,27 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res) | |||
92 | * truly ready to be freed. */ | 92 | * truly ready to be freed. */ |
93 | int __dlm_lockres_unused(struct dlm_lock_resource *res) | 93 | int __dlm_lockres_unused(struct dlm_lock_resource *res) |
94 | { | 94 | { |
95 | if (!__dlm_lockres_has_locks(res) && | 95 | int bit; |
96 | (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) { | 96 | |
97 | /* try not to scan the bitmap unless the first two | 97 | if (__dlm_lockres_has_locks(res)) |
98 | * conditions are already true */ | 98 | return 0; |
99 | int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | 99 | |
100 | if (bit >= O2NM_MAX_NODES) { | 100 | if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) |
101 | /* since the bit for dlm->node_num is not | 101 | return 0; |
102 | * set, inflight_locks better be zero */ | 102 | |
103 | BUG_ON(res->inflight_locks != 0); | 103 | if (res->state & DLM_LOCK_RES_RECOVERING) |
104 | return 1; | 104 | return 0; |
105 | } | 105 | |
106 | } | 106 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); |
107 | return 0; | 107 | if (bit < O2NM_MAX_NODES) |
108 | return 0; | ||
109 | |||
110 | /* | ||
111 | * since the bit for dlm->node_num is not set, inflight_locks better | ||
112 | * be zero | ||
113 | */ | ||
114 | BUG_ON(res->inflight_locks != 0); | ||
115 | return 1; | ||
108 | } | 116 | } |
109 | 117 | ||
110 | 118 | ||
@@ -152,45 +160,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
152 | spin_unlock(&dlm->spinlock); | 160 | spin_unlock(&dlm->spinlock); |
153 | } | 161 | } |
154 | 162 | ||
155 | static int dlm_purge_lockres(struct dlm_ctxt *dlm, | 163 | static void dlm_purge_lockres(struct dlm_ctxt *dlm, |
156 | struct dlm_lock_resource *res) | 164 | struct dlm_lock_resource *res) |
157 | { | 165 | { |
158 | int master; | 166 | int master; |
159 | int ret = 0; | 167 | int ret = 0; |
160 | 168 | ||
161 | spin_lock(&res->spinlock); | 169 | assert_spin_locked(&dlm->spinlock); |
162 | if (!__dlm_lockres_unused(res)) { | 170 | assert_spin_locked(&res->spinlock); |
163 | mlog(0, "%s:%.*s: tried to purge but not unused\n", | ||
164 | dlm->name, res->lockname.len, res->lockname.name); | ||
165 | __dlm_print_one_lock_resource(res); | ||
166 | spin_unlock(&res->spinlock); | ||
167 | BUG(); | ||
168 | } | ||
169 | |||
170 | if (res->state & DLM_LOCK_RES_MIGRATING) { | ||
171 | mlog(0, "%s:%.*s: Delay dropref as this lockres is " | ||
172 | "being remastered\n", dlm->name, res->lockname.len, | ||
173 | res->lockname.name); | ||
174 | /* Re-add the lockres to the end of the purge list */ | ||
175 | if (!list_empty(&res->purge)) { | ||
176 | list_del_init(&res->purge); | ||
177 | list_add_tail(&res->purge, &dlm->purge_list); | ||
178 | } | ||
179 | spin_unlock(&res->spinlock); | ||
180 | return 0; | ||
181 | } | ||
182 | 171 | ||
183 | master = (res->owner == dlm->node_num); | 172 | master = (res->owner == dlm->node_num); |
184 | 173 | ||
185 | if (!master) | ||
186 | res->state |= DLM_LOCK_RES_DROPPING_REF; | ||
187 | spin_unlock(&res->spinlock); | ||
188 | 174 | ||
189 | mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, | 175 | mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, |
190 | res->lockname.name, master); | 176 | res->lockname.name, master); |
191 | 177 | ||
192 | if (!master) { | 178 | if (!master) { |
179 | res->state |= DLM_LOCK_RES_DROPPING_REF; | ||
193 | /* drop spinlock... retake below */ | 180 | /* drop spinlock... retake below */ |
181 | spin_unlock(&res->spinlock); | ||
194 | spin_unlock(&dlm->spinlock); | 182 | spin_unlock(&dlm->spinlock); |
195 | 183 | ||
196 | spin_lock(&res->spinlock); | 184 | spin_lock(&res->spinlock); |
@@ -208,31 +196,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, | |||
208 | mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", | 196 | mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", |
209 | dlm->name, res->lockname.len, res->lockname.name, ret); | 197 | dlm->name, res->lockname.len, res->lockname.name, ret); |
210 | spin_lock(&dlm->spinlock); | 198 | spin_lock(&dlm->spinlock); |
199 | spin_lock(&res->spinlock); | ||
211 | } | 200 | } |
212 | 201 | ||
213 | spin_lock(&res->spinlock); | ||
214 | if (!list_empty(&res->purge)) { | 202 | if (!list_empty(&res->purge)) { |
215 | mlog(0, "removing lockres %.*s:%p from purgelist, " | 203 | mlog(0, "removing lockres %.*s:%p from purgelist, " |
216 | "master = %d\n", res->lockname.len, res->lockname.name, | 204 | "master = %d\n", res->lockname.len, res->lockname.name, |
217 | res, master); | 205 | res, master); |
218 | list_del_init(&res->purge); | 206 | list_del_init(&res->purge); |
219 | spin_unlock(&res->spinlock); | ||
220 | dlm_lockres_put(res); | 207 | dlm_lockres_put(res); |
221 | dlm->purge_count--; | 208 | dlm->purge_count--; |
222 | } else | 209 | } |
223 | spin_unlock(&res->spinlock); | 210 | |
211 | if (!__dlm_lockres_unused(res)) { | ||
212 | mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n", | ||
213 | dlm->name, res->lockname.len, res->lockname.name); | ||
214 | __dlm_print_one_lock_resource(res); | ||
215 | BUG(); | ||
216 | } | ||
224 | 217 | ||
225 | __dlm_unhash_lockres(res); | 218 | __dlm_unhash_lockres(res); |
226 | 219 | ||
227 | /* lockres is not in the hash now. drop the flag and wake up | 220 | /* lockres is not in the hash now. drop the flag and wake up |
228 | * any processes waiting in dlm_get_lock_resource. */ | 221 | * any processes waiting in dlm_get_lock_resource. */ |
229 | if (!master) { | 222 | if (!master) { |
230 | spin_lock(&res->spinlock); | ||
231 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | 223 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; |
232 | spin_unlock(&res->spinlock); | 224 | spin_unlock(&res->spinlock); |
233 | wake_up(&res->wq); | 225 | wake_up(&res->wq); |
234 | } | 226 | } else |
235 | return 0; | 227 | spin_unlock(&res->spinlock); |
236 | } | 228 | } |
237 | 229 | ||
238 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, | 230 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, |
@@ -251,17 +243,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
251 | lockres = list_entry(dlm->purge_list.next, | 243 | lockres = list_entry(dlm->purge_list.next, |
252 | struct dlm_lock_resource, purge); | 244 | struct dlm_lock_resource, purge); |
253 | 245 | ||
254 | /* Status of the lockres *might* change so double | ||
255 | * check. If the lockres is unused, holding the dlm | ||
256 | * spinlock will prevent people from getting and more | ||
257 | * refs on it -- there's no need to keep the lockres | ||
258 | * spinlock. */ | ||
259 | spin_lock(&lockres->spinlock); | 246 | spin_lock(&lockres->spinlock); |
260 | unused = __dlm_lockres_unused(lockres); | ||
261 | spin_unlock(&lockres->spinlock); | ||
262 | |||
263 | if (!unused) | ||
264 | continue; | ||
265 | 247 | ||
266 | purge_jiffies = lockres->last_used + | 248 | purge_jiffies = lockres->last_used + |
267 | msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); | 249 | msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); |
@@ -273,15 +255,29 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
273 | * in tail order, we can stop at the first | 255 | * in tail order, we can stop at the first |
274 | * unpurgable resource -- anyone added after | 256 | * unpurgable resource -- anyone added after |
275 | * him will have a greater last_used value */ | 257 | * him will have a greater last_used value */ |
258 | spin_unlock(&lockres->spinlock); | ||
276 | break; | 259 | break; |
277 | } | 260 | } |
278 | 261 | ||
262 | /* Status of the lockres *might* change so double | ||
263 | * check. If the lockres is unused, holding the dlm | ||
264 | * spinlock will prevent people from getting any more | ||
265 | * refs on it. */ | ||
266 | unused = __dlm_lockres_unused(lockres); | ||
267 | if (!unused || | ||
268 | (lockres->state & DLM_LOCK_RES_MIGRATING)) { | ||
269 | mlog(0, "lockres %s:%.*s: is in use or " | ||
270 | "being remastered, used %d, state %d\n", | ||
271 | dlm->name, lockres->lockname.len, | ||
272 | lockres->lockname.name, !unused, lockres->state); | ||
273 | list_move_tail(&dlm->purge_list, &lockres->purge); | ||
274 | spin_unlock(&lockres->spinlock); | ||
275 | continue; | ||
276 | } | ||
277 | |||
279 | dlm_lockres_get(lockres); | 278 | dlm_lockres_get(lockres); |
280 | 279 | ||
281 | /* This may drop and reacquire the dlm spinlock if it | 280 | dlm_purge_lockres(dlm, lockres); |
282 | * has to do migration. */ | ||
283 | if (dlm_purge_lockres(dlm, lockres)) | ||
284 | BUG(); | ||
285 | 281 | ||
286 | dlm_lockres_put(lockres); | 282 | dlm_lockres_put(lockres); |
287 | 283 | ||
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 3ac5aa733e9c..73a11ccfd4c2 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -2436,16 +2436,26 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, | |||
2436 | len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + | 2436 | len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + |
2437 | le32_to_cpu(rec.r_clusters)) - cpos; | 2437 | le32_to_cpu(rec.r_clusters)) - cpos; |
2438 | /* | 2438 | /* |
2439 | * If the refcount rec already exist, cool. We just need | ||
2440 | * to check whether there is a split. Otherwise we just need | ||
2441 | * to increase the refcount. | ||
2442 | * If we will insert one, increases recs_add. | ||
2443 | * | ||
2444 | * We record all the records which will be inserted to the | 2439 | * We record all the records which will be inserted to the |
2445 | * same refcount block, so that we can tell exactly whether | 2440 | * same refcount block, so that we can tell exactly whether |
2446 | * we need a new refcount block or not. | 2441 | * we need a new refcount block or not. |
2442 | * | ||
2443 | * If we will insert a new one, this is easy and only happens | ||
2444 | * during adding refcounted flag to the extent, so we don't | ||
2445 | * have a chance of splitting. We just need one record. | ||
2446 | * | ||
2447 | * If the refcount rec already exists, that would be a little | ||
2448 | * complicated. we may have to: | ||
2449 | * 1) split at the beginning if the start pos isn't aligned. | ||
2450 | * we need 1 more record in this case. | ||
2451 | * 2) split at the end if the end pos isn't aligned. | ||
2452 | * we need 1 more record in this case. | ||
2453 | * 3) split in the middle because of file system fragmentation. | ||
2454 | * we need 2 more records in this case(we can't detect this | ||
2455 | * beforehand, so always think of the worst case). | ||
2447 | */ | 2456 | */ |
2448 | if (rec.r_refcount) { | 2457 | if (rec.r_refcount) { |
2458 | recs_add += 2; | ||
2449 | /* Check whether we need a split at the beginning. */ | 2459 | /* Check whether we need a split at the beginning. */ |
2450 | if (cpos == start_cpos && | 2460 | if (cpos == start_cpos && |
2451 | cpos != le64_to_cpu(rec.r_cpos)) | 2461 | cpos != le64_to_cpu(rec.r_cpos)) |
@@ -1031,7 +1031,9 @@ EXPORT_SYMBOL(generic_file_open); | |||
1031 | 1031 | ||
1032 | /* | 1032 | /* |
1033 | * This is used by subsystems that don't want seekable | 1033 | * This is used by subsystems that don't want seekable |
1034 | * file descriptors | 1034 | * file descriptors. The function is not supposed to ever fail, the only |
1035 | * reason it returns an 'int' and not 'void' is so that it can be plugged | ||
1036 | * directly into file_operations structure. | ||
1035 | */ | 1037 | */ |
1036 | int nonseekable_open(struct inode *inode, struct file *filp) | 1038 | int nonseekable_open(struct inode *inode, struct file *filp) |
1037 | { | 1039 | { |
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index 6921e7890be6..fbeb697374d5 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c | |||
@@ -45,8 +45,11 @@ adfs_partition(struct parsed_partitions *state, char *name, char *data, | |||
45 | nr_sects = (le32_to_cpu(dr->disc_size_high) << 23) | | 45 | nr_sects = (le32_to_cpu(dr->disc_size_high) << 23) | |
46 | (le32_to_cpu(dr->disc_size) >> 9); | 46 | (le32_to_cpu(dr->disc_size) >> 9); |
47 | 47 | ||
48 | if (name) | 48 | if (name) { |
49 | printk(" [%s]", name); | 49 | strlcat(state->pp_buf, " [", PAGE_SIZE); |
50 | strlcat(state->pp_buf, name, PAGE_SIZE); | ||
51 | strlcat(state->pp_buf, "]", PAGE_SIZE); | ||
52 | } | ||
50 | put_partition(state, slot, first_sector, nr_sects); | 53 | put_partition(state, slot, first_sector, nr_sects); |
51 | return dr; | 54 | return dr; |
52 | } | 55 | } |
@@ -81,14 +84,14 @@ static int riscix_partition(struct parsed_partitions *state, | |||
81 | if (!rr) | 84 | if (!rr) |
82 | return -1; | 85 | return -1; |
83 | 86 | ||
84 | printk(" [RISCiX]"); | 87 | strlcat(state->pp_buf, " [RISCiX]", PAGE_SIZE); |
85 | 88 | ||
86 | 89 | ||
87 | if (rr->magic == RISCIX_MAGIC) { | 90 | if (rr->magic == RISCIX_MAGIC) { |
88 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; | 91 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; |
89 | int part; | 92 | int part; |
90 | 93 | ||
91 | printk(" <"); | 94 | strlcat(state->pp_buf, " <", PAGE_SIZE); |
92 | 95 | ||
93 | put_partition(state, slot++, first_sect, size); | 96 | put_partition(state, slot++, first_sect, size); |
94 | for (part = 0; part < 8; part++) { | 97 | for (part = 0; part < 8; part++) { |
@@ -97,11 +100,13 @@ static int riscix_partition(struct parsed_partitions *state, | |||
97 | put_partition(state, slot++, | 100 | put_partition(state, slot++, |
98 | le32_to_cpu(rr->part[part].start), | 101 | le32_to_cpu(rr->part[part].start), |
99 | le32_to_cpu(rr->part[part].length)); | 102 | le32_to_cpu(rr->part[part].length)); |
100 | printk("(%s)", rr->part[part].name); | 103 | strlcat(state->pp_buf, "(", PAGE_SIZE); |
104 | strlcat(state->pp_buf, rr->part[part].name, PAGE_SIZE); | ||
105 | strlcat(state->pp_buf, ")", PAGE_SIZE); | ||
101 | } | 106 | } |
102 | } | 107 | } |
103 | 108 | ||
104 | printk(" >\n"); | 109 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
105 | } else { | 110 | } else { |
106 | put_partition(state, slot++, first_sect, nr_sects); | 111 | put_partition(state, slot++, first_sect, nr_sects); |
107 | } | 112 | } |
@@ -131,7 +136,7 @@ static int linux_partition(struct parsed_partitions *state, | |||
131 | struct linux_part *linuxp; | 136 | struct linux_part *linuxp; |
132 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; | 137 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; |
133 | 138 | ||
134 | printk(" [Linux]"); | 139 | strlcat(state->pp_buf, " [Linux]", PAGE_SIZE); |
135 | 140 | ||
136 | put_partition(state, slot++, first_sect, size); | 141 | put_partition(state, slot++, first_sect, size); |
137 | 142 | ||
@@ -139,7 +144,7 @@ static int linux_partition(struct parsed_partitions *state, | |||
139 | if (!linuxp) | 144 | if (!linuxp) |
140 | return -1; | 145 | return -1; |
141 | 146 | ||
142 | printk(" <"); | 147 | strlcat(state->pp_buf, " <", PAGE_SIZE); |
143 | while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) || | 148 | while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) || |
144 | linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) { | 149 | linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) { |
145 | if (slot == state->limit) | 150 | if (slot == state->limit) |
@@ -149,7 +154,7 @@ static int linux_partition(struct parsed_partitions *state, | |||
149 | le32_to_cpu(linuxp->nr_sects)); | 154 | le32_to_cpu(linuxp->nr_sects)); |
150 | linuxp ++; | 155 | linuxp ++; |
151 | } | 156 | } |
152 | printk(" >"); | 157 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
153 | 158 | ||
154 | put_dev_sector(sect); | 159 | put_dev_sector(sect); |
155 | return slot; | 160 | return slot; |
@@ -294,7 +299,7 @@ int adfspart_check_ADFS(struct parsed_partitions *state) | |||
294 | break; | 299 | break; |
295 | } | 300 | } |
296 | } | 301 | } |
297 | printk("\n"); | 302 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
298 | return 1; | 303 | return 1; |
299 | } | 304 | } |
300 | #endif | 305 | #endif |
@@ -367,7 +372,7 @@ int adfspart_check_ICS(struct parsed_partitions *state) | |||
367 | return 0; | 372 | return 0; |
368 | } | 373 | } |
369 | 374 | ||
370 | printk(" [ICS]"); | 375 | strlcat(state->pp_buf, " [ICS]", PAGE_SIZE); |
371 | 376 | ||
372 | for (slot = 1, p = (const struct ics_part *)data; p->size; p++) { | 377 | for (slot = 1, p = (const struct ics_part *)data; p->size; p++) { |
373 | u32 start = le32_to_cpu(p->start); | 378 | u32 start = le32_to_cpu(p->start); |
@@ -401,7 +406,7 @@ int adfspart_check_ICS(struct parsed_partitions *state) | |||
401 | } | 406 | } |
402 | 407 | ||
403 | put_dev_sector(sect); | 408 | put_dev_sector(sect); |
404 | printk("\n"); | 409 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
405 | return 1; | 410 | return 1; |
406 | } | 411 | } |
407 | #endif | 412 | #endif |
@@ -461,7 +466,7 @@ int adfspart_check_POWERTEC(struct parsed_partitions *state) | |||
461 | return 0; | 466 | return 0; |
462 | } | 467 | } |
463 | 468 | ||
464 | printk(" [POWERTEC]"); | 469 | strlcat(state->pp_buf, " [POWERTEC]", PAGE_SIZE); |
465 | 470 | ||
466 | for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) { | 471 | for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) { |
467 | u32 start = le32_to_cpu(p->start); | 472 | u32 start = le32_to_cpu(p->start); |
@@ -472,7 +477,7 @@ int adfspart_check_POWERTEC(struct parsed_partitions *state) | |||
472 | } | 477 | } |
473 | 478 | ||
474 | put_dev_sector(sect); | 479 | put_dev_sector(sect); |
475 | printk("\n"); | 480 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
476 | return 1; | 481 | return 1; |
477 | } | 482 | } |
478 | #endif | 483 | #endif |
@@ -543,7 +548,7 @@ int adfspart_check_EESOX(struct parsed_partitions *state) | |||
543 | 548 | ||
544 | size = get_capacity(state->bdev->bd_disk); | 549 | size = get_capacity(state->bdev->bd_disk); |
545 | put_partition(state, slot++, start, size - start); | 550 | put_partition(state, slot++, start, size - start); |
546 | printk("\n"); | 551 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
547 | } | 552 | } |
548 | 553 | ||
549 | return i ? 1 : 0; | 554 | return i ? 1 : 0; |
diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c index ba443d4229f8..70cbf44a1560 100644 --- a/fs/partitions/amiga.c +++ b/fs/partitions/amiga.c | |||
@@ -69,7 +69,13 @@ int amiga_partition(struct parsed_partitions *state) | |||
69 | /* blksize is blocks per 512 byte standard block */ | 69 | /* blksize is blocks per 512 byte standard block */ |
70 | blksize = be32_to_cpu( rdb->rdb_BlockBytes ) / 512; | 70 | blksize = be32_to_cpu( rdb->rdb_BlockBytes ) / 512; |
71 | 71 | ||
72 | printk(" RDSK (%d)", blksize * 512); /* Be more informative */ | 72 | { |
73 | char tmp[7 + 10 + 1 + 1]; | ||
74 | |||
75 | /* Be more informative */ | ||
76 | snprintf(tmp, sizeof(tmp), " RDSK (%d)", blksize * 512); | ||
77 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
78 | } | ||
73 | blk = be32_to_cpu(rdb->rdb_PartitionList); | 79 | blk = be32_to_cpu(rdb->rdb_PartitionList); |
74 | put_dev_sector(sect); | 80 | put_dev_sector(sect); |
75 | for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { | 81 | for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { |
@@ -106,23 +112,27 @@ int amiga_partition(struct parsed_partitions *state) | |||
106 | { | 112 | { |
107 | /* Be even more informative to aid mounting */ | 113 | /* Be even more informative to aid mounting */ |
108 | char dostype[4]; | 114 | char dostype[4]; |
115 | char tmp[42]; | ||
116 | |||
109 | __be32 *dt = (__be32 *)dostype; | 117 | __be32 *dt = (__be32 *)dostype; |
110 | *dt = pb->pb_Environment[16]; | 118 | *dt = pb->pb_Environment[16]; |
111 | if (dostype[3] < ' ') | 119 | if (dostype[3] < ' ') |
112 | printk(" (%c%c%c^%c)", | 120 | snprintf(tmp, sizeof(tmp), " (%c%c%c^%c)", |
113 | dostype[0], dostype[1], | 121 | dostype[0], dostype[1], |
114 | dostype[2], dostype[3] + '@' ); | 122 | dostype[2], dostype[3] + '@' ); |
115 | else | 123 | else |
116 | printk(" (%c%c%c%c)", | 124 | snprintf(tmp, sizeof(tmp), " (%c%c%c%c)", |
117 | dostype[0], dostype[1], | 125 | dostype[0], dostype[1], |
118 | dostype[2], dostype[3]); | 126 | dostype[2], dostype[3]); |
119 | printk("(res %d spb %d)", | 127 | strlcat(state->pp_buf, tmp, PAGE_SIZE); |
128 | snprintf(tmp, sizeof(tmp), "(res %d spb %d)", | ||
120 | be32_to_cpu(pb->pb_Environment[6]), | 129 | be32_to_cpu(pb->pb_Environment[6]), |
121 | be32_to_cpu(pb->pb_Environment[4])); | 130 | be32_to_cpu(pb->pb_Environment[4])); |
131 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
122 | } | 132 | } |
123 | res = 1; | 133 | res = 1; |
124 | } | 134 | } |
125 | printk("\n"); | 135 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
126 | 136 | ||
127 | rdb_done: | 137 | rdb_done: |
128 | return res; | 138 | return res; |
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c index 4439ff1b6cec..9875b05e80a2 100644 --- a/fs/partitions/atari.c +++ b/fs/partitions/atari.c | |||
@@ -62,7 +62,7 @@ int atari_partition(struct parsed_partitions *state) | |||
62 | } | 62 | } |
63 | 63 | ||
64 | pi = &rs->part[0]; | 64 | pi = &rs->part[0]; |
65 | printk (" AHDI"); | 65 | strlcat(state->pp_buf, " AHDI", PAGE_SIZE); |
66 | for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) { | 66 | for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) { |
67 | struct rootsector *xrs; | 67 | struct rootsector *xrs; |
68 | Sector sect2; | 68 | Sector sect2; |
@@ -81,7 +81,7 @@ int atari_partition(struct parsed_partitions *state) | |||
81 | #ifdef ICD_PARTS | 81 | #ifdef ICD_PARTS |
82 | part_fmt = 1; | 82 | part_fmt = 1; |
83 | #endif | 83 | #endif |
84 | printk(" XGM<"); | 84 | strlcat(state->pp_buf, " XGM<", PAGE_SIZE); |
85 | partsect = extensect = be32_to_cpu(pi->st); | 85 | partsect = extensect = be32_to_cpu(pi->st); |
86 | while (1) { | 86 | while (1) { |
87 | xrs = read_part_sector(state, partsect, &sect2); | 87 | xrs = read_part_sector(state, partsect, &sect2); |
@@ -120,14 +120,14 @@ int atari_partition(struct parsed_partitions *state) | |||
120 | break; | 120 | break; |
121 | } | 121 | } |
122 | } | 122 | } |
123 | printk(" >"); | 123 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
124 | } | 124 | } |
125 | #ifdef ICD_PARTS | 125 | #ifdef ICD_PARTS |
126 | if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */ | 126 | if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */ |
127 | pi = &rs->icdpart[0]; | 127 | pi = &rs->icdpart[0]; |
128 | /* sanity check: no ICD format if first partition invalid */ | 128 | /* sanity check: no ICD format if first partition invalid */ |
129 | if (OK_id(pi->id)) { | 129 | if (OK_id(pi->id)) { |
130 | printk(" ICD<"); | 130 | strlcat(state->pp_buf, " ICD<", PAGE_SIZE); |
131 | for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) { | 131 | for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) { |
132 | /* accept only GEM,BGM,RAW,LNX,SWP partitions */ | 132 | /* accept only GEM,BGM,RAW,LNX,SWP partitions */ |
133 | if (!((pi->flg & 1) && OK_id(pi->id))) | 133 | if (!((pi->flg & 1) && OK_id(pi->id))) |
@@ -137,13 +137,13 @@ int atari_partition(struct parsed_partitions *state) | |||
137 | be32_to_cpu(pi->st), | 137 | be32_to_cpu(pi->st), |
138 | be32_to_cpu(pi->siz)); | 138 | be32_to_cpu(pi->siz)); |
139 | } | 139 | } |
140 | printk(" >"); | 140 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
141 | } | 141 | } |
142 | } | 142 | } |
143 | #endif | 143 | #endif |
144 | put_dev_sector(sect); | 144 | put_dev_sector(sect); |
145 | 145 | ||
146 | printk ("\n"); | 146 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
147 | 147 | ||
148 | return 1; | 148 | return 1; |
149 | } | 149 | } |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 72c52656dc2e..79fbf3f390f0 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -164,10 +164,16 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
164 | state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); | 164 | state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); |
165 | if (!state) | 165 | if (!state) |
166 | return NULL; | 166 | return NULL; |
167 | state->pp_buf = (char *)__get_free_page(GFP_KERNEL); | ||
168 | if (!state->pp_buf) { | ||
169 | kfree(state); | ||
170 | return NULL; | ||
171 | } | ||
172 | state->pp_buf[0] = '\0'; | ||
167 | 173 | ||
168 | state->bdev = bdev; | 174 | state->bdev = bdev; |
169 | disk_name(hd, 0, state->name); | 175 | disk_name(hd, 0, state->name); |
170 | printk(KERN_INFO " %s:", state->name); | 176 | snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name); |
171 | if (isdigit(state->name[strlen(state->name)-1])) | 177 | if (isdigit(state->name[strlen(state->name)-1])) |
172 | sprintf(state->name, "p"); | 178 | sprintf(state->name, "p"); |
173 | 179 | ||
@@ -185,17 +191,25 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
185 | } | 191 | } |
186 | 192 | ||
187 | } | 193 | } |
188 | if (res > 0) | 194 | if (res > 0) { |
195 | printk(KERN_INFO "%s", state->pp_buf); | ||
196 | |||
197 | free_page((unsigned long)state->pp_buf); | ||
189 | return state; | 198 | return state; |
199 | } | ||
190 | if (state->access_beyond_eod) | 200 | if (state->access_beyond_eod) |
191 | err = -ENOSPC; | 201 | err = -ENOSPC; |
192 | if (err) | 202 | if (err) |
193 | /* The partition is unrecognized. So report I/O errors if there were any */ | 203 | /* The partition is unrecognized. So report I/O errors if there were any */ |
194 | res = err; | 204 | res = err; |
195 | if (!res) | 205 | if (!res) |
196 | printk(" unknown partition table\n"); | 206 | strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE); |
197 | else if (warn_no_part) | 207 | else if (warn_no_part) |
198 | printk(" unable to read partition table\n"); | 208 | strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE); |
209 | |||
210 | printk(KERN_INFO "%s", state->pp_buf); | ||
211 | |||
212 | free_page((unsigned long)state->pp_buf); | ||
199 | kfree(state); | 213 | kfree(state); |
200 | return ERR_PTR(res); | 214 | return ERR_PTR(res); |
201 | } | 215 | } |
diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 52f8bd399396..8e4e103ba216 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h | |||
@@ -16,6 +16,7 @@ struct parsed_partitions { | |||
16 | int next; | 16 | int next; |
17 | int limit; | 17 | int limit; |
18 | bool access_beyond_eod; | 18 | bool access_beyond_eod; |
19 | char *pp_buf; | ||
19 | }; | 20 | }; |
20 | 21 | ||
21 | static inline void *read_part_sector(struct parsed_partitions *state, | 22 | static inline void *read_part_sector(struct parsed_partitions *state, |
@@ -32,9 +33,12 @@ static inline void | |||
32 | put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) | 33 | put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) |
33 | { | 34 | { |
34 | if (n < p->limit) { | 35 | if (n < p->limit) { |
36 | char tmp[1 + BDEVNAME_SIZE + 10 + 1]; | ||
37 | |||
35 | p->parts[n].from = from; | 38 | p->parts[n].from = from; |
36 | p->parts[n].size = size; | 39 | p->parts[n].size = size; |
37 | printk(" %s%d", p->name, n); | 40 | snprintf(tmp, sizeof(tmp), " %s%d", p->name, n); |
41 | strlcat(p->pp_buf, tmp, PAGE_SIZE); | ||
38 | } | 42 | } |
39 | } | 43 | } |
40 | 44 | ||
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index 9efb2cfe2410..dbb44d4bb8a7 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c | |||
@@ -630,6 +630,6 @@ int efi_partition(struct parsed_partitions *state) | |||
630 | } | 630 | } |
631 | kfree(ptes); | 631 | kfree(ptes); |
632 | kfree(gpt); | 632 | kfree(gpt); |
633 | printk("\n"); | 633 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
634 | return 1; | 634 | return 1; |
635 | } | 635 | } |
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index fc8497643fd0..d513a07f44bb 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c | |||
@@ -75,6 +75,7 @@ int ibm_partition(struct parsed_partitions *state) | |||
75 | unsigned char *data; | 75 | unsigned char *data; |
76 | Sector sect; | 76 | Sector sect; |
77 | sector_t labelsect; | 77 | sector_t labelsect; |
78 | char tmp[64]; | ||
78 | 79 | ||
79 | res = 0; | 80 | res = 0; |
80 | blocksize = bdev_logical_block_size(bdev); | 81 | blocksize = bdev_logical_block_size(bdev); |
@@ -144,13 +145,15 @@ int ibm_partition(struct parsed_partitions *state) | |||
144 | */ | 145 | */ |
145 | blocksize = label->cms.block_size; | 146 | blocksize = label->cms.block_size; |
146 | if (label->cms.disk_offset != 0) { | 147 | if (label->cms.disk_offset != 0) { |
147 | printk("CMS1/%8s(MDSK):", name); | 148 | snprintf(tmp, sizeof(tmp), "CMS1/%8s(MDSK):", name); |
149 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
148 | /* disk is reserved minidisk */ | 150 | /* disk is reserved minidisk */ |
149 | offset = label->cms.disk_offset; | 151 | offset = label->cms.disk_offset; |
150 | size = (label->cms.block_count - 1) | 152 | size = (label->cms.block_count - 1) |
151 | * (blocksize >> 9); | 153 | * (blocksize >> 9); |
152 | } else { | 154 | } else { |
153 | printk("CMS1/%8s:", name); | 155 | snprintf(tmp, sizeof(tmp), "CMS1/%8s:", name); |
156 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
154 | offset = (info->label_block + 1); | 157 | offset = (info->label_block + 1); |
155 | size = label->cms.block_count | 158 | size = label->cms.block_count |
156 | * (blocksize >> 9); | 159 | * (blocksize >> 9); |
@@ -159,7 +162,8 @@ int ibm_partition(struct parsed_partitions *state) | |||
159 | size-offset*(blocksize >> 9)); | 162 | size-offset*(blocksize >> 9)); |
160 | } else { | 163 | } else { |
161 | if (strncmp(type, "LNX1", 4) == 0) { | 164 | if (strncmp(type, "LNX1", 4) == 0) { |
162 | printk("LNX1/%8s:", name); | 165 | snprintf(tmp, sizeof(tmp), "LNX1/%8s:", name); |
166 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
163 | if (label->lnx.ldl_version == 0xf2) { | 167 | if (label->lnx.ldl_version == 0xf2) { |
164 | fmt_size = label->lnx.formatted_blocks | 168 | fmt_size = label->lnx.formatted_blocks |
165 | * (blocksize >> 9); | 169 | * (blocksize >> 9); |
@@ -178,7 +182,7 @@ int ibm_partition(struct parsed_partitions *state) | |||
178 | offset = (info->label_block + 1); | 182 | offset = (info->label_block + 1); |
179 | } else { | 183 | } else { |
180 | /* unlabeled disk */ | 184 | /* unlabeled disk */ |
181 | printk("(nonl)"); | 185 | strlcat(state->pp_buf, "(nonl)", PAGE_SIZE); |
182 | size = i_size >> 9; | 186 | size = i_size >> 9; |
183 | offset = (info->label_block + 1); | 187 | offset = (info->label_block + 1); |
184 | } | 188 | } |
@@ -197,7 +201,8 @@ int ibm_partition(struct parsed_partitions *state) | |||
197 | * if not, something is wrong, skipping partition detection | 201 | * if not, something is wrong, skipping partition detection |
198 | */ | 202 | */ |
199 | if (strncmp(type, "VOL1", 4) == 0) { | 203 | if (strncmp(type, "VOL1", 4) == 0) { |
200 | printk("VOL1/%8s:", name); | 204 | snprintf(tmp, sizeof(tmp), "VOL1/%8s:", name); |
205 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
201 | /* | 206 | /* |
202 | * get block number and read then go through format1 | 207 | * get block number and read then go through format1 |
203 | * labels | 208 | * labels |
@@ -253,7 +258,7 @@ int ibm_partition(struct parsed_partitions *state) | |||
253 | 258 | ||
254 | } | 259 | } |
255 | 260 | ||
256 | printk("\n"); | 261 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
257 | goto out_freeall; | 262 | goto out_freeall; |
258 | 263 | ||
259 | 264 | ||
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c index 1cc928bb762f..0ea19312706b 100644 --- a/fs/partitions/karma.c +++ b/fs/partitions/karma.c | |||
@@ -50,7 +50,7 @@ int karma_partition(struct parsed_partitions *state) | |||
50 | } | 50 | } |
51 | slot++; | 51 | slot++; |
52 | } | 52 | } |
53 | printk("\n"); | 53 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
54 | put_dev_sector(sect); | 54 | put_dev_sector(sect); |
55 | return 1; | 55 | return 1; |
56 | } | 56 | } |
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 648c9d8f3357..5bf8a04b5d9b 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c | |||
@@ -643,7 +643,7 @@ static bool ldm_create_data_partitions (struct parsed_partitions *pp, | |||
643 | return false; | 643 | return false; |
644 | } | 644 | } |
645 | 645 | ||
646 | printk (" [LDM]"); | 646 | strlcat(pp->pp_buf, " [LDM]", PAGE_SIZE); |
647 | 647 | ||
648 | /* Create the data partitions */ | 648 | /* Create the data partitions */ |
649 | list_for_each (item, &ldb->v_part) { | 649 | list_for_each (item, &ldb->v_part) { |
@@ -658,7 +658,7 @@ static bool ldm_create_data_partitions (struct parsed_partitions *pp, | |||
658 | part_num++; | 658 | part_num++; |
659 | } | 659 | } |
660 | 660 | ||
661 | printk ("\n"); | 661 | strlcat(pp->pp_buf, "\n", PAGE_SIZE); |
662 | return true; | 662 | return true; |
663 | } | 663 | } |
664 | 664 | ||
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c index 74465ff7c263..68d6a216ee79 100644 --- a/fs/partitions/mac.c +++ b/fs/partitions/mac.c | |||
@@ -59,7 +59,7 @@ int mac_partition(struct parsed_partitions *state) | |||
59 | put_dev_sector(sect); | 59 | put_dev_sector(sect); |
60 | return 0; /* not a MacOS disk */ | 60 | return 0; /* not a MacOS disk */ |
61 | } | 61 | } |
62 | printk(" [mac]"); | 62 | strlcat(state->pp_buf, " [mac]", PAGE_SIZE); |
63 | blocks_in_map = be32_to_cpu(part->map_count); | 63 | blocks_in_map = be32_to_cpu(part->map_count); |
64 | for (blk = 1; blk <= blocks_in_map; ++blk) { | 64 | for (blk = 1; blk <= blocks_in_map; ++blk) { |
65 | int pos = blk * secsize; | 65 | int pos = blk * secsize; |
@@ -128,6 +128,6 @@ int mac_partition(struct parsed_partitions *state) | |||
128 | #endif | 128 | #endif |
129 | 129 | ||
130 | put_dev_sector(sect); | 130 | put_dev_sector(sect); |
131 | printk("\n"); | 131 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
132 | return 1; | 132 | return 1; |
133 | } | 133 | } |
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 15bfb7b1e044..5f79a6677c69 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c | |||
@@ -213,10 +213,18 @@ static void parse_solaris_x86(struct parsed_partitions *state, | |||
213 | put_dev_sector(sect); | 213 | put_dev_sector(sect); |
214 | return; | 214 | return; |
215 | } | 215 | } |
216 | printk(" %s%d: <solaris:", state->name, origin); | 216 | { |
217 | char tmp[1 + BDEVNAME_SIZE + 10 + 11 + 1]; | ||
218 | |||
219 | snprintf(tmp, sizeof(tmp), " %s%d: <solaris:", state->name, origin); | ||
220 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
221 | } | ||
217 | if (le32_to_cpu(v->v_version) != 1) { | 222 | if (le32_to_cpu(v->v_version) != 1) { |
218 | printk(" cannot handle version %d vtoc>\n", | 223 | char tmp[64]; |
219 | le32_to_cpu(v->v_version)); | 224 | |
225 | snprintf(tmp, sizeof(tmp), " cannot handle version %d vtoc>\n", | ||
226 | le32_to_cpu(v->v_version)); | ||
227 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
220 | put_dev_sector(sect); | 228 | put_dev_sector(sect); |
221 | return; | 229 | return; |
222 | } | 230 | } |
@@ -224,9 +232,12 @@ static void parse_solaris_x86(struct parsed_partitions *state, | |||
224 | max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; | 232 | max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; |
225 | for (i=0; i<max_nparts && state->next<state->limit; i++) { | 233 | for (i=0; i<max_nparts && state->next<state->limit; i++) { |
226 | struct solaris_x86_slice *s = &v->v_slice[i]; | 234 | struct solaris_x86_slice *s = &v->v_slice[i]; |
235 | char tmp[3 + 10 + 1 + 1]; | ||
236 | |||
227 | if (s->s_size == 0) | 237 | if (s->s_size == 0) |
228 | continue; | 238 | continue; |
229 | printk(" [s%d]", i); | 239 | snprintf(tmp, sizeof(tmp), " [s%d]", i); |
240 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
230 | /* solaris partitions are relative to current MS-DOS | 241 | /* solaris partitions are relative to current MS-DOS |
231 | * one; must add the offset of the current partition */ | 242 | * one; must add the offset of the current partition */ |
232 | put_partition(state, state->next++, | 243 | put_partition(state, state->next++, |
@@ -234,7 +245,7 @@ static void parse_solaris_x86(struct parsed_partitions *state, | |||
234 | le32_to_cpu(s->s_size)); | 245 | le32_to_cpu(s->s_size)); |
235 | } | 246 | } |
236 | put_dev_sector(sect); | 247 | put_dev_sector(sect); |
237 | printk(" >\n"); | 248 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
238 | #endif | 249 | #endif |
239 | } | 250 | } |
240 | 251 | ||
@@ -250,6 +261,7 @@ static void parse_bsd(struct parsed_partitions *state, | |||
250 | Sector sect; | 261 | Sector sect; |
251 | struct bsd_disklabel *l; | 262 | struct bsd_disklabel *l; |
252 | struct bsd_partition *p; | 263 | struct bsd_partition *p; |
264 | char tmp[64]; | ||
253 | 265 | ||
254 | l = read_part_sector(state, offset + 1, &sect); | 266 | l = read_part_sector(state, offset + 1, &sect); |
255 | if (!l) | 267 | if (!l) |
@@ -258,7 +270,9 @@ static void parse_bsd(struct parsed_partitions *state, | |||
258 | put_dev_sector(sect); | 270 | put_dev_sector(sect); |
259 | return; | 271 | return; |
260 | } | 272 | } |
261 | printk(" %s%d: <%s:", state->name, origin, flavour); | 273 | |
274 | snprintf(tmp, sizeof(tmp), " %s%d: <%s:", state->name, origin, flavour); | ||
275 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
262 | 276 | ||
263 | if (le16_to_cpu(l->d_npartitions) < max_partitions) | 277 | if (le16_to_cpu(l->d_npartitions) < max_partitions) |
264 | max_partitions = le16_to_cpu(l->d_npartitions); | 278 | max_partitions = le16_to_cpu(l->d_npartitions); |
@@ -275,16 +289,18 @@ static void parse_bsd(struct parsed_partitions *state, | |||
275 | /* full parent partition, we have it already */ | 289 | /* full parent partition, we have it already */ |
276 | continue; | 290 | continue; |
277 | if (offset > bsd_start || offset+size < bsd_start+bsd_size) { | 291 | if (offset > bsd_start || offset+size < bsd_start+bsd_size) { |
278 | printk("bad subpartition - ignored\n"); | 292 | strlcat(state->pp_buf, "bad subpartition - ignored\n", PAGE_SIZE); |
279 | continue; | 293 | continue; |
280 | } | 294 | } |
281 | put_partition(state, state->next++, bsd_start, bsd_size); | 295 | put_partition(state, state->next++, bsd_start, bsd_size); |
282 | } | 296 | } |
283 | put_dev_sector(sect); | 297 | put_dev_sector(sect); |
284 | if (le16_to_cpu(l->d_npartitions) > max_partitions) | 298 | if (le16_to_cpu(l->d_npartitions) > max_partitions) { |
285 | printk(" (ignored %d more)", | 299 | snprintf(tmp, sizeof(tmp), " (ignored %d more)", |
286 | le16_to_cpu(l->d_npartitions) - max_partitions); | 300 | le16_to_cpu(l->d_npartitions) - max_partitions); |
287 | printk(" >\n"); | 301 | strlcat(state->pp_buf, tmp, PAGE_SIZE); |
302 | } | ||
303 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); | ||
288 | } | 304 | } |
289 | #endif | 305 | #endif |
290 | 306 | ||
@@ -333,7 +349,12 @@ static void parse_unixware(struct parsed_partitions *state, | |||
333 | put_dev_sector(sect); | 349 | put_dev_sector(sect); |
334 | return; | 350 | return; |
335 | } | 351 | } |
336 | printk(" %s%d: <unixware:", state->name, origin); | 352 | { |
353 | char tmp[1 + BDEVNAME_SIZE + 10 + 12 + 1]; | ||
354 | |||
355 | snprintf(tmp, sizeof(tmp), " %s%d: <unixware:", state->name, origin); | ||
356 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
357 | } | ||
337 | p = &l->vtoc.v_slice[1]; | 358 | p = &l->vtoc.v_slice[1]; |
338 | /* I omit the 0th slice as it is the same as whole disk. */ | 359 | /* I omit the 0th slice as it is the same as whole disk. */ |
339 | while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { | 360 | while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { |
@@ -347,7 +368,7 @@ static void parse_unixware(struct parsed_partitions *state, | |||
347 | p++; | 368 | p++; |
348 | } | 369 | } |
349 | put_dev_sector(sect); | 370 | put_dev_sector(sect); |
350 | printk(" >\n"); | 371 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
351 | #endif | 372 | #endif |
352 | } | 373 | } |
353 | 374 | ||
@@ -376,8 +397,10 @@ static void parse_minix(struct parsed_partitions *state, | |||
376 | * the normal boot sector. */ | 397 | * the normal boot sector. */ |
377 | if (msdos_magic_present (data + 510) && | 398 | if (msdos_magic_present (data + 510) && |
378 | SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ | 399 | SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ |
400 | char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1]; | ||
379 | 401 | ||
380 | printk(" %s%d: <minix:", state->name, origin); | 402 | snprintf(tmp, sizeof(tmp), " %s%d: <minix:", state->name, origin); |
403 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
381 | for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) { | 404 | for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) { |
382 | if (state->next == state->limit) | 405 | if (state->next == state->limit) |
383 | break; | 406 | break; |
@@ -386,7 +409,7 @@ static void parse_minix(struct parsed_partitions *state, | |||
386 | put_partition(state, state->next++, | 409 | put_partition(state, state->next++, |
387 | start_sect(p), nr_sects(p)); | 410 | start_sect(p), nr_sects(p)); |
388 | } | 411 | } |
389 | printk(" >\n"); | 412 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
390 | } | 413 | } |
391 | put_dev_sector(sect); | 414 | put_dev_sector(sect); |
392 | #endif /* CONFIG_MINIX_SUBPARTITION */ | 415 | #endif /* CONFIG_MINIX_SUBPARTITION */ |
@@ -425,7 +448,7 @@ int msdos_partition(struct parsed_partitions *state) | |||
425 | 448 | ||
426 | if (aix_magic_present(state, data)) { | 449 | if (aix_magic_present(state, data)) { |
427 | put_dev_sector(sect); | 450 | put_dev_sector(sect); |
428 | printk( " [AIX]"); | 451 | strlcat(state->pp_buf, " [AIX]", PAGE_SIZE); |
429 | return 0; | 452 | return 0; |
430 | } | 453 | } |
431 | 454 | ||
@@ -446,7 +469,7 @@ int msdos_partition(struct parsed_partitions *state) | |||
446 | fb = (struct fat_boot_sector *) data; | 469 | fb = (struct fat_boot_sector *) data; |
447 | if (slot == 1 && fb->reserved && fb->fats | 470 | if (slot == 1 && fb->reserved && fb->fats |
448 | && fat_valid_media(fb->media)) { | 471 | && fat_valid_media(fb->media)) { |
449 | printk("\n"); | 472 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
450 | put_dev_sector(sect); | 473 | put_dev_sector(sect); |
451 | return 1; | 474 | return 1; |
452 | } else { | 475 | } else { |
@@ -491,21 +514,21 @@ int msdos_partition(struct parsed_partitions *state) | |||
491 | n = min(size, max(sector_size, n)); | 514 | n = min(size, max(sector_size, n)); |
492 | put_partition(state, slot, start, n); | 515 | put_partition(state, slot, start, n); |
493 | 516 | ||
494 | printk(" <"); | 517 | strlcat(state->pp_buf, " <", PAGE_SIZE); |
495 | parse_extended(state, start, size); | 518 | parse_extended(state, start, size); |
496 | printk(" >"); | 519 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
497 | continue; | 520 | continue; |
498 | } | 521 | } |
499 | put_partition(state, slot, start, size); | 522 | put_partition(state, slot, start, size); |
500 | if (SYS_IND(p) == LINUX_RAID_PARTITION) | 523 | if (SYS_IND(p) == LINUX_RAID_PARTITION) |
501 | state->parts[slot].flags = ADDPART_FLAG_RAID; | 524 | state->parts[slot].flags = ADDPART_FLAG_RAID; |
502 | if (SYS_IND(p) == DM6_PARTITION) | 525 | if (SYS_IND(p) == DM6_PARTITION) |
503 | printk("[DM]"); | 526 | strlcat(state->pp_buf, "[DM]", PAGE_SIZE); |
504 | if (SYS_IND(p) == EZD_PARTITION) | 527 | if (SYS_IND(p) == EZD_PARTITION) |
505 | printk("[EZD]"); | 528 | strlcat(state->pp_buf, "[EZD]", PAGE_SIZE); |
506 | } | 529 | } |
507 | 530 | ||
508 | printk("\n"); | 531 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
509 | 532 | ||
510 | /* second pass - output for each on a separate line */ | 533 | /* second pass - output for each on a separate line */ |
511 | p = (struct partition *) (0x1be + data); | 534 | p = (struct partition *) (0x1be + data); |
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c index fc22b85d436a..48cec7cbca17 100644 --- a/fs/partitions/osf.c +++ b/fs/partitions/osf.c | |||
@@ -72,7 +72,7 @@ int osf_partition(struct parsed_partitions *state) | |||
72 | le32_to_cpu(partition->p_size)); | 72 | le32_to_cpu(partition->p_size)); |
73 | slot++; | 73 | slot++; |
74 | } | 74 | } |
75 | printk("\n"); | 75 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
76 | put_dev_sector(sect); | 76 | put_dev_sector(sect); |
77 | return 1; | 77 | return 1; |
78 | } | 78 | } |
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c index 43b1df9aa16c..ea8a86dceaf4 100644 --- a/fs/partitions/sgi.c +++ b/fs/partitions/sgi.c | |||
@@ -76,7 +76,7 @@ int sgi_partition(struct parsed_partitions *state) | |||
76 | } | 76 | } |
77 | slot++; | 77 | slot++; |
78 | } | 78 | } |
79 | printk("\n"); | 79 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
80 | put_dev_sector(sect); | 80 | put_dev_sector(sect); |
81 | return 1; | 81 | return 1; |
82 | } | 82 | } |
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c index a32660e25f7f..b5b6fcfb3d36 100644 --- a/fs/partitions/sun.c +++ b/fs/partitions/sun.c | |||
@@ -116,7 +116,7 @@ int sun_partition(struct parsed_partitions *state) | |||
116 | } | 116 | } |
117 | slot++; | 117 | slot++; |
118 | } | 118 | } |
119 | printk("\n"); | 119 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
120 | put_dev_sector(sect); | 120 | put_dev_sector(sect); |
121 | return 1; | 121 | return 1; |
122 | } | 122 | } |
diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c index 9030c864428e..9627ccffc1c4 100644 --- a/fs/partitions/sysv68.c +++ b/fs/partitions/sysv68.c | |||
@@ -54,6 +54,7 @@ int sysv68_partition(struct parsed_partitions *state) | |||
54 | unsigned char *data; | 54 | unsigned char *data; |
55 | struct dkblk0 *b; | 55 | struct dkblk0 *b; |
56 | struct slice *slice; | 56 | struct slice *slice; |
57 | char tmp[64]; | ||
57 | 58 | ||
58 | data = read_part_sector(state, 0, &sect); | 59 | data = read_part_sector(state, 0, &sect); |
59 | if (!data) | 60 | if (!data) |
@@ -73,7 +74,8 @@ int sysv68_partition(struct parsed_partitions *state) | |||
73 | return -1; | 74 | return -1; |
74 | 75 | ||
75 | slices -= 1; /* last slice is the whole disk */ | 76 | slices -= 1; /* last slice is the whole disk */ |
76 | printk("sysV68: %s(s%u)", state->name, slices); | 77 | snprintf(tmp, sizeof(tmp), "sysV68: %s(s%u)", state->name, slices); |
78 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
77 | slice = (struct slice *)data; | 79 | slice = (struct slice *)data; |
78 | for (i = 0; i < slices; i++, slice++) { | 80 | for (i = 0; i < slices; i++, slice++) { |
79 | if (slot == state->limit) | 81 | if (slot == state->limit) |
@@ -82,11 +84,12 @@ int sysv68_partition(struct parsed_partitions *state) | |||
82 | put_partition(state, slot, | 84 | put_partition(state, slot, |
83 | be32_to_cpu(slice->blkoff), | 85 | be32_to_cpu(slice->blkoff), |
84 | be32_to_cpu(slice->nblocks)); | 86 | be32_to_cpu(slice->nblocks)); |
85 | printk("(s%u)", i); | 87 | snprintf(tmp, sizeof(tmp), "(s%u)", i); |
88 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
86 | } | 89 | } |
87 | slot++; | 90 | slot++; |
88 | } | 91 | } |
89 | printk("\n"); | 92 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
90 | put_dev_sector(sect); | 93 | put_dev_sector(sect); |
91 | return 1; | 94 | return 1; |
92 | } | 95 | } |
diff --git a/fs/partitions/ultrix.c b/fs/partitions/ultrix.c index db9eef260364..8dbaf9f77a99 100644 --- a/fs/partitions/ultrix.c +++ b/fs/partitions/ultrix.c | |||
@@ -39,7 +39,7 @@ int ultrix_partition(struct parsed_partitions *state) | |||
39 | label->pt_part[i].pi_blkoff, | 39 | label->pt_part[i].pi_blkoff, |
40 | label->pt_part[i].pi_nblocks); | 40 | label->pt_part[i].pi_nblocks); |
41 | put_dev_sector(sect); | 41 | put_dev_sector(sect); |
42 | printk ("\n"); | 42 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
43 | return 1; | 43 | return 1; |
44 | } else { | 44 | } else { |
45 | put_dev_sector(sect); | 45 | put_dev_sector(sect); |
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 11a7b5c68153..2758e2afc518 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for the Linux proc filesystem routines. | 2 | # Makefile for the Linux proc filesystem routines. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_PROC_FS) += proc.o | 5 | obj-y += proc.o |
6 | 6 | ||
7 | proc-y := nommu.o task_nommu.o | 7 | proc-y := nommu.o task_nommu.o |
8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o | 8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o |
diff --git a/fs/proc/base.c b/fs/proc/base.c index c806dfb24e08..a1c43e7c8a7b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -149,18 +149,13 @@ static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, | |||
149 | return count; | 149 | return count; |
150 | } | 150 | } |
151 | 151 | ||
152 | static int get_fs_path(struct task_struct *task, struct path *path, bool root) | 152 | static int get_task_root(struct task_struct *task, struct path *root) |
153 | { | 153 | { |
154 | struct fs_struct *fs; | ||
155 | int result = -ENOENT; | 154 | int result = -ENOENT; |
156 | 155 | ||
157 | task_lock(task); | 156 | task_lock(task); |
158 | fs = task->fs; | 157 | if (task->fs) { |
159 | if (fs) { | 158 | get_fs_root(task->fs, root); |
160 | read_lock(&fs->lock); | ||
161 | *path = root ? fs->root : fs->pwd; | ||
162 | path_get(path); | ||
163 | read_unlock(&fs->lock); | ||
164 | result = 0; | 159 | result = 0; |
165 | } | 160 | } |
166 | task_unlock(task); | 161 | task_unlock(task); |
@@ -173,7 +168,12 @@ static int proc_cwd_link(struct inode *inode, struct path *path) | |||
173 | int result = -ENOENT; | 168 | int result = -ENOENT; |
174 | 169 | ||
175 | if (task) { | 170 | if (task) { |
176 | result = get_fs_path(task, path, 0); | 171 | task_lock(task); |
172 | if (task->fs) { | ||
173 | get_fs_pwd(task->fs, path); | ||
174 | result = 0; | ||
175 | } | ||
176 | task_unlock(task); | ||
177 | put_task_struct(task); | 177 | put_task_struct(task); |
178 | } | 178 | } |
179 | return result; | 179 | return result; |
@@ -185,7 +185,7 @@ static int proc_root_link(struct inode *inode, struct path *path) | |||
185 | int result = -ENOENT; | 185 | int result = -ENOENT; |
186 | 186 | ||
187 | if (task) { | 187 | if (task) { |
188 | result = get_fs_path(task, path, 1); | 188 | result = get_task_root(task, path); |
189 | put_task_struct(task); | 189 | put_task_struct(task); |
190 | } | 190 | } |
191 | return result; | 191 | return result; |
@@ -597,7 +597,7 @@ static int mounts_open_common(struct inode *inode, struct file *file, | |||
597 | get_mnt_ns(ns); | 597 | get_mnt_ns(ns); |
598 | } | 598 | } |
599 | rcu_read_unlock(); | 599 | rcu_read_unlock(); |
600 | if (ns && get_fs_path(task, &root, 1) == 0) | 600 | if (ns && get_task_root(task, &root) == 0) |
601 | ret = 0; | 601 | ret = 0; |
602 | put_task_struct(task); | 602 | put_task_struct(task); |
603 | } | 603 | } |
@@ -1526,7 +1526,7 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) | |||
1526 | if (!tmp) | 1526 | if (!tmp) |
1527 | return -ENOMEM; | 1527 | return -ENOMEM; |
1528 | 1528 | ||
1529 | pathname = d_path(path, tmp, PAGE_SIZE); | 1529 | pathname = d_path_with_unreachable(path, tmp, PAGE_SIZE); |
1530 | len = PTR_ERR(pathname); | 1530 | len = PTR_ERR(pathname); |
1531 | if (IS_ERR(pathname)) | 1531 | if (IS_ERR(pathname)) |
1532 | goto out; | 1532 | goto out; |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 19fbc810e8e7..1ec952b1f036 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -983,7 +983,6 @@ static int flush_older_commits(struct super_block *s, | |||
983 | 983 | ||
984 | static int reiserfs_async_progress_wait(struct super_block *s) | 984 | static int reiserfs_async_progress_wait(struct super_block *s) |
985 | { | 985 | { |
986 | DEFINE_WAIT(wait); | ||
987 | struct reiserfs_journal *j = SB_JOURNAL(s); | 986 | struct reiserfs_journal *j = SB_JOURNAL(s); |
988 | 987 | ||
989 | if (atomic_read(&j->j_async_throttle)) { | 988 | if (atomic_read(&j->j_async_throttle)) { |
diff --git a/fs/signalfd.c b/fs/signalfd.c index f329849ce3c0..1c5a6add779d 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c | |||
@@ -88,6 +88,7 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, | |||
88 | err |= __put_user(kinfo->si_tid, &uinfo->ssi_tid); | 88 | err |= __put_user(kinfo->si_tid, &uinfo->ssi_tid); |
89 | err |= __put_user(kinfo->si_overrun, &uinfo->ssi_overrun); | 89 | err |= __put_user(kinfo->si_overrun, &uinfo->ssi_overrun); |
90 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); | 90 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); |
91 | err |= __put_user(kinfo->si_int, &uinfo->ssi_int); | ||
91 | break; | 92 | break; |
92 | case __SI_POLL: | 93 | case __SI_POLL: |
93 | err |= __put_user(kinfo->si_band, &uinfo->ssi_band); | 94 | err |= __put_user(kinfo->si_band, &uinfo->ssi_band); |
@@ -111,6 +112,7 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, | |||
111 | err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); | 112 | err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); |
112 | err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid); | 113 | err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid); |
113 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); | 114 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); |
115 | err |= __put_user(kinfo->si_int, &uinfo->ssi_int); | ||
114 | break; | 116 | break; |
115 | default: | 117 | default: |
116 | /* | 118 | /* |
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index cc6ce8a84c21..e5f63da64d04 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig | |||
@@ -5,13 +5,13 @@ config SQUASHFS | |||
5 | help | 5 | help |
6 | Saying Y here includes support for SquashFS 4.0 (a Compressed | 6 | Saying Y here includes support for SquashFS 4.0 (a Compressed |
7 | Read-Only File System). Squashfs is a highly compressed read-only | 7 | Read-Only File System). Squashfs is a highly compressed read-only |
8 | filesystem for Linux. It uses zlib compression to compress both | 8 | filesystem for Linux. It uses zlib/lzo compression to compress both |
9 | files, inodes and directories. Inodes in the system are very small | 9 | files, inodes and directories. Inodes in the system are very small |
10 | and all blocks are packed to minimise data overhead. Block sizes | 10 | and all blocks are packed to minimise data overhead. Block sizes |
11 | greater than 4K are supported up to a maximum of 1 Mbytes (default | 11 | greater than 4K are supported up to a maximum of 1 Mbytes (default |
12 | block size 128K). SquashFS 4.0 supports 64 bit filesystems and files | 12 | block size 128K). SquashFS 4.0 supports 64 bit filesystems and files |
13 | (larger than 4GB), full uid/gid information, hard links and | 13 | (larger than 4GB), full uid/gid information, hard links and |
14 | timestamps. | 14 | timestamps. |
15 | 15 | ||
16 | Squashfs is intended for general read-only filesystem use, for | 16 | Squashfs is intended for general read-only filesystem use, for |
17 | archival use (i.e. in cases where a .tar.gz file may be used), and in | 17 | archival use (i.e. in cases where a .tar.gz file may be used), and in |
@@ -26,7 +26,7 @@ config SQUASHFS | |||
26 | 26 | ||
27 | If unsure, say N. | 27 | If unsure, say N. |
28 | 28 | ||
29 | config SQUASHFS_XATTRS | 29 | config SQUASHFS_XATTR |
30 | bool "Squashfs XATTR support" | 30 | bool "Squashfs XATTR support" |
31 | depends on SQUASHFS | 31 | depends on SQUASHFS |
32 | default n | 32 | default n |
@@ -37,9 +37,24 @@ config SQUASHFS_XATTRS | |||
37 | 37 | ||
38 | If unsure, say N. | 38 | If unsure, say N. |
39 | 39 | ||
40 | config SQUASHFS_EMBEDDED | 40 | config SQUASHFS_LZO |
41 | bool "Include support for LZO compressed file systems" | ||
42 | depends on SQUASHFS | ||
43 | default n | ||
44 | select LZO_DECOMPRESS | ||
45 | help | ||
46 | Saying Y here includes support for reading Squashfs file systems | ||
47 | compressed with LZO compression. LZO compression is mainly | |
48 | aimed at embedded systems with slower CPUs where the overheads | ||
49 | of zlib are too high. | ||
41 | 50 | ||
42 | bool "Additional option for memory-constrained systems" | 51 | LZO is not the standard compression used in Squashfs and so most |
52 | file systems will be readable without selecting this option. | ||
53 | |||
54 | If unsure, say N. | ||
55 | |||
56 | config SQUASHFS_EMBEDDED | ||
57 | bool "Additional option for memory-constrained systems" | ||
43 | depends on SQUASHFS | 58 | depends on SQUASHFS |
44 | default n | 59 | default n |
45 | help | 60 | help |
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 2cee3e9fa452..7672bac8d328 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile | |||
@@ -5,5 +5,5 @@ | |||
5 | obj-$(CONFIG_SQUASHFS) += squashfs.o | 5 | obj-$(CONFIG_SQUASHFS) += squashfs.o |
6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o | 6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o |
7 | squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o | 7 | squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o |
8 | squashfs-$(CONFIG_SQUASHFS_XATTRS) += xattr.o xattr_id.o | 8 | squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o |
9 | 9 | squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o | |
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 157478da6ac9..24af9ce9722f 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c | |||
@@ -40,9 +40,11 @@ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { | |||
40 | NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 | 40 | NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 |
41 | }; | 41 | }; |
42 | 42 | ||
43 | #ifndef CONFIG_SQUASHFS_LZO | ||
43 | static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = { | 44 | static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = { |
44 | NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 | 45 | NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 |
45 | }; | 46 | }; |
47 | #endif | ||
46 | 48 | ||
47 | static const struct squashfs_decompressor squashfs_unknown_comp_ops = { | 49 | static const struct squashfs_decompressor squashfs_unknown_comp_ops = { |
48 | NULL, NULL, NULL, 0, "unknown", 0 | 50 | NULL, NULL, NULL, 0, "unknown", 0 |
@@ -51,7 +53,11 @@ static const struct squashfs_decompressor squashfs_unknown_comp_ops = { | |||
51 | static const struct squashfs_decompressor *decompressor[] = { | 53 | static const struct squashfs_decompressor *decompressor[] = { |
52 | &squashfs_zlib_comp_ops, | 54 | &squashfs_zlib_comp_ops, |
53 | &squashfs_lzma_unsupported_comp_ops, | 55 | &squashfs_lzma_unsupported_comp_ops, |
56 | #ifdef CONFIG_SQUASHFS_LZO | ||
57 | &squashfs_lzo_comp_ops, | ||
58 | #else | ||
54 | &squashfs_lzo_unsupported_comp_ops, | 59 | &squashfs_lzo_unsupported_comp_ops, |
60 | #endif | ||
55 | &squashfs_unknown_comp_ops | 61 | &squashfs_unknown_comp_ops |
56 | }; | 62 | }; |
57 | 63 | ||
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c new file mode 100644 index 000000000000..5d87789bf1c1 --- /dev/null +++ b/fs/squashfs/lzo_wrapper.c | |||
@@ -0,0 +1,136 @@ | |||
1 | /* | ||
2 | * Squashfs - a compressed read only filesystem for Linux | ||
3 | * | ||
4 | * Copyright (c) 2010 LG Electronics | ||
5 | * Chan Jeong <chan.jeong@lge.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version 2, | ||
10 | * or (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
20 | * | ||
21 | * lzo_wrapper.c | ||
22 | */ | ||
23 | |||
24 | #include <linux/mutex.h> | ||
25 | #include <linux/buffer_head.h> | ||
26 | #include <linux/slab.h> | ||
27 | #include <linux/vmalloc.h> | ||
28 | #include <linux/lzo.h> | ||
29 | |||
30 | #include "squashfs_fs.h" | ||
31 | #include "squashfs_fs_sb.h" | ||
32 | #include "squashfs_fs_i.h" | ||
33 | #include "squashfs.h" | ||
34 | #include "decompressor.h" | ||
35 | |||
36 | struct squashfs_lzo { | ||
37 | void *input; | ||
38 | void *output; | ||
39 | }; | ||
40 | |||
41 | static void *lzo_init(struct squashfs_sb_info *msblk) | ||
42 | { | ||
43 | int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); | ||
44 | |||
45 | struct squashfs_lzo *stream = kzalloc(sizeof(*stream), GFP_KERNEL); | ||
46 | if (stream == NULL) | ||
47 | goto failed; | ||
48 | stream->input = vmalloc(block_size); | ||
49 | if (stream->input == NULL) | ||
50 | goto failed; | ||
51 | stream->output = vmalloc(block_size); | ||
52 | if (stream->output == NULL) | ||
53 | goto failed2; | ||
54 | |||
55 | return stream; | ||
56 | |||
57 | failed2: | ||
58 | vfree(stream->input); | ||
59 | failed: | ||
60 | ERROR("Failed to allocate lzo workspace\n"); | ||
61 | kfree(stream); | ||
62 | return NULL; | ||
63 | } | ||
64 | |||
65 | |||
66 | static void lzo_free(void *strm) | ||
67 | { | ||
68 | struct squashfs_lzo *stream = strm; | ||
69 | |||
70 | if (stream) { | ||
71 | vfree(stream->input); | ||
72 | vfree(stream->output); | ||
73 | } | ||
74 | kfree(stream); | ||
75 | } | ||
76 | |||
77 | |||
78 | static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, | ||
79 | struct buffer_head **bh, int b, int offset, int length, int srclength, | ||
80 | int pages) | ||
81 | { | ||
82 | struct squashfs_lzo *stream = msblk->stream; | ||
83 | void *buff = stream->input; | ||
84 | int avail, i, bytes = length, res; | ||
85 | size_t out_len = srclength; | ||
86 | |||
87 | mutex_lock(&msblk->read_data_mutex); | ||
88 | |||
89 | for (i = 0; i < b; i++) { | ||
90 | wait_on_buffer(bh[i]); | ||
91 | if (!buffer_uptodate(bh[i])) | ||
92 | goto block_release; | ||
93 | |||
94 | avail = min(bytes, msblk->devblksize - offset); | ||
95 | memcpy(buff, bh[i]->b_data + offset, avail); | ||
96 | buff += avail; | ||
97 | bytes -= avail; | ||
98 | offset = 0; | ||
99 | put_bh(bh[i]); | ||
100 | } | ||
101 | |||
102 | res = lzo1x_decompress_safe(stream->input, (size_t)length, | ||
103 | stream->output, &out_len); | ||
104 | if (res != LZO_E_OK) | ||
105 | goto failed; | ||
106 | |||
107 | res = bytes = (int)out_len; | ||
108 | for (i = 0, buff = stream->output; bytes && i < pages; i++) { | ||
109 | avail = min_t(int, bytes, PAGE_CACHE_SIZE); | ||
110 | memcpy(buffer[i], buff, avail); | ||
111 | buff += avail; | ||
112 | bytes -= avail; | ||
113 | } | ||
114 | |||
115 | mutex_unlock(&msblk->read_data_mutex); | ||
116 | return res; | ||
117 | |||
118 | block_release: | ||
119 | for (; i < b; i++) | ||
120 | put_bh(bh[i]); | ||
121 | |||
122 | failed: | ||
123 | mutex_unlock(&msblk->read_data_mutex); | ||
124 | |||
125 | ERROR("lzo decompression failed, data probably corrupt\n"); | ||
126 | return -EIO; | ||
127 | } | ||
128 | |||
129 | const struct squashfs_decompressor squashfs_lzo_comp_ops = { | ||
130 | .init = lzo_init, | ||
131 | .free = lzo_free, | ||
132 | .decompress = lzo_uncompress, | ||
133 | .id = LZO_COMPRESSION, | ||
134 | .name = "lzo", | ||
135 | .supported = 1 | ||
136 | }; | ||
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 733a17c42945..5d45569d5f72 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h | |||
@@ -104,3 +104,6 @@ extern const struct xattr_handler *squashfs_xattr_handlers[]; | |||
104 | 104 | ||
105 | /* zlib_wrapper.c */ | 105 | /* zlib_wrapper.c */ |
106 | extern const struct squashfs_decompressor squashfs_zlib_comp_ops; | 106 | extern const struct squashfs_decompressor squashfs_zlib_comp_ops; |
107 | |||
108 | /* lzo_wrapper.c */ | ||
109 | extern const struct squashfs_decompressor squashfs_lzo_comp_ops; | ||
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 8eabb808b78d..c5137fc9ab11 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h | |||
@@ -274,7 +274,7 @@ struct squashfs_base_inode { | |||
274 | __le16 uid; | 274 | __le16 uid; |
275 | __le16 guid; | 275 | __le16 guid; |
276 | __le32 mtime; | 276 | __le32 mtime; |
277 | __le32 inode_number; | 277 | __le32 inode_number; |
278 | }; | 278 | }; |
279 | 279 | ||
280 | struct squashfs_ipc_inode { | 280 | struct squashfs_ipc_inode { |
@@ -283,7 +283,7 @@ struct squashfs_ipc_inode { | |||
283 | __le16 uid; | 283 | __le16 uid; |
284 | __le16 guid; | 284 | __le16 guid; |
285 | __le32 mtime; | 285 | __le32 mtime; |
286 | __le32 inode_number; | 286 | __le32 inode_number; |
287 | __le32 nlink; | 287 | __le32 nlink; |
288 | }; | 288 | }; |
289 | 289 | ||
@@ -293,7 +293,7 @@ struct squashfs_lipc_inode { | |||
293 | __le16 uid; | 293 | __le16 uid; |
294 | __le16 guid; | 294 | __le16 guid; |
295 | __le32 mtime; | 295 | __le32 mtime; |
296 | __le32 inode_number; | 296 | __le32 inode_number; |
297 | __le32 nlink; | 297 | __le32 nlink; |
298 | __le32 xattr; | 298 | __le32 xattr; |
299 | }; | 299 | }; |
@@ -304,7 +304,7 @@ struct squashfs_dev_inode { | |||
304 | __le16 uid; | 304 | __le16 uid; |
305 | __le16 guid; | 305 | __le16 guid; |
306 | __le32 mtime; | 306 | __le32 mtime; |
307 | __le32 inode_number; | 307 | __le32 inode_number; |
308 | __le32 nlink; | 308 | __le32 nlink; |
309 | __le32 rdev; | 309 | __le32 rdev; |
310 | }; | 310 | }; |
@@ -315,7 +315,7 @@ struct squashfs_ldev_inode { | |||
315 | __le16 uid; | 315 | __le16 uid; |
316 | __le16 guid; | 316 | __le16 guid; |
317 | __le32 mtime; | 317 | __le32 mtime; |
318 | __le32 inode_number; | 318 | __le32 inode_number; |
319 | __le32 nlink; | 319 | __le32 nlink; |
320 | __le32 rdev; | 320 | __le32 rdev; |
321 | __le32 xattr; | 321 | __le32 xattr; |
@@ -327,7 +327,7 @@ struct squashfs_symlink_inode { | |||
327 | __le16 uid; | 327 | __le16 uid; |
328 | __le16 guid; | 328 | __le16 guid; |
329 | __le32 mtime; | 329 | __le32 mtime; |
330 | __le32 inode_number; | 330 | __le32 inode_number; |
331 | __le32 nlink; | 331 | __le32 nlink; |
332 | __le32 symlink_size; | 332 | __le32 symlink_size; |
333 | char symlink[0]; | 333 | char symlink[0]; |
@@ -339,7 +339,7 @@ struct squashfs_reg_inode { | |||
339 | __le16 uid; | 339 | __le16 uid; |
340 | __le16 guid; | 340 | __le16 guid; |
341 | __le32 mtime; | 341 | __le32 mtime; |
342 | __le32 inode_number; | 342 | __le32 inode_number; |
343 | __le32 start_block; | 343 | __le32 start_block; |
344 | __le32 fragment; | 344 | __le32 fragment; |
345 | __le32 offset; | 345 | __le32 offset; |
@@ -353,7 +353,7 @@ struct squashfs_lreg_inode { | |||
353 | __le16 uid; | 353 | __le16 uid; |
354 | __le16 guid; | 354 | __le16 guid; |
355 | __le32 mtime; | 355 | __le32 mtime; |
356 | __le32 inode_number; | 356 | __le32 inode_number; |
357 | __le64 start_block; | 357 | __le64 start_block; |
358 | __le64 file_size; | 358 | __le64 file_size; |
359 | __le64 sparse; | 359 | __le64 sparse; |
@@ -370,7 +370,7 @@ struct squashfs_dir_inode { | |||
370 | __le16 uid; | 370 | __le16 uid; |
371 | __le16 guid; | 371 | __le16 guid; |
372 | __le32 mtime; | 372 | __le32 mtime; |
373 | __le32 inode_number; | 373 | __le32 inode_number; |
374 | __le32 start_block; | 374 | __le32 start_block; |
375 | __le32 nlink; | 375 | __le32 nlink; |
376 | __le16 file_size; | 376 | __le16 file_size; |
@@ -384,7 +384,7 @@ struct squashfs_ldir_inode { | |||
384 | __le16 uid; | 384 | __le16 uid; |
385 | __le16 guid; | 385 | __le16 guid; |
386 | __le32 mtime; | 386 | __le32 mtime; |
387 | __le32 inode_number; | 387 | __le32 inode_number; |
388 | __le32 nlink; | 388 | __le32 nlink; |
389 | __le32 file_size; | 389 | __le32 file_size; |
390 | __le32 start_block; | 390 | __le32 start_block; |
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c index c7655e8b31cd..652b8541f9c6 100644 --- a/fs/squashfs/xattr.c +++ b/fs/squashfs/xattr.c | |||
@@ -18,7 +18,7 @@ | |||
18 | * along with this program; if not, write to the Free Software | 18 | * along with this program; if not, write to the Free Software |
19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | 19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
20 | * | 20 | * |
21 | * xattr_id.c | 21 | * xattr.c |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/init.h> | 24 | #include <linux/init.h> |
@@ -295,7 +295,7 @@ static const struct xattr_handler squashfs_xattr_security_handler = { | |||
295 | .get = squashfs_security_get | 295 | .get = squashfs_security_get |
296 | }; | 296 | }; |
297 | 297 | ||
298 | static inline const struct xattr_handler *squashfs_xattr_handler(int type) | 298 | static const struct xattr_handler *squashfs_xattr_handler(int type) |
299 | { | 299 | { |
300 | if (type & ~(SQUASHFS_XATTR_PREFIX_MASK | SQUASHFS_XATTR_VALUE_OOL)) | 300 | if (type & ~(SQUASHFS_XATTR_PREFIX_MASK | SQUASHFS_XATTR_VALUE_OOL)) |
301 | /* ignore unrecognised type */ | 301 | /* ignore unrecognised type */ |
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h index 9da071ae181c..49fe0d719fbf 100644 --- a/fs/squashfs/xattr.h +++ b/fs/squashfs/xattr.h | |||
@@ -21,7 +21,7 @@ | |||
21 | * xattr.h | 21 | * xattr.h |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #ifdef CONFIG_SQUASHFS_XATTRS | 24 | #ifdef CONFIG_SQUASHFS_XATTR |
25 | extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, | 25 | extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, |
26 | u64 *, int *); | 26 | u64 *, int *); |
27 | extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, | 27 | extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, |
diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 0e44a6253352..a0b0cda6927e 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c | |||
@@ -434,12 +434,46 @@ Ebadsize: | |||
434 | goto failed; | 434 | goto failed; |
435 | } | 435 | } |
436 | 436 | ||
437 | static int v7_fill_super(struct super_block *sb, void *data, int silent) | 437 | static int v7_sanity_check(struct super_block *sb, struct buffer_head *bh) |
438 | { | 438 | { |
439 | struct sysv_sb_info *sbi; | ||
440 | struct buffer_head *bh, *bh2 = NULL; | ||
441 | struct v7_super_block *v7sb; | 439 | struct v7_super_block *v7sb; |
442 | struct sysv_inode *v7i; | 440 | struct sysv_inode *v7i; |
441 | struct buffer_head *bh2; | ||
442 | struct sysv_sb_info *sbi; | ||
443 | |||
444 | sbi = sb->s_fs_info; | ||
445 | |||
446 | /* plausibility check on superblock */ | ||
447 | v7sb = (struct v7_super_block *) bh->b_data; | ||
448 | if (fs16_to_cpu(sbi, v7sb->s_nfree) > V7_NICFREE || | ||
449 | fs16_to_cpu(sbi, v7sb->s_ninode) > V7_NICINOD || | ||
450 | fs32_to_cpu(sbi, v7sb->s_fsize) > V7_MAXSIZE) | ||
451 | return 0; | ||
452 | |||
453 | /* plausibility check on root inode: it is a directory, | ||
454 | with a nonzero size that is a multiple of 16 */ | ||
455 | bh2 = sb_bread(sb, 2); | ||
456 | if (bh2 == NULL) | ||
457 | return 0; | ||
458 | |||
459 | v7i = (struct sysv_inode *)(bh2->b_data + 64); | ||
460 | if ((fs16_to_cpu(sbi, v7i->i_mode) & ~0777) != S_IFDIR || | ||
461 | (fs32_to_cpu(sbi, v7i->i_size) == 0) || | ||
462 | (fs32_to_cpu(sbi, v7i->i_size) & 017) || | ||
463 | (fs32_to_cpu(sbi, v7i->i_size) > V7_NFILES * | ||
464 | sizeof(struct sysv_dir_entry))) { | ||
465 | brelse(bh2); | ||
466 | return 0; | ||
467 | } | ||
468 | |||
469 | brelse(bh2); | ||
470 | return 1; | ||
471 | } | ||
472 | |||
473 | static int v7_fill_super(struct super_block *sb, void *data, int silent) | ||
474 | { | ||
475 | struct sysv_sb_info *sbi; | ||
476 | struct buffer_head *bh; | ||
443 | 477 | ||
444 | if (440 != sizeof (struct v7_super_block)) | 478 | if (440 != sizeof (struct v7_super_block)) |
445 | panic("V7 FS: bad super-block size"); | 479 | panic("V7 FS: bad super-block size"); |
@@ -453,7 +487,6 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent) | |||
453 | sbi->s_sb = sb; | 487 | sbi->s_sb = sb; |
454 | sbi->s_block_base = 0; | 488 | sbi->s_block_base = 0; |
455 | sbi->s_type = FSTYPE_V7; | 489 | sbi->s_type = FSTYPE_V7; |
456 | sbi->s_bytesex = BYTESEX_PDP; | ||
457 | sb->s_fs_info = sbi; | 490 | sb->s_fs_info = sbi; |
458 | 491 | ||
459 | sb_set_blocksize(sb, 512); | 492 | sb_set_blocksize(sb, 512); |
@@ -465,32 +498,27 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent) | |||
465 | goto failed; | 498 | goto failed; |
466 | } | 499 | } |
467 | 500 | ||
468 | /* plausibility check on superblock */ | 501 | /* Try PDP-11 UNIX */ |
469 | v7sb = (struct v7_super_block *) bh->b_data; | 502 | sbi->s_bytesex = BYTESEX_PDP; |
470 | if (fs16_to_cpu(sbi, v7sb->s_nfree) > V7_NICFREE || | 503 | if (v7_sanity_check(sb, bh)) |
471 | fs16_to_cpu(sbi, v7sb->s_ninode) > V7_NICINOD || | 504 | goto detected; |
472 | fs32_to_cpu(sbi, v7sb->s_time) == 0) | ||
473 | goto failed; | ||
474 | 505 | ||
475 | /* plausibility check on root inode: it is a directory, | 506 | /* Try PC/IX, v7/x86 */ |
476 | with a nonzero size that is a multiple of 16 */ | 507 | sbi->s_bytesex = BYTESEX_LE; |
477 | if ((bh2 = sb_bread(sb, 2)) == NULL) | 508 | if (v7_sanity_check(sb, bh)) |
478 | goto failed; | 509 | goto detected; |
479 | v7i = (struct sysv_inode *)(bh2->b_data + 64); | ||
480 | if ((fs16_to_cpu(sbi, v7i->i_mode) & ~0777) != S_IFDIR || | ||
481 | (fs32_to_cpu(sbi, v7i->i_size) == 0) || | ||
482 | (fs32_to_cpu(sbi, v7i->i_size) & 017) != 0) | ||
483 | goto failed; | ||
484 | brelse(bh2); | ||
485 | bh2 = NULL; | ||
486 | 510 | ||
511 | goto failed; | ||
512 | |||
513 | detected: | ||
487 | sbi->s_bh1 = bh; | 514 | sbi->s_bh1 = bh; |
488 | sbi->s_bh2 = bh; | 515 | sbi->s_bh2 = bh; |
489 | if (complete_read_super(sb, silent, 1)) | 516 | if (complete_read_super(sb, silent, 1)) |
490 | return 0; | 517 | return 0; |
491 | 518 | ||
492 | failed: | 519 | failed: |
493 | brelse(bh2); | 520 | printk(KERN_ERR "VFS: could not find a valid V7 on %s.\n", |
521 | sb->s_id); | ||
494 | brelse(bh); | 522 | brelse(bh); |
495 | kfree(sbi); | 523 | kfree(sbi); |
496 | return -EINVAL; | 524 | return -EINVAL; |
@@ -559,4 +587,5 @@ static void __exit exit_sysv_fs(void) | |||
559 | 587 | ||
560 | module_init(init_sysv_fs) | 588 | module_init(init_sysv_fs) |
561 | module_exit(exit_sysv_fs) | 589 | module_exit(exit_sysv_fs) |
590 | MODULE_ALIAS("v7"); | ||
562 | MODULE_LICENSE("GPL"); | 591 | MODULE_LICENSE("GPL"); |