Diffstat (limited to 'fs')
112 files changed, 2320 insertions, 1013 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index d97c34a24f7a..c7c23eab9440 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
| @@ -1263,10 +1263,19 @@ static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) | |||
| 1263 | return PTR_ERR(fid); | 1263 | return PTR_ERR(fid); |
| 1264 | 1264 | ||
| 1265 | retval = p9_client_setattr(fid, &p9attr); | 1265 | retval = p9_client_setattr(fid, &p9attr); |
| 1266 | if (retval >= 0) | 1266 | if (retval < 0) |
| 1267 | retval = inode_setattr(dentry->d_inode, iattr); | 1267 | return retval; |
| 1268 | 1268 | ||
| 1269 | return retval; | 1269 | if ((iattr->ia_valid & ATTR_SIZE) && |
| 1270 | iattr->ia_size != i_size_read(dentry->d_inode)) { | ||
| 1271 | retval = vmtruncate(dentry->d_inode, iattr->ia_size); | ||
| 1272 | if (retval) | ||
| 1273 | return retval; | ||
| 1274 | } | ||
| 1275 | |||
| 1276 | setattr_copy(dentry->d_inode, iattr); | ||
| 1277 | mark_inode_dirty(dentry->d_inode); | ||
| 1278 | return 0; | ||
| 1270 | } | 1279 | } |
| 1271 | 1280 | ||
| 1272 | /** | 1281 | /** |
diff --git a/fs/afs/cell.c b/fs/afs/cell.c index ffea35c63879..0d5eeadf6121 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c | |||
| @@ -31,21 +31,20 @@ static struct afs_cell *afs_cell_root; | |||
| 31 | * allocate a cell record and fill in its name, VL server address list and | 31 | * allocate a cell record and fill in its name, VL server address list and |
| 32 | * allocate an anonymous key | 32 | * allocate an anonymous key |
| 33 | */ | 33 | */ |
| 34 | static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) | 34 | static struct afs_cell *afs_cell_alloc(const char *name, unsigned namelen, |
| 35 | char *vllist) | ||
| 35 | { | 36 | { |
| 36 | struct afs_cell *cell; | 37 | struct afs_cell *cell; |
| 37 | struct key *key; | 38 | struct key *key; |
| 38 | size_t namelen; | ||
| 39 | char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; | 39 | char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; |
| 40 | char *dvllist = NULL, *_vllist = NULL; | 40 | char *dvllist = NULL, *_vllist = NULL; |
| 41 | char delimiter = ':'; | 41 | char delimiter = ':'; |
| 42 | int ret; | 42 | int ret; |
| 43 | 43 | ||
| 44 | _enter("%s,%s", name, vllist); | 44 | _enter("%*.*s,%s", namelen, namelen, name ?: "", vllist); |
| 45 | 45 | ||
| 46 | BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ | 46 | BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ |
| 47 | 47 | ||
| 48 | namelen = strlen(name); | ||
| 49 | if (namelen > AFS_MAXCELLNAME) { | 48 | if (namelen > AFS_MAXCELLNAME) { |
| 50 | _leave(" = -ENAMETOOLONG"); | 49 | _leave(" = -ENAMETOOLONG"); |
| 51 | return ERR_PTR(-ENAMETOOLONG); | 50 | return ERR_PTR(-ENAMETOOLONG); |
| @@ -73,6 +72,10 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) | |||
| 73 | if (!vllist || strlen(vllist) < 7) { | 72 | if (!vllist || strlen(vllist) < 7) { |
| 74 | ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL); | 73 | ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL); |
| 75 | if (ret < 0) { | 74 | if (ret < 0) { |
| 75 | if (ret == -ENODATA || ret == -EAGAIN || ret == -ENOKEY) | ||
| 76 | /* translate these errors into something | ||
| 77 | * userspace might understand */ | ||
| 78 | ret = -EDESTADDRREQ; | ||
| 76 | _leave(" = %d", ret); | 79 | _leave(" = %d", ret); |
| 77 | return ERR_PTR(ret); | 80 | return ERR_PTR(ret); |
| 78 | } | 81 | } |
| @@ -138,26 +141,29 @@ error: | |||
| 138 | } | 141 | } |
| 139 | 142 | ||
| 140 | /* | 143 | /* |
| 141 | * create a cell record | 144 | * afs_cell_create() - create a cell record |
| 142 | * - "name" is the name of the cell | 145 | * @name: is the name of the cell. |
| 143 | * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format | 146 | * @namsesz: is the strlen of the cell name. |
| 147 | * @vllist: is a colon separated list of IP addresses in "a.b.c.d" format. | ||
| 148 | * @retref: is T to return the cell reference when the cell exists. | ||
| 144 | */ | 149 | */ |
| 145 | struct afs_cell *afs_cell_create(const char *name, char *vllist) | 150 | struct afs_cell *afs_cell_create(const char *name, unsigned namesz, |
| 151 | char *vllist, bool retref) | ||
| 146 | { | 152 | { |
| 147 | struct afs_cell *cell; | 153 | struct afs_cell *cell; |
| 148 | int ret; | 154 | int ret; |
| 149 | 155 | ||
| 150 | _enter("%s,%s", name, vllist); | 156 | _enter("%*.*s,%s", namesz, namesz, name ?: "", vllist); |
| 151 | 157 | ||
| 152 | down_write(&afs_cells_sem); | 158 | down_write(&afs_cells_sem); |
| 153 | read_lock(&afs_cells_lock); | 159 | read_lock(&afs_cells_lock); |
| 154 | list_for_each_entry(cell, &afs_cells, link) { | 160 | list_for_each_entry(cell, &afs_cells, link) { |
| 155 | if (strcasecmp(cell->name, name) == 0) | 161 | if (strncasecmp(cell->name, name, namesz) == 0) |
| 156 | goto duplicate_name; | 162 | goto duplicate_name; |
| 157 | } | 163 | } |
| 158 | read_unlock(&afs_cells_lock); | 164 | read_unlock(&afs_cells_lock); |
| 159 | 165 | ||
| 160 | cell = afs_cell_alloc(name, vllist); | 166 | cell = afs_cell_alloc(name, namesz, vllist); |
| 161 | if (IS_ERR(cell)) { | 167 | if (IS_ERR(cell)) { |
| 162 | _leave(" = %ld", PTR_ERR(cell)); | 168 | _leave(" = %ld", PTR_ERR(cell)); |
| 163 | up_write(&afs_cells_sem); | 169 | up_write(&afs_cells_sem); |
| @@ -197,8 +203,18 @@ error: | |||
| 197 | return ERR_PTR(ret); | 203 | return ERR_PTR(ret); |
| 198 | 204 | ||
| 199 | duplicate_name: | 205 | duplicate_name: |
| 206 | if (retref && !IS_ERR(cell)) | ||
| 207 | afs_get_cell(cell); | ||
| 208 | |||
| 200 | read_unlock(&afs_cells_lock); | 209 | read_unlock(&afs_cells_lock); |
| 201 | up_write(&afs_cells_sem); | 210 | up_write(&afs_cells_sem); |
| 211 | |||
| 212 | if (retref) { | ||
| 213 | _leave(" = %p", cell); | ||
| 214 | return cell; | ||
| 215 | } | ||
| 216 | |||
| 217 | _leave(" = -EEXIST"); | ||
| 202 | return ERR_PTR(-EEXIST); | 218 | return ERR_PTR(-EEXIST); |
| 203 | } | 219 | } |
| 204 | 220 | ||
| @@ -229,7 +245,7 @@ int afs_cell_init(char *rootcell) | |||
| 229 | *cp++ = 0; | 245 | *cp++ = 0; |
| 230 | 246 | ||
| 231 | /* allocate a cell record for the root cell */ | 247 | /* allocate a cell record for the root cell */ |
| 232 | new_root = afs_cell_create(rootcell, cp); | 248 | new_root = afs_cell_create(rootcell, strlen(rootcell), cp, false); |
| 233 | if (IS_ERR(new_root)) { | 249 | if (IS_ERR(new_root)) { |
| 234 | _leave(" = %ld", PTR_ERR(new_root)); | 250 | _leave(" = %ld", PTR_ERR(new_root)); |
| 235 | return PTR_ERR(new_root); | 251 | return PTR_ERR(new_root); |
| @@ -249,11 +265,12 @@ int afs_cell_init(char *rootcell) | |||
| 249 | /* | 265 | /* |
| 250 | * lookup a cell record | 266 | * lookup a cell record |
| 251 | */ | 267 | */ |
| 252 | struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz) | 268 | struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz, |
| 269 | bool dns_cell) | ||
| 253 | { | 270 | { |
| 254 | struct afs_cell *cell; | 271 | struct afs_cell *cell; |
| 255 | 272 | ||
| 256 | _enter("\"%*.*s\",", namesz, namesz, name ? name : ""); | 273 | _enter("\"%*.*s\",", namesz, namesz, name ?: ""); |
| 257 | 274 | ||
| 258 | down_read(&afs_cells_sem); | 275 | down_read(&afs_cells_sem); |
| 259 | read_lock(&afs_cells_lock); | 276 | read_lock(&afs_cells_lock); |
| @@ -267,6 +284,8 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz) | |||
| 267 | } | 284 | } |
| 268 | } | 285 | } |
| 269 | cell = ERR_PTR(-ENOENT); | 286 | cell = ERR_PTR(-ENOENT); |
| 287 | if (dns_cell) | ||
| 288 | goto create_cell; | ||
| 270 | found: | 289 | found: |
| 271 | ; | 290 | ; |
| 272 | } else { | 291 | } else { |
| @@ -289,6 +308,15 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz) | |||
| 289 | up_read(&afs_cells_sem); | 308 | up_read(&afs_cells_sem); |
| 290 | _leave(" = %p", cell); | 309 | _leave(" = %p", cell); |
| 291 | return cell; | 310 | return cell; |
| 311 | |||
| 312 | create_cell: | ||
| 313 | read_unlock(&afs_cells_lock); | ||
| 314 | up_read(&afs_cells_sem); | ||
| 315 | |||
| 316 | cell = afs_cell_create(name, namesz, NULL, true); | ||
| 317 | |||
| 318 | _leave(" = %p", cell); | ||
| 319 | return cell; | ||
| 292 | } | 320 | } |
| 293 | 321 | ||
| 294 | #if 0 | 322 | #if 0 |
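Passing the cell name length explicitly lets afs_cell_lookup() and afs_cell_create() accept a substring that is not NUL-terminated, which the device-name and mount-option parsers in fs/afs/super.c rely on further down. A hypothetical caller (the device string and the strcspn-based split are for illustration only) might look like:

#include <linux/string.h>
/* afs_cell_lookup() prototype as declared in fs/afs/internal.h below */

/* Hypothetical helper: resolve the cell named in an AFS device string
 * such as "%example.com:root.cell." without NUL-terminating the name. */
static struct afs_cell *example_lookup_cell(const char *devname)
{
	const char *cellname = devname + 1;		/* skip the '%' or '#' */
	unsigned cellnamesz = strcspn(cellname, ":");

	/* final 'true' asks afs_cell_lookup() to fall back to a DNS lookup */
	return afs_cell_lookup(cellname, cellnamesz, true);
}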
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index b42d5cc1d6d2..0d38c09bd55e 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
| @@ -477,6 +477,40 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry, | |||
| 477 | } | 477 | } |
| 478 | 478 | ||
| 479 | /* | 479 | /* |
| 480 | * Try to auto mount the mountpoint with a pseudo directory, if the autocell | ||
| 481 | * operation is set. | ||
| 482 | */ | ||
| 483 | static struct inode *afs_try_auto_mntpt( | ||
| 484 | int ret, struct dentry *dentry, struct inode *dir, struct key *key, | ||
| 485 | struct afs_fid *fid) | ||
| 486 | { | ||
| 487 | const char *devname = dentry->d_name.name; | ||
| 488 | struct afs_vnode *vnode = AFS_FS_I(dir); | ||
| 489 | struct inode *inode; | ||
| 490 | |||
| 491 | _enter("%d, %p{%s}, {%x:%u}, %p", | ||
| 492 | ret, dentry, devname, vnode->fid.vid, vnode->fid.vnode, key); | ||
| 493 | |||
| 494 | if (ret != -ENOENT || | ||
| 495 | !test_bit(AFS_VNODE_AUTOCELL, &vnode->flags)) | ||
| 496 | goto out; | ||
| 497 | |||
| 498 | inode = afs_iget_autocell(dir, devname, strlen(devname), key); | ||
| 499 | if (IS_ERR(inode)) { | ||
| 500 | ret = PTR_ERR(inode); | ||
| 501 | goto out; | ||
| 502 | } | ||
| 503 | |||
| 504 | *fid = AFS_FS_I(inode)->fid; | ||
| 505 | _leave("= %p", inode); | ||
| 506 | return inode; | ||
| 507 | |||
| 508 | out: | ||
| 509 | _leave("= %d", ret); | ||
| 510 | return ERR_PTR(ret); | ||
| 511 | } | ||
| 512 | |||
| 513 | /* | ||
| 480 | * look up an entry in a directory | 514 | * look up an entry in a directory |
| 481 | */ | 515 | */ |
| 482 | static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | 516 | static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, |
| @@ -520,6 +554,13 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | |||
| 520 | 554 | ||
| 521 | ret = afs_do_lookup(dir, dentry, &fid, key); | 555 | ret = afs_do_lookup(dir, dentry, &fid, key); |
| 522 | if (ret < 0) { | 556 | if (ret < 0) { |
| 557 | inode = afs_try_auto_mntpt(ret, dentry, dir, key, &fid); | ||
| 558 | if (!IS_ERR(inode)) { | ||
| 559 | key_put(key); | ||
| 560 | goto success; | ||
| 561 | } | ||
| 562 | |||
| 563 | ret = PTR_ERR(inode); | ||
| 523 | key_put(key); | 564 | key_put(key); |
| 524 | if (ret == -ENOENT) { | 565 | if (ret == -ENOENT) { |
| 525 | d_add(dentry, NULL); | 566 | d_add(dentry, NULL); |
| @@ -539,6 +580,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | |||
| 539 | return ERR_CAST(inode); | 580 | return ERR_CAST(inode); |
| 540 | } | 581 | } |
| 541 | 582 | ||
| 583 | success: | ||
| 542 | dentry->d_op = &afs_fs_dentry_operations; | 584 | dentry->d_op = &afs_fs_dentry_operations; |
| 543 | 585 | ||
| 544 | d_add(dentry, inode); | 586 | d_add(dentry, inode); |
| @@ -696,8 +738,9 @@ static int afs_d_delete(struct dentry *dentry) | |||
| 696 | goto zap; | 738 | goto zap; |
| 697 | 739 | ||
| 698 | if (dentry->d_inode && | 740 | if (dentry->d_inode && |
| 699 | test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags)) | 741 | (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags) || |
| 700 | goto zap; | 742 | test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(dentry->d_inode)->flags))) |
| 743 | goto zap; | ||
| 701 | 744 | ||
| 702 | _leave(" = 0 [keep]"); | 745 | _leave(" = 0 [keep]"); |
| 703 | return 0; | 746 | return 0; |
diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 320ffef11574..0747339011c3 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c | |||
| @@ -19,6 +19,8 @@ | |||
| 19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
| 20 | #include <linux/pagemap.h> | 20 | #include <linux/pagemap.h> |
| 21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
| 22 | #include <linux/mount.h> | ||
| 23 | #include <linux/namei.h> | ||
| 22 | #include "internal.h" | 24 | #include "internal.h" |
| 23 | 25 | ||
| 24 | struct afs_iget_data { | 26 | struct afs_iget_data { |
| @@ -102,6 +104,16 @@ static int afs_iget5_test(struct inode *inode, void *opaque) | |||
| 102 | } | 104 | } |
| 103 | 105 | ||
| 104 | /* | 106 | /* |
| 107 | * iget5() comparator for inode created by autocell operations | ||
| 108 | * | ||
| 109 | * These pseudo inodes don't match anything. | ||
| 110 | */ | ||
| 111 | static int afs_iget5_autocell_test(struct inode *inode, void *opaque) | ||
| 112 | { | ||
| 113 | return 0; | ||
| 114 | } | ||
| 115 | |||
| 116 | /* | ||
| 105 | * iget5() inode initialiser | 117 | * iget5() inode initialiser |
| 106 | */ | 118 | */ |
| 107 | static int afs_iget5_set(struct inode *inode, void *opaque) | 119 | static int afs_iget5_set(struct inode *inode, void *opaque) |
| @@ -118,6 +130,67 @@ static int afs_iget5_set(struct inode *inode, void *opaque) | |||
| 118 | } | 130 | } |
| 119 | 131 | ||
| 120 | /* | 132 | /* |
| 133 | * inode retrieval for autocell | ||
| 134 | */ | ||
| 135 | struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, | ||
| 136 | int namesz, struct key *key) | ||
| 137 | { | ||
| 138 | struct afs_iget_data data; | ||
| 139 | struct afs_super_info *as; | ||
| 140 | struct afs_vnode *vnode; | ||
| 141 | struct super_block *sb; | ||
| 142 | struct inode *inode; | ||
| 143 | static atomic_t afs_autocell_ino; | ||
| 144 | |||
| 145 | _enter("{%x:%u},%*.*s,", | ||
| 146 | AFS_FS_I(dir)->fid.vid, AFS_FS_I(dir)->fid.vnode, | ||
| 147 | namesz, namesz, dev_name ?: ""); | ||
| 148 | |||
| 149 | sb = dir->i_sb; | ||
| 150 | as = sb->s_fs_info; | ||
| 151 | data.volume = as->volume; | ||
| 152 | data.fid.vid = as->volume->vid; | ||
| 153 | data.fid.unique = 0; | ||
| 154 | data.fid.vnode = 0; | ||
| 155 | |||
| 156 | inode = iget5_locked(sb, atomic_inc_return(&afs_autocell_ino), | ||
| 157 | afs_iget5_autocell_test, afs_iget5_set, | ||
| 158 | &data); | ||
| 159 | if (!inode) { | ||
| 160 | _leave(" = -ENOMEM"); | ||
| 161 | return ERR_PTR(-ENOMEM); | ||
| 162 | } | ||
| 163 | |||
| 164 | _debug("GOT INODE %p { ino=%lu, vl=%x, vn=%x, u=%x }", | ||
| 165 | inode, inode->i_ino, data.fid.vid, data.fid.vnode, | ||
| 166 | data.fid.unique); | ||
| 167 | |||
| 168 | vnode = AFS_FS_I(inode); | ||
| 169 | |||
| 170 | /* there shouldn't be an existing inode */ | ||
| 171 | BUG_ON(!(inode->i_state & I_NEW)); | ||
| 172 | |||
| 173 | inode->i_size = 0; | ||
| 174 | inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; | ||
| 175 | inode->i_op = &afs_autocell_inode_operations; | ||
| 176 | inode->i_nlink = 2; | ||
| 177 | inode->i_uid = 0; | ||
| 178 | inode->i_gid = 0; | ||
| 179 | inode->i_ctime.tv_sec = get_seconds(); | ||
| 180 | inode->i_ctime.tv_nsec = 0; | ||
| 181 | inode->i_atime = inode->i_mtime = inode->i_ctime; | ||
| 182 | inode->i_blocks = 0; | ||
| 183 | inode->i_version = 0; | ||
| 184 | inode->i_generation = 0; | ||
| 185 | |||
| 186 | set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); | ||
| 187 | inode->i_flags |= S_NOATIME; | ||
| 188 | unlock_new_inode(inode); | ||
| 189 | _leave(" = %p", inode); | ||
| 190 | return inode; | ||
| 191 | } | ||
| 192 | |||
| 193 | /* | ||
| 121 | * inode retrieval | 194 | * inode retrieval |
| 122 | */ | 195 | */ |
| 123 | struct inode *afs_iget(struct super_block *sb, struct key *key, | 196 | struct inode *afs_iget(struct super_block *sb, struct key *key, |
| @@ -314,6 +387,19 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
| 314 | } | 387 | } |
| 315 | 388 | ||
| 316 | /* | 389 | /* |
| 390 | * discard an AFS inode | ||
| 391 | */ | ||
| 392 | int afs_drop_inode(struct inode *inode) | ||
| 393 | { | ||
| 394 | _enter(""); | ||
| 395 | |||
| 396 | if (test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(inode)->flags)) | ||
| 397 | return generic_delete_inode(inode); | ||
| 398 | else | ||
| 399 | return generic_drop_inode(inode); | ||
| 400 | } | ||
| 401 | |||
| 402 | /* | ||
| 317 | * clear an AFS inode | 403 | * clear an AFS inode |
| 318 | */ | 404 | */ |
| 319 | void afs_evict_inode(struct inode *inode) | 405 | void afs_evict_inode(struct inode *inode) |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8679089ce9a1..cca8eef736fc 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
| @@ -42,6 +42,7 @@ typedef enum { | |||
| 42 | struct afs_mount_params { | 42 | struct afs_mount_params { |
| 43 | bool rwpath; /* T if the parent should be considered R/W */ | 43 | bool rwpath; /* T if the parent should be considered R/W */ |
| 44 | bool force; /* T to force cell type */ | 44 | bool force; /* T to force cell type */ |
| 45 | bool autocell; /* T if set auto mount operation */ | ||
| 45 | afs_voltype_t type; /* type of volume requested */ | 46 | afs_voltype_t type; /* type of volume requested */ |
| 46 | int volnamesz; /* size of volume name */ | 47 | int volnamesz; /* size of volume name */ |
| 47 | const char *volname; /* name of volume to mount */ | 48 | const char *volname; /* name of volume to mount */ |
| @@ -358,6 +359,8 @@ struct afs_vnode { | |||
| 358 | #define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */ | 359 | #define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */ |
| 359 | #define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */ | 360 | #define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */ |
| 360 | #define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */ | 361 | #define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */ |
| 362 | #define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */ | ||
| 363 | #define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */ | ||
| 361 | 364 | ||
| 362 | long acl_order; /* ACL check count (callback break count) */ | 365 | long acl_order; /* ACL check count (callback break count) */ |
| 363 | 366 | ||
| @@ -468,8 +471,8 @@ extern struct list_head afs_proc_cells; | |||
| 468 | 471 | ||
| 469 | #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) | 472 | #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) |
| 470 | extern int afs_cell_init(char *); | 473 | extern int afs_cell_init(char *); |
| 471 | extern struct afs_cell *afs_cell_create(const char *, char *); | 474 | extern struct afs_cell *afs_cell_create(const char *, unsigned, char *, bool); |
| 472 | extern struct afs_cell *afs_cell_lookup(const char *, unsigned); | 475 | extern struct afs_cell *afs_cell_lookup(const char *, unsigned, bool); |
| 473 | extern struct afs_cell *afs_grab_cell(struct afs_cell *); | 476 | extern struct afs_cell *afs_grab_cell(struct afs_cell *); |
| 474 | extern void afs_put_cell(struct afs_cell *); | 477 | extern void afs_put_cell(struct afs_cell *); |
| 475 | extern void afs_cell_purge(void); | 478 | extern void afs_cell_purge(void); |
| @@ -558,6 +561,8 @@ extern int afs_fs_release_lock(struct afs_server *, struct key *, | |||
| 558 | /* | 561 | /* |
| 559 | * inode.c | 562 | * inode.c |
| 560 | */ | 563 | */ |
| 564 | extern struct inode *afs_iget_autocell(struct inode *, const char *, int, | ||
| 565 | struct key *); | ||
| 561 | extern struct inode *afs_iget(struct super_block *, struct key *, | 566 | extern struct inode *afs_iget(struct super_block *, struct key *, |
| 562 | struct afs_fid *, struct afs_file_status *, | 567 | struct afs_fid *, struct afs_file_status *, |
| 563 | struct afs_callback *); | 568 | struct afs_callback *); |
| @@ -566,6 +571,7 @@ extern int afs_validate(struct afs_vnode *, struct key *); | |||
| 566 | extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 571 | extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
| 567 | extern int afs_setattr(struct dentry *, struct iattr *); | 572 | extern int afs_setattr(struct dentry *, struct iattr *); |
| 568 | extern void afs_evict_inode(struct inode *); | 573 | extern void afs_evict_inode(struct inode *); |
| 574 | extern int afs_drop_inode(struct inode *); | ||
| 569 | 575 | ||
| 570 | /* | 576 | /* |
| 571 | * main.c | 577 | * main.c |
| @@ -581,6 +587,7 @@ extern int afs_abort_to_error(u32); | |||
| 581 | * mntpt.c | 587 | * mntpt.c |
| 582 | */ | 588 | */ |
| 583 | extern const struct inode_operations afs_mntpt_inode_operations; | 589 | extern const struct inode_operations afs_mntpt_inode_operations; |
| 590 | extern const struct inode_operations afs_autocell_inode_operations; | ||
| 584 | extern const struct file_operations afs_mntpt_file_operations; | 591 | extern const struct file_operations afs_mntpt_file_operations; |
| 585 | 592 | ||
| 586 | extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); | 593 | extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); |
| @@ -752,12 +759,6 @@ extern unsigned afs_debug; | |||
| 752 | #define dbgprintk(FMT,...) \ | 759 | #define dbgprintk(FMT,...) \ |
| 753 | printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__) | 760 | printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__) |
| 754 | 761 | ||
| 755 | /* make sure we maintain the format strings, even when debugging is disabled */ | ||
| 756 | static inline __attribute__((format(printf,1,2))) | ||
| 757 | void _dbprintk(const char *fmt, ...) | ||
| 758 | { | ||
| 759 | } | ||
| 760 | |||
| 761 | #define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) | 762 | #define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) |
| 762 | #define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) | 763 | #define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) |
| 763 | #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) | 764 | #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) |
| @@ -792,9 +793,9 @@ do { \ | |||
| 792 | } while (0) | 793 | } while (0) |
| 793 | 794 | ||
| 794 | #else | 795 | #else |
| 795 | #define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) | 796 | #define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__) |
| 796 | #define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) | 797 | #define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) |
| 797 | #define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) | 798 | #define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__) |
| 798 | #endif | 799 | #endif |
| 799 | 800 | ||
| 800 | /* | 801 | /* |
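The empty _dbprintk() stubs above are replaced by the kernel's no_printk(), which compiles away to nothing while still type-checking the format string against its arguments. The same construction for a generic debug macro, as a sketch:

#include <linux/kernel.h>	/* no_printk() */

/* Sketch: with DEBUG off the call sites vanish, but format/argument
 * mismatches are still caught at compile time. */
#ifdef DEBUG
#define my_debug(FMT, ...) \
	printk(KERN_DEBUG "[%s] " FMT "\n", __func__, ##__VA_ARGS__)
#else
#define my_debug(FMT, ...) \
	no_printk("[%s] " FMT "\n", __func__, ##__VA_ARGS__)
#endif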
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index a9e23039ea34..6d552686c498 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c | |||
| @@ -38,6 +38,11 @@ const struct inode_operations afs_mntpt_inode_operations = { | |||
| 38 | .getattr = afs_getattr, | 38 | .getattr = afs_getattr, |
| 39 | }; | 39 | }; |
| 40 | 40 | ||
| 41 | const struct inode_operations afs_autocell_inode_operations = { | ||
| 42 | .follow_link = afs_mntpt_follow_link, | ||
| 43 | .getattr = afs_getattr, | ||
| 44 | }; | ||
| 45 | |||
| 41 | static LIST_HEAD(afs_vfsmounts); | 46 | static LIST_HEAD(afs_vfsmounts); |
| 42 | static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); | 47 | static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); |
| 43 | 48 | ||
| @@ -136,20 +141,16 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | |||
| 136 | { | 141 | { |
| 137 | struct afs_super_info *super; | 142 | struct afs_super_info *super; |
| 138 | struct vfsmount *mnt; | 143 | struct vfsmount *mnt; |
| 144 | struct afs_vnode *vnode; | ||
| 139 | struct page *page; | 145 | struct page *page; |
| 140 | size_t size; | 146 | char *devname, *options; |
| 141 | char *buf, *devname, *options; | 147 | bool rwpath = false; |
| 142 | int ret; | 148 | int ret; |
| 143 | 149 | ||
| 144 | _enter("{%s}", mntpt->d_name.name); | 150 | _enter("{%s}", mntpt->d_name.name); |
| 145 | 151 | ||
| 146 | BUG_ON(!mntpt->d_inode); | 152 | BUG_ON(!mntpt->d_inode); |
| 147 | 153 | ||
| 148 | ret = -EINVAL; | ||
| 149 | size = mntpt->d_inode->i_size; | ||
| 150 | if (size > PAGE_SIZE - 1) | ||
| 151 | goto error_no_devname; | ||
| 152 | |||
| 153 | ret = -ENOMEM; | 154 | ret = -ENOMEM; |
| 154 | devname = (char *) get_zeroed_page(GFP_KERNEL); | 155 | devname = (char *) get_zeroed_page(GFP_KERNEL); |
| 155 | if (!devname) | 156 | if (!devname) |
| @@ -159,28 +160,59 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | |||
| 159 | if (!options) | 160 | if (!options) |
| 160 | goto error_no_options; | 161 | goto error_no_options; |
| 161 | 162 | ||
| 162 | /* read the contents of the AFS special symlink */ | 163 | vnode = AFS_FS_I(mntpt->d_inode); |
| 163 | page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); | 164 | if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { |
| 164 | if (IS_ERR(page)) { | 165 | /* if the directory is a pseudo directory, use the d_name */ |
| 165 | ret = PTR_ERR(page); | 166 | static const char afs_root_cell[] = ":root.cell."; |
| 166 | goto error_no_page; | 167 | unsigned size = mntpt->d_name.len; |
| 168 | |||
| 169 | ret = -ENOENT; | ||
| 170 | if (size < 2 || size > AFS_MAXCELLNAME) | ||
| 171 | goto error_no_page; | ||
| 172 | |||
| 173 | if (mntpt->d_name.name[0] == '.') { | ||
| 174 | devname[0] = '#'; | ||
| 175 | memcpy(devname + 1, mntpt->d_name.name, size - 1); | ||
| 176 | memcpy(devname + size, afs_root_cell, | ||
| 177 | sizeof(afs_root_cell)); | ||
| 178 | rwpath = true; | ||
| 179 | } else { | ||
| 180 | devname[0] = '%'; | ||
| 181 | memcpy(devname + 1, mntpt->d_name.name, size); | ||
| 182 | memcpy(devname + size + 1, afs_root_cell, | ||
| 183 | sizeof(afs_root_cell)); | ||
| 184 | } | ||
| 185 | } else { | ||
| 186 | /* read the contents of the AFS special symlink */ | ||
| 187 | loff_t size = i_size_read(mntpt->d_inode); | ||
| 188 | char *buf; | ||
| 189 | |||
| 190 | ret = -EINVAL; | ||
| 191 | if (size > PAGE_SIZE - 1) | ||
| 192 | goto error_no_page; | ||
| 193 | |||
| 194 | page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL); | ||
| 195 | if (IS_ERR(page)) { | ||
| 196 | ret = PTR_ERR(page); | ||
| 197 | goto error_no_page; | ||
| 198 | } | ||
| 199 | |||
| 200 | ret = -EIO; | ||
| 201 | if (PageError(page)) | ||
| 202 | goto error; | ||
| 203 | |||
| 204 | buf = kmap_atomic(page, KM_USER0); | ||
| 205 | memcpy(devname, buf, size); | ||
| 206 | kunmap_atomic(buf, KM_USER0); | ||
| 207 | page_cache_release(page); | ||
| 208 | page = NULL; | ||
| 167 | } | 209 | } |
| 168 | 210 | ||
| 169 | ret = -EIO; | ||
| 170 | if (PageError(page)) | ||
| 171 | goto error; | ||
| 172 | |||
| 173 | buf = kmap_atomic(page, KM_USER0); | ||
| 174 | memcpy(devname, buf, size); | ||
| 175 | kunmap_atomic(buf, KM_USER0); | ||
| 176 | page_cache_release(page); | ||
| 177 | page = NULL; | ||
| 178 | |||
| 179 | /* work out what options we want */ | 211 | /* work out what options we want */ |
| 180 | super = AFS_FS_S(mntpt->d_sb); | 212 | super = AFS_FS_S(mntpt->d_sb); |
| 181 | memcpy(options, "cell=", 5); | 213 | memcpy(options, "cell=", 5); |
| 182 | strcpy(options + 5, super->volume->cell->name); | 214 | strcpy(options + 5, super->volume->cell->name); |
| 183 | if (super->volume->type == AFSVL_RWVOL) | 215 | if (super->volume->type == AFSVL_RWVOL || rwpath) |
| 184 | strcat(options, ",rwpath"); | 216 | strcat(options, ",rwpath"); |
| 185 | 217 | ||
| 186 | /* try and do the mount */ | 218 | /* try and do the mount */ |
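For a pseudo directory, the mount source is synthesised from the directory name ('#cell:root.cell.' for names starting with '.', '%cell:root.cell.' otherwise) instead of being read from a mountpoint symlink, and the option string always carries the parent cell plus an optional ",rwpath". A sketch of the option-string step, equivalent to the memcpy/strcpy/strcat sequence above (the helper name is illustrative):

#include <linux/kernel.h>	/* snprintf() */

/* Sketch: build the "cell=<name>[,rwpath]" string handed to the
 * automounted superblock; 'rw' stands in for the RW-volume/rwpath
 * decision made in the hunk above. */
static void example_build_options(char *options, size_t len,
				  const char *cellname, bool rw)
{
	snprintf(options, len, "cell=%s%s", cellname, rw ? ",rwpath" : "");
}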
diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 852739d262a9..096b23f821a1 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c | |||
| @@ -294,7 +294,7 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf, | |||
| 294 | if (strcmp(kbuf, "add") == 0) { | 294 | if (strcmp(kbuf, "add") == 0) { |
| 295 | struct afs_cell *cell; | 295 | struct afs_cell *cell; |
| 296 | 296 | ||
| 297 | cell = afs_cell_create(name, args); | 297 | cell = afs_cell_create(name, strlen(name), args, false); |
| 298 | if (IS_ERR(cell)) { | 298 | if (IS_ERR(cell)) { |
| 299 | ret = PTR_ERR(cell); | 299 | ret = PTR_ERR(cell); |
| 300 | goto done; | 300 | goto done; |
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 67cf810e0fd6..654d8fdbf01f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c | |||
| @@ -100,6 +100,7 @@ int afs_open_socket(void) | |||
| 100 | ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); | 100 | ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); |
| 101 | if (ret < 0) { | 101 | if (ret < 0) { |
| 102 | sock_release(socket); | 102 | sock_release(socket); |
| 103 | destroy_workqueue(afs_async_calls); | ||
| 103 | _leave(" = %d [bind]", ret); | 104 | _leave(" = %d [bind]", ret); |
| 104 | return ret; | 105 | return ret; |
| 105 | } | 106 | } |
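The rxrpc change plugs an error-path leak: afs_open_socket() creates the afs_async_calls workqueue before binding the socket, so a bind failure has to destroy the workqueue again before returning. The usual goto-ladder unwind for this kind of setup, sketched with hypothetical names:

#include <linux/errno.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/workqueue.h>

static int example_bind(struct socket *sock);	/* stand-in for the real kernel_bind() setup */

/* Sketch: release resources in the reverse order of their creation. */
static int example_open_socket(void)
{
	struct workqueue_struct *wq;
	struct socket *sock;
	int ret;

	wq = create_singlethread_workqueue("example");
	if (!wq)
		return -ENOMEM;

	ret = sock_create_kern(AF_RXRPC, SOCK_DGRAM, PF_INET, &sock);
	if (ret < 0)
		goto error_wq;

	ret = example_bind(sock);
	if (ret < 0)
		goto error_sock;

	return 0;

error_sock:
	sock_release(sock);
error_wq:
	destroy_workqueue(wq);
	return ret;
}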
diff --git a/fs/afs/super.c b/fs/afs/super.c index 9cf80f02da16..77e1e5a61154 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | 16 | ||
| 17 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
| 18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
| 19 | #include <linux/mount.h> | ||
| 19 | #include <linux/init.h> | 20 | #include <linux/init.h> |
| 20 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
| 21 | #include <linux/smp_lock.h> | 22 | #include <linux/smp_lock.h> |
| @@ -48,6 +49,7 @@ struct file_system_type afs_fs_type = { | |||
| 48 | static const struct super_operations afs_super_ops = { | 49 | static const struct super_operations afs_super_ops = { |
| 49 | .statfs = afs_statfs, | 50 | .statfs = afs_statfs, |
| 50 | .alloc_inode = afs_alloc_inode, | 51 | .alloc_inode = afs_alloc_inode, |
| 52 | .drop_inode = afs_drop_inode, | ||
| 51 | .destroy_inode = afs_destroy_inode, | 53 | .destroy_inode = afs_destroy_inode, |
| 52 | .evict_inode = afs_evict_inode, | 54 | .evict_inode = afs_evict_inode, |
| 53 | .put_super = afs_put_super, | 55 | .put_super = afs_put_super, |
| @@ -62,12 +64,14 @@ enum { | |||
| 62 | afs_opt_cell, | 64 | afs_opt_cell, |
| 63 | afs_opt_rwpath, | 65 | afs_opt_rwpath, |
| 64 | afs_opt_vol, | 66 | afs_opt_vol, |
| 67 | afs_opt_autocell, | ||
| 65 | }; | 68 | }; |
| 66 | 69 | ||
| 67 | static const match_table_t afs_options_list = { | 70 | static const match_table_t afs_options_list = { |
| 68 | { afs_opt_cell, "cell=%s" }, | 71 | { afs_opt_cell, "cell=%s" }, |
| 69 | { afs_opt_rwpath, "rwpath" }, | 72 | { afs_opt_rwpath, "rwpath" }, |
| 70 | { afs_opt_vol, "vol=%s" }, | 73 | { afs_opt_vol, "vol=%s" }, |
| 74 | { afs_opt_autocell, "autocell" }, | ||
| 71 | { afs_no_opt, NULL }, | 75 | { afs_no_opt, NULL }, |
| 72 | }; | 76 | }; |
| 73 | 77 | ||
| @@ -151,7 +155,8 @@ static int afs_parse_options(struct afs_mount_params *params, | |||
| 151 | switch (token) { | 155 | switch (token) { |
| 152 | case afs_opt_cell: | 156 | case afs_opt_cell: |
| 153 | cell = afs_cell_lookup(args[0].from, | 157 | cell = afs_cell_lookup(args[0].from, |
| 154 | args[0].to - args[0].from); | 158 | args[0].to - args[0].from, |
| 159 | false); | ||
| 155 | if (IS_ERR(cell)) | 160 | if (IS_ERR(cell)) |
| 156 | return PTR_ERR(cell); | 161 | return PTR_ERR(cell); |
| 157 | afs_put_cell(params->cell); | 162 | afs_put_cell(params->cell); |
| @@ -166,6 +171,10 @@ static int afs_parse_options(struct afs_mount_params *params, | |||
| 166 | *devname = args[0].from; | 171 | *devname = args[0].from; |
| 167 | break; | 172 | break; |
| 168 | 173 | ||
| 174 | case afs_opt_autocell: | ||
| 175 | params->autocell = 1; | ||
| 176 | break; | ||
| 177 | |||
| 169 | default: | 178 | default: |
| 170 | printk(KERN_ERR "kAFS:" | 179 | printk(KERN_ERR "kAFS:" |
| 171 | " Unknown or invalid mount option: '%s'\n", p); | 180 | " Unknown or invalid mount option: '%s'\n", p); |
| @@ -252,10 +261,10 @@ static int afs_parse_device_name(struct afs_mount_params *params, | |||
| 252 | 261 | ||
| 253 | /* lookup the cell record */ | 262 | /* lookup the cell record */ |
| 254 | if (cellname || !params->cell) { | 263 | if (cellname || !params->cell) { |
| 255 | cell = afs_cell_lookup(cellname, cellnamesz); | 264 | cell = afs_cell_lookup(cellname, cellnamesz, true); |
| 256 | if (IS_ERR(cell)) { | 265 | if (IS_ERR(cell)) { |
| 257 | printk(KERN_ERR "kAFS: unable to lookup cell '%s'\n", | 266 | printk(KERN_ERR "kAFS: unable to lookup cell '%*.*s'\n", |
| 258 | cellname ?: ""); | 267 | cellnamesz, cellnamesz, cellname ?: ""); |
| 259 | return PTR_ERR(cell); | 268 | return PTR_ERR(cell); |
| 260 | } | 269 | } |
| 261 | afs_put_cell(params->cell); | 270 | afs_put_cell(params->cell); |
| @@ -321,6 +330,9 @@ static int afs_fill_super(struct super_block *sb, void *data) | |||
| 321 | if (IS_ERR(inode)) | 330 | if (IS_ERR(inode)) |
| 322 | goto error_inode; | 331 | goto error_inode; |
| 323 | 332 | ||
| 333 | if (params->autocell) | ||
| 334 | set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags); | ||
| 335 | |||
| 324 | ret = -ENOMEM; | 336 | ret = -ENOMEM; |
| 325 | root = d_alloc_root(inode); | 337 | root = d_alloc_root(inode); |
| 326 | if (!root) | 338 | if (!root) |
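Adding "autocell" follows the stock match_table_t recipe: a new token in the enum, a table entry, and a case in the option-parsing switch that just sets a flag. The recipe in isolation, with illustrative names rather than the afs ones:

#include <linux/errno.h>
#include <linux/parser.h>
#include <linux/string.h>

enum { opt_myflag, opt_err };

static const match_table_t example_tokens = {
	{ opt_myflag,	"myflag" },
	{ opt_err,	NULL },
};

/* Sketch: walk a comma-separated option string and set a flag when the
 * bare "myflag" token appears. */
static int example_parse(char *options, bool *myflag)
{
	substring_t args[MAX_OPT_ARGS];
	char *p;

	while ((p = strsep(&options, ",")) != NULL) {
		if (!*p)
			continue;
		switch (match_token(p, example_tokens, args)) {
		case opt_myflag:
			*myflag = true;
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}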
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 48e056e70fd6..cb1bd38dc08c 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
| @@ -204,8 +204,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) | |||
| 204 | } | 204 | } |
| 205 | 205 | ||
| 206 | /* Initialize expiry counter after successful mount */ | 206 | /* Initialize expiry counter after successful mount */ |
| 207 | if (ino) | 207 | ino->last_used = jiffies; |
| 208 | ino->last_used = jiffies; | ||
| 209 | 208 | ||
| 210 | spin_lock(&sbi->fs_lock); | 209 | spin_lock(&sbi->fs_lock); |
| 211 | ino->flags &= ~AUTOFS_INF_PENDING; | 210 | ino->flags &= ~AUTOFS_INF_PENDING; |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 66411463b734..50e8c8582faa 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
| @@ -1340,10 +1340,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
| 1340 | /* | 1340 | /* |
| 1341 | * hooks: /n/, see "layering violations". | 1341 | * hooks: /n/, see "layering violations". |
| 1342 | */ | 1342 | */ |
| 1343 | ret = devcgroup_inode_permission(bdev->bd_inode, perm); | 1343 | if (!for_part) { |
| 1344 | if (ret != 0) { | 1344 | ret = devcgroup_inode_permission(bdev->bd_inode, perm); |
| 1345 | bdput(bdev); | 1345 | if (ret != 0) { |
| 1346 | return ret; | 1346 | bdput(bdev); |
| 1347 | return ret; | ||
| 1348 | } | ||
| 1347 | } | 1349 | } |
| 1348 | 1350 | ||
| 1349 | restart: | 1351 | restart: |
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 24eb0d37241a..727caedcdd92 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c | |||
| @@ -552,8 +552,7 @@ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) | |||
| 552 | */ | 552 | */ |
| 553 | static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) | 553 | static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) |
| 554 | { | 554 | { |
| 555 | struct fs_struct *fs; | 555 | struct path path; |
| 556 | struct dentry *dir; | ||
| 557 | const struct cred *saved_cred; | 556 | const struct cred *saved_cred; |
| 558 | int ret; | 557 | int ret; |
| 559 | 558 | ||
| @@ -573,24 +572,21 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) | |||
| 573 | } | 572 | } |
| 574 | 573 | ||
| 575 | /* extract the directory dentry from the cwd */ | 574 | /* extract the directory dentry from the cwd */ |
| 576 | fs = current->fs; | 575 | get_fs_pwd(current->fs, &path); |
| 577 | read_lock(&fs->lock); | ||
| 578 | dir = dget(fs->pwd.dentry); | ||
| 579 | read_unlock(&fs->lock); | ||
| 580 | 576 | ||
| 581 | if (!S_ISDIR(dir->d_inode->i_mode)) | 577 | if (!S_ISDIR(path.dentry->d_inode->i_mode)) |
| 582 | goto notdir; | 578 | goto notdir; |
| 583 | 579 | ||
| 584 | cachefiles_begin_secure(cache, &saved_cred); | 580 | cachefiles_begin_secure(cache, &saved_cred); |
| 585 | ret = cachefiles_cull(cache, dir, args); | 581 | ret = cachefiles_cull(cache, path.dentry, args); |
| 586 | cachefiles_end_secure(cache, saved_cred); | 582 | cachefiles_end_secure(cache, saved_cred); |
| 587 | 583 | ||
| 588 | dput(dir); | 584 | path_put(&path); |
| 589 | _leave(" = %d", ret); | 585 | _leave(" = %d", ret); |
| 590 | return ret; | 586 | return ret; |
| 591 | 587 | ||
| 592 | notdir: | 588 | notdir: |
| 593 | dput(dir); | 589 | path_put(&path); |
| 594 | kerror("cull command requires dirfd to be a directory"); | 590 | kerror("cull command requires dirfd to be a directory"); |
| 595 | return -ENOTDIR; | 591 | return -ENOTDIR; |
| 596 | 592 | ||
| @@ -628,8 +624,7 @@ inval: | |||
| 628 | */ | 624 | */ |
| 629 | static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) | 625 | static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) |
| 630 | { | 626 | { |
| 631 | struct fs_struct *fs; | 627 | struct path path; |
| 632 | struct dentry *dir; | ||
| 633 | const struct cred *saved_cred; | 628 | const struct cred *saved_cred; |
| 634 | int ret; | 629 | int ret; |
| 635 | 630 | ||
| @@ -649,24 +644,21 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) | |||
| 649 | } | 644 | } |
| 650 | 645 | ||
| 651 | /* extract the directory dentry from the cwd */ | 646 | /* extract the directory dentry from the cwd */ |
| 652 | fs = current->fs; | 647 | get_fs_pwd(current->fs, &path); |
| 653 | read_lock(&fs->lock); | ||
| 654 | dir = dget(fs->pwd.dentry); | ||
| 655 | read_unlock(&fs->lock); | ||
| 656 | 648 | ||
| 657 | if (!S_ISDIR(dir->d_inode->i_mode)) | 649 | if (!S_ISDIR(path.dentry->d_inode->i_mode)) |
| 658 | goto notdir; | 650 | goto notdir; |
| 659 | 651 | ||
| 660 | cachefiles_begin_secure(cache, &saved_cred); | 652 | cachefiles_begin_secure(cache, &saved_cred); |
| 661 | ret = cachefiles_check_in_use(cache, dir, args); | 653 | ret = cachefiles_check_in_use(cache, path.dentry, args); |
| 662 | cachefiles_end_secure(cache, saved_cred); | 654 | cachefiles_end_secure(cache, saved_cred); |
| 663 | 655 | ||
| 664 | dput(dir); | 656 | path_put(&path); |
| 665 | //_leave(" = %d", ret); | 657 | //_leave(" = %d", ret); |
| 666 | return ret; | 658 | return ret; |
| 667 | 659 | ||
| 668 | notdir: | 660 | notdir: |
| 669 | dput(dir); | 661 | path_put(&path); |
| 670 | kerror("inuse command requires dirfd to be a directory"); | 662 | kerror("inuse command requires dirfd to be a directory"); |
| 671 | return -ENOTDIR; | 663 | return -ENOTDIR; |
| 672 | 664 | ||
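Both cachefiles hunks swap the open-coded fs->lock / dget() sequence for the new get_fs_pwd() helper, which copies the working-directory path and takes references on both its dentry and vfsmount under the lock; the matching release is path_put(). The resulting shape of such a caller, as a sketch:

#include <linux/errno.h>
#include <linux/fs_struct.h>	/* get_fs_pwd() */
#include <linux/path.h>
#include <linux/sched.h>
#include <linux/stat.h>

/* Sketch: safely grab the caller's working directory, check it, drop it. */
static int example_use_cwd(void)
{
	struct path pwd;
	int ret = -ENOTDIR;

	get_fs_pwd(current->fs, &pwd);		/* refs on dentry + vfsmount */

	if (S_ISDIR(pwd.dentry->d_inode->i_mode))
		ret = 0;			/* ... operate on pwd.dentry ... */

	path_put(&pwd);				/* drop both references */
	return ret;
}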
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index a8cd821226da..bd6bc1bde2d7 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h | |||
| @@ -267,13 +267,6 @@ do { \ | |||
| 267 | #define dbgprintk(FMT, ...) \ | 267 | #define dbgprintk(FMT, ...) \ |
| 268 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) | 268 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) |
| 269 | 269 | ||
| 270 | /* make sure we maintain the format strings, even when debugging is disabled */ | ||
| 271 | static inline void _dbprintk(const char *fmt, ...) | ||
| 272 | __attribute__((format(printf, 1, 2))); | ||
| 273 | static inline void _dbprintk(const char *fmt, ...) | ||
| 274 | { | ||
| 275 | } | ||
| 276 | |||
| 277 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 270 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
| 278 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 271 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
| 279 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) | 272 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) |
| @@ -304,9 +297,9 @@ do { \ | |||
| 304 | } while (0) | 297 | } while (0) |
| 305 | 298 | ||
| 306 | #else | 299 | #else |
| 307 | #define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 300 | #define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
| 308 | #define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 301 | #define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
| 309 | #define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) | 302 | #define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) |
| 310 | #endif | 303 | #endif |
| 311 | 304 | ||
| 312 | #if 1 /* defined(__KDEBUGALL) */ | 305 | #if 1 /* defined(__KDEBUGALL) */ |
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 6a660e610be8..278e1172600d 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
| @@ -6,7 +6,7 @@ ifneq ($(KERNELRELEASE),) | |||
| 6 | 6 | ||
| 7 | obj-$(CONFIG_CEPH_FS) += ceph.o | 7 | obj-$(CONFIG_CEPH_FS) += ceph.o |
| 8 | 8 | ||
| 9 | ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ | 9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ |
| 10 | export.o caps.o snap.o xattr.o \ | 10 | export.o caps.o snap.o xattr.o \ |
| 11 | messenger.o msgpool.o buffer.o pagelist.o \ | 11 | messenger.o msgpool.o buffer.o pagelist.o \ |
| 12 | mds_client.o mdsmap.o \ | 12 | mds_client.o mdsmap.o \ |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index d9c60b84949a..5598a0d02295 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -309,7 +309,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
| 309 | zero_user_segment(page, s, PAGE_CACHE_SIZE); | 309 | zero_user_segment(page, s, PAGE_CACHE_SIZE); |
| 310 | } | 310 | } |
| 311 | 311 | ||
| 312 | if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { | 312 | if (add_to_page_cache_lru(page, mapping, page->index, |
| 313 | GFP_NOFS)) { | ||
| 313 | page_cache_release(page); | 314 | page_cache_release(page); |
| 314 | dout("readpages %p add_to_page_cache failed %p\n", | 315 | dout("readpages %p add_to_page_cache failed %p\n", |
| 315 | inode, page); | 316 | inode, page); |
| @@ -552,7 +553,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
| 552 | * page truncation thread, possibly losing some data that | 553 | * page truncation thread, possibly losing some data that |
| 553 | * raced its way in | 554 | * raced its way in |
| 554 | */ | 555 | */ |
| 555 | if ((issued & CEPH_CAP_FILE_CACHE) == 0) | 556 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) |
| 556 | generic_error_remove_page(inode->i_mapping, page); | 557 | generic_error_remove_page(inode->i_mapping, page); |
| 557 | 558 | ||
| 558 | unlock_page(page); | 559 | unlock_page(page); |
| @@ -797,9 +798,12 @@ get_more_pages: | |||
| 797 | dout("%p will write page %p idx %lu\n", | 798 | dout("%p will write page %p idx %lu\n", |
| 798 | inode, page, page->index); | 799 | inode, page, page->index); |
| 799 | 800 | ||
| 800 | writeback_stat = atomic_long_inc_return(&client->writeback_count); | 801 | writeback_stat = |
| 801 | if (writeback_stat > CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) { | 802 | atomic_long_inc_return(&client->writeback_count); |
| 802 | set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); | 803 | if (writeback_stat > CONGESTION_ON_THRESH( |
| 804 | client->mount_args->congestion_kb)) { | ||
| 805 | set_bdi_congested(&client->backing_dev_info, | ||
| 806 | BLK_RW_ASYNC); | ||
| 803 | } | 807 | } |
| 804 | 808 | ||
| 805 | set_page_writeback(page); | 809 | set_page_writeback(page); |
| @@ -1036,7 +1040,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, | |||
| 1036 | *pagep = page; | 1040 | *pagep = page; |
| 1037 | 1041 | ||
| 1038 | dout("write_begin file %p inode %p page %p %d~%d\n", file, | 1042 | dout("write_begin file %p inode %p page %p %d~%d\n", file, |
| 1039 | inode, page, (int)pos, (int)len); | 1043 | inode, page, (int)pos, (int)len); |
| 1040 | 1044 | ||
| 1041 | r = ceph_update_writeable_page(file, pos, len, page); | 1045 | r = ceph_update_writeable_page(file, pos, len, page); |
| 1042 | } while (r == -EAGAIN); | 1046 | } while (r == -EAGAIN); |
diff --git a/fs/ceph/armor.c b/fs/ceph/armor.c index 67b2c030924b..eb2a666b0be7 100644 --- a/fs/ceph/armor.c +++ b/fs/ceph/armor.c | |||
| @@ -1,11 +1,15 @@ | |||
| 1 | 1 | ||
| 2 | #include <linux/errno.h> | 2 | #include <linux/errno.h> |
| 3 | 3 | ||
| 4 | int ceph_armor(char *dst, const char *src, const char *end); | ||
| 5 | int ceph_unarmor(char *dst, const char *src, const char *end); | ||
| 6 | |||
| 4 | /* | 7 | /* |
| 5 | * base64 encode/decode. | 8 | * base64 encode/decode. |
| 6 | */ | 9 | */ |
| 7 | 10 | ||
| 8 | const char *pem_key = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | 11 | static const char *pem_key = |
| 12 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | ||
| 9 | 13 | ||
| 10 | static int encode_bits(int c) | 14 | static int encode_bits(int c) |
| 11 | { | 15 | { |
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 89490beaf537..6d2e30600627 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c | |||
| @@ -20,7 +20,7 @@ static u32 supported_protocols[] = { | |||
| 20 | CEPH_AUTH_CEPHX | 20 | CEPH_AUTH_CEPHX |
| 21 | }; | 21 | }; |
| 22 | 22 | ||
| 23 | int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) | 23 | static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) |
| 24 | { | 24 | { |
| 25 | switch (protocol) { | 25 | switch (protocol) { |
| 26 | case CEPH_AUTH_NONE: | 26 | case CEPH_AUTH_NONE: |
| @@ -133,8 +133,8 @@ bad: | |||
| 133 | return -ERANGE; | 133 | return -ERANGE; |
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | int ceph_build_auth_request(struct ceph_auth_client *ac, | 136 | static int ceph_build_auth_request(struct ceph_auth_client *ac, |
| 137 | void *msg_buf, size_t msg_len) | 137 | void *msg_buf, size_t msg_len) |
| 138 | { | 138 | { |
| 139 | struct ceph_mon_request_header *monhdr = msg_buf; | 139 | struct ceph_mon_request_header *monhdr = msg_buf; |
| 140 | void *p = monhdr + 1; | 140 | void *p = monhdr + 1; |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 6d44053ecff1..582e0b2caf8a 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
| @@ -87,8 +87,8 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret, | |||
| 87 | /* | 87 | /* |
| 88 | * get existing (or insert new) ticket handler | 88 | * get existing (or insert new) ticket handler |
| 89 | */ | 89 | */ |
| 90 | struct ceph_x_ticket_handler *get_ticket_handler(struct ceph_auth_client *ac, | 90 | static struct ceph_x_ticket_handler * |
| 91 | int service) | 91 | get_ticket_handler(struct ceph_auth_client *ac, int service) |
| 92 | { | 92 | { |
| 93 | struct ceph_x_ticket_handler *th; | 93 | struct ceph_x_ticket_handler *th; |
| 94 | struct ceph_x_info *xi = ac->private; | 94 | struct ceph_x_info *xi = ac->private; |
| @@ -429,7 +429,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, | |||
| 429 | auth->struct_v = 1; | 429 | auth->struct_v = 1; |
| 430 | auth->key = 0; | 430 | auth->key = 0; |
| 431 | for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) | 431 | for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) |
| 432 | auth->key ^= *u; | 432 | auth->key ^= *(__le64 *)u; |
| 433 | dout(" server_challenge %llx client_challenge %llx key %llx\n", | 433 | dout(" server_challenge %llx client_challenge %llx key %llx\n", |
| 434 | xi->server_challenge, le64_to_cpu(auth->client_challenge), | 434 | xi->server_challenge, le64_to_cpu(auth->client_challenge), |
| 435 | le64_to_cpu(auth->key)); | 435 | le64_to_cpu(auth->key)); |
diff --git a/fs/ceph/buffer.c b/fs/ceph/buffer.c index c67535d70aa6..cd39f17021de 100644 --- a/fs/ceph/buffer.c +++ b/fs/ceph/buffer.c | |||
| @@ -47,22 +47,6 @@ void ceph_buffer_release(struct kref *kref) | |||
| 47 | kfree(b); | 47 | kfree(b); |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | int ceph_buffer_alloc(struct ceph_buffer *b, int len, gfp_t gfp) | ||
| 51 | { | ||
| 52 | b->vec.iov_base = kmalloc(len, gfp | __GFP_NOWARN); | ||
| 53 | if (b->vec.iov_base) { | ||
| 54 | b->is_vmalloc = false; | ||
| 55 | } else { | ||
| 56 | b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL); | ||
| 57 | b->is_vmalloc = true; | ||
| 58 | } | ||
| 59 | if (!b->vec.iov_base) | ||
| 60 | return -ENOMEM; | ||
| 61 | b->alloc_len = len; | ||
| 62 | b->vec.iov_len = len; | ||
| 63 | return 0; | ||
| 64 | } | ||
| 65 | |||
| 66 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) | 50 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) |
| 67 | { | 51 | { |
| 68 | size_t len; | 52 | size_t len; |
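The ceph_buffer_alloc() being deleted above used the familiar try-kmalloc-then-fall-back-to-vmalloc pattern for buffers that may be too large, or memory too fragmented, for a physically contiguous allocation. For reference, that pattern and its matching free as a standalone sketch (names illustrative):

#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Sketch: prefer kmalloc (cheap, physically contiguous), fall back to
 * vmalloc quietly when the slab allocation fails. */
static void *example_alloc(size_t len, gfp_t gfp, bool *is_vmalloc)
{
	void *p = kmalloc(len, gfp | __GFP_NOWARN);

	*is_vmalloc = false;
	if (!p) {
		p = __vmalloc(len, gfp, PAGE_KERNEL);
		*is_vmalloc = true;
	}
	return p;
}

static void example_free(void *p, bool is_vmalloc)
{
	if (is_vmalloc)
		vfree(p);
	else
		kfree(p);
}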
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b81be9a56487..7bf182b03973 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -113,58 +113,41 @@ const char *ceph_cap_string(int caps) | |||
| 113 | return cap_str[i]; | 113 | return cap_str[i]; |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | /* | 116 | void ceph_caps_init(struct ceph_mds_client *mdsc) |
| 117 | * Cap reservations | ||
| 118 | * | ||
| 119 | * Maintain a global pool of preallocated struct ceph_caps, referenced | ||
| 120 | * by struct ceph_caps_reservations. This ensures that we preallocate | ||
| 121 | * memory needed to successfully process an MDS response. (If an MDS | ||
| 122 | * sends us cap information and we fail to process it, we will have | ||
| 123 | * problems due to the client and MDS being out of sync.) | ||
| 124 | * | ||
| 125 | * Reservations are 'owned' by a ceph_cap_reservation context. | ||
| 126 | */ | ||
| 127 | static spinlock_t caps_list_lock; | ||
| 128 | static struct list_head caps_list; /* unused (reserved or unreserved) */ | ||
| 129 | static int caps_total_count; /* total caps allocated */ | ||
| 130 | static int caps_use_count; /* in use */ | ||
| 131 | static int caps_reserve_count; /* unused, reserved */ | ||
| 132 | static int caps_avail_count; /* unused, unreserved */ | ||
| 133 | static int caps_min_count; /* keep at least this many (unreserved) */ | ||
| 134 | |||
| 135 | void __init ceph_caps_init(void) | ||
| 136 | { | 117 | { |
| 137 | INIT_LIST_HEAD(&caps_list); | 118 | INIT_LIST_HEAD(&mdsc->caps_list); |
| 138 | spin_lock_init(&caps_list_lock); | 119 | spin_lock_init(&mdsc->caps_list_lock); |
| 139 | } | 120 | } |
| 140 | 121 | ||
| 141 | void ceph_caps_finalize(void) | 122 | void ceph_caps_finalize(struct ceph_mds_client *mdsc) |
| 142 | { | 123 | { |
| 143 | struct ceph_cap *cap; | 124 | struct ceph_cap *cap; |
| 144 | 125 | ||
| 145 | spin_lock(&caps_list_lock); | 126 | spin_lock(&mdsc->caps_list_lock); |
| 146 | while (!list_empty(&caps_list)) { | 127 | while (!list_empty(&mdsc->caps_list)) { |
| 147 | cap = list_first_entry(&caps_list, struct ceph_cap, caps_item); | 128 | cap = list_first_entry(&mdsc->caps_list, |
| 129 | struct ceph_cap, caps_item); | ||
| 148 | list_del(&cap->caps_item); | 130 | list_del(&cap->caps_item); |
| 149 | kmem_cache_free(ceph_cap_cachep, cap); | 131 | kmem_cache_free(ceph_cap_cachep, cap); |
| 150 | } | 132 | } |
| 151 | caps_total_count = 0; | 133 | mdsc->caps_total_count = 0; |
| 152 | caps_avail_count = 0; | 134 | mdsc->caps_avail_count = 0; |
| 153 | caps_use_count = 0; | 135 | mdsc->caps_use_count = 0; |
| 154 | caps_reserve_count = 0; | 136 | mdsc->caps_reserve_count = 0; |
| 155 | caps_min_count = 0; | 137 | mdsc->caps_min_count = 0; |
| 156 | spin_unlock(&caps_list_lock); | 138 | spin_unlock(&mdsc->caps_list_lock); |
| 157 | } | 139 | } |
| 158 | 140 | ||
| 159 | void ceph_adjust_min_caps(int delta) | 141 | void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) |
| 160 | { | 142 | { |
| 161 | spin_lock(&caps_list_lock); | 143 | spin_lock(&mdsc->caps_list_lock); |
| 162 | caps_min_count += delta; | 144 | mdsc->caps_min_count += delta; |
| 163 | BUG_ON(caps_min_count < 0); | 145 | BUG_ON(mdsc->caps_min_count < 0); |
| 164 | spin_unlock(&caps_list_lock); | 146 | spin_unlock(&mdsc->caps_list_lock); |
| 165 | } | 147 | } |
| 166 | 148 | ||
| 167 | int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | 149 | int ceph_reserve_caps(struct ceph_mds_client *mdsc, |
| 150 | struct ceph_cap_reservation *ctx, int need) | ||
| 168 | { | 151 | { |
| 169 | int i; | 152 | int i; |
| 170 | struct ceph_cap *cap; | 153 | struct ceph_cap *cap; |
| @@ -176,16 +159,17 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | |||
| 176 | dout("reserve caps ctx=%p need=%d\n", ctx, need); | 159 | dout("reserve caps ctx=%p need=%d\n", ctx, need); |
| 177 | 160 | ||
| 178 | /* first reserve any caps that are already allocated */ | 161 | /* first reserve any caps that are already allocated */ |
| 179 | spin_lock(&caps_list_lock); | 162 | spin_lock(&mdsc->caps_list_lock); |
| 180 | if (caps_avail_count >= need) | 163 | if (mdsc->caps_avail_count >= need) |
| 181 | have = need; | 164 | have = need; |
| 182 | else | 165 | else |
| 183 | have = caps_avail_count; | 166 | have = mdsc->caps_avail_count; |
| 184 | caps_avail_count -= have; | 167 | mdsc->caps_avail_count -= have; |
| 185 | caps_reserve_count += have; | 168 | mdsc->caps_reserve_count += have; |
| 186 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 169 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
| 187 | caps_avail_count); | 170 | mdsc->caps_reserve_count + |
| 188 | spin_unlock(&caps_list_lock); | 171 | mdsc->caps_avail_count); |
| 172 | spin_unlock(&mdsc->caps_list_lock); | ||
| 189 | 173 | ||
| 190 | for (i = have; i < need; i++) { | 174 | for (i = have; i < need; i++) { |
| 191 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); | 175 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); |
| @@ -198,19 +182,20 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | |||
| 198 | } | 182 | } |
| 199 | BUG_ON(have + alloc != need); | 183 | BUG_ON(have + alloc != need); |
| 200 | 184 | ||
| 201 | spin_lock(&caps_list_lock); | 185 | spin_lock(&mdsc->caps_list_lock); |
| 202 | caps_total_count += alloc; | 186 | mdsc->caps_total_count += alloc; |
| 203 | caps_reserve_count += alloc; | 187 | mdsc->caps_reserve_count += alloc; |
| 204 | list_splice(&newcaps, &caps_list); | 188 | list_splice(&newcaps, &mdsc->caps_list); |
| 205 | 189 | ||
| 206 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 190 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
| 207 | caps_avail_count); | 191 | mdsc->caps_reserve_count + |
| 208 | spin_unlock(&caps_list_lock); | 192 | mdsc->caps_avail_count); |
| 193 | spin_unlock(&mdsc->caps_list_lock); | ||
| 209 | 194 | ||
| 210 | ctx->count = need; | 195 | ctx->count = need; |
| 211 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", | 196 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", |
| 212 | ctx, caps_total_count, caps_use_count, caps_reserve_count, | 197 | ctx, mdsc->caps_total_count, mdsc->caps_use_count, |
| 213 | caps_avail_count); | 198 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
| 214 | return 0; | 199 | return 0; |
| 215 | 200 | ||
| 216 | out_alloc_count: | 201 | out_alloc_count: |
| @@ -220,26 +205,29 @@ out_alloc_count: | |||
| 220 | return ret; | 205 | return ret; |
| 221 | } | 206 | } |
| 222 | 207 | ||
| 223 | int ceph_unreserve_caps(struct ceph_cap_reservation *ctx) | 208 | int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
| 209 | struct ceph_cap_reservation *ctx) | ||
| 224 | { | 210 | { |
| 225 | dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); | 211 | dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); |
| 226 | if (ctx->count) { | 212 | if (ctx->count) { |
| 227 | spin_lock(&caps_list_lock); | 213 | spin_lock(&mdsc->caps_list_lock); |
| 228 | BUG_ON(caps_reserve_count < ctx->count); | 214 | BUG_ON(mdsc->caps_reserve_count < ctx->count); |
| 229 | caps_reserve_count -= ctx->count; | 215 | mdsc->caps_reserve_count -= ctx->count; |
| 230 | caps_avail_count += ctx->count; | 216 | mdsc->caps_avail_count += ctx->count; |
| 231 | ctx->count = 0; | 217 | ctx->count = 0; |
| 232 | dout("unreserve caps %d = %d used + %d resv + %d avail\n", | 218 | dout("unreserve caps %d = %d used + %d resv + %d avail\n", |
| 233 | caps_total_count, caps_use_count, caps_reserve_count, | 219 | mdsc->caps_total_count, mdsc->caps_use_count, |
| 234 | caps_avail_count); | 220 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
| 235 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 221 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
| 236 | caps_avail_count); | 222 | mdsc->caps_reserve_count + |
| 237 | spin_unlock(&caps_list_lock); | 223 | mdsc->caps_avail_count); |
| 224 | spin_unlock(&mdsc->caps_list_lock); | ||
| 238 | } | 225 | } |
| 239 | return 0; | 226 | return 0; |
| 240 | } | 227 | } |
| 241 | 228 | ||
| 242 | static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) | 229 | static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, |
| 230 | struct ceph_cap_reservation *ctx) | ||
| 243 | { | 231 | { |
| 244 | struct ceph_cap *cap = NULL; | 232 | struct ceph_cap *cap = NULL; |
| 245 | 233 | ||
| @@ -247,71 +235,74 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) | |||
| 247 | if (!ctx) { | 235 | if (!ctx) { |
| 248 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); | 236 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); |
| 249 | if (cap) { | 237 | if (cap) { |
| 250 | caps_use_count++; | 238 | mdsc->caps_use_count++; |
| 251 | caps_total_count++; | 239 | mdsc->caps_total_count++; |
| 252 | } | 240 | } |
| 253 | return cap; | 241 | return cap; |
| 254 | } | 242 | } |
| 255 | 243 | ||
| 256 | spin_lock(&caps_list_lock); | 244 | spin_lock(&mdsc->caps_list_lock); |
| 257 | dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", | 245 | dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", |
| 258 | ctx, ctx->count, caps_total_count, caps_use_count, | 246 | ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, |
| 259 | caps_reserve_count, caps_avail_count); | 247 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
| 260 | BUG_ON(!ctx->count); | 248 | BUG_ON(!ctx->count); |
| 261 | BUG_ON(ctx->count > caps_reserve_count); | 249 | BUG_ON(ctx->count > mdsc->caps_reserve_count); |
| 262 | BUG_ON(list_empty(&caps_list)); | 250 | BUG_ON(list_empty(&mdsc->caps_list)); |
| 263 | 251 | ||
| 264 | ctx->count--; | 252 | ctx->count--; |
| 265 | caps_reserve_count--; | 253 | mdsc->caps_reserve_count--; |
| 266 | caps_use_count++; | 254 | mdsc->caps_use_count++; |
| 267 | 255 | ||
| 268 | cap = list_first_entry(&caps_list, struct ceph_cap, caps_item); | 256 | cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item); |
| 269 | list_del(&cap->caps_item); | 257 | list_del(&cap->caps_item); |
| 270 | 258 | ||
| 271 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 259 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
| 272 | caps_avail_count); | 260 | mdsc->caps_reserve_count + mdsc->caps_avail_count); |
| 273 | spin_unlock(&caps_list_lock); | 261 | spin_unlock(&mdsc->caps_list_lock); |
| 274 | return cap; | 262 | return cap; |
| 275 | } | 263 | } |
| 276 | 264 | ||
| 277 | void ceph_put_cap(struct ceph_cap *cap) | 265 | void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) |
| 278 | { | 266 | { |
| 279 | spin_lock(&caps_list_lock); | 267 | spin_lock(&mdsc->caps_list_lock); |
| 280 | dout("put_cap %p %d = %d used + %d resv + %d avail\n", | 268 | dout("put_cap %p %d = %d used + %d resv + %d avail\n", |
| 281 | cap, caps_total_count, caps_use_count, | 269 | cap, mdsc->caps_total_count, mdsc->caps_use_count, |
| 282 | caps_reserve_count, caps_avail_count); | 270 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
| 283 | caps_use_count--; | 271 | mdsc->caps_use_count--; |
| 284 | /* | 272 | /* |
| 285 | * Keep some preallocated caps around (ceph_min_count), to | 273 | * Keep some preallocated caps around (ceph_min_count), to |
| 286 | * avoid lots of free/alloc churn. | 274 | * avoid lots of free/alloc churn. |
| 287 | */ | 275 | */ |
| 288 | if (caps_avail_count >= caps_reserve_count + caps_min_count) { | 276 | if (mdsc->caps_avail_count >= mdsc->caps_reserve_count + |
| 289 | caps_total_count--; | 277 | mdsc->caps_min_count) { |
| 278 | mdsc->caps_total_count--; | ||
| 290 | kmem_cache_free(ceph_cap_cachep, cap); | 279 | kmem_cache_free(ceph_cap_cachep, cap); |
| 291 | } else { | 280 | } else { |
| 292 | caps_avail_count++; | 281 | mdsc->caps_avail_count++; |
| 293 | list_add(&cap->caps_item, &caps_list); | 282 | list_add(&cap->caps_item, &mdsc->caps_list); |
| 294 | } | 283 | } |
| 295 | 284 | ||
| 296 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 285 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
| 297 | caps_avail_count); | 286 | mdsc->caps_reserve_count + mdsc->caps_avail_count); |
| 298 | spin_unlock(&caps_list_lock); | 287 | spin_unlock(&mdsc->caps_list_lock); |
| 299 | } | 288 | } |
| 300 | 289 | ||
| 301 | void ceph_reservation_status(struct ceph_client *client, | 290 | void ceph_reservation_status(struct ceph_client *client, |
| 302 | int *total, int *avail, int *used, int *reserved, | 291 | int *total, int *avail, int *used, int *reserved, |
| 303 | int *min) | 292 | int *min) |
| 304 | { | 293 | { |
| 294 | struct ceph_mds_client *mdsc = &client->mdsc; | ||
| 295 | |||
| 305 | if (total) | 296 | if (total) |
| 306 | *total = caps_total_count; | 297 | *total = mdsc->caps_total_count; |
| 307 | if (avail) | 298 | if (avail) |
| 308 | *avail = caps_avail_count; | 299 | *avail = mdsc->caps_avail_count; |
| 309 | if (used) | 300 | if (used) |
| 310 | *used = caps_use_count; | 301 | *used = mdsc->caps_use_count; |
| 311 | if (reserved) | 302 | if (reserved) |
| 312 | *reserved = caps_reserve_count; | 303 | *reserved = mdsc->caps_reserve_count; |
| 313 | if (min) | 304 | if (min) |
| 314 | *min = caps_min_count; | 305 | *min = mdsc->caps_min_count; |
| 315 | } | 306 | } |
| 316 | 307 | ||
| 317 | /* | 308 | /* |
| @@ -336,22 +327,29 @@ static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) | |||
| 336 | return NULL; | 327 | return NULL; |
| 337 | } | 328 | } |
| 338 | 329 | ||
| 330 | struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds) | ||
| 331 | { | ||
| 332 | struct ceph_cap *cap; | ||
| 333 | |||
| 334 | spin_lock(&ci->vfs_inode.i_lock); | ||
| 335 | cap = __get_cap_for_mds(ci, mds); | ||
| 336 | spin_unlock(&ci->vfs_inode.i_lock); | ||
| 337 | return cap; | ||
| 338 | } | ||
| 339 | |||
| 339 | /* | 340 | /* |
| 340 | * Return id of any MDS with a cap, preferably FILE_WR|WRBUFFER|EXCL, else | 341 | * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1. |
| 341 | * -1. | ||
| 342 | */ | 342 | */ |
| 343 | static int __ceph_get_cap_mds(struct ceph_inode_info *ci, u32 *mseq) | 343 | static int __ceph_get_cap_mds(struct ceph_inode_info *ci) |
| 344 | { | 344 | { |
| 345 | struct ceph_cap *cap; | 345 | struct ceph_cap *cap; |
| 346 | int mds = -1; | 346 | int mds = -1; |
| 347 | struct rb_node *p; | 347 | struct rb_node *p; |
| 348 | 348 | ||
| 349 | /* prefer mds with WR|WRBUFFER|EXCL caps */ | 349 | /* prefer mds with WR|BUFFER|EXCL caps */ |
| 350 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { | 350 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { |
| 351 | cap = rb_entry(p, struct ceph_cap, ci_node); | 351 | cap = rb_entry(p, struct ceph_cap, ci_node); |
| 352 | mds = cap->mds; | 352 | mds = cap->mds; |
| 353 | if (mseq) | ||
| 354 | *mseq = cap->mseq; | ||
| 355 | if (cap->issued & (CEPH_CAP_FILE_WR | | 353 | if (cap->issued & (CEPH_CAP_FILE_WR | |
| 356 | CEPH_CAP_FILE_BUFFER | | 354 | CEPH_CAP_FILE_BUFFER | |
| 357 | CEPH_CAP_FILE_EXCL)) | 355 | CEPH_CAP_FILE_EXCL)) |
| @@ -364,7 +362,7 @@ int ceph_get_cap_mds(struct inode *inode) | |||
| 364 | { | 362 | { |
| 365 | int mds; | 363 | int mds; |
| 366 | spin_lock(&inode->i_lock); | 364 | spin_lock(&inode->i_lock); |
| 367 | mds = __ceph_get_cap_mds(ceph_inode(inode), NULL); | 365 | mds = __ceph_get_cap_mds(ceph_inode(inode)); |
| 368 | spin_unlock(&inode->i_lock); | 366 | spin_unlock(&inode->i_lock); |
| 369 | return mds; | 367 | return mds; |
| 370 | } | 368 | } |
| @@ -483,8 +481,8 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, | |||
| 483 | * Each time we receive FILE_CACHE anew, we increment | 481 | * Each time we receive FILE_CACHE anew, we increment |
| 484 | * i_rdcache_gen. | 482 | * i_rdcache_gen. |
| 485 | */ | 483 | */ |
| 486 | if ((issued & CEPH_CAP_FILE_CACHE) && | 484 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && |
| 487 | (had & CEPH_CAP_FILE_CACHE) == 0) | 485 | (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) |
| 488 | ci->i_rdcache_gen++; | 486 | ci->i_rdcache_gen++; |
| 489 | 487 | ||
| 490 | /* | 488 | /* |
| @@ -543,7 +541,7 @@ retry: | |||
| 543 | new_cap = NULL; | 541 | new_cap = NULL; |
| 544 | } else { | 542 | } else { |
| 545 | spin_unlock(&inode->i_lock); | 543 | spin_unlock(&inode->i_lock); |
| 546 | new_cap = get_cap(caps_reservation); | 544 | new_cap = get_cap(mdsc, caps_reservation); |
| 547 | if (new_cap == NULL) | 545 | if (new_cap == NULL) |
| 548 | return -ENOMEM; | 546 | return -ENOMEM; |
| 549 | goto retry; | 547 | goto retry; |
| @@ -588,6 +586,7 @@ retry: | |||
| 588 | } else { | 586 | } else { |
| 589 | pr_err("ceph_add_cap: couldn't find snap realm %llx\n", | 587 | pr_err("ceph_add_cap: couldn't find snap realm %llx\n", |
| 590 | realmino); | 588 | realmino); |
| 589 | WARN_ON(!realm); | ||
| 591 | } | 590 | } |
| 592 | } | 591 | } |
| 593 | 592 | ||
| @@ -831,7 +830,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) | |||
| 831 | { | 830 | { |
| 832 | int want = 0; | 831 | int want = 0; |
| 833 | int mode; | 832 | int mode; |
| 834 | for (mode = 0; mode < 4; mode++) | 833 | for (mode = 0; mode < CEPH_FILE_MODE_NUM; mode++) |
| 835 | if (ci->i_nr_by_mode[mode]) | 834 | if (ci->i_nr_by_mode[mode]) |
| 836 | want |= ceph_caps_for_mode(mode); | 835 | want |= ceph_caps_for_mode(mode); |
| 837 | return want; | 836 | return want; |
| @@ -901,7 +900,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
| 901 | ci->i_auth_cap = NULL; | 900 | ci->i_auth_cap = NULL; |
| 902 | 901 | ||
| 903 | if (removed) | 902 | if (removed) |
| 904 | ceph_put_cap(cap); | 903 | ceph_put_cap(mdsc, cap); |
| 905 | 904 | ||
| 906 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { | 905 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { |
| 907 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 906 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
| @@ -1197,6 +1196,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
| 1197 | */ | 1196 | */ |
| 1198 | void __ceph_flush_snaps(struct ceph_inode_info *ci, | 1197 | void __ceph_flush_snaps(struct ceph_inode_info *ci, |
| 1199 | struct ceph_mds_session **psession) | 1198 | struct ceph_mds_session **psession) |
| 1199 | __releases(ci->vfs_inode->i_lock) | ||
| 1200 | __acquires(ci->vfs_inode->i_lock) | ||
| 1200 | { | 1201 | { |
| 1201 | struct inode *inode = &ci->vfs_inode; | 1202 | struct inode *inode = &ci->vfs_inode; |
| 1202 | int mds; | 1203 | int mds; |
| @@ -1232,7 +1233,13 @@ retry: | |||
| 1232 | BUG_ON(capsnap->dirty == 0); | 1233 | BUG_ON(capsnap->dirty == 0); |
| 1233 | 1234 | ||
| 1234 | /* pick mds, take s_mutex */ | 1235 | /* pick mds, take s_mutex */ |
| 1235 | mds = __ceph_get_cap_mds(ci, &mseq); | 1236 | if (ci->i_auth_cap == NULL) { |
| 1237 | dout("no auth cap (migrating?), doing nothing\n"); | ||
| 1238 | goto out; | ||
| 1239 | } | ||
| 1240 | mds = ci->i_auth_cap->session->s_mds; | ||
| 1241 | mseq = ci->i_auth_cap->mseq; | ||
| 1242 | |||
| 1236 | if (session && session->s_mds != mds) { | 1243 | if (session && session->s_mds != mds) { |
| 1237 | dout("oops, wrong session %p mutex\n", session); | 1244 | dout("oops, wrong session %p mutex\n", session); |
| 1238 | mutex_unlock(&session->s_mutex); | 1245 | mutex_unlock(&session->s_mutex); |
| @@ -1251,8 +1258,8 @@ retry: | |||
| 1251 | } | 1258 | } |
| 1252 | /* | 1259 | /* |
| 1253 | * if session == NULL, we raced against a cap | 1260 | * if session == NULL, we raced against a cap |
| 1254 | * deletion. retry, and we'll get a better | 1261 | * deletion or migration. retry, and we'll |
| 1255 | * @mds value next time. | 1262 | * get a better @mds value next time. |
| 1256 | */ | 1263 | */ |
| 1257 | spin_lock(&inode->i_lock); | 1264 | spin_lock(&inode->i_lock); |
| 1258 | goto retry; | 1265 | goto retry; |
| @@ -1290,6 +1297,7 @@ retry: | |||
| 1290 | list_del_init(&ci->i_snap_flush_item); | 1297 | list_del_init(&ci->i_snap_flush_item); |
| 1291 | spin_unlock(&mdsc->snap_flush_lock); | 1298 | spin_unlock(&mdsc->snap_flush_lock); |
| 1292 | 1299 | ||
| 1300 | out: | ||
| 1293 | if (psession) | 1301 | if (psession) |
| 1294 | *psession = session; | 1302 | *psession = session; |
| 1295 | else if (session) { | 1303 | else if (session) { |
| @@ -1435,7 +1443,6 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
| 1435 | */ | 1443 | */ |
| 1436 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 1444 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
| 1437 | struct ceph_mds_session *session) | 1445 | struct ceph_mds_session *session) |
| 1438 | __releases(session->s_mutex) | ||
| 1439 | { | 1446 | { |
| 1440 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 1447 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); |
| 1441 | struct ceph_mds_client *mdsc = &client->mdsc; | 1448 | struct ceph_mds_client *mdsc = &client->mdsc; |
| @@ -1510,11 +1517,13 @@ retry_locked: | |||
| 1510 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ | 1517 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ |
| 1511 | ci->i_rdcache_gen && /* may have cached pages */ | 1518 | ci->i_rdcache_gen && /* may have cached pages */ |
| 1512 | (file_wanted == 0 || /* no open files */ | 1519 | (file_wanted == 0 || /* no open files */ |
| 1513 | (revoking & CEPH_CAP_FILE_CACHE)) && /* or revoking cache */ | 1520 | (revoking & (CEPH_CAP_FILE_CACHE| |
| 1521 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ | ||
| 1514 | !tried_invalidate) { | 1522 | !tried_invalidate) { |
| 1515 | dout("check_caps trying to invalidate on %p\n", inode); | 1523 | dout("check_caps trying to invalidate on %p\n", inode); |
| 1516 | if (try_nonblocking_invalidate(inode) < 0) { | 1524 | if (try_nonblocking_invalidate(inode) < 0) { |
| 1517 | if (revoking & CEPH_CAP_FILE_CACHE) { | 1525 | if (revoking & (CEPH_CAP_FILE_CACHE| |
| 1526 | CEPH_CAP_FILE_LAZYIO)) { | ||
| 1518 | dout("check_caps queuing invalidate\n"); | 1527 | dout("check_caps queuing invalidate\n"); |
| 1519 | queue_invalidate = 1; | 1528 | queue_invalidate = 1; |
| 1520 | ci->i_rdcache_revoking = ci->i_rdcache_gen; | 1529 | ci->i_rdcache_revoking = ci->i_rdcache_gen; |
| @@ -2250,8 +2259,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
| 2250 | struct ceph_mds_session *session, | 2259 | struct ceph_mds_session *session, |
| 2251 | struct ceph_cap *cap, | 2260 | struct ceph_cap *cap, |
| 2252 | struct ceph_buffer *xattr_buf) | 2261 | struct ceph_buffer *xattr_buf) |
| 2253 | __releases(inode->i_lock) | 2262 | __releases(inode->i_lock) |
| 2254 | __releases(session->s_mutex) | ||
| 2255 | { | 2263 | { |
| 2256 | struct ceph_inode_info *ci = ceph_inode(inode); | 2264 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 2257 | int mds = session->s_mds; | 2265 | int mds = session->s_mds; |
| @@ -2278,6 +2286,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
| 2278 | * will invalidate _after_ writeback.) | 2286 | * will invalidate _after_ writeback.) |
| 2279 | */ | 2287 | */ |
| 2280 | if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && | 2288 | if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && |
| 2289 | (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && | ||
| 2281 | !ci->i_wrbuffer_ref) { | 2290 | !ci->i_wrbuffer_ref) { |
| 2282 | if (try_nonblocking_invalidate(inode) == 0) { | 2291 | if (try_nonblocking_invalidate(inode) == 0) { |
| 2283 | revoked_rdcache = 1; | 2292 | revoked_rdcache = 1; |
| @@ -2369,15 +2378,22 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
| 2369 | 2378 | ||
| 2370 | /* revocation, grant, or no-op? */ | 2379 | /* revocation, grant, or no-op? */ |
| 2371 | if (cap->issued & ~newcaps) { | 2380 | if (cap->issued & ~newcaps) { |
| 2372 | dout("revocation: %s -> %s\n", ceph_cap_string(cap->issued), | 2381 | int revoking = cap->issued & ~newcaps; |
| 2373 | ceph_cap_string(newcaps)); | 2382 | |
| 2374 | if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) | 2383 | dout("revocation: %s -> %s (revoking %s)\n", |
| 2375 | writeback = 1; /* will delay ack */ | 2384 | ceph_cap_string(cap->issued), |
| 2376 | else if (dirty & ~newcaps) | 2385 | ceph_cap_string(newcaps), |
| 2377 | check_caps = 1; /* initiate writeback in check_caps */ | 2386 | ceph_cap_string(revoking)); |
| 2378 | else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || | 2387 | if (revoking & used & CEPH_CAP_FILE_BUFFER) |
| 2379 | revoked_rdcache) | 2388 | writeback = 1; /* initiate writeback; will delay ack */ |
| 2380 | check_caps = 2; /* send revoke ack in check_caps */ | 2389 | else if (revoking == CEPH_CAP_FILE_CACHE && |
| 2390 | (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && | ||
| 2391 | queue_invalidate) | ||
| 2392 | ; /* do nothing yet, invalidation will be queued */ | ||
| 2393 | else if (cap == ci->i_auth_cap) | ||
| 2394 | check_caps = 1; /* check auth cap only */ | ||
| 2395 | else | ||
| 2396 | check_caps = 2; /* check all caps */ | ||
| 2381 | cap->issued = newcaps; | 2397 | cap->issued = newcaps; |
| 2382 | cap->implemented |= newcaps; | 2398 | cap->implemented |= newcaps; |
| 2383 | } else if (cap->issued == newcaps) { | 2399 | } else if (cap->issued == newcaps) { |
| @@ -2568,7 +2584,8 @@ static void handle_cap_trunc(struct inode *inode, | |||
| 2568 | * caller holds s_mutex | 2584 | * caller holds s_mutex |
| 2569 | */ | 2585 | */ |
| 2570 | static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | 2586 | static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, |
| 2571 | struct ceph_mds_session *session) | 2587 | struct ceph_mds_session *session, |
| 2588 | int *open_target_sessions) | ||
| 2572 | { | 2589 | { |
| 2573 | struct ceph_inode_info *ci = ceph_inode(inode); | 2590 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 2574 | int mds = session->s_mds; | 2591 | int mds = session->s_mds; |
| @@ -2600,6 +2617,12 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
| 2600 | ci->i_cap_exporting_mds = mds; | 2617 | ci->i_cap_exporting_mds = mds; |
| 2601 | ci->i_cap_exporting_mseq = mseq; | 2618 | ci->i_cap_exporting_mseq = mseq; |
| 2602 | ci->i_cap_exporting_issued = cap->issued; | 2619 | ci->i_cap_exporting_issued = cap->issued; |
| 2620 | |||
| 2621 | /* | ||
| 2622 | * make sure we have open sessions with all possible | ||
| 2623 | * export targets, so that we get the matching IMPORT | ||
| 2624 | */ | ||
| 2625 | *open_target_sessions = 1; | ||
| 2603 | } | 2626 | } |
| 2604 | __ceph_remove_cap(cap); | 2627 | __ceph_remove_cap(cap); |
| 2605 | } | 2628 | } |
| @@ -2675,6 +2698,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 2675 | u64 size, max_size; | 2698 | u64 size, max_size; |
| 2676 | u64 tid; | 2699 | u64 tid; |
| 2677 | void *snaptrace; | 2700 | void *snaptrace; |
| 2701 | size_t snaptrace_len; | ||
| 2702 | void *flock; | ||
| 2703 | u32 flock_len; | ||
| 2704 | int open_target_sessions = 0; | ||
| 2678 | 2705 | ||
| 2679 | dout("handle_caps from mds%d\n", mds); | 2706 | dout("handle_caps from mds%d\n", mds); |
| 2680 | 2707 | ||
| @@ -2683,7 +2710,6 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 2683 | if (msg->front.iov_len < sizeof(*h)) | 2710 | if (msg->front.iov_len < sizeof(*h)) |
| 2684 | goto bad; | 2711 | goto bad; |
| 2685 | h = msg->front.iov_base; | 2712 | h = msg->front.iov_base; |
| 2686 | snaptrace = h + 1; | ||
| 2687 | op = le32_to_cpu(h->op); | 2713 | op = le32_to_cpu(h->op); |
| 2688 | vino.ino = le64_to_cpu(h->ino); | 2714 | vino.ino = le64_to_cpu(h->ino); |
| 2689 | vino.snap = CEPH_NOSNAP; | 2715 | vino.snap = CEPH_NOSNAP; |
| @@ -2693,6 +2719,21 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 2693 | size = le64_to_cpu(h->size); | 2719 | size = le64_to_cpu(h->size); |
| 2694 | max_size = le64_to_cpu(h->max_size); | 2720 | max_size = le64_to_cpu(h->max_size); |
| 2695 | 2721 | ||
| 2722 | snaptrace = h + 1; | ||
| 2723 | snaptrace_len = le32_to_cpu(h->snap_trace_len); | ||
| 2724 | |||
| 2725 | if (le16_to_cpu(msg->hdr.version) >= 2) { | ||
| 2726 | void *p, *end; | ||
| 2727 | |||
| 2728 | p = snaptrace + snaptrace_len; | ||
| 2729 | end = msg->front.iov_base + msg->front.iov_len; | ||
| 2730 | ceph_decode_32_safe(&p, end, flock_len, bad); | ||
| 2731 | flock = p; | ||
| 2732 | } else { | ||
| 2733 | flock = NULL; | ||
| 2734 | flock_len = 0; | ||
| 2735 | } | ||
| 2736 | |||
| 2696 | mutex_lock(&session->s_mutex); | 2737 | mutex_lock(&session->s_mutex); |
| 2697 | session->s_seq++; | 2738 | session->s_seq++; |
| 2698 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, | 2739 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, |
| @@ -2714,7 +2755,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 2714 | * along for the mds (who clearly thinks we still have this | 2755 | * along for the mds (who clearly thinks we still have this |
| 2715 | * cap). | 2756 | * cap). |
| 2716 | */ | 2757 | */ |
| 2717 | ceph_add_cap_releases(mdsc, session, -1); | 2758 | ceph_add_cap_releases(mdsc, session); |
| 2718 | ceph_send_cap_releases(mdsc, session); | 2759 | ceph_send_cap_releases(mdsc, session); |
| 2719 | goto done; | 2760 | goto done; |
| 2720 | } | 2761 | } |
| @@ -2726,12 +2767,12 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 2726 | goto done; | 2767 | goto done; |
| 2727 | 2768 | ||
| 2728 | case CEPH_CAP_OP_EXPORT: | 2769 | case CEPH_CAP_OP_EXPORT: |
| 2729 | handle_cap_export(inode, h, session); | 2770 | handle_cap_export(inode, h, session, &open_target_sessions); |
| 2730 | goto done; | 2771 | goto done; |
| 2731 | 2772 | ||
| 2732 | case CEPH_CAP_OP_IMPORT: | 2773 | case CEPH_CAP_OP_IMPORT: |
| 2733 | handle_cap_import(mdsc, inode, h, session, | 2774 | handle_cap_import(mdsc, inode, h, session, |
| 2734 | snaptrace, le32_to_cpu(h->snap_trace_len)); | 2775 | snaptrace, snaptrace_len); |
| 2735 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, | 2776 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, |
| 2736 | session); | 2777 | session); |
| 2737 | goto done_unlocked; | 2778 | goto done_unlocked; |
| @@ -2773,6 +2814,8 @@ done: | |||
| 2773 | done_unlocked: | 2814 | done_unlocked: |
| 2774 | if (inode) | 2815 | if (inode) |
| 2775 | iput(inode); | 2816 | iput(inode); |
| 2817 | if (open_target_sessions) | ||
| 2818 | ceph_mdsc_open_export_target_sessions(mdsc, session); | ||
| 2776 | return; | 2819 | return; |
| 2777 | 2820 | ||
| 2778 | bad: | 2821 | bad: |
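The caps.c hunks above move the cap reservation pool out of file-scope globals and into struct ceph_mds_client, so each mounted client keeps its own counters under mdsc->caps_list_lock and asserts the same invariant at every transition: total caps equal those in use plus those reserved plus those on the free list. A minimal sketch of that bookkeeping, using simplified names and plain counters instead of the kernel's kmem_cache-backed list (an illustration of the invariant, not the patch code itself):

    /* Sketch only: the mdsc->caps_* fields reduced to a toy pool.
     * Field and function names are stand-ins, not kernel identifiers. */
    struct cap_pool {
            spinlock_t lock;
            int total;      /* caps currently owned by this client */
            int in_use;     /* handed out to inodes */
            int reserved;   /* promised to in-flight operations */
            int avail;      /* parked on the free list */
    };

    static void cap_pool_unreserve(struct cap_pool *p, int count)
    {
            spin_lock(&p->lock);
            BUG_ON(p->reserved < count);
            p->reserved -= count;       /* reservation released... */
            p->avail    += count;       /* ...but the caps stay pooled */
            BUG_ON(p->total != p->in_use + p->reserved + p->avail);
            spin_unlock(&p->lock);
    }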
diff --git a/fs/ceph/ceph_frag.h b/fs/ceph/ceph_frag.h index 793f50cb7c22..5babb8e95352 100644 --- a/fs/ceph/ceph_frag.h +++ b/fs/ceph/ceph_frag.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #ifndef _FS_CEPH_FRAG_H | 1 | #ifndef FS_CEPH_FRAG_H |
| 2 | #define _FS_CEPH_FRAG_H | 2 | #define FS_CEPH_FRAG_H |
| 3 | 3 | ||
| 4 | /* | 4 | /* |
| 5 | * "Frags" are a way to describe a subset of a 32-bit number space, | 5 | * "Frags" are a way to describe a subset of a 32-bit number space, |
diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c index 79d76bc4303f..3ac6cc7c1156 100644 --- a/fs/ceph/ceph_fs.c +++ b/fs/ceph/ceph_fs.c | |||
| @@ -29,46 +29,44 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout) | |||
| 29 | 29 | ||
| 30 | int ceph_flags_to_mode(int flags) | 30 | int ceph_flags_to_mode(int flags) |
| 31 | { | 31 | { |
| 32 | int mode; | ||
| 33 | |||
| 32 | #ifdef O_DIRECTORY /* fixme */ | 34 | #ifdef O_DIRECTORY /* fixme */ |
| 33 | if ((flags & O_DIRECTORY) == O_DIRECTORY) | 35 | if ((flags & O_DIRECTORY) == O_DIRECTORY) |
| 34 | return CEPH_FILE_MODE_PIN; | 36 | return CEPH_FILE_MODE_PIN; |
| 35 | #endif | 37 | #endif |
| 38 | if ((flags & O_APPEND) == O_APPEND) | ||
| 39 | flags |= O_WRONLY; | ||
| 40 | |||
| 41 | if ((flags & O_ACCMODE) == O_RDWR) | ||
| 42 | mode = CEPH_FILE_MODE_RDWR; | ||
| 43 | else if ((flags & O_ACCMODE) == O_WRONLY) | ||
| 44 | mode = CEPH_FILE_MODE_WR; | ||
| 45 | else | ||
| 46 | mode = CEPH_FILE_MODE_RD; | ||
| 47 | |||
| 36 | #ifdef O_LAZY | 48 | #ifdef O_LAZY |
| 37 | if (flags & O_LAZY) | 49 | if (flags & O_LAZY) |
| 38 | return CEPH_FILE_MODE_LAZY; | 50 | mode |= CEPH_FILE_MODE_LAZY; |
| 39 | #endif | 51 | #endif |
| 40 | if ((flags & O_APPEND) == O_APPEND) | ||
| 41 | flags |= O_WRONLY; | ||
| 42 | 52 | ||
| 43 | flags &= O_ACCMODE; | 53 | return mode; |
| 44 | if ((flags & O_RDWR) == O_RDWR) | ||
| 45 | return CEPH_FILE_MODE_RDWR; | ||
| 46 | if ((flags & O_WRONLY) == O_WRONLY) | ||
| 47 | return CEPH_FILE_MODE_WR; | ||
| 48 | return CEPH_FILE_MODE_RD; | ||
| 49 | } | 54 | } |
| 50 | 55 | ||
| 51 | int ceph_caps_for_mode(int mode) | 56 | int ceph_caps_for_mode(int mode) |
| 52 | { | 57 | { |
| 53 | switch (mode) { | 58 | int caps = CEPH_CAP_PIN; |
| 54 | case CEPH_FILE_MODE_PIN: | 59 | |
| 55 | return CEPH_CAP_PIN; | 60 | if (mode & CEPH_FILE_MODE_RD) |
| 56 | case CEPH_FILE_MODE_RD: | 61 | caps |= CEPH_CAP_FILE_SHARED | |
| 57 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
| 58 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; | 62 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; |
| 59 | case CEPH_FILE_MODE_RDWR: | 63 | if (mode & CEPH_FILE_MODE_WR) |
| 60 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | 64 | caps |= CEPH_CAP_FILE_EXCL | |
| 61 | CEPH_CAP_FILE_EXCL | | ||
| 62 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | | ||
| 63 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | ||
| 64 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | ||
| 65 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | ||
| 66 | case CEPH_FILE_MODE_WR: | ||
| 67 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
| 68 | CEPH_CAP_FILE_EXCL | | ||
| 69 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | 65 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | |
| 70 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | 66 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | |
| 71 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | 67 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; |
| 72 | } | 68 | if (mode & CEPH_FILE_MODE_LAZY) |
| 73 | return 0; | 69 | caps |= CEPH_CAP_FILE_LAZYIO; |
| 70 | |||
| 71 | return caps; | ||
| 74 | } | 72 | } |
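The reworked ceph_fs.c above treats the file mode as a small bitmask rather than a single enum value, so a lazy-I/O open combines with the normal access mode and ceph_caps_for_mode() simply ORs in the caps each bit implies. A sketch of the resulting flags-to-caps flow (O_APPEND and O_DIRECTORY handling omitted; this is an illustrative helper, not a kernel function from the patch):

    /* Sketch: open(2) flags -> ceph file-mode bitmask -> wanted caps,
     * mirroring the new ceph_flags_to_mode()/ceph_caps_for_mode(). */
    static int wanted_caps_for_open(int flags)
    {
            int mode;

            if ((flags & O_ACCMODE) == O_RDWR)
                    mode = CEPH_FILE_MODE_RDWR;
            else if ((flags & O_ACCMODE) == O_WRONLY)
                    mode = CEPH_FILE_MODE_WR;
            else
                    mode = CEPH_FILE_MODE_RD;
    #ifdef O_LAZY
            if (flags & O_LAZY)
                    mode |= CEPH_FILE_MODE_LAZY;  /* now ORed in, not returned early */
    #endif
            return ceph_caps_for_mode(mode);
    }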
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 2fa992eaf7da..d5619ac86711 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h | |||
| @@ -9,27 +9,13 @@ | |||
| 9 | * LGPL2 | 9 | * LGPL2 |
| 10 | */ | 10 | */ |
| 11 | 11 | ||
| 12 | #ifndef _FS_CEPH_CEPH_FS_H | 12 | #ifndef CEPH_FS_H |
| 13 | #define _FS_CEPH_CEPH_FS_H | 13 | #define CEPH_FS_H |
| 14 | 14 | ||
| 15 | #include "msgr.h" | 15 | #include "msgr.h" |
| 16 | #include "rados.h" | 16 | #include "rados.h" |
| 17 | 17 | ||
| 18 | /* | 18 | /* |
| 19 | * Ceph release version | ||
| 20 | */ | ||
| 21 | #define CEPH_VERSION_MAJOR 0 | ||
| 22 | #define CEPH_VERSION_MINOR 20 | ||
| 23 | #define CEPH_VERSION_PATCH 0 | ||
| 24 | |||
| 25 | #define _CEPH_STRINGIFY(x) #x | ||
| 26 | #define CEPH_STRINGIFY(x) _CEPH_STRINGIFY(x) | ||
| 27 | #define CEPH_MAKE_VERSION(x, y, z) CEPH_STRINGIFY(x) "." CEPH_STRINGIFY(y) \ | ||
| 28 | "." CEPH_STRINGIFY(z) | ||
| 29 | #define CEPH_VERSION CEPH_MAKE_VERSION(CEPH_VERSION_MAJOR, \ | ||
| 30 | CEPH_VERSION_MINOR, CEPH_VERSION_PATCH) | ||
| 31 | |||
| 32 | /* | ||
| 33 | * subprotocol versions. when specific messages types or high-level | 19 | * subprotocol versions. when specific messages types or high-level |
| 34 | * protocols change, bump the affected components. we keep rev | 20 | * protocols change, bump the affected components. we keep rev |
| 35 | * internal cluster protocols separately from the public, | 21 | * internal cluster protocols separately from the public, |
| @@ -53,18 +39,10 @@ | |||
| 53 | /* | 39 | /* |
| 54 | * feature bits | 40 | * feature bits |
| 55 | */ | 41 | */ |
| 56 | #define CEPH_FEATURE_UID 1 | 42 | #define CEPH_FEATURE_UID (1<<0) |
| 57 | #define CEPH_FEATURE_NOSRCADDR 2 | 43 | #define CEPH_FEATURE_NOSRCADDR (1<<1) |
| 58 | #define CEPH_FEATURE_FLOCK 4 | 44 | #define CEPH_FEATURE_MONCLOCKCHECK (1<<2) |
| 59 | 45 | #define CEPH_FEATURE_FLOCK (1<<3) | |
| 60 | #define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
| 61 | #define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID | ||
| 62 | #define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK | ||
| 63 | #define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID | ||
| 64 | #define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
| 65 | #define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID | ||
| 66 | #define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
| 67 | #define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
| 68 | 46 | ||
| 69 | 47 | ||
| 70 | /* | 48 | /* |
| @@ -96,6 +74,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | |||
| 96 | #define CEPH_CRYPTO_NONE 0x0 | 74 | #define CEPH_CRYPTO_NONE 0x0 |
| 97 | #define CEPH_CRYPTO_AES 0x1 | 75 | #define CEPH_CRYPTO_AES 0x1 |
| 98 | 76 | ||
| 77 | #define CEPH_AES_IV "cephsageyudagreg" | ||
| 78 | |||
| 99 | /* security/authentication protocols */ | 79 | /* security/authentication protocols */ |
| 100 | #define CEPH_AUTH_UNKNOWN 0x0 | 80 | #define CEPH_AUTH_UNKNOWN 0x0 |
| 101 | #define CEPH_AUTH_NONE 0x1 | 81 | #define CEPH_AUTH_NONE 0x1 |
| @@ -275,6 +255,7 @@ extern const char *ceph_mds_state_name(int s); | |||
| 275 | #define CEPH_LOCK_IDFT 512 /* dir frag tree */ | 255 | #define CEPH_LOCK_IDFT 512 /* dir frag tree */ |
| 276 | #define CEPH_LOCK_INEST 1024 /* mds internal */ | 256 | #define CEPH_LOCK_INEST 1024 /* mds internal */ |
| 277 | #define CEPH_LOCK_IXATTR 2048 | 257 | #define CEPH_LOCK_IXATTR 2048 |
| 258 | #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ | ||
| 278 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ | 259 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ |
| 279 | 260 | ||
| 280 | /* client_session ops */ | 261 | /* client_session ops */ |
| @@ -316,6 +297,8 @@ enum { | |||
| 316 | CEPH_MDS_OP_RMXATTR = 0x01106, | 297 | CEPH_MDS_OP_RMXATTR = 0x01106, |
| 317 | CEPH_MDS_OP_SETLAYOUT = 0x01107, | 298 | CEPH_MDS_OP_SETLAYOUT = 0x01107, |
| 318 | CEPH_MDS_OP_SETATTR = 0x01108, | 299 | CEPH_MDS_OP_SETATTR = 0x01108, |
| 300 | CEPH_MDS_OP_SETFILELOCK= 0x01109, | ||
| 301 | CEPH_MDS_OP_GETFILELOCK= 0x00110, | ||
| 319 | 302 | ||
| 320 | CEPH_MDS_OP_MKNOD = 0x01201, | 303 | CEPH_MDS_OP_MKNOD = 0x01201, |
| 321 | CEPH_MDS_OP_LINK = 0x01202, | 304 | CEPH_MDS_OP_LINK = 0x01202, |
| @@ -386,6 +369,15 @@ union ceph_mds_request_args { | |||
| 386 | struct { | 369 | struct { |
| 387 | struct ceph_file_layout layout; | 370 | struct ceph_file_layout layout; |
| 388 | } __attribute__ ((packed)) setlayout; | 371 | } __attribute__ ((packed)) setlayout; |
| 372 | struct { | ||
| 373 | __u8 rule; /* currently fcntl or flock */ | ||
| 374 | __u8 type; /* shared, exclusive, remove */ | ||
| 375 | __le64 pid; /* process id requesting the lock */ | ||
| 376 | __le64 pid_namespace; | ||
| 377 | __le64 start; /* initial location to lock */ | ||
| 378 | __le64 length; /* num bytes to lock from start */ | ||
| 379 | __u8 wait; /* will caller wait for lock to become available? */ | ||
| 380 | } __attribute__ ((packed)) filelock_change; | ||
| 389 | } __attribute__ ((packed)); | 381 | } __attribute__ ((packed)); |
| 390 | 382 | ||
| 391 | #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ | 383 | #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ |
| @@ -480,6 +472,23 @@ struct ceph_mds_reply_dirfrag { | |||
| 480 | __le32 dist[]; | 472 | __le32 dist[]; |
| 481 | } __attribute__ ((packed)); | 473 | } __attribute__ ((packed)); |
| 482 | 474 | ||
| 475 | #define CEPH_LOCK_FCNTL 1 | ||
| 476 | #define CEPH_LOCK_FLOCK 2 | ||
| 477 | |||
| 478 | #define CEPH_LOCK_SHARED 1 | ||
| 479 | #define CEPH_LOCK_EXCL 2 | ||
| 480 | #define CEPH_LOCK_UNLOCK 4 | ||
| 481 | |||
| 482 | struct ceph_filelock { | ||
| 483 | __le64 start; /* file offset to start lock at */ | ||
| 484 | __le64 length; /* num bytes to lock; 0 for all following start */ | ||
| 485 | __le64 client; /* which client holds the lock */ | ||
| 486 | __le64 pid; /* process id holding the lock on the client */ | ||
| 487 | __le64 pid_namespace; | ||
| 488 | __u8 type; /* shared lock, exclusive lock, or unlock */ | ||
| 489 | } __attribute__ ((packed)); | ||
| 490 | |||
| 491 | |||
| 483 | /* file access modes */ | 492 | /* file access modes */ |
| 484 | #define CEPH_FILE_MODE_PIN 0 | 493 | #define CEPH_FILE_MODE_PIN 0 |
| 485 | #define CEPH_FILE_MODE_RD 1 | 494 | #define CEPH_FILE_MODE_RD 1 |
| @@ -508,9 +517,10 @@ int ceph_flags_to_mode(int flags); | |||
| 508 | #define CEPH_CAP_SAUTH 2 | 517 | #define CEPH_CAP_SAUTH 2 |
| 509 | #define CEPH_CAP_SLINK 4 | 518 | #define CEPH_CAP_SLINK 4 |
| 510 | #define CEPH_CAP_SXATTR 6 | 519 | #define CEPH_CAP_SXATTR 6 |
| 511 | #define CEPH_CAP_SFILE 8 /* goes at the end (uses >2 cap bits) */ | 520 | #define CEPH_CAP_SFILE 8 |
| 521 | #define CEPH_CAP_SFLOCK 20 | ||
| 512 | 522 | ||
| 513 | #define CEPH_CAP_BITS 16 | 523 | #define CEPH_CAP_BITS 22 |
| 514 | 524 | ||
| 515 | /* composed values */ | 525 | /* composed values */ |
| 516 | #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) | 526 | #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) |
| @@ -528,6 +538,9 @@ int ceph_flags_to_mode(int flags); | |||
| 528 | #define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE) | 538 | #define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE) |
| 529 | #define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE) | 539 | #define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE) |
| 530 | #define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE) | 540 | #define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE) |
| 541 | #define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK) | ||
| 542 | #define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK) | ||
| 543 | |||
| 531 | 544 | ||
| 532 | /* cap masks (for getattr) */ | 545 | /* cap masks (for getattr) */ |
| 533 | #define CEPH_STAT_CAP_INODE CEPH_CAP_PIN | 546 | #define CEPH_STAT_CAP_INODE CEPH_CAP_PIN |
| @@ -563,7 +576,8 @@ int ceph_flags_to_mode(int flags); | |||
| 563 | CEPH_CAP_FILE_EXCL) | 576 | CEPH_CAP_FILE_EXCL) |
| 564 | #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) | 577 | #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) |
| 565 | #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ | 578 | #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ |
| 566 | CEPH_CAP_ANY_FILE_WR | CEPH_CAP_PIN) | 579 | CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \ |
| 580 | CEPH_CAP_PIN) | ||
| 567 | 581 | ||
| 568 | #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ | 582 | #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ |
| 569 | CEPH_LOCK_IXATTR) | 583 | CEPH_LOCK_IXATTR) |
| @@ -653,12 +667,21 @@ struct ceph_mds_cap_reconnect { | |||
| 653 | __le64 cap_id; | 667 | __le64 cap_id; |
| 654 | __le32 wanted; | 668 | __le32 wanted; |
| 655 | __le32 issued; | 669 | __le32 issued; |
| 670 | __le64 snaprealm; | ||
| 671 | __le64 pathbase; /* base ino for our path to this ino */ | ||
| 672 | __le32 flock_len; /* size of flock state blob, if any */ | ||
| 673 | } __attribute__ ((packed)); | ||
| 674 | /* followed by flock blob */ | ||
| 675 | |||
| 676 | struct ceph_mds_cap_reconnect_v1 { | ||
| 677 | __le64 cap_id; | ||
| 678 | __le32 wanted; | ||
| 679 | __le32 issued; | ||
| 656 | __le64 size; | 680 | __le64 size; |
| 657 | struct ceph_timespec mtime, atime; | 681 | struct ceph_timespec mtime, atime; |
| 658 | __le64 snaprealm; | 682 | __le64 snaprealm; |
| 659 | __le64 pathbase; /* base ino for our path to this ino */ | 683 | __le64 pathbase; /* base ino for our path to this ino */ |
| 660 | } __attribute__ ((packed)); | 684 | } __attribute__ ((packed)); |
| 661 | /* followed by encoded string */ | ||
| 662 | 685 | ||
| 663 | struct ceph_mds_snaprealm_reconnect { | 686 | struct ceph_mds_snaprealm_reconnect { |
| 664 | __le64 ino; /* snap realm base */ | 687 | __le64 ino; /* snap realm base */ |
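The ceph_fs.h changes above add the protocol pieces for file locking: the SETFILELOCK/GETFILELOCK ops, the filelock_change request args, the ceph_filelock reply struct, and the CEPH_LOCK_FCNTL/FLOCK and SHARED/EXCL/UNLOCK constants. A hedged sketch of how a SETFILELOCK request's args might be filled for a blocking, exclusive fcntl-style lock; the real callers live in the new fs/ceph/locks.c introduced at the end of this diff, and every value below is only an example:

    union ceph_mds_request_args args = {};

    args.filelock_change.rule   = CEPH_LOCK_FCNTL;    /* fcntl-style, not flock */
    args.filelock_change.type   = CEPH_LOCK_EXCL;     /* exclusive lock */
    args.filelock_change.pid    = cpu_to_le64(1234);  /* example requesting pid */
    args.filelock_change.start  = cpu_to_le64(0);     /* lock from offset 0 */
    args.filelock_change.length = cpu_to_le64(4096);  /* lock the first 4096 bytes */
    args.filelock_change.wait   = 1;                  /* caller is willing to block */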
diff --git a/fs/ceph/ceph_hash.h b/fs/ceph/ceph_hash.h index 5ac470c433c9..d099c3f90236 100644 --- a/fs/ceph/ceph_hash.h +++ b/fs/ceph/ceph_hash.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #ifndef _FS_CEPH_HASH_H | 1 | #ifndef FS_CEPH_HASH_H |
| 2 | #define _FS_CEPH_HASH_H | 2 | #define FS_CEPH_HASH_H |
| 3 | 3 | ||
| 4 | #define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ | 4 | #define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ |
| 5 | #define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ | 5 | #define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ |
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c index 7503aee828ce..c6179d3a26a2 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/ceph_strings.c | |||
| @@ -28,6 +28,7 @@ const char *ceph_osd_op_name(int op) | |||
| 28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | 28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; |
| 29 | case CEPH_OSD_OP_ZERO: return "zero"; | 29 | case CEPH_OSD_OP_ZERO: return "zero"; |
| 30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | 30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; |
| 31 | case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||
| 31 | 32 | ||
| 32 | case CEPH_OSD_OP_APPEND: return "append"; | 33 | case CEPH_OSD_OP_APPEND: return "append"; |
| 33 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | 34 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; |
| @@ -129,6 +130,8 @@ const char *ceph_mds_op_name(int op) | |||
| 129 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; | 130 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; |
| 130 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; | 131 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; |
| 131 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; | 132 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; |
| 133 | case CEPH_MDS_OP_SETFILELOCK: return "setfilelock"; | ||
| 134 | case CEPH_MDS_OP_GETFILELOCK: return "getfilelock"; | ||
| 132 | } | 135 | } |
| 133 | return "???"; | 136 | return "???"; |
| 134 | } | 137 | } |
diff --git a/fs/ceph/crush/crush.h b/fs/ceph/crush/crush.h index dcd7e7523700..97e435b191f4 100644 --- a/fs/ceph/crush/crush.h +++ b/fs/ceph/crush/crush.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #ifndef _CRUSH_CRUSH_H | 1 | #ifndef CEPH_CRUSH_CRUSH_H |
| 2 | #define _CRUSH_CRUSH_H | 2 | #define CEPH_CRUSH_CRUSH_H |
| 3 | 3 | ||
| 4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
| 5 | 5 | ||
diff --git a/fs/ceph/crush/hash.h b/fs/ceph/crush/hash.h index ff48e110e4bb..91e884230d5d 100644 --- a/fs/ceph/crush/hash.h +++ b/fs/ceph/crush/hash.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #ifndef _CRUSH_HASH_H | 1 | #ifndef CEPH_CRUSH_HASH_H |
| 2 | #define _CRUSH_HASH_H | 2 | #define CEPH_CRUSH_HASH_H |
| 3 | 3 | ||
| 4 | #define CRUSH_HASH_RJENKINS1 0 | 4 | #define CRUSH_HASH_RJENKINS1 0 |
| 5 | 5 | ||
diff --git a/fs/ceph/crush/mapper.h b/fs/ceph/crush/mapper.h index 98e90046fd9f..c46b99c18bb0 100644 --- a/fs/ceph/crush/mapper.h +++ b/fs/ceph/crush/mapper.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #ifndef _CRUSH_MAPPER_H | 1 | #ifndef CEPH_CRUSH_MAPPER_H |
| 2 | #define _CRUSH_MAPPER_H | 2 | #define CEPH_CRUSH_MAPPER_H |
| 3 | 3 | ||
| 4 | /* | 4 | /* |
| 5 | * CRUSH functions for finding rules and then mapping an input to an | 5 | * CRUSH functions for finding rules and then mapping an input to an |
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index f704b3b62424..a3e627f63293 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c | |||
| @@ -75,10 +75,11 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) | |||
| 75 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); | 75 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); |
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | const u8 *aes_iv = "cephsageyudagreg"; | 78 | static const u8 *aes_iv = (u8 *)CEPH_AES_IV; |
| 79 | 79 | ||
| 80 | int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, | 80 | static int ceph_aes_encrypt(const void *key, int key_len, |
| 81 | const void *src, size_t src_len) | 81 | void *dst, size_t *dst_len, |
| 82 | const void *src, size_t src_len) | ||
| 82 | { | 83 | { |
| 83 | struct scatterlist sg_in[2], sg_out[1]; | 84 | struct scatterlist sg_in[2], sg_out[1]; |
| 84 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 85 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
| @@ -126,9 +127,10 @@ int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, | |||
| 126 | return 0; | 127 | return 0; |
| 127 | } | 128 | } |
| 128 | 129 | ||
| 129 | int ceph_aes_encrypt2(const void *key, int key_len, void *dst, size_t *dst_len, | 130 | static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, |
| 130 | const void *src1, size_t src1_len, | 131 | size_t *dst_len, |
| 131 | const void *src2, size_t src2_len) | 132 | const void *src1, size_t src1_len, |
| 133 | const void *src2, size_t src2_len) | ||
| 132 | { | 134 | { |
| 133 | struct scatterlist sg_in[3], sg_out[1]; | 135 | struct scatterlist sg_in[3], sg_out[1]; |
| 134 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 136 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
| @@ -179,8 +181,9 @@ int ceph_aes_encrypt2(const void *key, int key_len, void *dst, size_t *dst_len, | |||
| 179 | return 0; | 181 | return 0; |
| 180 | } | 182 | } |
| 181 | 183 | ||
| 182 | int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, | 184 | static int ceph_aes_decrypt(const void *key, int key_len, |
| 183 | const void *src, size_t src_len) | 185 | void *dst, size_t *dst_len, |
| 186 | const void *src, size_t src_len) | ||
| 184 | { | 187 | { |
| 185 | struct scatterlist sg_in[1], sg_out[2]; | 188 | struct scatterlist sg_in[1], sg_out[2]; |
| 186 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 189 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
| @@ -238,10 +241,10 @@ int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, | |||
| 238 | return 0; | 241 | return 0; |
| 239 | } | 242 | } |
| 240 | 243 | ||
| 241 | int ceph_aes_decrypt2(const void *key, int key_len, | 244 | static int ceph_aes_decrypt2(const void *key, int key_len, |
| 242 | void *dst1, size_t *dst1_len, | 245 | void *dst1, size_t *dst1_len, |
| 243 | void *dst2, size_t *dst2_len, | 246 | void *dst2, size_t *dst2_len, |
| 244 | const void *src, size_t src_len) | 247 | const void *src, size_t src_len) |
| 245 | { | 248 | { |
| 246 | struct scatterlist sg_in[1], sg_out[3]; | 249 | struct scatterlist sg_in[1], sg_out[3]; |
| 247 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 250 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h index 40b502e6bd89..bdf38607323c 100644 --- a/fs/ceph/crypto.h +++ b/fs/ceph/crypto.h | |||
| @@ -42,7 +42,7 @@ extern int ceph_encrypt2(struct ceph_crypto_key *secret, | |||
| 42 | const void *src2, size_t src2_len); | 42 | const void *src2, size_t src2_len); |
| 43 | 43 | ||
| 44 | /* armor.c */ | 44 | /* armor.c */ |
| 45 | extern int ceph_armor(char *dst, const void *src, const void *end); | 45 | extern int ceph_armor(char *dst, const char *src, const char *end); |
| 46 | extern int ceph_unarmor(void *dst, const char *src, const char *end); | 46 | extern int ceph_unarmor(char *dst, const char *src, const char *end); |
| 47 | 47 | ||
| 48 | #endif | 48 | #endif |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f2f5332ddbba..360c4f22718d 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
| @@ -291,7 +291,7 @@ static int dentry_lru_show(struct seq_file *s, void *ptr) | |||
| 291 | return 0; | 291 | return 0; |
| 292 | } | 292 | } |
| 293 | 293 | ||
| 294 | #define DEFINE_SHOW_FUNC(name) \ | 294 | #define DEFINE_SHOW_FUNC(name) \ |
| 295 | static int name##_open(struct inode *inode, struct file *file) \ | 295 | static int name##_open(struct inode *inode, struct file *file) \ |
| 296 | { \ | 296 | { \ |
| 297 | struct seq_file *sf; \ | 297 | struct seq_file *sf; \ |
| @@ -361,8 +361,8 @@ int ceph_debugfs_client_init(struct ceph_client *client) | |||
| 361 | int ret = 0; | 361 | int ret = 0; |
| 362 | char name[80]; | 362 | char name[80]; |
| 363 | 363 | ||
| 364 | snprintf(name, sizeof(name), FSID_FORMAT ".client%lld", | 364 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, |
| 365 | PR_FSID(&client->fsid), client->monc.auth->global_id); | 365 | client->monc.auth->global_id); |
| 366 | 366 | ||
| 367 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | 367 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); |
| 368 | if (!client->debugfs_dir) | 368 | if (!client->debugfs_dir) |
| @@ -432,11 +432,12 @@ int ceph_debugfs_client_init(struct ceph_client *client) | |||
| 432 | if (!client->debugfs_caps) | 432 | if (!client->debugfs_caps) |
| 433 | goto out; | 433 | goto out; |
| 434 | 434 | ||
| 435 | client->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", | 435 | client->debugfs_congestion_kb = |
| 436 | 0600, | 436 | debugfs_create_file("writeback_congestion_kb", |
| 437 | client->debugfs_dir, | 437 | 0600, |
| 438 | client, | 438 | client->debugfs_dir, |
| 439 | &congestion_kb_fops); | 439 | client, |
| 440 | &congestion_kb_fops); | ||
| 440 | if (!client->debugfs_congestion_kb) | 441 | if (!client->debugfs_congestion_kb) |
| 441 | goto out; | 442 | goto out; |
| 442 | 443 | ||
| @@ -466,7 +467,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) | |||
| 466 | debugfs_remove(client->debugfs_dir); | 467 | debugfs_remove(client->debugfs_dir); |
| 467 | } | 468 | } |
| 468 | 469 | ||
| 469 | #else // CONFIG_DEBUG_FS | 470 | #else /* CONFIG_DEBUG_FS */ |
| 470 | 471 | ||
| 471 | int __init ceph_debugfs_init(void) | 472 | int __init ceph_debugfs_init(void) |
| 472 | { | 473 | { |
| @@ -486,4 +487,4 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) | |||
| 486 | { | 487 | { |
| 487 | } | 488 | } |
| 488 | 489 | ||
| 489 | #endif // CONFIG_DEBUG_FS | 490 | #endif /* CONFIG_DEBUG_FS */ |
diff --git a/fs/ceph/decode.h b/fs/ceph/decode.h index 65b3e022eaf5..3d25415afe63 100644 --- a/fs/ceph/decode.h +++ b/fs/ceph/decode.h | |||
| @@ -99,11 +99,13 @@ static inline void ceph_encode_timespec(struct ceph_timespec *tv, | |||
| 99 | */ | 99 | */ |
| 100 | static inline void ceph_encode_addr(struct ceph_entity_addr *a) | 100 | static inline void ceph_encode_addr(struct ceph_entity_addr *a) |
| 101 | { | 101 | { |
| 102 | a->in_addr.ss_family = htons(a->in_addr.ss_family); | 102 | __be16 ss_family = htons(a->in_addr.ss_family); |
| 103 | a->in_addr.ss_family = *(__u16 *)&ss_family; | ||
| 103 | } | 104 | } |
| 104 | static inline void ceph_decode_addr(struct ceph_entity_addr *a) | 105 | static inline void ceph_decode_addr(struct ceph_entity_addr *a) |
| 105 | { | 106 | { |
| 106 | a->in_addr.ss_family = ntohs(a->in_addr.ss_family); | 107 | __be16 ss_family = *(__be16 *)&a->in_addr.ss_family; |
| 108 | a->in_addr.ss_family = ntohs(ss_family); | ||
| 107 | WARN_ON(a->in_addr.ss_family == 512); | 109 | WARN_ON(a->in_addr.ss_family == 512); |
| 108 | } | 110 | } |
| 109 | 111 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f94ed3c7f6a5..67bbb41d5526 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
| @@ -27,7 +27,7 @@ | |||
| 27 | 27 | ||
| 28 | const struct inode_operations ceph_dir_iops; | 28 | const struct inode_operations ceph_dir_iops; |
| 29 | const struct file_operations ceph_dir_fops; | 29 | const struct file_operations ceph_dir_fops; |
| 30 | struct dentry_operations ceph_dentry_ops; | 30 | const struct dentry_operations ceph_dentry_ops; |
| 31 | 31 | ||
| 32 | /* | 32 | /* |
| 33 | * Initialize ceph dentry state. | 33 | * Initialize ceph dentry state. |
| @@ -94,6 +94,8 @@ static unsigned fpos_off(loff_t p) | |||
| 94 | */ | 94 | */ |
| 95 | static int __dcache_readdir(struct file *filp, | 95 | static int __dcache_readdir(struct file *filp, |
| 96 | void *dirent, filldir_t filldir) | 96 | void *dirent, filldir_t filldir) |
| 97 | __releases(inode->i_lock) | ||
| 98 | __acquires(inode->i_lock) | ||
| 97 | { | 99 | { |
| 98 | struct inode *inode = filp->f_dentry->d_inode; | 100 | struct inode *inode = filp->f_dentry->d_inode; |
| 99 | struct ceph_file_info *fi = filp->private_data; | 101 | struct ceph_file_info *fi = filp->private_data; |
| @@ -1239,16 +1241,16 @@ const struct inode_operations ceph_dir_iops = { | |||
| 1239 | .create = ceph_create, | 1241 | .create = ceph_create, |
| 1240 | }; | 1242 | }; |
| 1241 | 1243 | ||
| 1242 | struct dentry_operations ceph_dentry_ops = { | 1244 | const struct dentry_operations ceph_dentry_ops = { |
| 1243 | .d_revalidate = ceph_d_revalidate, | 1245 | .d_revalidate = ceph_d_revalidate, |
| 1244 | .d_release = ceph_dentry_release, | 1246 | .d_release = ceph_dentry_release, |
| 1245 | }; | 1247 | }; |
| 1246 | 1248 | ||
| 1247 | struct dentry_operations ceph_snapdir_dentry_ops = { | 1249 | const struct dentry_operations ceph_snapdir_dentry_ops = { |
| 1248 | .d_revalidate = ceph_snapdir_d_revalidate, | 1250 | .d_revalidate = ceph_snapdir_d_revalidate, |
| 1249 | .d_release = ceph_dentry_release, | 1251 | .d_release = ceph_dentry_release, |
| 1250 | }; | 1252 | }; |
| 1251 | 1253 | ||
| 1252 | struct dentry_operations ceph_snap_dentry_ops = { | 1254 | const struct dentry_operations ceph_snap_dentry_ops = { |
| 1253 | .d_release = ceph_dentry_release, | 1255 | .d_release = ceph_dentry_release, |
| 1254 | }; | 1256 | }; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7c08698fad3e..8c044a4f0457 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -317,7 +317,7 @@ void ceph_release_page_vector(struct page **pages, int num_pages) | |||
| 317 | /* | 317 | /* |
| 318 | * allocate a vector of new pages | 318 | * allocate a vector of new pages |
| 319 | */ | 319 | */ |
| 320 | struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | 320 | static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) |
| 321 | { | 321 | { |
| 322 | struct page **pages; | 322 | struct page **pages; |
| 323 | int i; | 323 | int i; |
| @@ -665,7 +665,7 @@ more: | |||
| 665 | * throw out any page cache pages in this range. this | 665 | * throw out any page cache pages in this range. this |
| 666 | * may block. | 666 | * may block. |
| 667 | */ | 667 | */ |
| 668 | truncate_inode_pages_range(inode->i_mapping, pos, | 668 | truncate_inode_pages_range(inode->i_mapping, pos, |
| 669 | (pos+len) | (PAGE_CACHE_SIZE-1)); | 669 | (pos+len) | (PAGE_CACHE_SIZE-1)); |
| 670 | } else { | 670 | } else { |
| 671 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); | 671 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
| @@ -740,28 +740,32 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 740 | unsigned long nr_segs, loff_t pos) | 740 | unsigned long nr_segs, loff_t pos) |
| 741 | { | 741 | { |
| 742 | struct file *filp = iocb->ki_filp; | 742 | struct file *filp = iocb->ki_filp; |
| 743 | struct ceph_file_info *fi = filp->private_data; | ||
| 743 | loff_t *ppos = &iocb->ki_pos; | 744 | loff_t *ppos = &iocb->ki_pos; |
| 744 | size_t len = iov->iov_len; | 745 | size_t len = iov->iov_len; |
| 745 | struct inode *inode = filp->f_dentry->d_inode; | 746 | struct inode *inode = filp->f_dentry->d_inode; |
| 746 | struct ceph_inode_info *ci = ceph_inode(inode); | 747 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 747 | void *base = iov->iov_base; | 748 | void __user *base = iov->iov_base; |
| 748 | ssize_t ret; | 749 | ssize_t ret; |
| 749 | int got = 0; | 750 | int want, got = 0; |
| 750 | int checkeof = 0, read = 0; | 751 | int checkeof = 0, read = 0; |
| 751 | 752 | ||
| 752 | dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", | 753 | dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", |
| 753 | inode, ceph_vinop(inode), pos, (unsigned)len, inode); | 754 | inode, ceph_vinop(inode), pos, (unsigned)len, inode); |
| 754 | again: | 755 | again: |
| 755 | __ceph_do_pending_vmtruncate(inode); | 756 | __ceph_do_pending_vmtruncate(inode); |
| 756 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE, | 757 | if (fi->fmode & CEPH_FILE_MODE_LAZY) |
| 757 | &got, -1); | 758 | want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; |
| 759 | else | ||
| 760 | want = CEPH_CAP_FILE_CACHE; | ||
| 761 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); | ||
| 758 | if (ret < 0) | 762 | if (ret < 0) |
| 759 | goto out; | 763 | goto out; |
| 760 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", | 764 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", |
| 761 | inode, ceph_vinop(inode), pos, (unsigned)len, | 765 | inode, ceph_vinop(inode), pos, (unsigned)len, |
| 762 | ceph_cap_string(got)); | 766 | ceph_cap_string(got)); |
| 763 | 767 | ||
| 764 | if ((got & CEPH_CAP_FILE_CACHE) == 0 || | 768 | if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || |
| 765 | (iocb->ki_filp->f_flags & O_DIRECT) || | 769 | (iocb->ki_filp->f_flags & O_DIRECT) || |
| 766 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) | 770 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) |
| 767 | /* hmm, this isn't really async... */ | 771 | /* hmm, this isn't really async... */ |
| @@ -807,11 +811,12 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 807 | unsigned long nr_segs, loff_t pos) | 811 | unsigned long nr_segs, loff_t pos) |
| 808 | { | 812 | { |
| 809 | struct file *file = iocb->ki_filp; | 813 | struct file *file = iocb->ki_filp; |
| 814 | struct ceph_file_info *fi = file->private_data; | ||
| 810 | struct inode *inode = file->f_dentry->d_inode; | 815 | struct inode *inode = file->f_dentry->d_inode; |
| 811 | struct ceph_inode_info *ci = ceph_inode(inode); | 816 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 812 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 817 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; |
| 813 | loff_t endoff = pos + iov->iov_len; | 818 | loff_t endoff = pos + iov->iov_len; |
| 814 | int got = 0; | 819 | int want, got = 0; |
| 815 | int ret, err; | 820 | int ret, err; |
| 816 | 821 | ||
| 817 | if (ceph_snap(inode) != CEPH_NOSNAP) | 822 | if (ceph_snap(inode) != CEPH_NOSNAP) |
| @@ -824,8 +829,11 @@ retry_snap: | |||
| 824 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", | 829 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", |
| 825 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 830 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
| 826 | inode->i_size); | 831 | inode->i_size); |
| 827 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER, | 832 | if (fi->fmode & CEPH_FILE_MODE_LAZY) |
| 828 | &got, endoff); | 833 | want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; |
| 834 | else | ||
| 835 | want = CEPH_CAP_FILE_BUFFER; | ||
| 836 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); | ||
| 829 | if (ret < 0) | 837 | if (ret < 0) |
| 830 | goto out; | 838 | goto out; |
| 831 | 839 | ||
| @@ -833,7 +841,7 @@ retry_snap: | |||
| 833 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 841 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
| 834 | ceph_cap_string(got)); | 842 | ceph_cap_string(got)); |
| 835 | 843 | ||
| 836 | if ((got & CEPH_CAP_FILE_BUFFER) == 0 || | 844 | if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || |
| 837 | (iocb->ki_filp->f_flags & O_DIRECT) || | 845 | (iocb->ki_filp->f_flags & O_DIRECT) || |
| 838 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) { | 846 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) { |
| 839 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, | 847 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, |
| @@ -930,6 +938,8 @@ const struct file_operations ceph_file_fops = { | |||
| 930 | .aio_write = ceph_aio_write, | 938 | .aio_write = ceph_aio_write, |
| 931 | .mmap = ceph_mmap, | 939 | .mmap = ceph_mmap, |
| 932 | .fsync = ceph_fsync, | 940 | .fsync = ceph_fsync, |
| 941 | .lock = ceph_lock, | ||
| 942 | .flock = ceph_flock, | ||
| 933 | .splice_read = generic_file_splice_read, | 943 | .splice_read = generic_file_splice_read, |
| 934 | .splice_write = generic_file_splice_write, | 944 | .splice_write = generic_file_splice_write, |
| 935 | .unlocked_ioctl = ceph_ioctl, | 945 | .unlocked_ioctl = ceph_ioctl, |
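
The fs/ceph/file.c hunks above change which capabilities the client asks for and will accept before taking the buffered path: a file handle switched to lazy mode wants FILE_LAZYIO in addition to FILE_CACHE/FILE_BUFFER, and either cap is enough to avoid the synchronous fallback (which O_DIRECT and MS_SYNCHRONOUS still force). A minimal standalone sketch of that decision, using illustrative flag values rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative flag values only -- the real CEPH_CAP_* and file mode bits
 * live in the ceph headers; this is just a model of the aio_write decision. */
#define CAP_FILE_BUFFER  0x1
#define CAP_FILE_LAZYIO  0x2
#define MODE_LAZY        0x4
#define FLAG_O_DIRECT    0x8
#define FLAG_SYNCHRONOUS 0x10

/* What the client asks the MDS for before a buffered write. */
static int wanted_caps(int fmode)
{
    return (fmode & MODE_LAZY) ? (CAP_FILE_BUFFER | CAP_FILE_LAZYIO)
                               : CAP_FILE_BUFFER;
}

/* Whether the write must fall back to the synchronous path. */
static bool must_sync_write(int got, int flags)
{
    return (got & (CAP_FILE_BUFFER | CAP_FILE_LAZYIO)) == 0 ||
           (flags & FLAG_O_DIRECT) ||
           (flags & FLAG_SYNCHRONOUS);
}

int main(void)
{
    printf("lazy handle wants 0x%x\n", wanted_caps(MODE_LAZY));
    printf("only LAZYIO granted but O_DIRECT set: sync=%d\n",
           must_sync_write(CAP_FILE_LAZYIO, FLAG_O_DIRECT));
    return 0;
}
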
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 389f9dbd9949..5d893d31e399 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -442,8 +442,9 @@ int ceph_fill_file_size(struct inode *inode, int issued, | |||
| 442 | * the file is either opened or mmaped | 442 | * the file is either opened or mmaped |
| 443 | */ | 443 | */ |
| 444 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_RD| | 444 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_RD| |
| 445 | CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER| | 445 | CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER| |
| 446 | CEPH_CAP_FILE_EXCL)) || | 446 | CEPH_CAP_FILE_EXCL| |
| 447 | CEPH_CAP_FILE_LAZYIO)) || | ||
| 447 | mapping_mapped(inode->i_mapping) || | 448 | mapping_mapped(inode->i_mapping) || |
| 448 | __ceph_caps_file_wanted(ci)) { | 449 | __ceph_caps_file_wanted(ci)) { |
| 449 | ci->i_truncate_pending++; | 450 | ci->i_truncate_pending++; |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index d085f07756b4..76e307d2aba1 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
| @@ -143,6 +143,27 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
| 143 | return 0; | 143 | return 0; |
| 144 | } | 144 | } |
| 145 | 145 | ||
| 146 | static long ceph_ioctl_lazyio(struct file *file) | ||
| 147 | { | ||
| 148 | struct ceph_file_info *fi = file->private_data; | ||
| 149 | struct inode *inode = file->f_dentry->d_inode; | ||
| 150 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 151 | |||
| 152 | if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { | ||
| 153 | spin_lock(&inode->i_lock); | ||
| 154 | ci->i_nr_by_mode[fi->fmode]--; | ||
| 155 | fi->fmode |= CEPH_FILE_MODE_LAZY; | ||
| 156 | ci->i_nr_by_mode[fi->fmode]++; | ||
| 157 | spin_unlock(&inode->i_lock); | ||
| 158 | dout("ioctl_layzio: file %p marked lazy\n", file); | ||
| 159 | |||
| 160 | ceph_check_caps(ci, 0, NULL); | ||
| 161 | } else { | ||
| 162 | dout("ioctl_layzio: file %p already lazy\n", file); | ||
| 163 | } | ||
| 164 | return 0; | ||
| 165 | } | ||
| 166 | |||
| 146 | long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 167 | long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
| 147 | { | 168 | { |
| 148 | dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); | 169 | dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); |
| @@ -155,6 +176,9 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 155 | 176 | ||
| 156 | case CEPH_IOC_GET_DATALOC: | 177 | case CEPH_IOC_GET_DATALOC: |
| 157 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); | 178 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); |
| 179 | |||
| 180 | case CEPH_IOC_LAZYIO: | ||
| 181 | return ceph_ioctl_lazyio(file); | ||
| 158 | } | 182 | } |
| 159 | return -ENOTTY; | 183 | return -ENOTTY; |
| 160 | } | 184 | } |
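
ceph_ioctl_lazyio() above moves an open file from its current mode bucket into the lazy bucket under i_lock and then re-checks caps. A simplified userspace model of that bookkeeping, with stand-in mode values and a mutex in place of the inode spinlock:

#include <pthread.h>
#include <stdio.h>

#define MODE_RD   0            /* illustrative values, not the kernel's */
#define MODE_LAZY 4
#define NR_MODES  8

static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;
static int nr_by_mode[NR_MODES];   /* stand-in for ci->i_nr_by_mode[] */

/* Switch one open file handle from its current mode to mode|LAZY. */
static int mark_lazy(int *fmode)
{
    if (*fmode & MODE_LAZY)
        return 0;                      /* already lazy, nothing to do */
    pthread_mutex_lock(&i_lock);
    nr_by_mode[*fmode]--;              /* leave the old bucket */
    *fmode |= MODE_LAZY;
    nr_by_mode[*fmode]++;              /* enter the lazy bucket */
    pthread_mutex_unlock(&i_lock);
    return 1;                          /* caller would now recheck caps */
}

int main(void)
{
    int fmode = MODE_RD;
    nr_by_mode[fmode] = 1;
    printf("switched=%d rd bucket=%d lazy bucket=%d\n", mark_lazy(&fmode),
           nr_by_mode[MODE_RD], nr_by_mode[MODE_RD | MODE_LAZY]);
    return 0;
}
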
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index 25e4f1a9d059..88451a3b6857 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h | |||
| @@ -37,4 +37,6 @@ struct ceph_ioctl_dataloc { | |||
| 37 | #define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \ | 37 | #define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \ |
| 38 | struct ceph_ioctl_dataloc) | 38 | struct ceph_ioctl_dataloc) |
| 39 | 39 | ||
| 40 | #define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4) | ||
| 41 | |||
| 40 | #endif | 42 | #endif |
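
CEPH_IOC_LAZYIO is the user-visible switch for lazy I/O. A hedged sketch of how an application might issue it; the 0x97 magic value and the mount path are assumptions here, and the authoritative definitions live in fs/ceph/ioctl.h:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Assumed to match fs/ceph/ioctl.h; include that header instead if available. */
#define CEPH_IOCTL_MAGIC 0x97
#define CEPH_IOC_LAZYIO  _IO(CEPH_IOCTL_MAGIC, 4)

int main(void)
{
    /* Hypothetical path on a mounted ceph filesystem. */
    int fd = open("/mnt/ceph/scratch/data.bin", O_RDWR);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* Relax consistency for this fd: reads/writes may use LAZYIO caps. */
    if (ioctl(fd, CEPH_IOC_LAZYIO) < 0)
        perror("CEPH_IOC_LAZYIO");
    else
        printf("lazy io enabled on fd %d\n", fd);
    close(fd);
    return 0;
}
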
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c new file mode 100644 index 000000000000..ae85af06454f --- /dev/null +++ b/fs/ceph/locks.c | |||
| @@ -0,0 +1,256 @@ | |||
| 1 | #include "ceph_debug.h" | ||
| 2 | |||
| 3 | #include <linux/file.h> | ||
| 4 | #include <linux/namei.h> | ||
| 5 | |||
| 6 | #include "super.h" | ||
| 7 | #include "mds_client.h" | ||
| 8 | #include "pagelist.h" | ||
| 9 | |||
| 10 | /** | ||
| 11 | * Implement fcntl and flock locking functions. | ||
| 12 | */ | ||
| 13 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | ||
| 14 | u64 pid, u64 pid_ns, | ||
| 15 | int cmd, u64 start, u64 length, u8 wait) | ||
| 16 | { | ||
| 17 | struct inode *inode = file->f_dentry->d_inode; | ||
| 18 | struct ceph_mds_client *mdsc = | ||
| 19 | &ceph_sb_to_client(inode->i_sb)->mdsc; | ||
| 20 | struct ceph_mds_request *req; | ||
| 21 | int err; | ||
| 22 | |||
| 23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | ||
| 24 | if (IS_ERR(req)) | ||
| 25 | return PTR_ERR(req); | ||
| 26 | req->r_inode = igrab(inode); | ||
| 27 | |||
| 28 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | ||
| 29 | "length: %llu, wait: %d, type`: %d", (int)lock_type, | ||
| 30 | (int)operation, pid, start, length, wait, cmd); | ||
| 31 | |||
| 32 | req->r_args.filelock_change.rule = lock_type; | ||
| 33 | req->r_args.filelock_change.type = cmd; | ||
| 34 | req->r_args.filelock_change.pid = cpu_to_le64(pid); | ||
| 35 | /* This should be adjusted, but I'm not sure if | ||
| 36 | namespaces actually get id numbers*/ | ||
| 37 | req->r_args.filelock_change.pid_namespace = | ||
| 38 | cpu_to_le64((u64)pid_ns); | ||
| 39 | req->r_args.filelock_change.start = cpu_to_le64(start); | ||
| 40 | req->r_args.filelock_change.length = cpu_to_le64(length); | ||
| 41 | req->r_args.filelock_change.wait = wait; | ||
| 42 | |||
| 43 | err = ceph_mdsc_do_request(mdsc, inode, req); | ||
| 44 | ceph_mdsc_put_request(req); | ||
| 45 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | ||
| 46 | "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, | ||
| 47 | (int)operation, pid, start, length, wait, cmd, err); | ||
| 48 | return err; | ||
| 49 | } | ||
| 50 | |||
| 51 | /** | ||
| 52 | * Attempt to set an fcntl lock. | ||
| 53 | * For now, this just goes away to the server. Later it may be more awesome. | ||
| 54 | */ | ||
| 55 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | ||
| 56 | { | ||
| 57 | u64 length; | ||
| 58 | u8 lock_cmd; | ||
| 59 | int err; | ||
| 60 | u8 wait = 0; | ||
| 61 | u16 op = CEPH_MDS_OP_SETFILELOCK; | ||
| 62 | |||
| 63 | fl->fl_nspid = get_pid(task_tgid(current)); | ||
| 64 | dout("ceph_lock, fl_pid:%d", fl->fl_pid); | ||
| 65 | |||
| 66 | /* set wait bit as appropriate, then make command as Ceph expects it*/ | ||
| 67 | if (F_SETLKW == cmd) | ||
| 68 | wait = 1; | ||
| 69 | if (F_GETLK == cmd) | ||
| 70 | op = CEPH_MDS_OP_GETFILELOCK; | ||
| 71 | |||
| 72 | if (F_RDLCK == fl->fl_type) | ||
| 73 | lock_cmd = CEPH_LOCK_SHARED; | ||
| 74 | else if (F_WRLCK == fl->fl_type) | ||
| 75 | lock_cmd = CEPH_LOCK_EXCL; | ||
| 76 | else | ||
| 77 | lock_cmd = CEPH_LOCK_UNLOCK; | ||
| 78 | |||
| 79 | if (LLONG_MAX == fl->fl_end) | ||
| 80 | length = 0; | ||
| 81 | else | ||
| 82 | length = fl->fl_end - fl->fl_start + 1; | ||
| 83 | |||
| 84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
| 85 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
| 86 | lock_cmd, fl->fl_start, | ||
| 87 | length, wait); | ||
| 88 | if (!err) { | ||
| 89 | dout("mds locked, locking locally"); | ||
| 90 | err = posix_lock_file(file, fl, NULL); | ||
| 91 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | ||
| 92 | /* undo! This should only happen if the kernel detects | ||
| 93 | * local deadlock. */ | ||
| 94 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
| 95 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
| 96 | CEPH_LOCK_UNLOCK, fl->fl_start, | ||
| 97 | length, 0); | ||
| 98 | dout("got %d on posix_lock_file, undid lock", err); | ||
| 99 | } | ||
| 100 | } else { | ||
| 101 | dout("mds returned error code %d", err); | ||
| 102 | } | ||
| 103 | return err; | ||
| 104 | } | ||
| 105 | |||
| 106 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | ||
| 107 | { | ||
| 108 | u64 length; | ||
| 109 | u8 lock_cmd; | ||
| 110 | int err; | ||
| 111 | u8 wait = 1; | ||
| 112 | |||
| 113 | fl->fl_nspid = get_pid(task_tgid(current)); | ||
| 114 | dout("ceph_flock, fl_pid:%d", fl->fl_pid); | ||
| 115 | |||
| 116 | /* set wait bit, then clear it out of cmd*/ | ||
| 117 | if (cmd & LOCK_NB) | ||
| 118 | wait = 0; | ||
| 119 | cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN); | ||
| 120 | /* set command sequence that Ceph wants to see: | ||
| 121 | shared lock, exclusive lock, or unlock */ | ||
| 122 | if (LOCK_SH == cmd) | ||
| 123 | lock_cmd = CEPH_LOCK_SHARED; | ||
| 124 | else if (LOCK_EX == cmd) | ||
| 125 | lock_cmd = CEPH_LOCK_EXCL; | ||
| 126 | else | ||
| 127 | lock_cmd = CEPH_LOCK_UNLOCK; | ||
| 128 | /* mds requires start and length rather than start and end */ | ||
| 129 | if (LLONG_MAX == fl->fl_end) | ||
| 130 | length = 0; | ||
| 131 | else | ||
| 132 | length = fl->fl_end - fl->fl_start + 1; | ||
| 133 | |||
| 134 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | ||
| 135 | file, (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
| 136 | lock_cmd, fl->fl_start, | ||
| 137 | length, wait); | ||
| 138 | if (!err) { | ||
| 139 | err = flock_lock_file_wait(file, fl); | ||
| 140 | if (err) { | ||
| 141 | ceph_lock_message(CEPH_LOCK_FLOCK, | ||
| 142 | CEPH_MDS_OP_SETFILELOCK, | ||
| 143 | file, (u64)fl->fl_pid, | ||
| 144 | (u64)fl->fl_nspid, | ||
| 145 | CEPH_LOCK_UNLOCK, fl->fl_start, | ||
| 146 | length, 0); | ||
| 147 | dout("got %d on flock_lock_file_wait, undid lock", err); | ||
| 148 | } | ||
| 149 | } else { | ||
| 150 | dout("mds error code %d", err); | ||
| 151 | } | ||
| 152 | return err; | ||
| 153 | } | ||
| 154 | |||
| 155 | /** | ||
| 156 | * Must be called with BKL already held. Fills in the passed | ||
| 157 | * counter variables, so you can prepare pagelist metadata before calling | ||
| 158 | * ceph_encode_locks. | ||
| 159 | */ | ||
| 160 | void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | ||
| 161 | { | ||
| 162 | struct file_lock *lock; | ||
| 163 | |||
| 164 | *fcntl_count = 0; | ||
| 165 | *flock_count = 0; | ||
| 166 | |||
| 167 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
| 168 | if (lock->fl_flags & FL_POSIX) | ||
| 169 | ++(*fcntl_count); | ||
| 170 | else if (lock->fl_flags & FL_FLOCK) | ||
| 171 | ++(*flock_count); | ||
| 172 | } | ||
| 173 | dout("counted %d flock locks and %d fcntl locks", | ||
| 174 | *flock_count, *fcntl_count); | ||
| 175 | } | ||
| 176 | |||
| 177 | /** | ||
| 178 | * Encode the flock and fcntl locks for the given inode into the pagelist. | ||
| 179 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, | ||
| 180 | * sequential flock locks. | ||
| 181 | * Must be called with BKL already held, and the lock numbers should have | ||
| 182 | * been gathered under the same lock holding window. | ||
| 183 | */ | ||
| 184 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | ||
| 185 | int num_fcntl_locks, int num_flock_locks) | ||
| 186 | { | ||
| 187 | struct file_lock *lock; | ||
| 188 | struct ceph_filelock cephlock; | ||
| 189 | int err = 0; | ||
| 190 | |||
| 191 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, | ||
| 192 | num_fcntl_locks); | ||
| 193 | err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32)); | ||
| 194 | if (err) | ||
| 195 | goto fail; | ||
| 196 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
| 197 | if (lock->fl_flags & FL_POSIX) { | ||
| 198 | err = lock_to_ceph_filelock(lock, &cephlock); | ||
| 199 | if (err) | ||
| 200 | goto fail; | ||
| 201 | err = ceph_pagelist_append(pagelist, &cephlock, | ||
| 202 | sizeof(struct ceph_filelock)); | ||
| 203 | } | ||
| 204 | if (err) | ||
| 205 | goto fail; | ||
| 206 | } | ||
| 207 | |||
| 208 | err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32)); | ||
| 209 | if (err) | ||
| 210 | goto fail; | ||
| 211 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
| 212 | if (lock->fl_flags & FL_FLOCK) { | ||
| 213 | err = lock_to_ceph_filelock(lock, &cephlock); | ||
| 214 | if (err) | ||
| 215 | goto fail; | ||
| 216 | err = ceph_pagelist_append(pagelist, &cephlock, | ||
| 217 | sizeof(struct ceph_filelock)); | ||
| 218 | } | ||
| 219 | if (err) | ||
| 220 | goto fail; | ||
| 221 | } | ||
| 222 | fail: | ||
| 223 | return err; | ||
| 224 | } | ||
| 225 | |||
| 226 | /* | ||
| 227 | * Given a pointer to a lock, convert it to a ceph filelock | ||
| 228 | */ | ||
| 229 | int lock_to_ceph_filelock(struct file_lock *lock, | ||
| 230 | struct ceph_filelock *cephlock) | ||
| 231 | { | ||
| 232 | int err = 0; | ||
| 233 | |||
| 234 | cephlock->start = cpu_to_le64(lock->fl_start); | ||
| 235 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | ||
| 236 | cephlock->client = cpu_to_le64(0); | ||
| 237 | cephlock->pid = cpu_to_le64(lock->fl_pid); | ||
| 238 | cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid); | ||
| 239 | |||
| 240 | switch (lock->fl_type) { | ||
| 241 | case F_RDLCK: | ||
| 242 | cephlock->type = CEPH_LOCK_SHARED; | ||
| 243 | break; | ||
| 244 | case F_WRLCK: | ||
| 245 | cephlock->type = CEPH_LOCK_EXCL; | ||
| 246 | break; | ||
| 247 | case F_UNLCK: | ||
| 248 | cephlock->type = CEPH_LOCK_UNLOCK; | ||
| 249 | break; | ||
| 250 | default: | ||
| 251 | dout("Have unknown lock type %d", lock->fl_type); | ||
| 252 | err = -EINVAL; | ||
| 253 | } | ||
| 254 | |||
| 255 | return err; | ||
| 256 | } | ||
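
The new fs/ceph/locks.c takes the lock on the MDS first, then mirrors it locally with posix_lock_file() or flock_lock_file_wait(), and sends an unlock back to the MDS if the local step fails. From userspace these paths are reached through the ordinary fcntl() and flock() calls; a minimal example (the path is illustrative):

#include <fcntl.h>
#include <stdio.h>
#include <sys/file.h>
#include <unistd.h>

int main(void)
{
    int fd = open("/mnt/ceph/shared.lock", O_RDWR | O_CREAT, 0644);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    /* POSIX byte-range lock: routed through ceph_lock() -> MDS SETFILELOCK. */
    struct flock fl = {
        .l_type   = F_WRLCK,
        .l_whence = SEEK_SET,
        .l_start  = 0,
        .l_len    = 0,          /* 0 means "to EOF", i.e. the whole file */
    };
    if (fcntl(fd, F_SETLKW, &fl) < 0)
        perror("fcntl(F_SETLKW)");

    /* Whole-file flock: routed through ceph_flock(). */
    if (flock(fd, LOCK_EX | LOCK_NB) < 0)
        perror("flock(LOCK_EX|LOCK_NB)");

    /* ... critical section ... */

    fl.l_type = F_UNLCK;
    fcntl(fd, F_SETLK, &fl);
    flock(fd, LOCK_UN);
    close(fd);
    return 0;
}
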
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index dd440bd438a9..a75ddbf9fe37 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include <linux/wait.h> | 3 | #include <linux/wait.h> |
| 4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
| 5 | #include <linux/sched.h> | 5 | #include <linux/sched.h> |
| 6 | #include <linux/smp_lock.h> | ||
| 6 | 7 | ||
| 7 | #include "mds_client.h" | 8 | #include "mds_client.h" |
| 8 | #include "mon_client.h" | 9 | #include "mon_client.h" |
| @@ -37,6 +38,11 @@ | |||
| 37 | * are no longer valid. | 38 | * are no longer valid. |
| 38 | */ | 39 | */ |
| 39 | 40 | ||
| 41 | struct ceph_reconnect_state { | ||
| 42 | struct ceph_pagelist *pagelist; | ||
| 43 | bool flock; | ||
| 44 | }; | ||
| 45 | |||
| 40 | static void __wake_requests(struct ceph_mds_client *mdsc, | 46 | static void __wake_requests(struct ceph_mds_client *mdsc, |
| 41 | struct list_head *head); | 47 | struct list_head *head); |
| 42 | 48 | ||
| @@ -449,7 +455,7 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
| 449 | kfree(req->r_path1); | 455 | kfree(req->r_path1); |
| 450 | kfree(req->r_path2); | 456 | kfree(req->r_path2); |
| 451 | put_request_session(req); | 457 | put_request_session(req); |
| 452 | ceph_unreserve_caps(&req->r_caps_reservation); | 458 | ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation); |
| 453 | kfree(req); | 459 | kfree(req); |
| 454 | } | 460 | } |
| 455 | 461 | ||
| @@ -512,7 +518,8 @@ static void __register_request(struct ceph_mds_client *mdsc, | |||
| 512 | { | 518 | { |
| 513 | req->r_tid = ++mdsc->last_tid; | 519 | req->r_tid = ++mdsc->last_tid; |
| 514 | if (req->r_num_caps) | 520 | if (req->r_num_caps) |
| 515 | ceph_reserve_caps(&req->r_caps_reservation, req->r_num_caps); | 521 | ceph_reserve_caps(mdsc, &req->r_caps_reservation, |
| 522 | req->r_num_caps); | ||
| 516 | dout("__register_request %p tid %lld\n", req, req->r_tid); | 523 | dout("__register_request %p tid %lld\n", req, req->r_tid); |
| 517 | ceph_mdsc_get_request(req); | 524 | ceph_mdsc_get_request(req); |
| 518 | __insert_request(mdsc, req); | 525 | __insert_request(mdsc, req); |
| @@ -704,6 +711,51 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
| 704 | } | 711 | } |
| 705 | 712 | ||
| 706 | /* | 713 | /* |
| 714 | * open sessions for any export targets for the given mds | ||
| 715 | * | ||
| 716 | * called under mdsc->mutex | ||
| 717 | */ | ||
| 718 | static void __open_export_target_sessions(struct ceph_mds_client *mdsc, | ||
| 719 | struct ceph_mds_session *session) | ||
| 720 | { | ||
| 721 | struct ceph_mds_info *mi; | ||
| 722 | struct ceph_mds_session *ts; | ||
| 723 | int i, mds = session->s_mds; | ||
| 724 | int target; | ||
| 725 | |||
| 726 | if (mds >= mdsc->mdsmap->m_max_mds) | ||
| 727 | return; | ||
| 728 | mi = &mdsc->mdsmap->m_info[mds]; | ||
| 729 | dout("open_export_target_sessions for mds%d (%d targets)\n", | ||
| 730 | session->s_mds, mi->num_export_targets); | ||
| 731 | |||
| 732 | for (i = 0; i < mi->num_export_targets; i++) { | ||
| 733 | target = mi->export_targets[i]; | ||
| 734 | ts = __ceph_lookup_mds_session(mdsc, target); | ||
| 735 | if (!ts) { | ||
| 736 | ts = register_session(mdsc, target); | ||
| 737 | if (IS_ERR(ts)) | ||
| 738 | return; | ||
| 739 | } | ||
| 740 | if (session->s_state == CEPH_MDS_SESSION_NEW || | ||
| 741 | session->s_state == CEPH_MDS_SESSION_CLOSING) | ||
| 742 | __open_session(mdsc, session); | ||
| 743 | else | ||
| 744 | dout(" mds%d target mds%d %p is %s\n", session->s_mds, | ||
| 745 | i, ts, session_state_name(ts->s_state)); | ||
| 746 | ceph_put_mds_session(ts); | ||
| 747 | } | ||
| 748 | } | ||
| 749 | |||
| 750 | void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, | ||
| 751 | struct ceph_mds_session *session) | ||
| 752 | { | ||
| 753 | mutex_lock(&mdsc->mutex); | ||
| 754 | __open_export_target_sessions(mdsc, session); | ||
| 755 | mutex_unlock(&mdsc->mutex); | ||
| 756 | } | ||
| 757 | |||
| 758 | /* | ||
| 707 | * session caps | 759 | * session caps |
| 708 | */ | 760 | */ |
| 709 | 761 | ||
| @@ -764,7 +816,7 @@ static int iterate_session_caps(struct ceph_mds_session *session, | |||
| 764 | last_inode = NULL; | 816 | last_inode = NULL; |
| 765 | } | 817 | } |
| 766 | if (old_cap) { | 818 | if (old_cap) { |
| 767 | ceph_put_cap(old_cap); | 819 | ceph_put_cap(session->s_mdsc, old_cap); |
| 768 | old_cap = NULL; | 820 | old_cap = NULL; |
| 769 | } | 821 | } |
| 770 | 822 | ||
| @@ -793,7 +845,7 @@ out: | |||
| 793 | if (last_inode) | 845 | if (last_inode) |
| 794 | iput(last_inode); | 846 | iput(last_inode); |
| 795 | if (old_cap) | 847 | if (old_cap) |
| 796 | ceph_put_cap(old_cap); | 848 | ceph_put_cap(session->s_mdsc, old_cap); |
| 797 | 849 | ||
| 798 | return ret; | 850 | return ret; |
| 799 | } | 851 | } |
| @@ -1067,15 +1119,16 @@ static int trim_caps(struct ceph_mds_client *mdsc, | |||
| 1067 | * Called under s_mutex. | 1119 | * Called under s_mutex. |
| 1068 | */ | 1120 | */ |
| 1069 | int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | 1121 | int ceph_add_cap_releases(struct ceph_mds_client *mdsc, |
| 1070 | struct ceph_mds_session *session, | 1122 | struct ceph_mds_session *session) |
| 1071 | int extra) | ||
| 1072 | { | 1123 | { |
| 1073 | struct ceph_msg *msg; | 1124 | struct ceph_msg *msg, *partial = NULL; |
| 1074 | struct ceph_mds_cap_release *head; | 1125 | struct ceph_mds_cap_release *head; |
| 1075 | int err = -ENOMEM; | 1126 | int err = -ENOMEM; |
| 1127 | int extra = mdsc->client->mount_args->cap_release_safety; | ||
| 1128 | int num; | ||
| 1076 | 1129 | ||
| 1077 | if (extra < 0) | 1130 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, |
| 1078 | extra = mdsc->client->mount_args->cap_release_safety; | 1131 | extra); |
| 1079 | 1132 | ||
| 1080 | spin_lock(&session->s_cap_lock); | 1133 | spin_lock(&session->s_cap_lock); |
| 1081 | 1134 | ||
| @@ -1084,9 +1137,14 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
| 1084 | struct ceph_msg, | 1137 | struct ceph_msg, |
| 1085 | list_head); | 1138 | list_head); |
| 1086 | head = msg->front.iov_base; | 1139 | head = msg->front.iov_base; |
| 1087 | extra += CEPH_CAPS_PER_RELEASE - le32_to_cpu(head->num); | 1140 | num = le32_to_cpu(head->num); |
| 1141 | if (num) { | ||
| 1142 | dout(" partial %p with (%d/%d)\n", msg, num, | ||
| 1143 | (int)CEPH_CAPS_PER_RELEASE); | ||
| 1144 | extra += CEPH_CAPS_PER_RELEASE - num; | ||
| 1145 | partial = msg; | ||
| 1146 | } | ||
| 1088 | } | 1147 | } |
| 1089 | |||
| 1090 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { | 1148 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { |
| 1091 | spin_unlock(&session->s_cap_lock); | 1149 | spin_unlock(&session->s_cap_lock); |
| 1092 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, | 1150 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, |
| @@ -1103,19 +1161,14 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
| 1103 | session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; | 1161 | session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; |
| 1104 | } | 1162 | } |
| 1105 | 1163 | ||
| 1106 | if (!list_empty(&session->s_cap_releases)) { | 1164 | if (partial) { |
| 1107 | msg = list_first_entry(&session->s_cap_releases, | 1165 | head = partial->front.iov_base; |
| 1108 | struct ceph_msg, | 1166 | num = le32_to_cpu(head->num); |
| 1109 | list_head); | 1167 | dout(" queueing partial %p with %d/%d\n", partial, num, |
| 1110 | head = msg->front.iov_base; | 1168 | (int)CEPH_CAPS_PER_RELEASE); |
| 1111 | if (head->num) { | 1169 | list_move_tail(&partial->list_head, |
| 1112 | dout(" queueing non-full %p (%d)\n", msg, | 1170 | &session->s_cap_releases_done); |
| 1113 | le32_to_cpu(head->num)); | 1171 | session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num; |
| 1114 | list_move_tail(&msg->list_head, | ||
| 1115 | &session->s_cap_releases_done); | ||
| 1116 | session->s_num_cap_releases -= | ||
| 1117 | CEPH_CAPS_PER_RELEASE - le32_to_cpu(head->num); | ||
| 1118 | } | ||
| 1119 | } | 1172 | } |
| 1120 | err = 0; | 1173 | err = 0; |
| 1121 | spin_unlock(&session->s_cap_lock); | 1174 | spin_unlock(&session->s_cap_lock); |
| @@ -1250,6 +1303,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
| 1250 | return ERR_PTR(-ENOMEM); | 1303 | return ERR_PTR(-ENOMEM); |
| 1251 | 1304 | ||
| 1252 | mutex_init(&req->r_fill_mutex); | 1305 | mutex_init(&req->r_fill_mutex); |
| 1306 | req->r_mdsc = mdsc; | ||
| 1253 | req->r_started = jiffies; | 1307 | req->r_started = jiffies; |
| 1254 | req->r_resend_mds = -1; | 1308 | req->r_resend_mds = -1; |
| 1255 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); | 1309 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); |
| @@ -1580,6 +1634,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
| 1580 | 1634 | ||
| 1581 | req->r_mds = mds; | 1635 | req->r_mds = mds; |
| 1582 | req->r_attempts++; | 1636 | req->r_attempts++; |
| 1637 | if (req->r_inode) { | ||
| 1638 | struct ceph_cap *cap = | ||
| 1639 | ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds); | ||
| 1640 | |||
| 1641 | if (cap) | ||
| 1642 | req->r_sent_on_mseq = cap->mseq; | ||
| 1643 | else | ||
| 1644 | req->r_sent_on_mseq = -1; | ||
| 1645 | } | ||
| 1583 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, | 1646 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, |
| 1584 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); | 1647 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); |
| 1585 | 1648 | ||
| @@ -1914,21 +1977,40 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 1914 | result = le32_to_cpu(head->result); | 1977 | result = le32_to_cpu(head->result); |
| 1915 | 1978 | ||
| 1916 | /* | 1979 | /* |
| 1917 | * Tolerate 2 consecutive ESTALEs from the same mds. | 1980 | * Handle an ESTALE |
| 1918 | * FIXME: we should be looking at the cap migrate_seq. | 1981 | * if we're not talking to the authority, send to them |
| 1982 | * if the authority has changed while we weren't looking, | ||
| 1983 | * send to new authority | ||
| 1984 | * Otherwise we just have to return an ESTALE | ||
| 1919 | */ | 1985 | */ |
| 1920 | if (result == -ESTALE) { | 1986 | if (result == -ESTALE) { |
| 1921 | req->r_direct_mode = USE_AUTH_MDS; | 1987 | dout("got ESTALE on request %llu", req->r_tid); |
| 1922 | req->r_num_stale++; | 1988 | if (!req->r_inode) { |
| 1923 | if (req->r_num_stale <= 2) { | 1989 | /* do nothing; not an authority problem */ |
| 1990 | } else if (req->r_direct_mode != USE_AUTH_MDS) { | ||
| 1991 | dout("not using auth, setting for that now"); | ||
| 1992 | req->r_direct_mode = USE_AUTH_MDS; | ||
| 1924 | __do_request(mdsc, req); | 1993 | __do_request(mdsc, req); |
| 1925 | mutex_unlock(&mdsc->mutex); | 1994 | mutex_unlock(&mdsc->mutex); |
| 1926 | goto out; | 1995 | goto out; |
| 1996 | } else { | ||
| 1997 | struct ceph_inode_info *ci = ceph_inode(req->r_inode); | ||
| 1998 | struct ceph_cap *cap = | ||
| 1999 | ceph_get_cap_for_mds(ci, req->r_mds); | ||
| 2000 | |||
| 2001 | dout("already using auth"); | ||
| 2002 | if ((!cap || cap != ci->i_auth_cap) || | ||
| 2003 | (cap->mseq != req->r_sent_on_mseq)) { | ||
| 2004 | dout("but cap changed, so resending"); | ||
| 2005 | __do_request(mdsc, req); | ||
| 2006 | mutex_unlock(&mdsc->mutex); | ||
| 2007 | goto out; | ||
| 2008 | } | ||
| 1927 | } | 2009 | } |
| 1928 | } else { | 2010 | dout("have to return ESTALE on request %llu", req->r_tid); |
| 1929 | req->r_num_stale = 0; | ||
| 1930 | } | 2011 | } |
| 1931 | 2012 | ||
| 2013 | |||
| 1932 | if (head->safe) { | 2014 | if (head->safe) { |
| 1933 | req->r_got_safe = true; | 2015 | req->r_got_safe = true; |
| 1934 | __unregister_request(mdsc, req); | 2016 | __unregister_request(mdsc, req); |
| @@ -1985,7 +2067,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 1985 | if (err == 0) { | 2067 | if (err == 0) { |
| 1986 | if (result == 0 && rinfo->dir_nr) | 2068 | if (result == 0 && rinfo->dir_nr) |
| 1987 | ceph_readdir_prepopulate(req, req->r_session); | 2069 | ceph_readdir_prepopulate(req, req->r_session); |
| 1988 | ceph_unreserve_caps(&req->r_caps_reservation); | 2070 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); |
| 1989 | } | 2071 | } |
| 1990 | mutex_unlock(&req->r_fill_mutex); | 2072 | mutex_unlock(&req->r_fill_mutex); |
| 1991 | 2073 | ||
| @@ -2005,7 +2087,7 @@ out_err: | |||
| 2005 | } | 2087 | } |
| 2006 | mutex_unlock(&mdsc->mutex); | 2088 | mutex_unlock(&mdsc->mutex); |
| 2007 | 2089 | ||
| 2008 | ceph_add_cap_releases(mdsc, req->r_session, -1); | 2090 | ceph_add_cap_releases(mdsc, req->r_session); |
| 2009 | mutex_unlock(&session->s_mutex); | 2091 | mutex_unlock(&session->s_mutex); |
| 2010 | 2092 | ||
| 2011 | /* kick calling process */ | 2093 | /* kick calling process */ |
| @@ -2193,9 +2275,14 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, | |||
| 2193 | static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | 2275 | static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, |
| 2194 | void *arg) | 2276 | void *arg) |
| 2195 | { | 2277 | { |
| 2196 | struct ceph_mds_cap_reconnect rec; | 2278 | union { |
| 2279 | struct ceph_mds_cap_reconnect v2; | ||
| 2280 | struct ceph_mds_cap_reconnect_v1 v1; | ||
| 2281 | } rec; | ||
| 2282 | size_t reclen; | ||
| 2197 | struct ceph_inode_info *ci; | 2283 | struct ceph_inode_info *ci; |
| 2198 | struct ceph_pagelist *pagelist = arg; | 2284 | struct ceph_reconnect_state *recon_state = arg; |
| 2285 | struct ceph_pagelist *pagelist = recon_state->pagelist; | ||
| 2199 | char *path; | 2286 | char *path; |
| 2200 | int pathlen, err; | 2287 | int pathlen, err; |
| 2201 | u64 pathbase; | 2288 | u64 pathbase; |
| @@ -2228,17 +2315,44 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2228 | spin_lock(&inode->i_lock); | 2315 | spin_lock(&inode->i_lock); |
| 2229 | cap->seq = 0; /* reset cap seq */ | 2316 | cap->seq = 0; /* reset cap seq */ |
| 2230 | cap->issue_seq = 0; /* and issue_seq */ | 2317 | cap->issue_seq = 0; /* and issue_seq */ |
| 2231 | rec.cap_id = cpu_to_le64(cap->cap_id); | 2318 | |
| 2232 | rec.pathbase = cpu_to_le64(pathbase); | 2319 | if (recon_state->flock) { |
| 2233 | rec.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); | 2320 | rec.v2.cap_id = cpu_to_le64(cap->cap_id); |
| 2234 | rec.issued = cpu_to_le32(cap->issued); | 2321 | rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); |
| 2235 | rec.size = cpu_to_le64(inode->i_size); | 2322 | rec.v2.issued = cpu_to_le32(cap->issued); |
| 2236 | ceph_encode_timespec(&rec.mtime, &inode->i_mtime); | 2323 | rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); |
| 2237 | ceph_encode_timespec(&rec.atime, &inode->i_atime); | 2324 | rec.v2.pathbase = cpu_to_le64(pathbase); |
| 2238 | rec.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); | 2325 | rec.v2.flock_len = 0; |
| 2326 | reclen = sizeof(rec.v2); | ||
| 2327 | } else { | ||
| 2328 | rec.v1.cap_id = cpu_to_le64(cap->cap_id); | ||
| 2329 | rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); | ||
| 2330 | rec.v1.issued = cpu_to_le32(cap->issued); | ||
| 2331 | rec.v1.size = cpu_to_le64(inode->i_size); | ||
| 2332 | ceph_encode_timespec(&rec.v1.mtime, &inode->i_mtime); | ||
| 2333 | ceph_encode_timespec(&rec.v1.atime, &inode->i_atime); | ||
| 2334 | rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); | ||
| 2335 | rec.v1.pathbase = cpu_to_le64(pathbase); | ||
| 2336 | reclen = sizeof(rec.v1); | ||
| 2337 | } | ||
| 2239 | spin_unlock(&inode->i_lock); | 2338 | spin_unlock(&inode->i_lock); |
| 2240 | 2339 | ||
| 2241 | err = ceph_pagelist_append(pagelist, &rec, sizeof(rec)); | 2340 | if (recon_state->flock) { |
| 2341 | int num_fcntl_locks, num_flock_locks; | ||
| 2342 | |||
| 2343 | lock_kernel(); | ||
| 2344 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | ||
| 2345 | rec.v2.flock_len = (2*sizeof(u32) + | ||
| 2346 | (num_fcntl_locks+num_flock_locks) * | ||
| 2347 | sizeof(struct ceph_filelock)); | ||
| 2348 | |||
| 2349 | err = ceph_pagelist_append(pagelist, &rec, reclen); | ||
| 2350 | if (!err) | ||
| 2351 | err = ceph_encode_locks(inode, pagelist, | ||
| 2352 | num_fcntl_locks, | ||
| 2353 | num_flock_locks); | ||
| 2354 | unlock_kernel(); | ||
| 2355 | } | ||
| 2242 | 2356 | ||
| 2243 | out: | 2357 | out: |
| 2244 | kfree(path); | 2358 | kfree(path); |
| @@ -2267,6 +2381,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
| 2267 | int mds = session->s_mds; | 2381 | int mds = session->s_mds; |
| 2268 | int err = -ENOMEM; | 2382 | int err = -ENOMEM; |
| 2269 | struct ceph_pagelist *pagelist; | 2383 | struct ceph_pagelist *pagelist; |
| 2384 | struct ceph_reconnect_state recon_state; | ||
| 2270 | 2385 | ||
| 2271 | pr_info("mds%d reconnect start\n", mds); | 2386 | pr_info("mds%d reconnect start\n", mds); |
| 2272 | 2387 | ||
| @@ -2301,7 +2416,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
| 2301 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); | 2416 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); |
| 2302 | if (err) | 2417 | if (err) |
| 2303 | goto fail; | 2418 | goto fail; |
| 2304 | err = iterate_session_caps(session, encode_caps_cb, pagelist); | 2419 | |
| 2420 | recon_state.pagelist = pagelist; | ||
| 2421 | recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK; | ||
| 2422 | err = iterate_session_caps(session, encode_caps_cb, &recon_state); | ||
| 2305 | if (err < 0) | 2423 | if (err < 0) |
| 2306 | goto fail; | 2424 | goto fail; |
| 2307 | 2425 | ||
| @@ -2326,6 +2444,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
| 2326 | } | 2444 | } |
| 2327 | 2445 | ||
| 2328 | reply->pagelist = pagelist; | 2446 | reply->pagelist = pagelist; |
| 2447 | if (recon_state.flock) | ||
| 2448 | reply->hdr.version = cpu_to_le16(2); | ||
| 2329 | reply->hdr.data_len = cpu_to_le32(pagelist->length); | 2449 | reply->hdr.data_len = cpu_to_le32(pagelist->length); |
| 2330 | reply->nr_pages = calc_pages_for(0, pagelist->length); | 2450 | reply->nr_pages = calc_pages_for(0, pagelist->length); |
| 2331 | ceph_con_send(&session->s_con, reply); | 2451 | ceph_con_send(&session->s_con, reply); |
| @@ -2376,9 +2496,11 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
| 2376 | oldstate = ceph_mdsmap_get_state(oldmap, i); | 2496 | oldstate = ceph_mdsmap_get_state(oldmap, i); |
| 2377 | newstate = ceph_mdsmap_get_state(newmap, i); | 2497 | newstate = ceph_mdsmap_get_state(newmap, i); |
| 2378 | 2498 | ||
| 2379 | dout("check_new_map mds%d state %s -> %s (session %s)\n", | 2499 | dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n", |
| 2380 | i, ceph_mds_state_name(oldstate), | 2500 | i, ceph_mds_state_name(oldstate), |
| 2501 | ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", | ||
| 2381 | ceph_mds_state_name(newstate), | 2502 | ceph_mds_state_name(newstate), |
| 2503 | ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", | ||
| 2382 | session_state_name(s->s_state)); | 2504 | session_state_name(s->s_state)); |
| 2383 | 2505 | ||
| 2384 | if (memcmp(ceph_mdsmap_get_addr(oldmap, i), | 2506 | if (memcmp(ceph_mdsmap_get_addr(oldmap, i), |
| @@ -2428,6 +2550,21 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
| 2428 | wake_up_session_caps(s, 1); | 2550 | wake_up_session_caps(s, 1); |
| 2429 | } | 2551 | } |
| 2430 | } | 2552 | } |
| 2553 | |||
| 2554 | for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) { | ||
| 2555 | s = mdsc->sessions[i]; | ||
| 2556 | if (!s) | ||
| 2557 | continue; | ||
| 2558 | if (!ceph_mdsmap_is_laggy(newmap, i)) | ||
| 2559 | continue; | ||
| 2560 | if (s->s_state == CEPH_MDS_SESSION_OPEN || | ||
| 2561 | s->s_state == CEPH_MDS_SESSION_HUNG || | ||
| 2562 | s->s_state == CEPH_MDS_SESSION_CLOSING) { | ||
| 2563 | dout(" connecting to export targets of laggy mds%d\n", | ||
| 2564 | i); | ||
| 2565 | __open_export_target_sessions(mdsc, s); | ||
| 2566 | } | ||
| 2567 | } | ||
| 2431 | } | 2568 | } |
| 2432 | 2569 | ||
| 2433 | 2570 | ||
| @@ -2715,7 +2852,7 @@ static void delayed_work(struct work_struct *work) | |||
| 2715 | send_renew_caps(mdsc, s); | 2852 | send_renew_caps(mdsc, s); |
| 2716 | else | 2853 | else |
| 2717 | ceph_con_keepalive(&s->s_con); | 2854 | ceph_con_keepalive(&s->s_con); |
| 2718 | ceph_add_cap_releases(mdsc, s, -1); | 2855 | ceph_add_cap_releases(mdsc, s); |
| 2719 | if (s->s_state == CEPH_MDS_SESSION_OPEN || | 2856 | if (s->s_state == CEPH_MDS_SESSION_OPEN || |
| 2720 | s->s_state == CEPH_MDS_SESSION_HUNG) | 2857 | s->s_state == CEPH_MDS_SESSION_HUNG) |
| 2721 | ceph_send_cap_releases(mdsc, s); | 2858 | ceph_send_cap_releases(mdsc, s); |
| @@ -2764,6 +2901,9 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
| 2764 | spin_lock_init(&mdsc->dentry_lru_lock); | 2901 | spin_lock_init(&mdsc->dentry_lru_lock); |
| 2765 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2902 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
| 2766 | 2903 | ||
| 2904 | ceph_caps_init(mdsc); | ||
| 2905 | ceph_adjust_min_caps(mdsc, client->min_caps); | ||
| 2906 | |||
| 2767 | return 0; | 2907 | return 0; |
| 2768 | } | 2908 | } |
| 2769 | 2909 | ||
| @@ -2959,6 +3099,7 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | |||
| 2959 | if (mdsc->mdsmap) | 3099 | if (mdsc->mdsmap) |
| 2960 | ceph_mdsmap_destroy(mdsc->mdsmap); | 3100 | ceph_mdsmap_destroy(mdsc->mdsmap); |
| 2961 | kfree(mdsc->sessions); | 3101 | kfree(mdsc->sessions); |
| 3102 | ceph_caps_finalize(mdsc); | ||
| 2962 | } | 3103 | } |
| 2963 | 3104 | ||
| 2964 | 3105 | ||
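
When the session peer advertises CEPH_FEATURE_FLOCK, encode_caps_cb() above emits the v2 reconnect record and sizes flock_len as two u32 counters plus one ceph_filelock per lock, matching what ceph_encode_locks() then appends. A standalone sketch of that arithmetic; the record layout below follows the fields filled in by lock_to_ceph_filelock() and is an assumption, not a copy of the wire-format header:

#include <stdint.h>
#include <stdio.h>

/* Assumed layout, mirroring the fields set by lock_to_ceph_filelock(). */
struct filelock_rec {
    uint64_t start;
    uint64_t length;
    uint64_t client;
    uint64_t pid;
    uint64_t pid_namespace;
    uint8_t  type;
} __attribute__((packed));

/* Bytes appended after the v2 cap record: #fcntl locks, the fcntl locks,
 * #flock locks, the flock locks. */
static uint32_t flock_len(int num_fcntl_locks, int num_flock_locks)
{
    return 2 * sizeof(uint32_t) +
           (uint32_t)(num_fcntl_locks + num_flock_locks) *
           sizeof(struct filelock_rec);
}

int main(void)
{
    printf("2 fcntl + 1 flock locks -> %u bytes of lock state\n",
           flock_len(2, 1));
    return 0;
}
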
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 952410c60d09..ab7e89f5e344 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
| @@ -151,6 +151,7 @@ typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc, | |||
| 151 | struct ceph_mds_request { | 151 | struct ceph_mds_request { |
| 152 | u64 r_tid; /* transaction id */ | 152 | u64 r_tid; /* transaction id */ |
| 153 | struct rb_node r_node; | 153 | struct rb_node r_node; |
| 154 | struct ceph_mds_client *r_mdsc; | ||
| 154 | 155 | ||
| 155 | int r_op; /* mds op code */ | 156 | int r_op; /* mds op code */ |
| 156 | int r_mds; | 157 | int r_mds; |
| @@ -207,8 +208,8 @@ struct ceph_mds_request { | |||
| 207 | 208 | ||
| 208 | int r_attempts; /* resend attempts */ | 209 | int r_attempts; /* resend attempts */ |
| 209 | int r_num_fwd; /* number of forward attempts */ | 210 | int r_num_fwd; /* number of forward attempts */ |
| 210 | int r_num_stale; | ||
| 211 | int r_resend_mds; /* mds to resend to next, if any*/ | 211 | int r_resend_mds; /* mds to resend to next, if any*/ |
| 212 | u32 r_sent_on_mseq; /* cap mseq request was sent at*/ | ||
| 212 | 213 | ||
| 213 | struct kref r_kref; | 214 | struct kref r_kref; |
| 214 | struct list_head r_wait; | 215 | struct list_head r_wait; |
| @@ -267,6 +268,27 @@ struct ceph_mds_client { | |||
| 267 | spinlock_t cap_dirty_lock; /* protects above items */ | 268 | spinlock_t cap_dirty_lock; /* protects above items */ |
| 268 | wait_queue_head_t cap_flushing_wq; | 269 | wait_queue_head_t cap_flushing_wq; |
| 269 | 270 | ||
| 271 | /* | ||
| 272 | * Cap reservations | ||
| 273 | * | ||
| 274 | * Maintain a global pool of preallocated struct ceph_caps, referenced | ||
| 275 | * by struct ceph_caps_reservations. This ensures that we preallocate | ||
| 276 | * memory needed to successfully process an MDS response. (If an MDS | ||
| 277 | * sends us cap information and we fail to process it, we will have | ||
| 278 | * problems due to the client and MDS being out of sync.) | ||
| 279 | * | ||
| 280 | * Reservations are 'owned' by a ceph_cap_reservation context. | ||
| 281 | */ | ||
| 282 | spinlock_t caps_list_lock; | ||
| 283 | struct list_head caps_list; /* unused (reserved or | ||
| 284 | unreserved) */ | ||
| 285 | int caps_total_count; /* total caps allocated */ | ||
| 286 | int caps_use_count; /* in use */ | ||
| 287 | int caps_reserve_count; /* unused, reserved */ | ||
| 288 | int caps_avail_count; /* unused, unreserved */ | ||
| 289 | int caps_min_count; /* keep at least this many | ||
| 290 | (unreserved) */ | ||
| 291 | |||
| 270 | #ifdef CONFIG_DEBUG_FS | 292 | #ifdef CONFIG_DEBUG_FS |
| 271 | struct dentry *debugfs_file; | 293 | struct dentry *debugfs_file; |
| 272 | #endif | 294 | #endif |
| @@ -324,8 +346,7 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) | |||
| 324 | } | 346 | } |
| 325 | 347 | ||
| 326 | extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | 348 | extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, |
| 327 | struct ceph_mds_session *session, | 349 | struct ceph_mds_session *session); |
| 328 | int extra); | ||
| 329 | extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, | 350 | extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, |
| 330 | struct ceph_mds_session *session); | 351 | struct ceph_mds_session *session); |
| 331 | 352 | ||
| @@ -343,4 +364,7 @@ extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, | |||
| 343 | extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, | 364 | extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, |
| 344 | struct ceph_msg *msg); | 365 | struct ceph_msg *msg); |
| 345 | 366 | ||
| 367 | extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, | ||
| 368 | struct ceph_mds_session *session); | ||
| 369 | |||
| 346 | #endif | 370 | #endif |
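
The cap reservation pool that moved into struct ceph_mds_client partitions preallocated caps into in-use, reserved and available counts, with caps_min_count as a floor. A toy model of that accounting, not the kernel implementation:

#include <stdio.h>

/* Simplified pool counters, mirroring the fields added to ceph_mds_client. */
static int total, use, reserve, avail, min_count = 10;

static void check(void)
{
    /* Invariant the counters are meant to preserve. */
    if (total != use + reserve + avail)
        printf("accounting broken!\n");
}

static void reserve_caps(int need)
{
    int take = need < avail ? need : avail;   /* reuse caps already free */
    avail   -= take;
    reserve += take;
    total   += need - take;                   /* the rest is newly allocated */
    reserve += need - take;
    check();
}

static void unreserve_caps(int n)
{
    reserve -= n;
    avail   += n;   /* the real code may free any surplus above min_count */
    check();
}

int main(void)
{
    total = avail = min_count;
    reserve_caps(16);
    printf("total %d use %d reserve %d avail %d\n", total, use, reserve, avail);
    unreserve_caps(16);
    printf("total %d use %d reserve %d avail %d\n", total, use, reserve, avail);
    return 0;
}
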
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index c4c498e6dfef..040be6d1150b 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
| @@ -85,6 +85,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
| 85 | struct ceph_entity_addr addr; | 85 | struct ceph_entity_addr addr; |
| 86 | u32 num_export_targets; | 86 | u32 num_export_targets; |
| 87 | void *pexport_targets = NULL; | 87 | void *pexport_targets = NULL; |
| 88 | struct ceph_timespec laggy_since; | ||
| 88 | 89 | ||
| 89 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); | 90 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); |
| 90 | global_id = ceph_decode_64(p); | 91 | global_id = ceph_decode_64(p); |
| @@ -103,7 +104,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
| 103 | state_seq = ceph_decode_64(p); | 104 | state_seq = ceph_decode_64(p); |
| 104 | ceph_decode_copy(p, &addr, sizeof(addr)); | 105 | ceph_decode_copy(p, &addr, sizeof(addr)); |
| 105 | ceph_decode_addr(&addr); | 106 | ceph_decode_addr(&addr); |
| 106 | *p += sizeof(struct ceph_timespec); | 107 | ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); |
| 107 | *p += sizeof(u32); | 108 | *p += sizeof(u32); |
| 108 | ceph_decode_32_safe(p, end, namelen, bad); | 109 | ceph_decode_32_safe(p, end, namelen, bad); |
| 109 | *p += namelen; | 110 | *p += namelen; |
| @@ -122,6 +123,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
| 122 | m->m_info[mds].global_id = global_id; | 123 | m->m_info[mds].global_id = global_id; |
| 123 | m->m_info[mds].state = state; | 124 | m->m_info[mds].state = state; |
| 124 | m->m_info[mds].addr = addr; | 125 | m->m_info[mds].addr = addr; |
| 126 | m->m_info[mds].laggy = | ||
| 127 | (laggy_since.tv_sec != 0 || | ||
| 128 | laggy_since.tv_nsec != 0); | ||
| 125 | m->m_info[mds].num_export_targets = num_export_targets; | 129 | m->m_info[mds].num_export_targets = num_export_targets; |
| 126 | if (num_export_targets) { | 130 | if (num_export_targets) { |
| 127 | m->m_info[mds].export_targets = | 131 | m->m_info[mds].export_targets = |
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h index eacc131aa5cb..4c5cb0880bba 100644 --- a/fs/ceph/mdsmap.h +++ b/fs/ceph/mdsmap.h | |||
| @@ -13,6 +13,7 @@ struct ceph_mds_info { | |||
| 13 | struct ceph_entity_addr addr; | 13 | struct ceph_entity_addr addr; |
| 14 | s32 state; | 14 | s32 state; |
| 15 | int num_export_targets; | 15 | int num_export_targets; |
| 16 | bool laggy; | ||
| 16 | u32 *export_targets; | 17 | u32 *export_targets; |
| 17 | }; | 18 | }; |
| 18 | 19 | ||
| @@ -47,6 +48,13 @@ static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) | |||
| 47 | return m->m_info[w].state; | 48 | return m->m_info[w].state; |
| 48 | } | 49 | } |
| 49 | 50 | ||
| 51 | static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) | ||
| 52 | { | ||
| 53 | if (w >= 0 && w < m->m_max_mds) | ||
| 54 | return m->m_info[w].laggy; | ||
| 55 | return false; | ||
| 56 | } | ||
| 57 | |||
| 50 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); | 58 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); |
| 51 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); | 59 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); |
| 52 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); | 60 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); |
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 15167b2daa55..2502d76fcec1 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
| @@ -108,7 +108,7 @@ void ceph_msgr_exit(void) | |||
| 108 | destroy_workqueue(ceph_msgr_wq); | 108 | destroy_workqueue(ceph_msgr_wq); |
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | void ceph_msgr_flush() | 111 | void ceph_msgr_flush(void) |
| 112 | { | 112 | { |
| 113 | flush_workqueue(ceph_msgr_wq); | 113 | flush_workqueue(ceph_msgr_wq); |
| 114 | } | 114 | } |
| @@ -647,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
| 647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
| 648 | con->connect_seq, global_seq, proto); | 648 | con->connect_seq, global_seq, proto); |
| 649 | 649 | ||
| 650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT); | 650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED); |
| 651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
| 652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
| 653 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 653 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
| @@ -1081,11 +1081,11 @@ static int process_banner(struct ceph_connection *con) | |||
| 1081 | sizeof(con->peer_addr)) != 0 && | 1081 | sizeof(con->peer_addr)) != 0 && |
| 1082 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && | 1082 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && |
| 1083 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { | 1083 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { |
| 1084 | pr_warning("wrong peer, want %s/%lld, got %s/%lld\n", | 1084 | pr_warning("wrong peer, want %s/%d, got %s/%d\n", |
| 1085 | pr_addr(&con->peer_addr.in_addr), | 1085 | pr_addr(&con->peer_addr.in_addr), |
| 1086 | le64_to_cpu(con->peer_addr.nonce), | 1086 | (int)le32_to_cpu(con->peer_addr.nonce), |
| 1087 | pr_addr(&con->actual_peer_addr.in_addr), | 1087 | pr_addr(&con->actual_peer_addr.in_addr), |
| 1088 | le64_to_cpu(con->actual_peer_addr.nonce)); | 1088 | (int)le32_to_cpu(con->actual_peer_addr.nonce)); |
| 1089 | con->error_msg = "wrong peer at address"; | 1089 | con->error_msg = "wrong peer at address"; |
| 1090 | return -1; | 1090 | return -1; |
| 1091 | } | 1091 | } |
| @@ -1123,8 +1123,8 @@ static void fail_protocol(struct ceph_connection *con) | |||
| 1123 | 1123 | ||
| 1124 | static int process_connect(struct ceph_connection *con) | 1124 | static int process_connect(struct ceph_connection *con) |
| 1125 | { | 1125 | { |
| 1126 | u64 sup_feat = CEPH_FEATURE_SUPPORTED_CLIENT; | 1126 | u64 sup_feat = CEPH_FEATURE_SUPPORTED; |
| 1127 | u64 req_feat = CEPH_FEATURE_REQUIRED_CLIENT; | 1127 | u64 req_feat = CEPH_FEATURE_REQUIRED; |
| 1128 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1128 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
| 1129 | 1129 | ||
| 1130 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1130 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
| @@ -1302,8 +1302,8 @@ static void process_ack(struct ceph_connection *con) | |||
| 1302 | 1302 | ||
| 1303 | 1303 | ||
| 1304 | static int read_partial_message_section(struct ceph_connection *con, | 1304 | static int read_partial_message_section(struct ceph_connection *con, |
| 1305 | struct kvec *section, unsigned int sec_len, | 1305 | struct kvec *section, |
| 1306 | u32 *crc) | 1306 | unsigned int sec_len, u32 *crc) |
| 1307 | { | 1307 | { |
| 1308 | int left; | 1308 | int left; |
| 1309 | int ret; | 1309 | int ret; |
| @@ -1434,7 +1434,8 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1434 | 1434 | ||
| 1435 | /* middle */ | 1435 | /* middle */ |
| 1436 | if (m->middle) { | 1436 | if (m->middle) { |
| 1437 | ret = read_partial_message_section(con, &m->middle->vec, middle_len, | 1437 | ret = read_partial_message_section(con, &m->middle->vec, |
| 1438 | middle_len, | ||
| 1438 | &con->in_middle_crc); | 1439 | &con->in_middle_crc); |
| 1439 | if (ret <= 0) | 1440 | if (ret <= 0) |
| 1440 | return ret; | 1441 | return ret; |
| @@ -1920,7 +1921,7 @@ out: | |||
| 1920 | /* | 1921 | /* |
| 1921 | * in case we faulted due to authentication, invalidate our | 1922 | * in case we faulted due to authentication, invalidate our |
| 1922 | * current tickets so that we can get new ones. | 1923 | * current tickets so that we can get new ones. |
| 1923 | */ | 1924 | */ |
| 1924 | if (con->auth_retry && con->ops->invalidate_authorizer) { | 1925 | if (con->auth_retry && con->ops->invalidate_authorizer) { |
| 1925 | dout("calling invalidate_authorizer()\n"); | 1926 | dout("calling invalidate_authorizer()\n"); |
| 1926 | con->ops->invalidate_authorizer(con); | 1927 | con->ops->invalidate_authorizer(con); |
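
The messenger now advertises CEPH_FEATURE_SUPPORTED and requires CEPH_FEATURE_REQUIRED when connecting. One plausible reading of the negotiation in process_connect() is that the connection fails when the server's feature mask does not cover everything the client requires; a sketch of that check with made-up feature bits:

#include <stdint.h>
#include <stdio.h>

/* Made-up feature bits; the real values live in the ceph feature headers. */
#define FEAT_A (1ULL << 0)
#define FEAT_B (1ULL << 3)

/* Return nonzero if server_feat covers every bit in req_feat. */
static int features_ok(uint64_t req_feat, uint64_t server_feat,
                       uint64_t *missing)
{
    *missing = req_feat & ~server_feat;
    return *missing == 0;
}

int main(void)
{
    uint64_t missing;
    if (!features_ok(FEAT_A | FEAT_B, FEAT_A, &missing))
        printf("missing required features %llx, failing protocol\n",
               (unsigned long long)missing);
    return 0;
}
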
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 54fe01c50706..b2a5a3e4a671 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c | |||
| @@ -349,7 +349,7 @@ out: | |||
| 349 | } | 349 | } |
| 350 | 350 | ||
| 351 | /* | 351 | /* |
| 352 | * statfs | 352 | * generic requests (e.g., statfs, poolop) |
| 353 | */ | 353 | */ |
| 354 | static struct ceph_mon_generic_request *__lookup_generic_req( | 354 | static struct ceph_mon_generic_request *__lookup_generic_req( |
| 355 | struct ceph_mon_client *monc, u64 tid) | 355 | struct ceph_mon_client *monc, u64 tid) |
| @@ -442,6 +442,35 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con, | |||
| 442 | return m; | 442 | return m; |
| 443 | } | 443 | } |
| 444 | 444 | ||
| 445 | static int do_generic_request(struct ceph_mon_client *monc, | ||
| 446 | struct ceph_mon_generic_request *req) | ||
| 447 | { | ||
| 448 | int err; | ||
| 449 | |||
| 450 | /* register request */ | ||
| 451 | mutex_lock(&monc->mutex); | ||
| 452 | req->tid = ++monc->last_tid; | ||
| 453 | req->request->hdr.tid = cpu_to_le64(req->tid); | ||
| 454 | __insert_generic_request(monc, req); | ||
| 455 | monc->num_generic_requests++; | ||
| 456 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | ||
| 457 | mutex_unlock(&monc->mutex); | ||
| 458 | |||
| 459 | err = wait_for_completion_interruptible(&req->completion); | ||
| 460 | |||
| 461 | mutex_lock(&monc->mutex); | ||
| 462 | rb_erase(&req->node, &monc->generic_request_tree); | ||
| 463 | monc->num_generic_requests--; | ||
| 464 | mutex_unlock(&monc->mutex); | ||
| 465 | |||
| 466 | if (!err) | ||
| 467 | err = req->result; | ||
| 468 | return err; | ||
| 469 | } | ||
| 470 | |||
| 471 | /* | ||
| 472 | * statfs | ||
| 473 | */ | ||
| 445 | static void handle_statfs_reply(struct ceph_mon_client *monc, | 474 | static void handle_statfs_reply(struct ceph_mon_client *monc, |
| 446 | struct ceph_msg *msg) | 475 | struct ceph_msg *msg) |
| 447 | { | 476 | { |
| @@ -468,7 +497,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc, | |||
| 468 | return; | 497 | return; |
| 469 | 498 | ||
| 470 | bad: | 499 | bad: |
| 471 | pr_err("corrupt generic reply, no tid\n"); | 500 | pr_err("corrupt generic reply, tid %llu\n", tid); |
| 472 | ceph_msg_dump(msg); | 501 | ceph_msg_dump(msg); |
| 473 | } | 502 | } |
| 474 | 503 | ||
| @@ -487,6 +516,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | |||
| 487 | 516 | ||
| 488 | kref_init(&req->kref); | 517 | kref_init(&req->kref); |
| 489 | req->buf = buf; | 518 | req->buf = buf; |
| 519 | req->buf_len = sizeof(*buf); | ||
| 490 | init_completion(&req->completion); | 520 | init_completion(&req->completion); |
| 491 | 521 | ||
| 492 | err = -ENOMEM; | 522 | err = -ENOMEM; |
| @@ -504,33 +534,134 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | |||
| 504 | h->monhdr.session_mon_tid = 0; | 534 | h->monhdr.session_mon_tid = 0; |
| 505 | h->fsid = monc->monmap->fsid; | 535 | h->fsid = monc->monmap->fsid; |
| 506 | 536 | ||
| 507 | /* register request */ | 537 | err = do_generic_request(monc, req); |
| 508 | mutex_lock(&monc->mutex); | ||
| 509 | req->tid = ++monc->last_tid; | ||
| 510 | req->request->hdr.tid = cpu_to_le64(req->tid); | ||
| 511 | __insert_generic_request(monc, req); | ||
| 512 | monc->num_generic_requests++; | ||
| 513 | mutex_unlock(&monc->mutex); | ||
| 514 | 538 | ||
| 515 | /* send request and wait */ | 539 | out: |
| 516 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | 540 | kref_put(&req->kref, release_generic_request); |
| 517 | err = wait_for_completion_interruptible(&req->completion); | 541 | return err; |
| 542 | } | ||
| 543 | |||
| 544 | /* | ||
| 545 | * pool ops | ||
| 546 | */ | ||
| 547 | static int get_poolop_reply_buf(const char *src, size_t src_len, | ||
| 548 | char *dst, size_t dst_len) | ||
| 549 | { | ||
| 550 | u32 buf_len; | ||
| 551 | |||
| 552 | if (src_len != sizeof(u32) + dst_len) | ||
| 553 | return -EINVAL; | ||
| 554 | |||
| 555 | buf_len = le32_to_cpu(*(u32 *)src); | ||
| 556 | if (buf_len != dst_len) | ||
| 557 | return -EINVAL; | ||
| 558 | |||
| 559 | memcpy(dst, src + sizeof(u32), dst_len); | ||
| 560 | return 0; | ||
| 561 | } | ||
| 562 | |||
| 563 | static void handle_poolop_reply(struct ceph_mon_client *monc, | ||
| 564 | struct ceph_msg *msg) | ||
| 565 | { | ||
| 566 | struct ceph_mon_generic_request *req; | ||
| 567 | struct ceph_mon_poolop_reply *reply = msg->front.iov_base; | ||
| 568 | u64 tid = le64_to_cpu(msg->hdr.tid); | ||
| 569 | |||
| 570 | if (msg->front.iov_len < sizeof(*reply)) | ||
| 571 | goto bad; | ||
| 572 | dout("handle_poolop_reply %p tid %llu\n", msg, tid); | ||
| 518 | 573 | ||
| 519 | mutex_lock(&monc->mutex); | 574 | mutex_lock(&monc->mutex); |
| 520 | rb_erase(&req->node, &monc->generic_request_tree); | 575 | req = __lookup_generic_req(monc, tid); |
| 521 | monc->num_generic_requests--; | 576 | if (req) { |
| 577 | if (req->buf_len && | ||
| 578 | get_poolop_reply_buf(msg->front.iov_base + sizeof(*reply), | ||
| 579 | msg->front.iov_len - sizeof(*reply), | ||
| 580 | req->buf, req->buf_len) < 0) { | ||
| 581 | mutex_unlock(&monc->mutex); | ||
| 582 | goto bad; | ||
| 583 | } | ||
| 584 | req->result = le32_to_cpu(reply->reply_code); | ||
| 585 | get_generic_request(req); | ||
| 586 | } | ||
| 522 | mutex_unlock(&monc->mutex); | 587 | mutex_unlock(&monc->mutex); |
| 588 | if (req) { | ||
| 589 | complete(&req->completion); | ||
| 590 | put_generic_request(req); | ||
| 591 | } | ||
| 592 | return; | ||
| 523 | 593 | ||
| 524 | if (!err) | 594 | bad: |
| 525 | err = req->result; | 595 | pr_err("corrupt generic reply, tid %llu\n", tid); |
| 596 | ceph_msg_dump(msg); | ||
| 597 | } | ||
| 598 | |||
| 599 | /* | ||
| 600 | * Do a synchronous pool op. | ||
| 601 | */ | ||
| 602 | int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op, | ||
| 603 | u32 pool, u64 snapid, | ||
| 604 | char *buf, int len) | ||
| 605 | { | ||
| 606 | struct ceph_mon_generic_request *req; | ||
| 607 | struct ceph_mon_poolop *h; | ||
| 608 | int err; | ||
| 609 | |||
| 610 | req = kzalloc(sizeof(*req), GFP_NOFS); | ||
| 611 | if (!req) | ||
| 612 | return -ENOMEM; | ||
| 613 | |||
| 614 | kref_init(&req->kref); | ||
| 615 | req->buf = buf; | ||
| 616 | req->buf_len = len; | ||
| 617 | init_completion(&req->completion); | ||
| 618 | |||
| 619 | err = -ENOMEM; | ||
| 620 | req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS); | ||
| 621 | if (!req->request) | ||
| 622 | goto out; | ||
| 623 | req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS); | ||
| 624 | if (!req->reply) | ||
| 625 | goto out; | ||
| 626 | |||
| 627 | /* fill out request */ | ||
| 628 | req->request->hdr.version = cpu_to_le16(2); | ||
| 629 | h = req->request->front.iov_base; | ||
| 630 | h->monhdr.have_version = 0; | ||
| 631 | h->monhdr.session_mon = cpu_to_le16(-1); | ||
| 632 | h->monhdr.session_mon_tid = 0; | ||
| 633 | h->fsid = monc->monmap->fsid; | ||
| 634 | h->pool = cpu_to_le32(pool); | ||
| 635 | h->op = cpu_to_le32(op); | ||
| 636 | h->auid = 0; | ||
| 637 | h->snapid = cpu_to_le64(snapid); | ||
| 638 | h->name_len = 0; | ||
| 639 | |||
| 640 | err = do_generic_request(monc, req); | ||
| 526 | 641 | ||
| 527 | out: | 642 | out: |
| 528 | kref_put(&req->kref, release_generic_request); | 643 | kref_put(&req->kref, release_generic_request); |
| 529 | return err; | 644 | return err; |
| 530 | } | 645 | } |
| 531 | 646 | ||
| 647 | int ceph_monc_create_snapid(struct ceph_mon_client *monc, | ||
| 648 | u32 pool, u64 *snapid) | ||
| 649 | { | ||
| 650 | return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, | ||
| 651 | pool, 0, (char *)snapid, sizeof(*snapid)); | ||
| 652 | |||
| 653 | } | ||
| 654 | |||
| 655 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||
| 656 | u32 pool, u64 snapid) | ||
| 657 | { | ||
| 658 | return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, | ||
| 659 | pool, snapid, 0, 0); | ||
| 660 | |||
| 661 | } | ||
| 662 | |||
| 532 | /* | 663 | /* |
| 533 | * Resend pending statfs requests. | 664 | * Resend pending generic requests. |
| 534 | */ | 665 | */ |
| 535 | static void __resend_generic_request(struct ceph_mon_client *monc) | 666 | static void __resend_generic_request(struct ceph_mon_client *monc) |
| 536 | { | 667 | { |
| @@ -783,6 +914,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 783 | handle_statfs_reply(monc, msg); | 914 | handle_statfs_reply(monc, msg); |
| 784 | break; | 915 | break; |
| 785 | 916 | ||
| 917 | case CEPH_MSG_POOLOP_REPLY: | ||
| 918 | handle_poolop_reply(monc, msg); | ||
| 919 | break; | ||
| 920 | |||
| 786 | case CEPH_MSG_MON_MAP: | 921 | case CEPH_MSG_MON_MAP: |
| 787 | ceph_monc_handle_map(monc, msg); | 922 | ceph_monc_handle_map(monc, msg); |
| 788 | break; | 923 | break; |
| @@ -820,6 +955,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, | |||
| 820 | case CEPH_MSG_MON_SUBSCRIBE_ACK: | 955 | case CEPH_MSG_MON_SUBSCRIBE_ACK: |
| 821 | m = ceph_msg_get(monc->m_subscribe_ack); | 956 | m = ceph_msg_get(monc->m_subscribe_ack); |
| 822 | break; | 957 | break; |
| 958 | case CEPH_MSG_POOLOP_REPLY: | ||
| 823 | case CEPH_MSG_STATFS_REPLY: | 959 | case CEPH_MSG_STATFS_REPLY: |
| 824 | return get_generic_reply(con, hdr, skip); | 960 | return get_generic_reply(con, hdr, skip); |
| 825 | case CEPH_MSG_AUTH_REPLY: | 961 | case CEPH_MSG_AUTH_REPLY: |
diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index 174d794321d0..8e396f2c0963 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h | |||
| @@ -50,6 +50,7 @@ struct ceph_mon_generic_request { | |||
| 50 | struct rb_node node; | 50 | struct rb_node node; |
| 51 | int result; | 51 | int result; |
| 52 | void *buf; | 52 | void *buf; |
| 53 | int buf_len; | ||
| 53 | struct completion completion; | 54 | struct completion completion; |
| 54 | struct ceph_msg *request; /* original request */ | 55 | struct ceph_msg *request; /* original request */ |
| 55 | struct ceph_msg *reply; /* and reply */ | 56 | struct ceph_msg *reply; /* and reply */ |
| @@ -111,6 +112,10 @@ extern int ceph_monc_open_session(struct ceph_mon_client *monc); | |||
| 111 | 112 | ||
| 112 | extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); | 113 | extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); |
| 113 | 114 | ||
| 115 | extern int ceph_monc_create_snapid(struct ceph_mon_client *monc, | ||
| 116 | u32 pool, u64 *snapid); | ||
| 114 | 117 | ||
| 118 | extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||
| 119 | u32 pool, u64 snapid); | ||
| 115 | 120 | ||
| 116 | #endif | 121 | #endif |
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h index 892a0298dfdf..680d3d648cac 100644 --- a/fs/ceph/msgr.h +++ b/fs/ceph/msgr.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #ifndef __MSGR_H | 1 | #ifndef CEPH_MSGR_H |
| 2 | #define __MSGR_H | 2 | #define CEPH_MSGR_H |
| 3 | 3 | ||
| 4 | /* | 4 | /* |
| 5 | * Data types for message passing layer used by Ceph. | 5 | * Data types for message passing layer used by Ceph. |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index e38522347898..bed6391e52c7 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
| @@ -1276,8 +1276,6 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
| 1276 | 1276 | ||
| 1277 | /* it may be a short read due to an object boundary */ | 1277 | /* it may be a short read due to an object boundary */ |
| 1278 | req->r_pages = pages; | 1278 | req->r_pages = pages; |
| 1279 | num_pages = calc_pages_for(off, *plen); | ||
| 1280 | req->r_num_pages = num_pages; | ||
| 1281 | 1279 | ||
| 1282 | dout("readpages final extent is %llu~%llu (%d pages)\n", | 1280 | dout("readpages final extent is %llu~%llu (%d pages)\n", |
| 1283 | off, *plen, req->r_num_pages); | 1281 | off, *plen, req->r_num_pages); |
| @@ -1319,7 +1317,6 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
| 1319 | 1317 | ||
| 1320 | /* it may be a short write due to an object boundary */ | 1318 | /* it may be a short write due to an object boundary */ |
| 1321 | req->r_pages = pages; | 1319 | req->r_pages = pages; |
| 1322 | req->r_num_pages = calc_pages_for(off, len); | ||
| 1323 | dout("writepages %llu~%llu (%d pages)\n", off, len, | 1320 | dout("writepages %llu~%llu (%d pages)\n", off, len, |
| 1324 | req->r_num_pages); | 1321 | req->r_num_pages); |
| 1325 | 1322 | ||
| @@ -1476,8 +1473,8 @@ static void put_osd_con(struct ceph_connection *con) | |||
| 1476 | * authentication | 1473 | * authentication |
| 1477 | */ | 1474 | */ |
| 1478 | static int get_authorizer(struct ceph_connection *con, | 1475 | static int get_authorizer(struct ceph_connection *con, |
| 1479 | void **buf, int *len, int *proto, | 1476 | void **buf, int *len, int *proto, |
| 1480 | void **reply_buf, int *reply_len, int force_new) | 1477 | void **reply_buf, int *reply_len, int force_new) |
| 1481 | { | 1478 | { |
| 1482 | struct ceph_osd *o = con->private; | 1479 | struct ceph_osd *o = con->private; |
| 1483 | struct ceph_osd_client *osdc = o->o_osdc; | 1480 | struct ceph_osd_client *osdc = o->o_osdc; |
| @@ -1497,7 +1494,7 @@ static int get_authorizer(struct ceph_connection *con, | |||
| 1497 | &o->o_authorizer_reply_buf, | 1494 | &o->o_authorizer_reply_buf, |
| 1498 | &o->o_authorizer_reply_buf_len); | 1495 | &o->o_authorizer_reply_buf_len); |
| 1499 | if (ret) | 1496 | if (ret) |
| 1500 | return ret; | 1497 | return ret; |
| 1501 | } | 1498 | } |
| 1502 | 1499 | ||
| 1503 | *proto = ac->protocol; | 1500 | *proto = ac->protocol; |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index 416d46adbf87..e31f118f1392 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
| @@ -424,12 +424,30 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | |||
| 424 | kfree(pi); | 424 | kfree(pi); |
| 425 | } | 425 | } |
| 426 | 426 | ||
| 427 | void __decode_pool(void **p, struct ceph_pg_pool_info *pi) | 427 | static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) |
| 428 | { | 428 | { |
| 429 | unsigned n, m; | ||
| 430 | |||
| 429 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | 431 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); |
| 430 | calc_pg_masks(pi); | 432 | calc_pg_masks(pi); |
| 431 | *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); | 433 | |
| 434 | /* num_snaps * snap_info_t */ | ||
| 435 | n = le32_to_cpu(pi->v.num_snaps); | ||
| 436 | while (n--) { | ||
| 437 | ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + | ||
| 438 | sizeof(struct ceph_timespec), bad); | ||
| 439 | *p += sizeof(u64) + /* key */ | ||
| 440 | 1 + sizeof(u64) + /* u8, snapid */ | ||
| 441 | sizeof(struct ceph_timespec); | ||
| 442 | m = ceph_decode_32(p); /* snap name */ | ||
| 443 | *p += m; | ||
| 444 | } | ||
| 445 | |||
| 432 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; | 446 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; |
| 447 | return 0; | ||
| 448 | |||
| 449 | bad: | ||
| 450 | return -EINVAL; | ||
| 433 | } | 451 | } |
| 434 | 452 | ||
| 435 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) | 453 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) |
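The point of the __decode_pool() change is that every variable-length snap entry is now bounds-checked against end before the cursor advances, instead of blindly skipping num_snaps * sizeof(u64). A minimal userspace sketch of the same cursor-plus-end discipline, with invented names (this is not the kernel decoder):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* skip one length-prefixed blob, refusing to run past 'end' */
    static int skip_lenstr(const uint8_t **p, const uint8_t *end)
    {
            uint32_t len;

            if ((size_t)(end - *p) < sizeof(len))
                    return -1;              /* truncated length field */
            memcpy(&len, *p, sizeof(len));
            *p += sizeof(len);

            if ((size_t)(end - *p) < len)
                    return -1;              /* truncated payload */
            *p += len;
            return 0;
    }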
| @@ -571,7 +589,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
| 571 | kfree(pi); | 589 | kfree(pi); |
| 572 | goto bad; | 590 | goto bad; |
| 573 | } | 591 | } |
| 574 | __decode_pool(p, pi); | 592 | err = __decode_pool(p, end, pi); |
| 593 | if (err < 0) | ||
| 594 | goto bad; | ||
| 575 | __insert_pg_pool(&map->pg_pools, pi); | 595 | __insert_pg_pool(&map->pg_pools, pi); |
| 576 | } | 596 | } |
| 577 | 597 | ||
| @@ -760,7 +780,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
| 760 | pi->id = pool; | 780 | pi->id = pool; |
| 761 | __insert_pg_pool(&map->pg_pools, pi); | 781 | __insert_pg_pool(&map->pg_pools, pi); |
| 762 | } | 782 | } |
| 763 | __decode_pool(p, pi); | 783 | err = __decode_pool(p, end, pi); |
| 784 | if (err < 0) | ||
| 785 | goto bad; | ||
| 764 | } | 786 | } |
| 765 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) | 787 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) |
| 766 | goto bad; | 788 | goto bad; |
| @@ -833,7 +855,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
| 833 | node)->pgid, pgid) <= 0) { | 855 | node)->pgid, pgid) <= 0) { |
| 834 | struct ceph_pg_mapping *cur = | 856 | struct ceph_pg_mapping *cur = |
| 835 | rb_entry(rbp, struct ceph_pg_mapping, node); | 857 | rb_entry(rbp, struct ceph_pg_mapping, node); |
| 836 | 858 | ||
| 837 | rbp = rb_next(rbp); | 859 | rbp = rb_next(rbp); |
| 838 | dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); | 860 | dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); |
| 839 | rb_erase(&cur->node, &map->pg_temp); | 861 | rb_erase(&cur->node, &map->pg_temp); |
| @@ -1026,8 +1048,9 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
| 1026 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, | 1048 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, |
| 1027 | pool->v.type, pool->v.size); | 1049 | pool->v.type, pool->v.size); |
| 1028 | if (ruleno < 0) { | 1050 | if (ruleno < 0) { |
| 1029 | pr_err("no crush rule pool %d type %d size %d\n", | 1051 | pr_err("no crush rule pool %d ruleset %d type %d size %d\n", |
| 1030 | poolid, pool->v.type, pool->v.size); | 1052 | poolid, pool->v.crush_ruleset, pool->v.type, |
| 1053 | pool->v.size); | ||
| 1031 | return NULL; | 1054 | return NULL; |
| 1032 | } | 1055 | } |
| 1033 | 1056 | ||
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index 8fcc023056c7..6d5247f2e81b 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #ifndef __RADOS_H | 1 | #ifndef CEPH_RADOS_H |
| 2 | #define __RADOS_H | 2 | #define CEPH_RADOS_H |
| 3 | 3 | ||
| 4 | /* | 4 | /* |
| 5 | * Data types for the Ceph distributed object storage layer RADOS | 5 | * Data types for the Ceph distributed object storage layer RADOS |
| @@ -203,6 +203,7 @@ enum { | |||
| 203 | CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, | 203 | CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, |
| 204 | 204 | ||
| 205 | CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, | 205 | CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, |
| 206 | CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14, | ||
| 206 | 207 | ||
| 207 | /** attrs **/ | 208 | /** attrs **/ |
| 208 | /* read */ | 209 | /* read */ |
| @@ -272,6 +273,10 @@ static inline int ceph_osd_op_mode_modify(int op) | |||
| 272 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; | 273 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; |
| 273 | } | 274 | } |
| 274 | 275 | ||
| 276 | /* | ||
| 277 | * note that the following tmap stuff is also defined in ceph's librados.h; | ||
| 278 | * any modification here must also be applied there | ||
| 279 | */ | ||
| 275 | #define CEPH_OSD_TMAP_HDR 'h' | 280 | #define CEPH_OSD_TMAP_HDR 'h' |
| 276 | #define CEPH_OSD_TMAP_SET 's' | 281 | #define CEPH_OSD_TMAP_SET 's' |
| 277 | #define CEPH_OSD_TMAP_RM 'r' | 282 | #define CEPH_OSD_TMAP_RM 'r' |
| @@ -297,6 +302,7 @@ enum { | |||
| 297 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ | 302 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ |
| 298 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ | 303 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ |
| 299 | CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ | 304 | CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ |
| 305 | CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ | ||
| 300 | }; | 306 | }; |
| 301 | 307 | ||
| 302 | enum { | 308 | enum { |
| @@ -350,6 +356,9 @@ struct ceph_osd_op { | |||
| 350 | struct { | 356 | struct { |
| 351 | __le64 cookie, count; | 357 | __le64 cookie, count; |
| 352 | } __attribute__ ((packed)) pgls; | 358 | } __attribute__ ((packed)) pgls; |
| 359 | struct { | ||
| 360 | __le64 snapid; | ||
| 361 | } __attribute__ ((packed)) snap; | ||
| 353 | }; | 362 | }; |
| 354 | __le32 payload_len; | 363 | __le32 payload_len; |
| 355 | } __attribute__ ((packed)); | 364 | } __attribute__ ((packed)); |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fa87f51e38e1..9922628532b2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | #include "ceph_debug.h" | 2 | #include "ceph_debug.h" |
| 3 | 3 | ||
| 4 | #include <linux/backing-dev.h> | 4 | #include <linux/backing-dev.h> |
| 5 | #include <linux/ctype.h> | ||
| 5 | #include <linux/fs.h> | 6 | #include <linux/fs.h> |
| 6 | #include <linux/inet.h> | 7 | #include <linux/inet.h> |
| 7 | #include <linux/in6.h> | 8 | #include <linux/in6.h> |
| @@ -101,12 +102,21 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 101 | } | 102 | } |
| 102 | 103 | ||
| 103 | 104 | ||
| 104 | static int ceph_syncfs(struct super_block *sb, int wait) | 105 | static int ceph_sync_fs(struct super_block *sb, int wait) |
| 105 | { | 106 | { |
| 106 | dout("sync_fs %d\n", wait); | 107 | struct ceph_client *client = ceph_sb_to_client(sb); |
| 108 | |||
| 109 | if (!wait) { | ||
| 110 | dout("sync_fs (non-blocking)\n"); | ||
| 111 | ceph_flush_dirty_caps(&client->mdsc); | ||
| 112 | dout("sync_fs (non-blocking) done\n"); | ||
| 113 | return 0; | ||
| 114 | } | ||
| 115 | |||
| 116 | dout("sync_fs (blocking)\n"); | ||
| 107 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); | 117 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); |
| 108 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); | 118 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); |
| 109 | dout("sync_fs %d done\n", wait); | 119 | dout("sync_fs (blocking) done\n"); |
| 110 | return 0; | 120 | return 0; |
| 111 | } | 121 | } |
| 112 | 122 | ||
| @@ -150,9 +160,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
| 150 | struct ceph_mount_args *args = client->mount_args; | 160 | struct ceph_mount_args *args = client->mount_args; |
| 151 | 161 | ||
| 152 | if (args->flags & CEPH_OPT_FSID) | 162 | if (args->flags & CEPH_OPT_FSID) |
| 153 | seq_printf(m, ",fsidmajor=%llu,fsidminor%llu", | 163 | seq_printf(m, ",fsid=%pU", &args->fsid); |
| 154 | le64_to_cpu(*(__le64 *)&args->fsid.fsid[0]), | ||
| 155 | le64_to_cpu(*(__le64 *)&args->fsid.fsid[8])); | ||
| 156 | if (args->flags & CEPH_OPT_NOSHARE) | 164 | if (args->flags & CEPH_OPT_NOSHARE) |
| 157 | seq_puts(m, ",noshare"); | 165 | seq_puts(m, ",noshare"); |
| 158 | if (args->flags & CEPH_OPT_DIRSTAT) | 166 | if (args->flags & CEPH_OPT_DIRSTAT) |
| @@ -279,7 +287,7 @@ static const struct super_operations ceph_super_ops = { | |||
| 279 | .alloc_inode = ceph_alloc_inode, | 287 | .alloc_inode = ceph_alloc_inode, |
| 280 | .destroy_inode = ceph_destroy_inode, | 288 | .destroy_inode = ceph_destroy_inode, |
| 281 | .write_inode = ceph_write_inode, | 289 | .write_inode = ceph_write_inode, |
| 282 | .sync_fs = ceph_syncfs, | 290 | .sync_fs = ceph_sync_fs, |
| 283 | .put_super = ceph_put_super, | 291 | .put_super = ceph_put_super, |
| 284 | .show_options = ceph_show_options, | 292 | .show_options = ceph_show_options, |
| 285 | .statfs = ceph_statfs, | 293 | .statfs = ceph_statfs, |
| @@ -322,9 +330,6 @@ const char *ceph_msg_type_name(int type) | |||
| 322 | * mount options | 330 | * mount options |
| 323 | */ | 331 | */ |
| 324 | enum { | 332 | enum { |
| 325 | Opt_fsidmajor, | ||
| 326 | Opt_fsidminor, | ||
| 327 | Opt_monport, | ||
| 328 | Opt_wsize, | 333 | Opt_wsize, |
| 329 | Opt_rsize, | 334 | Opt_rsize, |
| 330 | Opt_osdtimeout, | 335 | Opt_osdtimeout, |
| @@ -339,6 +344,7 @@ enum { | |||
| 339 | Opt_congestion_kb, | 344 | Opt_congestion_kb, |
| 340 | Opt_last_int, | 345 | Opt_last_int, |
| 341 | /* int args above */ | 346 | /* int args above */ |
| 347 | Opt_fsid, | ||
| 342 | Opt_snapdirname, | 348 | Opt_snapdirname, |
| 343 | Opt_name, | 349 | Opt_name, |
| 344 | Opt_secret, | 350 | Opt_secret, |
| @@ -355,9 +361,6 @@ enum { | |||
| 355 | }; | 361 | }; |
| 356 | 362 | ||
| 357 | static match_table_t arg_tokens = { | 363 | static match_table_t arg_tokens = { |
| 358 | {Opt_fsidmajor, "fsidmajor=%ld"}, | ||
| 359 | {Opt_fsidminor, "fsidminor=%ld"}, | ||
| 360 | {Opt_monport, "monport=%d"}, | ||
| 361 | {Opt_wsize, "wsize=%d"}, | 364 | {Opt_wsize, "wsize=%d"}, |
| 362 | {Opt_rsize, "rsize=%d"}, | 365 | {Opt_rsize, "rsize=%d"}, |
| 363 | {Opt_osdtimeout, "osdtimeout=%d"}, | 366 | {Opt_osdtimeout, "osdtimeout=%d"}, |
| @@ -371,6 +374,7 @@ static match_table_t arg_tokens = { | |||
| 371 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | 374 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, |
| 372 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 375 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
| 373 | /* int args above */ | 376 | /* int args above */ |
| 377 | {Opt_fsid, "fsid=%s"}, | ||
| 374 | {Opt_snapdirname, "snapdirname=%s"}, | 378 | {Opt_snapdirname, "snapdirname=%s"}, |
| 375 | {Opt_name, "name=%s"}, | 379 | {Opt_name, "name=%s"}, |
| 376 | {Opt_secret, "secret=%s"}, | 380 | {Opt_secret, "secret=%s"}, |
| @@ -386,6 +390,36 @@ static match_table_t arg_tokens = { | |||
| 386 | {-1, NULL} | 390 | {-1, NULL} |
| 387 | }; | 391 | }; |
| 388 | 392 | ||
| 393 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | ||
| 394 | { | ||
| 395 | int i = 0; | ||
| 396 | char tmp[3]; | ||
| 397 | int err = -EINVAL; | ||
| 398 | int d; | ||
| 399 | |||
| 400 | dout("parse_fsid '%s'\n", str); | ||
| 401 | tmp[2] = 0; | ||
| 402 | while (*str && i < 16) { | ||
| 403 | if (ispunct(*str)) { | ||
| 404 | str++; | ||
| 405 | continue; | ||
| 406 | } | ||
| 407 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | ||
| 408 | break; | ||
| 409 | tmp[0] = str[0]; | ||
| 410 | tmp[1] = str[1]; | ||
| 411 | if (sscanf(tmp, "%x", &d) < 1) | ||
| 412 | break; | ||
| 413 | fsid->fsid[i] = d & 0xff; | ||
| 414 | i++; | ||
| 415 | str += 2; | ||
| 416 | } | ||
| 417 | |||
| 418 | if (i == 16) | ||
| 419 | err = 0; | ||
| 420 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | ||
| 421 | return err; | ||
| 422 | } | ||
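A rough userspace illustration of the format parse_fsid() accepts: punctuation (the dashes of a UUID) is skipped and exactly 16 hex byte pairs must be found. Plain C, standalone, not the kernel code:

    #include <ctype.h>
    #include <stdint.h>
    #include <stdio.h>

    static int parse_fsid_demo(const char *str, uint8_t fsid[16])
    {
            int i = 0, d;
            char tmp[3] = { 0 };

            while (*str && i < 16) {
                    if (ispunct((unsigned char)*str)) {
                            str++;
                            continue;
                    }
                    if (!isxdigit((unsigned char)str[0]) ||
                        !isxdigit((unsigned char)str[1]))
                            break;
                    tmp[0] = str[0];
                    tmp[1] = str[1];
                    if (sscanf(tmp, "%x", &d) < 1)
                            break;
                    fsid[i++] = d & 0xff;
                    str += 2;
            }
            return (i == 16) ? 0 : -1;      /* 0 only on a full 16-byte fsid */
    }

    int main(void)
    {
            uint8_t fsid[16];
            return parse_fsid_demo("01234567-89ab-cdef-0123-456789abcdef", fsid);
    }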
| 389 | 423 | ||
| 390 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, | 424 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, |
| 391 | const char *dev_name, | 425 | const char *dev_name, |
| @@ -469,12 +503,6 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
| 469 | dout("got token %d\n", token); | 503 | dout("got token %d\n", token); |
| 470 | } | 504 | } |
| 471 | switch (token) { | 505 | switch (token) { |
| 472 | case Opt_fsidmajor: | ||
| 473 | *(__le64 *)&args->fsid.fsid[0] = cpu_to_le64(intval); | ||
| 474 | break; | ||
| 475 | case Opt_fsidminor: | ||
| 476 | *(__le64 *)&args->fsid.fsid[8] = cpu_to_le64(intval); | ||
| 477 | break; | ||
| 478 | case Opt_ip: | 506 | case Opt_ip: |
| 479 | err = ceph_parse_ips(argstr[0].from, | 507 | err = ceph_parse_ips(argstr[0].from, |
| 480 | argstr[0].to, | 508 | argstr[0].to, |
| @@ -485,6 +513,11 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
| 485 | args->flags |= CEPH_OPT_MYIP; | 513 | args->flags |= CEPH_OPT_MYIP; |
| 486 | break; | 514 | break; |
| 487 | 515 | ||
| 516 | case Opt_fsid: | ||
| 517 | err = parse_fsid(argstr[0].from, &args->fsid); | ||
| 518 | if (err == 0) | ||
| 519 | args->flags |= CEPH_OPT_FSID; | ||
| 520 | break; | ||
| 488 | case Opt_snapdirname: | 521 | case Opt_snapdirname: |
| 489 | kfree(args->snapdir_name); | 522 | kfree(args->snapdir_name); |
| 490 | args->snapdir_name = kstrndup(argstr[0].from, | 523 | args->snapdir_name = kstrndup(argstr[0].from, |
| @@ -515,6 +548,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
| 515 | case Opt_osdkeepalivetimeout: | 548 | case Opt_osdkeepalivetimeout: |
| 516 | args->osd_keepalive_timeout = intval; | 549 | args->osd_keepalive_timeout = intval; |
| 517 | break; | 550 | break; |
| 551 | case Opt_osd_idle_ttl: | ||
| 552 | args->osd_idle_ttl = intval; | ||
| 553 | break; | ||
| 518 | case Opt_mount_timeout: | 554 | case Opt_mount_timeout: |
| 519 | args->mount_timeout = intval; | 555 | args->mount_timeout = intval; |
| 520 | break; | 556 | break; |
| @@ -630,7 +666,6 @@ static struct ceph_client *ceph_create_client(struct ceph_mount_args *args) | |||
| 630 | 666 | ||
| 631 | /* caps */ | 667 | /* caps */ |
| 632 | client->min_caps = args->max_readdir; | 668 | client->min_caps = args->max_readdir; |
| 633 | ceph_adjust_min_caps(client->min_caps); | ||
| 634 | 669 | ||
| 635 | /* subsystems */ | 670 | /* subsystems */ |
| 636 | err = ceph_monc_init(&client->monc, client); | 671 | err = ceph_monc_init(&client->monc, client); |
| @@ -680,8 +715,6 @@ static void ceph_destroy_client(struct ceph_client *client) | |||
| 680 | 715 | ||
| 681 | ceph_monc_stop(&client->monc); | 716 | ceph_monc_stop(&client->monc); |
| 682 | 717 | ||
| 683 | ceph_adjust_min_caps(-client->min_caps); | ||
| 684 | |||
| 685 | ceph_debugfs_client_cleanup(client); | 718 | ceph_debugfs_client_cleanup(client); |
| 686 | destroy_workqueue(client->wb_wq); | 719 | destroy_workqueue(client->wb_wq); |
| 687 | destroy_workqueue(client->pg_inv_wq); | 720 | destroy_workqueue(client->pg_inv_wq); |
| @@ -706,13 +739,13 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | |||
| 706 | { | 739 | { |
| 707 | if (client->have_fsid) { | 740 | if (client->have_fsid) { |
| 708 | if (ceph_fsid_compare(&client->fsid, fsid)) { | 741 | if (ceph_fsid_compare(&client->fsid, fsid)) { |
| 709 | pr_err("bad fsid, had " FSID_FORMAT " got " FSID_FORMAT, | 742 | pr_err("bad fsid, had %pU got %pU", |
| 710 | PR_FSID(&client->fsid), PR_FSID(fsid)); | 743 | &client->fsid, fsid); |
| 711 | return -1; | 744 | return -1; |
| 712 | } | 745 | } |
| 713 | } else { | 746 | } else { |
| 714 | pr_info("client%lld fsid " FSID_FORMAT "\n", | 747 | pr_info("client%lld fsid %pU\n", client->monc.auth->global_id, |
| 715 | client->monc.auth->global_id, PR_FSID(fsid)); | 748 | fsid); |
| 716 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | 749 | memcpy(&client->fsid, fsid, sizeof(*fsid)); |
| 717 | ceph_debugfs_client_init(client); | 750 | ceph_debugfs_client_init(client); |
| 718 | client->have_fsid = true; | 751 | client->have_fsid = true; |
| @@ -1043,8 +1076,6 @@ static int __init init_ceph(void) | |||
| 1043 | if (ret) | 1076 | if (ret) |
| 1044 | goto out_msgr; | 1077 | goto out_msgr; |
| 1045 | 1078 | ||
| 1046 | ceph_caps_init(); | ||
| 1047 | |||
| 1048 | ret = register_filesystem(&ceph_fs_type); | 1079 | ret = register_filesystem(&ceph_fs_type); |
| 1049 | if (ret) | 1080 | if (ret) |
| 1050 | goto out_icache; | 1081 | goto out_icache; |
| @@ -1069,7 +1100,6 @@ static void __exit exit_ceph(void) | |||
| 1069 | { | 1100 | { |
| 1070 | dout("exit_ceph\n"); | 1101 | dout("exit_ceph\n"); |
| 1071 | unregister_filesystem(&ceph_fs_type); | 1102 | unregister_filesystem(&ceph_fs_type); |
| 1072 | ceph_caps_finalize(); | ||
| 1073 | destroy_caches(); | 1103 | destroy_caches(); |
| 1074 | ceph_msgr_exit(); | 1104 | ceph_msgr_exit(); |
| 1075 | ceph_debugfs_cleanup(); | 1105 | ceph_debugfs_cleanup(); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 10a4a406e887..2482d696f0de 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -31,6 +31,12 @@ | |||
| 31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) | 31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) |
| 32 | 32 | ||
| 33 | /* | 33 | /* |
| 34 | * Supported features | ||
| 35 | */ | ||
| 36 | #define CEPH_FEATURE_SUPPORTED (CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK) | ||
| 37 | #define CEPH_FEATURE_REQUIRED CEPH_FEATURE_NOSRCADDR | ||
| 38 | |||
| 39 | /* | ||
| 34 | * mount options | 40 | * mount options |
| 35 | */ | 41 | */ |
| 36 | #define CEPH_OPT_FSID (1<<0) | 42 | #define CEPH_OPT_FSID (1<<0) |
| @@ -560,11 +566,13 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci) | |||
| 560 | /* what the mds thinks we want */ | 566 | /* what the mds thinks we want */ |
| 561 | extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); | 567 | extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); |
| 562 | 568 | ||
| 563 | extern void ceph_caps_init(void); | 569 | extern void ceph_caps_init(struct ceph_mds_client *mdsc); |
| 564 | extern void ceph_caps_finalize(void); | 570 | extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); |
| 565 | extern void ceph_adjust_min_caps(int delta); | 571 | extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); |
| 566 | extern int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need); | 572 | extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, |
| 567 | extern int ceph_unreserve_caps(struct ceph_cap_reservation *ctx); | 573 | struct ceph_cap_reservation *ctx, int need); |
| 574 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | ||
| 575 | struct ceph_cap_reservation *ctx); | ||
| 568 | extern void ceph_reservation_status(struct ceph_client *client, | 576 | extern void ceph_reservation_status(struct ceph_client *client, |
| 569 | int *total, int *avail, int *used, | 577 | int *total, int *avail, int *used, |
| 570 | int *reserved, int *min); | 578 | int *reserved, int *min); |
| @@ -738,13 +746,6 @@ extern struct kmem_cache *ceph_file_cachep; | |||
| 738 | extern const char *ceph_msg_type_name(int type); | 746 | extern const char *ceph_msg_type_name(int type); |
| 739 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | 747 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); |
| 740 | 748 | ||
| 741 | #define FSID_FORMAT "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" \ | ||
| 742 | "%02x%02x%02x%02x%02x%02x" | ||
| 743 | #define PR_FSID(f) (f)->fsid[0], (f)->fsid[1], (f)->fsid[2], (f)->fsid[3], \ | ||
| 744 | (f)->fsid[4], (f)->fsid[5], (f)->fsid[6], (f)->fsid[7], \ | ||
| 745 | (f)->fsid[8], (f)->fsid[9], (f)->fsid[10], (f)->fsid[11], \ | ||
| 746 | (f)->fsid[12], (f)->fsid[13], (f)->fsid[14], (f)->fsid[15] | ||
| 747 | |||
| 748 | /* inode.c */ | 749 | /* inode.c */ |
| 749 | extern const struct inode_operations ceph_file_iops; | 750 | extern const struct inode_operations ceph_file_iops; |
| 750 | 751 | ||
| @@ -806,13 +807,16 @@ static inline void ceph_remove_cap(struct ceph_cap *cap) | |||
| 806 | __ceph_remove_cap(cap); | 807 | __ceph_remove_cap(cap); |
| 807 | spin_unlock(&inode->i_lock); | 808 | spin_unlock(&inode->i_lock); |
| 808 | } | 809 | } |
| 809 | extern void ceph_put_cap(struct ceph_cap *cap); | 810 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, |
| 811 | struct ceph_cap *cap); | ||
| 810 | 812 | ||
| 811 | extern void ceph_queue_caps_release(struct inode *inode); | 813 | extern void ceph_queue_caps_release(struct inode *inode); |
| 812 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); | 814 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); |
| 813 | extern int ceph_fsync(struct file *file, int datasync); | 815 | extern int ceph_fsync(struct file *file, int datasync); |
| 814 | extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, | 816 | extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, |
| 815 | struct ceph_mds_session *session); | 817 | struct ceph_mds_session *session); |
| 818 | extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, | ||
| 819 | int mds); | ||
| 816 | extern int ceph_get_cap_mds(struct inode *inode); | 820 | extern int ceph_get_cap_mds(struct inode *inode); |
| 817 | extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); | 821 | extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); |
| 818 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); | 822 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); |
| @@ -857,7 +861,7 @@ extern void ceph_release_page_vector(struct page **pages, int num_pages); | |||
| 857 | /* dir.c */ | 861 | /* dir.c */ |
| 858 | extern const struct file_operations ceph_dir_fops; | 862 | extern const struct file_operations ceph_dir_fops; |
| 859 | extern const struct inode_operations ceph_dir_iops; | 863 | extern const struct inode_operations ceph_dir_iops; |
| 860 | extern struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, | 864 | extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, |
| 861 | ceph_snapdir_dentry_ops; | 865 | ceph_snapdir_dentry_ops; |
| 862 | 866 | ||
| 863 | extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); | 867 | extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); |
| @@ -888,6 +892,14 @@ extern void ceph_debugfs_cleanup(void); | |||
| 888 | extern int ceph_debugfs_client_init(struct ceph_client *client); | 892 | extern int ceph_debugfs_client_init(struct ceph_client *client); |
| 889 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | 893 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); |
| 890 | 894 | ||
| 895 | /* locks.c */ | ||
| 896 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | ||
| 897 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | ||
| 898 | extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); | ||
| 899 | extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p, | ||
| 900 | int p_locks, int f_locks); | ||
| 901 | extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); | ||
| 902 | |||
| 891 | static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) | 903 | static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) |
| 892 | { | 904 | { |
| 893 | if (dentry && dentry->d_parent) | 905 | if (dentry && dentry->d_parent) |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 68aeebc69681..097a2654c00f 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
| @@ -337,6 +337,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci) | |||
| 337 | } | 337 | } |
| 338 | 338 | ||
| 339 | static int __build_xattrs(struct inode *inode) | 339 | static int __build_xattrs(struct inode *inode) |
| 340 | __releases(inode->i_lock) | ||
| 341 | __acquires(inode->i_lock) | ||
| 340 | { | 342 | { |
| 341 | u32 namelen; | 343 | u32 namelen; |
| 342 | u32 numattr = 0; | 344 | u32 numattr = 0; |
diff --git a/fs/cifs/README b/fs/cifs/README index a7081eeeb85d..7099a526f775 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
| @@ -301,6 +301,16 @@ A partial list of the supported mount options follows: | |||
| 301 | gid Set the default gid for inodes (similar to above). | 301 | gid Set the default gid for inodes (similar to above). |
| 302 | file_mode If CIFS Unix extensions are not supported by the server | 302 | file_mode If CIFS Unix extensions are not supported by the server |
| 303 | this overrides the default mode for file inodes. | 303 | this overrides the default mode for file inodes. |
| 304 | fsc Enable local disk caching using FS-Cache (off by default). This | ||
| 305 | option could be useful to improve performance on a slow link, | ||
| 306 | heavily loaded server and/or network where reading from the | ||
| 307 | disk is faster than reading from the server (over the network). | ||
| 308 | This could also impact scalability positively as the | ||
| 309 | number of calls to the server are reduced. However, local | ||
| 310 | caching is not suitable for all workloads for e.g. read-once | ||
| 311 | type workloads. So, you need to consider carefully your | ||
| 312 | workload/scenario before using this option. Currently, local | ||
| 313 | disk caching is functional for CIFS files opened as read-only. | ||
| 304 | dir_mode If CIFS Unix extensions are not supported by the server | 314 | dir_mode If CIFS Unix extensions are not supported by the server |
| 305 | this overrides the default mode for directory inodes. | 315 | this overrides the default mode for directory inodes. |
| 306 | port attempt to contact the server on this tcp port, before | 316 | port attempt to contact the server on this tcp port, before |
diff --git a/fs/dcache.c b/fs/dcache.c index 9f2c13417969..166d35d56868 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -1905,48 +1905,30 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) | |||
| 1905 | } | 1905 | } |
| 1906 | 1906 | ||
| 1907 | /** | 1907 | /** |
| 1908 | * __d_path - return the path of a dentry | 1908 | * Prepend path string to a buffer |
| 1909 | * __d_path - return the path of a dentry | 1908 | * prepend_path - Prepend path string to a buffer |
| 1909 | * @path: the dentry/vfsmount to report | 1910 | * @path: the dentry/vfsmount to report |
| 1910 | * @root: root vfsmnt/dentry (may be modified by this function) | 1911 | * @root: root vfsmnt/dentry (may be modified by this function) |
| 1911 | * @buffer: buffer to return value in | 1912 | * @buffer: pointer to the end of the buffer |
| 1912 | * @buflen: buffer length | 1913 | * @buflen: pointer to buffer length |
| 1913 | * | 1914 | * |
| 1914 | * Convert a dentry into an ASCII path name. If the entry has been deleted | 1915 | * Caller holds the dcache_lock. |
| 1915 | * the string " (deleted)" is appended. Note that this is ambiguous. | ||
| 1916 | * | ||
| 1917 | * Returns a pointer into the buffer or an error code if the | ||
| 1918 | * path was too long. | ||
| 1919 | * | ||
| 1920 | * "buflen" should be positive. Caller holds the dcache_lock. | ||
| 1921 | * | 1916 | * |
| 1922 | * If path is not reachable from the supplied root, then the value of | 1917 | * If path is not reachable from the supplied root, then the value of |
| 1923 | * root is changed (without modifying refcounts). | 1918 | * root is changed (without modifying refcounts). |
| 1924 | */ | 1919 | */ |
| 1925 | char *__d_path(const struct path *path, struct path *root, | 1920 | static int prepend_path(const struct path *path, struct path *root, |
| 1926 | char *buffer, int buflen) | 1921 | char **buffer, int *buflen) |
| 1927 | { | 1922 | { |
| 1928 | struct dentry *dentry = path->dentry; | 1923 | struct dentry *dentry = path->dentry; |
| 1929 | struct vfsmount *vfsmnt = path->mnt; | 1924 | struct vfsmount *vfsmnt = path->mnt; |
| 1930 | char *end = buffer + buflen; | 1925 | bool slash = false; |
| 1931 | char *retval; | 1926 | int error = 0; |
| 1932 | 1927 | ||
| 1933 | spin_lock(&vfsmount_lock); | 1928 | spin_lock(&vfsmount_lock); |
| 1934 | prepend(&end, &buflen, "\0", 1); | 1929 | while (dentry != root->dentry || vfsmnt != root->mnt) { |
| 1935 | if (d_unlinked(dentry) && | ||
| 1936 | (prepend(&end, &buflen, " (deleted)", 10) != 0)) | ||
| 1937 | goto Elong; | ||
| 1938 | |||
| 1939 | if (buflen < 1) | ||
| 1940 | goto Elong; | ||
| 1941 | /* Get '/' right */ | ||
| 1942 | retval = end-1; | ||
| 1943 | *retval = '/'; | ||
| 1944 | |||
| 1945 | for (;;) { | ||
| 1946 | struct dentry * parent; | 1930 | struct dentry * parent; |
| 1947 | 1931 | ||
| 1948 | if (dentry == root->dentry && vfsmnt == root->mnt) | ||
| 1949 | break; | ||
| 1950 | if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { | 1932 | if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { |
| 1951 | /* Global root? */ | 1933 | /* Global root? */ |
| 1952 | if (vfsmnt->mnt_parent == vfsmnt) { | 1934 | if (vfsmnt->mnt_parent == vfsmnt) { |
| @@ -1958,28 +1940,88 @@ char *__d_path(const struct path *path, struct path *root, | |||
| 1958 | } | 1940 | } |
| 1959 | parent = dentry->d_parent; | 1941 | parent = dentry->d_parent; |
| 1960 | prefetch(parent); | 1942 | prefetch(parent); |
| 1961 | if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || | 1943 | error = prepend_name(buffer, buflen, &dentry->d_name); |
| 1962 | (prepend(&end, &buflen, "/", 1) != 0)) | 1944 | if (!error) |
| 1963 | goto Elong; | 1945 | error = prepend(buffer, buflen, "/", 1); |
| 1964 | retval = end; | 1946 | if (error) |
| 1947 | break; | ||
| 1948 | |||
| 1949 | slash = true; | ||
| 1965 | dentry = parent; | 1950 | dentry = parent; |
| 1966 | } | 1951 | } |
| 1967 | 1952 | ||
| 1968 | out: | 1953 | out: |
| 1954 | if (!error && !slash) | ||
| 1955 | error = prepend(buffer, buflen, "/", 1); | ||
| 1956 | |||
| 1969 | spin_unlock(&vfsmount_lock); | 1957 | spin_unlock(&vfsmount_lock); |
| 1970 | return retval; | 1958 | return error; |
| 1971 | 1959 | ||
| 1972 | global_root: | 1960 | global_root: |
| 1973 | retval += 1; /* hit the slash */ | 1961 | /* |
| 1974 | if (prepend_name(&retval, &buflen, &dentry->d_name) != 0) | 1962 | * Filesystems needing to implement special "root names" |
| 1975 | goto Elong; | 1963 | * should do so with ->d_dname() |
| 1964 | */ | ||
| 1965 | if (IS_ROOT(dentry) && | ||
| 1966 | (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) { | ||
| 1967 | WARN(1, "Root dentry has weird name <%.*s>\n", | ||
| 1968 | (int) dentry->d_name.len, dentry->d_name.name); | ||
| 1969 | } | ||
| 1976 | root->mnt = vfsmnt; | 1970 | root->mnt = vfsmnt; |
| 1977 | root->dentry = dentry; | 1971 | root->dentry = dentry; |
| 1978 | goto out; | 1972 | goto out; |
| 1973 | } | ||
| 1979 | 1974 | ||
| 1980 | Elong: | 1975 | /** |
| 1981 | retval = ERR_PTR(-ENAMETOOLONG); | 1976 | * __d_path - return the path of a dentry |
| 1982 | goto out; | 1977 | * @path: the dentry/vfsmount to report |
| 1978 | * @root: root vfsmnt/dentry (may be modified by this function) | ||
| 1979 | * @buffer: buffer to return value in | ||
| 1980 | * @buflen: buffer length | ||
| 1981 | * | ||
| 1982 | * Convert a dentry into an ASCII path name. | ||
| 1983 | * | ||
| 1984 | * Returns a pointer into the buffer or an error code if the | ||
| 1985 | * path was too long. | ||
| 1986 | * | ||
| 1987 | * "buflen" should be positive. Caller holds the dcache_lock. | ||
| 1988 | * | ||
| 1989 | * If path is not reachable from the supplied root, then the value of | ||
| 1990 | * root is changed (without modifying refcounts). | ||
| 1991 | */ | ||
| 1992 | char *__d_path(const struct path *path, struct path *root, | ||
| 1993 | char *buf, int buflen) | ||
| 1994 | { | ||
| 1995 | char *res = buf + buflen; | ||
| 1996 | int error; | ||
| 1997 | |||
| 1998 | prepend(&res, &buflen, "\0", 1); | ||
| 1999 | error = prepend_path(path, root, &res, &buflen); | ||
| 2000 | if (error) | ||
| 2001 | return ERR_PTR(error); | ||
| 2002 | |||
| 2003 | return res; | ||
| 2004 | } | ||
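The reworked helpers all share one buffer discipline: the cursor starts at the end of the buffer and every component is prepended, so the finished string ends up at the returned pointer rather than at buf. A standalone userspace sketch of that technique (simplified, not the kernel implementation):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    static int prepend(char **buffer, int *buflen, const char *str, int namelen)
    {
            *buflen -= namelen;
            if (*buflen < 0)
                    return -ENAMETOOLONG;
            *buffer -= namelen;
            memcpy(*buffer, str, namelen);
            return 0;
    }

    int main(void)
    {
            char buf[64];
            char *res = buf + sizeof(buf);
            int buflen = sizeof(buf);
            const char *comps[] = { "passwd", "etc" };  /* leaf first */
            size_t i;

            /* 64 bytes is plenty here, so the return values are not checked */
            prepend(&res, &buflen, "\0", 1);
            for (i = 0; i < 2; i++) {
                    prepend(&res, &buflen, comps[i], strlen(comps[i]));
                    prepend(&res, &buflen, "/", 1);
            }
            printf("%s\n", res);                        /* "/etc/passwd" */
            return 0;
    }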
| 2005 | |||
| 2006 | /* | ||
| 2007 | * same as __d_path but appends "(deleted)" for unlinked files. | ||
| 2008 | */ | ||
| 2009 | static int path_with_deleted(const struct path *path, struct path *root, | ||
| 2010 | char **buf, int *buflen) | ||
| 2011 | { | ||
| 2012 | prepend(buf, buflen, "\0", 1); | ||
| 2013 | if (d_unlinked(path->dentry)) { | ||
| 2014 | int error = prepend(buf, buflen, " (deleted)", 10); | ||
| 2015 | if (error) | ||
| 2016 | return error; | ||
| 2017 | } | ||
| 2018 | |||
| 2019 | return prepend_path(path, root, buf, buflen); | ||
| 2020 | } | ||
| 2021 | |||
| 2022 | static int prepend_unreachable(char **buffer, int *buflen) | ||
| 2023 | { | ||
| 2024 | return prepend(buffer, buflen, "(unreachable)", 13); | ||
| 1983 | } | 2025 | } |
| 1984 | 2026 | ||
| 1985 | /** | 2027 | /** |
| @@ -2000,9 +2042,10 @@ Elong: | |||
| 2000 | */ | 2042 | */ |
| 2001 | char *d_path(const struct path *path, char *buf, int buflen) | 2043 | char *d_path(const struct path *path, char *buf, int buflen) |
| 2002 | { | 2044 | { |
| 2003 | char *res; | 2045 | char *res = buf + buflen; |
| 2004 | struct path root; | 2046 | struct path root; |
| 2005 | struct path tmp; | 2047 | struct path tmp; |
| 2048 | int error; | ||
| 2006 | 2049 | ||
| 2007 | /* | 2050 | /* |
| 2008 | * We have various synthetic filesystems that never get mounted. On | 2051 | * We have various synthetic filesystems that never get mounted. On |
| @@ -2014,19 +2057,51 @@ char *d_path(const struct path *path, char *buf, int buflen) | |||
| 2014 | if (path->dentry->d_op && path->dentry->d_op->d_dname) | 2057 | if (path->dentry->d_op && path->dentry->d_op->d_dname) |
| 2015 | return path->dentry->d_op->d_dname(path->dentry, buf, buflen); | 2058 | return path->dentry->d_op->d_dname(path->dentry, buf, buflen); |
| 2016 | 2059 | ||
| 2017 | read_lock(¤t->fs->lock); | 2060 | get_fs_root(current->fs, &root); |
| 2018 | root = current->fs->root; | ||
| 2019 | path_get(&root); | ||
| 2020 | read_unlock(¤t->fs->lock); | ||
| 2021 | spin_lock(&dcache_lock); | 2061 | spin_lock(&dcache_lock); |
| 2022 | tmp = root; | 2062 | tmp = root; |
| 2023 | res = __d_path(path, &tmp, buf, buflen); | 2063 | error = path_with_deleted(path, &tmp, &res, &buflen); |
| 2064 | if (error) | ||
| 2065 | res = ERR_PTR(error); | ||
| 2024 | spin_unlock(&dcache_lock); | 2066 | spin_unlock(&dcache_lock); |
| 2025 | path_put(&root); | 2067 | path_put(&root); |
| 2026 | return res; | 2068 | return res; |
| 2027 | } | 2069 | } |
| 2028 | EXPORT_SYMBOL(d_path); | 2070 | EXPORT_SYMBOL(d_path); |
| 2029 | 2071 | ||
| 2072 | /** | ||
| 2073 | * d_path_with_unreachable - return the path of a dentry | ||
| 2074 | * @path: path to report | ||
| 2075 | * @buf: buffer to return value in | ||
| 2076 | * @buflen: buffer length | ||
| 2077 | * | ||
| 2078 | * The difference from d_path() is that this prepends "(unreachable)" | ||
| 2079 | * to paths which are unreachable from the current process' root. | ||
| 2080 | */ | ||
| 2081 | char *d_path_with_unreachable(const struct path *path, char *buf, int buflen) | ||
| 2082 | { | ||
| 2083 | char *res = buf + buflen; | ||
| 2084 | struct path root; | ||
| 2085 | struct path tmp; | ||
| 2086 | int error; | ||
| 2087 | |||
| 2088 | if (path->dentry->d_op && path->dentry->d_op->d_dname) | ||
| 2089 | return path->dentry->d_op->d_dname(path->dentry, buf, buflen); | ||
| 2090 | |||
| 2091 | get_fs_root(current->fs, &root); | ||
| 2092 | spin_lock(&dcache_lock); | ||
| 2093 | tmp = root; | ||
| 2094 | error = path_with_deleted(path, &tmp, &res, &buflen); | ||
| 2095 | if (!error && !path_equal(&tmp, &root)) | ||
| 2096 | error = prepend_unreachable(&res, &buflen); | ||
| 2097 | spin_unlock(&dcache_lock); | ||
| 2098 | path_put(&root); | ||
| 2099 | if (error) | ||
| 2100 | res = ERR_PTR(error); | ||
| 2101 | |||
| 2102 | return res; | ||
| 2103 | } | ||
| 2104 | |||
| 2030 | /* | 2105 | /* |
| 2031 | * Helper function for dentry_operations.d_dname() members | 2106 | * Helper function for dentry_operations.d_dname() members |
| 2032 | */ | 2107 | */ |
| @@ -2129,27 +2204,30 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) | |||
| 2129 | if (!page) | 2204 | if (!page) |
| 2130 | return -ENOMEM; | 2205 | return -ENOMEM; |
| 2131 | 2206 | ||
| 2132 | read_lock(¤t->fs->lock); | 2207 | get_fs_root_and_pwd(current->fs, &root, &pwd); |
| 2133 | pwd = current->fs->pwd; | ||
| 2134 | path_get(&pwd); | ||
| 2135 | root = current->fs->root; | ||
| 2136 | path_get(&root); | ||
| 2137 | read_unlock(¤t->fs->lock); | ||
| 2138 | 2208 | ||
| 2139 | error = -ENOENT; | 2209 | error = -ENOENT; |
| 2140 | spin_lock(&dcache_lock); | 2210 | spin_lock(&dcache_lock); |
| 2141 | if (!d_unlinked(pwd.dentry)) { | 2211 | if (!d_unlinked(pwd.dentry)) { |
| 2142 | unsigned long len; | 2212 | unsigned long len; |
| 2143 | struct path tmp = root; | 2213 | struct path tmp = root; |
| 2144 | char * cwd; | 2214 | char *cwd = page + PAGE_SIZE; |
| 2215 | int buflen = PAGE_SIZE; | ||
| 2145 | 2216 | ||
| 2146 | cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE); | 2217 | prepend(&cwd, &buflen, "\0", 1); |
| 2218 | error = prepend_path(&pwd, &tmp, &cwd, &buflen); | ||
| 2147 | spin_unlock(&dcache_lock); | 2219 | spin_unlock(&dcache_lock); |
| 2148 | 2220 | ||
| 2149 | error = PTR_ERR(cwd); | 2221 | if (error) |
| 2150 | if (IS_ERR(cwd)) | ||
| 2151 | goto out; | 2222 | goto out; |
| 2152 | 2223 | ||
| 2224 | /* Unreachable from current root */ | ||
| 2225 | if (!path_equal(&tmp, &root)) { | ||
| 2226 | error = prepend_unreachable(&cwd, &buflen); | ||
| 2227 | if (error) | ||
| 2228 | goto out; | ||
| 2229 | } | ||
| 2230 | |||
| 2153 | error = -ERANGE; | 2231 | error = -ERANGE; |
| 2154 | len = PAGE_SIZE + page - cwd; | 2232 | len = PAGE_SIZE + page - cwd; |
| 2155 | if (len <= size) { | 2233 | if (len <= size) { |
diff --git a/fs/exofs/file.c b/fs/exofs/file.c index f9bfe2b501d5..68cb23e3bb98 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c | |||
| @@ -30,9 +30,6 @@ | |||
| 30 | * along with exofs; if not, write to the Free Software | 30 | * along with exofs; if not, write to the Free Software |
| 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| 32 | */ | 32 | */ |
| 33 | |||
| 34 | #include <linux/buffer_head.h> | ||
| 35 | |||
| 36 | #include "exofs.h" | 33 | #include "exofs.h" |
| 37 | 34 | ||
| 38 | static int exofs_release_file(struct inode *inode, struct file *filp) | 35 | static int exofs_release_file(struct inode *inode, struct file *filp) |
| @@ -40,19 +37,27 @@ static int exofs_release_file(struct inode *inode, struct file *filp) | |||
| 40 | return 0; | 37 | return 0; |
| 41 | } | 38 | } |
| 42 | 39 | ||
| 40 | /* exofs_file_fsync - flush the inode to disk | ||
| 41 | * | ||
| 42 | * Note, in exofs all metadata is written as part of inode, regardless. | ||
| 43 | * The writeout is synchronous | ||
| 44 | */ | ||
| 43 | static int exofs_file_fsync(struct file *filp, int datasync) | 45 | static int exofs_file_fsync(struct file *filp, int datasync) |
| 44 | { | 46 | { |
| 45 | int ret; | 47 | int ret; |
| 46 | struct address_space *mapping = filp->f_mapping; | 48 | struct inode *inode = filp->f_mapping->host; |
| 47 | struct inode *inode = mapping->host; | 49 | struct writeback_control wbc = { |
| 50 | .sync_mode = WB_SYNC_ALL, | ||
| 51 | .nr_to_write = 0, /* metadata-only; caller takes care of data */ | ||
| 52 | }; | ||
| 48 | struct super_block *sb; | 53 | struct super_block *sb; |
| 49 | 54 | ||
| 50 | ret = filemap_write_and_wait(mapping); | 55 | if (!(inode->i_state & I_DIRTY)) |
| 51 | if (ret) | 56 | return 0; |
| 52 | return ret; | 57 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
| 58 | return 0; | ||
| 53 | 59 | ||
| 54 | /* sync the inode attributes */ | 60 | ret = sync_inode(inode, &wbc); |
| 55 | ret = write_inode_now(inode, 1); | ||
| 56 | 61 | ||
| 57 | /* This is a good place to write the sb */ | 62 | /* This is a good place to write the sb */ |
| 58 | /* TODO: Schedule an sb-sync on create */ | 63 | /* TODO: Schedule an sb-sync on create */ |
| @@ -65,9 +70,9 @@ static int exofs_file_fsync(struct file *filp, int datasync) | |||
| 65 | 70 | ||
| 66 | static int exofs_flush(struct file *file, fl_owner_t id) | 71 | static int exofs_flush(struct file *file, fl_owner_t id) |
| 67 | { | 72 | { |
| 68 | exofs_file_fsync(file, 1); | 73 | int ret = vfs_fsync(file, 0); |
| 69 | /* TODO: Flush the OSD target */ | 74 | /* TODO: Flush the OSD target */ |
| 70 | return 0; | 75 | return ret; |
| 71 | } | 76 | } |
| 72 | 77 | ||
| 73 | const struct file_operations exofs_file_operations = { | 78 | const struct file_operations exofs_file_operations = { |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 088cb476b68a..eb7368ebd8cd 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
| @@ -32,9 +32,6 @@ | |||
| 32 | */ | 32 | */ |
| 33 | 33 | ||
| 34 | #include <linux/slab.h> | 34 | #include <linux/slab.h> |
| 35 | #include <linux/writeback.h> | ||
| 36 | #include <linux/buffer_head.h> | ||
| 37 | #include <scsi/scsi_device.h> | ||
| 38 | 35 | ||
| 39 | #include "exofs.h" | 36 | #include "exofs.h" |
| 40 | 37 | ||
| @@ -773,15 +770,13 @@ static int exofs_releasepage(struct page *page, gfp_t gfp) | |||
| 773 | { | 770 | { |
| 774 | EXOFS_DBGMSG("page 0x%lx\n", page->index); | 771 | EXOFS_DBGMSG("page 0x%lx\n", page->index); |
| 775 | WARN_ON(1); | 772 | WARN_ON(1); |
| 776 | return try_to_free_buffers(page); | 773 | return 0; |
| 777 | } | 774 | } |
| 778 | 775 | ||
| 779 | static void exofs_invalidatepage(struct page *page, unsigned long offset) | 776 | static void exofs_invalidatepage(struct page *page, unsigned long offset) |
| 780 | { | 777 | { |
| 781 | EXOFS_DBGMSG("page_has_buffers=>%d\n", page_has_buffers(page)); | 778 | EXOFS_DBGMSG("page 0x%lx offset 0x%lx\n", page->index, offset); |
| 782 | WARN_ON(1); | 779 | WARN_ON(1); |
| 783 | |||
| 784 | block_invalidatepage(page, offset); | ||
| 785 | } | 780 | } |
| 786 | 781 | ||
| 787 | const struct address_space_operations exofs_aops = { | 782 | const struct address_space_operations exofs_aops = { |
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index e2732203fa93..6550bf70e41d 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c | |||
| @@ -305,8 +305,6 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) | |||
| 305 | struct _striping_info { | 305 | struct _striping_info { |
| 306 | u64 obj_offset; | 306 | u64 obj_offset; |
| 307 | u64 group_length; | 307 | u64 group_length; |
| 308 | u64 total_group_length; | ||
| 309 | u64 Major; | ||
| 310 | unsigned dev; | 308 | unsigned dev; |
| 311 | unsigned unit_off; | 309 | unsigned unit_off; |
| 312 | }; | 310 | }; |
| @@ -343,8 +341,6 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, | |||
| 343 | (M * group_depth * stripe_unit); | 341 | (M * group_depth * stripe_unit); |
| 344 | 342 | ||
| 345 | si->group_length = T - H; | 343 | si->group_length = T - H; |
| 346 | si->total_group_length = T; | ||
| 347 | si->Major = M; | ||
| 348 | } | 344 | } |
| 349 | 345 | ||
| 350 | static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, | 346 | static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, |
| @@ -392,20 +388,19 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, | |||
| 392 | } | 388 | } |
| 393 | 389 | ||
| 394 | static int _prepare_one_group(struct exofs_io_state *ios, u64 length, | 390 | static int _prepare_one_group(struct exofs_io_state *ios, u64 length, |
| 395 | struct _striping_info *si, unsigned first_comp) | 391 | struct _striping_info *si) |
| 396 | { | 392 | { |
| 397 | unsigned stripe_unit = ios->layout->stripe_unit; | 393 | unsigned stripe_unit = ios->layout->stripe_unit; |
| 398 | unsigned mirrors_p1 = ios->layout->mirrors_p1; | 394 | unsigned mirrors_p1 = ios->layout->mirrors_p1; |
| 399 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; | 395 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; |
| 400 | unsigned dev = si->dev; | 396 | unsigned dev = si->dev; |
| 401 | unsigned first_dev = dev - (dev % devs_in_group); | 397 | unsigned first_dev = dev - (dev % devs_in_group); |
| 402 | unsigned comp = first_comp + (dev - first_dev); | ||
| 403 | unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; | 398 | unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; |
| 404 | unsigned cur_pg = ios->pages_consumed; | 399 | unsigned cur_pg = ios->pages_consumed; |
| 405 | int ret = 0; | 400 | int ret = 0; |
| 406 | 401 | ||
| 407 | while (length) { | 402 | while (length) { |
| 408 | struct exofs_per_dev_state *per_dev = &ios->per_dev[comp]; | 403 | struct exofs_per_dev_state *per_dev = &ios->per_dev[dev]; |
| 409 | unsigned cur_len, page_off = 0; | 404 | unsigned cur_len, page_off = 0; |
| 410 | 405 | ||
| 411 | if (!per_dev->length) { | 406 | if (!per_dev->length) { |
| @@ -424,11 +419,8 @@ static int _prepare_one_group(struct exofs_io_state *ios, u64 length, | |||
| 424 | cur_len = stripe_unit; | 419 | cur_len = stripe_unit; |
| 425 | } | 420 | } |
| 426 | 421 | ||
| 427 | if (max_comp < comp) | 422 | if (max_comp < dev) |
| 428 | max_comp = comp; | 423 | max_comp = dev; |
| 429 | |||
| 430 | dev += mirrors_p1; | ||
| 431 | dev = (dev % devs_in_group) + first_dev; | ||
| 432 | } else { | 424 | } else { |
| 433 | cur_len = stripe_unit; | 425 | cur_len = stripe_unit; |
| 434 | } | 426 | } |
| @@ -440,8 +432,8 @@ static int _prepare_one_group(struct exofs_io_state *ios, u64 length, | |||
| 440 | if (unlikely(ret)) | 432 | if (unlikely(ret)) |
| 441 | goto out; | 433 | goto out; |
| 442 | 434 | ||
| 443 | comp += mirrors_p1; | 435 | dev += mirrors_p1; |
| 444 | comp = (comp % devs_in_group) + first_comp; | 436 | dev = (dev % devs_in_group) + first_dev; |
| 445 | 437 | ||
| 446 | length -= cur_len; | 438 | length -= cur_len; |
| 447 | } | 439 | } |
| @@ -454,18 +446,15 @@ out: | |||
| 454 | static int _prepare_for_striping(struct exofs_io_state *ios) | 446 | static int _prepare_for_striping(struct exofs_io_state *ios) |
| 455 | { | 447 | { |
| 456 | u64 length = ios->length; | 448 | u64 length = ios->length; |
| 449 | u64 offset = ios->offset; | ||
| 457 | struct _striping_info si; | 450 | struct _striping_info si; |
| 458 | unsigned devs_in_group = ios->layout->group_width * | ||
| 459 | ios->layout->mirrors_p1; | ||
| 460 | unsigned first_comp = 0; | ||
| 461 | int ret = 0; | 451 | int ret = 0; |
| 462 | 452 | ||
| 463 | _calc_stripe_info(ios, ios->offset, &si); | ||
| 464 | |||
| 465 | if (!ios->pages) { | 453 | if (!ios->pages) { |
| 466 | if (ios->kern_buff) { | 454 | if (ios->kern_buff) { |
| 467 | struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; | 455 | struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; |
| 468 | 456 | ||
| 457 | _calc_stripe_info(ios, ios->offset, &si); | ||
| 469 | per_dev->offset = si.obj_offset; | 458 | per_dev->offset = si.obj_offset; |
| 470 | per_dev->dev = si.dev; | 459 | per_dev->dev = si.dev; |
| 471 | 460 | ||
| @@ -479,26 +468,17 @@ static int _prepare_for_striping(struct exofs_io_state *ios) | |||
| 479 | } | 468 | } |
| 480 | 469 | ||
| 481 | while (length) { | 470 | while (length) { |
| 471 | _calc_stripe_info(ios, offset, &si); | ||
| 472 | |||
| 482 | if (length < si.group_length) | 473 | if (length < si.group_length) |
| 483 | si.group_length = length; | 474 | si.group_length = length; |
| 484 | 475 | ||
| 485 | ret = _prepare_one_group(ios, si.group_length, &si, first_comp); | 476 | ret = _prepare_one_group(ios, si.group_length, &si); |
| 486 | if (unlikely(ret)) | 477 | if (unlikely(ret)) |
| 487 | goto out; | 478 | goto out; |
| 488 | 479 | ||
| 480 | offset += si.group_length; | ||
| 489 | length -= si.group_length; | 481 | length -= si.group_length; |
| 490 | |||
| 491 | si.group_length = si.total_group_length; | ||
| 492 | si.unit_off = 0; | ||
| 493 | ++si.Major; | ||
| 494 | si.obj_offset = si.Major * ios->layout->stripe_unit * | ||
| 495 | ios->layout->group_depth; | ||
| 496 | |||
| 497 | si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group; | ||
| 498 | si.dev %= ios->layout->s_numdevs; | ||
| 499 | |||
| 500 | first_comp += devs_in_group; | ||
| 501 | first_comp %= ios->layout->s_numdevs; | ||
| 502 | } | 482 | } |
| 503 | 483 | ||
| 504 | out: | 484 | out: |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 32cfd61def5f..047e92fa3af8 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
| @@ -31,7 +31,6 @@ | |||
| 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| 32 | */ | 32 | */ |
| 33 | 33 | ||
| 34 | #include <linux/smp_lock.h> | ||
| 35 | #include <linux/string.h> | 34 | #include <linux/string.h> |
| 36 | #include <linux/parser.h> | 35 | #include <linux/parser.h> |
| 37 | #include <linux/vfs.h> | 36 | #include <linux/vfs.h> |
diff --git a/fs/fcntl.c b/fs/fcntl.c index 9d175d623aab..6769fd0f35b8 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
| @@ -767,11 +767,22 @@ void kill_fasync(struct fasync_struct **fp, int sig, int band) | |||
| 767 | } | 767 | } |
| 768 | EXPORT_SYMBOL(kill_fasync); | 768 | EXPORT_SYMBOL(kill_fasync); |
| 769 | 769 | ||
| 770 | static int __init fasync_init(void) | 770 | static int __init fcntl_init(void) |
| 771 | { | 771 | { |
| 772 | /* please add new bits here to ensure allocation uniqueness */ | ||
| 773 | BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( | ||
| 774 | O_RDONLY | O_WRONLY | O_RDWR | | ||
| 775 | O_CREAT | O_EXCL | O_NOCTTY | | ||
| 776 | O_TRUNC | O_APPEND | O_NONBLOCK | | ||
| 777 | __O_SYNC | O_DSYNC | FASYNC | | ||
| 778 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | | ||
| 779 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | | ||
| 780 | FMODE_EXEC | ||
| 781 | )); | ||
| 782 | |||
| 772 | fasync_cache = kmem_cache_create("fasync_cache", | 783 | fasync_cache = kmem_cache_create("fasync_cache", |
| 773 | sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); | 784 | sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); |
| 774 | return 0; | 785 | return 0; |
| 775 | } | 786 | } |
| 776 | 787 | ||
| 777 | module_init(fasync_init) | 788 | module_init(fcntl_init) |
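The new BUILD_BUG_ON encodes the rule that the 18 non-zero O_* / FMODE_EXEC flags must each occupy a distinct bit: if any two overlapped, the population count of their OR would fall short of 19 - 1. A tiny userspace analogue of the same check, with made-up flags and the GCC/Clang popcount builtin assumed:

    #include <assert.h>

    #define FLAG_A 0x01
    #define FLAG_B 0x02
    #define FLAG_C 0x04

    int main(void)
    {
            /* if two flags shared a bit, the popcount would come up short */
            assert(__builtin_popcount(FLAG_A | FLAG_B | FLAG_C) == 3);
            return 0;
    }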
| @@ -39,28 +39,27 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */ | |||
| 39 | */ | 39 | */ |
| 40 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); | 40 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); |
| 41 | 41 | ||
| 42 | static inline void * alloc_fdmem(unsigned int size) | 42 | static inline void *alloc_fdmem(unsigned int size) |
| 43 | { | 43 | { |
| 44 | if (size <= PAGE_SIZE) | 44 | void *data; |
| 45 | return kmalloc(size, GFP_KERNEL); | 45 | |
| 46 | else | 46 | data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN); |
| 47 | return vmalloc(size); | 47 | if (data != NULL) |
| 48 | return data; | ||
| 49 | |||
| 50 | return vmalloc(size); | ||
| 48 | } | 51 | } |
| 49 | 52 | ||
| 50 | static inline void free_fdarr(struct fdtable *fdt) | 53 | static void free_fdmem(void *ptr) |
| 51 | { | 54 | { |
| 52 | if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) | 55 | is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr); |
| 53 | kfree(fdt->fd); | ||
| 54 | else | ||
| 55 | vfree(fdt->fd); | ||
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | static inline void free_fdset(struct fdtable *fdt) | 58 | static void __free_fdtable(struct fdtable *fdt) |
| 59 | { | 59 | { |
| 60 | if (fdt->max_fds <= (PAGE_SIZE * BITS_PER_BYTE / 2)) | 60 | free_fdmem(fdt->fd); |
| 61 | kfree(fdt->open_fds); | 61 | free_fdmem(fdt->open_fds); |
| 62 | else | 62 | kfree(fdt); |
| 63 | vfree(fdt->open_fds); | ||
| 64 | } | 63 | } |
| 65 | 64 | ||
| 66 | static void free_fdtable_work(struct work_struct *work) | 65 | static void free_fdtable_work(struct work_struct *work) |
| @@ -75,9 +74,8 @@ static void free_fdtable_work(struct work_struct *work) | |||
| 75 | spin_unlock_bh(&f->lock); | 74 | spin_unlock_bh(&f->lock); |
| 76 | while(fdt) { | 75 | while(fdt) { |
| 77 | struct fdtable *next = fdt->next; | 76 | struct fdtable *next = fdt->next; |
| 78 | vfree(fdt->fd); | 77 | |
| 79 | free_fdset(fdt); | 78 | __free_fdtable(fdt); |
| 80 | kfree(fdt); | ||
| 81 | fdt = next; | 79 | fdt = next; |
| 82 | } | 80 | } |
| 83 | } | 81 | } |
| @@ -98,7 +96,7 @@ void free_fdtable_rcu(struct rcu_head *rcu) | |||
| 98 | container_of(fdt, struct files_struct, fdtab)); | 96 | container_of(fdt, struct files_struct, fdtab)); |
| 99 | return; | 97 | return; |
| 100 | } | 98 | } |
| 101 | if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) { | 99 | if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { |
| 102 | kfree(fdt->fd); | 100 | kfree(fdt->fd); |
| 103 | kfree(fdt->open_fds); | 101 | kfree(fdt->open_fds); |
| 104 | kfree(fdt); | 102 | kfree(fdt); |
| @@ -183,7 +181,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr) | |||
| 183 | return fdt; | 181 | return fdt; |
| 184 | 182 | ||
| 185 | out_arr: | 183 | out_arr: |
| 186 | free_fdarr(fdt); | 184 | free_fdmem(fdt->fd); |
| 187 | out_fdt: | 185 | out_fdt: |
| 188 | kfree(fdt); | 186 | kfree(fdt); |
| 189 | out: | 187 | out: |
| @@ -213,9 +211,7 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
| 213 | * caller and alloc_fdtable(). Cheaper to catch it here... | 211 | * caller and alloc_fdtable(). Cheaper to catch it here... |
| 214 | */ | 212 | */ |
| 215 | if (unlikely(new_fdt->max_fds <= nr)) { | 213 | if (unlikely(new_fdt->max_fds <= nr)) { |
| 216 | free_fdarr(new_fdt); | 214 | __free_fdtable(new_fdt); |
| 217 | free_fdset(new_fdt); | ||
| 218 | kfree(new_fdt); | ||
| 219 | return -EMFILE; | 215 | return -EMFILE; |
| 220 | } | 216 | } |
| 221 | /* | 217 | /* |
| @@ -231,9 +227,7 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
| 231 | free_fdtable(cur_fdt); | 227 | free_fdtable(cur_fdt); |
| 232 | } else { | 228 | } else { |
| 233 | /* Somebody else expanded, so undo our attempt */ | 229 | /* Somebody else expanded, so undo our attempt */ |
| 234 | free_fdarr(new_fdt); | 230 | __free_fdtable(new_fdt); |
| 235 | free_fdset(new_fdt); | ||
| 236 | kfree(new_fdt); | ||
| 237 | } | 231 | } |
| 238 | return 1; | 232 | return 1; |
| 239 | } | 233 | } |
| @@ -323,11 +317,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 323 | while (unlikely(open_files > new_fdt->max_fds)) { | 317 | while (unlikely(open_files > new_fdt->max_fds)) { |
| 324 | spin_unlock(&oldf->file_lock); | 318 | spin_unlock(&oldf->file_lock); |
| 325 | 319 | ||
| 326 | if (new_fdt != &newf->fdtab) { | 320 | if (new_fdt != &newf->fdtab) |
| 327 | free_fdarr(new_fdt); | 321 | __free_fdtable(new_fdt); |
| 328 | free_fdset(new_fdt); | ||
| 329 | kfree(new_fdt); | ||
| 330 | } | ||
| 331 | 322 | ||
| 332 | new_fdt = alloc_fdtable(open_files - 1); | 323 | new_fdt = alloc_fdtable(open_files - 1); |
| 333 | if (!new_fdt) { | 324 | if (!new_fdt) { |
| @@ -337,9 +328,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
| 337 | 328 | ||
| 338 | /* beyond sysctl_nr_open; nothing to do */ | 329 | /* beyond sysctl_nr_open; nothing to do */ |
| 339 | if (unlikely(new_fdt->max_fds < open_files)) { | 330 | if (unlikely(new_fdt->max_fds < open_files)) { |
| 340 | free_fdarr(new_fdt); | 331 | __free_fdtable(new_fdt); |
| 341 | free_fdset(new_fdt); | ||
| 342 | kfree(new_fdt); | ||
| 343 | *errorp = -EMFILE; | 332 | *errorp = -EMFILE; |
| 344 | goto out_release; | 333 | goto out_release; |
| 345 | } | 334 | } |
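
The new alloc_fdmem()/free_fdmem() pair collapses the size-based helpers into one policy: try kmalloc quietly (__GFP_NOWARN) and fall back to vmalloc, then let the free path probe the pointer with is_vmalloc_addr() to choose kfree() or vfree(). A rough userspace analogue of the same two-tier pattern; since userspace has no address-range probe, this sketch records which allocator succeeded in a tag instead:

#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

struct buf {
	void  *mem;
	size_t len;
	int    mapped;	/* 1 if mmap() was used, 0 if malloc() */
};

static int buf_alloc(struct buf *b, size_t len)
{
	b->len = len;
	b->mapped = 0;
	b->mem = malloc(len);		/* cheap path first */
	if (b->mem)
		return 0;

	b->mem = mmap(NULL, len, PROT_READ | PROT_WRITE,
		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (b->mem == MAP_FAILED)
		return -1;
	b->mapped = 1;			/* remember which path succeeded */
	return 0;
}

static void buf_free(struct buf *b)
{
	if (b->mapped)
		munmap(b->mem, b->len);
	else
		free(b->mem);
}

int main(void)
{
	struct buf b;

	if (buf_alloc(&b, 1 << 20) == 0) {
		memset(b.mem, 0, b.len);
		buf_free(&b);
	}
	return 0;
}
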
diff --git a/fs/file_table.c b/fs/file_table.c index b8a0bb63cbd7..edecd36fed9b 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
| @@ -230,15 +230,6 @@ static void __fput(struct file *file) | |||
| 230 | might_sleep(); | 230 | might_sleep(); |
| 231 | 231 | ||
| 232 | fsnotify_close(file); | 232 | fsnotify_close(file); |
| 233 | |||
| 234 | /* | ||
| 235 | * fsnotify_create_event may have taken one or more references on this | ||
| 236 | * file. If it did so it left one reference for us to drop to make sure | ||
| 237 | * its calls to fput could not prematurely destroy the file. | ||
| 238 | */ | ||
| 239 | if (atomic_long_read(&file->f_count)) | ||
| 240 | return fput(file); | ||
| 241 | |||
| 242 | /* | 233 | /* |
| 243 | * The function eventpoll_release() should be the first called | 234 | * The function eventpoll_release() should be the first called |
| 244 | * in the file cleanup chain. | 235 | * in the file cleanup chain. |
| @@ -298,11 +289,20 @@ struct file *fget(unsigned int fd) | |||
| 298 | EXPORT_SYMBOL(fget); | 289 | EXPORT_SYMBOL(fget); |
| 299 | 290 | ||
| 300 | /* | 291 | /* |
| 301 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. | 292 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. |
| 302 | * You can use this only if it is guranteed that the current task already | 293 | * |
| 303 | * holds a refcnt to that file. That check has to be done at fget() only | 294 | * You can use this instead of fget if you satisfy all of the following |
| 304 | * and a flag is returned to be passed to the corresponding fput_light(). | 295 | * conditions: |
| 305 | * There must not be a cloning between an fget_light/fput_light pair. | 296 | * 1) You must call fput_light before exiting the syscall and returning control |
| 297 | * to userspace (i.e. you cannot remember the returned struct file * after | ||
| 298 | * returning to userspace). | ||
| 299 | * 2) You must not call filp_close on the returned struct file * in between | ||
| 300 | * calls to fget_light and fput_light. | ||
| 301 | * 3) You must not clone the current task in between the calls to fget_light | ||
| 302 | * and fput_light. | ||
| 303 | * | ||
| 304 | * The fput_needed flag returned by fget_light should be passed to the | ||
| 305 | * corresponding fput_light. | ||
| 306 | */ | 306 | */ |
| 307 | struct file *fget_light(unsigned int fd, int *fput_needed) | 307 | struct file *fget_light(unsigned int fd, int *fput_needed) |
| 308 | { | 308 | { |
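
The rewritten comment spells the fget_light() contract out as three rules. A sketch of the call pattern it describes, in a hypothetical syscall (illustrative fragment only, not part of the patch):

/*
 * fget_light() may skip the refcount bump when the fd table is private to
 * the current task, so the returned pointer is only valid until the matching
 * fput_light(): it must not be stashed past the syscall, the fd must not be
 * closed via filp_close() in between, and the task must not clone.
 */
SYSCALL_DEFINE1(example_flush, unsigned int, fd)
{
	struct file *file;
	int fput_needed;
	long ret = -EBADF;

	file = fget_light(fd, &fput_needed);
	if (!file)
		return ret;

	ret = vfs_fsync(file, 0);	/* use the file only inside this call */

	fput_light(file, fput_needed);	/* pass back the flag fget_light set */
	return ret;
}
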
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 2f76c4a081a2..7d9d06ba184b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -68,7 +68,7 @@ int nr_pdflush_threads; | |||
| 68 | */ | 68 | */ |
| 69 | int writeback_in_progress(struct backing_dev_info *bdi) | 69 | int writeback_in_progress(struct backing_dev_info *bdi) |
| 70 | { | 70 | { |
| 71 | return !list_empty(&bdi->work_list); | 71 | return test_bit(BDI_writeback_running, &bdi->state); |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | static void bdi_queue_work(struct backing_dev_info *bdi, | 74 | static void bdi_queue_work(struct backing_dev_info *bdi, |
| @@ -249,10 +249,18 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
| 249 | 249 | ||
| 250 | /* | 250 | /* |
| 251 | * Queue all expired dirty inodes for io, eldest first. | 251 | * Queue all expired dirty inodes for io, eldest first. |
| 252 | * Before | ||
| 253 | * newly dirtied b_dirty b_io b_more_io | ||
| 254 | * =============> gf edc BA | ||
| 255 | * After | ||
| 256 | * newly dirtied b_dirty b_io b_more_io | ||
| 257 | * =============> g fBAedc | ||
| 258 | * | | ||
| 259 | * +--> dequeue for IO | ||
| 252 | */ | 260 | */ |
| 253 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) | 261 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
| 254 | { | 262 | { |
| 255 | list_splice_init(&wb->b_more_io, wb->b_io.prev); | 263 | list_splice_init(&wb->b_more_io, &wb->b_io); |
| 256 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); | 264 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
| 257 | } | 265 | } |
| 258 | 266 | ||
| @@ -363,62 +371,35 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 363 | spin_lock(&inode_lock); | 371 | spin_lock(&inode_lock); |
| 364 | inode->i_state &= ~I_SYNC; | 372 | inode->i_state &= ~I_SYNC; |
| 365 | if (!(inode->i_state & I_FREEING)) { | 373 | if (!(inode->i_state & I_FREEING)) { |
| 366 | if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { | 374 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
| 367 | /* | ||
| 368 | * More pages get dirtied by a fast dirtier. | ||
| 369 | */ | ||
| 370 | goto select_queue; | ||
| 371 | } else if (inode->i_state & I_DIRTY) { | ||
| 372 | /* | ||
| 373 | * At least XFS will redirty the inode during the | ||
| 374 | * writeback (delalloc) and on io completion (isize). | ||
| 375 | */ | ||
| 376 | redirty_tail(inode); | ||
| 377 | } else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
| 378 | /* | 375 | /* |
| 379 | * We didn't write back all the pages. nfs_writepages() | 376 | * We didn't write back all the pages. nfs_writepages() |
| 380 | * sometimes bales out without doing anything. Redirty | 377 | * sometimes bales out without doing anything. |
| 381 | * the inode; Move it from b_io onto b_more_io/b_dirty. | ||
| 382 | */ | 378 | */ |
| 383 | /* | 379 | inode->i_state |= I_DIRTY_PAGES; |
| 384 | * akpm: if the caller was the kupdate function we put | 380 | if (wbc->nr_to_write <= 0) { |
| 385 | * this inode at the head of b_dirty so it gets first | ||
| 386 | * consideration. Otherwise, move it to the tail, for | ||
| 387 | * the reasons described there. I'm not really sure | ||
| 388 | * how much sense this makes. Presumably I had a good | ||
| 389 | * reasons for doing it this way, and I'd rather not | ||
| 390 | * muck with it at present. | ||
| 391 | */ | ||
| 392 | if (wbc->for_kupdate) { | ||
| 393 | /* | 381 | /* |
| 394 | * For the kupdate function we move the inode | 382 | * slice used up: queue for next turn |
| 395 | * to b_more_io so it will get more writeout as | ||
| 396 | * soon as the queue becomes uncongested. | ||
| 397 | */ | 383 | */ |
| 398 | inode->i_state |= I_DIRTY_PAGES; | 384 | requeue_io(inode); |
| 399 | select_queue: | ||
| 400 | if (wbc->nr_to_write <= 0) { | ||
| 401 | /* | ||
| 402 | * slice used up: queue for next turn | ||
| 403 | */ | ||
| 404 | requeue_io(inode); | ||
| 405 | } else { | ||
| 406 | /* | ||
| 407 | * somehow blocked: retry later | ||
| 408 | */ | ||
| 409 | redirty_tail(inode); | ||
| 410 | } | ||
| 411 | } else { | 385 | } else { |
| 412 | /* | 386 | /* |
| 413 | * Otherwise fully redirty the inode so that | 387 | * Writeback blocked by something other than |
| 414 | * other inodes on this superblock will get some | 388 | * congestion. Delay the inode for some time to |
| 415 | * writeout. Otherwise heavy writing to one | 389 | * avoid spinning on the CPU (100% iowait) |
| 416 | * file would indefinitely suspend writeout of | 390 | * retrying writeback of the dirty page/inode |
| 417 | * all the other files. | 391 | * that cannot be performed immediately. |
| 418 | */ | 392 | */ |
| 419 | inode->i_state |= I_DIRTY_PAGES; | ||
| 420 | redirty_tail(inode); | 393 | redirty_tail(inode); |
| 421 | } | 394 | } |
| 395 | } else if (inode->i_state & I_DIRTY) { | ||
| 396 | /* | ||
| 397 | * Filesystems can dirty the inode during writeback | ||
| 398 | * operations, such as delayed allocation during | ||
| 399 | * submission or metadata updates after data IO | ||
| 400 | * completion. | ||
| 401 | */ | ||
| 402 | redirty_tail(inode); | ||
| 422 | } else if (atomic_read(&inode->i_count)) { | 403 | } else if (atomic_read(&inode->i_count)) { |
| 423 | /* | 404 | /* |
| 424 | * The inode is clean, inuse | 405 | * The inode is clean, inuse |
| @@ -590,7 +571,7 @@ static inline bool over_bground_thresh(void) | |||
| 590 | { | 571 | { |
| 591 | unsigned long background_thresh, dirty_thresh; | 572 | unsigned long background_thresh, dirty_thresh; |
| 592 | 573 | ||
| 593 | get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); | 574 | global_dirty_limits(&background_thresh, &dirty_thresh); |
| 594 | 575 | ||
| 595 | return (global_page_state(NR_FILE_DIRTY) + | 576 | return (global_page_state(NR_FILE_DIRTY) + |
| 596 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); | 577 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); |
| @@ -759,6 +740,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
| 759 | struct wb_writeback_work *work; | 740 | struct wb_writeback_work *work; |
| 760 | long wrote = 0; | 741 | long wrote = 0; |
| 761 | 742 | ||
| 743 | set_bit(BDI_writeback_running, &wb->bdi->state); | ||
| 762 | while ((work = get_next_work_item(bdi)) != NULL) { | 744 | while ((work = get_next_work_item(bdi)) != NULL) { |
| 763 | /* | 745 | /* |
| 764 | * Override sync mode, in case we must wait for completion | 746 | * Override sync mode, in case we must wait for completion |
| @@ -785,6 +767,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
| 785 | * Check for periodic writeback, kupdated() style | 767 | * Check for periodic writeback, kupdated() style |
| 786 | */ | 768 | */ |
| 787 | wrote += wb_check_old_data_flush(wb); | 769 | wrote += wb_check_old_data_flush(wb); |
| 770 | clear_bit(BDI_writeback_running, &wb->bdi->state); | ||
| 788 | 771 | ||
| 789 | return wrote; | 772 | return wrote; |
| 790 | } | 773 | } |
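
writeback_in_progress() now reads a state bit instead of peeking at bdi->work_list, and wb_do_writeback() brackets its whole batch with set_bit()/clear_bit() on BDI_writeback_running. A stripped-down sketch of that signalling pattern, with hypothetical names rather than the real bdi structures:

static unsigned long worker_state;
#define WORKER_RUNNING	0	/* bit number, like BDI_writeback_running */

static int worker_in_progress(void)
{
	return test_bit(WORKER_RUNNING, &worker_state);
}

static long worker_do_batch(void)
{
	long done = 0;

	set_bit(WORKER_RUNNING, &worker_state);
	/* ... drain the queued work items, as wb_do_writeback() does ... */
	clear_bit(WORKER_RUNNING, &worker_state);

	return done;
}
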
diff --git a/fs/fs_struct.c b/fs/fs_struct.c index eee059052db5..1ee40eb9a2c0 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c | |||
| @@ -106,12 +106,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) | |||
| 106 | fs->in_exec = 0; | 106 | fs->in_exec = 0; |
| 107 | rwlock_init(&fs->lock); | 107 | rwlock_init(&fs->lock); |
| 108 | fs->umask = old->umask; | 108 | fs->umask = old->umask; |
| 109 | read_lock(&old->lock); | 109 | get_fs_root_and_pwd(old, &fs->root, &fs->pwd); |
| 110 | fs->root = old->root; | ||
| 111 | path_get(&old->root); | ||
| 112 | fs->pwd = old->pwd; | ||
| 113 | path_get(&old->pwd); | ||
| 114 | read_unlock(&old->lock); | ||
| 115 | } | 110 | } |
| 116 | return fs; | 111 | return fs; |
| 117 | } | 112 | } |
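
copy_fs_struct(), path_init(), pivot_root() and friends now call get_fs_root()/get_fs_pwd()/get_fs_root_and_pwd() instead of open-coding the lock, copy, path_get sequence. Judging from the code being removed, the combined helper presumably looks roughly like this (a reconstruction, not quoted from the patch):

static inline void get_fs_root_and_pwd(struct fs_struct *fs,
				       struct path *root, struct path *pwd)
{
	read_lock(&fs->lock);
	*root = fs->root;
	path_get(root);
	*pwd = fs->pwd;
	path_get(pwd);
	read_unlock(&fs->lock);
}
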
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 6a026441c5a6..f6aad48d38a8 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h | |||
| @@ -321,17 +321,11 @@ void fscache_put_context(struct fscache_cookie *cookie, void *context) | |||
| 321 | #define dbgprintk(FMT, ...) \ | 321 | #define dbgprintk(FMT, ...) \ |
| 322 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) | 322 | printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) |
| 323 | 323 | ||
| 324 | /* make sure we maintain the format strings, even when debugging is disabled */ | ||
| 325 | static inline __attribute__((format(printf, 1, 2))) | ||
| 326 | void _dbprintk(const char *fmt, ...) | ||
| 327 | { | ||
| 328 | } | ||
| 329 | |||
| 330 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 324 | #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
| 331 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 325 | #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
| 332 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) | 326 | #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) |
| 333 | 327 | ||
| 334 | #define kjournal(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) | 328 | #define kjournal(FMT, ...) no_printk(FMT, ##__VA_ARGS__) |
| 335 | 329 | ||
| 336 | #ifdef __KDEBUG | 330 | #ifdef __KDEBUG |
| 337 | #define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) | 331 | #define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) |
| @@ -358,9 +352,9 @@ do { \ | |||
| 358 | } while (0) | 352 | } while (0) |
| 359 | 353 | ||
| 360 | #else | 354 | #else |
| 361 | #define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) | 355 | #define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) |
| 362 | #define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) | 356 | #define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) |
| 363 | #define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__) | 357 | #define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__) |
| 364 | #endif | 358 | #endif |
| 365 | 359 | ||
| 366 | /* | 360 | /* |
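
Replacing the local _dbprintk() stub with the shared no_printk() keeps the property the old helper existed for: format strings stay type-checked even when the debug output is compiled away. A small userspace equivalent of the trick:

#include <stdio.h>

/* Empty printf-like function: callable, trivially optimized away, but it
 * still hands the compiler a format string to check against the arguments. */
static inline __attribute__((format(printf, 1, 2)))
int no_printf(const char *fmt, ...)
{
	(void)fmt;
	return 0;
}

#ifdef DEBUG
#define dbg(fmt, ...) printf(fmt "\n", ##__VA_ARGS__)
#else
#define dbg(fmt, ...) no_printf(fmt, ##__VA_ARGS__)
#endif

int main(void)
{
	dbg("cached %d objects for %s", 3, "cookie");	/* checked either way */
	return 0;
}
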
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 6b4dcd4f2943..5a44811b5027 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
| @@ -722,7 +722,12 @@ root_found: | |||
| 722 | } | 722 | } |
| 723 | 723 | ||
| 724 | s->s_magic = ISOFS_SUPER_MAGIC; | 724 | s->s_magic = ISOFS_SUPER_MAGIC; |
| 725 | s->s_maxbytes = 0xffffffff; /* We can handle files up to 4 GB */ | 725 | |
| 726 | /* | ||
| 727 | * With multi-extent files, file size is only limited by the maximum | ||
| 728 | * size of a file system, which is 8 TB. | ||
| 729 | */ | ||
| 730 | s->s_maxbytes = 0x80000000000LL; | ||
| 726 | 731 | ||
| 727 | /* | 732 | /* |
| 728 | * The CDROM is read-only, has no nodes (devices) on it, and since | 733 | * The CDROM is read-only, has no nodes (devices) on it, and since |
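
For reference, the new constant works out as the comment says: 0x80000000000 bytes is 2^43, i.e. 8 TiB (the comment rounds to "8 TB"), versus the old per-file cap of 0xffffffff, just under 4 GiB. A one-line compile-time check of that arithmetic, usable at file scope in any C11 translation unit:

_Static_assert(0x80000000000LL == (1LL << 43), "isofs limit is 2^43 bytes (8 TiB)");
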
diff --git a/fs/namei.c b/fs/namei.c index 13ff4abdbdca..17ea76bf2fbe 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -483,13 +483,8 @@ ok: | |||
| 483 | 483 | ||
| 484 | static __always_inline void set_root(struct nameidata *nd) | 484 | static __always_inline void set_root(struct nameidata *nd) |
| 485 | { | 485 | { |
| 486 | if (!nd->root.mnt) { | 486 | if (!nd->root.mnt) |
| 487 | struct fs_struct *fs = current->fs; | 487 | get_fs_root(current->fs, &nd->root); |
| 488 | read_lock(&fs->lock); | ||
| 489 | nd->root = fs->root; | ||
| 490 | path_get(&nd->root); | ||
| 491 | read_unlock(&fs->lock); | ||
| 492 | } | ||
| 493 | } | 488 | } |
| 494 | 489 | ||
| 495 | static int link_path_walk(const char *, struct nameidata *); | 490 | static int link_path_walk(const char *, struct nameidata *); |
| @@ -1015,11 +1010,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei | |||
| 1015 | nd->path = nd->root; | 1010 | nd->path = nd->root; |
| 1016 | path_get(&nd->root); | 1011 | path_get(&nd->root); |
| 1017 | } else if (dfd == AT_FDCWD) { | 1012 | } else if (dfd == AT_FDCWD) { |
| 1018 | struct fs_struct *fs = current->fs; | 1013 | get_fs_pwd(current->fs, &nd->path); |
| 1019 | read_lock(&fs->lock); | ||
| 1020 | nd->path = fs->pwd; | ||
| 1021 | path_get(&fs->pwd); | ||
| 1022 | read_unlock(&fs->lock); | ||
| 1023 | } else { | 1014 | } else { |
| 1024 | struct dentry *dentry; | 1015 | struct dentry *dentry; |
| 1025 | 1016 | ||
diff --git a/fs/namespace.c b/fs/namespace.c index 66c4f7e781cb..2e10cb19c5b0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
| @@ -788,7 +788,6 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) | |||
| 788 | { MNT_NOATIME, ",noatime" }, | 788 | { MNT_NOATIME, ",noatime" }, |
| 789 | { MNT_NODIRATIME, ",nodiratime" }, | 789 | { MNT_NODIRATIME, ",nodiratime" }, |
| 790 | { MNT_RELATIME, ",relatime" }, | 790 | { MNT_RELATIME, ",relatime" }, |
| 791 | { MNT_STRICTATIME, ",strictatime" }, | ||
| 792 | { 0, NULL } | 791 | { 0, NULL } |
| 793 | }; | 792 | }; |
| 794 | const struct proc_fs_info *fs_infop; | 793 | const struct proc_fs_info *fs_infop; |
| @@ -2213,10 +2212,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
| 2213 | goto out1; | 2212 | goto out1; |
| 2214 | } | 2213 | } |
| 2215 | 2214 | ||
| 2216 | read_lock(¤t->fs->lock); | 2215 | get_fs_root(current->fs, &root); |
| 2217 | root = current->fs->root; | ||
| 2218 | path_get(¤t->fs->root); | ||
| 2219 | read_unlock(¤t->fs->lock); | ||
| 2220 | down_write(&namespace_sem); | 2216 | down_write(&namespace_sem); |
| 2221 | mutex_lock(&old.dentry->d_inode->i_mutex); | 2217 | mutex_lock(&old.dentry->d_inode->i_mutex); |
| 2222 | error = -EINVAL; | 2218 | error = -EINVAL; |
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index cc1bb33b59b8..26a510a7be09 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
| @@ -100,3 +100,20 @@ config NFS_FSCACHE | |||
| 100 | help | 100 | help |
| 101 | Say Y here if you want NFS data to be cached locally on disc through | 101 | Say Y here if you want NFS data to be cached locally on disc through |
| 102 | the general filesystem cache manager | 102 | the general filesystem cache manager |
| 103 | |||
| 104 | config NFS_USE_LEGACY_DNS | ||
| 105 | bool "Use the legacy NFS DNS resolver" | ||
| 106 | depends on NFS_V4 | ||
| 107 | help | ||
| 108 | The kernel now provides a method for translating a host name into an | ||
| 109 | IP address. Select Y here if you would rather use your own DNS | ||
| 110 | resolver script. | ||
| 111 | |||
| 112 | If unsure, say N | ||
| 113 | |||
| 114 | config NFS_USE_KERNEL_DNS | ||
| 115 | bool | ||
| 116 | depends on NFS_V4 && !NFS_USE_LEGACY_DNS | ||
| 117 | select DNS_RESOLVER | ||
| 118 | select KEYS | ||
| 119 | default y | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 36dfdae95123..e17b49e2eabd 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
| @@ -45,7 +45,7 @@ unsigned short nfs_callback_tcpport; | |||
| 45 | unsigned short nfs_callback_tcpport6; | 45 | unsigned short nfs_callback_tcpport6; |
| 46 | #define NFS_CALLBACK_MAXPORTNR (65535U) | 46 | #define NFS_CALLBACK_MAXPORTNR (65535U) |
| 47 | 47 | ||
| 48 | static int param_set_portnr(const char *val, struct kernel_param *kp) | 48 | static int param_set_portnr(const char *val, const struct kernel_param *kp) |
| 49 | { | 49 | { |
| 50 | unsigned long num; | 50 | unsigned long num; |
| 51 | int ret; | 51 | int ret; |
| @@ -58,11 +58,10 @@ static int param_set_portnr(const char *val, struct kernel_param *kp) | |||
| 58 | *((unsigned int *)kp->arg) = num; | 58 | *((unsigned int *)kp->arg) = num; |
| 59 | return 0; | 59 | return 0; |
| 60 | } | 60 | } |
| 61 | 61 | static struct kernel_param_ops param_ops_portnr = { | |
| 62 | static int param_get_portnr(char *buffer, struct kernel_param *kp) | 62 | .set = param_set_portnr, |
| 63 | { | 63 | .get = param_get_uint, |
| 64 | return param_get_uint(buffer, kp); | 64 | }; |
| 65 | } | ||
| 66 | #define param_check_portnr(name, p) __param_check(name, p, unsigned int); | 65 | #define param_check_portnr(name, p) __param_check(name, p, unsigned int); |
| 67 | 66 | ||
| 68 | module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); | 67 | module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); |
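
This callback.c hunk is part of the module_param conversion from separate set/get callbacks to a kernel_param_ops table; module_param_named(..., portnr, ...) then finds param_ops_portnr and param_check_portnr purely by the type name. A hypothetical parameter wired up the same way (sketch only; the variable and the "myport" type name are invented):

static unsigned int example_port = 2049;

static int param_set_myport(const char *val, const struct kernel_param *kp)
{
	unsigned long num;

	if (strict_strtoul(val, 0, &num) || num > 65535)
		return -EINVAL;
	*(unsigned int *)kp->arg = num;
	return 0;
}

/* module_param_named(..., myport, ...) looks these two up by the type name. */
static struct kernel_param_ops param_ops_myport = {
	.set = param_set_myport,
	.get = param_get_uint,
};
#define param_check_myport(name, p) __param_check(name, p, unsigned int)

module_param_named(port, example_port, myport, 0644);
MODULE_PARM_DESC(port, "illustrative port parameter");
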
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 76fd235d0024..dba50a5625db 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c | |||
| @@ -6,6 +6,29 @@ | |||
| 6 | * Resolves DNS hostnames into valid ip addresses | 6 | * Resolves DNS hostnames into valid ip addresses |
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #ifdef CONFIG_NFS_USE_KERNEL_DNS | ||
| 10 | |||
| 11 | #include <linux/sunrpc/clnt.h> | ||
| 12 | #include <linux/dns_resolver.h> | ||
| 13 | |||
| 14 | ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | ||
| 15 | struct sockaddr *sa, size_t salen) | ||
| 16 | { | ||
| 17 | ssize_t ret; | ||
| 18 | char *ip_addr = NULL; | ||
| 19 | int ip_len; | ||
| 20 | |||
| 21 | ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); | ||
| 22 | if (ip_len > 0) | ||
| 23 | ret = rpc_pton(ip_addr, ip_len, sa, salen); | ||
| 24 | else | ||
| 25 | ret = -ESRCH; | ||
| 26 | kfree(ip_addr); | ||
| 27 | return ret; | ||
| 28 | } | ||
| 29 | |||
| 30 | #else | ||
| 31 | |||
| 9 | #include <linux/hash.h> | 32 | #include <linux/hash.h> |
| 10 | #include <linux/string.h> | 33 | #include <linux/string.h> |
| 11 | #include <linux/kmod.h> | 34 | #include <linux/kmod.h> |
| @@ -346,3 +369,4 @@ void nfs_dns_resolver_destroy(void) | |||
| 346 | nfs_cache_unregister(&nfs_dns_resolve); | 369 | nfs_cache_unregister(&nfs_dns_resolve); |
| 347 | } | 370 | } |
| 348 | 371 | ||
| 372 | #endif | ||
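
Whichever Kconfig option is chosen, callers see the same nfs_dns_resolve_name() signature: with NFS_USE_KERNEL_DNS the lookup is a synchronous dns_query() plus rpc_pton(), with the legacy option it still goes through the upcall cache. A hypothetical call site (illustrative, not taken from the patch):

static int example_resolve(const char *server)
{
	struct sockaddr_storage addr;
	ssize_t len;

	len = nfs_dns_resolve_name((char *)server, strlen(server),
				   (struct sockaddr *)&addr, sizeof(addr));
	if (len <= 0)
		return -EHOSTUNREACH;	/* name did not resolve */

	/* addr now holds a usable address of length len */
	return 0;
}
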
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h index a3f0938babf7..199bb5543a91 100644 --- a/fs/nfs/dns_resolve.h +++ b/fs/nfs/dns_resolve.h | |||
| @@ -6,8 +6,20 @@ | |||
| 6 | 6 | ||
| 7 | #define NFS_DNS_HOSTNAME_MAXLEN (128) | 7 | #define NFS_DNS_HOSTNAME_MAXLEN (128) |
| 8 | 8 | ||
| 9 | |||
| 10 | #ifdef CONFIG_NFS_USE_KERNEL_DNS | ||
| 11 | static inline int nfs_dns_resolver_init(void) | ||
| 12 | { | ||
| 13 | return 0; | ||
| 14 | } | ||
| 15 | |||
| 16 | static inline void nfs_dns_resolver_destroy(void) | ||
| 17 | {} | ||
| 18 | #else | ||
| 9 | extern int nfs_dns_resolver_init(void); | 19 | extern int nfs_dns_resolver_init(void); |
| 10 | extern void nfs_dns_resolver_destroy(void); | 20 | extern void nfs_dns_resolver_destroy(void); |
| 21 | #endif | ||
| 22 | |||
| 11 | extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, | 23 | extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, |
| 12 | struct sockaddr *sa, size_t salen); | 24 | struct sockaddr *sa, size_t salen); |
| 13 | 25 | ||
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index eb8f73c9c131..756566fe8449 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c | |||
| @@ -17,9 +17,9 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) | |||
| 17 | old->data_type == new->data_type && | 17 | old->data_type == new->data_type && |
| 18 | old->tgid == new->tgid) { | 18 | old->tgid == new->tgid) { |
| 19 | switch (old->data_type) { | 19 | switch (old->data_type) { |
| 20 | case (FSNOTIFY_EVENT_FILE): | 20 | case (FSNOTIFY_EVENT_PATH): |
| 21 | if ((old->file->f_path.mnt == new->file->f_path.mnt) && | 21 | if ((old->path.mnt == new->path.mnt) && |
| 22 | (old->file->f_path.dentry == new->file->f_path.dentry)) | 22 | (old->path.dentry == new->path.dentry)) |
| 23 | return true; | 23 | return true; |
| 24 | case (FSNOTIFY_EVENT_NONE): | 24 | case (FSNOTIFY_EVENT_NONE): |
| 25 | return true; | 25 | return true; |
| @@ -174,7 +174,7 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, | |||
| 174 | return false; | 174 | return false; |
| 175 | 175 | ||
| 176 | /* if we don't have enough info to send an event to userspace say no */ | 176 | /* if we don't have enough info to send an event to userspace say no */ |
| 177 | if (data_type != FSNOTIFY_EVENT_FILE) | 177 | if (data_type != FSNOTIFY_EVENT_PATH) |
| 178 | return false; | 178 | return false; |
| 179 | 179 | ||
| 180 | if (inode_mark && vfsmnt_mark) { | 180 | if (inode_mark && vfsmnt_mark) { |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 25a3b4dfcf61..032b837fcd11 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
| @@ -65,7 +65,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | |||
| 65 | if (client_fd < 0) | 65 | if (client_fd < 0) |
| 66 | return client_fd; | 66 | return client_fd; |
| 67 | 67 | ||
| 68 | if (event->data_type != FSNOTIFY_EVENT_FILE) { | 68 | if (event->data_type != FSNOTIFY_EVENT_PATH) { |
| 69 | WARN_ON(1); | 69 | WARN_ON(1); |
| 70 | put_unused_fd(client_fd); | 70 | put_unused_fd(client_fd); |
| 71 | return -EINVAL; | 71 | return -EINVAL; |
| @@ -75,8 +75,8 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | |||
| 75 | * we need a new file handle for the userspace program so it can read even if it was | 75 | * we need a new file handle for the userspace program so it can read even if it was |
| 76 | * originally opened O_WRONLY. | 76 | * originally opened O_WRONLY. |
| 77 | */ | 77 | */ |
| 78 | dentry = dget(event->file->f_path.dentry); | 78 | dentry = dget(event->path.dentry); |
| 79 | mnt = mntget(event->file->f_path.mnt); | 79 | mnt = mntget(event->path.mnt); |
| 80 | /* it's possible this event was an overflow event. in that case dentry and mnt | 80 | /* it's possible this event was an overflow event. in that case dentry and mnt |
| 81 | * are NULL; That's fine, just don't call dentry open */ | 81 | * are NULL; That's fine, just don't call dentry open */ |
| 82 | if (dentry && mnt) | 82 | if (dentry && mnt) |
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4d2a82c1ceb1..3970392b2722 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
| @@ -84,7 +84,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) | |||
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | /* Notify this dentry's parent about a child's events. */ | 86 | /* Notify this dentry's parent about a child's events. */ |
| 87 | void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask) | 87 | void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) |
| 88 | { | 88 | { |
| 89 | struct dentry *parent; | 89 | struct dentry *parent; |
| 90 | struct inode *p_inode; | 90 | struct inode *p_inode; |
| @@ -92,7 +92,7 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask) | |||
| 92 | bool should_update_children = false; | 92 | bool should_update_children = false; |
| 93 | 93 | ||
| 94 | if (!dentry) | 94 | if (!dentry) |
| 95 | dentry = file->f_path.dentry; | 95 | dentry = path->dentry; |
| 96 | 96 | ||
| 97 | if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) | 97 | if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) |
| 98 | return; | 98 | return; |
| @@ -124,8 +124,8 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask) | |||
| 124 | * specifies these are events which came from a child. */ | 124 | * specifies these are events which came from a child. */ |
| 125 | mask |= FS_EVENT_ON_CHILD; | 125 | mask |= FS_EVENT_ON_CHILD; |
| 126 | 126 | ||
| 127 | if (file) | 127 | if (path) |
| 128 | fsnotify(p_inode, mask, file, FSNOTIFY_EVENT_FILE, | 128 | fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, |
| 129 | dentry->d_name.name, 0); | 129 | dentry->d_name.name, 0); |
| 130 | else | 130 | else |
| 131 | fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, | 131 | fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, |
| @@ -217,8 +217,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
| 217 | /* global tests shouldn't care about events on child only the specific event */ | 217 | /* global tests shouldn't care about events on child only the specific event */ |
| 218 | __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); | 218 | __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); |
| 219 | 219 | ||
| 220 | if (data_is == FSNOTIFY_EVENT_FILE) | 220 | if (data_is == FSNOTIFY_EVENT_PATH) |
| 221 | mnt = ((struct file *)data)->f_path.mnt; | 221 | mnt = ((struct path *)data)->mnt; |
| 222 | else | 222 | else |
| 223 | mnt = NULL; | 223 | mnt = NULL; |
| 224 | 224 | ||
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 5e73eeb2c697..a91b69a6a291 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c | |||
| @@ -52,9 +52,9 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new | |||
| 52 | !strcmp(old->file_name, new->file_name)) | 52 | !strcmp(old->file_name, new->file_name)) |
| 53 | return true; | 53 | return true; |
| 54 | break; | 54 | break; |
| 55 | case (FSNOTIFY_EVENT_FILE): | 55 | case (FSNOTIFY_EVENT_PATH): |
| 56 | if ((old->file->f_path.mnt == new->file->f_path.mnt) && | 56 | if ((old->path.mnt == new->path.mnt) && |
| 57 | (old->file->f_path.dentry == new->file->f_path.dentry)) | 57 | (old->path.dentry == new->path.dentry)) |
| 58 | return true; | 58 | return true; |
| 59 | break; | 59 | break; |
| 60 | case (FSNOTIFY_EVENT_NONE): | 60 | case (FSNOTIFY_EVENT_NONE): |
| @@ -147,10 +147,10 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode | |||
| 147 | __u32 mask, void *data, int data_type) | 147 | __u32 mask, void *data, int data_type) |
| 148 | { | 148 | { |
| 149 | if ((inode_mark->mask & FS_EXCL_UNLINK) && | 149 | if ((inode_mark->mask & FS_EXCL_UNLINK) && |
| 150 | (data_type == FSNOTIFY_EVENT_FILE)) { | 150 | (data_type == FSNOTIFY_EVENT_PATH)) { |
| 151 | struct file *file = data; | 151 | struct path *path = data; |
| 152 | 152 | ||
| 153 | if (d_unlinked(file->f_path.dentry)) | 153 | if (d_unlinked(path->dentry)) |
| 154 | return false; | 154 | return false; |
| 155 | } | 155 | } |
| 156 | 156 | ||
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index d6c435adc7a2..f39260f8f865 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
| @@ -31,7 +31,6 @@ | |||
| 31 | * allocated and used. | 31 | * allocated and used. |
| 32 | */ | 32 | */ |
| 33 | 33 | ||
| 34 | #include <linux/file.h> | ||
| 35 | #include <linux/fs.h> | 34 | #include <linux/fs.h> |
| 36 | #include <linux/init.h> | 35 | #include <linux/init.h> |
| 37 | #include <linux/kernel.h> | 36 | #include <linux/kernel.h> |
| @@ -90,8 +89,8 @@ void fsnotify_put_event(struct fsnotify_event *event) | |||
| 90 | if (atomic_dec_and_test(&event->refcnt)) { | 89 | if (atomic_dec_and_test(&event->refcnt)) { |
| 91 | pr_debug("%s: event=%p\n", __func__, event); | 90 | pr_debug("%s: event=%p\n", __func__, event); |
| 92 | 91 | ||
| 93 | if (event->data_type == FSNOTIFY_EVENT_FILE) | 92 | if (event->data_type == FSNOTIFY_EVENT_PATH) |
| 94 | fput(event->file); | 93 | path_put(&event->path); |
| 95 | 94 | ||
| 96 | BUG_ON(!list_empty(&event->private_data_list)); | 95 | BUG_ON(!list_empty(&event->private_data_list)); |
| 97 | 96 | ||
| @@ -376,8 +375,8 @@ struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event) | |||
| 376 | } | 375 | } |
| 377 | } | 376 | } |
| 378 | event->tgid = get_pid(old_event->tgid); | 377 | event->tgid = get_pid(old_event->tgid); |
| 379 | if (event->data_type == FSNOTIFY_EVENT_FILE) | 378 | if (event->data_type == FSNOTIFY_EVENT_PATH) |
| 380 | get_file(event->file); | 379 | path_get(&event->path); |
| 381 | 380 | ||
| 382 | return event; | 381 | return event; |
| 383 | } | 382 | } |
| @@ -424,22 +423,11 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, | |||
| 424 | event->data_type = data_type; | 423 | event->data_type = data_type; |
| 425 | 424 | ||
| 426 | switch (data_type) { | 425 | switch (data_type) { |
| 427 | case FSNOTIFY_EVENT_FILE: { | 426 | case FSNOTIFY_EVENT_PATH: { |
| 428 | event->file = data; | 427 | struct path *path = data; |
| 429 | /* | 428 | event->path.dentry = path->dentry; |
| 430 | * if this file is about to disappear hold an extra reference | 429 | event->path.mnt = path->mnt; |
| 431 | * until we return to __fput so we don't have to worry about | 430 | path_get(&event->path); |
| 432 | * future get/put destroying the file under us or generating | ||
| 433 | * additional events. Notice that we change f_mode without | ||
| 434 | * holding f_lock. This is safe since this is the only possible | ||
| 435 | * reference to this object in the kernel (it was about to be | ||
| 436 | * freed, remember?) | ||
| 437 | */ | ||
| 438 | if (!atomic_long_read(&event->file->f_count)) { | ||
| 439 | event->file->f_mode |= FMODE_NONOTIFY; | ||
| 440 | get_file(event->file); | ||
| 441 | } | ||
| 442 | get_file(event->file); | ||
| 443 | break; | 431 | break; |
| 444 | } | 432 | } |
| 445 | case FSNOTIFY_EVENT_INODE: | 433 | case FSNOTIFY_EVENT_INODE: |
| @@ -447,7 +435,8 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, | |||
| 447 | break; | 435 | break; |
| 448 | case FSNOTIFY_EVENT_NONE: | 436 | case FSNOTIFY_EVENT_NONE: |
| 449 | event->inode = NULL; | 437 | event->inode = NULL; |
| 450 | event->file = NULL; | 438 | event->path.dentry = NULL; |
| 439 | event->path.mnt = NULL; | ||
| 451 | break; | 440 | break; |
| 452 | default: | 441 | default: |
| 453 | BUG(); | 442 | BUG(); |
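
The fanotify/fsnotify hunks above all follow from one representation change: an event now carries a struct path, pinned with path_get() and released with path_put(), instead of a struct file. Pinning a vfsmount/dentry pair does not keep a file "open", which is why the __fput() re-entry workaround in fs/file_table.c could be deleted. The reference discipline, reduced to a sketch with a hypothetical event type:

struct example_event {
	struct path path;	/* pins vfsmount + dentry, not an open file */
};

static void example_event_set_path(struct example_event *ev, struct path *src)
{
	ev->path = *src;
	path_get(&ev->path);	/* was: get_file() on event->file */
}

static void example_event_free(struct example_event *ev)
{
	path_put(&ev->path);	/* was: fput() on event->file */
}
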
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index da702294d7e7..a76e0aa5cd3f 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
| @@ -290,12 +290,30 @@ static int ocfs2_set_acl(handle_t *handle, | |||
| 290 | 290 | ||
| 291 | int ocfs2_check_acl(struct inode *inode, int mask) | 291 | int ocfs2_check_acl(struct inode *inode, int mask) |
| 292 | { | 292 | { |
| 293 | struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); | 293 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 294 | struct buffer_head *di_bh = NULL; | ||
| 295 | struct posix_acl *acl; | ||
| 296 | int ret = -EAGAIN; | ||
| 294 | 297 | ||
| 295 | if (IS_ERR(acl)) | 298 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) |
| 299 | return ret; | ||
| 300 | |||
| 301 | ret = ocfs2_read_inode_block(inode, &di_bh); | ||
| 302 | if (ret < 0) { | ||
| 303 | mlog_errno(ret); | ||
| 304 | return ret; | ||
| 305 | } | ||
| 306 | |||
| 307 | acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, di_bh); | ||
| 308 | |||
| 309 | brelse(di_bh); | ||
| 310 | |||
| 311 | if (IS_ERR(acl)) { | ||
| 312 | mlog_errno(PTR_ERR(acl)); | ||
| 296 | return PTR_ERR(acl); | 313 | return PTR_ERR(acl); |
| 314 | } | ||
| 297 | if (acl) { | 315 | if (acl) { |
| 298 | int ret = posix_acl_permission(inode, acl, mask); | 316 | ret = posix_acl_permission(inode, acl, mask); |
| 299 | posix_acl_release(acl); | 317 | posix_acl_release(acl); |
| 300 | return ret; | 318 | return ret; |
| 301 | } | 319 | } |
| @@ -344,7 +362,7 @@ int ocfs2_init_acl(handle_t *handle, | |||
| 344 | { | 362 | { |
| 345 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 363 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 346 | struct posix_acl *acl = NULL; | 364 | struct posix_acl *acl = NULL; |
| 347 | int ret = 0; | 365 | int ret = 0, ret2; |
| 348 | mode_t mode; | 366 | mode_t mode; |
| 349 | 367 | ||
| 350 | if (!S_ISLNK(inode->i_mode)) { | 368 | if (!S_ISLNK(inode->i_mode)) { |
| @@ -381,7 +399,12 @@ int ocfs2_init_acl(handle_t *handle, | |||
| 381 | mode = inode->i_mode; | 399 | mode = inode->i_mode; |
| 382 | ret = posix_acl_create_masq(clone, &mode); | 400 | ret = posix_acl_create_masq(clone, &mode); |
| 383 | if (ret >= 0) { | 401 | if (ret >= 0) { |
| 384 | ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); | 402 | ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); |
| 403 | if (ret2) { | ||
| 404 | mlog_errno(ret2); | ||
| 405 | ret = ret2; | ||
| 406 | goto cleanup; | ||
| 407 | } | ||
| 385 | if (ret > 0) { | 408 | if (ret > 0) { |
| 386 | ret = ocfs2_set_acl(handle, inode, | 409 | ret = ocfs2_set_acl(handle, inode, |
| 387 | di_bh, ACL_TYPE_ACCESS, | 410 | di_bh, ACL_TYPE_ACCESS, |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index aa75ca3f78da..1361997cf205 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -1759,6 +1759,7 @@ static int o2net_accept_one(struct socket *sock) | |||
| 1759 | struct sockaddr_in sin; | 1759 | struct sockaddr_in sin; |
| 1760 | struct socket *new_sock = NULL; | 1760 | struct socket *new_sock = NULL; |
| 1761 | struct o2nm_node *node = NULL; | 1761 | struct o2nm_node *node = NULL; |
| 1762 | struct o2nm_node *local_node = NULL; | ||
| 1762 | struct o2net_sock_container *sc = NULL; | 1763 | struct o2net_sock_container *sc = NULL; |
| 1763 | struct o2net_node *nn; | 1764 | struct o2net_node *nn; |
| 1764 | 1765 | ||
| @@ -1796,11 +1797,15 @@ static int o2net_accept_one(struct socket *sock) | |||
| 1796 | goto out; | 1797 | goto out; |
| 1797 | } | 1798 | } |
| 1798 | 1799 | ||
| 1799 | if (o2nm_this_node() > node->nd_num) { | 1800 | if (o2nm_this_node() >= node->nd_num) { |
| 1800 | mlog(ML_NOTICE, "unexpected connect attempted from a lower " | 1801 | local_node = o2nm_get_node_by_num(o2nm_this_node()); |
| 1801 | "numbered node '%s' at " "%pI4:%d with num %u\n", | 1802 | mlog(ML_NOTICE, "unexpected connect attempt seen at node '%s' (" |
| 1802 | node->nd_name, &sin.sin_addr.s_addr, | 1803 | "%u, %pI4:%d) from node '%s' (%u, %pI4:%d)\n", |
| 1803 | ntohs(sin.sin_port), node->nd_num); | 1804 | local_node->nd_name, local_node->nd_num, |
| 1805 | &(local_node->nd_ipv4_address), | ||
| 1806 | ntohs(local_node->nd_ipv4_port), | ||
| 1807 | node->nd_name, node->nd_num, &sin.sin_addr.s_addr, | ||
| 1808 | ntohs(sin.sin_port)); | ||
| 1804 | ret = -EINVAL; | 1809 | ret = -EINVAL; |
| 1805 | goto out; | 1810 | goto out; |
| 1806 | } | 1811 | } |
| @@ -1857,6 +1862,8 @@ out: | |||
| 1857 | sock_release(new_sock); | 1862 | sock_release(new_sock); |
| 1858 | if (node) | 1863 | if (node) |
| 1859 | o2nm_node_put(node); | 1864 | o2nm_node_put(node); |
| 1865 | if (local_node) | ||
| 1866 | o2nm_node_put(local_node); | ||
| 1860 | if (sc) | 1867 | if (sc) |
| 1861 | sc_put(sc); | 1868 | sc_put(sc); |
| 1862 | return ret; | 1869 | return ret; |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 94b97fc6a88e..ffb4c68dafa4 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -511,8 +511,6 @@ static void dlm_lockres_release(struct kref *kref) | |||
| 511 | 511 | ||
| 512 | atomic_dec(&dlm->res_cur_count); | 512 | atomic_dec(&dlm->res_cur_count); |
| 513 | 513 | ||
| 514 | dlm_put(dlm); | ||
| 515 | |||
| 516 | if (!hlist_unhashed(&res->hash_node) || | 514 | if (!hlist_unhashed(&res->hash_node) || |
| 517 | !list_empty(&res->granted) || | 515 | !list_empty(&res->granted) || |
| 518 | !list_empty(&res->converting) || | 516 | !list_empty(&res->converting) || |
| @@ -585,8 +583,6 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
| 585 | res->migration_pending = 0; | 583 | res->migration_pending = 0; |
| 586 | res->inflight_locks = 0; | 584 | res->inflight_locks = 0; |
| 587 | 585 | ||
| 588 | /* put in dlm_lockres_release */ | ||
| 589 | dlm_grab(dlm); | ||
| 590 | res->dlm = dlm; | 586 | res->dlm = dlm; |
| 591 | 587 | ||
| 592 | kref_init(&res->refs); | 588 | kref_init(&res->refs); |
| @@ -3050,8 +3046,6 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 3050 | /* check for pre-existing lock */ | 3046 | /* check for pre-existing lock */ |
| 3051 | spin_lock(&dlm->spinlock); | 3047 | spin_lock(&dlm->spinlock); |
| 3052 | res = __dlm_lookup_lockres(dlm, name, namelen, hash); | 3048 | res = __dlm_lookup_lockres(dlm, name, namelen, hash); |
| 3053 | spin_lock(&dlm->master_lock); | ||
| 3054 | |||
| 3055 | if (res) { | 3049 | if (res) { |
| 3056 | spin_lock(&res->spinlock); | 3050 | spin_lock(&res->spinlock); |
| 3057 | if (res->state & DLM_LOCK_RES_RECOVERING) { | 3051 | if (res->state & DLM_LOCK_RES_RECOVERING) { |
| @@ -3069,14 +3063,15 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 3069 | spin_unlock(&res->spinlock); | 3063 | spin_unlock(&res->spinlock); |
| 3070 | } | 3064 | } |
| 3071 | 3065 | ||
| 3066 | spin_lock(&dlm->master_lock); | ||
| 3072 | /* ignore status. only nonzero status would BUG. */ | 3067 | /* ignore status. only nonzero status would BUG. */ |
| 3073 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, | 3068 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, |
| 3074 | name, namelen, | 3069 | name, namelen, |
| 3075 | migrate->new_master, | 3070 | migrate->new_master, |
| 3076 | migrate->master); | 3071 | migrate->master); |
| 3077 | 3072 | ||
| 3078 | unlock: | ||
| 3079 | spin_unlock(&dlm->master_lock); | 3073 | spin_unlock(&dlm->master_lock); |
| 3074 | unlock: | ||
| 3080 | spin_unlock(&dlm->spinlock); | 3075 | spin_unlock(&dlm->spinlock); |
| 3081 | 3076 | ||
| 3082 | if (oldmle) { | 3077 | if (oldmle) { |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 9dfaac73b36d..aaaffbcbe916 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
| @@ -1997,6 +1997,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, | |||
| 1997 | struct list_head *queue; | 1997 | struct list_head *queue; |
| 1998 | struct dlm_lock *lock, *next; | 1998 | struct dlm_lock *lock, *next; |
| 1999 | 1999 | ||
| 2000 | assert_spin_locked(&dlm->spinlock); | ||
| 2001 | assert_spin_locked(&res->spinlock); | ||
| 2000 | res->state |= DLM_LOCK_RES_RECOVERING; | 2002 | res->state |= DLM_LOCK_RES_RECOVERING; |
| 2001 | if (!list_empty(&res->recovering)) { | 2003 | if (!list_empty(&res->recovering)) { |
| 2002 | mlog(0, | 2004 | mlog(0, |
| @@ -2326,19 +2328,15 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
| 2326 | /* zero the lvb if necessary */ | 2328 | /* zero the lvb if necessary */ |
| 2327 | dlm_revalidate_lvb(dlm, res, dead_node); | 2329 | dlm_revalidate_lvb(dlm, res, dead_node); |
| 2328 | if (res->owner == dead_node) { | 2330 | if (res->owner == dead_node) { |
| 2329 | if (res->state & DLM_LOCK_RES_DROPPING_REF) | 2331 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { |
| 2330 | mlog(0, "%s:%.*s: owned by " | 2332 | mlog(ML_NOTICE, "Ignore %.*s for " |
| 2331 | "dead node %u, this node was " | 2333 | "recovery as it is being freed\n", |
| 2332 | "dropping its ref when it died. " | 2334 | res->lockname.len, |
| 2333 | "continue, dropping the flag.\n", | 2335 | res->lockname.name); |
| 2334 | dlm->name, res->lockname.len, | 2336 | } else |
| 2335 | res->lockname.name, dead_node); | 2337 | dlm_move_lockres_to_recovery_list(dlm, |
| 2336 | 2338 | res); | |
| 2337 | /* the wake_up for this will happen when the | ||
| 2338 | * RECOVERING flag is dropped later */ | ||
| 2339 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | ||
| 2340 | 2339 | ||
| 2341 | dlm_move_lockres_to_recovery_list(dlm, res); | ||
| 2342 | } else if (res->owner == dlm->node_num) { | 2340 | } else if (res->owner == dlm->node_num) { |
| 2343 | dlm_free_dead_locks(dlm, res, dead_node); | 2341 | dlm_free_dead_locks(dlm, res, dead_node); |
| 2344 | __dlm_lockres_calc_usage(dlm, res); | 2342 | __dlm_lockres_calc_usage(dlm, res); |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d4f73ca68fe5..2211acf33d9b 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
| @@ -92,19 +92,27 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res) | |||
| 92 | * truly ready to be freed. */ | 92 | * truly ready to be freed. */ |
| 93 | int __dlm_lockres_unused(struct dlm_lock_resource *res) | 93 | int __dlm_lockres_unused(struct dlm_lock_resource *res) |
| 94 | { | 94 | { |
| 95 | if (!__dlm_lockres_has_locks(res) && | 95 | int bit; |
| 96 | (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) { | 96 | |
| 97 | /* try not to scan the bitmap unless the first two | 97 | if (__dlm_lockres_has_locks(res)) |
| 98 | * conditions are already true */ | 98 | return 0; |
| 99 | int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | 99 | |
| 100 | if (bit >= O2NM_MAX_NODES) { | 100 | if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) |
| 101 | /* since the bit for dlm->node_num is not | 101 | return 0; |
| 102 | * set, inflight_locks better be zero */ | 102 | |
| 103 | BUG_ON(res->inflight_locks != 0); | 103 | if (res->state & DLM_LOCK_RES_RECOVERING) |
| 104 | return 1; | 104 | return 0; |
| 105 | } | 105 | |
| 106 | } | 106 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); |
| 107 | return 0; | 107 | if (bit < O2NM_MAX_NODES) |
| 108 | return 0; | ||
| 109 | |||
| 110 | /* | ||
| 111 | * since the bit for dlm->node_num is not set, inflight_locks better | ||
| 112 | * be zero | ||
| 113 | */ | ||
| 114 | BUG_ON(res->inflight_locks != 0); | ||
| 115 | return 1; | ||
| 108 | } | 116 | } |
| 109 | 117 | ||
| 110 | 118 | ||
| @@ -152,45 +160,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
| 152 | spin_unlock(&dlm->spinlock); | 160 | spin_unlock(&dlm->spinlock); |
| 153 | } | 161 | } |
| 154 | 162 | ||
| 155 | static int dlm_purge_lockres(struct dlm_ctxt *dlm, | 163 | static void dlm_purge_lockres(struct dlm_ctxt *dlm, |
| 156 | struct dlm_lock_resource *res) | 164 | struct dlm_lock_resource *res) |
| 157 | { | 165 | { |
| 158 | int master; | 166 | int master; |
| 159 | int ret = 0; | 167 | int ret = 0; |
| 160 | 168 | ||
| 161 | spin_lock(&res->spinlock); | 169 | assert_spin_locked(&dlm->spinlock); |
| 162 | if (!__dlm_lockres_unused(res)) { | 170 | assert_spin_locked(&res->spinlock); |
| 163 | mlog(0, "%s:%.*s: tried to purge but not unused\n", | ||
| 164 | dlm->name, res->lockname.len, res->lockname.name); | ||
| 165 | __dlm_print_one_lock_resource(res); | ||
| 166 | spin_unlock(&res->spinlock); | ||
| 167 | BUG(); | ||
| 168 | } | ||
| 169 | |||
| 170 | if (res->state & DLM_LOCK_RES_MIGRATING) { | ||
| 171 | mlog(0, "%s:%.*s: Delay dropref as this lockres is " | ||
| 172 | "being remastered\n", dlm->name, res->lockname.len, | ||
| 173 | res->lockname.name); | ||
| 174 | /* Re-add the lockres to the end of the purge list */ | ||
| 175 | if (!list_empty(&res->purge)) { | ||
| 176 | list_del_init(&res->purge); | ||
| 177 | list_add_tail(&res->purge, &dlm->purge_list); | ||
| 178 | } | ||
| 179 | spin_unlock(&res->spinlock); | ||
| 180 | return 0; | ||
| 181 | } | ||
| 182 | 171 | ||
| 183 | master = (res->owner == dlm->node_num); | 172 | master = (res->owner == dlm->node_num); |
| 184 | 173 | ||
| 185 | if (!master) | ||
| 186 | res->state |= DLM_LOCK_RES_DROPPING_REF; | ||
| 187 | spin_unlock(&res->spinlock); | ||
| 188 | 174 | ||
| 189 | mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, | 175 | mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, |
| 190 | res->lockname.name, master); | 176 | res->lockname.name, master); |
| 191 | 177 | ||
| 192 | if (!master) { | 178 | if (!master) { |
| 179 | res->state |= DLM_LOCK_RES_DROPPING_REF; | ||
| 193 | /* drop spinlock... retake below */ | 180 | /* drop spinlock... retake below */ |
| 181 | spin_unlock(&res->spinlock); | ||
| 194 | spin_unlock(&dlm->spinlock); | 182 | spin_unlock(&dlm->spinlock); |
| 195 | 183 | ||
| 196 | spin_lock(&res->spinlock); | 184 | spin_lock(&res->spinlock); |
| @@ -208,31 +196,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, | |||
| 208 | mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", | 196 | mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", |
| 209 | dlm->name, res->lockname.len, res->lockname.name, ret); | 197 | dlm->name, res->lockname.len, res->lockname.name, ret); |
| 210 | spin_lock(&dlm->spinlock); | 198 | spin_lock(&dlm->spinlock); |
| 199 | spin_lock(&res->spinlock); | ||
| 211 | } | 200 | } |
| 212 | 201 | ||
| 213 | spin_lock(&res->spinlock); | ||
| 214 | if (!list_empty(&res->purge)) { | 202 | if (!list_empty(&res->purge)) { |
| 215 | mlog(0, "removing lockres %.*s:%p from purgelist, " | 203 | mlog(0, "removing lockres %.*s:%p from purgelist, " |
| 216 | "master = %d\n", res->lockname.len, res->lockname.name, | 204 | "master = %d\n", res->lockname.len, res->lockname.name, |
| 217 | res, master); | 205 | res, master); |
| 218 | list_del_init(&res->purge); | 206 | list_del_init(&res->purge); |
| 219 | spin_unlock(&res->spinlock); | ||
| 220 | dlm_lockres_put(res); | 207 | dlm_lockres_put(res); |
| 221 | dlm->purge_count--; | 208 | dlm->purge_count--; |
| 222 | } else | 209 | } |
| 223 | spin_unlock(&res->spinlock); | 210 | |
| 211 | if (!__dlm_lockres_unused(res)) { | ||
| 212 | mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n", | ||
| 213 | dlm->name, res->lockname.len, res->lockname.name); | ||
| 214 | __dlm_print_one_lock_resource(res); | ||
| 215 | BUG(); | ||
| 216 | } | ||
| 224 | 217 | ||
| 225 | __dlm_unhash_lockres(res); | 218 | __dlm_unhash_lockres(res); |
| 226 | 219 | ||
| 227 | /* lockres is not in the hash now. drop the flag and wake up | 220 | /* lockres is not in the hash now. drop the flag and wake up |
| 228 | * any processes waiting in dlm_get_lock_resource. */ | 221 | * any processes waiting in dlm_get_lock_resource. */ |
| 229 | if (!master) { | 222 | if (!master) { |
| 230 | spin_lock(&res->spinlock); | ||
| 231 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | 223 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; |
| 232 | spin_unlock(&res->spinlock); | 224 | spin_unlock(&res->spinlock); |
| 233 | wake_up(&res->wq); | 225 | wake_up(&res->wq); |
| 234 | } | 226 | } else |
| 235 | return 0; | 227 | spin_unlock(&res->spinlock); |
| 236 | } | 228 | } |
| 237 | 229 | ||
| 238 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, | 230 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, |
| @@ -251,17 +243,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
| 251 | lockres = list_entry(dlm->purge_list.next, | 243 | lockres = list_entry(dlm->purge_list.next, |
| 252 | struct dlm_lock_resource, purge); | 244 | struct dlm_lock_resource, purge); |
| 253 | 245 | ||
| 254 | /* Status of the lockres *might* change so double | ||
| 255 | * check. If the lockres is unused, holding the dlm | ||
| 256 | * spinlock will prevent people from getting and more | ||
| 257 | * refs on it -- there's no need to keep the lockres | ||
| 258 | * spinlock. */ | ||
| 259 | spin_lock(&lockres->spinlock); | 246 | spin_lock(&lockres->spinlock); |
| 260 | unused = __dlm_lockres_unused(lockres); | ||
| 261 | spin_unlock(&lockres->spinlock); | ||
| 262 | |||
| 263 | if (!unused) | ||
| 264 | continue; | ||
| 265 | 247 | ||
| 266 | purge_jiffies = lockres->last_used + | 248 | purge_jiffies = lockres->last_used + |
| 267 | msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); | 249 | msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); |
| @@ -273,15 +255,29 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
| 273 | * in tail order, we can stop at the first | 255 | * in tail order, we can stop at the first |
| 274 | * unpurgable resource -- anyone added after | 256 | * unpurgable resource -- anyone added after |
| 275 | * him will have a greater last_used value */ | 257 | * him will have a greater last_used value */ |
| 258 | spin_unlock(&lockres->spinlock); | ||
| 276 | break; | 259 | break; |
| 277 | } | 260 | } |
| 278 | 261 | ||
| 262 | /* Status of the lockres *might* change so double | ||
| 263 | * check. If the lockres is unused, holding the dlm | ||
| 264 | * spinlock will prevent people from getting any more | ||
| 265 | * refs on it. */ | ||
| 266 | unused = __dlm_lockres_unused(lockres); | ||
| 267 | if (!unused || | ||
| 268 | (lockres->state & DLM_LOCK_RES_MIGRATING)) { | ||
| 269 | mlog(0, "lockres %s:%.*s: is in use or " | ||
| 270 | "being remastered, used %d, state %d\n", | ||
| 271 | dlm->name, lockres->lockname.len, | ||
| 272 | lockres->lockname.name, !unused, lockres->state); | ||
| 273 | list_move_tail(&dlm->purge_list, &lockres->purge); | ||
| 274 | spin_unlock(&lockres->spinlock); | ||
| 275 | continue; | ||
| 276 | } | ||
| 277 | |||
| 279 | dlm_lockres_get(lockres); | 278 | dlm_lockres_get(lockres); |
| 280 | 279 | ||
| 281 | /* This may drop and reacquire the dlm spinlock if it | 280 | dlm_purge_lockres(dlm, lockres); |
| 282 | * has to do migration. */ | ||
| 283 | if (dlm_purge_lockres(dlm, lockres)) | ||
| 284 | BUG(); | ||
| 285 | 281 | ||
| 286 | dlm_lockres_put(lockres); | 282 | dlm_lockres_put(lockres); |
| 287 | 283 | ||
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 3ac5aa733e9c..73a11ccfd4c2 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
| @@ -2436,16 +2436,26 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, | |||
| 2436 | len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + | 2436 | len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + |
| 2437 | le32_to_cpu(rec.r_clusters)) - cpos; | 2437 | le32_to_cpu(rec.r_clusters)) - cpos; |
| 2438 | /* | 2438 | /* |
| 2439 | * If the refcount rec already exist, cool. We just need | ||
| 2440 | * to check whether there is a split. Otherwise we just need | ||
| 2441 | * to increase the refcount. | ||
| 2442 | * If we will insert one, increases recs_add. | ||
| 2443 | * | ||
| 2444 | * We record all the records which will be inserted to the | 2439 | * We record all the records which will be inserted to the |
| 2445 | * same refcount block, so that we can tell exactly whether | 2440 | * same refcount block, so that we can tell exactly whether |
| 2446 | * we need a new refcount block or not. | 2441 | * we need a new refcount block or not. |
| 2442 | * | ||
| 2443 | * If we will insert a new one, this is easy and only happens | ||
| 2444 | * during adding refcounted flag to the extent, so we don't | ||
| 2445 | * have a chance of splitting. We just need one record. | ||
| 2446 | * | ||
| 2447 | * If the refcount rec already exists, that would be a little | ||
| 2448 | * complicated. We may have to: | ||
| 2449 | * 1) split at the beginning if the start pos isn't aligned. | ||
| 2450 | * we need 1 more record in this case. | ||
| 2451 | * 2) split at the end if the end pos isn't aligned. | ||
| 2452 | * we need 1 more record in this case. | ||
| 2453 | * 3) split in the middle because of file system fragmentation. | ||
| 2454 | * we need 2 more records in this case (we can't detect this | ||
| 2455 | * beforehand, so always think of the worst case). | ||
| 2447 | */ | 2456 | */ |
| 2448 | if (rec.r_refcount) { | 2457 | if (rec.r_refcount) { |
| 2458 | recs_add += 2; | ||
| 2449 | /* Check whether we need a split at the beginning. */ | 2459 | /* Check whether we need a split at the beginning. */ |
| 2450 | if (cpos == start_cpos && | 2460 | if (cpos == start_cpos && |
| 2451 | cpos != le64_to_cpu(rec.r_cpos)) | 2461 | cpos != le64_to_cpu(rec.r_cpos)) |
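A worked instance of the new worst-case accounting, with illustrative numbers: suppose an existing refcount record covers clusters 0-99 and the range being refcounted runs from cluster 10 to 19. Neither boundary lines up with the record, so a head split and a tail split may each add one record, and two more are reserved up front for the fragmentation case that cannot be detected beforehand, so recs_add can grow by as many as four for this single record. The rule itself is exactly the one stated in the new comment; only the figures here are made up.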
| @@ -1031,7 +1031,9 @@ EXPORT_SYMBOL(generic_file_open); | |||
| 1031 | 1031 | ||
| 1032 | /* | 1032 | /* |
| 1033 | * This is used by subsystems that don't want seekable | 1033 | * This is used by subsystems that don't want seekable |
| 1034 | * file descriptors | 1034 | * file descriptors. The function is not supposed to ever fail, the only |
| 1035 | * reason it returns an 'int' and not 'void' is so that it can be plugged | ||
| 1036 | * directly into file_operations structure. | ||
| 1035 | */ | 1037 | */ |
| 1036 | int nonseekable_open(struct inode *inode, struct file *filp) | 1038 | int nonseekable_open(struct inode *inode, struct file *filp) |
| 1037 | { | 1039 | { |
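The expanded comment explains why nonseekable_open() keeps an int return type even though it never fails: the signature has to match what file_operations expects. A minimal sketch of how a driver might rely on that; demo_open and demo_fops are invented names, not anything in the tree:

    #include <linux/fs.h>
    #include <linux/module.h>

    /* Hypothetical character-device open: nonseekable_open() always
     * returns 0, so its result can be passed straight back, or the
     * helper can be wired up as the .open method itself. */
    static int demo_open(struct inode *inode, struct file *filp)
    {
        return nonseekable_open(inode, filp);
    }

    static const struct file_operations demo_fops = {
        .owner  = THIS_MODULE,
        .open   = demo_open,
        .llseek = no_llseek,    /* conventional companion for non-seekable files */
    };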
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index 6921e7890be6..fbeb697374d5 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c | |||
| @@ -45,8 +45,11 @@ adfs_partition(struct parsed_partitions *state, char *name, char *data, | |||
| 45 | nr_sects = (le32_to_cpu(dr->disc_size_high) << 23) | | 45 | nr_sects = (le32_to_cpu(dr->disc_size_high) << 23) | |
| 46 | (le32_to_cpu(dr->disc_size) >> 9); | 46 | (le32_to_cpu(dr->disc_size) >> 9); |
| 47 | 47 | ||
| 48 | if (name) | 48 | if (name) { |
| 49 | printk(" [%s]", name); | 49 | strlcat(state->pp_buf, " [", PAGE_SIZE); |
| 50 | strlcat(state->pp_buf, name, PAGE_SIZE); | ||
| 51 | strlcat(state->pp_buf, "]", PAGE_SIZE); | ||
| 52 | } | ||
| 50 | put_partition(state, slot, first_sector, nr_sects); | 53 | put_partition(state, slot, first_sector, nr_sects); |
| 51 | return dr; | 54 | return dr; |
| 52 | } | 55 | } |
| @@ -81,14 +84,14 @@ static int riscix_partition(struct parsed_partitions *state, | |||
| 81 | if (!rr) | 84 | if (!rr) |
| 82 | return -1; | 85 | return -1; |
| 83 | 86 | ||
| 84 | printk(" [RISCiX]"); | 87 | strlcat(state->pp_buf, " [RISCiX]", PAGE_SIZE); |
| 85 | 88 | ||
| 86 | 89 | ||
| 87 | if (rr->magic == RISCIX_MAGIC) { | 90 | if (rr->magic == RISCIX_MAGIC) { |
| 88 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; | 91 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; |
| 89 | int part; | 92 | int part; |
| 90 | 93 | ||
| 91 | printk(" <"); | 94 | strlcat(state->pp_buf, " <", PAGE_SIZE); |
| 92 | 95 | ||
| 93 | put_partition(state, slot++, first_sect, size); | 96 | put_partition(state, slot++, first_sect, size); |
| 94 | for (part = 0; part < 8; part++) { | 97 | for (part = 0; part < 8; part++) { |
| @@ -97,11 +100,13 @@ static int riscix_partition(struct parsed_partitions *state, | |||
| 97 | put_partition(state, slot++, | 100 | put_partition(state, slot++, |
| 98 | le32_to_cpu(rr->part[part].start), | 101 | le32_to_cpu(rr->part[part].start), |
| 99 | le32_to_cpu(rr->part[part].length)); | 102 | le32_to_cpu(rr->part[part].length)); |
| 100 | printk("(%s)", rr->part[part].name); | 103 | strlcat(state->pp_buf, "(", PAGE_SIZE); |
| 104 | strlcat(state->pp_buf, rr->part[part].name, PAGE_SIZE); | ||
| 105 | strlcat(state->pp_buf, ")", PAGE_SIZE); | ||
| 101 | } | 106 | } |
| 102 | } | 107 | } |
| 103 | 108 | ||
| 104 | printk(" >\n"); | 109 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
| 105 | } else { | 110 | } else { |
| 106 | put_partition(state, slot++, first_sect, nr_sects); | 111 | put_partition(state, slot++, first_sect, nr_sects); |
| 107 | } | 112 | } |
| @@ -131,7 +136,7 @@ static int linux_partition(struct parsed_partitions *state, | |||
| 131 | struct linux_part *linuxp; | 136 | struct linux_part *linuxp; |
| 132 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; | 137 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; |
| 133 | 138 | ||
| 134 | printk(" [Linux]"); | 139 | strlcat(state->pp_buf, " [Linux]", PAGE_SIZE); |
| 135 | 140 | ||
| 136 | put_partition(state, slot++, first_sect, size); | 141 | put_partition(state, slot++, first_sect, size); |
| 137 | 142 | ||
| @@ -139,7 +144,7 @@ static int linux_partition(struct parsed_partitions *state, | |||
| 139 | if (!linuxp) | 144 | if (!linuxp) |
| 140 | return -1; | 145 | return -1; |
| 141 | 146 | ||
| 142 | printk(" <"); | 147 | strlcat(state->pp_buf, " <", PAGE_SIZE); |
| 143 | while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) || | 148 | while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) || |
| 144 | linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) { | 149 | linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) { |
| 145 | if (slot == state->limit) | 150 | if (slot == state->limit) |
| @@ -149,7 +154,7 @@ static int linux_partition(struct parsed_partitions *state, | |||
| 149 | le32_to_cpu(linuxp->nr_sects)); | 154 | le32_to_cpu(linuxp->nr_sects)); |
| 150 | linuxp ++; | 155 | linuxp ++; |
| 151 | } | 156 | } |
| 152 | printk(" >"); | 157 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
| 153 | 158 | ||
| 154 | put_dev_sector(sect); | 159 | put_dev_sector(sect); |
| 155 | return slot; | 160 | return slot; |
| @@ -294,7 +299,7 @@ int adfspart_check_ADFS(struct parsed_partitions *state) | |||
| 294 | break; | 299 | break; |
| 295 | } | 300 | } |
| 296 | } | 301 | } |
| 297 | printk("\n"); | 302 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 298 | return 1; | 303 | return 1; |
| 299 | } | 304 | } |
| 300 | #endif | 305 | #endif |
| @@ -367,7 +372,7 @@ int adfspart_check_ICS(struct parsed_partitions *state) | |||
| 367 | return 0; | 372 | return 0; |
| 368 | } | 373 | } |
| 369 | 374 | ||
| 370 | printk(" [ICS]"); | 375 | strlcat(state->pp_buf, " [ICS]", PAGE_SIZE); |
| 371 | 376 | ||
| 372 | for (slot = 1, p = (const struct ics_part *)data; p->size; p++) { | 377 | for (slot = 1, p = (const struct ics_part *)data; p->size; p++) { |
| 373 | u32 start = le32_to_cpu(p->start); | 378 | u32 start = le32_to_cpu(p->start); |
| @@ -401,7 +406,7 @@ int adfspart_check_ICS(struct parsed_partitions *state) | |||
| 401 | } | 406 | } |
| 402 | 407 | ||
| 403 | put_dev_sector(sect); | 408 | put_dev_sector(sect); |
| 404 | printk("\n"); | 409 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 405 | return 1; | 410 | return 1; |
| 406 | } | 411 | } |
| 407 | #endif | 412 | #endif |
| @@ -461,7 +466,7 @@ int adfspart_check_POWERTEC(struct parsed_partitions *state) | |||
| 461 | return 0; | 466 | return 0; |
| 462 | } | 467 | } |
| 463 | 468 | ||
| 464 | printk(" [POWERTEC]"); | 469 | strlcat(state->pp_buf, " [POWERTEC]", PAGE_SIZE); |
| 465 | 470 | ||
| 466 | for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) { | 471 | for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) { |
| 467 | u32 start = le32_to_cpu(p->start); | 472 | u32 start = le32_to_cpu(p->start); |
| @@ -472,7 +477,7 @@ int adfspart_check_POWERTEC(struct parsed_partitions *state) | |||
| 472 | } | 477 | } |
| 473 | 478 | ||
| 474 | put_dev_sector(sect); | 479 | put_dev_sector(sect); |
| 475 | printk("\n"); | 480 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 476 | return 1; | 481 | return 1; |
| 477 | } | 482 | } |
| 478 | #endif | 483 | #endif |
| @@ -543,7 +548,7 @@ int adfspart_check_EESOX(struct parsed_partitions *state) | |||
| 543 | 548 | ||
| 544 | size = get_capacity(state->bdev->bd_disk); | 549 | size = get_capacity(state->bdev->bd_disk); |
| 545 | put_partition(state, slot++, start, size - start); | 550 | put_partition(state, slot++, start, size - start); |
| 546 | printk("\n"); | 551 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 547 | } | 552 | } |
| 548 | 553 | ||
| 549 | return i ? 1 : 0; | 554 | return i ? 1 : 0; |
diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c index ba443d4229f8..70cbf44a1560 100644 --- a/fs/partitions/amiga.c +++ b/fs/partitions/amiga.c | |||
| @@ -69,7 +69,13 @@ int amiga_partition(struct parsed_partitions *state) | |||
| 69 | /* blksize is blocks per 512 byte standard block */ | 69 | /* blksize is blocks per 512 byte standard block */ |
| 70 | blksize = be32_to_cpu( rdb->rdb_BlockBytes ) / 512; | 70 | blksize = be32_to_cpu( rdb->rdb_BlockBytes ) / 512; |
| 71 | 71 | ||
| 72 | printk(" RDSK (%d)", blksize * 512); /* Be more informative */ | 72 | { |
| 73 | char tmp[7 + 10 + 1 + 1]; | ||
| 74 | |||
| 75 | /* Be more informative */ | ||
| 76 | snprintf(tmp, sizeof(tmp), " RDSK (%d)", blksize * 512); | ||
| 77 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 78 | } | ||
| 73 | blk = be32_to_cpu(rdb->rdb_PartitionList); | 79 | blk = be32_to_cpu(rdb->rdb_PartitionList); |
| 74 | put_dev_sector(sect); | 80 | put_dev_sector(sect); |
| 75 | for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { | 81 | for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { |
| @@ -106,23 +112,27 @@ int amiga_partition(struct parsed_partitions *state) | |||
| 106 | { | 112 | { |
| 107 | /* Be even more informative to aid mounting */ | 113 | /* Be even more informative to aid mounting */ |
| 108 | char dostype[4]; | 114 | char dostype[4]; |
| 115 | char tmp[42]; | ||
| 116 | |||
| 109 | __be32 *dt = (__be32 *)dostype; | 117 | __be32 *dt = (__be32 *)dostype; |
| 110 | *dt = pb->pb_Environment[16]; | 118 | *dt = pb->pb_Environment[16]; |
| 111 | if (dostype[3] < ' ') | 119 | if (dostype[3] < ' ') |
| 112 | printk(" (%c%c%c^%c)", | 120 | snprintf(tmp, sizeof(tmp), " (%c%c%c^%c)", |
| 113 | dostype[0], dostype[1], | 121 | dostype[0], dostype[1], |
| 114 | dostype[2], dostype[3] + '@' ); | 122 | dostype[2], dostype[3] + '@' ); |
| 115 | else | 123 | else |
| 116 | printk(" (%c%c%c%c)", | 124 | snprintf(tmp, sizeof(tmp), " (%c%c%c%c)", |
| 117 | dostype[0], dostype[1], | 125 | dostype[0], dostype[1], |
| 118 | dostype[2], dostype[3]); | 126 | dostype[2], dostype[3]); |
| 119 | printk("(res %d spb %d)", | 127 | strlcat(state->pp_buf, tmp, PAGE_SIZE); |
| 128 | snprintf(tmp, sizeof(tmp), "(res %d spb %d)", | ||
| 120 | be32_to_cpu(pb->pb_Environment[6]), | 129 | be32_to_cpu(pb->pb_Environment[6]), |
| 121 | be32_to_cpu(pb->pb_Environment[4])); | 130 | be32_to_cpu(pb->pb_Environment[4])); |
| 131 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 122 | } | 132 | } |
| 123 | res = 1; | 133 | res = 1; |
| 124 | } | 134 | } |
| 125 | printk("\n"); | 135 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 126 | 136 | ||
| 127 | rdb_done: | 137 | rdb_done: |
| 128 | return res; | 138 | return res; |
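The on-stack buffer sizes above are worth spelling out: tmp[7 + 10 + 1 + 1] in the RDSK block presumably reads as 7 bytes for " RDSK (", up to 10 digits for the %d rendering of a positive int, 1 for the closing parenthesis and 1 for the terminating NUL, so the snprintf() cannot truncate; the later tmp[42] is simply a generous bound for the dostype and environment strings.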
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c index 4439ff1b6cec..9875b05e80a2 100644 --- a/fs/partitions/atari.c +++ b/fs/partitions/atari.c | |||
| @@ -62,7 +62,7 @@ int atari_partition(struct parsed_partitions *state) | |||
| 62 | } | 62 | } |
| 63 | 63 | ||
| 64 | pi = &rs->part[0]; | 64 | pi = &rs->part[0]; |
| 65 | printk (" AHDI"); | 65 | strlcat(state->pp_buf, " AHDI", PAGE_SIZE); |
| 66 | for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) { | 66 | for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) { |
| 67 | struct rootsector *xrs; | 67 | struct rootsector *xrs; |
| 68 | Sector sect2; | 68 | Sector sect2; |
| @@ -81,7 +81,7 @@ int atari_partition(struct parsed_partitions *state) | |||
| 81 | #ifdef ICD_PARTS | 81 | #ifdef ICD_PARTS |
| 82 | part_fmt = 1; | 82 | part_fmt = 1; |
| 83 | #endif | 83 | #endif |
| 84 | printk(" XGM<"); | 84 | strlcat(state->pp_buf, " XGM<", PAGE_SIZE); |
| 85 | partsect = extensect = be32_to_cpu(pi->st); | 85 | partsect = extensect = be32_to_cpu(pi->st); |
| 86 | while (1) { | 86 | while (1) { |
| 87 | xrs = read_part_sector(state, partsect, &sect2); | 87 | xrs = read_part_sector(state, partsect, &sect2); |
| @@ -120,14 +120,14 @@ int atari_partition(struct parsed_partitions *state) | |||
| 120 | break; | 120 | break; |
| 121 | } | 121 | } |
| 122 | } | 122 | } |
| 123 | printk(" >"); | 123 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
| 124 | } | 124 | } |
| 125 | #ifdef ICD_PARTS | 125 | #ifdef ICD_PARTS |
| 126 | if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */ | 126 | if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */ |
| 127 | pi = &rs->icdpart[0]; | 127 | pi = &rs->icdpart[0]; |
| 128 | /* sanity check: no ICD format if first partition invalid */ | 128 | /* sanity check: no ICD format if first partition invalid */ |
| 129 | if (OK_id(pi->id)) { | 129 | if (OK_id(pi->id)) { |
| 130 | printk(" ICD<"); | 130 | strlcat(state->pp_buf, " ICD<", PAGE_SIZE); |
| 131 | for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) { | 131 | for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) { |
| 132 | /* accept only GEM,BGM,RAW,LNX,SWP partitions */ | 132 | /* accept only GEM,BGM,RAW,LNX,SWP partitions */ |
| 133 | if (!((pi->flg & 1) && OK_id(pi->id))) | 133 | if (!((pi->flg & 1) && OK_id(pi->id))) |
| @@ -137,13 +137,13 @@ int atari_partition(struct parsed_partitions *state) | |||
| 137 | be32_to_cpu(pi->st), | 137 | be32_to_cpu(pi->st), |
| 138 | be32_to_cpu(pi->siz)); | 138 | be32_to_cpu(pi->siz)); |
| 139 | } | 139 | } |
| 140 | printk(" >"); | 140 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
| 141 | } | 141 | } |
| 142 | } | 142 | } |
| 143 | #endif | 143 | #endif |
| 144 | put_dev_sector(sect); | 144 | put_dev_sector(sect); |
| 145 | 145 | ||
| 146 | printk ("\n"); | 146 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 147 | 147 | ||
| 148 | return 1; | 148 | return 1; |
| 149 | } | 149 | } |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 72c52656dc2e..79fbf3f390f0 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
| @@ -164,10 +164,16 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
| 164 | state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); | 164 | state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); |
| 165 | if (!state) | 165 | if (!state) |
| 166 | return NULL; | 166 | return NULL; |
| 167 | state->pp_buf = (char *)__get_free_page(GFP_KERNEL); | ||
| 168 | if (!state->pp_buf) { | ||
| 169 | kfree(state); | ||
| 170 | return NULL; | ||
| 171 | } | ||
| 172 | state->pp_buf[0] = '\0'; | ||
| 167 | 173 | ||
| 168 | state->bdev = bdev; | 174 | state->bdev = bdev; |
| 169 | disk_name(hd, 0, state->name); | 175 | disk_name(hd, 0, state->name); |
| 170 | printk(KERN_INFO " %s:", state->name); | 176 | snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name); |
| 171 | if (isdigit(state->name[strlen(state->name)-1])) | 177 | if (isdigit(state->name[strlen(state->name)-1])) |
| 172 | sprintf(state->name, "p"); | 178 | sprintf(state->name, "p"); |
| 173 | 179 | ||
| @@ -185,17 +191,25 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
| 185 | } | 191 | } |
| 186 | 192 | ||
| 187 | } | 193 | } |
| 188 | if (res > 0) | 194 | if (res > 0) { |
| 195 | printk(KERN_INFO "%s", state->pp_buf); | ||
| 196 | |||
| 197 | free_page((unsigned long)state->pp_buf); | ||
| 189 | return state; | 198 | return state; |
| 199 | } | ||
| 190 | if (state->access_beyond_eod) | 200 | if (state->access_beyond_eod) |
| 191 | err = -ENOSPC; | 201 | err = -ENOSPC; |
| 192 | if (err) | 202 | if (err) |
| 193 | /* The partition is unrecognized. So report I/O errors if there were any */ | 203 | /* The partition is unrecognized. So report I/O errors if there were any */ |
| 194 | res = err; | 204 | res = err; |
| 195 | if (!res) | 205 | if (!res) |
| 196 | printk(" unknown partition table\n"); | 206 | strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE); |
| 197 | else if (warn_no_part) | 207 | else if (warn_no_part) |
| 198 | printk(" unable to read partition table\n"); | 208 | strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE); |
| 209 | |||
| 210 | printk(KERN_INFO "%s", state->pp_buf); | ||
| 211 | |||
| 212 | free_page((unsigned long)state->pp_buf); | ||
| 199 | kfree(state); | 213 | kfree(state); |
| 200 | return ERR_PTR(res); | 214 | return ERR_PTR(res); |
| 201 | } | 215 | } |
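check_partition() now builds the whole report in state->pp_buf and emits it with a single printk() before freeing the page, rather than issuing many small printk() calls that unrelated messages could interleave with. A userspace sketch of the same accumulate-then-print pattern; strlcat() lives in the kernel's lib/string.c and is not in glibc, so the demo carries a small stand-in:

    #include <stdio.h>
    #include <string.h>

    /* Minimal stand-in for the kernel's strlcat(): append src to dst
     * without ever writing past size bytes in total. */
    static size_t my_strlcat(char *dst, const char *src, size_t size)
    {
        size_t dlen = strlen(dst);

        if (dlen < size)
            snprintf(dst + dlen, size - dlen, "%s", src);
        return dlen + strlen(src);  /* length of the string it tried to build */
    }

    int main(void)
    {
        char buf[256];
        int part;

        snprintf(buf, sizeof(buf), " %s:", "sda");
        for (part = 1; part <= 3; part++) {
            char tmp[32];

            snprintf(tmp, sizeof(tmp), " %s%d", "sda", part);
            my_strlcat(buf, tmp, sizeof(buf));
        }
        my_strlcat(buf, "\n", sizeof(buf));
        printf("%s", buf);          /* prints " sda: sda1 sda2 sda3" plus a newline */
        return 0;
    }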
diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 52f8bd399396..8e4e103ba216 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h | |||
| @@ -16,6 +16,7 @@ struct parsed_partitions { | |||
| 16 | int next; | 16 | int next; |
| 17 | int limit; | 17 | int limit; |
| 18 | bool access_beyond_eod; | 18 | bool access_beyond_eod; |
| 19 | char *pp_buf; | ||
| 19 | }; | 20 | }; |
| 20 | 21 | ||
| 21 | static inline void *read_part_sector(struct parsed_partitions *state, | 22 | static inline void *read_part_sector(struct parsed_partitions *state, |
| @@ -32,9 +33,12 @@ static inline void | |||
| 32 | put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) | 33 | put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) |
| 33 | { | 34 | { |
| 34 | if (n < p->limit) { | 35 | if (n < p->limit) { |
| 36 | char tmp[1 + BDEVNAME_SIZE + 10 + 1]; | ||
| 37 | |||
| 35 | p->parts[n].from = from; | 38 | p->parts[n].from = from; |
| 36 | p->parts[n].size = size; | 39 | p->parts[n].size = size; |
| 37 | printk(" %s%d", p->name, n); | 40 | snprintf(tmp, sizeof(tmp), " %s%d", p->name, n); |
| 41 | strlcat(p->pp_buf, tmp, PAGE_SIZE); | ||
| 38 | } | 42 | } |
| 39 | } | 43 | } |
| 40 | 44 | ||
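In put_partition() the temporary is sized as 1 + BDEVNAME_SIZE + 10 + 1, which presumably breaks down as one byte for the leading space, room for the device name (BDEVNAME_SIZE already includes its own NUL), up to 10 digits for the partition number, and a final NUL, so the snprintf() into tmp cannot truncate before strlcat() appends it to pp_buf.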
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index 9efb2cfe2410..dbb44d4bb8a7 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c | |||
| @@ -630,6 +630,6 @@ int efi_partition(struct parsed_partitions *state) | |||
| 630 | } | 630 | } |
| 631 | kfree(ptes); | 631 | kfree(ptes); |
| 632 | kfree(gpt); | 632 | kfree(gpt); |
| 633 | printk("\n"); | 633 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 634 | return 1; | 634 | return 1; |
| 635 | } | 635 | } |
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index fc8497643fd0..d513a07f44bb 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c | |||
| @@ -75,6 +75,7 @@ int ibm_partition(struct parsed_partitions *state) | |||
| 75 | unsigned char *data; | 75 | unsigned char *data; |
| 76 | Sector sect; | 76 | Sector sect; |
| 77 | sector_t labelsect; | 77 | sector_t labelsect; |
| 78 | char tmp[64]; | ||
| 78 | 79 | ||
| 79 | res = 0; | 80 | res = 0; |
| 80 | blocksize = bdev_logical_block_size(bdev); | 81 | blocksize = bdev_logical_block_size(bdev); |
| @@ -144,13 +145,15 @@ int ibm_partition(struct parsed_partitions *state) | |||
| 144 | */ | 145 | */ |
| 145 | blocksize = label->cms.block_size; | 146 | blocksize = label->cms.block_size; |
| 146 | if (label->cms.disk_offset != 0) { | 147 | if (label->cms.disk_offset != 0) { |
| 147 | printk("CMS1/%8s(MDSK):", name); | 148 | snprintf(tmp, sizeof(tmp), "CMS1/%8s(MDSK):", name); |
| 149 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 148 | /* disk is reserved minidisk */ | 150 | /* disk is reserved minidisk */ |
| 149 | offset = label->cms.disk_offset; | 151 | offset = label->cms.disk_offset; |
| 150 | size = (label->cms.block_count - 1) | 152 | size = (label->cms.block_count - 1) |
| 151 | * (blocksize >> 9); | 153 | * (blocksize >> 9); |
| 152 | } else { | 154 | } else { |
| 153 | printk("CMS1/%8s:", name); | 155 | snprintf(tmp, sizeof(tmp), "CMS1/%8s:", name); |
| 156 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 154 | offset = (info->label_block + 1); | 157 | offset = (info->label_block + 1); |
| 155 | size = label->cms.block_count | 158 | size = label->cms.block_count |
| 156 | * (blocksize >> 9); | 159 | * (blocksize >> 9); |
| @@ -159,7 +162,8 @@ int ibm_partition(struct parsed_partitions *state) | |||
| 159 | size-offset*(blocksize >> 9)); | 162 | size-offset*(blocksize >> 9)); |
| 160 | } else { | 163 | } else { |
| 161 | if (strncmp(type, "LNX1", 4) == 0) { | 164 | if (strncmp(type, "LNX1", 4) == 0) { |
| 162 | printk("LNX1/%8s:", name); | 165 | snprintf(tmp, sizeof(tmp), "LNX1/%8s:", name); |
| 166 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 163 | if (label->lnx.ldl_version == 0xf2) { | 167 | if (label->lnx.ldl_version == 0xf2) { |
| 164 | fmt_size = label->lnx.formatted_blocks | 168 | fmt_size = label->lnx.formatted_blocks |
| 165 | * (blocksize >> 9); | 169 | * (blocksize >> 9); |
| @@ -178,7 +182,7 @@ int ibm_partition(struct parsed_partitions *state) | |||
| 178 | offset = (info->label_block + 1); | 182 | offset = (info->label_block + 1); |
| 179 | } else { | 183 | } else { |
| 180 | /* unlabeled disk */ | 184 | /* unlabeled disk */ |
| 181 | printk("(nonl)"); | 185 | strlcat(state->pp_buf, "(nonl)", PAGE_SIZE); |
| 182 | size = i_size >> 9; | 186 | size = i_size >> 9; |
| 183 | offset = (info->label_block + 1); | 187 | offset = (info->label_block + 1); |
| 184 | } | 188 | } |
| @@ -197,7 +201,8 @@ int ibm_partition(struct parsed_partitions *state) | |||
| 197 | * if not, something is wrong, skipping partition detection | 201 | * if not, something is wrong, skipping partition detection |
| 198 | */ | 202 | */ |
| 199 | if (strncmp(type, "VOL1", 4) == 0) { | 203 | if (strncmp(type, "VOL1", 4) == 0) { |
| 200 | printk("VOL1/%8s:", name); | 204 | snprintf(tmp, sizeof(tmp), "VOL1/%8s:", name); |
| 205 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 201 | /* | 206 | /* |
| 202 | * get block number and read then go through format1 | 207 | * get block number and read then go through format1 |
| 203 | * labels | 208 | * labels |
| @@ -253,7 +258,7 @@ int ibm_partition(struct parsed_partitions *state) | |||
| 253 | 258 | ||
| 254 | } | 259 | } |
| 255 | 260 | ||
| 256 | printk("\n"); | 261 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 257 | goto out_freeall; | 262 | goto out_freeall; |
| 258 | 263 | ||
| 259 | 264 | ||
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c index 1cc928bb762f..0ea19312706b 100644 --- a/fs/partitions/karma.c +++ b/fs/partitions/karma.c | |||
| @@ -50,7 +50,7 @@ int karma_partition(struct parsed_partitions *state) | |||
| 50 | } | 50 | } |
| 51 | slot++; | 51 | slot++; |
| 52 | } | 52 | } |
| 53 | printk("\n"); | 53 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 54 | put_dev_sector(sect); | 54 | put_dev_sector(sect); |
| 55 | return 1; | 55 | return 1; |
| 56 | } | 56 | } |
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 648c9d8f3357..5bf8a04b5d9b 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c | |||
| @@ -643,7 +643,7 @@ static bool ldm_create_data_partitions (struct parsed_partitions *pp, | |||
| 643 | return false; | 643 | return false; |
| 644 | } | 644 | } |
| 645 | 645 | ||
| 646 | printk (" [LDM]"); | 646 | strlcat(pp->pp_buf, " [LDM]", PAGE_SIZE); |
| 647 | 647 | ||
| 648 | /* Create the data partitions */ | 648 | /* Create the data partitions */ |
| 649 | list_for_each (item, &ldb->v_part) { | 649 | list_for_each (item, &ldb->v_part) { |
| @@ -658,7 +658,7 @@ static bool ldm_create_data_partitions (struct parsed_partitions *pp, | |||
| 658 | part_num++; | 658 | part_num++; |
| 659 | } | 659 | } |
| 660 | 660 | ||
| 661 | printk ("\n"); | 661 | strlcat(pp->pp_buf, "\n", PAGE_SIZE); |
| 662 | return true; | 662 | return true; |
| 663 | } | 663 | } |
| 664 | 664 | ||
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c index 74465ff7c263..68d6a216ee79 100644 --- a/fs/partitions/mac.c +++ b/fs/partitions/mac.c | |||
| @@ -59,7 +59,7 @@ int mac_partition(struct parsed_partitions *state) | |||
| 59 | put_dev_sector(sect); | 59 | put_dev_sector(sect); |
| 60 | return 0; /* not a MacOS disk */ | 60 | return 0; /* not a MacOS disk */ |
| 61 | } | 61 | } |
| 62 | printk(" [mac]"); | 62 | strlcat(state->pp_buf, " [mac]", PAGE_SIZE); |
| 63 | blocks_in_map = be32_to_cpu(part->map_count); | 63 | blocks_in_map = be32_to_cpu(part->map_count); |
| 64 | for (blk = 1; blk <= blocks_in_map; ++blk) { | 64 | for (blk = 1; blk <= blocks_in_map; ++blk) { |
| 65 | int pos = blk * secsize; | 65 | int pos = blk * secsize; |
| @@ -128,6 +128,6 @@ int mac_partition(struct parsed_partitions *state) | |||
| 128 | #endif | 128 | #endif |
| 129 | 129 | ||
| 130 | put_dev_sector(sect); | 130 | put_dev_sector(sect); |
| 131 | printk("\n"); | 131 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 132 | return 1; | 132 | return 1; |
| 133 | } | 133 | } |
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 15bfb7b1e044..5f79a6677c69 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c | |||
| @@ -213,10 +213,18 @@ static void parse_solaris_x86(struct parsed_partitions *state, | |||
| 213 | put_dev_sector(sect); | 213 | put_dev_sector(sect); |
| 214 | return; | 214 | return; |
| 215 | } | 215 | } |
| 216 | printk(" %s%d: <solaris:", state->name, origin); | 216 | { |
| 217 | char tmp[1 + BDEVNAME_SIZE + 10 + 11 + 1]; | ||
| 218 | |||
| 219 | snprintf(tmp, sizeof(tmp), " %s%d: <solaris:", state->name, origin); | ||
| 220 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 221 | } | ||
| 217 | if (le32_to_cpu(v->v_version) != 1) { | 222 | if (le32_to_cpu(v->v_version) != 1) { |
| 218 | printk(" cannot handle version %d vtoc>\n", | 223 | char tmp[64]; |
| 219 | le32_to_cpu(v->v_version)); | 224 | |
| 225 | snprintf(tmp, sizeof(tmp), " cannot handle version %d vtoc>\n", | ||
| 226 | le32_to_cpu(v->v_version)); | ||
| 227 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 220 | put_dev_sector(sect); | 228 | put_dev_sector(sect); |
| 221 | return; | 229 | return; |
| 222 | } | 230 | } |
| @@ -224,9 +232,12 @@ static void parse_solaris_x86(struct parsed_partitions *state, | |||
| 224 | max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; | 232 | max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; |
| 225 | for (i=0; i<max_nparts && state->next<state->limit; i++) { | 233 | for (i=0; i<max_nparts && state->next<state->limit; i++) { |
| 226 | struct solaris_x86_slice *s = &v->v_slice[i]; | 234 | struct solaris_x86_slice *s = &v->v_slice[i]; |
| 235 | char tmp[3 + 10 + 1 + 1]; | ||
| 236 | |||
| 227 | if (s->s_size == 0) | 237 | if (s->s_size == 0) |
| 228 | continue; | 238 | continue; |
| 229 | printk(" [s%d]", i); | 239 | snprintf(tmp, sizeof(tmp), " [s%d]", i); |
| 240 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 230 | /* solaris partitions are relative to current MS-DOS | 241 | /* solaris partitions are relative to current MS-DOS |
| 231 | * one; must add the offset of the current partition */ | 242 | * one; must add the offset of the current partition */ |
| 232 | put_partition(state, state->next++, | 243 | put_partition(state, state->next++, |
| @@ -234,7 +245,7 @@ static void parse_solaris_x86(struct parsed_partitions *state, | |||
| 234 | le32_to_cpu(s->s_size)); | 245 | le32_to_cpu(s->s_size)); |
| 235 | } | 246 | } |
| 236 | put_dev_sector(sect); | 247 | put_dev_sector(sect); |
| 237 | printk(" >\n"); | 248 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
| 238 | #endif | 249 | #endif |
| 239 | } | 250 | } |
| 240 | 251 | ||
| @@ -250,6 +261,7 @@ static void parse_bsd(struct parsed_partitions *state, | |||
| 250 | Sector sect; | 261 | Sector sect; |
| 251 | struct bsd_disklabel *l; | 262 | struct bsd_disklabel *l; |
| 252 | struct bsd_partition *p; | 263 | struct bsd_partition *p; |
| 264 | char tmp[64]; | ||
| 253 | 265 | ||
| 254 | l = read_part_sector(state, offset + 1, &sect); | 266 | l = read_part_sector(state, offset + 1, &sect); |
| 255 | if (!l) | 267 | if (!l) |
| @@ -258,7 +270,9 @@ static void parse_bsd(struct parsed_partitions *state, | |||
| 258 | put_dev_sector(sect); | 270 | put_dev_sector(sect); |
| 259 | return; | 271 | return; |
| 260 | } | 272 | } |
| 261 | printk(" %s%d: <%s:", state->name, origin, flavour); | 273 | |
| 274 | snprintf(tmp, sizeof(tmp), " %s%d: <%s:", state->name, origin, flavour); | ||
| 275 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 262 | 276 | ||
| 263 | if (le16_to_cpu(l->d_npartitions) < max_partitions) | 277 | if (le16_to_cpu(l->d_npartitions) < max_partitions) |
| 264 | max_partitions = le16_to_cpu(l->d_npartitions); | 278 | max_partitions = le16_to_cpu(l->d_npartitions); |
| @@ -275,16 +289,18 @@ static void parse_bsd(struct parsed_partitions *state, | |||
| 275 | /* full parent partition, we have it already */ | 289 | /* full parent partition, we have it already */ |
| 276 | continue; | 290 | continue; |
| 277 | if (offset > bsd_start || offset+size < bsd_start+bsd_size) { | 291 | if (offset > bsd_start || offset+size < bsd_start+bsd_size) { |
| 278 | printk("bad subpartition - ignored\n"); | 292 | strlcat(state->pp_buf, "bad subpartition - ignored\n", PAGE_SIZE); |
| 279 | continue; | 293 | continue; |
| 280 | } | 294 | } |
| 281 | put_partition(state, state->next++, bsd_start, bsd_size); | 295 | put_partition(state, state->next++, bsd_start, bsd_size); |
| 282 | } | 296 | } |
| 283 | put_dev_sector(sect); | 297 | put_dev_sector(sect); |
| 284 | if (le16_to_cpu(l->d_npartitions) > max_partitions) | 298 | if (le16_to_cpu(l->d_npartitions) > max_partitions) { |
| 285 | printk(" (ignored %d more)", | 299 | snprintf(tmp, sizeof(tmp), " (ignored %d more)", |
| 286 | le16_to_cpu(l->d_npartitions) - max_partitions); | 300 | le16_to_cpu(l->d_npartitions) - max_partitions); |
| 287 | printk(" >\n"); | 301 | strlcat(state->pp_buf, tmp, PAGE_SIZE); |
| 302 | } | ||
| 303 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); | ||
| 288 | } | 304 | } |
| 289 | #endif | 305 | #endif |
| 290 | 306 | ||
| @@ -333,7 +349,12 @@ static void parse_unixware(struct parsed_partitions *state, | |||
| 333 | put_dev_sector(sect); | 349 | put_dev_sector(sect); |
| 334 | return; | 350 | return; |
| 335 | } | 351 | } |
| 336 | printk(" %s%d: <unixware:", state->name, origin); | 352 | { |
| 353 | char tmp[1 + BDEVNAME_SIZE + 10 + 12 + 1]; | ||
| 354 | |||
| 355 | snprintf(tmp, sizeof(tmp), " %s%d: <unixware:", state->name, origin); | ||
| 356 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 357 | } | ||
| 337 | p = &l->vtoc.v_slice[1]; | 358 | p = &l->vtoc.v_slice[1]; |
| 338 | /* I omit the 0th slice as it is the same as whole disk. */ | 359 | /* I omit the 0th slice as it is the same as whole disk. */ |
| 339 | while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { | 360 | while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { |
| @@ -347,7 +368,7 @@ static void parse_unixware(struct parsed_partitions *state, | |||
| 347 | p++; | 368 | p++; |
| 348 | } | 369 | } |
| 349 | put_dev_sector(sect); | 370 | put_dev_sector(sect); |
| 350 | printk(" >\n"); | 371 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
| 351 | #endif | 372 | #endif |
| 352 | } | 373 | } |
| 353 | 374 | ||
| @@ -376,8 +397,10 @@ static void parse_minix(struct parsed_partitions *state, | |||
| 376 | * the normal boot sector. */ | 397 | * the normal boot sector. */ |
| 377 | if (msdos_magic_present (data + 510) && | 398 | if (msdos_magic_present (data + 510) && |
| 378 | SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ | 399 | SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ |
| 400 | char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1]; | ||
| 379 | 401 | ||
| 380 | printk(" %s%d: <minix:", state->name, origin); | 402 | snprintf(tmp, sizeof(tmp), " %s%d: <minix:", state->name, origin); |
| 403 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 381 | for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) { | 404 | for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) { |
| 382 | if (state->next == state->limit) | 405 | if (state->next == state->limit) |
| 383 | break; | 406 | break; |
| @@ -386,7 +409,7 @@ static void parse_minix(struct parsed_partitions *state, | |||
| 386 | put_partition(state, state->next++, | 409 | put_partition(state, state->next++, |
| 387 | start_sect(p), nr_sects(p)); | 410 | start_sect(p), nr_sects(p)); |
| 388 | } | 411 | } |
| 389 | printk(" >\n"); | 412 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
| 390 | } | 413 | } |
| 391 | put_dev_sector(sect); | 414 | put_dev_sector(sect); |
| 392 | #endif /* CONFIG_MINIX_SUBPARTITION */ | 415 | #endif /* CONFIG_MINIX_SUBPARTITION */ |
| @@ -425,7 +448,7 @@ int msdos_partition(struct parsed_partitions *state) | |||
| 425 | 448 | ||
| 426 | if (aix_magic_present(state, data)) { | 449 | if (aix_magic_present(state, data)) { |
| 427 | put_dev_sector(sect); | 450 | put_dev_sector(sect); |
| 428 | printk( " [AIX]"); | 451 | strlcat(state->pp_buf, " [AIX]", PAGE_SIZE); |
| 429 | return 0; | 452 | return 0; |
| 430 | } | 453 | } |
| 431 | 454 | ||
| @@ -446,7 +469,7 @@ int msdos_partition(struct parsed_partitions *state) | |||
| 446 | fb = (struct fat_boot_sector *) data; | 469 | fb = (struct fat_boot_sector *) data; |
| 447 | if (slot == 1 && fb->reserved && fb->fats | 470 | if (slot == 1 && fb->reserved && fb->fats |
| 448 | && fat_valid_media(fb->media)) { | 471 | && fat_valid_media(fb->media)) { |
| 449 | printk("\n"); | 472 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 450 | put_dev_sector(sect); | 473 | put_dev_sector(sect); |
| 451 | return 1; | 474 | return 1; |
| 452 | } else { | 475 | } else { |
| @@ -491,21 +514,21 @@ int msdos_partition(struct parsed_partitions *state) | |||
| 491 | n = min(size, max(sector_size, n)); | 514 | n = min(size, max(sector_size, n)); |
| 492 | put_partition(state, slot, start, n); | 515 | put_partition(state, slot, start, n); |
| 493 | 516 | ||
| 494 | printk(" <"); | 517 | strlcat(state->pp_buf, " <", PAGE_SIZE); |
| 495 | parse_extended(state, start, size); | 518 | parse_extended(state, start, size); |
| 496 | printk(" >"); | 519 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
| 497 | continue; | 520 | continue; |
| 498 | } | 521 | } |
| 499 | put_partition(state, slot, start, size); | 522 | put_partition(state, slot, start, size); |
| 500 | if (SYS_IND(p) == LINUX_RAID_PARTITION) | 523 | if (SYS_IND(p) == LINUX_RAID_PARTITION) |
| 501 | state->parts[slot].flags = ADDPART_FLAG_RAID; | 524 | state->parts[slot].flags = ADDPART_FLAG_RAID; |
| 502 | if (SYS_IND(p) == DM6_PARTITION) | 525 | if (SYS_IND(p) == DM6_PARTITION) |
| 503 | printk("[DM]"); | 526 | strlcat(state->pp_buf, "[DM]", PAGE_SIZE); |
| 504 | if (SYS_IND(p) == EZD_PARTITION) | 527 | if (SYS_IND(p) == EZD_PARTITION) |
| 505 | printk("[EZD]"); | 528 | strlcat(state->pp_buf, "[EZD]", PAGE_SIZE); |
| 506 | } | 529 | } |
| 507 | 530 | ||
| 508 | printk("\n"); | 531 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 509 | 532 | ||
| 510 | /* second pass - output for each on a separate line */ | 533 | /* second pass - output for each on a separate line */ |
| 511 | p = (struct partition *) (0x1be + data); | 534 | p = (struct partition *) (0x1be + data); |
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c index fc22b85d436a..48cec7cbca17 100644 --- a/fs/partitions/osf.c +++ b/fs/partitions/osf.c | |||
| @@ -72,7 +72,7 @@ int osf_partition(struct parsed_partitions *state) | |||
| 72 | le32_to_cpu(partition->p_size)); | 72 | le32_to_cpu(partition->p_size)); |
| 73 | slot++; | 73 | slot++; |
| 74 | } | 74 | } |
| 75 | printk("\n"); | 75 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 76 | put_dev_sector(sect); | 76 | put_dev_sector(sect); |
| 77 | return 1; | 77 | return 1; |
| 78 | } | 78 | } |
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c index 43b1df9aa16c..ea8a86dceaf4 100644 --- a/fs/partitions/sgi.c +++ b/fs/partitions/sgi.c | |||
| @@ -76,7 +76,7 @@ int sgi_partition(struct parsed_partitions *state) | |||
| 76 | } | 76 | } |
| 77 | slot++; | 77 | slot++; |
| 78 | } | 78 | } |
| 79 | printk("\n"); | 79 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 80 | put_dev_sector(sect); | 80 | put_dev_sector(sect); |
| 81 | return 1; | 81 | return 1; |
| 82 | } | 82 | } |
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c index a32660e25f7f..b5b6fcfb3d36 100644 --- a/fs/partitions/sun.c +++ b/fs/partitions/sun.c | |||
| @@ -116,7 +116,7 @@ int sun_partition(struct parsed_partitions *state) | |||
| 116 | } | 116 | } |
| 117 | slot++; | 117 | slot++; |
| 118 | } | 118 | } |
| 119 | printk("\n"); | 119 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 120 | put_dev_sector(sect); | 120 | put_dev_sector(sect); |
| 121 | return 1; | 121 | return 1; |
| 122 | } | 122 | } |
diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c index 9030c864428e..9627ccffc1c4 100644 --- a/fs/partitions/sysv68.c +++ b/fs/partitions/sysv68.c | |||
| @@ -54,6 +54,7 @@ int sysv68_partition(struct parsed_partitions *state) | |||
| 54 | unsigned char *data; | 54 | unsigned char *data; |
| 55 | struct dkblk0 *b; | 55 | struct dkblk0 *b; |
| 56 | struct slice *slice; | 56 | struct slice *slice; |
| 57 | char tmp[64]; | ||
| 57 | 58 | ||
| 58 | data = read_part_sector(state, 0, &sect); | 59 | data = read_part_sector(state, 0, &sect); |
| 59 | if (!data) | 60 | if (!data) |
| @@ -73,7 +74,8 @@ int sysv68_partition(struct parsed_partitions *state) | |||
| 73 | return -1; | 74 | return -1; |
| 74 | 75 | ||
| 75 | slices -= 1; /* last slice is the whole disk */ | 76 | slices -= 1; /* last slice is the whole disk */ |
| 76 | printk("sysV68: %s(s%u)", state->name, slices); | 77 | snprintf(tmp, sizeof(tmp), "sysV68: %s(s%u)", state->name, slices); |
| 78 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 77 | slice = (struct slice *)data; | 79 | slice = (struct slice *)data; |
| 78 | for (i = 0; i < slices; i++, slice++) { | 80 | for (i = 0; i < slices; i++, slice++) { |
| 79 | if (slot == state->limit) | 81 | if (slot == state->limit) |
| @@ -82,11 +84,12 @@ int sysv68_partition(struct parsed_partitions *state) | |||
| 82 | put_partition(state, slot, | 84 | put_partition(state, slot, |
| 83 | be32_to_cpu(slice->blkoff), | 85 | be32_to_cpu(slice->blkoff), |
| 84 | be32_to_cpu(slice->nblocks)); | 86 | be32_to_cpu(slice->nblocks)); |
| 85 | printk("(s%u)", i); | 87 | snprintf(tmp, sizeof(tmp), "(s%u)", i); |
| 88 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
| 86 | } | 89 | } |
| 87 | slot++; | 90 | slot++; |
| 88 | } | 91 | } |
| 89 | printk("\n"); | 92 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 90 | put_dev_sector(sect); | 93 | put_dev_sector(sect); |
| 91 | return 1; | 94 | return 1; |
| 92 | } | 95 | } |
diff --git a/fs/partitions/ultrix.c b/fs/partitions/ultrix.c index db9eef260364..8dbaf9f77a99 100644 --- a/fs/partitions/ultrix.c +++ b/fs/partitions/ultrix.c | |||
| @@ -39,7 +39,7 @@ int ultrix_partition(struct parsed_partitions *state) | |||
| 39 | label->pt_part[i].pi_blkoff, | 39 | label->pt_part[i].pi_blkoff, |
| 40 | label->pt_part[i].pi_nblocks); | 40 | label->pt_part[i].pi_nblocks); |
| 41 | put_dev_sector(sect); | 41 | put_dev_sector(sect); |
| 42 | printk ("\n"); | 42 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
| 43 | return 1; | 43 | return 1; |
| 44 | } else { | 44 | } else { |
| 45 | put_dev_sector(sect); | 45 | put_dev_sector(sect); |
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 11a7b5c68153..2758e2afc518 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | # Makefile for the Linux proc filesystem routines. | 2 | # Makefile for the Linux proc filesystem routines. |
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | obj-$(CONFIG_PROC_FS) += proc.o | 5 | obj-y += proc.o |
| 6 | 6 | ||
| 7 | proc-y := nommu.o task_nommu.o | 7 | proc-y := nommu.o task_nommu.o |
| 8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o | 8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o |
diff --git a/fs/proc/base.c b/fs/proc/base.c index c806dfb24e08..a1c43e7c8a7b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -149,18 +149,13 @@ static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, | |||
| 149 | return count; | 149 | return count; |
| 150 | } | 150 | } |
| 151 | 151 | ||
| 152 | static int get_fs_path(struct task_struct *task, struct path *path, bool root) | 152 | static int get_task_root(struct task_struct *task, struct path *root) |
| 153 | { | 153 | { |
| 154 | struct fs_struct *fs; | ||
| 155 | int result = -ENOENT; | 154 | int result = -ENOENT; |
| 156 | 155 | ||
| 157 | task_lock(task); | 156 | task_lock(task); |
| 158 | fs = task->fs; | 157 | if (task->fs) { |
| 159 | if (fs) { | 158 | get_fs_root(task->fs, root); |
| 160 | read_lock(&fs->lock); | ||
| 161 | *path = root ? fs->root : fs->pwd; | ||
| 162 | path_get(path); | ||
| 163 | read_unlock(&fs->lock); | ||
| 164 | result = 0; | 159 | result = 0; |
| 165 | } | 160 | } |
| 166 | task_unlock(task); | 161 | task_unlock(task); |
| @@ -173,7 +168,12 @@ static int proc_cwd_link(struct inode *inode, struct path *path) | |||
| 173 | int result = -ENOENT; | 168 | int result = -ENOENT; |
| 174 | 169 | ||
| 175 | if (task) { | 170 | if (task) { |
| 176 | result = get_fs_path(task, path, 0); | 171 | task_lock(task); |
| 172 | if (task->fs) { | ||
| 173 | get_fs_pwd(task->fs, path); | ||
| 174 | result = 0; | ||
| 175 | } | ||
| 176 | task_unlock(task); | ||
| 177 | put_task_struct(task); | 177 | put_task_struct(task); |
| 178 | } | 178 | } |
| 179 | return result; | 179 | return result; |
| @@ -185,7 +185,7 @@ static int proc_root_link(struct inode *inode, struct path *path) | |||
| 185 | int result = -ENOENT; | 185 | int result = -ENOENT; |
| 186 | 186 | ||
| 187 | if (task) { | 187 | if (task) { |
| 188 | result = get_fs_path(task, path, 1); | 188 | result = get_task_root(task, path); |
| 189 | put_task_struct(task); | 189 | put_task_struct(task); |
| 190 | } | 190 | } |
| 191 | return result; | 191 | return result; |
| @@ -597,7 +597,7 @@ static int mounts_open_common(struct inode *inode, struct file *file, | |||
| 597 | get_mnt_ns(ns); | 597 | get_mnt_ns(ns); |
| 598 | } | 598 | } |
| 599 | rcu_read_unlock(); | 599 | rcu_read_unlock(); |
| 600 | if (ns && get_fs_path(task, &root, 1) == 0) | 600 | if (ns && get_task_root(task, &root) == 0) |
| 601 | ret = 0; | 601 | ret = 0; |
| 602 | put_task_struct(task); | 602 | put_task_struct(task); |
| 603 | } | 603 | } |
| @@ -1526,7 +1526,7 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) | |||
| 1526 | if (!tmp) | 1526 | if (!tmp) |
| 1527 | return -ENOMEM; | 1527 | return -ENOMEM; |
| 1528 | 1528 | ||
| 1529 | pathname = d_path(path, tmp, PAGE_SIZE); | 1529 | pathname = d_path_with_unreachable(path, tmp, PAGE_SIZE); |
| 1530 | len = PTR_ERR(pathname); | 1530 | len = PTR_ERR(pathname); |
| 1531 | if (IS_ERR(pathname)) | 1531 | if (IS_ERR(pathname)) |
| 1532 | goto out; | 1532 | goto out; |
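The open-coded lock/copy/path_get sequence in the old get_fs_path() is replaced by the get_fs_root()/get_fs_pwd() helpers of the fs_struct API. A rough sketch of the shape of such a helper, not the exact kernel source; demo_get_fs_root is an invented name and the lock primitive simply mirrors the code removed above:

    #include <linux/fs_struct.h>
    #include <linux/path.h>

    static void demo_get_fs_root(struct fs_struct *fs, struct path *root)
    {
        read_lock(&fs->lock);   /* serialize against chroot()/chdir() updates */
        *root = fs->root;
        path_get(root);         /* pin the vfsmount and dentry */
        read_unlock(&fs->lock);
    }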
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 19fbc810e8e7..1ec952b1f036 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
| @@ -983,7 +983,6 @@ static int flush_older_commits(struct super_block *s, | |||
| 983 | 983 | ||
| 984 | static int reiserfs_async_progress_wait(struct super_block *s) | 984 | static int reiserfs_async_progress_wait(struct super_block *s) |
| 985 | { | 985 | { |
| 986 | DEFINE_WAIT(wait); | ||
| 987 | struct reiserfs_journal *j = SB_JOURNAL(s); | 986 | struct reiserfs_journal *j = SB_JOURNAL(s); |
| 988 | 987 | ||
| 989 | if (atomic_read(&j->j_async_throttle)) { | 988 | if (atomic_read(&j->j_async_throttle)) { |
diff --git a/fs/signalfd.c b/fs/signalfd.c index f329849ce3c0..1c5a6add779d 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c | |||
| @@ -88,6 +88,7 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, | |||
| 88 | err |= __put_user(kinfo->si_tid, &uinfo->ssi_tid); | 88 | err |= __put_user(kinfo->si_tid, &uinfo->ssi_tid); |
| 89 | err |= __put_user(kinfo->si_overrun, &uinfo->ssi_overrun); | 89 | err |= __put_user(kinfo->si_overrun, &uinfo->ssi_overrun); |
| 90 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); | 90 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); |
| 91 | err |= __put_user(kinfo->si_int, &uinfo->ssi_int); | ||
| 91 | break; | 92 | break; |
| 92 | case __SI_POLL: | 93 | case __SI_POLL: |
| 93 | err |= __put_user(kinfo->si_band, &uinfo->ssi_band); | 94 | err |= __put_user(kinfo->si_band, &uinfo->ssi_band); |
| @@ -111,6 +112,7 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, | |||
| 111 | err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); | 112 | err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); |
| 112 | err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid); | 113 | err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid); |
| 113 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); | 114 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); |
| 115 | err |= __put_user(kinfo->si_int, &uinfo->ssi_int); | ||
| 114 | break; | 116 | break; |
| 115 | default: | 117 | default: |
| 116 | /* | 118 | /* |
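The two added lines copy si_int into ssi_int, so a payload queued with sigqueue() (or carried by a POSIX timer) is no longer dropped when the signal is consumed through a signalfd. A small userspace check of that path; it sends itself SIGUSR1 with an integer payload and reads it back through the descriptor:

    #include <signal.h>
    #include <stdio.h>
    #include <sys/signalfd.h>
    #include <unistd.h>

    int main(void)
    {
        sigset_t mask;
        struct signalfd_siginfo ssi;
        union sigval val = { .sival_int = 42 };
        int fd;

        sigemptyset(&mask);
        sigaddset(&mask, SIGUSR1);
        sigprocmask(SIG_BLOCK, &mask, NULL);  /* deliver via the fd, not a handler */

        fd = signalfd(-1, &mask, 0);
        if (fd < 0) {
            perror("signalfd");
            return 1;
        }

        sigqueue(getpid(), SIGUSR1, val);     /* payload travels in si_int */

        if (read(fd, &ssi, sizeof(ssi)) != (ssize_t)sizeof(ssi)) {
            perror("read");
            return 1;
        }
        printf("signo %u ssi_int %d\n", ssi.ssi_signo, ssi.ssi_int);
        return 0;
    }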
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index cc6ce8a84c21..e5f63da64d04 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig | |||
| @@ -5,13 +5,13 @@ config SQUASHFS | |||
| 5 | help | 5 | help |
| 6 | Saying Y here includes support for SquashFS 4.0 (a Compressed | 6 | Saying Y here includes support for SquashFS 4.0 (a Compressed |
| 7 | Read-Only File System). Squashfs is a highly compressed read-only | 7 | Read-Only File System). Squashfs is a highly compressed read-only |
| 8 | filesystem for Linux. It uses zlib compression to compress both | 8 | filesystem for Linux. It uses zlib/lzo compression to compress both |
| 9 | files, inodes and directories. Inodes in the system are very small | 9 | files, inodes and directories. Inodes in the system are very small |
| 10 | and all blocks are packed to minimise data overhead. Block sizes | 10 | and all blocks are packed to minimise data overhead. Block sizes |
| 11 | greater than 4K are supported up to a maximum of 1 Mbytes (default | 11 | greater than 4K are supported up to a maximum of 1 Mbytes (default |
| 12 | block size 128K). SquashFS 4.0 supports 64 bit filesystems and files | 12 | block size 128K). SquashFS 4.0 supports 64 bit filesystems and files |
| 13 | (larger than 4GB), full uid/gid information, hard links and | 13 | (larger than 4GB), full uid/gid information, hard links and |
| 14 | timestamps. | 14 | timestamps. |
| 15 | 15 | ||
| 16 | Squashfs is intended for general read-only filesystem use, for | 16 | Squashfs is intended for general read-only filesystem use, for |
| 17 | archival use (i.e. in cases where a .tar.gz file may be used), and in | 17 | archival use (i.e. in cases where a .tar.gz file may be used), and in |
| @@ -26,7 +26,7 @@ config SQUASHFS | |||
| 26 | 26 | ||
| 27 | If unsure, say N. | 27 | If unsure, say N. |
| 28 | 28 | ||
| 29 | config SQUASHFS_XATTRS | 29 | config SQUASHFS_XATTR |
| 30 | bool "Squashfs XATTR support" | 30 | bool "Squashfs XATTR support" |
| 31 | depends on SQUASHFS | 31 | depends on SQUASHFS |
| 32 | default n | 32 | default n |
| @@ -37,9 +37,24 @@ config SQUASHFS_XATTRS | |||
| 37 | 37 | ||
| 38 | If unsure, say N. | 38 | If unsure, say N. |
| 39 | 39 | ||
| 40 | config SQUASHFS_EMBEDDED | 40 | config SQUASHFS_LZO |
| 41 | bool "Include support for LZO compressed file systems" | ||
| 42 | depends on SQUASHFS | ||
| 43 | default n | ||
| 44 | select LZO_DECOMPRESS | ||
| 45 | help | ||
| 46 | Saying Y here includes support for reading Squashfs file systems | ||
| 47 | compressed with LZO compression. LZO compression is mainly | ||
| 48 | aimed at embedded systems with slower CPUs where the overheads | ||
| 49 | of zlib are too high. | ||
| 41 | 50 | ||
| 42 | bool "Additional option for memory-constrained systems" | 51 | LZO is not the standard compression used in Squashfs and so most |
| 52 | file systems will be readable without selecting this option. | ||
| 53 | |||
| 54 | If unsure, say N. | ||
| 55 | |||
| 56 | config SQUASHFS_EMBEDDED | ||
| 57 | bool "Additional option for memory-constrained systems" | ||
| 43 | depends on SQUASHFS | 58 | depends on SQUASHFS |
| 44 | default n | 59 | default n |
| 45 | help | 60 | help |
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 2cee3e9fa452..7672bac8d328 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile | |||
| @@ -5,5 +5,5 @@ | |||
| 5 | obj-$(CONFIG_SQUASHFS) += squashfs.o | 5 | obj-$(CONFIG_SQUASHFS) += squashfs.o |
| 6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o | 6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o |
| 7 | squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o | 7 | squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o |
| 8 | squashfs-$(CONFIG_SQUASHFS_XATTRS) += xattr.o xattr_id.o | 8 | squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o |
| 9 | 9 | squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o | |
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 157478da6ac9..24af9ce9722f 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c | |||
| @@ -40,9 +40,11 @@ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { | |||
| 40 | NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 | 40 | NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 |
| 41 | }; | 41 | }; |
| 42 | 42 | ||
| 43 | #ifndef CONFIG_SQUASHFS_LZO | ||
| 43 | static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = { | 44 | static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = { |
| 44 | NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 | 45 | NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 |
| 45 | }; | 46 | }; |
| 47 | #endif | ||
| 46 | 48 | ||
| 47 | static const struct squashfs_decompressor squashfs_unknown_comp_ops = { | 49 | static const struct squashfs_decompressor squashfs_unknown_comp_ops = { |
| 48 | NULL, NULL, NULL, 0, "unknown", 0 | 50 | NULL, NULL, NULL, 0, "unknown", 0 |
| @@ -51,7 +53,11 @@ static const struct squashfs_decompressor squashfs_unknown_comp_ops = { | |||
| 51 | static const struct squashfs_decompressor *decompressor[] = { | 53 | static const struct squashfs_decompressor *decompressor[] = { |
| 52 | &squashfs_zlib_comp_ops, | 54 | &squashfs_zlib_comp_ops, |
| 53 | &squashfs_lzma_unsupported_comp_ops, | 55 | &squashfs_lzma_unsupported_comp_ops, |
| 56 | #ifdef CONFIG_SQUASHFS_LZO | ||
| 57 | &squashfs_lzo_comp_ops, | ||
| 58 | #else | ||
| 54 | &squashfs_lzo_unsupported_comp_ops, | 59 | &squashfs_lzo_unsupported_comp_ops, |
| 60 | #endif | ||
| 55 | &squashfs_unknown_comp_ops | 61 | &squashfs_unknown_comp_ops |
| 56 | }; | 62 | }; |
| 57 | 63 | ||
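With the table now holding either squashfs_lzo_comp_ops or the unsupported stub depending on CONFIG_SQUASHFS_LZO, picking a decompressor from the id stored in the superblock stays a simple scan of decompressor[], terminated by the unknown entry whose id is 0. Roughly, as a sketch of the lookup shape rather than the exact code in decompressor.c:

    static const struct squashfs_decompressor *lookup(int id)
    {
        int i;

        /* decompressor[] ends with squashfs_unknown_comp_ops, whose
         * id is 0, so the walk terminates even for an unknown id. */
        for (i = 0; decompressor[i]->id; i++)
            if (id == decompressor[i]->id)
                break;
        return decompressor[i];
    }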
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c new file mode 100644 index 000000000000..5d87789bf1c1 --- /dev/null +++ b/fs/squashfs/lzo_wrapper.c | |||
| @@ -0,0 +1,136 @@ | |||
| 1 | /* | ||
| 2 | * Squashfs - a compressed read only filesystem for Linux | ||
| 3 | * | ||
| 4 | * Copyright (c) 2010 LG Electronics | ||
| 5 | * Chan Jeong <chan.jeong@lge.com> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU General Public License | ||
| 9 | * as published by the Free Software Foundation; either version 2, | ||
| 10 | * or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | * GNU General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, write to the Free Software | ||
| 19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
| 20 | * | ||
| 21 | * lzo_wrapper.c | ||
| 22 | */ | ||
| 23 | |||
| 24 | #include <linux/mutex.h> | ||
| 25 | #include <linux/buffer_head.h> | ||
| 26 | #include <linux/slab.h> | ||
| 27 | #include <linux/vmalloc.h> | ||
| 28 | #include <linux/lzo.h> | ||
| 29 | |||
| 30 | #include "squashfs_fs.h" | ||
| 31 | #include "squashfs_fs_sb.h" | ||
| 32 | #include "squashfs_fs_i.h" | ||
| 33 | #include "squashfs.h" | ||
| 34 | #include "decompressor.h" | ||
| 35 | |||
| 36 | struct squashfs_lzo { | ||
| 37 | void *input; | ||
| 38 | void *output; | ||
| 39 | }; | ||
| 40 | |||
| 41 | static void *lzo_init(struct squashfs_sb_info *msblk) | ||
| 42 | { | ||
| 43 | int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); | ||
| 44 | |||
| 45 | struct squashfs_lzo *stream = kzalloc(sizeof(*stream), GFP_KERNEL); | ||
| 46 | if (stream == NULL) | ||
| 47 | goto failed; | ||
| 48 | stream->input = vmalloc(block_size); | ||
| 49 | if (stream->input == NULL) | ||
| 50 | goto failed; | ||
| 51 | stream->output = vmalloc(block_size); | ||
| 52 | if (stream->output == NULL) | ||
| 53 | goto failed2; | ||
| 54 | |||
| 55 | return stream; | ||
| 56 | |||
| 57 | failed2: | ||
| 58 | vfree(stream->input); | ||
| 59 | failed: | ||
| 60 | ERROR("Failed to allocate lzo workspace\n"); | ||
| 61 | kfree(stream); | ||
| 62 | return NULL; | ||
| 63 | } | ||
| 64 | |||
| 65 | |||
| 66 | static void lzo_free(void *strm) | ||
| 67 | { | ||
| 68 | struct squashfs_lzo *stream = strm; | ||
| 69 | |||
| 70 | if (stream) { | ||
| 71 | vfree(stream->input); | ||
| 72 | vfree(stream->output); | ||
| 73 | } | ||
| 74 | kfree(stream); | ||
| 75 | } | ||
| 76 | |||
| 77 | |||
| 78 | static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, | ||
| 79 | struct buffer_head **bh, int b, int offset, int length, int srclength, | ||
| 80 | int pages) | ||
| 81 | { | ||
| 82 | struct squashfs_lzo *stream = msblk->stream; | ||
| 83 | void *buff = stream->input; | ||
| 84 | int avail, i, bytes = length, res; | ||
| 85 | size_t out_len = srclength; | ||
| 86 | |||
| 87 | mutex_lock(&msblk->read_data_mutex); | ||
| 88 | |||
| 89 | for (i = 0; i < b; i++) { | ||
| 90 | wait_on_buffer(bh[i]); | ||
| 91 | if (!buffer_uptodate(bh[i])) | ||
| 92 | goto block_release; | ||
| 93 | |||
| 94 | avail = min(bytes, msblk->devblksize - offset); | ||
| 95 | memcpy(buff, bh[i]->b_data + offset, avail); | ||
| 96 | buff += avail; | ||
| 97 | bytes -= avail; | ||
| 98 | offset = 0; | ||
| 99 | put_bh(bh[i]); | ||
| 100 | } | ||
| 101 | |||
| 102 | res = lzo1x_decompress_safe(stream->input, (size_t)length, | ||
| 103 | stream->output, &out_len); | ||
| 104 | if (res != LZO_E_OK) | ||
| 105 | goto failed; | ||
| 106 | |||
| 107 | res = bytes = (int)out_len; | ||
| 108 | for (i = 0, buff = stream->output; bytes && i < pages; i++) { | ||
| 109 | avail = min_t(int, bytes, PAGE_CACHE_SIZE); | ||
| 110 | memcpy(buffer[i], buff, avail); | ||
| 111 | buff += avail; | ||
| 112 | bytes -= avail; | ||
| 113 | } | ||
| 114 | |||
| 115 | mutex_unlock(&msblk->read_data_mutex); | ||
| 116 | return res; | ||
| 117 | |||
| 118 | block_release: | ||
| 119 | for (; i < b; i++) | ||
| 120 | put_bh(bh[i]); | ||
| 121 | |||
| 122 | failed: | ||
| 123 | mutex_unlock(&msblk->read_data_mutex); | ||
| 124 | |||
| 125 | ERROR("lzo decompression failed, data probably corrupt\n"); | ||
| 126 | return -EIO; | ||
| 127 | } | ||
| 128 | |||
| 129 | const struct squashfs_decompressor squashfs_lzo_comp_ops = { | ||
| 130 | .init = lzo_init, | ||
| 131 | .free = lzo_free, | ||
| 132 | .decompress = lzo_uncompress, | ||
| 133 | .id = LZO_COMPRESSION, | ||
| 134 | .name = "lzo", | ||
| 135 | .supported = 1 | ||
| 136 | }; | ||
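One sizing detail in the new wrapper: lzo_init() allocates both scratch buffers at max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE), presumably because the same decompressor path handles metadata blocks as well as data blocks, and a metadata block can be as large as SQUASHFS_METADATA_SIZE even on a filesystem whose data block size is smaller.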
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 733a17c42945..5d45569d5f72 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h | |||
| @@ -104,3 +104,6 @@ extern const struct xattr_handler *squashfs_xattr_handlers[]; | |||
| 104 | 104 | ||
| 105 | /* zlib_wrapper.c */ | 105 | /* zlib_wrapper.c */ |
| 106 | extern const struct squashfs_decompressor squashfs_zlib_comp_ops; | 106 | extern const struct squashfs_decompressor squashfs_zlib_comp_ops; |
| 107 | |||
| 108 | /* lzo_wrapper.c */ | ||
| 109 | extern const struct squashfs_decompressor squashfs_lzo_comp_ops; | ||
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 8eabb808b78d..c5137fc9ab11 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h | |||
| @@ -274,7 +274,7 @@ struct squashfs_base_inode { | |||
| 274 | __le16 uid; | 274 | __le16 uid; |
| 275 | __le16 guid; | 275 | __le16 guid; |
| 276 | __le32 mtime; | 276 | __le32 mtime; |
| 277 | __le32 inode_number; | 277 | __le32 inode_number; |
| 278 | }; | 278 | }; |
| 279 | 279 | ||
| 280 | struct squashfs_ipc_inode { | 280 | struct squashfs_ipc_inode { |
| @@ -283,7 +283,7 @@ struct squashfs_ipc_inode { | |||
| 283 | __le16 uid; | 283 | __le16 uid; |
| 284 | __le16 guid; | 284 | __le16 guid; |
| 285 | __le32 mtime; | 285 | __le32 mtime; |
| 286 | __le32 inode_number; | 286 | __le32 inode_number; |
| 287 | __le32 nlink; | 287 | __le32 nlink; |
| 288 | }; | 288 | }; |
| 289 | 289 | ||
| @@ -293,7 +293,7 @@ struct squashfs_lipc_inode { | |||
| 293 | __le16 uid; | 293 | __le16 uid; |
| 294 | __le16 guid; | 294 | __le16 guid; |
| 295 | __le32 mtime; | 295 | __le32 mtime; |
| 296 | __le32 inode_number; | 296 | __le32 inode_number; |
| 297 | __le32 nlink; | 297 | __le32 nlink; |
| 298 | __le32 xattr; | 298 | __le32 xattr; |
| 299 | }; | 299 | }; |
| @@ -304,7 +304,7 @@ struct squashfs_dev_inode { | |||
| 304 | __le16 uid; | 304 | __le16 uid; |
| 305 | __le16 guid; | 305 | __le16 guid; |
| 306 | __le32 mtime; | 306 | __le32 mtime; |
| 307 | __le32 inode_number; | 307 | __le32 inode_number; |
| 308 | __le32 nlink; | 308 | __le32 nlink; |
| 309 | __le32 rdev; | 309 | __le32 rdev; |
| 310 | }; | 310 | }; |
| @@ -315,7 +315,7 @@ struct squashfs_ldev_inode { | |||
| 315 | __le16 uid; | 315 | __le16 uid; |
| 316 | __le16 guid; | 316 | __le16 guid; |
| 317 | __le32 mtime; | 317 | __le32 mtime; |
| 318 | __le32 inode_number; | 318 | __le32 inode_number; |
| 319 | __le32 nlink; | 319 | __le32 nlink; |
| 320 | __le32 rdev; | 320 | __le32 rdev; |
| 321 | __le32 xattr; | 321 | __le32 xattr; |
| @@ -327,7 +327,7 @@ struct squashfs_symlink_inode { | |||
| 327 | __le16 uid; | 327 | __le16 uid; |
| 328 | __le16 guid; | 328 | __le16 guid; |
| 329 | __le32 mtime; | 329 | __le32 mtime; |
| 330 | __le32 inode_number; | 330 | __le32 inode_number; |
| 331 | __le32 nlink; | 331 | __le32 nlink; |
| 332 | __le32 symlink_size; | 332 | __le32 symlink_size; |
| 333 | char symlink[0]; | 333 | char symlink[0]; |
| @@ -339,7 +339,7 @@ struct squashfs_reg_inode { | |||
| 339 | __le16 uid; | 339 | __le16 uid; |
| 340 | __le16 guid; | 340 | __le16 guid; |
| 341 | __le32 mtime; | 341 | __le32 mtime; |
| 342 | __le32 inode_number; | 342 | __le32 inode_number; |
| 343 | __le32 start_block; | 343 | __le32 start_block; |
| 344 | __le32 fragment; | 344 | __le32 fragment; |
| 345 | __le32 offset; | 345 | __le32 offset; |
| @@ -353,7 +353,7 @@ struct squashfs_lreg_inode { | |||
| 353 | __le16 uid; | 353 | __le16 uid; |
| 354 | __le16 guid; | 354 | __le16 guid; |
| 355 | __le32 mtime; | 355 | __le32 mtime; |
| 356 | __le32 inode_number; | 356 | __le32 inode_number; |
| 357 | __le64 start_block; | 357 | __le64 start_block; |
| 358 | __le64 file_size; | 358 | __le64 file_size; |
| 359 | __le64 sparse; | 359 | __le64 sparse; |
| @@ -370,7 +370,7 @@ struct squashfs_dir_inode { | |||
| 370 | __le16 uid; | 370 | __le16 uid; |
| 371 | __le16 guid; | 371 | __le16 guid; |
| 372 | __le32 mtime; | 372 | __le32 mtime; |
| 373 | __le32 inode_number; | 373 | __le32 inode_number; |
| 374 | __le32 start_block; | 374 | __le32 start_block; |
| 375 | __le32 nlink; | 375 | __le32 nlink; |
| 376 | __le16 file_size; | 376 | __le16 file_size; |
| @@ -384,7 +384,7 @@ struct squashfs_ldir_inode { | |||
| 384 | __le16 uid; | 384 | __le16 uid; |
| 385 | __le16 guid; | 385 | __le16 guid; |
| 386 | __le32 mtime; | 386 | __le32 mtime; |
| 387 | __le32 inode_number; | 387 | __le32 inode_number; |
| 388 | __le32 nlink; | 388 | __le32 nlink; |
| 389 | __le32 file_size; | 389 | __le32 file_size; |
| 390 | __le32 start_block; | 390 | __le32 start_block; |
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c index c7655e8b31cd..652b8541f9c6 100644 --- a/fs/squashfs/xattr.c +++ b/fs/squashfs/xattr.c | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | * along with this program; if not, write to the Free Software | 18 | * along with this program; if not, write to the Free Software |
| 19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | 19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 20 | * | 20 | * |
| 21 | * xattr_id.c | 21 | * xattr.c |
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | #include <linux/init.h> | 24 | #include <linux/init.h> |
| @@ -295,7 +295,7 @@ static const struct xattr_handler squashfs_xattr_security_handler = { | |||
| 295 | .get = squashfs_security_get | 295 | .get = squashfs_security_get |
| 296 | }; | 296 | }; |
| 297 | 297 | ||
| 298 | static inline const struct xattr_handler *squashfs_xattr_handler(int type) | 298 | static const struct xattr_handler *squashfs_xattr_handler(int type) |
| 299 | { | 299 | { |
| 300 | if (type & ~(SQUASHFS_XATTR_PREFIX_MASK | SQUASHFS_XATTR_VALUE_OOL)) | 300 | if (type & ~(SQUASHFS_XATTR_PREFIX_MASK | SQUASHFS_XATTR_VALUE_OOL)) |
| 301 | /* ignore unrecognised type */ | 301 | /* ignore unrecognised type */ |
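For context, the handler lookup shown above accepts a type only when every set bit falls inside SQUASHFS_XATTR_PREFIX_MASK or the out-of-line value flag; anything else is treated as an unrecognised entry and skipped rather than failing the whole listing. A minimal sketch of that mask-and-dispatch pattern follows; the constants and handler table here are illustrative stand-ins, not squashfs's real values.

/*
 * Minimal sketch of the mask-and-dispatch pattern used by
 * squashfs_xattr_handler() above.  XATTR_PREFIX_MASK, XATTR_VALUE_OOL
 * and the handler table are made-up stand-ins for illustration only.
 */
#include <stddef.h>

#define XATTR_PREFIX_MASK 0x3           /* low bits select the name prefix */
#define XATTR_VALUE_OOL   0x100         /* flag: value stored out of line */

struct xattr_handler {
        const char *prefix;
};

static const struct xattr_handler user_handler     = { "user."     };
static const struct xattr_handler trusted_handler  = { "trusted."  };
static const struct xattr_handler security_handler = { "security." };

static const struct xattr_handler *handler_table[] = {
        NULL,                   /* 0: no such prefix */
        &user_handler,          /* 1 */
        &trusted_handler,       /* 2 */
        &security_handler,      /* 3 */
};

static const struct xattr_handler *lookup_handler(int type)
{
        /* any bit outside the prefix mask or the OOL flag means an
         * unrecognised entry: return NULL so the caller can skip it */
        if (type & ~(XATTR_PREFIX_MASK | XATTR_VALUE_OOL))
                return NULL;
        return handler_table[type & XATTR_PREFIX_MASK];
}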
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h index 9da071ae181c..49fe0d719fbf 100644 --- a/fs/squashfs/xattr.h +++ b/fs/squashfs/xattr.h | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | * xattr.h | 21 | * xattr.h |
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | #ifdef CONFIG_SQUASHFS_XATTRS | 24 | #ifdef CONFIG_SQUASHFS_XATTR |
| 25 | extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, | 25 | extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, |
| 26 | u64 *, int *); | 26 | u64 *, int *); |
| 27 | extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, | 27 | extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, |
diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 0e44a6253352..a0b0cda6927e 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c | |||
| @@ -434,12 +434,46 @@ Ebadsize: | |||
| 434 | goto failed; | 434 | goto failed; |
| 435 | } | 435 | } |
| 436 | 436 | ||
| 437 | static int v7_fill_super(struct super_block *sb, void *data, int silent) | 437 | static int v7_sanity_check(struct super_block *sb, struct buffer_head *bh) |
| 438 | { | 438 | { |
| 439 | struct sysv_sb_info *sbi; | ||
| 440 | struct buffer_head *bh, *bh2 = NULL; | ||
| 441 | struct v7_super_block *v7sb; | 439 | struct v7_super_block *v7sb; |
| 442 | struct sysv_inode *v7i; | 440 | struct sysv_inode *v7i; |
| 441 | struct buffer_head *bh2; | ||
| 442 | struct sysv_sb_info *sbi; | ||
| 443 | |||
| 444 | sbi = sb->s_fs_info; | ||
| 445 | |||
| 446 | /* plausibility check on superblock */ | ||
| 447 | v7sb = (struct v7_super_block *) bh->b_data; | ||
| 448 | if (fs16_to_cpu(sbi, v7sb->s_nfree) > V7_NICFREE || | ||
| 449 | fs16_to_cpu(sbi, v7sb->s_ninode) > V7_NICINOD || | ||
| 450 | fs32_to_cpu(sbi, v7sb->s_fsize) > V7_MAXSIZE) | ||
| 451 | return 0; | ||
| 452 | |||
| 453 | /* plausibility check on root inode: it is a directory, | ||
| 454 | with a nonzero size that is a multiple of 16 */ | ||
| 455 | bh2 = sb_bread(sb, 2); | ||
| 456 | if (bh2 == NULL) | ||
| 457 | return 0; | ||
| 458 | |||
| 459 | v7i = (struct sysv_inode *)(bh2->b_data + 64); | ||
| 460 | if ((fs16_to_cpu(sbi, v7i->i_mode) & ~0777) != S_IFDIR || | ||
| 461 | (fs32_to_cpu(sbi, v7i->i_size) == 0) || | ||
| 462 | (fs32_to_cpu(sbi, v7i->i_size) & 017) || | ||
| 463 | (fs32_to_cpu(sbi, v7i->i_size) > V7_NFILES * | ||
| 464 | sizeof(struct sysv_dir_entry))) { | ||
| 465 | brelse(bh2); | ||
| 466 | return 0; | ||
| 467 | } | ||
| 468 | |||
| 469 | brelse(bh2); | ||
| 470 | return 1; | ||
| 471 | } | ||
| 472 | |||
| 473 | static int v7_fill_super(struct super_block *sb, void *data, int silent) | ||
| 474 | { | ||
| 475 | struct sysv_sb_info *sbi; | ||
| 476 | struct buffer_head *bh; | ||
| 443 | 477 | ||
| 444 | if (440 != sizeof (struct v7_super_block)) | 478 | if (440 != sizeof (struct v7_super_block)) |
| 445 | panic("V7 FS: bad super-block size"); | 479 | panic("V7 FS: bad super-block size"); |
| @@ -453,7 +487,6 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent) | |||
| 453 | sbi->s_sb = sb; | 487 | sbi->s_sb = sb; |
| 454 | sbi->s_block_base = 0; | 488 | sbi->s_block_base = 0; |
| 455 | sbi->s_type = FSTYPE_V7; | 489 | sbi->s_type = FSTYPE_V7; |
| 456 | sbi->s_bytesex = BYTESEX_PDP; | ||
| 457 | sb->s_fs_info = sbi; | 490 | sb->s_fs_info = sbi; |
| 458 | 491 | ||
| 459 | sb_set_blocksize(sb, 512); | 492 | sb_set_blocksize(sb, 512); |
| @@ -465,32 +498,27 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent) | |||
| 465 | goto failed; | 498 | goto failed; |
| 466 | } | 499 | } |
| 467 | 500 | ||
| 468 | /* plausibility check on superblock */ | 501 | /* Try PDP-11 UNIX */ |
| 469 | v7sb = (struct v7_super_block *) bh->b_data; | 502 | sbi->s_bytesex = BYTESEX_PDP; |
| 470 | if (fs16_to_cpu(sbi, v7sb->s_nfree) > V7_NICFREE || | 503 | if (v7_sanity_check(sb, bh)) |
| 471 | fs16_to_cpu(sbi, v7sb->s_ninode) > V7_NICINOD || | 504 | goto detected; |
| 472 | fs32_to_cpu(sbi, v7sb->s_time) == 0) | ||
| 473 | goto failed; | ||
| 474 | 505 | ||
| 475 | /* plausibility check on root inode: it is a directory, | 506 | /* Try PC/IX, v7/x86 */ |
| 476 | with a nonzero size that is a multiple of 16 */ | 507 | sbi->s_bytesex = BYTESEX_LE; |
| 477 | if ((bh2 = sb_bread(sb, 2)) == NULL) | 508 | if (v7_sanity_check(sb, bh)) |
| 478 | goto failed; | 509 | goto detected; |
| 479 | v7i = (struct sysv_inode *)(bh2->b_data + 64); | ||
| 480 | if ((fs16_to_cpu(sbi, v7i->i_mode) & ~0777) != S_IFDIR || | ||
| 481 | (fs32_to_cpu(sbi, v7i->i_size) == 0) || | ||
| 482 | (fs32_to_cpu(sbi, v7i->i_size) & 017) != 0) | ||
| 483 | goto failed; | ||
| 484 | brelse(bh2); | ||
| 485 | bh2 = NULL; | ||
| 486 | 510 | ||
| 511 | goto failed; | ||
| 512 | |||
| 513 | detected: | ||
| 487 | sbi->s_bh1 = bh; | 514 | sbi->s_bh1 = bh; |
| 488 | sbi->s_bh2 = bh; | 515 | sbi->s_bh2 = bh; |
| 489 | if (complete_read_super(sb, silent, 1)) | 516 | if (complete_read_super(sb, silent, 1)) |
| 490 | return 0; | 517 | return 0; |
| 491 | 518 | ||
| 492 | failed: | 519 | failed: |
| 493 | brelse(bh2); | 520 | printk(KERN_ERR "VFS: could not find a valid V7 on %s.\n", |
| 521 | sb->s_id); | ||
| 494 | brelse(bh); | 522 | brelse(bh); |
| 495 | kfree(sbi); | 523 | kfree(sbi); |
| 496 | return -EINVAL; | 524 | return -EINVAL; |
| @@ -559,4 +587,5 @@ static void __exit exit_sysv_fs(void) | |||
| 559 | 587 | ||
| 560 | module_init(init_sysv_fs) | 588 | module_init(init_sysv_fs) |
| 561 | module_exit(exit_sysv_fs) | 589 | module_exit(exit_sysv_fs) |
| 590 | MODULE_ALIAS("v7"); | ||
| 562 | MODULE_LICENSE("GPL"); | 591 | MODULE_LICENSE("GPL"); |
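The sysv change factors the superblock and root-inode plausibility test out into v7_sanity_check() so that v7_fill_super() can probe the same buffer twice: first as PDP-11 middle-endian, then as little-endian PC/IX (v7/x86), and only fail if neither interpretation looks sane. A small userspace sketch of that probe-both-byte-orders idea follows; the field offsets, limits and plausible() check are simplified stand-ins for the real v7_super_block layout, not copies of it.

/*
 * Standalone sketch of the "try one byte order, then the other" detection
 * added to v7_fill_super() above.  Offsets and limits are hypothetical
 * stand-ins in the spirit of s_fsize/s_nfree/s_ninode and
 * V7_NICFREE/V7_NICINOD/V7_MAXSIZE.
 */
#include <stdint.h>
#include <stddef.h>

enum bytesex { BYTESEX_PDP, BYTESEX_LE };

/* PDP-11 middle-endian: two little-endian 16-bit words, high word first */
static uint32_t get32(enum bytesex bs, const uint8_t *p)
{
        uint32_t hi = p[0] | (uint32_t)p[1] << 8;
        uint32_t lo = p[2] | (uint32_t)p[3] << 8;

        if (bs == BYTESEX_LE)
                return p[0] | (uint32_t)p[1] << 8 |
                       (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
        return hi << 16 | lo;
}

static uint16_t get16(const uint8_t *p)
{
        return p[0] | (uint16_t)p[1] << 8;      /* 16-bit fields are LE in both layouts */
}

/* stand-in limits in the spirit of V7_NICFREE / V7_NICINOD / V7_MAXSIZE */
#define MAX_NFREE  50
#define MAX_NINODE 100
#define MAX_FSIZE  0x7fffffffu

/* hypothetical offsets of s_fsize, s_nfree and s_ninode in the raw block */
static int plausible(enum bytesex bs, const uint8_t *sb)
{
        return get16(sb + 6) <= MAX_NFREE &&
               get16(sb + 208) <= MAX_NINODE &&
               get32(bs, sb + 2) <= MAX_FSIZE;
}

/* probe PDP-11 layout first, then PC/IX little-endian; -1 if neither fits */
static int detect_bytesex(const uint8_t *sb)
{
        if (plausible(BYTESEX_PDP, sb))
                return BYTESEX_PDP;
        if (plausible(BYTESEX_LE, sb))
                return BYTESEX_LE;
        return -1;
}

The patch applies the same idea with the real on-disk structures: sbi->s_bytesex is set before each v7_sanity_check() call so the fs16_to_cpu()/fs32_to_cpu() helpers decode the buffer under that byte order, and the first order that passes both the superblock limits and the root-inode check wins.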
