diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-08-07 21:53:44 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-08-07 21:53:44 -0400 |
| commit | fb385003c4ac9634cf2448f6ded81e5fd1190c1f (patch) | |
| tree | 3a6068559cef144c04b96372ee0eecf20dfb108b | |
| parent | 36b8659f9316b24c514a7c8290596b2382b91dd2 (diff) | |
| parent | b36ec0428a06fcbdb67d61e9e664154e5dd9a8c7 (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/hch/xfs-icache-races
* git://git.kernel.org/pub/scm/linux/kernel/git/hch/xfs-icache-races:
xfs: fix freeing of inodes not yet added to the inode cache
vfs: add __destroy_inode
vfs: fix inode_init_always calling convention
| -rw-r--r-- | fs/inode.c | 40 | ||||
| -rw-r--r-- | fs/xfs/xfs_iget.c | 142 | ||||
| -rw-r--r-- | fs/xfs/xfs_inode.h | 17 | ||||
| -rw-r--r-- | include/linux/fs.h | 3 |
4 files changed, 99 insertions, 103 deletions
diff --git a/fs/inode.c b/fs/inode.c index 901bad1e5f12..ae7b67e48661 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
| @@ -120,12 +120,11 @@ static void wake_up_inode(struct inode *inode) | |||
| 120 | * These are initializations that need to be done on every inode | 120 | * These are initializations that need to be done on every inode |
| 121 | * allocation as the fields are not initialised by slab allocation. | 121 | * allocation as the fields are not initialised by slab allocation. |
| 122 | */ | 122 | */ |
| 123 | struct inode *inode_init_always(struct super_block *sb, struct inode *inode) | 123 | int inode_init_always(struct super_block *sb, struct inode *inode) |
| 124 | { | 124 | { |
| 125 | static const struct address_space_operations empty_aops; | 125 | static const struct address_space_operations empty_aops; |
| 126 | static struct inode_operations empty_iops; | 126 | static struct inode_operations empty_iops; |
| 127 | static const struct file_operations empty_fops; | 127 | static const struct file_operations empty_fops; |
| 128 | |||
| 129 | struct address_space *const mapping = &inode->i_data; | 128 | struct address_space *const mapping = &inode->i_data; |
| 130 | 129 | ||
| 131 | inode->i_sb = sb; | 130 | inode->i_sb = sb; |
| @@ -152,7 +151,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) | |||
| 152 | inode->dirtied_when = 0; | 151 | inode->dirtied_when = 0; |
| 153 | 152 | ||
| 154 | if (security_inode_alloc(inode)) | 153 | if (security_inode_alloc(inode)) |
| 155 | goto out_free_inode; | 154 | goto out; |
| 156 | 155 | ||
| 157 | /* allocate and initialize an i_integrity */ | 156 | /* allocate and initialize an i_integrity */ |
| 158 | if (ima_inode_alloc(inode)) | 157 | if (ima_inode_alloc(inode)) |
| @@ -198,16 +197,12 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) | |||
| 198 | inode->i_fsnotify_mask = 0; | 197 | inode->i_fsnotify_mask = 0; |
| 199 | #endif | 198 | #endif |
| 200 | 199 | ||
| 201 | return inode; | 200 | return 0; |
| 202 | 201 | ||
| 203 | out_free_security: | 202 | out_free_security: |
| 204 | security_inode_free(inode); | 203 | security_inode_free(inode); |
| 205 | out_free_inode: | 204 | out: |
| 206 | if (inode->i_sb->s_op->destroy_inode) | 205 | return -ENOMEM; |
| 207 | inode->i_sb->s_op->destroy_inode(inode); | ||
| 208 | else | ||
| 209 | kmem_cache_free(inode_cachep, (inode)); | ||
| 210 | return NULL; | ||
| 211 | } | 206 | } |
| 212 | EXPORT_SYMBOL(inode_init_always); | 207 | EXPORT_SYMBOL(inode_init_always); |
| 213 | 208 | ||
| @@ -220,12 +215,21 @@ static struct inode *alloc_inode(struct super_block *sb) | |||
| 220 | else | 215 | else |
| 221 | inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); | 216 | inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); |
| 222 | 217 | ||
| 223 | if (inode) | 218 | if (!inode) |
| 224 | return inode_init_always(sb, inode); | 219 | return NULL; |
| 225 | return NULL; | 220 | |
| 221 | if (unlikely(inode_init_always(sb, inode))) { | ||
| 222 | if (inode->i_sb->s_op->destroy_inode) | ||
| 223 | inode->i_sb->s_op->destroy_inode(inode); | ||
| 224 | else | ||
| 225 | kmem_cache_free(inode_cachep, inode); | ||
| 226 | return NULL; | ||
| 227 | } | ||
| 228 | |||
| 229 | return inode; | ||
| 226 | } | 230 | } |
| 227 | 231 | ||
| 228 | void destroy_inode(struct inode *inode) | 232 | void __destroy_inode(struct inode *inode) |
| 229 | { | 233 | { |
| 230 | BUG_ON(inode_has_buffers(inode)); | 234 | BUG_ON(inode_has_buffers(inode)); |
| 231 | ima_inode_free(inode); | 235 | ima_inode_free(inode); |
| @@ -237,13 +241,17 @@ void destroy_inode(struct inode *inode) | |||
| 237 | if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) | 241 | if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) |
| 238 | posix_acl_release(inode->i_default_acl); | 242 | posix_acl_release(inode->i_default_acl); |
| 239 | #endif | 243 | #endif |
| 244 | } | ||
| 245 | EXPORT_SYMBOL(__destroy_inode); | ||
| 246 | |||
| 247 | void destroy_inode(struct inode *inode) | ||
| 248 | { | ||
| 249 | __destroy_inode(inode); | ||
| 240 | if (inode->i_sb->s_op->destroy_inode) | 250 | if (inode->i_sb->s_op->destroy_inode) |
| 241 | inode->i_sb->s_op->destroy_inode(inode); | 251 | inode->i_sb->s_op->destroy_inode(inode); |
| 242 | else | 252 | else |
| 243 | kmem_cache_free(inode_cachep, (inode)); | 253 | kmem_cache_free(inode_cachep, (inode)); |
| 244 | } | 254 | } |
| 245 | EXPORT_SYMBOL(destroy_inode); | ||
| 246 | |||
| 247 | 255 | ||
| 248 | /* | 256 | /* |
| 249 | * These are initializations that only need to be done | 257 | * These are initializations that only need to be done |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 5fcec6f020a7..34ec86923f7e 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
| @@ -64,6 +64,10 @@ xfs_inode_alloc( | |||
| 64 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); | 64 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); |
| 65 | if (!ip) | 65 | if (!ip) |
| 66 | return NULL; | 66 | return NULL; |
| 67 | if (inode_init_always(mp->m_super, VFS_I(ip))) { | ||
| 68 | kmem_zone_free(xfs_inode_zone, ip); | ||
| 69 | return NULL; | ||
| 70 | } | ||
| 67 | 71 | ||
| 68 | ASSERT(atomic_read(&ip->i_iocount) == 0); | 72 | ASSERT(atomic_read(&ip->i_iocount) == 0); |
| 69 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 73 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
| @@ -105,17 +109,6 @@ xfs_inode_alloc( | |||
| 105 | #ifdef XFS_DIR2_TRACE | 109 | #ifdef XFS_DIR2_TRACE |
| 106 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); | 110 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); |
| 107 | #endif | 111 | #endif |
| 108 | /* | ||
| 109 | * Now initialise the VFS inode. We do this after the xfs_inode | ||
| 110 | * initialisation as internal failures will result in ->destroy_inode | ||
| 111 | * being called and that will pass down through the reclaim path and | ||
| 112 | * free the XFS inode. This path requires the XFS inode to already be | ||
| 113 | * initialised. Hence if this call fails, the xfs_inode has already | ||
| 114 | * been freed and we should not reference it at all in the error | ||
| 115 | * handling. | ||
| 116 | */ | ||
| 117 | if (!inode_init_always(mp->m_super, VFS_I(ip))) | ||
| 118 | return NULL; | ||
| 119 | 112 | ||
| 120 | /* prevent anyone from using this yet */ | 113 | /* prevent anyone from using this yet */ |
| 121 | VFS_I(ip)->i_state = I_NEW|I_LOCK; | 114 | VFS_I(ip)->i_state = I_NEW|I_LOCK; |
| @@ -123,6 +116,71 @@ xfs_inode_alloc( | |||
| 123 | return ip; | 116 | return ip; |
| 124 | } | 117 | } |
| 125 | 118 | ||
| 119 | STATIC void | ||
| 120 | xfs_inode_free( | ||
| 121 | struct xfs_inode *ip) | ||
| 122 | { | ||
| 123 | switch (ip->i_d.di_mode & S_IFMT) { | ||
| 124 | case S_IFREG: | ||
| 125 | case S_IFDIR: | ||
| 126 | case S_IFLNK: | ||
| 127 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
| 128 | break; | ||
| 129 | } | ||
| 130 | |||
| 131 | if (ip->i_afp) | ||
| 132 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
| 133 | |||
| 134 | #ifdef XFS_INODE_TRACE | ||
| 135 | ktrace_free(ip->i_trace); | ||
| 136 | #endif | ||
| 137 | #ifdef XFS_BMAP_TRACE | ||
| 138 | ktrace_free(ip->i_xtrace); | ||
| 139 | #endif | ||
| 140 | #ifdef XFS_BTREE_TRACE | ||
| 141 | ktrace_free(ip->i_btrace); | ||
| 142 | #endif | ||
| 143 | #ifdef XFS_RW_TRACE | ||
| 144 | ktrace_free(ip->i_rwtrace); | ||
| 145 | #endif | ||
| 146 | #ifdef XFS_ILOCK_TRACE | ||
| 147 | ktrace_free(ip->i_lock_trace); | ||
| 148 | #endif | ||
| 149 | #ifdef XFS_DIR2_TRACE | ||
| 150 | ktrace_free(ip->i_dir_trace); | ||
| 151 | #endif | ||
| 152 | |||
| 153 | if (ip->i_itemp) { | ||
| 154 | /* | ||
| 155 | * Only if we are shutting down the fs will we see an | ||
| 156 | * inode still in the AIL. If it is there, we should remove | ||
| 157 | * it to prevent a use-after-free from occurring. | ||
| 158 | */ | ||
| 159 | xfs_log_item_t *lip = &ip->i_itemp->ili_item; | ||
| 160 | struct xfs_ail *ailp = lip->li_ailp; | ||
| 161 | |||
| 162 | ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || | ||
| 163 | XFS_FORCED_SHUTDOWN(ip->i_mount)); | ||
| 164 | if (lip->li_flags & XFS_LI_IN_AIL) { | ||
| 165 | spin_lock(&ailp->xa_lock); | ||
| 166 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
| 167 | xfs_trans_ail_delete(ailp, lip); | ||
| 168 | else | ||
| 169 | spin_unlock(&ailp->xa_lock); | ||
| 170 | } | ||
| 171 | xfs_inode_item_destroy(ip); | ||
| 172 | ip->i_itemp = NULL; | ||
| 173 | } | ||
| 174 | |||
| 175 | /* asserts to verify all state is correct here */ | ||
| 176 | ASSERT(atomic_read(&ip->i_iocount) == 0); | ||
| 177 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
| 178 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
| 179 | ASSERT(completion_done(&ip->i_flush)); | ||
| 180 | |||
| 181 | kmem_zone_free(xfs_inode_zone, ip); | ||
| 182 | } | ||
| 183 | |||
| 126 | /* | 184 | /* |
| 127 | * Check the validity of the inode we just found it the cache | 185 | * Check the validity of the inode we just found it the cache |
| 128 | */ | 186 | */ |
| @@ -167,7 +225,7 @@ xfs_iget_cache_hit( | |||
| 167 | * errors cleanly, then tag it so it can be set up correctly | 225 | * errors cleanly, then tag it so it can be set up correctly |
| 168 | * later. | 226 | * later. |
| 169 | */ | 227 | */ |
| 170 | if (!inode_init_always(mp->m_super, VFS_I(ip))) { | 228 | if (inode_init_always(mp->m_super, VFS_I(ip))) { |
| 171 | error = ENOMEM; | 229 | error = ENOMEM; |
| 172 | goto out_error; | 230 | goto out_error; |
| 173 | } | 231 | } |
| @@ -299,7 +357,8 @@ out_preload_end: | |||
| 299 | if (lock_flags) | 357 | if (lock_flags) |
| 300 | xfs_iunlock(ip, lock_flags); | 358 | xfs_iunlock(ip, lock_flags); |
| 301 | out_destroy: | 359 | out_destroy: |
| 302 | xfs_destroy_inode(ip); | 360 | __destroy_inode(VFS_I(ip)); |
| 361 | xfs_inode_free(ip); | ||
| 303 | return error; | 362 | return error; |
| 304 | } | 363 | } |
| 305 | 364 | ||
| @@ -504,62 +563,7 @@ xfs_ireclaim( | |||
| 504 | xfs_qm_dqdetach(ip); | 563 | xfs_qm_dqdetach(ip); |
| 505 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 564 | xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
| 506 | 565 | ||
| 507 | switch (ip->i_d.di_mode & S_IFMT) { | 566 | xfs_inode_free(ip); |
| 508 | case S_IFREG: | ||
| 509 | case S_IFDIR: | ||
| 510 | case S_IFLNK: | ||
| 511 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
| 512 | break; | ||
| 513 | } | ||
| 514 | |||
| 515 | if (ip->i_afp) | ||
| 516 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
| 517 | |||
| 518 | #ifdef XFS_INODE_TRACE | ||
| 519 | ktrace_free(ip->i_trace); | ||
| 520 | #endif | ||
| 521 | #ifdef XFS_BMAP_TRACE | ||
| 522 | ktrace_free(ip->i_xtrace); | ||
| 523 | #endif | ||
| 524 | #ifdef XFS_BTREE_TRACE | ||
| 525 | ktrace_free(ip->i_btrace); | ||
| 526 | #endif | ||
| 527 | #ifdef XFS_RW_TRACE | ||
| 528 | ktrace_free(ip->i_rwtrace); | ||
| 529 | #endif | ||
| 530 | #ifdef XFS_ILOCK_TRACE | ||
| 531 | ktrace_free(ip->i_lock_trace); | ||
| 532 | #endif | ||
| 533 | #ifdef XFS_DIR2_TRACE | ||
| 534 | ktrace_free(ip->i_dir_trace); | ||
| 535 | #endif | ||
| 536 | if (ip->i_itemp) { | ||
| 537 | /* | ||
| 538 | * Only if we are shutting down the fs will we see an | ||
| 539 | * inode still in the AIL. If it is there, we should remove | ||
| 540 | * it to prevent a use-after-free from occurring. | ||
| 541 | */ | ||
| 542 | xfs_log_item_t *lip = &ip->i_itemp->ili_item; | ||
| 543 | struct xfs_ail *ailp = lip->li_ailp; | ||
| 544 | |||
| 545 | ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || | ||
| 546 | XFS_FORCED_SHUTDOWN(ip->i_mount)); | ||
| 547 | if (lip->li_flags & XFS_LI_IN_AIL) { | ||
| 548 | spin_lock(&ailp->xa_lock); | ||
| 549 | if (lip->li_flags & XFS_LI_IN_AIL) | ||
| 550 | xfs_trans_ail_delete(ailp, lip); | ||
| 551 | else | ||
| 552 | spin_unlock(&ailp->xa_lock); | ||
| 553 | } | ||
| 554 | xfs_inode_item_destroy(ip); | ||
| 555 | ip->i_itemp = NULL; | ||
| 556 | } | ||
| 557 | /* asserts to verify all state is correct here */ | ||
| 558 | ASSERT(atomic_read(&ip->i_iocount) == 0); | ||
| 559 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
| 560 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
| 561 | ASSERT(completion_done(&ip->i_flush)); | ||
| 562 | kmem_zone_free(xfs_inode_zone, ip); | ||
| 563 | } | 567 | } |
| 564 | 568 | ||
| 565 | /* | 569 | /* |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1804f866a71d..65f24a3cc992 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
| @@ -310,23 +310,6 @@ static inline struct inode *VFS_I(struct xfs_inode *ip) | |||
| 310 | } | 310 | } |
| 311 | 311 | ||
| 312 | /* | 312 | /* |
| 313 | * Get rid of a partially initialized inode. | ||
| 314 | * | ||
| 315 | * We have to go through destroy_inode to make sure allocations | ||
| 316 | * from init_inode_always like the security data are undone. | ||
| 317 | * | ||
| 318 | * We mark the inode bad so that it takes the short cut in | ||
| 319 | * the reclaim path instead of going through the flush path | ||
| 320 | * which doesn't make sense for an inode that has never seen the | ||
| 321 | * light of day. | ||
| 322 | */ | ||
| 323 | static inline void xfs_destroy_inode(struct xfs_inode *ip) | ||
| 324 | { | ||
| 325 | make_bad_inode(VFS_I(ip)); | ||
| 326 | return destroy_inode(VFS_I(ip)); | ||
| 327 | } | ||
| 328 | |||
| 329 | /* | ||
| 330 | * i_flags helper functions | 313 | * i_flags helper functions |
| 331 | */ | 314 | */ |
| 332 | static inline void | 315 | static inline void |
diff --git a/include/linux/fs.h b/include/linux/fs.h index a36ffa5a77a4..67888a9e0655 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -2137,7 +2137,7 @@ extern loff_t default_llseek(struct file *file, loff_t offset, int origin); | |||
| 2137 | 2137 | ||
| 2138 | extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); | 2138 | extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); |
| 2139 | 2139 | ||
| 2140 | extern struct inode * inode_init_always(struct super_block *, struct inode *); | 2140 | extern int inode_init_always(struct super_block *, struct inode *); |
| 2141 | extern void inode_init_once(struct inode *); | 2141 | extern void inode_init_once(struct inode *); |
| 2142 | extern void inode_add_to_lists(struct super_block *, struct inode *); | 2142 | extern void inode_add_to_lists(struct super_block *, struct inode *); |
| 2143 | extern void iput(struct inode *); | 2143 | extern void iput(struct inode *); |
| @@ -2164,6 +2164,7 @@ extern void __iget(struct inode * inode); | |||
| 2164 | extern void iget_failed(struct inode *); | 2164 | extern void iget_failed(struct inode *); |
| 2165 | extern void clear_inode(struct inode *); | 2165 | extern void clear_inode(struct inode *); |
| 2166 | extern void destroy_inode(struct inode *); | 2166 | extern void destroy_inode(struct inode *); |
| 2167 | extern void __destroy_inode(struct inode *); | ||
| 2167 | extern struct inode *new_inode(struct super_block *); | 2168 | extern struct inode *new_inode(struct super_block *); |
| 2168 | extern int should_remove_suid(struct dentry *); | 2169 | extern int should_remove_suid(struct dentry *); |
| 2169 | extern int file_remove_suid(struct file *); | 2170 | extern int file_remove_suid(struct file *); |
