aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/inode.c')
-rw-r--r--fs/ocfs2/inode.c199
1 files changed, 92 insertions, 107 deletions
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 28ab56f2b98c..21a605079c62 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -89,24 +89,6 @@ void ocfs2_set_inode_flags(struct inode *inode)
89 inode->i_flags |= S_DIRSYNC; 89 inode->i_flags |= S_DIRSYNC;
90} 90}
91 91
92struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
93 u64 blkno,
94 int delete_vote)
95{
96 struct ocfs2_find_inode_args args;
97
98 /* ocfs2_ilookup_for_vote should *only* be called from the
99 * vote thread */
100 BUG_ON(current != osb->vote_task);
101
102 args.fi_blkno = blkno;
103 args.fi_flags = OCFS2_FI_FLAG_NOWAIT;
104 if (delete_vote)
105 args.fi_flags |= OCFS2_FI_FLAG_DELETE;
106 args.fi_ino = ino_from_blkno(osb->sb, blkno);
107 return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args);
108}
109
110struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) 92struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
111{ 93{
112 struct inode *inode = NULL; 94 struct inode *inode = NULL;
@@ -182,28 +164,6 @@ static int ocfs2_find_actor(struct inode *inode, void *opaque)
182 if (oi->ip_blkno != args->fi_blkno) 164 if (oi->ip_blkno != args->fi_blkno)
183 goto bail; 165 goto bail;
184 166
185 /* OCFS2_FI_FLAG_NOWAIT is *only* set from
186 * ocfs2_ilookup_for_vote which won't create an inode for one
187 * that isn't found. The vote thread which doesn't want to get
188 * an inode which is in the process of going away - otherwise
189 * the call to __wait_on_freeing_inode in find_inode_fast will
190 * cause it to deadlock on an inode which may be waiting on a
191 * vote (or lock release) in delete_inode */
192 if ((args->fi_flags & OCFS2_FI_FLAG_NOWAIT) &&
193 (inode->i_state & (I_FREEING|I_CLEAR))) {
194 /* As stated above, we're not going to return an
195 * inode. In the case of a delete vote, the voting
196 * code is going to signal the other node to go
197 * ahead. Mark that state here, so this freeing inode
198 * has the state when it gets to delete_inode. */
199 if (args->fi_flags & OCFS2_FI_FLAG_DELETE) {
200 spin_lock(&oi->ip_lock);
201 ocfs2_mark_inode_remotely_deleted(inode);
202 spin_unlock(&oi->ip_lock);
203 }
204 goto bail;
205 }
206
207 ret = 1; 167 ret = 1;
208bail: 168bail:
209 mlog_exit(ret); 169 mlog_exit(ret);
@@ -261,6 +221,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
261 goto bail; 221 goto bail;
262 } 222 }
263 223
224 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
225 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
226
264 inode->i_version = 1; 227 inode->i_version = 1;
265 inode->i_generation = le32_to_cpu(fe->i_generation); 228 inode->i_generation = le32_to_cpu(fe->i_generation);
266 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 229 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
@@ -272,8 +235,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
272 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) 235 if (S_ISLNK(inode->i_mode) && !fe->i_clusters)
273 inode->i_blocks = 0; 236 inode->i_blocks = 0;
274 else 237 else
275 inode->i_blocks = 238 inode->i_blocks = ocfs2_inode_sector_count(inode);
276 ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size));
277 inode->i_mapping->a_ops = &ocfs2_aops; 239 inode->i_mapping->a_ops = &ocfs2_aops;
278 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); 240 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
279 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); 241 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
@@ -288,10 +250,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
288 (unsigned long long)OCFS2_I(inode)->ip_blkno, 250 (unsigned long long)OCFS2_I(inode)->ip_blkno,
289 (unsigned long long)fe->i_blkno); 251 (unsigned long long)fe->i_blkno);
290 252
291 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
292 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
293 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
294
295 inode->i_nlink = le16_to_cpu(fe->i_links_count); 253 inode->i_nlink = le16_to_cpu(fe->i_links_count);
296 254
297 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) 255 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
@@ -347,6 +305,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
347 305
348 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, 306 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
349 OCFS2_LOCK_TYPE_META, 0, inode); 307 OCFS2_LOCK_TYPE_META, 0, inode);
308
309 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
310 OCFS2_LOCK_TYPE_OPEN, 0, inode);
350 } 311 }
351 312
352 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, 313 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
@@ -421,7 +382,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
421 * cluster lock before trusting anything anyway. 382 * cluster lock before trusting anything anyway.
422 */ 383 */
423 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) 384 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
424 && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK) 385 && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY)
425 && !ocfs2_mount_local(osb); 386 && !ocfs2_mount_local(osb);
426 387
427 /* 388 /*
@@ -438,7 +399,17 @@ static int ocfs2_read_locked_inode(struct inode *inode,
438 OCFS2_LOCK_TYPE_META, 399 OCFS2_LOCK_TYPE_META,
439 generation, inode); 400 generation, inode);
440 401
402 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
403 OCFS2_LOCK_TYPE_OPEN,
404 0, inode);
405
441 if (can_lock) { 406 if (can_lock) {
407 status = ocfs2_open_lock(inode);
408 if (status) {
409 make_bad_inode(inode);
410 mlog_errno(status);
411 return status;
412 }
442 status = ocfs2_meta_lock(inode, NULL, 0); 413 status = ocfs2_meta_lock(inode, NULL, 0);
443 if (status) { 414 if (status) {
444 make_bad_inode(inode); 415 make_bad_inode(inode);
@@ -447,6 +418,14 @@ static int ocfs2_read_locked_inode(struct inode *inode,
447 } 418 }
448 } 419 }
449 420
421 if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
422 status = ocfs2_try_open_lock(inode, 0);
423 if (status) {
424 make_bad_inode(inode);
425 return status;
426 }
427 }
428
450 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, 429 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
451 can_lock ? inode : NULL); 430 can_lock ? inode : NULL);
452 if (status < 0) { 431 if (status < 0) {
@@ -507,50 +486,56 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
507 struct buffer_head *fe_bh) 486 struct buffer_head *fe_bh)
508{ 487{
509 int status = 0; 488 int status = 0;
510 handle_t *handle = NULL;
511 struct ocfs2_truncate_context *tc = NULL; 489 struct ocfs2_truncate_context *tc = NULL;
512 struct ocfs2_dinode *fe; 490 struct ocfs2_dinode *fe;
491 handle_t *handle = NULL;
513 492
514 mlog_entry_void(); 493 mlog_entry_void();
515 494
516 fe = (struct ocfs2_dinode *) fe_bh->b_data; 495 fe = (struct ocfs2_dinode *) fe_bh->b_data;
517 496
518 /* zero allocation, zero truncate :) */ 497 if (fe->i_clusters) {
519 if (!fe->i_clusters) 498 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
520 goto bail; 499 if (IS_ERR(handle)) {
500 status = PTR_ERR(handle);
501 mlog_errno(status);
502 goto out;
503 }
521 504
522 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 505 status = ocfs2_journal_access(handle, inode, fe_bh,
523 if (IS_ERR(handle)) { 506 OCFS2_JOURNAL_ACCESS_WRITE);
524 status = PTR_ERR(handle); 507 if (status < 0) {
525 handle = NULL; 508 mlog_errno(status);
526 mlog_errno(status); 509 goto out;
527 goto bail; 510 }
528 }
529 511
530 status = ocfs2_set_inode_size(handle, inode, fe_bh, 0ULL); 512 i_size_write(inode, 0);
531 if (status < 0) {
532 mlog_errno(status);
533 goto bail;
534 }
535 513
536 ocfs2_commit_trans(osb, handle); 514 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
537 handle = NULL; 515 if (status < 0) {
516 mlog_errno(status);
517 goto out;
518 }
538 519
539 status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc); 520 ocfs2_commit_trans(osb, handle);
540 if (status < 0) { 521 handle = NULL;
541 mlog_errno(status);
542 goto bail;
543 }
544 522
545 status = ocfs2_commit_truncate(osb, inode, fe_bh, tc); 523 status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc);
546 if (status < 0) { 524 if (status < 0) {
547 mlog_errno(status); 525 mlog_errno(status);
548 goto bail; 526 goto out;
527 }
528
529 status = ocfs2_commit_truncate(osb, inode, fe_bh, tc);
530 if (status < 0) {
531 mlog_errno(status);
532 goto out;
533 }
549 } 534 }
550bail: 535
536out:
551 if (handle) 537 if (handle)
552 ocfs2_commit_trans(osb, handle); 538 ocfs2_commit_trans(osb, handle);
553
554 mlog_exit(status); 539 mlog_exit(status);
555 return status; 540 return status;
556} 541}
@@ -678,10 +663,10 @@ static int ocfs2_wipe_inode(struct inode *inode,
678 struct inode *orphan_dir_inode = NULL; 663 struct inode *orphan_dir_inode = NULL;
679 struct buffer_head *orphan_dir_bh = NULL; 664 struct buffer_head *orphan_dir_bh = NULL;
680 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 665 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
666 struct ocfs2_dinode *di;
681 667
682 /* We've already voted on this so it should be readonly - no 668 di = (struct ocfs2_dinode *) di_bh->b_data;
683 * spinlock needed. */ 669 orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
684 orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
685 670
686 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); 671 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
687 if (status) 672 if (status)
@@ -839,11 +824,20 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
839 goto bail; 824 goto bail;
840 } 825 }
841 826
842 status = ocfs2_request_delete_vote(inode); 827 /*
843 /* -EBUSY means that other nodes are still using the 828 * This is how ocfs2 determines whether an inode is still live
844 * inode. We're done here though, so avoid doing anything on 829 * within the cluster. Every node takes a shared read lock on
845 * disk and let them worry about deleting it. */ 830 * the inode open lock in ocfs2_read_locked_inode(). When we
846 if (status == -EBUSY) { 831 * get to ->delete_inode(), each node tries to convert it's
832 * lock to an exclusive. Trylocks are serialized by the inode
833 * meta data lock. If the upconvert suceeds, we know the inode
834 * is no longer live and can be deleted.
835 *
836 * Though we call this with the meta data lock held, the
837 * trylock keeps us from ABBA deadlock.
838 */
839 status = ocfs2_try_open_lock(inode, 1);
840 if (status == -EAGAIN) {
847 status = 0; 841 status = 0;
848 mlog(0, "Skipping delete of %llu because it is in use on" 842 mlog(0, "Skipping delete of %llu because it is in use on"
849 "other nodes\n", (unsigned long long)oi->ip_blkno); 843 "other nodes\n", (unsigned long long)oi->ip_blkno);
@@ -854,21 +848,10 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
854 goto bail; 848 goto bail;
855 } 849 }
856 850
857 spin_lock(&oi->ip_lock); 851 *wipe = 1;
858 if (oi->ip_orphaned_slot == OCFS2_INVALID_SLOT) { 852 mlog(0, "Inode %llu is ok to wipe from orphan dir %u\n",
859 /* Nobody knew which slot this inode was orphaned 853 (unsigned long long)oi->ip_blkno,
860 * into. This may happen during node death and 854 le16_to_cpu(di->i_orphaned_slot));
861 * recovery knows how to clean it up so we can safely
862 * ignore this inode for now on. */
863 mlog(0, "Nobody knew where inode %llu was orphaned!\n",
864 (unsigned long long)oi->ip_blkno);
865 } else {
866 *wipe = 1;
867
868 mlog(0, "Inode %llu is ok to wipe from orphan dir %d\n",
869 (unsigned long long)oi->ip_blkno, oi->ip_orphaned_slot);
870 }
871 spin_unlock(&oi->ip_lock);
872 855
873bail: 856bail:
874 return status; 857 return status;
@@ -1001,11 +984,16 @@ void ocfs2_clear_inode(struct inode *inode)
1001 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, 984 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
1002 "Inode=%lu\n", inode->i_ino); 985 "Inode=%lu\n", inode->i_ino);
1003 986
987 /* For remove delete_inode vote, we hold open lock before,
988 * now it is time to unlock PR and EX open locks. */
989 ocfs2_open_unlock(inode);
990
1004 /* Do these before all the other work so that we don't bounce 991 /* Do these before all the other work so that we don't bounce
1005 * the vote thread while waiting to destroy the locks. */ 992 * the vote thread while waiting to destroy the locks. */
1006 ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); 993 ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
1007 ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres); 994 ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres);
1008 ocfs2_mark_lockres_freeing(&oi->ip_data_lockres); 995 ocfs2_mark_lockres_freeing(&oi->ip_data_lockres);
996 ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
1009 997
1010 /* We very well may get a clear_inode before all an inodes 998 /* We very well may get a clear_inode before all an inodes
1011 * metadata has hit disk. Of course, we can't drop any cluster 999 * metadata has hit disk. Of course, we can't drop any cluster
@@ -1020,8 +1008,7 @@ void ocfs2_clear_inode(struct inode *inode)
1020 "Clear inode of %llu, inode has io markers\n", 1008 "Clear inode of %llu, inode has io markers\n",
1021 (unsigned long long)oi->ip_blkno); 1009 (unsigned long long)oi->ip_blkno);
1022 1010
1023 ocfs2_extent_map_drop(inode, 0); 1011 ocfs2_extent_map_trunc(inode, 0);
1024 ocfs2_extent_map_init(inode);
1025 1012
1026 status = ocfs2_drop_inode_locks(inode); 1013 status = ocfs2_drop_inode_locks(inode);
1027 if (status < 0) 1014 if (status < 0)
@@ -1030,6 +1017,7 @@ void ocfs2_clear_inode(struct inode *inode)
1030 ocfs2_lock_res_free(&oi->ip_rw_lockres); 1017 ocfs2_lock_res_free(&oi->ip_rw_lockres);
1031 ocfs2_lock_res_free(&oi->ip_meta_lockres); 1018 ocfs2_lock_res_free(&oi->ip_meta_lockres);
1032 ocfs2_lock_res_free(&oi->ip_data_lockres); 1019 ocfs2_lock_res_free(&oi->ip_data_lockres);
1020 ocfs2_lock_res_free(&oi->ip_open_lockres);
1033 1021
1034 ocfs2_metadata_cache_purge(inode); 1022 ocfs2_metadata_cache_purge(inode);
1035 1023
@@ -1086,9 +1074,6 @@ void ocfs2_drop_inode(struct inode *inode)
1086 mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n", 1074 mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n",
1087 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); 1075 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
1088 1076
1089 /* Testing ip_orphaned_slot here wouldn't work because we may
1090 * not have gotten a delete_inode vote from any other nodes
1091 * yet. */
1092 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) 1077 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
1093 generic_delete_inode(inode); 1078 generic_delete_inode(inode);
1094 else 1079 else
@@ -1121,8 +1106,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
1121 return NULL; 1106 return NULL;
1122 } 1107 }
1123 1108
1124 tmperr = ocfs2_extent_map_get_blocks(inode, block, 1, 1109 tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
1125 &p_blkno, NULL); 1110 NULL);
1126 if (tmperr < 0) { 1111 if (tmperr < 0) {
1127 mlog_errno(tmperr); 1112 mlog_errno(tmperr);
1128 goto fail; 1113 goto fail;
@@ -1259,7 +1244,7 @@ void ocfs2_refresh_inode(struct inode *inode,
1259 if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0) 1244 if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0)
1260 inode->i_blocks = 0; 1245 inode->i_blocks = 0;
1261 else 1246 else
1262 inode->i_blocks = ocfs2_align_bytes_to_sectors(i_size_read(inode)); 1247 inode->i_blocks = ocfs2_inode_sector_count(inode);
1263 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); 1248 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
1264 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); 1249 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
1265 inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); 1250 inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);