aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/inode.c')
-rw-r--r--fs/ocfs2/inode.c188
1 files changed, 142 insertions, 46 deletions
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 327a5b7b86ed..69d3db569166 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -54,8 +54,6 @@
54 54
55#include "buffer_head_io.h" 55#include "buffer_head_io.h"
56 56
57#define OCFS2_FI_FLAG_NOWAIT 0x1
58#define OCFS2_FI_FLAG_DELETE 0x2
59struct ocfs2_find_inode_args 57struct ocfs2_find_inode_args
60{ 58{
61 u64 fi_blkno; 59 u64 fi_blkno;
@@ -71,6 +69,26 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
71 struct inode *inode, 69 struct inode *inode,
72 struct buffer_head *fe_bh); 70 struct buffer_head *fe_bh);
73 71
72void ocfs2_set_inode_flags(struct inode *inode)
73{
74 unsigned int flags = OCFS2_I(inode)->ip_attr;
75
76 inode->i_flags &= ~(S_IMMUTABLE |
77 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
78
79 if (flags & OCFS2_IMMUTABLE_FL)
80 inode->i_flags |= S_IMMUTABLE;
81
82 if (flags & OCFS2_SYNC_FL)
83 inode->i_flags |= S_SYNC;
84 if (flags & OCFS2_APPEND_FL)
85 inode->i_flags |= S_APPEND;
86 if (flags & OCFS2_NOATIME_FL)
87 inode->i_flags |= S_NOATIME;
88 if (flags & OCFS2_DIRSYNC_FL)
89 inode->i_flags |= S_DIRSYNC;
90}
91
74struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, 92struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
75 u64 blkno, 93 u64 blkno,
76 int delete_vote) 94 int delete_vote)
@@ -89,7 +107,7 @@ struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
89 return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args); 107 return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args);
90} 108}
91 109
92struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) 110struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
93{ 111{
94 struct inode *inode = NULL; 112 struct inode *inode = NULL;
95 struct super_block *sb = osb->sb; 113 struct super_block *sb = osb->sb;
@@ -107,7 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
107 } 125 }
108 126
109 args.fi_blkno = blkno; 127 args.fi_blkno = blkno;
110 args.fi_flags = 0; 128 args.fi_flags = flags;
111 args.fi_ino = ino_from_blkno(sb, blkno); 129 args.fi_ino = ino_from_blkno(sb, blkno);
112 130
113 inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, 131 inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
@@ -260,7 +278,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
260 inode->i_blocks = 278 inode->i_blocks =
261 ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size)); 279 ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size));
262 inode->i_mapping->a_ops = &ocfs2_aops; 280 inode->i_mapping->a_ops = &ocfs2_aops;
263 inode->i_flags |= S_NOATIME;
264 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); 281 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
265 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); 282 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
266 inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); 283 inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
@@ -276,16 +293,13 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
276 293
277 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 294 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
278 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; 295 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
279 296 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
280 if (create_ino)
281 inode->i_ino = ino_from_blkno(inode->i_sb,
282 le64_to_cpu(fe->i_blkno));
283
284 mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n",
285 (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
286 297
287 inode->i_nlink = le16_to_cpu(fe->i_links_count); 298 inode->i_nlink = le16_to_cpu(fe->i_links_count);
288 299
300 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
301 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
302
289 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { 303 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
290 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 304 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
291 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); 305 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
@@ -323,12 +337,31 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
323 break; 337 break;
324 } 338 }
325 339
340 if (create_ino) {
341 inode->i_ino = ino_from_blkno(inode->i_sb,
342 le64_to_cpu(fe->i_blkno));
343
344 /*
345 * If we ever want to create system files from kernel,
346 * the generation argument to
347 * ocfs2_inode_lock_res_init() will have to change.
348 */
349 BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL));
350
351 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
352 OCFS2_LOCK_TYPE_META, 0, inode);
353 }
354
326 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, 355 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
327 OCFS2_LOCK_TYPE_RW, inode); 356 OCFS2_LOCK_TYPE_RW, inode->i_generation,
328 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, 357 inode);
329 OCFS2_LOCK_TYPE_META, inode); 358
330 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, 359 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
331 OCFS2_LOCK_TYPE_DATA, inode); 360 OCFS2_LOCK_TYPE_DATA, inode->i_generation,
361 inode);
362
363 ocfs2_set_inode_flags(inode);
364 inode->i_flags |= S_NOATIME;
332 365
333 status = 0; 366 status = 0;
334bail: 367bail:
@@ -343,15 +376,15 @@ static int ocfs2_read_locked_inode(struct inode *inode,
343 struct ocfs2_super *osb; 376 struct ocfs2_super *osb;
344 struct ocfs2_dinode *fe; 377 struct ocfs2_dinode *fe;
345 struct buffer_head *bh = NULL; 378 struct buffer_head *bh = NULL;
346 int status; 379 int status, can_lock;
347 int sysfile = 0; 380 u32 generation = 0;
348 381
349 mlog_entry("(0x%p, 0x%p)\n", inode, args); 382 mlog_entry("(0x%p, 0x%p)\n", inode, args);
350 383
351 status = -EINVAL; 384 status = -EINVAL;
352 if (inode == NULL || inode->i_sb == NULL) { 385 if (inode == NULL || inode->i_sb == NULL) {
353 mlog(ML_ERROR, "bad inode\n"); 386 mlog(ML_ERROR, "bad inode\n");
354 goto bail; 387 return status;
355 } 388 }
356 sb = inode->i_sb; 389 sb = inode->i_sb;
357 osb = OCFS2_SB(sb); 390 osb = OCFS2_SB(sb);
@@ -359,50 +392,110 @@ static int ocfs2_read_locked_inode(struct inode *inode,
359 if (!args) { 392 if (!args) {
360 mlog(ML_ERROR, "bad inode args\n"); 393 mlog(ML_ERROR, "bad inode args\n");
361 make_bad_inode(inode); 394 make_bad_inode(inode);
362 goto bail; 395 return status;
396 }
397
398 /*
399 * To improve performance of cold-cache inode stats, we take
400 * the cluster lock here if possible.
401 *
402 * Generally, OCFS2 never trusts the contents of an inode
403 * unless it's holding a cluster lock, so taking it here isn't
404 * a correctness issue as much as it is a performance
405 * improvement.
406 *
407 * There are three times when taking the lock is not a good idea:
408 *
409 * 1) During startup, before we have initialized the DLM.
410 *
411 * 2) If we are reading certain system files which never get
412 * cluster locks (local alloc, truncate log).
413 *
414 * 3) If the process doing the iget() is responsible for
415 * orphan dir recovery. We're holding the orphan dir lock and
416 * can get into a deadlock with another process on another
417 * node in ->delete_inode().
418 *
419 * #1 and #2 can be simply solved by never taking the lock
420 * here for system files (which are the only type we read
421 * during mount). It's a heavier approach, but our main
422 * concern is user-accessible files anyway.
423 *
424 * #3 works itself out because we'll eventually take the
425 * cluster lock before trusting anything anyway.
426 */
427 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
428 && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK);
429
430 /*
431 * To maintain backwards compatibility with older versions of
432 * ocfs2-tools, we still store the generation value for system
433 * files. The only ones that actually matter to userspace are
434 * the journals, but it's easier and inexpensive to just flag
435 * all system files similarly.
436 */
437 if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
438 generation = osb->fs_generation;
439
440 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
441 OCFS2_LOCK_TYPE_META,
442 generation, inode);
443
444 if (can_lock) {
445 status = ocfs2_meta_lock(inode, NULL, NULL, 0);
446 if (status) {
447 make_bad_inode(inode);
448 mlog_errno(status);
449 return status;
450 }
363 } 451 }
364 452
365 /* Read the FE off disk. This is safe because the kernel only 453 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
366 * does one read_inode2 for a new inode, and if it doesn't 454 can_lock ? inode : NULL);
367 * exist yet then nobody can be working on it! */
368 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, NULL);
369 if (status < 0) { 455 if (status < 0) {
370 mlog_errno(status); 456 mlog_errno(status);
371 make_bad_inode(inode);
372 goto bail; 457 goto bail;
373 } 458 }
374 459
460 status = -EINVAL;
375 fe = (struct ocfs2_dinode *) bh->b_data; 461 fe = (struct ocfs2_dinode *) bh->b_data;
376 if (!OCFS2_IS_VALID_DINODE(fe)) { 462 if (!OCFS2_IS_VALID_DINODE(fe)) {
377 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", 463 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
378 (unsigned long long)fe->i_blkno, 7, fe->i_signature); 464 (unsigned long long)fe->i_blkno, 7, fe->i_signature);
379 make_bad_inode(inode);
380 goto bail; 465 goto bail;
381 } 466 }
382 467
383 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) 468 /*
384 sysfile = 1; 469 * This is a code bug. Right now the caller needs to
470 * understand whether it is asking for a system file inode or
471 * not so the proper lock names can be built.
472 */
473 mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
474 !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
475 "Inode %llu: system file state is ambigous\n",
476 (unsigned long long)args->fi_blkno);
385 477
386 if (S_ISCHR(le16_to_cpu(fe->i_mode)) || 478 if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
387 S_ISBLK(le16_to_cpu(fe->i_mode))) 479 S_ISBLK(le16_to_cpu(fe->i_mode)))
388 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 480 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
389 481
390 status = -EINVAL;
391 if (ocfs2_populate_inode(inode, fe, 0) < 0) { 482 if (ocfs2_populate_inode(inode, fe, 0) < 0) {
392 mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", 483 mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
393 (unsigned long long)fe->i_blkno, inode->i_ino); 484 (unsigned long long)fe->i_blkno, inode->i_ino);
394 make_bad_inode(inode);
395 goto bail; 485 goto bail;
396 } 486 }
397 487
398 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); 488 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
399 489
400 if (sysfile)
401 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
402
403 status = 0; 490 status = 0;
404 491
405bail: 492bail:
493 if (can_lock)
494 ocfs2_meta_unlock(inode, 0);
495
496 if (status < 0)
497 make_bad_inode(inode);
498
406 if (args && bh) 499 if (args && bh)
407 brelse(bh); 500 brelse(bh);
408 501
@@ -875,9 +968,15 @@ void ocfs2_delete_inode(struct inode *inode)
875 goto bail_unlock_inode; 968 goto bail_unlock_inode;
876 } 969 }
877 970
878 /* Mark the inode as successfully deleted. This is important 971 /*
879 * for ocfs2_clear_inode as it will check this flag and skip 972 * Mark the inode as successfully deleted.
880 * any checkpointing work */ 973 *
974 * This is important for ocfs2_clear_inode() as it will check
975 * this flag and skip any checkpointing work
976 *
977 * ocfs2_stuff_meta_lvb() also uses this flag to invalidate
978 * the LVB for other nodes.
979 */
881 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; 980 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
882 981
883bail_unlock_inode: 982bail_unlock_inode:
@@ -1002,12 +1101,10 @@ void ocfs2_drop_inode(struct inode *inode)
1002 /* Testing ip_orphaned_slot here wouldn't work because we may 1101 /* Testing ip_orphaned_slot here wouldn't work because we may
1003 * not have gotten a delete_inode vote from any other nodes 1102 * not have gotten a delete_inode vote from any other nodes
1004 * yet. */ 1103 * yet. */
1005 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) { 1104 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
1006 mlog(0, "Inode was orphaned on another node, clearing nlink.\n"); 1105 generic_delete_inode(inode);
1007 inode->i_nlink = 0; 1106 else
1008 } 1107 generic_drop_inode(inode);
1009
1010 generic_drop_inode(inode);
1011 1108
1012 mlog_exit_void(); 1109 mlog_exit_void();
1013} 1110}
@@ -1027,12 +1124,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
1027 u64 p_blkno; 1124 u64 p_blkno;
1028 int readflags = OCFS2_BH_CACHED; 1125 int readflags = OCFS2_BH_CACHED;
1029 1126
1030#if 0
1031 /* only turn this on if we know we can deal with read_block
1032 * returning nothing */
1033 if (reada) 1127 if (reada)
1034 readflags |= OCFS2_BH_READAHEAD; 1128 readflags |= OCFS2_BH_READAHEAD;
1035#endif
1036 1129
1037 if (((u64)block << inode->i_sb->s_blocksize_bits) >= 1130 if (((u64)block << inode->i_sb->s_blocksize_bits) >=
1038 i_size_read(inode)) { 1131 i_size_read(inode)) {
@@ -1131,6 +1224,7 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
1131 1224
1132 spin_lock(&OCFS2_I(inode)->ip_lock); 1225 spin_lock(&OCFS2_I(inode)->ip_lock);
1133 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); 1226 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
1227 fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
1134 spin_unlock(&OCFS2_I(inode)->ip_lock); 1228 spin_unlock(&OCFS2_I(inode)->ip_lock);
1135 1229
1136 fe->i_size = cpu_to_le64(i_size_read(inode)); 1230 fe->i_size = cpu_to_le64(i_size_read(inode));
@@ -1169,6 +1263,8 @@ void ocfs2_refresh_inode(struct inode *inode,
1169 spin_lock(&OCFS2_I(inode)->ip_lock); 1263 spin_lock(&OCFS2_I(inode)->ip_lock);
1170 1264
1171 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 1265 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
1266 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
1267 ocfs2_set_inode_flags(inode);
1172 i_size_write(inode, le64_to_cpu(fe->i_size)); 1268 i_size_write(inode, le64_to_cpu(fe->i_size));
1173 inode->i_nlink = le16_to_cpu(fe->i_links_count); 1269 inode->i_nlink = le16_to_cpu(fe->i_links_count);
1174 inode->i_uid = le32_to_cpu(fe->i_uid); 1270 inode->i_uid = le32_to_cpu(fe->i_uid);