diff options
Diffstat (limited to 'fs/ocfs2/inode.c')
-rw-r--r-- | fs/ocfs2/inode.c | 188 |
1 files changed, 142 insertions, 46 deletions
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 327a5b7b86ed..69d3db569166 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -54,8 +54,6 @@ | |||
54 | 54 | ||
55 | #include "buffer_head_io.h" | 55 | #include "buffer_head_io.h" |
56 | 56 | ||
57 | #define OCFS2_FI_FLAG_NOWAIT 0x1 | ||
58 | #define OCFS2_FI_FLAG_DELETE 0x2 | ||
59 | struct ocfs2_find_inode_args | 57 | struct ocfs2_find_inode_args |
60 | { | 58 | { |
61 | u64 fi_blkno; | 59 | u64 fi_blkno; |
@@ -71,6 +69,26 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, | |||
71 | struct inode *inode, | 69 | struct inode *inode, |
72 | struct buffer_head *fe_bh); | 70 | struct buffer_head *fe_bh); |
73 | 71 | ||
72 | void ocfs2_set_inode_flags(struct inode *inode) | ||
73 | { | ||
74 | unsigned int flags = OCFS2_I(inode)->ip_attr; | ||
75 | |||
76 | inode->i_flags &= ~(S_IMMUTABLE | | ||
77 | S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC); | ||
78 | |||
79 | if (flags & OCFS2_IMMUTABLE_FL) | ||
80 | inode->i_flags |= S_IMMUTABLE; | ||
81 | |||
82 | if (flags & OCFS2_SYNC_FL) | ||
83 | inode->i_flags |= S_SYNC; | ||
84 | if (flags & OCFS2_APPEND_FL) | ||
85 | inode->i_flags |= S_APPEND; | ||
86 | if (flags & OCFS2_NOATIME_FL) | ||
87 | inode->i_flags |= S_NOATIME; | ||
88 | if (flags & OCFS2_DIRSYNC_FL) | ||
89 | inode->i_flags |= S_DIRSYNC; | ||
90 | } | ||
91 | |||
74 | struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, | 92 | struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, |
75 | u64 blkno, | 93 | u64 blkno, |
76 | int delete_vote) | 94 | int delete_vote) |
@@ -89,7 +107,7 @@ struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, | |||
89 | return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args); | 107 | return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args); |
90 | } | 108 | } |
91 | 109 | ||
92 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) | 110 | struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags) |
93 | { | 111 | { |
94 | struct inode *inode = NULL; | 112 | struct inode *inode = NULL; |
95 | struct super_block *sb = osb->sb; | 113 | struct super_block *sb = osb->sb; |
@@ -107,7 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) | |||
107 | } | 125 | } |
108 | 126 | ||
109 | args.fi_blkno = blkno; | 127 | args.fi_blkno = blkno; |
110 | args.fi_flags = 0; | 128 | args.fi_flags = flags; |
111 | args.fi_ino = ino_from_blkno(sb, blkno); | 129 | args.fi_ino = ino_from_blkno(sb, blkno); |
112 | 130 | ||
113 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, | 131 | inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, |
@@ -260,7 +278,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
260 | inode->i_blocks = | 278 | inode->i_blocks = |
261 | ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size)); | 279 | ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size)); |
262 | inode->i_mapping->a_ops = &ocfs2_aops; | 280 | inode->i_mapping->a_ops = &ocfs2_aops; |
263 | inode->i_flags |= S_NOATIME; | ||
264 | inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); | 281 | inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); |
265 | inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); | 282 | inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); |
266 | inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); | 283 | inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); |
@@ -276,16 +293,13 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
276 | 293 | ||
277 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | 294 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); |
278 | OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; | 295 | OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; |
279 | 296 | OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); | |
280 | if (create_ino) | ||
281 | inode->i_ino = ino_from_blkno(inode->i_sb, | ||
282 | le64_to_cpu(fe->i_blkno)); | ||
283 | |||
284 | mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n", | ||
285 | (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false"); | ||
286 | 297 | ||
287 | inode->i_nlink = le16_to_cpu(fe->i_links_count); | 298 | inode->i_nlink = le16_to_cpu(fe->i_links_count); |
288 | 299 | ||
300 | if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) | ||
301 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; | ||
302 | |||
289 | if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { | 303 | if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { |
290 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; | 304 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; |
291 | mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); | 305 | mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); |
@@ -323,12 +337,31 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
323 | break; | 337 | break; |
324 | } | 338 | } |
325 | 339 | ||
340 | if (create_ino) { | ||
341 | inode->i_ino = ino_from_blkno(inode->i_sb, | ||
342 | le64_to_cpu(fe->i_blkno)); | ||
343 | |||
344 | /* | ||
345 | * If we ever want to create system files from kernel, | ||
346 | * the generation argument to | ||
347 | * ocfs2_inode_lock_res_init() will have to change. | ||
348 | */ | ||
349 | BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)); | ||
350 | |||
351 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | ||
352 | OCFS2_LOCK_TYPE_META, 0, inode); | ||
353 | } | ||
354 | |||
326 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, | 355 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, |
327 | OCFS2_LOCK_TYPE_RW, inode); | 356 | OCFS2_LOCK_TYPE_RW, inode->i_generation, |
328 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | 357 | inode); |
329 | OCFS2_LOCK_TYPE_META, inode); | 358 | |
330 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, | 359 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, |
331 | OCFS2_LOCK_TYPE_DATA, inode); | 360 | OCFS2_LOCK_TYPE_DATA, inode->i_generation, |
361 | inode); | ||
362 | |||
363 | ocfs2_set_inode_flags(inode); | ||
364 | inode->i_flags |= S_NOATIME; | ||
332 | 365 | ||
333 | status = 0; | 366 | status = 0; |
334 | bail: | 367 | bail: |
@@ -343,15 +376,15 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
343 | struct ocfs2_super *osb; | 376 | struct ocfs2_super *osb; |
344 | struct ocfs2_dinode *fe; | 377 | struct ocfs2_dinode *fe; |
345 | struct buffer_head *bh = NULL; | 378 | struct buffer_head *bh = NULL; |
346 | int status; | 379 | int status, can_lock; |
347 | int sysfile = 0; | 380 | u32 generation = 0; |
348 | 381 | ||
349 | mlog_entry("(0x%p, 0x%p)\n", inode, args); | 382 | mlog_entry("(0x%p, 0x%p)\n", inode, args); |
350 | 383 | ||
351 | status = -EINVAL; | 384 | status = -EINVAL; |
352 | if (inode == NULL || inode->i_sb == NULL) { | 385 | if (inode == NULL || inode->i_sb == NULL) { |
353 | mlog(ML_ERROR, "bad inode\n"); | 386 | mlog(ML_ERROR, "bad inode\n"); |
354 | goto bail; | 387 | return status; |
355 | } | 388 | } |
356 | sb = inode->i_sb; | 389 | sb = inode->i_sb; |
357 | osb = OCFS2_SB(sb); | 390 | osb = OCFS2_SB(sb); |
@@ -359,50 +392,110 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
359 | if (!args) { | 392 | if (!args) { |
360 | mlog(ML_ERROR, "bad inode args\n"); | 393 | mlog(ML_ERROR, "bad inode args\n"); |
361 | make_bad_inode(inode); | 394 | make_bad_inode(inode); |
362 | goto bail; | 395 | return status; |
396 | } | ||
397 | |||
398 | /* | ||
399 | * To improve performance of cold-cache inode stats, we take | ||
400 | * the cluster lock here if possible. | ||
401 | * | ||
402 | * Generally, OCFS2 never trusts the contents of an inode | ||
403 | * unless it's holding a cluster lock, so taking it here isn't | ||
404 | * a correctness issue as much as it is a performance | ||
405 | * improvement. | ||
406 | * | ||
407 | * There are three times when taking the lock is not a good idea: | ||
408 | * | ||
409 | * 1) During startup, before we have initialized the DLM. | ||
410 | * | ||
411 | * 2) If we are reading certain system files which never get | ||
412 | * cluster locks (local alloc, truncate log). | ||
413 | * | ||
414 | * 3) If the process doing the iget() is responsible for | ||
415 | * orphan dir recovery. We're holding the orphan dir lock and | ||
416 | * can get into a deadlock with another process on another | ||
417 | * node in ->delete_inode(). | ||
418 | * | ||
419 | * #1 and #2 can be simply solved by never taking the lock | ||
420 | * here for system files (which are the only type we read | ||
421 | * during mount). It's a heavier approach, but our main | ||
422 | * concern is user-accessible files anyway. | |
423 | * | ||
424 | * #3 works itself out because we'll eventually take the | ||
425 | * cluster lock before trusting anything anyway. | ||
426 | */ | ||
427 | can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) | ||
428 | && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK); | ||
429 | |||
430 | /* | ||
431 | * To maintain backwards compatibility with older versions of | ||
432 | * ocfs2-tools, we still store the generation value for system | ||
433 | * files. The only ones that actually matter to userspace are | ||
434 | * the journals, but it's easier and inexpensive to just flag | ||
435 | * all system files similarly. | ||
436 | */ | ||
437 | if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) | ||
438 | generation = osb->fs_generation; | ||
439 | |||
440 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | ||
441 | OCFS2_LOCK_TYPE_META, | ||
442 | generation, inode); | ||
443 | |||
444 | if (can_lock) { | ||
445 | status = ocfs2_meta_lock(inode, NULL, NULL, 0); | ||
446 | if (status) { | ||
447 | make_bad_inode(inode); | ||
448 | mlog_errno(status); | ||
449 | return status; | ||
450 | } | ||
363 | } | 451 | } |
364 | 452 | ||
365 | /* Read the FE off disk. This is safe because the kernel only | 453 | status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, |
366 | * does one read_inode2 for a new inode, and if it doesn't | 454 | can_lock ? inode : NULL); |
367 | * exist yet then nobody can be working on it! */ | ||
368 | status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, NULL); | ||
369 | if (status < 0) { | 455 | if (status < 0) { |
370 | mlog_errno(status); | 456 | mlog_errno(status); |
371 | make_bad_inode(inode); | ||
372 | goto bail; | 457 | goto bail; |
373 | } | 458 | } |
374 | 459 | ||
460 | status = -EINVAL; | ||
375 | fe = (struct ocfs2_dinode *) bh->b_data; | 461 | fe = (struct ocfs2_dinode *) bh->b_data; |
376 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 462 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
377 | mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", | 463 | mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", |
378 | (unsigned long long)fe->i_blkno, 7, fe->i_signature); | 464 | (unsigned long long)fe->i_blkno, 7, fe->i_signature); |
379 | make_bad_inode(inode); | ||
380 | goto bail; | 465 | goto bail; |
381 | } | 466 | } |
382 | 467 | ||
383 | if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) | 468 | /* |
384 | sysfile = 1; | 469 | * This is a code bug. Right now the caller needs to |
470 | * understand whether it is asking for a system file inode or | ||
471 | * not so the proper lock names can be built. | ||
472 | */ | ||
473 | mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) != | ||
474 | !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE), | ||
475 | "Inode %llu: system file state is ambigous\n", | ||
476 | (unsigned long long)args->fi_blkno); | ||
385 | 477 | ||
386 | if (S_ISCHR(le16_to_cpu(fe->i_mode)) || | 478 | if (S_ISCHR(le16_to_cpu(fe->i_mode)) || |
387 | S_ISBLK(le16_to_cpu(fe->i_mode))) | 479 | S_ISBLK(le16_to_cpu(fe->i_mode))) |
388 | inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); | 480 | inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); |
389 | 481 | ||
390 | status = -EINVAL; | ||
391 | if (ocfs2_populate_inode(inode, fe, 0) < 0) { | 482 | if (ocfs2_populate_inode(inode, fe, 0) < 0) { |
392 | mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", | 483 | mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", |
393 | (unsigned long long)fe->i_blkno, inode->i_ino); | 484 | (unsigned long long)fe->i_blkno, inode->i_ino); |
394 | make_bad_inode(inode); | ||
395 | goto bail; | 485 | goto bail; |
396 | } | 486 | } |
397 | 487 | ||
398 | BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); | 488 | BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); |
399 | 489 | ||
400 | if (sysfile) | ||
401 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; | ||
402 | |||
403 | status = 0; | 490 | status = 0; |
404 | 491 | ||
405 | bail: | 492 | bail: |
493 | if (can_lock) | ||
494 | ocfs2_meta_unlock(inode, 0); | ||
495 | |||
496 | if (status < 0) | ||
497 | make_bad_inode(inode); | ||
498 | |||
406 | if (args && bh) | 499 | if (args && bh) |
407 | brelse(bh); | 500 | brelse(bh); |
408 | 501 | ||
@@ -875,9 +968,15 @@ void ocfs2_delete_inode(struct inode *inode) | |||
875 | goto bail_unlock_inode; | 968 | goto bail_unlock_inode; |
876 | } | 969 | } |
877 | 970 | ||
878 | /* Mark the inode as successfully deleted. This is important | 971 | /* |
879 | * for ocfs2_clear_inode as it will check this flag and skip | 972 | * Mark the inode as successfully deleted. |
880 | * any checkpointing work */ | 973 | * |
974 | * This is important for ocfs2_clear_inode() as it will check | ||
975 | * this flag and skip any checkpointing work | ||
976 | * | ||
977 | * ocfs2_stuff_meta_lvb() also uses this flag to invalidate | ||
978 | * the LVB for other nodes. | ||
979 | */ | ||
881 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; | 980 | OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; |
882 | 981 | ||
883 | bail_unlock_inode: | 982 | bail_unlock_inode: |
@@ -1002,12 +1101,10 @@ void ocfs2_drop_inode(struct inode *inode) | |||
1002 | /* Testing ip_orphaned_slot here wouldn't work because we may | 1101 | /* Testing ip_orphaned_slot here wouldn't work because we may |
1003 | * not have gotten a delete_inode vote from any other nodes | 1102 | * not have gotten a delete_inode vote from any other nodes |
1004 | * yet. */ | 1103 | * yet. */ |
1005 | if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) { | 1104 | if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) |
1006 | mlog(0, "Inode was orphaned on another node, clearing nlink.\n"); | 1105 | generic_delete_inode(inode); |
1007 | inode->i_nlink = 0; | 1106 | else |
1008 | } | 1107 | generic_drop_inode(inode); |
1009 | |||
1010 | generic_drop_inode(inode); | ||
1011 | 1108 | ||
1012 | mlog_exit_void(); | 1109 | mlog_exit_void(); |
1013 | } | 1110 | } |
@@ -1027,12 +1124,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode, | |||
1027 | u64 p_blkno; | 1124 | u64 p_blkno; |
1028 | int readflags = OCFS2_BH_CACHED; | 1125 | int readflags = OCFS2_BH_CACHED; |
1029 | 1126 | ||
1030 | #if 0 | ||
1031 | /* only turn this on if we know we can deal with read_block | ||
1032 | * returning nothing */ | ||
1033 | if (reada) | 1127 | if (reada) |
1034 | readflags |= OCFS2_BH_READAHEAD; | 1128 | readflags |= OCFS2_BH_READAHEAD; |
1035 | #endif | ||
1036 | 1129 | ||
1037 | if (((u64)block << inode->i_sb->s_blocksize_bits) >= | 1130 | if (((u64)block << inode->i_sb->s_blocksize_bits) >= |
1038 | i_size_read(inode)) { | 1131 | i_size_read(inode)) { |
@@ -1131,6 +1224,7 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle, | |||
1131 | 1224 | ||
1132 | spin_lock(&OCFS2_I(inode)->ip_lock); | 1225 | spin_lock(&OCFS2_I(inode)->ip_lock); |
1133 | fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); | 1226 | fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); |
1227 | fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr); | ||
1134 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 1228 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
1135 | 1229 | ||
1136 | fe->i_size = cpu_to_le64(i_size_read(inode)); | 1230 | fe->i_size = cpu_to_le64(i_size_read(inode)); |
@@ -1169,6 +1263,8 @@ void ocfs2_refresh_inode(struct inode *inode, | |||
1169 | spin_lock(&OCFS2_I(inode)->ip_lock); | 1263 | spin_lock(&OCFS2_I(inode)->ip_lock); |
1170 | 1264 | ||
1171 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | 1265 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); |
1266 | OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); | ||
1267 | ocfs2_set_inode_flags(inode); | ||
1172 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 1268 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
1173 | inode->i_nlink = le16_to_cpu(fe->i_links_count); | 1269 | inode->i_nlink = le16_to_cpu(fe->i_links_count); |
1174 | inode->i_uid = le32_to_cpu(fe->i_uid); | 1270 | inode->i_uid = le32_to_cpu(fe->i_uid); |