diff options
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- | fs/inode.c | 124 |
1 file changed, 76 insertions, 48 deletions
diff --git a/fs/inode.c b/fs/inode.c index b2ba83d2c4e1..4d8e3be55976 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/backing-dev.h> | 15 | #include <linux/backing-dev.h> |
16 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
17 | #include <linux/rwsem.h> | ||
17 | #include <linux/hash.h> | 18 | #include <linux/hash.h> |
18 | #include <linux/swap.h> | 19 | #include <linux/swap.h> |
19 | #include <linux/security.h> | 20 | #include <linux/security.h> |
@@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly; | |||
87 | DEFINE_SPINLOCK(inode_lock); | 88 | DEFINE_SPINLOCK(inode_lock); |
88 | 89 | ||
89 | /* | 90 | /* |
90 | * iprune_mutex provides exclusion between the kswapd or try_to_free_pages | 91 | * iprune_sem provides exclusion between the kswapd or try_to_free_pages |
91 | * icache shrinking path, and the umount path. Without this exclusion, | 92 | * icache shrinking path, and the umount path. Without this exclusion, |
92 | * by the time prune_icache calls iput for the inode whose pages it has | 93 | * by the time prune_icache calls iput for the inode whose pages it has |
93 | * been invalidating, or by the time it calls clear_inode & destroy_inode | 94 | * been invalidating, or by the time it calls clear_inode & destroy_inode |
94 | * from its final dispose_list, the struct super_block they refer to | 95 | * from its final dispose_list, the struct super_block they refer to |
95 | * (for inode->i_sb->s_op) may already have been freed and reused. | 96 | * (for inode->i_sb->s_op) may already have been freed and reused. |
97 | * | ||
98 | * We make this an rwsem because the fastpath is icache shrinking. In | ||
99 | * some cases a filesystem may be doing a significant amount of work in | ||
100 | * its inode reclaim code, so this should improve parallelism. | ||
96 | */ | 101 | */ |
97 | static DEFINE_MUTEX(iprune_mutex); | 102 | static DECLARE_RWSEM(iprune_sem); |
98 | 103 | ||
99 | /* | 104 | /* |
100 | * Statistics gathering.. | 105 | * Statistics gathering.. |
@@ -123,7 +128,7 @@ static void wake_up_inode(struct inode *inode) | |||
123 | int inode_init_always(struct super_block *sb, struct inode *inode) | 128 | int inode_init_always(struct super_block *sb, struct inode *inode) |
124 | { | 129 | { |
125 | static const struct address_space_operations empty_aops; | 130 | static const struct address_space_operations empty_aops; |
126 | static struct inode_operations empty_iops; | 131 | static const struct inode_operations empty_iops; |
127 | static const struct file_operations empty_fops; | 132 | static const struct file_operations empty_fops; |
128 | struct address_space *const mapping = &inode->i_data; | 133 | struct address_space *const mapping = &inode->i_data; |
129 | 134 | ||
@@ -381,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) | |||
381 | /* | 386 | /* |
382 | * We can reschedule here without worrying about the list's | 387 | * We can reschedule here without worrying about the list's |
383 | * consistency because the per-sb list of inodes must not | 388 | * consistency because the per-sb list of inodes must not |
384 | * change during umount anymore, and because iprune_mutex keeps | 389 | * change during umount anymore, and because iprune_sem keeps |
385 | * shrink_icache_memory() away. | 390 | * shrink_icache_memory() away. |
386 | */ | 391 | */ |
387 | cond_resched_lock(&inode_lock); | 392 | cond_resched_lock(&inode_lock); |
@@ -420,7 +425,7 @@ int invalidate_inodes(struct super_block *sb) | |||
420 | int busy; | 425 | int busy; |
421 | LIST_HEAD(throw_away); | 426 | LIST_HEAD(throw_away); |
422 | 427 | ||
423 | mutex_lock(&iprune_mutex); | 428 | down_write(&iprune_sem); |
424 | spin_lock(&inode_lock); | 429 | spin_lock(&inode_lock); |
425 | inotify_unmount_inodes(&sb->s_inodes); | 430 | inotify_unmount_inodes(&sb->s_inodes); |
426 | fsnotify_unmount_inodes(&sb->s_inodes); | 431 | fsnotify_unmount_inodes(&sb->s_inodes); |
@@ -428,7 +433,7 @@ int invalidate_inodes(struct super_block *sb) | |||
428 | spin_unlock(&inode_lock); | 433 | spin_unlock(&inode_lock); |
429 | 434 | ||
430 | dispose_list(&throw_away); | 435 | dispose_list(&throw_away); |
431 | mutex_unlock(&iprune_mutex); | 436 | up_write(&iprune_sem); |
432 | 437 | ||
433 | return busy; | 438 | return busy; |
434 | } | 439 | } |
@@ -467,7 +472,7 @@ static void prune_icache(int nr_to_scan) | |||
467 | int nr_scanned; | 472 | int nr_scanned; |
468 | unsigned long reap = 0; | 473 | unsigned long reap = 0; |
469 | 474 | ||
470 | mutex_lock(&iprune_mutex); | 475 | down_read(&iprune_sem); |
471 | spin_lock(&inode_lock); | 476 | spin_lock(&inode_lock); |
472 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { | 477 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { |
473 | struct inode *inode; | 478 | struct inode *inode; |
@@ -509,7 +514,7 @@ static void prune_icache(int nr_to_scan) | |||
509 | spin_unlock(&inode_lock); | 514 | spin_unlock(&inode_lock); |
510 | 515 | ||
511 | dispose_list(&freeable); | 516 | dispose_list(&freeable); |
512 | mutex_unlock(&iprune_mutex); | 517 | up_read(&iprune_sem); |
513 | } | 518 | } |
514 | 519 | ||
515 | /* | 520 | /* |
@@ -695,13 +700,15 @@ void unlock_new_inode(struct inode *inode) | |||
695 | } | 700 | } |
696 | #endif | 701 | #endif |
697 | /* | 702 | /* |
698 | * This is special! We do not need the spinlock | 703 | * This is special! We do not need the spinlock when clearing I_LOCK, |
699 | * when clearing I_LOCK, because we're guaranteed | 704 | * because we're guaranteed that nobody else tries to do anything about |
700 | * that nobody else tries to do anything about the | 705 | * the state of the inode when it is locked, as we just created it (so |
701 | * state of the inode when it is locked, as we | 706 | * there can be no old holders that haven't tested I_LOCK). |
702 | * just created it (so there can be no old holders | 707 | * However we must emit the memory barrier so that other CPUs reliably |
703 | * that haven't tested I_LOCK). | 708 | * see the clearing of I_LOCK after the other inode initialisation has |
709 | * completed. | ||
704 | */ | 710 | */ |
711 | smp_mb(); | ||
705 | WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); | 712 | WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); |
706 | inode->i_state &= ~(I_LOCK|I_NEW); | 713 | inode->i_state &= ~(I_LOCK|I_NEW); |
707 | wake_up_inode(inode); | 714 | wake_up_inode(inode); |
@@ -1234,7 +1241,16 @@ void generic_delete_inode(struct inode *inode) | |||
1234 | } | 1241 | } |
1235 | EXPORT_SYMBOL(generic_delete_inode); | 1242 | EXPORT_SYMBOL(generic_delete_inode); |
1236 | 1243 | ||
1237 | static void generic_forget_inode(struct inode *inode) | 1244 | /** |
1245 | * generic_detach_inode - remove inode from inode lists | ||
1246 | * @inode: inode to remove | ||
1247 | * | ||
1248 | * Remove inode from inode lists, write it if it's dirty. This is just an | ||
1249 | * internal VFS helper exported for hugetlbfs. Do not use! | ||
1250 | * | ||
1251 | * Returns 1 if inode should be completely destroyed. | ||
1252 | */ | ||
1253 | int generic_detach_inode(struct inode *inode) | ||
1238 | { | 1254 | { |
1239 | struct super_block *sb = inode->i_sb; | 1255 | struct super_block *sb = inode->i_sb; |
1240 | 1256 | ||
@@ -1244,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode) | |||
1244 | inodes_stat.nr_unused++; | 1260 | inodes_stat.nr_unused++; |
1245 | if (sb->s_flags & MS_ACTIVE) { | 1261 | if (sb->s_flags & MS_ACTIVE) { |
1246 | spin_unlock(&inode_lock); | 1262 | spin_unlock(&inode_lock); |
1247 | return; | 1263 | return 0; |
1248 | } | 1264 | } |
1249 | WARN_ON(inode->i_state & I_NEW); | 1265 | WARN_ON(inode->i_state & I_NEW); |
1250 | inode->i_state |= I_WILL_FREE; | 1266 | inode->i_state |= I_WILL_FREE; |
@@ -1262,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode) | |||
1262 | inode->i_state |= I_FREEING; | 1278 | inode->i_state |= I_FREEING; |
1263 | inodes_stat.nr_inodes--; | 1279 | inodes_stat.nr_inodes--; |
1264 | spin_unlock(&inode_lock); | 1280 | spin_unlock(&inode_lock); |
1281 | return 1; | ||
1282 | } | ||
1283 | EXPORT_SYMBOL_GPL(generic_detach_inode); | ||
1284 | |||
1285 | static void generic_forget_inode(struct inode *inode) | ||
1286 | { | ||
1287 | if (!generic_detach_inode(inode)) | ||
1288 | return; | ||
1265 | if (inode->i_data.nrpages) | 1289 | if (inode->i_data.nrpages) |
1266 | truncate_inode_pages(&inode->i_data, 0); | 1290 | truncate_inode_pages(&inode->i_data, 0); |
1267 | clear_inode(inode); | 1291 | clear_inode(inode); |
@@ -1392,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry) | |||
1392 | struct inode *inode = dentry->d_inode; | 1416 | struct inode *inode = dentry->d_inode; |
1393 | struct timespec now; | 1417 | struct timespec now; |
1394 | 1418 | ||
1395 | if (mnt_want_write(mnt)) | ||
1396 | return; | ||
1397 | if (inode->i_flags & S_NOATIME) | 1419 | if (inode->i_flags & S_NOATIME) |
1398 | goto out; | 1420 | return; |
1399 | if (IS_NOATIME(inode)) | 1421 | if (IS_NOATIME(inode)) |
1400 | goto out; | 1422 | return; |
1401 | if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) | 1423 | if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) |
1402 | goto out; | 1424 | return; |
1403 | 1425 | ||
1404 | if (mnt->mnt_flags & MNT_NOATIME) | 1426 | if (mnt->mnt_flags & MNT_NOATIME) |
1405 | goto out; | 1427 | return; |
1406 | if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) | 1428 | if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) |
1407 | goto out; | 1429 | return; |
1408 | 1430 | ||
1409 | now = current_fs_time(inode->i_sb); | 1431 | now = current_fs_time(inode->i_sb); |
1410 | 1432 | ||
1411 | if (!relatime_need_update(mnt, inode, now)) | 1433 | if (!relatime_need_update(mnt, inode, now)) |
1412 | goto out; | 1434 | return; |
1413 | 1435 | ||
1414 | if (timespec_equal(&inode->i_atime, &now)) | 1436 | if (timespec_equal(&inode->i_atime, &now)) |
1415 | goto out; | 1437 | return; |
1438 | |||
1439 | if (mnt_want_write(mnt)) | ||
1440 | return; | ||
1416 | 1441 | ||
1417 | inode->i_atime = now; | 1442 | inode->i_atime = now; |
1418 | mark_inode_dirty_sync(inode); | 1443 | mark_inode_dirty_sync(inode); |
1419 | out: | ||
1420 | mnt_drop_write(mnt); | 1444 | mnt_drop_write(mnt); |
1421 | } | 1445 | } |
1422 | EXPORT_SYMBOL(touch_atime); | 1446 | EXPORT_SYMBOL(touch_atime); |
@@ -1437,34 +1461,37 @@ void file_update_time(struct file *file) | |||
1437 | { | 1461 | { |
1438 | struct inode *inode = file->f_path.dentry->d_inode; | 1462 | struct inode *inode = file->f_path.dentry->d_inode; |
1439 | struct timespec now; | 1463 | struct timespec now; |
1440 | int sync_it = 0; | 1464 | enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; |
1441 | int err; | ||
1442 | 1465 | ||
1466 | /* First try to exhaust all avenues to not sync */ | ||
1443 | if (IS_NOCMTIME(inode)) | 1467 | if (IS_NOCMTIME(inode)) |
1444 | return; | 1468 | return; |
1445 | 1469 | ||
1446 | err = mnt_want_write_file(file); | ||
1447 | if (err) | ||
1448 | return; | ||
1449 | |||
1450 | now = current_fs_time(inode->i_sb); | 1470 | now = current_fs_time(inode->i_sb); |
1451 | if (!timespec_equal(&inode->i_mtime, &now)) { | 1471 | if (!timespec_equal(&inode->i_mtime, &now)) |
1452 | inode->i_mtime = now; | 1472 | sync_it = S_MTIME; |
1453 | sync_it = 1; | ||
1454 | } | ||
1455 | 1473 | ||
1456 | if (!timespec_equal(&inode->i_ctime, &now)) { | 1474 | if (!timespec_equal(&inode->i_ctime, &now)) |
1457 | inode->i_ctime = now; | 1475 | sync_it |= S_CTIME; |
1458 | sync_it = 1; | ||
1459 | } | ||
1460 | 1476 | ||
1461 | if (IS_I_VERSION(inode)) { | 1477 | if (IS_I_VERSION(inode)) |
1462 | inode_inc_iversion(inode); | 1478 | sync_it |= S_VERSION; |
1463 | sync_it = 1; | 1479 | |
1464 | } | 1480 | if (!sync_it) |
1481 | return; | ||
1465 | 1482 | ||
1466 | if (sync_it) | 1483 | /* Finally allowed to write? Takes lock. */ |
1467 | mark_inode_dirty_sync(inode); | 1484 | if (mnt_want_write_file(file)) |
1485 | return; | ||
1486 | |||
1487 | /* Only change inode inside the lock region */ | ||
1488 | if (sync_it & S_VERSION) | ||
1489 | inode_inc_iversion(inode); | ||
1490 | if (sync_it & S_CTIME) | ||
1491 | inode->i_ctime = now; | ||
1492 | if (sync_it & S_MTIME) | ||
1493 | inode->i_mtime = now; | ||
1494 | mark_inode_dirty_sync(inode); | ||
1468 | mnt_drop_write(file->f_path.mnt); | 1495 | mnt_drop_write(file->f_path.mnt); |
1469 | } | 1496 | } |
1470 | EXPORT_SYMBOL(file_update_time); | 1497 | EXPORT_SYMBOL(file_update_time); |
@@ -1592,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) | |||
1592 | else if (S_ISSOCK(mode)) | 1619 | else if (S_ISSOCK(mode)) |
1593 | inode->i_fop = &bad_sock_fops; | 1620 | inode->i_fop = &bad_sock_fops; |
1594 | else | 1621 | else |
1595 | printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", | 1622 | printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" |
1596 | mode); | 1623 | " inode %s:%lu\n", mode, inode->i_sb->s_id, |
1624 | inode->i_ino); | ||
1597 | } | 1625 | } |
1598 | EXPORT_SYMBOL(init_special_inode); | 1626 | EXPORT_SYMBOL(init_special_inode); |