about summary refs log tree commit diff stats
path: root/fs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- fs/inode.c 124
1 file changed, 76 insertions, 48 deletions
diff --git a/fs/inode.c b/fs/inode.c
index b2ba83d2c4e1..4d8e3be55976 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -14,6 +14,7 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/backing-dev.h> 15#include <linux/backing-dev.h>
16#include <linux/wait.h> 16#include <linux/wait.h>
17#include <linux/rwsem.h>
17#include <linux/hash.h> 18#include <linux/hash.h>
18#include <linux/swap.h> 19#include <linux/swap.h>
19#include <linux/security.h> 20#include <linux/security.h>
@@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly;
87DEFINE_SPINLOCK(inode_lock); 88DEFINE_SPINLOCK(inode_lock);
88 89
89/* 90/*
90 * iprune_mutex provides exclusion between the kswapd or try_to_free_pages 91 * iprune_sem provides exclusion between the kswapd or try_to_free_pages
91 * icache shrinking path, and the umount path. Without this exclusion, 92 * icache shrinking path, and the umount path. Without this exclusion,
92 * by the time prune_icache calls iput for the inode whose pages it has 93 * by the time prune_icache calls iput for the inode whose pages it has
93 * been invalidating, or by the time it calls clear_inode & destroy_inode 94 * been invalidating, or by the time it calls clear_inode & destroy_inode
94 * from its final dispose_list, the struct super_block they refer to 95 * from its final dispose_list, the struct super_block they refer to
95 * (for inode->i_sb->s_op) may already have been freed and reused. 96 * (for inode->i_sb->s_op) may already have been freed and reused.
97 *
98 * We make this an rwsem because the fastpath is icache shrinking. In
99 * some cases a filesystem may be doing a significant amount of work in
100 * its inode reclaim code, so this should improve parallelism.
96 */ 101 */
97static DEFINE_MUTEX(iprune_mutex); 102static DECLARE_RWSEM(iprune_sem);
98 103
99/* 104/*
100 * Statistics gathering.. 105 * Statistics gathering..
@@ -123,7 +128,7 @@ static void wake_up_inode(struct inode *inode)
123int inode_init_always(struct super_block *sb, struct inode *inode) 128int inode_init_always(struct super_block *sb, struct inode *inode)
124{ 129{
125 static const struct address_space_operations empty_aops; 130 static const struct address_space_operations empty_aops;
126 static struct inode_operations empty_iops; 131 static const struct inode_operations empty_iops;
127 static const struct file_operations empty_fops; 132 static const struct file_operations empty_fops;
128 struct address_space *const mapping = &inode->i_data; 133 struct address_space *const mapping = &inode->i_data;
129 134
@@ -381,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
381 /* 386 /*
382 * We can reschedule here without worrying about the list's 387 * We can reschedule here without worrying about the list's
383 * consistency because the per-sb list of inodes must not 388 * consistency because the per-sb list of inodes must not
384 * change during umount anymore, and because iprune_mutex keeps 389 * change during umount anymore, and because iprune_sem keeps
385 * shrink_icache_memory() away. 390 * shrink_icache_memory() away.
386 */ 391 */
387 cond_resched_lock(&inode_lock); 392 cond_resched_lock(&inode_lock);
@@ -420,7 +425,7 @@ int invalidate_inodes(struct super_block *sb)
420 int busy; 425 int busy;
421 LIST_HEAD(throw_away); 426 LIST_HEAD(throw_away);
422 427
423 mutex_lock(&iprune_mutex); 428 down_write(&iprune_sem);
424 spin_lock(&inode_lock); 429 spin_lock(&inode_lock);
425 inotify_unmount_inodes(&sb->s_inodes); 430 inotify_unmount_inodes(&sb->s_inodes);
426 fsnotify_unmount_inodes(&sb->s_inodes); 431 fsnotify_unmount_inodes(&sb->s_inodes);
@@ -428,7 +433,7 @@ int invalidate_inodes(struct super_block *sb)
428 spin_unlock(&inode_lock); 433 spin_unlock(&inode_lock);
429 434
430 dispose_list(&throw_away); 435 dispose_list(&throw_away);
431 mutex_unlock(&iprune_mutex); 436 up_write(&iprune_sem);
432 437
433 return busy; 438 return busy;
434} 439}
@@ -467,7 +472,7 @@ static void prune_icache(int nr_to_scan)
467 int nr_scanned; 472 int nr_scanned;
468 unsigned long reap = 0; 473 unsigned long reap = 0;
469 474
470 mutex_lock(&iprune_mutex); 475 down_read(&iprune_sem);
471 spin_lock(&inode_lock); 476 spin_lock(&inode_lock);
472 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 477 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
473 struct inode *inode; 478 struct inode *inode;
@@ -509,7 +514,7 @@ static void prune_icache(int nr_to_scan)
509 spin_unlock(&inode_lock); 514 spin_unlock(&inode_lock);
510 515
511 dispose_list(&freeable); 516 dispose_list(&freeable);
512 mutex_unlock(&iprune_mutex); 517 up_read(&iprune_sem);
513} 518}
514 519
515/* 520/*
@@ -695,13 +700,15 @@ void unlock_new_inode(struct inode *inode)
695 } 700 }
696#endif 701#endif
697 /* 702 /*
698 * This is special! We do not need the spinlock 703 * This is special! We do not need the spinlock when clearing I_LOCK,
699 * when clearing I_LOCK, because we're guaranteed 704 * because we're guaranteed that nobody else tries to do anything about
700 * that nobody else tries to do anything about the 705 * the state of the inode when it is locked, as we just created it (so
701 * state of the inode when it is locked, as we 706 * there can be no old holders that haven't tested I_LOCK).
702 * just created it (so there can be no old holders 707 * However we must emit the memory barrier so that other CPUs reliably
703 * that haven't tested I_LOCK). 708 * see the clearing of I_LOCK after the other inode initialisation has
709 * completed.
704 */ 710 */
711 smp_mb();
705 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); 712 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
706 inode->i_state &= ~(I_LOCK|I_NEW); 713 inode->i_state &= ~(I_LOCK|I_NEW);
707 wake_up_inode(inode); 714 wake_up_inode(inode);
@@ -1234,7 +1241,16 @@ void generic_delete_inode(struct inode *inode)
1234} 1241}
1235EXPORT_SYMBOL(generic_delete_inode); 1242EXPORT_SYMBOL(generic_delete_inode);
1236 1243
1237static void generic_forget_inode(struct inode *inode) 1244/**
1245 * generic_detach_inode - remove inode from inode lists
1246 * @inode: inode to remove
1247 *
1248 * Remove inode from inode lists, write it if it's dirty. This is just an
1249 * internal VFS helper exported for hugetlbfs. Do not use!
1250 *
1251 * Returns 1 if inode should be completely destroyed.
1252 */
1253int generic_detach_inode(struct inode *inode)
1238{ 1254{
1239 struct super_block *sb = inode->i_sb; 1255 struct super_block *sb = inode->i_sb;
1240 1256
@@ -1244,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode)
1244 inodes_stat.nr_unused++; 1260 inodes_stat.nr_unused++;
1245 if (sb->s_flags & MS_ACTIVE) { 1261 if (sb->s_flags & MS_ACTIVE) {
1246 spin_unlock(&inode_lock); 1262 spin_unlock(&inode_lock);
1247 return; 1263 return 0;
1248 } 1264 }
1249 WARN_ON(inode->i_state & I_NEW); 1265 WARN_ON(inode->i_state & I_NEW);
1250 inode->i_state |= I_WILL_FREE; 1266 inode->i_state |= I_WILL_FREE;
@@ -1262,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode)
1262 inode->i_state |= I_FREEING; 1278 inode->i_state |= I_FREEING;
1263 inodes_stat.nr_inodes--; 1279 inodes_stat.nr_inodes--;
1264 spin_unlock(&inode_lock); 1280 spin_unlock(&inode_lock);
1281 return 1;
1282}
1283EXPORT_SYMBOL_GPL(generic_detach_inode);
1284
1285static void generic_forget_inode(struct inode *inode)
1286{
1287 if (!generic_detach_inode(inode))
1288 return;
1265 if (inode->i_data.nrpages) 1289 if (inode->i_data.nrpages)
1266 truncate_inode_pages(&inode->i_data, 0); 1290 truncate_inode_pages(&inode->i_data, 0);
1267 clear_inode(inode); 1291 clear_inode(inode);
@@ -1392,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
1392 struct inode *inode = dentry->d_inode; 1416 struct inode *inode = dentry->d_inode;
1393 struct timespec now; 1417 struct timespec now;
1394 1418
1395 if (mnt_want_write(mnt))
1396 return;
1397 if (inode->i_flags & S_NOATIME) 1419 if (inode->i_flags & S_NOATIME)
1398 goto out; 1420 return;
1399 if (IS_NOATIME(inode)) 1421 if (IS_NOATIME(inode))
1400 goto out; 1422 return;
1401 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) 1423 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1402 goto out; 1424 return;
1403 1425
1404 if (mnt->mnt_flags & MNT_NOATIME) 1426 if (mnt->mnt_flags & MNT_NOATIME)
1405 goto out; 1427 return;
1406 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 1428 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1407 goto out; 1429 return;
1408 1430
1409 now = current_fs_time(inode->i_sb); 1431 now = current_fs_time(inode->i_sb);
1410 1432
1411 if (!relatime_need_update(mnt, inode, now)) 1433 if (!relatime_need_update(mnt, inode, now))
1412 goto out; 1434 return;
1413 1435
1414 if (timespec_equal(&inode->i_atime, &now)) 1436 if (timespec_equal(&inode->i_atime, &now))
1415 goto out; 1437 return;
1438
1439 if (mnt_want_write(mnt))
1440 return;
1416 1441
1417 inode->i_atime = now; 1442 inode->i_atime = now;
1418 mark_inode_dirty_sync(inode); 1443 mark_inode_dirty_sync(inode);
1419out:
1420 mnt_drop_write(mnt); 1444 mnt_drop_write(mnt);
1421} 1445}
1422EXPORT_SYMBOL(touch_atime); 1446EXPORT_SYMBOL(touch_atime);
@@ -1437,34 +1461,37 @@ void file_update_time(struct file *file)
1437{ 1461{
1438 struct inode *inode = file->f_path.dentry->d_inode; 1462 struct inode *inode = file->f_path.dentry->d_inode;
1439 struct timespec now; 1463 struct timespec now;
1440 int sync_it = 0; 1464 enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
1441 int err;
1442 1465
1466 /* First try to exhaust all avenues to not sync */
1443 if (IS_NOCMTIME(inode)) 1467 if (IS_NOCMTIME(inode))
1444 return; 1468 return;
1445 1469
1446 err = mnt_want_write_file(file);
1447 if (err)
1448 return;
1449
1450 now = current_fs_time(inode->i_sb); 1470 now = current_fs_time(inode->i_sb);
1451 if (!timespec_equal(&inode->i_mtime, &now)) { 1471 if (!timespec_equal(&inode->i_mtime, &now))
1452 inode->i_mtime = now; 1472 sync_it = S_MTIME;
1453 sync_it = 1;
1454 }
1455 1473
1456 if (!timespec_equal(&inode->i_ctime, &now)) { 1474 if (!timespec_equal(&inode->i_ctime, &now))
1457 inode->i_ctime = now; 1475 sync_it |= S_CTIME;
1458 sync_it = 1;
1459 }
1460 1476
1461 if (IS_I_VERSION(inode)) { 1477 if (IS_I_VERSION(inode))
1462 inode_inc_iversion(inode); 1478 sync_it |= S_VERSION;
1463 sync_it = 1; 1479
1464 } 1480 if (!sync_it)
1481 return;
1465 1482
1466 if (sync_it) 1483 /* Finally allowed to write? Takes lock. */
1467 mark_inode_dirty_sync(inode); 1484 if (mnt_want_write_file(file))
1485 return;
1486
1487 /* Only change inode inside the lock region */
1488 if (sync_it & S_VERSION)
1489 inode_inc_iversion(inode);
1490 if (sync_it & S_CTIME)
1491 inode->i_ctime = now;
1492 if (sync_it & S_MTIME)
1493 inode->i_mtime = now;
1494 mark_inode_dirty_sync(inode);
1468 mnt_drop_write(file->f_path.mnt); 1495 mnt_drop_write(file->f_path.mnt);
1469} 1496}
1470EXPORT_SYMBOL(file_update_time); 1497EXPORT_SYMBOL(file_update_time);
@@ -1592,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
1592 else if (S_ISSOCK(mode)) 1619 else if (S_ISSOCK(mode))
1593 inode->i_fop = &bad_sock_fops; 1620 inode->i_fop = &bad_sock_fops;
1594 else 1621 else
1595 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", 1622 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
1596 mode); 1623 " inode %s:%lu\n", mode, inode->i_sb->s_id,
1624 inode->i_ino);
1597} 1625}
1598EXPORT_SYMBOL(init_special_inode); 1626EXPORT_SYMBOL(init_special_inode);