about summary refs log tree commit diff stats
path: root/fs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- fs/inode.c 128
1 files changed, 77 insertions, 51 deletions
diff --git a/fs/inode.c b/fs/inode.c
index ae7b67e48661..4d8e3be55976 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -14,6 +14,7 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/backing-dev.h> 15#include <linux/backing-dev.h>
16#include <linux/wait.h> 16#include <linux/wait.h>
17#include <linux/rwsem.h>
17#include <linux/hash.h> 18#include <linux/hash.h>
18#include <linux/swap.h> 19#include <linux/swap.h>
19#include <linux/security.h> 20#include <linux/security.h>
@@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly;
87DEFINE_SPINLOCK(inode_lock); 88DEFINE_SPINLOCK(inode_lock);
88 89
89/* 90/*
90 * iprune_mutex provides exclusion between the kswapd or try_to_free_pages 91 * iprune_sem provides exclusion between the kswapd or try_to_free_pages
91 * icache shrinking path, and the umount path. Without this exclusion, 92 * icache shrinking path, and the umount path. Without this exclusion,
92 * by the time prune_icache calls iput for the inode whose pages it has 93 * by the time prune_icache calls iput for the inode whose pages it has
93 * been invalidating, or by the time it calls clear_inode & destroy_inode 94 * been invalidating, or by the time it calls clear_inode & destroy_inode
94 * from its final dispose_list, the struct super_block they refer to 95 * from its final dispose_list, the struct super_block they refer to
95 * (for inode->i_sb->s_op) may already have been freed and reused. 96 * (for inode->i_sb->s_op) may already have been freed and reused.
97 *
98 * We make this an rwsem because the fastpath is icache shrinking. In
99 * some cases a filesystem may be doing a significant amount of work in
100 * its inode reclaim code, so this should improve parallelism.
96 */ 101 */
97static DEFINE_MUTEX(iprune_mutex); 102static DECLARE_RWSEM(iprune_sem);
98 103
99/* 104/*
100 * Statistics gathering.. 105 * Statistics gathering..
@@ -123,7 +128,7 @@ static void wake_up_inode(struct inode *inode)
123int inode_init_always(struct super_block *sb, struct inode *inode) 128int inode_init_always(struct super_block *sb, struct inode *inode)
124{ 129{
125 static const struct address_space_operations empty_aops; 130 static const struct address_space_operations empty_aops;
126 static struct inode_operations empty_iops; 131 static const struct inode_operations empty_iops;
127 static const struct file_operations empty_fops; 132 static const struct file_operations empty_fops;
128 struct address_space *const mapping = &inode->i_data; 133 struct address_space *const mapping = &inode->i_data;
129 134
@@ -182,9 +187,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
182 if (sb->s_bdev) { 187 if (sb->s_bdev) {
183 struct backing_dev_info *bdi; 188 struct backing_dev_info *bdi;
184 189
185 bdi = sb->s_bdev->bd_inode_backing_dev_info; 190 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
186 if (!bdi)
187 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
188 mapping->backing_dev_info = bdi; 191 mapping->backing_dev_info = bdi;
189 } 192 }
190 inode->i_private = NULL; 193 inode->i_private = NULL;
@@ -383,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
383 /* 386 /*
384 * We can reschedule here without worrying about the list's 387 * We can reschedule here without worrying about the list's
385 * consistency because the per-sb list of inodes must not 388 * consistency because the per-sb list of inodes must not
386 * change during umount anymore, and because iprune_mutex keeps 389 * change during umount anymore, and because iprune_sem keeps
387 * shrink_icache_memory() away. 390 * shrink_icache_memory() away.
388 */ 391 */
389 cond_resched_lock(&inode_lock); 392 cond_resched_lock(&inode_lock);
@@ -422,7 +425,7 @@ int invalidate_inodes(struct super_block *sb)
422 int busy; 425 int busy;
423 LIST_HEAD(throw_away); 426 LIST_HEAD(throw_away);
424 427
425 mutex_lock(&iprune_mutex); 428 down_write(&iprune_sem);
426 spin_lock(&inode_lock); 429 spin_lock(&inode_lock);
427 inotify_unmount_inodes(&sb->s_inodes); 430 inotify_unmount_inodes(&sb->s_inodes);
428 fsnotify_unmount_inodes(&sb->s_inodes); 431 fsnotify_unmount_inodes(&sb->s_inodes);
@@ -430,7 +433,7 @@ int invalidate_inodes(struct super_block *sb)
430 spin_unlock(&inode_lock); 433 spin_unlock(&inode_lock);
431 434
432 dispose_list(&throw_away); 435 dispose_list(&throw_away);
433 mutex_unlock(&iprune_mutex); 436 up_write(&iprune_sem);
434 437
435 return busy; 438 return busy;
436} 439}
@@ -469,7 +472,7 @@ static void prune_icache(int nr_to_scan)
469 int nr_scanned; 472 int nr_scanned;
470 unsigned long reap = 0; 473 unsigned long reap = 0;
471 474
472 mutex_lock(&iprune_mutex); 475 down_read(&iprune_sem);
473 spin_lock(&inode_lock); 476 spin_lock(&inode_lock);
474 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 477 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
475 struct inode *inode; 478 struct inode *inode;
@@ -511,7 +514,7 @@ static void prune_icache(int nr_to_scan)
511 spin_unlock(&inode_lock); 514 spin_unlock(&inode_lock);
512 515
513 dispose_list(&freeable); 516 dispose_list(&freeable);
514 mutex_unlock(&iprune_mutex); 517 up_read(&iprune_sem);
515} 518}
516 519
517/* 520/*
@@ -697,13 +700,15 @@ void unlock_new_inode(struct inode *inode)
697 } 700 }
698#endif 701#endif
699 /* 702 /*
700 * This is special! We do not need the spinlock 703 * This is special! We do not need the spinlock when clearing I_LOCK,
701 * when clearing I_LOCK, because we're guaranteed 704 * because we're guaranteed that nobody else tries to do anything about
702 * that nobody else tries to do anything about the 705 * the state of the inode when it is locked, as we just created it (so
703 * state of the inode when it is locked, as we 706 * there can be no old holders that haven't tested I_LOCK).
704 * just created it (so there can be no old holders 707 * However we must emit the memory barrier so that other CPUs reliably
705 * that haven't tested I_LOCK). 708 * see the clearing of I_LOCK after the other inode initialisation has
709 * completed.
706 */ 710 */
711 smp_mb();
707 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); 712 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
708 inode->i_state &= ~(I_LOCK|I_NEW); 713 inode->i_state &= ~(I_LOCK|I_NEW);
709 wake_up_inode(inode); 714 wake_up_inode(inode);
@@ -1236,7 +1241,16 @@ void generic_delete_inode(struct inode *inode)
1236} 1241}
1237EXPORT_SYMBOL(generic_delete_inode); 1242EXPORT_SYMBOL(generic_delete_inode);
1238 1243
1239static void generic_forget_inode(struct inode *inode) 1244/**
1245 * generic_detach_inode - remove inode from inode lists
1246 * @inode: inode to remove
1247 *
1248 * Remove inode from inode lists, write it if it's dirty. This is just an
1249 * internal VFS helper exported for hugetlbfs. Do not use!
1250 *
1251 * Returns 1 if inode should be completely destroyed.
1252 */
1253int generic_detach_inode(struct inode *inode)
1240{ 1254{
1241 struct super_block *sb = inode->i_sb; 1255 struct super_block *sb = inode->i_sb;
1242 1256
@@ -1246,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode)
1246 inodes_stat.nr_unused++; 1260 inodes_stat.nr_unused++;
1247 if (sb->s_flags & MS_ACTIVE) { 1261 if (sb->s_flags & MS_ACTIVE) {
1248 spin_unlock(&inode_lock); 1262 spin_unlock(&inode_lock);
1249 return; 1263 return 0;
1250 } 1264 }
1251 WARN_ON(inode->i_state & I_NEW); 1265 WARN_ON(inode->i_state & I_NEW);
1252 inode->i_state |= I_WILL_FREE; 1266 inode->i_state |= I_WILL_FREE;
@@ -1264,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode)
1264 inode->i_state |= I_FREEING; 1278 inode->i_state |= I_FREEING;
1265 inodes_stat.nr_inodes--; 1279 inodes_stat.nr_inodes--;
1266 spin_unlock(&inode_lock); 1280 spin_unlock(&inode_lock);
1281 return 1;
1282}
1283EXPORT_SYMBOL_GPL(generic_detach_inode);
1284
1285static void generic_forget_inode(struct inode *inode)
1286{
1287 if (!generic_detach_inode(inode))
1288 return;
1267 if (inode->i_data.nrpages) 1289 if (inode->i_data.nrpages)
1268 truncate_inode_pages(&inode->i_data, 0); 1290 truncate_inode_pages(&inode->i_data, 0);
1269 clear_inode(inode); 1291 clear_inode(inode);
@@ -1394,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
1394 struct inode *inode = dentry->d_inode; 1416 struct inode *inode = dentry->d_inode;
1395 struct timespec now; 1417 struct timespec now;
1396 1418
1397 if (mnt_want_write(mnt))
1398 return;
1399 if (inode->i_flags & S_NOATIME) 1419 if (inode->i_flags & S_NOATIME)
1400 goto out; 1420 return;
1401 if (IS_NOATIME(inode)) 1421 if (IS_NOATIME(inode))
1402 goto out; 1422 return;
1403 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) 1423 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1404 goto out; 1424 return;
1405 1425
1406 if (mnt->mnt_flags & MNT_NOATIME) 1426 if (mnt->mnt_flags & MNT_NOATIME)
1407 goto out; 1427 return;
1408 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 1428 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1409 goto out; 1429 return;
1410 1430
1411 now = current_fs_time(inode->i_sb); 1431 now = current_fs_time(inode->i_sb);
1412 1432
1413 if (!relatime_need_update(mnt, inode, now)) 1433 if (!relatime_need_update(mnt, inode, now))
1414 goto out; 1434 return;
1415 1435
1416 if (timespec_equal(&inode->i_atime, &now)) 1436 if (timespec_equal(&inode->i_atime, &now))
1417 goto out; 1437 return;
1438
1439 if (mnt_want_write(mnt))
1440 return;
1418 1441
1419 inode->i_atime = now; 1442 inode->i_atime = now;
1420 mark_inode_dirty_sync(inode); 1443 mark_inode_dirty_sync(inode);
1421out:
1422 mnt_drop_write(mnt); 1444 mnt_drop_write(mnt);
1423} 1445}
1424EXPORT_SYMBOL(touch_atime); 1446EXPORT_SYMBOL(touch_atime);
@@ -1439,34 +1461,37 @@ void file_update_time(struct file *file)
1439{ 1461{
1440 struct inode *inode = file->f_path.dentry->d_inode; 1462 struct inode *inode = file->f_path.dentry->d_inode;
1441 struct timespec now; 1463 struct timespec now;
1442 int sync_it = 0; 1464 enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
1443 int err;
1444 1465
1466 /* First try to exhaust all avenues to not sync */
1445 if (IS_NOCMTIME(inode)) 1467 if (IS_NOCMTIME(inode))
1446 return; 1468 return;
1447 1469
1448 err = mnt_want_write_file(file);
1449 if (err)
1450 return;
1451
1452 now = current_fs_time(inode->i_sb); 1470 now = current_fs_time(inode->i_sb);
1453 if (!timespec_equal(&inode->i_mtime, &now)) { 1471 if (!timespec_equal(&inode->i_mtime, &now))
1454 inode->i_mtime = now; 1472 sync_it = S_MTIME;
1455 sync_it = 1;
1456 }
1457 1473
1458 if (!timespec_equal(&inode->i_ctime, &now)) { 1474 if (!timespec_equal(&inode->i_ctime, &now))
1459 inode->i_ctime = now; 1475 sync_it |= S_CTIME;
1460 sync_it = 1;
1461 }
1462 1476
1463 if (IS_I_VERSION(inode)) { 1477 if (IS_I_VERSION(inode))
1464 inode_inc_iversion(inode); 1478 sync_it |= S_VERSION;
1465 sync_it = 1; 1479
1466 } 1480 if (!sync_it)
1481 return;
1467 1482
1468 if (sync_it) 1483 /* Finally allowed to write? Takes lock. */
1469 mark_inode_dirty_sync(inode); 1484 if (mnt_want_write_file(file))
1485 return;
1486
1487 /* Only change inode inside the lock region */
1488 if (sync_it & S_VERSION)
1489 inode_inc_iversion(inode);
1490 if (sync_it & S_CTIME)
1491 inode->i_ctime = now;
1492 if (sync_it & S_MTIME)
1493 inode->i_mtime = now;
1494 mark_inode_dirty_sync(inode);
1470 mnt_drop_write(file->f_path.mnt); 1495 mnt_drop_write(file->f_path.mnt);
1471} 1496}
1472EXPORT_SYMBOL(file_update_time); 1497EXPORT_SYMBOL(file_update_time);
@@ -1594,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
1594 else if (S_ISSOCK(mode)) 1619 else if (S_ISSOCK(mode))
1595 inode->i_fop = &bad_sock_fops; 1620 inode->i_fop = &bad_sock_fops;
1596 else 1621 else
1597 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", 1622 printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
1598 mode); 1623 " inode %s:%lu\n", mode, inode->i_sb->s_id,
1624 inode->i_ino);
1599} 1625}
1600EXPORT_SYMBOL(init_special_inode); 1626EXPORT_SYMBOL(init_special_inode);