diff options
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- | fs/inode.c | 128 |
1 files changed, 77 insertions, 51 deletions
diff --git a/fs/inode.c b/fs/inode.c
index ae7b67e48661..4d8e3be55976 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/backing-dev.h> | 15 | #include <linux/backing-dev.h> |
16 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
17 | #include <linux/rwsem.h> | ||
17 | #include <linux/hash.h> | 18 | #include <linux/hash.h> |
18 | #include <linux/swap.h> | 19 | #include <linux/swap.h> |
19 | #include <linux/security.h> | 20 | #include <linux/security.h> |
@@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly; | |||
87 | DEFINE_SPINLOCK(inode_lock); | 88 | DEFINE_SPINLOCK(inode_lock); |
88 | 89 | ||
89 | /* | 90 | /* |
90 | * iprune_mutex provides exclusion between the kswapd or try_to_free_pages | 91 | * iprune_sem provides exclusion between the kswapd or try_to_free_pages |
91 | * icache shrinking path, and the umount path. Without this exclusion, | 92 | * icache shrinking path, and the umount path. Without this exclusion, |
92 | * by the time prune_icache calls iput for the inode whose pages it has | 93 | * by the time prune_icache calls iput for the inode whose pages it has |
93 | * been invalidating, or by the time it calls clear_inode & destroy_inode | 94 | * been invalidating, or by the time it calls clear_inode & destroy_inode |
94 | * from its final dispose_list, the struct super_block they refer to | 95 | * from its final dispose_list, the struct super_block they refer to |
95 | * (for inode->i_sb->s_op) may already have been freed and reused. | 96 | * (for inode->i_sb->s_op) may already have been freed and reused. |
97 | * | ||
98 | * We make this an rwsem because the fastpath is icache shrinking. In | ||
99 | * some cases a filesystem may be doing a significant amount of work in | ||
100 | * its inode reclaim code, so this should improve parallelism. | ||
96 | */ | 101 | */ |
97 | static DEFINE_MUTEX(iprune_mutex); | 102 | static DECLARE_RWSEM(iprune_sem); |
98 | 103 | ||
99 | /* | 104 | /* |
100 | * Statistics gathering.. | 105 | * Statistics gathering.. |
@@ -123,7 +128,7 @@ static void wake_up_inode(struct inode *inode) | |||
123 | int inode_init_always(struct super_block *sb, struct inode *inode) | 128 | int inode_init_always(struct super_block *sb, struct inode *inode) |
124 | { | 129 | { |
125 | static const struct address_space_operations empty_aops; | 130 | static const struct address_space_operations empty_aops; |
126 | static struct inode_operations empty_iops; | 131 | static const struct inode_operations empty_iops; |
127 | static const struct file_operations empty_fops; | 132 | static const struct file_operations empty_fops; |
128 | struct address_space *const mapping = &inode->i_data; | 133 | struct address_space *const mapping = &inode->i_data; |
129 | 134 | ||
@@ -182,9 +187,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
182 | if (sb->s_bdev) { | 187 | if (sb->s_bdev) { |
183 | struct backing_dev_info *bdi; | 188 | struct backing_dev_info *bdi; |
184 | 189 | ||
185 | bdi = sb->s_bdev->bd_inode_backing_dev_info; | 190 | bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; |
186 | if (!bdi) | ||
187 | bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; | ||
188 | mapping->backing_dev_info = bdi; | 191 | mapping->backing_dev_info = bdi; |
189 | } | 192 | } |
190 | inode->i_private = NULL; | 193 | inode->i_private = NULL; |
@@ -383,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) | |||
383 | /* | 386 | /* |
384 | * We can reschedule here without worrying about the list's | 387 | * We can reschedule here without worrying about the list's |
385 | * consistency because the per-sb list of inodes must not | 388 | * consistency because the per-sb list of inodes must not |
386 | * change during umount anymore, and because iprune_mutex keeps | 389 | * change during umount anymore, and because iprune_sem keeps |
387 | * shrink_icache_memory() away. | 390 | * shrink_icache_memory() away. |
388 | */ | 391 | */ |
389 | cond_resched_lock(&inode_lock); | 392 | cond_resched_lock(&inode_lock); |
@@ -422,7 +425,7 @@ int invalidate_inodes(struct super_block *sb) | |||
422 | int busy; | 425 | int busy; |
423 | LIST_HEAD(throw_away); | 426 | LIST_HEAD(throw_away); |
424 | 427 | ||
425 | mutex_lock(&iprune_mutex); | 428 | down_write(&iprune_sem); |
426 | spin_lock(&inode_lock); | 429 | spin_lock(&inode_lock); |
427 | inotify_unmount_inodes(&sb->s_inodes); | 430 | inotify_unmount_inodes(&sb->s_inodes); |
428 | fsnotify_unmount_inodes(&sb->s_inodes); | 431 | fsnotify_unmount_inodes(&sb->s_inodes); |
@@ -430,7 +433,7 @@ int invalidate_inodes(struct super_block *sb) | |||
430 | spin_unlock(&inode_lock); | 433 | spin_unlock(&inode_lock); |
431 | 434 | ||
432 | dispose_list(&throw_away); | 435 | dispose_list(&throw_away); |
433 | mutex_unlock(&iprune_mutex); | 436 | up_write(&iprune_sem); |
434 | 437 | ||
435 | return busy; | 438 | return busy; |
436 | } | 439 | } |
@@ -469,7 +472,7 @@ static void prune_icache(int nr_to_scan) | |||
469 | int nr_scanned; | 472 | int nr_scanned; |
470 | unsigned long reap = 0; | 473 | unsigned long reap = 0; |
471 | 474 | ||
472 | mutex_lock(&iprune_mutex); | 475 | down_read(&iprune_sem); |
473 | spin_lock(&inode_lock); | 476 | spin_lock(&inode_lock); |
474 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { | 477 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { |
475 | struct inode *inode; | 478 | struct inode *inode; |
@@ -511,7 +514,7 @@ static void prune_icache(int nr_to_scan) | |||
511 | spin_unlock(&inode_lock); | 514 | spin_unlock(&inode_lock); |
512 | 515 | ||
513 | dispose_list(&freeable); | 516 | dispose_list(&freeable); |
514 | mutex_unlock(&iprune_mutex); | 517 | up_read(&iprune_sem); |
515 | } | 518 | } |
516 | 519 | ||
517 | /* | 520 | /* |
@@ -697,13 +700,15 @@ void unlock_new_inode(struct inode *inode) | |||
697 | } | 700 | } |
698 | #endif | 701 | #endif |
699 | /* | 702 | /* |
700 | * This is special! We do not need the spinlock | 703 | * This is special! We do not need the spinlock when clearing I_LOCK, |
701 | * when clearing I_LOCK, because we're guaranteed | 704 | * because we're guaranteed that nobody else tries to do anything about |
702 | * that nobody else tries to do anything about the | 705 | * the state of the inode when it is locked, as we just created it (so |
703 | * state of the inode when it is locked, as we | 706 | * there can be no old holders that haven't tested I_LOCK). |
704 | * just created it (so there can be no old holders | 707 | * However we must emit the memory barrier so that other CPUs reliably |
705 | * that haven't tested I_LOCK). | 708 | * see the clearing of I_LOCK after the other inode initialisation has |
709 | * completed. | ||
706 | */ | 710 | */ |
711 | smp_mb(); | ||
707 | WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); | 712 | WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); |
708 | inode->i_state &= ~(I_LOCK|I_NEW); | 713 | inode->i_state &= ~(I_LOCK|I_NEW); |
709 | wake_up_inode(inode); | 714 | wake_up_inode(inode); |
@@ -1236,7 +1241,16 @@ void generic_delete_inode(struct inode *inode) | |||
1236 | } | 1241 | } |
1237 | EXPORT_SYMBOL(generic_delete_inode); | 1242 | EXPORT_SYMBOL(generic_delete_inode); |
1238 | 1243 | ||
1239 | static void generic_forget_inode(struct inode *inode) | 1244 | /** |
1245 | * generic_detach_inode - remove inode from inode lists | ||
1246 | * @inode: inode to remove | ||
1247 | * | ||
1248 | * Remove inode from inode lists, write it if it's dirty. This is just an | ||
1249 | * internal VFS helper exported for hugetlbfs. Do not use! | ||
1250 | * | ||
1251 | * Returns 1 if inode should be completely destroyed. | ||
1252 | */ | ||
1253 | int generic_detach_inode(struct inode *inode) | ||
1240 | { | 1254 | { |
1241 | struct super_block *sb = inode->i_sb; | 1255 | struct super_block *sb = inode->i_sb; |
1242 | 1256 | ||
@@ -1246,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode) | |||
1246 | inodes_stat.nr_unused++; | 1260 | inodes_stat.nr_unused++; |
1247 | if (sb->s_flags & MS_ACTIVE) { | 1261 | if (sb->s_flags & MS_ACTIVE) { |
1248 | spin_unlock(&inode_lock); | 1262 | spin_unlock(&inode_lock); |
1249 | return; | 1263 | return 0; |
1250 | } | 1264 | } |
1251 | WARN_ON(inode->i_state & I_NEW); | 1265 | WARN_ON(inode->i_state & I_NEW); |
1252 | inode->i_state |= I_WILL_FREE; | 1266 | inode->i_state |= I_WILL_FREE; |
@@ -1264,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode) | |||
1264 | inode->i_state |= I_FREEING; | 1278 | inode->i_state |= I_FREEING; |
1265 | inodes_stat.nr_inodes--; | 1279 | inodes_stat.nr_inodes--; |
1266 | spin_unlock(&inode_lock); | 1280 | spin_unlock(&inode_lock); |
1281 | return 1; | ||
1282 | } | ||
1283 | EXPORT_SYMBOL_GPL(generic_detach_inode); | ||
1284 | |||
1285 | static void generic_forget_inode(struct inode *inode) | ||
1286 | { | ||
1287 | if (!generic_detach_inode(inode)) | ||
1288 | return; | ||
1267 | if (inode->i_data.nrpages) | 1289 | if (inode->i_data.nrpages) |
1268 | truncate_inode_pages(&inode->i_data, 0); | 1290 | truncate_inode_pages(&inode->i_data, 0); |
1269 | clear_inode(inode); | 1291 | clear_inode(inode); |
@@ -1394,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry) | |||
1394 | struct inode *inode = dentry->d_inode; | 1416 | struct inode *inode = dentry->d_inode; |
1395 | struct timespec now; | 1417 | struct timespec now; |
1396 | 1418 | ||
1397 | if (mnt_want_write(mnt)) | ||
1398 | return; | ||
1399 | if (inode->i_flags & S_NOATIME) | 1419 | if (inode->i_flags & S_NOATIME) |
1400 | goto out; | 1420 | return; |
1401 | if (IS_NOATIME(inode)) | 1421 | if (IS_NOATIME(inode)) |
1402 | goto out; | 1422 | return; |
1403 | if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) | 1423 | if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) |
1404 | goto out; | 1424 | return; |
1405 | 1425 | ||
1406 | if (mnt->mnt_flags & MNT_NOATIME) | 1426 | if (mnt->mnt_flags & MNT_NOATIME) |
1407 | goto out; | 1427 | return; |
1408 | if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) | 1428 | if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) |
1409 | goto out; | 1429 | return; |
1410 | 1430 | ||
1411 | now = current_fs_time(inode->i_sb); | 1431 | now = current_fs_time(inode->i_sb); |
1412 | 1432 | ||
1413 | if (!relatime_need_update(mnt, inode, now)) | 1433 | if (!relatime_need_update(mnt, inode, now)) |
1414 | goto out; | 1434 | return; |
1415 | 1435 | ||
1416 | if (timespec_equal(&inode->i_atime, &now)) | 1436 | if (timespec_equal(&inode->i_atime, &now)) |
1417 | goto out; | 1437 | return; |
1438 | |||
1439 | if (mnt_want_write(mnt)) | ||
1440 | return; | ||
1418 | 1441 | ||
1419 | inode->i_atime = now; | 1442 | inode->i_atime = now; |
1420 | mark_inode_dirty_sync(inode); | 1443 | mark_inode_dirty_sync(inode); |
1421 | out: | ||
1422 | mnt_drop_write(mnt); | 1444 | mnt_drop_write(mnt); |
1423 | } | 1445 | } |
1424 | EXPORT_SYMBOL(touch_atime); | 1446 | EXPORT_SYMBOL(touch_atime); |
@@ -1439,34 +1461,37 @@ void file_update_time(struct file *file) | |||
1439 | { | 1461 | { |
1440 | struct inode *inode = file->f_path.dentry->d_inode; | 1462 | struct inode *inode = file->f_path.dentry->d_inode; |
1441 | struct timespec now; | 1463 | struct timespec now; |
1442 | int sync_it = 0; | 1464 | enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; |
1443 | int err; | ||
1444 | 1465 | ||
1466 | /* First try to exhaust all avenues to not sync */ | ||
1445 | if (IS_NOCMTIME(inode)) | 1467 | if (IS_NOCMTIME(inode)) |
1446 | return; | 1468 | return; |
1447 | 1469 | ||
1448 | err = mnt_want_write_file(file); | ||
1449 | if (err) | ||
1450 | return; | ||
1451 | |||
1452 | now = current_fs_time(inode->i_sb); | 1470 | now = current_fs_time(inode->i_sb); |
1453 | if (!timespec_equal(&inode->i_mtime, &now)) { | 1471 | if (!timespec_equal(&inode->i_mtime, &now)) |
1454 | inode->i_mtime = now; | 1472 | sync_it = S_MTIME; |
1455 | sync_it = 1; | ||
1456 | } | ||
1457 | 1473 | ||
1458 | if (!timespec_equal(&inode->i_ctime, &now)) { | 1474 | if (!timespec_equal(&inode->i_ctime, &now)) |
1459 | inode->i_ctime = now; | 1475 | sync_it |= S_CTIME; |
1460 | sync_it = 1; | ||
1461 | } | ||
1462 | 1476 | ||
1463 | if (IS_I_VERSION(inode)) { | 1477 | if (IS_I_VERSION(inode)) |
1464 | inode_inc_iversion(inode); | 1478 | sync_it |= S_VERSION; |
1465 | sync_it = 1; | 1479 | |
1466 | } | 1480 | if (!sync_it) |
1481 | return; | ||
1467 | 1482 | ||
1468 | if (sync_it) | 1483 | /* Finally allowed to write? Takes lock. */ |
1469 | mark_inode_dirty_sync(inode); | 1484 | if (mnt_want_write_file(file)) |
1485 | return; | ||
1486 | |||
1487 | /* Only change inode inside the lock region */ | ||
1488 | if (sync_it & S_VERSION) | ||
1489 | inode_inc_iversion(inode); | ||
1490 | if (sync_it & S_CTIME) | ||
1491 | inode->i_ctime = now; | ||
1492 | if (sync_it & S_MTIME) | ||
1493 | inode->i_mtime = now; | ||
1494 | mark_inode_dirty_sync(inode); | ||
1470 | mnt_drop_write(file->f_path.mnt); | 1495 | mnt_drop_write(file->f_path.mnt); |
1471 | } | 1496 | } |
1472 | EXPORT_SYMBOL(file_update_time); | 1497 | EXPORT_SYMBOL(file_update_time); |
@@ -1594,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) | |||
1594 | else if (S_ISSOCK(mode)) | 1619 | else if (S_ISSOCK(mode)) |
1595 | inode->i_fop = &bad_sock_fops; | 1620 | inode->i_fop = &bad_sock_fops; |
1596 | else | 1621 | else |
1597 | printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", | 1622 | printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" |
1598 | mode); | 1623 | " inode %s:%lu\n", mode, inode->i_sb->s_id, |
1624 | inode->i_ino); | ||
1599 | } | 1625 | } |
1600 | EXPORT_SYMBOL(init_special_inode); | 1626 | EXPORT_SYMBOL(init_special_inode); |