diff options
Diffstat (limited to 'fs/inode.c')
| -rw-r--r-- | fs/inode.c | 128 |
1 files changed, 77 insertions, 51 deletions
diff --git a/fs/inode.c b/fs/inode.c index ae7b67e48661..4d8e3be55976 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 15 | #include <linux/backing-dev.h> | 15 | #include <linux/backing-dev.h> |
| 16 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
| 17 | #include <linux/rwsem.h> | ||
| 17 | #include <linux/hash.h> | 18 | #include <linux/hash.h> |
| 18 | #include <linux/swap.h> | 19 | #include <linux/swap.h> |
| 19 | #include <linux/security.h> | 20 | #include <linux/security.h> |
| @@ -87,14 +88,18 @@ static struct hlist_head *inode_hashtable __read_mostly; | |||
| 87 | DEFINE_SPINLOCK(inode_lock); | 88 | DEFINE_SPINLOCK(inode_lock); |
| 88 | 89 | ||
| 89 | /* | 90 | /* |
| 90 | * iprune_mutex provides exclusion between the kswapd or try_to_free_pages | 91 | * iprune_sem provides exclusion between the kswapd or try_to_free_pages |
| 91 | * icache shrinking path, and the umount path. Without this exclusion, | 92 | * icache shrinking path, and the umount path. Without this exclusion, |
| 92 | * by the time prune_icache calls iput for the inode whose pages it has | 93 | * by the time prune_icache calls iput for the inode whose pages it has |
| 93 | * been invalidating, or by the time it calls clear_inode & destroy_inode | 94 | * been invalidating, or by the time it calls clear_inode & destroy_inode |
| 94 | * from its final dispose_list, the struct super_block they refer to | 95 | * from its final dispose_list, the struct super_block they refer to |
| 95 | * (for inode->i_sb->s_op) may already have been freed and reused. | 96 | * (for inode->i_sb->s_op) may already have been freed and reused. |
| 97 | * | ||
| 98 | * We make this an rwsem because the fastpath is icache shrinking. In | ||
| 99 | * some cases a filesystem may be doing a significant amount of work in | ||
| 100 | * its inode reclaim code, so this should improve parallelism. | ||
| 96 | */ | 101 | */ |
| 97 | static DEFINE_MUTEX(iprune_mutex); | 102 | static DECLARE_RWSEM(iprune_sem); |
| 98 | 103 | ||
| 99 | /* | 104 | /* |
| 100 | * Statistics gathering.. | 105 | * Statistics gathering.. |
| @@ -123,7 +128,7 @@ static void wake_up_inode(struct inode *inode) | |||
| 123 | int inode_init_always(struct super_block *sb, struct inode *inode) | 128 | int inode_init_always(struct super_block *sb, struct inode *inode) |
| 124 | { | 129 | { |
| 125 | static const struct address_space_operations empty_aops; | 130 | static const struct address_space_operations empty_aops; |
| 126 | static struct inode_operations empty_iops; | 131 | static const struct inode_operations empty_iops; |
| 127 | static const struct file_operations empty_fops; | 132 | static const struct file_operations empty_fops; |
| 128 | struct address_space *const mapping = &inode->i_data; | 133 | struct address_space *const mapping = &inode->i_data; |
| 129 | 134 | ||
| @@ -182,9 +187,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
| 182 | if (sb->s_bdev) { | 187 | if (sb->s_bdev) { |
| 183 | struct backing_dev_info *bdi; | 188 | struct backing_dev_info *bdi; |
| 184 | 189 | ||
| 185 | bdi = sb->s_bdev->bd_inode_backing_dev_info; | 190 | bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; |
| 186 | if (!bdi) | ||
| 187 | bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; | ||
| 188 | mapping->backing_dev_info = bdi; | 191 | mapping->backing_dev_info = bdi; |
| 189 | } | 192 | } |
| 190 | inode->i_private = NULL; | 193 | inode->i_private = NULL; |
| @@ -383,7 +386,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) | |||
| 383 | /* | 386 | /* |
| 384 | * We can reschedule here without worrying about the list's | 387 | * We can reschedule here without worrying about the list's |
| 385 | * consistency because the per-sb list of inodes must not | 388 | * consistency because the per-sb list of inodes must not |
| 386 | * change during umount anymore, and because iprune_mutex keeps | 389 | * change during umount anymore, and because iprune_sem keeps |
| 387 | * shrink_icache_memory() away. | 390 | * shrink_icache_memory() away. |
| 388 | */ | 391 | */ |
| 389 | cond_resched_lock(&inode_lock); | 392 | cond_resched_lock(&inode_lock); |
| @@ -422,7 +425,7 @@ int invalidate_inodes(struct super_block *sb) | |||
| 422 | int busy; | 425 | int busy; |
| 423 | LIST_HEAD(throw_away); | 426 | LIST_HEAD(throw_away); |
| 424 | 427 | ||
| 425 | mutex_lock(&iprune_mutex); | 428 | down_write(&iprune_sem); |
| 426 | spin_lock(&inode_lock); | 429 | spin_lock(&inode_lock); |
| 427 | inotify_unmount_inodes(&sb->s_inodes); | 430 | inotify_unmount_inodes(&sb->s_inodes); |
| 428 | fsnotify_unmount_inodes(&sb->s_inodes); | 431 | fsnotify_unmount_inodes(&sb->s_inodes); |
| @@ -430,7 +433,7 @@ int invalidate_inodes(struct super_block *sb) | |||
| 430 | spin_unlock(&inode_lock); | 433 | spin_unlock(&inode_lock); |
| 431 | 434 | ||
| 432 | dispose_list(&throw_away); | 435 | dispose_list(&throw_away); |
| 433 | mutex_unlock(&iprune_mutex); | 436 | up_write(&iprune_sem); |
| 434 | 437 | ||
| 435 | return busy; | 438 | return busy; |
| 436 | } | 439 | } |
| @@ -469,7 +472,7 @@ static void prune_icache(int nr_to_scan) | |||
| 469 | int nr_scanned; | 472 | int nr_scanned; |
| 470 | unsigned long reap = 0; | 473 | unsigned long reap = 0; |
| 471 | 474 | ||
| 472 | mutex_lock(&iprune_mutex); | 475 | down_read(&iprune_sem); |
| 473 | spin_lock(&inode_lock); | 476 | spin_lock(&inode_lock); |
| 474 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { | 477 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { |
| 475 | struct inode *inode; | 478 | struct inode *inode; |
| @@ -511,7 +514,7 @@ static void prune_icache(int nr_to_scan) | |||
| 511 | spin_unlock(&inode_lock); | 514 | spin_unlock(&inode_lock); |
| 512 | 515 | ||
| 513 | dispose_list(&freeable); | 516 | dispose_list(&freeable); |
| 514 | mutex_unlock(&iprune_mutex); | 517 | up_read(&iprune_sem); |
| 515 | } | 518 | } |
| 516 | 519 | ||
| 517 | /* | 520 | /* |
| @@ -697,13 +700,15 @@ void unlock_new_inode(struct inode *inode) | |||
| 697 | } | 700 | } |
| 698 | #endif | 701 | #endif |
| 699 | /* | 702 | /* |
| 700 | * This is special! We do not need the spinlock | 703 | * This is special! We do not need the spinlock when clearing I_LOCK, |
| 701 | * when clearing I_LOCK, because we're guaranteed | 704 | * because we're guaranteed that nobody else tries to do anything about |
| 702 | * that nobody else tries to do anything about the | 705 | * the state of the inode when it is locked, as we just created it (so |
| 703 | * state of the inode when it is locked, as we | 706 | * there can be no old holders that haven't tested I_LOCK). |
| 704 | * just created it (so there can be no old holders | 707 | * However we must emit the memory barrier so that other CPUs reliably |
| 705 | * that haven't tested I_LOCK). | 708 | * see the clearing of I_LOCK after the other inode initialisation has |
| 709 | * completed. | ||
| 706 | */ | 710 | */ |
| 711 | smp_mb(); | ||
| 707 | WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); | 712 | WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); |
| 708 | inode->i_state &= ~(I_LOCK|I_NEW); | 713 | inode->i_state &= ~(I_LOCK|I_NEW); |
| 709 | wake_up_inode(inode); | 714 | wake_up_inode(inode); |
| @@ -1236,7 +1241,16 @@ void generic_delete_inode(struct inode *inode) | |||
| 1236 | } | 1241 | } |
| 1237 | EXPORT_SYMBOL(generic_delete_inode); | 1242 | EXPORT_SYMBOL(generic_delete_inode); |
| 1238 | 1243 | ||
| 1239 | static void generic_forget_inode(struct inode *inode) | 1244 | /** |
| 1245 | * generic_detach_inode - remove inode from inode lists | ||
| 1246 | * @inode: inode to remove | ||
| 1247 | * | ||
| 1248 | * Remove inode from inode lists, write it if it's dirty. This is just an | ||
| 1249 | * internal VFS helper exported for hugetlbfs. Do not use! | ||
| 1250 | * | ||
| 1251 | * Returns 1 if inode should be completely destroyed. | ||
| 1252 | */ | ||
| 1253 | int generic_detach_inode(struct inode *inode) | ||
| 1240 | { | 1254 | { |
| 1241 | struct super_block *sb = inode->i_sb; | 1255 | struct super_block *sb = inode->i_sb; |
| 1242 | 1256 | ||
| @@ -1246,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode) | |||
| 1246 | inodes_stat.nr_unused++; | 1260 | inodes_stat.nr_unused++; |
| 1247 | if (sb->s_flags & MS_ACTIVE) { | 1261 | if (sb->s_flags & MS_ACTIVE) { |
| 1248 | spin_unlock(&inode_lock); | 1262 | spin_unlock(&inode_lock); |
| 1249 | return; | 1263 | return 0; |
| 1250 | } | 1264 | } |
| 1251 | WARN_ON(inode->i_state & I_NEW); | 1265 | WARN_ON(inode->i_state & I_NEW); |
| 1252 | inode->i_state |= I_WILL_FREE; | 1266 | inode->i_state |= I_WILL_FREE; |
| @@ -1264,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode) | |||
| 1264 | inode->i_state |= I_FREEING; | 1278 | inode->i_state |= I_FREEING; |
| 1265 | inodes_stat.nr_inodes--; | 1279 | inodes_stat.nr_inodes--; |
| 1266 | spin_unlock(&inode_lock); | 1280 | spin_unlock(&inode_lock); |
| 1281 | return 1; | ||
| 1282 | } | ||
| 1283 | EXPORT_SYMBOL_GPL(generic_detach_inode); | ||
| 1284 | |||
| 1285 | static void generic_forget_inode(struct inode *inode) | ||
| 1286 | { | ||
| 1287 | if (!generic_detach_inode(inode)) | ||
| 1288 | return; | ||
| 1267 | if (inode->i_data.nrpages) | 1289 | if (inode->i_data.nrpages) |
| 1268 | truncate_inode_pages(&inode->i_data, 0); | 1290 | truncate_inode_pages(&inode->i_data, 0); |
| 1269 | clear_inode(inode); | 1291 | clear_inode(inode); |
| @@ -1394,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry) | |||
| 1394 | struct inode *inode = dentry->d_inode; | 1416 | struct inode *inode = dentry->d_inode; |
| 1395 | struct timespec now; | 1417 | struct timespec now; |
| 1396 | 1418 | ||
| 1397 | if (mnt_want_write(mnt)) | ||
| 1398 | return; | ||
| 1399 | if (inode->i_flags & S_NOATIME) | 1419 | if (inode->i_flags & S_NOATIME) |
| 1400 | goto out; | 1420 | return; |
| 1401 | if (IS_NOATIME(inode)) | 1421 | if (IS_NOATIME(inode)) |
| 1402 | goto out; | 1422 | return; |
| 1403 | if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) | 1423 | if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) |
| 1404 | goto out; | 1424 | return; |
| 1405 | 1425 | ||
| 1406 | if (mnt->mnt_flags & MNT_NOATIME) | 1426 | if (mnt->mnt_flags & MNT_NOATIME) |
| 1407 | goto out; | 1427 | return; |
| 1408 | if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) | 1428 | if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) |
| 1409 | goto out; | 1429 | return; |
| 1410 | 1430 | ||
| 1411 | now = current_fs_time(inode->i_sb); | 1431 | now = current_fs_time(inode->i_sb); |
| 1412 | 1432 | ||
| 1413 | if (!relatime_need_update(mnt, inode, now)) | 1433 | if (!relatime_need_update(mnt, inode, now)) |
| 1414 | goto out; | 1434 | return; |
| 1415 | 1435 | ||
| 1416 | if (timespec_equal(&inode->i_atime, &now)) | 1436 | if (timespec_equal(&inode->i_atime, &now)) |
| 1417 | goto out; | 1437 | return; |
| 1438 | |||
| 1439 | if (mnt_want_write(mnt)) | ||
| 1440 | return; | ||
| 1418 | 1441 | ||
| 1419 | inode->i_atime = now; | 1442 | inode->i_atime = now; |
| 1420 | mark_inode_dirty_sync(inode); | 1443 | mark_inode_dirty_sync(inode); |
| 1421 | out: | ||
| 1422 | mnt_drop_write(mnt); | 1444 | mnt_drop_write(mnt); |
| 1423 | } | 1445 | } |
| 1424 | EXPORT_SYMBOL(touch_atime); | 1446 | EXPORT_SYMBOL(touch_atime); |
| @@ -1439,34 +1461,37 @@ void file_update_time(struct file *file) | |||
| 1439 | { | 1461 | { |
| 1440 | struct inode *inode = file->f_path.dentry->d_inode; | 1462 | struct inode *inode = file->f_path.dentry->d_inode; |
| 1441 | struct timespec now; | 1463 | struct timespec now; |
| 1442 | int sync_it = 0; | 1464 | enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; |
| 1443 | int err; | ||
| 1444 | 1465 | ||
| 1466 | /* First try to exhaust all avenues to not sync */ | ||
| 1445 | if (IS_NOCMTIME(inode)) | 1467 | if (IS_NOCMTIME(inode)) |
| 1446 | return; | 1468 | return; |
| 1447 | 1469 | ||
| 1448 | err = mnt_want_write_file(file); | ||
| 1449 | if (err) | ||
| 1450 | return; | ||
| 1451 | |||
| 1452 | now = current_fs_time(inode->i_sb); | 1470 | now = current_fs_time(inode->i_sb); |
| 1453 | if (!timespec_equal(&inode->i_mtime, &now)) { | 1471 | if (!timespec_equal(&inode->i_mtime, &now)) |
| 1454 | inode->i_mtime = now; | 1472 | sync_it = S_MTIME; |
| 1455 | sync_it = 1; | ||
| 1456 | } | ||
| 1457 | 1473 | ||
| 1458 | if (!timespec_equal(&inode->i_ctime, &now)) { | 1474 | if (!timespec_equal(&inode->i_ctime, &now)) |
| 1459 | inode->i_ctime = now; | 1475 | sync_it |= S_CTIME; |
| 1460 | sync_it = 1; | ||
| 1461 | } | ||
| 1462 | 1476 | ||
| 1463 | if (IS_I_VERSION(inode)) { | 1477 | if (IS_I_VERSION(inode)) |
| 1464 | inode_inc_iversion(inode); | 1478 | sync_it |= S_VERSION; |
| 1465 | sync_it = 1; | 1479 | |
| 1466 | } | 1480 | if (!sync_it) |
| 1481 | return; | ||
| 1467 | 1482 | ||
| 1468 | if (sync_it) | 1483 | /* Finally allowed to write? Takes lock. */ |
| 1469 | mark_inode_dirty_sync(inode); | 1484 | if (mnt_want_write_file(file)) |
| 1485 | return; | ||
| 1486 | |||
| 1487 | /* Only change inode inside the lock region */ | ||
| 1488 | if (sync_it & S_VERSION) | ||
| 1489 | inode_inc_iversion(inode); | ||
| 1490 | if (sync_it & S_CTIME) | ||
| 1491 | inode->i_ctime = now; | ||
| 1492 | if (sync_it & S_MTIME) | ||
| 1493 | inode->i_mtime = now; | ||
| 1494 | mark_inode_dirty_sync(inode); | ||
| 1470 | mnt_drop_write(file->f_path.mnt); | 1495 | mnt_drop_write(file->f_path.mnt); |
| 1471 | } | 1496 | } |
| 1472 | EXPORT_SYMBOL(file_update_time); | 1497 | EXPORT_SYMBOL(file_update_time); |
| @@ -1594,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) | |||
| 1594 | else if (S_ISSOCK(mode)) | 1619 | else if (S_ISSOCK(mode)) |
| 1595 | inode->i_fop = &bad_sock_fops; | 1620 | inode->i_fop = &bad_sock_fops; |
| 1596 | else | 1621 | else |
| 1597 | printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", | 1622 | printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" |
| 1598 | mode); | 1623 | " inode %s:%lu\n", mode, inode->i_sb->s_id, |
| 1624 | inode->i_ino); | ||
| 1599 | } | 1625 | } |
| 1600 | EXPORT_SYMBOL(init_special_inode); | 1626 | EXPORT_SYMBOL(init_special_inode); |
