aboutsummaryrefslogtreecommitdiffstats
path: root/fs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/inode.c')
-rw-r--r--fs/inode.c198
1 files changed, 170 insertions, 28 deletions
diff --git a/fs/inode.c b/fs/inode.c
index 9f4f5fecc09..c99163b1b31 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -135,8 +135,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
135 inode->i_fop = &empty_fops; 135 inode->i_fop = &empty_fops;
136 inode->__i_nlink = 1; 136 inode->__i_nlink = 1;
137 inode->i_opflags = 0; 137 inode->i_opflags = 0;
138 inode->i_uid = 0; 138 i_uid_write(inode, 0);
139 inode->i_gid = 0; 139 i_gid_write(inode, 0);
140 atomic_set(&inode->i_writecount, 0); 140 atomic_set(&inode->i_writecount, 0);
141 inode->i_size = 0; 141 inode->i_size = 0;
142 inode->i_blocks = 0; 142 inode->i_blocks = 0;
@@ -486,7 +486,7 @@ void __remove_inode_hash(struct inode *inode)
486} 486}
487EXPORT_SYMBOL(__remove_inode_hash); 487EXPORT_SYMBOL(__remove_inode_hash);
488 488
489void end_writeback(struct inode *inode) 489void clear_inode(struct inode *inode)
490{ 490{
491 might_sleep(); 491 might_sleep();
492 /* 492 /*
@@ -500,11 +500,10 @@ void end_writeback(struct inode *inode)
500 BUG_ON(!list_empty(&inode->i_data.private_list)); 500 BUG_ON(!list_empty(&inode->i_data.private_list));
501 BUG_ON(!(inode->i_state & I_FREEING)); 501 BUG_ON(!(inode->i_state & I_FREEING));
502 BUG_ON(inode->i_state & I_CLEAR); 502 BUG_ON(inode->i_state & I_CLEAR);
503 inode_sync_wait(inode);
504 /* don't need i_lock here, no concurrent mods to i_state */ 503 /* don't need i_lock here, no concurrent mods to i_state */
505 inode->i_state = I_FREEING | I_CLEAR; 504 inode->i_state = I_FREEING | I_CLEAR;
506} 505}
507EXPORT_SYMBOL(end_writeback); 506EXPORT_SYMBOL(clear_inode);
508 507
509/* 508/*
510 * Free the inode passed in, removing it from the lists it is still connected 509 * Free the inode passed in, removing it from the lists it is still connected
@@ -531,12 +530,20 @@ static void evict(struct inode *inode)
531 530
532 inode_sb_list_del(inode); 531 inode_sb_list_del(inode);
533 532
533 /*
534 * Wait for flusher thread to be done with the inode so that filesystem
535 * does not start destroying it while writeback is still running. Since
536 * the inode has I_FREEING set, flusher thread won't start new work on
537 * the inode. We just have to wait for running writeback to finish.
538 */
539 inode_wait_for_writeback(inode);
540
534 if (op->evict_inode) { 541 if (op->evict_inode) {
535 op->evict_inode(inode); 542 op->evict_inode(inode);
536 } else { 543 } else {
537 if (inode->i_data.nrpages) 544 if (inode->i_data.nrpages)
538 truncate_inode_pages(&inode->i_data, 0); 545 truncate_inode_pages(&inode->i_data, 0);
539 end_writeback(inode); 546 clear_inode(inode);
540 } 547 }
541 if (S_ISBLK(inode->i_mode) && inode->i_bdev) 548 if (S_ISBLK(inode->i_mode) && inode->i_bdev)
542 bd_forget(inode); 549 bd_forget(inode);
@@ -1480,10 +1487,30 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
1480 return 0; 1487 return 0;
1481} 1488}
1482 1489
1490/*
1491 * This does the actual work of updating an inode's time or version. Callers must
1492 * have called mnt_want_write() before calling this.
1493 */
1494static int update_time(struct inode *inode, struct timespec *time, int flags)
1495{
1496 if (inode->i_op->update_time)
1497 return inode->i_op->update_time(inode, time, flags);
1498
1499 if (flags & S_ATIME)
1500 inode->i_atime = *time;
1501 if (flags & S_VERSION)
1502 inode_inc_iversion(inode);
1503 if (flags & S_CTIME)
1504 inode->i_ctime = *time;
1505 if (flags & S_MTIME)
1506 inode->i_mtime = *time;
1507 mark_inode_dirty_sync(inode);
1508 return 0;
1509}
1510
1483/** 1511/**
1484 * touch_atime - update the access time 1512 * touch_atime - update the access time
1485 * @mnt: mount the inode is accessed on 1513 * @path: the &struct path to update
1486 * @dentry: dentry accessed
1487 * 1514 *
1488 * Update the accessed time on an inode and mark it for writeback. 1515 * Update the accessed time on an inode and mark it for writeback.
1489 * This function automatically handles read only file systems and media, 1516 * This function automatically handles read only file systems and media,
@@ -1518,12 +1545,83 @@ void touch_atime(struct path *path)
1518 if (mnt_want_write(mnt)) 1545 if (mnt_want_write(mnt))
1519 return; 1546 return;
1520 1547
1521 inode->i_atime = now; 1548 /*
1522 mark_inode_dirty_sync(inode); 1549 * File systems can error out when updating inodes if they need to
1550 * allocate new space to modify an inode (such is the case for
1551 * Btrfs), but since we touch atime while walking down the path we
1552 * really don't care if we failed to update the atime of the file,
1553 * so just ignore the return value.
1554 */
1555 update_time(inode, &now, S_ATIME);
1523 mnt_drop_write(mnt); 1556 mnt_drop_write(mnt);
1524} 1557}
1525EXPORT_SYMBOL(touch_atime); 1558EXPORT_SYMBOL(touch_atime);
1526 1559
1560/*
1561 * The logic we want is
1562 *
1563 * if suid or (sgid and xgrp)
1564 * remove privs
1565 */
1566int should_remove_suid(struct dentry *dentry)
1567{
1568 umode_t mode = dentry->d_inode->i_mode;
1569 int kill = 0;
1570
1571 /* suid always must be killed */
1572 if (unlikely(mode & S_ISUID))
1573 kill = ATTR_KILL_SUID;
1574
1575 /*
1576 * sgid without any exec bits is just a mandatory locking mark; leave
1577 * it alone. If some exec bits are set, it's a real sgid; kill it.
1578 */
1579 if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
1580 kill |= ATTR_KILL_SGID;
1581
1582 if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
1583 return kill;
1584
1585 return 0;
1586}
1587EXPORT_SYMBOL(should_remove_suid);
1588
1589static int __remove_suid(struct dentry *dentry, int kill)
1590{
1591 struct iattr newattrs;
1592
1593 newattrs.ia_valid = ATTR_FORCE | kill;
1594 return notify_change(dentry, &newattrs);
1595}
1596
1597int file_remove_suid(struct file *file)
1598{
1599 struct dentry *dentry = file->f_path.dentry;
1600 struct inode *inode = dentry->d_inode;
1601 int killsuid;
1602 int killpriv;
1603 int error = 0;
1604
1605 /* Fast path for nothing security related */
1606 if (IS_NOSEC(inode))
1607 return 0;
1608
1609 killsuid = should_remove_suid(dentry);
1610 killpriv = security_inode_need_killpriv(dentry);
1611
1612 if (killpriv < 0)
1613 return killpriv;
1614 if (killpriv)
1615 error = security_inode_killpriv(dentry);
1616 if (!error && killsuid)
1617 error = __remove_suid(dentry, killsuid);
1618 if (!error && (inode->i_sb->s_flags & MS_NOSEC))
1619 inode->i_flags |= S_NOSEC;
1620
1621 return error;
1622}
1623EXPORT_SYMBOL(file_remove_suid);
1624
1527/** 1625/**
1528 * file_update_time - update mtime and ctime time 1626 * file_update_time - update mtime and ctime time
1529 * @file: file accessed 1627 * @file: file accessed
@@ -1533,18 +1631,20 @@ EXPORT_SYMBOL(touch_atime);
1533 * usage in the file write path of filesystems, and filesystems may 1631 * usage in the file write path of filesystems, and filesystems may
1534 * choose to explicitly ignore update via this function with the 1632 * choose to explicitly ignore update via this function with the
1535 * S_NOCMTIME inode flag, e.g. for network filesystem where these 1633 * S_NOCMTIME inode flag, e.g. for network filesystem where these
1536 * timestamps are handled by the server. 1634 * timestamps are handled by the server. This can return an error for
1635 * file systems that need to allocate space in order to update an inode.
1537 */ 1636 */
1538 1637
1539void file_update_time(struct file *file) 1638int file_update_time(struct file *file)
1540{ 1639{
1541 struct inode *inode = file->f_path.dentry->d_inode; 1640 struct inode *inode = file->f_path.dentry->d_inode;
1542 struct timespec now; 1641 struct timespec now;
1543 enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; 1642 int sync_it = 0;
1643 int ret;
1544 1644
1545 /* First try to exhaust all avenues to not sync */ 1645 /* First try to exhaust all avenues to not sync */
1546 if (IS_NOCMTIME(inode)) 1646 if (IS_NOCMTIME(inode))
1547 return; 1647 return 0;
1548 1648
1549 now = current_fs_time(inode->i_sb); 1649 now = current_fs_time(inode->i_sb);
1550 if (!timespec_equal(&inode->i_mtime, &now)) 1650 if (!timespec_equal(&inode->i_mtime, &now))
@@ -1557,21 +1657,16 @@ void file_update_time(struct file *file)
1557 sync_it |= S_VERSION; 1657 sync_it |= S_VERSION;
1558 1658
1559 if (!sync_it) 1659 if (!sync_it)
1560 return; 1660 return 0;
1561 1661
1562 /* Finally allowed to write? Takes lock. */ 1662 /* Finally allowed to write? Takes lock. */
1563 if (mnt_want_write_file(file)) 1663 if (mnt_want_write_file(file))
1564 return; 1664 return 0;
1565 1665
1566 /* Only change inode inside the lock region */ 1666 ret = update_time(inode, &now, sync_it);
1567 if (sync_it & S_VERSION)
1568 inode_inc_iversion(inode);
1569 if (sync_it & S_CTIME)
1570 inode->i_ctime = now;
1571 if (sync_it & S_MTIME)
1572 inode->i_mtime = now;
1573 mark_inode_dirty_sync(inode);
1574 mnt_drop_write_file(file); 1667 mnt_drop_write_file(file);
1668
1669 return ret;
1575} 1670}
1576EXPORT_SYMBOL(file_update_time); 1671EXPORT_SYMBOL(file_update_time);
1577 1672
@@ -1647,6 +1742,7 @@ void __init inode_init_early(void)
1647 HASH_EARLY, 1742 HASH_EARLY,
1648 &i_hash_shift, 1743 &i_hash_shift,
1649 &i_hash_mask, 1744 &i_hash_mask,
1745 0,
1650 0); 1746 0);
1651 1747
1652 for (loop = 0; loop < (1U << i_hash_shift); loop++) 1748 for (loop = 0; loop < (1U << i_hash_shift); loop++)
@@ -1677,6 +1773,7 @@ void __init inode_init(void)
1677 0, 1773 0,
1678 &i_hash_shift, 1774 &i_hash_shift,
1679 &i_hash_mask, 1775 &i_hash_mask,
1776 0,
1680 0); 1777 0);
1681 1778
1682 for (loop = 0; loop < (1U << i_hash_shift); loop++) 1779 for (loop = 0; loop < (1U << i_hash_shift); loop++)
@@ -1732,12 +1829,57 @@ EXPORT_SYMBOL(inode_init_owner);
1732 */ 1829 */
1733bool inode_owner_or_capable(const struct inode *inode) 1830bool inode_owner_or_capable(const struct inode *inode)
1734{ 1831{
1735 struct user_namespace *ns = inode_userns(inode); 1832 if (uid_eq(current_fsuid(), inode->i_uid))
1736
1737 if (current_user_ns() == ns && current_fsuid() == inode->i_uid)
1738 return true; 1833 return true;
1739 if (ns_capable(ns, CAP_FOWNER)) 1834 if (inode_capable(inode, CAP_FOWNER))
1740 return true; 1835 return true;
1741 return false; 1836 return false;
1742} 1837}
1743EXPORT_SYMBOL(inode_owner_or_capable); 1838EXPORT_SYMBOL(inode_owner_or_capable);
1839
1840/*
1841 * Direct i/o helper functions
1842 */
1843static void __inode_dio_wait(struct inode *inode)
1844{
1845 wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
1846 DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
1847
1848 do {
1849 prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
1850 if (atomic_read(&inode->i_dio_count))
1851 schedule();
1852 } while (atomic_read(&inode->i_dio_count));
1853 finish_wait(wq, &q.wait);
1854}
1855
1856/**
1857 * inode_dio_wait - wait for outstanding DIO requests to finish
1858 * @inode: inode to wait for
1859 *
1860 * Waits for all pending direct I/O requests to finish so that we can
1861 * proceed with a truncate or equivalent operation.
1862 *
1863 * Must be called under a lock that serializes taking new references
1864 * to i_dio_count, usually by inode->i_mutex.
1865 */
1866void inode_dio_wait(struct inode *inode)
1867{
1868 if (atomic_read(&inode->i_dio_count))
1869 __inode_dio_wait(inode);
1870}
1871EXPORT_SYMBOL(inode_dio_wait);
1872
1873/**
1874 * inode_dio_done - signal the finish of a direct I/O request
1875 * @inode: inode the direct I/O happens on
1876 *
1877 * This is called once we've finished processing a direct I/O request,
1878 * and is used to wake up callers waiting for direct I/O to be quiesced.
1879 */
1880void inode_dio_done(struct inode *inode)
1881{
1882 if (atomic_dec_and_test(&inode->i_dio_count))
1883 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
1884}
1885EXPORT_SYMBOL(inode_dio_done);