about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/block_dev.c 166
-rw-r--r-- fs/buffer.c 145
-rw-r--r-- fs/cifs/file.c 6
-rw-r--r-- fs/cifs/readdir.c 5
-rw-r--r-- fs/cifs/smb1ops.c 3
-rw-r--r-- fs/direct-io.c 8
-rw-r--r-- fs/file.c 14
-rw-r--r-- fs/fs-writeback.c 2
-rw-r--r-- fs/inode.c 16
-rw-r--r-- fs/internal.h 1
-rw-r--r-- fs/jbd/transaction.c 2
-rw-r--r-- fs/namei.c 5
-rw-r--r-- fs/nfs/dir.c 7
-rw-r--r-- fs/proc/base.c 5
14 files changed, 181 insertions, 204 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1a1e5e3b1eaf..ab3a456f6650 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -70,19 +70,6 @@ static void bdev_inode_switch_bdi(struct inode *inode,
70 spin_unlock(&dst->wb.list_lock); 70 spin_unlock(&dst->wb.list_lock);
71} 71}
72 72
73sector_t blkdev_max_block(struct block_device *bdev)
74{
75 sector_t retval = ~((sector_t)0);
76 loff_t sz = i_size_read(bdev->bd_inode);
77
78 if (sz) {
79 unsigned int size = block_size(bdev);
80 unsigned int sizebits = blksize_bits(size);
81 retval = (sz >> sizebits);
82 }
83 return retval;
84}
85
86/* Kill _all_ buffers and pagecache , dirty or not.. */ 73/* Kill _all_ buffers and pagecache , dirty or not.. */
87void kill_bdev(struct block_device *bdev) 74void kill_bdev(struct block_device *bdev)
88{ 75{
@@ -116,8 +103,6 @@ EXPORT_SYMBOL(invalidate_bdev);
116 103
117int set_blocksize(struct block_device *bdev, int size) 104int set_blocksize(struct block_device *bdev, int size)
118{ 105{
119 struct address_space *mapping;
120
121 /* Size must be a power of two, and between 512 and PAGE_SIZE */ 106 /* Size must be a power of two, and between 512 and PAGE_SIZE */
122 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) 107 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
123 return -EINVAL; 108 return -EINVAL;
@@ -126,19 +111,6 @@ int set_blocksize(struct block_device *bdev, int size)
126 if (size < bdev_logical_block_size(bdev)) 111 if (size < bdev_logical_block_size(bdev))
127 return -EINVAL; 112 return -EINVAL;
128 113
129 /* Prevent starting I/O or mapping the device */
130 percpu_down_write(&bdev->bd_block_size_semaphore);
131
132 /* Check that the block device is not memory mapped */
133 mapping = bdev->bd_inode->i_mapping;
134 mutex_lock(&mapping->i_mmap_mutex);
135 if (mapping_mapped(mapping)) {
136 mutex_unlock(&mapping->i_mmap_mutex);
137 percpu_up_write(&bdev->bd_block_size_semaphore);
138 return -EBUSY;
139 }
140 mutex_unlock(&mapping->i_mmap_mutex);
141
142 /* Don't change the size if it is same as current */ 114 /* Don't change the size if it is same as current */
143 if (bdev->bd_block_size != size) { 115 if (bdev->bd_block_size != size) {
144 sync_blockdev(bdev); 116 sync_blockdev(bdev);
@@ -146,9 +118,6 @@ int set_blocksize(struct block_device *bdev, int size)
146 bdev->bd_inode->i_blkbits = blksize_bits(size); 118 bdev->bd_inode->i_blkbits = blksize_bits(size);
147 kill_bdev(bdev); 119 kill_bdev(bdev);
148 } 120 }
149
150 percpu_up_write(&bdev->bd_block_size_semaphore);
151
152 return 0; 121 return 0;
153} 122}
154 123
@@ -181,52 +150,12 @@ static int
181blkdev_get_block(struct inode *inode, sector_t iblock, 150blkdev_get_block(struct inode *inode, sector_t iblock,
182 struct buffer_head *bh, int create) 151 struct buffer_head *bh, int create)
183{ 152{
184 if (iblock >= blkdev_max_block(I_BDEV(inode))) {
185 if (create)
186 return -EIO;
187
188 /*
189 * for reads, we're just trying to fill a partial page.
190 * return a hole, they will have to call get_block again
191 * before they can fill it, and they will get -EIO at that
192 * time
193 */
194 return 0;
195 }
196 bh->b_bdev = I_BDEV(inode); 153 bh->b_bdev = I_BDEV(inode);
197 bh->b_blocknr = iblock; 154 bh->b_blocknr = iblock;
198 set_buffer_mapped(bh); 155 set_buffer_mapped(bh);
199 return 0; 156 return 0;
200} 157}
201 158
202static int
203blkdev_get_blocks(struct inode *inode, sector_t iblock,
204 struct buffer_head *bh, int create)
205{
206 sector_t end_block = blkdev_max_block(I_BDEV(inode));
207 unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
208
209 if ((iblock + max_blocks) > end_block) {
210 max_blocks = end_block - iblock;
211 if ((long)max_blocks <= 0) {
212 if (create)
213 return -EIO; /* write fully beyond EOF */
214 /*
215 * It is a read which is fully beyond EOF. We return
216 * a !buffer_mapped buffer
217 */
218 max_blocks = 0;
219 }
220 }
221
222 bh->b_bdev = I_BDEV(inode);
223 bh->b_blocknr = iblock;
224 bh->b_size = max_blocks << inode->i_blkbits;
225 if (max_blocks)
226 set_buffer_mapped(bh);
227 return 0;
228}
229
230static ssize_t 159static ssize_t
231blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 160blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
232 loff_t offset, unsigned long nr_segs) 161 loff_t offset, unsigned long nr_segs)
@@ -235,7 +164,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
235 struct inode *inode = file->f_mapping->host; 164 struct inode *inode = file->f_mapping->host;
236 165
237 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, 166 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
238 nr_segs, blkdev_get_blocks, NULL, NULL, 0); 167 nr_segs, blkdev_get_block, NULL, NULL, 0);
239} 168}
240 169
241int __sync_blockdev(struct block_device *bdev, int wait) 170int __sync_blockdev(struct block_device *bdev, int wait)
@@ -459,12 +388,6 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
459 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); 388 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
460 if (!ei) 389 if (!ei)
461 return NULL; 390 return NULL;
462
463 if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) {
464 kmem_cache_free(bdev_cachep, ei);
465 return NULL;
466 }
467
468 return &ei->vfs_inode; 391 return &ei->vfs_inode;
469} 392}
470 393
@@ -473,8 +396,6 @@ static void bdev_i_callback(struct rcu_head *head)
473 struct inode *inode = container_of(head, struct inode, i_rcu); 396 struct inode *inode = container_of(head, struct inode, i_rcu);
474 struct bdev_inode *bdi = BDEV_I(inode); 397 struct bdev_inode *bdi = BDEV_I(inode);
475 398
476 percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore);
477
478 kmem_cache_free(bdev_cachep, bdi); 399 kmem_cache_free(bdev_cachep, bdi);
479} 400}
480 401
@@ -1593,22 +1514,6 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1593 return blkdev_ioctl(bdev, mode, cmd, arg); 1514 return blkdev_ioctl(bdev, mode, cmd, arg);
1594} 1515}
1595 1516
1596ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
1597 unsigned long nr_segs, loff_t pos)
1598{
1599 ssize_t ret;
1600 struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
1601
1602 percpu_down_read(&bdev->bd_block_size_semaphore);
1603
1604 ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
1605
1606 percpu_up_read(&bdev->bd_block_size_semaphore);
1607
1608 return ret;
1609}
1610EXPORT_SYMBOL_GPL(blkdev_aio_read);
1611
1612/* 1517/*
1613 * Write data to the block device. Only intended for the block device itself 1518 * Write data to the block device. Only intended for the block device itself
1614 * and the raw driver which basically is a fake block device. 1519 * and the raw driver which basically is a fake block device.
@@ -1620,16 +1525,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1620 unsigned long nr_segs, loff_t pos) 1525 unsigned long nr_segs, loff_t pos)
1621{ 1526{
1622 struct file *file = iocb->ki_filp; 1527 struct file *file = iocb->ki_filp;
1623 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1624 struct blk_plug plug; 1528 struct blk_plug plug;
1625 ssize_t ret; 1529 ssize_t ret;
1626 1530
1627 BUG_ON(iocb->ki_pos != pos); 1531 BUG_ON(iocb->ki_pos != pos);
1628 1532
1629 blk_start_plug(&plug); 1533 blk_start_plug(&plug);
1630
1631 percpu_down_read(&bdev->bd_block_size_semaphore);
1632
1633 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 1534 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1634 if (ret > 0 || ret == -EIOCBQUEUED) { 1535 if (ret > 0 || ret == -EIOCBQUEUED) {
1635 ssize_t err; 1536 ssize_t err;
@@ -1638,62 +1539,27 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1638 if (err < 0 && ret > 0) 1539 if (err < 0 && ret > 0)
1639 ret = err; 1540 ret = err;
1640 } 1541 }
1641
1642 percpu_up_read(&bdev->bd_block_size_semaphore);
1643
1644 blk_finish_plug(&plug); 1542 blk_finish_plug(&plug);
1645
1646 return ret; 1543 return ret;
1647} 1544}
1648EXPORT_SYMBOL_GPL(blkdev_aio_write); 1545EXPORT_SYMBOL_GPL(blkdev_aio_write);
1649 1546
1650static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) 1547static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
1651{ 1548 unsigned long nr_segs, loff_t pos)
1652 int ret;
1653 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1654
1655 percpu_down_read(&bdev->bd_block_size_semaphore);
1656
1657 ret = generic_file_mmap(file, vma);
1658
1659 percpu_up_read(&bdev->bd_block_size_semaphore);
1660
1661 return ret;
1662}
1663
1664static ssize_t blkdev_splice_read(struct file *file, loff_t *ppos,
1665 struct pipe_inode_info *pipe, size_t len,
1666 unsigned int flags)
1667{
1668 ssize_t ret;
1669 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1670
1671 percpu_down_read(&bdev->bd_block_size_semaphore);
1672
1673 ret = generic_file_splice_read(file, ppos, pipe, len, flags);
1674
1675 percpu_up_read(&bdev->bd_block_size_semaphore);
1676
1677 return ret;
1678}
1679
1680static ssize_t blkdev_splice_write(struct pipe_inode_info *pipe,
1681 struct file *file, loff_t *ppos, size_t len,
1682 unsigned int flags)
1683{ 1549{
1684 ssize_t ret; 1550 struct file *file = iocb->ki_filp;
1685 struct block_device *bdev = I_BDEV(file->f_mapping->host); 1551 struct inode *bd_inode = file->f_mapping->host;
1686 1552 loff_t size = i_size_read(bd_inode);
1687 percpu_down_read(&bdev->bd_block_size_semaphore);
1688
1689 ret = generic_file_splice_write(pipe, file, ppos, len, flags);
1690 1553
1691 percpu_up_read(&bdev->bd_block_size_semaphore); 1554 if (pos >= size)
1555 return 0;
1692 1556
1693 return ret; 1557 size -= pos;
1558 if (size < INT_MAX)
1559 nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
1560 return generic_file_aio_read(iocb, iov, nr_segs, pos);
1694} 1561}
1695 1562
1696
1697/* 1563/*
1698 * Try to release a page associated with block device when the system 1564 * Try to release a page associated with block device when the system
1699 * is under memory pressure. 1565 * is under memory pressure.
@@ -1724,16 +1590,16 @@ const struct file_operations def_blk_fops = {
1724 .llseek = block_llseek, 1590 .llseek = block_llseek,
1725 .read = do_sync_read, 1591 .read = do_sync_read,
1726 .write = do_sync_write, 1592 .write = do_sync_write,
1727 .aio_read = blkdev_aio_read, 1593 .aio_read = blkdev_aio_read,
1728 .aio_write = blkdev_aio_write, 1594 .aio_write = blkdev_aio_write,
1729 .mmap = blkdev_mmap, 1595 .mmap = generic_file_mmap,
1730 .fsync = blkdev_fsync, 1596 .fsync = blkdev_fsync,
1731 .unlocked_ioctl = block_ioctl, 1597 .unlocked_ioctl = block_ioctl,
1732#ifdef CONFIG_COMPAT 1598#ifdef CONFIG_COMPAT
1733 .compat_ioctl = compat_blkdev_ioctl, 1599 .compat_ioctl = compat_blkdev_ioctl,
1734#endif 1600#endif
1735 .splice_read = blkdev_splice_read, 1601 .splice_read = generic_file_splice_read,
1736 .splice_write = blkdev_splice_write, 1602 .splice_write = generic_file_splice_write,
1737}; 1603};
1738 1604
1739int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) 1605int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
diff --git a/fs/buffer.c b/fs/buffer.c
index b5f044283edb..ec0aca8ba6bf 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -911,6 +911,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
911 attach_page_buffers(page, head); 911 attach_page_buffers(page, head);
912} 912}
913 913
914static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
915{
916 sector_t retval = ~((sector_t)0);
917 loff_t sz = i_size_read(bdev->bd_inode);
918
919 if (sz) {
920 unsigned int sizebits = blksize_bits(size);
921 retval = (sz >> sizebits);
922 }
923 return retval;
924}
925
914/* 926/*
915 * Initialise the state of a blockdev page's buffers. 927 * Initialise the state of a blockdev page's buffers.
916 */ 928 */
@@ -921,7 +933,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
921 struct buffer_head *head = page_buffers(page); 933 struct buffer_head *head = page_buffers(page);
922 struct buffer_head *bh = head; 934 struct buffer_head *bh = head;
923 int uptodate = PageUptodate(page); 935 int uptodate = PageUptodate(page);
924 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode)); 936 sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
925 937
926 do { 938 do {
927 if (!buffer_mapped(bh)) { 939 if (!buffer_mapped(bh)) {
@@ -1553,6 +1565,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1553EXPORT_SYMBOL(unmap_underlying_metadata); 1565EXPORT_SYMBOL(unmap_underlying_metadata);
1554 1566
1555/* 1567/*
1568 * Size is a power-of-two in the range 512..PAGE_SIZE,
1569 * and the case we care about most is PAGE_SIZE.
1570 *
1571 * So this *could* possibly be written with those
1572 * constraints in mind (relevant mostly if some
1573 * architecture has a slow bit-scan instruction)
1574 */
1575static inline int block_size_bits(unsigned int blocksize)
1576{
1577 return ilog2(blocksize);
1578}
1579
1580static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1581{
1582 BUG_ON(!PageLocked(page));
1583
1584 if (!page_has_buffers(page))
1585 create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
1586 return page_buffers(page);
1587}
1588
1589/*
1556 * NOTE! All mapped/uptodate combinations are valid: 1590 * NOTE! All mapped/uptodate combinations are valid:
1557 * 1591 *
1558 * Mapped Uptodate Meaning 1592 * Mapped Uptodate Meaning
@@ -1589,19 +1623,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1589 sector_t block; 1623 sector_t block;
1590 sector_t last_block; 1624 sector_t last_block;
1591 struct buffer_head *bh, *head; 1625 struct buffer_head *bh, *head;
1592 const unsigned blocksize = 1 << inode->i_blkbits; 1626 unsigned int blocksize, bbits;
1593 int nr_underway = 0; 1627 int nr_underway = 0;
1594 int write_op = (wbc->sync_mode == WB_SYNC_ALL ? 1628 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1595 WRITE_SYNC : WRITE); 1629 WRITE_SYNC : WRITE);
1596 1630
1597 BUG_ON(!PageLocked(page)); 1631 head = create_page_buffers(page, inode,
1598
1599 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1600
1601 if (!page_has_buffers(page)) {
1602 create_empty_buffers(page, blocksize,
1603 (1 << BH_Dirty)|(1 << BH_Uptodate)); 1632 (1 << BH_Dirty)|(1 << BH_Uptodate));
1604 }
1605 1633
1606 /* 1634 /*
1607 * Be very careful. We have no exclusion from __set_page_dirty_buffers 1635 * Be very careful. We have no exclusion from __set_page_dirty_buffers
@@ -1613,9 +1641,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1613 * handle that here by just cleaning them. 1641 * handle that here by just cleaning them.
1614 */ 1642 */
1615 1643
1616 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1617 head = page_buffers(page);
1618 bh = head; 1644 bh = head;
1645 blocksize = bh->b_size;
1646 bbits = block_size_bits(blocksize);
1647
1648 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1649 last_block = (i_size_read(inode) - 1) >> bbits;
1619 1650
1620 /* 1651 /*
1621 * Get all the dirty buffers mapped to disk addresses and 1652 * Get all the dirty buffers mapped to disk addresses and
@@ -1806,12 +1837,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1806 BUG_ON(to > PAGE_CACHE_SIZE); 1837 BUG_ON(to > PAGE_CACHE_SIZE);
1807 BUG_ON(from > to); 1838 BUG_ON(from > to);
1808 1839
1809 blocksize = 1 << inode->i_blkbits; 1840 head = create_page_buffers(page, inode, 0);
1810 if (!page_has_buffers(page)) 1841 blocksize = head->b_size;
1811 create_empty_buffers(page, blocksize, 0); 1842 bbits = block_size_bits(blocksize);
1812 head = page_buffers(page);
1813 1843
1814 bbits = inode->i_blkbits;
1815 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); 1844 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1816 1845
1817 for(bh = head, block_start = 0; bh != head || !block_start; 1846 for(bh = head, block_start = 0; bh != head || !block_start;
@@ -1881,11 +1910,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1881 unsigned blocksize; 1910 unsigned blocksize;
1882 struct buffer_head *bh, *head; 1911 struct buffer_head *bh, *head;
1883 1912
1884 blocksize = 1 << inode->i_blkbits; 1913 bh = head = page_buffers(page);
1914 blocksize = bh->b_size;
1885 1915
1886 for(bh = head = page_buffers(page), block_start = 0; 1916 block_start = 0;
1887 bh != head || !block_start; 1917 do {
1888 block_start=block_end, bh = bh->b_this_page) {
1889 block_end = block_start + blocksize; 1918 block_end = block_start + blocksize;
1890 if (block_end <= from || block_start >= to) { 1919 if (block_end <= from || block_start >= to) {
1891 if (!buffer_uptodate(bh)) 1920 if (!buffer_uptodate(bh))
@@ -1895,7 +1924,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1895 mark_buffer_dirty(bh); 1924 mark_buffer_dirty(bh);
1896 } 1925 }
1897 clear_buffer_new(bh); 1926 clear_buffer_new(bh);
1898 } 1927
1928 block_start = block_end;
1929 bh = bh->b_this_page;
1930 } while (bh != head);
1899 1931
1900 /* 1932 /*
1901 * If this is a partial write which happened to make all buffers 1933 * If this is a partial write which happened to make all buffers
@@ -2020,7 +2052,6 @@ EXPORT_SYMBOL(generic_write_end);
2020int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, 2052int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2021 unsigned long from) 2053 unsigned long from)
2022{ 2054{
2023 struct inode *inode = page->mapping->host;
2024 unsigned block_start, block_end, blocksize; 2055 unsigned block_start, block_end, blocksize;
2025 unsigned to; 2056 unsigned to;
2026 struct buffer_head *bh, *head; 2057 struct buffer_head *bh, *head;
@@ -2029,13 +2060,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2029 if (!page_has_buffers(page)) 2060 if (!page_has_buffers(page))
2030 return 0; 2061 return 0;
2031 2062
2032 blocksize = 1 << inode->i_blkbits; 2063 head = page_buffers(page);
2064 blocksize = head->b_size;
2033 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); 2065 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2034 to = from + to; 2066 to = from + to;
2035 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) 2067 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2036 return 0; 2068 return 0;
2037 2069
2038 head = page_buffers(page);
2039 bh = head; 2070 bh = head;
2040 block_start = 0; 2071 block_start = 0;
2041 do { 2072 do {
@@ -2068,18 +2099,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
2068 struct inode *inode = page->mapping->host; 2099 struct inode *inode = page->mapping->host;
2069 sector_t iblock, lblock; 2100 sector_t iblock, lblock;
2070 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; 2101 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2071 unsigned int blocksize; 2102 unsigned int blocksize, bbits;
2072 int nr, i; 2103 int nr, i;
2073 int fully_mapped = 1; 2104 int fully_mapped = 1;
2074 2105
2075 BUG_ON(!PageLocked(page)); 2106 head = create_page_buffers(page, inode, 0);
2076 blocksize = 1 << inode->i_blkbits; 2107 blocksize = head->b_size;
2077 if (!page_has_buffers(page)) 2108 bbits = block_size_bits(blocksize);
2078 create_empty_buffers(page, blocksize, 0);
2079 head = page_buffers(page);
2080 2109
2081 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); 2110 iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
2082 lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; 2111 lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2083 bh = head; 2112 bh = head;
2084 nr = 0; 2113 nr = 0;
2085 i = 0; 2114 i = 0;
@@ -2864,6 +2893,55 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
2864 bio_put(bio); 2893 bio_put(bio);
2865} 2894}
2866 2895
2896/*
2897 * This allows us to do IO even on the odd last sectors
2898 * of a device, even if the bh block size is some multiple
2899 * of the physical sector size.
2900 *
2901 * We'll just truncate the bio to the size of the device,
2902 * and clear the end of the buffer head manually.
2903 *
2904 * Truly out-of-range accesses will turn into actual IO
2905 * errors, this only handles the "we need to be able to
2906 * do IO at the final sector" case.
2907 */
2908static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
2909{
2910 sector_t maxsector;
2911 unsigned bytes;
2912
2913 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
2914 if (!maxsector)
2915 return;
2916
2917 /*
2918 * If the *whole* IO is past the end of the device,
2919 * let it through, and the IO layer will turn it into
2920 * an EIO.
2921 */
2922 if (unlikely(bio->bi_sector >= maxsector))
2923 return;
2924
2925 maxsector -= bio->bi_sector;
2926 bytes = bio->bi_size;
2927 if (likely((bytes >> 9) <= maxsector))
2928 return;
2929
2930 /* Uhhuh. We've got a bh that straddles the device size! */
2931 bytes = maxsector << 9;
2932
2933 /* Truncate the bio.. */
2934 bio->bi_size = bytes;
2935 bio->bi_io_vec[0].bv_len = bytes;
2936
2937 /* ..and clear the end of the buffer for reads */
2938 if ((rw & RW_MASK) == READ) {
2939 void *kaddr = kmap_atomic(bh->b_page);
2940 memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
2941 kunmap_atomic(kaddr);
2942 }
2943}
2944
2867int submit_bh(int rw, struct buffer_head * bh) 2945int submit_bh(int rw, struct buffer_head * bh)
2868{ 2946{
2869 struct bio *bio; 2947 struct bio *bio;
@@ -2900,6 +2978,9 @@ int submit_bh(int rw, struct buffer_head * bh)
2900 bio->bi_end_io = end_bio_bh_io_sync; 2978 bio->bi_end_io = end_bio_bh_io_sync;
2901 bio->bi_private = bh; 2979 bio->bi_private = bh;
2902 2980
2981 /* Take care of bh's that straddle the end of the device */
2982 guard_bh_eod(rw, bio, bh);
2983
2903 bio_get(bio); 2984 bio_get(bio);
2904 submit_bio(rw, bio); 2985 submit_bio(rw, bio);
2905 2986
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index edb25b4bbb95..70b6f4c3a0c1 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1794,7 +1794,6 @@ static int cifs_writepages(struct address_space *mapping,
1794 struct TCP_Server_Info *server; 1794 struct TCP_Server_Info *server;
1795 struct page *page; 1795 struct page *page;
1796 int rc = 0; 1796 int rc = 0;
1797 loff_t isize = i_size_read(mapping->host);
1798 1797
1799 /* 1798 /*
1800 * If wsize is smaller than the page cache size, default to writing 1799 * If wsize is smaller than the page cache size, default to writing
@@ -1899,7 +1898,7 @@ retry:
1899 */ 1898 */
1900 set_page_writeback(page); 1899 set_page_writeback(page);
1901 1900
1902 if (page_offset(page) >= isize) { 1901 if (page_offset(page) >= i_size_read(mapping->host)) {
1903 done = true; 1902 done = true;
1904 unlock_page(page); 1903 unlock_page(page);
1905 end_page_writeback(page); 1904 end_page_writeback(page);
@@ -1932,7 +1931,8 @@ retry:
1932 wdata->offset = page_offset(wdata->pages[0]); 1931 wdata->offset = page_offset(wdata->pages[0]);
1933 wdata->pagesz = PAGE_CACHE_SIZE; 1932 wdata->pagesz = PAGE_CACHE_SIZE;
1934 wdata->tailsz = 1933 wdata->tailsz =
1935 min(isize - page_offset(wdata->pages[nr_pages - 1]), 1934 min(i_size_read(mapping->host) -
1935 page_offset(wdata->pages[nr_pages - 1]),
1936 (loff_t)PAGE_CACHE_SIZE); 1936 (loff_t)PAGE_CACHE_SIZE);
1937 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + 1937 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
1938 wdata->tailsz; 1938 wdata->tailsz;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f9b5d3d6cf33..1c576e871366 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -86,14 +86,17 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
86 86
87 dentry = d_lookup(parent, name); 87 dentry = d_lookup(parent, name);
88 if (dentry) { 88 if (dentry) {
89 int err;
89 inode = dentry->d_inode; 90 inode = dentry->d_inode;
90 /* update inode in place if i_ino didn't change */ 91 /* update inode in place if i_ino didn't change */
91 if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { 92 if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
92 cifs_fattr_to_inode(inode, fattr); 93 cifs_fattr_to_inode(inode, fattr);
93 return dentry; 94 return dentry;
94 } 95 }
95 d_drop(dentry); 96 err = d_invalidate(dentry);
96 dput(dentry); 97 dput(dentry);
98 if (err)
99 return NULL;
97 } 100 }
98 101
99 dentry = d_alloc(parent, name); 102 dentry = d_alloc(parent, name);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 56cc4be87807..34cea2798333 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -766,7 +766,6 @@ smb_set_file_info(struct inode *inode, const char *full_path,
766 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 766 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
767 struct tcon_link *tlink = NULL; 767 struct tcon_link *tlink = NULL;
768 struct cifs_tcon *tcon; 768 struct cifs_tcon *tcon;
769 FILE_BASIC_INFO info_buf;
770 769
771 /* if the file is already open for write, just use that fileid */ 770 /* if the file is already open for write, just use that fileid */
772 open_file = find_writable_file(cinode, true); 771 open_file = find_writable_file(cinode, true);
@@ -817,7 +816,7 @@ smb_set_file_info(struct inode *inode, const char *full_path,
817 netpid = current->tgid; 816 netpid = current->tgid;
818 817
819set_via_filehandle: 818set_via_filehandle:
820 rc = CIFSSMBSetFileInfo(xid, tcon, &info_buf, netfid, netpid); 819 rc = CIFSSMBSetFileInfo(xid, tcon, buf, netfid, netpid);
821 if (!rc) 820 if (!rc)
822 cinode->cifsAttrs = le32_to_cpu(buf->Attributes); 821 cinode->cifsAttrs = le32_to_cpu(buf->Attributes);
823 822
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f86c720dba0e..cf5b44b10c67 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -540,6 +540,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
540 sector_t fs_endblk; /* Into file, in filesystem-sized blocks */ 540 sector_t fs_endblk; /* Into file, in filesystem-sized blocks */
541 unsigned long fs_count; /* Number of filesystem-sized blocks */ 541 unsigned long fs_count; /* Number of filesystem-sized blocks */
542 int create; 542 int create;
543 unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor;
543 544
544 /* 545 /*
545 * If there was a memory error and we've overwritten all the 546 * If there was a memory error and we've overwritten all the
@@ -554,7 +555,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
554 fs_count = fs_endblk - fs_startblk + 1; 555 fs_count = fs_endblk - fs_startblk + 1;
555 556
556 map_bh->b_state = 0; 557 map_bh->b_state = 0;
557 map_bh->b_size = fs_count << dio->inode->i_blkbits; 558 map_bh->b_size = fs_count << i_blkbits;
558 559
559 /* 560 /*
560 * For writes inside i_size on a DIO_SKIP_HOLES filesystem we 561 * For writes inside i_size on a DIO_SKIP_HOLES filesystem we
@@ -1053,7 +1054,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1053 int seg; 1054 int seg;
1054 size_t size; 1055 size_t size;
1055 unsigned long addr; 1056 unsigned long addr;
1056 unsigned blkbits = inode->i_blkbits; 1057 unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
1058 unsigned blkbits = i_blkbits;
1057 unsigned blocksize_mask = (1 << blkbits) - 1; 1059 unsigned blocksize_mask = (1 << blkbits) - 1;
1058 ssize_t retval = -EINVAL; 1060 ssize_t retval = -EINVAL;
1059 loff_t end = offset; 1061 loff_t end = offset;
@@ -1149,7 +1151,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1149 dio->inode = inode; 1151 dio->inode = inode;
1150 dio->rw = rw; 1152 dio->rw = rw;
1151 sdio.blkbits = blkbits; 1153 sdio.blkbits = blkbits;
1152 sdio.blkfactor = inode->i_blkbits - blkbits; 1154 sdio.blkfactor = i_blkbits - blkbits;
1153 sdio.block_in_file = offset >> blkbits; 1155 sdio.block_in_file = offset >> blkbits;
1154 1156
1155 sdio.get_block = get_block; 1157 sdio.get_block = get_block;
diff --git a/fs/file.c b/fs/file.c
index 7cb71b992603..eff23162485f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -994,16 +994,18 @@ int iterate_fd(struct files_struct *files, unsigned n,
994 const void *p) 994 const void *p)
995{ 995{
996 struct fdtable *fdt; 996 struct fdtable *fdt;
997 struct file *file;
998 int res = 0; 997 int res = 0;
999 if (!files) 998 if (!files)
1000 return 0; 999 return 0;
1001 spin_lock(&files->file_lock); 1000 spin_lock(&files->file_lock);
1002 fdt = files_fdtable(files); 1001 for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
1003 while (!res && n < fdt->max_fds) { 1002 struct file *file;
1004 file = rcu_dereference_check_fdtable(files, fdt->fd[n++]); 1003 file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
1005 if (file) 1004 if (!file)
1006 res = f(p, file, n); 1005 continue;
1006 res = f(p, file, n);
1007 if (res)
1008 break;
1007 } 1009 }
1008 spin_unlock(&files->file_lock); 1010 spin_unlock(&files->file_lock);
1009 return res; 1011 return res;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 51ea267d444c..3e3422f7f0a4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -228,6 +228,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
228static void inode_sync_complete(struct inode *inode) 228static void inode_sync_complete(struct inode *inode)
229{ 229{
230 inode->i_state &= ~I_SYNC; 230 inode->i_state &= ~I_SYNC;
231 /* If inode is clean and unused, put it into LRU now... */
232 inode_add_lru(inode);
231 /* Waiters must see I_SYNC cleared before being woken up */ 233 /* Waiters must see I_SYNC cleared before being woken up */
232 smp_mb(); 234 smp_mb();
233 wake_up_bit(&inode->i_state, __I_SYNC); 235 wake_up_bit(&inode->i_state, __I_SYNC);
diff --git a/fs/inode.c b/fs/inode.c
index b03c71957246..64999f144153 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -408,6 +408,19 @@ static void inode_lru_list_add(struct inode *inode)
408 spin_unlock(&inode->i_sb->s_inode_lru_lock); 408 spin_unlock(&inode->i_sb->s_inode_lru_lock);
409} 409}
410 410
411/*
412 * Add inode to LRU if needed (inode is unused and clean).
413 *
414 * Needs inode->i_lock held.
415 */
416void inode_add_lru(struct inode *inode)
417{
418 if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
419 !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
420 inode_lru_list_add(inode);
421}
422
423
411static void inode_lru_list_del(struct inode *inode) 424static void inode_lru_list_del(struct inode *inode)
412{ 425{
413 spin_lock(&inode->i_sb->s_inode_lru_lock); 426 spin_lock(&inode->i_sb->s_inode_lru_lock);
@@ -1390,8 +1403,7 @@ static void iput_final(struct inode *inode)
1390 1403
1391 if (!drop && (sb->s_flags & MS_ACTIVE)) { 1404 if (!drop && (sb->s_flags & MS_ACTIVE)) {
1392 inode->i_state |= I_REFERENCED; 1405 inode->i_state |= I_REFERENCED;
1393 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 1406 inode_add_lru(inode);
1394 inode_lru_list_add(inode);
1395 spin_unlock(&inode->i_lock); 1407 spin_unlock(&inode->i_lock);
1396 return; 1408 return;
1397 } 1409 }
diff --git a/fs/internal.h b/fs/internal.h
index 916b7cbf3e3e..2f6af7f645eb 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -110,6 +110,7 @@ extern int open_check_o_direct(struct file *f);
110 * inode.c 110 * inode.c
111 */ 111 */
112extern spinlock_t inode_sb_list_lock; 112extern spinlock_t inode_sb_list_lock;
113extern void inode_add_lru(struct inode *inode);
113 114
114/* 115/*
115 * fs-writeback.c 116 * fs-writeback.c
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 78b7f84241d4..7f5120bf0ec2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1961,7 +1961,9 @@ retry:
1961 spin_unlock(&journal->j_list_lock); 1961 spin_unlock(&journal->j_list_lock);
1962 jbd_unlock_bh_state(bh); 1962 jbd_unlock_bh_state(bh);
1963 spin_unlock(&journal->j_state_lock); 1963 spin_unlock(&journal->j_state_lock);
1964 unlock_buffer(bh);
1964 log_wait_commit(journal, tid); 1965 log_wait_commit(journal, tid);
1966 lock_buffer(bh);
1965 goto retry; 1967 goto retry;
1966 } 1968 }
1967 /* 1969 /*
diff --git a/fs/namei.c b/fs/namei.c
index 937f9d50c84b..5f4cdf3ad913 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2131,6 +2131,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2131 if (!len) 2131 if (!len)
2132 return ERR_PTR(-EACCES); 2132 return ERR_PTR(-EACCES);
2133 2133
2134 if (unlikely(name[0] == '.')) {
2135 if (len < 2 || (len == 2 && name[1] == '.'))
2136 return ERR_PTR(-EACCES);
2137 }
2138
2134 while (len--) { 2139 while (len--) {
2135 c = *(const unsigned char *)name++; 2140 c = *(const unsigned char *)name++;
2136 if (c == '/' || c == '\0') 2141 if (c == '/' || c == '\0')
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ce8cb926526b..b9e66b7e0c14 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -450,7 +450,8 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
450 nfs_refresh_inode(dentry->d_inode, entry->fattr); 450 nfs_refresh_inode(dentry->d_inode, entry->fattr);
451 goto out; 451 goto out;
452 } else { 452 } else {
453 d_drop(dentry); 453 if (d_invalidate(dentry) != 0)
454 goto out;
454 dput(dentry); 455 dput(dentry);
455 } 456 }
456 } 457 }
@@ -1100,6 +1101,8 @@ out_set_verifier:
1100out_zap_parent: 1101out_zap_parent:
1101 nfs_zap_caches(dir); 1102 nfs_zap_caches(dir);
1102 out_bad: 1103 out_bad:
1104 nfs_free_fattr(fattr);
1105 nfs_free_fhandle(fhandle);
1103 nfs_mark_for_revalidate(dir); 1106 nfs_mark_for_revalidate(dir);
1104 if (inode && S_ISDIR(inode->i_mode)) { 1107 if (inode && S_ISDIR(inode->i_mode)) {
1105 /* Purge readdir caches. */ 1108 /* Purge readdir caches. */
@@ -1112,8 +1115,6 @@ out_zap_parent:
1112 shrink_dcache_parent(dentry); 1115 shrink_dcache_parent(dentry);
1113 } 1116 }
1114 d_drop(dentry); 1117 d_drop(dentry);
1115 nfs_free_fattr(fattr);
1116 nfs_free_fhandle(fhandle);
1117 dput(parent); 1118 dput(parent);
1118 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", 1119 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
1119 __func__, dentry->d_parent->d_name.name, 1120 __func__, dentry->d_parent->d_name.name,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3c231adf8450..9e28356a959a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1877,8 +1877,9 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
1877 if (!vma) 1877 if (!vma)
1878 goto out_no_vma; 1878 goto out_no_vma;
1879 1879
1880 result = proc_map_files_instantiate(dir, dentry, task, 1880 if (vma->vm_file)
1881 (void *)(unsigned long)vma->vm_file->f_mode); 1881 result = proc_map_files_instantiate(dir, dentry, task,
1882 (void *)(unsigned long)vma->vm_file->f_mode);
1882 1883
1883out_no_vma: 1884out_no_vma:
1884 up_read(&mm->mmap_sem); 1885 up_read(&mm->mmap_sem);