author    Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>    2013-01-15 15:58:25 -0500
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>    2013-01-15 15:58:25 -0500
commit    7bcc1ec07748cae3552dc9b46701c117926c8923 (patch)
tree      2b3edc7de77ca306b2559ae341077094bac8c4a2 /fs
parent    e5c702d3b268066dc70d619ecff06a08065f343f (diff)
parent    29594404d7fe73cd80eaa4ee8c43dcc53970c60e (diff)
Merge tag 'v3.7' into stable/for-linus-3.8
Linux 3.7

* tag 'v3.7': (833 commits)
  Linux 3.7
  Input: matrix-keymap - provide proper module license
  Revert "revert "Revert "mm: remove __GFP_NO_KSWAPD""" and associated damage
  ipv4: ip_check_defrag must not modify skb before unsharing
  Revert "mm: avoid waking kswapd for THP allocations when compaction is deferred or contended"
  inet_diag: validate port comparison byte code to prevent unsafe reads
  inet_diag: avoid unsafe and nonsensical prefix matches in inet_diag_bc_run()
  inet_diag: validate byte code to prevent oops in inet_diag_bc_run()
  inet_diag: fix oops for IPv4 AF_INET6 TCP SYN-RECV state
  mm: vmscan: fix inappropriate zone congestion clearing
  vfs: fix O_DIRECT read past end of block device
  net: gro: fix possible panic in skb_gro_receive()
  tcp: bug fix Fast Open client retransmission
  tmpfs: fix shared mempolicy leak
  mm: vmscan: do not keep kswapd looping forever due to individual uncompactable zones
  mm: compaction: validate pfn range passed to isolate_freepages_block
  mmc: sh-mmcif: avoid oops on spurious interrupts (second try)
  Revert misapplied "mmc: sh-mmcif: avoid oops on spurious interrupts"
  mmc: sdhci-s3c: fix missing clock for gpio card-detect
  lib/Makefile: Fix oid_registry build dependency
  ...

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

Conflicts:
        arch/arm/xen/enlighten.c
        drivers/xen/Makefile

[We need to have the v3.7 base, as the 'for-3.8' branch was based off
v3.7-rc3 and there are some patches in v3.7-rc6 that we want to have in
our branch.]
Diffstat (limited to 'fs')
-rw-r--r--  fs/bio.c                            6
-rw-r--r--  fs/block_dev.c                      166
-rw-r--r--  fs/buffer.c                         145
-rw-r--r--  fs/ceph/export.c                    2
-rw-r--r--  fs/cifs/cifsacl.c                   49
-rw-r--r--  fs/cifs/dir.c                       11
-rw-r--r--  fs/cifs/file.c                      6
-rw-r--r--  fs/cifs/readdir.c                   5
-rw-r--r--  fs/cifs/smb1ops.c                   3
-rw-r--r--  fs/direct-io.c                      8
-rw-r--r--  fs/eventpoll.c                      38
-rw-r--r--  fs/ext3/balloc.c                    5
-rw-r--r--  fs/ext4/ialloc.c                    19
-rw-r--r--  fs/file.c                           19
-rw-r--r--  fs/fs-writeback.c                   2
-rw-r--r--  fs/gfs2/file.c                      14
-rw-r--r--  fs/gfs2/lops.c                      16
-rw-r--r--  fs/gfs2/quota.c                     7
-rw-r--r--  fs/gfs2/rgrp.c                      33
-rw-r--r--  fs/gfs2/super.c                     3
-rw-r--r--  fs/gfs2/trans.c                     8
-rw-r--r--  fs/inode.c                          16
-rw-r--r--  fs/internal.h                       1
-rw-r--r--  fs/jbd/transaction.c                2
-rw-r--r--  fs/jffs2/file.c                     39
-rw-r--r--  fs/namei.c                          5
-rw-r--r--  fs/nfs/dir.c                        7
-rw-r--r--  fs/nfs/dns_resolve.c                5
-rw-r--r--  fs/nfs/inode.c                      5
-rw-r--r--  fs/nfs/internal.h                   6
-rw-r--r--  fs/nfs/mount_clnt.c                 2
-rw-r--r--  fs/nfs/namespace.c                  19
-rw-r--r--  fs/nfs/nfs4namespace.c              3
-rw-r--r--  fs/nfs/nfs4proc.c                   46
-rw-r--r--  fs/nfs/pnfs.c                       4
-rw-r--r--  fs/nfs/super.c                      51
-rw-r--r--  fs/nfs/unlink.c                     2
-rw-r--r--  fs/notify/fanotify/fanotify.c       1
-rw-r--r--  fs/notify/fanotify/fanotify_user.c  3
-rw-r--r--  fs/proc/base.c                      114
-rw-r--r--  fs/pstore/platform.c                3
-rw-r--r--  fs/reiserfs/inode.c                 10
-rw-r--r--  fs/reiserfs/stree.c                 4
-rw-r--r--  fs/reiserfs/super.c                 60
-rw-r--r--  fs/ubifs/find.c                     12
-rw-r--r--  fs/ubifs/lprops.c                   6
-rw-r--r--  fs/ubifs/ubifs.h                    3
-rw-r--r--  fs/xfs/xfs_alloc.c                  43
-rw-r--r--  fs/xfs/xfs_alloc.h                  3
-rw-r--r--  fs/xfs/xfs_alloc_btree.c            2
-rw-r--r--  fs/xfs/xfs_aops.c                   54
-rw-r--r--  fs/xfs/xfs_attr_leaf.c              20
-rw-r--r--  fs/xfs/xfs_bmap.c                   63
-rw-r--r--  fs/xfs/xfs_bmap.h                   9
-rw-r--r--  fs/xfs/xfs_buf.c                    14
-rw-r--r--  fs/xfs/xfs_buf_item.c               18
-rw-r--r--  fs/xfs/xfs_fsops.c                  21
-rw-r--r--  fs/xfs/xfs_ialloc.c                 1
-rw-r--r--  fs/xfs/xfs_inode.c                  3
-rw-r--r--  fs/xfs/xfs_ioctl.c                  2
-rw-r--r--  fs/xfs/xfs_iomap.c                  4
-rw-r--r--  fs/xfs/xfs_log.c                    19
-rw-r--r--  fs/xfs/xfs_log_recover.c            2
63 files changed, 793 insertions, 479 deletions
diff --git a/fs/bio.c b/fs/bio.c
index 9298c65ad9c7..b96fc6ce4855 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -75,6 +75,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
         unsigned int sz = sizeof(struct bio) + extra_size;
         struct kmem_cache *slab = NULL;
         struct bio_slab *bslab, *new_bio_slabs;
+        unsigned int new_bio_slab_max;
         unsigned int i, entry = -1;

         mutex_lock(&bio_slab_lock);
@@ -97,12 +98,13 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
                 goto out_unlock;

         if (bio_slab_nr == bio_slab_max && entry == -1) {
-                bio_slab_max <<= 1;
+                new_bio_slab_max = bio_slab_max << 1;
                 new_bio_slabs = krealloc(bio_slabs,
-                                         bio_slab_max * sizeof(struct bio_slab),
+                                         new_bio_slab_max * sizeof(struct bio_slab),
                                          GFP_KERNEL);
                 if (!new_bio_slabs)
                         goto out_unlock;
+                bio_slab_max = new_bio_slab_max;
                 bio_slabs = new_bio_slabs;
         }
         if (entry == -1)
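
The hunk above restores the commit-on-success idiom for growing a table:
the doubled capacity lives in a local and is written back to bio_slab_max
only after krealloc() succeeds, so a failed reallocation can no longer
leave the recorded capacity larger than the actual allocation. A minimal
userspace sketch of the same idiom -- invented names (slab_rec,
grow_table), with realloc() standing in for krealloc():

#include <stdlib.h>

struct slab_rec { char name[8]; unsigned size; };

static int grow_table(struct slab_rec **tab, unsigned *cap)
{
        unsigned new_cap = *cap << 1;   /* tentative doubling, local only */
        struct slab_rec *t = realloc(*tab, new_cap * sizeof(**tab));

        if (!t)
                return -1;              /* failure: *tab and *cap untouched */
        *tab = t;
        *cap = new_cap;                 /* commit capacity only on success */
        return 0;
}

int main(void)
{
        unsigned cap = 4;
        struct slab_rec *tab = calloc(cap, sizeof(*tab));

        return tab ? grow_table(&tab, &cap) : 1;
}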
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1a1e5e3b1eaf..ab3a456f6650 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -70,19 +70,6 @@ static void bdev_inode_switch_bdi(struct inode *inode,
         spin_unlock(&dst->wb.list_lock);
 }

-sector_t blkdev_max_block(struct block_device *bdev)
-{
-        sector_t retval = ~((sector_t)0);
-        loff_t sz = i_size_read(bdev->bd_inode);
-
-        if (sz) {
-                unsigned int size = block_size(bdev);
-                unsigned int sizebits = blksize_bits(size);
-                retval = (sz >> sizebits);
-        }
-        return retval;
-}
-
 /* Kill _all_ buffers and pagecache , dirty or not.. */
 void kill_bdev(struct block_device *bdev)
 {
@@ -116,8 +103,6 @@ EXPORT_SYMBOL(invalidate_bdev);

 int set_blocksize(struct block_device *bdev, int size)
 {
-        struct address_space *mapping;
-
         /* Size must be a power of two, and between 512 and PAGE_SIZE */
         if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
                 return -EINVAL;
@@ -126,19 +111,6 @@ int set_blocksize(struct block_device *bdev, int size)
         if (size < bdev_logical_block_size(bdev))
                 return -EINVAL;

-        /* Prevent starting I/O or mapping the device */
-        percpu_down_write(&bdev->bd_block_size_semaphore);
-
-        /* Check that the block device is not memory mapped */
-        mapping = bdev->bd_inode->i_mapping;
-        mutex_lock(&mapping->i_mmap_mutex);
-        if (mapping_mapped(mapping)) {
-                mutex_unlock(&mapping->i_mmap_mutex);
-                percpu_up_write(&bdev->bd_block_size_semaphore);
-                return -EBUSY;
-        }
-        mutex_unlock(&mapping->i_mmap_mutex);
-
         /* Don't change the size if it is same as current */
         if (bdev->bd_block_size != size) {
                 sync_blockdev(bdev);
@@ -146,9 +118,6 @@ int set_blocksize(struct block_device *bdev, int size)
                 bdev->bd_inode->i_blkbits = blksize_bits(size);
                 kill_bdev(bdev);
         }
-
-        percpu_up_write(&bdev->bd_block_size_semaphore);
-
         return 0;
 }

@@ -181,52 +150,12 @@ static int
 blkdev_get_block(struct inode *inode, sector_t iblock,
                 struct buffer_head *bh, int create)
 {
-        if (iblock >= blkdev_max_block(I_BDEV(inode))) {
-                if (create)
-                        return -EIO;
-
-                /*
-                 * for reads, we're just trying to fill a partial page.
-                 * return a hole, they will have to call get_block again
-                 * before they can fill it, and they will get -EIO at that
-                 * time
-                 */
-                return 0;
-        }
         bh->b_bdev = I_BDEV(inode);
         bh->b_blocknr = iblock;
         set_buffer_mapped(bh);
         return 0;
 }

-static int
-blkdev_get_blocks(struct inode *inode, sector_t iblock,
-                struct buffer_head *bh, int create)
-{
-        sector_t end_block = blkdev_max_block(I_BDEV(inode));
-        unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
-
-        if ((iblock + max_blocks) > end_block) {
-                max_blocks = end_block - iblock;
-                if ((long)max_blocks <= 0) {
-                        if (create)
-                                return -EIO;    /* write fully beyond EOF */
-                        /*
-                         * It is a read which is fully beyond EOF. We return
-                         * a !buffer_mapped buffer
-                         */
-                        max_blocks = 0;
-                }
-        }
-
-        bh->b_bdev = I_BDEV(inode);
-        bh->b_blocknr = iblock;
-        bh->b_size = max_blocks << inode->i_blkbits;
-        if (max_blocks)
-                set_buffer_mapped(bh);
-        return 0;
-}
-
 static ssize_t
 blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
                         loff_t offset, unsigned long nr_segs)
@@ -235,7 +164,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
         struct inode *inode = file->f_mapping->host;

         return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
-                                    nr_segs, blkdev_get_blocks, NULL, NULL, 0);
+                                    nr_segs, blkdev_get_block, NULL, NULL, 0);
 }

 int __sync_blockdev(struct block_device *bdev, int wait)
@@ -459,12 +388,6 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
         struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
         if (!ei)
                 return NULL;
-
-        if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) {
-                kmem_cache_free(bdev_cachep, ei);
-                return NULL;
-        }
-
         return &ei->vfs_inode;
 }

@@ -473,8 +396,6 @@ static void bdev_i_callback(struct rcu_head *head)
         struct inode *inode = container_of(head, struct inode, i_rcu);
         struct bdev_inode *bdi = BDEV_I(inode);

-        percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore);
-
         kmem_cache_free(bdev_cachep, bdi);
 }

@@ -1593,22 +1514,6 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
         return blkdev_ioctl(bdev, mode, cmd, arg);
 }

-ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
-                        unsigned long nr_segs, loff_t pos)
-{
-        ssize_t ret;
-        struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
-
-        percpu_down_read(&bdev->bd_block_size_semaphore);
-
-        ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
-
-        percpu_up_read(&bdev->bd_block_size_semaphore);
-
-        return ret;
-}
-EXPORT_SYMBOL_GPL(blkdev_aio_read);
-
 /*
  * Write data to the block device. Only intended for the block device itself
  * and the raw driver which basically is a fake block device.
@@ -1620,16 +1525,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
                          unsigned long nr_segs, loff_t pos)
 {
         struct file *file = iocb->ki_filp;
-        struct block_device *bdev = I_BDEV(file->f_mapping->host);
         struct blk_plug plug;
         ssize_t ret;

         BUG_ON(iocb->ki_pos != pos);

         blk_start_plug(&plug);
-
-        percpu_down_read(&bdev->bd_block_size_semaphore);
-
         ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
         if (ret > 0 || ret == -EIOCBQUEUED) {
                 ssize_t err;
@@ -1638,62 +1539,27 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
                 if (err < 0 && ret > 0)
                         ret = err;
         }
-
-        percpu_up_read(&bdev->bd_block_size_semaphore);
-
         blk_finish_plug(&plug);
-
         return ret;
 }
 EXPORT_SYMBOL_GPL(blkdev_aio_write);

-static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
-{
-        int ret;
-        struct block_device *bdev = I_BDEV(file->f_mapping->host);
-
-        percpu_down_read(&bdev->bd_block_size_semaphore);
-
-        ret = generic_file_mmap(file, vma);
-
-        percpu_up_read(&bdev->bd_block_size_semaphore);
-
-        return ret;
-}
-
-static ssize_t blkdev_splice_read(struct file *file, loff_t *ppos,
-                                  struct pipe_inode_info *pipe, size_t len,
-                                  unsigned int flags)
-{
-        ssize_t ret;
-        struct block_device *bdev = I_BDEV(file->f_mapping->host);
-
-        percpu_down_read(&bdev->bd_block_size_semaphore);
-
-        ret = generic_file_splice_read(file, ppos, pipe, len, flags);
-
-        percpu_up_read(&bdev->bd_block_size_semaphore);
-
-        return ret;
-}
-
-static ssize_t blkdev_splice_write(struct pipe_inode_info *pipe,
-                                   struct file *file, loff_t *ppos, size_t len,
-                                   unsigned int flags)
+static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
+                               unsigned long nr_segs, loff_t pos)
 {
-        ssize_t ret;
-        struct block_device *bdev = I_BDEV(file->f_mapping->host);
-
-        percpu_down_read(&bdev->bd_block_size_semaphore);
-
-        ret = generic_file_splice_write(pipe, file, ppos, len, flags);
+        struct file *file = iocb->ki_filp;
+        struct inode *bd_inode = file->f_mapping->host;
+        loff_t size = i_size_read(bd_inode);

-        percpu_up_read(&bdev->bd_block_size_semaphore);
+        if (pos >= size)
+                return 0;

-        return ret;
+        size -= pos;
+        if (size < INT_MAX)
+                nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
+        return generic_file_aio_read(iocb, iov, nr_segs, pos);
 }

-
 /*
  * Try to release a page associated with block device when the system
  * is under memory pressure.
@@ -1724,16 +1590,16 @@ const struct file_operations def_blk_fops = {
         .llseek         = block_llseek,
         .read           = do_sync_read,
         .write          = do_sync_write,
         .aio_read       = blkdev_aio_read,
         .aio_write      = blkdev_aio_write,
-        .mmap           = blkdev_mmap,
+        .mmap           = generic_file_mmap,
         .fsync          = blkdev_fsync,
         .unlocked_ioctl = block_ioctl,
 #ifdef CONFIG_COMPAT
         .compat_ioctl   = compat_blkdev_ioctl,
 #endif
-        .splice_read    = blkdev_splice_read,
-        .splice_write   = blkdev_splice_write,
+        .splice_read    = generic_file_splice_read,
+        .splice_write   = generic_file_splice_write,
 };

 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
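
The rewritten blkdev_aio_read() above is the heart of the "vfs: fix
O_DIRECT read past end of block device" change pulled in by this merge:
rather than serializing against block-size changes with a semaphore, it
clamps the request at the device size before calling
generic_file_aio_read(). A small standalone sketch of just that clamping
arithmetic (names and sizes here are illustrative, not kernel code):

#include <stdio.h>

static long long clamp_read(long long dev_size, long long pos, long long want)
{
        if (pos >= dev_size)
                return 0;                /* read starts at or past EOD */
        if (want > dev_size - pos)
                want = dev_size - pos;   /* shorten to what exists */
        return want;
}

int main(void)
{
        /* 1 MiB device: a 64 KiB read starting 4 KiB before EOD gets 4 KiB */
        printf("%lld\n", clamp_read(1 << 20, (1 << 20) - 4096, 64 << 10));
        return 0;
}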
diff --git a/fs/buffer.c b/fs/buffer.c
index b5f044283edb..ec0aca8ba6bf 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -911,6 +911,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
         attach_page_buffers(page, head);
 }

+static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
+{
+        sector_t retval = ~((sector_t)0);
+        loff_t sz = i_size_read(bdev->bd_inode);
+
+        if (sz) {
+                unsigned int sizebits = blksize_bits(size);
+                retval = (sz >> sizebits);
+        }
+        return retval;
+}
+
 /*
  * Initialise the state of a blockdev page's buffers.
  */
@@ -921,7 +933,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
         struct buffer_head *head = page_buffers(page);
         struct buffer_head *bh = head;
         int uptodate = PageUptodate(page);
-        sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
+        sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);

         do {
                 if (!buffer_mapped(bh)) {
@@ -1553,6 +1565,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 EXPORT_SYMBOL(unmap_underlying_metadata);

 /*
+ * Size is a power-of-two in the range 512..PAGE_SIZE,
+ * and the case we care about most is PAGE_SIZE.
+ *
+ * So this *could* possibly be written with those
+ * constraints in mind (relevant mostly if some
+ * architecture has a slow bit-scan instruction)
+ */
+static inline int block_size_bits(unsigned int blocksize)
+{
+        return ilog2(blocksize);
+}
+
+static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+{
+        BUG_ON(!PageLocked(page));
+
+        if (!page_has_buffers(page))
+                create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
+        return page_buffers(page);
+}
+
+/*
  * NOTE! All mapped/uptodate combinations are valid:
  *
  *      Mapped  Uptodate        Meaning
@@ -1589,19 +1623,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
         sector_t block;
         sector_t last_block;
         struct buffer_head *bh, *head;
-        const unsigned blocksize = 1 << inode->i_blkbits;
+        unsigned int blocksize, bbits;
         int nr_underway = 0;
         int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
                         WRITE_SYNC : WRITE);

-        BUG_ON(!PageLocked(page));
-
-        last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
-
-        if (!page_has_buffers(page)) {
-                create_empty_buffers(page, blocksize,
-                                (1 << BH_Dirty)|(1 << BH_Uptodate));
-        }
+        head = create_page_buffers(page, inode,
+                                        (1 << BH_Dirty)|(1 << BH_Uptodate));

         /*
          * Be very careful. We have no exclusion from __set_page_dirty_buffers
@@ -1613,9 +1641,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
          * handle that here by just cleaning them.
          */

-        block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-        head = page_buffers(page);
         bh = head;
+        blocksize = bh->b_size;
+        bbits = block_size_bits(blocksize);
+
+        block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+        last_block = (i_size_read(inode) - 1) >> bbits;

         /*
          * Get all the dirty buffers mapped to disk addresses and
@@ -1806,12 +1837,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
         BUG_ON(to > PAGE_CACHE_SIZE);
         BUG_ON(from > to);

-        blocksize = 1 << inode->i_blkbits;
-        if (!page_has_buffers(page))
-                create_empty_buffers(page, blocksize, 0);
-        head = page_buffers(page);
+        head = create_page_buffers(page, inode, 0);
+        blocksize = head->b_size;
+        bbits = block_size_bits(blocksize);

-        bbits = inode->i_blkbits;
         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);

         for(bh = head, block_start = 0; bh != head || !block_start;
@@ -1881,11 +1910,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
         unsigned blocksize;
         struct buffer_head *bh, *head;

-        blocksize = 1 << inode->i_blkbits;
+        bh = head = page_buffers(page);
+        blocksize = bh->b_size;

-        for(bh = head = page_buffers(page), block_start = 0;
-            bh != head || !block_start;
-            block_start=block_end, bh = bh->b_this_page) {
+        block_start = 0;
+        do {
                 block_end = block_start + blocksize;
                 if (block_end <= from || block_start >= to) {
                         if (!buffer_uptodate(bh))
@@ -1895,7 +1924,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
                         mark_buffer_dirty(bh);
                 }
                 clear_buffer_new(bh);
-        }
+
+                block_start = block_end;
+                bh = bh->b_this_page;
+        } while (bh != head);

         /*
          * If this is a partial write which happened to make all buffers
@@ -2020,7 +2052,6 @@ EXPORT_SYMBOL(generic_write_end);
 int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
                                         unsigned long from)
 {
-        struct inode *inode = page->mapping->host;
         unsigned block_start, block_end, blocksize;
         unsigned to;
         struct buffer_head *bh, *head;
@@ -2029,13 +2060,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
         if (!page_has_buffers(page))
                 return 0;

-        blocksize = 1 << inode->i_blkbits;
+        head = page_buffers(page);
+        blocksize = head->b_size;
         to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
         to = from + to;
         if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
                 return 0;

-        head = page_buffers(page);
         bh = head;
         block_start = 0;
         do {
@@ -2068,18 +2099,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
         struct inode *inode = page->mapping->host;
         sector_t iblock, lblock;
         struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-        unsigned int blocksize;
+        unsigned int blocksize, bbits;
         int nr, i;
         int fully_mapped = 1;

-        BUG_ON(!PageLocked(page));
-        blocksize = 1 << inode->i_blkbits;
-        if (!page_has_buffers(page))
-                create_empty_buffers(page, blocksize, 0);
-        head = page_buffers(page);
+        head = create_page_buffers(page, inode, 0);
+        blocksize = head->b_size;
+        bbits = block_size_bits(blocksize);

-        iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-        lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+        iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+        lblock = (i_size_read(inode)+blocksize-1) >> bbits;
         bh = head;
         nr = 0;
         i = 0;
@@ -2864,6 +2893,55 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
         bio_put(bio);
 }

+/*
+ * This allows us to do IO even on the odd last sectors
+ * of a device, even if the bh block size is some multiple
+ * of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device,
+ * and clear the end of the buffer head manually.
+ *
+ * Truly out-of-range accesses will turn into actual IO
+ * errors, this only handles the "we need to be able to
+ * do IO at the final sector" case.
+ */
+static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
+{
+        sector_t maxsector;
+        unsigned bytes;
+
+        maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+        if (!maxsector)
+                return;
+
+        /*
+         * If the *whole* IO is past the end of the device,
+         * let it through, and the IO layer will turn it into
+         * an EIO.
+         */
+        if (unlikely(bio->bi_sector >= maxsector))
+                return;
+
+        maxsector -= bio->bi_sector;
+        bytes = bio->bi_size;
+        if (likely((bytes >> 9) <= maxsector))
+                return;
+
+        /* Uhhuh. We've got a bh that straddles the device size! */
+        bytes = maxsector << 9;
+
+        /* Truncate the bio.. */
+        bio->bi_size = bytes;
+        bio->bi_io_vec[0].bv_len = bytes;
+
+        /* ..and clear the end of the buffer for reads */
+        if ((rw & RW_MASK) == READ) {
+                void *kaddr = kmap_atomic(bh->b_page);
+                memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
+                kunmap_atomic(kaddr);
+        }
+}
+
 int submit_bh(int rw, struct buffer_head * bh)
 {
         struct bio *bio;
@@ -2900,6 +2978,9 @@ int submit_bh(int rw, struct buffer_head * bh)
         bio->bi_end_io = end_bio_bh_io_sync;
         bio->bi_private = bh;

+        /* Take care of bh's that straddle the end of the device */
+        guard_bh_eod(rw, bio, bh);
+
         bio_get(bio);
         submit_bio(rw, bio);

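
guard_bh_eod() above handles the "odd last sectors" case: a buffer_head
whose block size is a multiple of the logical sector size can straddle
the end of the device, and the bio is trimmed to the bytes that actually
exist. A worked example of the same arithmetic with made-up numbers (a
1023-sector device and a 4 KiB buffer starting at sector 1022):

#include <stdio.h>

int main(void)
{
        unsigned long long maxsector = 1023;  /* device size in 512 B sectors */
        unsigned long long bi_sector = 1022;  /* first sector of the IO */
        unsigned bytes = 4096;                /* bh/bio length in bytes */

        if (bi_sector < maxsector && (bytes >> 9) > maxsector - bi_sector)
                bytes = (unsigned)((maxsector - bi_sector) << 9);

        printf("truncated to %u bytes\n", bytes);  /* prints 512 */
        return 0;
}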
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 02ce90972d81..9349bb37a2fe 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -90,6 +90,8 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
                 *max_len = handle_length;
                 type = 255;
         }
+        if (dentry)
+                dput(dentry);
         return type;
 }

diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index fc783e264420..0fb15bbbe43c 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr)
 }

 static void
+cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
+{
+        memcpy(dst, src, sizeof(*dst));
+        dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS);
+}
+
+static void
 id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
                 struct cifs_sid_id **psidid, char *typestr)
 {
@@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
                 }
         }

-        memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
+        cifs_copy_sid(&(*psidid)->sid, sidptr);
         (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
         (*psidid)->refcount = 0;

@@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
          * any fields of the node after a reference is put .
          */
         if (test_bit(SID_ID_MAPPED, &psidid->state)) {
-                memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+                cifs_copy_sid(ssid, &psidid->sid);
                 psidid->time = jiffies; /* update ts for accessing */
                 goto id_sid_out;
         }
@@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
                 if (IS_ERR(sidkey)) {
                         rc = -EINVAL;
                         cFYI(1, "%s: Can't map and id to a SID", __func__);
+                } else if (sidkey->datalen < sizeof(struct cifs_sid)) {
+                        rc = -EIO;
+                        cFYI(1, "%s: Downcall contained malformed key "
+                             "(datalen=%hu)", __func__, sidkey->datalen);
                 } else {
                         lsid = (struct cifs_sid *)sidkey->payload.data;
-                        memcpy(&psidid->sid, lsid,
-                               sidkey->datalen < sizeof(struct cifs_sid) ?
-                               sidkey->datalen : sizeof(struct cifs_sid));
-                        memcpy(ssid, &psidid->sid,
-                               sidkey->datalen < sizeof(struct cifs_sid) ?
-                               sidkey->datalen : sizeof(struct cifs_sid));
+                        cifs_copy_sid(&psidid->sid, lsid);
+                        cifs_copy_sid(ssid, &psidid->sid);
                         set_bit(SID_ID_MAPPED, &psidid->state);
                         key_put(sidkey);
                         kfree(psidid->sidstr);
@@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
                         return rc;
                 }
                 if (test_bit(SID_ID_MAPPED, &psidid->state))
-                        memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+                        cifs_copy_sid(ssid, &psidid->sid);
                 else
                         rc = -EINVAL;
         }
@@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
 static void copy_sec_desc(const struct cifs_ntsd *pntsd,
                                 struct cifs_ntsd *pnntsd, __u32 sidsoffset)
 {
-        int i;
-
         struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
         struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;

@@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
         owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
                                 le32_to_cpu(pntsd->osidoffset));
         nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
-
-        nowner_sid_ptr->revision = owner_sid_ptr->revision;
-        nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth;
-        for (i = 0; i < 6; i++)
-                nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i];
-        for (i = 0; i < 5; i++)
-                nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i];
+        cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);

         /* copy group sid */
         group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
                                 le32_to_cpu(pntsd->gsidoffset));
         ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
                                 sizeof(struct cifs_sid));
-
-        ngroup_sid_ptr->revision = group_sid_ptr->revision;
-        ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth;
-        for (i = 0; i < 6; i++)
-                ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
-        for (i = 0; i < 5; i++)
-                ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
+        cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);

         return;
 }
@@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
                         kfree(nowner_sid_ptr);
                         return rc;
                 }
-                memcpy(owner_sid_ptr, nowner_sid_ptr,
-                                sizeof(struct cifs_sid));
+                cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
                 kfree(nowner_sid_ptr);
                 *aclflag = CIFS_ACL_OWNER;
         }
@@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
                         kfree(ngroup_sid_ptr);
                         return rc;
                 }
-                memcpy(group_sid_ptr, ngroup_sid_ptr,
-                                sizeof(struct cifs_sid));
+                cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
                 kfree(ngroup_sid_ptr);
                 *aclflag = CIFS_ACL_GROUP;
         }
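
cifs_copy_sid() replaces several open-coded copies with one helper that
copies the whole structure and then clamps num_subauth to the sub_auth[]
bound, so a malformed SID from a key-service downcall cannot steer later
loops past the array. A self-contained sketch of that copy-then-clamp
pattern -- the struct here only mirrors the general shape of the kernel's
struct cifs_sid, and NUM_SUBAUTHS is assumed to be 5 as in this era of
the code:

#include <string.h>

#define NUM_SUBAUTHS 5

struct sid {
        unsigned char revision;
        unsigned char num_subauth;
        unsigned char authority[6];
        unsigned int  sub_auth[NUM_SUBAUTHS];
};

static void sid_copy(struct sid *dst, const struct sid *src)
{
        memcpy(dst, src, sizeof(*dst));
        if (dst->num_subauth > NUM_SUBAUTHS)    /* distrust the peer's count */
                dst->num_subauth = NUM_SUBAUTHS;
}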
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 7c0a81283645..d3671f2acb29 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -398,7 +398,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
          * in network traffic in the other paths.
          */
         if (!(oflags & O_CREAT)) {
-                struct dentry *res = cifs_lookup(inode, direntry, 0);
+                struct dentry *res;
+
+                /*
+                 * Check for hashed negative dentry. We have already revalidated
+                 * the dentry and it is fine. No need to perform another lookup.
+                 */
+                if (!d_unhashed(direntry))
+                        return -ENOENT;
+
+                res = cifs_lookup(inode, direntry, 0);
                 if (IS_ERR(res))
                         return PTR_ERR(res);

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index edb25b4bbb95..70b6f4c3a0c1 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1794,7 +1794,6 @@ static int cifs_writepages(struct address_space *mapping,
         struct TCP_Server_Info *server;
         struct page *page;
         int rc = 0;
-        loff_t isize = i_size_read(mapping->host);

         /*
          * If wsize is smaller than the page cache size, default to writing
@@ -1899,7 +1898,7 @@ retry:
                          */
                         set_page_writeback(page);

-                        if (page_offset(page) >= isize) {
+                        if (page_offset(page) >= i_size_read(mapping->host)) {
                                 done = true;
                                 unlock_page(page);
                                 end_page_writeback(page);
@@ -1932,7 +1931,8 @@ retry:
                 wdata->offset = page_offset(wdata->pages[0]);
                 wdata->pagesz = PAGE_CACHE_SIZE;
                 wdata->tailsz =
-                        min(isize - page_offset(wdata->pages[nr_pages - 1]),
+                        min(i_size_read(mapping->host) -
+                            page_offset(wdata->pages[nr_pages - 1]),
                             (loff_t)PAGE_CACHE_SIZE);
                 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
                                wdata->tailsz;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f9b5d3d6cf33..1c576e871366 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -86,14 +86,17 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,

         dentry = d_lookup(parent, name);
         if (dentry) {
+                int err;
                 inode = dentry->d_inode;
                 /* update inode in place if i_ino didn't change */
                 if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
                         cifs_fattr_to_inode(inode, fattr);
                         return dentry;
                 }
-                d_drop(dentry);
+                err = d_invalidate(dentry);
                 dput(dentry);
+                if (err)
+                        return NULL;
         }

         dentry = d_alloc(parent, name);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 56cc4be87807..34cea2798333 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -766,7 +766,6 @@ smb_set_file_info(struct inode *inode, const char *full_path,
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
         struct tcon_link *tlink = NULL;
         struct cifs_tcon *tcon;
-        FILE_BASIC_INFO info_buf;

         /* if the file is already open for write, just use that fileid */
         open_file = find_writable_file(cinode, true);
@@ -817,7 +816,7 @@ smb_set_file_info(struct inode *inode, const char *full_path,
         netpid = current->tgid;

 set_via_filehandle:
-        rc = CIFSSMBSetFileInfo(xid, tcon, &info_buf, netfid, netpid);
+        rc = CIFSSMBSetFileInfo(xid, tcon, buf, netfid, netpid);
         if (!rc)
                 cinode->cifsAttrs = le32_to_cpu(buf->Attributes);

diff --git a/fs/direct-io.c b/fs/direct-io.c
index f86c720dba0e..cf5b44b10c67 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -540,6 +540,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
         sector_t fs_endblk;     /* Into file, in filesystem-sized blocks */
         unsigned long fs_count; /* Number of filesystem-sized blocks */
         int create;
+        unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor;

         /*
          * If there was a memory error and we've overwritten all the
@@ -554,7 +555,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
                 fs_count = fs_endblk - fs_startblk + 1;

                 map_bh->b_state = 0;
-                map_bh->b_size = fs_count << dio->inode->i_blkbits;
+                map_bh->b_size = fs_count << i_blkbits;

                 /*
                  * For writes inside i_size on a DIO_SKIP_HOLES filesystem we
@@ -1053,7 +1054,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
         int seg;
         size_t size;
         unsigned long addr;
-        unsigned blkbits = inode->i_blkbits;
+        unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
+        unsigned blkbits = i_blkbits;
         unsigned blocksize_mask = (1 << blkbits) - 1;
         ssize_t retval = -EINVAL;
         loff_t end = offset;
@@ -1149,7 +1151,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
         dio->inode = inode;
         dio->rw = rw;
         sdio.blkbits = blkbits;
-        sdio.blkfactor = inode->i_blkbits - blkbits;
+        sdio.blkfactor = i_blkbits - blkbits;
         sdio.block_in_file = offset >> blkbits;

         sdio.get_block = get_block;
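
The direct-io hunks read inode->i_blkbits exactly once (via ACCESS_ONCE)
and derive both blkbits and blkfactor from that snapshot, so a concurrent
set_blocksize() cannot leave the two values computed from different block
sizes. A sketch of the snapshot idiom outside the kernel, with a volatile
read standing in for ACCESS_ONCE() and invented names throughout:

#define READ_FIELD_ONCE(x) (*(volatile unsigned *)&(x))

struct fake_inode { unsigned i_blkbits; };

static unsigned blkfactor_of(struct fake_inode *inode, unsigned dev_blkbits)
{
        /* one snapshot; every later use sees the same value */
        unsigned i_blkbits = READ_FIELD_ONCE(inode->i_blkbits);

        return i_blkbits - dev_blkbits;
}

int main(void)
{
        struct fake_inode inode = { 12 };          /* 4 KiB blocks */

        return (int)blkfactor_of(&inode, 9) - 3;   /* exits 0 on success */
}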
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index da72250ddc1c..cd96649bfe62 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
 /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
 static inline int ep_op_has_event(int op)
 {
-        return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD;
+        return op != EPOLL_CTL_DEL;
 }

 /* Initialize the poll safe wake up structure */
@@ -676,34 +676,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
         return 0;
 }

-/*
- * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item
- * had no event flags set, indicating that another thread may be currently
- * handling that item's events (in the case that EPOLLONESHOT was being
- * used). Otherwise a zero result indicates that the item has been disabled
- * from receiving events. A disabled item may be re-enabled via
- * EPOLL_CTL_MOD. Must be called with "mtx" held.
- */
-static int ep_disable(struct eventpoll *ep, struct epitem *epi)
-{
-        int result = 0;
-        unsigned long flags;
-
-        spin_lock_irqsave(&ep->lock, flags);
-        if (epi->event.events & ~EP_PRIVATE_BITS) {
-                if (ep_is_linked(&epi->rdllink))
-                        list_del_init(&epi->rdllink);
-                /* Ensure ep_poll_callback will not add epi back onto ready
-                   list: */
-                epi->event.events &= EP_PRIVATE_BITS;
-        }
-        else
-                result = -EBUSY;
-        spin_unlock_irqrestore(&ep->lock, flags);
-
-        return result;
-}
-
 static void ep_free(struct eventpoll *ep)
 {
         struct rb_node *rbp;
@@ -1048,6 +1020,8 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
         rb_insert_color(&epi->rbn, &ep->rbr);
 }

+
+
 #define PATH_ARR_SIZE 5
 /*
  * These are the number paths of length 1 to 5, that we are allowing to emanate
@@ -1813,12 +1787,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                 } else
                         error = -ENOENT;
                 break;
-        case EPOLL_CTL_DISABLE:
-                if (epi)
-                        error = ep_disable(ep, epi);
-                else
-                        error = -ENOENT;
-                break;
         }
         mutex_unlock(&ep->mtx);

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 7320a66e958f..22548f56197b 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -2101,8 +2101,9 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
         end = start + (range->len >> sb->s_blocksize_bits) - 1;
         minlen = range->minlen >> sb->s_blocksize_bits;

-        if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) ||
-            unlikely(start >= max_blks))
+        if (minlen > EXT3_BLOCKS_PER_GROUP(sb) ||
+            start >= max_blks ||
+            range->len < sb->s_blocksize)
                 return -EINVAL;
         if (end >= max_blks)
                 end = max_blks - 1;
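
The added range->len < sb->s_blocksize test rejects FITRIM requests
shorter than one filesystem block: for such a request the block-unit
end = start + (len >> blocksize_bits) - 1 computes to start - 1, an
inverted range. Illustrative arithmetic with assumed 4 KiB blocks:

#include <stdio.h>

int main(void)
{
        unsigned blocksize_bits = 12;                 /* 4 KiB blocks */
        unsigned long long start = 100, len = 512;    /* len < block size */
        unsigned long long end = start + (len >> blocksize_bits) - 1;

        printf("end=%llu < start=%llu\n", end, start);  /* end=99 */
        return 0;
}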
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4facdd29a350..3a100e7a62a8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -725,6 +725,10 @@ repeat_in_this_group:
                                    "inode=%lu", ino + 1);
                         continue;
                 }
+                BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+                err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+                if (err)
+                        goto fail;
                 ext4_lock_group(sb, group);
                 ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
                 ext4_unlock_group(sb, group);
@@ -738,6 +742,11 @@ repeat_in_this_group:
         goto out;

 got:
+        BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+        err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+        if (err)
+                goto fail;
+
         /* We may have to initialize the block bitmap if it isn't already */
         if (ext4_has_group_desc_csum(sb) &&
             gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -771,11 +780,6 @@ got:
                         goto fail;
                 }

-        BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
-        err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
-        if (err)
-                goto fail;
-
         BUFFER_TRACE(group_desc_bh, "get_write_access");
         err = ext4_journal_get_write_access(handle, group_desc_bh);
         if (err)
@@ -823,11 +827,6 @@ got:
         }
         ext4_unlock_group(sb, group);

-        BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
-        err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
-        if (err)
-                goto fail;
-
         BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
         err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
         if (err)
diff --git a/fs/file.c b/fs/file.c
index d3b5fa80b71b..eff23162485f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -685,7 +685,6 @@ void do_close_on_exec(struct files_struct *files)
         struct fdtable *fdt;

         /* exec unshares first */
-        BUG_ON(atomic_read(&files->count) != 1);
         spin_lock(&files->file_lock);
         for (i = 0; ; i++) {
                 unsigned long set;
@@ -900,7 +899,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
                 return __close_fd(files, fd);

         if (fd >= rlimit(RLIMIT_NOFILE))
-                return -EMFILE;
+                return -EBADF;

         spin_lock(&files->file_lock);
         err = expand_files(files, fd);
@@ -926,7 +925,7 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
                 return -EINVAL;

         if (newfd >= rlimit(RLIMIT_NOFILE))
-                return -EMFILE;
+                return -EBADF;

         spin_lock(&files->file_lock);
         err = expand_files(files, newfd);
@@ -995,16 +994,18 @@ int iterate_fd(struct files_struct *files, unsigned n,
                 const void *p)
 {
         struct fdtable *fdt;
-        struct file *file;
         int res = 0;
         if (!files)
                 return 0;
         spin_lock(&files->file_lock);
-        fdt = files_fdtable(files);
-        while (!res && n < fdt->max_fds) {
-                file = rcu_dereference_check_fdtable(files, fdt->fd[n++]);
-                if (file)
-                        res = f(p, file, n);
+        for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
+                struct file *file;
+                file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
+                if (!file)
+                        continue;
+                res = f(p, file, n);
+                if (res)
+                        break;
         }
         spin_unlock(&files->file_lock);
         return res;
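
The iterate_fd() rewrite keeps the old contract -- visit descriptors from
slot n upward and stop at the first callback that returns nonzero --
while passing the callback the slot actually inspected rather than the
value n++ had already advanced past. A sketch of the loop shape with a
plain pointer array standing in for the fdtable and its RCU accessors
(all names here are invented):

typedef int (*fd_fn)(const void *p, void *file, unsigned fd);

static int iterate(void **fd, unsigned max_fds, unsigned n,
                   fd_fn f, const void *p)
{
        int res = 0;

        for (; n < max_fds; n++) {
                if (!fd[n])
                        continue;        /* empty slot, keep scanning */
                res = f(p, fd[n], n);
                if (res)
                        break;           /* first nonzero result wins */
        }
        return res;
}

static int find_first(const void *p, void *file, unsigned fd)
{
        (void)p; (void)file;
        return (int)fd + 1;              /* nonzero: report slot and stop */
}

int main(void)
{
        int x = 42;
        void *table[4] = { 0, 0, &x, 0 };

        return iterate(table, 4, 0, find_first, 0) == 3 ? 0 : 1;
}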
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 51ea267d444c..3e3422f7f0a4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -228,6 +228,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
 static void inode_sync_complete(struct inode *inode)
 {
         inode->i_state &= ~I_SYNC;
+        /* If inode is clean an unused, put it into LRU now... */
+        inode_add_lru(inode);
         /* Waiters must see I_SYNC cleared before being woken up */
         smp_mb();
         wake_up_bit(&inode->i_state, __I_SYNC);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 0def0504afc1..e056b4ce4877 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -516,15 +516,13 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
                 struct gfs2_holder i_gh;
                 int error;

-                gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-                error = gfs2_glock_nq(&i_gh);
-                if (error == 0) {
-                        file_accessed(file);
-                        gfs2_glock_dq(&i_gh);
-                }
-                gfs2_holder_uninit(&i_gh);
+                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
+                                           &i_gh);
                 if (error)
                         return error;
+                /* grab lock to update inode */
+                gfs2_glock_dq_uninit(&i_gh);
+                file_accessed(file);
         }
         vma->vm_ops = &gfs2_vm_ops;

@@ -677,10 +675,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
         size_t writesize = iov_length(iov, nr_segs);
         struct dentry *dentry = file->f_dentry;
         struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-        struct gfs2_sbd *sdp;
         int ret;

-        sdp = GFS2_SB(file->f_mapping->host);
         ret = gfs2_rs_alloc(ip);
         if (ret)
                 return ret;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8ff95a2d54ee..9ceccb1595a3 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -393,12 +393,10 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
         struct gfs2_meta_header *mh;
         struct gfs2_trans *tr;

-        lock_buffer(bd->bd_bh);
-        gfs2_log_lock(sdp);
         tr = current->journal_info;
         tr->tr_touched = 1;
         if (!list_empty(&bd->bd_list))
-                goto out;
+                return;
         set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
         set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
         mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
@@ -414,9 +412,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
         sdp->sd_log_num_buf++;
         list_add(&bd->bd_list, &sdp->sd_log_le_buf);
         tr->tr_num_buf_new++;
-out:
-        gfs2_log_unlock(sdp);
-        unlock_buffer(bd->bd_bh);
 }

 static void gfs2_check_magic(struct buffer_head *bh)
@@ -621,7 +616,6 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)

 static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 {
-        struct gfs2_log_descriptor *ld;
         struct gfs2_meta_header *mh;
         unsigned int offset;
         struct list_head *head = &sdp->sd_log_le_revoke;
@@ -634,7 +628,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)

         length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
         page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
-        ld = page_address(page);
         offset = sizeof(struct gfs2_log_descriptor);

         list_for_each_entry(bd, head, bd_list) {
@@ -777,12 +770,10 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
         struct address_space *mapping = bd->bd_bh->b_page->mapping;
         struct gfs2_inode *ip = GFS2_I(mapping->host);

-        lock_buffer(bd->bd_bh);
-        gfs2_log_lock(sdp);
         if (tr)
                 tr->tr_touched = 1;
         if (!list_empty(&bd->bd_list))
-                goto out;
+                return;
         set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
         set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
         if (gfs2_is_jdata(ip)) {
@@ -793,9 +784,6 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
         } else {
                 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
         }
-out:
-        gfs2_log_unlock(sdp);
-        unlock_buffer(bd->bd_bh);
 }

 /**
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 40c4b0d42fa8..c5af8e18f27a 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -497,8 +497,11 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
         struct gfs2_quota_data **qd;
         int error;

-        if (ip->i_res == NULL)
-                gfs2_rs_alloc(ip);
+        if (ip->i_res == NULL) {
+                error = gfs2_rs_alloc(ip);
+                if (error)
+                        return error;
+        }

         qd = ip->i_res->rs_qa_qd;

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3cc402ce6fea..38fe18f2f055 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -553,7 +553,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 */
int gfs2_rs_alloc(struct gfs2_inode *ip)
{
-	int error = 0;
 	struct gfs2_blkreserv *res;
 
 	if (ip->i_res)
@@ -561,7 +560,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
 
 	res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
 	if (!res)
-		error = -ENOMEM;
+		return -ENOMEM;
 
 	RB_CLEAR_NODE(&res->rs_node);
 
@@ -571,7 +570,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
 	else
 		ip->i_res = res;
 	up_write(&ip->i_rw_mutex);
-	return error;
+	return 0;
}
 
static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
@@ -1263,7 +1262,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 	int ret = 0;
 	u64 amt;
 	u64 trimmed = 0;
+	u64 start, end, minlen;
 	unsigned int x;
+	unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -1271,19 +1272,25 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 	if (!blk_queue_discard(q))
 		return -EOPNOTSUPP;
 
-	if (argp == NULL) {
-		r.start = 0;
-		r.len = ULLONG_MAX;
-		r.minlen = 0;
-	} else if (copy_from_user(&r, argp, sizeof(r)))
+	if (copy_from_user(&r, argp, sizeof(r)))
 		return -EFAULT;
 
 	ret = gfs2_rindex_update(sdp);
 	if (ret)
 		return ret;
 
-	rgd = gfs2_blk2rgrpd(sdp, r.start, 0);
-	rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0);
+	start = r.start >> bs_shift;
+	end = start + (r.len >> bs_shift);
+	minlen = max_t(u64, r.minlen,
+		       q->limits.discard_granularity) >> bs_shift;
+
+	rgd = gfs2_blk2rgrpd(sdp, start, 0);
+	rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0);
+
+	if (end <= start ||
+	    minlen > sdp->sd_max_rg_data ||
+	    start > rgd_end->rd_data0 + rgd_end->rd_data)
+		return -EINVAL;
 
 	while (1) {
 
@@ -1295,7 +1302,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 		/* Trim each bitmap in the rgrp */
 		for (x = 0; x < rgd->rd_length; x++) {
 			struct gfs2_bitmap *bi = rgd->rd_bits + x;
-			ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt);
+			ret = gfs2_rgrp_send_discards(sdp,
+					rgd->rd_data0, NULL, bi, minlen,
+					&amt);
 			if (ret) {
 				gfs2_glock_dq_uninit(&gh);
 				goto out;
@@ -1324,7 +1333,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 
out:
 	r.len = trimmed << 9;
-	if (argp && copy_to_user(argp, &r, sizeof(r)))
+	if (copy_to_user(argp, &r, sizeof(r)))
 		return -EFAULT;
 
 	return ret;
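
The gfs2_fitrim() hunks above stop passing the raw FITRIM byte values through to the rgrp walker and instead convert them to filesystem blocks, clamp minlen against the device's discard granularity, and validate the resulting range. A minimal standalone sketch of that arithmetic; the helper name and the sample values are illustrative, not taken from the patch:

	#include <stdint.h>
	#include <stdio.h>

	/* Byte-to-block conversion as done with sdp->sd_sb.sb_bsize_shift:
	 * assumes the block size is a power of two, e.g. 4096 -> shift 12. */
	static uint64_t bytes_to_blocks(uint64_t bytes, unsigned bs_shift)
	{
		return bytes >> bs_shift;
	}

	int main(void)
	{
		unsigned bs_shift = 12;	/* 4 KiB blocks */
		uint64_t start = bytes_to_blocks(8192, bs_shift);
		uint64_t minlen = bytes_to_blocks(1, bs_shift);

		/* prints "2 0": 8192 bytes is block 2, and a 1-byte minlen
		 * rounds down to zero blocks */
		printf("%llu %llu\n", (unsigned long long)start,
		       (unsigned long long)minlen);
		return 0;
	}

The rounding on the last conversion is why the patch takes the max of r.minlen and discard_granularity before shifting: a sub-block minimum must not silently round down to zero.
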
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index bc737261f234..d6488674d916 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -810,7 +810,8 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
 			return;
 		}
 		need_unlock = 1;
-	}
+	} else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
+		return;
 
 	if (current->journal_info == NULL) {
 		ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index adbd27875ef9..413627072f36 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -155,14 +155,22 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct gfs2_bufdata *bd;
 
+	lock_buffer(bh);
+	gfs2_log_lock(sdp);
 	bd = bh->b_private;
 	if (bd)
 		gfs2_assert(sdp, bd->bd_gl == gl);
 	else {
+		gfs2_log_unlock(sdp);
+		unlock_buffer(bh);
 		gfs2_attach_bufdata(gl, bh, meta);
 		bd = bh->b_private;
+		lock_buffer(bh);
+		gfs2_log_lock(sdp);
 	}
 	lops_add(sdp, bd);
+	gfs2_log_unlock(sdp);
+	unlock_buffer(bh);
}
 
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
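
The gfs2_trans_add_bh() hunk takes the buffer lock and the log lock in the caller and drops both around gfs2_attach_bufdata(), which may allocate and sleep. A userspace sketch of that drop-and-retake shape using POSIX mutexes; struct buf, attach() and the lock names are stand-ins, not GFS2 interfaces:

	#include <pthread.h>
	#include <stdlib.h>

	struct buf {
		pthread_mutex_t lock;
		void *priv;
	};

	static pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Stand-in for gfs2_attach_bufdata(): allocates with no locks held. */
	static void attach(struct buf *b)
	{
		b->priv = malloc(64);
	}

	/* Same shape as the patched function: take buffer lock then log lock;
	 * drop both around the allocating call; retake in the same order. */
	static void trans_add_buf(struct buf *b)
	{
		pthread_mutex_lock(&b->lock);
		pthread_mutex_lock(&log_lock);
		if (!b->priv) {
			pthread_mutex_unlock(&log_lock);
			pthread_mutex_unlock(&b->lock);
			attach(b);		/* may sleep; no locks held */
			pthread_mutex_lock(&b->lock);
			pthread_mutex_lock(&log_lock);
		}
		/* ... the lops_add() equivalent would run here ... */
		pthread_mutex_unlock(&log_lock);
		pthread_mutex_unlock(&b->lock);
	}

	int main(void)
	{
		struct buf b = { PTHREAD_MUTEX_INITIALIZER, NULL };

		trans_add_buf(&b);
		free(b.priv);
		return 0;
	}
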
diff --git a/fs/inode.c b/fs/inode.c
index b03c71957246..64999f144153 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -408,6 +408,19 @@ static void inode_lru_list_add(struct inode *inode)
 	spin_unlock(&inode->i_sb->s_inode_lru_lock);
}
 
+/*
+ * Add inode to LRU if needed (inode is unused and clean).
+ *
+ * Needs inode->i_lock held.
+ */
+void inode_add_lru(struct inode *inode)
+{
+	if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
+	    !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
+		inode_lru_list_add(inode);
+}
+
+
static void inode_lru_list_del(struct inode *inode)
{
 	spin_lock(&inode->i_sb->s_inode_lru_lock);
@@ -1390,8 +1403,7 @@ static void iput_final(struct inode *inode)
 
 	if (!drop && (sb->s_flags & MS_ACTIVE)) {
 		inode->i_state |= I_REFERENCED;
-		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-			inode_lru_list_add(inode);
+		inode_add_lru(inode);
 		spin_unlock(&inode->i_lock);
 		return;
 	}
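
inode_add_lru() folds four state tests into one predicate: the inode must be clean (not dirty or under sync), not already being freed, unreferenced, and on an active superblock. A tiny sketch of the same bitmask test; the flag values here are illustrative, not the kernel's:

	#include <stdbool.h>
	#include <stdio.h>

	enum { I_DIRTY = 1, I_SYNC = 2, I_FREEING = 4, I_WILL_FREE = 8 };

	/* True when an inode is "unused and clean", mirroring inode_add_lru(). */
	static bool lru_eligible(unsigned state, int refcount, bool sb_active)
	{
		return !(state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
		       refcount == 0 && sb_active;
	}

	int main(void)
	{
		printf("%d\n", lru_eligible(0, 0, true));	/* 1: goes on LRU */
		printf("%d\n", lru_eligible(I_DIRTY, 0, true));	/* 0: dirty */
		return 0;
	}
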
diff --git a/fs/internal.h b/fs/internal.h
index 916b7cbf3e3e..2f6af7f645eb 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -110,6 +110,7 @@ extern int open_check_o_direct(struct file *f);
 * inode.c
 */
extern spinlock_t inode_sb_list_lock;
+extern void inode_add_lru(struct inode *inode);
 
/*
 * fs-writeback.c
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 78b7f84241d4..7f5120bf0ec2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1961,7 +1961,9 @@ retry:
 			spin_unlock(&journal->j_list_lock);
 			jbd_unlock_bh_state(bh);
 			spin_unlock(&journal->j_state_lock);
+			unlock_buffer(bh);
 			log_wait_commit(journal, tid);
+			lock_buffer(bh);
 			goto retry;
 		}
 		/*
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 60ef3fb707ff..1506673c087e 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -138,33 +138,39 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 	struct page *pg;
 	struct inode *inode = mapping->host;
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+	struct jffs2_raw_inode ri;
+	uint32_t alloc_len = 0;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	uint32_t pageofs = index << PAGE_CACHE_SHIFT;
 	int ret = 0;
 
+	jffs2_dbg(1, "%s()\n", __func__);
+
+	if (pageofs > inode->i_size) {
+		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+		if (ret)
+			return ret;
+	}
+
+	mutex_lock(&f->sem);
 	pg = grab_cache_page_write_begin(mapping, index, flags);
-	if (!pg)
+	if (!pg) {
+		if (alloc_len)
+			jffs2_complete_reservation(c);
+		mutex_unlock(&f->sem);
 		return -ENOMEM;
+	}
 	*pagep = pg;
 
-	jffs2_dbg(1, "%s()\n", __func__);
-
-	if (pageofs > inode->i_size) {
+	if (alloc_len) {
 		/* Make new hole frag from old EOF to new page */
-		struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
-		struct jffs2_raw_inode ri;
 		struct jffs2_full_dnode *fn;
-		uint32_t alloc_len;
 
 		jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
 			  (unsigned int)inode->i_size, pageofs);
 
-		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
-					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
-		if (ret)
-			goto out_page;
-
-		mutex_lock(&f->sem);
 		memset(&ri, 0, sizeof(ri));
 
 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -191,7 +197,6 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 		if (IS_ERR(fn)) {
 			ret = PTR_ERR(fn);
 			jffs2_complete_reservation(c);
-			mutex_unlock(&f->sem);
 			goto out_page;
 		}
 		ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -206,12 +211,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 			jffs2_mark_node_obsolete(c, fn->raw);
 			jffs2_free_full_dnode(fn);
 			jffs2_complete_reservation(c);
-			mutex_unlock(&f->sem);
 			goto out_page;
 		}
 		jffs2_complete_reservation(c);
 		inode->i_size = pageofs;
-		mutex_unlock(&f->sem);
 	}
 
 	/*
@@ -220,18 +223,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 	 * case of a short-copy.
 	 */
 	if (!PageUptodate(pg)) {
-		mutex_lock(&f->sem);
 		ret = jffs2_do_readpage_nolock(inode, pg);
-		mutex_unlock(&f->sem);
 		if (ret)
 			goto out_page;
 	}
+	mutex_unlock(&f->sem);
 	jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
 	return ret;
 
out_page:
 	unlock_page(pg);
 	page_cache_release(pg);
+	mutex_unlock(&f->sem);
 	return ret;
}
 
diff --git a/fs/namei.c b/fs/namei.c
index 937f9d50c84b..5f4cdf3ad913 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2131,6 +2131,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 	if (!len)
 		return ERR_PTR(-EACCES);
 
+	if (unlikely(name[0] == '.')) {
+		if (len < 2 || (len == 2 && name[1] == '.'))
+			return ERR_PTR(-EACCES);
+	}
+
 	while (len--) {
 		c = *(const unsigned char *)name++;
 		if (c == '/' || c == '\0')
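
The lookup_one_len() hunk rejects the names "." and ".." up front rather than letting them reach the dcache. A standalone sketch of exactly that check; the helper is hypothetical, the logic mirrors the added lines:

	#include <stdbool.h>
	#include <stdio.h>

	/* Any name starting with '.' is rejected only when it is exactly
	 * "." (len 1) or ".." (len 2); ".hidden" and "..x" still pass. */
	static bool is_dot_or_dotdot(const char *name, int len)
	{
		return name[0] == '.' &&
		       (len < 2 || (len == 2 && name[1] == '.'));
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       is_dot_or_dotdot(".", 1),		/* 1: rejected */
		       is_dot_or_dotdot("..", 2),	/* 1: rejected */
		       is_dot_or_dotdot(".profile", 8));	/* 0: allowed */
		return 0;
	}
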
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ce8cb926526b..b9e66b7e0c14 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -450,7 +450,8 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 			nfs_refresh_inode(dentry->d_inode, entry->fattr);
 			goto out;
 		} else {
-			d_drop(dentry);
+			if (d_invalidate(dentry) != 0)
+				goto out;
 			dput(dentry);
 		}
 	}
@@ -1100,6 +1101,8 @@ out_set_verifier:
out_zap_parent:
 	nfs_zap_caches(dir);
 out_bad:
+	nfs_free_fattr(fattr);
+	nfs_free_fhandle(fhandle);
 	nfs_mark_for_revalidate(dir);
 	if (inode && S_ISDIR(inode->i_mode)) {
 		/* Purge readdir caches. */
@@ -1112,8 +1115,6 @@ out_zap_parent:
 		shrink_dcache_parent(dentry);
 	}
 	d_drop(dentry);
-	nfs_free_fattr(fattr);
-	nfs_free_fhandle(fhandle);
 	dput(parent);
 	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
 			__func__, dentry->d_parent->d_name.name,
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 31c26c4dcc23..ca4b11ec87a2 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -217,7 +217,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
{
 	char buf1[NFS_DNS_HOSTNAME_MAXLEN+1];
 	struct nfs_dns_ent key, *item;
-	unsigned long ttl;
+	unsigned int ttl;
 	ssize_t len;
 	int ret = -EINVAL;
 
@@ -240,7 +240,8 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
 	key.namelen = len;
 	memset(&key.h, 0, sizeof(key.h));
 
-	ttl = get_expiry(&buf);
+	if (get_uint(&buf, &ttl) < 0)
+		goto out;
 	if (ttl == 0)
 		goto out;
 	key.h.expiry_time = ttl + seconds_since_boot();
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5c7325c5c5e6..6fa01aea2488 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -685,7 +685,10 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 	if (ctx->cred != NULL)
 		put_rpccred(ctx->cred);
 	dput(ctx->dentry);
-	nfs_sb_deactive(sb);
+	if (is_sync)
+		nfs_sb_deactive(sb);
+	else
+		nfs_sb_deactive_async(sb);
 	kfree(ctx->mdsthreshold);
 	kfree(ctx);
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 59b133c5d652..05521cadac2e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -351,10 +351,12 @@ extern int __init register_nfs_fs(void);
extern void __exit unregister_nfs_fs(void);
extern void nfs_sb_active(struct super_block *sb);
extern void nfs_sb_deactive(struct super_block *sb);
+extern void nfs_sb_deactive_async(struct super_block *sb);
 
/* namespace.c */
+#define NFS_PATH_CANONICAL 1
extern char *nfs_path(char **p, struct dentry *dentry,
-		      char *buffer, ssize_t buflen);
+		      char *buffer, ssize_t buflen, unsigned flags);
extern struct vfsmount *nfs_d_automount(struct path *path);
struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
			      struct nfs_fh *, struct nfs_fattr *);
@@ -498,7 +500,7 @@ static inline char *nfs_devname(struct dentry *dentry,
 				char *buffer, ssize_t buflen)
{
 	char *dummy;
-	return nfs_path(&dummy, dentry, buffer, buflen);
+	return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL);
}
 
/*
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 8e65c7f1f87c..015f71f8f62c 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -181,7 +181,7 @@ int nfs_mount(struct nfs_mount_request *info)
 	else
 		msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT];
 
-	status = rpc_call_sync(mnt_clnt, &msg, 0);
+	status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT);
 	rpc_shutdown_client(mnt_clnt);
 
 	if (status < 0)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 655925373b91..dd057bc6b65b 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -33,6 +33,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
 * @dentry - pointer to dentry
 * @buffer - result buffer
 * @buflen - length of buffer
+ * @flags - options (see below)
 *
 * Helper function for constructing the server pathname
 * by arbitrary hashed dentry.
@@ -40,8 +41,14 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
 * This is mainly for use in figuring out the path on the
 * server side when automounting on top of an existing partition
 * and in generating /proc/mounts and friends.
+ *
+ * Supported flags:
+ * NFS_PATH_CANONICAL: ensure there is exactly one slash after
+ *		       the original device (export) name
+ *		       (if unset, the original name is returned verbatim)
 */
-char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen)
+char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen,
+	       unsigned flags)
{
 	char *end;
 	int namelen;
@@ -74,7 +81,7 @@ rename_retry:
 		rcu_read_unlock();
 		goto rename_retry;
 	}
-	if (*end != '/') {
+	if ((flags & NFS_PATH_CANONICAL) && *end != '/') {
 		if (--buflen < 0) {
 			spin_unlock(&dentry->d_lock);
 			rcu_read_unlock();
@@ -91,9 +98,11 @@ rename_retry:
 		return end;
 	}
 	namelen = strlen(base);
-	/* Strip off excess slashes in base string */
-	while (namelen > 0 && base[namelen - 1] == '/')
-		namelen--;
+	if (flags & NFS_PATH_CANONICAL) {
+		/* Strip off excess slashes in base string */
+		while (namelen > 0 && base[namelen - 1] == '/')
+			namelen--;
+	}
 	buflen -= namelen;
 	if (buflen < 0) {
 		spin_unlock(&dentry->d_lock);
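
The NFS_PATH_CANONICAL flag only changes how the devname base string is normalized: trailing slashes are stripped when the flag is set and kept verbatim otherwise, which is what lets nfs_show_devname() reproduce the mount string exactly as the user gave it. A small sketch of that normalization; the helper and the macro stand in for the kernel API rather than reproducing it:

	#include <stdio.h>
	#include <string.h>

	#define PATH_CANONICAL 1	/* stand-in for NFS_PATH_CANONICAL */

	/* Effective length of base: with PATH_CANONICAL, excess trailing '/'
	 * characters are dropped, matching the patched nfs_path(). */
	static size_t base_len(const char *base, unsigned flags)
	{
		size_t n = strlen(base);

		if (flags & PATH_CANONICAL)
			while (n > 0 && base[n - 1] == '/')
				n--;
		return n;
	}

	int main(void)
	{
		/* prints "14 17": canonical drops the extra slashes */
		printf("%zu %zu\n",
		       base_len("server:/export///", PATH_CANONICAL),
		       base_len("server:/export///", 0));
		return 0;
	}
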
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 79fbb61ce202..1e09eb78543b 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -81,7 +81,8 @@ static char *nfs_path_component(const char *nfspath, const char *end)
static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
{
 	char *limit;
-	char *path = nfs_path(&limit, dentry, buffer, buflen);
+	char *path = nfs_path(&limit, dentry, buffer, buflen,
+			      NFS_PATH_CANONICAL);
 	if (!IS_ERR(path)) {
 		char *path_component = nfs_path_component(path, limit);
 		if (path_component)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 68b21d81b7ac..5eec4429970c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -339,8 +339,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
 			dprintk("%s ERROR: %d Reset session\n", __func__,
 				errorcode);
 			nfs4_schedule_session_recovery(clp->cl_session, errorcode);
-			exception->retry = 1;
-			break;
+			goto wait_on_recovery;
#endif /* defined(CONFIG_NFS_V4_1) */
 		case -NFS4ERR_FILE_OPEN:
 			if (exception->timeout > HZ) {
@@ -1572,9 +1571,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 	data->timestamp = jiffies;
 	if (nfs4_setup_sequence(data->o_arg.server,
 				&data->o_arg.seq_args,
-				&data->o_res.seq_res, task))
-		return;
-	rpc_call_start(task);
+				&data->o_res.seq_res,
+				task) != 0)
+		nfs_release_seqid(data->o_arg.seqid);
+	else
+		rpc_call_start(task);
 	return;
unlock_no_action:
 	rcu_read_unlock();
@@ -1748,7 +1749,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
 
 	/* even though OPEN succeeded, access is denied. Close the file */
 	nfs4_close_state(state, fmode);
-	return -NFS4ERR_ACCESS;
+	return -EACCES;
}
 
/*
@@ -2196,7 +2197,7 @@ static void nfs4_free_closedata(void *data)
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
 	nfs4_put_state_owner(sp);
-	nfs_sb_deactive(sb);
+	nfs_sb_deactive_async(sb);
 	kfree(calldata);
}
 
@@ -2296,9 +2297,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 	if (nfs4_setup_sequence(NFS_SERVER(inode),
 				&calldata->arg.seq_args,
 				&calldata->res.seq_res,
-				task))
-		goto out;
-	rpc_call_start(task);
+				task) != 0)
+		nfs_release_seqid(calldata->arg.seqid);
+	else
+		rpc_call_start(task);
out:
 	dprintk("%s: done!\n", __func__);
}
@@ -4529,6 +4531,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 		if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
 			rpc_restart_call_prepare(task);
 	}
+	nfs_release_seqid(calldata->arg.seqid);
}
 
static void nfs4_locku_prepare(struct rpc_task *task, void *data)
@@ -4545,9 +4548,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 	calldata->timestamp = jiffies;
 	if (nfs4_setup_sequence(calldata->server,
 				&calldata->arg.seq_args,
-				&calldata->res.seq_res, task))
-		return;
-	rpc_call_start(task);
+				&calldata->res.seq_res,
+				task) != 0)
+		nfs_release_seqid(calldata->arg.seqid);
+	else
+		rpc_call_start(task);
}
 
static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4692,7 +4697,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 	/* Do we need to do an open_to_lock_owner? */
 	if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
 		if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
-			return;
+			goto out_release_lock_seqid;
 		data->arg.open_stateid = &state->stateid;
 		data->arg.new_lock_owner = 1;
 		data->res.open_seqid = data->arg.open_seqid;
@@ -4701,10 +4706,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 	data->timestamp = jiffies;
 	if (nfs4_setup_sequence(data->server,
 				&data->arg.seq_args,
-				&data->res.seq_res, task))
+				&data->res.seq_res,
+				task) == 0) {
+		rpc_call_start(task);
 		return;
-	rpc_call_start(task);
-	dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
+	}
+	nfs_release_seqid(data->arg.open_seqid);
+out_release_lock_seqid:
+	nfs_release_seqid(data->arg.lock_seqid);
+	dprintk("%s: done!, ret = %d\n", __func__, task->tk_status);
}
 
static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
@@ -5667,7 +5677,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
 		tbl->slots = new;
 		tbl->max_slots = max_slots;
 	}
-	tbl->highest_used_slotid = -1;	/* no slot is currently used */
+	tbl->highest_used_slotid = NFS4_NO_SLOT;
 	for (i = 0; i < tbl->max_slots; i++)
 		tbl->slots[i].seq_nr = ivalue;
 	spin_unlock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index fe624c91bd00..2878f97bd78d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -925,8 +925,8 @@ pnfs_find_alloc_layout(struct inode *ino,
 	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
 		nfsi->layout = new;
 		return new;
-	}
-	pnfs_free_layout_hdr(new);
+	} else if (new != NULL)
+		pnfs_free_layout_hdr(new);
out_existing:
 	pnfs_get_layout_hdr(nfsi->layout);
 	return nfsi->layout;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e831bce49766..652d3f7176a9 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -54,6 +54,7 @@
#include <linux/parser.h>
#include <linux/nsproxy.h>
#include <linux/rcupdate.h>
+#include <linux/kthread.h>
 
#include <asm/uaccess.h>
 
@@ -415,6 +416,54 @@ void nfs_sb_deactive(struct super_block *sb)
}
EXPORT_SYMBOL_GPL(nfs_sb_deactive);
 
+static int nfs_deactivate_super_async_work(void *ptr)
+{
+	struct super_block *sb = ptr;
+
+	deactivate_super(sb);
+	module_put_and_exit(0);
+	return 0;
+}
+
+/*
+ * same effect as deactivate_super, but will do final unmount in kthread
+ * context
+ */
+static void nfs_deactivate_super_async(struct super_block *sb)
+{
+	struct task_struct *task;
+	char buf[INET6_ADDRSTRLEN + 1];
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_client *clp = server->nfs_client;
+
+	if (!atomic_add_unless(&sb->s_active, -1, 1)) {
+		rcu_read_lock();
+		snprintf(buf, sizeof(buf),
+			rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+		rcu_read_unlock();
+
+		__module_get(THIS_MODULE);
+		task = kthread_run(nfs_deactivate_super_async_work, sb,
+				"%s-deactivate-super", buf);
+		if (IS_ERR(task)) {
+			pr_err("%s: kthread_run: %ld\n",
+				__func__, PTR_ERR(task));
+			/* make synchronous call and hope for the best */
+			deactivate_super(sb);
+			module_put(THIS_MODULE);
+		}
+	}
+}
+
+void nfs_sb_deactive_async(struct super_block *sb)
+{
+	struct nfs_server *server = NFS_SB(sb);
+
+	if (atomic_dec_and_test(&server->active))
+		nfs_deactivate_super_async(sb);
+}
+EXPORT_SYMBOL_GPL(nfs_sb_deactive_async);
+
/*
 * Deliver file system statistics to userspace
 */
@@ -771,7 +820,7 @@ int nfs_show_devname(struct seq_file *m, struct dentry *root)
 	int err = 0;
 	if (!page)
 		return -ENOMEM;
-	devname = nfs_path(&dummy, root, page, PAGE_SIZE);
+	devname = nfs_path(&dummy, root, page, PAGE_SIZE, 0);
 	if (IS_ERR(devname))
 		err = PTR_ERR(devname);
 	else
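
The pivot of nfs_deactivate_super_async() above is atomic_add_unless(&sb->s_active, -1, 1): it drops a non-final reference inline, and returns 0 only when s_active is already 1, i.e. when this caller holds the last reference and the final (possibly sleeping) unmount must be handed to the kthread. A userspace sketch of that semantic; a plain int stands in for atomic_t and the helper is not atomic, illustration only:

	#include <stdio.h>

	/* Sketch of atomic_add_unless(v, a, u): add a to *v unless *v == u;
	 * returns nonzero when the add happened. */
	static int add_unless(int *v, int a, int u)
	{
		if (*v == u)
			return 0;
		*v += a;
		return 1;
	}

	int main(void)
	{
		int s_active = 3;

		if (add_unless(&s_active, -1, 1))
			printf("dropped inline, s_active now %d\n", s_active);

		s_active = 1;
		if (!add_unless(&s_active, -1, 1))
			printf("last reference: defer deactivate_super()\n");
		return 0;
	}
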
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 13cea637eff8..3f79c77153b8 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata)
 
 	nfs_dec_sillycount(data->dir);
 	nfs_free_unlinkdata(data);
-	nfs_sb_deactive(sb);
+	nfs_sb_deactive_async(sb);
}
 
static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f35794b97e8e..a50636025364 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 		if ((old->path.mnt == new->path.mnt) &&
 		    (old->path.dentry == new->path.dentry))
 			return true;
+		break;
 	case (FSNOTIFY_EVENT_NONE):
 		return true;
 	default:
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 721d692fa8d4..6fcaeb8c902e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -258,7 +258,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	if (ret)
 		goto out_close_fd;
 
-	fd_install(fd, f);
+	if (fd != FAN_NOFD)
+		fd_install(fd, f);
 	return fanotify_event_metadata.event_len;
 
out_close_fd:
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 144a96732dd7..9e28356a959a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -873,6 +873,113 @@ static const struct file_operations proc_environ_operations = {
 	.release	= mem_release,
};
 
+static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
+			    loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	char buffer[PROC_NUMBUF];
+	int oom_adj = OOM_ADJUST_MIN;
+	size_t len;
+	unsigned long flags;
+
+	if (!task)
+		return -ESRCH;
+	if (lock_task_sighand(task, &flags)) {
+		if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
+			oom_adj = OOM_ADJUST_MAX;
+		else
+			oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
+				  OOM_SCORE_ADJ_MAX;
+		unlock_task_sighand(task, &flags);
+	}
+	put_task_struct(task);
+	len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
+	return simple_read_from_buffer(buf, count, ppos, buffer, len);
+}
+
+static ssize_t oom_adj_write(struct file *file, const char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	char buffer[PROC_NUMBUF];
+	int oom_adj;
+	unsigned long flags;
+	int err;
+
+	memset(buffer, 0, sizeof(buffer));
+	if (count > sizeof(buffer) - 1)
+		count = sizeof(buffer) - 1;
+	if (copy_from_user(buffer, buf, count)) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	err = kstrtoint(strstrip(buffer), 0, &oom_adj);
+	if (err)
+		goto out;
+	if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
+	     oom_adj != OOM_DISABLE) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	task = get_proc_task(file->f_path.dentry->d_inode);
+	if (!task) {
+		err = -ESRCH;
+		goto out;
+	}
+
+	task_lock(task);
+	if (!task->mm) {
+		err = -EINVAL;
+		goto err_task_lock;
+	}
+
+	if (!lock_task_sighand(task, &flags)) {
+		err = -ESRCH;
+		goto err_task_lock;
+	}
+
+	/*
+	 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
+	 * value is always attainable.
+	 */
+	if (oom_adj == OOM_ADJUST_MAX)
+		oom_adj = OOM_SCORE_ADJ_MAX;
+	else
+		oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
+
+	if (oom_adj < task->signal->oom_score_adj &&
+	    !capable(CAP_SYS_RESOURCE)) {
+		err = -EACCES;
+		goto err_sighand;
+	}
+
+	/*
+	 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
+	 * /proc/pid/oom_score_adj instead.
+	 */
+	printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
+		    current->comm, task_pid_nr(current), task_pid_nr(task),
+		    task_pid_nr(task));
+
+	task->signal->oom_score_adj = oom_adj;
+	trace_oom_score_adj_update(task);
+err_sighand:
+	unlock_task_sighand(task, &flags);
+err_task_lock:
+	task_unlock(task);
+	put_task_struct(task);
+out:
+	return err < 0 ? err : count;
+}
+
+static const struct file_operations proc_oom_adj_operations = {
+	.read		= oom_adj_read,
+	.write		= oom_adj_write,
+	.llseek		= generic_file_llseek,
+};
+
static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
					size_t count, loff_t *ppos)
{
@@ -1770,8 +1877,9 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
 	if (!vma)
 		goto out_no_vma;
 
-	result = proc_map_files_instantiate(dir, dentry, task,
-			(void *)(unsigned long)vma->vm_file->f_mode);
+	if (vma->vm_file)
+		result = proc_map_files_instantiate(dir, dentry, task,
+				(void *)(unsigned long)vma->vm_file->f_mode);
 
out_no_vma:
 	up_read(&mm->mmap_sem);
@@ -2598,6 +2706,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("cgroup",  S_IRUGO, proc_cgroup_operations),
#endif
 	INF("oom_score",  S_IRUGO, proc_oom_score),
+	REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
 	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
 	REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
@@ -2964,6 +3073,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	REG("cgroup",  S_IRUGO, proc_cgroup_operations),
#endif
 	INF("oom_score", S_IRUGO, proc_oom_score),
+	REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
 	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
 	REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
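
The reinstated oom_adj_write() maps the legacy oom_adj range onto oom_score_adj linearly, with the maximum special-cased so it stays reachable. Restating the constants used here as an assumption (OOM_DISABLE = -17, OOM_ADJUST_MAX = 15, OOM_SCORE_ADJ_MAX = 1000 in this kernel), the conversion with a few worked values:

	#include <stdio.h>

	#define OOM_DISABLE		(-17)
	#define OOM_ADJUST_MAX		15
	#define OOM_SCORE_ADJ_MAX	1000

	/* Same scaling as oom_adj_write(): values scale by 1000/17, and
	 * OOM_ADJUST_MAX is special-cased so 15 reaches exactly 1000
	 * rather than 15*1000/17 = 882. */
	static int oom_adj_to_score_adj(int oom_adj)
	{
		if (oom_adj == OOM_ADJUST_MAX)
			return OOM_SCORE_ADJ_MAX;
		return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
	}

	int main(void)
	{
		printf("%d\n", oom_adj_to_score_adj(0));	/* 0 */
		printf("%d\n", oom_adj_to_score_adj(8));	/* 470 */
		printf("%d\n", oom_adj_to_score_adj(-16));	/* -941 */
		printf("%d\n", oom_adj_to_score_adj(15));	/* 1000 */
		return 0;
	}

The read side applies the inverse (score * 17 / 1000), which is why round-tripping a legacy value through both files is lossy.
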
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index a40da07e93d6..947fbe06c3b1 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -161,6 +161,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
 
 	while (s < e) {
 		unsigned long flags;
+		u64 id;
 
 		if (c > psinfo->bufsize)
 			c = psinfo->bufsize;
@@ -172,7 +173,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
 			spin_lock_irqsave(&psinfo->buf_lock, flags);
 		}
 		memcpy(psinfo->buf, s, c);
-		psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo);
+		psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, c, psinfo);
 		spin_unlock_irqrestore(&psinfo->buf_lock, flags);
 		s += c;
 		c = e - s;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f27f01a98aa2..d83736fbc26c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1782,8 +1782,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 
 	BUG_ON(!th->t_trans_id);
 
-	dquot_initialize(inode);
+	reiserfs_write_unlock(inode->i_sb);
 	err = dquot_alloc_inode(inode);
+	reiserfs_write_lock(inode->i_sb);
 	if (err)
 		goto out_end_trans;
 	if (!dir->i_nlink) {
@@ -1979,8 +1980,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 
 out_end_trans:
 	journal_end(th, th->t_super, th->t_blocks_allocated);
+	reiserfs_write_unlock(inode->i_sb);
 	/* Drop can be outside and it needs more credits so it's better to have it outside */
 	dquot_drop(inode);
+	reiserfs_write_lock(inode->i_sb);
 	inode->i_flags |= S_NOQUOTA;
 	make_bad_inode(inode);
 
@@ -3103,10 +3106,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 	/* must be turned off for recursive notify_change calls */
 	ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
 
-	depth = reiserfs_write_lock_once(inode->i_sb);
 	if (is_quota_modification(inode, attr))
 		dquot_initialize(inode);
-
+	depth = reiserfs_write_lock_once(inode->i_sb);
 	if (attr->ia_valid & ATTR_SIZE) {
 		/* version 2 items will be caught by the s_maxbytes check
 		** done for us in vmtruncate
@@ -3170,7 +3172,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 		error = journal_begin(&th, inode->i_sb, jbegin_count);
 		if (error)
 			goto out;
+		reiserfs_write_unlock_once(inode->i_sb, depth);
 		error = dquot_transfer(inode, attr);
+		depth = reiserfs_write_lock_once(inode->i_sb);
 		if (error) {
 			journal_end(&th, inode->i_sb, jbegin_count);
 			goto out;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index f8afa4b162b8..2f40a4c70a4d 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
 		     key2type(&(key->on_disk_key)));
#endif
 
+	reiserfs_write_unlock(inode->i_sb);
 	retval = dquot_alloc_space_nodirty(inode, pasted_size);
+	reiserfs_write_lock(inode->i_sb);
 	if (retval) {
 		pathrelse(search_path);
 		return retval;
@@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
 		       "reiserquota insert_item(): allocating %u id=%u type=%c",
 		       quota_bytes, inode->i_uid, head2type(ih));
#endif
+	reiserfs_write_unlock(inode->i_sb);
 	/* We can't dirty inode here. It would be immediately written but
 	 * appropriate stat item isn't inserted yet... */
 	retval = dquot_alloc_space_nodirty(inode, quota_bytes);
+	reiserfs_write_lock(inode->i_sb);
 	if (retval) {
 		pathrelse(path);
 		return retval;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1078ae179993..418bdc3a57da 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -298,7 +298,9 @@ static int finish_unfinished(struct super_block *s)
 			retval = remove_save_link_only(s, &save_link_key, 0);
 			continue;
 		}
+		reiserfs_write_unlock(s);
 		dquot_initialize(inode);
+		reiserfs_write_lock(s);
 
 		if (truncate && S_ISDIR(inode->i_mode)) {
 			/* We got a truncate request for a dir which is impossible.
@@ -1335,7 +1337,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 				kfree(qf_names[i]);
#endif
 		err = -EINVAL;
-		goto out_err;
+		goto out_unlock;
 	}
#ifdef CONFIG_QUOTA
 	handle_quota_files(s, qf_names, &qfmt);
@@ -1379,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 	if (blocks) {
 		err = reiserfs_resize(s, blocks);
 		if (err != 0)
-			goto out_err;
+			goto out_unlock;
 	}
 
 	if (*mount_flags & MS_RDONLY) {
@@ -1389,9 +1391,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 			/* it is read-only already */
 			goto out_ok;
 
+		/*
+		 * Drop write lock. Quota will retake it when needed and lock
+		 * ordering requires calling dquot_suspend() without it.
+		 */
+		reiserfs_write_unlock(s);
 		err = dquot_suspend(s, -1);
 		if (err < 0)
 			goto out_err;
+		reiserfs_write_lock(s);
 
 		/* try to remount file system with read-only permissions */
 		if (sb_umount_state(rs) == REISERFS_VALID_FS
@@ -1401,7 +1409,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 
 		err = journal_begin(&th, s, 10);
 		if (err)
-			goto out_err;
+			goto out_unlock;
 
 		/* Mounting a rw partition read-only. */
 		reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1416,7 +1424,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 
 		if (reiserfs_is_journal_aborted(journal)) {
 			err = journal->j_errno;
-			goto out_err;
+			goto out_unlock;
 		}
 
 		handle_data_mode(s, mount_options);
@@ -1425,7 +1433,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 		s->s_flags &= ~MS_RDONLY;	/* now it is safe to call journal_begin */
 		err = journal_begin(&th, s, 10);
 		if (err)
-			goto out_err;
+			goto out_unlock;
 
 		/* Mount a partition which is read-only, read-write */
 		reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1442,10 +1450,16 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
 	SB_JOURNAL(s)->j_must_wait = 1;
 	err = journal_end(&th, s, 10);
 	if (err)
-		goto out_err;
+		goto out_unlock;
 
 	if (!(*mount_flags & MS_RDONLY)) {
+		/*
+		 * Drop write lock. Quota will retake it when needed and lock
+		 * ordering requires calling dquot_resume() without it.
+		 */
+		reiserfs_write_unlock(s);
 		dquot_resume(s, -1);
+		reiserfs_write_lock(s);
 		finish_unfinished(s);
 		reiserfs_xattr_init(s, *mount_flags);
 	}
@@ -1455,9 +1469,10 @@ out_ok:
 	reiserfs_write_unlock(s);
 	return 0;
 
+out_unlock:
+	reiserfs_write_unlock(s);
out_err:
 	kfree(new_opts);
-	reiserfs_write_unlock(s);
 	return err;
}
 
@@ -2095,13 +2110,15 @@ static int reiserfs_write_dquot(struct dquot *dquot)
 			  REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
 	if (ret)
 		goto out;
+	reiserfs_write_unlock(dquot->dq_sb);
 	ret = dquot_commit(dquot);
+	reiserfs_write_lock(dquot->dq_sb);
 	err =
 	    journal_end(&th, dquot->dq_sb,
 			REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
 	if (!ret && err)
 		ret = err;
-      out:
+out:
 	reiserfs_write_unlock(dquot->dq_sb);
 	return ret;
}
@@ -2117,13 +2134,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
 			  REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
 	if (ret)
 		goto out;
+	reiserfs_write_unlock(dquot->dq_sb);
 	ret = dquot_acquire(dquot);
+	reiserfs_write_lock(dquot->dq_sb);
 	err =
 	    journal_end(&th, dquot->dq_sb,
 			REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
 	if (!ret && err)
 		ret = err;
-      out:
+out:
 	reiserfs_write_unlock(dquot->dq_sb);
 	return ret;
}
@@ -2137,19 +2156,21 @@ static int reiserfs_release_dquot(struct dquot *dquot)
 	ret =
 	    journal_begin(&th, dquot->dq_sb,
 			  REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+	reiserfs_write_unlock(dquot->dq_sb);
 	if (ret) {
 		/* Release dquot anyway to avoid endless cycle in dqput() */
 		dquot_release(dquot);
 		goto out;
 	}
 	ret = dquot_release(dquot);
+	reiserfs_write_lock(dquot->dq_sb);
 	err =
 	    journal_end(&th, dquot->dq_sb,
 			REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
 	if (!ret && err)
 		ret = err;
-      out:
 	reiserfs_write_unlock(dquot->dq_sb);
+out:
 	return ret;
}
 
@@ -2174,11 +2195,13 @@ static int reiserfs_write_info(struct super_block *sb, int type)
 	ret = journal_begin(&th, sb, 2);
 	if (ret)
 		goto out;
+	reiserfs_write_unlock(sb);
 	ret = dquot_commit_info(sb, type);
+	reiserfs_write_lock(sb);
 	err = journal_end(&th, sb, 2);
 	if (!ret && err)
 		ret = err;
-      out:
+out:
 	reiserfs_write_unlock(sb);
 	return ret;
}
@@ -2203,8 +2226,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 	struct reiserfs_transaction_handle th;
 	int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
 
-	if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
-		return -EINVAL;
+	reiserfs_write_lock(sb);
+	if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	/* Quotafile not on the same filesystem? */
 	if (path->dentry->d_sb != sb) {
@@ -2246,8 +2272,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
 		if (err)
 			goto out;
 	}
-	err = dquot_quota_on(sb, type, format_id, path);
+	reiserfs_write_unlock(sb);
+	return dquot_quota_on(sb, type, format_id, path);
out:
+	reiserfs_write_unlock(sb);
 	return err;
}
 
@@ -2320,7 +2348,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
 		tocopy = sb->s_blocksize - offset < towrite ?
 		    sb->s_blocksize - offset : towrite;
 		tmp_bh.b_state = 0;
+		reiserfs_write_lock(sb);
 		err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
+		reiserfs_write_unlock(sb);
 		if (err)
 			goto out;
 		if (offset || tocopy != sb->s_blocksize)
@@ -2336,10 +2366,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
 		flush_dcache_page(bh->b_page);
 		set_buffer_uptodate(bh);
 		unlock_buffer(bh);
+		reiserfs_write_lock(sb);
 		reiserfs_prepare_for_journal(sb, bh, 1);
 		journal_mark_dirty(current->journal_info, sb, bh);
 		if (!journal_quota)
 			reiserfs_add_ordered_list(inode, bh);
+		reiserfs_write_unlock(sb);
 		brelse(bh);
 		offset = 0;
 		towrite -= tocopy;
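
Every hunk in this file follows one recipe: the quota code takes its own locks that rank above the reiserfs per-sb write lock, so each dquot_*() call is bracketed by reiserfs_write_unlock()/reiserfs_write_lock(). A compressed sketch of that recipe under stated assumptions; struct sb and all helpers below are declared stand-ins, not the reiserfs API, and the fragment is meant to compile but not link on its own:

	struct sb;				/* opaque superblock stand-in */
	int journal_begin(struct sb *s);	/* stand-ins for the journalling */
	int journal_end(struct sb *s);		/* and locking calls */
	void fs_write_lock(struct sb *s);
	void fs_write_unlock(struct sb *s);

	/* The lock-ordering rule being enforced: release the fs write lock
	 * for the duration of the quota operation, retake it before the
	 * journal is touched again. */
	int journaled_quota_op(struct sb *s, int (*quota_op)(struct sb *))
	{
		int ret, err;

		ret = journal_begin(s);
		if (ret)
			return ret;
		fs_write_unlock(s);
		ret = quota_op(s);
		fs_write_lock(s);
		err = journal_end(s);
		return ret ? ret : err;
	}

The out_unlock label added to reiserfs_remount() exists for the same reason: once an error path may run with the lock either held or dropped, the unlock has to happen on exactly one of the two goto targets.
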
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 28ec13af28d9..2dcf3d473fec 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
 	if (!lprops) {
 		lprops = ubifs_fast_find_freeable(c);
 		if (!lprops) {
-			ubifs_assert(c->freeable_cnt == 0);
-			if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+			/*
+			 * The first condition means the following: go scan the
+			 * LPT if there are uncategorized lprops, which means
+			 * there may be freeable LEBs there (UBIFS does not
+			 * store the information about freeable LEBs in the
+			 * master node).
+			 */
+			if (c->in_a_category_cnt != c->main_lebs ||
+			    c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+				ubifs_assert(c->freeable_cnt == 0);
 				lprops = scan_for_leb_for_idx(c);
 				if (IS_ERR(lprops)) {
 					err = PTR_ERR(lprops);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index e5a2a35a46dc..46190a7c42a6 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
 	default:
 		ubifs_assert(0);
 	}
+
 	lprops->flags &= ~LPROPS_CAT_MASK;
 	lprops->flags |= cat;
+	c->in_a_category_cnt += 1;
+	ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
}
 
/**
@@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
 	default:
 		ubifs_assert(0);
 	}
+
+	c->in_a_category_cnt -= 1;
+	ubifs_assert(c->in_a_category_cnt >= 0);
}
 
/**
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 5486346d0a3f..d133c276fe05 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1183,6 +1183,8 @@ struct ubifs_debug_info;
 * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
 * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
 * @freeable_cnt: number of freeable LEBs in @freeable_list
+ * @in_a_category_cnt: count of lprops which are in a certain category, which
+ *                     basically means that they were loaded from the flash
 *
 * @ltab_lnum: LEB number of LPT's own lprops table
 * @ltab_offs: offset of LPT's own lprops table
@@ -1412,6 +1414,7 @@ struct ubifs_info {
 	struct list_head freeable_list;
 	struct list_head frdi_idx_list;
 	int freeable_cnt;
+	int in_a_category_cnt;
 
 	int ltab_lnum;
 	int ltab_offs;
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4f33c32affe3..335206a9c698 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1866,6 +1866,7 @@ xfs_alloc_fix_freelist(
1866 /* 1866 /*
1867 * Initialize the args structure. 1867 * Initialize the args structure.
1868 */ 1868 */
1869 memset(&targs, 0, sizeof(targs));
1869 targs.tp = tp; 1870 targs.tp = tp;
1870 targs.mp = mp; 1871 targs.mp = mp;
1871 targs.agbp = agbp; 1872 targs.agbp = agbp;
@@ -2207,7 +2208,7 @@ xfs_alloc_read_agf(
2207 * group or loop over the allocation groups to find the result. 2208 * group or loop over the allocation groups to find the result.
2208 */ 2209 */
2209int /* error */ 2210int /* error */
2210__xfs_alloc_vextent( 2211xfs_alloc_vextent(
2211 xfs_alloc_arg_t *args) /* allocation argument structure */ 2212 xfs_alloc_arg_t *args) /* allocation argument structure */
2212{ 2213{
2213 xfs_agblock_t agsize; /* allocation group size */ 2214 xfs_agblock_t agsize; /* allocation group size */
@@ -2417,46 +2418,6 @@ error0:
 	return error;
 }

-static void
-xfs_alloc_vextent_worker(
-	struct work_struct	*work)
-{
-	struct xfs_alloc_arg	*args = container_of(work,
-						struct xfs_alloc_arg, work);
-	unsigned long		pflags;
-
-	/* we are in a transaction context here */
-	current_set_flags_nested(&pflags, PF_FSTRANS);
-
-	args->result = __xfs_alloc_vextent(args);
-	complete(args->done);
-
-	current_restore_flags_nested(&pflags, PF_FSTRANS);
-}
-
-/*
- * Data allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Metadata
- * requests, OTOH, are generally from low stack usage paths, so avoid the
- * context switch overhead here.
- */
-int
-xfs_alloc_vextent(
-	struct xfs_alloc_arg	*args)
-{
-	DECLARE_COMPLETION_ONSTACK(done);
-
-	if (!args->userdata)
-		return __xfs_alloc_vextent(args);
-
-
-	args->done = &done;
-	INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
-	queue_work(xfs_alloc_wq, &args->work);
-	wait_for_completion(&done);
-	return args->result;
-}
-
 /*
  * Free an extent.
  * Just break up the extent address and hand off to xfs_free_ag_extent
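
[Editorial note] The block removed above pushed allocations onto xfs_alloc_wq so they would run on a fresh worker stack, with the caller parked on an on-stack completion; the same pattern is reintroduced for __xfs_bmapi_allocate() in fs/xfs/xfs_bmap.c below. A userspace analogue of the hand-off, using a pthread in place of the kernel workqueue (names are illustrative, not kernel API; build with -pthread):

#include <pthread.h>
#include <stdio.h>

/* illustrative argument block mirroring the done/work/result fields */
struct alloc_args {
	int input;
	int result;
};

static int do_allocate(struct alloc_args *args)
{
	/* the deep-stack work would happen here */
	return args->input * 2;
}

/* worker entry point: runs with its own (fresh) stack */
static void *alloc_worker(void *p)
{
	struct alloc_args *args = p;

	args->result = do_allocate(args);
	return NULL;	/* pthread_join below plays the role of complete()/wait */
}

static int allocate(struct alloc_args *args, int stack_switch)
{
	pthread_t worker;

	if (!stack_switch)
		return do_allocate(args);	/* direct call: no context switch */

	/* hand the request to a thread with plenty of stack, then wait */
	pthread_create(&worker, NULL, alloc_worker, args);
	pthread_join(worker, NULL);
	return args->result;
}

int main(void)
{
	struct alloc_args args = { .input = 21 };

	printf("direct: %d\n", allocate(&args, 0));
	printf("worker: %d\n", allocate(&args, 1));
	return 0;
}

The trade-off is the same one the removed comment names: the hand-off buys stack headroom at the price of a context switch, so callers that already have stack to spare take the direct path.
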
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 93be4a667ca1..feacb061bab7 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -120,9 +120,6 @@ typedef struct xfs_alloc_arg {
 	char		isfl;		/* set if is freelist blocks - !acctg */
 	char		userdata;	/* set if this is user data */
 	xfs_fsblock_t	firstblock;	/* io first block allocated */
-	struct completion *done;
-	struct work_struct work;
-	int		result;
 } xfs_alloc_arg_t;

 /*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index f1647caace8f..f7876c6d6165 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -121,6 +121,8 @@ xfs_allocbt_free_block(
 	xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
 			      XFS_EXTENT_BUSY_SKIP_DISCARD);
 	xfs_trans_agbtree_delta(cur->bc_tp, -1);
+
+	xfs_trans_binval(cur->bc_tp, bp);
 	return 0;
 }

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e562dd43f41f..e57e2daa357c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -481,11 +481,17 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
  *
  * The fix is two passes across the ioend list - one to start writeback on the
  * buffer_heads, and then submit them for I/O on the second pass.
+ *
+ * If @fail is non-zero, it means that we have a situation where some part of
+ * the submission process has failed after we have marked pages for writeback
+ * and unlocked them. In this situation, we need to fail the ioend chain rather
+ * than submit it to IO. This typically only happens on a filesystem shutdown.
  */
 STATIC void
 xfs_submit_ioend(
 	struct writeback_control *wbc,
-	xfs_ioend_t		*ioend)
+	xfs_ioend_t		*ioend,
+	int			fail)
 {
 	xfs_ioend_t		*head = ioend;
 	xfs_ioend_t		*next;
@@ -506,6 +512,18 @@ xfs_submit_ioend(
 		next = ioend->io_list;
 		bio = NULL;

+		/*
+		 * If we are failing the IO now, just mark the ioend with an
+		 * error and finish it. This will run IO completion immediately
+		 * as there is only one reference to the ioend at this point in
+		 * time.
+		 */
+		if (fail) {
+			ioend->io_error = -fail;
+			xfs_finish_ioend(ioend);
+			continue;
+		}
+
 		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {

 			if (!bio) {
@@ -1060,7 +1078,18 @@ xfs_vm_writepage(

 	xfs_start_page_writeback(page, 1, count);

-	if (ioend && imap_valid) {
+	/* if there is no IO to be submitted for this page, we are done */
+	if (!ioend)
+		return 0;
+
+	ASSERT(iohead);
+
+	/*
+	 * Any errors from this point onwards need to be reported through the
+	 * IO completion path as we have marked the initial page as under
+	 * writeback and unlocked it.
+	 */
+	if (imap_valid) {
 		xfs_off_t		end_index;

 		end_index = imap.br_startoff + imap.br_blockcount;
@@ -1079,20 +1108,15 @@ xfs_vm_writepage(
 					wbc, end_index);
 	}

-	if (iohead) {
-		/*
-		 * Reserve log space if we might write beyond the on-disk
-		 * inode size.
-		 */
-		if (ioend->io_type != XFS_IO_UNWRITTEN &&
-		    xfs_ioend_is_append(ioend)) {
-			err = xfs_setfilesize_trans_alloc(ioend);
-			if (err)
-				goto error;
-		}

-		xfs_submit_ioend(wbc, iohead);
-	}
+	/*
+	 * Reserve log space if we might write beyond the on-disk inode size.
+	 */
+	err = 0;
+	if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
+		err = xfs_setfilesize_trans_alloc(ioend);
+
+	xfs_submit_ioend(wbc, iohead, err);

 	return 0;

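
[Editorial note] Once a page is marked for writeback and unlocked, xfs_vm_writepage() can no longer report an error through its return value, so the new fail argument threads the xfs_setfilesize_trans_alloc() result into xfs_submit_ioend(), which fails the whole chain through the completion path instead of submitting it. A simplified model of that control flow follows; the ioend struct and helper names here are illustrative only:

#include <stdio.h>

/* illustrative ioend chain: each node describes one chunk of queued IO */
struct ioend {
	int io_error;		/* 0, or a negative errno */
	struct ioend *io_list;	/* next ioend in the chain */
};

static void finish_ioend(struct ioend *io)
{
	/* completion path: the only place errors can surface from here on */
	printf("ioend %p completed, error=%d\n", (void *)io, io->io_error);
}

static void submit_ioend(struct ioend *head, int fail)
{
	struct ioend *next;

	for (; head; head = next) {
		next = head->io_list;
		if (fail) {
			/* don't submit: mark the error and complete at once */
			head->io_error = -fail;
			finish_ioend(head);
			continue;
		}
		/* normal case: the bio would be built and submitted here */
		finish_ioend(head);
	}
}

int main(void)
{
	struct ioend b = { 0, NULL };
	struct ioend a = { 0, &b };

	submit_ioend(&a, 0);	/* healthy submission */
	submit_ioend(&a, 5);	/* an EIO-style failure after pages unlocked */
	return 0;
}
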
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index d330111ca738..70eec1829776 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1291,6 +1291,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 	leaf2 = blk2->bp->b_addr;
 	ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
 	ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+	ASSERT(leaf2->hdr.count == 0);
 	args = state->args;

 	trace_xfs_attr_leaf_rebalance(args);
@@ -1361,6 +1362,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 		 * I assert that since all callers pass in an empty
 		 * second buffer, this code should never execute.
 		 */
+		ASSERT(0);

 		/*
 		 * Figure the total bytes to be added to the destination leaf.
@@ -1422,10 +1424,24 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 			args->index2 = 0;
 			args->blkno2 = blk2->blkno;
 		} else {
+			/*
+			 * On a double leaf split, the original attr location
+			 * is already stored in blkno2/index2, so don't
+			 * overwrite it, otherwise we corrupt the tree.
+			 */
 			blk2->index = blk1->index
 				    - be16_to_cpu(leaf1->hdr.count);
-			args->index = args->index2 = blk2->index;
-			args->blkno = args->blkno2 = blk2->blkno;
+			args->index = blk2->index;
+			args->blkno = blk2->blkno;
+			if (!state->extravalid) {
+				/*
+				 * set the new attr location to match the old
+				 * one and let the higher level split code
+				 * decide where in the leaf to place it.
+				 */
+				args->index2 = blk2->index;
+				args->blkno2 = blk2->blkno;
+			}
 		}
 	} else {
 		ASSERT(state->inleaf == 1);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 848ffa77707b..83d0cf3df930 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2437,6 +2437,7 @@ xfs_bmap_btalloc(
 	 * Normal allocation, done through xfs_alloc_vextent.
 	 */
 	tryagain = isaligned = 0;
+	memset(&args, 0, sizeof(args));
 	args.tp = ap->tp;
 	args.mp = mp;
 	args.fsbno = ap->blkno;
@@ -3082,6 +3083,7 @@ xfs_bmap_extents_to_btree(
 	 * Convert to a btree with two levels, one record in root.
 	 */
 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
+	memset(&args, 0, sizeof(args));
 	args.tp = tp;
 	args.mp = mp;
 	args.firstblock = *firstblock;
@@ -3237,6 +3239,7 @@ xfs_bmap_local_to_extents(
 	xfs_buf_t	*bp;	/* buffer for extent block */
 	xfs_bmbt_rec_host_t *ep;/* extent record pointer */

+	memset(&args, 0, sizeof(args));
 	args.tp = tp;
 	args.mp = ip->i_mount;
 	args.firstblock = *firstblock;
@@ -4616,12 +4619,11 @@ xfs_bmapi_delay(


 STATIC int
-xfs_bmapi_allocate(
-	struct xfs_bmalloca	*bma,
-	int			flags)
+__xfs_bmapi_allocate(
+	struct xfs_bmalloca	*bma)
 {
 	struct xfs_mount	*mp = bma->ip->i_mount;
-	int			whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+	int			whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
 						XFS_ATTR_FORK : XFS_DATA_FORK;
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
 	int			tmp_logflags = 0;
@@ -4654,24 +4656,27 @@ xfs_bmapi_allocate(
 	 * Indicate if this is the first user data in the file, or just any
 	 * user data.
 	 */
-	if (!(flags & XFS_BMAPI_METADATA)) {
+	if (!(bma->flags & XFS_BMAPI_METADATA)) {
 		bma->userdata = (bma->offset == 0) ?
 			XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
 	}

-	bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
+	bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;

 	/*
 	 * Only want to do the alignment at the eof if it is userdata and
 	 * allocation length is larger than a stripe unit.
 	 */
 	if (mp->m_dalign && bma->length >= mp->m_dalign &&
-	    !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
+	    !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
 		error = xfs_bmap_isaeof(bma, whichfork);
 		if (error)
 			return error;
 	}

+	if (bma->flags & XFS_BMAPI_STACK_SWITCH)
+		bma->stack_switch = 1;
+
 	error = xfs_bmap_alloc(bma);
 	if (error)
 		return error;
@@ -4706,7 +4711,7 @@ xfs_bmapi_allocate(
 	 * A wasdelay extent has been initialized, so shouldn't be flagged
 	 * as unwritten.
 	 */
-	if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) &&
+	if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
 	    xfs_sb_version_hasextflgbit(&mp->m_sb))
 		bma->got.br_state = XFS_EXT_UNWRITTEN;

@@ -4734,6 +4739,45 @@ xfs_bmapi_allocate(
 	return 0;
 }

+static void
+xfs_bmapi_allocate_worker(
+	struct work_struct	*work)
+{
+	struct xfs_bmalloca	*args = container_of(work,
+						struct xfs_bmalloca, work);
+	unsigned long		pflags;
+
+	/* we are in a transaction context here */
+	current_set_flags_nested(&pflags, PF_FSTRANS);
+
+	args->result = __xfs_bmapi_allocate(args);
+	complete(args->done);
+
+	current_restore_flags_nested(&pflags, PF_FSTRANS);
+}
+
+/*
+ * Some allocation requests come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Otherwise just
+ * call directly to avoid the context switch overhead here.
+ */
+int
+xfs_bmapi_allocate(
+	struct xfs_bmalloca	*args)
+{
+	DECLARE_COMPLETION_ONSTACK(done);
+
+	if (!args->stack_switch)
+		return __xfs_bmapi_allocate(args);
+
+	args->done = &done;
+	INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
+	queue_work(xfs_alloc_wq, &args->work);
+	wait_for_completion(&done);
+	return args->result;
+}
+
 STATIC int
 xfs_bmapi_convert_unwritten(
 	struct xfs_bmalloca	*bma,
@@ -4919,6 +4963,7 @@ xfs_bmapi_write(
 		bma.conv = !!(flags & XFS_BMAPI_CONVERT);
 		bma.wasdel = wasdelay;
 		bma.offset = bno;
+		bma.flags = flags;

 		/*
 		 * There's a 32/64 bit type mismatch between the
@@ -4934,7 +4979,7 @@ xfs_bmapi_write(

 		ASSERT(len > 0);
 		ASSERT(bma.length > 0);
-		error = xfs_bmapi_allocate(&bma, flags);
+		error = xfs_bmapi_allocate(&bma);
 		if (error)
 			goto error0;
 		if (bma.blkno == NULLFSBLOCK)
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 803b56d7ce16..5f469c3516eb 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -77,6 +77,7 @@ typedef struct xfs_bmap_free
  * from written to unwritten, otherwise convert from unwritten to written.
  */
 #define XFS_BMAPI_CONVERT	0x040
+#define XFS_BMAPI_STACK_SWITCH	0x080

 #define XFS_BMAPI_FLAGS \
 	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
@@ -85,7 +86,8 @@ typedef struct xfs_bmap_free
 	{ XFS_BMAPI_PREALLOC,	"PREALLOC" }, \
 	{ XFS_BMAPI_IGSTATE,	"IGSTATE" }, \
 	{ XFS_BMAPI_CONTIG,	"CONTIG" }, \
-	{ XFS_BMAPI_CONVERT,	"CONVERT" }
+	{ XFS_BMAPI_CONVERT,	"CONVERT" }, \
+	{ XFS_BMAPI_STACK_SWITCH,	"STACK_SWITCH" }


 static inline int xfs_bmapi_aflag(int w)
@@ -133,6 +135,11 @@ typedef struct xfs_bmalloca {
 	char			userdata;/* set if is user data */
 	char			aeof;	/* allocated space at eof */
 	char			conv;	/* overwriting unwritten extents */
+	char			stack_switch;
+	int			flags;
+	struct completion	*done;
+	struct work_struct	work;
+	int			result;
 } xfs_bmalloca_t;

 /*
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 933b7930b863..4b0b8dd1b7b0 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1197,9 +1197,14 @@ xfs_buf_bio_end_io(
 {
 	xfs_buf_t		*bp = (xfs_buf_t *)bio->bi_private;

-	xfs_buf_ioerror(bp, -error);
+	/*
+	 * don't overwrite existing errors - otherwise we can lose errors on
+	 * buffers that require multiple bios to complete.
+	 */
+	if (!bp->b_error)
+		xfs_buf_ioerror(bp, -error);

-	if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

 	_xfs_buf_ioend(bp, 1);
@@ -1279,6 +1284,11 @@ next_chunk:
 		if (size)
 			goto next_chunk;
 	} else {
+		/*
+		 * This is guaranteed not to be the last io reference count
+		 * because the caller (xfs_buf_iorequest) holds a count itself.
+		 */
+		atomic_dec(&bp->b_io_remaining);
 		xfs_buf_ioerror(bp, EIO);
 		bio_put(bio);
 	}
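
[Editorial note] Both hunks above protect a buffer whose IO spans multiple bios: the end_io path only records an error while b_error is still clear, so the first failure wins and a later successful bio cannot clobber it. A small sketch of the idiom in plain C, with hypothetical names:

#include <stdio.h>

/* illustrative buffer covered by several split IOs */
struct buf {
	int b_error;	/* first error seen across all pieces, 0 if none */
};

static void bio_end_io(struct buf *bp, int error)
{
	/*
	 * don't overwrite an existing error - otherwise a piece that
	 * succeeds after one that failed would hide the failure.
	 */
	if (!bp->b_error)
		bp->b_error = error;
}

int main(void)
{
	struct buf bp = { 0 };

	bio_end_io(&bp, 0);	/* first chunk fine */
	bio_end_io(&bp, -5);	/* second chunk fails (say, -EIO) */
	bio_end_io(&bp, 0);	/* third chunk fine - must not clear the error */

	printf("final b_error = %d\n", bp.b_error);	/* prints -5 */
	return 0;
}
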
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a8d0ed911196..becf4a97efc6 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -526,7 +526,25 @@ xfs_buf_item_unpin(
 		}
 		xfs_buf_relse(bp);
 	} else if (freed && remove) {
+		/*
+		 * There are currently two references to the buffer - the active
+		 * LRU reference and the buf log item. What we are about to do
+		 * here - simulate a failed IO completion - requires 3
+		 * references.
+		 *
+		 * The LRU reference is removed by the xfs_buf_stale() call. The
+		 * buf item reference is removed by the xfs_buf_iodone()
+		 * callback that is run by xfs_buf_do_callbacks() during ioend
+		 * processing (via the bp->b_iodone callback), and then finally
+		 * the ioend processing will drop the IO reference if the buffer
+		 * is marked XBF_ASYNC.
+		 *
+		 * Hence we need to take an additional reference here so that IO
+		 * completion processing doesn't free the buffer prematurely.
+		 */
 		xfs_buf_lock(bp);
+		xfs_buf_hold(bp);
+		bp->b_flags |= XBF_ASYNC;
 		xfs_buf_ioerror(bp, EIO);
 		XFS_BUF_UNDONE(bp);
 		xfs_buf_stale(bp);
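
[Editorial note] The long comment above spells out the arithmetic: simulating a failed IO completion drops three references (LRU via xfs_buf_stale(), the buf log item via its iodone callback, and the XBF_ASYNC IO reference), but only two exist, so an extra xfs_buf_hold() is taken first. A toy refcount model of that reasoning, not the xfs_buf API:

#include <assert.h>
#include <stdio.h>

/* toy refcounted buffer */
struct buf {
	int refs;
	int freed;
};

static void buf_put(struct buf *bp)
{
	assert(!bp->freed && bp->refs > 0);
	if (--bp->refs == 0) {
		bp->freed = 1;
		printf("buffer freed\n");
	}
}

/* simulated failed-IO completion: drops three references in total */
static void fail_buffer(struct buf *bp)
{
	bp->refs++;	/* the extra hold taken before completion runs */
	buf_put(bp);	/* LRU reference, dropped by staling the buffer */
	buf_put(bp);	/* buf log item reference, dropped by its callback */
	buf_put(bp);	/* async IO reference, dropped at the end of ioend */
}

int main(void)
{
	/* two live references to start with: LRU + log item */
	struct buf bp = { .refs = 2, .freed = 0 };

	fail_buffer(&bp);
	assert(bp.freed);	/* freed exactly once, never prematurely */
	return 0;
}

Without the extra hold, the second buf_put() would free the buffer while the third drop was still pending, which is exactly the premature free the patch prevents.
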
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index c25b094efbf7..4beaede43277 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -399,9 +399,26 @@ xfs_growfs_data_private(

 	/* update secondary superblocks. */
 	for (agno = 1; agno < nagcount; agno++) {
-		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+		error = 0;
+		/*
+		 * new secondary superblocks need to be zeroed, not read from
+		 * disk as the contents of the new area we are growing into are
+		 * completely unknown.
+		 */
+		if (agno < oagcount) {
+			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
 				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
 				  XFS_FSS_TO_BB(mp, 1), 0, &bp);
+		} else {
+			bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
+				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
+				  XFS_FSS_TO_BB(mp, 1), 0);
+			if (bp)
+				xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
+			else
+				error = ENOMEM;
+		}
+
 		if (error) {
 			xfs_warn(mp,
 		"error %d reading secondary superblock for ag %d",
@@ -423,7 +440,7 @@ xfs_growfs_data_private(
 			break; /* no point in continuing */
 		}
 	}
-	return 0;
+	return error;

 error0:
 	xfs_trans_cancel(tp, XFS_TRANS_ABORT);
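
[Editorial note] The growfs loop now distinguishes AGs that existed before the grow (whose secondary superblock can be read from disk) from brand-new AGs (whose on-disk contents are undefined and must be zeroed before being written), and the second hunk makes the function report the last error instead of unconditionally returning 0. A sketch of that branch with stand-in helpers; read_block, update_secondary_sb, and the sizes are hypothetical:

#include <stdio.h>
#include <string.h>

#define BLKSZ 512

/* hypothetical helper: read an existing block from disk */
static int read_block(int agno, unsigned char *buf)
{
	memset(buf, 0xab, BLKSZ);	/* pretend this is old on-disk data */
	return 0;
}

static int update_secondary_sb(int agno, int oagcount, unsigned char *buf)
{
	int error = 0;

	/*
	 * New AGs lie in the grown region: their disk contents are
	 * completely unknown, so zero the buffer instead of reading it.
	 */
	if (agno < oagcount)
		error = read_block(agno, buf);
	else
		memset(buf, 0, BLKSZ);

	if (error)
		return error;
	/* ...stamp the superblock fields and write the buffer back... */
	return 0;
}

int main(void)
{
	unsigned char buf[BLKSZ];
	int oagcount = 4, nagcount = 6, agno, error = 0;

	for (agno = 1; agno < nagcount; agno++) {
		error = update_secondary_sb(agno, oagcount, buf);
		if (error)
			break;	/* remember the error, as the fixed return does */
	}
	printf("grow finished, error=%d\n", error);
	return error;
}
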
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 445bf1aef31c..c5c4ef4f2bdb 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -250,6 +250,7 @@ xfs_ialloc_ag_alloc(
 						/* boundary */
 	struct xfs_perag	*pag;

+	memset(&args, 0, sizeof(args));
 	args.tp = tp;
 	args.mp = tp->t_mountp;

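
[Editorial note] This is the same hardening applied in xfs_alloc_fix_freelist() and the xfs_bmap.c callers above: xfs_alloc_arg_t grew new fields during this series, and a caller populating the struct field by field would otherwise pass stack garbage in the fields it never mentions. Zeroing the whole struct first makes the field-by-field style safe, as this small sketch shows; the struct and values are illustrative:

#include <stdio.h>
#include <string.h>

/* illustrative args struct that later gained extra fields */
struct alloc_arg {
	int tp;
	int mp;
	int minlen;
	int stack_switch;	/* newer field that older callers don't set */
};

static void init_args(struct alloc_arg *args)
{
	/* zero everything first so unset fields are 0, not stack garbage */
	memset(args, 0, sizeof(*args));
	args->tp = 1;
	args->mp = 2;
	/* minlen and stack_switch stay safely zeroed */
}

int main(void)
{
	struct alloc_arg args;

	init_args(&args);
	printf("minlen=%d stack_switch=%d\n", args.minlen, args.stack_switch);
	return 0;
}
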
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2778258fcfa2..1938b41ee9f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1509,7 +1509,8 @@ xfs_ifree_cluster(
 		 * to mark all the active inodes on the buffer stale.
 		 */
 		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
-					mp->m_bsize * blks_per_cluster, 0);
+					mp->m_bsize * blks_per_cluster,
+					XBF_UNMAPPED);

 		if (!bp)
 			return ENOMEM;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 8305f2ac6773..c1df3c623de2 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -70,7 +70,7 @@ xfs_find_handle(
 	int			hsize;
 	xfs_handle_t		handle;
 	struct inode		*inode;
-	struct fd		f;
+	struct fd		f = {0};
 	struct path		path;
 	int			error;
 	struct xfs_inode	*ip;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 973dff6ad935..7f537663365b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -584,7 +584,9 @@ xfs_iomap_write_allocate(
 		 * pointer that the caller gave to us.
 		 */
 		error = xfs_bmapi_write(tp, ip, map_start_fsb,
-					count_fsb, 0, &first_block, 1,
+					count_fsb,
+					XFS_BMAPI_STACK_SWITCH,
+					&first_block, 1,
 					imap, &nimaps, &free_list);
 		if (error)
 			goto trans_cancel;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7f4f9370d0e7..4dad756962d0 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2387,14 +2387,27 @@ xlog_state_do_callback(


 				/*
-				 * update the last_sync_lsn before we drop the
+				 * Completion of an iclog IO does not imply that
+				 * a transaction has completed, as transactions
+				 * can be large enough to span many iclogs. We
+				 * cannot change the tail of the log half way
+				 * through a transaction as this may be the only
+				 * transaction in the log and moving the tail to
+				 * point to the middle of it will prevent
+				 * recovery from finding the start of the
+				 * transaction. Hence we should only update the
+				 * last_sync_lsn if this iclog contains
+				 * transaction completion callbacks on it.
+				 *
+				 * We have to do this before we drop the
 				 * icloglock to ensure we are the only one that
 				 * can update it.
 				 */
 				ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
 					be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
-				atomic64_set(&log->l_last_sync_lsn,
-					be64_to_cpu(iclog->ic_header.h_lsn));
+				if (iclog->ic_callback)
+					atomic64_set(&log->l_last_sync_lsn,
+						be64_to_cpu(iclog->ic_header.h_lsn));

 			} else
 				ioerrors++;
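
[Editorial note] The reworked comment carries the key constraint: a transaction can span several iclogs, and moving l_last_sync_lsn (which bounds where recovery starts) into the middle of such a transaction would make it unrecoverable, so the LSN only advances for iclogs that carry completion callbacks. A compact model of that rule in plain C; the iclog fields here are illustrative:

#include <stdio.h>

struct iclog {
	long lsn;		/* start LSN of this in-core log buffer */
	int has_callbacks;	/* nonzero if a transaction completes here */
};

static long last_sync_lsn;

static void iclog_io_done(const struct iclog *ic)
{
	/*
	 * Only advance the recovery starting point when a transaction
	 * actually completed in this iclog; otherwise recovery could be
	 * pointed into the middle of a multi-iclog transaction.
	 */
	if (ic->has_callbacks)
		last_sync_lsn = ic->lsn;
}

int main(void)
{
	struct iclog middle = { .lsn = 100, .has_callbacks = 0 };
	struct iclog commit = { .lsn = 200, .has_callbacks = 1 };

	iclog_io_done(&middle);	/* no completion: the tail must not move */
	iclog_io_done(&commit);	/* transaction done: safe to advance */
	printf("last_sync_lsn = %ld\n", last_sync_lsn);	/* 200 */
	return 0;
}
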
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5da3ace352bf..d308749fabf1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3541,7 +3541,7 @@ xlog_do_recovery_pass(
 			 *   - order is important.
 			 */
 			error = xlog_bread_offset(log, 0,
-					bblks - split_bblks, hbp,
+					bblks - split_bblks, dbp,
 					offset + BBTOB(split_bblks));
 			if (error)
 				goto bread_err2;