Diffstat (limited to 'fs')
188 files changed, 3628 insertions, 1959 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 33aa116732c8..535ab6eccb1a 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -323,7 +323,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
 
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
-	if (!is_owner_or_cap(inode))
+	if (!inode_owner_or_capable(inode))
 		return -EPERM;
 	if (value) {
 		/* update the cached acl value */
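The is_owner_or_cap() to inode_owner_or_capable() rename above recurs throughout this commit (fs/attr.c and the btrfs hunks below). A minimal, hypothetical sketch of the guard it provides — not part of the commit — where the helper returns true when the caller owns the inode or holds CAP_FOWNER:

	/* illustrative only; any real setter does more than this */
	static int example_setter_check(struct inode *inode)
	{
		if (!inode_owner_or_capable(inode))
			return -EPERM;	/* neither owner nor CAP_FOWNER */
		return 0;
	}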
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index a8a58d864f96..718ac1f440c6 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -112,7 +112,7 @@ struct object_info {
 /* RISC OS 12-bit filetype converts to ,xyz hex filename suffix */
 static inline int append_filetype_suffix(char *buf, __u16 filetype)
 {
-	if (filetype == -1)
+	if (filetype == 0xffff)	/* no explicit 12-bit file type was set */
 		return 0;
 
 	*buf++ = ',';
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 92444e94f842..d5250c5aae21 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -72,7 +72,6 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
 static const struct address_space_operations adfs_aops = {
 	.readpage	= adfs_readpage,
 	.writepage	= adfs_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= adfs_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= _adfs_bmap
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0a90dcd46de2..acf321b70fcd 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -429,7 +429,6 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations affs_aops = {
 	.readpage	= affs_readpage,
 	.writepage	= affs_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= affs_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= _affs_bmap
@@ -786,7 +785,6 @@ out:
 const struct address_space_operations affs_aops_ofs = {
 	.readpage	= affs_readpage_ofs,
 	//.writepage	= affs_writepage_ofs,
-	//.sync_page	= affs_sync_page_ofs,
 	.write_begin	= affs_write_begin_ofs,
 	.write_end	= affs_write_end_ofs
 };
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -34,8 +34,6 @@
 #include <linux/security.h>
 #include <linux/eventfd.h>
 #include <linux/blkdev.h>
-#include <linux/mempool.h>
-#include <linux/hash.h>
 #include <linux/compat.h>
 
 #include <asm/kmap_types.h>
@@ -65,14 +63,6 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
-#define AIO_BATCH_HASH_BITS	3 /* allocated on-stack, so don't go crazy */
-#define AIO_BATCH_HASH_SIZE	(1 << AIO_BATCH_HASH_BITS)
-struct aio_batch_entry {
-	struct hlist_node list;
-	struct address_space *mapping;
-};
-mempool_t *abe_pool;
-
 static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
@@ -86,8 +76,7 @@ static int __init aio_setup(void)
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
 	aio_wq = alloc_workqueue("aio", 0, 1);	/* used to limit concurrency */
-	abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
-	BUG_ON(!aio_wq || !abe_pool);
+	BUG_ON(!aio_wq);
 
 	pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
 
@@ -1525,57 +1514,8 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 	return 0;
 }
 
-static void aio_batch_add(struct address_space *mapping,
-			  struct hlist_head *batch_hash)
-{
-	struct aio_batch_entry *abe;
-	struct hlist_node *pos;
-	unsigned bucket;
-
-	bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
-	hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
-		if (abe->mapping == mapping)
-			return;
-	}
-
-	abe = mempool_alloc(abe_pool, GFP_KERNEL);
-
-	/*
-	 * we should be using igrab here, but
-	 * we don't want to hammer on the global
-	 * inode spinlock just to take an extra
-	 * reference on a file that we must already
-	 * have a reference to.
-	 *
-	 * When we're called, we always have a reference
-	 * on the file, so we must always have a reference
-	 * on the inode, so ihold() is safe here.
-	 */
-	ihold(mapping->host);
-	abe->mapping = mapping;
-	hlist_add_head(&abe->list, &batch_hash[bucket]);
-	return;
-}
-
-static void aio_batch_free(struct hlist_head *batch_hash)
-{
-	struct aio_batch_entry *abe;
-	struct hlist_node *pos, *n;
-	int i;
-
-	for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
-		hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
-			blk_run_address_space(abe->mapping);
-			iput(abe->mapping->host);
-			hlist_del(&abe->list);
-			mempool_free(abe, abe_pool);
-		}
-	}
-}
-
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb, struct hlist_head *batch_hash,
-			 bool compat)
+			 struct iocb *iocb, bool compat)
 {
 	struct kiocb *req;
 	struct file *file;
@@ -1666,11 +1606,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		;
 	}
 	spin_unlock_irq(&ctx->ctx_lock);
-	if (req->ki_opcode == IOCB_CMD_PREAD ||
-	    req->ki_opcode == IOCB_CMD_PREADV ||
-	    req->ki_opcode == IOCB_CMD_PWRITE ||
-	    req->ki_opcode == IOCB_CMD_PWRITEV)
-		aio_batch_add(file->f_mapping, batch_hash);
 
 	aio_put_req(req);	/* drop extra ref to req */
 	return 0;
@@ -1687,7 +1622,7 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 	struct kioctx *ctx;
 	long ret = 0;
 	int i;
-	struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
+	struct blk_plug plug;
 
 	if (unlikely(nr < 0))
 		return -EINVAL;
@@ -1704,6 +1639,8 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 		return -EINVAL;
 	}
 
+	blk_start_plug(&plug);
+
 	/*
	 * AKPM: should this return a partial result if some of the IOs were
	 * successfully submitted?
	 */
@@ -1722,11 +1659,11 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 			break;
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash, compat);
+		ret = io_submit_one(ctx, user_iocb, &tmp, compat);
 		if (ret)
 			break;
 	}
-	aio_batch_free(batch_hash);
+	blk_finish_plug(&plug);
 
 	put_ioctx(ctx);
 	return i ? i : ret;
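The aio hunks above drop the private batch_hash of address_spaces — which existed only so do_io_submit() could kick each queue once via blk_run_address_space() — in favour of the block layer's on-stack plugging. A rough sketch of the plug pattern the new code relies on; submit_work() here is a hypothetical stand-in for anything that ends in submit_bio():

	struct blk_plug plug;

	blk_start_plug(&plug);		/* bios queue up on the task's plug list */
	for (i = 0; i < nr; i++)
		submit_work(i);		/* each request is merged, not dispatched */
	blk_finish_plug(&plug);		/* flush the whole batch to the devices */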
diff --git a/fs/attr.c b/fs/attr.c
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -59,7 +59,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
 
 	/* Make sure a caller can chmod. */
 	if (ia_valid & ATTR_MODE) {
-		if (!is_owner_or_cap(inode))
+		if (!inode_owner_or_capable(inode))
 			return -EPERM;
 		/* Also check the setgid bit! */
 		if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
@@ -69,7 +69,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
 
 	/* Check for setting the inode time. */
 	if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
-		if (!is_owner_or_cap(inode))
+		if (!inode_owner_or_capable(inode))
 			return -EPERM;
 	}
 
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 54f923792728..475f9c597cb7 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -61,8 +61,6 @@ do { \
 		current->pid, __func__, ##args);	\
 } while (0)
 
-extern spinlock_t autofs4_lock;
-
 /* Unified info structure. This is pointed to by both the dentry and
    inode structures. Each file in the filesystem has an instance of this
    structure. It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 1442da4860e5..509fe1eb66ae 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -372,6 +372,10 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 		return -EBUSY;
 	} else {
 		struct file *pipe = fget(pipefd);
+		if (!pipe) {
+			err = -EBADF;
+			goto out;
+		}
 		if (!pipe->f_op || !pipe->f_op->write) {
 			err = -EPIPE;
 			fput(pipe);
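The added NULL check above matters because fget() returns NULL, not an ERR_PTR, for an invalid or closing descriptor; dereferencing pipe->f_op would otherwise oops. The general pattern, as a hedged sketch outside this function:

	struct file *f = fget(fd);
	if (!f)
		return -EBADF;	/* bad fd: fget() yields NULL */
	/* ... use f ... */
	fput(f);		/* drop the reference fget() took */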
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index f43100b9662b..450f529a4eae 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -87,18 +87,70 @@ done:
 }
 
 /*
+ * Calculate and dget next entry in the subdirs list under root.
+ */
+static struct dentry *get_next_positive_subdir(struct dentry *prev,
+						struct dentry *root)
+{
+	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
+	struct list_head *next;
+	struct dentry *p, *q;
+
+	spin_lock(&sbi->lookup_lock);
+
+	if (prev == NULL) {
+		spin_lock(&root->d_lock);
+		prev = dget_dlock(root);
+		next = prev->d_subdirs.next;
+		p = prev;
+		goto start;
+	}
+
+	p = prev;
+	spin_lock(&p->d_lock);
+again:
+	next = p->d_u.d_child.next;
+start:
+	if (next == &root->d_subdirs) {
+		spin_unlock(&p->d_lock);
+		spin_unlock(&sbi->lookup_lock);
+		dput(prev);
+		return NULL;
+	}
+
+	q = list_entry(next, struct dentry, d_u.d_child);
+
+	spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
+	/* Negative dentry - try next */
+	if (!simple_positive(q)) {
+		spin_unlock(&p->d_lock);
+		p = q;
+		goto again;
+	}
+	dget_dlock(q);
+	spin_unlock(&q->d_lock);
+	spin_unlock(&p->d_lock);
+	spin_unlock(&sbi->lookup_lock);
+
+	dput(prev);
+
+	return q;
+}
+
+/*
  * Calculate and dget next entry in top down tree traversal.
  */
 static struct dentry *get_next_positive_dentry(struct dentry *prev,
 					       struct dentry *root)
 {
+	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
 	struct list_head *next;
 	struct dentry *p, *ret;
 
 	if (prev == NULL)
 		return dget(root);
 
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 relock:
 	p = prev;
 	spin_lock(&p->d_lock);
@@ -110,7 +162,7 @@ again:
 
 	if (p == root) {
 		spin_unlock(&p->d_lock);
-		spin_unlock(&autofs4_lock);
+		spin_unlock(&sbi->lookup_lock);
 		dput(prev);
 		return NULL;
 	}
@@ -140,7 +192,7 @@ again:
 	dget_dlock(ret);
 	spin_unlock(&ret->d_lock);
 	spin_unlock(&p->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	dput(prev);
 
@@ -290,11 +342,8 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 	spin_lock(&sbi->fs_lock);
 	ino = autofs4_dentry_ino(root);
 	/* No point expiring a pending mount */
-	if (ino->flags & AUTOFS_INF_PENDING) {
-		spin_unlock(&sbi->fs_lock);
-		return NULL;
-	}
-	managed_dentry_set_transit(root);
+	if (ino->flags & AUTOFS_INF_PENDING)
+		goto out;
 	if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
 		struct autofs_info *ino = autofs4_dentry_ino(root);
 		ino->flags |= AUTOFS_INF_EXPIRING;
@@ -302,7 +351,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 		spin_unlock(&sbi->fs_lock);
 		return root;
 	}
-	managed_dentry_clear_transit(root);
+out:
 	spin_unlock(&sbi->fs_lock);
 	dput(root);
 
@@ -336,13 +385,12 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	timeout = sbi->exp_timeout;
 
 	dentry = NULL;
-	while ((dentry = get_next_positive_dentry(dentry, root))) {
+	while ((dentry = get_next_positive_subdir(dentry, root))) {
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
 		/* No point expiring a pending mount */
 		if (ino->flags & AUTOFS_INF_PENDING)
-			goto cont;
-		managed_dentry_set_transit(dentry);
+			goto next;
 
 		/*
 		 * Case 1: (i) indirect mount or top level pseudo direct mount
@@ -402,8 +450,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 		}
 	}
 next:
-		managed_dentry_clear_transit(dentry);
-cont:
 		spin_unlock(&sbi->fs_lock);
 	}
 	return NULL;
@@ -415,13 +461,13 @@ found:
 	ino->flags |= AUTOFS_INF_EXPIRING;
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 	spin_lock(&expired->d_parent->d_lock);
 	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
 	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
 	spin_unlock(&expired->d_lock);
 	spin_unlock(&expired->d_parent->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 	return expired;
 }
 
@@ -484,8 +530,6 @@ int autofs4_expire_run(struct super_block *sb,
 	spin_lock(&sbi->fs_lock);
 	ino = autofs4_dentry_ino(dentry);
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
-	if (!d_unhashed(dentry))
-		managed_dentry_clear_transit(dentry);
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 
@@ -513,9 +557,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 	spin_lock(&sbi->fs_lock);
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
 	spin_lock(&dentry->d_lock);
-	if (ret)
-		__managed_dentry_clear_transit(dentry);
-	else {
+	if (!ret) {
 		if ((IS_ROOT(dentry) ||
 		    (autofs_type_indirect(sbi->type) &&
 		     IS_ROOT(dentry->d_parent))) &&
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e6f84d26f4cf..96804a17bbd0 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -23,8 +23,6 @@
 
 #include "autofs_i.h"
 
-DEFINE_SPINLOCK(autofs4_lock);
-
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
 static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -125,15 +123,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 	 * autofs file system so just let the libfs routines handle
 	 * it.
 	 */
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&autofs4_lock);
+		spin_unlock(&sbi->lookup_lock);
 		return -ENOENT;
 	}
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 out:
 	return dcache_dir_open(inode, file);
@@ -171,7 +169,6 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->active_list;
 	list_for_each(p, head) {
@@ -204,14 +201,12 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 			dget_dlock(active);
 			spin_unlock(&active->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&autofs4_lock);
 			return active;
 		}
 next:
 		spin_unlock(&active->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -226,7 +221,6 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->expiring_list;
 	list_for_each(p, head) {
@@ -259,14 +253,12 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 			dget_dlock(expiring);
 			spin_unlock(&expiring->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&autofs4_lock);
 			return expiring;
 		}
 next:
 		spin_unlock(&expiring->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -275,17 +267,16 @@ static int autofs4_mount_wait(struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
-	int status;
+	int status = 0;
 
 	if (ino->flags & AUTOFS_INF_PENDING) {
 		DPRINTK("waiting for mount name=%.*s",
 			dentry->d_name.len, dentry->d_name.name);
 		status = autofs4_wait(sbi, dentry, NFY_MOUNT);
 		DPRINTK("mount wait done status=%d", status);
-		ino->last_used = jiffies;
-		return status;
 	}
-	return 0;
+	ino->last_used = jiffies;
+	return status;
 }
 
 static int do_expire_wait(struct dentry *dentry)
@@ -319,9 +310,12 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
 	 */
 	if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
 		struct dentry *parent = dentry->d_parent;
+		struct autofs_info *ino;
 		struct dentry *new = d_lookup(parent, &dentry->d_name);
 		if (!new)
 			return NULL;
+		ino = autofs4_dentry_ino(new);
+		ino->last_used = jiffies;
 		dput(path->dentry);
 		path->dentry = new;
 	}
@@ -338,18 +332,6 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 	DPRINTK("dentry=%p %.*s",
 		dentry, dentry->d_name.len, dentry->d_name.name);
 
-	/*
-	 * Someone may have manually umounted this or it was a submount
-	 * that has gone away.
-	 */
-	spin_lock(&dentry->d_lock);
-	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
-		if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
-		    (dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
-			__managed_dentry_set_transit(path->dentry);
-	}
-	spin_unlock(&dentry->d_lock);
-
 	/* The daemon never triggers a mount. */
 	if (autofs4_oz_mode(sbi))
 		return NULL;
@@ -418,18 +400,17 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 done:
 	if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
 		/*
-		 * Any needed mounting has been completed and the path updated
-		 * so turn this into a normal dentry so we don't continually
-		 * call ->d_automount() and ->d_manage().
-		 */
-		spin_lock(&dentry->d_lock);
-		__managed_dentry_clear_transit(dentry);
-		/*
+		 * Any needed mounting has been completed and the path
+		 * updated so clear DCACHE_NEED_AUTOMOUNT so we don't
+		 * call ->d_automount() on rootless multi-mounts since
+		 * it can lead to an incorrect ELOOP error return.
+		 *
 		 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and
 		 * symlinks as in all other cases the dentry will be covered by
 		 * an actual mount so ->d_automount() won't be called during
 		 * the follow.
 		 */
+		spin_lock(&dentry->d_lock);
 		if ((!d_mountpoint(dentry) &&
 		    !list_empty(&dentry->d_subdirs)) ||
 		    (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)))
@@ -455,6 +436,8 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 
 	/* The daemon never waits. */
 	if (autofs4_oz_mode(sbi)) {
+		if (rcu_walk)
+			return 0;
 		if (!d_mountpoint(dentry))
 			return -EISDIR;
 		return 0;
@@ -612,12 +595,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 
 	dir->i_mtime = CURRENT_TIME;
 
-	spin_lock(&autofs4_lock);
-	autofs4_add_expiring(dentry);
+	spin_lock(&sbi->lookup_lock);
+	__autofs4_add_expiring(dentry);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	return 0;
 }
@@ -686,20 +669,17 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	if (!list_empty(&dentry->d_subdirs)) {
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&sbi->lookup_lock);
-		spin_unlock(&autofs4_lock);
 		return -ENOTEMPTY;
 	}
 	__autofs4_add_expiring(dentry);
-	spin_unlock(&sbi->lookup_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	if (sbi->version < 5)
 		autofs_clear_leaf_automount_flags(dentry);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 56010056b2e6..25435987d6ae 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -197,12 +197,12 @@ rename_retry:
 
 	seq = read_seqbegin(&rename_lock);
 	rcu_read_lock();
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->fs_lock);
 	for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
 		len += tmp->d_name.len + 1;
 
 	if (!len || --len > NAME_MAX) {
-		spin_unlock(&autofs4_lock);
+		spin_unlock(&sbi->fs_lock);
 		rcu_read_unlock();
 		if (read_seqretry(&rename_lock, seq))
 			goto rename_retry;
@@ -218,7 +218,7 @@ rename_retry:
 		p -= tmp->d_name.len;
 		strncpy(p, tmp->d_name.name, tmp->d_name.len);
 	}
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->fs_lock);
 	rcu_read_unlock();
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;
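The path walk above now runs under the per-superblock sbi->fs_lock rather than a global lock, so it relies on the rename_lock seqlock to detect a concurrent rename moving a dentry mid-walk. A minimal sketch of that read-side retry pattern (the file itself uses a goto rename_retry label to the same effect):

	unsigned seq;

	do {
		seq = read_seqbegin(&rename_lock);	/* snapshot the counter */
		rcu_read_lock();
		/* ... walk ->d_parent and copy name components ... */
		rcu_read_unlock();
	} while (read_seqretry(&rename_lock, seq));	/* a rename raced: redo */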
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index b1d0c794747b..06457ed8f3e7 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -75,7 +75,6 @@ static const struct inode_operations befs_dir_inode_operations = {
 
 static const struct address_space_operations befs_aops = {
 	.readpage	= befs_readpage,
-	.sync_page	= block_sync_page,
 	.bmap		= befs_bmap,
 };
 
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index eb67edd0f8ea..f20e8a71062f 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -186,7 +186,6 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations bfs_aops = {
 	.readpage	= bfs_readpage,
 	.writepage	= bfs_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= bfs_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= bfs_bmap,
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b2fae009a4b7..f34078d702d3 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1906,7 +1906,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	segs = current->mm->map_count;
 	segs += elf_core_extra_phdrs();
 
-	gate_vma = get_gate_vma(current);
+	gate_vma = get_gate_vma(current->mm);
 	if (gate_vma != NULL)
 		segs++;
 
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index e49cce234c65..9c5e6b2cd11a 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -761,6 +761,9 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
 	unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
 
+	if (bs->bio_integrity_pool)
+		return 0;
+
 	bs->bio_integrity_pool =
 		mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
 
diff --git a/fs/bio.c b/fs/bio.c
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -43,7 +43,7 @@ static mempool_t *bio_split_pool __read_mostly;
  * unsigned short
  */
 #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
+static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
 	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
 };
 #undef BV
@@ -1636,9 +1636,6 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 	if (!bs->bio_pool)
 		goto bad;
 
-	if (bioset_integrity_create(bs, pool_size))
-		goto bad;
-
 	if (!biovec_create_pools(bs, pool_size))
 		return bs;
 
@@ -1656,12 +1653,10 @@ static void __init biovec_init_slabs(void)
 		int size;
 		struct biovec_slab *bvs = bvec_slabs + i;
 
-#ifndef CONFIG_BLK_DEV_INTEGRITY
 		if (bvs->nr_vecs <= BIO_INLINE_VECS) {
 			bvs->slab = NULL;
 			continue;
 		}
-#endif
 
 		size = bvs->nr_vecs * sizeof(struct bio_vec);
 		bvs->slab = kmem_cache_create(bvs->name, size, 0,
@@ -1684,6 +1679,9 @@ static int __init init_bio(void)
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 
+	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
+		panic("bio: can't create integrity pool\n");
+
 	bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
 						     sizeof(struct bio_pair));
 	if (!bio_split_pool)
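With the hunks above, bioset_create() no longer allocates an integrity pool unconditionally; callers that need integrity metadata ask for it explicitly, and the new early return in bioset_integrity_create() makes the call idempotent. A rough sketch of a caller under this API (the error handling is illustrative):

	struct bio_set *bs;

	bs = bioset_create(BIO_POOL_SIZE, 0);	/* bios and biovecs only */
	if (!bs)
		return -ENOMEM;
	if (bioset_integrity_create(bs, BIO_POOL_SIZE)) {
		bioset_free(bs);		/* opt-in pool failed */
		return -ENOMEM;
	}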
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 889287019599..c1511c674f53 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -55,11 +55,13 @@ EXPORT_SYMBOL(I_BDEV);
 static void bdev_inode_switch_bdi(struct inode *inode,
 			struct backing_dev_info *dst)
 {
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	inode->i_data.backing_dev_info = dst;
 	if (inode->i_state & I_DIRTY)
 		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 }
 
 static sector_t max_block(struct block_device *bdev)
@@ -1087,6 +1089,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	if (!disk)
 		goto out;
 
+	disk_block_events(disk);
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
@@ -1108,10 +1111,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				 */
 				disk_put_part(bdev->bd_part);
 				bdev->bd_part = NULL;
-				module_put(disk->fops->owner);
-				put_disk(disk);
 				bdev->bd_disk = NULL;
 				mutex_unlock(&bdev->bd_mutex);
+				disk_unblock_events(disk);
+				module_put(disk->fops->owner);
+				put_disk(disk);
 				goto restart;
 			}
 			if (ret)
@@ -1148,9 +1152,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
 		}
 	} else {
-		module_put(disk->fops->owner);
-		put_disk(disk);
-		disk = NULL;
 		if (bdev->bd_contains == bdev) {
 			if (bdev->bd_disk->fops->open) {
 				ret = bdev->bd_disk->fops->open(bdev, mode);
@@ -1160,11 +1161,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			if (bdev->bd_invalidated)
 				rescan_partitions(bdev->bd_disk, bdev);
 		}
+		/* only one opener holds refs to the module and disk */
+		module_put(disk->fops->owner);
+		put_disk(disk);
 	}
 	bdev->bd_openers++;
 	if (for_part)
 		bdev->bd_part_count++;
 	mutex_unlock(&bdev->bd_mutex);
+	disk_unblock_events(disk);
 	return 0;
 
 out_clear:
@@ -1177,10 +1182,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	bdev->bd_contains = NULL;
 out_unlock_bdev:
 	mutex_unlock(&bdev->bd_mutex);
-out:
-	if (disk)
-		module_put(disk->fops->owner);
+	disk_unblock_events(disk);
+	module_put(disk->fops->owner);
 	put_disk(disk);
+out:
 	bdput(bdev);
 
 	return ret;
@@ -1446,14 +1451,13 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
 		if (bdev_free) {
 			if (bdev->bd_write_holder) {
 				disk_unblock_events(bdev->bd_disk);
-				bdev->bd_write_holder = false;
-			} else
 				disk_check_events(bdev->bd_disk);
+				bdev->bd_write_holder = false;
+			}
 		}
 
 		mutex_unlock(&bdev->bd_mutex);
-	} else
-		disk_check_events(bdev->bd_disk);
+	}
 
 	return __blkdev_put(bdev, mode, 0);
 }
@@ -1527,7 +1531,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
 static const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.writepage	= blkdev_writepage,
-	.sync_page	= block_sync_page,
 	.write_begin	= blkdev_write_begin,
 	.write_end	= blkdev_write_end,
 	.writepages	= generic_writepages,
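The __blkdev_get() hunks above wrap the entire open sequence in disk_block_events()/disk_unblock_events() so media-change checking cannot run against a half-initialised bdev, and blkdev_put() now pairs the unblock with an explicit check. The pairing, reduced to a sketch with error handling elided:

	disk_block_events(disk);	/* hold off event polling */
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	/* ... set up bd_disk, bd_part, size, partitions ... */
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);	/* resume polling */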
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 9c949348510b..de34bfad9ec3 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -170,7 +170,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 	int ret;
 	struct posix_acl *acl = NULL;
 
-	if (!is_owner_or_cap(dentry->d_inode))
+	if (!inode_owner_or_capable(dentry->d_inode))
 		return -EPERM;
 
 	if (!IS_POSIXACL(dentry->d_inode))
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 100b07f021b4..830d261d0e6b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -847,7 +847,6 @@ static const struct address_space_operations btree_aops = {
 	.writepages	= btree_writepages,
 	.releasepage	= btree_releasepage,
 	.invalidatepage	= btree_invalidatepage,
-	.sync_page	= block_sync_page,
 #ifdef CONFIG_MIGRATION
 	.migratepage	= btree_migratepage,
 #endif
@@ -1331,82 +1330,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
 }
 
 /*
- * this unplugs every device on the box, and it is only used when page
- * is null
- */
-static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-	struct btrfs_device *device;
-	struct btrfs_fs_info *info;
-
-	info = (struct btrfs_fs_info *)bdi->unplug_io_data;
-	list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
-		if (!device->bdev)
-			continue;
-
-		bdi = blk_get_backing_dev_info(device->bdev);
-		if (bdi->unplug_io_fn)
-			bdi->unplug_io_fn(bdi, page);
-	}
-}
-
-static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-	struct inode *inode;
-	struct extent_map_tree *em_tree;
-	struct extent_map *em;
-	struct address_space *mapping;
-	u64 offset;
-
-	/* the generic O_DIRECT read code does this */
-	if (1 || !page) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	/*
-	 * page->mapping may change at any time. Get a consistent copy
-	 * and use that for everything below
-	 */
-	smp_mb();
-	mapping = page->mapping;
-	if (!mapping)
-		return;
-
-	inode = mapping->host;
-
-	/*
-	 * don't do the expensive searching for a small number of
-	 * devices
-	 */
-	if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	offset = page_offset(page);
-
-	em_tree = &BTRFS_I(inode)->extent_tree;
-	read_lock(&em_tree->lock);
-	em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
-	read_unlock(&em_tree->lock);
-	if (!em) {
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-
-	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		free_extent_map(em);
-		__unplug_io_fn(bdi, page);
-		return;
-	}
-	offset = offset - em->start;
-	btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
-			  em->block_start + offset, page);
-	free_extent_map(em);
-}
-
-/*
  * If this fails, caller must call bdi_destroy() to get rid of the
  * bdi again.
  */
@@ -1420,8 +1343,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 		return err;
 
 	bdi->ra_pages	= default_backing_dev_info.ra_pages;
-	bdi->unplug_io_fn	= btrfs_unplug_io_fn;
-	bdi->unplug_io_data	= info;
 	bdi->congested_fn	= btrfs_congested_fn;
 	bdi->congested_data	= info;
 	return 0;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 714adc4ac4c2..b5b92824a271 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2188,7 +2188,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	unsigned long nr_written = 0;
 
 	if (wbc->sync_mode == WB_SYNC_ALL)
-		write_flags = WRITE_SYNC_PLUG;
+		write_flags = WRITE_SYNC;
 	else
 		write_flags = WRITE;
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 512c3d1da083..119520bdb9a5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7340,7 +7340,6 @@ static const struct address_space_operations btrfs_aops = {
 	.writepage	= btrfs_writepage,
 	.writepages	= btrfs_writepages,
 	.readpages	= btrfs_readpages,
-	.sync_page	= block_sync_page,
 	.direct_IO	= btrfs_direct_IO,
 	.invalidatepage = btrfs_invalidatepage,
 	.releasepage	= btrfs_releasepage,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5fdb2abc4fa7..d1bace3df9b6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -158,7 +158,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 		      FS_SYNC_FL | FS_DIRSYNC_FL))
 		return -EOPNOTSUPP;
 
-	if (!is_owner_or_cap(inode))
+	if (!inode_owner_or_capable(inode))
 		return -EACCES;
 
 	mutex_lock(&inode->i_mutex);
@@ -1077,7 +1077,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
 	if (flags & ~BTRFS_SUBVOL_RDONLY)
 		return -EOPNOTSUPP;
 
-	if (!is_owner_or_cap(inode))
+	if (!inode_owner_or_capable(inode))
 		return -EACCES;
 
 	down_write(&root->fs_info->subvol_sem);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index dd13eb81ee40..9d554e8e6583 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -162,7 +162,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	struct bio *cur;
 	int again = 0;
 	unsigned long num_run;
-	unsigned long num_sync_run;
 	unsigned long batch_run = 0;
 	unsigned long limit;
 	unsigned long last_waited = 0;
@@ -173,11 +172,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	limit = btrfs_async_submit_limit(fs_info);
 	limit = limit * 2 / 3;
 
-	/* we want to make sure that every time we switch from the sync
-	 * list to the normal list, we unplug
-	 */
-	num_sync_run = 0;
-
 loop:
 	spin_lock(&device->io_lock);
 
@@ -223,15 +217,6 @@ loop_lock:
 
 	spin_unlock(&device->io_lock);
 
-	/*
-	 * if we're doing the regular priority list, make sure we unplug
-	 * for any high prio bios we've sent down
-	 */
-	if (pending_bios == &device->pending_bios && num_sync_run > 0) {
-		num_sync_run = 0;
-		blk_run_backing_dev(bdi, NULL);
-	}
-
 	while (pending) {
 
 		rmb();
@@ -259,19 +244,11 @@ loop_lock:
 
 		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 
-		if (cur->bi_rw & REQ_SYNC)
-			num_sync_run++;
-
 		submit_bio(cur->bi_rw, cur);
 		num_run++;
 		batch_run++;
-		if (need_resched()) {
-			if (num_sync_run) {
-				blk_run_backing_dev(bdi, NULL);
-				num_sync_run = 0;
-			}
+		if (need_resched())
 			cond_resched();
-		}
 
 		/*
 		 * we made progress, there is more work to do and the bdi
@@ -304,13 +281,8 @@ loop_lock:
 			 * against it before looping
 			 */
 			last_waited = ioc->last_waited;
-			if (need_resched()) {
-				if (num_sync_run) {
-					blk_run_backing_dev(bdi, NULL);
-					num_sync_run = 0;
-				}
+			if (need_resched())
 				cond_resched();
-			}
 			continue;
 		}
 		spin_lock(&device->io_lock);
@@ -323,22 +295,6 @@ loop_lock:
 		}
 	}
 
-	if (num_sync_run) {
-		num_sync_run = 0;
-		blk_run_backing_dev(bdi, NULL);
-	}
-	/*
-	 * IO has already been through a long path to get here. Checksumming,
-	 * async helper threads, perhaps compression. We've done a pretty
-	 * good job of collecting a batch of IO and should just unplug
-	 * the device right away.
-	 *
-	 * This will help anyone who is waiting on the IO, they might have
-	 * already unplugged, but managed to do so before the bio they
-	 * cared about found its way down here.
-	 */
-	blk_run_backing_dev(bdi, NULL);
-
 	cond_resched();
 	if (again)
 		goto loop;
@@ -2955,7 +2911,7 @@ static int find_live_mirror(struct map_lookup *map, int first, int num,
 static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 			     u64 logical, u64 *length,
 			     struct btrfs_multi_bio **multi_ret,
-			     int mirror_num, struct page *unplug_page)
+			     int mirror_num)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
@@ -2987,11 +2943,6 @@ again:
 	em = lookup_extent_mapping(em_tree, logical, *length);
 	read_unlock(&em_tree->lock);
 
-	if (!em && unplug_page) {
-		kfree(multi);
-		return 0;
-	}
-
 	if (!em) {
 		printk(KERN_CRIT "unable to find logical %llu len %llu\n",
 		       (unsigned long long)logical,
@@ -3047,13 +2998,13 @@ again:
 		*length = em->len - offset;
 	}
 
-	if (!multi_ret && !unplug_page)
+	if (!multi_ret)
 		goto out;
 
 	num_stripes = 1;
 	stripe_index = 0;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-		if (unplug_page || (rw & REQ_WRITE))
+		if (rw & REQ_WRITE)
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
@@ -3075,7 +3026,7 @@ again:
 		stripe_index = do_div(stripe_nr, factor);
 		stripe_index *= map->sub_stripes;
 
-		if (unplug_page || (rw & REQ_WRITE))
+		if (rw & REQ_WRITE)
 			num_stripes = map->sub_stripes;
 		else if (mirror_num)
 			stripe_index += mirror_num - 1;
@@ -3095,22 +3046,10 @@ again:
 	BUG_ON(stripe_index >= map->num_stripes);
 
 	for (i = 0; i < num_stripes; i++) {
-		if (unplug_page) {
-			struct btrfs_device *device;
-			struct backing_dev_info *bdi;
-
-			device = map->stripes[stripe_index].dev;
-			if (device->bdev) {
-				bdi = blk_get_backing_dev_info(device->bdev);
-				if (bdi->unplug_io_fn)
-					bdi->unplug_io_fn(bdi, unplug_page);
-			}
-		} else {
-			multi->stripes[i].physical =
-				map->stripes[stripe_index].physical +
-				stripe_offset + stripe_nr * map->stripe_len;
-			multi->stripes[i].dev = map->stripes[stripe_index].dev;
-		}
+		multi->stripes[i].physical =
+			map->stripes[stripe_index].physical +
+			stripe_offset + stripe_nr * map->stripe_len;
+		multi->stripes[i].dev = map->stripes[stripe_index].dev;
 		stripe_index++;
 	}
 	if (multi_ret) {
@@ -3128,7 +3067,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 		      struct btrfs_multi_bio **multi_ret, int mirror_num)
 {
 	return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
-				 mirror_num, NULL);
+				 mirror_num);
 }
 
 int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -3196,14 +3135,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 	return 0;
 }
 
-int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
3200 | u64 logical, struct page *page) | ||
3201 | { | ||
3202 | u64 length = PAGE_CACHE_SIZE; | ||
3203 | return __btrfs_map_block(map_tree, READ, logical, &length, | ||
3204 | NULL, 0, page); | ||
3205 | } | ||
3206 | |||
3207 | static void end_bio_multi_stripe(struct bio *bio, int err) | 3138 | static void end_bio_multi_stripe(struct bio *bio, int err) |
3208 | { | 3139 | { |
3209 | struct btrfs_multi_bio *multi = bio->bi_private; | 3140 | struct btrfs_multi_bio *multi = bio->bi_private; |
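The btrfs hunks above are the filesystem side of the explicit-unplug removal: the per-device scheduler loop stops counting REQ_SYNC bios and kicking the backing device, and __btrfs_map_block() loses the unplug_page side channel, which existed only so btrfs_unplug_page() could poke every stripe's queue. Under the replacement model a submitter batches bios on an on-stack plug and the block layer dispatches them when the plug is finished or the task sleeps. A minimal sketch of that pattern follows; this is not btrfs's actual loop, just the general shape, with bio construction elided.

#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>

/* Sketch only: submit a batch of prepared bios under one plug. */
static void submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* bios collect on the task's plug */
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]->bi_rw, bios[i]);
	/*
	 * No blk_run_backing_dev() needed: blk_finish_plug() dispatches
	 * the batch, and the scheduler flushes the plug if we sleep first.
	 */
	blk_finish_plug(&plug);
}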
diff --git a/fs/buffer.c b/fs/buffer.c index 2219a76e2caf..a08bb8e61c6f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -54,23 +54,15 @@ init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) | |||
54 | } | 54 | } |
55 | EXPORT_SYMBOL(init_buffer); | 55 | EXPORT_SYMBOL(init_buffer); |
56 | 56 | ||
57 | static int sync_buffer(void *word) | 57 | static int sleep_on_buffer(void *word) |
58 | { | 58 | { |
59 | struct block_device *bd; | ||
60 | struct buffer_head *bh | ||
61 | = container_of(word, struct buffer_head, b_state); | ||
62 | |||
63 | smp_mb(); | ||
64 | bd = bh->b_bdev; | ||
65 | if (bd) | ||
66 | blk_run_address_space(bd->bd_inode->i_mapping); | ||
67 | io_schedule(); | 59 | io_schedule(); |
68 | return 0; | 60 | return 0; |
69 | } | 61 | } |
70 | 62 | ||
71 | void __lock_buffer(struct buffer_head *bh) | 63 | void __lock_buffer(struct buffer_head *bh) |
72 | { | 64 | { |
73 | wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer, | 65 | wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer, |
74 | TASK_UNINTERRUPTIBLE); | 66 | TASK_UNINTERRUPTIBLE); |
75 | } | 67 | } |
76 | EXPORT_SYMBOL(__lock_buffer); | 68 | EXPORT_SYMBOL(__lock_buffer); |
@@ -90,7 +82,7 @@ EXPORT_SYMBOL(unlock_buffer); | |||
90 | */ | 82 | */ |
91 | void __wait_on_buffer(struct buffer_head * bh) | 83 | void __wait_on_buffer(struct buffer_head * bh) |
92 | { | 84 | { |
93 | wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); | 85 | wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE); |
94 | } | 86 | } |
95 | EXPORT_SYMBOL(__wait_on_buffer); | 87 | EXPORT_SYMBOL(__wait_on_buffer); |
96 | 88 | ||
@@ -749,10 +741,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) | |||
749 | { | 741 | { |
750 | struct buffer_head *bh; | 742 | struct buffer_head *bh; |
751 | struct list_head tmp; | 743 | struct list_head tmp; |
752 | struct address_space *mapping, *prev_mapping = NULL; | 744 | struct address_space *mapping; |
753 | int err = 0, err2; | 745 | int err = 0, err2; |
746 | struct blk_plug plug; | ||
754 | 747 | ||
755 | INIT_LIST_HEAD(&tmp); | 748 | INIT_LIST_HEAD(&tmp); |
749 | blk_start_plug(&plug); | ||
756 | 750 | ||
757 | spin_lock(lock); | 751 | spin_lock(lock); |
758 | while (!list_empty(list)) { | 752 | while (!list_empty(list)) { |
@@ -775,7 +769,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) | |||
775 | * still in flight on potentially older | 769 | * still in flight on potentially older |
776 | * contents. | 770 | * contents. |
777 | */ | 771 | */ |
778 | write_dirty_buffer(bh, WRITE_SYNC_PLUG); | 772 | write_dirty_buffer(bh, WRITE_SYNC); |
779 | 773 | ||
780 | /* | 774 | /* |
781 | * Kick off IO for the previous mapping. Note | 775 | * Kick off IO for the previous mapping. Note |
@@ -783,16 +777,16 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) | |||
783 | * wait_on_buffer() will do that for us | 777 | * wait_on_buffer() will do that for us |
784 | * through sync_buffer(). | 778 | * through sync_buffer(). |
785 | */ | 779 | */ |
786 | if (prev_mapping && prev_mapping != mapping) | ||
787 | blk_run_address_space(prev_mapping); | ||
788 | prev_mapping = mapping; | ||
789 | |||
790 | brelse(bh); | 780 | brelse(bh); |
791 | spin_lock(lock); | 781 | spin_lock(lock); |
792 | } | 782 | } |
793 | } | 783 | } |
794 | } | 784 | } |
795 | 785 | ||
786 | spin_unlock(lock); | ||
787 | blk_finish_plug(&plug); | ||
788 | spin_lock(lock); | ||
789 | |||
796 | while (!list_empty(&tmp)) { | 790 | while (!list_empty(&tmp)) { |
797 | bh = BH_ENTRY(tmp.prev); | 791 | bh = BH_ENTRY(tmp.prev); |
798 | get_bh(bh); | 792 | get_bh(bh); |
@@ -1144,7 +1138,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) | |||
1144 | * inode list. | 1138 | * inode list. |
1145 | * | 1139 | * |
1146 | * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, | 1140 | * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, |
1147 | * mapping->tree_lock and the global inode_lock. | 1141 | * mapping->tree_lock and mapping->host->i_lock. |
1148 | */ | 1142 | */ |
1149 | void mark_buffer_dirty(struct buffer_head *bh) | 1143 | void mark_buffer_dirty(struct buffer_head *bh) |
1150 | { | 1144 | { |
@@ -1614,14 +1608,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata); | |||
1614 | * prevents this contention from occurring. | 1608 | * prevents this contention from occurring. |
1615 | * | 1609 | * |
1616 | * If block_write_full_page() is called with wbc->sync_mode == | 1610 | * If block_write_full_page() is called with wbc->sync_mode == |
1617 | * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this | 1611 | * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this |
1618 | * causes the writes to be flagged as synchronous writes, but the | 1612 | * causes the writes to be flagged as synchronous writes. |
1619 | * block device queue will NOT be unplugged, since usually many pages | ||
1620 | * will be pushed to the out before the higher-level caller actually | ||
1621 | * waits for the writes to be completed. The various wait functions, | ||
1622 | * such as wait_on_writeback_range() will ultimately call sync_page() | ||
1623 | * which will ultimately call blk_run_backing_dev(), which will end up | ||
1624 | * unplugging the device queue. | ||
1625 | */ | 1613 | */ |
1626 | static int __block_write_full_page(struct inode *inode, struct page *page, | 1614 | static int __block_write_full_page(struct inode *inode, struct page *page, |
1627 | get_block_t *get_block, struct writeback_control *wbc, | 1615 | get_block_t *get_block, struct writeback_control *wbc, |
@@ -1634,7 +1622,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
1634 | const unsigned blocksize = 1 << inode->i_blkbits; | 1622 | const unsigned blocksize = 1 << inode->i_blkbits; |
1635 | int nr_underway = 0; | 1623 | int nr_underway = 0; |
1636 | int write_op = (wbc->sync_mode == WB_SYNC_ALL ? | 1624 | int write_op = (wbc->sync_mode == WB_SYNC_ALL ? |
1637 | WRITE_SYNC_PLUG : WRITE); | 1625 | WRITE_SYNC : WRITE); |
1638 | 1626 | ||
1639 | BUG_ON(!PageLocked(page)); | 1627 | BUG_ON(!PageLocked(page)); |
1640 | 1628 | ||
@@ -3138,17 +3126,6 @@ out: | |||
3138 | } | 3126 | } |
3139 | EXPORT_SYMBOL(try_to_free_buffers); | 3127 | EXPORT_SYMBOL(try_to_free_buffers); |
3140 | 3128 | ||
3141 | void block_sync_page(struct page *page) | ||
3142 | { | ||
3143 | struct address_space *mapping; | ||
3144 | |||
3145 | smp_mb(); | ||
3146 | mapping = page_mapping(page); | ||
3147 | if (mapping) | ||
3148 | blk_run_backing_dev(mapping->backing_dev_info, page); | ||
3149 | } | ||
3150 | EXPORT_SYMBOL(block_sync_page); | ||
3151 | |||
3152 | /* | 3129 | /* |
3153 | * There are no bdflush tunables left. But distributions are | 3130 | * There are no bdflush tunables left. But distributions are |
3154 | * still running obsolete flush daemons, so we terminate them here. | 3131 | * still running obsolete flush daemons, so we terminate them here. |
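fsync_buffers_list() shows the conversion pattern for code that used to kick queues by hand: the dispatch loop now runs inside blk_start_plug()/blk_finish_plug(), the prev_mapping/blk_run_address_space() bookkeeping goes away, and the plug is finished with the private list lock dropped, since flushing it pushes requests into the drivers. The renamed sleep_on_buffer() likewise just sleeps; a waiter that blocks with plugged I/O pending relies on the scheduler flushing its plug. (Note the retained comment above brelse() still says the last mapping is run "through sync_buffer()", which this same patch makes stale.) A condensed sketch of the new shape, with the buffer bookkeeping elided:

#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/spinlock.h>

static void fsync_list_shape(spinlock_t *lock)
{
	struct blk_plug plug;

	blk_start_plug(&plug);
	spin_lock(lock);
	/* ... for each dirty buffer: write_dirty_buffer(bh, WRITE_SYNC) ... */
	spin_unlock(lock);

	/* Flush outside our spinlock: dispatch takes queue locks. */
	blk_finish_plug(&plug);

	spin_lock(lock);
	/* ... wait_on_buffer() on everything moved to the tmp list ... */
	spin_unlock(lock);
}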
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e964b1cd5dd0..c27d236738fc 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -1569,34 +1569,6 @@ int cifs_fsync(struct file *file, int datasync) | |||
1569 | return rc; | 1569 | return rc; |
1570 | } | 1570 | } |
1571 | 1571 | ||
1572 | /* static void cifs_sync_page(struct page *page) | ||
1573 | { | ||
1574 | struct address_space *mapping; | ||
1575 | struct inode *inode; | ||
1576 | unsigned long index = page->index; | ||
1577 | unsigned int rpages = 0; | ||
1578 | int rc = 0; | ||
1579 | |||
1580 | cFYI(1, "sync page %p", page); | ||
1581 | mapping = page->mapping; | ||
1582 | if (!mapping) | ||
1583 | return 0; | ||
1584 | inode = mapping->host; | ||
1585 | if (!inode) | ||
1586 | return; */ | ||
1587 | |||
1588 | /* fill in rpages then | ||
1589 | result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */ | ||
1590 | |||
1591 | /* cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index); | ||
1592 | |||
1593 | #if 0 | ||
1594 | if (rc < 0) | ||
1595 | return rc; | ||
1596 | return 0; | ||
1597 | #endif | ||
1598 | } */ | ||
1599 | |||
1600 | /* | 1572 | /* |
1601 | * As file closes, flush all cached write data for this inode checking | 1573 | * As file closes, flush all cached write data for this inode checking |
1602 | * for write behind errors. | 1574 | * for write behind errors. |
@@ -2510,7 +2482,6 @@ const struct address_space_operations cifs_addr_ops = { | |||
2510 | .set_page_dirty = __set_page_dirty_nobuffers, | 2482 | .set_page_dirty = __set_page_dirty_nobuffers, |
2511 | .releasepage = cifs_release_page, | 2483 | .releasepage = cifs_release_page, |
2512 | .invalidatepage = cifs_invalidate_page, | 2484 | .invalidatepage = cifs_invalidate_page, |
2513 | /* .sync_page = cifs_sync_page, */ | ||
2514 | /* .direct_IO = */ | 2485 | /* .direct_IO = */ |
2515 | }; | 2486 | }; |
2516 | 2487 | ||
@@ -2528,6 +2499,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = { | |||
2528 | .set_page_dirty = __set_page_dirty_nobuffers, | 2499 | .set_page_dirty = __set_page_dirty_nobuffers, |
2529 | .releasepage = cifs_release_page, | 2500 | .releasepage = cifs_release_page, |
2530 | .invalidatepage = cifs_invalidate_page, | 2501 | .invalidatepage = cifs_invalidate_page, |
2531 | /* .sync_page = cifs_sync_page, */ | ||
2532 | /* .direct_IO = */ | 2502 | /* .direct_IO = */ |
2533 | }; | 2503 | }; |
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c index 06d27a41807f..af56ad56a89a 100644 --- a/fs/coda/sysctl.c +++ b/fs/coda/sysctl.c | |||
@@ -61,4 +61,13 @@ void coda_sysctl_clean(void) | |||
61 | fs_table_header = NULL; | 61 | fs_table_header = NULL; |
62 | } | 62 | } |
63 | } | 63 | } |
64 | |||
65 | #else | ||
66 | void coda_sysctl_init(void) | ||
67 | { | ||
68 | } | ||
69 | |||
70 | void coda_sysctl_clean(void) | ||
71 | { | ||
72 | } | ||
64 | #endif | 73 | #endif |
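The coda addition is the standard idiom for optional kernel features: when CONFIG_SYSCTL is disabled, empty definitions of the same entry points are compiled so callers need no #ifdef of their own. Coda uses out-of-line stubs in the .c file; when the choice lives in a header, static inlines are the usual form. A generic sketch with a hypothetical feature name:

#ifdef CONFIG_FOO
void foo_init(void);
void foo_exit(void);
#else
static inline void foo_init(void) { }
static inline void foo_exit(void) { }
#endif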
diff --git a/fs/direct-io.c b/fs/direct-io.c index dcb5577cde1d..ac5f164170e3 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -1110,11 +1110,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1110 | ((rw & READ) || (dio->result == dio->size))) | 1110 | ((rw & READ) || (dio->result == dio->size))) |
1111 | ret = -EIOCBQUEUED; | 1111 | ret = -EIOCBQUEUED; |
1112 | 1112 | ||
1113 | if (ret != -EIOCBQUEUED) { | 1113 | if (ret != -EIOCBQUEUED) |
1114 | /* All IO is now issued, send it on its way */ | ||
1115 | blk_run_address_space(inode->i_mapping); | ||
1116 | dio_await_completion(dio); | 1114 | dio_await_completion(dio); |
1117 | } | ||
1118 | 1115 | ||
1119 | /* | 1116 | /* |
1120 | * Sync will always be dropping the final ref and completing the | 1117 | * Sync will always be dropping the final ref and completing the |
@@ -1176,7 +1173,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1176 | struct dio *dio; | 1173 | struct dio *dio; |
1177 | 1174 | ||
1178 | if (rw & WRITE) | 1175 | if (rw & WRITE) |
1179 | rw = WRITE_ODIRECT_PLUG; | 1176 | rw = WRITE_ODIRECT; |
1180 | 1177 | ||
1181 | if (bdev) | 1178 | if (bdev) |
1182 | bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); | 1179 | bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); |
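Direct I/O changes in two small ways: WRITE_ODIRECT_PLUG becomes WRITE_ODIRECT because the *_PLUG flag variants no longer exist, and the explicit blk_run_address_space() before waiting is dropped. The wait remains safe because a task that blocks with plugged I/O has its plug flushed on the way to sleep. A sketch of that guarantee, assuming the blk_needs_flush_plug()/blk_schedule_flush_plug() helpers this series introduces; real callers never do this by hand, schedule() does it for them:

#include <linux/blkdev.h>
#include <linux/sched.h>

static void wait_for_io(void)
{
	/* What the scheduler effectively does before we block: */
	if (blk_needs_flush_plug(current))
		blk_schedule_flush_plug(current);
	io_schedule();		/* our bios are already on their way down */
}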
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 2195c213ab2f..98b77c89494c 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/writeback.h> | 8 | #include <linux/writeback.h> |
9 | #include <linux/sysctl.h> | 9 | #include <linux/sysctl.h> |
10 | #include <linux/gfp.h> | 10 | #include <linux/gfp.h> |
11 | #include "internal.h" | ||
11 | 12 | ||
12 | /* A global variable is a bit ugly, but it keeps the code simple */ | 13 | /* A global variable is a bit ugly, but it keeps the code simple */ |
13 | int sysctl_drop_caches; | 14 | int sysctl_drop_caches; |
@@ -16,20 +17,23 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) | |||
16 | { | 17 | { |
17 | struct inode *inode, *toput_inode = NULL; | 18 | struct inode *inode, *toput_inode = NULL; |
18 | 19 | ||
19 | spin_lock(&inode_lock); | 20 | spin_lock(&inode_sb_list_lock); |
20 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 21 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
21 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) | 22 | spin_lock(&inode->i_lock); |
22 | continue; | 23 | if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || |
23 | if (inode->i_mapping->nrpages == 0) | 24 | (inode->i_mapping->nrpages == 0)) { |
25 | spin_unlock(&inode->i_lock); | ||
24 | continue; | 26 | continue; |
27 | } | ||
25 | __iget(inode); | 28 | __iget(inode); |
26 | spin_unlock(&inode_lock); | 29 | spin_unlock(&inode->i_lock); |
30 | spin_unlock(&inode_sb_list_lock); | ||
27 | invalidate_mapping_pages(inode->i_mapping, 0, -1); | 31 | invalidate_mapping_pages(inode->i_mapping, 0, -1); |
28 | iput(toput_inode); | 32 | iput(toput_inode); |
29 | toput_inode = inode; | 33 | toput_inode = inode; |
30 | spin_lock(&inode_lock); | 34 | spin_lock(&inode_sb_list_lock); |
31 | } | 35 | } |
32 | spin_unlock(&inode_lock); | 36 | spin_unlock(&inode_sb_list_lock); |
33 | iput(toput_inode); | 37 | iput(toput_inode); |
34 | } | 38 | } |
35 | 39 | ||
@@ -45,7 +49,11 @@ static void drop_slab(void) | |||
45 | int drop_caches_sysctl_handler(ctl_table *table, int write, | 49 | int drop_caches_sysctl_handler(ctl_table *table, int write, |
46 | void __user *buffer, size_t *length, loff_t *ppos) | 50 | void __user *buffer, size_t *length, loff_t *ppos) |
47 | { | 51 | { |
48 | proc_dointvec_minmax(table, write, buffer, length, ppos); | 52 | int ret; |
53 | |||
54 | ret = proc_dointvec_minmax(table, write, buffer, length, ppos); | ||
55 | if (ret) | ||
56 | return ret; | ||
49 | if (write) { | 57 | if (write) { |
50 | if (sysctl_drop_caches & 1) | 58 | if (sysctl_drop_caches & 1) |
51 | iterate_supers(drop_pagecache_sb, NULL); | 59 | iterate_supers(drop_pagecache_sb, NULL); |
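drop_pagecache_sb() shows the inode_lock break-up in miniature: the sb->s_inodes walk is now serialized by inode_sb_list_lock (declared in fs/internal.h, hence the new include), the i_state test moves under the per-inode i_lock, and __iget() must be called with i_lock held. The reference __iget() takes is what keeps both the inode and the list cursor valid while every lock is dropped for the invalidation; the previous inode is iput() outside the locks because iput() can sleep. Separately, drop_caches_sysctl_handler() stops ignoring the return value of proc_dointvec_minmax(), so a malformed write to the sysctl now fails instead of acting on a stale value. The walk, isolated as a sketch (assumes fs/ internal scope):

#include <linux/fs.h>
#include <linux/spinlock.h>
#include "internal.h"	/* inode_sb_list_lock */

static void walk_sb_inodes(struct super_block *sb)
{
	struct inode *inode, *toput = NULL;

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);			/* i_lock must be held */
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);

		/* ... work on the inode with no list lock held ... */

		iput(toput);			/* may sleep: no locks */
		toput = inode;
		spin_lock(&inode_sb_list_lock);	/* our ref pins the cursor */
	}
	spin_unlock(&inode_sb_list_lock);
	iput(toput);
}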
diff --git a/fs/efs/inode.c b/fs/efs/inode.c index a8e7797b9477..9c13412e6c99 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c | |||
@@ -23,7 +23,6 @@ static sector_t _efs_bmap(struct address_space *mapping, sector_t block) | |||
23 | } | 23 | } |
24 | static const struct address_space_operations efs_aops = { | 24 | static const struct address_space_operations efs_aops = { |
25 | .readpage = efs_readpage, | 25 | .readpage = efs_readpage, |
26 | .sync_page = block_sync_page, | ||
27 | .bmap = _efs_bmap | 26 | .bmap = _efs_bmap |
28 | }; | 27 | }; |
29 | 28 | ||
diff --git a/fs/exofs/common.h b/fs/exofs/common.h index f0d520312d8b..5e74ad3d4009 100644 --- a/fs/exofs/common.h +++ b/fs/exofs/common.h | |||
@@ -53,10 +53,14 @@ | |||
53 | #define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ | 53 | #define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ |
54 | 54 | ||
55 | /* exofs Application specific page/attribute */ | 55 | /* exofs Application specific page/attribute */ |
56 | /* Inode attrs */ | ||
56 | # define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) | 57 | # define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) |
57 | # define EXOFS_ATTR_INODE_DATA 1 | 58 | # define EXOFS_ATTR_INODE_DATA 1 |
58 | # define EXOFS_ATTR_INODE_FILE_LAYOUT 2 | 59 | # define EXOFS_ATTR_INODE_FILE_LAYOUT 2 |
59 | # define EXOFS_ATTR_INODE_DIR_LAYOUT 3 | 60 | # define EXOFS_ATTR_INODE_DIR_LAYOUT 3 |
61 | /* Partition attrs */ | ||
62 | # define EXOFS_APAGE_SB_DATA (0xF0000000U + 3) | ||
63 | # define EXOFS_ATTR_SB_STATS 1 | ||
60 | 64 | ||
61 | /* | 65 | /* |
62 | * The maximum number of files we can have is limited by the size of the | 66 | * The maximum number of files we can have is limited by the size of the |
@@ -86,8 +90,8 @@ enum { | |||
86 | */ | 90 | */ |
87 | enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1}; | 91 | enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1}; |
88 | struct exofs_fscb { | 92 | struct exofs_fscb { |
89 | __le64 s_nextid; /* Highest object ID used */ | 93 | __le64 s_nextid; /* Only used after mkfs */ |
90 | __le64 s_numfiles; /* Number of files on fs */ | 94 | __le64 s_numfiles; /* Only used after mkfs */ |
91 | __le32 s_version; /* == EXOFS_FSCB_VER */ | 95 | __le32 s_version; /* == EXOFS_FSCB_VER */ |
92 | __le16 s_magic; /* Magic signature */ | 96 | __le16 s_magic; /* Magic signature */ |
93 | __le16 s_newfs; /* Non-zero if this is a new fs */ | 97 | __le16 s_newfs; /* Non-zero if this is a new fs */ |
@@ -98,6 +102,16 @@ struct exofs_fscb { | |||
98 | } __packed; | 102 | } __packed; |
99 | 103 | ||
100 | /* | 104 | /* |
105 | * This struct is set on the FS partition's attributes. | ||
106 | * [EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS] and is written together | ||
107 | * with the create command, to atomically persist the sb writeable information. | ||
108 | */ | ||
109 | struct exofs_sb_stats { | ||
110 | __le64 s_nextid; /* Highest object ID used */ | ||
111 | __le64 s_numfiles; /* Number of files on fs */ | ||
112 | } __packed; | ||
113 | |||
114 | /* | ||
101 | * Describes the raid used in the FS. It is part of the device table. | 115 | * Describes the raid used in the FS. It is part of the device table. |
102 | * This here is taken from the pNFS-objects definition. In exofs we | 116 | * This here is taken from the pNFS-objects definition. In exofs we |
103 | * use one raid policy through-out the filesystem. (NOTE: the funny | 117 | * use one raid policy through-out the filesystem. (NOTE: the funny |
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index dcc941d82d67..d0941c6a1f72 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c | |||
@@ -124,7 +124,7 @@ out: | |||
124 | 124 | ||
125 | Ebadsize: | 125 | Ebadsize: |
126 | EXOFS_ERR("ERROR [exofs_check_page]: " | 126 | EXOFS_ERR("ERROR [exofs_check_page]: " |
127 | "size of directory #%lu is not a multiple of chunk size", | 127 | "size of directory(0x%lx) is not a multiple of chunk size\n", |
128 | dir->i_ino | 128 | dir->i_ino |
129 | ); | 129 | ); |
130 | goto fail; | 130 | goto fail; |
@@ -142,8 +142,8 @@ Espan: | |||
142 | goto bad_entry; | 142 | goto bad_entry; |
143 | bad_entry: | 143 | bad_entry: |
144 | EXOFS_ERR( | 144 | EXOFS_ERR( |
145 | "ERROR [exofs_check_page]: bad entry in directory #%lu: %s - " | 145 | "ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - " |
146 | "offset=%lu, inode=%llu, rec_len=%d, name_len=%d", | 146 | "offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n", |
147 | dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, | 147 | dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, |
148 | _LLU(le64_to_cpu(p->inode_no)), | 148 | _LLU(le64_to_cpu(p->inode_no)), |
149 | rec_len, p->name_len); | 149 | rec_len, p->name_len); |
@@ -151,8 +151,8 @@ bad_entry: | |||
151 | Eend: | 151 | Eend: |
152 | p = (struct exofs_dir_entry *)(kaddr + offs); | 152 | p = (struct exofs_dir_entry *)(kaddr + offs); |
153 | EXOFS_ERR("ERROR [exofs_check_page]: " | 153 | EXOFS_ERR("ERROR [exofs_check_page]: " |
154 | "entry in directory #%lu spans the page boundary" | 154 | "entry in directory(0x%lx) spans the page boundary" |
155 | "offset=%lu, inode=%llu", | 155 | "offset=%lu, inode=0x%llx\n", |
156 | dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, | 156 | dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, |
157 | _LLU(le64_to_cpu(p->inode_no))); | 157 | _LLU(le64_to_cpu(p->inode_no))); |
158 | fail: | 158 | fail: |
@@ -261,9 +261,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
261 | struct page *page = exofs_get_page(inode, n); | 261 | struct page *page = exofs_get_page(inode, n); |
262 | 262 | ||
263 | if (IS_ERR(page)) { | 263 | if (IS_ERR(page)) { |
264 | EXOFS_ERR("ERROR: " | 264 | EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n", |
265 | "bad page in #%lu", | 265 | inode->i_ino); |
266 | inode->i_ino); | ||
267 | filp->f_pos += PAGE_CACHE_SIZE - offset; | 266 | filp->f_pos += PAGE_CACHE_SIZE - offset; |
268 | return PTR_ERR(page); | 267 | return PTR_ERR(page); |
269 | } | 268 | } |
@@ -283,7 +282,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
283 | for (; (char *)de <= limit; de = exofs_next_entry(de)) { | 282 | for (; (char *)de <= limit; de = exofs_next_entry(de)) { |
284 | if (de->rec_len == 0) { | 283 | if (de->rec_len == 0) { |
285 | EXOFS_ERR("ERROR: " | 284 | EXOFS_ERR("ERROR: " |
286 | "zero-length directory entry"); | 285 | "zero-length entry in directory(0x%lx)\n", |
286 | inode->i_ino); | ||
287 | exofs_put_page(page); | 287 | exofs_put_page(page); |
288 | return -EIO; | 288 | return -EIO; |
289 | } | 289 | } |
@@ -342,9 +342,9 @@ struct exofs_dir_entry *exofs_find_entry(struct inode *dir, | |||
342 | kaddr += exofs_last_byte(dir, n) - reclen; | 342 | kaddr += exofs_last_byte(dir, n) - reclen; |
343 | while ((char *) de <= kaddr) { | 343 | while ((char *) de <= kaddr) { |
344 | if (de->rec_len == 0) { | 344 | if (de->rec_len == 0) { |
345 | EXOFS_ERR( | 345 | EXOFS_ERR("ERROR: zero-length entry in " |
346 | "ERROR: exofs_find_entry: " | 346 | "directory(0x%lx)\n", |
347 | "zero-length directory entry"); | 347 | dir->i_ino); |
348 | exofs_put_page(page); | 348 | exofs_put_page(page); |
349 | goto out; | 349 | goto out; |
350 | } | 350 | } |
@@ -472,7 +472,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode) | |||
472 | } | 472 | } |
473 | if (de->rec_len == 0) { | 473 | if (de->rec_len == 0) { |
474 | EXOFS_ERR("ERROR: exofs_add_link: " | 474 | EXOFS_ERR("ERROR: exofs_add_link: " |
475 | "zero-length directory entry"); | 475 | "zero-length entry in directory(0x%lx)\n", |
476 | inode->i_ino); | ||
476 | err = -EIO; | 477 | err = -EIO; |
477 | goto out_unlock; | 478 | goto out_unlock; |
478 | } | 479 | } |
@@ -491,7 +492,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode) | |||
491 | exofs_put_page(page); | 492 | exofs_put_page(page); |
492 | } | 493 | } |
493 | 494 | ||
494 | EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=%p", dentry, inode); | 495 | EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=0x%lx\n", |
496 | dentry, inode->i_ino); | ||
495 | return -EINVAL; | 497 | return -EINVAL; |
496 | 498 | ||
497 | got_it: | 499 | got_it: |
@@ -542,7 +544,8 @@ int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page) | |||
542 | while (de < dir) { | 544 | while (de < dir) { |
543 | if (de->rec_len == 0) { | 545 | if (de->rec_len == 0) { |
544 | EXOFS_ERR("ERROR: exofs_delete_entry:" | 546 | EXOFS_ERR("ERROR: exofs_delete_entry:" |
545 | "zero-length directory entry"); | 547 | "zero-length entry in directory(0x%lx)\n", |
548 | inode->i_ino); | ||
546 | err = -EIO; | 549 | err = -EIO; |
547 | goto out; | 550 | goto out; |
548 | } | 551 | } |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 2dc925fa1010..c965806c2821 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -77,7 +77,7 @@ struct exofs_layout { | |||
77 | * our extension to the in-memory superblock | 77 | * our extension to the in-memory superblock |
78 | */ | 78 | */ |
79 | struct exofs_sb_info { | 79 | struct exofs_sb_info { |
80 | struct exofs_fscb s_fscb; /* Written often, pre-allocate*/ | 80 | struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/ |
81 | int s_timeout; /* timeout for OSD operations */ | 81 | int s_timeout; /* timeout for OSD operations */ |
82 | uint64_t s_nextid; /* highest object ID used */ | 82 | uint64_t s_nextid; /* highest object ID used */ |
83 | uint32_t s_numfiles; /* number of files on fs */ | 83 | uint32_t s_numfiles; /* number of files on fs */ |
@@ -256,6 +256,8 @@ static inline int exofs_oi_read(struct exofs_i_info *oi, | |||
256 | } | 256 | } |
257 | 257 | ||
258 | /* inode.c */ | 258 | /* inode.c */ |
259 | unsigned exofs_max_io_pages(struct exofs_layout *layout, | ||
260 | unsigned expected_pages); | ||
259 | int exofs_setattr(struct dentry *, struct iattr *); | 261 | int exofs_setattr(struct dentry *, struct iattr *); |
260 | int exofs_write_begin(struct file *file, struct address_space *mapping, | 262 | int exofs_write_begin(struct file *file, struct address_space *mapping, |
261 | loff_t pos, unsigned len, unsigned flags, | 263 | loff_t pos, unsigned len, unsigned flags, |
@@ -279,7 +281,7 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, | |||
279 | struct inode *); | 281 | struct inode *); |
280 | 282 | ||
281 | /* super.c */ | 283 | /* super.c */ |
282 | int exofs_sync_fs(struct super_block *sb, int wait); | 284 | int exofs_sbi_write_stats(struct exofs_sb_info *sbi); |
283 | 285 | ||
284 | /********************* | 286 | /********************* |
285 | * operation vectors * | 287 | * operation vectors * |
diff --git a/fs/exofs/file.c b/fs/exofs/file.c index b905c79b4f0a..45ca323d8363 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c | |||
@@ -45,22 +45,8 @@ static int exofs_release_file(struct inode *inode, struct file *filp) | |||
45 | static int exofs_file_fsync(struct file *filp, int datasync) | 45 | static int exofs_file_fsync(struct file *filp, int datasync) |
46 | { | 46 | { |
47 | int ret; | 47 | int ret; |
48 | struct inode *inode = filp->f_mapping->host; | ||
49 | struct super_block *sb; | ||
50 | |||
51 | if (!(inode->i_state & I_DIRTY)) | ||
52 | return 0; | ||
53 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
54 | return 0; | ||
55 | |||
56 | ret = sync_inode_metadata(inode, 1); | ||
57 | |||
58 | /* This is a good place to write the sb */ | ||
59 | /* TODO: Sechedule an sb-sync on create */ | ||
60 | sb = inode->i_sb; | ||
61 | if (sb->s_dirt) | ||
62 | exofs_sync_fs(sb, 1); | ||
63 | 48 | ||
49 | ret = sync_inode_metadata(filp->f_mapping->host, 1); | ||
64 | return ret; | 50 | return ret; |
65 | } | 51 | } |
66 | 52 | ||
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index a7555238c41a..8472c098445d 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -43,6 +43,17 @@ enum { BIO_MAX_PAGES_KMALLOC = | |||
43 | PAGE_SIZE / sizeof(struct page *), | 43 | PAGE_SIZE / sizeof(struct page *), |
44 | }; | 44 | }; |
45 | 45 | ||
46 | unsigned exofs_max_io_pages(struct exofs_layout *layout, | ||
47 | unsigned expected_pages) | ||
48 | { | ||
49 | unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC); | ||
50 | |||
51 | /* TODO: easily support bio chaining */ | ||
52 | pages = min_t(unsigned, pages, | ||
53 | layout->group_width * BIO_MAX_PAGES_KMALLOC); | ||
54 | return pages; | ||
55 | } | ||
56 | |||
46 | struct page_collect { | 57 | struct page_collect { |
47 | struct exofs_sb_info *sbi; | 58 | struct exofs_sb_info *sbi; |
48 | struct inode *inode; | 59 | struct inode *inode; |
@@ -97,8 +108,7 @@ static void _pcol_reset(struct page_collect *pcol) | |||
97 | 108 | ||
98 | static int pcol_try_alloc(struct page_collect *pcol) | 109 | static int pcol_try_alloc(struct page_collect *pcol) |
99 | { | 110 | { |
100 | unsigned pages = min_t(unsigned, pcol->expected_pages, | 111 | unsigned pages; |
101 | MAX_PAGES_KMALLOC); | ||
102 | 112 | ||
103 | if (!pcol->ios) { /* First time allocate io_state */ | 113 | if (!pcol->ios) { /* First time allocate io_state */ |
104 | int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); | 114 | int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); |
@@ -108,8 +118,7 @@ static int pcol_try_alloc(struct page_collect *pcol) | |||
108 | } | 118 | } |
109 | 119 | ||
110 | /* TODO: easily support bio chaining */ | 120 | /* TODO: easily support bio chaining */ |
111 | pages = min_t(unsigned, pages, | 121 | pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages); |
112 | pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC); | ||
113 | 122 | ||
114 | for (; pages; pages >>= 1) { | 123 | for (; pages; pages >>= 1) { |
115 | pcol->pages = kmalloc(pages * sizeof(struct page *), | 124 | pcol->pages = kmalloc(pages * sizeof(struct page *), |
@@ -350,8 +359,10 @@ static int readpage_strip(void *data, struct page *page) | |||
350 | 359 | ||
351 | if (!pcol->read_4_write) | 360 | if (!pcol->read_4_write) |
352 | unlock_page(page); | 361 | unlock_page(page); |
353 | EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," | 362 | EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx " |
354 | " splitting\n", inode->i_ino, page->index); | 363 | "read_4_write=%d index=0x%lx end_index=0x%lx " |
364 | "splitting\n", inode->i_ino, len, | ||
365 | pcol->read_4_write, page->index, end_index); | ||
355 | 366 | ||
356 | return read_exec(pcol); | 367 | return read_exec(pcol); |
357 | } | 368 | } |
@@ -722,11 +733,28 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, | |||
722 | 733 | ||
723 | /* read modify write */ | 734 | /* read modify write */ |
724 | if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { | 735 | if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { |
736 | loff_t i_size = i_size_read(mapping->host); | ||
737 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
738 | size_t rlen; | ||
739 | |||
740 | if (page->index < end_index) | ||
741 | rlen = PAGE_CACHE_SIZE; | ||
742 | else if (page->index == end_index) | ||
743 | rlen = i_size & ~PAGE_CACHE_MASK; | ||
744 | else | ||
745 | rlen = 0; | ||
746 | |||
747 | if (!rlen) { | ||
748 | clear_highpage(page); | ||
749 | SetPageUptodate(page); | ||
750 | goto out; | ||
751 | } | ||
752 | |||
725 | ret = _readpage(page, true); | 753 | ret = _readpage(page, true); |
726 | if (ret) { | 754 | if (ret) { |
727 | /*SetPageError was done by _readpage. Is it ok?*/ | 755 | /*SetPageError was done by _readpage. Is it ok?*/ |
728 | unlock_page(page); | 756 | unlock_page(page); |
729 | EXOFS_DBGMSG("__readpage_filler failed\n"); | 757 | EXOFS_DBGMSG("__readpage failed\n"); |
730 | } | 758 | } |
731 | } | 759 | } |
732 | out: | 760 | out: |
@@ -795,7 +823,6 @@ const struct address_space_operations exofs_aops = { | |||
795 | .direct_IO = NULL, /* TODO: Should be trivial to do */ | 823 | .direct_IO = NULL, /* TODO: Should be trivial to do */ |
796 | 824 | ||
797 | /* With these NULL has special meaning or default is not exported */ | 825 | /* With these NULL has special meaning or default is not exported */ |
798 | .sync_page = NULL, | ||
799 | .get_xip_mem = NULL, | 826 | .get_xip_mem = NULL, |
800 | .migratepage = NULL, | 827 | .migratepage = NULL, |
801 | .launder_page = NULL, | 828 | .launder_page = NULL, |
@@ -1030,6 +1057,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
1030 | memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); | 1057 | memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); |
1031 | } | 1058 | } |
1032 | 1059 | ||
1060 | inode->i_mapping->backing_dev_info = sb->s_bdi; | ||
1033 | if (S_ISREG(inode->i_mode)) { | 1061 | if (S_ISREG(inode->i_mode)) { |
1034 | inode->i_op = &exofs_file_inode_operations; | 1062 | inode->i_op = &exofs_file_inode_operations; |
1035 | inode->i_fop = &exofs_file_operations; | 1063 | inode->i_fop = &exofs_file_operations; |
@@ -1073,6 +1101,7 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi) | |||
1073 | } | 1101 | } |
1074 | return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; | 1102 | return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; |
1075 | } | 1103 | } |
1104 | |||
1076 | /* | 1105 | /* |
1077 | * Callback function from exofs_new_inode(). The important thing is that we | 1106 | * Callback function from exofs_new_inode(). The important thing is that we |
1078 | * set the obj_created flag so that other methods know that the object exists on | 1107 | * set the obj_created flag so that other methods know that the object exists on |
@@ -1130,7 +1159,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1130 | 1159 | ||
1131 | sbi = sb->s_fs_info; | 1160 | sbi = sb->s_fs_info; |
1132 | 1161 | ||
1133 | sb->s_dirt = 1; | 1162 | inode->i_mapping->backing_dev_info = sb->s_bdi; |
1134 | inode_init_owner(inode, dir, mode); | 1163 | inode_init_owner(inode, dir, mode); |
1135 | inode->i_ino = sbi->s_nextid++; | 1164 | inode->i_ino = sbi->s_nextid++; |
1136 | inode->i_blkbits = EXOFS_BLKSHIFT; | 1165 | inode->i_blkbits = EXOFS_BLKSHIFT; |
@@ -1141,6 +1170,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1141 | spin_unlock(&sbi->s_next_gen_lock); | 1170 | spin_unlock(&sbi->s_next_gen_lock); |
1142 | insert_inode_hash(inode); | 1171 | insert_inode_hash(inode); |
1143 | 1172 | ||
1173 | exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */ | ||
1174 | |||
1144 | mark_inode_dirty(inode); | 1175 | mark_inode_dirty(inode); |
1145 | 1176 | ||
1146 | ret = exofs_get_io_state(&sbi->layout, &ios); | 1177 | ret = exofs_get_io_state(&sbi->layout, &ios); |
@@ -1271,7 +1302,8 @@ out: | |||
1271 | 1302 | ||
1272 | int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) | 1303 | int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1273 | { | 1304 | { |
1274 | return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); | 1305 | /* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */ |
1306 | return exofs_update_inode(inode, 1); | ||
1275 | } | 1307 | } |
1276 | 1308 | ||
1277 | /* | 1309 | /* |
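The exofs inode changes do four things: the pages-per-I/O clamp is factored into exofs_max_io_pages() so super.c can reuse it for read-ahead sizing; inodes created or loaded get their mapping's backing_dev_info pointed at the per-sb bdi; new inodes trigger an asynchronous exofs_sbi_write_stats() instead of dirtying the superblock; and write_begin's read-modify-write path stops reading pages that lie wholly beyond i_size, zero-filling them instead, since there is nothing on the OSD object to read back there. The rule that last hunk implements, isolated as a sketch:

#include <linux/pagemap.h>

/* How many bytes of @page are backed by on-disk data, given @i_size.
 * Anything past that is zero-filled rather than read.
 */
static size_t backed_bytes(struct page *page, loff_t i_size)
{
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;

	if (page->index < end_index)
		return PAGE_CACHE_SIZE;		/* fully below EOF */
	if (page->index == end_index)
		return i_size & ~PAGE_CACHE_MASK; /* partial last page */
	return 0;				/* wholly beyond EOF */
}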
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 8c6c4669b381..06065bd37fc3 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -48,6 +48,7 @@ | |||
48 | * struct to hold what we get from mount options | 48 | * struct to hold what we get from mount options |
49 | */ | 49 | */ |
50 | struct exofs_mountopt { | 50 | struct exofs_mountopt { |
51 | bool is_osdname; | ||
51 | const char *dev_name; | 52 | const char *dev_name; |
52 | uint64_t pid; | 53 | uint64_t pid; |
53 | int timeout; | 54 | int timeout; |
@@ -56,7 +57,7 @@ struct exofs_mountopt { | |||
56 | /* | 57 | /* |
57 | * exofs-specific mount-time options. | 58 | * exofs-specific mount-time options. |
58 | */ | 59 | */ |
59 | enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err }; | 60 | enum { Opt_name, Opt_pid, Opt_to, Opt_err }; |
60 | 61 | ||
61 | /* | 62 | /* |
62 | * Our mount-time options. These should ideally be 64-bit unsigned, but the | 63 | * Our mount-time options. These should ideally be 64-bit unsigned, but the |
@@ -64,6 +65,7 @@ enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err }; | |||
64 | * sufficient for most applications now. | 65 | * sufficient for most applications now. |
65 | */ | 66 | */ |
66 | static match_table_t tokens = { | 67 | static match_table_t tokens = { |
68 | {Opt_name, "osdname=%s"}, | ||
67 | {Opt_pid, "pid=%u"}, | 69 | {Opt_pid, "pid=%u"}, |
68 | {Opt_to, "to=%u"}, | 70 | {Opt_to, "to=%u"}, |
69 | {Opt_err, NULL} | 71 | {Opt_err, NULL} |
@@ -94,6 +96,14 @@ static int parse_options(char *options, struct exofs_mountopt *opts) | |||
94 | 96 | ||
95 | token = match_token(p, tokens, args); | 97 | token = match_token(p, tokens, args); |
96 | switch (token) { | 98 | switch (token) { |
99 | case Opt_name: | ||
100 | opts->dev_name = match_strdup(&args[0]); | ||
101 | if (unlikely(!opts->dev_name)) { | ||
102 | EXOFS_ERR("Error allocating dev_name"); | ||
103 | return -ENOMEM; | ||
104 | } | ||
105 | opts->is_osdname = true; | ||
106 | break; | ||
97 | case Opt_pid: | 107 | case Opt_pid: |
98 | if (0 == match_strlcpy(str, &args[0], sizeof(str))) | 108 | if (0 == match_strlcpy(str, &args[0], sizeof(str))) |
99 | return -EINVAL; | 109 | return -EINVAL; |
@@ -203,6 +213,101 @@ static void destroy_inodecache(void) | |||
203 | static const struct super_operations exofs_sops; | 213 | static const struct super_operations exofs_sops; |
204 | static const struct export_operations exofs_export_ops; | 214 | static const struct export_operations exofs_export_ops; |
205 | 215 | ||
216 | static const struct osd_attr g_attr_sb_stats = ATTR_DEF( | ||
217 | EXOFS_APAGE_SB_DATA, | ||
218 | EXOFS_ATTR_SB_STATS, | ||
219 | sizeof(struct exofs_sb_stats)); | ||
220 | |||
221 | static int __sbi_read_stats(struct exofs_sb_info *sbi) | ||
222 | { | ||
223 | struct osd_attr attrs[] = { | ||
224 | [0] = g_attr_sb_stats, | ||
225 | }; | ||
226 | struct exofs_io_state *ios; | ||
227 | int ret; | ||
228 | |||
229 | ret = exofs_get_io_state(&sbi->layout, &ios); | ||
230 | if (unlikely(ret)) { | ||
231 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); | ||
232 | return ret; | ||
233 | } | ||
234 | |||
235 | ios->cred = sbi->s_cred; | ||
236 | |||
237 | ios->in_attr = attrs; | ||
238 | ios->in_attr_len = ARRAY_SIZE(attrs); | ||
239 | |||
240 | ret = exofs_sbi_read(ios); | ||
241 | if (unlikely(ret)) { | ||
242 | EXOFS_ERR("Error reading super_block stats => %d\n", ret); | ||
243 | goto out; | ||
244 | } | ||
245 | |||
246 | ret = extract_attr_from_ios(ios, &attrs[0]); | ||
247 | if (ret) { | ||
248 | EXOFS_ERR("%s: extract_attr of sb_stats failed\n", __func__); | ||
249 | goto out; | ||
250 | } | ||
251 | if (attrs[0].len) { | ||
252 | struct exofs_sb_stats *ess; | ||
253 | |||
254 | if (unlikely(attrs[0].len != sizeof(*ess))) { | ||
255 | EXOFS_ERR("%s: Wrong version of exofs_sb_stats " | ||
256 | "size(%d) != expected(%zd)\n", | ||
257 | __func__, attrs[0].len, sizeof(*ess)); | ||
258 | goto out; | ||
259 | } | ||
260 | |||
261 | ess = attrs[0].val_ptr; | ||
262 | sbi->s_nextid = le64_to_cpu(ess->s_nextid); | ||
263 | sbi->s_numfiles = le32_to_cpu(ess->s_numfiles); | ||
264 | } | ||
265 | |||
266 | out: | ||
267 | exofs_put_io_state(ios); | ||
268 | return ret; | ||
269 | } | ||
270 | |||
271 | static void stats_done(struct exofs_io_state *ios, void *p) | ||
272 | { | ||
273 | exofs_put_io_state(ios); | ||
274 | /* Good thanks nothing to do anymore */ | ||
275 | } | ||
276 | |||
277 | /* Asynchronously write the stats attribute */ | ||
278 | int exofs_sbi_write_stats(struct exofs_sb_info *sbi) | ||
279 | { | ||
280 | struct osd_attr attrs[] = { | ||
281 | [0] = g_attr_sb_stats, | ||
282 | }; | ||
283 | struct exofs_io_state *ios; | ||
284 | int ret; | ||
285 | |||
286 | ret = exofs_get_io_state(&sbi->layout, &ios); | ||
287 | if (unlikely(ret)) { | ||
288 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); | ||
289 | return ret; | ||
290 | } | ||
291 | |||
292 | sbi->s_ess.s_nextid = cpu_to_le64(sbi->s_nextid); | ||
293 | sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); | ||
294 | attrs[0].val_ptr = &sbi->s_ess; | ||
295 | |||
296 | ios->cred = sbi->s_cred; | ||
297 | ios->done = stats_done; | ||
298 | ios->private = sbi; | ||
299 | ios->out_attr = attrs; | ||
300 | ios->out_attr_len = ARRAY_SIZE(attrs); | ||
301 | |||
302 | ret = exofs_sbi_write(ios); | ||
303 | if (unlikely(ret)) { | ||
304 | EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); | ||
305 | exofs_put_io_state(ios); | ||
306 | } | ||
307 | |||
308 | return ret; | ||
309 | } | ||
310 | |||
206 | /* | 311 | /* |
207 | * Write the superblock to the OSD | 312 | * Write the superblock to the OSD |
208 | */ | 313 | */ |
@@ -213,18 +318,25 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
213 | struct exofs_io_state *ios; | 318 | struct exofs_io_state *ios; |
214 | int ret = -ENOMEM; | 319 | int ret = -ENOMEM; |
215 | 320 | ||
216 | lock_super(sb); | 321 | fscb = kmalloc(sizeof(*fscb), GFP_KERNEL); |
322 | if (unlikely(!fscb)) | ||
323 | return -ENOMEM; | ||
324 | |||
217 | sbi = sb->s_fs_info; | 325 | sbi = sb->s_fs_info; |
218 | fscb = &sbi->s_fscb; | ||
219 | 326 | ||
327 | /* NOTE: We no longer dirty the super_block anywhere in exofs. The | ||
328 | * reason we write the fscb here on unmount is so we can stay backwards | ||
329 | * compatible with fscb->s_version == 1. (What we are not compatible | ||
330 | * with is if a new version FS crashed and then we try to mount an old | ||
331 | * version). Otherwise the exofs_fscb is read-only from mkfs time. All | ||
332 | * the writeable info is set in exofs_sbi_write_stats() above. | ||
333 | */ | ||
220 | ret = exofs_get_io_state(&sbi->layout, &ios); | 334 | ret = exofs_get_io_state(&sbi->layout, &ios); |
221 | if (ret) | 335 | if (unlikely(ret)) |
222 | goto out; | 336 | goto out; |
223 | 337 | ||
224 | /* Note: We only write the changing part of the fscb. .i.e upto the | 338 | lock_super(sb); |
225 | * the fscb->s_dev_table_oid member. There is no read-modify-write | 339 | |
226 | * here. | ||
227 | */ | ||
228 | ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); | 340 | ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); |
229 | memset(fscb, 0, ios->length); | 341 | memset(fscb, 0, ios->length); |
230 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); | 342 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); |
@@ -239,16 +351,17 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
239 | ios->cred = sbi->s_cred; | 351 | ios->cred = sbi->s_cred; |
240 | 352 | ||
241 | ret = exofs_sbi_write(ios); | 353 | ret = exofs_sbi_write(ios); |
242 | if (unlikely(ret)) { | 354 | if (unlikely(ret)) |
243 | EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); | 355 | EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); |
244 | goto out; | 356 | else |
245 | } | 357 | sb->s_dirt = 0; |
246 | sb->s_dirt = 0; | 358 | |
247 | 359 | ||
360 | unlock_super(sb); | ||
248 | out: | 361 | out: |
249 | EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); | 362 | EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); |
250 | exofs_put_io_state(ios); | 363 | exofs_put_io_state(ios); |
251 | unlock_super(sb); | 364 | kfree(fscb); |
252 | return ret; | 365 | return ret; |
253 | } | 366 | } |
254 | 367 | ||
@@ -292,13 +405,14 @@ static void exofs_put_super(struct super_block *sb) | |||
292 | int num_pend; | 405 | int num_pend; |
293 | struct exofs_sb_info *sbi = sb->s_fs_info; | 406 | struct exofs_sb_info *sbi = sb->s_fs_info; |
294 | 407 | ||
295 | if (sb->s_dirt) | ||
296 | exofs_write_super(sb); | ||
297 | |||
298 | /* make sure there are no pending commands */ | 408 | /* make sure there are no pending commands */ |
299 | for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; | 409 | for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; |
300 | num_pend = atomic_read(&sbi->s_curr_pending)) { | 410 | num_pend = atomic_read(&sbi->s_curr_pending)) { |
301 | wait_queue_head_t wq; | 411 | wait_queue_head_t wq; |
412 | |||
413 | printk(KERN_NOTICE "%s: !!Pending operations in flight. " | ||
414 | "This is a BUG. please report to osd-dev@open-osd.org\n", | ||
415 | __func__); | ||
302 | init_waitqueue_head(&wq); | 416 | init_waitqueue_head(&wq); |
303 | wait_event_timeout(wq, | 417 | wait_event_timeout(wq, |
304 | (atomic_read(&sbi->s_curr_pending) == 0), | 418 | (atomic_read(&sbi->s_curr_pending) == 0), |
@@ -390,6 +504,23 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, | |||
390 | return 0; | 504 | return 0; |
391 | } | 505 | } |
392 | 506 | ||
507 | static unsigned __ra_pages(struct exofs_layout *layout) | ||
508 | { | ||
509 | const unsigned _MIN_RA = 32; /* min 128K read-ahead */ | ||
510 | unsigned ra_pages = layout->group_width * layout->stripe_unit / | ||
511 | PAGE_SIZE; | ||
512 | unsigned max_io_pages = exofs_max_io_pages(layout, ~0); | ||
513 | |||
514 | ra_pages *= 2; /* two stripes */ | ||
515 | if (ra_pages < _MIN_RA) | ||
516 | ra_pages = roundup(_MIN_RA, ra_pages / 2); | ||
517 | |||
518 | if (ra_pages > max_io_pages) | ||
519 | ra_pages = max_io_pages; | ||
520 | |||
521 | return ra_pages; | ||
522 | } | ||
523 | |||
393 | /* @odi is valid only as long as @fscb_dev is valid */ | 524 | /* @odi is valid only as long as @fscb_dev is valid */ |
394 | static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, | 525 | static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, |
395 | struct osd_dev_info *odi) | 526 | struct osd_dev_info *odi) |
@@ -495,7 +626,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, | |||
495 | } | 626 | } |
496 | 627 | ||
497 | od = osduld_info_lookup(&odi); | 628 | od = osduld_info_lookup(&odi); |
498 | if (unlikely(IS_ERR(od))) { | 629 | if (IS_ERR(od)) { |
499 | ret = PTR_ERR(od); | 630 | ret = PTR_ERR(od); |
500 | EXOFS_ERR("ERROR: device requested is not found " | 631 | EXOFS_ERR("ERROR: device requested is not found " |
501 | "osd_name-%s =>%d\n", odi.osdname, ret); | 632 | "osd_name-%s =>%d\n", odi.osdname, ret); |
@@ -558,9 +689,17 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
558 | goto free_bdi; | 689 | goto free_bdi; |
559 | 690 | ||
560 | /* use mount options to fill superblock */ | 691 | /* use mount options to fill superblock */ |
561 | od = osduld_path_lookup(opts->dev_name); | 692 | if (opts->is_osdname) { |
693 | struct osd_dev_info odi = {.systemid_len = 0}; | ||
694 | |||
695 | odi.osdname_len = strlen(opts->dev_name); | ||
696 | odi.osdname = (u8 *)opts->dev_name; | ||
697 | od = osduld_info_lookup(&odi); | ||
698 | } else { | ||
699 | od = osduld_path_lookup(opts->dev_name); | ||
700 | } | ||
562 | if (IS_ERR(od)) { | 701 | if (IS_ERR(od)) { |
563 | ret = PTR_ERR(od); | 702 | ret = -EINVAL; |
564 | goto free_sbi; | 703 | goto free_sbi; |
565 | } | 704 | } |
566 | 705 | ||
@@ -594,6 +733,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
594 | goto free_sbi; | 733 | goto free_sbi; |
595 | 734 | ||
596 | sb->s_magic = le16_to_cpu(fscb.s_magic); | 735 | sb->s_magic = le16_to_cpu(fscb.s_magic); |
736 | /* NOTE: we read below to be backward compatible with old versions */ | ||
597 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); | 737 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); |
598 | sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles); | 738 | sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles); |
599 | 739 | ||
@@ -604,7 +744,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
604 | ret = -EINVAL; | 744 | ret = -EINVAL; |
605 | goto free_sbi; | 745 | goto free_sbi; |
606 | } | 746 | } |
607 | if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) { | 747 | if (le32_to_cpu(fscb.s_version) > EXOFS_FSCB_VER) { |
608 | EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", | 748 | EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", |
609 | EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); | 749 | EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); |
610 | ret = -EINVAL; | 750 | ret = -EINVAL; |
@@ -622,7 +762,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
622 | goto free_sbi; | 762 | goto free_sbi; |
623 | } | 763 | } |
624 | 764 | ||
765 | __sbi_read_stats(sbi); | ||
766 | |||
625 | /* set up operation vectors */ | 767 | /* set up operation vectors */ |
768 | sbi->bdi.ra_pages = __ra_pages(&sbi->layout); | ||
626 | sb->s_bdi = &sbi->bdi; | 769 | sb->s_bdi = &sbi->bdi; |
627 | sb->s_fs_info = sbi; | 770 | sb->s_fs_info = sbi; |
628 | sb->s_op = &exofs_sops; | 771 | sb->s_op = &exofs_sops; |
@@ -652,6 +795,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
652 | 795 | ||
653 | _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], | 796 | _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], |
654 | sbi->layout.s_pid); | 797 | sbi->layout.s_pid); |
798 | if (opts->is_osdname) | ||
799 | kfree(opts->dev_name); | ||
655 | return 0; | 800 | return 0; |
656 | 801 | ||
657 | free_sbi: | 802 | free_sbi: |
@@ -660,6 +805,8 @@ free_bdi: | |||
660 | EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", | 805 | EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", |
661 | opts->dev_name, sbi->layout.s_pid, ret); | 806 | opts->dev_name, sbi->layout.s_pid, ret); |
662 | exofs_free_sbi(sbi); | 807 | exofs_free_sbi(sbi); |
808 | if (opts->is_osdname) | ||
809 | kfree(opts->dev_name); | ||
663 | return ret; | 810 | return ret; |
664 | } | 811 | } |
665 | 812 | ||
@@ -677,7 +824,8 @@ static struct dentry *exofs_mount(struct file_system_type *type, | |||
677 | if (ret) | 824 | if (ret) |
678 | return ERR_PTR(ret); | 825 | return ERR_PTR(ret); |
679 | 826 | ||
680 | opts.dev_name = dev_name; | 827 | if (!opts.dev_name) |
828 | opts.dev_name = dev_name; | ||
681 | return mount_nodev(type, flags, &opts, exofs_fill_super); | 829 | return mount_nodev(type, flags, &opts, exofs_fill_super); |
682 | } | 830 | } |
683 | 831 | ||
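The exofs superblock rework moves the writeable counters (s_nextid/s_numfiles) out of the fscb object and into a dedicated OSD attribute, exofs_sb_stats, written asynchronously by exofs_sbi_write_stats() whenever an inode is created. The superblock is therefore never marked dirty; exofs_sync_fs() survives only for backward compatibility with s_version == 1 and now writes a kmalloc'ed scratch fscb rather than the copy embedded in sbi. Mounts can also name the target by OSD name via the new osdname= option instead of a device path, and the bdi gains layout-aware read-ahead via __ra_pages(). That sizing, worked through with hypothetical layout values:

/* __ra_pages() example: group_width = 4, stripe_unit = 64 KiB,
 * PAGE_SIZE = 4 KiB (all values hypothetical).
 */
static unsigned ra_pages_example(void)
{
	unsigned ra_pages = 4 * (64 * 1024) / (4 * 1024);  /* 64 pages/stripe */

	ra_pages *= 2;		/* two stripes: 128 pages == 512 KiB */
	/* 128 is above the 32-page _MIN_RA floor, so no rounding up;
	 * the result is finally clamped to exofs_max_io_pages(layout, ~0).
	 */
	return ra_pages;
}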
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 7b4180554a62..abea5a17c764 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c | |||
@@ -406,7 +406,7 @@ ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, | |||
406 | return -EINVAL; | 406 | return -EINVAL; |
407 | if (!test_opt(dentry->d_sb, POSIX_ACL)) | 407 | if (!test_opt(dentry->d_sb, POSIX_ACL)) |
408 | return -EOPNOTSUPP; | 408 | return -EOPNOTSUPP; |
409 | if (!is_owner_or_cap(dentry->d_inode)) | 409 | if (!inode_owner_or_capable(dentry->d_inode)) |
410 | return -EPERM; | 410 | return -EPERM; |
411 | 411 | ||
412 | if (value) { | 412 | if (value) { |
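The is_owner_or_cap() → inode_owner_or_capable() rename recurs mechanically through the rest of this series (ext2 and ext3 below, 9p earlier). Semantics at these call sites are unchanged: flag, version and ACL updates are allowed only to the inode's owner or a CAP_FOWNER holder. A simplified sketch of the check; the real helper of this era additionally verifies that the caller is in the inode's user namespace, which this ignores:

#include <linux/fs.h>
#include <linux/cred.h>
#include <linux/capability.h>

static bool may_act_on_inode(const struct inode *inode)
{
	return current_fsuid() == inode->i_uid || capable(CAP_FOWNER);
}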
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 1b48c3370872..645be9e7ee47 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
@@ -174,3 +174,9 @@ ext2_group_first_block_no(struct super_block *sb, unsigned long group_no) | |||
174 | return group_no * (ext2_fsblk_t)EXT2_BLOCKS_PER_GROUP(sb) + | 174 | return group_no * (ext2_fsblk_t)EXT2_BLOCKS_PER_GROUP(sb) + |
175 | le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block); | 175 | le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block); |
176 | } | 176 | } |
177 | |||
178 | #define ext2_set_bit __test_and_set_bit_le | ||
179 | #define ext2_clear_bit __test_and_clear_bit_le | ||
180 | #define ext2_test_bit test_bit_le | ||
181 | #define ext2_find_first_zero_bit find_first_zero_bit_le | ||
182 | #define ext2_find_next_zero_bit find_next_zero_bit_le | ||
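The new macro block in ext2.h tracks a bitops cleanup elsewhere in the tree: the generic ext2_*_bit helpers were replaced by explicitly little-endian *_le names, and since ext2's on-disk bitmaps are little-endian regardless of host byte order, the filesystem now carries the aliases itself. Note that ext2_set_bit and ext2_clear_bit map to the non-atomic __test_and_*_le variants, which return the previous bit value, so callers must hold whatever lock protects the bitmap. Illustrative use of the aliases (a sketch, not ext2's actual allocator):

/* Claim the first free bit in an on-disk little-endian bitmap.
 * Returns the bit number, or -1 if the bitmap is full.
 */
static int claim_first_free(void *bitmap, unsigned long nbits)
{
	unsigned long bit = ext2_find_first_zero_bit(bitmap, nbits);

	if (bit >= nbits)
		return -1;
	if (ext2_set_bit(bit, bitmap))	/* old value set: already taken */
		return -1;
	return bit;
}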
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 40ad210a5049..c47f706878b5 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -860,7 +860,6 @@ const struct address_space_operations ext2_aops = { | |||
860 | .readpage = ext2_readpage, | 860 | .readpage = ext2_readpage, |
861 | .readpages = ext2_readpages, | 861 | .readpages = ext2_readpages, |
862 | .writepage = ext2_writepage, | 862 | .writepage = ext2_writepage, |
863 | .sync_page = block_sync_page, | ||
864 | .write_begin = ext2_write_begin, | 863 | .write_begin = ext2_write_begin, |
865 | .write_end = ext2_write_end, | 864 | .write_end = ext2_write_end, |
866 | .bmap = ext2_bmap, | 865 | .bmap = ext2_bmap, |
@@ -880,7 +879,6 @@ const struct address_space_operations ext2_nobh_aops = { | |||
880 | .readpage = ext2_readpage, | 879 | .readpage = ext2_readpage, |
881 | .readpages = ext2_readpages, | 880 | .readpages = ext2_readpages, |
882 | .writepage = ext2_nobh_writepage, | 881 | .writepage = ext2_nobh_writepage, |
883 | .sync_page = block_sync_page, | ||
884 | .write_begin = ext2_nobh_write_begin, | 882 | .write_begin = ext2_nobh_write_begin, |
885 | .write_end = nobh_write_end, | 883 | .write_end = nobh_write_end, |
886 | .bmap = ext2_bmap, | 884 | .bmap = ext2_bmap, |
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index e7431309bdca..f81e250ac5c4 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c | |||
@@ -39,7 +39,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
39 | if (ret) | 39 | if (ret) |
40 | return ret; | 40 | return ret; |
41 | 41 | ||
42 | if (!is_owner_or_cap(inode)) { | 42 | if (!inode_owner_or_capable(inode)) { |
43 | ret = -EACCES; | 43 | ret = -EACCES; |
44 | goto setflags_out; | 44 | goto setflags_out; |
45 | } | 45 | } |
@@ -89,7 +89,7 @@ setflags_out: | |||
89 | case EXT2_IOC_GETVERSION: | 89 | case EXT2_IOC_GETVERSION: |
90 | return put_user(inode->i_generation, (int __user *) arg); | 90 | return put_user(inode->i_generation, (int __user *) arg); |
91 | case EXT2_IOC_SETVERSION: | 91 | case EXT2_IOC_SETVERSION: |
92 | if (!is_owner_or_cap(inode)) | 92 | if (!inode_owner_or_capable(inode)) |
93 | return -EPERM; | 93 | return -EPERM; |
94 | ret = mnt_want_write(filp->f_path.mnt); | 94 | ret = mnt_want_write(filp->f_path.mnt); |
95 | if (ret) | 95 | if (ret) |
@@ -115,7 +115,7 @@ setflags_out: | |||
115 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) | 115 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) |
116 | return -ENOTTY; | 116 | return -ENOTTY; |
117 | 117 | ||
118 | if (!is_owner_or_cap(inode)) | 118 | if (!inode_owner_or_capable(inode)) |
119 | return -EACCES; | 119 | return -EACCES; |
120 | 120 | ||
121 | if (get_user(rsv_window_size, (int __user *)arg)) | 121 | if (get_user(rsv_window_size, (int __user *)arg)) |
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index e4fa49e6c539..9d021c0d472a 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c | |||
@@ -435,7 +435,7 @@ ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, | |||
435 | return -EINVAL; | 435 | return -EINVAL; |
436 | if (!test_opt(inode->i_sb, POSIX_ACL)) | 436 | if (!test_opt(inode->i_sb, POSIX_ACL)) |
437 | return -EOPNOTSUPP; | 437 | return -EOPNOTSUPP; |
438 | if (!is_owner_or_cap(inode)) | 438 | if (!inode_owner_or_capable(inode)) |
439 | return -EPERM; | 439 | return -EPERM; |
440 | 440 | ||
441 | if (value) { | 441 | if (value) { |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ae94f6d949f5..fe2541d250e4 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1894,7 +1894,6 @@ static const struct address_space_operations ext3_ordered_aops = { | |||
1894 | .readpage = ext3_readpage, | 1894 | .readpage = ext3_readpage, |
1895 | .readpages = ext3_readpages, | 1895 | .readpages = ext3_readpages, |
1896 | .writepage = ext3_ordered_writepage, | 1896 | .writepage = ext3_ordered_writepage, |
1897 | .sync_page = block_sync_page, | ||
1898 | .write_begin = ext3_write_begin, | 1897 | .write_begin = ext3_write_begin, |
1899 | .write_end = ext3_ordered_write_end, | 1898 | .write_end = ext3_ordered_write_end, |
1900 | .bmap = ext3_bmap, | 1899 | .bmap = ext3_bmap, |
@@ -1910,7 +1909,6 @@ static const struct address_space_operations ext3_writeback_aops = { | |||
1910 | .readpage = ext3_readpage, | 1909 | .readpage = ext3_readpage, |
1911 | .readpages = ext3_readpages, | 1910 | .readpages = ext3_readpages, |
1912 | .writepage = ext3_writeback_writepage, | 1911 | .writepage = ext3_writeback_writepage, |
1913 | .sync_page = block_sync_page, | ||
1914 | .write_begin = ext3_write_begin, | 1912 | .write_begin = ext3_write_begin, |
1915 | .write_end = ext3_writeback_write_end, | 1913 | .write_end = ext3_writeback_write_end, |
1916 | .bmap = ext3_bmap, | 1914 | .bmap = ext3_bmap, |
@@ -1926,7 +1924,6 @@ static const struct address_space_operations ext3_journalled_aops = { | |||
1926 | .readpage = ext3_readpage, | 1924 | .readpage = ext3_readpage, |
1927 | .readpages = ext3_readpages, | 1925 | .readpages = ext3_readpages, |
1928 | .writepage = ext3_journalled_writepage, | 1926 | .writepage = ext3_journalled_writepage, |
1929 | .sync_page = block_sync_page, | ||
1930 | .write_begin = ext3_write_begin, | 1927 | .write_begin = ext3_write_begin, |
1931 | .write_end = ext3_journalled_write_end, | 1928 | .write_end = ext3_journalled_write_end, |
1932 | .set_page_dirty = ext3_journalled_set_page_dirty, | 1929 | .set_page_dirty = ext3_journalled_set_page_dirty, |
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index fc080dd561f7..f4090bd2f345 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c | |||
@@ -38,7 +38,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
38 | unsigned int oldflags; | 38 | unsigned int oldflags; |
39 | unsigned int jflag; | 39 | unsigned int jflag; |
40 | 40 | ||
41 | if (!is_owner_or_cap(inode)) | 41 | if (!inode_owner_or_capable(inode)) |
42 | return -EACCES; | 42 | return -EACCES; |
43 | 43 | ||
44 | if (get_user(flags, (int __user *) arg)) | 44 | if (get_user(flags, (int __user *) arg)) |
@@ -123,7 +123,7 @@ flags_out: | |||
123 | __u32 generation; | 123 | __u32 generation; |
124 | int err; | 124 | int err; |
125 | 125 | ||
126 | if (!is_owner_or_cap(inode)) | 126 | if (!inode_owner_or_capable(inode)) |
127 | return -EPERM; | 127 | return -EPERM; |
128 | 128 | ||
129 | err = mnt_want_write(filp->f_path.mnt); | 129 | err = mnt_want_write(filp->f_path.mnt); |
@@ -192,7 +192,7 @@ setversion_out: | |||
192 | if (err) | 192 | if (err) |
193 | return err; | 193 | return err; |
194 | 194 | ||
195 | if (!is_owner_or_cap(inode)) { | 195 | if (!inode_owner_or_capable(inode)) { |
196 | err = -EACCES; | 196 | err = -EACCES; |
197 | goto setrsvsz_out; | 197 | goto setrsvsz_out; |
198 | } | 198 | } |
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index e0270d1f8d82..21eacd7b7d79 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -433,7 +433,7 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, | |||
433 | return -EINVAL; | 433 | return -EINVAL; |
434 | if (!test_opt(inode->i_sb, POSIX_ACL)) | 434 | if (!test_opt(inode->i_sb, POSIX_ACL)) |
435 | return -EOPNOTSUPP; | 435 | return -EOPNOTSUPP; |
436 | if (!is_owner_or_cap(inode)) | 436 | if (!inode_owner_or_capable(inode)) |
437 | return -EPERM; | 437 | return -EPERM; |
438 | 438 | ||
439 | if (value) { | 439 | if (value) { |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index adf96b822781..97b970e7dd13 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -21,6 +21,8 @@ | |||
21 | #include "ext4_jbd2.h" | 21 | #include "ext4_jbd2.h" |
22 | #include "mballoc.h" | 22 | #include "mballoc.h" |
23 | 23 | ||
24 | #include <trace/events/ext4.h> | ||
25 | |||
24 | /* | 26 | /* |
25 | * balloc.c contains the blocks allocation and deallocation routines | 27 | * balloc.c contains the blocks allocation and deallocation routines |
26 | */ | 28 | */ |
@@ -342,6 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
342 | * We do it here so the bitmap uptodate bit | 344 | * We do it here so the bitmap uptodate bit |
343 | * gets set with the buffer lock held. | 345 | * gets set with the buffer lock held. |
344 | */ | 346 | */ |
347 | trace_ext4_read_block_bitmap_load(sb, block_group); | ||
345 | set_bitmap_uptodate(bh); | 348 | set_bitmap_uptodate(bh); |
346 | if (bh_submit_read(bh) < 0) { | 349 | if (bh_submit_read(bh) < 0) { |
347 | put_bh(bh); | 350 | put_bh(bh); |
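This series also adds a batch of ext4 tracepoints (bitmap loads, map_blocks enter/exit, fallocate, fsync, readpage/releasepage/invalidatepage, direct I/O). Each trace_ext4_*() call corresponds to a TRACE_EVENT() declaration in include/trace/events/ext4.h; the bitmap-load event would be declared roughly as below (the exact field layout is an assumption):

TRACE_EVENT(ext4_read_block_bitmap_load,
	TP_PROTO(struct super_block *sb, ext4_group_t group),
	TP_ARGS(sb, group),

	TP_STRUCT__entry(
		__field(dev_t,		dev)
		__field(ext4_group_t,	group)
	),

	TP_fast_assign(
		__entry->dev	= sb->s_dev;
		__entry->group	= group;
	),

	TP_printk("dev %d,%d group %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group)
);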
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3aa0b72b3b94..4daaf2b753f4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -923,14 +923,14 @@ struct ext4_inode_info { | |||
923 | #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ | 923 | #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ |
924 | EXT4_MOUNT2_##opt) | 924 | EXT4_MOUNT2_##opt) |
925 | 925 | ||
926 | #define ext4_set_bit ext2_set_bit | 926 | #define ext4_set_bit __test_and_set_bit_le |
927 | #define ext4_set_bit_atomic ext2_set_bit_atomic | 927 | #define ext4_set_bit_atomic ext2_set_bit_atomic |
928 | #define ext4_clear_bit ext2_clear_bit | 928 | #define ext4_clear_bit __test_and_clear_bit_le |
929 | #define ext4_clear_bit_atomic ext2_clear_bit_atomic | 929 | #define ext4_clear_bit_atomic ext2_clear_bit_atomic |
930 | #define ext4_test_bit ext2_test_bit | 930 | #define ext4_test_bit test_bit_le |
931 | #define ext4_find_first_zero_bit ext2_find_first_zero_bit | 931 | #define ext4_find_first_zero_bit find_first_zero_bit_le |
932 | #define ext4_find_next_zero_bit ext2_find_next_zero_bit | 932 | #define ext4_find_next_zero_bit find_next_zero_bit_le |
933 | #define ext4_find_next_bit ext2_find_next_bit | 933 | #define ext4_find_next_bit find_next_bit_le |
934 | 934 | ||
935 | /* | 935 | /* |
936 | * Maximal mount counts between two filesystem checks | 936 | * Maximal mount counts between two filesystem checks |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index d8b992e658c1..e25e99bf7ee1 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -202,13 +202,6 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed) | |||
202 | return 1; | 202 | return 1; |
203 | } | 203 | } |
204 | 204 | ||
205 | static inline void ext4_journal_release_buffer(handle_t *handle, | ||
206 | struct buffer_head *bh) | ||
207 | { | ||
208 | if (ext4_handle_valid(handle)) | ||
209 | jbd2_journal_release_buffer(handle, bh); | ||
210 | } | ||
211 | |||
212 | static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) | 205 | static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) |
213 | { | 206 | { |
214 | return ext4_journal_start_sb(inode->i_sb, nblocks); | 207 | return ext4_journal_start_sb(inode->i_sb, nblocks); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7516fb9c0bd5..dd2cb5076ff9 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -44,6 +44,8 @@ | |||
44 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
45 | #include "ext4_extents.h" | 45 | #include "ext4_extents.h" |
46 | 46 | ||
47 | #include <trace/events/ext4.h> | ||
48 | |||
47 | static int ext4_ext_truncate_extend_restart(handle_t *handle, | 49 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
48 | struct inode *inode, | 50 | struct inode *inode, |
49 | int needed) | 51 | int needed) |
@@ -664,6 +666,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
664 | if (unlikely(!bh)) | 666 | if (unlikely(!bh)) |
665 | goto err; | 667 | goto err; |
666 | if (!bh_uptodate_or_lock(bh)) { | 668 | if (!bh_uptodate_or_lock(bh)) { |
669 | trace_ext4_ext_load_extent(inode, block, | ||
670 | path[ppos].p_block); | ||
667 | if (bh_submit_read(bh) < 0) { | 671 | if (bh_submit_read(bh) < 0) { |
668 | put_bh(bh); | 672 | put_bh(bh); |
669 | goto err; | 673 | goto err; |
@@ -1034,7 +1038,7 @@ cleanup: | |||
1034 | for (i = 0; i < depth; i++) { | 1038 | for (i = 0; i < depth; i++) { |
1035 | if (!ablocks[i]) | 1039 | if (!ablocks[i]) |
1036 | continue; | 1040 | continue; |
1037 | ext4_free_blocks(handle, inode, 0, ablocks[i], 1, | 1041 | ext4_free_blocks(handle, inode, NULL, ablocks[i], 1, |
1038 | EXT4_FREE_BLOCKS_METADATA); | 1042 | EXT4_FREE_BLOCKS_METADATA); |
1039 | } | 1043 | } |
1040 | } | 1044 | } |
@@ -2059,7 +2063,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
2059 | if (err) | 2063 | if (err) |
2060 | return err; | 2064 | return err; |
2061 | ext_debug("index is empty, remove it, free block %llu\n", leaf); | 2065 | ext_debug("index is empty, remove it, free block %llu\n", leaf); |
2062 | ext4_free_blocks(handle, inode, 0, leaf, 1, | 2066 | ext4_free_blocks(handle, inode, NULL, leaf, 1, |
2063 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | 2067 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); |
2064 | return err; | 2068 | return err; |
2065 | } | 2069 | } |
@@ -2156,7 +2160,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2156 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2160 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
2157 | start = ext4_ext_pblock(ex) + ee_len - num; | 2161 | start = ext4_ext_pblock(ex) + ee_len - num; |
2158 | ext_debug("free last %u blocks starting %llu\n", num, start); | 2162 | ext_debug("free last %u blocks starting %llu\n", num, start); |
2159 | ext4_free_blocks(handle, inode, 0, start, num, flags); | 2163 | ext4_free_blocks(handle, inode, NULL, start, num, flags); |
2160 | } else if (from == le32_to_cpu(ex->ee_block) | 2164 | } else if (from == le32_to_cpu(ex->ee_block) |
2161 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2165 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { |
2162 | printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", | 2166 | printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", |
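The 0 -> NULL conversions in these ext4_free_blocks() calls (and the matching ones in inode.c below) are annotation fixes: the third parameter is a struct buffer_head *, and passing a literal 0 for a pointer draws sparse's "Using plain integer as NULL pointer" warning. The generated code is identical; only the call becomes type-honest:

/* third argument is a struct buffer_head *, not an int */
ext4_free_blocks(handle, inode, 0, leaf, 1, flags);	/* sparse warns */
ext4_free_blocks(handle, inode, NULL, leaf, 1, flags);	/* clean */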
@@ -3108,14 +3112,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, | |||
3108 | { | 3112 | { |
3109 | int i, depth; | 3113 | int i, depth; |
3110 | struct ext4_extent_header *eh; | 3114 | struct ext4_extent_header *eh; |
3111 | struct ext4_extent *ex, *last_ex; | 3115 | struct ext4_extent *last_ex; |
3112 | 3116 | ||
3113 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) | 3117 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) |
3114 | return 0; | 3118 | return 0; |
3115 | 3119 | ||
3116 | depth = ext_depth(inode); | 3120 | depth = ext_depth(inode); |
3117 | eh = path[depth].p_hdr; | 3121 | eh = path[depth].p_hdr; |
3118 | ex = path[depth].p_ext; | ||
3119 | 3122 | ||
3120 | if (unlikely(!eh->eh_entries)) { | 3123 | if (unlikely(!eh->eh_entries)) { |
3121 | EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " | 3124 | EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " |
@@ -3295,9 +3298,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3295 | struct ext4_map_blocks *map, int flags) | 3298 | struct ext4_map_blocks *map, int flags) |
3296 | { | 3299 | { |
3297 | struct ext4_ext_path *path = NULL; | 3300 | struct ext4_ext_path *path = NULL; |
3298 | struct ext4_extent_header *eh; | ||
3299 | struct ext4_extent newex, *ex; | 3301 | struct ext4_extent newex, *ex; |
3300 | ext4_fsblk_t newblock; | 3302 | ext4_fsblk_t newblock = 0; |
3301 | int err = 0, depth, ret; | 3303 | int err = 0, depth, ret; |
3302 | unsigned int allocated = 0; | 3304 | unsigned int allocated = 0; |
3303 | struct ext4_allocation_request ar; | 3305 | struct ext4_allocation_request ar; |
@@ -3305,6 +3307,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3305 | 3307 | ||
3306 | ext_debug("blocks %u/%u requested for inode %lu\n", | 3308 | ext_debug("blocks %u/%u requested for inode %lu\n", |
3307 | map->m_lblk, map->m_len, inode->i_ino); | 3309 | map->m_lblk, map->m_len, inode->i_ino); |
3310 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | ||
3308 | 3311 | ||
3309 | /* check in cache */ | 3312 | /* check in cache */ |
3310 | if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { | 3313 | if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { |
@@ -3352,7 +3355,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3352 | err = -EIO; | 3355 | err = -EIO; |
3353 | goto out2; | 3356 | goto out2; |
3354 | } | 3357 | } |
3355 | eh = path[depth].p_hdr; | ||
3356 | 3358 | ||
3357 | ex = path[depth].p_ext; | 3359 | ex = path[depth].p_ext; |
3358 | if (ex) { | 3360 | if (ex) { |
@@ -3485,7 +3487,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3485 | /* not a good idea to call discard here directly, | 3487 | /* not a good idea to call discard here directly, |
3486 | * but otherwise we'd need to call it every free() */ | 3488 | * but otherwise we'd need to call it every free() */ |
3487 | ext4_discard_preallocations(inode); | 3489 | ext4_discard_preallocations(inode); |
3488 | ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex), | 3490 | ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), |
3489 | ext4_ext_get_actual_len(&newex), 0); | 3491 | ext4_ext_get_actual_len(&newex), 0); |
3490 | goto out2; | 3492 | goto out2; |
3491 | } | 3493 | } |
@@ -3525,6 +3527,8 @@ out2: | |||
3525 | ext4_ext_drop_refs(path); | 3527 | ext4_ext_drop_refs(path); |
3526 | kfree(path); | 3528 | kfree(path); |
3527 | } | 3529 | } |
3530 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, | ||
3531 | newblock, map->m_len, err ? err : allocated); | ||
3528 | return err ? err : allocated; | 3532 | return err ? err : allocated; |
3529 | } | 3533 | } |
3530 | 3534 | ||
@@ -3658,6 +3662,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
3658 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 3662 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
3659 | return -EOPNOTSUPP; | 3663 | return -EOPNOTSUPP; |
3660 | 3664 | ||
3665 | trace_ext4_fallocate_enter(inode, offset, len, mode); | ||
3661 | map.m_lblk = offset >> blkbits; | 3666 | map.m_lblk = offset >> blkbits; |
3662 | /* | 3667 | /* |
3663 | * We can't just convert len to max_blocks because | 3668 | * We can't just convert len to max_blocks because |
@@ -3673,6 +3678,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
3673 | ret = inode_newsize_ok(inode, (len + offset)); | 3678 | ret = inode_newsize_ok(inode, (len + offset)); |
3674 | if (ret) { | 3679 | if (ret) { |
3675 | mutex_unlock(&inode->i_mutex); | 3680 | mutex_unlock(&inode->i_mutex); |
3681 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); | ||
3676 | return ret; | 3682 | return ret; |
3677 | } | 3683 | } |
3678 | retry: | 3684 | retry: |
@@ -3717,6 +3723,8 @@ retry: | |||
3717 | goto retry; | 3723 | goto retry; |
3718 | } | 3724 | } |
3719 | mutex_unlock(&inode->i_mutex); | 3725 | mutex_unlock(&inode->i_mutex); |
3726 | trace_ext4_fallocate_exit(inode, offset, max_blocks, | ||
3727 | ret > 0 ? ret2 : ret); | ||
3720 | return ret > 0 ? ret2 : ret; | 3728 | return ret > 0 ? ret2 : ret; |
3721 | } | 3729 | } |
3722 | 3730 | ||
@@ -3775,6 +3783,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
3775 | } | 3783 | } |
3776 | return ret > 0 ? ret2 : ret; | 3784 | return ret > 0 ? ret2 : ret; |
3777 | } | 3785 | } |
3786 | |||
3778 | /* | 3787 | /* |
3779 | * Callback function called for each extent to gather FIEMAP information. | 3788 | * Callback function called for each extent to gather FIEMAP information. |
3780 | */ | 3789 | */ |
@@ -3782,38 +3791,162 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | |||
3782 | struct ext4_ext_cache *newex, struct ext4_extent *ex, | 3791 | struct ext4_ext_cache *newex, struct ext4_extent *ex, |
3783 | void *data) | 3792 | void *data) |
3784 | { | 3793 | { |
3785 | struct fiemap_extent_info *fieinfo = data; | ||
3786 | unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; | ||
3787 | __u64 logical; | 3794 | __u64 logical; |
3788 | __u64 physical; | 3795 | __u64 physical; |
3789 | __u64 length; | 3796 | __u64 length; |
3797 | loff_t size; | ||
3790 | __u32 flags = 0; | 3798 | __u32 flags = 0; |
3791 | int error; | 3799 | int ret = 0; |
3800 | struct fiemap_extent_info *fieinfo = data; | ||
3801 | unsigned char blksize_bits; | ||
3792 | 3802 | ||
3793 | logical = (__u64)newex->ec_block << blksize_bits; | 3803 | blksize_bits = inode->i_sb->s_blocksize_bits; |
3804 | logical = (__u64)newex->ec_block << blksize_bits; | ||
3794 | 3805 | ||
3795 | if (newex->ec_start == 0) { | 3806 | if (newex->ec_start == 0) { |
3796 | pgoff_t offset; | 3807 | /* |
3797 | struct page *page; | 3808 | * If no extent in the extent tree contains block @newex->ec_start,
3809 | * the block may lie in 1) a hole or 2) a delayed-allocation extent. | ||
3810 | * | ||
3811 | * Holes and delayed extents are processed as follows. | ||
3812 | * 1. Look up dirty pages in the page cache for the given range. | ||
3813 | * If no page is found, there is no delayed extent; return | ||
3814 | * with EXT_CONTINUE. | ||
3815 | * 2. Find the first mapped buffer. | ||
3816 | * 3. Check that the mapped buffer is both in the request range | ||
3817 | * and a delayed buffer. If not, there is no delayed extent; | ||
3818 | * return. | ||
3819 | * 4. A delayed extent was found; it will be collected. | ||
3820 | */ | ||
3821 | ext4_lblk_t end = 0; | ||
3822 | pgoff_t last_offset; | ||
3823 | pgoff_t offset; | ||
3824 | pgoff_t index; | ||
3825 | struct page **pages = NULL; | ||
3798 | struct buffer_head *bh = NULL; | 3826 | struct buffer_head *bh = NULL; |
3827 | struct buffer_head *head = NULL; | ||
3828 | unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *); | ||
3829 | |||
3830 | pages = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
3831 | if (pages == NULL) | ||
3832 | return -ENOMEM; | ||
3799 | 3833 | ||
3800 | offset = logical >> PAGE_SHIFT; | 3834 | offset = logical >> PAGE_SHIFT; |
3801 | page = find_get_page(inode->i_mapping, offset); | 3835 | repeat: |
3802 | if (!page || !page_has_buffers(page)) | 3836 | last_offset = offset; |
3803 | return EXT_CONTINUE; | 3837 | head = NULL; |
3838 | ret = find_get_pages_tag(inode->i_mapping, &offset, | ||
3839 | PAGECACHE_TAG_DIRTY, nr_pages, pages); | ||
3840 | |||
3841 | if (!(flags & FIEMAP_EXTENT_DELALLOC)) { | ||
3842 | /* First time, try to find a mapped buffer. */ | ||
3843 | if (ret == 0) { | ||
3844 | out: | ||
3845 | for (index = 0; index < ret; index++) | ||
3846 | page_cache_release(pages[index]); | ||
3847 | /* just a hole. */ | ||
3848 | kfree(pages); | ||
3849 | return EXT_CONTINUE; | ||
3850 | } | ||
3804 | 3851 | ||
3805 | bh = page_buffers(page); | 3852 | /* Try to find the 1st mapped buffer. */ |
3853 | end = ((__u64)pages[0]->index << PAGE_SHIFT) >> | ||
3854 | blksize_bits; | ||
3855 | if (!page_has_buffers(pages[0])) | ||
3856 | goto out; | ||
3857 | head = page_buffers(pages[0]); | ||
3858 | if (!head) | ||
3859 | goto out; | ||
3806 | 3860 | ||
3807 | if (!bh) | 3861 | bh = head; |
3808 | return EXT_CONTINUE; | 3862 | do { |
3863 | if (buffer_mapped(bh)) { | ||
3864 | /* get the 1st mapped buffer. */ | ||
3865 | if (end > newex->ec_block + | ||
3866 | newex->ec_len) | ||
3867 | /* The buffer is out of | ||
3868 | * the request range. | ||
3869 | */ | ||
3870 | goto out; | ||
3871 | goto found_mapped_buffer; | ||
3872 | } | ||
3873 | bh = bh->b_this_page; | ||
3874 | end++; | ||
3875 | } while (bh != head); | ||
3809 | 3876 | ||
3810 | if (buffer_delay(bh)) { | 3877 | /* No mapped buffer found. */ |
3811 | flags |= FIEMAP_EXTENT_DELALLOC; | 3878 | goto out; |
3812 | page_cache_release(page); | ||
3813 | } else { | 3879 | } else { |
3814 | page_cache_release(page); | 3880 | /* Find contiguous delayed buffers. */ |
3815 | return EXT_CONTINUE; | 3881 | if (ret > 0 && pages[0]->index == last_offset) |
3882 | head = page_buffers(pages[0]); | ||
3883 | bh = head; | ||
3816 | } | 3884 | } |
3885 | |||
3886 | found_mapped_buffer: | ||
3887 | if (bh != NULL && buffer_delay(bh)) { | ||
3888 | /* 1st or contiguous delayed buffer found. */ | ||
3889 | if (!(flags & FIEMAP_EXTENT_DELALLOC)) { | ||
3890 | /* | ||
3891 | * 1st delayed buffer found, record | ||
3892 | * the start of extent. | ||
3893 | */ | ||
3894 | flags |= FIEMAP_EXTENT_DELALLOC; | ||
3895 | newex->ec_block = end; | ||
3896 | logical = (__u64)end << blksize_bits; | ||
3897 | } | ||
3898 | /* Find contiguous delayed buffers. */ | ||
3899 | do { | ||
3900 | if (!buffer_delay(bh)) | ||
3901 | goto found_delayed_extent; | ||
3902 | bh = bh->b_this_page; | ||
3903 | end++; | ||
3904 | } while (bh != head); | ||
3905 | |||
3906 | for (index = 1; index < ret; index++) { | ||
3907 | if (!page_has_buffers(pages[index])) { | ||
3908 | bh = NULL; | ||
3909 | break; | ||
3910 | } | ||
3911 | head = page_buffers(pages[index]); | ||
3912 | if (!head) { | ||
3913 | bh = NULL; | ||
3914 | break; | ||
3915 | } | ||
3916 | if (pages[index]->index != | ||
3917 | pages[0]->index + index) { | ||
3918 | /* Blocks are not contiguous. */ | ||
3919 | bh = NULL; | ||
3920 | break; | ||
3921 | } | ||
3922 | bh = head; | ||
3923 | do { | ||
3924 | if (!buffer_delay(bh)) | ||
3925 | /* Delayed-extent ends. */ | ||
3926 | goto found_delayed_extent; | ||
3927 | bh = bh->b_this_page; | ||
3928 | end++; | ||
3929 | } while (bh != head); | ||
3930 | } | ||
3931 | } else if (!(flags & FIEMAP_EXTENT_DELALLOC)) | ||
3932 | /* a hole found. */ | ||
3933 | goto out; | ||
3934 | |||
3935 | found_delayed_extent: | ||
3936 | newex->ec_len = min(end - newex->ec_block, | ||
3937 | (ext4_lblk_t)EXT_INIT_MAX_LEN); | ||
3938 | if (ret == nr_pages && bh != NULL && | ||
3939 | newex->ec_len < EXT_INIT_MAX_LEN && | ||
3940 | buffer_delay(bh)) { | ||
3941 | /* The extent may span more pages; scan the next batch. */ | ||
3942 | for (index = 0; index < ret; index++) | ||
3943 | page_cache_release(pages[index]); | ||
3944 | goto repeat; | ||
3945 | } | ||
3946 | |||
3947 | for (index = 0; index < ret; index++) | ||
3948 | page_cache_release(pages[index]); | ||
3949 | kfree(pages); | ||
3817 | } | 3950 | } |
3818 | 3951 | ||
3819 | physical = (__u64)newex->ec_start << blksize_bits; | 3952 | physical = (__u64)newex->ec_start << blksize_bits; |
@@ -3822,32 +3955,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | |||
3822 | if (ex && ext4_ext_is_uninitialized(ex)) | 3955 | if (ex && ext4_ext_is_uninitialized(ex)) |
3823 | flags |= FIEMAP_EXTENT_UNWRITTEN; | 3956 | flags |= FIEMAP_EXTENT_UNWRITTEN; |
3824 | 3957 | ||
3825 | /* | 3958 | size = i_size_read(inode); |
3826 | * If this extent reaches EXT_MAX_BLOCK, it must be last. | 3959 | if (logical + length >= size) |
3827 | * | ||
3828 | * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, | ||
3829 | * this also indicates no more allocated blocks. | ||
3830 | * | ||
3831 | * XXX this might miss a single-block extent at EXT_MAX_BLOCK | ||
3832 | */ | ||
3833 | if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK || | ||
3834 | newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) { | ||
3835 | loff_t size = i_size_read(inode); | ||
3836 | loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb); | ||
3837 | |||
3838 | flags |= FIEMAP_EXTENT_LAST; | 3960 | flags |= FIEMAP_EXTENT_LAST; |
3839 | if ((flags & FIEMAP_EXTENT_DELALLOC) && | ||
3840 | logical+length > size) | ||
3841 | length = (size - logical + bs - 1) & ~(bs-1); | ||
3842 | } | ||
3843 | 3961 | ||
3844 | error = fiemap_fill_next_extent(fieinfo, logical, physical, | 3962 | ret = fiemap_fill_next_extent(fieinfo, logical, physical, |
3845 | length, flags); | 3963 | length, flags); |
3846 | if (error < 0) | 3964 | if (ret < 0) |
3847 | return error; | 3965 | return ret; |
3848 | if (error == 1) | 3966 | if (ret == 1) |
3849 | return EXT_BREAK; | 3967 | return EXT_BREAK; |
3850 | |||
3851 | return EXT_CONTINUE; | 3968 | return EXT_CONTINUE; |
3852 | } | 3969 | } |
3853 | 3970 | ||
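The rewritten fiemap callback above changes how unallocated ranges are reported. The old code peeked at a single page to decide whether to set FIEMAP_EXTENT_DELALLOC; the new code walks every dirty page in the range with find_get_pages_tag(), follows each page's buffer_head ring, and extends the extent across contiguous buffer_delay() buffers, so a multi-page delayed allocation comes back as one extent instead of being missed or truncated. Condensed to its core (page refcounting, page hopping, and error handling elided; a sketch, not the kernel code):

/*
 * Starting from the first mapped buffer, extend "end" over every
 * contiguous buffer_delay() buffer, then report [ec_block, end)
 * as a single FIEMAP_EXTENT_DELALLOC extent.
 */
end = ((__u64)pages[0]->index << PAGE_SHIFT) >> blksize_bits;
bh = head = page_buffers(pages[0]);
do {
	if (buffer_delay(bh)) {
		newex->ec_block = end;		/* extent starts here */
		flags |= FIEMAP_EXTENT_DELALLOC;
		while (buffer_delay(bh)) {	/* extend over the run */
			bh = bh->b_this_page;
			end++;
			if (bh == head)		/* continue on the next */
				break;		/* page (elided here)   */
		}
		break;
	}
	bh = bh->b_this_page;
	end++;
} while (bh != head);
newex->ec_len = end - newex->ec_block;

The FIEMAP_EXTENT_LAST logic is simplified at the same time: instead of the EXT_MAX_BLOCK heuristic (which, per its own XXX comment, could miss a single-block extent), the extent is marked last whenever logical + length reaches i_size.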
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 7829b287822a..7f74019d6d77 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -164,20 +164,20 @@ int ext4_sync_file(struct file *file, int datasync) | |||
164 | 164 | ||
165 | J_ASSERT(ext4_journal_current_handle() == NULL); | 165 | J_ASSERT(ext4_journal_current_handle() == NULL); |
166 | 166 | ||
167 | trace_ext4_sync_file(file, datasync); | 167 | trace_ext4_sync_file_enter(file, datasync); |
168 | 168 | ||
169 | if (inode->i_sb->s_flags & MS_RDONLY) | 169 | if (inode->i_sb->s_flags & MS_RDONLY) |
170 | return 0; | 170 | return 0; |
171 | 171 | ||
172 | ret = ext4_flush_completed_IO(inode); | 172 | ret = ext4_flush_completed_IO(inode); |
173 | if (ret < 0) | 173 | if (ret < 0) |
174 | return ret; | 174 | goto out; |
175 | 175 | ||
176 | if (!journal) { | 176 | if (!journal) { |
177 | ret = generic_file_fsync(file, datasync); | 177 | ret = generic_file_fsync(file, datasync); |
178 | if (!ret && !list_empty(&inode->i_dentry)) | 178 | if (!ret && !list_empty(&inode->i_dentry)) |
179 | ext4_sync_parent(inode); | 179 | ext4_sync_parent(inode); |
180 | return ret; | 180 | goto out; |
181 | } | 181 | } |
182 | 182 | ||
183 | /* | 183 | /* |
@@ -194,8 +194,10 @@ int ext4_sync_file(struct file *file, int datasync) | |||
194 | * (they were dirtied by commit). But that's OK - the blocks are | 194 | * (they were dirtied by commit). But that's OK - the blocks are |
195 | * safe in-journal, which is all fsync() needs to ensure. | 195 | * safe in-journal, which is all fsync() needs to ensure. |
196 | */ | 196 | */ |
197 | if (ext4_should_journal_data(inode)) | 197 | if (ext4_should_journal_data(inode)) { |
198 | return ext4_force_commit(inode->i_sb); | 198 | ret = ext4_force_commit(inode->i_sb); |
199 | goto out; | ||
200 | } | ||
199 | 201 | ||
200 | commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; | 202 | commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; |
201 | if (jbd2_log_start_commit(journal, commit_tid)) { | 203 | if (jbd2_log_start_commit(journal, commit_tid)) { |
@@ -215,5 +217,7 @@ int ext4_sync_file(struct file *file, int datasync) | |||
215 | ret = jbd2_log_wait_commit(journal, commit_tid); | 217 | ret = jbd2_log_wait_commit(journal, commit_tid); |
216 | } else if (journal->j_flags & JBD2_BARRIER) | 218 | } else if (journal->j_flags & JBD2_BARRIER) |
217 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | 219 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
220 | out: | ||
221 | trace_ext4_sync_file_exit(inode, ret); | ||
218 | return ret; | 222 | return ret; |
219 | } | 223 | } |
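ext4_sync_file() is converted from multiple early returns to a single exit label so that the new exit tracepoint fires on every path, paired with the enter event. The pattern, generically (example_op, step_one, and step_two are hypothetical names):

int example_op(struct inode *inode)
{
	int ret;

	trace_example_enter(inode);
	ret = step_one(inode);
	if (ret < 0)
		goto out;	/* no bare "return ret" past this point */
	ret = step_two(inode);
out:
	trace_example_exit(inode, ret);
	return ret;
}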
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 78b79e1bd7ed..21bb2f61e502 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -152,6 +152,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
152 | * We do it here so the bitmap uptodate bit | 152 | * We do it here so the bitmap uptodate bit |
153 | * gets set with the buffer lock held. | 154 | * gets set with the buffer lock held. |
154 | */ | 154 | */ |
155 | trace_ext4_load_inode_bitmap(sb, block_group); | ||
155 | set_bitmap_uptodate(bh); | 156 | set_bitmap_uptodate(bh); |
156 | if (bh_submit_read(bh) < 0) { | 157 | if (bh_submit_read(bh) < 0) { |
157 | put_bh(bh); | 158 | put_bh(bh); |
@@ -649,7 +650,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
649 | *group = parent_group + flex_size; | 650 | *group = parent_group + flex_size; |
650 | if (*group > ngroups) | 651 | if (*group > ngroups) |
651 | *group = 0; | 652 | *group = 0; |
652 | return find_group_orlov(sb, parent, group, mode, 0); | 653 | return find_group_orlov(sb, parent, group, mode, NULL); |
653 | } | 654 | } |
654 | 655 | ||
655 | /* | 656 | /* |
@@ -1054,6 +1055,11 @@ got: | |||
1054 | } | 1055 | } |
1055 | } | 1056 | } |
1056 | 1057 | ||
1058 | if (ext4_handle_valid(handle)) { | ||
1059 | ei->i_sync_tid = handle->h_transaction->t_tid; | ||
1060 | ei->i_datasync_tid = handle->h_transaction->t_tid; | ||
1061 | } | ||
1062 | |||
1057 | err = ext4_mark_inode_dirty(handle, inode); | 1063 | err = ext4_mark_inode_dirty(handle, inode); |
1058 | if (err) { | 1064 | if (err) { |
1059 | ext4_std_error(sb, err); | 1065 | ext4_std_error(sb, err); |
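Recording the allocating transaction's tid in i_sync_tid/i_datasync_tid at inode-creation time matters because ext4_sync_file() chooses which commit to wait on from exactly these fields, and a freshly allocated in-core inode could otherwise carry stale values (presumably left over from a previous occupant of the slab object). The consumer side, as already seen in the fsync.c hunk above:

/* fsync picks the commit to wait for from the cached tids */
commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
if (jbd2_log_start_commit(journal, commit_tid))
	ret = jbd2_log_wait_commit(journal, commit_tid);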
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9f7f9e49914f..1a86282b9024 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -173,7 +173,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | |||
173 | BUG_ON(EXT4_JOURNAL(inode) == NULL); | 173 | BUG_ON(EXT4_JOURNAL(inode) == NULL); |
174 | jbd_debug(2, "restarting handle %p\n", handle); | 174 | jbd_debug(2, "restarting handle %p\n", handle); |
175 | up_write(&EXT4_I(inode)->i_data_sem); | 175 | up_write(&EXT4_I(inode)->i_data_sem); |
176 | ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); | 176 | ret = ext4_journal_restart(handle, nblocks); |
177 | down_write(&EXT4_I(inode)->i_data_sem); | 177 | down_write(&EXT4_I(inode)->i_data_sem); |
178 | ext4_discard_preallocations(inode); | 178 | ext4_discard_preallocations(inode); |
179 | 179 | ||
@@ -720,7 +720,7 @@ allocated: | |||
720 | return ret; | 720 | return ret; |
721 | failed_out: | 721 | failed_out: |
722 | for (i = 0; i < index; i++) | 722 | for (i = 0; i < index; i++) |
723 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); | 723 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); |
724 | return ret; | 724 | return ret; |
725 | } | 725 | } |
726 | 726 | ||
@@ -823,20 +823,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
823 | return err; | 823 | return err; |
824 | failed: | 824 | failed: |
825 | /* Allocation failed, free what we already allocated */ | 825 | /* Allocation failed, free what we already allocated */ |
826 | ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); | 826 | ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); |
827 | for (i = 1; i <= n ; i++) { | 827 | for (i = 1; i <= n ; i++) { |
828 | /* | 828 | /* |
829 | * branch[i].bh is newly allocated, so there is no | 829 | * branch[i].bh is newly allocated, so there is no |
830 | * need to revoke the block, which is why we don't | 830 | * need to revoke the block, which is why we don't |
831 | * need to set EXT4_FREE_BLOCKS_METADATA. | 831 | * need to set EXT4_FREE_BLOCKS_METADATA. |
832 | */ | 832 | */ |
833 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, | 833 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, |
834 | EXT4_FREE_BLOCKS_FORGET); | 834 | EXT4_FREE_BLOCKS_FORGET); |
835 | } | 835 | } |
836 | for (i = n+1; i < indirect_blks; i++) | 836 | for (i = n+1; i < indirect_blks; i++) |
837 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); | 837 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); |
838 | 838 | ||
839 | ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0); | 839 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); |
840 | 840 | ||
841 | return err; | 841 | return err; |
842 | } | 842 | } |
@@ -924,7 +924,7 @@ err_out: | |||
924 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, | 924 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, |
925 | EXT4_FREE_BLOCKS_FORGET); | 925 | EXT4_FREE_BLOCKS_FORGET); |
926 | } | 926 | } |
927 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key), | 927 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), |
928 | blks, 0); | 928 | blks, 0); |
929 | 929 | ||
930 | return err; | 930 | return err; |
@@ -973,6 +973,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
973 | int count = 0; | 973 | int count = 0; |
974 | ext4_fsblk_t first_block = 0; | 974 | ext4_fsblk_t first_block = 0; |
975 | 975 | ||
976 | trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | ||
976 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); | 977 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); |
977 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | 978 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
978 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, | 979 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, |
@@ -1058,6 +1059,8 @@ cleanup: | |||
1058 | partial--; | 1059 | partial--; |
1059 | } | 1060 | } |
1060 | out: | 1061 | out: |
1062 | trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, | ||
1063 | map->m_pblk, map->m_len, err); | ||
1061 | return err; | 1064 | return err; |
1062 | } | 1065 | } |
1063 | 1066 | ||
@@ -2060,7 +2063,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2060 | if (nr_pages == 0) | 2063 | if (nr_pages == 0) |
2061 | break; | 2064 | break; |
2062 | for (i = 0; i < nr_pages; i++) { | 2065 | for (i = 0; i < nr_pages; i++) { |
2063 | int commit_write = 0, redirty_page = 0; | 2066 | int commit_write = 0, skip_page = 0; |
2064 | struct page *page = pvec.pages[i]; | 2067 | struct page *page = pvec.pages[i]; |
2065 | 2068 | ||
2066 | index = page->index; | 2069 | index = page->index; |
@@ -2086,14 +2089,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2086 | * If the page does not have buffers (for | 2089 | * If the page does not have buffers (for |
2087 | * whatever reason), try to create them using | 2090 | * whatever reason), try to create them using |
2088 | * __block_write_begin. If this fails, | 2091 | * __block_write_begin. If this fails, |
2089 | * redirty the page and move on. | 2092 | * skip the page and move on. |
2090 | */ | 2093 | */ |
2091 | if (!page_has_buffers(page)) { | 2094 | if (!page_has_buffers(page)) { |
2092 | if (__block_write_begin(page, 0, len, | 2095 | if (__block_write_begin(page, 0, len, |
2093 | noalloc_get_block_write)) { | 2096 | noalloc_get_block_write)) { |
2094 | redirty_page: | 2097 | skip_page: |
2095 | redirty_page_for_writepage(mpd->wbc, | ||
2096 | page); | ||
2097 | unlock_page(page); | 2098 | unlock_page(page); |
2098 | continue; | 2099 | continue; |
2099 | } | 2100 | } |
@@ -2104,7 +2105,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2104 | block_start = 0; | 2105 | block_start = 0; |
2105 | do { | 2106 | do { |
2106 | if (!bh) | 2107 | if (!bh) |
2107 | goto redirty_page; | 2108 | goto skip_page; |
2108 | if (map && (cur_logical >= map->m_lblk) && | 2109 | if (map && (cur_logical >= map->m_lblk) && |
2109 | (cur_logical <= (map->m_lblk + | 2110 | (cur_logical <= (map->m_lblk + |
2110 | (map->m_len - 1)))) { | 2111 | (map->m_len - 1)))) { |
@@ -2120,22 +2121,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2120 | clear_buffer_unwritten(bh); | 2121 | clear_buffer_unwritten(bh); |
2121 | } | 2122 | } |
2122 | 2123 | ||
2123 | /* redirty page if block allocation undone */ | 2124 | /* skip page if block allocation undone */ |
2124 | if (buffer_delay(bh) || buffer_unwritten(bh)) | 2125 | if (buffer_delay(bh) || buffer_unwritten(bh)) |
2125 | redirty_page = 1; | 2126 | skip_page = 1; |
2126 | bh = bh->b_this_page; | 2127 | bh = bh->b_this_page; |
2127 | block_start += bh->b_size; | 2128 | block_start += bh->b_size; |
2128 | cur_logical++; | 2129 | cur_logical++; |
2129 | pblock++; | 2130 | pblock++; |
2130 | } while (bh != page_bufs); | 2131 | } while (bh != page_bufs); |
2131 | 2132 | ||
2132 | if (redirty_page) | 2133 | if (skip_page) |
2133 | goto redirty_page; | 2134 | goto skip_page; |
2134 | 2135 | ||
2135 | if (commit_write) | 2136 | if (commit_write) |
2136 | /* mark the buffer_heads as dirty & uptodate */ | 2137 | /* mark the buffer_heads as dirty & uptodate */ |
2137 | block_commit_write(page, 0, len); | 2138 | block_commit_write(page, 0, len); |
2138 | 2139 | ||
2140 | clear_page_dirty_for_io(page); | ||
2139 | /* | 2141 | /* |
2140 | * Delalloc doesn't support data journalling, | 2142 | * Delalloc doesn't support data journalling, |
2141 | * but eventually maybe we'll lift this | 2143 | * but eventually maybe we'll lift this |
@@ -2165,8 +2167,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2165 | return ret; | 2167 | return ret; |
2166 | } | 2168 | } |
2167 | 2169 | ||
2168 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | 2170 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) |
2169 | sector_t logical, long blk_cnt) | ||
2170 | { | 2171 | { |
2171 | int nr_pages, i; | 2172 | int nr_pages, i; |
2172 | pgoff_t index, end; | 2173 | pgoff_t index, end; |
@@ -2174,9 +2175,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | |||
2174 | struct inode *inode = mpd->inode; | 2175 | struct inode *inode = mpd->inode; |
2175 | struct address_space *mapping = inode->i_mapping; | 2176 | struct address_space *mapping = inode->i_mapping; |
2176 | 2177 | ||
2177 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2178 | index = mpd->first_page; |
2178 | end = (logical + blk_cnt - 1) >> | 2179 | end = mpd->next_page - 1; |
2179 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2180 | while (index <= end) { | 2180 | while (index <= end) { |
2181 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | 2181 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); |
2182 | if (nr_pages == 0) | 2182 | if (nr_pages == 0) |
@@ -2279,9 +2279,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
2279 | err = blks; | 2279 | err = blks; |
2280 | /* | 2280 | /* |
2281 | * If get block returns EAGAIN or ENOSPC and there | 2281 | * If get block returns EAGAIN or ENOSPC and there |
2282 | * appears to be free blocks we will call | 2282 | * appears to be free blocks we will just let |
2283 | * ext4_writepage() for all of the pages which will | 2283 | * mpage_da_submit_io() unlock all of the pages. |
2284 | * just redirty the pages. | ||
2285 | */ | 2284 | */ |
2286 | if (err == -EAGAIN) | 2285 | if (err == -EAGAIN) |
2287 | goto submit_io; | 2286 | goto submit_io; |
@@ -2312,8 +2311,10 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
2312 | ext4_print_free_blocks(mpd->inode); | 2311 | ext4_print_free_blocks(mpd->inode); |
2313 | } | 2312 | } |
2314 | /* invalidate all the pages */ | 2313 | /* invalidate all the pages */ |
2315 | ext4_da_block_invalidatepages(mpd, next, | 2314 | ext4_da_block_invalidatepages(mpd); |
2316 | mpd->b_size >> mpd->inode->i_blkbits); | 2315 | |
2316 | /* Mark this page range as having been completed */ | ||
2317 | mpd->io_done = 1; | ||
2317 | return; | 2318 | return; |
2318 | } | 2319 | } |
2319 | BUG_ON(blks == 0); | 2320 | BUG_ON(blks == 0); |
@@ -2438,102 +2439,6 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
2438 | } | 2439 | } |
2439 | 2440 | ||
2440 | /* | 2441 | /* |
2441 | * __mpage_da_writepage - finds extent of pages and blocks | ||
2442 | * | ||
2443 | * @page: page to consider | ||
2444 | * @wbc: not used, we just follow rules | ||
2445 | * @data: context | ||
2446 | * | ||
2447 | * The function finds extents of pages and scan them for all blocks. | ||
2448 | */ | ||
2449 | static int __mpage_da_writepage(struct page *page, | ||
2450 | struct writeback_control *wbc, | ||
2451 | struct mpage_da_data *mpd) | ||
2452 | { | ||
2453 | struct inode *inode = mpd->inode; | ||
2454 | struct buffer_head *bh, *head; | ||
2455 | sector_t logical; | ||
2456 | |||
2457 | /* | ||
2458 | * Can we merge this page to current extent? | ||
2459 | */ | ||
2460 | if (mpd->next_page != page->index) { | ||
2461 | /* | ||
2462 | * Nope, we can't. So, we map non-allocated blocks | ||
2463 | * and start IO on them | ||
2464 | */ | ||
2465 | if (mpd->next_page != mpd->first_page) { | ||
2466 | mpage_da_map_and_submit(mpd); | ||
2467 | /* | ||
2468 | * skip rest of the page in the page_vec | ||
2469 | */ | ||
2470 | redirty_page_for_writepage(wbc, page); | ||
2471 | unlock_page(page); | ||
2472 | return MPAGE_DA_EXTENT_TAIL; | ||
2473 | } | ||
2474 | |||
2475 | /* | ||
2476 | * Start next extent of pages ... | ||
2477 | */ | ||
2478 | mpd->first_page = page->index; | ||
2479 | |||
2480 | /* | ||
2481 | * ... and blocks | ||
2482 | */ | ||
2483 | mpd->b_size = 0; | ||
2484 | mpd->b_state = 0; | ||
2485 | mpd->b_blocknr = 0; | ||
2486 | } | ||
2487 | |||
2488 | mpd->next_page = page->index + 1; | ||
2489 | logical = (sector_t) page->index << | ||
2490 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2491 | |||
2492 | if (!page_has_buffers(page)) { | ||
2493 | mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE, | ||
2494 | (1 << BH_Dirty) | (1 << BH_Uptodate)); | ||
2495 | if (mpd->io_done) | ||
2496 | return MPAGE_DA_EXTENT_TAIL; | ||
2497 | } else { | ||
2498 | /* | ||
2499 | * Page with regular buffer heads, just add all dirty ones | ||
2500 | */ | ||
2501 | head = page_buffers(page); | ||
2502 | bh = head; | ||
2503 | do { | ||
2504 | BUG_ON(buffer_locked(bh)); | ||
2505 | /* | ||
2506 | * We need to try to allocate | ||
2507 | * unmapped blocks in the same page. | ||
2508 | * Otherwise we won't make progress | ||
2509 | * with the page in ext4_writepage | ||
2510 | */ | ||
2511 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2512 | mpage_add_bh_to_extent(mpd, logical, | ||
2513 | bh->b_size, | ||
2514 | bh->b_state); | ||
2515 | if (mpd->io_done) | ||
2516 | return MPAGE_DA_EXTENT_TAIL; | ||
2517 | } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { | ||
2518 | /* | ||
2519 | * mapped dirty buffer. We need to update | ||
2520 | * the b_state because we look at | ||
2521 | * b_state in mpage_da_map_blocks. We don't | ||
2522 | * update b_size because if we find an | ||
2523 | * unmapped buffer_head later we need to | ||
2524 | * use the b_state flag of that buffer_head. | ||
2525 | */ | ||
2526 | if (mpd->b_size == 0) | ||
2527 | mpd->b_state = bh->b_state & BH_FLAGS; | ||
2528 | } | ||
2529 | logical++; | ||
2530 | } while ((bh = bh->b_this_page) != head); | ||
2531 | } | ||
2532 | |||
2533 | return 0; | ||
2534 | } | ||
2535 | |||
2536 | /* | ||
2537 | * This is a special get_blocks_t callback which is used by | 2442 | * This is a special get_blocks_t callback which is used by |
2538 | * ext4_da_write_begin(). It will either return mapped block or | 2443 | * ext4_da_write_begin(). It will either return mapped block or |
2539 | * reserve space for a single block. | 2444 | * reserve space for a single block. |
@@ -2597,7 +2502,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2597 | * for partial write. | 2502 | * for partial write. |
2598 | */ | 2503 | */ |
2599 | set_buffer_new(bh); | 2504 | set_buffer_new(bh); |
2600 | set_buffer_mapped(bh); | ||
2601 | } | 2505 | } |
2602 | return 0; | 2506 | return 0; |
2603 | } | 2507 | } |
@@ -2811,27 +2715,27 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2811 | 2715 | ||
2812 | /* | 2716 | /* |
2813 | * write_cache_pages_da - walk the list of dirty pages of the given | 2717 | * write_cache_pages_da - walk the list of dirty pages of the given |
2814 | * address space and call the callback function (which usually writes | 2718 | * address space and accumulate pages that need writing, and call |
2815 | * the pages). | 2719 | * mpage_da_map_and_submit to map a single contiguous memory region |
2816 | * | 2720 | * and then write them. |
2817 | * This is a forked version of write_cache_pages(). Differences: | ||
2818 | * Range cyclic is ignored. | ||
2819 | * no_nrwrite_index_update is always presumed true | ||
2820 | */ | 2721 | */ |
2821 | static int write_cache_pages_da(struct address_space *mapping, | 2722 | static int write_cache_pages_da(struct address_space *mapping, |
2822 | struct writeback_control *wbc, | 2723 | struct writeback_control *wbc, |
2823 | struct mpage_da_data *mpd, | 2724 | struct mpage_da_data *mpd, |
2824 | pgoff_t *done_index) | 2725 | pgoff_t *done_index) |
2825 | { | 2726 | { |
2826 | int ret = 0; | 2727 | struct buffer_head *bh, *head; |
2827 | int done = 0; | 2728 | struct inode *inode = mapping->host; |
2828 | struct pagevec pvec; | 2729 | struct pagevec pvec; |
2829 | unsigned nr_pages; | 2730 | unsigned int nr_pages; |
2830 | pgoff_t index; | 2731 | sector_t logical; |
2831 | pgoff_t end; /* Inclusive */ | 2732 | pgoff_t index, end; |
2832 | long nr_to_write = wbc->nr_to_write; | 2733 | long nr_to_write = wbc->nr_to_write; |
2833 | int tag; | 2734 | int i, tag, ret = 0; |
2834 | 2735 | ||
2736 | memset(mpd, 0, sizeof(struct mpage_da_data)); | ||
2737 | mpd->wbc = wbc; | ||
2738 | mpd->inode = inode; | ||
2835 | pagevec_init(&pvec, 0); | 2739 | pagevec_init(&pvec, 0); |
2836 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2740 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2837 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2741 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
@@ -2842,13 +2746,11 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2842 | tag = PAGECACHE_TAG_DIRTY; | 2746 | tag = PAGECACHE_TAG_DIRTY; |
2843 | 2747 | ||
2844 | *done_index = index; | 2748 | *done_index = index; |
2845 | while (!done && (index <= end)) { | 2749 | while (index <= end) { |
2846 | int i; | ||
2847 | |||
2848 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 2750 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2849 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2751 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2850 | if (nr_pages == 0) | 2752 | if (nr_pages == 0) |
2851 | break; | 2753 | return 0; |
2852 | 2754 | ||
2853 | for (i = 0; i < nr_pages; i++) { | 2755 | for (i = 0; i < nr_pages; i++) { |
2854 | struct page *page = pvec.pages[i]; | 2756 | struct page *page = pvec.pages[i]; |
@@ -2860,60 +2762,100 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2860 | * mapping. However, page->index will not change | 2762 | * mapping. However, page->index will not change |
2861 | * because we have a reference on the page. | 2763 | * because we have a reference on the page. |
2862 | */ | 2764 | */ |
2863 | if (page->index > end) { | 2765 | if (page->index > end) |
2864 | done = 1; | 2766 | goto out; |
2865 | break; | ||
2866 | } | ||
2867 | 2767 | ||
2868 | *done_index = page->index + 1; | 2768 | *done_index = page->index + 1; |
2869 | 2769 | ||
2770 | /* | ||
2771 | * If we can't merge this page, and we have | ||
2772 | * accumulated a contiguous region, write it | ||
2773 | */ | ||
2774 | if ((mpd->next_page != page->index) && | ||
2775 | (mpd->next_page != mpd->first_page)) { | ||
2776 | mpage_da_map_and_submit(mpd); | ||
2777 | goto ret_extent_tail; | ||
2778 | } | ||
2779 | |||
2870 | lock_page(page); | 2780 | lock_page(page); |
2871 | 2781 | ||
2872 | /* | 2782 | /* |
2873 | * Page truncated or invalidated. We can freely skip it | 2783 | * If the page is no longer dirty, or its |
2874 | * then, even for data integrity operations: the page | 2784 | * mapping no longer corresponds to inode we |
2875 | * has disappeared concurrently, so there could be no | 2785 | * are writing (which means it has been |
2876 | * real expectation of this data interity operation | 2786 | * truncated or invalidated), or the page is |
2877 | * even if there is now a new, dirty page at the same | 2787 | * already under writeback and we are not |
2878 | * pagecache address. | 2788 | * doing a data integrity writeback, skip the page |
2879 | */ | 2789 | */ |
2880 | if (unlikely(page->mapping != mapping)) { | 2790 | if (!PageDirty(page) || |
2881 | continue_unlock: | 2791 | (PageWriteback(page) && |
2792 | (wbc->sync_mode == WB_SYNC_NONE)) || | ||
2793 | unlikely(page->mapping != mapping)) { | ||
2882 | unlock_page(page); | 2794 | unlock_page(page); |
2883 | continue; | 2795 | continue; |
2884 | } | 2796 | } |
2885 | 2797 | ||
2886 | if (!PageDirty(page)) { | 2798 | if (PageWriteback(page)) |
2887 | /* someone wrote it for us */ | 2799 | wait_on_page_writeback(page); |
2888 | goto continue_unlock; | ||
2889 | } | ||
2890 | |||
2891 | if (PageWriteback(page)) { | ||
2892 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
2893 | wait_on_page_writeback(page); | ||
2894 | else | ||
2895 | goto continue_unlock; | ||
2896 | } | ||
2897 | 2800 | ||
2898 | BUG_ON(PageWriteback(page)); | 2801 | BUG_ON(PageWriteback(page)); |
2899 | if (!clear_page_dirty_for_io(page)) | ||
2900 | goto continue_unlock; | ||
2901 | 2802 | ||
2902 | ret = __mpage_da_writepage(page, wbc, mpd); | 2803 | if (mpd->next_page != page->index) |
2903 | if (unlikely(ret)) { | 2804 | mpd->first_page = page->index; |
2904 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | 2805 | mpd->next_page = page->index + 1; |
2905 | unlock_page(page); | 2806 | logical = (sector_t) page->index << |
2906 | ret = 0; | 2807 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2907 | } else { | 2808 | |
2908 | done = 1; | 2809 | if (!page_has_buffers(page)) { |
2909 | break; | 2810 | mpage_add_bh_to_extent(mpd, logical, |
2910 | } | 2811 | PAGE_CACHE_SIZE, |
2812 | (1 << BH_Dirty) | (1 << BH_Uptodate)); | ||
2813 | if (mpd->io_done) | ||
2814 | goto ret_extent_tail; | ||
2815 | } else { | ||
2816 | /* | ||
2817 | * Page with regular buffer heads, | ||
2818 | * just add all dirty ones | ||
2819 | */ | ||
2820 | head = page_buffers(page); | ||
2821 | bh = head; | ||
2822 | do { | ||
2823 | BUG_ON(buffer_locked(bh)); | ||
2824 | /* | ||
2825 | * We need to try to allocate | ||
2826 | * unmapped blocks in the same page. | ||
2827 | * Otherwise we won't make progress | ||
2828 | * with the page in ext4_writepage | ||
2829 | */ | ||
2830 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2831 | mpage_add_bh_to_extent(mpd, logical, | ||
2832 | bh->b_size, | ||
2833 | bh->b_state); | ||
2834 | if (mpd->io_done) | ||
2835 | goto ret_extent_tail; | ||
2836 | } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { | ||
2837 | /* | ||
2838 | * mapped dirty buffer. We need | ||
2839 | * to update the b_state | ||
2840 | * because we look at b_state | ||
2841 | * in mpage_da_map_blocks. We | ||
2842 | * don't update b_size because | ||
2843 | * if we find an unmapped | ||
2844 | * buffer_head later we need to | ||
2845 | * use the b_state flag of that | ||
2846 | * buffer_head. | ||
2847 | */ | ||
2848 | if (mpd->b_size == 0) | ||
2849 | mpd->b_state = bh->b_state & BH_FLAGS; | ||
2850 | } | ||
2851 | logical++; | ||
2852 | } while ((bh = bh->b_this_page) != head); | ||
2911 | } | 2853 | } |
2912 | 2854 | ||
2913 | if (nr_to_write > 0) { | 2855 | if (nr_to_write > 0) { |
2914 | nr_to_write--; | 2856 | nr_to_write--; |
2915 | if (nr_to_write == 0 && | 2857 | if (nr_to_write == 0 && |
2916 | wbc->sync_mode == WB_SYNC_NONE) { | 2858 | wbc->sync_mode == WB_SYNC_NONE) |
2917 | /* | 2859 | /* |
2918 | * We stop writing back only if we are | 2860 | * We stop writing back only if we are |
2919 | * not doing integrity sync. In case of | 2861 | * not doing integrity sync. In case of |
@@ -2924,14 +2866,18 @@ continue_unlock: | |||
2924 | * pages, but have not synced all of the | 2866 | * pages, but have not synced all of the |
2925 | * old dirty pages. | 2867 | * old dirty pages. |
2926 | */ | 2868 | */ |
2927 | done = 1; | 2869 | goto out; |
2928 | break; | ||
2929 | } | ||
2930 | } | 2870 | } |
2931 | } | 2871 | } |
2932 | pagevec_release(&pvec); | 2872 | pagevec_release(&pvec); |
2933 | cond_resched(); | 2873 | cond_resched(); |
2934 | } | 2874 | } |
2875 | return 0; | ||
2876 | ret_extent_tail: | ||
2877 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2878 | out: | ||
2879 | pagevec_release(&pvec); | ||
2880 | cond_resched(); | ||
2935 | return ret; | 2881 | return ret; |
2936 | } | 2882 | } |
2937 | 2883 | ||
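With __mpage_da_writepage() folded away, write_cache_pages_da() is no longer a generic walker invoking a callback: it locks each dirty page itself, skips pages that have gone clean (or are under writeback during a non-integrity sync), and accumulates contiguous dirty buffers into the mpd extent before mpage_da_map_and_submit() maps and writes them. One subtle relocation: clear_page_dirty_for_io() now runs in mpage_da_submit_io() just before each page is written, rather than during the scan, so a page that ends up skipped is never wrongly marked clean; that is also why the skip paths no longer need redirty_page_for_writepage(). Schematically (collect_dirty_buffers() is a hypothetical stand-in for the inlined buffer walk; not the exact code):

/* Schematic of the one-pass delalloc scan. */
while (index <= end) {
	nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				      PAGECACHE_TAG_DIRTY, ...);
	for (i = 0; i < nr_pages; i++) {
		struct page *page = pvec.pages[i];

		/* flush the extent if this page cannot extend it */
		if (mpd->next_page != page->index &&
		    mpd->next_page != mpd->first_page) {
			mpage_da_map_and_submit(mpd);
			return MPAGE_DA_EXTENT_TAIL;
		}
		lock_page(page);
		/* skip clean/writeback/truncated pages, then: */
		collect_dirty_buffers(mpd, page);
	}
}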
@@ -2945,7 +2891,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2945 | struct mpage_da_data mpd; | 2891 | struct mpage_da_data mpd; |
2946 | struct inode *inode = mapping->host; | 2892 | struct inode *inode = mapping->host; |
2947 | int pages_written = 0; | 2893 | int pages_written = 0; |
2948 | long pages_skipped; | ||
2949 | unsigned int max_pages; | 2894 | unsigned int max_pages; |
2950 | int range_cyclic, cycled = 1, io_done = 0; | 2895 | int range_cyclic, cycled = 1, io_done = 0; |
2951 | int needed_blocks, ret = 0; | 2896 | int needed_blocks, ret = 0; |
@@ -3028,11 +2973,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3028 | wbc->nr_to_write = desired_nr_to_write; | 2973 | wbc->nr_to_write = desired_nr_to_write; |
3029 | } | 2974 | } |
3030 | 2975 | ||
3031 | mpd.wbc = wbc; | ||
3032 | mpd.inode = mapping->host; | ||
3033 | |||
3034 | pages_skipped = wbc->pages_skipped; | ||
3035 | |||
3036 | retry: | 2976 | retry: |
3037 | if (wbc->sync_mode == WB_SYNC_ALL) | 2977 | if (wbc->sync_mode == WB_SYNC_ALL) |
3038 | tag_pages_for_writeback(mapping, index, end); | 2978 | tag_pages_for_writeback(mapping, index, end); |
@@ -3059,22 +2999,10 @@ retry: | |||
3059 | } | 2999 | } |
3060 | 3000 | ||
3061 | /* | 3001 | /* |
3062 | * Now call __mpage_da_writepage to find the next | 3002 | * Now call write_cache_pages_da() to find the next |
3063 | * contiguous region of logical blocks that need | 3003 | * contiguous region of logical blocks that need |
3064 | * blocks to be allocated by ext4. We don't actually | 3004 | * blocks to be allocated by ext4 and submit them. |
3065 | * submit the blocks for I/O here, even though | ||
3066 | * write_cache_pages thinks it will, and will set the | ||
3067 | * pages as clean for write before calling | ||
3068 | * __mpage_da_writepage(). | ||
3069 | */ | 3005 | */ |
3070 | mpd.b_size = 0; | ||
3071 | mpd.b_state = 0; | ||
3072 | mpd.b_blocknr = 0; | ||
3073 | mpd.first_page = 0; | ||
3074 | mpd.next_page = 0; | ||
3075 | mpd.io_done = 0; | ||
3076 | mpd.pages_written = 0; | ||
3077 | mpd.retval = 0; | ||
3078 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); | 3006 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); |
3079 | /* | 3007 | /* |
3080 | * If we have a contiguous extent of pages and we | 3008 | * If we have a contiguous extent of pages and we |
@@ -3096,7 +3024,6 @@ retry: | |||
3096 | * and try again | 3024 | * and try again |
3097 | */ | 3025 | */ |
3098 | jbd2_journal_force_commit_nested(sbi->s_journal); | 3026 | jbd2_journal_force_commit_nested(sbi->s_journal); |
3099 | wbc->pages_skipped = pages_skipped; | ||
3100 | ret = 0; | 3027 | ret = 0; |
3101 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 3028 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { |
3102 | /* | 3029 | /* |
@@ -3104,7 +3031,6 @@ retry: | |||
3104 | * rest of the pages | 3031 | * rest of the pages |
3105 | */ | 3032 | */ |
3106 | pages_written += mpd.pages_written; | 3033 | pages_written += mpd.pages_written; |
3107 | wbc->pages_skipped = pages_skipped; | ||
3108 | ret = 0; | 3034 | ret = 0; |
3109 | io_done = 1; | 3035 | io_done = 1; |
3110 | } else if (wbc->nr_to_write) | 3036 | } else if (wbc->nr_to_write) |
@@ -3122,11 +3048,6 @@ retry: | |||
3122 | wbc->range_end = mapping->writeback_index - 1; | 3048 | wbc->range_end = mapping->writeback_index - 1; |
3123 | goto retry; | 3049 | goto retry; |
3124 | } | 3050 | } |
3125 | if (pages_skipped != wbc->pages_skipped) | ||
3126 | ext4_msg(inode->i_sb, KERN_CRIT, | ||
3127 | "This should not happen leaving %s " | ||
3128 | "with nr_to_write = %ld ret = %d", | ||
3129 | __func__, wbc->nr_to_write, ret); | ||
3130 | 3051 | ||
3131 | /* Update index */ | 3052 | /* Update index */ |
3132 | wbc->range_cyclic = range_cyclic; | 3053 | wbc->range_cyclic = range_cyclic; |
@@ -3460,6 +3381,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3460 | 3381 | ||
3461 | static int ext4_readpage(struct file *file, struct page *page) | 3382 | static int ext4_readpage(struct file *file, struct page *page) |
3462 | { | 3383 | { |
3384 | trace_ext4_readpage(page); | ||
3463 | return mpage_readpage(page, ext4_get_block); | 3385 | return mpage_readpage(page, ext4_get_block); |
3464 | } | 3386 | } |
3465 | 3387 | ||
@@ -3494,6 +3416,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset) | |||
3494 | { | 3416 | { |
3495 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3417 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3496 | 3418 | ||
3419 | trace_ext4_invalidatepage(page, offset); | ||
3420 | |||
3497 | /* | 3421 | /* |
3498 | * free any io_end structure allocated for buffers to be discarded | 3422 | * free any io_end structure allocated for buffers to be discarded |
3499 | */ | 3423 | */ |
@@ -3515,6 +3439,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3515 | { | 3439 | { |
3516 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3440 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3517 | 3441 | ||
3442 | trace_ext4_releasepage(page); | ||
3443 | |||
3518 | WARN_ON(PageChecked(page)); | 3444 | WARN_ON(PageChecked(page)); |
3519 | if (!page_has_buffers(page)) | 3445 | if (!page_has_buffers(page)) |
3520 | return 0; | 3446 | return 0; |
@@ -3873,11 +3799,16 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
3873 | { | 3799 | { |
3874 | struct file *file = iocb->ki_filp; | 3800 | struct file *file = iocb->ki_filp; |
3875 | struct inode *inode = file->f_mapping->host; | 3801 | struct inode *inode = file->f_mapping->host; |
3802 | ssize_t ret; | ||
3876 | 3803 | ||
3804 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | ||
3877 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3805 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3878 | return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3806 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
3879 | 3807 | else | |
3880 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 3808 | ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); |
3809 | trace_ext4_direct_IO_exit(inode, offset, | ||
3810 | iov_length(iov, nr_segs), rw, ret); | ||
3811 | return ret; | ||
3881 | } | 3812 | } |
3882 | 3813 | ||
3883 | /* | 3814 | /* |
@@ -3903,7 +3834,6 @@ static const struct address_space_operations ext4_ordered_aops = { | |||
3903 | .readpage = ext4_readpage, | 3834 | .readpage = ext4_readpage, |
3904 | .readpages = ext4_readpages, | 3835 | .readpages = ext4_readpages, |
3905 | .writepage = ext4_writepage, | 3836 | .writepage = ext4_writepage, |
3906 | .sync_page = block_sync_page, | ||
3907 | .write_begin = ext4_write_begin, | 3837 | .write_begin = ext4_write_begin, |
3908 | .write_end = ext4_ordered_write_end, | 3838 | .write_end = ext4_ordered_write_end, |
3909 | .bmap = ext4_bmap, | 3839 | .bmap = ext4_bmap, |
@@ -3919,7 +3849,6 @@ static const struct address_space_operations ext4_writeback_aops = { | |||
3919 | .readpage = ext4_readpage, | 3849 | .readpage = ext4_readpage, |
3920 | .readpages = ext4_readpages, | 3850 | .readpages = ext4_readpages, |
3921 | .writepage = ext4_writepage, | 3851 | .writepage = ext4_writepage, |
3922 | .sync_page = block_sync_page, | ||
3923 | .write_begin = ext4_write_begin, | 3852 | .write_begin = ext4_write_begin, |
3924 | .write_end = ext4_writeback_write_end, | 3853 | .write_end = ext4_writeback_write_end, |
3925 | .bmap = ext4_bmap, | 3854 | .bmap = ext4_bmap, |
@@ -3935,7 +3864,6 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
3935 | .readpage = ext4_readpage, | 3864 | .readpage = ext4_readpage, |
3936 | .readpages = ext4_readpages, | 3865 | .readpages = ext4_readpages, |
3937 | .writepage = ext4_writepage, | 3866 | .writepage = ext4_writepage, |
3938 | .sync_page = block_sync_page, | ||
3939 | .write_begin = ext4_write_begin, | 3867 | .write_begin = ext4_write_begin, |
3940 | .write_end = ext4_journalled_write_end, | 3868 | .write_end = ext4_journalled_write_end, |
3941 | .set_page_dirty = ext4_journalled_set_page_dirty, | 3869 | .set_page_dirty = ext4_journalled_set_page_dirty, |
@@ -3951,7 +3879,6 @@ static const struct address_space_operations ext4_da_aops = { | |||
3951 | .readpages = ext4_readpages, | 3879 | .readpages = ext4_readpages, |
3952 | .writepage = ext4_writepage, | 3880 | .writepage = ext4_writepage, |
3953 | .writepages = ext4_da_writepages, | 3881 | .writepages = ext4_da_writepages, |
3954 | .sync_page = block_sync_page, | ||
3955 | .write_begin = ext4_da_write_begin, | 3882 | .write_begin = ext4_da_write_begin, |
3956 | .write_end = ext4_da_write_end, | 3883 | .write_end = ext4_da_write_end, |
3957 | .bmap = ext4_bmap, | 3884 | .bmap = ext4_bmap, |
@@ -4177,6 +4104,9 @@ no_top: | |||
4177 | * | 4104 | * |
4178 | * We release `count' blocks on disk, but (last - first) may be greater | 4105 | * We release `count' blocks on disk, but (last - first) may be greater |
4179 | * than `count' because there can be holes in there. | 4106 | * than `count' because there can be holes in there. |
4107 | * | ||
4108 | * Return 0 on success, 1 on invalid block range | ||
4109 | * and < 0 on fatal error. | ||
4180 | */ | 4110 | */ |
4181 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | 4111 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, |
4182 | struct buffer_head *bh, | 4112 | struct buffer_head *bh, |
@@ -4203,33 +4133,32 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4203 | if (bh) { | 4133 | if (bh) { |
4204 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4134 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
4205 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 4135 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
4206 | if (unlikely(err)) { | 4136 | if (unlikely(err)) |
4207 | ext4_std_error(inode->i_sb, err); | 4137 | goto out_err; |
4208 | return 1; | ||
4209 | } | ||
4210 | } | 4138 | } |
4211 | err = ext4_mark_inode_dirty(handle, inode); | 4139 | err = ext4_mark_inode_dirty(handle, inode); |
4212 | if (unlikely(err)) { | 4140 | if (unlikely(err)) |
4213 | ext4_std_error(inode->i_sb, err); | 4141 | goto out_err; |
4214 | return 1; | ||
4215 | } | ||
4216 | err = ext4_truncate_restart_trans(handle, inode, | 4142 | err = ext4_truncate_restart_trans(handle, inode, |
4217 | blocks_for_truncate(inode)); | 4143 | blocks_for_truncate(inode)); |
4218 | if (unlikely(err)) { | 4144 | if (unlikely(err)) |
4219 | ext4_std_error(inode->i_sb, err); | 4145 | goto out_err; |
4220 | return 1; | ||
4221 | } | ||
4222 | if (bh) { | 4146 | if (bh) { |
4223 | BUFFER_TRACE(bh, "retaking write access"); | 4147 | BUFFER_TRACE(bh, "retaking write access"); |
4224 | ext4_journal_get_write_access(handle, bh); | 4148 | err = ext4_journal_get_write_access(handle, bh); |
4149 | if (unlikely(err)) | ||
4150 | goto out_err; | ||
4225 | } | 4151 | } |
4226 | } | 4152 | } |
4227 | 4153 | ||
4228 | for (p = first; p < last; p++) | 4154 | for (p = first; p < last; p++) |
4229 | *p = 0; | 4155 | *p = 0; |
4230 | 4156 | ||
4231 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); | 4157 | ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); |
4232 | return 0; | 4158 | return 0; |
4159 | out_err: | ||
4160 | ext4_std_error(inode->i_sb, err); | ||
4161 | return err; | ||
4233 | } | 4162 | } |
4234 | 4163 | ||
4235 | /** | 4164 | /** |
@@ -4263,7 +4192,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4263 | ext4_fsblk_t nr; /* Current block # */ | 4192 | ext4_fsblk_t nr; /* Current block # */ |
4264 | __le32 *p; /* Pointer into inode/ind | 4193 | __le32 *p; /* Pointer into inode/ind |
4265 | for current block */ | 4194 | for current block */ |
4266 | int err; | 4195 | int err = 0; |
4267 | 4196 | ||
4268 | if (this_bh) { /* For indirect block */ | 4197 | if (this_bh) { /* For indirect block */ |
4269 | BUFFER_TRACE(this_bh, "get_write_access"); | 4198 | BUFFER_TRACE(this_bh, "get_write_access"); |
@@ -4285,9 +4214,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4285 | } else if (nr == block_to_free + count) { | 4214 | } else if (nr == block_to_free + count) { |
4286 | count++; | 4215 | count++; |
4287 | } else { | 4216 | } else { |
4288 | if (ext4_clear_blocks(handle, inode, this_bh, | 4217 | err = ext4_clear_blocks(handle, inode, this_bh, |
4289 | block_to_free, count, | 4218 | block_to_free, count, |
4290 | block_to_free_p, p)) | 4219 | block_to_free_p, p); |
4220 | if (err) | ||
4291 | break; | 4221 | break; |
4292 | block_to_free = nr; | 4222 | block_to_free = nr; |
4293 | block_to_free_p = p; | 4223 | block_to_free_p = p; |
@@ -4296,9 +4226,12 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4296 | } | 4226 | } |
4297 | } | 4227 | } |
4298 | 4228 | ||
4299 | if (count > 0) | 4229 | if (!err && count > 0) |
4300 | ext4_clear_blocks(handle, inode, this_bh, block_to_free, | 4230 | err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, |
4301 | count, block_to_free_p, p); | 4231 | count, block_to_free_p, p); |
4232 | if (err < 0) | ||
4233 | /* fatal error */ | ||
4234 | return; | ||
4302 | 4235 | ||
4303 | if (this_bh) { | 4236 | if (this_bh) { |
4304 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); | 4237 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); |
@@ -4416,7 +4349,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4416 | * transaction where the data blocks are | 4349 | * transaction where the data blocks are |
4417 | * actually freed. | 4350 | * actually freed. |
4418 | */ | 4351 | */ |
4419 | ext4_free_blocks(handle, inode, 0, nr, 1, | 4352 | ext4_free_blocks(handle, inode, NULL, nr, 1, |
4420 | EXT4_FREE_BLOCKS_METADATA| | 4353 | EXT4_FREE_BLOCKS_METADATA| |
4421 | EXT4_FREE_BLOCKS_FORGET); | 4354 | EXT4_FREE_BLOCKS_FORGET); |
4422 | 4355 | ||
@@ -4500,6 +4433,8 @@ void ext4_truncate(struct inode *inode) | |||
4500 | ext4_lblk_t last_block; | 4433 | ext4_lblk_t last_block; |
4501 | unsigned blocksize = inode->i_sb->s_blocksize; | 4434 | unsigned blocksize = inode->i_sb->s_blocksize; |
4502 | 4435 | ||
4436 | trace_ext4_truncate_enter(inode); | ||
4437 | |||
4503 | if (!ext4_can_truncate(inode)) | 4438 | if (!ext4_can_truncate(inode)) |
4504 | return; | 4439 | return; |
4505 | 4440 | ||
@@ -4510,6 +4445,7 @@ void ext4_truncate(struct inode *inode) | |||
4510 | 4445 | ||
4511 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 4446 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
4512 | ext4_ext_truncate(inode); | 4447 | ext4_ext_truncate(inode); |
4448 | trace_ext4_truncate_exit(inode); | ||
4513 | return; | 4449 | return; |
4514 | } | 4450 | } |
4515 | 4451 | ||
@@ -4639,6 +4575,7 @@ out_stop: | |||
4639 | ext4_orphan_del(handle, inode); | 4575 | ext4_orphan_del(handle, inode); |
4640 | 4576 | ||
4641 | ext4_journal_stop(handle); | 4577 | ext4_journal_stop(handle); |
4578 | trace_ext4_truncate_exit(inode); | ||
4642 | } | 4579 | } |
4643 | 4580 | ||
4644 | /* | 4581 | /* |
@@ -4770,6 +4707,7 @@ make_io: | |||
4770 | * has in-inode xattrs, or we don't have this inode in memory. | 4707 | * has in-inode xattrs, or we don't have this inode in memory. |
4771 | * Read the block from disk. | 4708 | * Read the block from disk. |
4772 | */ | 4709 | */ |
4710 | trace_ext4_load_inode(inode); | ||
4773 | get_bh(bh); | 4711 | get_bh(bh); |
4774 | bh->b_end_io = end_buffer_read_sync; | 4712 | bh->b_end_io = end_buffer_read_sync; |
4775 | submit_bh(READ_META, bh); | 4713 | submit_bh(READ_META, bh); |
@@ -4875,7 +4813,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4875 | return inode; | 4813 | return inode; |
4876 | 4814 | ||
4877 | ei = EXT4_I(inode); | 4815 | ei = EXT4_I(inode); |
4878 | iloc.bh = 0; | 4816 | iloc.bh = NULL; |
4879 | 4817 | ||
4880 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 4818 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
4881 | if (ret < 0) | 4819 | if (ret < 0) |
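
The ext4_clear_blocks() hunk above folds three copies of ext4_std_error()-plus-return-1 into a single out_err label and propagates the negative errno instead, which is what lets ext4_free_data() in the next hunk tell a fatal error (< 0) from a benign stop. A minimal userspace sketch of that consolidation, assuming nothing beyond libc; the helper names are illustrative stand-ins, not ext4 API:

#include <stdio.h>
#include <errno.h>

/* Illustrative stand-ins for the journalling helpers in the patch. */
static int dirty_metadata(int fail)      { return fail ? -EIO : 0; }
static int mark_inode_dirty(int fail)    { return fail ? -EIO : 0; }
static int restart_transaction(int fail) { return fail ? -EIO : 0; }

/*
 * Before the patch each failing call logged and returned a bare 1,
 * losing the errno; after it, one out_err label logs once and returns
 * the negative error so the caller can distinguish fatal failures.
 */
static int clear_blocks(int f1, int f2, int f3)
{
    int err;

    err = dirty_metadata(f1);
    if (err)
        goto out_err;
    err = mark_inode_dirty(f2);
    if (err)
        goto out_err;
    err = restart_transaction(f3);
    if (err)
        goto out_err;
    return 0;
out_err:
    fprintf(stderr, "clear_blocks: error %d\n", err);
    return err;                     /* negative errno, not a bare 1 */
}

int main(void)
{
    printf("ok:   %d\n", clear_blocks(0, 0, 0));    /* 0 */
    printf("fail: %d\n", clear_blocks(0, 1, 0));    /* -EIO (-5 on Linux) */
    return 0;
}
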
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index eb3bc2fe647e..808c554e773f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
38 | unsigned int oldflags; | 38 | unsigned int oldflags; |
39 | unsigned int jflag; | 39 | unsigned int jflag; |
40 | 40 | ||
41 | if (!is_owner_or_cap(inode)) | 41 | if (!inode_owner_or_capable(inode)) |
42 | return -EACCES; | 42 | return -EACCES; |
43 | 43 | ||
44 | if (get_user(flags, (int __user *) arg)) | 44 | if (get_user(flags, (int __user *) arg)) |
@@ -146,7 +146,7 @@ flags_out: | |||
146 | __u32 generation; | 146 | __u32 generation; |
147 | int err; | 147 | int err; |
148 | 148 | ||
149 | if (!is_owner_or_cap(inode)) | 149 | if (!inode_owner_or_capable(inode)) |
150 | return -EPERM; | 150 | return -EPERM; |
151 | 151 | ||
152 | err = mnt_want_write(filp->f_path.mnt); | 152 | err = mnt_want_write(filp->f_path.mnt); |
@@ -298,7 +298,7 @@ mext_out: | |||
298 | case EXT4_IOC_MIGRATE: | 298 | case EXT4_IOC_MIGRATE: |
299 | { | 299 | { |
300 | int err; | 300 | int err; |
301 | if (!is_owner_or_cap(inode)) | 301 | if (!inode_owner_or_capable(inode)) |
302 | return -EACCES; | 302 | return -EACCES; |
303 | 303 | ||
304 | err = mnt_want_write(filp->f_path.mnt); | 304 | err = mnt_want_write(filp->f_path.mnt); |
@@ -320,7 +320,7 @@ mext_out: | |||
320 | case EXT4_IOC_ALLOC_DA_BLKS: | 320 | case EXT4_IOC_ALLOC_DA_BLKS: |
321 | { | 321 | { |
322 | int err; | 322 | int err; |
323 | if (!is_owner_or_cap(inode)) | 323 | if (!inode_owner_or_capable(inode)) |
324 | return -EACCES; | 324 | return -EACCES; |
325 | 325 | ||
326 | err = mnt_want_write(filp->f_path.mnt); | 326 | err = mnt_want_write(filp->f_path.mnt); |
@@ -334,16 +334,22 @@ mext_out: | |||
334 | case FITRIM: | 334 | case FITRIM: |
335 | { | 335 | { |
336 | struct super_block *sb = inode->i_sb; | 336 | struct super_block *sb = inode->i_sb; |
337 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | ||
337 | struct fstrim_range range; | 338 | struct fstrim_range range; |
338 | int ret = 0; | 339 | int ret = 0; |
339 | 340 | ||
340 | if (!capable(CAP_SYS_ADMIN)) | 341 | if (!capable(CAP_SYS_ADMIN)) |
341 | return -EPERM; | 342 | return -EPERM; |
342 | 343 | ||
344 | if (!blk_queue_discard(q)) | ||
345 | return -EOPNOTSUPP; | ||
346 | |||
343 | if (copy_from_user(&range, (struct fstrim_range *)arg, | 347 | if (copy_from_user(&range, (struct fstrim_range *)arg, |
344 | sizeof(range))) | 348 | sizeof(range))) |
345 | return -EFAULT; | 349 | return -EFAULT; |
346 | 350 | ||
351 | range.minlen = max((unsigned int)range.minlen, | ||
352 | q->limits.discard_granularity); | ||
347 | ret = ext4_trim_fs(sb, &range); | 353 | ret = ext4_trim_fs(sb, &range); |
348 | if (ret < 0) | 354 | if (ret < 0) |
349 | return ret; | 355 | return ret; |
@@ -421,6 +427,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
421 | return err; | 427 | return err; |
422 | } | 428 | } |
423 | case EXT4_IOC_MOVE_EXT: | 429 | case EXT4_IOC_MOVE_EXT: |
430 | case FITRIM: | ||
424 | break; | 431 | break; |
425 | default: | 432 | default: |
426 | return -ENOIOCTLCMD; | 433 | return -ENOIOCTLCMD; |
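
The FITRIM hunk above adds two guards before ext4_trim_fs(): it rejects devices whose request queue lacks discard support, and it raises the caller's minimum extent length to the device's discard granularity. A short userspace sketch of the clamping step; the struct mirrors the fstrim_range layout and the granularity value is an assumed device property:

#include <stdio.h>

struct fstrim_range {               /* mirrors the kernel's layout */
    unsigned long long start;
    unsigned long long len;
    unsigned long long minlen;
};

/* Raise the requested minimum up to what the device can discard. */
static void clamp_minlen(struct fstrim_range *r, unsigned int granularity)
{
    if (r->minlen < granularity)
        r->minlen = granularity;
}

int main(void)
{
    struct fstrim_range r = { 0, ~0ULL, 512 };
    unsigned int discard_granularity = 4096;    /* assumed device value */

    clamp_minlen(&r, discard_granularity);
    printf("effective minlen: %llu\n", r.minlen);   /* 4096 */
    return 0;
}
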
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d1fe09aea73d..a5837a837a8b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -432,9 +432,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) | |||
432 | } | 432 | } |
433 | 433 | ||
434 | /* at order 0 we see each particular block */ | 434 | /* at order 0 we see each particular block */ |
435 | *max = 1 << (e4b->bd_blkbits + 3); | 435 | if (order == 0) { |
436 | if (order == 0) | 436 | *max = 1 << (e4b->bd_blkbits + 3); |
437 | return EXT4_MB_BITMAP(e4b); | 437 | return EXT4_MB_BITMAP(e4b); |
438 | } | ||
438 | 439 | ||
439 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; | 440 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; |
440 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; | 441 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; |
@@ -616,7 +617,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
616 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); | 617 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); |
617 | 618 | ||
618 | grp = ext4_get_group_info(sb, e4b->bd_group); | 619 | grp = ext4_get_group_info(sb, e4b->bd_group); |
619 | buddy = mb_find_buddy(e4b, 0, &max); | ||
620 | list_for_each(cur, &grp->bb_prealloc_list) { | 620 | list_for_each(cur, &grp->bb_prealloc_list) { |
621 | ext4_group_t groupnr; | 621 | ext4_group_t groupnr; |
622 | struct ext4_prealloc_space *pa; | 622 | struct ext4_prealloc_space *pa; |
@@ -635,7 +635,12 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
635 | #define mb_check_buddy(e4b) | 635 | #define mb_check_buddy(e4b) |
636 | #endif | 636 | #endif |
637 | 637 | ||
638 | /* FIXME!! need more doc */ | 638 | /* |
639 | * Divide the blocks starting at @first, of length @len, into | ||
640 | * smaller chunks of power-of-2 block counts. | ||
641 | * Clear the bits in the bitmap covered by the blocks of each chunk, | ||
642 | * then increase bb_counters[] for the corresponding chunk size. | ||
643 | */ | ||
639 | static void ext4_mb_mark_free_simple(struct super_block *sb, | 644 | static void ext4_mb_mark_free_simple(struct super_block *sb, |
640 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, | 645 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, |
641 | struct ext4_group_info *grp) | 646 | struct ext4_group_info *grp) |
@@ -2381,7 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2381 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | 2386 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte |
2382 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | 2387 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. |
2383 | * So a two level scheme suffices for now. */ | 2388 | * So a two level scheme suffices for now. */ |
2384 | sbi->s_group_info = kmalloc(array_size, GFP_KERNEL); | 2389 | sbi->s_group_info = kzalloc(array_size, GFP_KERNEL); |
2385 | if (sbi->s_group_info == NULL) { | 2390 | if (sbi->s_group_info == NULL) { |
2386 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); | 2391 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); |
2387 | return -ENOMEM; | 2392 | return -ENOMEM; |
@@ -3208,7 +3213,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | |||
3208 | cur_distance = abs(goal_block - cpa->pa_pstart); | 3213 | cur_distance = abs(goal_block - cpa->pa_pstart); |
3209 | new_distance = abs(goal_block - pa->pa_pstart); | 3214 | new_distance = abs(goal_block - pa->pa_pstart); |
3210 | 3215 | ||
3211 | if (cur_distance < new_distance) | 3216 | if (cur_distance <= new_distance) |
3212 | return cpa; | 3217 | return cpa; |
3213 | 3218 | ||
3214 | /* drop the previous reference */ | 3219 | /* drop the previous reference */ |
@@ -3907,7 +3912,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
3907 | struct super_block *sb = ac->ac_sb; | 3912 | struct super_block *sb = ac->ac_sb; |
3908 | ext4_group_t ngroups, i; | 3913 | ext4_group_t ngroups, i; |
3909 | 3914 | ||
3910 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | 3915 | if (!mb_enable_debug || |
3916 | (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) | ||
3911 | return; | 3917 | return; |
3912 | 3918 | ||
3913 | printk(KERN_ERR "EXT4-fs: Can't allocate:" | 3919 | printk(KERN_ERR "EXT4-fs: Can't allocate:" |
@@ -4753,7 +4759,8 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, | |||
4753 | * bitmap. Then issue a TRIM command on this extent and free the extent in | 4759 | * bitmap. Then issue a TRIM command on this extent and free the extent in |
4754 | * the group buddy bitmap. This is done until whole group is scanned. | 4760 | * the group buddy bitmap. This is done until whole group is scanned. |
4755 | */ | 4761 | */ |
4756 | ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | 4762 | static ext4_grpblk_t |
4763 | ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | ||
4757 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) | 4764 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) |
4758 | { | 4765 | { |
4759 | void *bitmap; | 4766 | void *bitmap; |
@@ -4863,10 +4870,15 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4863 | break; | 4870 | break; |
4864 | } | 4871 | } |
4865 | 4872 | ||
4866 | if (len >= EXT4_BLOCKS_PER_GROUP(sb)) | 4873 | /* |
4867 | len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block); | 4874 | * For all the groups except the last one, last block will |
4868 | else | 4875 | * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to |
4876 | * change it for the last group in which case start + | ||
4877 | * len < EXT4_BLOCKS_PER_GROUP(sb). | ||
4878 | */ | ||
4879 | if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb)) | ||
4869 | last_block = first_block + len; | 4880 | last_block = first_block + len; |
4881 | len -= last_block - first_block; | ||
4870 | 4882 | ||
4871 | if (e4b.bd_info->bb_free >= minlen) { | 4883 | if (e4b.bd_info->bb_free >= minlen) { |
4872 | cnt = ext4_trim_all_free(sb, &e4b, first_block, | 4884 | cnt = ext4_trim_all_free(sb, &e4b, first_block, |
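
The comment added to ext4_mb_mark_free_simple() describes a buddy decomposition: a free range is split into power-of-two chunks, each as large as both the remaining length and the alignment of its starting block allow. A runnable sketch of that split, with printing standing in for the real bitmap and bb_counters[] updates (the order cap of 30 replaces the function's blocksize-derived border):

#include <stdio.h>
#include <strings.h>        /* ffs() */

/* Portable fls(): 1-based index of the highest set bit, 0 for 0. */
static int fls_(unsigned int x)
{
    int r = 0;
    while (x) { x >>= 1; r++; }
    return r;
}

/* Split [first, first + len) into aligned power-of-two chunks. */
static void mark_free_simple(unsigned int first, unsigned int len)
{
    while (len > 0) {
        /* largest order the alignment of `first` permits */
        int max = first ? ffs(first) - 1 : 30;
        /* largest power of two not exceeding the remaining length */
        int min = fls_(len) - 1;
        int chunk;

        if (max < min)
            min = max;
        chunk = 1 << min;
        printf("chunk at %u, order %d (%d blocks)\n", first, min, chunk);
        first += chunk;
        len -= chunk;
    }
}

int main(void)
{
    /* 13 blocks from block 5: chunks of order 0, 1, 3, 1 */
    mark_free_simple(5, 13);
    return 0;
}
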
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index b619322c76f0..22bd4d7f289b 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -169,7 +169,7 @@ struct ext4_allocation_context { | |||
169 | /* original request */ | 169 | /* original request */ |
170 | struct ext4_free_extent ac_o_ex; | 170 | struct ext4_free_extent ac_o_ex; |
171 | 171 | ||
172 | /* goal request (after normalization) */ | 172 | /* goal request (normalized ac_o_ex) */ |
173 | struct ext4_free_extent ac_g_ex; | 173 | struct ext4_free_extent ac_g_ex; |
174 | 174 | ||
175 | /* the best found extent */ | 175 | /* the best found extent */ |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b0a126f23c20..d1bafa57f483 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -263,7 +263,7 @@ static int free_dind_blocks(handle_t *handle, | |||
263 | for (i = 0; i < max_entries; i++) { | 263 | for (i = 0; i < max_entries; i++) { |
264 | if (tmp_idata[i]) { | 264 | if (tmp_idata[i]) { |
265 | extend_credit_for_blkdel(handle, inode); | 265 | extend_credit_for_blkdel(handle, inode); |
266 | ext4_free_blocks(handle, inode, 0, | 266 | ext4_free_blocks(handle, inode, NULL, |
267 | le32_to_cpu(tmp_idata[i]), 1, | 267 | le32_to_cpu(tmp_idata[i]), 1, |
268 | EXT4_FREE_BLOCKS_METADATA | | 268 | EXT4_FREE_BLOCKS_METADATA | |
269 | EXT4_FREE_BLOCKS_FORGET); | 269 | EXT4_FREE_BLOCKS_FORGET); |
@@ -271,7 +271,7 @@ static int free_dind_blocks(handle_t *handle, | |||
271 | } | 271 | } |
272 | put_bh(bh); | 272 | put_bh(bh); |
273 | extend_credit_for_blkdel(handle, inode); | 273 | extend_credit_for_blkdel(handle, inode); |
274 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, | 274 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, |
275 | EXT4_FREE_BLOCKS_METADATA | | 275 | EXT4_FREE_BLOCKS_METADATA | |
276 | EXT4_FREE_BLOCKS_FORGET); | 276 | EXT4_FREE_BLOCKS_FORGET); |
277 | return 0; | 277 | return 0; |
@@ -302,7 +302,7 @@ static int free_tind_blocks(handle_t *handle, | |||
302 | } | 302 | } |
303 | put_bh(bh); | 303 | put_bh(bh); |
304 | extend_credit_for_blkdel(handle, inode); | 304 | extend_credit_for_blkdel(handle, inode); |
305 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, | 305 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, |
306 | EXT4_FREE_BLOCKS_METADATA | | 306 | EXT4_FREE_BLOCKS_METADATA | |
307 | EXT4_FREE_BLOCKS_FORGET); | 307 | EXT4_FREE_BLOCKS_FORGET); |
308 | return 0; | 308 | return 0; |
@@ -315,7 +315,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) | |||
315 | /* ei->i_data[EXT4_IND_BLOCK] */ | 315 | /* ei->i_data[EXT4_IND_BLOCK] */ |
316 | if (i_data[0]) { | 316 | if (i_data[0]) { |
317 | extend_credit_for_blkdel(handle, inode); | 317 | extend_credit_for_blkdel(handle, inode); |
318 | ext4_free_blocks(handle, inode, 0, | 318 | ext4_free_blocks(handle, inode, NULL, |
319 | le32_to_cpu(i_data[0]), 1, | 319 | le32_to_cpu(i_data[0]), 1, |
320 | EXT4_FREE_BLOCKS_METADATA | | 320 | EXT4_FREE_BLOCKS_METADATA | |
321 | EXT4_FREE_BLOCKS_FORGET); | 321 | EXT4_FREE_BLOCKS_FORGET); |
@@ -428,7 +428,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | |||
428 | } | 428 | } |
429 | put_bh(bh); | 429 | put_bh(bh); |
430 | extend_credit_for_blkdel(handle, inode); | 430 | extend_credit_for_blkdel(handle, inode); |
431 | ext4_free_blocks(handle, inode, 0, block, 1, | 431 | ext4_free_blocks(handle, inode, NULL, block, 1, |
432 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | 432 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); |
433 | return retval; | 433 | return retval; |
434 | } | 434 | } |
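
The migrate.c hunks, like the matching ones in inode.c and xattr.c, are sparse annotations only: ext4_free_blocks() takes a struct buffer_head pointer, and these call sites were passing a literal 0. Both spellings yield a null pointer, but NULL states the intent and silences sparse's "using plain integer as NULL pointer" warning; a trivial illustration:

#include <stddef.h>

struct buffer_head;

static void free_blocks(struct buffer_head *bh) { (void)bh; }

int main(void)
{
    free_blocks(0);     /* legal C, but sparse flags the bare integer */
    free_blocks(NULL);  /* same null pointer, explicit intent */
    return 0;
}
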
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index e781b7ea5630..67fd0b025858 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "xattr.h" | 40 | #include "xattr.h" |
41 | #include "acl.h" | 41 | #include "acl.h" |
42 | 42 | ||
43 | #include <trace/events/ext4.h> | ||
43 | /* | 44 | /* |
44 | * define how far ahead to read directories while searching them. | 45 | * define how far ahead to read directories while searching them. |
45 | */ | 46 | */ |
@@ -2183,6 +2184,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2183 | struct ext4_dir_entry_2 *de; | 2184 | struct ext4_dir_entry_2 *de; |
2184 | handle_t *handle; | 2185 | handle_t *handle; |
2185 | 2186 | ||
2187 | trace_ext4_unlink_enter(dir, dentry); | ||
2186 | /* Initialize quotas before so that eventual writes go | 2188 | /* Initialize quotas before so that eventual writes go |
2187 | * in separate transaction */ | 2189 | * in separate transaction */ |
2188 | dquot_initialize(dir); | 2190 | dquot_initialize(dir); |
@@ -2228,6 +2230,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2228 | end_unlink: | 2230 | end_unlink: |
2229 | ext4_journal_stop(handle); | 2231 | ext4_journal_stop(handle); |
2230 | brelse(bh); | 2232 | brelse(bh); |
2233 | trace_ext4_unlink_exit(dentry, retval); | ||
2231 | return retval; | 2234 | return retval; |
2232 | } | 2235 | } |
2233 | 2236 | ||
@@ -2402,6 +2405,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2402 | if (!new_inode && new_dir != old_dir && | 2405 | if (!new_inode && new_dir != old_dir && |
2403 | EXT4_DIR_LINK_MAX(new_dir)) | 2406 | EXT4_DIR_LINK_MAX(new_dir)) |
2404 | goto end_rename; | 2407 | goto end_rename; |
2408 | BUFFER_TRACE(dir_bh, "get_write_access"); | ||
2409 | retval = ext4_journal_get_write_access(handle, dir_bh); | ||
2410 | if (retval) | ||
2411 | goto end_rename; | ||
2405 | } | 2412 | } |
2406 | if (!new_bh) { | 2413 | if (!new_bh) { |
2407 | retval = ext4_add_entry(handle, new_dentry, old_inode); | 2414 | retval = ext4_add_entry(handle, new_dentry, old_inode); |
@@ -2409,7 +2416,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2409 | goto end_rename; | 2416 | goto end_rename; |
2410 | } else { | 2417 | } else { |
2411 | BUFFER_TRACE(new_bh, "get write access"); | 2418 | BUFFER_TRACE(new_bh, "get write access"); |
2412 | ext4_journal_get_write_access(handle, new_bh); | 2419 | retval = ext4_journal_get_write_access(handle, new_bh); |
2420 | if (retval) | ||
2421 | goto end_rename; | ||
2413 | new_de->inode = cpu_to_le32(old_inode->i_ino); | 2422 | new_de->inode = cpu_to_le32(old_inode->i_ino); |
2414 | if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, | 2423 | if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, |
2415 | EXT4_FEATURE_INCOMPAT_FILETYPE)) | 2424 | EXT4_FEATURE_INCOMPAT_FILETYPE)) |
@@ -2470,8 +2479,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2470 | old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); | 2479 | old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); |
2471 | ext4_update_dx_flag(old_dir); | 2480 | ext4_update_dx_flag(old_dir); |
2472 | if (dir_bh) { | 2481 | if (dir_bh) { |
2473 | BUFFER_TRACE(dir_bh, "get_write_access"); | ||
2474 | ext4_journal_get_write_access(handle, dir_bh); | ||
2475 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = | 2482 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = |
2476 | cpu_to_le32(new_dir->i_ino); | 2483 | cpu_to_le32(new_dir->i_ino); |
2477 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); | 2484 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); |
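
The ext4_rename() hunks move the ext4_journal_get_write_access() call on dir_bh ahead of the PARENT_INO update and start checking its result, matching the jbd2 contract that write access must be obtained, and seen to succeed, before a journalled buffer is modified. A toy model of that ordering; the types here are stand-ins, not the jbd2 API:

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

struct buf {
    bool writable;      /* set once write access is granted */
    int data;
};

static int get_write_access(struct buf *b)
{
    b->writable = true; /* the real call can fail, so check it */
    return 0;
}

static int modify(struct buf *b, int v)
{
    if (!b->writable)
        return -EPERM;  /* modifying first is the bug being fixed */
    b->data = v;
    return 0;
}

int main(void)
{
    struct buf dir_bh = { 0 };
    int err;

    /* Fixed ordering: access first, check the result, then modify. */
    err = get_write_access(&dir_bh);
    if (err)
        return 1;
    err = modify(&dir_bh, 42);
    printf("modify after access: %d\n", err);   /* 0 */
    return 0;
}
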
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 955cc309142f..b6dbd056fcb1 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -259,6 +259,11 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
259 | bi_sector >> (inode->i_blkbits - 9)); | 259 | bi_sector >> (inode->i_blkbits - 9)); |
260 | } | 260 | } |
261 | 261 | ||
262 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
263 | ext4_free_io_end(io_end); | ||
264 | return; | ||
265 | } | ||
266 | |||
262 | /* Add the io_end to per-inode completed io list*/ | 267 | /* Add the io_end to per-inode completed io list*/ |
263 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 268 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
264 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | 269 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); |
@@ -279,9 +284,9 @@ void ext4_io_submit(struct ext4_io_submit *io) | |||
279 | BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); | 284 | BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); |
280 | bio_put(io->io_bio); | 285 | bio_put(io->io_bio); |
281 | } | 286 | } |
282 | io->io_bio = 0; | 287 | io->io_bio = NULL; |
283 | io->io_op = 0; | 288 | io->io_op = 0; |
284 | io->io_end = 0; | 289 | io->io_end = NULL; |
285 | } | 290 | } |
286 | 291 | ||
287 | static int io_submit_init(struct ext4_io_submit *io, | 292 | static int io_submit_init(struct ext4_io_submit *io, |
@@ -310,8 +315,7 @@ static int io_submit_init(struct ext4_io_submit *io, | |||
310 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | 315 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); |
311 | 316 | ||
312 | io->io_bio = bio; | 317 | io->io_bio = bio; |
313 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? | 318 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); |
314 | WRITE_SYNC_PLUG : WRITE); | ||
315 | io->io_next_block = bh->b_blocknr; | 319 | io->io_next_block = bh->b_blocknr; |
316 | return 0; | 320 | return 0; |
317 | } | 321 | } |
@@ -381,8 +385,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
381 | 385 | ||
382 | BUG_ON(!PageLocked(page)); | 386 | BUG_ON(!PageLocked(page)); |
383 | BUG_ON(PageWriteback(page)); | 387 | BUG_ON(PageWriteback(page)); |
384 | set_page_writeback(page); | ||
385 | ClearPageError(page); | ||
386 | 388 | ||
387 | io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); | 389 | io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); |
388 | if (!io_page) { | 390 | if (!io_page) { |
@@ -393,6 +395,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
393 | io_page->p_page = page; | 395 | io_page->p_page = page; |
394 | atomic_set(&io_page->p_count, 1); | 396 | atomic_set(&io_page->p_count, 1); |
395 | get_page(page); | 397 | get_page(page); |
398 | set_page_writeback(page); | ||
399 | ClearPageError(page); | ||
396 | 400 | ||
397 | for (bh = head = page_buffers(page), block_start = 0; | 401 | for (bh = head = page_buffers(page), block_start = 0; |
398 | bh != head || !block_start; | 402 | bh != head || !block_start; |
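
Two things happen in the page-io.c hunks: set_page_writeback() is deferred until the io_page allocation has succeeded, so a failed allocation no longer leaves the page marked under writeback, and ext4_end_bio() now frees the io_end immediately unless EXT4_IO_END_UNWRITTEN is set, queueing only I/O that still needs unwritten-extent conversion. A sketch of the flag-gated completion; the flag name follows the patch, the queueing is a stub:

#include <stdio.h>
#include <stdlib.h>

#define EXT4_IO_END_UNWRITTEN 0x0001

struct io_end {
    unsigned int flag;
};

static void free_io_end(struct io_end *io) { free(io); }

static void queue_for_conversion(struct io_end *io)
{
    printf("io_end %p queued for extent conversion\n", (void *)io);
}

static void end_bio(struct io_end *io)
{
    /* No unwritten extents: nothing left for the worker to do. */
    if (!(io->flag & EXT4_IO_END_UNWRITTEN)) {
        free_io_end(io);
        return;
    }
    queue_for_conversion(io);   /* a worker finishes the conversion */
}

int main(void)
{
    struct io_end *plain = calloc(1, sizeof(*plain));
    struct io_end *unwritten = calloc(1, sizeof(*unwritten));

    if (!plain || !unwritten)
        return 1;
    unwritten->flag = EXT4_IO_END_UNWRITTEN;
    end_bio(plain);         /* freed on the spot */
    end_bio(unwritten);     /* queued */
    free_io_end(unwritten);
    return 0;
}
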
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3ecc6e45d2f9..80bbc9c60c24 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -230,7 +230,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
230 | } | 230 | } |
231 | 231 | ||
232 | /* Zero out all of the reserved backup group descriptor table blocks */ | 232 | /* Zero out all of the reserved backup group descriptor table blocks */ |
233 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", | 233 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", |
234 | block, sbi->s_itb_per_group); | 234 | block, sbi->s_itb_per_group); |
235 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, | 235 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, |
236 | GFP_NOFS); | 236 | GFP_NOFS); |
@@ -248,7 +248,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
248 | 248 | ||
249 | /* Zero out all of the inode table blocks */ | 249 | /* Zero out all of the inode table blocks */ |
250 | block = input->inode_table; | 250 | block = input->inode_table; |
251 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", | 251 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", |
252 | block, sbi->s_itb_per_group); | 252 | block, sbi->s_itb_per_group); |
253 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); | 253 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); |
254 | if (err) | 254 | if (err) |
@@ -499,12 +499,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
499 | return err; | 499 | return err; |
500 | 500 | ||
501 | exit_inode: | 501 | exit_inode: |
502 | /* ext4_journal_release_buffer(handle, iloc.bh); */ | 502 | /* ext4_handle_release_buffer(handle, iloc.bh); */ |
503 | brelse(iloc.bh); | 503 | brelse(iloc.bh); |
504 | exit_dindj: | 504 | exit_dindj: |
505 | /* ext4_journal_release_buffer(handle, dind); */ | 505 | /* ext4_handle_release_buffer(handle, dind); */ |
506 | exit_sbh: | 506 | exit_sbh: |
507 | /* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */ | 507 | /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */ |
508 | exit_dind: | 508 | exit_dind: |
509 | brelse(dind); | 509 | brelse(dind); |
510 | exit_bh: | 510 | exit_bh: |
@@ -586,7 +586,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, | |||
586 | /* | 586 | /* |
587 | int j; | 587 | int j; |
588 | for (j = 0; j < i; j++) | 588 | for (j = 0; j < i; j++) |
589 | ext4_journal_release_buffer(handle, primary[j]); | 589 | ext4_handle_release_buffer(handle, primary[j]); |
590 | */ | 590 | */ |
591 | goto exit_bh; | 591 | goto exit_bh; |
592 | } | 592 | } |
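
The two ext4_debug() fixes in resize.c are format-string corrections: sbi->s_itb_per_group is an unsigned long, so printing it with %llx makes the varargs machinery read eight bytes where only sizeof(long) were passed, undefined behaviour on 32-bit targets. The same rule holds in userspace:

#include <stdio.h>

int main(void)
{
    unsigned long itb_per_group = 0x200;

    /* Wrong: %llx expects unsigned long long (undefined behaviour). */
    /* printf("clear inode table blocks -> %#04llx\n", itb_per_group); */

    /* Right: %lx matches the unsigned long argument. */
    printf("clear inode table blocks -> %#04lx\n", itb_per_group);
    return 0;
}
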
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 203f9e4a70be..22546ad7f0ae 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -54,9 +54,9 @@ | |||
54 | 54 | ||
55 | static struct proc_dir_entry *ext4_proc_root; | 55 | static struct proc_dir_entry *ext4_proc_root; |
56 | static struct kset *ext4_kset; | 56 | static struct kset *ext4_kset; |
57 | struct ext4_lazy_init *ext4_li_info; | 57 | static struct ext4_lazy_init *ext4_li_info; |
58 | struct mutex ext4_li_mtx; | 58 | static struct mutex ext4_li_mtx; |
59 | struct ext4_features *ext4_feat; | 59 | static struct ext4_features *ext4_feat; |
60 | 60 | ||
61 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 61 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
62 | unsigned long journal_devnum); | 62 | unsigned long journal_devnum); |
@@ -75,6 +75,7 @@ static void ext4_write_super(struct super_block *sb); | |||
75 | static int ext4_freeze(struct super_block *sb); | 75 | static int ext4_freeze(struct super_block *sb); |
76 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, | 76 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, |
77 | const char *dev_name, void *data); | 77 | const char *dev_name, void *data); |
78 | static int ext4_feature_set_ok(struct super_block *sb, int readonly); | ||
78 | static void ext4_destroy_lazyinit_thread(void); | 79 | static void ext4_destroy_lazyinit_thread(void); |
79 | static void ext4_unregister_li_request(struct super_block *sb); | 80 | static void ext4_unregister_li_request(struct super_block *sb); |
80 | static void ext4_clear_request_list(void); | 81 | static void ext4_clear_request_list(void); |
@@ -594,7 +595,7 @@ __acquires(bitlock) | |||
594 | 595 | ||
595 | vaf.fmt = fmt; | 596 | vaf.fmt = fmt; |
596 | vaf.va = &args; | 597 | vaf.va = &args; |
597 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u", | 598 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", |
598 | sb->s_id, function, line, grp); | 599 | sb->s_id, function, line, grp); |
599 | if (ino) | 600 | if (ino) |
600 | printk(KERN_CONT "inode %lu: ", ino); | 601 | printk(KERN_CONT "inode %lu: ", ino); |
@@ -997,13 +998,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
997 | if (test_opt(sb, OLDALLOC)) | 998 | if (test_opt(sb, OLDALLOC)) |
998 | seq_puts(seq, ",oldalloc"); | 999 | seq_puts(seq, ",oldalloc"); |
999 | #ifdef CONFIG_EXT4_FS_XATTR | 1000 | #ifdef CONFIG_EXT4_FS_XATTR |
1000 | if (test_opt(sb, XATTR_USER) && | 1001 | if (test_opt(sb, XATTR_USER)) |
1001 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) | ||
1002 | seq_puts(seq, ",user_xattr"); | 1002 | seq_puts(seq, ",user_xattr"); |
1003 | if (!test_opt(sb, XATTR_USER) && | 1003 | if (!test_opt(sb, XATTR_USER)) |
1004 | (def_mount_opts & EXT4_DEFM_XATTR_USER)) { | ||
1005 | seq_puts(seq, ",nouser_xattr"); | 1004 | seq_puts(seq, ",nouser_xattr"); |
1006 | } | ||
1007 | #endif | 1005 | #endif |
1008 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 1006 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1009 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) | 1007 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) |
@@ -1041,8 +1039,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1041 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) | 1039 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) |
1042 | seq_puts(seq, ",nodelalloc"); | 1040 | seq_puts(seq, ",nodelalloc"); |
1043 | 1041 | ||
1044 | if (test_opt(sb, MBLK_IO_SUBMIT)) | 1042 | if (!test_opt(sb, MBLK_IO_SUBMIT)) |
1045 | seq_puts(seq, ",mblk_io_submit"); | 1043 | seq_puts(seq, ",nomblk_io_submit"); |
1046 | if (sbi->s_stripe) | 1044 | if (sbi->s_stripe) |
1047 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | 1045 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); |
1048 | /* | 1046 | /* |
@@ -1451,7 +1449,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1451 | * Initialize args struct so we know whether arg was | 1449 | * Initialize args struct so we know whether arg was |
1452 | * found; some options take optional arguments. | 1450 | * found; some options take optional arguments. |
1453 | */ | 1451 | */ |
1454 | args[0].to = args[0].from = 0; | 1452 | args[0].to = args[0].from = NULL; |
1455 | token = match_token(p, tokens, args); | 1453 | token = match_token(p, tokens, args); |
1456 | switch (token) { | 1454 | switch (token) { |
1457 | case Opt_bsd_df: | 1455 | case Opt_bsd_df: |
@@ -1771,7 +1769,7 @@ set_qf_format: | |||
1771 | return 0; | 1769 | return 0; |
1772 | if (option < 0 || option > (1 << 30)) | 1770 | if (option < 0 || option > (1 << 30)) |
1773 | return 0; | 1771 | return 0; |
1774 | if (!is_power_of_2(option)) { | 1772 | if (option && !is_power_of_2(option)) { |
1775 | ext4_msg(sb, KERN_ERR, | 1773 | ext4_msg(sb, KERN_ERR, |
1776 | "EXT4-fs: inode_readahead_blks" | 1774 | "EXT4-fs: inode_readahead_blks" |
1777 | " must be a power of 2"); | 1775 | " must be a power of 2"); |
@@ -2120,6 +2118,13 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2120 | return; | 2118 | return; |
2121 | } | 2119 | } |
2122 | 2120 | ||
2121 | /* Check if feature set would not allow a r/w mount */ | ||
2122 | if (!ext4_feature_set_ok(sb, 0)) { | ||
2123 | ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " | ||
2124 | "unknown ROCOMPAT features"); | ||
2125 | return; | ||
2126 | } | ||
2127 | |||
2123 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { | 2128 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { |
2124 | if (es->s_last_orphan) | 2129 | if (es->s_last_orphan) |
2125 | jbd_debug(1, "Errors on filesystem, " | 2130 | jbd_debug(1, "Errors on filesystem, " |
@@ -2412,7 +2417,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
2412 | if (parse_strtoul(buf, 0x40000000, &t)) | 2417 | if (parse_strtoul(buf, 0x40000000, &t)) |
2413 | return -EINVAL; | 2418 | return -EINVAL; |
2414 | 2419 | ||
2415 | if (!is_power_of_2(t)) | 2420 | if (t && !is_power_of_2(t)) |
2416 | return -EINVAL; | 2421 | return -EINVAL; |
2417 | 2422 | ||
2418 | sbi->s_inode_readahead_blks = t; | 2423 | sbi->s_inode_readahead_blks = t; |
@@ -3095,14 +3100,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3095 | } | 3100 | } |
3096 | if (def_mount_opts & EXT4_DEFM_UID16) | 3101 | if (def_mount_opts & EXT4_DEFM_UID16) |
3097 | set_opt(sb, NO_UID32); | 3102 | set_opt(sb, NO_UID32); |
3103 | /* xattr user namespace & acls are now defaulted on */ | ||
3098 | #ifdef CONFIG_EXT4_FS_XATTR | 3104 | #ifdef CONFIG_EXT4_FS_XATTR |
3099 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) | 3105 | set_opt(sb, XATTR_USER); |
3100 | set_opt(sb, XATTR_USER); | ||
3101 | #endif | 3106 | #endif |
3102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 3107 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
3103 | if (def_mount_opts & EXT4_DEFM_ACL) | 3108 | set_opt(sb, POSIX_ACL); |
3104 | set_opt(sb, POSIX_ACL); | ||
3105 | #endif | 3109 | #endif |
3110 | set_opt(sb, MBLK_IO_SUBMIT); | ||
3106 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) | 3111 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) |
3107 | set_opt(sb, JOURNAL_DATA); | 3112 | set_opt(sb, JOURNAL_DATA); |
3108 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) | 3113 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) |
@@ -3516,7 +3521,7 @@ no_journal: | |||
3516 | * concurrency isn't really necessary. Limit it to 1. | 3521 | * concurrency isn't really necessary. Limit it to 1. |
3517 | */ | 3522 | */ |
3518 | EXT4_SB(sb)->dio_unwritten_wq = | 3523 | EXT4_SB(sb)->dio_unwritten_wq = |
3519 | alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1); | 3524 | alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); |
3520 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3525 | if (!EXT4_SB(sb)->dio_unwritten_wq) { |
3521 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3526 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); |
3522 | goto failed_mount_wq; | 3527 | goto failed_mount_wq; |
@@ -3531,17 +3536,16 @@ no_journal: | |||
3531 | if (IS_ERR(root)) { | 3536 | if (IS_ERR(root)) { |
3532 | ext4_msg(sb, KERN_ERR, "get root inode failed"); | 3537 | ext4_msg(sb, KERN_ERR, "get root inode failed"); |
3533 | ret = PTR_ERR(root); | 3538 | ret = PTR_ERR(root); |
3539 | root = NULL; | ||
3534 | goto failed_mount4; | 3540 | goto failed_mount4; |
3535 | } | 3541 | } |
3536 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | 3542 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { |
3537 | iput(root); | ||
3538 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); | 3543 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); |
3539 | goto failed_mount4; | 3544 | goto failed_mount4; |
3540 | } | 3545 | } |
3541 | sb->s_root = d_alloc_root(root); | 3546 | sb->s_root = d_alloc_root(root); |
3542 | if (!sb->s_root) { | 3547 | if (!sb->s_root) { |
3543 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); | 3548 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); |
3544 | iput(root); | ||
3545 | ret = -ENOMEM; | 3549 | ret = -ENOMEM; |
3546 | goto failed_mount4; | 3550 | goto failed_mount4; |
3547 | } | 3551 | } |
@@ -3657,6 +3661,8 @@ cantfind_ext4: | |||
3657 | goto failed_mount; | 3661 | goto failed_mount; |
3658 | 3662 | ||
3659 | failed_mount4: | 3663 | failed_mount4: |
3664 | iput(root); | ||
3665 | sb->s_root = NULL; | ||
3660 | ext4_msg(sb, KERN_ERR, "mount failed"); | 3666 | ext4_msg(sb, KERN_ERR, "mount failed"); |
3661 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); | 3667 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); |
3662 | failed_mount_wq: | 3668 | failed_mount_wq: |
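
Among the super.c changes, the inode_readahead_blks validation in both parse_options() and the sysfs store handler becomes "option && !is_power_of_2(option)": since zero is not a power of two, the old test rejected 0, the value used to disable inode readahead. A sketch of the corrected check, with is_power_of_2() written out the way include/linux/log2.h defines it:

#include <stdio.h>
#include <stdbool.h>

static bool is_power_of_2(unsigned long n)
{
    return n != 0 && (n & (n - 1)) == 0;
}

static bool readahead_blks_valid(unsigned long t)
{
    /* 0 disables readahead, so only non-zero values must be 2^k. */
    return t == 0 || is_power_of_2(t);
}

int main(void)
{
    printf("0  -> %d\n", readahead_blks_valid(0));  /* 1: now accepted */
    printf("32 -> %d\n", readahead_blks_valid(32)); /* 1 */
    printf("24 -> %d\n", readahead_blks_valid(24)); /* 0 */
    return 0;
}
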
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fc32176eee39..b545ca1c459c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -735,7 +735,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, | |||
735 | int offset = (char *)s->here - bs->bh->b_data; | 735 | int offset = (char *)s->here - bs->bh->b_data; |
736 | 736 | ||
737 | unlock_buffer(bs->bh); | 737 | unlock_buffer(bs->bh); |
738 | jbd2_journal_release_buffer(handle, bs->bh); | 738 | ext4_handle_release_buffer(handle, bs->bh); |
739 | if (ce) { | 739 | if (ce) { |
740 | mb_cache_entry_release(ce); | 740 | mb_cache_entry_release(ce); |
741 | ce = NULL; | 741 | ce = NULL; |
@@ -833,7 +833,7 @@ inserted: | |||
833 | new_bh = sb_getblk(sb, block); | 833 | new_bh = sb_getblk(sb, block); |
834 | if (!new_bh) { | 834 | if (!new_bh) { |
835 | getblk_failed: | 835 | getblk_failed: |
836 | ext4_free_blocks(handle, inode, 0, block, 1, | 836 | ext4_free_blocks(handle, inode, NULL, block, 1, |
837 | EXT4_FREE_BLOCKS_METADATA); | 837 | EXT4_FREE_BLOCKS_METADATA); |
838 | error = -EIO; | 838 | error = -EIO; |
839 | goto cleanup; | 839 | goto cleanup; |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 0e277ec4b612..8d68690bdcf1 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -236,7 +236,6 @@ static const struct address_space_operations fat_aops = { | |||
236 | .readpages = fat_readpages, | 236 | .readpages = fat_readpages, |
237 | .writepage = fat_writepage, | 237 | .writepage = fat_writepage, |
238 | .writepages = fat_writepages, | 238 | .writepages = fat_writepages, |
239 | .sync_page = block_sync_page, | ||
240 | .write_begin = fat_write_begin, | 239 | .write_begin = fat_write_begin, |
241 | .write_end = fat_write_end, | 240 | .write_end = fat_write_end, |
242 | .direct_IO = fat_direct_IO, | 241 | .direct_IO = fat_direct_IO, |
diff --git a/fs/fcntl.c b/fs/fcntl.c index 6c82e5bac039..22764c7c8382 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -159,7 +159,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) | |||
159 | 159 | ||
160 | /* O_NOATIME can only be set by the owner or superuser */ | 160 | /* O_NOATIME can only be set by the owner or superuser */ |
161 | if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) | 161 | if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) |
162 | if (!is_owner_or_cap(inode)) | 162 | if (!inode_owner_or_capable(inode)) |
163 | return -EPERM; | 163 | return -EPERM; |
164 | 164 | ||
165 | /* required for strict SunOS emulation */ | 165 | /* required for strict SunOS emulation */ |
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c index 1429f3ae1e86..5d318c44f855 100644 --- a/fs/freevxfs/vxfs_subr.c +++ b/fs/freevxfs/vxfs_subr.c | |||
@@ -44,7 +44,6 @@ static sector_t vxfs_bmap(struct address_space *, sector_t); | |||
44 | const struct address_space_operations vxfs_aops = { | 44 | const struct address_space_operations vxfs_aops = { |
45 | .readpage = vxfs_readpage, | 45 | .readpage = vxfs_readpage, |
46 | .bmap = vxfs_bmap, | 46 | .bmap = vxfs_bmap, |
47 | .sync_page = block_sync_page, | ||
48 | }; | 47 | }; |
49 | 48 | ||
50 | inline void | 49 | inline void |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 59c6e4956786..b5ed541fb137 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -176,6 +176,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) | |||
176 | } | 176 | } |
177 | 177 | ||
178 | /* | 178 | /* |
179 | * Remove the inode from the writeback list it is on. | ||
180 | */ | ||
181 | void inode_wb_list_del(struct inode *inode) | ||
182 | { | ||
183 | spin_lock(&inode_wb_list_lock); | ||
184 | list_del_init(&inode->i_wb_list); | ||
185 | spin_unlock(&inode_wb_list_lock); | ||
186 | } | ||
187 | |||
188 | |||
189 | /* | ||
179 | * Redirty an inode: set its when-it-was dirtied timestamp and move it to the | 190 | * Redirty an inode: set its when-it-was dirtied timestamp and move it to the |
180 | * furthest end of its superblock's dirty-inode list. | 191 | * furthest end of its superblock's dirty-inode list. |
181 | * | 192 | * |
@@ -188,6 +199,7 @@ static void redirty_tail(struct inode *inode) | |||
188 | { | 199 | { |
189 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 200 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; |
190 | 201 | ||
202 | assert_spin_locked(&inode_wb_list_lock); | ||
191 | if (!list_empty(&wb->b_dirty)) { | 203 | if (!list_empty(&wb->b_dirty)) { |
192 | struct inode *tail; | 204 | struct inode *tail; |
193 | 205 | ||
@@ -205,14 +217,17 @@ static void requeue_io(struct inode *inode) | |||
205 | { | 217 | { |
206 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 218 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; |
207 | 219 | ||
220 | assert_spin_locked(&inode_wb_list_lock); | ||
208 | list_move(&inode->i_wb_list, &wb->b_more_io); | 221 | list_move(&inode->i_wb_list, &wb->b_more_io); |
209 | } | 222 | } |
210 | 223 | ||
211 | static void inode_sync_complete(struct inode *inode) | 224 | static void inode_sync_complete(struct inode *inode) |
212 | { | 225 | { |
213 | /* | 226 | /* |
214 | * Prevent speculative execution through spin_unlock(&inode_lock); | 227 | * Prevent speculative execution through |
228 | * spin_unlock(&inode_wb_list_lock); | ||
215 | */ | 229 | */ |
230 | |||
216 | smp_mb(); | 231 | smp_mb(); |
217 | wake_up_bit(&inode->i_state, __I_SYNC); | 232 | wake_up_bit(&inode->i_state, __I_SYNC); |
218 | } | 233 | } |
@@ -286,6 +301,7 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
286 | */ | 301 | */ |
287 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) | 302 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
288 | { | 303 | { |
304 | assert_spin_locked(&inode_wb_list_lock); | ||
289 | list_splice_init(&wb->b_more_io, &wb->b_io); | 305 | list_splice_init(&wb->b_more_io, &wb->b_io); |
290 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); | 306 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
291 | } | 307 | } |
@@ -306,25 +322,25 @@ static void inode_wait_for_writeback(struct inode *inode) | |||
306 | wait_queue_head_t *wqh; | 322 | wait_queue_head_t *wqh; |
307 | 323 | ||
308 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); | 324 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); |
309 | while (inode->i_state & I_SYNC) { | 325 | while (inode->i_state & I_SYNC) { |
310 | spin_unlock(&inode_lock); | 326 | spin_unlock(&inode->i_lock); |
327 | spin_unlock(&inode_wb_list_lock); | ||
311 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); | 328 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); |
312 | spin_lock(&inode_lock); | 329 | spin_lock(&inode_wb_list_lock); |
330 | spin_lock(&inode->i_lock); | ||
313 | } | 331 | } |
314 | } | 332 | } |
315 | 333 | ||
316 | /* | 334 | /* |
317 | * Write out an inode's dirty pages. Called under inode_lock. Either the | 335 | * Write out an inode's dirty pages. Called under inode_wb_list_lock and |
318 | * caller has ref on the inode (either via __iget or via syscall against an fd) | 336 | * inode->i_lock. Either the caller has an active reference on the inode or |
319 | * or the inode has I_WILL_FREE set (via generic_forget_inode) | 337 | * the inode has I_WILL_FREE set. |
320 | * | 338 | * |
321 | * If `wait' is set, wait on the writeout. | 339 | * If `wait' is set, wait on the writeout. |
322 | * | 340 | * |
323 | * The whole writeout design is quite complex and fragile. We want to avoid | 341 | * The whole writeout design is quite complex and fragile. We want to avoid |
324 | * starvation of particular inodes when others are being redirtied, prevent | 342 | * starvation of particular inodes when others are being redirtied, prevent |
325 | * livelocks, etc. | 343 | * livelocks, etc. |
326 | * | ||
327 | * Called under inode_lock. | ||
328 | */ | 344 | */ |
329 | static int | 345 | static int |
330 | writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | 346 | writeback_single_inode(struct inode *inode, struct writeback_control *wbc) |
@@ -333,6 +349,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
333 | unsigned dirty; | 349 | unsigned dirty; |
334 | int ret; | 350 | int ret; |
335 | 351 | ||
352 | assert_spin_locked(&inode_wb_list_lock); | ||
353 | assert_spin_locked(&inode->i_lock); | ||
354 | |||
336 | if (!atomic_read(&inode->i_count)) | 355 | if (!atomic_read(&inode->i_count)) |
337 | WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); | 356 | WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); |
338 | else | 357 | else |
@@ -363,7 +382,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
363 | /* Set I_SYNC, reset I_DIRTY_PAGES */ | 382 | /* Set I_SYNC, reset I_DIRTY_PAGES */ |
364 | inode->i_state |= I_SYNC; | 383 | inode->i_state |= I_SYNC; |
365 | inode->i_state &= ~I_DIRTY_PAGES; | 384 | inode->i_state &= ~I_DIRTY_PAGES; |
366 | spin_unlock(&inode_lock); | 385 | spin_unlock(&inode->i_lock); |
386 | spin_unlock(&inode_wb_list_lock); | ||
367 | 387 | ||
368 | ret = do_writepages(mapping, wbc); | 388 | ret = do_writepages(mapping, wbc); |
369 | 389 | ||
@@ -383,10 +403,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
383 | * due to delalloc, clear dirty metadata flags right before | 403 | * due to delalloc, clear dirty metadata flags right before |
384 | * write_inode() | 404 | * write_inode() |
385 | */ | 405 | */ |
386 | spin_lock(&inode_lock); | 406 | spin_lock(&inode->i_lock); |
387 | dirty = inode->i_state & I_DIRTY; | 407 | dirty = inode->i_state & I_DIRTY; |
388 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); | 408 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); |
389 | spin_unlock(&inode_lock); | 409 | spin_unlock(&inode->i_lock); |
390 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 410 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
391 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 411 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
392 | int err = write_inode(inode, wbc); | 412 | int err = write_inode(inode, wbc); |
@@ -394,7 +414,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
394 | ret = err; | 414 | ret = err; |
395 | } | 415 | } |
396 | 416 | ||
397 | spin_lock(&inode_lock); | 417 | spin_lock(&inode_wb_list_lock); |
418 | spin_lock(&inode->i_lock); | ||
398 | inode->i_state &= ~I_SYNC; | 419 | inode->i_state &= ~I_SYNC; |
399 | if (!(inode->i_state & I_FREEING)) { | 420 | if (!(inode->i_state & I_FREEING)) { |
400 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 421 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
@@ -506,7 +527,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
506 | * kind does not need periodic writeout yet, and for the latter | 528 | * kind does not need periodic writeout yet, and for the latter |
507 | * kind writeout is handled by the freer. | 528 | * kind writeout is handled by the freer. |
508 | */ | 529 | */ |
530 | spin_lock(&inode->i_lock); | ||
509 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { | 531 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { |
532 | spin_unlock(&inode->i_lock); | ||
510 | requeue_io(inode); | 533 | requeue_io(inode); |
511 | continue; | 534 | continue; |
512 | } | 535 | } |
@@ -515,10 +538,13 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
515 | * Was this inode dirtied after sync_sb_inodes was called? | 538 | * Was this inode dirtied after sync_sb_inodes was called? |
516 | * This keeps sync from extra jobs and livelock. | 539 | * This keeps sync from extra jobs and livelock. |
517 | */ | 540 | */ |
518 | if (inode_dirtied_after(inode, wbc->wb_start)) | 541 | if (inode_dirtied_after(inode, wbc->wb_start)) { |
542 | spin_unlock(&inode->i_lock); | ||
519 | return 1; | 543 | return 1; |
544 | } | ||
520 | 545 | ||
521 | __iget(inode); | 546 | __iget(inode); |
547 | |||
522 | pages_skipped = wbc->pages_skipped; | 548 | pages_skipped = wbc->pages_skipped; |
523 | writeback_single_inode(inode, wbc); | 549 | writeback_single_inode(inode, wbc); |
524 | if (wbc->pages_skipped != pages_skipped) { | 550 | if (wbc->pages_skipped != pages_skipped) { |
@@ -528,10 +554,11 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
528 | */ | 554 | */ |
529 | redirty_tail(inode); | 555 | redirty_tail(inode); |
530 | } | 556 | } |
531 | spin_unlock(&inode_lock); | 557 | spin_unlock(&inode->i_lock); |
558 | spin_unlock(&inode_wb_list_lock); | ||
532 | iput(inode); | 559 | iput(inode); |
533 | cond_resched(); | 560 | cond_resched(); |
534 | spin_lock(&inode_lock); | 561 | spin_lock(&inode_wb_list_lock); |
535 | if (wbc->nr_to_write <= 0) { | 562 | if (wbc->nr_to_write <= 0) { |
536 | wbc->more_io = 1; | 563 | wbc->more_io = 1; |
537 | return 1; | 564 | return 1; |
@@ -550,7 +577,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb, | |||
550 | 577 | ||
551 | if (!wbc->wb_start) | 578 | if (!wbc->wb_start) |
552 | wbc->wb_start = jiffies; /* livelock avoidance */ | 579 | wbc->wb_start = jiffies; /* livelock avoidance */ |
553 | spin_lock(&inode_lock); | 580 | spin_lock(&inode_wb_list_lock); |
554 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 581 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
555 | queue_io(wb, wbc->older_than_this); | 582 | queue_io(wb, wbc->older_than_this); |
556 | 583 | ||
@@ -568,7 +595,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb, | |||
568 | if (ret) | 595 | if (ret) |
569 | break; | 596 | break; |
570 | } | 597 | } |
571 | spin_unlock(&inode_lock); | 598 | spin_unlock(&inode_wb_list_lock); |
572 | /* Leave any unwritten inodes on b_io */ | 599 | /* Leave any unwritten inodes on b_io */ |
573 | } | 600 | } |
574 | 601 | ||
@@ -577,11 +604,11 @@ static void __writeback_inodes_sb(struct super_block *sb, | |||
577 | { | 604 | { |
578 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 605 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
579 | 606 | ||
580 | spin_lock(&inode_lock); | 607 | spin_lock(&inode_wb_list_lock); |
581 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 608 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
582 | queue_io(wb, wbc->older_than_this); | 609 | queue_io(wb, wbc->older_than_this); |
583 | writeback_sb_inodes(sb, wb, wbc, true); | 610 | writeback_sb_inodes(sb, wb, wbc, true); |
584 | spin_unlock(&inode_lock); | 611 | spin_unlock(&inode_wb_list_lock); |
585 | } | 612 | } |
586 | 613 | ||
587 | /* | 614 | /* |
@@ -720,13 +747,15 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
720 | * become available for writeback. Otherwise | 747 | * become available for writeback. Otherwise |
721 | * we'll just busyloop. | 748 | * we'll just busyloop. |
722 | */ | 749 | */ |
723 | spin_lock(&inode_lock); | 750 | spin_lock(&inode_wb_list_lock); |
724 | if (!list_empty(&wb->b_more_io)) { | 751 | if (!list_empty(&wb->b_more_io)) { |
725 | inode = wb_inode(wb->b_more_io.prev); | 752 | inode = wb_inode(wb->b_more_io.prev); |
726 | trace_wbc_writeback_wait(&wbc, wb->bdi); | 753 | trace_wbc_writeback_wait(&wbc, wb->bdi); |
754 | spin_lock(&inode->i_lock); | ||
727 | inode_wait_for_writeback(inode); | 755 | inode_wait_for_writeback(inode); |
756 | spin_unlock(&inode->i_lock); | ||
728 | } | 757 | } |
729 | spin_unlock(&inode_lock); | 758 | spin_unlock(&inode_wb_list_lock); |
730 | } | 759 | } |
731 | 760 | ||
732 | return wrote; | 761 | return wrote; |
@@ -992,7 +1021,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
992 | { | 1021 | { |
993 | struct super_block *sb = inode->i_sb; | 1022 | struct super_block *sb = inode->i_sb; |
994 | struct backing_dev_info *bdi = NULL; | 1023 | struct backing_dev_info *bdi = NULL; |
995 | bool wakeup_bdi = false; | ||
996 | 1024 | ||
997 | /* | 1025 | /* |
998 | * Don't do this for I_DIRTY_PAGES - that doesn't actually | 1026 | * Don't do this for I_DIRTY_PAGES - that doesn't actually |
@@ -1016,7 +1044,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1016 | if (unlikely(block_dump)) | 1044 | if (unlikely(block_dump)) |
1017 | block_dump___mark_inode_dirty(inode); | 1045 | block_dump___mark_inode_dirty(inode); |
1018 | 1046 | ||
1019 | spin_lock(&inode_lock); | 1047 | spin_lock(&inode->i_lock); |
1020 | if ((inode->i_state & flags) != flags) { | 1048 | if ((inode->i_state & flags) != flags) { |
1021 | const int was_dirty = inode->i_state & I_DIRTY; | 1049 | const int was_dirty = inode->i_state & I_DIRTY; |
1022 | 1050 | ||
@@ -1028,7 +1056,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1028 | * superblock list, based upon its state. | 1056 | * superblock list, based upon its state. |
1029 | */ | 1057 | */ |
1030 | if (inode->i_state & I_SYNC) | 1058 | if (inode->i_state & I_SYNC) |
1031 | goto out; | 1059 | goto out_unlock_inode; |
1032 | 1060 | ||
1033 | /* | 1061 | /* |
1034 | * Only add valid (hashed) inodes to the superblock's | 1062 | * Only add valid (hashed) inodes to the superblock's |
@@ -1036,16 +1064,17 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1036 | */ | 1064 | */ |
1037 | if (!S_ISBLK(inode->i_mode)) { | 1065 | if (!S_ISBLK(inode->i_mode)) { |
1038 | if (inode_unhashed(inode)) | 1066 | if (inode_unhashed(inode)) |
1039 | goto out; | 1067 | goto out_unlock_inode; |
1040 | } | 1068 | } |
1041 | if (inode->i_state & I_FREEING) | 1069 | if (inode->i_state & I_FREEING) |
1042 | goto out; | 1070 | goto out_unlock_inode; |
1043 | 1071 | ||
1044 | /* | 1072 | /* |
1045 | * If the inode was already on b_dirty/b_io/b_more_io, don't | 1073 | * If the inode was already on b_dirty/b_io/b_more_io, don't |
1046 | * reposition it (that would break b_dirty time-ordering). | 1074 | * reposition it (that would break b_dirty time-ordering). |
1047 | */ | 1075 | */ |
1048 | if (!was_dirty) { | 1076 | if (!was_dirty) { |
1077 | bool wakeup_bdi = false; | ||
1049 | bdi = inode_to_bdi(inode); | 1078 | bdi = inode_to_bdi(inode); |
1050 | 1079 | ||
1051 | if (bdi_cap_writeback_dirty(bdi)) { | 1080 | if (bdi_cap_writeback_dirty(bdi)) { |
@@ -1062,15 +1091,20 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1062 | wakeup_bdi = true; | 1091 | wakeup_bdi = true; |
1063 | } | 1092 | } |
1064 | 1093 | ||
1094 | spin_unlock(&inode->i_lock); | ||
1095 | spin_lock(&inode_wb_list_lock); | ||
1065 | inode->dirtied_when = jiffies; | 1096 | inode->dirtied_when = jiffies; |
1066 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); | 1097 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); |
1098 | spin_unlock(&inode_wb_list_lock); | ||
1099 | |||
1100 | if (wakeup_bdi) | ||
1101 | bdi_wakeup_thread_delayed(bdi); | ||
1102 | return; | ||
1067 | } | 1103 | } |
1068 | } | 1104 | } |
1069 | out: | 1105 | out_unlock_inode: |
1070 | spin_unlock(&inode_lock); | 1106 | spin_unlock(&inode->i_lock); |
1071 | 1107 | ||
1072 | if (wakeup_bdi) | ||
1073 | bdi_wakeup_thread_delayed(bdi); | ||
1074 | } | 1108 | } |
1075 | EXPORT_SYMBOL(__mark_inode_dirty); | 1109 | EXPORT_SYMBOL(__mark_inode_dirty); |
1076 | 1110 | ||
@@ -1101,7 +1135,7 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1101 | */ | 1135 | */ |
1102 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 1136 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
1103 | 1137 | ||
1104 | spin_lock(&inode_lock); | 1138 | spin_lock(&inode_sb_list_lock); |
1105 | 1139 | ||
1106 | /* | 1140 | /* |
1107 | * Data integrity sync. Must wait for all pages under writeback, | 1141 | * Data integrity sync. Must wait for all pages under writeback, |
@@ -1111,22 +1145,25 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1111 | * we still have to wait for that writeout. | 1145 | * we still have to wait for that writeout. |
1112 | */ | 1146 | */ |
1113 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 1147 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
1114 | struct address_space *mapping; | 1148 | struct address_space *mapping = inode->i_mapping; |
1115 | 1149 | ||
1116 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) | 1150 | spin_lock(&inode->i_lock); |
1117 | continue; | 1151 | if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || |
1118 | mapping = inode->i_mapping; | 1152 | (mapping->nrpages == 0)) { |
1119 | if (mapping->nrpages == 0) | 1153 | spin_unlock(&inode->i_lock); |
1120 | continue; | 1154 | continue; |
1155 | } | ||
1121 | __iget(inode); | 1156 | __iget(inode); |
1122 | spin_unlock(&inode_lock); | 1157 | spin_unlock(&inode->i_lock); |
1158 | spin_unlock(&inode_sb_list_lock); | ||
1159 | |||
1123 | /* | 1160 | /* |
1124 | * We hold a reference to 'inode' so it couldn't have | 1161 | * We hold a reference to 'inode' so it couldn't have been |
1125 | * been removed from s_inodes list while we dropped the | 1162 | * removed from s_inodes list while we dropped the |
1126 | * inode_lock. We cannot iput the inode now as we can | 1163 | * inode_sb_list_lock. We cannot iput the inode now as we can |
1127 | * be holding the last reference and we cannot iput it | 1164 | * be holding the last reference and we cannot iput it under |
1128 | * under inode_lock. So we keep the reference and iput | 1165 | * inode_sb_list_lock. So we keep the reference and iput it |
1129 | * it later. | 1166 | * later. |
1130 | */ | 1167 | */ |
1131 | iput(old_inode); | 1168 | iput(old_inode); |
1132 | old_inode = inode; | 1169 | old_inode = inode; |
@@ -1135,9 +1172,9 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1135 | 1172 | ||
1136 | cond_resched(); | 1173 | cond_resched(); |
1137 | 1174 | ||
1138 | spin_lock(&inode_lock); | 1175 | spin_lock(&inode_sb_list_lock); |
1139 | } | 1176 | } |
1140 | spin_unlock(&inode_lock); | 1177 | spin_unlock(&inode_sb_list_lock); |
1141 | iput(old_inode); | 1178 | iput(old_inode); |
1142 | } | 1179 | } |
1143 | 1180 | ||
@@ -1271,9 +1308,11 @@ int write_inode_now(struct inode *inode, int sync) | |||
1271 | wbc.nr_to_write = 0; | 1308 | wbc.nr_to_write = 0; |
1272 | 1309 | ||
1273 | might_sleep(); | 1310 | might_sleep(); |
1274 | spin_lock(&inode_lock); | 1311 | spin_lock(&inode_wb_list_lock); |
1312 | spin_lock(&inode->i_lock); | ||
1275 | ret = writeback_single_inode(inode, &wbc); | 1313 | ret = writeback_single_inode(inode, &wbc); |
1276 | spin_unlock(&inode_lock); | 1314 | spin_unlock(&inode->i_lock); |
1315 | spin_unlock(&inode_wb_list_lock); | ||
1277 | if (sync) | 1316 | if (sync) |
1278 | inode_sync_wait(inode); | 1317 | inode_sync_wait(inode); |
1279 | return ret; | 1318 | return ret; |
@@ -1295,9 +1334,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) | |||
1295 | { | 1334 | { |
1296 | int ret; | 1335 | int ret; |
1297 | 1336 | ||
1298 | spin_lock(&inode_lock); | 1337 | spin_lock(&inode_wb_list_lock); |
1338 | spin_lock(&inode->i_lock); | ||
1299 | ret = writeback_single_inode(inode, wbc); | 1339 | ret = writeback_single_inode(inode, wbc); |
1300 | spin_unlock(&inode_lock); | 1340 | spin_unlock(&inode->i_lock); |
1341 | spin_unlock(&inode_wb_list_lock); | ||
1301 | return ret; | 1342 | return ret; |
1302 | } | 1343 | } |
1303 | EXPORT_SYMBOL(sync_inode); | 1344 | EXPORT_SYMBOL(sync_inode); |
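
Taken together, the __mark_inode_dirty() hunks above leave the dirty-list move with the locking skeleton below. Because inode->i_lock nests inside inode_wb_list_lock, the outer lock cannot be acquired while the inner one is held, so the bdi wakeup decision is captured in a local before i_lock is dropped. This is a condensed restatement of the patched code, not new logic:

	if (!was_dirty) {
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		bool wakeup_bdi = false;

		/* ... wakeup_bdi is decided while inode->i_lock is held ... */

		spin_unlock(&inode->i_lock);	/* can't take the outer lock
						 * while holding the inner one */
		spin_lock(&inode_wb_list_lock);
		inode->dirtied_when = jiffies;
		list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
		spin_unlock(&inode_wb_list_lock);

		if (wakeup_bdi)			/* no spinlocks held here */
			bdi_wakeup_thread_delayed(bdi);
		return;
	}
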
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 051b1a084528..cc6ec4b2f0ff 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -870,7 +870,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) | |||
870 | 870 | ||
871 | fc->bdi.name = "fuse"; | 871 | fc->bdi.name = "fuse"; |
872 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 872 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
873 | fc->bdi.unplug_io_fn = default_unplug_io_fn; | ||
874 | /* fuse does its own writeback accounting | 873 | /* fuse does its own writeback accounting |
875 | fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; | 874 | fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; |
876 | 875 | ||
diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 06c48a891832..8f26d1a58912 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c | |||
@@ -74,7 +74,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, | |||
74 | return -EINVAL; | 74 | return -EINVAL; |
75 | if (S_ISLNK(inode->i_mode)) | 75 | if (S_ISLNK(inode->i_mode)) |
76 | return -EOPNOTSUPP; | 76 | return -EOPNOTSUPP; |
77 | if (!is_owner_or_cap(inode)) | 77 | if (!inode_owner_or_capable(inode)) |
78 | return -EPERM; | 78 | return -EPERM; |
79 | if (value) { | 79 | if (value) { |
80 | acl = posix_acl_from_xattr(value, size); | 80 | acl = posix_acl_from_xattr(value, size); |
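
The is_owner_or_cap() to inode_owner_or_capable() conversions in this series are mechanical at the call sites. For orientation, the old macro expanded to roughly the check below; the replacement function preserves this behaviour while (in the same kernel series) gaining user-namespace awareness, so treat the sketch as an approximation rather than the new function's exact body:

	/* Approximate expansion of the old is_owner_or_cap() macro. */
	static inline bool owner_or_cap_sketch(const struct inode *inode)
	{
		return current_fsuid() == inode->i_uid || capable(CAP_FOWNER);
	}
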
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index aad77e4f61b5..c71995b111bf 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -1117,7 +1117,6 @@ static const struct address_space_operations gfs2_writeback_aops = { | |||
1117 | .writepages = gfs2_writeback_writepages, | 1117 | .writepages = gfs2_writeback_writepages, |
1118 | .readpage = gfs2_readpage, | 1118 | .readpage = gfs2_readpage, |
1119 | .readpages = gfs2_readpages, | 1119 | .readpages = gfs2_readpages, |
1120 | .sync_page = block_sync_page, | ||
1121 | .write_begin = gfs2_write_begin, | 1120 | .write_begin = gfs2_write_begin, |
1122 | .write_end = gfs2_write_end, | 1121 | .write_end = gfs2_write_end, |
1123 | .bmap = gfs2_bmap, | 1122 | .bmap = gfs2_bmap, |
@@ -1133,7 +1132,6 @@ static const struct address_space_operations gfs2_ordered_aops = { | |||
1133 | .writepage = gfs2_ordered_writepage, | 1132 | .writepage = gfs2_ordered_writepage, |
1134 | .readpage = gfs2_readpage, | 1133 | .readpage = gfs2_readpage, |
1135 | .readpages = gfs2_readpages, | 1134 | .readpages = gfs2_readpages, |
1136 | .sync_page = block_sync_page, | ||
1137 | .write_begin = gfs2_write_begin, | 1135 | .write_begin = gfs2_write_begin, |
1138 | .write_end = gfs2_write_end, | 1136 | .write_end = gfs2_write_end, |
1139 | .set_page_dirty = gfs2_set_page_dirty, | 1137 | .set_page_dirty = gfs2_set_page_dirty, |
@@ -1151,7 +1149,6 @@ static const struct address_space_operations gfs2_jdata_aops = { | |||
1151 | .writepages = gfs2_jdata_writepages, | 1149 | .writepages = gfs2_jdata_writepages, |
1152 | .readpage = gfs2_readpage, | 1150 | .readpage = gfs2_readpage, |
1153 | .readpages = gfs2_readpages, | 1151 | .readpages = gfs2_readpages, |
1154 | .sync_page = block_sync_page, | ||
1155 | .write_begin = gfs2_write_begin, | 1152 | .write_begin = gfs2_write_begin, |
1156 | .write_end = gfs2_write_end, | 1153 | .write_end = gfs2_write_end, |
1157 | .set_page_dirty = gfs2_set_page_dirty, | 1154 | .set_page_dirty = gfs2_set_page_dirty, |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4074b952b059..b2682e073eee 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -221,7 +221,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) | |||
221 | goto out_drop_write; | 221 | goto out_drop_write; |
222 | 222 | ||
223 | error = -EACCES; | 223 | error = -EACCES; |
224 | if (!is_owner_or_cap(inode)) | 224 | if (!inode_owner_or_capable(inode)) |
225 | goto out; | 225 | goto out; |
226 | 226 | ||
227 | error = 0; | 227 | error = 0; |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index e7ed31f858dd..5b102c1887fd 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -121,7 +121,7 @@ __acquires(&sdp->sd_ail_lock) | |||
121 | lock_buffer(bh); | 121 | lock_buffer(bh); |
122 | if (test_clear_buffer_dirty(bh)) { | 122 | if (test_clear_buffer_dirty(bh)) { |
123 | bh->b_end_io = end_buffer_write_sync; | 123 | bh->b_end_io = end_buffer_write_sync; |
124 | submit_bh(WRITE_SYNC_PLUG, bh); | 124 | submit_bh(WRITE_SYNC, bh); |
125 | } else { | 125 | } else { |
126 | unlock_buffer(bh); | 126 | unlock_buffer(bh); |
127 | brelse(bh); | 127 | brelse(bh); |
@@ -647,7 +647,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) | |||
647 | lock_buffer(bh); | 647 | lock_buffer(bh); |
648 | if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { | 648 | if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { |
649 | bh->b_end_io = end_buffer_write_sync; | 649 | bh->b_end_io = end_buffer_write_sync; |
650 | submit_bh(WRITE_SYNC_PLUG, bh); | 650 | submit_bh(WRITE_SYNC, bh); |
651 | } else { | 651 | } else { |
652 | unlock_buffer(bh); | 652 | unlock_buffer(bh); |
653 | brelse(bh); | 653 | brelse(bh); |
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index e919abf25ecd..51d27f00ebb4 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -204,7 +204,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) | |||
204 | } | 204 | } |
205 | 205 | ||
206 | gfs2_log_unlock(sdp); | 206 | gfs2_log_unlock(sdp); |
207 | submit_bh(WRITE_SYNC_PLUG, bh); | 207 | submit_bh(WRITE_SYNC, bh); |
208 | gfs2_log_lock(sdp); | 208 | gfs2_log_lock(sdp); |
209 | 209 | ||
210 | n = 0; | 210 | n = 0; |
@@ -214,7 +214,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) | |||
214 | gfs2_log_unlock(sdp); | 214 | gfs2_log_unlock(sdp); |
215 | lock_buffer(bd2->bd_bh); | 215 | lock_buffer(bd2->bd_bh); |
216 | bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); | 216 | bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); |
217 | submit_bh(WRITE_SYNC_PLUG, bh); | 217 | submit_bh(WRITE_SYNC, bh); |
218 | gfs2_log_lock(sdp); | 218 | gfs2_log_lock(sdp); |
219 | if (++n >= num) | 219 | if (++n >= num) |
220 | break; | 220 | break; |
@@ -356,7 +356,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) | |||
356 | sdp->sd_log_num_revoke--; | 356 | sdp->sd_log_num_revoke--; |
357 | 357 | ||
358 | if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { | 358 | if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { |
359 | submit_bh(WRITE_SYNC_PLUG, bh); | 359 | submit_bh(WRITE_SYNC, bh); |
360 | 360 | ||
361 | bh = gfs2_log_get_buf(sdp); | 361 | bh = gfs2_log_get_buf(sdp); |
362 | mh = (struct gfs2_meta_header *)bh->b_data; | 362 | mh = (struct gfs2_meta_header *)bh->b_data; |
@@ -373,7 +373,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) | |||
373 | } | 373 | } |
374 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); | 374 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); |
375 | 375 | ||
376 | submit_bh(WRITE_SYNC_PLUG, bh); | 376 | submit_bh(WRITE_SYNC, bh); |
377 | } | 377 | } |
378 | 378 | ||
379 | static void revoke_lo_before_scan(struct gfs2_jdesc *jd, | 379 | static void revoke_lo_before_scan(struct gfs2_jdesc *jd, |
@@ -575,7 +575,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
575 | ptr = bh_log_ptr(bh); | 575 | ptr = bh_log_ptr(bh); |
576 | 576 | ||
577 | get_bh(bh); | 577 | get_bh(bh); |
578 | submit_bh(WRITE_SYNC_PLUG, bh); | 578 | submit_bh(WRITE_SYNC, bh); |
579 | gfs2_log_lock(sdp); | 579 | gfs2_log_lock(sdp); |
580 | while(!list_empty(list)) { | 580 | while(!list_empty(list)) { |
581 | bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); | 581 | bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); |
@@ -601,7 +601,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
601 | } else { | 601 | } else { |
602 | bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); | 602 | bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); |
603 | } | 603 | } |
604 | submit_bh(WRITE_SYNC_PLUG, bh1); | 604 | submit_bh(WRITE_SYNC, bh1); |
605 | gfs2_log_lock(sdp); | 605 | gfs2_log_lock(sdp); |
606 | ptr += 2; | 606 | ptr += 2; |
607 | } | 607 | } |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 01d97f486553..675349b5a133 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb | |||
37 | struct buffer_head *bh, *head; | 37 | struct buffer_head *bh, *head; |
38 | int nr_underway = 0; | 38 | int nr_underway = 0; |
39 | int write_op = REQ_META | | 39 | int write_op = REQ_META | |
40 | (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE); | 40 | (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); |
41 | 41 | ||
42 | BUG_ON(!PageLocked(page)); | 42 | BUG_ON(!PageLocked(page)); |
43 | BUG_ON(!page_has_buffers(page)); | 43 | BUG_ON(!page_has_buffers(page)); |
@@ -94,7 +94,6 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb | |||
94 | const struct address_space_operations gfs2_meta_aops = { | 94 | const struct address_space_operations gfs2_meta_aops = { |
95 | .writepage = gfs2_aspace_writepage, | 95 | .writepage = gfs2_aspace_writepage, |
96 | .releasepage = gfs2_releasepage, | 96 | .releasepage = gfs2_releasepage, |
97 | .sync_page = block_sync_page, | ||
98 | }; | 97 | }; |
99 | 98 | ||
100 | /** | 99 | /** |
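
The WRITE_SYNC_PLUG conversions and the .sync_page removals above (and in the aops tables that follow) are two faces of the same block-layer change: per-queue plugging is gone, so there is no unplug hook left to call, and submitters that want batching use an explicit on-stack plug instead. A minimal sketch of the replacement idiom, mirroring the gfs2 submit pattern above:

	#include <linux/blkdev.h>
	#include <linux/buffer_head.h>

	static void submit_dirty_buffers_plugged(struct buffer_head **bhs, int nr)
	{
		struct blk_plug plug;
		int i;

		blk_start_plug(&plug);		/* queue bios on the task */
		for (i = 0; i < nr; i++) {
			lock_buffer(bhs[i]);
			if (test_clear_buffer_dirty(bhs[i])) {
				get_bh(bhs[i]);	/* dropped by the end_io handler */
				bhs[i]->b_end_io = end_buffer_write_sync;
				submit_bh(WRITE_SYNC, bhs[i]);
			} else {
				unlock_buffer(bhs[i]);
			}
		}
		blk_finish_plug(&plug);		/* dispatch everything queued */
	}
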
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index dffb4e996643..fff16c968e67 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c | |||
@@ -150,7 +150,6 @@ static int hfs_writepages(struct address_space *mapping, | |||
150 | const struct address_space_operations hfs_btree_aops = { | 150 | const struct address_space_operations hfs_btree_aops = { |
151 | .readpage = hfs_readpage, | 151 | .readpage = hfs_readpage, |
152 | .writepage = hfs_writepage, | 152 | .writepage = hfs_writepage, |
153 | .sync_page = block_sync_page, | ||
154 | .write_begin = hfs_write_begin, | 153 | .write_begin = hfs_write_begin, |
155 | .write_end = generic_write_end, | 154 | .write_end = generic_write_end, |
156 | .bmap = hfs_bmap, | 155 | .bmap = hfs_bmap, |
@@ -160,7 +159,6 @@ const struct address_space_operations hfs_btree_aops = { | |||
160 | const struct address_space_operations hfs_aops = { | 159 | const struct address_space_operations hfs_aops = { |
161 | .readpage = hfs_readpage, | 160 | .readpage = hfs_readpage, |
162 | .writepage = hfs_writepage, | 161 | .writepage = hfs_writepage, |
163 | .sync_page = block_sync_page, | ||
164 | .write_begin = hfs_write_begin, | 162 | .write_begin = hfs_write_begin, |
165 | .write_end = generic_write_end, | 163 | .write_end = generic_write_end, |
166 | .bmap = hfs_bmap, | 164 | .bmap = hfs_bmap, |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index a8df651747f0..b248a6cfcad9 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
@@ -146,7 +146,6 @@ static int hfsplus_writepages(struct address_space *mapping, | |||
146 | const struct address_space_operations hfsplus_btree_aops = { | 146 | const struct address_space_operations hfsplus_btree_aops = { |
147 | .readpage = hfsplus_readpage, | 147 | .readpage = hfsplus_readpage, |
148 | .writepage = hfsplus_writepage, | 148 | .writepage = hfsplus_writepage, |
149 | .sync_page = block_sync_page, | ||
150 | .write_begin = hfsplus_write_begin, | 149 | .write_begin = hfsplus_write_begin, |
151 | .write_end = generic_write_end, | 150 | .write_end = generic_write_end, |
152 | .bmap = hfsplus_bmap, | 151 | .bmap = hfsplus_bmap, |
@@ -156,7 +155,6 @@ const struct address_space_operations hfsplus_btree_aops = { | |||
156 | const struct address_space_operations hfsplus_aops = { | 155 | const struct address_space_operations hfsplus_aops = { |
157 | .readpage = hfsplus_readpage, | 156 | .readpage = hfsplus_readpage, |
158 | .writepage = hfsplus_writepage, | 157 | .writepage = hfsplus_writepage, |
159 | .sync_page = block_sync_page, | ||
160 | .write_begin = hfsplus_write_begin, | 158 | .write_begin = hfsplus_write_begin, |
161 | .write_end = generic_write_end, | 159 | .write_end = generic_write_end, |
162 | .bmap = hfsplus_bmap, | 160 | .bmap = hfsplus_bmap, |
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index 508ce662ce12..fbaa6690c8e0 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c | |||
@@ -47,7 +47,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) | |||
47 | if (err) | 47 | if (err) |
48 | goto out; | 48 | goto out; |
49 | 49 | ||
50 | if (!is_owner_or_cap(inode)) { | 50 | if (!inode_owner_or_capable(inode)) { |
51 | err = -EACCES; | 51 | err = -EACCES; |
52 | goto out_drop_write; | 52 | goto out_drop_write; |
53 | } | 53 | } |
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 2dbae20450f8..9b9eb6933e43 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c | |||
@@ -119,7 +119,6 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) | |||
119 | const struct address_space_operations hpfs_aops = { | 119 | const struct address_space_operations hpfs_aops = { |
120 | .readpage = hpfs_readpage, | 120 | .readpage = hpfs_readpage, |
121 | .writepage = hpfs_writepage, | 121 | .writepage = hpfs_writepage, |
122 | .sync_page = block_sync_page, | ||
123 | .write_begin = hpfs_write_begin, | 122 | .write_begin = hpfs_write_begin, |
124 | .write_end = generic_write_end, | 123 | .write_end = generic_write_end, |
125 | .bmap = _hpfs_bmap | 124 | .bmap = _hpfs_bmap |
diff --git a/fs/inode.c b/fs/inode.c index 16fefd373fc2..05a1f75ae791 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -25,6 +25,39 @@ | |||
25 | #include <linux/async.h> | 25 | #include <linux/async.h> |
26 | #include <linux/posix_acl.h> | 26 | #include <linux/posix_acl.h> |
27 | #include <linux/ima.h> | 27 | #include <linux/ima.h> |
28 | #include <linux/cred.h> | ||
29 | #include "internal.h" | ||
30 | |||
31 | /* | ||
32 | * inode locking rules. | ||
33 | * | ||
34 | * inode->i_lock protects: | ||
35 | * inode->i_state, inode->i_hash, __iget() | ||
36 | * inode_lru_lock protects: | ||
37 | * inode_lru, inode->i_lru | ||
38 | * inode_sb_list_lock protects: | ||
39 | * sb->s_inodes, inode->i_sb_list | ||
40 | * inode_wb_list_lock protects: | ||
41 | * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list | ||
42 | * inode_hash_lock protects: | ||
43 | * inode_hashtable, inode->i_hash | ||
44 | * | ||
45 | * Lock ordering: | ||
46 | * | ||
47 | * inode_sb_list_lock | ||
48 | * inode->i_lock | ||
49 | * inode_lru_lock | ||
50 | * | ||
51 | * inode_wb_list_lock | ||
52 | * inode->i_lock | ||
53 | * | ||
54 | * inode_hash_lock | ||
55 | * inode_sb_list_lock | ||
56 | * inode->i_lock | ||
57 | * | ||
58 | * iunique_lock | ||
59 | * inode_hash_lock | ||
60 | */ | ||
28 | 61 | ||
29 | /* | 62 | /* |
30 | * This is needed for the following functions: | 63 | * This is needed for the following functions: |
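
The new locking rules above are easiest to read as a worked example. A hypothetical helper (not part of this patch, shown only to illustrate the ordering table) that isolates an inode would take the locks strictly outside-in:

	static void isolate_inode_sketch(struct inode *inode)
	{
		spin_lock(&inode_sb_list_lock);		/* outermost list lock */
		spin_lock(&inode->i_lock);		/* per-inode state lock */
		inode->i_state |= I_FREEING;		/* i_state needs i_lock */
		spin_lock(&inode_lru_lock);		/* innermost: LRU linkage */
		list_del_init(&inode->i_lru);
		spin_unlock(&inode_lru_lock);
		spin_unlock(&inode->i_lock);
		list_del_init(&inode->i_sb_list);	/* still under sb list lock */
		spin_unlock(&inode_sb_list_lock);
	}
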
@@ -59,6 +92,8 @@ | |||
59 | 92 | ||
60 | static unsigned int i_hash_mask __read_mostly; | 93 | static unsigned int i_hash_mask __read_mostly; |
61 | static unsigned int i_hash_shift __read_mostly; | 94 | static unsigned int i_hash_shift __read_mostly; |
95 | static struct hlist_head *inode_hashtable __read_mostly; | ||
96 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); | ||
62 | 97 | ||
63 | /* | 98 | /* |
64 | * Each inode can be on two separate lists. One is | 99 | * Each inode can be on two separate lists. One is |
@@ -73,15 +108,10 @@ static unsigned int i_hash_shift __read_mostly; | |||
73 | */ | 108 | */ |
74 | 109 | ||
75 | static LIST_HEAD(inode_lru); | 110 | static LIST_HEAD(inode_lru); |
76 | static struct hlist_head *inode_hashtable __read_mostly; | 111 | static DEFINE_SPINLOCK(inode_lru_lock); |
77 | 112 | ||
78 | /* | 113 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); |
79 | * A simple spinlock to protect the list manipulations. | 114 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); |
80 | * | ||
81 | * NOTE! You also have to own the lock if you change | ||
82 | * the i_state of an inode while it is in use.. | ||
83 | */ | ||
84 | DEFINE_SPINLOCK(inode_lock); | ||
85 | 115 | ||
86 | /* | 116 | /* |
87 | * iprune_sem provides exclusion between the icache shrinking and the | 117 | * iprune_sem provides exclusion between the icache shrinking and the |
@@ -136,15 +166,6 @@ int proc_nr_inodes(ctl_table *table, int write, | |||
136 | } | 166 | } |
137 | #endif | 167 | #endif |
138 | 168 | ||
139 | static void wake_up_inode(struct inode *inode) | ||
140 | { | ||
141 | /* | ||
142 | * Prevent speculative execution through spin_unlock(&inode_lock); | ||
143 | */ | ||
144 | smp_mb(); | ||
145 | wake_up_bit(&inode->i_state, __I_NEW); | ||
146 | } | ||
147 | |||
148 | /** | 169 | /** |
149 | * inode_init_always - perform inode structure initialisation | 170 | * inode_init_always - perform inode structure initialisation |
150 | * @sb: superblock inode belongs to | 171 | * @sb: superblock inode belongs to |
@@ -335,7 +356,7 @@ static void init_once(void *foo) | |||
335 | } | 356 | } |
336 | 357 | ||
337 | /* | 358 | /* |
338 | * inode_lock must be held | 359 | * inode->i_lock must be held |
339 | */ | 360 | */ |
340 | void __iget(struct inode *inode) | 361 | void __iget(struct inode *inode) |
341 | { | 362 | { |
@@ -353,23 +374,22 @@ EXPORT_SYMBOL(ihold); | |||
353 | 374 | ||
354 | static void inode_lru_list_add(struct inode *inode) | 375 | static void inode_lru_list_add(struct inode *inode) |
355 | { | 376 | { |
377 | spin_lock(&inode_lru_lock); | ||
356 | if (list_empty(&inode->i_lru)) { | 378 | if (list_empty(&inode->i_lru)) { |
357 | list_add(&inode->i_lru, &inode_lru); | 379 | list_add(&inode->i_lru, &inode_lru); |
358 | inodes_stat.nr_unused++; | 380 | inodes_stat.nr_unused++; |
359 | } | 381 | } |
382 | spin_unlock(&inode_lru_lock); | ||
360 | } | 383 | } |
361 | 384 | ||
362 | static void inode_lru_list_del(struct inode *inode) | 385 | static void inode_lru_list_del(struct inode *inode) |
363 | { | 386 | { |
387 | spin_lock(&inode_lru_lock); | ||
364 | if (!list_empty(&inode->i_lru)) { | 388 | if (!list_empty(&inode->i_lru)) { |
365 | list_del_init(&inode->i_lru); | 389 | list_del_init(&inode->i_lru); |
366 | inodes_stat.nr_unused--; | 390 | inodes_stat.nr_unused--; |
367 | } | 391 | } |
368 | } | 392 | spin_unlock(&inode_lru_lock); |
369 | |||
370 | static inline void __inode_sb_list_add(struct inode *inode) | ||
371 | { | ||
372 | list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); | ||
373 | } | 393 | } |
374 | 394 | ||
375 | /** | 395 | /** |
@@ -378,15 +398,17 @@ static inline void __inode_sb_list_add(struct inode *inode) | |||
378 | */ | 398 | */ |
379 | void inode_sb_list_add(struct inode *inode) | 399 | void inode_sb_list_add(struct inode *inode) |
380 | { | 400 | { |
381 | spin_lock(&inode_lock); | 401 | spin_lock(&inode_sb_list_lock); |
382 | __inode_sb_list_add(inode); | 402 | list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); |
383 | spin_unlock(&inode_lock); | 403 | spin_unlock(&inode_sb_list_lock); |
384 | } | 404 | } |
385 | EXPORT_SYMBOL_GPL(inode_sb_list_add); | 405 | EXPORT_SYMBOL_GPL(inode_sb_list_add); |
386 | 406 | ||
387 | static inline void __inode_sb_list_del(struct inode *inode) | 407 | static inline void inode_sb_list_del(struct inode *inode) |
388 | { | 408 | { |
409 | spin_lock(&inode_sb_list_lock); | ||
389 | list_del_init(&inode->i_sb_list); | 410 | list_del_init(&inode->i_sb_list); |
411 | spin_unlock(&inode_sb_list_lock); | ||
390 | } | 412 | } |
391 | 413 | ||
392 | static unsigned long hash(struct super_block *sb, unsigned long hashval) | 414 | static unsigned long hash(struct super_block *sb, unsigned long hashval) |
@@ -411,24 +433,15 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) | |||
411 | { | 433 | { |
412 | struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); | 434 | struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); |
413 | 435 | ||
414 | spin_lock(&inode_lock); | 436 | spin_lock(&inode_hash_lock); |
437 | spin_lock(&inode->i_lock); | ||
415 | hlist_add_head(&inode->i_hash, b); | 438 | hlist_add_head(&inode->i_hash, b); |
416 | spin_unlock(&inode_lock); | 439 | spin_unlock(&inode->i_lock); |
440 | spin_unlock(&inode_hash_lock); | ||
417 | } | 441 | } |
418 | EXPORT_SYMBOL(__insert_inode_hash); | 442 | EXPORT_SYMBOL(__insert_inode_hash); |
419 | 443 | ||
420 | /** | 444 | /** |
421 | * __remove_inode_hash - remove an inode from the hash | ||
422 | * @inode: inode to unhash | ||
423 | * | ||
424 | * Remove an inode from the superblock. | ||
425 | */ | ||
426 | static void __remove_inode_hash(struct inode *inode) | ||
427 | { | ||
428 | hlist_del_init(&inode->i_hash); | ||
429 | } | ||
430 | |||
431 | /** | ||
432 | * remove_inode_hash - remove an inode from the hash | 445 | * remove_inode_hash - remove an inode from the hash |
433 | * @inode: inode to unhash | 446 | * @inode: inode to unhash |
434 | * | 447 | * |
@@ -436,9 +449,11 @@ static void __remove_inode_hash(struct inode *inode) | |||
436 | */ | 449 | */ |
437 | void remove_inode_hash(struct inode *inode) | 450 | void remove_inode_hash(struct inode *inode) |
438 | { | 451 | { |
439 | spin_lock(&inode_lock); | 452 | spin_lock(&inode_hash_lock); |
453 | spin_lock(&inode->i_lock); | ||
440 | hlist_del_init(&inode->i_hash); | 454 | hlist_del_init(&inode->i_hash); |
441 | spin_unlock(&inode_lock); | 455 | spin_unlock(&inode->i_lock); |
456 | spin_unlock(&inode_hash_lock); | ||
442 | } | 457 | } |
443 | EXPORT_SYMBOL(remove_inode_hash); | 458 | EXPORT_SYMBOL(remove_inode_hash); |
444 | 459 | ||
@@ -455,10 +470,29 @@ void end_writeback(struct inode *inode) | |||
455 | } | 470 | } |
456 | EXPORT_SYMBOL(end_writeback); | 471 | EXPORT_SYMBOL(end_writeback); |
457 | 472 | ||
473 | /* | ||
474 | * Free the inode passed in, removing it from the lists it is still connected | ||
475 | * to. We remove any pages still attached to the inode and wait for any IO that | ||
476 | * is still in progress before finally destroying the inode. | ||
477 | * | ||
478 | * An inode must already be marked I_FREEING so that we avoid the inode being | ||
479 | * moved back onto lists if we race with other code that manipulates the lists | ||
480 | * (e.g. writeback_single_inode). The caller is responsible for setting this. | ||
481 | * | ||
482 | * An inode must already be removed from the LRU list before being evicted from | ||
483 | * the cache. This should occur atomically with setting the I_FREEING state | ||
484 | * flag, so no inodes here should ever be on the LRU when being evicted. | ||
485 | */ | ||
458 | static void evict(struct inode *inode) | 486 | static void evict(struct inode *inode) |
459 | { | 487 | { |
460 | const struct super_operations *op = inode->i_sb->s_op; | 488 | const struct super_operations *op = inode->i_sb->s_op; |
461 | 489 | ||
490 | BUG_ON(!(inode->i_state & I_FREEING)); | ||
491 | BUG_ON(!list_empty(&inode->i_lru)); | ||
492 | |||
493 | inode_wb_list_del(inode); | ||
494 | inode_sb_list_del(inode); | ||
495 | |||
462 | if (op->evict_inode) { | 496 | if (op->evict_inode) { |
463 | op->evict_inode(inode); | 497 | op->evict_inode(inode); |
464 | } else { | 498 | } else { |
@@ -470,6 +504,15 @@ static void evict(struct inode *inode) | |||
470 | bd_forget(inode); | 504 | bd_forget(inode); |
471 | if (S_ISCHR(inode->i_mode) && inode->i_cdev) | 505 | if (S_ISCHR(inode->i_mode) && inode->i_cdev) |
472 | cd_forget(inode); | 506 | cd_forget(inode); |
507 | |||
508 | remove_inode_hash(inode); | ||
509 | |||
510 | spin_lock(&inode->i_lock); | ||
511 | wake_up_bit(&inode->i_state, __I_NEW); | ||
512 | BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); | ||
513 | spin_unlock(&inode->i_lock); | ||
514 | |||
515 | destroy_inode(inode); | ||
473 | } | 516 | } |
474 | 517 | ||
475 | /* | 518 | /* |
@@ -488,14 +531,6 @@ static void dispose_list(struct list_head *head) | |||
488 | list_del_init(&inode->i_lru); | 531 | list_del_init(&inode->i_lru); |
489 | 532 | ||
490 | evict(inode); | 533 | evict(inode); |
491 | |||
492 | spin_lock(&inode_lock); | ||
493 | __remove_inode_hash(inode); | ||
494 | __inode_sb_list_del(inode); | ||
495 | spin_unlock(&inode_lock); | ||
496 | |||
497 | wake_up_inode(inode); | ||
498 | destroy_inode(inode); | ||
499 | } | 534 | } |
500 | } | 535 | } |
501 | 536 | ||
@@ -513,25 +548,23 @@ void evict_inodes(struct super_block *sb) | |||
513 | struct inode *inode, *next; | 548 | struct inode *inode, *next; |
514 | LIST_HEAD(dispose); | 549 | LIST_HEAD(dispose); |
515 | 550 | ||
516 | spin_lock(&inode_lock); | 551 | spin_lock(&inode_sb_list_lock); |
517 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { | 552 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { |
518 | if (atomic_read(&inode->i_count)) | 553 | if (atomic_read(&inode->i_count)) |
519 | continue; | 554 | continue; |
520 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) | 555 | |
556 | spin_lock(&inode->i_lock); | ||
557 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { | ||
558 | spin_unlock(&inode->i_lock); | ||
521 | continue; | 559 | continue; |
560 | } | ||
522 | 561 | ||
523 | inode->i_state |= I_FREEING; | 562 | inode->i_state |= I_FREEING; |
524 | 563 | inode_lru_list_del(inode); | |
525 | /* | 564 | spin_unlock(&inode->i_lock); |
526 | * Move the inode off the IO lists and LRU once I_FREEING is | 565 | list_add(&inode->i_lru, &dispose); |
527 | * set so that it won't get moved back on there if it is dirty. | ||
528 | */ | ||
529 | list_move(&inode->i_lru, &dispose); | ||
530 | list_del_init(&inode->i_wb_list); | ||
531 | if (!(inode->i_state & (I_DIRTY | I_SYNC))) | ||
532 | inodes_stat.nr_unused--; | ||
533 | } | 566 | } |
534 | spin_unlock(&inode_lock); | 567 | spin_unlock(&inode_sb_list_lock); |
535 | 568 | ||
536 | dispose_list(&dispose); | 569 | dispose_list(&dispose); |
537 | 570 | ||
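
After this hunk, evict_inodes() is a clean instance of the two-phase teardown that dispose_list() enables: mark and collect under the locks, then do the sleeping eviction work with nothing held. In outline (a condensed restatement of the new code, not new behaviour):

	LIST_HEAD(dispose);

	spin_lock(&inode_sb_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		if (atomic_read(&inode->i_count))
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		inode->i_state |= I_FREEING;	/* fences off dirtying/writeback */
		inode_lru_list_del(inode);	/* nests inode_lru_lock inside */
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);	/* i_lru reused as linkage */
	}
	spin_unlock(&inode_sb_list_lock);

	dispose_list(&dispose);			/* evict() each, no locks held */
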
@@ -560,31 +593,30 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) | |||
560 | struct inode *inode, *next; | 593 | struct inode *inode, *next; |
561 | LIST_HEAD(dispose); | 594 | LIST_HEAD(dispose); |
562 | 595 | ||
563 | spin_lock(&inode_lock); | 596 | spin_lock(&inode_sb_list_lock); |
564 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { | 597 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { |
565 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) | 598 | spin_lock(&inode->i_lock); |
599 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { | ||
600 | spin_unlock(&inode->i_lock); | ||
566 | continue; | 601 | continue; |
602 | } | ||
567 | if (inode->i_state & I_DIRTY && !kill_dirty) { | 603 | if (inode->i_state & I_DIRTY && !kill_dirty) { |
604 | spin_unlock(&inode->i_lock); | ||
568 | busy = 1; | 605 | busy = 1; |
569 | continue; | 606 | continue; |
570 | } | 607 | } |
571 | if (atomic_read(&inode->i_count)) { | 608 | if (atomic_read(&inode->i_count)) { |
609 | spin_unlock(&inode->i_lock); | ||
572 | busy = 1; | 610 | busy = 1; |
573 | continue; | 611 | continue; |
574 | } | 612 | } |
575 | 613 | ||
576 | inode->i_state |= I_FREEING; | 614 | inode->i_state |= I_FREEING; |
577 | 615 | inode_lru_list_del(inode); | |
578 | /* | 616 | spin_unlock(&inode->i_lock); |
579 | * Move the inode off the IO lists and LRU once I_FREEING is | 617 | list_add(&inode->i_lru, &dispose); |
580 | * set so that it won't get moved back on there if it is dirty. | ||
581 | */ | ||
582 | list_move(&inode->i_lru, &dispose); | ||
583 | list_del_init(&inode->i_wb_list); | ||
584 | if (!(inode->i_state & (I_DIRTY | I_SYNC))) | ||
585 | inodes_stat.nr_unused--; | ||
586 | } | 618 | } |
587 | spin_unlock(&inode_lock); | 619 | spin_unlock(&inode_sb_list_lock); |
588 | 620 | ||
589 | dispose_list(&dispose); | 621 | dispose_list(&dispose); |
590 | 622 | ||
@@ -606,7 +638,7 @@ static int can_unuse(struct inode *inode) | |||
606 | 638 | ||
607 | /* | 639 | /* |
608 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to a | 640 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to a |
609 | * temporary list and then are freed outside inode_lock by dispose_list(). | 641 | * temporary list and then are freed outside inode_lru_lock by dispose_list(). |
610 | * | 642 | * |
611 | * Any inodes which are pinned purely because of attached pagecache have their | 643 | * Any inodes which are pinned purely because of attached pagecache have their |
612 | * pagecache removed. If the inode has metadata buffers attached to | 644 | * pagecache removed. If the inode has metadata buffers attached to |
@@ -627,7 +659,7 @@ static void prune_icache(int nr_to_scan) | |||
627 | unsigned long reap = 0; | 659 | unsigned long reap = 0; |
628 | 660 | ||
629 | down_read(&iprune_sem); | 661 | down_read(&iprune_sem); |
630 | spin_lock(&inode_lock); | 662 | spin_lock(&inode_lru_lock); |
631 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { | 663 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { |
632 | struct inode *inode; | 664 | struct inode *inode; |
633 | 665 | ||
@@ -637,53 +669,67 @@ static void prune_icache(int nr_to_scan) | |||
637 | inode = list_entry(inode_lru.prev, struct inode, i_lru); | 669 | inode = list_entry(inode_lru.prev, struct inode, i_lru); |
638 | 670 | ||
639 | /* | 671 | /* |
672 | * we are inverting the inode_lru_lock/inode->i_lock here, | ||
673 | * so use a trylock. If we fail to get the lock, just move the | ||
674 | * inode to the back of the list so we don't spin on it. | ||
675 | */ | ||
676 | if (!spin_trylock(&inode->i_lock)) { | ||
677 | list_move(&inode->i_lru, &inode_lru); | ||
678 | continue; | ||
679 | } | ||
680 | |||
681 | /* | ||
640 | * Referenced or dirty inodes are still in use. Give them | 682 | * Referenced or dirty inodes are still in use. Give them |
641 | * another pass through the LRU as we cannot reclaim them now. | 683 | * another pass through the LRU as we cannot reclaim them now. |
642 | */ | 684 | */ |
643 | if (atomic_read(&inode->i_count) || | 685 | if (atomic_read(&inode->i_count) || |
644 | (inode->i_state & ~I_REFERENCED)) { | 686 | (inode->i_state & ~I_REFERENCED)) { |
645 | list_del_init(&inode->i_lru); | 687 | list_del_init(&inode->i_lru); |
688 | spin_unlock(&inode->i_lock); | ||
646 | inodes_stat.nr_unused--; | 689 | inodes_stat.nr_unused--; |
647 | continue; | 690 | continue; |
648 | } | 691 | } |
649 | 692 | ||
650 | /* recently referenced inodes get one more pass */ | 693 | /* recently referenced inodes get one more pass */ |
651 | if (inode->i_state & I_REFERENCED) { | 694 | if (inode->i_state & I_REFERENCED) { |
652 | list_move(&inode->i_lru, &inode_lru); | ||
653 | inode->i_state &= ~I_REFERENCED; | 695 | inode->i_state &= ~I_REFERENCED; |
696 | list_move(&inode->i_lru, &inode_lru); | ||
697 | spin_unlock(&inode->i_lock); | ||
654 | continue; | 698 | continue; |
655 | } | 699 | } |
656 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { | 700 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { |
657 | __iget(inode); | 701 | __iget(inode); |
658 | spin_unlock(&inode_lock); | 702 | spin_unlock(&inode->i_lock); |
703 | spin_unlock(&inode_lru_lock); | ||
659 | if (remove_inode_buffers(inode)) | 704 | if (remove_inode_buffers(inode)) |
660 | reap += invalidate_mapping_pages(&inode->i_data, | 705 | reap += invalidate_mapping_pages(&inode->i_data, |
661 | 0, -1); | 706 | 0, -1); |
662 | iput(inode); | 707 | iput(inode); |
663 | spin_lock(&inode_lock); | 708 | spin_lock(&inode_lru_lock); |
664 | 709 | ||
665 | if (inode != list_entry(inode_lru.next, | 710 | if (inode != list_entry(inode_lru.next, |
666 | struct inode, i_lru)) | 711 | struct inode, i_lru)) |
667 | continue; /* wrong inode or list_empty */ | 712 | continue; /* wrong inode or list_empty */ |
668 | if (!can_unuse(inode)) | 713 | /* avoid lock inversions with trylock */ |
714 | if (!spin_trylock(&inode->i_lock)) | ||
669 | continue; | 715 | continue; |
716 | if (!can_unuse(inode)) { | ||
717 | spin_unlock(&inode->i_lock); | ||
718 | continue; | ||
719 | } | ||
670 | } | 720 | } |
671 | WARN_ON(inode->i_state & I_NEW); | 721 | WARN_ON(inode->i_state & I_NEW); |
672 | inode->i_state |= I_FREEING; | 722 | inode->i_state |= I_FREEING; |
723 | spin_unlock(&inode->i_lock); | ||
673 | 724 | ||
674 | /* | ||
675 | * Move the inode off the IO lists and LRU once I_FREEING is | ||
676 | * set so that it won't get moved back on there if it is dirty. | ||
677 | */ | ||
678 | list_move(&inode->i_lru, &freeable); | 725 | list_move(&inode->i_lru, &freeable); |
679 | list_del_init(&inode->i_wb_list); | ||
680 | inodes_stat.nr_unused--; | 726 | inodes_stat.nr_unused--; |
681 | } | 727 | } |
682 | if (current_is_kswapd()) | 728 | if (current_is_kswapd()) |
683 | __count_vm_events(KSWAPD_INODESTEAL, reap); | 729 | __count_vm_events(KSWAPD_INODESTEAL, reap); |
684 | else | 730 | else |
685 | __count_vm_events(PGINODESTEAL, reap); | 731 | __count_vm_events(PGINODESTEAL, reap); |
686 | spin_unlock(&inode_lock); | 732 | spin_unlock(&inode_lru_lock); |
687 | 733 | ||
688 | dispose_list(&freeable); | 734 | dispose_list(&freeable); |
689 | up_read(&iprune_sem); | 735 | up_read(&iprune_sem); |
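
The trylock dance above generalizes: whenever a walk holds the inner lock of a documented pair (here inode_lru_lock, which nests inside inode->i_lock), the outer lock may only ever be tried, never waited for. A stripped-down model of the prune_icache() loop:

	static void lru_scan_sketch(int nr_to_scan)
	{
		int nr;

		spin_lock(&inode_lru_lock);
		for (nr = 0; nr < nr_to_scan; nr++) {
			struct inode *inode;

			if (list_empty(&inode_lru))
				break;
			inode = list_entry(inode_lru.prev, struct inode, i_lru);

			/*
			 * Blocking here could deadlock against a task taking
			 * i_lock then inode_lru_lock, the legal order, so
			 * try the lock and rotate the inode on failure.
			 */
			if (!spin_trylock(&inode->i_lock)) {
				list_move(&inode->i_lru, &inode_lru);
				continue;
			}
			/* ... examine i_state and reclaim under i_lock ... */
			spin_unlock(&inode->i_lock);
		}
		spin_unlock(&inode_lru_lock);
	}
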
@@ -732,15 +778,21 @@ static struct inode *find_inode(struct super_block *sb, | |||
732 | 778 | ||
733 | repeat: | 779 | repeat: |
734 | hlist_for_each_entry(inode, node, head, i_hash) { | 780 | hlist_for_each_entry(inode, node, head, i_hash) { |
735 | if (inode->i_sb != sb) | 781 | spin_lock(&inode->i_lock); |
782 | if (inode->i_sb != sb) { | ||
783 | spin_unlock(&inode->i_lock); | ||
736 | continue; | 784 | continue; |
737 | if (!test(inode, data)) | 785 | } |
786 | if (!test(inode, data)) { | ||
787 | spin_unlock(&inode->i_lock); | ||
738 | continue; | 788 | continue; |
789 | } | ||
739 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) { | 790 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) { |
740 | __wait_on_freeing_inode(inode); | 791 | __wait_on_freeing_inode(inode); |
741 | goto repeat; | 792 | goto repeat; |
742 | } | 793 | } |
743 | __iget(inode); | 794 | __iget(inode); |
795 | spin_unlock(&inode->i_lock); | ||
744 | return inode; | 796 | return inode; |
745 | } | 797 | } |
746 | return NULL; | 798 | return NULL; |
@@ -758,15 +810,21 @@ static struct inode *find_inode_fast(struct super_block *sb, | |||
758 | 810 | ||
759 | repeat: | 811 | repeat: |
760 | hlist_for_each_entry(inode, node, head, i_hash) { | 812 | hlist_for_each_entry(inode, node, head, i_hash) { |
761 | if (inode->i_ino != ino) | 813 | spin_lock(&inode->i_lock); |
814 | if (inode->i_ino != ino) { | ||
815 | spin_unlock(&inode->i_lock); | ||
762 | continue; | 816 | continue; |
763 | if (inode->i_sb != sb) | 817 | } |
818 | if (inode->i_sb != sb) { | ||
819 | spin_unlock(&inode->i_lock); | ||
764 | continue; | 820 | continue; |
821 | } | ||
765 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) { | 822 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) { |
766 | __wait_on_freeing_inode(inode); | 823 | __wait_on_freeing_inode(inode); |
767 | goto repeat; | 824 | goto repeat; |
768 | } | 825 | } |
769 | __iget(inode); | 826 | __iget(inode); |
827 | spin_unlock(&inode->i_lock); | ||
770 | return inode; | 828 | return inode; |
771 | } | 829 | } |
772 | return NULL; | 830 | return NULL; |
@@ -826,19 +884,26 @@ struct inode *new_inode(struct super_block *sb) | |||
826 | { | 884 | { |
827 | struct inode *inode; | 885 | struct inode *inode; |
828 | 886 | ||
829 | spin_lock_prefetch(&inode_lock); | 887 | spin_lock_prefetch(&inode_sb_list_lock); |
830 | 888 | ||
831 | inode = alloc_inode(sb); | 889 | inode = alloc_inode(sb); |
832 | if (inode) { | 890 | if (inode) { |
833 | spin_lock(&inode_lock); | 891 | spin_lock(&inode->i_lock); |
834 | __inode_sb_list_add(inode); | ||
835 | inode->i_state = 0; | 892 | inode->i_state = 0; |
836 | spin_unlock(&inode_lock); | 893 | spin_unlock(&inode->i_lock); |
894 | inode_sb_list_add(inode); | ||
837 | } | 895 | } |
838 | return inode; | 896 | return inode; |
839 | } | 897 | } |
840 | EXPORT_SYMBOL(new_inode); | 898 | EXPORT_SYMBOL(new_inode); |
841 | 899 | ||
900 | /** | ||
901 | * unlock_new_inode - clear the I_NEW state and wake up any waiters | ||
902 | * @inode: new inode to unlock | ||
903 | * | ||
904 | * Called when the inode is fully initialised to clear the new state of the | ||
905 | * inode and wake up anyone waiting for the inode to finish initialisation. | ||
906 | */ | ||
842 | void unlock_new_inode(struct inode *inode) | 907 | void unlock_new_inode(struct inode *inode) |
843 | { | 908 | { |
844 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 909 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
@@ -858,51 +923,67 @@ void unlock_new_inode(struct inode *inode) | |||
858 | } | 923 | } |
859 | } | 924 | } |
860 | #endif | 925 | #endif |
861 | /* | 926 | spin_lock(&inode->i_lock); |
862 | * This is special! We do not need the spinlock when clearing I_NEW, | ||
863 | * because we're guaranteed that nobody else tries to do anything about | ||
864 | * the state of the inode when it is locked, as we just created it (so | ||
865 | * there can be no old holders that haven't tested I_NEW). | ||
866 | * However we must emit the memory barrier so that other CPUs reliably | ||
867 | * see the clearing of I_NEW after the other inode initialisation has | ||
868 | * completed. | ||
869 | */ | ||
870 | smp_mb(); | ||
871 | WARN_ON(!(inode->i_state & I_NEW)); | 927 | WARN_ON(!(inode->i_state & I_NEW)); |
872 | inode->i_state &= ~I_NEW; | 928 | inode->i_state &= ~I_NEW; |
873 | wake_up_inode(inode); | 929 | wake_up_bit(&inode->i_state, __I_NEW); |
930 | spin_unlock(&inode->i_lock); | ||
874 | } | 931 | } |
875 | EXPORT_SYMBOL(unlock_new_inode); | 932 | EXPORT_SYMBOL(unlock_new_inode); |
876 | 933 | ||
877 | /* | 934 | /** |
878 | * This is called without the inode lock held.. Be careful. | 935 | * iget5_locked - obtain an inode from a mounted file system |
936 | * @sb: super block of file system | ||
937 | * @hashval: hash value (usually inode number) to get | ||
938 | * @test: callback used for comparisons between inodes | ||
939 | * @set: callback used to initialize a new struct inode | ||
940 | * @data: opaque data pointer to pass to @test and @set | ||
941 | * | ||
942 | * Search for the inode specified by @hashval and @data in the inode cache, | ||
943 | * and if present it is returned with an increased reference count. This is | ||
944 | * a generalized version of iget_locked() for file systems where the inode | ||
945 | * number is not sufficient for unique identification of an inode. | ||
946 | * | ||
947 | * If the inode is not in cache, allocate a new inode and return it locked, | ||
948 | * hashed, and with the I_NEW flag set. The file system gets to fill it in | ||
949 | * before unlocking it via unlock_new_inode(). | ||
879 | * | 950 | * |
880 | * We no longer cache the sb_flags in i_flags - see fs.h | 951 | * Note both @test and @set are called with the inode_hash_lock held, so can't |
881 | * -- rmk@arm.uk.linux.org | 952 | * sleep. |
882 | */ | 953 | */ |
883 | static struct inode *get_new_inode(struct super_block *sb, | 954 | struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, |
884 | struct hlist_head *head, | 955 | int (*test)(struct inode *, void *), |
885 | int (*test)(struct inode *, void *), | 956 | int (*set)(struct inode *, void *), void *data) |
886 | int (*set)(struct inode *, void *), | ||
887 | void *data) | ||
888 | { | 957 | { |
958 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); | ||
889 | struct inode *inode; | 959 | struct inode *inode; |
890 | 960 | ||
961 | spin_lock(&inode_hash_lock); | ||
962 | inode = find_inode(sb, head, test, data); | ||
963 | spin_unlock(&inode_hash_lock); | ||
964 | |||
965 | if (inode) { | ||
966 | wait_on_inode(inode); | ||
967 | return inode; | ||
968 | } | ||
969 | |||
891 | inode = alloc_inode(sb); | 970 | inode = alloc_inode(sb); |
892 | if (inode) { | 971 | if (inode) { |
893 | struct inode *old; | 972 | struct inode *old; |
894 | 973 | ||
895 | spin_lock(&inode_lock); | 974 | spin_lock(&inode_hash_lock); |
896 | /* We released the lock, so.. */ | 975 | /* We released the lock, so.. */ |
897 | old = find_inode(sb, head, test, data); | 976 | old = find_inode(sb, head, test, data); |
898 | if (!old) { | 977 | if (!old) { |
899 | if (set(inode, data)) | 978 | if (set(inode, data)) |
900 | goto set_failed; | 979 | goto set_failed; |
901 | 980 | ||
902 | hlist_add_head(&inode->i_hash, head); | 981 | spin_lock(&inode->i_lock); |
903 | __inode_sb_list_add(inode); | ||
904 | inode->i_state = I_NEW; | 982 | inode->i_state = I_NEW; |
905 | spin_unlock(&inode_lock); | 983 | hlist_add_head(&inode->i_hash, head); |
984 | spin_unlock(&inode->i_lock); | ||
985 | inode_sb_list_add(inode); | ||
986 | spin_unlock(&inode_hash_lock); | ||
906 | 987 | ||
907 | /* Return the locked inode with I_NEW set, the | 988 | /* Return the locked inode with I_NEW set, the |
908 | * caller is responsible for filling in the contents | 989 | * caller is responsible for filling in the contents |
@@ -915,7 +996,7 @@ static struct inode *get_new_inode(struct super_block *sb, | |||
915 | * us. Use the old inode instead of the one we just | 996 | * us. Use the old inode instead of the one we just |
916 | * allocated. | 997 | * allocated. |
917 | */ | 998 | */ |
918 | spin_unlock(&inode_lock); | 999 | spin_unlock(&inode_hash_lock); |
919 | destroy_inode(inode); | 1000 | destroy_inode(inode); |
920 | inode = old; | 1001 | inode = old; |
921 | wait_on_inode(inode); | 1002 | wait_on_inode(inode); |
@@ -923,33 +1004,53 @@ static struct inode *get_new_inode(struct super_block *sb, | |||
923 | return inode; | 1004 | return inode; |
924 | 1005 | ||
925 | set_failed: | 1006 | set_failed: |
926 | spin_unlock(&inode_lock); | 1007 | spin_unlock(&inode_hash_lock); |
927 | destroy_inode(inode); | 1008 | destroy_inode(inode); |
928 | return NULL; | 1009 | return NULL; |
929 | } | 1010 | } |
1011 | EXPORT_SYMBOL(iget5_locked); | ||
930 | 1012 | ||
931 | /* | 1013 | /** |
932 | * get_new_inode_fast is the fast path version of get_new_inode, see the | 1014 | * iget_locked - obtain an inode from a mounted file system |
933 | * comment at iget_locked for details. | 1015 | * @sb: super block of file system |
1016 | * @ino: inode number to get | ||
1017 | * | ||
1018 | * Search for the inode specified by @ino in the inode cache and if present | ||
1019 | * return it with an increased reference count. This is for file systems | ||
1020 | * where the inode number is sufficient for unique identification of an inode. | ||
1021 | * | ||
1022 | * If the inode is not in cache, allocate a new inode and return it locked, | ||
1023 | * hashed, and with the I_NEW flag set. The file system gets to fill it in | ||
1024 | * before unlocking it via unlock_new_inode(). | ||
934 | */ | 1025 | */ |
935 | static struct inode *get_new_inode_fast(struct super_block *sb, | 1026 | struct inode *iget_locked(struct super_block *sb, unsigned long ino) |
936 | struct hlist_head *head, unsigned long ino) | ||
937 | { | 1027 | { |
1028 | struct hlist_head *head = inode_hashtable + hash(sb, ino); | ||
938 | struct inode *inode; | 1029 | struct inode *inode; |
939 | 1030 | ||
1031 | spin_lock(&inode_hash_lock); | ||
1032 | inode = find_inode_fast(sb, head, ino); | ||
1033 | spin_unlock(&inode_hash_lock); | ||
1034 | if (inode) { | ||
1035 | wait_on_inode(inode); | ||
1036 | return inode; | ||
1037 | } | ||
1038 | |||
940 | inode = alloc_inode(sb); | 1039 | inode = alloc_inode(sb); |
941 | if (inode) { | 1040 | if (inode) { |
942 | struct inode *old; | 1041 | struct inode *old; |
943 | 1042 | ||
944 | spin_lock(&inode_lock); | 1043 | spin_lock(&inode_hash_lock); |
945 | /* We released the lock, so.. */ | 1044 | /* We released the lock, so.. */ |
946 | old = find_inode_fast(sb, head, ino); | 1045 | old = find_inode_fast(sb, head, ino); |
947 | if (!old) { | 1046 | if (!old) { |
948 | inode->i_ino = ino; | 1047 | inode->i_ino = ino; |
949 | hlist_add_head(&inode->i_hash, head); | 1048 | spin_lock(&inode->i_lock); |
950 | __inode_sb_list_add(inode); | ||
951 | inode->i_state = I_NEW; | 1049 | inode->i_state = I_NEW; |
952 | spin_unlock(&inode_lock); | 1050 | hlist_add_head(&inode->i_hash, head); |
1051 | spin_unlock(&inode->i_lock); | ||
1052 | inode_sb_list_add(inode); | ||
1053 | spin_unlock(&inode_hash_lock); | ||
953 | 1054 | ||
954 | /* Return the locked inode with I_NEW set, the | 1055 | /* Return the locked inode with I_NEW set, the |
955 | * caller is responsible for filling in the contents | 1056 | * caller is responsible for filling in the contents |
@@ -962,13 +1063,14 @@ static struct inode *get_new_inode_fast(struct super_block *sb, | |||
962 | * us. Use the old inode instead of the one we just | 1063 | * us. Use the old inode instead of the one we just |
963 | * allocated. | 1064 | * allocated. |
964 | */ | 1065 | */ |
965 | spin_unlock(&inode_lock); | 1066 | spin_unlock(&inode_hash_lock); |
966 | destroy_inode(inode); | 1067 | destroy_inode(inode); |
967 | inode = old; | 1068 | inode = old; |
968 | wait_on_inode(inode); | 1069 | wait_on_inode(inode); |
969 | } | 1070 | } |
970 | return inode; | 1071 | return inode; |
971 | } | 1072 | } |
1073 | EXPORT_SYMBOL(iget_locked); | ||
972 | 1074 | ||
973 | /* | 1075 | /* |
974 | * search the inode cache for a matching inode number. | 1076 | * search the inode cache for a matching inode number. |
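
For completeness, the consumer side of the contract spelled out in the iget_locked() kerneldoc above; myfs_iget() and myfs_read_inode() are hypothetical filesystem helpers, shown only to illustrate the I_NEW handshake:

	static struct inode *myfs_iget(struct super_block *sb, unsigned long ino)
	{
		struct inode *inode;

		inode = iget_locked(sb, ino);
		if (!inode)
			return ERR_PTR(-ENOMEM);
		if (!(inode->i_state & I_NEW))
			return inode;		/* cache hit, fully initialised */

		myfs_read_inode(inode);		/* hypothetical: fill from disk */
		unlock_new_inode(inode);	/* clears I_NEW, wakes waiters */
		return inode;
	}
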
@@ -983,10 +1085,14 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino) | |||
983 | struct hlist_node *node; | 1085 | struct hlist_node *node; |
984 | struct inode *inode; | 1086 | struct inode *inode; |
985 | 1087 | ||
1088 | spin_lock(&inode_hash_lock); | ||
986 | hlist_for_each_entry(inode, node, b, i_hash) { | 1089 | hlist_for_each_entry(inode, node, b, i_hash) { |
987 | if (inode->i_ino == ino && inode->i_sb == sb) | 1090 | if (inode->i_ino == ino && inode->i_sb == sb) { |
1091 | spin_unlock(&inode_hash_lock); | ||
988 | return 0; | 1092 | return 0; |
1093 | } | ||
989 | } | 1094 | } |
1095 | spin_unlock(&inode_hash_lock); | ||
990 | 1096 | ||
991 | return 1; | 1097 | return 1; |
992 | } | 1098 | } |
@@ -1016,7 +1122,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) | |||
1016 | static unsigned int counter; | 1122 | static unsigned int counter; |
1017 | ino_t res; | 1123 | ino_t res; |
1018 | 1124 | ||
1019 | spin_lock(&inode_lock); | ||
1020 | spin_lock(&iunique_lock); | 1125 | spin_lock(&iunique_lock); |
1021 | do { | 1126 | do { |
1022 | if (counter <= max_reserved) | 1127 | if (counter <= max_reserved) |
@@ -1024,7 +1129,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) | |||
1024 | res = counter++; | 1129 | res = counter++; |
1025 | } while (!test_inode_iunique(sb, res)); | 1130 | } while (!test_inode_iunique(sb, res)); |
1026 | spin_unlock(&iunique_lock); | 1131 | spin_unlock(&iunique_lock); |
1027 | spin_unlock(&inode_lock); | ||
1028 | 1132 | ||
1029 | return res; | 1133 | return res; |
1030 | } | 1134 | } |
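For context, iunique() serves filesystems that synthesize inode numbers at runtime; a minimal sketch, assuming a hypothetical reserved range below MYFS_MAX_RESERVED_INO:

	struct inode *inode = new_inode(sb);

	if (inode)
		inode->i_ino = iunique(sb, MYFS_MAX_RESERVED_INO);

Dropping inode_lock here is safe because test_inode_iunique() now takes inode_hash_lock itself, and iunique_lock still serialises the counter.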
@@ -1032,116 +1136,50 @@ EXPORT_SYMBOL(iunique); | |||
1032 | 1136 | ||
1033 | struct inode *igrab(struct inode *inode) | 1137 | struct inode *igrab(struct inode *inode) |
1034 | { | 1138 | { |
1035 | spin_lock(&inode_lock); | 1139 | spin_lock(&inode->i_lock); |
1036 | if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) | 1140 | if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { |
1037 | __iget(inode); | 1141 | __iget(inode); |
1038 | else | 1142 | spin_unlock(&inode->i_lock); |
1143 | } else { | ||
1144 | spin_unlock(&inode->i_lock); | ||
1039 | /* | 1145 | /* |
1040 | * Handle the case where s_op->clear_inode has not been | 1146 | * Handle the case where s_op->clear_inode has not been |
1041 | * called yet, and somebody is calling igrab | 1147 | * called yet, and somebody is calling igrab |
1042 | * while the inode is getting freed. | 1148 | * while the inode is getting freed. |
1043 | */ | 1149 | */ |
1044 | inode = NULL; | 1150 | inode = NULL; |
1045 | spin_unlock(&inode_lock); | 1151 | } |
1046 | return inode; | 1152 | return inode; |
1047 | } | 1153 | } |
1048 | EXPORT_SYMBOL(igrab); | 1154 | EXPORT_SYMBOL(igrab); |
1049 | 1155 | ||
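A short sketch of the usual igrab()/iput() pairing for code that starts from an inode pointer it holds no reference on (myfs_poke() is hypothetical):

	static void myfs_poke(struct inode *candidate)
	{
		struct inode *inode = igrab(candidate);

		if (!inode)
			return;		/* inode was being freed: hands off */
		/* ... use inode under our own reference ... */
		iput(inode);
	}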
1050 | /** | 1156 | /** |
1051 | * ifind - internal function, you want ilookup5() or iget5(). | ||
1052 | * @sb: super block of file system to search | ||
1053 | * @head: the head of the list to search | ||
1054 | * @test: callback used for comparisons between inodes | ||
1055 | * @data: opaque data pointer to pass to @test | ||
1056 | * @wait: if true wait for the inode to be unlocked, if false do not | ||
1057 | * | ||
1058 | * ifind() searches for the inode specified by @data in the inode | ||
1059 | * cache. This is a generalized version of ifind_fast() for file systems where | ||
1060 | * the inode number is not sufficient for unique identification of an inode. | ||
1061 | * | ||
1062 | * If the inode is in the cache, the inode is returned with an incremented | ||
1063 | * reference count. | ||
1064 | * | ||
1065 | * Otherwise NULL is returned. | ||
1066 | * | ||
1067 | * Note, @test is called with the inode_lock held, so can't sleep. | ||
1068 | */ | ||
1069 | static struct inode *ifind(struct super_block *sb, | ||
1070 | struct hlist_head *head, int (*test)(struct inode *, void *), | ||
1071 | void *data, const int wait) | ||
1072 | { | ||
1073 | struct inode *inode; | ||
1074 | |||
1075 | spin_lock(&inode_lock); | ||
1076 | inode = find_inode(sb, head, test, data); | ||
1077 | if (inode) { | ||
1078 | spin_unlock(&inode_lock); | ||
1079 | if (likely(wait)) | ||
1080 | wait_on_inode(inode); | ||
1081 | return inode; | ||
1082 | } | ||
1083 | spin_unlock(&inode_lock); | ||
1084 | return NULL; | ||
1085 | } | ||
1086 | |||
1087 | /** | ||
1088 | * ifind_fast - internal function, you want ilookup() or iget(). | ||
1089 | * @sb: super block of file system to search | ||
1090 | * @head: head of the list to search | ||
1091 | * @ino: inode number to search for | ||
1092 | * | ||
1093 | * ifind_fast() searches for the inode @ino in the inode cache. This is for | ||
1094 | * file systems where the inode number is sufficient for unique identification | ||
1095 | * of an inode. | ||
1096 | * | ||
1097 | * If the inode is in the cache, the inode is returned with an incremented | ||
1098 | * reference count. | ||
1099 | * | ||
1100 | * Otherwise NULL is returned. | ||
1101 | */ | ||
1102 | static struct inode *ifind_fast(struct super_block *sb, | ||
1103 | struct hlist_head *head, unsigned long ino) | ||
1104 | { | ||
1105 | struct inode *inode; | ||
1106 | |||
1107 | spin_lock(&inode_lock); | ||
1108 | inode = find_inode_fast(sb, head, ino); | ||
1109 | if (inode) { | ||
1110 | spin_unlock(&inode_lock); | ||
1111 | wait_on_inode(inode); | ||
1112 | return inode; | ||
1113 | } | ||
1114 | spin_unlock(&inode_lock); | ||
1115 | return NULL; | ||
1116 | } | ||
1117 | |||
1118 | /** | ||
1119 | * ilookup5_nowait - search for an inode in the inode cache | 1157 | * ilookup5_nowait - search for an inode in the inode cache |
1120 | * @sb: super block of file system to search | 1158 | * @sb: super block of file system to search |
1121 | * @hashval: hash value (usually inode number) to search for | 1159 | * @hashval: hash value (usually inode number) to search for |
1122 | * @test: callback used for comparisons between inodes | 1160 | * @test: callback used for comparisons between inodes |
1123 | * @data: opaque data pointer to pass to @test | 1161 | * @data: opaque data pointer to pass to @test |
1124 | * | 1162 | * |
1125 | * ilookup5() uses ifind() to search for the inode specified by @hashval and | 1163 | * Search for the inode specified by @hashval and @data in the inode cache. |
1126 | * @data in the inode cache. This is a generalized version of ilookup() for | ||
1127 | * file systems where the inode number is not sufficient for unique | ||
1128 | * identification of an inode. | ||
1129 | * | ||
1130 | * If the inode is in the cache, the inode is returned with an incremented | 1164 | * If the inode is in the cache, the inode is returned with an incremented |
1131 | * reference count. Note, the inode lock is not waited upon so you have to be | 1165 | * reference count. |
1132 | * very careful what you do with the returned inode. You probably should be | ||
1133 | * using ilookup5() instead. | ||
1134 | * | 1166 | * |
1135 | * Otherwise NULL is returned. | 1167 | * Note: I_NEW is not waited upon so you have to be very careful what you do |
1168 | * with the returned inode. You probably should be using ilookup5() instead. | ||
1136 | * | 1169 | * |
1137 | * Note, @test is called with the inode_lock held, so can't sleep. | 1170 | * Note: @test is called with the inode_hash_lock held, so can't sleep. |
1138 | */ | 1171 | */ |
1139 | struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, | 1172 | struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, |
1140 | int (*test)(struct inode *, void *), void *data) | 1173 | int (*test)(struct inode *, void *), void *data) |
1141 | { | 1174 | { |
1142 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); | 1175 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); |
1176 | struct inode *inode; | ||
1177 | |||
1178 | spin_lock(&inode_hash_lock); | ||
1179 | inode = find_inode(sb, head, test, data); | ||
1180 | spin_unlock(&inode_hash_lock); | ||
1143 | 1181 | ||
1144 | return ifind(sb, head, test, data, 0); | 1182 | return inode; |
1145 | } | 1183 | } |
1146 | EXPORT_SYMBOL(ilookup5_nowait); | 1184 | EXPORT_SYMBOL(ilookup5_nowait); |
1147 | 1185 | ||
@@ -1152,24 +1190,24 @@ EXPORT_SYMBOL(ilookup5_nowait); | |||
1152 | * @test: callback used for comparisons between inodes | 1190 | * @test: callback used for comparisons between inodes |
1153 | * @data: opaque data pointer to pass to @test | 1191 | * @data: opaque data pointer to pass to @test |
1154 | * | 1192 | * |
1155 | * ilookup5() uses ifind() to search for the inode specified by @hashval and | 1193 | * Search for the inode specified by @hashval and @data in the inode cache, |
1156 | * @data in the inode cache. This is a generalized version of ilookup() for | 1194 | * and if the inode is in the cache, return the inode with an incremented |
1157 | * file systems where the inode number is not sufficient for unique | 1195 | * reference count. Waits on I_NEW before returning the inode. |
1158 | * identification of an inode. | ||
1159 | * | ||
1160 | * If the inode is in the cache, the inode lock is waited upon and the inode is | ||
1161 | * returned with an incremented reference count. | ||
1162 | * | 1197 | * |
1163 | * Otherwise NULL is returned. | 1198 | * This is a generalized version of ilookup() for file systems where the |
1199 | * inode number is not sufficient for unique identification of an inode. | ||
1164 | * | 1200 | * |
1165 | * Note, @test is called with the inode_lock held, so can't sleep. | 1201 | * Note: @test is called with the inode_hash_lock held, so can't sleep. |
1166 | */ | 1202 | */ |
1167 | struct inode *ilookup5(struct super_block *sb, unsigned long hashval, | 1203 | struct inode *ilookup5(struct super_block *sb, unsigned long hashval, |
1168 | int (*test)(struct inode *, void *), void *data) | 1204 | int (*test)(struct inode *, void *), void *data) |
1169 | { | 1205 | { |
1170 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); | 1206 | struct inode *inode = ilookup5_nowait(sb, hashval, test, data); |
1171 | 1207 | ||
1172 | return ifind(sb, head, test, data, 1); | 1208 | if (inode) |
1209 | wait_on_inode(inode); | ||
1210 | return inode; | ||
1173 | } | 1211 | } |
1174 | EXPORT_SYMBOL(ilookup5); | 1212 | EXPORT_SYMBOL(ilookup5); |
1175 | 1213 | ||
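To make the @test contract concrete, here is a hedged sketch of a filesystem keyed by something other than i_ino; struct myfs_key, MYFS_I() and myfs_test() are hypothetical:

	struct myfs_key {
		u64 objectid;			/* hypothetical lookup key */
	};

	static int myfs_test(struct inode *inode, void *data)
	{
		struct myfs_key *key = data;

		/* Called under inode_hash_lock: must not sleep. */
		return MYFS_I(inode)->objectid == key->objectid;
	}

	/* caller side: */
	struct myfs_key key = { .objectid = objectid };
	struct inode *inode = ilookup5(sb, (unsigned long)key.objectid,
				       myfs_test, &key);
	if (inode) {
		/* I_NEW has been waited out; inode is fully set up */
		iput(inode);
	}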
@@ -1178,91 +1216,23 @@ EXPORT_SYMBOL(ilookup5); | |||
1178 | * @sb: super block of file system to search | 1216 | * @sb: super block of file system to search |
1179 | * @ino: inode number to search for | 1217 | * @ino: inode number to search for |
1180 | * | 1218 | * |
1181 | * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache. | 1219 | * Search for the inode @ino in the inode cache, and if the inode is in the |
1182 | * This is for file systems where the inode number is sufficient for unique | 1220 | * cache, the inode is returned with an incremented reference count. |
1183 | * identification of an inode. | ||
1184 | * | ||
1185 | * If the inode is in the cache, the inode is returned with an incremented | ||
1186 | * reference count. | ||
1187 | * | ||
1188 | * Otherwise NULL is returned. | ||
1189 | */ | 1221 | */ |
1190 | struct inode *ilookup(struct super_block *sb, unsigned long ino) | 1222 | struct inode *ilookup(struct super_block *sb, unsigned long ino) |
1191 | { | 1223 | { |
1192 | struct hlist_head *head = inode_hashtable + hash(sb, ino); | 1224 | struct hlist_head *head = inode_hashtable + hash(sb, ino); |
1193 | |||
1194 | return ifind_fast(sb, head, ino); | ||
1195 | } | ||
1196 | EXPORT_SYMBOL(ilookup); | ||
1197 | |||
1198 | /** | ||
1199 | * iget5_locked - obtain an inode from a mounted file system | ||
1200 | * @sb: super block of file system | ||
1201 | * @hashval: hash value (usually inode number) to get | ||
1202 | * @test: callback used for comparisons between inodes | ||
1203 | * @set: callback used to initialize a new struct inode | ||
1204 | * @data: opaque data pointer to pass to @test and @set | ||
1205 | * | ||
1206 | * iget5_locked() uses ifind() to search for the inode specified by @hashval | ||
1207 | * and @data in the inode cache and if present it is returned with an increased | ||
1208 | * reference count. This is a generalized version of iget_locked() for file | ||
1209 | * systems where the inode number is not sufficient for unique identification | ||
1210 | * of an inode. | ||
1211 | * | ||
1212 | * If the inode is not in cache, get_new_inode() is called to allocate a new | ||
1213 | * inode and this is returned locked, hashed, and with the I_NEW flag set. The | ||
1214 | * file system gets to fill it in before unlocking it via unlock_new_inode(). | ||
1215 | * | ||
1216 | * Note both @test and @set are called with the inode_lock held, so can't sleep. | ||
1217 | */ | ||
1218 | struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, | ||
1219 | int (*test)(struct inode *, void *), | ||
1220 | int (*set)(struct inode *, void *), void *data) | ||
1221 | { | ||
1222 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); | ||
1223 | struct inode *inode; | 1225 | struct inode *inode; |
1224 | 1226 | ||
1225 | inode = ifind(sb, head, test, data, 1); | 1227 | spin_lock(&inode_hash_lock); |
1226 | if (inode) | 1228 | inode = find_inode_fast(sb, head, ino); |
1227 | return inode; | 1229 | spin_unlock(&inode_hash_lock); |
1228 | /* | ||
1229 | * get_new_inode() will do the right thing, re-trying the search | ||
1230 | * in case it had to block at any point. | ||
1231 | */ | ||
1232 | return get_new_inode(sb, head, test, set, data); | ||
1233 | } | ||
1234 | EXPORT_SYMBOL(iget5_locked); | ||
1235 | |||
1236 | /** | ||
1237 | * iget_locked - obtain an inode from a mounted file system | ||
1238 | * @sb: super block of file system | ||
1239 | * @ino: inode number to get | ||
1240 | * | ||
1241 | * iget_locked() uses ifind_fast() to search for the inode specified by @ino in | ||
1242 | * the inode cache and if present it is returned with an increased reference | ||
1243 | * count. This is for file systems where the inode number is sufficient for | ||
1244 | * unique identification of an inode. | ||
1245 | * | ||
1246 | * If the inode is not in cache, get_new_inode_fast() is called to allocate a | ||
1247 | * new inode and this is returned locked, hashed, and with the I_NEW flag set. | ||
1248 | * The file system gets to fill it in before unlocking it via | ||
1249 | * unlock_new_inode(). | ||
1250 | */ | ||
1251 | struct inode *iget_locked(struct super_block *sb, unsigned long ino) | ||
1252 | { | ||
1253 | struct hlist_head *head = inode_hashtable + hash(sb, ino); | ||
1254 | struct inode *inode; | ||
1255 | 1230 | ||
1256 | inode = ifind_fast(sb, head, ino); | ||
1257 | if (inode) | 1231 | if (inode) |
1258 | return inode; | 1232 | wait_on_inode(inode); |
1259 | /* | 1233 | return inode; |
1260 | * get_new_inode_fast() will do the right thing, re-trying the search | ||
1261 | * in case it had to block at any point. | ||
1262 | */ | ||
1263 | return get_new_inode_fast(sb, head, ino); | ||
1264 | } | 1234 | } |
1265 | EXPORT_SYMBOL(iget_locked); | 1235 | EXPORT_SYMBOL(ilookup); |
1266 | 1236 | ||
1267 | int insert_inode_locked(struct inode *inode) | 1237 | int insert_inode_locked(struct inode *inode) |
1268 | { | 1238 | { |
@@ -1270,27 +1240,33 @@ int insert_inode_locked(struct inode *inode) | |||
1270 | ino_t ino = inode->i_ino; | 1240 | ino_t ino = inode->i_ino; |
1271 | struct hlist_head *head = inode_hashtable + hash(sb, ino); | 1241 | struct hlist_head *head = inode_hashtable + hash(sb, ino); |
1272 | 1242 | ||
1273 | inode->i_state |= I_NEW; | ||
1274 | while (1) { | 1243 | while (1) { |
1275 | struct hlist_node *node; | 1244 | struct hlist_node *node; |
1276 | struct inode *old = NULL; | 1245 | struct inode *old = NULL; |
1277 | spin_lock(&inode_lock); | 1246 | spin_lock(&inode_hash_lock); |
1278 | hlist_for_each_entry(old, node, head, i_hash) { | 1247 | hlist_for_each_entry(old, node, head, i_hash) { |
1279 | if (old->i_ino != ino) | 1248 | if (old->i_ino != ino) |
1280 | continue; | 1249 | continue; |
1281 | if (old->i_sb != sb) | 1250 | if (old->i_sb != sb) |
1282 | continue; | 1251 | continue; |
1283 | if (old->i_state & (I_FREEING|I_WILL_FREE)) | 1252 | spin_lock(&old->i_lock); |
1253 | if (old->i_state & (I_FREEING|I_WILL_FREE)) { | ||
1254 | spin_unlock(&old->i_lock); | ||
1284 | continue; | 1255 | continue; |
1256 | } | ||
1285 | break; | 1257 | break; |
1286 | } | 1258 | } |
1287 | if (likely(!node)) { | 1259 | if (likely(!node)) { |
1260 | spin_lock(&inode->i_lock); | ||
1261 | inode->i_state |= I_NEW; | ||
1288 | hlist_add_head(&inode->i_hash, head); | 1262 | hlist_add_head(&inode->i_hash, head); |
1289 | spin_unlock(&inode_lock); | 1263 | spin_unlock(&inode->i_lock); |
1264 | spin_unlock(&inode_hash_lock); | ||
1290 | return 0; | 1265 | return 0; |
1291 | } | 1266 | } |
1292 | __iget(old); | 1267 | __iget(old); |
1293 | spin_unlock(&inode_lock); | 1268 | spin_unlock(&old->i_lock); |
1269 | spin_unlock(&inode_hash_lock); | ||
1294 | wait_on_inode(old); | 1270 | wait_on_inode(old); |
1295 | if (unlikely(!inode_unhashed(old))) { | 1271 | if (unlikely(!inode_unhashed(old))) { |
1296 | iput(old); | 1272 | iput(old); |
@@ -1307,29 +1283,34 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, | |||
1307 | struct super_block *sb = inode->i_sb; | 1283 | struct super_block *sb = inode->i_sb; |
1308 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); | 1284 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); |
1309 | 1285 | ||
1310 | inode->i_state |= I_NEW; | ||
1311 | |||
1312 | while (1) { | 1286 | while (1) { |
1313 | struct hlist_node *node; | 1287 | struct hlist_node *node; |
1314 | struct inode *old = NULL; | 1288 | struct inode *old = NULL; |
1315 | 1289 | ||
1316 | spin_lock(&inode_lock); | 1290 | spin_lock(&inode_hash_lock); |
1317 | hlist_for_each_entry(old, node, head, i_hash) { | 1291 | hlist_for_each_entry(old, node, head, i_hash) { |
1318 | if (old->i_sb != sb) | 1292 | if (old->i_sb != sb) |
1319 | continue; | 1293 | continue; |
1320 | if (!test(old, data)) | 1294 | if (!test(old, data)) |
1321 | continue; | 1295 | continue; |
1322 | if (old->i_state & (I_FREEING|I_WILL_FREE)) | 1296 | spin_lock(&old->i_lock); |
1297 | if (old->i_state & (I_FREEING|I_WILL_FREE)) { | ||
1298 | spin_unlock(&old->i_lock); | ||
1323 | continue; | 1299 | continue; |
1300 | } | ||
1324 | break; | 1301 | break; |
1325 | } | 1302 | } |
1326 | if (likely(!node)) { | 1303 | if (likely(!node)) { |
1304 | spin_lock(&inode->i_lock); | ||
1305 | inode->i_state |= I_NEW; | ||
1327 | hlist_add_head(&inode->i_hash, head); | 1306 | hlist_add_head(&inode->i_hash, head); |
1328 | spin_unlock(&inode_lock); | 1307 | spin_unlock(&inode->i_lock); |
1308 | spin_unlock(&inode_hash_lock); | ||
1329 | return 0; | 1309 | return 0; |
1330 | } | 1310 | } |
1331 | __iget(old); | 1311 | __iget(old); |
1332 | spin_unlock(&inode_lock); | 1312 | spin_unlock(&old->i_lock); |
1313 | spin_unlock(&inode_hash_lock); | ||
1333 | wait_on_inode(old); | 1314 | wait_on_inode(old); |
1334 | if (unlikely(!inode_unhashed(old))) { | 1315 | if (unlikely(!inode_unhashed(old))) { |
1335 | iput(old); | 1316 | iput(old); |
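The creation-side counterpart is worth sketching: a filesystem allocates an inode number first and uses insert_inode_locked() to claim it in the hash (myfs_alloc_ino() is hypothetical):

	struct inode *inode = new_inode(sb);

	if (!inode)
		return ERR_PTR(-ENOMEM);
	inode->i_ino = myfs_alloc_ino(sb);
	if (insert_inode_locked(inode) < 0) {
		iput(inode);		/* lost the race to another creator */
		return ERR_PTR(-EBUSY);
	}
	/* ... initialise fields, mark_inode_dirty(inode) ... */
	unlock_new_inode(inode);
	return inode;

Note the patch also moves the I_NEW marking inside the hash-lock/i_lock critical section, so the flag is only set once insertion is certain.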
@@ -1374,47 +1355,35 @@ static void iput_final(struct inode *inode) | |||
1374 | const struct super_operations *op = inode->i_sb->s_op; | 1355 | const struct super_operations *op = inode->i_sb->s_op; |
1375 | int drop; | 1356 | int drop; |
1376 | 1357 | ||
1358 | WARN_ON(inode->i_state & I_NEW); | ||
1359 | |||
1377 | if (op && op->drop_inode) | 1360 | if (op && op->drop_inode) |
1378 | drop = op->drop_inode(inode); | 1361 | drop = op->drop_inode(inode); |
1379 | else | 1362 | else |
1380 | drop = generic_drop_inode(inode); | 1363 | drop = generic_drop_inode(inode); |
1381 | 1364 | ||
1365 | if (!drop && (sb->s_flags & MS_ACTIVE)) { | ||
1366 | inode->i_state |= I_REFERENCED; | ||
1367 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | ||
1368 | inode_lru_list_add(inode); | ||
1369 | spin_unlock(&inode->i_lock); | ||
1370 | return; | ||
1371 | } | ||
1372 | |||
1382 | if (!drop) { | 1373 | if (!drop) { |
1383 | if (sb->s_flags & MS_ACTIVE) { | ||
1384 | inode->i_state |= I_REFERENCED; | ||
1385 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) { | ||
1386 | inode_lru_list_add(inode); | ||
1387 | } | ||
1388 | spin_unlock(&inode_lock); | ||
1389 | return; | ||
1390 | } | ||
1391 | WARN_ON(inode->i_state & I_NEW); | ||
1392 | inode->i_state |= I_WILL_FREE; | 1374 | inode->i_state |= I_WILL_FREE; |
1393 | spin_unlock(&inode_lock); | 1375 | spin_unlock(&inode->i_lock); |
1394 | write_inode_now(inode, 1); | 1376 | write_inode_now(inode, 1); |
1395 | spin_lock(&inode_lock); | 1377 | spin_lock(&inode->i_lock); |
1396 | WARN_ON(inode->i_state & I_NEW); | 1378 | WARN_ON(inode->i_state & I_NEW); |
1397 | inode->i_state &= ~I_WILL_FREE; | 1379 | inode->i_state &= ~I_WILL_FREE; |
1398 | __remove_inode_hash(inode); | ||
1399 | } | 1380 | } |
1400 | 1381 | ||
1401 | WARN_ON(inode->i_state & I_NEW); | ||
1402 | inode->i_state |= I_FREEING; | 1382 | inode->i_state |= I_FREEING; |
1403 | |||
1404 | /* | ||
1405 | * Move the inode off the IO lists and LRU once I_FREEING is | ||
1406 | * set so that it won't get moved back on there if it is dirty. | ||
1407 | */ | ||
1408 | inode_lru_list_del(inode); | 1383 | inode_lru_list_del(inode); |
1409 | list_del_init(&inode->i_wb_list); | 1384 | spin_unlock(&inode->i_lock); |
1410 | 1385 | ||
1411 | __inode_sb_list_del(inode); | ||
1412 | spin_unlock(&inode_lock); | ||
1413 | evict(inode); | 1386 | evict(inode); |
1414 | remove_inode_hash(inode); | ||
1415 | wake_up_inode(inode); | ||
1416 | BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); | ||
1417 | destroy_inode(inode); | ||
1418 | } | 1387 | } |
1419 | 1388 | ||
1420 | /** | 1389 | /** |
@@ -1431,7 +1400,7 @@ void iput(struct inode *inode) | |||
1431 | if (inode) { | 1400 | if (inode) { |
1432 | BUG_ON(inode->i_state & I_CLEAR); | 1401 | BUG_ON(inode->i_state & I_CLEAR); |
1433 | 1402 | ||
1434 | if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) | 1403 | if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) |
1435 | iput_final(inode); | 1404 | iput_final(inode); |
1436 | } | 1405 | } |
1437 | } | 1406 | } |
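The atomic_dec_and_lock() switch is what keeps iput() cheap: i_lock is only taken when the count may actually hit zero. Its logic is roughly the lib/dec_and_lock.c implementation:

	if (atomic_add_unless(atomic, -1, 1))
		return 0;		/* count was > 1: no lock needed */

	spin_lock(lock);
	if (atomic_dec_and_test(atomic))
		return 1;		/* reached zero; caller holds the lock */
	spin_unlock(lock);
	return 0;

so iput_final() always runs with i_lock held, matching the spin_unlock(&inode->i_lock) calls added above.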
@@ -1610,9 +1579,8 @@ EXPORT_SYMBOL(inode_wait); | |||
1610 | * to recheck inode state. | 1579 | * to recheck inode state. |
1611 | * | 1580 | * |
1612 | * It doesn't matter if I_NEW is not set initially, a call to | 1581 | * It doesn't matter if I_NEW is not set initially, a call to |
1613 | * wake_up_inode() after removing from the hash list will DTRT. | 1582 | * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list |
1614 | * | 1583 | * will DTRT. |
1615 | * This is called with inode_lock held. | ||
1616 | */ | 1584 | */ |
1617 | static void __wait_on_freeing_inode(struct inode *inode) | 1585 | static void __wait_on_freeing_inode(struct inode *inode) |
1618 | { | 1586 | { |
@@ -1620,10 +1588,11 @@ static void __wait_on_freeing_inode(struct inode *inode) | |||
1620 | DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); | 1588 | DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); |
1621 | wq = bit_waitqueue(&inode->i_state, __I_NEW); | 1589 | wq = bit_waitqueue(&inode->i_state, __I_NEW); |
1622 | prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); | 1590 | prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); |
1623 | spin_unlock(&inode_lock); | 1591 | spin_unlock(&inode->i_lock); |
1592 | spin_unlock(&inode_hash_lock); | ||
1624 | schedule(); | 1593 | schedule(); |
1625 | finish_wait(wq, &wait.wait); | 1594 | finish_wait(wq, &wait.wait); |
1626 | spin_lock(&inode_lock); | 1595 | spin_lock(&inode_hash_lock); |
1627 | } | 1596 | } |
1628 | 1597 | ||
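A sketch of the lock ordering these hunks establish (inferred from the code above, not spelled out in this file):

	/*
	 *   inode_hash_lock
	 *     inode->i_lock
	 *
	 * i_lock alone now guards i_state and i_count; anything that
	 * needs both locks, such as find_inode_fast() or
	 * insert_inode_locked(), takes inode_hash_lock first and nests
	 * i_lock inside it.
	 */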
1629 | static __initdata unsigned long ihash_entries; | 1598 | static __initdata unsigned long ihash_entries; |
@@ -1733,3 +1702,22 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, | |||
1733 | inode->i_mode = mode; | 1702 | inode->i_mode = mode; |
1734 | } | 1703 | } |
1735 | EXPORT_SYMBOL(inode_init_owner); | 1704 | EXPORT_SYMBOL(inode_init_owner); |
1705 | |||
1706 | /** | ||
1707 | * inode_owner_or_capable - check current task permissions to inode | ||
1708 | * @inode: inode being checked | ||
1709 | * | ||
1710 | * Return true if current either has CAP_FOWNER in the inode's | ||
1711 | * user namespace, or owns the file. | ||
1712 | */ | ||
1713 | bool inode_owner_or_capable(const struct inode *inode) | ||
1714 | { | ||
1715 | struct user_namespace *ns = inode_userns(inode); | ||
1716 | |||
1717 | if (current_user_ns() == ns && current_fsuid() == inode->i_uid) | ||
1718 | return true; | ||
1719 | if (ns_capable(ns, CAP_FOWNER)) | ||
1720 | return true; | ||
1721 | return false; | ||
1722 | } | ||
1723 | EXPORT_SYMBOL(inode_owner_or_capable); | ||
diff --git a/fs/internal.h b/fs/internal.h index 8318059b42c6..b29c46e4e32f 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -125,6 +125,13 @@ extern long do_handle_open(int mountdirfd, | |||
125 | /* | 125 | /* |
126 | * inode.c | 126 | * inode.c |
127 | */ | 127 | */ |
128 | extern spinlock_t inode_sb_list_lock; | ||
129 | |||
130 | /* | ||
131 | * fs-writeback.c | ||
132 | */ | ||
133 | extern void inode_wb_list_del(struct inode *inode); | ||
134 | |||
128 | extern int get_nr_dirty_inodes(void); | 135 | extern int get_nr_dirty_inodes(void); |
129 | extern void evict_inodes(struct super_block *); | 136 | extern void evict_inodes(struct super_block *); |
130 | extern int invalidate_inodes(struct super_block *, bool); | 137 | extern int invalidate_inodes(struct super_block *, bool); |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index a0f3833c0dbf..3db5ba4568fc 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -1158,7 +1158,6 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) | |||
1158 | 1158 | ||
1159 | static const struct address_space_operations isofs_aops = { | 1159 | static const struct address_space_operations isofs_aops = { |
1160 | .readpage = isofs_readpage, | 1160 | .readpage = isofs_readpage, |
1161 | .sync_page = block_sync_page, | ||
1162 | .bmap = _isofs_bmap | 1161 | .bmap = _isofs_bmap |
1163 | }; | 1162 | }; |
1164 | 1163 | ||
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 34a4861c14b8..da871ee084d3 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/pagemap.h> | 21 | #include <linux/pagemap.h> |
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/blkdev.h> | ||
23 | 24 | ||
24 | /* | 25 | /* |
25 | * Default IO end handler for temporary BJ_IO buffer_heads. | 26 | * Default IO end handler for temporary BJ_IO buffer_heads. |
@@ -294,7 +295,7 @@ void journal_commit_transaction(journal_t *journal) | |||
294 | int first_tag = 0; | 295 | int first_tag = 0; |
295 | int tag_flag; | 296 | int tag_flag; |
296 | int i; | 297 | int i; |
297 | int write_op = WRITE_SYNC; | 298 | struct blk_plug plug; |
298 | 299 | ||
299 | /* | 300 | /* |
300 | * First job: lock down the current transaction and wait for | 301 | * First job: lock down the current transaction and wait for |
@@ -327,13 +328,6 @@ void journal_commit_transaction(journal_t *journal) | |||
327 | spin_lock(&journal->j_state_lock); | 328 | spin_lock(&journal->j_state_lock); |
328 | commit_transaction->t_state = T_LOCKED; | 329 | commit_transaction->t_state = T_LOCKED; |
329 | 330 | ||
330 | /* | ||
331 | * Use plugged writes here, since we want to submit several before | ||
332 | * we unplug the device. We don't do explicit unplugging in here, | ||
333 | * instead we rely on sync_buffer() doing the unplug for us. | ||
334 | */ | ||
335 | if (commit_transaction->t_synchronous_commit) | ||
336 | write_op = WRITE_SYNC_PLUG; | ||
337 | spin_lock(&commit_transaction->t_handle_lock); | 331 | spin_lock(&commit_transaction->t_handle_lock); |
338 | while (commit_transaction->t_updates) { | 332 | while (commit_transaction->t_updates) { |
339 | DEFINE_WAIT(wait); | 333 | DEFINE_WAIT(wait); |
@@ -418,8 +412,10 @@ void journal_commit_transaction(journal_t *journal) | |||
418 | * Now start flushing things to disk, in the order they appear | 412 | * Now start flushing things to disk, in the order they appear |
419 | * on the transaction lists. Data blocks go first. | 413 | * on the transaction lists. Data blocks go first. |
420 | */ | 414 | */ |
415 | blk_start_plug(&plug); | ||
421 | err = journal_submit_data_buffers(journal, commit_transaction, | 416 | err = journal_submit_data_buffers(journal, commit_transaction, |
422 | write_op); | 417 | WRITE_SYNC); |
418 | blk_finish_plug(&plug); | ||
423 | 419 | ||
424 | /* | 420 | /* |
425 | * Wait for all previously submitted IO to complete. | 421 | * Wait for all previously submitted IO to complete. |
@@ -480,7 +476,9 @@ void journal_commit_transaction(journal_t *journal) | |||
480 | err = 0; | 476 | err = 0; |
481 | } | 477 | } |
482 | 478 | ||
483 | journal_write_revoke_records(journal, commit_transaction, write_op); | 479 | blk_start_plug(&plug); |
480 | |||
481 | journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC); | ||
484 | 482 | ||
485 | /* | 483 | /* |
486 | * If we found any dirty or locked buffers, then we should have | 484 | * If we found any dirty or locked buffers, then we should have |
@@ -650,7 +648,7 @@ start_journal_io: | |||
650 | clear_buffer_dirty(bh); | 648 | clear_buffer_dirty(bh); |
651 | set_buffer_uptodate(bh); | 649 | set_buffer_uptodate(bh); |
652 | bh->b_end_io = journal_end_buffer_io_sync; | 650 | bh->b_end_io = journal_end_buffer_io_sync; |
653 | submit_bh(write_op, bh); | 651 | submit_bh(WRITE_SYNC, bh); |
654 | } | 652 | } |
655 | cond_resched(); | 653 | cond_resched(); |
656 | 654 | ||
@@ -661,6 +659,8 @@ start_journal_io: | |||
661 | } | 659 | } |
662 | } | 660 | } |
663 | 661 | ||
662 | blk_finish_plug(&plug); | ||
663 | |||
664 | /* Lo and behold: we have just managed to send a transaction to | 664 | /* Lo and behold: we have just managed to send a transaction to |
665 | the log. Before we can commit it, wait for the IO so far to | 665 | the log. Before we can commit it, wait for the IO so far to |
666 | complete. Control buffers being written are on the | 666 | complete. Control buffers being written are on the |
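Both jbd and jbd2 now use the explicit on-stack plugging API that replaces the old WRITE_SYNC_PLUG convention; the pattern, sketched (submit_batch() is hypothetical):

	struct blk_plug plug;

	blk_start_plug(&plug);	/* batch requests per-task */
	submit_batch();		/* e.g. a loop of submit_bh(WRITE_SYNC, bh) */
	blk_finish_plug(&plug);	/* flush the whole batch to the queue */

The block layer also flushes the plug automatically if the task sleeps, which is why the removed comments about relying on sync_buffer() to unplug are no longer needed.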
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f3ad1598b201..fa36d7662b21 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -137,9 +137,9 @@ static int journal_submit_commit_record(journal_t *journal, | |||
137 | if (journal->j_flags & JBD2_BARRIER && | 137 | if (journal->j_flags & JBD2_BARRIER && |
138 | !JBD2_HAS_INCOMPAT_FEATURE(journal, | 138 | !JBD2_HAS_INCOMPAT_FEATURE(journal, |
139 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) | 139 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) |
140 | ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh); | 140 | ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh); |
141 | else | 141 | else |
142 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | 142 | ret = submit_bh(WRITE_SYNC, bh); |
143 | 143 | ||
144 | *cbh = bh; | 144 | *cbh = bh; |
145 | return ret; | 145 | return ret; |
@@ -329,7 +329,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
329 | int tag_bytes = journal_tag_bytes(journal); | 329 | int tag_bytes = journal_tag_bytes(journal); |
330 | struct buffer_head *cbh = NULL; /* For transactional checksums */ | 330 | struct buffer_head *cbh = NULL; /* For transactional checksums */ |
331 | __u32 crc32_sum = ~0; | 331 | __u32 crc32_sum = ~0; |
332 | int write_op = WRITE_SYNC; | 332 | struct blk_plug plug; |
333 | 333 | ||
334 | /* | 334 | /* |
335 | * First job: lock down the current transaction and wait for | 335 | * First job: lock down the current transaction and wait for |
@@ -363,13 +363,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
363 | write_lock(&journal->j_state_lock); | 363 | write_lock(&journal->j_state_lock); |
364 | commit_transaction->t_state = T_LOCKED; | 364 | commit_transaction->t_state = T_LOCKED; |
365 | 365 | ||
366 | /* | ||
367 | * Use plugged writes here, since we want to submit several before | ||
368 | * we unplug the device. We don't do explicit unplugging in here, | ||
369 | * instead we rely on sync_buffer() doing the unplug for us. | ||
370 | */ | ||
371 | if (commit_transaction->t_synchronous_commit) | ||
372 | write_op = WRITE_SYNC_PLUG; | ||
373 | trace_jbd2_commit_locking(journal, commit_transaction); | 366 | trace_jbd2_commit_locking(journal, commit_transaction); |
374 | stats.run.rs_wait = commit_transaction->t_max_wait; | 367 | stats.run.rs_wait = commit_transaction->t_max_wait; |
375 | stats.run.rs_locked = jiffies; | 368 | stats.run.rs_locked = jiffies; |
@@ -469,8 +462,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
469 | if (err) | 462 | if (err) |
470 | jbd2_journal_abort(journal, err); | 463 | jbd2_journal_abort(journal, err); |
471 | 464 | ||
465 | blk_start_plug(&plug); | ||
472 | jbd2_journal_write_revoke_records(journal, commit_transaction, | 466 | jbd2_journal_write_revoke_records(journal, commit_transaction, |
473 | write_op); | 467 | WRITE_SYNC); |
468 | blk_finish_plug(&plug); | ||
474 | 469 | ||
475 | jbd_debug(3, "JBD: commit phase 2\n"); | 470 | jbd_debug(3, "JBD: commit phase 2\n"); |
476 | 471 | ||
@@ -497,6 +492,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
497 | err = 0; | 492 | err = 0; |
498 | descriptor = NULL; | 493 | descriptor = NULL; |
499 | bufs = 0; | 494 | bufs = 0; |
495 | blk_start_plug(&plug); | ||
500 | while (commit_transaction->t_buffers) { | 496 | while (commit_transaction->t_buffers) { |
501 | 497 | ||
502 | /* Find the next buffer to be journaled... */ | 498 | /* Find the next buffer to be journaled... */ |
@@ -658,7 +654,7 @@ start_journal_io: | |||
658 | clear_buffer_dirty(bh); | 654 | clear_buffer_dirty(bh); |
659 | set_buffer_uptodate(bh); | 655 | set_buffer_uptodate(bh); |
660 | bh->b_end_io = journal_end_buffer_io_sync; | 656 | bh->b_end_io = journal_end_buffer_io_sync; |
661 | submit_bh(write_op, bh); | 657 | submit_bh(WRITE_SYNC, bh); |
662 | } | 658 | } |
663 | cond_resched(); | 659 | cond_resched(); |
664 | stats.run.rs_blocks_logged += bufs; | 660 | stats.run.rs_blocks_logged += bufs; |
@@ -699,6 +695,8 @@ start_journal_io: | |||
699 | __jbd2_journal_abort_hard(journal); | 695 | __jbd2_journal_abort_hard(journal); |
700 | } | 696 | } |
701 | 697 | ||
698 | blk_finish_plug(&plug); | ||
699 | |||
702 | /* Lo and behold: we have just managed to send a transaction to | 700 | /* Lo and behold: we have just managed to send a transaction to |
703 | the log. Before we can commit it, wait for the IO so far to | 701 | the log. Before we can commit it, wait for the IO so far to |
704 | complete. Control buffers being written are on the | 702 | complete. Control buffers being written are on the |
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 95b79672150a..828a0e1ea438 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c | |||
@@ -402,7 +402,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name, | |||
402 | 402 | ||
403 | if (name[0] != '\0') | 403 | if (name[0] != '\0') |
404 | return -EINVAL; | 404 | return -EINVAL; |
405 | if (!is_owner_or_cap(dentry->d_inode)) | 405 | if (!inode_owner_or_capable(dentry->d_inode)) |
406 | return -EPERM; | 406 | return -EPERM; |
407 | 407 | ||
408 | if (value) { | 408 | if (value) { |
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 9978803ceedc..eddbb373209e 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
@@ -352,7 +352,6 @@ const struct address_space_operations jfs_aops = { | |||
352 | .readpages = jfs_readpages, | 352 | .readpages = jfs_readpages, |
353 | .writepage = jfs_writepage, | 353 | .writepage = jfs_writepage, |
354 | .writepages = jfs_writepages, | 354 | .writepages = jfs_writepages, |
355 | .sync_page = block_sync_page, | ||
356 | .write_begin = jfs_write_begin, | 355 | .write_begin = jfs_write_begin, |
357 | .write_end = nobh_write_end, | 356 | .write_end = nobh_write_end, |
358 | .bmap = jfs_bmap, | 357 | .bmap = jfs_bmap, |
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index afe222bf300f..6f98a1866776 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c | |||
@@ -72,7 +72,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
72 | if (err) | 72 | if (err) |
73 | return err; | 73 | return err; |
74 | 74 | ||
75 | if (!is_owner_or_cap(inode)) { | 75 | if (!inode_owner_or_capable(inode)) { |
76 | err = -EACCES; | 76 | err = -EACCES; |
77 | goto setflags_out; | 77 | goto setflags_out; |
78 | } | 78 | } |
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 48b44bd8267b..6740d34cd82b 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c | |||
@@ -583,7 +583,6 @@ static void metapage_invalidatepage(struct page *page, unsigned long offset) | |||
583 | const struct address_space_operations jfs_metapage_aops = { | 583 | const struct address_space_operations jfs_metapage_aops = { |
584 | .readpage = metapage_readpage, | 584 | .readpage = metapage_readpage, |
585 | .writepage = metapage_writepage, | 585 | .writepage = metapage_writepage, |
586 | .sync_page = block_sync_page, | ||
587 | .releasepage = metapage_releasepage, | 586 | .releasepage = metapage_releasepage, |
588 | .invalidatepage = metapage_invalidatepage, | 587 | .invalidatepage = metapage_invalidatepage, |
589 | .set_page_dirty = __set_page_dirty_nobuffers, | 588 | .set_page_dirty = __set_page_dirty_nobuffers, |
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 3fa4c32272df..24838f1eeee5 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c | |||
@@ -678,7 +678,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, | |||
678 | struct posix_acl *acl; | 678 | struct posix_acl *acl; |
679 | int rc; | 679 | int rc; |
680 | 680 | ||
681 | if (!is_owner_or_cap(inode)) | 681 | if (!inode_owner_or_capable(inode)) |
682 | return -EPERM; | 682 | return -EPERM; |
683 | 683 | ||
684 | /* | 684 | /* |
diff --git a/fs/locks.c b/fs/locks.c index 822c3d1843af..0a4f50dfadfb 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -414,17 +414,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, | |||
414 | fl->fl_ops = NULL; | 414 | fl->fl_ops = NULL; |
415 | fl->fl_lmops = NULL; | 415 | fl->fl_lmops = NULL; |
416 | 416 | ||
417 | switch (l->l_type) { | 417 | return assign_type(fl, l->l_type); |
418 | case F_RDLCK: | ||
419 | case F_WRLCK: | ||
420 | case F_UNLCK: | ||
421 | fl->fl_type = l->l_type; | ||
422 | break; | ||
423 | default: | ||
424 | return -EINVAL; | ||
425 | } | ||
426 | |||
427 | return (0); | ||
428 | } | 418 | } |
429 | #endif | 419 | #endif |
430 | 420 | ||
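For reference, assign_type(), defined earlier in fs/locks.c, performs essentially the same validation the deleted switch duplicated:

	static int assign_type(struct file_lock *fl, int type)
	{
		switch (type) {
		case F_RDLCK:
		case F_WRLCK:
		case F_UNLCK:
			fl->fl_type = type;
			break;
		default:
			return -EINVAL;
		}
		return 0;
	}

so the conversion is a pure deduplication with no behaviour change.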
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 723bc5bca09a..1adc8d455f0e 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c | |||
@@ -39,7 +39,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) | |||
39 | bio.bi_end_io = request_complete; | 39 | bio.bi_end_io = request_complete; |
40 | 40 | ||
41 | submit_bio(rw, &bio); | 41 | submit_bio(rw, &bio); |
42 | generic_unplug_device(bdev_get_queue(bdev)); | ||
43 | wait_for_completion(&complete); | 42 | wait_for_completion(&complete); |
44 | return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO; | 43 | return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO; |
45 | } | 44 | } |
@@ -168,7 +167,6 @@ static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len) | |||
168 | } | 167 | } |
169 | len = PAGE_ALIGN(len); | 168 | len = PAGE_ALIGN(len); |
170 | __bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); | 169 | __bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); |
171 | generic_unplug_device(bdev_get_queue(logfs_super(sb)->s_bdev)); | ||
172 | } | 170 | } |
173 | 171 | ||
174 | 172 | ||
diff --git a/fs/logfs/file.c b/fs/logfs/file.c index e86376b87af1..c2ad7028def4 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c | |||
@@ -196,7 +196,7 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
196 | if (IS_RDONLY(inode)) | 196 | if (IS_RDONLY(inode)) |
197 | return -EROFS; | 197 | return -EROFS; |
198 | 198 | ||
199 | if (!is_owner_or_cap(inode)) | 199 | if (!inode_owner_or_capable(inode)) |
200 | return -EACCES; | 200 | return -EACCES; |
201 | 201 | ||
202 | err = get_user(flags, (int __user *)arg); | 202 | err = get_user(flags, (int __user *)arg); |
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 03b8c240aeda..edfea7a3a747 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c | |||
@@ -293,7 +293,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
293 | return ret; | 293 | return ret; |
294 | } | 294 | } |
295 | 295 | ||
296 | /* called with inode_lock held */ | 296 | /* called with inode->i_lock held */ |
297 | static int logfs_drop_inode(struct inode *inode) | 297 | static int logfs_drop_inode(struct inode *inode) |
298 | { | 298 | { |
299 | struct logfs_super *super = logfs_super(inode->i_sb); | 299 | struct logfs_super *super = logfs_super(inode->i_sb); |
diff --git a/fs/minix/Kconfig b/fs/minix/Kconfig index 0fd7ca994264..6624684dd5de 100644 --- a/fs/minix/Kconfig +++ b/fs/minix/Kconfig | |||
@@ -15,3 +15,11 @@ config MINIX_FS | |||
15 | module will be called minix. Note that the file system of your root | 15 | module will be called minix. Note that the file system of your root |
16 | partition (the one containing the directory /) cannot be compiled as | 16 | partition (the one containing the directory /) cannot be compiled as |
17 | a module. | 17 | a module. |
18 | |||
19 | config MINIX_FS_NATIVE_ENDIAN | ||
20 | def_bool MINIX_FS | ||
21 | depends on H8300 || M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU) | ||
22 | |||
23 | config MINIX_FS_BIG_ENDIAN_16BIT_INDEXED | ||
24 | def_bool MINIX_FS | ||
25 | depends on M68K && MMU | ||
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index ae0b83f476a6..adcdc0a4e182 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
@@ -399,7 +399,6 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block) | |||
399 | static const struct address_space_operations minix_aops = { | 399 | static const struct address_space_operations minix_aops = { |
400 | .readpage = minix_readpage, | 400 | .readpage = minix_readpage, |
401 | .writepage = minix_writepage, | 401 | .writepage = minix_writepage, |
402 | .sync_page = block_sync_page, | ||
403 | .write_begin = minix_write_begin, | 402 | .write_begin = minix_write_begin, |
404 | .write_end = generic_write_end, | 403 | .write_end = generic_write_end, |
405 | .bmap = minix_bmap | 404 | .bmap = minix_bmap |
diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 407b1c84911e..341e2122879a 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h | |||
@@ -88,4 +88,78 @@ static inline struct minix_inode_info *minix_i(struct inode *inode) | |||
88 | return list_entry(inode, struct minix_inode_info, vfs_inode); | 88 | return list_entry(inode, struct minix_inode_info, vfs_inode); |
89 | } | 89 | } |
90 | 90 | ||
91 | #if defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) && \ | ||
92 | defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED) | ||
93 | |||
94 | #error Minix file system byte order broken | ||
95 | |||
96 | #elif defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) | ||
97 | |||
98 | /* | ||
99 | * big-endian 32 or 64 bit indexed bitmaps on big-endian system or | ||
100 | * little-endian bitmaps on little-endian system | ||
101 | */ | ||
102 | |||
103 | #define minix_test_and_set_bit(nr, addr) \ | ||
104 | __test_and_set_bit((nr), (unsigned long *)(addr)) | ||
105 | #define minix_set_bit(nr, addr) \ | ||
106 | __set_bit((nr), (unsigned long *)(addr)) | ||
107 | #define minix_test_and_clear_bit(nr, addr) \ | ||
108 | __test_and_clear_bit((nr), (unsigned long *)(addr)) | ||
109 | #define minix_test_bit(nr, addr) \ | ||
110 | test_bit((nr), (unsigned long *)(addr)) | ||
111 | #define minix_find_first_zero_bit(addr, size) \ | ||
112 | find_first_zero_bit((unsigned long *)(addr), (size)) | ||
113 | |||
114 | #elif defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED) | ||
115 | |||
116 | /* | ||
117 | * big-endian 16bit indexed bitmaps | ||
118 | */ | ||
119 | |||
120 | static inline int minix_find_first_zero_bit(const void *vaddr, unsigned size) | ||
121 | { | ||
122 | const unsigned short *p = vaddr, *addr = vaddr; | ||
123 | unsigned short num; | ||
124 | |||
125 | if (!size) | ||
126 | return 0; | ||
127 | |||
128 | size = (size >> 4) + ((size & 15) > 0); | ||
129 | while (*p++ == 0xffff) { | ||
130 | if (--size == 0) | ||
131 | return (p - addr) << 4; | ||
132 | } | ||
133 | |||
134 | num = *--p; | ||
135 | return ((p - addr) << 4) + ffz(num); | ||
136 | } | ||
137 | |||
138 | #define minix_test_and_set_bit(nr, addr) \ | ||
139 | __test_and_set_bit((nr) ^ 16, (unsigned long *)(addr)) | ||
140 | #define minix_set_bit(nr, addr) \ | ||
141 | __set_bit((nr) ^ 16, (unsigned long *)(addr)) | ||
142 | #define minix_test_and_clear_bit(nr, addr) \ | ||
143 | __test_and_clear_bit((nr) ^ 16, (unsigned long *)(addr)) | ||
144 | |||
145 | static inline int minix_test_bit(int nr, const void *vaddr) | ||
146 | { | ||
147 | const unsigned short *p = vaddr; | ||
148 | return (p[nr >> 4] & (1U << (nr & 15))) != 0; | ||
149 | } | ||
150 | |||
151 | #else | ||
152 | |||
153 | /* | ||
154 | * little-endian bitmaps | ||
155 | */ | ||
156 | |||
157 | #define minix_test_and_set_bit __test_and_set_bit_le | ||
158 | #define minix_set_bit __set_bit_le | ||
159 | #define minix_test_and_clear_bit __test_and_clear_bit_le | ||
160 | #define minix_test_bit test_bit_le | ||
161 | #define minix_find_first_zero_bit find_first_zero_bit_le | ||
162 | |||
163 | #endif | ||
164 | |||
91 | #endif /* FS_MINIX_H */ | 165 | #endif /* FS_MINIX_H */ |
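The "nr ^ 16" in the m68k case deserves a note: the on-disk bitmap is an array of big-endian 16-bit words, while __set_bit() and friends index native big-endian 32-bit longs. XOR-ing the bit index with 16 swaps the two halfwords of each long, for example (illustrative):

	/* on-disk bit 0  = LSB of word 0 (the high halfword) -> long bit 16 */
	/* on-disk bit 16 = LSB of word 1 (the low halfword)  -> long bit 0  */
	minix_set_bit(0, addr);		/* expands to __set_bit(16, addr) */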
diff --git a/fs/mpage.c b/fs/mpage.c index d78455a81ec9..0afc809e46e0 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -364,6 +364,9 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, | |||
364 | sector_t last_block_in_bio = 0; | 364 | sector_t last_block_in_bio = 0; |
365 | struct buffer_head map_bh; | 365 | struct buffer_head map_bh; |
366 | unsigned long first_logical_block = 0; | 366 | unsigned long first_logical_block = 0; |
367 | struct blk_plug plug; | ||
368 | |||
369 | blk_start_plug(&plug); | ||
367 | 370 | ||
368 | map_bh.b_state = 0; | 371 | map_bh.b_state = 0; |
369 | map_bh.b_size = 0; | 372 | map_bh.b_size = 0; |
@@ -385,6 +388,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, | |||
385 | BUG_ON(!list_empty(pages)); | 388 | BUG_ON(!list_empty(pages)); |
386 | if (bio) | 389 | if (bio) |
387 | mpage_bio_submit(READ, bio); | 390 | mpage_bio_submit(READ, bio); |
391 | blk_finish_plug(&plug); | ||
388 | return 0; | 392 | return 0; |
389 | } | 393 | } |
390 | EXPORT_SYMBOL(mpage_readpages); | 394 | EXPORT_SYMBOL(mpage_readpages); |
@@ -666,8 +670,11 @@ int | |||
666 | mpage_writepages(struct address_space *mapping, | 670 | mpage_writepages(struct address_space *mapping, |
667 | struct writeback_control *wbc, get_block_t get_block) | 671 | struct writeback_control *wbc, get_block_t get_block) |
668 | { | 672 | { |
673 | struct blk_plug plug; | ||
669 | int ret; | 674 | int ret; |
670 | 675 | ||
676 | blk_start_plug(&plug); | ||
677 | |||
671 | if (!get_block) | 678 | if (!get_block) |
672 | ret = generic_writepages(mapping, wbc); | 679 | ret = generic_writepages(mapping, wbc); |
673 | else { | 680 | else { |
@@ -682,6 +689,7 @@ mpage_writepages(struct address_space *mapping, | |||
682 | if (mpd.bio) | 689 | if (mpd.bio) |
683 | mpage_bio_submit(WRITE, mpd.bio); | 690 | mpage_bio_submit(WRITE, mpd.bio); |
684 | } | 691 | } |
692 | blk_finish_plug(&plug); | ||
685 | return ret; | 693 | return ret; |
686 | } | 694 | } |
687 | EXPORT_SYMBOL(mpage_writepages); | 695 | EXPORT_SYMBOL(mpage_writepages); |
diff --git a/fs/namei.c b/fs/namei.c index 5a9a6c3094da..3cb616d38d9c 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -183,6 +183,9 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag | |||
183 | 183 | ||
184 | mask &= MAY_READ | MAY_WRITE | MAY_EXEC; | 184 | mask &= MAY_READ | MAY_WRITE | MAY_EXEC; |
185 | 185 | ||
186 | if (current_user_ns() != inode_userns(inode)) | ||
187 | goto other_perms; | ||
188 | |||
186 | if (current_fsuid() == inode->i_uid) | 189 | if (current_fsuid() == inode->i_uid) |
187 | mode >>= 6; | 190 | mode >>= 6; |
188 | else { | 191 | else { |
@@ -196,6 +199,7 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag | |||
196 | mode >>= 3; | 199 | mode >>= 3; |
197 | } | 200 | } |
198 | 201 | ||
202 | other_perms: | ||
199 | /* | 203 | /* |
200 | * If the DACs are ok we don't need any capability check. | 204 | * If the DACs are ok we don't need any capability check. |
201 | */ | 205 | */ |
@@ -237,7 +241,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags, | |||
237 | * Executable DACs are overridable if at least one exec bit is set. | 241 | * Executable DACs are overridable if at least one exec bit is set. |
238 | */ | 242 | */ |
239 | if (!(mask & MAY_EXEC) || execute_ok(inode)) | 243 | if (!(mask & MAY_EXEC) || execute_ok(inode)) |
240 | if (capable(CAP_DAC_OVERRIDE)) | 244 | if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) |
241 | return 0; | 245 | return 0; |
242 | 246 | ||
243 | /* | 247 | /* |
@@ -245,7 +249,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags, | |||
245 | */ | 249 | */ |
246 | mask &= MAY_READ | MAY_WRITE | MAY_EXEC; | 250 | mask &= MAY_READ | MAY_WRITE | MAY_EXEC; |
247 | if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) | 251 | if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) |
248 | if (capable(CAP_DAC_READ_SEARCH)) | 252 | if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH)) |
249 | return 0; | 253 | return 0; |
250 | 254 | ||
251 | return -EACCES; | 255 | return -EACCES; |
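The conversions in this file all follow one pattern: DAC-override capabilities are now evaluated against the user namespace that owns the inode instead of globally. An equivalent fs-private helper would look like this sketch (myfs_may_override_dac() is hypothetical):

	static bool myfs_may_override_dac(struct inode *inode)
	{
		/* capable(CAP_DAC_OVERRIDE) tests against init_user_ns;
		 * this form honours a per-namespace root instead. */
		return ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE);
	}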
@@ -654,6 +658,7 @@ static inline int handle_reval_path(struct nameidata *nd) | |||
654 | static inline int exec_permission(struct inode *inode, unsigned int flags) | 658 | static inline int exec_permission(struct inode *inode, unsigned int flags) |
655 | { | 659 | { |
656 | int ret; | 660 | int ret; |
661 | struct user_namespace *ns = inode_userns(inode); | ||
657 | 662 | ||
658 | if (inode->i_op->permission) { | 663 | if (inode->i_op->permission) { |
659 | ret = inode->i_op->permission(inode, MAY_EXEC, flags); | 664 | ret = inode->i_op->permission(inode, MAY_EXEC, flags); |
@@ -666,7 +671,8 @@ static inline int exec_permission(struct inode *inode, unsigned int flags) | |||
666 | if (ret == -ECHILD) | 671 | if (ret == -ECHILD) |
667 | return ret; | 672 | return ret; |
668 | 673 | ||
669 | if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) | 674 | if (ns_capable(ns, CAP_DAC_OVERRIDE) || |
675 | ns_capable(ns, CAP_DAC_READ_SEARCH)) | ||
670 | goto ok; | 676 | goto ok; |
671 | 677 | ||
672 | return ret; | 678 | return ret; |
@@ -986,6 +992,12 @@ int follow_down_one(struct path *path) | |||
986 | return 0; | 992 | return 0; |
987 | } | 993 | } |
988 | 994 | ||
995 | static inline bool managed_dentry_might_block(struct dentry *dentry) | ||
996 | { | ||
997 | return (dentry->d_flags & DCACHE_MANAGE_TRANSIT && | ||
998 | dentry->d_op->d_manage(dentry, true) < 0); | ||
999 | } | ||
1000 | |||
989 | /* | 1001 | /* |
990 | * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we | 1002 | * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we |
991 | * meet a managed dentry and we're not walking to "..". True is returned to | 1003 | * meet a managed dentry and we're not walking to "..". True is returned to |
@@ -994,19 +1006,26 @@ int follow_down_one(struct path *path) | |||
994 | static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | 1006 | static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, |
995 | struct inode **inode, bool reverse_transit) | 1007 | struct inode **inode, bool reverse_transit) |
996 | { | 1008 | { |
997 | while (d_mountpoint(path->dentry)) { | 1009 | for (;;) { |
998 | struct vfsmount *mounted; | 1010 | struct vfsmount *mounted; |
999 | if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && | 1011 | /* |
1000 | !reverse_transit && | 1012 | * Don't forget we might have a non-mountpoint managed dentry |
1001 | path->dentry->d_op->d_manage(path->dentry, true) < 0) | 1013 | * that wants to block transit. |
1014 | */ | ||
1015 | *inode = path->dentry->d_inode; | ||
1016 | if (!reverse_transit && | ||
1017 | unlikely(managed_dentry_might_block(path->dentry))) | ||
1002 | return false; | 1018 | return false; |
1019 | |||
1020 | if (!d_mountpoint(path->dentry)) | ||
1021 | break; | ||
1022 | |||
1003 | mounted = __lookup_mnt(path->mnt, path->dentry, 1); | 1023 | mounted = __lookup_mnt(path->mnt, path->dentry, 1); |
1004 | if (!mounted) | 1024 | if (!mounted) |
1005 | break; | 1025 | break; |
1006 | path->mnt = mounted; | 1026 | path->mnt = mounted; |
1007 | path->dentry = mounted->mnt_root; | 1027 | path->dentry = mounted->mnt_root; |
1008 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); | 1028 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); |
1009 | *inode = path->dentry->d_inode; | ||
1010 | } | 1029 | } |
1011 | 1030 | ||
1012 | if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) | 1031 | if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) |
@@ -1644,13 +1663,16 @@ static int path_lookupat(int dfd, const char *name, | |||
1644 | err = -ECHILD; | 1663 | err = -ECHILD; |
1645 | } | 1664 | } |
1646 | 1665 | ||
1647 | if (!err) | 1666 | if (!err) { |
1648 | err = handle_reval_path(nd); | 1667 | err = handle_reval_path(nd); |
1668 | if (err) | ||
1669 | path_put(&nd->path); | ||
1670 | } | ||
1649 | 1671 | ||
1650 | if (!err && nd->flags & LOOKUP_DIRECTORY) { | 1672 | if (!err && nd->flags & LOOKUP_DIRECTORY) { |
1651 | if (!nd->inode->i_op->lookup) { | 1673 | if (!nd->inode->i_op->lookup) { |
1652 | path_put(&nd->path); | 1674 | path_put(&nd->path); |
1653 | return -ENOTDIR; | 1675 | err = -ENOTDIR; |
1654 | } | 1676 | } |
1655 | } | 1677 | } |
1656 | 1678 | ||
@@ -1842,11 +1864,15 @@ static inline int check_sticky(struct inode *dir, struct inode *inode) | |||
1842 | 1864 | ||
1843 | if (!(dir->i_mode & S_ISVTX)) | 1865 | if (!(dir->i_mode & S_ISVTX)) |
1844 | return 0; | 1866 | return 0; |
1867 | if (current_user_ns() != inode_userns(inode)) | ||
1868 | goto other_userns; | ||
1845 | if (inode->i_uid == fsuid) | 1869 | if (inode->i_uid == fsuid) |
1846 | return 0; | 1870 | return 0; |
1847 | if (dir->i_uid == fsuid) | 1871 | if (dir->i_uid == fsuid) |
1848 | return 0; | 1872 | return 0; |
1849 | return !capable(CAP_FOWNER); | 1873 | |
1874 | other_userns: | ||
1875 | return !ns_capable(inode_userns(inode), CAP_FOWNER); | ||
1850 | } | 1876 | } |
1851 | 1877 | ||
1852 | /* | 1878 | /* |
@@ -2026,7 +2052,7 @@ static int may_open(struct path *path, int acc_mode, int flag) | |||
2026 | } | 2052 | } |
2027 | 2053 | ||
2028 | /* O_NOATIME can only be set by the owner or superuser */ | 2054 | /* O_NOATIME can only be set by the owner or superuser */ |
2029 | if (flag & O_NOATIME && !is_owner_or_cap(inode)) | 2055 | if (flag & O_NOATIME && !inode_owner_or_capable(inode)) |
2030 | return -EPERM; | 2056 | return -EPERM; |
2031 | 2057 | ||
2032 | /* | 2058 | /* |
@@ -2440,7 +2466,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | |||
2440 | if (error) | 2466 | if (error) |
2441 | return error; | 2467 | return error; |
2442 | 2468 | ||
2443 | if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) | 2469 | if ((S_ISCHR(mode) || S_ISBLK(mode)) && |
2470 | !ns_capable(inode_userns(dir), CAP_MKNOD)) | ||
2444 | return -EPERM; | 2471 | return -EPERM; |
2445 | 2472 | ||
2446 | if (!dir->i_op->mknod) | 2473 | if (!dir->i_op->mknod) |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index abdf38d5971d..7237672216c8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -44,6 +44,7 @@ | |||
44 | /* #define NFS_DEBUG_VERBOSE 1 */ | 44 | /* #define NFS_DEBUG_VERBOSE 1 */ |
45 | 45 | ||
46 | static int nfs_opendir(struct inode *, struct file *); | 46 | static int nfs_opendir(struct inode *, struct file *); |
47 | static int nfs_closedir(struct inode *, struct file *); | ||
47 | static int nfs_readdir(struct file *, void *, filldir_t); | 48 | static int nfs_readdir(struct file *, void *, filldir_t); |
48 | static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); | 49 | static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); |
49 | static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); | 50 | static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); |
@@ -64,7 +65,7 @@ const struct file_operations nfs_dir_operations = { | |||
64 | .read = generic_read_dir, | 65 | .read = generic_read_dir, |
65 | .readdir = nfs_readdir, | 66 | .readdir = nfs_readdir, |
66 | .open = nfs_opendir, | 67 | .open = nfs_opendir, |
67 | .release = nfs_release, | 68 | .release = nfs_closedir, |
68 | .fsync = nfs_fsync_dir, | 69 | .fsync = nfs_fsync_dir, |
69 | }; | 70 | }; |
70 | 71 | ||
@@ -133,13 +134,35 @@ const struct inode_operations nfs4_dir_inode_operations = { | |||
133 | 134 | ||
134 | #endif /* CONFIG_NFS_V4 */ | 135 | #endif /* CONFIG_NFS_V4 */ |
135 | 136 | ||
137 | static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred) | ||
138 | { | ||
139 | struct nfs_open_dir_context *ctx; | ||
140 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | ||
141 | if (ctx != NULL) { | ||
142 | ctx->duped = 0; | ||
143 | ctx->dir_cookie = 0; | ||
144 | ctx->dup_cookie = 0; | ||
145 | ctx->cred = get_rpccred(cred); | ||
146 | } else | ||
147 | ctx = ERR_PTR(-ENOMEM); | ||
148 | return ctx; | ||
149 | } | ||
150 | |||
151 | static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) | ||
152 | { | ||
153 | put_rpccred(ctx->cred); | ||
154 | kfree(ctx); | ||
155 | } | ||
156 | |||
136 | /* | 157 | /* |
137 | * Open file | 158 | * Open file |
138 | */ | 159 | */ |
139 | static int | 160 | static int |
140 | nfs_opendir(struct inode *inode, struct file *filp) | 161 | nfs_opendir(struct inode *inode, struct file *filp) |
141 | { | 162 | { |
142 | int res; | 163 | int res = 0; |
164 | struct nfs_open_dir_context *ctx; | ||
165 | struct rpc_cred *cred; | ||
143 | 166 | ||
144 | dfprintk(FILE, "NFS: open dir(%s/%s)\n", | 167 | dfprintk(FILE, "NFS: open dir(%s/%s)\n", |
145 | filp->f_path.dentry->d_parent->d_name.name, | 168 | filp->f_path.dentry->d_parent->d_name.name, |
@@ -147,8 +170,15 @@ nfs_opendir(struct inode *inode, struct file *filp) | |||
147 | 170 | ||
148 | nfs_inc_stats(inode, NFSIOS_VFSOPEN); | 171 | nfs_inc_stats(inode, NFSIOS_VFSOPEN); |
149 | 172 | ||
150 | /* Call generic open code in order to cache credentials */ | 173 | cred = rpc_lookup_cred(); |
151 | res = nfs_open(inode, filp); | 174 | if (IS_ERR(cred)) |
175 | return PTR_ERR(cred); | ||
176 | ctx = alloc_nfs_open_dir_context(cred); | ||
177 | if (IS_ERR(ctx)) { | ||
178 | res = PTR_ERR(ctx); | ||
179 | goto out; | ||
180 | } | ||
181 | filp->private_data = ctx; | ||
152 | if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { | 182 | if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { |
153 | /* This is a mountpoint, so d_revalidate will never | 183 | /* This is a mountpoint, so d_revalidate will never |
154 | * have been called, so we need to refresh the | 184 | * have been called, so we need to refresh the |
@@ -156,9 +186,18 @@ nfs_opendir(struct inode *inode, struct file *filp) | |||
156 | */ | 186 | */ |
157 | __nfs_revalidate_inode(NFS_SERVER(inode), inode); | 187 | __nfs_revalidate_inode(NFS_SERVER(inode), inode); |
158 | } | 188 | } |
189 | out: | ||
190 | put_rpccred(cred); | ||
159 | return res; | 191 | return res; |
160 | } | 192 | } |
161 | 193 | ||
194 | static int | ||
195 | nfs_closedir(struct inode *inode, struct file *filp) | ||
196 | { | ||
197 | put_nfs_open_dir_context(filp->private_data); | ||
198 | return 0; | ||
199 | } | ||
200 | |||
162 | struct nfs_cache_array_entry { | 201 | struct nfs_cache_array_entry { |
163 | u64 cookie; | 202 | u64 cookie; |
164 | u64 ino; | 203 | u64 ino; |
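With nfs_closedir() registered as the release method, every open of an NFS directory now carries private per-open state: an nfs_open_dir_context holding the readdir cookies and an rpc_cred pinned at open time, freed symmetrically on the last close. A condensed sketch of the lifecycle, following the hunks above (function names are illustrative):

	/* Sketch: per-open state hung off file->private_data. */
	static int dir_open(struct inode *inode, struct file *filp)
	{
		struct rpc_cred *cred = rpc_lookup_cred();
		struct nfs_open_dir_context *ctx;

		if (IS_ERR(cred))
			return PTR_ERR(cred);
		ctx = alloc_nfs_open_dir_context(cred);	/* takes its own cred ref */
		put_rpccred(cred);			/* drop the lookup ref */
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
		filp->private_data = ctx;
		return 0;
	}

	static int dir_release(struct inode *inode, struct file *filp)
	{
		put_nfs_open_dir_context(filp->private_data);
		return 0;
	}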
@@ -284,19 +323,20 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri | |||
284 | { | 323 | { |
285 | loff_t diff = desc->file->f_pos - desc->current_index; | 324 | loff_t diff = desc->file->f_pos - desc->current_index; |
286 | unsigned int index; | 325 | unsigned int index; |
326 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
287 | 327 | ||
288 | if (diff < 0) | 328 | if (diff < 0) |
289 | goto out_eof; | 329 | goto out_eof; |
290 | if (diff >= array->size) { | 330 | if (diff >= array->size) { |
291 | if (array->eof_index >= 0) | 331 | if (array->eof_index >= 0) |
292 | goto out_eof; | 332 | goto out_eof; |
293 | desc->current_index += array->size; | ||
294 | return -EAGAIN; | 333 | return -EAGAIN; |
295 | } | 334 | } |
296 | 335 | ||
297 | index = (unsigned int)diff; | 336 | index = (unsigned int)diff; |
298 | *desc->dir_cookie = array->array[index].cookie; | 337 | *desc->dir_cookie = array->array[index].cookie; |
299 | desc->cache_entry_index = index; | 338 | desc->cache_entry_index = index; |
339 | ctx->duped = 0; | ||
300 | return 0; | 340 | return 0; |
301 | out_eof: | 341 | out_eof: |
302 | desc->eof = 1; | 342 | desc->eof = 1; |
@@ -307,10 +347,18 @@ static | |||
307 | int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) | 347 | int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) |
308 | { | 348 | { |
309 | int i; | 349 | int i; |
350 | loff_t new_pos; | ||
310 | int status = -EAGAIN; | 351 | int status = -EAGAIN; |
352 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
311 | 353 | ||
312 | for (i = 0; i < array->size; i++) { | 354 | for (i = 0; i < array->size; i++) { |
313 | if (array->array[i].cookie == *desc->dir_cookie) { | 355 | if (array->array[i].cookie == *desc->dir_cookie) { |
356 | new_pos = desc->current_index + i; | ||
357 | if (new_pos < desc->file->f_pos) { | ||
358 | ctx->dup_cookie = *desc->dir_cookie; | ||
359 | ctx->duped = 1; | ||
360 | } | ||
361 | desc->file->f_pos = new_pos; | ||
314 | desc->cache_entry_index = i; | 362 | desc->cache_entry_index = i; |
315 | return 0; | 363 | return 0; |
316 | } | 364 | } |
@@ -342,6 +390,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) | |||
342 | 390 | ||
343 | if (status == -EAGAIN) { | 391 | if (status == -EAGAIN) { |
344 | desc->last_cookie = array->last_cookie; | 392 | desc->last_cookie = array->last_cookie; |
393 | desc->current_index += array->size; | ||
345 | desc->page_index++; | 394 | desc->page_index++; |
346 | } | 395 | } |
347 | nfs_readdir_release_array(desc->page); | 396 | nfs_readdir_release_array(desc->page); |
@@ -354,7 +403,8 @@ static | |||
354 | int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, | 403 | int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, |
355 | struct nfs_entry *entry, struct file *file, struct inode *inode) | 404 | struct nfs_entry *entry, struct file *file, struct inode *inode) |
356 | { | 405 | { |
357 | struct rpc_cred *cred = nfs_file_cred(file); | 406 | struct nfs_open_dir_context *ctx = file->private_data; |
407 | struct rpc_cred *cred = ctx->cred; | ||
358 | unsigned long timestamp, gencount; | 408 | unsigned long timestamp, gencount; |
359 | int error; | 409 | int error; |
360 | 410 | ||
@@ -693,6 +743,20 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
693 | int i = 0; | 743 | int i = 0; |
694 | int res = 0; | 744 | int res = 0; |
695 | struct nfs_cache_array *array = NULL; | 745 | struct nfs_cache_array *array = NULL; |
746 | struct nfs_open_dir_context *ctx = file->private_data; | ||
747 | |||
748 | if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) { | ||
749 | if (printk_ratelimit()) { | ||
750 | pr_notice("NFS: directory %s/%s contains a readdir loop. " | ||
751 | "Please contact your server vendor. " | ||
752 | "Offending cookie: %llu\n", | ||
753 | file->f_dentry->d_parent->d_name.name, | ||
754 | file->f_dentry->d_name.name, | ||
755 | *desc->dir_cookie); | ||
756 | } | ||
757 | res = -ELOOP; | ||
758 | goto out; | ||
759 | } | ||
696 | 760 | ||
697 | array = nfs_readdir_get_array(desc->page); | 761 | array = nfs_readdir_get_array(desc->page); |
698 | if (IS_ERR(array)) { | 762 | if (IS_ERR(array)) { |
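Together, these hunks give readdir a trip wire for broken servers: nfs_readdir_search_for_cookie() notices when a cookie maps to a position earlier than the current f_pos and records it in ctx->dup_cookie, and nfs_do_filldir() bails with -ELOOP (after a rate-limited notice) instead of looping forever when that cookie comes around again. For example, if the server hands out cookie 42 at f_pos 7 and again at f_pos 3, the second match sets ctx->duped, and the next encounter with cookie 42 aborts the readdir. The detection condenses to:

	/* Sketch: duplicate-cookie trip wire, condensed from the hunks above. */
	if (ctx->duped && ctx->dup_cookie == *desc->dir_cookie) {
		if (printk_ratelimit())
			pr_notice("NFS: readdir loop, offending cookie %llu\n",
				  (unsigned long long)*desc->dir_cookie);
		return -ELOOP;
	}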
@@ -785,6 +849,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
785 | struct inode *inode = dentry->d_inode; | 849 | struct inode *inode = dentry->d_inode; |
786 | nfs_readdir_descriptor_t my_desc, | 850 | nfs_readdir_descriptor_t my_desc, |
787 | *desc = &my_desc; | 851 | *desc = &my_desc; |
852 | struct nfs_open_dir_context *dir_ctx = filp->private_data; | ||
788 | int res; | 853 | int res; |
789 | 854 | ||
790 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", | 855 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", |
@@ -801,7 +866,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
801 | memset(desc, 0, sizeof(*desc)); | 866 | memset(desc, 0, sizeof(*desc)); |
802 | 867 | ||
803 | desc->file = filp; | 868 | desc->file = filp; |
804 | desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; | 869 | desc->dir_cookie = &dir_ctx->dir_cookie; |
805 | desc->decode = NFS_PROTO(inode)->decode_dirent; | 870 | desc->decode = NFS_PROTO(inode)->decode_dirent; |
806 | desc->plus = NFS_USE_READDIRPLUS(inode); | 871 | desc->plus = NFS_USE_READDIRPLUS(inode); |
807 | 872 | ||
@@ -853,6 +918,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) | |||
853 | { | 918 | { |
854 | struct dentry *dentry = filp->f_path.dentry; | 919 | struct dentry *dentry = filp->f_path.dentry; |
855 | struct inode *inode = dentry->d_inode; | 920 | struct inode *inode = dentry->d_inode; |
921 | struct nfs_open_dir_context *dir_ctx = filp->private_data; | ||
856 | 922 | ||
857 | dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", | 923 | dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", |
858 | dentry->d_parent->d_name.name, | 924 | dentry->d_parent->d_name.name, |
@@ -872,7 +938,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) | |||
872 | } | 938 | } |
873 | if (offset != filp->f_pos) { | 939 | if (offset != filp->f_pos) { |
874 | filp->f_pos = offset; | 940 | filp->f_pos = offset; |
875 | nfs_file_open_context(filp)->dir_cookie = 0; | 941 | dir_ctx->dir_cookie = 0; |
942 | dir_ctx->duped = 0; | ||
876 | } | 943 | } |
877 | out: | 944 | out: |
878 | mutex_unlock(&inode->i_mutex); | 945 | mutex_unlock(&inode->i_mutex); |
@@ -1068,7 +1135,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1068 | if (fhandle == NULL || fattr == NULL) | 1135 | if (fhandle == NULL || fattr == NULL) |
1069 | goto out_error; | 1136 | goto out_error; |
1070 | 1137 | ||
1071 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1138 | error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); |
1072 | if (error) | 1139 | if (error) |
1073 | goto out_bad; | 1140 | goto out_bad; |
1074 | if (nfs_compare_fh(NFS_FH(inode), fhandle)) | 1141 | if (nfs_compare_fh(NFS_FH(inode), fhandle)) |
@@ -1224,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru | |||
1224 | parent = dentry->d_parent; | 1291 | parent = dentry->d_parent; |
1225 | /* Protect against concurrent sillydeletes */ | 1292 | /* Protect against concurrent sillydeletes */ |
1226 | nfs_block_sillyrename(parent); | 1293 | nfs_block_sillyrename(parent); |
1227 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1294 | error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); |
1228 | if (error == -ENOENT) | 1295 | if (error == -ENOENT) |
1229 | goto no_entry; | 1296 | goto no_entry; |
1230 | if (error < 0) { | 1297 | if (error < 0) { |
@@ -1562,7 +1629,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, | |||
1562 | if (dentry->d_inode) | 1629 | if (dentry->d_inode) |
1563 | goto out; | 1630 | goto out; |
1564 | if (fhandle->size == 0) { | 1631 | if (fhandle->size == 0) { |
1565 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1632 | error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); |
1566 | if (error) | 1633 | if (error) |
1567 | goto out_error; | 1634 | goto out_error; |
1568 | } | 1635 | } |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d85a534b15cd..3ac5bd695e5e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -326,6 +326,9 @@ nfs_file_fsync(struct file *file, int datasync) | |||
326 | ret = xchg(&ctx->error, 0); | 326 | ret = xchg(&ctx->error, 0); |
327 | if (!ret && status < 0) | 327 | if (!ret && status < 0) |
328 | ret = status; | 328 | ret = status; |
329 | if (!ret && !datasync) | ||
330 | /* application has asked for meta-data sync */ | ||
331 | ret = pnfs_layoutcommit_inode(inode, true); | ||
329 | return ret; | 332 | return ret; |
330 | } | 333 | } |
331 | 334 | ||
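This hooks pNFS LAYOUTCOMMIT into metadata sync: fsync(2) (datasync == 0) now pushes layout state to the MDS via pnfs_layoutcommit_inode(inode, true), while fdatasync(2) (datasync == 1) skips it. A heavily condensed sketch of the ordering, assuming write-back has already been issued (the real function also folds in the open context's saved error):

	/* Sketch: fsync path after this change, error handling condensed. */
	static int example_fsync(struct file *file, int datasync)
	{
		struct inode *inode = file->f_path.dentry->d_inode;
		int ret = nfs_wb_all(inode);	/* flush dirty pages */

		if (!ret && !datasync)
			/* caller asked for a metadata sync too */
			ret = pnfs_layoutcommit_inode(inode, true);
		return ret;
	}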
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 1084792bc0fe..dcb61548887f 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c | |||
@@ -222,6 +222,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh, | |||
222 | goto out; | 222 | goto out; |
223 | } | 223 | } |
224 | 224 | ||
225 | if (fattr->valid & NFS_ATTR_FATTR_FSID && | ||
226 | !nfs_fsid_equal(&server->fsid, &fattr->fsid)) | ||
227 | memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); | ||
228 | |||
225 | inode = nfs_fhget(sb, mntfh, fattr); | 229 | inode = nfs_fhget(sb, mntfh, fattr); |
226 | if (IS_ERR(inode)) { | 230 | if (IS_ERR(inode)) { |
227 | dprintk("nfs_get_root: get root inode failed\n"); | 231 | dprintk("nfs_get_root: get root inode failed\n"); |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 01768e5e2c9b..57bb31ad7a5e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -254,7 +254,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
254 | struct inode *inode = ERR_PTR(-ENOENT); | 254 | struct inode *inode = ERR_PTR(-ENOENT); |
255 | unsigned long hash; | 255 | unsigned long hash; |
256 | 256 | ||
257 | if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) | 257 | nfs_attr_check_mountpoint(sb, fattr); |
258 | |||
259 | if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) | ||
258 | goto out_no_inode; | 260 | goto out_no_inode; |
259 | if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) | 261 | if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) |
260 | goto out_no_inode; | 262 | goto out_no_inode; |
@@ -298,8 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
298 | if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) | 300 | if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) |
299 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); | 301 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); |
300 | /* Deal with crossing mountpoints */ | 302 | /* Deal with crossing mountpoints */ |
301 | if ((fattr->valid & NFS_ATTR_FATTR_FSID) | 303 | if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || |
302 | && !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { | 304 | fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { |
303 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) | 305 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) |
304 | inode->i_op = &nfs_referral_inode_operations; | 306 | inode->i_op = &nfs_referral_inode_operations; |
305 | else | 307 | else |
@@ -639,7 +641,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cr | |||
639 | ctx->mode = f_mode; | 641 | ctx->mode = f_mode; |
640 | ctx->flags = 0; | 642 | ctx->flags = 0; |
641 | ctx->error = 0; | 643 | ctx->error = 0; |
642 | ctx->dir_cookie = 0; | ||
643 | nfs_init_lock_context(&ctx->lock_context); | 644 | nfs_init_lock_context(&ctx->lock_context); |
644 | ctx->lock_context.open_context = ctx; | 645 | ctx->lock_context.open_context = ctx; |
645 | INIT_LIST_HEAD(&ctx->list); | 646 | INIT_LIST_HEAD(&ctx->list); |
@@ -1471,6 +1472,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) | |||
1471 | nfsi->delegation_state = 0; | 1472 | nfsi->delegation_state = 0; |
1472 | init_rwsem(&nfsi->rwsem); | 1473 | init_rwsem(&nfsi->rwsem); |
1473 | nfsi->layout = NULL; | 1474 | nfsi->layout = NULL; |
1475 | atomic_set(&nfsi->commits_outstanding, 0); | ||
1474 | #endif | 1476 | #endif |
1475 | } | 1477 | } |
1476 | 1478 | ||
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 72e0bddf7a2f..ce118ce885dd 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -39,6 +39,12 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp) | |||
39 | return 0; | 39 | return 0; |
40 | } | 40 | } |
41 | 41 | ||
42 | static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr) | ||
43 | { | ||
44 | if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid)) | ||
45 | fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT; | ||
46 | } | ||
47 | |||
42 | struct nfs_clone_mount { | 48 | struct nfs_clone_mount { |
43 | const struct super_block *sb; | 49 | const struct super_block *sb; |
44 | const struct dentry *dentry; | 50 | const struct dentry *dentry; |
@@ -214,6 +220,7 @@ extern const u32 nfs41_maxwrite_overhead; | |||
214 | /* nfs4proc.c */ | 220 | /* nfs4proc.c */ |
215 | #ifdef CONFIG_NFS_V4 | 221 | #ifdef CONFIG_NFS_V4 |
216 | extern struct rpc_procinfo nfs4_procedures[]; | 222 | extern struct rpc_procinfo nfs4_procedures[]; |
223 | void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *); | ||
217 | #endif | 224 | #endif |
218 | 225 | ||
219 | extern int nfs4_init_ds_session(struct nfs_client *clp); | 226 | extern int nfs4_init_ds_session(struct nfs_client *clp); |
@@ -276,11 +283,25 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, | |||
276 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); | 283 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); |
277 | 284 | ||
278 | /* write.c */ | 285 | /* write.c */ |
286 | extern void nfs_commit_free(struct nfs_write_data *p); | ||
279 | extern int nfs_initiate_write(struct nfs_write_data *data, | 287 | extern int nfs_initiate_write(struct nfs_write_data *data, |
280 | struct rpc_clnt *clnt, | 288 | struct rpc_clnt *clnt, |
281 | const struct rpc_call_ops *call_ops, | 289 | const struct rpc_call_ops *call_ops, |
282 | int how); | 290 | int how); |
283 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); | 291 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); |
292 | extern int nfs_initiate_commit(struct nfs_write_data *data, | ||
293 | struct rpc_clnt *clnt, | ||
294 | const struct rpc_call_ops *call_ops, | ||
295 | int how); | ||
296 | extern void nfs_init_commit(struct nfs_write_data *data, | ||
297 | struct list_head *head, | ||
298 | struct pnfs_layout_segment *lseg); | ||
299 | void nfs_retry_commit(struct list_head *page_list, | ||
300 | struct pnfs_layout_segment *lseg); | ||
301 | void nfs_commit_clear_lock(struct nfs_inode *nfsi); | ||
302 | void nfs_commitdata_release(void *data); | ||
303 | void nfs_commit_release_pages(struct nfs_write_data *data); | ||
304 | |||
284 | #ifdef CONFIG_MIGRATION | 305 | #ifdef CONFIG_MIGRATION |
285 | extern int nfs_migrate_page(struct address_space *, | 306 | extern int nfs_migrate_page(struct address_space *, |
286 | struct page *, struct page *); | 307 | struct page *, struct page *); |
@@ -296,12 +317,14 @@ extern int nfs4_init_client(struct nfs_client *clp, | |||
296 | rpc_authflavor_t authflavour, | 317 | rpc_authflavor_t authflavour, |
297 | int noresvport); | 318 | int noresvport); |
298 | extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); | 319 | extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); |
299 | extern int _nfs4_call_sync(struct nfs_server *server, | 320 | extern int _nfs4_call_sync(struct rpc_clnt *clnt, |
321 | struct nfs_server *server, | ||
300 | struct rpc_message *msg, | 322 | struct rpc_message *msg, |
301 | struct nfs4_sequence_args *args, | 323 | struct nfs4_sequence_args *args, |
302 | struct nfs4_sequence_res *res, | 324 | struct nfs4_sequence_res *res, |
303 | int cache_reply); | 325 | int cache_reply); |
304 | extern int _nfs4_call_sync_session(struct nfs_server *server, | 326 | extern int _nfs4_call_sync_session(struct rpc_clnt *clnt, |
327 | struct nfs_server *server, | ||
305 | struct rpc_message *msg, | 328 | struct rpc_message *msg, |
306 | struct nfs4_sequence_args *args, | 329 | struct nfs4_sequence_args *args, |
307 | struct nfs4_sequence_res *res, | 330 | struct nfs4_sequence_res *res, |
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index bf1c68009ffd..ad92bf731ff5 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/string.h> | 15 | #include <linux/string.h> |
16 | #include <linux/sunrpc/clnt.h> | 16 | #include <linux/sunrpc/clnt.h> |
17 | #include <linux/vfs.h> | 17 | #include <linux/vfs.h> |
18 | #include <linux/sunrpc/gss_api.h> | ||
18 | #include "internal.h" | 19 | #include "internal.h" |
19 | 20 | ||
20 | #define NFSDBG_FACILITY NFSDBG_VFS | 21 | #define NFSDBG_FACILITY NFSDBG_VFS |
@@ -27,7 +28,8 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; | |||
27 | 28 | ||
28 | static struct vfsmount *nfs_do_submount(struct dentry *dentry, | 29 | static struct vfsmount *nfs_do_submount(struct dentry *dentry, |
29 | struct nfs_fh *fh, | 30 | struct nfs_fh *fh, |
30 | struct nfs_fattr *fattr); | 31 | struct nfs_fattr *fattr, |
32 | rpc_authflavor_t authflavor); | ||
31 | 33 | ||
32 | /* | 34 | /* |
33 | * nfs_path - reconstruct the path given an arbitrary dentry | 35 | * nfs_path - reconstruct the path given an arbitrary dentry |
@@ -116,6 +118,100 @@ Elong: | |||
116 | return ERR_PTR(-ENAMETOOLONG); | 118 | return ERR_PTR(-ENAMETOOLONG); |
117 | } | 119 | } |
118 | 120 | ||
121 | #ifdef CONFIG_NFS_V4 | ||
122 | static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors, struct inode *inode) | ||
123 | { | ||
124 | struct gss_api_mech *mech; | ||
125 | struct xdr_netobj oid; | ||
126 | int i; | ||
127 | rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; | ||
128 | |||
129 | for (i = 0; i < flavors->num_flavors; i++) { | ||
130 | struct nfs4_secinfo_flavor *flavor; | ||
131 | flavor = &flavors->flavors[i]; | ||
132 | |||
133 | if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { | ||
134 | pseudoflavor = flavor->flavor; | ||
135 | break; | ||
136 | } else if (flavor->flavor == RPC_AUTH_GSS) { | ||
137 | oid.len = flavor->gss.sec_oid4.len; | ||
138 | oid.data = flavor->gss.sec_oid4.data; | ||
139 | mech = gss_mech_get_by_OID(&oid); | ||
140 | if (!mech) | ||
141 | continue; | ||
142 | pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); | ||
143 | gss_mech_put(mech); | ||
144 | break; | ||
145 | } | ||
146 | } | ||
147 | |||
148 | return pseudoflavor; | ||
149 | } | ||
150 | |||
151 | static rpc_authflavor_t nfs_negotiate_security(const struct dentry *parent, const struct dentry *dentry) | ||
152 | { | ||
153 | int status = 0; | ||
154 | struct page *page; | ||
155 | struct nfs4_secinfo_flavors *flavors; | ||
156 | int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); | ||
157 | rpc_authflavor_t flavor = RPC_AUTH_UNIX; | ||
158 | |||
159 | secinfo = NFS_PROTO(parent->d_inode)->secinfo; | ||
160 | if (secinfo != NULL) { | ||
161 | page = alloc_page(GFP_KERNEL); | ||
162 | if (!page) { | ||
163 | status = -ENOMEM; | ||
164 | goto out; | ||
165 | } | ||
166 | flavors = page_address(page); | ||
167 | status = secinfo(parent->d_inode, &dentry->d_name, flavors); | ||
168 | flavor = nfs_find_best_sec(flavors, dentry->d_inode); | ||
169 | put_page(page); | ||
170 | } | ||
171 | |||
172 | return flavor; | ||
173 | |||
174 | out: | ||
175 | status = -ENOMEM; | ||
176 | return status; | ||
177 | } | ||
178 | |||
179 | static rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent, | ||
180 | struct dentry *dentry, struct path *path, | ||
181 | struct nfs_fh *fh, struct nfs_fattr *fattr) | ||
182 | { | ||
183 | rpc_authflavor_t flavor; | ||
184 | struct rpc_clnt *clone; | ||
185 | struct rpc_auth *auth; | ||
186 | int err; | ||
187 | |||
188 | flavor = nfs_negotiate_security(parent, path->dentry); | ||
189 | if (flavor < 0) | ||
190 | goto out; | ||
191 | clone = rpc_clone_client(server->client); | ||
192 | auth = rpcauth_create(flavor, clone); | ||
193 | if (!auth) { | ||
194 | flavor = -EIO; | ||
195 | goto out; | ||
196 | } | ||
197 | err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode, | ||
198 | &path->dentry->d_name, | ||
199 | fh, fattr); | ||
200 | if (err < 0) | ||
201 | flavor = err; | ||
202 | out: | ||
203 | return flavor; | ||
204 | } | ||
205 | #else /* CONFIG_NFS_V4 */ | ||
206 | static inline rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, | ||
207 | struct dentry *parent, struct dentry *dentry, | ||
208 | struct path *path, struct nfs_fh *fh, | ||
209 | struct nfs_fattr *fattr) | ||
210 | { | ||
211 | return -EPERM; | ||
212 | } | ||
213 | #endif /* CONFIG_NFS_V4 */ | ||
214 | |||
119 | /* | 215 | /* |
120 | * nfs_d_automount - Handle crossing a mountpoint on the server | 216 | * nfs_d_automount - Handle crossing a mountpoint on the server |
121 | * @path - The mountpoint | 217 | * @path - The mountpoint |
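nfs_find_best_sec() walks the SECINFO reply in server order and takes the first flavor it can use: AUTH_NULL or AUTH_UNIX immediately, otherwise the first GSS triple whose mechanism OID resolves through gss_mech_get_by_OID(); unknown mechanisms are skipped with continue. For an illustrative reply of {GSS(unknown OID), GSS(krb5), AUTH_UNIX}, the krb5 triple wins before AUTH_UNIX is reached. nfs_d_automount() (next hunk) then uses the negotiated flavor to retry a lookup the server refused, roughly:

	/* Sketch: retry a refused lookup with a negotiated flavor
	 * (mirrors the nfs_d_automount hunk below, simplified). */
	err = ops->lookup(server->client, parent->d_inode, name, fh, fattr);
	if (err == -EPERM) {
		rpc_authflavor_t flavor = nfs_lookup_with_sec(server, parent,
						dentry, path, fh, fattr);
		err = (flavor < 0) ? (int)flavor : 0;
	}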
@@ -136,6 +232,7 @@ struct vfsmount *nfs_d_automount(struct path *path) | |||
136 | struct nfs_fh *fh = NULL; | 232 | struct nfs_fh *fh = NULL; |
137 | struct nfs_fattr *fattr = NULL; | 233 | struct nfs_fattr *fattr = NULL; |
138 | int err; | 234 | int err; |
235 | rpc_authflavor_t flavor = RPC_AUTH_UNIX; | ||
139 | 236 | ||
140 | dprintk("--> nfs_d_automount()\n"); | 237 | dprintk("--> nfs_d_automount()\n"); |
141 | 238 | ||
@@ -153,9 +250,16 @@ struct vfsmount *nfs_d_automount(struct path *path) | |||
153 | 250 | ||
154 | /* Look it up again to get its attributes */ | 251 | /* Look it up again to get its attributes */ |
155 | parent = dget_parent(path->dentry); | 252 | parent = dget_parent(path->dentry); |
156 | err = server->nfs_client->rpc_ops->lookup(parent->d_inode, | 253 | err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, |
157 | &path->dentry->d_name, | 254 | &path->dentry->d_name, |
158 | fh, fattr); | 255 | fh, fattr); |
256 | if (err == -EPERM) { | ||
257 | flavor = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr); | ||
258 | if (flavor < 0) | ||
259 | err = flavor; | ||
260 | else | ||
261 | err = 0; | ||
262 | } | ||
159 | dput(parent); | 263 | dput(parent); |
160 | if (err != 0) { | 264 | if (err != 0) { |
161 | mnt = ERR_PTR(err); | 265 | mnt = ERR_PTR(err); |
@@ -165,7 +269,7 @@ struct vfsmount *nfs_d_automount(struct path *path) | |||
165 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) | 269 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) |
166 | mnt = nfs_do_refmount(path->dentry); | 270 | mnt = nfs_do_refmount(path->dentry); |
167 | else | 271 | else |
168 | mnt = nfs_do_submount(path->dentry, fh, fattr); | 272 | mnt = nfs_do_submount(path->dentry, fh, fattr, flavor); |
169 | if (IS_ERR(mnt)) | 273 | if (IS_ERR(mnt)) |
170 | goto out; | 274 | goto out; |
171 | 275 | ||
@@ -232,17 +336,20 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, | |||
232 | * @dentry - parent directory | 336 | * @dentry - parent directory |
233 | * @fh - filehandle for new root dentry | 337 | * @fh - filehandle for new root dentry |
234 | * @fattr - attributes for new root inode | 338 | * @fattr - attributes for new root inode |
339 | * @authflavor - security flavor to use when performing the mount | ||
235 | * | 340 | * |
236 | */ | 341 | */ |
237 | static struct vfsmount *nfs_do_submount(struct dentry *dentry, | 342 | static struct vfsmount *nfs_do_submount(struct dentry *dentry, |
238 | struct nfs_fh *fh, | 343 | struct nfs_fh *fh, |
239 | struct nfs_fattr *fattr) | 344 | struct nfs_fattr *fattr, |
345 | rpc_authflavor_t authflavor) | ||
240 | { | 346 | { |
241 | struct nfs_clone_mount mountdata = { | 347 | struct nfs_clone_mount mountdata = { |
242 | .sb = dentry->d_sb, | 348 | .sb = dentry->d_sb, |
243 | .dentry = dentry, | 349 | .dentry = dentry, |
244 | .fh = fh, | 350 | .fh = fh, |
245 | .fattr = fattr, | 351 | .fattr = fattr, |
352 | .authflavor = authflavor, | ||
246 | }; | 353 | }; |
247 | struct vfsmount *mnt = ERR_PTR(-ENOMEM); | 354 | struct vfsmount *mnt = ERR_PTR(-ENOMEM); |
248 | char *page = (char *) __get_free_page(GFP_USER); | 355 | char *page = (char *) __get_free_page(GFP_USER); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d0c80d8b3f96..38053d823eb0 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -141,7 +141,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
141 | } | 141 | } |
142 | 142 | ||
143 | static int | 143 | static int |
144 | nfs3_proc_lookup(struct inode *dir, struct qstr *name, | 144 | nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, |
145 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 145 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) |
146 | { | 146 | { |
147 | struct nfs3_diropargs arg = { | 147 | struct nfs3_diropargs arg = { |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c64be1cff080..e1c261ddd65d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -57,7 +57,8 @@ enum nfs4_session_state { | |||
57 | struct nfs4_minor_version_ops { | 57 | struct nfs4_minor_version_ops { |
58 | u32 minor_version; | 58 | u32 minor_version; |
59 | 59 | ||
60 | int (*call_sync)(struct nfs_server *server, | 60 | int (*call_sync)(struct rpc_clnt *clnt, |
61 | struct nfs_server *server, | ||
61 | struct rpc_message *msg, | 62 | struct rpc_message *msg, |
62 | struct nfs4_sequence_args *args, | 63 | struct nfs4_sequence_args *args, |
63 | struct nfs4_sequence_res *res, | 64 | struct nfs4_sequence_res *res, |
@@ -262,6 +263,8 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *); | |||
262 | extern int nfs4_init_session(struct nfs_server *server); | 263 | extern int nfs4_init_session(struct nfs_server *server); |
263 | extern int nfs4_proc_get_lease_time(struct nfs_client *clp, | 264 | extern int nfs4_proc_get_lease_time(struct nfs_client *clp, |
264 | struct nfs_fsinfo *fsinfo); | 265 | struct nfs_fsinfo *fsinfo); |
266 | extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, | ||
267 | bool sync); | ||
265 | 268 | ||
266 | static inline bool | 269 | static inline bool |
267 | is_ds_only_client(struct nfs_client *clp) | 270 | is_ds_only_client(struct nfs_client *clp) |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 428558464817..6f8192f4cfc7 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -154,6 +154,23 @@ static int filelayout_read_done_cb(struct rpc_task *task, | |||
154 | } | 154 | } |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * We reference the rpc_cred of the first WRITE that triggers the need for | ||
158 | * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. | ||
159 | * rfc5661 is not clear about which credential should be used. | ||
160 | */ | ||
161 | static void | ||
162 | filelayout_set_layoutcommit(struct nfs_write_data *wdata) | ||
163 | { | ||
164 | if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || | ||
165 | wdata->res.verf->committed == NFS_FILE_SYNC) | ||
166 | return; | ||
167 | |||
168 | pnfs_set_layoutcommit(wdata); | ||
169 | dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, | ||
170 | (unsigned long) wdata->lseg->pls_end_pos); | ||
171 | } | ||
172 | |||
173 | /* | ||
157 | * Call ops for the async read/write cases | 174 | * Call ops for the async read/write cases |
158 | * In the case of dense layouts, the offset needs to be reset to its | 175 | * In the case of dense layouts, the offset needs to be reset to its |
159 | * original value. | 176 | * original value. |
@@ -210,6 +227,38 @@ static int filelayout_write_done_cb(struct rpc_task *task, | |||
210 | return -EAGAIN; | 227 | return -EAGAIN; |
211 | } | 228 | } |
212 | 229 | ||
230 | filelayout_set_layoutcommit(data); | ||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | /* Fake up some data that will cause nfs_commit_release to retry the writes. */ | ||
235 | static void prepare_to_resend_writes(struct nfs_write_data *data) | ||
236 | { | ||
237 | struct nfs_page *first = nfs_list_entry(data->pages.next); | ||
238 | |||
239 | data->task.tk_status = 0; | ||
240 | memcpy(data->verf.verifier, first->wb_verf.verifier, | ||
241 | sizeof(first->wb_verf.verifier)); | ||
242 | data->verf.verifier[0]++; /* ensure verifier mismatch */ | ||
243 | } | ||
244 | |||
245 | static int filelayout_commit_done_cb(struct rpc_task *task, | ||
246 | struct nfs_write_data *data) | ||
247 | { | ||
248 | int reset = 0; | ||
249 | |||
250 | if (filelayout_async_handle_error(task, data->args.context->state, | ||
251 | data->ds_clp, &reset) == -EAGAIN) { | ||
252 | dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", | ||
253 | __func__, data->ds_clp, data->ds_clp->cl_session); | ||
254 | if (reset) { | ||
255 | prepare_to_resend_writes(data); | ||
256 | filelayout_set_lo_fail(data->lseg); | ||
257 | } else | ||
258 | nfs_restart_rpc(task, data->ds_clp); | ||
259 | return -EAGAIN; | ||
260 | } | ||
261 | |||
213 | return 0; | 262 | return 0; |
214 | } | 263 | } |
215 | 264 | ||
@@ -240,6 +289,16 @@ static void filelayout_write_release(void *data) | |||
240 | wdata->mds_ops->rpc_release(data); | 289 | wdata->mds_ops->rpc_release(data); |
241 | } | 290 | } |
242 | 291 | ||
292 | static void filelayout_commit_release(void *data) | ||
293 | { | ||
294 | struct nfs_write_data *wdata = (struct nfs_write_data *)data; | ||
295 | |||
296 | nfs_commit_release_pages(wdata); | ||
297 | if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) | ||
298 | nfs_commit_clear_lock(NFS_I(wdata->inode)); | ||
299 | nfs_commitdata_release(wdata); | ||
300 | } | ||
301 | |||
243 | struct rpc_call_ops filelayout_read_call_ops = { | 302 | struct rpc_call_ops filelayout_read_call_ops = { |
244 | .rpc_call_prepare = filelayout_read_prepare, | 303 | .rpc_call_prepare = filelayout_read_prepare, |
245 | .rpc_call_done = filelayout_read_call_done, | 304 | .rpc_call_done = filelayout_read_call_done, |
@@ -252,6 +311,12 @@ struct rpc_call_ops filelayout_write_call_ops = { | |||
252 | .rpc_release = filelayout_write_release, | 311 | .rpc_release = filelayout_write_release, |
253 | }; | 312 | }; |
254 | 313 | ||
314 | struct rpc_call_ops filelayout_commit_call_ops = { | ||
315 | .rpc_call_prepare = filelayout_write_prepare, | ||
316 | .rpc_call_done = filelayout_write_call_done, | ||
317 | .rpc_release = filelayout_commit_release, | ||
318 | }; | ||
319 | |||
255 | static enum pnfs_try_status | 320 | static enum pnfs_try_status |
256 | filelayout_read_pagelist(struct nfs_read_data *data) | 321 | filelayout_read_pagelist(struct nfs_read_data *data) |
257 | { | 322 | { |
@@ -320,10 +385,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) | |||
320 | data->inode->i_ino, sync, (size_t) data->args.count, offset, | 385 | data->inode->i_ino, sync, (size_t) data->args.count, offset, |
321 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); | 386 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); |
322 | 387 | ||
323 | /* We can't handle commit to ds yet */ | ||
324 | if (!FILELAYOUT_LSEG(lseg)->commit_through_mds) | ||
325 | data->args.stable = NFS_FILE_SYNC; | ||
326 | |||
327 | data->write_done_cb = filelayout_write_done_cb; | 388 | data->write_done_cb = filelayout_write_done_cb; |
328 | data->ds_clp = ds->ds_clp; | 389 | data->ds_clp = ds->ds_clp; |
329 | fh = nfs4_fl_select_ds_fh(lseg, j); | 390 | fh = nfs4_fl_select_ds_fh(lseg, j); |
@@ -441,12 +502,33 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
441 | struct nfs4_layoutget_res *lgr, | 502 | struct nfs4_layoutget_res *lgr, |
442 | struct nfs4_deviceid *id) | 503 | struct nfs4_deviceid *id) |
443 | { | 504 | { |
444 | uint32_t *p = (uint32_t *)lgr->layout.buf; | 505 | struct xdr_stream stream; |
506 | struct xdr_buf buf = { | ||
507 | .pages = lgr->layoutp->pages, | ||
508 | .page_len = lgr->layoutp->len, | ||
509 | .buflen = lgr->layoutp->len, | ||
510 | .len = lgr->layoutp->len, | ||
511 | }; | ||
512 | struct page *scratch; | ||
513 | __be32 *p; | ||
445 | uint32_t nfl_util; | 514 | uint32_t nfl_util; |
446 | int i; | 515 | int i; |
447 | 516 | ||
448 | dprintk("%s: set_layout_map Begin\n", __func__); | 517 | dprintk("%s: set_layout_map Begin\n", __func__); |
449 | 518 | ||
519 | scratch = alloc_page(GFP_KERNEL); | ||
520 | if (!scratch) | ||
521 | return -ENOMEM; | ||
522 | |||
523 | xdr_init_decode(&stream, &buf, NULL); | ||
524 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
525 | |||
526 | /* 20 = nfl_util (4), first_stripe_index (4), pattern_offset (8), | ||
527 | * num_fh (4) */ | ||
528 | p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20); | ||
529 | if (unlikely(!p)) | ||
530 | goto out_err; | ||
531 | |||
450 | memcpy(id, p, sizeof(*id)); | 532 | memcpy(id, p, sizeof(*id)); |
451 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | 533 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); |
452 | print_deviceid(id); | 534 | print_deviceid(id); |
@@ -468,32 +550,57 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
468 | __func__, nfl_util, fl->num_fh, fl->first_stripe_index, | 550 | __func__, nfl_util, fl->num_fh, fl->first_stripe_index, |
469 | fl->pattern_offset); | 551 | fl->pattern_offset); |
470 | 552 | ||
553 | if (!fl->num_fh) | ||
554 | goto out_err; | ||
555 | |||
471 | fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), | 556 | fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), |
472 | GFP_KERNEL); | 557 | GFP_KERNEL); |
473 | if (!fl->fh_array) | 558 | if (!fl->fh_array) |
474 | return -ENOMEM; | 559 | goto out_err; |
475 | 560 | ||
476 | for (i = 0; i < fl->num_fh; i++) { | 561 | for (i = 0; i < fl->num_fh; i++) { |
477 | /* Do we want to use a mempool here? */ | 562 | /* Do we want to use a mempool here? */ |
478 | fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); | 563 | fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); |
479 | if (!fl->fh_array[i]) { | 564 | if (!fl->fh_array[i]) |
480 | filelayout_free_fh_array(fl); | 565 | goto out_err_free; |
481 | return -ENOMEM; | 566 | |
482 | } | 567 | p = xdr_inline_decode(&stream, 4); |
568 | if (unlikely(!p)) | ||
569 | goto out_err_free; | ||
483 | fl->fh_array[i]->size = be32_to_cpup(p++); | 570 | fl->fh_array[i]->size = be32_to_cpup(p++); |
484 | if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { | 571 | if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { |
485 | printk(KERN_ERR "Too big fh %d received %d\n", | 572 | printk(KERN_ERR "Too big fh %d received %d\n", |
486 | i, fl->fh_array[i]->size); | 573 | i, fl->fh_array[i]->size); |
487 | filelayout_free_fh_array(fl); | 574 | goto out_err_free; |
488 | return -EIO; | ||
489 | } | 575 | } |
576 | |||
577 | p = xdr_inline_decode(&stream, fl->fh_array[i]->size); | ||
578 | if (unlikely(!p)) | ||
579 | goto out_err_free; | ||
490 | memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); | 580 | memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); |
491 | p += XDR_QUADLEN(fl->fh_array[i]->size); | ||
492 | dprintk("DEBUG: %s: fh len %d\n", __func__, | 581 | dprintk("DEBUG: %s: fh len %d\n", __func__, |
493 | fl->fh_array[i]->size); | 582 | fl->fh_array[i]->size); |
494 | } | 583 | } |
495 | 584 | ||
585 | __free_page(scratch); | ||
496 | return 0; | 586 | return 0; |
587 | |||
588 | out_err_free: | ||
589 | filelayout_free_fh_array(fl); | ||
590 | out_err: | ||
591 | __free_page(scratch); | ||
592 | return -EIO; | ||
593 | } | ||
594 | |||
595 | static void | ||
596 | filelayout_free_lseg(struct pnfs_layout_segment *lseg) | ||
597 | { | ||
598 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | ||
599 | |||
600 | dprintk("--> %s\n", __func__); | ||
601 | nfs4_fl_put_deviceid(fl->dsaddr); | ||
602 | kfree(fl->commit_buckets); | ||
603 | _filelayout_free_lseg(fl); | ||
497 | } | 604 | } |
498 | 605 | ||
499 | static struct pnfs_layout_segment * | 606 | static struct pnfs_layout_segment * |
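filelayout_decode_layout() now parses the opaque layout body through a bounded xdr_stream instead of walking raw pointers: an xdr_buf wraps the page data, a scratch page lets xdr_inline_decode() linearize items that straddle a page boundary, and every fetch is length-checked so truncated replies fail with -EIO rather than overrunning. The same pattern is applied to decode_device() further down. A minimal sketch of the setup, under the same assumptions as the hunk above:

	/* Sketch: bounded XDR decode over page data with a scratch page. */
	struct xdr_stream stream;
	struct xdr_buf buf = {
		.pages    = pages,	/* e.g. lgr->layoutp->pages */
		.page_len = len,
		.buflen   = len,
		.len      = len,
	};
	struct page *scratch = alloc_page(GFP_KERNEL);
	__be32 *p;
	u32 count;

	if (!scratch)
		return -ENOMEM;
	xdr_init_decode(&stream, &buf, NULL);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	p = xdr_inline_decode(&stream, 4);	/* NULL on short data */
	if (unlikely(!p))
		goto out_err;			/* free scratch, return -EIO */
	count = be32_to_cpup(p);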
@@ -514,17 +621,28 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, | |||
514 | _filelayout_free_lseg(fl); | 621 | _filelayout_free_lseg(fl); |
515 | return NULL; | 622 | return NULL; |
516 | } | 623 | } |
517 | return &fl->generic_hdr; | ||
518 | } | ||
519 | 624 | ||
520 | static void | 625 | /* This assumes there is only one IOMODE_RW lseg. What |
521 | filelayout_free_lseg(struct pnfs_layout_segment *lseg) | 626 | * we really want to do is have a layout_hdr level |
522 | { | 627 | * dictionary of <multipath_list4, fh> keys, each |
523 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | 628 | * associated with a struct list_head, populated by calls |
524 | 629 | * to filelayout_write_pagelist(). | |
525 | dprintk("--> %s\n", __func__); | 630 | * */ |
526 | nfs4_fl_put_deviceid(fl->dsaddr); | 631 | if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) { |
527 | _filelayout_free_lseg(fl); | 632 | int i; |
633 | int size = (fl->stripe_type == STRIPE_SPARSE) ? | ||
634 | fl->dsaddr->ds_num : fl->dsaddr->stripe_count; | ||
635 | |||
636 | fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL); | ||
637 | if (!fl->commit_buckets) { | ||
638 | filelayout_free_lseg(&fl->generic_hdr); | ||
639 | return NULL; | ||
640 | } | ||
641 | fl->number_of_buckets = size; | ||
642 | for (i = 0; i < size; i++) | ||
643 | INIT_LIST_HEAD(&fl->commit_buckets[i]); | ||
644 | } | ||
645 | return &fl->generic_hdr; | ||
528 | } | 646 | } |
529 | 647 | ||
530 | /* | 648 | /* |
@@ -552,6 +670,191 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
552 | return (p_stripe == r_stripe); | 670 | return (p_stripe == r_stripe); |
553 | } | 671 | } |
554 | 672 | ||
673 | static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) | ||
674 | { | ||
675 | return !FILELAYOUT_LSEG(lseg)->commit_through_mds; | ||
676 | } | ||
677 | |||
678 | static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) | ||
679 | { | ||
680 | if (fl->stripe_type == STRIPE_SPARSE) | ||
681 | return nfs4_fl_calc_ds_index(&fl->generic_hdr, j); | ||
682 | else | ||
683 | return j; | ||
684 | } | ||
685 | |||
686 | struct list_head *filelayout_choose_commit_list(struct nfs_page *req) | ||
687 | { | ||
688 | struct pnfs_layout_segment *lseg = req->wb_commit_lseg; | ||
689 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | ||
690 | u32 i, j; | ||
691 | struct list_head *list; | ||
692 | |||
693 | /* Note that we are calling nfs4_fl_calc_j_index on each page | ||
694 | * that ends up being committed to a data server. An attractive | ||
695 | * alternative is to add a field to nfs_write_data and nfs_page | ||
696 | * to store the value calculated in filelayout_write_pagelist | ||
697 | * and just use that here. | ||
698 | */ | ||
699 | j = nfs4_fl_calc_j_index(lseg, | ||
700 | (loff_t)req->wb_index << PAGE_CACHE_SHIFT); | ||
701 | i = select_bucket_index(fl, j); | ||
702 | list = &fl->commit_buckets[i]; | ||
703 | if (list_empty(list)) { | ||
704 | /* Non-empty buckets hold a reference on the lseg */ | ||
705 | get_lseg(lseg); | ||
706 | } | ||
707 | return list; | ||
708 | } | ||
709 | |||
710 | static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) | ||
711 | { | ||
712 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); | ||
713 | |||
714 | if (flseg->stripe_type == STRIPE_SPARSE) | ||
715 | return i; | ||
716 | else | ||
717 | return nfs4_fl_calc_ds_index(lseg, i); | ||
718 | } | ||
719 | |||
720 | static struct nfs_fh * | ||
721 | select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) | ||
722 | { | ||
723 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); | ||
724 | |||
725 | if (flseg->stripe_type == STRIPE_SPARSE) { | ||
726 | if (flseg->num_fh == 1) | ||
727 | i = 0; | ||
728 | else if (flseg->num_fh == 0) | ||
729 | /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ | ||
730 | return NULL; | ||
731 | } | ||
732 | return flseg->fh_array[i]; | ||
733 | } | ||
734 | |||
735 | static int filelayout_initiate_commit(struct nfs_write_data *data, int how) | ||
736 | { | ||
737 | struct pnfs_layout_segment *lseg = data->lseg; | ||
738 | struct nfs4_pnfs_ds *ds; | ||
739 | u32 idx; | ||
740 | struct nfs_fh *fh; | ||
741 | |||
742 | idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); | ||
743 | ds = nfs4_fl_prepare_ds(lseg, idx); | ||
744 | if (!ds) { | ||
745 | printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); | ||
746 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | ||
747 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | ||
748 | prepare_to_resend_writes(data); | ||
749 | data->mds_ops->rpc_release(data); | ||
750 | return -EAGAIN; | ||
751 | } | ||
752 | dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); | ||
753 | data->write_done_cb = filelayout_commit_done_cb; | ||
754 | data->ds_clp = ds->ds_clp; | ||
755 | fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); | ||
756 | if (fh) | ||
757 | data->args.fh = fh; | ||
758 | return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient, | ||
759 | &filelayout_commit_call_ops, how); | ||
760 | } | ||
761 | |||
762 | /* | ||
763 | * This is only useful while we are using whole file layouts. | ||
764 | */ | ||
765 | static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) | ||
766 | { | ||
767 | struct pnfs_layout_segment *lseg, *rv = NULL; | ||
768 | |||
769 | spin_lock(&inode->i_lock); | ||
770 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) | ||
771 | if (lseg->pls_range.iomode == IOMODE_RW) | ||
772 | rv = get_lseg(lseg); | ||
773 | spin_unlock(&inode->i_lock); | ||
774 | return rv; | ||
775 | } | ||
776 | |||
777 | static int alloc_ds_commits(struct inode *inode, struct list_head *list) | ||
778 | { | ||
779 | struct pnfs_layout_segment *lseg; | ||
780 | struct nfs4_filelayout_segment *fl; | ||
781 | struct nfs_write_data *data; | ||
782 | int i, j; | ||
783 | |||
784 | /* Won't need this when non-whole file layout segments are supported | ||
785 | * instead we will use a pnfs_layout_hdr structure */ | ||
786 | lseg = find_only_write_lseg(inode); | ||
787 | if (!lseg) | ||
788 | return 0; | ||
789 | fl = FILELAYOUT_LSEG(lseg); | ||
790 | for (i = 0; i < fl->number_of_buckets; i++) { | ||
791 | if (list_empty(&fl->commit_buckets[i])) | ||
792 | continue; | ||
793 | data = nfs_commitdata_alloc(); | ||
794 | if (!data) | ||
795 | goto out_bad; | ||
796 | data->ds_commit_index = i; | ||
797 | data->lseg = lseg; | ||
798 | list_add(&data->pages, list); | ||
799 | } | ||
800 | put_lseg(lseg); | ||
801 | return 0; | ||
802 | |||
803 | out_bad: | ||
804 | for (j = i; j < fl->number_of_buckets; j++) { | ||
805 | if (list_empty(&fl->commit_buckets[j])) | ||
806 | continue; | ||
807 | nfs_retry_commit(&fl->commit_buckets[j], lseg); | ||
808 | put_lseg(lseg); /* associated with emptying bucket */ | ||
809 | } | ||
810 | put_lseg(lseg); | ||
811 | /* Caller will clean up entries put on list */ | ||
812 | return -ENOMEM; | ||
813 | } | ||
814 | |||
815 | /* This follows nfs_commit_list pretty closely */ | ||
816 | static int | ||
817 | filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | ||
818 | int how) | ||
819 | { | ||
820 | struct nfs_write_data *data, *tmp; | ||
821 | LIST_HEAD(list); | ||
822 | |||
823 | if (!list_empty(mds_pages)) { | ||
824 | data = nfs_commitdata_alloc(); | ||
825 | if (!data) | ||
826 | goto out_bad; | ||
827 | data->lseg = NULL; | ||
828 | list_add(&data->pages, &list); | ||
829 | } | ||
830 | |||
831 | if (alloc_ds_commits(inode, &list)) | ||
832 | goto out_bad; | ||
833 | |||
834 | list_for_each_entry_safe(data, tmp, &list, pages) { | ||
835 | list_del_init(&data->pages); | ||
836 | atomic_inc(&NFS_I(inode)->commits_outstanding); | ||
837 | if (!data->lseg) { | ||
838 | nfs_init_commit(data, mds_pages, NULL); | ||
839 | nfs_initiate_commit(data, NFS_CLIENT(inode), | ||
840 | data->mds_ops, how); | ||
841 | } else { | ||
842 | nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg); | ||
843 | filelayout_initiate_commit(data, how); | ||
844 | } | ||
845 | } | ||
846 | return 0; | ||
847 | out_bad: | ||
848 | list_for_each_entry_safe(data, tmp, &list, pages) { | ||
849 | nfs_retry_commit(&data->pages, data->lseg); | ||
850 | list_del_init(&data->pages); | ||
851 | nfs_commit_free(data); | ||
852 | } | ||
853 | nfs_retry_commit(mds_pages, NULL); | ||
854 | nfs_commit_clear_lock(NFS_I(inode)); | ||
855 | return -ENOMEM; | ||
856 | } | ||
857 | |||
555 | static struct pnfs_layoutdriver_type filelayout_type = { | 858 | static struct pnfs_layoutdriver_type filelayout_type = { |
556 | .id = LAYOUT_NFSV4_1_FILES, | 859 | .id = LAYOUT_NFSV4_1_FILES, |
557 | .name = "LAYOUT_NFSV4_1_FILES", | 860 | .name = "LAYOUT_NFSV4_1_FILES", |
@@ -559,6 +862,9 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
559 | .alloc_lseg = filelayout_alloc_lseg, | 862 | .alloc_lseg = filelayout_alloc_lseg, |
560 | .free_lseg = filelayout_free_lseg, | 863 | .free_lseg = filelayout_free_lseg, |
561 | .pg_test = filelayout_pg_test, | 864 | .pg_test = filelayout_pg_test, |
865 | .mark_pnfs_commit = filelayout_mark_pnfs_commit, | ||
866 | .choose_commit_list = filelayout_choose_commit_list, | ||
867 | .commit_pagelist = filelayout_commit_pagelist, | ||
562 | .read_pagelist = filelayout_read_pagelist, | 868 | .read_pagelist = filelayout_read_pagelist, |
563 | .write_pagelist = filelayout_write_pagelist, | 869 | .write_pagelist = filelayout_write_pagelist, |
564 | }; | 870 | }; |
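The new commit machinery fans COMMIT out to the data servers: as pages are marked for commit, filelayout_choose_commit_list() buckets each one by its stripe, and filelayout_commit_pagelist() later sends one commit per non-empty bucket plus one through the MDS for everything else. The bucket index comes from the stripe arithmetic in nfs4_fl_calc_j_index(); illustratively, it reduces to:

	/* Illustrative mirror of nfs4_fl_calc_j_index() (the real helper
	 * lives in fs/nfs/nfs4filelayoutdev.c). */
	static u32 calc_j_index(struct nfs4_filelayout_segment *fl, loff_t offset)
	{
		u64 tmp = offset - fl->pattern_offset;

		do_div(tmp, fl->stripe_unit);		/* which stripe unit */
		return do_div(tmp, fl->dsaddr->stripe_count); /* stripe index j */
	}

For a 3-way stripe with a 64 KiB stripe unit and zero pattern offset, a page at byte offset 192 KiB gives j = (192K / 64K) % 3 = 0; with STRIPE_SPARSE layouts, select_bucket_index() then maps j to the data-server index before picking the bucket.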
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index ee0c907742b5..085a354e0f08 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h | |||
@@ -79,6 +79,8 @@ struct nfs4_filelayout_segment { | |||
79 | struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ | 79 | struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ |
80 | unsigned int num_fh; | 80 | unsigned int num_fh; |
81 | struct nfs_fh **fh_array; | 81 | struct nfs_fh **fh_array; |
82 | struct list_head *commit_buckets; /* Sort commits to ds */ | ||
83 | int number_of_buckets; | ||
82 | }; | 84 | }; |
83 | 85 | ||
84 | static inline struct nfs4_filelayout_segment * | 86 | static inline struct nfs4_filelayout_segment * |
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 68143c162e3b..de5350f2b249 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -261,7 +261,7 @@ out: | |||
261 | * Currently only support ipv4, and one multi-path address. | 261 | * Currently only support ipv4, and one multi-path address. |
262 | */ | 262 | */ |
263 | static struct nfs4_pnfs_ds * | 263 | static struct nfs4_pnfs_ds * |
264 | decode_and_add_ds(__be32 **pp, struct inode *inode) | 264 | decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) |
265 | { | 265 | { |
266 | struct nfs4_pnfs_ds *ds = NULL; | 266 | struct nfs4_pnfs_ds *ds = NULL; |
267 | char *buf; | 267 | char *buf; |
@@ -269,25 +269,34 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) | |||
269 | u32 ip_addr, port; | 269 | u32 ip_addr, port; |
270 | int nlen, rlen, i; | 270 | int nlen, rlen, i; |
271 | int tmp[2]; | 271 | int tmp[2]; |
272 | __be32 *r_netid, *r_addr, *p = *pp; | 272 | __be32 *p; |
273 | 273 | ||
274 | /* r_netid */ | 274 | /* r_netid */ |
275 | p = xdr_inline_decode(streamp, 4); | ||
276 | if (unlikely(!p)) | ||
277 | goto out_err; | ||
275 | nlen = be32_to_cpup(p++); | 278 | nlen = be32_to_cpup(p++); |
276 | r_netid = p; | ||
277 | p += XDR_QUADLEN(nlen); | ||
278 | 279 | ||
279 | /* r_addr */ | 280 | p = xdr_inline_decode(streamp, nlen); |
280 | rlen = be32_to_cpup(p++); | 281 | if (unlikely(!p)) |
281 | r_addr = p; | 282 | goto out_err; |
282 | p += XDR_QUADLEN(rlen); | ||
283 | *pp = p; | ||
284 | 283 | ||
285 | /* Check that netid is "tcp" */ | 284 | /* Check that netid is "tcp" */ |
286 | if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { | 285 | if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { |
287 | dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); | 286 | dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); |
288 | goto out_err; | 287 | goto out_err; |
289 | } | 288 | } |
290 | 289 | ||
290 | /* r_addr */ | ||
291 | p = xdr_inline_decode(streamp, 4); | ||
292 | if (unlikely(!p)) | ||
293 | goto out_err; | ||
294 | rlen = be32_to_cpup(p); | ||
295 | |||
296 | p = xdr_inline_decode(streamp, rlen); | ||
297 | if (unlikely(!p)) | ||
298 | goto out_err; | ||
299 | |||
291 | /* ipv6 length plus port is legal */ | 300 | /* ipv6 length plus port is legal */ |
292 | if (rlen > INET6_ADDRSTRLEN + 8) { | 301 | if (rlen > INET6_ADDRSTRLEN + 8) { |
293 | dprintk("%s: Invalid address, length %d\n", __func__, | 302 | dprintk("%s: Invalid address, length %d\n", __func__, |
@@ -300,7 +309,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) | |||
300 | goto out_err; | 309 | goto out_err; |
301 | } | 310 | } |
302 | buf[rlen] = '\0'; | 311 | buf[rlen] = '\0'; |
303 | memcpy(buf, r_addr, rlen); | 312 | memcpy(buf, p, rlen); |
304 | 313 | ||
305 | /* replace the port dots with dashes for the in4_pton() delimiter*/ | 314 | /* replace the port dots with dashes for the in4_pton() delimiter*/ |
306 | for (i = 0; i < 2; i++) { | 315 | for (i = 0; i < 2; i++) { |
@@ -336,90 +345,154 @@ out_err: | |||
336 | static struct nfs4_file_layout_dsaddr* | 345 | static struct nfs4_file_layout_dsaddr* |
337 | decode_device(struct inode *ino, struct pnfs_device *pdev) | 346 | decode_device(struct inode *ino, struct pnfs_device *pdev) |
338 | { | 347 | { |
339 | int i, dummy; | 348 | int i; |
340 | u32 cnt, num; | 349 | u32 cnt, num; |
341 | u8 *indexp; | 350 | u8 *indexp; |
342 | __be32 *p = (__be32 *)pdev->area, *indicesp; | 351 | __be32 *p; |
343 | struct nfs4_file_layout_dsaddr *dsaddr; | 352 | u8 *stripe_indices; |
353 | u8 max_stripe_index; | ||
354 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | ||
355 | struct xdr_stream stream; | ||
356 | struct xdr_buf buf = { | ||
357 | .pages = pdev->pages, | ||
358 | .page_len = pdev->pglen, | ||
359 | .buflen = pdev->pglen, | ||
360 | .len = pdev->pglen, | ||
361 | }; | ||
362 | struct page *scratch; | ||
363 | |||
364 | /* set up xdr stream */ | ||
365 | scratch = alloc_page(GFP_KERNEL); | ||
366 | if (!scratch) | ||
367 | goto out_err; | ||
368 | |||
369 | xdr_init_decode(&stream, &buf, NULL); | ||
370 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
344 | 371 | ||
345 | /* Get the stripe count (number of stripe index) */ | 372 | /* Get the stripe count (number of stripe index) */ |
346 | cnt = be32_to_cpup(p++); | 373 | p = xdr_inline_decode(&stream, 4); |
374 | if (unlikely(!p)) | ||
375 | goto out_err_free_scratch; | ||
376 | |||
377 | cnt = be32_to_cpup(p); | ||
347 | dprintk("%s stripe count %d\n", __func__, cnt); | 378 | dprintk("%s stripe count %d\n", __func__, cnt); |
348 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { | 379 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { |
349 | printk(KERN_WARNING "%s: stripe count %d greater than " | 380 | printk(KERN_WARNING "%s: stripe count %d greater than " |
350 | "supported maximum %d\n", __func__, | 381 | "supported maximum %d\n", __func__, |
351 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); | 382 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); |
352 | goto out_err; | 383 | goto out_err_free_scratch; |
384 | } | ||
385 | |||
386 | /* read stripe indices */ | ||
387 | stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL); | ||
388 | if (!stripe_indices) | ||
389 | goto out_err_free_scratch; | ||
390 | |||
391 | p = xdr_inline_decode(&stream, cnt << 2); | ||
392 | if (unlikely(!p)) | ||
393 | goto out_err_free_stripe_indices; | ||
394 | |||
395 | indexp = &stripe_indices[0]; | ||
396 | max_stripe_index = 0; | ||
397 | for (i = 0; i < cnt; i++) { | ||
398 | *indexp = be32_to_cpup(p++); | ||
399 | max_stripe_index = max(max_stripe_index, *indexp); | ||
400 | indexp++; | ||
353 | } | 401 | } |
354 | 402 | ||
355 | /* Check the multipath list count */ | 403 | /* Check the multipath list count */ |
356 | indicesp = p; | 404 | p = xdr_inline_decode(&stream, 4); |
357 | p += XDR_QUADLEN(cnt << 2); | 405 | if (unlikely(!p)) |
358 | num = be32_to_cpup(p++); | 406 | goto out_err_free_stripe_indices; |
407 | |||
408 | num = be32_to_cpup(p); | ||
359 | dprintk("%s ds_num %u\n", __func__, num); | 409 | dprintk("%s ds_num %u\n", __func__, num); |
360 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { | 410 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { |
361 | printk(KERN_WARNING "%s: multipath count %d greater than " | 411 | printk(KERN_WARNING "%s: multipath count %d greater than " |
362 | "supported maximum %d\n", __func__, | 412 | "supported maximum %d\n", __func__, |
363 | num, NFS4_PNFS_MAX_MULTI_CNT); | 413 | num, NFS4_PNFS_MAX_MULTI_CNT); |
364 | goto out_err; | 414 | goto out_err_free_stripe_indices; |
365 | } | 415 | } |
416 | |||
417 | /* validate stripe indices are all < num */ | ||
418 | if (max_stripe_index >= num) { | ||
419 | printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n", | ||
420 | __func__, max_stripe_index, num); | ||
421 | goto out_err_free_stripe_indices; | ||
422 | } | ||
423 | |||
366 | dsaddr = kzalloc(sizeof(*dsaddr) + | 424 | dsaddr = kzalloc(sizeof(*dsaddr) + |
367 | (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), | 425 | (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), |
368 | GFP_KERNEL); | 426 | GFP_KERNEL); |
369 | if (!dsaddr) | 427 | if (!dsaddr) |
370 | goto out_err; | 428 | goto out_err_free_stripe_indices; |
371 | |||
372 | dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL); | ||
373 | if (!dsaddr->stripe_indices) | ||
374 | goto out_err_free; | ||
375 | 429 | ||
376 | dsaddr->stripe_count = cnt; | 430 | dsaddr->stripe_count = cnt; |
431 | dsaddr->stripe_indices = stripe_indices; | ||
432 | stripe_indices = NULL; | ||
377 | dsaddr->ds_num = num; | 433 | dsaddr->ds_num = num; |
378 | 434 | ||
379 | memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); | 435 | memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); |
380 | 436 | ||
381 | /* Go back an read stripe indices */ | ||
382 | p = indicesp; | ||
383 | indexp = &dsaddr->stripe_indices[0]; | ||
384 | for (i = 0; i < dsaddr->stripe_count; i++) { | ||
385 | *indexp = be32_to_cpup(p++); | ||
386 | if (*indexp >= num) | ||
387 | goto out_err_free; | ||
388 | indexp++; | ||
389 | } | ||
390 | /* Skip already read multipath list count */ | ||
391 | p++; | ||
392 | |||
393 | for (i = 0; i < dsaddr->ds_num; i++) { | 437 | for (i = 0; i < dsaddr->ds_num; i++) { |
394 | int j; | 438 | int j; |
439 | u32 mp_count; | ||
440 | |||
441 | p = xdr_inline_decode(&stream, 4); | ||
442 | if (unlikely(!p)) | ||
443 | goto out_err_free_deviceid; | ||
395 | 444 | ||
396 | dummy = be32_to_cpup(p++); /* multipath count */ | 445 | mp_count = be32_to_cpup(p); /* multipath count */ |
397 | if (dummy > 1) { | 446 | if (mp_count > 1) { |
398 | printk(KERN_WARNING | 447 | printk(KERN_WARNING |
399 | "%s: Multipath count %d not supported, " | 448 | "%s: Multipath count %d not supported, " |
400 | "skipping all greater than 1\n", __func__, | 449 | "skipping all greater than 1\n", __func__, |
401 | dummy); | 450 | mp_count); |
402 | } | 451 | } |
403 | for (j = 0; j < dummy; j++) { | 452 | for (j = 0; j < mp_count; j++) { |
404 | if (j == 0) { | 453 | if (j == 0) { |
405 | dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); | 454 | dsaddr->ds_list[i] = decode_and_add_ds(&stream, |
455 | ino); | ||
406 | if (dsaddr->ds_list[i] == NULL) | 456 | if (dsaddr->ds_list[i] == NULL) |
407 | goto out_err_free; | 457 | goto out_err_free_deviceid; |
408 | } else { | 458 | } else { |
409 | u32 len; | 459 | u32 len; |
410 | /* skip extra multipath */ | 460 | /* skip extra multipath */ |
411 | len = be32_to_cpup(p++); | 461 | |
412 | p += XDR_QUADLEN(len); | 462 | /* read len, skip */ |
413 | len = be32_to_cpup(p++); | 463 | p = xdr_inline_decode(&stream, 4); |
414 | p += XDR_QUADLEN(len); | 464 | if (unlikely(!p)) |
415 | continue; | 465 | goto out_err_free_deviceid; |
466 | len = be32_to_cpup(p); | ||
467 | |||
468 | p = xdr_inline_decode(&stream, len); | ||
469 | if (unlikely(!p)) | ||
470 | goto out_err_free_deviceid; | ||
471 | |||
472 | /* read len, skip */ | ||
473 | p = xdr_inline_decode(&stream, 4); | ||
474 | if (unlikely(!p)) | ||
475 | goto out_err_free_deviceid; | ||
476 | len = be32_to_cpup(p); | ||
477 | |||
478 | p = xdr_inline_decode(&stream, len); | ||
479 | if (unlikely(!p)) | ||
480 | goto out_err_free_deviceid; | ||
416 | } | 481 | } |
417 | } | 482 | } |
418 | } | 483 | } |
484 | |||
485 | __free_page(scratch); | ||
419 | return dsaddr; | 486 | return dsaddr; |
420 | 487 | ||
421 | out_err_free: | 488 | out_err_free_deviceid: |
422 | nfs4_fl_free_deviceid(dsaddr); | 489 | nfs4_fl_free_deviceid(dsaddr); |
490 | /* stripe_indices was part of dsaddr */ | ||
491 | goto out_err_free_scratch; | ||
492 | out_err_free_stripe_indices: | ||
493 | kfree(stripe_indices); | ||
494 | out_err_free_scratch: | ||
495 | __free_page(scratch); | ||
423 | out_err: | 496 | out_err: |
424 | dprintk("%s ERROR: returning NULL\n", __func__); | 497 | dprintk("%s ERROR: returning NULL\n", __func__); |
425 | return NULL; | 498 | return NULL; |
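
The rewrite above drops direct pointer walks over a vmap()ed buffer in favour of the sunrpc xdr_stream helpers: every read goes through xdr_inline_decode(), which returns NULL on a short buffer and reassembles items that straddle a page boundary through a scratch page. A minimal sketch of that pattern, using only the helpers visible in this hunk (the function name and lengths are illustrative, not driver code):

	#include <linux/mm.h>
	#include <linux/sunrpc/xdr.h>

	/* Sketch: pull one 32-bit count out of a page-based XDR reply. */
	static int demo_decode_count(struct page **pages, unsigned int pglen,
				     u32 *count)
	{
		struct xdr_stream stream;
		struct xdr_buf buf = {
			.pages = pages,
			.page_len = pglen,
			.buflen = pglen,
			.len = pglen,
		};
		struct page *scratch;
		__be32 *p;

		scratch = alloc_page(GFP_KERNEL);
		if (!scratch)
			return -ENOMEM;

		xdr_init_decode(&stream, &buf, NULL);
		/* items crossing a page boundary are copied here first */
		xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

		p = xdr_inline_decode(&stream, 4);	/* NULL == short reply */
		if (p)
			*count = be32_to_cpup(p);

		__free_page(scratch);
		return p ? 0 : -EIO;
	}

This is also why get_device_info() in the next hunk no longer needs vmap()/vunmap(): the stream reads the reply pages in place.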
@@ -498,11 +571,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) | |||
498 | goto out_free; | 571 | goto out_free; |
499 | } | 572 | } |
500 | 573 | ||
501 | /* set pdev->area */ | ||
502 | pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL); | ||
503 | if (!pdev->area) | ||
504 | goto out_free; | ||
505 | |||
506 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | 574 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); |
507 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; | 575 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; |
508 | pdev->pages = pages; | 576 | pdev->pages = pages; |
@@ -521,8 +589,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) | |||
521 | */ | 589 | */ |
522 | dsaddr = decode_and_add_device(inode, pdev); | 590 | dsaddr = decode_and_add_device(inode, pdev); |
523 | out_free: | 591 | out_free: |
524 | if (pdev->area != NULL) | ||
525 | vunmap(pdev->area); | ||
526 | for (i = 0; i < max_pages; i++) | 592 | for (i = 0; i < max_pages; i++) |
527 | __free_page(pages[i]); | 593 | __free_page(pages[i]); |
528 | kfree(pages); | 594 | kfree(pages); |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1d84e7088af9..dfd1e6d7e6c3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/string.h> | 41 | #include <linux/string.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/sunrpc/clnt.h> | 43 | #include <linux/sunrpc/clnt.h> |
44 | #include <linux/sunrpc/gss_api.h> | ||
44 | #include <linux/nfs.h> | 45 | #include <linux/nfs.h> |
45 | #include <linux/nfs4.h> | 46 | #include <linux/nfs4.h> |
46 | #include <linux/nfs_fs.h> | 47 | #include <linux/nfs_fs.h> |
@@ -71,7 +72,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data); | |||
71 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); | 72 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); |
72 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 73 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
73 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); | 74 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); |
74 | static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); | 75 | static int _nfs4_proc_lookup(struct rpc_clnt *client, struct inode *dir, |
76 | const struct qstr *name, struct nfs_fh *fhandle, | ||
77 | struct nfs_fattr *fattr); | ||
75 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); | 78 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); |
76 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 79 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
77 | struct nfs_fattr *fattr, struct iattr *sattr, | 80 | struct nfs_fattr *fattr, struct iattr *sattr, |
@@ -85,6 +88,8 @@ static int nfs4_map_errors(int err) | |||
85 | switch (err) { | 88 | switch (err) { |
86 | case -NFS4ERR_RESOURCE: | 89 | case -NFS4ERR_RESOURCE: |
87 | return -EREMOTEIO; | 90 | return -EREMOTEIO; |
91 | case -NFS4ERR_WRONGSEC: | ||
92 | return -EPERM; | ||
88 | case -NFS4ERR_BADOWNER: | 93 | case -NFS4ERR_BADOWNER: |
89 | case -NFS4ERR_BADNAME: | 94 | case -NFS4ERR_BADNAME: |
90 | return -EINVAL; | 95 | return -EINVAL; |
@@ -657,7 +662,8 @@ struct rpc_call_ops nfs41_call_priv_sync_ops = { | |||
657 | .rpc_call_done = nfs41_call_sync_done, | 662 | .rpc_call_done = nfs41_call_sync_done, |
658 | }; | 663 | }; |
659 | 664 | ||
660 | static int nfs4_call_sync_sequence(struct nfs_server *server, | 665 | static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, |
666 | struct nfs_server *server, | ||
661 | struct rpc_message *msg, | 667 | struct rpc_message *msg, |
662 | struct nfs4_sequence_args *args, | 668 | struct nfs4_sequence_args *args, |
663 | struct nfs4_sequence_res *res, | 669 | struct nfs4_sequence_res *res, |
@@ -673,7 +679,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server, | |||
673 | .cache_reply = cache_reply, | 679 | .cache_reply = cache_reply, |
674 | }; | 680 | }; |
675 | struct rpc_task_setup task_setup = { | 681 | struct rpc_task_setup task_setup = { |
676 | .rpc_client = server->client, | 682 | .rpc_client = clnt, |
677 | .rpc_message = msg, | 683 | .rpc_message = msg, |
678 | .callback_ops = &nfs41_call_sync_ops, | 684 | .callback_ops = &nfs41_call_sync_ops, |
679 | .callback_data = &data | 685 | .callback_data = &data |
@@ -692,13 +698,14 @@ static int nfs4_call_sync_sequence(struct nfs_server *server, | |||
692 | return ret; | 698 | return ret; |
693 | } | 699 | } |
694 | 700 | ||
695 | int _nfs4_call_sync_session(struct nfs_server *server, | 701 | int _nfs4_call_sync_session(struct rpc_clnt *clnt, |
702 | struct nfs_server *server, | ||
696 | struct rpc_message *msg, | 703 | struct rpc_message *msg, |
697 | struct nfs4_sequence_args *args, | 704 | struct nfs4_sequence_args *args, |
698 | struct nfs4_sequence_res *res, | 705 | struct nfs4_sequence_res *res, |
699 | int cache_reply) | 706 | int cache_reply) |
700 | { | 707 | { |
701 | return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0); | 708 | return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0); |
702 | } | 709 | } |
703 | 710 | ||
704 | #else | 711 | #else |
@@ -709,19 +716,28 @@ static int nfs4_sequence_done(struct rpc_task *task, | |||
709 | } | 716 | } |
710 | #endif /* CONFIG_NFS_V4_1 */ | 717 | #endif /* CONFIG_NFS_V4_1 */ |
711 | 718 | ||
712 | int _nfs4_call_sync(struct nfs_server *server, | 719 | int _nfs4_call_sync(struct rpc_clnt *clnt, |
720 | struct nfs_server *server, | ||
713 | struct rpc_message *msg, | 721 | struct rpc_message *msg, |
714 | struct nfs4_sequence_args *args, | 722 | struct nfs4_sequence_args *args, |
715 | struct nfs4_sequence_res *res, | 723 | struct nfs4_sequence_res *res, |
716 | int cache_reply) | 724 | int cache_reply) |
717 | { | 725 | { |
718 | args->sa_session = res->sr_session = NULL; | 726 | args->sa_session = res->sr_session = NULL; |
719 | return rpc_call_sync(server->client, msg, 0); | 727 | return rpc_call_sync(clnt, msg, 0); |
720 | } | 728 | } |
721 | 729 | ||
722 | #define nfs4_call_sync(server, msg, args, res, cache_reply) \ | 730 | static inline |
723 | (server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \ | 731 | int nfs4_call_sync(struct rpc_clnt *clnt, |
724 | &(res)->seq_res, (cache_reply)) | 732 | struct nfs_server *server, |
733 | struct rpc_message *msg, | ||
734 | struct nfs4_sequence_args *args, | ||
735 | struct nfs4_sequence_res *res, | ||
736 | int cache_reply) | ||
737 | { | ||
738 | return server->nfs_client->cl_mvops->call_sync(clnt, server, msg, | ||
739 | args, res, cache_reply); | ||
740 | } | ||
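
With the macro gone, every call site now names the rpc_clnt explicitly; most pass server->client, but the SECINFO-driven paths can substitute a client bound to a different auth flavor. The mechanical conversion, as it appears throughout the hunks below:

	/* before: the client was implied by the server */
	status = nfs4_call_sync(server, &msg, &args, &res, 0);

	/* after: the caller picks the rpc_clnt and passes the
	 * sequence args/res explicitly */
	status = nfs4_call_sync(server->client, server, &msg,
				&args.seq_args, &res.seq_res, 0);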
725 | 741 | ||
726 | static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) | 742 | static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) |
727 | { | 743 | { |
@@ -1831,7 +1847,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
1831 | } else | 1847 | } else |
1832 | memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); | 1848 | memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); |
1833 | 1849 | ||
1834 | status = nfs4_call_sync(server, &msg, &arg, &res, 1); | 1850 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
1835 | if (status == 0 && state != NULL) | 1851 | if (status == 0 && state != NULL) |
1836 | renew_lease(server, timestamp); | 1852 | renew_lease(server, timestamp); |
1837 | return status; | 1853 | return status; |
@@ -2090,7 +2106,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f | |||
2090 | }; | 2106 | }; |
2091 | int status; | 2107 | int status; |
2092 | 2108 | ||
2093 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | 2109 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
2094 | if (status == 0) { | 2110 | if (status == 0) { |
2095 | memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); | 2111 | memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); |
2096 | server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| | 2112 | server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| |
@@ -2160,7 +2176,7 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2160 | }; | 2176 | }; |
2161 | 2177 | ||
2162 | nfs_fattr_init(info->fattr); | 2178 | nfs_fattr_init(info->fattr); |
2163 | return nfs4_call_sync(server, &msg, &args, &res, 0); | 2179 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
2164 | } | 2180 | } |
2165 | 2181 | ||
2166 | static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, | 2182 | static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, |
@@ -2176,15 +2192,43 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2176 | return err; | 2192 | return err; |
2177 | } | 2193 | } |
2178 | 2194 | ||
2195 | static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, | ||
2196 | struct nfs_fsinfo *info, rpc_authflavor_t flavor) | ||
2197 | { | ||
2198 | struct rpc_auth *auth; | ||
2199 | int ret; | ||
2200 | |||
2201 | auth = rpcauth_create(flavor, server->client); | ||
2202 | if (IS_ERR(auth)) { | ||
2203 | ret = -EIO; | ||
2204 | goto out; | ||
2205 | } | ||
2206 | ret = nfs4_lookup_root(server, fhandle, info); | ||
2207 | if (ret < 0) | ||
2208 | ret = -EAGAIN; | ||
2209 | out: | ||
2210 | return ret; | ||
2211 | } | ||
2212 | |||
2179 | /* | 2213 | /* |
2180 | * get the file handle for the "/" directory on the server | 2214 | * get the file handle for the "/" directory on the server |
2181 | */ | 2215 | */ |
2182 | static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | 2216 | static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, |
2183 | struct nfs_fsinfo *info) | 2217 | struct nfs_fsinfo *info) |
2184 | { | 2218 | { |
2185 | int status; | 2219 | int i, len, status = 0; |
2220 | rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS + 2]; | ||
2221 | |||
2222 | flav_array[0] = RPC_AUTH_UNIX; | ||
2223 | len = gss_mech_list_pseudoflavors(&flav_array[1]); | ||
2224 | flav_array[1+len] = RPC_AUTH_NULL; | ||
2225 | len += 2; | ||
2186 | 2226 | ||
2187 | status = nfs4_lookup_root(server, fhandle, info); | 2227 | for (i = 0; i < len; i++) { |
2228 | status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); | ||
2229 | if (status == 0) | ||
2230 | break; | ||
2231 | } | ||
2188 | if (status == 0) | 2232 | if (status == 0) |
2189 | status = nfs4_server_capabilities(server, fhandle); | 2233 | status = nfs4_server_capabilities(server, fhandle); |
2190 | if (status == 0) | 2234 | if (status == 0) |
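
The loop above probes the root file handle under one flavor at a time until a lookup succeeds: RPC_AUTH_UNIX first, then every registered GSS pseudoflavor, then RPC_AUTH_NULL as a last resort, which is why flav_array needs NFS_MAX_SECFLAVORS + 2 slots. How the list is built, shown in isolation (same calls as the hunk):

	rpc_authflavor_t flavors[NFS_MAX_SECFLAVORS + 2];
	int n = 0;

	flavors[n++] = RPC_AUTH_UNIX;			/* slot 0 */
	n += gss_mech_list_pseudoflavors(&flavors[n]);	/* up to NFS_MAX_SECFLAVORS */
	flavors[n++] = RPC_AUTH_NULL;			/* final fallback */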
@@ -2249,7 +2293,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2249 | }; | 2293 | }; |
2250 | 2294 | ||
2251 | nfs_fattr_init(fattr); | 2295 | nfs_fattr_init(fattr); |
2252 | return nfs4_call_sync(server, &msg, &args, &res, 0); | 2296 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
2253 | } | 2297 | } |
2254 | 2298 | ||
2255 | static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 2299 | static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) |
@@ -2309,9 +2353,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
2309 | return status; | 2353 | return status; |
2310 | } | 2354 | } |
2311 | 2355 | ||
2312 | static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *dirfh, | 2356 | static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server, |
2313 | const struct qstr *name, struct nfs_fh *fhandle, | 2357 | const struct nfs_fh *dirfh, const struct qstr *name, |
2314 | struct nfs_fattr *fattr) | 2358 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) |
2315 | { | 2359 | { |
2316 | int status; | 2360 | int status; |
2317 | struct nfs4_lookup_arg args = { | 2361 | struct nfs4_lookup_arg args = { |
@@ -2333,7 +2377,7 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *d | |||
2333 | nfs_fattr_init(fattr); | 2377 | nfs_fattr_init(fattr); |
2334 | 2378 | ||
2335 | dprintk("NFS call lookupfh %s\n", name->name); | 2379 | dprintk("NFS call lookupfh %s\n", name->name); |
2336 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | 2380 | status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0); |
2337 | dprintk("NFS reply lookupfh: %d\n", status); | 2381 | dprintk("NFS reply lookupfh: %d\n", status); |
2338 | return status; | 2382 | return status; |
2339 | } | 2383 | } |
@@ -2345,7 +2389,7 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, | |||
2345 | struct nfs4_exception exception = { }; | 2389 | struct nfs4_exception exception = { }; |
2346 | int err; | 2390 | int err; |
2347 | do { | 2391 | do { |
2348 | err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr); | 2392 | err = _nfs4_proc_lookupfh(server->client, server, dirfh, name, fhandle, fattr); |
2349 | /* FIXME: !!!! */ | 2393 | /* FIXME: !!!! */ |
2350 | if (err == -NFS4ERR_MOVED) { | 2394 | if (err == -NFS4ERR_MOVED) { |
2351 | err = -EREMOTE; | 2395 | err = -EREMOTE; |
@@ -2356,27 +2400,41 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, | |||
2356 | return err; | 2400 | return err; |
2357 | } | 2401 | } |
2358 | 2402 | ||
2359 | static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, | 2403 | static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, |
2360 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 2404 | const struct qstr *name, struct nfs_fh *fhandle, |
2405 | struct nfs_fattr *fattr) | ||
2361 | { | 2406 | { |
2362 | int status; | 2407 | int status; |
2363 | 2408 | ||
2364 | dprintk("NFS call lookup %s\n", name->name); | 2409 | dprintk("NFS call lookup %s\n", name->name); |
2365 | status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); | 2410 | status = _nfs4_proc_lookupfh(clnt, NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); |
2366 | if (status == -NFS4ERR_MOVED) | 2411 | if (status == -NFS4ERR_MOVED) |
2367 | status = nfs4_get_referral(dir, name, fattr, fhandle); | 2412 | status = nfs4_get_referral(dir, name, fattr, fhandle); |
2368 | dprintk("NFS reply lookup: %d\n", status); | 2413 | dprintk("NFS reply lookup: %d\n", status); |
2369 | return status; | 2414 | return status; |
2370 | } | 2415 | } |
2371 | 2416 | ||
2372 | static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 2417 | void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr, struct nfs_fh *fh) |
2418 | { | ||
2419 | memset(fh, 0, sizeof(struct nfs_fh)); | ||
2420 | fattr->fsid.major = 1; | ||
2421 | fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | | ||
2422 | NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_FSID | NFS_ATTR_FATTR_MOUNTPOINT; | ||
2423 | fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; | ||
2424 | fattr->nlink = 2; | ||
2425 | } | ||
2426 | |||
2427 | static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, | ||
2428 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | ||
2373 | { | 2429 | { |
2374 | struct nfs4_exception exception = { }; | 2430 | struct nfs4_exception exception = { }; |
2375 | int err; | 2431 | int err; |
2376 | do { | 2432 | do { |
2377 | err = nfs4_handle_exception(NFS_SERVER(dir), | 2433 | err = nfs4_handle_exception(NFS_SERVER(dir), |
2378 | _nfs4_proc_lookup(dir, name, fhandle, fattr), | 2434 | _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr), |
2379 | &exception); | 2435 | &exception); |
2436 | if (err == -EPERM) | ||
2437 | nfs_fixup_secinfo_attributes(fattr, fhandle); | ||
2380 | } while (exception.retry); | 2438 | } while (exception.retry); |
2381 | return err; | 2439 | return err; |
2382 | } | 2440 | } |
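
The -EPERM check is the client side of the WRONGSEC handling added earlier in this patch: nfs4_map_errors() turns -NFS4ERR_WRONGSEC into -EPERM, and rather than failing the walk, the lookup fakes up mountpoint-directory attributes so the VFS can descend and the submount code gets a chance to renegotiate flavors via SECINFO. In effect (a paraphrase of the calls above, not new code):

	/* lookup failed with -NFS4ERR_WRONGSEC (seen here as -EPERM):
	 * hand back a synthetic directory marked as a mountpoint */
	nfs_fixup_secinfo_attributes(fattr, fhandle);
	/* fattr now claims S_IFDIR | S_IRUGO | S_IXUGO, nlink 2,
	 * NFS_ATTR_FATTR_MOUNTPOINT set, and an empty file handle */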
@@ -2421,7 +2479,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry | |||
2421 | if (res.fattr == NULL) | 2479 | if (res.fattr == NULL) |
2422 | return -ENOMEM; | 2480 | return -ENOMEM; |
2423 | 2481 | ||
2424 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | 2482 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
2425 | if (!status) { | 2483 | if (!status) { |
2426 | entry->mask = 0; | 2484 | entry->mask = 0; |
2427 | if (res.access & NFS4_ACCESS_READ) | 2485 | if (res.access & NFS4_ACCESS_READ) |
@@ -2488,7 +2546,7 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page, | |||
2488 | .rpc_resp = &res, | 2546 | .rpc_resp = &res, |
2489 | }; | 2547 | }; |
2490 | 2548 | ||
2491 | return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); | 2549 | return nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); |
2492 | } | 2550 | } |
2493 | 2551 | ||
2494 | static int nfs4_proc_readlink(struct inode *inode, struct page *page, | 2552 | static int nfs4_proc_readlink(struct inode *inode, struct page *page, |
@@ -2577,7 +2635,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) | |||
2577 | if (res.dir_attr == NULL) | 2635 | if (res.dir_attr == NULL) |
2578 | goto out; | 2636 | goto out; |
2579 | 2637 | ||
2580 | status = nfs4_call_sync(server, &msg, &args, &res, 1); | 2638 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); |
2581 | if (status == 0) { | 2639 | if (status == 0) { |
2582 | update_changeattr(dir, &res.cinfo); | 2640 | update_changeattr(dir, &res.cinfo); |
2583 | nfs_post_op_update_inode(dir, res.dir_attr); | 2641 | nfs_post_op_update_inode(dir, res.dir_attr); |
@@ -2678,7 +2736,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, | |||
2678 | if (res.old_fattr == NULL || res.new_fattr == NULL) | 2736 | if (res.old_fattr == NULL || res.new_fattr == NULL) |
2679 | goto out; | 2737 | goto out; |
2680 | 2738 | ||
2681 | status = nfs4_call_sync(server, &msg, &arg, &res, 1); | 2739 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
2682 | if (!status) { | 2740 | if (!status) { |
2683 | update_changeattr(old_dir, &res.old_cinfo); | 2741 | update_changeattr(old_dir, &res.old_cinfo); |
2684 | nfs_post_op_update_inode(old_dir, res.old_fattr); | 2742 | nfs_post_op_update_inode(old_dir, res.old_fattr); |
@@ -2729,7 +2787,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * | |||
2729 | if (res.fattr == NULL || res.dir_attr == NULL) | 2787 | if (res.fattr == NULL || res.dir_attr == NULL) |
2730 | goto out; | 2788 | goto out; |
2731 | 2789 | ||
2732 | status = nfs4_call_sync(server, &msg, &arg, &res, 1); | 2790 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
2733 | if (!status) { | 2791 | if (!status) { |
2734 | update_changeattr(dir, &res.cinfo); | 2792 | update_changeattr(dir, &res.cinfo); |
2735 | nfs_post_op_update_inode(dir, res.dir_attr); | 2793 | nfs_post_op_update_inode(dir, res.dir_attr); |
@@ -2792,8 +2850,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, | |||
2792 | 2850 | ||
2793 | static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) | 2851 | static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) |
2794 | { | 2852 | { |
2795 | int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg, | 2853 | int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, |
2796 | &data->arg, &data->res, 1); | 2854 | &data->arg.seq_args, &data->res.seq_res, 1); |
2797 | if (status == 0) { | 2855 | if (status == 0) { |
2798 | update_changeattr(dir, &data->res.dir_cinfo); | 2856 | update_changeattr(dir, &data->res.dir_cinfo); |
2799 | nfs_post_op_update_inode(dir, data->res.dir_fattr); | 2857 | nfs_post_op_update_inode(dir, data->res.dir_fattr); |
@@ -2905,7 +2963,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
2905 | (unsigned long long)cookie); | 2963 | (unsigned long long)cookie); |
2906 | nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); | 2964 | nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); |
2907 | res.pgbase = args.pgbase; | 2965 | res.pgbase = args.pgbase; |
2908 | status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); | 2966 | status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); |
2909 | if (status >= 0) { | 2967 | if (status >= 0) { |
2910 | memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); | 2968 | memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); |
2911 | status += args.pgbase; | 2969 | status += args.pgbase; |
@@ -2997,7 +3055,7 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2997 | }; | 3055 | }; |
2998 | 3056 | ||
2999 | nfs_fattr_init(fsstat->fattr); | 3057 | nfs_fattr_init(fsstat->fattr); |
3000 | return nfs4_call_sync(server, &msg, &args, &res, 0); | 3058 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
3001 | } | 3059 | } |
3002 | 3060 | ||
3003 | static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) | 3061 | static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) |
@@ -3028,7 +3086,7 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | |||
3028 | .rpc_resp = &res, | 3086 | .rpc_resp = &res, |
3029 | }; | 3087 | }; |
3030 | 3088 | ||
3031 | return nfs4_call_sync(server, &msg, &args, &res, 0); | 3089 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
3032 | } | 3090 | } |
3033 | 3091 | ||
3034 | static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) | 3092 | static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) |
@@ -3073,7 +3131,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle | |||
3073 | } | 3131 | } |
3074 | 3132 | ||
3075 | nfs_fattr_init(pathconf->fattr); | 3133 | nfs_fattr_init(pathconf->fattr); |
3076 | return nfs4_call_sync(server, &msg, &args, &res, 0); | 3134 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
3077 | } | 3135 | } |
3078 | 3136 | ||
3079 | static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | 3137 | static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, |
@@ -3195,12 +3253,9 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag | |||
3195 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; | 3253 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; |
3196 | } | 3254 | } |
3197 | 3255 | ||
3198 | static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) | 3256 | static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) |
3199 | { | 3257 | { |
3200 | struct inode *inode = data->inode; | 3258 | struct inode *inode = data->inode; |
3201 | |||
3202 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | ||
3203 | return -EAGAIN; | ||
3204 | 3259 | ||
3205 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { | 3260 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { |
3206 | nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); | 3261 | nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); |
@@ -3210,11 +3265,24 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) | |||
3210 | return 0; | 3265 | return 0; |
3211 | } | 3266 | } |
3212 | 3267 | ||
3268 | static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) | ||
3269 | { | ||
3270 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | ||
3271 | return -EAGAIN; | ||
3272 | return data->write_done_cb(task, data); | ||
3273 | } | ||
3274 | |||
3213 | static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) | 3275 | static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) |
3214 | { | 3276 | { |
3215 | struct nfs_server *server = NFS_SERVER(data->inode); | 3277 | struct nfs_server *server = NFS_SERVER(data->inode); |
3216 | 3278 | ||
3217 | data->args.bitmask = server->cache_consistency_bitmask; | 3279 | if (data->lseg) { |
3280 | data->args.bitmask = NULL; | ||
3281 | data->res.fattr = NULL; | ||
3282 | } else | ||
3283 | data->args.bitmask = server->cache_consistency_bitmask; | ||
3284 | if (!data->write_done_cb) | ||
3285 | data->write_done_cb = nfs4_commit_done_cb; | ||
3218 | data->res.server = server; | 3286 | data->res.server = server; |
3219 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; | 3287 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; |
3220 | } | 3288 | } |
@@ -3452,7 +3520,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu | |||
3452 | resp_buf = buf; | 3520 | resp_buf = buf; |
3453 | buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); | 3521 | buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); |
3454 | } | 3522 | } |
3455 | ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); | 3523 | ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); |
3456 | if (ret) | 3524 | if (ret) |
3457 | goto out_free; | 3525 | goto out_free; |
3458 | if (res.acl_len > args.acl_len) | 3526 | if (res.acl_len > args.acl_len) |
@@ -3527,7 +3595,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl | |||
3527 | if (i < 0) | 3595 | if (i < 0) |
3528 | return i; | 3596 | return i; |
3529 | nfs_inode_return_delegation(inode); | 3597 | nfs_inode_return_delegation(inode); |
3530 | ret = nfs4_call_sync(server, &msg, &arg, &res, 1); | 3598 | ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
3531 | 3599 | ||
3532 | /* | 3600 | /* |
3533 | * Free each page after tx, so the only ref left is | 3601 | * Free each page after tx, so the only ref left is |
@@ -3890,7 +3958,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock | |||
3890 | lsp = request->fl_u.nfs4_fl.owner; | 3958 | lsp = request->fl_u.nfs4_fl.owner; |
3891 | arg.lock_owner.id = lsp->ls_id.id; | 3959 | arg.lock_owner.id = lsp->ls_id.id; |
3892 | arg.lock_owner.s_dev = server->s_dev; | 3960 | arg.lock_owner.s_dev = server->s_dev; |
3893 | status = nfs4_call_sync(server, &msg, &arg, &res, 1); | 3961 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
3894 | switch (status) { | 3962 | switch (status) { |
3895 | case 0: | 3963 | case 0: |
3896 | request->fl_type = F_UNLCK; | 3964 | request->fl_type = F_UNLCK; |
@@ -4618,12 +4686,46 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, | |||
4618 | nfs_fattr_init(&fs_locations->fattr); | 4686 | nfs_fattr_init(&fs_locations->fattr); |
4619 | fs_locations->server = server; | 4687 | fs_locations->server = server; |
4620 | fs_locations->nlocations = 0; | 4688 | fs_locations->nlocations = 0; |
4621 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | 4689 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
4622 | nfs_fixup_referral_attributes(&fs_locations->fattr); | 4690 | nfs_fixup_referral_attributes(&fs_locations->fattr); |
4623 | dprintk("%s: returned status = %d\n", __func__, status); | 4691 | dprintk("%s: returned status = %d\n", __func__, status); |
4624 | return status; | 4692 | return status; |
4625 | } | 4693 | } |
4626 | 4694 | ||
4695 | static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) | ||
4696 | { | ||
4697 | int status; | ||
4698 | struct nfs4_secinfo_arg args = { | ||
4699 | .dir_fh = NFS_FH(dir), | ||
4700 | .name = name, | ||
4701 | }; | ||
4702 | struct nfs4_secinfo_res res = { | ||
4703 | .flavors = flavors, | ||
4704 | }; | ||
4705 | struct rpc_message msg = { | ||
4706 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO], | ||
4707 | .rpc_argp = &args, | ||
4708 | .rpc_resp = &res, | ||
4709 | }; | ||
4710 | |||
4711 | dprintk("NFS call secinfo %s\n", name->name); | ||
4712 | status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); | ||
4713 | dprintk("NFS reply secinfo: %d\n", status); | ||
4714 | return status; | ||
4715 | } | ||
4716 | |||
4717 | int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) | ||
4718 | { | ||
4719 | struct nfs4_exception exception = { }; | ||
4720 | int err; | ||
4721 | do { | ||
4722 | err = nfs4_handle_exception(NFS_SERVER(dir), | ||
4723 | _nfs4_proc_secinfo(dir, name, flavors), | ||
4724 | &exception); | ||
4725 | } while (exception.retry); | ||
4726 | return err; | ||
4727 | } | ||
4728 | |||
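
nfs4_proc_secinfo() follows this driver's usual raw-op-plus-retry split: _nfs4_proc_secinfo() issues one SECINFO compound (SEQUENCE, PUTFH of the parent directory, SECINFO of the name) and nfs4_handle_exception() loops on transient errors. A hypothetical caller might look like this; the name and the single-page flavors buffer are illustrative, though real callers do hand in a page-backed array:

	struct page *page = alloc_page(GFP_KERNEL);
	struct nfs4_secinfo_flavors *flavors;
	struct qstr name = { .name = "export", .len = 6 };
	int err;

	if (!page)
		return -ENOMEM;
	flavors = page_address(page);

	err = nfs4_proc_secinfo(dir, &name, flavors);
	if (err == 0 && flavors->num_flavors > 0)
		dprintk("server offers flavor %u first\n",
			flavors->flavors[0].flavor);
	__free_page(page);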
4627 | #ifdef CONFIG_NFS_V4_1 | 4729 | #ifdef CONFIG_NFS_V4_1 |
4628 | /* | 4730 | /* |
4629 | * Check the exchange flags returned by the server for invalid flags, having | 4731 | * Check the exchange flags returned by the server for invalid flags, having |
@@ -5516,8 +5618,6 @@ static void nfs4_layoutget_release(void *calldata) | |||
5516 | struct nfs4_layoutget *lgp = calldata; | 5618 | struct nfs4_layoutget *lgp = calldata; |
5517 | 5619 | ||
5518 | dprintk("--> %s\n", __func__); | 5620 | dprintk("--> %s\n", __func__); |
5519 | if (lgp->res.layout.buf != NULL) | ||
5520 | free_page((unsigned long) lgp->res.layout.buf); | ||
5521 | put_nfs_open_context(lgp->args.ctx); | 5621 | put_nfs_open_context(lgp->args.ctx); |
5522 | kfree(calldata); | 5622 | kfree(calldata); |
5523 | dprintk("<-- %s\n", __func__); | 5623 | dprintk("<-- %s\n", __func__); |
@@ -5549,12 +5649,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) | |||
5549 | 5649 | ||
5550 | dprintk("--> %s\n", __func__); | 5650 | dprintk("--> %s\n", __func__); |
5551 | 5651 | ||
5552 | lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); | 5652 | lgp->res.layoutp = &lgp->args.layout; |
5553 | if (lgp->res.layout.buf == NULL) { | ||
5554 | nfs4_layoutget_release(lgp); | ||
5555 | return -ENOMEM; | ||
5556 | } | ||
5557 | |||
5558 | lgp->res.seq_res.sr_slot = NULL; | 5653 | lgp->res.seq_res.sr_slot = NULL; |
5559 | task = rpc_run_task(&task_setup_data); | 5654 | task = rpc_run_task(&task_setup_data); |
5560 | if (IS_ERR(task)) | 5655 | if (IS_ERR(task)) |
@@ -5586,7 +5681,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | |||
5586 | int status; | 5681 | int status; |
5587 | 5682 | ||
5588 | dprintk("--> %s\n", __func__); | 5683 | dprintk("--> %s\n", __func__); |
5589 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | 5684 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
5590 | dprintk("<-- %s status=%d\n", __func__, status); | 5685 | dprintk("<-- %s status=%d\n", __func__, status); |
5591 | 5686 | ||
5592 | return status; | 5687 | return status; |
@@ -5606,6 +5701,100 @@ int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | |||
5606 | } | 5701 | } |
5607 | EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); | 5702 | EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); |
5608 | 5703 | ||
5704 | static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) | ||
5705 | { | ||
5706 | struct nfs4_layoutcommit_data *data = calldata; | ||
5707 | struct nfs_server *server = NFS_SERVER(data->args.inode); | ||
5708 | |||
5709 | if (nfs4_setup_sequence(server, &data->args.seq_args, | ||
5710 | &data->res.seq_res, 1, task)) | ||
5711 | return; | ||
5712 | rpc_call_start(task); | ||
5713 | } | ||
5714 | |||
5715 | static void | ||
5716 | nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) | ||
5717 | { | ||
5718 | struct nfs4_layoutcommit_data *data = calldata; | ||
5719 | struct nfs_server *server = NFS_SERVER(data->args.inode); | ||
5720 | |||
5721 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | ||
5722 | return; | ||
5723 | |||
5724 | switch (task->tk_status) { /* Just ignore these failures */ | ||
5725 | case -NFS4ERR_DELEG_REVOKED: /* layout was recalled */ | ||
5726 | case -NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */ | ||
5727 | case -NFS4ERR_BADLAYOUT: /* no layout */ | ||
5728 | case -NFS4ERR_GRACE: /* loca_reclaim always false */ | ||
5729 | task->tk_status = 0; | ||
5730 | } | ||
5731 | |||
5732 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | ||
5733 | nfs_restart_rpc(task, server->nfs_client); | ||
5734 | return; | ||
5735 | } | ||
5736 | |||
5737 | if (task->tk_status == 0) | ||
5738 | nfs_post_op_update_inode_force_wcc(data->args.inode, | ||
5739 | data->res.fattr); | ||
5740 | } | ||
5741 | |||
5742 | static void nfs4_layoutcommit_release(void *calldata) | ||
5743 | { | ||
5744 | struct nfs4_layoutcommit_data *data = calldata; | ||
5745 | |||
5746 | /* Matched by references in pnfs_set_layoutcommit */ | ||
5747 | put_lseg(data->lseg); | ||
5748 | put_rpccred(data->cred); | ||
5749 | kfree(data); | ||
5750 | } | ||
5751 | |||
5752 | static const struct rpc_call_ops nfs4_layoutcommit_ops = { | ||
5753 | .rpc_call_prepare = nfs4_layoutcommit_prepare, | ||
5754 | .rpc_call_done = nfs4_layoutcommit_done, | ||
5755 | .rpc_release = nfs4_layoutcommit_release, | ||
5756 | }; | ||
5757 | |||
5758 | int | ||
5759 | nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) | ||
5760 | { | ||
5761 | struct rpc_message msg = { | ||
5762 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT], | ||
5763 | .rpc_argp = &data->args, | ||
5764 | .rpc_resp = &data->res, | ||
5765 | .rpc_cred = data->cred, | ||
5766 | }; | ||
5767 | struct rpc_task_setup task_setup_data = { | ||
5768 | .task = &data->task, | ||
5769 | .rpc_client = NFS_CLIENT(data->args.inode), | ||
5770 | .rpc_message = &msg, | ||
5771 | .callback_ops = &nfs4_layoutcommit_ops, | ||
5772 | .callback_data = data, | ||
5773 | .flags = RPC_TASK_ASYNC, | ||
5774 | }; | ||
5775 | struct rpc_task *task; | ||
5776 | int status = 0; | ||
5777 | |||
5778 | dprintk("NFS: %4d initiating layoutcommit call. sync %d " | ||
5779 | "lbw: %llu inode %lu\n", | ||
5780 | data->task.tk_pid, sync, | ||
5781 | data->args.lastbytewritten, | ||
5782 | data->args.inode->i_ino); | ||
5783 | |||
5784 | task = rpc_run_task(&task_setup_data); | ||
5785 | if (IS_ERR(task)) | ||
5786 | return PTR_ERR(task); | ||
5787 | if (sync == false) | ||
5788 | goto out; | ||
5789 | status = nfs4_wait_for_completion_rpc_task(task); | ||
5790 | if (status != 0) | ||
5791 | goto out; | ||
5792 | status = task->tk_status; | ||
5793 | out: | ||
5794 | dprintk("%s: status %d\n", __func__, status); | ||
5795 | rpc_put_task(task); | ||
5796 | return status; | ||
5797 | } | ||
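
LAYOUTCOMMIT always runs as an async RPC task; the sync flag only decides whether the caller waits for completion and harvests tk_status before dropping its task reference. The lseg and cred references freed in nfs4_layoutcommit_release() balance the ones taken when the commit was set up (per the comment, in pnfs_set_layoutcommit). The control flow, reduced to its skeleton:

	task = rpc_run_task(&task_setup_data);	/* always RPC_TASK_ASYNC */
	if (IS_ERR(task))
		return PTR_ERR(task);
	if (sync) {
		status = nfs4_wait_for_completion_rpc_task(task);
		if (status == 0)
			status = task->tk_status;
	}
	rpc_put_task(task);	/* release callback may now free data */
	return status;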
5609 | #endif /* CONFIG_NFS_V4_1 */ | 5798 | #endif /* CONFIG_NFS_V4_1 */ |
5610 | 5799 | ||
5611 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { | 5800 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { |
@@ -5741,6 +5930,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
5741 | .close_context = nfs4_close_context, | 5930 | .close_context = nfs4_close_context, |
5742 | .open_context = nfs4_atomic_open, | 5931 | .open_context = nfs4_atomic_open, |
5743 | .init_client = nfs4_init_client, | 5932 | .init_client = nfs4_init_client, |
5933 | .secinfo = nfs4_proc_secinfo, | ||
5744 | }; | 5934 | }; |
5745 | 5935 | ||
5746 | static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { | 5936 | static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0cf560f77884..dddfb5795d7b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <linux/kdev_t.h> | 46 | #include <linux/kdev_t.h> |
47 | #include <linux/sunrpc/clnt.h> | 47 | #include <linux/sunrpc/clnt.h> |
48 | #include <linux/sunrpc/msg_prot.h> | 48 | #include <linux/sunrpc/msg_prot.h> |
49 | #include <linux/sunrpc/gss_api.h> | ||
49 | #include <linux/nfs.h> | 50 | #include <linux/nfs.h> |
50 | #include <linux/nfs4.h> | 51 | #include <linux/nfs4.h> |
51 | #include <linux/nfs_fs.h> | 52 | #include <linux/nfs_fs.h> |
@@ -112,7 +113,7 @@ static int nfs4_stat_to_errno(int); | |||
112 | #define encode_restorefh_maxsz (op_encode_hdr_maxsz) | 113 | #define encode_restorefh_maxsz (op_encode_hdr_maxsz) |
113 | #define decode_restorefh_maxsz (op_decode_hdr_maxsz) | 114 | #define decode_restorefh_maxsz (op_decode_hdr_maxsz) |
114 | #define encode_fsinfo_maxsz (encode_getattr_maxsz) | 115 | #define encode_fsinfo_maxsz (encode_getattr_maxsz) |
115 | #define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) | 116 | #define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 15) |
116 | #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) | 117 | #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) |
117 | #define decode_renew_maxsz (op_decode_hdr_maxsz) | 118 | #define decode_renew_maxsz (op_decode_hdr_maxsz) |
118 | #define encode_setclientid_maxsz \ | 119 | #define encode_setclientid_maxsz \ |
@@ -253,6 +254,8 @@ static int nfs4_stat_to_errno(int); | |||
253 | (encode_getattr_maxsz) | 254 | (encode_getattr_maxsz) |
254 | #define decode_fs_locations_maxsz \ | 255 | #define decode_fs_locations_maxsz \ |
255 | (0) | 256 | (0) |
257 | #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) | ||
258 | #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN))) | ||
256 | 259 | ||
257 | #if defined(CONFIG_NFS_V4_1) | 260 | #if defined(CONFIG_NFS_V4_1) |
258 | #define NFS4_MAX_MACHINE_NAME_LEN (64) | 261 | #define NFS4_MAX_MACHINE_NAME_LEN (64) |
@@ -324,6 +327,18 @@ static int nfs4_stat_to_errno(int); | |||
324 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ | 327 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ |
325 | decode_stateid_maxsz + \ | 328 | decode_stateid_maxsz + \ |
326 | XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) | 329 | XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) |
330 | #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \ | ||
331 | 2 /* offset */ + \ | ||
332 | 2 /* length */ + \ | ||
333 | 1 /* reclaim */ + \ | ||
334 | encode_stateid_maxsz + \ | ||
335 | 1 /* new offset (true) */ + \ | ||
336 | 2 /* last byte written */ + \ | ||
337 | 1 /* nt_timechanged (false) */ + \ | ||
338 | 1 /* layoutupdate4 layout type */ + \ | ||
339 | 1 /* NULL filelayout layoutupdate4 payload */) | ||
340 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) | ||
341 | |||
327 | #else /* CONFIG_NFS_V4_1 */ | 342 | #else /* CONFIG_NFS_V4_1 */ |
328 | #define encode_sequence_maxsz 0 | 343 | #define encode_sequence_maxsz 0 |
329 | #define decode_sequence_maxsz 0 | 344 | #define decode_sequence_maxsz 0 |
@@ -676,6 +691,14 @@ static int nfs4_stat_to_errno(int); | |||
676 | decode_putfh_maxsz + \ | 691 | decode_putfh_maxsz + \ |
677 | decode_lookup_maxsz + \ | 692 | decode_lookup_maxsz + \ |
678 | decode_fs_locations_maxsz) | 693 | decode_fs_locations_maxsz) |
694 | #define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \ | ||
695 | encode_sequence_maxsz + \ | ||
696 | encode_putfh_maxsz + \ | ||
697 | encode_secinfo_maxsz) | ||
698 | #define NFS4_dec_secinfo_sz (compound_decode_hdr_maxsz + \ | ||
699 | decode_sequence_maxsz + \ | ||
700 | decode_putfh_maxsz + \ | ||
701 | decode_secinfo_maxsz) | ||
679 | #if defined(CONFIG_NFS_V4_1) | 702 | #if defined(CONFIG_NFS_V4_1) |
680 | #define NFS4_enc_exchange_id_sz \ | 703 | #define NFS4_enc_exchange_id_sz \ |
681 | (compound_encode_hdr_maxsz + \ | 704 | (compound_encode_hdr_maxsz + \ |
@@ -727,6 +750,17 @@ static int nfs4_stat_to_errno(int); | |||
727 | decode_sequence_maxsz + \ | 750 | decode_sequence_maxsz + \ |
728 | decode_putfh_maxsz + \ | 751 | decode_putfh_maxsz + \ |
729 | decode_layoutget_maxsz) | 752 | decode_layoutget_maxsz) |
753 | #define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \ | ||
754 | encode_sequence_maxsz +\ | ||
755 | encode_putfh_maxsz + \ | ||
756 | encode_layoutcommit_maxsz + \ | ||
757 | encode_getattr_maxsz) | ||
758 | #define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \ | ||
759 | decode_sequence_maxsz + \ | ||
760 | decode_putfh_maxsz + \ | ||
761 | decode_layoutcommit_maxsz + \ | ||
762 | decode_getattr_maxsz) | ||
763 | |||
730 | 764 | ||
731 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | 765 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + |
732 | compound_encode_hdr_maxsz + | 766 | compound_encode_hdr_maxsz + |
@@ -1620,6 +1654,18 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state | |||
1620 | hdr->replen += decode_delegreturn_maxsz; | 1654 | hdr->replen += decode_delegreturn_maxsz; |
1621 | } | 1655 | } |
1622 | 1656 | ||
1657 | static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) | ||
1658 | { | ||
1659 | int len = name->len; | ||
1660 | __be32 *p; | ||
1661 | |||
1662 | p = reserve_space(xdr, 8 + len); | ||
1663 | *p++ = cpu_to_be32(OP_SECINFO); | ||
1664 | xdr_encode_opaque(p, name->name, len); | ||
1665 | hdr->nops++; | ||
1666 | hdr->replen += decode_secinfo_maxsz; | ||
1667 | } | ||
1668 | |||
1623 | #if defined(CONFIG_NFS_V4_1) | 1669 | #if defined(CONFIG_NFS_V4_1) |
1624 | /* NFSv4.1 operations */ | 1670 | /* NFSv4.1 operations */ |
1625 | static void encode_exchange_id(struct xdr_stream *xdr, | 1671 | static void encode_exchange_id(struct xdr_stream *xdr, |
@@ -1816,6 +1862,34 @@ encode_layoutget(struct xdr_stream *xdr, | |||
1816 | hdr->nops++; | 1862 | hdr->nops++; |
1817 | hdr->replen += decode_layoutget_maxsz; | 1863 | hdr->replen += decode_layoutget_maxsz; |
1818 | } | 1864 | } |
1865 | |||
1866 | static int | ||
1867 | encode_layoutcommit(struct xdr_stream *xdr, | ||
1868 | const struct nfs4_layoutcommit_args *args, | ||
1869 | struct compound_hdr *hdr) | ||
1870 | { | ||
1871 | __be32 *p; | ||
1872 | |||
1873 | dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, | ||
1874 | NFS_SERVER(args->inode)->pnfs_curr_ld->id); | ||
1875 | |||
1876 | p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE); | ||
1877 | *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); | ||
1878 | /* Only whole file layouts */ | ||
1879 | p = xdr_encode_hyper(p, 0); /* offset */ | ||
1880 | p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */ | ||
1881 | *p++ = cpu_to_be32(0); /* reclaim */ | ||
1882 | p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); | ||
1883 | *p++ = cpu_to_be32(1); /* newoffset = TRUE */ | ||
1884 | p = xdr_encode_hyper(p, args->lastbytewritten); | ||
1885 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ | ||
1886 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ | ||
1887 | *p++ = cpu_to_be32(0); /* no file layout payload */ | ||
1888 | |||
1889 | hdr->nops++; | ||
1890 | hdr->replen += decode_layoutcommit_maxsz; | ||
1891 | return 0; | ||
1892 | } | ||
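
The reserve_space() size can be checked against the fields that follow: everything except the stateid sums to 48 bytes, matching the 11 XDR words (plus encode_stateid_maxsz) accounted for by encode_layoutcommit_maxsz above.

	/* reserve_space(xdr, 48 + NFS4_STATEID_SIZE) breakdown:
	 *   4  OP_LAYOUTCOMMIT        4  reclaim (FALSE)
	 *   8  offset (0)             4  newoffset (TRUE)
	 *   8  length (all ones)      8  last byte written
	 *   4  time_modify_changed    4  layout type
	 *   4  empty layoutupdate4    == 48, + 16-byte stateid
	 */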
1819 | #endif /* CONFIG_NFS_V4_1 */ | 1893 | #endif /* CONFIG_NFS_V4_1 */ |
1820 | 1894 | ||
1821 | /* | 1895 | /* |
@@ -2294,7 +2368,8 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
2294 | encode_sequence(xdr, &args->seq_args, &hdr); | 2368 | encode_sequence(xdr, &args->seq_args, &hdr); |
2295 | encode_putfh(xdr, args->fh, &hdr); | 2369 | encode_putfh(xdr, args->fh, &hdr); |
2296 | encode_commit(xdr, args, &hdr); | 2370 | encode_commit(xdr, args, &hdr); |
2297 | encode_getfattr(xdr, args->bitmask, &hdr); | 2371 | if (args->bitmask) |
2372 | encode_getfattr(xdr, args->bitmask, &hdr); | ||
2298 | encode_nops(&hdr); | 2373 | encode_nops(&hdr); |
2299 | } | 2374 | } |
2300 | 2375 | ||
@@ -2465,6 +2540,24 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, | |||
2465 | encode_nops(&hdr); | 2540 | encode_nops(&hdr); |
2466 | } | 2541 | } |
2467 | 2542 | ||
2543 | /* | ||
2544 | * Encode SECINFO request | ||
2545 | */ | ||
2546 | static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req, | ||
2547 | struct xdr_stream *xdr, | ||
2548 | struct nfs4_secinfo_arg *args) | ||
2549 | { | ||
2550 | struct compound_hdr hdr = { | ||
2551 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2552 | }; | ||
2553 | |||
2554 | encode_compound_hdr(xdr, req, &hdr); | ||
2555 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2556 | encode_putfh(xdr, args->dir_fh, &hdr); | ||
2557 | encode_secinfo(xdr, args->name, &hdr); | ||
2558 | encode_nops(&hdr); | ||
2559 | } | ||
2560 | |||
2468 | #if defined(CONFIG_NFS_V4_1) | 2561 | #if defined(CONFIG_NFS_V4_1) |
2469 | /* | 2562 | /* |
2470 | * EXCHANGE_ID request | 2563 | * EXCHANGE_ID request |
@@ -2604,8 +2697,32 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, | |||
2604 | encode_sequence(xdr, &args->seq_args, &hdr); | 2697 | encode_sequence(xdr, &args->seq_args, &hdr); |
2605 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); | 2698 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); |
2606 | encode_layoutget(xdr, args, &hdr); | 2699 | encode_layoutget(xdr, args, &hdr); |
2700 | |||
2701 | xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, | ||
2702 | args->layout.pages, 0, args->layout.pglen); | ||
2703 | |||
2607 | encode_nops(&hdr); | 2704 | encode_nops(&hdr); |
2608 | } | 2705 | } |
2706 | |||
2707 | /* | ||
2708 | * Encode LAYOUTCOMMIT request | ||
2709 | */ | ||
2710 | static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, | ||
2711 | struct xdr_stream *xdr, | ||
2712 | struct nfs4_layoutcommit_args *args) | ||
2713 | { | ||
2714 | struct compound_hdr hdr = { | ||
2715 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2716 | }; | ||
2717 | |||
2718 | encode_compound_hdr(xdr, req, &hdr); | ||
2719 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2720 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); | ||
2721 | encode_layoutcommit(xdr, args, &hdr); | ||
2722 | encode_getfattr(xdr, args->bitmask, &hdr); | ||
2723 | encode_nops(&hdr); | ||
2724 | return 0; | ||
2725 | } | ||
2609 | #endif /* CONFIG_NFS_V4_1 */ | 2726 | #endif /* CONFIG_NFS_V4_1 */ |
2610 | 2727 | ||
2611 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) | 2728 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) |
@@ -2925,6 +3042,7 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap) | |||
2925 | if (unlikely(!p)) | 3042 | if (unlikely(!p)) |
2926 | goto out_overflow; | 3043 | goto out_overflow; |
2927 | bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; | 3044 | bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; |
3045 | return -be32_to_cpup(p); | ||
2928 | } | 3046 | } |
2929 | return 0; | 3047 | return 0; |
2930 | out_overflow: | 3048 | out_overflow: |
@@ -3912,6 +4030,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | |||
3912 | fattr->valid |= status; | 4030 | fattr->valid |= status; |
3913 | 4031 | ||
3914 | status = decode_attr_error(xdr, bitmap); | 4032 | status = decode_attr_error(xdr, bitmap); |
4033 | if (status == -NFS4ERR_WRONGSEC) { | ||
4034 | nfs_fixup_secinfo_attributes(fattr, fh); | ||
4035 | status = 0; | ||
4036 | } | ||
3915 | if (status < 0) | 4037 | if (status < 0) |
3916 | goto xdr_error; | 4038 | goto xdr_error; |
3917 | 4039 | ||
@@ -4680,6 +4802,73 @@ static int decode_delegreturn(struct xdr_stream *xdr) | |||
4680 | return decode_op_hdr(xdr, OP_DELEGRETURN); | 4802 | return decode_op_hdr(xdr, OP_DELEGRETURN); |
4681 | } | 4803 | } |
4682 | 4804 | ||
4805 | static int decode_secinfo_gss(struct xdr_stream *xdr, struct nfs4_secinfo_flavor *flavor) | ||
4806 | { | ||
4807 | __be32 *p; | ||
4808 | |||
4809 | p = xdr_inline_decode(xdr, 4); | ||
4810 | if (unlikely(!p)) | ||
4811 | goto out_overflow; | ||
4812 | flavor->gss.sec_oid4.len = be32_to_cpup(p); | ||
4813 | if (flavor->gss.sec_oid4.len > GSS_OID_MAX_LEN) | ||
4814 | goto out_err; | ||
4815 | |||
4816 | p = xdr_inline_decode(xdr, flavor->gss.sec_oid4.len); | ||
4817 | if (unlikely(!p)) | ||
4818 | goto out_overflow; | ||
4819 | memcpy(flavor->gss.sec_oid4.data, p, flavor->gss.sec_oid4.len); | ||
4820 | |||
4821 | p = xdr_inline_decode(xdr, 8); | ||
4822 | if (unlikely(!p)) | ||
4823 | goto out_overflow; | ||
4824 | flavor->gss.qop4 = be32_to_cpup(p++); | ||
4825 | flavor->gss.service = be32_to_cpup(p); | ||
4826 | |||
4827 | return 0; | ||
4828 | |||
4829 | out_overflow: | ||
4830 | print_overflow_msg(__func__, xdr); | ||
4831 | return -EIO; | ||
4832 | out_err: | ||
4833 | return -EINVAL; | ||
4834 | } | ||
4835 | |||
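
For RPC_AUTH_GSS entries, SECINFO returns an rpcsec_gss_info triple rather than a bare flavor number, and decode_secinfo_gss() reads it in wire order. The shape of one entry, per RFC 3530 (the bound on the OID is the client's own GSS_OID_MAX_LEN):

	/* secinfo4 entry when flavor == RPC_AUTH_GSS:
	 *   uint32  flavor		== 6 (RPC_AUTH_GSS)
	 *   opaque  oid<>		mechanism OID (length + bytes)
	 *   uint32  qop		quality of protection
	 *   uint32  service		none / integrity / privacy
	 */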
4836 | static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) | ||
4837 | { | ||
4838 | struct nfs4_secinfo_flavor *sec_flavor; | ||
4839 | int status; | ||
4840 | __be32 *p; | ||
4841 | int i; | ||
4842 | |||
4843 | status = decode_op_hdr(xdr, OP_SECINFO); | ||
4844 | p = xdr_inline_decode(xdr, 4); | ||
4845 | if (unlikely(!p)) | ||
4846 | goto out_overflow; | ||
4847 | res->flavors->num_flavors = be32_to_cpup(p); | ||
4848 | |||
4849 | for (i = 0; i < res->flavors->num_flavors; i++) { | ||
4850 | sec_flavor = &res->flavors->flavors[i]; | ||
4851 | if ((char *)&sec_flavor[1] - (char *)res->flavors > PAGE_SIZE) | ||
4852 | break; | ||
4853 | |||
4854 | p = xdr_inline_decode(xdr, 4); | ||
4855 | if (unlikely(!p)) | ||
4856 | goto out_overflow; | ||
4857 | sec_flavor->flavor = be32_to_cpup(p); | ||
4858 | |||
4859 | if (sec_flavor->flavor == RPC_AUTH_GSS) { | ||
4860 | if (decode_secinfo_gss(xdr, sec_flavor)) | ||
4861 | break; | ||
4862 | } | ||
4863 | } | ||
4864 | |||
4865 | return 0; | ||
4866 | |||
4867 | out_overflow: | ||
4868 | print_overflow_msg(__func__, xdr); | ||
4869 | return -EIO; | ||
4870 | } | ||
4871 | |||
4683 | #if defined(CONFIG_NFS_V4_1) | 4872 | #if defined(CONFIG_NFS_V4_1) |
4684 | static int decode_exchange_id(struct xdr_stream *xdr, | 4873 | static int decode_exchange_id(struct xdr_stream *xdr, |
4685 | struct nfs41_exchange_id_res *res) | 4874 | struct nfs41_exchange_id_res *res) |
@@ -4950,6 +5139,9 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
4950 | __be32 *p; | 5139 | __be32 *p; |
4951 | int status; | 5140 | int status; |
4952 | u32 layout_count; | 5141 | u32 layout_count; |
5142 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | ||
5143 | struct kvec *iov = rcvbuf->head; | ||
5144 | u32 hdrlen, recvd; | ||
4953 | 5145 | ||
4954 | status = decode_op_hdr(xdr, OP_LAYOUTGET); | 5146 | status = decode_op_hdr(xdr, OP_LAYOUTGET); |
4955 | if (status) | 5147 | if (status) |
@@ -4966,17 +5158,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
4966 | return -EINVAL; | 5158 | return -EINVAL; |
4967 | } | 5159 | } |
4968 | 5160 | ||
4969 | p = xdr_inline_decode(xdr, 24); | 5161 | p = xdr_inline_decode(xdr, 28); |
4970 | if (unlikely(!p)) | 5162 | if (unlikely(!p)) |
4971 | goto out_overflow; | 5163 | goto out_overflow; |
4972 | p = xdr_decode_hyper(p, &res->range.offset); | 5164 | p = xdr_decode_hyper(p, &res->range.offset); |
4973 | p = xdr_decode_hyper(p, &res->range.length); | 5165 | p = xdr_decode_hyper(p, &res->range.length); |
4974 | res->range.iomode = be32_to_cpup(p++); | 5166 | res->range.iomode = be32_to_cpup(p++); |
4975 | res->type = be32_to_cpup(p++); | 5167 | res->type = be32_to_cpup(p++); |
4976 | 5168 | res->layoutp->len = be32_to_cpup(p); | |
4977 | status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p); | ||
4978 | if (unlikely(status)) | ||
4979 | return status; | ||
4980 | 5169 | ||
4981 | dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", | 5170 | dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", |
4982 | __func__, | 5171 | __func__, |
@@ -4984,12 +5173,18 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
4984 | (unsigned long)res->range.length, | 5173 | (unsigned long)res->range.length, |
4985 | res->range.iomode, | 5174 | res->range.iomode, |
4986 | res->type, | 5175 | res->type, |
4987 | res->layout.len); | 5176 | res->layoutp->len); |
4988 | 5177 | ||
4989 | /* nfs4_proc_layoutget allocated a single page */ | 5178 | hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base; |
4990 | if (res->layout.len > PAGE_SIZE) | 5179 | recvd = req->rq_rcv_buf.len - hdrlen; |
4991 | return -ENOMEM; | 5180 | if (res->layoutp->len > recvd) { |
4992 | memcpy(res->layout.buf, p, res->layout.len); | 5181 | dprintk("NFS: server cheating in layoutget reply: " |
5182 | "layout len %u > recvd %u\n", | ||
5183 | res->layoutp->len, recvd); | ||
5184 | return -EINVAL; | ||
5185 | } | ||
5186 | |||
5187 | xdr_read_pages(xdr, res->layoutp->len); | ||
4993 | 5188 | ||
4994 | if (layout_count > 1) { | 5189 | if (layout_count > 1) { |
4995 | /* We only handle a length one array at the moment. Any | 5190 | /* We only handle a length one array at the moment. Any |
@@ -5006,6 +5201,35 @@ out_overflow: | |||
5006 | print_overflow_msg(__func__, xdr); | 5201 | print_overflow_msg(__func__, xdr); |
5007 | return -EIO; | 5202 | return -EIO; |
5008 | } | 5203 | } |
5204 | |||
5205 | static int decode_layoutcommit(struct xdr_stream *xdr, | ||
5206 | struct rpc_rqst *req, | ||
5207 | struct nfs4_layoutcommit_res *res) | ||
5208 | { | ||
5209 | __be32 *p; | ||
5210 | __u32 sizechanged; | ||
5211 | int status; | ||
5212 | |||
5213 | status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT); | ||
5214 | if (status) | ||
5215 | return status; | ||
5216 | |||
5217 | p = xdr_inline_decode(xdr, 4); | ||
5218 | if (unlikely(!p)) | ||
5219 | goto out_overflow; | ||
5220 | sizechanged = be32_to_cpup(p); | ||
5221 | |||
5222 | if (sizechanged) { | ||
5223 | /* throw away new size */ | ||
5224 | p = xdr_inline_decode(xdr, 8); | ||
5225 | if (unlikely(!p)) | ||
5226 | goto out_overflow; | ||
5227 | } | ||
5228 | return 0; | ||
5229 | out_overflow: | ||
5230 | print_overflow_msg(__func__, xdr); | ||
5231 | return -EIO; | ||
5232 | } | ||
5009 | #endif /* CONFIG_NFS_V4_1 */ | 5233 | #endif /* CONFIG_NFS_V4_1 */ |
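decode_layoutcommit() consumes the LAYOUTCOMMIT4resok body from RFC 5661: a boolean-discriminated newsize4 union, i.e. a 32-bit "size changed" flag optionally followed by a 64-bit new file size, which this client reads and discards. Illustrative shape (not a kernel type):

	struct layoutcommit4resok_wire {
		uint32_t sizechanged;	/* XDR bool discriminant             */
		uint64_t newsize;	/* on the wire only if sizechanged   */
	};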
5010 | 5234 | ||
5011 | /* | 5235 | /* |
@@ -5723,8 +5947,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5723 | status = decode_commit(xdr, res); | 5947 | status = decode_commit(xdr, res); |
5724 | if (status) | 5948 | if (status) |
5725 | goto out; | 5949 | goto out; |
5726 | decode_getfattr(xdr, res->fattr, res->server, | 5950 | if (res->fattr) |
5727 | !RPC_IS_ASYNC(rqstp->rq_task)); | 5951 | decode_getfattr(xdr, res->fattr, res->server, |
5952 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5728 | out: | 5953 | out: |
5729 | return status; | 5954 | return status; |
5730 | } | 5955 | } |
@@ -5919,6 +6144,32 @@ out: | |||
5919 | return status; | 6144 | return status; |
5920 | } | 6145 | } |
5921 | 6146 | ||
6147 | /* | ||
6148 | * Decode SECINFO response | ||
6149 | */ | ||
6150 | static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp, | ||
6151 | struct xdr_stream *xdr, | ||
6152 | struct nfs4_secinfo_res *res) | ||
6153 | { | ||
6154 | struct compound_hdr hdr; | ||
6155 | int status; | ||
6156 | |||
6157 | status = decode_compound_hdr(xdr, &hdr); | ||
6158 | if (status) | ||
6159 | goto out; | ||
6160 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6161 | if (status) | ||
6162 | goto out; | ||
6163 | status = decode_putfh(xdr); | ||
6164 | if (status) | ||
6165 | goto out; | ||
6166 | status = decode_secinfo(xdr, res); | ||
6167 | if (status) | ||
6168 | goto out; | ||
6169 | out: | ||
6170 | return status; | ||
6171 | } | ||
6172 | |||
5922 | #if defined(CONFIG_NFS_V4_1) | 6173 | #if defined(CONFIG_NFS_V4_1) |
5923 | /* | 6174 | /* |
5924 | * Decode EXCHANGE_ID response | 6175 | * Decode EXCHANGE_ID response |
@@ -6066,6 +6317,34 @@ static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, | |||
6066 | out: | 6317 | out: |
6067 | return status; | 6318 | return status; |
6068 | } | 6319 | } |
6320 | |||
6321 | /* | ||
6322 | * Decode LAYOUTCOMMIT response | ||
6323 | */ | ||
6324 | static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, | ||
6325 | struct xdr_stream *xdr, | ||
6326 | struct nfs4_layoutcommit_res *res) | ||
6327 | { | ||
6328 | struct compound_hdr hdr; | ||
6329 | int status; | ||
6330 | |||
6331 | status = decode_compound_hdr(xdr, &hdr); | ||
6332 | if (status) | ||
6333 | goto out; | ||
6334 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6335 | if (status) | ||
6336 | goto out; | ||
6337 | status = decode_putfh(xdr); | ||
6338 | if (status) | ||
6339 | goto out; | ||
6340 | status = decode_layoutcommit(xdr, rqstp, res); | ||
6341 | if (status) | ||
6342 | goto out; | ||
6343 | decode_getfattr(xdr, res->fattr, res->server, | ||
6344 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
6345 | out: | ||
6346 | return status; | ||
6347 | } | ||
6069 | #endif /* CONFIG_NFS_V4_1 */ | 6348 | #endif /* CONFIG_NFS_V4_1 */ |
6070 | 6349 | ||
6071 | /** | 6350 | /** |
@@ -6180,10 +6459,6 @@ static struct { | |||
6180 | { NFS4ERR_SYMLINK, -ELOOP }, | 6459 | { NFS4ERR_SYMLINK, -ELOOP }, |
6181 | { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, | 6460 | { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, |
6182 | { NFS4ERR_DEADLOCK, -EDEADLK }, | 6461 | { NFS4ERR_DEADLOCK, -EDEADLK }, |
6183 | { NFS4ERR_WRONGSEC, -EPERM }, /* FIXME: this needs | ||
6184 | * to be handled by a | ||
6185 | * middle-layer. | ||
6186 | */ | ||
6187 | { -1, -EIO } | 6462 | { -1, -EIO } |
6188 | }; | 6463 | }; |
6189 | 6464 | ||
@@ -6258,6 +6533,7 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
6258 | PROC(SETACL, enc_setacl, dec_setacl), | 6533 | PROC(SETACL, enc_setacl, dec_setacl), |
6259 | PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), | 6534 | PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), |
6260 | PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), | 6535 | PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), |
6536 | PROC(SECINFO, enc_secinfo, dec_secinfo), | ||
6261 | #if defined(CONFIG_NFS_V4_1) | 6537 | #if defined(CONFIG_NFS_V4_1) |
6262 | PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), | 6538 | PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), |
6263 | PROC(CREATE_SESSION, enc_create_session, dec_create_session), | 6539 | PROC(CREATE_SESSION, enc_create_session, dec_create_session), |
@@ -6267,6 +6543,7 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
6267 | PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), | 6543 | PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), |
6268 | PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), | 6544 | PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), |
6269 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), | 6545 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), |
6546 | PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), | ||
6270 | #endif /* CONFIG_NFS_V4_1 */ | 6547 | #endif /* CONFIG_NFS_V4_1 */ |
6271 | }; | 6548 | }; |
6272 | 6549 | ||
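The PROC() entries added above expand into rpc_procinfo initializers. The macro in this file looks roughly like the following (reconstructed from context; field names such as p_statidx may differ slightly between kernel versions):

	#define PROC(proc, argtype, restype)				\
	[NFSPROC4_CLNT_##proc] = {					\
		.p_proc   = NFSPROC4_COMPOUND,				\
		.p_encode = (kxdreproc_t)nfs4_xdr_##argtype,		\
		.p_decode = (kxdrdproc_t)nfs4_xdr_##restype,		\
		.p_arglen = NFS4_##argtype##_sz,			\
		.p_replen = NFS4_##restype##_sz,			\
		.p_statidx = NFSPROC4_CLNT_##proc,			\
		.p_name   = #proc,					\
	}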
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 23e794410669..87a593c2b055 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -223,6 +223,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
223 | desc->pg_count = 0; | 223 | desc->pg_count = 0; |
224 | desc->pg_bsize = bsize; | 224 | desc->pg_bsize = bsize; |
225 | desc->pg_base = 0; | 225 | desc->pg_base = 0; |
226 | desc->pg_moreio = 0; | ||
226 | desc->pg_inode = inode; | 227 | desc->pg_inode = inode; |
227 | desc->pg_doio = doio; | 228 | desc->pg_doio = doio; |
228 | desc->pg_ioflags = io_flags; | 229 | desc->pg_ioflags = io_flags; |
@@ -335,9 +336,11 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
335 | struct nfs_page *req) | 336 | struct nfs_page *req) |
336 | { | 337 | { |
337 | while (!nfs_pageio_do_add_request(desc, req)) { | 338 | while (!nfs_pageio_do_add_request(desc, req)) { |
339 | desc->pg_moreio = 1; | ||
338 | nfs_pageio_doio(desc); | 340 | nfs_pageio_doio(desc); |
339 | if (desc->pg_error < 0) | 341 | if (desc->pg_error < 0) |
340 | return 0; | 342 | return 0; |
343 | desc->pg_moreio = 0; | ||
341 | } | 344 | } |
342 | return 1; | 345 | return 1; |
343 | } | 346 | } |
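pg_moreio records that this nfs_pageio_doio() call was forced from inside nfs_pageio_add_request(), i.e. more requests are still queued behind it. The FLUSH_COND_STABLE logic added to fs/nfs/write.c below reads the flag to conclude that a stable write would be pointless. Lifecycle, restated:

	/* Lifecycle of pg_moreio (illustrative):
	 *   0 -> normal adds; a final flush may keep FLUSH_COND_STABLE
	 *   1 -> flush forced mid-add: more I/O follows, so the write
	 *        paths strip FLUSH_COND_STABLE and go UNSTABLE + COMMIT
	 *   0 -> reset once the forced flush succeeds
	 */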
@@ -395,6 +398,7 @@ int nfs_scan_list(struct nfs_inode *nfsi, | |||
395 | pgoff_t idx_end; | 398 | pgoff_t idx_end; |
396 | int found, i; | 399 | int found, i; |
397 | int res; | 400 | int res; |
401 | struct list_head *list; | ||
398 | 402 | ||
399 | res = 0; | 403 | res = 0; |
400 | if (npages == 0) | 404 | if (npages == 0) |
@@ -415,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi, | |||
415 | idx_start = req->wb_index + 1; | 419 | idx_start = req->wb_index + 1; |
416 | if (nfs_set_page_tag_locked(req)) { | 420 | if (nfs_set_page_tag_locked(req)) { |
417 | kref_get(&req->wb_kref); | 421 | kref_get(&req->wb_kref); |
418 | nfs_list_remove_request(req); | ||
419 | radix_tree_tag_clear(&nfsi->nfs_page_tree, | 422 | radix_tree_tag_clear(&nfsi->nfs_page_tree, |
420 | req->wb_index, tag); | 423 | req->wb_index, tag); |
421 | nfs_list_add_request(req, dst); | 424 | list = pnfs_choose_commit_list(req, dst); |
425 | nfs_list_add_request(req, list); | ||
422 | res++; | 426 | res++; |
423 | if (res == INT_MAX) | 427 | if (res == INT_MAX) |
424 | goto out; | 428 | goto out; |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f38813a0a295..d9ab97269ce6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -259,6 +259,7 @@ put_lseg(struct pnfs_layout_segment *lseg) | |||
259 | pnfs_free_lseg_list(&free_me); | 259 | pnfs_free_lseg_list(&free_me); |
260 | } | 260 | } |
261 | } | 261 | } |
262 | EXPORT_SYMBOL_GPL(put_lseg); | ||
262 | 263 | ||
263 | static bool | 264 | static bool |
264 | should_free_lseg(u32 lseg_iomode, u32 recall_iomode) | 265 | should_free_lseg(u32 lseg_iomode, u32 recall_iomode) |
@@ -471,6 +472,9 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
471 | struct nfs_server *server = NFS_SERVER(ino); | 472 | struct nfs_server *server = NFS_SERVER(ino); |
472 | struct nfs4_layoutget *lgp; | 473 | struct nfs4_layoutget *lgp; |
473 | struct pnfs_layout_segment *lseg = NULL; | 474 | struct pnfs_layout_segment *lseg = NULL; |
475 | struct page **pages = NULL; | ||
476 | int i; | ||
477 | u32 max_resp_sz, max_pages; | ||
474 | 478 | ||
475 | dprintk("--> %s\n", __func__); | 479 | dprintk("--> %s\n", __func__); |
476 | 480 | ||
@@ -478,6 +482,21 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
478 | lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); | 482 | lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); |
479 | if (lgp == NULL) | 483 | if (lgp == NULL) |
480 | return NULL; | 484 | return NULL; |
485 | |||
486 | /* allocate pages for xdr post processing */ | ||
487 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
488 | max_pages = max_resp_sz >> PAGE_SHIFT; | ||
489 | |||
490 | pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); | ||
491 | if (!pages) | ||
492 | goto out_err_free; | ||
493 | |||
494 | for (i = 0; i < max_pages; i++) { | ||
495 | pages[i] = alloc_page(GFP_KERNEL); | ||
496 | if (!pages[i]) | ||
497 | goto out_err_free; | ||
498 | } | ||
499 | |||
481 | lgp->args.minlength = NFS4_MAX_UINT64; | 500 | lgp->args.minlength = NFS4_MAX_UINT64; |
482 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; | 501 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; |
483 | lgp->args.range.iomode = iomode; | 502 | lgp->args.range.iomode = iomode; |
@@ -486,6 +505,8 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
486 | lgp->args.type = server->pnfs_curr_ld->id; | 505 | lgp->args.type = server->pnfs_curr_ld->id; |
487 | lgp->args.inode = ino; | 506 | lgp->args.inode = ino; |
488 | lgp->args.ctx = get_nfs_open_context(ctx); | 507 | lgp->args.ctx = get_nfs_open_context(ctx); |
508 | lgp->args.layout.pages = pages; | ||
509 | lgp->args.layout.pglen = max_pages * PAGE_SIZE; | ||
489 | lgp->lsegpp = &lseg; | 510 | lgp->lsegpp = &lseg; |
490 | 511 | ||
491 | /* Synchronously retrieve layout information from server and | 512 | /* Synchronously retrieve layout information from server and |
@@ -496,7 +517,26 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
496 | /* remember that LAYOUTGET failed and suspend trying */ | 517 | /* remember that LAYOUTGET failed and suspend trying */ |
497 | set_bit(lo_fail_bit(iomode), &lo->plh_flags); | 518 | set_bit(lo_fail_bit(iomode), &lo->plh_flags); |
498 | } | 519 | } |
520 | |||
521 | /* free xdr pages */ | ||
522 | for (i = 0; i < max_pages; i++) | ||
523 | __free_page(pages[i]); | ||
524 | kfree(pages); | ||
525 | |||
499 | return lseg; | 526 | return lseg; |
527 | |||
528 | out_err_free: | ||
529 | /* free any allocated xdr pages, and lgp as it's unused */ | ||
530 | if (pages) { | ||
531 | for (i = 0; i < max_pages; i++) { | ||
532 | if (!pages[i]) | ||
533 | break; | ||
534 | __free_page(pages[i]); | ||
535 | } | ||
536 | kfree(pages); | ||
537 | } | ||
538 | kfree(lgp); | ||
539 | return NULL; | ||
500 | } | 540 | } |
501 | 541 | ||
502 | bool pnfs_roc(struct inode *ino) | 542 | bool pnfs_roc(struct inode *ino) |
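send_layoutget() now sizes its receive pages from the session's max_resp_sz and must free every page on both the success and error paths. The allocation pattern could be factored into a helper along these lines (hypothetical name, sketch only):

	static struct page **alloc_layout_resp_pages(u32 max_pages)
	{
		struct page **pages = kcalloc(max_pages, sizeof(*pages), GFP_KERNEL);
		u32 i;

		if (!pages)
			return NULL;
		for (i = 0; i < max_pages; i++) {
			pages[i] = alloc_page(GFP_KERNEL);
			if (!pages[i]) {
				while (i--)	/* unwind the partial allocation */
					__free_page(pages[i]);
				kfree(pages);
				return NULL;
			}
		}
		return pages;
	}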
@@ -945,3 +985,105 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, | |||
945 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); | 985 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); |
946 | return trypnfs; | 986 | return trypnfs; |
947 | } | 987 | } |
988 | |||
989 | /* | ||
990 | * Currently there is only one (whole file) write lseg. | ||
991 | */ | ||
992 | static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode) | ||
993 | { | ||
994 | struct pnfs_layout_segment *lseg, *rv = NULL; | ||
995 | |||
996 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) | ||
997 | if (lseg->pls_range.iomode == IOMODE_RW) | ||
998 | rv = lseg; | ||
999 | return rv; | ||
1000 | } | ||
1001 | |||
1002 | void | ||
1003 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) | ||
1004 | { | ||
1005 | struct nfs_inode *nfsi = NFS_I(wdata->inode); | ||
1006 | loff_t end_pos = wdata->args.offset + wdata->res.count; | ||
1007 | |||
1008 | spin_lock(&nfsi->vfs_inode.i_lock); | ||
1009 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | ||
1010 | /* references matched in nfs4_layoutcommit_release */ | ||
1011 | get_lseg(wdata->lseg); | ||
1012 | wdata->lseg->pls_lc_cred = | ||
1013 | get_rpccred(wdata->args.context->state->owner->so_cred); | ||
1014 | mark_inode_dirty_sync(wdata->inode); | ||
1015 | dprintk("%s: Set layoutcommit for inode %lu ", | ||
1016 | __func__, wdata->inode->i_ino); | ||
1017 | } | ||
1018 | if (end_pos > wdata->lseg->pls_end_pos) | ||
1019 | wdata->lseg->pls_end_pos = end_pos; | ||
1020 | spin_unlock(&nfsi->vfs_inode.i_lock); | ||
1021 | } | ||
1022 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); | ||
1023 | |||
1024 | /* | ||
1025 | * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and | ||
1026 | * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough | ||
1027 | * data to disk to allow the server to recover the data if it crashes. | ||
1028 | * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag | ||
1029 | * is off, and a COMMIT is sent to a data server, or | ||
1030 | * if WRITEs to a data server return NFS_DATA_SYNC. | ||
1031 | */ | ||
1032 | int | ||
1033 | pnfs_layoutcommit_inode(struct inode *inode, bool sync) | ||
1034 | { | ||
1035 | struct nfs4_layoutcommit_data *data; | ||
1036 | struct nfs_inode *nfsi = NFS_I(inode); | ||
1037 | struct pnfs_layout_segment *lseg; | ||
1038 | struct rpc_cred *cred; | ||
1039 | loff_t end_pos; | ||
1040 | int status = 0; | ||
1041 | |||
1042 | dprintk("--> %s inode %lu\n", __func__, inode->i_ino); | ||
1043 | |||
1044 | if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) | ||
1045 | return 0; | ||
1046 | |||
1047 | /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ | ||
1048 | data = kzalloc(sizeof(*data), GFP_NOFS); | ||
1049 | if (!data) { | ||
1050 | mark_inode_dirty_sync(inode); | ||
1051 | status = -ENOMEM; | ||
1052 | goto out; | ||
1053 | } | ||
1054 | |||
1055 | spin_lock(&inode->i_lock); | ||
1056 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | ||
1057 | spin_unlock(&inode->i_lock); | ||
1058 | kfree(data); | ||
1059 | goto out; | ||
1060 | } | ||
1061 | /* | ||
1062 | * Currently there is only one (whole file) write lseg, which is | ||
1063 | * referenced in pnfs_set_layoutcommit and will be found here. | ||
1064 | */ | ||
1065 | lseg = pnfs_list_write_lseg(inode); | ||
1066 | |||
1067 | end_pos = lseg->pls_end_pos; | ||
1068 | cred = lseg->pls_lc_cred; | ||
1069 | lseg->pls_end_pos = 0; | ||
1070 | lseg->pls_lc_cred = NULL; | ||
1071 | |||
1072 | memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, | ||
1073 | sizeof(nfsi->layout->plh_stateid.data)); | ||
1074 | spin_unlock(&inode->i_lock); | ||
1075 | |||
1076 | data->args.inode = inode; | ||
1077 | data->lseg = lseg; | ||
1078 | data->cred = cred; | ||
1079 | nfs_fattr_init(&data->fattr); | ||
1080 | data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; | ||
1081 | data->res.fattr = &data->fattr; | ||
1082 | data->args.lastbytewritten = end_pos - 1; | ||
1083 | data->res.server = NFS_SERVER(inode); | ||
1084 | |||
1085 | status = nfs4_proc_layoutcommit(data, sync); | ||
1086 | out: | ||
1087 | dprintk("<-- %s status %d\n", __func__, status); | ||
1088 | return status; | ||
1089 | } | ||
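Taken together, pnfs_set_layoutcommit() and pnfs_layoutcommit_inode() split LAYOUTCOMMIT into an arming step at write completion and a sending step at writeback time. A hedged sketch of the intended driver-side usage (the callback name is hypothetical):

	/* Called by a layout driver when a data-server WRITE completes. */
	static void my_ld_write_done(struct nfs_write_data *wdata)
	{
		/* NFS_DATA_SYNC from a DS means the MDS still needs LAYOUTCOMMIT */
		if (wdata->verf.committed == NFS_DATA_SYNC)
			pnfs_set_layoutcommit(wdata);
	}

	/* Writeback later drains the armed state, e.g. from nfs_write_inode():
	 *	pnfs_layoutcommit_inode(inode, sync);
	 */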
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 6380b9405bcd..bc4827202e7a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -43,6 +43,8 @@ struct pnfs_layout_segment { | |||
43 | atomic_t pls_refcount; | 43 | atomic_t pls_refcount; |
44 | unsigned long pls_flags; | 44 | unsigned long pls_flags; |
45 | struct pnfs_layout_hdr *pls_layout; | 45 | struct pnfs_layout_hdr *pls_layout; |
46 | struct rpc_cred *pls_lc_cred; /* LAYOUTCOMMIT credential */ | ||
47 | loff_t pls_end_pos; /* LAYOUTCOMMIT write end */ | ||
46 | }; | 48 | }; |
47 | 49 | ||
48 | enum pnfs_try_status { | 50 | enum pnfs_try_status { |
@@ -74,6 +76,13 @@ struct pnfs_layoutdriver_type { | |||
74 | /* test for nfs page cache coalescing */ | 76 | /* test for nfs page cache coalescing */ |
75 | int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); | 77 | int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); |
76 | 78 | ||
79 | /* Returns true if the layout driver wants to divert this request | ||
80 | * to the driver's commit routine. | ||
81 | */ | ||
82 | bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); | ||
83 | struct list_head * (*choose_commit_list) (struct nfs_page *req); | ||
84 | int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); | ||
85 | |||
77 | /* | 86 | /* |
78 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted | 87 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted |
79 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS | 88 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS |
@@ -100,7 +109,6 @@ struct pnfs_device { | |||
100 | unsigned int layout_type; | 109 | unsigned int layout_type; |
101 | unsigned int mincount; | 110 | unsigned int mincount; |
102 | struct page **pages; | 111 | struct page **pages; |
103 | void *area; | ||
104 | unsigned int pgbase; | 112 | unsigned int pgbase; |
105 | unsigned int pglen; | 113 | unsigned int pglen; |
106 | }; | 114 | }; |
@@ -145,7 +153,8 @@ bool pnfs_roc(struct inode *ino); | |||
145 | void pnfs_roc_release(struct inode *ino); | 153 | void pnfs_roc_release(struct inode *ino); |
146 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 154 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
147 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); | 155 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); |
148 | 156 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); | |
157 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | ||
149 | 158 | ||
150 | static inline int lo_fail_bit(u32 iomode) | 159 | static inline int lo_fail_bit(u32 iomode) |
151 | { | 160 | { |
@@ -169,6 +178,51 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss) | |||
169 | return nfss->pnfs_curr_ld != NULL; | 178 | return nfss->pnfs_curr_ld != NULL; |
170 | } | 179 | } |
171 | 180 | ||
181 | static inline void | ||
182 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) | ||
183 | { | ||
184 | if (lseg) { | ||
185 | struct pnfs_layoutdriver_type *ld; | ||
186 | |||
187 | ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld; | ||
188 | if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) { | ||
189 | set_bit(PG_PNFS_COMMIT, &req->wb_flags); | ||
190 | req->wb_commit_lseg = get_lseg(lseg); | ||
191 | } | ||
192 | } | ||
193 | } | ||
194 | |||
195 | static inline int | ||
196 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) | ||
197 | { | ||
198 | if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags)) | ||
199 | return PNFS_NOT_ATTEMPTED; | ||
200 | return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); | ||
201 | } | ||
202 | |||
203 | static inline struct list_head * | ||
204 | pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) | ||
205 | { | ||
206 | struct list_head *rv; | ||
207 | |||
208 | if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { | ||
209 | struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; | ||
210 | |||
211 | set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); | ||
212 | rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); | ||
213 | /* matched by ref taken when PG_PNFS_COMMIT is set */ | ||
214 | put_lseg(req->wb_commit_lseg); | ||
215 | } else | ||
216 | rv = mds; | ||
217 | return rv; | ||
218 | } | ||
219 | |||
220 | static inline void pnfs_clear_request_commit(struct nfs_page *req) | ||
221 | { | ||
222 | if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) | ||
223 | put_lseg(req->wb_commit_lseg); | ||
224 | } | ||
225 | |||
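The three new pnfs_layoutdriver_type hooks cooperate: mark_pnfs_commit() tags a request at commit-marking time, choose_commit_list() routes it to a driver-private list from nfs_scan_list(), and commit_pagelist() sends the resulting COMMITs. A hypothetical driver might wire them like this (all my_ld_* names are made up for illustration):

	static LIST_HEAD(my_ld_commit_bucket);	/* hypothetical per-DS bucket */

	static bool my_ld_mark_commit(struct pnfs_layout_segment *lseg)
	{
		return true;	/* divert every request to the driver's lists */
	}

	static struct list_head *my_ld_choose_commit_list(struct nfs_page *req)
	{
		return &my_ld_commit_bucket;
	}

	static int my_ld_commit_pagelist(struct inode *inode,
					 struct list_head *mds_pages, int how)
	{
		/* send COMMITs to the data servers; mds_pages holds leftovers */
		return PNFS_ATTEMPTED;
	}

	static struct pnfs_layoutdriver_type my_ld = {
		.mark_pnfs_commit   = my_ld_mark_commit,
		.choose_commit_list = my_ld_choose_commit_list,
		.commit_pagelist    = my_ld_commit_pagelist,
	};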
172 | #else /* CONFIG_NFS_V4_1 */ | 226 | #else /* CONFIG_NFS_V4_1 */ |
173 | 227 | ||
174 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) | 228 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) |
@@ -252,6 +306,31 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino) | |||
252 | pgio->pg_test = NULL; | 306 | pgio->pg_test = NULL; |
253 | } | 307 | } |
254 | 308 | ||
309 | static inline void | ||
310 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) | ||
311 | { | ||
312 | } | ||
313 | |||
314 | static inline int | ||
315 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) | ||
316 | { | ||
317 | return PNFS_NOT_ATTEMPTED; | ||
318 | } | ||
319 | |||
320 | static inline struct list_head * | ||
321 | pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) | ||
322 | { | ||
323 | return mds; | ||
324 | } | ||
325 | |||
326 | static inline void pnfs_clear_request_commit(struct nfs_page *req) | ||
327 | { | ||
328 | } | ||
329 | |||
330 | static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) | ||
331 | { | ||
332 | return 0; | ||
333 | } | ||
255 | #endif /* CONFIG_NFS_V4_1 */ | 334 | #endif /* CONFIG_NFS_V4_1 */ |
256 | 335 | ||
257 | #endif /* FS_NFS_PNFS_H */ | 336 | #endif /* FS_NFS_PNFS_H */ |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b8ec170f2a0f..ac40b8535d7e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -177,7 +177,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
177 | } | 177 | } |
178 | 178 | ||
179 | static int | 179 | static int |
180 | nfs_proc_lookup(struct inode *dir, struct qstr *name, | 180 | nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, |
181 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 181 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) |
182 | { | 182 | { |
183 | struct nfs_diropargs arg = { | 183 | struct nfs_diropargs arg = { |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 47a3ad63e0d5..85d75254328e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -59,6 +59,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void) | |||
59 | } | 59 | } |
60 | return p; | 60 | return p; |
61 | } | 61 | } |
62 | EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); | ||
62 | 63 | ||
63 | void nfs_commit_free(struct nfs_write_data *p) | 64 | void nfs_commit_free(struct nfs_write_data *p) |
64 | { | 65 | { |
@@ -66,6 +67,7 @@ void nfs_commit_free(struct nfs_write_data *p) | |||
66 | kfree(p->pagevec); | 67 | kfree(p->pagevec); |
67 | mempool_free(p, nfs_commit_mempool); | 68 | mempool_free(p, nfs_commit_mempool); |
68 | } | 69 | } |
70 | EXPORT_SYMBOL_GPL(nfs_commit_free); | ||
69 | 71 | ||
70 | struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) | 72 | struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) |
71 | { | 73 | { |
@@ -179,8 +181,8 @@ static int wb_priority(struct writeback_control *wbc) | |||
179 | if (wbc->for_reclaim) | 181 | if (wbc->for_reclaim) |
180 | return FLUSH_HIGHPRI | FLUSH_STABLE; | 182 | return FLUSH_HIGHPRI | FLUSH_STABLE; |
181 | if (wbc->for_kupdate || wbc->for_background) | 183 | if (wbc->for_kupdate || wbc->for_background) |
182 | return FLUSH_LOWPRI; | 184 | return FLUSH_LOWPRI | FLUSH_COND_STABLE; |
183 | return 0; | 185 | return FLUSH_COND_STABLE; |
184 | } | 186 | } |
185 | 187 | ||
186 | /* | 188 | /* |
@@ -441,7 +443,7 @@ nfs_mark_request_dirty(struct nfs_page *req) | |||
441 | * Add a request to the inode's commit list. | 443 | * Add a request to the inode's commit list. |
442 | */ | 444 | */ |
443 | static void | 445 | static void |
444 | nfs_mark_request_commit(struct nfs_page *req) | 446 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) |
445 | { | 447 | { |
446 | struct inode *inode = req->wb_context->path.dentry->d_inode; | 448 | struct inode *inode = req->wb_context->path.dentry->d_inode; |
447 | struct nfs_inode *nfsi = NFS_I(inode); | 449 | struct nfs_inode *nfsi = NFS_I(inode); |
@@ -453,6 +455,7 @@ nfs_mark_request_commit(struct nfs_page *req) | |||
453 | NFS_PAGE_TAG_COMMIT); | 455 | NFS_PAGE_TAG_COMMIT); |
454 | nfsi->ncommit++; | 456 | nfsi->ncommit++; |
455 | spin_unlock(&inode->i_lock); | 457 | spin_unlock(&inode->i_lock); |
458 | pnfs_mark_request_commit(req, lseg); | ||
456 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | 459 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); |
457 | inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); | 460 | inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); |
458 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | 461 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); |
@@ -474,14 +477,18 @@ nfs_clear_request_commit(struct nfs_page *req) | |||
474 | static inline | 477 | static inline |
475 | int nfs_write_need_commit(struct nfs_write_data *data) | 478 | int nfs_write_need_commit(struct nfs_write_data *data) |
476 | { | 479 | { |
477 | return data->verf.committed != NFS_FILE_SYNC; | 480 | if (data->verf.committed == NFS_DATA_SYNC) |
481 | return data->lseg == NULL; | ||
482 | else | ||
483 | return data->verf.committed != NFS_FILE_SYNC; | ||
478 | } | 484 | } |
479 | 485 | ||
480 | static inline | 486 | static inline |
481 | int nfs_reschedule_unstable_write(struct nfs_page *req) | 487 | int nfs_reschedule_unstable_write(struct nfs_page *req, |
488 | struct nfs_write_data *data) | ||
482 | { | 489 | { |
483 | if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { | 490 | if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { |
484 | nfs_mark_request_commit(req); | 491 | nfs_mark_request_commit(req, data->lseg); |
485 | return 1; | 492 | return 1; |
486 | } | 493 | } |
487 | if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { | 494 | if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { |
@@ -492,7 +499,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) | |||
492 | } | 499 | } |
493 | #else | 500 | #else |
494 | static inline void | 501 | static inline void |
495 | nfs_mark_request_commit(struct nfs_page *req) | 502 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) |
496 | { | 503 | { |
497 | } | 504 | } |
498 | 505 | ||
@@ -509,7 +516,8 @@ int nfs_write_need_commit(struct nfs_write_data *data) | |||
509 | } | 516 | } |
510 | 517 | ||
511 | static inline | 518 | static inline |
512 | int nfs_reschedule_unstable_write(struct nfs_page *req) | 519 | int nfs_reschedule_unstable_write(struct nfs_page *req, |
520 | struct nfs_write_data *data) | ||
513 | { | 521 | { |
514 | return 0; | 522 | return 0; |
515 | } | 523 | } |
@@ -612,9 +620,11 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, | |||
612 | } | 620 | } |
613 | 621 | ||
614 | if (nfs_clear_request_commit(req) && | 622 | if (nfs_clear_request_commit(req) && |
615 | radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, | 623 | radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, |
616 | req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) | 624 | req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) { |
617 | NFS_I(inode)->ncommit--; | 625 | NFS_I(inode)->ncommit--; |
626 | pnfs_clear_request_commit(req); | ||
627 | } | ||
618 | 628 | ||
619 | /* Okay, the request matches. Update the region */ | 629 | /* Okay, the request matches. Update the region */ |
620 | if (offset < req->wb_offset) { | 630 | if (offset < req->wb_offset) { |
@@ -762,11 +772,12 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
762 | return status; | 772 | return status; |
763 | } | 773 | } |
764 | 774 | ||
765 | static void nfs_writepage_release(struct nfs_page *req) | 775 | static void nfs_writepage_release(struct nfs_page *req, |
776 | struct nfs_write_data *data) | ||
766 | { | 777 | { |
767 | struct page *page = req->wb_page; | 778 | struct page *page = req->wb_page; |
768 | 779 | ||
769 | if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) | 780 | if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) |
770 | nfs_inode_remove_request(req); | 781 | nfs_inode_remove_request(req); |
771 | nfs_clear_page_tag_locked(req); | 782 | nfs_clear_page_tag_locked(req); |
772 | nfs_end_page_writeback(page); | 783 | nfs_end_page_writeback(page); |
@@ -863,7 +874,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, | |||
863 | data->args.context = get_nfs_open_context(req->wb_context); | 874 | data->args.context = get_nfs_open_context(req->wb_context); |
864 | data->args.lock_context = req->wb_lock_context; | 875 | data->args.lock_context = req->wb_lock_context; |
865 | data->args.stable = NFS_UNSTABLE; | 876 | data->args.stable = NFS_UNSTABLE; |
866 | if (how & FLUSH_STABLE) { | 877 | if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { |
867 | data->args.stable = NFS_DATA_SYNC; | 878 | data->args.stable = NFS_DATA_SYNC; |
868 | if (!nfs_need_commit(NFS_I(inode))) | 879 | if (!nfs_need_commit(NFS_I(inode))) |
869 | data->args.stable = NFS_FILE_SYNC; | 880 | data->args.stable = NFS_FILE_SYNC; |
@@ -912,6 +923,12 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) | |||
912 | 923 | ||
913 | nfs_list_remove_request(req); | 924 | nfs_list_remove_request(req); |
914 | 925 | ||
926 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | ||
927 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || | ||
928 | desc->pg_count > wsize)) | ||
929 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | ||
930 | |||
931 | |||
915 | nbytes = desc->pg_count; | 932 | nbytes = desc->pg_count; |
916 | do { | 933 | do { |
917 | size_t len = min(nbytes, wsize); | 934 | size_t len = min(nbytes, wsize); |
@@ -1002,6 +1019,10 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) | |||
1002 | if ((!lseg) && list_is_singular(&data->pages)) | 1019 | if ((!lseg) && list_is_singular(&data->pages)) |
1003 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); | 1020 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); |
1004 | 1021 | ||
1022 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | ||
1023 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) | ||
1024 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | ||
1025 | |||
1005 | /* Set up the argument struct */ | 1026 | /* Set up the argument struct */ |
1006 | ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); | 1027 | ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); |
1007 | out: | 1028 | out: |
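Both flush paths apply the same rule: FLUSH_COND_STABLE may survive only when this RPC is the entire flush. Restated as a predicate (illustrative; nfs_flush_multi additionally checks pg_count > wsize because it splits the range into several RPCs):

	bool keep_cond_stable = !desc->pg_moreio &&	/* nothing queued behind us */
				NFS_I(desc->pg_inode)->ncommit == 0; /* no commits pending */
	if (!keep_cond_stable)
		desc->pg_ioflags &= ~FLUSH_COND_STABLE;	/* fall back to UNSTABLE + COMMIT */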
@@ -1074,7 +1095,7 @@ static void nfs_writeback_release_partial(void *calldata) | |||
1074 | 1095 | ||
1075 | out: | 1096 | out: |
1076 | if (atomic_dec_and_test(&req->wb_complete)) | 1097 | if (atomic_dec_and_test(&req->wb_complete)) |
1077 | nfs_writepage_release(req); | 1098 | nfs_writepage_release(req, data); |
1078 | nfs_writedata_release(calldata); | 1099 | nfs_writedata_release(calldata); |
1079 | } | 1100 | } |
1080 | 1101 | ||
@@ -1141,7 +1162,7 @@ static void nfs_writeback_release_full(void *calldata) | |||
1141 | 1162 | ||
1142 | if (nfs_write_need_commit(data)) { | 1163 | if (nfs_write_need_commit(data)) { |
1143 | memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); | 1164 | memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); |
1144 | nfs_mark_request_commit(req); | 1165 | nfs_mark_request_commit(req, data->lseg); |
1145 | dprintk(" marked for commit\n"); | 1166 | dprintk(" marked for commit\n"); |
1146 | goto next; | 1167 | goto next; |
1147 | } | 1168 | } |
@@ -1251,57 +1272,82 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
1251 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 1272 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
1252 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) | 1273 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) |
1253 | { | 1274 | { |
1275 | int ret; | ||
1276 | |||
1254 | if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) | 1277 | if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) |
1255 | return 1; | 1278 | return 1; |
1256 | if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, | 1279 | if (!may_wait) |
1257 | NFS_INO_COMMIT, nfs_wait_bit_killable, | 1280 | return 0; |
1258 | TASK_KILLABLE)) | 1281 | ret = out_of_line_wait_on_bit_lock(&nfsi->flags, |
1259 | return 1; | 1282 | NFS_INO_COMMIT, |
1260 | return 0; | 1283 | nfs_wait_bit_killable, |
1284 | TASK_KILLABLE); | ||
1285 | return (ret < 0) ? ret : 1; | ||
1261 | } | 1286 | } |
1262 | 1287 | ||
1263 | static void nfs_commit_clear_lock(struct nfs_inode *nfsi) | 1288 | void nfs_commit_clear_lock(struct nfs_inode *nfsi) |
1264 | { | 1289 | { |
1265 | clear_bit(NFS_INO_COMMIT, &nfsi->flags); | 1290 | clear_bit(NFS_INO_COMMIT, &nfsi->flags); |
1266 | smp_mb__after_clear_bit(); | 1291 | smp_mb__after_clear_bit(); |
1267 | wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); | 1292 | wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); |
1268 | } | 1293 | } |
1294 | EXPORT_SYMBOL_GPL(nfs_commit_clear_lock); | ||
1269 | 1295 | ||
1270 | 1296 | void nfs_commitdata_release(void *data) | |
1271 | static void nfs_commitdata_release(void *data) | ||
1272 | { | 1297 | { |
1273 | struct nfs_write_data *wdata = data; | 1298 | struct nfs_write_data *wdata = data; |
1274 | 1299 | ||
1300 | put_lseg(wdata->lseg); | ||
1275 | put_nfs_open_context(wdata->args.context); | 1301 | put_nfs_open_context(wdata->args.context); |
1276 | nfs_commit_free(wdata); | 1302 | nfs_commit_free(wdata); |
1277 | } | 1303 | } |
1304 | EXPORT_SYMBOL_GPL(nfs_commitdata_release); | ||
1278 | 1305 | ||
1279 | /* | 1306 | int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, |
1280 | * Set up the argument/result storage required for the RPC call. | 1307 | const struct rpc_call_ops *call_ops, |
1281 | */ | 1308 | int how) |
1282 | static int nfs_commit_rpcsetup(struct list_head *head, | ||
1283 | struct nfs_write_data *data, | ||
1284 | int how) | ||
1285 | { | 1309 | { |
1286 | struct nfs_page *first = nfs_list_entry(head->next); | ||
1287 | struct inode *inode = first->wb_context->path.dentry->d_inode; | ||
1288 | int priority = flush_task_priority(how); | ||
1289 | struct rpc_task *task; | 1310 | struct rpc_task *task; |
1311 | int priority = flush_task_priority(how); | ||
1290 | struct rpc_message msg = { | 1312 | struct rpc_message msg = { |
1291 | .rpc_argp = &data->args, | 1313 | .rpc_argp = &data->args, |
1292 | .rpc_resp = &data->res, | 1314 | .rpc_resp = &data->res, |
1293 | .rpc_cred = first->wb_context->cred, | 1315 | .rpc_cred = data->cred, |
1294 | }; | 1316 | }; |
1295 | struct rpc_task_setup task_setup_data = { | 1317 | struct rpc_task_setup task_setup_data = { |
1296 | .task = &data->task, | 1318 | .task = &data->task, |
1297 | .rpc_client = NFS_CLIENT(inode), | 1319 | .rpc_client = clnt, |
1298 | .rpc_message = &msg, | 1320 | .rpc_message = &msg, |
1299 | .callback_ops = &nfs_commit_ops, | 1321 | .callback_ops = call_ops, |
1300 | .callback_data = data, | 1322 | .callback_data = data, |
1301 | .workqueue = nfsiod_workqueue, | 1323 | .workqueue = nfsiod_workqueue, |
1302 | .flags = RPC_TASK_ASYNC, | 1324 | .flags = RPC_TASK_ASYNC, |
1303 | .priority = priority, | 1325 | .priority = priority, |
1304 | }; | 1326 | }; |
1327 | /* Set up the initial task struct. */ | ||
1328 | NFS_PROTO(data->inode)->commit_setup(data, &msg); | ||
1329 | |||
1330 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); | ||
1331 | |||
1332 | task = rpc_run_task(&task_setup_data); | ||
1333 | if (IS_ERR(task)) | ||
1334 | return PTR_ERR(task); | ||
1335 | if (how & FLUSH_SYNC) | ||
1336 | rpc_wait_for_completion_task(task); | ||
1337 | rpc_put_task(task); | ||
1338 | return 0; | ||
1339 | } | ||
1340 | EXPORT_SYMBOL_GPL(nfs_initiate_commit); | ||
1341 | |||
1342 | /* | ||
1343 | * Set up the argument/result storage required for the RPC call. | ||
1344 | */ | ||
1345 | void nfs_init_commit(struct nfs_write_data *data, | ||
1346 | struct list_head *head, | ||
1347 | struct pnfs_layout_segment *lseg) | ||
1348 | { | ||
1349 | struct nfs_page *first = nfs_list_entry(head->next); | ||
1350 | struct inode *inode = first->wb_context->path.dentry->d_inode; | ||
1305 | 1351 | ||
1306 | /* Set up the RPC argument and reply structs | 1352 | /* Set up the RPC argument and reply structs |
1307 | * NB: take care not to mess about with data->commit et al. */ | 1353 | * NB: take care not to mess about with data->commit et al. */ |
@@ -1309,7 +1355,9 @@ static int nfs_commit_rpcsetup(struct list_head *head, | |||
1309 | list_splice_init(head, &data->pages); | 1355 | list_splice_init(head, &data->pages); |
1310 | 1356 | ||
1311 | data->inode = inode; | 1357 | data->inode = inode; |
1312 | data->cred = msg.rpc_cred; | 1358 | data->cred = first->wb_context->cred; |
1359 | data->lseg = lseg; /* reference transferred */ | ||
1360 | data->mds_ops = &nfs_commit_ops; | ||
1313 | 1361 | ||
1314 | data->args.fh = NFS_FH(data->inode); | 1362 | data->args.fh = NFS_FH(data->inode); |
1315 | /* Note: we always request a commit of the entire inode */ | 1363 | /* Note: we always request a commit of the entire inode */ |
@@ -1320,20 +1368,25 @@ static int nfs_commit_rpcsetup(struct list_head *head, | |||
1320 | data->res.fattr = &data->fattr; | 1368 | data->res.fattr = &data->fattr; |
1321 | data->res.verf = &data->verf; | 1369 | data->res.verf = &data->verf; |
1322 | nfs_fattr_init(&data->fattr); | 1370 | nfs_fattr_init(&data->fattr); |
1371 | } | ||
1372 | EXPORT_SYMBOL_GPL(nfs_init_commit); | ||
1323 | 1373 | ||
1324 | /* Set up the initial task struct. */ | 1374 | void nfs_retry_commit(struct list_head *page_list, |
1325 | NFS_PROTO(inode)->commit_setup(data, &msg); | 1375 | struct pnfs_layout_segment *lseg) |
1326 | 1376 | { | |
1327 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); | 1377 | struct nfs_page *req; |
1328 | 1378 | ||
1329 | task = rpc_run_task(&task_setup_data); | 1379 | while (!list_empty(page_list)) { |
1330 | if (IS_ERR(task)) | 1380 | req = nfs_list_entry(page_list->next); |
1331 | return PTR_ERR(task); | 1381 | nfs_list_remove_request(req); |
1332 | if (how & FLUSH_SYNC) | 1382 | nfs_mark_request_commit(req, lseg); |
1333 | rpc_wait_for_completion_task(task); | 1383 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); |
1334 | rpc_put_task(task); | 1384 | dec_bdi_stat(req->wb_page->mapping->backing_dev_info, |
1335 | return 0; | 1385 | BDI_RECLAIMABLE); |
1386 | nfs_clear_page_tag_locked(req); | ||
1387 | } | ||
1336 | } | 1388 | } |
1389 | EXPORT_SYMBOL_GPL(nfs_retry_commit); | ||
1337 | 1390 | ||
1338 | /* | 1391 | /* |
1339 | * Commit dirty pages | 1392 | * Commit dirty pages |
@@ -1342,7 +1395,6 @@ static int | |||
1342 | nfs_commit_list(struct inode *inode, struct list_head *head, int how) | 1395 | nfs_commit_list(struct inode *inode, struct list_head *head, int how) |
1343 | { | 1396 | { |
1344 | struct nfs_write_data *data; | 1397 | struct nfs_write_data *data; |
1345 | struct nfs_page *req; | ||
1346 | 1398 | ||
1347 | data = nfs_commitdata_alloc(); | 1399 | data = nfs_commitdata_alloc(); |
1348 | 1400 | ||
@@ -1350,17 +1402,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) | |||
1350 | goto out_bad; | 1402 | goto out_bad; |
1351 | 1403 | ||
1352 | /* Set up the argument struct */ | 1404 | /* Set up the argument struct */ |
1353 | return nfs_commit_rpcsetup(head, data, how); | 1405 | nfs_init_commit(data, head, NULL); |
1406 | return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); | ||
1354 | out_bad: | 1407 | out_bad: |
1355 | while (!list_empty(head)) { | 1408 | nfs_retry_commit(head, NULL); |
1356 | req = nfs_list_entry(head->next); | ||
1357 | nfs_list_remove_request(req); | ||
1358 | nfs_mark_request_commit(req); | ||
1359 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | ||
1360 | dec_bdi_stat(req->wb_page->mapping->backing_dev_info, | ||
1361 | BDI_RECLAIMABLE); | ||
1362 | nfs_clear_page_tag_locked(req); | ||
1363 | } | ||
1364 | nfs_commit_clear_lock(NFS_I(inode)); | 1409 | nfs_commit_clear_lock(NFS_I(inode)); |
1365 | return -ENOMEM; | 1410 | return -ENOMEM; |
1366 | } | 1411 | } |
@@ -1380,10 +1425,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) | |||
1380 | return; | 1425 | return; |
1381 | } | 1426 | } |
1382 | 1427 | ||
1383 | static void nfs_commit_release(void *calldata) | 1428 | void nfs_commit_release_pages(struct nfs_write_data *data) |
1384 | { | 1429 | { |
1385 | struct nfs_write_data *data = calldata; | 1430 | struct nfs_page *req; |
1386 | struct nfs_page *req; | ||
1387 | int status = data->task.tk_status; | 1431 | int status = data->task.tk_status; |
1388 | 1432 | ||
1389 | while (!list_empty(&data->pages)) { | 1433 | while (!list_empty(&data->pages)) { |
@@ -1417,6 +1461,14 @@ static void nfs_commit_release(void *calldata) | |||
1417 | next: | 1461 | next: |
1418 | nfs_clear_page_tag_locked(req); | 1462 | nfs_clear_page_tag_locked(req); |
1419 | } | 1463 | } |
1464 | } | ||
1465 | EXPORT_SYMBOL_GPL(nfs_commit_release_pages); | ||
1466 | |||
1467 | static void nfs_commit_release(void *calldata) | ||
1468 | { | ||
1469 | struct nfs_write_data *data = calldata; | ||
1470 | |||
1471 | nfs_commit_release_pages(data); | ||
1420 | nfs_commit_clear_lock(NFS_I(data->inode)); | 1472 | nfs_commit_clear_lock(NFS_I(data->inode)); |
1421 | nfs_commitdata_release(calldata); | 1473 | nfs_commitdata_release(calldata); |
1422 | } | 1474 | } |
@@ -1433,23 +1485,30 @@ int nfs_commit_inode(struct inode *inode, int how) | |||
1433 | { | 1485 | { |
1434 | LIST_HEAD(head); | 1486 | LIST_HEAD(head); |
1435 | int may_wait = how & FLUSH_SYNC; | 1487 | int may_wait = how & FLUSH_SYNC; |
1436 | int res = 0; | 1488 | int res; |
1437 | 1489 | ||
1438 | if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) | 1490 | res = nfs_commit_set_lock(NFS_I(inode), may_wait); |
1491 | if (res <= 0) | ||
1439 | goto out_mark_dirty; | 1492 | goto out_mark_dirty; |
1440 | spin_lock(&inode->i_lock); | 1493 | spin_lock(&inode->i_lock); |
1441 | res = nfs_scan_commit(inode, &head, 0, 0); | 1494 | res = nfs_scan_commit(inode, &head, 0, 0); |
1442 | spin_unlock(&inode->i_lock); | 1495 | spin_unlock(&inode->i_lock); |
1443 | if (res) { | 1496 | if (res) { |
1444 | int error = nfs_commit_list(inode, &head, how); | 1497 | int error; |
1498 | |||
1499 | error = pnfs_commit_list(inode, &head, how); | ||
1500 | if (error == PNFS_NOT_ATTEMPTED) | ||
1501 | error = nfs_commit_list(inode, &head, how); | ||
1445 | if (error < 0) | 1502 | if (error < 0) |
1446 | return error; | 1503 | return error; |
1447 | if (may_wait) | 1504 | if (!may_wait) |
1448 | wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, | ||
1449 | nfs_wait_bit_killable, | ||
1450 | TASK_KILLABLE); | ||
1451 | else | ||
1452 | goto out_mark_dirty; | 1505 | goto out_mark_dirty; |
1506 | error = wait_on_bit(&NFS_I(inode)->flags, | ||
1507 | NFS_INO_COMMIT, | ||
1508 | nfs_wait_bit_killable, | ||
1509 | TASK_KILLABLE); | ||
1510 | if (error < 0) | ||
1511 | return error; | ||
1453 | } else | 1512 | } else |
1454 | nfs_commit_clear_lock(NFS_I(inode)); | 1513 | nfs_commit_clear_lock(NFS_I(inode)); |
1455 | return res; | 1514 | return res; |
@@ -1503,7 +1562,22 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr | |||
1503 | 1562 | ||
1504 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) | 1563 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1505 | { | 1564 | { |
1506 | return nfs_commit_unstable_pages(inode, wbc); | 1565 | int ret; |
1566 | |||
1567 | ret = nfs_commit_unstable_pages(inode, wbc); | ||
1568 | if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { | ||
1569 | int status; | ||
1570 | bool sync = true; | ||
1571 | |||
1572 | if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking || | ||
1573 | wbc->for_background) | ||
1574 | sync = false; | ||
1575 | |||
1576 | status = pnfs_layoutcommit_inode(inode, sync); | ||
1577 | if (status < 0) | ||
1578 | return status; | ||
1579 | } | ||
1580 | return ret; | ||
1507 | } | 1581 | } |
1508 | 1582 | ||
1509 | /* | 1583 | /* |
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 84c27d69d421..ec0f277be7f5 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c | |||
@@ -117,7 +117,6 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, | |||
117 | * invoked in contexts where a memory allocation failure is | 117 | * invoked in contexts where a memory allocation failure is |
118 | * fatal. Fortunately this fake ACL is small enough to | 118 | * fatal. Fortunately this fake ACL is small enough to |
119 | * construct on the stack. */ | 119 | * construct on the stack. */ |
120 | memset(acl2, 0, sizeof(acl2)); | ||
121 | posix_acl_init(acl2, 4); | 120 | posix_acl_init(acl2, 4); |
122 | 121 | ||
123 | /* Insert entries in canonical order: other orders seem | 122 | /* Insert entries in canonical order: other orders seem |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8b31e5f8795d..ad000aeb21a2 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -299,7 +299,6 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) | |||
299 | 299 | ||
300 | #define EXPORT_HASHBITS 8 | 300 | #define EXPORT_HASHBITS 8 |
301 | #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) | 301 | #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) |
302 | #define EXPORT_HASHMASK (EXPORT_HASHMAX -1) | ||
303 | 302 | ||
304 | static struct cache_head *export_table[EXPORT_HASHMAX]; | 303 | static struct cache_head *export_table[EXPORT_HASHMAX]; |
305 | 304 | ||
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 6d2c397d458b..55780a22fdbd 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -63,7 +63,6 @@ struct ent { | |||
63 | 63 | ||
64 | #define ENT_HASHBITS 8 | 64 | #define ENT_HASHBITS 8 |
65 | #define ENT_HASHMAX (1 << ENT_HASHBITS) | 65 | #define ENT_HASHMAX (1 << ENT_HASHBITS) |
66 | #define ENT_HASHMASK (ENT_HASHMAX - 1) | ||
67 | 66 | ||
68 | static void | 67 | static void |
69 | ent_init(struct cache_head *cnew, struct cache_head *citm) | 68 | ent_init(struct cache_head *cnew, struct cache_head *citm) |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index db52546143d1..5fcb1396a7e3 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -984,8 +984,8 @@ typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, | |||
984 | void *); | 984 | void *); |
985 | enum nfsd4_op_flags { | 985 | enum nfsd4_op_flags { |
986 | ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ | 986 | ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ |
987 | ALLOWED_ON_ABSENT_FS = 2 << 0, /* ops processed on absent fs */ | 987 | ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ |
988 | ALLOWED_AS_FIRST_OP = 3 << 0, /* ops required first in compound */ | 988 | ALLOWED_AS_FIRST_OP = 1 << 2, /* ops required first in compound */
989 | }; | 989 | }; |
990 | 990 | ||
991 | struct nfsd4_operation { | 991 | struct nfsd4_operation { |
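The old enum was not a bitmask: 1 << 0 = 1 and 2 << 0 = 2, but 3 << 0 = 3 = (1 | 2), so a flags test for ALLOWED_AS_FIRST_OP also fired for any op that merely combined the first two flags. A standalone demonstration:

	#include <assert.h>

	int main(void)
	{
		enum { OLD_FH = 1 << 0, OLD_ABSENT = 2 << 0, OLD_FIRST = 3 << 0 };
		enum { NEW_FH = 1 << 0, NEW_ABSENT = 1 << 1, NEW_FIRST = 1 << 2 };

		/* old encoding: the third "flag" is the OR of the first two */
		assert(OLD_FIRST == (OLD_FH | OLD_ABSENT));

		/* new encoding: each flag occupies its own bit */
		assert((NEW_FIRST & (NEW_FH | NEW_ABSENT)) == 0);
		return 0;
	}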
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7b566ec14e18..fbde6f79922e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -148,7 +148,7 @@ static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; | |||
148 | /* hash table for nfs4_file */ | 148 | /* hash table for nfs4_file */ |
149 | #define FILE_HASH_BITS 8 | 149 | #define FILE_HASH_BITS 8 |
150 | #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) | 150 | #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) |
151 | #define FILE_HASH_MASK (FILE_HASH_SIZE - 1) | 151 | |
152 | /* hash table for (open)nfs4_stateid */ | 152 | /* hash table for (open)nfs4_stateid */ |
153 | #define STATEID_HASH_BITS 10 | 153 | #define STATEID_HASH_BITS 10 |
154 | #define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS) | 154 | #define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS) |
@@ -316,64 +316,6 @@ static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; | |||
316 | static struct list_head client_lru; | 316 | static struct list_head client_lru; |
317 | static struct list_head close_lru; | 317 | static struct list_head close_lru; |
318 | 318 | ||
319 | static void unhash_generic_stateid(struct nfs4_stateid *stp) | ||
320 | { | ||
321 | list_del(&stp->st_hash); | ||
322 | list_del(&stp->st_perfile); | ||
323 | list_del(&stp->st_perstateowner); | ||
324 | } | ||
325 | |||
326 | static void free_generic_stateid(struct nfs4_stateid *stp) | ||
327 | { | ||
328 | put_nfs4_file(stp->st_file); | ||
329 | kmem_cache_free(stateid_slab, stp); | ||
330 | } | ||
331 | |||
332 | static void release_lock_stateid(struct nfs4_stateid *stp) | ||
333 | { | ||
334 | struct file *file; | ||
335 | |||
336 | unhash_generic_stateid(stp); | ||
337 | file = find_any_file(stp->st_file); | ||
338 | if (file) | ||
339 | locks_remove_posix(file, (fl_owner_t)stp->st_stateowner); | ||
340 | free_generic_stateid(stp); | ||
341 | } | ||
342 | |||
343 | static void unhash_lockowner(struct nfs4_stateowner *sop) | ||
344 | { | ||
345 | struct nfs4_stateid *stp; | ||
346 | |||
347 | list_del(&sop->so_idhash); | ||
348 | list_del(&sop->so_strhash); | ||
349 | list_del(&sop->so_perstateid); | ||
350 | while (!list_empty(&sop->so_stateids)) { | ||
351 | stp = list_first_entry(&sop->so_stateids, | ||
352 | struct nfs4_stateid, st_perstateowner); | ||
353 | release_lock_stateid(stp); | ||
354 | } | ||
355 | } | ||
356 | |||
357 | static void release_lockowner(struct nfs4_stateowner *sop) | ||
358 | { | ||
359 | unhash_lockowner(sop); | ||
360 | nfs4_put_stateowner(sop); | ||
361 | } | ||
362 | |||
363 | static void | ||
364 | release_stateid_lockowners(struct nfs4_stateid *open_stp) | ||
365 | { | ||
366 | struct nfs4_stateowner *lock_sop; | ||
367 | |||
368 | while (!list_empty(&open_stp->st_lockowners)) { | ||
369 | lock_sop = list_entry(open_stp->st_lockowners.next, | ||
370 | struct nfs4_stateowner, so_perstateid); | ||
371 | /* list_del(&open_stp->st_lockowners); */ | ||
372 | BUG_ON(lock_sop->so_is_open_owner); | ||
373 | release_lockowner(lock_sop); | ||
374 | } | ||
375 | } | ||
376 | |||
377 | /* | 319 | /* |
378 | * We store the NONE, READ, WRITE, and BOTH bits separately in the | 320 | * We store the NONE, READ, WRITE, and BOTH bits separately in the |
379 | * st_{access,deny}_bmap field of the stateid, in order to track not | 321 | * st_{access,deny}_bmap field of the stateid, in order to track not |
@@ -446,13 +388,71 @@ static int nfs4_access_bmap_to_omode(struct nfs4_stateid *stp) | |||
446 | return nfs4_access_to_omode(access); | 388 | return nfs4_access_to_omode(access); |
447 | } | 389 | } |
448 | 390 | ||
449 | static void release_open_stateid(struct nfs4_stateid *stp) | 391 | static void unhash_generic_stateid(struct nfs4_stateid *stp) |
392 | { | ||
393 | list_del(&stp->st_hash); | ||
394 | list_del(&stp->st_perfile); | ||
395 | list_del(&stp->st_perstateowner); | ||
396 | } | ||
397 | |||
398 | static void free_generic_stateid(struct nfs4_stateid *stp) | ||
450 | { | 399 | { |
451 | int oflag = nfs4_access_bmap_to_omode(stp); | 400 | int oflag = nfs4_access_bmap_to_omode(stp); |
452 | 401 | ||
402 | nfs4_file_put_access(stp->st_file, oflag); | ||
403 | put_nfs4_file(stp->st_file); | ||
404 | kmem_cache_free(stateid_slab, stp); | ||
405 | } | ||
406 | |||
407 | static void release_lock_stateid(struct nfs4_stateid *stp) | ||
408 | { | ||
409 | struct file *file; | ||
410 | |||
411 | unhash_generic_stateid(stp); | ||
412 | file = find_any_file(stp->st_file); | ||
413 | if (file) | ||
414 | locks_remove_posix(file, (fl_owner_t)stp->st_stateowner); | ||
415 | free_generic_stateid(stp); | ||
416 | } | ||
417 | |||
418 | static void unhash_lockowner(struct nfs4_stateowner *sop) | ||
419 | { | ||
420 | struct nfs4_stateid *stp; | ||
421 | |||
422 | list_del(&sop->so_idhash); | ||
423 | list_del(&sop->so_strhash); | ||
424 | list_del(&sop->so_perstateid); | ||
425 | while (!list_empty(&sop->so_stateids)) { | ||
426 | stp = list_first_entry(&sop->so_stateids, | ||
427 | struct nfs4_stateid, st_perstateowner); | ||
428 | release_lock_stateid(stp); | ||
429 | } | ||
430 | } | ||
431 | |||
432 | static void release_lockowner(struct nfs4_stateowner *sop) | ||
433 | { | ||
434 | unhash_lockowner(sop); | ||
435 | nfs4_put_stateowner(sop); | ||
436 | } | ||
437 | |||
438 | static void | ||
439 | release_stateid_lockowners(struct nfs4_stateid *open_stp) | ||
440 | { | ||
441 | struct nfs4_stateowner *lock_sop; | ||
442 | |||
443 | while (!list_empty(&open_stp->st_lockowners)) { | ||
444 | lock_sop = list_entry(open_stp->st_lockowners.next, | ||
445 | struct nfs4_stateowner, so_perstateid); | ||
446 | /* list_del(&open_stp->st_lockowners); */ | ||
447 | BUG_ON(lock_sop->so_is_open_owner); | ||
448 | release_lockowner(lock_sop); | ||
449 | } | ||
450 | } | ||
451 | |||
452 | static void release_open_stateid(struct nfs4_stateid *stp) | ||
453 | { | ||
453 | unhash_generic_stateid(stp); | 454 | unhash_generic_stateid(stp); |
454 | release_stateid_lockowners(stp); | 455 | release_stateid_lockowners(stp); |
455 | nfs4_file_put_access(stp->st_file, oflag); | ||
456 | free_generic_stateid(stp); | 456 | free_generic_stateid(stp); |
457 | } | 457 | } |
458 | 458 | ||
@@ -608,7 +608,8 @@ static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4 | |||
608 | u32 maxrpc = nfsd_serv->sv_max_mesg; | 608 | u32 maxrpc = nfsd_serv->sv_max_mesg; |
609 | 609 | ||
610 | new->maxreqs = numslots; | 610 | new->maxreqs = numslots; |
611 | new->maxresp_cached = slotsize + NFSD_MIN_HDR_SEQ_SZ; | 611 | new->maxresp_cached = min_t(u32, req->maxresp_cached, |
612 | slotsize + NFSD_MIN_HDR_SEQ_SZ); | ||
612 | new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); | 613 | new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); |
613 | new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); | 614 | new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); |
614 | new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); | 615 | new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); |
@@ -3735,6 +3736,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc | |||
3735 | stp->st_stateid.si_stateownerid = sop->so_id; | 3736 | stp->st_stateid.si_stateownerid = sop->so_id; |
3736 | stp->st_stateid.si_fileid = fp->fi_id; | 3737 | stp->st_stateid.si_fileid = fp->fi_id; |
3737 | stp->st_stateid.si_generation = 0; | 3738 | stp->st_stateid.si_generation = 0; |
3739 | stp->st_access_bmap = 0; | ||
3738 | stp->st_deny_bmap = open_stp->st_deny_bmap; | 3740 | stp->st_deny_bmap = open_stp->st_deny_bmap; |
3739 | stp->st_openstp = open_stp; | 3741 | stp->st_openstp = open_stp; |
3740 | 3742 | ||
@@ -3749,6 +3751,17 @@ check_lock_length(u64 offset, u64 length) | |||
3749 | LOFF_OVERFLOW(offset, length))); | 3751 | LOFF_OVERFLOW(offset, length))); |
3750 | } | 3752 | } |
3751 | 3753 | ||
3754 | static void get_lock_access(struct nfs4_stateid *lock_stp, u32 access) | ||
3755 | { | ||
3756 | struct nfs4_file *fp = lock_stp->st_file; | ||
3757 | int oflag = nfs4_access_to_omode(access); | ||
3758 | |||
3759 | if (test_bit(access, &lock_stp->st_access_bmap)) | ||
3760 | return; | ||
3761 | nfs4_file_get_access(fp, oflag); | ||
3762 | __set_bit(access, &lock_stp->st_access_bmap); | ||
3763 | } | ||
3764 | |||
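[Note on the hunk above] get_lock_access() takes at most one file access reference per share-access mode on a lock stateid: the bitmap test makes repeated LOCK requests with the same mode idempotent. The same shape in a self-contained user-space sketch, with hypothetical demo_* names standing in for the nfsd structures:

#include <stdio.h>

struct demo_file { int access_refs; };
struct demo_stateid {
	struct demo_file *st_file;
	unsigned long st_access_bmap;
};

#define ACCESS_READ 1	/* plays the role of NFS4_SHARE_ACCESS_READ */

/* Take an access reference only the first time a mode appears. */
static void get_lock_access(struct demo_stateid *stp, unsigned int access)
{
	if (stp->st_access_bmap & (1UL << access))
		return;				/* already counted */
	stp->st_file->access_refs++;		/* nfs4_file_get_access() stand-in */
	stp->st_access_bmap |= 1UL << access;
}

int main(void)
{
	struct demo_file f = { 0 };
	struct demo_stateid stp = { &f, 0 };

	get_lock_access(&stp, ACCESS_READ);
	get_lock_access(&stp, ACCESS_READ);	/* second call is a no-op */
	printf("refs=%d\n", f.access_refs);	/* prints refs=1 */
	return 0;
}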
3752 | /* | 3765 | /* |
3753 | * LOCK operation | 3766 | * LOCK operation |
3754 | */ | 3767 | */ |
@@ -3765,7 +3778,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3765 | struct file_lock conflock; | 3778 | struct file_lock conflock; |
3766 | __be32 status = 0; | 3779 | __be32 status = 0; |
3767 | unsigned int strhashval; | 3780 | unsigned int strhashval; |
3768 | unsigned int cmd; | ||
3769 | int err; | 3781 | int err; |
3770 | 3782 | ||
3771 | dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", | 3783 | dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", |
@@ -3847,22 +3859,18 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3847 | switch (lock->lk_type) { | 3859 | switch (lock->lk_type) { |
3848 | case NFS4_READ_LT: | 3860 | case NFS4_READ_LT: |
3849 | case NFS4_READW_LT: | 3861 | case NFS4_READW_LT: |
3850 | if (find_readable_file(lock_stp->st_file)) { | 3862 | filp = find_readable_file(lock_stp->st_file); |
3851 | nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_READ); | 3863 | if (filp) |
3852 | filp = find_readable_file(lock_stp->st_file); | 3864 | get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); |
3853 | } | ||
3854 | file_lock.fl_type = F_RDLCK; | 3865 | file_lock.fl_type = F_RDLCK; |
3855 | cmd = F_SETLK; | 3866 | break; |
3856 | break; | ||
3857 | case NFS4_WRITE_LT: | 3867 | case NFS4_WRITE_LT: |
3858 | case NFS4_WRITEW_LT: | 3868 | case NFS4_WRITEW_LT: |
3859 | if (find_writeable_file(lock_stp->st_file)) { | 3869 | filp = find_writeable_file(lock_stp->st_file); |
3860 | nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_WRITE); | 3870 | if (filp) |
3861 | filp = find_writeable_file(lock_stp->st_file); | 3871 | get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); |
3862 | } | ||
3863 | file_lock.fl_type = F_WRLCK; | 3872 | file_lock.fl_type = F_WRLCK; |
3864 | cmd = F_SETLK; | 3873 | break; |
3865 | break; | ||
3866 | default: | 3874 | default: |
3867 | status = nfserr_inval; | 3875 | status = nfserr_inval; |
3868 | goto out; | 3876 | goto out; |
@@ -3886,7 +3894,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3886 | * Note: locks.c uses the BKL to protect the inode's lock list. | 3894 | * Note: locks.c uses the BKL to protect the inode's lock list. |
3887 | */ | 3895 | */ |
3888 | 3896 | ||
3889 | err = vfs_lock_file(filp, cmd, &file_lock, &conflock); | 3897 | err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock); |
3890 | switch (-err) { | 3898 | switch (-err) { |
3891 | case 0: /* success! */ | 3899 | case 0: /* success! */ |
3892 | update_stateid(&lock_stp->st_stateid); | 3900 | update_stateid(&lock_stp->st_stateid); |
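[Note on the hunk above] With get_lock_access() owning the refcounting, both branches end up issuing the same non-blocking request, so the cmd local goes away and vfs_lock_file() is called with a constant F_SETLK (the READW/WRITEW "blocking" variants are still served non-blocking here). The user-space analogue of that call is fcntl() with F_SETLK and a struct flock, e.g.:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/lockdemo", O_RDWR | O_CREAT, 0600);
	struct flock fl = {
		.l_type   = F_RDLCK,	/* F_WRLCK for the write-lock case */
		.l_whence = SEEK_SET,
		.l_start  = 0,
		.l_len    = 0,		/* 0 = lock to end of file */
	};

	if (fd < 0)
		return 1;
	/* non-blocking, like the server side: F_SETLK, not F_SETLKW */
	if (fcntl(fd, F_SETLK, &fl) == -1)
		perror("fcntl");
	close(fd);
	return 0;
}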
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 615f0a9f0600..c6766af00d98 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, | |||
1142 | 1142 | ||
1143 | u32 dummy; | 1143 | u32 dummy; |
1144 | char *machine_name; | 1144 | char *machine_name; |
1145 | int i, j; | 1145 | int i; |
1146 | int nr_secflavs; | 1146 | int nr_secflavs; |
1147 | 1147 | ||
1148 | READ_BUF(16); | 1148 | READ_BUF(16); |
@@ -1215,8 +1215,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, | |||
1215 | READ_BUF(4); | 1215 | READ_BUF(4); |
1216 | READ32(dummy); | 1216 | READ32(dummy); |
1217 | READ_BUF(dummy * 4); | 1217 | READ_BUF(dummy * 4); |
1218 | for (j = 0; j < dummy; ++j) | ||
1219 | READ32(dummy); | ||
1220 | break; | 1218 | break; |
1221 | case RPC_AUTH_GSS: | 1219 | case RPC_AUTH_GSS: |
1222 | dprintk("RPC_AUTH_GSS callback secflavor " | 1220 | dprintk("RPC_AUTH_GSS callback secflavor " |
@@ -1232,7 +1230,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, | |||
1232 | READ_BUF(4); | 1230 | READ_BUF(4); |
1233 | READ32(dummy); | 1231 | READ32(dummy); |
1234 | READ_BUF(dummy); | 1232 | READ_BUF(dummy); |
1235 | p += XDR_QUADLEN(dummy); | ||
1236 | break; | 1233 | break; |
1237 | default: | 1234 | default: |
1238 | dprintk("Illegal callback secflavor\n"); | 1235 | dprintk("Illegal callback secflavor\n"); |
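[Note on the hunks above] The deleted lines consumed the opaque callback-security payload word by word (or bumped p past it by hand); as far as the hunk shows, the surrounding READ_BUF bookkeeping already accounts for the skipped bytes, so the extra reads were redundant. For illustration only, a bounds-checked user-space helper that skips an XDR counted opaque (the helper and its names are hypothetical, not nfsd's):

#include <arpa/inet.h>	/* ntohl(), htonl() */
#include <stdint.h>
#include <stdio.h>

#define XDR_QUADLEN(n) (((n) + 3) >> 2)	/* bytes -> 4-byte XDR units */

/* Skip a counted opaque: 4-byte length, then the payload padded to 4. */
static const uint32_t *xdr_skip_opaque(const uint32_t *p, const uint32_t *end)
{
	uint32_t len;

	if (p + 1 > end)
		return NULL;
	len = ntohl(*p++);
	if (p + XDR_QUADLEN(len) > end)
		return NULL;		/* would run off the buffer */
	return p + XDR_QUADLEN(len);	/* no per-word copies needed */
}

int main(void)
{
	uint32_t buf[4] = { htonl(5), 0, 0, 0 };	/* 5-byte opaque + pad */
	const uint32_t *next = xdr_skip_opaque(buf, buf + 4);

	if (next)
		printf("skipped %td words\n", next - buf);	/* prints 3 */
	return 0;
}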
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 33b3e2b06779..1f5eae40f34e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -12,13 +12,14 @@ | |||
12 | #include <linux/nfsd/syscall.h> | 12 | #include <linux/nfsd/syscall.h> |
13 | #include <linux/lockd/lockd.h> | 13 | #include <linux/lockd/lockd.h> |
14 | #include <linux/sunrpc/clnt.h> | 14 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sunrpc/gss_api.h> | ||
15 | 16 | ||
16 | #include "idmap.h" | 17 | #include "idmap.h" |
17 | #include "nfsd.h" | 18 | #include "nfsd.h" |
18 | #include "cache.h" | 19 | #include "cache.h" |
19 | 20 | ||
20 | /* | 21 | /* |
21 | * We have a single directory with 9 nodes in it. | 22 | * We have a single directory with several nodes in it. |
22 | */ | 23 | */ |
23 | enum { | 24 | enum { |
24 | NFSD_Root = 1, | 25 | NFSD_Root = 1, |
@@ -42,6 +43,7 @@ enum { | |||
42 | NFSD_Versions, | 43 | NFSD_Versions, |
43 | NFSD_Ports, | 44 | NFSD_Ports, |
44 | NFSD_MaxBlkSize, | 45 | NFSD_MaxBlkSize, |
46 | NFSD_SupportedEnctypes, | ||
45 | /* | 47 | /* |
46 | * The below MUST come last. Otherwise we leave a hole in nfsd_files[] | 48 | * The below MUST come last. Otherwise we leave a hole in nfsd_files[] |
47 | * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops | 49 | * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops |
@@ -187,6 +189,34 @@ static struct file_operations export_features_operations = { | |||
187 | .release = single_release, | 189 | .release = single_release, |
188 | }; | 190 | }; |
189 | 191 | ||
192 | #ifdef CONFIG_SUNRPC_GSS | ||
193 | static int supported_enctypes_show(struct seq_file *m, void *v) | ||
194 | { | ||
195 | struct gss_api_mech *k5mech; | ||
196 | |||
197 | k5mech = gss_mech_get_by_name("krb5"); | ||
198 | if (k5mech == NULL) | ||
199 | goto out; | ||
200 | if (k5mech->gm_upcall_enctypes != NULL) | ||
201 | seq_printf(m, k5mech->gm_upcall_enctypes); | ||
202 | gss_mech_put(k5mech); | ||
203 | out: | ||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | static int supported_enctypes_open(struct inode *inode, struct file *file) | ||
208 | { | ||
209 | return single_open(file, supported_enctypes_show, NULL); | ||
210 | } | ||
211 | |||
212 | static struct file_operations supported_enctypes_ops = { | ||
213 | .open = supported_enctypes_open, | ||
214 | .read = seq_read, | ||
215 | .llseek = seq_lseek, | ||
216 | .release = single_release, | ||
217 | }; | ||
218 | #endif /* CONFIG_SUNRPC_GSS */ | ||
219 | |||
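[Note on the hunk above] supported_enctypes follows the standard single_open()/seq_file recipe for a read-only proc file: the show callback emits the whole file in one pass, and seq_read/seq_lseek/single_release do the rest. A minimal, module-sized rendition of the same recipe, as a sketch for kernels of this vintage; the names and the proc_create() wiring here are illustrative, not nfsd's:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	seq_printf(m, "enctype-a enctype-b\n");	/* whole file in one shot */
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

static const struct file_operations demo_fops = {
	.owner   = THIS_MODULE,
	.open    = demo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init demo_init(void)
{
	return proc_create("demo_enctypes", 0444, NULL, &demo_fops) ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("demo_enctypes", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");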
190 | extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); | 220 | extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); |
191 | extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); | 221 | extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); |
192 | 222 | ||
@@ -1397,6 +1427,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) | |||
1397 | [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, | 1427 | [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, |
1398 | [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, | 1428 | [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, |
1399 | [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, | 1429 | [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, |
1430 | #ifdef CONFIG_SUNRPC_GSS | ||
1431 | [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, | ||
1432 | #endif /* CONFIG_SUNRPC_GSS */ | ||
1400 | #ifdef CONFIG_NFSD_V4 | 1433 | #ifdef CONFIG_NFSD_V4 |
1401 | [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, | 1434 | [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, |
1402 | [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, | 1435 | [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 2d31224b07bf..6bd2f3c21f2b 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -367,16 +367,12 @@ struct nfs4_file { | |||
367 | struct list_head fi_delegations; | 367 | struct list_head fi_delegations; |
368 | /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ | 368 | /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ |
369 | struct file * fi_fds[3]; | 369 | struct file * fi_fds[3]; |
370 | /* One each for O_RDONLY, O_WRONLY: */ | ||
371 | atomic_t fi_access[2]; | ||
372 | /* | 370 | /* |
373 | * Each open stateid contributes 1 to either fi_readers or | 371 | * Each open or lock stateid contributes 1 to either |
374 | * fi_writers, or both, depending on the open mode. A | 372 | * fi_access[O_RDONLY], fi_access[O_WRONLY], or both, depending |
375 | * delegation also takes an fi_readers reference. Lock | 373 | * on open or lock mode: |
376 | * stateid's take none. | ||
377 | */ | 374 | */ |
378 | atomic_t fi_readers; | 375 | atomic_t fi_access[2]; |
379 | atomic_t fi_writers; | ||
380 | struct file *fi_deleg_file; | 376 | struct file *fi_deleg_file; |
381 | struct file_lock *fi_lease; | 377 | struct file_lock *fi_lease; |
382 | atomic_t fi_delegees; | 378 | atomic_t fi_delegees; |
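[Note on the hunk above] The named fi_readers/fi_writers counters collapse into fi_access[2], indexed by open mode, so callers can index with O_RDONLY/O_WRONLY instead of branching on direction. A self-contained sketch of the same layout with C11 atomics; it assumes the Linux values O_RDONLY == 0 and O_WRONLY == 1, which is what makes the array indexing work:

#include <fcntl.h>	/* O_RDONLY, O_WRONLY, O_RDWR */
#include <stdatomic.h>
#include <stdio.h>

/* One counter per access direction, as in the reworked nfs4_file. */
static atomic_int fi_access[2];

static void get_access(int omode)
{
	/* O_RDWR contributes both a read and a write reference */
	if (omode == O_RDWR || omode == O_RDONLY)
		atomic_fetch_add(&fi_access[O_RDONLY], 1);
	if (omode == O_RDWR || omode == O_WRONLY)
		atomic_fetch_add(&fi_access[O_WRONLY], 1);
}

int main(void)
{
	get_access(O_RDONLY);
	get_access(O_RDWR);
	printf("readers=%d writers=%d\n",
	       atomic_load(&fi_access[O_RDONLY]),
	       atomic_load(&fi_access[O_WRONLY]));	/* readers=2 writers=1 */
	return 0;
}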
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ff93025ae2f7..2e1cebde90df 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -1749,8 +1749,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, | |||
1749 | if (host_err) | 1749 | if (host_err) |
1750 | goto out_drop_write; | 1750 | goto out_drop_write; |
1751 | } | 1751 | } |
1752 | if (host_err) | ||
1753 | goto out_drop_write; | ||
1754 | host_err = vfs_rename(fdir, odentry, tdir, ndentry); | 1752 | host_err = vfs_rename(fdir, odentry, tdir, ndentry); |
1755 | if (!host_err) { | 1753 | if (!host_err) { |
1756 | host_err = commit_metadata(tfhp); | 1754 | host_err = commit_metadata(tfhp); |
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index 9af34a7e6e13..f5fde36b9e28 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h | |||
@@ -74,7 +74,7 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t); | |||
74 | 74 | ||
75 | #define nilfs_set_bit_atomic ext2_set_bit_atomic | 75 | #define nilfs_set_bit_atomic ext2_set_bit_atomic |
76 | #define nilfs_clear_bit_atomic ext2_clear_bit_atomic | 76 | #define nilfs_clear_bit_atomic ext2_clear_bit_atomic |
77 | #define nilfs_find_next_zero_bit ext2_find_next_zero_bit | 77 | #define nilfs_find_next_zero_bit find_next_zero_bit_le |
78 | 78 | ||
79 | /* | 79 | /* |
80 | * persistent object allocator cache | 80 | * persistent object allocator cache |
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 85f7baa15f5d..609cd223eea8 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c | |||
@@ -34,15 +34,10 @@ | |||
34 | #include "page.h" | 34 | #include "page.h" |
35 | #include "btnode.h" | 35 | #include "btnode.h" |
36 | 36 | ||
37 | |||
38 | static const struct address_space_operations def_btnode_aops = { | ||
39 | .sync_page = block_sync_page, | ||
40 | }; | ||
41 | |||
42 | void nilfs_btnode_cache_init(struct address_space *btnc, | 37 | void nilfs_btnode_cache_init(struct address_space *btnc, |
43 | struct backing_dev_info *bdi) | 38 | struct backing_dev_info *bdi) |
44 | { | 39 | { |
45 | nilfs_mapping_init(btnc, bdi, &def_btnode_aops); | 40 | nilfs_mapping_init(btnc, bdi); |
46 | } | 41 | } |
47 | 42 | ||
48 | void nilfs_btnode_cache_clear(struct address_space *btnc) | 43 | void nilfs_btnode_cache_clear(struct address_space *btnc) |
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index caf9a6a3fb54..1c2a3e23f8b2 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c | |||
@@ -49,7 +49,6 @@ | |||
49 | #include "ifile.h" | 49 | #include "ifile.h" |
50 | 50 | ||
51 | static const struct address_space_operations def_gcinode_aops = { | 51 | static const struct address_space_operations def_gcinode_aops = { |
52 | .sync_page = block_sync_page, | ||
53 | }; | 52 | }; |
54 | 53 | ||
55 | /* | 54 | /* |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index d5625be236a8..c0aa27490c02 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -280,7 +280,6 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
280 | const struct address_space_operations nilfs_aops = { | 280 | const struct address_space_operations nilfs_aops = { |
281 | .writepage = nilfs_writepage, | 281 | .writepage = nilfs_writepage, |
282 | .readpage = nilfs_readpage, | 282 | .readpage = nilfs_readpage, |
283 | .sync_page = block_sync_page, | ||
284 | .writepages = nilfs_writepages, | 283 | .writepages = nilfs_writepages, |
285 | .set_page_dirty = nilfs_set_page_dirty, | 284 | .set_page_dirty = nilfs_set_page_dirty, |
286 | .readpages = nilfs_readpages, | 285 | .readpages = nilfs_readpages, |
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 95c04c2f2b3e..f2469ba6246b 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c | |||
@@ -113,7 +113,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp, | |||
113 | unsigned int flags, oldflags; | 113 | unsigned int flags, oldflags; |
114 | int ret; | 114 | int ret; |
115 | 115 | ||
116 | if (!is_owner_or_cap(inode)) | 116 | if (!inode_owner_or_capable(inode)) |
117 | return -EACCES; | 117 | return -EACCES; |
118 | 118 | ||
119 | if (get_user(flags, (int __user *)argp)) | 119 | if (get_user(flags, (int __user *)argp)) |
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index a0babd2bff6a..a649b05f7069 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c | |||
@@ -399,7 +399,6 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) | |||
399 | 399 | ||
400 | static const struct address_space_operations def_mdt_aops = { | 400 | static const struct address_space_operations def_mdt_aops = { |
401 | .writepage = nilfs_mdt_write_page, | 401 | .writepage = nilfs_mdt_write_page, |
402 | .sync_page = block_sync_page, | ||
403 | }; | 402 | }; |
404 | 403 | ||
405 | static const struct inode_operations def_mdt_iops; | 404 | static const struct inode_operations def_mdt_iops; |
@@ -438,10 +437,6 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, | |||
438 | mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); | 437 | mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); |
439 | } | 438 | } |
440 | 439 | ||
441 | static const struct address_space_operations shadow_map_aops = { | ||
442 | .sync_page = block_sync_page, | ||
443 | }; | ||
444 | |||
445 | /** | 440 | /** |
446 | * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file | 441 | * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file |
447 | * @inode: inode of the metadata file | 442 | * @inode: inode of the metadata file |
@@ -455,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, | |||
455 | 450 | ||
456 | INIT_LIST_HEAD(&shadow->frozen_buffers); | 451 | INIT_LIST_HEAD(&shadow->frozen_buffers); |
457 | address_space_init_once(&shadow->frozen_data); | 452 | address_space_init_once(&shadow->frozen_data); |
458 | nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); | 453 | nilfs_mapping_init(&shadow->frozen_data, bdi); |
459 | address_space_init_once(&shadow->frozen_btnodes); | 454 | address_space_init_once(&shadow->frozen_btnodes); |
460 | nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); | 455 | nilfs_mapping_init(&shadow->frozen_btnodes, bdi); |
461 | mi->mi_shadow = shadow; | 456 | mi->mi_shadow = shadow; |
462 | return 0; | 457 | return 0; |
463 | } | 458 | } |
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index a585b35fd6bc..4d2a1ee0eb47 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c | |||
@@ -493,15 +493,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, | |||
493 | } | 493 | } |
494 | 494 | ||
495 | void nilfs_mapping_init(struct address_space *mapping, | 495 | void nilfs_mapping_init(struct address_space *mapping, |
496 | struct backing_dev_info *bdi, | 496 | struct backing_dev_info *bdi) |
497 | const struct address_space_operations *aops) | ||
498 | { | 497 | { |
499 | mapping->host = NULL; | 498 | mapping->host = NULL; |
500 | mapping->flags = 0; | 499 | mapping->flags = 0; |
501 | mapping_set_gfp_mask(mapping, GFP_NOFS); | 500 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
502 | mapping->assoc_mapping = NULL; | 501 | mapping->assoc_mapping = NULL; |
503 | mapping->backing_dev_info = bdi; | 502 | mapping->backing_dev_info = bdi; |
504 | mapping->a_ops = aops; | 503 | mapping->a_ops = NULL; |
505 | } | 504 | } |
506 | 505 | ||
507 | /* | 506 | /* |
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index 2a00953ebd5f..f06b79ad7493 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h | |||
@@ -62,8 +62,7 @@ int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); | |||
62 | void nilfs_copy_back_pages(struct address_space *, struct address_space *); | 62 | void nilfs_copy_back_pages(struct address_space *, struct address_space *); |
63 | void nilfs_clear_dirty_pages(struct address_space *); | 63 | void nilfs_clear_dirty_pages(struct address_space *); |
64 | void nilfs_mapping_init(struct address_space *mapping, | 64 | void nilfs_mapping_init(struct address_space *mapping, |
65 | struct backing_dev_info *bdi, | 65 | struct backing_dev_info *bdi); |
66 | const struct address_space_operations *aops); | ||
67 | unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); | 66 | unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); |
68 | unsigned long nilfs_find_uncommitted_extent(struct inode *inode, | 67 | unsigned long nilfs_find_uncommitted_extent(struct inode *inode, |
69 | sector_t start_blk, | 68 | sector_t start_blk, |
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 0f83e93935b2..2853ff20f85a 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c | |||
@@ -509,7 +509,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, | |||
509 | * Last BIO is always sent through the following | 509 | * Last BIO is always sent through the following |
510 | * submission. | 510 | * submission. |
511 | */ | 511 | */ |
512 | rw |= REQ_SYNC | REQ_UNPLUG; | 512 | rw |= REQ_SYNC; |
513 | res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); | 513 | res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); |
514 | } | 514 | } |
515 | 515 | ||
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 4c29fcf557d1..07ea8d3e6ea2 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c | |||
@@ -22,13 +22,14 @@ | |||
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/spinlock.h> | 24 | #include <linux/spinlock.h> |
25 | #include <linux/writeback.h> /* for inode_lock */ | ||
26 | 25 | ||
27 | #include <asm/atomic.h> | 26 | #include <asm/atomic.h> |
28 | 27 | ||
29 | #include <linux/fsnotify_backend.h> | 28 | #include <linux/fsnotify_backend.h> |
30 | #include "fsnotify.h" | 29 | #include "fsnotify.h" |
31 | 30 | ||
31 | #include "../internal.h" | ||
32 | |||
32 | /* | 33 | /* |
33 | * Recalculate the mask of events relevant to a given inode locked. | 34 | * Recalculate the mask of events relevant to a given inode locked. |
34 | */ | 35 | */ |
@@ -237,15 +238,14 @@ out: | |||
237 | * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. | 238 | * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. |
238 | * @list: list of inodes being unmounted (sb->s_inodes) | 239 | * @list: list of inodes being unmounted (sb->s_inodes) |
239 | * | 240 | * |
240 | * Called with inode_lock held, protecting the unmounting super block's list | 241 | * Called during unmount with no locks held, so needs to be safe against |
241 | * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. | 242 | * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block. |
242 | * We temporarily drop inode_lock, however, and CAN block. | ||
243 | */ | 243 | */ |
244 | void fsnotify_unmount_inodes(struct list_head *list) | 244 | void fsnotify_unmount_inodes(struct list_head *list) |
245 | { | 245 | { |
246 | struct inode *inode, *next_i, *need_iput = NULL; | 246 | struct inode *inode, *next_i, *need_iput = NULL; |
247 | 247 | ||
248 | spin_lock(&inode_lock); | 248 | spin_lock(&inode_sb_list_lock); |
249 | list_for_each_entry_safe(inode, next_i, list, i_sb_list) { | 249 | list_for_each_entry_safe(inode, next_i, list, i_sb_list) { |
250 | struct inode *need_iput_tmp; | 250 | struct inode *need_iput_tmp; |
251 | 251 | ||
@@ -254,8 +254,11 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
254 | * I_WILL_FREE, or I_NEW which is fine because by that point | 254 | * I_WILL_FREE, or I_NEW which is fine because by that point |
255 | * the inode cannot have any associated watches. | 255 | * the inode cannot have any associated watches. |
256 | */ | 256 | */ |
257 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) | 257 | spin_lock(&inode->i_lock); |
258 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { | ||
259 | spin_unlock(&inode->i_lock); | ||
258 | continue; | 260 | continue; |
261 | } | ||
259 | 262 | ||
260 | /* | 263 | /* |
261 | * If i_count is zero, the inode cannot have any watches and | 264 | * If i_count is zero, the inode cannot have any watches and |
@@ -263,8 +266,10 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
263 | * evict all inodes with zero i_count from icache which is | 266 | * evict all inodes with zero i_count from icache which is |
264 | * unnecessarily violent and may in fact be illegal to do. | 267 | * unnecessarily violent and may in fact be illegal to do. |
265 | */ | 268 | */ |
266 | if (!atomic_read(&inode->i_count)) | 269 | if (!atomic_read(&inode->i_count)) { |
270 | spin_unlock(&inode->i_lock); | ||
267 | continue; | 271 | continue; |
272 | } | ||
268 | 273 | ||
269 | need_iput_tmp = need_iput; | 274 | need_iput_tmp = need_iput; |
270 | need_iput = NULL; | 275 | need_iput = NULL; |
@@ -274,22 +279,25 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
274 | __iget(inode); | 279 | __iget(inode); |
275 | else | 280 | else |
276 | need_iput_tmp = NULL; | 281 | need_iput_tmp = NULL; |
282 | spin_unlock(&inode->i_lock); | ||
277 | 283 | ||
278 | /* In case the dropping of a reference would nuke next_i. */ | 284 | /* In case the dropping of a reference would nuke next_i. */ |
279 | if ((&next_i->i_sb_list != list) && | 285 | if ((&next_i->i_sb_list != list) && |
280 | atomic_read(&next_i->i_count) && | 286 | atomic_read(&next_i->i_count)) { |
281 | !(next_i->i_state & (I_FREEING | I_WILL_FREE))) { | 287 | spin_lock(&next_i->i_lock); |
282 | __iget(next_i); | 288 | if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) { |
283 | need_iput = next_i; | 289 | __iget(next_i); |
290 | need_iput = next_i; | ||
291 | } | ||
292 | spin_unlock(&next_i->i_lock); | ||
284 | } | 293 | } |
285 | 294 | ||
286 | /* | 295 | /* |
287 | * We can safely drop inode_lock here because we hold | 296 | * We can safely drop inode_sb_list_lock here because we hold |
288 | * references on both inode and next_i. Also no new inodes | 297 | * references on both inode and next_i. Also no new inodes |
289 | * will be added since the umount has begun. Finally, | 298 | * will be added since the umount has begun. |
290 | * iprune_mutex keeps shrink_icache_memory() away. | ||
291 | */ | 299 | */ |
292 | spin_unlock(&inode_lock); | 300 | spin_unlock(&inode_sb_list_lock); |
293 | 301 | ||
294 | if (need_iput_tmp) | 302 | if (need_iput_tmp) |
295 | iput(need_iput_tmp); | 303 | iput(need_iput_tmp); |
@@ -301,7 +309,7 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
301 | 309 | ||
302 | iput(inode); | 310 | iput(inode); |
303 | 311 | ||
304 | spin_lock(&inode_lock); | 312 | spin_lock(&inode_sb_list_lock); |
305 | } | 313 | } |
306 | spin_unlock(&inode_lock); | 314 | spin_unlock(&inode_sb_list_lock); |
307 | } | 315 | } |
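[Note on the diff above] The rewritten walk replaces the single global inode_lock with inode_sb_list_lock for list traversal plus the per-inode i_lock for state checks: validate I_FREEING/I_WILL_FREE/I_NEW under i_lock, pin the inode with __iget(), then drop everything before the blocking fsnotify/iput work. The same "lock list, validate under object lock, pin, unlock, block" shape in a compact pthread sketch with hypothetical names; it is deliberately simplified in that it assumes no node is freed while the list lock is dropped (the kernel code pins next_i to cover that case):

#include <pthread.h>
#include <stdio.h>

struct node {
	pthread_mutex_t lock;	/* plays the role of inode->i_lock */
	int freeing;		/* I_FREEING-style state flag */
	int refcount;
	struct node *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;

static void visit_blocking(struct node *n)
{
	printf("visiting %p with no locks held\n", (void *)n);
}

static void walk(void)
{
	pthread_mutex_lock(&list_lock);
	for (struct node *n = head; n; n = n->next) {
		pthread_mutex_lock(&n->lock);
		if (n->freeing) {		/* validate under the object lock */
			pthread_mutex_unlock(&n->lock);
			continue;
		}
		n->refcount++;			/* pin so it cannot go away */
		pthread_mutex_unlock(&n->lock);

		pthread_mutex_unlock(&list_lock);
		visit_blocking(n);		/* may sleep: no locks held */
		pthread_mutex_lock(&list_lock);

		pthread_mutex_lock(&n->lock);
		n->refcount--;
		pthread_mutex_unlock(&n->lock);
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	struct node a = { PTHREAD_MUTEX_INITIALIZER, 0, 0, NULL };

	head = &a;
	walk();
	return 0;
}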
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 325185e514bb..50c00856f730 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c | |||
@@ -91,7 +91,6 @@ | |||
91 | #include <linux/slab.h> | 91 | #include <linux/slab.h> |
92 | #include <linux/spinlock.h> | 92 | #include <linux/spinlock.h> |
93 | #include <linux/srcu.h> | 93 | #include <linux/srcu.h> |
94 | #include <linux/writeback.h> /* for inode_lock */ | ||
95 | 94 | ||
96 | #include <asm/atomic.h> | 95 | #include <asm/atomic.h> |
97 | 96 | ||
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 85eebff6d0d7..e86577d6c5c3 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/mount.h> | 23 | #include <linux/mount.h> |
24 | #include <linux/mutex.h> | 24 | #include <linux/mutex.h> |
25 | #include <linux/spinlock.h> | 25 | #include <linux/spinlock.h> |
26 | #include <linux/writeback.h> /* for inode_lock */ | ||
27 | 26 | ||
28 | #include <asm/atomic.h> | 27 | #include <asm/atomic.h> |
29 | 28 | ||
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index c3c2c7ac9020..0b1e885b8cf8 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c | |||
@@ -1543,8 +1543,6 @@ err_out: | |||
1543 | */ | 1543 | */ |
1544 | const struct address_space_operations ntfs_aops = { | 1544 | const struct address_space_operations ntfs_aops = { |
1545 | .readpage = ntfs_readpage, /* Fill page with data. */ | 1545 | .readpage = ntfs_readpage, /* Fill page with data. */ |
1546 | .sync_page = block_sync_page, /* Currently, just unplugs the | ||
1547 | disk request queue. */ | ||
1548 | #ifdef NTFS_RW | 1546 | #ifdef NTFS_RW |
1549 | .writepage = ntfs_writepage, /* Write dirty page to disk. */ | 1547 | .writepage = ntfs_writepage, /* Write dirty page to disk. */ |
1550 | #endif /* NTFS_RW */ | 1548 | #endif /* NTFS_RW */ |
@@ -1560,8 +1558,6 @@ const struct address_space_operations ntfs_aops = { | |||
1560 | */ | 1558 | */ |
1561 | const struct address_space_operations ntfs_mst_aops = { | 1559 | const struct address_space_operations ntfs_mst_aops = { |
1562 | .readpage = ntfs_readpage, /* Fill page with data. */ | 1560 | .readpage = ntfs_readpage, /* Fill page with data. */ |
1563 | .sync_page = block_sync_page, /* Currently, just unplugs the | ||
1564 | disk request queue. */ | ||
1565 | #ifdef NTFS_RW | 1561 | #ifdef NTFS_RW |
1566 | .writepage = ntfs_writepage, /* Write dirty page to disk. */ | 1562 | .writepage = ntfs_writepage, /* Write dirty page to disk. */ |
1567 | .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty | 1563 | .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty |
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 6551c7cbad92..ef9ed854255c 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c | |||
@@ -698,8 +698,7 @@ lock_retry_remap: | |||
698 | "uptodate! Unplugging the disk queue " | 698 | "uptodate! Unplugging the disk queue " |
699 | "and rescheduling."); | 699 | "and rescheduling."); |
700 | get_bh(tbh); | 700 | get_bh(tbh); |
701 | blk_run_address_space(mapping); | 701 | io_schedule(); |
702 | schedule(); | ||
703 | put_bh(tbh); | 702 | put_bh(tbh); |
704 | if (unlikely(!buffer_uptodate(tbh))) | 703 | if (unlikely(!buffer_uptodate(tbh))) |
705 | goto read_err; | 704 | goto read_err; |
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index a627ed82c0a3..0b56c6b7ec01 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c | |||
@@ -54,7 +54,7 @@ | |||
54 | * | 54 | * |
55 | * Return 1 if the attributes match and 0 if not. | 55 | * Return 1 if the attributes match and 0 if not. |
56 | * | 56 | * |
57 | * NOTE: This function runs with the inode_lock spin lock held so it is not | 57 | * NOTE: This function runs with the inode->i_lock spin lock held so it is not |
58 | * allowed to sleep. | 58 | * allowed to sleep. |
59 | */ | 59 | */ |
60 | int ntfs_test_inode(struct inode *vi, ntfs_attr *na) | 60 | int ntfs_test_inode(struct inode *vi, ntfs_attr *na) |
@@ -98,7 +98,7 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) | |||
98 | * | 98 | * |
99 | * Return 0 on success and -errno on error. | 99 | * Return 0 on success and -errno on error. |
100 | * | 100 | * |
101 | * NOTE: This function runs with the inode_lock spin lock held so it is not | 101 | * NOTE: This function runs with the inode->i_lock spin lock held so it is not |
102 | * allowed to sleep. (Hence the GFP_ATOMIC allocation.) | 102 | * allowed to sleep. (Hence the GFP_ATOMIC allocation.) |
103 | */ | 103 | */ |
104 | static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) | 104 | static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 704f6b1742f3..90f2729b7a5b 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -497,7 +497,7 @@ static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name, | |||
497 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) | 497 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) |
498 | return -EOPNOTSUPP; | 498 | return -EOPNOTSUPP; |
499 | 499 | ||
500 | if (!is_owner_or_cap(inode)) | 500 | if (!inode_owner_or_capable(inode)) |
501 | return -EPERM; | 501 | return -EPERM; |
502 | 502 | ||
503 | if (value) { | 503 | if (value) { |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1fbb0e20131b..daea0359e974 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -2043,7 +2043,6 @@ const struct address_space_operations ocfs2_aops = { | |||
2043 | .write_begin = ocfs2_write_begin, | 2043 | .write_begin = ocfs2_write_begin, |
2044 | .write_end = ocfs2_write_end, | 2044 | .write_end = ocfs2_write_end, |
2045 | .bmap = ocfs2_bmap, | 2045 | .bmap = ocfs2_bmap, |
2046 | .sync_page = block_sync_page, | ||
2047 | .direct_IO = ocfs2_direct_IO, | 2046 | .direct_IO = ocfs2_direct_IO, |
2048 | .invalidatepage = ocfs2_invalidatepage, | 2047 | .invalidatepage = ocfs2_invalidatepage, |
2049 | .releasepage = ocfs2_releasepage, | 2048 | .releasepage = ocfs2_releasepage, |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index b108e863d8f6..1adab287bd24 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -367,11 +367,7 @@ static inline void o2hb_bio_wait_dec(struct o2hb_bio_wait_ctxt *wc, | |||
367 | static void o2hb_wait_on_io(struct o2hb_region *reg, | 367 | static void o2hb_wait_on_io(struct o2hb_region *reg, |
368 | struct o2hb_bio_wait_ctxt *wc) | 368 | struct o2hb_bio_wait_ctxt *wc) |
369 | { | 369 | { |
370 | struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping; | ||
371 | |||
372 | blk_run_address_space(mapping); | ||
373 | o2hb_bio_wait_dec(wc, 1); | 370 | o2hb_bio_wait_dec(wc, 1); |
374 | |||
375 | wait_for_completion(&wc->wc_io_complete); | 371 | wait_for_completion(&wc->wc_io_complete); |
376 | } | 372 | } |
377 | 373 | ||
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7a4868196152..09de77ce002a 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -82,7 +82,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, | |||
82 | } | 82 | } |
83 | 83 | ||
84 | status = -EACCES; | 84 | status = -EACCES; |
85 | if (!is_owner_or_cap(inode)) | 85 | if (!inode_owner_or_capable(inode)) |
86 | goto bail_unlock; | 86 | goto bail_unlock; |
87 | 87 | ||
88 | if (!S_ISDIR(inode->i_mode)) | 88 | if (!S_ISDIR(inode->i_mode)) |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 51cd6898e7f1..1a97ba1ec3fc 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -831,18 +831,18 @@ static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb, | |||
831 | 831 | ||
832 | static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) | 832 | static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) |
833 | { | 833 | { |
834 | ext2_set_bit(bit, bitmap); | 834 | __test_and_set_bit_le(bit, bitmap); |
835 | } | 835 | } |
836 | #define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) | 836 | #define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) |
837 | 837 | ||
838 | static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) | 838 | static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) |
839 | { | 839 | { |
840 | ext2_clear_bit(bit, bitmap); | 840 | __test_and_clear_bit_le(bit, bitmap); |
841 | } | 841 | } |
842 | #define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) | 842 | #define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) |
843 | 843 | ||
844 | #define ocfs2_test_bit ext2_test_bit | 844 | #define ocfs2_test_bit test_bit_le |
845 | #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit | 845 | #define ocfs2_find_next_zero_bit find_next_zero_bit_le |
846 | #define ocfs2_find_next_bit ext2_find_next_bit | 846 | #define ocfs2_find_next_bit find_next_bit_le |
847 | #endif /* OCFS2_H */ | 847 | #endif /* OCFS2_H */ |
848 | 848 | ||
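[Note on the hunk above] The ext2_* helpers were always little-endian bit operations over a byte array; the generic *_le names make that explicit. The point of the LE variants is that bit n always lands in byte n/8 at position n%8, independent of host endianness, which keeps on-disk bitmaps portable. A user-space sketch of those semantics as I understand them (the demo helpers mirror, but are not, the kernel ones):

#include <stdio.h>

/* Little-endian bit numbering: bit n lives in byte n/8, at bit n%8. */
static int test_bit_le_demo(int nr, const unsigned char *addr)
{
	return (addr[nr >> 3] >> (nr & 7)) & 1;
}

static void set_bit_le_demo(int nr, unsigned char *addr)
{
	addr[nr >> 3] |= 1u << (nr & 7);
}

static int find_next_zero_bit_le_demo(const unsigned char *addr, int size,
				      int off)
{
	for (int nr = off; nr < size; nr++)
		if (!test_bit_le_demo(nr, addr))
			return nr;
	return size;	/* like the kernel: "not found" == size */
}

int main(void)
{
	unsigned char bitmap[2] = { 0 };

	set_bit_le_demo(0, bitmap);
	set_bit_le_demo(1, bitmap);
	set_bit_le_demo(9, bitmap);
	printf("first zero: %d\n",
	       find_next_zero_bit_le_demo(bitmap, 16, 0));	/* prints 2 */
	return 0;
}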
diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 8a6d34fa668a..d738a7e493dd 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c | |||
@@ -372,7 +372,6 @@ const struct address_space_operations omfs_aops = { | |||
372 | .readpages = omfs_readpages, | 372 | .readpages = omfs_readpages, |
373 | .writepage = omfs_writepage, | 373 | .writepage = omfs_writepage, |
374 | .writepages = omfs_writepages, | 374 | .writepages = omfs_writepages, |
375 | .sync_page = block_sync_page, | ||
376 | .write_begin = omfs_write_begin, | 375 | .write_begin = omfs_write_begin, |
377 | .write_end = generic_write_end, | 376 | .write_end = generic_write_end, |
378 | .bmap = omfs_bmap, | 377 | .bmap = omfs_bmap, |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 9c21119512b9..ac546975031f 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -290,7 +290,8 @@ ssize_t part_inflight_show(struct device *dev, | |||
290 | { | 290 | { |
291 | struct hd_struct *p = dev_to_part(dev); | 291 | struct hd_struct *p = dev_to_part(dev); |
292 | 292 | ||
293 | return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]); | 293 | return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]), |
294 | atomic_read(&p->in_flight[1])); | ||
294 | } | 295 | } |
295 | 296 | ||
296 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 297 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
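[Note on the hunk above] The in_flight counters have become atomic_t elsewhere in the block layer, so the sysfs show routine now reads them with atomic_read() instead of loading plain ints. The user-space equivalent with C11 atomics, keeping the same "%8u %8u\n" two-column layout the attribute emits:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint in_flight[2];	/* [0]=reads, [1]=writes, as in hd_struct */

int main(void)
{
	char buf[32];

	atomic_fetch_add(&in_flight[0], 3);
	atomic_fetch_add(&in_flight[1], 1);

	/* same formatting the sysfs attribute uses */
	snprintf(buf, sizeof(buf), "%8u %8u\n",
		 atomic_load(&in_flight[0]), atomic_load(&in_flight[1]));
	fputs(buf, stdout);
	return 0;
}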
diff --git a/fs/proc/array.c b/fs/proc/array.c index 7c99c1cf7e5c..5e4f776b0917 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -489,8 +489,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
489 | vsize, | 489 | vsize, |
490 | mm ? get_mm_rss(mm) : 0, | 490 | mm ? get_mm_rss(mm) : 0, |
491 | rsslim, | 491 | rsslim, |
492 | mm ? mm->start_code : 0, | 492 | mm ? (permitted ? mm->start_code : 1) : 0, |
493 | mm ? mm->end_code : 0, | 493 | mm ? (permitted ? mm->end_code : 1) : 0, |
494 | (permitted && mm) ? mm->start_stack : 0, | 494 | (permitted && mm) ? mm->start_stack : 0, |
495 | esp, | 495 | esp, |
496 | eip, | 496 | eip, |
diff --git a/fs/proc/base.c b/fs/proc/base.c index d49c4b5d2c3e..5a670c11aeac 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -191,17 +191,20 @@ static int proc_root_link(struct inode *inode, struct path *path) | |||
191 | return result; | 191 | return result; |
192 | } | 192 | } |
193 | 193 | ||
194 | /* | 194 | static struct mm_struct *__check_mem_permission(struct task_struct *task) |
195 | * Return zero if current may access user memory in @task, -error if not. | ||
196 | */ | ||
197 | static int check_mem_permission(struct task_struct *task) | ||
198 | { | 195 | { |
196 | struct mm_struct *mm; | ||
197 | |||
198 | mm = get_task_mm(task); | ||
199 | if (!mm) | ||
200 | return ERR_PTR(-EINVAL); | ||
201 | |||
199 | /* | 202 | /* |
200 | * A task can always look at itself, in case it chooses | 203 | * A task can always look at itself, in case it chooses |
201 | * to use system calls instead of load instructions. | 204 | * to use system calls instead of load instructions. |
202 | */ | 205 | */ |
203 | if (task == current) | 206 | if (task == current) |
204 | return 0; | 207 | return mm; |
205 | 208 | ||
206 | /* | 209 | /* |
207 | * If current is actively ptrace'ing, and would also be | 210 | * If current is actively ptrace'ing, and would also be |
@@ -213,27 +216,53 @@ static int check_mem_permission(struct task_struct *task) | |||
213 | match = (tracehook_tracer_task(task) == current); | 216 | match = (tracehook_tracer_task(task) == current); |
214 | rcu_read_unlock(); | 217 | rcu_read_unlock(); |
215 | if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) | 218 | if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) |
216 | return 0; | 219 | return mm; |
217 | } | 220 | } |
218 | 221 | ||
219 | /* | 222 | /* |
220 | * No one else is allowed. | 223 | * No one else is allowed. |
221 | */ | 224 | */ |
222 | return -EPERM; | 225 | mmput(mm); |
226 | return ERR_PTR(-EPERM); | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * If current may access user memory in @task return a reference to the | ||
231 | * corresponding mm, otherwise ERR_PTR. | ||
232 | */ | ||
233 | static struct mm_struct *check_mem_permission(struct task_struct *task) | ||
234 | { | ||
235 | struct mm_struct *mm; | ||
236 | int err; | ||
237 | |||
238 | /* | ||
239 | * Avoid racing if task exec's as we might get a new mm but validate | ||
240 | * against old credentials. | ||
241 | */ | ||
242 | err = mutex_lock_killable(&task->signal->cred_guard_mutex); | ||
243 | if (err) | ||
244 | return ERR_PTR(err); | ||
245 | |||
246 | mm = __check_mem_permission(task); | ||
247 | mutex_unlock(&task->signal->cred_guard_mutex); | ||
248 | |||
249 | return mm; | ||
223 | } | 250 | } |
224 | 251 | ||
225 | struct mm_struct *mm_for_maps(struct task_struct *task) | 252 | struct mm_struct *mm_for_maps(struct task_struct *task) |
226 | { | 253 | { |
227 | struct mm_struct *mm; | 254 | struct mm_struct *mm; |
255 | int err; | ||
228 | 256 | ||
229 | if (mutex_lock_killable(&task->signal->cred_guard_mutex)) | 257 | err = mutex_lock_killable(&task->signal->cred_guard_mutex); |
230 | return NULL; | 258 | if (err) |
259 | return ERR_PTR(err); | ||
231 | 260 | ||
232 | mm = get_task_mm(task); | 261 | mm = get_task_mm(task); |
233 | if (mm && mm != current->mm && | 262 | if (mm && mm != current->mm && |
234 | !ptrace_may_access(task, PTRACE_MODE_READ)) { | 263 | !ptrace_may_access(task, PTRACE_MODE_READ)) { |
235 | mmput(mm); | 264 | mmput(mm); |
236 | mm = NULL; | 265 | mm = ERR_PTR(-EACCES); |
237 | } | 266 | } |
238 | mutex_unlock(&task->signal->cred_guard_mutex); | 267 | mutex_unlock(&task->signal->cred_guard_mutex); |
239 | 268 | ||
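[Note on the hunks above] check_mem_permission() and mm_for_maps() now return ERR_PTR(-errno) instead of NULL, so callers can propagate the precise error with PTR_ERR() rather than collapsing every failure to one code; taking cred_guard_mutex around the check keeps a concurrent exec from swapping in a new mm after the old credentials were validated. The ERR_PTR convention encodes small negative errno values in the pointer itself; a user-space rendition of the three helpers (the kernel versions live in linux/err.h):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095	/* errnos fit in the top page of the address space */

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Hypothetical lookup that fails with a specific errno. */
static void *lookup(int ok)
{
	static int object;

	return ok ? (void *)&object : ERR_PTR(-EPERM);
}

int main(void)
{
	void *p = lookup(0);

	if (IS_ERR(p))
		printf("failed: %ld\n", PTR_ERR(p));	/* failed: -1 (EPERM) */
	return 0;
}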
@@ -279,9 +308,9 @@ out: | |||
279 | 308 | ||
280 | static int proc_pid_auxv(struct task_struct *task, char *buffer) | 309 | static int proc_pid_auxv(struct task_struct *task, char *buffer) |
281 | { | 310 | { |
282 | int res = 0; | 311 | struct mm_struct *mm = mm_for_maps(task); |
283 | struct mm_struct *mm = get_task_mm(task); | 312 | int res = PTR_ERR(mm); |
284 | if (mm) { | 313 | if (mm && !IS_ERR(mm)) { |
285 | unsigned int nwords = 0; | 314 | unsigned int nwords = 0; |
286 | do { | 315 | do { |
287 | nwords += 2; | 316 | nwords += 2; |
@@ -318,6 +347,23 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) | |||
318 | } | 347 | } |
319 | #endif /* CONFIG_KALLSYMS */ | 348 | #endif /* CONFIG_KALLSYMS */ |
320 | 349 | ||
350 | static int lock_trace(struct task_struct *task) | ||
351 | { | ||
352 | int err = mutex_lock_killable(&task->signal->cred_guard_mutex); | ||
353 | if (err) | ||
354 | return err; | ||
355 | if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { | ||
356 | mutex_unlock(&task->signal->cred_guard_mutex); | ||
357 | return -EPERM; | ||
358 | } | ||
359 | return 0; | ||
360 | } | ||
361 | |||
362 | static void unlock_trace(struct task_struct *task) | ||
363 | { | ||
364 | mutex_unlock(&task->signal->cred_guard_mutex); | ||
365 | } | ||
366 | |||
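[Note on the hunk above] lock_trace() bundles "take cred_guard_mutex, then verify ptrace access, releasing the mutex again on failure", so each /proc handler that exposes sensitive task state can bracket its work with a matched lock_trace()/unlock_trace() pair and never returns with the lock held on error. The shape of that guard, in a generic pthread sketch with a stand-in permission check:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t guard = PTHREAD_MUTEX_INITIALIZER;

static int may_access(void) { return 1; }	/* ptrace_may_access() stand-in */

static int lock_trace(void)
{
	pthread_mutex_lock(&guard);
	if (!may_access()) {
		pthread_mutex_unlock(&guard);	/* never return locked on error */
		return -EPERM;
	}
	return 0;
}

static void unlock_trace(void)
{
	pthread_mutex_unlock(&guard);
}

int main(void)
{
	int err = lock_trace();

	if (!err) {
		puts("reading task state under the guard");
		unlock_trace();
	}
	return err ? 1 : 0;
}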
321 | #ifdef CONFIG_STACKTRACE | 367 | #ifdef CONFIG_STACKTRACE |
322 | 368 | ||
323 | #define MAX_STACK_TRACE_DEPTH 64 | 369 | #define MAX_STACK_TRACE_DEPTH 64 |
@@ -327,6 +373,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, | |||
327 | { | 373 | { |
328 | struct stack_trace trace; | 374 | struct stack_trace trace; |
329 | unsigned long *entries; | 375 | unsigned long *entries; |
376 | int err; | ||
330 | int i; | 377 | int i; |
331 | 378 | ||
332 | entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); | 379 | entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); |
@@ -337,15 +384,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, | |||
337 | trace.max_entries = MAX_STACK_TRACE_DEPTH; | 384 | trace.max_entries = MAX_STACK_TRACE_DEPTH; |
338 | trace.entries = entries; | 385 | trace.entries = entries; |
339 | trace.skip = 0; | 386 | trace.skip = 0; |
340 | save_stack_trace_tsk(task, &trace); | ||
341 | 387 | ||
342 | for (i = 0; i < trace.nr_entries; i++) { | 388 | err = lock_trace(task); |
343 | seq_printf(m, "[<%p>] %pS\n", | 389 | if (!err) { |
344 | (void *)entries[i], (void *)entries[i]); | 390 | save_stack_trace_tsk(task, &trace); |
391 | |||
392 | for (i = 0; i < trace.nr_entries; i++) { | ||
393 | seq_printf(m, "[<%pK>] %pS\n", | ||
394 | (void *)entries[i], (void *)entries[i]); | ||
395 | } | ||
396 | unlock_trace(task); | ||
345 | } | 397 | } |
346 | kfree(entries); | 398 | kfree(entries); |
347 | 399 | ||
348 | return 0; | 400 | return err; |
349 | } | 401 | } |
350 | #endif | 402 | #endif |
351 | 403 | ||
@@ -508,18 +560,22 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer) | |||
508 | { | 560 | { |
509 | long nr; | 561 | long nr; |
510 | unsigned long args[6], sp, pc; | 562 | unsigned long args[6], sp, pc; |
563 | int res = lock_trace(task); | ||
564 | if (res) | ||
565 | return res; | ||
511 | 566 | ||
512 | if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) | 567 | if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) |
513 | return sprintf(buffer, "running\n"); | 568 | res = sprintf(buffer, "running\n"); |
514 | 569 | else if (nr < 0) | |
515 | if (nr < 0) | 570 | res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); |
516 | return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); | 571 | else |
517 | 572 | res = sprintf(buffer, | |
518 | return sprintf(buffer, | ||
519 | "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", | 573 | "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", |
520 | nr, | 574 | nr, |
521 | args[0], args[1], args[2], args[3], args[4], args[5], | 575 | args[0], args[1], args[2], args[3], args[4], args[5], |
522 | sp, pc); | 576 | sp, pc); |
577 | unlock_trace(task); | ||
578 | return res; | ||
523 | } | 579 | } |
524 | #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ | 580 | #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ |
525 | 581 | ||
@@ -775,18 +831,14 @@ static ssize_t mem_read(struct file * file, char __user * buf, | |||
775 | if (!task) | 831 | if (!task) |
776 | goto out_no_task; | 832 | goto out_no_task; |
777 | 833 | ||
778 | if (check_mem_permission(task)) | ||
779 | goto out; | ||
780 | |||
781 | ret = -ENOMEM; | 834 | ret = -ENOMEM; |
782 | page = (char *)__get_free_page(GFP_TEMPORARY); | 835 | page = (char *)__get_free_page(GFP_TEMPORARY); |
783 | if (!page) | 836 | if (!page) |
784 | goto out; | 837 | goto out; |
785 | 838 | ||
786 | ret = 0; | 839 | mm = check_mem_permission(task); |
787 | 840 | ret = PTR_ERR(mm); | |
788 | mm = get_task_mm(task); | 841 | if (IS_ERR(mm)) |
789 | if (!mm) | ||
790 | goto out_free; | 842 | goto out_free; |
791 | 843 | ||
792 | ret = -EIO; | 844 | ret = -EIO; |
@@ -800,8 +852,8 @@ static ssize_t mem_read(struct file * file, char __user * buf, | |||
800 | int this_len, retval; | 852 | int this_len, retval; |
801 | 853 | ||
802 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; | 854 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; |
803 | retval = access_process_vm(task, src, page, this_len, 0); | 855 | retval = access_remote_vm(mm, src, page, this_len, 0); |
804 | if (!retval || check_mem_permission(task)) { | 856 | if (!retval) { |
805 | if (!ret) | 857 | if (!ret) |
806 | ret = -EIO; | 858 | ret = -EIO; |
807 | break; | 859 | break; |
@@ -829,10 +881,6 @@ out_no_task: | |||
829 | return ret; | 881 | return ret; |
830 | } | 882 | } |
831 | 883 | ||
832 | #define mem_write NULL | ||
833 | |||
834 | #ifndef mem_write | ||
835 | /* This is a security hazard */ | ||
836 | static ssize_t mem_write(struct file * file, const char __user *buf, | 884 | static ssize_t mem_write(struct file * file, const char __user *buf, |
837 | size_t count, loff_t *ppos) | 885 | size_t count, loff_t *ppos) |
838 | { | 886 | { |
@@ -840,18 +888,25 @@ static ssize_t mem_write(struct file * file, const char __user *buf, | |||
840 | char *page; | 888 | char *page; |
841 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 889 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
842 | unsigned long dst = *ppos; | 890 | unsigned long dst = *ppos; |
891 | struct mm_struct *mm; | ||
843 | 892 | ||
844 | copied = -ESRCH; | 893 | copied = -ESRCH; |
845 | if (!task) | 894 | if (!task) |
846 | goto out_no_task; | 895 | goto out_no_task; |
847 | 896 | ||
848 | if (check_mem_permission(task)) | 897 | mm = check_mem_permission(task); |
849 | goto out; | 898 | copied = PTR_ERR(mm); |
899 | if (IS_ERR(mm)) | ||
900 | goto out_task; | ||
901 | |||
902 | copied = -EIO; | ||
903 | if (file->private_data != (void *)((long)current->self_exec_id)) | ||
904 | goto out_mm; | ||
850 | 905 | ||
851 | copied = -ENOMEM; | 906 | copied = -ENOMEM; |
852 | page = (char *)__get_free_page(GFP_TEMPORARY); | 907 | page = (char *)__get_free_page(GFP_TEMPORARY); |
853 | if (!page) | 908 | if (!page) |
854 | goto out; | 909 | goto out_mm; |
855 | 910 | ||
856 | copied = 0; | 911 | copied = 0; |
857 | while (count > 0) { | 912 | while (count > 0) { |
@@ -862,7 +917,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf, | |||
862 | copied = -EFAULT; | 917 | copied = -EFAULT; |
863 | break; | 918 | break; |
864 | } | 919 | } |
865 | retval = access_process_vm(task, dst, page, this_len, 1); | 920 | retval = access_remote_vm(mm, dst, page, this_len, 1); |
866 | if (!retval) { | 921 | if (!retval) { |
867 | if (!copied) | 922 | if (!copied) |
868 | copied = -EIO; | 923 | copied = -EIO; |
@@ -875,12 +930,13 @@ static ssize_t mem_write(struct file * file, const char __user *buf, | |||
875 | } | 930 | } |
876 | *ppos = dst; | 931 | *ppos = dst; |
877 | free_page((unsigned long) page); | 932 | free_page((unsigned long) page); |
878 | out: | 933 | out_mm: |
934 | mmput(mm); | ||
935 | out_task: | ||
879 | put_task_struct(task); | 936 | put_task_struct(task); |
880 | out_no_task: | 937 | out_no_task: |
881 | return copied; | 938 | return copied; |
882 | } | 939 | } |
883 | #endif | ||
884 | 940 | ||
885 | loff_t mem_lseek(struct file *file, loff_t offset, int orig) | 941 | loff_t mem_lseek(struct file *file, loff_t offset, int orig) |
886 | { | 942 | { |
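[Note on the hunks above] With the "#define mem_write NULL" block gone, /proc/<pid>/mem is a read-write, seekable window onto the target address space for callers that pass check_mem_permission(), backed by access_remote_vm() on the validated mm (the self_exec_id comparison rejects writes across an exec). For illustration, a process can always read its own memory this way, since task == current passes the permission check; this sketch assumes a 64-bit build so the address fits in off_t:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	static const char secret[] = "hello from my own memory";
	char buf[sizeof(secret)];
	int fd = open("/proc/self/mem", O_RDONLY);

	if (fd < 0)
		return 1;
	/* seek to a virtual address and read: pread does both at once */
	if (pread(fd, buf, sizeof(buf), (off_t)(unsigned long)secret) > 0)
		printf("%s\n", buf);
	close(fd);
	return 0;
}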
@@ -917,20 +973,18 @@ static ssize_t environ_read(struct file *file, char __user *buf, | |||
917 | if (!task) | 973 | if (!task) |
918 | goto out_no_task; | 974 | goto out_no_task; |
919 | 975 | ||
920 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
921 | goto out; | ||
922 | |||
923 | ret = -ENOMEM; | 976 | ret = -ENOMEM; |
924 | page = (char *)__get_free_page(GFP_TEMPORARY); | 977 | page = (char *)__get_free_page(GFP_TEMPORARY); |
925 | if (!page) | 978 | if (!page) |
926 | goto out; | 979 | goto out; |
927 | 980 | ||
928 | ret = 0; | ||
929 | 981 | ||
930 | mm = get_task_mm(task); | 982 | mm = mm_for_maps(task); |
931 | if (!mm) | 983 | ret = PTR_ERR(mm); |
984 | if (!mm || IS_ERR(mm)) | ||
932 | goto out_free; | 985 | goto out_free; |
933 | 986 | ||
987 | ret = 0; | ||
934 | while (count > 0) { | 988 | while (count > 0) { |
935 | int this_len, retval, max_len; | 989 | int this_len, retval, max_len; |
936 | 990 | ||
@@ -2748,8 +2802,12 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) | |||
2748 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, | 2802 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, |
2749 | struct pid *pid, struct task_struct *task) | 2803 | struct pid *pid, struct task_struct *task) |
2750 | { | 2804 | { |
2751 | seq_printf(m, "%08x\n", task->personality); | 2805 | int err = lock_trace(task); |
2752 | return 0; | 2806 | if (!err) { |
2807 | seq_printf(m, "%08x\n", task->personality); | ||
2808 | unlock_trace(task); | ||
2809 | } | ||
2810 | return err; | ||
2753 | } | 2811 | } |
2754 | 2812 | ||
2755 | /* | 2813 | /* |
@@ -2768,7 +2826,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2768 | REG("environ", S_IRUSR, proc_environ_operations), | 2826 | REG("environ", S_IRUSR, proc_environ_operations), |
2769 | INF("auxv", S_IRUSR, proc_pid_auxv), | 2827 | INF("auxv", S_IRUSR, proc_pid_auxv), |
2770 | ONE("status", S_IRUGO, proc_pid_status), | 2828 | ONE("status", S_IRUGO, proc_pid_status), |
2771 | ONE("personality", S_IRUSR, proc_pid_personality), | 2829 | ONE("personality", S_IRUGO, proc_pid_personality), |
2772 | INF("limits", S_IRUGO, proc_pid_limits), | 2830 | INF("limits", S_IRUGO, proc_pid_limits), |
2773 | #ifdef CONFIG_SCHED_DEBUG | 2831 | #ifdef CONFIG_SCHED_DEBUG |
2774 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 2832 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
@@ -2778,7 +2836,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2778 | #endif | 2836 | #endif |
2779 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), | 2837 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), |
2780 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK | 2838 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK |
2781 | INF("syscall", S_IRUSR, proc_pid_syscall), | 2839 | INF("syscall", S_IRUGO, proc_pid_syscall), |
2782 | #endif | 2840 | #endif |
2783 | INF("cmdline", S_IRUGO, proc_pid_cmdline), | 2841 | INF("cmdline", S_IRUGO, proc_pid_cmdline), |
2784 | ONE("stat", S_IRUGO, proc_tgid_stat), | 2842 | ONE("stat", S_IRUGO, proc_tgid_stat), |
@@ -2797,7 +2855,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2797 | #ifdef CONFIG_PROC_PAGE_MONITOR | 2855 | #ifdef CONFIG_PROC_PAGE_MONITOR |
2798 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), | 2856 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), |
2799 | REG("smaps", S_IRUGO, proc_smaps_operations), | 2857 | REG("smaps", S_IRUGO, proc_smaps_operations), |
2800 | REG("pagemap", S_IRUSR, proc_pagemap_operations), | 2858 | REG("pagemap", S_IRUGO, proc_pagemap_operations), |
2801 | #endif | 2859 | #endif |
2802 | #ifdef CONFIG_SECURITY | 2860 | #ifdef CONFIG_SECURITY |
2803 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), | 2861 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), |
@@ -2806,7 +2864,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2806 | INF("wchan", S_IRUGO, proc_pid_wchan), | 2864 | INF("wchan", S_IRUGO, proc_pid_wchan), |
2807 | #endif | 2865 | #endif |
2808 | #ifdef CONFIG_STACKTRACE | 2866 | #ifdef CONFIG_STACKTRACE |
2809 | ONE("stack", S_IRUSR, proc_pid_stack), | 2867 | ONE("stack", S_IRUGO, proc_pid_stack), |
2810 | #endif | 2868 | #endif |
2811 | #ifdef CONFIG_SCHEDSTATS | 2869 | #ifdef CONFIG_SCHEDSTATS |
2812 | INF("schedstat", S_IRUGO, proc_pid_schedstat), | 2870 | INF("schedstat", S_IRUGO, proc_pid_schedstat), |
@@ -3108,14 +3166,14 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3108 | REG("environ", S_IRUSR, proc_environ_operations), | 3166 | REG("environ", S_IRUSR, proc_environ_operations), |
3109 | INF("auxv", S_IRUSR, proc_pid_auxv), | 3167 | INF("auxv", S_IRUSR, proc_pid_auxv), |
3110 | ONE("status", S_IRUGO, proc_pid_status), | 3168 | ONE("status", S_IRUGO, proc_pid_status), |
3111 | ONE("personality", S_IRUSR, proc_pid_personality), | 3169 | ONE("personality", S_IRUGO, proc_pid_personality), |
3112 | INF("limits", S_IRUGO, proc_pid_limits), | 3170 | INF("limits", S_IRUGO, proc_pid_limits), |
3113 | #ifdef CONFIG_SCHED_DEBUG | 3171 | #ifdef CONFIG_SCHED_DEBUG |
3114 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 3172 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
3115 | #endif | 3173 | #endif |
3116 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), | 3174 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), |
3117 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK | 3175 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK |
3118 | INF("syscall", S_IRUSR, proc_pid_syscall), | 3176 | INF("syscall", S_IRUGO, proc_pid_syscall), |
3119 | #endif | 3177 | #endif |
3120 | INF("cmdline", S_IRUGO, proc_pid_cmdline), | 3178 | INF("cmdline", S_IRUGO, proc_pid_cmdline), |
3121 | ONE("stat", S_IRUGO, proc_tid_stat), | 3179 | ONE("stat", S_IRUGO, proc_tid_stat), |
@@ -3133,7 +3191,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3133 | #ifdef CONFIG_PROC_PAGE_MONITOR | 3191 | #ifdef CONFIG_PROC_PAGE_MONITOR |
3134 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), | 3192 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), |
3135 | REG("smaps", S_IRUGO, proc_smaps_operations), | 3193 | REG("smaps", S_IRUGO, proc_smaps_operations), |
3136 | REG("pagemap", S_IRUSR, proc_pagemap_operations), | 3194 | REG("pagemap", S_IRUGO, proc_pagemap_operations), |
3137 | #endif | 3195 | #endif |
3138 | #ifdef CONFIG_SECURITY | 3196 | #ifdef CONFIG_SECURITY |
3139 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), | 3197 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), |
@@ -3142,7 +3200,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3142 | INF("wchan", S_IRUGO, proc_pid_wchan), | 3200 | INF("wchan", S_IRUGO, proc_pid_wchan), |
3143 | #endif | 3201 | #endif |
3144 | #ifdef CONFIG_STACKTRACE | 3202 | #ifdef CONFIG_STACKTRACE |
3145 | ONE("stack", S_IRUSR, proc_pid_stack), | 3203 | ONE("stack", S_IRUGO, proc_pid_stack), |
3146 | #endif | 3204 | #endif |
3147 | #ifdef CONFIG_SCHEDSTATS | 3205 | #ifdef CONFIG_SCHEDSTATS |
3148 | INF("schedstat", S_IRUGO, proc_pid_schedstat), | 3206 | INF("schedstat", S_IRUGO, proc_pid_schedstat), |
@@ -3161,7 +3219,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3161 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), | 3219 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), |
3162 | #ifdef CONFIG_AUDITSYSCALL | 3220 | #ifdef CONFIG_AUDITSYSCALL |
3163 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), | 3221 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), |
3164 | REG("sessionid", S_IRUSR, proc_sessionid_operations), | 3222 | REG("sessionid", S_IRUGO, proc_sessionid_operations), |
3165 | #endif | 3223 | #endif |
3166 | #ifdef CONFIG_FAULT_INJECTION | 3224 | #ifdef CONFIG_FAULT_INJECTION |
3167 | REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), | 3225 | REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 01e07f2a188f..f1281339b6fa 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -28,7 +28,7 @@ | |||
28 | 28 | ||
29 | DEFINE_SPINLOCK(proc_subdir_lock); | 29 | DEFINE_SPINLOCK(proc_subdir_lock); |
30 | 30 | ||
31 | static int proc_match(int len, const char *name, struct proc_dir_entry *de) | 31 | static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) |
32 | { | 32 | { |
33 | if (de->namelen != len) | 33 | if (de->namelen != len) |
34 | return 0; | 34 | return 0; |
@@ -303,7 +303,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, | |||
303 | { | 303 | { |
304 | const char *cp = name, *next; | 304 | const char *cp = name, *next; |
305 | struct proc_dir_entry *de; | 305 | struct proc_dir_entry *de; |
306 | int len; | 306 | unsigned int len; |
307 | 307 | ||
308 | de = *ret; | 308 | de = *ret; |
309 | if (!de) | 309 | if (!de) |
@@ -602,7 +602,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, | |||
602 | { | 602 | { |
603 | struct proc_dir_entry *ent = NULL; | 603 | struct proc_dir_entry *ent = NULL; |
604 | const char *fn = name; | 604 | const char *fn = name; |
605 | int len; | 605 | unsigned int len; |
606 | 606 | ||
607 | /* make sure name is valid */ | 607 | /* make sure name is valid */ |
608 | if (!name || !strlen(name)) goto out; | 608 | if (!name || !strlen(name)) goto out; |
@@ -786,7 +786,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) | |||
786 | struct proc_dir_entry **p; | 786 | struct proc_dir_entry **p; |
787 | struct proc_dir_entry *de = NULL; | 787 | struct proc_dir_entry *de = NULL; |
788 | const char *fn = name; | 788 | const char *fn = name; |
789 | int len; | 789 | unsigned int len; |
790 | 790 | ||
791 | spin_lock(&proc_subdir_lock); | 791 | spin_lock(&proc_subdir_lock); |
792 | if (__xlate_proc_name(name, &parent, &fn) != 0) { | 792 | if (__xlate_proc_name(name, &parent, &fn) != 0) { |
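
The int-to-unsigned-int change above gives the length in proc_match() and its callers the same signedness as de->namelen, removing mixed-sign comparisons. A standalone sketch of the same matching shape (the struct and helper names here are illustrative, not the kernel's):

#include <stdio.h>
#include <string.h>

struct entry {
	unsigned int namelen;
	const char *name;
};

/* Same shape as proc_match(): compare an unsigned length against an
 * unsigned namelen first, then the bytes themselves. */
static int entry_match(unsigned int len, const char *name,
		       const struct entry *de)
{
	if (de->namelen != len)
		return 0;
	return !memcmp(name, de->name, len);
}

int main(void)
{
	struct entry de = { 7, "cmdline" };

	printf("%d\n", entry_match(7, "cmdline", &de));	/* 1 */
	printf("%d\n", entry_match(3, "cmd", &de));	/* 0 */
	return 0;
}
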
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d6a7ca1fdac5..d15aa1b1cc8f 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -46,8 +46,6 @@ static void proc_evict_inode(struct inode *inode) | |||
46 | } | 46 | } |
47 | } | 47 | } |
48 | 48 | ||
49 | struct vfsmount *proc_mnt; | ||
50 | |||
51 | static struct kmem_cache * proc_inode_cachep; | 49 | static struct kmem_cache * proc_inode_cachep; |
52 | 50 | ||
53 | static struct inode *proc_alloc_inode(struct super_block *sb) | 51 | static struct inode *proc_alloc_inode(struct super_block *sb) |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 9ad561ded409..c03e8d3a3a5b 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -107,7 +107,6 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) | |||
107 | } | 107 | } |
108 | void pde_put(struct proc_dir_entry *pde); | 108 | void pde_put(struct proc_dir_entry *pde); |
109 | 109 | ||
110 | extern struct vfsmount *proc_mnt; | ||
111 | int proc_fill_super(struct super_block *); | 110 | int proc_fill_super(struct super_block *); |
112 | struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | 111 | struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); |
113 | 112 | ||
diff --git a/fs/proc/root.c b/fs/proc/root.c index ef9fa8e24ad6..a9000e9cfee5 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -43,17 +43,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, | |||
43 | struct pid_namespace *ns; | 43 | struct pid_namespace *ns; |
44 | struct proc_inode *ei; | 44 | struct proc_inode *ei; |
45 | 45 | ||
46 | if (proc_mnt) { | ||
47 | /* Seed the root directory with a pid so it doesn't need | ||
48 | * to be special in base.c. I would do this earlier but | ||
49 | * the only task alive when /proc is mounted the first time | ||
50 | * is the init_task and it doesn't have any pids. | ||
51 | */ | ||
52 | ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode); | ||
53 | if (!ei->pid) | ||
54 | ei->pid = find_get_pid(1); | ||
55 | } | ||
56 | |||
57 | if (flags & MS_KERNMOUNT) | 46 | if (flags & MS_KERNMOUNT) |
58 | ns = (struct pid_namespace *)data; | 47 | ns = (struct pid_namespace *)data; |
59 | else | 48 | else |
@@ -71,16 +60,16 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, | |||
71 | return ERR_PTR(err); | 60 | return ERR_PTR(err); |
72 | } | 61 | } |
73 | 62 | ||
74 | ei = PROC_I(sb->s_root->d_inode); | ||
75 | if (!ei->pid) { | ||
76 | rcu_read_lock(); | ||
77 | ei->pid = get_pid(find_pid_ns(1, ns)); | ||
78 | rcu_read_unlock(); | ||
79 | } | ||
80 | |||
81 | sb->s_flags |= MS_ACTIVE; | 63 | sb->s_flags |= MS_ACTIVE; |
82 | } | 64 | } |
83 | 65 | ||
66 | ei = PROC_I(sb->s_root->d_inode); | ||
67 | if (!ei->pid) { | ||
68 | rcu_read_lock(); | ||
69 | ei->pid = get_pid(find_pid_ns(1, ns)); | ||
70 | rcu_read_unlock(); | ||
71 | } | ||
72 | |||
84 | return dget(sb->s_root); | 73 | return dget(sb->s_root); |
85 | } | 74 | } |
86 | 75 | ||
@@ -101,19 +90,20 @@ static struct file_system_type proc_fs_type = { | |||
101 | 90 | ||
102 | void __init proc_root_init(void) | 91 | void __init proc_root_init(void) |
103 | { | 92 | { |
93 | struct vfsmount *mnt; | ||
104 | int err; | 94 | int err; |
105 | 95 | ||
106 | proc_init_inodecache(); | 96 | proc_init_inodecache(); |
107 | err = register_filesystem(&proc_fs_type); | 97 | err = register_filesystem(&proc_fs_type); |
108 | if (err) | 98 | if (err) |
109 | return; | 99 | return; |
110 | proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); | 100 | mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); |
111 | if (IS_ERR(proc_mnt)) { | 101 | if (IS_ERR(mnt)) { |
112 | unregister_filesystem(&proc_fs_type); | 102 | unregister_filesystem(&proc_fs_type); |
113 | return; | 103 | return; |
114 | } | 104 | } |
115 | 105 | ||
116 | init_pid_ns.proc_mnt = proc_mnt; | 106 | init_pid_ns.proc_mnt = mnt; |
117 | proc_symlink("mounts", NULL, "self/mounts"); | 107 | proc_symlink("mounts", NULL, "self/mounts"); |
118 | 108 | ||
119 | proc_net_init(); | 109 | proc_net_init(); |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 93381aae9363..7c708a418acc 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -121,14 +121,14 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
121 | 121 | ||
122 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); | 122 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); |
123 | if (!priv->task) | 123 | if (!priv->task) |
124 | return NULL; | 124 | return ERR_PTR(-ESRCH); |
125 | 125 | ||
126 | mm = mm_for_maps(priv->task); | 126 | mm = mm_for_maps(priv->task); |
127 | if (!mm) | 127 | if (!mm || IS_ERR(mm)) |
128 | return NULL; | 128 | return mm; |
129 | down_read(&mm->mmap_sem); | 129 | down_read(&mm->mmap_sem); |
130 | 130 | ||
131 | tail_vma = get_gate_vma(priv->task); | 131 | tail_vma = get_gate_vma(priv->task->mm); |
132 | priv->tail_vma = tail_vma; | 132 | priv->tail_vma = tail_vma; |
133 | 133 | ||
134 | /* Start with last addr hint */ | 134 | /* Start with last addr hint */ |
@@ -251,8 +251,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
251 | const char *name = arch_vma_name(vma); | 251 | const char *name = arch_vma_name(vma); |
252 | if (!name) { | 252 | if (!name) { |
253 | if (mm) { | 253 | if (mm) { |
254 | if (vma->vm_start <= mm->start_brk && | 254 | if (vma->vm_start <= mm->brk && |
255 | vma->vm_end >= mm->brk) { | 255 | vma->vm_end >= mm->start_brk) { |
256 | name = "[heap]"; | 256 | name = "[heap]"; |
257 | } else if (vma->vm_start <= mm->start_stack && | 257 | } else if (vma->vm_start <= mm->start_stack && |
258 | vma->vm_end >= mm->start_stack) { | 258 | vma->vm_end >= mm->start_stack) { |
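
The show_map_vma() hunk above turns the [heap] test into a plain interval-overlap check: a vma is labelled [heap] if it overlaps [start_brk, brk]. The old, swapped comparison required the vma to contain the whole range, which misses layouts (for instance with brk randomization) where the heap vma only partially covers it. A runnable sketch of the two predicates:

#include <stdio.h>

struct vma { unsigned long start, end; };

/* Old test: the vma must contain the whole [start_brk, brk] range. */
static int is_heap_old(struct vma v, unsigned long start_brk,
		       unsigned long brk)
{
	return v.start <= start_brk && v.end >= brk;
}

/* New test: the vma merely has to overlap [start_brk, brk]. */
static int is_heap_new(struct vma v, unsigned long start_brk,
		       unsigned long brk)
{
	return v.start <= brk && v.end >= start_brk;
}

int main(void)
{
	/* A heap vma starting above start_brk: only the overlap
	 * test still recognises it. */
	struct vma v = { 0x2000, 0x3000 };

	printf("old: %d\n", is_heap_old(v, 0x1000, 0x2800));	/* 0 */
	printf("new: %d\n", is_heap_new(v, 0x1000, 0x2800));	/* 1 */
	return 0;
}
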
@@ -279,7 +279,8 @@ static int show_map(struct seq_file *m, void *v) | |||
279 | show_map_vma(m, vma); | 279 | show_map_vma(m, vma); |
280 | 280 | ||
281 | if (m->count < m->size) /* vma is copied successfully */ | 281 | if (m->count < m->size) /* vma is copied successfully */ |
282 | m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; | 282 | m->version = (vma != get_gate_vma(task->mm)) |
283 | ? vma->vm_start : 0; | ||
283 | return 0; | 284 | return 0; |
284 | } | 285 | } |
285 | 286 | ||
@@ -468,7 +469,8 @@ static int show_smap(struct seq_file *m, void *v) | |||
468 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); | 469 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); |
469 | 470 | ||
470 | if (m->count < m->size) /* vma is copied successfully */ | 471 | if (m->count < m->size) /* vma is copied successfully */ |
471 | m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; | 472 | m->version = (vma != get_gate_vma(task->mm)) |
473 | ? vma->vm_start : 0; | ||
472 | return 0; | 474 | return 0; |
473 | } | 475 | } |
474 | 476 | ||
@@ -764,8 +766,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
764 | if (!task) | 766 | if (!task) |
765 | goto out; | 767 | goto out; |
766 | 768 | ||
767 | ret = -EACCES; | 769 | mm = mm_for_maps(task); |
768 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 770 | ret = PTR_ERR(mm); |
771 | if (!mm || IS_ERR(mm)) | ||
769 | goto out_task; | 772 | goto out_task; |
770 | 773 | ||
771 | ret = -EINVAL; | 774 | ret = -EINVAL; |
@@ -778,10 +781,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
778 | if (!count) | 781 | if (!count) |
779 | goto out_task; | 782 | goto out_task; |
780 | 783 | ||
781 | mm = get_task_mm(task); | ||
782 | if (!mm) | ||
783 | goto out_task; | ||
784 | |||
785 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); | 784 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); |
786 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); | 785 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); |
787 | ret = -ENOMEM; | 786 | ret = -ENOMEM; |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index b535d3e5d5f1..980de547c070 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -199,13 +199,13 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
199 | /* pin the task and mm whilst we play with them */ | 199 | /* pin the task and mm whilst we play with them */ |
200 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); | 200 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); |
201 | if (!priv->task) | 201 | if (!priv->task) |
202 | return NULL; | 202 | return ERR_PTR(-ESRCH); |
203 | 203 | ||
204 | mm = mm_for_maps(priv->task); | 204 | mm = mm_for_maps(priv->task); |
205 | if (!mm) { | 205 | if (!mm || IS_ERR(mm)) { |
206 | put_task_struct(priv->task); | 206 | put_task_struct(priv->task); |
207 | priv->task = NULL; | 207 | priv->task = NULL; |
208 | return NULL; | 208 | return mm; |
209 | } | 209 | } |
210 | down_read(&mm->mmap_sem); | 210 | down_read(&mm->mmap_sem); |
211 | 211 | ||
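
Both m_start() variants above now return ERR_PTR(-ESRCH), or the error from mm_for_maps(), instead of a bare NULL, so seq_file can report the real errno to readers. The encoding relies on errno values living in the top, never-mapped page of the address space; a userspace re-implementation of the idiom, assuming the usual 4095-errno convention:

#include <stdio.h>

#define MAX_ERRNO 4095	/* kernel convention: errnos fit in one page */

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *p = ERR_PTR(-3 /* ESRCH */);

	if (IS_ERR(p))
		printf("start failed, errno %ld\n", -PTR_ERR(p));
	if (!IS_ERR(ERR_PTR(0)))
		printf("NULL is not an error pointer\n");
	return 0;
}
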
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index ce9ad84d5dd9..f835a25625ff 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
@@ -48,6 +48,10 @@ void pstore_set_kmsg_bytes(int bytes) | |||
48 | /* Tag each group of saved records with a sequence number */ | 48 | /* Tag each group of saved records with a sequence number */ |
49 | static int oopscount; | 49 | static int oopscount; |
50 | 50 | ||
51 | static char *reason_str[] = { | ||
52 | "Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency" | ||
53 | }; | ||
54 | |||
51 | /* | 55 | /* |
52 | * callback from kmsg_dump. (s2,l2) has the most recently | 56 | * callback from kmsg_dump. (s2,l2) has the most recently |
53 | * written bytes, older bytes are in (s1,l1). Save as much | 57 | * written bytes, older bytes are in (s1,l1). Save as much |
@@ -61,15 +65,20 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
61 | unsigned long s1_start, s2_start; | 65 | unsigned long s1_start, s2_start; |
62 | unsigned long l1_cpy, l2_cpy; | 66 | unsigned long l1_cpy, l2_cpy; |
63 | unsigned long size, total = 0; | 67 | unsigned long size, total = 0; |
64 | char *dst; | 68 | char *dst, *why; |
65 | u64 id; | 69 | u64 id; |
66 | int hsize, part = 1; | 70 | int hsize, part = 1; |
67 | 71 | ||
72 | if (reason < ARRAY_SIZE(reason_str)) | ||
73 | why = reason_str[reason]; | ||
74 | else | ||
75 | why = "Unknown"; | ||
76 | |||
68 | mutex_lock(&psinfo->buf_mutex); | 77 | mutex_lock(&psinfo->buf_mutex); |
69 | oopscount++; | 78 | oopscount++; |
70 | while (total < kmsg_bytes) { | 79 | while (total < kmsg_bytes) { |
71 | dst = psinfo->buf; | 80 | dst = psinfo->buf; |
72 | hsize = sprintf(dst, "Oops#%d Part%d\n", oopscount, part++); | 81 | hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part++); |
73 | size = psinfo->bufsize - hsize; | 82 | size = psinfo->bufsize - hsize; |
74 | dst += hsize; | 83 | dst += hsize; |
75 | 84 | ||
@@ -86,7 +95,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
86 | memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); | 95 | memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); |
87 | 96 | ||
88 | id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy); | 97 | id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy); |
89 | if (pstore_is_mounted()) | 98 | if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) |
90 | pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, | 99 | pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, |
91 | psinfo->buf, hsize + l1_cpy + l2_cpy, | 100 | psinfo->buf, hsize + l1_cpy + l2_cpy, |
92 | CURRENT_TIME, psinfo->erase); | 101 | CURRENT_TIME, psinfo->erase); |
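
The pstore hunks above replace the hard-coded "Oops" header with a lookup into reason_str[], falling back to "Unknown" for reasons newer than the table, and create pstore files only for KMSG_DUMP_OOPS. The table lookup is easy to check standalone; the index numbering below only mirrors the KMSG_DUMP_* order implied by the table, it is not copied from the kernel enum:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Same table and fallback as the pstore_dump() hunk above. */
static const char * const reason_str[] = {
	"Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency"
};

static const char *dump_reason(unsigned int reason)
{
	return reason < ARRAY_SIZE(reason_str) ? reason_str[reason]
					       : "Unknown";
}

int main(void)
{
	printf("%s\n", dump_reason(1));		/* Panic */
	printf("%s\n", dump_reason(99));	/* Unknown: future-proof */
	return 0;
}
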
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index e63b4171d583..2b0646613f5a 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -335,7 +335,6 @@ static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) | |||
335 | static const struct address_space_operations qnx4_aops = { | 335 | static const struct address_space_operations qnx4_aops = { |
336 | .readpage = qnx4_readpage, | 336 | .readpage = qnx4_readpage, |
337 | .writepage = qnx4_writepage, | 337 | .writepage = qnx4_writepage, |
338 | .sync_page = block_sync_page, | ||
339 | .write_begin = qnx4_write_begin, | 338 | .write_begin = qnx4_write_begin, |
340 | .write_end = generic_write_end, | 339 | .write_end = generic_write_end, |
341 | .bmap = qnx4_bmap | 340 | .bmap = qnx4_bmap |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index a2a622e079f0..fcc8ae75d874 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -76,7 +76,7 @@ | |||
76 | #include <linux/buffer_head.h> | 76 | #include <linux/buffer_head.h> |
77 | #include <linux/capability.h> | 77 | #include <linux/capability.h> |
78 | #include <linux/quotaops.h> | 78 | #include <linux/quotaops.h> |
79 | #include <linux/writeback.h> /* for inode_lock, oddly enough.. */ | 79 | #include "../internal.h" /* ugh */ |
80 | 80 | ||
81 | #include <asm/uaccess.h> | 81 | #include <asm/uaccess.h> |
82 | 82 | ||
@@ -900,33 +900,38 @@ static void add_dquot_ref(struct super_block *sb, int type) | |||
900 | int reserved = 0; | 900 | int reserved = 0; |
901 | #endif | 901 | #endif |
902 | 902 | ||
903 | spin_lock(&inode_lock); | 903 | spin_lock(&inode_sb_list_lock); |
904 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 904 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
905 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) | 905 | spin_lock(&inode->i_lock); |
906 | if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || | ||
907 | !atomic_read(&inode->i_writecount) || | ||
908 | !dqinit_needed(inode, type)) { | ||
909 | spin_unlock(&inode->i_lock); | ||
906 | continue; | 910 | continue; |
911 | } | ||
907 | #ifdef CONFIG_QUOTA_DEBUG | 912 | #ifdef CONFIG_QUOTA_DEBUG |
908 | if (unlikely(inode_get_rsv_space(inode) > 0)) | 913 | if (unlikely(inode_get_rsv_space(inode) > 0)) |
909 | reserved = 1; | 914 | reserved = 1; |
910 | #endif | 915 | #endif |
911 | if (!atomic_read(&inode->i_writecount)) | ||
912 | continue; | ||
913 | if (!dqinit_needed(inode, type)) | ||
914 | continue; | ||
915 | |||
916 | __iget(inode); | 916 | __iget(inode); |
917 | spin_unlock(&inode_lock); | 917 | spin_unlock(&inode->i_lock); |
918 | spin_unlock(&inode_sb_list_lock); | ||
918 | 919 | ||
919 | iput(old_inode); | 920 | iput(old_inode); |
920 | __dquot_initialize(inode, type); | 921 | __dquot_initialize(inode, type); |
921 | /* We hold a reference to 'inode' so it couldn't have been | 922 | |
922 | * removed from s_inodes list while we dropped the inode_lock. | 923 | /* |
923 | * We cannot iput the inode now as we can be holding the last | 924 | * We hold a reference to 'inode' so it couldn't have been |
924 | * reference and we cannot iput it under inode_lock. So we | 925 | * removed from s_inodes list while we dropped the |
925 | * keep the reference and iput it later. */ | 926 | * inode_sb_list_lock We cannot iput the inode now as we can be |
927 | * holding the last reference and we cannot iput it under | ||
928 | * inode_sb_list_lock. So we keep the reference and iput it | ||
929 | * later. | ||
930 | */ | ||
926 | old_inode = inode; | 931 | old_inode = inode; |
927 | spin_lock(&inode_lock); | 932 | spin_lock(&inode_sb_list_lock); |
928 | } | 933 | } |
929 | spin_unlock(&inode_lock); | 934 | spin_unlock(&inode_sb_list_lock); |
930 | iput(old_inode); | 935 | iput(old_inode); |
931 | 936 | ||
932 | #ifdef CONFIG_QUOTA_DEBUG | 937 | #ifdef CONFIG_QUOTA_DEBUG |
@@ -1007,7 +1012,7 @@ static void remove_dquot_ref(struct super_block *sb, int type, | |||
1007 | struct inode *inode; | 1012 | struct inode *inode; |
1008 | int reserved = 0; | 1013 | int reserved = 0; |
1009 | 1014 | ||
1010 | spin_lock(&inode_lock); | 1015 | spin_lock(&inode_sb_list_lock); |
1011 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 1016 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
1012 | /* | 1017 | /* |
1013 | * We have to scan also I_NEW inodes because they can already | 1018 | * We have to scan also I_NEW inodes because they can already |
@@ -1021,7 +1026,7 @@ static void remove_dquot_ref(struct super_block *sb, int type, | |||
1021 | remove_inode_dquot_ref(inode, type, tofree_head); | 1026 | remove_inode_dquot_ref(inode, type, tofree_head); |
1022 | } | 1027 | } |
1023 | } | 1028 | } |
1024 | spin_unlock(&inode_lock); | 1029 | spin_unlock(&inode_sb_list_lock); |
1025 | #ifdef CONFIG_QUOTA_DEBUG | 1030 | #ifdef CONFIG_QUOTA_DEBUG |
1026 | if (reserved) { | 1031 | if (reserved) { |
1027 | printk(KERN_WARNING "VFS (%s): Writes happened after quota" | 1032 | printk(KERN_WARNING "VFS (%s): Writes happened after quota" |
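
The add_dquot_ref() rework above is the standard pattern for walking an inode list when the loop body must sleep: pin the inode under the locks, drop the locks, do the work, and defer the iput() of the previous inode until no spinlock is held. A condensed sketch of that shape; skip_this() and do_blocking_work() are placeholders, and this is kernel-style pseudocode, not standalone-buildable:

struct inode *inode, *old_inode = NULL;

spin_lock(&inode_sb_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
	spin_lock(&inode->i_lock);
	if (skip_this(inode)) {			/* freeing, new, etc. */
		spin_unlock(&inode->i_lock);
		continue;
	}
	__iget(inode);				/* pin before unlocking */
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_sb_list_lock);

	iput(old_inode);			/* safe: may sleep here */
	do_blocking_work(inode);
	old_inode = inode;			/* iput on the next pass */

	spin_lock(&inode_sb_list_lock);		/* resume the walk */
}
spin_unlock(&inode_sb_list_lock);
iput(old_inode);				/* drop the last pin */
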
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 1bba24bad820..4fd5bb33dbb5 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -3217,7 +3217,6 @@ const struct address_space_operations reiserfs_address_space_operations = { | |||
3217 | .readpages = reiserfs_readpages, | 3217 | .readpages = reiserfs_readpages, |
3218 | .releasepage = reiserfs_releasepage, | 3218 | .releasepage = reiserfs_releasepage, |
3219 | .invalidatepage = reiserfs_invalidatepage, | 3219 | .invalidatepage = reiserfs_invalidatepage, |
3220 | .sync_page = block_sync_page, | ||
3221 | .write_begin = reiserfs_write_begin, | 3220 | .write_begin = reiserfs_write_begin, |
3222 | .write_end = reiserfs_write_end, | 3221 | .write_end = reiserfs_write_end, |
3223 | .bmap = reiserfs_aop_bmap, | 3222 | .bmap = reiserfs_aop_bmap, |
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 79265fdc317a..4e153051bc75 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c | |||
@@ -59,7 +59,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
59 | if (err) | 59 | if (err) |
60 | break; | 60 | break; |
61 | 61 | ||
62 | if (!is_owner_or_cap(inode)) { | 62 | if (!inode_owner_or_capable(inode)) { |
63 | err = -EPERM; | 63 | err = -EPERM; |
64 | goto setflags_out; | 64 | goto setflags_out; |
65 | } | 65 | } |
@@ -103,7 +103,7 @@ setflags_out: | |||
103 | err = put_user(inode->i_generation, (int __user *)arg); | 103 | err = put_user(inode->i_generation, (int __user *)arg); |
104 | break; | 104 | break; |
105 | case REISERFS_IOC_SETVERSION: | 105 | case REISERFS_IOC_SETVERSION: |
106 | if (!is_owner_or_cap(inode)) { | 106 | if (!inode_owner_or_capable(inode)) { |
107 | err = -EPERM; | 107 | err = -EPERM; |
108 | break; | 108 | break; |
109 | } | 109 | } |
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 90d2fcb67a31..3dc38f1206fc 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c | |||
@@ -26,7 +26,7 @@ posix_acl_set(struct dentry *dentry, const char *name, const void *value, | |||
26 | size_t jcreate_blocks; | 26 | size_t jcreate_blocks; |
27 | if (!reiserfs_posixacl(inode->i_sb)) | 27 | if (!reiserfs_posixacl(inode->i_sb)) |
28 | return -EOPNOTSUPP; | 28 | return -EOPNOTSUPP; |
29 | if (!is_owner_or_cap(inode)) | 29 | if (!inode_owner_or_capable(inode)) |
30 | return -EPERM; | 30 | return -EPERM; |
31 | 31 | ||
32 | if (value) { | 32 | if (value) { |
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index aa68a8a31518..efc309fa3035 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig | |||
@@ -5,12 +5,12 @@ config SQUASHFS | |||
5 | help | 5 | help |
6 | Saying Y here includes support for SquashFS 4.0 (a Compressed | 6 | Saying Y here includes support for SquashFS 4.0 (a Compressed |
7 | Read-Only File System). Squashfs is a highly compressed read-only | 7 | Read-Only File System). Squashfs is a highly compressed read-only |
8 | filesystem for Linux. It uses zlib/lzo compression to compress both | 8 | filesystem for Linux. It uses zlib, lzo or xz compression to |
9 | files, inodes and directories. Inodes in the system are very small | 9 | compress both files, inodes and directories. Inodes in the system |
10 | and all blocks are packed to minimise data overhead. Block sizes | 10 | are very small and all blocks are packed to minimise data overhead. |
11 | greater than 4K are supported up to a maximum of 1 Mbyte (default | 11 | Block sizes greater than 4K are supported up to a maximum of 1 Mbyte |
12 | block size 128K). SquashFS 4.0 supports 64 bit filesystems and files | 12 | (default block size 128K). SquashFS 4.0 supports 64 bit filesystems |
13 | (larger than 4GB), full uid/gid information, hard links and | 13 | and files (larger than 4GB), full uid/gid information, hard links and |
14 | timestamps. | 14 | timestamps. |
15 | 15 | ||
16 | Squashfs is intended for general read-only filesystem use, for | 16 | Squashfs is intended for general read-only filesystem use, for |
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index a5940e54c4dd..e921bd213738 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c | |||
@@ -23,6 +23,7 @@ | |||
23 | 23 | ||
24 | #include <linux/types.h> | 24 | #include <linux/types.h> |
25 | #include <linux/mutex.h> | 25 | #include <linux/mutex.h> |
26 | #include <linux/slab.h> | ||
26 | #include <linux/buffer_head.h> | 27 | #include <linux/buffer_head.h> |
27 | 28 | ||
28 | #include "squashfs_fs.h" | 29 | #include "squashfs_fs.h" |
@@ -74,3 +75,36 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) | |||
74 | 75 | ||
75 | return decompressor[i]; | 76 | return decompressor[i]; |
76 | } | 77 | } |
78 | |||
79 | |||
80 | void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) | ||
81 | { | ||
82 | struct squashfs_sb_info *msblk = sb->s_fs_info; | ||
83 | void *strm, *buffer = NULL; | ||
84 | int length = 0; | ||
85 | |||
86 | /* | ||
87 | * Read decompressor specific options from file system if present | ||
88 | */ | ||
89 | if (SQUASHFS_COMP_OPTS(flags)) { | ||
90 | buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); | ||
91 | if (buffer == NULL) | ||
92 | return ERR_PTR(-ENOMEM); | ||
93 | |||
94 | length = squashfs_read_data(sb, &buffer, | ||
95 | sizeof(struct squashfs_super_block), 0, NULL, | ||
96 | PAGE_CACHE_SIZE, 1); | ||
97 | |||
98 | if (length < 0) { | ||
99 | strm = ERR_PTR(length); | ||
100 | goto finished; | ||
101 | } | ||
102 | } | ||
103 | |||
104 | strm = msblk->decompressor->init(msblk, buffer, length); | ||
105 | |||
106 | finished: | ||
107 | kfree(buffer); | ||
108 | |||
109 | return strm; | ||
110 | } | ||
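
squashfs_decompressor_init() above reads the optional compressor-options block that follows the superblock when bit SQUASHFS_COMP_OPT is set in the superblock flags (the macros appear in the squashfs_fs.h hunk further down). The flag test itself is a one-liner worth seeing in isolation:

#include <stdio.h>

/* Copied from the squashfs_fs.h hunk below. */
#define SQUASHFS_COMP_OPT	10
#define SQUASHFS_BIT(flag, bit)		((flag >> bit) & 1)
#define SQUASHFS_COMP_OPTS(flags)	SQUASHFS_BIT(flags, SQUASHFS_COMP_OPT)

int main(void)
{
	unsigned short flags = 1 << SQUASHFS_COMP_OPT;

	printf("%d\n", SQUASHFS_COMP_OPTS(flags));	/* 1: options follow */
	printf("%d\n", SQUASHFS_COMP_OPTS(0));		/* 0: none present  */
	return 0;
}
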
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 3b305a70f7aa..099745ad5691 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h | |||
@@ -24,7 +24,7 @@ | |||
24 | */ | 24 | */ |
25 | 25 | ||
26 | struct squashfs_decompressor { | 26 | struct squashfs_decompressor { |
27 | void *(*init)(struct squashfs_sb_info *); | 27 | void *(*init)(struct squashfs_sb_info *, void *, int); |
28 | void (*free)(void *); | 28 | void (*free)(void *); |
29 | int (*decompress)(struct squashfs_sb_info *, void **, | 29 | int (*decompress)(struct squashfs_sb_info *, void **, |
30 | struct buffer_head **, int, int, int, int, int); | 30 | struct buffer_head **, int, int, int, int, int); |
@@ -33,11 +33,6 @@ struct squashfs_decompressor { | |||
33 | int supported; | 33 | int supported; |
34 | }; | 34 | }; |
35 | 35 | ||
36 | static inline void *squashfs_decompressor_init(struct squashfs_sb_info *msblk) | ||
37 | { | ||
38 | return msblk->decompressor->init(msblk); | ||
39 | } | ||
40 | |||
41 | static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, | 36 | static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, |
42 | void *s) | 37 | void *s) |
43 | { | 38 | { |
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index 0dc340aa2be9..3f79cd1d0c19 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c | |||
@@ -172,6 +172,11 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
172 | length += sizeof(dirh); | 172 | length += sizeof(dirh); |
173 | 173 | ||
174 | dir_count = le32_to_cpu(dirh.count) + 1; | 174 | dir_count = le32_to_cpu(dirh.count) + 1; |
175 | |||
176 | /* dir_count should never be larger than 256 */ | ||
177 | if (dir_count > 256) | ||
178 | goto failed_read; | ||
179 | |||
175 | while (dir_count--) { | 180 | while (dir_count--) { |
176 | /* | 181 | /* |
177 | * Read directory entry. | 182 | * Read directory entry. |
@@ -183,6 +188,10 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
183 | 188 | ||
184 | size = le16_to_cpu(dire->size) + 1; | 189 | size = le16_to_cpu(dire->size) + 1; |
185 | 190 | ||
191 | /* size should never be larger than SQUASHFS_NAME_LEN */ | ||
192 | if (size > SQUASHFS_NAME_LEN) | ||
193 | goto failed_read; | ||
194 | |||
186 | err = squashfs_read_metadata(inode->i_sb, dire->name, | 195 | err = squashfs_read_metadata(inode->i_sb, dire->name, |
187 | &block, &offset, size); | 196 | &block, &offset, size); |
188 | if (err < 0) | 197 | if (err < 0) |
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 7da759e34c52..00f4dfc5f088 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c | |||
@@ -37,7 +37,7 @@ struct squashfs_lzo { | |||
37 | void *output; | 37 | void *output; |
38 | }; | 38 | }; |
39 | 39 | ||
40 | static void *lzo_init(struct squashfs_sb_info *msblk) | 40 | static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len) |
41 | { | 41 | { |
42 | int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); | 42 | int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); |
43 | 43 | ||
@@ -58,7 +58,7 @@ failed2: | |||
58 | failed: | 58 | failed: |
59 | ERROR("Failed to allocate lzo workspace\n"); | 59 | ERROR("Failed to allocate lzo workspace\n"); |
60 | kfree(stream); | 60 | kfree(stream); |
61 | return NULL; | 61 | return ERR_PTR(-ENOMEM); |
62 | } | 62 | } |
63 | 63 | ||
64 | 64 | ||
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index 7a9464d08cf6..5d922a6701ab 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c | |||
@@ -176,6 +176,11 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry, | |||
176 | length += sizeof(dirh); | 176 | length += sizeof(dirh); |
177 | 177 | ||
178 | dir_count = le32_to_cpu(dirh.count) + 1; | 178 | dir_count = le32_to_cpu(dirh.count) + 1; |
179 | |||
180 | /* dir_count should never be larger than 256 */ | ||
181 | if (dir_count > 256) | ||
182 | goto data_error; | ||
183 | |||
179 | while (dir_count--) { | 184 | while (dir_count--) { |
180 | /* | 185 | /* |
181 | * Read directory entry. | 186 | * Read directory entry. |
@@ -187,6 +192,10 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry, | |||
187 | 192 | ||
188 | size = le16_to_cpu(dire->size) + 1; | 193 | size = le16_to_cpu(dire->size) + 1; |
189 | 194 | ||
195 | /* size should never be larger than SQUASHFS_NAME_LEN */ | ||
196 | if (size > SQUASHFS_NAME_LEN) | ||
197 | goto data_error; | ||
198 | |||
190 | err = squashfs_read_metadata(dir->i_sb, dire->name, | 199 | err = squashfs_read_metadata(dir->i_sb, dire->name, |
191 | &block, &offset, size); | 200 | &block, &offset, size); |
192 | if (err < 0) | 201 | if (err < 0) |
@@ -228,6 +237,9 @@ exit_lookup: | |||
228 | d_add(dentry, inode); | 237 | d_add(dentry, inode); |
229 | return ERR_PTR(0); | 238 | return ERR_PTR(0); |
230 | 239 | ||
240 | data_error: | ||
241 | err = -EIO; | ||
242 | |||
231 | read_failure: | 243 | read_failure: |
232 | ERROR("Unable to read directory block [%llx:%x]\n", | 244 | ERROR("Unable to read directory block [%llx:%x]\n", |
233 | squashfs_i(dir)->start + msblk->directory_table, | 245 | squashfs_i(dir)->start + msblk->directory_table, |
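
The paired dir.c and namei.c hunks above range-check two values decoded straight from the on-disk directory header before they drive a loop or a metadata read, so a corrupted image fails with -EIO instead of walking wild. The checks distilled into a standalone form (SQUASHFS_NAME_LEN is 256 in squashfs_fs.h):

#include <stdio.h>

#define SQUASHFS_NAME_LEN	256

/* Untrusted on-disk values: both are stored off-by-one, so add 1
 * first, then reject anything outside the format's hard limits. */
static int dir_header_sane(unsigned int raw_count, unsigned int raw_size)
{
	unsigned int dir_count = raw_count + 1;
	unsigned int size = raw_size + 1;

	if (dir_count > 256)
		return 0;	/* impossible entry count */
	if (size > SQUASHFS_NAME_LEN)
		return 0;	/* name longer than the format allows */
	return 1;
}

int main(void)
{
	printf("%d\n", dir_header_sane(10, 31));	/* 1: plausible */
	printf("%d\n", dir_header_sane(70000, 31));	/* 0: rejected  */
	return 0;
}
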
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index ba729d808876..1f2e608b8785 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h | |||
@@ -48,6 +48,7 @@ extern int squashfs_read_table(struct super_block *, void *, u64, int); | |||
48 | 48 | ||
49 | /* decompressor.c */ | 49 | /* decompressor.c */ |
50 | extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); | 50 | extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); |
51 | extern void *squashfs_decompressor_init(struct super_block *, unsigned short); | ||
51 | 52 | ||
52 | /* export.c */ | 53 | /* export.c */ |
53 | extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, | 54 | extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, |
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 39533feffd6d..4582c568ef4d 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h | |||
@@ -57,6 +57,7 @@ | |||
57 | #define SQUASHFS_ALWAYS_FRAG 5 | 57 | #define SQUASHFS_ALWAYS_FRAG 5 |
58 | #define SQUASHFS_DUPLICATE 6 | 58 | #define SQUASHFS_DUPLICATE 6 |
59 | #define SQUASHFS_EXPORT 7 | 59 | #define SQUASHFS_EXPORT 7 |
60 | #define SQUASHFS_COMP_OPT 10 | ||
60 | 61 | ||
61 | #define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1) | 62 | #define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1) |
62 | 63 | ||
@@ -81,6 +82,9 @@ | |||
81 | #define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \ | 82 | #define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \ |
82 | SQUASHFS_EXPORT) | 83 | SQUASHFS_EXPORT) |
83 | 84 | ||
85 | #define SQUASHFS_COMP_OPTS(flags) SQUASHFS_BIT(flags, \ | ||
86 | SQUASHFS_COMP_OPT) | ||
87 | |||
84 | /* Max number of types and file types */ | 88 | /* Max number of types and file types */ |
85 | #define SQUASHFS_DIR_TYPE 1 | 89 | #define SQUASHFS_DIR_TYPE 1 |
86 | #define SQUASHFS_REG_TYPE 2 | 90 | #define SQUASHFS_REG_TYPE 2 |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 20700b9f2b4c..5c8184c061a4 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -199,10 +199,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
199 | 199 | ||
200 | err = -ENOMEM; | 200 | err = -ENOMEM; |
201 | 201 | ||
202 | msblk->stream = squashfs_decompressor_init(msblk); | ||
203 | if (msblk->stream == NULL) | ||
204 | goto failed_mount; | ||
205 | |||
206 | msblk->block_cache = squashfs_cache_init("metadata", | 202 | msblk->block_cache = squashfs_cache_init("metadata", |
207 | SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE); | 203 | SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE); |
208 | if (msblk->block_cache == NULL) | 204 | if (msblk->block_cache == NULL) |
@@ -215,6 +211,13 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
215 | goto failed_mount; | 211 | goto failed_mount; |
216 | } | 212 | } |
217 | 213 | ||
214 | msblk->stream = squashfs_decompressor_init(sb, flags); | ||
215 | if (IS_ERR(msblk->stream)) { | ||
216 | err = PTR_ERR(msblk->stream); | ||
217 | msblk->stream = NULL; | ||
218 | goto failed_mount; | ||
219 | } | ||
220 | |||
218 | /* Allocate and read id index table */ | 221 | /* Allocate and read id index table */ |
219 | msblk->id_table = squashfs_read_id_index_table(sb, | 222 | msblk->id_table = squashfs_read_id_index_table(sb, |
220 | le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids)); | 223 | le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids)); |
@@ -370,8 +373,8 @@ static void squashfs_put_super(struct super_block *sb) | |||
370 | } | 373 | } |
371 | 374 | ||
372 | 375 | ||
373 | static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags, | 376 | static struct dentry *squashfs_mount(struct file_system_type *fs_type, |
374 | const char *dev_name, void *data) | 377 | int flags, const char *dev_name, void *data) |
375 | { | 378 | { |
376 | return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super); | 379 | return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super); |
377 | } | 380 | } |
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index c4eb40018256..aa47a286d1f8 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c | |||
@@ -26,10 +26,10 @@ | |||
26 | #include <linux/buffer_head.h> | 26 | #include <linux/buffer_head.h> |
27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
28 | #include <linux/xz.h> | 28 | #include <linux/xz.h> |
29 | #include <linux/bitops.h> | ||
29 | 30 | ||
30 | #include "squashfs_fs.h" | 31 | #include "squashfs_fs.h" |
31 | #include "squashfs_fs_sb.h" | 32 | #include "squashfs_fs_sb.h" |
32 | #include "squashfs_fs_i.h" | ||
33 | #include "squashfs.h" | 33 | #include "squashfs.h" |
34 | #include "decompressor.h" | 34 | #include "decompressor.h" |
35 | 35 | ||
@@ -38,24 +38,57 @@ struct squashfs_xz { | |||
38 | struct xz_buf buf; | 38 | struct xz_buf buf; |
39 | }; | 39 | }; |
40 | 40 | ||
41 | static void *squashfs_xz_init(struct squashfs_sb_info *msblk) | 41 | struct comp_opts { |
42 | __le32 dictionary_size; | ||
43 | __le32 flags; | ||
44 | }; | ||
45 | |||
46 | static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, | ||
47 | int len) | ||
42 | { | 48 | { |
43 | int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); | 49 | struct comp_opts *comp_opts = buff; |
50 | struct squashfs_xz *stream; | ||
51 | int dict_size = msblk->block_size; | ||
52 | int err, n; | ||
53 | |||
54 | if (comp_opts) { | ||
55 | /* check compressor options are the expected length */ | ||
56 | if (len < sizeof(*comp_opts)) { | ||
57 | err = -EIO; | ||
58 | goto failed; | ||
59 | } | ||
44 | 60 | ||
45 | struct squashfs_xz *stream = kmalloc(sizeof(*stream), GFP_KERNEL); | 61 | dict_size = le32_to_cpu(comp_opts->dictionary_size); |
46 | if (stream == NULL) | 62 | |
63 | /* the dictionary size should be 2^n or 2^n+2^(n+1) */ | ||
64 | n = ffs(dict_size) - 1; | ||
65 | if (dict_size != (1 << n) && dict_size != (1 << n) + | ||
66 | (1 << (n + 1))) { | ||
67 | err = -EIO; | ||
68 | goto failed; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE); | ||
73 | |||
74 | stream = kmalloc(sizeof(*stream), GFP_KERNEL); | ||
75 | if (stream == NULL) { | ||
76 | err = -ENOMEM; | ||
47 | goto failed; | 77 | goto failed; |
78 | } | ||
48 | 79 | ||
49 | stream->state = xz_dec_init(XZ_PREALLOC, block_size); | 80 | stream->state = xz_dec_init(XZ_PREALLOC, dict_size); |
50 | if (stream->state == NULL) | 81 | if (stream->state == NULL) { |
82 | kfree(stream); | ||
83 | err = -ENOMEM; | ||
51 | goto failed; | 84 | goto failed; |
85 | } | ||
52 | 86 | ||
53 | return stream; | 87 | return stream; |
54 | 88 | ||
55 | failed: | 89 | failed: |
56 | ERROR("Failed to allocate xz workspace\n"); | 90 | ERROR("Failed to initialise xz decompressor\n"); |
57 | kfree(stream); | 91 | return ERR_PTR(err); |
58 | return NULL; | ||
59 | } | 92 | } |
60 | 93 | ||
61 | 94 | ||
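
squashfs_xz_init() above now takes the dictionary size from the image's compressor options and validates it with the ffs() trick before handing it to xz_dec_init(): legal sizes are 2^n or 2^n + 2^(n+1), i.e. a single power of two or three times one. The predicate in runnable form (assumes a nonzero dict_size, as in the kernel path):

#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Valid xz dictionary sizes are 2^n or 2^n + 2^(n+1); ffs() finds the
 * lowest set bit, which pins down n for both shapes. */
static int dict_size_valid(int dict_size)
{
	int n = ffs(dict_size) - 1;

	return dict_size == (1 << n) ||
	       dict_size == (1 << n) + (1 << (n + 1));
}

int main(void)
{
	printf("%d\n", dict_size_valid(1 << 20));		/* 1: 1 MiB */
	printf("%d\n", dict_size_valid(3 << 20));		/* 1: 3 MiB */
	printf("%d\n", dict_size_valid((1 << 20) + 4096));	/* 0 */
	return 0;
}
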
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 4661ae2b1cec..517688b32ffa 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c | |||
@@ -26,19 +26,19 @@ | |||
26 | #include <linux/buffer_head.h> | 26 | #include <linux/buffer_head.h> |
27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
28 | #include <linux/zlib.h> | 28 | #include <linux/zlib.h> |
29 | #include <linux/vmalloc.h> | ||
29 | 30 | ||
30 | #include "squashfs_fs.h" | 31 | #include "squashfs_fs.h" |
31 | #include "squashfs_fs_sb.h" | 32 | #include "squashfs_fs_sb.h" |
32 | #include "squashfs.h" | 33 | #include "squashfs.h" |
33 | #include "decompressor.h" | 34 | #include "decompressor.h" |
34 | 35 | ||
35 | static void *zlib_init(struct squashfs_sb_info *dummy) | 36 | static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len) |
36 | { | 37 | { |
37 | z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); | 38 | z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); |
38 | if (stream == NULL) | 39 | if (stream == NULL) |
39 | goto failed; | 40 | goto failed; |
40 | stream->workspace = kmalloc(zlib_inflate_workspacesize(), | 41 | stream->workspace = vmalloc(zlib_inflate_workspacesize()); |
41 | GFP_KERNEL); | ||
42 | if (stream->workspace == NULL) | 42 | if (stream->workspace == NULL) |
43 | goto failed; | 43 | goto failed; |
44 | 44 | ||
@@ -47,7 +47,7 @@ static void *zlib_init(struct squashfs_sb_info *dummy) | |||
47 | failed: | 47 | failed: |
48 | ERROR("Failed to allocate zlib workspace\n"); | 48 | ERROR("Failed to allocate zlib workspace\n"); |
49 | kfree(stream); | 49 | kfree(stream); |
50 | return NULL; | 50 | return ERR_PTR(-ENOMEM); |
51 | } | 51 | } |
52 | 52 | ||
53 | 53 | ||
@@ -56,7 +56,7 @@ static void zlib_free(void *strm) | |||
56 | z_stream *stream = strm; | 56 | z_stream *stream = strm; |
57 | 57 | ||
58 | if (stream) | 58 | if (stream) |
59 | kfree(stream->workspace); | 59 | vfree(stream->workspace); |
60 | kfree(stream); | 60 | kfree(stream); |
61 | } | 61 | } |
62 | 62 | ||
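
The zlib hunks above swap kmalloc() for vmalloc() on the inflate workspace: zlib_inflate_workspacesize() is tens of kilobytes, which needs physically contiguous pages under kmalloc() and can fail on a fragmented system, while vmalloc() only needs virtually contiguous ones. The cost, a mapping that is slower to set up and tear down, hardly matters for a once-per-mount allocation. A kernel-style sketch of the pairing, not standalone-buildable:

z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);	/* small */
if (!stream)
	return ERR_PTR(-ENOMEM);

stream->workspace = vmalloc(zlib_inflate_workspacesize());	/* large */
if (!stream->workspace) {
	kfree(stream);
	return ERR_PTR(-ENOMEM);
}

/* ... and on teardown the allocators must pair up: */
vfree(stream->workspace);	/* vmalloc -> vfree */
kfree(stream);			/* kmalloc -> kfree */
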
diff --git a/fs/super.c b/fs/super.c index e84864908264..8a06881b1920 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -71,6 +71,7 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
71 | #else | 71 | #else |
72 | INIT_LIST_HEAD(&s->s_files); | 72 | INIT_LIST_HEAD(&s->s_files); |
73 | #endif | 73 | #endif |
74 | s->s_bdi = &default_backing_dev_info; | ||
74 | INIT_LIST_HEAD(&s->s_instances); | 75 | INIT_LIST_HEAD(&s->s_instances); |
75 | INIT_HLIST_BL_HEAD(&s->s_anon); | 76 | INIT_HLIST_BL_HEAD(&s->s_anon); |
76 | INIT_LIST_HEAD(&s->s_inodes); | 77 | INIT_LIST_HEAD(&s->s_inodes); |
@@ -936,6 +937,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) | |||
936 | sb = root->d_sb; | 937 | sb = root->d_sb; |
937 | BUG_ON(!sb); | 938 | BUG_ON(!sb); |
938 | WARN_ON(!sb->s_bdi); | 939 | WARN_ON(!sb->s_bdi); |
940 | WARN_ON(sb->s_bdi == &default_backing_dev_info); | ||
939 | sb->s_flags |= MS_BORN; | 941 | sb->s_flags |= MS_BORN; |
940 | 942 | ||
941 | error = security_sb_kern_mount(sb, flags, secdata); | 943 | error = security_sb_kern_mount(sb, flags, secdata); |
diff --git a/fs/sync.c b/fs/sync.c --- a/fs/sync.c +++ b/fs/sync.c | |||
@@ -34,7 +34,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) | |||
34 | * This should be safe, as we require bdi backing to actually | 34 | * This should be safe, as we require bdi backing to actually |
35 | * write out data in the first place | 35 | * write out data in the first place |
36 | */ | 36 | */ |
37 | if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info) | 37 | if (sb->s_bdi == &noop_backing_dev_info) |
38 | return 0; | 38 | return 0; |
39 | 39 | ||
40 | if (sb->s_qcop && sb->s_qcop->quota_sync) | 40 | if (sb->s_qcop && sb->s_qcop->quota_sync) |
@@ -80,7 +80,7 @@ EXPORT_SYMBOL_GPL(sync_filesystem); | |||
80 | 80 | ||
81 | static void sync_one_sb(struct super_block *sb, void *arg) | 81 | static void sync_one_sb(struct super_block *sb, void *arg) |
82 | { | 82 | { |
83 | if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi) | 83 | if (!(sb->s_flags & MS_RDONLY)) |
84 | __sync_filesystem(sb, *(int *)arg); | 84 | __sync_filesystem(sb, *(int *)arg); |
85 | } | 85 | } |
86 | /* | 86 | /* |
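
The three hunks above (fs/super.c and fs/sync.c) are one change: alloc_super() seeds s_bdi with default_backing_dev_info so the field is never NULL, mount_fs() warns when a filesystem forgets to install a real bdi, and the sync paths drop their NULL checks in favour of comparing against sentinel bdis. The pattern in miniature:

#include <stdio.h>

/* Sentinel objects instead of NULL: consumers never need a NULL
 * check, only an identity comparison against known sentinels. */
struct bdi { const char *name; };

static struct bdi default_bdi = { "default" };	/* "not set up yet"  */
static struct bdi noop_bdi    = { "noop" };	/* "nothing to sync" */

struct super { struct bdi *s_bdi; };

static void sync_one(struct super *sb)
{
	if (sb->s_bdi == &noop_bdi)
		return;				/* no NULL test needed */
	printf("syncing via %s\n", sb->s_bdi->name);
}

int main(void)
{
	struct super sb = { &default_bdi };	/* seeded at allocation */

	sync_one(&sb);
	return 0;
}
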
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 9ca66276315e..fa8d43c92bb8 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c | |||
@@ -488,7 +488,6 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block) | |||
488 | const struct address_space_operations sysv_aops = { | 488 | const struct address_space_operations sysv_aops = { |
489 | .readpage = sysv_readpage, | 489 | .readpage = sysv_readpage, |
490 | .writepage = sysv_writepage, | 490 | .writepage = sysv_writepage, |
491 | .sync_page = block_sync_page, | ||
492 | .write_begin = sysv_write_begin, | 491 | .write_begin = sysv_write_begin, |
493 | .write_end = generic_write_end, | 492 | .write_end = generic_write_end, |
494 | .bmap = sysv_bmap | 493 | .bmap = sysv_bmap |
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index 1d1859dc3de5..d7440904be17 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig | |||
@@ -58,12 +58,3 @@ config UBIFS_FS_DEBUG | |||
58 | down UBIFS. You can then further enable / disable individual debugging | 58 | down UBIFS. You can then further enable / disable individual debugging |
59 | features using UBIFS module parameters and the corresponding sysfs | 59 | features using UBIFS module parameters and the corresponding sysfs |
60 | interfaces. | 60 | interfaces. |
61 | |||
62 | config UBIFS_FS_DEBUG_CHKS | ||
63 | bool "Enable extra checks" | ||
64 | depends on UBIFS_FS_DEBUG | ||
65 | help | ||
66 | If extra checks are enabled UBIFS will check the consistency of its | ||
67 | internal data structures during operation. However, UBIFS performance | ||
68 | is dramatically slower when this option is selected especially if the | ||
69 | file system is large. | ||
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 01c2b028e525..f25a7339f800 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -818,7 +818,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) | |||
818 | printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", | 818 | printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", |
819 | current->pid, lnum); | 819 | current->pid, lnum); |
820 | 820 | ||
821 | buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); | 821 | buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); |
822 | if (!buf) { | 822 | if (!buf) { |
823 | ubifs_err("cannot allocate memory for dumping LEB %d", lnum); | 823 | ubifs_err("cannot allocate memory for dumping LEB %d", lnum); |
824 | return; | 824 | return; |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index d77db7e36484..28be1e6a65e8 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -448,10 +448,12 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
448 | if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { | 448 | if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { |
449 | /* | 449 | /* |
450 | * We change whole page so no need to load it. But we | 450 | * We change whole page so no need to load it. But we |
451 | * have to set the @PG_checked flag to make the further | 451 | * do not know whether this page exists on the media or |
452 | * code know that the page is new. This might be not | 452 | * not, so we assume the latter because it requires |
453 | * true, but it is better to budget more than to read | 453 | * larger budget. The assumption is that it is better |
454 | * the page from the media. | 454 | * to budget a bit more than to read the page from the |
455 | * media. Thus, we are setting the @PG_checked flag | ||
456 | * here. | ||
455 | */ | 457 | */ |
456 | SetPageChecked(page); | 458 | SetPageChecked(page); |
457 | skipped_read = 1; | 459 | skipped_read = 1; |
@@ -559,6 +561,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, | |||
559 | dbg_gen("copied %d instead of %d, read page and repeat", | 561 | dbg_gen("copied %d instead of %d, read page and repeat", |
560 | copied, len); | 562 | copied, len); |
561 | cancel_budget(c, page, ui, appending); | 563 | cancel_budget(c, page, ui, appending); |
564 | ClearPageChecked(page); | ||
562 | 565 | ||
563 | /* | 566 | /* |
564 | * Return 0 to force VFS to repeat the whole operation, or the | 567 | * Return 0 to force VFS to repeat the whole operation, or the |
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 8aacd64957a2..548acf494afd 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c | |||
@@ -160,7 +160,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
160 | if (IS_RDONLY(inode)) | 160 | if (IS_RDONLY(inode)) |
161 | return -EROFS; | 161 | return -EROFS; |
162 | 162 | ||
163 | if (!is_owner_or_cap(inode)) | 163 | if (!inode_owner_or_capable(inode)) |
164 | return -EACCES; | 164 | return -EACCES; |
165 | 165 | ||
166 | if (get_user(flags, (int __user *) arg)) | 166 | if (get_user(flags, (int __user *) arg)) |
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index c7b25e2f7764..0ee0847f2421 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c | |||
@@ -1094,7 +1094,7 @@ static int scan_check_cb(struct ubifs_info *c, | |||
1094 | } | 1094 | } |
1095 | } | 1095 | } |
1096 | 1096 | ||
1097 | buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); | 1097 | buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); |
1098 | if (!buf) { | 1098 | if (!buf) { |
1099 | ubifs_err("cannot allocate memory to scan LEB %d", lnum); | 1099 | ubifs_err("cannot allocate memory to scan LEB %d", lnum); |
1100 | goto out; | 1100 | goto out; |
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 0a3c2c3f5c4a..0c9c69bd983a 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c | |||
@@ -1633,7 +1633,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) | |||
1633 | if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) | 1633 | if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) |
1634 | return 0; | 1634 | return 0; |
1635 | 1635 | ||
1636 | buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); | 1636 | buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); |
1637 | if (!buf) { | 1637 | if (!buf) { |
1638 | ubifs_err("cannot allocate memory for ltab checking"); | 1638 | ubifs_err("cannot allocate memory for ltab checking"); |
1639 | return 0; | 1639 | return 0; |
@@ -1885,7 +1885,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) | |||
1885 | 1885 | ||
1886 | printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", | 1886 | printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", |
1887 | current->pid, lnum); | 1887 | current->pid, lnum); |
1888 | buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); | 1888 | buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); |
1889 | if (!buf) { | 1889 | if (!buf) { |
1890 | ubifs_err("cannot allocate memory to dump LPT"); | 1890 | ubifs_err("cannot allocate memory to dump LPT"); |
1891 | return; | 1891 | return; |
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 2cdbd31641d7..09df318e368f 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c | |||
@@ -898,7 +898,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) | |||
898 | if (c->no_orphs) | 898 | if (c->no_orphs) |
899 | return 0; | 899 | return 0; |
900 | 900 | ||
901 | buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL); | 901 | buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); |
902 | if (!buf) { | 902 | if (!buf) { |
903 | ubifs_err("cannot allocate memory to check orphans"); | 903 | ubifs_err("cannot allocate memory to check orphans"); |
904 | return 0; | 904 | return 0; |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index e5dc1e120e8d..6ddd9973e681 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -2011,7 +2011,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) | |||
2011 | */ | 2011 | */ |
2012 | c->bdi.name = "ubifs", | 2012 | c->bdi.name = "ubifs", |
2013 | c->bdi.capabilities = BDI_CAP_MAP_COPY; | 2013 | c->bdi.capabilities = BDI_CAP_MAP_COPY; |
2014 | c->bdi.unplug_io_fn = default_unplug_io_fn; | ||
2015 | err = bdi_init(&c->bdi); | 2014 | err = bdi_init(&c->bdi); |
2016 | if (err) | 2015 | if (err) |
2017 | goto out_close; | 2016 | goto out_close; |
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 8994dd041660..95518a9f589e 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c | |||
@@ -27,11 +27,10 @@ | |||
27 | #include "udf_i.h" | 27 | #include "udf_i.h" |
28 | #include "udf_sb.h" | 28 | #include "udf_sb.h" |
29 | 29 | ||
30 | #define udf_clear_bit(nr, addr) ext2_clear_bit(nr, addr) | 30 | #define udf_clear_bit __test_and_clear_bit_le |
31 | #define udf_set_bit(nr, addr) ext2_set_bit(nr, addr) | 31 | #define udf_set_bit __test_and_set_bit_le |
32 | #define udf_test_bit(nr, addr) ext2_test_bit(nr, addr) | 32 | #define udf_test_bit test_bit_le |
33 | #define udf_find_next_one_bit(addr, size, offset) \ | 33 | #define udf_find_next_one_bit find_next_bit_le |
34 | ext2_find_next_bit((unsigned long *)(addr), size, offset) | ||
35 | 34 | ||
36 | static int read_block_bitmap(struct super_block *sb, | 35 | static int read_block_bitmap(struct super_block *sb, |
37 | struct udf_bitmap *bitmap, unsigned int block, | 36 | struct udf_bitmap *bitmap, unsigned int block, |
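
The udf hunk above retires the old ext2_*_bit() aliases in favour of the generic little-endian bitops (__test_and_set_bit_le() and friends). "Little-endian" here means bit n always lands in byte n/8 at weight 1 << (n % 8), regardless of host byte order, which is exactly what an on-disk bitmap needs. A userspace model of the semantics:

#include <stdio.h>

/* Bit n of a little-endian bitmap lives in byte n/8, independent of
 * the host's endianness; returns the bit's previous value. */
static int test_and_set_bit_le(int nr, unsigned char *addr)
{
	unsigned char mask = 1u << (nr & 7);
	unsigned char old = addr[nr >> 3];

	addr[nr >> 3] = old | mask;
	return (old & mask) != 0;
}

int main(void)
{
	unsigned char bitmap[4] = { 0 };

	printf("%d\n", test_and_set_bit_le(9, bitmap));	/* 0: was clear   */
	printf("%d\n", test_and_set_bit_le(9, bitmap));	/* 1: already set */
	printf("%02x\n", bitmap[1]);			/* 02: byte 1, bit 1 */
	return 0;
}
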
diff --git a/fs/udf/file.c b/fs/udf/file.c index f391a2adc699..2a346bb1d9f5 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -98,7 +98,6 @@ static int udf_adinicb_write_end(struct file *file, | |||
98 | const struct address_space_operations udf_adinicb_aops = { | 98 | const struct address_space_operations udf_adinicb_aops = { |
99 | .readpage = udf_adinicb_readpage, | 99 | .readpage = udf_adinicb_readpage, |
100 | .writepage = udf_adinicb_writepage, | 100 | .writepage = udf_adinicb_writepage, |
101 | .sync_page = block_sync_page, | ||
102 | .write_begin = simple_write_begin, | 101 | .write_begin = simple_write_begin, |
103 | .write_end = udf_adinicb_write_end, | 102 | .write_end = udf_adinicb_write_end, |
104 | }; | 103 | }; |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index ccc814321414..1d1358ed80c1 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -140,7 +140,6 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block) | |||
140 | const struct address_space_operations udf_aops = { | 140 | const struct address_space_operations udf_aops = { |
141 | .readpage = udf_readpage, | 141 | .readpage = udf_readpage, |
142 | .writepage = udf_writepage, | 142 | .writepage = udf_writepage, |
143 | .sync_page = block_sync_page, | ||
144 | .write_begin = udf_write_begin, | 143 | .write_begin = udf_write_begin, |
145 | .write_end = generic_write_end, | 144 | .write_end = generic_write_end, |
146 | .bmap = udf_bmap, | 145 | .bmap = udf_bmap, |
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 03c255f12df5..27a4babe7df0 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c | |||
@@ -552,7 +552,6 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block) | |||
552 | const struct address_space_operations ufs_aops = { | 552 | const struct address_space_operations ufs_aops = { |
553 | .readpage = ufs_readpage, | 553 | .readpage = ufs_readpage, |
554 | .writepage = ufs_writepage, | 554 | .writepage = ufs_writepage, |
555 | .sync_page = block_sync_page, | ||
556 | .write_begin = ufs_write_begin, | 555 | .write_begin = ufs_write_begin, |
557 | .write_end = generic_write_end, | 556 | .write_end = generic_write_end, |
558 | .bmap = ufs_bmap | 557 | .bmap = ufs_bmap |
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index e56a4f567212..11014302c9ca 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c | |||
@@ -479,7 +479,7 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) | |||
479 | break; | 479 | break; |
480 | if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) | 480 | if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) |
481 | ufs_sync_inode (inode); | 481 | ufs_sync_inode (inode); |
482 | blk_run_address_space(inode->i_mapping); | 482 | blk_flush_plug(current); |
483 | yield(); | 483 | yield(); |
484 | } | 484 | } |
485 | 485 | ||
diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 9f8775ce381c..954175928240 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h | |||
@@ -408,7 +408,7 @@ static inline unsigned _ubh_find_next_zero_bit_( | |||
408 | for (;;) { | 408 | for (;;) { |
409 | count = min_t(unsigned int, size + offset, uspi->s_bpf); | 409 | count = min_t(unsigned int, size + offset, uspi->s_bpf); |
410 | size -= count - offset; | 410 | size -= count - offset; |
411 | pos = ext2_find_next_zero_bit (ubh->bh[base]->b_data, count, offset); | 411 | pos = find_next_zero_bit_le(ubh->bh[base]->b_data, count, offset); |
412 | if (pos < count || !size) | 412 | if (pos < count || !size) |
413 | break; | 413 | break; |
414 | base++; | 414 | base++; |
diff --git a/fs/utimes.c b/fs/utimes.c index 179b58690657..ba653f3dc1bc 100644 --- a/fs/utimes.c +++ b/fs/utimes.c | |||
@@ -95,7 +95,7 @@ static int utimes_common(struct path *path, struct timespec *times) | |||
95 | if (IS_IMMUTABLE(inode)) | 95 | if (IS_IMMUTABLE(inode)) |
96 | goto mnt_drop_write_and_out; | 96 | goto mnt_drop_write_and_out; |
97 | 97 | ||
98 | if (!is_owner_or_cap(inode)) { | 98 | if (!inode_owner_or_capable(inode)) { |
99 | error = inode_permission(inode, MAY_WRITE); | 99 | error = inode_permission(inode, MAY_WRITE); |
100 | if (error) | 100 | if (error) |
101 | goto mnt_drop_write_and_out; | 101 | goto mnt_drop_write_and_out; |
diff --git a/fs/xattr.c b/fs/xattr.c index 01bb8135e14a..a19acdb81cd1 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -59,7 +59,7 @@ xattr_permission(struct inode *inode, const char *name, int mask) | |||
59 | if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) | 59 | if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) |
60 | return -EPERM; | 60 | return -EPERM; |
61 | if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && | 61 | if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && |
62 | (mask & MAY_WRITE) && !is_owner_or_cap(inode)) | 62 | (mask & MAY_WRITE) && !inode_owner_or_capable(inode)) |
63 | return -EPERM; | 63 | return -EPERM; |
64 | } | 64 | } |
65 | 65 | ||
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 8c5c87277456..52dbd14260ba 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -413,8 +413,7 @@ xfs_submit_ioend_bio( | |||
413 | if (xfs_ioend_new_eof(ioend)) | 413 | if (xfs_ioend_new_eof(ioend)) |
414 | xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); | 414 | xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); |
415 | 415 | ||
416 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? | 416 | submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); |
417 | WRITE_SYNC_PLUG : WRITE, bio); | ||
418 | } | 417 | } |
419 | 418 | ||
420 | STATIC struct bio * | 419 | STATIC struct bio * |
@@ -1495,7 +1494,6 @@ const struct address_space_operations xfs_address_space_operations = { | |||
1495 | .readpages = xfs_vm_readpages, | 1494 | .readpages = xfs_vm_readpages, |
1496 | .writepage = xfs_vm_writepage, | 1495 | .writepage = xfs_vm_writepage, |
1497 | .writepages = xfs_vm_writepages, | 1496 | .writepages = xfs_vm_writepages, |
1498 | .sync_page = block_sync_page, | ||
1499 | .releasepage = xfs_vm_releasepage, | 1497 | .releasepage = xfs_vm_releasepage, |
1500 | .invalidatepage = xfs_vm_invalidatepage, | 1498 | .invalidatepage = xfs_vm_invalidatepage, |
1501 | .write_begin = xfs_vm_write_begin, | 1499 | .write_begin = xfs_vm_write_begin, |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 5cb230f2cb4f..c05324d3282c 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -990,7 +990,7 @@ xfs_buf_lock( | |||
990 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | 990 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) |
991 | xfs_log_force(bp->b_target->bt_mount, 0); | 991 | xfs_log_force(bp->b_target->bt_mount, 0); |
992 | if (atomic_read(&bp->b_io_remaining)) | 992 | if (atomic_read(&bp->b_io_remaining)) |
993 | blk_run_address_space(bp->b_target->bt_mapping); | 993 | blk_flush_plug(current); |
994 | down(&bp->b_sema); | 994 | down(&bp->b_sema); |
995 | XB_SET_OWNER(bp); | 995 | XB_SET_OWNER(bp); |
996 | 996 | ||
@@ -1034,9 +1034,7 @@ xfs_buf_wait_unpin( | |||
1034 | set_current_state(TASK_UNINTERRUPTIBLE); | 1034 | set_current_state(TASK_UNINTERRUPTIBLE); |
1035 | if (atomic_read(&bp->b_pin_count) == 0) | 1035 | if (atomic_read(&bp->b_pin_count) == 0) |
1036 | break; | 1036 | break; |
1037 | if (atomic_read(&bp->b_io_remaining)) | 1037 | io_schedule(); |
1038 | blk_run_address_space(bp->b_target->bt_mapping); | ||
1039 | schedule(); | ||
1040 | } | 1038 | } |
1041 | remove_wait_queue(&bp->b_waiters, &wait); | 1039 | remove_wait_queue(&bp->b_waiters, &wait); |
1042 | set_current_state(TASK_RUNNING); | 1040 | set_current_state(TASK_RUNNING); |
@@ -1442,7 +1440,7 @@ xfs_buf_iowait( | |||
1442 | trace_xfs_buf_iowait(bp, _RET_IP_); | 1440 | trace_xfs_buf_iowait(bp, _RET_IP_); |
1443 | 1441 | ||
1444 | if (atomic_read(&bp->b_io_remaining)) | 1442 | if (atomic_read(&bp->b_io_remaining)) |
1445 | blk_run_address_space(bp->b_target->bt_mapping); | 1443 | blk_flush_plug(current); |
1446 | wait_for_completion(&bp->b_iowait); | 1444 | wait_for_completion(&bp->b_iowait); |
1447 | 1445 | ||
1448 | trace_xfs_buf_iowait_done(bp, _RET_IP_); | 1446 | trace_xfs_buf_iowait_done(bp, _RET_IP_); |
@@ -1666,7 +1664,6 @@ xfs_mapping_buftarg( | |||
1666 | struct inode *inode; | 1664 | struct inode *inode; |
1667 | struct address_space *mapping; | 1665 | struct address_space *mapping; |
1668 | static const struct address_space_operations mapping_aops = { | 1666 | static const struct address_space_operations mapping_aops = { |
1669 | .sync_page = block_sync_page, | ||
1670 | .migratepage = fail_migrate_page, | 1667 | .migratepage = fail_migrate_page, |
1671 | }; | 1668 | }; |
1672 | 1669 | ||
@@ -1947,7 +1944,7 @@ xfsbufd( | |||
1947 | count++; | 1944 | count++; |
1948 | } | 1945 | } |
1949 | if (count) | 1946 | if (count) |
1950 | blk_run_address_space(target->bt_mapping); | 1947 | blk_flush_plug(current); |
1951 | 1948 | ||
1952 | } while (!kthread_should_stop()); | 1949 | } while (!kthread_should_stop()); |
1953 | 1950 | ||
@@ -1995,7 +1992,7 @@ xfs_flush_buftarg( | |||
1995 | 1992 | ||
1996 | if (wait) { | 1993 | if (wait) { |
1997 | /* Expedite and wait for IO to complete. */ | 1994 | /* Expedite and wait for IO to complete. */ |
1998 | blk_run_address_space(target->bt_mapping); | 1995 | blk_flush_plug(current); |
1999 | while (!list_empty(&wait_list)) { | 1996 | while (!list_empty(&wait_list)) { |
2000 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); | 1997 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); |
2001 | 1998 | ||
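
All of the blk_run_address_space() calls above collapse into blk_flush_plug(current): with the on-stack plugging rework, queued I/O hangs off the submitting task rather than the device queue, so "kick the device" becomes "flush my own plug before sleeping". The shape of the new API as a kernel-style sketch; submit_queued_io() and done are placeholders, not kernel symbols:

struct blk_plug plug;

blk_start_plug(&plug);		/* batch bios on the current task */
submit_queued_io();		/* placeholder for the real submissions */

blk_flush_plug(current);	/* about to wait: push batched I/O out */
wait_for_completion(&done);

blk_finish_plug(&plug);		/* end of section: flushes any leftovers */
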