diff options
Diffstat (limited to 'fs')
172 files changed, 2550 insertions, 2321 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 0ba2db44e0b8..4331b3b5ee1c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -256,9 +256,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) | |||
256 | return ERR_PTR(-ENOMEM); | 256 | return ERR_PTR(-ENOMEM); |
257 | } | 257 | } |
258 | 258 | ||
259 | inode->i_mode = mode; | 259 | inode_init_owner(inode, NULL, mode); |
260 | inode->i_uid = current_fsuid(); | ||
261 | inode->i_gid = current_fsgid(); | ||
262 | inode->i_blocks = 0; | 260 | inode->i_blocks = 0; |
263 | inode->i_rdev = 0; | 261 | inode->i_rdev = 0; |
264 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 262 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
diff --git a/fs/Makefile b/fs/Makefile index 97f340f14ba2..e6ec1d309b1d 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ | |||
11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ |
12 | seq_file.o xattr.o libfs.o fs-writeback.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ | 13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ |
14 | stack.o fs_struct.o | 14 | stack.o fs_struct.o statfs.o |
15 | 15 | ||
16 | ifeq ($(CONFIG_BLOCK),y) | 16 | ifeq ($(CONFIG_BLOCK),y) |
17 | obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o | 17 | obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index adc1cb771b57..b42d5cc1d6d2 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -189,13 +189,9 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index, | |||
189 | struct key *key) | 189 | struct key *key) |
190 | { | 190 | { |
191 | struct page *page; | 191 | struct page *page; |
192 | struct file file = { | ||
193 | .private_data = key, | ||
194 | }; | ||
195 | |||
196 | _enter("{%lu},%lu", dir->i_ino, index); | 192 | _enter("{%lu},%lu", dir->i_ino, index); |
197 | 193 | ||
198 | page = read_mapping_page(dir->i_mapping, index, &file); | 194 | page = read_cache_page(dir->i_mapping, index, afs_page_filler, key); |
199 | if (!IS_ERR(page)) { | 195 | if (!IS_ERR(page)) { |
200 | kmap(page); | 196 | kmap(page); |
201 | if (!PageChecked(page)) | 197 | if (!PageChecked(page)) |
diff --git a/fs/afs/file.c b/fs/afs/file.c index 0df9bc2b724d..14d89fa58fee 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
@@ -121,34 +121,19 @@ static void afs_file_readpage_read_complete(struct page *page, | |||
121 | #endif | 121 | #endif |
122 | 122 | ||
123 | /* | 123 | /* |
124 | * AFS read page from file, directory or symlink | 124 | * read page from file, directory or symlink, given a key to use |
125 | */ | 125 | */ |
126 | static int afs_readpage(struct file *file, struct page *page) | 126 | int afs_page_filler(void *data, struct page *page) |
127 | { | 127 | { |
128 | struct afs_vnode *vnode; | 128 | struct inode *inode = page->mapping->host; |
129 | struct inode *inode; | 129 | struct afs_vnode *vnode = AFS_FS_I(inode); |
130 | struct key *key; | 130 | struct key *key = data; |
131 | size_t len; | 131 | size_t len; |
132 | off_t offset; | 132 | off_t offset; |
133 | int ret; | 133 | int ret; |
134 | 134 | ||
135 | inode = page->mapping->host; | ||
136 | |||
137 | if (file) { | ||
138 | key = file->private_data; | ||
139 | ASSERT(key != NULL); | ||
140 | } else { | ||
141 | key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell); | ||
142 | if (IS_ERR(key)) { | ||
143 | ret = PTR_ERR(key); | ||
144 | goto error_nokey; | ||
145 | } | ||
146 | } | ||
147 | |||
148 | _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); | 135 | _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); |
149 | 136 | ||
150 | vnode = AFS_FS_I(inode); | ||
151 | |||
152 | BUG_ON(!PageLocked(page)); | 137 | BUG_ON(!PageLocked(page)); |
153 | 138 | ||
154 | ret = -ESTALE; | 139 | ret = -ESTALE; |
@@ -214,31 +199,56 @@ static int afs_readpage(struct file *file, struct page *page) | |||
214 | unlock_page(page); | 199 | unlock_page(page); |
215 | } | 200 | } |
216 | 201 | ||
217 | if (!file) | ||
218 | key_put(key); | ||
219 | _leave(" = 0"); | 202 | _leave(" = 0"); |
220 | return 0; | 203 | return 0; |
221 | 204 | ||
222 | error: | 205 | error: |
223 | SetPageError(page); | 206 | SetPageError(page); |
224 | unlock_page(page); | 207 | unlock_page(page); |
225 | if (!file) | ||
226 | key_put(key); | ||
227 | error_nokey: | ||
228 | _leave(" = %d", ret); | 208 | _leave(" = %d", ret); |
229 | return ret; | 209 | return ret; |
230 | } | 210 | } |
231 | 211 | ||
232 | /* | 212 | /* |
213 | * read page from file, directory or symlink, given a file to nominate the key | ||
214 | * to be used | ||
215 | */ | ||
216 | static int afs_readpage(struct file *file, struct page *page) | ||
217 | { | ||
218 | struct key *key; | ||
219 | int ret; | ||
220 | |||
221 | if (file) { | ||
222 | key = file->private_data; | ||
223 | ASSERT(key != NULL); | ||
224 | ret = afs_page_filler(key, page); | ||
225 | } else { | ||
226 | struct inode *inode = page->mapping->host; | ||
227 | key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell); | ||
228 | if (IS_ERR(key)) { | ||
229 | ret = PTR_ERR(key); | ||
230 | } else { | ||
231 | ret = afs_page_filler(key, page); | ||
232 | key_put(key); | ||
233 | } | ||
234 | } | ||
235 | return ret; | ||
236 | } | ||
237 | |||
238 | /* | ||
233 | * read a set of pages | 239 | * read a set of pages |
234 | */ | 240 | */ |
235 | static int afs_readpages(struct file *file, struct address_space *mapping, | 241 | static int afs_readpages(struct file *file, struct address_space *mapping, |
236 | struct list_head *pages, unsigned nr_pages) | 242 | struct list_head *pages, unsigned nr_pages) |
237 | { | 243 | { |
244 | struct key *key = file->private_data; | ||
238 | struct afs_vnode *vnode; | 245 | struct afs_vnode *vnode; |
239 | int ret = 0; | 246 | int ret = 0; |
240 | 247 | ||
241 | _enter(",{%lu},,%d", mapping->host->i_ino, nr_pages); | 248 | _enter("{%d},{%lu},,%d", |
249 | key_serial(key), mapping->host->i_ino, nr_pages); | ||
250 | |||
251 | ASSERT(key != NULL); | ||
242 | 252 | ||
243 | vnode = AFS_FS_I(mapping->host); | 253 | vnode = AFS_FS_I(mapping->host); |
244 | if (vnode->flags & AFS_VNODE_DELETED) { | 254 | if (vnode->flags & AFS_VNODE_DELETED) { |
@@ -279,7 +289,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping, | |||
279 | } | 289 | } |
280 | 290 | ||
281 | /* load the missing pages from the network */ | 291 | /* load the missing pages from the network */ |
282 | ret = read_cache_pages(mapping, pages, (void *) afs_readpage, file); | 292 | ret = read_cache_pages(mapping, pages, afs_page_filler, key); |
283 | 293 | ||
284 | _leave(" = %d [netting]", ret); | 294 | _leave(" = %d [netting]", ret); |
285 | return ret; | 295 | return ret; |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index a10f2582844f..807f284cc75e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -494,6 +494,7 @@ extern const struct file_operations afs_file_operations; | |||
494 | 494 | ||
495 | extern int afs_open(struct inode *, struct file *); | 495 | extern int afs_open(struct inode *, struct file *); |
496 | extern int afs_release(struct inode *, struct file *); | 496 | extern int afs_release(struct inode *, struct file *); |
497 | extern int afs_page_filler(void *, struct page *); | ||
497 | 498 | ||
498 | /* | 499 | /* |
499 | * flock.c | 500 | * flock.c |
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index b3feddc4f7d6..a9e23039ea34 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c | |||
@@ -49,9 +49,6 @@ static unsigned long afs_mntpt_expiry_timeout = 10 * 60; | |||
49 | */ | 49 | */ |
50 | int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) | 50 | int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) |
51 | { | 51 | { |
52 | struct file file = { | ||
53 | .private_data = key, | ||
54 | }; | ||
55 | struct page *page; | 52 | struct page *page; |
56 | size_t size; | 53 | size_t size; |
57 | char *buf; | 54 | char *buf; |
@@ -61,7 +58,8 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) | |||
61 | vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); | 58 | vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); |
62 | 59 | ||
63 | /* read the contents of the symlink into the pagecache */ | 60 | /* read the contents of the symlink into the pagecache */ |
64 | page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file); | 61 | page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, |
62 | afs_page_filler, key); | ||
65 | if (IS_ERR(page)) { | 63 | if (IS_ERR(page)) { |
66 | ret = PTR_ERR(page); | 64 | ret = PTR_ERR(page); |
67 | goto out; | 65 | goto out; |
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index e4b75d6eda83..9bd4b3876c99 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c | |||
@@ -205,7 +205,7 @@ static struct inode *anon_inode_mkinode(void) | |||
205 | * that it already _is_ on the dirty list. | 205 | * that it already _is_ on the dirty list. |
206 | */ | 206 | */ |
207 | inode->i_state = I_DIRTY; | 207 | inode->i_state = I_DIRTY; |
208 | inode->i_mode = S_IRUSR | S_IWUSR; | 208 | inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; |
209 | inode->i_uid = current_fsuid(); | 209 | inode->i_uid = current_fsuid(); |
210 | inode->i_gid = current_fsgid(); | 210 | inode->i_gid = current_fsgid(); |
211 | inode->i_flags |= S_PRIVATE; | 211 | inode->i_flags |= S_PRIVATE; |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 1e41aadb1068..8f73841fc974 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
@@ -105,14 +105,12 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
105 | } | 105 | } |
106 | set_bit(ino, info->si_imap); | 106 | set_bit(ino, info->si_imap); |
107 | info->si_freei--; | 107 | info->si_freei--; |
108 | inode->i_uid = current_fsuid(); | 108 | inode_init_owner(inode, dir, mode); |
109 | inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current_fsgid(); | ||
110 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 109 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
111 | inode->i_blocks = 0; | 110 | inode->i_blocks = 0; |
112 | inode->i_op = &bfs_file_inops; | 111 | inode->i_op = &bfs_file_inops; |
113 | inode->i_fop = &bfs_file_operations; | 112 | inode->i_fop = &bfs_file_operations; |
114 | inode->i_mapping->a_ops = &bfs_aops; | 113 | inode->i_mapping->a_ops = &bfs_aops; |
115 | inode->i_mode = mode; | ||
116 | inode->i_ino = ino; | 114 | inode->i_ino = ino; |
117 | BFS_I(inode)->i_dsk_ino = ino; | 115 | BFS_I(inode)->i_dsk_ino = ino; |
118 | BFS_I(inode)->i_sblock = 0; | 116 | BFS_I(inode)->i_sblock = 0; |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 6dcee88c2e5d..26e5f5026620 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -245,37 +245,14 @@ struct super_block *freeze_bdev(struct block_device *bdev) | |||
245 | sb = get_active_super(bdev); | 245 | sb = get_active_super(bdev); |
246 | if (!sb) | 246 | if (!sb) |
247 | goto out; | 247 | goto out; |
248 | if (sb->s_flags & MS_RDONLY) { | 248 | error = freeze_super(sb); |
249 | sb->s_frozen = SB_FREEZE_TRANS; | 249 | if (error) { |
250 | up_write(&sb->s_umount); | 250 | deactivate_super(sb); |
251 | bdev->bd_fsfreeze_count--; | ||
251 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 252 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
252 | return sb; | 253 | return ERR_PTR(error); |
253 | } | ||
254 | |||
255 | sb->s_frozen = SB_FREEZE_WRITE; | ||
256 | smp_wmb(); | ||
257 | |||
258 | sync_filesystem(sb); | ||
259 | |||
260 | sb->s_frozen = SB_FREEZE_TRANS; | ||
261 | smp_wmb(); | ||
262 | |||
263 | sync_blockdev(sb->s_bdev); | ||
264 | |||
265 | if (sb->s_op->freeze_fs) { | ||
266 | error = sb->s_op->freeze_fs(sb); | ||
267 | if (error) { | ||
268 | printk(KERN_ERR | ||
269 | "VFS:Filesystem freeze failed\n"); | ||
270 | sb->s_frozen = SB_UNFROZEN; | ||
271 | deactivate_locked_super(sb); | ||
272 | bdev->bd_fsfreeze_count--; | ||
273 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | ||
274 | return ERR_PTR(error); | ||
275 | } | ||
276 | } | 254 | } |
277 | up_write(&sb->s_umount); | 255 | deactivate_super(sb); |
278 | |||
279 | out: | 256 | out: |
280 | sync_blockdev(bdev); | 257 | sync_blockdev(bdev); |
281 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 258 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
@@ -296,40 +273,22 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) | |||
296 | 273 | ||
297 | mutex_lock(&bdev->bd_fsfreeze_mutex); | 274 | mutex_lock(&bdev->bd_fsfreeze_mutex); |
298 | if (!bdev->bd_fsfreeze_count) | 275 | if (!bdev->bd_fsfreeze_count) |
299 | goto out_unlock; | 276 | goto out; |
300 | 277 | ||
301 | error = 0; | 278 | error = 0; |
302 | if (--bdev->bd_fsfreeze_count > 0) | 279 | if (--bdev->bd_fsfreeze_count > 0) |
303 | goto out_unlock; | 280 | goto out; |
304 | 281 | ||
305 | if (!sb) | 282 | if (!sb) |
306 | goto out_unlock; | 283 | goto out; |
307 | |||
308 | BUG_ON(sb->s_bdev != bdev); | ||
309 | down_write(&sb->s_umount); | ||
310 | if (sb->s_flags & MS_RDONLY) | ||
311 | goto out_unfrozen; | ||
312 | |||
313 | if (sb->s_op->unfreeze_fs) { | ||
314 | error = sb->s_op->unfreeze_fs(sb); | ||
315 | if (error) { | ||
316 | printk(KERN_ERR | ||
317 | "VFS:Filesystem thaw failed\n"); | ||
318 | sb->s_frozen = SB_FREEZE_TRANS; | ||
319 | bdev->bd_fsfreeze_count++; | ||
320 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | ||
321 | return error; | ||
322 | } | ||
323 | } | ||
324 | |||
325 | out_unfrozen: | ||
326 | sb->s_frozen = SB_UNFROZEN; | ||
327 | smp_wmb(); | ||
328 | wake_up(&sb->s_wait_unfrozen); | ||
329 | 284 | ||
330 | if (sb) | 285 | error = thaw_super(sb); |
331 | deactivate_locked_super(sb); | 286 | if (error) { |
332 | out_unlock: | 287 | bdev->bd_fsfreeze_count++; |
288 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | ||
289 | return error; | ||
290 | } | ||
291 | out: | ||
333 | mutex_unlock(&bdev->bd_fsfreeze_mutex); | 292 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
334 | return 0; | 293 | return 0; |
335 | } | 294 | } |
@@ -417,7 +376,7 @@ int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) | |||
417 | */ | 376 | */ |
418 | mutex_unlock(&bd_inode->i_mutex); | 377 | mutex_unlock(&bd_inode->i_mutex); |
419 | 378 | ||
420 | error = blkdev_issue_flush(bdev, NULL); | 379 | error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); |
421 | if (error == -EOPNOTSUPP) | 380 | if (error == -EOPNOTSUPP) |
422 | error = 0; | 381 | error = 0; |
423 | 382 | ||
@@ -668,41 +627,209 @@ void bd_forget(struct inode *inode) | |||
668 | iput(bdev->bd_inode); | 627 | iput(bdev->bd_inode); |
669 | } | 628 | } |
670 | 629 | ||
671 | int bd_claim(struct block_device *bdev, void *holder) | 630 | /** |
631 | * bd_may_claim - test whether a block device can be claimed | ||
632 | * @bdev: block device of interest | ||
633 | * @whole: whole block device containing @bdev, may equal @bdev | ||
634 | * @holder: holder trying to claim @bdev | ||
635 | * | ||
636 | * Test whether @bdev can be claimed by @holder. | ||
637 | * | ||
638 | * CONTEXT: | ||
639 | * spin_lock(&bdev_lock). | ||
640 | * | ||
641 | * RETURNS: | ||
642 | * %true if @bdev can be claimed, %false otherwise. | ||
643 | */ | ||
644 | static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, | ||
645 | void *holder) | ||
672 | { | 646 | { |
673 | int res; | ||
674 | spin_lock(&bdev_lock); | ||
675 | |||
676 | /* first decide result */ | ||
677 | if (bdev->bd_holder == holder) | 647 | if (bdev->bd_holder == holder) |
678 | res = 0; /* already a holder */ | 648 | return true; /* already a holder */ |
679 | else if (bdev->bd_holder != NULL) | 649 | else if (bdev->bd_holder != NULL) |
680 | res = -EBUSY; /* held by someone else */ | 650 | return false; /* held by someone else */ |
681 | else if (bdev->bd_contains == bdev) | 651 | else if (bdev->bd_contains == bdev) |
682 | res = 0; /* is a whole device which isn't held */ | 652 | return true; /* is a whole device which isn't held */ |
683 | 653 | ||
684 | else if (bdev->bd_contains->bd_holder == bd_claim) | 654 | else if (whole->bd_holder == bd_claim) |
685 | res = 0; /* is a partition of a device that is being partitioned */ | 655 | return true; /* is a partition of a device that is being partitioned */ |
686 | else if (bdev->bd_contains->bd_holder != NULL) | 656 | else if (whole->bd_holder != NULL) |
687 | res = -EBUSY; /* is a partition of a held device */ | 657 | return false; /* is a partition of a held device */ |
688 | else | 658 | else |
689 | res = 0; /* is a partition of an un-held device */ | 659 | return true; /* is a partition of an un-held device */ |
660 | } | ||
661 | |||
662 | /** | ||
663 | * bd_prepare_to_claim - prepare to claim a block device | ||
664 | * @bdev: block device of interest | ||
665 | * @whole: the whole device containing @bdev, may equal @bdev | ||
666 | * @holder: holder trying to claim @bdev | ||
667 | * | ||
668 | * Prepare to claim @bdev. This function fails if @bdev is already | ||
669 | * claimed by another holder and waits if another claiming is in | ||
670 | * progress. This function doesn't actually claim. On successful | ||
671 | * return, the caller has ownership of bd_claiming and bd_holder[s]. | ||
672 | * | ||
673 | * CONTEXT: | ||
674 | * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab | ||
675 | * it multiple times. | ||
676 | * | ||
677 | * RETURNS: | ||
678 | * 0 if @bdev can be claimed, -EBUSY otherwise. | ||
679 | */ | ||
680 | static int bd_prepare_to_claim(struct block_device *bdev, | ||
681 | struct block_device *whole, void *holder) | ||
682 | { | ||
683 | retry: | ||
684 | /* if someone else claimed, fail */ | ||
685 | if (!bd_may_claim(bdev, whole, holder)) | ||
686 | return -EBUSY; | ||
687 | |||
688 | /* if someone else is claiming, wait for it to finish */ | ||
689 | if (whole->bd_claiming && whole->bd_claiming != holder) { | ||
690 | wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0); | ||
691 | DEFINE_WAIT(wait); | ||
692 | |||
693 | prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); | ||
694 | spin_unlock(&bdev_lock); | ||
695 | schedule(); | ||
696 | finish_wait(wq, &wait); | ||
697 | spin_lock(&bdev_lock); | ||
698 | goto retry; | ||
699 | } | ||
700 | |||
701 | /* yay, all mine */ | ||
702 | return 0; | ||
703 | } | ||
690 | 704 | ||
691 | /* now impose change */ | 705 | /** |
692 | if (res==0) { | 706 | * bd_start_claiming - start claiming a block device |
707 | * @bdev: block device of interest | ||
708 | * @holder: holder trying to claim @bdev | ||
709 | * | ||
710 | * @bdev is about to be opened exclusively. Check @bdev can be opened | ||
711 | * exclusively and mark that an exclusive open is in progress. Each | ||
712 | * successful call to this function must be matched with a call to | ||
713 | * either bd_claim() or bd_abort_claiming(). If this function | ||
714 | * succeeds, the matching bd_claim() is guaranteed to succeed. | ||
715 | * | ||
716 | * CONTEXT: | ||
717 | * Might sleep. | ||
718 | * | ||
719 | * RETURNS: | ||
720 | * Pointer to the block device containing @bdev on success, ERR_PTR() | ||
721 | * value on failure. | ||
722 | */ | ||
723 | static struct block_device *bd_start_claiming(struct block_device *bdev, | ||
724 | void *holder) | ||
725 | { | ||
726 | struct gendisk *disk; | ||
727 | struct block_device *whole; | ||
728 | int partno, err; | ||
729 | |||
730 | might_sleep(); | ||
731 | |||
732 | /* | ||
733 | * @bdev might not have been initialized properly yet, look up | ||
734 | * and grab the outer block device the hard way. | ||
735 | */ | ||
736 | disk = get_gendisk(bdev->bd_dev, &partno); | ||
737 | if (!disk) | ||
738 | return ERR_PTR(-ENXIO); | ||
739 | |||
740 | whole = bdget_disk(disk, 0); | ||
741 | put_disk(disk); | ||
742 | if (!whole) | ||
743 | return ERR_PTR(-ENOMEM); | ||
744 | |||
745 | /* prepare to claim, if successful, mark claiming in progress */ | ||
746 | spin_lock(&bdev_lock); | ||
747 | |||
748 | err = bd_prepare_to_claim(bdev, whole, holder); | ||
749 | if (err == 0) { | ||
750 | whole->bd_claiming = holder; | ||
751 | spin_unlock(&bdev_lock); | ||
752 | return whole; | ||
753 | } else { | ||
754 | spin_unlock(&bdev_lock); | ||
755 | bdput(whole); | ||
756 | return ERR_PTR(err); | ||
757 | } | ||
758 | } | ||
759 | |||
760 | /* releases bdev_lock */ | ||
761 | static void __bd_abort_claiming(struct block_device *whole, void *holder) | ||
762 | { | ||
763 | BUG_ON(whole->bd_claiming != holder); | ||
764 | whole->bd_claiming = NULL; | ||
765 | wake_up_bit(&whole->bd_claiming, 0); | ||
766 | |||
767 | spin_unlock(&bdev_lock); | ||
768 | bdput(whole); | ||
769 | } | ||
770 | |||
771 | /** | ||
772 | * bd_abort_claiming - abort claiming a block device | ||
773 | * @whole: whole block device returned by bd_start_claiming() | ||
774 | * @holder: holder trying to claim @bdev | ||
775 | * | ||
776 | * Abort a claiming block started by bd_start_claiming(). Note that | ||
777 | * @whole is not the block device to be claimed but the whole device | ||
778 | * returned by bd_start_claiming(). | ||
779 | * | ||
780 | * CONTEXT: | ||
781 | * Grabs and releases bdev_lock. | ||
782 | */ | ||
783 | static void bd_abort_claiming(struct block_device *whole, void *holder) | ||
784 | { | ||
785 | spin_lock(&bdev_lock); | ||
786 | __bd_abort_claiming(whole, holder); /* releases bdev_lock */ | ||
787 | } | ||
788 | |||
789 | /** | ||
790 | * bd_claim - claim a block device | ||
791 | * @bdev: block device to claim | ||
792 | * @holder: holder trying to claim @bdev | ||
793 | * | ||
794 | * Try to claim @bdev which must have been opened successfully. This | ||
795 | * function may be called with or without preceding | ||
796 | * bd_start_claiming(). In the former case, this function is always | ||
797 | * successful and terminates the claiming block. | ||
798 | * | ||
799 | * CONTEXT: | ||
800 | * Might sleep. | ||
801 | * | ||
802 | * RETURNS: | ||
803 | * 0 if successful, -EBUSY if @bdev is already claimed. | ||
804 | */ | ||
805 | int bd_claim(struct block_device *bdev, void *holder) | ||
806 | { | ||
807 | struct block_device *whole = bdev->bd_contains; | ||
808 | int res; | ||
809 | |||
810 | might_sleep(); | ||
811 | |||
812 | spin_lock(&bdev_lock); | ||
813 | |||
814 | res = bd_prepare_to_claim(bdev, whole, holder); | ||
815 | if (res == 0) { | ||
693 | /* note that for a whole device bd_holders | 816 | /* note that for a whole device bd_holders |
694 | * will be incremented twice, and bd_holder will | 817 | * will be incremented twice, and bd_holder will |
695 | * be set to bd_claim before being set to holder | 818 | * be set to bd_claim before being set to holder |
696 | */ | 819 | */ |
697 | bdev->bd_contains->bd_holders ++; | 820 | whole->bd_holders++; |
698 | bdev->bd_contains->bd_holder = bd_claim; | 821 | whole->bd_holder = bd_claim; |
699 | bdev->bd_holders++; | 822 | bdev->bd_holders++; |
700 | bdev->bd_holder = holder; | 823 | bdev->bd_holder = holder; |
701 | } | 824 | } |
702 | spin_unlock(&bdev_lock); | 825 | |
826 | if (whole->bd_claiming) | ||
827 | __bd_abort_claiming(whole, holder); /* releases bdev_lock */ | ||
828 | else | ||
829 | spin_unlock(&bdev_lock); | ||
830 | |||
703 | return res; | 831 | return res; |
704 | } | 832 | } |
705 | |||
706 | EXPORT_SYMBOL(bd_claim); | 833 | EXPORT_SYMBOL(bd_claim); |
707 | 834 | ||
708 | void bd_release(struct block_device *bdev) | 835 | void bd_release(struct block_device *bdev) |
@@ -1316,6 +1443,7 @@ EXPORT_SYMBOL(blkdev_get); | |||
1316 | 1443 | ||
1317 | static int blkdev_open(struct inode * inode, struct file * filp) | 1444 | static int blkdev_open(struct inode * inode, struct file * filp) |
1318 | { | 1445 | { |
1446 | struct block_device *whole = NULL; | ||
1319 | struct block_device *bdev; | 1447 | struct block_device *bdev; |
1320 | int res; | 1448 | int res; |
1321 | 1449 | ||
@@ -1338,22 +1466,25 @@ static int blkdev_open(struct inode * inode, struct file * filp) | |||
1338 | if (bdev == NULL) | 1466 | if (bdev == NULL) |
1339 | return -ENOMEM; | 1467 | return -ENOMEM; |
1340 | 1468 | ||
1469 | if (filp->f_mode & FMODE_EXCL) { | ||
1470 | whole = bd_start_claiming(bdev, filp); | ||
1471 | if (IS_ERR(whole)) { | ||
1472 | bdput(bdev); | ||
1473 | return PTR_ERR(whole); | ||
1474 | } | ||
1475 | } | ||
1476 | |||
1341 | filp->f_mapping = bdev->bd_inode->i_mapping; | 1477 | filp->f_mapping = bdev->bd_inode->i_mapping; |
1342 | 1478 | ||
1343 | res = blkdev_get(bdev, filp->f_mode); | 1479 | res = blkdev_get(bdev, filp->f_mode); |
1344 | if (res) | ||
1345 | return res; | ||
1346 | 1480 | ||
1347 | if (filp->f_mode & FMODE_EXCL) { | 1481 | if (whole) { |
1348 | res = bd_claim(bdev, filp); | 1482 | if (res == 0) |
1349 | if (res) | 1483 | BUG_ON(bd_claim(bdev, filp) != 0); |
1350 | goto out_blkdev_put; | 1484 | else |
1485 | bd_abort_claiming(whole, filp); | ||
1351 | } | 1486 | } |
1352 | 1487 | ||
1353 | return 0; | ||
1354 | |||
1355 | out_blkdev_put: | ||
1356 | blkdev_put(bdev, filp->f_mode); | ||
1357 | return res; | 1488 | return res; |
1358 | } | 1489 | } |
1359 | 1490 | ||
@@ -1564,27 +1695,34 @@ EXPORT_SYMBOL(lookup_bdev); | |||
1564 | */ | 1695 | */ |
1565 | struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) | 1696 | struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) |
1566 | { | 1697 | { |
1567 | struct block_device *bdev; | 1698 | struct block_device *bdev, *whole; |
1568 | int error = 0; | 1699 | int error; |
1569 | 1700 | ||
1570 | bdev = lookup_bdev(path); | 1701 | bdev = lookup_bdev(path); |
1571 | if (IS_ERR(bdev)) | 1702 | if (IS_ERR(bdev)) |
1572 | return bdev; | 1703 | return bdev; |
1573 | 1704 | ||
1705 | whole = bd_start_claiming(bdev, holder); | ||
1706 | if (IS_ERR(whole)) { | ||
1707 | bdput(bdev); | ||
1708 | return whole; | ||
1709 | } | ||
1710 | |||
1574 | error = blkdev_get(bdev, mode); | 1711 | error = blkdev_get(bdev, mode); |
1575 | if (error) | 1712 | if (error) |
1576 | return ERR_PTR(error); | 1713 | goto out_abort_claiming; |
1714 | |||
1577 | error = -EACCES; | 1715 | error = -EACCES; |
1578 | if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) | 1716 | if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) |
1579 | goto blkdev_put; | 1717 | goto out_blkdev_put; |
1580 | error = bd_claim(bdev, holder); | ||
1581 | if (error) | ||
1582 | goto blkdev_put; | ||
1583 | 1718 | ||
1719 | BUG_ON(bd_claim(bdev, holder) != 0); | ||
1584 | return bdev; | 1720 | return bdev; |
1585 | 1721 | ||
1586 | blkdev_put: | 1722 | out_blkdev_put: |
1587 | blkdev_put(bdev, mode); | 1723 | blkdev_put(bdev, mode); |
1724 | out_abort_claiming: | ||
1725 | bd_abort_claiming(whole, holder); | ||
1588 | return ERR_PTR(error); | 1726 | return ERR_PTR(error); |
1589 | } | 1727 | } |
1590 | 1728 | ||
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6ef7b26724ec..8d432cd9d580 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -282,14 +282,14 @@ int btrfs_acl_chmod(struct inode *inode) | |||
282 | return ret; | 282 | return ret; |
283 | } | 283 | } |
284 | 284 | ||
285 | struct xattr_handler btrfs_xattr_acl_default_handler = { | 285 | const struct xattr_handler btrfs_xattr_acl_default_handler = { |
286 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 286 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
287 | .flags = ACL_TYPE_DEFAULT, | 287 | .flags = ACL_TYPE_DEFAULT, |
288 | .get = btrfs_xattr_acl_get, | 288 | .get = btrfs_xattr_acl_get, |
289 | .set = btrfs_xattr_acl_set, | 289 | .set = btrfs_xattr_acl_set, |
290 | }; | 290 | }; |
291 | 291 | ||
292 | struct xattr_handler btrfs_xattr_acl_access_handler = { | 292 | const struct xattr_handler btrfs_xattr_acl_access_handler = { |
293 | .prefix = POSIX_ACL_XATTR_ACCESS, | 293 | .prefix = POSIX_ACL_XATTR_ACCESS, |
294 | .flags = ACL_TYPE_ACCESS, | 294 | .flags = ACL_TYPE_ACCESS, |
295 | .get = btrfs_xattr_acl_get, | 295 | .get = btrfs_xattr_acl_get, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b34d32fdaaec..c6a4f459ad76 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1589,7 +1589,7 @@ static void btrfs_issue_discard(struct block_device *bdev, | |||
1589 | u64 start, u64 len) | 1589 | u64 start, u64 len) |
1590 | { | 1590 | { |
1591 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, | 1591 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, |
1592 | DISCARD_FL_BARRIER); | 1592 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); |
1593 | } | 1593 | } |
1594 | 1594 | ||
1595 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1595 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2bfdc641d4e3..d601629b85d1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -4121,16 +4121,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4121 | if (ret != 0) | 4121 | if (ret != 0) |
4122 | goto fail; | 4122 | goto fail; |
4123 | 4123 | ||
4124 | inode->i_uid = current_fsuid(); | 4124 | inode_init_owner(inode, dir, mode); |
4125 | |||
4126 | if (dir && (dir->i_mode & S_ISGID)) { | ||
4127 | inode->i_gid = dir->i_gid; | ||
4128 | if (S_ISDIR(mode)) | ||
4129 | mode |= S_ISGID; | ||
4130 | } else | ||
4131 | inode->i_gid = current_fsgid(); | ||
4132 | |||
4133 | inode->i_mode = mode; | ||
4134 | inode->i_ino = objectid; | 4125 | inode->i_ino = objectid; |
4135 | inode_set_bytes(inode, 0); | 4126 | inode_set_bytes(inode, 0); |
4136 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 4127 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 193b58f7d3f3..59acd3eb288a 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -282,7 +282,7 @@ err: | |||
282 | * List of handlers for synthetic system.* attributes. All real ondisk | 282 | * List of handlers for synthetic system.* attributes. All real ondisk |
283 | * attributes are handled directly. | 283 | * attributes are handled directly. |
284 | */ | 284 | */ |
285 | struct xattr_handler *btrfs_xattr_handlers[] = { | 285 | const struct xattr_handler *btrfs_xattr_handlers[] = { |
286 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 286 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
287 | &btrfs_xattr_acl_access_handler, | 287 | &btrfs_xattr_acl_access_handler, |
288 | &btrfs_xattr_acl_default_handler, | 288 | &btrfs_xattr_acl_default_handler, |
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index 721efa0346e0..7a43fd640bbb 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h | |||
@@ -21,9 +21,9 @@ | |||
21 | 21 | ||
22 | #include <linux/xattr.h> | 22 | #include <linux/xattr.h> |
23 | 23 | ||
24 | extern struct xattr_handler btrfs_xattr_acl_access_handler; | 24 | extern const struct xattr_handler btrfs_xattr_acl_access_handler; |
25 | extern struct xattr_handler btrfs_xattr_acl_default_handler; | 25 | extern const struct xattr_handler btrfs_xattr_acl_default_handler; |
26 | extern struct xattr_handler *btrfs_xattr_handlers[]; | 26 | extern const struct xattr_handler *btrfs_xattr_handlers[]; |
27 | 27 | ||
28 | extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, | 28 | extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, |
29 | void *buffer, size_t size); | 29 | void *buffer, size_t size); |
diff --git a/fs/buffer.c b/fs/buffer.c index c9c266db0624..e8aa7081d25c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -275,6 +275,7 @@ void invalidate_bdev(struct block_device *bdev) | |||
275 | return; | 275 | return; |
276 | 276 | ||
277 | invalidate_bh_lrus(); | 277 | invalidate_bh_lrus(); |
278 | lru_add_drain_all(); /* make sure all lru add caches are flushed */ | ||
278 | invalidate_mapping_pages(mapping, 0, -1); | 279 | invalidate_mapping_pages(mapping, 0, -1); |
279 | } | 280 | } |
280 | EXPORT_SYMBOL(invalidate_bdev); | 281 | EXPORT_SYMBOL(invalidate_bdev); |
@@ -560,26 +561,17 @@ repeat: | |||
560 | return err; | 561 | return err; |
561 | } | 562 | } |
562 | 563 | ||
563 | static void do_thaw_all(struct work_struct *work) | 564 | static void do_thaw_one(struct super_block *sb, void *unused) |
564 | { | 565 | { |
565 | struct super_block *sb; | ||
566 | char b[BDEVNAME_SIZE]; | 566 | char b[BDEVNAME_SIZE]; |
567 | while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) | ||
568 | printk(KERN_WARNING "Emergency Thaw on %s\n", | ||
569 | bdevname(sb->s_bdev, b)); | ||
570 | } | ||
567 | 571 | ||
568 | spin_lock(&sb_lock); | 572 | static void do_thaw_all(struct work_struct *work) |
569 | restart: | 573 | { |
570 | list_for_each_entry(sb, &super_blocks, s_list) { | 574 | iterate_supers(do_thaw_one, NULL); |
571 | sb->s_count++; | ||
572 | spin_unlock(&sb_lock); | ||
573 | down_read(&sb->s_umount); | ||
574 | while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) | ||
575 | printk(KERN_WARNING "Emergency Thaw on %s\n", | ||
576 | bdevname(sb->s_bdev, b)); | ||
577 | up_read(&sb->s_umount); | ||
578 | spin_lock(&sb_lock); | ||
579 | if (__put_super_and_need_restart(sb)) | ||
580 | goto restart; | ||
581 | } | ||
582 | spin_unlock(&sb_lock); | ||
583 | kfree(work); | 575 | kfree(work); |
584 | printk(KERN_WARNING "Emergency Thaw complete\n"); | 576 | printk(KERN_WARNING "Emergency Thaw complete\n"); |
585 | } | 577 | } |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index a9005d862ed4..d9c60b84949a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -274,7 +274,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
274 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 274 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; |
275 | int rc = 0; | 275 | int rc = 0; |
276 | struct page **pages; | 276 | struct page **pages; |
277 | struct pagevec pvec; | ||
278 | loff_t offset; | 277 | loff_t offset; |
279 | u64 len; | 278 | u64 len; |
280 | 279 | ||
@@ -297,8 +296,6 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
297 | if (rc < 0) | 296 | if (rc < 0) |
298 | goto out; | 297 | goto out; |
299 | 298 | ||
300 | /* set uptodate and add to lru in pagevec-sized chunks */ | ||
301 | pagevec_init(&pvec, 0); | ||
302 | for (; !list_empty(page_list) && len > 0; | 299 | for (; !list_empty(page_list) && len > 0; |
303 | rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) { | 300 | rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) { |
304 | struct page *page = | 301 | struct page *page = |
@@ -312,7 +309,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
312 | zero_user_segment(page, s, PAGE_CACHE_SIZE); | 309 | zero_user_segment(page, s, PAGE_CACHE_SIZE); |
313 | } | 310 | } |
314 | 311 | ||
315 | if (add_to_page_cache(page, mapping, page->index, GFP_NOFS)) { | 312 | if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { |
316 | page_cache_release(page); | 313 | page_cache_release(page); |
317 | dout("readpages %p add_to_page_cache failed %p\n", | 314 | dout("readpages %p add_to_page_cache failed %p\n", |
318 | inode, page); | 315 | inode, page); |
@@ -323,10 +320,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
323 | flush_dcache_page(page); | 320 | flush_dcache_page(page); |
324 | SetPageUptodate(page); | 321 | SetPageUptodate(page); |
325 | unlock_page(page); | 322 | unlock_page(page); |
326 | if (pagevec_add(&pvec, page) == 0) | 323 | page_cache_release(page); |
327 | pagevec_lru_add_file(&pvec); /* add to lru */ | ||
328 | } | 324 | } |
329 | pagevec_lru_add_file(&pvec); | ||
330 | rc = 0; | 325 | rc = 0; |
331 | 326 | ||
332 | out: | 327 | out: |
@@ -568,7 +563,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
568 | ceph_release_pages(req->r_pages, req->r_num_pages); | 563 | ceph_release_pages(req->r_pages, req->r_num_pages); |
569 | if (req->r_pages_from_pool) | 564 | if (req->r_pages_from_pool) |
570 | mempool_free(req->r_pages, | 565 | mempool_free(req->r_pages, |
571 | ceph_client(inode->i_sb)->wb_pagevec_pool); | 566 | ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool); |
572 | else | 567 | else |
573 | kfree(req->r_pages); | 568 | kfree(req->r_pages); |
574 | ceph_osdc_put_request(req); | 569 | ceph_osdc_put_request(req); |
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 818afe72e6c7..9f46de2ba7a7 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c | |||
@@ -150,7 +150,8 @@ int ceph_build_auth_request(struct ceph_auth_client *ac, | |||
150 | 150 | ||
151 | ret = ac->ops->build_request(ac, p + sizeof(u32), end); | 151 | ret = ac->ops->build_request(ac, p + sizeof(u32), end); |
152 | if (ret < 0) { | 152 | if (ret < 0) { |
153 | pr_err("error %d building request\n", ret); | 153 | pr_err("error %d building auth method %s request\n", ret, |
154 | ac->ops->name); | ||
154 | return ret; | 155 | return ret; |
155 | } | 156 | } |
156 | dout(" built request %d bytes\n", ret); | 157 | dout(" built request %d bytes\n", ret); |
@@ -216,8 +217,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, | |||
216 | if (ac->protocol != protocol) { | 217 | if (ac->protocol != protocol) { |
217 | ret = ceph_auth_init_protocol(ac, protocol); | 218 | ret = ceph_auth_init_protocol(ac, protocol); |
218 | if (ret) { | 219 | if (ret) { |
219 | pr_err("error %d on auth protocol %d init\n", | 220 | pr_err("error %d on auth method %s init\n", |
220 | ret, protocol); | 221 | ret, ac->ops->name); |
221 | goto out; | 222 | goto out; |
222 | } | 223 | } |
223 | } | 224 | } |
@@ -229,7 +230,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, | |||
229 | if (ret == -EAGAIN) { | 230 | if (ret == -EAGAIN) { |
230 | return ceph_build_auth_request(ac, reply_buf, reply_len); | 231 | return ceph_build_auth_request(ac, reply_buf, reply_len); |
231 | } else if (ret) { | 232 | } else if (ret) { |
232 | pr_err("authentication error %d\n", ret); | 233 | pr_err("auth method '%s' error %d\n", ac->ops->name, ret); |
233 | return ret; | 234 | return ret; |
234 | } | 235 | } |
235 | return 0; | 236 | return 0; |
diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h index ca4f57cfb267..4429a707c021 100644 --- a/fs/ceph/auth.h +++ b/fs/ceph/auth.h | |||
@@ -15,6 +15,8 @@ struct ceph_auth_client; | |||
15 | struct ceph_authorizer; | 15 | struct ceph_authorizer; |
16 | 16 | ||
17 | struct ceph_auth_client_ops { | 17 | struct ceph_auth_client_ops { |
18 | const char *name; | ||
19 | |||
18 | /* | 20 | /* |
19 | * true if we are authenticated and can connect to | 21 | * true if we are authenticated and can connect to |
20 | * services. | 22 | * services. |
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c index 8cd9e3af07f7..24407c119291 100644 --- a/fs/ceph/auth_none.c +++ b/fs/ceph/auth_none.c | |||
@@ -94,6 +94,7 @@ static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac, | |||
94 | } | 94 | } |
95 | 95 | ||
96 | static const struct ceph_auth_client_ops ceph_auth_none_ops = { | 96 | static const struct ceph_auth_client_ops ceph_auth_none_ops = { |
97 | .name = "none", | ||
97 | .reset = reset, | 98 | .reset = reset, |
98 | .destroy = destroy, | 99 | .destroy = destroy, |
99 | .is_authenticated = is_authenticated, | 100 | .is_authenticated = is_authenticated, |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index fee5a08da881..7b206231566d 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
@@ -127,7 +127,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
127 | int ret; | 127 | int ret; |
128 | char *dbuf; | 128 | char *dbuf; |
129 | char *ticket_buf; | 129 | char *ticket_buf; |
130 | u8 struct_v; | 130 | u8 reply_struct_v; |
131 | 131 | ||
132 | dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); | 132 | dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); |
133 | if (!dbuf) | 133 | if (!dbuf) |
@@ -139,14 +139,14 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
139 | goto out_dbuf; | 139 | goto out_dbuf; |
140 | 140 | ||
141 | ceph_decode_need(&p, end, 1 + sizeof(u32), bad); | 141 | ceph_decode_need(&p, end, 1 + sizeof(u32), bad); |
142 | struct_v = ceph_decode_8(&p); | 142 | reply_struct_v = ceph_decode_8(&p); |
143 | if (struct_v != 1) | 143 | if (reply_struct_v != 1) |
144 | goto bad; | 144 | goto bad; |
145 | num = ceph_decode_32(&p); | 145 | num = ceph_decode_32(&p); |
146 | dout("%d tickets\n", num); | 146 | dout("%d tickets\n", num); |
147 | while (num--) { | 147 | while (num--) { |
148 | int type; | 148 | int type; |
149 | u8 struct_v; | 149 | u8 tkt_struct_v, blob_struct_v; |
150 | struct ceph_x_ticket_handler *th; | 150 | struct ceph_x_ticket_handler *th; |
151 | void *dp, *dend; | 151 | void *dp, *dend; |
152 | int dlen; | 152 | int dlen; |
@@ -165,8 +165,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
165 | type = ceph_decode_32(&p); | 165 | type = ceph_decode_32(&p); |
166 | dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); | 166 | dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); |
167 | 167 | ||
168 | struct_v = ceph_decode_8(&p); | 168 | tkt_struct_v = ceph_decode_8(&p); |
169 | if (struct_v != 1) | 169 | if (tkt_struct_v != 1) |
170 | goto bad; | 170 | goto bad; |
171 | 171 | ||
172 | th = get_ticket_handler(ac, type); | 172 | th = get_ticket_handler(ac, type); |
@@ -186,8 +186,8 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
186 | dend = dbuf + dlen; | 186 | dend = dbuf + dlen; |
187 | dp = dbuf; | 187 | dp = dbuf; |
188 | 188 | ||
189 | struct_v = ceph_decode_8(&dp); | 189 | tkt_struct_v = ceph_decode_8(&dp); |
190 | if (struct_v != 1) | 190 | if (tkt_struct_v != 1) |
191 | goto bad; | 191 | goto bad; |
192 | 192 | ||
193 | memcpy(&old_key, &th->session_key, sizeof(old_key)); | 193 | memcpy(&old_key, &th->session_key, sizeof(old_key)); |
@@ -224,7 +224,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | |||
224 | tpend = tp + dlen; | 224 | tpend = tp + dlen; |
225 | dout(" ticket blob is %d bytes\n", dlen); | 225 | dout(" ticket blob is %d bytes\n", dlen); |
226 | ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); | 226 | ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); |
227 | struct_v = ceph_decode_8(&tp); | 227 | blob_struct_v = ceph_decode_8(&tp); |
228 | new_secret_id = ceph_decode_64(&tp); | 228 | new_secret_id = ceph_decode_64(&tp); |
229 | ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); | 229 | ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); |
230 | if (ret) | 230 | if (ret) |
@@ -618,6 +618,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, | |||
618 | 618 | ||
619 | 619 | ||
620 | static const struct ceph_auth_client_ops ceph_x_ops = { | 620 | static const struct ceph_auth_client_ops ceph_x_ops = { |
621 | .name = "x", | ||
621 | .is_authenticated = ceph_x_is_authenticated, | 622 | .is_authenticated = ceph_x_is_authenticated, |
622 | .build_request = ceph_x_build_request, | 623 | .build_request = ceph_x_build_request, |
623 | .handle_reply = ceph_x_handle_reply, | 624 | .handle_reply = ceph_x_handle_reply, |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index d9400534b279..0dd0b81e64f7 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -867,7 +867,8 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
867 | { | 867 | { |
868 | struct ceph_mds_session *session = cap->session; | 868 | struct ceph_mds_session *session = cap->session; |
869 | struct ceph_inode_info *ci = cap->ci; | 869 | struct ceph_inode_info *ci = cap->ci; |
870 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 870 | struct ceph_mds_client *mdsc = |
871 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | ||
871 | int removed = 0; | 872 | int removed = 0; |
872 | 873 | ||
873 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | 874 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); |
@@ -937,9 +938,9 @@ static int send_cap_msg(struct ceph_mds_session *session, | |||
937 | seq, issue_seq, mseq, follows, size, max_size, | 938 | seq, issue_seq, mseq, follows, size, max_size, |
938 | xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); | 939 | xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0); |
939 | 940 | ||
940 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), 0, 0, NULL); | 941 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS); |
941 | if (IS_ERR(msg)) | 942 | if (!msg) |
942 | return PTR_ERR(msg); | 943 | return -ENOMEM; |
943 | 944 | ||
944 | msg->hdr.tid = cpu_to_le64(flush_tid); | 945 | msg->hdr.tid = cpu_to_le64(flush_tid); |
945 | 946 | ||
@@ -1298,7 +1299,8 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
1298 | */ | 1299 | */ |
1299 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | 1300 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) |
1300 | { | 1301 | { |
1301 | struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 1302 | struct ceph_mds_client *mdsc = |
1303 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | ||
1302 | struct inode *inode = &ci->vfs_inode; | 1304 | struct inode *inode = &ci->vfs_inode; |
1303 | int was = ci->i_dirty_caps; | 1305 | int was = ci->i_dirty_caps; |
1304 | int dirty = 0; | 1306 | int dirty = 0; |
@@ -1336,7 +1338,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1336 | static int __mark_caps_flushing(struct inode *inode, | 1338 | static int __mark_caps_flushing(struct inode *inode, |
1337 | struct ceph_mds_session *session) | 1339 | struct ceph_mds_session *session) |
1338 | { | 1340 | { |
1339 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 1341 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
1340 | struct ceph_inode_info *ci = ceph_inode(inode); | 1342 | struct ceph_inode_info *ci = ceph_inode(inode); |
1341 | int flushing; | 1343 | int flushing; |
1342 | 1344 | ||
@@ -1663,7 +1665,7 @@ ack: | |||
1663 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, | 1665 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, |
1664 | unsigned *flush_tid) | 1666 | unsigned *flush_tid) |
1665 | { | 1667 | { |
1666 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 1668 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
1667 | struct ceph_inode_info *ci = ceph_inode(inode); | 1669 | struct ceph_inode_info *ci = ceph_inode(inode); |
1668 | int unlock_session = session ? 0 : 1; | 1670 | int unlock_session = session ? 0 : 1; |
1669 | int flushing = 0; | 1671 | int flushing = 0; |
@@ -1716,10 +1718,9 @@ out_unlocked: | |||
1716 | static int caps_are_flushed(struct inode *inode, unsigned tid) | 1718 | static int caps_are_flushed(struct inode *inode, unsigned tid) |
1717 | { | 1719 | { |
1718 | struct ceph_inode_info *ci = ceph_inode(inode); | 1720 | struct ceph_inode_info *ci = ceph_inode(inode); |
1719 | int dirty, i, ret = 1; | 1721 | int i, ret = 1; |
1720 | 1722 | ||
1721 | spin_lock(&inode->i_lock); | 1723 | spin_lock(&inode->i_lock); |
1722 | dirty = __ceph_caps_dirty(ci); | ||
1723 | for (i = 0; i < CEPH_CAP_BITS; i++) | 1724 | for (i = 0; i < CEPH_CAP_BITS; i++) |
1724 | if ((ci->i_flushing_caps & (1 << i)) && | 1725 | if ((ci->i_flushing_caps & (1 << i)) && |
1725 | ci->i_cap_flush_tid[i] <= tid) { | 1726 | ci->i_cap_flush_tid[i] <= tid) { |
@@ -1829,7 +1830,8 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1829 | err = wait_event_interruptible(ci->i_cap_wq, | 1830 | err = wait_event_interruptible(ci->i_cap_wq, |
1830 | caps_are_flushed(inode, flush_tid)); | 1831 | caps_are_flushed(inode, flush_tid)); |
1831 | } else { | 1832 | } else { |
1832 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 1833 | struct ceph_mds_client *mdsc = |
1834 | &ceph_sb_to_client(inode->i_sb)->mdsc; | ||
1833 | 1835 | ||
1834 | spin_lock(&inode->i_lock); | 1836 | spin_lock(&inode->i_lock); |
1835 | if (__ceph_caps_dirty(ci)) | 1837 | if (__ceph_caps_dirty(ci)) |
@@ -2411,7 +2413,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
2411 | __releases(inode->i_lock) | 2413 | __releases(inode->i_lock) |
2412 | { | 2414 | { |
2413 | struct ceph_inode_info *ci = ceph_inode(inode); | 2415 | struct ceph_inode_info *ci = ceph_inode(inode); |
2414 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 2416 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
2415 | unsigned seq = le32_to_cpu(m->seq); | 2417 | unsigned seq = le32_to_cpu(m->seq); |
2416 | int dirty = le32_to_cpu(m->dirty); | 2418 | int dirty = le32_to_cpu(m->dirty); |
2417 | int cleaned = 0; | 2419 | int cleaned = 0; |
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 0c2241ef3653..3b9eeed097b3 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h | |||
@@ -19,7 +19,7 @@ | |||
19 | * Ceph release version | 19 | * Ceph release version |
20 | */ | 20 | */ |
21 | #define CEPH_VERSION_MAJOR 0 | 21 | #define CEPH_VERSION_MAJOR 0 |
22 | #define CEPH_VERSION_MINOR 19 | 22 | #define CEPH_VERSION_MINOR 20 |
23 | #define CEPH_VERSION_PATCH 0 | 23 | #define CEPH_VERSION_PATCH 0 |
24 | 24 | ||
25 | #define _CEPH_STRINGIFY(x) #x | 25 | #define _CEPH_STRINGIFY(x) #x |
@@ -36,7 +36,7 @@ | |||
36 | * client-facing protocol. | 36 | * client-facing protocol. |
37 | */ | 37 | */ |
38 | #define CEPH_OSD_PROTOCOL 8 /* cluster internal */ | 38 | #define CEPH_OSD_PROTOCOL 8 /* cluster internal */ |
39 | #define CEPH_MDS_PROTOCOL 9 /* cluster internal */ | 39 | #define CEPH_MDS_PROTOCOL 12 /* cluster internal */ |
40 | #define CEPH_MON_PROTOCOL 5 /* cluster internal */ | 40 | #define CEPH_MON_PROTOCOL 5 /* cluster internal */ |
41 | #define CEPH_OSDC_PROTOCOL 24 /* server/client */ | 41 | #define CEPH_OSDC_PROTOCOL 24 /* server/client */ |
42 | #define CEPH_MDSC_PROTOCOL 32 /* server/client */ | 42 | #define CEPH_MDSC_PROTOCOL 32 /* server/client */ |
@@ -53,8 +53,18 @@ | |||
53 | /* | 53 | /* |
54 | * feature bits | 54 | * feature bits |
55 | */ | 55 | */ |
56 | #define CEPH_FEATURE_SUPPORTED 0 | 56 | #define CEPH_FEATURE_UID 1 |
57 | #define CEPH_FEATURE_REQUIRED 0 | 57 | #define CEPH_FEATURE_NOSRCADDR 2 |
58 | #define CEPH_FEATURE_FLOCK 4 | ||
59 | |||
60 | #define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
61 | #define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID | ||
62 | #define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK | ||
63 | #define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID | ||
64 | #define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
65 | #define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID | ||
66 | #define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
67 | #define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
58 | 68 | ||
59 | 69 | ||
60 | /* | 70 | /* |
@@ -91,6 +101,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | |||
91 | #define CEPH_AUTH_NONE 0x1 | 101 | #define CEPH_AUTH_NONE 0x1 |
92 | #define CEPH_AUTH_CEPHX 0x2 | 102 | #define CEPH_AUTH_CEPHX 0x2 |
93 | 103 | ||
104 | #define CEPH_AUTH_UID_DEFAULT ((__u64) -1) | ||
105 | |||
94 | 106 | ||
95 | /********************************************* | 107 | /********************************************* |
96 | * message layer | 108 | * message layer |
@@ -128,11 +140,27 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | |||
128 | #define CEPH_MSG_CLIENT_SNAP 0x312 | 140 | #define CEPH_MSG_CLIENT_SNAP 0x312 |
129 | #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 | 141 | #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 |
130 | 142 | ||
143 | /* pool ops */ | ||
144 | #define CEPH_MSG_POOLOP_REPLY 48 | ||
145 | #define CEPH_MSG_POOLOP 49 | ||
146 | |||
147 | |||
131 | /* osd */ | 148 | /* osd */ |
132 | #define CEPH_MSG_OSD_MAP 41 | 149 | #define CEPH_MSG_OSD_MAP 41 |
133 | #define CEPH_MSG_OSD_OP 42 | 150 | #define CEPH_MSG_OSD_OP 42 |
134 | #define CEPH_MSG_OSD_OPREPLY 43 | 151 | #define CEPH_MSG_OSD_OPREPLY 43 |
135 | 152 | ||
153 | /* pool operations */ | ||
154 | enum { | ||
155 | POOL_OP_CREATE = 0x01, | ||
156 | POOL_OP_DELETE = 0x02, | ||
157 | POOL_OP_AUID_CHANGE = 0x03, | ||
158 | POOL_OP_CREATE_SNAP = 0x11, | ||
159 | POOL_OP_DELETE_SNAP = 0x12, | ||
160 | POOL_OP_CREATE_UNMANAGED_SNAP = 0x21, | ||
161 | POOL_OP_DELETE_UNMANAGED_SNAP = 0x22, | ||
162 | }; | ||
163 | |||
136 | struct ceph_mon_request_header { | 164 | struct ceph_mon_request_header { |
137 | __le64 have_version; | 165 | __le64 have_version; |
138 | __le16 session_mon; | 166 | __le16 session_mon; |
@@ -155,6 +183,31 @@ struct ceph_mon_statfs_reply { | |||
155 | struct ceph_statfs st; | 183 | struct ceph_statfs st; |
156 | } __attribute__ ((packed)); | 184 | } __attribute__ ((packed)); |
157 | 185 | ||
186 | const char *ceph_pool_op_name(int op); | ||
187 | |||
188 | struct ceph_mon_poolop { | ||
189 | struct ceph_mon_request_header monhdr; | ||
190 | struct ceph_fsid fsid; | ||
191 | __le32 pool; | ||
192 | __le32 op; | ||
193 | __le64 auid; | ||
194 | __le64 snapid; | ||
195 | __le32 name_len; | ||
196 | } __attribute__ ((packed)); | ||
197 | |||
198 | struct ceph_mon_poolop_reply { | ||
199 | struct ceph_mon_request_header monhdr; | ||
200 | struct ceph_fsid fsid; | ||
201 | __le32 reply_code; | ||
202 | __le32 epoch; | ||
203 | char has_data; | ||
204 | char data[0]; | ||
205 | } __attribute__ ((packed)); | ||
206 | |||
207 | struct ceph_mon_unmanaged_snap { | ||
208 | __le64 snapid; | ||
209 | } __attribute__ ((packed)); | ||
210 | |||
158 | struct ceph_osd_getmap { | 211 | struct ceph_osd_getmap { |
159 | struct ceph_mon_request_header monhdr; | 212 | struct ceph_mon_request_header monhdr; |
160 | struct ceph_fsid fsid; | 213 | struct ceph_fsid fsid; |
@@ -308,6 +361,7 @@ union ceph_mds_request_args { | |||
308 | struct { | 361 | struct { |
309 | __le32 frag; /* which dir fragment */ | 362 | __le32 frag; /* which dir fragment */ |
310 | __le32 max_entries; /* how many dentries to grab */ | 363 | __le32 max_entries; /* how many dentries to grab */ |
364 | __le32 max_bytes; | ||
311 | } __attribute__ ((packed)) readdir; | 365 | } __attribute__ ((packed)) readdir; |
312 | struct { | 366 | struct { |
313 | __le32 mode; | 367 | __le32 mode; |
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c index 8e4be6a80c62..7503aee828ce 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/ceph_strings.c | |||
@@ -10,7 +10,6 @@ const char *ceph_entity_type_name(int type) | |||
10 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | 10 | case CEPH_ENTITY_TYPE_OSD: return "osd"; |
11 | case CEPH_ENTITY_TYPE_MON: return "mon"; | 11 | case CEPH_ENTITY_TYPE_MON: return "mon"; |
12 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | 12 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; |
13 | case CEPH_ENTITY_TYPE_ADMIN: return "admin"; | ||
14 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; | 13 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; |
15 | default: return "unknown"; | 14 | default: return "unknown"; |
16 | } | 15 | } |
@@ -45,6 +44,7 @@ const char *ceph_osd_op_name(int op) | |||
45 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | 44 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; |
46 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | 45 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; |
47 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | 46 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; |
47 | case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; | ||
48 | 48 | ||
49 | case CEPH_OSD_OP_PULL: return "pull"; | 49 | case CEPH_OSD_OP_PULL: return "pull"; |
50 | case CEPH_OSD_OP_PUSH: return "push"; | 50 | case CEPH_OSD_OP_PUSH: return "push"; |
@@ -174,3 +174,17 @@ const char *ceph_snap_op_name(int o) | |||
174 | } | 174 | } |
175 | return "???"; | 175 | return "???"; |
176 | } | 176 | } |
177 | |||
178 | const char *ceph_pool_op_name(int op) | ||
179 | { | ||
180 | switch (op) { | ||
181 | case POOL_OP_CREATE: return "create"; | ||
182 | case POOL_OP_DELETE: return "delete"; | ||
183 | case POOL_OP_AUID_CHANGE: return "auid change"; | ||
184 | case POOL_OP_CREATE_SNAP: return "create snap"; | ||
185 | case POOL_OP_DELETE_SNAP: return "delete snap"; | ||
186 | case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||
187 | case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||
188 | } | ||
189 | return "???"; | ||
190 | } | ||
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f7048da92acc..3be33fb066cc 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -113,7 +113,7 @@ static int osdmap_show(struct seq_file *s, void *p) | |||
113 | static int monc_show(struct seq_file *s, void *p) | 113 | static int monc_show(struct seq_file *s, void *p) |
114 | { | 114 | { |
115 | struct ceph_client *client = s->private; | 115 | struct ceph_client *client = s->private; |
116 | struct ceph_mon_statfs_request *req; | 116 | struct ceph_mon_generic_request *req; |
117 | struct ceph_mon_client *monc = &client->monc; | 117 | struct ceph_mon_client *monc = &client->monc; |
118 | struct rb_node *rp; | 118 | struct rb_node *rp; |
119 | 119 | ||
@@ -126,9 +126,14 @@ static int monc_show(struct seq_file *s, void *p) | |||
126 | if (monc->want_next_osdmap) | 126 | if (monc->want_next_osdmap) |
127 | seq_printf(s, "want next osdmap\n"); | 127 | seq_printf(s, "want next osdmap\n"); |
128 | 128 | ||
129 | for (rp = rb_first(&monc->statfs_request_tree); rp; rp = rb_next(rp)) { | 129 | for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { |
130 | req = rb_entry(rp, struct ceph_mon_statfs_request, node); | 130 | __u16 op; |
131 | seq_printf(s, "%lld statfs\n", req->tid); | 131 | req = rb_entry(rp, struct ceph_mon_generic_request, node); |
132 | op = le16_to_cpu(req->request->hdr.type); | ||
133 | if (op == CEPH_MSG_STATFS) | ||
134 | seq_printf(s, "%lld statfs\n", req->tid); | ||
135 | else | ||
136 | seq_printf(s, "%lld unknown\n", req->tid); | ||
132 | } | 137 | } |
133 | 138 | ||
134 | mutex_unlock(&monc->mutex); | 139 | mutex_unlock(&monc->mutex); |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 650d2db5ed26..4fd30900eff7 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -51,8 +51,11 @@ int ceph_init_dentry(struct dentry *dentry) | |||
51 | return -ENOMEM; /* oh well */ | 51 | return -ENOMEM; /* oh well */ |
52 | 52 | ||
53 | spin_lock(&dentry->d_lock); | 53 | spin_lock(&dentry->d_lock); |
54 | if (dentry->d_fsdata) /* lost a race */ | 54 | if (dentry->d_fsdata) { |
55 | /* lost a race */ | ||
56 | kmem_cache_free(ceph_dentry_cachep, di); | ||
55 | goto out_unlock; | 57 | goto out_unlock; |
58 | } | ||
56 | di->dentry = dentry; | 59 | di->dentry = dentry; |
57 | di->lease_session = NULL; | 60 | di->lease_session = NULL; |
58 | dentry->d_fsdata = di; | 61 | dentry->d_fsdata = di; |
@@ -125,7 +128,8 @@ more: | |||
125 | dentry = list_entry(p, struct dentry, d_u.d_child); | 128 | dentry = list_entry(p, struct dentry, d_u.d_child); |
126 | di = ceph_dentry(dentry); | 129 | di = ceph_dentry(dentry); |
127 | while (1) { | 130 | while (1) { |
128 | dout(" p %p/%p d_subdirs %p/%p\n", p->prev, p->next, | 131 | dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, |
132 | d_unhashed(dentry) ? "!hashed" : "hashed", | ||
129 | parent->d_subdirs.prev, parent->d_subdirs.next); | 133 | parent->d_subdirs.prev, parent->d_subdirs.next); |
130 | if (p == &parent->d_subdirs) { | 134 | if (p == &parent->d_subdirs) { |
131 | fi->at_end = 1; | 135 | fi->at_end = 1; |
@@ -229,6 +233,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
229 | u32 ftype; | 233 | u32 ftype; |
230 | struct ceph_mds_reply_info_parsed *rinfo; | 234 | struct ceph_mds_reply_info_parsed *rinfo; |
231 | const int max_entries = client->mount_args->max_readdir; | 235 | const int max_entries = client->mount_args->max_readdir; |
236 | const int max_bytes = client->mount_args->max_readdir_bytes; | ||
232 | 237 | ||
233 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | 238 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); |
234 | if (fi->at_end) | 239 | if (fi->at_end) |
@@ -312,6 +317,7 @@ more: | |||
312 | req->r_readdir_offset = fi->next_offset; | 317 | req->r_readdir_offset = fi->next_offset; |
313 | req->r_args.readdir.frag = cpu_to_le32(frag); | 318 | req->r_args.readdir.frag = cpu_to_le32(frag); |
314 | req->r_args.readdir.max_entries = cpu_to_le32(max_entries); | 319 | req->r_args.readdir.max_entries = cpu_to_le32(max_entries); |
320 | req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes); | ||
315 | req->r_num_caps = max_entries + 1; | 321 | req->r_num_caps = max_entries + 1; |
316 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 322 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
317 | if (err < 0) { | 323 | if (err < 0) { |
@@ -335,7 +341,7 @@ more: | |||
335 | if (req->r_reply_info.dir_end) { | 341 | if (req->r_reply_info.dir_end) { |
336 | kfree(fi->last_name); | 342 | kfree(fi->last_name); |
337 | fi->last_name = NULL; | 343 | fi->last_name = NULL; |
338 | fi->next_offset = 0; | 344 | fi->next_offset = 2; |
339 | } else { | 345 | } else { |
340 | rinfo = &req->r_reply_info; | 346 | rinfo = &req->r_reply_info; |
341 | err = note_last_dentry(fi, | 347 | err = note_last_dentry(fi, |
@@ -478,7 +484,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) | |||
478 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | 484 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, |
479 | struct dentry *dentry, int err) | 485 | struct dentry *dentry, int err) |
480 | { | 486 | { |
481 | struct ceph_client *client = ceph_client(dentry->d_sb); | 487 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); |
482 | struct inode *parent = dentry->d_parent->d_inode; | 488 | struct inode *parent = dentry->d_parent->d_inode; |
483 | 489 | ||
484 | /* .snap dir? */ | 490 | /* .snap dir? */ |
@@ -568,7 +574,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||
568 | !is_root_ceph_dentry(dir, dentry) && | 574 | !is_root_ceph_dentry(dir, dentry) && |
569 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 575 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && |
570 | (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { | 576 | (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { |
571 | di->offset = ci->i_max_offset++; | ||
572 | spin_unlock(&dir->i_lock); | 577 | spin_unlock(&dir->i_lock); |
573 | dout(" dir %p complete, -ENOENT\n", dir); | 578 | dout(" dir %p complete, -ENOENT\n", dir); |
574 | d_add(dentry, NULL); | 579 | d_add(dentry, NULL); |
@@ -888,13 +893,22 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
888 | 893 | ||
889 | /* ensure target dentry is invalidated, despite | 894 | /* ensure target dentry is invalidated, despite |
890 | rehashing bug in vfs_rename_dir */ | 895 | rehashing bug in vfs_rename_dir */ |
891 | new_dentry->d_time = jiffies; | 896 | ceph_invalidate_dentry_lease(new_dentry); |
892 | ceph_dentry(new_dentry)->lease_shared_gen = 0; | ||
893 | } | 897 | } |
894 | ceph_mdsc_put_request(req); | 898 | ceph_mdsc_put_request(req); |
895 | return err; | 899 | return err; |
896 | } | 900 | } |
897 | 901 | ||
902 | /* | ||
903 | * Ensure a dentry lease will no longer revalidate. | ||
904 | */ | ||
905 | void ceph_invalidate_dentry_lease(struct dentry *dentry) | ||
906 | { | ||
907 | spin_lock(&dentry->d_lock); | ||
908 | dentry->d_time = jiffies; | ||
909 | ceph_dentry(dentry)->lease_shared_gen = 0; | ||
910 | spin_unlock(&dentry->d_lock); | ||
911 | } | ||
898 | 912 | ||
899 | /* | 913 | /* |
900 | * Check if dentry lease is valid. If not, delete the lease. Try to | 914 | * Check if dentry lease is valid. If not, delete the lease. Try to |
@@ -972,8 +986,9 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
972 | { | 986 | { |
973 | struct inode *dir = dentry->d_parent->d_inode; | 987 | struct inode *dir = dentry->d_parent->d_inode; |
974 | 988 | ||
975 | dout("d_revalidate %p '%.*s' inode %p\n", dentry, | 989 | dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, |
976 | dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 990 | dentry->d_name.len, dentry->d_name.name, dentry->d_inode, |
991 | ceph_dentry(dentry)->offset); | ||
977 | 992 | ||
978 | /* always trust cached snapped dentries, snapdir dentry */ | 993 | /* always trust cached snapped dentries, snapdir dentry */ |
979 | if (ceph_snap(dir) != CEPH_NOSNAP) { | 994 | if (ceph_snap(dir) != CEPH_NOSNAP) { |
@@ -1050,7 +1065,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, | |||
1050 | struct ceph_inode_info *ci = ceph_inode(inode); | 1065 | struct ceph_inode_info *ci = ceph_inode(inode); |
1051 | int left; | 1066 | int left; |
1052 | 1067 | ||
1053 | if (!ceph_test_opt(ceph_client(inode->i_sb), DIRSTAT)) | 1068 | if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) |
1054 | return -EISDIR; | 1069 | return -EISDIR; |
1055 | 1070 | ||
1056 | if (!cf->dir_info) { | 1071 | if (!cf->dir_info) { |
@@ -1152,7 +1167,7 @@ void ceph_dentry_lru_add(struct dentry *dn) | |||
1152 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, | 1167 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, |
1153 | dn->d_name.len, dn->d_name.name); | 1168 | dn->d_name.len, dn->d_name.name); |
1154 | if (di) { | 1169 | if (di) { |
1155 | mdsc = &ceph_client(dn->d_sb)->mdsc; | 1170 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; |
1156 | spin_lock(&mdsc->dentry_lru_lock); | 1171 | spin_lock(&mdsc->dentry_lru_lock); |
1157 | list_add_tail(&di->lru, &mdsc->dentry_lru); | 1172 | list_add_tail(&di->lru, &mdsc->dentry_lru); |
1158 | mdsc->num_dentry++; | 1173 | mdsc->num_dentry++; |
@@ -1165,10 +1180,10 @@ void ceph_dentry_lru_touch(struct dentry *dn) | |||
1165 | struct ceph_dentry_info *di = ceph_dentry(dn); | 1180 | struct ceph_dentry_info *di = ceph_dentry(dn); |
1166 | struct ceph_mds_client *mdsc; | 1181 | struct ceph_mds_client *mdsc; |
1167 | 1182 | ||
1168 | dout("dentry_lru_touch %p %p '%.*s'\n", di, dn, | 1183 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, |
1169 | dn->d_name.len, dn->d_name.name); | 1184 | dn->d_name.len, dn->d_name.name, di->offset); |
1170 | if (di) { | 1185 | if (di) { |
1171 | mdsc = &ceph_client(dn->d_sb)->mdsc; | 1186 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; |
1172 | spin_lock(&mdsc->dentry_lru_lock); | 1187 | spin_lock(&mdsc->dentry_lru_lock); |
1173 | list_move_tail(&di->lru, &mdsc->dentry_lru); | 1188 | list_move_tail(&di->lru, &mdsc->dentry_lru); |
1174 | spin_unlock(&mdsc->dentry_lru_lock); | 1189 | spin_unlock(&mdsc->dentry_lru_lock); |
@@ -1183,7 +1198,7 @@ void ceph_dentry_lru_del(struct dentry *dn) | |||
1183 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, | 1198 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, |
1184 | dn->d_name.len, dn->d_name.name); | 1199 | dn->d_name.len, dn->d_name.name); |
1185 | if (di) { | 1200 | if (di) { |
1186 | mdsc = &ceph_client(dn->d_sb)->mdsc; | 1201 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; |
1187 | spin_lock(&mdsc->dentry_lru_lock); | 1202 | spin_lock(&mdsc->dentry_lru_lock); |
1188 | list_del_init(&di->lru); | 1203 | list_del_init(&di->lru); |
1189 | mdsc->num_dentry--; | 1204 | mdsc->num_dentry--; |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 9d67572fb328..17447644d675 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -93,11 +93,11 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
93 | return ERR_PTR(-ESTALE); | 93 | return ERR_PTR(-ESTALE); |
94 | 94 | ||
95 | dentry = d_obtain_alias(inode); | 95 | dentry = d_obtain_alias(inode); |
96 | if (!dentry) { | 96 | if (IS_ERR(dentry)) { |
97 | pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n", | 97 | pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n", |
98 | fh->ino, inode); | 98 | fh->ino, inode); |
99 | iput(inode); | 99 | iput(inode); |
100 | return ERR_PTR(-ENOMEM); | 100 | return dentry; |
101 | } | 101 | } |
102 | err = ceph_init_dentry(dentry); | 102 | err = ceph_init_dentry(dentry); |
103 | 103 | ||
@@ -115,7 +115,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
115 | static struct dentry *__cfh_to_dentry(struct super_block *sb, | 115 | static struct dentry *__cfh_to_dentry(struct super_block *sb, |
116 | struct ceph_nfs_confh *cfh) | 116 | struct ceph_nfs_confh *cfh) |
117 | { | 117 | { |
118 | struct ceph_mds_client *mdsc = &ceph_client(sb)->mdsc; | 118 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc; |
119 | struct inode *inode; | 119 | struct inode *inode; |
120 | struct dentry *dentry; | 120 | struct dentry *dentry; |
121 | struct ceph_vino vino; | 121 | struct ceph_vino vino; |
@@ -149,11 +149,11 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, | |||
149 | } | 149 | } |
150 | 150 | ||
151 | dentry = d_obtain_alias(inode); | 151 | dentry = d_obtain_alias(inode); |
152 | if (!dentry) { | 152 | if (IS_ERR(dentry)) { |
153 | pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n", | 153 | pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n", |
154 | cfh->ino, inode); | 154 | cfh->ino, inode); |
155 | iput(inode); | 155 | iput(inode); |
156 | return ERR_PTR(-ENOMEM); | 156 | return dentry; |
157 | } | 157 | } |
158 | err = ceph_init_dentry(dentry); | 158 | err = ceph_init_dentry(dentry); |
159 | if (err < 0) { | 159 | if (err < 0) { |
@@ -202,11 +202,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, | |||
202 | return ERR_PTR(-ESTALE); | 202 | return ERR_PTR(-ESTALE); |
203 | 203 | ||
204 | dentry = d_obtain_alias(inode); | 204 | dentry = d_obtain_alias(inode); |
205 | if (!dentry) { | 205 | if (IS_ERR(dentry)) { |
206 | pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", | 206 | pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", |
207 | cfh->ino, inode); | 207 | cfh->ino, inode); |
208 | iput(inode); | 208 | iput(inode); |
209 | return ERR_PTR(-ENOMEM); | 209 | return dentry; |
210 | } | 210 | } |
211 | err = ceph_init_dentry(dentry); | 211 | err = ceph_init_dentry(dentry); |
212 | if (err < 0) { | 212 | if (err < 0) { |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ed6f19721d6e..6512b6701b9e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -317,16 +317,16 @@ void ceph_release_page_vector(struct page **pages, int num_pages) | |||
317 | /* | 317 | /* |
318 | * allocate a vector new pages | 318 | * allocate a vector new pages |
319 | */ | 319 | */ |
320 | static struct page **alloc_page_vector(int num_pages) | 320 | struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) |
321 | { | 321 | { |
322 | struct page **pages; | 322 | struct page **pages; |
323 | int i; | 323 | int i; |
324 | 324 | ||
325 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | 325 | pages = kmalloc(sizeof(*pages) * num_pages, flags); |
326 | if (!pages) | 326 | if (!pages) |
327 | return ERR_PTR(-ENOMEM); | 327 | return ERR_PTR(-ENOMEM); |
328 | for (i = 0; i < num_pages; i++) { | 328 | for (i = 0; i < num_pages; i++) { |
329 | pages[i] = alloc_page(GFP_NOFS); | 329 | pages[i] = __page_cache_alloc(flags); |
330 | if (pages[i] == NULL) { | 330 | if (pages[i] == NULL) { |
331 | ceph_release_page_vector(pages, i); | 331 | ceph_release_page_vector(pages, i); |
332 | return ERR_PTR(-ENOMEM); | 332 | return ERR_PTR(-ENOMEM); |
@@ -540,7 +540,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
540 | * in sequence. | 540 | * in sequence. |
541 | */ | 541 | */ |
542 | } else { | 542 | } else { |
543 | pages = alloc_page_vector(num_pages); | 543 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
544 | } | 544 | } |
545 | if (IS_ERR(pages)) | 545 | if (IS_ERR(pages)) |
546 | return PTR_ERR(pages); | 546 | return PTR_ERR(pages); |
@@ -649,8 +649,8 @@ more: | |||
649 | do_sync, | 649 | do_sync, |
650 | ci->i_truncate_seq, ci->i_truncate_size, | 650 | ci->i_truncate_seq, ci->i_truncate_size, |
651 | &mtime, false, 2); | 651 | &mtime, false, 2); |
652 | if (IS_ERR(req)) | 652 | if (!req) |
653 | return PTR_ERR(req); | 653 | return -ENOMEM; |
654 | 654 | ||
655 | num_pages = calc_pages_for(pos, len); | 655 | num_pages = calc_pages_for(pos, len); |
656 | 656 | ||
@@ -668,7 +668,7 @@ more: | |||
668 | truncate_inode_pages_range(inode->i_mapping, pos, | 668 | truncate_inode_pages_range(inode->i_mapping, pos, |
669 | (pos+len) | (PAGE_CACHE_SIZE-1)); | 669 | (pos+len) | (PAGE_CACHE_SIZE-1)); |
670 | } else { | 670 | } else { |
671 | pages = alloc_page_vector(num_pages); | 671 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
672 | if (IS_ERR(pages)) { | 672 | if (IS_ERR(pages)) { |
673 | ret = PTR_ERR(pages); | 673 | ret = PTR_ERR(pages); |
674 | goto out; | 674 | goto out; |
@@ -809,7 +809,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
809 | struct file *file = iocb->ki_filp; | 809 | struct file *file = iocb->ki_filp; |
810 | struct inode *inode = file->f_dentry->d_inode; | 810 | struct inode *inode = file->f_dentry->d_inode; |
811 | struct ceph_inode_info *ci = ceph_inode(inode); | 811 | struct ceph_inode_info *ci = ceph_inode(inode); |
812 | struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc; | 812 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; |
813 | loff_t endoff = pos + iov->iov_len; | 813 | loff_t endoff = pos + iov->iov_len; |
814 | int got = 0; | 814 | int got = 0; |
815 | int ret, err; | 815 | int ret, err; |
@@ -844,8 +844,7 @@ retry_snap: | |||
844 | if ((ret >= 0 || ret == -EIOCBQUEUED) && | 844 | if ((ret >= 0 || ret == -EIOCBQUEUED) && |
845 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) | 845 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) |
846 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { | 846 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { |
847 | err = vfs_fsync_range(file, file->f_path.dentry, | 847 | err = vfs_fsync_range(file, pos, pos + ret - 1, 1); |
848 | pos, pos + ret - 1, 1); | ||
849 | if (err < 0) | 848 | if (err < 0) |
850 | ret = err; | 849 | ret = err; |
851 | } | 850 | } |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 85b4d2ffdeba..a81b8b662c7b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -384,7 +384,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
384 | */ | 384 | */ |
385 | if (ci->i_snap_realm) { | 385 | if (ci->i_snap_realm) { |
386 | struct ceph_mds_client *mdsc = | 386 | struct ceph_mds_client *mdsc = |
387 | &ceph_client(ci->vfs_inode.i_sb)->mdsc; | 387 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
388 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 388 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
389 | 389 | ||
390 | dout(" dropping residual ref to snap realm %p\n", realm); | 390 | dout(" dropping residual ref to snap realm %p\n", realm); |
@@ -619,11 +619,12 @@ static int fill_inode(struct inode *inode, | |||
619 | memcpy(ci->i_xattrs.blob->vec.iov_base, | 619 | memcpy(ci->i_xattrs.blob->vec.iov_base, |
620 | iinfo->xattr_data, iinfo->xattr_len); | 620 | iinfo->xattr_data, iinfo->xattr_len); |
621 | ci->i_xattrs.version = le64_to_cpu(info->xattr_version); | 621 | ci->i_xattrs.version = le64_to_cpu(info->xattr_version); |
622 | xattr_blob = NULL; | ||
622 | } | 623 | } |
623 | 624 | ||
624 | inode->i_mapping->a_ops = &ceph_aops; | 625 | inode->i_mapping->a_ops = &ceph_aops; |
625 | inode->i_mapping->backing_dev_info = | 626 | inode->i_mapping->backing_dev_info = |
626 | &ceph_client(inode->i_sb)->backing_dev_info; | 627 | &ceph_sb_to_client(inode->i_sb)->backing_dev_info; |
627 | 628 | ||
628 | switch (inode->i_mode & S_IFMT) { | 629 | switch (inode->i_mode & S_IFMT) { |
629 | case S_IFIFO: | 630 | case S_IFIFO: |
@@ -674,14 +675,15 @@ static int fill_inode(struct inode *inode, | |||
674 | /* set dir completion flag? */ | 675 | /* set dir completion flag? */ |
675 | if (ci->i_files == 0 && ci->i_subdirs == 0 && | 676 | if (ci->i_files == 0 && ci->i_subdirs == 0 && |
676 | ceph_snap(inode) == CEPH_NOSNAP && | 677 | ceph_snap(inode) == CEPH_NOSNAP && |
677 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED)) { | 678 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && |
679 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | ||
678 | dout(" marking %p complete (empty)\n", inode); | 680 | dout(" marking %p complete (empty)\n", inode); |
679 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 681 | ci->i_ceph_flags |= CEPH_I_COMPLETE; |
680 | ci->i_max_offset = 2; | 682 | ci->i_max_offset = 2; |
681 | } | 683 | } |
682 | 684 | ||
683 | /* it may be better to set st_size in getattr instead? */ | 685 | /* it may be better to set st_size in getattr instead? */ |
684 | if (ceph_test_opt(ceph_client(inode->i_sb), RBYTES)) | 686 | if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) |
685 | inode->i_size = ci->i_rbytes; | 687 | inode->i_size = ci->i_rbytes; |
686 | break; | 688 | break; |
687 | default: | 689 | default: |
@@ -802,6 +804,37 @@ out_unlock: | |||
802 | } | 804 | } |
803 | 805 | ||
804 | /* | 806 | /* |
807 | * Set dentry's directory position based on the current dir's max, and | ||
808 | * order it in d_subdirs, so that dcache_readdir behaves. | ||
809 | */ | ||
810 | static void ceph_set_dentry_offset(struct dentry *dn) | ||
811 | { | ||
812 | struct dentry *dir = dn->d_parent; | ||
813 | struct inode *inode = dn->d_parent->d_inode; | ||
814 | struct ceph_dentry_info *di; | ||
815 | |||
816 | BUG_ON(!inode); | ||
817 | |||
818 | di = ceph_dentry(dn); | ||
819 | |||
820 | spin_lock(&inode->i_lock); | ||
821 | if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | ||
822 | spin_unlock(&inode->i_lock); | ||
823 | return; | ||
824 | } | ||
825 | di->offset = ceph_inode(inode)->i_max_offset++; | ||
826 | spin_unlock(&inode->i_lock); | ||
827 | |||
828 | spin_lock(&dcache_lock); | ||
829 | spin_lock(&dn->d_lock); | ||
830 | list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); | ||
831 | dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, | ||
832 | dn->d_u.d_child.prev, dn->d_u.d_child.next); | ||
833 | spin_unlock(&dn->d_lock); | ||
834 | spin_unlock(&dcache_lock); | ||
835 | } | ||
836 | |||
837 | /* | ||
805 | * splice a dentry to an inode. | 838 | * splice a dentry to an inode. |
806 | * caller must hold directory i_mutex for this to be safe. | 839 | * caller must hold directory i_mutex for this to be safe. |
807 | * | 840 | * |
@@ -814,6 +847,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
814 | { | 847 | { |
815 | struct dentry *realdn; | 848 | struct dentry *realdn; |
816 | 849 | ||
850 | BUG_ON(dn->d_inode); | ||
851 | |||
817 | /* dn must be unhashed */ | 852 | /* dn must be unhashed */ |
818 | if (!d_unhashed(dn)) | 853 | if (!d_unhashed(dn)) |
819 | d_drop(dn); | 854 | d_drop(dn); |
@@ -835,44 +870,17 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
835 | dn = realdn; | 870 | dn = realdn; |
836 | } else { | 871 | } else { |
837 | BUG_ON(!ceph_dentry(dn)); | 872 | BUG_ON(!ceph_dentry(dn)); |
838 | |||
839 | dout("dn %p attached to %p ino %llx.%llx\n", | 873 | dout("dn %p attached to %p ino %llx.%llx\n", |
840 | dn, dn->d_inode, ceph_vinop(dn->d_inode)); | 874 | dn, dn->d_inode, ceph_vinop(dn->d_inode)); |
841 | } | 875 | } |
842 | if ((!prehash || *prehash) && d_unhashed(dn)) | 876 | if ((!prehash || *prehash) && d_unhashed(dn)) |
843 | d_rehash(dn); | 877 | d_rehash(dn); |
878 | ceph_set_dentry_offset(dn); | ||
844 | out: | 879 | out: |
845 | return dn; | 880 | return dn; |
846 | } | 881 | } |
847 | 882 | ||
848 | /* | 883 | /* |
849 | * Set dentry's directory position based on the current dir's max, and | ||
850 | * order it in d_subdirs, so that dcache_readdir behaves. | ||
851 | */ | ||
852 | static void ceph_set_dentry_offset(struct dentry *dn) | ||
853 | { | ||
854 | struct dentry *dir = dn->d_parent; | ||
855 | struct inode *inode = dn->d_parent->d_inode; | ||
856 | struct ceph_dentry_info *di; | ||
857 | |||
858 | BUG_ON(!inode); | ||
859 | |||
860 | di = ceph_dentry(dn); | ||
861 | |||
862 | spin_lock(&inode->i_lock); | ||
863 | di->offset = ceph_inode(inode)->i_max_offset++; | ||
864 | spin_unlock(&inode->i_lock); | ||
865 | |||
866 | spin_lock(&dcache_lock); | ||
867 | spin_lock(&dn->d_lock); | ||
868 | list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); | ||
869 | dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, | ||
870 | dn->d_u.d_child.prev, dn->d_u.d_child.next); | ||
871 | spin_unlock(&dn->d_lock); | ||
872 | spin_unlock(&dcache_lock); | ||
873 | } | ||
874 | |||
875 | /* | ||
876 | * Incorporate results into the local cache. This is either just | 884 | * Incorporate results into the local cache. This is either just |
877 | * one inode, or a directory, dentry, and possibly linked-to inode (e.g., | 885 | * one inode, or a directory, dentry, and possibly linked-to inode (e.g., |
878 | * after a lookup). | 886 | * after a lookup). |
@@ -933,14 +941,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
933 | 941 | ||
934 | if (!rinfo->head->is_target && !rinfo->head->is_dentry) { | 942 | if (!rinfo->head->is_target && !rinfo->head->is_dentry) { |
935 | dout("fill_trace reply is empty!\n"); | 943 | dout("fill_trace reply is empty!\n"); |
936 | if (rinfo->head->result == 0 && req->r_locked_dir) { | 944 | if (rinfo->head->result == 0 && req->r_locked_dir) |
937 | struct ceph_inode_info *ci = | 945 | ceph_invalidate_dir_request(req); |
938 | ceph_inode(req->r_locked_dir); | ||
939 | dout(" clearing %p complete (empty trace)\n", | ||
940 | req->r_locked_dir); | ||
941 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
942 | ci->i_release_count++; | ||
943 | } | ||
944 | return 0; | 946 | return 0; |
945 | } | 947 | } |
946 | 948 | ||
@@ -1011,13 +1013,18 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1011 | req->r_old_dentry->d_name.len, | 1013 | req->r_old_dentry->d_name.len, |
1012 | req->r_old_dentry->d_name.name, | 1014 | req->r_old_dentry->d_name.name, |
1013 | dn, dn->d_name.len, dn->d_name.name); | 1015 | dn, dn->d_name.len, dn->d_name.name); |
1016 | |||
1014 | /* ensure target dentry is invalidated, despite | 1017 | /* ensure target dentry is invalidated, despite |
1015 | rehashing bug in vfs_rename_dir */ | 1018 | rehashing bug in vfs_rename_dir */ |
1016 | dn->d_time = jiffies; | 1019 | ceph_invalidate_dentry_lease(dn); |
1017 | ceph_dentry(dn)->lease_shared_gen = 0; | 1020 | |
1018 | /* take overwritten dentry's readdir offset */ | 1021 | /* take overwritten dentry's readdir offset */ |
1022 | dout("dn %p gets %p offset %lld (old offset %lld)\n", | ||
1023 | req->r_old_dentry, dn, ceph_dentry(dn)->offset, | ||
1024 | ceph_dentry(req->r_old_dentry)->offset); | ||
1019 | ceph_dentry(req->r_old_dentry)->offset = | 1025 | ceph_dentry(req->r_old_dentry)->offset = |
1020 | ceph_dentry(dn)->offset; | 1026 | ceph_dentry(dn)->offset; |
1027 | |||
1021 | dn = req->r_old_dentry; /* use old_dentry */ | 1028 | dn = req->r_old_dentry; /* use old_dentry */ |
1022 | in = dn->d_inode; | 1029 | in = dn->d_inode; |
1023 | } | 1030 | } |
@@ -1059,7 +1066,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1059 | goto done; | 1066 | goto done; |
1060 | } | 1067 | } |
1061 | req->r_dentry = dn; /* may have spliced */ | 1068 | req->r_dentry = dn; /* may have spliced */ |
1062 | ceph_set_dentry_offset(dn); | ||
1063 | igrab(in); | 1069 | igrab(in); |
1064 | } else if (ceph_ino(in) == vino.ino && | 1070 | } else if (ceph_ino(in) == vino.ino && |
1065 | ceph_snap(in) == vino.snap) { | 1071 | ceph_snap(in) == vino.snap) { |
@@ -1102,7 +1108,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1102 | err = PTR_ERR(dn); | 1108 | err = PTR_ERR(dn); |
1103 | goto done; | 1109 | goto done; |
1104 | } | 1110 | } |
1105 | ceph_set_dentry_offset(dn); | ||
1106 | req->r_dentry = dn; /* may have spliced */ | 1111 | req->r_dentry = dn; /* may have spliced */ |
1107 | igrab(in); | 1112 | igrab(in); |
1108 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ | 1113 | rinfo->head->is_dentry = 1; /* fool notrace handlers */ |
@@ -1429,7 +1434,7 @@ void ceph_queue_vmtruncate(struct inode *inode) | |||
1429 | { | 1434 | { |
1430 | struct ceph_inode_info *ci = ceph_inode(inode); | 1435 | struct ceph_inode_info *ci = ceph_inode(inode); |
1431 | 1436 | ||
1432 | if (queue_work(ceph_client(inode->i_sb)->trunc_wq, | 1437 | if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, |
1433 | &ci->i_vmtruncate_work)) { | 1438 | &ci->i_vmtruncate_work)) { |
1434 | dout("ceph_queue_vmtruncate %p\n", inode); | 1439 | dout("ceph_queue_vmtruncate %p\n", inode); |
1435 | igrab(inode); | 1440 | igrab(inode); |
@@ -1518,7 +1523,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1518 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1523 | struct inode *parent_inode = dentry->d_parent->d_inode; |
1519 | const unsigned int ia_valid = attr->ia_valid; | 1524 | const unsigned int ia_valid = attr->ia_valid; |
1520 | struct ceph_mds_request *req; | 1525 | struct ceph_mds_request *req; |
1521 | struct ceph_mds_client *mdsc = &ceph_client(dentry->d_sb)->mdsc; | 1526 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; |
1522 | int issued; | 1527 | int issued; |
1523 | int release = 0, dirtied = 0; | 1528 | int release = 0, dirtied = 0; |
1524 | int mask = 0; | 1529 | int mask = 0; |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 8a5bcae62846..d085f07756b4 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -98,7 +98,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
98 | struct ceph_ioctl_dataloc dl; | 98 | struct ceph_ioctl_dataloc dl; |
99 | struct inode *inode = file->f_dentry->d_inode; | 99 | struct inode *inode = file->f_dentry->d_inode; |
100 | struct ceph_inode_info *ci = ceph_inode(inode); | 100 | struct ceph_inode_info *ci = ceph_inode(inode); |
101 | struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc; | 101 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; |
102 | u64 len = 1, olen; | 102 | u64 len = 1, olen; |
103 | u64 tmp; | 103 | u64 tmp; |
104 | struct ceph_object_layout ol; | 104 | struct ceph_object_layout ol; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 24561a557e01..885aa5710cfd 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -40,7 +40,7 @@ | |||
40 | static void __wake_requests(struct ceph_mds_client *mdsc, | 40 | static void __wake_requests(struct ceph_mds_client *mdsc, |
41 | struct list_head *head); | 41 | struct list_head *head); |
42 | 42 | ||
43 | const static struct ceph_connection_operations mds_con_ops; | 43 | static const struct ceph_connection_operations mds_con_ops; |
44 | 44 | ||
45 | 45 | ||
46 | /* | 46 | /* |
@@ -665,10 +665,10 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) | |||
665 | struct ceph_msg *msg; | 665 | struct ceph_msg *msg; |
666 | struct ceph_mds_session_head *h; | 666 | struct ceph_mds_session_head *h; |
667 | 667 | ||
668 | msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), 0, 0, NULL); | 668 | msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS); |
669 | if (IS_ERR(msg)) { | 669 | if (!msg) { |
670 | pr_err("create_session_msg ENOMEM creating msg\n"); | 670 | pr_err("create_session_msg ENOMEM creating msg\n"); |
671 | return ERR_PTR(PTR_ERR(msg)); | 671 | return NULL; |
672 | } | 672 | } |
673 | h = msg->front.iov_base; | 673 | h = msg->front.iov_base; |
674 | h->op = cpu_to_le32(op); | 674 | h->op = cpu_to_le32(op); |
@@ -687,7 +687,6 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
687 | struct ceph_msg *msg; | 687 | struct ceph_msg *msg; |
688 | int mstate; | 688 | int mstate; |
689 | int mds = session->s_mds; | 689 | int mds = session->s_mds; |
690 | int err = 0; | ||
691 | 690 | ||
692 | /* wait for mds to go active? */ | 691 | /* wait for mds to go active? */ |
693 | mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); | 692 | mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); |
@@ -698,13 +697,9 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
698 | 697 | ||
699 | /* send connect message */ | 698 | /* send connect message */ |
700 | msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); | 699 | msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); |
701 | if (IS_ERR(msg)) { | 700 | if (!msg) |
702 | err = PTR_ERR(msg); | 701 | return -ENOMEM; |
703 | goto out; | ||
704 | } | ||
705 | ceph_con_send(&session->s_con, msg); | 702 | ceph_con_send(&session->s_con, msg); |
706 | |||
707 | out: | ||
708 | return 0; | 703 | return 0; |
709 | } | 704 | } |
710 | 705 | ||
@@ -804,12 +799,49 @@ out: | |||
804 | } | 799 | } |
805 | 800 | ||
806 | static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | 801 | static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, |
807 | void *arg) | 802 | void *arg) |
808 | { | 803 | { |
809 | struct ceph_inode_info *ci = ceph_inode(inode); | 804 | struct ceph_inode_info *ci = ceph_inode(inode); |
805 | int drop = 0; | ||
806 | |||
810 | dout("removing cap %p, ci is %p, inode is %p\n", | 807 | dout("removing cap %p, ci is %p, inode is %p\n", |
811 | cap, ci, &ci->vfs_inode); | 808 | cap, ci, &ci->vfs_inode); |
812 | ceph_remove_cap(cap); | 809 | spin_lock(&inode->i_lock); |
810 | __ceph_remove_cap(cap); | ||
811 | if (!__ceph_is_any_real_caps(ci)) { | ||
812 | struct ceph_mds_client *mdsc = | ||
813 | &ceph_sb_to_client(inode->i_sb)->mdsc; | ||
814 | |||
815 | spin_lock(&mdsc->cap_dirty_lock); | ||
816 | if (!list_empty(&ci->i_dirty_item)) { | ||
817 | pr_info(" dropping dirty %s state for %p %lld\n", | ||
818 | ceph_cap_string(ci->i_dirty_caps), | ||
819 | inode, ceph_ino(inode)); | ||
820 | ci->i_dirty_caps = 0; | ||
821 | list_del_init(&ci->i_dirty_item); | ||
822 | drop = 1; | ||
823 | } | ||
824 | if (!list_empty(&ci->i_flushing_item)) { | ||
825 | pr_info(" dropping dirty+flushing %s state for %p %lld\n", | ||
826 | ceph_cap_string(ci->i_flushing_caps), | ||
827 | inode, ceph_ino(inode)); | ||
828 | ci->i_flushing_caps = 0; | ||
829 | list_del_init(&ci->i_flushing_item); | ||
830 | mdsc->num_cap_flushing--; | ||
831 | drop = 1; | ||
832 | } | ||
833 | if (drop && ci->i_wrbuffer_ref) { | ||
834 | pr_info(" dropping dirty data for %p %lld\n", | ||
835 | inode, ceph_ino(inode)); | ||
836 | ci->i_wrbuffer_ref = 0; | ||
837 | ci->i_wrbuffer_ref_head = 0; | ||
838 | drop++; | ||
839 | } | ||
840 | spin_unlock(&mdsc->cap_dirty_lock); | ||
841 | } | ||
842 | spin_unlock(&inode->i_lock); | ||
843 | while (drop--) | ||
844 | iput(inode); | ||
813 | return 0; | 845 | return 0; |
814 | } | 846 | } |
815 | 847 | ||
@@ -821,6 +853,7 @@ static void remove_session_caps(struct ceph_mds_session *session) | |||
821 | dout("remove_session_caps on %p\n", session); | 853 | dout("remove_session_caps on %p\n", session); |
822 | iterate_session_caps(session, remove_session_caps_cb, NULL); | 854 | iterate_session_caps(session, remove_session_caps_cb, NULL); |
823 | BUG_ON(session->s_nr_caps > 0); | 855 | BUG_ON(session->s_nr_caps > 0); |
856 | BUG_ON(!list_empty(&session->s_cap_flushing)); | ||
824 | cleanup_cap_releases(session); | 857 | cleanup_cap_releases(session); |
825 | } | 858 | } |
826 | 859 | ||
@@ -883,8 +916,8 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, | |||
883 | ceph_mds_state_name(state)); | 916 | ceph_mds_state_name(state)); |
884 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, | 917 | msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, |
885 | ++session->s_renew_seq); | 918 | ++session->s_renew_seq); |
886 | if (IS_ERR(msg)) | 919 | if (!msg) |
887 | return PTR_ERR(msg); | 920 | return -ENOMEM; |
888 | ceph_con_send(&session->s_con, msg); | 921 | ceph_con_send(&session->s_con, msg); |
889 | return 0; | 922 | return 0; |
890 | } | 923 | } |
@@ -931,17 +964,15 @@ static int request_close_session(struct ceph_mds_client *mdsc, | |||
931 | struct ceph_mds_session *session) | 964 | struct ceph_mds_session *session) |
932 | { | 965 | { |
933 | struct ceph_msg *msg; | 966 | struct ceph_msg *msg; |
934 | int err = 0; | ||
935 | 967 | ||
936 | dout("request_close_session mds%d state %s seq %lld\n", | 968 | dout("request_close_session mds%d state %s seq %lld\n", |
937 | session->s_mds, session_state_name(session->s_state), | 969 | session->s_mds, session_state_name(session->s_state), |
938 | session->s_seq); | 970 | session->s_seq); |
939 | msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); | 971 | msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); |
940 | if (IS_ERR(msg)) | 972 | if (!msg) |
941 | err = PTR_ERR(msg); | 973 | return -ENOMEM; |
942 | else | 974 | ceph_con_send(&session->s_con, msg); |
943 | ceph_con_send(&session->s_con, msg); | 975 | return 0; |
944 | return err; | ||
945 | } | 976 | } |
946 | 977 | ||
947 | /* | 978 | /* |
@@ -1059,7 +1090,7 @@ static int add_cap_releases(struct ceph_mds_client *mdsc, | |||
1059 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { | 1090 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { |
1060 | spin_unlock(&session->s_cap_lock); | 1091 | spin_unlock(&session->s_cap_lock); |
1061 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, | 1092 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, |
1062 | 0, 0, NULL); | 1093 | GFP_NOFS); |
1063 | if (!msg) | 1094 | if (!msg) |
1064 | goto out_unlocked; | 1095 | goto out_unlocked; |
1065 | dout("add_cap_releases %p msg %p now %d\n", session, msg, | 1096 | dout("add_cap_releases %p msg %p now %d\n", session, msg, |
@@ -1151,10 +1182,8 @@ static void send_cap_releases(struct ceph_mds_client *mdsc, | |||
1151 | struct ceph_msg *msg; | 1182 | struct ceph_msg *msg; |
1152 | 1183 | ||
1153 | dout("send_cap_releases mds%d\n", session->s_mds); | 1184 | dout("send_cap_releases mds%d\n", session->s_mds); |
1154 | while (1) { | 1185 | spin_lock(&session->s_cap_lock); |
1155 | spin_lock(&session->s_cap_lock); | 1186 | while (!list_empty(&session->s_cap_releases_done)) { |
1156 | if (list_empty(&session->s_cap_releases_done)) | ||
1157 | break; | ||
1158 | msg = list_first_entry(&session->s_cap_releases_done, | 1187 | msg = list_first_entry(&session->s_cap_releases_done, |
1159 | struct ceph_msg, list_head); | 1188 | struct ceph_msg, list_head); |
1160 | list_del_init(&msg->list_head); | 1189 | list_del_init(&msg->list_head); |
@@ -1162,10 +1191,49 @@ static void send_cap_releases(struct ceph_mds_client *mdsc, | |||
1162 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 1191 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
1163 | dout("send_cap_releases mds%d %p\n", session->s_mds, msg); | 1192 | dout("send_cap_releases mds%d %p\n", session->s_mds, msg); |
1164 | ceph_con_send(&session->s_con, msg); | 1193 | ceph_con_send(&session->s_con, msg); |
1194 | spin_lock(&session->s_cap_lock); | ||
1165 | } | 1195 | } |
1166 | spin_unlock(&session->s_cap_lock); | 1196 | spin_unlock(&session->s_cap_lock); |
1167 | } | 1197 | } |
1168 | 1198 | ||
1199 | static void discard_cap_releases(struct ceph_mds_client *mdsc, | ||
1200 | struct ceph_mds_session *session) | ||
1201 | { | ||
1202 | struct ceph_msg *msg; | ||
1203 | struct ceph_mds_cap_release *head; | ||
1204 | unsigned num; | ||
1205 | |||
1206 | dout("discard_cap_releases mds%d\n", session->s_mds); | ||
1207 | spin_lock(&session->s_cap_lock); | ||
1208 | |||
1209 | /* zero out the in-progress message */ | ||
1210 | msg = list_first_entry(&session->s_cap_releases, | ||
1211 | struct ceph_msg, list_head); | ||
1212 | head = msg->front.iov_base; | ||
1213 | num = le32_to_cpu(head->num); | ||
1214 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); | ||
1215 | head->num = cpu_to_le32(0); | ||
1216 | session->s_num_cap_releases += num; | ||
1217 | |||
1218 | /* requeue completed messages */ | ||
1219 | while (!list_empty(&session->s_cap_releases_done)) { | ||
1220 | msg = list_first_entry(&session->s_cap_releases_done, | ||
1221 | struct ceph_msg, list_head); | ||
1222 | list_del_init(&msg->list_head); | ||
1223 | |||
1224 | head = msg->front.iov_base; | ||
1225 | num = le32_to_cpu(head->num); | ||
1226 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, | ||
1227 | num); | ||
1228 | session->s_num_cap_releases += num; | ||
1229 | head->num = cpu_to_le32(0); | ||
1230 | msg->front.iov_len = sizeof(*head); | ||
1231 | list_add(&msg->list_head, &session->s_cap_releases); | ||
1232 | } | ||
1233 | |||
1234 | spin_unlock(&session->s_cap_lock); | ||
1235 | } | ||
1236 | |||
1169 | /* | 1237 | /* |
1170 | * requests | 1238 | * requests |
1171 | */ | 1239 | */ |
@@ -1181,6 +1249,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
1181 | if (!req) | 1249 | if (!req) |
1182 | return ERR_PTR(-ENOMEM); | 1250 | return ERR_PTR(-ENOMEM); |
1183 | 1251 | ||
1252 | mutex_init(&req->r_fill_mutex); | ||
1184 | req->r_started = jiffies; | 1253 | req->r_started = jiffies; |
1185 | req->r_resend_mds = -1; | 1254 | req->r_resend_mds = -1; |
1186 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); | 1255 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); |
@@ -1251,7 +1320,7 @@ retry: | |||
1251 | len += 1 + temp->d_name.len; | 1320 | len += 1 + temp->d_name.len; |
1252 | temp = temp->d_parent; | 1321 | temp = temp->d_parent; |
1253 | if (temp == NULL) { | 1322 | if (temp == NULL) { |
1254 | pr_err("build_path_dentry corrupt dentry %p\n", dentry); | 1323 | pr_err("build_path corrupt dentry %p\n", dentry); |
1255 | return ERR_PTR(-EINVAL); | 1324 | return ERR_PTR(-EINVAL); |
1256 | } | 1325 | } |
1257 | } | 1326 | } |
@@ -1267,7 +1336,7 @@ retry: | |||
1267 | struct inode *inode = temp->d_inode; | 1336 | struct inode *inode = temp->d_inode; |
1268 | 1337 | ||
1269 | if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { | 1338 | if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { |
1270 | dout("build_path_dentry path+%d: %p SNAPDIR\n", | 1339 | dout("build_path path+%d: %p SNAPDIR\n", |
1271 | pos, temp); | 1340 | pos, temp); |
1272 | } else if (stop_on_nosnap && inode && | 1341 | } else if (stop_on_nosnap && inode && |
1273 | ceph_snap(inode) == CEPH_NOSNAP) { | 1342 | ceph_snap(inode) == CEPH_NOSNAP) { |
@@ -1278,20 +1347,18 @@ retry: | |||
1278 | break; | 1347 | break; |
1279 | strncpy(path + pos, temp->d_name.name, | 1348 | strncpy(path + pos, temp->d_name.name, |
1280 | temp->d_name.len); | 1349 | temp->d_name.len); |
1281 | dout("build_path_dentry path+%d: %p '%.*s'\n", | ||
1282 | pos, temp, temp->d_name.len, path + pos); | ||
1283 | } | 1350 | } |
1284 | if (pos) | 1351 | if (pos) |
1285 | path[--pos] = '/'; | 1352 | path[--pos] = '/'; |
1286 | temp = temp->d_parent; | 1353 | temp = temp->d_parent; |
1287 | if (temp == NULL) { | 1354 | if (temp == NULL) { |
1288 | pr_err("build_path_dentry corrupt dentry\n"); | 1355 | pr_err("build_path corrupt dentry\n"); |
1289 | kfree(path); | 1356 | kfree(path); |
1290 | return ERR_PTR(-EINVAL); | 1357 | return ERR_PTR(-EINVAL); |
1291 | } | 1358 | } |
1292 | } | 1359 | } |
1293 | if (pos != 0) { | 1360 | if (pos != 0) { |
1294 | pr_err("build_path_dentry did not end path lookup where " | 1361 | pr_err("build_path did not end path lookup where " |
1295 | "expected, namelen is %d, pos is %d\n", len, pos); | 1362 | "expected, namelen is %d, pos is %d\n", len, pos); |
1296 | /* presumably this is only possible if racing with a | 1363 | /* presumably this is only possible if racing with a |
1297 | rename of one of the parent directories (we can not | 1364 | rename of one of the parent directories (we can not |
@@ -1303,7 +1370,7 @@ retry: | |||
1303 | 1370 | ||
1304 | *base = ceph_ino(temp->d_inode); | 1371 | *base = ceph_ino(temp->d_inode); |
1305 | *plen = len; | 1372 | *plen = len; |
1306 | dout("build_path_dentry on %p %d built %llx '%.*s'\n", | 1373 | dout("build_path on %p %d built %llx '%.*s'\n", |
1307 | dentry, atomic_read(&dentry->d_count), *base, len, path); | 1374 | dentry, atomic_read(&dentry->d_count), *base, len, path); |
1308 | return path; | 1375 | return path; |
1309 | } | 1376 | } |
@@ -1426,9 +1493,11 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1426 | if (req->r_old_dentry_drop) | 1493 | if (req->r_old_dentry_drop) |
1427 | len += req->r_old_dentry->d_name.len; | 1494 | len += req->r_old_dentry->d_name.len; |
1428 | 1495 | ||
1429 | msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, 0, 0, NULL); | 1496 | msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS); |
1430 | if (IS_ERR(msg)) | 1497 | if (!msg) { |
1498 | msg = ERR_PTR(-ENOMEM); | ||
1431 | goto out_free2; | 1499 | goto out_free2; |
1500 | } | ||
1432 | 1501 | ||
1433 | msg->hdr.tid = cpu_to_le64(req->r_tid); | 1502 | msg->hdr.tid = cpu_to_le64(req->r_tid); |
1434 | 1503 | ||
@@ -1517,9 +1586,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
1517 | } | 1586 | } |
1518 | msg = create_request_message(mdsc, req, mds); | 1587 | msg = create_request_message(mdsc, req, mds); |
1519 | if (IS_ERR(msg)) { | 1588 | if (IS_ERR(msg)) { |
1520 | req->r_reply = ERR_PTR(PTR_ERR(msg)); | 1589 | req->r_err = PTR_ERR(msg); |
1521 | complete_request(mdsc, req); | 1590 | complete_request(mdsc, req); |
1522 | return -PTR_ERR(msg); | 1591 | return PTR_ERR(msg); |
1523 | } | 1592 | } |
1524 | req->r_request = msg; | 1593 | req->r_request = msg; |
1525 | 1594 | ||
@@ -1552,7 +1621,7 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
1552 | int mds = -1; | 1621 | int mds = -1; |
1553 | int err = -EAGAIN; | 1622 | int err = -EAGAIN; |
1554 | 1623 | ||
1555 | if (req->r_reply) | 1624 | if (req->r_err || req->r_got_result) |
1556 | goto out; | 1625 | goto out; |
1557 | 1626 | ||
1558 | if (req->r_timeout && | 1627 | if (req->r_timeout && |
@@ -1609,7 +1678,7 @@ out: | |||
1609 | return err; | 1678 | return err; |
1610 | 1679 | ||
1611 | finish: | 1680 | finish: |
1612 | req->r_reply = ERR_PTR(err); | 1681 | req->r_err = err; |
1613 | complete_request(mdsc, req); | 1682 | complete_request(mdsc, req); |
1614 | goto out; | 1683 | goto out; |
1615 | } | 1684 | } |
@@ -1630,10 +1699,9 @@ static void __wake_requests(struct ceph_mds_client *mdsc, | |||
1630 | 1699 | ||
1631 | /* | 1700 | /* |
1632 | * Wake up threads with requests pending for @mds, so that they can | 1701 | * Wake up threads with requests pending for @mds, so that they can |
1633 | * resubmit their requests to a possibly different mds. If @all is set, | 1702 | * resubmit their requests to a possibly different mds. |
1634 | * wake up if their requests has been forwarded to @mds, too. | ||
1635 | */ | 1703 | */ |
1636 | static void kick_requests(struct ceph_mds_client *mdsc, int mds, int all) | 1704 | static void kick_requests(struct ceph_mds_client *mdsc, int mds) |
1637 | { | 1705 | { |
1638 | struct ceph_mds_request *req; | 1706 | struct ceph_mds_request *req; |
1639 | struct rb_node *p; | 1707 | struct rb_node *p; |
@@ -1689,64 +1757,78 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
1689 | __register_request(mdsc, req, dir); | 1757 | __register_request(mdsc, req, dir); |
1690 | __do_request(mdsc, req); | 1758 | __do_request(mdsc, req); |
1691 | 1759 | ||
1692 | /* wait */ | 1760 | if (req->r_err) { |
1693 | if (!req->r_reply) { | 1761 | err = req->r_err; |
1694 | mutex_unlock(&mdsc->mutex); | 1762 | __unregister_request(mdsc, req); |
1695 | if (req->r_timeout) { | 1763 | dout("do_request early error %d\n", err); |
1696 | err = (long)wait_for_completion_interruptible_timeout( | 1764 | goto out; |
1697 | &req->r_completion, req->r_timeout); | ||
1698 | if (err == 0) | ||
1699 | req->r_reply = ERR_PTR(-EIO); | ||
1700 | else if (err < 0) | ||
1701 | req->r_reply = ERR_PTR(err); | ||
1702 | } else { | ||
1703 | err = wait_for_completion_interruptible( | ||
1704 | &req->r_completion); | ||
1705 | if (err) | ||
1706 | req->r_reply = ERR_PTR(err); | ||
1707 | } | ||
1708 | mutex_lock(&mdsc->mutex); | ||
1709 | } | 1765 | } |
1710 | 1766 | ||
1711 | if (IS_ERR(req->r_reply)) { | 1767 | /* wait */ |
1712 | err = PTR_ERR(req->r_reply); | 1768 | mutex_unlock(&mdsc->mutex); |
1713 | req->r_reply = NULL; | 1769 | dout("do_request waiting\n"); |
1770 | if (req->r_timeout) { | ||
1771 | err = (long)wait_for_completion_interruptible_timeout( | ||
1772 | &req->r_completion, req->r_timeout); | ||
1773 | if (err == 0) | ||
1774 | err = -EIO; | ||
1775 | } else { | ||
1776 | err = wait_for_completion_interruptible(&req->r_completion); | ||
1777 | } | ||
1778 | dout("do_request waited, got %d\n", err); | ||
1779 | mutex_lock(&mdsc->mutex); | ||
1714 | 1780 | ||
1715 | if (err == -ERESTARTSYS) { | 1781 | /* only abort if we didn't race with a real reply */ |
1716 | /* aborted */ | 1782 | if (req->r_got_result) { |
1717 | req->r_aborted = true; | 1783 | err = le32_to_cpu(req->r_reply_info.head->result); |
1784 | } else if (err < 0) { | ||
1785 | dout("aborted request %lld with %d\n", req->r_tid, err); | ||
1718 | 1786 | ||
1719 | if (req->r_locked_dir && | 1787 | /* |
1720 | (req->r_op & CEPH_MDS_OP_WRITE)) { | 1788 | * ensure we aren't running concurrently with |
1721 | struct ceph_inode_info *ci = | 1789 | * ceph_fill_trace or ceph_readdir_prepopulate, which |
1722 | ceph_inode(req->r_locked_dir); | 1790 | * rely on locks (dir mutex) held by our caller. |
1791 | */ | ||
1792 | mutex_lock(&req->r_fill_mutex); | ||
1793 | req->r_err = err; | ||
1794 | req->r_aborted = true; | ||
1795 | mutex_unlock(&req->r_fill_mutex); | ||
1723 | 1796 | ||
1724 | dout("aborted, clearing I_COMPLETE on %p\n", | 1797 | if (req->r_locked_dir && |
1725 | req->r_locked_dir); | 1798 | (req->r_op & CEPH_MDS_OP_WRITE)) |
1726 | spin_lock(&req->r_locked_dir->i_lock); | 1799 | ceph_invalidate_dir_request(req); |
1727 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
1728 | ci->i_release_count++; | ||
1729 | spin_unlock(&req->r_locked_dir->i_lock); | ||
1730 | } | ||
1731 | } else { | ||
1732 | /* clean up this request */ | ||
1733 | __unregister_request(mdsc, req); | ||
1734 | if (!list_empty(&req->r_unsafe_item)) | ||
1735 | list_del_init(&req->r_unsafe_item); | ||
1736 | complete(&req->r_safe_completion); | ||
1737 | } | ||
1738 | } else if (req->r_err) { | ||
1739 | err = req->r_err; | ||
1740 | } else { | 1800 | } else { |
1741 | err = le32_to_cpu(req->r_reply_info.head->result); | 1801 | err = req->r_err; |
1742 | } | 1802 | } |
1743 | mutex_unlock(&mdsc->mutex); | ||
1744 | 1803 | ||
1804 | out: | ||
1805 | mutex_unlock(&mdsc->mutex); | ||
1745 | dout("do_request %p done, result %d\n", req, err); | 1806 | dout("do_request %p done, result %d\n", req, err); |
1746 | return err; | 1807 | return err; |
1747 | } | 1808 | } |
1748 | 1809 | ||
1749 | /* | 1810 | /* |
1811 | * Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS | ||
1812 | * namespace request. | ||
1813 | */ | ||
1814 | void ceph_invalidate_dir_request(struct ceph_mds_request *req) | ||
1815 | { | ||
1816 | struct inode *inode = req->r_locked_dir; | ||
1817 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
1818 | |||
1819 | dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode); | ||
1820 | spin_lock(&inode->i_lock); | ||
1821 | ci->i_ceph_flags &= ~CEPH_I_COMPLETE; | ||
1822 | ci->i_release_count++; | ||
1823 | spin_unlock(&inode->i_lock); | ||
1824 | |||
1825 | if (req->r_dentry) | ||
1826 | ceph_invalidate_dentry_lease(req->r_dentry); | ||
1827 | if (req->r_old_dentry) | ||
1828 | ceph_invalidate_dentry_lease(req->r_old_dentry); | ||
1829 | } | ||
1830 | |||
1831 | /* | ||
1750 | * Handle mds reply. | 1832 | * Handle mds reply. |
1751 | * | 1833 | * |
1752 | * We take the session mutex and parse and process the reply immediately. | 1834 | * We take the session mutex and parse and process the reply immediately. |
@@ -1797,6 +1879,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1797 | mutex_unlock(&mdsc->mutex); | 1879 | mutex_unlock(&mdsc->mutex); |
1798 | goto out; | 1880 | goto out; |
1799 | } | 1881 | } |
1882 | if (req->r_got_safe && !head->safe) { | ||
1883 | pr_warning("got unsafe after safe on %llu from mds%d\n", | ||
1884 | tid, mds); | ||
1885 | mutex_unlock(&mdsc->mutex); | ||
1886 | goto out; | ||
1887 | } | ||
1800 | 1888 | ||
1801 | result = le32_to_cpu(head->result); | 1889 | result = le32_to_cpu(head->result); |
1802 | 1890 | ||
@@ -1838,11 +1926,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1838 | mutex_unlock(&mdsc->mutex); | 1926 | mutex_unlock(&mdsc->mutex); |
1839 | goto out; | 1927 | goto out; |
1840 | } | 1928 | } |
1841 | } | 1929 | } else { |
1842 | |||
1843 | BUG_ON(req->r_reply); | ||
1844 | |||
1845 | if (!head->safe) { | ||
1846 | req->r_got_unsafe = true; | 1930 | req->r_got_unsafe = true; |
1847 | list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); | 1931 | list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); |
1848 | } | 1932 | } |
@@ -1871,21 +1955,30 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1871 | } | 1955 | } |
1872 | 1956 | ||
1873 | /* insert trace into our cache */ | 1957 | /* insert trace into our cache */ |
1958 | mutex_lock(&req->r_fill_mutex); | ||
1874 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); | 1959 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); |
1875 | if (err == 0) { | 1960 | if (err == 0) { |
1876 | if (result == 0 && rinfo->dir_nr) | 1961 | if (result == 0 && rinfo->dir_nr) |
1877 | ceph_readdir_prepopulate(req, req->r_session); | 1962 | ceph_readdir_prepopulate(req, req->r_session); |
1878 | ceph_unreserve_caps(&req->r_caps_reservation); | 1963 | ceph_unreserve_caps(&req->r_caps_reservation); |
1879 | } | 1964 | } |
1965 | mutex_unlock(&req->r_fill_mutex); | ||
1880 | 1966 | ||
1881 | up_read(&mdsc->snap_rwsem); | 1967 | up_read(&mdsc->snap_rwsem); |
1882 | out_err: | 1968 | out_err: |
1883 | if (err) { | 1969 | mutex_lock(&mdsc->mutex); |
1884 | req->r_err = err; | 1970 | if (!req->r_aborted) { |
1971 | if (err) { | ||
1972 | req->r_err = err; | ||
1973 | } else { | ||
1974 | req->r_reply = msg; | ||
1975 | ceph_msg_get(msg); | ||
1976 | req->r_got_result = true; | ||
1977 | } | ||
1885 | } else { | 1978 | } else { |
1886 | req->r_reply = msg; | 1979 | dout("reply arrived after request %lld was aborted\n", tid); |
1887 | ceph_msg_get(msg); | ||
1888 | } | 1980 | } |
1981 | mutex_unlock(&mdsc->mutex); | ||
1889 | 1982 | ||
1890 | add_cap_releases(mdsc, req->r_session, -1); | 1983 | add_cap_releases(mdsc, req->r_session, -1); |
1891 | mutex_unlock(&session->s_mutex); | 1984 | mutex_unlock(&session->s_mutex); |
@@ -1984,6 +2077,8 @@ static void handle_session(struct ceph_mds_session *session, | |||
1984 | 2077 | ||
1985 | switch (op) { | 2078 | switch (op) { |
1986 | case CEPH_SESSION_OPEN: | 2079 | case CEPH_SESSION_OPEN: |
2080 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) | ||
2081 | pr_info("mds%d reconnect success\n", session->s_mds); | ||
1987 | session->s_state = CEPH_MDS_SESSION_OPEN; | 2082 | session->s_state = CEPH_MDS_SESSION_OPEN; |
1988 | renewed_caps(mdsc, session, 0); | 2083 | renewed_caps(mdsc, session, 0); |
1989 | wake = 1; | 2084 | wake = 1; |
@@ -1997,10 +2092,12 @@ static void handle_session(struct ceph_mds_session *session, | |||
1997 | break; | 2092 | break; |
1998 | 2093 | ||
1999 | case CEPH_SESSION_CLOSE: | 2094 | case CEPH_SESSION_CLOSE: |
2095 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) | ||
2096 | pr_info("mds%d reconnect denied\n", session->s_mds); | ||
2000 | remove_session_caps(session); | 2097 | remove_session_caps(session); |
2001 | wake = 1; /* for good measure */ | 2098 | wake = 1; /* for good measure */ |
2002 | complete(&mdsc->session_close_waiters); | 2099 | complete(&mdsc->session_close_waiters); |
2003 | kick_requests(mdsc, mds, 0); /* cur only */ | 2100 | kick_requests(mdsc, mds); |
2004 | break; | 2101 | break; |
2005 | 2102 | ||
2006 | case CEPH_SESSION_STALE: | 2103 | case CEPH_SESSION_STALE: |
@@ -2132,54 +2229,44 @@ out: | |||
2132 | * | 2229 | * |
2133 | * called with mdsc->mutex held. | 2230 | * called with mdsc->mutex held. |
2134 | */ | 2231 | */ |
2135 | static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | 2232 | static void send_mds_reconnect(struct ceph_mds_client *mdsc, |
2233 | struct ceph_mds_session *session) | ||
2136 | { | 2234 | { |
2137 | struct ceph_mds_session *session = NULL; | ||
2138 | struct ceph_msg *reply; | 2235 | struct ceph_msg *reply; |
2139 | struct rb_node *p; | 2236 | struct rb_node *p; |
2237 | int mds = session->s_mds; | ||
2140 | int err = -ENOMEM; | 2238 | int err = -ENOMEM; |
2141 | struct ceph_pagelist *pagelist; | 2239 | struct ceph_pagelist *pagelist; |
2142 | 2240 | ||
2143 | pr_info("reconnect to recovering mds%d\n", mds); | 2241 | pr_info("mds%d reconnect start\n", mds); |
2144 | 2242 | ||
2145 | pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); | 2243 | pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); |
2146 | if (!pagelist) | 2244 | if (!pagelist) |
2147 | goto fail_nopagelist; | 2245 | goto fail_nopagelist; |
2148 | ceph_pagelist_init(pagelist); | 2246 | ceph_pagelist_init(pagelist); |
2149 | 2247 | ||
2150 | reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, 0, 0, NULL); | 2248 | reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS); |
2151 | if (IS_ERR(reply)) { | 2249 | if (!reply) |
2152 | err = PTR_ERR(reply); | ||
2153 | goto fail_nomsg; | 2250 | goto fail_nomsg; |
2154 | } | ||
2155 | |||
2156 | /* find session */ | ||
2157 | session = __ceph_lookup_mds_session(mdsc, mds); | ||
2158 | mutex_unlock(&mdsc->mutex); /* drop lock for duration */ | ||
2159 | 2251 | ||
2160 | if (session) { | 2252 | mutex_lock(&session->s_mutex); |
2161 | mutex_lock(&session->s_mutex); | 2253 | session->s_state = CEPH_MDS_SESSION_RECONNECTING; |
2254 | session->s_seq = 0; | ||
2162 | 2255 | ||
2163 | session->s_state = CEPH_MDS_SESSION_RECONNECTING; | 2256 | ceph_con_open(&session->s_con, |
2164 | session->s_seq = 0; | 2257 | ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); |
2165 | 2258 | ||
2166 | ceph_con_open(&session->s_con, | 2259 | /* replay unsafe requests */ |
2167 | ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); | 2260 | replay_unsafe_requests(mdsc, session); |
2168 | |||
2169 | /* replay unsafe requests */ | ||
2170 | replay_unsafe_requests(mdsc, session); | ||
2171 | } else { | ||
2172 | dout("no session for mds%d, will send short reconnect\n", | ||
2173 | mds); | ||
2174 | } | ||
2175 | 2261 | ||
2176 | down_read(&mdsc->snap_rwsem); | 2262 | down_read(&mdsc->snap_rwsem); |
2177 | 2263 | ||
2178 | if (!session) | ||
2179 | goto send; | ||
2180 | dout("session %p state %s\n", session, | 2264 | dout("session %p state %s\n", session, |
2181 | session_state_name(session->s_state)); | 2265 | session_state_name(session->s_state)); |
2182 | 2266 | ||
2267 | /* drop old cap expires; we're about to reestablish that state */ | ||
2268 | discard_cap_releases(mdsc, session); | ||
2269 | |||
2183 | /* traverse this session's caps */ | 2270 | /* traverse this session's caps */ |
2184 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); | 2271 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); |
2185 | if (err) | 2272 | if (err) |
@@ -2208,36 +2295,29 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | |||
2208 | goto fail; | 2295 | goto fail; |
2209 | } | 2296 | } |
2210 | 2297 | ||
2211 | send: | ||
2212 | reply->pagelist = pagelist; | 2298 | reply->pagelist = pagelist; |
2213 | reply->hdr.data_len = cpu_to_le32(pagelist->length); | 2299 | reply->hdr.data_len = cpu_to_le32(pagelist->length); |
2214 | reply->nr_pages = calc_pages_for(0, pagelist->length); | 2300 | reply->nr_pages = calc_pages_for(0, pagelist->length); |
2215 | ceph_con_send(&session->s_con, reply); | 2301 | ceph_con_send(&session->s_con, reply); |
2216 | 2302 | ||
2217 | session->s_state = CEPH_MDS_SESSION_OPEN; | ||
2218 | mutex_unlock(&session->s_mutex); | 2303 | mutex_unlock(&session->s_mutex); |
2219 | 2304 | ||
2220 | mutex_lock(&mdsc->mutex); | 2305 | mutex_lock(&mdsc->mutex); |
2221 | __wake_requests(mdsc, &session->s_waiting); | 2306 | __wake_requests(mdsc, &session->s_waiting); |
2222 | mutex_unlock(&mdsc->mutex); | 2307 | mutex_unlock(&mdsc->mutex); |
2223 | 2308 | ||
2224 | ceph_put_mds_session(session); | ||
2225 | |||
2226 | up_read(&mdsc->snap_rwsem); | 2309 | up_read(&mdsc->snap_rwsem); |
2227 | mutex_lock(&mdsc->mutex); | ||
2228 | return; | 2310 | return; |
2229 | 2311 | ||
2230 | fail: | 2312 | fail: |
2231 | ceph_msg_put(reply); | 2313 | ceph_msg_put(reply); |
2232 | up_read(&mdsc->snap_rwsem); | 2314 | up_read(&mdsc->snap_rwsem); |
2233 | mutex_unlock(&session->s_mutex); | 2315 | mutex_unlock(&session->s_mutex); |
2234 | ceph_put_mds_session(session); | ||
2235 | fail_nomsg: | 2316 | fail_nomsg: |
2236 | ceph_pagelist_release(pagelist); | 2317 | ceph_pagelist_release(pagelist); |
2237 | kfree(pagelist); | 2318 | kfree(pagelist); |
2238 | fail_nopagelist: | 2319 | fail_nopagelist: |
2239 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); | 2320 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); |
2240 | mutex_lock(&mdsc->mutex); | ||
2241 | return; | 2321 | return; |
2242 | } | 2322 | } |
2243 | 2323 | ||
@@ -2290,7 +2370,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2290 | } | 2370 | } |
2291 | 2371 | ||
2292 | /* kick any requests waiting on the recovering mds */ | 2372 | /* kick any requests waiting on the recovering mds */ |
2293 | kick_requests(mdsc, i, 1); | 2373 | kick_requests(mdsc, i); |
2294 | } else if (oldstate == newstate) { | 2374 | } else if (oldstate == newstate) { |
2295 | continue; /* nothing new with this mds */ | 2375 | continue; /* nothing new with this mds */ |
2296 | } | 2376 | } |
@@ -2299,22 +2379,21 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2299 | * send reconnect? | 2379 | * send reconnect? |
2300 | */ | 2380 | */ |
2301 | if (s->s_state == CEPH_MDS_SESSION_RESTARTING && | 2381 | if (s->s_state == CEPH_MDS_SESSION_RESTARTING && |
2302 | newstate >= CEPH_MDS_STATE_RECONNECT) | 2382 | newstate >= CEPH_MDS_STATE_RECONNECT) { |
2303 | send_mds_reconnect(mdsc, i); | 2383 | mutex_unlock(&mdsc->mutex); |
2384 | send_mds_reconnect(mdsc, s); | ||
2385 | mutex_lock(&mdsc->mutex); | ||
2386 | } | ||
2304 | 2387 | ||
2305 | /* | 2388 | /* |
2306 | * kick requests on any mds that has gone active. | 2389 | * kick request on any mds that has gone active. |
2307 | * | ||
2308 | * kick requests on cur or forwarder: we may have sent | ||
2309 | * the request to mds1, mds1 told us it forwarded it | ||
2310 | * to mds2, but then we learn mds1 failed and can't be | ||
2311 | * sure it successfully forwarded our request before | ||
2312 | * it died. | ||
2313 | */ | 2390 | */ |
2314 | if (oldstate < CEPH_MDS_STATE_ACTIVE && | 2391 | if (oldstate < CEPH_MDS_STATE_ACTIVE && |
2315 | newstate >= CEPH_MDS_STATE_ACTIVE) { | 2392 | newstate >= CEPH_MDS_STATE_ACTIVE) { |
2316 | pr_info("mds%d reconnect completed\n", s->s_mds); | 2393 | if (oldstate != CEPH_MDS_STATE_CREATING && |
2317 | kick_requests(mdsc, i, 1); | 2394 | oldstate != CEPH_MDS_STATE_STARTING) |
2395 | pr_info("mds%d recovery completed\n", s->s_mds); | ||
2396 | kick_requests(mdsc, i); | ||
2318 | ceph_kick_flushing_caps(mdsc, s); | 2397 | ceph_kick_flushing_caps(mdsc, s); |
2319 | wake_up_session_caps(s, 1); | 2398 | wake_up_session_caps(s, 1); |
2320 | } | 2399 | } |
@@ -2457,8 +2536,8 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, | |||
2457 | dnamelen = dentry->d_name.len; | 2536 | dnamelen = dentry->d_name.len; |
2458 | len += dnamelen; | 2537 | len += dnamelen; |
2459 | 2538 | ||
2460 | msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, 0, 0, NULL); | 2539 | msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS); |
2461 | if (IS_ERR(msg)) | 2540 | if (!msg) |
2462 | return; | 2541 | return; |
2463 | lease = msg->front.iov_base; | 2542 | lease = msg->front.iov_base; |
2464 | lease->action = action; | 2543 | lease->action = action; |
@@ -2603,7 +2682,9 @@ static void delayed_work(struct work_struct *work) | |||
2603 | else | 2682 | else |
2604 | ceph_con_keepalive(&s->s_con); | 2683 | ceph_con_keepalive(&s->s_con); |
2605 | add_cap_releases(mdsc, s, -1); | 2684 | add_cap_releases(mdsc, s, -1); |
2606 | send_cap_releases(mdsc, s); | 2685 | if (s->s_state == CEPH_MDS_SESSION_OPEN || |
2686 | s->s_state == CEPH_MDS_SESSION_HUNG) | ||
2687 | send_cap_releases(mdsc, s); | ||
2607 | mutex_unlock(&s->s_mutex); | 2688 | mutex_unlock(&s->s_mutex); |
2608 | ceph_put_mds_session(s); | 2689 | ceph_put_mds_session(s); |
2609 | 2690 | ||
@@ -2620,6 +2701,9 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2620 | mdsc->client = client; | 2701 | mdsc->client = client; |
2621 | mutex_init(&mdsc->mutex); | 2702 | mutex_init(&mdsc->mutex); |
2622 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | 2703 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); |
2704 | if (mdsc->mdsmap == NULL) | ||
2705 | return -ENOMEM; | ||
2706 | |||
2623 | init_completion(&mdsc->safe_umount_waiters); | 2707 | init_completion(&mdsc->safe_umount_waiters); |
2624 | init_completion(&mdsc->session_close_waiters); | 2708 | init_completion(&mdsc->session_close_waiters); |
2625 | INIT_LIST_HEAD(&mdsc->waiting_for_map); | 2709 | INIT_LIST_HEAD(&mdsc->waiting_for_map); |
@@ -2645,6 +2729,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2645 | init_waitqueue_head(&mdsc->cap_flushing_wq); | 2729 | init_waitqueue_head(&mdsc->cap_flushing_wq); |
2646 | spin_lock_init(&mdsc->dentry_lru_lock); | 2730 | spin_lock_init(&mdsc->dentry_lru_lock); |
2647 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2731 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
2732 | |||
2648 | return 0; | 2733 | return 0; |
2649 | } | 2734 | } |
2650 | 2735 | ||
@@ -2740,6 +2825,9 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
2740 | { | 2825 | { |
2741 | u64 want_tid, want_flush; | 2826 | u64 want_tid, want_flush; |
2742 | 2827 | ||
2828 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | ||
2829 | return; | ||
2830 | |||
2743 | dout("sync\n"); | 2831 | dout("sync\n"); |
2744 | mutex_lock(&mdsc->mutex); | 2832 | mutex_lock(&mdsc->mutex); |
2745 | want_tid = mdsc->last_tid; | 2833 | want_tid = mdsc->last_tid; |
@@ -2922,9 +3010,10 @@ static void con_put(struct ceph_connection *con) | |||
2922 | static void peer_reset(struct ceph_connection *con) | 3010 | static void peer_reset(struct ceph_connection *con) |
2923 | { | 3011 | { |
2924 | struct ceph_mds_session *s = con->private; | 3012 | struct ceph_mds_session *s = con->private; |
3013 | struct ceph_mds_client *mdsc = s->s_mdsc; | ||
2925 | 3014 | ||
2926 | pr_err("mds%d gave us the boot. IMPLEMENT RECONNECT.\n", | 3015 | pr_warning("mds%d closed our session\n", s->s_mds); |
2927 | s->s_mds); | 3016 | send_mds_reconnect(mdsc, s); |
2928 | } | 3017 | } |
2929 | 3018 | ||
2930 | static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | 3019 | static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) |
@@ -3031,7 +3120,7 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
3031 | return ceph_monc_validate_auth(&mdsc->client->monc); | 3120 | return ceph_monc_validate_auth(&mdsc->client->monc); |
3032 | } | 3121 | } |
3033 | 3122 | ||
3034 | const static struct ceph_connection_operations mds_con_ops = { | 3123 | static const struct ceph_connection_operations mds_con_ops = { |
3035 | .get = con_get, | 3124 | .get = con_get, |
3036 | .put = con_put, | 3125 | .put = con_put, |
3037 | .dispatch = dispatch, | 3126 | .dispatch = dispatch, |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 961cc6f65878..d9936c4f1212 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -165,6 +165,8 @@ struct ceph_mds_request { | |||
165 | struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ | 165 | struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ |
166 | struct inode *r_target_inode; /* resulting inode */ | 166 | struct inode *r_target_inode; /* resulting inode */ |
167 | 167 | ||
168 | struct mutex r_fill_mutex; | ||
169 | |||
168 | union ceph_mds_request_args r_args; | 170 | union ceph_mds_request_args r_args; |
169 | int r_fmode; /* file mode, if expecting cap */ | 171 | int r_fmode; /* file mode, if expecting cap */ |
170 | 172 | ||
@@ -213,7 +215,7 @@ struct ceph_mds_request { | |||
213 | struct completion r_safe_completion; | 215 | struct completion r_safe_completion; |
214 | ceph_mds_request_callback_t r_callback; | 216 | ceph_mds_request_callback_t r_callback; |
215 | struct list_head r_unsafe_item; /* per-session unsafe list item */ | 217 | struct list_head r_unsafe_item; /* per-session unsafe list item */ |
216 | bool r_got_unsafe, r_got_safe; | 218 | bool r_got_unsafe, r_got_safe, r_got_result; |
217 | 219 | ||
218 | bool r_did_prepopulate; | 220 | bool r_did_prepopulate; |
219 | u32 r_readdir_offset; | 221 | u32 r_readdir_offset; |
@@ -301,6 +303,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, | |||
301 | struct inode *inode, | 303 | struct inode *inode, |
302 | struct dentry *dn, int mask); | 304 | struct dentry *dn, int mask); |
303 | 305 | ||
306 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); | ||
307 | |||
304 | extern struct ceph_mds_request * | 308 | extern struct ceph_mds_request * |
305 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); | 309 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); |
306 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, | 310 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index cd4fadb6491a..60b74839ebec 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
@@ -39,18 +39,6 @@ static void queue_con(struct ceph_connection *con); | |||
39 | static void con_work(struct work_struct *); | 39 | static void con_work(struct work_struct *); |
40 | static void ceph_fault(struct ceph_connection *con); | 40 | static void ceph_fault(struct ceph_connection *con); |
41 | 41 | ||
42 | const char *ceph_name_type_str(int t) | ||
43 | { | ||
44 | switch (t) { | ||
45 | case CEPH_ENTITY_TYPE_MON: return "mon"; | ||
46 | case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||
47 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||
48 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||
49 | case CEPH_ENTITY_TYPE_ADMIN: return "admin"; | ||
50 | default: return "???"; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | /* | 42 | /* |
55 | * nicely render a sockaddr as a string. | 43 | * nicely render a sockaddr as a string. |
56 | */ | 44 | */ |
@@ -340,6 +328,7 @@ static void reset_connection(struct ceph_connection *con) | |||
340 | ceph_msg_put(con->out_msg); | 328 | ceph_msg_put(con->out_msg); |
341 | con->out_msg = NULL; | 329 | con->out_msg = NULL; |
342 | } | 330 | } |
331 | con->out_keepalive_pending = false; | ||
343 | con->in_seq = 0; | 332 | con->in_seq = 0; |
344 | con->in_seq_acked = 0; | 333 | con->in_seq_acked = 0; |
345 | } | 334 | } |
@@ -357,6 +346,7 @@ void ceph_con_close(struct ceph_connection *con) | |||
357 | clear_bit(WRITE_PENDING, &con->state); | 346 | clear_bit(WRITE_PENDING, &con->state); |
358 | mutex_lock(&con->mutex); | 347 | mutex_lock(&con->mutex); |
359 | reset_connection(con); | 348 | reset_connection(con); |
349 | con->peer_global_seq = 0; | ||
360 | cancel_delayed_work(&con->work); | 350 | cancel_delayed_work(&con->work); |
361 | mutex_unlock(&con->mutex); | 351 | mutex_unlock(&con->mutex); |
362 | queue_con(con); | 352 | queue_con(con); |
@@ -661,7 +651,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
661 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 651 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
662 | con->connect_seq, global_seq, proto); | 652 | con->connect_seq, global_seq, proto); |
663 | 653 | ||
664 | con->out_connect.features = CEPH_FEATURE_SUPPORTED; | 654 | con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT; |
665 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 655 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
666 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 656 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
667 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 657 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
@@ -1124,8 +1114,8 @@ static void fail_protocol(struct ceph_connection *con) | |||
1124 | 1114 | ||
1125 | static int process_connect(struct ceph_connection *con) | 1115 | static int process_connect(struct ceph_connection *con) |
1126 | { | 1116 | { |
1127 | u64 sup_feat = CEPH_FEATURE_SUPPORTED; | 1117 | u64 sup_feat = CEPH_FEATURE_SUPPORTED_CLIENT; |
1128 | u64 req_feat = CEPH_FEATURE_REQUIRED; | 1118 | u64 req_feat = CEPH_FEATURE_REQUIRED_CLIENT; |
1129 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1119 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
1130 | 1120 | ||
1131 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1121 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
@@ -1233,6 +1223,7 @@ static int process_connect(struct ceph_connection *con) | |||
1233 | clear_bit(CONNECTING, &con->state); | 1223 | clear_bit(CONNECTING, &con->state); |
1234 | con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); | 1224 | con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); |
1235 | con->connect_seq++; | 1225 | con->connect_seq++; |
1226 | con->peer_features = server_feat; | ||
1236 | dout("process_connect got READY gseq %d cseq %d (%d)\n", | 1227 | dout("process_connect got READY gseq %d cseq %d (%d)\n", |
1237 | con->peer_global_seq, | 1228 | con->peer_global_seq, |
1238 | le32_to_cpu(con->in_reply.connect_seq), | 1229 | le32_to_cpu(con->in_reply.connect_seq), |
@@ -1402,19 +1393,17 @@ static int read_partial_message(struct ceph_connection *con) | |||
1402 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); | 1393 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); |
1403 | if (skip) { | 1394 | if (skip) { |
1404 | /* skip this message */ | 1395 | /* skip this message */ |
1405 | dout("alloc_msg returned NULL, skipping message\n"); | 1396 | dout("alloc_msg said skip message\n"); |
1406 | con->in_base_pos = -front_len - middle_len - data_len - | 1397 | con->in_base_pos = -front_len - middle_len - data_len - |
1407 | sizeof(m->footer); | 1398 | sizeof(m->footer); |
1408 | con->in_tag = CEPH_MSGR_TAG_READY; | 1399 | con->in_tag = CEPH_MSGR_TAG_READY; |
1409 | con->in_seq++; | 1400 | con->in_seq++; |
1410 | return 0; | 1401 | return 0; |
1411 | } | 1402 | } |
1412 | if (IS_ERR(con->in_msg)) { | 1403 | if (!con->in_msg) { |
1413 | ret = PTR_ERR(con->in_msg); | ||
1414 | con->in_msg = NULL; | ||
1415 | con->error_msg = | 1404 | con->error_msg = |
1416 | "error allocating memory for incoming message"; | 1405 | "error allocating memory for incoming message"; |
1417 | return ret; | 1406 | return -ENOMEM; |
1418 | } | 1407 | } |
1419 | m = con->in_msg; | 1408 | m = con->in_msg; |
1420 | m->front.iov_len = 0; /* haven't read it yet */ | 1409 | m->front.iov_len = 0; /* haven't read it yet */ |
@@ -1514,14 +1503,14 @@ static void process_message(struct ceph_connection *con) | |||
1514 | 1503 | ||
1515 | /* if first message, set peer_name */ | 1504 | /* if first message, set peer_name */ |
1516 | if (con->peer_name.type == 0) | 1505 | if (con->peer_name.type == 0) |
1517 | con->peer_name = msg->hdr.src.name; | 1506 | con->peer_name = msg->hdr.src; |
1518 | 1507 | ||
1519 | con->in_seq++; | 1508 | con->in_seq++; |
1520 | mutex_unlock(&con->mutex); | 1509 | mutex_unlock(&con->mutex); |
1521 | 1510 | ||
1522 | dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n", | 1511 | dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n", |
1523 | msg, le64_to_cpu(msg->hdr.seq), | 1512 | msg, le64_to_cpu(msg->hdr.seq), |
1524 | ENTITY_NAME(msg->hdr.src.name), | 1513 | ENTITY_NAME(msg->hdr.src), |
1525 | le16_to_cpu(msg->hdr.type), | 1514 | le16_to_cpu(msg->hdr.type), |
1526 | ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), | 1515 | ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), |
1527 | le32_to_cpu(msg->hdr.front_len), | 1516 | le32_to_cpu(msg->hdr.front_len), |
@@ -1546,7 +1535,6 @@ static int try_write(struct ceph_connection *con) | |||
1546 | dout("try_write start %p state %lu nref %d\n", con, con->state, | 1535 | dout("try_write start %p state %lu nref %d\n", con, con->state, |
1547 | atomic_read(&con->nref)); | 1536 | atomic_read(&con->nref)); |
1548 | 1537 | ||
1549 | mutex_lock(&con->mutex); | ||
1550 | more: | 1538 | more: |
1551 | dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); | 1539 | dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); |
1552 | 1540 | ||
@@ -1639,7 +1627,6 @@ do_next: | |||
1639 | done: | 1627 | done: |
1640 | ret = 0; | 1628 | ret = 0; |
1641 | out: | 1629 | out: |
1642 | mutex_unlock(&con->mutex); | ||
1643 | dout("try_write done on %p\n", con); | 1630 | dout("try_write done on %p\n", con); |
1644 | return ret; | 1631 | return ret; |
1645 | } | 1632 | } |
@@ -1651,7 +1638,6 @@ out: | |||
1651 | */ | 1638 | */ |
1652 | static int try_read(struct ceph_connection *con) | 1639 | static int try_read(struct ceph_connection *con) |
1653 | { | 1640 | { |
1654 | struct ceph_messenger *msgr; | ||
1655 | int ret = -1; | 1641 | int ret = -1; |
1656 | 1642 | ||
1657 | if (!con->sock) | 1643 | if (!con->sock) |
@@ -1661,9 +1647,6 @@ static int try_read(struct ceph_connection *con) | |||
1661 | return 0; | 1647 | return 0; |
1662 | 1648 | ||
1663 | dout("try_read start on %p\n", con); | 1649 | dout("try_read start on %p\n", con); |
1664 | msgr = con->msgr; | ||
1665 | |||
1666 | mutex_lock(&con->mutex); | ||
1667 | 1650 | ||
1668 | more: | 1651 | more: |
1669 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, | 1652 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, |
@@ -1758,7 +1741,6 @@ more: | |||
1758 | done: | 1741 | done: |
1759 | ret = 0; | 1742 | ret = 0; |
1760 | out: | 1743 | out: |
1761 | mutex_unlock(&con->mutex); | ||
1762 | dout("try_read done on %p\n", con); | 1744 | dout("try_read done on %p\n", con); |
1763 | return ret; | 1745 | return ret; |
1764 | 1746 | ||
@@ -1830,6 +1812,8 @@ more: | |||
1830 | dout("con_work %p start, clearing QUEUED\n", con); | 1812 | dout("con_work %p start, clearing QUEUED\n", con); |
1831 | clear_bit(QUEUED, &con->state); | 1813 | clear_bit(QUEUED, &con->state); |
1832 | 1814 | ||
1815 | mutex_lock(&con->mutex); | ||
1816 | |||
1833 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ | 1817 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ |
1834 | dout("con_work CLOSED\n"); | 1818 | dout("con_work CLOSED\n"); |
1835 | con_close_socket(con); | 1819 | con_close_socket(con); |
@@ -1844,11 +1828,16 @@ more: | |||
1844 | if (test_and_clear_bit(SOCK_CLOSED, &con->state) || | 1828 | if (test_and_clear_bit(SOCK_CLOSED, &con->state) || |
1845 | try_read(con) < 0 || | 1829 | try_read(con) < 0 || |
1846 | try_write(con) < 0) { | 1830 | try_write(con) < 0) { |
1831 | mutex_unlock(&con->mutex); | ||
1847 | backoff = 1; | 1832 | backoff = 1; |
1848 | ceph_fault(con); /* error/fault path */ | 1833 | ceph_fault(con); /* error/fault path */ |
1834 | goto done_unlocked; | ||
1849 | } | 1835 | } |
1850 | 1836 | ||
1851 | done: | 1837 | done: |
1838 | mutex_unlock(&con->mutex); | ||
1839 | |||
1840 | done_unlocked: | ||
1852 | clear_bit(BUSY, &con->state); | 1841 | clear_bit(BUSY, &con->state); |
1853 | dout("con->state=%lu\n", con->state); | 1842 | dout("con->state=%lu\n", con->state); |
1854 | if (test_bit(QUEUED, &con->state)) { | 1843 | if (test_bit(QUEUED, &con->state)) { |
@@ -1947,7 +1936,7 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | |||
1947 | 1936 | ||
1948 | /* the zero page is needed if a request is "canceled" while the message | 1937 | /* the zero page is needed if a request is "canceled" while the message |
1949 | * is being written over the socket */ | 1938 | * is being written over the socket */ |
1950 | msgr->zero_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 1939 | msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO); |
1951 | if (!msgr->zero_page) { | 1940 | if (!msgr->zero_page) { |
1952 | kfree(msgr); | 1941 | kfree(msgr); |
1953 | return ERR_PTR(-ENOMEM); | 1942 | return ERR_PTR(-ENOMEM); |
@@ -1987,9 +1976,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
1987 | } | 1976 | } |
1988 | 1977 | ||
1989 | /* set src+dst */ | 1978 | /* set src+dst */ |
1990 | msg->hdr.src.name = con->msgr->inst.name; | 1979 | msg->hdr.src = con->msgr->inst.name; |
1991 | msg->hdr.src.addr = con->msgr->my_enc_addr; | ||
1992 | msg->hdr.orig_src = msg->hdr.src; | ||
1993 | 1980 | ||
1994 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); | 1981 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); |
1995 | 1982 | ||
@@ -2083,12 +2070,11 @@ void ceph_con_keepalive(struct ceph_connection *con) | |||
2083 | * construct a new message with given type, size | 2070 | * construct a new message with given type, size |
2084 | * the new msg has a ref count of 1. | 2071 | * the new msg has a ref count of 1. |
2085 | */ | 2072 | */ |
2086 | struct ceph_msg *ceph_msg_new(int type, int front_len, | 2073 | struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) |
2087 | int page_len, int page_off, struct page **pages) | ||
2088 | { | 2074 | { |
2089 | struct ceph_msg *m; | 2075 | struct ceph_msg *m; |
2090 | 2076 | ||
2091 | m = kmalloc(sizeof(*m), GFP_NOFS); | 2077 | m = kmalloc(sizeof(*m), flags); |
2092 | if (m == NULL) | 2078 | if (m == NULL) |
2093 | goto out; | 2079 | goto out; |
2094 | kref_init(&m->kref); | 2080 | kref_init(&m->kref); |
@@ -2100,8 +2086,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2100 | m->hdr.version = 0; | 2086 | m->hdr.version = 0; |
2101 | m->hdr.front_len = cpu_to_le32(front_len); | 2087 | m->hdr.front_len = cpu_to_le32(front_len); |
2102 | m->hdr.middle_len = 0; | 2088 | m->hdr.middle_len = 0; |
2103 | m->hdr.data_len = cpu_to_le32(page_len); | 2089 | m->hdr.data_len = 0; |
2104 | m->hdr.data_off = cpu_to_le16(page_off); | 2090 | m->hdr.data_off = 0; |
2105 | m->hdr.reserved = 0; | 2091 | m->hdr.reserved = 0; |
2106 | m->footer.front_crc = 0; | 2092 | m->footer.front_crc = 0; |
2107 | m->footer.middle_crc = 0; | 2093 | m->footer.middle_crc = 0; |
@@ -2115,11 +2101,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2115 | /* front */ | 2101 | /* front */ |
2116 | if (front_len) { | 2102 | if (front_len) { |
2117 | if (front_len > PAGE_CACHE_SIZE) { | 2103 | if (front_len > PAGE_CACHE_SIZE) { |
2118 | m->front.iov_base = __vmalloc(front_len, GFP_NOFS, | 2104 | m->front.iov_base = __vmalloc(front_len, flags, |
2119 | PAGE_KERNEL); | 2105 | PAGE_KERNEL); |
2120 | m->front_is_vmalloc = true; | 2106 | m->front_is_vmalloc = true; |
2121 | } else { | 2107 | } else { |
2122 | m->front.iov_base = kmalloc(front_len, GFP_NOFS); | 2108 | m->front.iov_base = kmalloc(front_len, flags); |
2123 | } | 2109 | } |
2124 | if (m->front.iov_base == NULL) { | 2110 | if (m->front.iov_base == NULL) { |
2125 | pr_err("msg_new can't allocate %d bytes\n", | 2111 | pr_err("msg_new can't allocate %d bytes\n", |
@@ -2135,19 +2121,18 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2135 | m->middle = NULL; | 2121 | m->middle = NULL; |
2136 | 2122 | ||
2137 | /* data */ | 2123 | /* data */ |
2138 | m->nr_pages = calc_pages_for(page_off, page_len); | 2124 | m->nr_pages = 0; |
2139 | m->pages = pages; | 2125 | m->pages = NULL; |
2140 | m->pagelist = NULL; | 2126 | m->pagelist = NULL; |
2141 | 2127 | ||
2142 | dout("ceph_msg_new %p page %d~%d -> %d\n", m, page_off, page_len, | 2128 | dout("ceph_msg_new %p front %d\n", m, front_len); |
2143 | m->nr_pages); | ||
2144 | return m; | 2129 | return m; |
2145 | 2130 | ||
2146 | out2: | 2131 | out2: |
2147 | ceph_msg_put(m); | 2132 | ceph_msg_put(m); |
2148 | out: | 2133 | out: |
2149 | pr_err("msg_new can't create type %d len %d\n", type, front_len); | 2134 | pr_err("msg_new can't create type %d front %d\n", type, front_len); |
2150 | return ERR_PTR(-ENOMEM); | 2135 | return NULL; |
2151 | } | 2136 | } |
2152 | 2137 | ||
2153 | /* | 2138 | /* |
@@ -2190,29 +2175,25 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | |||
2190 | mutex_unlock(&con->mutex); | 2175 | mutex_unlock(&con->mutex); |
2191 | msg = con->ops->alloc_msg(con, hdr, skip); | 2176 | msg = con->ops->alloc_msg(con, hdr, skip); |
2192 | mutex_lock(&con->mutex); | 2177 | mutex_lock(&con->mutex); |
2193 | if (IS_ERR(msg)) | 2178 | if (!msg || *skip) |
2194 | return msg; | ||
2195 | |||
2196 | if (*skip) | ||
2197 | return NULL; | 2179 | return NULL; |
2198 | } | 2180 | } |
2199 | if (!msg) { | 2181 | if (!msg) { |
2200 | *skip = 0; | 2182 | *skip = 0; |
2201 | msg = ceph_msg_new(type, front_len, 0, 0, NULL); | 2183 | msg = ceph_msg_new(type, front_len, GFP_NOFS); |
2202 | if (!msg) { | 2184 | if (!msg) { |
2203 | pr_err("unable to allocate msg type %d len %d\n", | 2185 | pr_err("unable to allocate msg type %d len %d\n", |
2204 | type, front_len); | 2186 | type, front_len); |
2205 | return ERR_PTR(-ENOMEM); | 2187 | return NULL; |
2206 | } | 2188 | } |
2207 | } | 2189 | } |
2208 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); | 2190 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); |
2209 | 2191 | ||
2210 | if (middle_len) { | 2192 | if (middle_len && !msg->middle) { |
2211 | ret = ceph_alloc_middle(con, msg); | 2193 | ret = ceph_alloc_middle(con, msg); |
2212 | |||
2213 | if (ret < 0) { | 2194 | if (ret < 0) { |
2214 | ceph_msg_put(msg); | 2195 | ceph_msg_put(msg); |
2215 | return msg; | 2196 | return NULL; |
2216 | } | 2197 | } |
2217 | } | 2198 | } |
2218 | 2199 | ||
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a5caf91cc971..00a9430b1ffc 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h | |||
@@ -49,10 +49,8 @@ struct ceph_connection_operations { | |||
49 | int *skip); | 49 | int *skip); |
50 | }; | 50 | }; |
51 | 51 | ||
52 | extern const char *ceph_name_type_str(int t); | ||
53 | |||
54 | /* use format string %s%d */ | 52 | /* use format string %s%d */ |
55 | #define ENTITY_NAME(n) ceph_name_type_str((n).type), le64_to_cpu((n).num) | 53 | #define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num) |
56 | 54 | ||
57 | struct ceph_messenger { | 55 | struct ceph_messenger { |
58 | struct ceph_entity_inst inst; /* my name+address */ | 56 | struct ceph_entity_inst inst; /* my name+address */ |
@@ -144,6 +142,7 @@ struct ceph_connection { | |||
144 | struct ceph_entity_addr peer_addr; /* peer address */ | 142 | struct ceph_entity_addr peer_addr; /* peer address */ |
145 | struct ceph_entity_name peer_name; /* peer name */ | 143 | struct ceph_entity_name peer_name; /* peer name */ |
146 | struct ceph_entity_addr peer_addr_for_me; | 144 | struct ceph_entity_addr peer_addr_for_me; |
145 | unsigned peer_features; | ||
147 | u32 connect_seq; /* identify the most recent connection | 146 | u32 connect_seq; /* identify the most recent connection |
148 | attempt for this connection, client */ | 147 | attempt for this connection, client */ |
149 | u32 peer_global_seq; /* peer's global seq for this connection */ | 148 | u32 peer_global_seq; /* peer's global seq for this connection */ |
@@ -158,7 +157,6 @@ struct ceph_connection { | |||
158 | struct list_head out_queue; | 157 | struct list_head out_queue; |
159 | struct list_head out_sent; /* sending or sent but unacked */ | 158 | struct list_head out_sent; /* sending or sent but unacked */ |
160 | u64 out_seq; /* last message queued for send */ | 159 | u64 out_seq; /* last message queued for send */ |
161 | u64 out_seq_sent; /* last message sent */ | ||
162 | bool out_keepalive_pending; | 160 | bool out_keepalive_pending; |
163 | 161 | ||
164 | u64 in_seq, in_seq_acked; /* last message received, acked */ | 162 | u64 in_seq, in_seq_acked; /* last message received, acked */ |
@@ -234,9 +232,7 @@ extern void ceph_con_keepalive(struct ceph_connection *con); | |||
234 | extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); | 232 | extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); |
235 | extern void ceph_con_put(struct ceph_connection *con); | 233 | extern void ceph_con_put(struct ceph_connection *con); |
236 | 234 | ||
237 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, | 235 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags); |
238 | int page_len, int page_off, | ||
239 | struct page **pages); | ||
240 | extern void ceph_msg_kfree(struct ceph_msg *m); | 236 | extern void ceph_msg_kfree(struct ceph_msg *m); |
241 | 237 | ||
242 | 238 | ||
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 8fdc011ca956..f6510a476e7e 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c | |||
@@ -28,7 +28,7 @@ | |||
28 | * resend any outstanding requests. | 28 | * resend any outstanding requests. |
29 | */ | 29 | */ |
30 | 30 | ||
31 | const static struct ceph_connection_operations mon_con_ops; | 31 | static const struct ceph_connection_operations mon_con_ops; |
32 | 32 | ||
33 | static int __validate_auth(struct ceph_mon_client *monc); | 33 | static int __validate_auth(struct ceph_mon_client *monc); |
34 | 34 | ||
@@ -104,6 +104,7 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) | |||
104 | monc->pending_auth = 1; | 104 | monc->pending_auth = 1; |
105 | monc->m_auth->front.iov_len = len; | 105 | monc->m_auth->front.iov_len = len; |
106 | monc->m_auth->hdr.front_len = cpu_to_le32(len); | 106 | monc->m_auth->hdr.front_len = cpu_to_le32(len); |
107 | ceph_con_revoke(monc->con, monc->m_auth); | ||
107 | ceph_msg_get(monc->m_auth); /* keep our ref */ | 108 | ceph_msg_get(monc->m_auth); /* keep our ref */ |
108 | ceph_con_send(monc->con, monc->m_auth); | 109 | ceph_con_send(monc->con, monc->m_auth); |
109 | } | 110 | } |
@@ -187,16 +188,12 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
187 | monc->want_next_osdmap); | 188 | monc->want_next_osdmap); |
188 | if ((__sub_expired(monc) && !monc->sub_sent) || | 189 | if ((__sub_expired(monc) && !monc->sub_sent) || |
189 | monc->want_next_osdmap == 1) { | 190 | monc->want_next_osdmap == 1) { |
190 | struct ceph_msg *msg; | 191 | struct ceph_msg *msg = monc->m_subscribe; |
191 | struct ceph_mon_subscribe_item *i; | 192 | struct ceph_mon_subscribe_item *i; |
192 | void *p, *end; | 193 | void *p, *end; |
193 | 194 | ||
194 | msg = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, 0, 0, NULL); | ||
195 | if (!msg) | ||
196 | return; | ||
197 | |||
198 | p = msg->front.iov_base; | 195 | p = msg->front.iov_base; |
199 | end = p + msg->front.iov_len; | 196 | end = p + msg->front_max; |
200 | 197 | ||
201 | dout("__send_subscribe to 'mdsmap' %u+\n", | 198 | dout("__send_subscribe to 'mdsmap' %u+\n", |
202 | (unsigned)monc->have_mdsmap); | 199 | (unsigned)monc->have_mdsmap); |
@@ -226,7 +223,8 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
226 | 223 | ||
227 | msg->front.iov_len = p - msg->front.iov_base; | 224 | msg->front.iov_len = p - msg->front.iov_base; |
228 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 225 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
229 | ceph_con_send(monc->con, msg); | 226 | ceph_con_revoke(monc->con, msg); |
227 | ceph_con_send(monc->con, ceph_msg_get(msg)); | ||
230 | 228 | ||
231 | monc->sub_sent = jiffies | 1; /* never 0 */ | 229 | monc->sub_sent = jiffies | 1; /* never 0 */ |
232 | } | 230 | } |
@@ -353,14 +351,14 @@ out: | |||
353 | /* | 351 | /* |
354 | * statfs | 352 | * statfs |
355 | */ | 353 | */ |
356 | static struct ceph_mon_statfs_request *__lookup_statfs( | 354 | static struct ceph_mon_generic_request *__lookup_generic_req( |
357 | struct ceph_mon_client *monc, u64 tid) | 355 | struct ceph_mon_client *monc, u64 tid) |
358 | { | 356 | { |
359 | struct ceph_mon_statfs_request *req; | 357 | struct ceph_mon_generic_request *req; |
360 | struct rb_node *n = monc->statfs_request_tree.rb_node; | 358 | struct rb_node *n = monc->generic_request_tree.rb_node; |
361 | 359 | ||
362 | while (n) { | 360 | while (n) { |
363 | req = rb_entry(n, struct ceph_mon_statfs_request, node); | 361 | req = rb_entry(n, struct ceph_mon_generic_request, node); |
364 | if (tid < req->tid) | 362 | if (tid < req->tid) |
365 | n = n->rb_left; | 363 | n = n->rb_left; |
366 | else if (tid > req->tid) | 364 | else if (tid > req->tid) |
@@ -371,16 +369,16 @@ static struct ceph_mon_statfs_request *__lookup_statfs( | |||
371 | return NULL; | 369 | return NULL; |
372 | } | 370 | } |
373 | 371 | ||
374 | static void __insert_statfs(struct ceph_mon_client *monc, | 372 | static void __insert_generic_request(struct ceph_mon_client *monc, |
375 | struct ceph_mon_statfs_request *new) | 373 | struct ceph_mon_generic_request *new) |
376 | { | 374 | { |
377 | struct rb_node **p = &monc->statfs_request_tree.rb_node; | 375 | struct rb_node **p = &monc->generic_request_tree.rb_node; |
378 | struct rb_node *parent = NULL; | 376 | struct rb_node *parent = NULL; |
379 | struct ceph_mon_statfs_request *req = NULL; | 377 | struct ceph_mon_generic_request *req = NULL; |
380 | 378 | ||
381 | while (*p) { | 379 | while (*p) { |
382 | parent = *p; | 380 | parent = *p; |
383 | req = rb_entry(parent, struct ceph_mon_statfs_request, node); | 381 | req = rb_entry(parent, struct ceph_mon_generic_request, node); |
384 | if (new->tid < req->tid) | 382 | if (new->tid < req->tid) |
385 | p = &(*p)->rb_left; | 383 | p = &(*p)->rb_left; |
386 | else if (new->tid > req->tid) | 384 | else if (new->tid > req->tid) |
@@ -390,113 +388,157 @@ static void __insert_statfs(struct ceph_mon_client *monc, | |||
390 | } | 388 | } |
391 | 389 | ||
392 | rb_link_node(&new->node, parent, p); | 390 | rb_link_node(&new->node, parent, p); |
393 | rb_insert_color(&new->node, &monc->statfs_request_tree); | 391 | rb_insert_color(&new->node, &monc->generic_request_tree); |
392 | } | ||
393 | |||
394 | static void release_generic_request(struct kref *kref) | ||
395 | { | ||
396 | struct ceph_mon_generic_request *req = | ||
397 | container_of(kref, struct ceph_mon_generic_request, kref); | ||
398 | |||
399 | if (req->reply) | ||
400 | ceph_msg_put(req->reply); | ||
401 | if (req->request) | ||
402 | ceph_msg_put(req->request); | ||
403 | } | ||
404 | |||
405 | static void put_generic_request(struct ceph_mon_generic_request *req) | ||
406 | { | ||
407 | kref_put(&req->kref, release_generic_request); | ||
408 | } | ||
409 | |||
410 | static void get_generic_request(struct ceph_mon_generic_request *req) | ||
411 | { | ||
412 | kref_get(&req->kref); | ||
413 | } | ||
414 | |||
415 | static struct ceph_msg *get_generic_reply(struct ceph_connection *con, | ||
416 | struct ceph_msg_header *hdr, | ||
417 | int *skip) | ||
418 | { | ||
419 | struct ceph_mon_client *monc = con->private; | ||
420 | struct ceph_mon_generic_request *req; | ||
421 | u64 tid = le64_to_cpu(hdr->tid); | ||
422 | struct ceph_msg *m; | ||
423 | |||
424 | mutex_lock(&monc->mutex); | ||
425 | req = __lookup_generic_req(monc, tid); | ||
426 | if (!req) { | ||
427 | dout("get_generic_reply %lld dne\n", tid); | ||
428 | *skip = 1; | ||
429 | m = NULL; | ||
430 | } else { | ||
431 | dout("get_generic_reply %lld got %p\n", tid, req->reply); | ||
432 | m = ceph_msg_get(req->reply); | ||
433 | /* | ||
434 | * we don't need to track the connection reading into | ||
435 | * this reply because we only have one open connection | ||
436 | * at a time, ever. | ||
437 | */ | ||
438 | } | ||
439 | mutex_unlock(&monc->mutex); | ||
440 | return m; | ||
394 | } | 441 | } |
395 | 442 | ||
396 | static void handle_statfs_reply(struct ceph_mon_client *monc, | 443 | static void handle_statfs_reply(struct ceph_mon_client *monc, |
397 | struct ceph_msg *msg) | 444 | struct ceph_msg *msg) |
398 | { | 445 | { |
399 | struct ceph_mon_statfs_request *req; | 446 | struct ceph_mon_generic_request *req; |
400 | struct ceph_mon_statfs_reply *reply = msg->front.iov_base; | 447 | struct ceph_mon_statfs_reply *reply = msg->front.iov_base; |
401 | u64 tid; | 448 | u64 tid = le64_to_cpu(msg->hdr.tid); |
402 | 449 | ||
403 | if (msg->front.iov_len != sizeof(*reply)) | 450 | if (msg->front.iov_len != sizeof(*reply)) |
404 | goto bad; | 451 | goto bad; |
405 | tid = le64_to_cpu(msg->hdr.tid); | ||
406 | dout("handle_statfs_reply %p tid %llu\n", msg, tid); | 452 | dout("handle_statfs_reply %p tid %llu\n", msg, tid); |
407 | 453 | ||
408 | mutex_lock(&monc->mutex); | 454 | mutex_lock(&monc->mutex); |
409 | req = __lookup_statfs(monc, tid); | 455 | req = __lookup_generic_req(monc, tid); |
410 | if (req) { | 456 | if (req) { |
411 | *req->buf = reply->st; | 457 | *(struct ceph_statfs *)req->buf = reply->st; |
412 | req->result = 0; | 458 | req->result = 0; |
459 | get_generic_request(req); | ||
413 | } | 460 | } |
414 | mutex_unlock(&monc->mutex); | 461 | mutex_unlock(&monc->mutex); |
415 | if (req) | 462 | if (req) { |
416 | complete(&req->completion); | 463 | complete(&req->completion); |
464 | put_generic_request(req); | ||
465 | } | ||
417 | return; | 466 | return; |
418 | 467 | ||
419 | bad: | 468 | bad: |
420 | pr_err("corrupt statfs reply, no tid\n"); | 469 | pr_err("corrupt generic reply, no tid\n"); |
421 | ceph_msg_dump(msg); | 470 | ceph_msg_dump(msg); |
422 | } | 471 | } |
423 | 472 | ||
424 | /* | 473 | /* |
425 | * (re)send a statfs request | 474 | * Do a synchronous statfs(). |
426 | */ | 475 | */ |
427 | static int send_statfs(struct ceph_mon_client *monc, | 476 | int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) |
428 | struct ceph_mon_statfs_request *req) | ||
429 | { | 477 | { |
430 | struct ceph_msg *msg; | 478 | struct ceph_mon_generic_request *req; |
431 | struct ceph_mon_statfs *h; | 479 | struct ceph_mon_statfs *h; |
480 | int err; | ||
432 | 481 | ||
433 | dout("send_statfs tid %llu\n", req->tid); | 482 | req = kzalloc(sizeof(*req), GFP_NOFS); |
434 | msg = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), 0, 0, NULL); | 483 | if (!req) |
435 | if (IS_ERR(msg)) | 484 | return -ENOMEM; |
436 | return PTR_ERR(msg); | 485 | |
437 | req->request = msg; | 486 | kref_init(&req->kref); |
438 | msg->hdr.tid = cpu_to_le64(req->tid); | 487 | req->buf = buf; |
439 | h = msg->front.iov_base; | 488 | init_completion(&req->completion); |
489 | |||
490 | err = -ENOMEM; | ||
491 | req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS); | ||
492 | if (!req->request) | ||
493 | goto out; | ||
494 | req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS); | ||
495 | if (!req->reply) | ||
496 | goto out; | ||
497 | |||
498 | /* fill out request */ | ||
499 | h = req->request->front.iov_base; | ||
440 | h->monhdr.have_version = 0; | 500 | h->monhdr.have_version = 0; |
441 | h->monhdr.session_mon = cpu_to_le16(-1); | 501 | h->monhdr.session_mon = cpu_to_le16(-1); |
442 | h->monhdr.session_mon_tid = 0; | 502 | h->monhdr.session_mon_tid = 0; |
443 | h->fsid = monc->monmap->fsid; | 503 | h->fsid = monc->monmap->fsid; |
444 | ceph_con_send(monc->con, msg); | ||
445 | return 0; | ||
446 | } | ||
447 | |||
448 | /* | ||
449 | * Do a synchronous statfs(). | ||
450 | */ | ||
451 | int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | ||
452 | { | ||
453 | struct ceph_mon_statfs_request req; | ||
454 | int err; | ||
455 | |||
456 | req.buf = buf; | ||
457 | init_completion(&req.completion); | ||
458 | |||
459 | /* allocate memory for reply */ | ||
460 | err = ceph_msgpool_resv(&monc->msgpool_statfs_reply, 1); | ||
461 | if (err) | ||
462 | return err; | ||
463 | 504 | ||
464 | /* register request */ | 505 | /* register request */ |
465 | mutex_lock(&monc->mutex); | 506 | mutex_lock(&monc->mutex); |
466 | req.tid = ++monc->last_tid; | 507 | req->tid = ++monc->last_tid; |
467 | req.last_attempt = jiffies; | 508 | req->request->hdr.tid = cpu_to_le64(req->tid); |
468 | req.delay = BASE_DELAY_INTERVAL; | 509 | __insert_generic_request(monc, req); |
469 | __insert_statfs(monc, &req); | 510 | monc->num_generic_requests++; |
470 | monc->num_statfs_requests++; | ||
471 | mutex_unlock(&monc->mutex); | 511 | mutex_unlock(&monc->mutex); |
472 | 512 | ||
473 | /* send request and wait */ | 513 | /* send request and wait */ |
474 | err = send_statfs(monc, &req); | 514 | ceph_con_send(monc->con, ceph_msg_get(req->request)); |
475 | if (!err) | 515 | err = wait_for_completion_interruptible(&req->completion); |
476 | err = wait_for_completion_interruptible(&req.completion); | ||
477 | 516 | ||
478 | mutex_lock(&monc->mutex); | 517 | mutex_lock(&monc->mutex); |
479 | rb_erase(&req.node, &monc->statfs_request_tree); | 518 | rb_erase(&req->node, &monc->generic_request_tree); |
480 | monc->num_statfs_requests--; | 519 | monc->num_generic_requests--; |
481 | ceph_msgpool_resv(&monc->msgpool_statfs_reply, -1); | ||
482 | mutex_unlock(&monc->mutex); | 520 | mutex_unlock(&monc->mutex); |
483 | 521 | ||
484 | if (!err) | 522 | if (!err) |
485 | err = req.result; | 523 | err = req->result; |
524 | |||
525 | out: | ||
526 | kref_put(&req->kref, release_generic_request); | ||
486 | return err; | 527 | return err; |
487 | } | 528 | } |
488 | 529 | ||
489 | /* | 530 | /* |
490 | * Resend pending statfs requests. | 531 | * Resend pending statfs requests. |
491 | */ | 532 | */ |
492 | static void __resend_statfs(struct ceph_mon_client *monc) | 533 | static void __resend_generic_request(struct ceph_mon_client *monc) |
493 | { | 534 | { |
494 | struct ceph_mon_statfs_request *req; | 535 | struct ceph_mon_generic_request *req; |
495 | struct rb_node *p; | 536 | struct rb_node *p; |
496 | 537 | ||
497 | for (p = rb_first(&monc->statfs_request_tree); p; p = rb_next(p)) { | 538 | for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { |
498 | req = rb_entry(p, struct ceph_mon_statfs_request, node); | 539 | req = rb_entry(p, struct ceph_mon_generic_request, node); |
499 | send_statfs(monc, req); | 540 | ceph_con_revoke(monc->con, req->request); |
541 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | ||
500 | } | 542 | } |
501 | } | 543 | } |
502 | 544 | ||
@@ -586,26 +628,26 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
586 | CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | | 628 | CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | |
587 | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; | 629 | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; |
588 | 630 | ||
589 | /* msg pools */ | 631 | /* msgs */ |
590 | err = ceph_msgpool_init(&monc->msgpool_subscribe_ack, | 632 | err = -ENOMEM; |
591 | sizeof(struct ceph_mon_subscribe_ack), 1, false); | 633 | monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, |
592 | if (err < 0) | 634 | sizeof(struct ceph_mon_subscribe_ack), |
635 | GFP_NOFS); | ||
636 | if (!monc->m_subscribe_ack) | ||
593 | goto out_monmap; | 637 | goto out_monmap; |
594 | err = ceph_msgpool_init(&monc->msgpool_statfs_reply, | 638 | |
595 | sizeof(struct ceph_mon_statfs_reply), 0, false); | 639 | monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS); |
596 | if (err < 0) | 640 | if (!monc->m_subscribe) |
597 | goto out_pool1; | 641 | goto out_subscribe_ack; |
598 | err = ceph_msgpool_init(&monc->msgpool_auth_reply, 4096, 1, false); | 642 | |
599 | if (err < 0) | 643 | monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS); |
600 | goto out_pool2; | 644 | if (!monc->m_auth_reply) |
601 | 645 | goto out_subscribe; | |
602 | monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, 0, 0, NULL); | 646 | |
647 | monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS); | ||
603 | monc->pending_auth = 0; | 648 | monc->pending_auth = 0; |
604 | if (IS_ERR(monc->m_auth)) { | 649 | if (!monc->m_auth) |
605 | err = PTR_ERR(monc->m_auth); | 650 | goto out_auth_reply; |
606 | monc->m_auth = NULL; | ||
607 | goto out_pool3; | ||
608 | } | ||
609 | 651 | ||
610 | monc->cur_mon = -1; | 652 | monc->cur_mon = -1; |
611 | monc->hunting = true; | 653 | monc->hunting = true; |
@@ -613,8 +655,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
613 | monc->sub_sent = 0; | 655 | monc->sub_sent = 0; |
614 | 656 | ||
615 | INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); | 657 | INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); |
616 | monc->statfs_request_tree = RB_ROOT; | 658 | monc->generic_request_tree = RB_ROOT; |
617 | monc->num_statfs_requests = 0; | 659 | monc->num_generic_requests = 0; |
618 | monc->last_tid = 0; | 660 | monc->last_tid = 0; |
619 | 661 | ||
620 | monc->have_mdsmap = 0; | 662 | monc->have_mdsmap = 0; |
@@ -622,12 +664,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
622 | monc->want_next_osdmap = 1; | 664 | monc->want_next_osdmap = 1; |
623 | return 0; | 665 | return 0; |
624 | 666 | ||
625 | out_pool3: | 667 | out_auth_reply: |
626 | ceph_msgpool_destroy(&monc->msgpool_auth_reply); | 668 | ceph_msg_put(monc->m_auth_reply); |
627 | out_pool2: | 669 | out_subscribe: |
628 | ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); | 670 | ceph_msg_put(monc->m_subscribe); |
629 | out_pool1: | 671 | out_subscribe_ack: |
630 | ceph_msgpool_destroy(&monc->msgpool_statfs_reply); | 672 | ceph_msg_put(monc->m_subscribe_ack); |
631 | out_monmap: | 673 | out_monmap: |
632 | kfree(monc->monmap); | 674 | kfree(monc->monmap); |
633 | out: | 675 | out: |
@@ -651,9 +693,9 @@ void ceph_monc_stop(struct ceph_mon_client *monc) | |||
651 | ceph_auth_destroy(monc->auth); | 693 | ceph_auth_destroy(monc->auth); |
652 | 694 | ||
653 | ceph_msg_put(monc->m_auth); | 695 | ceph_msg_put(monc->m_auth); |
654 | ceph_msgpool_destroy(&monc->msgpool_subscribe_ack); | 696 | ceph_msg_put(monc->m_auth_reply); |
655 | ceph_msgpool_destroy(&monc->msgpool_statfs_reply); | 697 | ceph_msg_put(monc->m_subscribe); |
656 | ceph_msgpool_destroy(&monc->msgpool_auth_reply); | 698 | ceph_msg_put(monc->m_subscribe_ack); |
657 | 699 | ||
658 | kfree(monc->monmap); | 700 | kfree(monc->monmap); |
659 | } | 701 | } |
@@ -681,7 +723,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, | |||
681 | monc->client->msgr->inst.name.num = monc->auth->global_id; | 723 | monc->client->msgr->inst.name.num = monc->auth->global_id; |
682 | 724 | ||
683 | __send_subscribe(monc); | 725 | __send_subscribe(monc); |
684 | __resend_statfs(monc); | 726 | __resend_generic_request(monc); |
685 | } | 727 | } |
686 | mutex_unlock(&monc->mutex); | 728 | mutex_unlock(&monc->mutex); |
687 | } | 729 | } |
@@ -770,18 +812,17 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, | |||
770 | 812 | ||
771 | switch (type) { | 813 | switch (type) { |
772 | case CEPH_MSG_MON_SUBSCRIBE_ACK: | 814 | case CEPH_MSG_MON_SUBSCRIBE_ACK: |
773 | m = ceph_msgpool_get(&monc->msgpool_subscribe_ack, front_len); | 815 | m = ceph_msg_get(monc->m_subscribe_ack); |
774 | break; | 816 | break; |
775 | case CEPH_MSG_STATFS_REPLY: | 817 | case CEPH_MSG_STATFS_REPLY: |
776 | m = ceph_msgpool_get(&monc->msgpool_statfs_reply, front_len); | 818 | return get_generic_reply(con, hdr, skip); |
777 | break; | ||
778 | case CEPH_MSG_AUTH_REPLY: | 819 | case CEPH_MSG_AUTH_REPLY: |
779 | m = ceph_msgpool_get(&monc->msgpool_auth_reply, front_len); | 820 | m = ceph_msg_get(monc->m_auth_reply); |
780 | break; | 821 | break; |
781 | case CEPH_MSG_MON_MAP: | 822 | case CEPH_MSG_MON_MAP: |
782 | case CEPH_MSG_MDS_MAP: | 823 | case CEPH_MSG_MDS_MAP: |
783 | case CEPH_MSG_OSD_MAP: | 824 | case CEPH_MSG_OSD_MAP: |
784 | m = ceph_msg_new(type, front_len, 0, 0, NULL); | 825 | m = ceph_msg_new(type, front_len, GFP_NOFS); |
785 | break; | 826 | break; |
786 | } | 827 | } |
787 | 828 | ||
@@ -826,7 +867,7 @@ out: | |||
826 | mutex_unlock(&monc->mutex); | 867 | mutex_unlock(&monc->mutex); |
827 | } | 868 | } |
828 | 869 | ||
829 | const static struct ceph_connection_operations mon_con_ops = { | 870 | static const struct ceph_connection_operations mon_con_ops = { |
830 | .get = ceph_con_get, | 871 | .get = ceph_con_get, |
831 | .put = ceph_con_put, | 872 | .put = ceph_con_put, |
832 | .dispatch = dispatch, | 873 | .dispatch = dispatch, |
diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index b958ad5afa06..174d794321d0 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h | |||
@@ -2,10 +2,10 @@ | |||
2 | #define _FS_CEPH_MON_CLIENT_H | 2 | #define _FS_CEPH_MON_CLIENT_H |
3 | 3 | ||
4 | #include <linux/completion.h> | 4 | #include <linux/completion.h> |
5 | #include <linux/kref.h> | ||
5 | #include <linux/rbtree.h> | 6 | #include <linux/rbtree.h> |
6 | 7 | ||
7 | #include "messenger.h" | 8 | #include "messenger.h" |
8 | #include "msgpool.h" | ||
9 | 9 | ||
10 | struct ceph_client; | 10 | struct ceph_client; |
11 | struct ceph_mount_args; | 11 | struct ceph_mount_args; |
@@ -22,7 +22,7 @@ struct ceph_monmap { | |||
22 | }; | 22 | }; |
23 | 23 | ||
24 | struct ceph_mon_client; | 24 | struct ceph_mon_client; |
25 | struct ceph_mon_statfs_request; | 25 | struct ceph_mon_generic_request; |
26 | 26 | ||
27 | 27 | ||
28 | /* | 28 | /* |
@@ -40,17 +40,19 @@ struct ceph_mon_request { | |||
40 | }; | 40 | }; |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * statfs() is done a bit differently because we need to get data back | 43 | * ceph_mon_generic_request is being used for the statfs and poolop requests |
44 | which are being done a bit differently because we need to get data back | ||
44 | * to the caller | 45 | * to the caller |
45 | */ | 46 | */ |
46 | struct ceph_mon_statfs_request { | 47 | struct ceph_mon_generic_request { |
48 | struct kref kref; | ||
47 | u64 tid; | 49 | u64 tid; |
48 | struct rb_node node; | 50 | struct rb_node node; |
49 | int result; | 51 | int result; |
50 | struct ceph_statfs *buf; | 52 | void *buf; |
51 | struct completion completion; | 53 | struct completion completion; |
52 | unsigned long last_attempt, delay; /* jiffies */ | ||
53 | struct ceph_msg *request; /* original request */ | 54 | struct ceph_msg *request; /* original request */ |
55 | struct ceph_msg *reply; /* and reply */ | ||
54 | }; | 56 | }; |
55 | 57 | ||
56 | struct ceph_mon_client { | 58 | struct ceph_mon_client { |
@@ -61,7 +63,7 @@ struct ceph_mon_client { | |||
61 | struct delayed_work delayed_work; | 63 | struct delayed_work delayed_work; |
62 | 64 | ||
63 | struct ceph_auth_client *auth; | 65 | struct ceph_auth_client *auth; |
64 | struct ceph_msg *m_auth; | 66 | struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack; |
65 | int pending_auth; | 67 | int pending_auth; |
66 | 68 | ||
67 | bool hunting; | 69 | bool hunting; |
@@ -70,14 +72,9 @@ struct ceph_mon_client { | |||
70 | struct ceph_connection *con; | 72 | struct ceph_connection *con; |
71 | bool have_fsid; | 73 | bool have_fsid; |
72 | 74 | ||
73 | /* msg pools */ | 75 | /* pending generic requests */ |
74 | struct ceph_msgpool msgpool_subscribe_ack; | 76 | struct rb_root generic_request_tree; |
75 | struct ceph_msgpool msgpool_statfs_reply; | 77 | int num_generic_requests; |
76 | struct ceph_msgpool msgpool_auth_reply; | ||
77 | |||
78 | /* pending statfs requests */ | ||
79 | struct rb_root statfs_request_tree; | ||
80 | int num_statfs_requests; | ||
81 | u64 last_tid; | 78 | u64 last_tid; |
82 | 79 | ||
83 | /* mds/osd map */ | 80 | /* mds/osd map */ |
diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index ca3b44a89f2d..dd65a6438131 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c | |||
@@ -7,180 +7,58 @@ | |||
7 | 7 | ||
8 | #include "msgpool.h" | 8 | #include "msgpool.h" |
9 | 9 | ||
10 | /* | 10 | static void *alloc_fn(gfp_t gfp_mask, void *arg) |
11 | * We use msg pools to preallocate memory for messages we expect to | 11 | { |
12 | * receive over the wire, to avoid getting ourselves into OOM | 12 | struct ceph_msgpool *pool = arg; |
13 | * conditions at unexpected times. We take use a few different | 13 | void *p; |
14 | * strategies: | ||
15 | * | ||
16 | * - for request/response type interactions, we preallocate the | ||
17 | * memory needed for the response when we generate the request. | ||
18 | * | ||
19 | * - for messages we can receive at any time from the MDS, we preallocate | ||
20 | * a pool of messages we can re-use. | ||
21 | * | ||
22 | * - for writeback, we preallocate some number of messages to use for | ||
23 | * requests and their replies, so that we always make forward | ||
24 | * progress. | ||
25 | * | ||
26 | * The msgpool behaves like a mempool_t, but keeps preallocated | ||
27 | * ceph_msgs strung together on a list_head instead of using a pointer | ||
28 | * vector. This avoids vector reallocation when we adjust the number | ||
29 | * of preallocated items (which happens frequently). | ||
30 | */ | ||
31 | 14 | ||
15 | p = ceph_msg_new(0, pool->front_len, gfp_mask); | ||
16 | if (!p) | ||
17 | pr_err("msgpool %s alloc failed\n", pool->name); | ||
18 | return p; | ||
19 | } | ||
32 | 20 | ||
33 | /* | 21 | static void free_fn(void *element, void *arg) |
34 | * Allocate or release as necessary to meet our target pool size. | ||
35 | */ | ||
36 | static int __fill_msgpool(struct ceph_msgpool *pool) | ||
37 | { | 22 | { |
38 | struct ceph_msg *msg; | 23 | ceph_msg_put(element); |
39 | |||
40 | while (pool->num < pool->min) { | ||
41 | dout("fill_msgpool %p %d/%d allocating\n", pool, pool->num, | ||
42 | pool->min); | ||
43 | spin_unlock(&pool->lock); | ||
44 | msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL); | ||
45 | spin_lock(&pool->lock); | ||
46 | if (IS_ERR(msg)) | ||
47 | return PTR_ERR(msg); | ||
48 | msg->pool = pool; | ||
49 | list_add(&msg->list_head, &pool->msgs); | ||
50 | pool->num++; | ||
51 | } | ||
52 | while (pool->num > pool->min) { | ||
53 | msg = list_first_entry(&pool->msgs, struct ceph_msg, list_head); | ||
54 | dout("fill_msgpool %p %d/%d releasing %p\n", pool, pool->num, | ||
55 | pool->min, msg); | ||
56 | list_del_init(&msg->list_head); | ||
57 | pool->num--; | ||
58 | ceph_msg_kfree(msg); | ||
59 | } | ||
60 | return 0; | ||
61 | } | 24 | } |
62 | 25 | ||
63 | int ceph_msgpool_init(struct ceph_msgpool *pool, | 26 | int ceph_msgpool_init(struct ceph_msgpool *pool, |
64 | int front_len, int min, bool blocking) | 27 | int front_len, int size, bool blocking, const char *name) |
65 | { | 28 | { |
66 | int ret; | ||
67 | |||
68 | dout("msgpool_init %p front_len %d min %d\n", pool, front_len, min); | ||
69 | spin_lock_init(&pool->lock); | ||
70 | pool->front_len = front_len; | 29 | pool->front_len = front_len; |
71 | INIT_LIST_HEAD(&pool->msgs); | 30 | pool->pool = mempool_create(size, alloc_fn, free_fn, pool); |
72 | pool->num = 0; | 31 | if (!pool->pool) |
73 | pool->min = min; | 32 | return -ENOMEM; |
74 | pool->blocking = blocking; | 33 | pool->name = name; |
75 | init_waitqueue_head(&pool->wait); | 34 | return 0; |
76 | |||
77 | spin_lock(&pool->lock); | ||
78 | ret = __fill_msgpool(pool); | ||
79 | spin_unlock(&pool->lock); | ||
80 | return ret; | ||
81 | } | 35 | } |
82 | 36 | ||
83 | void ceph_msgpool_destroy(struct ceph_msgpool *pool) | 37 | void ceph_msgpool_destroy(struct ceph_msgpool *pool) |
84 | { | 38 | { |
85 | dout("msgpool_destroy %p\n", pool); | 39 | mempool_destroy(pool->pool); |
86 | spin_lock(&pool->lock); | ||
87 | pool->min = 0; | ||
88 | __fill_msgpool(pool); | ||
89 | spin_unlock(&pool->lock); | ||
90 | } | 40 | } |
91 | 41 | ||
92 | int ceph_msgpool_resv(struct ceph_msgpool *pool, int delta) | 42 | struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, |
43 | int front_len) | ||
93 | { | 44 | { |
94 | int ret; | 45 | if (front_len > pool->front_len) { |
95 | 46 | pr_err("msgpool_get pool %s need front %d, pool size is %d\n", | |
96 | spin_lock(&pool->lock); | 47 | pool->name, front_len, pool->front_len); |
97 | dout("msgpool_resv %p delta %d\n", pool, delta); | ||
98 | pool->min += delta; | ||
99 | ret = __fill_msgpool(pool); | ||
100 | spin_unlock(&pool->lock); | ||
101 | return ret; | ||
102 | } | ||
103 | |||
104 | struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) | ||
105 | { | ||
106 | wait_queue_t wait; | ||
107 | struct ceph_msg *msg; | ||
108 | |||
109 | if (front_len && front_len > pool->front_len) { | ||
110 | pr_err("msgpool_get pool %p need front %d, pool size is %d\n", | ||
111 | pool, front_len, pool->front_len); | ||
112 | WARN_ON(1); | 48 | WARN_ON(1); |
113 | 49 | ||
114 | /* try to alloc a fresh message */ | 50 | /* try to alloc a fresh message */ |
115 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | 51 | return ceph_msg_new(0, front_len, GFP_NOFS); |
116 | if (!IS_ERR(msg)) | ||
117 | return msg; | ||
118 | } | ||
119 | |||
120 | if (!front_len) | ||
121 | front_len = pool->front_len; | ||
122 | |||
123 | if (pool->blocking) { | ||
124 | /* mempool_t behavior; first try to alloc */ | ||
125 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | ||
126 | if (!IS_ERR(msg)) | ||
127 | return msg; | ||
128 | } | 52 | } |
129 | 53 | ||
130 | while (1) { | 54 | return mempool_alloc(pool->pool, GFP_NOFS); |
131 | spin_lock(&pool->lock); | ||
132 | if (likely(pool->num)) { | ||
133 | msg = list_entry(pool->msgs.next, struct ceph_msg, | ||
134 | list_head); | ||
135 | list_del_init(&msg->list_head); | ||
136 | pool->num--; | ||
137 | dout("msgpool_get %p got %p, now %d/%d\n", pool, msg, | ||
138 | pool->num, pool->min); | ||
139 | spin_unlock(&pool->lock); | ||
140 | return msg; | ||
141 | } | ||
142 | pr_err("msgpool_get %p now %d/%d, %s\n", pool, pool->num, | ||
143 | pool->min, pool->blocking ? "waiting" : "may fail"); | ||
144 | spin_unlock(&pool->lock); | ||
145 | |||
146 | if (!pool->blocking) { | ||
147 | WARN_ON(1); | ||
148 | |||
149 | /* maybe we can allocate it now? */ | ||
150 | msg = ceph_msg_new(0, front_len, 0, 0, NULL); | ||
151 | if (!IS_ERR(msg)) | ||
152 | return msg; | ||
153 | |||
154 | pr_err("msgpool_get %p empty + alloc failed\n", pool); | ||
155 | return ERR_PTR(-ENOMEM); | ||
156 | } | ||
157 | |||
158 | init_wait(&wait); | ||
159 | prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); | ||
160 | schedule(); | ||
161 | finish_wait(&pool->wait, &wait); | ||
162 | } | ||
163 | } | 55 | } |
164 | 56 | ||
165 | void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) | 57 | void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) |
166 | { | 58 | { |
167 | spin_lock(&pool->lock); | 59 | /* reset msg front_len; user may have changed it */ |
168 | if (pool->num < pool->min) { | 60 | msg->front.iov_len = pool->front_len; |
169 | /* reset msg front_len; user may have changed it */ | 61 | msg->hdr.front_len = cpu_to_le32(pool->front_len); |
170 | msg->front.iov_len = pool->front_len; | ||
171 | msg->hdr.front_len = cpu_to_le32(pool->front_len); | ||
172 | 62 | ||
173 | kref_set(&msg->kref, 1); /* retake a single ref */ | 63 | kref_init(&msg->kref); /* retake single ref */ |
174 | list_add(&msg->list_head, &pool->msgs); | ||
175 | pool->num++; | ||
176 | dout("msgpool_put %p reclaim %p, now %d/%d\n", pool, msg, | ||
177 | pool->num, pool->min); | ||
178 | spin_unlock(&pool->lock); | ||
179 | wake_up(&pool->wait); | ||
180 | } else { | ||
181 | dout("msgpool_put %p drop %p, at %d/%d\n", pool, msg, | ||
182 | pool->num, pool->min); | ||
183 | spin_unlock(&pool->lock); | ||
184 | ceph_msg_kfree(msg); | ||
185 | } | ||
186 | } | 64 | } |
diff --git a/fs/ceph/msgpool.h b/fs/ceph/msgpool.h index bc834bfcd720..a362605f9368 100644 --- a/fs/ceph/msgpool.h +++ b/fs/ceph/msgpool.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _FS_CEPH_MSGPOOL | 1 | #ifndef _FS_CEPH_MSGPOOL |
2 | #define _FS_CEPH_MSGPOOL | 2 | #define _FS_CEPH_MSGPOOL |
3 | 3 | ||
4 | #include <linux/mempool.h> | ||
4 | #include "messenger.h" | 5 | #include "messenger.h" |
5 | 6 | ||
6 | /* | 7 | /* |
@@ -8,18 +9,15 @@ | |||
8 | * avoid unexpected OOM conditions. | 9 | * avoid unexpected OOM conditions. |
9 | */ | 10 | */ |
10 | struct ceph_msgpool { | 11 | struct ceph_msgpool { |
11 | spinlock_t lock; | 12 | const char *name; |
13 | mempool_t *pool; | ||
12 | int front_len; /* preallocated payload size */ | 14 | int front_len; /* preallocated payload size */ |
13 | struct list_head msgs; /* msgs in the pool; each has 1 ref */ | ||
14 | int num, min; /* cur, min # msgs in the pool */ | ||
15 | bool blocking; | ||
16 | wait_queue_head_t wait; | ||
17 | }; | 15 | }; |
18 | 16 | ||
19 | extern int ceph_msgpool_init(struct ceph_msgpool *pool, | 17 | extern int ceph_msgpool_init(struct ceph_msgpool *pool, |
20 | int front_len, int size, bool blocking); | 18 | int front_len, int size, bool blocking, |
19 | const char *name); | ||
21 | extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); | 20 | extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); |
22 | extern int ceph_msgpool_resv(struct ceph_msgpool *, int delta); | ||
23 | extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, | 21 | extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, |
24 | int front_len); | 22 | int front_len); |
25 | extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); | 23 | extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); |
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h index 8aaab414f3f8..892a0298dfdf 100644 --- a/fs/ceph/msgr.h +++ b/fs/ceph/msgr.h | |||
@@ -50,7 +50,6 @@ struct ceph_entity_name { | |||
50 | #define CEPH_ENTITY_TYPE_MDS 0x02 | 50 | #define CEPH_ENTITY_TYPE_MDS 0x02 |
51 | #define CEPH_ENTITY_TYPE_OSD 0x04 | 51 | #define CEPH_ENTITY_TYPE_OSD 0x04 |
52 | #define CEPH_ENTITY_TYPE_CLIENT 0x08 | 52 | #define CEPH_ENTITY_TYPE_CLIENT 0x08 |
53 | #define CEPH_ENTITY_TYPE_ADMIN 0x10 | ||
54 | #define CEPH_ENTITY_TYPE_AUTH 0x20 | 53 | #define CEPH_ENTITY_TYPE_AUTH 0x20 |
55 | 54 | ||
56 | #define CEPH_ENTITY_TYPE_ANY 0xFF | 55 | #define CEPH_ENTITY_TYPE_ANY 0xFF |
@@ -120,7 +119,7 @@ struct ceph_msg_connect_reply { | |||
120 | /* | 119 | /* |
121 | * message header | 120 | * message header |
122 | */ | 121 | */ |
123 | struct ceph_msg_header { | 122 | struct ceph_msg_header_old { |
124 | __le64 seq; /* message seq# for this session */ | 123 | __le64 seq; /* message seq# for this session */ |
125 | __le64 tid; /* transaction id */ | 124 | __le64 tid; /* transaction id */ |
126 | __le16 type; /* message type */ | 125 | __le16 type; /* message type */ |
@@ -138,6 +137,24 @@ struct ceph_msg_header { | |||
138 | __le32 crc; /* header crc32c */ | 137 | __le32 crc; /* header crc32c */ |
139 | } __attribute__ ((packed)); | 138 | } __attribute__ ((packed)); |
140 | 139 | ||
140 | struct ceph_msg_header { | ||
141 | __le64 seq; /* message seq# for this session */ | ||
142 | __le64 tid; /* transaction id */ | ||
143 | __le16 type; /* message type */ | ||
144 | __le16 priority; /* priority. higher value == higher priority */ | ||
145 | __le16 version; /* version of message encoding */ | ||
146 | |||
147 | __le32 front_len; /* bytes in main payload */ | ||
148 | __le32 middle_len;/* bytes in middle payload */ | ||
149 | __le32 data_len; /* bytes of data payload */ | ||
150 | __le16 data_off; /* sender: include full offset; | ||
151 | receiver: mask against ~PAGE_MASK */ | ||
152 | |||
153 | struct ceph_entity_name src; | ||
154 | __le32 reserved; | ||
155 | __le32 crc; /* header crc32c */ | ||
156 | } __attribute__ ((packed)); | ||
157 | |||
141 | #define CEPH_MSG_PRIO_LOW 64 | 158 | #define CEPH_MSG_PRIO_LOW 64 |
142 | #define CEPH_MSG_PRIO_DEFAULT 127 | 159 | #define CEPH_MSG_PRIO_DEFAULT 127 |
143 | #define CEPH_MSG_PRIO_HIGH 196 | 160 | #define CEPH_MSG_PRIO_HIGH 196 |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 3514f71ff85f..afa7bb3895c4 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #define OSD_OP_FRONT_LEN 4096 | 16 | #define OSD_OP_FRONT_LEN 4096 |
17 | #define OSD_OPREPLY_FRONT_LEN 512 | 17 | #define OSD_OPREPLY_FRONT_LEN 512 |
18 | 18 | ||
19 | const static struct ceph_connection_operations osd_con_ops; | 19 | static const struct ceph_connection_operations osd_con_ops; |
20 | static int __kick_requests(struct ceph_osd_client *osdc, | 20 | static int __kick_requests(struct ceph_osd_client *osdc, |
21 | struct ceph_osd *kickosd); | 21 | struct ceph_osd *kickosd); |
22 | 22 | ||
@@ -147,7 +147,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
147 | req = kzalloc(sizeof(*req), GFP_NOFS); | 147 | req = kzalloc(sizeof(*req), GFP_NOFS); |
148 | } | 148 | } |
149 | if (req == NULL) | 149 | if (req == NULL) |
150 | return ERR_PTR(-ENOMEM); | 150 | return NULL; |
151 | 151 | ||
152 | req->r_osdc = osdc; | 152 | req->r_osdc = osdc; |
153 | req->r_mempool = use_mempool; | 153 | req->r_mempool = use_mempool; |
@@ -164,10 +164,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); | 164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); |
165 | else | 165 | else |
166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, | 166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, |
167 | OSD_OPREPLY_FRONT_LEN, 0, 0, NULL); | 167 | OSD_OPREPLY_FRONT_LEN, GFP_NOFS); |
168 | if (IS_ERR(msg)) { | 168 | if (!msg) { |
169 | ceph_osdc_put_request(req); | 169 | ceph_osdc_put_request(req); |
170 | return ERR_PTR(PTR_ERR(msg)); | 170 | return NULL; |
171 | } | 171 | } |
172 | req->r_reply = msg; | 172 | req->r_reply = msg; |
173 | 173 | ||
@@ -178,10 +178,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
178 | if (use_mempool) | 178 | if (use_mempool) |
179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); | 179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); |
180 | else | 180 | else |
181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, 0, 0, NULL); | 181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); |
182 | if (IS_ERR(msg)) { | 182 | if (!msg) { |
183 | ceph_osdc_put_request(req); | 183 | ceph_osdc_put_request(req); |
184 | return ERR_PTR(PTR_ERR(msg)); | 184 | return NULL; |
185 | } | 185 | } |
186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); | 186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); |
187 | memset(msg->front.iov_base, 0, msg->front.iov_len); | 187 | memset(msg->front.iov_base, 0, msg->front.iov_len); |
@@ -715,7 +715,7 @@ static void handle_timeout(struct work_struct *work) | |||
715 | * should mark the osd as failed and we should find out about | 715 | * should mark the osd as failed and we should find out about |
716 | * it from an updated osd map. | 716 | * it from an updated osd map. |
717 | */ | 717 | */ |
718 | while (!list_empty(&osdc->req_lru)) { | 718 | while (timeout && !list_empty(&osdc->req_lru)) { |
719 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, | 719 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, |
720 | r_req_lru_item); | 720 | r_req_lru_item); |
721 | 721 | ||
@@ -1078,6 +1078,7 @@ done: | |||
1078 | if (newmap) | 1078 | if (newmap) |
1079 | kick_requests(osdc, NULL); | 1079 | kick_requests(osdc, NULL); |
1080 | up_read(&osdc->map_sem); | 1080 | up_read(&osdc->map_sem); |
1081 | wake_up(&osdc->client->auth_wq); | ||
1081 | return; | 1082 | return; |
1082 | 1083 | ||
1083 | bad: | 1084 | bad: |
@@ -1087,45 +1088,6 @@ bad: | |||
1087 | return; | 1088 | return; |
1088 | } | 1089 | } |
1089 | 1090 | ||
1090 | |||
1091 | /* | ||
1092 | * A read request prepares specific pages that data is to be read into. | ||
1093 | * When a message is being read off the wire, we call prepare_pages to | ||
1094 | * find those pages. | ||
1095 | * 0 = success, -1 failure. | ||
1096 | */ | ||
1097 | static int __prepare_pages(struct ceph_connection *con, | ||
1098 | struct ceph_msg_header *hdr, | ||
1099 | struct ceph_osd_request *req, | ||
1100 | u64 tid, | ||
1101 | struct ceph_msg *m) | ||
1102 | { | ||
1103 | struct ceph_osd *osd = con->private; | ||
1104 | struct ceph_osd_client *osdc; | ||
1105 | int ret = -1; | ||
1106 | int data_len = le32_to_cpu(hdr->data_len); | ||
1107 | unsigned data_off = le16_to_cpu(hdr->data_off); | ||
1108 | |||
1109 | int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); | ||
1110 | |||
1111 | if (!osd) | ||
1112 | return -1; | ||
1113 | |||
1114 | osdc = osd->o_osdc; | ||
1115 | |||
1116 | dout("__prepare_pages on msg %p tid %llu, has %d pages, want %d\n", m, | ||
1117 | tid, req->r_num_pages, want); | ||
1118 | if (unlikely(req->r_num_pages < want)) | ||
1119 | goto out; | ||
1120 | m->pages = req->r_pages; | ||
1121 | m->nr_pages = req->r_num_pages; | ||
1122 | ret = 0; /* success */ | ||
1123 | out: | ||
1124 | BUG_ON(ret < 0 || m->nr_pages < want); | ||
1125 | |||
1126 | return ret; | ||
1127 | } | ||
1128 | |||
1129 | /* | 1091 | /* |
1130 | * Register request, send initial attempt. | 1092 | * Register request, send initial attempt. |
1131 | */ | 1093 | */ |
@@ -1252,11 +1214,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
1252 | if (!osdc->req_mempool) | 1214 | if (!osdc->req_mempool) |
1253 | goto out; | 1215 | goto out; |
1254 | 1216 | ||
1255 | err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true); | 1217 | err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true, |
1218 | "osd_op"); | ||
1256 | if (err < 0) | 1219 | if (err < 0) |
1257 | goto out_mempool; | 1220 | goto out_mempool; |
1258 | err = ceph_msgpool_init(&osdc->msgpool_op_reply, | 1221 | err = ceph_msgpool_init(&osdc->msgpool_op_reply, |
1259 | OSD_OPREPLY_FRONT_LEN, 10, true); | 1222 | OSD_OPREPLY_FRONT_LEN, 10, true, |
1223 | "osd_op_reply"); | ||
1260 | if (err < 0) | 1224 | if (err < 0) |
1261 | goto out_msgpool; | 1225 | goto out_msgpool; |
1262 | return 0; | 1226 | return 0; |
@@ -1302,8 +1266,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
1302 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, | 1266 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, |
1303 | NULL, 0, truncate_seq, truncate_size, NULL, | 1267 | NULL, 0, truncate_seq, truncate_size, NULL, |
1304 | false, 1); | 1268 | false, 1); |
1305 | if (IS_ERR(req)) | 1269 | if (!req) |
1306 | return PTR_ERR(req); | 1270 | return -ENOMEM; |
1307 | 1271 | ||
1308 | /* it may be a short read due to an object boundary */ | 1272 | /* it may be a short read due to an object boundary */ |
1309 | req->r_pages = pages; | 1273 | req->r_pages = pages; |
@@ -1345,8 +1309,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1345 | snapc, do_sync, | 1309 | snapc, do_sync, |
1346 | truncate_seq, truncate_size, mtime, | 1310 | truncate_seq, truncate_size, mtime, |
1347 | nofail, 1); | 1311 | nofail, 1); |
1348 | if (IS_ERR(req)) | 1312 | if (!req) |
1349 | return PTR_ERR(req); | 1313 | return -ENOMEM; |
1350 | 1314 | ||
1351 | /* it may be a short write due to an object boundary */ | 1315 | /* it may be a short write due to an object boundary */ |
1352 | req->r_pages = pages; | 1316 | req->r_pages = pages; |
@@ -1394,7 +1358,8 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
1394 | } | 1358 | } |
1395 | 1359 | ||
1396 | /* | 1360 | /* |
1397 | * lookup and return message for incoming reply | 1361 | * lookup and return message for incoming reply. set up reply message |
1362 | * pages. | ||
1398 | */ | 1363 | */ |
1399 | static struct ceph_msg *get_reply(struct ceph_connection *con, | 1364 | static struct ceph_msg *get_reply(struct ceph_connection *con, |
1400 | struct ceph_msg_header *hdr, | 1365 | struct ceph_msg_header *hdr, |
@@ -1407,7 +1372,6 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1407 | int front = le32_to_cpu(hdr->front_len); | 1372 | int front = le32_to_cpu(hdr->front_len); |
1408 | int data_len = le32_to_cpu(hdr->data_len); | 1373 | int data_len = le32_to_cpu(hdr->data_len); |
1409 | u64 tid; | 1374 | u64 tid; |
1410 | int err; | ||
1411 | 1375 | ||
1412 | tid = le64_to_cpu(hdr->tid); | 1376 | tid = le64_to_cpu(hdr->tid); |
1413 | mutex_lock(&osdc->request_mutex); | 1377 | mutex_lock(&osdc->request_mutex); |
@@ -1425,13 +1389,14 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1425 | req->r_reply, req->r_con_filling_msg); | 1389 | req->r_reply, req->r_con_filling_msg); |
1426 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); | 1390 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); |
1427 | ceph_con_put(req->r_con_filling_msg); | 1391 | ceph_con_put(req->r_con_filling_msg); |
1392 | req->r_con_filling_msg = NULL; | ||
1428 | } | 1393 | } |
1429 | 1394 | ||
1430 | if (front > req->r_reply->front.iov_len) { | 1395 | if (front > req->r_reply->front.iov_len) { |
1431 | pr_warning("get_reply front %d > preallocated %d\n", | 1396 | pr_warning("get_reply front %d > preallocated %d\n", |
1432 | front, (int)req->r_reply->front.iov_len); | 1397 | front, (int)req->r_reply->front.iov_len); |
1433 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, 0, 0, NULL); | 1398 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS); |
1434 | if (IS_ERR(m)) | 1399 | if (!m) |
1435 | goto out; | 1400 | goto out; |
1436 | ceph_msg_put(req->r_reply); | 1401 | ceph_msg_put(req->r_reply); |
1437 | req->r_reply = m; | 1402 | req->r_reply = m; |
@@ -1439,12 +1404,19 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1439 | m = ceph_msg_get(req->r_reply); | 1404 | m = ceph_msg_get(req->r_reply); |
1440 | 1405 | ||
1441 | if (data_len > 0) { | 1406 | if (data_len > 0) { |
1442 | err = __prepare_pages(con, hdr, req, tid, m); | 1407 | unsigned data_off = le16_to_cpu(hdr->data_off); |
1443 | if (err < 0) { | 1408 | int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); |
1409 | |||
1410 | if (unlikely(req->r_num_pages < want)) { | ||
1411 | pr_warning("tid %lld reply %d > expected %d pages\n", | ||
1412 | tid, want, m->nr_pages); | ||
1444 | *skip = 1; | 1413 | *skip = 1; |
1445 | ceph_msg_put(m); | 1414 | ceph_msg_put(m); |
1446 | m = ERR_PTR(err); | 1415 | m = NULL; |
1416 | goto out; | ||
1447 | } | 1417 | } |
1418 | m->pages = req->r_pages; | ||
1419 | m->nr_pages = req->r_num_pages; | ||
1448 | } | 1420 | } |
1449 | *skip = 0; | 1421 | *skip = 0; |
1450 | req->r_con_filling_msg = ceph_con_get(con); | 1422 | req->r_con_filling_msg = ceph_con_get(con); |
@@ -1466,7 +1438,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, | |||
1466 | 1438 | ||
1467 | switch (type) { | 1439 | switch (type) { |
1468 | case CEPH_MSG_OSD_MAP: | 1440 | case CEPH_MSG_OSD_MAP: |
1469 | return ceph_msg_new(type, front, 0, 0, NULL); | 1441 | return ceph_msg_new(type, front, GFP_NOFS); |
1470 | case CEPH_MSG_OSD_OPREPLY: | 1442 | case CEPH_MSG_OSD_OPREPLY: |
1471 | return get_reply(con, hdr, skip); | 1443 | return get_reply(con, hdr, skip); |
1472 | default: | 1444 | default: |
@@ -1552,7 +1524,7 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
1552 | return ceph_monc_validate_auth(&osdc->client->monc); | 1524 | return ceph_monc_validate_auth(&osdc->client->monc); |
1553 | } | 1525 | } |
1554 | 1526 | ||
1555 | const static struct ceph_connection_operations osd_con_ops = { | 1527 | static const struct ceph_connection_operations osd_con_ops = { |
1556 | .get = get_osd_con, | 1528 | .get = get_osd_con, |
1557 | .put = put_osd_con, | 1529 | .put = put_osd_con, |
1558 | .dispatch = dispatch, | 1530 | .dispatch = dispatch, |
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index 5f8dbf7c745a..b6859f47d364 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c | |||
@@ -20,7 +20,7 @@ int ceph_pagelist_release(struct ceph_pagelist *pl) | |||
20 | 20 | ||
21 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | 21 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) |
22 | { | 22 | { |
23 | struct page *page = alloc_page(GFP_NOFS); | 23 | struct page *page = __page_cache_alloc(GFP_NOFS); |
24 | if (!page) | 24 | if (!page) |
25 | return -ENOMEM; | 25 | return -ENOMEM; |
26 | pl->room += PAGE_SIZE; | 26 | pl->room += PAGE_SIZE; |
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index fd56451a871f..8fcc023056c7 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
@@ -101,8 +101,8 @@ struct ceph_pg_pool { | |||
101 | __le64 snap_seq; /* seq for per-pool snapshot */ | 101 | __le64 snap_seq; /* seq for per-pool snapshot */ |
102 | __le32 snap_epoch; /* epoch of last snap */ | 102 | __le32 snap_epoch; /* epoch of last snap */ |
103 | __le32 num_snaps; | 103 | __le32 num_snaps; |
104 | __le32 num_removed_snap_intervals; | 104 | __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ |
105 | __le64 uid; | 105 | __le64 auid; /* who owns the pg */ |
106 | } __attribute__ ((packed)); | 106 | } __attribute__ ((packed)); |
107 | 107 | ||
108 | /* | 108 | /* |
@@ -208,6 +208,7 @@ enum { | |||
208 | /* read */ | 208 | /* read */ |
209 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, | 209 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, |
210 | CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, | 210 | CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, |
211 | CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3, | ||
211 | 212 | ||
212 | /* write */ | 213 | /* write */ |
213 | CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, | 214 | CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, |
@@ -305,6 +306,22 @@ enum { | |||
305 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ | 306 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ |
306 | #define EBLACKLISTED ESHUTDOWN /* blacklisted */ | 307 | #define EBLACKLISTED ESHUTDOWN /* blacklisted */ |
307 | 308 | ||
309 | /* xattr comparison */ | ||
310 | enum { | ||
311 | CEPH_OSD_CMPXATTR_OP_NOP = 0, | ||
312 | CEPH_OSD_CMPXATTR_OP_EQ = 1, | ||
313 | CEPH_OSD_CMPXATTR_OP_NE = 2, | ||
314 | CEPH_OSD_CMPXATTR_OP_GT = 3, | ||
315 | CEPH_OSD_CMPXATTR_OP_GTE = 4, | ||
316 | CEPH_OSD_CMPXATTR_OP_LT = 5, | ||
317 | CEPH_OSD_CMPXATTR_OP_LTE = 6 | ||
318 | }; | ||
319 | |||
320 | enum { | ||
321 | CEPH_OSD_CMPXATTR_MODE_STRING = 1, | ||
322 | CEPH_OSD_CMPXATTR_MODE_U64 = 2 | ||
323 | }; | ||
324 | |||
308 | /* | 325 | /* |
309 | * an individual object operation. each may be accompanied by some data | 326 | * an individual object operation. each may be accompanied by some data |
310 | * payload | 327 | * payload |
@@ -321,6 +338,8 @@ struct ceph_osd_op { | |||
321 | struct { | 338 | struct { |
322 | __le32 name_len; | 339 | __le32 name_len; |
323 | __le32 value_len; | 340 | __le32 value_len; |
341 | __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ | ||
342 | __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ | ||
324 | } __attribute__ ((packed)) xattr; | 343 | } __attribute__ ((packed)) xattr; |
325 | struct { | 344 | struct { |
326 | __u8 class_len; | 345 | __u8 class_len; |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index d5114db70453..c0b26b6badba 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -512,7 +512,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
512 | struct ceph_cap_snap *capsnap) | 512 | struct ceph_cap_snap *capsnap) |
513 | { | 513 | { |
514 | struct inode *inode = &ci->vfs_inode; | 514 | struct inode *inode = &ci->vfs_inode; |
515 | struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc; | 515 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; |
516 | 516 | ||
517 | BUG_ON(capsnap->writing); | 517 | BUG_ON(capsnap->writing); |
518 | capsnap->size = inode->i_size; | 518 | capsnap->size = inode->i_size; |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 110857ba9269..7c663d9b9f81 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -8,14 +8,11 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
10 | #include <linux/parser.h> | 10 | #include <linux/parser.h> |
11 | #include <linux/rwsem.h> | ||
12 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
13 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
14 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
15 | #include <linux/statfs.h> | 14 | #include <linux/statfs.h> |
16 | #include <linux/string.h> | 15 | #include <linux/string.h> |
17 | #include <linux/version.h> | ||
18 | #include <linux/vmalloc.h> | ||
19 | 16 | ||
20 | #include "decode.h" | 17 | #include "decode.h" |
21 | #include "super.h" | 18 | #include "super.h" |
@@ -107,12 +104,40 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
107 | static int ceph_syncfs(struct super_block *sb, int wait) | 104 | static int ceph_syncfs(struct super_block *sb, int wait) |
108 | { | 105 | { |
109 | dout("sync_fs %d\n", wait); | 106 | dout("sync_fs %d\n", wait); |
110 | ceph_osdc_sync(&ceph_client(sb)->osdc); | 107 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); |
111 | ceph_mdsc_sync(&ceph_client(sb)->mdsc); | 108 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); |
112 | dout("sync_fs %d done\n", wait); | 109 | dout("sync_fs %d done\n", wait); |
113 | return 0; | 110 | return 0; |
114 | } | 111 | } |
115 | 112 | ||
113 | static int default_congestion_kb(void) | ||
114 | { | ||
115 | int congestion_kb; | ||
116 | |||
117 | /* | ||
118 | * Copied from NFS | ||
119 | * | ||
120 | * congestion size, scale with available memory. | ||
121 | * | ||
122 | * 64MB: 8192k | ||
123 | * 128MB: 11585k | ||
124 | * 256MB: 16384k | ||
125 | * 512MB: 23170k | ||
126 | * 1GB: 32768k | ||
127 | * 2GB: 46340k | ||
128 | * 4GB: 65536k | ||
129 | * 8GB: 92681k | ||
130 | * 16GB: 131072k | ||
131 | * | ||
132 | * This allows larger machines to have larger/more transfers. | ||
133 | * Limit the default to 256M | ||
134 | */ | ||
135 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
136 | if (congestion_kb > 256*1024) | ||
137 | congestion_kb = 256*1024; | ||
138 | |||
139 | return congestion_kb; | ||
140 | } | ||
116 | 141 | ||
117 | /** | 142 | /** |
118 | * ceph_show_options - Show mount options in /proc/mounts | 143 | * ceph_show_options - Show mount options in /proc/mounts |
@@ -138,6 +163,35 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
138 | seq_puts(m, ",nocrc"); | 163 | seq_puts(m, ",nocrc"); |
139 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) | 164 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) |
140 | seq_puts(m, ",noasyncreaddir"); | 165 | seq_puts(m, ",noasyncreaddir"); |
166 | |||
167 | if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
168 | seq_printf(m, ",mount_timeout=%d", args->mount_timeout); | ||
169 | if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
170 | seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); | ||
171 | if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
172 | seq_printf(m, ",osdtimeout=%d", args->osd_timeout); | ||
173 | if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
174 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
175 | args->osd_keepalive_timeout); | ||
176 | if (args->wsize) | ||
177 | seq_printf(m, ",wsize=%d", args->wsize); | ||
178 | if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
179 | seq_printf(m, ",rsize=%d", args->rsize); | ||
180 | if (args->congestion_kb != default_congestion_kb()) | ||
181 | seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); | ||
182 | if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
183 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
184 | args->caps_wanted_delay_min); | ||
185 | if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
186 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
187 | args->caps_wanted_delay_max); | ||
188 | if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
189 | seq_printf(m, ",cap_release_safety=%d", | ||
190 | args->cap_release_safety); | ||
191 | if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
192 | seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); | ||
193 | if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
194 | seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); | ||
141 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | 195 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) |
142 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); | 196 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); |
143 | if (args->name) | 197 | if (args->name) |
@@ -161,35 +215,6 @@ static void ceph_inode_init_once(void *foo) | |||
161 | inode_init_once(&ci->vfs_inode); | 215 | inode_init_once(&ci->vfs_inode); |
162 | } | 216 | } |
163 | 217 | ||
164 | static int default_congestion_kb(void) | ||
165 | { | ||
166 | int congestion_kb; | ||
167 | |||
168 | /* | ||
169 | * Copied from NFS | ||
170 | * | ||
171 | * congestion size, scale with available memory. | ||
172 | * | ||
173 | * 64MB: 8192k | ||
174 | * 128MB: 11585k | ||
175 | * 256MB: 16384k | ||
176 | * 512MB: 23170k | ||
177 | * 1GB: 32768k | ||
178 | * 2GB: 46340k | ||
179 | * 4GB: 65536k | ||
180 | * 8GB: 92681k | ||
181 | * 16GB: 131072k | ||
182 | * | ||
183 | * This allows larger machines to have larger/more transfers. | ||
184 | * Limit the default to 256M | ||
185 | */ | ||
186 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
187 | if (congestion_kb > 256*1024) | ||
188 | congestion_kb = 256*1024; | ||
189 | |||
190 | return congestion_kb; | ||
191 | } | ||
192 | |||
193 | static int __init init_caches(void) | 218 | static int __init init_caches(void) |
194 | { | 219 | { |
195 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", | 220 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", |
@@ -308,7 +333,9 @@ enum { | |||
308 | Opt_osd_idle_ttl, | 333 | Opt_osd_idle_ttl, |
309 | Opt_caps_wanted_delay_min, | 334 | Opt_caps_wanted_delay_min, |
310 | Opt_caps_wanted_delay_max, | 335 | Opt_caps_wanted_delay_max, |
336 | Opt_cap_release_safety, | ||
311 | Opt_readdir_max_entries, | 337 | Opt_readdir_max_entries, |
338 | Opt_readdir_max_bytes, | ||
312 | Opt_congestion_kb, | 339 | Opt_congestion_kb, |
313 | Opt_last_int, | 340 | Opt_last_int, |
314 | /* int args above */ | 341 | /* int args above */ |
@@ -339,7 +366,9 @@ static match_table_t arg_tokens = { | |||
339 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | 366 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, |
340 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, | 367 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, |
341 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, | 368 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, |
369 | {Opt_cap_release_safety, "cap_release_safety=%d"}, | ||
342 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, | 370 | {Opt_readdir_max_entries, "readdir_max_entries=%d"}, |
371 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | ||
343 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 372 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
344 | /* int args above */ | 373 | /* int args above */ |
345 | {Opt_snapdirname, "snapdirname=%s"}, | 374 | {Opt_snapdirname, "snapdirname=%s"}, |
@@ -388,8 +417,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
388 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | 417 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; |
389 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | 418 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; |
390 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | 419 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); |
391 | args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4; | 420 | args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; |
392 | args->max_readdir = 1024; | 421 | args->max_readdir = CEPH_MAX_READDIR_DEFAULT; |
422 | args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
393 | args->congestion_kb = default_congestion_kb(); | 423 | args->congestion_kb = default_congestion_kb(); |
394 | 424 | ||
395 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | 425 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ |
@@ -497,6 +527,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
497 | case Opt_readdir_max_entries: | 527 | case Opt_readdir_max_entries: |
498 | args->max_readdir = intval; | 528 | args->max_readdir = intval; |
499 | break; | 529 | break; |
530 | case Opt_readdir_max_bytes: | ||
531 | args->max_readdir_bytes = intval; | ||
532 | break; | ||
500 | case Opt_congestion_kb: | 533 | case Opt_congestion_kb: |
501 | args->congestion_kb = intval; | 534 | args->congestion_kb = intval; |
502 | break; | 535 | break; |
@@ -682,9 +715,10 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | |||
682 | /* | 715 | /* |
683 | * true if we have the mon map (and have thus joined the cluster) | 716 | * true if we have the mon map (and have thus joined the cluster) |
684 | */ | 717 | */ |
685 | static int have_mon_map(struct ceph_client *client) | 718 | static int have_mon_and_osd_map(struct ceph_client *client) |
686 | { | 719 | { |
687 | return client->monc.monmap && client->monc.monmap->epoch; | 720 | return client->monc.monmap && client->monc.monmap->epoch && |
721 | client->osdc.osdmap && client->osdc.osdmap->epoch; | ||
688 | } | 722 | } |
689 | 723 | ||
690 | /* | 724 | /* |
@@ -762,7 +796,7 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
762 | if (err < 0) | 796 | if (err < 0) |
763 | goto out; | 797 | goto out; |
764 | 798 | ||
765 | while (!have_mon_map(client)) { | 799 | while (!have_mon_and_osd_map(client)) { |
766 | err = -EIO; | 800 | err = -EIO; |
767 | if (timeout && time_after_eq(jiffies, started + timeout)) | 801 | if (timeout && time_after_eq(jiffies, started + timeout)) |
768 | goto out; | 802 | goto out; |
@@ -770,8 +804,8 @@ static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | |||
770 | /* wait */ | 804 | /* wait */ |
771 | dout("mount waiting for mon_map\n"); | 805 | dout("mount waiting for mon_map\n"); |
772 | err = wait_event_interruptible_timeout(client->auth_wq, | 806 | err = wait_event_interruptible_timeout(client->auth_wq, |
773 | have_mon_map(client) || (client->auth_err < 0), | 807 | have_mon_and_osd_map(client) || (client->auth_err < 0), |
774 | timeout); | 808 | timeout); |
775 | if (err == -EINTR || err == -ERESTARTSYS) | 809 | if (err == -EINTR || err == -ERESTARTSYS) |
776 | goto out; | 810 | goto out; |
777 | if (client->auth_err < 0) { | 811 | if (client->auth_err < 0) { |
@@ -884,6 +918,8 @@ static int ceph_compare_super(struct super_block *sb, void *data) | |||
884 | /* | 918 | /* |
885 | * construct our own bdi so we can control readahead, etc. | 919 | * construct our own bdi so we can control readahead, etc. |
886 | */ | 920 | */ |
921 | static atomic_long_t bdi_seq = ATOMIC_INIT(0); | ||
922 | |||
887 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | 923 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) |
888 | { | 924 | { |
889 | int err; | 925 | int err; |
@@ -893,7 +929,8 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | |||
893 | client->backing_dev_info.ra_pages = | 929 | client->backing_dev_info.ra_pages = |
894 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 930 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) |
895 | >> PAGE_SHIFT; | 931 | >> PAGE_SHIFT; |
896 | err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); | 932 | err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", |
933 | atomic_long_inc_return(&bdi_seq)); | ||
897 | if (!err) | 934 | if (!err) |
898 | sb->s_bdi = &client->backing_dev_info; | 935 | sb->s_bdi = &client->backing_dev_info; |
899 | return err; | 936 | return err; |
@@ -932,9 +969,9 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
932 | goto out; | 969 | goto out; |
933 | } | 970 | } |
934 | 971 | ||
935 | if (ceph_client(sb) != client) { | 972 | if (ceph_sb_to_client(sb) != client) { |
936 | ceph_destroy_client(client); | 973 | ceph_destroy_client(client); |
937 | client = ceph_client(sb); | 974 | client = ceph_sb_to_client(sb); |
938 | dout("get_sb got existing client %p\n", client); | 975 | dout("get_sb got existing client %p\n", client); |
939 | } else { | 976 | } else { |
940 | dout("get_sb using new client %p\n", client); | 977 | dout("get_sb using new client %p\n", client); |
@@ -952,8 +989,7 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
952 | 989 | ||
953 | out_splat: | 990 | out_splat: |
954 | ceph_mdsc_close_sessions(&client->mdsc); | 991 | ceph_mdsc_close_sessions(&client->mdsc); |
955 | up_write(&sb->s_umount); | 992 | deactivate_locked_super(sb); |
956 | deactivate_super(sb); | ||
957 | goto out_final; | 993 | goto out_final; |
958 | 994 | ||
959 | out: | 995 | out: |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 13513b80d87f..3725c9ee9d08 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -52,24 +52,25 @@ | |||
52 | 52 | ||
53 | struct ceph_mount_args { | 53 | struct ceph_mount_args { |
54 | int sb_flags; | 54 | int sb_flags; |
55 | int flags; | ||
56 | struct ceph_fsid fsid; | ||
57 | struct ceph_entity_addr my_addr; | ||
55 | int num_mon; | 58 | int num_mon; |
56 | struct ceph_entity_addr *mon_addr; | 59 | struct ceph_entity_addr *mon_addr; |
57 | int flags; | ||
58 | int mount_timeout; | 60 | int mount_timeout; |
59 | int osd_idle_ttl; | 61 | int osd_idle_ttl; |
60 | int caps_wanted_delay_min, caps_wanted_delay_max; | ||
61 | struct ceph_fsid fsid; | ||
62 | struct ceph_entity_addr my_addr; | ||
63 | int wsize; | ||
64 | int rsize; /* max readahead */ | ||
65 | int max_readdir; /* max readdir size */ | ||
66 | int congestion_kb; /* max readdir size */ | ||
67 | int osd_timeout; | 62 | int osd_timeout; |
68 | int osd_keepalive_timeout; | 63 | int osd_keepalive_timeout; |
64 | int wsize; | ||
65 | int rsize; /* max readahead */ | ||
66 | int congestion_kb; /* max writeback in flight */ | ||
67 | int caps_wanted_delay_min, caps_wanted_delay_max; | ||
68 | int cap_release_safety; | ||
69 | int max_readdir; /* max readdir result (entires) */ | ||
70 | int max_readdir_bytes; /* max readdir result (bytes) */ | ||
69 | char *snapdir_name; /* default ".snap" */ | 71 | char *snapdir_name; /* default ".snap" */ |
70 | char *name; | 72 | char *name; |
71 | char *secret; | 73 | char *secret; |
72 | int cap_release_safety; | ||
73 | }; | 74 | }; |
74 | 75 | ||
75 | /* | 76 | /* |
@@ -80,13 +81,14 @@ struct ceph_mount_args { | |||
80 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | 81 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 |
81 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | 82 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 |
82 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | 83 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ |
84 | #define CEPH_MAX_READDIR_DEFAULT 1024 | ||
85 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) | ||
83 | 86 | ||
84 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | 87 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) |
85 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | 88 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) |
86 | 89 | ||
87 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" | 90 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" |
88 | #define CEPH_AUTH_NAME_DEFAULT "guest" | 91 | #define CEPH_AUTH_NAME_DEFAULT "guest" |
89 | |||
90 | /* | 92 | /* |
91 | * Delay telling the MDS we no longer want caps, in case we reopen | 93 | * Delay telling the MDS we no longer want caps, in case we reopen |
92 | * the file. Delay a minimum amount of time, even if we send a cap | 94 | * the file. Delay a minimum amount of time, even if we send a cap |
@@ -96,6 +98,7 @@ struct ceph_mount_args { | |||
96 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | 98 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ |
97 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | 99 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ |
98 | 100 | ||
101 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
99 | 102 | ||
100 | /* mount state */ | 103 | /* mount state */ |
101 | enum { | 104 | enum { |
@@ -160,12 +163,6 @@ struct ceph_client { | |||
160 | #endif | 163 | #endif |
161 | }; | 164 | }; |
162 | 165 | ||
163 | static inline struct ceph_client *ceph_client(struct super_block *sb) | ||
164 | { | ||
165 | return sb->s_fs_info; | ||
166 | } | ||
167 | |||
168 | |||
169 | /* | 166 | /* |
170 | * File i/o capability. This tracks shared state with the metadata | 167 | * File i/o capability. This tracks shared state with the metadata |
171 | * server that allows us to cache or writeback attributes or to read | 168 | * server that allows us to cache or writeback attributes or to read |
@@ -871,6 +868,7 @@ extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | |||
871 | extern void ceph_dentry_lru_add(struct dentry *dn); | 868 | extern void ceph_dentry_lru_add(struct dentry *dn); |
872 | extern void ceph_dentry_lru_touch(struct dentry *dn); | 869 | extern void ceph_dentry_lru_touch(struct dentry *dn); |
873 | extern void ceph_dentry_lru_del(struct dentry *dn); | 870 | extern void ceph_dentry_lru_del(struct dentry *dn); |
871 | extern void ceph_invalidate_dentry_lease(struct dentry *dentry); | ||
874 | 872 | ||
875 | /* | 873 | /* |
876 | * our d_ops vary depending on whether the inode is live, | 874 | * our d_ops vary depending on whether the inode is live, |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2845422907fc..68aeebc69681 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -7,7 +7,8 @@ | |||
7 | 7 | ||
8 | static bool ceph_is_valid_xattr(const char *name) | 8 | static bool ceph_is_valid_xattr(const char *name) |
9 | { | 9 | { |
10 | return !strncmp(name, XATTR_SECURITY_PREFIX, | 10 | return !strncmp(name, "ceph.", 5) || |
11 | !strncmp(name, XATTR_SECURITY_PREFIX, | ||
11 | XATTR_SECURITY_PREFIX_LEN) || | 12 | XATTR_SECURITY_PREFIX_LEN) || |
12 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || | 13 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || |
13 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); | 14 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); |
@@ -76,14 +77,14 @@ static size_t ceph_vxattrcb_rctime(struct ceph_inode_info *ci, char *val, | |||
76 | } | 77 | } |
77 | 78 | ||
78 | static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { | 79 | static struct ceph_vxattr_cb ceph_dir_vxattrs[] = { |
79 | { true, "user.ceph.dir.entries", ceph_vxattrcb_entries}, | 80 | { true, "ceph.dir.entries", ceph_vxattrcb_entries}, |
80 | { true, "user.ceph.dir.files", ceph_vxattrcb_files}, | 81 | { true, "ceph.dir.files", ceph_vxattrcb_files}, |
81 | { true, "user.ceph.dir.subdirs", ceph_vxattrcb_subdirs}, | 82 | { true, "ceph.dir.subdirs", ceph_vxattrcb_subdirs}, |
82 | { true, "user.ceph.dir.rentries", ceph_vxattrcb_rentries}, | 83 | { true, "ceph.dir.rentries", ceph_vxattrcb_rentries}, |
83 | { true, "user.ceph.dir.rfiles", ceph_vxattrcb_rfiles}, | 84 | { true, "ceph.dir.rfiles", ceph_vxattrcb_rfiles}, |
84 | { true, "user.ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, | 85 | { true, "ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs}, |
85 | { true, "user.ceph.dir.rbytes", ceph_vxattrcb_rbytes}, | 86 | { true, "ceph.dir.rbytes", ceph_vxattrcb_rbytes}, |
86 | { true, "user.ceph.dir.rctime", ceph_vxattrcb_rctime}, | 87 | { true, "ceph.dir.rctime", ceph_vxattrcb_rctime}, |
87 | { true, NULL, NULL } | 88 | { true, NULL, NULL } |
88 | }; | 89 | }; |
89 | 90 | ||
@@ -107,7 +108,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, | |||
107 | } | 108 | } |
108 | 109 | ||
109 | static struct ceph_vxattr_cb ceph_file_vxattrs[] = { | 110 | static struct ceph_vxattr_cb ceph_file_vxattrs[] = { |
110 | { true, "user.ceph.layout", ceph_vxattrcb_layout}, | 111 | { true, "ceph.layout", ceph_vxattrcb_layout}, |
111 | { NULL, NULL } | 112 | { NULL, NULL } |
112 | }; | 113 | }; |
113 | 114 | ||
@@ -186,12 +187,6 @@ static int __set_xattr(struct ceph_inode_info *ci, | |||
186 | ci->i_xattrs.names_size -= xattr->name_len; | 187 | ci->i_xattrs.names_size -= xattr->name_len; |
187 | ci->i_xattrs.vals_size -= xattr->val_len; | 188 | ci->i_xattrs.vals_size -= xattr->val_len; |
188 | } | 189 | } |
189 | if (!xattr) { | ||
190 | pr_err("__set_xattr ENOMEM on %p %llx.%llx xattr %s=%s\n", | ||
191 | &ci->vfs_inode, ceph_vinop(&ci->vfs_inode), name, | ||
192 | xattr->val); | ||
193 | return -ENOMEM; | ||
194 | } | ||
195 | ci->i_xattrs.names_size += name_len; | 190 | ci->i_xattrs.names_size += name_len; |
196 | ci->i_xattrs.vals_size += val_len; | 191 | ci->i_xattrs.vals_size += val_len; |
197 | if (val) | 192 | if (val) |
@@ -574,7 +569,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) | |||
574 | ci->i_xattrs.version, ci->i_xattrs.index_version); | 569 | ci->i_xattrs.version, ci->i_xattrs.index_version); |
575 | 570 | ||
576 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && | 571 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && |
577 | (ci->i_xattrs.index_version > ci->i_xattrs.version)) { | 572 | (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { |
578 | goto list_xattr; | 573 | goto list_xattr; |
579 | } else { | 574 | } else { |
580 | spin_unlock(&inode->i_lock); | 575 | spin_unlock(&inode->i_lock); |
@@ -622,7 +617,7 @@ out: | |||
622 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | 617 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, |
623 | const char *value, size_t size, int flags) | 618 | const char *value, size_t size, int flags) |
624 | { | 619 | { |
625 | struct ceph_client *client = ceph_client(dentry->d_sb); | 620 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); |
626 | struct inode *inode = dentry->d_inode; | 621 | struct inode *inode = dentry->d_inode; |
627 | struct ceph_inode_info *ci = ceph_inode(inode); | 622 | struct ceph_inode_info *ci = ceph_inode(inode); |
628 | struct inode *parent_inode = dentry->d_parent->d_inode; | 623 | struct inode *parent_inode = dentry->d_parent->d_inode; |
@@ -641,7 +636,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
641 | return -ENOMEM; | 636 | return -ENOMEM; |
642 | err = -ENOMEM; | 637 | err = -ENOMEM; |
643 | for (i = 0; i < nr_pages; i++) { | 638 | for (i = 0; i < nr_pages; i++) { |
644 | pages[i] = alloc_page(GFP_NOFS); | 639 | pages[i] = __page_cache_alloc(GFP_NOFS); |
645 | if (!pages[i]) { | 640 | if (!pages[i]) { |
646 | nr_pages = i; | 641 | nr_pages = i; |
647 | goto out; | 642 | goto out; |
@@ -779,7 +774,7 @@ out: | |||
779 | 774 | ||
780 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) | 775 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) |
781 | { | 776 | { |
782 | struct ceph_client *client = ceph_client(dentry->d_sb); | 777 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); |
783 | struct ceph_mds_client *mdsc = &client->mdsc; | 778 | struct ceph_mds_client *mdsc = &client->mdsc; |
784 | struct inode *inode = dentry->d_inode; | 779 | struct inode *inode = dentry->d_inode; |
785 | struct inode *parent_inode = dentry->d_parent->d_inode; | 780 | struct inode *parent_inode = dentry->d_parent->d_inode; |
diff --git a/fs/coda/file.c b/fs/coda/file.c index 4c813f2cdc52..7196077b1688 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c | |||
@@ -217,7 +217,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) | |||
217 | BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); | 217 | BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); |
218 | host_file = cfi->cfi_container; | 218 | host_file = cfi->cfi_container; |
219 | 219 | ||
220 | err = vfs_fsync(host_file, host_file->f_path.dentry, datasync); | 220 | err = vfs_fsync(host_file, datasync); |
221 | if ( !err && !datasync ) { | 221 | if ( !err && !datasync ) { |
222 | lock_kernel(); | 222 | lock_kernel(); |
223 | err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode)); | 223 | err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode)); |
diff --git a/fs/dcache.c b/fs/dcache.c index f1358e5c3a59..d96047b4a633 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -536,7 +536,7 @@ restart: | |||
536 | */ | 536 | */ |
537 | static void prune_dcache(int count) | 537 | static void prune_dcache(int count) |
538 | { | 538 | { |
539 | struct super_block *sb; | 539 | struct super_block *sb, *n; |
540 | int w_count; | 540 | int w_count; |
541 | int unused = dentry_stat.nr_unused; | 541 | int unused = dentry_stat.nr_unused; |
542 | int prune_ratio; | 542 | int prune_ratio; |
@@ -545,13 +545,14 @@ static void prune_dcache(int count) | |||
545 | if (unused == 0 || count == 0) | 545 | if (unused == 0 || count == 0) |
546 | return; | 546 | return; |
547 | spin_lock(&dcache_lock); | 547 | spin_lock(&dcache_lock); |
548 | restart: | ||
549 | if (count >= unused) | 548 | if (count >= unused) |
550 | prune_ratio = 1; | 549 | prune_ratio = 1; |
551 | else | 550 | else |
552 | prune_ratio = unused / count; | 551 | prune_ratio = unused / count; |
553 | spin_lock(&sb_lock); | 552 | spin_lock(&sb_lock); |
554 | list_for_each_entry(sb, &super_blocks, s_list) { | 553 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { |
554 | if (list_empty(&sb->s_instances)) | ||
555 | continue; | ||
555 | if (sb->s_nr_dentry_unused == 0) | 556 | if (sb->s_nr_dentry_unused == 0) |
556 | continue; | 557 | continue; |
557 | sb->s_count++; | 558 | sb->s_count++; |
@@ -590,14 +591,10 @@ restart: | |||
590 | } | 591 | } |
591 | spin_lock(&sb_lock); | 592 | spin_lock(&sb_lock); |
592 | count -= pruned; | 593 | count -= pruned; |
593 | /* | 594 | __put_super(sb); |
594 | * restart only when sb is no longer on the list and | 595 | /* more work left to do? */ |
595 | * we have more work to do. | 596 | if (count <= 0) |
596 | */ | 597 | break; |
597 | if (__put_super_and_need_restart(sb) && count > 0) { | ||
598 | spin_unlock(&sb_lock); | ||
599 | goto restart; | ||
600 | } | ||
601 | } | 598 | } |
602 | spin_unlock(&sb_lock); | 599 | spin_unlock(&sb_lock); |
603 | spin_unlock(&dcache_lock); | 600 | spin_unlock(&dcache_lock); |
@@ -1529,6 +1526,7 @@ void d_delete(struct dentry * dentry) | |||
1529 | spin_lock(&dentry->d_lock); | 1526 | spin_lock(&dentry->d_lock); |
1530 | isdir = S_ISDIR(dentry->d_inode->i_mode); | 1527 | isdir = S_ISDIR(dentry->d_inode->i_mode); |
1531 | if (atomic_read(&dentry->d_count) == 1) { | 1528 | if (atomic_read(&dentry->d_count) == 1) { |
1529 | dentry->d_flags &= ~DCACHE_CANT_MOUNT; | ||
1532 | dentry_iput(dentry); | 1530 | dentry_iput(dentry); |
1533 | fsnotify_nameremove(dentry, isdir); | 1531 | fsnotify_nameremove(dentry, isdir); |
1534 | return; | 1532 | return; |
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 0120247b41c0..8b3ffd5b5235 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -384,18 +384,15 @@ static int devpts_get_sb(struct file_system_type *fs_type, | |||
384 | s->s_flags |= MS_ACTIVE; | 384 | s->s_flags |= MS_ACTIVE; |
385 | } | 385 | } |
386 | 386 | ||
387 | simple_set_mnt(mnt, s); | ||
388 | |||
389 | memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts)); | 387 | memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts)); |
390 | 388 | ||
391 | error = mknod_ptmx(s); | 389 | error = mknod_ptmx(s); |
392 | if (error) | 390 | if (error) |
393 | goto out_dput; | 391 | goto out_undo_sget; |
394 | 392 | ||
395 | return 0; | 393 | simple_set_mnt(mnt, s); |
396 | 394 | ||
397 | out_dput: | 395 | return 0; |
398 | dput(s->s_root); /* undo dget() in simple_set_mnt() */ | ||
399 | 396 | ||
400 | out_undo_sget: | 397 | out_undo_sget: |
401 | deactivate_locked_super(s); | 398 | deactivate_locked_super(s); |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 31f4b0e6d72c..83c4f600786a 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -12,7 +12,7 @@ | |||
12 | /* A global variable is a bit ugly, but it keeps the code simple */ | 12 | /* A global variable is a bit ugly, but it keeps the code simple */ |
13 | int sysctl_drop_caches; | 13 | int sysctl_drop_caches; |
14 | 14 | ||
15 | static void drop_pagecache_sb(struct super_block *sb) | 15 | static void drop_pagecache_sb(struct super_block *sb, void *unused) |
16 | { | 16 | { |
17 | struct inode *inode, *toput_inode = NULL; | 17 | struct inode *inode, *toput_inode = NULL; |
18 | 18 | ||
@@ -33,26 +33,6 @@ static void drop_pagecache_sb(struct super_block *sb) | |||
33 | iput(toput_inode); | 33 | iput(toput_inode); |
34 | } | 34 | } |
35 | 35 | ||
36 | static void drop_pagecache(void) | ||
37 | { | ||
38 | struct super_block *sb; | ||
39 | |||
40 | spin_lock(&sb_lock); | ||
41 | restart: | ||
42 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
43 | sb->s_count++; | ||
44 | spin_unlock(&sb_lock); | ||
45 | down_read(&sb->s_umount); | ||
46 | if (sb->s_root) | ||
47 | drop_pagecache_sb(sb); | ||
48 | up_read(&sb->s_umount); | ||
49 | spin_lock(&sb_lock); | ||
50 | if (__put_super_and_need_restart(sb)) | ||
51 | goto restart; | ||
52 | } | ||
53 | spin_unlock(&sb_lock); | ||
54 | } | ||
55 | |||
56 | static void drop_slab(void) | 36 | static void drop_slab(void) |
57 | { | 37 | { |
58 | int nr_objects; | 38 | int nr_objects; |
@@ -68,7 +48,7 @@ int drop_caches_sysctl_handler(ctl_table *table, int write, | |||
68 | proc_dointvec_minmax(table, write, buffer, length, ppos); | 48 | proc_dointvec_minmax(table, write, buffer, length, ppos); |
69 | if (write) { | 49 | if (write) { |
70 | if (sysctl_drop_caches & 1) | 50 | if (sysctl_drop_caches & 1) |
71 | drop_pagecache(); | 51 | iterate_supers(drop_pagecache_sb, NULL); |
72 | if (sysctl_drop_caches & 2) | 52 | if (sysctl_drop_caches & 2) |
73 | drop_slab(); | 53 | drop_slab(); |
74 | } | 54 | } |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index bfc2e0f78f00..0032a9f5a3a9 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -731,15 +731,14 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, | |||
731 | int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, | 731 | int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, |
732 | struct page *page_for_lower, | 732 | struct page *page_for_lower, |
733 | size_t offset_in_page, size_t size); | 733 | size_t offset_in_page, size_t size); |
734 | int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | 734 | int ecryptfs_write(struct inode *inode, char *data, loff_t offset, size_t size); |
735 | size_t size); | ||
736 | int ecryptfs_read_lower(char *data, loff_t offset, size_t size, | 735 | int ecryptfs_read_lower(char *data, loff_t offset, size_t size, |
737 | struct inode *ecryptfs_inode); | 736 | struct inode *ecryptfs_inode); |
738 | int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, | 737 | int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, |
739 | pgoff_t page_index, | 738 | pgoff_t page_index, |
740 | size_t offset_in_page, size_t size, | 739 | size_t offset_in_page, size_t size, |
741 | struct inode *ecryptfs_inode); | 740 | struct inode *ecryptfs_inode); |
742 | struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); | 741 | struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index); |
743 | int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); | 742 | int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); |
744 | int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, | 743 | int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, |
745 | struct user_namespace *user_ns); | 744 | struct user_namespace *user_ns); |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index e7440a6f5ebf..3bdddbcc785f 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
@@ -276,9 +276,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) | |||
276 | static int | 276 | static int |
277 | ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 277 | ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync) |
278 | { | 278 | { |
279 | return vfs_fsync(ecryptfs_file_to_lower(file), | 279 | return vfs_fsync(ecryptfs_file_to_lower(file), datasync); |
280 | ecryptfs_dentry_to_lower(dentry), | ||
281 | datasync); | ||
282 | } | 280 | } |
283 | 281 | ||
284 | static int ecryptfs_fasync(int fd, struct file *file, int flag) | 282 | static int ecryptfs_fasync(int fd, struct file *file, int flag) |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e2d4418affac..65dee2f336ae 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -142,19 +142,10 @@ out: | |||
142 | static int grow_file(struct dentry *ecryptfs_dentry) | 142 | static int grow_file(struct dentry *ecryptfs_dentry) |
143 | { | 143 | { |
144 | struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; | 144 | struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; |
145 | struct file fake_file; | ||
146 | struct ecryptfs_file_info tmp_file_info; | ||
147 | char zero_virt[] = { 0x00 }; | 145 | char zero_virt[] = { 0x00 }; |
148 | int rc = 0; | 146 | int rc = 0; |
149 | 147 | ||
150 | memset(&fake_file, 0, sizeof(fake_file)); | 148 | rc = ecryptfs_write(ecryptfs_inode, zero_virt, 0, 1); |
151 | fake_file.f_path.dentry = ecryptfs_dentry; | ||
152 | memset(&tmp_file_info, 0, sizeof(tmp_file_info)); | ||
153 | ecryptfs_set_file_private(&fake_file, &tmp_file_info); | ||
154 | ecryptfs_set_file_lower( | ||
155 | &fake_file, | ||
156 | ecryptfs_inode_to_private(ecryptfs_inode)->lower_file); | ||
157 | rc = ecryptfs_write(&fake_file, zero_virt, 0, 1); | ||
158 | i_size_write(ecryptfs_inode, 0); | 149 | i_size_write(ecryptfs_inode, 0); |
159 | rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); | 150 | rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); |
160 | ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat.flags |= | 151 | ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat.flags |= |
@@ -784,8 +775,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
784 | { | 775 | { |
785 | int rc = 0; | 776 | int rc = 0; |
786 | struct inode *inode = dentry->d_inode; | 777 | struct inode *inode = dentry->d_inode; |
787 | struct dentry *lower_dentry; | ||
788 | struct file fake_ecryptfs_file; | ||
789 | struct ecryptfs_crypt_stat *crypt_stat; | 778 | struct ecryptfs_crypt_stat *crypt_stat; |
790 | loff_t i_size = i_size_read(inode); | 779 | loff_t i_size = i_size_read(inode); |
791 | loff_t lower_size_before_truncate; | 780 | loff_t lower_size_before_truncate; |
@@ -796,23 +785,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
796 | goto out; | 785 | goto out; |
797 | } | 786 | } |
798 | crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; | 787 | crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; |
799 | /* Set up a fake ecryptfs file, this is used to interface with | ||
800 | * the file in the underlying filesystem so that the | ||
801 | * truncation has an effect there as well. */ | ||
802 | memset(&fake_ecryptfs_file, 0, sizeof(fake_ecryptfs_file)); | ||
803 | fake_ecryptfs_file.f_path.dentry = dentry; | ||
804 | /* Released at out_free: label */ | ||
805 | ecryptfs_set_file_private(&fake_ecryptfs_file, | ||
806 | kmem_cache_alloc(ecryptfs_file_info_cache, | ||
807 | GFP_KERNEL)); | ||
808 | if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) { | ||
809 | rc = -ENOMEM; | ||
810 | goto out; | ||
811 | } | ||
812 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | ||
813 | ecryptfs_set_file_lower( | ||
814 | &fake_ecryptfs_file, | ||
815 | ecryptfs_inode_to_private(dentry->d_inode)->lower_file); | ||
816 | /* Switch on growing or shrinking file */ | 788 | /* Switch on growing or shrinking file */ |
817 | if (ia->ia_size > i_size) { | 789 | if (ia->ia_size > i_size) { |
818 | char zero[] = { 0x00 }; | 790 | char zero[] = { 0x00 }; |
@@ -822,7 +794,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
822 | * this triggers code that will fill in 0's throughout | 794 | * this triggers code that will fill in 0's throughout |
823 | * the intermediate portion of the previous end of the | 795 | * the intermediate portion of the previous end of the |
824 | * file and the new and of the file */ | 796 | * file and the new and of the file */ |
825 | rc = ecryptfs_write(&fake_ecryptfs_file, zero, | 797 | rc = ecryptfs_write(inode, zero, |
826 | (ia->ia_size - 1), 1); | 798 | (ia->ia_size - 1), 1); |
827 | } else { /* ia->ia_size < i_size_read(inode) */ | 799 | } else { /* ia->ia_size < i_size_read(inode) */ |
828 | /* We're chopping off all the pages down to the page | 800 | /* We're chopping off all the pages down to the page |
@@ -835,10 +807,10 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
835 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | 807 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { |
836 | rc = vmtruncate(inode, ia->ia_size); | 808 | rc = vmtruncate(inode, ia->ia_size); |
837 | if (rc) | 809 | if (rc) |
838 | goto out_free; | 810 | goto out; |
839 | lower_ia->ia_size = ia->ia_size; | 811 | lower_ia->ia_size = ia->ia_size; |
840 | lower_ia->ia_valid |= ATTR_SIZE; | 812 | lower_ia->ia_valid |= ATTR_SIZE; |
841 | goto out_free; | 813 | goto out; |
842 | } | 814 | } |
843 | if (num_zeros) { | 815 | if (num_zeros) { |
844 | char *zeros_virt; | 816 | char *zeros_virt; |
@@ -846,16 +818,16 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
846 | zeros_virt = kzalloc(num_zeros, GFP_KERNEL); | 818 | zeros_virt = kzalloc(num_zeros, GFP_KERNEL); |
847 | if (!zeros_virt) { | 819 | if (!zeros_virt) { |
848 | rc = -ENOMEM; | 820 | rc = -ENOMEM; |
849 | goto out_free; | 821 | goto out; |
850 | } | 822 | } |
851 | rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt, | 823 | rc = ecryptfs_write(inode, zeros_virt, |
852 | ia->ia_size, num_zeros); | 824 | ia->ia_size, num_zeros); |
853 | kfree(zeros_virt); | 825 | kfree(zeros_virt); |
854 | if (rc) { | 826 | if (rc) { |
855 | printk(KERN_ERR "Error attempting to zero out " | 827 | printk(KERN_ERR "Error attempting to zero out " |
856 | "the remainder of the end page on " | 828 | "the remainder of the end page on " |
857 | "reducing truncate; rc = [%d]\n", rc); | 829 | "reducing truncate; rc = [%d]\n", rc); |
858 | goto out_free; | 830 | goto out; |
859 | } | 831 | } |
860 | } | 832 | } |
861 | vmtruncate(inode, ia->ia_size); | 833 | vmtruncate(inode, ia->ia_size); |
@@ -864,7 +836,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
864 | printk(KERN_ERR "Problem with " | 836 | printk(KERN_ERR "Problem with " |
865 | "ecryptfs_write_inode_size_to_metadata; " | 837 | "ecryptfs_write_inode_size_to_metadata; " |
866 | "rc = [%d]\n", rc); | 838 | "rc = [%d]\n", rc); |
867 | goto out_free; | 839 | goto out; |
868 | } | 840 | } |
869 | /* We are reducing the size of the ecryptfs file, and need to | 841 | /* We are reducing the size of the ecryptfs file, and need to |
870 | * know if we need to reduce the size of the lower file. */ | 842 | * know if we need to reduce the size of the lower file. */ |
@@ -878,10 +850,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
878 | } else | 850 | } else |
879 | lower_ia->ia_valid &= ~ATTR_SIZE; | 851 | lower_ia->ia_valid &= ~ATTR_SIZE; |
880 | } | 852 | } |
881 | out_free: | ||
882 | if (ecryptfs_file_to_private(&fake_ecryptfs_file)) | ||
883 | kmem_cache_free(ecryptfs_file_info_cache, | ||
884 | ecryptfs_file_to_private(&fake_ecryptfs_file)); | ||
885 | out: | 853 | out: |
886 | return rc; | 854 | return rc; |
887 | } | 855 | } |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 760983d0f25e..cbd4e18adb20 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -281,7 +281,7 @@ static void ecryptfs_init_mount_crypt_stat( | |||
281 | * | 281 | * |
282 | * Returns zero on success; non-zero on error | 282 | * Returns zero on success; non-zero on error |
283 | */ | 283 | */ |
284 | static int ecryptfs_parse_options(struct super_block *sb, char *options) | 284 | static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) |
285 | { | 285 | { |
286 | char *p; | 286 | char *p; |
287 | int rc = 0; | 287 | int rc = 0; |
@@ -293,7 +293,7 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) | |||
293 | int fn_cipher_key_bytes; | 293 | int fn_cipher_key_bytes; |
294 | int fn_cipher_key_bytes_set = 0; | 294 | int fn_cipher_key_bytes_set = 0; |
295 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = | 295 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat = |
296 | &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; | 296 | &sbi->mount_crypt_stat; |
297 | substring_t args[MAX_OPT_ARGS]; | 297 | substring_t args[MAX_OPT_ARGS]; |
298 | int token; | 298 | int token; |
299 | char *sig_src; | 299 | char *sig_src; |
@@ -483,68 +483,7 @@ out: | |||
483 | } | 483 | } |
484 | 484 | ||
485 | struct kmem_cache *ecryptfs_sb_info_cache; | 485 | struct kmem_cache *ecryptfs_sb_info_cache; |
486 | 486 | static struct file_system_type ecryptfs_fs_type; | |
487 | /** | ||
488 | * ecryptfs_fill_super | ||
489 | * @sb: The ecryptfs super block | ||
490 | * @raw_data: The options passed to mount | ||
491 | * @silent: Not used but required by function prototype | ||
492 | * | ||
493 | * Sets up what we can of the sb, rest is done in ecryptfs_read_super | ||
494 | * | ||
495 | * Returns zero on success; non-zero otherwise | ||
496 | */ | ||
497 | static int | ||
498 | ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) | ||
499 | { | ||
500 | struct ecryptfs_sb_info *esi; | ||
501 | int rc = 0; | ||
502 | |||
503 | /* Released in ecryptfs_put_super() */ | ||
504 | ecryptfs_set_superblock_private(sb, | ||
505 | kmem_cache_zalloc(ecryptfs_sb_info_cache, | ||
506 | GFP_KERNEL)); | ||
507 | esi = ecryptfs_superblock_to_private(sb); | ||
508 | if (!esi) { | ||
509 | ecryptfs_printk(KERN_WARNING, "Out of memory\n"); | ||
510 | rc = -ENOMEM; | ||
511 | goto out; | ||
512 | } | ||
513 | |||
514 | rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); | ||
515 | if (rc) | ||
516 | goto out; | ||
517 | |||
518 | sb->s_bdi = &esi->bdi; | ||
519 | sb->s_op = &ecryptfs_sops; | ||
520 | /* Released through deactivate_super(sb) from get_sb_nodev */ | ||
521 | sb->s_root = d_alloc(NULL, &(const struct qstr) { | ||
522 | .hash = 0,.name = "/",.len = 1}); | ||
523 | if (!sb->s_root) { | ||
524 | ecryptfs_printk(KERN_ERR, "d_alloc failed\n"); | ||
525 | rc = -ENOMEM; | ||
526 | goto out; | ||
527 | } | ||
528 | sb->s_root->d_op = &ecryptfs_dops; | ||
529 | sb->s_root->d_sb = sb; | ||
530 | sb->s_root->d_parent = sb->s_root; | ||
531 | /* Released in d_release when dput(sb->s_root) is called */ | ||
532 | /* through deactivate_super(sb) from get_sb_nodev() */ | ||
533 | ecryptfs_set_dentry_private(sb->s_root, | ||
534 | kmem_cache_zalloc(ecryptfs_dentry_info_cache, | ||
535 | GFP_KERNEL)); | ||
536 | if (!ecryptfs_dentry_to_private(sb->s_root)) { | ||
537 | ecryptfs_printk(KERN_ERR, | ||
538 | "dentry_info_cache alloc failed\n"); | ||
539 | rc = -ENOMEM; | ||
540 | goto out; | ||
541 | } | ||
542 | rc = 0; | ||
543 | out: | ||
544 | /* Should be able to rely on deactivate_super called from | ||
545 | * get_sb_nodev */ | ||
546 | return rc; | ||
547 | } | ||
548 | 487 | ||
549 | /** | 488 | /** |
550 | * ecryptfs_read_super | 489 | * ecryptfs_read_super |
@@ -565,6 +504,13 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) | |||
565 | ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); | 504 | ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); |
566 | goto out; | 505 | goto out; |
567 | } | 506 | } |
507 | if (path.dentry->d_sb->s_type == &ecryptfs_fs_type) { | ||
508 | rc = -EINVAL; | ||
509 | printk(KERN_ERR "Mount on filesystem of type " | ||
510 | "eCryptfs explicitly disallowed due to " | ||
511 | "known incompatibilities\n"); | ||
512 | goto out_free; | ||
513 | } | ||
568 | ecryptfs_set_superblock_lower(sb, path.dentry->d_sb); | 514 | ecryptfs_set_superblock_lower(sb, path.dentry->d_sb); |
569 | sb->s_maxbytes = path.dentry->d_sb->s_maxbytes; | 515 | sb->s_maxbytes = path.dentry->d_sb->s_maxbytes; |
570 | sb->s_blocksize = path.dentry->d_sb->s_blocksize; | 516 | sb->s_blocksize = path.dentry->d_sb->s_blocksize; |
@@ -588,11 +534,8 @@ out: | |||
588 | * @dev_name: The path to mount over | 534 | * @dev_name: The path to mount over |
589 | * @raw_data: The options passed into the kernel | 535 | * @raw_data: The options passed into the kernel |
590 | * | 536 | * |
591 | * The whole ecryptfs_get_sb process is broken into 4 functions: | 537 | * The whole ecryptfs_get_sb process is broken into 3 functions: |
592 | * ecryptfs_parse_options(): handle options passed to ecryptfs, if any | 538 | * ecryptfs_parse_options(): handle options passed to ecryptfs, if any |
593 | * ecryptfs_fill_super(): used by get_sb_nodev, fills out the super_block | ||
594 | * with as much information as it can before needing | ||
595 | * the lower filesystem. | ||
596 | * ecryptfs_read_super(): this accesses the lower filesystem and uses | 539 | * ecryptfs_read_super(): this accesses the lower filesystem and uses |
597 | * ecryptfs_interpose to perform most of the linking | 540 | * ecryptfs_interpose to perform most of the linking |
598 | * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c) | 541 | * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c) |
@@ -601,30 +544,78 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, | |||
601 | const char *dev_name, void *raw_data, | 544 | const char *dev_name, void *raw_data, |
602 | struct vfsmount *mnt) | 545 | struct vfsmount *mnt) |
603 | { | 546 | { |
547 | struct super_block *s; | ||
548 | struct ecryptfs_sb_info *sbi; | ||
549 | struct ecryptfs_dentry_info *root_info; | ||
550 | const char *err = "Getting sb failed"; | ||
604 | int rc; | 551 | int rc; |
605 | struct super_block *sb; | ||
606 | 552 | ||
607 | rc = get_sb_nodev(fs_type, flags, raw_data, ecryptfs_fill_super, mnt); | 553 | sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL); |
608 | if (rc < 0) { | 554 | if (!sbi) { |
609 | printk(KERN_ERR "Getting sb failed; rc = [%d]\n", rc); | 555 | rc = -ENOMEM; |
610 | goto out; | 556 | goto out; |
611 | } | 557 | } |
612 | sb = mnt->mnt_sb; | 558 | |
613 | rc = ecryptfs_parse_options(sb, raw_data); | 559 | rc = ecryptfs_parse_options(sbi, raw_data); |
614 | if (rc) { | 560 | if (rc) { |
615 | printk(KERN_ERR "Error parsing options; rc = [%d]\n", rc); | 561 | err = "Error parsing options"; |
616 | goto out_abort; | 562 | goto out; |
563 | } | ||
564 | |||
565 | s = sget(fs_type, NULL, set_anon_super, NULL); | ||
566 | if (IS_ERR(s)) { | ||
567 | rc = PTR_ERR(s); | ||
568 | goto out; | ||
617 | } | 569 | } |
618 | rc = ecryptfs_read_super(sb, dev_name); | 570 | |
571 | s->s_flags = flags; | ||
572 | rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); | ||
619 | if (rc) { | 573 | if (rc) { |
620 | printk(KERN_ERR "Reading sb failed; rc = [%d]\n", rc); | 574 | deactivate_locked_super(s); |
621 | goto out_abort; | 575 | goto out; |
622 | } | 576 | } |
623 | goto out; | 577 | |
624 | out_abort: | 578 | ecryptfs_set_superblock_private(s, sbi); |
625 | dput(sb->s_root); /* aka mnt->mnt_root, as set by get_sb_nodev() */ | 579 | s->s_bdi = &sbi->bdi; |
626 | deactivate_locked_super(sb); | 580 | |
581 | /* ->kill_sb() will take care of sbi after that point */ | ||
582 | sbi = NULL; | ||
583 | s->s_op = &ecryptfs_sops; | ||
584 | |||
585 | rc = -ENOMEM; | ||
586 | s->s_root = d_alloc(NULL, &(const struct qstr) { | ||
587 | .hash = 0,.name = "/",.len = 1}); | ||
588 | if (!s->s_root) { | ||
589 | deactivate_locked_super(s); | ||
590 | goto out; | ||
591 | } | ||
592 | s->s_root->d_op = &ecryptfs_dops; | ||
593 | s->s_root->d_sb = s; | ||
594 | s->s_root->d_parent = s->s_root; | ||
595 | |||
596 | root_info = kmem_cache_zalloc(ecryptfs_dentry_info_cache, GFP_KERNEL); | ||
597 | if (!root_info) { | ||
598 | deactivate_locked_super(s); | ||
599 | goto out; | ||
600 | } | ||
601 | /* ->kill_sb() will take care of root_info */ | ||
602 | ecryptfs_set_dentry_private(s->s_root, root_info); | ||
603 | s->s_flags |= MS_ACTIVE; | ||
604 | rc = ecryptfs_read_super(s, dev_name); | ||
605 | if (rc) { | ||
606 | deactivate_locked_super(s); | ||
607 | err = "Reading sb failed"; | ||
608 | goto out; | ||
609 | } | ||
610 | simple_set_mnt(mnt, s); | ||
611 | return 0; | ||
612 | |||
627 | out: | 613 | out: |
614 | if (sbi) { | ||
615 | ecryptfs_destroy_mount_crypt_stat(&sbi->mount_crypt_stat); | ||
616 | kmem_cache_free(ecryptfs_sb_info_cache, sbi); | ||
617 | } | ||
618 | printk(KERN_ERR "%s; rc = [%d]\n", err, rc); | ||
628 | return rc; | 619 | return rc; |
629 | } | 620 | } |
630 | 621 | ||
@@ -633,11 +624,16 @@ out: | |||
633 | * @sb: The ecryptfs super block | 624 | * @sb: The ecryptfs super block |
634 | * | 625 | * |
635 | * Used to bring the superblock down and free the private data. | 626 | * Used to bring the superblock down and free the private data. |
636 | * Private data is free'd in ecryptfs_put_super() | ||
637 | */ | 627 | */ |
638 | static void ecryptfs_kill_block_super(struct super_block *sb) | 628 | static void ecryptfs_kill_block_super(struct super_block *sb) |
639 | { | 629 | { |
640 | generic_shutdown_super(sb); | 630 | struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); |
631 | kill_anon_super(sb); | ||
632 | if (!sb_info) | ||
633 | return; | ||
634 | ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); | ||
635 | bdi_destroy(&sb_info->bdi); | ||
636 | kmem_cache_free(ecryptfs_sb_info_cache, sb_info); | ||
641 | } | 637 | } |
642 | 638 | ||
643 | static struct file_system_type ecryptfs_fs_type = { | 639 | static struct file_system_type ecryptfs_fs_type = { |
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 2ee9a3a7b68c..b1d82756544b 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
@@ -44,17 +44,9 @@ | |||
44 | * Returns locked and up-to-date page (if ok), with increased | 44 | * Returns locked and up-to-date page (if ok), with increased |
45 | * refcnt. | 45 | * refcnt. |
46 | */ | 46 | */ |
47 | struct page *ecryptfs_get_locked_page(struct file *file, loff_t index) | 47 | struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index) |
48 | { | 48 | { |
49 | struct dentry *dentry; | 49 | struct page *page = read_mapping_page(inode->i_mapping, index, NULL); |
50 | struct inode *inode; | ||
51 | struct address_space *mapping; | ||
52 | struct page *page; | ||
53 | |||
54 | dentry = file->f_path.dentry; | ||
55 | inode = dentry->d_inode; | ||
56 | mapping = inode->i_mapping; | ||
57 | page = read_mapping_page(mapping, index, (void *)file); | ||
58 | if (!IS_ERR(page)) | 50 | if (!IS_ERR(page)) |
59 | lock_page(page); | 51 | lock_page(page); |
60 | return page; | 52 | return page; |
@@ -198,7 +190,7 @@ out: | |||
198 | static int ecryptfs_readpage(struct file *file, struct page *page) | 190 | static int ecryptfs_readpage(struct file *file, struct page *page) |
199 | { | 191 | { |
200 | struct ecryptfs_crypt_stat *crypt_stat = | 192 | struct ecryptfs_crypt_stat *crypt_stat = |
201 | &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; | 193 | &ecryptfs_inode_to_private(page->mapping->host)->crypt_stat; |
202 | int rc = 0; | 194 | int rc = 0; |
203 | 195 | ||
204 | if (!crypt_stat | 196 | if (!crypt_stat |
@@ -300,8 +292,7 @@ static int ecryptfs_write_begin(struct file *file, | |||
300 | 292 | ||
301 | if (!PageUptodate(page)) { | 293 | if (!PageUptodate(page)) { |
302 | struct ecryptfs_crypt_stat *crypt_stat = | 294 | struct ecryptfs_crypt_stat *crypt_stat = |
303 | &ecryptfs_inode_to_private( | 295 | &ecryptfs_inode_to_private(mapping->host)->crypt_stat; |
304 | file->f_path.dentry->d_inode)->crypt_stat; | ||
305 | 296 | ||
306 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) | 297 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) |
307 | || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { | 298 | || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { |
@@ -487,7 +478,7 @@ static int ecryptfs_write_end(struct file *file, | |||
487 | unsigned to = from + copied; | 478 | unsigned to = from + copied; |
488 | struct inode *ecryptfs_inode = mapping->host; | 479 | struct inode *ecryptfs_inode = mapping->host; |
489 | struct ecryptfs_crypt_stat *crypt_stat = | 480 | struct ecryptfs_crypt_stat *crypt_stat = |
490 | &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; | 481 | &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; |
491 | int rc; | 482 | int rc; |
492 | 483 | ||
493 | if (crypt_stat->flags & ECRYPTFS_NEW_FILE) { | 484 | if (crypt_stat->flags & ECRYPTFS_NEW_FILE) { |
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 0cc4fafd6552..db184ef15d3d 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c | |||
@@ -93,7 +93,7 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, | |||
93 | 93 | ||
94 | /** | 94 | /** |
95 | * ecryptfs_write | 95 | * ecryptfs_write |
96 | * @ecryptfs_file: The eCryptfs file into which to write | 96 | * @ecryptfs_inode: The eCryptfs file into which to write |
97 | * @data: Virtual address where data to write is located | 97 | * @data: Virtual address where data to write is located |
98 | * @offset: Offset in the eCryptfs file at which to begin writing the | 98 | * @offset: Offset in the eCryptfs file at which to begin writing the |
99 | * data from @data | 99 | * data from @data |
@@ -109,12 +109,11 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, | |||
109 | * | 109 | * |
110 | * Returns zero on success; non-zero otherwise | 110 | * Returns zero on success; non-zero otherwise |
111 | */ | 111 | */ |
112 | int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | 112 | int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, |
113 | size_t size) | 113 | size_t size) |
114 | { | 114 | { |
115 | struct page *ecryptfs_page; | 115 | struct page *ecryptfs_page; |
116 | struct ecryptfs_crypt_stat *crypt_stat; | 116 | struct ecryptfs_crypt_stat *crypt_stat; |
117 | struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode; | ||
118 | char *ecryptfs_page_virt; | 117 | char *ecryptfs_page_virt; |
119 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); | 118 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); |
120 | loff_t data_offset = 0; | 119 | loff_t data_offset = 0; |
@@ -145,7 +144,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | |||
145 | if (num_bytes > total_remaining_zeros) | 144 | if (num_bytes > total_remaining_zeros) |
146 | num_bytes = total_remaining_zeros; | 145 | num_bytes = total_remaining_zeros; |
147 | } | 146 | } |
148 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file, | 147 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_inode, |
149 | ecryptfs_page_idx); | 148 | ecryptfs_page_idx); |
150 | if (IS_ERR(ecryptfs_page)) { | 149 | if (IS_ERR(ecryptfs_page)) { |
151 | rc = PTR_ERR(ecryptfs_page); | 150 | rc = PTR_ERR(ecryptfs_page); |
@@ -302,10 +301,10 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, | |||
302 | int ecryptfs_read(char *data, loff_t offset, size_t size, | 301 | int ecryptfs_read(char *data, loff_t offset, size_t size, |
303 | struct file *ecryptfs_file) | 302 | struct file *ecryptfs_file) |
304 | { | 303 | { |
304 | struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode; | ||
305 | struct page *ecryptfs_page; | 305 | struct page *ecryptfs_page; |
306 | char *ecryptfs_page_virt; | 306 | char *ecryptfs_page_virt; |
307 | loff_t ecryptfs_file_size = | 307 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); |
308 | i_size_read(ecryptfs_file->f_dentry->d_inode); | ||
309 | loff_t data_offset = 0; | 308 | loff_t data_offset = 0; |
310 | loff_t pos; | 309 | loff_t pos; |
311 | int rc = 0; | 310 | int rc = 0; |
@@ -327,7 +326,7 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, | |||
327 | 326 | ||
328 | if (num_bytes > total_remaining_bytes) | 327 | if (num_bytes > total_remaining_bytes) |
329 | num_bytes = total_remaining_bytes; | 328 | num_bytes = total_remaining_bytes; |
330 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file, | 329 | ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_inode, |
331 | ecryptfs_page_idx); | 330 | ecryptfs_page_idx); |
332 | if (IS_ERR(ecryptfs_page)) { | 331 | if (IS_ERR(ecryptfs_page)) { |
333 | rc = PTR_ERR(ecryptfs_page); | 332 | rc = PTR_ERR(ecryptfs_page); |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 0c0ae491d231..0435886e4a9f 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
@@ -109,27 +109,6 @@ void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode) | |||
109 | } | 109 | } |
110 | 110 | ||
111 | /** | 111 | /** |
112 | * ecryptfs_put_super | ||
113 | * @sb: Pointer to the ecryptfs super block | ||
114 | * | ||
115 | * Final actions when unmounting a file system. | ||
116 | * This will handle deallocation and release of our private data. | ||
117 | */ | ||
118 | static void ecryptfs_put_super(struct super_block *sb) | ||
119 | { | ||
120 | struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); | ||
121 | |||
122 | lock_kernel(); | ||
123 | |||
124 | ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); | ||
125 | bdi_destroy(&sb_info->bdi); | ||
126 | kmem_cache_free(ecryptfs_sb_info_cache, sb_info); | ||
127 | ecryptfs_set_superblock_private(sb, NULL); | ||
128 | |||
129 | unlock_kernel(); | ||
130 | } | ||
131 | |||
132 | /** | ||
133 | * ecryptfs_statfs | 112 | * ecryptfs_statfs |
134 | * @sb: The ecryptfs super block | 113 | * @sb: The ecryptfs super block |
135 | * @buf: The struct kstatfs to fill in with stats | 114 | * @buf: The struct kstatfs to fill in with stats |
@@ -203,7 +182,6 @@ const struct super_operations ecryptfs_sops = { | |||
203 | .alloc_inode = ecryptfs_alloc_inode, | 182 | .alloc_inode = ecryptfs_alloc_inode, |
204 | .destroy_inode = ecryptfs_destroy_inode, | 183 | .destroy_inode = ecryptfs_destroy_inode, |
205 | .drop_inode = generic_delete_inode, | 184 | .drop_inode = generic_delete_inode, |
206 | .put_super = ecryptfs_put_super, | ||
207 | .statfs = ecryptfs_statfs, | 185 | .statfs = ecryptfs_statfs, |
208 | .remount_fs = NULL, | 186 | .remount_fs = NULL, |
209 | .clear_inode = ecryptfs_clear_inode, | 187 | .clear_inode = ecryptfs_clear_inode, |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 76d2a79ef93e..d7c6afa79754 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -1123,16 +1123,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1123 | sbi = sb->s_fs_info; | 1123 | sbi = sb->s_fs_info; |
1124 | 1124 | ||
1125 | sb->s_dirt = 1; | 1125 | sb->s_dirt = 1; |
1126 | inode->i_uid = current->cred->fsuid; | 1126 | inode_init_owner(inode, dir, mode); |
1127 | if (dir->i_mode & S_ISGID) { | ||
1128 | inode->i_gid = dir->i_gid; | ||
1129 | if (S_ISDIR(mode)) | ||
1130 | mode |= S_ISGID; | ||
1131 | } else { | ||
1132 | inode->i_gid = current->cred->fsgid; | ||
1133 | } | ||
1134 | inode->i_mode = mode; | ||
1135 | |||
1136 | inode->i_ino = sbi->s_nextid++; | 1127 | inode->i_ino = sbi->s_nextid++; |
1137 | inode->i_blkbits = EXOFS_BLKSHIFT; | 1128 | inode->i_blkbits = EXOFS_BLKSHIFT; |
1138 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 1129 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index a99e54318c3d..ca7e2a0ed98a 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c | |||
@@ -420,7 +420,7 @@ release_and_out: | |||
420 | return error; | 420 | return error; |
421 | } | 421 | } |
422 | 422 | ||
423 | struct xattr_handler ext2_xattr_acl_access_handler = { | 423 | const struct xattr_handler ext2_xattr_acl_access_handler = { |
424 | .prefix = POSIX_ACL_XATTR_ACCESS, | 424 | .prefix = POSIX_ACL_XATTR_ACCESS, |
425 | .flags = ACL_TYPE_ACCESS, | 425 | .flags = ACL_TYPE_ACCESS, |
426 | .list = ext2_xattr_list_acl_access, | 426 | .list = ext2_xattr_list_acl_access, |
@@ -428,7 +428,7 @@ struct xattr_handler ext2_xattr_acl_access_handler = { | |||
428 | .set = ext2_xattr_set_acl, | 428 | .set = ext2_xattr_set_acl, |
429 | }; | 429 | }; |
430 | 430 | ||
431 | struct xattr_handler ext2_xattr_acl_default_handler = { | 431 | const struct xattr_handler ext2_xattr_acl_default_handler = { |
432 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 432 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
433 | .flags = ACL_TYPE_DEFAULT, | 433 | .flags = ACL_TYPE_DEFAULT, |
434 | .list = ext2_xattr_list_acl_default, | 434 | .list = ext2_xattr_list_acl_default, |
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index f0c5286f9342..938dbc739d00 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c | |||
@@ -549,16 +549,12 @@ got: | |||
549 | 549 | ||
550 | sb->s_dirt = 1; | 550 | sb->s_dirt = 1; |
551 | mark_buffer_dirty(bh2); | 551 | mark_buffer_dirty(bh2); |
552 | inode->i_uid = current_fsuid(); | 552 | if (test_opt(sb, GRPID)) { |
553 | if (test_opt (sb, GRPID)) | 553 | inode->i_mode = mode; |
554 | inode->i_uid = current_fsuid(); | ||
554 | inode->i_gid = dir->i_gid; | 555 | inode->i_gid = dir->i_gid; |
555 | else if (dir->i_mode & S_ISGID) { | ||
556 | inode->i_gid = dir->i_gid; | ||
557 | if (S_ISDIR(mode)) | ||
558 | mode |= S_ISGID; | ||
559 | } else | 556 | } else |
560 | inode->i_gid = current_fsgid(); | 557 | inode_init_owner(inode, dir, mode); |
561 | inode->i_mode = mode; | ||
562 | 558 | ||
563 | inode->i_ino = ino; | 559 | inode->i_ino = ino; |
564 | inode->i_blocks = 0; | 560 | inode->i_blocks = 0; |
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 3b96045a00ce..7c3915780b19 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c | |||
@@ -101,7 +101,7 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *, | |||
101 | 101 | ||
102 | static struct mb_cache *ext2_xattr_cache; | 102 | static struct mb_cache *ext2_xattr_cache; |
103 | 103 | ||
104 | static struct xattr_handler *ext2_xattr_handler_map[] = { | 104 | static const struct xattr_handler *ext2_xattr_handler_map[] = { |
105 | [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, | 105 | [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, |
106 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | 106 | #ifdef CONFIG_EXT2_FS_POSIX_ACL |
107 | [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler, | 107 | [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler, |
@@ -113,7 +113,7 @@ static struct xattr_handler *ext2_xattr_handler_map[] = { | |||
113 | #endif | 113 | #endif |
114 | }; | 114 | }; |
115 | 115 | ||
116 | struct xattr_handler *ext2_xattr_handlers[] = { | 116 | const struct xattr_handler *ext2_xattr_handlers[] = { |
117 | &ext2_xattr_user_handler, | 117 | &ext2_xattr_user_handler, |
118 | &ext2_xattr_trusted_handler, | 118 | &ext2_xattr_trusted_handler, |
119 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | 119 | #ifdef CONFIG_EXT2_FS_POSIX_ACL |
@@ -126,10 +126,10 @@ struct xattr_handler *ext2_xattr_handlers[] = { | |||
126 | NULL | 126 | NULL |
127 | }; | 127 | }; |
128 | 128 | ||
129 | static inline struct xattr_handler * | 129 | static inline const struct xattr_handler * |
130 | ext2_xattr_handler(int name_index) | 130 | ext2_xattr_handler(int name_index) |
131 | { | 131 | { |
132 | struct xattr_handler *handler = NULL; | 132 | const struct xattr_handler *handler = NULL; |
133 | 133 | ||
134 | if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map)) | 134 | if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map)) |
135 | handler = ext2_xattr_handler_map[name_index]; | 135 | handler = ext2_xattr_handler_map[name_index]; |
@@ -298,7 +298,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", | |||
298 | /* list the attribute names */ | 298 | /* list the attribute names */ |
299 | for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); | 299 | for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); |
300 | entry = EXT2_XATTR_NEXT(entry)) { | 300 | entry = EXT2_XATTR_NEXT(entry)) { |
301 | struct xattr_handler *handler = | 301 | const struct xattr_handler *handler = |
302 | ext2_xattr_handler(entry->e_name_index); | 302 | ext2_xattr_handler(entry->e_name_index); |
303 | 303 | ||
304 | if (handler) { | 304 | if (handler) { |
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h index bf8175b2ced9..a1a1c2184616 100644 --- a/fs/ext2/xattr.h +++ b/fs/ext2/xattr.h | |||
@@ -55,11 +55,11 @@ struct ext2_xattr_entry { | |||
55 | 55 | ||
56 | # ifdef CONFIG_EXT2_FS_XATTR | 56 | # ifdef CONFIG_EXT2_FS_XATTR |
57 | 57 | ||
58 | extern struct xattr_handler ext2_xattr_user_handler; | 58 | extern const struct xattr_handler ext2_xattr_user_handler; |
59 | extern struct xattr_handler ext2_xattr_trusted_handler; | 59 | extern const struct xattr_handler ext2_xattr_trusted_handler; |
60 | extern struct xattr_handler ext2_xattr_acl_access_handler; | 60 | extern const struct xattr_handler ext2_xattr_acl_access_handler; |
61 | extern struct xattr_handler ext2_xattr_acl_default_handler; | 61 | extern const struct xattr_handler ext2_xattr_acl_default_handler; |
62 | extern struct xattr_handler ext2_xattr_security_handler; | 62 | extern const struct xattr_handler ext2_xattr_security_handler; |
63 | 63 | ||
64 | extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); | 64 | extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); |
65 | 65 | ||
@@ -72,7 +72,7 @@ extern void ext2_xattr_put_super(struct super_block *); | |||
72 | extern int init_ext2_xattr(void); | 72 | extern int init_ext2_xattr(void); |
73 | extern void exit_ext2_xattr(void); | 73 | extern void exit_ext2_xattr(void); |
74 | 74 | ||
75 | extern struct xattr_handler *ext2_xattr_handlers[]; | 75 | extern const struct xattr_handler *ext2_xattr_handlers[]; |
76 | 76 | ||
77 | # else /* CONFIG_EXT2_FS_XATTR */ | 77 | # else /* CONFIG_EXT2_FS_XATTR */ |
78 | 78 | ||
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index b118c6383c6d..3004e15d5da5 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c | |||
@@ -67,7 +67,7 @@ ext2_init_security(struct inode *inode, struct inode *dir) | |||
67 | return err; | 67 | return err; |
68 | } | 68 | } |
69 | 69 | ||
70 | struct xattr_handler ext2_xattr_security_handler = { | 70 | const struct xattr_handler ext2_xattr_security_handler = { |
71 | .prefix = XATTR_SECURITY_PREFIX, | 71 | .prefix = XATTR_SECURITY_PREFIX, |
72 | .list = ext2_xattr_security_list, | 72 | .list = ext2_xattr_security_list, |
73 | .get = ext2_xattr_security_get, | 73 | .get = ext2_xattr_security_get, |
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 2a26d71f4771..667e46a8d62d 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c | |||
@@ -50,7 +50,7 @@ ext2_xattr_trusted_set(struct dentry *dentry, const char *name, | |||
50 | value, size, flags); | 50 | value, size, flags); |
51 | } | 51 | } |
52 | 52 | ||
53 | struct xattr_handler ext2_xattr_trusted_handler = { | 53 | const struct xattr_handler ext2_xattr_trusted_handler = { |
54 | .prefix = XATTR_TRUSTED_PREFIX, | 54 | .prefix = XATTR_TRUSTED_PREFIX, |
55 | .list = ext2_xattr_trusted_list, | 55 | .list = ext2_xattr_trusted_list, |
56 | .get = ext2_xattr_trusted_get, | 56 | .get = ext2_xattr_trusted_get, |
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 3f6caf3684b4..099d20f47163 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c | |||
@@ -54,7 +54,7 @@ ext2_xattr_user_set(struct dentry *dentry, const char *name, | |||
54 | name, value, size, flags); | 54 | name, value, size, flags); |
55 | } | 55 | } |
56 | 56 | ||
57 | struct xattr_handler ext2_xattr_user_handler = { | 57 | const struct xattr_handler ext2_xattr_user_handler = { |
58 | .prefix = XATTR_USER_PREFIX, | 58 | .prefix = XATTR_USER_PREFIX, |
59 | .list = ext2_xattr_user_list, | 59 | .list = ext2_xattr_user_list, |
60 | .get = ext2_xattr_user_get, | 60 | .get = ext2_xattr_user_get, |
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 82ba34158661..01552abbca3c 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c | |||
@@ -456,7 +456,7 @@ release_and_out: | |||
456 | return error; | 456 | return error; |
457 | } | 457 | } |
458 | 458 | ||
459 | struct xattr_handler ext3_xattr_acl_access_handler = { | 459 | const struct xattr_handler ext3_xattr_acl_access_handler = { |
460 | .prefix = POSIX_ACL_XATTR_ACCESS, | 460 | .prefix = POSIX_ACL_XATTR_ACCESS, |
461 | .flags = ACL_TYPE_ACCESS, | 461 | .flags = ACL_TYPE_ACCESS, |
462 | .list = ext3_xattr_list_acl_access, | 462 | .list = ext3_xattr_list_acl_access, |
@@ -464,7 +464,7 @@ struct xattr_handler ext3_xattr_acl_access_handler = { | |||
464 | .set = ext3_xattr_set_acl, | 464 | .set = ext3_xattr_set_acl, |
465 | }; | 465 | }; |
466 | 466 | ||
467 | struct xattr_handler ext3_xattr_acl_default_handler = { | 467 | const struct xattr_handler ext3_xattr_acl_default_handler = { |
468 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 468 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
469 | .flags = ACL_TYPE_DEFAULT, | 469 | .flags = ACL_TYPE_DEFAULT, |
470 | .list = ext3_xattr_list_acl_default, | 470 | .list = ext3_xattr_list_acl_default, |
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 26289e8f4163..fcf7487734b6 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c | |||
@@ -90,6 +90,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
90 | * storage | 90 | * storage |
91 | */ | 91 | */ |
92 | if (needs_barrier) | 92 | if (needs_barrier) |
93 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 93 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, |
94 | BLKDEV_IFL_WAIT); | ||
94 | return ret; | 95 | return ret; |
95 | } | 96 | } |
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 0d0e97ed3ff6..498021eb88fb 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c | |||
@@ -538,16 +538,13 @@ got: | |||
538 | if (S_ISDIR(mode)) | 538 | if (S_ISDIR(mode)) |
539 | percpu_counter_inc(&sbi->s_dirs_counter); | 539 | percpu_counter_inc(&sbi->s_dirs_counter); |
540 | 540 | ||
541 | inode->i_uid = current_fsuid(); | 541 | |
542 | if (test_opt (sb, GRPID)) | 542 | if (test_opt(sb, GRPID)) { |
543 | inode->i_gid = dir->i_gid; | 543 | inode->i_mode = mode; |
544 | else if (dir->i_mode & S_ISGID) { | 544 | inode->i_uid = current_fsuid(); |
545 | inode->i_gid = dir->i_gid; | 545 | inode->i_gid = dir->i_gid; |
546 | if (S_ISDIR(mode)) | ||
547 | mode |= S_ISGID; | ||
548 | } else | 546 | } else |
549 | inode->i_gid = current_fsgid(); | 547 | inode_init_owner(inode, dir, mode); |
550 | inode->i_mode = mode; | ||
551 | 548 | ||
552 | inode->i_ino = ino; | 549 | inode->i_ino = ino; |
553 | /* This is the optimal IO size (for stat), not the fs block size */ | 550 | /* This is the optimal IO size (for stat), not the fs block size */ |
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 534a94c3a933..71fb8d65e54c 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c | |||
@@ -104,7 +104,7 @@ static int ext3_xattr_list(struct dentry *dentry, char *buffer, | |||
104 | 104 | ||
105 | static struct mb_cache *ext3_xattr_cache; | 105 | static struct mb_cache *ext3_xattr_cache; |
106 | 106 | ||
107 | static struct xattr_handler *ext3_xattr_handler_map[] = { | 107 | static const struct xattr_handler *ext3_xattr_handler_map[] = { |
108 | [EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler, | 108 | [EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler, |
109 | #ifdef CONFIG_EXT3_FS_POSIX_ACL | 109 | #ifdef CONFIG_EXT3_FS_POSIX_ACL |
110 | [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler, | 110 | [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler, |
@@ -116,7 +116,7 @@ static struct xattr_handler *ext3_xattr_handler_map[] = { | |||
116 | #endif | 116 | #endif |
117 | }; | 117 | }; |
118 | 118 | ||
119 | struct xattr_handler *ext3_xattr_handlers[] = { | 119 | const struct xattr_handler *ext3_xattr_handlers[] = { |
120 | &ext3_xattr_user_handler, | 120 | &ext3_xattr_user_handler, |
121 | &ext3_xattr_trusted_handler, | 121 | &ext3_xattr_trusted_handler, |
122 | #ifdef CONFIG_EXT3_FS_POSIX_ACL | 122 | #ifdef CONFIG_EXT3_FS_POSIX_ACL |
@@ -129,10 +129,10 @@ struct xattr_handler *ext3_xattr_handlers[] = { | |||
129 | NULL | 129 | NULL |
130 | }; | 130 | }; |
131 | 131 | ||
132 | static inline struct xattr_handler * | 132 | static inline const struct xattr_handler * |
133 | ext3_xattr_handler(int name_index) | 133 | ext3_xattr_handler(int name_index) |
134 | { | 134 | { |
135 | struct xattr_handler *handler = NULL; | 135 | const struct xattr_handler *handler = NULL; |
136 | 136 | ||
137 | if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map)) | 137 | if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map)) |
138 | handler = ext3_xattr_handler_map[name_index]; | 138 | handler = ext3_xattr_handler_map[name_index]; |
@@ -338,7 +338,7 @@ ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry, | |||
338 | size_t rest = buffer_size; | 338 | size_t rest = buffer_size; |
339 | 339 | ||
340 | for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { | 340 | for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { |
341 | struct xattr_handler *handler = | 341 | const struct xattr_handler *handler = |
342 | ext3_xattr_handler(entry->e_name_index); | 342 | ext3_xattr_handler(entry->e_name_index); |
343 | 343 | ||
344 | if (handler) { | 344 | if (handler) { |
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h index 148a4dfc82ab..377fe7201169 100644 --- a/fs/ext3/xattr.h +++ b/fs/ext3/xattr.h | |||
@@ -58,11 +58,11 @@ struct ext3_xattr_entry { | |||
58 | 58 | ||
59 | # ifdef CONFIG_EXT3_FS_XATTR | 59 | # ifdef CONFIG_EXT3_FS_XATTR |
60 | 60 | ||
61 | extern struct xattr_handler ext3_xattr_user_handler; | 61 | extern const struct xattr_handler ext3_xattr_user_handler; |
62 | extern struct xattr_handler ext3_xattr_trusted_handler; | 62 | extern const struct xattr_handler ext3_xattr_trusted_handler; |
63 | extern struct xattr_handler ext3_xattr_acl_access_handler; | 63 | extern const struct xattr_handler ext3_xattr_acl_access_handler; |
64 | extern struct xattr_handler ext3_xattr_acl_default_handler; | 64 | extern const struct xattr_handler ext3_xattr_acl_default_handler; |
65 | extern struct xattr_handler ext3_xattr_security_handler; | 65 | extern const struct xattr_handler ext3_xattr_security_handler; |
66 | 66 | ||
67 | extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); | 67 | extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); |
68 | 68 | ||
@@ -76,7 +76,7 @@ extern void ext3_xattr_put_super(struct super_block *); | |||
76 | extern int init_ext3_xattr(void); | 76 | extern int init_ext3_xattr(void); |
77 | extern void exit_ext3_xattr(void); | 77 | extern void exit_ext3_xattr(void); |
78 | 78 | ||
79 | extern struct xattr_handler *ext3_xattr_handlers[]; | 79 | extern const struct xattr_handler *ext3_xattr_handlers[]; |
80 | 80 | ||
81 | # else /* CONFIG_EXT3_FS_XATTR */ | 81 | # else /* CONFIG_EXT3_FS_XATTR */ |
82 | 82 | ||
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index 3af91f476dff..03a99bfc59f9 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c | |||
@@ -69,7 +69,7 @@ ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir) | |||
69 | return err; | 69 | return err; |
70 | } | 70 | } |
71 | 71 | ||
72 | struct xattr_handler ext3_xattr_security_handler = { | 72 | const struct xattr_handler ext3_xattr_security_handler = { |
73 | .prefix = XATTR_SECURITY_PREFIX, | 73 | .prefix = XATTR_SECURITY_PREFIX, |
74 | .list = ext3_xattr_security_list, | 74 | .list = ext3_xattr_security_list, |
75 | .get = ext3_xattr_security_get, | 75 | .get = ext3_xattr_security_get, |
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c index e5562845ed96..dc8edda9ffe0 100644 --- a/fs/ext3/xattr_trusted.c +++ b/fs/ext3/xattr_trusted.c | |||
@@ -51,7 +51,7 @@ ext3_xattr_trusted_set(struct dentry *dentry, const char *name, | |||
51 | value, size, flags); | 51 | value, size, flags); |
52 | } | 52 | } |
53 | 53 | ||
54 | struct xattr_handler ext3_xattr_trusted_handler = { | 54 | const struct xattr_handler ext3_xattr_trusted_handler = { |
55 | .prefix = XATTR_TRUSTED_PREFIX, | 55 | .prefix = XATTR_TRUSTED_PREFIX, |
56 | .list = ext3_xattr_trusted_list, | 56 | .list = ext3_xattr_trusted_list, |
57 | .get = ext3_xattr_trusted_get, | 57 | .get = ext3_xattr_trusted_get, |
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index 3bcfe9ee0a68..7a321974d584 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c | |||
@@ -54,7 +54,7 @@ ext3_xattr_user_set(struct dentry *dentry, const char *name, | |||
54 | name, value, size, flags); | 54 | name, value, size, flags); |
55 | } | 55 | } |
56 | 56 | ||
57 | struct xattr_handler ext3_xattr_user_handler = { | 57 | const struct xattr_handler ext3_xattr_user_handler = { |
58 | .prefix = XATTR_USER_PREFIX, | 58 | .prefix = XATTR_USER_PREFIX, |
59 | .list = ext3_xattr_user_list, | 59 | .list = ext3_xattr_user_list, |
60 | .get = ext3_xattr_user_get, | 60 | .get = ext3_xattr_user_get, |
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 8a2a29d35a6f..feaf498feaa6 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -454,7 +454,7 @@ release_and_out: | |||
454 | return error; | 454 | return error; |
455 | } | 455 | } |
456 | 456 | ||
457 | struct xattr_handler ext4_xattr_acl_access_handler = { | 457 | const struct xattr_handler ext4_xattr_acl_access_handler = { |
458 | .prefix = POSIX_ACL_XATTR_ACCESS, | 458 | .prefix = POSIX_ACL_XATTR_ACCESS, |
459 | .flags = ACL_TYPE_ACCESS, | 459 | .flags = ACL_TYPE_ACCESS, |
460 | .list = ext4_xattr_list_acl_access, | 460 | .list = ext4_xattr_list_acl_access, |
@@ -462,7 +462,7 @@ struct xattr_handler ext4_xattr_acl_access_handler = { | |||
462 | .set = ext4_xattr_set_acl, | 462 | .set = ext4_xattr_set_acl, |
463 | }; | 463 | }; |
464 | 464 | ||
465 | struct xattr_handler ext4_xattr_acl_default_handler = { | 465 | const struct xattr_handler ext4_xattr_acl_default_handler = { |
466 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 466 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
467 | .flags = ACL_TYPE_DEFAULT, | 467 | .flags = ACL_TYPE_DEFAULT, |
468 | .list = ext4_xattr_list_acl_default, | 468 | .list = ext4_xattr_list_acl_default, |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0d0c3239c1cd..ef3d980e67cb 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -100,9 +100,11 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
100 | if (ext4_should_writeback_data(inode) && | 100 | if (ext4_should_writeback_data(inode) && |
101 | (journal->j_fs_dev != journal->j_dev) && | 101 | (journal->j_fs_dev != journal->j_dev) && |
102 | (journal->j_flags & JBD2_BARRIER)) | 102 | (journal->j_flags & JBD2_BARRIER)) |
103 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 103 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, |
104 | NULL, BLKDEV_IFL_WAIT); | ||
104 | jbd2_log_wait_commit(journal, commit_tid); | 105 | jbd2_log_wait_commit(journal, commit_tid); |
105 | } else if (journal->j_flags & JBD2_BARRIER) | 106 | } else if (journal->j_flags & JBD2_BARRIER) |
106 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 107 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, |
108 | BLKDEV_IFL_WAIT); | ||
107 | return ret; | 109 | return ret; |
108 | } | 110 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 57f6eef6ccd6..1a0e183a2f04 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -979,16 +979,12 @@ got: | |||
979 | atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); | 979 | atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); |
980 | } | 980 | } |
981 | 981 | ||
982 | inode->i_uid = current_fsuid(); | 982 | if (test_opt(sb, GRPID)) { |
983 | if (test_opt(sb, GRPID)) | 983 | inode->i_mode = mode; |
984 | inode->i_uid = current_fsuid(); | ||
984 | inode->i_gid = dir->i_gid; | 985 | inode->i_gid = dir->i_gid; |
985 | else if (dir->i_mode & S_ISGID) { | ||
986 | inode->i_gid = dir->i_gid; | ||
987 | if (S_ISDIR(mode)) | ||
988 | mode |= S_ISGID; | ||
989 | } else | 986 | } else |
990 | inode->i_gid = current_fsgid(); | 987 | inode_init_owner(inode, dir, mode); |
991 | inode->i_mode = mode; | ||
992 | 988 | ||
993 | inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); | 989 | inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); |
994 | /* This is the optimal IO size (for stat), not the fs block size */ | 990 | /* This is the optimal IO size (for stat), not the fs block size */ |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b4c5aa8489d8..2de0e9515089 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -97,7 +97,7 @@ static int ext4_xattr_list(struct dentry *dentry, char *buffer, | |||
97 | 97 | ||
98 | static struct mb_cache *ext4_xattr_cache; | 98 | static struct mb_cache *ext4_xattr_cache; |
99 | 99 | ||
100 | static struct xattr_handler *ext4_xattr_handler_map[] = { | 100 | static const struct xattr_handler *ext4_xattr_handler_map[] = { |
101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, | 101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, |
102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, | 103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, |
@@ -109,7 +109,7 @@ static struct xattr_handler *ext4_xattr_handler_map[] = { | |||
109 | #endif | 109 | #endif |
110 | }; | 110 | }; |
111 | 111 | ||
112 | struct xattr_handler *ext4_xattr_handlers[] = { | 112 | const struct xattr_handler *ext4_xattr_handlers[] = { |
113 | &ext4_xattr_user_handler, | 113 | &ext4_xattr_user_handler, |
114 | &ext4_xattr_trusted_handler, | 114 | &ext4_xattr_trusted_handler, |
115 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 115 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
@@ -122,10 +122,10 @@ struct xattr_handler *ext4_xattr_handlers[] = { | |||
122 | NULL | 122 | NULL |
123 | }; | 123 | }; |
124 | 124 | ||
125 | static inline struct xattr_handler * | 125 | static inline const struct xattr_handler * |
126 | ext4_xattr_handler(int name_index) | 126 | ext4_xattr_handler(int name_index) |
127 | { | 127 | { |
128 | struct xattr_handler *handler = NULL; | 128 | const struct xattr_handler *handler = NULL; |
129 | 129 | ||
130 | if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map)) | 130 | if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map)) |
131 | handler = ext4_xattr_handler_map[name_index]; | 131 | handler = ext4_xattr_handler_map[name_index]; |
@@ -332,7 +332,7 @@ ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry, | |||
332 | size_t rest = buffer_size; | 332 | size_t rest = buffer_size; |
333 | 333 | ||
334 | for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { | 334 | for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { |
335 | struct xattr_handler *handler = | 335 | const struct xattr_handler *handler = |
336 | ext4_xattr_handler(entry->e_name_index); | 336 | ext4_xattr_handler(entry->e_name_index); |
337 | 337 | ||
338 | if (handler) { | 338 | if (handler) { |
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 8ede88b18c29..518e96e43905 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -65,11 +65,11 @@ struct ext4_xattr_entry { | |||
65 | 65 | ||
66 | # ifdef CONFIG_EXT4_FS_XATTR | 66 | # ifdef CONFIG_EXT4_FS_XATTR |
67 | 67 | ||
68 | extern struct xattr_handler ext4_xattr_user_handler; | 68 | extern const struct xattr_handler ext4_xattr_user_handler; |
69 | extern struct xattr_handler ext4_xattr_trusted_handler; | 69 | extern const struct xattr_handler ext4_xattr_trusted_handler; |
70 | extern struct xattr_handler ext4_xattr_acl_access_handler; | 70 | extern const struct xattr_handler ext4_xattr_acl_access_handler; |
71 | extern struct xattr_handler ext4_xattr_acl_default_handler; | 71 | extern const struct xattr_handler ext4_xattr_acl_default_handler; |
72 | extern struct xattr_handler ext4_xattr_security_handler; | 72 | extern const struct xattr_handler ext4_xattr_security_handler; |
73 | 73 | ||
74 | extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); | 74 | extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); |
75 | 75 | ||
@@ -86,7 +86,7 @@ extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | |||
86 | extern int init_ext4_xattr(void); | 86 | extern int init_ext4_xattr(void); |
87 | extern void exit_ext4_xattr(void); | 87 | extern void exit_ext4_xattr(void); |
88 | 88 | ||
89 | extern struct xattr_handler *ext4_xattr_handlers[]; | 89 | extern const struct xattr_handler *ext4_xattr_handlers[]; |
90 | 90 | ||
91 | # else /* CONFIG_EXT4_FS_XATTR */ | 91 | # else /* CONFIG_EXT4_FS_XATTR */ |
92 | 92 | ||
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 8b145e98df07..9b21268e121c 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c | |||
@@ -69,7 +69,7 @@ ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir) | |||
69 | return err; | 69 | return err; |
70 | } | 70 | } |
71 | 71 | ||
72 | struct xattr_handler ext4_xattr_security_handler = { | 72 | const struct xattr_handler ext4_xattr_security_handler = { |
73 | .prefix = XATTR_SECURITY_PREFIX, | 73 | .prefix = XATTR_SECURITY_PREFIX, |
74 | .list = ext4_xattr_security_list, | 74 | .list = ext4_xattr_security_list, |
75 | .get = ext4_xattr_security_get, | 75 | .get = ext4_xattr_security_get, |
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 15b50edc6587..37e6ebca2cc3 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c | |||
@@ -51,7 +51,7 @@ ext4_xattr_trusted_set(struct dentry *dentry, const char *name, | |||
51 | name, value, size, flags); | 51 | name, value, size, flags); |
52 | } | 52 | } |
53 | 53 | ||
54 | struct xattr_handler ext4_xattr_trusted_handler = { | 54 | const struct xattr_handler ext4_xattr_trusted_handler = { |
55 | .prefix = XATTR_TRUSTED_PREFIX, | 55 | .prefix = XATTR_TRUSTED_PREFIX, |
56 | .list = ext4_xattr_trusted_list, | 56 | .list = ext4_xattr_trusted_list, |
57 | .get = ext4_xattr_trusted_get, | 57 | .get = ext4_xattr_trusted_get, |
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index c4ce05746ce1..98c375352d0e 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c | |||
@@ -54,7 +54,7 @@ ext4_xattr_user_set(struct dentry *dentry, const char *name, | |||
54 | name, value, size, flags); | 54 | name, value, size, flags); |
55 | } | 55 | } |
56 | 56 | ||
57 | struct xattr_handler ext4_xattr_user_handler = { | 57 | const struct xattr_handler ext4_xattr_user_handler = { |
58 | .prefix = XATTR_USER_PREFIX, | 58 | .prefix = XATTR_USER_PREFIX, |
59 | .list = ext4_xattr_user_list, | 59 | .list = ext4_xattr_user_list, |
60 | .get = ext4_xattr_user_get, | 60 | .get = ext4_xattr_user_get, |
diff --git a/fs/fcntl.c b/fs/fcntl.c index 0a140741b39e..f74d270ba155 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/dnotify.h> | 14 | #include <linux/dnotify.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/pipe_fs_i.h> | ||
17 | #include <linux/security.h> | 18 | #include <linux/security.h> |
18 | #include <linux/ptrace.h> | 19 | #include <linux/ptrace.h> |
19 | #include <linux/signal.h> | 20 | #include <linux/signal.h> |
@@ -412,6 +413,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
412 | case F_NOTIFY: | 413 | case F_NOTIFY: |
413 | err = fcntl_dirnotify(fd, filp, arg); | 414 | err = fcntl_dirnotify(fd, filp, arg); |
414 | break; | 415 | break; |
416 | case F_SETPIPE_SZ: | ||
417 | case F_GETPIPE_SZ: | ||
418 | err = pipe_fcntl(filp, cmd, arg); | ||
419 | break; | ||
415 | default: | 420 | default: |
416 | break; | 421 | break; |
417 | } | 422 | } |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 4b37f7cea4dd..5c4161f1fd9a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -42,9 +42,10 @@ struct wb_writeback_args { | |||
42 | long nr_pages; | 42 | long nr_pages; |
43 | struct super_block *sb; | 43 | struct super_block *sb; |
44 | enum writeback_sync_modes sync_mode; | 44 | enum writeback_sync_modes sync_mode; |
45 | int for_kupdate:1; | 45 | unsigned int for_kupdate:1; |
46 | int range_cyclic:1; | 46 | unsigned int range_cyclic:1; |
47 | int for_background:1; | 47 | unsigned int for_background:1; |
48 | unsigned int sb_pinned:1; | ||
48 | }; | 49 | }; |
49 | 50 | ||
50 | /* | 51 | /* |
@@ -192,7 +193,8 @@ static void bdi_wait_on_work_clear(struct bdi_work *work) | |||
192 | } | 193 | } |
193 | 194 | ||
194 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | 195 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, |
195 | struct wb_writeback_args *args) | 196 | struct wb_writeback_args *args, |
197 | int wait) | ||
196 | { | 198 | { |
197 | struct bdi_work *work; | 199 | struct bdi_work *work; |
198 | 200 | ||
@@ -204,6 +206,8 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | |||
204 | if (work) { | 206 | if (work) { |
205 | bdi_work_init(work, args); | 207 | bdi_work_init(work, args); |
206 | bdi_queue_work(bdi, work); | 208 | bdi_queue_work(bdi, work); |
209 | if (wait) | ||
210 | bdi_wait_on_work_clear(work); | ||
207 | } else { | 211 | } else { |
208 | struct bdi_writeback *wb = &bdi->wb; | 212 | struct bdi_writeback *wb = &bdi->wb; |
209 | 213 | ||
@@ -230,6 +234,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
230 | .sync_mode = WB_SYNC_ALL, | 234 | .sync_mode = WB_SYNC_ALL, |
231 | .nr_pages = LONG_MAX, | 235 | .nr_pages = LONG_MAX, |
232 | .range_cyclic = 0, | 236 | .range_cyclic = 0, |
237 | /* | ||
238 | * Setting sb_pinned is not necessary for WB_SYNC_ALL, but | ||
239 | * let's make it explicitly clear. | ||
240 | */ | ||
241 | .sb_pinned = 1, | ||
233 | }; | 242 | }; |
234 | struct bdi_work work; | 243 | struct bdi_work work; |
235 | 244 | ||
@@ -245,21 +254,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
245 | * @bdi: the backing device to write from | 254 | * @bdi: the backing device to write from |
246 | * @sb: write inodes from this super_block | 255 | * @sb: write inodes from this super_block |
247 | * @nr_pages: the number of pages to write | 256 | * @nr_pages: the number of pages to write |
257 | * @sb_locked: caller already holds sb umount sem. | ||
248 | * | 258 | * |
249 | * Description: | 259 | * Description: |
250 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only | 260 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only |
251 | * started when this function returns, we make no guarantees on | 261 | * started when this function returns, we make no guarantees on |
252 | * completion. Caller need not hold sb s_umount semaphore. | 262 | * completion. Caller specifies whether sb umount sem is held already or not. |
253 | * | 263 | * |
254 | */ | 264 | */ |
255 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | 265 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, |
256 | long nr_pages) | 266 | long nr_pages, int sb_locked) |
257 | { | 267 | { |
258 | struct wb_writeback_args args = { | 268 | struct wb_writeback_args args = { |
259 | .sb = sb, | 269 | .sb = sb, |
260 | .sync_mode = WB_SYNC_NONE, | 270 | .sync_mode = WB_SYNC_NONE, |
261 | .nr_pages = nr_pages, | 271 | .nr_pages = nr_pages, |
262 | .range_cyclic = 1, | 272 | .range_cyclic = 1, |
273 | .sb_pinned = sb_locked, | ||
263 | }; | 274 | }; |
264 | 275 | ||
265 | /* | 276 | /* |
@@ -271,7 +282,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | |||
271 | args.for_background = 1; | 282 | args.for_background = 1; |
272 | } | 283 | } |
273 | 284 | ||
274 | bdi_alloc_queue_work(bdi, &args); | 285 | bdi_alloc_queue_work(bdi, &args, sb_locked); |
275 | } | 286 | } |
276 | 287 | ||
277 | /* | 288 | /* |
@@ -452,11 +463,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
452 | 463 | ||
453 | BUG_ON(inode->i_state & I_SYNC); | 464 | BUG_ON(inode->i_state & I_SYNC); |
454 | 465 | ||
455 | /* Set I_SYNC, reset I_DIRTY */ | 466 | /* Set I_SYNC, reset I_DIRTY_PAGES */ |
456 | dirty = inode->i_state & I_DIRTY; | ||
457 | inode->i_state |= I_SYNC; | 467 | inode->i_state |= I_SYNC; |
458 | inode->i_state &= ~I_DIRTY; | 468 | inode->i_state &= ~I_DIRTY_PAGES; |
459 | |||
460 | spin_unlock(&inode_lock); | 469 | spin_unlock(&inode_lock); |
461 | 470 | ||
462 | ret = do_writepages(mapping, wbc); | 471 | ret = do_writepages(mapping, wbc); |
@@ -472,6 +481,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
472 | ret = err; | 481 | ret = err; |
473 | } | 482 | } |
474 | 483 | ||
484 | /* | ||
485 | * Some filesystems may redirty the inode during the writeback | ||
486 | * due to delalloc, clear dirty metadata flags right before | ||
487 | * write_inode() | ||
488 | */ | ||
489 | spin_lock(&inode_lock); | ||
490 | dirty = inode->i_state & I_DIRTY; | ||
491 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); | ||
492 | spin_unlock(&inode_lock); | ||
475 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 493 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
476 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 494 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
477 | int err = write_inode(inode, wbc); | 495 | int err = write_inode(inode, wbc); |
@@ -577,7 +595,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, | |||
577 | /* | 595 | /* |
578 | * Caller must already hold the ref for this | 596 | * Caller must already hold the ref for this |
579 | */ | 597 | */ |
580 | if (wbc->sync_mode == WB_SYNC_ALL) { | 598 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { |
581 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 599 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
582 | return SB_NOT_PINNED; | 600 | return SB_NOT_PINNED; |
583 | } | 601 | } |
@@ -751,6 +769,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
751 | .for_kupdate = args->for_kupdate, | 769 | .for_kupdate = args->for_kupdate, |
752 | .for_background = args->for_background, | 770 | .for_background = args->for_background, |
753 | .range_cyclic = args->range_cyclic, | 771 | .range_cyclic = args->range_cyclic, |
772 | .sb_pinned = args->sb_pinned, | ||
754 | }; | 773 | }; |
755 | unsigned long oldest_jif; | 774 | unsigned long oldest_jif; |
756 | long wrote = 0; | 775 | long wrote = 0; |
@@ -852,6 +871,12 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
852 | unsigned long expired; | 871 | unsigned long expired; |
853 | long nr_pages; | 872 | long nr_pages; |
854 | 873 | ||
874 | /* | ||
875 | * When dirty_writeback_interval is set to zero, periodic writeback is disabled | ||
876 | */ | ||
877 | if (!dirty_writeback_interval) | ||
878 | return 0; | ||
879 | |||
855 | expired = wb->last_old_flush + | 880 | expired = wb->last_old_flush + |
856 | msecs_to_jiffies(dirty_writeback_interval * 10); | 881 | msecs_to_jiffies(dirty_writeback_interval * 10); |
857 | if (time_before(jiffies, expired)) | 882 | if (time_before(jiffies, expired)) |
@@ -887,6 +912,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
887 | 912 | ||
888 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 913 | while ((work = get_next_work_item(bdi, wb)) != NULL) { |
889 | struct wb_writeback_args args = work->args; | 914 | struct wb_writeback_args args = work->args; |
915 | int post_clear; | ||
890 | 916 | ||
891 | /* | 917 | /* |
892 | * Override sync mode, in case we must wait for completion | 918 | * Override sync mode, in case we must wait for completion |
@@ -894,11 +920,13 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
894 | if (force_wait) | 920 | if (force_wait) |
895 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; | 921 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; |
896 | 922 | ||
923 | post_clear = WB_SYNC_ALL || args.sb_pinned; | ||
924 | |||
897 | /* | 925 | /* |
898 | * If this isn't a data integrity operation, just notify | 926 | * If this isn't a data integrity operation, just notify |
899 | * that we have seen this work and we are now starting it. | 927 | * that we have seen this work and we are now starting it. |
900 | */ | 928 | */ |
901 | if (args.sync_mode == WB_SYNC_NONE) | 929 | if (!post_clear) |
902 | wb_clear_pending(wb, work); | 930 | wb_clear_pending(wb, work); |
903 | 931 | ||
904 | wrote += wb_writeback(wb, &args); | 932 | wrote += wb_writeback(wb, &args); |
@@ -907,7 +935,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
907 | * This is a data integrity writeback, so only do the | 935 | * This is a data integrity writeback, so only do the |
908 | * notification when we have completed the work. | 936 | * notification when we have completed the work. |
909 | */ | 937 | */ |
910 | if (args.sync_mode == WB_SYNC_ALL) | 938 | if (post_clear) |
911 | wb_clear_pending(wb, work); | 939 | wb_clear_pending(wb, work); |
912 | } | 940 | } |
913 | 941 | ||
@@ -947,8 +975,17 @@ int bdi_writeback_task(struct bdi_writeback *wb) | |||
947 | break; | 975 | break; |
948 | } | 976 | } |
949 | 977 | ||
950 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); | 978 | if (dirty_writeback_interval) { |
951 | schedule_timeout_interruptible(wait_jiffies); | 979 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); |
980 | schedule_timeout_interruptible(wait_jiffies); | ||
981 | } else { | ||
982 | set_current_state(TASK_INTERRUPTIBLE); | ||
983 | if (list_empty_careful(&wb->bdi->work_list) && | ||
984 | !kthread_should_stop()) | ||
985 | schedule(); | ||
986 | __set_current_state(TASK_RUNNING); | ||
987 | } | ||
988 | |||
952 | try_to_freeze(); | 989 | try_to_freeze(); |
953 | } | 990 | } |
954 | 991 | ||
@@ -974,7 +1011,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages) | |||
974 | if (!bdi_has_dirty_io(bdi)) | 1011 | if (!bdi_has_dirty_io(bdi)) |
975 | continue; | 1012 | continue; |
976 | 1013 | ||
977 | bdi_alloc_queue_work(bdi, &args); | 1014 | bdi_alloc_queue_work(bdi, &args, 0); |
978 | } | 1015 | } |
979 | 1016 | ||
980 | rcu_read_unlock(); | 1017 | rcu_read_unlock(); |
@@ -1183,6 +1220,18 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1183 | iput(old_inode); | 1220 | iput(old_inode); |
1184 | } | 1221 | } |
1185 | 1222 | ||
1223 | static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) | ||
1224 | { | ||
1225 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | ||
1226 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1227 | long nr_to_write; | ||
1228 | |||
1229 | nr_to_write = nr_dirty + nr_unstable + | ||
1230 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1231 | |||
1232 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); | ||
1233 | } | ||
1234 | |||
1186 | /** | 1235 | /** |
1187 | * writeback_inodes_sb - writeback dirty inodes from given super_block | 1236 | * writeback_inodes_sb - writeback dirty inodes from given super_block |
1188 | * @sb: the superblock | 1237 | * @sb: the superblock |
@@ -1194,18 +1243,23 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1194 | */ | 1243 | */ |
1195 | void writeback_inodes_sb(struct super_block *sb) | 1244 | void writeback_inodes_sb(struct super_block *sb) |
1196 | { | 1245 | { |
1197 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 1246 | __writeback_inodes_sb(sb, 0); |
1198 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1199 | long nr_to_write; | ||
1200 | |||
1201 | nr_to_write = nr_dirty + nr_unstable + | ||
1202 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1203 | |||
1204 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write); | ||
1205 | } | 1247 | } |
1206 | EXPORT_SYMBOL(writeback_inodes_sb); | 1248 | EXPORT_SYMBOL(writeback_inodes_sb); |
1207 | 1249 | ||
1208 | /** | 1250 | /** |
1251 | * writeback_inodes_sb_locked - writeback dirty inodes from given super_block | ||
1252 | * @sb: the superblock | ||
1253 | * | ||
1254 | * Like writeback_inodes_sb(), except the caller already holds the | ||
1255 | * sb umount sem. | ||
1256 | */ | ||
1257 | void writeback_inodes_sb_locked(struct super_block *sb) | ||
1258 | { | ||
1259 | __writeback_inodes_sb(sb, 1); | ||
1260 | } | ||
1261 | |||
1262 | /** | ||
1209 | * writeback_inodes_sb_if_idle - start writeback if none underway | 1263 | * writeback_inodes_sb_if_idle - start writeback if none underway |
1210 | * @sb: the superblock | 1264 | * @sb: the superblock |
1211 | * | 1265 | * |
diff --git a/fs/generic_acl.c b/fs/generic_acl.c index fe5df5457656..99800e564157 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c | |||
@@ -201,7 +201,7 @@ generic_check_acl(struct inode *inode, int mask) | |||
201 | return -EAGAIN; | 201 | return -EAGAIN; |
202 | } | 202 | } |
203 | 203 | ||
204 | struct xattr_handler generic_acl_access_handler = { | 204 | const struct xattr_handler generic_acl_access_handler = { |
205 | .prefix = POSIX_ACL_XATTR_ACCESS, | 205 | .prefix = POSIX_ACL_XATTR_ACCESS, |
206 | .flags = ACL_TYPE_ACCESS, | 206 | .flags = ACL_TYPE_ACCESS, |
207 | .list = generic_acl_list, | 207 | .list = generic_acl_list, |
@@ -209,7 +209,7 @@ struct xattr_handler generic_acl_access_handler = { | |||
209 | .set = generic_acl_set, | 209 | .set = generic_acl_set, |
210 | }; | 210 | }; |
211 | 211 | ||
212 | struct xattr_handler generic_acl_default_handler = { | 212 | const struct xattr_handler generic_acl_default_handler = { |
213 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 213 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
214 | .flags = ACL_TYPE_DEFAULT, | 214 | .flags = ACL_TYPE_DEFAULT, |
215 | .list = generic_acl_list, | 215 | .list = generic_acl_list, |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 87ee309d4c24..9fb76b0a0485 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
@@ -335,7 +335,7 @@ out: | |||
335 | return error; | 335 | return error; |
336 | } | 336 | } |
337 | 337 | ||
338 | struct xattr_handler gfs2_xattr_system_handler = { | 338 | const struct xattr_handler gfs2_xattr_system_handler = { |
339 | .prefix = XATTR_SYSTEM_PREFIX, | 339 | .prefix = XATTR_SYSTEM_PREFIX, |
340 | .flags = GFS2_EATYPE_SYS, | 340 | .flags = GFS2_EATYPE_SYS, |
341 | .get = gfs2_xattr_system_get, | 341 | .get = gfs2_xattr_system_get, |
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index 9306a2e6620c..b522b0cb39ea 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h | |||
@@ -19,6 +19,6 @@ | |||
19 | extern int gfs2_check_acl(struct inode *inode, int mask); | 19 | extern int gfs2_check_acl(struct inode *inode, int mask); |
20 | extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); | 20 | extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); |
21 | extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); | 21 | extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); |
22 | extern struct xattr_handler gfs2_xattr_system_handler; | 22 | extern const struct xattr_handler gfs2_xattr_system_handler; |
23 | 23 | ||
24 | #endif /* __ACL_DOT_H__ */ | 24 | #endif /* __ACL_DOT_H__ */ |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 8bce73ed4d8e..117fa4171f62 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -854,7 +854,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | |||
854 | if ((start + nr_sects) != blk) { | 854 | if ((start + nr_sects) != blk) { |
855 | rv = blkdev_issue_discard(bdev, start, | 855 | rv = blkdev_issue_discard(bdev, start, |
856 | nr_sects, GFP_NOFS, | 856 | nr_sects, GFP_NOFS, |
857 | DISCARD_FL_BARRIER); | 857 | BLKDEV_IFL_WAIT | |
858 | BLKDEV_IFL_BARRIER); | ||
858 | if (rv) | 859 | if (rv) |
859 | goto fail; | 860 | goto fail; |
860 | nr_sects = 0; | 861 | nr_sects = 0; |
@@ -869,7 +870,7 @@ start_new_extent: | |||
869 | } | 870 | } |
870 | if (nr_sects) { | 871 | if (nr_sects) { |
871 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, | 872 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, |
872 | DISCARD_FL_BARRIER); | 873 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); |
873 | if (rv) | 874 | if (rv) |
874 | goto fail; | 875 | goto fail; |
875 | } | 876 | } |
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 3df60f2d84e3..a0464680af0b 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h | |||
@@ -54,7 +54,7 @@ extern struct file_system_type gfs2meta_fs_type; | |||
54 | extern const struct export_operations gfs2_export_ops; | 54 | extern const struct export_operations gfs2_export_ops; |
55 | extern const struct super_operations gfs2_super_ops; | 55 | extern const struct super_operations gfs2_super_ops; |
56 | extern const struct dentry_operations gfs2_dops; | 56 | extern const struct dentry_operations gfs2_dops; |
57 | extern struct xattr_handler *gfs2_xattr_handlers[]; | 57 | extern const struct xattr_handler *gfs2_xattr_handlers[]; |
58 | 58 | ||
59 | #endif /* __SUPER_DOT_H__ */ | 59 | #endif /* __SUPER_DOT_H__ */ |
60 | 60 | ||
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index c2ebdf2c01d4..82f93da00d1b 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
@@ -1535,21 +1535,21 @@ out_alloc: | |||
1535 | return error; | 1535 | return error; |
1536 | } | 1536 | } |
1537 | 1537 | ||
1538 | static struct xattr_handler gfs2_xattr_user_handler = { | 1538 | static const struct xattr_handler gfs2_xattr_user_handler = { |
1539 | .prefix = XATTR_USER_PREFIX, | 1539 | .prefix = XATTR_USER_PREFIX, |
1540 | .flags = GFS2_EATYPE_USR, | 1540 | .flags = GFS2_EATYPE_USR, |
1541 | .get = gfs2_xattr_get, | 1541 | .get = gfs2_xattr_get, |
1542 | .set = gfs2_xattr_set, | 1542 | .set = gfs2_xattr_set, |
1543 | }; | 1543 | }; |
1544 | 1544 | ||
1545 | static struct xattr_handler gfs2_xattr_security_handler = { | 1545 | static const struct xattr_handler gfs2_xattr_security_handler = { |
1546 | .prefix = XATTR_SECURITY_PREFIX, | 1546 | .prefix = XATTR_SECURITY_PREFIX, |
1547 | .flags = GFS2_EATYPE_SECURITY, | 1547 | .flags = GFS2_EATYPE_SECURITY, |
1548 | .get = gfs2_xattr_get, | 1548 | .get = gfs2_xattr_get, |
1549 | .set = gfs2_xattr_set, | 1549 | .set = gfs2_xattr_set, |
1550 | }; | 1550 | }; |
1551 | 1551 | ||
1552 | struct xattr_handler *gfs2_xattr_handlers[] = { | 1552 | const struct xattr_handler *gfs2_xattr_handlers[] = { |
1553 | &gfs2_xattr_user_handler, | 1553 | &gfs2_xattr_user_handler, |
1554 | &gfs2_xattr_security_handler, | 1554 | &gfs2_xattr_security_handler, |
1555 | &gfs2_xattr_system_handler, | 1555 | &gfs2_xattr_system_handler, |
diff --git a/fs/inode.c b/fs/inode.c index 258ec22bb298..2bee20ae3d65 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -286,11 +286,9 @@ static void init_once(void *foo) | |||
286 | */ | 286 | */ |
287 | void __iget(struct inode *inode) | 287 | void __iget(struct inode *inode) |
288 | { | 288 | { |
289 | if (atomic_read(&inode->i_count)) { | 289 | if (atomic_inc_return(&inode->i_count) != 1) |
290 | atomic_inc(&inode->i_count); | ||
291 | return; | 290 | return; |
292 | } | 291 | |
293 | atomic_inc(&inode->i_count); | ||
294 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | 292 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) |
295 | list_move(&inode->i_list, &inode_in_use); | 293 | list_move(&inode->i_list, &inode_in_use); |
296 | inodes_stat.nr_unused--; | 294 | inodes_stat.nr_unused--; |
@@ -1608,3 +1606,23 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) | |||
1608 | inode->i_ino); | 1606 | inode->i_ino); |
1609 | } | 1607 | } |
1610 | EXPORT_SYMBOL(init_special_inode); | 1608 | EXPORT_SYMBOL(init_special_inode); |
1609 | |||
1610 | /** | ||
1611 | * inode_init_owner - Init uid, gid, mode for new inode according to POSIX standards | ||
1612 | * @inode: New inode | ||
1613 | * @dir: Directory inode | ||
1614 | * @mode: mode of the new inode | ||
1615 | */ | ||
1616 | void inode_init_owner(struct inode *inode, const struct inode *dir, | ||
1617 | mode_t mode) | ||
1618 | { | ||
1619 | inode->i_uid = current_fsuid(); | ||
1620 | if (dir && dir->i_mode & S_ISGID) { | ||
1621 | inode->i_gid = dir->i_gid; | ||
1622 | if (S_ISDIR(mode)) | ||
1623 | mode |= S_ISGID; | ||
1624 | } else | ||
1625 | inode->i_gid = current_fsgid(); | ||
1626 | inode->i_mode = mode; | ||
1627 | } | ||
1628 | EXPORT_SYMBOL(inode_init_owner); | ||
diff --git a/fs/internal.h b/fs/internal.h index 8a03a5447bdf..6b706bc60a66 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -87,6 +87,8 @@ extern struct file *get_empty_filp(void); | |||
87 | * super.c | 87 | * super.c |
88 | */ | 88 | */ |
89 | extern int do_remount_sb(struct super_block *, int, void *, int); | 89 | extern int do_remount_sb(struct super_block *, int, void *, int); |
90 | extern void __put_super(struct super_block *sb); | ||
91 | extern void put_super(struct super_block *sb); | ||
90 | 92 | ||
91 | /* | 93 | /* |
92 | * open.c | 94 | * open.c |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 7faefb4da939..2d140a713861 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -525,15 +525,8 @@ static int ioctl_fsfreeze(struct file *filp) | |||
525 | if (sb->s_op->freeze_fs == NULL) | 525 | if (sb->s_op->freeze_fs == NULL) |
526 | return -EOPNOTSUPP; | 526 | return -EOPNOTSUPP; |
527 | 527 | ||
528 | /* If a blockdevice-backed filesystem isn't specified, return. */ | ||
529 | if (sb->s_bdev == NULL) | ||
530 | return -EINVAL; | ||
531 | |||
532 | /* Freeze */ | 528 | /* Freeze */ |
533 | sb = freeze_bdev(sb->s_bdev); | 529 | return freeze_super(sb); |
534 | if (IS_ERR(sb)) | ||
535 | return PTR_ERR(sb); | ||
536 | return 0; | ||
537 | } | 530 | } |
538 | 531 | ||
539 | static int ioctl_fsthaw(struct file *filp) | 532 | static int ioctl_fsthaw(struct file *filp) |
@@ -543,12 +536,8 @@ static int ioctl_fsthaw(struct file *filp) | |||
543 | if (!capable(CAP_SYS_ADMIN)) | 536 | if (!capable(CAP_SYS_ADMIN)) |
544 | return -EPERM; | 537 | return -EPERM; |
545 | 538 | ||
546 | /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */ | ||
547 | if (sb->s_bdev == NULL) | ||
548 | return -EINVAL; | ||
549 | |||
550 | /* Thaw */ | 539 | /* Thaw */ |
551 | return thaw_bdev(sb->s_bdev, sb); | 540 | return thaw_super(sb); |
552 | } | 541 | } |
553 | 542 | ||
554 | /* | 543 | /* |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 30beb11ef928..076d1cc44f95 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -530,7 +530,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
530 | */ | 530 | */ |
531 | if ((journal->j_fs_dev != journal->j_dev) && | 531 | if ((journal->j_fs_dev != journal->j_dev) && |
532 | (journal->j_flags & JBD2_BARRIER)) | 532 | (journal->j_flags & JBD2_BARRIER)) |
533 | blkdev_issue_flush(journal->j_fs_dev, NULL); | 533 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, |
534 | BLKDEV_IFL_WAIT); | ||
534 | if (!(journal->j_flags & JBD2_ABORT)) | 535 | if (!(journal->j_flags & JBD2_ABORT)) |
535 | jbd2_journal_update_superblock(journal, 1); | 536 | jbd2_journal_update_superblock(journal, 1); |
536 | return 0; | 537 | return 0; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 671da7fb7ffd..75716d3d2be0 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -717,7 +717,8 @@ start_journal_io: | |||
717 | if (commit_transaction->t_flushed_data_blocks && | 717 | if (commit_transaction->t_flushed_data_blocks && |
718 | (journal->j_fs_dev != journal->j_dev) && | 718 | (journal->j_fs_dev != journal->j_dev) && |
719 | (journal->j_flags & JBD2_BARRIER)) | 719 | (journal->j_flags & JBD2_BARRIER)) |
720 | blkdev_issue_flush(journal->j_fs_dev, NULL); | 720 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, |
721 | BLKDEV_IFL_WAIT); | ||
721 | 722 | ||
722 | /* Done it all: now write the commit record asynchronously. */ | 723 | /* Done it all: now write the commit record asynchronously. */ |
723 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 724 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, |
@@ -727,7 +728,8 @@ start_journal_io: | |||
727 | if (err) | 728 | if (err) |
728 | __jbd2_journal_abort_hard(journal); | 729 | __jbd2_journal_abort_hard(journal); |
729 | if (journal->j_flags & JBD2_BARRIER) | 730 | if (journal->j_flags & JBD2_BARRIER) |
730 | blkdev_issue_flush(journal->j_dev, NULL); | 731 | blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL, |
732 | BLKDEV_IFL_WAIT); | ||
731 | } | 733 | } |
732 | 734 | ||
733 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | 735 | err = journal_finish_inode_data_buffers(journal, commit_transaction); |
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 7cdc3196476a..a33aab6b5e68 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c | |||
@@ -419,7 +419,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name, | |||
419 | return rc; | 419 | return rc; |
420 | } | 420 | } |
421 | 421 | ||
422 | struct xattr_handler jffs2_acl_access_xattr_handler = { | 422 | const struct xattr_handler jffs2_acl_access_xattr_handler = { |
423 | .prefix = POSIX_ACL_XATTR_ACCESS, | 423 | .prefix = POSIX_ACL_XATTR_ACCESS, |
424 | .flags = ACL_TYPE_DEFAULT, | 424 | .flags = ACL_TYPE_DEFAULT, |
425 | .list = jffs2_acl_access_listxattr, | 425 | .list = jffs2_acl_access_listxattr, |
@@ -427,7 +427,7 @@ struct xattr_handler jffs2_acl_access_xattr_handler = { | |||
427 | .set = jffs2_acl_setxattr, | 427 | .set = jffs2_acl_setxattr, |
428 | }; | 428 | }; |
429 | 429 | ||
430 | struct xattr_handler jffs2_acl_default_xattr_handler = { | 430 | const struct xattr_handler jffs2_acl_default_xattr_handler = { |
431 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 431 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
432 | .flags = ACL_TYPE_DEFAULT, | 432 | .flags = ACL_TYPE_DEFAULT, |
433 | .list = jffs2_acl_default_listxattr, | 433 | .list = jffs2_acl_default_listxattr, |
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index f0ba63e3c36b..5e42de8d9541 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h | |||
@@ -31,8 +31,8 @@ extern int jffs2_acl_chmod(struct inode *); | |||
31 | extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); | 31 | extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); |
32 | extern int jffs2_init_acl_post(struct inode *); | 32 | extern int jffs2_init_acl_post(struct inode *); |
33 | 33 | ||
34 | extern struct xattr_handler jffs2_acl_access_xattr_handler; | 34 | extern const struct xattr_handler jffs2_acl_access_xattr_handler; |
35 | extern struct xattr_handler jffs2_acl_default_xattr_handler; | 35 | extern const struct xattr_handler jffs2_acl_default_xattr_handler; |
36 | 36 | ||
37 | #else | 37 | #else |
38 | 38 | ||
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c index eaccee058583..239f51216a68 100644 --- a/fs/jffs2/security.c +++ b/fs/jffs2/security.c | |||
@@ -77,7 +77,7 @@ static size_t jffs2_security_listxattr(struct dentry *dentry, char *list, | |||
77 | return retlen; | 77 | return retlen; |
78 | } | 78 | } |
79 | 79 | ||
80 | struct xattr_handler jffs2_security_xattr_handler = { | 80 | const struct xattr_handler jffs2_security_xattr_handler = { |
81 | .prefix = XATTR_SECURITY_PREFIX, | 81 | .prefix = XATTR_SECURITY_PREFIX, |
82 | .list = jffs2_security_listxattr, | 82 | .list = jffs2_security_listxattr, |
83 | .set = jffs2_security_setxattr, | 83 | .set = jffs2_security_setxattr, |
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 9e75c62c85d6..a2d58c96f1b4 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c | |||
@@ -904,7 +904,7 @@ struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, | |||
904 | * do_jffs2_setxattr(inode, xprefix, xname, buffer, size, flags) | 904 | * do_jffs2_setxattr(inode, xprefix, xname, buffer, size, flags) |
905 | * is an implementation of setxattr handler on jffs2. | 905 | * is an implementation of setxattr handler on jffs2. |
906 | * -------------------------------------------------- */ | 906 | * -------------------------------------------------- */ |
907 | struct xattr_handler *jffs2_xattr_handlers[] = { | 907 | const struct xattr_handler *jffs2_xattr_handlers[] = { |
908 | &jffs2_user_xattr_handler, | 908 | &jffs2_user_xattr_handler, |
909 | #ifdef CONFIG_JFFS2_FS_SECURITY | 909 | #ifdef CONFIG_JFFS2_FS_SECURITY |
910 | &jffs2_security_xattr_handler, | 910 | &jffs2_security_xattr_handler, |
@@ -917,8 +917,8 @@ struct xattr_handler *jffs2_xattr_handlers[] = { | |||
917 | NULL | 917 | NULL |
918 | }; | 918 | }; |
919 | 919 | ||
920 | static struct xattr_handler *xprefix_to_handler(int xprefix) { | 920 | static const struct xattr_handler *xprefix_to_handler(int xprefix) { |
921 | struct xattr_handler *ret; | 921 | const struct xattr_handler *ret; |
922 | 922 | ||
923 | switch (xprefix) { | 923 | switch (xprefix) { |
924 | case JFFS2_XPREFIX_USER: | 924 | case JFFS2_XPREFIX_USER: |
@@ -955,7 +955,7 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
955 | struct jffs2_inode_cache *ic = f->inocache; | 955 | struct jffs2_inode_cache *ic = f->inocache; |
956 | struct jffs2_xattr_ref *ref, **pref; | 956 | struct jffs2_xattr_ref *ref, **pref; |
957 | struct jffs2_xattr_datum *xd; | 957 | struct jffs2_xattr_datum *xd; |
958 | struct xattr_handler *xhandle; | 958 | const struct xattr_handler *xhandle; |
959 | ssize_t len, rc; | 959 | ssize_t len, rc; |
960 | int retry = 0; | 960 | int retry = 0; |
961 | 961 | ||
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h index 6e3b5ddfb7ab..cf4f5759b42b 100644 --- a/fs/jffs2/xattr.h +++ b/fs/jffs2/xattr.h | |||
@@ -93,9 +93,9 @@ extern int do_jffs2_getxattr(struct inode *inode, int xprefix, const char *xname | |||
93 | extern int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, | 93 | extern int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, |
94 | const char *buffer, size_t size, int flags); | 94 | const char *buffer, size_t size, int flags); |
95 | 95 | ||
96 | extern struct xattr_handler *jffs2_xattr_handlers[]; | 96 | extern const struct xattr_handler *jffs2_xattr_handlers[]; |
97 | extern struct xattr_handler jffs2_user_xattr_handler; | 97 | extern const struct xattr_handler jffs2_user_xattr_handler; |
98 | extern struct xattr_handler jffs2_trusted_xattr_handler; | 98 | extern const struct xattr_handler jffs2_trusted_xattr_handler; |
99 | 99 | ||
100 | extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t); | 100 | extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t); |
101 | #define jffs2_getxattr generic_getxattr | 101 | #define jffs2_getxattr generic_getxattr |
@@ -122,7 +122,7 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t); | |||
122 | 122 | ||
123 | #ifdef CONFIG_JFFS2_FS_SECURITY | 123 | #ifdef CONFIG_JFFS2_FS_SECURITY |
124 | extern int jffs2_init_security(struct inode *inode, struct inode *dir); | 124 | extern int jffs2_init_security(struct inode *inode, struct inode *dir); |
125 | extern struct xattr_handler jffs2_security_xattr_handler; | 125 | extern const struct xattr_handler jffs2_security_xattr_handler; |
126 | #else | 126 | #else |
127 | #define jffs2_init_security(inode,dir) (0) | 127 | #define jffs2_init_security(inode,dir) (0) |
128 | #endif /* CONFIG_JFFS2_FS_SECURITY */ | 128 | #endif /* CONFIG_JFFS2_FS_SECURITY */ |
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c index 3e5a5e356e05..1c868194c504 100644 --- a/fs/jffs2/xattr_trusted.c +++ b/fs/jffs2/xattr_trusted.c | |||
@@ -47,7 +47,7 @@ static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list, | |||
47 | return retlen; | 47 | return retlen; |
48 | } | 48 | } |
49 | 49 | ||
50 | struct xattr_handler jffs2_trusted_xattr_handler = { | 50 | const struct xattr_handler jffs2_trusted_xattr_handler = { |
51 | .prefix = XATTR_TRUSTED_PREFIX, | 51 | .prefix = XATTR_TRUSTED_PREFIX, |
52 | .list = jffs2_trusted_listxattr, | 52 | .list = jffs2_trusted_listxattr, |
53 | .set = jffs2_trusted_setxattr, | 53 | .set = jffs2_trusted_setxattr, |
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c index 8544af67dffe..916b5c966039 100644 --- a/fs/jffs2/xattr_user.c +++ b/fs/jffs2/xattr_user.c | |||
@@ -47,7 +47,7 @@ static size_t jffs2_user_listxattr(struct dentry *dentry, char *list, | |||
47 | return retlen; | 47 | return retlen; |
48 | } | 48 | } |
49 | 49 | ||
50 | struct xattr_handler jffs2_user_xattr_handler = { | 50 | const struct xattr_handler jffs2_user_xattr_handler = { |
51 | .prefix = XATTR_USER_PREFIX, | 51 | .prefix = XATTR_USER_PREFIX, |
52 | .list = jffs2_user_listxattr, | 52 | .list = jffs2_user_listxattr, |
53 | .set = jffs2_user_setxattr, | 53 | .set = jffs2_user_setxattr, |
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 829921b67765..2686531e235a 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c | |||
@@ -98,14 +98,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) | |||
98 | goto fail_unlock; | 98 | goto fail_unlock; |
99 | } | 99 | } |
100 | 100 | ||
101 | inode->i_uid = current_fsuid(); | 101 | inode_init_owner(inode, parent, mode); |
102 | if (parent->i_mode & S_ISGID) { | ||
103 | inode->i_gid = parent->i_gid; | ||
104 | if (S_ISDIR(mode)) | ||
105 | mode |= S_ISGID; | ||
106 | } else | ||
107 | inode->i_gid = current_fsgid(); | ||
108 | |||
109 | /* | 102 | /* |
110 | * New inodes need to save sane values on disk when | 103 | * New inodes need to save sane values on disk when |
111 | * uid & gid mount options are used | 104 | * uid & gid mount options are used |
@@ -121,7 +114,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode) | |||
121 | if (rc) | 114 | if (rc) |
122 | goto fail_drop; | 115 | goto fail_drop; |
123 | 116 | ||
124 | inode->i_mode = mode; | ||
125 | /* inherit flags from parent */ | 117 | /* inherit flags from parent */ |
126 | jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT; | 118 | jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT; |
127 | 119 | ||
@@ -134,7 +126,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) | |||
134 | if (S_ISLNK(mode)) | 126 | if (S_ISLNK(mode)) |
135 | jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); | 127 | jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); |
136 | } | 128 | } |
137 | jfs_inode->mode2 |= mode; | 129 | jfs_inode->mode2 |= inode->i_mode; |
138 | 130 | ||
139 | inode->i_blocks = 0; | 131 | inode->i_blocks = 0; |
140 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 132 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 755a92e8daa7..f602e230e162 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c | |||
@@ -358,14 +358,7 @@ struct inode *logfs_new_inode(struct inode *dir, int mode) | |||
358 | inode->i_mode = mode; | 358 | inode->i_mode = mode; |
359 | logfs_set_ino_generation(sb, inode); | 359 | logfs_set_ino_generation(sb, inode); |
360 | 360 | ||
361 | inode->i_uid = current_fsuid(); | 361 | inode_init_owner(inode, dir, mode); |
362 | inode->i_gid = current_fsgid(); | ||
363 | if (dir->i_mode & S_ISGID) { | ||
364 | inode->i_gid = dir->i_gid; | ||
365 | if (S_ISDIR(mode)) | ||
366 | inode->i_mode |= S_ISGID; | ||
367 | } | ||
368 | |||
369 | logfs_inode_setops(inode); | 362 | logfs_inode_setops(inode); |
370 | insert_inode_hash(inode); | 363 | insert_inode_hash(inode); |
371 | 364 | ||
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 6ac693faae49..482779fe4e7c 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c | |||
@@ -221,7 +221,7 @@ void minix_free_inode(struct inode * inode) | |||
221 | clear_inode(inode); /* clear in-memory copy */ | 221 | clear_inode(inode); /* clear in-memory copy */ |
222 | } | 222 | } |
223 | 223 | ||
224 | struct inode * minix_new_inode(const struct inode * dir, int * error) | 224 | struct inode *minix_new_inode(const struct inode *dir, int mode, int *error) |
225 | { | 225 | { |
226 | struct super_block *sb = dir->i_sb; | 226 | struct super_block *sb = dir->i_sb; |
227 | struct minix_sb_info *sbi = minix_sb(sb); | 227 | struct minix_sb_info *sbi = minix_sb(sb); |
@@ -263,8 +263,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) | |||
263 | iput(inode); | 263 | iput(inode); |
264 | return NULL; | 264 | return NULL; |
265 | } | 265 | } |
266 | inode->i_uid = current_fsuid(); | 266 | inode_init_owner(inode, dir, mode); |
267 | inode->i_gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current_fsgid(); | ||
268 | inode->i_ino = j; | 267 | inode->i_ino = j; |
269 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 268 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
270 | inode->i_blocks = 0; | 269 | inode->i_blocks = 0; |
diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 9dcf95b42116..111f34ee9e3b 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h | |||
@@ -46,7 +46,7 @@ struct minix_sb_info { | |||
46 | extern struct inode *minix_iget(struct super_block *, unsigned long); | 46 | extern struct inode *minix_iget(struct super_block *, unsigned long); |
47 | extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **); | 47 | extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **); |
48 | extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **); | 48 | extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **); |
49 | extern struct inode * minix_new_inode(const struct inode * dir, int * error); | 49 | extern struct inode * minix_new_inode(const struct inode *, int, int *); |
50 | extern void minix_free_inode(struct inode * inode); | 50 | extern void minix_free_inode(struct inode * inode); |
51 | extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi); | 51 | extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi); |
52 | extern int minix_new_block(struct inode * inode); | 52 | extern int minix_new_block(struct inode * inode); |
diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 32b131cd6121..e20ee85955d1 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c | |||
@@ -46,10 +46,9 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_ | |||
46 | if (!old_valid_dev(rdev)) | 46 | if (!old_valid_dev(rdev)) |
47 | return -EINVAL; | 47 | return -EINVAL; |
48 | 48 | ||
49 | inode = minix_new_inode(dir, &error); | 49 | inode = minix_new_inode(dir, mode, &error); |
50 | 50 | ||
51 | if (inode) { | 51 | if (inode) { |
52 | inode->i_mode = mode; | ||
53 | minix_set_inode(inode, rdev); | 52 | minix_set_inode(inode, rdev); |
54 | mark_inode_dirty(inode); | 53 | mark_inode_dirty(inode); |
55 | error = add_nondir(dentry, inode); | 54 | error = add_nondir(dentry, inode); |
@@ -73,11 +72,10 @@ static int minix_symlink(struct inode * dir, struct dentry *dentry, | |||
73 | if (i > dir->i_sb->s_blocksize) | 72 | if (i > dir->i_sb->s_blocksize) |
74 | goto out; | 73 | goto out; |
75 | 74 | ||
76 | inode = minix_new_inode(dir, &err); | 75 | inode = minix_new_inode(dir, S_IFLNK | 0777, &err); |
77 | if (!inode) | 76 | if (!inode) |
78 | goto out; | 77 | goto out; |
79 | 78 | ||
80 | inode->i_mode = S_IFLNK | 0777; | ||
81 | minix_set_inode(inode, 0); | 79 | minix_set_inode(inode, 0); |
82 | err = page_symlink(inode, symname, i); | 80 | err = page_symlink(inode, symname, i); |
83 | if (err) | 81 | if (err) |
@@ -117,13 +115,10 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode) | |||
117 | 115 | ||
118 | inode_inc_link_count(dir); | 116 | inode_inc_link_count(dir); |
119 | 117 | ||
120 | inode = minix_new_inode(dir, &err); | 118 | inode = minix_new_inode(dir, mode, &err); |
121 | if (!inode) | 119 | if (!inode) |
122 | goto out_dir; | 120 | goto out_dir; |
123 | 121 | ||
124 | inode->i_mode = S_IFDIR | mode; | ||
125 | if (dir->i_mode & S_ISGID) | ||
126 | inode->i_mode |= S_ISGID; | ||
127 | minix_set_inode(inode, 0); | 122 | minix_set_inode(inode, 0); |
128 | 123 | ||
129 | inode_inc_link_count(inode); | 124 | inode_inc_link_count(inode); |
diff --git a/fs/namei.c b/fs/namei.c index b86b96fe1dc3..48e1f60520ea 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -523,9 +523,10 @@ static void path_put_conditional(struct path *path, struct nameidata *nd) | |||
523 | static inline void path_to_nameidata(struct path *path, struct nameidata *nd) | 523 | static inline void path_to_nameidata(struct path *path, struct nameidata *nd) |
524 | { | 524 | { |
525 | dput(nd->path.dentry); | 525 | dput(nd->path.dentry); |
526 | if (nd->path.mnt != path->mnt) | 526 | if (nd->path.mnt != path->mnt) { |
527 | mntput(nd->path.mnt); | 527 | mntput(nd->path.mnt); |
528 | nd->path.mnt = path->mnt; | 528 | nd->path.mnt = path->mnt; |
529 | } | ||
529 | nd->path.dentry = path->dentry; | 530 | nd->path.dentry = path->dentry; |
530 | } | 531 | } |
531 | 532 | ||
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 7a9ae3254a4b..7e26caab2a26 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -44,8 +44,7 @@ | |||
44 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 44 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
45 | 45 | ||
46 | /* Globals */ | 46 | /* Globals */ |
47 | static struct path rec_dir; | 47 | static struct file *rec_file; |
48 | static int rec_dir_init = 0; | ||
49 | 48 | ||
50 | static int | 49 | static int |
51 | nfs4_save_creds(const struct cred **original_creds) | 50 | nfs4_save_creds(const struct cred **original_creds) |
@@ -117,33 +116,28 @@ out_no_tfm: | |||
117 | return status; | 116 | return status; |
118 | } | 117 | } |
119 | 118 | ||
120 | static void | ||
121 | nfsd4_sync_rec_dir(void) | ||
122 | { | ||
123 | vfs_fsync(NULL, rec_dir.dentry, 0); | ||
124 | } | ||
125 | |||
126 | int | 119 | int |
127 | nfsd4_create_clid_dir(struct nfs4_client *clp) | 120 | nfsd4_create_clid_dir(struct nfs4_client *clp) |
128 | { | 121 | { |
129 | const struct cred *original_cred; | 122 | const struct cred *original_cred; |
130 | char *dname = clp->cl_recdir; | 123 | char *dname = clp->cl_recdir; |
131 | struct dentry *dentry; | 124 | struct dentry *dir, *dentry; |
132 | int status; | 125 | int status; |
133 | 126 | ||
134 | dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); | 127 | dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); |
135 | 128 | ||
136 | if (!rec_dir_init || clp->cl_firststate) | 129 | if (!rec_file || clp->cl_firststate) |
137 | return 0; | 130 | return 0; |
138 | 131 | ||
139 | status = nfs4_save_creds(&original_cred); | 132 | status = nfs4_save_creds(&original_cred); |
140 | if (status < 0) | 133 | if (status < 0) |
141 | return status; | 134 | return status; |
142 | 135 | ||
136 | dir = rec_file->f_path.dentry; | ||
143 | /* lock the parent */ | 137 | /* lock the parent */ |
144 | mutex_lock(&rec_dir.dentry->d_inode->i_mutex); | 138 | mutex_lock(&dir->d_inode->i_mutex); |
145 | 139 | ||
146 | dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1); | 140 | dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1); |
147 | if (IS_ERR(dentry)) { | 141 | if (IS_ERR(dentry)) { |
148 | status = PTR_ERR(dentry); | 142 | status = PTR_ERR(dentry); |
149 | goto out_unlock; | 143 | goto out_unlock; |
@@ -153,18 +147,18 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
153 | dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); | 147 | dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); |
154 | goto out_put; | 148 | goto out_put; |
155 | } | 149 | } |
156 | status = mnt_want_write(rec_dir.mnt); | 150 | status = mnt_want_write(rec_file->f_path.mnt); |
157 | if (status) | 151 | if (status) |
158 | goto out_put; | 152 | goto out_put; |
159 | status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU); | 153 | status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); |
160 | mnt_drop_write(rec_dir.mnt); | 154 | mnt_drop_write(rec_file->f_path.mnt); |
161 | out_put: | 155 | out_put: |
162 | dput(dentry); | 156 | dput(dentry); |
163 | out_unlock: | 157 | out_unlock: |
164 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); | 158 | mutex_unlock(&dir->d_inode->i_mutex); |
165 | if (status == 0) { | 159 | if (status == 0) { |
166 | clp->cl_firststate = 1; | 160 | clp->cl_firststate = 1; |
167 | nfsd4_sync_rec_dir(); | 161 | vfs_fsync(rec_file, 0); |
168 | } | 162 | } |
169 | nfs4_reset_creds(original_cred); | 163 | nfs4_reset_creds(original_cred); |
170 | dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); | 164 | dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); |
@@ -206,14 +200,14 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) | |||
206 | struct dentry *dentry; | 200 | struct dentry *dentry; |
207 | int status; | 201 | int status; |
208 | 202 | ||
209 | if (!rec_dir_init) | 203 | if (!rec_file) |
210 | return 0; | 204 | return 0; |
211 | 205 | ||
212 | status = nfs4_save_creds(&original_cred); | 206 | status = nfs4_save_creds(&original_cred); |
213 | if (status < 0) | 207 | if (status < 0) |
214 | return status; | 208 | return status; |
215 | 209 | ||
216 | filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, | 210 | filp = dentry_open(dget(dir), mntget(rec_file->f_path.mnt), O_RDONLY, |
217 | current_cred()); | 211 | current_cred()); |
218 | status = PTR_ERR(filp); | 212 | status = PTR_ERR(filp); |
219 | if (IS_ERR(filp)) | 213 | if (IS_ERR(filp)) |
@@ -250,13 +244,14 @@ out: | |||
250 | static int | 244 | static int |
251 | nfsd4_unlink_clid_dir(char *name, int namlen) | 245 | nfsd4_unlink_clid_dir(char *name, int namlen) |
252 | { | 246 | { |
253 | struct dentry *dentry; | 247 | struct dentry *dir, *dentry; |
254 | int status; | 248 | int status; |
255 | 249 | ||
256 | dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); | 250 | dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); |
257 | 251 | ||
258 | mutex_lock_nested(&rec_dir.dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 252 | dir = rec_file->f_path.dentry; |
259 | dentry = lookup_one_len(name, rec_dir.dentry, namlen); | 253 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); |
254 | dentry = lookup_one_len(name, dir, namlen); | ||
260 | if (IS_ERR(dentry)) { | 255 | if (IS_ERR(dentry)) { |
261 | status = PTR_ERR(dentry); | 256 | status = PTR_ERR(dentry); |
262 | goto out_unlock; | 257 | goto out_unlock; |
@@ -264,11 +259,11 @@ nfsd4_unlink_clid_dir(char *name, int namlen) | |||
264 | status = -ENOENT; | 259 | status = -ENOENT; |
265 | if (!dentry->d_inode) | 260 | if (!dentry->d_inode) |
266 | goto out; | 261 | goto out; |
267 | status = vfs_rmdir(rec_dir.dentry->d_inode, dentry); | 262 | status = vfs_rmdir(dir->d_inode, dentry); |
268 | out: | 263 | out: |
269 | dput(dentry); | 264 | dput(dentry); |
270 | out_unlock: | 265 | out_unlock: |
271 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); | 266 | mutex_unlock(&dir->d_inode->i_mutex); |
272 | return status; | 267 | return status; |
273 | } | 268 | } |
274 | 269 | ||
@@ -278,10 +273,10 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) | |||
278 | const struct cred *original_cred; | 273 | const struct cred *original_cred; |
279 | int status; | 274 | int status; |
280 | 275 | ||
281 | if (!rec_dir_init || !clp->cl_firststate) | 276 | if (!rec_file || !clp->cl_firststate) |
282 | return; | 277 | return; |
283 | 278 | ||
284 | status = mnt_want_write(rec_dir.mnt); | 279 | status = mnt_want_write(rec_file->f_path.mnt); |
285 | if (status) | 280 | if (status) |
286 | goto out; | 281 | goto out; |
287 | clp->cl_firststate = 0; | 282 | clp->cl_firststate = 0; |
@@ -293,8 +288,8 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) | |||
293 | status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); | 288 | status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); |
294 | nfs4_reset_creds(original_cred); | 289 | nfs4_reset_creds(original_cred); |
295 | if (status == 0) | 290 | if (status == 0) |
296 | nfsd4_sync_rec_dir(); | 291 | vfs_fsync(rec_file, 0); |
297 | mnt_drop_write(rec_dir.mnt); | 292 | mnt_drop_write(rec_file->f_path.mnt); |
298 | out: | 293 | out: |
299 | if (status) | 294 | if (status) |
300 | printk("NFSD: Failed to remove expired client state directory" | 295 | printk("NFSD: Failed to remove expired client state directory" |
@@ -323,19 +318,19 @@ void | |||
323 | nfsd4_recdir_purge_old(void) { | 318 | nfsd4_recdir_purge_old(void) { |
324 | int status; | 319 | int status; |
325 | 320 | ||
326 | if (!rec_dir_init) | 321 | if (!rec_file) |
327 | return; | 322 | return; |
328 | status = mnt_want_write(rec_dir.mnt); | 323 | status = mnt_want_write(rec_file->f_path.mnt); |
329 | if (status) | 324 | if (status) |
330 | goto out; | 325 | goto out; |
331 | status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old); | 326 | status = nfsd4_list_rec_dir(rec_file->f_path.dentry, purge_old); |
332 | if (status == 0) | 327 | if (status == 0) |
333 | nfsd4_sync_rec_dir(); | 328 | vfs_fsync(rec_file, 0); |
334 | mnt_drop_write(rec_dir.mnt); | 329 | mnt_drop_write(rec_file->f_path.mnt); |
335 | out: | 330 | out: |
336 | if (status) | 331 | if (status) |
337 | printk("nfsd4: failed to purge old clients from recovery" | 332 | printk("nfsd4: failed to purge old clients from recovery" |
338 | " directory %s\n", rec_dir.dentry->d_name.name); | 333 | " directory %s\n", rec_file->f_path.dentry->d_name.name); |
339 | } | 334 | } |
340 | 335 | ||
341 | static int | 336 | static int |
@@ -355,10 +350,13 @@ int | |||
355 | nfsd4_recdir_load(void) { | 350 | nfsd4_recdir_load(void) { |
356 | int status; | 351 | int status; |
357 | 352 | ||
358 | status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir); | 353 | if (!rec_file) |
354 | return 0; | ||
355 | |||
356 | status = nfsd4_list_rec_dir(rec_file->f_path.dentry, load_recdir); | ||
359 | if (status) | 357 | if (status) |
360 | printk("nfsd4: failed loading clients from recovery" | 358 | printk("nfsd4: failed loading clients from recovery" |
361 | " directory %s\n", rec_dir.dentry->d_name.name); | 359 | " directory %s\n", rec_file->f_path.dentry->d_name.name); |
362 | return status; | 360 | return status; |
363 | } | 361 | } |
364 | 362 | ||
@@ -375,7 +373,7 @@ nfsd4_init_recdir(char *rec_dirname) | |||
375 | printk("NFSD: Using %s as the NFSv4 state recovery directory\n", | 373 | printk("NFSD: Using %s as the NFSv4 state recovery directory\n", |
376 | rec_dirname); | 374 | rec_dirname); |
377 | 375 | ||
378 | BUG_ON(rec_dir_init); | 376 | BUG_ON(rec_file); |
379 | 377 | ||
380 | status = nfs4_save_creds(&original_cred); | 378 | status = nfs4_save_creds(&original_cred); |
381 | if (status < 0) { | 379 | if (status < 0) { |
@@ -385,22 +383,21 @@ nfsd4_init_recdir(char *rec_dirname) | |||
385 | return; | 383 | return; |
386 | } | 384 | } |
387 | 385 | ||
388 | status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, | 386 | rec_file = filp_open(rec_dirname, O_RDONLY | O_DIRECTORY, 0); |
389 | &rec_dir); | 387 | if (IS_ERR(rec_file)) { |
390 | if (status) | ||
391 | printk("NFSD: unable to find recovery directory %s\n", | 388 | printk("NFSD: unable to find recovery directory %s\n", |
392 | rec_dirname); | 389 | rec_dirname); |
390 | rec_file = NULL; | ||
391 | } | ||
393 | 392 | ||
394 | if (!status) | ||
395 | rec_dir_init = 1; | ||
396 | nfs4_reset_creds(original_cred); | 393 | nfs4_reset_creds(original_cred); |
397 | } | 394 | } |
398 | 395 | ||
399 | void | 396 | void |
400 | nfsd4_shutdown_recdir(void) | 397 | nfsd4_shutdown_recdir(void) |
401 | { | 398 | { |
402 | if (!rec_dir_init) | 399 | if (!rec_file) |
403 | return; | 400 | return; |
404 | rec_dir_init = 0; | 401 | fput(rec_file); |
405 | path_put(&rec_dir); | 402 | rec_file = NULL; |
406 | } | 403 | } |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 23c06f77f4ca..ebbf3b6b2457 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -999,7 +999,7 @@ static int wait_for_concurrent_writes(struct file *file) | |||
999 | 999 | ||
1000 | if (inode->i_state & I_DIRTY) { | 1000 | if (inode->i_state & I_DIRTY) { |
1001 | dprintk("nfsd: write sync %d\n", task_pid_nr(current)); | 1001 | dprintk("nfsd: write sync %d\n", task_pid_nr(current)); |
1002 | err = vfs_fsync(file, file->f_path.dentry, 0); | 1002 | err = vfs_fsync(file, 0); |
1003 | } | 1003 | } |
1004 | last_ino = inode->i_ino; | 1004 | last_ino = inode->i_ino; |
1005 | last_dev = inode->i_sb->s_dev; | 1005 | last_dev = inode->i_sb->s_dev; |
@@ -1175,8 +1175,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1175 | if (err) | 1175 | if (err) |
1176 | goto out; | 1176 | goto out; |
1177 | if (EX_ISSYNC(fhp->fh_export)) { | 1177 | if (EX_ISSYNC(fhp->fh_export)) { |
1178 | int err2 = vfs_fsync_range(file, file->f_path.dentry, | 1178 | int err2 = vfs_fsync_range(file, offset, end, 0); |
1179 | offset, end, 0); | ||
1180 | 1179 | ||
1181 | if (err2 != -EINVAL) | 1180 | if (err2 != -EINVAL) |
1182 | err = nfserrno(err2); | 1181 | err = nfserrno(err2); |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 5e226d4b41d3..39e038ac8fcb 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -280,16 +280,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) | |||
280 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ | 280 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ |
281 | 281 | ||
282 | atomic_inc(&sbi->s_inodes_count); | 282 | atomic_inc(&sbi->s_inodes_count); |
283 | 283 | inode_init_owner(inode, dir, mode); | |
284 | inode->i_uid = current_fsuid(); | ||
285 | if (dir->i_mode & S_ISGID) { | ||
286 | inode->i_gid = dir->i_gid; | ||
287 | if (S_ISDIR(mode)) | ||
288 | mode |= S_ISGID; | ||
289 | } else | ||
290 | inode->i_gid = current_fsgid(); | ||
291 | |||
292 | inode->i_mode = mode; | ||
293 | inode->i_ino = ino; | 284 | inode->i_ino = ino; |
294 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 285 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
295 | 286 | ||
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index a756168a21c2..8c1097327abc 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -674,7 +674,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
674 | start * sects_per_block, | 674 | start * sects_per_block, |
675 | nblocks * sects_per_block, | 675 | nblocks * sects_per_block, |
676 | GFP_NOFS, | 676 | GFP_NOFS, |
677 | DISCARD_FL_BARRIER); | 677 | BLKDEV_IFL_BARRIER); |
678 | if (ret < 0) | 678 | if (ret < 0) |
679 | return ret; | 679 | return ret; |
680 | nblocks = 0; | 680 | nblocks = 0; |
@@ -684,7 +684,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
684 | ret = blkdev_issue_discard(nilfs->ns_bdev, | 684 | ret = blkdev_issue_discard(nilfs->ns_bdev, |
685 | start * sects_per_block, | 685 | start * sects_per_block, |
686 | nblocks * sects_per_block, | 686 | nblocks * sects_per_block, |
687 | GFP_NOFS, DISCARD_FL_BARRIER); | 687 | GFP_NOFS, BLKDEV_IFL_BARRIER); |
688 | return ret; | 688 | return ret; |
689 | } | 689 | } |
690 | 690 | ||
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c index 40b1cf914ccb..27b75ebc7460 100644 --- a/fs/notify/inotify/inotify.c +++ b/fs/notify/inotify/inotify.c | |||
@@ -110,14 +110,10 @@ EXPORT_SYMBOL_GPL(get_inotify_watch); | |||
110 | int pin_inotify_watch(struct inotify_watch *watch) | 110 | int pin_inotify_watch(struct inotify_watch *watch) |
111 | { | 111 | { |
112 | struct super_block *sb = watch->inode->i_sb; | 112 | struct super_block *sb = watch->inode->i_sb; |
113 | spin_lock(&sb_lock); | 113 | if (atomic_inc_not_zero(&sb->s_active)) { |
114 | if (sb->s_count >= S_BIAS) { | ||
115 | atomic_inc(&sb->s_active); | ||
116 | spin_unlock(&sb_lock); | ||
117 | atomic_inc(&watch->count); | 114 | atomic_inc(&watch->count); |
118 | return 1; | 115 | return 1; |
119 | } | 116 | } |
120 | spin_unlock(&sb_lock); | ||
121 | return 0; | 117 | return 0; |
122 | } | 118 | } |
123 | 119 | ||
@@ -515,34 +511,8 @@ EXPORT_SYMBOL_GPL(inotify_init_watch); | |||
515 | * done. Cleanup is just deactivate_super(). However, that leaves a messy | 511 | * done. Cleanup is just deactivate_super(). However, that leaves a messy |
516 | * case - what if we *are* racing with umount() and active references to | 512 | * case - what if we *are* racing with umount() and active references to |
517 | * superblock can't be acquired anymore? We can bump ->s_count, grab | 513 | * superblock can't be acquired anymore? We can bump ->s_count, grab |
518 | * ->s_umount, which will almost certainly wait until the superblock is shut | 514 | * ->s_umount, which will wait until the superblock is shut down and the |
519 | * down and the watch in question is pining for fjords. That's fine, but | 515 | * watch in question is pining for fjords. |
520 | * there is a problem - we might have hit the window between ->s_active | ||
521 | * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock | ||
522 | * is past the point of no return and is heading for shutdown) and the | ||
523 | * moment when deactivate_super() acquires ->s_umount. We could just do | ||
524 | * drop_super() yield() and retry, but that's rather antisocial and this | ||
525 | * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having | ||
526 | * found that we'd got there first (i.e. that ->s_root is non-NULL) we know | ||
527 | * that we won't race with inotify_umount_inodes(). So we could grab a | ||
528 | * reference to watch and do the rest as above, just with drop_super() instead | ||
529 | * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we | ||
530 | * could grab ->s_umount. So the watch could've been gone already. | ||
531 | * | ||
532 | * That still can be dealt with - we need to save watch->wd, do idr_find() | ||
533 | * and compare its result with our pointer. If they match, we either have | ||
534 | * the damn thing still alive or we'd lost not one but two races at once, | ||
535 | * the watch had been killed and a new one got created with the same ->wd | ||
536 | * at the same address. That couldn't have happened in inotify_destroy(), | ||
537 | * but inotify_rm_wd() could run into that. Still, "new one got created" | ||
538 | * is not a problem - we have every right to kill it or leave it alone, | ||
539 | * whatever's more convenient. | ||
540 | * | ||
541 | * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as | ||
542 | * "grab it and kill it" check. If it's been our original watch, we are | ||
543 | * fine, if it's a newcomer - nevermind, just pretend that we'd won the | ||
544 | * race and kill the fscker anyway; we are safe since we know that its | ||
545 | * superblock won't be going away. | ||
546 | * | 516 | * |
547 | * And yes, this is far beyond mere "not very pretty"; so's the entire | 517 | * And yes, this is far beyond mere "not very pretty"; so's the entire |
548 | * concept of inotify to start with. | 518 | * concept of inotify to start with. |
@@ -556,57 +526,31 @@ EXPORT_SYMBOL_GPL(inotify_init_watch); | |||
556 | * Called with ih->mutex held, drops it. Possible return values: | 526 | * Called with ih->mutex held, drops it. Possible return values: |
557 | * 0 - nothing to do, it has died | 527 | * 0 - nothing to do, it has died |
558 | * 1 - remove it, drop the reference and deactivate_super() | 528 | * 1 - remove it, drop the reference and deactivate_super() |
559 | * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid | ||
560 | * that variant, since it involved a lot of PITA, but that's the best that | ||
561 | * could've been done. | ||
562 | */ | 529 | */ |
563 | static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) | 530 | static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) |
564 | { | 531 | { |
565 | struct super_block *sb = watch->inode->i_sb; | 532 | struct super_block *sb = watch->inode->i_sb; |
566 | s32 wd = watch->wd; | ||
567 | 533 | ||
568 | spin_lock(&sb_lock); | 534 | if (atomic_inc_not_zero(&sb->s_active)) { |
569 | if (sb->s_count >= S_BIAS) { | ||
570 | atomic_inc(&sb->s_active); | ||
571 | spin_unlock(&sb_lock); | ||
572 | get_inotify_watch(watch); | 535 | get_inotify_watch(watch); |
573 | mutex_unlock(&ih->mutex); | 536 | mutex_unlock(&ih->mutex); |
574 | return 1; /* the best outcome */ | 537 | return 1; /* the best outcome */ |
575 | } | 538 | } |
539 | spin_lock(&sb_lock); | ||
576 | sb->s_count++; | 540 | sb->s_count++; |
577 | spin_unlock(&sb_lock); | 541 | spin_unlock(&sb_lock); |
578 | mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ | 542 | mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ |
579 | down_read(&sb->s_umount); | 543 | down_read(&sb->s_umount); |
580 | if (likely(!sb->s_root)) { | 544 | /* fs is already shut down; the watch is dead */ |
581 | /* fs is already shut down; the watch is dead */ | 545 | drop_super(sb); |
582 | drop_super(sb); | 546 | return 0; |
583 | return 0; | ||
584 | } | ||
585 | /* raced with the final deactivate_super() */ | ||
586 | mutex_lock(&ih->mutex); | ||
587 | if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) { | ||
588 | /* the watch is dead */ | ||
589 | mutex_unlock(&ih->mutex); | ||
590 | drop_super(sb); | ||
591 | return 0; | ||
592 | } | ||
593 | /* still alive or freed and reused with the same sb and wd; kill */ | ||
594 | get_inotify_watch(watch); | ||
595 | mutex_unlock(&ih->mutex); | ||
596 | return 2; | ||
597 | } | 547 | } |
598 | 548 | ||
599 | static void unpin_and_kill(struct inotify_watch *watch, int how) | 549 | static void unpin_and_kill(struct inotify_watch *watch) |
600 | { | 550 | { |
601 | struct super_block *sb = watch->inode->i_sb; | 551 | struct super_block *sb = watch->inode->i_sb; |
602 | put_inotify_watch(watch); | 552 | put_inotify_watch(watch); |
603 | switch (how) { | 553 | deactivate_super(sb); |
604 | case 1: | ||
605 | deactivate_super(sb); | ||
606 | break; | ||
607 | case 2: | ||
608 | drop_super(sb); | ||
609 | } | ||
610 | } | 554 | } |
611 | 555 | ||
612 | /** | 556 | /** |
@@ -628,7 +572,6 @@ void inotify_destroy(struct inotify_handle *ih) | |||
628 | struct list_head *watches; | 572 | struct list_head *watches; |
629 | struct super_block *sb; | 573 | struct super_block *sb; |
630 | struct inode *inode; | 574 | struct inode *inode; |
631 | int how; | ||
632 | 575 | ||
633 | mutex_lock(&ih->mutex); | 576 | mutex_lock(&ih->mutex); |
634 | watches = &ih->watches; | 577 | watches = &ih->watches; |
@@ -638,8 +581,7 @@ void inotify_destroy(struct inotify_handle *ih) | |||
638 | } | 581 | } |
639 | watch = list_first_entry(watches, struct inotify_watch, h_list); | 582 | watch = list_first_entry(watches, struct inotify_watch, h_list); |
640 | sb = watch->inode->i_sb; | 583 | sb = watch->inode->i_sb; |
641 | how = pin_to_kill(ih, watch); | 584 | if (!pin_to_kill(ih, watch)) |
642 | if (!how) | ||
643 | continue; | 585 | continue; |
644 | 586 | ||
645 | inode = watch->inode; | 587 | inode = watch->inode; |
@@ -654,7 +596,7 @@ void inotify_destroy(struct inotify_handle *ih) | |||
654 | 596 | ||
655 | mutex_unlock(&ih->mutex); | 597 | mutex_unlock(&ih->mutex); |
656 | mutex_unlock(&inode->inotify_mutex); | 598 | mutex_unlock(&inode->inotify_mutex); |
657 | unpin_and_kill(watch, how); | 599 | unpin_and_kill(watch); |
658 | } | 600 | } |
659 | 601 | ||
660 | /* free this handle: the put matching the get in inotify_init() */ | 602 | /* free this handle: the put matching the get in inotify_init() */ |
@@ -857,7 +799,6 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
857 | struct inotify_watch *watch; | 799 | struct inotify_watch *watch; |
858 | struct super_block *sb; | 800 | struct super_block *sb; |
859 | struct inode *inode; | 801 | struct inode *inode; |
860 | int how; | ||
861 | 802 | ||
862 | mutex_lock(&ih->mutex); | 803 | mutex_lock(&ih->mutex); |
863 | watch = idr_find(&ih->idr, wd); | 804 | watch = idr_find(&ih->idr, wd); |
@@ -866,8 +807,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
866 | return -EINVAL; | 807 | return -EINVAL; |
867 | } | 808 | } |
868 | sb = watch->inode->i_sb; | 809 | sb = watch->inode->i_sb; |
869 | how = pin_to_kill(ih, watch); | 810 | if (!pin_to_kill(ih, watch)) |
870 | if (!how) | ||
871 | return 0; | 811 | return 0; |
872 | 812 | ||
873 | inode = watch->inode; | 813 | inode = watch->inode; |
@@ -881,7 +821,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
881 | 821 | ||
882 | mutex_unlock(&ih->mutex); | 822 | mutex_unlock(&ih->mutex); |
883 | mutex_unlock(&inode->inotify_mutex); | 823 | mutex_unlock(&inode->inotify_mutex); |
884 | unpin_and_kill(watch, how); | 824 | unpin_and_kill(watch); |
885 | 825 | ||
886 | return 0; | 826 | return 0; |
887 | } | 827 | } |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index e13fc9e8fcdc..da702294d7e7 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -489,7 +489,7 @@ cleanup: | |||
489 | return ret; | 489 | return ret; |
490 | } | 490 | } |
491 | 491 | ||
492 | struct xattr_handler ocfs2_xattr_acl_access_handler = { | 492 | const struct xattr_handler ocfs2_xattr_acl_access_handler = { |
493 | .prefix = POSIX_ACL_XATTR_ACCESS, | 493 | .prefix = POSIX_ACL_XATTR_ACCESS, |
494 | .flags = ACL_TYPE_ACCESS, | 494 | .flags = ACL_TYPE_ACCESS, |
495 | .list = ocfs2_xattr_list_acl_access, | 495 | .list = ocfs2_xattr_list_acl_access, |
@@ -497,7 +497,7 @@ struct xattr_handler ocfs2_xattr_acl_access_handler = { | |||
497 | .set = ocfs2_xattr_set_acl, | 497 | .set = ocfs2_xattr_set_acl, |
498 | }; | 498 | }; |
499 | 499 | ||
500 | struct xattr_handler ocfs2_xattr_acl_default_handler = { | 500 | const struct xattr_handler ocfs2_xattr_acl_default_handler = { |
501 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 501 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
502 | .flags = ACL_TYPE_DEFAULT, | 502 | .flags = ACL_TYPE_DEFAULT, |
503 | .list = ocfs2_xattr_list_acl_default, | 503 | .list = ocfs2_xattr_list_acl_default, |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index db5dd3ed4df4..f171b51a74f7 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -204,14 +204,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode) | |||
204 | inode->i_nlink = 2; | 204 | inode->i_nlink = 2; |
205 | else | 205 | else |
206 | inode->i_nlink = 1; | 206 | inode->i_nlink = 1; |
207 | inode->i_uid = current_fsuid(); | 207 | inode_init_owner(inode, dir, mode); |
208 | if (dir->i_mode & S_ISGID) { | ||
209 | inode->i_gid = dir->i_gid; | ||
210 | if (S_ISDIR(mode)) | ||
211 | mode |= S_ISGID; | ||
212 | } else | ||
213 | inode->i_gid = current_fsgid(); | ||
214 | inode->i_mode = mode; | ||
215 | dquot_initialize(inode); | 208 | dquot_initialize(inode); |
216 | return inode; | 209 | return inode; |
217 | } | 210 | } |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 98ee6c44102d..e97b34842cfe 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -97,7 +97,7 @@ static struct ocfs2_xattr_def_value_root def_xv = { | |||
97 | .xv.xr_list.l_count = cpu_to_le16(1), | 97 | .xv.xr_list.l_count = cpu_to_le16(1), |
98 | }; | 98 | }; |
99 | 99 | ||
100 | struct xattr_handler *ocfs2_xattr_handlers[] = { | 100 | const struct xattr_handler *ocfs2_xattr_handlers[] = { |
101 | &ocfs2_xattr_user_handler, | 101 | &ocfs2_xattr_user_handler, |
102 | &ocfs2_xattr_acl_access_handler, | 102 | &ocfs2_xattr_acl_access_handler, |
103 | &ocfs2_xattr_acl_default_handler, | 103 | &ocfs2_xattr_acl_default_handler, |
@@ -106,7 +106,7 @@ struct xattr_handler *ocfs2_xattr_handlers[] = { | |||
106 | NULL | 106 | NULL |
107 | }; | 107 | }; |
108 | 108 | ||
109 | static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { | 109 | static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { |
110 | [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, | 110 | [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, |
111 | [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] | 111 | [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] |
112 | = &ocfs2_xattr_acl_access_handler, | 112 | = &ocfs2_xattr_acl_access_handler, |
@@ -540,7 +540,7 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, | |||
540 | 540 | ||
541 | static inline const char *ocfs2_xattr_prefix(int name_index) | 541 | static inline const char *ocfs2_xattr_prefix(int name_index) |
542 | { | 542 | { |
543 | struct xattr_handler *handler = NULL; | 543 | const struct xattr_handler *handler = NULL; |
544 | 544 | ||
545 | if (name_index > 0 && name_index < OCFS2_XATTR_MAX) | 545 | if (name_index > 0 && name_index < OCFS2_XATTR_MAX) |
546 | handler = ocfs2_xattr_handler_map[name_index]; | 546 | handler = ocfs2_xattr_handler_map[name_index]; |
@@ -7213,7 +7213,7 @@ int ocfs2_init_security_set(handle_t *handle, | |||
7213 | xattr_ac, data_ac); | 7213 | xattr_ac, data_ac); |
7214 | } | 7214 | } |
7215 | 7215 | ||
7216 | struct xattr_handler ocfs2_xattr_security_handler = { | 7216 | const struct xattr_handler ocfs2_xattr_security_handler = { |
7217 | .prefix = XATTR_SECURITY_PREFIX, | 7217 | .prefix = XATTR_SECURITY_PREFIX, |
7218 | .list = ocfs2_xattr_security_list, | 7218 | .list = ocfs2_xattr_security_list, |
7219 | .get = ocfs2_xattr_security_get, | 7219 | .get = ocfs2_xattr_security_get, |
@@ -7257,7 +7257,7 @@ static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name, | |||
7257 | name, value, size, flags); | 7257 | name, value, size, flags); |
7258 | } | 7258 | } |
7259 | 7259 | ||
7260 | struct xattr_handler ocfs2_xattr_trusted_handler = { | 7260 | const struct xattr_handler ocfs2_xattr_trusted_handler = { |
7261 | .prefix = XATTR_TRUSTED_PREFIX, | 7261 | .prefix = XATTR_TRUSTED_PREFIX, |
7262 | .list = ocfs2_xattr_trusted_list, | 7262 | .list = ocfs2_xattr_trusted_list, |
7263 | .get = ocfs2_xattr_trusted_get, | 7263 | .get = ocfs2_xattr_trusted_get, |
@@ -7313,7 +7313,7 @@ static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name, | |||
7313 | name, value, size, flags); | 7313 | name, value, size, flags); |
7314 | } | 7314 | } |
7315 | 7315 | ||
7316 | struct xattr_handler ocfs2_xattr_user_handler = { | 7316 | const struct xattr_handler ocfs2_xattr_user_handler = { |
7317 | .prefix = XATTR_USER_PREFIX, | 7317 | .prefix = XATTR_USER_PREFIX, |
7318 | .list = ocfs2_xattr_user_list, | 7318 | .list = ocfs2_xattr_user_list, |
7319 | .get = ocfs2_xattr_user_get, | 7319 | .get = ocfs2_xattr_user_get, |
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index abd72a47f520..aa64bb37a65b 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h | |||
@@ -37,12 +37,12 @@ struct ocfs2_security_xattr_info { | |||
37 | size_t value_len; | 37 | size_t value_len; |
38 | }; | 38 | }; |
39 | 39 | ||
40 | extern struct xattr_handler ocfs2_xattr_user_handler; | 40 | extern const struct xattr_handler ocfs2_xattr_user_handler; |
41 | extern struct xattr_handler ocfs2_xattr_trusted_handler; | 41 | extern const struct xattr_handler ocfs2_xattr_trusted_handler; |
42 | extern struct xattr_handler ocfs2_xattr_security_handler; | 42 | extern const struct xattr_handler ocfs2_xattr_security_handler; |
43 | extern struct xattr_handler ocfs2_xattr_acl_access_handler; | 43 | extern const struct xattr_handler ocfs2_xattr_acl_access_handler; |
44 | extern struct xattr_handler ocfs2_xattr_acl_default_handler; | 44 | extern const struct xattr_handler ocfs2_xattr_acl_default_handler; |
45 | extern struct xattr_handler *ocfs2_xattr_handlers[]; | 45 | extern const struct xattr_handler *ocfs2_xattr_handlers[]; |
46 | 46 | ||
47 | ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); | 47 | ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); |
48 | int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int, | 48 | int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int, |
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index b44bb835e8ea..089839a6cc64 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c | |||
@@ -37,9 +37,7 @@ struct inode *omfs_new_inode(struct inode *dir, int mode) | |||
37 | goto fail; | 37 | goto fail; |
38 | 38 | ||
39 | inode->i_ino = new_block; | 39 | inode->i_ino = new_block; |
40 | inode->i_mode = mode; | 40 | inode_init_owner(inode, NULL, mode); |
41 | inode->i_uid = current_fsuid(); | ||
42 | inode->i_gid = current_fsgid(); | ||
43 | inode->i_mapping->a_ops = &omfs_aops; | 41 | inode->i_mapping->a_ops = &omfs_aops; |
44 | 42 | ||
45 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 43 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/securebits.h> | 17 | #include <linux/securebits.h> |
18 | #include <linux/security.h> | 18 | #include <linux/security.h> |
19 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
20 | #include <linux/vfs.h> | ||
21 | #include <linux/fcntl.h> | 20 | #include <linux/fcntl.h> |
22 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
23 | #include <asm/uaccess.h> | 22 | #include <asm/uaccess.h> |
@@ -33,171 +32,6 @@ | |||
33 | 32 | ||
34 | #include "internal.h" | 33 | #include "internal.h" |
35 | 34 | ||
36 | int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
37 | { | ||
38 | int retval = -ENODEV; | ||
39 | |||
40 | if (dentry) { | ||
41 | retval = -ENOSYS; | ||
42 | if (dentry->d_sb->s_op->statfs) { | ||
43 | memset(buf, 0, sizeof(*buf)); | ||
44 | retval = security_sb_statfs(dentry); | ||
45 | if (retval) | ||
46 | return retval; | ||
47 | retval = dentry->d_sb->s_op->statfs(dentry, buf); | ||
48 | if (retval == 0 && buf->f_frsize == 0) | ||
49 | buf->f_frsize = buf->f_bsize; | ||
50 | } | ||
51 | } | ||
52 | return retval; | ||
53 | } | ||
54 | |||
55 | EXPORT_SYMBOL(vfs_statfs); | ||
56 | |||
57 | static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) | ||
58 | { | ||
59 | struct kstatfs st; | ||
60 | int retval; | ||
61 | |||
62 | retval = vfs_statfs(dentry, &st); | ||
63 | if (retval) | ||
64 | return retval; | ||
65 | |||
66 | if (sizeof(*buf) == sizeof(st)) | ||
67 | memcpy(buf, &st, sizeof(st)); | ||
68 | else { | ||
69 | if (sizeof buf->f_blocks == 4) { | ||
70 | if ((st.f_blocks | st.f_bfree | st.f_bavail | | ||
71 | st.f_bsize | st.f_frsize) & | ||
72 | 0xffffffff00000000ULL) | ||
73 | return -EOVERFLOW; | ||
74 | /* | ||
75 | * f_files and f_ffree may be -1; it's okay to stuff | ||
76 | * that into 32 bits | ||
77 | */ | ||
78 | if (st.f_files != -1 && | ||
79 | (st.f_files & 0xffffffff00000000ULL)) | ||
80 | return -EOVERFLOW; | ||
81 | if (st.f_ffree != -1 && | ||
82 | (st.f_ffree & 0xffffffff00000000ULL)) | ||
83 | return -EOVERFLOW; | ||
84 | } | ||
85 | |||
86 | buf->f_type = st.f_type; | ||
87 | buf->f_bsize = st.f_bsize; | ||
88 | buf->f_blocks = st.f_blocks; | ||
89 | buf->f_bfree = st.f_bfree; | ||
90 | buf->f_bavail = st.f_bavail; | ||
91 | buf->f_files = st.f_files; | ||
92 | buf->f_ffree = st.f_ffree; | ||
93 | buf->f_fsid = st.f_fsid; | ||
94 | buf->f_namelen = st.f_namelen; | ||
95 | buf->f_frsize = st.f_frsize; | ||
96 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | ||
97 | } | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) | ||
102 | { | ||
103 | struct kstatfs st; | ||
104 | int retval; | ||
105 | |||
106 | retval = vfs_statfs(dentry, &st); | ||
107 | if (retval) | ||
108 | return retval; | ||
109 | |||
110 | if (sizeof(*buf) == sizeof(st)) | ||
111 | memcpy(buf, &st, sizeof(st)); | ||
112 | else { | ||
113 | buf->f_type = st.f_type; | ||
114 | buf->f_bsize = st.f_bsize; | ||
115 | buf->f_blocks = st.f_blocks; | ||
116 | buf->f_bfree = st.f_bfree; | ||
117 | buf->f_bavail = st.f_bavail; | ||
118 | buf->f_files = st.f_files; | ||
119 | buf->f_ffree = st.f_ffree; | ||
120 | buf->f_fsid = st.f_fsid; | ||
121 | buf->f_namelen = st.f_namelen; | ||
122 | buf->f_frsize = st.f_frsize; | ||
123 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | ||
124 | } | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) | ||
129 | { | ||
130 | struct path path; | ||
131 | int error; | ||
132 | |||
133 | error = user_path(pathname, &path); | ||
134 | if (!error) { | ||
135 | struct statfs tmp; | ||
136 | error = vfs_statfs_native(path.dentry, &tmp); | ||
137 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
138 | error = -EFAULT; | ||
139 | path_put(&path); | ||
140 | } | ||
141 | return error; | ||
142 | } | ||
143 | |||
144 | SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) | ||
145 | { | ||
146 | struct path path; | ||
147 | long error; | ||
148 | |||
149 | if (sz != sizeof(*buf)) | ||
150 | return -EINVAL; | ||
151 | error = user_path(pathname, &path); | ||
152 | if (!error) { | ||
153 | struct statfs64 tmp; | ||
154 | error = vfs_statfs64(path.dentry, &tmp); | ||
155 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
156 | error = -EFAULT; | ||
157 | path_put(&path); | ||
158 | } | ||
159 | return error; | ||
160 | } | ||
161 | |||
162 | SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) | ||
163 | { | ||
164 | struct file * file; | ||
165 | struct statfs tmp; | ||
166 | int error; | ||
167 | |||
168 | error = -EBADF; | ||
169 | file = fget(fd); | ||
170 | if (!file) | ||
171 | goto out; | ||
172 | error = vfs_statfs_native(file->f_path.dentry, &tmp); | ||
173 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
174 | error = -EFAULT; | ||
175 | fput(file); | ||
176 | out: | ||
177 | return error; | ||
178 | } | ||
179 | |||
180 | SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) | ||
181 | { | ||
182 | struct file * file; | ||
183 | struct statfs64 tmp; | ||
184 | int error; | ||
185 | |||
186 | if (sz != sizeof(*buf)) | ||
187 | return -EINVAL; | ||
188 | |||
189 | error = -EBADF; | ||
190 | file = fget(fd); | ||
191 | if (!file) | ||
192 | goto out; | ||
193 | error = vfs_statfs64(file->f_path.dentry, &tmp); | ||
194 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
195 | error = -EFAULT; | ||
196 | fput(file); | ||
197 | out: | ||
198 | return error; | ||
199 | } | ||
200 | |||
201 | int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, | 35 | int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, |
202 | struct file *filp) | 36 | struct file *filp) |
203 | { | 37 | { |
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index a97b477ac0fc..6921e7890be6 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c | |||
@@ -70,14 +70,14 @@ struct riscix_record { | |||
70 | 70 | ||
71 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ | 71 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ |
72 | defined(CONFIG_ACORN_PARTITION_ADFS) | 72 | defined(CONFIG_ACORN_PARTITION_ADFS) |
73 | static int | 73 | static int riscix_partition(struct parsed_partitions *state, |
74 | riscix_partition(struct parsed_partitions *state, struct block_device *bdev, | 74 | unsigned long first_sect, int slot, |
75 | unsigned long first_sect, int slot, unsigned long nr_sects) | 75 | unsigned long nr_sects) |
76 | { | 76 | { |
77 | Sector sect; | 77 | Sector sect; |
78 | struct riscix_record *rr; | 78 | struct riscix_record *rr; |
79 | 79 | ||
80 | rr = (struct riscix_record *)read_dev_sector(bdev, first_sect, &sect); | 80 | rr = read_part_sector(state, first_sect, &sect); |
81 | if (!rr) | 81 | if (!rr) |
82 | return -1; | 82 | return -1; |
83 | 83 | ||
@@ -123,9 +123,9 @@ struct linux_part { | |||
123 | 123 | ||
124 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ | 124 | #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ |
125 | defined(CONFIG_ACORN_PARTITION_ADFS) | 125 | defined(CONFIG_ACORN_PARTITION_ADFS) |
126 | static int | 126 | static int linux_partition(struct parsed_partitions *state, |
127 | linux_partition(struct parsed_partitions *state, struct block_device *bdev, | 127 | unsigned long first_sect, int slot, |
128 | unsigned long first_sect, int slot, unsigned long nr_sects) | 128 | unsigned long nr_sects) |
129 | { | 129 | { |
130 | Sector sect; | 130 | Sector sect; |
131 | struct linux_part *linuxp; | 131 | struct linux_part *linuxp; |
@@ -135,7 +135,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev, | |||
135 | 135 | ||
136 | put_partition(state, slot++, first_sect, size); | 136 | put_partition(state, slot++, first_sect, size); |
137 | 137 | ||
138 | linuxp = (struct linux_part *)read_dev_sector(bdev, first_sect, &sect); | 138 | linuxp = read_part_sector(state, first_sect, &sect); |
139 | if (!linuxp) | 139 | if (!linuxp) |
140 | return -1; | 140 | return -1; |
141 | 141 | ||
@@ -157,8 +157,7 @@ linux_partition(struct parsed_partitions *state, struct block_device *bdev, | |||
157 | #endif | 157 | #endif |
158 | 158 | ||
159 | #ifdef CONFIG_ACORN_PARTITION_CUMANA | 159 | #ifdef CONFIG_ACORN_PARTITION_CUMANA |
160 | int | 160 | int adfspart_check_CUMANA(struct parsed_partitions *state) |
161 | adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev) | ||
162 | { | 161 | { |
163 | unsigned long first_sector = 0; | 162 | unsigned long first_sector = 0; |
164 | unsigned int start_blk = 0; | 163 | unsigned int start_blk = 0; |
@@ -185,7 +184,7 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev | |||
185 | struct adfs_discrecord *dr; | 184 | struct adfs_discrecord *dr; |
186 | unsigned int nr_sects; | 185 | unsigned int nr_sects; |
187 | 186 | ||
188 | data = read_dev_sector(bdev, start_blk * 2 + 6, &sect); | 187 | data = read_part_sector(state, start_blk * 2 + 6, &sect); |
189 | if (!data) | 188 | if (!data) |
190 | return -1; | 189 | return -1; |
191 | 190 | ||
@@ -217,14 +216,14 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev | |||
217 | #ifdef CONFIG_ACORN_PARTITION_RISCIX | 216 | #ifdef CONFIG_ACORN_PARTITION_RISCIX |
218 | case PARTITION_RISCIX_SCSI: | 217 | case PARTITION_RISCIX_SCSI: |
219 | /* RISCiX - we don't know how to find the next one. */ | 218 | /* RISCiX - we don't know how to find the next one. */ |
220 | slot = riscix_partition(state, bdev, first_sector, | 219 | slot = riscix_partition(state, first_sector, slot, |
221 | slot, nr_sects); | 220 | nr_sects); |
222 | break; | 221 | break; |
223 | #endif | 222 | #endif |
224 | 223 | ||
225 | case PARTITION_LINUX: | 224 | case PARTITION_LINUX: |
226 | slot = linux_partition(state, bdev, first_sector, | 225 | slot = linux_partition(state, first_sector, slot, |
227 | slot, nr_sects); | 226 | nr_sects); |
228 | break; | 227 | break; |
229 | } | 228 | } |
230 | put_dev_sector(sect); | 229 | put_dev_sector(sect); |
@@ -249,8 +248,7 @@ adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev | |||
249 | * hda1 = ADFS partition on first drive. | 248 | * hda1 = ADFS partition on first drive. |
250 | * hda2 = non-ADFS partition. | 249 | * hda2 = non-ADFS partition. |
251 | */ | 250 | */ |
252 | int | 251 | int adfspart_check_ADFS(struct parsed_partitions *state) |
253 | adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) | ||
254 | { | 252 | { |
255 | unsigned long start_sect, nr_sects, sectscyl, heads; | 253 | unsigned long start_sect, nr_sects, sectscyl, heads; |
256 | Sector sect; | 254 | Sector sect; |
@@ -259,7 +257,7 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) | |||
259 | unsigned char id; | 257 | unsigned char id; |
260 | int slot = 1; | 258 | int slot = 1; |
261 | 259 | ||
262 | data = read_dev_sector(bdev, 6, &sect); | 260 | data = read_part_sector(state, 6, &sect); |
263 | if (!data) | 261 | if (!data) |
264 | return -1; | 262 | return -1; |
265 | 263 | ||
@@ -278,21 +276,21 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) | |||
278 | /* | 276 | /* |
279 | * Work out start of non-adfs partition. | 277 | * Work out start of non-adfs partition. |
280 | */ | 278 | */ |
281 | nr_sects = (bdev->bd_inode->i_size >> 9) - start_sect; | 279 | nr_sects = (state->bdev->bd_inode->i_size >> 9) - start_sect; |
282 | 280 | ||
283 | if (start_sect) { | 281 | if (start_sect) { |
284 | switch (id) { | 282 | switch (id) { |
285 | #ifdef CONFIG_ACORN_PARTITION_RISCIX | 283 | #ifdef CONFIG_ACORN_PARTITION_RISCIX |
286 | case PARTITION_RISCIX_SCSI: | 284 | case PARTITION_RISCIX_SCSI: |
287 | case PARTITION_RISCIX_MFM: | 285 | case PARTITION_RISCIX_MFM: |
288 | slot = riscix_partition(state, bdev, start_sect, | 286 | slot = riscix_partition(state, start_sect, slot, |
289 | slot, nr_sects); | 287 | nr_sects); |
290 | break; | 288 | break; |
291 | #endif | 289 | #endif |
292 | 290 | ||
293 | case PARTITION_LINUX: | 291 | case PARTITION_LINUX: |
294 | slot = linux_partition(state, bdev, start_sect, | 292 | slot = linux_partition(state, start_sect, slot, |
295 | slot, nr_sects); | 293 | nr_sects); |
296 | break; | 294 | break; |
297 | } | 295 | } |
298 | } | 296 | } |
@@ -308,10 +306,11 @@ struct ics_part { | |||
308 | __le32 size; | 306 | __le32 size; |
309 | }; | 307 | }; |
310 | 308 | ||
311 | static int adfspart_check_ICSLinux(struct block_device *bdev, unsigned long block) | 309 | static int adfspart_check_ICSLinux(struct parsed_partitions *state, |
310 | unsigned long block) | ||
312 | { | 311 | { |
313 | Sector sect; | 312 | Sector sect; |
314 | unsigned char *data = read_dev_sector(bdev, block, &sect); | 313 | unsigned char *data = read_part_sector(state, block, &sect); |
315 | int result = 0; | 314 | int result = 0; |
316 | 315 | ||
317 | if (data) { | 316 | if (data) { |
@@ -349,8 +348,7 @@ static inline int valid_ics_sector(const unsigned char *data) | |||
349 | * hda2 = ADFS partition 1 on first drive. | 348 | * hda2 = ADFS partition 1 on first drive. |
350 | * ..etc.. | 349 | * ..etc.. |
351 | */ | 350 | */ |
352 | int | 351 | int adfspart_check_ICS(struct parsed_partitions *state) |
353 | adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev) | ||
354 | { | 352 | { |
355 | const unsigned char *data; | 353 | const unsigned char *data; |
356 | const struct ics_part *p; | 354 | const struct ics_part *p; |
@@ -360,7 +358,7 @@ adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev) | |||
360 | /* | 358 | /* |
361 | * Try ICS style partitions - sector 0 contains partition info. | 359 | * Try ICS style partitions - sector 0 contains partition info. |
362 | */ | 360 | */ |
363 | data = read_dev_sector(bdev, 0, &sect); | 361 | data = read_part_sector(state, 0, &sect); |
364 | if (!data) | 362 | if (!data) |
365 | return -1; | 363 | return -1; |
366 | 364 | ||
@@ -392,7 +390,7 @@ adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev) | |||
392 | * partition is. We must not make this visible | 390 | * partition is. We must not make this visible |
393 | * to the filesystem. | 391 | * to the filesystem. |
394 | */ | 392 | */ |
395 | if (size > 1 && adfspart_check_ICSLinux(bdev, start)) { | 393 | if (size > 1 && adfspart_check_ICSLinux(state, start)) { |
396 | start += 1; | 394 | start += 1; |
397 | size -= 1; | 395 | size -= 1; |
398 | } | 396 | } |
@@ -446,8 +444,7 @@ static inline int valid_ptec_sector(const unsigned char *data) | |||
446 | * hda2 = ADFS partition 1 on first drive. | 444 | * hda2 = ADFS partition 1 on first drive. |
447 | * ..etc.. | 445 | * ..etc.. |
448 | */ | 446 | */ |
449 | int | 447 | int adfspart_check_POWERTEC(struct parsed_partitions *state) |
450 | adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bdev) | ||
451 | { | 448 | { |
452 | Sector sect; | 449 | Sector sect; |
453 | const unsigned char *data; | 450 | const unsigned char *data; |
@@ -455,7 +452,7 @@ adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bd | |||
455 | int slot = 1; | 452 | int slot = 1; |
456 | int i; | 453 | int i; |
457 | 454 | ||
458 | data = read_dev_sector(bdev, 0, &sect); | 455 | data = read_part_sector(state, 0, &sect); |
459 | if (!data) | 456 | if (!data) |
460 | return -1; | 457 | return -1; |
461 | 458 | ||
@@ -508,8 +505,7 @@ static const char eesox_name[] = { | |||
508 | * 1. The individual ADFS boot block entries that are placed on the disk. | 505 | * 1. The individual ADFS boot block entries that are placed on the disk. |
509 | * 2. The start address of the next entry. | 506 | * 2. The start address of the next entry. |
510 | */ | 507 | */ |
511 | int | 508 | int adfspart_check_EESOX(struct parsed_partitions *state) |
512 | adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev) | ||
513 | { | 509 | { |
514 | Sector sect; | 510 | Sector sect; |
515 | const unsigned char *data; | 511 | const unsigned char *data; |
@@ -518,7 +514,7 @@ adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev) | |||
518 | sector_t start = 0; | 514 | sector_t start = 0; |
519 | int i, slot = 1; | 515 | int i, slot = 1; |
520 | 516 | ||
521 | data = read_dev_sector(bdev, 7, &sect); | 517 | data = read_part_sector(state, 7, &sect); |
522 | if (!data) | 518 | if (!data) |
523 | return -1; | 519 | return -1; |
524 | 520 | ||
@@ -545,7 +541,7 @@ adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev) | |||
545 | if (i != 0) { | 541 | if (i != 0) { |
546 | sector_t size; | 542 | sector_t size; |
547 | 543 | ||
548 | size = get_capacity(bdev->bd_disk); | 544 | size = get_capacity(state->bdev->bd_disk); |
549 | put_partition(state, slot++, start, size - start); | 545 | put_partition(state, slot++, start, size - start); |
550 | printk("\n"); | 546 | printk("\n"); |
551 | } | 547 | } |
diff --git a/fs/partitions/acorn.h b/fs/partitions/acorn.h index 81fd50ecc080..ede828529692 100644 --- a/fs/partitions/acorn.h +++ b/fs/partitions/acorn.h | |||
@@ -7,8 +7,8 @@ | |||
7 | * format, and everyone stick to it? | 7 | * format, and everyone stick to it? |
8 | */ | 8 | */ |
9 | 9 | ||
10 | int adfspart_check_CUMANA(struct parsed_partitions *state, struct block_device *bdev); | 10 | int adfspart_check_CUMANA(struct parsed_partitions *state); |
11 | int adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev); | 11 | int adfspart_check_ADFS(struct parsed_partitions *state); |
12 | int adfspart_check_ICS(struct parsed_partitions *state, struct block_device *bdev); | 12 | int adfspart_check_ICS(struct parsed_partitions *state); |
13 | int adfspart_check_POWERTEC(struct parsed_partitions *state, struct block_device *bdev); | 13 | int adfspart_check_POWERTEC(struct parsed_partitions *state); |
14 | int adfspart_check_EESOX(struct parsed_partitions *state, struct block_device *bdev); | 14 | int adfspart_check_EESOX(struct parsed_partitions *state); |
diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c index 9917a8c360f2..ba443d4229f8 100644 --- a/fs/partitions/amiga.c +++ b/fs/partitions/amiga.c | |||
@@ -23,8 +23,7 @@ checksum_block(__be32 *m, int size) | |||
23 | return sum; | 23 | return sum; |
24 | } | 24 | } |
25 | 25 | ||
26 | int | 26 | int amiga_partition(struct parsed_partitions *state) |
27 | amiga_partition(struct parsed_partitions *state, struct block_device *bdev) | ||
28 | { | 27 | { |
29 | Sector sect; | 28 | Sector sect; |
30 | unsigned char *data; | 29 | unsigned char *data; |
@@ -38,11 +37,11 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
38 | for (blk = 0; ; blk++, put_dev_sector(sect)) { | 37 | for (blk = 0; ; blk++, put_dev_sector(sect)) { |
39 | if (blk == RDB_ALLOCATION_LIMIT) | 38 | if (blk == RDB_ALLOCATION_LIMIT) |
40 | goto rdb_done; | 39 | goto rdb_done; |
41 | data = read_dev_sector(bdev, blk, &sect); | 40 | data = read_part_sector(state, blk, &sect); |
42 | if (!data) { | 41 | if (!data) { |
43 | if (warn_no_part) | 42 | if (warn_no_part) |
44 | printk("Dev %s: unable to read RDB block %d\n", | 43 | printk("Dev %s: unable to read RDB block %d\n", |
45 | bdevname(bdev, b), blk); | 44 | bdevname(state->bdev, b), blk); |
46 | res = -1; | 45 | res = -1; |
47 | goto rdb_done; | 46 | goto rdb_done; |
48 | } | 47 | } |
@@ -64,7 +63,7 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
64 | } | 63 | } |
65 | 64 | ||
66 | printk("Dev %s: RDB in block %d has bad checksum\n", | 65 | printk("Dev %s: RDB in block %d has bad checksum\n", |
67 | bdevname(bdev, b), blk); | 66 | bdevname(state->bdev, b), blk); |
68 | } | 67 | } |
69 | 68 | ||
70 | /* blksize is blocks per 512 byte standard block */ | 69 | /* blksize is blocks per 512 byte standard block */ |
@@ -75,11 +74,11 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
75 | put_dev_sector(sect); | 74 | put_dev_sector(sect); |
76 | for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { | 75 | for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { |
77 | blk *= blksize; /* Read in terms partition table understands */ | 76 | blk *= blksize; /* Read in terms partition table understands */ |
78 | data = read_dev_sector(bdev, blk, &sect); | 77 | data = read_part_sector(state, blk, &sect); |
79 | if (!data) { | 78 | if (!data) { |
80 | if (warn_no_part) | 79 | if (warn_no_part) |
81 | printk("Dev %s: unable to read partition block %d\n", | 80 | printk("Dev %s: unable to read partition block %d\n", |
82 | bdevname(bdev, b), blk); | 81 | bdevname(state->bdev, b), blk); |
83 | res = -1; | 82 | res = -1; |
84 | goto rdb_done; | 83 | goto rdb_done; |
85 | } | 84 | } |
diff --git a/fs/partitions/amiga.h b/fs/partitions/amiga.h index 2f3e9ce22d53..d094585cadaa 100644 --- a/fs/partitions/amiga.h +++ b/fs/partitions/amiga.h | |||
@@ -2,5 +2,5 @@ | |||
2 | * fs/partitions/amiga.h | 2 | * fs/partitions/amiga.h |
3 | */ | 3 | */ |
4 | 4 | ||
5 | int amiga_partition(struct parsed_partitions *state, struct block_device *bdev); | 5 | int amiga_partition(struct parsed_partitions *state); |
6 | 6 | ||
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c index 1f3572d5b755..4439ff1b6cec 100644 --- a/fs/partitions/atari.c +++ b/fs/partitions/atari.c | |||
@@ -30,7 +30,7 @@ static inline int OK_id(char *s) | |||
30 | memcmp (s, "RAW", 3) == 0 ; | 30 | memcmp (s, "RAW", 3) == 0 ; |
31 | } | 31 | } |
32 | 32 | ||
33 | int atari_partition(struct parsed_partitions *state, struct block_device *bdev) | 33 | int atari_partition(struct parsed_partitions *state) |
34 | { | 34 | { |
35 | Sector sect; | 35 | Sector sect; |
36 | struct rootsector *rs; | 36 | struct rootsector *rs; |
@@ -42,12 +42,12 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
42 | int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ | 42 | int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ |
43 | #endif | 43 | #endif |
44 | 44 | ||
45 | rs = (struct rootsector *) read_dev_sector(bdev, 0, &sect); | 45 | rs = read_part_sector(state, 0, &sect); |
46 | if (!rs) | 46 | if (!rs) |
47 | return -1; | 47 | return -1; |
48 | 48 | ||
49 | /* Verify this is an Atari rootsector: */ | 49 | /* Verify this is an Atari rootsector: */ |
50 | hd_size = bdev->bd_inode->i_size >> 9; | 50 | hd_size = state->bdev->bd_inode->i_size >> 9; |
51 | if (!VALID_PARTITION(&rs->part[0], hd_size) && | 51 | if (!VALID_PARTITION(&rs->part[0], hd_size) && |
52 | !VALID_PARTITION(&rs->part[1], hd_size) && | 52 | !VALID_PARTITION(&rs->part[1], hd_size) && |
53 | !VALID_PARTITION(&rs->part[2], hd_size) && | 53 | !VALID_PARTITION(&rs->part[2], hd_size) && |
@@ -84,7 +84,7 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
84 | printk(" XGM<"); | 84 | printk(" XGM<"); |
85 | partsect = extensect = be32_to_cpu(pi->st); | 85 | partsect = extensect = be32_to_cpu(pi->st); |
86 | while (1) { | 86 | while (1) { |
87 | xrs = (struct rootsector *)read_dev_sector(bdev, partsect, &sect2); | 87 | xrs = read_part_sector(state, partsect, &sect2); |
88 | if (!xrs) { | 88 | if (!xrs) { |
89 | printk (" block %ld read failed\n", partsect); | 89 | printk (" block %ld read failed\n", partsect); |
90 | put_dev_sector(sect); | 90 | put_dev_sector(sect); |
diff --git a/fs/partitions/atari.h b/fs/partitions/atari.h index 63186b00e135..fe2d32a89f36 100644 --- a/fs/partitions/atari.h +++ b/fs/partitions/atari.h | |||
@@ -31,4 +31,4 @@ struct rootsector | |||
31 | u16 checksum; /* checksum for bootable disks */ | 31 | u16 checksum; /* checksum for bootable disks */ |
32 | } __attribute__((__packed__)); | 32 | } __attribute__((__packed__)); |
33 | 33 | ||
34 | int atari_partition(struct parsed_partitions *state, struct block_device *bdev); | 34 | int atari_partition(struct parsed_partitions *state); |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index e238ab23a9e7..5dcd4b0c5533 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -45,7 +45,7 @@ extern void md_autodetect_dev(dev_t dev); | |||
45 | 45 | ||
46 | int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ | 46 | int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ |
47 | 47 | ||
48 | static int (*check_part[])(struct parsed_partitions *, struct block_device *) = { | 48 | static int (*check_part[])(struct parsed_partitions *) = { |
49 | /* | 49 | /* |
50 | * Probe partition formats with tables at disk address 0 | 50 | * Probe partition formats with tables at disk address 0 |
51 | * that also have an ADFS boot block at 0xdc0. | 51 | * that also have an ADFS boot block at 0xdc0. |
@@ -161,10 +161,11 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
161 | struct parsed_partitions *state; | 161 | struct parsed_partitions *state; |
162 | int i, res, err; | 162 | int i, res, err; |
163 | 163 | ||
164 | state = kmalloc(sizeof(struct parsed_partitions), GFP_KERNEL); | 164 | state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); |
165 | if (!state) | 165 | if (!state) |
166 | return NULL; | 166 | return NULL; |
167 | 167 | ||
168 | state->bdev = bdev; | ||
168 | disk_name(hd, 0, state->name); | 169 | disk_name(hd, 0, state->name); |
169 | printk(KERN_INFO " %s:", state->name); | 170 | printk(KERN_INFO " %s:", state->name); |
170 | if (isdigit(state->name[strlen(state->name)-1])) | 171 | if (isdigit(state->name[strlen(state->name)-1])) |
@@ -174,7 +175,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
174 | i = res = err = 0; | 175 | i = res = err = 0; |
175 | while (!res && check_part[i]) { | 176 | while (!res && check_part[i]) { |
176 | memset(&state->parts, 0, sizeof(state->parts)); | 177 | memset(&state->parts, 0, sizeof(state->parts)); |
177 | res = check_part[i++](state, bdev); | 178 | res = check_part[i++](state); |
178 | if (res < 0) { | 179 | if (res < 0) { |
179 | /* We have hit an I/O error which we don't report now. | 180 | /* We have hit an I/O error which we don't report now. |
180 | * But record it, and let the others do their job. | 181 | * But record it, and let the others do their job. |
@@ -186,6 +187,8 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
186 | } | 187 | } |
187 | if (res > 0) | 188 | if (res > 0) |
188 | return state; | 189 | return state; |
190 | if (state->access_beyond_eod) | ||
191 | err = -ENOSPC; | ||
189 | if (err) | 192 | if (err) |
190 | /* The partition is unrecognized. So report I/O errors if there were any */ | 193 | /* The partition is unrecognized. So report I/O errors if there were any */ |
191 | res = err; | 194 | res = err; |
@@ -538,12 +541,33 @@ exit: | |||
538 | disk_part_iter_exit(&piter); | 541 | disk_part_iter_exit(&piter); |
539 | } | 542 | } |
540 | 543 | ||
544 | static bool disk_unlock_native_capacity(struct gendisk *disk) | ||
545 | { | ||
546 | const struct block_device_operations *bdops = disk->fops; | ||
547 | |||
548 | if (bdops->unlock_native_capacity && | ||
549 | !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) { | ||
550 | printk(KERN_CONT "enabling native capacity\n"); | ||
551 | bdops->unlock_native_capacity(disk); | ||
552 | disk->flags |= GENHD_FL_NATIVE_CAPACITY; | ||
553 | return true; | ||
554 | } else { | ||
555 | printk(KERN_CONT "truncated\n"); | ||
556 | return false; | ||
557 | } | ||
558 | } | ||
559 | |||
541 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | 560 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) |
542 | { | 561 | { |
562 | struct parsed_partitions *state = NULL; | ||
543 | struct disk_part_iter piter; | 563 | struct disk_part_iter piter; |
544 | struct hd_struct *part; | 564 | struct hd_struct *part; |
545 | struct parsed_partitions *state; | ||
546 | int p, highest, res; | 565 | int p, highest, res; |
566 | rescan: | ||
567 | if (state && !IS_ERR(state)) { | ||
568 | kfree(state); | ||
569 | state = NULL; | ||
570 | } | ||
547 | 571 | ||
548 | if (bdev->bd_part_count) | 572 | if (bdev->bd_part_count) |
549 | return -EBUSY; | 573 | return -EBUSY; |
@@ -562,8 +586,32 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | |||
562 | bdev->bd_invalidated = 0; | 586 | bdev->bd_invalidated = 0; |
563 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) | 587 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) |
564 | return 0; | 588 | return 0; |
565 | if (IS_ERR(state)) /* I/O error reading the partition table */ | 589 | if (IS_ERR(state)) { |
590 | /* | ||
591 | * I/O error reading the partition table. If any | ||
592 | * partition code tried to read beyond EOD, retry | ||
593 | * after unlocking native capacity. | ||
594 | */ | ||
595 | if (PTR_ERR(state) == -ENOSPC) { | ||
596 | printk(KERN_WARNING "%s: partition table beyond EOD, ", | ||
597 | disk->disk_name); | ||
598 | if (disk_unlock_native_capacity(disk)) | ||
599 | goto rescan; | ||
600 | } | ||
566 | return -EIO; | 601 | return -EIO; |
602 | } | ||
603 | /* | ||
604 | * If any partition code tried to read beyond EOD, try | ||
605 | * unlocking native capacity even if partition table is | ||
606 | * successfully read as we could be missing some partitions. | ||
607 | */ | ||
608 | if (state->access_beyond_eod) { | ||
609 | printk(KERN_WARNING | ||
610 | "%s: partition table partially beyond EOD, ", | ||
611 | disk->disk_name); | ||
612 | if (disk_unlock_native_capacity(disk)) | ||
613 | goto rescan; | ||
614 | } | ||
567 | 615 | ||
568 | /* tell userspace that the media / partition table may have changed */ | 616 | /* tell userspace that the media / partition table may have changed */ |
569 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); | 617 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); |
@@ -581,7 +629,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | |||
581 | /* add partitions */ | 629 | /* add partitions */ |
582 | for (p = 1; p < state->limit; p++) { | 630 | for (p = 1; p < state->limit; p++) { |
583 | sector_t size, from; | 631 | sector_t size, from; |
584 | try_scan: | 632 | |
585 | size = state->parts[p].size; | 633 | size = state->parts[p].size; |
586 | if (!size) | 634 | if (!size) |
587 | continue; | 635 | continue; |
@@ -589,30 +637,21 @@ try_scan: | |||
589 | from = state->parts[p].from; | 637 | from = state->parts[p].from; |
590 | if (from >= get_capacity(disk)) { | 638 | if (from >= get_capacity(disk)) { |
591 | printk(KERN_WARNING | 639 | printk(KERN_WARNING |
592 | "%s: p%d ignored, start %llu is behind the end of the disk\n", | 640 | "%s: p%d start %llu is beyond EOD, ", |
593 | disk->disk_name, p, (unsigned long long) from); | 641 | disk->disk_name, p, (unsigned long long) from); |
642 | if (disk_unlock_native_capacity(disk)) | ||
643 | goto rescan; | ||
594 | continue; | 644 | continue; |
595 | } | 645 | } |
596 | 646 | ||
597 | if (from + size > get_capacity(disk)) { | 647 | if (from + size > get_capacity(disk)) { |
598 | const struct block_device_operations *bdops = disk->fops; | ||
599 | unsigned long long capacity; | ||
600 | |||
601 | printk(KERN_WARNING | 648 | printk(KERN_WARNING |
602 | "%s: p%d size %llu exceeds device capacity, ", | 649 | "%s: p%d size %llu extends beyond EOD, ", |
603 | disk->disk_name, p, (unsigned long long) size); | 650 | disk->disk_name, p, (unsigned long long) size); |
604 | 651 | ||
605 | if (bdops->set_capacity && | 652 | if (disk_unlock_native_capacity(disk)) { |
606 | (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) { | 653 | /* free state and restart */ |
607 | printk(KERN_CONT "enabling native capacity\n"); | 654 | goto rescan; |
608 | capacity = bdops->set_capacity(disk, ~0ULL); | ||
609 | disk->flags |= GENHD_FL_NATIVE_CAPACITY; | ||
610 | if (capacity > get_capacity(disk)) { | ||
611 | set_capacity(disk, capacity); | ||
612 | check_disk_size_change(disk, bdev); | ||
613 | bdev->bd_invalidated = 0; | ||
614 | } | ||
615 | goto try_scan; | ||
616 | } else { | 655 | } else { |
617 | /* | 656 | /* |
618 | * we can not ignore partitions of broken tables | 657 | * we can not ignore partitions of broken tables |
@@ -620,7 +659,6 @@ try_scan: | |||
620 | * we limit them to the end of the disk to avoid | 659 | * we limit them to the end of the disk to avoid |
621 | * creating invalid block devices | 660 | * creating invalid block devices |
622 | */ | 661 | */ |
623 | printk(KERN_CONT "limited to end of disk\n"); | ||
624 | size = get_capacity(disk) - from; | 662 | size = get_capacity(disk) - from; |
625 | } | 663 | } |
626 | } | 664 | } |
diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 98dbe1a84528..52f8bd399396 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h | |||
@@ -6,6 +6,7 @@ | |||
6 | * description. | 6 | * description. |
7 | */ | 7 | */ |
8 | struct parsed_partitions { | 8 | struct parsed_partitions { |
9 | struct block_device *bdev; | ||
9 | char name[BDEVNAME_SIZE]; | 10 | char name[BDEVNAME_SIZE]; |
10 | struct { | 11 | struct { |
11 | sector_t from; | 12 | sector_t from; |
@@ -14,8 +15,19 @@ struct parsed_partitions { | |||
14 | } parts[DISK_MAX_PARTS]; | 15 | } parts[DISK_MAX_PARTS]; |
15 | int next; | 16 | int next; |
16 | int limit; | 17 | int limit; |
18 | bool access_beyond_eod; | ||
17 | }; | 19 | }; |
18 | 20 | ||
21 | static inline void *read_part_sector(struct parsed_partitions *state, | ||
22 | sector_t n, Sector *p) | ||
23 | { | ||
24 | if (n >= get_capacity(state->bdev->bd_disk)) { | ||
25 | state->access_beyond_eod = true; | ||
26 | return NULL; | ||
27 | } | ||
28 | return read_dev_sector(state->bdev, n, p); | ||
29 | } | ||
30 | |||
19 | static inline void | 31 | static inline void |
20 | put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) | 32 | put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) |
21 | { | 33 | { |
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index 91babdae7587..9efb2cfe2410 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c | |||
@@ -140,8 +140,7 @@ efi_crc32(const void *buf, unsigned long len) | |||
140 | * the part[0] entry for this disk, and is the number of | 140 | * the part[0] entry for this disk, and is the number of |
141 | * physical sectors available on the disk. | 141 | * physical sectors available on the disk. |
142 | */ | 142 | */ |
143 | static u64 | 143 | static u64 last_lba(struct block_device *bdev) |
144 | last_lba(struct block_device *bdev) | ||
145 | { | 144 | { |
146 | if (!bdev || !bdev->bd_inode) | 145 | if (!bdev || !bdev->bd_inode) |
147 | return 0; | 146 | return 0; |
@@ -181,27 +180,28 @@ is_pmbr_valid(legacy_mbr *mbr) | |||
181 | 180 | ||
182 | /** | 181 | /** |
183 | * read_lba(): Read bytes from disk, starting at given LBA | 182 | * read_lba(): Read bytes from disk, starting at given LBA |
184 | * @bdev | 183 | * @state |
185 | * @lba | 184 | * @lba |
186 | * @buffer | 185 | * @buffer |
187 | * @size_t | 186 | * @size_t |
188 | * | 187 | * |
189 | * Description: Reads @count bytes from @bdev into @buffer. | 188 | * Description: Reads @count bytes from @state->bdev into @buffer. |
190 | * Returns number of bytes read on success, 0 on error. | 189 | * Returns number of bytes read on success, 0 on error. |
191 | */ | 190 | */ |
192 | static size_t | 191 | static size_t read_lba(struct parsed_partitions *state, |
193 | read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count) | 192 | u64 lba, u8 *buffer, size_t count) |
194 | { | 193 | { |
195 | size_t totalreadcount = 0; | 194 | size_t totalreadcount = 0; |
195 | struct block_device *bdev = state->bdev; | ||
196 | sector_t n = lba * (bdev_logical_block_size(bdev) / 512); | 196 | sector_t n = lba * (bdev_logical_block_size(bdev) / 512); |
197 | 197 | ||
198 | if (!bdev || !buffer || lba > last_lba(bdev)) | 198 | if (!buffer || lba > last_lba(bdev)) |
199 | return 0; | 199 | return 0; |
200 | 200 | ||
201 | while (count) { | 201 | while (count) { |
202 | int copied = 512; | 202 | int copied = 512; |
203 | Sector sect; | 203 | Sector sect; |
204 | unsigned char *data = read_dev_sector(bdev, n++, &sect); | 204 | unsigned char *data = read_part_sector(state, n++, &sect); |
205 | if (!data) | 205 | if (!data) |
206 | break; | 206 | break; |
207 | if (copied > count) | 207 | if (copied > count) |
@@ -217,19 +217,20 @@ read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count) | |||
217 | 217 | ||
218 | /** | 218 | /** |
219 | * alloc_read_gpt_entries(): reads partition entries from disk | 219 | * alloc_read_gpt_entries(): reads partition entries from disk |
220 | * @bdev | 220 | * @state |
221 | * @gpt - GPT header | 221 | * @gpt - GPT header |
222 | * | 222 | * |
223 | * Description: Returns ptes on success, NULL on error. | 223 | * Description: Returns ptes on success, NULL on error. |
224 | * Allocates space for PTEs based on information found in @gpt. | 224 | * Allocates space for PTEs based on information found in @gpt. |
225 | * Notes: remember to free pte when you're done! | 225 | * Notes: remember to free pte when you're done! |
226 | */ | 226 | */ |
227 | static gpt_entry * | 227 | static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, |
228 | alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt) | 228 | gpt_header *gpt) |
229 | { | 229 | { |
230 | size_t count; | 230 | size_t count; |
231 | gpt_entry *pte; | 231 | gpt_entry *pte; |
232 | if (!bdev || !gpt) | 232 | |
233 | if (!gpt) | ||
233 | return NULL; | 234 | return NULL; |
234 | 235 | ||
235 | count = le32_to_cpu(gpt->num_partition_entries) * | 236 | count = le32_to_cpu(gpt->num_partition_entries) * |
@@ -240,7 +241,7 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt) | |||
240 | if (!pte) | 241 | if (!pte) |
241 | return NULL; | 242 | return NULL; |
242 | 243 | ||
243 | if (read_lba(bdev, le64_to_cpu(gpt->partition_entry_lba), | 244 | if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba), |
244 | (u8 *) pte, | 245 | (u8 *) pte, |
245 | count) < count) { | 246 | count) < count) { |
246 | kfree(pte); | 247 | kfree(pte); |
@@ -252,27 +253,24 @@ alloc_read_gpt_entries(struct block_device *bdev, gpt_header *gpt) | |||
252 | 253 | ||
253 | /** | 254 | /** |
254 | * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk | 255 | * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk |
255 | * @bdev | 256 | * @state |
256 | * @lba is the Logical Block Address of the partition table | 257 | * @lba is the Logical Block Address of the partition table |
257 | * | 258 | * |
258 | * Description: returns GPT header on success, NULL on error. Allocates | 259 | * Description: returns GPT header on success, NULL on error. Allocates |
259 | * and fills a GPT header starting at @ from @bdev. | 260 | * and fills a GPT header starting at @ from @state->bdev. |
260 | * Note: remember to free gpt when finished with it. | 261 | * Note: remember to free gpt when finished with it. |
261 | */ | 262 | */ |
262 | static gpt_header * | 263 | static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state, |
263 | alloc_read_gpt_header(struct block_device *bdev, u64 lba) | 264 | u64 lba) |
264 | { | 265 | { |
265 | gpt_header *gpt; | 266 | gpt_header *gpt; |
266 | unsigned ssz = bdev_logical_block_size(bdev); | 267 | unsigned ssz = bdev_logical_block_size(state->bdev); |
267 | |||
268 | if (!bdev) | ||
269 | return NULL; | ||
270 | 268 | ||
271 | gpt = kzalloc(ssz, GFP_KERNEL); | 269 | gpt = kzalloc(ssz, GFP_KERNEL); |
272 | if (!gpt) | 270 | if (!gpt) |
273 | return NULL; | 271 | return NULL; |
274 | 272 | ||
275 | if (read_lba(bdev, lba, (u8 *) gpt, ssz) < ssz) { | 273 | if (read_lba(state, lba, (u8 *) gpt, ssz) < ssz) { |
276 | kfree(gpt); | 274 | kfree(gpt); |
277 | gpt=NULL; | 275 | gpt=NULL; |
278 | return NULL; | 276 | return NULL; |
@@ -283,7 +281,7 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba) | |||
283 | 281 | ||
284 | /** | 282 | /** |
285 | * is_gpt_valid() - tests one GPT header and PTEs for validity | 283 | * is_gpt_valid() - tests one GPT header and PTEs for validity |
286 | * @bdev | 284 | * @state |
287 | * @lba is the logical block address of the GPT header to test | 285 | * @lba is the logical block address of the GPT header to test |
288 | * @gpt is a GPT header ptr, filled on return. | 286 | * @gpt is a GPT header ptr, filled on return. |
289 | * @ptes is a PTEs ptr, filled on return. | 287 | * @ptes is a PTEs ptr, filled on return. |
@@ -291,16 +289,15 @@ alloc_read_gpt_header(struct block_device *bdev, u64 lba) | |||
291 | * Description: returns 1 if valid, 0 on error. | 289 | * Description: returns 1 if valid, 0 on error. |
292 | * If valid, returns pointers to newly allocated GPT header and PTEs. | 290 | * If valid, returns pointers to newly allocated GPT header and PTEs. |
293 | */ | 291 | */ |
294 | static int | 292 | static int is_gpt_valid(struct parsed_partitions *state, u64 lba, |
295 | is_gpt_valid(struct block_device *bdev, u64 lba, | 293 | gpt_header **gpt, gpt_entry **ptes) |
296 | gpt_header **gpt, gpt_entry **ptes) | ||
297 | { | 294 | { |
298 | u32 crc, origcrc; | 295 | u32 crc, origcrc; |
299 | u64 lastlba; | 296 | u64 lastlba; |
300 | 297 | ||
301 | if (!bdev || !gpt || !ptes) | 298 | if (!ptes) |
302 | return 0; | 299 | return 0; |
303 | if (!(*gpt = alloc_read_gpt_header(bdev, lba))) | 300 | if (!(*gpt = alloc_read_gpt_header(state, lba))) |
304 | return 0; | 301 | return 0; |
305 | 302 | ||
306 | /* Check the GUID Partition Table signature */ | 303 | /* Check the GUID Partition Table signature */ |
@@ -336,7 +333,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba, | |||
336 | /* Check the first_usable_lba and last_usable_lba are | 333 | /* Check the first_usable_lba and last_usable_lba are |
337 | * within the disk. | 334 | * within the disk. |
338 | */ | 335 | */ |
339 | lastlba = last_lba(bdev); | 336 | lastlba = last_lba(state->bdev); |
340 | if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { | 337 | if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { |
341 | pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n", | 338 | pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n", |
342 | (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), | 339 | (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), |
@@ -350,7 +347,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba, | |||
350 | goto fail; | 347 | goto fail; |
351 | } | 348 | } |
352 | 349 | ||
353 | if (!(*ptes = alloc_read_gpt_entries(bdev, *gpt))) | 350 | if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) |
354 | goto fail; | 351 | goto fail; |
355 | 352 | ||
356 | /* Check the GUID Partition Entry Array CRC */ | 353 | /* Check the GUID Partition Entry Array CRC */ |
@@ -495,7 +492,7 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) | |||
495 | 492 | ||
496 | /** | 493 | /** |
497 | * find_valid_gpt() - Search disk for valid GPT headers and PTEs | 494 | * find_valid_gpt() - Search disk for valid GPT headers and PTEs |
498 | * @bdev | 495 | * @state |
499 | * @gpt is a GPT header ptr, filled on return. | 496 | * @gpt is a GPT header ptr, filled on return. |
500 | * @ptes is a PTEs ptr, filled on return. | 497 | * @ptes is a PTEs ptr, filled on return. |
501 | * Description: Returns 1 if valid, 0 on error. | 498 | * Description: Returns 1 if valid, 0 on error. |
@@ -508,24 +505,25 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) | |||
508 | * This protects against devices which misreport their size, and forces | 505 | * This protects against devices which misreport their size, and forces |
509 | * the user to decide to use the Alternate GPT. | 506 | * the user to decide to use the Alternate GPT. |
510 | */ | 507 | */ |
511 | static int | 508 | static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, |
512 | find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) | 509 | gpt_entry **ptes) |
513 | { | 510 | { |
514 | int good_pgpt = 0, good_agpt = 0, good_pmbr = 0; | 511 | int good_pgpt = 0, good_agpt = 0, good_pmbr = 0; |
515 | gpt_header *pgpt = NULL, *agpt = NULL; | 512 | gpt_header *pgpt = NULL, *agpt = NULL; |
516 | gpt_entry *pptes = NULL, *aptes = NULL; | 513 | gpt_entry *pptes = NULL, *aptes = NULL; |
517 | legacy_mbr *legacymbr; | 514 | legacy_mbr *legacymbr; |
518 | u64 lastlba; | 515 | u64 lastlba; |
519 | if (!bdev || !gpt || !ptes) | 516 | |
517 | if (!ptes) | ||
520 | return 0; | 518 | return 0; |
521 | 519 | ||
522 | lastlba = last_lba(bdev); | 520 | lastlba = last_lba(state->bdev); |
523 | if (!force_gpt) { | 521 | if (!force_gpt) { |
524 | /* This will be added to the EFI Spec. per Intel after v1.02. */ | 522 | /* This will be added to the EFI Spec. per Intel after v1.02. */ |
525 | legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL); | 523 | legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL); |
526 | if (legacymbr) { | 524 | if (legacymbr) { |
527 | read_lba(bdev, 0, (u8 *) legacymbr, | 525 | read_lba(state, 0, (u8 *) legacymbr, |
528 | sizeof (*legacymbr)); | 526 | sizeof (*legacymbr)); |
529 | good_pmbr = is_pmbr_valid(legacymbr); | 527 | good_pmbr = is_pmbr_valid(legacymbr); |
530 | kfree(legacymbr); | 528 | kfree(legacymbr); |
531 | } | 529 | } |
@@ -533,15 +531,14 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) | |||
533 | goto fail; | 531 | goto fail; |
534 | } | 532 | } |
535 | 533 | ||
536 | good_pgpt = is_gpt_valid(bdev, GPT_PRIMARY_PARTITION_TABLE_LBA, | 534 | good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA, |
537 | &pgpt, &pptes); | 535 | &pgpt, &pptes); |
538 | if (good_pgpt) | 536 | if (good_pgpt) |
539 | good_agpt = is_gpt_valid(bdev, | 537 | good_agpt = is_gpt_valid(state, |
540 | le64_to_cpu(pgpt->alternate_lba), | 538 | le64_to_cpu(pgpt->alternate_lba), |
541 | &agpt, &aptes); | 539 | &agpt, &aptes); |
542 | if (!good_agpt && force_gpt) | 540 | if (!good_agpt && force_gpt) |
543 | good_agpt = is_gpt_valid(bdev, lastlba, | 541 | good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes); |
544 | &agpt, &aptes); | ||
545 | 542 | ||
546 | /* The obviously unsuccessful case */ | 543 | /* The obviously unsuccessful case */ |
547 | if (!good_pgpt && !good_agpt) | 544 | if (!good_pgpt && !good_agpt) |
@@ -583,9 +580,8 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) | |||
583 | } | 580 | } |
584 | 581 | ||
585 | /** | 582 | /** |
586 | * efi_partition(struct parsed_partitions *state, struct block_device *bdev) | 583 | * efi_partition(struct parsed_partitions *state) |
587 | * @state | 584 | * @state |
588 | * @bdev | ||
589 | * | 585 | * |
590 | * Description: called from check.c, if the disk contains GPT | 586 | * Description: called from check.c, if the disk contains GPT |
591 | * partitions, sets up partition entries in the kernel. | 587 | * partitions, sets up partition entries in the kernel. |
@@ -602,15 +598,14 @@ find_valid_gpt(struct block_device *bdev, gpt_header **gpt, gpt_entry **ptes) | |||
602 | * 1 if successful | 598 | * 1 if successful |
603 | * | 599 | * |
604 | */ | 600 | */ |
605 | int | 601 | int efi_partition(struct parsed_partitions *state) |
606 | efi_partition(struct parsed_partitions *state, struct block_device *bdev) | ||
607 | { | 602 | { |
608 | gpt_header *gpt = NULL; | 603 | gpt_header *gpt = NULL; |
609 | gpt_entry *ptes = NULL; | 604 | gpt_entry *ptes = NULL; |
610 | u32 i; | 605 | u32 i; |
611 | unsigned ssz = bdev_logical_block_size(bdev) / 512; | 606 | unsigned ssz = bdev_logical_block_size(state->bdev) / 512; |
612 | 607 | ||
613 | if (!find_valid_gpt(bdev, &gpt, &ptes) || !gpt || !ptes) { | 608 | if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) { |
614 | kfree(gpt); | 609 | kfree(gpt); |
615 | kfree(ptes); | 610 | kfree(ptes); |
616 | return 0; | 611 | return 0; |
@@ -623,7 +618,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
623 | u64 size = le64_to_cpu(ptes[i].ending_lba) - | 618 | u64 size = le64_to_cpu(ptes[i].ending_lba) - |
624 | le64_to_cpu(ptes[i].starting_lba) + 1ULL; | 619 | le64_to_cpu(ptes[i].starting_lba) + 1ULL; |
625 | 620 | ||
626 | if (!is_pte_valid(&ptes[i], last_lba(bdev))) | 621 | if (!is_pte_valid(&ptes[i], last_lba(state->bdev))) |
627 | continue; | 622 | continue; |
628 | 623 | ||
629 | put_partition(state, i+1, start * ssz, size * ssz); | 624 | put_partition(state, i+1, start * ssz, size * ssz); |
@@ -631,7 +626,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
631 | /* If this is a RAID volume, tell md */ | 626 | /* If this is a RAID volume, tell md */ |
632 | if (!efi_guidcmp(ptes[i].partition_type_guid, | 627 | if (!efi_guidcmp(ptes[i].partition_type_guid, |
633 | PARTITION_LINUX_RAID_GUID)) | 628 | PARTITION_LINUX_RAID_GUID)) |
634 | state->parts[i+1].flags = 1; | 629 | state->parts[i + 1].flags = ADDPART_FLAG_RAID; |
635 | } | 630 | } |
636 | kfree(ptes); | 631 | kfree(ptes); |
637 | kfree(gpt); | 632 | kfree(gpt); |
diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h index 6998b589abf9..b69ab729558f 100644 --- a/fs/partitions/efi.h +++ b/fs/partitions/efi.h | |||
@@ -110,7 +110,7 @@ typedef struct _legacy_mbr { | |||
110 | } __attribute__ ((packed)) legacy_mbr; | 110 | } __attribute__ ((packed)) legacy_mbr; |
111 | 111 | ||
112 | /* Functions */ | 112 | /* Functions */ |
113 | extern int efi_partition(struct parsed_partitions *state, struct block_device *bdev); | 113 | extern int efi_partition(struct parsed_partitions *state); |
114 | 114 | ||
115 | #endif | 115 | #endif |
116 | 116 | ||
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index fc71aab08460..3e73de5967ff 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c | |||
@@ -58,9 +58,9 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) { | |||
58 | 58 | ||
59 | /* | 59 | /* |
60 | */ | 60 | */ |
61 | int | 61 | int ibm_partition(struct parsed_partitions *state) |
62 | ibm_partition(struct parsed_partitions *state, struct block_device *bdev) | ||
63 | { | 62 | { |
63 | struct block_device *bdev = state->bdev; | ||
64 | int blocksize, res; | 64 | int blocksize, res; |
65 | loff_t i_size, offset, size, fmt_size; | 65 | loff_t i_size, offset, size, fmt_size; |
66 | dasd_information2_t *info; | 66 | dasd_information2_t *info; |
@@ -100,7 +100,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
100 | /* | 100 | /* |
101 | * Get volume label, extract name and type. | 101 | * Get volume label, extract name and type. |
102 | */ | 102 | */ |
103 | data = read_dev_sector(bdev, info->label_block*(blocksize/512), &sect); | 103 | data = read_part_sector(state, info->label_block*(blocksize/512), |
| | 104 | &sect); |
104 | if (data == NULL) | 105 | if (data == NULL) |
105 | goto out_readerr; | 106 | goto out_readerr; |
106 | 107 | ||
@@ -193,8 +194,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
193 | */ | 194 | */ |
194 | blk = cchhb2blk(&label->vol.vtoc, geo) + 1; | 195 | blk = cchhb2blk(&label->vol.vtoc, geo) + 1; |
195 | counter = 0; | 196 | counter = 0; |
196 | data = read_dev_sector(bdev, blk * (blocksize/512), | 197 | data = read_part_sector(state, blk * (blocksize/512), |
197 | &sect); | 198 | &sect); |
198 | while (data != NULL) { | 199 | while (data != NULL) { |
199 | struct vtoc_format1_label f1; | 200 | struct vtoc_format1_label f1; |
200 | 201 | ||
@@ -208,9 +209,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
208 | || f1.DS1FMTID == _ascebc['7'] | 209 | || f1.DS1FMTID == _ascebc['7'] |
209 | || f1.DS1FMTID == _ascebc['9']) { | 210 | || f1.DS1FMTID == _ascebc['9']) { |
210 | blk++; | 211 | blk++; |
211 | data = read_dev_sector(bdev, blk * | 212 | data = read_part_sector(state, |
212 | (blocksize/512), | 213 | blk * (blocksize/512), &sect); |
213 | &sect); | |
214 | continue; | 214 | continue; |
215 | } | 215 | } |
216 | 216 | ||
@@ -230,9 +230,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
230 | size * (blocksize >> 9)); | 230 | size * (blocksize >> 9)); |
231 | counter++; | 231 | counter++; |
232 | blk++; | 232 | blk++; |
233 | data = read_dev_sector(bdev, | 233 | data = read_part_sector(state, |
234 | blk * (blocksize/512), | 234 | blk * (blocksize/512), &sect); |
235 | &sect); | |
236 | } | 235 | } |
237 | 236 | ||
238 | if (!data) | 237 | if (!data) |
diff --git a/fs/partitions/ibm.h b/fs/partitions/ibm.h index 31f85a6ac459..08fb0804a812 100644 --- a/fs/partitions/ibm.h +++ b/fs/partitions/ibm.h | |||
@@ -1 +1 @@ | |||
int ibm_partition(struct parsed_partitions *, struct block_device *); | int ibm_partition(struct parsed_partitions *); | ||
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c index 176d89bcf123..1cc928bb762f 100644 --- a/fs/partitions/karma.c +++ b/fs/partitions/karma.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include "check.h" | 9 | #include "check.h" |
10 | #include "karma.h" | 10 | #include "karma.h" |
11 | 11 | ||
12 | int karma_partition(struct parsed_partitions *state, struct block_device *bdev) | 12 | int karma_partition(struct parsed_partitions *state) |
13 | { | 13 | { |
14 | int i; | 14 | int i; |
15 | int slot = 1; | 15 | int slot = 1; |
@@ -29,7 +29,7 @@ int karma_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
29 | } __attribute__((packed)) *label; | 29 | } __attribute__((packed)) *label; |
30 | struct d_partition *p; | 30 | struct d_partition *p; |
31 | 31 | ||
32 | data = read_dev_sector(bdev, 0, &sect); | 32 | data = read_part_sector(state, 0, &sect); |
33 | if (!data) | 33 | if (!data) |
34 | return -1; | 34 | return -1; |
35 | 35 | ||
diff --git a/fs/partitions/karma.h b/fs/partitions/karma.h index ecf7d3f2a3d8..c764b2e9df21 100644 --- a/fs/partitions/karma.h +++ b/fs/partitions/karma.h | |||
@@ -4,5 +4,5 @@ | |||
4 | 4 | ||
5 | #define KARMA_LABEL_MAGIC 0xAB56 | 5 | #define KARMA_LABEL_MAGIC 0xAB56 |
6 | 6 | ||
7 | int karma_partition(struct parsed_partitions *state, struct block_device *bdev); | 7 | int karma_partition(struct parsed_partitions *state); |
8 | 8 | ||
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 8652fb99e962..3ceca05b668c 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c | |||
@@ -309,7 +309,7 @@ static bool ldm_compare_tocblocks (const struct tocblock *toc1, | |||
309 | 309 | ||
310 | /** | 310 | /** |
311 | * ldm_validate_privheads - Compare the primary privhead with its backups | 311 | * ldm_validate_privheads - Compare the primary privhead with its backups |
312 | * @bdev: Device holding the LDM Database | 312 | * @state: Partition check state including device holding the LDM Database |
313 | * @ph1: Memory struct to fill with ph contents | 313 | * @ph1: Memory struct to fill with ph contents |
314 | * | 314 | * |
315 | * Read and compare all three privheads from disk. | 315 | * Read and compare all three privheads from disk. |
@@ -321,8 +321,8 @@ static bool ldm_compare_tocblocks (const struct tocblock *toc1, | |||
321 | * Return: 'true' Success | 321 | * Return: 'true' Success |
322 | * 'false' Error | 322 | * 'false' Error |
323 | */ | 323 | */ |
324 | static bool ldm_validate_privheads (struct block_device *bdev, | 324 | static bool ldm_validate_privheads(struct parsed_partitions *state, |
325 | struct privhead *ph1) | 325 | struct privhead *ph1) |
326 | { | 326 | { |
327 | static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 }; | 327 | static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 }; |
328 | struct privhead *ph[3] = { ph1 }; | 328 | struct privhead *ph[3] = { ph1 }; |
@@ -332,7 +332,7 @@ static bool ldm_validate_privheads (struct block_device *bdev, | |||
332 | long num_sects; | 332 | long num_sects; |
333 | int i; | 333 | int i; |
334 | 334 | ||
335 | BUG_ON (!bdev || !ph1); | 335 | BUG_ON (!state || !ph1); |
336 | 336 | ||
337 | ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL); | 337 | ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL); |
338 | ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL); | 338 | ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL); |
@@ -346,8 +346,8 @@ static bool ldm_validate_privheads (struct block_device *bdev, | |||
346 | 346 | ||
347 | /* Read and parse privheads */ | 347 | /* Read and parse privheads */ |
348 | for (i = 0; i < 3; i++) { | 348 | for (i = 0; i < 3; i++) { |
349 | data = read_dev_sector (bdev, | 349 | data = read_part_sector(state, ph[0]->config_start + off[i], |
350 | ph[0]->config_start + off[i], §); | 350 | §); |
351 | if (!data) { | 351 | if (!data) { |
352 | ldm_crit ("Disk read failed."); | 352 | ldm_crit ("Disk read failed."); |
353 | goto out; | 353 | goto out; |
@@ -363,7 +363,7 @@ static bool ldm_validate_privheads (struct block_device *bdev, | |||
363 | } | 363 | } |
364 | } | 364 | } |
365 | 365 | ||
366 | num_sects = bdev->bd_inode->i_size >> 9; | 366 | num_sects = state->bdev->bd_inode->i_size >> 9; |
367 | 367 | ||
368 | if ((ph[0]->config_start > num_sects) || | 368 | if ((ph[0]->config_start > num_sects) || |
369 | ((ph[0]->config_start + ph[0]->config_size) > num_sects)) { | 369 | ((ph[0]->config_start + ph[0]->config_size) > num_sects)) { |
@@ -397,20 +397,20 @@ out: | |||
397 | 397 | ||
398 | /** | 398 | /** |
399 | * ldm_validate_tocblocks - Validate the table of contents and its backups | 399 | * ldm_validate_tocblocks - Validate the table of contents and its backups |
400 | * @bdev: Device holding the LDM Database | 400 | * @state: Partition check state including device holding the LDM Database |
401 | * @base: Offset, into @bdev, of the database | 401 | * @base: Offset, into @state->bdev, of the database |
402 | * @ldb: Cache of the database structures | 402 | * @ldb: Cache of the database structures |
403 | * | 403 | * |
404 | * Find and compare the four tables of contents of the LDM Database stored on | 404 | * Find and compare the four tables of contents of the LDM Database stored on |
405 | * @bdev and return the parsed information into @toc1. | 405 | * @state->bdev and return the parsed information into @toc1. |
406 | * | 406 | * |
407 | * The offsets and sizes of the configs are range-checked against a privhead. | 407 | * The offsets and sizes of the configs are range-checked against a privhead. |
408 | * | 408 | * |
409 | * Return: 'true' @toc1 contains validated TOCBLOCK info | 409 | * Return: 'true' @toc1 contains validated TOCBLOCK info |
410 | * 'false' @toc1 contents are undefined | 410 | * 'false' @toc1 contents are undefined |
411 | */ | 411 | */ |
412 | static bool ldm_validate_tocblocks(struct block_device *bdev, | 412 | static bool ldm_validate_tocblocks(struct parsed_partitions *state, |
413 | unsigned long base, struct ldmdb *ldb) | 413 | unsigned long base, struct ldmdb *ldb) |
414 | { | 414 | { |
415 | static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4}; | 415 | static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4}; |
416 | struct tocblock *tb[4]; | 416 | struct tocblock *tb[4]; |
@@ -420,7 +420,7 @@ static bool ldm_validate_tocblocks(struct block_device *bdev, | |||
420 | int i, nr_tbs; | 420 | int i, nr_tbs; |
421 | bool result = false; | 421 | bool result = false; |
422 | 422 | ||
423 | BUG_ON(!bdev || !ldb); | 423 | BUG_ON(!state || !ldb); |
424 | ph = &ldb->ph; | 424 | ph = &ldb->ph; |
425 | tb[0] = &ldb->toc; | 425 | tb[0] = &ldb->toc; |
426 | tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL); | 426 | tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL); |
@@ -437,7 +437,7 @@ static bool ldm_validate_tocblocks(struct block_device *bdev, | |||
437 | * skip any that fail as long as we get at least one valid TOCBLOCK. | 437 | * skip any that fail as long as we get at least one valid TOCBLOCK. |
438 | */ | 438 | */ |
439 | for (nr_tbs = i = 0; i < 4; i++) { | 439 | for (nr_tbs = i = 0; i < 4; i++) { |
440 | data = read_dev_sector(bdev, base + off[i], &sect); | 440 | data = read_part_sector(state, base + off[i], &sect); |
441 | if (!data) { | 441 | if (!data) { |
442 | ldm_error("Disk read failed for TOCBLOCK %d.", i); | 442 | ldm_error("Disk read failed for TOCBLOCK %d.", i); |
443 | continue; | 443 | continue; |
@@ -473,7 +473,7 @@ err: | |||
473 | 473 | ||
474 | /** | 474 | /** |
475 | * ldm_validate_vmdb - Read the VMDB and validate it | 475 | * ldm_validate_vmdb - Read the VMDB and validate it |
476 | * @bdev: Device holding the LDM Database | 476 | * @state: Partition check state including device holding the LDM Database |
477 | * @base: Offset, into @bdev, of the database | 477 | * @base: Offset, into @bdev, of the database |
478 | * @ldb: Cache of the database structures | 478 | * @ldb: Cache of the database structures |
479 | * | 479 | * |
@@ -483,8 +483,8 @@ err: | |||
483 | * Return: 'true' @ldb contains validated VBDB info | 483 | * Return: 'true' @ldb contains validated VBDB info |
484 | * 'false' @ldb contents are undefined | 484 | * 'false' @ldb contents are undefined |
485 | */ | 485 | */ |
486 | static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base, | 486 | static bool ldm_validate_vmdb(struct parsed_partitions *state, |
487 | struct ldmdb *ldb) | 487 | unsigned long base, struct ldmdb *ldb) |
488 | { | 488 | { |
489 | Sector sect; | 489 | Sector sect; |
490 | u8 *data; | 490 | u8 *data; |
@@ -492,12 +492,12 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base, | |||
492 | struct vmdb *vm; | 492 | struct vmdb *vm; |
493 | struct tocblock *toc; | 493 | struct tocblock *toc; |
494 | 494 | ||
495 | BUG_ON (!bdev || !ldb); | 495 | BUG_ON (!state || !ldb); |
496 | 496 | ||
497 | vm = &ldb->vm; | 497 | vm = &ldb->vm; |
498 | toc = &ldb->toc; | 498 | toc = &ldb->toc; |
499 | 499 | ||
500 | data = read_dev_sector (bdev, base + OFF_VMDB, &sect); | 500 | data = read_part_sector(state, base + OFF_VMDB, &sect); |
501 | if (!data) { | 501 | if (!data) { |
502 | ldm_crit ("Disk read failed."); | 502 | ldm_crit ("Disk read failed."); |
503 | return false; | 503 | return false; |
@@ -534,21 +534,21 @@ out: | |||
534 | 534 | ||
535 | /** | 535 | /** |
536 | * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk | 536 | * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk |
537 | * @bdev: Device holding the LDM Database | 537 | * @state: Partition check state including device holding the LDM Database |
538 | * | 538 | * |
539 | * This function provides a weak test to decide whether the device is a dynamic | 539 | * This function provides a weak test to decide whether the device is a dynamic |
540 | * disk or not. It looks for an MS-DOS-style partition table containing at | 540 | * disk or not. It looks for an MS-DOS-style partition table containing at |
541 | * least one partition of type 0x42 (formerly SFS, now used by Windows for | 541 | * least one partition of type 0x42 (formerly SFS, now used by Windows for |
542 | * dynamic disks). | 542 | * dynamic disks). |
543 | * | 543 | * |
544 | * N.B. The only possible error can come from the read_dev_sector and that is | 544 | * N.B. The only possible error can come from the read_part_sector and that is |
545 | * only likely to happen if the underlying device is strange. If that IS | 545 | * only likely to happen if the underlying device is strange. If that IS |
546 | * the case we should return zero to let someone else try. | 546 | * the case we should return zero to let someone else try. |
547 | * | 547 | * |
548 | * Return: 'true' @bdev is a dynamic disk | 548 | * Return: 'true' @state->bdev is a dynamic disk |
549 | * 'false' @bdev is not a dynamic disk, or an error occurred | 549 | * 'false' @state->bdev is not a dynamic disk, or an error occurred |
550 | */ | 550 | */ |
551 | static bool ldm_validate_partition_table (struct block_device *bdev) | 551 | static bool ldm_validate_partition_table(struct parsed_partitions *state) |
552 | { | 552 | { |
553 | Sector sect; | 553 | Sector sect; |
554 | u8 *data; | 554 | u8 *data; |
@@ -556,9 +556,9 @@ static bool ldm_validate_partition_table (struct block_device *bdev) | |||
556 | int i; | 556 | int i; |
557 | bool result = false; | 557 | bool result = false; |
558 | 558 | ||
559 | BUG_ON (!bdev); | 559 | BUG_ON(!state); |
560 | 560 | ||
561 | data = read_dev_sector (bdev, 0, &sect); | 561 | data = read_part_sector(state, 0, &sect); |
562 | if (!data) { | 562 | if (!data) { |
563 | ldm_crit ("Disk read failed."); | 563 | ldm_crit ("Disk read failed."); |
564 | return false; | 564 | return false; |
@@ -1391,8 +1391,8 @@ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb) | |||
1391 | 1391 | ||
1392 | /** | 1392 | /** |
1393 | * ldm_get_vblks - Read the on-disk database of VBLKs into memory | 1393 | * ldm_get_vblks - Read the on-disk database of VBLKs into memory |
1394 | * @bdev: Device holding the LDM Database | 1394 | * @state: Partition check state including device holding the LDM Database |
1395 | * @base: Offset, into @bdev, of the database | 1395 | * @base: Offset, into @state->bdev, of the database |
1396 | * @ldb: Cache of the database structures | 1396 | * @ldb: Cache of the database structures |
1397 | * | 1397 | * |
1398 | * To use the information from the VBLKs, they need to be read from the disk, | 1398 | * To use the information from the VBLKs, they need to be read from the disk, |
@@ -1401,8 +1401,8 @@ static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb) | |||
1401 | * Return: 'true' All the VBLKs were read successfully | 1401 | * Return: 'true' All the VBLKs were read successfully |
1402 | * 'false' An error occurred | 1402 | * 'false' An error occurred |
1403 | */ | 1403 | */ |
1404 | static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, | 1404 | static bool ldm_get_vblks(struct parsed_partitions *state, unsigned long base, |
1405 | struct ldmdb *ldb) | 1405 | struct ldmdb *ldb) |
1406 | { | 1406 | { |
1407 | int size, perbuf, skip, finish, s, v, recs; | 1407 | int size, perbuf, skip, finish, s, v, recs; |
1408 | u8 *data = NULL; | 1408 | u8 *data = NULL; |
@@ -1410,7 +1410,7 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, | |||
1410 | bool result = false; | 1410 | bool result = false; |
1411 | LIST_HEAD (frags); | 1411 | LIST_HEAD (frags); |
1412 | 1412 | ||
1413 | BUG_ON (!bdev || !ldb); | 1413 | BUG_ON(!state || !ldb); |
1414 | 1414 | ||
1415 | size = ldb->vm.vblk_size; | 1415 | size = ldb->vm.vblk_size; |
1416 | perbuf = 512 / size; | 1416 | perbuf = 512 / size; |
@@ -1418,7 +1418,7 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base, | |||
1418 | finish = (size * ldb->vm.last_vblk_seq) >> 9; | 1418 | finish = (size * ldb->vm.last_vblk_seq) >> 9; |
1419 | 1419 | ||
1420 | for (s = skip; s < finish; s++) { /* For each sector */ | 1420 | for (s = skip; s < finish; s++) { /* For each sector */ |
1421 | data = read_dev_sector (bdev, base + OFF_VMDB + s, &sect); | 1421 | data = read_part_sector(state, base + OFF_VMDB + s, &sect); |
1422 | if (!data) { | 1422 | if (!data) { |
1423 | ldm_crit ("Disk read failed."); | 1423 | ldm_crit ("Disk read failed."); |
1424 | goto out; | 1424 | goto out; |
@@ -1474,8 +1474,7 @@ static void ldm_free_vblks (struct list_head *lh) | |||
1474 | 1474 | ||
1475 | /** | 1475 | /** |
1476 | * ldm_partition - Find out whether a device is a dynamic disk and handle it | 1476 | * ldm_partition - Find out whether a device is a dynamic disk and handle it |
1477 | * @pp: List of the partitions parsed so far | 1477 | * @state: Partition check state including device holding the LDM Database |
1478 | * @bdev: Device holding the LDM Database | ||
1479 | * | 1478 | * |
1480 | * This determines whether the device @bdev is a dynamic disk and if so creates | 1479 | * This determines whether the device @bdev is a dynamic disk and if so creates |
1481 | * the partitions necessary in the gendisk structure pointed to by @hd. | 1480 | * the partitions necessary in the gendisk structure pointed to by @hd. |
@@ -1485,21 +1484,21 @@ static void ldm_free_vblks (struct list_head *lh) | |||
1485 | * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3, | 1484 | * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3, |
1486 | * and so on: the actual data containing partitions. | 1485 | * and so on: the actual data containing partitions. |
1487 | * | 1486 | * |
1488 | * Return: 1 Success, @bdev is a dynamic disk and we handled it | 1487 | * Return: 1 Success, @state->bdev is a dynamic disk and we handled it |
1489 | * 0 Success, @bdev is not a dynamic disk | 1488 | * 0 Success, @state->bdev is not a dynamic disk |
1490 | * -1 An error occurred before enough information had been read | 1489 | * -1 An error occurred before enough information had been read |
1491 | * Or @bdev is a dynamic disk, but it may be corrupted | 1490 | * Or @state->bdev is a dynamic disk, but it may be corrupted |
1492 | */ | 1491 | */ |
1493 | int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev) | 1492 | int ldm_partition(struct parsed_partitions *state) |
1494 | { | 1493 | { |
1495 | struct ldmdb *ldb; | 1494 | struct ldmdb *ldb; |
1496 | unsigned long base; | 1495 | unsigned long base; |
1497 | int result = -1; | 1496 | int result = -1; |
1498 | 1497 | ||
1499 | BUG_ON (!pp || !bdev); | 1498 | BUG_ON(!state); |
1500 | 1499 | ||
1501 | /* Look for signs of a Dynamic Disk */ | 1500 | /* Look for signs of a Dynamic Disk */ |
1502 | if (!ldm_validate_partition_table (bdev)) | 1501 | if (!ldm_validate_partition_table(state)) |
1503 | return 0; | 1502 | return 0; |
1504 | 1503 | ||
1505 | ldb = kmalloc (sizeof (*ldb), GFP_KERNEL); | 1504 | ldb = kmalloc (sizeof (*ldb), GFP_KERNEL); |
@@ -1509,15 +1508,15 @@ int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev) | |||
1509 | } | 1508 | } |
1510 | 1509 | ||
1511 | /* Parse and check privheads. */ | 1510 | /* Parse and check privheads. */ |
1512 | if (!ldm_validate_privheads (bdev, &ldb->ph)) | 1511 | if (!ldm_validate_privheads(state, &ldb->ph)) |
1513 | goto out; /* Already logged */ | 1512 | goto out; /* Already logged */ |
1514 | 1513 | ||
1515 | /* All further references are relative to base (database start). */ | 1514 | /* All further references are relative to base (database start). */ |
1516 | base = ldb->ph.config_start; | 1515 | base = ldb->ph.config_start; |
1517 | 1516 | ||
1518 | /* Parse and check tocs and vmdb. */ | 1517 | /* Parse and check tocs and vmdb. */ |
1519 | if (!ldm_validate_tocblocks (bdev, base, ldb) || | 1518 | if (!ldm_validate_tocblocks(state, base, ldb) || |
1520 | !ldm_validate_vmdb (bdev, base, ldb)) | 1519 | !ldm_validate_vmdb(state, base, ldb)) |
1521 | goto out; /* Already logged */ | 1520 | goto out; /* Already logged */ |
1522 | 1521 | ||
1523 | /* Initialize vblk lists in ldmdb struct */ | 1522 | /* Initialize vblk lists in ldmdb struct */ |
@@ -1527,13 +1526,13 @@ int ldm_partition (struct parsed_partitions *pp, struct block_device *bdev) | |||
1527 | INIT_LIST_HEAD (&ldb->v_comp); | 1526 | INIT_LIST_HEAD (&ldb->v_comp); |
1528 | INIT_LIST_HEAD (&ldb->v_part); | 1527 | INIT_LIST_HEAD (&ldb->v_part); |
1529 | 1528 | ||
1530 | if (!ldm_get_vblks (bdev, base, ldb)) { | 1529 | if (!ldm_get_vblks(state, base, ldb)) { |
1531 | ldm_crit ("Failed to read the VBLKs from the database."); | 1530 | ldm_crit ("Failed to read the VBLKs from the database."); |
1532 | goto cleanup; | 1531 | goto cleanup; |
1533 | } | 1532 | } |
1534 | 1533 | ||
1535 | /* Finally, create the data partition devices. */ | 1534 | /* Finally, create the data partition devices. */ |
1536 | if (ldm_create_data_partitions (pp, ldb)) { | 1535 | if (ldm_create_data_partitions(state, ldb)) { |
1537 | ldm_debug ("Parsed LDM database successfully."); | 1536 | ldm_debug ("Parsed LDM database successfully."); |
1538 | result = 1; | 1537 | result = 1; |
1539 | } | 1538 | } |
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h index 30e08e809c1d..d1fb50b28d86 100644 --- a/fs/partitions/ldm.h +++ b/fs/partitions/ldm.h | |||
@@ -209,7 +209,7 @@ struct ldmdb { /* Cache of the database */ | |||
209 | struct list_head v_part; | 209 | struct list_head v_part; |
210 | }; | 210 | }; |
211 | 211 | ||
212 | int ldm_partition (struct parsed_partitions *state, struct block_device *bdev); | 212 | int ldm_partition(struct parsed_partitions *state); |
213 | 213 | ||
214 | #endif /* _FS_PT_LDM_H_ */ | 214 | #endif /* _FS_PT_LDM_H_ */ |
215 | 215 | ||
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c index d4a0fad3563b..74465ff7c263 100644 --- a/fs/partitions/mac.c +++ b/fs/partitions/mac.c | |||
@@ -27,7 +27,7 @@ static inline void mac_fix_string(char *stg, int len) | |||
27 | stg[i] = 0; | 27 | stg[i] = 0; |
28 | } | 28 | } |
29 | 29 | ||
30 | int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | 30 | int mac_partition(struct parsed_partitions *state) |
31 | { | 31 | { |
32 | int slot = 1; | 32 | int slot = 1; |
33 | Sector sect; | 33 | Sector sect; |
@@ -42,7 +42,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
42 | struct mac_driver_desc *md; | 42 | struct mac_driver_desc *md; |
43 | 43 | ||
44 | /* Get 0th block and look at the first partition map entry. */ | 44 | /* Get 0th block and look at the first partition map entry. */ |
45 | md = (struct mac_driver_desc *) read_dev_sector(bdev, 0, &sect); | 45 | md = read_part_sector(state, 0, &sect); |
46 | if (!md) | 46 | if (!md) |
47 | return -1; | 47 | return -1; |
48 | if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) { | 48 | if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) { |
@@ -51,7 +51,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
51 | } | 51 | } |
52 | secsize = be16_to_cpu(md->block_size); | 52 | secsize = be16_to_cpu(md->block_size); |
53 | put_dev_sector(sect); | 53 | put_dev_sector(sect); |
54 | data = read_dev_sector(bdev, secsize/512, &sect); | 54 | data = read_part_sector(state, secsize/512, &sect); |
55 | if (!data) | 55 | if (!data) |
56 | return -1; | 56 | return -1; |
57 | part = (struct mac_partition *) (data + secsize%512); | 57 | part = (struct mac_partition *) (data + secsize%512); |
@@ -64,7 +64,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
64 | for (blk = 1; blk <= blocks_in_map; ++blk) { | 64 | for (blk = 1; blk <= blocks_in_map; ++blk) { |
65 | int pos = blk * secsize; | 65 | int pos = blk * secsize; |
66 | put_dev_sector(sect); | 66 | put_dev_sector(sect); |
67 | data = read_dev_sector(bdev, pos/512, &sect); | 67 | data = read_part_sector(state, pos/512, &sect); |
68 | if (!data) | 68 | if (!data) |
69 | return -1; | 69 | return -1; |
70 | part = (struct mac_partition *) (data + pos%512); | 70 | part = (struct mac_partition *) (data + pos%512); |
@@ -75,7 +75,7 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
75 | be32_to_cpu(part->block_count) * (secsize/512)); | 75 | be32_to_cpu(part->block_count) * (secsize/512)); |
76 | 76 | ||
77 | if (!strnicmp(part->type, "Linux_RAID", 10)) | 77 | if (!strnicmp(part->type, "Linux_RAID", 10)) |
78 | state->parts[slot].flags = 1; | 78 | state->parts[slot].flags = ADDPART_FLAG_RAID; |
79 | #ifdef CONFIG_PPC_PMAC | 79 | #ifdef CONFIG_PPC_PMAC |
80 | /* | 80 | /* |
81 | * If this is the first bootable partition, tell the | 81 | * If this is the first bootable partition, tell the |
@@ -123,7 +123,8 @@ int mac_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
123 | } | 123 | } |
124 | #ifdef CONFIG_PPC_PMAC | 124 | #ifdef CONFIG_PPC_PMAC |
125 | if (found_root_goodness) | 125 | if (found_root_goodness) |
126 | note_bootable_part(bdev->bd_dev, found_root, found_root_goodness); | 126 | note_bootable_part(state->bdev->bd_dev, found_root, |
127 | found_root_goodness); | ||
127 | #endif | 128 | #endif |
128 | 129 | ||
129 | put_dev_sector(sect); | 130 | put_dev_sector(sect); |
diff --git a/fs/partitions/mac.h b/fs/partitions/mac.h index bbf26e1386fa..3c7d98436380 100644 --- a/fs/partitions/mac.h +++ b/fs/partitions/mac.h | |||
@@ -41,4 +41,4 @@ struct mac_driver_desc { | |||
41 | /* ... more stuff */ | 41 | /* ... more stuff */ |
42 | }; | 42 | }; |
43 | 43 | ||
44 | int mac_partition(struct parsed_partitions *state, struct block_device *bdev); | 44 | int mac_partition(struct parsed_partitions *state); |
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 90be97f1f5a8..15bfb7b1e044 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c | |||
@@ -64,7 +64,7 @@ msdos_magic_present(unsigned char *p) | |||
64 | #define AIX_LABEL_MAGIC2 0xC2 | 64 | #define AIX_LABEL_MAGIC2 0xC2 |
65 | #define AIX_LABEL_MAGIC3 0xD4 | 65 | #define AIX_LABEL_MAGIC3 0xD4 |
66 | #define AIX_LABEL_MAGIC4 0xC1 | 66 | #define AIX_LABEL_MAGIC4 0xC1 |
67 | static int aix_magic_present(unsigned char *p, struct block_device *bdev) | 67 | static int aix_magic_present(struct parsed_partitions *state, unsigned char *p) |
68 | { | 68 | { |
69 | struct partition *pt = (struct partition *) (p + 0x1be); | 69 | struct partition *pt = (struct partition *) (p + 0x1be); |
70 | Sector sect; | 70 | Sector sect; |
@@ -85,7 +85,7 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev) | |||
85 | is_extended_partition(pt)) | 85 | is_extended_partition(pt)) |
86 | return 0; | 86 | return 0; |
87 | } | 87 | } |
88 | d = read_dev_sector(bdev, 7, &sect); | 88 | d = read_part_sector(state, 7, &sect); |
89 | if (d) { | 89 | if (d) { |
90 | if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') | 90 | if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') |
91 | ret = 1; | 91 | ret = 1; |
@@ -105,15 +105,14 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev) | |||
105 | * only for the actual data partitions. | 105 | * only for the actual data partitions. |
106 | */ | 106 | */ |
107 | 107 | ||
108 | static void | 108 | static void parse_extended(struct parsed_partitions *state, |
109 | parse_extended(struct parsed_partitions *state, struct block_device *bdev, | 109 | sector_t first_sector, sector_t first_size) |
110 | sector_t first_sector, sector_t first_size) | ||
111 | { | 110 | { |
112 | struct partition *p; | 111 | struct partition *p; |
113 | Sector sect; | 112 | Sector sect; |
114 | unsigned char *data; | 113 | unsigned char *data; |
115 | sector_t this_sector, this_size; | 114 | sector_t this_sector, this_size; |
116 | sector_t sector_size = bdev_logical_block_size(bdev) / 512; | 115 | sector_t sector_size = bdev_logical_block_size(state->bdev) / 512; |
117 | int loopct = 0; /* number of links followed | 116 | int loopct = 0; /* number of links followed |
118 | without finding a data partition */ | 117 | without finding a data partition */ |
119 | int i; | 118 | int i; |
@@ -126,7 +125,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev, | |||
126 | return; | 125 | return; |
127 | if (state->next == state->limit) | 126 | if (state->next == state->limit) |
128 | return; | 127 | return; |
129 | data = read_dev_sector(bdev, this_sector, &sect); | 128 | data = read_part_sector(state, this_sector, &sect); |
130 | if (!data) | 129 | if (!data) |
131 | return; | 130 | return; |
132 | 131 | ||
@@ -198,9 +197,8 @@ done: | |||
198 | /* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also | 197 | /* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also |
199 | indicates linux swap. Be careful before believing this is Solaris. */ | 198 | indicates linux swap. Be careful before believing this is Solaris. */ |
200 | 199 | ||
201 | static void | 200 | static void parse_solaris_x86(struct parsed_partitions *state, |
202 | parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, | 201 | sector_t offset, sector_t size, int origin) |
203 | sector_t offset, sector_t size, int origin) | ||
204 | { | 202 | { |
205 | #ifdef CONFIG_SOLARIS_X86_PARTITION | 203 | #ifdef CONFIG_SOLARIS_X86_PARTITION |
206 | Sector sect; | 204 | Sector sect; |
@@ -208,7 +206,7 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, | |||
208 | int i; | 206 | int i; |
209 | short max_nparts; | 207 | short max_nparts; |
210 | 208 | ||
211 | v = (struct solaris_x86_vtoc *)read_dev_sector(bdev, offset+1, &sect); | 209 | v = read_part_sector(state, offset + 1, &sect); |
212 | if (!v) | 210 | if (!v) |
213 | return; | 211 | return; |
214 | if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) { | 212 | if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) { |
@@ -245,16 +243,15 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev, | |||
245 | * Create devices for BSD partitions listed in a disklabel, under a | 243 | * Create devices for BSD partitions listed in a disklabel, under a |
246 | * dos-like partition. See parse_extended() for more information. | 244 | * dos-like partition. See parse_extended() for more information. |
247 | */ | 245 | */ |
248 | static void | 246 | static void parse_bsd(struct parsed_partitions *state, |
249 | parse_bsd(struct parsed_partitions *state, struct block_device *bdev, | 247 | sector_t offset, sector_t size, int origin, char *flavour, |
250 | sector_t offset, sector_t size, int origin, char *flavour, | 248 | int max_partitions) |
251 | int max_partitions) | ||
252 | { | 249 | { |
253 | Sector sect; | 250 | Sector sect; |
254 | struct bsd_disklabel *l; | 251 | struct bsd_disklabel *l; |
255 | struct bsd_partition *p; | 252 | struct bsd_partition *p; |
256 | 253 | ||
257 | l = (struct bsd_disklabel *)read_dev_sector(bdev, offset+1, &sect); | 254 | l = read_part_sector(state, offset + 1, &sect); |
258 | if (!l) | 255 | if (!l) |
259 | return; | 256 | return; |
260 | if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) { | 257 | if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) { |
@@ -291,33 +288,28 @@ parse_bsd(struct parsed_partitions *state, struct block_device *bdev, | |||
291 | } | 288 | } |
292 | #endif | 289 | #endif |
293 | 290 | ||
294 | static void | 291 | static void parse_freebsd(struct parsed_partitions *state, |
295 | parse_freebsd(struct parsed_partitions *state, struct block_device *bdev, | 292 | sector_t offset, sector_t size, int origin) |
296 | sector_t offset, sector_t size, int origin) | ||
297 | { | 293 | { |
298 | #ifdef CONFIG_BSD_DISKLABEL | 294 | #ifdef CONFIG_BSD_DISKLABEL |
299 | parse_bsd(state, bdev, offset, size, origin, | 295 | parse_bsd(state, offset, size, origin, "bsd", BSD_MAXPARTITIONS); |
300 | "bsd", BSD_MAXPARTITIONS); | ||
301 | #endif | 296 | #endif |
302 | } | 297 | } |
303 | 298 | ||
304 | static void | 299 | static void parse_netbsd(struct parsed_partitions *state, |
305 | parse_netbsd(struct parsed_partitions *state, struct block_device *bdev, | 300 | sector_t offset, sector_t size, int origin) |
306 | sector_t offset, sector_t size, int origin) | ||
307 | { | 301 | { |
308 | #ifdef CONFIG_BSD_DISKLABEL | 302 | #ifdef CONFIG_BSD_DISKLABEL |
309 | parse_bsd(state, bdev, offset, size, origin, | 303 | parse_bsd(state, offset, size, origin, "netbsd", BSD_MAXPARTITIONS); |
310 | "netbsd", BSD_MAXPARTITIONS); | ||
311 | #endif | 304 | #endif |
312 | } | 305 | } |
313 | 306 | ||
314 | static void | 307 | static void parse_openbsd(struct parsed_partitions *state, |
315 | parse_openbsd(struct parsed_partitions *state, struct block_device *bdev, | 308 | sector_t offset, sector_t size, int origin) |
316 | sector_t offset, sector_t size, int origin) | ||
317 | { | 309 | { |
318 | #ifdef CONFIG_BSD_DISKLABEL | 310 | #ifdef CONFIG_BSD_DISKLABEL |
319 | parse_bsd(state, bdev, offset, size, origin, | 311 | parse_bsd(state, offset, size, origin, "openbsd", |
320 | "openbsd", OPENBSD_MAXPARTITIONS); | 312 | OPENBSD_MAXPARTITIONS); |
321 | #endif | 313 | #endif |
322 | } | 314 | } |
323 | 315 | ||
@@ -325,16 +317,15 @@ parse_openbsd(struct parsed_partitions *state, struct block_device *bdev, | |||
325 | * Create devices for Unixware partitions listed in a disklabel, under a | 317 | * Create devices for Unixware partitions listed in a disklabel, under a |
326 | * dos-like partition. See parse_extended() for more information. | 318 | * dos-like partition. See parse_extended() for more information. |
327 | */ | 319 | */ |
328 | static void | 320 | static void parse_unixware(struct parsed_partitions *state, |
329 | parse_unixware(struct parsed_partitions *state, struct block_device *bdev, | 321 | sector_t offset, sector_t size, int origin) |
330 | sector_t offset, sector_t size, int origin) | ||
331 | { | 322 | { |
332 | #ifdef CONFIG_UNIXWARE_DISKLABEL | 323 | #ifdef CONFIG_UNIXWARE_DISKLABEL |
333 | Sector sect; | 324 | Sector sect; |
334 | struct unixware_disklabel *l; | 325 | struct unixware_disklabel *l; |
335 | struct unixware_slice *p; | 326 | struct unixware_slice *p; |
336 | 327 | ||
337 | l = (struct unixware_disklabel *)read_dev_sector(bdev, offset+29, &sect); | 328 | l = read_part_sector(state, offset + 29, &sect); |
338 | if (!l) | 329 | if (!l) |
339 | return; | 330 | return; |
340 | if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC || | 331 | if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC || |
@@ -365,9 +356,8 @@ parse_unixware(struct parsed_partitions *state, struct block_device *bdev, | |||
365 | * Anand Krishnamurthy <anandk@wiproge.med.ge.com> | 356 | * Anand Krishnamurthy <anandk@wiproge.med.ge.com> |
366 | * Rajeev V. Pillai <rajeevvp@yahoo.com> | 357 | * Rajeev V. Pillai <rajeevvp@yahoo.com> |
367 | */ | 358 | */ |
368 | static void | 359 | static void parse_minix(struct parsed_partitions *state, |
369 | parse_minix(struct parsed_partitions *state, struct block_device *bdev, | 360 | sector_t offset, sector_t size, int origin) |
370 | sector_t offset, sector_t size, int origin) | ||
371 | { | 361 | { |
372 | #ifdef CONFIG_MINIX_SUBPARTITION | 362 | #ifdef CONFIG_MINIX_SUBPARTITION |
373 | Sector sect; | 363 | Sector sect; |
@@ -375,7 +365,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev, | |||
375 | struct partition *p; | 365 | struct partition *p; |
376 | int i; | 366 | int i; |
377 | 367 | ||
378 | data = read_dev_sector(bdev, offset, &sect); | 368 | data = read_part_sector(state, offset, &sect); |
379 | if (!data) | 369 | if (!data) |
380 | return; | 370 | return; |
381 | 371 | ||
@@ -404,8 +394,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev, | |||
404 | 394 | ||
405 | static struct { | 395 | static struct { |
406 | unsigned char id; | 396 | unsigned char id; |
407 | void (*parse)(struct parsed_partitions *, struct block_device *, | 397 | void (*parse)(struct parsed_partitions *, sector_t, sector_t, int); |
408 | sector_t, sector_t, int); | ||
409 | } subtypes[] = { | 398 | } subtypes[] = { |
410 | {FREEBSD_PARTITION, parse_freebsd}, | 399 | {FREEBSD_PARTITION, parse_freebsd}, |
411 | {NETBSD_PARTITION, parse_netbsd}, | 400 | {NETBSD_PARTITION, parse_netbsd}, |
@@ -417,16 +406,16 @@ static struct { | |||
417 | {0, NULL}, | 406 | {0, NULL}, |
418 | }; | 407 | }; |
419 | 408 | ||
420 | int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) | 409 | int msdos_partition(struct parsed_partitions *state) |
421 | { | 410 | { |
422 | sector_t sector_size = bdev_logical_block_size(bdev) / 512; | 411 | sector_t sector_size = bdev_logical_block_size(state->bdev) / 512; |
423 | Sector sect; | 412 | Sector sect; |
424 | unsigned char *data; | 413 | unsigned char *data; |
425 | struct partition *p; | 414 | struct partition *p; |
426 | struct fat_boot_sector *fb; | 415 | struct fat_boot_sector *fb; |
427 | int slot; | 416 | int slot; |
428 | 417 | ||
429 | data = read_dev_sector(bdev, 0, &sect); | 418 | data = read_part_sector(state, 0, &sect); |
430 | if (!data) | 419 | if (!data) |
431 | return -1; | 420 | return -1; |
432 | if (!msdos_magic_present(data + 510)) { | 421 | if (!msdos_magic_present(data + 510)) { |
@@ -434,7 +423,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
434 | return 0; | 423 | return 0; |
435 | } | 424 | } |
436 | 425 | ||
437 | if (aix_magic_present(data, bdev)) { | 426 | if (aix_magic_present(state, data)) { |
438 | put_dev_sector(sect); | 427 | put_dev_sector(sect); |
439 | printk( " [AIX]"); | 428 | printk( " [AIX]"); |
440 | return 0; | 429 | return 0; |
@@ -503,13 +492,13 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
503 | put_partition(state, slot, start, n); | 492 | put_partition(state, slot, start, n); |
504 | 493 | ||
505 | printk(" <"); | 494 | printk(" <"); |
506 | parse_extended(state, bdev, start, size); | 495 | parse_extended(state, start, size); |
507 | printk(" >"); | 496 | printk(" >"); |
508 | continue; | 497 | continue; |
509 | } | 498 | } |
510 | put_partition(state, slot, start, size); | 499 | put_partition(state, slot, start, size); |
511 | if (SYS_IND(p) == LINUX_RAID_PARTITION) | 500 | if (SYS_IND(p) == LINUX_RAID_PARTITION) |
512 | state->parts[slot].flags = 1; | 501 | state->parts[slot].flags = ADDPART_FLAG_RAID; |
513 | if (SYS_IND(p) == DM6_PARTITION) | 502 | if (SYS_IND(p) == DM6_PARTITION) |
514 | printk("[DM]"); | 503 | printk("[DM]"); |
515 | if (SYS_IND(p) == EZD_PARTITION) | 504 | if (SYS_IND(p) == EZD_PARTITION) |
@@ -532,8 +521,8 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
532 | 521 | ||
533 | if (!subtypes[n].parse) | 522 | if (!subtypes[n].parse) |
534 | continue; | 523 | continue; |
535 | subtypes[n].parse(state, bdev, start_sect(p)*sector_size, | 524 | subtypes[n].parse(state, start_sect(p) * sector_size, |
536 | nr_sects(p)*sector_size, slot); | 525 | nr_sects(p) * sector_size, slot); |
537 | } | 526 | } |
538 | put_dev_sector(sect); | 527 | put_dev_sector(sect); |
539 | return 1; | 528 | return 1; |
diff --git a/fs/partitions/msdos.h b/fs/partitions/msdos.h index 01e5e0b6902d..38c781c490b3 100644 --- a/fs/partitions/msdos.h +++ b/fs/partitions/msdos.h | |||
@@ -4,5 +4,5 @@ | |||
4 | 4 | ||
5 | #define MSDOS_LABEL_MAGIC 0xAA55 | 5 | #define MSDOS_LABEL_MAGIC 0xAA55 |
6 | 6 | ||
7 | int msdos_partition(struct parsed_partitions *state, struct block_device *bdev); | 7 | int msdos_partition(struct parsed_partitions *state); |
8 | 8 | ||
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c index c05c17bc5df3..fc22b85d436a 100644 --- a/fs/partitions/osf.c +++ b/fs/partitions/osf.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include "check.h" | 10 | #include "check.h" |
11 | #include "osf.h" | 11 | #include "osf.h" |
12 | 12 | ||
13 | int osf_partition(struct parsed_partitions *state, struct block_device *bdev) | 13 | int osf_partition(struct parsed_partitions *state) |
14 | { | 14 | { |
15 | int i; | 15 | int i; |
16 | int slot = 1; | 16 | int slot = 1; |
@@ -49,7 +49,7 @@ int osf_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
49 | } * label; | 49 | } * label; |
50 | struct d_partition * partition; | 50 | struct d_partition * partition; |
51 | 51 | ||
52 | data = read_dev_sector(bdev, 0, &sect); | 52 | data = read_part_sector(state, 0, &sect); |
53 | if (!data) | 53 | if (!data) |
54 | return -1; | 54 | return -1; |
55 | 55 | ||
diff --git a/fs/partitions/osf.h b/fs/partitions/osf.h index 427b8eab314b..20ed2315ec16 100644 --- a/fs/partitions/osf.h +++ b/fs/partitions/osf.h | |||
@@ -4,4 +4,4 @@ | |||
4 | 4 | ||
5 | #define DISKLABELMAGIC (0x82564557UL) | 5 | #define DISKLABELMAGIC (0x82564557UL) |
6 | 6 | ||
7 | int osf_partition(struct parsed_partitions *state, struct block_device *bdev); | 7 | int osf_partition(struct parsed_partitions *state); |
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c index ed5ac83fe83a..43b1df9aa16c 100644 --- a/fs/partitions/sgi.c +++ b/fs/partitions/sgi.c | |||
@@ -27,7 +27,7 @@ struct sgi_disklabel { | |||
27 | __be32 _unused1; /* Padding */ | 27 | __be32 _unused1; /* Padding */ |
28 | }; | 28 | }; |
29 | 29 | ||
30 | int sgi_partition(struct parsed_partitions *state, struct block_device *bdev) | 30 | int sgi_partition(struct parsed_partitions *state) |
31 | { | 31 | { |
32 | int i, csum; | 32 | int i, csum; |
33 | __be32 magic; | 33 | __be32 magic; |
@@ -39,7 +39,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
39 | struct sgi_partition *p; | 39 | struct sgi_partition *p; |
40 | char b[BDEVNAME_SIZE]; | 40 | char b[BDEVNAME_SIZE]; |
41 | 41 | ||
42 | label = (struct sgi_disklabel *) read_dev_sector(bdev, 0, &sect); | 42 | label = read_part_sector(state, 0, &sect); |
43 | if (!label) | 43 | if (!label) |
44 | return -1; | 44 | return -1; |
45 | p = &label->partitions[0]; | 45 | p = &label->partitions[0]; |
@@ -57,7 +57,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
57 | } | 57 | } |
58 | if(csum) { | 58 | if(csum) { |
59 | printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n", | 59 | printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n", |
60 | bdevname(bdev, b)); | 60 | bdevname(state->bdev, b)); |
61 | put_dev_sector(sect); | 61 | put_dev_sector(sect); |
62 | return 0; | 62 | return 0; |
63 | } | 63 | } |
diff --git a/fs/partitions/sgi.h b/fs/partitions/sgi.h index 5d5595c09928..b9553ebdd5a9 100644 --- a/fs/partitions/sgi.h +++ b/fs/partitions/sgi.h | |||
@@ -2,7 +2,7 @@ | |||
2 | * fs/partitions/sgi.h | 2 | * fs/partitions/sgi.h |
3 | */ | 3 | */ |
4 | 4 | ||
5 | extern int sgi_partition(struct parsed_partitions *state, struct block_device *bdev); | 5 | extern int sgi_partition(struct parsed_partitions *state); |
6 | 6 | ||
7 | #define SGI_LABEL_MAGIC 0x0be5a941 | 7 | #define SGI_LABEL_MAGIC 0x0be5a941 |
8 | 8 | ||
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c index c95e6a62c01d..a32660e25f7f 100644 --- a/fs/partitions/sun.c +++ b/fs/partitions/sun.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include "check.h" | 10 | #include "check.h" |
11 | #include "sun.h" | 11 | #include "sun.h" |
12 | 12 | ||
13 | int sun_partition(struct parsed_partitions *state, struct block_device *bdev) | 13 | int sun_partition(struct parsed_partitions *state) |
14 | { | 14 | { |
15 | int i; | 15 | int i; |
16 | __be16 csum; | 16 | __be16 csum; |
@@ -61,7 +61,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
61 | int use_vtoc; | 61 | int use_vtoc; |
62 | int nparts; | 62 | int nparts; |
63 | 63 | ||
64 | label = (struct sun_disklabel *)read_dev_sector(bdev, 0, &sect); | 64 | label = read_part_sector(state, 0, &sect); |
65 | if (!label) | 65 | if (!label) |
66 | return -1; | 66 | return -1; |
67 | 67 | ||
@@ -78,7 +78,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
78 | csum ^= *ush--; | 78 | csum ^= *ush--; |
79 | if (csum) { | 79 | if (csum) { |
80 | printk("Dev %s Sun disklabel: Csum bad, label corrupted\n", | 80 | printk("Dev %s Sun disklabel: Csum bad, label corrupted\n", |
81 | bdevname(bdev, b)); | 81 | bdevname(state->bdev, b)); |
82 | put_dev_sector(sect); | 82 | put_dev_sector(sect); |
83 | return 0; | 83 | return 0; |
84 | } | 84 | } |
diff --git a/fs/partitions/sun.h b/fs/partitions/sun.h index 7f864d1f86d4..2424baa8319f 100644 --- a/fs/partitions/sun.h +++ b/fs/partitions/sun.h | |||
@@ -5,4 +5,4 @@ | |||
5 | #define SUN_LABEL_MAGIC 0xDABE | 5 | #define SUN_LABEL_MAGIC 0xDABE |
6 | #define SUN_VTOC_SANITY 0x600DDEEE | 6 | #define SUN_VTOC_SANITY 0x600DDEEE |
7 | 7 | ||
8 | int sun_partition(struct parsed_partitions *state, struct block_device *bdev); | 8 | int sun_partition(struct parsed_partitions *state); |
diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c index 4eba27b78643..9030c864428e 100644 --- a/fs/partitions/sysv68.c +++ b/fs/partitions/sysv68.c | |||
@@ -46,7 +46,7 @@ struct slice { | |||
46 | }; | 46 | }; |
47 | 47 | ||
48 | 48 | ||
49 | int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev) | 49 | int sysv68_partition(struct parsed_partitions *state) |
50 | { | 50 | { |
51 | int i, slices; | 51 | int i, slices; |
52 | int slot = 1; | 52 | int slot = 1; |
@@ -55,7 +55,7 @@ int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
55 | struct dkblk0 *b; | 55 | struct dkblk0 *b; |
56 | struct slice *slice; | 56 | struct slice *slice; |
57 | 57 | ||
58 | data = read_dev_sector(bdev, 0, &sect); | 58 | data = read_part_sector(state, 0, &sect); |
59 | if (!data) | 59 | if (!data) |
60 | return -1; | 60 | return -1; |
61 | 61 | ||
@@ -68,7 +68,7 @@ int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
68 | i = be32_to_cpu(b->dk_ios.ios_slcblk); | 68 | i = be32_to_cpu(b->dk_ios.ios_slcblk); |
69 | put_dev_sector(sect); | 69 | put_dev_sector(sect); |
70 | 70 | ||
71 | data = read_dev_sector(bdev, i, &sect); | 71 | data = read_part_sector(state, i, &sect); |
72 | if (!data) | 72 | if (!data) |
73 | return -1; | 73 | return -1; |
74 | 74 | ||
diff --git a/fs/partitions/sysv68.h b/fs/partitions/sysv68.h index fa733f68431b..bf2f5ffa97ac 100644 --- a/fs/partitions/sysv68.h +++ b/fs/partitions/sysv68.h | |||
@@ -1 +1 @@ | |||
extern int sysv68_partition(struct parsed_partitions *state, struct block_device *bdev); | extern int sysv68_partition(struct parsed_partitions *state); | ||
diff --git a/fs/partitions/ultrix.c b/fs/partitions/ultrix.c index ec852c11dce4..db9eef260364 100644 --- a/fs/partitions/ultrix.c +++ b/fs/partitions/ultrix.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include "check.h" | 9 | #include "check.h" |
10 | #include "ultrix.h" | 10 | #include "ultrix.h" |
11 | 11 | ||
12 | int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev) | 12 | int ultrix_partition(struct parsed_partitions *state) |
13 | { | 13 | { |
14 | int i; | 14 | int i; |
15 | Sector sect; | 15 | Sector sect; |
@@ -26,7 +26,7 @@ int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
26 | #define PT_MAGIC 0x032957 /* Partition magic number */ | 26 | #define PT_MAGIC 0x032957 /* Partition magic number */ |
27 | #define PT_VALID 1 /* Indicates if struct is valid */ | 27 | #define PT_VALID 1 /* Indicates if struct is valid */ |
28 | 28 | ||
29 | data = read_dev_sector(bdev, (16384 - sizeof(*label))/512, &sect); | 29 | data = read_part_sector(state, (16384 - sizeof(*label))/512, &sect); |
30 | if (!data) | 30 | if (!data) |
31 | return -1; | 31 | return -1; |
32 | 32 | ||
diff --git a/fs/partitions/ultrix.h b/fs/partitions/ultrix.h index a74bf8e2d370..a3cc00b2bded 100644 --- a/fs/partitions/ultrix.h +++ b/fs/partitions/ultrix.h | |||
@@ -2,4 +2,4 @@ | |||
2 | * fs/partitions/ultrix.h | 2 | * fs/partitions/ultrix.h |
3 | */ | 3 | */ |
4 | 4 | ||
5 | int ultrix_partition(struct parsed_partitions *state, struct block_device *bdev); | 5 | int ultrix_partition(struct parsed_partitions *state); |
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/log2.h> | ||
14 | #include <linux/mount.h> | 15 | #include <linux/mount.h> |
15 | #include <linux/pipe_fs_i.h> | 16 | #include <linux/pipe_fs_i.h> |
16 | #include <linux/uio.h> | 17 | #include <linux/uio.h> |
@@ -18,11 +19,18 @@ | |||
18 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
19 | #include <linux/audit.h> | 20 | #include <linux/audit.h> |
20 | #include <linux/syscalls.h> | 21 | #include <linux/syscalls.h> |
22 | #include <linux/fcntl.h> | ||
21 | 23 | ||
22 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
23 | #include <asm/ioctls.h> | 25 | #include <asm/ioctls.h> |
24 | 26 | ||
25 | /* | 27 | /* |
28 | * The max size that a non-root user is allowed to grow the pipe. Can | ||
29 | * be set by root in /proc/sys/fs/pipe-max-pages | ||
30 | */ | ||
31 | unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; | ||
32 | |||
33 | /* | ||
26 | * We use a start+len construction, which provides full use of the | 34 | * We use a start+len construction, which provides full use of the |
27 | * allocated memory. | 35 | * allocated memory. |
28 | * -- Florian Coosmann (FGC) | 36 | * -- Florian Coosmann (FGC) |
@@ -390,7 +398,7 @@ redo: | |||
390 | if (!buf->len) { | 398 | if (!buf->len) { |
391 | buf->ops = NULL; | 399 | buf->ops = NULL; |
392 | ops->release(pipe, buf); | 400 | ops->release(pipe, buf); |
393 | curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); | 401 | curbuf = (curbuf + 1) & (pipe->buffers - 1); |
394 | pipe->curbuf = curbuf; | 402 | pipe->curbuf = curbuf; |
395 | pipe->nrbufs = --bufs; | 403 | pipe->nrbufs = --bufs; |
396 | do_wakeup = 1; | 404 | do_wakeup = 1; |
@@ -472,7 +480,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, | |||
472 | chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ | 480 | chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ |
473 | if (pipe->nrbufs && chars != 0) { | 481 | if (pipe->nrbufs && chars != 0) { |
474 | int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & | 482 | int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & |
475 | (PIPE_BUFFERS-1); | 483 | (pipe->buffers - 1); |
476 | struct pipe_buffer *buf = pipe->bufs + lastbuf; | 484 | struct pipe_buffer *buf = pipe->bufs + lastbuf; |
477 | const struct pipe_buf_operations *ops = buf->ops; | 485 | const struct pipe_buf_operations *ops = buf->ops; |
478 | int offset = buf->offset + buf->len; | 486 | int offset = buf->offset + buf->len; |
@@ -518,8 +526,8 @@ redo1: | |||
518 | break; | 526 | break; |
519 | } | 527 | } |
520 | bufs = pipe->nrbufs; | 528 | bufs = pipe->nrbufs; |
521 | if (bufs < PIPE_BUFFERS) { | 529 | if (bufs < pipe->buffers) { |
522 | int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); | 530 | int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1); |
523 | struct pipe_buffer *buf = pipe->bufs + newbuf; | 531 | struct pipe_buffer *buf = pipe->bufs + newbuf; |
524 | struct page *page = pipe->tmp_page; | 532 | struct page *page = pipe->tmp_page; |
525 | char *src; | 533 | char *src; |
@@ -580,7 +588,7 @@ redo2: | |||
580 | if (!total_len) | 588 | if (!total_len) |
581 | break; | 589 | break; |
582 | } | 590 | } |
583 | if (bufs < PIPE_BUFFERS) | 591 | if (bufs < pipe->buffers) |
584 | continue; | 592 | continue; |
585 | if (filp->f_flags & O_NONBLOCK) { | 593 | if (filp->f_flags & O_NONBLOCK) { |
586 | if (!ret) | 594 | if (!ret) |
@@ -640,7 +648,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
640 | nrbufs = pipe->nrbufs; | 648 | nrbufs = pipe->nrbufs; |
641 | while (--nrbufs >= 0) { | 649 | while (--nrbufs >= 0) { |
642 | count += pipe->bufs[buf].len; | 650 | count += pipe->bufs[buf].len; |
643 | buf = (buf+1) & (PIPE_BUFFERS-1); | 651 | buf = (buf+1) & (pipe->buffers - 1); |
644 | } | 652 | } |
645 | mutex_unlock(&inode->i_mutex); | 653 | mutex_unlock(&inode->i_mutex); |
646 | 654 | ||
@@ -671,7 +679,7 @@ pipe_poll(struct file *filp, poll_table *wait) | |||
671 | } | 679 | } |
672 | 680 | ||
673 | if (filp->f_mode & FMODE_WRITE) { | 681 | if (filp->f_mode & FMODE_WRITE) { |
674 | mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; | 682 | mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0; |
675 | /* | 683 | /* |
676 | * Most Unices do not set POLLERR for FIFOs but on Linux they | 684 | * Most Unices do not set POLLERR for FIFOs but on Linux they |
677 | * behave exactly like pipes for poll(). | 685 | * behave exactly like pipes for poll(). |
@@ -877,25 +885,32 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode) | |||
877 | 885 | ||
878 | pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); | 886 | pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); |
879 | if (pipe) { | 887 | if (pipe) { |
880 | init_waitqueue_head(&pipe->wait); | 888 | pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL); |
881 | pipe->r_counter = pipe->w_counter = 1; | 889 | if (pipe->bufs) { |
882 | pipe->inode = inode; | 890 | init_waitqueue_head(&pipe->wait); |
891 | pipe->r_counter = pipe->w_counter = 1; | ||
892 | pipe->inode = inode; | ||
893 | pipe->buffers = PIPE_DEF_BUFFERS; | ||
894 | return pipe; | ||
895 | } | ||
896 | kfree(pipe); | ||
883 | } | 897 | } |
884 | 898 | ||
885 | return pipe; | 899 | return NULL; |
886 | } | 900 | } |
887 | 901 | ||
888 | void __free_pipe_info(struct pipe_inode_info *pipe) | 902 | void __free_pipe_info(struct pipe_inode_info *pipe) |
889 | { | 903 | { |
890 | int i; | 904 | int i; |
891 | 905 | ||
892 | for (i = 0; i < PIPE_BUFFERS; i++) { | 906 | for (i = 0; i < pipe->buffers; i++) { |
893 | struct pipe_buffer *buf = pipe->bufs + i; | 907 | struct pipe_buffer *buf = pipe->bufs + i; |
894 | if (buf->ops) | 908 | if (buf->ops) |
895 | buf->ops->release(pipe, buf); | 909 | buf->ops->release(pipe, buf); |
896 | } | 910 | } |
897 | if (pipe->tmp_page) | 911 | if (pipe->tmp_page) |
898 | __free_page(pipe->tmp_page); | 912 | __free_page(pipe->tmp_page); |
913 | kfree(pipe->bufs); | ||
899 | kfree(pipe); | 914 | kfree(pipe); |
900 | } | 915 | } |
901 | 916 | ||
@@ -1094,6 +1109,89 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) | |||
1094 | } | 1109 | } |
1095 | 1110 | ||
1096 | /* | 1111 | /* |
1112 | * Allocate a new array of pipe buffers and copy the info over. Returns the | ||
1113 | * pipe size if successful, or return -ERROR on error. | ||
1114 | */ | ||
1115 | static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) | ||
1116 | { | ||
1117 | struct pipe_buffer *bufs; | ||
1118 | |||
1119 | /* | ||
1120 | * Must be a power-of-2 currently | ||
1121 | */ | ||
1122 | if (!is_power_of_2(arg)) | ||
1123 | return -EINVAL; | ||
1124 | |||
1125 | /* | ||
1126 | * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't | ||
1127 | * expect a lot of shrink+grow operations, just free and allocate | ||
1128 | * again like we would do for growing. If the pipe currently | ||
1129 | * contains more buffers than arg, then return busy. | ||
1130 | */ | ||
1131 | if (arg < pipe->nrbufs) | ||
1132 | return -EBUSY; | ||
1133 | |||
1134 | bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); | ||
1135 | if (unlikely(!bufs)) | ||
1136 | return -ENOMEM; | ||
1137 | |||
1138 | /* | ||
1139 | * The pipe array wraps around, so just start the new one at zero | ||
1140 | * and adjust the indexes. | ||
1141 | */ | ||
1142 | if (pipe->nrbufs) { | ||
1143 | const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1); | ||
1144 | const unsigned int head = pipe->nrbufs - tail; | ||
1145 | |||
1146 | if (head) | ||
1147 | memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer)); | ||
1148 | if (tail) | ||
1149 | memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer)); | ||
1150 | } | ||
1151 | |||
1152 | pipe->curbuf = 0; | ||
1153 | kfree(pipe->bufs); | ||
1154 | pipe->bufs = bufs; | ||
1155 | pipe->buffers = arg; | ||
1156 | return arg; | ||
1157 | } | ||
1158 | |||
1159 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | ||
1160 | { | ||
1161 | struct pipe_inode_info *pipe; | ||
1162 | long ret; | ||
1163 | |||
1164 | pipe = file->f_path.dentry->d_inode->i_pipe; | ||
1165 | if (!pipe) | ||
1166 | return -EBADF; | ||
1167 | |||
1168 | mutex_lock(&pipe->inode->i_mutex); | ||
1169 | |||
1170 | switch (cmd) { | ||
1171 | case F_SETPIPE_SZ: | ||
1172 | if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) | ||
1173 | return -EINVAL; | ||
1174 | /* | ||
1175 | * The pipe needs to be at least 2 pages large to | ||
1176 | * guarantee POSIX behaviour. | ||
1177 | */ | ||
1178 | if (arg < 2) | ||
1179 | return -EINVAL; | ||
1180 | ret = pipe_set_size(pipe, arg); | ||
1181 | break; | ||
1182 | case F_GETPIPE_SZ: | ||
1183 | ret = pipe->buffers; | ||
1184 | break; | ||
1185 | default: | ||
1186 | ret = -EINVAL; | ||
1187 | break; | ||
1188 | } | ||
1189 | |||
1190 | mutex_unlock(&pipe->inode->i_mutex); | ||
1191 | return ret; | ||
1192 | } | ||
1193 | |||
1194 | /* | ||
1097 | * pipefs should _never_ be mounted by userland - too much of security hassle, | 1195 | * pipefs should _never_ be mounted by userland - too much of security hassle, |
1098 | * no real gain from having the whole whorehouse mounted. So we don't need | 1196 | * no real gain from having the whole whorehouse mounted. So we don't need |
1099 | * any operations on the root directory. However, we need a non-trivial | 1197 | * any operations on the root directory. However, we need a non-trivial |
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index cfc78826da90..ce3dfd066f59 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c | |||
@@ -45,36 +45,22 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd, | |||
45 | return security_quotactl(cmd, type, id, sb); | 45 | return security_quotactl(cmd, type, id, sb); |
46 | } | 46 | } |
47 | 47 | ||
48 | static void quota_sync_one(struct super_block *sb, void *arg) | ||
49 | { | ||
50 | if (sb->s_qcop && sb->s_qcop->quota_sync) | ||
51 | sb->s_qcop->quota_sync(sb, *(int *)arg, 1); | ||
52 | } | ||
53 | |||
48 | static int quota_sync_all(int type) | 54 | static int quota_sync_all(int type) |
49 | { | 55 | { |
50 | struct super_block *sb; | ||
51 | int ret; | 56 | int ret; |
52 | 57 | ||
53 | if (type >= MAXQUOTAS) | 58 | if (type >= MAXQUOTAS) |
54 | return -EINVAL; | 59 | return -EINVAL; |
55 | ret = security_quotactl(Q_SYNC, type, 0, NULL); | 60 | ret = security_quotactl(Q_SYNC, type, 0, NULL); |
56 | if (ret) | 61 | if (!ret) |
57 | return ret; | 62 | iterate_supers(quota_sync_one, &type); |
58 | 63 | return ret; | |
59 | spin_lock(&sb_lock); | ||
60 | restart: | ||
61 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
62 | if (!sb->s_qcop || !sb->s_qcop->quota_sync) | ||
63 | continue; | ||
64 | |||
65 | sb->s_count++; | ||
66 | spin_unlock(&sb_lock); | ||
67 | down_read(&sb->s_umount); | ||
68 | if (sb->s_root) | ||
69 | sb->s_qcop->quota_sync(sb, type, 1); | ||
70 | up_read(&sb->s_umount); | ||
71 | spin_lock(&sb_lock); | ||
72 | if (__put_super_and_need_restart(sb)) | ||
73 | goto restart; | ||
74 | } | ||
75 | spin_unlock(&sb_lock); | ||
76 | |||
77 | return 0; | ||
78 | } | 64 | } |
79 | 65 | ||
80 | static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, | 66 | static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index f47cd212dee1..a5ebae70dc6d 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -52,14 +52,13 @@ static struct backing_dev_info ramfs_backing_dev_info = { | |||
52 | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, | 52 | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, |
53 | }; | 53 | }; |
54 | 54 | ||
55 | struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) | 55 | struct inode *ramfs_get_inode(struct super_block *sb, |
56 | const struct inode *dir, int mode, dev_t dev) | ||
56 | { | 57 | { |
57 | struct inode * inode = new_inode(sb); | 58 | struct inode * inode = new_inode(sb); |
58 | 59 | ||
59 | if (inode) { | 60 | if (inode) { |
60 | inode->i_mode = mode; | 61 | inode_init_owner(inode, dir, mode); |
61 | inode->i_uid = current_fsuid(); | ||
62 | inode->i_gid = current_fsgid(); | ||
63 | inode->i_mapping->a_ops = &ramfs_aops; | 62 | inode->i_mapping->a_ops = &ramfs_aops; |
64 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; | 63 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; |
65 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); | 64 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); |
@@ -95,15 +94,10 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) | |||
95 | static int | 94 | static int |
96 | ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | 95 | ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) |
97 | { | 96 | { |
98 | struct inode * inode = ramfs_get_inode(dir->i_sb, mode, dev); | 97 | struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev); |
99 | int error = -ENOSPC; | 98 | int error = -ENOSPC; |
100 | 99 | ||
101 | if (inode) { | 100 | if (inode) { |
102 | if (dir->i_mode & S_ISGID) { | ||
103 | inode->i_gid = dir->i_gid; | ||
104 | if (S_ISDIR(mode)) | ||
105 | inode->i_mode |= S_ISGID; | ||
106 | } | ||
107 | d_instantiate(dentry, inode); | 101 | d_instantiate(dentry, inode); |
108 | dget(dentry); /* Extra count - pin the dentry in core */ | 102 | dget(dentry); /* Extra count - pin the dentry in core */ |
109 | error = 0; | 103 | error = 0; |
@@ -130,13 +124,11 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * | |||
130 | struct inode *inode; | 124 | struct inode *inode; |
131 | int error = -ENOSPC; | 125 | int error = -ENOSPC; |
132 | 126 | ||
133 | inode = ramfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); | 127 | inode = ramfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0); |
134 | if (inode) { | 128 | if (inode) { |
135 | int l = strlen(symname)+1; | 129 | int l = strlen(symname)+1; |
136 | error = page_symlink(inode, symname, l); | 130 | error = page_symlink(inode, symname, l); |
137 | if (!error) { | 131 | if (!error) { |
138 | if (dir->i_mode & S_ISGID) | ||
139 | inode->i_gid = dir->i_gid; | ||
140 | d_instantiate(dentry, inode); | 132 | d_instantiate(dentry, inode); |
141 | dget(dentry); | 133 | dget(dentry); |
142 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 134 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
@@ -241,7 +233,7 @@ int ramfs_fill_super(struct super_block *sb, void *data, int silent) | |||
241 | sb->s_op = &ramfs_ops; | 233 | sb->s_op = &ramfs_ops; |
242 | sb->s_time_gran = 1; | 234 | sb->s_time_gran = 1; |
243 | 235 | ||
244 | inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0); | 236 | inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0); |
245 | if (!inode) { | 237 | if (!inode) { |
246 | err = -ENOMEM; | 238 | err = -ENOMEM; |
247 | goto fail; | 239 | goto fail; |
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 1d9c12714c5c..9977df9f3a54 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -147,7 +147,8 @@ static int reiserfs_sync_file(struct file *filp, | |||
147 | barrier_done = reiserfs_commit_for_inode(inode); | 147 | barrier_done = reiserfs_commit_for_inode(inode); |
148 | reiserfs_write_unlock(inode->i_sb); | 148 | reiserfs_write_unlock(inode->i_sb); |
149 | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) | 149 | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) |
150 | blkdev_issue_flush(inode->i_sb->s_bdev, NULL); | 150 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, |
151 | BLKDEV_IFL_WAIT); | ||
151 | if (barrier_done < 0) | 152 | if (barrier_done < 0) |
152 | return barrier_done; | 153 | return barrier_done; |
153 | return (err < 0) ? -EIO : 0; | 154 | return (err < 0) ? -EIO : 0; |
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index d0c43cb99ffc..ee78d4a0086a 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c | |||
@@ -561,23 +561,13 @@ static int drop_new_inode(struct inode *inode) | |||
561 | */ | 561 | */ |
562 | static int new_inode_init(struct inode *inode, struct inode *dir, int mode) | 562 | static int new_inode_init(struct inode *inode, struct inode *dir, int mode) |
563 | { | 563 | { |
564 | |||
565 | /* the quota init calls have to know who to charge the quota to, so | ||
566 | ** we have to set uid and gid here | ||
567 | */ | ||
568 | inode->i_uid = current_fsuid(); | ||
569 | inode->i_mode = mode; | ||
570 | /* Make inode invalid - just in case we are going to drop it before | 564 | /* Make inode invalid - just in case we are going to drop it before |
571 | * the initialization happens */ | 565 | * the initialization happens */ |
572 | INODE_PKEY(inode)->k_objectid = 0; | 566 | INODE_PKEY(inode)->k_objectid = 0; |
573 | 567 | /* the quota init calls have to know who to charge the quota to, so | |
574 | if (dir->i_mode & S_ISGID) { | 568 | ** we have to set uid and gid here |
575 | inode->i_gid = dir->i_gid; | 569 | */ |
576 | if (S_ISDIR(mode)) | 570 | inode_init_owner(inode, dir, mode); |
577 | inode->i_mode |= S_ISGID; | ||
578 | } else { | ||
579 | inode->i_gid = current_fsgid(); | ||
580 | } | ||
581 | dquot_initialize(inode); | 571 | dquot_initialize(inode); |
582 | return 0; | 572 | return 0; |
583 | } | 573 | } |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index e7cc00e636dc..8c4cf273c672 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -723,11 +723,11 @@ out: | |||
723 | (handler) = *(handlers)++) | 723 | (handler) = *(handlers)++) |
724 | 724 | ||
725 | /* This is the implementation for the xattr plugin infrastructure */ | 725 | /* This is the implementation for the xattr plugin infrastructure */ |
726 | static inline struct xattr_handler * | 726 | static inline const struct xattr_handler * |
727 | find_xattr_handler_prefix(struct xattr_handler **handlers, | 727 | find_xattr_handler_prefix(const struct xattr_handler **handlers, |
728 | const char *name) | 728 | const char *name) |
729 | { | 729 | { |
730 | struct xattr_handler *xah; | 730 | const struct xattr_handler *xah; |
731 | 731 | ||
732 | if (!handlers) | 732 | if (!handlers) |
733 | return NULL; | 733 | return NULL; |
@@ -748,7 +748,7 @@ ssize_t | |||
748 | reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, | 748 | reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, |
749 | size_t size) | 749 | size_t size) |
750 | { | 750 | { |
751 | struct xattr_handler *handler; | 751 | const struct xattr_handler *handler; |
752 | 752 | ||
753 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); | 753 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); |
754 | 754 | ||
@@ -767,7 +767,7 @@ int | |||
767 | reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 767 | reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
768 | size_t size, int flags) | 768 | size_t size, int flags) |
769 | { | 769 | { |
770 | struct xattr_handler *handler; | 770 | const struct xattr_handler *handler; |
771 | 771 | ||
772 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); | 772 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); |
773 | 773 | ||
@@ -784,7 +784,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
784 | */ | 784 | */ |
785 | int reiserfs_removexattr(struct dentry *dentry, const char *name) | 785 | int reiserfs_removexattr(struct dentry *dentry, const char *name) |
786 | { | 786 | { |
787 | struct xattr_handler *handler; | 787 | const struct xattr_handler *handler; |
788 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); | 788 | handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); |
789 | 789 | ||
790 | if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) | 790 | if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) |
@@ -807,7 +807,7 @@ static int listxattr_filler(void *buf, const char *name, int namelen, | |||
807 | size_t size; | 807 | size_t size; |
808 | if (name[0] != '.' || | 808 | if (name[0] != '.' || |
809 | (namelen != 1 && (name[1] != '.' || namelen != 2))) { | 809 | (namelen != 1 && (name[1] != '.' || namelen != 2))) { |
810 | struct xattr_handler *handler; | 810 | const struct xattr_handler *handler; |
811 | handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr, | 811 | handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr, |
812 | name); | 812 | name); |
813 | if (!handler) /* Unsupported xattr name */ | 813 | if (!handler) /* Unsupported xattr name */ |
@@ -920,7 +920,7 @@ static int create_privroot(struct dentry *dentry) { return 0; } | |||
920 | #endif | 920 | #endif |
921 | 921 | ||
922 | /* Actual operations that are exported to VFS-land */ | 922 | /* Actual operations that are exported to VFS-land */ |
923 | struct xattr_handler *reiserfs_xattr_handlers[] = { | 923 | const struct xattr_handler *reiserfs_xattr_handlers[] = { |
924 | #ifdef CONFIG_REISERFS_FS_XATTR | 924 | #ifdef CONFIG_REISERFS_FS_XATTR |
925 | &reiserfs_xattr_user_handler, | 925 | &reiserfs_xattr_user_handler, |
926 | &reiserfs_xattr_trusted_handler, | 926 | &reiserfs_xattr_trusted_handler, |
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 9cdb759645a9..536d697a8a28 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c | |||
@@ -500,7 +500,7 @@ static size_t posix_acl_access_list(struct dentry *dentry, char *list, | |||
500 | return size; | 500 | return size; |
501 | } | 501 | } |
502 | 502 | ||
503 | struct xattr_handler reiserfs_posix_acl_access_handler = { | 503 | const struct xattr_handler reiserfs_posix_acl_access_handler = { |
504 | .prefix = POSIX_ACL_XATTR_ACCESS, | 504 | .prefix = POSIX_ACL_XATTR_ACCESS, |
505 | .flags = ACL_TYPE_ACCESS, | 505 | .flags = ACL_TYPE_ACCESS, |
506 | .get = posix_acl_get, | 506 | .get = posix_acl_get, |
@@ -520,7 +520,7 @@ static size_t posix_acl_default_list(struct dentry *dentry, char *list, | |||
520 | return size; | 520 | return size; |
521 | } | 521 | } |
522 | 522 | ||
523 | struct xattr_handler reiserfs_posix_acl_default_handler = { | 523 | const struct xattr_handler reiserfs_posix_acl_default_handler = { |
524 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 524 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
525 | .flags = ACL_TYPE_DEFAULT, | 525 | .flags = ACL_TYPE_DEFAULT, |
526 | .get = posix_acl_get, | 526 | .get = posix_acl_get, |
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 7271a477c041..237c6928d3c6 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c | |||
@@ -111,7 +111,7 @@ void reiserfs_security_free(struct reiserfs_security_handle *sec) | |||
111 | sec->value = NULL; | 111 | sec->value = NULL; |
112 | } | 112 | } |
113 | 113 | ||
114 | struct xattr_handler reiserfs_xattr_security_handler = { | 114 | const struct xattr_handler reiserfs_xattr_security_handler = { |
115 | .prefix = XATTR_SECURITY_PREFIX, | 115 | .prefix = XATTR_SECURITY_PREFIX, |
116 | .get = security_get, | 116 | .get = security_get, |
117 | .set = security_set, | 117 | .set = security_set, |
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index 5b08aaca3daf..9883736ce3ec 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c | |||
@@ -48,7 +48,7 @@ static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size, | |||
48 | return len; | 48 | return len; |
49 | } | 49 | } |
50 | 50 | ||
51 | struct xattr_handler reiserfs_xattr_trusted_handler = { | 51 | const struct xattr_handler reiserfs_xattr_trusted_handler = { |
52 | .prefix = XATTR_TRUSTED_PREFIX, | 52 | .prefix = XATTR_TRUSTED_PREFIX, |
53 | .get = trusted_get, | 53 | .get = trusted_get, |
54 | .set = trusted_set, | 54 | .set = trusted_set, |
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index 75d59c49b911..45ae1a00013a 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c | |||
@@ -44,7 +44,7 @@ static size_t user_list(struct dentry *dentry, char *list, size_t list_size, | |||
44 | return len; | 44 | return len; |
45 | } | 45 | } |
46 | 46 | ||
47 | struct xattr_handler reiserfs_xattr_user_handler = { | 47 | const struct xattr_handler reiserfs_xattr_user_handler = { |
48 | .prefix = XATTR_USER_PREFIX, | 48 | .prefix = XATTR_USER_PREFIX, |
49 | .get = user_get, | 49 | .get = user_get, |
50 | .set = user_set, | 50 | .set = user_set, |
diff --git a/fs/splice.c b/fs/splice.c index 9313b6124a2e..ac22b00d86c3 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -193,8 +193,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
193 | break; | 193 | break; |
194 | } | 194 | } |
195 | 195 | ||
196 | if (pipe->nrbufs < PIPE_BUFFERS) { | 196 | if (pipe->nrbufs < pipe->buffers) { |
197 | int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); | 197 | int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); |
198 | struct pipe_buffer *buf = pipe->bufs + newbuf; | 198 | struct pipe_buffer *buf = pipe->bufs + newbuf; |
199 | 199 | ||
200 | buf->page = spd->pages[page_nr]; | 200 | buf->page = spd->pages[page_nr]; |
@@ -214,7 +214,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
214 | 214 | ||
215 | if (!--spd->nr_pages) | 215 | if (!--spd->nr_pages) |
216 | break; | 216 | break; |
217 | if (pipe->nrbufs < PIPE_BUFFERS) | 217 | if (pipe->nrbufs < pipe->buffers) |
218 | continue; | 218 | continue; |
219 | 219 | ||
220 | break; | 220 | break; |
@@ -265,6 +265,36 @@ static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) | |||
265 | page_cache_release(spd->pages[i]); | 265 | page_cache_release(spd->pages[i]); |
266 | } | 266 | } |
267 | 267 | ||
268 | /* | ||
269 | * Check if we need to grow the arrays holding pages and partial page | ||
270 | * descriptions. | ||
271 | */ | ||
272 | int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) | ||
273 | { | ||
274 | if (pipe->buffers <= PIPE_DEF_BUFFERS) | ||
275 | return 0; | ||
276 | |||
277 | spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); | ||
278 | spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); | ||
279 | |||
280 | if (spd->pages && spd->partial) | ||
281 | return 0; | ||
282 | |||
283 | kfree(spd->pages); | ||
284 | kfree(spd->partial); | ||
285 | return -ENOMEM; | ||
286 | } | ||
287 | |||
288 | void splice_shrink_spd(struct pipe_inode_info *pipe, | ||
289 | struct splice_pipe_desc *spd) | ||
290 | { | ||
291 | if (pipe->buffers <= PIPE_DEF_BUFFERS) | ||
292 | return; | ||
293 | |||
294 | kfree(spd->pages); | ||
295 | kfree(spd->partial); | ||
296 | } | ||
297 | |||
268 | static int | 298 | static int |
269 | __generic_file_splice_read(struct file *in, loff_t *ppos, | 299 | __generic_file_splice_read(struct file *in, loff_t *ppos, |
270 | struct pipe_inode_info *pipe, size_t len, | 300 | struct pipe_inode_info *pipe, size_t len, |
@@ -272,8 +302,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
272 | { | 302 | { |
273 | struct address_space *mapping = in->f_mapping; | 303 | struct address_space *mapping = in->f_mapping; |
274 | unsigned int loff, nr_pages, req_pages; | 304 | unsigned int loff, nr_pages, req_pages; |
275 | struct page *pages[PIPE_BUFFERS]; | 305 | struct page *pages[PIPE_DEF_BUFFERS]; |
276 | struct partial_page partial[PIPE_BUFFERS]; | 306 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
277 | struct page *page; | 307 | struct page *page; |
278 | pgoff_t index, end_index; | 308 | pgoff_t index, end_index; |
279 | loff_t isize; | 309 | loff_t isize; |
@@ -286,15 +316,18 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
286 | .spd_release = spd_release_page, | 316 | .spd_release = spd_release_page, |
287 | }; | 317 | }; |
288 | 318 | ||
319 | if (splice_grow_spd(pipe, &spd)) | ||
320 | return -ENOMEM; | ||
321 | |||
289 | index = *ppos >> PAGE_CACHE_SHIFT; | 322 | index = *ppos >> PAGE_CACHE_SHIFT; |
290 | loff = *ppos & ~PAGE_CACHE_MASK; | 323 | loff = *ppos & ~PAGE_CACHE_MASK; |
291 | req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 324 | req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
292 | nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS); | 325 | nr_pages = min(req_pages, pipe->buffers); |
293 | 326 | ||
294 | /* | 327 | /* |
295 | * Lookup the (hopefully) full range of pages we need. | 328 | * Lookup the (hopefully) full range of pages we need. |
296 | */ | 329 | */ |
297 | spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); | 330 | spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages); |
298 | index += spd.nr_pages; | 331 | index += spd.nr_pages; |
299 | 332 | ||
300 | /* | 333 | /* |
@@ -335,7 +368,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
335 | unlock_page(page); | 368 | unlock_page(page); |
336 | } | 369 | } |
337 | 370 | ||
338 | pages[spd.nr_pages++] = page; | 371 | spd.pages[spd.nr_pages++] = page; |
339 | index++; | 372 | index++; |
340 | } | 373 | } |
341 | 374 | ||
@@ -356,7 +389,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
356 | * this_len is the max we'll use from this page | 389 | * this_len is the max we'll use from this page |
357 | */ | 390 | */ |
358 | this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); | 391 | this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); |
359 | page = pages[page_nr]; | 392 | page = spd.pages[page_nr]; |
360 | 393 | ||
361 | if (PageReadahead(page)) | 394 | if (PageReadahead(page)) |
362 | page_cache_async_readahead(mapping, &in->f_ra, in, | 395 | page_cache_async_readahead(mapping, &in->f_ra, in, |
@@ -393,8 +426,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
393 | error = -ENOMEM; | 426 | error = -ENOMEM; |
394 | break; | 427 | break; |
395 | } | 428 | } |
396 | page_cache_release(pages[page_nr]); | 429 | page_cache_release(spd.pages[page_nr]); |
397 | pages[page_nr] = page; | 430 | spd.pages[page_nr] = page; |
398 | } | 431 | } |
399 | /* | 432 | /* |
400 | * page was already under io and is now done, great | 433 | * page was already under io and is now done, great |
@@ -451,8 +484,8 @@ fill_it: | |||
451 | len = this_len; | 484 | len = this_len; |
452 | } | 485 | } |
453 | 486 | ||
454 | partial[page_nr].offset = loff; | 487 | spd.partial[page_nr].offset = loff; |
455 | partial[page_nr].len = this_len; | 488 | spd.partial[page_nr].len = this_len; |
456 | len -= this_len; | 489 | len -= this_len; |
457 | loff = 0; | 490 | loff = 0; |
458 | spd.nr_pages++; | 491 | spd.nr_pages++; |
@@ -464,12 +497,13 @@ fill_it: | |||
464 | * we got, 'nr_pages' is how many pages are in the map. | 497 | * we got, 'nr_pages' is how many pages are in the map. |
465 | */ | 498 | */ |
466 | while (page_nr < nr_pages) | 499 | while (page_nr < nr_pages) |
467 | page_cache_release(pages[page_nr++]); | 500 | page_cache_release(spd.pages[page_nr++]); |
468 | in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 501 | in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
469 | 502 | ||
470 | if (spd.nr_pages) | 503 | if (spd.nr_pages) |
471 | return splice_to_pipe(pipe, &spd); | 504 | error = splice_to_pipe(pipe, &spd); |
472 | 505 | ||
506 | splice_shrink_spd(pipe, &spd); | ||
473 | return error; | 507 | return error; |
474 | } | 508 | } |
475 | 509 | ||
@@ -560,9 +594,9 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
560 | unsigned int nr_pages; | 594 | unsigned int nr_pages; |
561 | unsigned int nr_freed; | 595 | unsigned int nr_freed; |
562 | size_t offset; | 596 | size_t offset; |
563 | struct page *pages[PIPE_BUFFERS]; | 597 | struct page *pages[PIPE_DEF_BUFFERS]; |
564 | struct partial_page partial[PIPE_BUFFERS]; | 598 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
565 | struct iovec vec[PIPE_BUFFERS]; | 599 | struct iovec *vec, __vec[PIPE_DEF_BUFFERS]; |
566 | pgoff_t index; | 600 | pgoff_t index; |
567 | ssize_t res; | 601 | ssize_t res; |
568 | size_t this_len; | 602 | size_t this_len; |
@@ -576,11 +610,22 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
576 | .spd_release = spd_release_page, | 610 | .spd_release = spd_release_page, |
577 | }; | 611 | }; |
578 | 612 | ||
613 | if (splice_grow_spd(pipe, &spd)) | ||
614 | return -ENOMEM; | ||
615 | |||
616 | res = -ENOMEM; | ||
617 | vec = __vec; | ||
618 | if (pipe->buffers > PIPE_DEF_BUFFERS) { | ||
619 | vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); | ||
620 | if (!vec) | ||
621 | goto shrink_ret; | ||
622 | } | ||
623 | |||
579 | index = *ppos >> PAGE_CACHE_SHIFT; | 624 | index = *ppos >> PAGE_CACHE_SHIFT; |
580 | offset = *ppos & ~PAGE_CACHE_MASK; | 625 | offset = *ppos & ~PAGE_CACHE_MASK; |
581 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 626 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
582 | 627 | ||
583 | for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) { | 628 | for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { |
584 | struct page *page; | 629 | struct page *page; |
585 | 630 | ||
586 | page = alloc_page(GFP_USER); | 631 | page = alloc_page(GFP_USER); |
@@ -591,7 +636,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
591 | this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); | 636 | this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); |
592 | vec[i].iov_base = (void __user *) page_address(page); | 637 | vec[i].iov_base = (void __user *) page_address(page); |
593 | vec[i].iov_len = this_len; | 638 | vec[i].iov_len = this_len; |
594 | pages[i] = page; | 639 | spd.pages[i] = page; |
595 | spd.nr_pages++; | 640 | spd.nr_pages++; |
596 | len -= this_len; | 641 | len -= this_len; |
597 | offset = 0; | 642 | offset = 0; |
@@ -610,11 +655,11 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
610 | nr_freed = 0; | 655 | nr_freed = 0; |
611 | for (i = 0; i < spd.nr_pages; i++) { | 656 | for (i = 0; i < spd.nr_pages; i++) { |
612 | this_len = min_t(size_t, vec[i].iov_len, res); | 657 | this_len = min_t(size_t, vec[i].iov_len, res); |
613 | partial[i].offset = 0; | 658 | spd.partial[i].offset = 0; |
614 | partial[i].len = this_len; | 659 | spd.partial[i].len = this_len; |
615 | if (!this_len) { | 660 | if (!this_len) { |
616 | __free_page(pages[i]); | 661 | __free_page(spd.pages[i]); |
617 | pages[i] = NULL; | 662 | spd.pages[i] = NULL; |
618 | nr_freed++; | 663 | nr_freed++; |
619 | } | 664 | } |
620 | res -= this_len; | 665 | res -= this_len; |
@@ -625,13 +670,18 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
625 | if (res > 0) | 670 | if (res > 0) |
626 | *ppos += res; | 671 | *ppos += res; |
627 | 672 | ||
673 | shrink_ret: | ||
674 | if (vec != __vec) | ||
675 | kfree(vec); | ||
676 | splice_shrink_spd(pipe, &spd); | ||
628 | return res; | 677 | return res; |
629 | 678 | ||
630 | err: | 679 | err: |
631 | for (i = 0; i < spd.nr_pages; i++) | 680 | for (i = 0; i < spd.nr_pages; i++) |
632 | __free_page(pages[i]); | 681 | __free_page(spd.pages[i]); |
633 | 682 | ||
634 | return error; | 683 | res = error; |
684 | goto shrink_ret; | ||
635 | } | 685 | } |
636 | EXPORT_SYMBOL(default_file_splice_read); | 686 | EXPORT_SYMBOL(default_file_splice_read); |
637 | 687 | ||
@@ -784,7 +834,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, | |||
784 | if (!buf->len) { | 834 | if (!buf->len) { |
785 | buf->ops = NULL; | 835 | buf->ops = NULL; |
786 | ops->release(pipe, buf); | 836 | ops->release(pipe, buf); |
787 | pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); | 837 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); |
788 | pipe->nrbufs--; | 838 | pipe->nrbufs--; |
789 | if (pipe->inode) | 839 | if (pipe->inode) |
790 | sd->need_wakeup = true; | 840 | sd->need_wakeup = true; |
@@ -1211,7 +1261,7 @@ out_release: | |||
1211 | * If we did an incomplete transfer we must release | 1261 | * If we did an incomplete transfer we must release |
1212 | * the pipe buffers in question: | 1262 | * the pipe buffers in question: |
1213 | */ | 1263 | */ |
1214 | for (i = 0; i < PIPE_BUFFERS; i++) { | 1264 | for (i = 0; i < pipe->buffers; i++) { |
1215 | struct pipe_buffer *buf = pipe->bufs + i; | 1265 | struct pipe_buffer *buf = pipe->bufs + i; |
1216 | 1266 | ||
1217 | if (buf->ops) { | 1267 | if (buf->ops) { |
@@ -1371,7 +1421,8 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1371 | */ | 1421 | */ |
1372 | static int get_iovec_page_array(const struct iovec __user *iov, | 1422 | static int get_iovec_page_array(const struct iovec __user *iov, |
1373 | unsigned int nr_vecs, struct page **pages, | 1423 | unsigned int nr_vecs, struct page **pages, |
1374 | struct partial_page *partial, int aligned) | 1424 | struct partial_page *partial, int aligned, |
1425 | unsigned int pipe_buffers) | ||
1375 | { | 1426 | { |
1376 | int buffers = 0, error = 0; | 1427 | int buffers = 0, error = 0; |
1377 | 1428 | ||
@@ -1414,8 +1465,8 @@ static int get_iovec_page_array(const struct iovec __user *iov, | |||
1414 | break; | 1465 | break; |
1415 | 1466 | ||
1416 | npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 1467 | npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
1417 | if (npages > PIPE_BUFFERS - buffers) | 1468 | if (npages > pipe_buffers - buffers) |
1418 | npages = PIPE_BUFFERS - buffers; | 1469 | npages = pipe_buffers - buffers; |
1419 | 1470 | ||
1420 | error = get_user_pages_fast((unsigned long)base, npages, | 1471 | error = get_user_pages_fast((unsigned long)base, npages, |
1421 | 0, &pages[buffers]); | 1472 | 0, &pages[buffers]); |
@@ -1450,7 +1501,7 @@ static int get_iovec_page_array(const struct iovec __user *iov, | |||
1450 | * or if we mapped the max number of pages that we have | 1501 | * or if we mapped the max number of pages that we have |
1451 | * room for. | 1502 | * room for. |
1452 | */ | 1503 | */ |
1453 | if (error < npages || buffers == PIPE_BUFFERS) | 1504 | if (error < npages || buffers == pipe_buffers) |
1454 | break; | 1505 | break; |
1455 | 1506 | ||
1456 | nr_vecs--; | 1507 | nr_vecs--; |
@@ -1593,8 +1644,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, | |||
1593 | unsigned long nr_segs, unsigned int flags) | 1644 | unsigned long nr_segs, unsigned int flags) |
1594 | { | 1645 | { |
1595 | struct pipe_inode_info *pipe; | 1646 | struct pipe_inode_info *pipe; |
1596 | struct page *pages[PIPE_BUFFERS]; | 1647 | struct page *pages[PIPE_DEF_BUFFERS]; |
1597 | struct partial_page partial[PIPE_BUFFERS]; | 1648 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
1598 | struct splice_pipe_desc spd = { | 1649 | struct splice_pipe_desc spd = { |
1599 | .pages = pages, | 1650 | .pages = pages, |
1600 | .partial = partial, | 1651 | .partial = partial, |
@@ -1602,17 +1653,25 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, | |||
1602 | .ops = &user_page_pipe_buf_ops, | 1653 | .ops = &user_page_pipe_buf_ops, |
1603 | .spd_release = spd_release_page, | 1654 | .spd_release = spd_release_page, |
1604 | }; | 1655 | }; |
1656 | long ret; | ||
1605 | 1657 | ||
1606 | pipe = pipe_info(file->f_path.dentry->d_inode); | 1658 | pipe = pipe_info(file->f_path.dentry->d_inode); |
1607 | if (!pipe) | 1659 | if (!pipe) |
1608 | return -EBADF; | 1660 | return -EBADF; |
1609 | 1661 | ||
1610 | spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, | 1662 | if (splice_grow_spd(pipe, &spd)) |
1611 | flags & SPLICE_F_GIFT); | 1663 | return -ENOMEM; |
1664 | |||
1665 | spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, | ||
1666 | spd.partial, flags & SPLICE_F_GIFT, | ||
1667 | pipe->buffers); | ||
1612 | if (spd.nr_pages <= 0) | 1668 | if (spd.nr_pages <= 0) |
1613 | return spd.nr_pages; | 1669 | ret = spd.nr_pages; |
1670 | else | ||
1671 | ret = splice_to_pipe(pipe, &spd); | ||
1614 | 1672 | ||
1615 | return splice_to_pipe(pipe, &spd); | 1673 | splice_shrink_spd(pipe, &spd); |
1674 | return ret; | ||
1616 | } | 1675 | } |
1617 | 1676 | ||
1618 | /* | 1677 | /* |
@@ -1738,13 +1797,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
1738 | * Check ->nrbufs without the inode lock first. This function | 1797 | * Check ->nrbufs without the inode lock first. This function |
1739 | * is speculative anyways, so missing one is ok. | 1798 | * is speculative anyways, so missing one is ok. |
1740 | */ | 1799 | */ |
1741 | if (pipe->nrbufs < PIPE_BUFFERS) | 1800 | if (pipe->nrbufs < pipe->buffers) |
1742 | return 0; | 1801 | return 0; |
1743 | 1802 | ||
1744 | ret = 0; | 1803 | ret = 0; |
1745 | pipe_lock(pipe); | 1804 | pipe_lock(pipe); |
1746 | 1805 | ||
1747 | while (pipe->nrbufs >= PIPE_BUFFERS) { | 1806 | while (pipe->nrbufs >= pipe->buffers) { |
1748 | if (!pipe->readers) { | 1807 | if (!pipe->readers) { |
1749 | send_sig(SIGPIPE, current, 0); | 1808 | send_sig(SIGPIPE, current, 0); |
1750 | ret = -EPIPE; | 1809 | ret = -EPIPE; |
@@ -1810,7 +1869,7 @@ retry: | |||
1810 | * Cannot make any progress, because either the input | 1869 | * Cannot make any progress, because either the input |
1811 | * pipe is empty or the output pipe is full. | 1870 | * pipe is empty or the output pipe is full. |
1812 | */ | 1871 | */ |
1813 | if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) { | 1872 | if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) { |
1814 | /* Already processed some buffers, break */ | 1873 | /* Already processed some buffers, break */ |
1815 | if (ret) | 1874 | if (ret) |
1816 | break; | 1875 | break; |
@@ -1831,7 +1890,7 @@ retry: | |||
1831 | } | 1890 | } |
1832 | 1891 | ||
1833 | ibuf = ipipe->bufs + ipipe->curbuf; | 1892 | ibuf = ipipe->bufs + ipipe->curbuf; |
1834 | nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS; | 1893 | nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); |
1835 | obuf = opipe->bufs + nbuf; | 1894 | obuf = opipe->bufs + nbuf; |
1836 | 1895 | ||
1837 | if (len >= ibuf->len) { | 1896 | if (len >= ibuf->len) { |
@@ -1841,7 +1900,7 @@ retry: | |||
1841 | *obuf = *ibuf; | 1900 | *obuf = *ibuf; |
1842 | ibuf->ops = NULL; | 1901 | ibuf->ops = NULL; |
1843 | opipe->nrbufs++; | 1902 | opipe->nrbufs++; |
1844 | ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS; | 1903 | ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1); |
1845 | ipipe->nrbufs--; | 1904 | ipipe->nrbufs--; |
1846 | input_wakeup = true; | 1905 | input_wakeup = true; |
1847 | } else { | 1906 | } else { |
@@ -1914,11 +1973,11 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1914 | * If we have iterated all input buffers or ran out of | 1973 | * If we have iterated all input buffers or ran out of |
1915 | * output room, break. | 1974 | * output room, break. |
1916 | */ | 1975 | */ |
1917 | if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) | 1976 | if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) |
1918 | break; | 1977 | break; |
1919 | 1978 | ||
1920 | ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); | 1979 | ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1)); |
1921 | nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); | 1980 | nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); |
1922 | 1981 | ||
1923 | /* | 1982 | /* |
1924 | * Get a reference to this pipe buffer, | 1983 | * Get a reference to this pipe buffer, |
diff --git a/fs/statfs.c b/fs/statfs.c new file mode 100644 index 000000000000..4ef021f3b612 --- /dev/null +++ b/fs/statfs.c | |||
@@ -0,0 +1,196 @@ | |||
1 | #include <linux/syscalls.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/fs.h> | ||
4 | #include <linux/file.h> | ||
5 | #include <linux/namei.h> | ||
6 | #include <linux/statfs.h> | ||
7 | #include <linux/security.h> | ||
8 | #include <linux/uaccess.h> | ||
9 | |||
10 | int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
11 | { | ||
12 | int retval = -ENODEV; | ||
13 | |||
14 | if (dentry) { | ||
15 | retval = -ENOSYS; | ||
16 | if (dentry->d_sb->s_op->statfs) { | ||
17 | memset(buf, 0, sizeof(*buf)); | ||
18 | retval = security_sb_statfs(dentry); | ||
19 | if (retval) | ||
20 | return retval; | ||
21 | retval = dentry->d_sb->s_op->statfs(dentry, buf); | ||
22 | if (retval == 0 && buf->f_frsize == 0) | ||
23 | buf->f_frsize = buf->f_bsize; | ||
24 | } | ||
25 | } | ||
26 | return retval; | ||
27 | } | ||
28 | |||
29 | EXPORT_SYMBOL(vfs_statfs); | ||
30 | |||
31 | static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) | ||
32 | { | ||
33 | struct kstatfs st; | ||
34 | int retval; | ||
35 | |||
36 | retval = vfs_statfs(dentry, &st); | ||
37 | if (retval) | ||
38 | return retval; | ||
39 | |||
40 | if (sizeof(*buf) == sizeof(st)) | ||
41 | memcpy(buf, &st, sizeof(st)); | ||
42 | else { | ||
43 | if (sizeof buf->f_blocks == 4) { | ||
44 | if ((st.f_blocks | st.f_bfree | st.f_bavail | | ||
45 | st.f_bsize | st.f_frsize) & | ||
46 | 0xffffffff00000000ULL) | ||
47 | return -EOVERFLOW; | ||
48 | /* | ||
49 | * f_files and f_ffree may be -1; it's okay to stuff | ||
50 | * that into 32 bits | ||
51 | */ | ||
52 | if (st.f_files != -1 && | ||
53 | (st.f_files & 0xffffffff00000000ULL)) | ||
54 | return -EOVERFLOW; | ||
55 | if (st.f_ffree != -1 && | ||
56 | (st.f_ffree & 0xffffffff00000000ULL)) | ||
57 | return -EOVERFLOW; | ||
58 | } | ||
59 | |||
60 | buf->f_type = st.f_type; | ||
61 | buf->f_bsize = st.f_bsize; | ||
62 | buf->f_blocks = st.f_blocks; | ||
63 | buf->f_bfree = st.f_bfree; | ||
64 | buf->f_bavail = st.f_bavail; | ||
65 | buf->f_files = st.f_files; | ||
66 | buf->f_ffree = st.f_ffree; | ||
67 | buf->f_fsid = st.f_fsid; | ||
68 | buf->f_namelen = st.f_namelen; | ||
69 | buf->f_frsize = st.f_frsize; | ||
70 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | ||
71 | } | ||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) | ||
76 | { | ||
77 | struct kstatfs st; | ||
78 | int retval; | ||
79 | |||
80 | retval = vfs_statfs(dentry, &st); | ||
81 | if (retval) | ||
82 | return retval; | ||
83 | |||
84 | if (sizeof(*buf) == sizeof(st)) | ||
85 | memcpy(buf, &st, sizeof(st)); | ||
86 | else { | ||
87 | buf->f_type = st.f_type; | ||
88 | buf->f_bsize = st.f_bsize; | ||
89 | buf->f_blocks = st.f_blocks; | ||
90 | buf->f_bfree = st.f_bfree; | ||
91 | buf->f_bavail = st.f_bavail; | ||
92 | buf->f_files = st.f_files; | ||
93 | buf->f_ffree = st.f_ffree; | ||
94 | buf->f_fsid = st.f_fsid; | ||
95 | buf->f_namelen = st.f_namelen; | ||
96 | buf->f_frsize = st.f_frsize; | ||
97 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) | ||
103 | { | ||
104 | struct path path; | ||
105 | int error; | ||
106 | |||
107 | error = user_path(pathname, &path); | ||
108 | if (!error) { | ||
109 | struct statfs tmp; | ||
110 | error = vfs_statfs_native(path.dentry, &tmp); | ||
111 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
112 | error = -EFAULT; | ||
113 | path_put(&path); | ||
114 | } | ||
115 | return error; | ||
116 | } | ||
117 | |||
118 | SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) | ||
119 | { | ||
120 | struct path path; | ||
121 | long error; | ||
122 | |||
123 | if (sz != sizeof(*buf)) | ||
124 | return -EINVAL; | ||
125 | error = user_path(pathname, &path); | ||
126 | if (!error) { | ||
127 | struct statfs64 tmp; | ||
128 | error = vfs_statfs64(path.dentry, &tmp); | ||
129 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
130 | error = -EFAULT; | ||
131 | path_put(&path); | ||
132 | } | ||
133 | return error; | ||
134 | } | ||
135 | |||
136 | SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) | ||
137 | { | ||
138 | struct file *file; | ||
139 | struct statfs tmp; | ||
140 | int error; | ||
141 | |||
142 | error = -EBADF; | ||
143 | file = fget(fd); | ||
144 | if (!file) | ||
145 | goto out; | ||
146 | error = vfs_statfs_native(file->f_path.dentry, &tmp); | ||
147 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
148 | error = -EFAULT; | ||
149 | fput(file); | ||
150 | out: | ||
151 | return error; | ||
152 | } | ||
153 | |||
154 | SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) | ||
155 | { | ||
156 | struct file *file; | ||
157 | struct statfs64 tmp; | ||
158 | int error; | ||
159 | |||
160 | if (sz != sizeof(*buf)) | ||
161 | return -EINVAL; | ||
162 | |||
163 | error = -EBADF; | ||
164 | file = fget(fd); | ||
165 | if (!file) | ||
166 | goto out; | ||
167 | error = vfs_statfs64(file->f_path.dentry, &tmp); | ||
168 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
169 | error = -EFAULT; | ||
170 | fput(file); | ||
171 | out: | ||
172 | return error; | ||
173 | } | ||
174 | |||
175 | SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) | ||
176 | { | ||
177 | struct super_block *s; | ||
178 | struct ustat tmp; | ||
179 | struct kstatfs sbuf; | ||
180 | int err; | ||
181 | |||
182 | s = user_get_super(new_decode_dev(dev)); | ||
183 | if (!s) | ||
184 | return -EINVAL; | ||
185 | |||
186 | err = vfs_statfs(s->s_root, &sbuf); | ||
187 | drop_super(s); | ||
188 | if (err) | ||
189 | return err; | ||
190 | |||
191 | memset(&tmp,0,sizeof(struct ustat)); | ||
192 | tmp.f_tfree = sbuf.f_bfree; | ||
193 | tmp.f_tinode = sbuf.f_ffree; | ||
194 | |||
195 | return copy_to_user(ubuf, &tmp, sizeof(struct ustat)) ? -EFAULT : 0; | ||
196 | } | ||
diff --git a/fs/super.c b/fs/super.c index 1527e6a0ee35..69688b15f1fa 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -22,23 +22,15 @@ | |||
22 | 22 | ||
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/init.h> | ||
26 | #include <linux/smp_lock.h> | ||
27 | #include <linux/acct.h> | 25 | #include <linux/acct.h> |
28 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
29 | #include <linux/quotaops.h> | 27 | #include <linux/quotaops.h> |
30 | #include <linux/namei.h> | ||
31 | #include <linux/mount.h> | 28 | #include <linux/mount.h> |
32 | #include <linux/security.h> | 29 | #include <linux/security.h> |
33 | #include <linux/syscalls.h> | ||
34 | #include <linux/vfs.h> | ||
35 | #include <linux/writeback.h> /* for the emergency remount stuff */ | 30 | #include <linux/writeback.h> /* for the emergency remount stuff */ |
36 | #include <linux/idr.h> | 31 | #include <linux/idr.h> |
37 | #include <linux/kobject.h> | ||
38 | #include <linux/mutex.h> | 32 | #include <linux/mutex.h> |
39 | #include <linux/file.h> | ||
40 | #include <linux/backing-dev.h> | 33 | #include <linux/backing-dev.h> |
41 | #include <asm/uaccess.h> | ||
42 | #include "internal.h" | 34 | #include "internal.h" |
43 | 35 | ||
44 | 36 | ||
@@ -93,9 +85,10 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
93 | * subclass. | 85 | * subclass. |
94 | */ | 86 | */ |
95 | down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); | 87 | down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); |
96 | s->s_count = S_BIAS; | 88 | s->s_count = 1; |
97 | atomic_set(&s->s_active, 1); | 89 | atomic_set(&s->s_active, 1); |
98 | mutex_init(&s->s_vfs_rename_mutex); | 90 | mutex_init(&s->s_vfs_rename_mutex); |
91 | lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); | ||
99 | mutex_init(&s->s_dquot.dqio_mutex); | 92 | mutex_init(&s->s_dquot.dqio_mutex); |
100 | mutex_init(&s->s_dquot.dqonoff_mutex); | 93 | mutex_init(&s->s_dquot.dqonoff_mutex); |
101 | init_rwsem(&s->s_dquot.dqptr_sem); | 94 | init_rwsem(&s->s_dquot.dqptr_sem); |
@@ -127,39 +120,14 @@ static inline void destroy_super(struct super_block *s) | |||
127 | /* Superblock refcounting */ | 120 | /* Superblock refcounting */ |
128 | 121 | ||
129 | /* | 122 | /* |
130 | * Drop a superblock's refcount. Returns non-zero if the superblock was | 123 | * Drop a superblock's refcount. The caller must hold sb_lock. |
131 | * destroyed. The caller must hold sb_lock. | ||
132 | */ | 124 | */ |
133 | static int __put_super(struct super_block *sb) | 125 | void __put_super(struct super_block *sb) |
134 | { | 126 | { |
135 | int ret = 0; | ||
136 | |||
137 | if (!--sb->s_count) { | 127 | if (!--sb->s_count) { |
128 | list_del_init(&sb->s_list); | ||
138 | destroy_super(sb); | 129 | destroy_super(sb); |
139 | ret = 1; | ||
140 | } | 130 | } |
141 | return ret; | ||
142 | } | ||
143 | |||
144 | /* | ||
145 | * Drop a superblock's refcount. | ||
146 | * Returns non-zero if the superblock is about to be destroyed and | ||
147 | * at least is already removed from super_blocks list, so if we are | ||
148 | * making a loop through super blocks then we need to restart. | ||
149 | * The caller must hold sb_lock. | ||
150 | */ | ||
151 | int __put_super_and_need_restart(struct super_block *sb) | ||
152 | { | ||
153 | /* check for race with generic_shutdown_super() */ | ||
154 | if (list_empty(&sb->s_list)) { | ||
155 | /* super block is removed, need to restart... */ | ||
156 | __put_super(sb); | ||
157 | return 1; | ||
158 | } | ||
159 | /* can't be the last, since s_list is still in use */ | ||
160 | sb->s_count--; | ||
161 | BUG_ON(sb->s_count == 0); | ||
162 | return 0; | ||
163 | } | 131 | } |
164 | 132 | ||
165 | /** | 133 | /** |
@@ -178,57 +146,48 @@ void put_super(struct super_block *sb) | |||
178 | 146 | ||
179 | 147 | ||
180 | /** | 148 | /** |
181 | * deactivate_super - drop an active reference to superblock | 149 | * deactivate_locked_super - drop an active reference to superblock |
182 | * @s: superblock to deactivate | 150 | * @s: superblock to deactivate |
183 | * | 151 | * |
184 | * Drops an active reference to superblock, acquiring a temporary one if | 152 | * Drops an active reference to superblock, converting it into a temporary |
185 | * there is no active references left. In that case we lock superblock, | 153 | * one if there is no other active references left. In that case we |
186 | * tell fs driver to shut it down and drop the temporary reference we | 154 | * tell fs driver to shut it down and drop the temporary reference we |
187 | * had just acquired. | 155 | * had just acquired. |
156 | * | ||
157 | * Caller holds exclusive lock on superblock; that lock is released. | ||
188 | */ | 158 | */ |
189 | void deactivate_super(struct super_block *s) | 159 | void deactivate_locked_super(struct super_block *s) |
190 | { | 160 | { |
191 | struct file_system_type *fs = s->s_type; | 161 | struct file_system_type *fs = s->s_type; |
192 | if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { | 162 | if (atomic_dec_and_test(&s->s_active)) { |
193 | s->s_count -= S_BIAS-1; | ||
194 | spin_unlock(&sb_lock); | ||
195 | vfs_dq_off(s, 0); | 163 | vfs_dq_off(s, 0); |
196 | down_write(&s->s_umount); | ||
197 | fs->kill_sb(s); | 164 | fs->kill_sb(s); |
198 | put_filesystem(fs); | 165 | put_filesystem(fs); |
199 | put_super(s); | 166 | put_super(s); |
167 | } else { | ||
168 | up_write(&s->s_umount); | ||
200 | } | 169 | } |
201 | } | 170 | } |
202 | 171 | ||
203 | EXPORT_SYMBOL(deactivate_super); | 172 | EXPORT_SYMBOL(deactivate_locked_super); |
204 | 173 | ||
205 | /** | 174 | /** |
206 | * deactivate_locked_super - drop an active reference to superblock | 175 | * deactivate_super - drop an active reference to superblock |
207 | * @s: superblock to deactivate | 176 | * @s: superblock to deactivate |
208 | * | 177 | * |
209 | * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that | 178 | * Variant of deactivate_locked_super(), except that superblock is *not* |
210 | * it does not unlock it until it's all over. As the result, it's safe to | 179 | * locked by caller. If we are going to drop the final active reference, |
211 | * use to dispose of new superblock on ->get_sb() failure exits - nobody | 180 | * lock will be acquired prior to that. |
212 | * will see the sucker until it's all over. Equivalent using up_write + | ||
213 | * deactivate_super is safe for that purpose only if superblock is either | ||
214 | * safe to use or has NULL ->s_root when we unlock. | ||
215 | */ | 181 | */ |
216 | void deactivate_locked_super(struct super_block *s) | 182 | void deactivate_super(struct super_block *s) |
217 | { | 183 | { |
218 | struct file_system_type *fs = s->s_type; | 184 | if (!atomic_add_unless(&s->s_active, -1, 1)) { |
219 | if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { | 185 | down_write(&s->s_umount); |
220 | s->s_count -= S_BIAS-1; | 186 | deactivate_locked_super(s); |
221 | spin_unlock(&sb_lock); | ||
222 | vfs_dq_off(s, 0); | ||
223 | fs->kill_sb(s); | ||
224 | put_filesystem(fs); | ||
225 | put_super(s); | ||
226 | } else { | ||
227 | up_write(&s->s_umount); | ||
228 | } | 187 | } |
229 | } | 188 | } |
230 | 189 | ||
231 | EXPORT_SYMBOL(deactivate_locked_super); | 190 | EXPORT_SYMBOL(deactivate_super); |
232 | 191 | ||
233 | /** | 192 | /** |
234 | * grab_super - acquire an active reference | 193 | * grab_super - acquire an active reference |
@@ -243,22 +202,17 @@ EXPORT_SYMBOL(deactivate_locked_super); | |||
243 | */ | 202 | */ |
244 | static int grab_super(struct super_block *s) __releases(sb_lock) | 203 | static int grab_super(struct super_block *s) __releases(sb_lock) |
245 | { | 204 | { |
205 | if (atomic_inc_not_zero(&s->s_active)) { | ||
206 | spin_unlock(&sb_lock); | ||
207 | return 1; | ||
208 | } | ||
209 | /* it's going away */ | ||
246 | s->s_count++; | 210 | s->s_count++; |
247 | spin_unlock(&sb_lock); | 211 | spin_unlock(&sb_lock); |
212 | /* wait for it to die */ | ||
248 | down_write(&s->s_umount); | 213 | down_write(&s->s_umount); |
249 | if (s->s_root) { | ||
250 | spin_lock(&sb_lock); | ||
251 | if (s->s_count > S_BIAS) { | ||
252 | atomic_inc(&s->s_active); | ||
253 | s->s_count--; | ||
254 | spin_unlock(&sb_lock); | ||
255 | return 1; | ||
256 | } | ||
257 | spin_unlock(&sb_lock); | ||
258 | } | ||
259 | up_write(&s->s_umount); | 214 | up_write(&s->s_umount); |
260 | put_super(s); | 215 | put_super(s); |
261 | yield(); | ||
262 | return 0; | 216 | return 0; |
263 | } | 217 | } |
264 | 218 | ||
@@ -321,8 +275,7 @@ void generic_shutdown_super(struct super_block *sb) | |||
321 | } | 275 | } |
322 | spin_lock(&sb_lock); | 276 | spin_lock(&sb_lock); |
323 | /* should be initialized for __put_super_and_need_restart() */ | 277 | /* should be initialized for __put_super_and_need_restart() */ |
324 | list_del_init(&sb->s_list); | 278 | list_del_init(&sb->s_instances); |
325 | list_del(&sb->s_instances); | ||
326 | spin_unlock(&sb_lock); | 279 | spin_unlock(&sb_lock); |
327 | up_write(&sb->s_umount); | 280 | up_write(&sb->s_umount); |
328 | } | 281 | } |
@@ -357,6 +310,7 @@ retry: | |||
357 | up_write(&s->s_umount); | 310 | up_write(&s->s_umount); |
358 | destroy_super(s); | 311 | destroy_super(s); |
359 | } | 312 | } |
313 | down_write(&old->s_umount); | ||
360 | return old; | 314 | return old; |
361 | } | 315 | } |
362 | } | 316 | } |
@@ -408,11 +362,12 @@ EXPORT_SYMBOL(drop_super); | |||
408 | */ | 362 | */ |
409 | void sync_supers(void) | 363 | void sync_supers(void) |
410 | { | 364 | { |
411 | struct super_block *sb; | 365 | struct super_block *sb, *n; |
412 | 366 | ||
413 | spin_lock(&sb_lock); | 367 | spin_lock(&sb_lock); |
414 | restart: | 368 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { |
415 | list_for_each_entry(sb, &super_blocks, s_list) { | 369 | if (list_empty(&sb->s_instances)) |
370 | continue; | ||
416 | if (sb->s_op->write_super && sb->s_dirt) { | 371 | if (sb->s_op->write_super && sb->s_dirt) { |
417 | sb->s_count++; | 372 | sb->s_count++; |
418 | spin_unlock(&sb_lock); | 373 | spin_unlock(&sb_lock); |
@@ -423,14 +378,43 @@ restart: | |||
423 | up_read(&sb->s_umount); | 378 | up_read(&sb->s_umount); |
424 | 379 | ||
425 | spin_lock(&sb_lock); | 380 | spin_lock(&sb_lock); |
426 | if (__put_super_and_need_restart(sb)) | 381 | __put_super(sb); |
427 | goto restart; | ||
428 | } | 382 | } |
429 | } | 383 | } |
430 | spin_unlock(&sb_lock); | 384 | spin_unlock(&sb_lock); |
431 | } | 385 | } |
432 | 386 | ||
433 | /** | 387 | /** |
388 | * iterate_supers - call function for all active superblocks | ||
389 | * @f: function to call | ||
390 | * @arg: argument to pass to it | ||
391 | * | ||
392 | * Scans the superblock list and calls given function, passing it | ||
393 | * locked superblock and given argument. | ||
394 | */ | ||
395 | void iterate_supers(void (*f)(struct super_block *, void *), void *arg) | ||
396 | { | ||
397 | struct super_block *sb, *n; | ||
398 | |||
399 | spin_lock(&sb_lock); | ||
400 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { | ||
401 | if (list_empty(&sb->s_instances)) | ||
402 | continue; | ||
403 | sb->s_count++; | ||
404 | spin_unlock(&sb_lock); | ||
405 | |||
406 | down_read(&sb->s_umount); | ||
407 | if (sb->s_root) | ||
408 | f(sb, arg); | ||
409 | up_read(&sb->s_umount); | ||
410 | |||
411 | spin_lock(&sb_lock); | ||
412 | __put_super(sb); | ||
413 | } | ||
414 | spin_unlock(&sb_lock); | ||
415 | } | ||
416 | |||
417 | /** | ||
434 | * get_super - get the superblock of a device | 418 | * get_super - get the superblock of a device |
435 | * @bdev: device to get the superblock for | 419 | * @bdev: device to get the superblock for |
436 | * | 420 | * |
@@ -438,7 +422,7 @@ restart: | |||
438 | * mounted on the device given. %NULL is returned if no match is found. | 422 | * mounted on the device given. %NULL is returned if no match is found. |
439 | */ | 423 | */ |
440 | 424 | ||
441 | struct super_block * get_super(struct block_device *bdev) | 425 | struct super_block *get_super(struct block_device *bdev) |
442 | { | 426 | { |
443 | struct super_block *sb; | 427 | struct super_block *sb; |
444 | 428 | ||
@@ -448,17 +432,20 @@ struct super_block * get_super(struct block_device *bdev) | |||
448 | spin_lock(&sb_lock); | 432 | spin_lock(&sb_lock); |
449 | rescan: | 433 | rescan: |
450 | list_for_each_entry(sb, &super_blocks, s_list) { | 434 | list_for_each_entry(sb, &super_blocks, s_list) { |
435 | if (list_empty(&sb->s_instances)) | ||
436 | continue; | ||
451 | if (sb->s_bdev == bdev) { | 437 | if (sb->s_bdev == bdev) { |
452 | sb->s_count++; | 438 | sb->s_count++; |
453 | spin_unlock(&sb_lock); | 439 | spin_unlock(&sb_lock); |
454 | down_read(&sb->s_umount); | 440 | down_read(&sb->s_umount); |
441 | /* still alive? */ | ||
455 | if (sb->s_root) | 442 | if (sb->s_root) |
456 | return sb; | 443 | return sb; |
457 | up_read(&sb->s_umount); | 444 | up_read(&sb->s_umount); |
458 | /* restart only when sb is no longer on the list */ | 445 | /* nope, got unmounted */ |
459 | spin_lock(&sb_lock); | 446 | spin_lock(&sb_lock); |
460 | if (__put_super_and_need_restart(sb)) | 447 | __put_super(sb); |
461 | goto rescan; | 448 | goto rescan; |
462 | } | 449 | } |
463 | } | 450 | } |
464 | spin_unlock(&sb_lock); | 451 | spin_unlock(&sb_lock); |
@@ -473,7 +460,7 @@ EXPORT_SYMBOL(get_super); | |||
473 | * | 460 | * |
474 | * Scans the superblock list and finds the superblock of the file system | 461 | * Scans the superblock list and finds the superblock of the file system |
475 | * mounted on the device given. Returns the superblock with an active | 462 | * mounted on the device given. Returns the superblock with an active |
476 | * reference and s_umount held exclusively or %NULL if none was found. | 463 | * reference or %NULL if none was found. |
477 | */ | 464 | */ |
478 | struct super_block *get_active_super(struct block_device *bdev) | 465 | struct super_block *get_active_super(struct block_device *bdev) |
479 | { | 466 | { |
@@ -482,81 +469,49 @@ struct super_block *get_active_super(struct block_device *bdev) | |||
482 | if (!bdev) | 469 | if (!bdev) |
483 | return NULL; | 470 | return NULL; |
484 | 471 | ||
472 | restart: | ||
485 | spin_lock(&sb_lock); | 473 | spin_lock(&sb_lock); |
486 | list_for_each_entry(sb, &super_blocks, s_list) { | 474 | list_for_each_entry(sb, &super_blocks, s_list) { |
487 | if (sb->s_bdev != bdev) | 475 | if (list_empty(&sb->s_instances)) |
488 | continue; | 476 | continue; |
489 | 477 | if (sb->s_bdev == bdev) { | |
490 | sb->s_count++; | 478 | if (grab_super(sb)) /* drops sb_lock */ |
491 | spin_unlock(&sb_lock); | ||
492 | down_write(&sb->s_umount); | ||
493 | if (sb->s_root) { | ||
494 | spin_lock(&sb_lock); | ||
495 | if (sb->s_count > S_BIAS) { | ||
496 | atomic_inc(&sb->s_active); | ||
497 | sb->s_count--; | ||
498 | spin_unlock(&sb_lock); | ||
499 | return sb; | 479 | return sb; |
500 | } | 480 | else |
501 | spin_unlock(&sb_lock); | 481 | goto restart; |
502 | } | 482 | } |
503 | up_write(&sb->s_umount); | ||
504 | put_super(sb); | ||
505 | yield(); | ||
506 | spin_lock(&sb_lock); | ||
507 | } | 483 | } |
508 | spin_unlock(&sb_lock); | 484 | spin_unlock(&sb_lock); |
509 | return NULL; | 485 | return NULL; |
510 | } | 486 | } |
511 | 487 | ||
512 | struct super_block * user_get_super(dev_t dev) | 488 | struct super_block *user_get_super(dev_t dev) |
513 | { | 489 | { |
514 | struct super_block *sb; | 490 | struct super_block *sb; |
515 | 491 | ||
516 | spin_lock(&sb_lock); | 492 | spin_lock(&sb_lock); |
517 | rescan: | 493 | rescan: |
518 | list_for_each_entry(sb, &super_blocks, s_list) { | 494 | list_for_each_entry(sb, &super_blocks, s_list) { |
495 | if (list_empty(&sb->s_instances)) | ||
496 | continue; | ||
519 | if (sb->s_dev == dev) { | 497 | if (sb->s_dev == dev) { |
520 | sb->s_count++; | 498 | sb->s_count++; |
521 | spin_unlock(&sb_lock); | 499 | spin_unlock(&sb_lock); |
522 | down_read(&sb->s_umount); | 500 | down_read(&sb->s_umount); |
501 | /* still alive? */ | ||
523 | if (sb->s_root) | 502 | if (sb->s_root) |
524 | return sb; | 503 | return sb; |
525 | up_read(&sb->s_umount); | 504 | up_read(&sb->s_umount); |
526 | /* restart only when sb is no longer on the list */ | 505 | /* nope, got unmounted */ |
527 | spin_lock(&sb_lock); | 506 | spin_lock(&sb_lock); |
528 | if (__put_super_and_need_restart(sb)) | 507 | __put_super(sb); |
529 | goto rescan; | 508 | goto rescan; |
530 | } | 509 | } |
531 | } | 510 | } |
532 | spin_unlock(&sb_lock); | 511 | spin_unlock(&sb_lock); |
533 | return NULL; | 512 | return NULL; |
534 | } | 513 | } |
535 | 514 | ||
536 | SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) | ||
537 | { | ||
538 | struct super_block *s; | ||
539 | struct ustat tmp; | ||
540 | struct kstatfs sbuf; | ||
541 | int err = -EINVAL; | ||
542 | |||
543 | s = user_get_super(new_decode_dev(dev)); | ||
544 | if (s == NULL) | ||
545 | goto out; | ||
546 | err = vfs_statfs(s->s_root, &sbuf); | ||
547 | drop_super(s); | ||
548 | if (err) | ||
549 | goto out; | ||
550 | |||
551 | memset(&tmp,0,sizeof(struct ustat)); | ||
552 | tmp.f_tfree = sbuf.f_bfree; | ||
553 | tmp.f_tinode = sbuf.f_ffree; | ||
554 | |||
555 | err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0; | ||
556 | out: | ||
557 | return err; | ||
558 | } | ||
559 | |||
560 | /** | 515 | /** |
561 | * do_remount_sb - asks filesystem to change mount options. | 516 | * do_remount_sb - asks filesystem to change mount options. |
562 | * @sb: superblock in question | 517 | * @sb: superblock in question |
@@ -622,24 +577,24 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
622 | 577 | ||
623 | static void do_emergency_remount(struct work_struct *work) | 578 | static void do_emergency_remount(struct work_struct *work) |
624 | { | 579 | { |
625 | struct super_block *sb; | 580 | struct super_block *sb, *n; |
626 | 581 | ||
627 | spin_lock(&sb_lock); | 582 | spin_lock(&sb_lock); |
628 | list_for_each_entry(sb, &super_blocks, s_list) { | 583 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { |
584 | if (list_empty(&sb->s_instances)) | ||
585 | continue; | ||
629 | sb->s_count++; | 586 | sb->s_count++; |
630 | spin_unlock(&sb_lock); | 587 | spin_unlock(&sb_lock); |
631 | down_write(&sb->s_umount); | 588 | down_write(&sb->s_umount); |
632 | if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { | 589 | if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { |
633 | /* | 590 | /* |
634 | * ->remount_fs needs lock_kernel(). | ||
635 | * | ||
636 | * What lock protects sb->s_flags?? | 591 | * What lock protects sb->s_flags?? |
637 | */ | 592 | */ |
638 | do_remount_sb(sb, MS_RDONLY, NULL, 1); | 593 | do_remount_sb(sb, MS_RDONLY, NULL, 1); |
639 | } | 594 | } |
640 | up_write(&sb->s_umount); | 595 | up_write(&sb->s_umount); |
641 | put_super(sb); | ||
642 | spin_lock(&sb_lock); | 596 | spin_lock(&sb_lock); |
597 | __put_super(sb); | ||
643 | } | 598 | } |
644 | spin_unlock(&sb_lock); | 599 | spin_unlock(&sb_lock); |
645 | kfree(work); | 600 | kfree(work); |
@@ -990,6 +945,96 @@ out: | |||
990 | 945 | ||
991 | EXPORT_SYMBOL_GPL(vfs_kern_mount); | 946 | EXPORT_SYMBOL_GPL(vfs_kern_mount); |
992 | 947 | ||
948 | /** | ||
949 | * freeze_super -- lock the filesystem and force it into a consistent state | ||
950 | * @sb: the super to lock | ||
951 | * | ||
952 | * Syncs the super to make sure the filesystem is consistent and calls the fs's | ||
953 | * freeze_fs. Subsequent calls to this without first thawing the fs will return | ||
954 | * -EBUSY. | ||
955 | */ | ||
956 | int freeze_super(struct super_block *sb) | ||
957 | { | ||
958 | int ret; | ||
959 | |||
960 | atomic_inc(&sb->s_active); | ||
961 | down_write(&sb->s_umount); | ||
962 | if (sb->s_frozen) { | ||
963 | deactivate_locked_super(sb); | ||
964 | return -EBUSY; | ||
965 | } | ||
966 | |||
967 | if (sb->s_flags & MS_RDONLY) { | ||
968 | sb->s_frozen = SB_FREEZE_TRANS; | ||
969 | smp_wmb(); | ||
970 | up_write(&sb->s_umount); | ||
971 | return 0; | ||
972 | } | ||
973 | |||
974 | sb->s_frozen = SB_FREEZE_WRITE; | ||
975 | smp_wmb(); | ||
976 | |||
977 | sync_filesystem(sb); | ||
978 | |||
979 | sb->s_frozen = SB_FREEZE_TRANS; | ||
980 | smp_wmb(); | ||
981 | |||
982 | sync_blockdev(sb->s_bdev); | ||
983 | if (sb->s_op->freeze_fs) { | ||
984 | ret = sb->s_op->freeze_fs(sb); | ||
985 | if (ret) { | ||
986 | printk(KERN_ERR | ||
987 | "VFS:Filesystem freeze failed\n"); | ||
988 | sb->s_frozen = SB_UNFROZEN; | ||
989 | deactivate_locked_super(sb); | ||
990 | return ret; | ||
991 | } | ||
992 | } | ||
993 | up_write(&sb->s_umount); | ||
994 | return 0; | ||
995 | } | ||
996 | EXPORT_SYMBOL(freeze_super); | ||
997 | |||
998 | /** | ||
999 | * thaw_super -- unlock filesystem | ||
1000 | * @sb: the super to thaw | ||
1001 | * | ||
1002 | * Unlocks the filesystem and marks it writeable again after freeze_super(). | ||
1003 | */ | ||
1004 | int thaw_super(struct super_block *sb) | ||
1005 | { | ||
1006 | int error; | ||
1007 | |||
1008 | down_write(&sb->s_umount); | ||
1009 | if (sb->s_frozen == SB_UNFROZEN) { | ||
1010 | up_write(&sb->s_umount); | ||
1011 | return -EINVAL; | ||
1012 | } | ||
1013 | |||
1014 | if (sb->s_flags & MS_RDONLY) | ||
1015 | goto out; | ||
1016 | |||
1017 | if (sb->s_op->unfreeze_fs) { | ||
1018 | error = sb->s_op->unfreeze_fs(sb); | ||
1019 | if (error) { | ||
1020 | printk(KERN_ERR | ||
1021 | "VFS:Filesystem thaw failed\n"); | ||
1022 | sb->s_frozen = SB_FREEZE_TRANS; | ||
1023 | up_write(&sb->s_umount); | ||
1024 | return error; | ||
1025 | } | ||
1026 | } | ||
1027 | |||
1028 | out: | ||
1029 | sb->s_frozen = SB_UNFROZEN; | ||
1030 | smp_wmb(); | ||
1031 | wake_up(&sb->s_wait_unfrozen); | ||
1032 | deactivate_locked_super(sb); | ||
1033 | |||
1034 | return 0; | ||
1035 | } | ||
1036 | EXPORT_SYMBOL(thaw_super); | ||
1037 | |||
993 | static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) | 1038 | static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) |
994 | { | 1039 | { |
995 | int err; | 1040 | int err; |
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) | |||
42 | if (wait) | 42 | if (wait) |
43 | sync_inodes_sb(sb); | 43 | sync_inodes_sb(sb); |
44 | else | 44 | else |
45 | writeback_inodes_sb(sb); | 45 | writeback_inodes_sb_locked(sb); |
46 | 46 | ||
47 | if (sb->s_op->sync_fs) | 47 | if (sb->s_op->sync_fs) |
48 | sb->s_op->sync_fs(sb, wait); | 48 | sb->s_op->sync_fs(sb, wait); |
@@ -77,50 +77,18 @@ int sync_filesystem(struct super_block *sb) | |||
77 | } | 77 | } |
78 | EXPORT_SYMBOL_GPL(sync_filesystem); | 78 | EXPORT_SYMBOL_GPL(sync_filesystem); |
79 | 79 | ||
80 | static void sync_one_sb(struct super_block *sb, void *arg) | ||
81 | { | ||
82 | if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi) | ||
83 | __sync_filesystem(sb, *(int *)arg); | ||
84 | } | ||
80 | /* | 85 | /* |
81 | * Sync all the data for all the filesystems (called by sys_sync() and | 86 | * Sync all the data for all the filesystems (called by sys_sync() and |
82 | * emergency sync) | 87 | * emergency sync) |
83 | * | ||
84 | * This operation is careful to avoid the livelock which could easily happen | ||
85 | * if two or more filesystems are being continuously dirtied. s_need_sync | ||
86 | * is used only here. We set it against all filesystems and then clear it as | ||
87 | * we sync them. So redirtied filesystems are skipped. | ||
88 | * | ||
89 | * But if process A is currently running sync_filesystems and then process B | ||
90 | * calls sync_filesystems as well, process B will set all the s_need_sync | ||
91 | * flags again, which will cause process A to resync everything. Fix that with | ||
92 | * a local mutex. | ||
93 | */ | 88 | */ |
94 | static void sync_filesystems(int wait) | 89 | static void sync_filesystems(int wait) |
95 | { | 90 | { |
96 | struct super_block *sb; | 91 | iterate_supers(sync_one_sb, &wait); |
97 | static DEFINE_MUTEX(mutex); | ||
98 | |||
99 | mutex_lock(&mutex); /* Could be down_interruptible */ | ||
100 | spin_lock(&sb_lock); | ||
101 | list_for_each_entry(sb, &super_blocks, s_list) | ||
102 | sb->s_need_sync = 1; | ||
103 | |||
104 | restart: | ||
105 | list_for_each_entry(sb, &super_blocks, s_list) { | ||
106 | if (!sb->s_need_sync) | ||
107 | continue; | ||
108 | sb->s_need_sync = 0; | ||
109 | sb->s_count++; | ||
110 | spin_unlock(&sb_lock); | ||
111 | |||
112 | down_read(&sb->s_umount); | ||
113 | if (!(sb->s_flags & MS_RDONLY) && sb->s_root && sb->s_bdi) | ||
114 | __sync_filesystem(sb, wait); | ||
115 | up_read(&sb->s_umount); | ||
116 | |||
117 | /* restart only when sb is no longer on the list */ | ||
118 | spin_lock(&sb_lock); | ||
119 | if (__put_super_and_need_restart(sb)) | ||
120 | goto restart; | ||
121 | } | ||
122 | spin_unlock(&sb_lock); | ||
123 | mutex_unlock(&mutex); | ||
124 | } | 92 | } |
125 | 93 | ||
126 | /* | 94 | /* |
@@ -190,7 +158,6 @@ EXPORT_SYMBOL(file_fsync); | |||
190 | /** | 158 | /** |
191 | * vfs_fsync_range - helper to sync a range of data & metadata to disk | 159 | * vfs_fsync_range - helper to sync a range of data & metadata to disk |
192 | * @file: file to sync | 160 | * @file: file to sync |
193 | * @dentry: dentry of @file | ||
194 | * @start: offset in bytes of the beginning of data range to sync | 161 | * @start: offset in bytes of the beginning of data range to sync |
195 | * @end: offset in bytes of the end of data range (inclusive) | 162 | * @end: offset in bytes of the end of data range (inclusive) |
196 | * @datasync: perform only datasync | 163 | * @datasync: perform only datasync |
@@ -198,32 +165,13 @@ EXPORT_SYMBOL(file_fsync); | |||
198 | * Write back data in range @start..@end and metadata for @file to disk. If | 165 | * Write back data in range @start..@end and metadata for @file to disk. If |
199 | * @datasync is set only metadata needed to access modified file data is | 166 | * @datasync is set only metadata needed to access modified file data is |
200 | * written. | 167 | * written. |
201 | * | ||
202 | * In case this function is called from nfsd @file may be %NULL and | ||
203 | * only @dentry is set. This can only happen when the filesystem | ||
204 | * implements the export_operations API. | ||
205 | */ | 168 | */ |
206 | int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start, | 169 | int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) |
207 | loff_t end, int datasync) | ||
208 | { | 170 | { |
209 | const struct file_operations *fop; | 171 | struct address_space *mapping = file->f_mapping; |
210 | struct address_space *mapping; | ||
211 | int err, ret; | 172 | int err, ret; |
212 | 173 | ||
213 | /* | 174 | if (!file->f_op || !file->f_op->fsync) { |
214 | * Get mapping and operations from the file in case we have | ||
215 | * as file, or get the default values for them in case we | ||
216 | * don't have a struct file available. Damn nfsd.. | ||
217 | */ | ||
218 | if (file) { | ||
219 | mapping = file->f_mapping; | ||
220 | fop = file->f_op; | ||
221 | } else { | ||
222 | mapping = dentry->d_inode->i_mapping; | ||
223 | fop = dentry->d_inode->i_fop; | ||
224 | } | ||
225 | |||
226 | if (!fop || !fop->fsync) { | ||
227 | ret = -EINVAL; | 175 | ret = -EINVAL; |
228 | goto out; | 176 | goto out; |
229 | } | 177 | } |
@@ -235,7 +183,7 @@ int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start, | |||
235 | * livelocks in fsync_buffers_list(). | 183 | * livelocks in fsync_buffers_list(). |
236 | */ | 184 | */ |
237 | mutex_lock(&mapping->host->i_mutex); | 185 | mutex_lock(&mapping->host->i_mutex); |
238 | err = fop->fsync(file, dentry, datasync); | 186 | err = file->f_op->fsync(file, file->f_path.dentry, datasync); |
239 | if (!ret) | 187 | if (!ret) |
240 | ret = err; | 188 | ret = err; |
241 | mutex_unlock(&mapping->host->i_mutex); | 189 | mutex_unlock(&mapping->host->i_mutex); |
@@ -248,19 +196,14 @@ EXPORT_SYMBOL(vfs_fsync_range); | |||
248 | /** | 196 | /** |
249 | * vfs_fsync - perform a fsync or fdatasync on a file | 197 | * vfs_fsync - perform a fsync or fdatasync on a file |
250 | * @file: file to sync | 198 | * @file: file to sync |
251 | * @dentry: dentry of @file | ||
252 | * @datasync: only perform a fdatasync operation | 199 | * @datasync: only perform a fdatasync operation |
253 | * | 200 | * |
254 | * Write back data and metadata for @file to disk. If @datasync is | 201 | * Write back data and metadata for @file to disk. If @datasync is |
255 | * set only metadata needed to access modified file data is written. | 202 | * set only metadata needed to access modified file data is written. |
256 | * | ||
257 | * In case this function is called from nfsd @file may be %NULL and | ||
258 | * only @dentry is set. This can only happen when the filesystem | ||
259 | * implements the export_operations API. | ||
260 | */ | 203 | */ |
261 | int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) | 204 | int vfs_fsync(struct file *file, int datasync) |
262 | { | 205 | { |
263 | return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync); | 206 | return vfs_fsync_range(file, 0, LLONG_MAX, datasync); |
264 | } | 207 | } |
265 | EXPORT_SYMBOL(vfs_fsync); | 208 | EXPORT_SYMBOL(vfs_fsync); |
266 | 209 | ||
@@ -271,7 +214,7 @@ static int do_fsync(unsigned int fd, int datasync) | |||
271 | 214 | ||
272 | file = fget(fd); | 215 | file = fget(fd); |
273 | if (file) { | 216 | if (file) { |
274 | ret = vfs_fsync(file, file->f_path.dentry, datasync); | 217 | ret = vfs_fsync(file, datasync); |
275 | fput(file); | 218 | fput(file); |
276 | } | 219 | } |
277 | return ret; | 220 | return ret; |
@@ -299,8 +242,7 @@ int generic_write_sync(struct file *file, loff_t pos, loff_t count) | |||
299 | { | 242 | { |
300 | if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) | 243 | if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) |
301 | return 0; | 244 | return 0; |
302 | return vfs_fsync_range(file, file->f_path.dentry, pos, | 245 | return vfs_fsync_range(file, pos, pos + count - 1, |
303 | pos + count - 1, | ||
304 | (file->f_flags & __O_SYNC) ? 0 : 1); | 246 | (file->f_flags & __O_SYNC) ? 0 : 1); |
305 | } | 247 | } |
306 | EXPORT_SYMBOL(generic_write_sync); | 248 | EXPORT_SYMBOL(generic_write_sync); |
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index 241e9765cfad..bbd69bdb0fa8 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c | |||
@@ -159,15 +159,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) | |||
159 | *sbi->s_sb_fic_count = cpu_to_fs16(sbi, count); | 159 | *sbi->s_sb_fic_count = cpu_to_fs16(sbi, count); |
160 | fs16_add(sbi, sbi->s_sb_total_free_inodes, -1); | 160 | fs16_add(sbi, sbi->s_sb_total_free_inodes, -1); |
161 | dirty_sb(sb); | 161 | dirty_sb(sb); |
162 | 162 | inode_init_owner(inode, dir, mode); | |
163 | if (dir->i_mode & S_ISGID) { | ||
164 | inode->i_gid = dir->i_gid; | ||
165 | if (S_ISDIR(mode)) | ||
166 | mode |= S_ISGID; | ||
167 | } else | ||
168 | inode->i_gid = current_fsgid(); | ||
169 | |||
170 | inode->i_uid = current_fsuid(); | ||
171 | inode->i_ino = fs16_to_cpu(sbi, ino); | 163 | inode->i_ino = fs16_to_cpu(sbi, ino); |
172 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 164 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
173 | inode->i_blocks = 0; | 165 | inode->i_blocks = 0; |
@@ -176,7 +168,6 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) | |||
176 | insert_inode_hash(inode); | 168 | insert_inode_hash(inode); |
177 | mark_inode_dirty(inode); | 169 | mark_inode_dirty(inode); |
178 | 170 | ||
179 | inode->i_mode = mode; /* for sysv_write_inode() */ | ||
180 | sysv_write_inode(inode, 0); /* ensure inode not allocated again */ | 171 | sysv_write_inode(inode, 0); /* ensure inode not allocated again */ |
181 | mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ | 172 | mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ |
182 | /* That's it. */ | 173 | /* That's it. */ |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 401e503d44a1..87ebcce72213 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -104,14 +104,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, | |||
104 | */ | 104 | */ |
105 | inode->i_flags |= (S_NOCMTIME); | 105 | inode->i_flags |= (S_NOCMTIME); |
106 | 106 | ||
107 | inode->i_uid = current_fsuid(); | 107 | inode_init_owner(inode, dir, mode); |
108 | if (dir->i_mode & S_ISGID) { | ||
109 | inode->i_gid = dir->i_gid; | ||
110 | if (S_ISDIR(mode)) | ||
111 | mode |= S_ISGID; | ||
112 | } else | ||
113 | inode->i_gid = current_fsgid(); | ||
114 | inode->i_mode = mode; | ||
115 | inode->i_mtime = inode->i_atime = inode->i_ctime = | 108 | inode->i_mtime = inode->i_atime = inode->i_ctime = |
116 | ubifs_current_time(inode); | 109 | ubifs_current_time(inode); |
117 | inode->i_mapping->nrpages = 0; | 110 | inode->i_mapping->nrpages = 0; |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index fb68c9cd0c3e..2b5586c7f02a 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -124,15 +124,8 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
124 | udf_updated_lvid(sb); | 124 | udf_updated_lvid(sb); |
125 | } | 125 | } |
126 | mutex_unlock(&sbi->s_alloc_mutex); | 126 | mutex_unlock(&sbi->s_alloc_mutex); |
127 | inode->i_mode = mode; | 127 | |
128 | inode->i_uid = current_fsuid(); | 128 | inode_init_owner(inode, dir, mode); |
129 | if (dir->i_mode & S_ISGID) { | ||
130 | inode->i_gid = dir->i_gid; | ||
131 | if (S_ISDIR(mode)) | ||
132 | mode |= S_ISGID; | ||
133 | } else { | ||
134 | inode->i_gid = current_fsgid(); | ||
135 | } | ||
136 | 129 | ||
137 | iinfo->i_location.logicalBlockNum = block; | 130 | iinfo->i_location.logicalBlockNum = block; |
138 | iinfo->i_location.partitionReferenceNum = | 131 | iinfo->i_location.partitionReferenceNum = |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 75816025f95f..585f733615dc 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -579,7 +579,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, | |||
579 | inode->i_data.a_ops = &udf_aops; | 579 | inode->i_data.a_ops = &udf_aops; |
580 | inode->i_op = &udf_file_inode_operations; | 580 | inode->i_op = &udf_file_inode_operations; |
581 | inode->i_fop = &udf_file_operations; | 581 | inode->i_fop = &udf_file_operations; |
582 | inode->i_mode = mode; | ||
583 | mark_inode_dirty(inode); | 582 | mark_inode_dirty(inode); |
584 | 583 | ||
585 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); | 584 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); |
@@ -627,7 +626,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, | |||
627 | goto out; | 626 | goto out; |
628 | 627 | ||
629 | iinfo = UDF_I(inode); | 628 | iinfo = UDF_I(inode); |
630 | inode->i_uid = current_fsuid(); | ||
631 | init_special_inode(inode, mode, rdev); | 629 | init_special_inode(inode, mode, rdev); |
632 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); | 630 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); |
633 | if (!fi) { | 631 | if (!fi) { |
@@ -674,7 +672,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
674 | goto out; | 672 | goto out; |
675 | 673 | ||
676 | err = -EIO; | 674 | err = -EIO; |
677 | inode = udf_new_inode(dir, S_IFDIR, &err); | 675 | inode = udf_new_inode(dir, S_IFDIR | mode, &err); |
678 | if (!inode) | 676 | if (!inode) |
679 | goto out; | 677 | goto out; |
680 | 678 | ||
@@ -697,9 +695,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
697 | FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT; | 695 | FID_FILE_CHAR_DIRECTORY | FID_FILE_CHAR_PARENT; |
698 | udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL); | 696 | udf_write_fi(inode, &cfi, fi, &fibh, NULL, NULL); |
699 | brelse(fibh.sbh); | 697 | brelse(fibh.sbh); |
700 | inode->i_mode = S_IFDIR | mode; | ||
701 | if (dir->i_mode & S_ISGID) | ||
702 | inode->i_mode |= S_ISGID; | ||
703 | mark_inode_dirty(inode); | 698 | mark_inode_dirty(inode); |
704 | 699 | ||
705 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); | 700 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); |
@@ -912,7 +907,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
912 | dquot_initialize(dir); | 907 | dquot_initialize(dir); |
913 | 908 | ||
914 | lock_kernel(); | 909 | lock_kernel(); |
915 | inode = udf_new_inode(dir, S_IFLNK, &err); | 910 | inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err); |
916 | if (!inode) | 911 | if (!inode) |
917 | goto out; | 912 | goto out; |
918 | 913 | ||
@@ -923,7 +918,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
923 | } | 918 | } |
924 | 919 | ||
925 | iinfo = UDF_I(inode); | 920 | iinfo = UDF_I(inode); |
926 | inode->i_mode = S_IFLNK | S_IRWXUGO; | ||
927 | inode->i_data.a_ops = &udf_symlink_aops; | 921 | inode->i_data.a_ops = &udf_symlink_aops; |
928 | inode->i_op = &udf_symlink_inode_operations; | 922 | inode->i_op = &udf_symlink_inode_operations; |
929 | 923 | ||
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 230ecf608026..3a959d55084d 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c | |||
@@ -303,15 +303,7 @@ cg_found: | |||
303 | sb->s_dirt = 1; | 303 | sb->s_dirt = 1; |
304 | 304 | ||
305 | inode->i_ino = cg * uspi->s_ipg + bit; | 305 | inode->i_ino = cg * uspi->s_ipg + bit; |
306 | inode->i_mode = mode; | 306 | inode_init_owner(inode, dir, mode); |
307 | inode->i_uid = current_fsuid(); | ||
308 | if (dir->i_mode & S_ISGID) { | ||
309 | inode->i_gid = dir->i_gid; | ||
310 | if (S_ISDIR(mode)) | ||
311 | inode->i_mode |= S_ISGID; | ||
312 | } else | ||
313 | inode->i_gid = current_fsgid(); | ||
314 | |||
315 | inode->i_blocks = 0; | 307 | inode->i_blocks = 0; |
316 | inode->i_generation = 0; | 308 | inode->i_generation = 0; |
317 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 309 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
diff --git a/fs/xattr.c b/fs/xattr.c index 46f87e828b48..01bb8135e14a 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -590,10 +590,10 @@ strcmp_prefix(const char *a, const char *a_prefix) | |||
590 | /* | 590 | /* |
591 | * Find the xattr_handler with the matching prefix. | 591 | * Find the xattr_handler with the matching prefix. |
592 | */ | 592 | */ |
593 | static struct xattr_handler * | 593 | static const struct xattr_handler * |
594 | xattr_resolve_name(struct xattr_handler **handlers, const char **name) | 594 | xattr_resolve_name(const struct xattr_handler **handlers, const char **name) |
595 | { | 595 | { |
596 | struct xattr_handler *handler; | 596 | const struct xattr_handler *handler; |
597 | 597 | ||
598 | if (!*name) | 598 | if (!*name) |
599 | return NULL; | 599 | return NULL; |
@@ -614,7 +614,7 @@ xattr_resolve_name(struct xattr_handler **handlers, const char **name) | |||
614 | ssize_t | 614 | ssize_t |
615 | generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) | 615 | generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) |
616 | { | 616 | { |
617 | struct xattr_handler *handler; | 617 | const struct xattr_handler *handler; |
618 | 618 | ||
619 | handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); | 619 | handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); |
620 | if (!handler) | 620 | if (!handler) |
@@ -629,7 +629,7 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s | |||
629 | ssize_t | 629 | ssize_t |
630 | generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) | 630 | generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) |
631 | { | 631 | { |
632 | struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr; | 632 | const struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr; |
633 | unsigned int size = 0; | 633 | unsigned int size = 0; |
634 | 634 | ||
635 | if (!buffer) { | 635 | if (!buffer) { |
@@ -659,7 +659,7 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
659 | int | 659 | int |
660 | generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) | 660 | generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) |
661 | { | 661 | { |
662 | struct xattr_handler *handler; | 662 | const struct xattr_handler *handler; |
663 | 663 | ||
664 | if (size == 0) | 664 | if (size == 0) |
665 | value = ""; /* empty EA, do not remove */ | 665 | value = ""; /* empty EA, do not remove */ |
@@ -676,7 +676,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz | |||
676 | int | 676 | int |
677 | generic_removexattr(struct dentry *dentry, const char *name) | 677 | generic_removexattr(struct dentry *dentry, const char *name) |
678 | { | 678 | { |
679 | struct xattr_handler *handler; | 679 | const struct xattr_handler *handler; |
680 | 680 | ||
681 | handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); | 681 | handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); |
682 | if (!handler) | 682 | if (!handler) |
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index a7bc925c4d60..9f769b5b38fc 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c | |||
@@ -440,14 +440,14 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name, | |||
440 | return error; | 440 | return error; |
441 | } | 441 | } |
442 | 442 | ||
443 | struct xattr_handler xfs_xattr_acl_access_handler = { | 443 | const struct xattr_handler xfs_xattr_acl_access_handler = { |
444 | .prefix = POSIX_ACL_XATTR_ACCESS, | 444 | .prefix = POSIX_ACL_XATTR_ACCESS, |
445 | .flags = ACL_TYPE_ACCESS, | 445 | .flags = ACL_TYPE_ACCESS, |
446 | .get = xfs_xattr_acl_get, | 446 | .get = xfs_xattr_acl_get, |
447 | .set = xfs_xattr_acl_set, | 447 | .set = xfs_xattr_acl_set, |
448 | }; | 448 | }; |
449 | 449 | ||
450 | struct xattr_handler xfs_xattr_acl_default_handler = { | 450 | const struct xattr_handler xfs_xattr_acl_default_handler = { |
451 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 451 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
452 | .flags = ACL_TYPE_DEFAULT, | 452 | .flags = ACL_TYPE_DEFAULT, |
453 | .get = xfs_xattr_acl_get, | 453 | .get = xfs_xattr_acl_get, |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index e9002513e08f..f24dbe5efde3 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -725,7 +725,8 @@ void | |||
725 | xfs_blkdev_issue_flush( | 725 | xfs_blkdev_issue_flush( |
726 | xfs_buftarg_t *buftarg) | 726 | xfs_buftarg_t *buftarg) |
727 | { | 727 | { |
728 | blkdev_issue_flush(buftarg->bt_bdev, NULL); | 728 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, |
729 | BLKDEV_IFL_WAIT); | ||
729 | } | 730 | } |
730 | 731 | ||
731 | STATIC void | 732 | STATIC void |
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 233d4b9881b1..519618e9279e 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -85,7 +85,7 @@ extern __uint64_t xfs_max_file_offset(unsigned int); | |||
85 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); | 85 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); |
86 | 86 | ||
87 | extern const struct export_operations xfs_export_operations; | 87 | extern const struct export_operations xfs_export_operations; |
88 | extern struct xattr_handler *xfs_xattr_handlers[]; | 88 | extern const struct xattr_handler *xfs_xattr_handlers[]; |
89 | extern const struct quotactl_ops xfs_quotactl_operations; | 89 | extern const struct quotactl_ops xfs_quotactl_operations; |
90 | 90 | ||
91 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) | 91 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) |
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c index fa01b9daba6b..87d3e03878c8 100644 --- a/fs/xfs/linux-2.6/xfs_xattr.c +++ b/fs/xfs/linux-2.6/xfs_xattr.c | |||
@@ -72,28 +72,28 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, | |||
72 | (void *)value, size, xflags); | 72 | (void *)value, size, xflags); |
73 | } | 73 | } |
74 | 74 | ||
75 | static struct xattr_handler xfs_xattr_user_handler = { | 75 | static const struct xattr_handler xfs_xattr_user_handler = { |
76 | .prefix = XATTR_USER_PREFIX, | 76 | .prefix = XATTR_USER_PREFIX, |
77 | .flags = 0, /* no flags implies user namespace */ | 77 | .flags = 0, /* no flags implies user namespace */ |
78 | .get = xfs_xattr_get, | 78 | .get = xfs_xattr_get, |
79 | .set = xfs_xattr_set, | 79 | .set = xfs_xattr_set, |
80 | }; | 80 | }; |
81 | 81 | ||
82 | static struct xattr_handler xfs_xattr_trusted_handler = { | 82 | static const struct xattr_handler xfs_xattr_trusted_handler = { |
83 | .prefix = XATTR_TRUSTED_PREFIX, | 83 | .prefix = XATTR_TRUSTED_PREFIX, |
84 | .flags = ATTR_ROOT, | 84 | .flags = ATTR_ROOT, |
85 | .get = xfs_xattr_get, | 85 | .get = xfs_xattr_get, |
86 | .set = xfs_xattr_set, | 86 | .set = xfs_xattr_set, |
87 | }; | 87 | }; |
88 | 88 | ||
89 | static struct xattr_handler xfs_xattr_security_handler = { | 89 | static const struct xattr_handler xfs_xattr_security_handler = { |
90 | .prefix = XATTR_SECURITY_PREFIX, | 90 | .prefix = XATTR_SECURITY_PREFIX, |
91 | .flags = ATTR_SECURE, | 91 | .flags = ATTR_SECURE, |
92 | .get = xfs_xattr_get, | 92 | .get = xfs_xattr_get, |
93 | .set = xfs_xattr_set, | 93 | .set = xfs_xattr_set, |
94 | }; | 94 | }; |
95 | 95 | ||
96 | struct xattr_handler *xfs_xattr_handlers[] = { | 96 | const struct xattr_handler *xfs_xattr_handlers[] = { |
97 | &xfs_xattr_user_handler, | 97 | &xfs_xattr_user_handler, |
98 | &xfs_xattr_trusted_handler, | 98 | &xfs_xattr_trusted_handler, |
99 | &xfs_xattr_security_handler, | 99 | &xfs_xattr_security_handler, |
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index d13eeba2c8f8..0135e2a669d7 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h | |||
@@ -49,8 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode); | |||
49 | extern int posix_acl_access_exists(struct inode *inode); | 49 | extern int posix_acl_access_exists(struct inode *inode); |
50 | extern int posix_acl_default_exists(struct inode *inode); | 50 | extern int posix_acl_default_exists(struct inode *inode); |
51 | 51 | ||
52 | extern struct xattr_handler xfs_xattr_acl_access_handler; | 52 | extern const struct xattr_handler xfs_xattr_acl_access_handler; |
53 | extern struct xattr_handler xfs_xattr_acl_default_handler; | 53 | extern const struct xattr_handler xfs_xattr_acl_default_handler; |
54 | #else | 54 | #else |
55 | # define xfs_check_acl NULL | 55 | # define xfs_check_acl NULL |
56 | # define xfs_get_acl(inode, type) NULL | 56 | # define xfs_get_acl(inode, type) NULL |